def __init__(self, random_state=84, n_estimators=20, params=None, niterations=10):
    """Create an MLTSVM model together with its hyper-parameter grid.

    Args:
        random_state: Accepted for interface compatibility; not used
            inside this constructor.
        n_estimators: Accepted for interface compatibility; not used
            inside this constructor.
        params: Hyper-parameter grid stored as ``self.params``. When left
            as ``None`` a fresh ``{'c_k': [2**-7, 2**-5, ..., 2**5]}``
            grid is created for this instance.
        niterations: Stored as ``self.niterations``.
    """
    # A dict literal in the signature would be created once and shared by
    # every instance relying on the default; build it per call instead.
    # A dict supplied by the caller is stored as-is (no copy).
    if params is None:
        params = {'c_k': [2**i for i in range(-7, 7, 2)]}
    self.model = MLTSVM()
    self.params = params
    self.niterations = niterations
def ML_model_predict(train_x, train_y, test_x, model_name):
    """Fit the classifier selected by ``model_name`` and predict on ``test_x``.

    Args:
        train_x: Training features, forwarded to ``classifier.fit``.
        train_y: Training labels, forwarded to ``classifier.fit``.
        test_x: Features forwarded to ``classifier.predict``.
        model_name: One of ``"MLARAM"``, ``"MLkNN"``, ``"BRkNNa"``,
            ``"BRkNNb"``, ``"RF"`` or ``"MLTSVM"``.

    Returns:
        The value returned by ``classifier.predict(test_x)``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # Lazy factories: only the requested estimator is ever instantiated.
    factories = {
        "MLARAM": lambda: MLARAM(threshold=0.2),
        "MLkNN": lambda: MLkNN(),
        "BRkNNa": lambda: BRkNNaClassifier(),
        "BRkNNb": lambda: BRkNNbClassifier(),
        "RF": lambda: RandomForestClassifier(
            n_estimators=1000, random_state=0, n_jobs=-1
        ),
        "MLTSVM": lambda: MLTSVM(c_k=2**-1),
    }
    build = factories.get(model_name)
    if build is None:
        # Fail with a clear message instead of calling ``.fit`` on ``None``.
        supported = ", ".join(factories)
        raise ValueError(
            f"Unknown model_name {model_name!r}; expected one of: {supported}"
        )

    print(f"--------train {model_name} model----------")
    classifier = build()
    classifier.fit(train_x, train_y)
    return classifier.predict(test_x)
# Load the ontology-similarity features; columns are named from label_names.
ft_OT = pd.read_csv(
    "feature_similarity_chebi_ontology_DiShIn_2.csv",
    names=label_names,
)
# Append "_OT" to every column label, modifying the frame in place.
ft_OT.rename(mapper=lambda col: col + "_OT", axis=1, inplace=True)

# Place ft_FP and the ontology features side by side (joined along axis 1).
X = np.concatenate((ft_FP, ft_OT), axis=1)

# Metric name -> scorer. The scorer objects are defined outside this section.
scoring_funcs = {
    "hamming loss": hamming_func,
    "aiming": aiming_func,
    "coverage": coverage_func,
    "accuracy": accuracy_func,
    "absolute true": absolute_true_func,
}
# Keep recorded

# Search c_k over 2**-5 ... 2**4; the model refit at the end is the one that
# scores best on the "absolute true" metric.
parameters = {'c_k': [2**i for i in range(-5, 5)]}
mtsvm = GridSearchCV(
    MLTSVM(),
    param_grid=parameters,
    n_jobs=-1,
    cv=loocv,
    scoring=scoring_funcs,
    verbose=3,
    refit="absolute true",
)
mtsvm.fit(X, Y.values)
print(mtsvm.best_score_)

# Persist the fitted search, wrapped in a one-element tuple.
# NOTE(review): `dump` is presumably joblib.dump -- confirm against the imports.
mytuple = (mtsvm, )
to_save = dump(mytuple, filename="mtsvm.joblib")
def classifiers(self):
    """Return the classifiers exposed by this object.

    Returns:
        list: A one-element list containing an ``MLTSVM`` constructed
        with ``c_k = 2**-4`` (0.0625).
    """
    c_k = 2**-4
    return [MLTSVM(c_k=c_k)]
"coverage": coverage_func, "accuracy": accuracy_func, "absolute true": absolute_true_func, } # Keep recorded parameters = {'k': range(1,11), 's': [0.5, 0.7, 1.0]} mlknn = GridSearchCV(MLkNN(), param_grid=parameters, n_jobs=-1, cv=loocv, scoring=scoring_funcs, verbose=3, refit="absolute true") mlknn.fit(X, Y.values) print(mlknn.best_score_) parameters = {'c_k': [2**i for i in range(-5, 5)]} mtsvm = GridSearchCV(MLTSVM(), param_grid=parameters, n_jobs=-1, cv=loocv, scoring=scoring_funcs, verbose=3, refit="absolute true") mtsvm.fit(X, Y.values) print(mtsvm.best_score_) parameters = { 'classifier': [LabelPowerset()], 'classifier__classifier': [ExtraTreesClassifier()], 'classifier__classifier__n_estimators': [50, 100, 500, 1000], 'clusterer' : [ NetworkXLabelGraphClusterer(LabelCooccurrenceGraphBuilder(weighted=True, include_self_edges=False), 'louvain'), NetworkXLabelGraphClusterer(LabelCooccurrenceGraphBuilder(weighted=True, include_self_edges=False), 'lpa') ] }
""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" mlknn = MLkNN() vec = TfidfVectorizer(analyzer='word') pipe = Pipeline(steps=[('vec', vec), ('MLkNN', mlknn)]) parameters = [{ 'vec__ngram_range': [(1, 1), (1, 2), (1, 5)], 'vec__max_features': [5000, 10000, 50000, 100000], 'vec__stop_words': ['english', None], 'MLkNN__k': [1, 3, 10, 20, 50], 'MLkNN__s': [0.5, 0.7, 1.0] }] nested_cross_val(pipe, parameters, X, y, "MLkNN") """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """ Run MLTSVM """ """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" mltsvm = MLTSVM(max_iteration=1000) vec = TfidfVectorizer(analyzer='word') pipe = Pipeline(steps=[('vec', vec), ('MLTSVM', mltsvm)]) parameters = [{ 'vec__ngram_range': [(1, 1), (1, 2), (1, 5)], 'vec__max_features': [5000, 10000, 50000, 100000], 'vec__stop_words': ['english', None], 'MLTSVM__c_k': [2**i for i in range(-5, 10, 2)] }] nested_cross_val(pipe, parameters, X, y, "MLTSVM") """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """ Run MLARAM """ """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" mlaram = MLARAM() vec = TfidfVectorizer(analyzer='word') pipe = Pipeline(steps=[('vec', vec), ('MLARAM', mlaram)])