def __init__(self,
              random_state=84,
              n_estimators=20,
              params=None,
              niterations=10):
     """Create the wrapped MLTSVM model and store the search settings.

     Args:
         random_state: Accepted for interface compatibility; not used by
             this constructor.
         n_estimators: Accepted for interface compatibility; not used by
             this constructor.
         params: Mapping stored on ``self.params``. When omitted, a fresh
             ``{'c_k': [2**i for i in range(-7, 7, 2)]}`` is created for
             this instance.
         niterations: Value stored on ``self.niterations``.
     """
     # Build the default per call: a dict literal in the signature is
     # evaluated once, so every instance would share (and could mutate)
     # the very same object.
     if params is None:
         params = {'c_k': [2**i for i in range(-7, 7, 2)]}
     self.model = MLTSVM()
     self.params = params
     self.niterations = niterations
Exemple #2
0
def ML_model_predict(train_x, train_y, test_x, model_name):
    """Fit the classifier selected by ``model_name`` and predict on ``test_x``.

    Args:
        train_x: Training features, passed to the classifier's ``fit``.
        train_y: Training labels, passed to the classifier's ``fit``.
        test_x: Features passed to the classifier's ``predict``.
        model_name: One of ``"MLARAM"``, ``"MLkNN"``, ``"BRkNNa"``,
            ``"BRkNNb"``, ``"RF"`` or ``"MLTSVM"``.

    Returns:
        Whatever the fitted classifier's ``predict`` returns for ``test_x``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # Factories instead of instances: only the requested classifier is
    # ever constructed.
    factories = {
        "MLARAM": lambda: MLARAM(threshold=0.2),
        "MLkNN": lambda: MLkNN(),
        "BRkNNa": lambda: BRkNNaClassifier(),
        "BRkNNb": lambda: BRkNNbClassifier(),
        "RF": lambda: RandomForestClassifier(n_estimators=1000,
                                             random_state=0,
                                             n_jobs=-1),
        "MLTSVM": lambda: MLTSVM(c_k=2**-1),
    }
    # Reject unknown names up front; previously this fell through with
    # classifier = None and failed with an AttributeError on ``.fit``.
    if model_name not in factories:
        supported = ", ".join(factories)
        raise ValueError(
            f"Unknown model_name {model_name!r}; expected one of: {supported}")

    print(f"--------train {model_name} model----------")
    classifier = factories[model_name]()
    classifier.fit(train_x, train_y)
    return classifier.predict(test_x)
# Read the second feature table, naming its columns from label_names, then
# suffix every column with "_OT" in place.
ft_OT = pd.read_csv(
    "feature_similarity_chebi_ontology_DiShIn_2.csv",
    names=label_names,
)
ft_OT.rename(mapper=lambda column: column + "_OT", axis=1, inplace=True)

# Feature matrix: ft_FP and ft_OT joined column-wise.
X = np.concatenate((ft_FP, ft_OT), axis=1)

# Candidate c_k values: 2**-5 up to 2**4.
parameters = {'c_k': [2**exponent for exponent in range(-5, 5)]}

# Every scorer below is recorded; "absolute true" is the one used for refit.
scoring_funcs = {
    "hamming loss": hamming_func,
    "aiming": aiming_func,
    "coverage": coverage_func,
    "accuracy": accuracy_func,
    "absolute true": absolute_true_func,
}  # Keep recorded

mtsvm = GridSearchCV(
    MLTSVM(),
    param_grid=parameters,
    scoring=scoring_funcs,
    refit="absolute true",
    cv=loocv,
    n_jobs=-1,
    verbose=3,
)
mtsvm.fit(X, Y.values)
print(mtsvm.best_score_)

# Persist the fitted search, wrapped in a one-element tuple.
mytuple = (mtsvm,)
to_save = dump(mytuple, filename="mtsvm.joblib")
Exemple #4
0
 def classifiers(self):
     """Return a one-element list holding an MLTSVM built with c_k = 2**-4."""
     tsvm = MLTSVM(c_k=2**-4)
     return [tsvm]
Exemple #5
0
                 "coverage": coverage_func, 
                 "accuracy": accuracy_func, 
                 "absolute true": absolute_true_func, 
                 } # Keep recorded


# --- MLkNN: grid over neighbour count k (1..10) and smoothing s ---
parameters = {'k': range(1, 11), 's': [0.5, 0.7, 1.0]}

mlknn = GridSearchCV(
    MLkNN(),
    param_grid=parameters,
    scoring=scoring_funcs,
    refit="absolute true",
    cv=loocv,
    n_jobs=-1,
    verbose=3,
)
mlknn.fit(X, Y.values)
print(mlknn.best_score_)

# --- MLTSVM: grid over c_k = 2**-5 .. 2**4 ---
parameters = {'c_k': [2**exponent for exponent in range(-5, 5)]}

mtsvm = GridSearchCV(
    MLTSVM(),
    param_grid=parameters,
    scoring=scoring_funcs,
    refit="absolute true",
    cv=loocv,
    n_jobs=-1,
    verbose=3,
)
mtsvm.fit(X, Y.values)
print(mtsvm.best_score_)

# Grid combining a LabelPowerset/ExtraTrees classifier with two graph
# clusterers that differ only in the clustering method name. It is only
# assigned here; whatever consumes it is not visible in this section.
parameters = {
    'classifier': [LabelPowerset()],
    'classifier__classifier': [ExtraTreesClassifier()],
    'classifier__classifier__n_estimators': [50, 100, 500, 1000],
    'clusterer': [
        NetworkXLabelGraphClusterer(
            LabelCooccurrenceGraphBuilder(weighted=True,
                                          include_self_edges=False),
            method,
        )
        for method in ('louvain', 'lpa')
    ],
}
""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """"""
# TF-IDF (word analyzer) -> MLkNN pipeline, handed to nested_cross_val
# together with the vectorizer/classifier grid below.
mlknn = MLkNN()
vec = TfidfVectorizer(analyzer="word")
pipe = Pipeline(steps=[("vec", vec), ("MLkNN", mlknn)])
parameters = [
    dict(
        vec__ngram_range=[(1, 1), (1, 2), (1, 5)],
        vec__max_features=[5000, 10000, 50000, 100000],
        vec__stop_words=["english", None],
        MLkNN__k=[1, 3, 10, 20, 50],
        MLkNN__s=[0.5, 0.7, 1.0],
    )
]
nested_cross_val(pipe, parameters, X, y, "MLkNN")
"""""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """
Run MLTSVM
""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """"""
# TF-IDF (word analyzer) -> MLTSVM pipeline, handed to nested_cross_val
# together with the vectorizer grid and c_k in {2**-5, 2**-3, ..., 2**9}.
mltsvm = MLTSVM(max_iteration=1000)
vec = TfidfVectorizer(analyzer="word")
pipe = Pipeline(steps=[("vec", vec), ("MLTSVM", mltsvm)])
parameters = [
    dict(
        vec__ngram_range=[(1, 1), (1, 2), (1, 5)],
        vec__max_features=[5000, 10000, 50000, 100000],
        vec__stop_words=["english", None],
        MLTSVM__c_k=[2**exponent for exponent in range(-5, 10, 2)],
    )
]
nested_cross_val(pipe, parameters, X, y, "MLTSVM")
"""""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """
Run MLARAM
""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """"""
# TF-IDF (word analyzer) -> MLARAM pipeline with default MLARAM settings.
mlaram = MLARAM()
vec = TfidfVectorizer(analyzer="word")
pipe = Pipeline(
    steps=[
        ("vec", vec),
        ("MLARAM", mlaram),
    ]
)