def gradient_boosting_model(_remove_extra_classes=False, metric="accuracy"):
    """Train and evaluate a Gradient Boosting classifier on the one-hot
    encoded dataset.

    The training split is re-balanced via ``bdt.getBalancedDataset`` before
    fitting; the reported score is a 5-fold cross-validation on the held-out
    test split, followed by a confusion-matrix breakdown.

    Args:
        _remove_extra_classes: forwarded to ``bdt.getOneHotEncodedDataset``
            to restrict the dataset (semantics defined in the ``bdt`` module
            — confirm there).
        metric: scoring name passed to ``cross_val_score``.
    """
    print("###Running Gradient Boosting model| Limited dataset:{}".format(
        _remove_extra_classes))

    data = bdt.getOneHotEncodedDataset(
        remove_extra_classes=_remove_extra_classes)
    # Columns 1..30 are features, column 31 is the label
    # (column 0 presumably an id — TODO confirm against bdt).
    features = data.iloc[:, 1:31]
    labels = data.iloc[:, 31]

    feat_train, feat_test, lab_train, lab_test = train_test_split(
        features, labels, test_size=0.33, random_state=42, stratify=labels)

    # Balance only the training portion; last column of the balanced
    # frame is the label.
    balanced = bdt.getBalancedDataset(feat_train, lab_train)
    feat_train = balanced.iloc[:, :-1]
    lab_train = balanced.iloc[:, -1]

    model = GradientBoostingClassifier(n_estimators=100,
                                       learning_rate=0.5,
                                       max_depth=3,
                                       random_state=42)
    model.fit(feat_train, lab_train)

    # Cross-validated score computed on the test split (refits clones of
    # the estimator on test-split folds).
    cv_scores = cross_val_score(model, feat_test, lab_test, cv=5,
                                scoring=metric)
    predicted = model.predict(feat_test)
    cm = confusion_matrix(lab_test, predicted)
    tn, fp, fn, tp = cm.ravel()

    print('{} testing : {:.3f} (+-{:.3f})'.format(metric, cv_scores.mean(),
                                                  cv_scores.std()))
    print("confusion matrix:\n", cm)
    print(
        "True Negative:{0}, False Positive:{1} \nFalse Negative:{2}, True Positive:{3}"
        .format(tn, fp, fn, tp))
    print("###Finished running Gradient Boosting model")
    return
def svm_model(_remove_extra_classes=False, metric="accuracy"):
    """Train and evaluate an RBF-kernel SVM on the one-hot encoded dataset.

    The training split is re-balanced via ``bdt.getBalancedDataset`` before
    fitting; the reported score is a 5-fold cross-validation on the held-out
    test split, followed by a confusion-matrix breakdown.

    Args:
        _remove_extra_classes: forwarded to ``bdt.getOneHotEncodedDataset``
            to restrict the dataset (semantics defined in the ``bdt`` module
            — confirm there).
        metric: scoring name passed to ``cross_val_score``.
    """
    print("###Running SVM model| Limited dataset:{}".format(
        _remove_extra_classes))

    dt = bdt.getOneHotEncodedDataset(
        remove_extra_classes=_remove_extra_classes)
    # Columns 1..30 are features, column 31 is the label
    # (column 0 presumably an id — TODO confirm against bdt).
    X = dt.iloc[:, 1:31]
    y = dt.iloc[:, 31]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42, stratify=y)

    # Balance only the training portion; last column of the balanced
    # frame is the label.
    train = bdt.getBalancedDataset(X_train, y_train)
    X_train = train.iloc[:, :-1]
    y_train = train.iloc[:, -1]

    clf = svm.SVC(gamma='scale',
                  decision_function_shape='ovo',
                  C=1.0,
                  cache_size=200,
                  kernel='rbf')
    clf.fit(X_train, y_train)

    # Cross-validated score computed on the test split (refits clones of
    # the estimator on test-split folds).
    scores = cross_val_score(clf, X_test, y_test, cv=5, scoring=metric)
    y_pred = clf.predict(X_test)
    conf_matrix = confusion_matrix(y_test, y_pred)
    # Fix: reuse the already-computed matrix instead of recomputing it
    # (the original called confusion_matrix a second time just to ravel it),
    # matching the sibling model functions.
    tn, fp, fn, tp = conf_matrix.ravel()

    print('{} testing : {:.3f} (+-{:.3f})'.format(metric, scores.mean(),
                                                  scores.std()))
    print("confusion matrix:\n", conf_matrix)
    print(
        "True Negative:{0}, False Positive:{1} \nFalse Negative:{2}, True Positive:{3}"
        .format(tn, fp, fn, tp))
    print("###Finished running SVM model")
    return
def mlp_model(_remove_extra_classes=False, metric="accuracy"):
    """Train and evaluate a multilayer-perceptron classifier on the one-hot
    encoded dataset.

    The training split is re-balanced via ``bdt.getBalancedDataset`` before
    fitting; the reported score is a 5-fold cross-validation on the held-out
    test split, followed by a confusion-matrix breakdown.

    Args:
        _remove_extra_classes: forwarded to ``bdt.getOneHotEncodedDataset``
            to restrict the dataset (semantics defined in the ``bdt`` module
            — confirm there).
        metric: scoring name passed to ``cross_val_score``.
    """
    # Fix: add the "###" banner prefix for consistency with the other model
    # runners ("###Running Gradient Boosting model", "###Running SVM model").
    print("###Running MLP model| Limited dataset:{}".format(
        _remove_extra_classes))

    dt = bdt.getOneHotEncodedDataset(
        remove_extra_classes=_remove_extra_classes)
    # Columns 1..30 are features, column 31 is the label
    # (column 0 presumably an id — TODO confirm against bdt).
    X = dt.iloc[:, 1:31]
    y = dt.iloc[:, 31]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42, stratify=y)

    # Balance only the training portion; last column of the balanced
    # frame is the label.
    train = bdt.getBalancedDataset(X_train, y_train)
    X_train = train.iloc[:, :-1]
    y_train = train.iloc[:, -1]

    clf = MLPClassifier(solver='lbfgs',
                        alpha=1e-5,
                        hidden_layer_sizes=(90, 40),
                        random_state=1)
    clf.fit(X_train, y_train)

    # Cross-validated score computed on the test split (refits clones of
    # the estimator on test-split folds).
    scores = cross_val_score(clf, X_test, y_test, cv=5, scoring=metric)
    y_pred = clf.predict(X_test)
    conf_matrix = confusion_matrix(y_test, y_pred)
    tn, fp, fn, tp = conf_matrix.ravel()

    print('{} testing : {:.3f} (+-{:.3f})'.format(metric, scores.mean(),
                                                  scores.std()))
    print("confusion matrix:\n", conf_matrix)
    print(
        "True Negative:{0}, False Positive:{1} \nFalse Negative:{2}, True Positive:{3}"
        .format(tn, fp, fn, tp))
    print("###Finished running MLP model")
    return
def main():
    """Experiment 1: train a Gradient Boosting classifier on each of a set
    of hand-picked genotype feature subsets and report cross-validated
    accuracy plus test-set confusion matrices.
    """
    dt = bdt.getOneHotEncodedDataset()
    # Columns 1..30 are features, column 31 is the label
    # (column 0 presumably an id — TODO confirm against bdt).
    X = dt.iloc[:, 1:31]
    y = dt.iloc[:, 31]

    # Hand-selected genotype column subsets to evaluate independently.
    subsets = [
        ["IL-10 -592=CA", "TNF-308=GG"],
        ["MBL -221=YX", "IL-10 -819=CT", "TNF-308=GG"],
        ["TNF-308=GG"],
        ["PTX3 rs2305619=GG", "MPO C-463T=GG"],
        ["PTX3 rs2305619=AA", "IL-10 -592=CA"],
        ["PTX3 rs2305619=AA"],
        ["IL-10 -819=CT", "MPO C-463T=GG"],
        ["PTX3 rs1840680=AA", "IL-28b rs12979860=CT"],
        ["MPO C-463T=GG"],
        ["PTX3 rs1840680=AA", "MBL -221=XX"],
    ]

    # Fix: "Experinmento" was a typo in the user-facing banners.
    print("###Running Experimento1:")
    for sset in subsets:
        X_sset = X[sset]
        X_train, X_test, y_train, y_test = train_test_split(
            X_sset, y, test_size=0.33, random_state=100, stratify=y)

        gb_clf = GradientBoostingClassifier(n_estimators=100,
                                            learning_rate=0.5,
                                            max_depth=3,
                                            random_state=100)
        gb_clf.fit(X_train, y_train)

        # 5-fold CV accuracy on the training split.
        scores = cross_val_score(gb_clf, X_train, y_train, cv=5,
                                 scoring='accuracy')
        # Fix: the original format string had an unbalanced trailing ")".
        print("Score: {0:.3f} (+-{1:.3f})".format(scores.mean(),
                                                  scores.std()))

        y_pred = gb_clf.predict(X_test)
        conf_matrix = confusion_matrix(y_test, y_pred)
        tn, fp, fn, tp = conf_matrix.ravel()
        print("confusion matrix:", conf_matrix)
        print("True Negative:{0}, False Positive:{1} \nFalse Negative:{2}, True Positive:{3}"
              .format(tn, fp, fn, tp))
    print("###Finished Experimento1")
    return