def gbc_class(Xp_train, Xp_test, Yp_train, Yp_test, gbcObject, count):
    if not gbcObject.isUsed:
        return
    gbcObject.name = 'GBC'
    gbcObject.folds = count
    from sklearn.ensemble import GradientBoostingClassifier
    gbc = GradientBoostingClassifier(
        random_state=0,
        n_estimators=gbcObject.params['n_estimators'],
        criterion=gbcObject.params['criterion'],
        loss=gbcObject.params['loss'],
        learning_rate=gbcObject.params['learning_rate'],
        max_depth=gbcObject.params['max_depth'],
        min_samples_split=gbcObject.params['min_samples_split'],
        min_samples_leaf=gbcObject.params['min_samples_leaf'],
        min_weight_fraction_leaf=gbcObject.params['min_weight_fraction_leaf'],
        subsample=gbcObject.params['subsample'],
        max_features=gbcObject.params['max_features'],
        max_leaf_nodes=gbcObject.params['max_leaf_nodes'],
        min_impurity_decrease=gbcObject.params['min_impurity_decrease'])
    gbc.fit(Xp_train, Yp_train)
    Yp_pred = gbc.predict(Xp_test)
    probabilities = gbc.predict_proba(Xp_test)
    probabilities = probabilities[:, 1]  # to generate AUROC, we only need positive probabilities
    cm = my_confusion_matrix(Yp_test, Yp_pred)
    append_results(Yp_pred, probabilities, Yp_test,
                   roc_auc_score(Yp_test / 4, probabilities), cm, count, gbcObject)
    gbcObject.best_features = rank_feature_importance(
        'GBC', gbc.feature_importances_, gbcObject.features, False)
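# Usage sketch (not from this file): how a routine like gbc_class might be
# driven from a k-fold loop. The names X, Y, and gbcConfig are hypothetical;
# the config object is assumed to carry the attributes used above (isUsed,
# params, features, probs, ...). Labels are encoded 0/4 throughout, which is
# why roc_auc_score receives Yp_test / 4 to map them onto 0/1.
#
# from sklearn.model_selection import StratifiedKFold
#
# skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
# for fold, (train_idx, test_idx) in enumerate(skf.split(X, Y)):
#     gbc_class(X[train_idx], X[test_idx], Y[train_idx], Y[test_idx],
#               gbcConfig, fold)  # gbcConfig: hypothetical config instance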
def rfc_class(Xp_train, Xp_test, Yp_train, Yp_test, rfcObject, count):
    if not rfcObject.isUsed:
        return
    rfcObject.name = 'RFC'
    rfcObject.folds = count
    from sklearn.ensemble import RandomForestClassifier
    rfc = RandomForestClassifier(
        n_jobs=-1,
        random_state=0,
        criterion=rfcObject.params['criterion'],
        bootstrap=rfcObject.params['bootstrap'],
        n_estimators=rfcObject.params['n_estimators'],
        max_features=rfcObject.params['max_features'],
        max_depth=rfcObject.params['max_depth'],
        min_samples_split=rfcObject.params['min_samples_split'],
        min_samples_leaf=rfcObject.params['min_samples_leaf'],
        min_weight_fraction_leaf=rfcObject.params['min_weight_fraction_leaf'],
        max_leaf_nodes=rfcObject.params['max_leaf_nodes'],
        min_impurity_decrease=rfcObject.params['min_impurity_decrease'],
        oob_score=rfcObject.params['oob_score'])
    rfc.fit(Xp_train, Yp_train)
    Yp_pred = rfc.predict(Xp_test)
    probabilities = rfc.predict_proba(Xp_test)
    probabilities = probabilities[:, 1]  # to generate AUROC, we only need positive probabilities
    cm = my_confusion_matrix(Yp_test, Yp_pred)
    append_results(Yp_pred, probabilities, Yp_test,
                   roc_auc_score(Yp_test / 4, probabilities), cm, count, rfcObject)
    rfcObject.best_features = rank_feature_importance(
        'RFC', rfc.feature_importances_, rfcObject.features, False)
def ens_class(s, r, g, l, k, a, b, Yp_test, ensObject, count):
    if not ensObject.isUsed:
        return
    ensObject.name = 'ENS'
    ensObject.folds = count
    probabilities = []
    Yp_pred = []
    for i in range(0, np.size(Yp_test)):
        # Average the positive-class probabilities of every model in use.
        prob_sum = 0
        terms = 0
        for model in (s, r, g, l, k, a, b):
            if model.isUsed:
                prob_sum += model.probs[count][i]
                terms += 1
        probabilities.append(prob_sum / terms)
        if probabilities[i] >= ensObject.params['threshold']:
            Yp_pred.append(4)
        else:
            Yp_pred.append(0)
    cm = my_confusion_matrix(Yp_test, Yp_pred)
    # Convert the whole list at once (the previous element-wise copy loop
    # stopped one element short and dropped the final probability).
    float_probs = np.asarray(probabilities, dtype=float)
    append_results(Yp_pred, float_probs, Yp_test,
                   roc_auc_score(Yp_test / 4, probabilities), cm, count, ensObject)
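# Equivalence sketch (assumption, not part of the pipeline): the per-sample
# averaging in ens_class amounts to stacking each in-use model's probability
# vector for the fold and taking the column mean, then thresholding:
#
# used = [m for m in (s, r, g, l, k, a, b) if m.isUsed]
# mean_probs = np.mean([m.probs[count] for m in used], axis=0)
# preds = np.where(mean_probs >= ensObject.params['threshold'], 4, 0)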
def gnb_class(Xp_train, Xp_test, Yp_train, Yp_test, gnbObject, count):
    if not gnbObject.isUsed:
        return
    gnbObject.name = 'GNB'
    gnbObject.folds = count
    from sklearn.naive_bayes import GaussianNB
    GNB = GaussianNB()
    GNB.fit(Xp_train, Yp_train)
    Yp_pred = GNB.predict(Xp_test)
    probabilities = GNB.predict_proba(Xp_test)
    probabilities = probabilities[:, 1]
    cm = my_confusion_matrix(Yp_test, Yp_pred)
    append_results(Yp_pred, probabilities, Yp_test,
                   roc_auc_score(Yp_test / 4, probabilities), cm, count, gnbObject)
def astalt_class(Xp_test, Yp_test, astaltObject, count):
    if not astaltObject.isUsed:
        return
    astaltObject.name = 'ASTALT'
    astaltObject.folds = count
    Yp_pred = []
    Yp_prob = []
    astalt_values = []
    for i in range(0, len(Xp_test)):
        # AST (column 1) over ALT (column 0); a ratio >= 1 predicts class 4.
        astalt_values.append(Xp_test[i, 1] / Xp_test[i, 0])
        Yp_pred.append(4 if Xp_test[i, 1] / Xp_test[i, 0] >= 1 else 0)
        Yp_prob.append(np.nan)
    auroc_and_auprc_non_prob(astalt_values, Yp_test, astaltObject)
    cm = my_confusion_matrix(Yp_test, Yp_pred)
    append_results(Yp_pred, np.nan, Yp_test, np.nan, cm, count, astaltObject)
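# Worked example (illustrative values, not from the dataset): with ALT = 50
# (column 0) and AST = 40 (column 1), the ratio is 40 / 50 = 0.8, below 1,
# so astalt_class predicts class 0; with ALT = 30 and AST = 45 the ratio is
# 1.5 and the prediction is 4.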
def apri_class(Xp_test, Yp_test, apriObject, count):
    if not apriObject.isUsed:
        return
    apriObject.name = 'APRI'
    apriObject.folds = count
    Yp_pred_new = []
    Yp_test_new = []
    Yp_prob_new = []
    det_indices = []
    apri_values = []
    for i in range(0, len(Xp_test)):
        # AST upper limit of normal is 31 for men (sex code 0) and 19 for women.
        AST_upper = 31 if Xp_test[i, 0] == 0 else 19
        AST = Xp_test[i, 1]
        Plt = Xp_test[i, 2]
        APRI = (100 * AST / AST_upper) / Plt
        if APRI >= 2:
            Yp_pred_new.append(4)
            Yp_test_new.append(Yp_test[i])
            Yp_prob_new.append(np.nan)
            det_indices.append([i])
            apri_values.append(APRI)
        elif APRI <= 0.5:
            Yp_pred_new.append(0)
            Yp_test_new.append(Yp_test[i])
            Yp_prob_new.append(np.nan)
            det_indices.append([i])
            apri_values.append(APRI)
        else:
            # Scores between 0.5 and 2 are indeterminate and excluded.
            apriObject.indeterminate_count += 1
    auroc_and_auprc_non_prob(apri_values, Yp_test_new, apriObject)
    cm = my_confusion_matrix(Yp_test_new, Yp_pred_new)
    append_results(Yp_pred_new, Yp_prob_new, Yp_test_new, np.nan, cm, count, apriObject)
    apriObject.determinate_indices.append(det_indices)
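# Worked example (illustrative values): for a man (sex code 0, AST_upper = 31)
# with AST = 62 and a platelet count of 100, APRI = (100 * 62 / 31) / 100 = 2.0,
# which meets the >= 2 cutoff, so the prediction is 4. An APRI of, say, 1.0
# falls in the (0.5, 2) indeterminate zone and the case is excluded.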
def knn_class(Xp_train, Xp_test, Yp_train, Yp_test, knnObject, count):
    if not knnObject.isUsed:
        return
    knnObject.name = 'KNN'
    knnObject.folds = count
    from sklearn.neighbors import KNeighborsClassifier
    KNN = KNeighborsClassifier(
        n_neighbors=knnObject.params['n_neighbors'],
        weights=knnObject.params['weights'],
        algorithm=knnObject.params['algorithm'],
        leaf_size=knnObject.params['leaf_size'],
        p=knnObject.params['p'],
        metric=knnObject.params['metric'])
    KNN.fit(Xp_train, Yp_train)
    Yp_pred = KNN.predict(Xp_test)
    probabilities = KNN.predict_proba(Xp_test)
    probabilities = probabilities[:, 1]
    cm = my_confusion_matrix(Yp_test, Yp_pred)
    append_results(Yp_pred, probabilities, Yp_test,
                   roc_auc_score(Yp_test / 4, probabilities), cm, count, knnObject)
def ann_class(Xp_train, Xp_test, Yp_train, Yp_test, annObject, count):
    from sklearn.metrics import roc_auc_score
    from keras.models import Sequential
    from keras.layers import Dense
    if not annObject.isUsed:
        return
    annObject.name = 'ANN'
    annObject.folds = count
    # Initialising the ANN
    classifier = Sequential()
    # Adding the input layer and the first hidden layer
    classifier.add(Dense(16, kernel_initializer='uniform', activation='relu',
                         input_dim=len(annObject.features)))
    classifier.add(Dense(8, kernel_initializer='uniform', activation='relu'))
    # Adding the output layer
    classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
    # Compiling the ANN
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    # Fitting the ANN to the training set; labels are 0/4, so scale to 0/1
    # for binary cross-entropy.
    classifier.fit(Xp_train, Yp_train / 4, batch_size=10, epochs=1000)
    # predict returns an (n, 1) array of sigmoid outputs; flatten it before
    # thresholding back onto the 0/4 encoding.
    probabilities = classifier.predict(Xp_test).ravel()
    Yp_pred = (probabilities > 0.5) * 4
    cm = my_confusion_matrix(Yp_test, Yp_pred)
    append_results(Yp_pred, probabilities, Yp_test,
                   roc_auc_score(Yp_test / 4, probabilities), cm, count, annObject)
def fib4_class(Xp_test, Yp_test, fib4Object, count):
    if not fib4Object.isUsed:
        return
    fib4Object.name = 'FIB4'
    fib4Object.folds = count
    Yp_pred_new = []
    Yp_test_new = []
    Yp_prob_new = []
    det_indices = []
    fib4_values = []
    for i in range(0, len(Xp_test)):
        age = Xp_test[i, 0]
        ALT = Xp_test[i, 1]
        AST = Xp_test[i, 2]
        Plt = Xp_test[i, 3]
        # FIB-4 = age * AST / (platelet count * sqrt(ALT))
        fib4 = age * AST / (Plt * ALT**0.5)
        if fib4 <= 1.45:
            Yp_pred_new.append(0)
            Yp_test_new.append(Yp_test[i])
            Yp_prob_new.append(np.nan)
            det_indices.append([i])
            fib4_values.append(fib4)
        elif fib4 >= 3.25:
            Yp_pred_new.append(4)
            Yp_test_new.append(Yp_test[i])
            Yp_prob_new.append(np.nan)
            det_indices.append([i])
            fib4_values.append(fib4)
        else:
            # Scores between 1.45 and 3.25 are indeterminate and excluded.
            fib4Object.indeterminate_count += 1
    auroc_and_auprc_non_prob(fib4_values, Yp_test_new, fib4Object)
    cm = my_confusion_matrix(Yp_test_new, Yp_pred_new)
    append_results(Yp_pred_new, Yp_prob_new, Yp_test_new, np.nan, cm, count, fib4Object)
    fib4Object.determinate_indices.append(det_indices)
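# Worked example (illustrative values): age = 61, ALT = 40, AST = 60, and
# platelets = 150 gives FIB-4 = 61 * 60 / (150 * sqrt(40)) ≈ 3.86, above the
# 3.25 cutoff, so the prediction is 4; a FIB-4 of 1.0 would predict 0, and
# anything in (1.45, 3.25) is counted as indeterminate.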
def log_class(Xp_train, Xp_test, Yp_train, Yp_test, logObject, count):
    if not logObject.isUsed:
        return
    logObject.name = 'LOG'
    logObject.folds = count
    from sklearn.linear_model import LogisticRegression
    log = LogisticRegression(
        random_state=0,
        max_iter=logObject.params['max_iter'],
        solver=logObject.params['solver'],
        C=logObject.params['C'],
        tol=logObject.params['tol'],
        fit_intercept=logObject.params['fit_intercept'],
        penalty='l2',
        intercept_scaling=logObject.params['intercept_scaling'],
        dual=logObject.params['dual'],
        multi_class=logObject.params['multi_class'])
    # Labels are 0/4: scale to 0/1 for fitting, then back to 0/4 for reporting.
    log.fit(Xp_train, Yp_train / 4)
    Yp_pred = log.predict(Xp_test) * 4
    probabilities = log.predict_proba(Xp_test)
    probabilities = probabilities[:, 1]  # to generate AUROC, we only need positive probabilities
    cm = my_confusion_matrix(Yp_test, Yp_pred)
    append_results(Yp_pred, probabilities, Yp_test,
                   roc_auc_score(Yp_test / 4, probabilities), cm, count, logObject)
def svm_class(Xp_train, Xp_test, Yp_train, Yp_test, svmObject, count):
    if not svmObject.isUsed:
        return
    svmObject.name = 'SVM'
    svmObject.folds = count
    from sklearn.svm import SVC
    svm = SVC(verbose=False,
              probability=True,
              C=svmObject.params['C'],
              gamma=svmObject.params['gamma'],
              kernel=svmObject.params['kernel'],
              degree=svmObject.params['degree'],
              coef0=svmObject.params['coef0'],
              shrinking=svmObject.params['shrinking'],
              tol=svmObject.params['tol'])
    svm.fit(Xp_train, Yp_train)
    Yp_pred = svm.predict(Xp_test)
    probabilities = svm.predict_proba(Xp_test)
    probabilities = probabilities[:, 1]
    if svmObject.params['method'] == 'prob':
        Yp_pred = (probabilities > svmObject.params['threshold']) * 4
    cm = my_confusion_matrix(Yp_test, Yp_pred)
    append_results(Yp_pred, probabilities, Yp_test,
                   roc_auc_score(Yp_test / 4, probabilities), cm, count, svmObject)
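# Note on the 'prob' method (sketch, same 0/4 encoding assumed): when
# svmObject.params['method'] == 'prob', the default svm.predict decision is
# replaced by thresholding the Platt-scaled probabilities, so the operating
# point can be moved, e.g. a lower threshold trades specificity for
# sensitivity:
#
# Yp_pred = (probabilities > 0.3) * 4   # 0.3 is a hypothetical threshold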
def mlp_class(Xp_train, Xp_test, Yp_train, Yp_test, mlpObject, count):
    if not mlpObject.isUsed:
        return
    mlpObject.name = 'MLP'
    mlpObject.folds = count
    from sklearn.neural_network import MLPClassifier
    MLP = MLPClassifier(
        random_state=0,
        activation=mlpObject.params['activation'],
        solver=mlpObject.params['solver'],
        tol=mlpObject.params['tol'],
        hidden_layer_sizes=mlpObject.params['hidden_layer_sizes'],
        max_iter=mlpObject.params['max_iter'],
        learning_rate=mlpObject.params['learning_rate'],
        alpha=mlpObject.params['alpha'],
        batch_size=mlpObject.params['batch_size'],
        power_t=mlpObject.params['power_t'],
        shuffle=mlpObject.params['shuffle'],
        momentum=mlpObject.params['momentum'],
        nesterovs_momentum=mlpObject.params['nesterovs_momentum'],
        early_stopping=mlpObject.params['early_stopping'],
        validation_fraction=mlpObject.params['validation_fraction'],
        beta_1=mlpObject.params['beta_1'],
        beta_2=mlpObject.params['beta_2'],
        epsilon=mlpObject.params['epsilon'])
    MLP.fit(Xp_train, Yp_train)
    Yp_pred = MLP.predict(Xp_test)
    probabilities = MLP.predict_proba(Xp_test)
    probabilities = probabilities[:, 1]
    cm = my_confusion_matrix(Yp_test, Yp_pred)
    append_results(Yp_pred, probabilities, Yp_test,
                   roc_auc_score(Yp_test / 4, probabilities), cm, count, mlpObject)
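# The helpers shared by every routine above (my_confusion_matrix,
# append_results, rank_feature_importance, auroc_and_auprc_non_prob) are
# defined elsewhere in this project. As a rough sketch of the assumed
# contract only: my_confusion_matrix behaves like sklearn's confusion_matrix
# over the 0/4 label encoding, and append_results accumulates the fold's
# predictions, probabilities, AUROC, and confusion matrix on the model
# object (ens_class reads obj.probs[count][i], so probabilities are stored
# per fold in order). A hypothetical minimal version:
#
# from sklearn.metrics import confusion_matrix
#
# def my_confusion_matrix(y_true, y_pred):
#     # assumed contract: rows = true labels (0, 4), columns = predictions
#     return confusion_matrix(y_true, y_pred, labels=[0, 4])
#
# def append_results(y_pred, probs, y_true, auroc, cm, fold, obj):
#     # assumed contract: accumulate per-fold results on the model object
#     obj.preds.append(y_pred)
#     obj.probs.append(probs)   # index `fold` then lines up with `count`
#     obj.aurocs.append(auroc)
#     obj.cms.append(cm)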