def get_acc_auc_randomisedCV(X,Y,iterNo=5,test_percent=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    Args:
        X: feature matrix; must support indexing by an array of row indices.
        Y: labels; indexed one position at a time, so any sequence works.
        iterNo: number of shuffle/split iterations.
        test_percent: fraction of samples held out in each iteration.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over the ``iterNo`` iterations.
    """
    splitter = ShuffleSplit(n_splits=iterNo, test_size=test_percent, random_state=545510477)
    accuracies = []
    aucs = []
    for train_rows, test_rows in splitter.split(X, Y):
        features_train, features_test = X[train_rows], X[test_rows]
        # Gather labels element by element, yielding plain lists.
        labels_train = [Y[idx] for idx in train_rows]
        labels_test = [Y[idx] for idx in test_rows]
        predicted = models_partc.logistic_regression_pred(features_train, labels_train, features_test)
        accuracies.append(accuracy_score(labels_test, predicted))
        aucs.append(roc_auc_score(labels_test, predicted))
    return sum(accuracies)/iterNo, sum(aucs)/iterNo
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    NOTE(review): the splitter is built from the sample count and iterated
    directly, which looks like the legacy ``sklearn.cross_validation``
    interface -- confirm against the file's imports.

    Args:
        X: feature matrix exposing ``shape[0]`` and index-array row selection.
        Y: labels supporting index-array selection.
        iterNo: number of shuffle/split iterations.
        test_percent: fraction of samples held out in each iteration.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over all iterations.
    """
    splitter = ShuffleSplit(
        X.shape[0], n_iter=iterNo, test_size=test_percent, random_state=545510477)
    acc_scores, auc_scores = [], []
    for fit_rows, eval_rows in splitter:
        predictions = models_partc.logistic_regression_pred(
            X[fit_rows], Y[fit_rows], X[eval_rows])
        # classification_metrics is unpacked as a 5-tuple; only the first
        # two entries (accuracy, AUC) are kept.
        fold_acc, fold_auc, _, _, _ = models_partc.classification_metrics(
            predictions, Y[eval_rows])
        acc_scores.append(fold_acc)
        auc_scores.append(fold_auc)
    return mean(acc_scores), mean(auc_scores)
def get_acc_auc_kfold(X,Y,k=5):
    """Return mean accuracy and mean AUC of logistic regression over k folds.

    Args:
        X: feature matrix; must support indexing by an array of row indices.
        Y: labels; indexed one position at a time, so any sequence works.
        k: number of folds.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over the ``k`` folds.
    """
    # No random_state: KFold does not shuffle by default, so a seed has no
    # effect on the splits, and scikit-learn >= 0.24 raises ValueError when
    # random_state is passed while shuffle is False.
    kf = KFold(n_splits=k)
    sum_acc = 0
    sum_auc = 0
    for train_index, test_index in kf.split(X, Y):
        X_train, X_test = X[train_index], X[test_index]
        # Gather labels element by element, yielding plain lists.
        y_train = [Y[i] for i in train_index]
        y_test = [Y[j] for j in test_index]
        y_pred = models_partc.logistic_regression_pred(X_train, y_train, X_test)
        sum_acc += accuracy_score(y_test, y_pred)
        sum_auc += roc_auc_score(y_test, y_pred)
    return sum_acc/k, sum_auc/k
def get_acc_auc_kfold(X, Y, k=5):
    """Return mean accuracy and mean AUC of logistic regression over k folds.

    NOTE(review): the splitter is built from the sample count and iterated
    directly, which looks like the legacy ``sklearn.cross_validation``
    interface -- confirm against the file's imports.

    Args:
        X: feature matrix exposing ``shape[0]`` and index-array row selection.
        Y: labels supporting index-array selection.
        k: number of folds.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over all folds.
    """
    folds = KFold(X.shape[0], n_folds=k, random_state=545510477)
    per_fold_acc, per_fold_auc = [], []
    for fit_idx, eval_idx in folds:
        predictions = models_partc.logistic_regression_pred(
            X[fit_idx], Y[fit_idx], X[eval_idx])
        # classification_metrics is unpacked as a 5-tuple; only the first
        # two entries (accuracy, AUC) are kept.
        fold_acc, fold_auc, _, _, _ = models_partc.classification_metrics(
            predictions, Y[eval_idx])
        per_fold_acc.append(fold_acc)
        per_fold_auc.append(fold_auc)
    return mean(per_fold_acc), mean(per_fold_auc)
def get_acc_auc_randomisedCV(X,Y,iterNo=5,test_percent=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    Args:
        X: feature matrix supporting index-array row selection.
        Y: labels supporting index-array selection.
        iterNo: number of shuffle/split iterations.
        test_percent: fraction of samples held out in each iteration.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over all iterations.
    """
    splitter = ShuffleSplit(n_splits=iterNo, test_size=test_percent, random_state=RANDOM_STATE)
    accuracies = []
    aucs = []
    for train_rows, test_rows in splitter.split(X, Y):
        predictions = models_partc.logistic_regression_pred(
            X[train_rows], Y[train_rows], X[test_rows])
        # classification_metrics is unpacked as a 5-tuple; only accuracy and
        # AUC are used here.
        fold_acc, fold_auc, _, _, _ = models_partc.classification_metrics(
            predictions, Y[test_rows])
        accuracies.append(fold_acc)
        aucs.append(fold_auc)
    return mean(accuracies), mean(aucs)
def get_acc_auc_kfold(X, Y, k=5):
    """Return mean accuracy and mean AUC of logistic regression over k folds.

    Args:
        X: 2-D feature matrix supporting ``X[rows, :]`` selection.
        Y: labels supporting index-array selection.
        k: number of folds.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over the ``k`` folds.
    """
    # shuffle=False makes the folds deterministic, so no seed is passed:
    # random_state has no effect without shuffling, and scikit-learn >= 0.24
    # raises ValueError for that combination.
    kfolds = KFold(n_splits=k, shuffle=False)
    fold_scores = []
    for train, test in kfolds.split(X):
        X_train, Y_train = X[train, :], Y[train]
        X_test, Y_test = X[test, :], Y[test]
        Y_pred = models_partc.logistic_regression_pred(X_train, Y_train, X_test)
        # classification_metrics is unpacked as a 5-tuple; only accuracy and
        # AUC are used here.
        acc, auc, _, _, _ = models_partc.classification_metrics(Y_pred, Y_test)
        fold_scores.append([acc, auc])
    fold_scores = np.array(fold_scores)
    return np.mean(fold_scores[:, 0]), np.mean(fold_scores[:, 1])
def get_acc_auc_kfold(X,Y,k=5):
    """Return mean accuracy and mean AUC of logistic regression over k folds.

    Args:
        X: feature matrix supporting index-array row selection.
        Y: labels supporting index-array selection.
        k: number of folds.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over the ``k`` folds.
    """
    # No random_state: KFold does not shuffle by default, so a seed has no
    # effect on the splits, and scikit-learn >= 0.24 raises ValueError when
    # random_state is passed while shuffle is False.
    k_fold = KFold(n_splits=k)
    acc_folds = []
    auc_folds = []
    for train, test in k_fold.split(X, Y):
        pred = models_partc.logistic_regression_pred(X[train], Y[train], X[test])
        # classification_metrics is unpacked as a 5-tuple; only accuracy and
        # AUC are used here.
        acc, auc_, _, _, _ = models_partc.classification_metrics(pred, Y[test])
        acc_folds.append(acc)
        auc_folds.append(auc_)
    return mean(acc_folds), mean(auc_folds)
def get_acc_auc_kfold(X, Y, k=5):
    """Return mean accuracy and mean AUC of logistic regression over k folds.

    NOTE(review): the splitter is built from the sample count and iterated
    directly, which looks like the legacy ``sklearn.cross_validation``
    interface -- confirm against the file's imports.

    Args:
        X: feature matrix supporting index-array row selection.
        Y: labels supporting ``len()`` and index-array selection.
        k: number of folds; defaults to 5, the value that was previously
            hard-coded, so existing two-argument calls behave the same.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over all folds.
    """
    kf = KFold(len(Y), n_folds=k, random_state=RANDOM_STATE)
    accuracy = []
    auc = []
    for train_index, test_index in kf:
        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = Y[train_index], Y[test_index]
        Y_pred = models_partc.logistic_regression_pred(X_train, Y_train, X_test)
        # Evaluate the metrics once per fold and reuse the result instead of
        # recomputing the whole metric tuple for each value read from it.
        metrics = models_partc.classification_metrics(Y_pred, Y_test)
        accuracy.append(metrics[0])
        auc.append(metrics[1])
    return mean(accuracy), mean(auc)
def get_acc_auc_kfold(X, Y, k=5):
    """Return mean accuracy and mean AUC of logistic regression over k folds.

    Args:
        X: feature matrix supporting index-array row selection.
        Y: labels supporting index-array selection.
        k: number of folds.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over the ``k`` folds.
    """
    acc_total = 0
    auc_total = 0
    # No random_state: KFold does not shuffle by default, so a seed has no
    # effect on the splits, and scikit-learn >= 0.24 raises ValueError when
    # random_state is passed while shuffle is False.
    kf = KFold(n_splits=k)
    for train_index, test_index in kf.split(X):
        Y_pred = models_partc.logistic_regression_pred(
            X[train_index], Y[train_index], X[test_index])
        # classification_metrics is unpacked as a 5-tuple; only accuracy and
        # AUC are used here.
        fold_acc, fold_auc, _, _, _ = models_partc.classification_metrics(
            Y_pred, Y[test_index])
        acc_total += fold_acc
        auc_total += fold_auc
    # Both sums are averaged over the actual fold count; the AUC was
    # previously divided by a literal 5, which is wrong whenever k != 5.
    return acc_total / k, auc_total / k
def get_acc_auc_kfold(X, Y, k=5):
    """Return mean accuracy and mean AUC of logistic regression over k folds.

    Args:
        X: feature matrix supporting index-array row selection.
        Y: labels supporting index-array selection.
        k: number of folds.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over the ``k`` folds.
    """
    # No random_state: KFold does not shuffle by default, so a seed has no
    # effect on the splits, and scikit-learn >= 0.24 raises ValueError when
    # random_state is passed while shuffle is False.
    kf = KFold(n_splits=k)
    accuracyList = []
    aucList = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = Y[train_index], Y[test_index]
        Y_pred = models_partc.logistic_regression_pred(X_train, Y_train, X_test)
        # Only the first two metrics (accuracy, AUC) are needed.
        accuracy, auc = models_partc.classification_metrics(Y_pred, Y_test)[:2]
        accuracyList.append(accuracy)
        aucList.append(auc)
    return mean(accuracyList), mean(aucList)
def get_acc_auc_kfold(X,Y,k=5):
    """Return mean accuracy and mean AUC of logistic regression over k folds.

    Args:
        X: feature matrix supporting index-array row selection.
        Y: labels supporting index-array selection.
        k: number of folds.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over the ``k`` folds.
    """
    # No random_state: KFold does not shuffle by default, so a seed has no
    # effect on the splits, and scikit-learn >= 0.24 raises ValueError when
    # random_state is passed while shuffle is False.
    kf = KFold(n_splits=k)
    scores = []
    for i_train, i_test in kf.split(X):
        X_train, X_test = X[i_train], X[i_test]
        Y_train, Y_test = Y[i_train], Y[i_test]
        Y_pred = models_partc.logistic_regression_pred(X_train, Y_train, X_test)
        # classification_metrics is unpacked as a 5-tuple; only accuracy and
        # AUC are used here.
        accuracy, auc, _, _, _ = models_partc.classification_metrics(Y_pred, Y_test)
        scores.append([accuracy, auc])
    # Column 0 holds per-fold accuracy, column 1 per-fold AUC.
    scores_df = pd.DataFrame(scores)
    return scores_df[0].mean(), scores_df[1].mean()
def get_acc_auc_randomisedCV(X,Y,iterNo=5,test_percent=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    Args:
        X: feature matrix supporting index-array row selection.
        Y: labels supporting index-array selection.
        iterNo: number of shuffle/split iterations.
        test_percent: fraction of samples held out in each iteration.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over all iterations.
    """
    splitter = ShuffleSplit(n_splits=iterNo, test_size=test_percent, random_state=RANDOM_STATE)
    rows = []
    for fit_idx, eval_idx in splitter.split(X):
        predictions = models_partc.logistic_regression_pred(
            X[fit_idx], Y[fit_idx], X[eval_idx])
        # classification_metrics is unpacked as a 5-tuple; only accuracy and
        # AUC are used here.
        fold_acc, fold_auc, _, _, _ = models_partc.classification_metrics(
            predictions, Y[eval_idx])
        rows.append([fold_acc, fold_auc])
    # Column 0 holds per-iteration accuracy, column 1 per-iteration AUC.
    table = pd.DataFrame(rows)
    mean_acc = table[0].mean()
    mean_auc = table[1].mean()
    return mean_acc, mean_auc
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    Args:
        X: feature matrix supporting index-array row selection.
        Y: labels supporting index-array selection.
        iterNo: number of shuffle/split iterations.
        test_percent: fraction of samples held out in each iteration.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over all iterations.
    """
    splitter = ShuffleSplit(
        n_splits=iterNo, test_size=test_percent, random_state=RANDOM_STATE)
    acc_per_split, auc_per_split = [], []
    for fit_rows, eval_rows in splitter.split(X):
        predictions = models_partc.logistic_regression_pred(
            X[fit_rows], Y[fit_rows], X[eval_rows])
        metrics = models_partc.classification_metrics(predictions, Y[eval_rows])
        # Only the first two metrics (accuracy, AUC) are needed.
        split_acc, split_auc = metrics[:2]
        acc_per_split.append(split_acc)
        auc_per_split.append(split_auc)
    return mean(acc_per_split), mean(auc_per_split)
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_size=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    Args:
        X: 2-D feature matrix supporting ``X[rows, :]`` selection.
        Y: labels supporting index-array selection.
        iterNo: number of shuffle/split iterations.
        test_size: fraction of samples held out in each iteration.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over all iterations.
    """
    split_scores = []
    # Seed the shuffling so repeated runs report the same averages; an
    # unseeded ShuffleSplit draws different splits on every call.
    splitter = ShuffleSplit(n_splits=iterNo, test_size=test_size,
                            random_state=RANDOM_STATE)
    for train, test in splitter.split(X):
        X_train, Y_train = X[train, :], Y[train]
        X_test, Y_test = X[test, :], Y[test]
        Y_pred = models_partc.logistic_regression_pred(X_train, Y_train, X_test)
        # classification_metrics is unpacked as a 5-tuple; only accuracy and
        # AUC are used here.
        acc, auc, _, _, _ = models_partc.classification_metrics(Y_pred, Y_test)
        split_scores.append([acc, auc])
    split_scores = np.array(split_scores)
    return np.mean(split_scores[:, 0]), np.mean(split_scores[:, 1])
def get_acc_auc_randomisedCV(X,Y,iterNo=5,test_percent=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    Args:
        X: feature matrix supporting index-array row selection.
        Y: labels supporting index-array selection.
        iterNo: number of shuffle/split iterations.
        test_percent: fraction of samples held out in each iteration.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over all iterations.
    """
    splitter = ShuffleSplit(n_splits=iterNo, test_size=test_percent, random_state=RANDOM_STATE)
    accuracies = []
    aucs = []
    for train_index, test_index in splitter.split(X):
        Y_true = Y[test_index]
        Y_pred = models_partc.logistic_regression_pred(
            X[train_index], Y[train_index], X[test_index])
        accuracies.append(accuracy_score(Y_true, Y_pred))
        # roc_auc_score expects (y_true, y_score) with samples kept in their
        # original order; sorting the two arrays independently breaks the
        # per-sample pairing and yields a meaningless score.
        aucs.append(roc_auc_score(Y_true, Y_pred))
    return np.mean(accuracies), np.mean(aucs)
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    Args:
        X: feature matrix supporting index-array row selection.
        Y: labels supporting index-array selection.
        iterNo: number of shuffle/split iterations; also the divisor used
            for both averages.
        test_percent: fraction of samples held out in each iteration.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over the ``iterNo`` iterations.
    """
    splitter = ShuffleSplit(
        n_splits=iterNo, test_size=test_percent, random_state=RANDOM_STATE)
    acc_values = []
    auc_values = []
    for fit_rows, eval_rows in splitter.split(X):
        predictions = models_partc.logistic_regression_pred(
            X[fit_rows], Y[fit_rows], X[eval_rows])
        # classification_metrics is unpacked as a 5-tuple; only accuracy and
        # AUC are used here.
        split_acc, split_auc, _, _, _ = models_partc.classification_metrics(
            predictions, Y[eval_rows])
        acc_values.append(split_acc)
        auc_values.append(split_auc)
    return sum(acc_values) / iterNo, sum(auc_values) / iterNo
def get_acc_auc_kfold(X, Y, k=5):
    """Return mean accuracy and mean AUC of logistic regression over k folds.

    NOTE(review): the splitter is built from the sample count and iterated
    directly, which looks like the legacy ``sklearn.cross_validation``
    interface -- confirm against the file's imports.

    Args:
        X: feature matrix exposing ``shape[0]`` and index-array row selection.
        Y: labels supporting index-array selection.
        k: number of folds; also the divisor used for both averages.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` as floats.
    """
    folds = KFold(X.shape[0], n_folds=k, random_state=RANDOM_STATE)
    acc_values = []
    auc_values = []
    for fit_idx, eval_idx in folds:
        predictions = models_partc.logistic_regression_pred(
            X[fit_idx], Y[fit_idx], X[eval_idx])
        # classification_metrics is unpacked as a 5-tuple; only accuracy and
        # AUC are used here.
        fold_acc, fold_auc, _, _, _ = models_partc.classification_metrics(
            predictions, Y[eval_idx])
        acc_values.append(fold_acc)
        auc_values.append(fold_auc)
    mean_acc = float(sum(acc_values)) / k
    mean_auc = float(sum(auc_values)) / k
    return mean_acc, mean_auc
def get_acc_auc_kfold(X, Y, algo="logistic regression", k=5, n_components=110):
    """Return k-fold averages of accuracy, AUC, precision and recall.

    NOTE(review): the splitter is built from the sample count and iterated
    directly, which looks like the legacy ``sklearn.cross_validation``
    interface -- confirm against the file's imports.

    Args:
        X: feature matrix exposing ``shape[0]`` and index-array row selection.
        Y: labels supporting index-array selection.
        algo: which ``models_partc`` predictor to run; one of
            "logistic regression", "linear_svm", "decision_tree",
            "ada boost", "bagging logistic", "bagging_svm",
            "neural_network".
        k: number of folds.
        n_components: currently unused; kept so existing callers that pass
            it keep working.

    Returns:
        Tuple ``(mean accuracy, mean AUC, mean precision, mean recall)``.

    Raises:
        ValueError: if ``algo`` is not one of the supported names.
    """
    # Map each algo name to the models_partc attribute implementing it. The
    # attribute is resolved lazily, so only the selected predictor has to
    # exist in models_partc.
    predictor_names = {
        "logistic regression": "logistic_regression_pred",
        "linear_svm": "svm_pred",
        "decision_tree": "decisionTree_pred",
        "ada boost": "ada_boost_pred",
        "bagging logistic": "bagging_log_pred",
        "bagging_svm": "bagging_SVC_pred",
        "neural_network": "neural_network",
    }
    # Fail fast with a clear message; an unknown name would otherwise only
    # surface later as an unbound prediction variable inside the fold loop.
    if algo not in predictor_names:
        raise ValueError(
            "Unknown algo %r; expected one of %s"
            % (algo, sorted(predictor_names)))
    predict = getattr(models_partc, predictor_names[algo])

    kf = KFold(n=X.shape[0], n_folds=k)
    acc_list = []
    auc_list = []
    precision_list = []
    recall_list = []
    for train_idx, test_idx in kf:
        X_train, X_test = X[train_idx], X[test_idx]
        Y_pred = predict(X_train, Y[train_idx], X_test)
        # classification_metrics is unpacked as a 5-tuple; the F1 score is
        # not part of this function's result.
        acc, auc_, precision, recall, _ = models_partc.classification_metrics(
            Y_pred, Y[test_idx])
        acc_list.append(acc)
        auc_list.append(auc_)
        precision_list.append(precision)
        recall_list.append(recall)
    return mean(acc_list), mean(auc_list), mean(precision_list), mean(recall_list)
def get_acc_auc_kfold(X,Y,k=5):
    """Return mean accuracy and mean AUC of logistic regression over k folds.

    Args:
        X: feature matrix supporting index-array row selection.
        Y: labels supporting index-array selection.
        k: number of folds.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over the ``k`` folds.
    """
    # No random_state: KFold does not shuffle by default, so a seed has no
    # effect on the splits, and scikit-learn >= 0.24 raises ValueError when
    # random_state is passed while shuffle is False.
    kf = KFold(n_splits=k)
    accuracies = []
    aucs = []
    for train_index, test_index in kf.split(X):
        Y_true = Y[test_index]
        Y_pred = models_partc.logistic_regression_pred(
            X[train_index], Y[train_index], X[test_index])
        accuracies.append(accuracy_score(Y_true, Y_pred))
        # roc_auc_score takes (y_true, y_score); AUC is not symmetric in its
        # arguments, so the ground truth must come first.
        aucs.append(roc_auc_score(Y_true, Y_pred))
    return np.mean(accuracies), np.mean(aucs)
def get_acc_auc_kfold(X, Y, k=5):
    """Return mean accuracy and mean AUC of logistic regression over k folds.

    Args:
        X: 2-D feature matrix supporting ``X[rows, :]`` selection.
        Y: labels supporting index-array selection.
        k: number of folds.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over the ``k`` folds.
    """
    splitter = KFold(k)
    per_fold = []
    for fit_rows, eval_rows in splitter.split(X):
        features_fit, labels_fit = X[fit_rows, :], Y[fit_rows]
        features_eval, labels_eval = X[eval_rows, :], Y[eval_rows]
        predictions = models_partc.logistic_regression_pred(
            features_fit, labels_fit, features_eval)
        # classification_metrics is unpacked as a 5-tuple; only accuracy and
        # AUC are kept.
        fold_acc, fold_auc, _, _, _ = models_partc.classification_metrics(
            predictions, labels_eval)
        per_fold.append([fold_acc, fold_auc])
    # Column 0 is accuracy, column 1 is AUC.
    score_matrix = np.array(per_fold)
    mean_acc = np.mean(score_matrix[:, 0])
    mean_auc = np.mean(score_matrix[:, 1])
    return mean_acc, mean_auc
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    NOTE(review): the splitter is built from the sample count and iterated
    directly, which looks like the legacy ``sklearn.cross_validation``
    interface -- confirm against the file's imports.

    Args:
        X: feature matrix exposing ``shape[0]`` and index-array row selection.
        Y: labels supporting index-array selection.
        iterNo: number of shuffle/split iterations; also the divisor used
            for both averages.
        test_percent: fraction of samples held out in each iteration.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` as floats.
    """
    splitter = ShuffleSplit(
        X.shape[0], n_iter=iterNo, test_size=test_percent, random_state=RANDOM_STATE)
    acc_values = []
    auc_values = []
    for fit_rows, eval_rows in splitter:
        predictions = models_partc.logistic_regression_pred(
            X[fit_rows], Y[fit_rows], X[eval_rows])
        # classification_metrics is unpacked as a 5-tuple; only accuracy and
        # AUC are used here.
        split_acc, split_auc, _, _, _ = models_partc.classification_metrics(
            predictions, Y[eval_rows])
        acc_values.append(split_acc)
        auc_values.append(split_auc)
    mean_acc = float(sum(acc_values)) / iterNo
    mean_auc = float(sum(auc_values)) / iterNo
    return mean_acc, mean_auc
def get_acc_auc_kfold(X, Y, k=5):
    """Return mean accuracy and mean AUC of logistic regression over k folds.

    Args:
        X: feature matrix supporting index-array row selection.
        Y: labels supporting index-array selection.
        k: number of folds.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over the ``k`` folds.
    """
    # No random_state: KFold does not shuffle by default, so a seed has no
    # effect on the splits, and scikit-learn >= 0.24 raises ValueError when
    # random_state is passed while shuffle is False.
    kf = KFold(n_splits=k)
    accuracy_array = np.array([])
    area_under_curve_array = np.array([])
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]
        y_pred = models_partc.logistic_regression_pred(X_train, y_train, X_test)
        # classification_metrics is unpacked as a 5-tuple; only accuracy and
        # AUC are used here.
        accuracy, area_under_curve, _, _, _ = models_partc.classification_metrics(
            Y_pred=y_pred, Y_true=y_test)
        accuracy_array = np.append(accuracy_array, accuracy)
        area_under_curve_array = np.append(area_under_curve_array, area_under_curve)
    return np.mean(accuracy_array), np.mean(area_under_curve_array)
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    NOTE(review): the splitter is built from the sample count and iterated
    directly, which looks like the legacy ``sklearn.cross_validation``
    interface -- confirm against the file's imports.

    Args:
        X: feature matrix supporting index-array row selection.
        Y: labels supporting ``len()`` and index-array selection.
        iterNo: number of shuffle/split iterations; defaults to 5, the value
            that was previously hard-coded.
        test_percent: fraction of samples held out per iteration; defaults
            to 0.2, the value that was previously hard-coded.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over all iterations.
    """
    rs = ShuffleSplit(len(Y), n_iter=iterNo, test_size=test_percent,
                      random_state=RANDOM_STATE)
    accuracylist = []
    auclist = []
    for train_index, test_index in rs:
        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = Y[train_index], Y[test_index]
        Y_pred = models_partc.logistic_regression_pred(X_train, Y_train, X_test)
        # classification_metrics is unpacked as a 5-tuple; only accuracy and
        # AUC are used here.
        accuracy, auc, _, _, _ = models_partc.classification_metrics(Y_pred, Y_test)
        accuracylist.append(accuracy)
        auclist.append(auc)
    return mean(accuracylist), mean(auclist)
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    """Average accuracy and AUC of logistic regression over random splits.

    Args:
        X: feature matrix supporting index-array row selection.
        Y: labels supporting index-array selection.
        iterNo: number of shuffle/split iterations.
        test_percent: fraction of samples held out in each iteration.

    Returns:
        Tuple ``(mean accuracy, mean AUC)`` over all iterations.
    """
    splitter = ShuffleSplit(
        n_splits=iterNo, random_state=RANDOM_STATE, test_size=test_percent)
    acc_values = []
    auc_values = []
    for fit_rows, eval_rows in splitter.split(X):
        features_fit, features_eval = X[fit_rows], X[eval_rows]
        labels_fit, labels_eval = Y[fit_rows], Y[eval_rows]
        predictions = models_partc.logistic_regression_pred(
            features_fit, labels_fit, features_eval)
        # classification_metrics is unpacked as a 5-tuple; only accuracy and
        # AUC are used here.
        split_acc, split_auc, _, _, _ = models_partc.classification_metrics(
            Y_pred=predictions, Y_true=labels_eval)
        acc_values.append(split_acc)
        auc_values.append(split_auc)
    # Average as float arrays, matching accumulation into float ndarrays.
    mean_acc = np.mean(np.asarray(acc_values, dtype=float))
    mean_auc = np.mean(np.asarray(auc_values, dtype=float))
    return mean_acc, mean_auc