# Shared imports for the cross-validation helpers below. All of these variants
# were written against the pre-0.18 scikit-learn API, where KFold and
# ShuffleSplit live in sklearn.cross_validation; models is the accompanying
# module providing logistic_regression_pred and classification_metrics.
import sklearn.metrics
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.cross_validation import KFold, ShuffleSplit
from numpy import mean

import models


def get_acc_auc_kfold(X, Y, k=5):
    # Get the train and test indices for each fold, train the classifier on
    # each fold, and report the mean accuracy and mean AUC over all folds.
    a = KFold(len(Y), k)
    acc = []
    auc = []
    for train_index, test_index in a:
        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = Y[train_index], Y[test_index]
        Y_pred = models.logistic_regression_pred(X_train, Y_train, X_test)
        '''
        # Per-fold ROC plot, left disabled; enabling it would additionally need
        # roc_curve from sklearn.metrics and matplotlib.pyplot as plt.
        false_positive_rate, true_positive_rate, thresholds = roc_curve(Y_test, Y_pred)
        roc_auc = sklearn.metrics.roc_auc_score(Y_test, Y_pred)
        plt.title('Receiver Operating Characteristic')
        plt.plot(false_positive_rate, true_positive_rate, 'b', label='AUC = %0.2f' % roc_auc)
        plt.legend(loc='lower right')
        plt.plot([0, 1], [0, 1], 'r--')
        plt.xlim([-0.1, 1.2])
        plt.ylim([-0.1, 1.2])
        plt.ylabel('True Positive Rate')
        plt.xlabel('False Positive Rate')
        plt.show()
        '''
        acc_1 = sklearn.metrics.accuracy_score(Y_test, Y_pred)
        auc_1 = sklearn.metrics.roc_auc_score(Y_test, Y_pred)
        acc.append(acc_1)
        auc.append(auc_1)
    acc_mean = mean(acc)
    auc_mean = mean(auc)
    return acc_mean, auc_mean
def get_acc_auc_kfold(X, Y, k=5):
    # Get the train and test indices for each fold, train the classifier on
    # each fold, and report the mean accuracy and mean AUC over all folds.
    kf = KFold(len(Y), n_folds=k, random_state=545510477)
    acc = []
    auc = []
    for train, test in kf:
        Y_pred = models.logistic_regression_pred(X[train], Y[train], X[test])
        acc.append(accuracy_score(Y[test], Y_pred))
        auc.append(roc_auc_score(Y[test], Y_pred))
    return mean(acc), mean(auc)
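# The KFold used above comes from sklearn.cross_validation, which was removed in
# scikit-learn 0.20. Below is a minimal sketch of the same k-fold loop against
# the current sklearn.model_selection API; the function name is new here, and it
# assumes the same models.logistic_regression_pred(X_train, Y_train, X_test)
# helper as the variants above.
def get_acc_auc_kfold_modern(X, Y, k=5):
    from sklearn.model_selection import KFold as ModelSelectionKFold
    from sklearn.metrics import accuracy_score, roc_auc_score
    import numpy as np

    # shuffle defaults to False, matching the deterministic splits above
    kf = ModelSelectionKFold(n_splits=k)
    acc, auc = [], []
    # split() yields (train_indices, test_indices) for each fold
    for train, test in kf.split(X):
        Y_pred = models.logistic_regression_pred(X[train], Y[train], X[test])
        acc.append(accuracy_score(Y[test], Y_pred))
        auc.append(roc_auc_score(Y[test], Y_pred))
    return np.mean(acc), np.mean(auc)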
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    # Get the train and test indices for each randomised split, train the
    # classifier on each split, and report the mean accuracy and mean AUC
    # over all iterations.
    kf = ShuffleSplit(len(Y), n_iter=iterNo, test_size=test_percent,
                      random_state=545510477)
    acc = []
    auc = []
    for train, test in kf:
        Y_pred = models.logistic_regression_pred(X[train], Y[train], X[test])
        acc.append(accuracy_score(Y[test], Y_pred))
        auc.append(roc_auc_score(Y[test], Y_pred))
    return mean(acc), mean(auc)
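# ShuffleSplit moved to sklearn.model_selection as well (n_iter became n_splits,
# and splits are produced by .split()). A minimal sketch under that assumption,
# again reusing models.logistic_regression_pred; the function name is new here.
def get_acc_auc_randomisedCV_modern(X, Y, iterNo=5, test_percent=0.2):
    from sklearn.model_selection import ShuffleSplit as ModelSelectionShuffleSplit
    from sklearn.metrics import accuracy_score, roc_auc_score
    import numpy as np

    ss = ModelSelectionShuffleSplit(n_splits=iterNo, test_size=test_percent,
                                    random_state=545510477)
    acc, auc = [], []
    for train, test in ss.split(X):
        Y_pred = models.logistic_regression_pred(X[train], Y[train], X[test])
        acc.append(accuracy_score(Y[test], Y_pred))
        auc.append(roc_auc_score(Y[test], Y_pred))
    return np.mean(acc), np.mean(auc)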
def get_acc_auc_kfold(X, Y, k=5):
    # Get the train and test indices for each fold, train the classifier on
    # each fold, and report the mean accuracy and mean AUC over all folds.
    accuracy_arr = []
    auc_arr = []
    kfold = KFold(n=X.get_shape()[0], n_folds=k, random_state=545510477)
    for train_i, test_i in kfold:
        X_train, X_test = X[train_i], X[test_i]
        Y_train, Y_test = Y[train_i], Y[test_i]
        Y_pred = models.logistic_regression_pred(X_train, Y_train, X_test)
        acc, auc_, precision, recall, f1score = models.classification_metrics(Y_pred, Y_test)
        accuracy_arr.append(acc)
        auc_arr.append(auc_)
    return sum(accuracy_arr) / len(accuracy_arr), sum(auc_arr) / len(auc_arr)
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    # Get the train and test indices for each randomised split, train the
    # classifier on each split, and report the mean accuracy and mean AUC
    # over all iterations.
    accuracy_arr = []
    auc_arr = []
    # Use the iterNo and test_percent arguments instead of hard-coded 5 and 0.2.
    shuffle_split = ShuffleSplit(n=X.get_shape()[0], n_iter=iterNo,
                                 test_size=test_percent, random_state=545510477)
    for train_i, test_i in shuffle_split:
        X_train, X_test = X[train_i], X[test_i]
        Y_train, Y_test = Y[train_i], Y[test_i]
        Y_pred = models.logistic_regression_pred(X_train, Y_train, X_test)
        acc, auc_, precision, recall, f1score = models.classification_metrics(Y_pred, Y_test)
        accuracy_arr.append(acc)
        auc_arr.append(auc_)
    return sum(accuracy_arr) / len(accuracy_arr), sum(auc_arr) / len(auc_arr)
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    # Get the train and test indices for each randomised split, train the
    # classifier on each split, and report the mean accuracy and mean AUC
    # over all iterations.
    a = ShuffleSplit(len(Y), iterNo, test_percent)
    acc = []
    auc = []
    for train_index, test_index in a:
        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = Y[train_index], Y[test_index]
        Y_pred = models.logistic_regression_pred(X_train, Y_train, X_test)
        acc_1 = sklearn.metrics.accuracy_score(Y_test, Y_pred)
        auc_1 = sklearn.metrics.roc_auc_score(Y_test, Y_pred)
        acc.append(acc_1)
        auc.append(auc_1)
    acc_mean = mean(acc)
    auc_mean = mean(auc)
    return acc_mean, auc_mean
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    # Materialise the randomised splits once: iterating a ShuffleSplit object
    # twice without a fixed random_state can draw different splits each time,
    # so the predictions and the true labels below would no longer line up.
    randomizedCV = list(ShuffleSplit(n=len(Y), n_iter=iterNo, test_size=test_percent))
    # Train the logistic regression classifier on each iteration's train set
    lr_pred = [models.logistic_regression_pred(X[train_index], Y[train_index], X[test_index])
               for train_index, test_index in randomizedCV]
    # Collect the true Y values for each iteration's test set
    Y_true_values = [Y[test_index] for train_index, test_index in randomizedCV]
    # Compute the classification metrics for the model trained on each iteration
    metrics = [models.classification_metrics(Y_pred, Y_true)
               for Y_pred, Y_true in zip(lr_pred, Y_true_values)]
    # Mean accuracy and mean AUC across all iterations
    mean_accuracy = mean([elt[0] for elt in metrics])
    mean_auc = mean([elt[1] for elt in metrics])
    return mean_accuracy, mean_auc
def get_acc_auc_kfold(X, Y, k=5):
    # Get the train and test indices for each k-fold iteration
    kfold = KFold(n=len(Y), n_folds=k)
    # Train the logistic regression classifier on each fold's train set
    lr_pred = [models.logistic_regression_pred(X[train_index], Y[train_index], X[test_index])
               for train_index, test_index in kfold]
    # Get the true Y values for each fold's test set (KFold splits are
    # deterministic, so iterating the object again yields the same folds)
    Y_true_values = [Y[test_index] for train_index, test_index in kfold]
    # Compute the classification metrics for the model trained on each fold
    metrics = [models.classification_metrics(Y_pred, Y_true)
               for Y_pred, Y_true in zip(lr_pred, Y_true_values)]
    mean_accuracy = mean([elt[0] for elt in metrics])
    mean_auc = mean([elt[1] for elt in metrics])
    return mean_accuracy, mean_auc
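# A minimal driver sketch for the functions above. The SVMLight feature file
# path is a placeholder and the use of sklearn's load_svmlight_file is an
# assumption about how the features were saved; substitute whatever loader and
# path the rest of the project actually uses.
if __name__ == "__main__":
    from sklearn.datasets import load_svmlight_file

    X, Y = load_svmlight_file("features_svmlight.train")  # hypothetical path
    print("K-fold CV (mean accuracy, mean AUC):",
          get_acc_auc_kfold(X, Y, k=5))
    print("Randomised CV (mean accuracy, mean AUC):",
          get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2))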