def get_acc_auc_kfold(X, Y, k=10):
    """Run k-fold cross-validation and return the mean accuracy.

    For each of the ``k`` folds, trains a classifier via
    ``my_model.my_classifier_predictions`` on the training indices and
    scores its predictions on the held-out indices with
    ``models.classification_metrics``.

    Fix: removed the dead local ``auc_curve = 0`` that was assigned but
    never read or returned.

    NOTE(review): the original TODO mentions reporting mean AUC, but this
    version of ``classification_metrics`` is unpacked into only four values
    (accuracy, precision, recall, f1) — no AUC is available to report here.
    Confirm against the ``models`` module.

    :param X: feature matrix, indexable by integer index arrays
    :param Y: label vector with a ``.size`` attribute (presumably a numpy
              array — TODO confirm)
    :param k: number of folds (default 10)
    :return: mean accuracy across the k folds
    """
    accuracy = 0
    kf_total = KFold(Y.size, n_folds=k)
    for train_indices, test_indices in kf_total:
        # Train on the fold's training subset, predict on its test subset.
        lr = my_model.my_classifier_predictions(X[train_indices],
                                                Y[train_indices],
                                                X[test_indices])
        acc, p, r, f = models.classification_metrics(lr, Y[test_indices])
        accuracy += acc
    # Mean accuracy over all k folds.
    return accuracy / k
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    """Run randomized (shuffle-split) cross-validation; return mean accuracy.

    For each of ``iterNo`` random train/test splits (test fraction
    ``test_percent``), trains a classifier via
    ``my_model.my_classifier_predictions`` and scores it with
    ``models.classification_metrics``.

    Fix: removed the dead local ``auc_curve = 0`` that was assigned but
    never read or returned.

    NOTE(review): the original TODO mentions reporting mean AUC, but this
    version of ``classification_metrics`` is unpacked into only four values
    (accuracy, precision, recall, f1) — no AUC is available to report here.
    Confirm against the ``models`` module.

    :param X: feature matrix, indexable by integer index arrays
    :param Y: label vector with a ``.size`` attribute
    :param iterNo: number of random splits (default 5)
    :param test_percent: fraction of samples held out per split (default 0.2)
    :return: mean accuracy across the ``iterNo`` iterations
    """
    accuracy = 0
    rs = ShuffleSplit(Y.size, n_iter=iterNo, test_size=test_percent)
    for train_indices, test_indices in rs:
        # Train on this split's training subset, predict on its test subset.
        lr = my_model.my_classifier_predictions(X[train_indices],
                                                Y[train_indices],
                                                X[test_indices])
        acc, p, r, f = models.classification_metrics(lr, Y[test_indices])
        accuracy += acc
    # Mean accuracy over all iterations.
    return accuracy / iterNo
def get_acc_auc_kfold(X, Y, k=5):
    """Evaluate logistic regression with k-fold cross-validation.

    Splits the data into ``k`` folds (fixed seed 545510477), trains
    ``models.logistic_regression_pred`` on each training fold, and scores
    the held-out fold with ``models.classification_metrics``.

    :param X: sparse feature matrix (has ``.get_shape()``), row-indexable
    :param Y: label vector, indexable by integer index arrays
    :param k: number of folds (default 5)
    :return: tuple (mean accuracy, mean AUC) over the k folds
    """
    acc_total = 0.0
    auc_total = 0.0
    folds = KFold(n=X.get_shape()[0], n_folds=k, random_state=545510477)
    for train_idx, test_idx in folds:
        # Fit on the training fold, predict the held-out fold.
        preds = models.logistic_regression_pred(X[train_idx], Y[train_idx],
                                                X[test_idx])
        fold_acc, fold_auc, _prec, _rec, _f1 = models.classification_metrics(
            preds, Y[test_idx])
        acc_total += fold_acc
        auc_total += fold_auc
    # KFold always yields exactly k splits, so dividing by k is the mean.
    return acc_total / k, auc_total / k
def get_acc_auc_kfold(X, Y, k=5):
    """K-fold cross-validation of the logistic-regression classifier.

    Uses a fixed random_state (545510477) for the fold generator, trains
    ``models.logistic_regression_pred`` per fold, and aggregates the
    accuracy and AUC reported by ``models.classification_metrics``.

    :param X: sparse feature matrix (has ``.get_shape()``), row-indexable
    :param Y: label vector, indexable by integer index arrays
    :param k: number of folds (default 5)
    :return: tuple (mean accuracy, mean AUC) over the k folds
    """
    cv = KFold(n=X.get_shape()[0], n_folds=k, random_state=545510477)
    scores = []
    for tr, te in cv:
        # Predict the held-out fold and keep only (accuracy, auc).
        y_hat = models.logistic_regression_pred(X[tr], Y[tr], X[te])
        scores.append(models.classification_metrics(y_hat, Y[te])[:2])
    accs = [s[0] for s in scores]
    aucs = [s[1] for s in scores]
    return sum(accs) / len(accs), sum(aucs) / len(aucs)
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    """Randomized (shuffle-split) cross-validation of logistic regression.

    Draws ``iterNo`` random train/test splits (test fraction
    ``test_percent``, fixed seed 545510477), trains
    ``models.logistic_regression_pred`` on each, and averages the accuracy
    and AUC from ``models.classification_metrics``.

    Bug fix: the original hard-coded ``n_iter=5`` and ``test_size=.2``,
    silently ignoring the ``iterNo`` and ``test_percent`` arguments; they
    are now passed through. Defaults are unchanged, so default-argument
    callers see identical behavior.

    :param X: sparse feature matrix (has ``.get_shape()``), row-indexable
    :param Y: label vector, indexable by integer index arrays
    :param iterNo: number of random splits (default 5)
    :param test_percent: fraction of samples held out per split (default 0.2)
    :return: tuple (mean accuracy, mean AUC) over the iterations
    """
    accuracy_arr = []
    auc_arr = []
    shuffle_split = ShuffleSplit(n=X.get_shape()[0], n_iter=iterNo,
                                 test_size=test_percent,
                                 random_state=545510477)
    for train_i, test_i in shuffle_split:
        X_train, X_test = X[train_i], X[test_i]
        Y_train, Y_test = Y[train_i], Y[test_i]
        Y_pred = models.logistic_regression_pred(X_train, Y_train, X_test)
        acc, auc_, precision, recall, f1score = models.classification_metrics(
            Y_pred, Y_test)
        accuracy_arr.append(acc)
        auc_arr.append(auc_)
    return sum(accuracy_arr) / len(accuracy_arr), sum(auc_arr) / len(auc_arr)
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    """Randomized (shuffle-split) cross-validation of logistic regression.

    Draws ``iterNo`` random train/test splits (test fraction
    ``test_percent``), trains ``models.logistic_regression_pred`` on each,
    and averages the accuracy and AUC from
    ``models.classification_metrics``.

    Bug fix: the original iterated the ``ShuffleSplit`` object twice — once
    to build predictions and once to collect ground-truth labels. With no
    ``random_state`` set, each pass over the generator re-draws *different*
    random splits, so predictions were scored against labels from unrelated
    test sets. The splits are now materialized once and reused.

    :param X: feature matrix, indexable by integer index arrays
    :param Y: label vector supporting ``len()`` and fancy indexing
    :param iterNo: number of random splits (default 5)
    :param test_percent: fraction of samples held out per split (default 0.2)
    :return: tuple (mean accuracy, mean AUC) over the iterations
    """
    randomizedCV = ShuffleSplit(n=len(Y), n_iter=iterNo,
                                test_size=test_percent)
    # Freeze the splits so predictions and truth come from the SAME folds.
    splits = list(randomizedCV)
    # Train logistic regression on each split's train set.
    lr_pred = [models.logistic_regression_pred(X[train_index], Y[train_index],
                                               X[test_index])
               for train_index, test_index in splits]
    # True Y values for each split's test set (same splits as above).
    Y_true_values = [Y[test_index] for train_index, test_index in splits]
    # Classification metrics for each split.
    metrics = [models.classification_metrics(Y_pred, Y_true)
               for Y_pred, Y_true in zip(lr_pred, Y_true_values)]
    # Mean accuracy and mean AUC across all iterations.
    mean_accuracy = mean([elt[0] for elt in metrics])
    mean_auc = mean([elt[1] for elt in metrics])
    return mean_accuracy, mean_auc
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    """Shuffle-split cross-validation of the logistic-regression classifier.

    Generates ``iterNo`` random train/test partitions (test fraction
    ``test_percent``, fixed seed 545510477) and reports the mean accuracy
    and mean AUC computed by ``models.classification_metrics``.

    Bug fix: the original passed the literals ``n_iter=5`` and
    ``test_size=.2`` to ``ShuffleSplit``, so the ``iterNo`` and
    ``test_percent`` parameters were silently ignored; they are now wired
    through. Defaults are unchanged, so default-argument callers see
    identical behavior.

    :param X: sparse feature matrix (has ``.get_shape()``), row-indexable
    :param Y: label vector, indexable by integer index arrays
    :param iterNo: number of random splits (default 5)
    :param test_percent: fraction of samples held out per split (default 0.2)
    :return: tuple (mean accuracy, mean AUC) over the iterations
    """
    accuracy_arr = []
    auc_arr = []
    shuffle_split = ShuffleSplit(n=X.get_shape()[0], n_iter=iterNo,
                                 test_size=test_percent,
                                 random_state=545510477)
    for train_i, test_i in shuffle_split:
        X_train, X_test = X[train_i], X[test_i]
        Y_train, Y_test = Y[train_i], Y[test_i]
        Y_pred = models.logistic_regression_pred(X_train, Y_train, X_test)
        acc, auc_, precision, recall, f1score = models.classification_metrics(
            Y_pred, Y_test)
        accuracy_arr.append(acc)
        auc_arr.append(auc_)
    return sum(accuracy_arr) / len(accuracy_arr), sum(auc_arr) / len(auc_arr)
def get_acc_auc_kfold(X, Y, k=5):
    """K-fold cross-validation of the logistic-regression classifier.

    Trains ``models.logistic_regression_pred`` on each fold's training
    indices, scores the held-out indices with
    ``models.classification_metrics``, and returns the mean accuracy and
    mean AUC across the ``k`` folds.

    :param X: feature matrix, indexable by integer index arrays
    :param Y: label vector supporting ``len()`` and fancy indexing
    :param k: number of folds (default 5)
    :return: tuple (mean accuracy, mean AUC)
    """
    cv = KFold(n=len(Y), n_folds=k)
    # KFold splits are deterministic, so one pass collects everything.
    metric_rows = []
    for train_idx, test_idx in cv:
        predictions = models.logistic_regression_pred(X[train_idx],
                                                      Y[train_idx],
                                                      X[test_idx])
        metric_rows.append(
            models.classification_metrics(predictions, Y[test_idx]))
    mean_accuracy = mean([row[0] for row in metric_rows])
    mean_auc = mean([row[1] for row in metric_rows])
    return mean_accuracy, mean_auc