def train(model_name):
    """Train a freshly created model on the SMOTE-balanced training set.

    The training data comes from ``prepare_data(NUM_TRAIN, "data/exoTrain.csv")``.
    The minority class is oversampled with SMOTE, the 2D feature matrix gets a
    trailing axis of size 1, and the model returned by ``create_model()`` is
    fitted for ``EPOCHS`` epochs with TensorBoard logging and best-loss
    checkpointing.

    Args:
        model_name: Not read by this function; kept so existing callers keep
            working. The checkpoint is written to the module-level
            ``model_path``.
    """
    # SMOTE for upsampling the minority class
    X_train, Y_train = prepare_data(NUM_TRAIN, "data/exoTrain.csv")
    sampler = SMOTE()
    # imbalanced-learn renamed ``fit_sample`` to ``fit_resample`` and later
    # removed the old name; prefer the new one and fall back for old installs.
    resample = getattr(sampler, "fit_resample", None) or sampler.fit_sample
    X_train, Y_train = resample(X_train, Y_train)

    # Reshape the array from 2D into 3D
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

    model = create_model()

    # Add some checkpoints
    tensorboard = TensorBoard(log_dir='./Graph', histogram_freq=0,
                              write_graph=True, write_images=True)
    # Monitors the training loss and only overwrites the file on improvement.
    checkpoint_train = ModelCheckpoint(model_path, monitor="loss",
                                       save_best_only=True)
    print("Added checkpoints")

    model.fit(x=X_train, y=Y_train, epochs=EPOCHS,
              callbacks=[tensorboard, checkpoint_train])
# Python 3.7 used
# Must have fashion-mnist repo locally
import utilities as util
import numpy as np
from sklearn import metrics
from sklearn.linear_model import LogisticRegression


def init_logistic_regression(X_train, y_train, X_test, y_test, penalty='l2', C=1.0, max_iter=5000, solver='lbfgs'):
    """Build a ``LogisticRegression`` classifier and fit it on the training data.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Not used here; kept so existing call sites stay valid.
        y_test: Not used here; kept so existing call sites stay valid.
        penalty: Forwarded to ``LogisticRegression``.
        C: Forwarded to ``LogisticRegression``.
        max_iter: Forwarded to ``LogisticRegression``.
        solver: Forwarded to ``LogisticRegression``.

    Returns:
        The fitted estimator (the value returned by ``fit``).
    """
    clf = LogisticRegression(penalty=penalty, C=C, max_iter=max_iter, solver=solver)
    return clf.fit(X_train, y_train)


if __name__ == "__main__":
    # Prep Data
    x_train, y_train, x_test, y_test, data, target = util.prepare_data()

    # Plotting the Optimized Model Learning Curve
    # Set 1: one (C, train curve colour, test curve colour) entry per run.
    set_1 = (
        (100, 'blue', 'magenta'),
        (10, 'green', 'yellow'),
        (1, 'cyan', 'red'),
    )
    for c_value, train_color, test_color in set_1:
        # NOTE(review): the learning-curve estimator uses the default
        # max_iter, unlike the model fitted below - confirm that is intended.
        util.learning_curve_plot(LogisticRegression(C=c_value), data, target,
                                 label=f'C={c_value}',
                                 scoring='neg_mean_squared_error',
                                 colorTrain=train_color, colorTest=test_color)
        model = init_logistic_regression(x_train, y_train, x_test, y_test,
                                         C=c_value, max_iter=5000)
        # Every configuration is reported; previously the C=1 predictions
        # were computed but never passed to print_info.
        util.print_info(y_test, model.predict(x_test))
# Seed first, before any of the data handling below runs.
RANDOM_STATE = 1
set_random_seed(RANDOM_STATE)

#%% FLC data:
from utilities import prepare_data, check_arrays_survival
from flc_data_preprocess import flc_preprocess

# Survival data: features, outcome table and protected attribute(s).
data_x, data_y, protect_attr = flc_preprocess()

# train-test split: hold out 20% for test, stratified on the "death" column.
(
    data_X_train, data_X_test,
    data_y_train, data_y_test,
    S_train, S_test,
) = train_test_split(
    data_x, data_y, protect_attr,
    test_size=0.2, stratify=data_y["death"], random_state=7,
)
# Then carve a further 20% of the remaining training rows off as a dev split.
(
    data_X_train, data_X_dev,
    data_y_train, data_y_dev,
    S_train, S_dev,
) = train_test_split(
    data_X_train, data_y_train, S_train,
    test_size=0.2, stratify=data_y_train["death"], random_state=7,
)

#
# Train split: separate the outcome into event/time, then run prepare_data.
data_X_train, data_event_train, data_time_train = check_arrays_survival(
    data_X_train, data_y_train
)
data_X_train, data_event_train, data_time_train, S_train = prepare_data(
    data_X_train, data_event_train, data_time_train, S_train
)

# Test split: identical two-step processing.
data_X_test, data_event_test, data_time_test = check_arrays_survival(
    data_X_test, data_y_test
)
data_X_test, data_event_test, data_time_test, S_test = prepare_data(
    data_X_test, data_event_test, data_time_test, S_test
)

#
# all intersecting groups, i.e. black-women, white-man etc
intersectionalGroups = np.unique(S_train, axis=0)

# data normalization: the scaler is fitted on the training features only and
# the same fitted scaler is then applied to both train and test features.
scaler = StandardScaler().fit(data_X_train)
data_X_train = scaler.transform(data_X_train)
data_X_test = scaler.transform(data_X_test)

#%%
# hyperparameters of the model
input_size = data_X_train.shape[1]  # number of feature columns
output_size = 1
def test(predict_or_evaluate, model_path):
    """Run the saved model on the test set and optionally evaluate it.

    Predictions are always computed and printed. When ``predict_or_evaluate``
    equals ``"--evaluate"`` the function additionally prints loss, accuracy,
    confusion-matrix statistics, precision, recall, specificity and F1, and
    shows the ROC curve, the precision-recall curve and two sample light
    curves.

    Args:
        predict_or_evaluate: Mode flag; only ``"--evaluate"`` enables the
            evaluation and plotting part.
        model_path: Path handed to ``load_model``.
    """
    # NOTE(review): the original indentation was lost; everything after the
    # mode check is assumed to belong to the "--evaluate" branch - confirm.

    # Original author's note: results may differ after loading the model,
    # because it is restored exactly as compiled, so Dropout still remains.
    model = load_model(model_path)

    X_test, Y_test = prepare_data(NUM_TEST, "data/exoTest.csv")

    # Add a trailing axis of size 1: 2D -> 3D.
    sample_count, step_count = X_test.shape[0], X_test.shape[1]
    X_test = np.reshape(X_test, (sample_count, step_count, 1))

    # Model output for every test sample.
    predictions = model.predict(X_test, verbose=1)
    print("Predictions: \n", predictions, sep="")

    if predict_or_evaluate != "--evaluate":
        return

    # Loss and accuracy as reported by the model itself.
    evaluations = model.evaluate(X_test, Y_test)
    print("Loss: ", evaluations[0], sep="")
    print("Accuracy: ", evaluations[1] * 100, " %", sep="")

    # Confusion-matrix counts.
    (true_positives, false_positives,
     true_negatives, false_negatives) = get_positives_and_negatives(
        predictions, Y_test)
    print("True positive: ", true_positives, sep="")
    print("False positive: ", false_positives, sep="")
    print("True negative: ", true_negatives, sep="")
    print("False negative: ", false_negatives, sep="")

    confusion_matrix = get_confusion_matrix(
        true_positives, false_positives, true_negatives, false_negatives)
    print("Confusion Matrix:\n", confusion_matrix[0], "\n",
          confusion_matrix[1], sep="")

    # Metrics derived from those counts.
    precision = get_precision(true_positives, false_positives)
    print("Precision: ", precision, sep="")

    recall = get_recall(true_positives, false_negatives)
    print("Recall/True Positive Rate: ", recall, sep="")

    specificity = get_specificity(false_positives, true_negatives)
    print("Specificity/True Negative Rate: ", specificity, sep="")

    F1_score = get_F1(precision, recall)
    print("F1 Score: ", F1_score, sep="")

    # ROC curve: false/true positive rates and the area under the curve.
    fpr, tpr, _ = metrics.roc_curve(Y_test, predictions)
    roc_auc = metrics.auc(fpr, tpr)

    plt.title("ROC Curve for the Exoplanet Detector")
    plt.plot(fpr, tpr, "b", label="AUC = %0.2f" % roc_auc)
    plt.legend(loc="lower right")
    plt.ylabel("True Positive Rate")
    plt.xlabel("False Positive Rate")
    plt.show()

    # Precision-recall curve and the area under it.
    precision_graph, recall_graph, _ = metrics.precision_recall_curve(
        Y_test, predictions)
    auc = metrics.auc(recall_graph, precision_graph)

    plt.title("Precision-Recall Curve for the Exoplanet Detector")
    plt.plot(recall_graph, precision_graph, "b", label="AUC = %0.2f" % auc)
    plt.legend(loc="lower right")
    plt.xlim([0, 1.1])
    plt.ylim([0, 1.1])
    plt.ylabel("Precision")
    plt.xlabel("Recall")
    plt.show()

    # Sample light curve titled as a confirmed exoplanet (first test row).
    plt.title("Confirmed Exoplanet")
    plt.plot(X_test[0])
    plt.ylabel("Light Flux")
    plt.xlabel("Time")
    plt.show()

    # Sample light curve titled as a confirmed non-exoplanet (row 569).
    plt.title("Confirmed Non-Exoplanet")
    plt.plot(X_test[569])
    plt.ylabel("Light Flux")
    plt.xlabel("Time")
    plt.show()