Example 1
def train(model_name):

	#SMOTE for upsampling the minority class
	X_train, Y_train = prepare_data(NUM_TRAIN, "data/exoTrain.csv")
	sm = SMOTE()
	X_train, Y_train = sm.fit_resample(X_train, Y_train)  # fit_sample was renamed fit_resample in newer imbalanced-learn releases

	#Reshape the array from 2D into 3D
	X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

	model = create_model()


	#Add TensorBoard logging and a model-checkpoint callback
	tensorboard = TensorBoard(log_dir = './Graph', histogram_freq = 0, write_graph = True, write_images = True)
	checkpoint_train = ModelCheckpoint(model_name, monitor = "loss", save_best_only = True)
	print("Added checkpoints")

	model.fit(x = X_train, y = Y_train, epochs = EPOCHS,
		callbacks = [tensorboard, checkpoint_train])
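A note on the helpers this snippet relies on: prepare_data, create_model, NUM_TRAIN and EPOCHS come from the surrounding project and are not shown here. As a rough, assumed sketch (not the project's actual code), prepare_data could look like the following, presuming the CSV stores a label in its first column (2 = exoplanet, 1 = non-exoplanet) followed by the flux time series:

# Hypothetical sketch of prepare_data -- an assumption, not the original implementation
import numpy as np
import pandas as pd

def prepare_data(num_rows, csv_path):
	df = pd.read_csv(csv_path, nrows=num_rows)
	Y = (df.iloc[:, 0] == 2).astype(int).values   # binarize the label column
	X = df.iloc[:, 1:].values.astype(np.float32)  # flux measurements
	return X, Y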
Example 2
# Python 3.7 used
# Must have fashion-mnist repo locally
import utilities as util
import numpy as np
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

def init_logistic_regression(X_train, y_train, X_test, y_test, penalty='l2', C=1.0, max_iter=5000, solver='lbfgs'):
	# X_test and y_test are accepted for interface consistency but are not used; the model is fit on the training split only
	clf = LogisticRegression(penalty=penalty, C=C, max_iter=max_iter, solver=solver)
	model = clf.fit(X_train, y_train)
	return model

if __name__ == "__main__":
	# Prep Data
	x_train, y_train, x_test, y_test, data, target = util.prepare_data()
	
	# Plotting the Optimized Model Learning Curve
	# Set 1:
	util.learning_curve_plot(LogisticRegression(C=100), data, target, label='C=100', scoring='neg_mean_squared_error', colorTrain='blue', colorTest='magenta')
	lgr_1 = init_logistic_regression(x_train, y_train, x_test, y_test, C=100, max_iter=5000)
	pred_1 = lgr_1.predict(x_test)
	util.print_info(y_test, pred_1)

	# Set 2:
	util.learning_curve_plot(LogisticRegression(C=10), data, target, label='C=10', scoring='neg_mean_squared_error', colorTrain='green', colorTest='yellow')
	lgr_2 = init_logistic_regression(x_train, y_train, x_test, y_test, C=10, max_iter=5000)
	pred_2 = lgr_2.predict(x_test)
	util.print_info(y_test, pred_2)
	
	# Set 3:
	util.learning_curve_plot(LogisticRegression(C=1), data, target, label='C=1', scoring='neg_mean_squared_error', colorTrain='cyan', colorTest='red')
	lgr_3 = init_logistic_regression(x_train, y_train, x_test, y_test, C=1, max_iter=5000)
	pred_3 = lgr_3.predict(x_test)
	util.print_info(y_test, pred_3)
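For context, learning_curve_plot and print_info live in the local utilities module and are not shown. Below is a minimal, assumed sketch of how such a learning-curve helper could be built on scikit-learn's learning_curve; the signature and defaults are guesses, not the module's real API:

# Assumed stand-in for utilities.learning_curve_plot -- illustrative only
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve

def learning_curve_plot(estimator, data, target, label='', scoring=None, colorTrain='blue', colorTest='red'):
	sizes, train_scores, test_scores = learning_curve(estimator, data, target, cv=5, scoring=scoring, train_sizes=np.linspace(0.1, 1.0, 5))
	plt.plot(sizes, train_scores.mean(axis=1), color=colorTrain, label=label + ' (train)')
	plt.plot(sizes, test_scores.mean(axis=1), color=colorTest, label=label + ' (validation)')
	plt.xlabel('Training set size')
	plt.ylabel(scoring or 'score')
	plt.legend()
	plt.show()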
Example 3
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

RANDOM_STATE = 1
set_random_seed(RANDOM_STATE)  # seed helper assumed to come from the surrounding project or framework

#%% FLC data:
from utilities import prepare_data
from utilities import check_arrays_survival
from flc_data_preprocess import flc_preprocess
#Survival Data
data_x, data_y, protect_attr = flc_preprocess()

# train-test split
data_X_train, data_X_test, data_y_train, data_y_test, S_train, S_test = train_test_split(
    data_x, data_y, protect_attr, test_size=0.2, stratify=data_y["death"], random_state=7)
data_X_train, data_X_dev, data_y_train, data_y_dev, S_train, S_dev = train_test_split(
    data_X_train, data_y_train, S_train, test_size=0.2, stratify=data_y_train["death"], random_state=7)
#
data_X_train, data_event_train, data_time_train = check_arrays_survival(data_X_train, data_y_train)
data_X_train, data_event_train, data_time_train, S_train = prepare_data(data_X_train, data_event_train, data_time_train, S_train)

data_X_test, data_event_test, data_time_test = check_arrays_survival(data_X_test, data_y_test)
data_X_test, data_event_test, data_time_test, S_test = prepare_data(data_X_test, data_event_test, data_time_test, S_test)
#
intersectionalGroups = np.unique(S_train, axis=0)  # all intersectional groups, e.g. Black women, white men, etc.
# data normalization: standardize features (zero mean, unit variance) so Euclidean distances are comparable
scaler = StandardScaler()
scaler.fit(data_X_train)
data_X_train = scaler.transform(data_X_train)
data_X_test = scaler.transform(data_X_test)

#%%
# hyperparameters of the model
input_size = data_X_train.shape[1]
output_size = 1
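check_arrays_survival and prepare_data are imported from the local utilities module and their bodies are not shown. Judging only from the call sites above, a check_arrays_survival-style helper presumably validates the feature matrix and unpacks the structured survival array into an event indicator and a time column. The sketch below is an assumption (including the "death" and "futime" field names), not the project's code:

# Assumed sketch of a check_arrays_survival-style helper -- illustrative only
import numpy as np
from sklearn.utils import check_array

def check_arrays_survival(X, y, event_field="death", time_field="futime"):
    X = check_array(X, ensure_2d=True)       # validate the feature matrix
    event = y[event_field].astype(bool)      # boolean event indicator
    time = y[time_field].astype(np.float64)  # observed or censored time
    return X, event, time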
Example 4
def test(predict_or_evaluate, model_path):

    #Results may differ slightly after reloading: the model is restored exactly as it was compiled and saved, Dropout layers included
    model = load_model(model_path)

    X_test, Y_test = prepare_data(NUM_TEST, "data/exoTest.csv")

    #Reshape the array from 2D into 3D
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    #Predict on the test light curves
    predictions = model.predict(X_test, verbose=1)
    print("Predictions: \n" + str(predictions))

    if predict_or_evaluate == "--evaluate":

        #Evaluate the model on test images
        evaluations = model.evaluate(X_test, Y_test)
        print("Loss: " + str(evaluations[0]))
        print("Accuracy: " + str(evaluations[1] * 100) + " %")

        true_positives, false_positives, true_negatives, false_negatives = get_positives_and_negatives(
            predictions, Y_test)

        print("True positive: " + str(true_positives))
        print("False positive: " + str(false_positives))
        print("True negative: " + str(true_negatives))
        print("False negative: " + str(false_negatives))

        confusion_matrix = get_confusion_matrix(true_positives,
                                                false_positives,
                                                true_negatives,
                                                false_negatives)
        print("Confusion Matrix:\n" + str(confusion_matrix[0]) + "\n" +
              str(confusion_matrix[1]))

        precision = get_precision(true_positives, false_positives)
        print("Precision: " + str(precision))

        recall = get_recall(true_positives, false_negatives)
        print("Recall/True Positive Rate: " + str(recall))

        specificity = get_specificity(false_positives, true_negatives)
        print("Specificity/True Negative Rate: " + str(specificity))

        F1_score = get_F1(precision, recall)
        print("F1 Score: " + str(F1_score))

        #False and True Positive Rates for the ROC curve
        fpr, tpr, _ = metrics.roc_curve(Y_test, predictions)
        roc_auc = metrics.auc(fpr, tpr)

        #Graphing the ROC curve
        plt.title("ROC Curve for the Exoplanet Detector")
        plt.plot(fpr, tpr, "b", label="AUC = %0.2f" % roc_auc)
        plt.legend(loc="lower right")
        plt.ylabel("True Positive Rate")
        plt.xlabel("False Positive Rate")
        plt.show()

        #Precision and recall for the Precision-Recall curve
        precision_graph, recall_graph, _ = metrics.precision_recall_curve(
            Y_test, predictions)
        auc = metrics.auc(recall_graph, precision_graph)

        #Graphing the Precision-Recall Curve
        plt.title("Precision-Recall Curve for the Exoplanet Detector")
        plt.plot(recall_graph, precision_graph, "b", label="AUC = %0.2f" % auc)
        plt.legend(loc="lower right")
        plt.xlim([0, 1.1])
        plt.ylim([0, 1.1])
        plt.ylabel("Precision")
        plt.xlabel("Recall")
        plt.show()

        #Graphing a Confirmed Exoplanet
        plt.title("Confirmed Exoplanet")
        plt.plot(X_test[0])
        plt.ylabel("Light Flux")
        plt.xlabel("Time")
        plt.show()

        #Graphing a Confirmed Non-Exoplanet
        plt.title("Confirmed Non-Exoplanet")
        plt.plot(X_test[569])
        plt.ylabel("Light Flux")
        plt.xlabel("Time")
        plt.show()
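The helpers get_positives_and_negatives, get_confusion_matrix, get_precision, get_recall, get_specificity and get_F1 are project-level functions not shown here. A minimal sketch of how such helpers are typically implemented follows; the 0.5 decision threshold and everything else below are assumptions, not the project's actual code:

# Assumed implementations of the metric helpers -- illustrative only
def get_positives_and_negatives(predictions, y_true, threshold=0.5):
    tp = fp = tn = fn = 0
    for score, label in zip(predictions.ravel(), y_true):
        predicted = 1 if score >= threshold else 0
        if predicted == 1 and label == 1:
            tp += 1
        elif predicted == 1 and label == 0:
            fp += 1
        elif predicted == 0 and label == 0:
            tn += 1
        else:
            fn += 1
    return tp, fp, tn, fn

def get_precision(tp, fp):
    return tp / (tp + fp) if (tp + fp) else 0.0

def get_recall(tp, fn):
    return tp / (tp + fn) if (tp + fn) else 0.0

def get_specificity(fp, tn):
    return tn / (tn + fp) if (tn + fp) else 0.0

def get_F1(precision, recall):
    return 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0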