def plotNN(mModel):
    """Plot the decision boundary of *mModel* over the unit square.

    Classifies a regular 0.02-spaced grid of (L, P) points in
    [0, 1) x [0, 1) with the model and scatter-plots the result:
    points predicted class 1 in red, class 0 in blue.

    Args:
        mModel: trained model exposing ``predictAll``, whose output
            ``NNModel.oneHotEncodingToLabels`` can decode to 0/1 labels.
    """
    plt.clf()
    print("Beginning best knn...")
    # Create a grid to classify over (comprehension replaces the
    # original nested append loops; same point order: x-major).
    steps = np.arange(0, 1, 0.02)
    testSet = np.array([[x, y] for x in steps for y in steps])
    # Classify over the grid and decode one-hot output to labels.
    predictedLabels = mModel.predictAll(testSet)
    predictedLabels = NNModel.oneHotEncodingToLabels(predictedLabels)
    # Group coordinates with predictions so they can be filtered by class.
    data = pd.DataFrame(testSet, columns=['L', 'P'])
    data['D'] = predictedLabels
    posData = data[data.D == 1]
    negData = data[data.D == 0]
    plt.scatter(posData.L, posData.P, color="red")
    plt.scatter(negData.L, negData.P, color="blue")
    plt.title("Best Classifier Decision Boundary.")
    plt.show()
def runNetTrial():
    """Train a 784-200-10 feed-forward net on the MNIST sample and report accuracy.

    Reads the 5000-image MNIST pixel/label text files, splits them 80/20
    into train/test sets, trains for 60 epochs (using the test set as
    validation data), and computes accuracy on both splits.

    NOTE(review): another ``runNetTrial`` defined later in this file
    shadows this one at import time — confirm which definition callers
    are meant to get.

    Returns:
        tuple: ``(mModel, trainSetMetrics, testSetMetrics)`` where each
        metrics dict carries ``"accuracy"`` and the train dict also has
        ``"accuracyList"`` — the per-epoch error list from training.
    """
    import random  # local import kept from the original layout

    # Build model: 784 inputs -> 200 hidden -> 10 outputs.
    mModel = NNModel.Model()
    mModel.add(layer_size=784, learning_rate=.01, isInput=True)
    mModel.add(layer_size=200, learning_rate=.005, momentum_factor=0)
    mModel.add(layer_size=10, learning_rate=.005, momentum_factor=0)
    print("Created Model.")

    # Read pixel data and labels from file; attach labels as a column.
    xData = pd.read_csv('./MNISTnumImages5000.txt', sep='\t', header=None)
    yData = pd.read_csv('./MNISTnumLabels5000.txt', header=None,
                        names=['labels'])
    xData['labels'] = yData.values

    # Break data into train and test sets (fresh random split each run).
    trainSet, testSet = train_test_split(
        xData, test_size=0.2, random_state=random.randint(0, 100000))
    originalTrainLabels = trainSet['labels'].values
    originalTestLabels = testSet['labels'].values
    # One-hot encode only the training labels; validation uses the raw
    # test labels below. (An unused one-hot copy of the test labels was
    # removed here.)
    trainLabels = NNModel.labelToOneHotEncoding(originalTrainLabels)
    trainData = trainSet[trainSet.columns[:-1]].values
    testData = testSet[testSet.columns[:-1]].values

    print("Starting training.")
    trialWiseErrorList = mModel.train(
        trainData, trainLabels,
        validation_data_set=testData,
        validation_label_set=originalTestLabels,
        epochs=60)
    print("Training finished.")

    # Test-set accuracy.
    predictedLabels = mModel.predictAll(testData)
    predictedLabels = NNModel.oneHotEncodingToLabels(predictedLabels)
    accuracy = NNModel.calculateAccuracy(predictedLabels, originalTestLabels)
    testSetMetrics = {}
    testSetMetrics["accuracy"] = accuracy

    # Train-set accuracy plus the per-epoch error history.
    predictedLabels = mModel.predictAll(trainData)
    predictedLabels = NNModel.oneHotEncodingToLabels(predictedLabels)
    accuracy = NNModel.calculateAccuracy(predictedLabels, originalTrainLabels)
    trainSetMetrics = {}
    trainSetMetrics["accuracy"] = accuracy
    trainSetMetrics["accuracyList"] = trialWiseErrorList

    print("Orig labels: ")
    print(originalTrainLabels[0:20])
    print("Pred labels: ")
    print(predictedLabels[0:20])
    return mModel, trainSetMetrics, testSetMetrics
def runNetTrial():
    """Train a 2-20-2 network on the 2-D (L, P, D) homework dataset.

    Loads the samples, min-max scales the L and P features into [0, 1],
    performs an 80/20 split per class (so both classes appear in each
    split), trains for 200 epochs, and evaluates both splits.

    NOTE(review): this redefines the ``runNetTrial`` declared earlier in
    the file and therefore shadows it.

    Returns:
        tuple: ``(mModel, trainSetMetrics, testSetMetrics)``; the metric
        dicts come from ``calculateMetrics`` with ``"accuracy"`` added,
        and the train dict also carries ``"accuracyList"``.
    """
    # Model: 2 inputs -> 20 hidden units -> 2 outputs.
    mModel = NNModel.Model()
    mModel.add(layer_size=2, learning_rate=.05, isInput=True)
    mModel.add(layer_size=20, learning_rate=.05)
    mModel.add(layer_size=2, learning_rate=.05)
    print("Created Model.")

    data = pd.read_table('./hw2_dataProblem.txt', sep=" +", engine='python')

    # Range-scale each feature column into [0, 1] (min/max hoisted out of
    # the per-element lambda; values are taken from the original column,
    # exactly as before, since assignment happens after the apply).
    pLo, pHi = data.P.min(), data.P.max()
    data["P"] = data["P"].apply(lambda v: (v - pLo) / (pHi - pLo))
    lLo, lHi = data.L.min(), data.L.max()
    data["L"] = data["L"].apply(lambda v: (v - lLo) / (lHi - lLo))

    # Split each class 80/20, then combine and shuffle.
    train0, test0 = train_test_split(
        data[data.D == 0].values, test_size=0.2,
        random_state=random.randint(0, 100000))
    train1, test1 = train_test_split(
        data[data.D == 1].values, test_size=0.2,
        random_state=random.randint(0, 100000))
    testSet = np.vstack((test0, test1))
    np.random.shuffle(testSet)
    trainSet = np.vstack((train0, train1))
    np.random.shuffle(trainSet)

    # First two columns are features; the third is the 0/1 class label.
    testSetData = testSet[:, 0:2]
    testSetLabels = NNModel.labelToOneHotEncoding(testSet[:, 2])
    trainSetData = trainSet[:, 0:2]
    trainSetLabels = NNModel.labelToOneHotEncoding(trainSet[:, 2])

    print("Starting training.")
    trialWiseErrorList = mModel.train(trainSetData, trainSetLabels,
                                      epochs=200)
    print("Training finished.")

    # Metrics on the held-out test set.
    testTruth = testSet[:, 2].reshape((len(testSet), 1))
    decoded = NNModel.oneHotEncodingToLabels(mModel.predictAll(testSetData))
    testAcc = calculateAccuracy(decoded, testTruth)
    testSetMetrics = calculateMetrics(decoded, testTruth)
    testSetMetrics["accuracy"] = testAcc

    # Metrics on the training set, plus the per-epoch error history.
    trainTruth = trainSet[:, 2].reshape((len(trainSet), 1))
    decoded = NNModel.oneHotEncodingToLabels(mModel.predictAll(trainSetData))
    trainAcc = calculateAccuracy(decoded, trainTruth)
    trainSetMetrics = calculateMetrics(decoded, trainTruth)
    trainSetMetrics["accuracy"] = trainAcc
    trainSetMetrics["accuracyList"] = trialWiseErrorList

    return mModel, trainSetMetrics, testSetMetrics