def SVMModel(train_x, train_y, val_x, val_y, testX, testY):
    # Building the SVM model; C = 5 is used as the chosen value for the model.
    best_score = 0
    best_kernel = 'linear'
    kernel_types = ['linear', 'poly', 'rbf']
    svm_kernel_error = []
    for kernel_value in kernel_types:
        model = svm.SVC(kernel=kernel_value, C=5)
        model.fit(X=train_x, y=train_y)
        score = model.score(val_x, val_y)
        svm_kernel_error.append(1 - score)
        # Track the kernel with the highest validation score.
        if score > best_score:
            best_score = score
            best_kernel = kernel_value

    print("Predict the score for the validation data set")
    model = svm.SVC(kernel=best_kernel, C=5)
    model.fit(X=train_x, y=train_y)
    score = model.score(val_x, val_y)
    print("Validation score:", score)

    print("Presenting results for the test data set")
    y_pred = model.predict(testX)
    conf_matrix, accuracy, recall_array, precision_array = func_confusion_matrix(
        testY, y_pred)
    print("Confusion Matrix: ")
    print(conf_matrix)
    print("Average Accuracy: {}\n".format(accuracy))
    print("Per-Class Precision: {}\n".format(precision_array))
    print("Per-Class Recall: {}".format(recall_array))
    return svm_kernel_error, (accuracy * 100), (max(recall_array) * 100), (max(precision_array) * 100)
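# Minimal usage sketch for SVMModel (the split below is an illustrative
# assumption; the project's actual data loading and splitting happen elsewhere):
# from sklearn.model_selection import train_test_split
# X_rest, test_x, y_rest, test_y = train_test_split(X, y, test_size=0.2)
# tr_x, v_x, tr_y, v_y = train_test_split(X_rest, y_rest, test_size=0.25)
# kernel_errors, acc, rec, prec = SVMModel(tr_x, tr_y, v_x, v_y, test_x, test_y)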
def LogisticRegressionModel(train_x, train_y, val_x, val_y, testX, testY):
    C_param_range = [0.001, 0.01, 0.1, 1, 10, 100]
    score = []
    accuracy = []
    y_pred_all = []
    classifiers = []
    for c in C_param_range:
        # Train one classifier per regularization strength C.
        clf = LogisticRegression(C=c)
        clf.fit(train_x, train_y)
        classifiers.append(clf)
        # Scores over the validation samples.
        score.append(clf.score(val_x, val_y))
        y_pred = clf.predict(testX)
        y_pred_all.append(y_pred)
        accuracy.append(accuracy_score(testY, y_pred))

    # Select C on the validation scores, not on the test accuracy.
    best_c = np.argmax(score)
    clf = classifiers[best_c]
    y_pred_best = y_pred_all[best_c]
    conf_matrix, accuracy, recall_array, precision_array = func_confusion_matrix(
        testY, y_pred_best)
    print("Confusion Matrix: {} \n".format(conf_matrix))
    print("Accuracy with the test data: {} \n".format(accuracy))
    print("Per-Class Precision is: {} \n".format(precision_array))
    print("Per-Class Recall rate: {} \n".format(recall_array))

    # ROC curve for the selected classifier.
    logit_roc_auc = roc_auc_score(testY, y_pred_best)
    fpr, tpr, thresholds = roc_curve(testY, clf.predict_proba(testX)[:, 1])
    plt.figure()
    plt.plot(fpr, tpr, label='Logistic Regression (area = %0.2f)' % logit_roc_auc)
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
    plt.savefig('Log_ROC')
    plt.show()
    return (accuracy * 100), (max(recall_array) * 100), (max(precision_array) * 100)
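# Alternative sketch: scikit-learn's LogisticRegressionCV runs the C search
# internally with cross-validation (Cs here mirrors C_param_range above):
# from sklearn.linear_model import LogisticRegressionCV
# clf = LogisticRegressionCV(Cs=[0.001, 0.01, 0.1, 1, 10, 100], cv=5)
# clf.fit(train_x, train_y)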
def GaussianNBModel(train_X, train_y, val_x, val_y, testX, testY):
    # Building the GaussianNB model.
    classifier = GaussianNB()
    # Training the model on the train dataset.
    classifier.fit(train_X, train_y)
    # Testing the model on the test dataset.
    y_pred = classifier.predict(testX)
    # Calculating the metrics for the evaluation of the model.
    conf_matrix, accuracy, recall_array, precision_array = func_confusion_matrix(
        testY, y_pred)
    # Plotting the ROC curve (true labels first, then the predictions).
    fpr, tpr, thresholds = roc_curve(testY, y_pred)
    roc_auc = auc(fpr, tpr)
    plt.figure()
    lw = 2
    plt.plot(fpr, tpr, color='darkorange', lw=lw,
             label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve for GaussianNB')
    plt.legend(loc="lower right")
    plt.show()
    return accuracy, precision_array, recall_array, conf_matrix
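# Note: feeding hard 0/1 predictions to roc_curve yields a single operating
# point; the class-1 probabilities give the usual curve. A sketch using the
# same classifier:
# y_score = classifier.predict_proba(testX)[:, 1]
# fpr, tpr, thresholds = roc_curve(testY, y_score)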
def feed_forward_model(train_X, train_y, val_x, val_y, testX, testY):
    """feed_forward_model - specification list

    Create a feed-forward model given a specification list. Each element
    of the list represents a layer and is formed by a tuple
    (layer_class, positional_args, keyword_args), e.g.:

    [(Dense, [20], {'activation': 'relu', 'input_dim': M}),
     (Dense, [20], {'activation': 'relu'}),
     (Dense, [N], {'activation': 'softmax'})]
    """
    model = Sequential()
    # Three candidate architectures. Only the first layer needs 'input_dim';
    # Keras infers the input shape of the later layers.
    model_list = [
        [(Dense, [100], {'activation': 'relu', 'input_dim': train_X.shape[1]}),
         (Dense, [200], {'activation': 'relu'}),
         (Dense, [200], {'activation': 'relu'}),
         (Dense, [2], {'activation': 'softmax'})],
        [(Dense, [500], {'activation': 'relu', 'input_dim': train_X.shape[1]}),
         (Dense, [1000], {'activation': 'relu'}),
         (Dense, [2], {'activation': 'softmax'})],
        [(Dense, [100], {'activation': 'relu', 'input_dim': train_X.shape[1]})]
        + [(Dense, [100], {'activation': 'relu'})] * 8
        + [(Dense, [2], {'activation': 'softmax'})],
    ]
    # Build the third (deepest) architecture from its specification list.
    for item in model_list[2]:
        layertype = item[0]
        if len(item) < 3:
            layer = layertype(*item[1])
        else:
            layer = layertype(*item[1], **item[2])
        model.add(layer)
    model.compile(optimizer="Adam",
                  loss="categorical_crossentropy",
                  metrics=['accuracy'])
    model.fit(train_X, to_categorical(train_y), verbose=0)
    model_eval_result = model.evaluate(val_x, to_categorical(val_y), verbose=0)
    print("Loss value", model_eval_result[0])
    print("Accuracy", model_eval_result[1])

    # Convert the softmax probabilities to class labels.
    y_pred_nn = model.predict(testX)
    y_pred_nn_val = np.argmax(y_pred_nn, axis=1)
    conf_matrix, accuracy, recall_array, precision_array = func_confusion_matrix(
        testY, y_pred_nn_val)
    print("Confusion Matrix: \n{}".format(conf_matrix))
    print("Accuracy with the test data: {}".format(accuracy))
    print("Per-Class Precision is: {}".format(precision_array))
    print("Per-Class Recall rate: {}".format(recall_array))
    return (accuracy * 100), (max(recall_array) * 100), (max(precision_array) * 100)
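# The function above trains only model_list[2]. A sketch (assumption: the
# same compile and fit settings as above) for building any candidate from
# its specification list and keeping the best by validation accuracy:
def build_from_spec(spec):
    m = Sequential()
    for item in spec:
        kwargs = item[2] if len(item) > 2 else {}
        m.add(item[0](*item[1], **kwargs))
    m.compile(optimizer="Adam", loss="categorical_crossentropy",
              metrics=['accuracy'])
    return m

# val_scores = []
# for spec in model_list:
#     m = build_from_spec(spec)
#     m.fit(train_X, to_categorical(train_y), verbose=0)
#     val_scores.append(m.evaluate(val_x, to_categorical(val_y), verbose=0)[1])
# best_model_index = int(np.argmax(val_scores))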
                to_categorical(y_train),
                batch_size=256,
                epochs=3,
                verbose=1,
                validation_data=(x_valid, to_categorical(y_valid)))
predict_third_model = third_model.predict(x_valid)
# Class labels from the third model's predictions.
third_model_predictions = [
    np.argmax(predictions) for predictions in predict_third_model
]

print("Model 1")
print(first_model_predictions[0], first_model_predictions[1],
      first_model_predictions[2], first_model_predictions[3])
cm_1, acc_1, recall_1, precision_1 = func_confusion_matrix(
    y_valid, first_model_predictions)
print("cm: ", cm_1, ",\n acc: ", acc_1, ",\n recall: ", recall_1,
      ",\n precision: ", precision_1)

print("Model 2")
print(predict_second_model[0], predict_second_model[1],
      predict_second_model[2], predict_second_model[3])
cm_2, acc_2, recall_2, precision_2 = func_confusion_matrix(
    y_valid, second_model_predictions)
print("cm: ", cm_2, ",\n acc: ", acc_2, ",\n recall: ", recall_2,
      ",\n precision: ", precision_2)

print("Model 3")
print(predict_third_model[0], predict_third_model[1],
      predict_third_model[2], predict_third_model[3])
def AdaBoostModel(train_X, train_y, val_x, val_y, testX, testY):
    # Uncomment this to use SVM as the base estimator and pass it as
    # base_estimator to AdaBoostClassifier.
    # svc = SVC(probability=True, kernel='linear')
    n_estimators_list = [25, 50, 100, 200]
    accuracy_scores = []  # avoid shadowing sklearn's accuracy_score
    for n in n_estimators_list:
        # Running the AdaBoost model using the default base estimator.
        adaClassifier = AdaBoostClassifier(n_estimators=n, learning_rate=1)
        # Training the model using the training dataset.
        model = adaClassifier.fit(train_X, train_y)
        # Testing the model to predict labels for the test dataset.
        y_pred = model.predict(testX)
        # Calculating metrics for the model evaluation.
        conf_matrix, accuracy, recall_array, precision_array = func_confusion_matrix(
            testY, y_pred)
        print("Confusion Matrix: ")
        print(conf_matrix)
        print("Average Accuracy: {}\n".format(accuracy))
        accuracy_scores.append(accuracy)
        print("Per-Class Precision: {}\n".format(precision_array))
        print("Per-Class Recall: {}".format(recall_array))
        if n == 50:
            print("ROC Curve for 50 estimators: \n")
            # True labels first, then the predictions.
            fpr, tpr, thresholds = roc_curve(testY, y_pred)
            roc_auc = auc(fpr, tpr)
            plt.figure()
            lw = 2
            plt.plot(fpr, tpr, color='darkorange', lw=lw,
                     label='ROC curve (area = %0.2f)' % roc_auc)
            plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
            plt.xlim([0.0, 1.0])
            plt.ylim([0.0, 1.05])
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title('ROC Curve for AdaBoost')
            plt.legend(loc="lower right")
            plt.show()
    # Graph to compare the accuracy with the number of estimators.
    plt.figure()
    plt.plot(n_estimators_list, accuracy_scores)
    plt.ylim([0.0, 1.0])
    plt.xlabel('Number of estimators')
    plt.ylabel('Accuracy')
    plt.show()
    # Accuracy for 100 estimators.
    return accuracy_scores[2]
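# Sketch of the SVM base-estimator variant mentioned in the comment above.
# Note that scikit-learn 1.2 renamed base_estimator to estimator:
# svc = SVC(probability=True, kernel='linear')
# adaClassifier = AdaBoostClassifier(base_estimator=svc, n_estimators=50,
#                                    learning_rate=1)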
model1.add(Dense(128, activation='relu', input_shape=(784,)))
model1.add(Dense(num_classes, activation='softmax'))
model1.compile(loss='categorical_crossentropy',
               optimizer=RMSprop(),
               metrics=['accuracy'])
print(model1.summary())
model1.fit(xtrain, ytrain, batch_size=batch_size, epochs=epochs, verbose=1)
# Take the argmax over the softmax output to get class labels
# (Sequential.predict_classes was removed in newer Keras releases).
ypred = np.argmax(model1.predict(xval), axis=1)
conf, acc, rec, prec = func_confusion_matrix(yval, ypred)
print(conf)
print(acc)
print(rec)
print(prec)

# Model 2: two hidden layers, sigmoid activations, sigmoid output.
model2 = Sequential()
model2.add(Dense(128, activation='sigmoid', input_shape=(784,)))
model2.add(Dropout(0.2))
model2.add(Dense(64, activation='sigmoid'))
model2.add(Dense(num_classes, activation='sigmoid'))
# Test-set evaluation.
test_accuracy = sess.run(accuracy,
                         feed_dict={
                             images_placeholder: data['images_test'],
                             labels_placeholder: data['labels_test']
                         })
print('Test accuracy {:g}'.format(test_accuracy))
true_false_prediction, prediction_matrix = sess.run(
    report,
    feed_dict={
        images_placeholder: data['images_test'],
        labels_placeholder: data['labels_test']
    })
conf_matrix, accuracy, recall_array, precision_array = func_confusion_matrix(
    prediction_matrix, y_test)  # func modified due to tensors
print("confusion matrix\n", conf_matrix)
print("accuracy: ", accuracy)
print("recall_array: ", recall_array)
print("precision_array: ", precision_array)

# Ten images the model misclassified.
error_count = 1  # start at 1, not 0
# true_false_prediction holds 1 for a correct prediction and 0 otherwise,
# so a nonzero (x - 1) marks a misclassified sample.
falsely_predicted_indexes = [
    i for i, x in enumerate(true_false_prediction) if x - 1
]
fig = plt.figure(None, (10, 10))
for index in falsely_predicted_indexes:
    # Reshape the flat 784-vector back to (28, 28) so we can view the image.
    image = np.reshape(x_test[index], (28, 28))
    fig.add_subplot(
def kMeansModel(train_X, train_y, val_x, val_y, testX, testY):
    n_clusters = [2, 3, 4, 5]
    accuracy_scores = []
    for n in n_clusters:
        # Calling the KMeans function from the library.
        kmeans = KMeans(n)
        # Training the model to form the clusters.
        kmeans = kmeans.fit(train_X)
        # Predicting the labels for the test dataset.
        y_pred = kmeans.predict(testX)
        # Calculating metrics for the model evaluation.
        conf_matrix, accuracy, recall_array, precision_array = func_confusion_matrix(
            testY, y_pred)
        print("Evaluation metrics for {} clusters".format(n))
        print("Confusion Matrix: ")
        print(conf_matrix)
        print("Average Accuracy: {}\n".format(accuracy))
        accuracy_scores.append(accuracy)
        print("Per-Class Precision: {}\n".format(precision_array))
        print("Per-Class Recall: {}".format(recall_array))
        if n == 2:
            print("For 2 clusters, the ROC Curve is: \n")
            # True labels first, then the predictions.
            fpr, tpr, thresholds = roc_curve(testY, y_pred)
            roc_auc = auc(fpr, tpr)
            plt.figure()
            lw = 2
            plt.plot(fpr, tpr, color='darkorange', lw=lw,
                     label='ROC curve (area = %0.2f)' % roc_auc)
            plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
            plt.xlim([0.0, 1.0])
            plt.ylim([0.0, 1.05])
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title('ROC Curve for k-Means')
            plt.legend(loc="lower right")
            plt.show()
    # Graph for the accuracy compared to the number of clusters.
    plt.figure()
    plt.plot(n_clusters, accuracy_scores)
    plt.ylim([0.0, 1.0])
    plt.xlabel('Number of clusters')
    plt.ylabel('Accuracy')
    plt.show()
    # Accuracy for 2 clusters.
    return accuracy_scores[0]
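# KMeans assigns arbitrary cluster IDs, so cluster 0 need not correspond to
# class 0, and the accuracy above is only meaningful when the IDs happen to
# line up. A minimal sketch (hypothetical helper, not part of the original
# pipeline) that remaps each cluster ID to the majority training label:
def remap_cluster_labels(kmeans, train_X, train_y):
    train_y = np.asarray(train_y)
    cluster_ids = kmeans.predict(train_X)
    mapping = {}
    for c in np.unique(cluster_ids):
        labels, counts = np.unique(train_y[cluster_ids == c],
                                   return_counts=True)
        mapping[c] = labels[np.argmax(counts)]  # majority class in cluster c
    return mapping

# Usage sketch: remap the predicted cluster IDs before scoring.
# mapping = remap_cluster_labels(kmeans, train_X, train_y)
# y_pred = np.array([mapping[c] for c in kmeans.predict(testX)])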
model3.compile(optimizer=RMSprop(lr=0.01),
               loss='sparse_categorical_crossentropy',
               metrics=['accuracy'])
model1.fit(x_train, y_train, epochs=5, validation_data=validation_data)
model2.fit(x_train, y_train, epochs=5, validation_data=validation_data)
model3.fit(x_train, y_train, epochs=5, validation_data=validation_data)

y_pred2 = model2.predict(x_test)
# The index of the highest softmax probability is the predicted class.
yli_pred2 = [int(np.argmax(y_pred2[i])) for i in range(len(y_test))]
result = func_confusion_matrix(y_test, yli_pred2)
conf_matrix = result[0]
accuracy = result[1]
recall = result[2]
precision = result[3]
print("Confusion Matrix : \n", conf_matrix)
print("\n\nAccuracy : ", accuracy)
print("\n\n")
for i in range(10):
    print("Class ", i, " Recall : ", recall[i])
print("\n\n")
for i in range(10):
    print("Class ", i, " Precision : ", precision[i])

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
for i in range(total_batch):
    Input_batch = x_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
    Output_batch = y_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
    cost = FNN.train_step(Input_batch, Output_batch)
    avg_cost += cost / total_batch
print("Epoch: {} =====> Cost = {}".format(epoch, avg_cost))

# Test the model.
predicted_output = FNN.predict(x_test, y_test)
correct_prediction = np.equal(np.argmax(predicted_output, 1),
                              np.argmax(y_test, 1))
accuracy = np.sum(correct_prediction.astype(float)) / np.size(correct_prediction)
print('Accuracy is {}'.format(accuracy))

# Recover string labels from the one-hot encoding for the confusion matrix.
y_test_label = [x[0] for x in oe.inverse_transform(y_test)]
y_pred_label = [x[0] for x in oe.inverse_transform(predicted_output)]
conf_matrix, accuracy, recall_array, precision_array = util.func_confusion_matrix(
    y_test_label, y_pred_label)

# Show ten misclassified test images with predicted and ground-truth labels.
wrong_img = np.not_equal(np.argmax(predicted_output, 1), np.argmax(y_test, 1))
wrong_img = np.where(wrong_img)[0]
fig, ax = plt.subplots(2, 5, figsize=(20, 10))
fig.subplots_adjust(hspace=0, wspace=0.05)
for count in range(10):
    img = np.copy(x_test[wrong_img[count]])
    img = np.asarray(np.reshape(img, [28, 28]))
    img = (img * 255).astype(np.uint8)
    img = Image.fromarray(img, 'L')
    ax[count % 2][count // 2].imshow(img, cmap='gray')
    ax[count % 2][count // 2].axis('off')
    title = "Pred: {} -- GT: {}".format(y_pred_label[wrong_img[count]],
                                        y_test_label[wrong_img[count]])
    ax[count % 2][count // 2].set_title(title, fontsize=25)
############# placeholder 4: testing #######################
# best_kernel = 'linear'
# best_c = 9  # poly had many C values that were the "best"
model = svm.SVC(kernel=best_kernel, C=best_c, gamma='scale')
model.fit(X=x_train, y=y_train)
############# placeholder end #######################

## step 5: evaluate your results in terms of accuracy, recall, or precision.
############# placeholder 5: metrics #######################
# func_confusion_matrix is not included here;
# you might re-use this function for Part I.
y_pred = model.predict(X_test)
conf_matrix, accuracy, recall_array, precision_array = util.func_confusion_matrix(
    Y_test, y_pred)
print("Confusion Matrix: ")
print(conf_matrix)
print("Average Accuracy: {}".format(accuracy))
print("Per-Class Precision: {}".format(precision_array))
print("Per-Class Recall: {}".format(recall_array))
############# placeholder end #######################

############# placeholder 6: success and failure examples #######################
# Success samples: samples for which your model can correctly predict their labels.
correct_sample = np.equal(y_pred, Y_test)
correct_sample = np.where(correct_sample)[0]
correct = pd.DataFrame(X_test[correct_sample])
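# func_confusion_matrix is defined elsewhere (in util / the course starter
# code) and is not part of this file. A minimal sketch of a compatible
# implementation, assuming integer class labels and the
# (conf_matrix, accuracy, recall_array, precision_array) return order used
# throughout this file (the name below is hypothetical):
def func_confusion_matrix_sketch(y_true, y_pred):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    classes = np.unique(np.concatenate([y_true, y_pred]))
    index = {c: i for i, c in enumerate(classes)}
    conf = np.zeros((len(classes), len(classes)), dtype=int)
    for t, p in zip(y_true, y_pred):
        conf[index[t], index[p]] += 1  # rows: true class, columns: predicted
    accuracy = np.trace(conf) / conf.sum()
    with np.errstate(divide='ignore', invalid='ignore'):
        recall_array = np.diag(conf) / conf.sum(axis=1)     # TP / (TP + FN)
        precision_array = np.diag(conf) / conf.sum(axis=0)  # TP / (TP + FP)
    return conf, accuracy, recall_array, precision_array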