def main(CV=False, PLOT=True):
    """Entry point.

    Parameters
    ----------
    CV: bool
        Cross-validation flag
    PLOT: bool
        Plotting flag
    """
    _data = fetch_data()

    if CV:
        method, params = cross_validate(_data, 10)
    else:
        method = 'l2'
        params = {'n_neighbors': 1, 'metric': chisquare}

    data = normalise(_data, method)

    X_train, y_train = data['train']
    X_test, y_test = data['test']

    classifier = KNeighborsClassifier(**params)
    classifier.fit(X_train, y_train)

    print('ACCURACY: ', classifier.score(X_test, y_test))

    if PLOT:
        y_hat = classifier.predict(X_test)
        cnf_matrix = confusion_matrix(y_test, y_hat)
        plot_confusion_matrix(cnf_matrix,
                              classes=list(set(y_test)),
                              title='K-Nearest-Neighbours\nConfusion Matrix',
                              cmap=plt.cm.Greens)
        plt.savefig('data/out/knn_cnf_matrix.pdf',
                    format='pdf', dpi=300, transparent=True)

        neighbors_matrix = classifier.kneighbors_graph(X_test)
        plot_kneighbors_graph(neighbors_matrix, title='Neighbours Graph')
        plt.savefig('data/out/knn_neighbours.pdf',
                    format='pdf', dpi=300, transparent=True)
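# `chisquare` above is assumed to be a custom histogram distance rather than
# scipy.stats.chisquare (which returns a statistic/p-value pair and is not
# usable as a KNN metric). A minimal sketch of such a helper, compatible with
# KNeighborsClassifier(metric=...):
import numpy as np

def chisquare(x, y, eps=1e-10):
    """Chi-squared distance between two non-negative feature vectors."""
    x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
    return 0.5 * np.sum((x - y) ** 2 / (x + y + eps))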
def main(CV=False, PLOT=True):
    """Entry point.

    Parameters
    ----------
    CV: bool
        Cross-validation flag
    PLOT: bool
        Plotting flag
    """
    _data = fetch_data()

    if CV:
        method, params = cross_validate(_data)
    else:
        method = 'robust'
        params = {'activation': 'logistic', 'hidden_layer_sizes': (25,)}

    data = normalise(_data, method)

    X_train, y_train = data['train']
    X_test, y_test = data['test']

    classifier = MLPClassifier(learning_rate='adaptive', max_iter=5000,
                               solver='adam', random_state=42, alpha=0.01,
                               **params)
    classifier.fit(X_train, y_train)

    print('ACCURACY: ', classifier.score(X_test, y_test))

    if PLOT:
        y_hat = classifier.predict(X_test)
        cnf_matrix = confusion_matrix(y_test, y_hat)
        plot_confusion_matrix(cnf_matrix,
                              classes=list(set(y_test)),
                              title='Multi-Layer-Perceptron\nConfusion Matrix',
                              cmap=plt.cm.Reds)
        plt.savefig('data/out/mlp_cnf_matrix.pdf',
                    format='pdf', dpi=300, transparent=True)
def main(CV=False, PLOT=True):
    """Entry point.

    Parameters
    ----------
    CV: bool
        Cross-validation flag
    PLOT: bool
        Plotting flag
    """
    _data = fetch_data()

    if CV:
        method, params = cross_validate(_data)
    else:
        method = 'l2'
        params = {'metric': chisquare}

    data = normalise(_data, method)

    X_train, y_train = data['train']
    X_test, y_test = data['test']

    classifier = NearestCentroid(**params)
    classifier.fit(X_train, y_train)

    print('ACCURACY: ', classifier.score(X_test, y_test))

    if PLOT:
        y_hat = classifier.predict(X_test)
        cnf_matrix = confusion_matrix(y_test, y_hat)
        plot_confusion_matrix(cnf_matrix,
                              classes=list(set(y_test)),
                              title='Nearest Centroid\nConfusion Matrix',
                              cmap=plt.cm.Blues)
        plt.savefig('data/out/nc_cnf_matrix.pdf',
                    format='pdf', dpi=300, transparent=True)
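# The `plot_confusion_matrix` helper shared by the three entry points above is
# not included in this section; a minimal sketch matching its call signature
# (cm, classes, title, cmap, normalize), in the style of the classic
# matplotlib recipe:
import numpy as np
import matplotlib.pyplot as plt

def plot_confusion_matrix(cm, classes, title='Confusion Matrix',
                          cmap=plt.cm.Blues, normalize=False):
    """Render a (optionally row-normalized) confusion matrix with labels."""
    if normalize:
        cm = cm.astype(float) / cm.sum(axis=1, keepdims=True)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    ticks = np.arange(len(classes))
    plt.xticks(ticks, classes, rotation=45)
    plt.yticks(ticks, classes)
    thresh = cm.max() / 2.0
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            plt.text(j, i, format(cm[i, j], '.2f' if normalize else 'd'),
                     ha='center',
                     color='white' if cm[i, j] > thresh else 'black')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()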
def validate(model_name):
    # load the validation dataset
    valid = load_valid_data("../data/data_for_validation.csv")

    # list of feature columns
    feature_columns = ['gender', 'caste', 'mathematics_marks',
                       'english_marks', 'science_marks', 'science_teacher',
                       'languages_teacher', 'guardian', 'internet']
    y = valid['continue_drop'].values
    X = valid[feature_columns].values

    # check whether the model exists
    real_model_name = model_name + ".pkl"
    file_path = os.path.join(MODELS, real_model_name)
    if not os.path.isfile(file_path):
        print("Sorry, your {} model was not found".format(real_model_name))
        logger.warning("You tried to run a {} model which was not found "
                       "in your models directory".format(real_model_name))
    else:
        # load the model from disk
        model = joblib.load(file_path)
        print("Start validation using the {} model".format(model_name))
        logger.info("Start validation using the {} model".format(model_name))

        y_pred = model.predict(X)
        fscore = f1_score(y, y_pred, average='weighted')

        # draw and save the confusion matrix
        plot_confusion_matrix(y, y_pred, class_names,
                              title=model_name + "_valid_cm")
        plt.savefig("../figures/{}_{}_cm.pdf".format(model_name, "valid"),
                    bbox_inches="tight")
        plt.close()

        print("F1 score for the {0} model is {1:.3f}".format(model_name, fscore))
        print("Validation ends")
        logger.info("Mode: {0}, Model: {1}, F1 score: {2:.3f}".format(
            "valid", model_name, fscore))
        logger.info("************* Validation Ends *********************")
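# `validate` expects a fitted estimator serialised as <name>.pkl inside the
# MODELS directory; a usage sketch of how such a pickle could be produced
# (the estimator choice and file name here are illustrative, not the
# project's actual training code):
import os
import joblib
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(max_iter=1000)
# ... model.fit(X_train, y_train) on the training split ...
joblib.dump(model, os.path.join(MODELS, "logistic_regression.pkl"))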
def predict(model_name):
    # load the test data
    test = load_test_data("../data/test_data.csv")
    test = test.values

    # load the test labels
    labels = load_test_labels("../data/test_label.csv")
    labels = labels['continue_drop'].values

    # check whether the model exists
    real_model_name = model_name + ".pkl"
    file_path = os.path.join(MODELS, real_model_name)
    if not os.path.isfile(file_path):
        print("Sorry, your {} model was not found".format(real_model_name))
        logger.warning("You tried to run a {} model which was not found "
                       "in your models directory".format(real_model_name))
    else:
        # load the model from disk
        model = joblib.load(file_path)
        print("Start testing using the {} model".format(model_name))
        logger.info("Start testing using the {} model".format(model_name))

        y_pred = model.predict(test)
        fscore = f1_score(labels, y_pred, average="weighted")

        # draw and save the confusion matrix
        plot_confusion_matrix(labels, y_pred, class_names,
                              title=model_name + "_test_cm")
        plt.savefig("../figures/{}_{}_cm.pdf".format(model_name, "test"),
                    bbox_inches="tight")
        plt.close()

        print("F1 score for the {0} model is {1:.3f}".format(model_name, fscore))
        print("Testing ends")
        logger.info("Mode: {0}, Model: {1}, F1 score: {2:.3f}".format(
            "Test", model_name, fscore))
        logger.info("************* Testing Ends *********************")
# Accuracy
def getAccuracy(X, Y, model):
    outputs = model.predict(X)
    acc = np.sum(outputs == Y) / Y.shape[0]
    return acc

print("Train Acc %.4f" % getAccuracy(X, Y, model))
print("Test Acc %.4f" % getAccuracy(XTest, YTest, model))

outputs = model.predict(X)
# sklearn expects (y_true, y_pred) in this order
cnf_matrix = confusion_matrix(Y, outputs)
print(cnf_matrix)
plot_confusion_matrix(cnf_matrix,
                      classes=["Pikachu", "Bulbasaur", "Meowth"],
                      title="Confusion Matrix")
print(classification_report(Y, outputs))

test_outputs = model.predict(XTest)
print(classification_report(YTest, test_outputs))
cnf_matrix = confusion_matrix(YTest, test_outputs)
plot_confusion_matrix(cnf_matrix,
                      classes=["Pikachu", "Bulbasaur", "Meowth"],
                      title="Confusion Matrix Test")

# Visualise misclassifications
for i in range(Y.shape[0]):
    if Y[i] != outputs[i]:
model.eval()

y_label = []
y_predict = []
with torch.no_grad():
    for i, data in enumerate(val_loader):
        images, labels = data
        images = images.to(device)  # Variable() is deprecated and unnecessary
        outputs = model(images)
        prediction = outputs.max(1, keepdim=True)[1]
        y_label.extend(labels.cpu().numpy())
        y_predict.extend(np.squeeze(prediction.cpu().numpy().T))

# compute the confusion matrix
confusion_mtx = confusion_matrix(y_label, y_predict)

# plot the confusion matrix
plot_labels = ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']
plot_confusion_matrix(confusion_mtx, plot_labels)

report = classification_report(y_label, y_predict, target_names=plot_labels)
print(report)

# per-class error rate: 1 - recall for each true label
label_frac_error = 1 - np.diag(confusion_mtx) / np.sum(confusion_mtx, axis=1)
plt.bar(np.arange(7), label_frac_error)
plt.xlabel('True Label')
plt.ylabel('Fraction classified incorrectly')

torch.save(model, 'models/model.pth')
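# Saving the full module object (rather than a state_dict) means the model can
# be restored for inference without re-building the architecture; a usage
# sketch (recent PyTorch versions may additionally require weights_only=False):
import torch

restored = torch.load('models/model.pth', map_location=device)
restored.eval()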
creport_df = pd.DataFrame(creport).transpose()
acc = accuracy_score(actual_labels, predictions)
all_test_accuracies.append(acc)
kappa_score = cohen_kappa_score(actual_labels, predictions)
all_kappa_scores.append(kappa_score)

print(creport_df)
print('Accuracy for {} is {}, kappa score is {}'.format(
    timestamp, acc, kappa_score))

predict_df = pd.DataFrame(
    data=results, columns=['SP0', 'SP1', 'SP2', 'SP3', 'SP4', 'SP5'])
file_names = [x.split('/')[-1] for x in data_paths]
predict_df['fname'] = file_names
predict_df['Class'] = actual_labels
# uncomment to save the predicted probabilities for use in the SVM
# predict_df.to_csv(predicted_probabilities_csv_name)

visualize.plot_confusion_matrix(cm, classes=classes_lst,
                                title=parser_args.network + ' Confusion Matrix')
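# Cohen's kappa above corrects raw accuracy for chance agreement:
# kappa = (p_o - p_e) / (1 - p_e), where p_o is observed accuracy and p_e the
# accuracy expected from the marginal label frequencies. A minimal check of
# that identity against sklearn on toy labels:
import numpy as np
from sklearn.metrics import cohen_kappa_score, confusion_matrix

y_true = np.array([0, 0, 1, 1, 2, 2])
y_pred = np.array([0, 0, 1, 2, 2, 2])
cm = confusion_matrix(y_true, y_pred)
p_o = np.trace(cm) / cm.sum()                                  # observed
p_e = np.sum(cm.sum(axis=0) * cm.sum(axis=1)) / cm.sum() ** 2  # expected
assert np.isclose((p_o - p_e) / (1 - p_e), cohen_kappa_score(y_true, y_pred))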
if cv:
    mean_n_support_ /= len(combs)
    logger.error('Mean `fit` Time %s' % mean_fit_time)
    logger.error('Mean `score` Time %s' % mean_score_time)
    logger.error('Mean Number of Support Vectors %s' % mean_n_support_)

y_hat = np.argmax(LEADERBOARD, axis=0)
acc = np.sum(y_test == y_hat) / K
logger.error('Accuracy = %.2f%%' % (acc * 100))

cnf_matrix = confusion_matrix(y_test.ravel(), y_hat.ravel(),
                              labels=list(classes))

# Plot non-normalized confusion matrix
plt.figure()
logger.info('Plotting confusion matrices...')
plot_confusion_matrix(cnf_matrix, classes=classes,
                      title='SVM One versus One - Confusion Matrix',
                      cmap=plt.cm.Reds)
plt.savefig('data/out/svm_ovo_cnf_matrix.pdf', format='pdf', dpi=300)

# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=classes, normalize=True,
                      title='SVM One versus One - Normalized Confusion Matrix',
                      cmap=plt.cm.Reds)
plt.savefig('data/out/svm_ovo_cnf_matrix_norm.pdf', format='pdf', dpi=300)

logger.info('Exported at data/out/svm_ovo_cnf_matrix.pdf & '
            'data/out/svm_ovo_cnf_matrix_norm.pdf...')
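# A minimal sketch of how the `LEADERBOARD` vote matrix consumed above could
# be filled for one-vs-one SVMs. `combs`, `classes` and `K` follow the
# snippet; `classifiers` (one binary SVM per class pair) is assumed, as are
# integer class indices 0..len(classes)-1:
import numpy as np

LEADERBOARD = np.zeros((len(classes), K))      # votes: class x test sample
for (i, j), clf in zip(combs, classifiers):    # binary SVM for classes i, j
    votes = clf.predict(X_test)                # returns i or j per sample
    for k, winner in enumerate(votes):
        LEADERBOARD[winner, k] += 1            # tally the pairwise winner
# y_hat = np.argmax(LEADERBOARD, axis=0) then picks the most-voted class.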
# new_model.compile(optimizer='adam',
#                   loss=tf.keras.losses.sparse_categorical_crossentropy,
#                   metrics=['accuracy'])
# loss, acc = new_model.evaluate(test_images, test_labels)
# print("Restored model, accuracy: {:5.2f}%".format(100*acc))
# print("...........")

'''
### Visualize
####### Plot Confusion Matrix
print('--- Plot Confusion Matrix')
#y_pred = np.array([np.argmax(y) for y in new_model.predict(test_images)])
y_pred = new_model.predict(test_images)
visualize.plot_confusion_matrix(y_pred, test_labels,
                                to_file=os.path.join(
                                    root_dir,
                                    '{}_{}_Confusion_Matrix.png'.format(
                                        version, model_archi)),
                                classes=genres, version=version)

####### Plot model structure
print('--- Plot model structure')
visualize.plot_model(new_model,
                     to_file=os.path.join(
                         root_dir,
                         '{}_{}_model.png'.format(version, model_archi)),
                     show_shapes=True, show_layer_names=True)
'''

####### Plot Feature Maps
ind = random.randint(0, len(test_images) - 1)  # randint is inclusive at both ends
creport = classification_report(y_true=all_gt, y_pred=all_predictions,
                                target_names=classes_lst, digits=4,
                                output_dict=True)
creport_df = pd.DataFrame(creport).transpose()
acc = accuracy_score(all_gt, all_predictions)
kappa_score = cohen_kappa_score(all_gt, all_predictions)

print(creport_df)
print('Accuracy for {} is {}, Kappa Score is {}'.format(
    timestamp, acc, kappa_score))

# predict_df = pd.DataFrame(data=results,
#                           columns=['TP0', 'TP1', 'TP2', 'TP3', 'TP4', 'TP5'])
# file_names = [x.split('/')[-1] for x in data_paths]
# predict_df['fname'] = file_names
# predict_df['Class'] = all_gt
# predict_df.to_csv(predicted_probabilities_csv_name, index=False)

visualize.plot_confusion_matrix(cm, classes=classes_lst,
                                title='t-bilstm Confusion Matrix')

# print(f'All test accuracies = {test_accuracies}')
# creport_df.to_csv(parser_args.network + 'creport.csv', index=True)
creport = classification_report(y_true=all_gt, y_pred=all_predictions,
                                target_names=classes_lst, digits=4,
                                output_dict=True)
creport_df = pd.DataFrame(creport).transpose()
acc = accuracy_score(all_gt, all_predictions)
kappa_score = cohen_kappa_score(all_gt, all_predictions)
test_accuracies.append(acc)

print(creport_df)
print('Accuracy for {} is {}, kappa score is {}'.format(
    timestamp, acc, kappa_score))

predict_df = pd.DataFrame(
    data=results, columns=['TP0', 'TP1', 'TP2', 'TP3', 'TP4', 'TP5'])
predict_df.to_csv(predicted_probabilities_csv_name)

visualize.plot_confusion_matrix(
    cm,
    classes=['Corn', 'Cotton', 'Soy', 'Spring Wheat', 'Winter Wheat', 'Barley'],
    title='t-lstm Confusion Matrix')