def train_model(dataset_feature, method, title):
    json = loadDict(dataset_feature)
    X = [item.reshape(-1) for item in json['list_features']]
    y = json['list_labels']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=40)
    if method == 'knn':
        clf = KNeighborsClassifier(n_neighbors=3)
        clf.fit(X_train, y_train)
    elif method == 'svm':
        clf = SVC(kernel='linear')
        clf.fit(X_train, y_train)
    predict_list = clf.predict(X_test)
    # Use a sorted list so the class order is deterministic
    target_names = sorted(set(y_test))
    plot_confusion_matrix(y_test, predict_list, classes=target_names, title=title, normalize=True)
    print('TITLE: ', title)
    print(classification_report(y_test, predict_list, target_names=target_names))
def rbf_analysis(X, Y, c, g, title, filename):
    print("Performing Cross Validation on Penalty: {}".format(c))
    loo = LeaveOneOut()
    predictions = []
    expected = []
    TP, FN, TN, FP = 0, 0, 0, 0
    Accuracy = 0
    for train_index, test_index in loo.split(X):
        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = Y[train_index], Y[test_index][0]
        clf = SVC(C=c, gamma=g, kernel='rbf')
        clf.fit(X_train, Y_train)
        prediction = clf.predict(X_test)[0]
        predictions.append(prediction)
        expected.append(Y_test)
    print("Calculating.....")
    for i, prediction in enumerate(predictions):
        if prediction == 1 and expected[i] == 1:
            TP += 1
        elif prediction == 0 and expected[i] == 1:
            FN += 1
        elif prediction == 0 and expected[i] == 0:
            TN += 1
        elif prediction == 1 and expected[i] == 0:
            FP += 1
    Sensitivity = TP / float(TP + FN)
    Specificity = TN / float(TN + FP)
    Accuracy = (TP + TN) / float(TP + TN + FP + FN)
    # Saving data to file (text mode, appending)
    with open(filename, 'a') as f:
        f.write("Sensitivity of Prediction: {} @ Penalty: {} @ Gamma: {}\n".format(Sensitivity, c, g))
        f.write("Specificity of Prediction: {} @ Penalty: {} @ Gamma: {}\n".format(Specificity, c, g))
        f.write("Accuracy of Prediction: {} @ Penalty: {} @ Gamma: {}\n".format(Accuracy, c, g))
        f.write("Matthews Correlation Coefficient Value: {}\n".format(matthews_corrcoef(predictions, expected)))
        f.write("Classification Report:\n")
        f.write(classification_report(predictions, expected))
        f.write("Confusion Matrix\n")
        cm = confusion_matrix(predictions, expected)
        f.write(str(cm))
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    label1 = "Negative"
    label2 = "Positive"
    plt.figure()
    plot_confusion_matrix(cm, title, label1, label2)
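# Hedged aside (not part of the original snippets): the manual TP/FN/TN/FP tally above can be
# reproduced with scikit-learn's own confusion_matrix. A minimal sketch, assuming binary 0/1
# labels in `expected` and `predictions`; the helper name binary_rates is made up here.
from sklearn.metrics import confusion_matrix

def binary_rates(expected, predictions):
    # ravel() on a 2x2 matrix with labels=[0, 1] yields tn, fp, fn, tp in row-major order
    tn, fp, fn, tp = confusion_matrix(expected, predictions, labels=[0, 1]).ravel()
    sensitivity = tp / float(tp + fn)
    specificity = tn / float(tn + fp)
    accuracy = (tp + tn) / float(tn + fp + fn + tp)
    return sensitivity, specificity, accuracy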
def plot_matrix(y_pred: np.ndarray, dataset: tf.data.Dataset) -> None:
    y_pred = np.argmax(y_pred, axis=1)
    y_test = np.concatenate([y for x, y in dataset], axis=0)
    y_test = np.argmax(y_test, axis=1)
    cnf_matrix = confusion_matrix(y_test, y_pred)
    class_names = [str(cl) for cl in range(10)]
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=class_names)
    plt.show()
def getConfusion(y_test, prediction, name):
    # confusion matrix for test
    # cnf_matrix = confusion_matrix(y_test, prediction)
    cnf_matrix = confusionMatrix(y_test, prediction)
    class_names = np.unique(prediction, return_counts=False)
    np.set_printoptions(precision=2)
    # Plot non-normalized confusion matrix
    plt.figure()
    cf_mat.plot_confusion_matrix(cnf_matrix, classes=class_names, title=name)
    plt.savefig("results/" + name)
    # plt.show()
    return
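# Hedged aside: most snippets in this section call a plot_confusion_matrix helper from a local
# confusion_matrix / cf_mat module whose source is not shown, and the call signatures vary
# (some pass a precomputed matrix, some pass y_true/y_pred). A minimal matplotlib sketch of a
# "(cm, classes)"-style helper; the signature and defaults here are assumptions, not the
# original implementation.
import numpy as np
import matplotlib.pyplot as plt

def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    if normalize:
        # Row-normalize so each true class sums to 1
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    ticks = np.arange(len(classes))
    plt.xticks(ticks, classes, rotation=45)
    plt.yticks(ticks, classes)
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.0
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            plt.text(j, i, format(cm[i, j], fmt),
                     ha='center', color='white' if cm[i, j] > thresh else 'black')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()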
def selectModel(data_set, models, visual=False, plist_file=None):
    scaler = StandardScaler()
    data_set.X_train = scaler.fit_transform(data_set.X_train)
    data_set.X_test = scaler.transform(data_set.X_test)
    # print("after scaling")
    # print(data_set.X_train.shape)

    # selector1 = VarianceThreshold(threshold=(.9 * (1 - .9)))
    # X_train = selector1.fit_transform(X_train, y_train)
    # selector2 = SelectKBest(f_classif, k=min(X_train.shape[1], 3000))
    # X_train = selector2.fit_transform(X_train, y_train)

    # dimension reduction
    # pca = PCA()
    # data_set.X_train = pca.fit_transform(data_set.X_train)
    # print("after PCA")
    # print(data_set.X_train.shape)

    # feature selection: backward search
    # svc = SVC(kernel="linear", C=0.001)
    # rfecv = RFECV(estimator=svc, step=10, cv=StratifiedKFold(3), n_jobs=-1, scoring='accuracy', verbose=9)
    # data_set.X_train = rfecv.fit_transform(data_set.X_train, data_set.y_train)
    # print("Backward search gives number of features : %d" % rfecv.n_features_)
    # data_set.X_test = scaler.transform(data_set.X_test)
    # data_set.X_test = rfecv.predict(data_set.X_test)

    for (model, configs) in models:
        if 'skip' in configs and configs['skip']:
            continue
        t0 = time.time()
        clf = model(data_set, configs).clf
        # y_predict is needed below for the confusion matrix plot
        y_predict = clf.predict(data_set.X_test)
        score = clf.score(data_set.X_test, data_set.y_test)
        logging.info("%s (%.2f) Test Accuracy: %0.4f" % (str(model), time.time() - t0, score))
        # Plot normalized confusion matrix
        if plist_file is not None:
            with open(plist_file, 'r') as fd:
                label_names = json.load(fd).keys()
        else:
            label_names = [str(x) for x in range(1, max(data_set.y_train) + 1)]
        if visual:
            confusion_matrix.plot_confusion_matrix(
                data_set.y_test, y_predict,
                classes=label_names, normalize=True,
                title='Normalized confusion matrix')
def plot_confusion_matrices(activation_funs_with_names_list, xs, ys):
    """
    :param activation_funs_with_names_list: list of pairs ((act_fun_1, act_fun_2), (name_1, name_2))
    :param xs: input data, NxD np.array
    :param ys: output data, Nx1 np.array
    :return:
    """
    for activation_funs, activation_names in activation_funs_with_names_list:
        estimator = KerasClassifier(
            build_fn=lambda: build_model_with_activation_funs(activation_funs[0], activation_funs[1]),
            epochs=30, batch_size=5, verbose=0)
        y_pred = cross_val_predict(estimator, xs, ys, cv=5)
        confusion_matrix.plot_confusion_matrix(
            y_true=ys.astype(int), y_pred=y_pred.astype(int),
            classes=[0, 1, 2, 3],
            title="Confusion matrix tested on {} samples for {} and {} activation functions".format(
                len(y_pred), activation_names[0], activation_names[1]))
        plt.show()
def NN_evaluation(model, testloader, criterion, patience=100, device="cpu"):
    """This function evaluates the models on a test set"""
    y_pred_test = []
    y_test = []
    batch_loss = []
    batch_accs = []
    num_classes = 16
    with torch.no_grad():
        for i, data in enumerate(testloader):
            inputs = data['bands'].float().to(device)
            labels = data['labels'].long().to(device)
            logits = model(inputs)
            loss = criterion(logits, labels)
            batch_loss.append(loss.item())
            batch_acc, batch_pred = logit_accuracy(logits, labels)
            y_pred_test.append(batch_pred)
            y_test.append(data['labels'])
            batch_accs.append(batch_acc)
            # print("Validation loss: {:1.3f}, Validation Acc: {:1.3f} \n".
            #       format(np.mean(batch_loss), np.mean(batch_accs)))

    # Predicted labels to numpy array
    y_pred_test = np.concatenate([
        y_pred_test[i].to("cpu").numpy() for i in range(len(y_pred_test))
    ]).reshape(-1)
    y_test = np.concatenate(
        [y_test[i].to("cpu").numpy() for i in range(len(y_test))]).reshape(-1)

    # =========================================================================
    # Confusion Matrix - Validation Set
    # =========================================================================
    from confusion_matrix import plot_confusion_matrix

    # Sort the labels so the plotted class order matches the matrix rows
    labels = sorted(set(y_test))
    print(classification_report(y_test, y_pred_test, digits=3))
    cm = confusion_matrix(y_test, y_pred_test, labels=list(range(num_classes)))
    print("\n")
    plt.rcParams["figure.figsize"] = (10, 6)
    plt.figure()
    plot_confusion_matrix(cm,
                          classes=labels,
                          title='Confusion matrix - Validation set',
                          cmap=plt.cm.Greens)
    return y_test, y_pred_test, cm
def plot_confusion_matrices(neurons_numbers, xs, ys):
    """
    :param neurons_numbers: list of neuron counts to test in the hidden layer
    :param xs: input data, NxD np.array
    :param ys: output data, Nx1 np.array
    :return:
    """
    for neurons_no in neurons_numbers:
        estimator = KerasClassifier(build_fn=lambda: build_model_n(neurons_no),
                                    epochs=60, batch_size=5, verbose=0)
        y_pred = cross_val_predict(estimator, xs, ys, cv=5)
        confusion_matrix.plot_confusion_matrix(
            y_true=ys.astype(int), y_pred=y_pred.astype(int),
            classes=[0, 1, 2, 3],
            title="Confusion matrix tested on {} samples for {} neurons in hidden layer".format(
                len(y_pred), neurons_no))
        plt.show()
def create_confusion_matrix(model_info, y_test, y_predict, name):
    # Creating confusion matrix
    # -------------------------
    print('Creating confusion matrix...')
    # Get a list of valid labels
    labels = np.unique(y_test)
    cnf_matrix = confusion_matrix(y_test, y_predict)
    np.set_printoptions(precision=2)
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=labels, title='Confusion matrix, without normalization')
    filename = PATH_OUTPUT + name + "_" + model_info.name + "-" + model_info.label + "-" + \
               model_info.param_name + "-" + str(model_info.param_value) + "_" + \
               "cv" + str(model_info.cross_validation_round) + '-confusion_matrix.pdf'
    plt.savefig(filename, format='pdf', dpi=300)
    plt.close()
def plot_confusion_matrices(optimizers_with_names, xs, ys):
    """
    :param optimizers_with_names: list of pairs (optimizer, optimizer_name)
    :param xs: input data, NxD np.array
    :param ys: output data, Nx1 np.array
    :return:
    """
    for optimizer, name in optimizers_with_names:
        estimator = KerasClassifier(
            build_fn=lambda: build_model_with_optimizer(optimizer),
            epochs=30, batch_size=5, verbose=0)
        y_pred = cross_val_predict(estimator, xs, ys, cv=5)
        confusion_matrix.plot_confusion_matrix(
            y_true=ys.astype(int), y_pred=y_pred.astype(int),
            classes=[0, 1, 2, 3],
            title="Confusion matrix tested on {} samples for {} optimizer".format(len(y_pred), name))
        plt.show()
def plot_confusion_matrices(hidden_layers_numbers, xs, ys):
    """
    :param hidden_layers_numbers: list of hidden-layer counts to test (min. 1, max. 3)
    :param xs: input data, NxD np.array
    :param ys: output data, Nx1 np.array
    :return:
    """
    for hidden_layers_no in hidden_layers_numbers:
        estimator = KerasClassifier(
            build_fn=lambda: build_model_with_hidden_layers_no(hidden_layers_no),
            epochs=30, batch_size=5, verbose=0)
        y_pred = cross_val_predict(estimator, xs, ys, cv=5)
        confusion_matrix.plot_confusion_matrix(
            y_true=ys.astype(int), y_pred=y_pred.astype(int),
            classes=[0, 1, 2, 3],
            title="Confusion matrix tested on {} samples for {} hidden layers".format(
                len(y_pred), hidden_layers_no))
        plt.show()
def plot_conf_matrix(y_true, y_pred, filename, binary=False, normalize=True, title=None, cmap='Greys'):
    if binary:
        # Class labels (Russian): "no front", "front"
        plot_confusion_matrix(y_true, y_pred, ["Нет фронта", "Фронт"],
                              normalize=normalize, title=title, cmap=cmap)
    else:
        # Class labels (Russian): "no front", "warm", "cold", "stationary", "occlusion"
        plot_confusion_matrix(
            y_true, y_pred,
            ["Нет фронта", "Тёплый", "Холодный", "Стационарный", "Окклюзии"],
            normalize=normalize, title=title, cmap=cmap)
    plt.savefig(filename)
    plt.close()
def tune_params(X_train, y_train, X_val, y_val, verbose):
    best_f1 = 0
    best_params = None
    kernels = ['linear', 'poly', 'rbf']
    degrees = [2, 3, 4]
    gammas = ['auto', 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10]  # Only for non-linear kernels. Higher gammas tend to over-fit
    Cs = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10]  # Penalty for classifying wrongly. Higher Cs tend to over-fit
    functions = ['ovo', 'ovr']
    for k in kernels:
        for f in functions:
            for C in Cs:
                if k == 'linear':
                    clf = svm.SVC(kernel=k, C=C, decision_function_shape=f)
                    clf.fit(X_train, y_train)
                    y_predicted = clf.predict(X_val)
                    f1 = f1_score(y_val, y_predicted, average='micro')
                    if f1 >= best_f1:
                        best_f1 = f1
                        best_params = clf.get_params()
                        if verbose:
                            print(clf.get_params())
                            print(f1)
                            plot_confusion_matrix(y_val, y_predicted, np.array(('0', '1', '2')))
                else:
                    for g in gammas:
                        if k == 'poly':
                            for d in degrees:
                                clf = svm.SVC(kernel=k, gamma=g, degree=d, C=C, decision_function_shape=f)
                                clf.fit(X_train, y_train)
                                y_predicted = clf.predict(X_val)
                                f1 = f1_score(y_val, y_predicted, average='micro')
                                if f1 > best_f1:
                                    best_f1 = f1
                                    best_params = clf.get_params()
                                    if verbose:
                                        print(clf.get_params())
                                        print(f1)
                                        plot_confusion_matrix(y_val, y_predicted, np.array(('0', '1', '2')))
                        else:
                            clf = svm.SVC(kernel=k, gamma=g, C=C, decision_function_shape=f)
                            clf.fit(X_train, y_train)
                            y_predicted = clf.predict(X_val)
                            f1 = f1_score(y_val, y_predicted, average='micro')
                            if f1 > best_f1:
                                best_f1 = f1
                                best_params = clf.get_params()
                                if verbose:
                                    print(clf.get_params())
                                    print(f1)
                                    plot_confusion_matrix(y_val, y_predicted, np.array(('0', '1', '2')))
    return best_params
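# Hedged aside: the nested loops above amount to a manual grid search scored with micro-F1.
# A sketch of the same search space expressed with sklearn's GridSearchCV; note this
# cross-validates on the training data instead of scoring against the fixed X_val/y_val split.
from sklearn import svm
from sklearn.model_selection import GridSearchCV

param_grid = [
    {'kernel': ['linear'], 'C': [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10],
     'decision_function_shape': ['ovo', 'ovr']},
    {'kernel': ['rbf'], 'C': [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10],
     'gamma': ['auto', 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10],
     'decision_function_shape': ['ovo', 'ovr']},
    {'kernel': ['poly'], 'degree': [2, 3, 4],
     'C': [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10],
     'gamma': ['auto', 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10],
     'decision_function_shape': ['ovo', 'ovr']},
]
# search = GridSearchCV(svm.SVC(), param_grid, scoring='f1_micro', cv=5)
# search.fit(X_train, y_train); best_params = search.best_params_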
def print_predictions_stats(predicted, actual):
    cwe_counts = defaultdict(int)
    for p in predicted:
        cwe_counts[p] += 1
    correct = 0
    for p, a in zip(predicted, actual):
        if p == a:
            correct += 1
    # print("Correctly identified " + str(correct) + "/" + str(actual.shape[0]))
    # print("Accuracy: " + str(float(correct)/float(actual.shape[0])))
    l1 = numpy.unique(actual)
    l2 = numpy.unique(predicted)
    labels = numpy.unique(list(set(l1).union(set(l2))))
    return cm.plot_confusion_matrix(actual, predicted, labels), calculate_accuracy(predicted, actual)
X_test_cont = scaler.transform(X_test_cont)

# fill scaled data
with pd.option_context('mode.chained_assignment', None):
    for l, f in enumerate(features_to_extract):
        X_train.loc[:, f] = X_train_cont[:, l]
        X_test.loc[:, f] = X_test_cont[:, l]

model = SVC(C=1 / float(best_penalties[0]), kernel="linear", gamma="scale")
model.fit(X_train, Z_train)
Z_pred = model.predict(X_test)
plot_confusion_matrix(Z_test, Z_pred, normalize=True, ndecimals=3,
                      title="Support Vector Machine Confusion Matrix", savename="CM_SVM")

# compute final estimate of accuracy
N = 5
kfold = KFold(n_splits=N, shuffle=True)
accuracy_kfold = np.zeros(N)
model = SVC(C=1 / float(best_penalties[0]), kernel="linear", gamma="scale")
for k, (train_index, test_index) in enumerate(kfold.split(data, death)):
    x_train = data.iloc[train_index]
    y_train = np.ravel(death.iloc[train_index])
    x_test = data.iloc[test_index]
    y_test = np.ravel(death.iloc[test_index])
def test_RF(fn): """ Function which will tune and test a Random Forest model. It will plot a confusion matrix and write a performance report to file. Arguments: - fn : Name of the input file. """ #Timer variables start = 0 end = 0 #Load datasets X_train_df = pd.read_csv("input/{}_train_X.csv".format(fn), sep=";") y_train_df = pd.read_csv("input/{}_train_y.csv".format(fn), sep=";") X_test_df = pd.read_csv("input/{}_test_X.csv".format(fn), sep=";") y_test_df = pd.read_csv("input/{}_test_y.csv".format(fn), sep=";") X_val_tr = X_train_df.values y_val_tr = y_train_df.values X_val_test = X_test_df.values y_val_test = y_test_df.values #Convert to numpy arrays X_train = X_val_tr[:].astype(float) y_train = y_val_tr[:] X_test = X_val_test[:].astype(float) y_test = y_val_test[:] #Scale X values (train) scaler = RobustScaler() scaler.fit(X_train) X_train = scaler.transform(X_train) #Scale X values (test) scaler.fit(X_test) X_test = scaler.transform(X_test) #Transform non-numerical values into numericals encoder = LabelEncoder() encoder.fit(y_train.ravel()) encoded_y_train = encoder.transform(y_train.ravel()) encoder.fit(y_test.ravel()) encoded_y_test = encoder.transform(y_test.ravel()) #Fitting Random Forest Classifier to the Training set rf = RandomForestClassifier(n_estimators=10, criterion="entropy", random_state=7) start = time.time() rf.fit(X_train, encoded_y_train) end = time.time() #Predicted values y_pred = encoder.inverse_transform(rf.predict(X_test)) #Making the Confusion Matrix cm = confusion_matrix(y_test, y_pred) print(cm) print("\n") print(classification_report(y_test, y_pred)) print("Scores for final, best model:\n") print("Acc: {}".format(accuracy_score(y_test, y_pred))) #Find labels labels = [label for label in y_test_df.iloc[:, 0].unique()] #Plot confusion matrix plot_confusion_matrix(cm, sorted(labels), False) #Show the plot plt.savefig("figures/RF_confusion_matrix_{}.svg".format(int(time.time()))) #plt.show() #Write a .txt report file with open("reports/RF_{}_report.txt".format(fn), "w") as f: f.write("REPORT FOR \"{}\"\n\n".format(fn)) f.write("\n\n\nClassification Report:\n") for line in classification_report(y_test, y_pred): f.write(line) f.write("\nConfusion Matrix:\n\n") f.write(np.array2string(cm, separator=', ')) f.write("\n\nTime used to train the model: {} seconds".format(end - start)) f.write("\n\nScores for final, best model:\n") f.write("Accuracy: {}".format(accuracy_score(y_test, y_pred))) f.close()
def main(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # cudnn.benchmark = True

    # Redirect print to both console and log file
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.logs_dir, 'log.txt'))

    # Create data loaders
    if args.height is None or args.width is None:
        args.height, args.width = (144, 56) if args.arch == 'inception' else (240, 240)
    dataset, num_classes, train_loader, val_loader, test_loader = \
        get_data(args.dataset, args.split, args.data_dir, args.height,
                 args.width, args.batch_size, args.workers, args.combine_trainval)

    # Create model
    img_branch = models.create(args.arch, cut_layer=args.cut_layer,
                               num_classes=num_classes, num_features=args.features)
    diff_branch = models.create(args.arch, cut_layer=args.cut_layer,
                                num_classes=num_classes, num_features=args.features)

    # Load from checkpoint
    start_epoch = best_top1 = 0
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        img_branch.load_state_dict(checkpoint['state_dict_img'])
        diff_branch.load_state_dict(checkpoint['state_dict_diff'])
        start_epoch = checkpoint['epoch']
        best_top1 = checkpoint['best_top1']
        print("=> Start epoch {} best top1 {:.1%}".format(start_epoch, best_top1))
    img_branch = nn.DataParallel(img_branch).cuda()
    diff_branch = nn.DataParallel(diff_branch).cuda()
    # img_branch = nn.DataParallel(img_branch)
    # diff_branch = nn.DataParallel(diff_branch)

    # Criterion
    criterion = nn.CrossEntropyLoss().cuda()
    # criterion = nn.CrossEntropyLoss()

    # Evaluator
    evaluator = Evaluator(img_branch, diff_branch, criterion)
    if args.evaluate:
        # print("Validation:")
        # top1, _ = evaluator.evaluate(val_loader)
        # print("Validation acc: {:.1%}".format(top1))
        print("Test:")
        top1, (gt, pred) = evaluator.evaluate(test_loader)
        print("Test acc: {:.1%}".format(top1))
        from confusion_matrix import plot_confusion_matrix
        plot_confusion_matrix(gt, pred, dataset.classes, args.logs_dir)
        return

    img_param_groups = [
        {'params': img_branch.module.low_level_modules.parameters(), 'lr_mult': 0.1},
        {'params': img_branch.module.high_level_modules.parameters(), 'lr_mult': 0.1},
        {'params': img_branch.module.classifier.parameters(), 'lr_mult': 1},
    ]
    diff_param_groups = [
        {'params': diff_branch.module.low_level_modules.parameters(), 'lr_mult': 0.1},
        {'params': diff_branch.module.high_level_modules.parameters(), 'lr_mult': 0.1},
        {'params': diff_branch.module.classifier.parameters(), 'lr_mult': 1},
    ]
    img_optimizer = torch.optim.SGD(img_param_groups, lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=True)
    diff_optimizer = torch.optim.SGD(diff_param_groups, lr=args.lr,
                                     momentum=args.momentum,
                                     weight_decay=args.weight_decay,
                                     nesterov=True)

    # Trainer
    trainer = Trainer(img_branch, diff_branch, criterion)

    # Schedule learning rate
    def adjust_lr(epoch):
        step_size = args.step_size
        lr = args.lr * (0.1 ** (epoch // step_size))
        for g in img_optimizer.param_groups:
            g['lr'] = lr * g.get('lr_mult', 1)
        for g in diff_optimizer.param_groups:
            g['lr'] = lr * g.get('lr_mult', 1)

    # Start training
    for epoch in range(start_epoch, args.epochs):
        adjust_lr(epoch)
        trainer.train(epoch, train_loader, img_optimizer, diff_optimizer)
        if epoch < args.start_save:
            continue
        top1, _ = evaluator.evaluate(val_loader)

        is_best = top1 > best_top1
        best_top1 = max(top1, best_top1)
        save_checkpoint({
            'state_dict_img': img_branch.module.state_dict(),
            'state_dict_diff': diff_branch.module.state_dict(),
            'epoch': epoch + 1,
            'best_top1': best_top1,
        }, is_best, fpath=osp.join(args.logs_dir, 'checkpoint.pth.tar'))

        print('\n * Finished epoch {:3d}  top1: {:5.1%}  best: {:5.1%}{}\n'.
              format(epoch, top1, best_top1, ' *' if is_best else ''))

    # Final test
    print('Test with best model:')
    checkpoint = load_checkpoint(osp.join(args.logs_dir, 'model_best.pth.tar'))
    img_branch.module.load_state_dict(checkpoint['state_dict_img'])
    diff_branch.module.load_state_dict(checkpoint['state_dict_diff'])
    top1, (gt, pred) = evaluator.evaluate(test_loader)
    from confusion_matrix import plot_confusion_matrix
    plot_confusion_matrix(gt, pred, dataset.classes, args.logs_dir)
    print('\n * Test Accuracy: {:5.1%}\n'.format(top1))
def linear(X, Y, title, filename):
    C = [1, 2, 5, 10, 15, 20, 25, 30, 50, 100, 200, 500, 1000, 2000, 5000, 10000]
    loo = LeaveOneOut()
    avg_Accuracy = dict()
    sensitivity = dict()
    specificity = dict()
    for c in C:
        # print("Performing Cross Validation on Penalty: {}".format(c))
        predictions = []
        expected = []
        TP, FN, TN, FP = 0, 0, 0, 0
        Accuracy = 0
        for train_index, test_index in loo.split(X):
            X_train, X_test = X[train_index], X[test_index]
            Y_train, Y_test = Y[train_index], Y[test_index][0]
            clf = SVC(C=c, kernel='linear')
            clf.fit(X_train, Y_train)
            prediction = clf.predict(X_test)[0]
            # print("Prediction: {}".format(prediction))
            # print("Expected Result: {}".format(Y_test))
            predictions.append(prediction)
            expected.append(Y_test)
        # print("Calculating Accuracy of Prediction")
        for i, prediction in enumerate(predictions):
            if prediction == 1 and expected[i] == 1:
                TP += 1
            elif prediction == 0 and expected[i] == 1:
                FN += 1
            elif prediction == 0 and expected[i] == 0:
                TN += 1
            elif prediction == 1 and expected[i] == 0:
                FP += 1
        Sensitivity = TP / float(TP + FN)
        Specificity = TN / float(TN + FP)
        Accuracy = (TP + TN) / float(TP + TN + FP + FN)
        # print("Accuracy of Prediction: {} @ Penalty: {}".format(Accuracy, c))
        avg_Accuracy[c] = Accuracy
        sensitivity[c] = Sensitivity
        specificity[c] = Specificity
    bestC = max(avg_Accuracy.keys(), key=(lambda k: avg_Accuracy[k]))
    # We are hashing the Specificity and Sensitivity based on the key that gives best accuracy
    bestSensitivity = sensitivity[bestC]
    bestSpecificity = specificity[bestC]
    bestAccuracy = avg_Accuracy[bestC]
    with open(filename, 'a') as f:
        f.write("All Accuracy Values @ Each Penalty: {} \n".format(avg_Accuracy))
        f.write("Most Accurate Penalty Value: {}\n".format(bestC))
        # Report the best scores against the penalty that produced them
        f.write("Accuracy of Prediction: {} @ Penalty: {}\n".format(bestAccuracy, bestC))
        f.write("Sensitivity of Prediction: {} @ Penalty: {}\n".format(bestSensitivity, bestC))
        f.write("Specificity of Prediction: {} @ Penalty: {}\n".format(bestSpecificity, bestC))
        f.write("Matthews Correlation Coefficient Value: {}\n".format(matthews_corrcoef(predictions, expected)))
        f.write("Classification Report: \n")
        f.write(classification_report(predictions, expected))
        f.write("Confusion Matrix\n")
        cm = confusion_matrix(predictions, expected)
        f.write(str(cm))
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    label1 = "Negative"
    label2 = "Positive"
    plt.figure()
    plot_confusion_matrix(cm, title, label1, label2)
            frame = np.asarray(frame)
            em_pred = emotion_recog(frame)
            y_pred.append(em_pred)
        # go down one directory
        os.chdir(original)

    # checking list lengths
    print(len(y_pred))
    print(len(y_true))

    labels = ["Angry", "Disgusted", "Fearful", "Happy", "Neutral", "Sad", "Surprised"]

    # Return to the main environment to save the plot
    os.chdir(w_env + "/output")
    cm.plot_confusion_matrix(y_true, y_pred, labels, "Confusion Matrix (No Mask)")

elif mode == "maskMatrix":
    y_pred = []
    y_true = []

    # Load the trained model
    model.load_weights('model.h5')

    # Load the cascade classifier
    facecasc = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

    w_env = os.getcwd()

    # changing to the test directory
    os.chdir("./data_masks")
    true_classes.append(correct_class)
    predicted_classes.append(prediction)
else:
    L2_distances = np.square(train_histograms[:, None] - val_histograms).sum(axis=2).T
    index = 0
    mAP = 0
    for val_img in VAL_IMAGE_PATHS:
        distances = L2_distances[index]
        idx = np.argpartition(distances, knn)
        correct_class = val_img.split("/")[-2]
        prediction = predict(TRAIN_IMAGE_PATHS, idx[:knn])
        if prediction == correct_class:
            mAP += 1
        index += 1
        true_classes.append(correct_class)
        predicted_classes.append(prediction)

print("mAP:", mAP / len(VAL_IMAGE_PATHS), "\n")

# Confusion Matrix
confusion_matrix_title = "Accuracy=" + str(mAP / len(VAL_IMAGE_PATHS)) + \
    " (k-Means:" + str(k_means) + " StepSize:" + str(step_size) + " k-NN:" + str(knn) + ")"
cm.plot_confusion_matrix(true_classes, predicted_classes, CLASS_NAMES, title=confusion_matrix_title)
plt.show()
# visualize confusion matrix
# split data set
test_size = 0.35
train_size = 1 - test_size
X_train, X_test, Z_train, Z_test = train_test_split(data, np.ravel(death),
                                                    train_size=train_size, test_size=test_size)
model = RFC(n_estimators=50, max_depth=md_best, max_features=mf_best)
model.fit(X_train, Z_train)
Z_pred = model.predict(X_test)
plot_confusion_matrix(Z_test, Z_pred, normalize=True, ndecimals=3,
                      title="Random Forest Confusion Matrix", savename="CM_RF")

# compute final estimate of accuracy
N = 5
kfold = KFold(n_splits=N, shuffle=True)
accuracy_kfold = np.zeros(N)
model = RFC(n_estimators=50, max_depth=md_best, max_features=mf_best)
for k, (train_index, test_index) in enumerate(kfold.split(data, death)):
    x_train = data.iloc[train_index]
    y_train = np.ravel(death.iloc[train_index])
    x_test = data.iloc[test_index]
    y_test = np.ravel(death.iloc[test_index])
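# Hedged aside: the manual KFold loop above (its body is truncated in this section) appears to
# accumulate per-fold accuracies into accuracy_kfold; sklearn's cross_val_score gives the same
# estimate in one call, assuming `data` and `death` are the full feature table and target.
from sklearn.model_selection import cross_val_score
# scores = cross_val_score(RFC(n_estimators=50, max_depth=md_best, max_features=mf_best),
#                          data, np.ravel(death), cv=5, scoring='accuracy')
# print(scores.mean(), scores.std())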
def evaluate(modelPath):
    labels = ["BG", "figure", "table", "text"]
    use_cuda = torch.cuda.is_available()
    val_data = data_loader.ClassSeg(root=data_path, split='test', transform=True,
                                    filePath='DSSE', chanelCat=in_channels_Nmuber)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=1, shuffle=False, num_workers=5)

    print('load model .....')
    print("Using FCNs")
    vgg_model = models.VGGNet(model='vgg_self', pretrained=False, in_channels=in_channels_Nmuber)
    fcn_model = models.FCNs(pretrained_net=vgg_model, n_class=n_class, Attention=True)
    fcn_model.load_state_dict(torch.load(modelPath))
    if use_cuda:
        fcn_model.cuda()
    fcn_model.eval()

    label_trues, label_preds = [], []
    matrixs = np.zeros((n_class, n_class))
    for idx, (img, label, _) in enumerate(val_loader):
        img, label, Image_Path = val_data[idx]
        img = img.unsqueeze(0)
        if use_cuda:
            img = img.cuda()
        img = Variable(img)
        out = fcn_model(img)  # 1, 21, 320, 320
        srcImage = mpimg.imread(Image_Path)
        pred = out.data.max(1)[1].squeeze_(1).squeeze_(0)  # 320, 320
        if use_cuda:
            pred = pred.cpu()

        # Post-processing
        data = pred.numpy()
        # CutX = int(data.shape[1] / 32)
        #
        # for Cuti in range(data.shape[0]):
        #     for Cutj in range(0, data.shape[1], CutX):
        #         temp = data[Cuti, Cutj:Cutj + CutX - 1]
        #         data[Cuti, Cutj:Cutj + CutX - 1] = stats.mode(temp)[0][0]
        #
        # dataT = data.T
        # CutY = int(dataT.shape[1] / 3)
        #
        # for Cuti in range(dataT.shape[0]):
        #     for Cutj in range(0, dataT.shape[1], CutY):
        #         temp = dataT[Cuti, Cutj:Cutj + CutX - 1]
        #         data[Cutj:Cutj + CutX - 1, Cuti] = stats.mode(temp)[0][0]
        #
        # ---------------------------------------------------------------------
        #
        # if len(srcImage.shape) == 3:
        #     image = cv2.cvtColor(srcImage, cv2.COLOR_BGR2GRAY)  # convert the image to grayscale
        # else:
        #     image = srcImage
        #
        # h = int(max_height / 64) * 64
        # w = int(image.shape[1] * (max_height / image.shape[0]) / 64) * 64
        #
        # image = cv2.resize(image, (w, h), interpolation=cv2.INTER_LINEAR)
        #
        # print(image.shape)
        # sobelCombinedIMG = sobelCombined(image)
        # data = np.multiply(data, sobelCombinedIMG)
        #
        # ---------------------------------------------------------------------

        label_trues.append(label.numpy())
        label_preds.append(data)
        if idx % 2 == 0:
            print('evaluate [%d/%d]' % (idx, len(val_loader)))

        label_matrix_T = label.numpy()
        pre_matrix_T = data
        label_matrix = label_matrix_T.flatten()
        pre_matrix = pre_matrix_T.flatten()
        matrix = metrics.confusion_matrix(label_matrix, pre_matrix)
        if sum(sum(matrixs)) == 0:
            for i in range(len(matrixs)):
                for j in range(len(matrixs[0])):
                    matrixs[i][j] = matrix[i][j]
        else:
            # iterate over rows
            for i in range(len(matrix)):
                # iterate over columns
                for j in range(len(matrix[0])):
                    matrixs[i][j] = (matrixs[i][j] + matrix[i][j]) / 2

    # Mymetrics = tools.accuracy_score(label_trues, label_preds, n_class)
    # Mymetrics = np.array(Mymetrics)
    # Mymetrics *= 100
    # print('''\
    # Accuracy: {0}
    # Accuracy Class: {1}
    # Mean IU: {2}
    # FWAV Accuracy: {3}'''.format(*Mymetrics))

    plot_confusion_matrix(matrixs, classes=labels, normalize=True,
                          title='Normalized confusion matrix', cmap=plt.cm.Blues, yMax=3.5)

    numberTotal = sum(sum(matrixs))
    numberTrue = 0
    PrecisionList = []
    RecallList = []
    for i in range(len(matrixs)):
        for j in range(len(matrixs[0])):
            if i == j:
                numberTrue = numberTrue + matrixs[i, j]
    for i in range(len(matrixs)):
        PrecisionList.append(matrixs[i, i] / sum(matrixs[:, i]))
    for i in range(len(matrixs)):
        RecallList.append(matrixs[i, i] / sum(matrixs[i, :]))
    Accuracy = numberTrue / numberTotal
    Precision = sum(PrecisionList) / len(PrecisionList)
    Recall = sum(RecallList) / len(RecallList)
    F1 = (2 * Precision * Recall) / (Precision + Recall)
    print(Accuracy)
    print(Precision)
    print(Recall)
    print(F1)
def main(args): with tf.Graph().as_default(): with tf.Session() as sess: np.random.seed(seed=args.seed) if args.use_split_dataset: dataset_tmp = facenet.get_dataset(args.data_dir) train_set, test_set = split_dataset(dataset_tmp, args.min_nrof_images_per_class, args.nrof_train_images_per_class) if (args.mode=='TRAIN'): dataset = train_set elif (args.mode=='CLASSIFY'): dataset = test_set else: dataset = facenet.get_dataset(args.data_dir) # Check that there are at least one training image per class for cls in dataset: assert(len(cls.image_paths)>0, 'There must be at least one image for each class in the dataset') paths, labels = facenet.get_image_paths_and_labels(dataset) print('Number of classes: %d' % len(dataset)) print('Number of images: %d' % len(paths)) # Load the model print('Loading feature extraction model') facenet.load_model(args.model) # Get input and output tensors images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") embedding_size = embeddings.get_shape()[1] # Run forward pass to calculate embeddings print('Calculating features for images') nrof_images = len(paths) nrof_batches_per_epoch = int(math.ceil(1.0*nrof_images / args.batch_size)) emb_array = np.zeros((nrof_images, embedding_size)) for i in range(nrof_batches_per_epoch): start_index = i*args.batch_size end_index = min((i+1)*args.batch_size, nrof_images) paths_batch = paths[start_index:end_index] images = facenet.load_data(paths_batch, False, False, args.image_size) feed_dict = { images_placeholder:images, phase_train_placeholder:False } emb_array[start_index:end_index,:] = sess.run(embeddings, feed_dict=feed_dict) classifier_filename_exp = os.path.expanduser(args.classifier_filename) if (args.mode=='TRAIN'): # Train classifier print('Training classifier') # model = SVC(kernel='linear', probability=True) # model = LSHForest(probability=True) model = KNeighborsClassifier() model.fit(emb_array, labels) # Create a list of class names class_names = [ cls.name.replace('_', ' ') for cls in dataset] # Saving classifier model with open(classifier_filename_exp, 'wb') as outfile: pickle.dump((model, class_names), outfile) print('Saved classifier model to file "%s"' % classifier_filename_exp) elif (args.mode=='CLASSIFY'): # Classify images print('Testing classifier') with open(classifier_filename_exp, 'rb') as infile: (model, class_names) = pickle.load(infile) print('Loaded classifier model from file "%s"' % classifier_filename_exp) predictions = model.predict_proba(emb_array) best_class_indices = np.argmax(predictions, axis=1) best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices] for i in range(len(best_class_indices)): print('%4d %s: %.3f' % (i, class_names[best_class_indices[i]], best_class_probabilities[i])) print(class_names) accuracy = np.mean(np.equal(best_class_indices, labels)) print('Accuracy: %.3f' % accuracy) fig, ax = plot_confusion_matrix(best_class_indices, labels, classes=np.array(class_names), title='Confusion matrix, without normalization') fig.savefig('full_figure.png')
def new_write_file_content(pickle_file_path, measure, results_path): # Setup the path and the name of the file dataset_name = corpus_name() pickle_file_content = open_pickle_content(pickle_file_path) file_path = (results_path + "\\" + measure.upper().replace("_", " ") + " for " + dataset_name + ".xlsx") # Create an new Excel file and add a worksheet. workbook = xlsxwriter.Workbook(file_path) worksheet = workbook.add_worksheet() # Write titels extra_big = workbook.add_format({ "bold": True, "font_size": 17, "underline": True }) big = workbook.add_format({"bold": True, "font_size": 12}) worksheet.write("A2", dataset_name, big) worksheet.write( "A3", "The classification results are shown in the table below in percentages" ) # Write general data bold_gray = workbook.add_format({"bold": True, "font_color": "gray"}) gray = workbook.add_format({"font_color": "gray"}) worksheet.write("A5", "General information about the classification software", bold_gray) now = datetime.now() worksheet.write("A6", "Issue date: " + now.strftime("%d/%m/%Y %H:%M:%S"), gray) version = (str(sys.version_info[0]) + "." + str(sys.version_info[1]) + "." + str(sys.version_info[2])) worksheet.write("A7", "Python version: Python " + version, gray) worksheet.write( "A8", "Python classification libraries: keras, sklearn, tensorflow, VADAR from nltk, WordCloud", gray, ) # Write normalization worksheet.write("A10", "Pre Processing", bold_gray) worksheet.write("A11", "A - Acronyms", gray) worksheet.write("A12", "L - Lowercase", gray) """worksheet.write("A13", "AR - Apostrophe Removal", gray) worksheet.write("A11", "C - Spelling Correction", gray) worksheet.write("A12", "L - Lowercase", gray) worksheet.write("A13", "H - HTML tags", gray) worksheet.write("A14", "P - Punctuations", gray) worksheet.write("A15", "R - Repeated chars", gray) worksheet.write("A16", "T - Stemming", gray) worksheet.write("A17", "M - Lemmatizer", gray)""" # Write learning methods worksheet.write("H10", "Learning methods", bold_gray) worksheet.write("H11", "svc - Linear SVC", gray) worksheet.write("H12", "rf - Random Forest", gray) worksheet.write("H13", "mlp - Multilayer Perceptron", gray) worksheet.write("H14", "lr - Logistic Regression", gray) worksheet.write("H15", "mnb - Multinomial Naive Bayes", gray) worksheet.write("H16", "rnn - Recurrent Neural Network", gray) # Write stop words option worksheet.write("D10", "Stop Words Options", bold_gray) worksheet.write("D11", "E - English stop words", gray) worksheet.write("D12", "H - Hebrew stop words", gray) worksheet.write("D13", "X - Extended Hebrew stop words", gray) # Write differences significance option worksheet.write("L10", "Statistical Significance Options", bold_gray) worksheet.write("L11", "V - Significantly larger than the baseline", gray) worksheet.write("L12", "* - Significantly smaller than the baseline", gray) # Write stylistic features option worksheet.write("F19", "Stylistic Features Options", bold_gray) worksheet.write("F20", "CC - chars count", gray) worksheet.write("F21", "WC - words count", gray) worksheet.write("F22", "SC - sentence count", gray) worksheet.write("F23", "EMC - exclamation mark (!) count", gray) worksheet.write("F24", "QSMC - question mark (?) 
count", gray) worksheet.write("F25", "SCC - special characters (@, #, $, &, *, %, ^) count", gray) worksheet.write("F26", "QTMC - quotation mark (\", ') count", gray) worksheet.write("F27", "ALW - average letters in words", gray) worksheet.write("F28", "ALS - average letters in sentence", gray) worksheet.write("F29", "AWS - average words in sentence", gray) worksheet.write("F30", "AWL - average words length", gray) worksheet.write("F31", "IE - increasing expressions", gray) worksheet.write("F32", "DE - doubt expressions", gray) worksheet.write("F33", "NW - negative terms", gray) worksheet.write("F34", "PW - positive terms", gray) worksheet.write("F35", "TE - time expressions", gray) worksheet.write("F36", "EE - emotion expressions", gray) worksheet.write("I20", "FPE - first person expressions", gray) worksheet.write("I21", "SPE - second person expressions", gray) worksheet.write("I22", "TPE - third person expressions", gray) worksheet.write("I23", "INE - inclusion expressions", gray) worksheet.write("I24", "P1 - expressions form power 1", gray) worksheet.write("I25", "P2 - expressions form power 2", gray) worksheet.write("I26", "P3 - expressions form power 3", gray) worksheet.write("I27", "PM1 - expressions form power -1", gray) worksheet.write("I28", "PM2 - expressions form power -2", gray) worksheet.write("I29", "PM3 - expressions form power -3", gray) worksheet.write("I30", "PM4 - expressions form power -4", gray) worksheet.write("I31", "AP - expressions form all the powers", gray) worksheet.write( "I32", "TOPA1 - Enable all features on the 1'st trimester of the text", gray) worksheet.write( "I32", "TOPA2 - Enable all features on the 2'nd trimester of the text", gray) worksheet.write( "I33", "TOPA3 - Enable all features on the 3'rd trimester of the text", gray) worksheet.write( "I34", "TOPB1 - Enable all features on the first ten words of the text", gray) worksheet.write( "I35", "TOPB2 - Enable all features on the text without 10 first and last 10 words", gray, ) worksheet.write( "I36", "TOPB3 - Enable all features on the last ten words of the text", gray) # Write the result row = 40 kind = {"w": "Words", "c": "Chars"} ngrams = {"1": "Unigrams", "2": "Bigrams", "3": "Trigrams"} tf = {"tf": "TF", "tfidf": "TF-IDF"} methods = {"svc": 12, "rf": 13, "mlp": 14, "lr": 15, "mnb": 16, "rnn": 17} if measure == "accuracy_&_confusion_matrix": maxes = { "svc": [[0, 0, { "accuracy": 0, "matrix": None }]], "rf": [[0, 0, { "accuracy": 0, "matrix": None }]], "mlp": [[0, 0, { "accuracy": 0, "matrix": None }]], "lr": [[0, 0, { "accuracy": 0, "matrix": None }]], "mnb": [[0, 0, { "accuracy": 0, "matrix": None }]], "rnn": [[0, 0, { "accuracy": 0, "matrix": None }]], } best = [[0, 0, {"accuracy": 0, "matrix": None}]] else: maxes = { "svc": [[0, 0, 0]], "rf": [[0, 0, 0]], "mlp": [[0, 0, 0]], "lr": [[0, 0, 0]], "mnb": [[0, 0, 0]], "rnn": [[0, 0, 0]], } best = [[0, 0, 0]] image_num = 0 for key in sorted(pickle_file_content): value = pickle_file_content[key] # Gather all the results all_averages = [] # N-Grams data cell_format = workbook.add_format() cell_format.set_text_wrap() cell_format.set_align("vcenter") cell_format.set_align("center") features = value["featurs"] if features: count = "" type = "" tfidf = "" grams = "" skips = "" for feature in features: feature = feature.split("_") count += feature[1] + "\n" type += kind[feature[2]] + "\n" tfidf += tf[feature[3]] + "\n" grams += ngrams[feature[4]] + "\n" skips += feature[5] + "\n" worksheet.write_number(row, 0, int(count[:-1]), cell_format) 
worksheet.write(row, 1, type[:-1], cell_format) worksheet.write(row, 2, grams[:-1], cell_format) worksheet.write(row, 3, tfidf[:-1], cell_format) worksheet.write(row, 4, skips[:-1], cell_format) # Stylistic Features data stylistic_features = "" num_of_features = 0 stylistic_features_dict = initialize_features_dict() if value["stylistic_features"]: for styl_feature in value["stylistic_features"]: stylistic_features += styl_feature.upper() + " " worksheet.write(row, 5, stylistic_features[:-2], cell_format) # Write the num of features worksheet.write_number(row, 0, value["num_of_features"], cell_format) # Pre Processing and Stop Words data cell_format = workbook.add_format() cell_format.set_align("center") cell_format.set_align("vcenter") normalization = "" stopwords = "" for char in value["normalization"]: if char.lower() in "sbx": stopwords += (char.replace("s", "E").replace("b", "H").replace( "x", "X") + " ") else: normalization += char.upper() + " " if normalization == "": normalization = "NONE" if stopwords == "": stopwords = "NONE" try: worksheet.write(row, 6, str(value["selection"][0]), cell_format) worksheet.write(row, 7, str(value["selection"][1]), cell_format) except: pass worksheet.write(row, 8, normalization, cell_format) worksheet.write(row, 9, stopwords, cell_format) worksheet.write(row, 10, value["k_folds"], cell_format) worksheet.write(row, 11, value["iterations"], cell_format) # ML methods and result data for method, result in value["results"].items(): # confusion matrix if not isinstance(result, list): title = measure + str(image_num) if measure == "confusion_matrix": plot_confusion_matrix(result, results_path, title=title) worksheet.set_column(methods[method], methods[method], 40) worksheet.set_row(row, 140) elif measure == "roc_curve": plot_roc_curve(result, results_path, method, title=title) worksheet.set_column(methods[method], methods[method], 50) worksheet.set_row(row, 225) elif measure == "precision_recall_curve": plot_precision_recall_curve(result, results_path, title=title) worksheet.set_column(methods[method], methods[method], 47) worksheet.set_row(row, 215) elif measure == "accuracy_&_confusion_matrix": plot_confusion_matrix( result["matrix"], results_path, title=title, accuracy=result["accuracy"], cmap=plt.cm.Greys, ) worksheet.set_column(methods[method], methods[method], 40) worksheet.set_row(row, 170) best, maxes = find_maxes_best(best, maxes, method, methods, row, result) worksheet.insert_image(row, methods[method], results_path + "\\" + title + ".jpg") image_num += 1 continue if isinstance(result, list): sign = differences_significance(value["baseline_path"], result, measure, value["k_folds"]) val = str(float("{0:.4g}".format( avg(result) * 100))) + " " + sign all_averages += [float("{0:.4g}".format(avg(result) * 100))] else: val = result worksheet.write(row, methods[method], str(val), cell_format) # Check if val bigger then max best, maxes = find_maxes_best(best, maxes, method, methods, row, val) # write the max result of each classification worksheet.write_number("S" + str(row + 1), max(all_averages), cell_format) row += 1 worksheet.write("A19", "Colors", bold_gray) good = workbook.add_format({"bold": True, "font_color": "blue"}) good.set_align("center") good.set_align("vcenter") for _, method in maxes.items(): for val in method: if isinstance(val[2], dict): if val[2]["accuracy"] != 0: image_num += 1 title = measure + str(image_num) plot_confusion_matrix( val[2]["matrix"], results_path, title=title, accuracy=val[2]["accuracy"], cmap=plt.cm.Blues, 
color="blue", ) worksheet.insert_image( val[0], val[1], results_path + "\\" + title + ".jpg") else: worksheet.write(val[0], val[1], val[2], good) good = workbook.add_format({"font_color": "blue"}) worksheet.write("A20", "The best result of the learning method", good) good = workbook.add_format({"bold": True, "font_color": "red"}) good.set_align("center") good.set_align("vcenter") for val in best: if isinstance(val[2], dict): if val[2]["accuracy"] != 0: image_num += 1 title = measure + str(image_num) plot_confusion_matrix( val[2]["matrix"], results_path, title=title, accuracy=val[2]["accuracy"], cmap=plt.cm.Reds, color="red", ) worksheet.insert_image(val[0], val[1], results_path + "\\" + title + ".jpg") else: worksheet.write(val[0], val[1], val[2], good) good = workbook.add_format({"font_color": "red"}) worksheet.write("A21", "The best result in all classification", good) bold = workbook.add_format({"bold": True}) worksheet.write("A39", "Results", bold) worksheet.add_table( "A40:S" + str(row), { "columns": [ { "header": "Number" }, { "header": "Type" }, { "header": "N-GRAMS" }, { "header": "TF" }, { "header": "Skips" }, { "header": "Stylistic Features" }, { "header": "Selection" }, { "header": "Number Selected" }, { "header": "Pre Processing" }, { "header": "Stop Words" }, { "header": "K-Folds CV" }, { "header": "Iterations" }, { "header": "SVC" }, { "header": "RF" }, { "header": "MLP" }, { "header": "LR" }, { "header": "MNB" }, { "header": "RNN" }, { "header": "Max Method" }, ], "style": "Table Style Light 8", }, ) worksheet.write("A1", "Classification results: " + measure.replace("_", " "), extra_big) workbook.close() # Delete the images of the non integer measures for file in os.listdir(results_path): if file.endswith(".jpg"): os.remove(results_path + "\\" + file)
model.add(Conv2D(1024, kernel_size=(3, 3), activation='elu'))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])

model.fit(train_X, train_y,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(validation_X, validation_y))

score = model.evaluate(validation_X, validation_y, verbose=0)
print('Val loss:', score[0])
print('Val accuracy:', score[1])

score = model.evaluate(test_X, test_y, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

preds = model.predict(test_X)
plt.figure(figsize=[12] * 2)
plot_confusion_matrix(test_y.argmax(1), preds.argmax(1),
                      [genres[i] for i in top6_idxs], normalize=False)
predictions_3 = model_3.decision_function(test_set_3)

model_4 = LogisticRegression(C=0.8)
model_4.fit(train_set_4, train_labels_4)
predictions_4 = model_4.decision_function(test_set_4)

model_5 = LogisticRegression(C=0.5)
model_5.fit(train_set_5, train_labels_5)
predictions_5 = model_5.decision_function(test_set_5)

model_6 = LogisticRegression(C=1.4)
model_6.fit(train_set_6, train_labels_6)
predictions_6 = model_6.decision_function(test_set_6)

predictions = majority_vote([
    predictions_1, predictions_2, predictions_3,
    predictions_4, predictions_5, predictions_6
])

print(accuracy_score(test_labels, predictions))
print(precision_score(test_labels, predictions))
print(recall_score(test_labels, predictions))
print(f1_score(test_labels, predictions))

cnf_matrix = confusion_matrix(test_labels, predictions)
np.set_printoptions(precision=2)
plt = plot_confusion_matrix(cnf_matrix, classes=[0, 1],
                            title='Confusion matrix Logistic Regression')
plt.savefig("graphs/logistic_confusion_matrix.png")
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

predict_classes = model.predict_classes(x_test, batch_size=1)
true_classes = np.argmax(y_test, 1)
confusion_matrix.plot_confusion_matrix(true_classes, predict_classes, save_flg=True)
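# Hedged aside: Sequential.predict_classes was removed in recent TensorFlow/Keras releases.
# On newer versions the equivalent is predict followed by argmax over the class axis:
# predict_classes = np.argmax(model.predict(x_test, batch_size=1), axis=1)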
def test_KNN(fn): """ Function which will tune and test a K-Nearest Neighbors model. It will plot a confusion matrix and write a performance report to file. Arguments: - fn : Name of the input file. """ #Timer variables start = 0 end = 0 #Load datasets X_train_df = pd.read_csv("input/{}_train_X.csv".format(fn), sep=";") y_train_df = pd.read_csv("input/{}_train_y.csv".format(fn), sep=";") X_test_df = pd.read_csv("input/{}_test_X.csv".format(fn), sep=";") y_test_df = pd.read_csv("input/{}_test_y.csv".format(fn), sep=";") X_val_tr = X_train_df.values y_val_tr = y_train_df.values X_val_test = X_test_df.values y_val_test = y_test_df.values #Convert to numpy arrays X_train = X_val_tr[:].astype(float) y_train = y_val_tr[:] X_test = X_val_test[:].astype(float) y_test = y_val_test[:] #Scale X values (train) scaler = RobustScaler() scaler.fit(X_train) X_train = scaler.transform(X_train) #Scale X values (test) scaler.fit(X_test) X_test = scaler.transform(X_test) #Transform non-numerical values into numericals encoder = LabelEncoder() encoder.fit(y_train.ravel()) encoded_y_train = encoder.transform(y_train.ravel()) encoder.fit(y_test.ravel()) encoded_y_test = encoder.transform(y_test.ravel()) #Number of neighbors (K) to test nr_of_neighbors = [x for x in range(5, 100, 5)] #Variables to store the best values best_model = KNeighborsClassifier() best_acc = 0.0 time_taken = 0 #Test different values for K for K in nr_of_neighbors: knn = KNeighborsClassifier(n_neighbors=K) #Train the model start = time.time() knn.fit(X_train, encoded_y_train) end = time.time() #Predicted values y_pred = knn.predict(X_test) print("\nK: {}".format(knn.get_params()['n_neighbors'])) print("Acc: {}".format(accuracy_score(encoded_y_test, y_pred))) #Measure accuracy and save model if it is the best one if accuracy_score(encoded_y_test, y_pred) > best_acc: time_taken = end - start best_model = knn best_acc = accuracy_score(encoded_y_test, y_pred) #Predict using the best model y_pred = encoder.inverse_transform(best_model.predict(X_test)) K = best_model.get_params()['n_neighbors'] #Making the Confusion Matrix cm = confusion_matrix(y_test, y_pred) print(cm) print("\n") print(classification_report(y_test, y_pred)) print("Scores for final, best model:\n") print("\nK: {}".format(K)) print("Acc: {}".format(accuracy_score(y_test, y_pred))) #Find labels labels = [label for label in y_test_df.iloc[:, 0].unique()] #Plot confusion matrix plot_confusion_matrix(cm, sorted(labels), False) #Show the plot plt.savefig("figures/KNN_confusion_matrix_{}.svg".format(int(time.time()))) #plt.show() #Write a .txt report file with open("reports/KNN_{}_report.txt".format(fn), "w") as f: f.write("REPORT FOR \"{}\"\n\n".format(fn)) f.write("Best value for K: {}".format(K)) f.write("\n\n\nClassification Report:\n") for line in classification_report(y_test, y_pred): f.write(line) f.write("\nConfusion Matrix:\n\n") f.write(np.array2string(cm, separator=', ')) f.write( "\n\nTime used to train the model: {} seconds".format(time_taken)) f.write("\n\nScores for final, best model:\n") f.write("Accuracy: {}".format(best_acc)) f.close()
def new_write_file_content(pickle_file_path, measure, results_path): # Setup the path and the name of the file dataset_name = corpus_name() pickle_file_content = open_pickle_content(pickle_file_path) file_path = os.path.join(results_path, measure.upper().replace( '_', ' ')) + " for " + dataset_name + '.xlsx' # Create an new Excel file and add a worksheet. workbook = xlsxwriter.Workbook(file_path) worksheet = workbook.add_worksheet() # Write titels extra_big = workbook.add_format({ 'bold': True, 'font_size': 17, 'underline': True }) big = workbook.add_format({'bold': True, 'font_size': 12}) worksheet.write('A2', dataset_name, big) worksheet.write( 'A3', 'The classification results are shown in the table below in percentages' ) # Write general data bold_gray = workbook.add_format({'bold': True, 'font_color': 'gray'}) gray = workbook.add_format({'font_color': 'gray'}) worksheet.write('A5', 'General information about the classification software', bold_gray) now = datetime.now() worksheet.write('A6', 'Issue date: ' + now.strftime("%d/%m/%Y %H:%M:%S"), gray) version = str(sys.version_info[0]) + '.' + str( sys.version_info[1]) + '.' + str(sys.version_info[2]) worksheet.write('A7', 'Python version: Python ' + version, gray) worksheet.write( 'A8', 'Python classification libraries: keras, sklearn, tensorflow, VADAR from nltk, WordCloud', gray) # Write normalization worksheet.write('A10', 'Pre Processing', bold_gray) worksheet.write('A11', "C - Spelling Correction", gray) worksheet.write('A12', "L - Lowercase", gray) worksheet.write('A13', "H - HTML tags", gray) worksheet.write('A14', "P - Punctuations", gray) worksheet.write('A15', "R - Repeated chars", gray) worksheet.write('A16', "T - Stemming", gray) worksheet.write('A17', "M - Lemmatizer", gray) # Write learning methods worksheet.write('H10', 'Learning methods', bold_gray) worksheet.write('H11', "svc - Linear SVC", gray) worksheet.write('H12', "rf - Random Forest", gray) worksheet.write('H13', "mlp - Multilayer Perceptron", gray) worksheet.write('H14', "lr - Logistic Regression", gray) worksheet.write('H15', "mnb - Multinomial Naive Bayes", gray) worksheet.write('H16', "rnn - Recurrent Neural Network", gray) # Write stop words option worksheet.write('D10', 'Stop Words Options', bold_gray) worksheet.write('D11', "E - English stop words", gray) worksheet.write('D12', "H - Hebrew stop words", gray) worksheet.write('D13', "X - Extended Hebrew stop words", gray) # Write stylistic features option worksheet.write('F19', 'Stylistic Features Options', bold_gray) worksheet.write('F20', "CC - chars count", gray) worksheet.write('F21', "WC - words count", gray) worksheet.write('F22', "SC - sentence count", gray) worksheet.write('F23', "EMC - exclamation mark (!) count", gray) worksheet.write('F24', "QSMC - question mark (?) 
count", gray) worksheet.write('F25', "SCC - special characters (@, #, $, &, *, %, ^) count", gray) worksheet.write('F26', "QTMC - quotation mark (\", ') count", gray) worksheet.write('F27', "ALW - average letters in words", gray) worksheet.write('F28', "ALS - average letters in sentence", gray) worksheet.write('F29', "AWS - average words in sentence", gray) worksheet.write('F30', "AWL - average words length", gray) worksheet.write('F31', "IE - increasing expressions", gray) worksheet.write('F32', "DE - doubt expressions", gray) worksheet.write('F33', "NW - negative terms", gray) worksheet.write('F34', "PW - positive terms", gray) worksheet.write('F35', "TE - time expressions", gray) worksheet.write('F36', "EE - emotion expressions", gray) worksheet.write('I20', "FPE - first person expressions", gray) worksheet.write('I21', "SPE - second person expressions", gray) worksheet.write('I22', "TPE - third person expressions", gray) worksheet.write('I23', "INE - inclusion expressions", gray) worksheet.write('I24', "P1 - expressions form power 1", gray) worksheet.write('I25', "P2 - expressions form power 2", gray) worksheet.write('I26', "P3 - expressions form power 3", gray) worksheet.write('I27', "PM1 - expressions form power -1", gray) worksheet.write('I28', "PM2 - expressions form power -2", gray) worksheet.write('I29', "PM3 - expressions form power -3", gray) worksheet.write('I30', "PM4 - expressions form power -4", gray) worksheet.write('I31', "AP - expressions form all the powers", gray) worksheet.write( 'I32', "TOPA1 - Enable all features on the 1'st trimester of the text", gray) worksheet.write( 'I32', "TOPA2 - Enable all features on the 2'nd trimester of the text", gray) worksheet.write( 'I33', "TOPA3 - Enable all features on the 3'rd trimester of the text", gray) worksheet.write( 'I34', "TOPB1 - Enable all features on the first ten words of the text", gray) worksheet.write( 'I35', "TOPB2 - Enable all features on the text without 10 first and last 10 words", gray) worksheet.write( 'I36', "TOPB3 - Enable all features on the last ten words of the text", gray) # Write the result row = 40 kind = {'w': 'Words', 'c': 'Chars'} ngrams = {'1': 'Unigrams', '2': 'Bigrams', '3': 'Trigrams', '4': '4-gram'} tf = {'tf': 'TF', 'tfidf': 'TF-IDF'} methods = {'svc': 8, 'rf': 9, 'mlp': 10, 'lr': 11, 'mnb': 12, 'rnn': 13} if measure == "accuracy_&_confusion_matrix": maxes = { 'svc': [[0, 0, { "accuracy": 0, "matrix": None }]], 'rf': [[0, 0, { "accuracy": 0, "matrix": None }]], 'mlp': [[0, 0, { "accuracy": 0, "matrix": None }]], 'lr': [[0, 0, { "accuracy": 0, "matrix": None }]], 'mnb': [[0, 0, { "accuracy": 0, "matrix": None }]], 'rnn': [[0, 0, { "accuracy": 0, "matrix": None }]] } best = [[0, 0, {"accuracy": 0, "matrix": None}]] else: maxes = { 'svc': [[0, 0, 0]], 'rf': [[0, 0, 0]], 'mlp': [[0, 0, 0]], 'lr': [[0, 0, 0]], 'mnb': [[0, 0, 0]], 'rnn': [[0, 0, 0]] } best = [[0, 0, 0]] image_num = 0 for key in sorted(pickle_file_content): value = pickle_file_content[key] # N-Grams data cell_format = workbook.add_format() cell_format.set_text_wrap() cell_format.set_align('vcenter') cell_format.set_align('center') features = value['features'] count = '' type = '' tfidf = '' grams = '' skips = '' for feature in features: feature = feature.split('_') print(feature) count += feature[1] + '\n' type += kind[feature[2]] + '\n' tfidf += tf[feature[3]] + '\n' grams += ngrams[feature[4]] + '\n' skips += feature[5] + '\n' worksheet.write(row, 0, str(count[:-1]), cell_format) worksheet.write(row, 1, type[:-1], cell_format) 
        worksheet.write(row, 2, grams[:-1], cell_format)
        worksheet.write(row, 3, tfidf[:-1], cell_format)
        worksheet.write(row, 4, skips[:-1], cell_format)

        # Stylistic Features data
        stylistic_features = ''
        for styl_feature in value['stylistic_features']:
            stylistic_features += styl_feature.upper() + ' '
        worksheet.write(row, 5, stylistic_features[:-2], cell_format)

        # Pre Processing and Stop Words data
        cell_format = workbook.add_format()
        cell_format.set_align('center')
        cell_format.set_align('vcenter')
        normalization = ""
        stopwords = ""
        for char in value['normalization']:
            if char in "sbx":
                stopwords += char.replace('s', 'E').replace('b', 'H').replace('x', 'X')
            else:
                normalization += char.upper()
        if normalization == "":
            normalization = "NONE"
        if stopwords == "":
            stopwords = "NONE"
        worksheet.write(row, 6, normalization, cell_format)
        worksheet.write(row, 7, stopwords, cell_format)

        # ML methods and result data
        for method, result in value['results'].items():
            # Non-scalar results (confusion matrix / curves) are plotted and inserted as images
            if not isinstance(result, float):
                title = measure + str(image_num)
                if measure == "confusion_matrix":
                    plot_confusion_matrix(result, results_path, title=title)
                    worksheet.set_column(methods[method], methods[method], 40)
                    worksheet.set_row(row, 140)
                elif measure == "roc_curve":
                    plot_roc_curve(result, results_path, method, title=title)
                    worksheet.set_column(methods[method], methods[method], 50)
                    worksheet.set_row(row, 225)
                elif measure == "precision_recall_curve":
                    plot_precision_recall_curve(result, results_path, title=title)
                    worksheet.set_column(methods[method], methods[method], 47)
                    worksheet.set_row(row, 215)
                elif measure == "accuracy_&_confusion_matrix":
                    plot_confusion_matrix(result["matrix"], results_path, title=title,
                                          accuracy=result["accuracy"], cmap=plt.cm.Greys)
                    worksheet.set_column(methods[method], methods[method], 40)
                    worksheet.set_row(row, 170)
                best, maxes = find_maxes_best(best, maxes, method, methods, row, result)
                worksheet.insert_image(
                    row, methods[method],
                    os.path.join(results_path, title) + ".jpg")
                image_num += 1
                continue

            if isinstance(result, float):
                val = float('{0:.4g}'.format(result * 100))
            else:
                val = result
            worksheet.write(row, methods[method], val, cell_format)
            # Check whether val is bigger than the current max
            best, maxes = find_maxes_best(best, maxes, method, methods, row, val)
        row += 1

    worksheet.write('A19', 'Colors', bold_gray)
    good = workbook.add_format({'bold': True, 'font_color': 'blue'})
    good.set_align('center')
    good.set_align('vcenter')
    for _, method in maxes.items():
        for val in method:
            if isinstance(val[2], dict):
                if val[2]["accuracy"] != 0:
                    image_num += 1
                    title = measure + str(image_num)
                    plot_confusion_matrix(val[2]["matrix"], results_path, title=title,
                                          accuracy=val[2]["accuracy"],
                                          cmap=plt.cm.Blues, color='blue')
                    worksheet.insert_image(
                        val[0], val[1],
                        os.path.join(results_path, title) + ".jpg")
            else:
                worksheet.write(val[0], val[1], val[2], good)
    good = workbook.add_format({'font_color': 'blue'})
    worksheet.write('A20', 'The best result of the learning method', good)

    good = workbook.add_format({'bold': True, 'font_color': 'red'})
    good.set_align('center')
    good.set_align('vcenter')
    for val in best:
        if isinstance(val[2], dict):
            if val[2]["accuracy"] != 0:
                image_num += 1
                title = measure + str(image_num)
                plot_confusion_matrix(val[2]["matrix"], results_path, title=title,
                                      accuracy=val[2]["accuracy"],
                                      cmap=plt.cm.Reds, color='red')
                worksheet.insert_image(
                    val[0], val[1],
                    os.path.join(results_path, title + ".jpg"))
        else:
            worksheet.write(val[0], val[1], val[2], good)
    good = workbook.add_format({'font_color': 'red'})
    worksheet.write('A21', 'The best result in all classification', good)

    bold = workbook.add_format({'bold': True})
    worksheet.write('A39', 'Results', bold)
    worksheet.add_table(
        "A40:N" + str(row),
        {
            'columns': [{'header': 'Number'},
                        {'header': 'Type'},
                        {'header': 'N-GRAMS'},
                        {'header': 'TF'},
                        {'header': 'Skips'},
                        {'header': 'Stylistic Features'},
                        {'header': 'Pre Processing'},
                        {'header': 'Stop Words'},
                        {'header': 'svc'},
                        {'header': 'rf'},
                        {'header': 'mlp'},
                        {'header': 'lr'},
                        {'header': 'mnb'},
                        {'header': 'rnn'}],
            'style': 'Table Style Light 8'
        })
    worksheet.write('A1', 'Classification results: ' + measure.replace("_", " "), extra_big)
    workbook.close()

    # Delete the images generated for the non-scalar measures
    for file in os.listdir(results_path):
        if file.endswith('.jpg'):
            os.remove(os.path.join(results_path, file))
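
# Note (editor): `find_maxes_best` is called above but its definition is not part of
# this excerpt. A minimal, hypothetical sketch of such a helper -- assuming `maxes`
# maps each method name to a list of (row, col, value) cells, `best` holds the best
# cell overall, and results are either plain numbers or dicts with an "accuracy"
# key -- might look like this:
def find_maxes_best_sketch(best, maxes, method, methods, row, val):
    def score(v):
        # dict results (e.g. {"matrix": ..., "accuracy": ...}) are ranked by accuracy;
        # plain numbers are ranked by their own value
        return v["accuracy"] if isinstance(v, dict) else v

    col = methods[method]
    per_method = maxes.get(method, [])
    if not per_method or score(val) > score(per_method[0][2]):
        maxes[method] = [(row, col, val)]
    if not best or score(val) > score(best[0][2]):
        best = [(row, col, val)]
    return best, maxes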

            y_pred.append(pred_class_num)
        except Exception:
            print("Unexpected error:", sys.exc_info()[0])
            traceback.print_exc()

    print("len", len(Y))

    import confusion_matrix as cm
    import matplotlib.pyplot as plt

    # Compute confusion matrix
    from sklearn.metrics import accuracy_score, confusion_matrix
    accuracy = accuracy_score(Y, y_pred)
    cnf_matrix = confusion_matrix(Y, y_pred)
    np.set_printoptions(precision=2)

    # Plot the normalized confusion matrix
    plt.figure()
    cm.plot_confusion_matrix(cnf_matrix, classes, accuracy,
                             normalize=True, title='Confusion matrix')
    plt.show()
else:
    test_image(img_name, model_name)
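
# Note (editor): the project-local `confusion_matrix` module imported above is not
# shown in this excerpt. A minimal plotting helper in the spirit of the classic
# scikit-learn example, assuming the (cnf_matrix, classes, accuracy, normalize,
# title) signature used in the call above, could look like this:
import itertools

import matplotlib.pyplot as plt
import numpy as np


def plot_confusion_matrix_sketch(cnf_matrix, classes, accuracy=None,
                                 normalize=False, title='Confusion matrix',
                                 cmap=plt.cm.Blues):
    """Draw a confusion matrix (optionally row-normalized) on the current figure."""
    if normalize:
        cnf_matrix = cnf_matrix.astype('float') / cnf_matrix.sum(axis=1)[:, np.newaxis]
    if accuracy is not None:
        title = '{} (accuracy: {:.2%})'.format(title, accuracy)
    plt.imshow(cnf_matrix, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Write each cell value, switching the text color on dark cells for readability
    fmt = '.2f' if normalize else 'd'
    thresh = cnf_matrix.max() / 2.
    for i, j in itertools.product(range(cnf_matrix.shape[0]), range(cnf_matrix.shape[1])):
        plt.text(j, i, format(cnf_matrix[i, j], fmt),
                 horizontalalignment='center',
                 color='white' if cnf_matrix[i, j] > thresh else 'black')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()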

def _train(self) -> Optional[float]:
    criterion = nn.CrossEntropyLoss()
    print_freq = 10
    acc = None
    max_accuracy = 0.0

    print("Evaluation before fine-tuning")
    correct = 0
    total = 0
    count = 0.0
    running_val_loss = 0.0
    # Keep the backbone in eval mode; only the fine-tuned blocks switch modes below
    self._state.model.eval()
    if self._train_cfg.architecture == 'PNASNet':
        self._state.model.module.cell_11.eval()
        self._state.model.module.cell_10.eval()
        self._state.model.module.cell_9.eval()
        self._state.model.module.dropout.eval()
    elif self._train_cfg.architecture == 'EfficientNet':
        self._state.model.module.classifier.eval()
        self._state.model.module.conv_head.eval()
        self._state.model.module.bn2.eval()
    else:
        self._state.model.module.layer4[2].bn3.eval()

    with torch.no_grad():
        y_pred = []
        y_true = []
        for data in self._test_loader:
            images, labels = data
            images = images.cuda(self._train_cfg.local_rank, non_blocking=True)
            labels = labels.cuda(self._train_cfg.local_rank, non_blocking=True)
            outputs = self._state.model(images)
            loss_val = criterion(outputs, labels)
            _, predicted = torch.max(outputs.data, 1)
            y_pred = y_pred + predicted.tolist()
            y_true = y_true + labels.tolist()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            running_val_loss += loss_val.item()
            count = count + 1.0

    acc = correct / total
    ls_nm = running_val_loss / count
    cnf_matrix = confusion_matrix(y_true, y_pred)
    print('Confusion matrix:')
    print(cnf_matrix)
    print('\nAccuracy_confusion_matrix:',
          np.diagonal(cnf_matrix).sum() / cnf_matrix.sum())
    print(f"Accuracy of the network on the {total} test images: {acc:.1%}", flush=True)
    print(f"Loss of the network on the {total} test images: {ls_nm:.3f}", flush=True)
    print("Accuracy before fine-tuning : " + str(acc))
    max_accuracy = np.max((max_accuracy, acc))

    start_epoch = self._state.epoch  # Start from the loaded epoch
    j = 0
    for epoch in range(start_epoch, self._train_cfg.epochs):
        print(f"Start epoch {epoch}", flush=True)
        self._state.model.eval()
        if self._train_cfg.architecture == 'PNASNet':
            self._state.model.module.cell_11.train()
            self._state.model.module.cell_10.train()
            self._state.model.module.cell_9.train()
            self._state.model.module.dropout.train()
        elif self._train_cfg.architecture == 'EfficientNet':
            self._state.model.module.classifier.train()
            self._state.model.module.conv_head.train()
            self._state.model.module.bn2.train()
        else:
            self._state.model.module.layer4[2].bn3.train()
        j = epoch * len(self._train_loader)
        self._state.lr_scheduler.step(epoch)
        self._state.epoch = epoch
        running_loss = 0.0
        y_pred_train = []
        y_true_train = []
        for i, data in enumerate(self._train_loader):
            inputs, labels = data
            inputs = inputs.cuda(self._train_cfg.local_rank, non_blocking=True)
            labels = labels.cuda(self._train_cfg.local_rank, non_blocking=True)
            outputs = self._state.model(inputs)
            loss = criterion(outputs, labels)
            _, predicted = torch.max(outputs.data, 1)
            y_pred_train = y_pred_train + predicted.tolist()
            y_true_train = y_true_train + labels.tolist()
            self._state.optimizer.zero_grad()
            loss.backward()
            self._state.optimizer.step()
            print("Shape_data_train:", len(y_true_train))
            running_loss += loss.item()
            if i % print_freq == print_freq - 1:
                acc = accuracy_score(y_true_train, y_pred_train)
                import collections
                count_y_train_T = collections.Counter(y_true_train)
                # print("######")
                # print(count_y_train_T)
                count_y_train_T = OrderedDict(sorted(count_y_train_T.items()))
                # plt.hist(count_y_train_T.values(), bins=list(count_y_train_T.keys()))
                # plt.title('Number of training samples per class')
                # plt.xlabel('Class')
                # plt.ylabel('Number of samples')
                # plt.savefig('EDA_train_data.png')
                train_eda = pygal.Bar()
                train_eda.title = 'Number of samples per class in the training set'
                train_eda.x_labels = map(str, count_y_train_T.keys())
                train_eda.add('number of data', count_y_train_T.values())
                train_eda.render_to_png('eda_data_train.png')
                train_eda.render()

                # Running training-set metrics for this epoch
                f1 = f1_score(y_true_train, y_pred_train, average='micro')
                precision = precision_score(y_true_train, y_pred_train, average='micro')
                recall = recall_score(y_true_train, y_pred_train, average='micro')
                # print(f"[{epoch:02d}, {i:05d}] loss: {running_loss/print_freq:.3f}", flush=True)
                j = epoch * len(self._train_loader) + i  # global TensorBoard step
                writer.add_scalar("loss iter", running_loss / print_freq, j)
                writer.add_scalar("accuracy iter", acc, j)
                writer.add_scalar("f1-score iter", f1, j)
                writer.add_scalar("precision_score iter", precision, j)
                writer.add_scalar("recall_score iter", recall, j)
                writer.add_scalar("learning rate iter",
                                  self._state.lr_scheduler.get_last_lr()[0], j)
                running_loss = 0.0

        if epoch == self._train_cfg.epochs - 1:
            print("Start evaluation of the model", flush=True)
            correct = 0
            total = 0
            count = 0.0
            running_val_loss = 0.0
            self._state.model.eval()
            if self._train_cfg.architecture == 'PNASNet':
                self._state.model.module.cell_11.eval()
                self._state.model.module.cell_10.eval()
                self._state.model.module.cell_9.eval()
                self._state.model.module.dropout.eval()
            elif self._train_cfg.architecture == 'EfficientNet':
                self._state.model.module.classifier.eval()
                self._state.model.module.conv_head.eval()
                self._state.model.module.bn2.eval()
            else:
                self._state.model.module.layer4[2].bn3.eval()

            with torch.no_grad():
                y_true = []
                y_pred = []
                for data in self._test_loader:
                    images, labels = data
                    images = images.cuda(self._train_cfg.local_rank, non_blocking=True)
                    labels = labels.cuda(self._train_cfg.local_rank, non_blocking=True)
                    outputs = self._state.model(images)
                    loss_val = criterion(outputs, labels)
                    _, predicted = torch.max(outputs.data, 1)
                    y_pred = y_pred + predicted.tolist()
                    y_true = y_true + labels.tolist()
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
                    running_val_loss += loss_val.item()
                    count = count + 1.0

            print("Shape_test_data:", len(y_true))
            cnf_matrix = confusion_matrix(y_true, y_pred)
            pred_true = np.diag(cnf_matrix)
            pred_true = list(pred_true)
            import collections
            counter_true = collections.Counter(y_true)
            counter_true = OrderedDict(sorted(counter_true.items()))
            # print("true", counter_true)
            y_true_gb = counter_true.values()
            # sub = np.array(list(y_true_gb)) - np.array(pred_true)
            # a, y_true_gb = zip(*sorted(zip(list(sub), list(y_true_gb))))
            # b, pred_true = zip(*sorted(zip(list(sub), pred_true)))

            line_chart = pygal.Bar()
            line_chart.title = 'Per-class prediction performance of the model on the test set'
            line_chart.x_labels = map(str, range(37))
            line_chart.add('y_true', y_true_gb)
            line_chart.add('y_pred_true', pred_true)
            line_chart.render_to_png('chart_pred_true.png')
            line_chart.render()

            test_eda = pygal.Bar()
            test_eda.title = 'Number of samples per class in the test set'
            test_eda.x_labels = map(str, counter_true.keys())
            test_eda.add('number of data test', counter_true.values())
            test_eda.render_to_png('eda_data_test.png')
            test_eda.render()

            # df = pd.DataFrame({'y_true': y_true_gb,
            #                    'y_pred_true': pred_true}, index=index)
            # ax = df.plot.bar(rot=0)
            # plot = ax
            # fig = plot.get_figure()
            # ax = fig.add_subplot(ax)
            # fig.savefig("pred_true.png")

            path = 'save_folder/confusion_matrix.csv'
            np.savetxt(path, cnf_matrix.astype(np.int32), delimiter=",")
            print('Confusion matrix:')
            print(cnf_matrix)

            # Plot non-normalized confusion matrix
            class_names = [i for i in range(len(cnf_matrix))]
            plt.figure()
            plot_confusion_matrix(cnf_matrix, classes=class_names,
                                  title='Confusion matrix, without normalization')

            # Plot normalized confusion matrix
            plt.figure(figsize=(50, 50))
            plot_confusion_matrix(cnf_matrix, classes=class_names,
                                  normalize=True, title='Confusion matrix')
            plt.savefig("mygraph.png")

            print('\nAccuracy_confusion_matrix:',
                  np.diagonal(cnf_matrix).sum() / cnf_matrix.sum())

            # Model accuracy: how often is the classifier correct?
            acc_ = 100 * accuracy_score(y_true, y_pred)
            print("Accuracy: ", acc_)
            precision_micro = 100 * precision_score(y_true, y_pred, average='micro')
            print("Precision (micro): ", precision_micro)
            precision_macro = 100 * precision_score(y_true, y_pred, average='macro')
            print("Precision (macro): ", precision_macro)
            recall_micro = 100 * recall_score(y_true, y_pred, average='micro')
            print("Recall (micro): ", recall_micro)
            recall_macro = 100 * recall_score(y_true, y_pred, average='macro')
            print("Recall (macro): ", recall_macro)
            f1_macro = 100 * f1_score(y_true, y_pred, average='macro')
            print("F1-score (macro): ", f1_macro)
            f1_micro = 100 * f1_score(y_true, y_pred, average='micro')
            print("F1-score (micro): ", f1_micro)

            acc = correct / total
            ls_nm = running_val_loss / count
            matrix_measure = [
                acc_, precision_micro, precision_macro,
                recall_micro, recall_macro, f1_micro, f1_macro
            ]
            header = [
                'accuracy', 'precision_micro', 'precision_macro',
                'recall_micro', 'recall_macro', 'f1_micro', 'f1_macro'
            ]
            import csv
            # Write one "name, value" row per metric
            with open('accuracy_measure.csv', 'w', newline='') as csv_file:
                writer_ = csv.writer(csv_file)
                for name, item in zip(header, matrix_measure):
                    writer_.writerow([name, item])

            print(f"Accuracy of the network on the {total} test images: {acc:.1%}",
                  flush=True)
            print(f"Loss of the network on the {total} test images: {ls_nm:.3f}",
                  flush=True)
            self._state.accuracy = acc
            if self._train_cfg.global_rank == 0:
                self.checkpoint(rm_init=False)

        if epoch == self._train_cfg.epochs - 1:
            return acc
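
# Note (editor): the evaluation above reports both micro- and macro-averaged scores.
# A tiny self-contained example (toy labels, not from this project) shows why the two
# can diverge on an imbalanced test set:
from sklearn.metrics import f1_score

toy_true = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1]   # class 0 dominates, class 1 is rare
toy_pred = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]   # one rare-class sample is missed

print(f1_score(toy_true, toy_pred, average='micro'))  # ~0.90: weighted by sample count
print(f1_score(toy_true, toy_pred, average='macro'))  # ~0.80: each class counts equally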