def q4confmat(full_dat, noisy_dat):
    """Build a confusion matrix of the noisy labels against the ground-truth labels."""
    ref_dict = full_dat.getDictionary()  # ground-truth labels keyed by attribute string
    annotations = []
    for attrib in noisy_dat.attrib:
        attribString = ','.join(str(v) for v in attrib)
        if attribString not in ref_dict:
            print("ERROR: attribString not present!")
            continue
        annotations.append(ref_dict[attribString])

    evaluator = Evaluator()
    c_matrix = evaluator.confusion_matrix(noisy_dat.labels, annotations)
    print(c_matrix)

    target_names = ["A", "C", "E", "G", "O", "Q"]
    plot_confusion_matrix(c_matrix, target_names, "Noisy vs Full")

    precision, macro_p = evaluator.precision(c_matrix)
    recall, macro_r = evaluator.recall(c_matrix)
    f1, macro_f1 = evaluator.f1_score(c_matrix)

    p = np.append(precision, macro_p)
    r = np.append(recall, macro_r)
    f1 = np.append(f1, macro_f1)

    # Stack per-class plus macro-averaged precision, recall and F1 into one matrix.
    performance_matrix = np.vstack((p, r, f1))
    print(performance_matrix)
    plot_other_stats(performance_matrix, "Train_noisy")
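# Example usage of q4confmat (a minimal sketch; ClassifierDataset is the loader used
# elsewhere in this project and the file paths are hypothetical):
#
#     full = ClassifierDataset()
#     full.initFromFile("data/train_full.txt")
#     noisy = ClassifierDataset()
#     noisy.initFromFile("data/train_noisy.txt")
#     q4confmat(full, noisy)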
def old_test():
    # data_read("data/toy.txt")
    prediction = ["A", "B"]
    annotation = ["A", "A"]
    class_labels = ["B", "A"]

    obj = Evaluator()
    matrix = obj.confusion_matrix(prediction, annotation, class_labels)
    print("{0:.15f}".format(obj.accuracy(matrix)))
    print(obj.precision(matrix))
    print(obj.recall(matrix))
    print(obj.f1_score(matrix))
def print_stats(predictions, y_test):
    # Print the full set of evaluation metrics for a batch of predictions.
    evaluator = Evaluator()  # renamed from `eval` to avoid shadowing the built-in
    confusion = evaluator.confusion_matrix(predictions, y_test)
    accuracy = evaluator.accuracy(confusion)
    precision = evaluator.precision(confusion)
    recall = evaluator.recall(confusion)
    f1 = evaluator.f1_score(confusion)

    print("confusion", confusion)
    print("accuracy", accuracy)
    print("precision", precision)
    print("recall", recall)
    print("f1", f1)
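# Example usage of print_stats (a sketch; assumes a classifier trained elsewhere in
# this module and a held-out split x_test / y_test):
#
#     classifier = DecisionTreeClassifier()
#     classifier.train(x_train, y_train)
#     print_stats(classifier.predict(x_test), y_test)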
def main():
    print("Loading the training dataset...")
    x = np.array([[5, 7, 1],
                  [4, 6, 2],
                  [4, 6, 3],
                  [1, 3, 1],
                  [2, 1, 2],
                  [5, 2, 6]])
    y = np.array(["A", "A", "A", "C", "C", "C"])

    print("Training the decision tree...")
    classifier = DecisionTreeClassifier()
    classifier = classifier.train(x, y)

    print("Loading the test set...")
    x_test = np.array([[1, 6, 3],
                       [0, 5, 5],
                       [1, 5, 0],
                       [2, 4, 2]])
    y_test = np.array(["A", "A", "C", "C"])

    predictions = classifier.predict(x_test)
    print("Predictions: {}".format(predictions))

    classes = ["A", "C"]

    print("Evaluating test predictions...")
    evaluator = Evaluator()
    confusion = evaluator.confusion_matrix(predictions, y_test)

    print("Confusion matrix:")
    print(confusion)

    accuracy = evaluator.accuracy(confusion)
    print()
    print("Accuracy: {}".format(accuracy))

    (p, macro_p) = evaluator.precision(confusion)
    (r, macro_r) = evaluator.recall(confusion)
    (f, macro_f) = evaluator.f1_score(confusion)

    print()
    print("Class: Precision, Recall, F1")
    for (i, (p1, r1, f1)) in enumerate(zip(p, r, f)):
        print("{}: {:.2f}, {:.2f}, {:.2f}".format(classes[i], p1, r1, f1))

    print()
    print("Macro-averaged Precision: {:.2f}".format(macro_p))
    print("Macro-averaged Recall: {:.2f}".format(macro_r))
    print("Macro-averaged F1: {:.2f}".format(macro_f))
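# main() is the intended entry point when this file is run as a script; a common
# guard (placement at the bottom of the file is an assumption) would be:
#
#     if __name__ == "__main__":
#         main()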
def test_DecisionTreeClassifier(dataset_filename: str = "toy.txt",
                                should_load_file=False):
    # --- train ---
    extless_filename = dataset_filename[:-4]  # strip the ".txt" extension
    start = time.time()

    saved_tree_file = None
    if should_load_file:
        saved_tree_file = "tree_" + extless_filename + ".obj"

    cl = DecisionTreeClassifier(saved_tree_file=saved_tree_file)
    dataset = data_read("data/" + dataset_filename)
    unique_lbls = np.unique([e.label for e in dataset.entries])
    x, y = dataset.shim_to_arrays()
    cl.train(x, y)
    cl.tree.save_tree("tree_" + extless_filename + ".obj")
    visualize_tree(cl.tree,
                   save_filename=f"visualize_tree_{extless_filename}.txt",
                   max_depth=8)

    duration = time.time() - start
    print("duration: ", duration)

    # --- predict ---
    test_dataset = data_read("data/test.txt")
    x_test, y_test = test_dataset.shim_to_arrays()
    preds = cl.predict(x_test)
    # preds = [random.choice('ACEGOQ') for _ in range(len(y_test))]  # random baseline for testing

    # --- evaluate: cross-check our Evaluator against the sklearn metrics ---
    ev = Evaluator()
    matrix = ev.confusion_matrix(preds, y_test, unique_lbls)
    print("real accuracy: ", accuracy_score(y_test, preds))
    print("\nour accuracy: ", "{0:.15f}".format(ev.accuracy(matrix)))
    print("\nreal precision: ", precision_score(y_test, preds, average="macro"))
    print("\nour precision: ", ev.precision(matrix))
    print("\nreal recall: ", recall_score(y_test, preds, average="macro"))
    print("\nour recall: ", ev.recall(matrix))
    print("\nreal f1_score: ", f1_score(y_test, preds, average="macro"))
    print("\nour f1_score: ", ev.f1_score(matrix))
    print(matrix)
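# Example usage of test_DecisionTreeClassifier (a sketch; "train_full.txt" is a
# hypothetical dataset filename under data/):
#
#     test_DecisionTreeClassifier("train_full.txt")                         # train from scratch
#     test_DecisionTreeClassifier("train_full.txt", should_load_file=True)  # reuse a saved tree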
def calc_stats(self, test_path, path_to_data, plt_title, prune, pruneAggressively):
    # Load the training dataset, its attributes and labels.
    d_subset = ClassifierDataset()
    d_subset.initFromFile(path_to_data)
    attribs = d_subset.attrib
    labels = d_subset.labels

    # Load the test dataset.
    ds_test = ClassifierDataset()
    ds_test.initFromFile(test_path)
    test_attribs = ds_test.attrib
    test_labels = ds_test.labels

    # Train and predict.
    print("TRAINING")
    tree = DecisionTreeClassifier()
    tree.train(attribs, labels)
    print("FINISHED TRAINING")

    if prune:
        print("PRUNING")
        # val_path is not a parameter here; it is assumed to be defined at module scope.
        validationDataset = ClassifierDataset()
        validationDataset.initFromFile(val_path)
        Prune(tree, validationDataset.attrib, validationDataset.labels, pruneAggressively)
        print("FINISHED PRUNING")

    predictions = tree.predict(test_attribs)

    evaluator = Evaluator()
    c_matrix = evaluator.confusion_matrix(predictions, test_labels)
    print(c_matrix)

    a = ["A", "C", "E", "G", "O", "Q"]
    # Derive a name from the dataset path and the pruning settings.
    b = path_to_data[7:-4]
    if prune:
        if pruneAggressively:
            b += "_aggressively_pruned"
        else:
            b += "_pruned"
    else:
        b += "_not_pruned"

    plot_confusion_matrix(c_matrix, a, plt_title)

    print(" ")
    print("Accuracy: " + str(evaluator.accuracy(c_matrix)))
    print(" ")

    precision, macro_p = evaluator.precision(c_matrix)
    recall, macro_r = evaluator.recall(c_matrix)
    f1, macro_f1 = evaluator.f1_score(c_matrix)

    p = np.append(precision, macro_p)
    r = np.append(recall, macro_r)
    f1 = np.append(f1, macro_f1)

    # Stack per-class plus macro-averaged precision, recall and F1 into one matrix.
    performance_matrix = np.vstack((p, r, f1))
    print(performance_matrix)
    plot_other_stats(performance_matrix, plt_title)

    # print("Precision: " + str(precision))
    # print("Recall: " + str(recall))
    # print("F1 Score: " + str(f1))

    print(" ")
    print("Macro avg recall:" + str(macro_r))
    print("Macro avg precision:" + str(macro_p))
    print("Macro avg f1:" + str(macro_f1))
    print(" ")
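# Example usage of calc_stats (a sketch; `Question3` is a placeholder name for whatever
# class owns this method, and the paths/flags are assumptions):
#
#     q = Question3()
#     q.calc_stats("data/test.txt", "data/train_full.txt", "train_full",
#                  prune=True, pruneAggressively=False)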