def q4confmat(full_dat, noisy_dat):
    ref_dict = full_dat.getDictionary()

    # recover ground-truth labels by looking up each noisy attribute row
    # in the full (clean) dataset
    annotations = []
    for attrib in noisy_dat.attrib:
        attribString = ','.join(str(v) for v in attrib)
        if attribString not in ref_dict:
            print("ERROR: attribString not present!")
            continue
        annotations.append(ref_dict[attribString])
    evaluator = Evaluator()
    c_matrix = evaluator.confusion_matrix(noisy_dat.labels, annotations)
    print(c_matrix)
    target_names = ["A", "C", "E", "G", "O", "Q"]
    plot_confusion_matrix(c_matrix, target_names, "Noisy vs Full")

    precision, macro_p = evaluator.precision(c_matrix)
    recall, macro_r = evaluator.recall(c_matrix)
    f1, macro_f1 = evaluator.f1_score(c_matrix)

    p = np.append(precision, macro_p)
    r = np.append(recall, macro_r)
    f1 = np.append(f1, macro_f1)

    performance_matrix = np.vstack((p, r, f1))  # rows: precision, recall, f1
    print(performance_matrix)
    plot_other_stats(performance_matrix, "Train_noisy")
    return
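The (scores, macro) tuples unpacked above come from the Evaluator's confusion matrix; a minimal NumPy sketch of how per-class precision, recall, and F1 plus their macro averages can be derived from such a matrix is shown below. This is an illustration only, not the coursework Evaluator itself, and it assumes rows are ground-truth classes and columns are predictions.

import numpy as np

def scores_from_confusion(confusion):
    # Assumes rows = ground truth, columns = predictions; the Evaluator
    # used in these examples may use the opposite orientation.
    tp = np.diag(confusion).astype(float)
    predicted_totals = confusion.sum(axis=0)  # column sums
    actual_totals = confusion.sum(axis=1)     # row sums

    # Guard against classes with no predictions or no instances.
    precision = np.divide(tp, predicted_totals, out=np.zeros_like(tp),
                          where=predicted_totals > 0)
    recall = np.divide(tp, actual_totals, out=np.zeros_like(tp),
                       where=actual_totals > 0)
    denom = precision + recall
    f1 = np.divide(2 * precision * recall, denom, out=np.zeros_like(tp),
                   where=denom > 0)

    # Each metric mirrors the (per-class array, macro average) pairs
    # unpacked throughout these examples.
    return ((precision, precision.mean()),
            (recall, recall.mean()),
            (f1, f1.mean()))

Stacking the three per-class-plus-macro rows with np.vstack then reproduces the 3 x (n_classes + 1) performance_matrix built in q4confmat.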
Example #2
def old_test():
    # data_read("data/toy.txt")
    prediction = ["A", "B"]
    annotation = ["A", "A"]
    class_labels = ["B", "A"]
    obj = Evaluator()
    matrix = obj.confusion_matrix(prediction, annotation, class_labels)
    print('{0:.15f}'.format(obj.accuracy(matrix)))
    print(obj.precision(matrix))
    print(obj.recall(matrix))
    print(obj.f1_score(matrix))
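old_test prints the accuracy to 15 decimal places straight from the confusion matrix; a one-function sketch of that computation (an assumption consistent with how Evaluator.accuracy is called here, not the original implementation) is:

import numpy as np

def accuracy_from_confusion(confusion):
    # Correct predictions lie on the diagonal whichever axis holds the
    # ground truth; the total is the sum over all cells.
    total = confusion.sum()
    return np.trace(confusion) / total if total > 0 else 0.0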
Example #3
def print_stats(predictions, y_test):

    evaluator = Evaluator()  # avoid shadowing the built-in eval
    confusion = evaluator.confusion_matrix(predictions, y_test)

    accuracy = evaluator.accuracy(confusion)
    precision = evaluator.precision(confusion)
    recall = evaluator.recall(confusion)
    f1 = evaluator.f1_score(confusion)

    print("confusion", confusion)
    print("accuracy", accuracy)
    print("precision", precision)
    print("recall", recall)
    print("f1", f1)

    return
Example #4
def main():
    print("Loading the training dataset...")
    x = np.array([[5, 7, 1], [4, 6, 2], [4, 6, 3], [1, 3, 1], [2, 1, 2],
                  [5, 2, 6]])

    y = np.array(["A", "A", "A", "C", "C", "C"])

    print("Training the decision tree...")
    classifier = DecisionTreeClassifier()
    classifier = classifier.train(x, y)

    print("Loading the test set...")

    x_test = np.array([[1, 6, 3], [0, 5, 5], [1, 5, 0], [2, 4, 2]])

    y_test = np.array(["A", "A", "C", "C"])

    predictions = classifier.predict(x_test)
    print("Predictions: {}".format(predictions))

    classes = ["A", "C"]

    print("Evaluating test predictions...")
    evaluator = Evaluator()
    confusion = evaluator.confusion_matrix(predictions, y_test)

    print("Confusion matrix:")
    print(confusion)

    accuracy = evaluator.accuracy(confusion)
    print()
    print("Accuracy: {}".format(accuracy))

    (p, macro_p) = evaluator.precision(confusion)
    (r, macro_r) = evaluator.recall(confusion)
    (f, macro_f) = evaluator.f1_score(confusion)

    print()
    print("Class: Precision, Recall, F1")
    for (i, (p1, r1, f1)) in enumerate(zip(p, r, f)):
        print("{}: {:.2f}, {:.2f}, {:.2f}".format(classes[i], p1, r1, f1))

    print()
    print("Macro-averaged Precision: {:.2f}".format(macro_p))
    print("Macro-averaged Recall: {:.2f}".format(macro_r))
    print("Macro-averaged F1: {:.2f}".format(macro_f))
Example #5
def test_DecisionTreeClassifier(dataset_filename: str = "toy.txt",
                                should_load_file=False):
    # train
    extless_filename = dataset_filename[:-4]
    start = time.time()
    saved_tree_file = None
    if should_load_file:
        saved_tree_file = "tree_" + extless_filename + ".obj"
    cl = DecisionTreeClassifier(saved_tree_file=saved_tree_file)
    dataset = data_read("data/" + dataset_filename)
    unique_lbls = np.unique([e.label for e in dataset.entries])
    x, y = dataset.shim_to_arrays()
    cl.train(x, y)
    cl.tree.save_tree("tree_" + extless_filename + ".obj")
    visualize_tree(cl.tree,
                   save_filename=f"visualize_tree_{extless_filename}.txt",
                   max_depth=8)
    duration = time.time() - start
    print("duration: ", duration)

    # predict
    test_dataset = data_read("data/test.txt")
    x_test, y_test = test_dataset.shim_to_arrays()
    preds = cl.predict(x_test)
    # preds = [random.choice('ACEGOQ')
    #  for _ in range(len(y_test))]  # testing random
    # evaluate
    ev = Evaluator()
    matrix = ev.confusion_matrix(preds, y_test, unique_lbls)
    print("real accuracy: ", accuracy_score(y_test, preds))
    print("\nour calc accuracy: ", str.format('{0:.15f}', ev.accuracy(matrix)))
    print("\n precision:", precision_score(y_test, preds, average="macro"))
    print("\n our precision: ", ev.precision(matrix))
    print("\nreal recall: ", recall_score(y_test, preds, average="macro"))
    print("\n our recall: ", ev.recall(matrix))
    print("\n f1_score", f1_score(y_test, preds, average="macro"))
    print("\n f1_score: ", ev.f1_score(matrix))
    print(matrix)
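test_DecisionTreeClassifier sanity-checks the custom Evaluator against scikit-learn by printing both side by side; the same idea can be turned into an assertion-based check. This is a sketch that assumes each Evaluator metric returns a (per-class, macro) tuple, as it is unpacked in the other examples.

import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score

def check_against_sklearn(ev, matrix, y_test, preds, atol=1e-9):
    # Compare the Evaluator's macro averages with scikit-learn's
    # reference implementations on the same predictions.
    _, macro_p = ev.precision(matrix)
    _, macro_r = ev.recall(matrix)
    _, macro_f = ev.f1_score(matrix)
    assert np.isclose(macro_p,
                      precision_score(y_test, preds, average="macro"),
                      atol=atol)
    assert np.isclose(macro_r,
                      recall_score(y_test, preds, average="macro"),
                      atol=atol)
    assert np.isclose(macro_f,
                      f1_score(y_test, preds, average="macro"),
                      atol=atol)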
Example #6
    def calc_stats(self, test_path, path_to_data, plt_title, prune,
                   pruneAggressively):
        # load dataset, attribs, labels
        d_subset = ClassifierDataset()
        d_subset.initFromFile(path_to_data)
        attribs = d_subset.attrib
        labels = d_subset.labels

        ds_test = ClassifierDataset()
        ds_test.initFromFile(test_path)
        test_attribs = ds_test.attrib
        test_labels = ds_test.labels

        #train and predict
        print("TRAINING")
        tree = DecisionTreeClassifier()
        tree.train(attribs, labels)

        print("FINISHED TRAINING")
        if prune:
            print("PRUNING")
            # NOTE: val_path is not a parameter of this method; it is
            # assumed to be defined at module scope
            validationDataset = ClassifierDataset()
            validationDataset.initFromFile(val_path)

            Prune(tree, validationDataset.attrib, validationDataset.labels,
                  pruneAggressively)

            print("FINISHED PRUNING")

        predictions = tree.predict(test_attribs)

        evaluator = Evaluator()
        c_matrix = evaluator.confusion_matrix(predictions, test_labels)
        print(c_matrix)

        a = ["A", "C", "E", "G", "O", "Q"]
        b = path_to_data[7:-4]
        if prune:
            if pruneAggressively:
                b = b + "_aggressively_pruned"
            else:
                b += "_pruned"

        else:
            b += "_not_pruned"

        plot_confusion_matrix(c_matrix, a, plt_title)
        print(" ")
        print("Accuracy: " + str(evaluator.accuracy(c_matrix)))
        print(" ")

        precision, macro_p = evaluator.precision(c_matrix)
        recall, macro_r = evaluator.recall(c_matrix)
        f1, macro_f1 = evaluator.f1_score(c_matrix)

        p = np.append(precision, macro_p)
        r = np.append(recall, macro_r)
        f1 = np.append(f1, macro_f1)

        performance_matrix = np.vstack((p, r, f1))  # rows: precision, recall, f1
        print(performance_matrix)
        plot_other_stats(performance_matrix, plt_title)
        # print("Precision: " + str(precision))
        # print("Recall: " + str(recall))
        # print("F1 Score: " + str(f1))

        print()
        print("Macro avg recall: " + str(macro_r))
        print("Macro avg precision: " + str(macro_p))
        print("Macro avg f1: " + str(macro_f1))
        print()