예제 #1
0
def misclassification(dataFile, rootNode):
    """
    Report number of correctly and incorrectly classified samples in the provided data set

    Every sample returned by loadData is classified with the tree; the result
    is tallied per class, and the overall accuracy and the mean per-class
    accuracy are printed. Labels are used as 1-based class numbers, so class
    ``k`` is tallied at list index ``k - 1``.

    Classes that have no samples in the data set are left out of the
    per-class mean (instead of raising ZeroDivisionError); an empty data set
    reports 0.0 for both figures.

    :param dataFile: datafile/data set, handed to loadData unchanged
    :param rootNode: trained root node of decision tree
    :return: Number of misclassified and correctly classified samples, as two
        lists holding one counter per class (CLASSES entries each)
    """
    # Size the tallies from CLASSES so they always agree with the averaging
    # loop below, which iterates over every class.
    miss_prediction = [0] * CLASSES
    correct_prediction = [0] * CLASSES

    attribute, label, _ = loadData(dataFile)
    attribute = np.array(attribute)

    for sample, actual in zip(attribute, label):
        # The label is compared as an int, so index with that same int rather
        # than the raw value (a float or str label cannot index a list).
        actual_class = int(actual)
        if actual_class == int(classify(sample, rootNode)):
            correct_prediction[actual_class - 1] += 1
        else:
            miss_prediction[actual_class - 1] += 1

    correct = sum(correct_prediction)
    total = correct + sum(miss_prediction)
    accuracy = correct / total if total else 0.0

    class_totals = [
        hits + misses
        for hits, misses in zip(correct_prediction, miss_prediction)
    ]
    # Only classes that actually occur can have an accuracy; when all of them
    # occur this equals CLASSES and the formula matches the usual mean.
    populated = sum(1 for class_total in class_totals if class_total)
    mean_per_class_accuracy = 0.0
    for hits, class_total in zip(correct_prediction, class_totals):
        if class_total:
            mean_per_class_accuracy += hits / (populated * class_total)

    print("\n--------------------------------- Recognition Rate ----------------------------------")
    print("Total Accuracy           : ", accuracy)
    print("Mean Per Class Accuracy  : ", mean_per_class_accuracy)
    print("-------------------------------------------------------------------------------------\n")

    return miss_prediction, correct_prediction
예제 #2
0
def decision_boundary(treeRoot, figure, data_file):
    """
    Draw the tree's decision regions and overlay the samples of a data file

    The unit square [0, 1) x [0, 1) is sampled on a regular grid, every grid
    point is classified with the tree, and the predictions are rendered as a
    filled contour. The samples are then scattered on top, one colour/marker
    per class, using their first two attribute columns as x and y.

    :param treeRoot: Root node of the decision tree
    :param figure: figure in plot; a new subplot is added to it
    :param data_file: data file, handed to loadData
    :return: decision plot (the subplot that was drawn on)
    """
    axes = figure.add_subplot(111)
    samples, targets, _ = loadData(data_file)
    samples, targets = np.array(samples), np.array(targets)

    # (colour, marker) for class 1, 2, 3, 4 in that order.
    class_styles = (('y', '*'), ('b', '+'), ('g', 'x'), ('m', 'o'))

    # Same tick values on both axes; the grid is classified point by point,
    # so a finer resolution gets expensive quickly.
    ticks = np.arange(0, 1, .001)
    grid_x, grid_y = np.meshgrid(ticks, ticks)
    regions = [
        [
            classify(np.array([x_value, y_value]), treeRoot)
            for x_value, y_value in zip(x_row, y_row)
        ]
        for x_row, y_row in zip(grid_x, grid_y)
    ]
    axes.contourf(grid_x, grid_y, np.array(regions))

    for class_id, (colour, glyph) in enumerate(class_styles, start=1):
        # Positions of the samples that carry this class label.
        members = [
            position for position in range(len(samples))
            if targets[position] == class_id
        ]
        axes.scatter(
            [samples[position][0] for position in members],
            [samples[position][1] for position in members],
            color=colour,
            marker=glyph,
            label=CLASS_LIST[class_id - 1],
            s=100,
        )

    axes.legend(loc='upper right')
    axes.set_xlabel("Six fold Rotational Symmetry")
    axes.set_ylabel("Eccentricity")
    axes.set_title("Decision boundary")
    return axes
예제 #3
0
def get_confusion_matrix(rootNode, data_file):
    """
    Build the confusion matrix of the decision tree over a data set

    Each sample returned by loadData is classified with the tree and counted
    in the cell ``[predicted - 1][actual - 1]``: rows are indexed by the
    predicted class and columns by the actual class. The finished matrix is
    passed to print_data before it is returned.

    :param rootNode: root node of the decision tree
    :param data_file: data file, handed to loadData
    :return: confusion matrix as a CLASSES x CLASSES list of lists of counts
    """
    samples, targets, _ = loadData(data_file)
    samples = np.array(samples)

    # Independent rows of zeros, one row per class.
    matrix = [[0] * CLASSES for _ in range(CLASSES)]

    for position, sample in enumerate(samples):
        truth = targets[position]
        guess = classify(sample, rootNode)
        matrix[int(guess) - 1][int(truth) - 1] += 1

    print_data(matrix)
    return matrix