def misclassification(dataFile, rootNode):
    """
    Report the number of correctly and incorrectly classified samples in the
    provided data set.
    :param dataFile: data file / data set
    :param rootNode: trained root node of the decision tree
    :return: number of misclassified and correctly classified samples per class
    """
    miss_prediction = [0, 0, 0, 0]
    correct_prediction = [0, 0, 0, 0]
    correct = 0
    incorrect = 0
    attribute, label, _ = loadData(dataFile)
    attribute = np.array(attribute)

    # Classify every sample and tally hits/misses per class (labels are 1-based).
    for sample_index in range(attribute.shape[0]):
        prediction = classify(attribute[sample_index], rootNode)
        if int(label[sample_index]) == int(prediction):
            correct_prediction[label[sample_index] - 1] += 1
            correct += 1
        else:
            miss_prediction[label[sample_index] - 1] += 1
            incorrect += 1

    accuracy = correct / (correct + incorrect)

    # Mean per-class accuracy: average of the recognition rate of each class.
    mean_per_class_accuracy = 0
    for counter in range(CLASSES):
        correct = correct_prediction[counter]
        incorrect = miss_prediction[counter]
        mean_per_class_accuracy += correct / (CLASSES * (correct + incorrect))

    print("\n--------------------------------- Recognition Rate ----------------------------------")
    print("Total Accuracy : ", accuracy)
    print("Mean Per Class Accuracy : ", mean_per_class_accuracy)
    print("-------------------------------------------------------------------------------------\n")
    return miss_prediction, correct_prediction
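

# Helper sketch (an assumption, not part of the original module): derive the
# per-class recognition rates directly from the count lists returned by
# misclassification(). Classes with no samples are reported as None, which the
# mean-per-class loop above does not guard against.
def per_class_accuracy(miss_prediction, correct_prediction):
    rates = []
    for hit, miss in zip(correct_prediction, miss_prediction):
        total = hit + miss
        rates.append(hit / total if total > 0 else None)
    return rates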
def decision_boundary(treeRoot, figure, data_file):
    """
    Plot the decision boundary together with the data points.
    :param treeRoot: root node of the decision tree
    :param figure: matplotlib figure to draw into
    :param data_file: data file
    :return: decision plot (matplotlib axes)
    """
    decision_plot = figure.add_subplot(111)
    attribute, label, _ = loadData(data_file)
    attribute, label = np.array(attribute), np.array(label)

    classes = [1, 2, 3, 4]
    colors_box = ['y', 'b', 'g', 'm']
    marker_box = ['*', '+', 'x', 'o']

    # Evaluate the tree on a dense grid over the unit square to obtain the
    # predicted class at every grid point.
    step = .001
    x1_corr, x2_corr = np.meshgrid(np.arange(0, 1, step), np.arange(0, 1, step))
    Y_predicted = []
    for i in range(x1_corr.shape[0]):
        Y_predicted.append([])
        for j in range(x1_corr.shape[1]):
            sample = [x1_corr[i][j], x2_corr[i][j]]
            predicted = classify(np.array(sample), treeRoot)
            Y_predicted[i].append(predicted)

    # Filled contour of the predicted classes forms the decision regions.
    decision_plot.contourf(x1_corr, x2_corr, np.array(Y_predicted))

    # Overlay the data points of each class with its own color and marker.
    for index in classes:
        x1 = [attribute[i][0] for i in range(len(attribute)) if label[i] == index]
        x2 = [attribute[i][1] for i in range(len(attribute)) if label[i] == index]
        decision_plot.scatter(x1, x2,
                              color=colors_box[index - 1],
                              marker=marker_box[index - 1],
                              label=CLASS_LIST[index - 1],
                              s=100)

    decision_plot.legend(loc='upper right')
    decision_plot.set_xlabel("Six fold Rotational Symmetry")
    decision_plot.set_ylabel("Eccentricity")
    decision_plot.set_title("Decision boundary")
    return decision_plot
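

# Convenience sketch (an assumption, not in the original code): render the
# decision boundary for a trained tree on a data file and save the figure to
# disk instead of showing it interactively. Uses only standard matplotlib calls.
def save_decision_boundary(treeRoot, data_file, out_path="decision_boundary.png"):
    import matplotlib.pyplot as plt

    figure = plt.figure(figsize=(8, 6))
    decision_boundary(treeRoot, figure, data_file)
    figure.savefig(out_path, dpi=150, bbox_inches="tight")
    plt.close(figure)
    return out_path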
def get_confusion_matrix(rootNode, data_file):
    """
    Construct the confusion matrix by predicting the class of every sample in
    the given data set with the trained decision tree.
    :param rootNode: root node of the decision tree
    :param data_file: CSV data file
    :return: confusion matrix as a list of lists (rows = predicted, columns = actual)
    """
    attribute, label, _ = loadData(data_file)
    attribute = np.array(attribute)

    # Initialise a CLASSES x CLASSES matrix of zeros.
    confusion_matrix = [[0 for _ in range(CLASSES)] for _ in range(CLASSES)]

    # Count each (predicted, actual) pair; labels are 1-based.
    for sample_counter in range(attribute.shape[0]):
        actual_class = label[sample_counter]
        predicted_class = classify(attribute[sample_counter], rootNode)
        confusion_matrix[int(predicted_class) - 1][int(actual_class) - 1] += 1

    print_data(confusion_matrix)
    return confusion_matrix
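

# Minimal driver sketch showing how the three reporting functions above fit
# together. `build_tree` and the CSV file names are assumptions standing in
# for the project's actual training entry point and data paths.
if __name__ == "__main__":
    import matplotlib.pyplot as plt

    root = build_tree("train_data.csv")           # hypothetical training call
    misclassification("test_data.csv", root)      # prints recognition rates
    get_confusion_matrix(root, "test_data.csv")   # prints confusion matrix

    fig = plt.figure()
    decision_boundary(root, fig, "test_data.csv")
    plt.show()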