Example #1
def make_and_save_d_tree(df_labels, df_data, N):
    print(">> Running decision tree algorithm on a single process.\n")

    segments = util.preprocess_for_cross_validation(N)
    T = []
    # Hold out the last segment (10%) as validation data; train on the remaining 90%
    validation_data, validation_targets, train_df_data, train_df_targets = util.divide_data(
        segments[-1], N, df_data, df_labels)

    # Train Trees
    for e in cnst.EMOTIONS_LIST:
        print("Building decision tree for emotion: ", e)
        train_binary_targets = util.filter_for_emotion(train_df_targets,
                                                       cnst.EMOTIONS_DICT[e])
        root = dtree.decision_tree(train_df_data, set(cnst.AU_INDICES),
                                   train_binary_targets)
        print("Decision tree built. Now appending...")
        T.append(root)

    # Use the validation data to assign each tree a priority based on its accuracy
    percentage = []
    for e in cnst.EMOTIONS_LIST:
        print("\nValidation phase for emotion: ", e)
        validation_binary_targets = util.filter_for_emotion(
            validation_targets, cnst.EMOTIONS_DICT[e])
        results = []
        # Calculate how accurate each tree is when predicting emotions
        for i in validation_data.index.values:
            results.append(
                TreeNode.dfs2(T[cnst.EMOTIONS_DICT[e] - 1],
                              validation_data.loc[i],
                              validation_binary_targets.loc[i].at[0]))
        ones = results.count(1)
        percentage.append(ones / len(results))
        print("Validation phase ended. Priority levels have been set.")

    print("All decision trees built.\n")

    # List containing (Tree, Percentage) tuples
    T_P = list(zip(T, percentage))
    util.save_trees_to_file(T_P)
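All of the examples on this page lean on helpers from a util module that is not shown here. The sketch below is only an illustration of what preprocess_for_cross_validation and divide_data might look like, assuming each segment is a (start, end) range of row positions covering one fold; the names, shapes and ten-fold default are assumptions, not the project's actual implementation.

# Hypothetical sketch of the cross-validation helpers assumed by these examples.
def preprocess_for_cross_validation(N, folds=10):
    # Split row positions 0..N-1 into contiguous (start, end) ranges, one per fold.
    size = N // folds
    return [(i * size, N if i == folds - 1 else (i + 1) * size)
            for i in range(folds)]

def divide_data(test_seg, N, df_data, df_labels):
    # Hold out the rows covered by test_seg; every other row is kept for training.
    # N (the total row count) is kept only to mirror the signature used above.
    start, end = test_seg
    held_out = df_data.index[start:end]
    kept = df_data.index.difference(held_out)
    return (df_data.loc[held_out], df_labels.loc[held_out],
            df_data.loc[kept], df_labels.loc[kept])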
Example #2
def cross_validation_error(df_labels, N, df_data, segments):

    # Average cross-validation error per emotion, filled in below
    error_list = {}
    for e in cnst.EMOTIONS_LIST:
        total_error_for_emotion = 0
        print("/\\ Decision tree building for emotion:", e)
        # Binary targets: 1 where the label matches this emotion, 0 otherwise
        binary_targets = util.filter_for_emotion(df_labels,
                                                 cnst.EMOTIONS_DICT[e])
        for test_seg in segments:
            test_df_data, test_binary_targets, train_df_data, train_binary_targets = util.divide_data(
                test_seg, N, df_data, binary_targets)
            root = dtree.decision_tree(train_df_data, set(cnst.AU_INDICES),
                                       train_binary_targets)
            TreeNode.plot_tree(root, e)
            print("/\\ Decision tree built.\n")
            count = 0
            # Count the number of incorrectly predicted test samples
            for i in test_df_data.index.values:
                count += 1 - TreeNode.dfs2(root, test_df_data.loc[i],
                                           test_binary_targets.loc[i].at[0])

            error = count / len(test_binary_targets)
            total_error_for_emotion += error
            print()

        # Average the error over all folds for this emotion
        total_error_for_emotion /= len(segments)
        error_list[e] = total_error_for_emotion
        print("\nAverage cross-validation error for", e, ":",
              total_error_for_emotion, "\n")

    return error_list
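Each tree is a binary classifier for one emotion: util.filter_for_emotion presumably rewrites the 1-6 emotion labels as 1/0 targets for a single emotion, and TreeNode.dfs2 appears to return 1 when a tree's prediction for a sample matches its binary target and 0 otherwise, which is why results.count(1) counts correct predictions and 1 - dfs2(...) counts errors. A rough sketch of the filtering step, under those assumptions only:

# Hypothetical sketch -- not the project's real filter_for_emotion.
def filter_for_emotion(df_labels, emotion_code):
    # Map the multi-class labels to binary targets for one emotion:
    # 1 where the label equals emotion_code, 0 everywhere else.
    binary = df_labels.copy()
    col = binary.columns[0]  # the examples read this column back via .at[0]
    binary[col] = (binary[col] == emotion_code).astype(int)
    return binary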
Example #3
def apply_d_tree(df_labels, df_data, N):
    print(">> Running decision tree algorithm on a single process.\n")

    res = pd.DataFrame(0,
                       index=cnst.EMOTIONS_INDICES,
                       columns=cnst.EMOTIONS_INDICES)

    segments = util.preprocess_for_cross_validation(N)
    total_accuracy = 0
    for test_seg in segments:
        print(">> Starting fold... from:", test_seg)
        print()

        T = []
        # Split data into 90% Training and 10% Testing
        test_df_data, test_df_targets, train_df_data, train_df_targets = util.divide_data(
            test_seg, N, df_data, df_labels)

        # Further split training data into 90% Training and 10% Validation data
        K = train_df_data.shape[0]
        segs = util.preprocess_for_cross_validation(K)
        validation_data, validation_targets, train_data, train_targets = util.divide_data(
            segs[-1], K, train_df_data, train_df_targets)

        # Train Trees
        for e in cnst.EMOTIONS_LIST:
            print("Building decision tree for emotion: ", e)
            train_binary_targets = util.filter_for_emotion(
                train_targets, cnst.EMOTIONS_DICT[e])
            root = decision_tree(train_data, set(cnst.AU_INDICES),
                                 train_binary_targets)
            print("Decision tree built. Now appending...")
            T.append(root)

        # Use the validation data to assign each tree a priority based on its accuracy
        percentage = []
        for e in cnst.EMOTIONS_LIST:
            print("\nValidation phase for emotion: ", e)
            validation_binary_targets = util.filter_for_emotion(
                validation_targets, cnst.EMOTIONS_DICT[e])
            results = []
            # Calculate how accurate each tree is when predicting emotions
            for i in validation_data.index.values:
                results.append(
                    TreeNode.dfs2(T[cnst.EMOTIONS_DICT[e] - 1],
                                  validation_data.loc[i],
                                  validation_binary_targets.loc[i].at[0]))
            ones = results.count(1)
            percentage.append(ones / len(results))
            print("Validation phase ended. Priority levels have been set.")

        print("All decision trees built.\n")

        # List containing (Tree, Percentage) tuples
        T_P = list(zip(T, percentage))

        predictions = test_trees(T_P, test_df_data)
        confusion_matrix = compare_pred_expect(predictions, test_df_targets)

        print(confusion_matrix)
        # Print accuracy for each fold
        diag = sum(
            pd.Series(np.diag(confusion_matrix),
                      index=[confusion_matrix.index,
                             confusion_matrix.columns]))
        sum_all = confusion_matrix.values.sum()
        accuracy = (diag / sum_all) * 100
        total_accuracy += accuracy
        print("Accuracy:", accuracy)

        res = res.add(confusion_matrix)
        print("Folding ended.\n")
        print()
    print("Total accuracy:", accuracy)
    res = res.div(res.sum(axis=1), axis=0)
    print(res)
    return res

    res = res.div(res.sum(axis=1), axis=0)

    for e in cnst.EMOTIONS_LIST:
        print(
            "----------------------------------- MEASUREMENTS -----------------------------------"
        )
        print(
            measures.compute_binary_confusion_matrix(res,
                                                     cnst.EMOTIONS_DICT[e]))

    return res
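The per-fold accuracy computed in the last example is the trace of the confusion matrix divided by the total number of predictions, expressed as a percentage; the pd.Series/np.diag construction is an indirect way of summing the diagonal. A compact equivalent, shown only as an illustration:

import numpy as np
import pandas as pd

def fold_accuracy(confusion_matrix: pd.DataFrame) -> float:
    # Fraction of predictions that land on the diagonal (correct class), as a percentage.
    correct = np.trace(confusion_matrix.values)
    total = confusion_matrix.values.sum()
    return correct / total * 100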