Ejemplo n.º 1
0
def make_and_save_d_tree(df_labels, df_data, N):
    """Train one decision tree per emotion, score each tree, and save them.

    The last segment returned by ``util.preprocess_for_cross_validation(N)``
    is handed to ``util.divide_data`` to hold out a validation part. One
    tree is built per entry of ``cnst.EMOTIONS_LIST`` on the remaining rows.
    Each tree is then scored on the validation rows as the fraction of rows
    for which ``TreeNode.dfs2`` returns 1, and the ``(tree, score)`` pairs
    are passed to ``util.save_trees_to_file``.

    Args:
        df_labels: Target labels, forwarded to ``util.divide_data``.
        df_data: Feature data, forwarded to ``util.divide_data``.
        N: Forwarded to ``util.preprocess_for_cross_validation`` and
            ``util.divide_data`` (presumably the number of samples --
            confirm against ``util``).

    Returns:
        None. The trees are persisted through ``util.save_trees_to_file``.

    Raises:
        ZeroDivisionError: If the validation part contains no rows.
    """
    print(">> Running decision tree algorithm on a single process.\n")

    segments = util.preprocess_for_cross_validation(N)
    # Hold out the last segment for validation; train on everything else.
    validation_data, validation_targets, train_df_data, train_df_targets = (
        util.divide_data(segments[-1], N, df_data, df_labels))

    # Train trees, one per emotion, in EMOTIONS_LIST order.
    T = []
    for e in cnst.EMOTIONS_LIST:
        print("Building decision tree for emotion: ", e)
        train_binary_targets = util.filter_for_emotion(train_df_targets,
                                                       cnst.EMOTIONS_DICT[e])
        root = dtree.decision_tree(train_df_data, set(cnst.AU_INDICES),
                                   train_binary_targets)
        print("Decision tree built. Now appending...")
        T.append(root)

    # Use validation data to set a priority to each tree based on which is
    # more accurate.
    percentage = []
    for e in cnst.EMOTIONS_LIST:
        print("\nValidation phase for emotion: ", e)
        validation_binary_targets = util.filter_for_emotion(
            validation_targets, cnst.EMOTIONS_DICT[e])
        # The tree is looked up through the emotion id (ids are used as
        # 1-based positions into T here).
        tree = T[cnst.EMOTIONS_DICT[e] - 1]
        results = [
            TreeNode.dfs2(tree, validation_data.loc[i],
                          validation_binary_targets.loc[i].at[0])
            for i in validation_data.index.values
        ]
        percentage.append(results.count(1) / len(results))
        print("Validation phase ended. Priority levels have been set.")

    print("All decision trees built.\n")

    # List containing (Tree, Percentage) tuples
    T_P = list(zip(T, percentage))
    util.save_trees_to_file(T_P)
def apply_d_forest(df_labels, df_data, N):
    """Cross-validate a decision forest and return its confusion matrix.

    For each segment produced by ``util.preprocess_for_cross_validation(N)``
    the data is split with ``util.divide_data``, the training part is split
    into samples by ``split_in_random``, and one tree per (emotion, sample)
    pair is built. The forest's predictions on the test part are turned into
    a confusion matrix, which is printed and added to a running total.

    After all folds the overall accuracy (diagonal over total, in percent)
    is printed, the summed matrix is normalised row-wise, and the result of
    ``measures.compute_binary_confusion_matrix`` is printed per emotion.

    Args:
        df_labels: Target labels, forwarded to ``util.divide_data``.
        df_data: Feature data, forwarded to ``util.divide_data``.
        N: Forwarded to ``util.preprocess_for_cross_validation`` and
            ``util.divide_data``.

    Returns:
        The row-normalised, summed confusion matrix as a ``DataFrame``.
    """
    print(">> Running decision forest algorithm on a single process.\n")

    emotion_axis = cnst.EMOTIONS_INDICES
    totals = pd.DataFrame(0, index=emotion_axis, columns=emotion_axis)

    for fold in util.preprocess_for_cross_validation(N):
        print(">> Starting fold... from:", fold)
        print()

        held_out_data, held_out_targets, fit_data, fit_targets = (
            util.divide_data(fold, N, df_data, df_labels))

        sample_pairs = split_in_random(fit_data, fit_targets)
        print("Building decision forest...")

        forest = []
        for emotion in cnst.EMOTIONS_LIST:
            emotion_trees = []
            for targets_part, data_part in sample_pairs:
                print("Building decision tree for emotion...", emotion)
                binary_part = util.filter_for_emotion(
                    targets_part, cnst.EMOTIONS_DICT[emotion])
                # A fresh attribute set is passed for every tree.
                emotion_trees.append(
                    dtree.decision_tree(data_part, set(cnst.AU_INDICES),
                                        binary_part))
                print("Decision tree built. Now appending...\n")
            forest.append(emotion_trees)

        fold_matrix = dtree.compare_pred_expect(
            test_forest_trees(forest, held_out_data), held_out_targets)
        print(
            "----------------------------------- CONFUSION MATRIX -----------------------------------\n"
        )
        print(fold_matrix)
        totals = totals.add(fold_matrix)

    # Overall accuracy: correctly classified (diagonal) over all samples.
    correct = np.diag(totals).sum()
    overall = totals.values.sum()
    print(
        "-----------------------------------  AVERAGE ACCURACY -----------------------------------\n:",
        (correct / overall) * 100)

    # Normalise every row by its own sum.
    row_sums = totals.sum(axis=1)
    totals = totals.div(row_sums, axis=0)

    for emotion in cnst.EMOTIONS_LIST:
        print(
            "----------------------------------- MEASUREMENTS -----------------------------------"
        )
        binary_matrix = measures.compute_binary_confusion_matrix(
            totals, cnst.EMOTIONS_DICT[emotion])
        print(binary_matrix)

    return totals
Ejemplo n.º 3
0
def make_and_save_d_forest(df_labels, df_data, N):
    """Build a decision forest using one worker process per tree and save it.

    The data is split into samples by ``dforest.split_in_random``. For each
    emotion in ``cnst.EMOTIONS_LIST`` one process per sample runs
    ``dtree.decision_tree_parallel``, which is given a dedicated ``Queue``
    to deliver its tree. The per-emotion lists of trees are collected and
    passed to ``util.save_forest_to_file``.

    Args:
        df_labels: Target labels, forwarded to ``dforest.split_in_random``.
        df_data: Feature data, forwarded to ``dforest.split_in_random``.
        N: Unused; kept so the signature matches the other entry points.

    Returns:
        None. The forest is persisted through ``util.save_forest_to_file``.
    """
    print(">> Running decision forest algorithm on multiple processes.\n")

    forest_T = []

    samples = dforest.split_in_random(df_data, df_labels)
    print("Building decision forest...")
    for e in cnst.EMOTIONS_LIST:
        processes = []
        queue_list = []

        for (sample_target, sample_data) in samples:
            print("Building decision tree for emotion...", e)
            train_binary_targets = util.filter_for_emotion(
                sample_target, cnst.EMOTIONS_DICT[e])

            q = Queue()
            queue_list.append(q)

            process = Process(target=dtree.decision_tree_parallel,
                              args=(sample_data, set(cnst.AU_INDICES),
                                    train_binary_targets, q))
            processes.append(process)
            process.start()

        # Drain every queue BEFORE joining: a process that has put data on
        # a queue does not terminate until that data has been flushed to the
        # underlying pipe, so joining first can deadlock on large results.
        T = [q.get() for q in queue_list]

        for p in processes:
            p.join()

        forest_T.append(T)
    util.save_forest_to_file(forest_T)
def cross_validation_error(df_labels, N, df_data, segments):
    """Estimate the cross-validated error rate of each emotion's tree.

    For every emotion in ``cnst.EMOTIONS_LIST`` and every test segment, the
    data is split with ``util.divide_data``, the training and test targets
    are reduced to that emotion with ``util.filter_for_emotion``, a tree is
    built and plotted, and the fold error is computed as the fraction of
    test rows for which ``TreeNode.dfs2`` does not return 1. The fold errors
    are averaged per emotion and printed.

    Args:
        df_labels: Target labels, forwarded to ``util.divide_data``.
        N: Forwarded to ``util.divide_data``.
        df_data: Feature data, forwarded to ``util.divide_data``.
        segments: Re-iterable collection of test segments, one per fold.

    Returns:
        dict mapping each emotion name to its mean error over the folds
        (0.0 when ``segments`` is empty).
    """
    error_list = {}
    for e in cnst.EMOTIONS_LIST:
        print("/\\ Decision tree building for emotion:", e)
        emotion_id = cnst.EMOTIONS_DICT[e]
        fold_errors = []
        for test_seg in segments:
            test_df_data, test_df_targets, train_df_data, train_df_targets = (
                util.divide_data(test_seg, N, df_data, df_labels))
            # Train and score against the binary targets of THIS emotion;
            # with the unfiltered targets every emotion would produce the
            # same tree and the same error.
            train_binary_targets = util.filter_for_emotion(
                train_df_targets, emotion_id)
            test_binary_targets = util.filter_for_emotion(
                test_df_targets, emotion_id)

            root = dtree.decision_tree(train_df_data, set(cnst.AU_INDICES),
                                       train_binary_targets)
            TreeNode.plot_tree(root, e)
            print("/\\ Decision tree built.\n")

            # Counts number of incorrectly predicted tests
            count = 0
            for i in test_df_data.index.values:
                count += 1 - TreeNode.dfs2(root, test_df_data.loc[i],
                                           test_binary_targets.loc[i].at[0])

            fold_errors.append(count / len(test_df_targets))
            print()

        # Average over the folds actually run instead of a hard-coded 10.
        if fold_errors:
            total_error_for_emotion = sum(fold_errors) / len(fold_errors)
        else:
            total_error_for_emotion = 0.0
        error_list[e] = total_error_for_emotion
        print()
        print("Total error:", total_error_for_emotion)
        print()

    return error_list
Ejemplo n.º 5
0
def apply_d_tree(df_labels, df_data, N):
    """Cross-validate the per-emotion decision trees.

    For every segment from ``util.preprocess_for_cross_validation(N)`` the
    data is split into a test part and a training part, and the training
    part is split once more to hold out validation rows. One tree per
    emotion is trained on the remaining rows and scored on the validation
    rows (fraction of rows for which ``TreeNode.dfs2`` returns 1). The
    ``(tree, score)`` pairs are given to ``test_trees`` to predict the test
    rows, and the resulting confusion matrix and accuracy are printed and
    accumulated.

    Args:
        df_labels: Target labels, forwarded to ``util.divide_data``.
        df_data: Feature data, forwarded to ``util.divide_data``.
        N: Forwarded to ``util.preprocess_for_cross_validation`` and
            ``util.divide_data``.

    Returns:
        The summed confusion matrix with every row divided by its row sum.
    """
    print(">> Running decision tree algorithm on a single process.\n")

    res = pd.DataFrame(0,
                       index=cnst.EMOTIONS_INDICES,
                       columns=cnst.EMOTIONS_INDICES)

    segments = util.preprocess_for_cross_validation(N)
    total_accuracy = 0
    for test_seg in segments:
        print(">> Starting fold... from:", test_seg)
        print()

        # Split data into 90% Training and 10% Testing
        test_df_data, test_df_targets, train_df_data, train_df_targets = (
            util.divide_data(test_seg, N, df_data, df_labels))

        # Further split training data into 90% Training and 10% Validation
        K = train_df_data.shape[0]
        segs = util.preprocess_for_cross_validation(K)
        validation_data, validation_targets, train_data, train_targets = (
            util.divide_data(segs[-1], K, train_df_data, train_df_targets))

        # Train trees. The targets must come from the same split as
        # train_data, otherwise the held-out validation labels are handed
        # to the training step as well.
        T = []
        for e in cnst.EMOTIONS_LIST:
            print("Building decision tree for emotion: ", e)
            train_binary_targets = util.filter_for_emotion(
                train_targets, cnst.EMOTIONS_DICT[e])
            root = decision_tree(train_data, set(cnst.AU_INDICES),
                                 train_binary_targets)
            print("Decision tree built. Now appending...")
            T.append(root)

        # Use validation data to set a priority to each tree based on which
        # is more accurate.
        percentage = []
        for e in cnst.EMOTIONS_LIST:
            print("\nValidation phase for emotion: ", e)
            validation_binary_targets = util.filter_for_emotion(
                validation_targets, cnst.EMOTIONS_DICT[e])
            tree = T[cnst.EMOTIONS_DICT[e] - 1]
            results = [
                TreeNode.dfs2(tree, validation_data.loc[i],
                              validation_binary_targets.loc[i].at[0])
                for i in validation_data.index.values
            ]
            percentage.append(results.count(1) / len(results))
            print("Validation phase ended. Priority levels have been set.")

        print("All decision trees built.\n")

        # List containing (Tree, Percentage) tuples
        T_P = list(zip(T, percentage))

        predictions = test_trees(T_P, test_df_data)
        confusion_matrix = compare_pred_expect(predictions, test_df_targets)

        print(confusion_matrix)
        # Print accuracy for each fold: diagonal (correct) over all entries.
        diag = np.diag(confusion_matrix).sum()
        sum_all = confusion_matrix.values.sum()
        accuracy = (diag / sum_all) * 100
        total_accuracy += accuracy
        print("Accuracy:", accuracy)

        res = res.add(confusion_matrix)
        print("Folding ended.\n")
        print()

    # Report the mean of the per-fold accuracies rather than whatever the
    # last fold happened to score.
    fold_count = len(segments)
    mean_accuracy = total_accuracy / fold_count if fold_count else 0
    print("Total accuracy:", mean_accuracy)

    # Normalise every row by its own sum.
    res = res.div(res.sum(axis=1), axis=0)
    print(res)
    return res
Ejemplo n.º 6
0
def visualise(df_labels, df_data, N):
    """Build and plot one decision tree per emotion.

    Each tree is built from all of ``df_data`` against the labels filtered
    for that emotion, then passed to ``TreeNode.plot_tree``.

    Args:
        df_labels: Target labels, filtered per emotion.
        df_data: Feature data the trees are built from.
        N: Unused.
    """
    for emotion in cnst.EMOTIONS_LIST:
        # A fresh attribute set is created for every tree.
        attributes = set(cnst.AU_INDICES)
        emotion_targets = util.filter_for_emotion(df_labels,
                                                  cnst.EMOTIONS_DICT[emotion])
        tree_root = decision_tree(df_data, attributes, emotion_targets)
        TreeNode.plot_tree(tree_root, emotion)