def make_and_save_d_tree(df_labels, df_data, N):
    """Train one decision tree per emotion, score each tree and save them.

    The last segment returned by ``util.preprocess_for_cross_validation(N)``
    is held out as validation data and the remaining examples are used for
    training. Every tree is paired with the fraction of validation examples
    for which ``TreeNode.dfs2`` returned 1, and the list of
    ``(tree, fraction)`` tuples is handed to ``util.save_trees_to_file``.

    Args:
        df_labels: Emotion labels for the examples in ``df_data``.
        df_data: Attribute values, one row per example.
        N: Forwarded to ``util.preprocess_for_cross_validation`` and
            ``util.divide_data``; presumably the total number of examples
            (TODO confirm against callers).
    """
    print(">> Running decision tree algorithm on a single process.\n")

    segments = util.preprocess_for_cross_validation(N)

    # Split data into 90% training and 10% validation (last segment held out).
    (validation_data, validation_targets,
     train_df_data, train_df_targets) = util.divide_data(
         segments[-1], N, df_data, df_labels)

    # Train one binary tree per emotion, in EMOTIONS_LIST order.
    trees = []
    for e in cnst.EMOTIONS_LIST:
        print("Building decision tree for emotion: ", e)
        train_binary_targets = util.filter_for_emotion(
            train_df_targets, cnst.EMOTIONS_DICT[e])
        root = dtree.decision_tree(
            train_df_data, set(cnst.AU_INDICES), train_binary_targets)
        print("Decision tree built. Now appending...")
        trees.append(root)

    # Use validation data to set a priority for each tree based on how
    # accurate it is. Trees are paired with their emotion positionally, so
    # the lookup does not depend on EMOTIONS_DICT values being 1-based
    # positions in EMOTIONS_LIST.
    percentage = []
    for e, tree in zip(cnst.EMOTIONS_LIST, trees):
        print("\nValidation phase for emotion: ", e)
        validation_binary_targets = util.filter_for_emotion(
            validation_targets, cnst.EMOTIONS_DICT[e])
        results = [
            TreeNode.dfs2(tree, validation_data.loc[i],
                          validation_binary_targets.loc[i].at[0])
            for i in validation_data.index.values
        ]
        # An empty validation split yields priority 0.0 instead of raising
        # ZeroDivisionError.
        percentage.append(results.count(1) / len(results) if results else 0.0)
    print("Validation phase ended. Priority levels have been set.")
    print("All decision trees built.\n")

    # List containing (Tree, Percentage) tuples.
    util.save_trees_to_file(list(zip(trees, percentage)))
def cross_validation_error(df_labels, N, df_data, segments):
    """Compute the mean cross-validation error of the tree for each emotion.

    For every emotion in ``cnst.EMOTIONS_LIST`` the labels are converted to
    binary targets with ``util.filter_for_emotion``. For every test segment a
    tree is trained on the training part, plotted with
    ``TreeNode.plot_tree`` and evaluated on the test part. The per-fold error
    is the fraction of test examples for which ``TreeNode.dfs2`` did not
    return 1; the per-emotion error is the mean over all folds.

    Args:
        df_labels: Emotion labels for the examples in ``df_data``.
        N: Forwarded to ``util.divide_data``; presumably the total number of
            examples (TODO confirm against callers).
        df_data: Attribute values, one row per example.
        segments: Iterable of test segments, one per fold.

    Returns:
        dict mapping each emotion in ``cnst.EMOTIONS_LIST`` to its mean
        error over the folds.
    """
    error_list = {}

    for e in cnst.EMOTIONS_LIST:
        total_error_for_emotion = 0
        fold_count = 0
        print("/\\ Decision tree building for emotion:", e)
        binary_targets = util.filter_for_emotion(
            df_labels, cnst.EMOTIONS_DICT[e])

        for test_seg in segments:
            fold_count += 1
            # Split the *binary* targets so the tree for this emotion is
            # trained and scored against its own one-vs-rest labels.
            (test_df_data, test_df_targets,
             train_df_data, train_df_targets) = util.divide_data(
                 test_seg, N, df_data, binary_targets)

            root = dtree.decision_tree(
                train_df_data, set(cnst.AU_INDICES), train_df_targets)
            TreeNode.plot_tree(root, e)
            print("/\\ Decision tree built.\n")

            # Counts number of incorrectly predicted tests.
            count = sum(
                1 - TreeNode.dfs2(root, test_df_data.loc[i],
                                  test_df_targets.loc[i].at[0])
                for i in test_df_data.index.values
            )
            test_size = len(test_df_targets)
            error = count / test_size if test_size else 0
            total_error_for_emotion += error
            print()

        # Average over the folds actually run rather than a hard-coded 10.
        if fold_count:
            total_error_for_emotion /= fold_count
        error_list[e] = total_error_for_emotion
        print()
        print("Total error:", total_error_for_emotion)
        print()

    return error_list
def apply_d_tree(df_labels, df_data, N):
    """Cross-validate the per-emotion trees and return the confusion matrix.

    For each test segment from ``util.preprocess_for_cross_validation(N)``:
    the data is split into test and training parts, the training part is
    split again into training and validation parts, one tree per emotion is
    trained, each tree is scored on the validation part, and the
    ``(tree, score)`` pairs are used by ``test_trees`` to predict the test
    part. The confusion matrix from ``compare_pred_expect`` is printed and
    accumulated for every fold.

    Args:
        df_labels: Emotion labels for the examples in ``df_data``.
        df_data: Attribute values, one row per example.
        N: Forwarded to ``util.preprocess_for_cross_validation`` and
            ``util.divide_data``; presumably the total number of examples
            (TODO confirm against callers).

    Returns:
        The sum of the per-fold confusion matrices with every row divided
        by its row total.
    """
    print(">> Running decision tree algorithm on a single process.\n")

    res = pd.DataFrame(0, index=cnst.EMOTIONS_INDICES,
                       columns=cnst.EMOTIONS_INDICES)
    segments = util.preprocess_for_cross_validation(N)
    total_accuracy = 0
    fold_count = 0

    for test_seg in segments:
        fold_count += 1
        print(">> Starting fold... from:", test_seg)
        print()

        # Split data into 90% training and 10% testing.
        (test_df_data, test_df_targets,
         train_df_data, train_df_targets) = util.divide_data(
             test_seg, N, df_data, df_labels)

        # Further split training data into 90% training and 10% validation.
        K = train_df_data.shape[0]
        segs = util.preprocess_for_cross_validation(K)
        (validation_data, validation_targets,
         train_data, train_targets) = util.divide_data(
             segs[-1], K, train_df_data, train_df_targets)

        # Train one binary tree per emotion, in EMOTIONS_LIST order. The
        # targets come from the same inner split as the training rows.
        trees = []
        for e in cnst.EMOTIONS_LIST:
            print("Building decision tree for emotion: ", e)
            train_binary_targets = util.filter_for_emotion(
                train_targets, cnst.EMOTIONS_DICT[e])
            root = dtree.decision_tree(
                train_data, set(cnst.AU_INDICES), train_binary_targets)
            print("Decision tree built. Now appending...")
            trees.append(root)

        # Use validation data to set a priority for each tree based on how
        # accurate it is. Trees are paired with their emotion positionally.
        percentage = []
        for e, tree in zip(cnst.EMOTIONS_LIST, trees):
            print("\nValidation phase for emotion: ", e)
            validation_binary_targets = util.filter_for_emotion(
                validation_targets, cnst.EMOTIONS_DICT[e])
            results = [
                TreeNode.dfs2(tree, validation_data.loc[i],
                              validation_binary_targets.loc[i].at[0])
                for i in validation_data.index.values
            ]
            # An empty validation split yields priority 0.0 instead of
            # raising ZeroDivisionError.
            percentage.append(
                results.count(1) / len(results) if results else 0.0)
        print("Validation phase ended. Priority levels have been set.")
        print("All decision trees built.\n")

        # List containing (Tree, Percentage) tuples.
        T_P = list(zip(trees, percentage))

        predictions = test_trees(T_P, test_df_data)
        confusion_matrix = compare_pred_expect(predictions, test_df_targets)
        print(confusion_matrix)

        # Print accuracy for each fold: diagonal entries over all entries.
        diag = np.diag(confusion_matrix).sum()
        sum_all = confusion_matrix.values.sum()
        accuracy = (diag / sum_all) * 100
        total_accuracy += accuracy
        print("Accuracy:", accuracy)

        res = res.add(confusion_matrix)
        print("Folding ended.\n")

    print()
    # Report the mean accuracy over all folds, not just the last fold's.
    mean_accuracy = total_accuracy / fold_count if fold_count else 0.0
    print("Total accuracy:", mean_accuracy)

    # Normalise each row by its total.
    res = res.div(res.sum(axis=1), axis=0)
    print(res)
    # NOTE(review): a per-emotion "MEASUREMENTS" printout using
    # measures.compute_binary_confusion_matrix used to follow this return
    # and could never run; it was dropped as dead code. Reinstate it before
    # the return if that output is wanted.
    return res