def generate_set_rating_data(set_data, group_name, save_dir, save=True, show=True):
    """Combine the rating frames of all sets and draw them as a violin facet grid.

    Every set's DataFrame gets an extra top column level (named 'Set') holding the
    set name, and the keyed frames are joined side by side. The combined frame is
    converted to long format and passed to plot_facetgrid with kind='violin'.

    Args:
        set_data (dict): Dictionary with set names as keys and DataFrame of user
            ratings by dialogues as values.
        group_name (str): Base name (without extension) for the output files.
        save_dir (str): Directory to save the resulting .csv and .png files to.
        save (bool): Whether to save the resulting .csv and .png files. Default=True.
        show (bool): Whether to print the frame and show the figure. Default=True.

    Returns:
        tuple: The combined ratings DataFrame and the figure returned by
            plot_facetgrid.
    """
    # Build the combined frame one set at a time, keyed by set name
    combined = pd.DataFrame()
    for name, ratings in set_data.items():
        keyed = pd.concat([ratings], keys=[name], names=['Set'], axis=1)
        combined = pd.concat([combined, keyed], axis=1, sort=False)

    # Plot the ratings of every set
    long_data = dataframe_wide_to_long(combined)
    grid, figure = plot_facetgrid(
        long_data,
        hue='variable_2',
        title='',
        y_label='Confidence',
        kind='violin',
        share_y=True,
        colour='five_colour',
        num_legend_col=5,
        inner='box',
        cut=0,
    )

    # Show and/or persist the results
    if show:
        print(combined)
        figure.show()

    if save:
        csv_path = os.path.join(save_dir, group_name + ".csv")
        png_path = os.path.join(save_dir, group_name + ".png")
        save_dataframe(csv_path, combined)
        grid.savefig(png_path)

    return combined, figure
def generate_ordered_time_data(set_data, group_name, save_dir, save=True, show=True):
    """Combine the ordered timing frames of all sets, plot them and add row statistics.

    The frames in set_data are joined side by side, converted to long format and
    drawn with plot_violin_chart. Afterwards 'min', 'max', 'mean' and 'std' columns
    are appended to the combined frame, one value per row.

    Args:
        set_data (dict): Dictionary with set names as keys and DataFrame of ordered
            user times by dialogues as values.
        group_name (str): Base name (without extension) for the output files.
        save_dir (str): Directory to save the resulting .csv and .png files to.
        save (bool): Whether to save the resulting .csv and .png files. Default=True.
        show (bool): Whether to print the frame and show the figure. Default=True.

    Returns:
        tuple: The combined timing DataFrame (including the four statistic columns)
            and the figure returned by plot_violin_chart.
    """
    # Create dataframe of timings by set (the set names themselves are not used)
    sets_frame = pd.DataFrame()
    for set_frame in set_data.values():
        sets_frame = pd.concat([sets_frame, set_frame], axis=1, sort=False)

    # Create plots of timings by set (before the statistic columns are added)
    data = dataframe_wide_to_long(sets_frame)
    _, fig = plot_violin_chart(data, title='', y_label='Average Utterance Time (Seconds)',
                               colour='five_colour')

    # Add min/max/mean/SD for each row. All four are computed from a snapshot of
    # the timing columns only; computing them one after another on the growing
    # frame would fold the freshly added 'min'/'max' (and 'mean') columns into the
    # later 'mean' and 'std' values.
    timings = sets_frame.copy()
    sets_frame['min'] = timings.min(axis=1)
    sets_frame['max'] = timings.max(axis=1)
    sets_frame['mean'] = timings.mean(axis=1)
    sets_frame['std'] = timings.std(axis=1)

    # Save and show results
    if show:
        print(sets_frame)
        fig.show()

    if save:
        save_dataframe(os.path.join(save_dir, group_name + ".csv"), sets_frame)
        fig.savefig(os.path.join(save_dir, group_name + ".png"))

    return sets_frame, fig
def generate_label_type_rating_statistics(group_data, groups, group_name, save_dir, save=True, show=True):
    """Compare label types with tukey_hsd and add an overall anova_test row.

    The frames of the requested groups are keyed by group name (underscores
    replaced with spaces), joined side by side and converted to long format. The
    pairwise result of tukey_hsd over 'label_type' is then extended with one extra
    row (label 3) holding the p-value, effect sizes, sample sizes and power taken
    from the 'C(label_type)' row of anova_test.

    Args:
        group_data (dict): Dictionary with set names as keys and DataFrame of user
            ratings by dialogues as values.
        groups (list): Groups to process together. Should be keys in group_data.
        group_name (str): Base name (without extension) for the output file.
        save_dir (str): Directory to save the resulting .csv file to.
        save (bool): Whether to save the resulting .csv file. Default=True.
        show (bool): Whether to print the resulting DataFrame. Default=True.

    Returns:
        DataFrame: The tukey_hsd result with the additional ANOVA row.
    """
    # Join the selected groups side by side, keyed by a readable group name
    combined = pd.DataFrame()
    for group in groups:
        keyed = pd.concat([group_data[group]], keys=[group.replace("_", " ")],
                          names=['Group'], axis=1)
        combined = pd.concat([combined, keyed], axis=1, sort=False)

    # Long format, with the columns renamed to what the statistics helpers expect
    data = dataframe_wide_to_long(combined)
    data.drop('group', axis=1, inplace=True)
    data.rename(columns={'metric': 'group', 'variable_2': 'label_type'}, inplace=True)

    # Pairwise comparison of the label types
    label_type_frame = tukey_hsd(data, 'label_type', 'value')

    # Overall ANOVA for the label types, written into row 3
    # NOTE(review): row label 3 presumably follows three pairwise rows 0-2 - confirm
    anova_labels = anova_test(data, 'label_type', 'value')
    anova_columns = (
        ('p-value', 'PR(>F)'),
        ('eta_sq', 'eta_sq'),
        ('omega_sq', 'omega_sq'),
        ('cohen_f', 'cohen_f'),
        ('n', 'n'),
        ('exp_n', 'exp_n'),
        ('power', 'power'),
        ('exp_power', 'exp_power'),
    )
    for target, source in anova_columns:
        label_type_frame.loc[3, target] = anova_labels.loc['C(label_type)', source]

    # NOTE: a disabled diagnostic block used to sit here. It ran shapiro() on the
    # 'da', 'ap' and 'ap type' values and levene() across the three, printing the
    # statistic and p-value of each test.

    # Show and/or persist the results
    if show:
        print('Compare Label Types:')
        print(label_type_frame)

    if save:
        csv_path = os.path.join(save_dir, group_name + ".csv")
        save_dataframe(csv_path, label_type_frame)

    return label_type_frame
def generate_corpora_rating_statistics(group_data, groups, group_name, save_dir, save=True, show=True):
    """Compare corpora per label type and collect basic descriptive statistics.

    The frames of the requested groups are keyed by the first word of the group
    name, joined side by side and converted to long format. For each of 'DA', 'AP',
    'AP type' and 'All' the result of tukey_hsd over 'group' is extended with one
    extra row (label 6) taken from the 'C(group)' row of anova_test, and the
    per-label-type results are joined side by side under a top column level. A
    second frame holds min/max/mean/std per group and label type, plus an 'all'
    column over every value of the group.

    Args:
        group_data (dict): Dictionary with set names as keys and DataFrame of user
            ratings by dialogues as values.
        groups (list): Groups to process together. Should be keys in group_data.
        group_name (str): Base name (without extension) for the output files.
        save_dir (str): Directory to save the resulting .csv files to.
        save (bool): Whether to save the resulting .csv files. Default=True.
        show (bool): Whether to print the resulting DataFrames. Default=True.

    Returns:
        tuple: The pairwise/ANOVA comparison DataFrame and the basic statistics
            DataFrame.
    """
    # Join the selected groups side by side, keyed by the first word of their name
    combined = pd.DataFrame()
    for group in groups:
        corpus = group.replace("_", " ").split()[0]
        keyed = pd.concat([group_data[group]], keys=[corpus], names=['Group'], axis=1)
        combined = pd.concat([combined, keyed], axis=1, sort=False)

    # Long format, with the columns renamed to what the statistics helpers expect
    data = dataframe_wide_to_long(combined)
    data.drop('group', axis=1, inplace=True)
    data.rename(columns={'metric': 'group', 'variable_2': 'label_type'}, inplace=True)

    anova_columns = (
        ('p-value', 'PR(>F)'),
        ('eta_sq', 'eta_sq'),
        ('omega_sq', 'omega_sq'),
        ('cohen_f', 'cohen_f'),
        ('n', 'n'),
        ('exp_n', 'exp_n'),
        ('power', 'power'),
        ('exp_power', 'exp_power'),
    )

    # Pairwise comparisons for each label type and for all label types combined
    corpora_type_frame = pd.DataFrame()
    for label_type in ['DA', 'AP', 'AP type', 'All']:
        # 'All' uses every row, otherwise only rows of the (lower-cased) label type
        subset = data if label_type == 'All' else data.loc[data['label_type'] == label_type.lower()]

        # Pairwise comparison of the corpora
        label_type_frame = tukey_hsd(subset, 'group', 'value')

        # Overall ANOVA for the corpora, written into row 6
        # NOTE(review): row label 6 presumably follows six pairwise rows 0-5 - confirm
        anova_corpora = anova_test(subset, 'group', 'value')
        for target, source in anova_columns:
            label_type_frame.loc[6, target] = anova_corpora.loc['C(group)', source]

        # Put the label type on a top column level and append to the result
        label_type_frame.columns = pd.MultiIndex.from_product(
            [[label_type], label_type_frame.columns])
        corpora_type_frame = pd.concat([corpora_type_frame, label_type_frame], axis=1)

    # Min, max, mean and sd per group, for each label type and overall
    basic_stat_frame = pd.DataFrame()
    for group in data.group.unique():
        rows = data.loc[data['group'] == group]
        values = rows['value']

        # One column per label type, one row per statistic
        summary = rows.groupby(['label_type'], sort=False).agg(
            {'value': ['min', 'max', 'mean', 'std']})
        summary.columns = summary.columns.droplevel()
        summary = summary.T.reindex(columns=['da', 'ap', 'ap type'])

        # Statistics over every value of the group
        summary['all'] = pd.Series({
            'min': values.min(),
            'max': values.max(),
            'mean': values.mean(),
            'std': values.std()
        })

        # Put the group on a top column level and append to the result
        summary.columns = pd.MultiIndex.from_product([[group.split()[0]], summary.columns])
        basic_stat_frame = pd.concat([basic_stat_frame, summary], axis=1)

        # NOTE: a disabled diagnostic block used to sit here. It printed the group
        # and ran shapiro() on its 'da', 'ap' and 'ap type' values and levene()
        # across the three, printing the statistic and p-value of each test.

    # Show and/or persist the results
    if show:
        print('Compare Corpora:')
        print(corpora_type_frame)
        print('General Corpora stats:')
        print(basic_stat_frame)

    if save:
        stats_path = os.path.join(save_dir, group_name + ".csv")
        basic_path = os.path.join(save_dir, group_name + " (basic).csv")
        save_dataframe(stats_path, corpora_type_frame)
        save_dataframe(basic_path, basic_stat_frame)

    return corpora_type_frame, basic_stat_frame
def generate_dialogue_type_rating_statistics(group_data, groups, group_name, save_dir, save=True, show=True):
    """Compare dialogue-type groups per label type and collect basic statistics.

    The frames of the requested groups are keyed by group name (underscores
    replaced with spaces), joined side by side and converted to long format. The
    per-label-type result of multi_t_test is reordered, extended with the result of
    t_test over all label types (labelled 'all'), and returned together with a
    frame of min/max/mean/std per group and label type.

    Args:
        group_data (dict): Dictionary with set names as keys and DataFrame of user
            ratings by dialogues as values.
        groups (list): Groups to process together. Should be keys in group_data.
        group_name (str): Base name (without extension) for the output files.
        save_dir (str): Directory to save the resulting .csv files to.
        save (bool): Whether to save the resulting .csv files. Default=True.
        show (bool): Whether to print the resulting DataFrames. Default=True.

    Returns:
        tuple: The t-test comparison DataFrame and the basic statistics DataFrame.
    """
    # Create dataframe of ratings by group
    groups_frame = pd.DataFrame()
    for group in groups:
        group_frame = pd.concat([group_data[group]], keys=[group.replace("_", " ")],
                                names=['Group'], axis=1)
        groups_frame = pd.concat([groups_frame, group_frame], axis=1, sort=False)

    data = dataframe_wide_to_long(groups_frame)
    data.drop('group', axis=1, inplace=True)
    data.rename(columns={'metric': 'group', 'variable_2': 'label_type'}, inplace=True)

    # Generate the pairwise t-test data for label types per group
    dialogue_type_frame = multi_t_test(data, 'group', 'label_type', 'value')

    # Move the row at position 2 to the top and rename the label type column.
    # NOTE(review): position 2 is presumably the 'da' row - this relies on the row
    # order produced by multi_t_test; confirm.
    dialogue_type_frame.reset_index(inplace=True)
    dialogue_type_frame["new"] = range(1, len(dialogue_type_frame) + 1)
    dialogue_type_frame.loc[2, 'new'] = 0
    # drop must be a real boolean (the string 'True' only worked by being truthy)
    dialogue_type_frame = dialogue_type_frame.sort_values("new").reset_index(
        drop=True).drop('new', axis=1)
    dialogue_type_frame.rename(columns={'index': 'label_type'}, inplace=True)

    # Add t-test for all combined label types. DataFrame.append was removed in
    # pandas 2.0, so the row is added with pd.concat instead.
    # NOTE(review): t_test presumably returns a DataFrame; a Series is accepted as
    # a single row as well, which is how DataFrame.append treated it.
    combined_test = t_test(data, 'group', 'value')
    if isinstance(combined_test, pd.Series):
        combined_test = combined_test.to_frame().T
    dialogue_type_frame = pd.concat([dialogue_type_frame, combined_test],
                                    ignore_index=True, sort=False)
    dialogue_type_frame.loc[3, 'label_type'] = 'all'

    # NOTE: a disabled block used to sit here that added 'eta_sq' and 'omega_sq'
    # from anova_test(data, 'group', 'value') to row 3.

    # Get mean, sd and min/max for groups per agreement statistic
    basic_stat_frame = pd.DataFrame()
    for group in data.group.unique():
        # Rows of the current group (named so it does not shadow the group_data
        # parameter)
        group_rows = data.loc[data['group'] == group]

        # Get each label type for the group
        tmp = group_rows.groupby(['label_type'], sort=False).agg(
            {'value': ['min', 'max', 'mean', 'std']})
        tmp.columns = tmp.columns.droplevel()
        tmp = tmp.T
        tmp = tmp.reindex(columns=['da', 'ap', 'ap type'])

        # Add overall stats
        tmp['all'] = pd.Series({
            'min': group_rows['value'].min(),
            'max': group_rows['value'].max(),
            'mean': group_rows['value'].mean(),
            'std': group_rows['value'].std()
        })

        # Add to basic stats frame
        tmp.columns = pd.MultiIndex.from_product([[group.split()[0]], tmp.columns])
        basic_stat_frame = pd.concat([basic_stat_frame, tmp], axis=1)

        # NOTE: a disabled diagnostic block used to sit here. It printed the group
        # and ran shapiro() on its 'da', 'ap' and 'ap type' values and levene()
        # across the three, printing the statistic and p-value of each test.

    # Save and show results
    if show:
        print('Compare Task-oriented and Non-task-oriented dialogues:')
        print(dialogue_type_frame)
        print('General Task-oriented and Non-task-oriented stats:')
        print(basic_stat_frame)

    if save:
        save_dataframe(os.path.join(save_dir, group_name + ".csv"), dialogue_type_frame)
        save_dataframe(os.path.join(save_dir, group_name + " (basic).csv"), basic_stat_frame)

    return dialogue_type_frame, basic_stat_frame
def generate_corpora_timing_statistics(group_data, groups, group_name, save_dir, save=True, show=True):
    """Compare the timings of corpora with tukey_hsd and add an overall anova_test row.

    The frames of the requested groups are keyed by the first word of the group
    name, joined side by side and converted to long format. The pairwise result of
    tukey_hsd over 'group' is extended with one extra row (label 6) taken from the
    'C(group)' row of anova_test. A second frame holds min/max/mean/std of 'value'
    per group.

    Args:
        group_data (dict): Dictionary with set names as keys and DataFrame of user
            times by dialogues as values.
        groups (list): Groups to process together. Should be keys in group_data.
        group_name (str): Base name (without extension) for the output files.
        save_dir (str): Directory to save the resulting .csv files to.
        save (bool): Whether to save the resulting .csv files. Default=True.
        show (bool): Whether to print the resulting DataFrames. Default=True.

    Returns:
        tuple: The pairwise/ANOVA comparison DataFrame and the basic statistics
            DataFrame.
    """
    # Join the selected groups side by side, keyed by the first word of their name
    combined = pd.DataFrame()
    for group in groups:
        corpus = group.replace("_", " ").split()[0]
        keyed = pd.concat([group_data[group]], keys=[corpus], names=['group'], axis=1)
        combined = pd.concat([combined, keyed], axis=1, sort=False)

    # Long format; the unnamed (None) column is renamed to 'users' and dropped
    data = dataframe_wide_to_long(combined)
    data.rename(columns={None: 'users'}, inplace=True)
    data.drop('users', axis=1, inplace=True)

    # Pairwise comparison of the corpora
    stats_frame = tukey_hsd(data, 'group', 'value')

    # Overall ANOVA for the corpora, written into row 6
    # NOTE(review): row label 6 presumably follows six pairwise rows 0-5 - confirm
    anova_groups = anova_test(data, 'group', 'value')
    anova_columns = (
        ('p-value', 'PR(>F)'),
        ('eta_sq', 'eta_sq'),
        ('omega_sq', 'omega_sq'),
        ('cohen_f', 'cohen_f'),
        ('n', 'n'),
        ('exp_n', 'exp_n'),
        ('power', 'power'),
        ('exp_power', 'exp_power'),
    )
    for target, source in anova_columns:
        stats_frame.loc[6, target] = anova_groups.loc['C(group)', source]

    # Min, max, mean and sd per group: one column per group, one row per statistic
    summary = data.groupby(['group'], sort=False).agg(
        {'value': ['min', 'max', 'mean', 'std']})
    summary.columns = summary.columns.droplevel()
    basic_stat_frame = summary.T

    # NOTE: a disabled diagnostic block used to sit here. It ran shapiro() on the
    # values of each group and levene() across all groups, printing the statistic
    # and p-value of each test.

    # Show and/or persist the results
    if show:
        print('Compare Corpora:')
        print(stats_frame)
        print('General Corpora stats:')
        print(basic_stat_frame)

    if save:
        stats_path = os.path.join(save_dir, group_name + ".csv")
        basic_path = os.path.join(save_dir, group_name + " (basic).csv")
        save_dataframe(stats_path, stats_frame)
        save_dataframe(basic_path, basic_stat_frame)

    return stats_frame, basic_stat_frame
def generate_dialogue_type_timing_statistics(group_data, groups, group_name, save_dir, save=True, show=True):
    """Compare the timings of dialogue-type groups with t_test and basic statistics.

    The frames of the requested groups are keyed by the first word of the group
    name, joined side by side and converted to long format. The result of t_test
    over 'group' is returned together with a frame of min/max/mean/std of 'value'
    per group.

    Args:
        group_data (dict): Dictionary with set names as keys and DataFrame of user
            times by dialogues as values.
        groups (list): Groups to process together. Should be keys in group_data.
        group_name (str): Base name (without extension) for the output files.
        save_dir (str): Directory to save the resulting .csv files to.
        save (bool): Whether to save the resulting .csv files. Default=True.
        show (bool): Whether to print the resulting DataFrames. Default=True.

    Returns:
        tuple: The t_test result and the basic statistics DataFrame.
    """
    # Join the selected groups side by side, keyed by the first word of their name
    combined = pd.DataFrame()
    for group in groups:
        dialogue_type = group.replace("_", " ").split()[0]
        keyed = pd.concat([group_data[group]], keys=[dialogue_type], names=['group'], axis=1)
        combined = pd.concat([combined, keyed], axis=1, sort=False)

    # Long format; the unnamed (None) column is renamed to 'users' and dropped
    data = dataframe_wide_to_long(combined)
    data.rename(columns={None: 'users'}, inplace=True)
    data.drop('users', axis=1, inplace=True)

    # Comparison of the groups
    stats_frame = t_test(data, 'group', 'value')

    # NOTE: a disabled block used to sit here that added 'eta_sq' and 'omega_sq'
    # from anova_test(data, 'group', 'value') to the stats frame.

    # Min, max, mean and sd per group: one column per group, one row per statistic
    summary = data.groupby(['group'], sort=False).agg(
        {'value': ['min', 'max', 'mean', 'std']})
    summary.columns = summary.columns.droplevel()
    basic_stat_frame = summary.T

    # NOTE: a disabled diagnostic block used to sit here. It ran shapiro() on the
    # task and non-task values and levene() across the two. As written it selected
    # the 'task-oriented' rows for both samples, so it would need fixing before
    # being re-enabled.

    # Show and/or persist the results
    if show:
        print('Compare Task-oriented and Non-task-oriented dialogues:')
        print(stats_frame)
        print('General Task-oriented and Non-task-oriented stats:')
        print(basic_stat_frame)

    if save:
        stats_path = os.path.join(save_dir, group_name + ".csv")
        basic_path = os.path.join(save_dir, group_name + " (basic).csv")
        save_dataframe(stats_path, stats_frame)
        save_dataframe(basic_path, basic_stat_frame)

    return stats_frame, basic_stat_frame
def generate_group_time_data(group_data, groups, group_name, save_dir, save=True, show=True):
    """Compute per-row and per-group timing statistics and plot the group values.

    For every requested group the row-wise mean, standard deviation, min and max of
    its frame are computed and stored under a top column level holding the first
    word of the group name. A second frame holds one row per group: the mean of the
    row means, the mean of the row standard deviations, the smallest row min and
    the largest row max. That second frame is converted to long format and passed
    to plot_bar_chart.

    Args:
        group_data (dict): Dictionary with set names as keys and DataFrame of user
            times by dialogues as values.
        groups (list): Groups to process together. Should be keys in group_data.
        group_name (str): Base name (without extension) for the output files.
        save_dir (str): Directory to save the resulting .csv and .png files to.
        save (bool): Whether to save the resulting .csv and .png files. Default=True.
        show (bool): Whether to print the frame and show the chart. Default=True.

    Returns:
        tuple: The per-row statistics DataFrame of all groups and the object
            returned by plot_bar_chart.
    """
    stat_names = ['mean', 'sd', 'min', 'max']
    per_group_frames = []
    group_stats_frame = pd.DataFrame(columns=['mean', 'sd', 'min', 'max'])

    for group in groups:
        # Short label: first word of the group name with underscores as spaces
        label = group.replace("_", " ").split()[0]
        times = group_data[group]

        # Row-wise statistics of the group's frame
        row_stats = pd.DataFrame()
        row_stats['mean'] = times.mean(numeric_only=True, axis=1)
        row_stats['sd'] = times.std(numeric_only=True, axis=1)
        row_stats['min'] = times.min(numeric_only=True, axis=1)
        row_stats['max'] = times.max(numeric_only=True, axis=1)

        # Copy with the label on a top column level, collected for the final frame
        labelled = row_stats.loc[:, stat_names]
        labelled.columns = pd.MultiIndex.from_product([[label], labelled.columns])
        per_group_frames.append(labelled)

        # Statistics for the whole group, one cell at a time
        overall = (
            ('mean', row_stats['mean'].mean(axis=0)),
            ('sd', row_stats['sd'].mean(axis=0)),
            ('min', row_stats['min'].min(axis=0)),
            ('max', row_stats['max'].max(axis=0)),
        )
        for column, value in overall:
            group_stats_frame.loc[label, column] = value

    # Frame holding the per-row statistics of all groups
    groups_frame = pd.concat(per_group_frames, axis=1, sort=True)

    # Bar chart of the whole-group statistics
    data = dataframe_wide_to_long(group_stats_frame)
    chart = plot_bar_chart(data, title='', y_label='Average Utterance Time (Seconds)',
                           dodge=True, num_legend_col=2)

    # Show and/or persist the results
    if show:
        print(groups_frame)
        chart.show()

    if save:
        csv_path = os.path.join(save_dir, group_name + ".csv")
        png_path = os.path.join(save_dir, group_name + ".png")
        save_dataframe(csv_path, groups_frame)
        chart.savefig(png_path)

    return groups_frame, chart
def generate_set_time_data(set_data, group_name, save_dir, save=True, show=True):
    """Combine the timing frames of all sets and plot them with mean/total times.

    Every set's frame gets the set name as a top column level and the frames are
    joined side by side. For plotting only, two extra rows ('Mean time' and
    'Total time', computed per column) are appended under a 'Total' top column
    level; the frame that is printed, saved and returned does not contain them.

    The DataFrames inside set_data are not modified: the column relabelling is done
    on a copy, so the function can be called repeatedly with the same dictionary.

    Args:
        set_data (dict): Dictionary with set names as keys and DataFrame of user
            times by dialogues as values.
        group_name (str): Base name (without extension) for the output files.
        save_dir (str): Directory to save the resulting .csv and .png files to.
        save (bool): Whether to save the resulting .csv and .png files. Default=True.
        show (bool): Whether to print the frame and show the figure. Default=True.

    Returns:
        tuple: The combined timing DataFrame (without the mean/total rows) and the
            figure returned by plot_facetgrid.
    """
    # Create dataframe of timings by set
    sets_frame = pd.DataFrame()
    for set_name, set_frame in set_data.items():
        # Relabel a copy: assigning to set_frame.columns directly would rewrite the
        # caller's DataFrame and nest the column levels again on the next call
        keyed_frame = set_frame.copy()
        keyed_frame.columns = pd.MultiIndex.from_product([[set_name], keyed_frame.columns])
        sets_frame = pd.concat([sets_frame, keyed_frame], axis=1, sort=False)

    # Create mean and sum of set times dataframe (one row each, top level dropped)
    mean_times = pd.Series(sets_frame.mean(), name='Mean time').to_frame().T
    mean_times.columns = mean_times.columns.droplevel()
    sum_times = pd.Series(sets_frame.sum(), name='Total time').to_frame().T
    sum_times.columns = sum_times.columns.droplevel()
    mean_and_sum = pd.concat([mean_times, sum_times])
    mean_and_sum.columns = pd.MultiIndex.from_product([['Total'], mean_and_sum.columns])

    # Add to sets frame
    sets_frame_columns = sets_frame.columns.tolist()
    sets_frame_totals = pd.concat([sets_frame, mean_and_sum], axis=0, sort=False)
    # Set order of concatenated mean_and_sum frame columns (because concat reorders them)
    sets_frame_totals = sets_frame_totals[sets_frame_columns + mean_and_sum.columns.tolist()]

    # Create plots of timings by set
    data = dataframe_wide_to_long(sets_frame_totals)
    g, fig = plot_facetgrid(data, title='', y_label='Total Time (Seconds)', kind='bar',
                            share_y=True, colour='triples', num_legend_col=5,
                            all_legend=True, show_bar_value=True, bar_value_rotation=90)

    # Save and show results
    if show:
        print(sets_frame)
        fig.show()

    if save:
        save_dataframe(os.path.join(save_dir, group_name + ".csv"), sets_frame)
        g.savefig(os.path.join(save_dir, group_name + ".png"))

    return sets_frame, fig