def generate_main_results():
    """Generate the main results of the experiment."""

    # Optimal scores in wide format, with the best value per row in bold
    wide_optimal = (
        sort_tbl(
            calculate_wide_optimal(results),
            ovrs_order=OVRS_NAMES,
            clfs_order=CLFS_NAMES,
        )
        .set_index(['Dataset', 'Classifier', 'Metric'])
        .apply(lambda row: make_bold(row, num_decimals=3), axis=1)
        .reset_index()
    )

    # Title-case single-word dataset names and abbreviate multi-word names
    # to their initials
    wide_optimal['Dataset'] = wide_optimal['Dataset'].apply(
        lambda x: x.title()
        if len(x.split(' ')) == 1
        else ''.join([w[0] for w in x.split(' ')])
    )

    # Mean and standard error of the scores
    mean_sem_scores = sort_tbl(
        generate_mean_std_tbl_bold(
            *calculate_mean_sem_scores(results), maximum=True, decimals=3
        ),
        ovrs_order=OVRS_NAMES,
        clfs_order=CLFS_NAMES,
    )

    # Mean and standard error of the percentage difference of scores
    # between SMOTE and K-SMOTE
    mean_sem_perc_diff_scores = sort_tbl(
        generate_mean_std_tbl(
            *calculate_mean_sem_perc_diff_scores(results, ['SMOTE', 'K-SMOTE'])
        ),
        ovrs_order=OVRS_NAMES,
        clfs_order=CLFS_NAMES,
    )

    # Mean and standard error of the rankings, with the best (lowest) rank in bold
    mean_sem_ranking = sort_tbl(
        generate_mean_std_tbl_bold(
            *calculate_mean_sem_ranking(results), maximum=False
        ),
        ovrs_order=OVRS_NAMES,
        clfs_order=CLFS_NAMES,
    )

    # Pair each table with its name
    main_results_names = (
        'wide_optimal',
        'mean_sem_scores',
        'mean_sem_perc_diff_scores',
        'mean_sem_ranking',
    )
    return zip(
        main_results_names,
        (wide_optimal, mean_sem_scores, mean_sem_perc_diff_scores, mean_sem_ranking),
    )
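# A minimal sketch of the `make_bold` helper used above; this is an assumed
# implementation, not necessarily the project's own: it rounds a row of scores
# and wraps the best (maximum) value in LaTeX bold markup.
def make_bold(row, num_decimals=3):
    """Round a row of scores and highlight its maximum value in bold LaTeX."""
    formatter = '{:.%df}' % num_decimals
    row = row.round(num_decimals)
    mask = row == row.max()
    row = row.apply(formatter.format)
    row[mask] = row[mask].apply(lambda val: '\\textbf{%s}' % val)
    return row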
def generate_main_results():
    """Generate the main results of the experiment."""
    main_results = {}
    for ratio in UNDERSAMPLING_RATIOS:

        # Generate results
        results = generate_results(ratio)

        # Calculate results
        mean_sem_scores = sort_tbl(
            generate_mean_std_tbl(*calculate_mean_sem_scores(results)),
            ovrs_order=OVERSAMPLERS_NAMES,
            clfs_order=CLASSIFIERS_NAMES,
        )
        mean_sem_perc_diff_scores = sort_tbl(
            generate_mean_std_tbl(
                *calculate_mean_sem_perc_diff_scores(
                    results, ['NO OVERSAMPLING', 'G-SMOTE']
                )
            ),
            ovrs_order=OVERSAMPLERS_NAMES,
            clfs_order=CLASSIFIERS_NAMES,
        )
        mean_sem_ranking = sort_tbl(
            generate_mean_std_tbl(*calculate_mean_sem_ranking(results)),
            ovrs_order=OVERSAMPLERS_NAMES,
            clfs_order=CLASSIFIERS_NAMES,
        )

        # Populate main results
        main_results_names = (
            'mean_sem_scores',
            'mean_sem_perc_diff_scores',
            'mean_sem_ranking',
        )
        main_results[ratio] = zip(
            main_results_names,
            (mean_sem_scores, mean_sem_perc_diff_scores, mean_sem_ranking),
        )

    return main_results
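# Hypothetical usage sketch (the helper name and CSV naming scheme below are
# assumptions, not part of the original scripts): the mapping returned above
# pairs each undersampling ratio with (name, table) tuples that can be
# written to disk.
from os.path import join


def save_main_results(main_results, analysis_path):
    """Write every generated table to a CSV file, one file per ratio."""
    for ratio, named_tables in main_results.items():
        for name, tbl in named_tables:
            tbl.to_csv(join(analysis_path, f'{name}_{ratio}.csv'), index=False)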
def test_mean_sem_ranking():
    """Test the mean ranking results of the experiment."""
    mean_ranking, sem_ranking = calculate_mean_sem_ranking(EXPERIMENT.results_)
    assert set(mean_ranking.Classifier.unique()) == set(EXPERIMENT.classifiers_names_)
    assert set(sem_ranking.Classifier.unique()) == set(EXPERIMENT.classifiers_names_)
    assert set(mean_ranking.columns[2:]) == set(EXPERIMENT.oversamplers_names_)
    assert set(sem_ranking.columns[2:]) == set(EXPERIMENT.oversamplers_names_)
    assert len(mean_ranking) == len(CLASSIFIERS)
    assert len(sem_ranking) == len(CLASSIFIERS)
def make_mean_rank_bar_chart():
    """Generate the bar chart of the mean rankings."""
    load_plt_sns_configs()

    # Mean rankings per classifier, metric and oversampler
    ranks = calculate_mean_sem_ranking(results)[0]
    ranks['Metric'] = ranks['Metric'].apply(lambda x: METRICS_MAPPING[x])

    # One subplot per (classifier, metric) combination
    fig, axes = plt.subplots(
        ranks.Classifier.unique().shape[0],
        ranks.Metric.unique().shape[0],
        figsize=(5, 6),
    )
    lranks = ranks.set_index(['Classifier', 'Metric'])
    for (row, clf), (col, metric) in product(
        enumerate(ranks.Classifier.unique()), enumerate(ranks.Metric.unique())
    ):

        # Convert rankings to bar heights so that rank 1 gets the tallest bar
        dat = len(OVRS_NAMES) - lranks.loc[(clf, metric)].loc[list(OVRS_NAMES[::-1])]
        axes[row, col].bar(
            dat.index,
            dat.values,
            color=['indianred'] + ['steelblue' for i in range(len(OVRS_NAMES) - 1)],
        )
        plt.sca(axes[row, col])
        plt.yticks(
            range(len(OVRS_NAMES)), [None] + list(range(1, len(OVRS_NAMES)))[::-1]
        )
        plt.xticks(rotation=90)

        # Label only the outer subplots
        if row == 0:
            plt.title(metric)
        if col == 0:
            plt.ylabel(f'{clf}')
        if row != len(ranks.Classifier.unique()) - 1:
            plt.xticks(range(len(OVRS_NAMES)), [])
        if col != 0:
            plt.yticks(range(len(OVRS_NAMES)), [])
        sns.despine(left=True)

        # The boolean `b` parameter was removed from `plt.grid` in recent
        # matplotlib releases; `visible=False` hides the vertical gridlines.
        plt.grid(visible=False, axis='x')

    fig.savefig(
        join(analysis_path, 'mean_rankings_bar_chart.pdf'),
        format='pdf',
        bbox_inches='tight',
    )
    plt.close()
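# A minimal sketch of the `load_plt_sns_configs` helper called above. The
# exact settings are assumptions; the function is taken to apply a shared
# matplotlib/seaborn style before any figure is drawn.
import matplotlib.pyplot as plt
import seaborn as sns


def load_plt_sns_configs(font_size=8):
    """Apply common plotting defaults for the analysis figures."""
    sns.set_style('whitegrid')
    plt.rcParams.update({
        'font.size': font_size,
        'axes.titlesize': font_size,
        'axes.labelsize': font_size,
        'xtick.labelsize': font_size - 1,
        'ytick.labelsize': font_size - 1,
        'legend.fontsize': font_size - 1,
    })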
def generate_main_results():
    """Generate the main results of the experiment."""

    # Generate results
    results = generate_results()

    # Calculate results
    mean_sem_scores = sort_tbl(
        generate_mean_std_tbl(*calculate_mean_sem_scores(results)),
        ovrs_order=OVERSAMPLERS_NAMES,
        clfs_order=CLASSIFIERS_NAMES,
    )
    keys = mean_sem_scores[['Classifier', 'Metric']]
    mean_sem_perc_diff_scores = []
    for oversampler in ('SMOTE', 'K-MEANS SMOTE', 'SOMO', 'G-SMOTE'):
        perc_diff_scores = sort_tbl(
            generate_mean_std_tbl(
                *calculate_mean_sem_perc_diff_scores(results, [oversampler, 'G-SOMO'])
            ),
            ovrs_order=OVERSAMPLERS_NAMES,
            clfs_order=CLASSIFIERS_NAMES,
        )
        perc_diff_scores = perc_diff_scores.rename(
            columns={'Difference': oversampler}
        ).drop(columns=['Classifier', 'Metric'])
        mean_sem_perc_diff_scores.append(perc_diff_scores)
    mean_sem_perc_diff_scores = pd.concat(
        [keys, pd.concat(mean_sem_perc_diff_scores, axis=1)], axis=1
    )
    mean_sem_ranking = sort_tbl(
        generate_mean_std_tbl(*calculate_mean_sem_ranking(results)),
        ovrs_order=OVERSAMPLERS_NAMES,
        clfs_order=CLASSIFIERS_NAMES,
    )

    # Generate main results
    main_results_names = (
        'mean_sem_scores',
        'mean_sem_perc_diff_scores',
        'mean_sem_ranking',
    )
    main_results = zip(
        main_results_names,
        (mean_sem_scores, mean_sem_perc_diff_scores, mean_sem_ranking),
    )

    return main_results