def generate_main_results():
    """Generate the main results of the experiment.

    Returns an iterator of ``(name, table)`` pairs for the wide-optimal,
    mean/SEM scores, percentage-difference and ranking tables.
    """

    def _abbreviate(name):
        # Title-case single-word dataset names; otherwise use the initials.
        words = name.split(' ')
        if len(words) == 1:
            return name.title()
        return ''.join([w[0] for w in words])

    # Optimal results in wide format, with the best values emboldened.
    wide_optimal = (
        sort_tbl(
            calculate_wide_optimal(results),
            ovrs_order=OVRS_NAMES,
            clfs_order=CLFS_NAMES,
        )
        .set_index(['Dataset', 'Classifier', 'Metric'])
        .apply(lambda row: make_bold(row, num_decimals=3), axis=1)
        .reset_index()
    )
    wide_optimal['Dataset'] = wide_optimal['Dataset'].apply(_abbreviate)

    # Mean +/- SEM of the cross-validation scores (best value bolded).
    mean_sem_scores = sort_tbl(
        generate_mean_std_tbl_bold(
            *calculate_mean_sem_scores(results), maximum=True, decimals=3
        ),
        ovrs_order=OVRS_NAMES,
        clfs_order=CLFS_NAMES,
    )

    # Percentage difference between SMOTE and K-SMOTE.
    mean_sem_perc_diff_scores = sort_tbl(
        generate_mean_std_tbl(
            *calculate_mean_sem_perc_diff_scores(results, ['SMOTE', 'K-SMOTE'])
        ),
        ovrs_order=OVRS_NAMES,
        clfs_order=CLFS_NAMES,
    )

    # Mean +/- SEM ranking across datasets (lower is better).
    mean_sem_ranking = sort_tbl(
        generate_mean_std_tbl_bold(
            *calculate_mean_sem_ranking(results), maximum=False
        ),
        ovrs_order=OVRS_NAMES,
        clfs_order=CLFS_NAMES,
    )

    tables = {
        'wide_optimal': wide_optimal,
        'mean_sem_scores': mean_sem_scores,
        'mean_sem_perc_diff_scores': mean_sem_perc_diff_scores,
        'mean_sem_ranking': mean_sem_ranking,
    }
    return zip(tables.keys(), tables.values())
def generate_main_results():
    """Generate the main results of the experiment.

    Returns a dict mapping each undersampling ratio to an iterator of
    ``(name, table)`` pairs.
    """
    table_names = (
        'mean_sem_scores',
        'mean_sem_perc_diff_scores',
        'mean_sem_ranking',
    )
    main_results = {}
    for ratio in UNDERSAMPLING_RATIOS:

        # Generate the raw experiment results for this ratio
        results = generate_results(ratio)

        # Mean +/- SEM of the cross-validation scores
        scores_tbl = sort_tbl(
            generate_mean_std_tbl(*calculate_mean_sem_scores(results)),
            ovrs_order=OVERSAMPLERS_NAMES,
            clfs_order=CLASSIFIERS_NAMES)

        # Percentage difference between no oversampling and G-SMOTE
        perc_diff_tbl = sort_tbl(
            generate_mean_std_tbl(*calculate_mean_sem_perc_diff_scores(
                results, ['NO OVERSAMPLING', 'G-SMOTE'])),
            ovrs_order=OVERSAMPLERS_NAMES,
            clfs_order=CLASSIFIERS_NAMES)

        # Mean +/- SEM ranking across datasets
        ranking_tbl = sort_tbl(
            generate_mean_std_tbl(*calculate_mean_sem_ranking(results)),
            ovrs_order=OVERSAMPLERS_NAMES,
            clfs_order=CLASSIFIERS_NAMES)

        # Populate the main results for this ratio
        main_results[ratio] = zip(
            table_names, (scores_tbl, perc_diff_tbl, ranking_tbl))

    return main_results
def test_mean_sem_scores():
    """Test the mean scores results of experiment."""
    mean_scores, sem_scores = calculate_mean_sem_scores(EXPERIMENT.results_)
    expected_clfs = set(EXPERIMENT.classifiers_names_)
    expected_ovrs = set(EXPERIMENT.oversamplers_names_)
    for tbl in (mean_scores, sem_scores):
        # One row per classifier, one trailing column per oversampler
        assert set(tbl.Classifier.unique()) == expected_clfs
        assert set(tbl.columns[2:]) == expected_ovrs
        assert len(tbl) == len(CLASSIFIERS)
def generate_main_results():
    """Generate the main results of the experiment.

    Returns an iterator of ``(name, table)`` pairs for the mean/SEM
    scores, the per-oversampler percentage differences versus G-SOMO,
    and the mean/SEM rankings.
    """
    # Generate the raw experiment results
    results = generate_results()

    # Mean +/- SEM of the cross-validation scores
    mean_sem_scores = sort_tbl(
        generate_mean_std_tbl(*calculate_mean_sem_scores(results)),
        ovrs_order=OVERSAMPLERS_NAMES,
        clfs_order=CLASSIFIERS_NAMES)

    # Percentage difference of each baseline oversampler against G-SOMO,
    # collected as one column per oversampler
    keys = mean_sem_scores[['Classifier', 'Metric']]
    diff_columns = []
    for oversampler in ('SMOTE', 'K-MEANS SMOTE', 'SOMO', 'G-SMOTE'):
        diff_tbl = sort_tbl(
            generate_mean_std_tbl(*calculate_mean_sem_perc_diff_scores(
                results, [oversampler, 'G-SOMO'])),
            ovrs_order=OVERSAMPLERS_NAMES,
            clfs_order=CLASSIFIERS_NAMES)
        diff_tbl = diff_tbl.rename(
            columns={'Difference': oversampler}
        ).drop(columns=['Classifier', 'Metric'])
        diff_columns.append(diff_tbl)
    mean_sem_perc_diff_scores = pd.concat(
        [keys, pd.concat(diff_columns, axis=1)], axis=1)

    # Mean +/- SEM ranking across datasets
    mean_sem_ranking = sort_tbl(
        generate_mean_std_tbl(*calculate_mean_sem_ranking(results)),
        ovrs_order=OVERSAMPLERS_NAMES,
        clfs_order=CLASSIFIERS_NAMES)

    # Pair each table with its name
    table_names = (
        'mean_sem_scores',
        'mean_sem_perc_diff_scores',
        'mean_sem_ranking',
    )
    return zip(
        table_names,
        (mean_sem_scores, mean_sem_perc_diff_scores, mean_sem_ranking))