Example #1
def generate_main_results():
    """Generate the main results of the experiment."""

    # Optimal scores per (dataset, classifier, metric), with the best values in bold
    wide_optimal = (
        sort_tbl(
            calculate_wide_optimal(results),
            ovrs_order=OVRS_NAMES, clfs_order=CLFS_NAMES
        )
        .set_index(['Dataset', 'Classifier', 'Metric'])
        .apply(lambda row: make_bold(row, num_decimals=3), axis=1)
        .reset_index()
    )

    # Shorten dataset names: title-case single words, abbreviate multi-word names
    wide_optimal['Dataset'] = wide_optimal['Dataset'].apply(
        lambda x: x.title()
        if len(x.split(' ')) == 1
        else ''.join([w[0] for w in x.split(' ')])
    )

    # Mean scores with standard errors, highlighting the maximum per row
    mean_sem_scores = sort_tbl(
        generate_mean_std_tbl_bold(*calculate_mean_sem_scores(results), maximum=True, decimals=3),
        ovrs_order=OVRS_NAMES, clfs_order=CLFS_NAMES
    )

    # Percentage difference of scores between SMOTE and K-SMOTE
    mean_sem_perc_diff_scores = sort_tbl(
        generate_mean_std_tbl(*calculate_mean_sem_perc_diff_scores(results, ['SMOTE', 'K-SMOTE'])),
        ovrs_order=OVRS_NAMES, clfs_order=CLFS_NAMES
    )

    # Mean rankings with standard errors, highlighting the minimum (best) rank per row
    mean_sem_ranking = sort_tbl(
        generate_mean_std_tbl_bold(*calculate_mean_sem_ranking(results), maximum=False),
        ovrs_order=OVRS_NAMES, clfs_order=CLFS_NAMES
    )
    main_results_names = ('wide_optimal', 'mean_sem_scores', 'mean_sem_perc_diff_scores', 'mean_sem_ranking')

    return zip(main_results_names, (wide_optimal, mean_sem_scores, mean_sem_perc_diff_scores, mean_sem_ranking))
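The function returns an iterator of (name, table) pairs. A minimal usage sketch, not part of the original source, assuming generate_main_results and an output directory variable analysis_path (as used in Example #4) are in scope; the CSV file names are illustrative:

from os.path import join

for name, table in generate_main_results():
    # Each table is a pandas DataFrame; write it next to the other analysis artifacts.
    table.to_csv(join(analysis_path, f'{name}.csv'), index=False)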
Example #2
def generate_main_results():
    """Generate the main results of the experiment."""

    main_results = {}
    for ratio in UNDERSAMPLING_RATIOS:

        # Generate results
        results = generate_results(ratio)

        # Calculate results
        mean_sem_scores = sort_tbl(
            generate_mean_std_tbl(*calculate_mean_sem_scores(results)),
            ovrs_order=OVERSAMPLERS_NAMES,
            clfs_order=CLASSIFIERS_NAMES)
        mean_sem_perc_diff_scores = sort_tbl(
            generate_mean_std_tbl(*calculate_mean_sem_perc_diff_scores(
                results, ['NO OVERSAMPLING', 'G-SMOTE'])),
            ovrs_order=OVERSAMPLERS_NAMES,
            clfs_order=CLASSIFIERS_NAMES)
        mean_sem_ranking = sort_tbl(
            generate_mean_std_tbl(*calculate_mean_sem_ranking(results)),
            ovrs_order=OVERSAMPLERS_NAMES,
            clfs_order=CLASSIFIERS_NAMES)

        # Populate main results
        main_results_names = ('mean_sem_scores', 'mean_sem_perc_diff_scores',
                              'mean_sem_ranking')
        main_results[ratio] = zip(
            main_results_names,
            (mean_sem_scores, mean_sem_perc_diff_scores, mean_sem_ranking))

    return main_results
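This variant keys its results by undersampling ratio. A hedged sketch of consuming the returned mapping, again assuming generate_main_results and analysis_path are available in the calling script; the file names are illustrative:

from os.path import join

for ratio, named_tables in generate_main_results().items():
    for name, table in named_tables:
        # Each table is a pandas DataFrame.
        table.to_csv(join(analysis_path, f'{name}_{ratio}.csv'), index=False)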
Example #3
def test_mean_sem_ranking():
    """Test the mean ranking results of the experiment."""
    mean_ranking, sem_ranking = calculate_mean_sem_ranking(EXPERIMENT.results_)
    assert set(mean_ranking.Classifier.unique()) == set(
        EXPERIMENT.classifiers_names_)
    assert set(sem_ranking.Classifier.unique()) == set(
        EXPERIMENT.classifiers_names_)
    assert set(mean_ranking.columns[2:]) == set(EXPERIMENT.oversamplers_names_)
    assert set(sem_ranking.columns[2:]) == set(EXPERIMENT.oversamplers_names_)
    assert len(mean_ranking) == len(CLASSIFIERS)
    assert len(sem_ranking) == len(CLASSIFIERS)
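A minimal way to run only this test, assuming it lives in a module collected by pytest; the keyword filter is simply the test name shown above:

import pytest

# Select the ranking test by name; pytest discovers the surrounding test module.
pytest.main(['-k', 'test_mean_sem_ranking'])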
Example #4
def make_mean_rank_bar_chart():
    """Generates bar chart."""

    load_plt_sns_configs()

    ranks = calculate_mean_sem_ranking(results)[0]
    ranks['Metric'] = ranks['Metric'].apply(lambda x: METRICS_MAPPING[x])
    fig, axes = plt.subplots(
        ranks.Classifier.unique().shape[0],
        ranks.Metric.unique().shape[0],
        figsize=(5,6)
    )
    lranks = ranks.set_index(['Classifier', 'Metric'])
    for (row, clf), (col, metric) in product(
            enumerate(ranks.Classifier.unique()),
            enumerate(ranks.Metric.unique())
    ):
        # Invert the ranks so that the best (lowest) rank gets the tallest bar
        dat = len(OVRS_NAMES) - lranks.loc[(clf, metric)].loc[list(OVRS_NAMES[::-1])]
        axes[row, col].bar(
            dat.index,
            dat.values,
            color=['indianred'] + ['steelblue'] * (len(OVRS_NAMES) - 1)
        )
        plt.sca(axes[row, col])

        # Relabel the y axis with the original (non-inverted) rank values
        plt.yticks(range(len(OVRS_NAMES)), [None] + list(range(1, len(OVRS_NAMES)))[::-1])
        plt.xticks(rotation=90)

        # Metric titles on the first row, classifier names on the first column
        if row == 0:
            plt.title(metric)
        if col == 0:
            plt.ylabel(f'{clf}')

        # Hide tick labels on the inner subplots
        if row != len(ranks.Classifier.unique()) - 1:
            plt.xticks(range(len(OVRS_NAMES)), [])
        if col != 0:
            plt.yticks(range(len(OVRS_NAMES)), [])

        sns.despine(left=True)
        plt.grid(visible=False, axis='x')

    fig.savefig(
        join(analysis_path, 'mean_rankings_bar_chart.pdf'),
        format='pdf', bbox_inches='tight'
    )
    plt.close()
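The bars encode inverted ranks (len(OVRS_NAMES) minus the mean rank) so that the best, i.e. lowest, rank produces the tallest bar, and the y ticks are then relabeled with the original rank values. A small self-contained sketch of that inversion, using made-up rank values and illustrative oversampler names:

import pandas as pd

ovrs_names = ['NONE', 'RANDOM', 'SMOTE', 'G-SMOTE']  # illustrative names
mean_ranks = pd.Series([4.0, 3.0, 2.0, 1.0], index=ovrs_names)  # lower rank is better
bar_heights = len(ovrs_names) - mean_ranks  # rank 1 -> tallest bar
print(bar_heights)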
Example #5
def generate_main_results():
    """Generate the main results of the experiment."""

    # Generate results
    results = generate_results()

    # Calculate results
    mean_sem_scores = sort_tbl(
        generate_mean_std_tbl(*calculate_mean_sem_scores(results)),
        ovrs_order=OVERSAMPLERS_NAMES,
        clfs_order=CLASSIFIERS_NAMES)
    keys = mean_sem_scores[['Classifier', 'Metric']]
    # Percentage difference of scores between each oversampler and G-SOMO
    mean_sem_perc_diff_scores = []
    for oversampler in ('SMOTE', 'K-MEANS SMOTE', 'SOMO', 'G-SMOTE'):
        perc_diff_scores = sort_tbl(
            generate_mean_std_tbl(*calculate_mean_sem_perc_diff_scores(
                results, [oversampler, 'G-SOMO'])),
            ovrs_order=OVERSAMPLERS_NAMES,
            clfs_order=CLASSIFIERS_NAMES)
        perc_diff_scores = perc_diff_scores.rename(columns={
            'Difference': oversampler
        }).drop(columns=['Classifier', 'Metric'])
        mean_sem_perc_diff_scores.append(perc_diff_scores)
    mean_sem_perc_diff_scores = pd.concat(
        [keys, pd.concat(mean_sem_perc_diff_scores, axis=1)], axis=1)
    mean_sem_ranking = sort_tbl(
        generate_mean_std_tbl(*calculate_mean_sem_ranking(results)),
        ovrs_order=OVERSAMPLERS_NAMES,
        clfs_order=CLASSIFIERS_NAMES)

    # Generate main results
    main_results_names = ('mean_sem_scores', 'mean_sem_perc_diff_scores',
                          'mean_sem_ranking')
    main_results = zip(
        main_results_names,
        (mean_sem_scores, mean_sem_perc_diff_scores, mean_sem_ranking))

    return main_results
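As in Example #1, the return value is a zip of (name, DataFrame) pairs. A hedged sketch of materializing it for inspection; nothing below is part of the original source:

main_results = dict(generate_main_results())

# The percentage-difference table has one column per compared oversampler.
print(main_results['mean_sem_perc_diff_scores'].head())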