Example #1
def generate_main_results():
    """Generate the main results of the experiment."""

    wide_optimal = sort_tbl(
        calculate_wide_optimal(results),
        ovrs_order=OVRS_NAMES, clfs_order=CLFS_NAMES
    )\
        .set_index(['Dataset', 'Classifier', 'Metric'])\
        .apply(lambda row: make_bold(row, num_decimals=3), axis=1)\
        .reset_index()
    wide_optimal['Dataset'] = wide_optimal['Dataset'].apply(
        lambda x: x.title()
        if len(x.split(' ')) == 1
        else ''.join([w[0] for w in x.split(' ')])
    )

    mean_sem_scores = sort_tbl(
        generate_mean_std_tbl_bold(*calculate_mean_sem_scores(results), maximum=True, decimals=3),
        ovrs_order=OVRS_NAMES, clfs_order=CLFS_NAMES
    )
    mean_sem_perc_diff_scores = sort_tbl(
        generate_mean_std_tbl(*calculate_mean_sem_perc_diff_scores(results, ['SMOTE', 'K-SMOTE'])),
        ovrs_order=OVRS_NAMES, clfs_order=CLFS_NAMES
    )
    mean_sem_ranking = sort_tbl(
        generate_mean_std_tbl_bold(*calculate_mean_sem_ranking(results), maximum=False),
        ovrs_order=OVRS_NAMES, clfs_order=CLFS_NAMES
    )
    main_results_names = ('wide_optimal', 'mean_sem_scores', 'mean_sem_perc_diff_scores', 'mean_sem_ranking')

    return zip(main_results_names, (wide_optimal, mean_sem_scores, mean_sem_perc_diff_scores, mean_sem_ranking))
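A minimal usage sketch for this example: the function yields (name, table) pairs, so a caller could iterate over them once and persist each table. generate_main_results is assumed to be in scope, and RESULTS_PATH is a hypothetical output directory.

from os.path import join

# Hypothetical output directory; adjust to the project's layout.
RESULTS_PATH = 'results'

# Each pair is (table name, pandas DataFrame); write one CSV per table.
for name, table in generate_main_results():
    table.to_csv(join(RESULTS_PATH, f'{name}.csv'), index=False)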
Example #2
def generate_main_results():
    """Generate the main results of the experiment."""

    main_results = {}
    for ratio in UNDERSAMPLING_RATIOS:

        # Generate results
        results = generate_results(ratio)

        # Calculate results
        mean_sem_scores = sort_tbl(
            generate_mean_std_tbl(*calculate_mean_sem_scores(results)),
            ovrs_order=OVERSAMPLERS_NAMES,
            clfs_order=CLASSIFIERS_NAMES)
        mean_sem_perc_diff_scores = sort_tbl(
            generate_mean_std_tbl(*calculate_mean_sem_perc_diff_scores(
                results, ['NO OVERSAMPLING', 'G-SMOTE'])),
            ovrs_order=OVERSAMPLERS_NAMES,
            clfs_order=CLASSIFIERS_NAMES)
        mean_sem_ranking = sort_tbl(
            generate_mean_std_tbl(*calculate_mean_sem_ranking(results)),
            ovrs_order=OVERSAMPLERS_NAMES,
            clfs_order=CLASSIFIERS_NAMES)

        # Populate main results
        main_results_names = ('mean_sem_scores', 'mean_sem_perc_diff_scores',
                              'mean_sem_ranking')
        main_results[ratio] = zip(
            main_results_names,
            (mean_sem_scores, mean_sem_perc_diff_scores, mean_sem_ranking))

    return main_results
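A similar hedged sketch for this variant: main_results maps each undersampling ratio to a single-use iterator of (name, table) pairs, so each ratio's tables can be written out in one pass. The file naming below is hypothetical.

# Hypothetical usage: one CSV per (result name, undersampling ratio) pair.
for ratio, tables in generate_main_results().items():
    for name, table in tables:
        table.to_csv(f'{name}_{ratio}.csv', index=False)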
Example #3
def test_mean_sem_scores():
    """Test the mean scores results of the experiment."""
    mean_scores, sem_scores = calculate_mean_sem_scores(EXPERIMENT.results_)
    assert set(mean_scores.Classifier.unique()) == set(
        EXPERIMENT.classifiers_names_)
    assert set(sem_scores.Classifier.unique()) == set(
        EXPERIMENT.classifiers_names_)
    assert set(mean_scores.columns[2:]) == set(EXPERIMENT.oversamplers_names_)
    assert set(sem_scores.columns[2:]) == set(EXPERIMENT.oversamplers_names_)
    assert len(mean_scores) == len(CLASSIFIERS)
    assert len(sem_scores) == len(CLASSIFIERS)
Example #4
def generate_main_results():
    """Generate the main results of the experiment."""

    # Generate results
    results = generate_results()

    # Calculate results
    mean_sem_scores = sort_tbl(
        generate_mean_std_tbl(*calculate_mean_sem_scores(results)),
        ovrs_order=OVERSAMPLERS_NAMES,
        clfs_order=CLASSIFIERS_NAMES)
    keys = mean_sem_scores[['Classifier', 'Metric']]
    mean_sem_perc_diff_scores = []
    for oversampler in ('SMOTE', 'K-MEANS SMOTE', 'SOMO', 'G-SMOTE'):
        perc_diff_scores = sort_tbl(
            generate_mean_std_tbl(*calculate_mean_sem_perc_diff_scores(
                results, [oversampler, 'G-SOMO'])),
            ovrs_order=OVERSAMPLERS_NAMES,
            clfs_order=CLASSIFIERS_NAMES)
        perc_diff_scores = perc_diff_scores.rename(columns={
            'Difference': oversampler
        }).drop(columns=['Classifier', 'Metric'])
        mean_sem_perc_diff_scores.append(perc_diff_scores)
    mean_sem_perc_diff_scores = pd.concat(
        [keys, pd.concat(mean_sem_perc_diff_scores, axis=1)], axis=1)
    mean_sem_ranking = sort_tbl(
        generate_mean_std_tbl(*calculate_mean_sem_ranking(results)),
        ovrs_order=OVERSAMPLERS_NAMES,
        clfs_order=CLASSIFIERS_NAMES)

    # Generate main results
    main_results_names = ('mean_sem_scores', 'mean_sem_perc_diff_scores',
                          'mean_sem_ranking')
    main_results = zip(
        main_results_names,
        (mean_sem_scores, mean_sem_perc_diff_scores, mean_sem_ranking))

    return main_results
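The per-oversampler loop above renames each 'Difference' column after its oversampler and concatenates the pieces column-wise next to the Classifier/Metric keys. A self-contained toy illustration of that assembly pattern, with invented values:

import pandas as pd

# Toy data mimicking the structure built above (values are invented).
keys = pd.DataFrame({'Classifier': ['LR', 'KNN'], 'Metric': ['f1', 'f1']})
smote = pd.DataFrame({'Difference': ['1.2 ± 0.3', '0.9 ± 0.4']}).rename(
    columns={'Difference': 'SMOTE'})
gsmote = pd.DataFrame({'Difference': ['0.8 ± 0.2', '1.1 ± 0.5']}).rename(
    columns={'Difference': 'G-SMOTE'})

# Column-wise concatenation yields the wide percentage-difference table.
print(pd.concat([keys, pd.concat([smote, gsmote], axis=1)], axis=1))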