Esempio n. 1
0
def test_random_over_sampler_shrinkage_behaviour(data):
    """Check that a larger ``shrinkage`` yields more dispersed samples.

    The same sampler is resampled with ``shrinkage=1`` and then
    ``shrinkage=5``. For each run the determinant of the covariance matrix
    of the class-0 samples is used as a scalar measure of dispersion, and
    the value obtained with the larger shrinkage must be strictly greater.
    """
    X, y = data

    def class_0_dispersion(sampler):
        # Resample, keep only the rows labelled 0 and summarise their spread
        # by the determinant of the covariance matrix. ``np.cov`` treats rows
        # as variables by default, hence the transpose.
        X_res, y_res = sampler.fit_resample(X, y)
        class_0_cov = np.cov(X_res[y_res == 0].T)
        return np.linalg.det(class_0_cov)

    sampler = RandomOverSampler(shrinkage=1, random_state=0)
    dispersion_shrink_1 = class_0_dispersion(sampler)

    # Reuse the same estimator; only the shrinkage factor changes.
    sampler.set_params(shrinkage=5)
    dispersion_shrink_5 = class_0_dispersion(sampler)

    assert dispersion_shrink_1 < dispersion_shrink_5
Esempio n. 2
0
# Title the second panel with the class name of the first element of `model`.
pipeline_panel_title = f"Using {model[0].__class__.__name__}"
plot_decision_function(X, y, model, axs[1], pipeline_panel_title)

# Name the whole figure after the classifier, then tidy up the layout.
figure_title = f"Decision function of {clf.__class__.__name__}"
fig.suptitle(figure_title)
fig.tight_layout()

# %% [markdown]
# By default, random over-sampling generates a bootstrap. The parameter
# `shrinkage` adds a small perturbation to the resampled data, producing a
# smoothed bootstrap instead. The plot below shows the difference between the
# two data generation strategies.

# %%
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(15, 7))

# One panel per `shrinkage` setting: `None` first, then 0.3. The same
# sampler instance is reconfigured between the two plots.
bootstrap_variants = (
    (None, "Normal bootstrap"),
    (0.3, "Smoothed bootstrap"),
)
for panel, (shrinkage_value, panel_title) in zip(axs, bootstrap_variants):
    sampler.set_params(shrinkage=shrinkage_value)
    plot_resampling(X, y, sampler, ax=panel, title=panel_title)

fig.suptitle(f"Resampling with {sampler.__class__.__name__}")
fig.tight_layout()

# %% [markdown]
# It looks like more samples are generated with the smoothed bootstrap. This is
# because the generated samples no longer overlap exactly with the original
# samples.
#
# More advanced over-sampling using ADASYN and SMOTE
# --------------------------------------------------