def test_random_oversampling_limit_case(plot=False):
    """Execute k-means SMOTE with parameters equivalent to random oversampling"""
    kmeans_smote = KMeansSMOTE(
        random_state=RND_SEED,
        imbalance_ratio_threshold=float('Inf'),
        kmeans_args={
            'n_clusters': 1
        },
        smote_args={
            'k_neighbors': 0
        }
    )
    random_oversampler = RandomOverSampler(random_state=RND_SEED)
    X_resampled, y_resampled = kmeans_smote.fit_sample(X, Y)
    X_resampled_random_oversampler, y_resampled_random_oversampler = random_oversampler.fit_sample(
        X, Y)

    if plot:
        plot_resampled(X_resampled, y_resampled,
                       'random_oversampling_limit_case_test_kmeans_smote')
        plot_resampled(X_resampled_random_oversampler, y_resampled_random_oversampler,
                       'random_oversampling_limit_case_test_random_oversampling')

    assert_array_equal(X_resampled, X_resampled_random_oversampler)
    assert_array_equal(y_resampled, y_resampled_random_oversampler)
def smote(target_df, target_outcome, grouping):
    y = target_df[target_outcome]
    target_df.drop(target_outcome, axis=1, inplace=True)
    target_df[grouping] = [
        float(x.partition('_')[2]) for x in target_df[grouping]
    ]
    # target_df.drop(grouping, axis=1, inplace=True)
    X = target_df
    target_columns = target_df.columns
    # target_columns = target_columns.insert(0, grouping)
    target_columns = target_columns.insert(len(target_columns), target_outcome)

    kmeans_smote = KMeansSMOTE(kmeans_args={'n_clusters': 5},
                               smote_args={'k_neighbors': 10})
    X_resampled, y_resampled = kmeans_smote.fit_sample(X, y)

    X_resampled = pd.DataFrame(X_resampled)
    y_resampled = pd.DataFrame(y_resampled)
    frames = [X_resampled, y_resampled]
    total_df = pd.concat(frames, axis=1)
    total_df.columns = target_columns
    total_df[grouping] = ['p_' + str(x) for x in total_df[grouping]]
    return total_df
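# Minimal usage sketch for the smote() helper above, not taken from the
# original project: the DataFrame, column names ('participant', 'feature_a',
# 'outcome') and data are hypothetical. It assumes a grouping column whose
# values look like 'p_<number>'. Note that the helper mutates its input
# DataFrame (it drops the outcome column in place).
import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
n = 200
example_df = pd.DataFrame({
    'participant': ['p_{}'.format(i % 10) for i in range(n)],
    'feature_a': rng.normal(size=n),
    'outcome': (rng.rand(n) < 0.2).astype(int),   # imbalanced binary outcome
})
balanced_df = smote(example_df, target_outcome='outcome', grouping='participant')
# After oversampling, the two outcome classes should have (roughly) equal counts.
print(balanced_df['outcome'].value_counts())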
def test_smote_fallback(plot=False):
    """Assert that regular SMOTE is applied if no minority clusters are found."""
    kmeans_smote = KMeansSMOTE(
        random_state=RND_SEED,
        kmeans_args={
            'n_clusters': 1
        }
    )
    smote = SMOTE(random_state=RND_SEED)

    with warnings.catch_warnings(record=True) as w:
        X_resampled, y_resampled = kmeans_smote.fit_sample(X, Y)
        assert len(w) == 1
        assert "No minority clusters found" in str(w[0].message)
        assert "Performing regular SMOTE" in str(w[0].message)
        assert issubclass(w[0].category, UserWarning)

    X_resampled_smote, y_resampled_smote = smote.fit_sample(X, Y)

    if plot:
        plot_resampled(X_resampled, y_resampled, 'smote_fallback_test_kmeans_smote')
        plot_resampled(X_resampled_smote, y_resampled_smote, 'smote_fallback_test_smote')

    assert_array_equal(X_resampled, X_resampled_smote)
    assert_array_equal(y_resampled, y_resampled_smote)
def test_smoke(plot=False):
    """Execute k-means SMOTE with default parameters"""
    kmeans_smote = KMeansSMOTE(random_state=RND_SEED)
    X_resampled, y_resampled = kmeans_smote.fit_sample(X, Y)
    assert (np.unique(y_resampled, return_counts=True)[1]
            == np.unique(Y_EXPECTED, return_counts=True)[1]).all()
    assert (X_resampled.shape == X_SHAPE_EXPECTED)
    if plot:
        plot_resampled(X, X_resampled, Y, y_resampled, 'smoke_test')
def test_backwards_compatibility(plot=False):
    """Test if deprecated parameter ratio can still be used without error"""
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning)
        kmeans_smote = KMeansSMOTE(random_state=RND_SEED, ratio={0: Y.sum()})
        X_resampled, y_resampled = kmeans_smote.fit_sample(X, Y)
    assert (np.unique(y_resampled, return_counts=True)[1]
            == np.unique(Y_EXPECTED, return_counts=True)[1]).all()
    assert (X_resampled.shape == X_SHAPE_EXPECTED)
    if plot:
        plot_resampled(X, X_resampled, Y, y_resampled, 'smoke_test')
def test_smoke_multiclass(plot=False):
    """Execute k-means SMOTE with default parameters for multi-class dataset"""
    kmeans_smote = KMeansSMOTE(random_state=RND_SEED)
    X_resampled, y_resampled = kmeans_smote.fit_sample(X_MULTICLASS, Y_MULTICLASS)
    assert (np.unique(y_resampled, return_counts=True)[1]
            == np.unique(Y_MULTICLASS_EXPECTED, return_counts=True)[1]).all()
    assert (X_resampled.shape == X_MULTICLASS_SHAPE_EXPECTED)
    if plot:
        plot_resampled(X_MULTICLASS, X_resampled, Y_MULTICLASS, y_resampled,
                       'smoke_multiclass_test')
def test_multiclass(plot=False):
    """Execute k-means SMOTE for multi-class dataset with user-defined n_clusters"""
    kmeans_smote = KMeansSMOTE(random_state=RND_SEED,
                               kmeans_args={'n_clusters': 10})
    X_resampled, y_resampled = kmeans_smote.fit_sample(X_MULTICLASS, Y_MULTICLASS)
    assert (np.unique(y_resampled, return_counts=True)[1]
            == np.unique(Y_MULTICLASS_EXPECTED, return_counts=True)[1]).all()
    assert (X_resampled.shape == X_MULTICLASS_SHAPE_EXPECTED)
    if plot:
        plot_resampled(X_MULTICLASS, X_resampled, Y_MULTICLASS, y_resampled,
                       'multiclass_test')
def test_documentation_example():
    """Test basic code example shown in documentation"""
    from imblearn.datasets import fetch_datasets
    datasets = fetch_datasets(filter_data=['oil'])
    X, y = datasets['oil']['data'], datasets['oil']['target']

    labels, counts = np.unique(y, return_counts=True)
    assert counts[0] > counts[1]

    kmeans_smote = KMeansSMOTE(
        kmeans_args={'n_clusters': 100},
        smote_args={'k_neighbors': 10}
    )
    X_resampled, y_resampled = kmeans_smote.fit_sample(X, y)

    labels, counts = np.unique(y_resampled, return_counts=True)
    assert counts[0] == counts[1]
def test_smote_limit_case(plot=False):
    """Execute k-means SMOTE with parameters equivalent to SMOTE"""
    kmeans_smote = KMeansSMOTE(
        random_state=RND_SEED,
        imbalance_ratio_threshold=float('Inf'),
        kmeans_args={'n_clusters': 1}
    )
    smote = SMOTE(random_state=RND_SEED)
    X_resampled, y_resampled = kmeans_smote.fit_sample(X, Y)
    X_resampled_smote, y_resampled_smote = smote.fit_sample(X, Y)

    if plot:
        plot_resampled(X, X_resampled, Y, y_resampled,
                       'smote_limit_case_test_kmeans_smote')
        plot_resampled(X, X_resampled_smote, Y, y_resampled_smote,
                       'smote_limit_case_test_smote')

    assert_array_equal(X_resampled, X_resampled_smote)
    assert_array_equal(y_resampled, y_resampled_smote)
def test_multiclass_irt_dict(plot=False):
    """
    Execute k-means SMOTE for multi-class dataset with different
    imbalance ratio thresholds per class.
    """
    kmeans_smote = KMeansSMOTE(
        random_state=RND_SEED,
        kmeans_args={'n_clusters': 10},
        imbalance_ratio_threshold={
            1: 1,
            2: np.inf
        }
    )
    X_resampled, y_resampled = kmeans_smote.fit_sample(X_MULTICLASS, Y_MULTICLASS)
    assert (np.unique(y_resampled, return_counts=True)[1]
            == np.unique(Y_MULTICLASS_EXPECTED, return_counts=True)[1]).all()
    assert (X_resampled.shape == X_MULTICLASS_SHAPE_EXPECTED)
    if plot:
        plot_resampled(X_MULTICLASS, X_resampled, Y_MULTICLASS, y_resampled,
                       'multiclass_test')
def test_smote_limit_case_multiclass(plot=False):
    """Execute k-means SMOTE with parameters equivalent to SMOTE"""
    kmeans_smote = KMeansSMOTE(
        random_state=RND_SEED,
        imbalance_ratio_threshold=float('Inf'),
        kmeans_args={'n_clusters': 1},
        smote_args={'k_neighbors': 3}
    )
    smote = SMOTE(random_state=RND_SEED, k_neighbors=3)
    X_resampled, y_resampled = kmeans_smote.fit_sample(X_MULTICLASS, Y_MULTICLASS)
    X_resampled_smote, y_resampled_smote = smote.fit_sample(
        X_MULTICLASS, Y_MULTICLASS)

    if plot:
        plot_resampled(X_MULTICLASS, X_resampled, Y_MULTICLASS, y_resampled,
                       'smote_limit_case_multiclass_test_kmeans_smote')
        plot_resampled(X_MULTICLASS, X_resampled_smote, Y_MULTICLASS, y_resampled_smote,
                       'smote_limit_case_multiclass_test_smote')

    assert_array_equal(X_resampled, X_resampled_smote)
    assert_array_equal(y_resampled, y_resampled_smote)
def split_dataset(X_1, y_1, X_0, y_0, test_size=0.2, random_seed=0, smote=True):
    if smote:
        dataX_1, testX_1, datay_1, testy_1 = train_test_split(
            X_1, y_1, test_size=0.16, random_state=random_seed)
        dataX_0, testX_0, datay_0, testy_0 = train_test_split(
            X_0, y_0, test_size=0.082, random_state=random_seed)
        test_X, test_y = shuffled(array_joint(testX_1, testX_0),
                                  array_joint(testy_1, testy_0),
                                  random_seed=random_seed)
        # print(len(testy_1), len(testy_0), len(testy_1) + len(testy_0))
        addX, addy = shuffled(array_joint(dataX_1, dataX_0),
                              array_joint(datay_1, datay_0),
                              random_seed=random_seed)
        kmeans_smote = KMeansSMOTE(kmeans_args={'n_clusters': 2},
                                   smote_args={'k_neighbors': 2},
                                   random_state=random_seed)
        X_resampled, y_resampled = kmeans_smote.fit_sample(addX, addy)
        # datay_1 = np.ones(len(smote_dataX_1), dtype=np.int16)
        train_X, train_y = shuffled(X_resampled, y_resampled,
                                    random_seed=random_seed)
    else:
        dataX_1, testX_1, datay_1, testy_1 = train_test_split(
            X_1, y_1, test_size=0.2, random_state=random_seed)
        dataX_0, testX_0, datay_0, testy_0 = train_test_split(
            X_0, y_0, test_size=0.067, random_state=random_seed)
        test_X, test_y = shuffled(array_joint(testX_1, testX_0),
                                  array_joint(testy_1, testy_0),
                                  random_seed=random_seed)
        train_X, train_y = shuffled(array_joint(dataX_1, dataX_0),
                                    array_joint(datay_1, datay_0),
                                    random_seed=random_seed)
    return train_X, test_X, train_y, test_y
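# Hypothetical usage sketch for split_dataset() above. The original project's
# array_joint() and shuffled() helpers are not shown here, so the versions
# below are assumed stand-ins (concatenate two arrays; jointly shuffle
# features and labels). The imports also cover what split_dataset itself
# relies on; the synthetic two-class data is illustrative only.
import numpy as np
from sklearn.model_selection import train_test_split
from kmeans_smote import KMeansSMOTE


def array_joint(a, b):
    # Assumed behaviour: stack the two inputs along the first axis.
    return np.concatenate([a, b], axis=0)


def shuffled(X, y, random_seed=0):
    # Assumed behaviour: apply the same random permutation to X and y.
    order = np.random.RandomState(random_seed).permutation(len(y))
    return X[order], y[order]


rng = np.random.RandomState(0)
X_1, y_1 = rng.normal(1, 1, size=(50, 4)), np.ones(50, dtype=int)      # minority class
X_0, y_0 = rng.normal(-1, 1, size=(450, 4)), np.zeros(450, dtype=int)  # majority class
train_X, test_X, train_y, test_y = split_dataset(X_1, y_1, X_0, y_0,
                                                 random_seed=0, smote=True)
# The training labels should be (roughly) balanced after k-means SMOTE.
print(np.unique(train_y, return_counts=True))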
from kmeans_smote import KMeansSMOTE
from TestDataGeneration.data_generation import DataGenerator
from experiment import Experiment

if __name__ == "__main__":
    total_number = 10000
    ratio = 0.9
    dg1 = DataGenerator(total_number=total_number, ratio=ratio)
    data_train, label_train = dg1.generate()
    dg2 = DataGenerator(total_number=total_number, ratio=ratio)
    data_test, label_test = dg2.generate()

    Ratio = "minority"
    kmeans_args = {"n_clusters": 20}
    imbalance_ratio_threshold = 20
    smote_args = {"k_neighbors": 10}
    kmeans_smote = KMeansSMOTE(
        ratio=Ratio,
        kmeans_args=kmeans_args,
        smote_args=smote_args,
        imbalance_ratio_threshold=imbalance_ratio_threshold)
    X_resampled, Y_resampled = kmeans_smote.fit_sample(data_train, label_train)

    exp = Experiment(data=X_resampled, label=Y_resampled)
    n_neighbors = [1, 3, 5, 7, 9, 11, 13, 15, 17]
    for neighbor in n_neighbors:
        true_posi, false_posi, true_neg, false_neg = exp.get_confusion_matrix(
            data=data_test, label=label_test, n_neighbors=neighbor)
        print("true_posi:", true_posi, "false_posi:", false_posi,
              "true_neg:", true_neg, "false_neg:", false_neg)
from sklearn.preprocessing import LabelEncoder
import numpy as np
# from imblearn.datasets import fetch_datasets
from kmeans_smote import KMeansSMOTE

ly = LabelEncoder()
y = ly.fit_transform(y)

[
    print('Class {} has {} instances'.format(label, count))
    for label, count in zip(*np.unique(y, return_counts=True))
]

kmeans_smote = KMeansSMOTE(sampling_strategy='minority',
                           kmeans_args={'n_clusters': 100},
                           smote_args={'k_neighbors': 10})
X_resampled, y_resampled = kmeans_smote.fit_sample(X, y)

[
    print('Class {} has {} instances after oversampling'.format(label, count))
    for label, count in zip(*np.unique(y_resampled, return_counts=True))
]

# Splitting Training and Test Set
# Since we have a very small dataset, we will train our model with all available data.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X_resampled, y_resampled,
                                                     test_size=0.2)

from sklearn.svm import SVC
svc1 = SVC(C=50, kernel='rbf', gamma=1)
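# A plausible continuation of the snippet above (assumed, not taken from the
# original source): fit the SVC on the oversampled training split and report
# accuracy on the held-out portion.
from sklearn.metrics import accuracy_score

svc1.fit(x_train, y_train)
y_pred = svc1.predict(x_test)
print('Test accuracy: {:.3f}'.format(accuracy_score(y_test, y_pred)))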