Ejemplo n.º 1
0
def test_rest(x, y, c=0, ratio='auto'):
    """Resample ``(x, y)`` with the single sampler selected by ``c``.

    Selector values (compared with ``==``, in this order):
        0 -- UnderSampler (random under-sampling)
        1 -- TomekLinks
        2 -- ClusterCentroids
        3 -- NearMiss, version 1
        4 -- NearMiss, version 2
        5 -- NearMiss, version 3
        6 -- NeighbourhoodCleaningRule
        7 -- OverSampler (random over-sampling)
        8 -- SMOTETomek
        9 -- SMOTEENN
        anything else -- EasyEnsemble

    Keyword arguments:
    x -- passed as the first argument to the sampler's ``fit_transform``
    y -- passed as the second argument to the sampler's ``fit_transform``
    c -- selector described above (default 0)
    ratio -- forwarded unchanged to the sampler constructor (default 'auto')

    The names ``verbose`` and ``indices_support`` are not parameters; they
    are read from the enclosing scope when the chosen sampler is built.

    Returns the resampled ``(x, y)`` pair produced by ``fit_transform``.
    For selectors 0, 3, 4, 5 and 6 the call is unpacked into three values
    and the third one is printed after an 'Indices selected' banner.
    """
    # Each entry: (selector, (banner, factory, unpack_three_values)).
    # Factories are lambdas so that only the chosen sampler is ever built.
    table = (
        (0, ('Random under-sampling',
             lambda: UnderSampler(indices_support=indices_support,
                                  verbose=verbose, ratio=ratio),
             True)),
        (1, ('Tomek links',
             lambda: TomekLinks(verbose=verbose, ratio=ratio),
             False)),
        (2, ('Clustering centroids',
             lambda: ClusterCentroids(verbose=verbose, ratio=ratio),
             False)),
        (3, ('NearMiss-1',
             lambda: NearMiss(version=1, indices_support=indices_support,
                              verbose=verbose, ratio=ratio),
             True)),
        (4, ('NearMiss-2',
             lambda: NearMiss(version=2, indices_support=indices_support,
                              verbose=verbose, ratio=ratio),
             True)),
        (5, ('NearMiss-3',
             lambda: NearMiss(version=3, indices_support=indices_support,
                              verbose=verbose, ratio=ratio),
             True)),
        (6, ('Neighboorhood Cleaning Rule',
             lambda: NeighbourhoodCleaningRule(
                 indices_support=indices_support,
                 verbose=verbose, ratio=ratio),
             True)),
        (7, ('Random over-sampling',
             lambda: OverSampler(verbose=verbose, ratio=ratio),
             False)),
        (8, ('SMOTE Tomek links',
             lambda: SMOTETomek(verbose=verbose, ratio=ratio),
             False)),
        (9, ('SMOTE ENN',
             lambda: SMOTEENN(verbose=verbose, ratio=ratio),
             False)),
    )

    # Fallback used when no selector matches.
    banner, build, with_indices = (
        'EasyEnsemble',
        lambda: EasyEnsemble(verbose=verbose, ratio=ratio),
        False,
    )
    for code, entry in table:
        if c == code:
            banner, build, with_indices = entry
            break

    print(banner)
    sampler = build()
    if with_indices:
        x, y, picked = sampler.fit_transform(x, y)
        print('Indices selected')
        print(picked)
    else:
        x, y = sampler.fit_transform(x, y)
    return x, y
Ejemplo n.º 2
0
def test_rest(x, y):
    """Run every sampler once on the same ``(x, y)`` and print progress.

    The samplers are exercised in a fixed order: UnderSampler, TomekLinks,
    ClusterCentroids, NearMiss versions 1-3, NeighbourhoodCleaningRule,
    OverSampler, SMOTETomek, SMOTEENN and EasyEnsemble. A banner is printed
    before each sampler is built. For the samplers constructed with
    ``indices_support`` the ``fit_transform`` result is unpacked into three
    values and the third one is printed after an 'Indices selected' banner.

    Keyword arguments:
    x -- passed as the first argument to each ``fit_transform`` call
    y -- passed as the second argument to each ``fit_transform`` call

    The names ``verbose`` and ``indices_support`` are not parameters; they
    are read from the enclosing scope. Resampled outputs are discarded and
    nothing is returned.
    """
    # Each entry: (banner, factory, unpack_three_values). Factories are
    # lambdas so every sampler is built only after its banner is printed.
    runs = (
        ('Random under-sampling',
         lambda: UnderSampler(indices_support=indices_support,
                              verbose=verbose),
         True),
        ('Tomek links',
         lambda: TomekLinks(verbose=verbose),
         False),
        ('Clustering centroids',
         lambda: ClusterCentroids(verbose=verbose),
         False),
        ('NearMiss-1',
         lambda: NearMiss(version=1, indices_support=indices_support,
                          verbose=verbose),
         True),
        ('NearMiss-2',
         lambda: NearMiss(version=2, indices_support=indices_support,
                          verbose=verbose),
         True),
        ('NearMiss-3',
         lambda: NearMiss(version=3, indices_support=indices_support,
                          verbose=verbose),
         True),
        ('Neighboorhood Cleaning Rule',
         lambda: NeighbourhoodCleaningRule(indices_support=indices_support,
                                           verbose=verbose),
         True),
        ('Random over-sampling',
         lambda: OverSampler(verbose=verbose),
         False),
        ('SMOTE Tomek links',
         lambda: SMOTETomek(verbose=verbose),
         False),
        ('SMOTE ENN',
         lambda: SMOTEENN(verbose=verbose),
         False),
        ('EasyEnsemble',
         lambda: EasyEnsemble(verbose=verbose),
         False),
    )

    for banner, build, with_indices in runs:
        print(banner)
        sampler = build()
        if with_indices:
            _, _, picked = sampler.fit_transform(x, y)
            print('Indices selected')
            print(picked)
        else:
            # Two-target unpacking kept so a wrong arity still raises.
            _, _ = sampler.fit_transform(x, y)
def nearmiss_undersampling(X, y, version):
    """
    Perform NearMiss undersampling.

    Keyword arguments:
    X -- The feature vectors
    y -- The target classes
    version -- NearMiss variant, forwarded as ``NearMiss(version=version)``

    Returns the pair ``(X_undersampled, y_undersampled)`` produced by the
    sampler's ``fit_transform``.

    The name ``verbose`` is not a parameter; it is read from the enclosing
    scope, gates the progress message and is forwarded to ``NearMiss``.
    """
    if verbose:
        # Parenthesised single-argument form: prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print('\nUndersampling with NearMiss-' + str(version) + ' ...')

    undersampler = NearMiss(verbose=verbose, version=version)
    X_undersampled, y_undersampled = undersampler.fit_transform(X, y)
    return X_undersampled, y_undersampled
def nearmiss_undersampling(X, y, version):
    """
    Perform NearMiss undersampling.

    Keyword arguments:
    X -- The feature vectors
    y -- The target classes
    version -- NearMiss variant, forwarded as ``NearMiss(version=version)``

    Returns the pair ``(X_undersampled, y_undersampled)`` produced by the
    sampler's ``fit_transform``.

    The name ``verbose`` is not a parameter; it is read from the enclosing
    scope, gates the progress message and is forwarded to ``NearMiss``.
    """
    if verbose:
        # Parenthesised single-argument form: prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print('\nUndersampling with NearMiss-' + str(version) + ' ...')

    undersampler = NearMiss(verbose=verbose, version=version)
    X_undersampled, y_undersampled = undersampler.fit_transform(X, y)
    return X_undersampled, y_undersampled
Ejemplo n.º 5
0
def test_nm1_fit_transform_half():
    """Check NearMiss fit_transform output against stored arrays, ratio .5.

    Builds a ``NearMiss`` sampler with ``ratio=.5``, the shared ``RND_SEED``
    and ``VERSION_NEARMISS``, resamples the ``X``/``Y`` fixtures from the
    enclosing scope, and compares both outputs with the reference arrays
    ``nm1_x_05.npy`` / ``nm1_y_05.npy`` in the ``data`` directory next to
    this file.
    """
    # Resample with the .5 under-sampling ratio.
    sampler = NearMiss(ratio=.5, random_state=RND_SEED,
                       version=VERSION_NEARMISS)
    X_resampled, y_resampled = sampler.fit_transform(X, Y)

    # Reference arrays live in ./data relative to this test module.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')
    expected_X = np.load(os.path.join(data_dir, 'nm1_x_05.npy'))
    expected_y = np.load(os.path.join(data_dir, 'nm1_y_05.npy'))

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
Ejemplo n.º 6
0
def test_nm1_fit_transform_auto_indices():
    """Check NearMiss fit_transform with 'auto' ratio and returned indices.

    Builds a ``NearMiss`` sampler with ``ratio='auto'``, the shared
    ``RND_SEED`` and ``VERSION_NEARMISS``, and ``return_indices=True``.
    It resamples the ``X``/``Y`` fixtures from the enclosing scope and
    compares the three outputs with the reference arrays ``nm1_x.npy``,
    ``nm1_y.npy`` and ``nm1_idx.npy`` in the ``data`` directory next to
    this file.
    """
    # Resample with the automatic ratio, asking for the kept indices too.
    sampler = NearMiss(ratio='auto', random_state=RND_SEED,
                       version=VERSION_NEARMISS, return_indices=True)
    X_resampled, y_resampled, idx_under = sampler.fit_transform(X, Y)

    # Reference arrays live in ./data relative to this test module.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')
    expected_X = np.load(os.path.join(data_dir, 'nm1_x.npy'))
    expected_y = np.load(os.path.join(data_dir, 'nm1_y.npy'))
    expected_idx = np.load(os.path.join(data_dir, 'nm1_idx.npy'))

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
    assert_array_equal(idx_under, expected_idx)
Ejemplo n.º 7
0
from unbalanced_dataset.under_sampling import NearMiss

# Generate the dataset
X, y = make_classification(n_classes=2, class_sep=2, weights=[0.1, 0.9],
                           n_informative=3, n_redundant=1, flip_y=0,
                           n_features=20, n_clusters_per_class=1,
                           n_samples=5000, random_state=10)

# Instantiate a PCA object for the sake of easy visualisation
pca = PCA(n_components=2)
# Fit and transform x to visualise inside a 2D feature space
X_vis = pca.fit_transform(X)

# Apply NearMiss-1 under-sampling
nm3 = NearMiss(version=1)
X_resampled, y_resampled = nm3.fit_transform(X, y)
X_res_vis = pca.transform(X_resampled)

# Two subplots, unpack the axes array immediately
f, (ax1, ax2) = plt.subplots(1, 2)

ax1.scatter(X_vis[y == 0, 0], X_vis[y == 0, 1], label="Class #0", alpha=0.5,
            edgecolor=almost_black, facecolor=palette[0], linewidth=0.15)
ax1.scatter(X_vis[y == 1, 0], X_vis[y == 1, 1], label="Class #1", alpha=0.5,
            edgecolor=almost_black, facecolor=palette[2], linewidth=0.15)
ax1.set_title('Original set')

ax2.scatter(X_res_vis[y_resampled == 0, 0], X_res_vis[y_resampled == 0, 1],
            label="Class #0", alpha=.5, edgecolor=almost_black,
            facecolor=palette[0], linewidth=0.15)
ax2.scatter(X_res_vis[y_resampled == 1, 0], X_res_vis[y_resampled == 1, 1],
                           n_informative=3,
                           n_redundant=1,
                           flip_y=0,
                           n_features=20,
                           n_clusters_per_class=1,
                           n_samples=5000,
                           random_state=10)

# Instantiate a PCA object for the sake of easy visualisation
pca = PCA(n_components=2)
# Fit and transform x to visualise inside a 2D feature space
X_vis = pca.fit_transform(X)

# Apply NearMiss-1 under-sampling
nm3 = NearMiss(version=1)
X_resampled, y_resampled = nm3.fit_transform(X, y)
X_res_vis = pca.transform(X_resampled)

# Two subplots, unpack the axes array immediately
f, (ax1, ax2) = plt.subplots(1, 2)

ax1.scatter(X_vis[y == 0, 0],
            X_vis[y == 0, 1],
            label="Class #0",
            alpha=0.5,
            edgecolor=almost_black,
            facecolor=palette[0],
            linewidth=0.15)
ax1.scatter(X_vis[y == 1, 0],
            X_vis[y == 1, 1],
            label="Class #1",