コード例 #1
0
ファイル: test.py プロジェクト: ARGHZ/ClassifTweets
def test_CNN(x, y):
    """Run three under-sampling methods on ``(x, y)``, one after another.

    Each sampler is announced on stdout, built with the ``verbose`` name
    taken from the enclosing scope, and fitted on the same input. The
    resampled arrays are discarded; nothing is returned.
    """
    samplers = (
        ('Condensed Nearest Neighbour', CondensedNearestNeighbour),
        ('One-Sided Selection', OneSidedSelection),
        ('BalanceCascade', BalanceCascade),
    )
    for title, sampler_cls in samplers:
        print(title)
        sampler = sampler_cls(verbose=verbose)
        # Unpack into two names so an unexpected return arity still fails.
        resampled_x, resampled_y = sampler.fit_transform(x, y)
コード例 #2
0
def test_CNN(x, y):
    """Fit Condensed Nearest Neighbour, One-Sided Selection and
    BalanceCascade on ``(x, y)`` in that order.

    The name of each method is printed before it runs. Every sampler is
    constructed with the ``verbose`` name from the enclosing scope. The
    resampled outputs are not kept and the function returns ``None``.
    """
    runs = [
        ('Condensed Nearest Neighbour', CondensedNearestNeighbour),
        ('One-Sided Selection', OneSidedSelection),
        ('BalanceCascade', BalanceCascade),
    ]
    for label, make_sampler in runs:
        print(label)
        under_sampler = make_sampler(verbose=verbose)
        # Two-way unpacking mirrors the expected (X, y) return value.
        out_x, out_y = under_sampler.fit_transform(x, y)
def test_cnn_fit_transform():
    """Check that fit_transform reproduces the stored reference arrays."""

    sampler = CondensedNearestNeighbour(random_state=RND_SEED)
    X_out, y_out = sampler.fit_transform(X, Y)

    # Reference arrays are read from the 'data' directory next to this file.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')
    X_expected = np.load(os.path.join(data_dir, 'cnn_x.npy'))
    y_expected = np.load(os.path.join(data_dir, 'cnn_y.npy'))

    assert_array_equal(X_out, X_expected)
    assert_array_equal(y_out, y_expected)
def test_cnn_fit():
    """Check the class statistics recorded by ``fit``."""

    sampler = CondensedNearestNeighbour(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Minority / majority labels found during fitting.
    assert_equal(sampler.min_c_, 0)
    assert_equal(sampler.maj_c_, 1)

    # Per-class sample counts, checked in label order.
    for label, expected_count in ((0, 500), (1, 4500)):
        assert_equal(sampler.stats_c_[label], expected_count)
def test_cnn_fit_transform_with_indices():
    """Check fit_transform output, including the returned indices, against
    the stored reference arrays."""

    sampler = CondensedNearestNeighbour(return_indices=True,
                                        random_state=RND_SEED)
    X_out, y_out, idx_out = sampler.fit_transform(X, Y)

    # Load every reference array first, then compare them pairwise.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')
    references = [np.load(os.path.join(data_dir, fname))
                  for fname in ('cnn_x.npy', 'cnn_y.npy', 'cnn_idx.npy')]

    for actual, expected in zip((X_out, y_out, idx_out), references):
        assert_array_equal(actual, expected)
コード例 #6
0
    def balance_data_ensemblesampling_condensed_nearest_neighbour(self):
        """Resample ``self.X`` / ``self.y`` with Condensed Nearest Neighbour.

        The labels are made one-dimensional in place before resampling.
        Afterwards ``self.X`` and ``self.y`` are replaced by the resampled
        data, and the labels are given a single-column shape.
        """
        features = self.X
        labels = self.y
        # Assigning to .shape changes the array object held by self.y itself.
        labels.shape = (len(labels))

        sampler = CondensedNearestNeighbour(verbose=True)
        self.X, self.y = sampler.fit_transform(features, labels)

        # Store the labels as an (n_samples, 1) column.
        self.y.shape = (len(self.y), 1)
コード例 #7
0
def test_CNN(x, y):
    """Run three under-sampling methods on ``(x, y)`` and print the indices
    selected by the first two.

    Condensed Nearest Neighbour and One-Sided Selection are built with the
    ``indices_support`` and ``verbose`` names from the enclosing scope and
    are unpacked as ``(X, y, indices)``. BalanceCascade only receives
    ``verbose`` and is unpacked as ``(X, y)``. Nothing is returned.
    """
    index_samplers = (
        ('Condensed Nearest Neighbour', CondensedNearestNeighbour),
        ('One-Sided Selection', OneSidedSelection),
    )
    for title, sampler_cls in index_samplers:
        print(title)
        sampler = sampler_cls(indices_support=indices_support,
                              verbose=verbose)
        res_x, res_y, selected = sampler.fit_transform(x, y)
        print('Indices selected')
        print(selected)

    print('BalanceCascade')
    cascade = BalanceCascade(verbose=verbose)
    cascade_x, cascade_y = cascade.fit_transform(x, y)
def test_cnn_transform_wt_fit():
    """Check that calling transform before fit raises RuntimeError."""

    unfitted_sampler = CondensedNearestNeighbour(random_state=RND_SEED)
    # No fit() call on purpose: transform must refuse to run.
    assert_raises(RuntimeError, unfitted_sampler.transform, X, Y)
コード例 #9
0
ファイル: utils.py プロジェクト: Zerowxm/kdd-cup2009
def test_CNN(x, y, c=0, ratio='auto'):
    """Under-sample ``(x, y)`` with the method selected by ``c``.

    Parameters
    ----------
    x, y
        Features and labels, passed unchanged to the sampler's
        ``fit_transform``.
    c : int, optional
        Method selector: ``0`` for Condensed Nearest Neighbour, ``1`` for
        One-Sided Selection, ``2`` for BalanceCascade. Any other value
        leaves the data untouched.
    ratio : optional
        Forwarded to ``BalanceCascade``; only used when ``c == 2``.

    Returns
    -------
    tuple
        ``(x, y)`` after resampling, or the inputs as given when ``c`` does
        not match a known method.

    Notes
    -----
    ``indices_support`` and ``verbose`` are read from the enclosing scope.
    """
    if c == 0:
        print('Condensed Nearest Neighbour')
        CNN = CondensedNearestNeighbour(indices_support=indices_support,
                                        verbose=verbose)
        x, y, idx_tmp = CNN.fit_transform(x, y)
        print('Indices selected')
        print(idx_tmp)
    elif c == 1:
        print('One-Sided Selection')
        OSS = OneSidedSelection(indices_support=indices_support,
                                verbose=verbose)
        x, y, idx_tmp = OSS.fit_transform(x, y)
        print('Indices selected')
        print(idx_tmp)
    elif c == 2:
        print('BalanceCascade')
        # Honour the caller's ratio; it used to be hard-coded to 'auto',
        # which silently ignored the parameter.
        BS = BalanceCascade(ratio=ratio, verbose=verbose)
        x, y = BS.fit_transform(x, y)
    return x, y
コード例 #10
0
def test_cnn_fit_single_class():
    """Check that fitting on labels with a single class raises
    RuntimeError."""

    sampler = CondensedNearestNeighbour(random_state=RND_SEED)

    # Build an invalid target: one all-zero label per sample in X.
    n_samples = X.shape[0]
    single_class_labels = np.zeros(n_samples)

    assert_raises(RuntimeError, sampler.fit, X, single_class_labels)
コード例 #11
0
def test_cnn_init():
    """Check the attribute values of a freshly constructed sampler."""

    verbose = True
    sampler = CondensedNearestNeighbour(random_state=RND_SEED,
                                        verbose=verbose)

    # (attribute name, expected value) pairs, checked in this order.
    expected_attributes = (
        ('size_ngh', 1),
        ('n_seeds_S', 1),
        ('n_jobs', -1),
        ('rs_', RND_SEED),
        ('verbose', verbose),
        ('min_c_', None),
        ('maj_c_', None),
        ('stats_c_', {}),
    )
    for attr_name, expected_value in expected_attributes:
        assert_equal(getattr(sampler, attr_name), expected_value)
コード例 #12
0
from unbalanced_dataset.under_sampling import CondensedNearestNeighbour

# Example script: under-sample a synthetic two-class dataset with Condensed
# Nearest Neighbour and scatter-plot the original and resampled sets in a
# 2D PCA projection.

# Generate the dataset: 5000 samples, 20 features, two classes weighted
# 0.1 / 0.9
X, y = make_classification(n_classes=2, class_sep=2, weights=[0.1, 0.9],
                           n_informative=3, n_redundant=1, flip_y=0,
                           n_features=20, n_clusters_per_class=1,
                           n_samples=5000, random_state=10)

# Instantiate a PCA object for the sake of easy visualisation
pca = PCA(n_components=2)
# Fit and transform X to visualise inside a 2D feature space
X_vis = pca.fit_transform(X)

# Apply Condensed Nearest Neighbour under-sampling, then project the
# resampled points with the PCA fitted on the original data
cnn = CondensedNearestNeighbour()
X_resampled, y_resampled = cnn.fit_transform(X, y)
X_res_vis = pca.transform(X_resampled)

# Two subplots, unpack the axes array immediately
f, (ax1, ax2) = plt.subplots(1, 2)

# Left panel: both classes of the original set
ax1.scatter(X_vis[y == 0, 0], X_vis[y == 0, 1], label="Class #0", alpha=0.5,
            edgecolor=almost_black, facecolor=palette[0], linewidth=0.15)
ax1.scatter(X_vis[y == 1, 0], X_vis[y == 1, 1], label="Class #1", alpha=0.5,
            edgecolor=almost_black, facecolor=palette[2], linewidth=0.15)
ax1.set_title('Original set')

# Right panel: class 0 of the resampled set
ax2.scatter(X_res_vis[y_resampled == 0, 0], X_res_vis[y_resampled == 0, 1],
            label="Class #0", alpha=.5, edgecolor=almost_black,
            facecolor=palette[0], linewidth=0.15)