def test_CNN(x, y):
    """Run three under-sampling methods one after another on ``(x, y)``.

    Condensed Nearest Neighbour, One-Sided Selection and BalanceCascade are
    each built with the module-level ``verbose`` flag and applied through
    ``fit_transform``. A banner is printed before each method. The resampled
    arrays are unpacked but otherwise unused, and nothing is returned.
    """
    print('Condensed Nearest Neighbour')
    cnn_x, cnn_y = CondensedNearestNeighbour(
        verbose=verbose).fit_transform(x, y)

    print('One-Sided Selection')
    oss_x, oss_y = OneSidedSelection(
        verbose=verbose).fit_transform(x, y)

    print('BalanceCascade')
    cascade_x, cascade_y = BalanceCascade(
        verbose=verbose).fit_transform(x, y)
def test_cnn_fit_transform():
    """Check that fit_transform reproduces the stored reference arrays."""

    # Under-sample the module-level data with a fixed seed
    sampler = CondensedNearestNeighbour(random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_transform(X, Y)

    # The expected output lives in the 'data' folder next to this file
    here = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(here, 'data')
    expected_X = np.load(os.path.join(data_dir, 'cnn_x.npy'))
    expected_y = np.load(os.path.join(data_dir, 'cnn_y.npy'))

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
def test_cnn_fit():
    """Check the class statistics recorded by ``fit``."""

    sampler = CondensedNearestNeighbour(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Minority / majority labels found during fitting
    assert_equal(sampler.min_c_, 0)
    assert_equal(sampler.maj_c_, 1)

    # Per-class sample counts
    class_counts = sampler.stats_c_
    assert_equal(class_counts[0], 500)
    assert_equal(class_counts[1], 4500)
def test_cnn_fit_transform_with_indices():
    """Check fit_transform output, including indices, against stored data."""

    # With return_indices=True a third value (the kept indices) is returned
    sampler = CondensedNearestNeighbour(return_indices=True,
                                        random_state=RND_SEED)
    X_resampled, y_resampled, idx_under = sampler.fit_transform(X, Y)

    # The expected output lives in the 'data' folder next to this file
    here = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(here, 'data')
    expected_X = np.load(os.path.join(data_dir, 'cnn_x.npy'))
    expected_y = np.load(os.path.join(data_dir, 'cnn_y.npy'))
    expected_idx = np.load(os.path.join(data_dir, 'cnn_idx.npy'))

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
    assert_array_equal(idx_under, expected_idx)
def balance_data_ensemblesampling_condensed_nearest_neighbour(self):
    """Balance ``self.X`` / ``self.y`` with condensed nearest neighbour.

    ``self.y`` is reshaped in place to one dimension before resampling.
    The resampled features and labels then replace ``self.X`` and
    ``self.y``, and the new ``self.y`` is reshaped to a single column.
    """
    features = self.X
    labels = self.y
    # In-place reshape: this also changes the array referenced by self.y
    labels.shape = len(self.y)

    sampler = CondensedNearestNeighbour(verbose=True)
    self.X, self.y = sampler.fit_transform(features, labels)

    # Store the labels as an (n_samples, 1) column
    self.y.shape = (len(self.y), 1)
def test_CNN(x, y):
    """Run three under-sampling methods on ``(x, y)`` and print kept indices.

    Condensed Nearest Neighbour and One-Sided Selection are built with the
    module-level ``indices_support`` and ``verbose`` settings, and the third
    value returned by ``fit_transform`` is printed for each. BalanceCascade
    is built with ``verbose`` only. Nothing is returned.
    """
    print('Condensed Nearest Neighbour')
    cnn_sampler = CondensedNearestNeighbour(indices_support=indices_support,
                                            verbose=verbose)
    cnn_x, cnn_y, kept_idx = cnn_sampler.fit_transform(x, y)
    print('Indices selected')
    print(kept_idx)

    print('One-Sided Selection')
    oss_sampler = OneSidedSelection(indices_support=indices_support,
                                    verbose=verbose)
    oss_x, oss_y, kept_idx = oss_sampler.fit_transform(x, y)
    print('Indices selected')
    print(kept_idx)

    print('BalanceCascade')
    cascade_x, cascade_y = BalanceCascade(
        verbose=verbose).fit_transform(x, y)
def test_cnn_transform_wt_fit():
    """Check that calling ``transform`` before ``fit`` raises RuntimeError."""

    # A fresh, unfitted sampler
    unfitted = CondensedNearestNeighbour(random_state=RND_SEED)
    assert_raises(RuntimeError, unfitted.transform, X, Y)
def test_CNN(x, y, c=0, ratio='auto'):
    """Under-sample ``(x, y)`` with the method selected by ``c``.

    Parameters
    ----------
    x, y
        Data and labels passed to the sampler's ``fit_transform``.
    c : int, optional
        Method selector: ``0`` for Condensed Nearest Neighbour, ``1`` for
        One-Sided Selection, ``2`` for BalanceCascade. Any other value
        leaves the data untouched.
    ratio : optional
        Ratio forwarded to ``BalanceCascade`` (used only when ``c == 2``).
        Defaults to ``'auto'``.

    Returns
    -------
    tuple
        ``(x, y)`` after resampling, or the inputs unchanged when ``c`` does
        not select a method.

    Notes
    -----
    Relies on the module-level ``indices_support`` and ``verbose`` settings.
    """
    if c == 0:
        print('Condensed Nearest Neighbour')
        CNN = CondensedNearestNeighbour(indices_support=indices_support,
                                        verbose=verbose)
        x, y, idx_tmp = CNN.fit_transform(x, y)
        print('Indices selected')
        print(idx_tmp)
    elif c == 1:
        print('One-Sided Selection')
        OSS = OneSidedSelection(indices_support=indices_support,
                                verbose=verbose)
        x, y, idx_tmp = OSS.fit_transform(x, y)
        print('Indices selected')
        print(idx_tmp)
    elif c == 2:
        print('BalanceCascade')
        # Forward the caller's ratio; it used to be hard-coded to 'auto',
        # which silently ignored the ``ratio`` argument.
        BS = BalanceCascade(ratio=ratio, verbose=verbose)
        x, y = BS.fit_transform(x, y)
    return x, y
def test_cnn_fit_single_class():
    """Check that fitting on a single-class target raises RuntimeError."""

    sampler = CondensedNearestNeighbour(random_state=RND_SEED)

    # An invalid target: one all-zero label per sample in X
    single_class_target = np.zeros(X.shape[0])
    assert_raises(RuntimeError, sampler.fit, X, single_class_target)
def test_cnn_init():
    """Check the attribute values of a freshly constructed sampler."""

    # Verbosity flag passed to the constructor and checked below
    expected_verbose = True
    sampler = CondensedNearestNeighbour(random_state=RND_SEED,
                                        verbose=expected_verbose)

    # Constructor settings
    assert_equal(sampler.size_ngh, 1)
    assert_equal(sampler.n_seeds_S, 1)
    assert_equal(sampler.n_jobs, -1)
    assert_equal(sampler.rs_, RND_SEED)
    assert_equal(sampler.verbose, expected_verbose)

    # Class statistics are still unset before any fit
    assert_equal(sampler.min_c_, None)
    assert_equal(sampler.maj_c_, None)
    assert_equal(sampler.stats_c_, {})
from unbalanced_dataset.under_sampling import CondensedNearestNeighbour

# Build a synthetic two-class dataset with class weights 0.1 / 0.9
X, y = make_classification(n_samples=5000, n_features=20, n_informative=3,
                           n_redundant=1, n_classes=2,
                           n_clusters_per_class=1, weights=[0.1, 0.9],
                           class_sep=2, flip_y=0, random_state=10)

# Instantiate a PCA object so the data can be shown in two dimensions
pca = PCA(n_components=2)
# Fit the projection on the full dataset and project it
X_vis = pca.fit_transform(X)

# Apply condensed nearest neighbour under-sampling
cnn = CondensedNearestNeighbour()
X_resampled, y_resampled = cnn.fit_transform(X, y)
# Project the resampled data with the projection fitted above
X_res_vis = pca.transform(X_resampled)

# One row, two columns of axes, unpacked immediately
f, (ax1, ax2) = plt.subplots(nrows=1, ncols=2)

# Left panel: the original data, one scatter call per class
ax1.scatter(X_vis[y == 0, 0], X_vis[y == 0, 1],
            label="Class #0", facecolor=palette[0],
            edgecolor=almost_black, linewidth=0.15, alpha=0.5)
ax1.scatter(X_vis[y == 1, 0], X_vis[y == 1, 1],
            label="Class #1", facecolor=palette[2],
            edgecolor=almost_black, linewidth=0.15, alpha=0.5)
ax1.set_title('Original set')

# Right panel: class 0 of the resampled data
ax2.scatter(X_res_vis[y_resampled == 0, 0], X_res_vis[y_resampled == 0, 1],
            label="Class #0", facecolor=palette[0],
            edgecolor=almost_black, linewidth=0.15, alpha=0.5)