def test_sample_regular_half():
    """Check SMOTEENN resampling output when the ratio is set to 0.8."""
    # NOTE(review): the function name says "half", but the ratio under test
    # is 0.8 and the pinned expectations below correspond to that value.
    target_ratio = 0.8
    sampler = SMOTEENN(ratio=target_ratio, random_state=RND_SEED)

    # Fit explicitly first, then resample through fit_sample.
    sampler.fit(X, Y)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Pinned reference output: nine samples of class 0, three of class 1.
    expected_X = np.array([
        [0.11622591, -0.0317206],
        [1.25192108, -0.22367336],
        [0.53366841, -0.30312976],
        [1.52091956, -0.49283504],
        [0.88407872, 0.35454207],
        [1.31301027, -0.92648734],
        [-0.41635887, -0.38299653],
        [1.70580611, -0.11219234],
        [0.36784496, -0.1953161],
        [-0.28162401, -2.10400981],
        [0.83680821, 1.72827342],
        [0.08711622, 0.93259929],
    ])
    expected_y = np.array([0] * 9 + [1] * 3)

    assert_array_almost_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
def test_sample_regular():
    """Check SMOTEENN resampling output with the default configuration."""
    # Only the random state is set; every other parameter keeps its default.
    sampler = SMOTEENN(random_state=RND_SEED)

    # Fit explicitly first, then resample through fit_sample.
    sampler.fit(X, Y)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Pinned reference output: twelve samples of class 0, three of class 1.
    expected_X = np.array([
        [0.11622591, -0.0317206],
        [1.25192108, -0.22367336],
        [0.53366841, -0.30312976],
        [1.52091956, -0.49283504],
        [0.88407872, 0.35454207],
        [1.31301027, -0.92648734],
        [-0.41635887, -0.38299653],
        [1.70580611, -0.11219234],
        [0.29307743, -0.14670439],
        [0.84976473, -0.15570176],
        [0.61319159, -0.11571668],
        [0.66052536, -0.28246517],
        [-0.28162401, -2.10400981],
        [0.83680821, 1.72827342],
        [0.08711622, 0.93259929],
    ])
    expected_y = np.array([0] * 12 + [1] * 3)

    assert_array_almost_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
def test_sample_wrong_X():
    """Check that sampling data other than the fitted data raises an error."""
    sampler = SMOTEENN(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Random 100x40 matrix with a balanced 50/50 label vector, passed to
    # sample() in place of the X, Y used for fitting.
    other_X = np.random.random((100, 40))
    other_y = np.array([0] * 50 + [1] * 50)

    assert_raises(RuntimeError, sampler.sample, other_X, other_y)
def test_sample_wrong_X():
    """Verify a RuntimeError when sample() gets data that was not fitted."""
    fitted = SMOTEENN(random_state=RND_SEED)
    fitted.fit(X, Y)

    # Build the mismatching inputs up front so that only the sample() call
    # itself sits inside the assertion context.
    mismatched_X = np.random.random((100, 40))
    mismatched_y = np.array([0] * 50 + [1] * 50)

    with assert_raises(RuntimeError):
        fitted.sample(mismatched_X, mismatched_y)
def test_smote_fit():
    """Check the class statistics recorded on the sampler by fit()."""
    sampler = SMOTEENN(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Minority / majority labels stored on the fitted object.
    assert_equal(sampler.min_c_, 0)
    assert_equal(sampler.maj_c_, 1)

    # Per-class entries of stats_c_, indexed by class label.
    class_stats = sampler.stats_c_
    assert_equal(class_stats[0], 8)
    assert_equal(class_stats[1], 12)
def test_smote_fit():
    """Check the class statistics recorded on the sampler by fit()."""
    sampler = SMOTEENN(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Minority / majority labels stored on the fitted object.
    assert_equal(sampler.min_c_, 0)
    assert_equal(sampler.maj_c_, 1)

    # Expected stats_c_ entry for each class label, checked in label order.
    for label, expected_count in ((0, 500), (1, 4500)):
        assert_equal(sampler.stats_c_[label], expected_count)
def test_sample_regular():
    """Compare default SMOTEENN resampling against stored reference arrays."""
    sampler = SMOTEENN(random_state=RND_SEED)

    # Fit explicitly first, then resample through fit_sample.
    sampler.fit(X, Y)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Reference arrays live in the 'data' directory next to this file.
    here = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(here, 'data')
    expected_X = np.load(os.path.join(data_dir, 'smote_enn_reg_x.npy'))
    expected_y = np.load(os.path.join(data_dir, 'smote_enn_reg_y.npy'))

    # Both comparisons are exact (no floating-point tolerance).
    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
def test_sample_regular():
    """Check default SMOTEENN output against the saved .npy ground truth."""
    resampler = SMOTEENN(random_state=RND_SEED)

    # Fit explicitly first, then resample through fit_sample.
    resampler.fit(X, Y)
    resampled = resampler.fit_sample(X, Y)
    X_resampled, y_resampled = resampled

    # Ground-truth files are stored under 'data' beside this file.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    x_path = os.path.join(module_dir, 'data', 'smote_enn_reg_x.npy')
    y_path = os.path.join(module_dir, 'data', 'smote_enn_reg_y.npy')
    X_truth = np.load(x_path)
    y_truth = np.load(y_path)

    # Exact equality is required for both features and labels.
    assert_array_equal(X_resampled, X_truth)
    assert_array_equal(y_resampled, y_truth)
class OUSampler(SamplerMixin):
    """Switchable wrapper around a SMOTEENN sampler.

    When ``activate`` is true, fitting and sampling are delegated to an
    internal ``SMOTEENN`` instance and the resampled output is wrapped back
    into pandas objects. When it is false, both steps leave the data alone.

    Parameters
    ----------
    seed : int, default 666
        Passed to ``SMOTEENN`` as ``random_state``.
    activate : bool, default True
        Whether resampling is actually performed.
    """

    def __init__(self, seed=666, activate=True):
        self.seed = seed
        self.activate = activate
        # The wrapped sampler is built eagerly, with n_jobs fixed at -1.
        self.sampler = SMOTEENN(random_state=self.seed, n_jobs=-1)

    def fit(self, X, y):
        """Fit the wrapped sampler when active; always return ``self``."""
        if self.activate:
            self.sampler.fit(X, y)
        return self

    def _sample(self, X, y):
        """Resample ``(X, y)``, or return them untouched when inactive.

        ``X`` must expose ``.columns`` and ``y`` must expose ``.name`` when
        the sampler is active (presumably a DataFrame and a Series -- confirm
        against callers); those labels are reused on the returned objects.
        """
        if not self.activate:
            return X, y

        raw_X, raw_y = self.sampler.sample(X, y)
        # Re-attach the original column names / series name to the output.
        frame = pd.DataFrame(raw_X, columns=X.columns)
        series = pd.Series(raw_y, name=y.name)
        return frame, series

    def sample(self, X, y):
        """Public entry point; forwards directly to ``_sample``."""
        return self._sample(X, y)