Esempio n. 1
0
def test_sample_regular_half():
    """Test the fit_sample function of SMOTEENN with a ratio of 0.8.

    The resampled data are compared with a hard-coded ground truth made of
    12 samples: 9 labelled 0 followed by 3 labelled 1.

    NOTE(review): the function name still says "half", but the ratio
    actually exercised here is 0.8, not 0.5.
    """

    # Create the object
    ratio = 0.8
    smote = SMOTEENN(ratio=ratio, random_state=RND_SEED)
    # Fit the data
    # NOTE(review): fit_sample is called right after; this separate fit
    # looks redundant but is kept so the tested call sequence is unchanged.
    smote.fit(X, Y)

    X_resampled, y_resampled = smote.fit_sample(X, Y)

    # Expected resampled features, one row per sample
    X_gt = np.array([[0.11622591, -0.0317206],
                     [1.25192108, -0.22367336],
                     [0.53366841, -0.30312976],
                     [1.52091956, -0.49283504],
                     [0.88407872, 0.35454207],
                     [1.31301027, -0.92648734],
                     [-0.41635887, -0.38299653],
                     [1.70580611, -0.11219234],
                     [0.36784496, -0.1953161],
                     [-0.28162401, -2.10400981],
                     [0.83680821, 1.72827342],
                     [0.08711622, 0.93259929]])
    # Expected labels, aligned row by row with X_gt
    y_gt = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1])
    # Features are compared approximately, labels exactly
    assert_array_almost_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
Esempio n. 2
0
def test_sample_regular():
    """Check SMOTEENN.fit_sample against a hard-coded ground truth.

    Only ``random_state`` is passed to the sampler; every other parameter
    keeps its default value.
    """

    # Expected output: 12 samples labelled 0 followed by 3 labelled 1
    expected_X = np.array([[0.11622591, -0.0317206],
                           [1.25192108, -0.22367336],
                           [0.53366841, -0.30312976],
                           [1.52091956, -0.49283504],
                           [0.88407872, 0.35454207],
                           [1.31301027, -0.92648734],
                           [-0.41635887, -0.38299653],
                           [1.70580611, -0.11219234],
                           [0.29307743, -0.14670439],
                           [0.84976473, -0.15570176],
                           [0.61319159, -0.11571668],
                           [0.66052536, -0.28246517],
                           [-0.28162401, -2.10400981],
                           [0.83680821, 1.72827342],
                           [0.08711622, 0.93259929]])
    expected_y = np.array([0] * 12 + [1] * 3)

    # Build the sampler, fit it, then resample the same data
    sampler = SMOTEENN(random_state=RND_SEED)
    sampler.fit(X, Y)
    X_out, y_out = sampler.fit_sample(X, Y)

    # Features are compared approximately, labels exactly
    assert_array_almost_equal(X_out, expected_X)
    assert_array_equal(y_out, expected_y)
Esempio n. 3
0
def test_sample_wrong_X():
    """Check that sampling raises a RuntimeError when the data passed to
    ``sample`` differ from the data used for fitting."""

    # Fit on the reference data
    sampler = SMOTEENN(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Unrelated random data: 100 samples with 40 features and two classes
    # of 50 samples each
    other_X = np.random.random((100, 40))
    other_y = np.array([0] * 50 + [1] * 50)

    assert_raises(RuntimeError, sampler.sample, other_X, other_y)
Esempio n. 4
0
def test_sample_wrong_X():
    """Sampling with data other than the fitted data must raise
    RuntimeError."""

    fitted = SMOTEENN(random_state=RND_SEED)
    fitted.fit(X, Y)

    # Data that do not match what the sampler was fitted on
    mismatched_X = np.random.random((100, 40))
    mismatched_y = np.array([0] * 50 + [1] * 50)
    assert_raises(RuntimeError, fitted.sample, mismatched_X, mismatched_y)
Esempio n. 5
0
def test_smote_fit():
    """Check the attributes that ``fit`` sets on a SMOTEENN instance."""

    sampler = SMOTEENN(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Fitted attributes; presumably the minority and majority class labels
    # -- confirm against the SMOTEENN implementation
    assert_equal(sampler.min_c_, 0)
    assert_equal(sampler.maj_c_, 1)

    # Per-label statistics, indexed by class label
    stats = sampler.stats_c_
    assert_equal(stats[0], 8)
    assert_equal(stats[1], 12)
Esempio n. 6
0
def test_smote_fit():
    """Verify the information computed by SMOTEENN.fit."""

    fitted = SMOTEENN(random_state=RND_SEED)
    fitted.fit(X, Y)

    # Attributes expected to be populated once fitting is done
    assert_equal(fitted.min_c_, 0)
    assert_equal(fitted.maj_c_, 1)

    # Statistics stored for labels 0 and 1
    per_label = fitted.stats_c_
    assert_equal(per_label[0], 500)
    assert_equal(per_label[1], 4500)
Esempio n. 7
0
def test_sample_regular():
    """Compare SMOTEENN.fit_sample output with ground truth stored on disk.

    The expected arrays are read from the ``data`` directory located next
    to this file.
    """

    sampler = SMOTEENN(random_state=RND_SEED)
    sampler.fit(X, Y)
    X_out, y_out = sampler.fit_sample(X, Y)

    # Directory holding the stored ground-truth arrays
    here = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(here, 'data')

    expected_X = np.load(os.path.join(data_dir, 'smote_enn_reg_x.npy'))
    expected_y = np.load(os.path.join(data_dir, 'smote_enn_reg_y.npy'))

    # Both features and labels must match the stored arrays exactly
    assert_array_equal(X_out, expected_X)
    assert_array_equal(y_out, expected_y)
Esempio n. 8
0
def test_sample_regular():
    """Resample with SMOTEENN and require an exact match with the saved
    reference arrays."""

    def _reference_path(filename):
        # Reference arrays are kept in the 'data' folder beside this file
        base = os.path.dirname(os.path.abspath(__file__))
        return os.path.join(base, 'data', filename)

    resampler = SMOTEENN(random_state=RND_SEED)
    resampler.fit(X, Y)
    X_new, y_new = resampler.fit_sample(X, Y)

    X_ref = np.load(_reference_path('smote_enn_reg_x.npy'))
    y_ref = np.load(_reference_path('smote_enn_reg_y.npy'))
    assert_array_equal(X_new, X_ref)
    assert_array_equal(y_new, y_ref)
class OUSampler(SamplerMixin):
    """Switchable wrapper around a SMOTEENN sampler.

    When ``activate`` is true, fitting and sampling are delegated to an
    internal SMOTEENN instance. When it is false, ``fit`` does nothing and
    sampling returns its inputs untouched.

    Parameters
    ----------
    seed : int, optional (default=666)
        Value passed to SMOTEENN as ``random_state``.
    activate : bool, optional (default=True)
        Whether resampling is actually performed.
    """

    def __init__(self, seed=666, activate=True):
        self.seed = seed
        self.activate = activate
        # The wrapped sampler is built once, here; changing ``self.seed``
        # afterwards does not affect it.
        # NOTE(review): n_jobs=-1 presumably requests all available cores
        # -- confirm against the SMOTEENN version in use.
        self.sampler = SMOTEENN(random_state=self.seed, n_jobs=-1)

    def fit(self, X, y):
        """Fit the wrapped sampler when active and return ``self``."""
        if self.activate:
            self.sampler.fit(X, y)
        # Return fit object
        return self

    def _sample(self, X, y):
        """Resample ``X`` and ``y`` with the wrapped sampler.

        Returns
        -------
        tuple
            ``(X, y)`` unchanged when the sampler is not active. Otherwise
            the resampled pair, where the features are wrapped in a
            DataFrame reusing ``X.columns`` if ``X`` has columns, and the
            targets are wrapped in a Series reusing ``y.name`` if ``y`` has
            a name. Inputs without that metadata (for instance plain
            arrays) get the sampler output as is instead of failing with
            an AttributeError.
        """
        if not self.activate:
            return X, y

        sampled_X, sampled_y = self.sampler.sample(X, y)
        # Restore the pandas metadata only when the inputs carried it
        if hasattr(X, "columns"):
            sampled_X = pd.DataFrame(sampled_X, columns=X.columns)
        if hasattr(y, "name"):
            sampled_y = pd.Series(sampled_y, name=y.name)
        return sampled_X, sampled_y

    def sample(self, X, y):
        """Public entry point; delegates to ``_sample``."""
        return self._sample(X, y)