def ezensemble(X_train, y_train):
    """Under-sample the training data with EasyEnsemble and return one subset.

    An ``EasyEnsemble`` sampler (``random_state=0``, ``n_subsets=10``) is
    fitted and applied to the training data, and the subset at index 1 of
    the resampled output is wrapped in pandas DataFrames.

    Parameters
    ----------
    X_train
        Training features. Iterating over it must yield the column labels
        used for the returned frame (presumably a pandas DataFrame --
        confirm against callers).
    y_train
        Training labels.

    Returns
    -------
    tuple
        ``(X_resampled, y_resampled)``: a DataFrame of features with the
        original column labels, and a single-column DataFrame named
        ``'Target'``.
    """
    feature_names = list(X_train)
    sampler = EasyEnsemble(random_state=0, n_subsets=10)
    # fit_sample() fits the sampler itself, so a separate fit() call
    # beforehand would only repeat the same work.
    X_subsets, y_subsets = sampler.fit_sample(X_train, y_train)
    # Only the subset at index 1 is kept; the remaining subsets are discarded.
    X_resampled = pd.DataFrame(X_subsets[1], columns=feature_names)
    y_resampled = pd.DataFrame(y_subsets[1], columns=['Target'])
    return X_resampled, y_resampled
def test_sample_wrong_X():
    """Check that sampling with an X other than the fitted one raises.

    The sampler is fitted on the module-level ``X``/``Y`` and then asked to
    sample a freshly generated (100, 40) random matrix; a ``RuntimeError``
    is expected.
    """
    sampler = EasyEnsemble(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Data that was never seen by fit(): 100 random rows, 50 labels per class.
    other_X = np.random.random((100, 40))
    other_y = np.array([0] * 50 + [1] * 50)

    assert_raises(RuntimeError, sampler.sample, other_X, other_y)
def test_ee_fit():
    """Check the attributes set on the sampler by ``fit``.

    After fitting on the module-level ``X``/``Y`` with ``ratio='auto'``,
    ``min_c_`` must be 0, ``maj_c_`` must be 1, and ``stats_c_`` must hold
    500 and 4500 for keys 0 and 1 respectively.
    """
    sampler = EasyEnsemble(ratio='auto', random_state=RND_SEED)
    sampler.fit(X, Y)

    # Values recorded during fitting (presumably minority/majority labels).
    assert_equal(sampler.min_c_, 0)
    assert_equal(sampler.maj_c_, 1)

    # Per-key counts stored in stats_c_.
    for class_label, expected_count in ((0, 500), (1, 4500)):
        assert_equal(sampler.stats_c_[class_label], expected_count)
def get_batch(self, tokenized_samples, labels):
    """Build one under-sampled batch as dense sample and target arrays.

    An ``EasyEnsemble`` sampler (``random_state=0``, ``n_subsets=1``) is
    fitted on the inputs and its single resampled subset is converted into
    two zero-initialised arrays.

    Returns
    -------
    tuple
        ``(samples, targets)`` where ``samples`` has shape
        ``(batch, self._max_document_length)`` with each row holding the
        sample's leading values (zero-padded on the right), and ``targets``
        has shape ``(batch, self._num_labels)`` with a 1 in the column
        given by the row's label.
    """
    sampler = EasyEnsemble(random_state=0, n_subsets=1)
    sampler.fit(tokenized_samples, labels)
    subsets_X, subsets_y = sampler.sample(tokenized_samples, labels)
    # Only one subset was requested, so take the first (and only) one.
    batch_X, batch_y = subsets_X[0], subsets_y[0]

    batch_size = len(batch_X)
    max_length = self._max_document_length
    targets = np.zeros(shape=(batch_size, self._num_labels))
    samples = np.zeros(shape=(batch_size, max_length))

    for row, document in enumerate(batch_X):
        # Mark the column matching this row's label.
        targets[row, batch_y[row]] = 1
        # Copy at most max_length values; shorter rows stay zero-padded.
        samples[row, :document.shape[0]] = document[:max_length]

    return samples, targets