def fit_resample(self, X, y, **fit_params):
    """Fit on the dataset and return its resampled version.

    Parameters
    ----------
    X : {array-like, dataframe, sparse matrix} of shape \
            (n_samples, n_features)
        Matrix containing the data which have to be sampled.

    y : array-like of shape (n_samples,)
        Corresponding label for each sample in X.

    **fit_params : dict
        Extra keyword arguments, forwarded unchanged to the ``_fit`` call
        made on the object returned by ``self._check(X, y)``.

    Returns
    -------
    X_resampled : {array-like, dataframe, sparse matrix} of shape \
            (n_samples_new, n_features)
        The array containing the resampled data.

    y_resampled : array-like of shape (n_samples_new,)
        The corresponding label of `X_resampled`.

    extra : object
        Only present when ``self._fit_resample`` returns something other
        than a 2-element result; it is that result's third element,
        passed through untouched.
    """
    check_classification_targets(y)

    # Built from the inputs exactly as the caller passed them, i.e. before
    # `_check_X_y` rebinds X and y below.
    restorer = ArraysTransformer(X, y)
    X, y, binarize_y = self._check_X_y(X, y)

    checked = self._check(X, y)
    checked._fit(X, y, **fit_params)

    resampled = self._fit_resample(X, y)

    y_out = resampled[1]
    if binarize_y:
        # `_check_X_y` flagged the target for binarization: binarize the
        # resampled labels against the classes found in the validated y.
        y_out = label_binarize(y=y_out, classes=np.unique(y))

    X_out, y_out = restorer.transform(resampled[0], y_out)

    if len(resampled) == 2:
        return (X_out, y_out)
    return (X_out, y_out, resampled[2])
def test_arrays_transformer_numpy():
    """NumPy inputs at construction yield NumPy outputs from ``transform``."""
    features = np.array([[0, 0], [1, 1]])
    targets = np.array([[0, 0], [1, 1]])

    transformer = ArraysTransformer(features, targets)
    features_out, targets_out = transformer.transform(features, targets)

    assert isinstance(features_out, np.ndarray)
    assert isinstance(targets_out, np.ndarray)
def test_arrays_transformer_plain_list():
    """List inputs at construction yield list outputs from ``transform``."""
    features = np.array([[0, 0], [1, 1]])
    targets = np.array([[0, 0], [1, 1]])

    # The transformer is built from plain nested lists, while `transform`
    # itself is fed the NumPy arrays.
    features_as_list = features.tolist()
    targets_as_list = targets.tolist()
    transformer = ArraysTransformer(features_as_list, targets_as_list)

    features_out, targets_out = transformer.transform(features, targets)

    assert isinstance(features_out, list)
    assert isinstance(targets_out, list)
def test_arrays_transformer_pandas():
    """Pandas inputs at construction yield matching pandas outputs.

    Checks the container type plus column names / dtypes for a DataFrame
    target, and the name / dtype for a Series target. Skipped when pandas
    is not installed.
    """
    pd = pytest.importorskip("pandas")

    features = np.array([[0, 0], [1, 1]])
    targets = np.array([0, 1])

    features_df = pd.DataFrame(features, columns=["a", "b"]).astype(int)
    targets_df = pd.DataFrame(targets, columns=["target"]).astype(int)

    # Both X and y given as DataFrames.
    transformer = ArraysTransformer(features_df, targets_df)
    features_out, targets_out = transformer.transform(features, targets)

    assert isinstance(features_out, pd.DataFrame)
    assert_array_equal(features_out.columns, features_df.columns)
    assert_array_equal(features_out.dtypes, features_df.dtypes)
    assert isinstance(targets_out, pd.DataFrame)
    assert_array_equal(targets_out.columns, targets_df.columns)
    assert_array_equal(targets_out.dtypes, targets_df.dtypes)

    # X given as a DataFrame, y given as a Series.
    targets_series = pd.Series(targets, name="target", dtype=int)
    transformer = ArraysTransformer(features_df, targets_series)
    _, targets_out = transformer.transform(features, targets)

    assert isinstance(targets_out, pd.Series)
    assert_array_equal(targets_out.name, targets_series.name)
    assert_array_equal(targets_out.dtype, targets_series.dtype)