コード例 #1
0
    def fit_resample(self, X, y, **fit_params):
        """Resample the dataset.

        Parameters
        ----------
        X : {array-like, dataframe, sparse matrix} of shape \
                (n_samples, n_features)
            Matrix containing the data which have to be sampled.
        y : array-like of shape (n_samples,)
            Corresponding label for each sample in X.
        **fit_params : dict
            Extra keyword arguments, forwarded unchanged to the ``_fit``
            call made on the object returned by ``self._check(X, y)``.

        Returns
        -------
        X_resampled : {array-like, dataframe, sparse matrix} of shape \
                (n_samples_new, n_features)
            The array containing the resampled data.
        y_resampled : array-like of shape (n_samples_new,)
            The corresponding label of `X_resampled`.

        Notes
        -----
        When ``self._fit_resample`` returns something other than a pair,
        its third item is appended to the returned tuple as-is.
        """
        check_classification_targets(y)
        # Built from the raw inputs, before ``_check_X_y`` rebinds X and y,
        # so that the final ``transform`` call sees the original containers.
        container_restorer = ArraysTransformer(X, y)
        X, y, binarize_y = self._check_X_y(X, y)

        checked = self._check(X, y)
        checked._fit(X, y, **fit_params)

        resampled = self._fit_resample(X, y)

        if binarize_y:
            # Re-encode the resampled labels against the classes seen in y.
            y_out = label_binarize(y=resampled[1], classes=np.unique(y))
        else:
            y_out = resampled[1]

        X_out, y_out = container_restorer.transform(resampled[0], y_out)
        if len(resampled) == 2:
            return (X_out, y_out)
        return (X_out, y_out, resampled[2])
コード例 #2
0
def test_arrays_transformer_numpy():
    """Check that ``ArraysTransformer`` built from ndarrays returns ndarrays."""
    data = np.array([[0, 0], [1, 1]])
    target = np.array([[0, 0], [1, 1]])

    transformer = ArraysTransformer(data, target)
    data_out, target_out = transformer.transform(data, target)
    # Both outputs must be NumPy arrays.
    for restored in (data_out, target_out):
        assert isinstance(restored, np.ndarray)
コード例 #3
0
def test_arrays_transformer_plain_list():
    """Check that ``ArraysTransformer`` built from lists returns lists."""
    data = np.array([[0, 0], [1, 1]])
    target = np.array([[0, 0], [1, 1]])

    # The transformer is constructed from plain lists but fed ndarrays.
    transformer = ArraysTransformer(data.tolist(), target.tolist())
    data_out, target_out = transformer.transform(data, target)
    for restored in (data_out, target_out):
        assert isinstance(restored, list)
コード例 #4
0
def test_arrays_transformer_pandas():
    """Check that ``ArraysTransformer`` restores pandas containers.

    Covers a DataFrame/DataFrame pair and a DataFrame/Series pair, and
    verifies the type, the column or series name, and the dtypes of the
    outputs against the containers used to build the transformer.
    """
    pd = pytest.importorskip("pandas")

    features = np.array([[0, 0], [1, 1]])
    labels = np.array([0, 1])

    features_df = pd.DataFrame(features, columns=["a", "b"]).astype(int)
    labels_df = pd.DataFrame(labels, columns=["target"]).astype(int)
    labels_series = pd.Series(labels, name="target", dtype=int)

    # DataFrame and DataFrame case
    transformer = ArraysTransformer(features_df, labels_df)
    features_out, labels_out = transformer.transform(features, labels)
    expected_pairs = ((features_out, features_df), (labels_out, labels_df))
    for restored, reference in expected_pairs:
        assert isinstance(restored, pd.DataFrame)
        assert_array_equal(restored.columns, reference.columns)
        assert_array_equal(restored.dtypes, reference.dtypes)

    # DataFrame and Series case
    transformer = ArraysTransformer(features_df, labels_series)
    _, labels_out = transformer.transform(features, labels)
    assert isinstance(labels_out, pd.Series)
    assert_array_equal(labels_out.name, labels_series.name)
    assert_array_equal(labels_out.dtype, labels_series.dtype)