Ejemplo n.º 1
0
    def fit(self, X, y=None):
        """Learn the encoding from X and return the fitted estimator.

        Parameters
        ----------
        X : array-like, shape [n_samples, n_features]
            The data to determine the categories of each feature.
        y : ignored
            Not used by this method.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``handle_unknown`` is neither 'error' nor 'ignore'.
        """
        accepted = ('error', 'ignore')
        if self.handle_unknown not in accepted:
            raise ValueError(
                "handle_unknown should be either 'error' or 'ignore', "
                "got {0}.".format(self.handle_unknown))

        self._handle_deprecations(X)

        if not self._legacy_mode:
            self._fit(X, handle_unknown=self.handle_unknown)
            return self

        # Legacy path: the call is made for fitting only, so the value
        # returned by _transform_selected is intentionally discarded.
        _transform_selected(X, self._legacy_fit_transform, self.dtype,
                            self._categorical_features, copy=True)
        return self
Ejemplo n.º 2
0
def test_transform_selected_retain_order():
    """Exercise the ``retain_order`` option of ``_transform_selected``.

    Covers the two error cases (sparse input, and a transform whose output
    has different dimensions than its input) and two successful calls that
    select a single column each.
    """
    # NOTE: builtin ``int`` is used as the dtype instead of ``np.int``; the
    # latter was only an alias for ``int`` and has been removed from NumPy.
    X = [[-1, 1], [2, -2]]

    # retain_order=True combined with a sparse matrix must be rejected.
    assert_raise_message(ValueError,
                         "The retain_order option can only be set to True "
                         "for dense matrices.",
                         _transform_selected, sparse.csr_matrix(X),
                         Binarizer().transform, dtype=int, selected=[0],
                         retain_order=True)

    def transform(X):
        # Appends an extra column, so the output is wider than the input.
        return np.hstack((X, [[0], [0]]))

    # retain_order=True must be rejected when the transform changes the
    # dimensions of the selected data.
    assert_raise_message(ValueError,
                         "The retain_order option can only be set to True "
                         "if the dimensions of the input array match the "
                         "dimensions of the transformed array.",
                         _transform_selected, X, transform, dtype=int,
                         selected=[0], retain_order=True)

    # Only column 1 is transformed; column 0 keeps its values and position.
    X_expected = [[-1, 1], [2, 0]]
    Xtr = _transform_selected(X, Binarizer().transform, dtype=int,
                              selected=[1], retain_order=True)
    assert_array_equal(toarray(Xtr), X_expected)

    # Only column 0 is transformed; column 1 keeps its values and position.
    X_expected = [[0, 1], [1, -2]]
    Xtr = _transform_selected(X, Binarizer().transform, dtype=int,
                              selected=[0], retain_order=True)
    assert_array_equal(toarray(Xtr), X_expected)
Ejemplo n.º 3
0
    def fit_transform(self, X, y=None):
        """Fit the encoder on X and return the encoded X in one step.

        Outside of legacy mode this is ``self.fit(X).transform(X)``.

        Parameters
        ----------
        X : array-like, shape [n_samples, n_features]
            The data to encode.
        y : ignored
            Not used by this method.

        Returns
        -------
        X_out : sparse matrix if sparse=True else a 2-d array
            Transformed input.

        Raises
        ------
        ValueError
            If ``handle_unknown`` is neither 'error' nor 'ignore'.
        """
        accepted = ('error', 'ignore')
        if self.handle_unknown not in accepted:
            raise ValueError(
                "handle_unknown should be either 'error' or 'ignore', "
                "got {0}.".format(self.handle_unknown))

        self._handle_deprecations(X)

        if not self._legacy_mode:
            return self.fit(X).transform(X)

        # Legacy path: fitting and transforming are done by a single
        # _transform_selected call driven by _legacy_fit_transform.
        return _transform_selected(X, self._legacy_fit_transform, self.dtype,
                                   self._categorical_features, copy=True)
Ejemplo n.º 4
0
 def transform(self, X, y=None):
     """Transform X after checking it has the fitted number of features.

     The work is delegated to ``_transform_selected``, called with
     ``self._transform``, ``selected=self.periodic_features``, the dtype
     of ``X`` and ``copy=True``.

     Parameters
     ----------
     X : object exposing ``shape`` and ``dtype``
         Input data; ``X.shape[1]`` must equal ``self.n_features_``.
     y : ignored
         Not used by this method.

     Returns
     -------
     The value returned by ``_transform_selected``.

     Raises
     ------
     ValueError
         If ``X.shape[1]`` differs from ``self.n_features_``.
     """
     check_is_fitted(self, 'n_features_')

     seen_features = X.shape[1]
     if seen_features != self.n_features_:
         message = ('X has different shape than during fitting.'
                    f' Expected {self.n_features_} features,'
                    f' got {seen_features}.')
         raise ValueError(message)

     return _transform_selected(X, self._transform, dtype=X.dtype,
                                selected=self.periodic_features, copy=True)
Ejemplo n.º 5
0
def test_transform_selected_copy_arg(output_dtype, input_dtype):
    """Check that ``copy=True`` leaves the array passed in unchanged.

    A transformer that modifies its argument in place is run through
    ``_transform_selected`` with ``copy=True``; the input must still equal
    the original data while the returned value carries the modification.
    """
    def _bump_first_entry(arr):
        # Deliberately modifies its argument in place before returning it.
        arr[0, 0] += 1
        return arr

    pristine = np.asarray([[1, 2], [3, 4]], dtype=input_dtype)
    bumped = np.asarray([[2, 2], [3, 4]], dtype=output_dtype)

    working = pristine.copy()
    result = _transform_selected(working, _bump_first_entry, output_dtype,
                                 selected='all', copy=True)

    # Input untouched, output modified.
    assert_array_equal(toarray(working), toarray(pristine))
    assert_array_equal(toarray(result), bumped)
Ejemplo n.º 6
0
    def transform(self, X):
        """Encode X with the one-hot scheme.

        Parameters
        ----------
        X : array-like, shape [n_samples, n_features]
            The data to encode.

        Returns
        -------
        X_out : sparse matrix if sparse=True else a 2-d array
            Transformed input.
        """
        if not self._legacy_mode:
            return self._transform_new(X)

        # Legacy path: delegate to _transform_selected with the legacy
        # transform and the configured categorical features.
        return _transform_selected(X, self._legacy_transform, self.dtype,
                                   self._categorical_features, copy=True)
Ejemplo n.º 7
0
def _check_transform_selected(X, X_expected, dtype, sel):
    """Assert ``_transform_selected`` yields ``X_expected`` for X.

    The check is run twice: once on ``X`` as given and once on a CSR
    sparse matrix built from it, each time with a fresh
    ``Binarizer().transform`` as the transform.
    """
    dense_and_sparse = (X, sparse.csr_matrix(X))
    for candidate in dense_and_sparse:
        binarize = Binarizer().transform
        outcome = _transform_selected(candidate, binarize, dtype, sel)
        assert_array_equal(toarray(outcome), X_expected)