Code Example #1
    def fit(self, X, y=None):
        self._reset()
        attributes = OrderedDict()
        feature_range = self.feature_range

        if feature_range[0] >= feature_range[1]:
            raise ValueError("Minimum of desired feature "
                             "range must be smaller than maximum.")

        data_min = X.min(0)
        data_max = X.max(0)
        data_range = data_max - data_min
        scale = (feature_range[1] -
                 feature_range[0]) / handle_zeros_in_scale(data_range)

        attributes["data_min_"] = data_min
        attributes["data_max_"] = data_max
        attributes["data_range_"] = data_range
        attributes["scale_"] = scale
        attributes["min_"] = feature_range[0] - data_min * scale
        attributes["n_samples_seen_"] = np.nan

        values = compute(*attributes.values())
        for k, v in zip(attributes, values):
            setattr(self, k, v)
        return self
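
A minimal usage sketch for the fit method above, assuming it belongs to dask_ml.preprocessing.MinMaxScaler (the attribute names, the feature_range check, and the single compute() call all point that way, but the class definition is not shown here):

import dask.array as da
from dask_ml.preprocessing import MinMaxScaler

# Lazy 2-D dask array; data_min_/data_max_/scale_ are built as task graphs
# and only materialized by the compute() call inside fit().
X = da.random.random((1000, 3), chunks=(100, 3))

scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(X)
print(scaler.data_min_, scaler.data_max_, scaler.scale_)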
Code Example #2
    def fit(
        self,
        X: Union[ArrayLike, DataFrameType],
        y: Optional[Union[ArrayLike, SeriesType]] = None,
    ) -> "MinMaxScaler":
        self._reset()
        attributes = OrderedDict()
        feature_range = self.feature_range

        if feature_range[0] >= feature_range[1]:
            raise ValueError("Minimum of desired feature "
                             "range must be smaller than maximum.")

        data_min = X.min(0)
        data_max = X.max(0)
        data_range = data_max - data_min
        scale = (feature_range[1] -
                 feature_range[0]) / handle_zeros_in_scale(data_range)

        attributes["data_min_"] = data_min
        attributes["data_max_"] = data_max
        attributes["data_range_"] = data_range
        attributes["scale_"] = scale
        attributes["min_"] = feature_range[0] - data_min * scale
        attributes["n_samples_seen_"] = X.shape[0]

        values = compute(*attributes.values())
        for k, v in zip(attributes, values):
            setattr(self, k, v)
        self.n_features_in_: int = X.shape[1]
        return self
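
Compared with Example #1, this typed variant also records n_samples_seen_ and n_features_in_ from X.shape, which works for dask DataFrames as well as dask arrays. A hedged sketch, again assuming dask_ml.preprocessing.MinMaxScaler:

import pandas as pd
import dask.dataframe as dd
from dask_ml.preprocessing import MinMaxScaler

pdf = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0], "b": [10.0, 20.0, 30.0, 40.0]})
ddf = dd.from_pandas(pdf, npartitions=2)

scaler = MinMaxScaler()
scaler.fit(ddf)
print(scaler.data_min_)        # per-column minima, as a pandas Series after compute()
print(scaler.n_features_in_)   # 2, taken from X.shape[1] in the fit above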
Code Example #3
    def fit(self, X, y=None):
        self._reset()
        to_persist = OrderedDict()
        feature_range = self.feature_range

        if feature_range[0] >= feature_range[1]:
            raise ValueError("Minimum of desired feature "
                             "range must be smaller than maximum.")

        _X = slice_columns(X, self.columns)
        data_min = _X.min(0)
        data_max = _X.max(0)
        data_range = data_max - data_min
        scale = ((feature_range[1] - feature_range[0]) /
                 handle_zeros_in_scale(data_range))

        to_persist["data_min_"] = data_min
        to_persist["data_max_"] = data_max
        to_persist["data_range_"] = data_range
        to_persist["scale_"] = scale
        to_persist["min_"] = feature_range[0] - data_min * scale
        to_persist["n_samples_seen_"] = np.nan

        values = persist(*to_persist.values())
        for k, v in zip(to_persist, values):
            setattr(self, k, v)
        return self
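
This variant differs from Example #1 in two ways: slice_columns restricts the statistics to self.columns, and persist() is used instead of compute(), so the fitted attributes remain dask collections whose chunks are already evaluated rather than concrete NumPy/pandas objects. A small sketch of the persist/compute distinction, independent of the scaler itself:

import dask.array as da
from dask import compute, persist

x = da.arange(10, chunks=5)

(concrete,) = compute(x)   # a NumPy array; the task graph has been executed and discarded
(resident,) = persist(x)   # still a dask array, but its chunks are already in memory

print(type(concrete))      # <class 'numpy.ndarray'>
print(type(resident))      # <class 'dask.array.core.Array'>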
Code Example #4
File: test_utils.py  Project: jborchma/dask-ml
def test_handle_zeros_in_scale():
    s2 = handle_zeros_in_scale(s)
    a2 = handle_zeros_in_scale(a)

    assert list(s2.compute()) == [1, 1, 2, 3, 1]
    assert list(a2.compute()) == [1, 1, 2, 3, 1]

    x = np.array([1, 2, 3, 0], dtype="f8")
    expected = np.array([1, 2, 3, 1], dtype="f8")
    result = handle_zeros_in_scale(x)
    np.testing.assert_array_equal(result, expected)

    x = pd.Series(x)
    expected = pd.Series(expected)
    result = handle_zeros_in_scale(x)
    tm.assert_series_equal(result, expected)

    x = da.from_array(x.values, chunks=2)
    expected = expected.values
    result = handle_zeros_in_scale(x)
    assert_eq_ar(result, expected)

    x = dd.from_dask_array(x)
    expected = pd.Series(expected)
    result = handle_zeros_in_scale(x)
    assert_eq_df(result, expected)
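
The test above pins down the contract of handle_zeros_in_scale: zero entries in a scale vector are replaced with 1 so that later divisions leave those features unchanged instead of producing inf/NaN, and the behaviour is the same across NumPy arrays, pandas Series, dask arrays, and dask Series. A standalone sketch of the NumPy case:

import numpy as np
from dask_ml.utils import handle_zeros_in_scale

scale = np.array([0.0, 2.0, 5.0])
safe = handle_zeros_in_scale(scale)
print(safe)   # per the assertions above: [1. 2. 5.]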
Code Example #5
    def fit(
        self,
        X,
        y=None,
    ):
        from dask_ml.utils import handle_zeros_in_scale

        self._reset()
        if isinstance(X, (pd.DataFrame, np.ndarray)):
            return super().fit(X, y)

        max_abs = X.reduction(lambda x: x.abs().max(),
                              aggregate=lambda x: x.max(),
                              token=self.__class__.__name__).compute()
        scale = handle_zeros_in_scale(max_abs)

        self.max_abs_ = max_abs
        self.scale_ = scale
        self.n_samples_seen_ = 0

        self.n_features_in_ = X.shape[1]
        return self
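
A hedged usage sketch for this last fit, assuming it belongs to a MaxAbsScaler-style estimator that subclasses the scikit-learn scaler (hence the super().fit fallback for NumPy/pandas inputs) and takes the reduction path only for dask DataFrames. DaskMaxAbsScaler is a hypothetical name standing in for whatever class defines the method:

import pandas as pd
import dask.dataframe as dd

pdf = pd.DataFrame({"a": [-4.0, 2.0, 1.0], "b": [0.0, 0.0, 0.0]})
ddf = dd.from_pandas(pdf, npartitions=2)

scaler = DaskMaxAbsScaler()   # hypothetical class containing the fit shown above
scaler.fit(ddf)
print(scaler.max_abs_)        # per-column max absolute value: a -> 4.0, b -> 0.0
print(scaler.scale_)          # zeros replaced with 1.0 by handle_zeros_in_scale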