def fit(self, X, y=None):
    """Compute the per-feature minimum and maximum used for later scaling.

    All derived quantities stay lazy until a single ``compute`` call
    materializes them together, then each is attached to ``self``.

    Parameters
    ----------
    X : array-like or dask collection
        Data whose column-wise min/max are taken.
    y : ignored
        Present only for scikit-learn API compatibility.

    Returns
    -------
    self
    """
    self._reset()
    low, high = self.feature_range[0], self.feature_range[1]
    if low >= high:
        raise ValueError(
            "Minimum of desired feature range must be smaller than maximum."
        )

    data_min = X.min(0)
    data_max = X.max(0)
    data_range = data_max - data_min
    scale = (high - low) / handle_zeros_in_scale(data_range)

    # Collect every fitted quantity, then resolve them in one pass.
    fitted = OrderedDict()
    fitted["data_min_"] = data_min
    fitted["data_max_"] = data_max
    fitted["data_range_"] = data_range
    fitted["scale_"] = scale
    fitted["min_"] = low - data_min * scale
    # Sample count is not known up front for lazy collections.
    fitted["n_samples_seen_"] = np.nan

    for name, value in zip(fitted, compute(*fitted.values())):
        setattr(self, name, value)
    return self
def fit(
    self,
    X: Union[ArrayLike, DataFrameType],
    y: Optional[Union[ArrayLike, SeriesType]] = None,
) -> "MinMaxScaler":
    """Compute the per-feature minimum and maximum used for later scaling.

    Parameters
    ----------
    X : array-like or dataframe
        Data whose column-wise min/max are taken.
    y : ignored
        Present only for scikit-learn API compatibility.

    Returns
    -------
    MinMaxScaler
        The fitted scaler (``self``).
    """
    self._reset()
    low, high = self.feature_range[0], self.feature_range[1]
    if low >= high:
        raise ValueError(
            "Minimum of desired feature range must be smaller than maximum."
        )

    data_min = X.min(0)
    data_max = X.max(0)
    data_range = data_max - data_min
    scale = (high - low) / handle_zeros_in_scale(data_range)

    # Gather the lazy results, materialize them in one compute() pass,
    # then attach each to self under its scikit-learn attribute name.
    fitted = OrderedDict()
    fitted["data_min_"] = data_min
    fitted["data_max_"] = data_max
    fitted["data_range_"] = data_range
    fitted["scale_"] = scale
    fitted["min_"] = low - data_min * scale
    fitted["n_samples_seen_"] = X.shape[0]

    for name, value in zip(fitted, compute(*fitted.values())):
        setattr(self, name, value)

    self.n_features_in_: int = X.shape[1]
    return self
def fit(self, X, y=None):
    """Fit the scaler on the configured subset of columns.

    Only the columns named by ``self.columns`` participate in the
    min/max statistics; the fitted quantities are persisted (kept lazy
    but materialized in cluster memory) rather than fully computed.

    Parameters
    ----------
    X : array-like or dataframe
        Input data; sliced to ``self.columns`` before reduction.
    y : ignored
        Present only for scikit-learn API compatibility.

    Returns
    -------
    self
    """
    self._reset()
    low, high = self.feature_range[0], self.feature_range[1]
    if low >= high:
        raise ValueError(
            "Minimum of desired feature range must be smaller than maximum."
        )

    subset = slice_columns(X, self.columns)
    data_min = subset.min(0)
    data_max = subset.max(0)
    data_range = data_max - data_min
    scale = (high - low) / handle_zeros_in_scale(data_range)

    fitted = OrderedDict()
    fitted["data_min_"] = data_min
    fitted["data_max_"] = data_max
    fitted["data_range_"] = data_range
    fitted["scale_"] = scale
    fitted["min_"] = low - data_min * scale
    # Sample count is not known up front for lazy collections.
    fitted["n_samples_seen_"] = np.nan

    for name, value in zip(fitted, persist(*fitted.values())):
        setattr(self, name, value)
    return self
def test_handle_zeros_in_scale():
    """Zeros in a scale vector become 1; nonzero entries pass through,
    across every supported container type."""
    # Module-level dask fixtures `s` (series) and `a` (array).
    assert list(handle_zeros_in_scale(s).compute()) == [1, 1, 2, 3, 1]
    assert list(handle_zeros_in_scale(a).compute()) == [1, 1, 2, 3, 1]

    base = np.array([1, 2, 3, 0], dtype="f8")
    fixed = np.array([1, 2, 3, 1], dtype="f8")

    # NumPy array.
    np.testing.assert_array_equal(handle_zeros_in_scale(base), fixed)

    # Pandas Series.
    tm.assert_series_equal(
        handle_zeros_in_scale(pd.Series(base)), pd.Series(fixed)
    )

    # Dask array.
    darr = da.from_array(base, chunks=2)
    assert_eq_ar(handle_zeros_in_scale(darr), fixed)

    # Dask series.
    dser = dd.from_dask_array(darr)
    assert_eq_df(handle_zeros_in_scale(dser), pd.Series(fixed))
def fit(
    self,
    X,
    y=None,
):
    """Compute the per-column maximum absolute value used for scaling.

    In-memory inputs (pandas / NumPy) are delegated to the parent
    scikit-learn implementation; dask collections are reduced lazily
    and then materialized with a single ``compute``.

    Parameters
    ----------
    X : array-like, dataframe, or dask collection
        Data whose column-wise max-abs is taken.
    y : ignored
        Present only for scikit-learn API compatibility.

    Returns
    -------
    self
    """
    from dask_ml.utils import handle_zeros_in_scale

    self._reset()
    # Concrete containers take the stock scikit-learn code path.
    if isinstance(X, (pd.DataFrame, np.ndarray)):
        return super().fit(X, y)

    # Per-partition |x|.max(), combined by an overall max, then resolved.
    max_abs = X.reduction(
        lambda part: part.abs().max(),
        aggregate=lambda partials: partials.max(),
        token=self.__class__.__name__,
    ).compute()

    self.max_abs_ = max_abs
    self.scale_ = handle_zeros_in_scale(max_abs)
    self.n_samples_seen_ = 0
    self.n_features_in_ = X.shape[1]
    return self
def test_handle_zeros_in_scale():
    """Zeros in the lazy scale fixtures are replaced by 1; other values
    are left untouched."""
    # Module-level dask fixtures `s` (series) and `a` (array) hold
    # [1, 0, 2, 3, 0] style data; both must yield the same cleaned values.
    for collection in (s, a):
        cleaned = handle_zeros_in_scale(collection)
        assert list(cleaned.compute()) == [1, 1, 2, 3, 1]