def test_slice_columns():
    """Check ``slice_columns`` on the module-level ``df`` and ``X`` fixtures.

    The sliced dataframe must expose exactly the requested columns and
    match plain ``df[...]`` indexing; the array result is compared against
    the full, unsliced ``X``.
    """
    wanted = [2, 3]

    sliced_frame = slice_columns(df, wanted)
    sliced_array = slice_columns(X, wanted)

    # Dataframe: column labels and contents must match direct indexing.
    assert list(sliced_frame.columns) == wanted
    expected_frame = df[wanted].compute()
    assert_eq_df(expected_frame, sliced_frame.compute())

    # Array: the result is expected to equal the original array.
    expected_array = X.compute()
    assert_eq_ar(expected_array, sliced_array.compute())
def fit(self, X, y=None):
    """Compute the min/max statistics used for later scaling.

    Parameters
    ----------
    X : collection accepted by ``slice_columns``
        Data to fit on; only the columns selected via ``self.columns``
        are used. Minimum and maximum are reduced along axis 0.
    y : ignored

    Returns
    -------
    self
        With ``data_min_``, ``data_max_``, ``data_range_``, ``scale_``,
        ``min_`` and ``n_samples_seen_`` set.

    Raises
    ------
    ValueError
        If ``self.feature_range[0] >= self.feature_range[1]``.
    """
    self._reset()

    bounds = self.feature_range
    if bounds[0] >= bounds[1]:
        raise ValueError("Minimum of desired feature "
                         "range must be smaller than maximum.")

    selected = slice_columns(X, self.columns)
    lowest = selected.min(0)
    highest = selected.max(0)
    spread = highest - lowest
    factor = (bounds[1] - bounds[0]) / handle_zeros_in_scale(spread)

    # Insertion order matters: values() and the keys are zipped back
    # together after persist() below.
    fitted = OrderedDict([
        ("data_min_", lowest),
        ("data_max_", highest),
        ("data_range_", spread),
        ("scale_", factor),
        ("min_", bounds[0] - lowest * factor),
        # The sample count is not computed here; NaN is stored instead.
        ("n_samples_seen_", np.nan),
    ])

    # All statistics go through a single persist() call
    # (presumably dask's persist -- confirm against the file's imports).
    persisted = persist(*fitted.values())
    for attr_name, attr_value in zip(fitted, persisted):
        setattr(self, attr_name, attr_value)

    return self
def transform(self, X, y=None, copy=None):
    """Scale the selected columns of ``X`` with the fitted parameters.

    Every selected value ``v`` is mapped to ``v * self.scale_ + self.min_``.

    Parameters
    ----------
    X : collection accepted by ``slice_columns``
        Data to scale.
    y : ignored
    copy : ignored
        Accepted for signature compatibility; not used by this method.

    Returns
    -------
    If the sliced data is a dask DataFrame and ``self.columns`` is
    non-empty, the scaled columns are written back into ``X`` (which is
    modified) and ``X`` is returned, so unselected columns are preserved.
    Otherwise the scaled data itself is returned.

    Raises
    ------
    AttributeError
        If the instance has no ``scale_`` attribute, i.e. it has not been
        fitted.
    """
    if not hasattr(self, "scale_"):
        # AttributeError keeps the exception type an unfitted instance
        # produced before (failed ``self.scale_`` lookup), but with a
        # message that tells the user what to do.
        message = ("This %(name)s instance is not fitted yet. "
                   "Call 'fit' with appropriate arguments before "
                   "using this method.")
        raise AttributeError(message % {"name": type(self).__name__})

    selected = slice_columns(X, self.columns)

    # Deliberately not ``*=`` / ``+=``: slice_columns may hand back ``X``
    # itself (presumably for non-DataFrame input -- confirm), and augmented
    # assignment on a mutable container would then overwrite the caller's
    # data in place.
    scaled = selected * self.scale_ + self.min_

    if isinstance(scaled, dd.DataFrame) and self.columns:
        # Only a subset of columns was scaled: put them back into the
        # full frame so the remaining columns are returned untouched.
        for column in self.columns:
            X[column] = scaled[column]
        return X

    return scaled
def inverse_transform(self, X, y=None, copy=None):
    """Undo the scaling applied by ``transform``.

    Every selected value ``v`` is mapped to ``(v - self.min_) / self.scale_``.

    Parameters
    ----------
    X : collection accepted by ``slice_columns``
        Scaled data to map back.
    y : ignored
    copy : ignored
        Accepted for signature compatibility; not used by this method.

    Returns
    -------
    If the sliced data is a dask DataFrame and ``self.columns`` is
    non-empty, the restored columns are written back into ``X`` (which is
    modified) and ``X`` is returned, so unselected columns are preserved.
    Otherwise the restored data itself is returned.

    Raises
    ------
    Exception
        If the instance has no ``scale_`` attribute, i.e. it has not been
        fitted.
    """
    if not hasattr(self, "scale_"):
        # The ``%(name)s`` placeholder must actually be interpolated;
        # otherwise the raw placeholder text leaks into the error message.
        message = ("This %(name)s instance is not fitted yet. "
                   "Call 'fit' with appropriate arguments before "
                   "using this method.")
        raise Exception(message % {"name": type(self).__name__})

    selected = slice_columns(X, self.columns)

    # Deliberately not ``-=`` / ``/=``: slice_columns may hand back ``X``
    # itself (presumably for non-DataFrame input -- confirm), and augmented
    # assignment on a mutable container would then overwrite the caller's
    # data in place.
    restored = (selected - self.min_) / self.scale_

    if isinstance(restored, dd.DataFrame) and self.columns:
        # Only a subset of columns was processed: put them back into the
        # full frame so the remaining columns are returned untouched.
        for column in self.columns:
            X[column] = restored[column]
        return X

    return restored