예제 #1
0
def test_slice_columns():
    columns = [2, 3]
    df2 = slice_columns(df, columns)
    X2 = slice_columns(X, columns)

    assert list(df2.columns) == columns
    assert_eq_df(df[columns].compute(), df2.compute())
    assert_eq_ar(X.compute(), X2.compute())
예제 #2
0
    def fit(self, X, y=None):
        self._reset()
        to_persist = OrderedDict()
        feature_range = self.feature_range

        if feature_range[0] >= feature_range[1]:
            raise ValueError("Minimum of desired feature "
                             "range must be smaller than maximum.")

        _X = slice_columns(X, self.columns)
        data_min = _X.min(0)
        data_max = _X.max(0)
        data_range = data_max - data_min
        scale = ((feature_range[1] - feature_range[0]) /
                 handle_zeros_in_scale(data_range))

        to_persist["data_min_"] = data_min
        to_persist["data_max_"] = data_max
        to_persist["data_range_"] = data_range
        to_persist["scale_"] = scale
        to_persist["min_"] = feature_range[0] - data_min * scale
        to_persist["n_samples_seen_"] = np.nan

        values = persist(*to_persist.values())
        for k, v in zip(to_persist, values):
            setattr(self, k, v)
        return self
예제 #3
0
    def transform(self, X, y=None, copy=None):
        _X = slice_columns(X, self.columns)
        _X *= self.scale_
        _X += self.min_

        if isinstance(_X, dd.DataFrame) and self.columns:
            for column in self.columns:
                X[column] = _X[column]
            return X
        else:
            return _X
예제 #4
0
    def inverse_transform(self, X, y=None, copy=None):
        if not hasattr(self, "scale_"):
            raise Exception("This %(name)s instance is not fitted yet. "
                            "Call 'fit' with appropriate arguments before "
                            "using this method.")
        _X = slice_columns(X, self.columns)
        _X -= self.min_
        _X /= self.scale_

        if isinstance(_X, dd.DataFrame) and self.columns:
            for column in self.columns:
                X[column] = _X[column]
            return X
        else:
            return _X