def test_robust_scaler_zero_variance_features():
    """Check RobustScaler on toy data with zero variance features"""
    X = [[0., 1., +0.5],
         [0., 1., -0.1],
         [0., 1., +1.1]]

    scaler = RobustScaler()
    X_trans = scaler.fit_transform(X)

    # NOTE: for such a small sample size, what we expect in the third column
    # depends HEAVILY on the method used to calculate quantiles. The values
    # here were calculated to fit the quantiles produces by np.percentile
    # using numpy 1.9 Calculating quantiles with
    # scipy.stats.mstats.scoreatquantile or scipy.stats.mstats.mquantiles
    # would yield very different results!
    X_expected = [[0., 0., +0.0],
                  [0., 0., -1.0],
                  [0., 0., +1.0]]
    assert_array_almost_equal(X_trans, X_expected)
    X_trans_inv = scaler.inverse_transform(X_trans)
    assert_array_almost_equal(X, X_trans_inv)

    # make sure new data gets transformed correctly
    X_new = [[+0., 2., 0.5],
             [-1., 1., 0.0],
             [+0., 1., 1.5]]
    X_trans_new = scaler.transform(X_new)
    X_expected_new = [[+0., 1., +0.],
                      [-1., 0., -0.83333],
                      [+0., 0., +1.66667]]
    assert_array_almost_equal(X_trans_new, X_expected_new, decimal=3)
Example #2
0
class RobustScalerImpl():

    def __init__(self, with_centering=True, with_scaling=True, quantile_range=(25.0, 75.0), copy=True):
        self._hyperparams = {
            'with_centering': with_centering,
            'with_scaling': with_scaling,
            'quantile_range': quantile_range,
            'copy': copy}
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if (y is not None):
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)
Example #3
0
class CreateRobustScaler(CreateModel):
    def fit(self, data, args):
        self.model = RobustScaler()

        with Timer() as t:
            self.model.fit(data.X_train, data.y_train)

        return t.interval

    def test(self, data):
        assert self.model is not None

        return self.model.transform(data.X_test)

    def predict(self, data):
        with Timer() as t:
            self.predictions = self.test(data)

        data.learning_task = LearningTask.REGRESSION
        return t.interval
Example #4
0
def test_robust_scaler_zero_variance_features():
    """Check RobustScaler on toy data with zero variance features"""
    X = [[0., 1., +0.5], [0., 1., -0.1], [0., 1., +1.1]]

    scaler = RobustScaler()
    X_trans = scaler.fit_transform(X)

    # NOTE: for such a small sample size, what we expect in the third column
    # depends HEAVILY on the method used to calculate quantiles. The values
    # here were calculated to fit the quantiles produces by np.percentile
    # using numpy 1.9 Calculating quantiles with
    # scipy.stats.mstats.scoreatquantile or scipy.stats.mstats.mquantiles
    # would yield very different results!
    X_expected = [[0., 0., +0.0], [0., 0., -1.0], [0., 0., +1.0]]
    assert_array_almost_equal(X_trans, X_expected)
    X_trans_inv = scaler.inverse_transform(X_trans)
    assert_array_almost_equal(X, X_trans_inv)

    # make sure new data gets transformed correctly
    X_new = [[+0., 2., 0.5], [-1., 1., 0.0], [+0., 1., 1.5]]
    X_trans_new = scaler.transform(X_new)
    X_expected_new = [[+0., 1., +0.], [-1., 0., -0.83333], [+0., 0., +1.66667]]
    assert_array_almost_equal(X_trans_new, X_expected_new, decimal=3)