def test_robust_scaler_zero_variance_features(): """Check RobustScaler on toy data with zero variance features""" X = [[0., 1., +0.5], [0., 1., -0.1], [0., 1., +1.1]] scaler = RobustScaler() X_trans = scaler.fit_transform(X) # NOTE: for such a small sample size, what we expect in the third column # depends HEAVILY on the method used to calculate quantiles. The values # here were calculated to fit the quantiles produces by np.percentile # using numpy 1.9 Calculating quantiles with # scipy.stats.mstats.scoreatquantile or scipy.stats.mstats.mquantiles # would yield very different results! X_expected = [[0., 0., +0.0], [0., 0., -1.0], [0., 0., +1.0]] assert_array_almost_equal(X_trans, X_expected) X_trans_inv = scaler.inverse_transform(X_trans) assert_array_almost_equal(X, X_trans_inv) # make sure new data gets transformed correctly X_new = [[+0., 2., 0.5], [-1., 1., 0.0], [+0., 1., 1.5]] X_trans_new = scaler.transform(X_new) X_expected_new = [[+0., 1., +0.], [-1., 0., -0.83333], [+0., 0., +1.66667]] assert_array_almost_equal(X_trans_new, X_expected_new, decimal=3)
class RobustScalerImpl(): def __init__(self, with_centering=True, with_scaling=True, quantile_range=(25.0, 75.0), copy=True): self._hyperparams = { 'with_centering': with_centering, 'with_scaling': with_scaling, 'quantile_range': quantile_range, 'copy': copy} self._wrapped_model = Op(**self._hyperparams) def fit(self, X, y=None): if (y is not None): self._wrapped_model.fit(X, y) else: self._wrapped_model.fit(X) return self def transform(self, X): return self._wrapped_model.transform(X)
class CreateRobustScaler(CreateModel): def fit(self, data, args): self.model = RobustScaler() with Timer() as t: self.model.fit(data.X_train, data.y_train) return t.interval def test(self, data): assert self.model is not None return self.model.transform(data.X_test) def predict(self, data): with Timer() as t: self.predictions = self.test(data) data.learning_task = LearningTask.REGRESSION return t.interval