Esempio n. 1
0
 def __init__(self, horizon=2):
     """Set up the transformer for a forecast horizon of at least two steps.

     Parameters
     ----------
     horizon : int, default 2
         Number of future steps to expose; must be >= 2.

     Raises
     ------
     ValueError
         If ``horizon`` is below 2.
     """
     super().__init__()
     if horizon < 2:
         raise ValueError("horizon must be greater than 1")
     self.horizon = horizon
     # One lag column per horizon step, with consecutive (stride-1) lags.
     self.autoregressive_transformer = AutoregressiveTransformer(
         pred_stride=1, num_lags=self.horizon)
def make_pipeline(model):
    """Build a forecasting pipeline that ends in ``model``.

    Bug fixed: the original implementation assembled a ``steps`` list step by
    step (including the caller's ``model``) and then discarded it, returning a
    pipeline hard-wired to ``LinearRegression(fit_intercept=False)`` — the
    ``model`` argument was silently ignored.  The dead code is removed and the
    supplied estimator is now actually used as the final pipeline step.

    Parameters
    ----------
    model : estimator
        Any scikit-learn-style estimator used as the final ('model') step.

    Returns
    -------
    ForecasterPipeline
        Differencing + imputation + scaling preprocessing, autoregressive and
        seasonal feature generation, then ``model``.
    """
    return ForecasterPipeline([
        ('pre_differencer', DifferenceTransformer(period=1)),
        ('pre_diff_imputer', ReversibleImputer()),
        ('pre_day_differencer', DifferenceTransformer(period=1)),
        ('pre_day_diff_imputer', ReversibleImputer()),
        ('pre_scaler', StandardScaler()),
        ('features', FeatureUnion([
            ('ar_features', AutoregressiveTransformer(num_lags=1)),
            ('seasonal_features', SeasonalTransformer(seasonal_period=1)),
        ])),
        ('post_feature_imputer', ReversibleImputer()),
        ('post_feature_scaler', StandardScaler()),
        ('model', model),
    ])
Esempio n. 3
0
    def test_autoregressive_transform_2(self):
        """Two lags with stride two: first usable row appears at index 3."""
        series = np.arange(1, 6, dtype=np.float64)[:, np.newaxis]
        transformer = AutoregressiveTransformer(num_lags=2, pred_stride=2)
        result = transformer.fit_transform(series)

        nan_row = [np.nan, np.nan]
        expected = np.array(
            [nan_row, nan_row, nan_row, [1, 2], [2, 3]], dtype=np.float64)
        assert np.allclose(result, expected, equal_nan=True)
Esempio n. 4
0
    def test_autoregressive_transform_3(self):
        """Three lags with stride two: four NaN rows, then sliding windows."""
        series = np.arange(1, 9, dtype=np.float64)[:, np.newaxis]
        transformer = AutoregressiveTransformer(num_lags=3, pred_stride=2)
        result = transformer.fit_transform(series)

        nan_row = [np.nan] * 3
        # Rows 4..7 correspond to targets y = 5..8.
        windows = [[1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 6]]
        expected = np.array([nan_row] * 4 + windows, dtype=np.float64)
        assert np.allclose(result, expected, equal_nan=True)
Esempio n. 5
0
class HorizonTransformer(BaseEstimator, TransformerMixin):
    """Turn a target series into a 2-D matrix of the next ``horizon`` values.

    Lag construction is delegated to ``AutoregressiveTransformer`` with
    ``num_lags=horizon``; the resulting rows are then rotated so that each
    output row lines up with future (rather than past) observations.
    """

    # Flags presumably consumed by the surrounding pipeline machinery:
    # needs_refit — transformer must be refit when transforming new data;
    # y_only — applies to the target series, not the feature matrix.
    needs_refit = True
    y_only = True

    def __init__(self, horizon=2):
        """Store the horizon (must be >= 2) and build the lag transformer."""
        super().__init__()
        if horizon < 2:
            raise ValueError("horizon must be greater than 1")
        self.horizon = horizon
        # One lag column per horizon step, consecutive (stride-1) lags.
        self.autoregressive_transformer = AutoregressiveTransformer(
            num_lags=self.horizon, pred_stride=1)

    def fit(self, X, y=None):
        """Fit the underlying lag transformer on X (coerced to 2-D)."""
        self.autoregressive_transformer.fit(expand_dim_if_needed(X))
        return self

    def transform(self, X, y=None, refit=False):
        """Return a matrix whose rows are forward-shifted series values.

        If ``refit`` is True, the underlying lag transformer is refit on X
        before transforming.
        """
        X = expand_dim_if_needed(X)
        if refit:
            self.autoregressive_transformer.fit(X)
        Xt = self.autoregressive_transformer.transform(X)
        # The autoregressive transformer won't build lags _with_ the last element of X.
        # So, we have to manually tack it on here.
        last_non_nan_piece = np.hstack((Xt[-1, 1:], X[-1]))
        # Rotate the first `horizon` rows (the lag transformer's padding) to
        # the end, inserting the hand-built final row in between, so row t of
        # the output corresponds to values ahead of position t.
        Xt = np.vstack(
            (Xt[self.horizon:, :], last_non_nan_piece, Xt[1:self.horizon, :]))
        return Xt

    def inverse_transform(self, X, y=None):
        """Undo the row rotation, then invert the underlying lag transform."""
        Xt = np.vstack((X[-self.horizon:, :], X[:-self.horizon, :]))
        return self.autoregressive_transformer.inverse_transform(Xt)

    def fit_transform(self, X, y=None):
        """Fit on X, then transform X."""
        return self.fit(X).transform(X)
Esempio n. 6
0
class HorizonTransformer(BaseEstimator, TransformerMixin):
    """Expose the next ``horizon`` values of a series as a 2-D target matrix.

    Lag construction is delegated to ``AutoregressiveTransformer`` with
    ``num_lags=horizon``; the rows are then cyclically shifted so each row
    aligns with future, rather than past, observations.
    """

    # Flags presumably consumed by the surrounding pipeline machinery:
    # needs_refit — refit is required when transforming new data;
    # y_only — applies to the target series, not the feature matrix.
    needs_refit = True
    y_only = True

    def __init__(self, horizon=2):
        """Validate and store the horizon; build the underlying lagger."""
        super().__init__()
        if horizon < 2:
            raise ValueError('horizon must be greater than 1')
        self.horizon = horizon
        self.autoregressive_transformer = AutoregressiveTransformer(
            num_lags=self.horizon, pred_stride=1)

    def fit(self, X, y=None):
        """Fit the underlying lag transformer on X (coerced to 2-D)."""
        self.autoregressive_transformer.fit(expand_dim_if_needed(X))
        return self

    def transform(self, X, y=None, refit=False):
        """Return the lag matrix rolled so its rows point at future values."""
        X = expand_dim_if_needed(X)
        if refit:
            self.autoregressive_transformer.fit(X)
        lagged = self.autoregressive_transformer.transform(X)
        # Cyclically shift the first `horizon` rows to the end — equivalent
        # to np.vstack((lagged[horizon:], lagged[:horizon])).
        # TODO: replace beginning with nans?
        return np.roll(lagged, -self.horizon, axis=0)

    def inverse_transform(self, X, y=None):
        """Undo the roll, then invert the underlying lag transform."""
        unrolled = np.roll(X, self.horizon, axis=0)
        return self.autoregressive_transformer.inverse_transform(unrolled)

    def fit_transform(self, X, y=None):
        """Fit on X, then transform X."""
        return self.fit(X).transform(X)
Esempio n. 7
0
    def test_multiouput_prediction(self):
        # Smoke test only for now. TODO: Make this a real test
        steps = [
            ('pre_horizon', HorizonTransformer(horizon=4)),
            ('pre_imputer', ReversibleImputer(y_only=True)),
            ('features', FeatureUnion(
                [('ar_transformer', AutoregressiveTransformer(num_lags=3))])),
            ('post_lag_imputer', ReversibleImputer()),
            ('regressor', LinearRegression()),
        ]
        pipeline = ForecasterPipeline(steps)

        grid = np.linspace(0, 1, 100)
        y = np.sin(2 * np.pi * 5 * grid) + np.random.normal(0, .1, size=100)

        pipeline.fit(y[:, np.newaxis], y)
        pipeline.predict(y[:, np.newaxis], to_scale=True, refit=True)
Esempio n. 8
0
 def get_pipeline(self):
     """Build a ForecasterPipeline whose regressor depends on
     ``self.learning_method``.

     Supported methods: "linear" (per-output linear regression), "booster"
     (per-output XGBoost), and "deep" (a skorch-wrapped temporal conv net).
     Any other value leaves ``regressor`` as None, which is passed into the
     pipeline as-is.
     """
     regressor = None
     if self.learning_method == "linear":
         # One independent linear model per horizon step.
         regressor = MultiOutputRegressor(LinearRegression(fit_intercept=self.fit_intercept),
                                          n_jobs=6)
     elif self.learning_method == "booster":
         regressor = MultiOutputRegressor(XGBRegressor(n_jobs=12,
                                                       n_estimators=self.no_estimators))
     elif self.learning_method == "deep":
         # skorch regressor wrapping a TCN that emits all horizon steps at once.
         regressor = NeuralNetRegressor(
             module=TemporalConvNet,
             module__num_inputs=1,  # assumes a univariate series — TODO confirm
             module__num_channels=[2] * self.no_channels,
             module__output_sz=self.horizon,
             module__kernel_size=5,
             module__dropout=0.0,
             max_epochs=60,
             batch_size=256,
             lr=2e-3,
             optimizer=torch.optim.Adam,
             device='cpu',
             iterator_train__shuffle=True,
             # Clip gradients by L2 norm at 1 to keep training stable.
             callbacks=[GradientNormClipping(gradient_clip_value=1,
                                             gradient_clip_norm_type=2)],
             train_split=None,  # no internal validation split
         )
     return ForecasterPipeline([
         # Convert the `y` target into a horizon
         ('pre_horizon', HorizonTransformer(horizon=self.horizon)),
         ('pre_reversible_imputer', ReversibleImputer(y_only=True)),
         ('features', FeatureUnion([
             # Generate a week's worth of autoregressive features
             ('ar_features', AutoregressiveTransformer(
                 num_lags=int(self.horizon * self.num_lags), pred_stride=self.pred_stride)),
         ])),
         ('post_feature_imputer', ReversibleImputer()),
         ('regressor', regressor)
     ])
Esempio n. 9
0
    def test_multiouput_forecast(self):
        # Smoke test only for now. TODO: Make this a real test
        steps = [('pre_horizon', HorizonTransformer(horizon=4)),
                 ('pre_imputer', ReversibleImputer(y_only=True)),
                 ('features', FeatureUnion([
                     ('ar_transformer', AutoregressiveTransformer(num_lags=3)),
                 ])),
                 ('post_lag_imputer', ReversibleImputer()),
                 ('regressor', LinearRegression())]
        pipeline = ForecasterPipeline(steps)

        grid = np.linspace(0, 1, 100)
        y = np.sin(2 * np.pi * 5 * grid) + np.random.normal(0, 0.1, size=100)

        pipeline.fit(y[:, np.newaxis], y)
        pipeline.forecast(y[:, np.newaxis], 20)
Esempio n. 10
0
class TestPipelines:
    """Smoke tests for the forecaster and classifier pipelines."""

    # Shared preprocessing steps; individual tests append their estimator.
    steps = [
        ('pre_differencer', DifferenceTransformer(period=1)),
        ('pre_imputer_1', ReversibleImputer()),
        ('features', FeatureUnion([
            ('ar_transformer', AutoregressiveTransformer(num_lags=3)),
            ('seasonal_transformer', SeasonalTransformer(seasonal_period=4)),
        ])),
        ('post_lag_imputer_2', ReversibleImputer()),
    ]

    # Stand-alone copies of the individual preprocessing steps.
    dt = DifferenceTransformer(period=1)
    ri1 = ReversibleImputer()
    fe = FeatureUnion([
        ('ar_transformer', AutoregressiveTransformer(num_lags=3)),
        ('seasonal_transformer', SeasonalTransformer(seasonal_period=4)),
    ])
    ri2 = ReversibleImputer()

    @staticmethod
    def _noisy_sine():
        """A 5-cycle sine over [0, 1] (100 points) plus N(0, .1) noise."""
        grid = np.linspace(0, 1, 100)
        return np.sin(2 * np.pi * 5 * grid) + np.random.normal(0, .1, size=100)

    @staticmethod
    def _multioutput_pipeline():
        """Horizon-based multi-output regression pipeline used by two tests."""
        return ForecasterPipeline([
            ('pre_horizon', HorizonTransformer(horizon=4)),
            ('pre_imputer', ReversibleImputer(y_only=True)),
            ('features', FeatureUnion([
                ('ar_transformer', AutoregressiveTransformer(num_lags=3)),
            ])),
            ('post_lag_imputer', ReversibleImputer()),
            ('regressor', LinearRegression()),
        ])

    def test_predict(self):
        # Smoke test plus a loose MSE bound. TODO: Make this a real test
        np.random.seed(SEED)
        y = self._noisy_sine()

        # Skip the DifferenceTransformer. It's actually bad.
        pipeline = ForecasterPipeline(
            list(self.steps[1:])
            + [('regressor', LinearRegression(fit_intercept=False))])

        pipeline.fit(y[:, np.newaxis], y)
        prediction = pipeline.predict(y[:, np.newaxis], to_scale=True, refit=True)
        assert np.mean((prediction - y.squeeze()) ** 2) < 0.05

    def test_forecast(self):
        # Smoke test only. TODO: Make this a real test
        y = self._noisy_sine()

        pipeline = ForecasterPipeline(
            list(self.steps)
            + [('regressor', LinearRegression(fit_intercept=False))])
        pipeline.fit(y[:, np.newaxis], y)

        pipeline.forecast(y[:, np.newaxis], 20)

    def test_classifier(self):
        # Smoke test plus a loose accuracy bound. TODO: Make this a real test
        np.random.seed(SEED)
        y = self._noisy_sine()

        pipeline = ClassifierPipeline(
            list(self.steps)
            + [('classifier',
                LogisticRegression(solver='lbfgs', fit_intercept=False))])

        labels = y > 0
        pipeline.fit(y[:, np.newaxis], labels)
        predicted = pipeline.predict(y[:, np.newaxis])
        assert (predicted == labels).mean() > 0.75

    def test_multiouput_prediction(self):
        # Smoke test only. TODO: Make this a real test
        pipeline = self._multioutput_pipeline()
        y = self._noisy_sine()

        pipeline.fit(y[:, np.newaxis], y)

        pipeline.predict(y[:, np.newaxis], to_scale=True, refit=True)

    def test_multiouput_forecast(self):
        # Smoke test only. TODO: Make this a real test
        pipeline = self._multioutput_pipeline()
        y = self._noisy_sine()

        pipeline.fit(y[:, np.newaxis], y)

        pipeline.forecast(y[:, np.newaxis], 20)