Example 1
def test_make_reduction_infer_scitype_raises_error():
    """Test make_reduction.

    The scitype of pipeline cannot be inferred here, as it may be used together
    with a tabular or time series regressor.
    """
    estimator = make_pipeline(Tabularizer(), LinearRegression())
    with pytest.raises(ValueError):
        make_reduction(estimator, scitype="infer")
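The ValueError above can be avoided by naming the scitype explicitly instead of relying on inference. A minimal sketch, assuming "time-series-regressor" is the intended scitype for the Tabularizer pipeline and that the import paths below match the installed sktime version:

from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sktime.forecasting.compose import make_reduction
from sktime.transformations.panel.reduce import Tabularizer

# Stating the scitype explicitly avoids the ValueError raised for scitype="infer".
estimator = make_pipeline(Tabularizer(), LinearRegression())
forecaster = make_reduction(estimator, scitype="time-series-regressor")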
Example 2
def test_dummy_regressor_mean_prediction_endogenous_only(
    fh, window_length, strategy, scitype
):
    """Test dummy regressor mean prediction endogenous_only.

    The DummyRegressor ignores the input feature data X, hence we can use it for
    testing reduction from forecasting to both tabular and time series regression.
    The DummyRegressor also supports the 'multioutput' strategy.
    """
    y = make_forecasting_problem()
    fh = check_fh(fh)
    y_train, y_test = temporal_train_test_split(y, fh=fh)

    regressor = DummyRegressor(strategy="mean")
    forecaster = make_reduction(
        regressor, scitype=scitype, window_length=window_length, strategy=strategy
    )
    forecaster.fit(y_train, fh=fh)
    actual = forecaster.predict()

    if strategy == "recursive":
        # For the recursive strategy, we always use the one-step-ahead value as
        # the target vector in the regression problem during training, regardless
        # of the actual forecasting horizon.
        effective_window_length = window_length
    else:
        # For the other strategies, we split the data taking into account the steps
        # ahead we want to predict.
        effective_window_length = window_length + max(fh) - 1

    # In the sliding-window transformation, the first values of the target series
    # make up the first window and are not used in the transformed target vector. So
    # the expected result should be the mean of the remaining values.
    expected = np.mean(y_train[effective_window_length:])
    np.testing.assert_array_almost_equal(actual, expected)
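To make the expected value in the final assertion concrete, here is a small illustration of the sliding-window logic with made-up numbers (window_length=3, recursive strategy; the series below is invented for this sketch):

import numpy as np
import pandas as pd

# Toy series and window length, invented for this sketch.
y_train = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
window_length = 3

# The sliding-window transformation produces rows of the form
# (y[t-3], y[t-2], y[t-1]) -> y[t], so the targets are y_train[3:] = [4, 5, 6]
# and the DummyRegressor's constant prediction is their mean.
expected = np.mean(y_train[window_length:])  # (4 + 5 + 6) / 3 = 5.0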
Example 3
def test_multioutput_direct_equivalence_tabular_linear_regression(fh):
    """Test that 'direct' and 'multioutput' strategies produce equal predictions.

    With a LinearRegression estimator, the 'direct' and 'multioutput' strategies
    should produce the same predictions, since ordinary least squares fits each
    forecasting-horizon step independently in the multioutput case.
    """
    y, X = make_forecasting_problem(make_X=True)
    y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, fh=fh)

    estimator = LinearRegression()
    direct = make_reduction(estimator, strategy="direct")
    multioutput = make_reduction(estimator, strategy="multioutput")

    y_pred_direct = direct.fit(y_train, X_train, fh=fh).predict(fh, X_test)
    y_pred_multioutput = multioutput.fit(y_train, X_train,
                                         fh=fh).predict(fh, X_test)

    np.testing.assert_array_almost_equal(y_pred_direct.to_numpy(),
                                         y_pred_multioutput.to_numpy())
Example 4
def test_make_reduction_construct_instance(scitype, strategy, klass, window_length):
    """Test make_reduction."""
    estimator = DummyRegressor()
    forecaster = make_reduction(
        estimator, window_length=window_length, scitype=scitype, strategy=strategy
    )
    assert isinstance(forecaster, klass)
    assert forecaster.get_params()["window_length"] == window_length
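As a concrete, non-parametrized instance of the test above, here is a sketch; the forecaster class name is an assumption about sktime's current naming of its reduction forecasters:

from sklearn.dummy import DummyRegressor
from sktime.forecasting.compose import make_reduction
from sktime.forecasting.compose import RecursiveTabularRegressionForecaster

# scitype="tabular-regressor" with strategy="recursive" is expected to yield a
# RecursiveTabularRegressionForecaster (class name assumed, see note above).
forecaster = make_reduction(
    DummyRegressor(), scitype="tabular-regressor", strategy="recursive", window_length=10
)
assert isinstance(forecaster, RecursiveTabularRegressionForecaster)
assert forecaster.get_params()["window_length"] == 10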
Example 5
def test_dirrec_against_recursive_accumulated_error():
    """Test that the 'dirrec' strategy beats 'recursive' on accumulated error.

    The 'dirrec' strategy should produce a lower error than the purely recursive
    strategy, because it accumulates less error over the forecasting horizon.
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y, test_size=24)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    estimator = LinearRegression()
    recursive = make_reduction(estimator,
                               scitype="tabular-regressor",
                               strategy="recursive")
    dirrec = make_reduction(estimator,
                            scitype="tabular-regressor",
                            strategy="dirrec")

    preds_recursive = recursive.fit(y_train, fh=fh).predict(fh)
    preds_dirrec = dirrec.fit(y_train, fh=fh).predict(fh)

    assert smape_loss(y_test, preds_dirrec) < smape_loss(
        y_test, preds_recursive)
Example 6
def test_consistent_data_passing_to_component_estimators_in_fit_and_predict(
    estimator, window_length, strategy
):
    """Test consistent data passing to component estimators in fit and predict.

    We generate data whose values encode the time points, i.e. arrays whose
    values increase in unit steps from one time point to the next.
    """
    n_variables = 3
    n_timepoints = 10
    y, X = _make_y_X(n_timepoints, n_variables)
    y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, fh=FH)

    forecaster = make_reduction(
        estimator, strategy=strategy, window_length=window_length
    )
    forecaster.fit(y_train, X_train, fh=FH)
    forecaster.predict(X=X_test)

    # Get recorded data.
    if strategy == "direct":
        estimator_ = forecaster.estimators_[0]
    else:
        estimator_ = forecaster.estimator_

    X_fit = estimator_.X_fit
    y_fit = estimator_.y_fit
    X_pred = estimator_.X_pred

    # Format feature data into 3d array if the data is not in that format already.
    X_fit = X_fit.reshape(X_fit.shape[0], n_variables, -1)
    X_pred = X_pred.reshape(X_pred.shape[0], n_variables, -1)

    # Format target data into 2d array.
    y_fit = y_fit.reshape(y_fit.shape[0], -1)

    # Check that both fit and predict data have unit steps between them.
    assert np.allclose(np.diff(X_fit), 1)
    assert np.allclose(np.diff(X_pred), 1)

    # Check that predict data is a step ahead from last row in fit data.
    np.testing.assert_array_equal(X_pred, X_fit[[-1]] + 1)

    # Check that y values are further ahead than X values.
    assert np.all(X_fit < y_fit[:, np.newaxis, :])
Example 7
def test_linear_extrapolation_endogenous_only(
    fh, window_length, strategy, method, slope, regressor, scitype
):
    """Test linear extrapolation endogenous only."""
    n_timepoints = 13
    y = _make_y(0, n_timepoints, method=method, slope=slope)
    y = pd.Series(y)
    fh = check_fh(fh)

    forecaster = make_reduction(
        regressor, scitype=scitype, window_length=window_length, strategy=strategy
    )
    forecaster.fit(y, fh=fh)
    actual = forecaster.predict()

    end = n_timepoints + max(fh) + 1
    expected = _make_y(n_timepoints, end, method=method, slope=slope)[fh.to_indexer()]
    np.testing.assert_almost_equal(actual, expected)
Example 8
                     tickangle=35)
    return fig


# 14-day forecasting horizon.
fh = np.arange(1, 14 + 1)

# Daily series of the new-cases trend, indexed by the reporting date.
y = pd.Series(data=trend_newCases.values, index=total_de_casos_amazonas.date)
y.index.freq = 'D'

# LightGBM regressor used as the underlying estimator for the reduction.
model = LGBMRegressor(random_state=4,
                      learning_rate=0.04591301953670739,
                      num_leaves=45,
                      min_child_samples=1,
                      subsample=0.05,
                      colsample_bytree=0.9828905761860228,
                      subsample_freq=1,
                      n_estimators=685)
# Reduce forecasting to tabular regression over a sliding window of 14 observations
# (make_reduction defaults to the recursive strategy).
reg = make_reduction(estimator=model, window_length=14)

# Backtest with an expanding-window split, refitting the forecaster at each cutoff.
cv = ExpandingWindowSplitter(initial_window=60)
cross_val = evaluate(forecaster=reg,
                     y=y,
                     cv=cv,
                     strategy="refit",
                     return_data=True)

# Refit on the full series and forecast the next 14 days.
reg.fit(y)
y_pred = reg.predict(fh).round()
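# A hedged sketch of how the backtest results from `evaluate` could be
# summarised; the metric column is selected by its "test_" prefix because its
# exact name depends on the sktime version and the scoring metric used.
print(cross_val["cutoff"].tolist())           # cutoff used for each fold
print(cross_val.filter(like="test_").mean())  # average test score over folds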


def show_figure11():
    fig = go.Figure()

    fig.add_trace(
        go.Bar(x=total_de_casos_amazonas['date'].tail(30),
Example 9
                                names=["instances", "timepoints"])
y_group2 = pd.DataFrame(y_test.values, index=mi, columns=["y"])

y_test_grp = pd.concat([y_group1, y_group2])

# Get different WindowSummarizer parameter sets
kwargs = WindowSummarizer.get_test_params()[0]
kwargs_alternames = WindowSummarizer.get_test_params()[1]
kwargs_variant = WindowSummarizer.get_test_params()[2]

regressor = make_pipeline(RandomForestRegressor(random_state=1))

forecaster1 = make_reduction(
    regressor,
    scitype="tabular-regressor",
    transformers=[WindowSummarizer(**kwargs)],
    window_length=None,
    strategy="recursive",
)

# forecaster1.fit(y=y_train_grp, X=y_train_grp)

# # check_estimator(forecaster1, return_exceptions=False)
# y_pred1 = forecaster1.predict(X=y_test_grp, fh=[1, 2, 12])

forecaster2 = make_reduction(
    regressor,
    scitype="tabular-regressor",
    transformers=[WindowSummarizer(**kwargs, n_jobs=1)],
    window_length=None,
    strategy="recursive",
Example 10
def test_make_reduction_infer_scitype(estimator, scitype):
    """Test make_reduction."""
    forecaster = make_reduction(estimator, scitype="infer")
    assert forecaster._estimator_scitype == scitype
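As a hedged sketch of how this inference plays out in practice: _estimator_scitype is the private attribute the test itself inspects, while the KNeighborsTimeSeriesRegressor import path is an assumption about the installed sktime version.

from sklearn.linear_model import LinearRegression
from sktime.forecasting.compose import make_reduction
from sktime.regression.distance_based import KNeighborsTimeSeriesRegressor

# A scikit-learn regressor should be inferred as a tabular regressor ...
assert make_reduction(LinearRegression())._estimator_scitype == "tabular-regressor"
# ... and an sktime time series regressor as a time series regressor.
assert (
    make_reduction(KNeighborsTimeSeriesRegressor())._estimator_scitype
    == "time-series-regressor"
)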