예제 #1
0
def test_gscv(forecaster, param_grid, cv, scoring):
    """Fit a forecasting grid search on longley and hand it to ``_check_cv``.

    The search is fitted on the longley target ``y`` with exogenous data
    ``X``; the grid is then wrapped in ``ParameterGrid`` and passed, together
    with the fitted search, to ``_check_cv`` for verification.
    """
    y, X = load_longley()
    search = ForecastingGridSearchCV(
        forecaster, param_grid=param_grid, cv=cv, scoring=scoring
    )
    search.fit(y, X)

    # Keep the raw grid untouched and give the wrapped grid its own name.
    candidates = ParameterGrid(param_grid)
    _check_cv(forecaster, search, cv, candidates, y, X, scoring)
예제 #2
0
def test_evaluate_no_exog_against_with_exog():
    """Check that evaluation scores change when exogenous data is supplied."""
    y, X = load_longley()
    model = ARIMA(suppress_warnings=True)
    splitter = SlidingWindowSplitter()
    metric = MeanAbsolutePercentageError(symmetric=True)

    # Same forecaster, splitter and metric; only the exogenous input differs.
    with_exog = evaluate(model, splitter, y, X=X, scoring=metric)
    without_exog = evaluate(model, splitter, y, X=None, scoring=metric)

    score_column = f"test_{metric.name}"
    scores_differ = with_exog[score_column] != without_exog[score_column]
    assert np.all(scores_differ)
예제 #3
0
def test_evaluate_no_exog_against_with_exog():
    """Check that adding exogenous data produces different results."""
    y, X = load_longley()
    model = ARIMA(suppress_warnings=True)
    splitter = SlidingWindowSplitter()
    metric = sMAPE()

    # Run the identical evaluation twice, toggling only the exogenous data.
    with_exog = evaluate(model, splitter, y, X=X, scoring=metric)
    without_exog = evaluate(model, splitter, y, X=None, scoring=metric)

    score_column = f"test_{metric.name}"
    scores_differ = with_exog[score_column] != without_exog[score_column]
    assert np.all(scores_differ)
예제 #4
0
def test_rscv(forecaster, param_grid, cv, scoring, n_iter, random_state):
    """Fit a randomized forecasting search on longley and verify it.

    The parameter sets passed to ``_check_cv`` are drawn with a
    ``ParameterSampler`` built from the same ``param_grid``, ``n_iter`` and
    ``random_state`` that were given to the search.
    """
    y, X = load_longley()
    search = ForecastingRandomizedSearchCV(
        forecaster,
        param_distributions=param_grid,
        cv=cv,
        scoring=scoring,
        n_iter=n_iter,
        random_state=random_state,
    )
    search.fit(y, X)

    # Materialise the sampler so the candidates can be passed as a list.
    sampler = ParameterSampler(param_grid, n_iter, random_state=random_state)
    sampled_params = list(sampler)
    _check_cv(forecaster, search, cv, sampled_params, y, X, scoring)
예제 #5
0
def test_ARIMAForecaster_multivariate(fh):
    """Fit ARIMAForecaster with exogenous data and check prediction and score.

    The data is indexed as nested: the first cell of ``y`` and of every column
    of ``X`` is itself sliced along time, with the last ``max(fh)`` points
    held out for testing.
    """
    X, y = load_longley(return_X_y=True)

    # Position at which the series stored in the first cell of y is split.
    horizon_end = np.max(fh)
    target_series = y.iloc[0]
    split_at = len(target_series) - horizon_end

    head = slice(None, split_at)
    tail = slice(split_at, None)

    def _nested_exog(window):
        # One single-cell Series per column of X, transposed into one row.
        cells = [
            pd.Series([X.iloc[0, col].iloc[window]])
            for col in range(X.shape[1])
        ]
        frame = pd.DataFrame(cells).T
        frame.columns = X.columns
        return frame

    y_train = pd.Series([target_series.iloc[head]])
    y_test = pd.Series([target_series.iloc[tail]])
    X_train = _nested_exog(head)
    X_test = _nested_exog(tail)

    model = ARIMAForecaster()
    model.fit(y_train, X=X_train)
    y_pred = model.predict(fh=fh, X=X_test)
    assert y_pred.shape[0] == len(fh)
    assert model.score(y_test, fh=fh, X=X_test) > 0
예제 #6
0
def test_ForecastingStrategy_multivariate():
    """Fit a ForecastingStrategy on longley and check the prediction shape.

    The first 13 time points of each column's first cell form the training
    frame and the remainder the test frame; the forecasting horizon covers
    every step of the test target.
    """
    longley = load_longley(return_X_y=False)

    def _nested_rows(window):
        # One single-cell Series per column, transposed into a one-row frame.
        cells = [
            pd.Series([longley.iloc[0, col].iloc[window]])
            for col in range(longley.shape[1])
        ]
        frame = pd.DataFrame(cells).T
        frame.columns = longley.columns
        return frame

    train = _nested_rows(slice(None, 13))
    test = _nested_rows(slice(13, None))

    target = "TOTEMP"
    n_test_steps = len(test[target].iloc[0])
    fh = np.arange(n_test_steps) + 1  # steps 1..n_test_steps
    task = ForecastingTask(target=target, fh=fh, metadata=train)

    strategy = ForecastingStrategy(estimator=ARIMAForecaster())
    strategy.fit(task, train)
    y_pred = strategy.predict(data=test)

    expected_shape = test[task.target].iloc[0].shape
    assert y_pred.shape == expected_shape
예제 #7
0
#!/usr/bin/env python3 -u
# -*- coding: utf-8 -*-
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file).
"""Unit tests of DateTimeFeatures functionality."""

import pytest

from sktime.datasets import load_airline, load_longley
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.transformations.series.date import DateTimeFeatures

# Module-level fixtures: load the multivariate longley dataset, split it in
# time, and record the column names DateTimeFeatures produces on the exogenous
# training data under several settings.
# NOTE(review): the resulting lists are presumably compared against expected
# column names by tests elsewhere in this file - confirm.

y, X = load_longley()
y_train, y_test, X_train, X_test = temporal_train_test_split(y, X)

# Column names with weekly frequency and the "comprehensive" feature scope
pipe = DateTimeFeatures(ts_freq="W", feature_scope="comprehensive")
pipe.fit(X_train)
test_full_featurescope = pipe.transform(X_train).columns.to_list()

# Column names with weekly frequency and the "minimal" feature scope
pipe = DateTimeFeatures(ts_freq="W", feature_scope="minimal")
pipe.fit(X_train)
test_reduced_featurescope = pipe.transform(X_train).columns.to_list()

# Column names with monthly frequency and the "comprehensive" feature scope
pipe = DateTimeFeatures(ts_freq="M", feature_scope="comprehensive")
pipe.fit(X_train)
test_changing_frequency = pipe.transform(X_train).columns.to_list()
# First "pd-multiindex" example registered for the Panel scitype.
y_multi = get_examples(mtype="pd-multiindex", as_scitype="Panel")[0]
# Temporal split of y alone (no exogenous data); rebinds y_train and y_test.
# NOTE(review): the original comment states y_train is univariate here -
# confirm which dataset `y` refers to at this point in the file.
y_train, y_test = temporal_train_test_split(y)

# Create Panel sample data: two instances (0 and 1) that both carry the values
# of y, indexed by an ("instances", "timepoints") MultiIndex.
mi = pd.MultiIndex.from_product([[0], y.index],
                                names=["instances", "timepoints"])
y_group1 = pd.DataFrame(y.values, index=mi, columns=["y"])

mi = pd.MultiIndex.from_product([[1], y.index],
                                names=["instances", "timepoints"])
y_group2 = pd.DataFrame(y.values, index=mi, columns=["y"])

# Stack both instances into a single frame.
y_grouped = pd.concat([y_group1, y_group2])

# Longley data under separate names so the fixtures above are not overwritten;
# the y test split is discarded.
y_ll, X_ll = load_longley()
y_ll_train, _, X_ll_train, X_ll_test = temporal_train_test_split(y_ll, X_ll)

# First three parameter sets returned by WindowSummarizer.get_test_params()
kwargs = WindowSummarizer.get_test_params()[0]
kwargs_alternames = WindowSummarizer.get_test_params()[1]
kwargs_variant = WindowSummarizer.get_test_params()[2]


def count_gt100(x):
    """Return the number of observations in ``x`` that are greater than 100."""
    above_threshold = x > 100
    # The mask is reversed before summing, as in the original implementation;
    # the order of the elements does not change the resulting count.
    reversed_mask = above_threshold[::-1]
    return np.sum(reversed_mask)


# Cannot be pickled in get_test_params, therefore here explicit
kwargs_custom = {