def test_gscv(forecaster, param_grid, cv, scoring):
    """Test ForecastingGridSearchCV."""
    y, X = load_longley()
    gscv = ForecastingGridSearchCV(
        forecaster, param_grid=param_grid, cv=cv, scoring=scoring
    )
    gscv.fit(y, X)

    param_grid = ParameterGrid(param_grid)
    _check_cv(forecaster, gscv, cv, param_grid, y, X, scoring)

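# A hedged, self-contained sketch (not part of the original test module) of the
# kind of objects the parametrized fixtures above would supply: a concrete grid
# search over NaiveForecaster strategies on the Longley data. The forecaster,
# grid, splitter and metric below are illustrative choices, not the fixtures
# used in sktime's own suite.
def _example_gscv_usage():
    from sktime.datasets import load_longley
    from sktime.forecasting.model_selection import (
        ForecastingGridSearchCV,
        SlidingWindowSplitter,
    )
    from sktime.forecasting.naive import NaiveForecaster
    from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError

    y, X = load_longley()
    gscv = ForecastingGridSearchCV(
        NaiveForecaster(),
        param_grid={"strategy": ["last", "mean", "drift"]},
        cv=SlidingWindowSplitter(fh=1),
        scoring=MeanAbsolutePercentageError(symmetric=True),
    )
    gscv.fit(y, X=X)
    # after fitting, the best parameter combination is exposed via best_params_
    return gscv.best_params_
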
def test_evaluate_no_exog_against_with_exog():
    """Check that adding exogenous data produces different results."""
    y, X = load_longley()
    forecaster = ARIMA(suppress_warnings=True)
    cv = SlidingWindowSplitter()
    scoring = MeanAbsolutePercentageError(symmetric=True)

    out_exog = evaluate(forecaster, cv, y, X=X, scoring=scoring)
    out_no_exog = evaluate(forecaster, cv, y, X=None, scoring=scoring)

    scoring_name = f"test_{scoring.name}"
    assert np.all(out_exog[scoring_name] != out_no_exog[scoring_name])

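# A hedged, self-contained illustration (not from the original module) of why
# scoring_name is built as "test_" + scoring.name above: evaluate returns one
# row per cv split, and the metric column follows that naming scheme. The
# NaiveForecaster / ExpandingWindowSplitter choices are illustrative only.
def _example_evaluate_metric_column():
    from sktime.datasets import load_airline
    from sktime.forecasting.model_evaluation import evaluate
    from sktime.forecasting.model_selection import ExpandingWindowSplitter
    from sktime.forecasting.naive import NaiveForecaster
    from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError

    scoring = MeanAbsolutePercentageError(symmetric=True)
    out = evaluate(
        NaiveForecaster(),
        ExpandingWindowSplitter(fh=1, initial_window=36),
        load_airline(),
        scoring=scoring,
    )
    # one row per split; the metric column is named after the scoring object
    assert f"test_{scoring.name}" in out.columns
    return out
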
def test_rscv(forecaster, param_grid, cv, scoring, n_iter, random_state):
    """Test that ForecastingRandomizedSearchCV successfully searches the
    parameter distributions to identify the best parameter set.
    """
    y, X = load_longley()
    rscv = ForecastingRandomizedSearchCV(
        forecaster,
        param_distributions=param_grid,
        cv=cv,
        scoring=scoring,
        n_iter=n_iter,
        random_state=random_state,
    )
    rscv.fit(y, X)

    param_distributions = list(
        ParameterSampler(param_grid, n_iter, random_state=random_state)
    )
    _check_cv(forecaster, rscv, cv, param_distributions, y, X, scoring)

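# A hedged illustration (not from the original module): ParameterSampler draws
# n_iter parameter dicts from the distributions, and the list built above is
# what _check_cv compares the fitted search results against. The distribution
# and values below are illustrative only.
def _example_parameter_sampler():
    from sklearn.model_selection import ParameterSampler

    param_distributions = {"strategy": ["last", "mean", "drift"]}
    sampled = list(ParameterSampler(param_distributions, n_iter=2, random_state=42))
    # each entry is a plain dict such as {"strategy": "mean"}
    assert len(sampled) == 2 and all(isinstance(p, dict) for p in sampled)
    return sampled
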
def test_ARIMAForecaster_multivariate(fh):
    X, y = load_longley(return_X_y=True)

    # get data in the required nested format: keep everything up to the cutoff
    # for training and hold out the last max(fh) observations for testing
    max_fh = np.max(fh)
    m = len(y.iloc[0])
    cutoff = m - max_fh

    y_train = pd.Series([y.iloc[0].iloc[:cutoff]])
    y_test = pd.Series([y.iloc[0].iloc[cutoff:]])
    X_train = pd.DataFrame(
        [pd.Series([X.iloc[0, i].iloc[:cutoff]]) for i in range(X.shape[1])]
    ).T
    X_train.columns = X.columns
    X_test = pd.DataFrame(
        [pd.Series([X.iloc[0, i].iloc[cutoff:]]) for i in range(X.shape[1])]
    ).T
    X_test.columns = X.columns

    m = ARIMAForecaster()
    m.fit(y_train, X=X_train)
    y_pred = m.predict(fh=fh, X=X_test)
    assert y_pred.shape[0] == len(fh)
    assert m.score(y_test, fh=fh, X=X_test) > 0

def test_ForecastingStrategy_multivariate():
    longley = load_longley(return_X_y=False)

    # split each nested column into the first 13 observations for training
    # and the remainder for testing
    train = pd.DataFrame(
        [pd.Series([longley.iloc[0, i].iloc[:13]]) for i in range(longley.shape[1])]
    ).T
    train.columns = longley.columns
    test = pd.DataFrame(
        [pd.Series([longley.iloc[0, i].iloc[13:]]) for i in range(longley.shape[1])]
    ).T
    test.columns = longley.columns

    target = "TOTEMP"
    fh = np.arange(len(test[target].iloc[0])) + 1
    task = ForecastingTask(target=target, fh=fh, metadata=train)

    estimator = ARIMAForecaster()
    s = ForecastingStrategy(estimator=estimator)
    s.fit(task, train)

    y_pred = s.predict(data=test)
    assert y_pred.shape == test[task.target].iloc[0].shape

#!/usr/bin/env python3 -u
# -*- coding: utf-8 -*-
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file).
"""Unit tests of DateTimeFeatures functionality."""

import pytest

from sktime.datasets import load_airline, load_longley
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.transformations.series.date import DateTimeFeatures

# Load multivariate dataset longley and apply calendar extraction
y, X = load_longley()
y_train, y_test, X_train, X_test = temporal_train_test_split(y, X)

# Test that comprehensive feature_scope works for weeks
pipe = DateTimeFeatures(ts_freq="W", feature_scope="comprehensive")
pipe.fit(X_train)
test_full_featurescope = pipe.transform(X_train).columns.to_list()

# Test that minimal feature_scope works for weeks
pipe = DateTimeFeatures(ts_freq="W", feature_scope="minimal")
pipe.fit(X_train)
test_reduced_featurescope = pipe.transform(X_train).columns.to_list()

# Test that comprehensive feature_scope works for months
pipe = DateTimeFeatures(ts_freq="M", feature_scope="comprehensive")
pipe.fit(X_train)
test_changing_frequency = pipe.transform(X_train).columns.to_list()

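# A hedged univariate sketch (not part of the fragment above): the load_airline
# import suggests the same extraction is also exercised on a single series; the
# exact frequency/scope combinations in the real test module may differ.
y_air = load_airline()
y_air_train, y_air_test = temporal_train_test_split(y_air)
pipe = DateTimeFeatures(ts_freq="M", feature_scope="minimal")
test_univariate = pipe.fit_transform(y_air_train).columns.to_list()
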
y_multi = get_examples(mtype="pd-multiindex", as_scitype="Panel")[0]

# y_train will be a univariate data set
y_train, y_test = temporal_train_test_split(y)

# Create Panel sample data
mi = pd.MultiIndex.from_product([[0], y.index], names=["instances", "timepoints"])
y_group1 = pd.DataFrame(y.values, index=mi, columns=["y"])

mi = pd.MultiIndex.from_product([[1], y.index], names=["instances", "timepoints"])
y_group2 = pd.DataFrame(y.values, index=mi, columns=["y"])

y_grouped = pd.concat([y_group1, y_group2])

y_ll, X_ll = load_longley()
y_ll_train, _, X_ll_train, X_ll_test = temporal_train_test_split(y_ll, X_ll)

# Get different WindowSummarizer functions
kwargs = WindowSummarizer.get_test_params()[0]
kwargs_alternames = WindowSummarizer.get_test_params()[1]
kwargs_variant = WindowSummarizer.get_test_params()[2]


def count_gt100(x):
    """Count how many observations lie above threshold 100."""
    return np.sum((x > 100)[::-1])


# Cannot be pickled in get_test_params, therefore defined here explicitly
kwargs_custom = {