def _make_fit_args(estimator, **kwargs):
    """Create arguments for calling ``fit`` on ``estimator``, chosen by its type.

    Parameters
    ----------
    estimator : object
        Estimator instance; the branch taken depends on which base class
        it is an instance of. Checks run in a fixed order, first match wins.
    **kwargs
        Forwarded to the data-generating helper of the selected branch.

    Returns
    -------
    tuple
        ``(y, X, fh)`` with ``X=None`` and ``fh=1`` for forecasters, a
        one-element tuple ``(X,)`` for annotators, series transformers,
        clusterers and aligners, ``(None, None)`` for pairwise transformers,
        and whatever the classification/regression problem generator returns
        for the remaining supported types.

    Raises
    ------
    ValueError
        If ``estimator`` matches none of the supported base classes.
    """
    if isinstance(estimator, BaseForecaster):
        # TransformedTargetForecaster-like estimators are handled separately
        if isinstance(estimator, _SeriesToSeriesTransformer):
            y = _make_series(**kwargs)
        elif "n_columns" in kwargs:
            y = make_forecasting_problem(**kwargs)
        else:
            # no n_columns passed: derive a matching one from the y-scitype tag,
            # e.g., to give bivariate y to a strictly multivariate forecaster
            y_scitype = estimator.get_tag(tag_name="scitype:y", raise_error=False)
            n_columns = _get_n_columns(y_scitype)[0]
            y = make_forecasting_problem(n_columns=n_columns, **kwargs)
        return y, None, 1

    if isinstance(estimator, BaseSeriesAnnotator):
        return (make_annotation_problem(**kwargs),)

    if isinstance(estimator, BaseClassifier):
        return make_classification_problem(**kwargs)

    if isinstance(estimator, BaseRegressor):
        return make_regression_problem(**kwargs)

    series_transformers = (_SeriesToPrimitivesTransformer, _SeriesToSeriesTransformer)
    if isinstance(estimator, series_transformers):
        return (_make_series(**kwargs),)

    panel_transformers = (_PanelToTabularTransformer, _PanelToPanelTransformer)
    if isinstance(estimator, panel_transformers):
        return make_classification_problem(**kwargs)

    if isinstance(estimator, BaseTransformer):
        # transformers that require y get a classification problem,
        # all other transformers get a single series
        if estimator.get_tag("requires_y"):
            return make_classification_problem(**kwargs)
        return (_make_series(**kwargs),)

    if isinstance(estimator, BaseClusterer):
        return (make_clustering_problem(**kwargs),)

    if isinstance(estimator, (BasePairwiseTransformer, BasePairwiseTransformerPanel)):
        return None, None

    if isinstance(estimator, BaseAligner):
        pair = [_make_series(n_columns=2, **kwargs) for _ in range(2)]
        return (pair,)

    raise ValueError(_get_err_msg(estimator))
def test_invalid_aggfuncs(forecasters, aggfunc):
    """Check that predicting with an invalid aggregation function raises ValueError."""
    series = make_forecasting_problem()
    ensemble = EnsembleForecaster(forecasters=forecasters, aggfunc=aggfunc)
    ensemble.fit(series, fh=[1, 2])

    # the error is expected at predict time, not at fit time
    with pytest.raises(ValueError, match=r"not recognized"):
        ensemble.predict()
def test_predict_time_index_with_X(
    self, estimator_instance, n_columns, index_fh_comb, fh_int_oos
):
    """Check that the predicted time index matches fh when X is passed.

    Returns early (without testing anything) for timedelta forecasting
    horizons. ``NotImplementedError`` raised during fit/predict is tolerated.
    """
    index_type, fh_type, is_relative = index_fh_comb

    if fh_type == "timedelta":
        # todo: ensure check_estimator works with pytest.skip, i.e.,
        # pytest.skip(
        #    "ForecastingHorizon with timedelta values "
        #     "is currently experimental and not supported everywhere"
        # )
        return None

    # only the exogenous part of the generated problem is used here
    _, X = make_forecasting_problem(index_type=index_type, make_X=True)
    y = _make_series(n_columns=n_columns, index_type=index_type)

    halfway = len(y) // 2
    cutoff = y.index[halfway]
    fh = _make_fh(cutoff, fh_int_oos, fh_type, is_relative)

    y_train, _, X_train, X_test = temporal_train_test_split(y, X, fh=fh)

    # Some estimators may not support all time index types and fh types,
    # hence NotImplementedError is caught and ignored.
    try:
        estimator_instance.fit(y_train, X_train, fh=fh)
        y_pred = estimator_instance.predict(X=X_test)
        _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh)
    except NotImplementedError:
        pass
def _make_fit_args(estimator, **kwargs):
    """Create arguments for calling ``fit`` on ``estimator``, chosen by its type.

    Type checks run in a fixed order and the first match wins. ``kwargs`` are
    forwarded to the data-generating helper of the selected branch.

    Returns
    -------
    tuple
        ``(y, X, fh)`` with ``X=None`` and ``fh=1`` for forecasters, a
        one-element tuple for annotators, series transformers and clusterers,
        and the output of the classification/regression problem generator
        otherwise.

    Raises
    ------
    ValueError
        If ``estimator`` matches none of the supported base classes.
    """
    if isinstance(estimator, BaseForecaster):
        # TransformedTargetForecaster-like estimators are handled separately
        make_y = (
            _make_series
            if isinstance(estimator, _SeriesToSeriesTransformer)
            else make_forecasting_problem
        )
        return make_y(**kwargs), None, 1

    if isinstance(estimator, BaseSeriesAnnotator):
        return (make_annotation_problem(**kwargs),)

    if isinstance(estimator, BaseClassifier):
        return make_classification_problem(**kwargs)

    if isinstance(estimator, BaseRegressor):
        return make_regression_problem(**kwargs)

    series_transformers = (_SeriesToPrimitivesTransformer, _SeriesToSeriesTransformer)
    if isinstance(estimator, series_transformers):
        return (_make_series(**kwargs),)

    panel_transformers = (_PanelToTabularTransformer, _PanelToPanelTransformer)
    if isinstance(estimator, panel_transformers):
        return make_classification_problem(**kwargs)

    if isinstance(estimator, BaseClusterer):
        return (make_clustering_problem(**kwargs),)

    raise ValueError(_get_err_msg(estimator))
def test_predict_time_index_with_X(Forecaster, index_type, fh_type, is_relative, steps):
    """Check that the predicted time index matches fh when X is passed.

    The check is repeated for every column count returned for the
    forecaster's ``scitype:y`` tag, with a fresh instance each time.
    """
    # a throwaway instance is only needed to read the tag
    tag_source = _construct_instance(Forecaster)
    column_counts = _get_n_columns(tag_source.get_tag("scitype:y"))

    # only the exogenous part of the generated problem is used here
    _, X = make_forecasting_problem(index_type=index_type, make_X=True)

    for n_columns in column_counts:
        forecaster = _construct_instance(Forecaster)
        y = _make_series(n_columns=n_columns, index_type=index_type)

        halfway = len(y) // 2
        cutoff = y.index[halfway]
        fh = _make_fh(cutoff, steps, fh_type, is_relative)

        y_train, _, X_train, X_test = temporal_train_test_split(y, X, fh=fh)

        # Some estimators may not support all time index types and fh types,
        # hence NotImplementedError is caught and ignored.
        try:
            forecaster.fit(y_train, X_train, fh=fh)
            y_pred = forecaster.predict(X=X_test)
            _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh)
        except NotImplementedError:
            pass
def _make_fit_args(estimator, **kwargs):
    """Create arguments for calling ``fit`` on ``estimator``, chosen by its type.

    Type checks run in a fixed order and the first match wins. ``kwargs`` are
    forwarded to the data-generating helper of the selected branch.

    Returns
    -------
    tuple
        ``(y, X, fh)`` with ``X=None`` and ``fh=1`` for forecasters, ``(X,)``
        for series transformers, and the output of the classification or
        regression problem generator otherwise.

    Raises
    ------
    ValueError
        If ``estimator`` matches none of the supported base classes.
    """
    if isinstance(estimator, BaseForecaster):
        return make_forecasting_problem(**kwargs), None, 1

    if isinstance(estimator, BaseClassifier):
        return make_classification_problem(**kwargs)

    if isinstance(estimator, BaseRegressor):
        return make_regression_problem(**kwargs)

    series_transformers = (_SeriesToPrimitivesTransformer, _SeriesToSeriesTransformer)
    if isinstance(estimator, series_transformers):
        return (_make_series(**kwargs),)

    panel_transformers = (_PanelToTabularTransformer, _PanelToPanelTransformer)
    if isinstance(estimator, panel_transformers):
        return make_classification_problem(**kwargs)

    raise ValueError(_get_err_msg(estimator))
def test_dummy_regressor_mean_prediction_endogenous_only(
    fh, window_length, strategy, scitype
):
    """Test mean prediction of a reduced DummyRegressor without exogenous data.

    A ``DummyRegressor(strategy="mean")`` is wrapped with ``make_reduction``
    and its forecast is compared against the mean of the training values
    that remain after dropping the first effective window.
    """
    y = make_forecasting_problem()
    fh = check_fh(fh)
    y_train, _ = temporal_train_test_split(y, fh=fh)

    forecaster = make_reduction(
        DummyRegressor(strategy="mean"),
        scitype=scitype,
        window_length=window_length,
        strategy=strategy,
    )
    forecaster.fit(y_train, fh=fh)
    actual = forecaster.predict()

    # Recursive: the target is always the first step ahead, whatever the fh,
    # so the window is used as given. Other strategies: the split also has to
    # account for the number of steps ahead being predicted.
    effective_window_length = (
        window_length if strategy == "recursive" else window_length + max(fh) - 1
    )

    # The first values of the target series form the first window and never
    # appear in the transformed target vector, so the expected value is the
    # mean over the remaining values only.
    remaining_targets = y_train[effective_window_length:]
    expected = np.mean(remaining_targets)
    np.testing.assert_array_almost_equal(actual, expected)
def test_evaluate_common_configs(CV, fh, window_length, step_length, strategy, scoring):
    """Test ``evaluate`` against a manual refit-per-split computation."""
    y = make_forecasting_problem(n_timepoints=30, index_type="int")
    forecaster = NaiveForecaster()
    cv = CV(fh, window_length, step_length=step_length)

    results = evaluate(
        forecaster=forecaster, y=y, cv=cv, strategy=strategy, scoring=scoring
    )
    _check_evaluate_output(results, cv, y, scoring)

    # recompute the score of every split by hand and compare
    score_column = f"test_{scoring.name}"
    actual = results.loc[:, score_column]

    expected = np.empty(cv.get_n_splits(y))
    for split_no, (train_idx, test_idx) in enumerate(cv.split(y)):
        y_fold_train = y.iloc[train_idx]
        fresh = clone(forecaster)
        fresh.fit(y_fold_train, fh=fh)
        expected[split_no] = scoring(
            y.iloc[test_idx], fresh.predict(), y_train=y.iloc[train_idx]
        )

    np.testing.assert_array_equal(actual, expected)
def test_y_test_index_input():
    """Check that the index of y_test can be passed as an absolute horizon."""
    series = make_forecasting_problem()
    y_train, y_test = temporal_train_test_split(series, train_size=0.75)

    horizon = FH(y_test.index, relative=False)
    cutoff = y_train.index[-1]

    # relative to the end of training, the test index is expected to be 1..len(y_test)
    expected_steps = np.arange(len(y_test)) + 1
    np.testing.assert_array_equal(horizon.relative(cutoff), expected_steps)
def _make_transform_args(estimator, random_state=None):
    """Create arguments for calling ``transform`` on ``estimator``.

    Returns the output of ``make_classification_problem`` for
    series-as-features transformers and a one-element tuple ``(y,)`` for
    single-series transformers and forecasters.

    Raises
    ------
    ValueError
        If the estimator type is not supported.
    """
    if is_series_as_features_transformer(estimator):
        return make_classification_problem(random_state=random_state)

    if is_single_series_transformer(estimator) or is_forecaster(estimator):
        return (make_forecasting_problem(random_state=random_state),)

    raise ValueError(f"Estimator type: {type(estimator)} not supported")
def _make_fit_args(estimator, random_state=None, **kwargs):
    """Create arguments for calling ``fit`` on ``estimator``, chosen by its type.

    The type predicates are evaluated in a fixed order and the first match
    wins. ``random_state`` and ``kwargs`` are forwarded to the selected
    data-generating helper.

    Returns
    -------
    tuple
        ``(y, fh)`` with ``fh=1`` for forecasters, ``(y,)`` for single-series
        transformers, and the output of the classification or regression
        problem generator otherwise.

    Raises
    ------
    ValueError
        If the estimator type is not supported.
    """
    if is_forecaster(estimator):
        return make_forecasting_problem(random_state=random_state, **kwargs), 1

    if is_classifier(estimator):
        return make_classification_problem(random_state=random_state, **kwargs)

    if is_regressor(estimator):
        return make_regression_problem(random_state=random_state, **kwargs)

    if is_series_as_features_transformer(estimator):
        return make_classification_problem(random_state=random_state, **kwargs)

    if is_single_series_transformer(estimator):
        return (make_forecasting_problem(random_state=random_state, **kwargs),)

    raise ValueError(f"Estimator type: {type(estimator)} not supported")
def test_avg_mean(forecasters):
    """Assert default ensemble equals ``aggfunc="mean"`` with equal weights."""
    y = make_forecasting_problem()
    horizon = [1, 2, 3]

    # ensemble built with default aggregation settings
    default_ensemble = EnsembleForecaster(forecasters)
    default_ensemble.fit(y, fh=[1, 2, 3])
    default_pred = default_ensemble.predict()

    # ensemble with explicit "mean" aggregation and equal weights
    weighted_ensemble = EnsembleForecaster(forecasters, aggfunc="mean", weights=[1, 1])
    weighted_ensemble.fit(y, fh=horizon)
    weighted_pred = weighted_ensemble.predict()

    pd.testing.assert_series_equal(default_pred, weighted_pred)
def test_fh(index_type, fh_type, is_relative, steps):
    """Test conversions of the forecasting horizon built by ``_make_fh``.

    Expected absolute, relative, indexer, in-sample and out-of-sample
    representations are derived from ``steps`` and the cutoff, then compared
    with what the forecasting horizon object returns.
    """
    # generate data
    y = make_forecasting_problem(index_type=index_type)
    assert isinstance(y.index, INDEX_TYPE_LOOKUP.get(index_type))
    # split data
    y_train, y_test = temporal_train_test_split(y, test_size=10)
    # choose cutoff point
    cutoff = y_train.index[-1]
    # generate fh
    fh = _make_fh(cutoff, steps, fh_type, is_relative)
    assert isinstance(fh.to_pandas(), INDEX_TYPE_LOOKUP.get(fh_type))
    # get expected outputs
    # a scalar step is wrapped so that the array arithmetic below works
    if isinstance(steps, int):
        steps = np.array([steps])
    fh_relative = pd.Int64Index(steps).sort_values()
    # absolute values: positions of the cutoff in y.index shifted by the steps
    fh_absolute = y.index[np.where(y.index == cutoff)[0] + steps].sort_values()
    fh_indexer = fh_relative - 1
    # out-of-sample: relative steps > 0; in-sample: relative steps <= 0
    fh_oos = fh.to_pandas()[fh_relative > 0]
    is_oos = len(fh_oos) == len(fh)
    fh_ins = fh.to_pandas()[fh_relative <= 0]
    is_ins = len(fh_ins) == len(fh)
    # check outputs
    # check absolute representation
    _assert_index_equal(fh_absolute, fh.to_absolute(cutoff).to_pandas())
    assert not fh.to_absolute(cutoff).is_relative
    # check relative representation
    _assert_index_equal(fh_relative, fh.to_relative(cutoff).to_pandas())
    assert fh.to_relative(cutoff).is_relative
    # check index-like representation
    _assert_index_equal(fh_indexer, fh.to_indexer(cutoff))
    # check in-sample representation
    # we only compare the numpy array here because the expected solution is
    # formatted in a slightly different way than the generated solution
    np.testing.assert_array_equal(
        fh_ins.to_numpy(), fh.to_in_sample(cutoff).to_pandas()
    )
    assert fh.to_in_sample(cutoff).is_relative == is_relative
    assert fh.is_all_in_sample(cutoff) == is_ins
    # check out-of-sample representation
    np.testing.assert_array_equal(
        fh_oos.to_numpy(), fh.to_out_of_sample(cutoff).to_pandas()
    )
    assert fh.to_out_of_sample(cutoff).is_relative == is_relative
    assert fh.is_all_out_of_sample(cutoff) == is_oos
def test_predict_time_index(Forecaster, index_type, fh_type, is_relative, steps):
    """Check that the predicted time index matches the forecasting horizon."""
    y_train = make_forecasting_problem(index_type=index_type)
    last_seen = y_train.index[-1]
    fh = _make_fh(last_seen, steps, fh_type, is_relative)
    forecaster = _construct_instance(Forecaster)

    # NotImplementedError from fit/predict is tolerated and ends the test
    try:
        forecaster.fit(y_train, fh=fh)
        predicted = forecaster.predict()
        assert_correct_pred_time_index(predicted.index, y_train.index[-1], fh)
    except NotImplementedError:
        pass
def _test_trend(degree, with_intercept):
    """Compare fitted polynomial trend coefficients with the expected ones."""
    y = make_forecasting_problem()
    trend_forecaster = PolynomialTrendForecaster(
        degree=degree, with_intercept=with_intercept
    )
    trend_forecaster.fit(y)

    # coefficients live on the last step of the fitted regressor pipeline;
    # they are reversed because the intercept is added in reverse order
    last_step = trend_forecaster.regressor_.steps[-1]
    actual = last_step[1].coef_[::-1]

    expected = get_expected_polynomial_coefs(y, degree, with_intercept)
    np.testing.assert_allclose(actual, expected)
def test_update_predict_predicted_indices(Forecaster, fh, window_length, step_length):
    """Check the output of ``update_predict`` with a sliding window splitter."""
    series = make_forecasting_problem(all_positive=True, index_type="datetime")
    y_train, y_test = temporal_train_test_split(series)
    splitter = SlidingWindowSplitter(
        fh, window_length=window_length, step_length=step_length
    )

    forecaster = _construct_instance(Forecaster)
    # fitting is outside the try block: only update_predict may be unimplemented
    forecaster.fit(y_train, fh=fh)

    try:
        predicted = forecaster.update_predict(y_test, cv=splitter)
        _check_update_predict_y_pred(predicted, y_test, fh, step_length)
    except NotImplementedError:
        pass
def test_predict_time_index_in_sample_full(Forecaster, index_type, fh_type, is_relative):
    """Check that the predicted time index matches fh for full in-sample fh."""
    y_train = make_forecasting_problem(index_type=index_type)
    last_seen = y_train.index[-1]

    # steps 0, -1, ..., -(len(y_train) - 1): the full in-sample horizon
    in_sample_steps = -np.arange(len(y_train))
    fh = _make_fh(last_seen, in_sample_steps, fh_type, is_relative)

    forecaster = _construct_instance(Forecaster)

    # NotImplementedError from fit/predict is tolerated and ends the test
    try:
        forecaster.fit(y_train, fh=fh)
        predicted = forecaster.predict()
        assert_correct_pred_time_index(predicted.index, y_train.index[-1], fh)
    except NotImplementedError:
        pass
def test_predict_time_index(Forecaster, index_type, fh_type, is_relative, steps):
    """Check that the predicted time index matches the forecasting horizon."""
    y_train = make_forecasting_problem(index_type=index_type)
    last_seen = y_train.index[-1]
    fh = _make_fh(last_seen, steps, fh_type, is_relative)
    forecaster = _construct_instance(Forecaster)

    # Not every estimator supports every combination of time index type and
    # fh type, so NotImplementedError is caught and ignored.
    try:
        forecaster.fit(y_train, fh=fh)
        predicted = forecaster.predict()
        _assert_correct_pred_time_index(predicted.index, y_train.index[-1], fh)
    except NotImplementedError:
        pass
def test_predict_time_index_in_sample_full(Forecaster, index_type, fh_type, is_relative):
    """Check that the predicted time index matches fh for full in-sample fh."""
    y_train = make_forecasting_problem(index_type=index_type)
    last_seen = y_train.index[-1]

    # steps 0, -1, ..., -(len(y_train) - 1): the full in-sample horizon
    in_sample_steps = -np.arange(len(y_train))
    fh = _make_fh(last_seen, in_sample_steps, fh_type, is_relative)

    forecaster = _construct_instance(Forecaster)

    # Not every estimator supports every combination of time index type and
    # fh type, so NotImplementedError is caught and ignored.
    try:
        forecaster.fit(y_train, fh=fh)
        predicted = forecaster.predict()
        _assert_correct_pred_time_index(predicted.index, y_train.index[-1], fh)
    except NotImplementedError:
        pass
def test_multioutput_direct_equivalence_tabular_linear_regression(fh):
    """Check that direct and multioutput reduction give the same predictions.

    Both strategies wrap the same ``LinearRegression`` estimator and are
    fitted and evaluated on the same train/test split with exogenous data.
    """
    y, X = make_forecasting_problem(make_X=True)
    y_train, _, X_train, X_test = temporal_train_test_split(y, X, fh=fh)

    regressor = LinearRegression()
    direct = make_reduction(regressor, strategy="direct")
    multioutput = make_reduction(regressor, strategy="multioutput")

    fitted_direct = direct.fit(y_train, X_train, fh=fh)
    direct_pred = fitted_direct.predict(fh, X_test)

    fitted_multioutput = multioutput.fit(y_train, X_train, fh=fh)
    multioutput_pred = fitted_multioutput.predict(fh, X_test)

    np.testing.assert_array_almost_equal(
        direct_pred.to_numpy(), multioutput_pred.to_numpy()
    )
def _check_update_predict_predicted_index(
    Forecaster, fh, window_length, step_length, update_params
):
    """Check type and index of the ``update_predict`` output of a forecaster."""
    series = make_forecasting_problem(all_positive=True, index_type="datetime")
    y_train, y_test = temporal_train_test_split(series)

    splitter = SlidingWindowSplitter(
        fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=False,
    )

    forecaster = _construct_instance(Forecaster)
    forecaster.fit(y_train, fh=fh)
    predicted = forecaster.update_predict(
        y_test, cv=splitter, update_params=update_params
    )

    assert isinstance(predicted, (pd.Series, pd.DataFrame))

    expected_index = _get_expected_index_for_update_predict(y_test, fh, step_length)
    np.testing.assert_array_equal(predicted.index, expected_index)
def test_aggregation_unweighted(forecasters, aggfunc):
    """Assert the unweighted aggfunc of the ensemble returns the correct values."""
    y = make_forecasting_problem()
    horizon = [1, 2, 3]

    ensemble = EnsembleForecaster(forecasters=forecasters, aggfunc=aggfunc)
    ensemble.fit(y, fh=[1, 2, 3])
    actual_pred = ensemble.predict()

    reference_aggfunc = VALID_AGG_FUNCS[aggfunc]["unweighted"]

    # fit every member on its own and aggregate the forecasts by hand
    member_preds = []
    for _, member in forecasters:
        member.fit(y)
        member_preds.append(member.predict(fh=horizon))

    # one column per member, one row per forecast step
    stacked = pd.DataFrame(member_preds).T
    expected_pred = stacked.apply(func=reference_aggfunc, axis=1)

    pd.testing.assert_series_equal(actual_pred, expected_pred)
def test_evaluate_initial_window():
    """Test ``evaluate`` with a sliding window splitter using an initial window."""
    initial_window = 20
    fh = 1

    y = make_forecasting_problem(n_timepoints=30, index_type="int")
    forecaster = NaiveForecaster()
    cv = SlidingWindowSplitter(fh=fh, initial_window=initial_window)
    scoring = sMAPE()

    results = evaluate(
        forecaster=forecaster, y=y, cv=cv, strategy="update", scoring=scoring
    )
    _check_evaluate_output(results, cv, y, scoring)
    assert results.loc[0, "len_train_window"] == initial_window

    # recompute the score of the first split by hand and compare
    actual = results.loc[0, f"test_{scoring.name}"]

    train_idx, test_idx = next(cv.split(y))
    fresh = clone(forecaster)
    fresh.fit(y.iloc[train_idx], fh=fh)
    expected = scoring(y.iloc[test_idx], fresh.predict())

    np.testing.assert_equal(actual, expected)
def test_aggregation_weighted(forecasters, aggfunc, weights):
    """Assert the weighted aggfunc of the ensemble returns the correct values."""
    y = make_forecasting_problem()
    horizon = [1, 2, 3]

    ensemble = EnsembleForecaster(
        forecasters=forecasters, aggfunc=aggfunc, weights=weights
    )
    ensemble.fit(y, fh=[1, 2, 3])
    actual_pred = ensemble.predict()

    # fit every member on its own and aggregate the forecasts by hand
    member_preds = []
    for _, member in forecasters:
        member.fit(y)
        member_preds.append(member.predict(fh=horizon))

    # one column per member, one row per forecast step
    stacked = pd.DataFrame(member_preds).T

    reference_aggfunc = VALID_AGG_FUNCS[aggfunc]["weighted"]
    aggregated = reference_aggfunc(stacked, axis=1, weights=np.array(weights))
    expected_pred = pd.Series(aggregated, index=stacked.index)

    pd.testing.assert_series_equal(actual_pred, expected_pred)
def test_aggregation_weighted(forecasters, aggfunc, weights):
    """Assert the weighted aggfunc of the ensemble returns the correct values."""
    y = make_forecasting_problem()
    horizon = [1, 2, 3]

    ensemble = EnsembleForecaster(
        forecasters=forecasters, aggfunc=aggfunc, weights=weights
    )
    ensemble.fit(y, fh=[1, 2, 3])
    actual_pred = ensemble.predict()

    # fit every member on its own and aggregate the forecasts by hand
    member_preds = []
    for _, member in forecasters:
        member.fit(y)
        member_preds.append(member.predict(fh=horizon))

    # one row per member; aggregation therefore runs down the rows (axis=0)
    stacked = pd.DataFrame(member_preds)

    # "mean" is checked against np.average, anything else against gmean
    reference_aggfunc = np.average if aggfunc == "mean" else gmean
    expected_pred = stacked.apply(func=reference_aggfunc, axis=0, weights=weights)

    pd.testing.assert_series_equal(actual_pred, expected_pred)
def test_make_forecasting_problem(n_timepoints):
    """Check type and length of the series from ``make_forecasting_problem``."""
    series = make_forecasting_problem(n_timepoints)

    assert isinstance(series, pd.Series)
    n_rows = series.shape[0]
    assert n_rows == n_timepoints
from sktime.forecasting.tests._config import VALID_INDEX_FH_COMBINATIONS
from sktime.performance_metrics.forecasting import smape_loss
from sktime.utils import all_estimators
from sktime.utils._testing import _construct_instance
from sktime.utils._testing.forecasting import _make_fh
from sktime.utils._testing.forecasting import assert_correct_pred_time_index
from sktime.utils._testing.forecasting import get_expected_index_for_update_predict
from sktime.utils._testing.forecasting import make_forecasting_problem
from sktime.utils.validation.forecasting import check_fh

# all forecaster classes, used to parametrize the tests
FORECASTERS = all_estimators(estimator_types="forecaster", return_names=False)
FH0 = 1

# shared testing data, split once at import time
y = make_forecasting_problem()
y_train, y_test = temporal_train_test_split(y, train_size=0.75)


@pytest.mark.parametrize("Forecaster", FORECASTERS)
def test_fitted_params(Forecaster):
    """Check that ``get_fitted_params`` returns a dict where it is implemented."""
    forecaster = _construct_instance(Forecaster)
    forecaster.fit(y_train, fh=FH0)

    try:
        fitted_params = forecaster.get_fitted_params()
    except NotImplementedError:
        # forecasters without fitted-parameter support are not checked further
        return

    assert isinstance(fitted_params, dict)
def test_fh(index_type, fh_type, is_relative, steps):
    """Testing ForecastingHorizon conversions.

    Expected absolute, relative, indexer, in-sample and out-of-sample
    representations are derived from ``steps`` and the cutoff, then compared
    with what the forecasting horizon object returns. The test is skipped
    when ``steps`` and ``fh_type`` are incompatible (integer steps with a
    timedelta fh, or timedelta steps with a non-timedelta fh).
    """
    int_types = ["int64", "int32"]
    # classify steps as integer-like or timedelta-like
    steps_is_int = (isinstance(steps, (int, np.integer))
                    or np.array(steps).dtype in int_types)
    steps_is_timedelta = isinstance(steps, pd.Timedelta) or (isinstance(
        steps, list) and isinstance(pd.Index(steps), pd.TimedeltaIndex))
    steps_and_fh_incompatible = (fh_type == "timedelta" and
                                 steps_is_int) or (fh_type != "timedelta"
                                                   and steps_is_timedelta)
    if steps_and_fh_incompatible:
        pytest.skip("steps and fh_type are incompatible")
    # generate data
    y = make_forecasting_problem(index_type=index_type)
    if index_type == "int":
        assert is_integer_index(y.index)
    else:
        assert isinstance(y.index, INDEX_TYPE_LOOKUP.get(index_type))
    # split data
    y_train, y_test = temporal_train_test_split(y, test_size=10)
    # choose cutoff point
    cutoff = y_train.index[-1]
    # generate fh
    fh = _make_fh(cutoff, steps, fh_type, is_relative)
    if fh_type == "int":
        assert is_integer_index(fh.to_pandas())
    else:
        assert isinstance(fh.to_pandas(), INDEX_TYPE_LOOKUP.get(fh_type))
    # get expected outputs
    # normalise steps to an array/index so that dtype checks and arithmetic work
    # NOTE(review): a scalar np.integer passes the steps_is_int check above but
    # is not matched by isinstance(steps, int) here - confirm such inputs
    # are never parametrized
    if isinstance(steps, int):
        steps = np.array([steps])
    elif isinstance(steps, pd.Timedelta):
        steps = pd.Index([steps])
    else:
        steps = pd.Index(steps)
    if steps.dtype in int_types:
        fh_relative = pd.Index(steps, dtype="int64").sort_values()
        # absolute values: positions of the cutoff in y.index shifted by the steps
        fh_absolute = y.index[np.where(y.index == cutoff)[0] + steps].sort_values()
        fh_indexer = fh_relative - 1
    else:
        # timedelta steps: absolute values come from adding steps to the cutoff;
        # no indexer is expected (to_indexer must raise, see below)
        fh_relative = steps.sort_values()
        fh_absolute = (cutoff + steps).sort_values()
        fh_indexer = None
    # zero element matching the dtype of the relative steps
    if steps.dtype in int_types:
        null = 0
    else:
        null = pd.Timedelta(0)
    # out-of-sample: relative steps > zero; in-sample: relative steps <= zero
    fh_oos = fh.to_pandas()[fh_relative > null]
    is_oos = len(fh_oos) == len(fh)
    fh_ins = fh.to_pandas()[fh_relative <= null]
    is_ins = len(fh_ins) == len(fh)
    # check outputs
    # check absolute representation
    _assert_index_equal(fh_absolute, fh.to_absolute(cutoff).to_pandas())
    assert not fh.to_absolute(cutoff).is_relative
    # check relative representation
    _assert_index_equal(fh_relative, fh.to_relative(cutoff).to_pandas())
    assert fh.to_relative(cutoff).is_relative
    if steps.dtype in int_types:
        # check index-like representation
        _assert_index_equal(fh_indexer, fh.to_indexer(cutoff))
    else:
        with pytest.raises(NotImplementedError):
            fh.to_indexer(cutoff)
    # check in-sample representation
    # we only compare the numpy array here because the expected solution is
    # formatted in a slightly different way than the generated solution
    np.testing.assert_array_equal(fh_ins.to_numpy(),
                                  fh.to_in_sample(cutoff).to_pandas())
    assert fh.to_in_sample(cutoff).is_relative == is_relative
    assert fh.is_all_in_sample(cutoff) == is_ins
    # check out-of-sample representation
    np.testing.assert_array_equal(fh_oos.to_numpy(),
                                  fh.to_out_of_sample(cutoff).to_pandas())
    assert fh.to_out_of_sample(cutoff).is_relative == is_relative
    assert fh.is_all_out_of_sample(cutoff) == is_oos
__author__ = ["Ris-Bali"] import numpy as np import pytest from sktime.datasets import load_airline from sktime.forecasting.ets import AutoETS from sktime.forecasting.exp_smoothing import ExponentialSmoothing from sktime.forecasting.sarimax import SARIMAX from sktime.forecasting.structural import UnobservedComponents from sktime.forecasting.var import VAR from sktime.utils._testing.forecasting import make_forecasting_problem fh = np.arange(1, 5) y = load_airline() y_1 = make_forecasting_problem(n_columns=3) @pytest.mark.parametrize( "model", [AutoETS, ExponentialSmoothing, SARIMAX, UnobservedComponents, VAR], ) def test_random_state(model): """Function to test random_state parameter.""" obj = model.create_test_instance() if model == VAR: obj.fit(y=y_1, fh=fh) y = obj.predict() obj.fit(y=y_1, fh=fh) y1 = obj.predict() else: