def check_scoring(scoring):
    """Validate the performance scoring.

    Parameters
    ----------
    scoring : object of class MetricFunctionWrapper from
        sktime.performance_metrics.

    Returns
    -------
    scoring : object of class MetricFunctionWrapper of
        sktime.performance_metrics. sMAPE (symmetric mean absolute
        percentage error) if the input is None.

    Raises
    ------
    TypeError
        If the object is not callable from the current scope, or if it is
        not an instance of class MetricFunctionWrapper of
        sktime.performance_metrics.
    """
    from sktime.performance_metrics.forecasting import sMAPE
    from sktime.performance_metrics.forecasting._classes import (
        MetricFunctionWrapper,
    )

    if scoring is None:
        return sMAPE()

    if not callable(scoring):
        raise TypeError("`scoring` must be a callable object")

    allowed_base_class = MetricFunctionWrapper
    if not isinstance(scoring, allowed_base_class):
        raise TypeError(f"`scoring` must inherit from `{allowed_base_class.__name__}`")

    return scoring
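# A quick usage sketch of `check_scoring` (not from the original file): it
# exercises the behaviour documented in the docstring above, assuming the
# sktime version used here, where `sMAPE` is importable as below.
from sktime.performance_metrics.forecasting import sMAPE

assert isinstance(check_scoring(None), sMAPE)  # None falls back to sMAPE

metric = sMAPE()
assert check_scoring(metric) is metric  # a valid metric passes through unchanged

try:
    check_scoring("not-a-metric")  # not callable, so a TypeError is raised
except TypeError as err:
    print(err)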
def test_evaluate_no_exog_against_with_exog():
    # Check that adding exogenous data produces different results
    y, X = load_longley()
    forecaster = ARIMA(suppress_warnings=True)
    cv = SlidingWindowSplitter()
    scoring = sMAPE()

    out_exog = evaluate(forecaster, cv, y, X=X, scoring=scoring)
    out_no_exog = evaluate(forecaster, cv, y, X=None, scoring=scoring)

    scoring_name = f"test_{scoring.name}"
    assert np.all(out_exog[scoring_name] != out_no_exog[scoring_name])
def test_evaluate_initial_window():
    initial_window = 20
    y = make_forecasting_problem(n_timepoints=30, index_type="int")
    forecaster = NaiveForecaster()
    fh = 1
    cv = SlidingWindowSplitter(fh=fh, initial_window=initial_window)
    scoring = sMAPE()
    out = evaluate(
        forecaster=forecaster, y=y, cv=cv, strategy="update", scoring=scoring
    )
    _check_evaluate_output(out, cv, y, scoring)
    assert out.loc[0, "len_train_window"] == initial_window

    # check scoring
    actual = out.loc[0, f"test_{scoring.name}"]
    train, test = next(cv.split(y))
    f = clone(forecaster)
    f.fit(y.iloc[train], fh=fh)
    expected = scoring(y.iloc[test], f.predict())
    np.testing.assert_equal(actual, expected)
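# A minimal standalone sketch (not part of the test module) of the
# `evaluate` call exercised above; import paths assume the same sktime
# version as this file. `evaluate` returns one row per cv split, including
# a `test_<metric>` column and the training window length.
from sktime.forecasting.model_evaluation import evaluate
from sktime.forecasting.model_selection import SlidingWindowSplitter
from sktime.forecasting.naive import NaiveForecaster
from sktime.performance_metrics.forecasting import sMAPE
from sktime.utils._testing.forecasting import make_forecasting_problem

y = make_forecasting_problem(n_timepoints=30, index_type="int")
scoring = sMAPE()
out = evaluate(
    forecaster=NaiveForecaster(),
    y=y,
    cv=SlidingWindowSplitter(fh=1, initial_window=20),
    strategy="update",
    scoring=scoring,
)
print(out[["len_train_window", f"test_{scoring.name}"]])  # one row per split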
"forecasters": FORECASTERS }, StackingForecaster: { "forecasters": FORECASTERS, "final_regressor": REGRESSOR }, Detrender: { "forecaster": FORECASTER }, ForecastingGridSearchCV: { "forecaster": NaiveForecaster(strategy="mean"), "cv": SingleWindowSplitter(fh=1), "param_grid": { "window_length": [2, 5] }, "scoring": sMAPE(), }, ForecastingRandomizedSearchCV: { "forecaster": NaiveForecaster(strategy="mean"), "cv": SingleWindowSplitter(fh=1), "param_distributions": { "window_length": [2, 5] }, "scoring": sMAPE(), }, TabularToSeriesAdaptor: { "transformer": StandardScaler() }, ColumnEnsembleClassifier: { "estimators": [(name, estimator, 0) for (name, estimator) in TIME_SERIES_CLASSIFIERS]
            TransformedTargetForecaster(
                [  # composite estimator
                    ("t", Detrender(PolynomialTrendForecaster())),
                    ("f", ReducedForecaster(LinearRegression(), scitype="regressor")),
                ]
            ),
            {
                "f__window_length": TEST_WINDOW_LENGTHS,
                "f__step_length": TEST_STEP_LENGTHS,
            },
        ),  # multiple params
    ],
)
@pytest.mark.parametrize(
    "scoring",
    [sMAPE(), make_forecasting_scorer(mean_squared_error, greater_is_better=False)],
)
@pytest.mark.parametrize(
    "cv",
    [
        # single split with multi-step fh
        *[SingleWindowSplitter(fh=fh) for fh in TEST_OOS_FHS],
        # multiple splits with single-step fh
        SlidingWindowSplitter(fh=1, initial_window=50),
    ],
)
def test_gscv_fit(forecaster, param_dict, cv, scoring):
    param_grid = ParameterGrid(param_dict)
    y = load_airline()
    gscv = ForecastingGridSearchCV(
        (
            TransformedTargetForecaster(
                [  # composite estimator
                    ("t", Detrender(PolynomialTrendForecaster())),
                    ("f", ReducedRegressionForecaster(LinearRegression())),
                ]
            ),
            {
                "f__window_length": TEST_WINDOW_LENGTHS,
                "f__step_length": TEST_STEP_LENGTHS,
            },
        ),  # multiple params
    ],
)
@pytest.mark.parametrize(
    "scoring",
    [sMAPE(), make_forecasting_scorer(mean_squared_error, greater_is_better=False)],
)
@pytest.mark.parametrize(
    "cv",
    [
        # single split with multi-step fh
        *[SingleWindowSplitter(fh=fh) for fh in TEST_OOS_FHS],
        # multiple splits with single-step fh
        SlidingWindowSplitter(fh=1, initial_window=50),
    ],
)
def test_gscv_fit(forecaster, param_dict, cv, scoring):
    param_grid = ParameterGrid(param_dict)
        actual = out.loc[:, "len_train_window"].to_numpy()
        np.testing.assert_array_equal(expected, actual)
        assert np.all(out.loc[0, "len_train_window"] == cv.window_length)
    else:
        assert np.all(out.loc[:, "len_train_window"] == cv.window_length)


@pytest.mark.parametrize("CV", [SlidingWindowSplitter, ExpandingWindowSplitter])
@pytest.mark.parametrize("fh", TEST_FHS)
@pytest.mark.parametrize("window_length", [7, 10])
@pytest.mark.parametrize("step_length", TEST_STEP_LENGTHS)
@pytest.mark.parametrize("strategy", ["refit", "update"])
@pytest.mark.parametrize(
    "scoring", [sMAPE(), make_forecasting_scorer(mean_squared_error)]
)
def test_evaluate_common_configs(
    CV, fh, window_length, step_length, strategy, scoring
):
    # Test a number of basic configurations
    y = make_forecasting_problem(n_timepoints=30, index_type="int")
    forecaster = NaiveForecaster()
    cv = CV(fh, window_length, step_length=step_length)
    out = evaluate(
        forecaster=forecaster, y=y, cv=cv, strategy=strategy, scoring=scoring
    )
    _check_evaluate_output(out, cv, y, scoring)

    # check scoring
    actual = out.loc[:, f"test_{scoring.name}"]
("forecaster", ARIMA()), ]) PIPE_GRID = { "transformer__forecaster__degree": [1, 2], "forecaster__with_intercept": [True, False], } CVs = [ *[SingleWindowSplitter(fh=fh) for fh in TEST_OOS_FHS], SlidingWindowSplitter(fh=1, initial_window=15), ] MSE = make_forecasting_scorer(mean_squared_error, greater_is_better=False) @pytest.mark.parametrize("forecaster, param_grid", [(NAIVE, NAIVE_GRID), (PIPE, PIPE_GRID)]) @pytest.mark.parametrize("scoring", [sMAPE(), MSE]) @pytest.mark.parametrize("cv", CVs) def test_gscv(forecaster, param_grid, cv, scoring): y, X = load_longley() gscv = ForecastingGridSearchCV(forecaster, param_grid=param_grid, cv=cv, scoring=scoring) gscv.fit(y, X) param_grid = ParameterGrid(param_grid) _check_cv(forecaster, gscv, cv, param_grid, y, X, scoring) @pytest.mark.parametrize("forecaster, param_grid", [(NAIVE, NAIVE_GRID), (PIPE, PIPE_GRID)])