def test_split_by_fh(index_type, fh_type, is_relative, values):
    """Check temporal_train_test_split when the split is driven by a horizon.

    A series is generated with ``_make_series(20, ...)``, the index entry at
    position 10 is used as the cutoff for the forecasting horizon, and the
    resulting split is validated by ``_check_train_test_split_y``.
    """
    series = _make_series(20, index_type=index_type)
    horizon = _make_fh(series.index[10], values, fh_type, is_relative)
    train_test = temporal_train_test_split(series, fh=horizon)
    _check_train_test_split_y(horizon, train_test)
def test_predict_time_index_with_X(
    Forecaster, index_type, fh_type, is_relative, steps
):
    """Check the predicted index matches the horizon when X is supplied.

    For every column count returned by ``_get_n_columns`` for the forecaster's
    ``"scitype:y"`` tag, a fresh instance is fitted on the training part of a
    split and its prediction index is compared against the horizon.
    """
    # This first instance is only used to read the tag; each loop iteration
    # builds its own instance to fit.
    tag_probe = _construct_instance(Forecaster)
    column_counts = _get_n_columns(tag_probe.get_tag("scitype:y"))

    # Only the exogenous data of the generated problem is used here.
    _, X = make_forecasting_problem(index_type=index_type, make_X=True)

    for n_columns in column_counts:
        forecaster = _construct_instance(Forecaster)

        y = _make_series(n_columns=n_columns, index_type=index_type)
        midpoint = y.index[len(y) // 2]
        fh = _make_fh(midpoint, steps, fh_type, is_relative)

        y_train, _, X_train, X_test = temporal_train_test_split(y, X, fh=fh)

        # Some estimators may not support all time index types and fh types,
        # hence NotImplementedError is tolerated.
        try:
            forecaster.fit(y_train, X_train, fh=fh)
            predicted = forecaster.predict(X=X_test)
            _assert_correct_pred_time_index(predicted.index, y_train.index[-1], fh)
        except NotImplementedError:
            continue
def test_predict_residuals(Forecaster, index_type, fh_type, is_relative, steps):
    """Check that predict_residuals returns an index matching the horizon.

    For each supported column count the forecaster is fitted, a prediction is
    made, and a new series carrying the prediction's index is passed to
    ``predict_residuals``. The index of the residuals is then checked.
    """
    # Instance used only to look up the "scitype:y" tag.
    tag_probe = Forecaster.create_test_instance()

    for n_columns in _get_n_columns(tag_probe.get_tag("scitype:y")):
        forecaster = Forecaster.create_test_instance()

        y_train = _make_series(
            n_columns=n_columns, index_type=index_type, n_timepoints=50
        )
        fh = _make_fh(y_train.index[-1], steps, fh_type, is_relative)

        try:
            forecaster.fit(y_train, fh=fh)
            predicted = forecaster.predict()

            # Build a series of the same length as the prediction and align
            # its index with the predicted index.
            y_test = _make_series(
                n_columns=n_columns,
                index_type=index_type,
                n_timepoints=len(predicted),
            )
            y_test.index = predicted.index

            residuals = forecaster.predict_residuals(y_test)
            _assert_correct_pred_time_index(
                residuals.index, y_train.index[-1], fh=fh
            )
        except NotImplementedError:
            continue
def test_predict_time_index_with_X(
    self, estimator_instance, n_columns, index_fh_comb, fh_int_oos
):
    """Check the predicted index matches the horizon when X is supplied.

    ``index_fh_comb`` is unpacked into ``(index_type, fh_type, is_relative)``.
    Combinations with a ``"timedelta"`` horizon type return early without
    running any check.
    """
    index_type, fh_type, is_relative = index_fh_comb

    # todo: ensure check_estimator works with pytest.skip like below
    # pytest.skip(
    #    "ForecastingHorizon with timedelta values "
    #     "is currently experimental and not supported everywhere"
    # )
    if fh_type == "timedelta":
        return None

    # Only the exogenous data of the generated problem is used here.
    _, X = make_forecasting_problem(index_type=index_type, make_X=True)

    y = _make_series(n_columns=n_columns, index_type=index_type)
    midpoint = y.index[len(y) // 2]
    fh = _make_fh(midpoint, fh_int_oos, fh_type, is_relative)

    y_train, _, X_train, X_test = temporal_train_test_split(y, X, fh=fh)

    # Some estimators may not support all time index types and fh types,
    # hence NotImplementedError is tolerated.
    try:
        estimator_instance.fit(y_train, X_train, fh=fh)
        predicted = estimator_instance.predict(X=X_test)
        _assert_correct_pred_time_index(predicted.index, y_train.index[-1], fh)
    except NotImplementedError:
        return None
def test_predict_residuals(self, estimator_instance, n_columns, index_fh_comb, fh_int):
    """Check that predict_residuals returns an index matching the horizon.

    ``index_fh_comb`` is unpacked into ``(index_type, fh_type, is_relative)``.
    Combinations with a ``"timedelta"`` horizon type return early without
    running any check.
    """
    index_type, fh_type, is_relative = index_fh_comb

    # workaround to ensure check_estimator without breaking e.g. debugging
    # todo: ensure check_estimator works with pytest.skip like below
    # pytest.skip(
    #    "ForecastingHorizon with timedelta values "
    #     "is currently experimental and not supported everywhere"
    # )
    if fh_type == "timedelta":
        return None

    y_train = _make_series(
        n_columns=n_columns, index_type=index_type, n_timepoints=50
    )
    fh = _make_fh(y_train.index[-1], fh_int, fh_type, is_relative)

    try:
        estimator_instance.fit(y_train, fh=fh)
        predicted = estimator_instance.predict()

        # Build a series of the same length as the prediction and align its
        # index with the predicted index.
        y_test = _make_series(
            n_columns=n_columns,
            index_type=index_type,
            n_timepoints=len(predicted),
        )
        y_test.index = predicted.index

        residuals = estimator_instance.predict_residuals(y_test)
        _assert_correct_pred_time_index(residuals.index, y_train.index[-1], fh=fh)
    except NotImplementedError:
        return None
def test_predict_time_index(self, estimator_instance, n_columns, index_fh_comb, fh_int):
    """Check that the predicted time index matches the forecasting horizon.

    ``index_fh_comb`` is unpacked into ``(index_type, fh_type, is_relative)``.
    Combinations with a ``"timedelta"`` horizon type return early without
    running any check.
    """
    index_type, fh_type, is_relative = index_fh_comb

    # todo: ensure check_estimator works with pytest.skip like below
    # pytest.skip(
    #    "ForecastingHorizon with timedelta values "
    #     "is currently experimental and not supported everywhere"
    # )
    if fh_type == "timedelta":
        return None

    y_train = _make_series(
        n_columns=n_columns, index_type=index_type, n_timepoints=50
    )
    fh = _make_fh(y_train.index[-1], fh_int, fh_type, is_relative)

    try:
        estimator_instance.fit(y_train, fh=fh)
        predicted = estimator_instance.predict()
        # NOTE(review): the expectation is built from the raw ``fh_int`` rather
        # than the constructed ``fh``; presumably deliberate - confirm.
        _assert_correct_pred_time_index(
            predicted.index, y_train.index[-1], fh=fh_int
        )
    except NotImplementedError:
        return None
def test_predict_time_index(Forecaster, index_type, fh_type, is_relative, steps):
    """Check that the predicted time index matches the forecasting horizon.

    The horizon is anchored at the last index entry of the training series.
    A ``NotImplementedError`` raised while fitting, predicting or checking is
    ignored.
    """
    y_train = make_forecasting_problem(index_type=index_type)
    fh = _make_fh(y_train.index[-1], steps, fh_type, is_relative)
    forecaster = _construct_instance(Forecaster)

    try:
        forecaster.fit(y_train, fh=fh)
        predicted = forecaster.predict()
        assert_correct_pred_time_index(predicted.index, y_train.index[-1], fh)
    except NotImplementedError:
        return
def test_fh(index_type, fh_type, is_relative, steps):
    """Check ForecastingHorizon conversions against independently built indices.

    Expected relative, absolute, indexer, in-sample and out-of-sample
    representations are derived directly from ``steps`` and the data index,
    then compared with what the horizon object returns for the same cutoff.

    Parameters
    ----------
    index_type : key into ``INDEX_TYPE_LOOKUP`` for the index of the data
    fh_type : key into ``INDEX_TYPE_LOOKUP`` for the index of the horizon
    is_relative : whether the horizon is constructed as relative
    steps : integer step or collection of integer steps relative to the cutoff
    """
    # generate data
    y = make_forecasting_problem(index_type=index_type)
    assert isinstance(y.index, INDEX_TYPE_LOOKUP.get(index_type))

    # split data; only the training part is needed to pick the cutoff
    y_train, _ = temporal_train_test_split(y, test_size=10)

    # choose cutoff point
    cutoff = y_train.index[-1]

    # generate fh
    fh = _make_fh(cutoff, steps, fh_type, is_relative)
    assert isinstance(fh.to_pandas(), INDEX_TYPE_LOOKUP.get(fh_type))

    # get expected outputs
    # Scalars (Python or NumPy integers) are wrapped so that the index
    # constructor below always receives a collection.
    if isinstance(steps, (int, np.integer)):
        steps = np.array([steps])

    # pd.Int64Index was removed in pandas 2.0; pd.Index with an explicit
    # int64 dtype is the supported equivalent.
    fh_relative = pd.Index(steps, dtype="int64").sort_values()
    fh_absolute = y.index[np.where(y.index == cutoff)[0] + steps].sort_values()
    fh_indexer = fh_relative - 1
    fh_oos = fh.to_pandas()[fh_relative > 0]
    is_oos = len(fh_oos) == len(fh)
    fh_ins = fh.to_pandas()[fh_relative <= 0]
    is_ins = len(fh_ins) == len(fh)

    # check outputs
    # check absolute representation
    _assert_index_equal(fh_absolute, fh.to_absolute(cutoff).to_pandas())
    assert not fh.to_absolute(cutoff).is_relative

    # check relative representation
    _assert_index_equal(fh_relative, fh.to_relative(cutoff).to_pandas())
    assert fh.to_relative(cutoff).is_relative

    # check index-like representation
    _assert_index_equal(fh_indexer, fh.to_indexer(cutoff))

    # check in-sample representation
    # we only compare the numpy array here because the expected solution is
    # formatted in a slightly different way than the generated solution
    np.testing.assert_array_equal(
        fh_ins.to_numpy(), fh.to_in_sample(cutoff).to_pandas()
    )
    assert fh.to_in_sample(cutoff).is_relative == is_relative
    assert fh.is_all_in_sample(cutoff) == is_ins

    # check out-of-sample representation
    np.testing.assert_array_equal(
        fh_oos.to_numpy(), fh.to_out_of_sample(cutoff).to_pandas()
    )
    assert fh.to_out_of_sample(cutoff).is_relative == is_relative
    assert fh.is_all_out_of_sample(cutoff) == is_oos
def test_split_by_fh(index_type, fh_type, is_relative, values):
    """Check temporal_train_test_split when the split is driven by a horizon.

    Returns early, without checking anything, when ``fh_type`` is
    ``"timedelta"``. Otherwise the index entry at position 10 of the generated
    series is used as the cutoff for the horizon, and the split is validated
    by ``_check_train_test_split_y``.
    """
    # todo: ensure check_estimator works with pytest.skip like below
    # pytest.skip(
    #    "ForecastingHorizon with timedelta values "
    #     "is currently experimental and not supported everywhere"
    # )
    if fh_type == "timedelta":
        return None

    series = _make_series(20, index_type=index_type)
    horizon = _make_fh(series.index[10], values, fh_type, is_relative)
    train_test = temporal_train_test_split(series, fh=horizon)
    _check_train_test_split_y(horizon, train_test)
def test_predict_time_index_in_sample_full(
    Forecaster, index_type, fh_type, is_relative
):
    """Check the predicted index matches the horizon for full in-sample fh.

    The horizon steps are ``0, -1, ..., -(len(y_train) - 1)`` relative to the
    last index entry of the training series. A ``NotImplementedError`` raised
    while fitting, predicting or checking is ignored.
    """
    y_train = make_forecasting_problem(index_type=index_type)

    # full in-sample fh
    in_sample_steps = -np.arange(len(y_train))
    fh = _make_fh(y_train.index[-1], in_sample_steps, fh_type, is_relative)

    forecaster = _construct_instance(Forecaster)
    try:
        forecaster.fit(y_train, fh=fh)
        predicted = forecaster.predict()
        assert_correct_pred_time_index(predicted.index, y_train.index[-1], fh)
    except NotImplementedError:
        return
def test_predict_time_index(Forecaster, index_type, fh_type, is_relative, steps):
    """Check that the predicted time index matches the forecasting horizon.

    The horizon is anchored at the last index entry of the training series.
    """
    y_train = make_forecasting_problem(index_type=index_type)
    fh = _make_fh(y_train.index[-1], steps, fh_type, is_relative)
    forecaster = _construct_instance(Forecaster)

    # Some estimators may not support all time index types and fh types,
    # hence NotImplementedError is tolerated.
    try:
        forecaster.fit(y_train, fh=fh)
        predicted = forecaster.predict()
        _assert_correct_pred_time_index(predicted.index, y_train.index[-1], fh)
    except NotImplementedError:
        return
def test_predict_time_index_in_sample_full(
    Forecaster, index_type, fh_type, is_relative
):
    """Check the predicted index matches the horizon for full in-sample fh.

    The horizon steps are ``0, -1, ..., -(len(y_train) - 1)`` relative to the
    last index entry of the training series.
    """
    y_train = make_forecasting_problem(index_type=index_type)

    # full in-sample fh
    in_sample_steps = -np.arange(len(y_train))
    fh = _make_fh(y_train.index[-1], in_sample_steps, fh_type, is_relative)

    forecaster = _construct_instance(Forecaster)

    # Some estimators may not support all time index types and fh types,
    # hence NotImplementedError is tolerated.
    try:
        forecaster.fit(y_train, fh=fh)
        predicted = forecaster.predict()
        _assert_correct_pred_time_index(predicted.index, y_train.index[-1], fh)
    except NotImplementedError:
        return
def test_predict_time_index(Forecaster, index_type, fh_type, is_relative, steps):
    """Check that the predicted time index matches the forecasting horizon.

    Runs once per column count returned by ``_get_n_columns`` for the
    forecaster's ``"scitype:y"`` tag, using a fresh instance each time.
    """
    # Instance used only to look up the "scitype:y" tag.
    tag_probe = _construct_instance(Forecaster)

    for n_columns in _get_n_columns(tag_probe.get_tag("scitype:y")):
        forecaster = _construct_instance(Forecaster)

        y_train = _make_series(
            n_columns=n_columns, index_type=index_type, n_timepoints=50
        )
        fh = _make_fh(y_train.index[-1], steps, fh_type, is_relative)

        try:
            forecaster.fit(y_train, fh=fh)
            predicted = forecaster.predict()
            _assert_correct_pred_time_index(
                predicted.index, y_train.index[-1], fh=fh
            )
        except NotImplementedError:
            continue
def test_predict_time_index_in_sample_full(
    Forecaster, index_type, fh_type, is_relative
):
    """Check the predicted index matches the horizon for full in-sample fh.

    Runs once per column count returned by ``_get_n_columns`` for the
    forecaster's ``"scitype:y"`` tag. The horizon steps are
    ``0, -1, ..., -(len(y_train) - 1)`` relative to the last training index.
    """
    # Instance used only to look up the "scitype:y" tag.
    tag_probe = _construct_instance(Forecaster)

    for n_columns in _get_n_columns(tag_probe.get_tag("scitype:y")):
        forecaster = _construct_instance(Forecaster)

        y_train = _make_series(n_columns=n_columns, index_type=index_type)
        in_sample_steps = -np.arange(len(y_train))
        fh = _make_fh(y_train.index[-1], in_sample_steps, fh_type, is_relative)

        try:
            forecaster.fit(y_train, fh=fh)
            predicted = forecaster.predict()
            _assert_correct_pred_time_index(predicted.index, y_train.index[-1], fh)
        except NotImplementedError:
            continue
def test_predict_time_index_in_sample_full(
    self, estimator_instance, n_columns, index_fh_comb
):
    """Check the predicted index matches the horizon for full in-sample fh.

    ``index_fh_comb`` is unpacked into ``(index_type, fh_type, is_relative)``.
    Combinations with a ``"timedelta"`` horizon type return early without
    running any check. The horizon steps are
    ``0, -1, ..., -(len(y_train) - 1)`` relative to the last training index.
    """
    index_type, fh_type, is_relative = index_fh_comb

    # todo: ensure check_estimator works with pytest.skip like below
    # pytest.skip(
    #    "ForecastingHorizon with timedelta values "
    #     "is currently experimental and not supported everywhere"
    # )
    if fh_type == "timedelta":
        return None

    y_train = _make_series(n_columns=n_columns, index_type=index_type)
    in_sample_steps = -np.arange(len(y_train))
    fh = _make_fh(y_train.index[-1], in_sample_steps, fh_type, is_relative)

    try:
        estimator_instance.fit(y_train, fh=fh)
        predicted = estimator_instance.predict()
        _assert_correct_pred_time_index(predicted.index, y_train.index[-1], fh)
    except NotImplementedError:
        return None
def test_fh(index_type, fh_type, is_relative, steps):
    """Check ForecastingHorizon conversions for integer and timedelta steps.

    Expected relative, absolute, indexer, in-sample and out-of-sample
    representations are derived directly from ``steps`` and the data index,
    then compared with what the horizon object returns for the same cutoff.
    The test is skipped when integer steps are paired with a ``"timedelta"``
    horizon type, or timedelta steps with any other horizon type.

    Parameters
    ----------
    index_type : type key of the data index (``"int"`` is checked with
        ``is_integer_index``, others via ``INDEX_TYPE_LOOKUP``)
    fh_type : type key of the horizon index, handled like ``index_type``
    is_relative : whether the horizon is constructed as relative
    steps : integer or ``pd.Timedelta`` step, or a collection of such steps
    """
    int_types = ["int64", "int32"]
    steps_is_int = (
        isinstance(steps, (int, np.integer)) or np.array(steps).dtype in int_types
    )
    steps_is_timedelta = isinstance(steps, pd.Timedelta) or (
        isinstance(steps, list) and isinstance(pd.Index(steps), pd.TimedeltaIndex)
    )
    steps_and_fh_incompatible = (fh_type == "timedelta" and steps_is_int) or (
        fh_type != "timedelta" and steps_is_timedelta
    )
    if steps_and_fh_incompatible:
        pytest.skip("steps and fh_type are incompatible")

    # generate data
    y = make_forecasting_problem(index_type=index_type)
    if index_type == "int":
        assert is_integer_index(y.index)
    else:
        assert isinstance(y.index, INDEX_TYPE_LOOKUP.get(index_type))

    # split data; only the training part is needed to pick the cutoff
    y_train, _ = temporal_train_test_split(y, test_size=10)

    # choose cutoff point
    cutoff = y_train.index[-1]

    # generate fh
    fh = _make_fh(cutoff, steps, fh_type, is_relative)
    if fh_type == "int":
        assert is_integer_index(fh.to_pandas())
    else:
        assert isinstance(fh.to_pandas(), INDEX_TYPE_LOOKUP.get(fh_type))

    # get expected outputs
    # NumPy integer scalars are wrapped like Python ints: pd.Index rejects
    # scalar input, so they must not fall through to the generic branch.
    if isinstance(steps, (int, np.integer)):
        steps = np.array([steps])
    elif isinstance(steps, pd.Timedelta):
        steps = pd.Index([steps])
    else:
        steps = pd.Index(steps)

    # steps is not reassigned below, so its kind is determined once here
    steps_are_int = steps.dtype in int_types

    if steps_are_int:
        fh_relative = pd.Index(steps, dtype="int64").sort_values()
        fh_absolute = y.index[np.where(y.index == cutoff)[0] + steps].sort_values()
        fh_indexer = fh_relative - 1
        null = 0
    else:
        fh_relative = steps.sort_values()
        fh_absolute = (cutoff + steps).sort_values()
        fh_indexer = None
        null = pd.Timedelta(0)

    fh_oos = fh.to_pandas()[fh_relative > null]
    is_oos = len(fh_oos) == len(fh)
    fh_ins = fh.to_pandas()[fh_relative <= null]
    is_ins = len(fh_ins) == len(fh)

    # check outputs
    # check absolute representation
    _assert_index_equal(fh_absolute, fh.to_absolute(cutoff).to_pandas())
    assert not fh.to_absolute(cutoff).is_relative

    # check relative representation
    _assert_index_equal(fh_relative, fh.to_relative(cutoff).to_pandas())
    assert fh.to_relative(cutoff).is_relative

    if steps_are_int:
        # check index-like representation
        _assert_index_equal(fh_indexer, fh.to_indexer(cutoff))
    else:
        # for non-integer steps, to_indexer is expected to raise
        with pytest.raises(NotImplementedError):
            fh.to_indexer(cutoff)

    # check in-sample representation
    # we only compare the numpy array here because the expected solution is
    # formatted in a slightly different way than the generated solution
    np.testing.assert_array_equal(
        fh_ins.to_numpy(), fh.to_in_sample(cutoff).to_pandas()
    )
    assert fh.to_in_sample(cutoff).is_relative == is_relative
    assert fh.is_all_in_sample(cutoff) == is_ins

    # check out-of-sample representation
    np.testing.assert_array_equal(
        fh_oos.to_numpy(), fh.to_out_of_sample(cutoff).to_pandas()
    )
    assert fh.to_out_of_sample(cutoff).is_relative == is_relative
    assert fh.is_all_out_of_sample(cutoff) == is_oos