Esempio n. 1
0
def test_split_by_fh(index_type, fh_type, is_relative, values):
    """Split a generated series by forecasting horizon and validate the split.

    A series is created with ``_make_series(20, ...)``, the horizon is built
    with the index entry at position 10 as cutoff, and the output of
    ``temporal_train_test_split`` is handed to ``_check_train_test_split_y``.
    """
    series = _make_series(20, index_type=index_type)
    horizon = _make_fh(series.index[10], values, fh_type, is_relative)
    result = temporal_train_test_split(series, fh=horizon)
    _check_train_test_split_y(horizon, result)
Esempio n. 2
0
def test_predict_time_index_with_X(Forecaster, index_type, fh_type,
                                   is_relative, steps):
    """Verify the prediction index matches ``fh`` when exogenous data is used.

    For every column count that ``_get_n_columns`` returns for the
    forecaster's ``"scitype:y"`` tag, a fresh instance is fitted on the
    training part of a temporal split and asked to predict with ``X_test``.
    """
    probe = _construct_instance(Forecaster)
    column_counts = _get_n_columns(probe.get_tag("scitype:y"))

    # Only the exogenous frame of the generated problem is used below.
    _, X = make_forecasting_problem(index_type=index_type, make_X=True)

    for n_columns in column_counts:
        forecaster = _construct_instance(Forecaster)
        y = _make_series(n_columns=n_columns, index_type=index_type)
        midpoint = y.index[len(y) // 2]
        fh = _make_fh(midpoint, steps, fh_type, is_relative)

        y_train, _, X_train, X_test = temporal_train_test_split(y, X, fh=fh)

        try:
            forecaster.fit(y_train, X_train, fh=fh)
            predictions = forecaster.predict(X=X_test)
            _assert_correct_pred_time_index(
                predictions.index, y_train.index[-1], fh
            )
        except NotImplementedError:
            # Some estimators may not support all time index types and fh
            # types; such combinations are silently ignored.
            continue
def test_predict_residuals(Forecaster, index_type, fh_type, is_relative,
                           steps):
    """Verify that ``predict_residuals`` returns the index implied by ``fh``.

    For each supported column count a new test instance is fitted on a
    50-timepoint series. A second generated series, re-indexed to the
    prediction index, is then passed to ``predict_residuals``.
    """
    probe = Forecaster.create_test_instance()

    for n_columns in _get_n_columns(probe.get_tag("scitype:y")):
        forecaster = Forecaster.create_test_instance()
        series_kwargs = dict(n_columns=n_columns, index_type=index_type)
        y_train = _make_series(n_timepoints=50, **series_kwargs)
        fh = _make_fh(y_train.index[-1], steps, fh_type, is_relative)
        try:
            forecaster.fit(y_train, fh=fh)
            y_pred = forecaster.predict()

            # Build "observed" values living on exactly the predicted index.
            y_test = _make_series(n_timepoints=len(y_pred), **series_kwargs)
            y_test.index = y_pred.index
            residuals = forecaster.predict_residuals(y_test)
            _assert_correct_pred_time_index(
                residuals.index, y_train.index[-1], fh=fh
            )
        except NotImplementedError:
            # Unsupported index/fh combinations are skipped on purpose.
            continue
Esempio n. 4
0
    def test_predict_time_index_with_X(self, estimator_instance, n_columns,
                                       index_fh_comb, fh_int_oos):
        """Verify the prediction index matches ``fh`` when ``X`` is supplied.

        ``index_fh_comb`` is unpacked into ``(index_type, fh_type,
        is_relative)``. The estimator is fitted on the training part of a
        temporal split and predicts using ``X_test``.
        """
        index_type, fh_type, is_relative = index_fh_comb
        # ForecastingHorizon with timedelta values is currently experimental
        # and not supported everywhere, so that case is left unchecked.
        # todo: ensure check_estimator works with pytest.skip instead of the
        #   plain return used here.
        if fh_type == "timedelta":
            return None

        _, X = make_forecasting_problem(index_type=index_type, make_X=True)

        y = _make_series(n_columns=n_columns, index_type=index_type)
        midpoint = y.index[len(y) // 2]
        fh = _make_fh(midpoint, fh_int_oos, fh_type, is_relative)

        y_train, _, X_train, X_test = temporal_train_test_split(y, X, fh=fh)

        try:
            estimator_instance.fit(y_train, X_train, fh=fh)
            predictions = estimator_instance.predict(X=X_test)
            _assert_correct_pred_time_index(
                predictions.index, y_train.index[-1], fh
            )
        except NotImplementedError:
            # Some estimators may not support all time index types and fh
            # types; such combinations are silently ignored.
            return
Esempio n. 5
0
    def test_predict_residuals(self, estimator_instance, n_columns,
                               index_fh_comb, fh_int):
        """Verify that ``predict_residuals`` returns the index implied by ``fh``.

        The estimator is fitted on a 50-timepoint series. A second generated
        series, re-indexed to the prediction index, is then passed to
        ``predict_residuals`` and the index of the result is checked.
        """
        index_type, fh_type, is_relative = index_fh_comb
        # ForecastingHorizon with timedelta values is currently experimental
        # and not supported everywhere.
        # todo: ensure check_estimator works with pytest.skip instead of the
        #   plain return used here.
        if fh_type == "timedelta":
            # workaround to ensure check_estimator without breaking e.g.
            # debugging
            return None

        series_kwargs = dict(n_columns=n_columns, index_type=index_type)
        y_train = _make_series(n_timepoints=50, **series_kwargs)
        fh = _make_fh(y_train.index[-1], fh_int, fh_type, is_relative)
        try:
            estimator_instance.fit(y_train, fh=fh)
            y_pred = estimator_instance.predict()

            # Build "observed" values living on exactly the predicted index.
            y_test = _make_series(n_timepoints=len(y_pred), **series_kwargs)
            y_test.index = y_pred.index
            residuals = estimator_instance.predict_residuals(y_test)
            _assert_correct_pred_time_index(
                residuals.index, y_train.index[-1], fh=fh
            )
        except NotImplementedError:
            # Unsupported index/fh combinations are skipped on purpose.
            return
Esempio n. 6
0
    def test_predict_time_index(self, estimator_instance, n_columns,
                                index_fh_comb, fh_int):
        """Verify that the index of the predictions matches the horizon.

        ``index_fh_comb`` is unpacked into ``(index_type, fh_type,
        is_relative)``. The estimator is fitted on a 50-timepoint series with
        a horizon built from ``fh_int``, then predicts without arguments.
        """
        index_type, fh_type, is_relative = index_fh_comb
        # ForecastingHorizon with timedelta values is currently experimental
        # and not supported everywhere, so that case is left unchecked.
        # todo: ensure check_estimator works with pytest.skip instead of the
        #   plain return used here.
        if fh_type == "timedelta":
            return None

        y_train = _make_series(
            n_columns=n_columns, index_type=index_type, n_timepoints=50
        )
        fh = _make_fh(y_train.index[-1], fh_int, fh_type, is_relative)

        try:
            estimator_instance.fit(y_train, fh=fh)
            predictions = estimator_instance.predict()
            # NOTE(review): the expected index is derived from the raw
            # ``fh_int`` steps, not from the constructed ``fh`` object --
            # confirm this is intentional.
            _assert_correct_pred_time_index(
                predictions.index, y_train.index[-1], fh=fh_int
            )
        except NotImplementedError:
            # Unsupported index/fh combinations are skipped on purpose.
            return
Esempio n. 7
0
def test_predict_time_index(Forecaster, index_type, fh_type, is_relative,
                            steps):
    """Verify that the index of the predictions matches the horizon.

    The forecaster is fitted on a generated series whose last index entry
    serves as cutoff for the horizon built from ``steps``.
    """
    y_train = make_forecasting_problem(index_type=index_type)
    fh = _make_fh(y_train.index[-1], steps, fh_type, is_relative)
    forecaster = _construct_instance(Forecaster)
    try:
        forecaster.fit(y_train, fh=fh)
        predictions = forecaster.predict()
        assert_correct_pred_time_index(
            predictions.index, y_train.index[-1], fh
        )
    except NotImplementedError:
        # Combinations the estimator does not implement are ignored.
        return
Esempio n. 8
0
def test_fh(index_type, fh_type, is_relative, steps):
    """Check ForecastingHorizon conversions against independently built indices.

    A series is generated, split with ``test_size=10``, and the last training
    index entry is used as cutoff. The horizon created from ``steps`` is then
    compared with expected relative, absolute, indexer, in-sample and
    out-of-sample representations computed directly from ``steps``.

    Parameters
    ----------
    index_type : str
        Key into ``INDEX_TYPE_LOOKUP`` for the index type of the series.
    fh_type : str
        Key into ``INDEX_TYPE_LOOKUP`` for the expected horizon index type.
    is_relative : bool
        Whether the horizon is created as relative.
    steps : int or array-like of int
        Steps relative to the cutoff.
    """
    # generate data
    y = make_forecasting_problem(index_type=index_type)
    assert isinstance(y.index, INDEX_TYPE_LOOKUP.get(index_type))

    # split data; only the training part is needed to pick the cutoff
    y_train, _ = temporal_train_test_split(y, test_size=10)

    # choose cutoff point
    cutoff = y_train.index[-1]

    # generate fh
    fh = _make_fh(cutoff, steps, fh_type, is_relative)
    assert isinstance(fh.to_pandas(), INDEX_TYPE_LOOKUP.get(fh_type))

    # get expected outputs
    # a scalar step (Python or NumPy integer) is wrapped so it can be used
    # for index arithmetic and index construction below
    if isinstance(steps, (int, np.integer)):
        steps = np.array([steps])
    # pd.Int64Index was removed in pandas 2.0; pd.Index with an explicit
    # dtype builds the equivalent integer index on old and new versions
    fh_relative = pd.Index(steps, dtype="int64").sort_values()
    fh_absolute = y.index[np.where(y.index == cutoff)[0] + steps].sort_values()
    fh_indexer = fh_relative - 1
    fh_oos = fh.to_pandas()[fh_relative > 0]
    is_oos = len(fh_oos) == len(fh)
    fh_ins = fh.to_pandas()[fh_relative <= 0]
    is_ins = len(fh_ins) == len(fh)

    # check outputs
    # check absolute representation
    _assert_index_equal(fh_absolute, fh.to_absolute(cutoff).to_pandas())
    assert not fh.to_absolute(cutoff).is_relative

    # check relative representation
    _assert_index_equal(fh_relative, fh.to_relative(cutoff).to_pandas())
    assert fh.to_relative(cutoff).is_relative

    # check index-like representation
    _assert_index_equal(fh_indexer, fh.to_indexer(cutoff))

    # check in-sample representation
    # we only compare the numpy array here because the expected solution is
    # formatted in a slightly different way than the generated solution
    np.testing.assert_array_equal(
        fh_ins.to_numpy(), fh.to_in_sample(cutoff).to_pandas()
    )
    assert fh.to_in_sample(cutoff).is_relative == is_relative
    assert fh.is_all_in_sample(cutoff) == is_ins

    # check out-of-sample representation
    np.testing.assert_array_equal(
        fh_oos.to_numpy(), fh.to_out_of_sample(cutoff).to_pandas()
    )
    assert fh.to_out_of_sample(cutoff).is_relative == is_relative
    assert fh.is_all_out_of_sample(cutoff) == is_oos
Esempio n. 9
0
def test_split_by_fh(index_type, fh_type, is_relative, values):
    """Split a generated series by forecasting horizon and validate the split.

    Timedelta horizons are left unchecked. Otherwise the horizon is built
    with the index entry at position 10 as cutoff and the output of
    ``temporal_train_test_split`` is handed to ``_check_train_test_split_y``.
    """
    # ForecastingHorizon with timedelta values is currently experimental and
    # not supported everywhere.
    # todo: ensure check_estimator works with pytest.skip instead of the
    #   plain return used here.
    if fh_type == "timedelta":
        return None

    series = _make_series(20, index_type=index_type)
    horizon = _make_fh(series.index[10], values, fh_type, is_relative)
    result = temporal_train_test_split(series, fh=horizon)
    _check_train_test_split_y(horizon, result)
Esempio n. 10
0
def test_predict_time_index_in_sample_full(Forecaster, index_type, fh_type,
                                           is_relative):
    """Verify the prediction index for a horizon spanning all training points.

    The horizon steps are ``0, -1, ..., -(len(y_train) - 1)`` relative to the
    last training index entry, i.e. a full in-sample horizon.
    """
    y_train = make_forecasting_problem(index_type=index_type)
    full_in_sample = -np.arange(len(y_train))
    fh = _make_fh(y_train.index[-1], full_in_sample, fh_type, is_relative)
    forecaster = _construct_instance(Forecaster)
    try:
        forecaster.fit(y_train, fh=fh)
        predictions = forecaster.predict()
        assert_correct_pred_time_index(
            predictions.index, y_train.index[-1], fh
        )
    except NotImplementedError:
        # Combinations the estimator does not implement are ignored.
        return
Esempio n. 11
0
def test_predict_time_index(Forecaster, index_type, fh_type, is_relative,
                            steps):
    """Verify that the index of the predictions matches the horizon.

    The forecaster is fitted on a generated series whose last index entry
    serves as cutoff for the horizon built from ``steps``.
    """
    y_train = make_forecasting_problem(index_type=index_type)
    fh = _make_fh(y_train.index[-1], steps, fh_type, is_relative)
    forecaster = _construct_instance(Forecaster)

    try:
        forecaster.fit(y_train, fh=fh)
        predictions = forecaster.predict()
        _assert_correct_pred_time_index(
            predictions.index, y_train.index[-1], fh
        )
    except NotImplementedError:
        # Some estimators may not support all time index types and fh types;
        # such combinations are silently ignored.
        return
Esempio n. 12
0
def test_predict_time_index_in_sample_full(Forecaster, index_type, fh_type,
                                           is_relative):
    """Verify the prediction index for a horizon spanning all training points.

    The horizon steps are ``0, -1, ..., -(len(y_train) - 1)`` relative to the
    last training index entry, i.e. a full in-sample horizon.
    """
    y_train = make_forecasting_problem(index_type=index_type)
    full_in_sample = -np.arange(len(y_train))
    fh = _make_fh(y_train.index[-1], full_in_sample, fh_type, is_relative)
    forecaster = _construct_instance(Forecaster)

    try:
        forecaster.fit(y_train, fh=fh)
        predictions = forecaster.predict()
        _assert_correct_pred_time_index(
            predictions.index, y_train.index[-1], fh
        )
    except NotImplementedError:
        # Some estimators may not support all time index types and fh types;
        # such combinations are silently ignored.
        return
Esempio n. 13
0
def test_predict_time_index(Forecaster, index_type, fh_type, is_relative, steps):
    """Verify that the index of the predictions matches the horizon.

    For every column count that ``_get_n_columns`` returns for the
    forecaster's ``"scitype:y"`` tag, a fresh instance is fitted on a
    50-timepoint series and predicts for the horizon built from ``steps``.
    """
    probe = _construct_instance(Forecaster)

    for n_columns in _get_n_columns(probe.get_tag("scitype:y")):
        forecaster = _construct_instance(Forecaster)
        series_kwargs = dict(n_columns=n_columns, index_type=index_type)
        y_train = _make_series(n_timepoints=50, **series_kwargs)
        fh = _make_fh(y_train.index[-1], steps, fh_type, is_relative)

        try:
            forecaster.fit(y_train, fh=fh)
            predictions = forecaster.predict()
            _assert_correct_pred_time_index(
                predictions.index, y_train.index[-1], fh=fh
            )
        except NotImplementedError:
            # Unsupported index/fh combinations are skipped on purpose.
            continue
Esempio n. 14
0
def test_predict_time_index_in_sample_full(
    Forecaster, index_type, fh_type, is_relative
):
    """Verify the prediction index for a horizon spanning all training points.

    For each supported column count a fresh instance is fitted with the
    horizon steps ``0, -1, ..., -(len(y_train) - 1)`` relative to the last
    training index entry.
    """
    probe = _construct_instance(Forecaster)

    for n_columns in _get_n_columns(probe.get_tag("scitype:y")):
        forecaster = _construct_instance(Forecaster)
        y_train = _make_series(n_columns=n_columns, index_type=index_type)
        last_seen = y_train.index[-1]
        full_in_sample = -np.arange(len(y_train))
        fh = _make_fh(last_seen, full_in_sample, fh_type, is_relative)

        try:
            forecaster.fit(y_train, fh=fh)
            predictions = forecaster.predict()
            _assert_correct_pred_time_index(
                predictions.index, y_train.index[-1], fh
            )
        except NotImplementedError:
            # Unsupported index/fh combinations are skipped on purpose.
            continue
Esempio n. 15
0
    def test_predict_time_index_in_sample_full(self, estimator_instance,
                                               n_columns, index_fh_comb):
        """Verify the prediction index for a horizon spanning all training points.

        ``index_fh_comb`` is unpacked into ``(index_type, fh_type,
        is_relative)``. The horizon steps are ``0, -1, ...,
        -(len(y_train) - 1)`` relative to the last training index entry.
        """
        index_type, fh_type, is_relative = index_fh_comb
        # ForecastingHorizon with timedelta values is currently experimental
        # and not supported everywhere, so that case is left unchecked.
        # todo: ensure check_estimator works with pytest.skip instead of the
        #   plain return used here.
        if fh_type == "timedelta":
            return None

        y_train = _make_series(n_columns=n_columns, index_type=index_type)
        last_seen = y_train.index[-1]
        full_in_sample = -np.arange(len(y_train))
        fh = _make_fh(last_seen, full_in_sample, fh_type, is_relative)

        try:
            estimator_instance.fit(y_train, fh=fh)
            predictions = estimator_instance.predict()
            _assert_correct_pred_time_index(
                predictions.index, y_train.index[-1], fh
            )
        except NotImplementedError:
            # Unsupported index/fh combinations are skipped on purpose.
            return
Esempio n. 16
0
def test_fh(index_type, fh_type, is_relative, steps):
    """Check ForecastingHorizon conversions against independently built indices.

    The test is skipped when ``steps`` and ``fh_type`` do not fit together
    (integer steps with a ``"timedelta"`` horizon, or timedelta steps with any
    other horizon type). Otherwise a series is generated, split with
    ``test_size=10``, and the last training index entry is used as cutoff.
    The horizon created from ``steps`` is compared with expected relative,
    absolute, in-sample and out-of-sample representations computed directly
    from ``steps``. For integer steps the indexer representation is checked
    as well; for other steps ``to_indexer`` is expected to raise
    ``NotImplementedError``.

    Parameters
    ----------
    index_type : str
        Index type of the generated series; ``"int"`` is checked with
        ``is_integer_index``, other values via ``INDEX_TYPE_LOOKUP``.
    fh_type : str
        Expected index type of the horizon, checked the same way.
    is_relative : bool
        Whether the horizon is created as relative.
    steps : int, pd.Timedelta or array-like
        Steps relative to the cutoff.
    """
    int_types = ["int64", "int32"]
    steps_is_int = (isinstance(steps, (int, np.integer))
                    or np.array(steps).dtype in int_types)
    steps_is_timedelta = isinstance(steps, pd.Timedelta) or (isinstance(
        steps, list) and isinstance(pd.Index(steps), pd.TimedeltaIndex))
    steps_and_fh_incompatible = (fh_type == "timedelta"
                                 and steps_is_int) or (fh_type != "timedelta"
                                                       and steps_is_timedelta)
    if steps_and_fh_incompatible:
        pytest.skip("steps and fh_type are incompatible")
    # generate data
    y = make_forecasting_problem(index_type=index_type)
    if index_type == "int":
        assert is_integer_index(y.index)
    else:
        assert isinstance(y.index, INDEX_TYPE_LOOKUP.get(index_type))

    # split data; only the training part is needed to pick the cutoff
    y_train, _ = temporal_train_test_split(y, test_size=10)

    # choose cutoff point
    cutoff = y_train.index[-1]

    # generate fh
    fh = _make_fh(cutoff, steps, fh_type, is_relative)
    if fh_type == "int":
        assert is_integer_index(fh.to_pandas())
    else:
        assert isinstance(fh.to_pandas(), INDEX_TYPE_LOOKUP.get(fh_type))

    # get expected outputs
    # scalar steps are wrapped into a one-element container; NumPy integer
    # scalars are treated like Python ints, in line with the compatibility
    # check above (pd.Index rejects bare scalars)
    if isinstance(steps, (int, np.integer)):
        steps = np.array([steps])
    elif isinstance(steps, pd.Timedelta):
        steps = pd.Index([steps])
    else:
        steps = pd.Index(steps)

    # steps is not modified below, so the dtype test is evaluated once
    int_steps = steps.dtype in int_types

    if int_steps:
        fh_relative = pd.Index(steps, dtype="int64").sort_values()
        fh_absolute = y.index[np.where(y.index == cutoff)[0] +
                              steps].sort_values()
        fh_indexer = fh_relative - 1
        null = 0
    else:
        fh_relative = steps.sort_values()
        fh_absolute = (cutoff + steps).sort_values()
        fh_indexer = None
        null = pd.Timedelta(0)

    # `null` is the zero element separating in-sample from out-of-sample
    fh_oos = fh.to_pandas()[fh_relative > null]
    is_oos = len(fh_oos) == len(fh)
    fh_ins = fh.to_pandas()[fh_relative <= null]
    is_ins = len(fh_ins) == len(fh)

    # check outputs
    # check absolute representation
    _assert_index_equal(fh_absolute, fh.to_absolute(cutoff).to_pandas())
    assert not fh.to_absolute(cutoff).is_relative

    # check relative representation
    _assert_index_equal(fh_relative, fh.to_relative(cutoff).to_pandas())
    assert fh.to_relative(cutoff).is_relative

    if int_steps:
        # check index-like representation
        _assert_index_equal(fh_indexer, fh.to_indexer(cutoff))
    else:
        with pytest.raises(NotImplementedError):
            fh.to_indexer(cutoff)

    # check in-sample representation
    # we only compare the numpy array here because the expected solution is
    # formatted in a slightly different way than the generated solution
    np.testing.assert_array_equal(fh_ins.to_numpy(),
                                  fh.to_in_sample(cutoff).to_pandas())
    assert fh.to_in_sample(cutoff).is_relative == is_relative
    assert fh.is_all_in_sample(cutoff) == is_ins

    # check out-of-sample representation
    np.testing.assert_array_equal(fh_oos.to_numpy(),
                                  fh.to_out_of_sample(cutoff).to_pandas())
    assert fh.to_out_of_sample(cutoff).is_relative == is_relative
    assert fh.is_all_out_of_sample(cutoff) == is_oos