def test_generate_holiday_events3():
    """Checks the ``pre_post_num_dict`` parameter of ``generate_holiday_events``.

    Holidays listed in ``pre_post_num_dict`` are expected to get their own
    (pre, post) neighboring-day windows, while the remaining holidays use the
    global ``pre_num``/``post_num``. A holiday name in the dict that is not
    valid is expected to raise a ``UserWarning``.
    """
    # Arguments shared by both calls below; only ``pre_post_num_dict`` varies.
    shared_kwargs = dict(
        countries=["UnitedStates", "India"],
        holidays_to_model_separately=[
            "New Year's Day",
            "Diwali",
            "Columbus Day"
        ],
        year_start=2019,
        year_end=2020,
        pre_num=2,
        post_num=2)

    event_keys = generate_holiday_events(
        **shared_kwargs,
        pre_post_num_dict={
            "New Year's Day": (0, 2),
            "Columbus Day": (1, 3)}).keys()

    # The outermost neighboring day of each window must be present ...
    must_exist = {
        "New Years Day_plus_2",
        "Diwali_minus_2",
        "Diwali_plus_2",
        "Columbus Day_minus_1",
        "Columbus Day_plus_3",
    }
    assert not must_exist.difference(event_keys)

    # ... and the day just beyond each window must be absent.
    must_not_exist = {
        "New Years Day_minus_1",
        "New Years Day_plus_3",
        "Diwali_minus_3",
        "Diwali_plus_3",
        "Columbus Day_minus_2",
        "Columbus Day_plus_4",
    }
    assert must_not_exist.isdisjoint(event_keys)

    # An unknown holiday in ``pre_post_num_dict`` should warn.
    with pytest.warns(UserWarning) as record:
        generate_holiday_events(
            **shared_kwargs,
            pre_post_num_dict={"Bank Holiday": (1, 1)})
        first_message = record[0].message.args[0]
        assert "Requested holiday 'Bank Holiday' is not valid. Valid holidays are" in first_message
def test_generate_holiday_events():
    """Checks the keys and a few sample values returned by ``generate_holiday_events``.

    The result should contain one entry per separately modeled holiday
    (apostrophes removed) plus "Other", along with ``_minus_i`` / ``_plus_i``
    entries for each neighboring day up to ``pre_num`` / ``post_num``.
    """
    separate_holidays = [
        "New Year's Day",
        "Christmas Day",
        "Independence Day",
        "Thanksgiving",
        "Labor Day",
        "Good Friday",
        "Easter Monday [England, Wales, Northern Ireland]",
        "Memorial Day",
        "Veterans Day"]
    num_before = 2
    num_after = 2

    event_dict = generate_holiday_events(
        countries=["UnitedStates", "UnitedKingdom", "India", "France"],
        holidays_to_model_separately=separate_holidays,
        year_start=2019,
        year_end=2020,
        pre_num=num_before,
        post_num=num_after)

    # Keys have apostrophes stripped; "Other" is the default value.
    base_names = [name.replace("'", "") for name in separate_holidays] + ["Other"]
    expected_keys = set(base_names)
    expected_keys.update(
        f"{name}_minus_{offset}"
        for offset in range(1, num_before + 1)
        for name in base_names)
    expected_keys.update(
        f"{name}_plus_{offset}"
        for offset in range(1, num_after + 1)
        for name in base_names)
    assert expected_keys == set(event_dict.keys())

    def indicator_df(*dates):
        """Builds the expected event DataFrame for the given dates."""
        return pd.DataFrame({
            EVENT_DF_DATE_COL: list(dates),
            EVENT_DF_LABEL_COL: [EVENT_INDICATOR] * len(dates)
        })

    # Spot checks: the holiday itself, a day after, and two days before.
    expected_dates = {
        "Christmas Day": (
            datetime.datetime(2019, 12, 25), datetime.datetime(2020, 12, 25)),
        "Easter Monday [England, Wales, Northern Ireland]_plus_1": (
            datetime.datetime(2019, 4, 23), datetime.datetime(2020, 4, 14)),
        "Veterans Day_minus_2": (
            datetime.datetime(2019, 11, 9), datetime.datetime(2020, 11, 9)),
    }
    for key, dates in expected_dates.items():
        assert event_dict[key].equals(indicator_df(*dates))
def test_generate_holiday_events2():
    """Checks that ``pre_num=0`` and ``post_num=0`` are handled properly.

    With no neighboring days requested, the keys from ``generate_holiday_events``
    should equal those produced by running ``get_holidays``,
    ``dedup_holiday_dict`` and ``split_events_into_dictionaries`` by hand.
    """
    countries = ["UnitedStates", "UnitedKingdom", "India", "France"]
    year_range = {"year_start": 2019, "year_end": 2020}
    separate_holidays = [
        "New Year's Day",
        "Christmas Day",
        "Independence Day",
        "Thanksgiving",
        "Labor Day",
        "Good Friday",
        "Easter Monday [England, Wales, Northern Ireland]",
        "Memorial Day",
        "Veterans Day"]

    generated = generate_holiday_events(
        countries=countries,
        holidays_to_model_separately=separate_holidays,
        pre_num=0,
        post_num=0,
        **year_range)

    # Manual pipeline: fetch per-country holidays, merge them while dropping
    # duplicates, then split into one DataFrame per holiday.
    merged_holidays = dedup_holiday_dict(get_holidays(countries, **year_range))
    manual = split_events_into_dictionaries(merged_holidays, separate_holidays)

    assert generated.keys() == manual.keys()
Example #4
0
def test_run_template_2():
    """Runs the ``SK`` model template with every model component customized.

    Fits on generated daily data with regressors and holiday events, then
    checks backtest/forecast metrics against recorded values (1% relative
    tolerance) and validates the pipeline result.
    """
    test_data = generate_df_with_reg_for_tests(
        freq="D",
        periods=400,
        remove_extra_cols=True,
        mask_test_actuals=True)
    regressors = ["regressor1", "regressor2", "regressor_categ"]
    df = test_data["df"][[TIME_COL, VALUE_COL] + regressors]
    horizon = test_data["test_df"].shape[0]

    holiday_events = generate_holiday_events(
        countries=["UnitedStates"],
        holidays_to_model_separately=["New Year's Day"],
        year_start=2017,
        year_end=2022,
        pre_num=2,
        post_num=2)
    event_terms = get_event_pred_cols(holiday_events)

    seasonality = {
        "fs_components_df": pd.DataFrame({
            "name": ["tow", "tom", "toq", "toy"],
            "period": [7.0, 1.0, 1.0, 1.0],
            "order": [2, 1, 1, 5],
            "seas_names": ["weekly", "monthly", "quarterly", "yearly"]
        })
    }
    changepoints = {
        "changepoints_dict": {
            "method": "auto",
            "yearly_seasonality_order": 3,
            "regularization_strength": 0.5,
            "resample_freq": "14D",
            "potential_changepoint_distance": "56D",
            "no_changepoint_proportion_from_end": 0.2
        },
        "seasonality_changepoints_dict": {
            "potential_changepoint_distance": "60D",
            "regularization_strength": 0.5,
            "no_changepoint_proportion_from_end": 0.2
        },
    }
    # Builtin min/max are used on purpose to keep the original bound values.
    value_max = max(df[VALUE_COL])
    custom = {
        "origin_for_time_vars": None,
        # growth, regressors, events
        "extra_pred_cols": [["ct1"] + regressors + event_terms],
        "fit_algorithm_dict": {
            "fit_algorithm": "ridge",
            "fit_algorithm_params": {"cv": 2}
        },
        "min_admissible_value": min(df[VALUE_COL]) - abs(value_max),
        "max_admissible_value": value_max * 2,
    }
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=horizon,
        coverage=0.9,
        model_components_param=ModelComponentsParam(
            seasonality=seasonality,
            events={"daily_event_df_dict": holiday_events},
            changepoints=changepoints,
            autoregression=None,
            uncertainty={"uncertainty_dict": None},
            custom=custom,
        ),
    )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(
            df=df,
            config=config,
        )
        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        q80 = EvaluationMetricEnum.Quantile80.get_metric_name()
        # (evaluation dict, ((metric name, recorded value), ...))
        recorded_metrics = (
            (result.backtest.test_evaluation, (
                (rmse, 2.692),
                (q80, 1.531),
                (PREDICTION_BAND_COVERAGE, 0.823))),
            (result.forecast.train_evaluation, (
                (rmse, 2.304),
                (q80, 0.921),
                (PREDICTION_BAND_COVERAGE, 0.897))),
        )
        for evaluation, metric_values in recorded_metrics:
            for metric_name, recorded in metric_values:
                assert evaluation[metric_name] == pytest.approx(recorded, rel=1e-2)
        check_forecast_pipeline_result(
            result,
            coverage=0.9,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)
Example #5
0
def params_components():
    """Builds a dictionary of parameters for ``forecast_silverkite``.

    Returns:
        dict: keyword parameters covering coverage, time origin, extra
        predictor columns, fit algorithm, holiday events, changepoints,
        seasonality components, autoregression and uncertainty settings.
    """
    # autoregression: a single lag plus aggregated lags
    autoregression = {
        "lag_dict": {
            "orders": [7]
        },
        "agg_lag_dict": {
            "orders_list": [[7, 7 * 2, 7 * 3]],
            "interval_list": [(7, 7 * 2)]
        },
        "series_na_fill_func": lambda s: s.bfill().ffill()
    }

    # uncertainty: residual-based intervals conditioned on "dow"
    uncertainty = {
        "uncertainty_method": "simple_conditional_residuals",
        "params": {
            "conditional_cols": ["dow"],
            "quantiles": [0.025, 0.975],
            "quantile_estimation_method": "normal_fit",
            "sample_size_thresh": 5,
            "small_sample_size_method": "std_quantiles",
            "small_sample_size_quantile": 0.98
        }
    }

    # holidays, with two neighboring days on each side
    holiday_events = generate_holiday_events(
        countries=["US", "India"],
        holidays_to_model_separately=[
            "New Year's Day", "Christmas Day", "Independence Day", "Thanksgiving",
            "Labor Day", "Memorial Day", "Veterans Day"
        ],
        year_start=2015,
        year_end=2025,
        pre_num=2,
        post_num=2)

    # constant event effect at daily level
    event_terms = [f"Q('events_{name}')" for name in holiday_events.keys()]
    weekend_weekly_terms = cols_interact(
        static_col="is_weekend",
        fs_name="tow",
        fs_order=4,
        fs_seas_name="weekly")
    base_terms = ["ct_sqrt", "dow_hr", "ct1", "ct1:tod", "regressor1", "regressor2"]
    predictor_terms = base_terms + event_terms + weekend_weekly_terms

    # seasonality terms
    fourier_components = pd.DataFrame({
        "name": ["tod", "tow", "ct1"],
        "period": [24.0, 7.0, 1.0],
        "order": [12, 4, 5],
        "seas_names": ["daily", "weekly", "yearly"]
    })

    # changepoints at user-specified dates
    changepoints = {
        "method": "custom",
        "dates": ["2018-01-01", "2019-01-02-16", "2019-01-03", "2019-02-01"],
        "continuous_time_col": "ct2",
    }

    time_origin = convert_date_to_continuous_time(datetime.datetime(2018, 1, 3))
    return {
        "coverage": 0.95,
        "origin_for_time_vars": time_origin,
        "extra_pred_cols": predictor_terms,
        "train_test_thresh": None,
        "training_fraction": None,
        "fit_algorithm": "ridge",
        "daily_event_df_dict": holiday_events,
        "changepoints_dict": changepoints,
        "fs_components_df": fourier_components,
        "autoreg_dict": autoregression,
        "min_admissible_value": None,
        "max_admissible_value": None,
        "uncertainty_dict": uncertainty,
    }