Esempio n. 1
0
def test_dlt_map_univariate(make_weekly_data):
    """Fit a seasonal DLT with the MAP estimator; check init values and prediction output."""
    train_df, test_df, _ = make_weekly_data

    model_kwargs = {
        'response_col': 'response',
        'date_col': 'week',
        'seasonality': 52,
        'num_warmup': 50,
        'verbose': False,
        'estimator': 'stan-map',
    }
    dlt = DLT(**model_kwargs)
    dlt.fit(train_df)

    # the model hands back an initializer object; calling it yields the init values
    initializer = dlt._model.get_init_values()
    assert isinstance(initializer, DLTInitializer)
    assert initializer.s == 52
    generated = initializer()
    assert generated['init_sea'].shape == (51,)

    forecast = dlt.predict(test_df)

    wanted_columns = ['week', 'prediction']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    # number of entries stored in the posterior samples container
    assert len(dlt._posterior_samples) == 12
Esempio n. 2
0
def test_base_dlt_init(estimator):
    """Check the state of a DLT that was constructed but never fitted."""
    dlt = DLT(estimator=estimator)

    # model is not yet fitted
    assert not dlt.is_fitted()

    # training data input should only be initialized and not set
    assert not dlt.get_training_data_input()

    # model param names should already be set
    assert dlt._model.get_model_param_names()

    # callable is not implemented yet
    assert not dlt._model.get_init_values()
Esempio n. 3
0
def test_dlt_aggregated_with_regression(make_weekly_data, estimator, regressor_signs, point_method):
    """Fit a DLT with regressors using a point method; check prediction and coefficient shapes."""
    train_df, test_df, _ = make_weekly_data

    model_kwargs = {
        'response_col': 'response',
        'date_col': 'week',
        # every column from the third one onward is used as a regressor
        'regressor_col': train_df.columns.tolist()[2:],
        'regressor_sign': regressor_signs,
        'seasonality': 52,
        'num_warmup': 50,
        'verbose': False,
        'estimator': estimator,
    }
    dlt = DLT(**model_kwargs)

    dlt.fit(train_df, point_method=point_method)
    forecast = dlt.predict(test_df)
    coef_table = dlt.get_regression_coefs()

    wanted_columns = ['week', 'prediction']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert coef_table.shape == (6, 3)
    # one coefficient row per regressor column
    assert coef_table.shape[0] == len(train_df.columns.tolist()[2:])

    # the decomposed prediction must carry a non-trivial regression component
    decomposed = dlt.predict(test_df, decompose=True)
    assert any(decomposed['regression'].values)
Esempio n. 4
0
def test_dlt_map_global_trend(make_weekly_data, global_trend_option):
    """Fit a MAP DLT with the given global trend option and check the prediction frame."""
    train_df, test_df, _ = make_weekly_data

    model_kwargs = {
        'response_col': 'response',
        'date_col': 'week',
        'seasonality': 52,
        'global_trend_option': global_trend_option,
        'estimator': 'stan-map',
    }
    dlt = DLT(**model_kwargs)
    dlt.fit(train_df)
    forecast = dlt.predict(test_df)

    wanted_columns = ['week', 'prediction']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
Esempio n. 5
0
def test_dlt_non_seasonal_fit(make_weekly_data, estimator):
    """Fit a DLT without a seasonality argument and check the prediction frame."""
    train_df, test_df, _ = make_weekly_data

    model_kwargs = {
        'response_col': 'response',
        'date_col': 'week',
        'estimator': estimator,
        'num_warmup': 50,
    }
    dlt = DLT(**model_kwargs)
    dlt.fit(train_df)
    forecast = dlt.predict(test_df)

    # prediction is expected with its 5th / 95th percentile columns
    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    # number of entries stored in the posterior samples container
    assert len(dlt._posterior_samples) == 10
Esempio n. 6
0
def test_dlt_fixed_sm_input(make_weekly_data, level_sm_input, seasonality_sm_input, slope_sm_input):
    """Fit a MAP DLT with the supplied smoothing inputs; check prediction and coefficient shapes."""
    train_df, test_df, _ = make_weekly_data

    model_kwargs = {
        'response_col': 'response',
        'date_col': 'week',
        # every column from the third one onward is used as a regressor
        'regressor_col': train_df.columns.tolist()[2:],
        'level_sm_input': level_sm_input,
        'seasonality_sm_input': seasonality_sm_input,
        'slope_sm_input': slope_sm_input,
        'seasonality': 52,
        'num_warmup': 50,
        'verbose': False,
        'estimator': 'stan-map',
    }
    dlt = DLT(**model_kwargs)

    dlt.fit(train_df)
    forecast = dlt.predict(test_df)
    coef_table = dlt.get_regression_coefs()

    wanted_columns = ['week', 'prediction']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert coef_table.shape == (6, 3)
    # one coefficient row per regressor column
    assert coef_table.shape[0] == len(train_df.columns.tolist()[2:])
Esempio n. 7
0
def test_dlt_map_reproducibility(make_weekly_data, seasonality):
    """Two identically configured MAP DLT models must give the same posteriors and predictions."""
    train_df, test_df, _ = make_weekly_data

    def _fresh_model():
        # a new instance must be created for each run to reset the seed
        return DLT(
            response_col='response',
            date_col='week',
            prediction_percentiles=[5, 95],
            seasonality=seasonality,
            estimator='stan-map',
        )

    # note both fit and predict contain random generation processes
    model_a = _fresh_model()
    model_a.fit(train_df)
    map_a = copy(model_a._point_posteriors['map'])
    forecast_a = model_a.predict(test_df)

    model_b = _fresh_model()
    model_b.fit(train_df)
    map_b = copy(model_b._point_posteriors['map'])
    forecast_b = model_b.predict(test_df)

    # same posterior keys on both runs
    assert set(map_a.keys()) == set(map_b.keys())

    # posterior values are reproducible, key by key
    for key in map_a:
        assert np.allclose(map_a[key], map_b[key])

    # prediction is reproducible
    assert np.allclose(forecast_a['prediction'].values, forecast_b['prediction'].values)
Esempio n. 8
0
def test_dlt_map_single_regressor(iclaims_training_data):
    """Fit a MAP DLT with a single regressor on log claims and check the in-sample prediction."""
    df = iclaims_training_data
    # model the response on the log scale (overwrites the column in place)
    df['claims'] = np.log(df['claims'])

    model_kwargs = {
        'response_col': 'claims',
        'date_col': 'week',
        'regressor_col': ['trend.unemploy'],
        'seasonality': 52,
        'seed': 8888,
        'estimator': 'stan-map',
    }
    dlt = DLT(**model_kwargs)
    dlt.fit(df)
    fitted_df = dlt.predict(df)

    # one prediction row per input row
    assert fitted_df.shape[0] == df.shape[0]
    assert fitted_df.columns.tolist() == ['week', 'prediction']
    # number of entries stored in the posterior samples container
    assert len(dlt._posterior_samples) == 13
Esempio n. 9
0
def test_dlt_is_fitted(iclaims_training_data, estimator, keep_samples, point_method):
    """``is_fitted`` must report True after ``fit`` for every keep_samples / point_method combo."""
    df = iclaims_training_data
    # model the response on the log scale (overwrites the column in place)
    df['claims'] = np.log(df['claims'])

    model_kwargs = {
        'response_col': 'claims',
        'date_col': 'week',
        'regressor_col': ['trend.unemploy'],
        'seasonality': 52,
        'seed': 8888,
        'num_warmup': 50,
        'num_sample': 50,
        'verbose': False,
        'estimator': estimator,
    }
    dlt = DLT(**model_kwargs)
    dlt.fit(df, keep_samples=keep_samples, point_method=point_method)

    # still True when keep_samples is False
    assert dlt.is_fitted()
Esempio n. 10
0
def test_dlt_predict_seed(make_weekly_data, estimator, random_seed):
    """Predicting twice with the same seed on one fitted model must give identical predictions."""
    train_df, test_df, _ = make_weekly_data

    model_kwargs = dict(
        response_col='response',
        date_col='week',
        seasonality=52,
        n_bootstrap_draws=100,
        verbose=False,
        estimator=estimator,
    )

    # estimator-specific settings
    if estimator == 'stan-mcmc':
        model_kwargs['num_warmup'] = 50
        model_kwargs['num_sample'] = 100
    elif estimator == 'pyro-svi':
        model_kwargs['num_steps'] = 10

    dlt = DLT(**model_kwargs)
    dlt.fit(train_df)

    first_pred = dlt.predict(test_df, seed=random_seed)['prediction'].values
    second_pred = dlt.predict(test_df, seed=random_seed)['prediction'].values

    assert all(first_pred == second_pred)
Esempio n. 11
0
def test_invalid_regressor(make_weekly_data, regressor_signs, invalid_input):
    """Building and fitting a DLT must raise ``ModelException`` for an invalid regressor value.

    ``invalid_input`` is written into one cell of the first regressor column of
    the training frame before the model is constructed and fitted.
    """
    train_df, _, _ = make_weekly_data
    regressor_col = train_df.columns.tolist()[2:]

    # Make an invalid value with a single ``.loc`` assignment. The chained form
    # ``train_df[col][36] = ...`` assigns into an intermediate Series: it emits
    # SettingWithCopyWarning and, under pandas copy-on-write, leaves
    # ``train_df`` unchanged, so the invalid value would never reach the model.
    # NOTE(review): assumes row label 36 exists (default integer index) - confirm.
    train_df.loc[36, regressor_col[0]] = invalid_input

    raised = False
    try:
        dlt = DLT(
            response_col='response',
            date_col='week',
            regressor_col=regressor_col,
            regressor_sign=regressor_signs,
            prediction_percentiles=[5, 95],
            seasonality=52,
            num_warmup=50,
            verbose=False,
            estimator='stan-map'
        )
        dlt.fit(train_df)
    except ModelException:
        raised = True

    assert raised, 'expected ModelException for an invalid regressor value'
Esempio n. 12
0
def test_dlt_missing(iclaims_training_data, estimator):
    """Fit a DLT on a response with missing values and check the prediction has no NaN.

    Raises:
        ValueError: if ``estimator`` has no expected column layout defined here.
    """
    # Resolve the expected layout before the fit, so an unsupported estimator
    # fails immediately with a clear message instead of an UnboundLocalError on
    # ``expected_columns`` after the model has already been fitted.
    columns_by_estimator = {
        'stan-map': ['week', 'prediction'],
        'stan-mcmc': ['week', 'prediction_5', 'prediction', 'prediction_95'],
    }
    if estimator not in columns_by_estimator:
        raise ValueError(
            'no expected prediction columns defined for estimator {!r}'.format(estimator)
        )
    expected_columns = columns_by_estimator[estimator]

    df = iclaims_training_data
    # blank out scattered points, a consecutive run, and the very last observation
    missing_idx = np.array([10, 20, 30, 40, 41, 42, 43, 44, df.shape[0] - 1])
    df.loc[missing_idx, 'claims'] = np.nan

    dlt = DLT(
        response_col='claims',
        date_col='week',
        seasonality=52,
        verbose=False,
        estimator=estimator
    )

    dlt.fit(df)
    predicted_df = dlt.predict(df)

    # predictions must be filled in even where the response was missing
    assert all(~np.isnan(predicted_df['prediction']))
    assert predicted_df.columns.tolist() == expected_columns
    assert predicted_df.shape[0] == df.shape[0]
Esempio n. 13
0
def test_dlt_grid_tuning(make_weekly_data, param_grid):
    """Run ``grid_search_orbit`` over ``param_grid`` for a MAP DLT and check the result layout."""
    train_df, _, _ = make_weekly_data

    dlt = DLT(
        response_col='response',
        date_col='week',
        seasonality=52,
        estimator='stan-map',
    )

    search_kwargs = {
        'model': dlt,
        'df': train_df,
        'min_train_len': 80,
        'incremental_len': 20,
        'forecast_len': 20,
        'metrics': None,
        'criteria': None,
        'verbose': True,
    }
    best_params, tuned_df = grid_search_orbit(param_grid, **search_kwargs)

    # the best parameter set uses exactly the grid's keys
    assert best_params[0].keys() == param_grid.keys()

    # the tuning frame has one column per grid key plus the metrics column
    wanted_columns = list(param_grid.keys()) + ['metrics']
    assert set(tuned_df.columns.to_list()) == set(wanted_columns)
    assert tuned_df.shape == (9, 3)
Esempio n. 14
0
def test_plot_predicted_data(iclaims_training_data):
    """Back-test a MAP DLT on log claims and make sure the prediction plot call runs."""
    df = iclaims_training_data
    # model the response on the log scale (overwrites the column in place)
    df['claims'] = np.log(df['claims'])

    model_kwargs = {
        'date_col': 'week',
        'response_col': 'claims',
        'regressor_col': ['trend.unemploy', 'trend.filling', 'trend.job'],
        'seasonality': 52,
        'estimator': 'stan-map',
    }
    dlt = DLT(**model_kwargs)

    backtester = BackTester(
        model=dlt,
        df=df,
        min_train_len=100,
        incremental_len=100,
        forecast_len=20,
    )
    backtester.fit_predict()
    backtest_df = backtester.get_predicted_df()

    # test plotting; only checks that the call completes
    _ = plot_bt_predictions(backtest_df, metrics=smape, ncol=2, include_vline=True)
Esempio n. 15
0
def test_dlt_full_with_regression(make_weekly_data, regressor_signs):
    """Fit an MCMC DLT with signed regressors; check init values, shapes and coefficient signs."""
    train_df, test_df, _ = make_weekly_data

    # how many regressors carry each sign
    n_positive = regressor_signs.count('+')
    n_negative = regressor_signs.count('-')
    n_regular = regressor_signs.count('=')

    model_kwargs = {
        'response_col': 'response',
        'date_col': 'week',
        # every column from the third one onward is used as a regressor
        'regressor_col': train_df.columns.tolist()[2:],
        'regressor_sign': regressor_signs,
        'prediction_percentiles': [5, 95],
        'seasonality': 52,
        'num_warmup': 50,
        'verbose': False,
        'estimator': 'stan-mcmc',
    }
    dlt = DLT(**model_kwargs)
    dlt.fit(train_df)

    initializer = dlt._model.get_init_values()
    assert isinstance(initializer, DLTInitializer)
    generated = initializer()
    assert generated['init_sea'].shape == (51,)

    # each sign group that is present gets an init vector of matching length
    for beta_key, group_size in (
        ('pr_beta', n_positive),
        ('nr_beta', n_negative),
        ('rr_beta', n_regular),
    ):
        if group_size > 0:
            assert generated[beta_key].shape == (group_size,)

    forecast = dlt.predict(test_df)
    coef_table = dlt.get_regression_coefs()

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert coef_table.shape == (6, 3)
    # one coefficient row per regressor column
    assert coef_table.shape[0] == len(train_df.columns.tolist()[2:])

    # non-negative coefficients can only come from '+' or '=' regressors,
    # non-positive ones only from '-' or '=' regressors
    coefficients = coef_table['coefficient'].values
    assert np.sum(coefficients >= 0) <= n_positive + n_regular
    assert np.sum(coefficients <= 0) <= n_negative + n_regular
Esempio n. 16
0
def test_dlt_prediction_percentiles(iclaims_training_data, prediction_percentiles):
    """Check prediction column names for plain and decomposed output with given percentiles."""
    df = iclaims_training_data

    # column suffixes expected in the output; '' stands for the plain column
    p_labels = ['_5', '_10', '', '_95'] if prediction_percentiles else ['_5', '', '_95']

    model_kwargs = {
        'response_col': 'claims',
        'date_col': 'week',
        'seasonality': 52,
        'num_warmup': 50,
        'num_sample': 50,
        'seed': 8888,
        'prediction_percentiles': prediction_percentiles,
        'estimator': 'stan-mcmc',
    }
    dlt = DLT(**model_kwargs)
    dlt.fit(df)

    # plain prediction: one column per suffix
    plain_df = dlt.predict(df)
    assert plain_df.columns.tolist() == ['week'] + ["prediction" + suffix for suffix in p_labels]
    assert plain_df.shape[0] == df.shape[0]

    # decomposed prediction: every component repeats the same suffixes, in order
    decomposed_df = dlt.predict(df, decompose=True)
    components = [
        'prediction',
        PredictionKeys.TREND.value,
        PredictionKeys.SEASONALITY.value,
        PredictionKeys.REGRESSION.value,
    ]
    wanted_columns = ['week'] + [
        component + suffix
        for component in components
        for suffix in p_labels
    ]
    assert decomposed_df.columns.tolist() == wanted_columns
    assert decomposed_df.shape[0] == df.shape[0]
Esempio n. 17
0
def test_dlt_full_reproducibility(make_weekly_data, estimator, regressor_signs, seasonality):
    """Two identically configured DLT models must give the same posteriors, coefs and predictions."""
    train_df, test_df, _ = make_weekly_data

    def _fresh_model():
        # a new instance must be created for each run to reset the seed
        return DLT(
            response_col='response',
            date_col='week',
            regressor_col=train_df.columns.tolist()[2:],
            regressor_sign=regressor_signs,
            prediction_percentiles=[5, 95],
            seasonality=seasonality,
            num_warmup=50,
            verbose=False,
            estimator=estimator,
        )

    # note both fit and predict contain random generation processes
    model_a = _fresh_model()
    model_a.fit(train_df)
    samples_a = copy(model_a._posterior_samples)
    forecast_a = model_a.predict(test_df)
    coefs_a = model_a.get_regression_coefs()

    model_b = _fresh_model()
    model_b.fit(train_df)
    samples_b = copy(model_b._posterior_samples)
    forecast_b = model_b.predict(test_df)
    coefs_b = model_b.get_regression_coefs()

    # same posterior keys on both runs
    assert set(samples_a.keys()) == set(samples_b.keys())

    # posterior draws are reproducible, key by key
    for key in samples_a:
        assert np.allclose(samples_a[key], samples_b[key])

    # identical regression coefficients; already covered by the posterior check,
    # but kept as an extra layer since end users retrieve this output very often
    assert coefs_a.equals(coefs_b)

    # prediction is reproducible
    assert forecast_a.equals(forecast_b)
Esempio n. 18
0
def test_dlt_mixed_signs_and_order(iclaims_training_data, regressor_signs):
    """Predictions must not depend on the order of regressor columns or of their signs."""
    df = iclaims_training_data
    # model the response on the log scale (overwrites the column in place)
    df['claims'] = np.log(df['claims'])

    raw_regressor_col = ['trend.unemploy', 'trend.filling', 'trend.job']
    # apply the same permutation to the regressor names and to their signs
    permutation = [1, 2, 0]
    shuffled_regressor_col = [raw_regressor_col[pos] for pos in permutation]
    shuffled_regressor_signs = [regressor_signs[pos] for pos in permutation]
    # prediction frame with the regressor columns in shuffled order
    shuffled_df = df[['claims', 'week'] + shuffled_regressor_col]

    def _fit_model(columns, signs):
        # same configuration and seed; only regressor ordering differs
        model = DLT(
            response_col='claims',
            date_col='week',
            regressor_col=columns,
            regressor_sign=signs,
            seasonality=52,
            seed=8888,
            estimator='stan-map',
        )
        model.fit(df)
        return model

    # model declared with the original ordering
    dlt = _fit_model(raw_regressor_col, regressor_signs)
    baseline = dlt.predict(df)['prediction'].values
    same_model_shuffled_frame = dlt.predict(shuffled_df)['prediction'].values

    # model declared with shuffled regressors and matching shuffled signs
    dlt_new = _fit_model(shuffled_regressor_col, shuffled_regressor_signs)
    shuffled_model_raw_frame = dlt_new.predict(df)['prediction'].values
    shuffled_model_shuffled_frame = dlt_new.predict(shuffled_df)['prediction'].values

    # they should all be identical; ordering of signs or of columns in the
    # prediction frame should not matter
    for candidate in (
        same_model_shuffled_frame,
        shuffled_model_raw_frame,
        shuffled_model_shuffled_frame,
    ):
        assert np.allclose(baseline, candidate, atol=1e-2)