Esempio n. 1
0
def test_dlt_non_seasonal_fit(synthetic_data, estimator_type):
    """Fit DLTFull without a seasonality argument and check the forecast.

    Only the response column, date column and estimator type are passed to
    the model, and the prediction frame is asserted to contain just the
    date column and the point prediction.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTFull(
        response_col='response',
        date_col='week',
        estimator_type=estimator_type,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction']

    # the forecast is asserted to have 51 rows, one column per expected name
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    # number of entries held in the posterior sample container after fitting
    assert len(model._posterior_samples) == 11
Esempio n. 2
0
def test_dlt_full_univariate(synthetic_data, estimator_type):
    """Fit a seasonal DLTFull without regressors and check the forecast.

    The model is configured with seasonality 52 and the [5, 95] prediction
    percentiles; the prediction frame is asserted to carry the date, the
    two percentile bounds and the point prediction.
    """
    train_df, test_df, _ = synthetic_data

    model_kwargs = dict(
        response_col='response',
        date_col='week',
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator_type=estimator_type,
    )
    model = DLTFull(**model_kwargs)
    model.fit(train_df)
    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    # the forecast is asserted to have 51 rows, one column per expected name
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    # number of entries held in the posterior sample container after fitting
    assert len(model._posterior_samples) == 13
Esempio n. 3
0
def test_dlt_full_with_regression(synthetic_data, estimator_type,
                                  regressor_signs):
    """Fit a seasonal DLTFull with regressors and check both outputs.

    Every training column from the third one onwards is used as a
    regressor. The test checks the shape and column names of the forecast
    and the shape of the regression coefficient table.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTFull(
        response_col='response',
        date_col='week',
        regressor_col=train_df.columns.tolist()[2:],
        regressor_sign=regressor_signs,
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    coef_table = model.get_regression_coefs()
    coef_rows = coef_table.shape[0]

    # NOTE(review): the bounds are expected under lower/upper labels even
    # though the percentiles are [5, 95] -- confirm this matches the column
    # naming of the library version under test.
    wanted_columns = [
        'week',
        'prediction_lower',
        'prediction',
        'prediction_upper',
    ]

    # the forecast is asserted to have 51 rows, one column per expected name
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    # coefficient table is asserted to be 6 rows by 3 columns
    assert coef_table.shape == (6, 3)
    # one coefficient row per regressor column of the training frame
    assert coef_rows == len(train_df.columns.tolist()[2:])
Esempio n. 4
0
def test_dlt_full_univariate(synthetic_data, estimator_type):
    """Fit a seasonal DLTFull without regressors; check init values and forecast.

    After fitting, the object returned by ``get_init_values`` is inspected
    (type, ``s`` attribute and the shape of the generated ``init_sea``
    entry) before the forecast and posterior sample container are checked.
    """
    train_df, test_df, _ = synthetic_data

    model_kwargs = dict(
        response_col='response',
        date_col='week',
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator_type=estimator_type,
    )
    model = DLTFull(**model_kwargs)
    model.fit(train_df)

    initializer = model.get_init_values()
    assert isinstance(initializer, DLTInitializer)
    # matches the seasonality passed to the model above
    assert initializer.s == 52
    generated = initializer()
    # asserted length is 51, i.e. one less than the seasonality of 52
    assert generated['init_sea'].shape == (51, )

    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    # the forecast is asserted to have 51 rows, one column per expected name
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    # number of entries held in the posterior sample container after fitting
    assert len(model._posterior_samples) == 12
Esempio n. 5
0
def test_dlt_full_with_regression(synthetic_data, estimator_type,
                                  regressor_signs):
    """Fit a seasonal DLTFull with signed regressors and check its outputs.

    Covers three things: the initial values produced by the object from
    ``get_init_values``, the shape and columns of the forecast, and the
    shape and sign pattern of the regression coefficient table.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTFull(
        response_col='response',
        date_col='week',
        regressor_col=train_df.columns.tolist()[2:],
        regressor_sign=regressor_signs,
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)

    initializer = model.get_init_values()
    assert isinstance(initializer, DLTInitializer)
    generated = initializer()
    assert generated['init_sea'].shape == (51, )

    # each sign present in regressor_signs must have an init vector with one
    # entry per regressor of that sign; absent signs are not checked
    for sign, beta_key in (('+', 'pr_beta'), ('-', 'nr_beta'),
                           ('=', 'rr_beta')):
        sign_count = regressor_signs.count(sign)
        if sign_count > 0:
            assert generated[beta_key].shape == (sign_count, )

    forecast = model.predict(test_df)

    coef_table = model.get_regression_coefs()
    coef_rows = coef_table.shape[0]

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    # the forecast is asserted to have 51 rows, one column per expected name
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    # coefficient table is asserted to be 6 rows by 3 columns
    assert coef_table.shape == (6, 3)
    # one coefficient row per regressor column of the training frame
    assert coef_rows == len(train_df.columns.tolist()[2:])

    unsigned_count = regressor_signs.count('=')

    # non-negative coefficients may only come from '+' or '=' regressors
    assert np.sum(coef_table['coefficient'].values >= 0) <= \
           regressor_signs.count('+') + unsigned_count
    # non-positive coefficients may only come from '-' or '=' regressors
    assert np.sum(coef_table['coefficient'].values <= 0) <= \
           regressor_signs.count('-') + unsigned_count
Esempio n. 6
0
def test_dlt_full_reproducibility(synthetic_data, estimator_type,
                                  regressor_signs, seasonality):
    """Check that two identically configured DLTFull runs give equal results.

    Two separate instances are built with the same arguments, each is
    fitted and used to predict, and the posterior samples, regression
    coefficients and predictions of the two runs are compared.
    """
    train_df, test_df, _ = synthetic_data

    def fit_and_collect():
        # A new instance is created for every run so the seed is reset;
        # both fit and predict contain random generation processes.
        model = DLTFull(
            response_col='response',
            date_col='week',
            regressor_col=train_df.columns.tolist()[2:],
            regressor_sign=regressor_signs,
            prediction_percentiles=[5, 95],
            seasonality=seasonality,
            num_warmup=50,
            verbose=False,
            estimator_type=estimator_type,
        )
        model.fit(train_df)
        # snapshot the posterior container before predict is called
        posteriors = copy(model._posterior_samples)
        forecast = model.predict(test_df)
        coefs = model.get_regression_coefs()
        return posteriors, forecast, coefs

    post_a, forecast_a, coefs_a = fit_and_collect()
    post_b, forecast_b, coefs_b = fit_and_collect()

    # both runs must expose the same posterior keys
    assert set(post_a.keys()) == set(post_b.keys())

    # posterior draws must be reproducible key by key
    for name, draws in post_a.items():
        assert np.allclose(draws, post_b[name])

    # Regression coefficients are already covered by the posterior check;
    # they are compared again because end users retrieve them very often.
    assert coefs_a.equals(coefs_b)

    # predictions must be reproducible
    assert forecast_a.equals(forecast_b)
Esempio n. 7
0
    msk = (credit_agg_short.loc[:, 'pct_chg_in_sales_from_prev_mnth'].isna())
    credit_agg_short.loc[msk, 'pct_chg_in_sales_from_prev_mnth'] = 0

    test_size = 14
    train_df = credit_agg_short[:-test_size]
    test_df = credit_agg_short[-test_size:]

    regressors = ['panel_sales', 'transaction_count']

    dlt = DLTFull(
        response_col='pct_chg_in_sales_from_prev_mnth',
        regressor_col=regressors,
        date_col='date',
        seasonality=1,
        seed=2020,
        level_sm_input=0.3,  # recommend for higher frequency data
        regressor_sigma_prior=[0.5] * len(regressors),
        regression_penalty='lasso',
        period=365,
        prediction_percentiles=[5, 95])

    dlt.fit(df=train_df)

    pystan.check_hmc_diagnostics(dlt)

    density_plot = plot_posterior_params(dlt,
                                         kind='density',
                                         incl_trend_params=True,
                                         incl_smooth_params=True)