def test_dlt_non_seasonal_fit(synthetic_data, estimator_type):
    """Fit DLTFull without a seasonality argument and check the output sizes.

    The model is built with only the response/date columns and the estimator
    type, fitted on the training frame and used to predict the test frame.
    The test then checks the prediction frame's shape and column names and
    the number of entries in the fitted posterior-sample container.

    Args:
        synthetic_data: 3-tuple fixture; the first two elements are used as
            the training and test data frames, the third is not needed here.
        estimator_type: Estimator forwarded to ``DLTFull``.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTFull(
        response_col='response',
        date_col='week',
        estimator_type=estimator_type,
    )
    model.fit(train_df)
    predicted = model.predict(test_df)

    # No prediction_percentiles are passed, so the test expects only the date
    # column and the point prediction.
    wanted_columns = ['week', 'prediction']

    assert predicted.shape == (51, len(wanted_columns))
    assert predicted.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == 11
def test_dlt_full_univariate(synthetic_data, estimator_type):
    """Fit a seasonal DLTFull without regressors and check the output sizes.

    NOTE(review): this file defines ``test_dlt_full_univariate`` a second time
    further down. The later ``def`` rebinds the name, so this body is not what
    pytest collects, and the two versions expect a different number of
    posterior entries (13 here) -- confirm which version should be kept.

    Args:
        synthetic_data: 3-tuple fixture; the first two elements are used as
            the training and test data frames, the third is not needed here.
        estimator_type: Estimator forwarded to ``DLTFull``.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTFull(
        response_col='response',
        date_col='week',
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)
    predicted = model.predict(test_df)

    # One column per requested percentile around the point prediction.
    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    assert predicted.shape == (51, len(wanted_columns))
    assert predicted.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == 13
def test_dlt_full_with_regression(synthetic_data, estimator_type, regressor_signs):
    """Fit a seasonal DLTFull with regressors and check prediction/coefficient shapes.

    Every column of the training frame from the third one onwards is used as
    a regressor.

    NOTE(review): this file defines ``test_dlt_full_with_regression`` a second
    time further down. The later ``def`` rebinds the name, so this body is not
    what pytest collects. This version also expects ``prediction_lower`` /
    ``prediction_upper`` columns while the later one expects ``prediction_5`` /
    ``prediction_95`` for the same percentiles -- confirm which is current.

    Args:
        synthetic_data: 3-tuple fixture; the first two elements are used as
            the training and test data frames, the third is not needed here.
        estimator_type: Estimator forwarded to ``DLTFull``.
        regressor_signs: Sign specification forwarded as ``regressor_sign``.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTFull(
        response_col='response',
        date_col='week',
        regressor_col=train_df.columns.tolist()[2:],
        regressor_sign=regressor_signs,
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)
    predicted = model.predict(test_df)
    coef_table = model.get_regression_coefs()

    wanted_columns = [
        'week',
        'prediction_lower',
        'prediction',
        'prediction_upper',
    ]

    assert predicted.shape == (51, len(wanted_columns))
    assert predicted.columns.tolist() == wanted_columns
    assert coef_table.shape == (6, 3)
    # One coefficient row per regressor column handed to the model.
    assert coef_table.shape[0] == len(train_df.columns.tolist()[2:])
def test_dlt_full_univariate(synthetic_data, estimator_type):
    """Fit a seasonal DLTFull without regressors; check init values and outputs.

    After fitting, the initializer returned by ``get_init_values`` is checked
    for its type, its ``s`` attribute and the shape of the ``init_sea`` entry
    it produces. The prediction frame and the posterior-sample container are
    then checked for their sizes.

    NOTE(review): an earlier function in this file has the same name; this
    later definition replaces it at import time -- confirm the earlier one is
    meant to be dropped.

    Args:
        synthetic_data: 3-tuple fixture; the first two elements are used as
            the training and test data frames, the third is not needed here.
        estimator_type: Estimator forwarded to ``DLTFull``.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTFull(
        response_col='response',
        date_col='week',
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)

    initializer = model.get_init_values()
    assert isinstance(initializer, DLTInitializer)
    assert initializer.s == 52
    generated = initializer()
    # The test expects one fewer seasonal init value than the seasonality (52).
    assert generated['init_sea'].shape == (51, )

    predicted = model.predict(test_df)
    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    assert predicted.shape == (51, len(wanted_columns))
    assert predicted.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == 12
def test_dlt_full_with_regression(synthetic_data, estimator_type, regressor_signs):
    """Fit a seasonal DLTFull with signed regressors; check init values and outputs.

    Every column of the training frame from the third one onwards is used as
    a regressor. The test checks the initializer's per-sign coefficient
    shapes, the prediction frame, the regression-coefficient table and that
    the number of non-negative / non-positive coefficients is compatible with
    the requested signs.

    NOTE(review): an earlier function in this file has the same name; this
    later definition replaces it at import time -- confirm the earlier one is
    meant to be dropped.

    Args:
        synthetic_data: 3-tuple fixture; the first two elements are used as
            the training and test data frames, the third is not needed here.
        estimator_type: Estimator forwarded to ``DLTFull``.
        regressor_signs: Sequence of ``'+'``, ``'-'`` and ``'='`` markers
            forwarded as ``regressor_sign``.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTFull(
        response_col='response',
        date_col='week',
        regressor_col=train_df.columns.tolist()[2:],
        regressor_sign=regressor_signs,
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)

    initializer = model.get_init_values()
    assert isinstance(initializer, DLTInitializer)
    generated = initializer()
    assert generated['init_sea'].shape == (51, )

    # Each sign marker maps to its own init entry; an entry is only checked
    # when at least one regressor carries that sign.
    for marker, init_key in (('+', 'pr_beta'), ('-', 'nr_beta'), ('=', 'rr_beta')):
        how_many = regressor_signs.count(marker)
        if how_many > 0:
            assert generated[init_key].shape == (how_many, )

    predicted = model.predict(test_df)
    coef_table = model.get_regression_coefs()
    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    assert predicted.shape == (51, len(wanted_columns))
    assert predicted.columns.tolist() == wanted_columns
    assert coef_table.shape == (6, 3)
    assert coef_table.shape[0] == len(train_df.columns.tolist()[2:])

    # Only '+' and '=' regressors may end up non-negative, and only '-' and
    # '=' regressors may end up non-positive.
    coef_values = coef_table['coefficient'].values
    n_free = regressor_signs.count('=')
    assert np.sum(coef_values >= 0) <= regressor_signs.count('+') + n_free
    assert np.sum(coef_values <= 0) <= regressor_signs.count('-') + n_free
def test_dlt_full_reproducibility(synthetic_data, estimator_type, regressor_signs, seasonality):
    """Check that two identically configured DLTFull runs give identical results.

    Two separate model instances are built with the same arguments, fitted on
    the same training frame and used to predict the same test frame. Their
    posterior samples, regression coefficients and predictions are compared.

    Args:
        synthetic_data: 3-tuple fixture; the first two elements are used as
            the training and test data frames, the third is not needed here.
        estimator_type: Estimator forwarded to ``DLTFull``.
        regressor_signs: Sign specification forwarded as ``regressor_sign``.
        seasonality: Seasonality forwarded to ``DLTFull``.
    """
    train_df, test_df, _ = synthetic_data

    def _fit_and_predict():
        # A new instance must be created for every run to reset the seed;
        # both fit and predict contain random generation processes.
        model = DLTFull(
            response_col='response',
            date_col='week',
            regressor_col=train_df.columns.tolist()[2:],
            regressor_sign=regressor_signs,
            prediction_percentiles=[5, 95],
            seasonality=seasonality,
            num_warmup=50,
            verbose=False,
            estimator_type=estimator_type,
        )
        model.fit(train_df)
        posteriors = copy(model._posterior_samples)
        predicted = model.predict(test_df)
        coef_table = model.get_regression_coefs()
        return posteriors, predicted, coef_table

    # first fit and predict
    posteriors_a, predicted_a, coefs_a = _fit_and_predict()
    # second fit and predict
    posteriors_b, predicted_b, coefs_b = _fit_and_predict()

    # assert same posterior keys
    assert set(posteriors_a.keys()) == set(posteriors_b.keys())

    # assert posterior draws are reproducible
    for name, draws in posteriors_a.items():
        assert np.allclose(draws, posteriors_b[name])

    # assert identical regression columns
    # this is also covered by the posterior comparison, but kept as an extra
    # layer since this table is very commonly retrieved by end users
    assert coefs_a.equals(coefs_b)

    # assert prediction is reproducible
    assert predicted_a.equals(predicted_b)
# Replace missing values of the response column with 0.
msk = credit_agg_short['pct_chg_in_sales_from_prev_mnth'].isna()
credit_agg_short.loc[msk, 'pct_chg_in_sales_from_prev_mnth'] = 0

# Hold out the last `test_size` rows for testing; everything before is training.
test_size = 14
train_df = credit_agg_short[:-test_size]
test_df = credit_agg_short[-test_size:]

regressors = ['panel_sales', 'transaction_count']

dlt = DLTFull(
    date_col='date',
    response_col='pct_chg_in_sales_from_prev_mnth',
    regressor_col=regressors,
    # one sigma prior per regressor
    regressor_sigma_prior=[0.5] * len(regressors),
    regression_penalty='lasso',
    seasonality=1,
    period=365,
    level_sm_input=0.3,  # recommend for higher frequency data
    prediction_percentiles=[5, 95],
    seed=2020,
)
dlt.fit(df=train_df)

# NOTE(review): PyStan documents check_hmc_diagnostics as taking a Stan fit
# object; here the DLTFull instance itself is passed -- confirm this is the
# intended argument.
pystan.check_hmc_diagnostics(dlt)

density_plot = plot_posterior_params(
    dlt,
    kind='density',
    incl_trend_params=True,
    incl_smooth_params=True,
)