def test_dlt_full_reproducibility(synthetic_data, estimator_type, regressor_signs, seasonality):
    """Check that two identically configured DLTFull runs give matching results.

    Two separate ``DLTFull`` instances are created with the same arguments.
    Each one is fit on the training frame and then used to predict on the
    test frame.  The posterior samples, the regression coefficient table and
    the prediction frame of the two runs are compared.

    Args:
        synthetic_data: fixture unpacked as ``(train_df, test_df, coef)``;
            only the two data frames are used in this test.
        estimator_type: forwarded to ``DLTFull`` as ``estimator_type``.
        regressor_signs: forwarded to ``DLTFull`` as ``regressor_sign``.
        seasonality: forwarded to ``DLTFull`` as ``seasonality``.
    """
    train_df, test_df, _ = synthetic_data

    def fit_and_predict():
        """Build a fresh model, fit, predict and return the run artifacts."""
        # note a new instance must be created to reset the seed
        # note both fit and predict contain random generation processes
        dlt = DLTFull(
            response_col='response',
            date_col='week',
            # every column after the first two is passed as a regressor;
            # the list is rebuilt per run so the instances share no state
            regressor_col=train_df.columns.tolist()[2:],
            regressor_sign=regressor_signs,
            prediction_percentiles=[5, 95],
            seasonality=seasonality,
            num_warmup=50,
            verbose=False,
            estimator_type=estimator_type,
        )
        dlt.fit(train_df)
        # snapshot the posterior container right after fitting, before predict
        posteriors = copy(dlt._posterior_samples)
        predict_df = dlt.predict(test_df)
        regression_out = dlt.get_regression_coefs()
        return posteriors, predict_df, regression_out

    # first fit and predict
    posteriors_first, predict_df_first, regression_out_first = fit_and_predict()

    # second fit and predict
    posteriors_second, predict_df_second, regression_out_second = fit_and_predict()

    # assert same posterior keys
    assert set(posteriors_first.keys()) == set(posteriors_second.keys())

    # assert posterior draws are reproducible
    for key in posteriors_first:
        assert np.allclose(posteriors_first[key], posteriors_second[key]), \
            "posterior samples for '{}' differ between runs".format(key)

    # assert identical regression columns
    # this is also checked in posterior samples, but an extra layer just in case
    # since this one very commonly retrieved by end users
    assert regression_out_first.equals(regression_out_second)

    # assert prediction is reproducible
    assert predict_df_first.equals(predict_df_second)
def test_dlt_full_with_regression(synthetic_data, estimator_type, regressor_signs):
    """Fit DLTFull with regressors and verify prediction and coefficient shapes.

    Args:
        synthetic_data: fixture unpacked as ``(train_df, test_df, coef)``;
            only the two data frames are used in this test.
        estimator_type: forwarded to ``DLTFull`` as ``estimator_type``.
        regressor_signs: forwarded to ``DLTFull`` as ``regressor_sign``.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTFull(
        response_col='response',
        date_col='week',
        # every column after the first two is passed as a regressor
        regressor_col=train_df.columns.tolist()[2:],
        regressor_sign=regressor_signs,
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)

    forecast = model.predict(test_df)
    coef_table = model.get_regression_coefs()
    coef_rows = coef_table.shape[0]

    wanted_columns = ['week', 'prediction_lower', 'prediction', 'prediction_upper']

    # prediction frame: 51 rows, one column per expected name
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns

    # coefficient table: fixed shape, and one row per regressor column
    assert coef_table.shape == (6, 3)
    assert coef_rows == len(train_df.columns.tolist()[2:])
def test_dlt_full_univariate(synthetic_data, estimator_type):
    """Fit DLTFull without regressors; check init values and prediction output.

    Args:
        synthetic_data: fixture unpacked as ``(train_df, test_df, coef)``;
            only the two data frames are used in this test.
        estimator_type: forwarded to ``DLTFull`` as ``estimator_type``.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTFull(
        response_col='response',
        date_col='week',
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)

    # the initializer is a callable object that exposes the seasonality as `s`
    initializer = model.get_init_values()
    assert isinstance(initializer, DLTInitializer)
    assert initializer.s == 52

    # NOTE(review): 51 looks like seasonality - 1; confirm against DLTInitializer
    drawn_inits = initializer()
    assert drawn_inits['init_sea'].shape == (51, )

    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    # number of entries held in the posterior sample container
    assert len(model._posterior_samples) == 12
def test_dlt_non_seasonal_fit(synthetic_data, estimator_type):
    """Fit DLTFull with no seasonality argument and check the prediction output.

    Args:
        synthetic_data: fixture unpacked as ``(train_df, test_df, coef)``;
            only the two data frames are used in this test.
        estimator_type: forwarded to ``DLTFull`` as ``estimator_type``.
    """
    train_df, test_df, _ = synthetic_data

    # only the mandatory column names and the estimator are supplied
    model = DLTFull(
        response_col='response',
        date_col='week',
        estimator_type=estimator_type,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction']

    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    # number of entries held in the posterior sample container
    assert len(model._posterior_samples) == 11
def test_dlt_full_with_regression(synthetic_data, estimator_type, regressor_signs):
    """Fit DLTFull with regressors; check init values, shapes and coefficient signs.

    Args:
        synthetic_data: fixture unpacked as ``(train_df, test_df, coef)``;
            only the two data frames are used in this test.
        estimator_type: forwarded to ``DLTFull`` as ``estimator_type``.
        regressor_signs: sequence of ``'+'``, ``'-'`` and ``'='`` markers,
            forwarded to ``DLTFull`` as ``regressor_sign``.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTFull(
        response_col='response',
        date_col='week',
        # every column after the first two is passed as a regressor
        regressor_col=train_df.columns.tolist()[2:],
        regressor_sign=regressor_signs,
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)

    initializer = model.get_init_values()
    assert isinstance(initializer, DLTInitializer)

    drawn_inits = initializer()
    assert drawn_inits['init_sea'].shape == (51, )

    # each sign marker has its own init vector, sized by how many regressors
    # carry that marker; a marker that never occurs is not checked
    for marker, init_key in (('+', 'pr_beta'), ('-', 'nr_beta'), ('=', 'rr_beta')):
        marker_count = regressor_signs.count(marker)
        if marker_count > 0:
            assert drawn_inits[init_key].shape == (marker_count, )

    forecast = model.predict(test_df)
    coef_table = model.get_regression_coefs()
    coef_rows = coef_table.shape[0]

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns

    # coefficient table: fixed shape, and one row per regressor column
    assert coef_table.shape == (6, 3)
    assert coef_rows == len(train_df.columns.tolist()[2:])

    # the count of non-negative coefficients may not exceed the number of
    # '+' and '=' regressors, and likewise for non-positive ones with '-'
    unconstrained = regressor_signs.count('=')
    non_negative = np.sum(coef_table['coefficient'].values >= 0)
    assert non_negative <= regressor_signs.count('+') + unconstrained
    non_positive = np.sum(coef_table['coefficient'].values <= 0)
    assert non_positive <= regressor_signs.count('-') + unconstrained
def test_dlt_full_univariate(synthetic_data, estimator_type):
    """Fit DLTFull without regressors and check the prediction output.

    Args:
        synthetic_data: fixture unpacked as ``(train_df, test_df, coef)``;
            only the two data frames are used in this test.
        estimator_type: forwarded to ``DLTFull`` as ``estimator_type``.
    """
    train_df, test_df, _ = synthetic_data

    model = DLTFull(
        response_col='response',
        date_col='week',
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    # number of entries held in the posterior sample container
    assert len(model._posterior_samples) == 13
incl_smooth_params=True) trace_plot = plot_posterior_params(dlt, kind='trace', incl_trend_params=True, incl_smooth_params=True) pair_plot = plot_posterior_params(dlt, kind='pair', pair_type='reg', incl_trend_params=False, incl_smooth_params=False) num_periods = 12 * 1 freq = 1 date_col = dlt.date_col last_dt = (dlt.date_col.dt.to_pydatetime())[-1] dts = [ last_dt + timedelta(days=x * freq) for x in range(1, num_periods + 1) ] future_df = pd.DataFrame(dts, columns=[date_col]) predicted_df_dlt = dlt.predict(df=future_df, decompose=True) plot_predicted_data(training_actual_df=credit_agg_short[-90:], predicted_df=predicted_df_dlt[-90:], test_actual_df=test_df, date_col=dlt.date_col, actual_col='pct_chg_in_sales_from_prev_mnth', pred_col='predicted_pct_chg_in_sales')