def test_dlt_map_univariate(make_weekly_data):
    train_df, test_df, coef = make_weekly_data

    dlt = DLT(
        response_col='response',
        date_col='week',
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator='stan-map'
    )
    dlt.fit(train_df)

    init_call = dlt._model.get_init_values()
    assert isinstance(init_call, DLTInitializer)
    assert init_call.s == 52
    init_values = init_call()
    assert init_values['init_sea'].shape == (51,)

    predict_df = dlt.predict(test_df)

    expected_columns = ['week', 'prediction']
    expected_shape = (51, len(expected_columns))
    expected_num_parameters = 12

    assert predict_df.shape == expected_shape
    assert predict_df.columns.tolist() == expected_columns
    assert len(dlt._posterior_samples) == expected_num_parameters


def test_base_dlt_init(estimator):
    dlt = DLT(estimator=estimator)

    is_fitted = dlt.is_fitted()

    model_data_input = dlt.get_training_data_input()
    model_param_names = dlt._model.get_model_param_names()
    init_values = dlt._model.get_init_values()

    # model is not yet fitted
    assert not is_fitted
    # should only be initialized and not set
    assert not model_data_input
    # model param names should already be set
    assert model_param_names
    # callable is not implemented yet
    assert not init_values


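# NOTE (illustrative sketch): the `estimator` argument used throughout these tests is
# supplied by a pytest parametrization/fixture defined elsewhere in the suite and not
# shown in this section. The values below are an assumption based on the estimators
# referenced in this file ('stan-map', 'stan-mcmc', and 'pyro-svi'); the actual
# conftest setup may differ. A minimal sketch of such a fixture:
#
#     @pytest.fixture(params=['stan-map', 'stan-mcmc'])
#     def estimator(request):
#         return request.param

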
def test_dlt_aggregated_with_regression(make_weekly_data, estimator, regressor_signs, point_method):
    train_df, test_df, coef = make_weekly_data

    dlt = DLT(
        response_col='response',
        date_col='week',
        regressor_col=train_df.columns.tolist()[2:],
        regressor_sign=regressor_signs,
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator=estimator
    )
    dlt.fit(train_df, point_method=point_method)
    predict_df = dlt.predict(test_df)

    regression_out = dlt.get_regression_coefs()
    num_regressors = regression_out.shape[0]

    expected_columns = ['week', 'prediction']
    expected_shape = (51, len(expected_columns))
    expected_regression_shape = (6, 3)

    assert predict_df.shape == expected_shape
    assert predict_df.columns.tolist() == expected_columns
    assert regression_out.shape == expected_regression_shape
    assert num_regressors == len(train_df.columns.tolist()[2:])

    predict_df = dlt.predict(test_df, decompose=True)
    assert any(predict_df['regression'].values)


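# NOTE (illustrative sketch): `regressor_signs` and `point_method` are parametrized
# outside this section. Since the weekly data carries six regressors
# (expected_regression_shape == (6, 3)), the sign lists are assumed to have length
# six, and `point_method` is assumed to be one of the point-estimate options accepted
# by fit(). These values are assumptions, not the original parametrization:
#
#     @pytest.mark.parametrize("regressor_signs", [['+'] * 6, ['='] * 6, ['+', '+', '-', '-', '=', '=']])
#     @pytest.mark.parametrize("point_method", ['mean', 'median'])

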
def test_dlt_map_global_trend(make_weekly_data, global_trend_option):
    train_df, test_df, coef = make_weekly_data

    dlt = DLT(
        response_col='response',
        date_col='week',
        seasonality=52,
        global_trend_option=global_trend_option,
        estimator='stan-map'
    )
    dlt.fit(train_df)
    predict_df = dlt.predict(test_df)

    expected_columns = ['week', 'prediction']
    expected_shape = (51, len(expected_columns))

    assert predict_df.shape == expected_shape
    assert predict_df.columns.tolist() == expected_columns


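# NOTE (illustrative sketch): `global_trend_option` is parametrized elsewhere. DLT
# documents 'linear', 'loglinear', 'logistic', and 'flat' as its global trend
# options, so the parametrization is assumed to look roughly like the following
# (the actual set of values used by the suite is not shown here):
#
#     @pytest.mark.parametrize("global_trend_option", ['linear', 'loglinear', 'logistic', 'flat'])

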
def test_dlt_non_seasonal_fit(make_weekly_data, estimator):
    train_df, test_df, coef = make_weekly_data

    dlt = DLT(
        response_col='response',
        date_col='week',
        estimator=estimator,
        num_warmup=50,
    )
    dlt.fit(train_df)
    predict_df = dlt.predict(test_df)

    expected_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    expected_shape = (51, len(expected_columns))
    expected_num_parameters = 10

    assert predict_df.shape == expected_shape
    assert predict_df.columns.tolist() == expected_columns
    assert len(dlt._posterior_samples) == expected_num_parameters


def test_dlt_fixed_sm_input(make_weekly_data, level_sm_input, seasonality_sm_input, slope_sm_input):
    train_df, test_df, coef = make_weekly_data

    dlt = DLT(
        response_col='response',
        date_col='week',
        regressor_col=train_df.columns.tolist()[2:],
        level_sm_input=level_sm_input,
        seasonality_sm_input=seasonality_sm_input,
        slope_sm_input=slope_sm_input,
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator='stan-map'
    )
    dlt.fit(train_df)
    predict_df = dlt.predict(test_df)

    regression_out = dlt.get_regression_coefs()
    num_regressors = regression_out.shape[0]

    expected_columns = ['week', 'prediction']
    expected_shape = (51, len(expected_columns))
    expected_regression_shape = (6, 3)

    assert predict_df.shape == expected_shape
    assert predict_df.columns.tolist() == expected_columns
    assert regression_out.shape == expected_regression_shape
    assert num_regressors == len(train_df.columns.tolist()[2:])


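# NOTE (illustrative sketch): the fixed smoothing inputs are parametrized elsewhere.
# level_sm_input, seasonality_sm_input, and slope_sm_input are smoothing parameters
# bounded in (0, 1], so the parametrization is assumed to use values such as the
# following (the actual values are not shown in this section):
#
#     @pytest.mark.parametrize("level_sm_input", [0.3, 0.9])
#     @pytest.mark.parametrize("seasonality_sm_input", [0.3, 0.9])
#     @pytest.mark.parametrize("slope_sm_input", [0.3, 0.9])

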
def test_dlt_map_reproducibility(make_weekly_data, seasonality):
    train_df, test_df, coef = make_weekly_data

    dlt1 = DLT(
        response_col='response',
        date_col='week',
        prediction_percentiles=[5, 95],
        seasonality=seasonality,
        estimator='stan-map'
    )

    # first fit and predict
    dlt1.fit(train_df)
    posteriors1 = copy(dlt1._point_posteriors['map'])
    prediction1 = dlt1.predict(test_df)

    # second fit and predict
    # note a new instance must be created to reset the seed
    # note both fit and predict contain random generation processes
    dlt2 = DLT(
        response_col='response',
        date_col='week',
        prediction_percentiles=[5, 95],
        seasonality=seasonality,
        estimator='stan-map'
    )
    dlt2.fit(train_df)
    posteriors2 = copy(dlt2._point_posteriors['map'])
    prediction2 = dlt2.predict(test_df)

    # assert same posterior keys
    assert set(posteriors1.keys()) == set(posteriors2.keys())

    # assert posterior draws are reproducible
    for k, v in posteriors1.items():
        assert np.allclose(posteriors1[k], posteriors2[k])

    # assert prediction is reproducible
    assert np.allclose(prediction1['prediction'].values, prediction2['prediction'].values)


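# NOTE (illustrative sketch, not used by the tests): the `make_weekly_data` conftest
# fixture (and the `seasonality` parametrization, assumed to cover a non-seasonal and
# a weekly-seasonal case such as [None, 52]) is not shown in this section. Based on
# how it is consumed above, the fixture is assumed to return (train_df, test_df, coef)
# where both frames have a 'week' date column, a 'response' column, and six regressor
# columns, with test_df holding 51 weekly rows. A hypothetical stand-in:
def _example_make_weekly_data(seed=2020, n_train=156, n_test=51, n_regressors=6):
    import pandas as pd

    rng = np.random.default_rng(seed)
    n = n_train + n_test
    weeks = pd.date_range('2018-01-01', periods=n, freq='W-MON')
    # true regression coefficients and a simple linear response with noise
    coef = rng.normal(size=n_regressors)
    regressors = rng.normal(size=(n, n_regressors))
    response = 10 + regressors @ coef + rng.normal(scale=0.5, size=n)
    df = pd.DataFrame(regressors, columns=[f'regressor_{i + 1}' for i in range(n_regressors)])
    df.insert(0, 'response', response)
    df.insert(0, 'week', weeks)
    return df.iloc[:n_train], df.iloc[n_train:].reset_index(drop=True), coef

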
def test_dlt_map_single_regressor(iclaims_training_data):
    df = iclaims_training_data
    df['claims'] = np.log(df['claims'])
    regressor_col = ['trend.unemploy']

    dlt = DLT(
        response_col='claims',
        date_col='week',
        regressor_col=regressor_col,
        seasonality=52,
        seed=8888,
        estimator='stan-map'
    )
    dlt.fit(df)
    predicted_df = dlt.predict(df)

    expected_num_parameters = 13
    expected_columns = ['week', 'prediction']

    assert predicted_df.shape[0] == df.shape[0]
    assert predicted_df.columns.tolist() == expected_columns
    assert len(dlt._posterior_samples) == expected_num_parameters


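# NOTE (illustrative sketch): `iclaims_training_data` is a conftest fixture not shown
# here. It is assumed to be the weekly initial-claims dataset shipped with orbit
# (columns include 'week', 'claims', 'trend.unemploy', 'trend.filling', 'trend.job'),
# i.e. something along the lines of:
#
#     from orbit.utils.dataset import load_iclaims
#
#     @pytest.fixture
#     def iclaims_training_data():
#         return load_iclaims()

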
def test_dlt_is_fitted(iclaims_training_data, estimator, keep_samples, point_method):
    df = iclaims_training_data
    df['claims'] = np.log(df['claims'])
    regressor_col = ['trend.unemploy']

    dlt = DLT(
        response_col='claims',
        date_col='week',
        regressor_col=regressor_col,
        seasonality=52,
        seed=8888,
        num_warmup=50,
        num_sample=50,
        verbose=False,
        estimator=estimator
    )
    dlt.fit(df, keep_samples=keep_samples, point_method=point_method)
    is_fitted = dlt.is_fitted()

    # still True when keep_samples is False
    assert is_fitted


def test_dlt_predict_seed(make_weekly_data, estimator, random_seed):
    train_df, test_df, coef = make_weekly_data

    args = {
        'response_col': 'response',
        'date_col': 'week',
        'seasonality': 52,
        'n_bootstrap_draws': 100,
        'verbose': False,
        'estimator': estimator,
    }
    if estimator == 'stan-mcmc':
        args.update({'num_warmup': 50, 'num_sample': 100})
    elif estimator == 'pyro-svi':
        args.update({'num_steps': 10})

    dlt = DLT(**args)
    dlt.fit(train_df)
    predict_df1 = dlt.predict(test_df, seed=random_seed)
    predict_df2 = dlt.predict(test_df, seed=random_seed)

    # predictions with the same seed should match exactly
    assert all(predict_df1['prediction'].values == predict_df2['prediction'].values)


def test_invalid_regressor(make_weekly_data, regressor_signs, invalid_input):
    train_df, test_df, coef = make_weekly_data
    regressor_col = train_df.columns.tolist()[2:]
    # inject an invalid value into one regressor cell
    train_df.loc[36, regressor_col[0]] = invalid_input

    expected_flag = False
    try:
        dlt = DLT(
            response_col='response',
            date_col='week',
            regressor_col=regressor_col,
            regressor_sign=regressor_signs,
            prediction_percentiles=[5, 95],
            seasonality=52,
            num_warmup=50,
            verbose=False,
            estimator='stan-map'
        )
        dlt.fit(train_df)
    except ModelException:
        expected_flag = True

    assert expected_flag


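# NOTE (illustrative sketch): `invalid_input` is parametrized elsewhere. Since the
# test expects DLT to raise ModelException when a regressor cell is corrupted, the
# values are assumed to be non-finite entries such as:
#
#     @pytest.mark.parametrize("invalid_input", [np.nan, np.inf])

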
def test_dlt_missing(iclaims_training_data, estimator):
    df = iclaims_training_data
    missing_idx = np.array([10, 20, 30, 40, 41, 42, 43, 44, df.shape[0] - 1])
    df.loc[missing_idx, 'claims'] = np.nan

    dlt = DLT(
        response_col='claims',
        date_col='week',
        seasonality=52,
        verbose=False,
        estimator=estimator
    )
    dlt.fit(df)
    predicted_df = dlt.predict(df)

    if estimator == 'stan-map':
        expected_columns = ['week', 'prediction']
    elif estimator == 'stan-mcmc':
        expected_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    assert all(~np.isnan(predicted_df['prediction']))
    assert predicted_df.columns.tolist() == expected_columns
    assert predicted_df.shape[0] == df.shape[0]


def test_dlt_grid_tuning(make_weekly_data, param_grid):
    train_df, test_df, coef = make_weekly_data

    args = {
        'response_col': 'response',
        'date_col': 'week',
        'seasonality': 52,
        'estimator': 'stan-map',
    }
    dlt = DLT(**args)

    best_params, tuned_df = grid_search_orbit(
        param_grid,
        model=dlt,
        df=train_df,
        min_train_len=80,
        incremental_len=20,
        forecast_len=20,
        metrics=None,
        criteria=None,
        verbose=True
    )

    assert best_params[0].keys() == param_grid.keys()
    assert set(tuned_df.columns.to_list()) == set(list(param_grid.keys()) + ['metrics'])
    assert tuned_df.shape == (9, 3)


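# NOTE (illustrative sketch, not used by the tests): the `param_grid` fixture is not
# shown in this section. The assertion tuned_df.shape == (9, 3) implies two
# hyperparameters with three candidate values each (3 x 3 = 9 rows; 2 parameter
# columns + 'metrics' = 3 columns). A hypothetical grid consistent with that shape:
_example_param_grid = {
    'level_sm_input': [0.3, 0.5, 0.8],
    'damped_factor': [0.3, 0.5, 0.8],
}

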
def test_plot_predicted_data(iclaims_training_data):
    df = iclaims_training_data
    df['claims'] = np.log(df['claims'])
    regressor_col = ['trend.unemploy', 'trend.filling', 'trend.job']

    dlt = DLT(
        date_col='week',
        response_col='claims',
        regressor_col=regressor_col,
        seasonality=52,
        estimator='stan-map'
    )
    bt = BackTester(
        model=dlt,
        df=df,
        min_train_len=100,
        incremental_len=100,
        forecast_len=20
    )
    bt.fit_predict()
    predicted_df = bt.get_predicted_df()

    # test plotting
    _ = plot_bt_predictions(predicted_df, metrics=smape, ncol=2, include_vline=True)


def test_dlt_full_with_regression(make_weekly_data, regressor_signs):
    train_df, test_df, coef = make_weekly_data

    dlt = DLT(
        response_col='response',
        date_col='week',
        regressor_col=train_df.columns.tolist()[2:],
        regressor_sign=regressor_signs,
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_warmup=50,
        verbose=False,
        estimator='stan-mcmc'
    )
    dlt.fit(train_df)

    init_call = dlt._model.get_init_values()
    assert isinstance(init_call, DLTInitializer)
    init_values = init_call()
    assert init_values['init_sea'].shape == (51,)
    if regressor_signs.count('+') > 0:
        assert init_values['pr_beta'].shape == (regressor_signs.count('+'),)
    if regressor_signs.count('-') > 0:
        assert init_values['nr_beta'].shape == (regressor_signs.count('-'),)
    if regressor_signs.count('=') > 0:
        assert init_values['rr_beta'].shape == (regressor_signs.count('='),)

    predict_df = dlt.predict(test_df)

    regression_out = dlt.get_regression_coefs()
    num_regressors = regression_out.shape[0]

    expected_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    expected_shape = (51, len(expected_columns))
    expected_regression_shape = (6, 3)

    assert predict_df.shape == expected_shape
    assert predict_df.columns.tolist() == expected_columns
    assert regression_out.shape == expected_regression_shape
    assert num_regressors == len(train_df.columns.tolist()[2:])
    assert np.sum(regression_out['coefficient'].values >= 0) <= \
        regressor_signs.count('+') + regressor_signs.count('=')
    assert np.sum(regression_out['coefficient'].values <= 0) <= \
        regressor_signs.count('-') + regressor_signs.count('=')


def test_dlt_prediction_percentiles(iclaims_training_data, prediction_percentiles):
    df = iclaims_training_data

    dlt = DLT(
        response_col='claims',
        date_col='week',
        seasonality=52,
        num_warmup=50,
        num_sample=50,
        seed=8888,
        prediction_percentiles=prediction_percentiles,
        estimator='stan-mcmc'
    )

    if not prediction_percentiles:
        p_labels = ['_5', '', '_95']
    else:
        p_labels = ['_5', '_10', '', '_95']

    dlt.fit(df)
    predicted_df = dlt.predict(df)
    expected_columns = ['week'] + ["prediction" + p for p in p_labels]
    assert predicted_df.columns.tolist() == expected_columns
    assert predicted_df.shape[0] == df.shape[0]

    predicted_df = dlt.predict(df, decompose=True)
    predicted_components = [
        'prediction',
        PredictionKeys.TREND.value,
        PredictionKeys.SEASONALITY.value,
        PredictionKeys.REGRESSION.value,
    ]
    expected_columns = ['week']
    for pc in predicted_components:
        for p in p_labels:
            expected_columns.append(pc + p)
    assert predicted_df.columns.tolist() == expected_columns
    assert predicted_df.shape[0] == df.shape[0]


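# NOTE (illustrative sketch): `prediction_percentiles` is parametrized elsewhere. The
# branching above implies one case that falls back to the default 5/95 bounds and one
# case that adds a 10th percentile, so the parametrization is assumed to be something
# like:
#
#     @pytest.mark.parametrize("prediction_percentiles", [None, [5, 10, 95]])

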
def test_dlt_full_reproducibility(make_weekly_data, estimator, regressor_signs, seasonality):
    train_df, test_df, coef = make_weekly_data

    dlt_first = DLT(
        response_col='response',
        date_col='week',
        regressor_col=train_df.columns.tolist()[2:],
        regressor_sign=regressor_signs,
        prediction_percentiles=[5, 95],
        seasonality=seasonality,
        num_warmup=50,
        verbose=False,
        estimator=estimator
    )

    # first fit and predict
    dlt_first.fit(train_df)
    posteriors_first = copy(dlt_first._posterior_samples)
    predict_df_first = dlt_first.predict(test_df)
    regression_out_first = dlt_first.get_regression_coefs()

    # second fit and predict
    # note a new instance must be created to reset the seed
    # note both fit and predict contain random generation processes
    dlt_second = DLT(
        response_col='response',
        date_col='week',
        regressor_col=train_df.columns.tolist()[2:],
        regressor_sign=regressor_signs,
        prediction_percentiles=[5, 95],
        seasonality=seasonality,
        num_warmup=50,
        verbose=False,
        estimator=estimator
    )
    dlt_second.fit(train_df)
    posteriors_second = copy(dlt_second._posterior_samples)
    predict_df_second = dlt_second.predict(test_df)
    regression_out_second = dlt_second.get_regression_coefs()

    # assert same posterior keys
    assert set(posteriors_first.keys()) == set(posteriors_second.keys())

    # assert posterior draws are reproducible
    for k, v in posteriors_first.items():
        assert np.allclose(posteriors_first[k], posteriors_second[k])

    # assert identical regression coefficients
    # this is also covered by the posterior-sample check above, but it adds an extra
    # layer of safety since this output is very commonly retrieved by end users
    assert regression_out_first.equals(regression_out_second)

    # assert prediction is reproducible
    assert predict_df_first.equals(predict_df_second)


def test_dlt_mixed_signs_and_order(iclaims_training_data, regressor_signs):
    df = iclaims_training_data
    df['claims'] = np.log(df['claims'])
    raw_regressor_col = ['trend.unemploy', 'trend.filling', 'trend.job']
    new_regressor_col = [raw_regressor_col[idx] for idx in [1, 2, 0]]
    new_regressor_signs = [regressor_signs[idx] for idx in [1, 2, 0]]
    # shuffle the ordering of columns in the prediction df
    new_df = df[['claims', 'week'] + new_regressor_col]

    dlt = DLT(
        response_col='claims',
        date_col='week',
        regressor_col=raw_regressor_col,
        regressor_sign=regressor_signs,
        seasonality=52,
        seed=8888,
        estimator='stan-map'
    )
    dlt.fit(df)
    predicted_df_v1 = dlt.predict(df)
    predicted_df_v2 = dlt.predict(new_df)

    # shuffle the ordering of signs
    dlt_new = DLT(
        response_col='claims',
        date_col='week',
        regressor_col=new_regressor_col,
        regressor_sign=new_regressor_signs,
        seasonality=52,
        seed=8888,
        estimator='stan-map'
    )
    dlt_new.fit(df)
    predicted_df_v3 = dlt_new.predict(df)
    predicted_df_v4 = dlt_new.predict(new_df)

    pred_v1 = predicted_df_v1['prediction'].values
    pred_v2 = predicted_df_v2['prediction'].values
    pred_v3 = predicted_df_v3['prediction'].values
    pred_v4 = predicted_df_v4['prediction'].values

    # all four should be identical; the ordering of signs or of columns at prediction
    # time should not matter
    assert np.allclose(pred_v1, pred_v2, atol=1e-2)
    assert np.allclose(pred_v1, pred_v3, atol=1e-2)
    assert np.allclose(pred_v1, pred_v4, atol=1e-2)