def test_backtester_test_metrics(iclaims_training_data, metrics):
    """Check that ``BackTester.score`` reports exactly the requested metrics.

    ``metrics`` may be ``None`` (the backtester's ``_default_metrics`` are
    then expected), a list of callables, or a single callable.
    """
    model = LGTMAP(response_col='claims', date_col='week', seasonality=1, verbose=False)
    bt = BackTester(
        model=model,
        df=iclaims_training_data,
        forecast_len=3,
        n_splits=1,
    )
    bt.fit_predict()
    scores = bt.score(metrics=metrics)
    reported_names = set(scores['metric_name'].tolist())

    # Normalise the three accepted forms of ``metrics`` into one iterable
    # of callables so the expected names are derived in a single place.
    if metrics is None:
        metric_fns = bt._default_metrics
    elif isinstance(metrics, list):
        metric_fns = metrics
    else:
        metric_fns = [metrics]

    expected_names = {fn.__name__ for fn in metric_fns}
    assert expected_names == reported_names
def test_lgt_predict_all_positive_reg(iclaims_training_data):
    """Fit with three '+'-signed regressors and check the decomposed output.

    Passes when the ``'regression'`` column of the decomposed prediction
    contains at least one truthy (non-zero) value.
    """
    data = iclaims_training_data
    regressors = ['trend.unemploy', 'trend.filling', 'trend.job']
    model = LGTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=regressors,
        regressor_sign=['+'] * len(regressors),
        seasonality=52,
        seed=8888,
    )
    model.fit(data)

    decomposed = model.predict(data, decompose=True)
    regression_component = decomposed['regression'].values
    assert any(regression_component)
def test_backtester_with_training_data(iclaims_training_data):
    """Check metric names reported when training metrics are included.

    Test rows must cover every default metric. Training rows must cover only
    those default metrics whose callable signature is exactly
    ``{'actual', 'predicted'}``.
    """
    model = LGTMAP(response_col='claims', date_col='week', seasonality=1, verbose=False)
    bt = BackTester(
        model=model,
        df=iclaims_training_data,
        min_train_len=100,
        incremental_len=100,
        forecast_len=20,
    )
    bt.fit_predict()
    scores = bt.score(include_training_metrics=True)

    # Split the reported metric names by the training/test flag.
    is_training = scores['is_training_metric']
    reported_test = set(scores.loc[~is_training, 'metric_name'].tolist())
    reported_train = set(scores.loc[is_training, 'metric_name'].tolist())

    expected_test = {fn.__name__ for fn in bt._default_metrics}
    expected_train = {
        fn.__name__
        for fn in bt._default_metrics
        if bt._get_metric_callable_signature(fn) == {'actual', 'predicted'}
    }

    assert expected_test == reported_test
    assert expected_train == reported_train
def test_lgt_map_univariate(synthetic_data, estimator_type):
    """Fit a regressor-free LGTMAP and check prediction layout and parameter count."""
    train_df, test_df, _ = synthetic_data
    model = LGTMAP(
        response_col='response',
        date_col='week',
        seasonality=52,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    columns = ['week', 'prediction']
    # no `lp__` parameter in optimizing()
    n_parameters = 12

    assert forecast.shape == (51, len(columns))
    assert forecast.columns.tolist() == columns
    assert len(model._posterior_samples) == n_parameters
def test_lgt_map_single_regressor(iclaims_training_data):
    """Fit LGTMAP with one regressor on log-transformed claims.

    Checks that the prediction has one row per input row, the expected
    percentile columns, and 13 posterior parameters.
    """
    # Work on a copy: the log transform below would otherwise overwrite the
    # 'claims' column of the DataFrame object handed out by the fixture and
    # leak into any other consumer sharing that object.
    df = iclaims_training_data.copy()
    df['claims'] = np.log(df['claims'])
    regressor_col = ['trend.unemploy']

    lgt = LGTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=regressor_col,
        seasonality=52,
        seed=8888,
    )
    lgt.fit(df)
    predicted_df = lgt.predict(df)

    expected_num_parameters = 13
    expected_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    assert predicted_df.shape[0] == df.shape[0]
    assert predicted_df.columns.tolist() == expected_columns
    assert len(lgt._posterior_samples) == expected_num_parameters
def test_lgt_fixed_sm_input(synthetic_data, level_sm_input, seasonality_sm_input, slope_sm_input):
    """Fit LGTMAP with the supplied smoothing inputs and check output shapes.

    Every column of the training frame after the first two is used as a
    regressor. The regression coefficient table must have one row per
    regressor.
    """
    train_df, test_df, _ = synthetic_data
    regressors = train_df.columns.tolist()[2:]

    model = LGTMAP(
        response_col='response',
        date_col='week',
        regressor_col=regressors,
        level_sm_input=level_sm_input,
        seasonality_sm_input=seasonality_sm_input,
        slope_sm_input=slope_sm_input,
        seasonality=52,
        verbose=False,
    )
    model.fit(train_df)
    forecast = model.predict(test_df, n_bootstrap_draw=100)
    coefs = model.get_regression_coefs()

    columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    assert forecast.shape == (51, len(columns))
    assert forecast.columns.tolist() == columns
    assert coefs.shape == (6, 3)
    assert coefs.shape[0] == len(regressors)
def test_lgt_map_fit(synthetic_data, seasonality, estimator_type):
    """Fit LGTMAP and check the init-value callable, prediction layout and parameter count.

    With a truthy ``seasonality`` the init values must be an
    ``LGTInitializer`` with ``s == 52`` that produces an ``init_sea`` array
    of shape ``(51,)``. Otherwise the init values must be falsy.
    """
    # NOTE(review): this file appears to define `test_lgt_map_fit` more than
    # once; a later definition with the same name would shadow this one at
    # import time, so this test may never be collected. Confirm and rename.
    train_df, test_df, _ = synthetic_data
    model = LGTMAP(
        response_col='response',
        date_col='week',
        seasonality=seasonality,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)

    initializer = model.get_init_values()
    if not seasonality:
        assert not initializer
    else:
        assert isinstance(initializer, LGTInitializer)
        assert initializer.s == 52
        generated = initializer()
        assert generated['init_sea'].shape == (51, )

    forecast = model.predict(test_df)

    columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    # Two extra parameters are expected when seasonality is 52.
    n_parameters = 12 if seasonality == 52 else 10

    assert forecast.shape == (51, len(columns))
    assert forecast.columns.tolist() == columns
    assert len(model._posterior_samples) == n_parameters
def test_lgt_map_reproducibility(synthetic_data, seasonality):
    """Two identically configured LGTMAP runs must give matching results.

    Compares the ``'map'`` aggregate posteriors and the ``'prediction'``
    column of the forecasts from two independent fit/predict cycles.
    """
    train_df, test_df, _ = synthetic_data

    def _fit_and_predict():
        # note a new instance must be created to reset the seed
        # note both fit and predict contain random generation processes
        model = LGTMAP(
            response_col='response',
            date_col='week',
            prediction_percentiles=[5, 95],
            seasonality=seasonality,
        )
        model.fit(train_df)
        map_posteriors = copy(model._aggregate_posteriors['map'])
        forecast = model.predict(test_df)
        return map_posteriors, forecast

    # first fit and predict
    first_posteriors, first_forecast = _fit_and_predict()
    # second fit and predict
    second_posteriors, second_forecast = _fit_and_predict()

    # assert same posterior keys
    assert set(first_posteriors.keys()) == set(second_posteriors.keys())

    # assert posterior draws are reproducible
    for name, draws in first_posteriors.items():
        assert np.allclose(draws, second_posteriors[name])

    # assert prediction is reproducible
    assert np.allclose(
        first_forecast['prediction'].values,
        second_forecast['prediction'].values,
    )
def test_lgt_map_fit(synthetic_data, seasonality, estimator_type):
    """Fit LGTMAP and check the prediction layout and posterior parameter count."""
    # NOTE(review): an earlier function named `test_lgt_map_fit` appears in
    # this file; this later definition would replace it at import time.
    # Confirm and give the two tests distinct names.
    train_df, test_df, _ = synthetic_data
    model = LGTMAP(
        response_col='response',
        date_col='week',
        seasonality=seasonality,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    # Two extra parameters are expected when seasonality is 52.
    n_parameters = 12 if seasonality == 52 else 10

    assert forecast.shape == (51, len(columns))
    assert forecast.columns.tolist() == columns
    assert len(model._posterior_samples) == n_parameters
def test_backtester_sceduler_args(iclaims_training_data, scheduler_args):
    """Run a backtest with the given scheduler kwargs and require positive SMAPE values."""
    model = LGTMAP(response_col='claims', date_col='week', seasonality=1, verbose=False)
    bt = BackTester(model=model, df=iclaims_training_data, **scheduler_args)
    bt.fit_predict()

    scores = bt.score(metrics=[smape])
    metric_values = scores['metric_values'].values
    assert np.all(metric_values > 0)
def test_lgt_grid_tuning(synthetic_data, param_grid):
    """Grid-search LGTMAP over ``param_grid`` and check the result layout.

    The first best-parameter entry must have the grid's keys. The tuning
    table must have one column per grid key plus ``'metrics'``, and shape
    ``(9, 3)``.
    """
    train_df, _, _ = synthetic_data
    model = LGTMAP(response_col='response', date_col='week', seasonality=52)

    best_params, tuned_df = grid_search_orbit(
        param_grid,
        model=model,
        df=train_df,
        min_train_len=80,
        incremental_len=20,
        forecast_len=20,
        metrics=None,
        criteria=None,
        verbose=True,
    )

    expected_columns = set(param_grid.keys()) | {'metrics'}

    assert best_params[0].keys() == param_grid.keys()
    assert set(tuned_df.columns.to_list()) == expected_columns
    assert tuned_df.shape == (9, 3)
def test_backtester_test_data_only(iclaims_training_data):
    """With default ``score()`` arguments, the reported metrics must be the defaults."""
    model = LGTMAP(response_col='claims', date_col='week', seasonality=1, verbose=False)
    bt = BackTester(
        model=model,
        df=iclaims_training_data,
        min_train_len=100,
        incremental_len=100,
        forecast_len=20,
    )
    bt.fit_predict()

    reported_names = set(bt.score()['metric_name'].tolist())
    expected_names = {fn.__name__ for fn in bt._default_metrics}
    assert expected_names == reported_names
def test_lgt_mixed_signs_and_order(iclaims_training_data, regressor_signs):
    """Predictions must not depend on regressor or column ordering.

    Fits one model with the regressors (and their signs) in the original
    order and another with both reordered. Each model then predicts on the
    original frame and on a frame whose regressor columns are reordered.
    All four predictions must agree within ``atol=1e-3``.
    """
    # Work on a copy: the log transform below would otherwise overwrite the
    # 'claims' column of the DataFrame object handed out by the fixture and
    # leak into any other consumer sharing that object.
    df = iclaims_training_data.copy()
    df['claims'] = np.log(df['claims'])

    reorder = [2, 1, 0]
    raw_regressor_col = ['trend.unemploy', 'trend.filling', 'trend.job']
    new_regressor_col = [raw_regressor_col[idx] for idx in reorder]
    new_regressor_signs = [regressor_signs[idx] for idx in reorder]

    # mixing the ordering of columns in the df used for prediction
    new_df = df[['claims', 'week'] + new_regressor_col]

    lgt = LGTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=raw_regressor_col,
        regressor_sign=regressor_signs,
        seasonality=52,
        seed=8888,
    )
    lgt.fit(df)
    predicted_df_v1 = lgt.predict(df)
    predicted_df_v2 = lgt.predict(new_df)

    # mixing the ordering of signs (and of the regressors they belong to)
    lgt_new = LGTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=new_regressor_col,
        regressor_sign=new_regressor_signs,
        seasonality=52,
        seed=8888,
    )
    lgt_new.fit(df)
    predicted_df_v3 = lgt_new.predict(df)
    predicted_df_v4 = lgt_new.predict(new_df)

    pred_v1 = predicted_df_v1['prediction'].values
    pred_v2 = predicted_df_v2['prediction'].values
    pred_v3 = predicted_df_v3['prediction'].values
    pred_v4 = predicted_df_v4['prediction'].values

    # they should all be identical; the ordering of signs or of columns in
    # the prediction frame should not matter
    assert np.allclose(pred_v1, pred_v2, atol=1e-3)
    assert np.allclose(pred_v1, pred_v3, atol=1e-3)
    assert np.allclose(pred_v1, pred_v4, atol=1e-3)