def test_lgt_fixed_sm_input(synthetic_data, level_sm_input, seasonality_sm_input, slope_sm_input):
    """Fit LGTMAP with supplied smoothing inputs and check output shapes.

    Every training column after the first two is used as a regressor. The
    model is fit on the training frame, predictions are made on the test
    frame with ``n_bootstrap_draw=100``, and the shapes/column names of the
    prediction frame and the regression-coefficient table are asserted.
    """
    train_df, test_df, _ = synthetic_data
    regressor_names = train_df.columns.tolist()[2:]

    model = LGTMAP(
        response_col='response',
        date_col='week',
        regressor_col=regressor_names,
        level_sm_input=level_sm_input,
        seasonality_sm_input=seasonality_sm_input,
        slope_sm_input=slope_sm_input,
        seasonality=52,
        verbose=False,
    )
    model.fit(train_df)

    predict_df = model.predict(test_df, n_bootstrap_draw=100)
    coef_table = model.get_regression_coefs()

    expected_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    # prediction frame: 51 rows, one column per expected name
    assert predict_df.shape == (51, len(expected_columns))
    assert predict_df.columns.tolist() == expected_columns

    # coefficient table: hard-coded expected shape, plus one row per regressor column
    assert coef_table.shape == (6, 3)
    assert coef_table.shape[0] == len(train_df.columns.tolist()[2:])
def test_lgt_map_fit(synthetic_data, seasonality, estimator_type):
    """Fit LGTMAP, inspect its init values, and check the prediction output.

    With a truthy ``seasonality`` the init-values object must be an
    ``LGTInitializer`` with ``s == 52`` whose call result carries an
    ``init_sea`` entry of shape ``(51,)``; otherwise the init values must be
    falsy. The prediction frame shape/columns and the number of posterior
    sample entries are then asserted.

    NOTE(review): a later function in this file is also named
    ``test_lgt_map_fit``; that definition replaces this one at import time,
    so this body is not collected until one of them is renamed.
    """
    train_df, test_df, _ = synthetic_data
    model = LGTMAP(
        response_col='response',
        date_col='week',
        seasonality=seasonality,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)

    initializer = model.get_init_values()
    if not seasonality:
        assert not initializer
    else:
        assert isinstance(initializer, LGTInitializer)
        assert initializer.s == 52
        generated = initializer()
        assert generated['init_sea'].shape == (51, )

    predict_df = model.predict(test_df)

    expected_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    # two extra posterior entries are expected when seasonality is 52
    expected_num_parameters = 12 if seasonality == 52 else 10

    assert predict_df.shape == (51, len(expected_columns))
    assert predict_df.columns.tolist() == expected_columns
    assert len(model._posterior_samples) == expected_num_parameters
def test_lgt_mixed_signs_and_order(iclaims_training_data, regressor_signs):
    """Check predictions do not depend on regressor or column ordering.

    Two models are fit on log-transformed claims: one with the regressors in
    their raw order and one with regressors (and their signs) reversed. Each
    model predicts on the original frame and on a frame whose regressor
    columns are reordered. All four prediction vectors must agree within
    ``atol=1e-3``.
    """
    # Work on a copy so the log transform is not written back into the
    # fixture object (which would compound if the fixture is ever shared).
    df = iclaims_training_data.copy()
    df['claims'] = np.log(df['claims'])

    raw_regressor_col = ['trend.unemploy', 'trend.filling', 'trend.job']
    new_regressor_col = [raw_regressor_col[idx] for idx in [2, 1, 0]]
    new_regressor_signs = [regressor_signs[idx] for idx in [2, 1, 0]]

    # mixing the ordering of columns in the prediction dataframe
    new_df = df[['claims', 'week'] + new_regressor_col]

    lgt = LGTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=raw_regressor_col,
        regressor_sign=regressor_signs,
        seasonality=52,
        seed=8888,
    )
    lgt.fit(df)
    predicted_df_v1 = lgt.predict(df)
    predicted_df_v2 = lgt.predict(new_df)

    # mixing the ordering of regressors and signs passed to the model
    lgt_new = LGTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=new_regressor_col,
        regressor_sign=new_regressor_signs,
        seasonality=52,
        seed=8888,
    )
    lgt_new.fit(df)
    predicted_df_v3 = lgt_new.predict(df)
    predicted_df_v4 = lgt_new.predict(new_df)

    pred_v1 = predicted_df_v1['prediction'].values
    pred_v2 = predicted_df_v2['prediction'].values
    pred_v3 = predicted_df_v3['prediction'].values
    pred_v4 = predicted_df_v4['prediction'].values

    # they should all be identical; the ordering of signs or of columns in
    # the prediction input should not matter
    assert np.allclose(pred_v1, pred_v2, atol=1e-3)
    assert np.allclose(pred_v1, pred_v3, atol=1e-3)
    assert np.allclose(pred_v1, pred_v4, atol=1e-3)
def test_lgt_map_reproducibility(synthetic_data, seasonality):
    """Check that two identically configured LGTMAP runs give the same results.

    Two separate model instances are built with the same arguments; each is
    fit on the training frame and used to predict on the test frame. The
    'map' aggregate posteriors must have the same keys and close values, and
    the point predictions must be close.
    """
    train_df, test_df, _ = synthetic_data

    def _fit_and_predict():
        # a new instance must be created for each run to reset the seed;
        # both fit and predict contain random generation processes
        model = LGTMAP(
            response_col='response',
            date_col='week',
            prediction_percentiles=[5, 95],
            seasonality=seasonality,
        )
        model.fit(train_df)
        map_posteriors = copy(model._aggregate_posteriors['map'])
        return map_posteriors, model.predict(test_df)

    # first fit and predict
    posteriors1, prediction1 = _fit_and_predict()
    # second fit and predict
    posteriors2, prediction2 = _fit_and_predict()

    # assert same posterior keys
    assert set(posteriors1.keys()) == set(posteriors2.keys())

    # assert posterior draws are reproducible
    for key, draws in posteriors1.items():
        assert np.allclose(draws, posteriors2[key])

    # assert prediction is reproducible
    first_pred = prediction1['prediction'].values
    second_pred = prediction2['prediction'].values
    assert np.allclose(first_pred, second_pred)
def test_lgt_predict_all_positive_reg(iclaims_training_data):
    """Fit LGTMAP with three '+'-signed regressors and check the decomposition.

    After fitting and predicting on the same frame with ``decompose=True``,
    the 'regression' column must contain at least one truthy value.
    """
    positive_regressors = ['trend.unemploy', 'trend.filling', 'trend.job']

    model = LGTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=positive_regressors,
        regressor_sign=['+'] * 3,
        seasonality=52,
        seed=8888,
    )
    model.fit(iclaims_training_data)

    decomposed = model.predict(iclaims_training_data, decompose=True)
    regression_component = decomposed['regression'].values

    assert any(regression_component)
def test_lgt_map_univariate(synthetic_data, estimator_type):
    """Fit a univariate LGTMAP (no regressors) and check the prediction output.

    The model is built with ``seasonality=52`` and the given estimator type,
    fit on the training frame, and used to predict on the test frame with
    default arguments. The prediction frame shape/columns and the number of
    posterior sample entries are asserted.

    The expected columns use the same percentile layout that the other
    default-``predict`` LGTMAP tests in this file assert for an identically
    configured model; previously this test expected only
    ``['week', 'prediction']``, contradicting them.
    """
    train_df, test_df, coef = synthetic_data

    lgt = LGTMAP(
        response_col='response',
        date_col='week',
        seasonality=52,
        verbose=False,
        estimator_type=estimator_type,
    )
    lgt.fit(train_df)
    predict_df = lgt.predict(test_df)

    expected_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    expected_shape = (51, len(expected_columns))
    expected_num_parameters = 12  # no `lp__` parameter in optimizing()

    assert predict_df.shape == expected_shape
    assert predict_df.columns.tolist() == expected_columns
    assert len(lgt._posterior_samples) == expected_num_parameters
def test_lgt_map_single_regressor(iclaims_training_data):
    """Fit LGTMAP with one regressor on log-transformed claims.

    The model is fit and used to predict on the same frame. The prediction
    must have one row per input row, the expected percentile columns, and the
    expected number of posterior sample entries.
    """
    # Work on a copy so the log transform is not written back into the
    # fixture object (which would compound if the fixture is ever shared).
    df = iclaims_training_data.copy()
    df['claims'] = np.log(df['claims'])

    regressor_col = ['trend.unemploy']

    lgt = LGTMAP(
        response_col='claims',
        date_col='week',
        regressor_col=regressor_col,
        seasonality=52,
        seed=8888,
    )
    lgt.fit(df)
    predicted_df = lgt.predict(df)

    expected_num_parameters = 13
    expected_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']

    assert predicted_df.shape[0] == df.shape[0]
    assert predicted_df.columns.tolist() == expected_columns
    assert len(lgt._posterior_samples) == expected_num_parameters
def test_lgt_map_fit(synthetic_data, seasonality, estimator_type):
    """Fit LGTMAP and check the prediction frame and posterior sample count.

    The model is fit on the training frame and used to predict on the test
    frame with default arguments. Ten posterior sample entries are expected,
    or twelve when ``seasonality`` is 52.

    NOTE(review): an earlier function in this file is also named
    ``test_lgt_map_fit``; this later definition replaces it at import time,
    so only this body is collected until one of them is renamed.
    """
    train_df, test_df, _ = synthetic_data

    model = LGTMAP(
        response_col='response',
        date_col='week',
        seasonality=seasonality,
        verbose=False,
        estimator_type=estimator_type,
    )
    model.fit(train_df)
    predict_df = model.predict(test_df)

    expected_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    # two extra posterior entries are expected when seasonality is 52
    expected_num_parameters = 12 if seasonality == 52 else 10

    assert predict_df.shape == (51, len(expected_columns))
    assert predict_df.columns.tolist() == expected_columns
    assert len(model._posterior_samples) == expected_num_parameters