def test_lgt_full_fit(synthetic_data, seasonality, estimator_type):
    """Fit ``LGTFull`` and check the prediction frame and posterior-sample count.

    The expected number of posterior entries starts at 11 for ``StanEstimator``
    subclasses and at 10 for anything else, and grows by 2 when ``seasonality``
    equals 52.

    NOTE(review): another function with this exact name is defined later in
    this source; if both live in the same module the later definition shadows
    this one and it is never collected -- confirm and rename if so.
    """
    train_df, test_df, _ = synthetic_data

    uses_stan = issubclass(estimator_type, StanEstimator)
    if uses_stan:
        param_count = 11
        sampler_kwargs = {'num_warmup': 50}
    else:
        # no `lp__` in pyro
        param_count = 10
        sampler_kwargs = {'num_steps': 10}
    if seasonality == 52:
        param_count += 2

    model_kwargs = {
        'response_col': 'response',
        'date_col': 'week',
        'prediction_percentiles': [5, 95],
        'seasonality': seasonality,
        'verbose': False,
        'estimator_type': estimator_type,
    }
    model_kwargs.update(sampler_kwargs)

    model = LGTFull(**model_kwargs)
    model.fit(train_df)
    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == param_count
def test_lgt_non_seasonal_fit_pyro(synthetic_data):
    """Fit a non-seasonal ``LGTFull`` with ``PyroEstimatorVI`` and check outputs.

    Verifies the shape and column names of the prediction frame and the number
    of entries in the fitted model's posterior samples.
    """
    train_df, test_df, _ = synthetic_data

    model = LGTFull(
        response_col='response',
        date_col='week',
        estimator_type=PyroEstimatorVI,
        num_steps=10,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    # no `lp__` in pyro
    assert len(model._posterior_samples) == 10
def test_lgt_non_seasonal_fit(synthetic_data, estimator_type):
    """Fit a non-seasonal ``LGTFull`` with the given estimator and check outputs.

    Verifies the shape and column names of the prediction frame and that the
    fitted model holds 11 posterior-sample entries.
    """
    train_df, test_df, _ = synthetic_data

    model = LGTFull(
        response_col='response',
        date_col='week',
        estimator_type=estimator_type,
        num_warmup=50,
    )
    model.fit(train_df)
    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == 11
def test_prediction_percentiles(iclaims_training_data, prediction_percentiles):
    """Check prediction column naming with and without explicit percentiles.

    When ``prediction_percentiles`` is falsy the expected suffixes are
    ``_5``, (none), ``_95``; otherwise ``_5``, ``_10``, (none), ``_95``.
    Both the plain and the decomposed prediction frames are checked for
    column names and row count.
    """
    df = iclaims_training_data
    model = LGTFull(
        response_col='claims',
        date_col='week',
        seasonality=52,
        seed=8888,
        prediction_percentiles=prediction_percentiles,
    )

    suffixes = (
        ['_5', '_10', '', '_95'] if prediction_percentiles
        else ['_5', '', '_95']
    )

    model.fit(df)

    # plain prediction: one column per suffix after the date column
    plain = model.predict(df)
    assert plain.columns.tolist() == ['week'] + ["prediction" + s for s in suffixes]
    assert plain.shape[0] == df.shape[0]

    # decomposed prediction: every component carries the same suffix set
    decomposed = model.predict(df, decompose=True)
    components = [
        'prediction',
        PredictedComponents.TREND.value,
        PredictedComponents.SEASONALITY.value,
        PredictedComponents.REGRESSION.value,
    ]
    wanted_columns = ['week'] + [
        component + s for component in components for s in suffixes
    ]
    assert decomposed.columns.tolist() == wanted_columns
    assert decomposed.shape[0] == df.shape[0]
def test_lgt_full_univariate_pyro(synthetic_data):
    """Fit a seasonal ``LGTFull`` with ``PyroEstimatorVI`` and check outputs.

    Verifies the shape and column names of the prediction frame and the number
    of entries in the fitted model's posterior samples.

    The interval columns are expected to be suffixed with the requested
    percentiles (``prediction_5`` / ``prediction_95``), matching what the other
    tests in this file assert for ``prediction_percentiles=[5, 95]``; the
    previous ``prediction_lower`` / ``prediction_upper`` expectation
    contradicted them for the very same model configuration.
    """
    train_df, test_df, _ = synthetic_data

    lgt = LGTFull(
        response_col='response',
        date_col='week',
        prediction_percentiles=[5, 95],
        seasonality=52,
        num_steps=10,
        verbose=False,
        estimator_type=PyroEstimatorVI,
    )
    lgt.fit(train_df)
    predict_df = lgt.predict(test_df)

    expected_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    expected_shape = (51, len(expected_columns))
    expected_num_parameters = 12  # no `lp__` in pyro

    assert predict_df.shape == expected_shape
    assert predict_df.columns.tolist() == expected_columns
    assert len(lgt._posterior_samples) == expected_num_parameters
def test_lgt_full_with_regression(synthetic_data, estimator_type, regressor_signs):
    """Fit ``LGTFull`` with regressors and check predictions and coefficients.

    All columns of the training frame from index 2 onward are used as
    regressors. ``StanEstimator`` subclasses are configured with
    ``num_warmup=50`` and ``PyroEstimator`` subclasses with ``num_steps=10``;
    for any other estimator type the test returns without asserting anything.
    """
    train_df, test_df, _ = synthetic_data

    # estimator-specific sampling arguments; bail out on unknown estimators
    if issubclass(estimator_type, StanEstimator):
        sampler_kwargs = {'num_warmup': 50}
    elif issubclass(estimator_type, PyroEstimator):
        sampler_kwargs = {'num_steps': 10}
    else:
        return None

    model_kwargs = {
        'response_col': 'response',
        'date_col': 'week',
        'regressor_col': train_df.columns.tolist()[2:],
        'regressor_sign': regressor_signs,
        'prediction_percentiles': [5, 95],
        'seasonality': 52,
        'verbose': False,
        'estimator_type': estimator_type,
    }
    model_kwargs.update(sampler_kwargs)
    model = LGTFull(**model_kwargs)

    model.fit(train_df)
    forecast = model.predict(test_df)
    coef_table = model.get_regression_coefs()

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert coef_table.shape == (6, 3)
    # one coefficient row per regressor column
    assert coef_table.shape[0] == len(train_df.columns.tolist()[2:])
def test_lgt_full_fit(synthetic_data, seasonality, estimator_type):
    """Fit ``LGTFull``, then check init values, predictions and posteriors.

    When ``seasonality`` is truthy the model's init values are expected to be
    an ``LGTInitializer`` with ``s == 52`` that produces an ``init_sea`` array
    of shape ``(51,)``; otherwise the init values are expected to be falsy.
    The posterior is expected to hold 10 entries, or 12 when ``seasonality``
    equals 52.
    """
    train_df, test_df, _ = synthetic_data

    model_kwargs = dict(
        response_col='response',
        date_col='week',
        prediction_percentiles=[5, 95],
        seasonality=seasonality,
        verbose=False,
        estimator_type=estimator_type,
    )
    if issubclass(estimator_type, StanEstimator):
        model_kwargs['num_warmup'] = 50
        model_kwargs['num_sample'] = 50
    elif issubclass(estimator_type, PyroEstimator):
        model_kwargs['num_steps'] = 10

    param_count = 12 if seasonality == 52 else 10

    model = LGTFull(**model_kwargs)
    model.fit(train_df)

    initializer = model.get_init_values()
    if not seasonality:
        assert not initializer
    else:
        assert isinstance(initializer, LGTInitializer)
        assert initializer.s == 52
        seasonal_init = initializer()
        assert seasonal_init['init_sea'].shape == (51, )

    forecast = model.predict(test_df)

    wanted_columns = ['week', 'prediction_5', 'prediction', 'prediction_95']
    assert forecast.shape == (51, len(wanted_columns))
    assert forecast.columns.tolist() == wanted_columns
    assert len(model._posterior_samples) == param_count
def test_lgt_full_reproducibility(synthetic_data, estimator_type, regressor_signs, seasonality):
    """Check that two identically configured ``LGTFull`` runs give equal results.

    Two separate model instances are built with the same arguments, each is
    fitted and used to predict, and the posterior samples, regression
    coefficients and prediction frames of both runs are compared.
    """
    train_df, test_df, _ = synthetic_data

    def _new_model():
        # a fresh instance per run: a new instance must be created to reset the seed
        return LGTFull(
            response_col='response',
            date_col='week',
            regressor_col=train_df.columns.tolist()[2:],
            regressor_sign=regressor_signs,
            prediction_percentiles=[5, 95],
            seasonality=seasonality,
            num_warmup=50,
            verbose=False,
            estimator_type=estimator_type,
        )

    def _run(model):
        # both fit and predict contain random generation processes
        model.fit(train_df)
        posteriors = copy(model._posterior_samples)
        forecast = model.predict(test_df)
        coefs = model.get_regression_coefs()
        return posteriors, forecast, coefs

    # first fit and predict
    posteriors_first, predict_df_first, coefs_first = _run(_new_model())
    # second fit and predict, on a model created only after the first run finished
    posteriors_second, predict_df_second, coefs_second = _run(_new_model())

    # assert same posterior keys
    assert set(posteriors_first.keys()) == set(posteriors_second.keys())

    # assert posterior draws are reproducible
    for name, first_draws in posteriors_first.items():
        assert np.allclose(first_draws, posteriors_second[name])

    # assert identical regression columns
    # this is also checked in posterior samples, but an extra layer just in case
    # since this one is very commonly retrieved by end users
    assert coefs_first.equals(coefs_second)

    # assert prediction is reproducible
    assert predict_df_first.equals(predict_df_second)