def test_uncertainty(daily_data):
    """Fits a model with conditional-residual uncertainty intervals and
    checks both the forecast columns and the empirical coverage of the
    [2.5%, 97.5%] prediction band."""
    uncertainty_dict = {
        "uncertainty_method": "simple_conditional_residuals",
        "params": {
            "conditional_cols": ["dow_hr"],
            "quantiles": [0.025, 0.975],
            "quantile_estimation_method": "normal_fit",
            "sample_size_thresh": 10,
            "small_sample_size_method": "std_quantiles",
            "small_sample_size_quantile": 0.98}}
    estimator = SilverkiteEstimator(uncertainty_dict=uncertainty_dict)
    train_df = daily_data["train_df"]
    test_df = daily_data["test_df"]
    estimator.fit(
        train_df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL)
    # No forecast is cached until predict is called.
    assert estimator.forecast is None
    predictions = estimator.predict(test_df)
    expected_forecast_cols = {
        "ts", "y", "y_quantile_summary", "err_std",
        "forecast_lower", "forecast_upper"}
    assert expected_forecast_cols.issubset(list(estimator.forecast.columns))

    # Fraction of actuals falling inside [lower, upper], as a percentage.
    actual = daily_data["test_df"][cst.VALUE_COL]
    lower = predictions[cst.PREDICTED_LOWER_COL]
    upper = predictions[cst.PREDICTED_UPPER_COL]
    in_band = (actual >= lower) & (actual <= upper)
    calc_pred_coverage = 100 * in_band.mean()
    # Coverage observed on this fixed dataset; pinned by the test.
    assert round(calc_pred_coverage) == 97, "forecast coverage is incorrect"
def test_score_function(daily_data_with_reg):
    """Tests fit and its compatibility with predict/score.

    Checks score function accuracy without null model
    """
    estimator = SilverkiteEstimator(
        extra_pred_cols=["ct1", "regressor1", "regressor2"],
        impute_dict={
            "func": impute_with_lags,
            "params": {"orders": [7]}})
    train_df = daily_data_with_reg["train_df"]
    test_df = daily_data_with_reg["test_df"]
    estimator.fit(
        X=train_df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL)
    score = estimator.score(test_df, test_df[cst.VALUE_COL])
    pred_df = estimator.predict(test_df)
    # Without an uncertainty config, predict returns only time + forecast.
    assert list(pred_df.columns) == [cst.TIME_COL, cst.PREDICTED_COL]
    # The score must equal the MSE of predictions against the actuals.
    expected_mse = mean_squared_error(
        pred_df[cst.PREDICTED_COL],
        test_df[cst.VALUE_COL])
    assert score == pytest.approx(expected_mse)
    # Regression guard on the absolute score for this fixed dataset.
    assert score == pytest.approx(4.6, rel=1e-2)
def test_lagged_regressors(daily_data_with_reg, params):
    """Tests a basic model with lagged regressors"""
    train_df = daily_data_with_reg["train_df"]
    test_df = daily_data_with_reg["test_df"][:20]
    # Both configurations below must generate the same lagged-regressor terms.
    expected_lagged_regression_terms = {
        "regressor1_lag1",
        "regressor1_lag2",
        "regressor1_lag3",
        "regressor1_avglag_7_14_21",
        "regressor1_avglag_8_to_14",
        "regressor2_lag35",
        "regressor2_avglag_35_42_49",
        "regressor2_avglag_30_to_36"}

    # Default forecast horizon, no uncertainty
    estimator = SilverkiteEstimator(
        lagged_regressor_dict=params["lagged_regressor_dict"])
    estimator.fit(
        train_df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL)
    assert estimator.forecast is None
    trained_model = estimator.model_dict
    # The fitted model records the lagged regressor config it was given.
    assert trained_model["lagged_regressor_dict"] == params["lagged_regressor_dict"]
    assert expected_lagged_regression_terms.issubset(trained_model["pred_cols"])
    estimator.predict(test_df)
    assert {"ts", "y"}.issubset(list(estimator.forecast.columns))

    # Forecast horizon of 10, with uncertainty dict
    estimator = SilverkiteEstimator(
        uncertainty_dict=params["uncertainty_dict"],
        lagged_regressor_dict=params["lagged_regressor_dict"],
        forecast_horizon=10)
    estimator.fit(
        train_df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL)
    assert estimator.forecast is None
    trained_model = estimator.model_dict
    assert expected_lagged_regression_terms.issubset(trained_model["pred_cols"])
    estimator.predict(test_df)
    expected_forecast_cols = {
        "ts", "y", "y_quantile_summary", "err_std",
        "forecast_lower", "forecast_upper"}
    assert expected_forecast_cols.issubset(list(estimator.forecast.columns))
def test_autoreg(daily_data):
    """Runs a basic model with uncertainty intervals and checks coverage"""
    uncertainty_dict = {
        "uncertainty_method": "simple_conditional_residuals",
        "params": {
            "conditional_cols": ["dow_hr"],
            "quantiles": [0.025, 0.975],
            "quantile_estimation_method": "normal_fit",
            "sample_size_thresh": 10,
            "small_sample_size_method": "std_quantiles",
            "small_sample_size_quantile": 0.98}}
    train_df = daily_data["train_df"]
    test_df = daily_data["test_df"][:20]

    def fit_model(**kwargs):
        # Builds and fits an estimator with "auto" autoregression plus the
        # given overrides; checks that no forecast exists before predict.
        estimator = SilverkiteEstimator(
            uncertainty_dict=uncertainty_dict,
            autoreg_dict="auto",
            **kwargs)
        estimator.fit(
            train_df,
            time_col=cst.TIME_COL,
            value_col=cst.VALUE_COL)
        assert estimator.forecast is None
        return estimator

    # Default forecast horizon, no simulation
    model = fit_model()
    expected_autoreg_terms = {
        "y_lag30", "y_lag31", "y_lag32",
        "y_avglag_35_42_49", "y_avglag_30_to_36", "y_avglag_37_to_43"}
    assert expected_autoreg_terms.issubset(model.model_dict["pred_cols"])
    predictions = model.predict(test_df)
    expected_forecast_cols = {
        "ts", "y", "y_quantile_summary", "err_std",
        "forecast_lower", "forecast_upper"}
    assert expected_forecast_cols.issubset(list(model.forecast.columns))
    # Empirical coverage of the prediction band, as a percentage.
    actual = test_df[cst.VALUE_COL]
    in_band = (
        (actual >= predictions[cst.PREDICTED_LOWER_COL])
        & (actual <= predictions[cst.PREDICTED_UPPER_COL]))
    calc_pred_coverage = 100 * in_band.mean()
    assert round(calc_pred_coverage) >= 75, "forecast coverage is incorrect"

    # Simulation based, default forecast horizon
    model = fit_model(simulation_based=True)
    expected_autoreg_terms = {
        "y_lag1", "y_lag2", "y_lag3",
        "y_avglag_7_14_21", "y_avglag_1_to_7", "y_avglag_8_to_14"}
    assert expected_autoreg_terms.issubset(model.model_dict["pred_cols"])

    # Passes forecast horizon of 10
    model = fit_model(forecast_horizon=10)
    expected_autoreg_terms = {
        "y_lag10", "y_lag11", "y_lag12",
        "y_avglag_14_21_28", "y_avglag_10_to_16", "y_avglag_17_to_23"}
    assert expected_autoreg_terms.issubset(model.model_dict["pred_cols"])

    # Passes forecast horizon of 10, and simulation-based True
    model = fit_model(forecast_horizon=10, simulation_based=True)
    expected_autoreg_terms = {
        "y_lag1", "y_lag2", "y_lag3",
        "y_avglag_7_14_21", "y_avglag_1_to_7", "y_avglag_8_to_14"}
    assert expected_autoreg_terms.issubset(model.model_dict["pred_cols"])