# Imports used throughout this excerpt. The `data` / `model` arguments to the
# fixtures and tests below are supplied by the surrounding pytest suite;
# `data_generator`, `generate_example_data`, and
# `get_and_print_model_metrics_params` are helpers from the Diviner
# repository's tests and examples.
from pandas.testing import assert_frame_equal
from pmdarima.arima import ARIMA, AutoARIMA
from pmdarima.model_selection import RollingForecastCV
from pmdarima.pipeline import Pipeline
from pmdarima.preprocessing import FourierFeaturizer

from diviner import GroupedPmdarima, PmdarimaAnalyzer


def basic_arima(data):
    arima = ARIMA(out_of_sample_size=30, order=(2, 0, 3))
    return GroupedPmdarima(arima).fit(
        data.df,
        data.key_columns,
        "y",
        "ds",
    )

def test_grouped_pmdarima_save_load_predict(model):
    save_path = "/tmp/pmdarima/test.pmd"
    forecast = model.predict(30, return_conf_int=True)
    model.save(save_path)
    loaded = GroupedPmdarima.load(save_path)
    loaded_forecast = loaded.predict(30, return_conf_int=True)
    assert_frame_equal(forecast, loaded_forecast)

def basic_pmdarima(data):
    arima = AutoARIMA(out_of_sample_size=30)
    return GroupedPmdarima(arima).fit(
        data.df,
        data.key_columns,
        "y",
        "ds",
    )

def test_grouped_model_cross_validate():
    metrics = ["smape", "mean_squared_error", "mean_absolute_error"]
    expected_columns = (
        [f"{met}_mean" for met in metrics]
        + [f"{met}_stddev" for met in metrics]
        + ["grouping_key_columns", "key0"]
    )
    train = data_generator.generate_test_data(1, 2, 765, "2019-01-01")
    grouped_model = GroupedPmdarima(
        model_template=AutoARIMA(max_order=5, out_of_sample_size=30),
    ).fit(train.df, train.key_columns, "y", "ds", silence_warnings=True)
    cross_validator = RollingForecastCV(h=90, step=120, initial=365)
    cv_metrics = grouped_model.cross_validate(train.df, metrics, cross_validator)
    # One aggregated row per modeled group; two series were generated above.
    assert len(cv_metrics) == 2
    assert set(cv_metrics.columns).issubset(set(expected_columns))

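# --- Standalone sketch (not part of the test suite above): for context on
# the cross validator used in that test, this prints the rolling train/test
# windows that RollingForecastCV(h=90, step=120, initial=365) yields over a
# 765-point series. `series` here is a stand-in for one group's data.
import numpy as np


def _show_rolling_cv_folds():
    series = np.arange(765)
    cv = RollingForecastCV(h=90, step=120, initial=365)
    for train_idx, test_idx in cv.split(series):
        print(f"train 0..{train_idx[-1]}, test {test_idx[0]}..{test_idx[-1]}")
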
def basic_pipeline(data):
    pipeline = Pipeline(
        steps=[
            ("fourier", FourierFeaturizer(k=3, m=7)),
            ("arima", AutoARIMA(out_of_sample_size=60)),
        ]
    )
    return GroupedPmdarima(pipeline).fit(
        data.df,
        data.key_columns,
        "y",
        "ds",
    )

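# --- Standalone sketch (synthetic data, not from the fixture above): the
# FourierFeaturizer(k=3, m=7) stage encodes a weekly (m=7) cycle as
# 2*k = 6 Fourier terms, which the downstream ARIMA stage receives as
# exogenous regressors while y passes through unchanged.
def _show_fourier_featurizer_output():
    y = np.sin(np.arange(28) * 2 * np.pi / 7)  # synthetic weekly pattern
    y_out, exog = FourierFeaturizer(m=7, k=3).fit_transform(y)
    print(exog.shape)  # (28, 6): three sine/cosine pairs
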
def grouped_pmdarima(diviner_data):
    from pmdarima.arima.auto import AutoARIMA

    base_model = AutoARIMA(out_of_sample_size=60, maxiter=30)
    return GroupedPmdarima(model_template=base_model).fit(
        df=diviner_data.df,
        group_key_columns=diviner_data.key_columns,
        y_col="y",
        datetime_col="ds",
        silence_warnings=True,
    )

def model(data):
    arima = GroupedPmdarima(
        model_template=Pipeline(
            steps=[("arima", AutoARIMA(out_of_sample_size=60, max_order=7))]
        ),
    ).fit(
        df=data.df,
        group_key_columns=data.key_columns,
        y_col="y",
        datetime_col="ds",
        silence_warnings=True,
    )
    return arima

def test_pmdarima_default_arima_fit_attribute_extraction(data):
    # _extract_arima_model, _get_arima_training_metrics, _get_arima_params,
    # and _PMDARIMA_MODEL_METRICS are Diviner-internal helpers (its internal
    # pmdarima utilities module; the exact path may vary by release).
    arima_model = GroupedPmdarima(
        model_template=AutoARIMA(out_of_sample_size=30)
    ).fit(data.df, data.key_columns, "y", "ds")
    for group in arima_model.model.keys():
        pipeline = arima_model._extract_individual_model(group)
        instance_model = _extract_arima_model(pipeline)
        group_metrics = _get_arima_training_metrics(instance_model)
        for key, value in group_metrics.items():
            assert value > 0
            assert key in _PMDARIMA_MODEL_METRICS
        for item in _PMDARIMA_MODEL_METRICS:
            assert item in group_metrics.keys()
        group_params = _get_arima_params(instance_model)
        # Not a seasonal model, so the seasonal order terms are all zero.
        for item in {"P", "D", "Q", "s"}:
            assert group_params[item] == 0

def test_grouped_pmdarima_save_and_load(model):
    orig_params = model.get_model_params()
    orig_metrics = model.get_metrics()
    save_path = "/tmp/pmdarima/test.pmd"
    model.save(save_path)
    loaded = GroupedPmdarima.load(save_path)
    loaded_params = loaded.get_model_params()
    loaded_metrics = loaded.get_metrics()
    assert_frame_equal(orig_params, loaded_params)
    assert_frame_equal(orig_metrics, loaded_metrics)

def test_pmdarima_ndiffs_override_class_args(data):
    ndiffs = PmdarimaAnalyzer(
        df=data.df,
        group_key_columns=data.key_columns,
        y_col="y",
        datetime_col="ds",
    ).calculate_ndiffs(alpha=0.4, max_d=4)
    # The template's d=10 should be overridden per group by the ndiffs values
    # computed above, each of which is capped at max_d=4.
    base_template = AutoARIMA(d=10, out_of_sample_size=7)
    model = GroupedPmdarima(base_template).fit(
        df=data.df,
        group_key_columns=data.key_columns,
        y_col="y",
        datetime_col="ds",
        ndiffs=ndiffs,
        silence_warnings=True,
    )
    params = model.get_model_params()
    for _, row in params.iterrows():
        assert row["d"] <= 4

def pipeline_override_d(data):
    pipeline = Pipeline(steps=[("arima", AutoARIMA(out_of_sample_size=30))])
    util = PmdarimaAnalyzer(
        df=data.df,
        group_key_columns=data.key_columns,
        y_col="y",
        datetime_col="ds",
    )
    ndiffs = util.calculate_ndiffs(alpha=0.2, test="kpss", max_d=7)
    nsdiffs = util.calculate_nsdiffs(m=7, test="ocsb", max_D=7)
    return GroupedPmdarima(pipeline).fit(
        df=data.df,
        group_key_columns=data.key_columns,
        y_col="y",
        datetime_col="ds",
        ndiffs=ndiffs,
        nsdiffs=nsdiffs,
        silence_warnings=True,
    )

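# --- Standalone sketch (synthetic data, not part of the fixture above):
# calculate_ndiffs returns a per-group mapping of differencing terms that
# fit() then applies as overrides, as exercised by the fixture. The data
# below is fabricated purely to show the shape of that return value.
import pandas as pd


def _show_ndiffs_mapping():
    dates = pd.date_range("2019-01-01", periods=400, freq="D")
    frames = [
        pd.DataFrame(
            {
                "ds": dates,
                "key0": key,
                "y": np.random.default_rng(0).normal(size=400).cumsum() + 100,
            }
        )
        for key in ("a", "b")
    ]
    analyzer = PmdarimaAnalyzer(
        df=pd.concat(frames),
        group_key_columns=["key0"],
        y_col="y",
        datetime_col="ds",
    )
    # Illustrative output: one differencing term per group key.
    print(analyzer.calculate_ndiffs(alpha=0.2, test="kpss", max_d=7))
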
generated_data = generate_example_data(
    column_count=3,
    series_count=4,
    series_size=365 * 4,
    start_dt="2019-01-01",
    days_period=1,
)
training_data = generated_data.df
group_key_columns = generated_data.key_columns

# Build a GroupedPmdarima model by specifying an ARIMA model
arima_obj = ARIMA(order=(2, 1, 3), out_of_sample_size=60)
base_arima = GroupedPmdarima(model_template=arima_obj).fit(
    df=training_data,
    group_key_columns=group_key_columns,
    y_col="y",
    datetime_col="ds",
    silence_warnings=True,
)

# Save to local directory
save_dir = "/tmp/group_pmdarima/arima.gpmd"
base_arima.save(save_dir)

# Load from saved model
loaded_model = GroupedPmdarima.load(save_dir)

print("\nARIMA results:\n", "-" * 40)
get_and_print_model_metrics_params(loaded_model)

# The original excerpt is truncated mid-call here; the arguments below are an
# assumed, minimal completion (a 30-period horizon with confidence intervals,
# matching the horizon used elsewhere in this suite).
prediction = loaded_model.predict(
    n_periods=30,
    return_conf_int=True,
)

diff_analyzer = PmdarimaAnalyzer(
    df=training_data,
    group_key_columns=group_key_columns,
    y_col="y",
    datetime_col="ds",
)
ndiff = diff_analyzer.calculate_ndiffs(
    alpha=0.05,
    test="kpss",
    max_d=4,
)

# `pipeline` is a pmdarima Pipeline constructed earlier in the original
# example; its definition is not part of this excerpt.
grouped_model = GroupedPmdarima(model_template=pipeline).fit(
    df=training_data,
    group_key_columns=group_key_columns,
    y_col="y",
    datetime_col="ds",
    ndiffs=ndiff,
    silence_warnings=True,
)

# Save to local directory
save_dir = "/tmp/group_pmdarima/pipeline_override.gpmd"
grouped_model.save(save_dir)

# Load from saved model
loaded_model = GroupedPmdarima.load(save_dir)

print("\nAutoARIMA results:\n", "-" * 40)
get_and_print_model_metrics_params(loaded_model)

print("\nPredictions:\n", "-" * 40)

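# The excerpt ends at the "Predictions:" banner, before the forecast call
# itself. A minimal, assumed continuation (horizon value is illustrative):
forecasts = loaded_model.predict(n_periods=30, return_conf_int=True)
print(forecasts.head(10))
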
generated_data = generate_example_data(
    column_count=2,
    series_count=6,
    series_size=365 * 4,
    start_dt="2019-01-01",
    days_period=1,
)
training_data = generated_data.df
group_key_columns = generated_data.key_columns

arima_obj = ARIMA(order=(2, 1, 3), out_of_sample_size=60)
base_arima = GroupedPmdarima(model_template=arima_obj).fit(
    df=training_data,
    group_key_columns=group_key_columns,
    y_col="y",
    datetime_col="ds",
    silence_warnings=True,
)

# Get a subset of group keys to generate forecasts for
group_df = training_data.copy()
group_df["groups"] = list(zip(*[group_df[c] for c in group_key_columns]))
distinct_groups = group_df["groups"].unique()
groups_to_predict = list(distinct_groups[:3])

print("-" * 65)
print(f"Unique groups that have been modeled: {distinct_groups}")
print(f"Subset of groups to generate predictions for: {groups_to_predict}")
print("-" * 65)
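
# The fragment stops before the subset forecast itself. A minimal, assumed
# continuation using Diviner's predict_groups API to forecast only the
# selected groups; the horizon and column name here are illustrative.
subset_forecast = base_arima.predict_groups(
    groups=groups_to_predict,
    n_periods=60,
    predict_col="forecast",
    return_conf_int=True,
)
print(subset_forecast.head())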