def test_prophet_extract_params():
    """Check that ``extract_model_params`` yields six entries for the fitted test data."""
    dataset = data_generator.generate_test_data(4, 6, 1000, "2020-01-01", 1)

    estimator = GroupedProphet(uncertainty_samples=0)
    fitted = estimator.fit(dataset.df, dataset.key_columns)

    extracted = fitted.extract_model_params()
    expected_rows = 6
    assert len(extracted) == expected_rows
def grouped_prophet_example(locations, start_dt, artifact_path):
    """Fit a GroupedProphet model on generated data and log the results to MLflow.

    Data is produced by ``generate_data``, a ``GroupedProphet`` model is fit
    using ``country`` and ``city`` as the group key columns, and the extracted
    parameters and cross-validation metrics are printed. The model is then
    logged with ``mlflow.diviner.log_model`` and both tables are logged twice:
    once as CSV artifacts and once as JSON dictionaries.

    Args:
        locations: Passed to ``generate_data`` as ``location_data``.
        start_dt: Passed to ``generate_data`` as ``start_dt``.
        artifact_path: Artifact path used when logging the model.

    Returns:
        The result of ``mlflow.get_artifact_uri`` for ``artifact_path``.
    """
    print("Generating data...\n")
    generated = generate_data(location_data=locations, start_dt=start_dt)

    print("Data Generated.\nBuilding GroupedProphet Model...")
    estimator = GroupedProphet(n_changepoints=96, uncertainty_samples=0)
    fitted = estimator.fit(
        df=generated,
        group_key_columns=["country", "city"],
        y_col="watts",
        datetime_col="datetime",
    )
    print("GroupedProphet model built.\n")

    param_table = fitted.extract_model_params()
    print(f"Params: \n{param_table.to_string()}")

    print("Running Cross Validation on all groups...\n")
    cv_settings = {
        "horizon": "120 hours",
        "period": "480 hours",
        "initial": "960 hours",
        "parallel": "threads",
        "rolling_window": 0.05,
        "monthly": False,
    }
    metric_table = fitted.cross_validate_and_score(**cv_settings)
    print(f"Cross Validation Metrics: \n{metric_table.to_string()}")

    mlflow.diviner.log_model(diviner_model=fitted, artifact_path=artifact_path)

    # Option 1: write the DataFrames out as .csv files and log the directory.
    # No column or object manipulation is needed for this route. The temporary
    # directory context manager removes the local files once the artifacts
    # have been logged.
    with tempfile.TemporaryDirectory() as scratch_dir:
        csv_outputs = (
            (param_table, "params.csv"),
            (metric_table, "metrics.csv"),
        )
        for table, file_name in csv_outputs:
            table.to_csv(f"{scratch_dir}/{file_name}", index=False, header=True)
        mlflow.log_artifacts(scratch_dir, artifact_path="run_data")

    # Option 2: log the tables as JSON with ``mlflow.log_dict()``, skipping the
    # local serialization step.
    # NOTE: Both options are shown here only for demonstration. Pick one of
    # them based on how the per-group metrics and parameters will be consumed,
    # viewed, or analyzed; do not use both.
    # NB: The Prophet model parameters contain object references that cannot be
    # serialized to JSON, so the parameter table is coerced to strings first.
    json_outputs = {
        "params.json": param_table.astype(dtype=str, errors="ignore"),
        "metrics.json": metric_table,
    }
    for file_name, table in json_outputs.items():
        mlflow.log_dict(table.to_dict(), file_name)

    return mlflow.get_artifact_uri(artifact_path=artifact_path)
def test_prophet_df_naming_overrides():
    """Check that fitting works when the value and datetime columns are renamed."""
    dataset = data_generator.generate_test_data(2, 1, 1000, "2020-01-01", 1)
    frame = dataset.df

    # Rename in place so the generated dataset's own frame carries the new names.
    column_overrides = {"ds": "datetime", "y": "sales"}
    frame.rename(columns=column_overrides, inplace=True)
    assert {"datetime", "sales"} <= set(frame.columns)

    fitted = GroupedProphet().fit(
        frame,
        dataset.key_columns,
        y_col="sales",
        datetime_col="datetime",
    )

    extracted = fitted.extract_model_params()
    assert len(extracted) == 1