예제 #1
0
def test_prophet_extract_params():
    """Verify the length of the parameters extracted from a fitted GroupedProphet.

    The test data is requested with ``6`` as its second argument and the
    extracted parameters are expected to have length 6 -- presumably one entry
    per generated series; confirm against ``data_generator.generate_test_data``.
    """
    dataset = data_generator.generate_test_data(4, 6, 1000, "2020-01-01", 1)

    # NOTE(review): uncertainty_samples=0 is handed to the model constructor;
    # presumably it turns off uncertainty sampling to keep the fit cheap -- confirm.
    estimator = GroupedProphet(uncertainty_samples=0)
    fitted = estimator.fit(dataset.df, dataset.key_columns)

    extracted = fitted.extract_model_params()

    assert len(extracted) == 6
예제 #2
0
파일: train.py 프로젝트: szczeles/mlflow
def grouped_prophet_example(locations, start_dt, artifact_path):
    """Fit a GroupedProphet model, cross-validate it, and log the results to MLflow.

    The function prints progress messages, logs the fitted model with
    ``mlflow.diviner.log_model``, and then records the extracted parameters and
    cross-validation metrics twice: once as CSV artifacts and once as JSON via
    ``mlflow.log_dict``. Both logging styles are shown for demonstration only;
    in practice pick one of them.

    Args:
        locations: Forwarded to ``generate_data`` as ``location_data``.
        start_dt: Forwarded to ``generate_data`` as ``start_dt``.
        artifact_path: Artifact path the model is logged under; also the path
            whose URI is returned.

    Returns:
        The result of ``mlflow.get_artifact_uri`` for ``artifact_path``.
    """
    # Columns used as ``group_key_columns`` when fitting.
    group_cols = ["country", "city"]

    print("Generating data...\n")
    data = generate_data(location_data=locations, start_dt=start_dt)
    print("Data Generated.\nBuilding GroupedProphet Model...")

    estimator = GroupedProphet(n_changepoints=96, uncertainty_samples=0)
    model = estimator.fit(
        df=data,
        group_key_columns=group_cols,
        y_col="watts",
        datetime_col="datetime",
    )
    print("GroupedProphet model built.\n")

    params = model.extract_model_params()

    print(f"Params: \n{params.to_string()}")

    print("Running Cross Validation on all groups...\n")
    # Cross-validation settings collected in one place, then passed as keyword arguments.
    cv_options = {
        "horizon": "120 hours",
        "period": "480 hours",
        "initial": "960 hours",
        "parallel": "threads",
        "rolling_window": 0.05,
        "monthly": False,
    }
    metrics = model.cross_validate_and_score(**cv_options)
    print(f"Cross Validation Metrics: \n{metrics.to_string()}")

    mlflow.diviner.log_model(diviner_model=model, artifact_path=artifact_path)

    # Option 1: write the DataFrames to CSV files and log the whole directory as
    # artifacts. No column or object manipulation is needed for this route, and the
    # temporary directory context manager removes the local files once the artifact
    # logging has finished.
    with tempfile.TemporaryDirectory() as tmpdir:
        params.to_csv(f"{tmpdir}/params.csv", index=False, header=True)
        metrics.to_csv(f"{tmpdir}/metrics.csv", index=False, header=True)
        mlflow.log_artifacts(tmpdir, artifact_path="run_data")

    # Option 2: log the parameters and metrics as JSON without touching local disk.
    # NOTE: this requires casting fields that cannot be serialized to JSON.
    # NOTE: do not use both options in real code. They are shown side by side as
    # either/or alternatives, depending on how the per-group metrics and parameters
    # will be consumed, viewed, or analyzed.

    # NB: the Prophet model parameters contain object references, so they are coerced
    # to strings before going through ``mlflow.log_dict()``.
    params_as_text = params.astype(dtype=str, errors="ignore")

    mlflow.log_dict(params_as_text.to_dict(), "params.json")

    mlflow.log_dict(metrics.to_dict(), "metrics.json")

    model_uri = mlflow.get_artifact_uri(artifact_path=artifact_path)
    return model_uri
예제 #3
0
def test_prophet_df_naming_overrides():
    """Fit GroupedProphet on data whose ``ds``/``y`` columns have been renamed.

    The generated frame has ``ds`` renamed to ``datetime`` and ``y`` renamed to
    ``sales``; those names are then passed as the third and fourth positional
    arguments of ``fit``. The extracted parameters are expected to have
    length 1.
    """
    dataset = data_generator.generate_test_data(2, 1, 1000, "2020-01-01", 1)
    frame = dataset.df
    # Renamed in place, so the object obtained from ``dataset.df`` is itself modified.
    frame.rename(columns={"ds": "datetime", "y": "sales"}, inplace=True)

    # Sanity check that both overridden column names are present before fitting.
    assert {"datetime", "sales"} <= set(frame.columns)

    fitted = GroupedProphet().fit(frame, dataset.key_columns, "sales", "datetime")

    assert len(fitted.extract_model_params()) == 1