Example 1
def test_model_raises_if_not_fit():
    model = GroupedProphet()
    with pytest.raises(
        DivinerException,
        match="The model has not been fit. Please fit the model first.",
    ):
        model.forecast(30, "days")
Example 2
def test_model_raises_if_already_fit():
    train = data_generator.generate_test_data(2, 1, 1000, "2020-01-01", 1)
    model = GroupedProphet().fit(train.df, train.key_columns)
    with pytest.raises(
        DivinerException,
        match="The model has already been fit. Create a new instance to fit the model again.",
    ):
        model.fit(train.df, train.key_columns)
Example 3
def test_prophet_extract_params():
    train = data_generator.generate_test_data(4, 6, 1000, "2020-01-01", 1)

    model = GroupedProphet(uncertainty_samples=0).fit(train.df, train.key_columns)

    params = model.extract_model_params()

    assert len(params) == 6
Example 4
def grouped_prophet_example(locations, start_dt, artifact_path):

    print("Generating data...\n")
    data = generate_data(location_data=locations, start_dt=start_dt)
    grouping_keys = ["country", "city"]
    print("Data Generated.\nBuilding GroupedProphet Model...")

    model = GroupedProphet(n_changepoints=96, uncertainty_samples=0).fit(
        df=data, group_key_columns=grouping_keys, y_col="watts", datetime_col="datetime"
    )
    print("GroupedProphet model built.\n")

    params = model.extract_model_params()

    print(f"Params: \n{params.to_string()}")

    print("Running Cross Validation on all groups...\n")
    metrics = model.cross_validate_and_score(
        horizon="120 hours",
        period="480 hours",
        initial="960 hours",
        parallel="threads",
        rolling_window=0.05,
        monthly=False,
    )
    print(f"Cross Validation Metrics: \n{metrics.to_string()}")

    mlflow.diviner.log_model(diviner_model=model, artifact_path=artifact_path)

    # As an alternative to saving the metrics and params directly with a `log_dict()`
    # call, the DataFrames can be serialized locally as .csv files, without requiring
    # the column or object manipulation shown below this block. A temporary directory
    # with a context wrapper cleans up the files from the local OS after the artifact
    # logging is complete:

    with tempfile.TemporaryDirectory() as tmpdir:
        params.to_csv(f"{tmpdir}/params.csv", index=False, header=True)
        metrics.to_csv(f"{tmpdir}/metrics.csv", index=False, header=True)
        mlflow.log_artifacts(tmpdir, artifact_path="run_data")

    # Saving the parameters and metrics as JSON without having to serialize locally.
    # NOTE: this requires casting fields that cannot be serialized to JSON.
    # NOTE: Do not use both of these methods. They are shown as an either/or alternative,
    # based on how you would choose to consume, view, or analyze the per-group metrics
    # and parameters.

    # NB: There are object references present in the Prophet model parameters. Coerce to string if
    # using a JSON serialization approach with ``mlflow.log_dict()``.
    params = params.astype(dtype=str, errors="ignore")

    mlflow.log_dict(params.to_dict(), "params.json")

    mlflow.log_dict(metrics.to_dict(), "metrics.json")

    return mlflow.get_artifact_uri(artifact_path=artifact_path)
Example 5
def test_prophet_forecast_correct_start():

    train = data_generator.generate_test_data(2, 5, 1000, "2020-01-01", 1)
    expected_start_of_forecast = max(train.df["ds"]) + timedelta(days=1)
    model = GroupedProphet().fit(train.df, train.key_columns)
    forecasted_data = model.forecast(10, "D")

    # Check that the earliest date in the forecast DataFrame is one day after the last training date.
    min_forecast = min(forecasted_data["ds"])

    assert expected_start_of_forecast == min_forecast
    assert len(forecasted_data) == 50
Example 6
def test_prophet_save_and_load():
    # Tests serialization, deserialization, and utilization of forecasting API from loaded model
    save_path = os.path.join("/tmp/grouped_prophet_test", "model")

    train = data_generator.generate_test_data(2, 2, 1000, "2020-01-01", 1)
    grouped_model = GroupedProphet().fit(train.df, train.key_columns)
    grouped_model.save(save_path)
    loaded_model = GroupedProphet.load(save_path)
    forecasts = loaded_model.forecast(25, "D")

    shutil.rmtree(os.path.dirname(save_path))

    assert len(forecasts) == 50
Example 7
def test_prophet_df_naming_overrides():

    train = data_generator.generate_test_data(2, 1, 1000, "2020-01-01", 1)
    train_df = train.df
    train_df.rename(columns={"ds": "datetime", "y": "sales"}, inplace=True)

    assert {"datetime", "sales"}.issubset(set(train_df.columns))

    model = GroupedProphet().fit(train_df, train.key_columns, "sales", "datetime")

    params = model.extract_model_params()

    assert len(params) == 1
Example 8
def test_prophet_cross_validation_extract():

    train = data_generator.generate_test_data(4, 6, 1000, "2020-01-01", 1)

    model = GroupedProphet(uncertainty_samples=0).fit(train.df, train.key_columns)

    scores = model.cross_validate_and_score(
        initial="100 days", period="90 days", horizon="15 days", parallel=None
    )

    assert all(scores["rmse"] > 0)
    assert len(scores) == 6
    assert "coverage" not in scores
Example 9
def test_prophet_manual_predict():
    train = data_generator.generate_test_data(2, 1, 1000, "2020-01-01", 1)
    train_df = train.df

    predict_df = train_df[["key1", "key0", "ds"]][-10:]

    model = GroupedProphet().fit(train_df, train.key_columns)

    prediction = model.predict(predict_df)

    assert len(prediction) == 10

    for _, row in prediction.iterrows():
        assert row["yhat"] > 0
Example 10
def test_prophet_group_subset_predict_raises_and_warns():

    _rows_to_generate = 30
    train = data_generator.generate_test_data(2, 1, 1000, "2020-01-01", 1)
    train_df = train.df

    model = GroupedProphet().fit(train_df, train.key_columns)

    key_entries = []
    for v in train_df[["key1", "key0"]].iloc[[0]].to_dict().values():
        key_entries.append(list(v.values())[0])
    groups = [(key_entries[0], key_entries[1]), ("missing", "key")]

    with pytest.raises(
        DivinerException, match="Cannot perform predictions due to submitted"
    ):
        model.predict_groups(groups, _rows_to_generate, "D")

    with pytest.warns(
        UserWarning, match="Specified groups are unable to be predicted due to "
    ):
        model.predict_groups(groups, _rows_to_generate, "D", on_error="warn")

    with pytest.raises(
        DivinerException, match="Groups specified for subset forecasting are not"
    ):
        model.predict_groups(
            ("invalid", "invalid"), _rows_to_generate, "D", on_error="ignore"
        )
Example 11
def test_prophet_cross_validation_extract_custom_scores():

    train = data_generator.generate_test_data(4, 2, 1000, "2020-01-01", 1)

    model = GroupedProphet(uncertainty_samples=0).fit(train.df, train.key_columns)

    scores = model.cross_validate_and_score(
        initial="100 days",
        period="90 days",
        horizon="15 days",
        parallel=None,
        metrics=["rmse", "mape"],
        disable_tqdm=False,
        monthly=True,
    )

    assert all(scores["rmse"] > 0)
    assert len(scores) == 2
    assert "coverage" not in scores
Example 12
def test_prophet_default_fit():

    train = data_generator.generate_test_data(4, 2, 1000, "2020-01-01", 1)
    model = GroupedProphet().fit(train.df, train.key_columns)
    first_model = _get_individual_model(model, 0)

    assert len(first_model.history) > 0
    assert (
        len(first_model.params["trend"][0]) == 1000
    )  # one fitted trend value for each point in the series
    assert len(list(model.model.keys())) == 2
Example 13
def test_prophet_group_subset_predict():

    _rows_to_generate = 30
    train = data_generator.generate_test_data(2, 1, 1000, "2020-01-01", 1)
    train_df = train.df

    model = GroupedProphet().fit(train_df, train.key_columns)

    key_entries = []
    for v in train_df[["key1", "key0"]].iloc[[0]].to_dict().values():
        key_entries.append(list(v.values())[0])
    groups = [tuple(key_entries)]

    group_prediction = model.predict_groups(groups, _rows_to_generate, "D")

    assert len(group_prediction) == _rows_to_generate
    _key1 = group_prediction["key1"].unique()
    assert len(_key1) == 1
    assert _key1[0] == groups[0][0]
    _key0 = group_prediction["key0"].unique()
    assert len(_key0) == 1
    assert _key0[0] == groups[0][1]
Example 14
def test_prophet_execution_with_kwargs_override_for_pystan():

    train = data_generator.generate_test_data(4, 6, 1000, "2020-01-01", 1)

    default_prophet_uncertainty_samples = Prophet().uncertainty_samples

    model = GroupedProphet(uncertainty_samples=0).fit(
        train.df, train.key_columns, algorithm="LBFGS"
    )

    last_model = _get_individual_model(model, 5)

    assert last_model.uncertainty_samples == 0
    assert default_prophet_uncertainty_samples != last_model.uncertainty_samples
Example 15
def test_prophet_with_bad_group_data():

    train = data_generator.generate_test_data(2, 1, 1000, "2020-01-01", 1)
    train_df = train.df
    bad_data = pd.DataFrame(
        {
            "ds": datetime.strptime("2021-01-01", "%Y-%M-%d"),
            "y": -500.3,
            "key1": "bad",
            "key0": "data",
        },
        index=[1000],
    )

    train_df_add = pd.concat([train_df, bad_data])

    with pytest.warns(RuntimeWarning, match="An error occurred while fitting group"):
        model = GroupedProphet().fit(train_df_add, train.key_columns)
    assert ("bad", "data") not in model.model.keys()
Example 16
def test_prophet_save_load_override_object():
    """Test to ensure that deserialization updates object properly for all attributes"""

    train1 = data_generator.generate_test_data(3, 2, 1000, "2020-01-01", 1)
    train2 = data_generator.generate_test_data(2, 2, 500, "2021-01-01", 1)

    model1 = GroupedProphet().fit(train1.df, train1.key_columns)
    model2 = GroupedProphet().fit(train2.df, train2.key_columns)

    model1_group_keys = deepcopy(model1._group_key_columns)
    model1_model = deepcopy(model1.model)

    # save model 2
    save_path = os.path.join("/tmp/group_prophet_test", "model2serdetest.gpm")
    model2.save(save_path)

    # use model1 object to load model2
    reloaded = model1.load(save_path)

    assert set(reloaded._group_key_columns) != set(model1_group_keys)
    assert reloaded.model.keys() == model2.model.keys()
    assert reloaded.model.keys() != model1_model.keys()
Example 17
def grouped_prophet(diviner_data):
    return GroupedProphet(n_changepoints=20, uncertainty_samples=0).fit(
        df=diviner_data.df, group_key_columns=diviner_data.key_columns, y_col="y", datetime_col="ds"
    )
Example 18
def execute_grouped_prophet():
    """
    This function call will generate synthetic grouped time series data in a normalized
    format. The structure will be:

    ============ ====== =========== =========== ===========
    ds           y      group_key_1 group_key_2 group_key_3
    ============ ====== =========== =========== ===========
    "2016-02-01" 1234.5 A           B           C
    ============ ====== =========== =========== ===========

    with the grouping key values generated per ``ds`` entry and the ``y`` values assigned
    in a non-deterministic fashion.

    To use this API, the normalized representation of the data is required, such that a
    particular target variable's data ``'y'`` and the associated indexed datetime values
    in ``'ds'`` are 'stacked' (unioned) from a more traditional denormalized data storage
    paradigm.

    For guidance on this data transposition from denormalized representations, see:
    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.melt.html
    """

    generated_data = generate_example_data(
        column_count=3,
        series_count=10,
        series_size=365 * 5,
        start_dt="2016-02-01",
        days_period=1,
    )

    # Extract the normalized grouped datetime series data
    training_data = generated_data.df

    # Extract the names of the grouping columns that define the unique series data
    grouping_key_columns = generated_data.key_columns

    # Create a GroupedProphet model instance
    grouped_model = GroupedProphet(n_changepoints=20, uncertainty_samples=0).fit(
        training_data, grouping_key_columns
    )

    # Save the model to the local file system
    save_path = "/tmp/grouped_prophet.gpm"
    grouped_model.save(path=save_path)

    # Load the model from the local storage location
    retrieved_model = GroupedProphet.load(save_path)

    # Score the model and print the results
    model_scores = retrieved_model.cross_validate_and_score(
        horizon="30 days",
        period="180 days",
        initial="365 days",
        parallel="threads",
        rolling_window=0.05,
        monthly=False,
    )

    print(f"Model scores:\n{model_scores.to_string()}")

    # Run a forecast for each group
    forecasts = retrieved_model.forecast(horizon=20, frequency="D")

    print(f"Forecasted data:\n{forecasts[:50].to_string()}")

    # Extract the parameters from each model for logging
    params = retrieved_model.extract_model_params()

    print(f"Model parameters:\n{params.to_string()}")
Example 19
    generated_data = generate_example_data(
        column_count=3,
        series_count=10,
        series_size=365 * 5,
        start_dt="2016-02-01",
        days_period=1,
    )

    # Extract the normalized grouped datetime series data
    training_data = generated_data.df

    # Extract the names of the grouping columns that define the unique series data
    group_key_columns = generated_data.key_columns

    # Create a GroupedProphet model instance
    grouped_model = GroupedProphet(n_changepoints=20, uncertainty_samples=0).fit(
        training_data, group_key_columns
    )

    # Get a subset of group keys to generate forecasts for
    group_df = training_data.copy()
    group_df["groups"] = list(zip(*[group_df[c] for c in group_key_columns]))
    distinct_groups = group_df["groups"].unique()
    groups_to_predict = list(distinct_groups[:3])

    print("-" * 65)
    print(f"\nUnique groups that have been modeled: \n{distinct_groups}\n")
    print(f"Subset of groups to generate predictions for: \n{groups_to_predict}\n")
    print("-" * 65)

    forecasts = grouped_model.predict_groups(
        groups=groups_to_predict,