Python cross_validation Examples, prophet.diagnostics.cross_validation Python Examples

Example #1

0

Show file

 def test_cross_validation_default_value_check(self):
     """Check that omitting ``initial`` matches passing ``initial='96 days'``.

     With a 32-day horizon, the implicit ``initial`` is expected to equal
     three times the horizon, so both runs must yield the same ``y`` and
     ``yhat`` values.
     """
     model = Prophet()
     model.fit(self.__df)
     shared_kwargs = {'horizon': '32 days', 'period': '10 days'}
     implicit = diagnostics.cross_validation(model, **shared_kwargs)
     explicit = diagnostics.cross_validation(model,
                                             initial='96 days',
                                             **shared_kwargs)
     # The summed squared difference of each column must vanish.
     for column in ('y', 'yhat'):
         squared_gap = ((implicit[column] - explicit[column])**2).sum()
         self.assertAlmostEqual(squared_gap, 0.0)

Example #2

0

Show file

 def test(self):
     """Cross-validate ``self.model`` and return the per-metric column sums.

     Runs cross validation with a 365-day horizon, computes the
     performance metrics, discards the ``horizon`` column and sums every
     remaining metric column.

     :return: the result of applying ``np.sum`` to each remaining column
              of the performance-metrics table.
     """
     df_cv = cross_validation(self.model, horizon='365 days')
     metrics = performance_metrics(df_cv).drop(['horizon'], axis=1)
     # ``DataFrame.apply`` has no ``inplace`` option: extra keywords are
     # forwarded to the applied function, and the outcome is returned
     # rather than written back, so it must be captured explicitly.
     totals = metrics.apply(np.sum)
     print(totals)
     return totals

Example #3

0

Show file

File: prophet_cross_validate.py Project: hamza648-prog/darthsvade

def group_cross_validation(
    grouped_model, horizon, period=None, initial=None, parallel=None, cutoffs=None
):
    """
    Debugging helper that cross validates every model stored in a GroupedProphet collection.
    note: one Pandas DataFrame is produced per grouping key, covering each cutoff boundary in
    the datetime series, so the combined output will be many times larger than the data that
    was originally used to train the model.

    :param grouped_model: A fit GroupedProphet model
    :param horizon: pd.Timedelta formatted string (i.e. "14 days" or "18 hours") giving the
                    span of time used to build each validation set.
    :param period: how often a windowed validation takes place. Default is
                    0.5 * horizon value.
    :param initial: The minimum amount of training data in the first cross validation
                    window.
    :param parallel: mode of computing cross validation statistics. (None, processes, or threads)
    :param cutoffs: List of pd.Timestamp values overriding the cutoffs used when
                    conducting cross validation.
    :return: Dictionary of {group_key: cross validation Pandas DataFrame}
    """
    results = {}
    for group_key, model in grouped_model.model.items():
        # progress bars are always suppressed for the per-group runs
        results[group_key] = cross_validation(
            model, horizon, period, initial, parallel, cutoffs, disable_tqdm=True
        )
    return results

Example #4

0

Show file

 def test_cross_validation_extra_regressors(self):
     """Cross validate a model with an extra regressor and custom seasonalities.

     The model carries a monthly seasonality, a conditional weekly
     seasonality and one additional regressor; the cutoffs and the
     observed ``y`` values of the result are then checked.
     """
     history = self.__df.copy()
     n_rows = history.shape[0]
     history['extra'] = range(n_rows)
     # alternate 0/1 in blocks of seven consecutive rows
     history['is_conditional_week'] = np.arange(n_rows) // 7 % 2

     model = Prophet()
     model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
     model.add_seasonality(
         name='conditional_weekly',
         period=7,
         fourier_order=3,
         prior_scale=2.,
         condition_name='is_conditional_week',
     )
     model.add_regressor('extra')
     model.fit(history)

     df_cv = diagnostics.cross_validation(
         model, horizon='4 days', period='4 days', initial='135 days')
     cutoffs = df_cv['cutoff']
     self.assertEqual(len(np.unique(cutoffs)), 2)

     # consecutive distinct cutoffs must be at least one period apart
     min_period = pd.Timedelta('4 days')
     gaps = cutoffs.diff()
     smallest_gap = gaps[gaps > pd.Timedelta(0)].min()
     self.assertTrue(smallest_gap >= min_period)

     # every forecast date lies strictly after its cutoff
     self.assertTrue((cutoffs < df_cv['ds']).all())

     # the y column of the result must agree with the history for equal ds
     merged = pd.merge(df_cv, self.__df, 'left', on='ds')
     squared_error = np.sum((merged['y_x'] - merged['y_y'])**2)
     self.assertAlmostEqual(squared_error, 0.0)

Example #5

0

Show file

    def test_check_single_cutoff_forecast_func_calls(self):
        """Check how often ``single_cutoff_forecast`` is called per run.

        The function is patched with a mock returning a fixed frame, and
        the mock's call count is compared with the expected number of
        forecasts for two horizon/period/initial combinations.
        """
        m = Prophet()
        m.fit(self.__df)
        canned_forecast = pd.DataFrame({
            'ds': pd.date_range(start='2012-09-17', periods=3),
            'yhat': np.arange(16, 19),
            'yhat_lower': np.arange(15, 18),
            'yhat_upper': np.arange(17, 20),
            'y': np.arange(16.5, 19.5),
            'cutoff': [datetime.date(2012, 9, 15)] * 3
        })

        # (positional horizon, period, initial) -> expected number of calls
        scenarios = (
            (['4 days', '10 days', '115 days'], 3),
            (['4 days', '4 days', '115 days'], 7),
        )
        target = 'prophet.diagnostics.single_cutoff_forecast'
        for cv_args, expected_calls in scenarios:
            with patch(target) as mock_func:
                mock_func.return_value = canned_forecast
                diagnostics.cross_validation(m, *cv_args)
                # the patched function must have run the expected number of times
                self.assertEqual(
                    diagnostics.single_cutoff_forecast.call_count,
                    expected_calls)

Example #6

0

Show file

File: prophet.py Project: EmiCareOfCell44/BigDL

 def _eval_cross_validation(self, expected_horizon):
     """Score ``self.model`` through cross validation.

     :param expected_horizon: horizon forwarded to ``cross_validation``.
     :return: single-entry dict mapping ``self.metric`` to the mean of
              that metric's column in the performance-metrics table.
     """
     metric_name = self.metric
     cv_frame = cross_validation(self.model, horizon=expected_horizon)
     scores = performance_metrics(cv_frame,
                                  metrics=[metric_name],
                                  rolling_window=1)
     # the reported value is the mean of the metric column
     mean_score = np.mean(scores[metric_name].values)
     return {metric_name: mean_score}

Example #7

0

Show file

    def test_cross_validation(self):
        """Exercise ``cross_validation`` with every available parallel mode.

        For each mode the cutoffs, horizon and observed values of the
        result are verified; invalid ``parallel`` arguments must raise
        ``ValueError``.
        """
        m = Prophet()
        m.fit(self.__df)
        # Reference values the resulting cutoffs are checked against
        horizon = pd.Timedelta('4 days')
        period = pd.Timedelta('10 days')
        initial = pd.Timedelta('115 days')
        backends = [None, 'processes', 'threads', CustomParallelBackend()]

        # dask is optional: only exercised when it can be imported
        try:
            from dask.distributed import Client
            client = Client(processes=False)  # noqa
            backends.append("dask")
        except ImportError:
            pass

        for backend in backends:
            cv_result = diagnostics.cross_validation(
                m,
                horizon='4 days',
                period='10 days',
                initial='115 days',
                parallel=backend,
            )
            cutoffs = cv_result['cutoff']
            stamps = cv_result['ds']
            self.assertEqual(len(np.unique(cutoffs)), 3)
            self.assertEqual(max(stamps - cutoffs), horizon)
            earliest_allowed = min(self.__df['ds']) + initial
            self.assertTrue(min(cutoffs) >= earliest_allowed)
            gaps = cutoffs.diff()
            smallest_gap = gaps[gaps > pd.Timedelta(0)].min()
            self.assertTrue(smallest_gap >= period)
            self.assertTrue((cutoffs < stamps).all())
            # Rows sharing a ds must carry the same y in both frames
            merged = pd.merge(cv_result, self.__df, 'left', on='ds')
            squared_error = np.sum((merged['y_x'] - merged['y_y'])**2)
            self.assertAlmostEqual(squared_error, 0.0)

            # a larger initial window leaves a single cutoff
            single_cutoff = diagnostics.cross_validation(
                m, horizon='4 days', period='10 days', initial='135 days')
            self.assertEqual(len(np.unique(single_cutoff['cutoff'])), 1)

            # this horizon/initial combination must be rejected
            with self.assertRaises(ValueError):
                diagnostics.cross_validation(
                    m, horizon='10 days', period='10 days', initial='140 days')

        expected_message = "'parallel' should be one"

        # invalid alias
        with self.assertRaisesRegex(ValueError, expected_message):
            diagnostics.cross_validation(m, horizon="4 days", parallel="bad")

        # no map method
        with self.assertRaisesRegex(ValueError, expected_message):
            diagnostics.cross_validation(
                m, horizon="4 days", parallel=object())

Example #8

0

Show file

def prophet_model(Train, Days):
    """Grid-search Prophet hyperparameters on ``Train`` and save the best model.

    The last ``Days`` rows of ``Train`` are split off as a hold-out set and
    the remaining rows are used for tuning. Every combination of the
    parameter grid is scored by cross-validated RMSE, the best combination
    is refit on the training rows, and the resulting model is written to
    ``serialized_model.json``.

    :param Train: sliceable table with a ``y`` column
                  (presumably a pandas DataFrame -- TODO confirm with caller).
    :param Days: number of trailing rows to hold out.
    :return: always ``None``; predictions are not produced yet.
    """
    # TODO: make sure to get the input here, then uncomment the rest.
    # TODO: convert the JSON that looks like this
    #   {"income":[{income fields},{},{}], "expense":spend_dict}
    # to a dataframe; then you can just do this:
    #   jsonify({"predictions": yourlist})
    split_at = len(Train) - Days
    pro_train_df = Train[0:split_at]
    # Hold-out rows: not consumed yet, kept for the pending prediction step.
    pro_test_df_y = Train[split_at:]
    pro_test_df = Train[split_at:].drop(['y'], axis=1)

    param_grid = {
        'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
        'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
    }

    # Every combination of the grid values, as keyword dictionaries
    grid_names = param_grid.keys()
    all_params = [
        dict(zip(grid_names, combo))
        for combo in itertools.product(*param_grid.values())
    ]

    # One cross-validated RMSE per parameter combination, in grid order
    rmses = []
    for params in all_params:
        candidate = Prophet(**params).fit(pro_train_df)
        df_cv = cross_validation(
            candidate,
            horizon='30 days',
            period='30 days',
            parallel="processes",
        )
        df_p = performance_metrics(df_cv, rolling_window=1)
        rmses.append(df_p['rmse'].values[0])

    # Tabulate the scores and pick the combination with the lowest RMSE
    tuning_results = pd.DataFrame(all_params)
    tuning_results['rmse'] = rmses
    best_index = np.argmin(rmses)
    best_params = all_params[best_index]
    pro_model_tuned = Prophet(**best_params).fit(pro_train_df)

    # Persist the tuned model
    with open('serialized_model.json', 'w') as fout:
        json.dump(model_to_json(pro_model_tuned), fout)

    # TODO: compute the predictions here and convert them to a JSON doc
    # that looks like this:
    #   { "predictions": [ {"ds": value, "yhat":value}, {"ds": value, "yhat":value}, ]}
    # Just get a list from your dataframe; then you can just do this:
    #   jsonify({"predictions": yourlist})

    return None

Example #9

0

Show file

 def test_cross_validation_custom_cutoffs(self):
     """Check the number of distinct cutoffs when ``cutoffs`` is given.

     Two explicit cutoff timestamps are passed, and the result must
     contain exactly two unique cutoff values.
     """
     model = Prophet()
     model.fit(self.__df)
     # With an explicit list, the cutoff dates of the result are expected
     # to be the ones requested here.
     requested = [
         pd.Timestamp(day) for day in ('2012-07-31', '2012-08-31')
     ]
     df_cv1 = diagnostics.cross_validation(
         model, horizon='32 days', period='10 days', cutoffs=requested)
     distinct_cutoffs = df_cv1['cutoff'].unique()
     self.assertEqual(len(distinct_cutoffs), 2)

Example #10

0

Show file

 def test_cross_validation_uncertainty_disabled(self):
     """Cross validate models built with ``uncertainty_samples`` of 0 and False.

     The result may only contain the ``ds``, ``yhat``, ``y`` and
     ``cutoff`` columns, and the performance metrics must not include
     ``coverage``.
     """
     history = self.__df.copy()
     expected_cols = ['ds', 'yhat', 'y', 'cutoff']
     for uncertainty in (0, False):
         model = Prophet(uncertainty_samples=uncertainty)
         model.fit(history, algorithm='Newton')
         df_cv = diagnostics.cross_validation(
             model, horizon='4 days', period='4 days', initial='115 days')
         # no column outside the expected set may be present
         unexpected = [
             col for col in df_cv.columns.tolist()
             if col not in expected_cols
         ]
         self.assertTrue(not unexpected)
         df_p = diagnostics.performance_metrics(df_cv)
         self.assertTrue('coverage' not in df_p.columns)

Example #11

0

Show file

 def test_cross_validation_logistic_or_flat_growth(self):
     """Cross validate models using ``logistic`` and ``flat`` growth.

     Each growth mode runs in its own sub-test; the logistic case gets a
     constant ``cap`` column of 40 before fitting.
     """
     for growth in ('logistic', 'flat'):
         with self.subTest(i=growth):
             history = self.__df.copy()
             if growth == "logistic":
                 history['cap'] = 40
             model = Prophet(growth=growth).fit(history)
             df_cv = diagnostics.cross_validation(
                 model, horizon='1 days', period='1 days',
                 initial='140 days')
             cutoffs = df_cv['cutoff']
             self.assertEqual(len(np.unique(cutoffs)), 2)
             # every forecast date lies strictly after its cutoff
             self.assertTrue((cutoffs < df_cv['ds']).all())
             # y in the result must agree with the history for equal ds
             merged = pd.merge(df_cv, self.__df, 'left', on='ds')
             squared_error = np.sum((merged['y_x'] - merged['y_y'])**2)
             self.assertAlmostEqual(squared_error, 0.0)

Example #12

0

Show file

File: custom_cutoffs.py Project: ankane/prophet

import pandas as pd
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics

# Example script: cross validate a Prophet model at explicitly chosen cutoffs.

# float_precision='high' required for pd.read_csv to match precision of Rover.read_csv
df = pd.read_csv('examples/example_wp_log_peyton_manning.csv',
                 float_precision='high')

# Fit a default model on the example data.
# NOTE(review): seed=123 is presumably forwarded to the fitting backend to
# make the run reproducible -- confirm against the Prophet version in use.
m = Prophet()
m.fit(df, seed=123)

# Cross validate at three fixed cutoff dates, forecasting 365 days past each.
cutoffs = pd.to_datetime(['2013-02-15', '2013-08-15', '2014-02-15'])
df_cv2 = cross_validation(m, cutoffs=cutoffs, horizon='365 days')
# Show the row count plus the first and last rows of the result.
print(len(df_cv2))
print(df_cv2.head())
print(df_cv2.tail())

Example #13

0

Show file

File: prophet_utils.py Project: hamza648-prog/darthsvade

def _cross_validate_and_score_model(
    model,
    horizon,
    period=None,
    initial=None,
    parallel=None,
    cutoffs=None,
    metrics=None,
    **kwargs,
):
    """
    Convenience wrapper that chains Prophet's `cross_validation` and `performance_metrics`
    functions from the `prophet.diagnostics` module.
    Backtesting metrics are evaluated according to the supplied initial, horizon, and period
    settings (optionally, a 'cutoffs' list of DateTime or string date-time entries can be
    given to override the backtesting split boundaries for training and validation).

    :param model: Prophet model instance that has been fit
    :param horizon: String pd.Timedelta format giving the length of forecasting values
                    to generate in order to acquire error metrics.
                    examples: '30 days', '1 year'
    :param period: how often a windowed validation takes place. Default is
                    0.5 * horizon value.
    :param initial: The minimum amount of training data in the first cross validation
                    window.
    :param parallel: mode of computing cross validation statistics. (None, processes, or threads)
    :param cutoffs: List of pd.Timestamp values overriding the cutoffs used when
                    conducting cross validation.
    :param metrics: List of metrics to evaluate and return for the provided model
                    note: see supported metrics in Prophet documentation:
                    https://facebook.github.io/prophet/docs/diagnostics.html#cross-validation
    :param kwargs: overrides for the defaulted `performance_metrics` arguments, plus an
                   optional `disable_tqdm` flag for `cross_validation` (True when absent).
    :return: Dict[str, float] of each metric and its value averaged over each time horizon.
    """

    if metrics:
        # the helper receives the model's uncertainty_samples setting alongside the metrics
        metrics = prophet_config_utils._remove_coverage_metric_if_necessary(
            metrics, model.uncertainty_samples
        )

    # Pull overrides for every defaulted `performance_metrics` parameter (other than
    # "metrics") out of kwargs, falling back to the parameter's own default.
    performance_metrics_args = {
        name: kwargs.pop(name, spec.default)
        for name, spec in signature(performance_metrics).parameters.items()
        if spec.default != spec.empty and spec.name != "metrics"
    }

    disable_tqdm = kwargs.pop("disable_tqdm", True)
    model_cv = cross_validation(
        model=model,
        horizon=horizon,
        period=period,
        initial=initial,
        parallel=parallel,
        cutoffs=cutoffs,
        disable_tqdm=disable_tqdm,
    )
    horizon_metrics = performance_metrics(
        model_cv, metrics=metrics, **performance_metrics_args
    )

    # average every column except the horizon itself
    averaged = {}
    for metric in list(horizon_metrics.columns):
        if metric == "horizon":
            continue
        averaged[metric] = horizon_metrics[metric].mean()
    return averaged

Example #14

0

Show file

File: train.py Project: TheVinhLuong102/mlflow

    return {attr: getattr(pr_model, attr) for attr in serialize.SIMPLE_ATTRIBUTES}


# Load the training data from the SOURCE_DATA location.
sales_data = pd.read_csv(SOURCE_DATA)

# Everything below is recorded inside a single MLflow run.
with mlflow.start_run():

    # Fit a Prophet model with default settings.
    model = Prophet().fit(sales_data)

    # Collect the model attributes that are logged as run parameters.
    params = extract_params(model)

    # Names of the performance-metric columns that get averaged and logged.
    metric_keys = ["mse", "rmse", "mae", "mape", "mdape", "smape", "coverage"]
    # Despite its name, metrics_raw holds the cross-validation output;
    # the metrics themselves are derived from it just below.
    metrics_raw = cross_validation(
        model=model,
        horizon="365 days",
        period="180 days",
        initial="710 days",
        parallel="threads",
        disable_tqdm=True,
    )
    cv_metrics = performance_metrics(metrics_raw)
    # Reduce each metric column to its mean.
    metrics = {k: cv_metrics[k].mean() for k in metric_keys}

    print(f"Logged Metrics: \n{json.dumps(metrics, indent=2)}")
    print(f"Logged Params: \n{json.dumps(params, indent=2)}")

    # Log the model, its parameters and the averaged metrics to the run,
    # then report where the model artifact ended up.
    mlflow.prophet.log_model(model, artifact_path=ARTIFACT_PATH)
    mlflow.log_params(params)
    mlflow.log_metrics(metrics)
    model_uri = mlflow.get_artifact_uri(ARTIFACT_PATH)
    print(f"Model artifact logged to: {model_uri}")

Example #15

0

Show file

 def test_performance_metrics(self):
     """Check ``performance_metrics`` for several windows and metric lists.

     Covers the ``rolling_window`` values -1, 0, 0.2 and 1, a custom
     metric list, the case where MAPE is skipped after a ``y`` value is
     set to zero, and an invalid metric name.
     """
     m = Prophet()
     m.fit(self.__df)
     df_cv = diagnostics.cross_validation(
         m, horizon='4 days', period='10 days', initial='90 days')
     compute = diagnostics.performance_metrics

     # Aggregation level none
     df_none = compute(df_cv, rolling_window=-1)
     every_column = {
         'horizon', 'coverage', 'mae', 'mape', 'mdape', 'mse', 'rmse',
         'smape'
     }
     self.assertEqual(set(df_none.columns), every_column)
     self.assertEqual(df_none.shape[0], 16)

     # Aggregation levels 0 and 0.2 both yield four distinct horizons
     for window in (0, 0.2):
         df_window = compute(df_cv, rolling_window=window)
         self.assertEqual(len(df_window), 4)
         self.assertEqual(len(df_window['horizon'].unique()), 4)

     # Aggregation level all
     df_all = compute(df_cv, rolling_window=1)
     self.assertEqual(df_all.shape[0], 1)
     for metric in ['mse', 'mape', 'mae', 'coverage']:
         overall = df_all[metric].values[0]
         self.assertAlmostEqual(overall, df_none[metric].mean())
     self.assertAlmostEqual(df_all['mdape'].values[0],
                            df_none['mdape'].median())

     # Custom list of metrics
     df_custom = compute(df_cv, metrics=['coverage', 'mse'])
     self.assertEqual(set(df_custom.columns),
                      {'coverage', 'mse', 'horizon'})

     # Skip MAPE
     df_cv.loc[0, 'y'] = 0.
     df_without_mape = compute(df_cv, metrics=['coverage', 'mape'])
     self.assertEqual(set(df_without_mape.columns),
                      {'coverage', 'horizon'})
     self.assertIsNone(compute(df_cv, metrics=['mape']))

     # List of metrics containing non-valid metrics
     with self.assertRaises(ValueError):
         compute(df_cv, metrics=['mse', 'error_metric'])

Example #16

0

Show file

File: hyperparameters.py Project: ankane/prophet

from prophet.diagnostics import cross_validation, performance_metrics

# Example script: tune two Prophet hyperparameters by cross-validated RMSE.

# float_precision='high' required for pd.read_csv to match precision of Rover.read_csv
df = pd.read_csv('examples/example_wp_log_peyton_manning.csv',
                 float_precision='high')
# Fixed cutoff dates shared by every cross-validation run below.
cutoffs = pd.to_datetime(['2013-02-15', '2013-08-15', '2014-02-15'])

# Candidate values per hyperparameter (4 x 4 = 16 combinations).
param_grid = {
    'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
}

# Generate all combinations of parameters
all_params = [
    dict(zip(param_grid.keys(), v))
    for v in itertools.product(*param_grid.values())
]
rmses = []  # Store the RMSEs for each params here

# Use cross validation to evaluate all parameters
for params in all_params:
    m = Prophet(**params).fit(df)  # Fit model with given params
    df_cv = cross_validation(m, cutoffs=cutoffs, horizon='30 days')
    df_p = performance_metrics(df_cv, rolling_window=1)
    # Keep the RMSE from the first row of the metrics table.
    rmses.append(df_p['rmse'].values[0])

# Find the best parameters
# One row per combination, with its RMSE in the extra 'rmse' column.
tuning_results = pd.DataFrame(all_params)
tuning_results['rmse'] = rmses
print(tuning_results)

Example #17

0

Show file

import pandas as pd
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics

# Example script: cross validate a Prophet model and print its metrics.

# float_precision='high' required for pd.read_csv to match precision of Rover.read_csv
df = pd.read_csv('examples/example_wp_log_peyton_manning.csv',
                 float_precision='high')

# Fit a default model on the example data.
# NOTE(review): seed=123 is presumably forwarded to the fitting backend to
# make the run reproducible -- confirm against the Prophet version in use.
m = Prophet()
m.fit(df, seed=123)

# Cross validate with a 730-day initial window, a 180-day period and a
# 365-day horizon.
df_cv = cross_validation(m,
                         initial='730 days',
                         period='180 days',
                         horizon='365 days')
# Show the row count plus the first and last rows of the result.
print(len(df_cv))
print(df_cv.head())
print(df_cv.tail())

# Derive the performance metrics from the cross-validation result and show
# them the same way.
df_p = performance_metrics(df_cv)
print(len(df_p))
print(df_p.head())
print(df_p.tail())