Code Example #1
 def test_cross_validation_default_value_check(self):
     m = Prophet()
     m.fit(self.__df)
     # Default value of initial should be equal to 3 * horizon
     df_cv1 = diagnostics.cross_validation(m,
                                           horizon='32 days',
                                           period='10 days')
     df_cv2 = diagnostics.cross_validation(m,
                                           horizon='32 days',
                                           period='10 days',
                                           initial='96 days')
     self.assertAlmostEqual(((df_cv1['y'] - df_cv2['y'])**2).sum(), 0.0)
     self.assertAlmostEqual(((df_cv1['yhat'] - df_cv2['yhat'])**2).sum(),
                            0.0)
Code Example #2
 def test(self):
     df_cv = cross_validation(self.model, horizon='365 days')
     df = performance_metrics(df_cv)
     df.drop(['horizon'], axis=1, inplace=True)
     df = df.apply(np.sum)  # DataFrame.apply has no 'inplace' argument
     print(df)
     return df
Code Example #3
def group_cross_validation(
    grouped_model, horizon, period=None, initial=None, parallel=None, cutoffs=None
):
    """
    Model debugging utility function for performing cross validation for each model within
    the GroupedProphet collection.
    Note: the output contains one Pandas DataFrame per grouping key, with rows for each
    cutoff boundary in the datetime series, so it can be many times larger than the original
    input data used to train the models.

    :param grouped_model: A fitted GroupedProphet model
    :param horizon: pd.Timedelta-formatted string (e.g. "14 days" or "18 hours") defining the
                    span of each validation window.
    :param period: How frequently a new validation window starts. Default is
                    0.5 * horizon.
    :param initial: The minimum amount of training data to include in the first cross validation
                    window.
    :param parallel: mode of computing cross validation statistics. (None, processes, or threads)
    :param cutoffs: List of pd.Timestamp values that specify cutoff overrides to be used in
                    conducting cross validation.
    :return: Dictionary of {group_key: cross validation Pandas DataFrame}
    """
    return {
        group_key: cross_validation(
            model, horizon, period, initial, parallel, cutoffs, disable_tqdm=True
        )
        for group_key, model in grouped_model.model.items()
    }
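
For context, a usage sketch: GroupedProphet here appears to be the grouped-model wrapper from the diviner library, so a call might look like the following. The import path, fit signature, and the df/"region" inputs are assumptions, not shown in the excerpt.

# A sketch only, assuming diviner's GroupedProphet API.
from diviner import GroupedProphet

# df is assumed to be a long-format frame with ds, y, and a "region" column.
grouped_model = GroupedProphet().fit(df, group_key_columns=["region"])
cv_results = group_cross_validation(grouped_model, horizon="14 days", parallel="threads")
for group_key, df_cv in cv_results.items():
    print(group_key, df_cv.shape)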
Code Example #4
 def test_cross_validation_extra_regressors(self):
     df = self.__df.copy()
     df['extra'] = range(df.shape[0])
     df['is_conditional_week'] = np.arange(df.shape[0]) // 7 % 2
     m = Prophet()
     m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
     m.add_seasonality(name='conditional_weekly',
                       period=7,
                       fourier_order=3,
                       prior_scale=2.,
                       condition_name='is_conditional_week')
     m.add_regressor('extra')
     m.fit(df)
     df_cv = diagnostics.cross_validation(m,
                                          horizon='4 days',
                                          period='4 days',
                                          initial='135 days')
     self.assertEqual(len(np.unique(df_cv['cutoff'])), 2)
     period = pd.Timedelta('4 days')
     dc = df_cv['cutoff'].diff()
     dc = dc[dc > pd.Timedelta(0)].min()
     self.assertTrue(dc >= period)
     self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
     df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
     self.assertAlmostEqual(
         np.sum((df_merged['y_x'] - df_merged['y_y'])**2), 0.0)
Code Example #5
    def test_check_single_cutoff_forecast_func_calls(self):
        m = Prophet()
        m.fit(self.__df)
        mock_predict = pd.DataFrame({
            'ds': pd.date_range(start='2012-09-17', periods=3),
            'yhat': np.arange(16, 19),
            'yhat_lower': np.arange(15, 18),
            'yhat_upper': np.arange(17, 20),
            'y': np.arange(16.5, 19.5),
            'cutoff': [datetime.date(2012, 9, 15)] * 3,
        })

        # cross validation with 3 and 7 forecasts
        for args, forecasts in ((['4 days', '10 days', '115 days'], 3),
                                (['4 days', '4 days', '115 days'], 7)):
            with patch(
                    'prophet.diagnostics.single_cutoff_forecast') as mock_func:
                mock_func.return_value = mock_predict
                df_cv = diagnostics.cross_validation(m, *args)
                # check single forecast function called expected number of times
                self.assertEqual(diagnostics.single_cutoff_forecast.call_count,
                                 forecasts)
Code Example #6
File: prophet.py Project: EmiCareOfCell44/BigDL
 def _eval_cross_validation(self, expected_horizon):
     df_cv = cross_validation(self.model, horizon=expected_horizon)
     df_p = performance_metrics(df_cv,
                                metrics=[self.metric],
                                rolling_window=1)
     return {
         self.metric: np.mean(df_p[self.metric].values)
     }  # here we use the mean metrics
Code Example #7
    def test_cross_validation(self):
        m = Prophet()
        m.fit(self.__df)
        # Calculate the number of cutoff points (k)
        horizon = pd.Timedelta('4 days')
        period = pd.Timedelta('10 days')
        initial = pd.Timedelta('115 days')
        methods = [None, 'processes', 'threads', CustomParallelBackend()]

        try:
            from dask.distributed import Client
            client = Client(processes=False)  # noqa
            methods.append("dask")
        except ImportError:
            pass

        for parallel in methods:
            df_cv = diagnostics.cross_validation(m,
                                                 horizon='4 days',
                                                 period='10 days',
                                                 initial='115 days',
                                                 parallel=parallel)
            self.assertEqual(len(np.unique(df_cv['cutoff'])), 3)
            self.assertEqual(max(df_cv['ds'] - df_cv['cutoff']), horizon)
            self.assertTrue(
                min(df_cv['cutoff']) >= min(self.__df['ds']) + initial)
            dc = df_cv['cutoff'].diff()
            dc = dc[dc > pd.Timedelta(0)].min()
            self.assertTrue(dc >= period)
            self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
            # Each y in df_cv and self.__df with same ds should be equal
            df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
            self.assertAlmostEqual(
                np.sum((df_merged['y_x'] - df_merged['y_y'])**2), 0.0)
            df_cv = diagnostics.cross_validation(m,
                                                 horizon='4 days',
                                                 period='10 days',
                                                 initial='135 days')
            self.assertEqual(len(np.unique(df_cv['cutoff'])), 1)
            with self.assertRaises(ValueError):
                diagnostics.cross_validation(m,
                                             horizon='10 days',
                                             period='10 days',
                                             initial='140 days')

        # invalid alias
        with self.assertRaisesRegex(ValueError, "'parallel' should be one"):
            diagnostics.cross_validation(m, horizon="4 days", parallel="bad")

        # no map method
        with self.assertRaisesRegex(ValueError, "'parallel' should be one"):
            diagnostics.cross_validation(m,
                                         horizon="4 days",
                                         parallel=object())
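
The CustomParallelBackend used above is not shown in the excerpt. As the final assertion hints ("no map method"), Prophet's cross_validation accepts any object that exposes a map method, so a minimal serial stand-in could be as small as this sketch:

class CustomParallelBackend:
    # Prophet's diagnostics only require a `map(func, *iterables)` method on
    # the backend; this stand-in evaluates each cutoff forecast serially.
    def map(self, func, *iterables):
        return [func(*args) for args in zip(*iterables)]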
Code Example #8
import itertools
import json

import numpy as np
import pandas as pd
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
from prophet.serialize import model_to_json


def prophet_model(Train, Days):

    # make sure to get the input here, then uncomment the rest
    ###
    # HERE, convert the JSON that looks like this
    # {"income":[{income fields},{},{}], "expense":spend_dict}
    # to a dataframe
    # then you can just do this: jsonify({"predictions": yourlist})
    ###
    k_trans_pro = Train
    n = len(k_trans_pro)
    d = Days
    pro_train_df = k_trans_pro[0:n - d]
    pro_test_df_y = k_trans_pro[n - d:]
    pro_test_df = k_trans_pro[n - d:].drop(['y'], axis=1)

    param_grid = {
        'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
        'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
    }

    # Generate all combinations of parameters
    all_params = [
        dict(zip(param_grid.keys(), v))
        for v in itertools.product(*param_grid.values())
    ]
    rmses = []  # Store the RMSEs for each params here

    # Use cross validation to evaluate all parameters
    for params in all_params:
        m = Prophet(**params).fit(pro_train_df)  # Fit model with given params
        df_cv = cross_validation(m,
                                 period='30 days',
                                 horizon='30 days',
                                 parallel="processes")
        df_p = performance_metrics(df_cv, rolling_window=1)
        rmses.append(df_p['rmse'].values[0])

    # Find the best parameters
    tuning_results = pd.DataFrame(all_params)
    tuning_results['rmse'] = rmses
    best_params = all_params[np.argmin(rmses)]
    pro_model_tuned = Prophet(**best_params).fit(pro_train_df)

    with open('serialized_model.json', 'w') as fout:
        json.dump(model_to_json(pro_model_tuned), fout)  # Save model
    ## Here you find the predictions

    ###
    # HERE, convert the predictions to a JSON doc that looks like this
    # { "predictions": [ {"ds": value, "yhat":value}, {"ds": value, "yhat":value}, {"ds": value, "yhat":value}, ]}
    # just get a list from your dataframe.
    # then you can just do this: jsonify({"predictions": yourlist})

    return None
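
A hedged sketch of the prediction step the comments above describe, reusing the function's pro_model_tuned and d; jsonify would come from Flask in the calling app:

# Sketch only: generate d days of forecasts and shape them as the comment asks.
future = pro_model_tuned.make_future_dataframe(periods=d)
forecast = pro_model_tuned.predict(future)
out = forecast[['ds', 'yhat']].tail(d).copy()
out['ds'] = out['ds'].astype(str)  # make the timestamps JSON-friendly
predictions = out.to_dict(orient='records')
# then: jsonify({"predictions": predictions})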
Code Example #9
 def test_cross_validation_custom_cutoffs(self):
     m = Prophet()
     m.fit(self.__df)
     # When a list of cutoffs is specified,
     # the cutoff dates in df_cv are exactly those specified
     df_cv1 = diagnostics.cross_validation(
         m,
         horizon='32 days',
         period='10 days',
         cutoffs=[pd.Timestamp('2012-07-31'),
                  pd.Timestamp('2012-08-31')])
     self.assertEqual(len(df_cv1['cutoff'].unique()), 2)
Code Example #10
 def test_cross_validation_uncertainty_disabled(self):
     df = self.__df.copy()
     for uncertainty in [0, False]:
         m = Prophet(uncertainty_samples=uncertainty)
         m.fit(df, algorithm='Newton')
         df_cv = diagnostics.cross_validation(m,
                                              horizon='4 days',
                                              period='4 days',
                                              initial='115 days')
         expected_cols = ['ds', 'yhat', 'y', 'cutoff']
         self.assertTrue(
             all(col in expected_cols for col in df_cv.columns.tolist()))
         df_p = diagnostics.performance_metrics(df_cv)
         self.assertTrue('coverage' not in df_p.columns)
Code Example #11
 def test_cross_validation_logistic_or_flat_growth(self):
     for growth in ['logistic', 'flat']:
         with self.subTest(i=growth):
             df = self.__df.copy()
             if growth == "logistic":
                 df['cap'] = 40
             m = Prophet(growth=growth).fit(df)
             df_cv = diagnostics.cross_validation(m,
                                                  horizon='1 days',
                                                  period='1 days',
                                                  initial='140 days')
             self.assertEqual(len(np.unique(df_cv['cutoff'])), 2)
             self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
             df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
             self.assertAlmostEqual(
                 np.sum((df_merged['y_x'] - df_merged['y_y'])**2), 0.0)
Code Example #12
File: custom_cutoffs.py Project: ankane/prophet
import pandas as pd
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics

# float_precision='high' required for pd.read_csv to match precision of Rover.read_csv
df = pd.read_csv('examples/example_wp_log_peyton_manning.csv',
                 float_precision='high')

m = Prophet()
m.fit(df, seed=123)

cutoffs = pd.to_datetime(['2013-02-15', '2013-08-15', '2014-02-15'])
df_cv2 = cross_validation(m, cutoffs=cutoffs, horizon='365 days')
print(len(df_cv2))
print(df_cv2.head())
print(df_cv2.tail())
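
Metrics can then be computed on the custom-cutoff results exactly as with automatically generated cutoffs, e.g.:

# Aggregate all horizons into a single row of metrics.
df_p2 = performance_metrics(df_cv2, rolling_window=1)
print(df_p2)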
Code Example #13
def _cross_validate_and_score_model(
    model,
    horizon,
    period=None,
    initial=None,
    parallel=None,
    cutoffs=None,
    metrics=None,
    **kwargs,
):
    """
    Wrapper around Prophet's `cross_validation` and `performance_metrics` functions within
    the `prophet.diagnostics` module.
    Provides backtesting metric evaluation based on the configurations specified for
    initial, horizon, and period (optionally, a specified 'cutoffs' list of DateTime or string
    date-time entries can override the backtesting split boundaries for training and validation).

    :param model: Prophet model instance that has been fit
    :param horizon: pd.Timedelta-formatted string defining how far ahead to forecast in
                    order to acquire error metrics.
                    Examples: '30 days', '1 year'
    :param period: How frequently a new validation window starts. Default is
                    0.5 * horizon.
    :param initial: The minimum amount of training data to include in the first cross validation
                    window.
    :param parallel: mode of computing cross validation statistics. (None, processes, or threads)
    :param cutoffs: List of pd.Timestamp values that specify cutoff overrides to be used in
                    conducting cross validation.
    :param metrics: List of metrics to evaluate and return for the provided model
                    note: see supported metrics in Prophet documentation:
                    https://facebook.github.io/prophet/docs/diagnostics.html#cross-validation
    :param kwargs: cross validation overrides for Prophet's implementation of metric evaluation.
    :return: Dict[str, float] of each metric and its value averaged over each time horizon.
    """

    if metrics:
        metrics = prophet_config_utils._remove_coverage_metric_if_necessary(
            metrics, model.uncertainty_samples
        )

    # extract `performance_metrics` *args if present
    performance_metrics_defaults = signature(performance_metrics).parameters

    performance_metrics_args = {}
    for param, value in performance_metrics_defaults.items():
        if value.default != value.empty and value.name != "metrics":
            performance_metrics_args[param] = kwargs.pop(param, value.default)

    model_cv = cross_validation(
        model=model,
        horizon=horizon,
        period=period,
        initial=initial,
        parallel=parallel,
        cutoffs=cutoffs,
        disable_tqdm=kwargs.pop("disable_tqdm", True),
    )
    horizon_metrics = performance_metrics(
        model_cv, metrics=metrics, **performance_metrics_args
    )

    return {
        metric: horizon_metrics[metric].mean()
        for metric in list(horizon_metrics.columns)
        if metric != "horizon"
    }
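
A hedged usage sketch of the wrapper above (the training frame and metric list are illustrative; prophet_config_utils is an internal helper of the surrounding project):

# Sketch: score a fitted model over a 30-day horizon; each metric is averaged.
import pandas as pd
from prophet import Prophet

train_df = pd.DataFrame({  # any prepared ds/y frame works here
    "ds": pd.date_range("2020-01-01", periods=400),
    "y": range(400),
})
m = Prophet().fit(train_df)
scores = _cross_validate_and_score_model(
    m, horizon="30 days", initial="180 days", metrics=["rmse", "mae"]
)
print(scores)  # e.g. {'rmse': ..., 'mae': ...}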
Code Example #14
File: train.py Project: TheVinhLuong102/mlflow
def extract_params(pr_model):
    return {attr: getattr(pr_model, attr) for attr in serialize.SIMPLE_ATTRIBUTES}


sales_data = pd.read_csv(SOURCE_DATA)

with mlflow.start_run():

    model = Prophet().fit(sales_data)

    params = extract_params(model)

    metric_keys = ["mse", "rmse", "mae", "mape", "mdape", "smape", "coverage"]
    metrics_raw = cross_validation(
        model=model,
        horizon="365 days",
        period="180 days",
        initial="710 days",
        parallel="threads",
        disable_tqdm=True,
    )
    cv_metrics = performance_metrics(metrics_raw)
    metrics = {k: cv_metrics[k].mean() for k in metric_keys}

    print(f"Logged Metrics: \n{json.dumps(metrics, indent=2)}")
    print(f"Logged Params: \n{json.dumps(params, indent=2)}")

    mlflow.prophet.log_model(model, artifact_path=ARTIFACT_PATH)
    mlflow.log_params(params)
    mlflow.log_metrics(metrics)
    model_uri = mlflow.get_artifact_uri(ARTIFACT_PATH)
    print(f"Model artifact logged to: {model_uri}")
Code Example #15
 def test_performance_metrics(self):
     m = Prophet()
     m.fit(self.__df)
     df_cv = diagnostics.cross_validation(m,
                                          horizon='4 days',
                                          period='10 days',
                                          initial='90 days')
     # Aggregation level none
     df_none = diagnostics.performance_metrics(df_cv, rolling_window=-1)
     self.assertEqual(
         set(df_none.columns),
         {
             'horizon', 'coverage', 'mae', 'mape', 'mdape', 'mse', 'rmse',
             'smape'
         },
     )
     self.assertEqual(df_none.shape[0], 16)
     # Aggregation level 0
     df_0 = diagnostics.performance_metrics(df_cv, rolling_window=0)
     self.assertEqual(len(df_0), 4)
     self.assertEqual(len(df_0['horizon'].unique()), 4)
     # Aggregation level 0.2
     df_horizon = diagnostics.performance_metrics(df_cv, rolling_window=0.2)
     self.assertEqual(len(df_horizon), 4)
     self.assertEqual(len(df_horizon['horizon'].unique()), 4)
     # Aggregation level all
     df_all = diagnostics.performance_metrics(df_cv, rolling_window=1)
     self.assertEqual(df_all.shape[0], 1)
     for metric in ['mse', 'mape', 'mae', 'coverage']:
         self.assertAlmostEqual(df_all[metric].values[0],
                                df_none[metric].mean())
     self.assertAlmostEqual(df_all['mdape'].values[0],
                            df_none['mdape'].median())
     # Custom list of metrics
     df_horizon = diagnostics.performance_metrics(
         df_cv,
         metrics=['coverage', 'mse'],
     )
     self.assertEqual(
         set(df_horizon.columns),
         {'coverage', 'mse', 'horizon'},
     )
     # Skip MAPE
     df_cv.loc[0, 'y'] = 0.
     df_horizon = diagnostics.performance_metrics(
         df_cv,
         metrics=['coverage', 'mape'],
     )
     self.assertEqual(
         set(df_horizon.columns),
         {'coverage', 'horizon'},
     )
     df_horizon = diagnostics.performance_metrics(
         df_cv,
         metrics=['mape'],
     )
     self.assertIsNone(df_horizon)
     # List of metrics containing non-valid metrics
     with self.assertRaises(ValueError):
         diagnostics.performance_metrics(
             df_cv,
             metrics=['mse', 'error_metric'],
         )
Code Example #16
File: hyperparameters.py Project: ankane/prophet
import itertools

import numpy as np
import pandas as pd
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics

# float_precision='high' required for pd.read_csv to match precision of Rover.read_csv
df = pd.read_csv('examples/example_wp_log_peyton_manning.csv',
                 float_precision='high')
cutoffs = pd.to_datetime(['2013-02-15', '2013-08-15', '2014-02-15'])

param_grid = {
    'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
}

# Generate all combinations of parameters
all_params = [
    dict(zip(param_grid.keys(), v))
    for v in itertools.product(*param_grid.values())
]
rmses = []  # Store the RMSEs for each params here

# Use cross validation to evaluate all parameters
for params in all_params:
    m = Prophet(**params).fit(df)  # Fit model with given params
    df_cv = cross_validation(m, cutoffs=cutoffs, horizon='30 days')
    df_p = performance_metrics(df_cv, rolling_window=1)
    rmses.append(df_p['rmse'].values[0])

# Find the best parameters
tuning_results = pd.DataFrame(all_params)
tuning_results['rmse'] = rmses
print(tuning_results)
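
From the tuning table, the best parameter set can be selected and refit, mirroring Example #8:

best_params = all_params[np.argmin(rmses)]  # row with the lowest RMSE
m_best = Prophet(**best_params).fit(df)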
Code Example #17
import pandas as pd
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics

# float_precision='high' required for pd.read_csv to match precision of Rover.read_csv
df = pd.read_csv('examples/example_wp_log_peyton_manning.csv',
                 float_precision='high')

m = Prophet()
m.fit(df, seed=123)

df_cv = cross_validation(m,
                         initial='730 days',
                         period='180 days',
                         horizon='365 days')
print(len(df_cv))
print(df_cv.head())
print(df_cv.tail())

df_p = performance_metrics(df_cv)
print(len(df_p))
print(df_p.head())
print(df_p.tail())
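
Prophet also ships a plotting helper for visualizing a cross-validation metric as a function of horizon:

from prophet.plot import plot_cross_validation_metric

# Plot MAPE against forecast horizon from the cross-validation results.
fig = plot_cross_validation_metric(df_cv, metric='mape')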