def test_cross_validation_default_value_check(self):
    """Check that omitting ``initial`` matches passing ``initial='96 days'``.

    With ``horizon='32 days'`` the default ``initial`` is expected to be
    three times the horizon (96 days), so both cross-validation runs must
    yield the same ``y`` and ``yhat`` columns.
    """
    model = Prophet()
    model.fit(self.__df)

    shared_kwargs = {'horizon': '32 days', 'period': '10 days'}
    implicit_cv = diagnostics.cross_validation(model, **shared_kwargs)
    explicit_cv = diagnostics.cross_validation(
        model, initial='96 days', **shared_kwargs)

    # The summed squared difference is (almost) zero only when the two
    # result frames agree row by row.
    for column in ('y', 'yhat'):
        squared_gap = ((implicit_cv[column] - explicit_cv[column]) ** 2).sum()
        self.assertAlmostEqual(squared_gap, 0.0)
def test(self):
    """Cross-validate ``self.model`` and total every performance metric.

    Runs cross validation with a 365-day horizon, computes the performance
    metrics table, removes the ``horizon`` column and sums each remaining
    metric column.

    Returns:
        The per-column sums produced by applying ``np.sum`` to the metrics
        table (one entry per metric column). The same object is printed
        before being returned.
    """
    df_cv = cross_validation(self.model, horizon='365 days')
    df = performance_metrics(df_cv)
    # ``DataFrame.apply`` has no ``inplace`` option: extra keywords are
    # forwarded to the applied function, so ``inplace=True`` reached
    # ``np.sum`` and raised TypeError. ``apply`` also never mutates the
    # frame, so the result has to be rebound explicitly.
    df = df.drop(['horizon'], axis=1).apply(np.sum)
    print(df)
    return df
def group_cross_validation(
    grouped_model, horizon, period=None, initial=None, parallel=None, cutoffs=None
):
    """
    Run cross validation for every model held by a GroupedProphet collection.

    note: one cross validation DataFrame is produced per grouping key, with
    rows for each cutoff boundary in the datetime series, so the combined
    output will be many times larger than the data the model was trained on.

    :param grouped_model: A fit GroupedProphet model
    :param horizon: pd.Timedelta formatted string (i.e. "14 days" or
                    "18 hours") giving the length of each validation set.
    :param period: How often a windowed validation occurs.
                   Default is 0.5 * horizon value.
    :param initial: The minimum amount of training data to include in the
                    first cross validation window.
    :param parallel: mode of computing cross validation statistics.
                     (None, processes, or threads)
    :param cutoffs: List of pd.Timestamp values that specify cutoff
                    overrides to be used in conducting cross validation.
    :return: Dictionary of {group_key: cross validation Pandas DataFrame}
    """
    results_by_group = {}
    for group_key, model in grouped_model.model.items():
        # The progress bar is always disabled for the per-group runs.
        results_by_group[group_key] = cross_validation(
            model, horizon, period, initial, parallel, cutoffs, disable_tqdm=True
        )
    return results_by_group
def test_cross_validation_extra_regressors(self):
    """Cross-validate a model with an extra regressor and a conditional seasonality.

    Verifies the number of cutoffs, the minimum spacing between cutoffs,
    that every cutoff precedes its forecast dates, and that the ``y``
    values in the result match the source data on the same ``ds``.
    """
    train = self.__df.copy()
    n_rows = train.shape[0]
    train['extra'] = range(n_rows)
    # Alternates 0/1 in blocks of seven rows.
    train['is_conditional_week'] = np.arange(n_rows) // 7 % 2

    model = Prophet()
    model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    model.add_seasonality(
        name='conditional_weekly',
        period=7,
        fourier_order=3,
        prior_scale=2.,
        condition_name='is_conditional_week',
    )
    model.add_regressor('extra')
    model.fit(train)

    cv_frame = diagnostics.cross_validation(
        model, horizon='4 days', period='4 days', initial='135 days')
    self.assertEqual(len(np.unique(cv_frame['cutoff'])), 2)

    min_spacing = pd.Timedelta('4 days')
    cutoff_steps = cv_frame['cutoff'].diff()
    # Only positive steps mark a move from one cutoff to the next.
    smallest_step = cutoff_steps[cutoff_steps > pd.Timedelta(0)].min()
    self.assertTrue(smallest_step >= min_spacing)
    self.assertTrue((cv_frame['cutoff'] < cv_frame['ds']).all())

    # After the merge, y_x comes from the CV output and y_y from the source.
    merged = pd.merge(cv_frame, self.__df, 'left', on='ds')
    self.assertAlmostEqual(
        np.sum((merged['y_x'] - merged['y_y']) ** 2), 0.0)
def test_check_single_cutoff_forecast_func_calls(self):
    """Check how often ``single_cutoff_forecast`` is invoked by cross validation.

    The forecast function is patched to return a canned frame, and the
    call count is compared with the expected number of forecasts for two
    (horizon, period, initial) configurations.
    """
    model = Prophet()
    model.fit(self.__df)

    canned_forecast = pd.DataFrame({
        'ds': pd.date_range(start='2012-09-17', periods=3),
        'yhat': np.arange(16, 19),
        'yhat_lower': np.arange(15, 18),
        'yhat_upper': np.arange(17, 20),
        'y': np.arange(16.5, 19.5),
        'cutoff': [datetime.date(2012, 9, 15)] * 3
    })

    # (positional cross_validation arguments, expected number of forecasts)
    cases = (
        (['4 days', '10 days', '115 days'], 3),
        (['4 days', '4 days', '115 days'], 7),
    )
    for cv_args, expected_calls in cases:
        with patch(
                'prophet.diagnostics.single_cutoff_forecast') as mock_func:
            mock_func.return_value = canned_forecast
            diagnostics.cross_validation(model, *cv_args)
            # The count is read while the patch is still active.
            self.assertEqual(
                diagnostics.single_cutoff_forecast.call_count,
                expected_calls)
def _eval_cross_validation(self, expected_horizon):
    """Score ``self.model`` by cross validation over ``expected_horizon``.

    Computes ``self.metric`` with ``rolling_window=1`` and returns the mean
    of the resulting metric values as ``{metric_name: mean_value}``.
    """
    metric_name = self.metric
    cv_frame = cross_validation(self.model, horizon=expected_horizon)
    metric_table = performance_metrics(
        cv_frame, metrics=[metric_name], rolling_window=1)
    # The mean of the metric column is reported as the score.
    metric_values = metric_table[metric_name].values
    return {metric_name: np.mean(metric_values)}
def test_cross_validation(self):
    """Exercise ``cross_validation`` across every available parallel backend.

    For each backend the cutoff count, maximum horizon, earliest cutoff,
    cutoff spacing, cutoff/ds ordering and ``y`` consistency are checked.
    Afterwards a larger ``initial`` is checked to yield a single cutoff,
    and invalid configurations are checked to raise ``ValueError``.
    """
    model = Prophet()
    model.fit(self.__df)

    # Timedelta forms of the string arguments used below, for comparisons.
    horizon = pd.Timedelta('4 days')
    period = pd.Timedelta('10 days')
    initial = pd.Timedelta('115 days')

    backends = [None, 'processes', 'threads', CustomParallelBackend()]
    try:
        from dask.distributed import Client
        client = Client(processes=False)  # noqa
        backends.append("dask")
    except ImportError:
        # Dask is optional; without it the "dask" backend is not tested.
        pass

    for parallel in backends:
        cv_frame = diagnostics.cross_validation(
            model,
            horizon='4 days',
            period='10 days',
            initial='115 days',
            parallel=parallel,
        )
        self.assertEqual(len(np.unique(cv_frame['cutoff'])), 3)
        self.assertEqual(max(cv_frame['ds'] - cv_frame['cutoff']), horizon)
        self.assertTrue(
            min(cv_frame['cutoff']) >= min(self.__df['ds']) + initial)

        cutoff_steps = cv_frame['cutoff'].diff()
        smallest_step = cutoff_steps[cutoff_steps > pd.Timedelta(0)].min()
        self.assertTrue(smallest_step >= period)
        self.assertTrue((cv_frame['cutoff'] < cv_frame['ds']).all())

        # Each y in the CV output and in self.__df with the same ds
        # should be equal.
        merged = pd.merge(cv_frame, self.__df, 'left', on='ds')
        self.assertAlmostEqual(
            np.sum((merged['y_x'] - merged['y_y']) ** 2), 0.0)

    single_cutoff_cv = diagnostics.cross_validation(
        model, horizon='4 days', period='10 days', initial='135 days')
    self.assertEqual(len(np.unique(single_cutoff_cv['cutoff'])), 1)

    with self.assertRaises(ValueError):
        diagnostics.cross_validation(
            model, horizon='10 days', period='10 days', initial='140 days')

    # invalid alias
    with self.assertRaisesRegex(ValueError, "'parallel' should be one"):
        diagnostics.cross_validation(model, horizon="4 days", parallel="bad")

    # no map method
    with self.assertRaisesRegex(ValueError, "'parallel' should be one"):
        diagnostics.cross_validation(
            model, horizon="4 days", parallel=object())
def prophet_model(Train, Days):
    """Grid-search Prophet hyper-parameters and save the best model to disk.

    The last ``Days`` rows of ``Train`` are held out. Every combination of
    ``changepoint_prior_scale`` and ``seasonality_prior_scale`` is fitted on
    the remaining rows and scored by cross-validated RMSE. The combination
    with the lowest RMSE is refitted and written to
    ``serialized_model.json`` in the current working directory.

    :param Train: Sliceable table with a ``y`` column, passed to
                  ``Prophet.fit`` (presumably a pandas DataFrame -- confirm
                  against the caller).
    :param Days: Number of trailing rows to hold out from training.
    :return: None. Producing predictions is still a TODO.
    """
    # TODO: read the request input here. Convert the incoming JSON, shaped
    # like {"income": [{income fields}, {}, {}], "expense": spend_dict},
    # into a DataFrame before the steps below.
    history = Train
    total_rows = len(history)
    holdout = Days
    split_at = total_rows - holdout

    pro_train_df = history[0:split_at]
    # The hold-out frames are built but not used yet.
    pro_test_df_y = history[split_at:]
    pro_test_df = history[split_at:].drop(['y'], axis=1)

    param_grid = {
        'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
        'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
    }

    # One dict per combination of the grid values.
    grid_keys = param_grid.keys()
    all_params = [
        dict(zip(grid_keys, combo))
        for combo in itertools.product(*param_grid.values())
    ]

    # Cross-validated RMSE for each entry of all_params, in the same order.
    rmses = []
    for candidate in all_params:
        fitted = Prophet(**candidate).fit(pro_train_df)
        cv_frame = cross_validation(
            fitted, period='30 days', horizon='30 days', parallel="processes")
        scores = performance_metrics(cv_frame, rolling_window=1)
        rmses.append(scores['rmse'].values[0])

    # Tabulate the results and pick the lowest-RMSE parameters.
    tuning_results = pd.DataFrame(all_params)
    tuning_results['rmse'] = rmses
    best_params = all_params[np.argmin(rmses)]

    pro_model_tuned = Prophet(**best_params).fit(pro_train_df)
    with open('serialized_model.json', 'w') as fout:
        json.dump(model_to_json(pro_model_tuned), fout)  # Save model

    # TODO: compute the predictions here and convert them to a JSON document
    # shaped like {"predictions": [{"ds": value, "yhat": value}, ...]}.
    # Build a list from the forecast DataFrame, then return
    # jsonify({"predictions": yourlist}).
    return None
def test_cross_validation_custom_cutoffs(self):
    """Check that explicitly supplied cutoffs are the ones used.

    Two cutoff timestamps are passed in, and the result must contain
    exactly two distinct ``cutoff`` values.
    """
    model = Prophet()
    model.fit(self.__df)

    requested_cutoffs = [
        pd.Timestamp('2012-07-31'),
        pd.Timestamp('2012-08-31'),
    ]
    cv_frame = diagnostics.cross_validation(
        model,
        horizon='32 days',
        period='10 days',
        cutoffs=requested_cutoffs,
    )
    distinct_cutoffs = cv_frame['cutoff'].unique()
    self.assertEqual(len(distinct_cutoffs), 2)
def test_cross_validation_uncertainty_disabled(self):
    """Cross-validate with ``uncertainty_samples`` set to ``0`` and ``False``.

    In both cases the result may only contain the ``ds``, ``yhat``, ``y``
    and ``cutoff`` columns, and the performance metrics must not include
    ``coverage``.
    """
    train = self.__df.copy()
    allowed_cols = ['ds', 'yhat', 'y', 'cutoff']

    for uncertainty in [0, False]:
        model = Prophet(uncertainty_samples=uncertainty)
        model.fit(train, algorithm='Newton')
        cv_frame = diagnostics.cross_validation(
            model, horizon='4 days', period='4 days', initial='115 days')

        # Every column present must belong to the allowed set.
        present_cols = cv_frame.columns.tolist()
        self.assertTrue(all(col in allowed_cols for col in present_cols))

        metric_table = diagnostics.performance_metrics(cv_frame)
        self.assertTrue('coverage' not in metric_table.columns)
def test_cross_validation_logistic_or_flat_growth(self):
    """Run cross validation for the ``logistic`` and ``flat`` growth modes.

    Each mode runs in its own sub-test. The result must contain two
    cutoffs, every cutoff must precede its forecast dates, and the ``y``
    values must match the source data on the same ``ds``.
    """
    for growth in ('logistic', 'flat'):
        with self.subTest(i=growth):
            train = self.__df.copy()
            if growth == "logistic":
                # A cap column is supplied only for logistic growth.
                train['cap'] = 40

            model = Prophet(growth=growth).fit(train)
            cv_frame = diagnostics.cross_validation(
                model, horizon='1 days', period='1 days', initial='140 days')

            self.assertEqual(len(np.unique(cv_frame['cutoff'])), 2)
            self.assertTrue((cv_frame['cutoff'] < cv_frame['ds']).all())

            merged = pd.merge(cv_frame, self.__df, 'left', on='ds')
            self.assertAlmostEqual(
                np.sum((merged['y_x'] - merged['y_y']) ** 2), 0.0)
import pandas as pd
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics

# Example: cross validation with an explicit list of cutoff dates.

# float_precision='high' is required for pd.read_csv to match the precision
# of Rover.read_csv.
df = pd.read_csv(
    'examples/example_wp_log_peyton_manning.csv',
    float_precision='high',
)

m = Prophet()
m.fit(df, seed=123)

# Three hand-picked cutoffs, each forecast 365 days ahead.
cutoffs = pd.to_datetime(['2013-02-15', '2013-08-15', '2014-02-15'])
df_cv2 = cross_validation(m, cutoffs=cutoffs, horizon='365 days')

# Show the size of the result and its first and last rows.
print(len(df_cv2))
print(df_cv2.head())
print(df_cv2.tail())
def _cross_validate_and_score_model(
    model,
    horizon,
    period=None,
    initial=None,
    parallel=None,
    cutoffs=None,
    metrics=None,
    **kwargs,
):
    """
    Backtest a fit Prophet model and average each metric over the horizon.

    Wraps the `cross_validation` and `performance_metrics` functions of the
    `prophet.diagnostics` module. The backtesting windows are defined by
    `initial`, `horizon` and `period`; an explicit `cutoffs` list can
    override the split boundaries between training and validation.

    :param model: Prophet model instance that has been fit
    :param horizon: String pd.Timedelta format that defines the length of
                    forecasting values to generate in order to acquire
                    error metrics. examples: '30 days', '1 year'
    :param period: How often a windowed validation occurs.
                   Default is 0.5 * horizon value.
    :param initial: The minimum amount of training data to include in the
                    first cross validation window.
    :param parallel: mode of computing cross validation statistics.
                     (None, processes, or threads)
    :param cutoffs: List of pd.Timestamp values that specify cutoff
                    overrides to be used in conducting cross validation.
    :param metrics: List of metrics to evaluate and return for the model.
                    note: see supported metrics in Prophet documentation:
                    https://facebook.github.io/prophet/docs/diagnostics.html#cross-validation
    :param kwargs: Overrides for the defaulted `performance_metrics`
                   arguments (other than `metrics`), plus an optional
                   `disable_tqdm` flag for `cross_validation`
                   (True when not supplied).
    :return: Dict[str, float] of each metric and its value averaged over
             each time horizon.
    """
    if metrics:
        metrics = prophet_config_utils._remove_coverage_metric_if_necessary(
            metrics, model.uncertainty_samples
        )

    # Take every defaulted `performance_metrics` argument except `metrics`
    # from kwargs, falling back to the default declared in its signature.
    scoring_overrides = {}
    for name, spec in signature(performance_metrics).parameters.items():
        if spec.default != spec.empty and spec.name != "metrics":
            scoring_overrides[name] = kwargs.pop(name, spec.default)

    disable_progress = kwargs.pop("disable_tqdm", True)
    model_cv = cross_validation(
        model=model,
        horizon=horizon,
        period=period,
        initial=initial,
        parallel=parallel,
        cutoffs=cutoffs,
        disable_tqdm=disable_progress,
    )

    horizon_metrics = performance_metrics(
        model_cv, metrics=metrics, **scoring_overrides
    )

    # Average every column except the horizon column itself.
    averaged = {}
    for column in list(horizon_metrics.columns):
        if column != "horizon":
            averaged[column] = horizon_metrics[column].mean()
    return averaged
return {attr: getattr(pr_model, attr) for attr in serialize.SIMPLE_ATTRIBUTES} sales_data = pd.read_csv(SOURCE_DATA) with mlflow.start_run(): model = Prophet().fit(sales_data) params = extract_params(model) metric_keys = ["mse", "rmse", "mae", "mape", "mdape", "smape", "coverage"] metrics_raw = cross_validation( model=model, horizon="365 days", period="180 days", initial="710 days", parallel="threads", disable_tqdm=True, ) cv_metrics = performance_metrics(metrics_raw) metrics = {k: cv_metrics[k].mean() for k in metric_keys} print(f"Logged Metrics: \n{json.dumps(metrics, indent=2)}") print(f"Logged Params: \n{json.dumps(params, indent=2)}") mlflow.prophet.log_model(model, artifact_path=ARTIFACT_PATH) mlflow.log_params(params) mlflow.log_metrics(metrics) model_uri = mlflow.get_artifact_uri(ARTIFACT_PATH) print(f"Model artifact logged to: {model_uri}")
def test_performance_metrics(self):
    """Check ``performance_metrics`` output at several aggregation levels.

    Covers ``rolling_window`` values of -1, 0, 0.2 and 1, a custom metric
    list, the dropping of MAPE once a ``y`` value is set to zero, and the
    ``ValueError`` raised for an unknown metric name.
    """
    model = Prophet()
    model.fit(self.__df)
    cv_frame = diagnostics.cross_validation(
        model, horizon='4 days', period='10 days', initial='90 days')

    # Aggregation level none
    per_row = diagnostics.performance_metrics(cv_frame, rolling_window=-1)
    every_column = {
        'horizon', 'coverage', 'mae', 'mape', 'mdape', 'mse', 'rmse', 'smape'
    }
    self.assertEqual(set(per_row.columns), every_column)
    self.assertEqual(per_row.shape[0], 16)

    # Aggregation levels 0 and 0.2 are both expected to give four rows,
    # one per distinct horizon.
    for window in (0, 0.2):
        windowed = diagnostics.performance_metrics(
            cv_frame, rolling_window=window)
        self.assertEqual(len(windowed), 4)
        self.assertEqual(len(windowed['horizon'].unique()), 4)

    # Aggregation level all
    overall = diagnostics.performance_metrics(cv_frame, rolling_window=1)
    self.assertEqual(overall.shape[0], 1)
    for metric in ['mse', 'mape', 'mae', 'coverage']:
        self.assertAlmostEqual(
            overall[metric].values[0], per_row[metric].mean())
    self.assertAlmostEqual(
        overall['mdape'].values[0], per_row['mdape'].median())

    # Custom list of metrics
    custom = diagnostics.performance_metrics(
        cv_frame,
        metrics=['coverage', 'mse'],
    )
    self.assertEqual(set(custom.columns), {'coverage', 'mse', 'horizon'})

    # Skip MAPE: with a zero y value present, mape is expected to be
    # left out of the result.
    cv_frame.loc[0, 'y'] = 0.
    without_mape = diagnostics.performance_metrics(
        cv_frame,
        metrics=['coverage', 'mape'],
    )
    self.assertEqual(set(without_mape.columns), {'coverage', 'horizon'})

    # With mape as the only requested metric nothing is left to report.
    only_mape = diagnostics.performance_metrics(
        cv_frame,
        metrics=['mape'],
    )
    self.assertIsNone(only_mape)

    # List of metrics containing non-valid metrics
    with self.assertRaises(ValueError):
        diagnostics.performance_metrics(
            cv_frame,
            metrics=['mse', 'error_metric'],
        )
from prophet.diagnostics import cross_validation, performance_metrics

# Example: grid search over Prophet prior scales, scored by cross-validated
# RMSE.

# float_precision='high' is required for pd.read_csv to match the precision
# of Rover.read_csv.
df = pd.read_csv(
    'examples/example_wp_log_peyton_manning.csv',
    float_precision='high',
)

cutoffs = pd.to_datetime(['2013-02-15', '2013-08-15', '2014-02-15'])

param_grid = {
    'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
}

# Generate all combinations of parameters, one dict per combination.
all_params = [
    dict(zip(param_grid, v))
    for v in itertools.product(*param_grid.values())
]

# RMSE for each entry of all_params, in the same order.
rmses = []

# Fit one model per parameter set and score it on the shared cutoffs.
for params in all_params:
    m = Prophet(**params).fit(df)
    df_cv = cross_validation(m, cutoffs=cutoffs, horizon='30 days')
    # rolling_window=1 yields a single row, so values[0] is the one RMSE.
    df_p = performance_metrics(df_cv, rolling_window=1)
    rmses.append(df_p['rmse'].values[0])

# Tabulate every parameter set next to its RMSE.
tuning_results = pd.DataFrame(all_params)
tuning_results['rmse'] = rmses
print(tuning_results)
import pandas as pd
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics

# Example: cross validation followed by performance metrics.

# float_precision='high' is required for pd.read_csv to match the precision
# of Rover.read_csv.
df = pd.read_csv(
    'examples/example_wp_log_peyton_manning.csv',
    float_precision='high',
)

m = Prophet()
m.fit(df, seed=123)

# 730 days of initial training data, a new cutoff every 180 days, and a
# 365-day forecast from each cutoff.
df_cv = cross_validation(
    m,
    initial='730 days',
    period='180 days',
    horizon='365 days',
)
print(len(df_cv))
print(df_cv.head())
print(df_cv.tail())

# Summarise the forecasts into error metrics.
df_p = performance_metrics(df_cv)
print(len(df_p))
print(df_p.head())
print(df_p.tail())