def test_fit_predict_with_country_holidays(self):
    """Smoke-test fit/predict with built-in US country holidays enabled.

    Three scenarios are exercised; each one only has to run without raising:

    1. user-supplied holidays combined with country holidays,
    2. holidays present in the training window but absent from the
       prediction window,
    3. holidays present in the prediction window but absent from the
       training window.
    """
    custom_holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2012-06-06', '2013-06-06']),
        'holiday': ['seans-bday', 'seans-bday'],
        'lower_window': [0, 0],
        'upper_window': [1, 1],
    })

    # Scenario 1: custom holidays together with country holidays.
    combined = Prophet(holidays=custom_holidays, uncertainty_samples=0)
    combined.add_country_holidays(country_name='US')
    combined.fit(DATA).predict()

    scenarios = (
        # Scenario 2: training holidays that are missing from the test set.
        (DATA.head(154), DATA.tail(355)),
        # Scenario 3: test holidays that are missing from the training set.
        (DATA.tail(355), DATA2),
    )
    for train, future in scenarios:
        model = Prophet(uncertainty_samples=0)
        model.add_country_holidays(country_name='US')
        model.fit(train).predict(future)
class OutlierProphet(BaseDetector, FitMixin):
    """Offline outlier detector for time series, backed by a Prophet model.

    An observation is flagged as an outlier when it falls outside the
    forecast's uncertainty interval.
    """

    def __init__(self,
                 threshold: float = .8,
                 growth: str = 'linear',
                 cap: float = None,
                 holidays: pd.DataFrame = None,
                 holidays_prior_scale: float = 10.,
                 country_holidays: str = None,
                 changepoint_prior_scale: float = .05,
                 changepoint_range: float = .8,
                 seasonality_mode: str = 'additive',
                 daily_seasonality: Union[str, bool, int] = 'auto',
                 weekly_seasonality: Union[str, bool, int] = 'auto',
                 yearly_seasonality: Union[str, bool, int] = 'auto',
                 add_seasonality: List = None,
                 seasonality_prior_scale: float = 10.,
                 uncertainty_samples: int = 1000,
                 mcmc_samples: int = 0
                 ) -> None:
        """
        Outlier detector for time series data using fbprophet.
        See https://facebook.github.io/prophet/ for more details.

        Parameters
        ----------
        threshold
            Width of the uncertainty intervals of the forecast, used as outlier threshold.
            Equivalent to `interval_width`. If the instance lies outside of the uncertainty intervals,
            it is flagged as an outlier. If `mcmc_samples` equals 0, it is the uncertainty
            in the trend using the MAP estimate of the extrapolated model.
            If `mcmc_samples` >0, then uncertainty over all parameters is used.
        growth
            'linear' or 'logistic' to specify a linear or logistic trend.
        cap
            Growth cap in case growth equals 'logistic'.
        holidays
            pandas DataFrame with columns `holiday` (string) and `ds` (dates) and optionally
            columns `lower_window` and `upper_window` which specify a range of days around
            the date to be included as holidays.
        holidays_prior_scale
            Parameter controlling the strength of the holiday components model.
            Higher values allow more flexible holiday effects, which makes the model
            more prone to overfitting.
        country_holidays
            Include country-specific holidays via country abbreviations.
            The holidays for each country are provided by the holidays package in Python.
            A list of available countries and the country name to use is available on:
            https://github.com/dr-prodigy/python-holidays. Additionally, Prophet includes holidays for:
            Brazil (BR), Indonesia (ID), India (IN), Malaysia (MY), Vietnam (VN), Thailand (TH),
            Philippines (PH), Turkey (TU), Pakistan (PK), Bangladesh (BD), Egypt (EG), China (CN)
            and Russia (RU).
        changepoint_prior_scale
            Parameter controlling the flexibility of the automatic changepoint selection.
            Large values will allow many changepoints, potentially leading to overfitting.
        changepoint_range
            Proportion of history in which trend changepoints will be estimated.
            Higher values mean more changepoints, potentially leading to overfitting.
        seasonality_mode
            Either 'additive' or 'multiplicative'.
        daily_seasonality
            Can be 'auto', True, False, or a number of Fourier terms to generate.
        weekly_seasonality
            Can be 'auto', True, False, or a number of Fourier terms to generate.
        yearly_seasonality
            Can be 'auto', True, False, or a number of Fourier terms to generate.
        add_seasonality
            Manually add one or more seasonality components. Pass a list of dicts containing the keys
            `name`, `period`, `fourier_order` (obligatory), `prior_scale` and `mode` (optional).
        seasonality_prior_scale
            Parameter controlling the strength of the seasonality model. Larger values allow the model to
            fit larger seasonal fluctuations, potentially leading to overfitting.
        uncertainty_samples
            Number of simulated draws used to estimate uncertainty intervals.
        mcmc_samples
            If >0, will do full Bayesian inference with the specified number of MCMC samples.
            If 0, will do MAP estimation.
        """
        super().__init__()

        # initialize Prophet model
        # TODO: add conditional seasonalities
        kwargs = {
            'growth': growth,
            # the outlier threshold is expressed as the width of the forecast interval
            'interval_width': threshold,
            'holidays': holidays,
            'holidays_prior_scale': holidays_prior_scale,
            'changepoint_prior_scale': changepoint_prior_scale,
            'changepoint_range': changepoint_range,
            'seasonality_mode': seasonality_mode,
            'daily_seasonality': daily_seasonality,
            'weekly_seasonality': weekly_seasonality,
            'yearly_seasonality': yearly_seasonality,
            'seasonality_prior_scale': seasonality_prior_scale,
            'uncertainty_samples': uncertainty_samples,
            'mcmc_samples': mcmc_samples
        }
        self.model = Prophet(**kwargs)
        if country_holidays:
            self.model.add_country_holidays(country_name=country_holidays)
        if add_seasonality:
            for s in add_seasonality:
                self.model.add_seasonality(**s)
        self.cap = cap

        # set metadata
        self.meta['detector_type'] = 'offline'
        self.meta['data_type'] = 'time-series'

    def _with_cap(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return `df` with the configured `cap` column, without mutating the caller's frame."""
        if self.cap is None:
            return df
        # `assign` returns a new DataFrame, so the input is left untouched.
        return df.assign(cap=self.cap)

    def fit(self, df: pd.DataFrame) -> None:
        """
        Fit Prophet model on normal (inlier) data.

        Parameters
        ----------
        df
            Dataframe with columns `ds` with timestamps and `y` with target values.
            The dataframe is not modified.
        """
        self.model.fit(self._with_cap(df))

    def score(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Compute outlier scores.

        Parameters
        ----------
        df
            DataFrame with columns `ds` with timestamps and `y` with values which
            need to be flagged as outlier or not. The dataframe is not modified.

        Returns
        -------
        The Prophet forecast DataFrame, extended with a `y` column (the observed values)
        and a `score` column. The score is the signed distance to the nearest bound of
        the uncertainty interval: positive when `y` lies outside the interval.
        """
        df = self._with_cap(df)
        forecast = self.model.predict(df)
        # NOTE(review): observed values are attached positionally; this assumes the
        # forecast rows come back in the same order as `df` - confirm for unsorted input.
        forecast['y'] = df['y'].values
        # distance above the upper bound for points at/above the forecast,
        # distance below the lower bound for points under the forecast
        forecast['score'] = (
            (forecast['y'] - forecast['yhat_upper']) * (forecast['y'] >= forecast['yhat']) +
            (forecast['yhat_lower'] - forecast['y']) * (forecast['y'] < forecast['yhat'])
        )
        return forecast

    def predict(self,
                df: pd.DataFrame,
                return_instance_score: bool = True,
                return_forecast: bool = True
                ) -> Dict[str, Dict]:
        """
        Compute outlier scores and transform into outlier predictions.

        Parameters
        ----------
        df
            DataFrame with columns `ds` with timestamps and `y` with values which
            need to be flagged as outlier or not.
        return_instance_score
            Whether to return instance level outlier scores.
        return_forecast
            Whether to return the model forecast.

        Returns
        -------
        Dictionary containing 'meta' and 'data' dictionaries.
        'meta' has the model's metadata.
        'data' contains the outlier predictions, instance level outlier scores and the model forecast.
        """
        # compute outlier scores
        forecast = self.score(df)
        iscore = pd.DataFrame(data={
            'ds': df['ds'].values,
            'instance_score': forecast['score']
        })

        # a positive score means the observation lies outside the uncertainty interval
        outlier_pred = pd.DataFrame(data={
            'ds': df['ds'].values,
            'is_outlier': (forecast['score'] > 0.).astype(int)
        })

        # populate output dict
        od = outlier_prediction_dict()
        od['meta'] = self.meta
        od['data']['is_outlier'] = outlier_pred
        if return_instance_score:
            od['data']['instance_score'] = iscore
        if return_forecast:
            od['data']['forecast'] = forecast
        return od
'ds': pd.to_datetime(['2018-02-14', '2019-02-14', '2020-02-14', '2021-02-14', '2022-02-14'], format='%Y-%m-%d'), 'lower_window': -1, 'upper_window': 1, }) holidays = dias_especiales ############################################################################### # Modelo Prophet sin tuning. ############################################################################### # Creamos el modelo Prophet y le hacemos un fit. m = Prophet(holidays=holidays, weekly_seasonality=True, daily_seasonality=False, yearly_seasonality=False, n_changepoints=20) m.add_country_holidays(country_name='Chile') m.fit(df) # Se indica cuáles serán los futures. future = m.make_future_dataframe(periods=7) future.tail() # Forecast forecast = m.predict(future) forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(14) # Se grafican los componentes del forecast (trend, weekly, yearly) fig2 = m.plot_components(forecast) plt.title('Componentes del forecast sin tuning') plt.show()
def PlotSeries():
    """Render the forecast page, plotting a Prophet forecast for the selected series.

    Reads the `Anos`, `Municipios`, `Delegacias` and `Crimes` form fields.
    When municipality, police station and crime are all provided and the
    number of years is a valid integer, the matching series is loaded, a
    Prophet model with Brazilian holidays is fitted, a monthly forecast of
    `12 * years` periods is plotted and saved under `static/`, and the
    template is rendered with that image. Otherwise the template is rendered
    without an image.

    Returns
    -------
    The rendered `previsao.html` template.
    """
    import re  # local import: only needed to sanitise the generated file name

    # Read the values of the page's select inputs.
    select_ano = request.form.get("Anos", None)
    select_mun = request.form.get("Municipios", None)
    select_dp = request.form.get("Delegacias", None)
    select_crime = request.form.get("Crimes", None)

    # Nothing to plot unless every selector has a non-empty value.
    if not (select_mun and select_dp and select_crime):
        return render_template("previsao.html")

    # The forecast horizon comes from user input; a missing or non-numeric
    # value is treated like an incomplete form instead of raising.
    try:
        years = int(select_ano)
    except (TypeError, ValueError):
        return render_template("previsao.html")

    # Build the plot file name. The parts come straight from the request, so
    # anything that is not a word character or dash (path separators in
    # particular) is replaced to keep the file inside `static/`.
    raw_name = select_ano + select_mun + select_dp + select_crime
    img = 'static/plot' + re.sub(r'[^\w-]+', '_', raw_name) + '.png'

    # Load the series and rename its columns to what Prophet expects.
    # (The former `df.set_index('datas')` call discarded its result and was a no-op.)
    df = getDataAtDB(select_mun, select_dp, select_crime)
    df['datas'] = pd.to_datetime(df['datas'])
    df.columns = ["ds", "y"]

    # Create and fit the model.
    m = Prophet(changepoint_prior_scale=0.05, changepoint_range=0.8)
    m.add_country_holidays(country_name='BR')
    m.fit(df)

    # Forecast `years` years ahead at month-start frequency.
    future = m.make_future_dataframe(periods=12 * years, freq='MS')
    forecast = m.predict(future)

    # Draw and save the plot. The figure is closed afterwards so that figures
    # do not accumulate in memory across requests.
    fig = m.plot(forecast, figsize=(8, 4))
    try:
        ax = fig.gca()
        ax.set_xlabel('Data')
        ax.set_ylabel('Ocorrencias')
        ax.set_ylim(bottom=0)
        ax.set_title("Série temporal das ocorrências de " + select_crime +
                     " registradas no " + select_dp)
        fig.savefig(img, bbox_inches='tight')
    finally:
        plt.close(fig)

    # TODO: cross-validation and hyper-parameter search were sketched here
    # previously but never enabled.

    return render_template("previsao.html", image=img)
pd.to_datetime( ['2017-12-24', '2018-12-24', '2019-12-24', '2020-12-24']), 'lower_window': -1, 'upper_window': 7, }) holidays = pd.concat((ascensionday, christmas)) m = Prophet(holidays=holidays, interval_width=0.9, yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=False) m.add_country_holidays(country_name='NO') if rain_con == True: m.add_regressor('rain') if temp_con == True: m.add_regressor('temp') # Fit the data. Remember that prophet expect "ds" and "y" as names for the columns. m.fit(bikerides) # We must create a data frame holding dates for our forecast. The periods # parameter counts days as long as the frequency is 'D' for the day. Let's # do a 180 day forecast, approximately half a year. future = m.make_future_dataframe(periods=0, freq='D') future = future.merge(bikerides, on='ds')
def _clip_forecast_at_zero(forecast):
    """Replace negative values in the forecast and its interval bounds with zero, in place."""
    for column in ("yhat", "yhat_lower", "yhat_upper"):
        forecast[column] = np.where(forecast[column] < 0, 0, forecast[column])
    return forecast


def _add_temperature_features(frame, temperature):
    """Add temperature, squared temperature and their one-step lags to `frame`, in place.

    The first lagged value has no predecessor and is filled with the mean
    temperature of `frame` (squared for the squared feature).
    """
    frame["temperature"] = temperature
    frame["temperature2"] = frame["temperature"]**2
    frame["temperature_lag"] = frame["temperature"].shift(
        1, fill_value=frame["temperature"].mean())
    frame["temperature2_lag"] = frame["temperature2"].shift(
        1, fill_value=frame["temperature"].mean()**2)
    return frame


def _root_mean_squared_error(actual, predicted):
    """Return the root of the mean squared error between `actual` and `predicted`."""
    return np.sqrt(mean_squared_error(actual, predicted))


def compare_models(data, variable, test_size):
    """Fit several forecasting models on `variable` and print their test RMSE.

    The last `test_size` rows of `data` are held out. Five predictors are
    compared on them: the training mean, the training mean grouped by
    (month, weekday, hour), a default Prophet model, a Prophet model with
    French country holidays, and a Prophet model with temperature regressors.
    Negative Prophet predictions are clipped to zero.

    Side effects: the forecast and component plots of the three Prophet
    models are stored in the module-level globals `forecast_plot_*` and
    `component_plot_*`, and the error figures are printed.

    Parameters
    ----------
    data
        DataFrame holding `variable` and a `temperature` column; the code
        reads `.month`, `.weekday` and `.hour` from its index.
    variable
        Name of the column to forecast.
    test_size
        Number of trailing rows used as the test set (forecast horizon, in
        hours).
    """
    global forecast_plot_simple, component_plot_simple
    global forecast_plot_holiday, component_plot_holiday
    global forecast_plot_temp, component_plot_temp

    test_split = len(data) - test_size

    # Baseline: mean of the target by month, weekday and hour of the training data.
    data_train = data[:test_split][[variable]]
    data_train_grouped = data_train[[variable]].groupby([
        data_train.index.month,
        data_train.index.weekday,
        data_train.index.hour,
    ]).mean()
    data_train_grouped.index.names = ["month", "weekday", "hour"]

    test_index = data[test_split:].index
    data_test = pd.DataFrame(
        data={
            "month": test_index.month,
            "weekday": test_index.weekday,
            "hour": test_index.hour,
        },
        index=test_index,
    )
    mean_grouped_predictions = data_test.join(
        data_train_grouped, how="left", on=["month", "weekday", "hour"])[variable]

    # Prepare the data in Prophet's ds/y layout.
    df = data[variable].reset_index(level=0)
    df.columns = ["ds", "y"]
    # Copy: regressor columns are added to the training frame further down,
    # which must not write into a slice of `df`.
    df_train = df[:test_split].copy()
    df_test = df[test_split:]

    # Plain Prophet model.
    m_simple = Prophet()
    m_simple.fit(df_train)
    future_simple = m_simple.make_future_dataframe(periods=test_size, freq="H")
    forecast_simple = _clip_forecast_at_zero(m_simple.predict(future_simple))
    forecast_plot_simple = m_simple.plot(forecast_simple)
    component_plot_simple = m_simple.plot_components(forecast_simple)

    # Prophet with built-in holidays, because these automatically apply to
    # predictions as well.
    m_holiday = Prophet()
    m_holiday.add_country_holidays(country_name="FRA")
    m_holiday.fit(df_train)
    future_holiday = m_holiday.make_future_dataframe(periods=test_size, freq="H")
    forecast_holiday = _clip_forecast_at_zero(m_holiday.predict(future_holiday))
    forecast_plot_holiday = m_holiday.plot(forecast_holiday)
    component_plot_holiday = m_holiday.plot_components(forecast_holiday)

    # Prophet with temperature-based extra regressors.
    m_temp = Prophet()
    for regressor in ("temperature", "temperature2", "temperature_lag",
                      "temperature2_lag"):
        m_temp.add_regressor(regressor)
    _add_temperature_features(
        df_train, data["temperature"][:test_split].to_numpy())
    m_temp.fit(df_train)
    future_temp = m_temp.make_future_dataframe(periods=test_size, freq="H")
    _add_temperature_features(
        future_temp, data["temperature"][-len(future_temp):].to_numpy())
    forecast_temp = _clip_forecast_at_zero(m_temp.predict(future_temp))
    forecast_plot_temp = m_temp.plot(forecast_temp)
    component_plot_temp = m_temp.plot_components(forecast_temp)

    # Report RMSE on the held-out rows. The square root is taken explicitly:
    # mean_squared_error on its own yields the MSE, not the RMSE the labels
    # promise.
    print(
        "Mean RMSE: ",
        _root_mean_squared_error(
            df_test.y, np.repeat(df_train.y.mean(), len(df_test))),
    )
    print(
        "Mean grouped RMSE: ",
        _root_mean_squared_error(df_test.y, mean_grouped_predictions),
    )
    print(
        "Simple Prophet: ",
        _root_mean_squared_error(df_test.y, forecast_simple.yhat[test_split:]),
    )
    print(
        "Holiday Prophet: ",
        _root_mean_squared_error(df_test.y, forecast_holiday.yhat[test_split:]),
    )
    print(
        "Temperature Prophet: ",
        _root_mean_squared_error(df_test.y, forecast_temp.yhat[test_split:]),
    )
def grid_search_worker(event, context=None):
    """Fit a Prophet model for one parameter combination, then cross-validate or forecast.

    Parameters
    ----------
    event
        Mapping with the keys:
        `parameters` - iterable of parameter names to read,
        `data` - mapping from each parameter name to its value (the
        `holidays` entry is a JSON string),
        `forecast` - `0` to run cross-validation, otherwise the number of
        periods to forecast.
    context
        Unused; accepted for compatibility with the caller's handler
        signature.

    Returns
    -------
    When `event['forecast'] == 0`, a dict with `average_metric` and the
    original `event`. Otherwise the forecast and component plots are saved,
    uploaded, and a confirmation string is returned.
    """
    # Time series model settings
    parameter_list = event['parameters']
    parameters = {}
    # Stays None when no holidays are supplied, so the check below cannot hit
    # an unbound name.
    holidays_dict = None
    for key in parameter_list:
        # Special case for holidays, because it is a JSON string
        if key == 'holidays':
            holidays_dict = json.loads(event['data'][key])
            continue
        parameters[key] = event['data'][key]
    forecast_periods = event['forecast']

    print('=====Parameters=======')
    print(parameters)

    # Read the dataset from S3 bucket
    df = read_csv_s3(parameters['dataset'])

    # Convert the holiday specification to a data frame
    if holidays_dict is not None:
        parameters['holidays'] = pd.DataFrame({
            'holiday': holidays_dict['holiday'],
            'ds': pd.to_datetime(holidays_dict['ds']),
            'lower_window': holidays_dict['lower_window'],
            'upper_window': holidays_dict['upper_window'],
        })

    # Fit the model
    df['cap'] = parameters['cap']
    df['floor'] = parameters['floor']
    model = Prophet(
        growth=parameters['growth'],
        changepoint_prior_scale=parameters['changepoint_prior_scale'],
        # `.get`: the key only exists when holidays were supplied
        holidays=parameters.get('holidays'),
        holidays_prior_scale=parameters['holidays_prior_scale'],
        seasonality_mode=parameters['seasonality_mode'],
        interval_width=parameters['interval_width'])
    model.add_seasonality(name='yearly',
                          period=365,
                          fourier_order=parameters['fourier_order'],
                          prior_scale=parameters['seasonality_prior_scale'])
    model.add_country_holidays(country_name=parameters['country_holidays'])

    # Truncate the time series
    # NOTE(review): this mask selects rows with ds <= left_bound AND
    # ds >= right_bound, which is empty whenever left_bound < right_bound.
    # Left unchanged because the intended semantics cannot be confirmed from
    # here - verify against the caller.
    df.loc[(df['ds'] <= parameters['left_bound']) &
           (df['ds'] >= parameters['right_bound']), 'y'] = None

    print("=====Fit the Model=======")
    model.fit(df)

    if forecast_periods == 0:
        # Cross-validate the model
        print("=====Cross Validation=======")
        average_metric = cross_validation_worker(model,
                                                 parameters['initial'],
                                                 parameters['period'],
                                                 parameters['horizon'],
                                                 parameters['metric'])
        print("Metric {0}: {1}".format(parameters['metric'], average_metric))
        return {'average_metric': average_metric, 'event': event}

    future = model.make_future_dataframe(periods=int(forecast_periods))
    prediction = model.predict(future)
    time_series = model.plot(prediction)
    components = model.plot_components(prediction)
    time_series.savefig(local_repo + '/time_series.png')
    upload_csv_s3(local_repo + '/time_series.png')
    components.savefig(local_repo + '/components.png')
    upload_csv_s3(local_repo + '/components.png')
    return "Graphs are uploaded to S3"