Ejemplo n.º 1
0
 def test_fit_predict_with_country_holidays(self):
     """Smoke-test fit/predict when US country holidays are enabled.

     Covers three situations: custom holidays combined with country
     holidays, training holidays absent from the prediction window, and
     prediction-window holidays absent from the training data. The test
     passes as long as no call raises.
     """
     custom_holidays = pd.DataFrame({
         'ds': pd.to_datetime(['2012-06-06', '2013-06-06']),
         'holiday': ['seans-bday', 'seans-bday'],
         'lower_window': [0, 0],
         'upper_window': [1, 1],
     })

     # Custom holidays together with country holidays; predict on the history.
     combined = Prophet(holidays=custom_holidays, uncertainty_samples=0)
     combined.add_country_holidays(country_name='US')
     combined.fit(DATA).predict()

     scenarios = (
         # There are training holidays missing in the test set
         (DATA.head(154), DATA.tail(355)),
         # There are test holidays missing in the training set
         (DATA.tail(355), DATA2),
     )
     for history, horizon in scenarios:
         country_only = Prophet(uncertainty_samples=0)
         country_only.add_country_holidays(country_name='US')
         country_only.fit(history).predict(horizon)
Ejemplo n.º 2
0
class OutlierProphet(BaseDetector, FitMixin):
    """Time series outlier detector built on a Prophet forecasting model.

    An observation is flagged as an outlier when it falls outside the
    forecast's uncertainty interval (`yhat_lower`, `yhat_upper`).
    """

    def __init__(self,
                 threshold: float = .8,
                 growth: str = 'linear',
                 cap: float = None,
                 holidays: pd.DataFrame = None,
                 holidays_prior_scale: float = 10.,
                 country_holidays: str = None,
                 changepoint_prior_scale: float = .05,
                 changepoint_range: float = .8,
                 seasonality_mode: str = 'additive',
                 daily_seasonality: Union[str, bool, int] = 'auto',
                 weekly_seasonality: Union[str, bool, int] = 'auto',
                 yearly_seasonality: Union[str, bool, int] = 'auto',
                 add_seasonality: List = None,
                 seasonality_prior_scale: float = 10.,
                 uncertainty_samples: int = 1000,
                 mcmc_samples: int = 0) -> None:
        """
        Outlier detector for time series data using fbprophet.
        See https://facebook.github.io/prophet/ for more details.

        Parameters
        ----------
        threshold
            Width of the uncertainty intervals of the forecast, used as outlier threshold.
            Equivalent to `interval_width`. If the instance lies outside of the uncertainty intervals,
            it is flagged as an outlier. If `mcmc_samples` equals 0, it is the uncertainty in the trend
            using the MAP estimate of the extrapolated model. If `mcmc_samples` >0, then uncertainty
            over all parameters is used.
        growth
            'linear' or 'logistic' to specify a linear or logistic trend.
        cap
            Growth cap in case growth equals 'logistic'. When set, it is added as a constant
            `cap` column to the data passed to the Prophet model.
        holidays
            pandas DataFrame with columns `holiday` (string) and `ds` (dates) and optionally
            columns `lower_window` and `upper_window` which specify a range of days around
            the date to be included as holidays.
        holidays_prior_scale
            Parameter controlling the strength of the holiday components model.
            Higher values allow larger holiday effects, making the model more prone to overfitting.
        country_holidays
            Include country-specific holidays via country abbreviations.
            The holidays for each country are provided by the holidays package in Python.
            A list of available countries and the country name to use is available on:
            https://github.com/dr-prodigy/python-holidays. Additionally, Prophet includes holidays for:
            Brazil (BR), Indonesia (ID), India (IN), Malaysia (MY), Vietnam (VN), Thailand (TH),
            Philippines (PH), Turkey (TU), Pakistan (PK), Bangladesh (BD), Egypt (EG), China (CN) and Russia (RU).
        changepoint_prior_scale
            Parameter controlling the flexibility of the automatic changepoint selection.
            Large values will allow many changepoints, potentially leading to overfitting.
        changepoint_range
            Proportion of history in which trend changepoints will be estimated.
            Higher values means more changepoints, potentially leading to overfitting.
        seasonality_mode
            Either 'additive' or 'multiplicative'.
        daily_seasonality
            Can be 'auto', True, False, or a number of Fourier terms to generate.
        weekly_seasonality
            Can be 'auto', True, False, or a number of Fourier terms to generate.
        yearly_seasonality
            Can be 'auto', True, False, or a number of Fourier terms to generate.
        add_seasonality
            Manually add one or more seasonality components. Pass a list of dicts containing the keys
            `name`, `period`, `fourier_order` (obligatory), `prior_scale` and `mode` (optional).
        seasonality_prior_scale
            Parameter controlling the strength of the seasonality model. Larger values allow the model to
            fit larger seasonal fluctuations, potentially leading to overfitting.
        uncertainty_samples
            Number of simulated draws used to estimate uncertainty intervals.
        mcmc_samples
            If >0, will do full Bayesian inference with the specified number of MCMC samples.
            If 0, will do MAP estimation.
        """
        super().__init__()

        # initialize Prophet model
        # TODO: add conditional seasonalities
        kwargs = {
            'growth': growth,
            'interval_width': threshold,
            'holidays': holidays,
            'holidays_prior_scale': holidays_prior_scale,
            'changepoint_prior_scale': changepoint_prior_scale,
            'changepoint_range': changepoint_range,
            'seasonality_mode': seasonality_mode,
            'daily_seasonality': daily_seasonality,
            'weekly_seasonality': weekly_seasonality,
            'yearly_seasonality': yearly_seasonality,
            'seasonality_prior_scale': seasonality_prior_scale,
            'uncertainty_samples': uncertainty_samples,
            'mcmc_samples': mcmc_samples
        }
        self.model = Prophet(**kwargs)
        if country_holidays:
            self.model.add_country_holidays(country_name=country_holidays)
        if add_seasonality:
            for s in add_seasonality:
                self.model.add_seasonality(**s)
        self.cap = cap

        # set metadata
        self.meta['detector_type'] = 'offline'
        self.meta['data_type'] = 'time-series'

    def _with_cap(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return `df` with a constant `cap` column when a cap is configured.

        A new DataFrame is returned so the caller's frame is never mutated;
        when no cap is configured the input is returned unchanged.
        """
        if self.cap is None:
            return df
        return df.assign(cap=self.cap)

    def fit(self, df: pd.DataFrame) -> None:
        """
        Fit Prophet model on normal (inlier) data.

        Parameters
        ----------
        df
            Dataframe with columns `ds` with timestamps and `y` with target values.
            The dataframe is not modified.
        """
        self.model.fit(self._with_cap(df))

    def score(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Compute outlier scores.

        Parameters
        ----------
        df
            DataFrame with columns `ds` with timestamps and `y` with values which
            need to be flagged as outlier or not. The dataframe is not modified.

        Returns
        -------
        The Prophet forecast DataFrame extended with the observed values in column `y`
        and the outlier scores in column `score`. The score is the distance by which
        the observation exceeds the bound of the uncertainty interval on its side of
        the forecast, so it is positive only outside of the interval.
        """
        forecast = self.model.predict(self._with_cap(df))
        forecast['y'] = df['y'].values
        # Observations at or above the forecast are measured against the upper
        # bound, observations below it against the lower bound.
        forecast['score'] = ((forecast['y'] - forecast['yhat_upper']) *
                             (forecast['y'] >= forecast['yhat']) +
                             (forecast['yhat_lower'] - forecast['y']) *
                             (forecast['y'] < forecast['yhat']))
        return forecast

    def predict(
        self,
        df: pd.DataFrame,
        return_instance_score: bool = True,
        return_forecast: bool = True
    ) -> Dict[str, dict]:
        """
        Compute outlier scores and transform into outlier predictions.

        Parameters
        ----------
        df
            DataFrame with columns `ds` with timestamps and `y` with values which
            need to be flagged as outlier or not.
        return_instance_score
            Whether to return instance level outlier scores.
        return_forecast
            Whether to return the model forecast.

        Returns
        -------
        Dictionary containing 'meta' and 'data' dictionaries.
        'meta' has the model's metadata.
        'data' contains the outlier predictions, instance level outlier scores and the model forecast.
        """
        # compute outlier scores
        forecast = self.score(df)
        iscore = pd.DataFrame(data={
            'ds': df['ds'].values,
            'instance_score': forecast['score']
        })

        # a positive score means the observation lies outside the uncertainty interval
        outlier_pred = pd.DataFrame(
            data={
                'ds': df['ds'].values,
                'is_outlier': (forecast['score'] > 0.).astype(int)
            })

        # populate output dict
        od = outlier_prediction_dict()
        od['meta'] = self.meta
        od['data']['is_outlier'] = outlier_pred
        if return_instance_score:
            od['data']['instance_score'] = iscore
        if return_forecast:
            od['data']['forecast'] = forecast
        return od
Ejemplo n.º 3
0
    'ds': pd.to_datetime(['2018-02-14', '2019-02-14', '2020-02-14',
                          '2021-02-14', '2022-02-14'], format='%Y-%m-%d'),
    'lower_window': -1,
    'upper_window': 1,
})
# Use the special-days frame built above as the custom holidays for Prophet.
holidays = dias_especiales

###############################################################################
# Prophet model without tuning.
###############################################################################

# Create the Prophet model (weekly seasonality only, 20 changepoints), add
# Chile's country holidays on top of the custom ones, and fit it on `df`.
m = Prophet(holidays=holidays, weekly_seasonality=True,
            daily_seasonality=False,
            yearly_seasonality=False, n_changepoints=20)
m.add_country_holidays(country_name='Chile')
m.fit(df)

# Build the frame of dates to predict: 7 periods beyond the history.
future = m.make_future_dataframe(periods=7)
# Bare expression: the result is discarded unless run interactively.
future.tail()

# Forecast
forecast = m.predict(future)
# Bare expression: the result is discarded unless run interactively.
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(14)

# Plot the forecast components; note that yearly seasonality is disabled above.
fig2 = m.plot_components(forecast)
plt.title('Componentes del forecast sin tuning')
plt.show()
Ejemplo n.º 4
0
def PlotSeries():
    """Render the forecast page, with a Prophet forecast plot when the form is complete.

    Reads the "Anos", "Municipios", "Delegacias" and "Crimes" form fields.
    When all four are filled in and "Anos" is an integer, the matching series
    is loaded with `getDataAtDB`, a Prophet model with Brazilian holidays is
    fitted, and a monthly forecast covering that many years is plotted and
    saved under `static/`. The template then receives the image path.

    Returns
    -------
    The rendered "previsao.html" template, with `image` set to the saved plot
    path when a forecast was produced and without it otherwise.
    """
    # Read the values selected on the page.
    select_ano = request.form.get("Anos", None)
    select_mun = request.form.get("Municipios", None)
    select_dp = request.form.get("Delegacias", None)
    select_crime = request.form.get("Crimes", None)

    # Every field is needed below (the year is part of the file name and sets
    # the forecast horizon), so an incomplete form just renders the page.
    if not (select_ano and select_mun and select_dp and select_crime):
        return render_template("previsao.html")

    try:
        years_ahead = int(select_ano)
    except ValueError:
        # A non-numeric year cannot define a forecast horizon.
        return render_template("previsao.html")

    # Name of the plot file.
    # NOTE(review): the file name is built from raw form input; consider
    # sanitizing it before using it as a path.
    img = 'static/plot' + select_ano + select_mun + select_dp + select_crime + '.png'

    # Load the series and rename its columns to what Prophet expects.
    df = getDataAtDB(select_mun, select_dp, select_crime)
    df['datas'] = pd.to_datetime(df['datas'])
    df.columns = ["ds", "y"]

    # Build and fit the model.
    m = Prophet(changepoint_prior_scale=0.05, changepoint_range=0.8)
    m.add_country_holidays(country_name='BR')
    m.fit(df)

    # Forecast 12 month-start periods per requested year.
    future = m.make_future_dataframe(periods=12 * years_ahead, freq='MS')
    forecast = m.predict(future)

    # Draw and save the plot; always close the figure so that figures do not
    # pile up in memory across requests.
    fig = m.plot(forecast, figsize=(8, 4))
    try:
        ax = fig.gca()
        ax.set_xlabel('Data')
        ax.set_ylabel('Ocorrencias')
        ax.set_ylim(bottom=0)
        ax.set_title("Série temporal das ocorrências de " + select_crime +
                     " registradas no " + select_dp)
        fig.savefig(img, bbox_inches='tight')
    finally:
        plt.close(fig)

    # TODO: cross-validation and hyperparameter tuning are not implemented yet.

    return render_template("previsao.html", image=img)
Ejemplo n.º 5
0
        pd.to_datetime(
            ['2017-12-24', '2018-12-24', '2019-12-24', '2020-12-24']),
        'lower_window':
        -1,
        'upper_window':
        7,
    })

    holidays = pd.concat((ascensionday, christmas))

    m = Prophet(holidays=holidays,
                interval_width=0.9,
                yearly_seasonality=True,
                weekly_seasonality=True,
                daily_seasonality=False)
    m.add_country_holidays(country_name='NO')

# Register 'rain' as an extra regressor when the rain flag is set.
if rain_con == True:
    m.add_regressor('rain')

# Register 'temp' as an extra regressor when the temperature flag is set.
if temp_con == True:
    m.add_regressor('temp')

# Fit the data. Remember that Prophet expects "ds" and "y" as names for the columns.
m.fit(bikerides)

# We must create a data frame holding dates for our forecast. The periods
# parameter counts days as long as the frequency is 'D' for the day.
# NOTE(review): the original comment announced a 180 day forecast
# (approximately half a year), but periods=0 is passed here — confirm intent.
future = m.make_future_dataframe(periods=0, freq='D')

# Merge the bikerides columns onto the forecast dates by matching on 'ds'.
future = future.merge(bikerides, on='ds')
def _clip_negative_forecast(forecast):
    """Replace negative values in the yhat, yhat_lower and yhat_upper columns by zero, in place."""
    for column in ("yhat", "yhat_lower", "yhat_upper"):
        forecast[column] = np.where(forecast[column] < 0, 0, forecast[column])
    return forecast


def _rmse(actual, predicted):
    """Return the root mean squared error between `actual` and `predicted`."""
    return np.sqrt(mean_squared_error(actual, predicted))


def compare_models(data, variable, test_size):
    """Fit several forecasting models on `data[variable]` and print their test RMSE.

    The last `test_size` rows are held out as the test set. Five predictors
    are compared: the training mean, the training mean grouped by
    (month, weekday, hour), a plain Prophet model, a Prophet model with French
    country holidays, and a Prophet model using temperature regressors
    (temperature, its square, and both lagged by one step).

    The Prophet forecast and component figures are stored in the module-level
    globals `forecast_plot_*` and `component_plot_*` (simple, holiday, temp).

    Parameters
    ----------
    data
        DataFrame whose index exposes `.month`, `.weekday` and `.hour`, with a
        column named `variable` and a `temperature` column.
    variable
        Name of the column to forecast.
    test_size
        Number of trailing rows held out for testing; also the number of
        hourly periods forecast by the Prophet models.
    """
    global forecast_plot_simple, component_plot_simple
    global forecast_plot_holiday, component_plot_holiday
    global forecast_plot_temp, component_plot_temp

    test_split = len(data) - test_size

    # simple model using the mean of the variable by month, weekday and hour
    data_train = data[:test_split][[variable]]
    data_train_grouped = (data_train[[variable]].groupby([
        data_train.index.month, data_train.index.weekday, data_train.index.hour
    ]).mean())
    data_train_grouped.index.names = ["month", "weekday", "hour"]

    data_test = pd.DataFrame(
        data={
            "month": data[test_split:].index.month,
            "weekday": data[test_split:].index.weekday,
            "hour": data[test_split:].index.hour,
        },
        index=data[test_split:].index,
    )

    mean_grouped_predictions = data_test.join(data_train_grouped,
                                              how="left",
                                              on=["month", "weekday",
                                                  "hour"])[variable]

    # preparing data for prophet
    df = data[variable].reset_index(level=0)
    df.columns = ["ds", "y"]

    # Copy the training slice: regressor columns are added to it further down,
    # which must not write into (or warn about) a view of `df`.
    df_train = df[:test_split].copy()
    df_test = df[test_split:]

    # plain Prophet model
    m_simple = Prophet()
    m_simple.fit(df_train)
    future_simple = m_simple.make_future_dataframe(periods=test_size, freq="H")
    # limiting low predictions to zero
    forecast_simple = _clip_negative_forecast(m_simple.predict(future_simple))

    forecast_plot_simple = m_simple.plot(forecast_simple)
    component_plot_simple = m_simple.plot_components(forecast_simple)

    # using inbuilt holidays because this automatically applies to predictions also
    m_holiday = Prophet()
    m_holiday.add_country_holidays(country_name="FRA")
    m_holiday.fit(df_train)
    future_holiday = m_holiday.make_future_dataframe(periods=test_size,
                                                     freq="H")
    # limiting low predictions to zero
    forecast_holiday = _clip_negative_forecast(
        m_holiday.predict(future_holiday))

    forecast_plot_holiday = m_holiday.plot(forecast_holiday)
    component_plot_holiday = m_holiday.plot_components(forecast_holiday)

    # Prophet model with temperature regressors
    m_temp = Prophet()
    m_temp.add_regressor("temperature")
    m_temp.add_regressor("temperature2")
    m_temp.add_regressor("temperature_lag")
    m_temp.add_regressor("temperature2_lag")
    df_train["temperature"] = data["temperature"][:test_split].to_numpy()
    df_train["temperature2"] = df_train["temperature"]**2
    # The first lagged value has no predecessor; fill it with the mean.
    df_train["temperature_lag"] = df_train["temperature"].shift(
        1, fill_value=df_train["temperature"].mean())
    df_train["temperature2_lag"] = df_train["temperature2"].shift(
        1, fill_value=df_train["temperature"].mean()**2)
    m_temp.fit(df_train)
    future_temp = m_temp.make_future_dataframe(periods=test_size, freq="H")
    future_temp["temperature"] = data["temperature"][-len(future_temp
                                                          ):].to_numpy()
    future_temp["temperature2"] = future_temp["temperature"]**2
    future_temp["temperature_lag"] = future_temp["temperature"].shift(
        1, fill_value=future_temp["temperature"].mean())
    future_temp["temperature2_lag"] = future_temp["temperature2"].shift(
        1, fill_value=future_temp["temperature"].mean()**2)
    # limiting low predictions to zero
    forecast_temp = _clip_negative_forecast(m_temp.predict(future_temp))

    forecast_plot_temp = m_temp.plot(forecast_temp)
    component_plot_temp = m_temp.plot_components(forecast_temp)

    # calculate rmse (square root of the mean squared error) on the test set
    print(
        "Mean RMSE: ",
        _rmse(df_test.y, np.repeat(df_train.y.mean(), len(df_test))),
    )
    print("Mean grouped RMSE: ", _rmse(df_test.y, mean_grouped_predictions))
    print(
        "Simple Prophet: ",
        _rmse(df_test.y, forecast_simple.yhat[test_split:]),
    )
    print(
        "Holiday Prophet: ",
        _rmse(df_test.y, forecast_holiday.yhat[test_split:]),
    )
    print(
        "Temperature Prophet: ",
        _rmse(df_test.y, forecast_temp.yhat[test_split:]),
    )
Ejemplo n.º 7
0
def grid_search_worker(event, context={}):
    """Fit a Prophet model for one parameter combination, then cross-validate or forecast.

    Parameters
    ----------
    event
        Mapping with the keys:
        'parameters' - iterable of parameter names to read from `event['data']`;
        'data' - mapping from parameter name to value ('holidays', when
        listed, is a JSON string with the keys 'holiday', 'ds',
        'lower_window' and 'upper_window');
        'forecast' - 0 to run cross validation, otherwise the number of
        periods to forecast.
    context
        Unused.

    Returns
    -------
    When `event['forecast']` equals 0, a dict with the 'average_metric' from
    cross validation and the original 'event'. Otherwise the forecast and
    component plots are saved and uploaded, and a status string is returned.
    """
    # Time series model settings
    holidays_dict = None  # stays None when no 'holidays' parameter is listed
    parameters = {}
    for key in event['parameters']:
        # Special case for holidays, because it is a json string
        if key == 'holidays':
            holidays_dict = json.loads(event['data'][key])
            continue
        parameters[key] = event['data'][key]

    forecast_periods = event['forecast']
    print('=====Parameters=======')
    print(parameters)

    # Read the dataset from S3 bucket
    df = read_csv_s3(parameters['dataset'])

    # Transfer holiday to data frame; without holidays Prophet receives None.
    holidays = None
    if holidays_dict is not None:
        holidays = pd.DataFrame({
            'holiday': holidays_dict['holiday'],
            'ds': pd.to_datetime(holidays_dict['ds']),
            'lower_window': holidays_dict['lower_window'],
            'upper_window': holidays_dict['upper_window'],
        })
    parameters['holidays'] = holidays

    # Fit the model
    df['cap'] = parameters['cap']
    df['floor'] = parameters['floor']
    model = Prophet(
        growth=parameters['growth'],
        changepoint_prior_scale=parameters['changepoint_prior_scale'],
        holidays=holidays,
        holidays_prior_scale=parameters['holidays_prior_scale'],
        seasonality_mode=parameters['seasonality_mode'],
        interval_width=parameters['interval_width'])

    model.add_seasonality(name='yearly',
                          period=365,
                          fourier_order=parameters['fourier_order'],
                          prior_scale=parameters['seasonality_prior_scale'])
    model.add_country_holidays(country_name=parameters['country_holidays'])

    # Truncate the time series
    # NOTE(review): this mask selects rows that are both <= left_bound and
    # >= right_bound, which is empty whenever left_bound < right_bound —
    # confirm whether `|` (blank out everything outside the bounds) was meant.
    df.loc[(df['ds'] <= parameters['left_bound']) &
           (df['ds'] >= parameters['right_bound']), 'y'] = None

    print("=====Fit the Model=======")
    model.fit(df)

    if forecast_periods == 0:
        # Cross validation the model
        print("=====Cross Validation=======")
        average_metric = cross_validation_worker(model, parameters['initial'],
                                                 parameters['period'],
                                                 parameters['horizon'],
                                                 parameters['metric'])
        print("Metric {0}: {1}".format(parameters['metric'], average_metric))
        return {'average_metric': average_metric, 'event': event}

    # Forecast the requested number of periods and upload the plots.
    future = model.make_future_dataframe(periods=int(forecast_periods))
    prediction = model.predict(future)
    time_series = model.plot(prediction)
    components = model.plot_components(prediction)
    time_series.savefig(local_repo + '/time_series.png')
    upload_csv_s3(local_repo + '/time_series.png')
    components.savefig(local_repo + '/components.png')
    upload_csv_s3(local_repo + '/components.png')
    return "Graphs are uploaded to S3"