Beispiel #1
0
 def _set_params(self):
     """
     Set input parameters based on the request.

     Default values are assigned first and are then overridden by any key word arguments found in the request.
     The key word arguments are read from the fourth column of the first row if it exists, in which case the
     request is flagged as including holidays. Otherwise they are read from the third column, if there is one.
     The arguments are expected as a comma separated string of key=value pairs, e.g. 'freq=M, take_log=true'.
     Whitespace is removed, key words are case insensitive and empty segments are ignored.

     Parameters implemented for the Prophet() function are: growth, cap, floor, changepoint_prior_scale, interval_width
     Parameters implemented for the make_future_dataframe() function are: freq, periods
     Parameters implemented for seasonality are: add_seasonality, seasonality_period, seasonality_fourier, seasonality_prior_scale
     Parameters implemented for holidays are: holidays_prior_scale, lower_window, upper_window
     Additional parameters for seasonality requests are: weekly_start, yearly_start
     Additional parameters used are: return, take_log, seasonality, debug

     Note that growth is not read from the request: it is set to 'logistic' when a cap is supplied.
     Similarly, periods is derived from the number of placeholders in the 'y' column.

     Raises:
         ValueError: If a non-empty argument is not in the key=value format, or if a value cannot be
             converted to the number expected for its key word.
     """

     # Calculate the forecast periods based on the number of placeholders in the data
     self.periods = utils.count_placeholders(self.request_df.loc[:, 'y'])

     # Set the row count in the original request
     self.request_row_count = len(self.request_df) + len(self.NaT_df)

     # Set default values which will be used if an argument is not passed
     self.result_type = 'yhat'
     self.take_log = False
     self.seasonality = 'yearly'
     self.debug = False
     self.freq = 'D'
     self.cap = None
     self.floor = None
     self.growth = None
     self.changepoint_prior_scale = None
     self.interval_width = None
     self.name = None
     self.period = None
     self.fourier_order = None
     self.seasonality_prior_scale = None
     self.holidays_prior_scale = None
     self.is_seasonality_request = False
     self.weekly_start = 6  # Defaulting to a Monday start for the week as used in Qlik
     self.yearly_start = 0
     self.lower_window = None
     self.upper_window = None

     # Set optional parameters

     # Locate the key word arguments: prefer the fourth column and fall back to the third column
     args = None
     self.has_holidays = False
     for column in (3, 2):
         try:
             args = self.request[0].rows[0].duals[column].strData
         except IndexError:
             # This column does not exist in the request
             continue
         # If there is a fourth column, the third column should provide the holiday name or null for each row
         self.has_holidays = column == 3
         break

     # If the key word arguments were included in the request, get the parameters and values
     if args is not None:

         # Remove all whitespace before transforming the string into key value pairs
         compact = args.translate(str.maketrans('', '', string.whitespace))

         self.kwargs = {}
         for pair in compact.split(","):
             # An empty argument string or a stray comma produces empty segments, which carry no information
             if not pair:
                 continue

             # Split on the first '=' only, so that a value may itself contain '='
             key, separator, value = pair.partition("=")
             if not separator:
                 raise ValueError("Invalid key word argument '{0}'. Expected the format key=value.".format(pair))

             # Make sure the key words are in lower case
             self.kwargs[key.lower()] = value

         def as_bool(value):
             # Only the text 'true' (in any case) switches a flag on
             return 'true' == value.lower()

         # Each entry gives the key word, the attribute it sets and the conversion applied to its value
         simple_params = (
             # Set the return type
             # Valid values are: yhat, trend, seasonal, seasonalities.
             # Add _lower or _upper to the series name to get lower or upper limits.
             ('return', 'result_type', str.lower),
             # Set the option to take a logarithm of y values before forecast calculations
             # Valid values are: true, false
             ('take_log', 'take_log', as_bool),
             # Set the type of seasonality requested. Used only for seasonality requests
             # Valid values are: yearly, weekly, monthly, holidays
             ('seasonality', 'seasonality', str.lower),
             # Set the debug option for generating execution logs
             # Valid values are: true, false
             ('debug', 'debug', as_bool),
             # Set the frequency of the timeseries
             # Any valid frequency for pd.date_range, such as 'D' or 'M'
             # For options see: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
             ('freq', 'freq', str),
             # Set the changepoint_prior_scale to adjust the trend flexibility
             # If the trend changes are being overfit (too much flexibility) or underfit (not enough flexibility),
             # you can adjust the strength of the sparse prior.
             # Default value is 0.05. Increasing it will make the trend more flexible.
             ('changepoint_prior_scale', 'changepoint_prior_scale', float),
             # Set the width for the uncertainty intervals
             # Default value is 0.8 (i.e. 80%)
             ('interval_width', 'interval_width', float),
             # Set additional seasonality to be added to the model
             # Default seasonalities are yearly and weekly, as well as daily for sub daily data
             ('add_seasonality', 'name', str.lower),
             # Set the seasonality period
             # e.g. 30.5 for 'monthly' seasonality
             ('seasonality_period', 'period', float),
             # Set the seasonality fourier terms
             # Increasing the number of Fourier terms allows the seasonality to fit faster changing cycles,
             # but can also lead to overfitting
             ('seasonality_fourier', 'fourier_order', int),
             # Set the seasonality prior scale to smooth seasonality effects.
             # Reducing this parameter dampens seasonal effects
             ('seasonality_prior_scale', 'seasonality_prior_scale', float),
             # Set the holiday prior scale to smooth holiday effects.
             # Reducing this parameter dampens holiday effects. Default is 10, which provides very little regularization.
             ('holidays_prior_scale', 'holidays_prior_scale', float),
             # Set the weekly start for 'weekly' seasonality requests
             # Default week start is 0 which represents Sunday. Add offset as required.
             ('weekly_start', 'weekly_start', int),
             # Set the yearly start for 'yearly' seasonality requests
             # Default year start is 0 which represents 1st of Jan. Add offset as required.
             ('yearly_start', 'yearly_start', int),
             # Set a period to extend the holidays by lower_window number of days before the date.
             # This can be used to extend the holiday effect
             ('lower_window', 'lower_window', int),
             # Set a period to extend the holidays by upper_window number of days after the date.
             # This can be used to extend the holiday effect
             ('upper_window', 'upper_window', int),
         )

         for key_word, attribute, convert in simple_params:
             if key_word in self.kwargs:
                 setattr(self, attribute, convert(self.kwargs[key_word]))

         # Set the cap which adds an upper limit at which the forecast will saturate
         # This changes the default linear growth model to a logistic growth model
         if 'cap' in self.kwargs:
             self.cap = float(self.kwargs['cap'])
             self.growth = 'logistic'

             # Set the floor which adds a lower limit at which the forecast will saturate
             # To use a logistic growth trend with a floor, a cap must also be specified,
             # so a floor passed without a cap is ignored
             if 'floor' in self.kwargs:
                 self.floor = float(self.kwargs['floor'])

     # Create dictionary of arguments for the Prophet(), make_future_dataframe() and add_seasonality() functions
     self.prophet_kwargs = {}
     self.make_kwargs = {}
     self.add_seasonality_kwargs = {}

     # Populate the parameters in the corresponding dictionary:

     # Set up a list of possible key word arguments for the Prophet() function
     prophet_params = ['growth', 'changepoint_prior_scale', 'interval_width', 'seasonality_prior_scale',
                       'holidays_prior_scale']

     # Create dictionary of key word arguments for the Prophet() function
     self.prophet_kwargs = self._populate_dict(prophet_params)

     # Set up a list of possible key word arguments for the make_future_dataframe() function
     make_params = ['periods', 'freq']

     # Create dictionary of key word arguments for the make_future_dataframe() function
     self.make_kwargs = self._populate_dict(make_params)

     # Set up a list of possible key word arguments for the add_seasonality() function
     seasonality_params = ['name', 'period', 'fourier_order']

     # Create dictionary of key word arguments for the add_seasonality() function
     self.add_seasonality_kwargs = self._populate_dict(seasonality_params)
Beispiel #2
0
    def _set_params(self):
        """
        Set input parameters based on the request.

        Default values are assigned first and are then overridden by any key word arguments found in the request.
        When the first row of the request has three or more columns, the last column is read as the key word
        arguments. These are expected as a comma separated string of key=value pairs, e.g. 'freq=M, take_log=true'.
        Whitespace is removed, key words are case insensitive and empty segments are ignored.

        Parameters implemented for the Prophet() function are: growth, cap, floor, changepoint_prior_scale, interval_width,
        seasonality_mode, mcmc_samples, n_changepoints, changepoint_range, uncertainty_samples
        Parameters implemented for the make_future_dataframe() function are: freq, periods
        Parameters implemented for seasonality are: add_seasonality, add_seasonality_mode, seasonality_period,
        seasonality_fourier, seasonality_prior_scale
        Parameters implemented for holidays are: holidays_prior_scale, lower_window, upper_window
        Additional parameters for seasonality requests are: weekly_start, yearly_start
        Additional parameters used are: return, take_log, seasonality, debug, load_script, is_seasonality_request, random_seed

        Note that growth is not read from the request: it is set to 'logistic' when a cap is supplied.
        Similarly, periods is derived from the number of placeholders in the 'y' column.

        Raises:
            ValueError: If a non-empty argument is not in the key=value format.
            Errors raised by int(), utils.atof() or utils.atoi() for values that cannot be converted
            are not handled here and propagate to the caller.
        """

        # Calculate the forecast periods based on the number of placeholders in the data
        self.periods = utils.count_placeholders(self.request_df.loc[:, 'y'])

        # Set the row count in the original request
        self.request_row_count = len(self.request_df) + len(self.NaT_df)

        # Set default values which will be used if an argument is not passed
        self.load_script = False
        self.result_type = 'yhat'
        self.take_log = False
        self.seasonality = 'yearly'
        self.seasonality_mode = None
        self.debug = False
        self.freq = 'D'
        self.cap = None
        self.floor = None
        self.growth = None
        self.changepoint_prior_scale = None
        self.interval_width = None
        self.name = None
        self.period = None
        self.fourier_order = None
        self.mode = None
        self.seasonality_prior_scale = None
        self.holidays_prior_scale = None
        self.mcmc_samples = None
        self.seed = None
        self.n_changepoints = None
        self.changepoint_range = None
        self.uncertainty_samples = None
        self.is_seasonality_request = False
        self.weekly_start = 1  # Defaulting to a Monday start for the week as used in Qlik
        self.yearly_start = 0
        self.lower_window = None
        self.upper_window = None

        # Set optional parameters

        # Check the number of columns in the request to determine whether we have holidays and/or added regressors
        cols = len(self.request[0].rows[0].duals)

        # A request with exactly six columns is treated as including additional regressors
        self.has_regressors = cols == 6
        # Any request with four or more columns is treated as including holidays
        self.has_holidays = cols >= 4

        # If there are three or more columns, the last column should contain the key word arguments
        args = self.request[0].rows[0].duals[cols - 1].strData if cols >= 3 else None

        # If the key word arguments were included in the request, get the parameters and values
        if args is not None:

            # Remove all whitespace before transforming the string into key value pairs
            compact = args.translate(str.maketrans('', '', string.whitespace))

            self.kwargs = {}
            for pair in compact.split(","):
                # An empty argument string or a stray comma produces empty segments, which carry no information
                if not pair:
                    continue

                # Split on the first '=' only, so that a value may itself contain '='
                key, separator, value = pair.partition("=")
                if not separator:
                    raise ValueError("Invalid key word argument '{0}'. Expected the format key=value.".format(pair))

                # Make sure the key words are in lower case
                self.kwargs[key.lower()] = value

            def as_bool(value):
                # Only the text 'true' (in any case) switches a flag on
                return 'true' == value.lower()

            # Each entry gives the key word, the attribute it sets and the conversion applied to its value
            simple_params = (
                # Set the load_script parameter to determine the output format
                # Set to 'true' if calling the functions from the load script in the Qlik app
                ('load_script', 'load_script', as_bool),
                # Set the return type
                # Valid values are: yhat, trend, seasonal, seasonalities, all, y_then_yhat, residual.
                # Add _lower or _upper to the series name to get lower or upper limits.
                # The special case of 'all' returns all output columns from Prophet. This can only be used with 'load_script=true'.
                # 'y_then_yhat' returns actual values for historical periods and forecast values for future periods
                # 'residual' returns y - yhat for historical periods
                ('return', 'result_type', str.lower),
                # Set the option to take a logarithm of y values before forecast calculations
                # Valid values are: true, false
                ('take_log', 'take_log', as_bool),
                # Set the type of seasonality requested. Used only for seasonality requests
                # Valid values are: yearly, weekly, monthly, holidays
                ('seasonality', 'seasonality', str.lower),
                # Set the seasonality mode. Useful if the seasonality is not a constant additive factor as assumed by Prophet
                # Valid values are: additive, multiplicative
                ('seasonality_mode', 'seasonality_mode', str.lower),
                # Set the debug option for generating execution logs
                # Valid values are: true, false
                ('debug', 'debug', as_bool),
                # Set the frequency of the timeseries
                # Any valid frequency for pd.date_range, such as 'D' or 'M'
                # For options see: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
                ('freq', 'freq', str),
                # Set the changepoint_prior_scale to adjust the trend flexibility
                # If the trend changes are being overfit (too much flexibility) or underfit (not enough flexibility),
                # you can adjust the strength of the sparse prior.
                # Default value is 0.05. Increasing it will make the trend more flexible.
                ('changepoint_prior_scale', 'changepoint_prior_scale', utils.atof),
                # Set the width for the uncertainty intervals
                # Default value is 0.8 (i.e. 80%)
                ('interval_width', 'interval_width', utils.atof),
                # Set additional seasonality to be added to the model
                # Default seasonalities are yearly and weekly, as well as daily for sub daily data
                ('add_seasonality', 'name', str.lower),
                # Set 'additive' or 'multiplicative' mode for the additional seasonality
                # Default value follows the seasonality_mode parameter
                ('add_seasonality_mode', 'mode', str.lower),
                # Set the seasonality period
                # e.g. 30.5 for 'monthly' seasonality
                ('seasonality_period', 'period', utils.atof),
                # Set the seasonality fourier terms
                # Increasing the number of Fourier terms allows the seasonality to fit faster changing cycles,
                # but can also lead to overfitting
                ('seasonality_fourier', 'fourier_order', int),
                # Set the seasonality prior scale to smooth seasonality effects.
                # Reducing this parameter dampens seasonal effects
                ('seasonality_prior_scale', 'seasonality_prior_scale', utils.atof),
                # Set the holiday prior scale to smooth holiday effects.
                # Reducing this parameter dampens holiday effects. Default is 10, which provides very little regularization.
                ('holidays_prior_scale', 'holidays_prior_scale', utils.atof),
                # Set the number of MCMC samples.
                # If greater than 0, Prophet will do full Bayesian inference with the specified number of MCMC samples.
                # If 0, Prophet will do MAP estimation. Default is 0.
                ('mcmc_samples', 'mcmc_samples', utils.atoi),
                # Random seed that can be used to control stochasticity.
                # Used for setting the numpy random seed used in predict and also for pystan when using mcmc_samples>0.
                ('random_seed', 'seed', utils.atoi),
                # Number of potential changepoints to include. Default value is 25.
                # Potential changepoints are selected uniformly from the first `changepoint_range` proportion of the history.
                ('n_changepoints', 'n_changepoints', utils.atoi),
                # Proportion of history in which trend changepoints will be estimated.
                # Defaults to 0.8 for the first 80%.
                ('changepoint_range', 'changepoint_range', utils.atof),
                # Number of simulated draws used to estimate uncertainty intervals.
                ('uncertainty_samples', 'uncertainty_samples', utils.atoi),
                # Set the weekly start for 'weekly' seasonality requests
                # Default week start is 0 which represents Sunday. Add offset as required.
                ('weekly_start', 'weekly_start', utils.atoi),
                # Set the yearly start for 'yearly' seasonality requests
                # Default year start is 0 which represents 1st of Jan. Add offset as required.
                ('yearly_start', 'yearly_start', utils.atoi),
                # Set a period to extend the holidays by lower_window number of days before the date.
                # This can be used to extend the holiday effect
                ('lower_window', 'lower_window', utils.atoi),
                # Set a period to extend the holidays by upper_window number of days after the date.
                # This can be used to extend the holiday effect
                ('upper_window', 'upper_window', utils.atoi),
            )

            for key_word, attribute, convert in simple_params:
                if key_word in self.kwargs:
                    setattr(self, attribute, convert(self.kwargs[key_word]))

            # Set a flag to return the seasonality plot instead
            # Only usable through the load script as the result will have a different cardinality to the request
            if 'is_seasonality_request' in self.kwargs:
                self.is_seasonality_request = as_bool(self.kwargs['is_seasonality_request'])
                # This deliberately runs after the load_script key word so that it takes precedence.
                # NOTE(review): load_script is forced on whenever the key word is present,
                # even if its value is 'false' - confirm this is intended.
                self.load_script = True

            # Set the cap which adds an upper limit at which the forecast will saturate
            # This changes the default linear growth model to a logistic growth model
            if 'cap' in self.kwargs:
                self.cap = utils.atof(self.kwargs['cap'])
                self.growth = 'logistic'

                # Set the floor which adds a lower limit at which the forecast will saturate
                # To use a logistic growth trend with a floor, a cap must also be specified,
                # so a floor passed without a cap is ignored
                if 'floor' in self.kwargs:
                    self.floor = utils.atof(self.kwargs['floor'])

            # Set the random seed for numpy, but only if one was explicitly requested
            if 'random_seed' in self.kwargs:
                np.random.seed(self.seed)

        # Create dictionary of arguments for the Prophet(), make_future_dataframe(), add_seasonality() and fit() functions
        self.prophet_kwargs = {}
        self.make_kwargs = {}
        self.add_seasonality_kwargs = {}
        self.fit_kwargs = {}

        # Populate the parameters in the corresponding dictionary:

        # Set up a list of possible key word arguments for the Prophet() function
        prophet_params = ['seasonality_mode', 'growth', 'changepoint_prior_scale', 'interval_width',
                          'seasonality_prior_scale', 'holidays_prior_scale', 'mcmc_samples', 'n_changepoints',
                          'changepoint_range', 'uncertainty_samples']

        # Create dictionary of key word arguments for the Prophet() function
        self.prophet_kwargs = self._populate_dict(prophet_params)

        # Set up a list of possible key word arguments for the make_future_dataframe() function
        make_params = ['periods', 'freq']

        # Create dictionary of key word arguments for the make_future_dataframe() function
        self.make_kwargs = self._populate_dict(make_params)

        # Set up a list of possible key word arguments for the add_seasonality() function
        seasonality_params = ['name', 'period', 'fourier_order', 'mode']

        # Create dictionary of key word arguments for the add_seasonality() function
        self.add_seasonality_kwargs = self._populate_dict(seasonality_params)

        # Pass the random seed to the fit method if MCMC is being used
        if self.mcmc_samples is not None and self.mcmc_samples > 0:
            # Set up a list of possible key word arguments for the fit() function
            fit_params = ['seed']
            # Create dictionary of key word arguments for the fit() function
            self.fit_kwargs = self._populate_dict(fit_params)