Example #1
    def _set_params(self, kwargs):
        """
        Set input parameters based on the request.
        :
        :Parameters implemented for the HDBSCAN() function are: algorithm, metric, min_cluster_size, min_samples,
        :p, alpha, cluster_selection_method, allow_single_cluster, match_reference_implementation.
        :More information here: https://hdbscan.readthedocs.io/en/latest/api.html#hdbscan
        :
        :Scaler types implemented for preprocessing data are: StandardScaler, MinMaxScaler, MaxAbsScaler,
        :RobustScaler and QuantileTransformer.
        :More information here: http://scikit-learn.org/stable/modules/preprocessing.html
        :
        :Additional parameters used are: load_script, return, missing, scaler, debug
        """

        # Set the row count in the original request
        self.request_row_count = len(self.request_df) + len(self.NaN_df)

        # Set default values which will be used if arguments are not passed

        # SSE parameters:
        self.load_script = False
        self.result_type = 'labels_'
        self.missing = 'zeros'
        self.scaler = 'robust'
        self.debug = False
        # HDBSCAN parameters:
        self.algorithm = None
        self.metric = None
        self.min_cluster_size = None
        self.min_samples = None
        self.p = None
        self.alpha = None
        self.cluster_selection_method = None
        self.allow_single_cluster = None
        self.match_reference_implementation = None
        # Standard scaler parameters:
        self.with_mean = None
        self.with_std = None
        # MinMaxScaler scaler parameters:
        self.feature_range = None
        # Robust scaler parameters:
        self.with_centering = None
        self.with_scaling = None
        self.quantile_range = None
        # Quantile Transformer parameters:
        self.n_quantiles = None
        self.output_distribution = None
        self.ignore_implicit_zeros = None
        self.subsample = None
        self.random_state = None

        # Adjust default options if variant is two_dims
        if self.variant == "two_dims":
            self.load_script = True

        # Adjust default options if variant is lat_long
        elif self.variant == "lat_long":
            self.scaler = "none"
            self.metric = "haversine"

        # Set optional parameters

        # If the key word arguments were included in the request, get the parameters and values
        if len(kwargs) > 0:

            # The parameter and values are transformed into key value pairs
            args = kwargs.translate(str.maketrans(
                '', '', string.whitespace)).split(",")
            self.kwargs = dict([arg.split("=") for arg in args])

            # Make sure the key words are in lower case
            self.kwargs = {k.lower(): v for k, v in self.kwargs.items()}
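            # e.g. 'Scaler=standard, min_cluster_size=10' is parsed into
            # {'scaler': 'standard', 'min_cluster_size': '10'}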

            # Set the load_script parameter to determine the output format
            # Set to 'true' if calling the functions from the load script in the Qlik app
            if 'load_script' in self.kwargs:
                self.load_script = 'true' == self.kwargs['load_script'].lower()

            # Set the return type
            # Valid values are: labels, probabilities, cluster_persistence, outlier_scores
            if 'return' in self.kwargs:
                self.result_type = self.kwargs['return'].lower() + '_'

            # Set the strategy for missing data
            # Valid values are: zeros, mean, median, mode
            if 'missing' in self.kwargs:
                self.missing = self.kwargs['missing'].lower()

            # Set the standardization strategy for the data
            # Valid values are: standard, minmax, maxabs, robust, quantile, none
            if 'scaler' in self.kwargs:
                self.scaler = self.kwargs['scaler'].lower()

            # Set the debug option for generating execution logs
            # Valid values are: true, false
            if 'debug' in self.kwargs:
                self.debug = 'true' == self.kwargs['debug'].lower()

            # Set optional parameters for the HDBSCAN algorithm
            # For documentation see here: https://hdbscan.readthedocs.io/en/latest/api.html#id20

            # Options are: best, generic, prims_kdtree, prims_balltree, boruvka_kdtree, boruvka_balltree
            # Default is 'best'.
            if 'algorithm' in self.kwargs:
                self.algorithm = self.kwargs['algorithm'].lower()

            # The metric to use when calculating distance between instances in a feature array.
            # More information here: https://hdbscan.readthedocs.io/en/latest/basic_hdbscan.html#what-about-different-metrics
            # And here: http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.DistanceMetric.html
            # Default is 'euclidean' for 'standard' and 'two_dims' variants, and 'haversine' for the lat_long variant.
            if 'metric' in self.kwargs:
                self.metric = self.kwargs['metric'].lower()

            # The minimum size of clusters.
            # The default value is 5.
            if 'min_cluster_size' in self.kwargs:
                self.min_cluster_size = utils.atoi(
                    self.kwargs['min_cluster_size'])

            # The number of samples in a neighbourhood for a point to be considered a core point.
            if 'min_samples' in self.kwargs:
                self.min_samples = utils.atoi(self.kwargs['min_samples'])

            # The p value to use with the Minkowski metric.
            if 'p' in self.kwargs:
                self.p = utils.atoi(self.kwargs['p'])

            # A distance scaling parameter as used in robust single linkage.
            if 'alpha' in self.kwargs:
                self.alpha = utils.atof(self.kwargs['alpha'])

            # The method used to select clusters from the condensed tree.
            # Options are: eom, leaf.
            if 'cluster_selection_method' in self.kwargs:
                self.cluster_selection_method = self.kwargs[
                    'cluster_selection_method'].lower()

            # By default HDBSCAN* will not produce a single cluster.
            # Setting this to True will override this and allow single cluster results.
            if 'allow_single_cluster' in self.kwargs:
                self.allow_single_cluster = 'true' == self.kwargs[
                    'allow_single_cluster'].lower()

            # There are some interpretational differences between this HDBSCAN implementation
            # and the original authors' reference implementation in Java.
            # Note that there is a performance cost for setting this to True.
            if 'match_reference_implementation' in self.kwargs:
                self.match_reference_implementation = 'true' == self.kwargs[
                    'match_reference_implementation'].lower()

            # Set optional parameters for the scaler functions

            # Parameters for the Standard scaler
            # http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
            if self.scaler == 'standard':
                if 'with_mean' in self.kwargs:
                    self.with_mean = 'true' == self.kwargs['with_mean'].lower()
                if 'with_std' in self.kwargs:
                    self.with_std = 'true' == self.kwargs['with_std'].lower()

            # Parameters for the MinMax scaler
            # http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html
            if self.scaler == 'minmax':
                if 'feature_range' in self.kwargs:
                    self.feature_range = ''.join(
                        c for c in self.kwargs['feature_range']
                        if c not in '()').split(';')
                    self.feature_range = (utils.atoi(self.feature_range[0]),
                                          utils.atoi(self.feature_range[1]))
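                    # e.g. '(0;1)' is parsed into the tuple (0, 1)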

            # Parameters for the Robust scaler
            # http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.RobustScaler.html
            if self.scaler == 'robust':
                if 'with_centering' in self.kwargs:
                    self.with_centering = 'true' == self.kwargs[
                        'with_centering'].lower()
                if 'with_scaling' in self.kwargs:
                    self.with_scaling = 'true' == self.kwargs[
                        'with_scaling'].lower()
                if 'quantile_range' in self.kwargs:
                    self.quantile_range = ''.join(
                        c for c in self.kwargs['quantile_range']
                        if c not in '()').split(';')
                    self.quantile_range = (utils.atof(self.quantile_range[0]),
                                           utils.atof(self.quantile_range[1]))
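                    # e.g. '(25.0;75.0)' is parsed into the tuple (25.0, 75.0)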

            # Parameters for the Quantile Transformer
            # http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.QuantileTransformer.html
            if self.scaler == 'quantile':
                if 'n_quantiles' in self.kwargs:
                    self.n_quantiles = utils.atoi(self.kwargs['n_quantiles'])
                if 'output_distribution' in self.kwargs:
                    self.output_distribution = self.kwargs[
                        'output_distribution'].lower()
                if 'ignore_implicit_zeros' in self.kwargs:
                    self.ignore_implicit_zeros = 'true' == self.kwargs[
                        'ignore_implicit_zeros'].lower()
                if 'subsample' in self.kwargs:
                    self.subsample = utils.atoi(self.kwargs['subsample'])
                if 'random_state' in self.kwargs:
                    self.random_state = utils.atoi(self.kwargs['random_state'])

        # Set up a list of possible key word arguments for the HDBSCAN() function
        hdbscan_params = ['algorithm', 'metric', 'min_cluster_size', 'min_samples', 'p', 'alpha',
                          'cluster_selection_method', 'allow_single_cluster', 'match_reference_implementation']

        # Create dictionary of key word arguments for the HDBSCAN() function
        self.hdbscan_kwargs = self._populate_dict(hdbscan_params)

        # Set up a list of possible key word arguments for the sklearn preprocessing functions
        scaler_params = ['with_mean', 'with_std', 'feature_range', 'with_centering', 'with_scaling',
                         'quantile_range', 'n_quantiles', 'output_distribution', 'ignore_implicit_zeros',
                         'subsample', 'random_state']

        # Create dictionary of key word arguments for the scaler functions
        self.scaler_kwargs = self._populate_dict(scaler_params)
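
The _populate_dict helper is not shown on this page. A minimal sketch of what it presumably does, assuming it simply collects the attributes that were explicitly set so that library defaults apply for everything else (this behavior is inferred from how the dictionaries are used, not confirmed from the project):

    def _populate_dict(self, params):
        # Return {parameter: value} for each attribute in params that is not None,
        # so that unset parameters fall back to the library defaults.
        return {p: getattr(self, p) for p in params if getattr(self, p) is not None}

With the defaults above, only parameters passed in the request end up in the dictionaries, which can then be unpacked directly, e.g. hdbscan.HDBSCAN(**self.hdbscan_kwargs).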
Example #2
    def _set_params(self, kwargs):
        """
        Set input parameters based on the request.
        :
        :For details refer to the GitHub project: https://github.com/nabeel-oz/qlik-py-tools
        """

        # Set default values which will be used if execution arguments are not passed

        # Default parameters:
        self.debug = False
        self.model = 'en_core_web_sm'
        self.custom = False
        self.base_model = 'en_core_web_sm'
        self.blank = False
        self.epochs = 100
        self.batch_size = compounding(4.0, 32.0, 1.001)
        self.drop = 0.25
        self.test = 0

        # Extract the model path if required
        try:
            # Get the model name from the first row in the request_df
            self.model = self.request_df.loc[0, 'model_name']

            # Remove the model_name column from the request_df
            self.request_df = self.request_df.drop(['model_name'], axis=1)
        except KeyError:
            pass

        # If key word arguments were included in the request, get the parameters and values
        if len(kwargs) > 0:

            # Transform the string of arguments into a dictionary
            self.kwargs = utils.get_kwargs(kwargs)

            # Set the debug option for generating execution logs
            # Valid values are: true, false
            if 'debug' in self.kwargs:
                self.debug = 'true' == self.kwargs['debug'].lower()

                # Additional information is printed to the terminal and logs if the parameter debug = true
                if self.debug:
                    # Increment log counter for the class. Each instance of the class generates a new log.
                    self.__class__.log_no += 1

                    # Create a log file for the instance
                    # Logs will be stored in ..\logs\SpaCy Log <n>.txt
                    self.logfile = os.path.join(
                        os.getcwd(), 'logs',
                        'SpaCy Log {}.txt'.format(self.log_no))

                    self._print_log(1)

            # Set whether the model (if getting named entities) or base model (if retraining) is a custom model
            # i.e. not one of the pre-trained models provided by spaCy
            if 'custom' in self.kwargs:
                self.custom = 'true' == self.kwargs['custom'].lower()

            # Set the base model, i.e. an existing spaCy model to be retrained.
            if 'base_model' in self.kwargs:
                self.base_model = self.kwargs['base_model'].lower()

            # Set the retraining to be done on a blank Language class
            if 'blank' in self.kwargs:
                self.blank = 'true' == self.kwargs['blank'].lower()

            # Set the epochs for training the model.
            # This is the number of times that the learning algorithm will work through the entire training dataset.
            # Valid values are integers, e.g. 200
            if 'epochs' in self.kwargs:
                self.epochs = utils.atoi(self.kwargs['epochs'])

            # Set the batch size to be used during model training.
            # The model's internal parameters will be updated at the end of each batch.
            # Valid values are a single integer, or three numbers defining a compounding or decaying schedule.
            if 'batch_size' in self.kwargs:
                # The batch size may be a single integer
                try:
                    self.batch_size = utils.atoi(self.kwargs['batch_size'])
                # Or a list of floats
                except ValueError:
                    sizes = utils.get_kwargs_by_type(self.kwargs['batch_size'])

                    # If the start < end, batch sizes will be compounded
                    if sizes[0] < sizes[1]:
                        self.batch_size = compounding(sizes[0], sizes[1],
                                                      sizes[2])
                    # else batch sizes will decay during training
                    else:
                        self.batch_size = decaying(sizes[0], sizes[1],
                                                   sizes[2])

            # Set the dropout rate for retraining the model
            # This determines the likelihood that a feature or internal representation in the model will be dropped,
            # making it harder for the model to memorize the training data.
            # Valid values are floats less than 1.0, e.g. 0.35
            if 'drop' in self.kwargs:
                self.drop = utils.atof(self.kwargs['drop'])

            # Set the ratio of data to be used for testing.
            # This data will be held out from training and just used to provide evaluation metrics.
            # Valid values are floats >= 0 and < 1.0, e.g. 0.3
            if 'test' in self.kwargs:
                self.test = utils.atof(self.kwargs['test'])

        # Debug information is printed to the terminal and logs if the parameter debug = true
        if self.debug:
            self._print_log(2)

        # Remove the kwargs column from the request_df
        self.request_df = self.request_df.drop(['kwargs'], axis=1)
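
The batch-size schedules above come from spaCy v2's spacy.util helpers. A small sketch of how they behave, assuming spaCy v2 semantics: compounding() yields an infinite series that is multiplied by the factor each step and capped at the stop value, while decaying() yields a linearly decreasing series floored at the stop value.

    from itertools import islice
    from spacy.util import compounding, decaying

    # The default schedule used above: 4.0, 4.004, 4.008..., capped at 32.0
    print(list(islice(compounding(4.0, 32.0, 1.001), 4)))

    # If start > end in the request, batch sizes decay instead: 32.0, 31.9, 31.8, ...
    print(list(islice(decaying(32.0, 4.0, 0.1), 4)))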
Example #3
    def _set_params(self):
        """
        Set input parameters based on the request.
        Parameters implemented for the Prophet() function are: growth, cap, floor, changepoint_prior_scale, interval_width 
        Parameters implemented for the make_future_dataframe() function are: freq, periods
        Parameters implemented for seasonality are: add_seasonality, seasonality_period, seasonality_fourier, seasonality_prior_scale
        Parameters implemented for holidays are: holidays_prior_scale, lower_window, upper_window
        Additional parameters for seasonality requests are: weekly_start, yearly_start
        Additional parameters used are: return, take_log, seasonality, debug
        """

        # Calculate the forecast periods based on the number of placeholders in the data
        self.periods = utils.count_placeholders(self.request_df.loc[:, 'y'])

        # Set the row count in the original request
        self.request_row_count = len(self.request_df) + len(self.NaT_df)

        # Set default values which will be used if an argument is not passed
        self.load_script = False
        self.result_type = 'yhat'
        self.take_log = False
        self.seasonality = 'yearly'
        self.seasonality_mode = None
        self.debug = False
        self.freq = 'D'
        self.cap = None
        self.floor = None
        self.growth = None
        self.changepoint_prior_scale = None
        self.interval_width = None
        self.name = None
        self.period = None
        self.fourier_order = None
        self.mode = None
        self.seasonality_prior_scale = None
        self.holidays_prior_scale = None
        self.mcmc_samples = None
        self.seed = None
        self.n_changepoints = None
        self.changepoint_range = None
        self.uncertainty_samples = None
        self.is_seasonality_request = False
        self.weekly_start = 6  # Defaulting to a Monday start for the week as used in Qlik
        self.yearly_start = 0
        self.lower_window = None
        self.upper_window = None

        # Set optional parameters

        # Check if there is a fourth column in the request
        try:
            # If there is a fourth column, it is assumed to contain the key word arguments
            args = self.request[0].rows[0].duals[3].strData

            # The third column should then provide the holiday name or null for each row
            self.has_holidays = True

        except IndexError:
            # If there is no fourth column, the request does not include holidays
            self.has_holidays = False

        # If the fourth column did not exist, we try again with the third column
        if not self.has_holidays:
            try:
                args = self.request[0].rows[0].duals[2].strData
            except IndexError:
                args = None

        # If the key word arguments were included in the request, get the parameters and values
        if args is not None:

            # The parameter and values are transformed into key value pairs
            args = args.translate(str.maketrans('', '',
                                                string.whitespace)).split(",")
            self.kwargs = dict([arg.split("=") for arg in args])

            # Make sure the key words are in lower case
            self.kwargs = {k.lower(): v for k, v in self.kwargs.items()}

            # Set the load_script parameter to determine the output format
            # Set to 'true' if calling the functions from the load script in the Qlik app
            if 'load_script' in self.kwargs:
                self.load_script = 'true' == self.kwargs['load_script'].lower()

            # Set the return type
            # Valid values are: yhat, trend, seasonal, seasonalities.
            # Add _lower or _upper to the series name to get lower or upper limits.
            if 'return' in self.kwargs:
                self.result_type = self.kwargs['return'].lower()

            # Set the option to take a logarithm of y values before forecast calculations
            # Valid values are: true, false
            if 'take_log' in self.kwargs:
                self.take_log = 'true' == self.kwargs['take_log'].lower()

            # Set the type of seasonality requested. Used only for seasonality requests
            # Valid values are: yearly, weekly, monthly, holidays
            if 'seasonality' in self.kwargs:
                self.seasonality = self.kwargs['seasonality'].lower()

            # Set the seasonality mode. Useful if the seasonality is not a constant additive factor as assumed by Prophet
            # Valid values are: additive, multiplicative
            if 'seasonality_mode' in self.kwargs:
                self.seasonality_mode = self.kwargs['seasonality_mode'].lower()

            # Set the debug option for generating execution logs
            # Valid values are: true, false
            if 'debug' in self.kwargs:
                self.debug = 'true' == self.kwargs['debug'].lower()

            # Set the frequency of the timeseries
            # Any valid frequency for pd.date_range, such as 'D' or 'M'
            # For options see: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
            if 'freq' in self.kwargs:
                self.freq = self.kwargs['freq']

            # Set the cap which adds an upper limit at which the forecast will saturate
            # This changes the default linear growth model to a logistic growth model
            if 'cap' in self.kwargs:
                self.cap = utils.atof(self.kwargs['cap'])
                self.growth = 'logistic'

                # Set the floor which adds a lower limit at which the forecast will saturate
                # To use a logistic growth trend with a floor, a cap must also be specified
                if 'floor' in self.kwargs:
                    self.floor = utils.atof(self.kwargs['floor'])

            # Set the changepoint_prior_scale to adjust the trend flexibility
            # If the trend changes are being overfit (too much flexibility) or underfit (not enough flexibility),
            # you can adjust the strength of the sparse prior.
            # Default value is 0.05. Increasing it will make the trend more flexible.
            if 'changepoint_prior_scale' in self.kwargs:
                self.changepoint_prior_scale = utils.atof(
                    self.kwargs['changepoint_prior_scale'])

            # Set the width for the uncertainty intervals
            # Default value is 0.8 (i.e. 80%)
            if 'interval_width' in self.kwargs:
                self.interval_width = utils.atof(self.kwargs['interval_width'])

            # Set additional seasonality to be added to the model
            # Default seasonalities are yearly and weekly, as well as daily for sub-daily data
            if 'add_seasonality' in self.kwargs:
                self.name = self.kwargs['add_seasonality'].lower()

            # Set 'additive' or 'multiplicative' mode for the additional seasonality
            # Default value follows the seasonality_mode parameter
            if 'add_seasonality_mode' in self.kwargs:
                self.mode = self.kwargs['add_seasonality_mode'].lower()

            # Set the seasonality period
            # e.g. 30.5 for 'monthly' seasonality
            if 'seasonality_period' in self.kwargs:
                self.period = utils.atof(self.kwargs['seasonality_period'])

            # Set the seasonality fourier terms
            # Increasing the number of Fourier terms allows the seasonality to fit faster changing cycles,
            # but can also lead to overfitting
            if 'seasonality_fourier' in self.kwargs:
                self.fourier_order = int(self.kwargs['seasonality_fourier'])

            # Set the seasonality prior scale to smooth seasonality effects.
            # Reducing this parameter dampens seasonal effects
            if 'seasonality_prior_scale' in self.kwargs:
                self.seasonality_prior_scale = utils.atof(
                    self.kwargs['seasonality_prior_scale'])

            # Set the holiday prior scale to smooth holiday effects.
            # Reducing this parameter dampens holiday effects. Default is 10, which provides very little regularization.
            if 'holidays_prior_scale' in self.kwargs:
                self.holidays_prior_scale = utils.atof(
                    self.kwargs['holidays_prior_scale'])

            # Set the number of MCMC samples.
            # If greater than 0, Prophet will do full Bayesian inference with the specified number of MCMC samples.
            # If 0, Prophet will do MAP estimation. Default is 0.
            if 'mcmc_samples' in self.kwargs:
                self.mcmc_samples = utils.atoi(self.kwargs['mcmc_samples'])

            # Random seed that can be used to control stochasticity.
            # Used for setting the numpy random seed used in predict and also for pystan when using mcmc_samples>0.
            if 'random_seed' in self.kwargs:
                self.seed = utils.atoi(self.kwargs['random_seed'])

                # Set the random seed for numpy
                np.random.seed(self.seed)

            # Number of potential changepoints to include. Default value is 25.
            # Potential changepoints are selected uniformly from the first `changepoint_range` proportion of the history.
            if 'n_changepoints' in self.kwargs:
                self.n_changepoints = utils.atoi(self.kwargs['n_changepoints'])

            # Proportion of history in which trend changepoints will be estimated.
            # Defaults to 0.8 for the first 80%.
            if 'changepoint_range' in self.kwargs:
                self.changepoint_range = utils.atof(
                    self.kwargs['changepoint_range'])

            # Number of simulated draws used to estimate uncertainty intervals.
            if 'uncertainty_samples' in self.kwargs:
                self.uncertainty_samples = utils.atoi(
                    self.kwargs['uncertainty_samples'])

            # Set the weekly start for 'weekly' seasonality requests
            # Prophet's default week start of 0 represents Sunday. Add an offset as required.
            if 'weekly_start' in self.kwargs:
                self.weekly_start = utils.atoi(self.kwargs['weekly_start'])

            # Set the yearly start for 'yearly' seasonality requests
            # The default start of 0 represents the 1st of January. Add an offset as required.
            if 'yearly_start' in self.kwargs:
                self.yearly_start = utils.atoi(self.kwargs['yearly_start'])

            # Set a period to extend the holidays by lower_window number of days before the date.
            # This can be used to extend the holiday effect
            if 'lower_window' in self.kwargs:
                self.lower_window = utils.atoi(self.kwargs['lower_window'])

            # Set a period to extend the holidays by upper_window number of days after the date.
            # This can be used to extend the holiday effect
            if 'upper_window' in self.kwargs:
                self.upper_window = utils.atoi(self.kwargs['upper_window'])

        # Create dictionary of arguments for the Prophet(), make_future_dataframe(), add_seasonality() and fit() functions
        self.prophet_kwargs = {}
        self.make_kwargs = {}
        self.add_seasonality_kwargs = {}
        self.fit_kwargs = {}

        # Populate the parameters in the corresponding dictionary:

        # Set up a list of possible key word arguments for the Prophet() function
        prophet_params = ['seasonality_mode', 'growth', 'changepoint_prior_scale', 'interval_width',
                          'seasonality_prior_scale', 'holidays_prior_scale', 'mcmc_samples', 'n_changepoints',
                          'changepoint_range', 'uncertainty_samples']

        # Create dictionary of key word arguments for the Prophet() function
        self.prophet_kwargs = self._populate_dict(prophet_params)

        # Set up a list of possible key word arguments for the make_future_dataframe() function
        make_params = ['periods', 'freq']

        # Create dictionary of key word arguments for the make_future_dataframe() function
        self.make_kwargs = self._populate_dict(make_params)

        # Set up a list of possible key word arguments for the add_seasonality() function
        seasonality_params = ['name', 'period', 'fourier_order', 'mode']

        # Create dictionary of key word arguments for the add_seasonality() function
        self.add_seasonality_kwargs = self._populate_dict(seasonality_params)

        # Pass the random seed to the fit method if MCMC is being used
        if self.mcmc_samples is not None and self.mcmc_samples > 0:
            # Set up a list of possible key word arguments for the fit() function
            fit_params = ['seed']
            # Create dictionary of key word arguments for the fit() function
            self.fit_kwargs = self._populate_dict(fit_params)
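
For context, a hedged sketch of how these dictionaries would typically feed the Prophet API downstream; the project's actual forecasting code is not shown here, and fbprophet is the package name from this era (newer releases use prophet):

    import pandas as pd
    from fbprophet import Prophet

    # A minimal history with the 'ds' and 'y' columns Prophet expects
    train_df = pd.DataFrame({'ds': pd.date_range('2020-01-01', periods=60, freq='D'),
                             'y': range(60)})

    model = Prophet(interval_width=0.8)    # stands in for Prophet(**self.prophet_kwargs)
    model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    model.fit(train_df)                    # stands in for fit(train_df, **self.fit_kwargs)
    future = model.make_future_dataframe(periods=14, freq='D')
    forecast = model.predict(future)       # columns include yhat, yhat_lower, yhat_upper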
Example #4
    def _set_params(self, kwargs):
        """
        Set input parameters based on the request.
        :
        :For details refer to the GitHub project: https://github.com/nabeel-oz/qlik-py-tools
        """

        # If key word arguments were included in the request, get the parameters and values,
        # by transforming the string of arguments into a dictionary
        self.kwargs = {} if len(kwargs) == 0 else utils.get_kwargs(kwargs)

        # Set the debug option for generating execution logs
        # Valid values are: true, false
        self.debug = False if 'debug' not in self.kwargs else (
            'true' == self.kwargs.pop('debug').lower())

        # Additional information is printed to the terminal and logs if the parameter debug = true
        if self.debug:
            # Increment log counter for the class. Each instance of the class generates a new log.
            self.__class__.log_no += 1

            # Create a log file for the instance
            # Logs will be stored in ..\logs\Common Functions Log <n>.txt
            self.logfile = os.path.join(
                os.getcwd(), 'logs',
                'Common Functions Log {}.txt'.format(self.log_no))

            self._print_log(1)

        # Set the name of the function to be called on the model
        # By default this is the 'predict' function, but could be other functions such as 'predict_proba' if supported by the model
        self.prediction_func = 'predict' if 'return' not in self.kwargs else self.kwargs.pop(
            'return')

        # Certain models may need sorted data for predictions
        # A feature can be specified for use in sorting using the identifier argument.
        self.identifier = None if 'identifier' not in self.kwargs else self.kwargs.pop(
            'identifier')

        # The identifier can be excluded from the inputs to the model using the exclude_identifier argument.
        self.exclude_identifier = False if 'exclude_identifier' not in self.kwargs else (
            self.kwargs.pop('exclude_identifier').lower() == 'true')

        # Number of seconds to wait if a Keras model is being loaded by another thread
        self.wait = 2 if 'keras_wait' not in self.kwargs else utils.atoi(
            self.kwargs.pop('keras_wait'))

        # Number of retries if a Keras model is being loaded by another thread
        self.retries = 5 if 'keras_retries' not in self.kwargs else utils.atoi(
            self.kwargs.pop('keras_retries'))

        # Get the rest of the parameters, converting values to the correct data type
        self.pass_on_kwargs = {} if len(
            self.kwargs) == 0 else utils.get_kwargs_by_type(self.kwargs)

        # The predictions may need to be decoded in case of classification labels
        # The labels can be passed as a dictionary using the 'labels' argument.
        self.labels = None if 'labels' not in self.pass_on_kwargs else self.pass_on_kwargs.pop(
            'labels')

        # Debug information is printed to the terminal and logs if the parameter debug = true
        if self.debug:
            self._print_log(2)

        # Remove the kwargs column from the request_df
        self.request_df = self.request_df.drop(['kwargs'], axis=1)
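
The pattern used throughout this example is "pop with a default": each recognized argument is consumed from the dictionary so that whatever remains can be passed straight through to the model. A tiny illustration with a hypothetical request (int() stands in for utils.atoi):

    kwargs = {'debug': 'True', 'keras_wait': '10', 'n_jobs': '4'}

    debug = False if 'debug' not in kwargs else ('true' == kwargs.pop('debug').lower())
    wait = 2 if 'keras_wait' not in kwargs else int(kwargs.pop('keras_wait'))

    print(debug, wait, kwargs)   # True 10 {'n_jobs': '4'} -- only pass-on arguments remain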