def _set_params(self, kwargs):
    """
    Set input parameters based on the request.
    For details refer to the GitHub project: https://github.com/nabeel-oz/qlik-py-tools
    """

    # Set default values which will be used if execution arguments are not passed

    # Default parameters:
    self.debug = False

    # If key word arguments were included in the request, get the parameters and values
    if len(kwargs) > 0:
        # Transform the string of arguments into a dictionary
        self.kwargs = utils.get_kwargs(kwargs)

        # Set the debug option for generating execution logs
        # Valid values are: true, false
        if 'debug' in self.kwargs:
            self.debug = 'true' == self.kwargs.pop('debug').lower()

            # Additional information is printed to the terminal and logs if the parameter debug = true
            if self.debug:
                # Increment the log counter for the class. Each instance of the class generates a new log.
                self.__class__.log_no += 1

                # Create a log file for the instance
                # Logs will be stored in ..\logs\Common Functions Log <n>.txt
                self.logfile = os.path.join(os.getcwd(), 'logs', 'Common Functions Log {}.txt'.format(self.log_no))

                self._print_log(1)

        # Get the rest of the parameters, converting values to the correct data type
        self.pass_on_kwargs = utils.get_kwargs_by_type(self.kwargs)

    # Debug information is printed to the terminal and logs if the parameter debug = true
    if self.debug:
        self._print_log(2)

    # Remove the kwargs column from the request_df
    self.request_df = self.request_df.drop(['kwargs'], axis=1)
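# ---------------------------------------------------------------------------
# Note: utils.get_kwargs and utils.get_kwargs_by_type are helpers defined in
# the qlik-py-tools repo; their implementation is not reproduced here. The
# sketch below only illustrates the technique described in the comments above
# (turning a string of key=value arguments into a dictionary, then coercing
# values to basic Python types). The names parse_kwargs and coerce_value are
# hypothetical and not part of the repo.
# ---------------------------------------------------------------------------

def parse_kwargs(kwargs_str):
    """Split a string such as 'debug=true, epochs=200' into a dict of strings."""
    pairs = (item.split('=', 1) for item in kwargs_str.split(',') if '=' in item)
    return {key.strip(): value.strip() for key, value in pairs}

def coerce_value(value):
    """Convert a string to int or float where possible, otherwise keep the string."""
    for cast in (int, float):
        try:
            return cast(value)
        except ValueError:
            continue
    return value

# Example usage:
# parse_kwargs('debug=true, epochs=200')  -> {'debug': 'true', 'epochs': '200'}
# coerce_value('200')                     -> 200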
def _set_params(self, kwargs):
    """
    Set input parameters based on the request.
    For details refer to the GitHub project: https://github.com/nabeel-oz/qlik-py-tools
    """

    # Set default values which will be used if execution arguments are not passed

    # Default parameters:
    self.debug = False
    self.model = 'en_core_web_sm'
    self.custom = False
    self.base_model = 'en_core_web_sm'
    self.blank = False
    self.epochs = 100
    self.batch_size = compounding(4.0, 32.0, 1.001)
    self.drop = 0.25
    self.test = 0

    # Extract the model path if required
    try:
        # Get the model name from the first row in the request_df
        self.model = self.request_df.loc[0, 'model_name']

        # Remove the model_name column from the request_df
        self.request_df = self.request_df.drop(['model_name'], axis=1)
    except KeyError:
        pass

    # If key word arguments were included in the request, get the parameters and values
    if len(kwargs) > 0:
        # Transform the string of arguments into a dictionary
        self.kwargs = utils.get_kwargs(kwargs)

        # Set the debug option for generating execution logs
        # Valid values are: true, false
        if 'debug' in self.kwargs:
            self.debug = 'true' == self.kwargs['debug'].lower()

            # Additional information is printed to the terminal and logs if the parameter debug = true
            if self.debug:
                # Increment the log counter for the class. Each instance of the class generates a new log.
                self.__class__.log_no += 1

                # Create a log file for the instance
                # Logs will be stored in ..\logs\SpaCy Log <n>.txt
                self.logfile = os.path.join(os.getcwd(), 'logs', 'SpaCy Log {}.txt'.format(self.log_no))

                self._print_log(1)

        # Set whether the model (if getting named entities) or base model (if retraining) is a custom model,
        # i.e. not one of the pre-trained models provided by spaCy
        if 'custom' in self.kwargs:
            self.custom = 'true' == self.kwargs['custom'].lower()

        # Set the base model, i.e. an existing spaCy model to be retrained
        if 'base_model' in self.kwargs:
            self.base_model = self.kwargs['base_model'].lower()

        # Set the retraining to be done on a blank Language class
        if 'blank' in self.kwargs:
            self.blank = 'true' == self.kwargs['blank'].lower()

        # Set the epochs for training the model.
        # This is the number of times that the learning algorithm will work through the entire training dataset.
        # Valid values are an integer e.g. 200
        if 'epochs' in self.kwargs:
            self.epochs = utils.atoi(self.kwargs['epochs'])

        # Set the batch size to be used during model training.
        # The model's internal parameters will be updated at the end of each batch.
        # Valid values are a single integer, or compounding or decaying parameters.
        if 'batch_size' in self.kwargs:
            # The batch size may be a single integer
            try:
                self.batch_size = utils.atoi(self.kwargs['batch_size'])
            # Or a list of floats
            except ValueError:
                sizes = utils.get_kwargs_by_type(self.kwargs['batch_size'])

                # If start < end, batch sizes will be compounded
                if sizes[0] < sizes[1]:
                    self.batch_size = compounding(sizes[0], sizes[1], sizes[2])
                # Else batch sizes will decay during training
                else:
                    self.batch_size = decaying(sizes[0], sizes[1], sizes[2])

        # Set the dropout rate for retraining the model.
        # This determines the likelihood that a feature or internal representation in the model will be dropped,
        # making it harder for the model to memorize the training data.
        # Valid values are a float less than 1.0 e.g. 0.35
        if 'drop' in self.kwargs:
            self.drop = utils.atof(self.kwargs['drop'])

        # Set the ratio of data to be used for testing.
        # This data will be held out from training and just used to provide evaluation metrics.
        # Valid values are a float >= zero and < 1.0 e.g. 0.3
        if 'test' in self.kwargs:
            self.test = utils.atof(self.kwargs['test'])

    # Debug information is printed to the terminal and logs if the parameter debug = true
    if self.debug:
        self._print_log(2)

    # Remove the kwargs column from the request_df
    self.request_df = self.request_df.drop(['kwargs'], axis=1)
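# ---------------------------------------------------------------------------
# A short illustration of the batch_size generators set above, assuming
# spaCy v2.x where compounding() and decaying() are exposed by spacy.util
# (these helpers were removed in spaCy v3). Each generator yields the size to
# use for the next minibatch during training; the printed numbers are
# examples only.
# ---------------------------------------------------------------------------

from itertools import islice

from spacy.util import compounding, decaying

# Batch sizes grow multiplicatively from 4.0 towards the cap of 32.0
grow = compounding(4.0, 32.0, 1.001)
print(list(islice(grow, 5)))

# Batch sizes decay from 32.0 towards the floor of 4.0
shrink = decaying(32.0, 4.0, 0.5)
print(list(islice(shrink, 5)))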
def _set_params(self, kwargs):
    """
    Set input parameters based on the request.
    For details refer to the GitHub project: https://github.com/nabeel-oz/qlik-py-tools
    """

    # If key word arguments were included in the request, get the parameters and values
    # by transforming the string of arguments into a dictionary
    self.kwargs = {} if len(kwargs) == 0 else utils.get_kwargs(kwargs)

    # Set the debug option for generating execution logs
    # Valid values are: true, false
    self.debug = False if 'debug' not in self.kwargs else ('true' == self.kwargs.pop('debug').lower())

    # Additional information is printed to the terminal and logs if the parameter debug = true
    if self.debug:
        # Increment the log counter for the class. Each instance of the class generates a new log.
        self.__class__.log_no += 1

        # Create a log file for the instance
        # Logs will be stored in ..\logs\Common Functions Log <n>.txt
        self.logfile = os.path.join(os.getcwd(), 'logs', 'Common Functions Log {}.txt'.format(self.log_no))

        self._print_log(1)

    # Set the name of the function to be called on the model
    # By default this is the 'predict' function, but it could be another function such as 'predict_proba' if supported by the model
    self.prediction_func = 'predict' if 'return' not in self.kwargs else self.kwargs.pop('return')

    # Certain models may need sorted data for predictions
    # A feature can be specified for use in sorting using the identifier argument
    self.identifier = None if 'identifier' not in self.kwargs else self.kwargs.pop('identifier')

    # The identifier can be excluded from the inputs to the model using the exclude_identifier argument
    self.exclude_identifier = False if 'exclude_identifier' not in self.kwargs else (self.kwargs.pop('exclude_identifier').lower() == 'true')

    # Number of seconds to wait if a Keras model is being loaded by another thread
    self.wait = 2 if 'keras_wait' not in self.kwargs else utils.atoi(self.kwargs.pop('keras_wait'))

    # Number of retries if a Keras model is being loaded by another thread
    self.retries = 5 if 'keras_retries' not in self.kwargs else utils.atoi(self.kwargs.pop('keras_retries'))

    # Get the rest of the parameters, converting values to the correct data type
    self.pass_on_kwargs = {} if len(self.kwargs) == 0 else utils.get_kwargs_by_type(self.kwargs)

    # The predictions may need to be decoded in the case of classification labels
    # The labels can be passed as a dictionary using the 'labels' argument
    self.labels = None if 'labels' not in self.pass_on_kwargs else self.pass_on_kwargs.pop('labels')

    # Debug information is printed to the terminal and logs if the parameter debug = true
    if self.debug:
        self._print_log(2)

    # Remove the kwargs column from the request_df
    self.request_df = self.request_df.drop(['kwargs'], axis=1)
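# ---------------------------------------------------------------------------
# A hypothetical sketch of how the labels dictionary set above could be used
# to decode numeric class predictions back into readable labels. The variable
# names and sample data are illustrative only; the actual decoding logic in
# the repo may differ.
# ---------------------------------------------------------------------------

import numpy as np

labels = {0: 'no churn', 1: 'churn'}   # e.g. the parsed 'labels' argument
predictions = np.array([0, 1, 1, 0])   # e.g. the output of the model's predict function

decoded = [labels[int(p)] for p in predictions]
# decoded -> ['no churn', 'churn', 'churn', 'no churn']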