def _assert_valid_types(self):
    """Validate the configuration attributes of the instance.

    The checks run in this order and stop at the first failure:
    ``remove_stop_words``, ``lemmatize``, ``additional_pipes`` (container),
    ``spacy_model_id`` and finally each element of ``additional_pipes``.

    Raises
    ------
    InvalidArgumentError
        If ``remove_stop_words`` or ``lemmatize`` is not a bool, if
        ``additional_pipes`` is a string or is not iterable, if
        ``spacy_model_id`` is not one of the keys of ``id2pkg``, or if
        any element of ``additional_pipes`` is not callable.
    """
    if not isinstance(self.remove_stop_words, bool):
        raise InvalidArgumentError(
            'remove_stop_words',
            'Remove stop words option must be True or False.')

    if not isinstance(self.lemmatize, bool):
        raise InvalidArgumentError(
            'lemmatize', 'Lemmatize option must be True or False.')

    # A string is iterable but is never a collection of callables; without
    # this explicit rejection an empty string would pass every check.
    if (not hasattr(self.additional_pipes, '__iter__')
            or isinstance(self.additional_pipes, str)):
        logger.info('Additional pipes is: {}'.format(self.additional_pipes))
        raise InvalidArgumentError(
            'additional_pipes',
            'Additional pipes must be an iterable of callables')

    if self.spacy_model_id not in id2pkg.keys():
        # list(...) so the message shows the ids themselves instead of the
        # ``dict_keys(...)`` view representation.
        raise InvalidArgumentError(
            'spacy_model_id',
            'Model not found. List of valid model ids: {}'.format(
                list(id2pkg.keys())))

    for pipe in self.additional_pipes:
        if not callable(pipe):
            raise InvalidArgumentError(
                'additional_pipes',
                'All additional pipes must be callables')
def _assert_valid_train_data(train_data):
    """Validate that the training data is a non-empty iterable of tokens.

    Parameters
    ----------
    train_data : iterable
        Data to validate. A plain string is rejected even though it is
        iterable.

    Raises
    ------
    InvalidArgumentError
        If ``train_data`` is a string, is not iterable, or is a sized
        iterable that contains no elements.
    """
    if not hasattr(train_data, '__iter__') or isinstance(train_data, str):
        raise InvalidArgumentError(
            'train_data', 'Train data must be an iterable of tokens.')

    # Lazy iterables (generators, iterators) have no length and cannot be
    # checked for emptiness without consuming them, so only sized inputs
    # are tested; calling len() on them would raise a TypeError.
    if hasattr(train_data, '__len__') and len(train_data) == 0:
        raise InvalidArgumentError('train_data', "Train data can't be empty")
def _assert_valid_input(self):
    """Ensure ``transformers`` is a list made up only of callables.

    Raises
    ------
    InvalidArgumentError
        If ``self.transformers`` is not a list, or if one of its elements
        is not callable. The offending value is passed as the first
        argument of the error.
    """
    candidates = self.transformers
    if not isinstance(candidates, list):
        raise InvalidArgumentError(
            candidates, "Transformers param must be a list.")

    not_callable = [item for item in candidates if not callable(item)]
    if not_callable:
        # Report the first offender, following the order of the list.
        raise InvalidArgumentError(
            not_callable[0], "All transformers must be callables.")
def _assert_files_exist(self, vector_path, language):
    """Check that the vocab and npy files of an embedding exist.

    Parameters
    ----------
    vector_path : str
        Path prefix of the embedding files, without extension. The
        ``.vocab`` and ``.npy`` suffixes are appended to it.
    language : str
        Language identifier, only used to build the error message.

    Raises
    ------
    InvalidArgumentError
        If ``vector_path + '.vocab'`` or ``vector_path + '.npy'`` does not
        exist. ``self.embeddings_dir`` is passed as the first argument of
        the error.
    """
    # The vocab file goes first so it is the one reported when both files
    # are missing.
    for kind in ('vocab', 'npy'):
        if not os.path.exists(vector_path + '.' + kind):
            raise InvalidArgumentError(
                self.embeddings_dir,
                'Could not find {} file for language {} in given '
                'directory.'.format(kind, language))
def fit(self, x, y=None):
    """Fits the padder to the given data.

    Parameters
    ----------
    x : :obj:`list` of :obj:`list`
        List of lists with the numerical representation of each sentence.
    y : :obj:`list`, optional (default=None)
        List of labels of each sentence. It is not used.

    Returns
    -------
    self
        Reference to the class after being trained. If no padding length
        was set, it becomes the length of the longest sentence in ``x``
        (0 when ``x`` is empty).

    Raises
    ------
    InvalidArgumentError
        If the padding length is lower than the maximum length of the
        sentences in the given array.
    """
    # default=0 keeps an empty input from making max() raise ValueError.
    max_length = max((len(sentence) for sentence in x), default=0)

    if self.padding_length is None:
        self.padding_length = max_length
    elif self.padding_length < max_length:
        # Implicit concatenation instead of a backslash inside the literal,
        # which leaked the continuation whitespace into the message.
        raise InvalidArgumentError(
            'padding_length',
            'Padding length must be greater or equal to the maximum '
            'sentence length.')
    return self
def transform(self, x):
    """Select the configured columns of a DataFrame.

    Parameters
    ----------
    x : pandas DataFrame
        DataFrame the columns in ``self.column_names`` are taken from.

    Returns
    -------
    return : numpy array
        Values of the selected columns.

    Raises
    ------
    InvalidArgumentError
        If x is not a pandas DataFrame.
    KeyError
        If the column names stored in the selector don't exist in the
        given DataFrame.
    """
    if isinstance(x, pd.DataFrame):
        selection = x[self.column_names]
        return selection.values

    raise InvalidArgumentError(
        'x', 'Transform must receive a pandas Dataframe as argument.')
def _obtain_lexicon_info(self, variable, text):
    """Average one lexicon dimension over the tokens of a text.

    Parameters
    ----------
    variable : str
        Name of the attribute read from each lexicon entry, e.g.
        'valence', 'arousal' or 'dominance'.
    text : :obj:`list` of str
        Tokens of the text. Tokens missing from the lexicon are ignored.

    Returns
    -------
    return : float
        Mean of the requested attribute over the tokens found in the
        lexicon, or 0.5 when none of them is found.

    Raises
    ------
    InvalidArgumentError
        If text is a string or is not iterable.
    """
    if isinstance(text, str) or not hasattr(text, '__iter__'):
        raise InvalidArgumentError('text',
                                   'Text must be an iterable of tokens')

    total = 0
    matches = 0
    for word in text:
        entry = self._lexicon.get(word)
        if entry is None:
            continue
        total += float(getattr(entry, variable))
        matches += 1

    if matches == 0:
        return 0.5
    return total / matches
def load_config(self, layer_config):
    """Load the parameters of the layer using a LayerConfig object.

    Parameters
    ----------
    layer_config : :obj:`LayerConfig`
        LayerConfig object that holds the parameters that will be loaded
        by the layer.

    Returns
    -------
    self
        Self reference after the parameters have been initialized.

    Raises
    ------
    InvalidArgumentError
        If the layer_config parameter is not an instance of the
        :obj:`LayerConfig` class.
    """
    if not isinstance(layer_config, LayerConfig):
        # Single literal: the previous two-part literal was joined without
        # a space and read "LayerConfigclass.".
        error_msg = "Layer config must be an instance of the LayerConfig class."
        raise InvalidArgumentError(layer_config, error_msg)

    for parameter, value in layer_config.get_params().items():
        # Parameters whose value is None are skipped, leaving the current
        # attribute of the layer untouched.
        if value is not None:
            setattr(self, parameter, value)
    return self
def _check_valid_params(self):
    """Validate the dropout rate of the instance.

    A dropout rate of None is accepted as is.

    Raises
    ------
    InvalidArgumentError
        If ``self.dropout_rate`` is lower than 0 or greater than 1.
    """
    rate = self.dropout_rate
    if rate is not None and (rate < 0 or rate > 1):
        raise InvalidArgumentError(
            'dropout_rate', 'Dropout rate must be a float between 0 and 1.')
def fit(self, x, y=None, **fit_params):
    """Fit the padding length to the given sentences.

    Parameters
    ----------
    x : iterable of sized objects
        Sentences; only the length of each one is used.
    y : optional (default=None)
        Not used.
    **fit_params
        Not used.

    Returns
    -------
    self
        The same instance. If ``padding_length`` was None it is set to
        the length of the longest sentence in ``x`` (0 when ``x`` is
        empty).

    Raises
    ------
    InvalidArgumentError
        If ``padding_length`` is set and is lower than the length of the
        longest sentence in ``x``.
    """
    # default=0 keeps an empty input from making max() raise ValueError.
    max_length = max((len(sentence) for sentence in x), default=0)

    if self.padding_length is None:
        self.padding_length = max_length
    elif self.padding_length < max_length:
        # Implicit concatenation instead of a backslash inside the literal,
        # which leaked the continuation whitespace into the message.
        raise InvalidArgumentError(
            'padding_length',
            'Padding length must be greater or equal to the maximum '
            'sentence length.')
    return self
def load_config(self, layer_config):
    """Copy the parameters of a LayerConfig object into this instance.

    Parameters
    ----------
    layer_config : :obj:`LayerConfig`
        Configuration whose ``get_params()`` mapping is applied with
        ``setattr``.

    Returns
    -------
    self
        The same instance, after the parameters have been set.

    Raises
    ------
    InvalidArgumentError
        If layer_config is not an instance of :obj:`LayerConfig`.
    """
    if not isinstance(layer_config, LayerConfig):
        # Single literal: the previous two-part literal was joined without
        # a space and read "LayerConfigclass.".
        error_msg = "Layer config must be an instance of the LayerConfig class."
        raise InvalidArgumentError(layer_config, error_msg)

    for parameter, value in layer_config.get_params().items():
        # Parameters whose value is None are skipped, leaving the current
        # attribute untouched.
        if value is not None:
            setattr(self, parameter, value)
    return self
def _compute_values_from(y_true, y_pred):
    """Derive per-class counts from the confusion matrix of two labelings.

    Parameters
    ----------
    y_true : sized iterable
        Reference labels.
    y_pred : sized iterable
        Predicted labels; must have the same length as ``y_true``.

    Returns
    -------
    fp, fn, tp, tn : tuple of numpy arrays
        One value per class, in that order: false positives, false
        negatives, true positives and true negatives.

    Raises
    ------
    InvalidArgumentError
        If ``y_true`` and ``y_pred`` have different lengths.
    """
    if len(y_true) != len(y_pred):
        # The two literals used to be joined without a space ("thesame").
        raise InvalidArgumentError(
            'y_true',
            'Both predictions must contain the same number of elements.')

    # NOTE(review): presumably scikit-learn's confusion_matrix (rows = true
    # labels, columns = predictions) -- confirm against the file's imports.
    conf_mtx = confusion_matrix(y_true, y_pred)
    tp = np.diag(conf_mtx)
    fp = conf_mtx.sum(axis=0) - tp  # column totals minus the diagonal
    fn = conf_mtx.sum(axis=1) - tp  # row totals minus the diagonal
    tn = conf_mtx.sum() - (tp + fp + fn)  # everything not counted above
    return fp, fn, tp, tn
def _assert_valid_conf(gensim_conf):
    """Validate a GensimConfig object.

    The numeric settings are checked in this order, stopping at the first
    invalid one: ``iter``, ``window``, ``max_vocab_size`` and ``size``.

    Parameters
    ----------
    gensim_conf : :obj:`GensimConfig`
        Configuration to validate.

    Raises
    ------
    InvalidArgumentError
        If gensim_conf is not a GensimConfig instance, or if ``iter``,
        ``window``, ``size`` or a non-None ``max_vocab_size`` is lower
        than or equal to 0.
    """
    if not isinstance(gensim_conf, GensimConfig):
        raise InvalidArgumentError(
            gensim_conf,
            'Configuration parameter must be an instance of the '
            'GensimConfig class.')

    # (attribute read from the config, argument name reported, message)
    positive_checks = (
        ('iter', 'epochs', 'Number of epochs must be greater than 0.'),
        ('window', 'window', 'Window size must be greater than 0.'),
        ('max_vocab_size', 'max_vocab_size',
         'Maximum vocabulary size must be greater than 0.'),
        ('size', 'size', 'Vector size must be greater than 0.'),
    )
    for attribute, argument, message in positive_checks:
        value = getattr(gensim_conf, attribute)
        # max_vocab_size is the only setting allowed to be None.
        if attribute == 'max_vocab_size' and value is None:
            continue
        if value <= 0:
            raise InvalidArgumentError(argument, message)