Exemple #1
    def _assert_valid_types(self):
        """Validate the configuration attributes stored on the instance.

        The attributes are checked in this order: ``remove_stop_words``,
        ``lemmatize``, ``additional_pipes`` (iterability), ``spacy_model_id``
        and finally every element of ``additional_pipes``. The first invalid
        value aborts the validation.

        Raises
        ------
        InvalidArgumentError
            If ``remove_stop_words`` or ``lemmatize`` is not a bool, if
            ``additional_pipes`` is not an iterable of callables, or if
            ``spacy_model_id`` is not a key of ``id2pkg``.
        """
        if not isinstance(self.remove_stop_words, bool):
            raise InvalidArgumentError(
                'remove_stop_words', 'Remove stop words '
                'option must be True or False.')

        if not isinstance(self.lemmatize, bool):
            raise InvalidArgumentError(
                'lemmatize', 'Lemmatize option must be '
                'True or False.')

        if not hasattr(self.additional_pipes, '__iter__'):
            # Log the offending value before failing to ease debugging.
            logger.info('Additional pipes is: {}'.format(
                self.additional_pipes))
            raise InvalidArgumentError(
                'additional_pipes', 'Additional pipes '
                'must be an iterable of callables')

        if self.spacy_model_id not in id2pkg:
            raise InvalidArgumentError(
                'spacy_model_id', 'Model not found. List of valid model '
                'ids: {}'.format(id2pkg.keys()))

        for pipe in self.additional_pipes:
            if not callable(pipe):
                raise InvalidArgumentError(
                    'additional_pipes', 'All additional '
                    'pipes must be callables')
Exemple #2
def _assert_valid_train_data(train_data):
    if not hasattr(train_data, '__iter__') or isinstance(train_data, str):
        raise InvalidArgumentError(
            'train_data', 'Train data must be an '
            'iterable of tokens.')
    elif len(train_data) == 0:
        raise InvalidArgumentError('train_data', "Train data can't be empty")
Exemple #3
    def _assert_valid_input(self):
        if not isinstance(self.transformers, list):
            raise InvalidArgumentError(self.transformers, "Transformers param "
                                                          "must be a list.")

        for tr in self.transformers:
            if not callable(tr):
                raise InvalidArgumentError(tr, "All transformers must be "
Exemple #4
 def _assert_files_exist(self, vector_path, language):
     if not os.path.exists(vector_path + '.vocab'):
         raise InvalidArgumentError(
             self.embeddings_dir, 'Could not find '
             'vocab file for language {} in given '
     if not os.path.exists(vector_path + '.npy'):
         raise InvalidArgumentError(
             self.embeddings_dir, 'Could not find '
             'npy file for language {} in given '
Exemple #5
    def fit(self, x, y=None):
        """Fits the padder to the given data.

        Parameters
        ----------
        x : :obj:`list` of :obj:`list`
            List of lists with the numerical representation of each sentence.
        y : :obj:`list`, optional (default=None)
            List of labels of each sentence. Not used by this method.

        Returns
        -------
        self
            Reference to the class after being trained.

        Raises
        ------
        InvalidArgumentError
            If the padding length is lower than the maximum length of the
            sentences in the given array.
        ValueError
            If ``x`` is empty (raised by ``max``).
        """
        max_length = len(max(x, key=len))
        if self.padding_length is None:
            # No explicit length configured: adopt the longest sentence.
            self.padding_length = max_length
        elif self.padding_length < max_length:
            # Adjacent literals are used instead of a backslash inside the
            # string, which embedded the indentation in the message.
            raise InvalidArgumentError(
                'padding_length',
                'Padding length must be greater or '
                'equal to the maximum sentence length.')
        return self
Exemple #6
    def transform(self, x):
        """Extract the given column_names from x.

        Parameters
        ----------
        x : pandas DataFrame
            Pandas DataFrame from which the columns will be extracted.

        Returns
        -------
        numpy array
            Numpy array containing the extracted columns.

        Raises
        ------
        InvalidArgumentError
            If x is not a pandas DataFrame.
        KeyError
            Raised by pandas if the column names stored in
            ``self.column_names`` don't exist in the given DataFrame;
            this method performs no explicit check for that case.
        """
        if not isinstance(x, pd.DataFrame):
            raise InvalidArgumentError(
                'x', 'Transform must receive a pandas '
                'Dataframe as argument.')

        return x[self.column_names].values
Exemple #7
    def _obtain_lexicon_info(self, variable, text):
        """ Calculates a specific dimension from the lexicon.

        variable : str
            Must be one of 'valence', 'arousal' and 'dominance'.
        text : :obj:`list` of str
            List of tokens which are present in the text.

        return : float
            Normalized number with the specified score.
        if not hasattr(text, '__iter__') or isinstance(text, str):
            raise InvalidArgumentError('text',
                                       'Text must be an iterable of tokens')
        result = 0
        words_used = 0
        for token in text:
            lexicon_data = self._lexicon.get(token)
            if lexicon_data is not None:
                words_used += 1
                result += float(getattr(lexicon_data, variable))
        return 0.5 if words_used == 0 else result / words_used
Exemple #8
    def load_config(self, layer_config):
        """Load the parameters of the layer using a LayerConfig object.

        Parameters
        ----------
        layer_config : :obj:`LayerConfig`
            LayerConfig object that holds the parameters that will be loaded
            by the layer.

        Returns
        -------
        self
            Self reference after the parameters have been initialized.

        Raises
        ------
        InvalidArgumentError
            If the layer_config parameter is not an instance of the
            :obj:`LayerConfig` class.
        """
        if not isinstance(layer_config, LayerConfig):
            # NOTE(review): the continuation of this message was missing in
            # the source; " class." is the assumed ending -- confirm.
            error_msg = ("Layer config must be an instance of the LayerConfig"
                         " class.")
            raise InvalidArgumentError(layer_config, error_msg)

        # Parameters set to None are skipped so they do not overwrite the
        # attributes already present on the layer.
        for parameter, value in layer_config.get_params().items():
            if value is not None:
                setattr(self, parameter, value)
        return self
Exemple #9
 def _check_valid_params(self):
     if self.dropout_rate is None:
     elif self.dropout_rate < 0 or self.dropout_rate > 1:
         raise InvalidArgumentError(
             'dropout_rate', 'Dropout rate must be '
             'a float between 0 and 1.')
Exemple #10
 def fit(self, x, y=None, **fit_params):
     """Fit the padder by resolving the padding length from the data.

     Parameters
     ----------
     x : list of list
         Sentences; the length of the longest one is the minimum
         admissible padding length.
     y : optional (default=None)
         Not used by this method.
     **fit_params
         Extra keyword arguments; accepted and ignored.

     Returns
     -------
     self
         The instance, with ``padding_length`` set to the longest
         sentence length when it was previously ``None``.

     Raises
     ------
     InvalidArgumentError
         If ``padding_length`` is set and is lower than the length of the
         longest sentence in ``x``.
     ValueError
         If ``x`` is empty (raised by ``max``).
     """
     max_length = len(max(x, key=len))
     if self.padding_length is None:
         # No explicit length configured: adopt the longest sentence.
         self.padding_length = max_length
     elif self.padding_length < max_length:
         # Adjacent literals replace the backslash continuation that put
         # the source indentation inside the message text.
         raise InvalidArgumentError(
             'padding_length',
             'Padding length must be greater '
             'or equal to the maximum sentence length.')
     return self
Exemple #11
    def load_config(self, layer_config):
        """Copy the non-None parameters of a LayerConfig onto this object.

        Parameters
        ----------
        layer_config : LayerConfig
            Configuration object; its ``get_params()`` mapping provides the
            attribute names and values to set.

        Returns
        -------
        self
            The instance, after the parameters have been assigned.

        Raises
        ------
        InvalidArgumentError
            If ``layer_config`` is not an instance of ``LayerConfig``.
        """
        if not isinstance(layer_config, LayerConfig):
            # NOTE(review): the continuation of this message was missing in
            # the source; " class." is the assumed ending -- confirm.
            error_msg = ("Layer config must be an instance of the LayerConfig"
                         " class.")
            raise InvalidArgumentError(layer_config, error_msg)

        # Parameters set to None are skipped so they do not overwrite the
        # attributes already present on the object.
        for parameter, value in layer_config.get_params().items():
            if value is not None:
                setattr(self, parameter, value)
        return self
Exemple #12
def _compute_values_from(y_true, y_pred):
    """Derive per-class confusion counts from true and predicted labels.

    Parameters
    ----------
    y_true : sequence
        Ground-truth labels.
    y_pred : sequence
        Predicted labels; must have the same length as ``y_true``.

    Returns
    -------
    tuple
        ``(fp, fn, tp, tn)``, in that order. ``tp`` is the diagonal of the
        confusion matrix, ``fp`` the column sums minus ``tp``, ``fn`` the
        row sums minus ``tp`` and ``tn`` the remaining count.

    Raises
    ------
    InvalidArgumentError
        If ``y_true`` and ``y_pred`` differ in length.
    """
    if len(y_true) != len(y_pred):
        # The trailing space keeps "the" and "same" apart once the two
        # literals are concatenated.
        raise InvalidArgumentError(
            'y_true',
            'Both predictions must contain the '
            'same number of elements.')

    # NOTE(review): presumably sklearn.metrics.confusion_matrix (rows are
    # true labels, columns are predictions) -- confirm against the imports.
    conf_mtx = confusion_matrix(y_true, y_pred)
    tp = np.diag(conf_mtx)
    fp = conf_mtx.sum(axis=0) - tp
    fn = conf_mtx.sum(axis=1) - tp
    tn = conf_mtx.sum() - (tp + fp + fn)
    return fp, fn, tp, tn
Exemple #13
def _assert_valid_conf(gensim_conf):
    """Validate a GensimConfig object.

    The checks run in order and stop at the first failure: instance type,
    ``iter``, ``window``, ``max_vocab_size`` (only when not None) and
    ``size``.

    Parameters
    ----------
    gensim_conf : GensimConfig
        Configuration object to validate.

    Raises
    ------
    InvalidArgumentError
        If ``gensim_conf`` is not a ``GensimConfig`` instance, or if
        ``iter``, ``window``, ``size`` or a non-None ``max_vocab_size``
        is lower than or equal to 0.
    """
    if not isinstance(gensim_conf, GensimConfig):
        # NOTE(review): the end of this message was cut off in the source;
        # "class." is the assumed ending -- confirm.
        raise InvalidArgumentError(
            gensim_conf, 'Configuration parameter '
            'must be an instance of the GensimConfig '
            'class.')
    elif gensim_conf.iter <= 0:
        raise InvalidArgumentError(
            'epochs', 'Number of epochs must be greater '
            'than 0.')
    elif gensim_conf.window <= 0:
        raise InvalidArgumentError('window', 'Window size must be greater '
                                   'than 0.')
    elif gensim_conf.max_vocab_size is not None \
            and gensim_conf.max_vocab_size <= 0:
        raise InvalidArgumentError(
            'max_vocab_size', 'Maximum vocabulary size must be greater '
            'than 0.')
    elif gensim_conf.size <= 0:
        raise InvalidArgumentError('size', 'Vector size must be greater '
                                   'than 0.')