Exemple #1
0
    def _assert_valid_types(self):
        """Validate the configuration attributes stored on this instance.

        Checks ``remove_stop_words``, ``lemmatize``, ``additional_pipes`` and
        ``spacy_model_id``, in that order, and raises on the first invalid one.

        Raises
        ------
        InvalidArgumentError
            If ``remove_stop_words`` or ``lemmatize`` is not a bool, if
            ``additional_pipes`` is not an iterable whose elements are all
            callables, or if ``spacy_model_id`` is not a key of ``id2pkg``.
        """
        if not isinstance(self.remove_stop_words, bool):
            raise InvalidArgumentError(
                'remove_stop_words',
                'Remove stop words option must be True or False.')

        if not isinstance(self.lemmatize, bool):
            raise InvalidArgumentError(
                'lemmatize', 'Lemmatize option must be True or False.')

        if not hasattr(self.additional_pipes, '__iter__'):
            logger.info('Additional pipes is: {}'.format(
                self.additional_pipes))
            raise InvalidArgumentError(
                'additional_pipes',
                'Additional pipes must be an iterable of callables')

        if self.spacy_model_id not in id2pkg:
            # Materialise the keys so the message shows a plain list instead
            # of the ``dict_keys([...])`` repr produced on Python 3.
            raise InvalidArgumentError(
                'spacy_model_id',
                'Model not found. List of valid model ids: {}'.format(
                    list(id2pkg.keys())))

        for pipe in self.additional_pipes:
            # ``callable`` looks at the type, so an object that merely carries
            # a ``__call__`` instance attribute is correctly rejected.
            if not callable(pipe):
                raise InvalidArgumentError(
                    'additional_pipes',
                    'All additional pipes must be callables')
Exemple #2
0
def _assert_valid_train_data(train_data):
    if not hasattr(train_data, '__iter__') or isinstance(train_data, str):
        raise InvalidArgumentError(
            'train_data', 'Train data must be an '
            'iterable of tokens.')
    elif len(train_data) == 0:
        raise InvalidArgumentError('train_data', "Train data can't be empty")
Exemple #3
0
    def _assert_valid_input(self):
        if not isinstance(self.transformers, list):
            raise InvalidArgumentError(self.transformers, "Transformers param "
                                                          "must be a list.")

        for tr in self.transformers:
            if not callable(tr):
                raise InvalidArgumentError(tr, "All transformers must be "
                                           "callables.")
Exemple #4
0
 def _assert_files_exist(self, vector_path, language):
     if not os.path.exists(vector_path + '.vocab'):
         raise InvalidArgumentError(
             self.embeddings_dir, 'Could not find '
             'vocab file for language {} in given '
             'directory.'.format(language))
     if not os.path.exists(vector_path + '.npy'):
         raise InvalidArgumentError(
             self.embeddings_dir, 'Could not find '
             'npy file for language {} in given '
             'directory.'.format(language))
Exemple #5
0
    def fit(self, x, y=None):
        """Fits the padder to the given data.

        When ``padding_length`` is ``None`` it is set to the length of the
        longest sentence in ``x``; otherwise it is validated against it.

        Parameters
        ----------
        x : :obj:`list` of :obj:`list`
            List of lists with the numerical representation of each sentence.
        y : :obj:`list`, optional (default=None)
            List of labels of each sentence. Not used by this method.

        Returns
        -------
        self
            Reference to the class after being trained.

        Raises
        ------
        InvalidArgumentError
            If the padding length is lower than the maximum length of the
            sentences in the given array.
        """
        max_length = len(max(x, key=len))
        if self.padding_length is None:
            self.padding_length = max_length
        elif self.padding_length < max_length:
            # Adjacent literals instead of a backslash continuation inside
            # the string, which would embed the source indentation in the
            # message.
            raise InvalidArgumentError(
                'padding_length',
                'Padding length must be greater or equal to the maximum '
                'sentence length.')
        return self
Exemple #6
0
    def transform(self, x):
        """Select ``self.column_names`` from a DataFrame as a numpy array.

        Parameters
        ----------
        x : pandas DataFrame
            DataFrame from which the configured columns are taken.

        Returns
        -------
        return : numpy array
            The ``.values`` of the selected column(s).

        Raises
        ------
        InvalidArgumentError
            If x is not a pandas DataFrame.

        KeyError
            If the column names passed in the constructor of the
            selector don't exist in the given DataFrame.
        """
        if isinstance(x, pd.DataFrame):
            selected = x[self.column_names]
            return selected.values

        raise InvalidArgumentError(
            'x', 'Transform must receive a pandas Dataframe as argument.')
Exemple #7
0
    def _obtain_lexicon_info(self, variable, text):
        """ Calculates a specific dimension from the lexicon.

        Parameters
        ----------
        variable : str
            Must be one of 'valence', 'arousal' and 'dominance'.
        text : :obj:`list` of str
            List of tokens which are present in the text.

        Returns
        -------
        return : float
            Normalized number with the specified score.
        """
        if not hasattr(text, '__iter__') or isinstance(text, str):
            raise InvalidArgumentError('text',
                                       'Text must be an iterable of tokens')
        result = 0
        words_used = 0
        for token in text:
            lexicon_data = self._lexicon.get(token)
            if lexicon_data is not None:
                words_used += 1
                result += float(getattr(lexicon_data, variable))
        return 0.5 if words_used == 0 else result / words_used
Exemple #8
0
    def load_config(self, layer_config):
        """Load the parameters of the layer using a LayerConfig object.

        Every parameter returned by ``layer_config.get_params()`` whose value
        is not ``None`` is set as an attribute on this instance; ``None``
        values leave the current attribute untouched.

        Parameters
        ----------
        layer_config : :obj:`LayerConfig`
            LayerConfig object that holds the parameters that will be loaded
            by the layer.

        Returns
        -------
        self
            Self reference after the parameters have been initialized.

        Raises
        ------
        InvalidArgumentError
            If the layer_config parameter is not an instance of the
            :obj:`LayerConfig` class.
        """
        if not isinstance(layer_config, LayerConfig):
            # Trailing space keeps the concatenated literals from reading
            # "LayerConfigclass.".
            error_msg = "Layer config must be an instance of the " \
                        "LayerConfig class."
            raise InvalidArgumentError(layer_config, error_msg)

        for parameter, value in layer_config.get_params().items():
            if value is not None:
                setattr(self, parameter, value)
        return self
Exemple #9
0
 def _check_valid_params(self):
     if self.dropout_rate is None:
         return
     elif self.dropout_rate < 0 or self.dropout_rate > 1:
         raise InvalidArgumentError(
             'dropout_rate', 'Dropout rate must be '
             'a float between 0 and 1.')
Exemple #10
0
 def fit(self, x, y=None, **fit_params):
     """Fit the padder by resolving or validating ``padding_length``.

     When ``padding_length`` is ``None`` it is set to the length of the
     longest element of ``x``; otherwise it is validated against it.

     Parameters
     ----------
     x : sequence of sized sequences
         Sentences whose lengths determine the minimum padding length.
     y : optional (default=None)
         Not used by this method.
     **fit_params
         Not used by this method.

     Returns
     -------
     self
         Reference to this instance.

     Raises
     ------
     InvalidArgumentError
         If ``padding_length`` is lower than the longest sentence length.
     """
     max_length = len(max(x, key=len))
     if self.padding_length is None:
         self.padding_length = max_length
     elif self.padding_length < max_length:
         # Adjacent literals instead of a backslash continuation inside the
         # string, which would embed the source indentation in the message.
         raise InvalidArgumentError(
             'padding_length',
             'Padding length must be greater or equal to the maximum '
             'sentence length.')
     return self
Exemple #11
0
    def load_config(self, layer_config):
        """Copy the non-``None`` parameters of a LayerConfig onto this object.

        Parameters
        ----------
        layer_config : :obj:`LayerConfig`
            Configuration whose ``get_params()`` mapping is applied with
            ``setattr``; entries whose value is ``None`` are skipped.

        Returns
        -------
        self
            Self reference after the parameters have been set.

        Raises
        ------
        InvalidArgumentError
            If ``layer_config`` is not an instance of :obj:`LayerConfig`.
        """
        if not isinstance(layer_config, LayerConfig):
            # Trailing space keeps the concatenated literals from reading
            # "LayerConfigclass.".
            error_msg = "Layer config must be an instance of the " \
                        "LayerConfig class."
            raise InvalidArgumentError(layer_config, error_msg)

        for parameter, value in layer_config.get_params().items():
            if value is not None:
                setattr(self, parameter, value)
        return self
Exemple #12
0
def _compute_values_from(y_true, y_pred):
    """Derive per-class FP, FN, TP and TN counts from a confusion matrix.

    Parameters
    ----------
    y_true : sized sequence
        Reference labels.
    y_pred : sized sequence
        Predicted labels; must have the same length as ``y_true``.

    Returns
    -------
    tuple
        ``(fp, fn, tp, tn)``, each with one entry per class of the
        confusion matrix.

    Raises
    ------
    InvalidArgumentError
        If ``y_true`` and ``y_pred`` differ in length.
    """
    if len(y_true) != len(y_pred):
        # Trailing space keeps the concatenated literals from reading
        # "thesame".
        raise InvalidArgumentError('y_true',
                                   'Both predictions must contain the '
                                   'same number of elements.')

    # NOTE(review): the fp/fn naming assumes confusion_matrix puts true labels
    # on rows and predictions on columns (scikit-learn's convention) - confirm.
    conf_mtx = confusion_matrix(y_true, y_pred)
    tp = np.diag(conf_mtx)
    fp = conf_mtx.sum(axis=0) - tp
    fn = conf_mtx.sum(axis=1) - tp
    tn = conf_mtx.sum() - (tp + fp + fn)
    return fp, fn, tp, tn
Exemple #13
0
def _assert_valid_conf(gensim_conf):
    """Validate a GensimConfig instance, raising on the first bad field.

    The checks run in this order: type of ``gensim_conf``, ``iter``,
    ``window``, ``max_vocab_size`` (only when it is not ``None``) and
    ``size``.

    Parameters
    ----------
    gensim_conf : :obj:`GensimConfig`
        Configuration object to validate.

    Raises
    ------
    InvalidArgumentError
        If ``gensim_conf`` is not a GensimConfig, or if any of the checked
        fields is lower than or equal to 0.
    """
    if not isinstance(gensim_conf, GensimConfig):
        raise InvalidArgumentError(
            gensim_conf,
            'Configuration parameter must be an instance of the '
            'GensimConfig class.')

    if gensim_conf.iter <= 0:
        raise InvalidArgumentError(
            'epochs', 'Number of epochs must be greater than 0.')

    if gensim_conf.window <= 0:
        raise InvalidArgumentError(
            'window', 'Window size must be greater than 0.')

    vocab_limit = gensim_conf.max_vocab_size
    if vocab_limit is not None and vocab_limit <= 0:
        raise InvalidArgumentError(
            'max_vocab_size',
            'Maximum vocabulary size must be greater than 0.')

    if gensim_conf.size <= 0:
        raise InvalidArgumentError(
            'size', 'Vector size must be greater than 0.')