Example No. 1
    def __init__(self, train=None, model=None, type_='pos',
                 backoff=None, cutoff=0, verbose=False):
        self._morph = pymorphy2.MorphAnalyzer()
        self._contexts_to_tags = (model if model else {})  # mapping to store "useful" contexts
        if type_ not in ["pos", "full"]:
            raise Exception("Unknown tagset type `%s`!" % type_)
        self.type = type_

        NgramTagger.__init__(self, 1, train, model,
                             backoff, cutoff, verbose)
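
A minimal usage sketch for the constructor above. The class name PymorphyUnigramTagger and the training sentences are illustrative assumptions; only the constructor signature and the type_ argument come from the example itself.

# Illustrative sketch: assumes the __init__ above belongs to a hypothetical
# class PymorphyUnigramTagger(NgramTagger) and that training data follows
# NLTK's tagged-sentence layout (lists of (token, tag) pairs).
train_sents = [
    [("мама", "NOUN"), ("мыла", "VERB"), ("раму", "NOUN")],
]
tagger = PymorphyUnigramTagger(train=train_sents, type_='pos')
print(tagger.tag(["мама", "мыла", "раму"]))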
Example No. 2
    def __init__(self, n, train=None, model=None, backoff=None, cutoff=0):
        """Setup for NgramLemmatizer()

        :param n: Int with length of 'n'-gram
        :param train: List of tuples of the form (TOKEN, LEMMA)
        :param model: Dict; DEPRECATED, use TrainLemmatizer
        :param backoff: Next lemmatizer in backoff chain.
        :param cutoff: Int with minimum number of matches to choose lemma
        """
        self._n = n
        self._check_params(train, model)
        ContextLemmatizer.__init__(self, model, backoff)
        NgramTagger.__init__(self, self._n, train, model, backoff, cutoff)

        if train:
            # Refactor to remove model? Always train?
            self._train(train, cutoff)
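
A usage sketch for this constructor. Only the (TOKEN, LEMMA) pair shape and the backoff chaining come from the docstring; the training sentences below and the assumption that data is grouped into NLTK-style tagged sentences are illustrative.

# Illustrative only: training data assumed to be sentences of (TOKEN, LEMMA)
# pairs, mirroring NLTK's tagged-sentence layout.
train = [
    [("dogs", "dog"), ("were", "be"), ("running", "run")],
    [("she", "she"), ("ran", "run"), ("faster", "fast")],
]
unigram_lemmatizer = NgramLemmatizer(1, train=train)
# Longer n-grams can back off to shorter ones, as in NLTK's tagger chains:
bigram_lemmatizer = NgramLemmatizer(2, train=train, backoff=unigram_lemmatizer)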
Example No. 3
def nltk_ngram_pos_tagger(input_dict):
    """
    A tagger that chooses a token's tag based on its word string and
    on the preceding n words' tags.  In particular, a tuple
    (tags[i-n:i-1], words[i]) is looked up in a table, and the
    corresponding tag is returned.  N-gram taggers are typically
    trained on a tagged corpus.

    Train a new NgramTagger using the given training data or
    the supplied model.  In particular, construct a new tagger
    whose table maps from each context (tag[i-n:i-1], word[i])
    to the most frequent tag for that context.  But exclude any
    contexts that are already tagged perfectly by the backoff
    tagger.

    :param training_corpus: A tagged corpus included with NLTK, such as treebank, brown, cess_esp, floresta,
        or an Annotated Document Corpus in the standard TextFlows' adc format
    :param backoff_tagger: A backoff tagger, to be used by the new
        tagger if it encounters an unknown context.
    :param cutoff: If the most likely tag for a context occurs
        fewer than *cutoff* times, then exclude it from the
        context-to-tag table for the new tagger.
    :param n: An n-gram is a contiguous sequence of n items from a given sequence of text or speech.

    :returns pos_tagger: A Python dictionary containing the POS tagger object and its arguments.
    """

    training_corpus = corpus_reader(input_dict['training_corpus'])
    backoff_tagger = (input_dict['backoff_tagger']['object']
                      if input_dict['backoff_tagger']
                      else DefaultTagger('-None-'))
    n = int(input_dict['n'])  # default 2
    cutoff = int(input_dict['cutoff'])  # default 0

    return {
        'pos_tagger': {
            'function': 'tag_sents',
            'object': NgramTagger(n,
                                  train=training_corpus,
                                  model=None,
                                  backoff=backoff_tagger,
                                  cutoff=cutoff)
        }
    }
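
A sketch of how this entry point might be called. The exact payload of 'training_corpus' depends on TextFlows' corpus_reader; here it is assumed to pass NLTK tagged sentences through unchanged, which is an assumption rather than documented behaviour.

from nltk.corpus import treebank

input_dict = {
    'training_corpus': treebank.tagged_sents()[:3000],  # assumed passthrough
    'backoff_tagger': None,   # None falls back to DefaultTagger('-None-')
    'n': '2',                 # bigram contexts
    'cutoff': '0',            # keep every observed context
}
widget_output = nltk_ngram_pos_tagger(input_dict)
tagger = widget_output['pos_tagger']['object']
print(tagger.tag_sents([["The", "cat", "sat", "."]]))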
Example No. 4
    def context(self, tokens, index, history):
        """Delegate context construction to NgramTagger.context()."""
        return NgramTagger.context(self, tokens, index, history)
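
The override above is a plain pass-through; for reference, the context key built by NLTK's NgramTagger pairs the previous n-1 tags with the current word, roughly as sketched below (a paraphrase of the NLTK method, not this project's own code).

# Paraphrase of nltk.tag.sequential.NgramTagger.context for reference:
# the lookup key is (previous n-1 tags, word at `index`).
def context(self, tokens, index, history):
    tag_context = tuple(history[max(0, index - self._n + 1):index])
    return tag_context, tokens[index]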
Example No. 5
    def __init__(self, train=None, model=None,
                 backoff=None, cutoff=0, verbose=False):
        self._morph = pymorphy2.MorphAnalyzer()
        self._contexts_to_tags = (model if model else {})
        NgramTagger.__init__(self, 1, train, model,
                             backoff, cutoff, verbose)