class NNOTagger(BaseTagger, object):
    """
    TextBlob compatible Norsk Nynorsk POS tagger class based on the NLTK HunPos wrapper.

    :param model_fn: Model file name handed to ``HunposTagger``. When ``None``
        (the default), ``NNO_TAGGER_DEFAULT_MODEL_FN`` is used.
    """
    def __init__(self, model_fn=None):
        # Honour a caller-supplied model. Previously the argument was accepted
        # but ignored, so the default model was loaded unconditionally.
        if model_fn is None:
            model_fn = NNO_TAGGER_DEFAULT_MODEL_FN

        self.tokenizer = NOTokenizer()
        self.tagger = HunposTagger(model_fn, hunpos_tag_bin(), encoding='utf-8')

    def tag(self, text, tokenize=True):
        """
        Tag ``text`` with the underlying HunPos tagger.

        :param text: Input to tag; it is always passed through ``clean_input`` first.
        :param tokenize: When true (the default), the cleaned input is split
            with the ``NOTokenizer`` instance before tagging. When false, the
            cleaned input is handed to the tagger unchanged.
        :return: The result of ``HunposTagger.tag`` (presumably a list of
            ``(token, tag)`` pairs -- confirm against the NLTK wrapper).
        """
        text = clean_input(text)

        if tokenize:
            text = self.tokenizer.tokenize(text)

        return self.tagger.tag(text)
예제 #2
0
class NNOTagger(BaseTagger, object):
    """
    TextBlob compatible Norsk Nynorsk POS tagger class based on the NLTK HunPos wrapper.

    :param model_fn: Model file name handed to ``HunposTagger``. When ``None``
        (the default), ``NNO_TAGGER_DEFAULT_MODEL_FN`` is used.
    """
    def __init__(self, model_fn=None):
        self.tokenizer = NOTokenizer()
        # Only fall back to the bundled default when no model was requested;
        # the ``model_fn`` argument used to be silently ignored.
        selected_model = NNO_TAGGER_DEFAULT_MODEL_FN if model_fn is None else model_fn
        self.tagger = HunposTagger(selected_model,
                                   hunpos_tag_bin(),
                                   encoding='utf-8')

    def tag(self, text, tokenize=True):
        """
        Tag ``text`` with the underlying HunPos tagger.

        :param text: Input to tag; it is always passed through ``clean_input`` first.
        :param tokenize: When true (the default), the cleaned input is split
            with the ``NOTokenizer`` instance before tagging. When false, the
            cleaned input is handed to the tagger unchanged.
        :return: The result of ``HunposTagger.tag`` (presumably a list of
            ``(token, tag)`` pairs -- confirm against the NLTK wrapper).
        """
        text = clean_input(text)

        if tokenize:
            text = self.tokenizer.tokenize(text)

        return self.tagger.tag(text)
예제 #3
0
 def test_tokenize(self):
     """Check the token list NOTokenizer produces for a short sentence."""
     sentence = u'Dette er vårt hus.'
     expected_tokens = ['Dette', 'er', u'vårt', 'hus', '.']

     actual_tokens = NOTokenizer().tokenize(sentence)

     # Expected value first, matching the original argument order.
     self.assertEqual(expected_tokens, actual_tokens)