class NNOTagger(BaseTagger, object):
    """TextBlob compatible Norsk Nynorsk POS tagger.

    Wraps the NLTK ``HunposTagger`` and pairs it with a ``NOTokenizer`` so
    that raw text can be tokenized before it is tagged.
    """

    def __init__(self, model_fn=None):
        """Create the tokenizer and the underlying HunPos tagger.

        :param model_fn: Path to the HunPos model file to load. When ``None``
            (the default), ``NNO_TAGGER_DEFAULT_MODEL_FN`` is used.
        """
        # Honour an explicitly supplied model; previously the argument was
        # accepted but silently ignored in favour of the default model.
        if model_fn is None:
            model_fn = NNO_TAGGER_DEFAULT_MODEL_FN
        self.tokenizer = NOTokenizer()
        self.tagger = HunposTagger(model_fn, hunpos_tag_bin(),
                                   encoding='utf-8')

    def tag(self, text, tokenize=True):
        """Tag ``text`` with part-of-speech labels.

        :param text: The input to tag. It is always passed through
            ``clean_input`` first.
        :param tokenize: When true (the default), the cleaned text is split
            with ``self.tokenizer`` before tagging. When false, the cleaned
            input is handed to the tagger as-is (presumably the caller then
            supplies an already tokenized sequence -- verify against callers).
        :returns: Whatever ``HunposTagger.tag`` returns for the input
            (per the NLTK documentation, a list of ``(token, tag)`` tuples).
        """
        text = clean_input(text)
        if tokenize:
            text = self.tokenizer.tokenize(text)
        return self.tagger.tag(text)
class NNOTagger(BaseTagger, object):
    """TextBlob compatible Norsk Nynorsk POS tagger.

    Wraps the NLTK ``HunposTagger`` and pairs it with a ``NOTokenizer`` so
    that raw text can be tokenized before it is tagged.
    """

    def __init__(self, model_fn=None):
        """Create the tokenizer and the underlying HunPos tagger.

        :param model_fn: Path to the HunPos model file to load. When ``None``
            (the default), ``NNO_TAGGER_DEFAULT_MODEL_FN`` is used.
        """
        # Honour an explicitly supplied model; previously the argument was
        # accepted but silently ignored in favour of the default model.
        if model_fn is None:
            model_fn = NNO_TAGGER_DEFAULT_MODEL_FN
        self.tokenizer = NOTokenizer()
        self.tagger = HunposTagger(model_fn, hunpos_tag_bin(),
                                   encoding='utf-8')

    def tag(self, text, tokenize=True):
        """Tag ``text`` with part-of-speech labels.

        :param text: The input to tag. It is always passed through
            ``clean_input`` first.
        :param tokenize: When true (the default), the cleaned text is split
            with ``self.tokenizer`` before tagging. When false, the cleaned
            input is handed to the tagger as-is (presumably the caller then
            supplies an already tokenized sequence -- verify against callers).
        :returns: Whatever ``HunposTagger.tag`` returns for the input
            (per the NLTK documentation, a list of ``(token, tag)`` tuples).
        """
        text = clean_input(text)
        if tokenize:
            text = self.tokenizer.tokenize(text)
        return self.tagger.tag(text)
def test_tokenize(self):
    """NOTokenizer splits a short sentence into words plus the final period."""
    sentence = u'Dette er vårt hus.'
    expected_tokens = ['Dette', 'er', u'vårt', 'hus', '.']
    actual_tokens = NOTokenizer().tokenize(sentence)
    # Expected value goes first, matching the argument order used originally.
    self.assertEqual(expected_tokens, actual_tokens)