class Tagger:
    """Part-of-speech tagger built on top of a ``SennaTagger`` instance."""

    def __init__(self):
        # The SENNA location is hard-coded to this path.
        self.tagger = SennaTagger('/app/util/senna')

    def tag(self, tokens_in_sentence):
        """Attach a part of speech to every token of every sentence.

        :param tokens_in_sentence: list of tokens, grouped by sentence.
        :return: generator with one item per sentence; each item is itself
            a generator of ``Word`` objects, so both levels can be
            iterated only once.
        """
        # Tagging runs right away, at call time; only the wrapping of the
        # (token, tag) pairs into Word objects is deferred until iteration.
        tagged_sentences = self.tagger.tag_sents(tokens_in_sentence)
        return (
            (Word(token, self._parse_POS(pos_tag))
             for token, pos_tag in sentence)
            for sentence in tagged_sentences
        )

    def _parse_POS(self, tag):
        """Reduce a tagger tag to a ``PartOfSpeach`` value by its prefix.

        :param tag: tag string produced by the tagger (presumably Penn
            Treebank style such as ``JJ``/``VB``/``NN``/``RB`` -- confirm).
        :return: ADJECTIVE, VERB, NOUN or ADVERB for tags starting with
            ``J``, ``V``, ``N`` or ``R`` respectively; OTHER for any other
            tag, including the empty string.
        """
        prefix_table = (
            ('J', PartOfSpeach.ADJECTIVE),
            ('V', PartOfSpeach.VERB),
            ('N', PartOfSpeach.NOUN),
            ('R', PartOfSpeach.ADVERB),
        )
        for prefix, part_of_speech in prefix_table:
            if tag.startswith(prefix):
                return part_of_speech
        return PartOfSpeach.OTHER
# -*- coding: utf-8 -*-
"""
Created on Sun May 14 12:37:50 2017

@author: Shanika Ediriweera
"""

from nltk import word_tokenize
from nltk.tag import SennaTagger

# Demo: tag two sample sentences with SENNA and print the result.
# The SENNA directory is given as a relative path.
senna = SennaTagger('../../tools/senna')

sents = [
    "All the banks are closed",
    "Today is Sunday",
]

# Split every sentence into tokens, then tag all sentences in a single call.
tokenized_sents = list(map(word_tokenize, sents))
print(senna.tag_sents(tokenized_sents))