def __init__(self, text):
    """Build a paragraph: sentence-split the text and aggregate word lists.

    Stores the stripped text, one Sentence per non-empty tokenized
    fragment, and flattened paragraph-level word views via catlist.
    POS tagging is deferred until pos_tag() is called.
    """
    self.text = text.strip()
    # Sentence-split the raw input; drop whitespace-only fragments.
    fragments = (frag.strip() for frag in Paragraph.sent_tokenizer.tokenize(text))
    self.sents = [Sentence(frag) for frag in fragments if frag]
    # Flatten the per-sentence word lists into paragraph-level views.
    self.pwords = catlist([sent.pwords for sent in self.sents])
    self.cwords = catlist([sent.cwords for sent in self.sents])
    self.scwords = catlist([sent.scwords for sent in self.sents])
    # Nouns are only available after pos_tag() runs.
    self.pos_tagged = False
def __init__(self, text):
    """Build a document: split text into newline-delimited paragraphs.

    Stores the stripped text, one Paragraph per non-blank line, and
    flattened document-level sentence/word views via catlist.
    POS tagging is deferred until pos_tag() is called.
    """
    self.text = text.strip()
    # Paragraphs are separated by newlines; skip blank lines.
    par_texts = [chunk.strip() for chunk in text.split('\n') if chunk.strip()]
    self.pars = [Paragraph(chunk) for chunk in par_texts]
    # Flatten per-paragraph collections into document-level views.
    self.sents = catlist([par.sents for par in self.pars])
    self.pwords = catlist([par.pwords for par in self.pars])
    self.cwords = catlist([par.cwords for par in self.pars])
    self.scwords = catlist([par.scwords for par in self.pars])
    # Nouns are only available after pos_tag() runs.
    self.pos_tagged = False
def pos_tag(self):
    """POS-tag every sentence, then collect their nouns.

    Delegates tagging to each Sentence, flattens the resulting
    per-sentence noun lists into self.nouns, and flips the
    pos_tagged flag so callers can tell tagging has happened.
    """
    for sent in self.sents:
        sent.pos_tag()
    self.nouns = catlist([sent.nouns for sent in self.sents])
    self.pos_tagged = True