def tag(self, text=None):
    """
    Tags the given text. If no text is given, reads token-per-line
    (TSV) input from stdin, taking the first column of each line and
    treating blank lines as sentence boundaries.

    :param text: a string or unicode object. Strings assumed to be utf-8
    :returns: a list of lists (sentences with tokens).
        Each sentence has (token, tag) tuples.
    """
    result = []
    if text:
        tokens = utils.tokenize(text, clean=False)
        for sent in tokens:
            tags = self.tag_tokens(sent)
            result.append(zip(sent, tags))
    else:
        # read TSV from stdin, one token per line
        sent = []
        for line in sys.stdin:
            line = line.decode('utf-8').strip()
            if line:
                sent.append(line.split()[0])
            elif sent:
                # blank line: tag the finished sentence
                # (skip if consecutive blank lines left it empty)
                tags = self.tag_tokens(sent)
                result.append(zip(sent, tags))
                sent = []
        if sent:
            # tag the last sentence when input doesn't end in a blank line
            tags = self.tag_tokens(sent)
            result.append(zip(sent, tags))
    return result
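# Usage sketch. The class name and constructor below are assumptions for
# illustration (instantiation is defined elsewhere in the package), and
# the tags shown are illustrative:
#
#     tagger = POSTagger()                      # hypothetical constructor
#     sents = tagger.tag(u'The cat sat. The dog barked.')
#     # sents -> [[(u'The', u'DT'), (u'cat', u'NN'), ...], ...]
#
# With no argument, tag() instead consumes TSV from stdin, e.g. in a
# shell pipeline (hypothetical script name):
#
#     $ cat tokens.tsv | python run_tagger.py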
def tag(self, text):
    """
    Runs the SRL process on the given text.

    :param text: unicode or str encoded in utf-8.
    :returns: a list of SRLAnnotatedSentence objects
    """
    tokens = utils.tokenize(text, self.language)
    result = []
    for sent in tokens:
        tagged = self.tag_tokens(sent)
        result.append(tagged)
    return result
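# Usage sketch for the SRL variant. The class name is an assumption for
# illustration; unlike the plain tagger above, each result is an
# SRLAnnotatedSentence whose interface is defined elsewhere in the
# package, not a list of (token, tag) tuples:
#
#     srl_tagger = SRLTagger()                  # hypothetical constructor
#     annotated = srl_tagger.tag(u'The boy gave the dog a bone.')
#     first = annotated[0]                      # an SRLAnnotatedSentence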
def tag(self, text):
    """
    Tags the given text.

    :param text: a string or unicode object. Strings assumed to be utf-8
    :returns: a list of lists (sentences with tokens).
        Each sentence has (token, tag) tuples.
    """
    tokens = utils.tokenize(text, self.language)
    result = []
    for sent in tokens:
        tagged = self.tag_tokens(sent, return_tokens=True)
        result.append(tagged)
    return result
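# Minimal sketch (hypothetical class name). Here return_tokens=True makes
# tag_tokens pair each token with its tag, so iteration is direct:
#
#     tagger = Tagger()                         # hypothetical constructor
#     for sent in tagger.tag(u'A short example sentence.'):
#         for token, tag in sent:
#             print token, tag                  # one (token, tag) per line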
def parse(self, text):
    """
    Splits the given text into sentences and determines their
    dependency trees. If you want to provide your own tokenized
    text, use `parse_sentence` instead.

    :param text: a string
    :returns: a list of ParsedSentence objects
    """
    sentences = utils.tokenize(text, self.language)
    result = []
    for sent in sentences:
        parsed = self.parse_sentence(sent)
        result.append(parsed)
    return result
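# Usage sketch (hypothetical class name; ParsedSentence's interface is
# defined elsewhere in the package):
#
#     parser = DependencyParser()               # hypothetical constructor
#     trees = parser.parse(u'She saw the man with the telescope.')
#     # trees -> one ParsedSentence per input sentence
#
# To parse text you have already tokenized, call parse_sentence directly
# with the list of tokens instead of going through parse().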