Example #1
    def tag(self, text=None):
        """
        Tags the given text.

        :param text: a string or unicode object. Strings are assumed
            to be utf-8. If None, blank-line-separated TSV is read
            from stdin instead.
        :returns: a list of lists (sentences with tokens).
            Each sentence has (token, tag) tuples.
        """
        result = []
        if text:
            tokens = utils.tokenize(text, clean=False)
            for sent in tokens:
                tags = self.tag_tokens(sent)
                # materialize the pairs so each sentence really is a
                # list of (token, tag) tuples on Python 3 as well
                result.append(list(zip(sent, tags)))
        else:
            # read TSV from stdin: one token per line (only the first
            # column is used), sentences separated by blank lines
            import sys
            sent = []
            for line in sys.stdin:
                # sys.stdin yields text in Python 3; no decode needed
                line = line.strip()
                if line:
                    sent.append(line.split()[0])
                elif sent:
                    tags = self.tag_tokens(sent)
                    result.append(list(zip(sent, tags)))
                    sent = []
            if sent:
                # flush the last sentence when the input does not end
                # with a blank line
                tags = self.tag_tokens(sent)
                result.append(list(zip(sent, tags)))

        return result
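
The stdin branch above expects CoNLL-style TSV: one token per line, only the first column used, sentences separated by blank lines. A minimal usage sketch for both entry paths; the class name `POSTagger` is an assumption for illustration, not confirmed by the snippet:

    # Hypothetical usage; POSTagger is an assumed class name.
    tagger = POSTagger()

    # Pass raw text directly:
    for sentence in tagger.tag("The quick brown fox jumped."):
        for token, tag in sentence:
            print(token, tag)

    # Or pipe blank-line-separated TSV through stdin:
    #   $ printf 'The\nquick\nfox\n\n' | python tag_from_stdin.py
    sentences = tagger.tag()  # text=None triggers the stdin path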
Example #2
    def tag(self, text):
        """
        Runs the SRL process on the given text.

        :param text: unicode or str encoded in utf-8.
        :returns: a list of SRLAnnotatedSentence objects
        """
        tokens = utils.tokenize(text, self.language)
        result = []
        for sent in tokens:
            tagged = self.tag_tokens(sent)
            result.append(tagged)
        
        return result
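
A brief sketch of consuming the result; `SRLTagger` and the attribute names on `SRLAnnotatedSentence` below are assumptions for illustration only:

    # Hypothetical usage; the class and attribute names are assumptions.
    srl = SRLTagger()
    for annotated in srl.tag("John gave Mary a book."):
        # assumed: each annotated sentence exposes its tokens and a
        # list of (predicate, arguments) structures
        print(annotated.tokens)
        for predicate, arguments in annotated.arg_structures:
            print(predicate, arguments)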
Example #3
    def tag(self, text):
        """
        Tags the given text.

        :param text: a string or unicode object. Strings are assumed to be utf-8.
        :returns: a list of lists (sentences with tokens).
            Each sentence has (token, tag) tuples.
        """
        tokens = utils.tokenize(text, self.language)
        result = []
        for sent in tokens:
            tagged = self.tag_tokens(sent, return_tokens=True)
            result.append(tagged)

        return result
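
All four examples delegate sentence splitting to `utils.tokenize`, and the loops only rely on it returning a list of sentences, each a list of token strings. A minimal stand-in with that contract, handy for exercising these methods without the real tokenizer (the regex-based splitting below is an assumption, not the library's actual behavior):

    import re

    def tokenize(text, language=None, clean=True):
        # Test stand-in only; the real utils.tokenize is presumably
        # more careful. Split on sentence-final punctuation, then pull
        # out word and punctuation tokens.
        sentences = re.split(r'(?<=[.!?])\s+', text.strip())
        return [re.findall(r"\w+|[^\w\s]", s) for s in sentences if s]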
Example #4
    def parse(self, text):
        """
        Splits the given text into sentences and determines their
        dependency trees. If you want to provide your own tokenized
        text, use `parse_sentence` instead.

        :param text: a string
        :returns: a list of ParsedSentence's
        """
        sentences = utils.tokenize(text, self.language)
        result = []
        for sent in sentences:
            parsed = self.parse_sentence(sent)
            result.append(parsed)

        return result
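
The docstring points to `parse_sentence` for pre-tokenized input. A short sketch of both entry points, where `DependencyParser` is an assumed class name and the structure of the returned `ParsedSentence` objects is not shown here:

    # Hypothetical usage; DependencyParser is an assumed class name.
    parser = DependencyParser()

    # Let parse() split and tokenize the raw text itself:
    parsed_sentences = parser.parse("She reads. He writes.")

    # Or skip tokenization by parsing a pre-tokenized sentence:
    parsed = parser.parse_sentence(["She", "reads", "."])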