Example 1
0
    def _find_bad_tag_and_raise_error(self, tags):
        """Probe each tag individually against a fresh ExtPos and raise a
        ValueError listing every tag the parser does not recognize.

        Always raises; callers invoke this only after a batched
        addTagConstraints call has already failed."""
        probe = parser.ExtPos()
        # A tag is invalid when the parser rejects it as a singleton
        # constraint vector.
        invalid = {tag for tag in set(tags)
                   if not probe.addTagConstraints(parser.StringVector([tag]))}

        raise ValueError("Invalid POS tags (not present in the parser's "
                         "terms.txt file): %s" % ', '.join(sorted(invalid)))
Example 2
0
 def _possible_tags_to_ext_pos(self, tokens, possible_tags):
     """Build a parser.ExtPos constraint object from possible_tags, a
     mapping of token index -> POS tag or sequence of POS tags.

     Tokens with no entry in possible_tags get an empty (unconstrained)
     tag vector.  Raises ValueError (via _find_bad_tag_and_raise_error)
     if any supplied tag is unknown to the parser."""
     ext_pos = parser.ExtPos()
     if not possible_tags:
         return ext_pos
     for index in range(len(tokens)):
         tags = possible_tags.get(index, [])
         # A single tag may be given as a bare string.
         if isinstance(tags, basestring):
             tags = [tags]
         # Use a list, not map(): on Python 3, map() returns a one-shot
         # iterator that StringVector would exhaust, leaving nothing for
         # the error-reporting path below.
         tags = [str(tag) for tag in tags]
         valid_tags = ext_pos.addTagConstraints(parser.StringVector(tags))
         if not valid_tags:
             # at least one of the tags is bad -- find out which ones
             # and throw a ValueError
             self._find_bad_tag_and_raise_error(tags)
     return ext_pos
Example 3
0
    def parse_tagged(self, tokens, possible_tags, rerank=True):
        """Parse some pre-tagged, pre-tokenized text.  tokens is a
        sequence of strings.  possible_tags is map from token indices
        to possible POS tags.  Tokens without an entry in possible_tags
        will be unconstrained by POS.  If rerank is True, we will
        rerank the n-best list."""
        self.check_loaded_models(rerank)

        ext_pos = parser.ExtPos()
        for index in range(len(tokens)):
            tags = possible_tags.get(index, [])
            # Allow a single tag to be given as a bare string; without
            # this wrap it would be iterated character by character.
            if isinstance(tags, basestring):
                tags = [tags]
            # Coerce to str for the SWIG layer, consistent with
            # _possible_tags_to_ext_pos.
            tags = [str(tag) for tag in tags]
            # Bug fix: the wrapped class is StringVector (as used by the
            # other constraint-building code in this module), not
            # VectorString, which does not exist.
            ext_pos.addTagConstraints(parser.StringVector(tags))

        sentence = Sentence(tokens)
        parses = parser.parse(sentence.sentrep, ext_pos,
                              self._parser_thread_slot)
        nbest_list = NBestList(sentence, parses)
        if rerank:
            nbest_list.rerank(self)
        return nbest_list