Example #1
0
    def test_lexicalize_does_change_tree(self):
        """Check that lexicalize() modifies the tree it is given.

        The input tree starts with every leaf equal to its own tag; after
        the call it must compare equal to a tree whose leaves are the words
        of the sentence, in order.
        """
        words = 'el gato come pescado crudo'.split()
        tree = Tree.fromstring(
            """
                (S
                    (NP (Det Det) (Noun Noun))
                    (VP (Verb Verb) (NP (Noun Noun) (Adj Adj)))
                )
            """)
        expected = Tree.fromstring(
            """
                (S
                    (NP (Det el) (Noun gato))
                    (VP (Verb come) (NP (Noun pescado) (Adj crudo)))
                )
            """)

        # The return value is ignored on purpose: only the effect on the
        # `tree` object passed in is being checked here.
        lexicalize(tree, words)

        self.assertEqual(tree, expected)
Example #2
0
    def parse(self, tagged_sent):
        """
        Parse a tagged sentence.

        tagged_sent -- the tagged sentence (a list of pairs (word, tag)).

        The tags alone are handed to self.my_parser; the words are put back
        on the resulting tree with lexicalize(). When the parser reports a
        log-probability of -inf, a flat tree rooted at self.start with one
        (tag word) child per token is returned instead.
        """
        words, tags = zip(*tagged_sent)
        score, cky_tree = self.my_parser.parse(tags)

        if score != float("-inf"):
            # Undo the Chomsky-normal-form transformation in place before
            # attaching the words.
            cky_tree.un_chomsky_normal_form()
            return lexicalize(cky_tree, words)

        # CKY could not parse the tag sequence: fall back to the flat tree.
        return Tree(self.start, [Tree(tag, [word]) for word, tag in tagged_sent])
Example #3
0
    def parse(self, tagged_sent):
        """Parse a tagged sentence.

        tagged_sent -- the tagged sentence (a list of pairs (word, tag)).

        Returns the tree produced by self.parser for the tag sequence, taken
        out of Chomsky normal form and lexicalized with the sentence words.
        If the parser scores the sentence as -inf, returns a flat tree
        rooted at self.start instead.
        """
        words, tag_seq = zip(*tagged_sent)
        score, parsed = self.parser.parse(tag_seq)

        no_parse = score == float('-inf')
        if no_parse:
            # Fallback: a flat tree with one (tag word) subtree per token.
            root = self.start
            children = [Tree(tag, [word]) for word, tag in tagged_sent]
            return Tree(root, children)

        # The CKY parser yields a tree in Chomsky normal form; convert it
        # back (in place) before handing it on.
        parsed.un_chomsky_normal_form()

        # Finally attach the sentence words to the tree.
        return lexicalize(parsed, words)
Example #4
0
    def parse(self, tagged_sent):
        """Parse a tagged sentence.

        tagged_sent -- the tagged sentence (a list of pairs (word, tag)).

        The tag sequence is parsed with self._parser. If it yields a tree,
        that tree is taken out of Chomsky normal form and lexicalized with
        the words; if it yields None, a flat tree labelled with
        self.start.symbol() is returned.
        """
        words, tags = zip(*tagged_sent)
        # Only the tree is needed; the first element of the result is unused.
        _unused, cnf_tree = self._parser.parse(tags)

        if cnf_tree is not None:
            # Revert the CNF transformation in place, then add the words.
            cnf_tree.un_chomsky_normal_form()
            return lexicalize(cnf_tree, words)

        # No parse available: build a flat tree, one (tag word) child per token.
        return Tree(self.start.symbol(),
                    [Tree(tag, [word]) for word, tag in tagged_sent])