def test_lexicalize_does_change_tree(self):
    """Check that lexicalize rewrites the leaves of the tree it is given.

    The input tree has its tags repeated as leaves; after the call, the
    same tree object must compare equal to a tree whose leaves are the
    words of the sentence, in order.
    """
    unlexicalized = """ (S (NP (Det Det) (Noun Noun)) (VP (Verb Verb) (NP (Noun Noun) (Adj Adj))) ) """
    lexicalized = """ (S (NP (Det el) (Noun gato)) (VP (Verb come) (NP (Noun pescado) (Adj crudo))) ) """

    tree = Tree.fromstring(unlexicalized)
    words = 'el gato come pescado crudo'.split()

    # The return value is deliberately ignored: the assertion below is
    # about the mutation of `tree` itself.
    lexicalize(tree, words)

    expected = Tree.fromstring(lexicalized)
    self.assertEqual(tree, expected)
def parse(self, tagged_sent):
    """Parse a tagged sentence.

    tagged_sent -- the tagged sentence (a list of pairs (word, tag)).

    Only the tags are passed to the underlying parser. When that parser
    reports a log probability of minus infinity, a flat tree rooted at
    self.start is returned, with one (tag word) subtree per token.
    Otherwise the parser's tree is taken out of Chomsky normal form and
    handed to lexicalize together with the words.
    """
    tokens, pos_tags = zip(*tagged_sent)
    score, parsed = self.my_parser.parse(pos_tags)

    # If the sentence cannot be parsed with CKY, fall back to the flat tree.
    if score == float("-inf"):
        flat_children = []
        for token, pos_tag in tagged_sent:
            flat_children.append(Tree(pos_tag, [token]))
        return Tree(self.start, flat_children)

    parsed.un_chomsky_normal_form()
    return lexicalize(parsed, tokens)
def parse(self, tagged_sent):
    """Parse a tagged sentence.

    tagged_sent -- the tagged sentence (a list of pairs (word, tag)).

    Returns a flat tree rooted at self.start when the underlying parser
    gives the tag sequence a probability of minus infinity; otherwise
    returns the result of lexicalize on the parser's tree after undoing
    Chomsky normal form.
    """
    words, tag_seq = zip(*tagged_sent)
    sentence_prob, parsed_tree = self.parser.parse(tag_seq)

    no_parse = sentence_prob == float('-inf')
    if no_parse:
        # Flat tree: every (word, tag) pair becomes a (tag word) child.
        leaves = [Tree(pos, [token]) for token, pos in tagged_sent]
        return Tree(self.start, leaves)

    # The CKY tree comes back in Chomsky normal form; undo that first.
    parsed_tree.un_chomsky_normal_form()

    # Hand the tree and the words to lexicalize, whose result is returned.
    lexicalized_tree = lexicalize(parsed_tree, words)
    return lexicalized_tree
def parse(self, tagged_sent):
    """Parse a tagged sentence.

    tagged_sent -- the tagged sentence (a list of pairs (word, tag)).

    When the underlying parser yields no tree (None), a flat tree labelled
    with self.start.symbol() is returned, holding one (tag word) subtree
    per token. Otherwise the parser's tree is taken out of Chomsky normal
    form and passed to lexicalize with the words.
    """
    tokens, pos_tags = zip(*tagged_sent)

    # The first element of the parser's result is not used here.
    cnf_tree = self._parser.parse(pos_tags)[1]

    if cnf_tree is not None:
        # Undo CNF, then add the words.
        cnf_tree.un_chomsky_normal_form()
        return lexicalize(cnf_tree, tokens)

    # No parse available: build the flat tree.
    root_label = self.start.symbol()
    flat_children = []
    for token, pos_tag in tagged_sent:
        flat_children.append(Tree(pos_tag, [token]))
    return Tree(root_label, flat_children)