def _prepare_copy(self):
    """Pass every held sentence's ``parse_tree`` through ``Tree.convert``.

    The collections are visited in this order: ``self.sentences``,
    ``self._sentences_list``, the values of ``self._sentences_dict`` and
    ``self.sentences_removed``. Each sentence's ``parse_tree`` attribute is
    replaced in place by the converted tree; nothing is returned.

    NOTE(review): presumably this swaps richer tree subclasses for plain
    ``Tree`` objects before the owner is copied -- confirm against callers.
    """

    def _reconvert(sentences):
        # Rebind each sentence's tree to whatever Tree.convert produces.
        for sentence in sentences:
            sentence.parse_tree = Tree.convert(sentence.parse_tree)

    # Each attribute is read only when its turn comes, so any lazily
    # computed collection is evaluated in the same order as before.
    _reconvert(self.sentences)
    _reconvert(self._sentences_list)
    _reconvert(self._sentences_dict.values())
    _reconvert(self.sentences_removed)
def evaluate(self, print_tree_mismatch=False):
    """Score the trained grammar by exact-match accuracy on ``self.test_trees``.

    Every test tree is flattened to its leaves, re-parsed with
    ``self.syntactic_parse`` and the result compared with the original tree.
    When ``self.trained_grammar`` is ``None`` it is first obtained from
    ``self.train()`` and stored on the instance.

    Progress information (each leaf list, each parse result, each index) is
    printed to standard output while the evaluation runs.

    Args:
        print_tree_mismatch: when true, additionally print the constructed
            and the original tree for every pair that differs.

    Returns:
        The fraction of test trees that were rebuilt exactly, as a float.
        ``0.0`` is returned when there are no test trees at all (previously
        this case raised ``ZeroDivisionError``).
    """
    # First, flatten the answer key
    test_pos_list = [test_tree.leaves() for test_tree in self.test_trees]

    # Second, get the grammar
    if self.trained_grammar is None:
        self.trained_grammar = self.train()

    # Third, build trees back up with your grammar
    trees_built_by_grammar = []
    for pos_list in test_pos_list:
        print(pos_list)
        syntactic_parsed_result = self.syntactic_parse(
            pos_list, self.trained_grammar)
        print("done parsing")
        print(syntactic_parsed_result)
        if syntactic_parsed_result is not None:
            trees_built_by_grammar.append(
                Tree.convert(syntactic_parsed_result))
        else:
            # No parse was produced: use a placeholder tree so the lists stay
            # aligned and the sentence is counted as incorrect.
            trees_built_by_grammar.append(Tree.fromstring("(NOT COVERED)"))

    # Finally, get a proportion of correct trees
    number_trees = len(trees_built_by_grammar)
    if number_trees == 0:
        # Nothing to evaluate; avoid dividing by zero.
        return 0.0

    number_correct = 0
    pairs = zip(self.test_trees, trees_built_by_grammar)
    for index, (original, constructed) in enumerate(pairs):
        print(index)
        if original == constructed:
            number_correct += 1
        elif print_tree_mismatch:
            print("\nTREE MISMATCH", "\nConstructed:\n",
                  constructed, "\nOriginal:\n", original)

    return number_correct / number_trees
def merge_tree_nnps(tree):
    """
    Takes a parse tree and merges any consecutive leaf nodes that come from NNPs

    For example if there is a segment of:
        (NP
            (JJ old)
            (NNP Pierre)
            (NNP Vinken)
        )
    Returns:
        (NP
            (JJ old)
            (NNP PierreVinken)
        )

    The merging itself is delegated to ``merge_tagged_nnps``, which receives
    the children of each height-3 subtree as a list of ``(POS, word)`` pairs.
    The input is converted first, and a plain ``Tree`` is returned.

    A tree whose root is itself such a height-3 phrase is handled as well;
    assigning to the empty tree position is rejected by NLTK, so in that case
    the rebuilt phrase simply becomes the new root.
    """
    # require a parented tree to get a subtrees tree position
    p = ParentedTree.convert(tree)

    # Subtrees of height 3 are where NP's leading to NNP's leading to
    # lexicalizations will be. Collect them up front so the tree is never
    # modified while its subtrees() generator is still being consumed.
    phrases = list(p.subtrees(filter=lambda s: s.height() == 3))

    for s in phrases:
        # merge NNP's in the list representation of this trees children: [(POS, word), ...]
        new_noun_phrase = merge_tagged_nnps([(c.label(), c[0]) for c in s])
        child_str = " ".join("(%s %s)" % (pos, word) for pos, word in new_noun_phrase)

        # create new subtree with merged NNP's
        new_s = ParentedTree.fromstring("(%s %s)" % (s.label(), child_str))

        # replace old subtree with new subtree
        position = s.treeposition()
        if position:
            p[position] = new_s
        else:
            # The matched subtree is the root: there is no parent slot to
            # assign into, so the rebuilt phrase replaces the whole tree.
            p = new_s

    return Tree.convert(p)
def _clear_caches(self, parent):
    """Drop this object's cached attributes and do the same for its children.

    Deletes ``_tokens_list``, ``_tokens_dict``, ``_text`` and
    ``_parent_document`` from the instance (an ``AttributeError`` is raised
    if one of them is not set), passes ``parse_tree`` through
    ``Tree.convert``, and then calls ``_clear_caches(self)`` on every item of
    ``self.tokens`` followed by every item of ``self.verbs``.

    Args:
        parent: not used by this method.
            NOTE(review): presumably kept so all ``_clear_caches``
            implementations share one signature -- confirm.
    """
    cached_attributes = (
        "_tokens_list",
        "_tokens_dict",
        "_text",
        "_parent_document",
    )
    for attribute in cached_attributes:
        delattr(self, attribute)

    self.parse_tree = Tree.convert(self.parse_tree)

    # Propagate to children, tokens first, handing ourselves over as parent.
    for token in self.tokens:
        token._clear_caches(self)
    for verb in self.verbs:
        verb._clear_caches(self)