Beispiel #1
0
 def _prepare_copy(self):
     """Pass every held sentence's parse tree through ``Tree.convert``.

     The sentence collections are processed one after another, in this
     order: ``self.sentences``, ``self._sentences_list``, the values of
     ``self._sentences_dict`` and finally ``self.sentences_removed``.
     Each sentence's ``parse_tree`` attribute is overwritten in place.

     NOTE(review): presumably this normalises the trees to plain ``Tree``
     instances before the object is copied -- confirm against the caller.
     """

     def _convert_all(group):
         # Overwrite each parse tree with its Tree.convert() result.
         for item in group:
             item.parse_tree = Tree.convert(item.parse_tree)

     _convert_all(self.sentences)
     _convert_all(self._sentences_list)
     # Only the stored sentences are needed, not their keys.
     _convert_all(self._sentences_dict.values())
     _convert_all(self.sentences_removed)
Beispiel #2
0
 def evaluate(self, print_tree_mismatch=False):
     """Return the proportion of test trees reproduced exactly by the grammar.

     The leaves of every tree in ``self.test_trees`` are parsed with
     ``self.syntactic_parse`` using ``self.trained_grammar`` (obtained from
     ``self.train()`` and stored on the instance if it is still ``None``).
     Each parse result is compared for equality with the original tree.

     Progress and intermediate results are printed to standard output.

     Args:
         print_tree_mismatch: when true, print the constructed and the
             original tree for every pair that does not match.

     Returns:
         ``number_correct / number_trees``, or ``0.0`` when there are no
         test trees at all (instead of raising ``ZeroDivisionError``).
     """
     # First, flatten the answer key
     test_pos_list = [test_tree.leaves() for test_tree in self.test_trees]
     # Second, get the grammar
     if self.trained_grammar is None:
         self.trained_grammar = self.train()
     # Third, build trees back up with your grammar
     trees_built_by_grammar = []
     for pos_list in test_pos_list:
         print(pos_list)
         syntactic_parsed_result = self.syntactic_parse(
             pos_list, self.trained_grammar)
         print("done parsing")
         # The result is printed whether or not a parse was found.
         print(syntactic_parsed_result)
         if syntactic_parsed_result is not None:
             trees_built_by_grammar.append(
                 Tree.convert(syntactic_parsed_result))
         else:
             # No parse came back: use a placeholder tree so that the built
             # list stays aligned with self.test_trees.
             trees_built_by_grammar.append(Tree.fromstring("(NOT COVERED)"))
     # Finally, get a proportion of correct trees
     number_trees = len(trees_built_by_grammar)
     if number_trees == 0:
         # Nothing to score; avoid dividing by zero.
         return 0.0
     number_correct = 0
     for index, (original, constructed) in enumerate(
             zip(self.test_trees, trees_built_by_grammar)):
         print(index)
         if original == constructed:
             number_correct += 1
         elif print_tree_mismatch:
             print("\nTREE MISMATCH", "\nConstructed:\n",
                   constructed, "\nOriginal:\n",
                   original)
     return number_correct / number_trees
Beispiel #3
0
def merge_tree_nnps(tree):
    """
    Takes a parse tree and merges any consecutive leaf nodes that come from NNPs
    For example if there is a segment of:
        (NP
            (JJ old)
            (NNP Pierre)
            (NNP Vinken)
        )
    Returns:
        (NP
            (JJ old)
            (NNP PierreVinken)
        )

    The input tree is first converted to a ParentedTree; the result is
    converted back with ``Tree.convert`` before being returned. A tree whose
    root is itself such a height-3 segment (as in the example above) is
    handled as well.

    NOTE(review): every rebuilt segment goes through a bracketed string and
    ``ParentedTree.fromstring``, so words containing whitespace or brackets
    would presumably not survive the round trip -- confirm whether that can
    occur in the input.
    """

    # require a parented tree to get a subtrees tree position
    p = ParentedTree.convert(tree)

    # Subtrees of height 3 are where NP's leading to NNP's leading to
    # lexicalizations will be. Snapshot them first so the tree is not being
    # restructured while the subtree generator is still walking it.
    candidates = list(p.subtrees(filter=lambda t: t.height() == 3))
    for s in candidates:
        # merge NNP's in the list representation of this trees children: [(POS, word), ...]
        new_noun_phrase = merge_tagged_nnps([(c.label(), c[0]) for c in s])
        child_str = " ".join("(%s %s)" % (pos, word) for pos, word in new_noun_phrase)
        # create new subtree with merged NNP's
        new_s = ParentedTree.fromstring("(%s %s)" % (s.label(), child_str))

        position = s.treeposition()
        if not position:
            # The root itself has height 3. NLTK refuses assignment to the
            # empty tree position (IndexError), so the rebuilt subtree simply
            # becomes the whole result. No other height-3 subtree can exist.
            p = new_s
            break

        # replace old subtree with new subtree
        p[position] = new_s
    return Tree.convert(p)
Beispiel #4
0
 def _clear_caches(self, parent):
     """Delete this object's cached attributes and cascade to its children.

     Removes ``_tokens_list``, ``_tokens_dict``, ``_text`` and
     ``_parent_document`` from the instance, passes ``parse_tree`` through
     ``Tree.convert``, then calls ``_clear_caches(self)`` on every element
     of ``self.tokens`` followed by every element of ``self.verbs``.

     ``parent`` is accepted but not used by this method.

     As with a plain ``del``, removing an attribute that is not currently
     set raises ``AttributeError``.
     """
     cached_names = ("_tokens_list", "_tokens_dict", "_text", "_parent_document")
     for cached_name in cached_names:
         delattr(self, cached_name)

     self.parse_tree = Tree.convert(self.parse_tree)

     # Children receive this object as their parent argument.
     for token in self.tokens:
         token._clear_caches(self)
     for verb in self.verbs:
         verb._clear_caches(self)