コード例 #1
0
 def _element_2_syntactic_node(cls, element):
     """Recursively convert an element into a SyntacticNode.
     
     Args:
     element: an object exposing ``tag``, an ``attrib`` mapping and
         iteration over its child elements (presumably an
         xml.etree.ElementTree element -- TODO confirm against callers).
         Every element must carry a "cat" attribute; elements tagged "lf"
         must also carry "pos", "lemma" and "word".
     
     Returns:
     a terminal SyntacticNode when the element is tagged "lf"; otherwise a
     SyntacticNode labelled with the element's "cat" whose children are
     the converted child elements, in iteration order
     """
     attributes = element.attrib
     category = attributes["cat"]
     
     # Anything that is not a leaf ("lf") is an inner node: convert each
     # child recursively and attach it to the new parent.
     if element.tag != "lf":
         parent = SyntacticNode(category)
         for sub_element in element:
             parent.add_child(cls._element_2_syntactic_node(sub_element))
         return parent
     
     # Leaf element: the terminal node carries the lexical attributes.
     return SyntacticNode(category,
                          pos=attributes["pos"],
                          lemma=attributes["lemma"],
                          word=attributes["word"])
コード例 #2
0
 def read_tree(cls, string):
     """Read a SyntacticTree from a string.
     
     Args:
     string: the input string
         For a PSG tree, the format of the tree is similar to a Penn Treebank
         tree, without newline characters:
         e.g. (S (NP (NNP Mary)) (VP (VBZ loves) (NP (NNP John))))
     
     Returns:
     the SyntacticTree represented by the input string
     
     Raises:
     ValueError: if the string is empty (or whitespace only), or if it has
         fewer than two whitespace-separated elements once the outer
         brackets have been removed
     """
     
     # First, remove the outer brackets ().
     string = string.strip()
     if not string:
         raise ValueError("empty string cannot be a Syntactic Tree")
     if string[0] == "(" and string[-1] == ")":
         string = string[1:-1]
     
     # Split on whitespace. str.split() without a separator never yields
     # empty strings, so checking the number of elements is sufficient:
     #   - one element or fewer: cannot be a tree
     #   - exactly two elements: a tree whose root is a terminal node
     #   - more than two: the first element is the root label and the
     #     remaining elements describe its branches
     elements = string.split()
     if len(elements) <= 1:
         raise ValueError("%s cannot be a tree or subtree" % string)
     
     if len(elements) == 2:
         # TODO: if the label comes from CCG parser, turn [] into ()
         return SyntacticTree(cls._read_terminal_node(elements))
     
     root = SyntacticNode(elements[0])
     # Each parsed branch exposes its top node as ``_root`` (presumably the
     # branches are SyntacticTree objects -- TODO confirm); only that node
     # is attached under the new root.
     for branch in cls._read_branches(" ".join(elements[1:])):
         root.add_child(branch._root)
     
     return SyntacticTree(root)