def _element_2_syntactic_node(cls, element):
    """Recursively convert an element into a SyntacticNode.

    Args:
    element: an object exposing ``tag``, a subscriptable ``attrib`` and
        iteration over its child elements (presumably an XML element --
        confirm against the caller). Every element must carry a "cat"
        attribute; elements tagged "lf" must also carry "pos", "lemma"
        and "word".

    Returns:
    a SyntacticNode labelled with the element's "cat" attribute. For an
    "lf" element the node also receives pos, lemma and word; for any other
    element the node receives one child per child element, in iteration
    order.
    """
    attributes = element.attrib
    # "cat" is looked up first, so a missing category fails before any node
    # is constructed.
    category = attributes["cat"]

    if element.tag != "lf":
        # Non-"lf" element: build the node from its category alone and
        # attach the recursively converted children.
        parent = SyntacticNode(category)
        for sub_element in element:
            parent.add_child(cls._element_2_syntactic_node(sub_element))
        return parent

    # "lf" element: a terminal node; its child elements are not visited.
    return SyntacticNode(category,
                         pos=attributes["pos"],
                         lemma=attributes["lemma"],
                         word=attributes["word"])
 def read_tree(cls, string):
     """Read a SyntacticTree from a string.

     Args:
     string: the input string
         For a PSG tree, the format of the tree is similar to a Penn Treebank
         tree, without newline characters:
         e.g. (S (NP (NNP Mary)) (VP (VBZ loves) (NP (NNP John))))

     Returns:
     the SyntacticTree represented by the input string

     Raises:
     ValueError: if the string is empty after stripping whitespace, or if
         it holds fewer than two whitespace-separated elements once one
         pair of enclosing brackets has been removed.
     """
     string = string.strip()
     if not string:
         raise ValueError("empty string cannot be a Syntactic Tree")

     # Remove one pair of enclosing brackets (), if present.
     if string[0] == "(" and string[-1] == ")":
         string = string[1:-1]

     # Split on whitespace. split() without arguments never yields empty
     # strings, so the element count alone decides the shape:
     #   fewer than two elements -> not a tree
     #   exactly two elements    -> a tree whose root is a terminal node
     #   more than two elements  -> first element is the root label, the
     #                              remaining elements are its branches
     elements = string.split()
     if len(elements) <= 1:
         raise ValueError("%s cannot be a tree or subtree" % string)

     if len(elements) == 2:
         # TODO: if the label comes from CCG parser, turn [] into ()
         root = cls._read_terminal_node(elements)
     else:
         root = SyntacticNode(elements[0])
         # Each branch is attached through its _root attribute.
         for branch in cls._read_branches(" ".join(elements[1:])):
             root.add_child(branch._root)

     return SyntacticTree(root)
Exemple #3
0
class EnglishPluralizationSimple(LearningSimulation):
    """Learning simulation over a small English pluralization data set.

    The class declares six root nodes and one '+PL' feature node, a
    one-item vocabulary ('z'), two rules for the plural feature and twelve
    data items (a bare and a plural form per root). In DATA the plural is
    the bare form plus 'z', except for DEER and FISH whose plural equals
    the bare form.
    """

    # Lexical
    # The fourth argument matches the bare form used for that root in DATA;
    # the meaning of the third argument is defined by SyntacticNode (not
    # visible here).
    DOG_NODE = SyntacticNode(SyntacticNode.TYPE_ROOT, "DOG", 1000, "dog")
    TABLE_NODE = SyntacticNode(
        SyntacticNode.TYPE_ROOT, "TABLE", 1001, "teibl")
    TREE_NODE = SyntacticNode(SyntacticNode.TYPE_ROOT, "TREE", 1010, "tri")
    CAR_NODE = SyntacticNode(SyntacticNode.TYPE_ROOT, "CAR", 1011, "car")
    DEER_NODE = SyntacticNode(SyntacticNode.TYPE_ROOT, "DEER", 1100, "dir")
    FISH_NODE = SyntacticNode(SyntacticNode.TYPE_ROOT, "FISH", 1101, "fish")

    # Functional
    # NOTE(review): 1100 is also the third argument of DEER_NODE -- confirm
    # whether these values are required to be unique.
    PLURAL_NODE = SyntacticNode(SyntacticNode.TYPE_FEATURE, "+PL", 1100)

    NODES = [
        DOG_NODE,
        TABLE_NODE,
        TREE_NODE,
        CAR_NODE,
        DEER_NODE,
        FISH_NODE,
        PLURAL_NODE,
    ]
    VOCABULARY = ["z"]

    # Two rules for the plural feature: 'z' with an empty third argument,
    # and '' with DEER and FISH as third argument (presumably the roots the
    # rule is restricted to -- confirm against Rule).
    RULES = [
        Rule([PLURAL_NODE], "z", []),
        Rule([PLURAL_NODE], "", [DEER_NODE, FISH_NODE]),
    ]

    TARGET_GRAMMAR = Grammar(NODES, VOCABULARY, RULES)

    # One (root, bare form, plural form) row per noun; each row yields the
    # bare datum followed by the plural datum, in that order.
    DATA = []
    for _root, _bare, _plural in (
        (DOG_NODE, "dog", "dogz"),
        (TABLE_NODE, "teibl", "teiblz"),
        (TREE_NODE, "tri", "triz"),
        (CAR_NODE, "car", "carz"),
        (DEER_NODE, "dir", "dir"),
        (FISH_NODE, "fish", "fish"),
    ):
        DATA.append(Datum(_root, [], _bare))
        DATA.append(Datum(_root, [PLURAL_NODE], _plural))
    # Drop the loop variables so they do not remain as class attributes.
    del _root, _bare, _plural

    def __init__(self):
        """Initialise the base simulation with this class's nodes,
        vocabulary, rules and data (TARGET_GRAMMAR is not passed on)."""
        super().__init__(
            self.NODES,
            self.VOCABULARY,
            self.RULES,
            self.DATA,
        )