def _element_2_syntactic_node(cls, element):
    """Recursively convert an XML-like element into a SyntacticNode.

    Args:
        element: an object exposing ``tag``, ``attrib`` and iteration over
            its child elements (presumably an ElementTree ``Element`` --
            confirm against the caller). Every element must carry a "cat"
            attribute; elements tagged "lf" must also carry "pos", "lemma"
            and "word".

    Returns:
        A SyntacticNode labelled with the element's "cat" attribute. An
        "lf" element becomes a node built with pos/lemma/word and no
        children; any other element becomes a node whose children are the
        converted child elements, in document order.

    Raises:
        KeyError: if a required attribute is missing (when ``attrib`` is a
            mapping that raises KeyError on missing keys).
    """
    attributes = element.attrib
    category = attributes["cat"]

    if element.tag != "lf":
        # Non-leaf element: build the node, then attach each converted child.
        parent = SyntacticNode(category)
        for sub_element in element:
            parent.add_child(cls._element_2_syntactic_node(sub_element))
        return parent

    # "lf" elements are leaves and carry the lexical information.
    return SyntacticNode(
        category,
        pos=attributes["pos"],
        lemma=attributes["lemma"],
        word=attributes["word"],
    )
def read_tree(cls, string):
    """Read a SyntacticTree from a string.

    For a PSG tree, the format of the tree is similar to a Penn Treebank
    tree, without newline characters, e.g.::

        (S (NP (NNP Mary)) (VP (VBZ loves) (NP (NNP John))))

    Args:
        string: the input string

    Returns:
        the SyntacticTree represented by the input string

    Raises:
        ValueError: if the string is empty (or whitespace only), or if it
            holds fewer than two whitespace-separated elements once the
            outer pair of brackets has been removed.
    """
    string = string.strip()
    if not string:
        raise ValueError("empty string cannot be a Syntactic Tree")

    # First, remove the outer pair of brackets, if present.
    if string[0] == "(" and string[-1] == ")":
        string = string[1:-1]

    # Split on whitespace:
    #   - one element or fewer: it cannot be a tree
    #   - exactly two elements: a tree whose root is a terminal node
    #   - more than two: the first element is the root label and the
    #     remaining elements describe the branches
    # str.split() without a separator never yields empty strings, so a
    # length check is sufficient here.
    elements = string.split()
    if len(elements) <= 1:
        raise ValueError("%s cannot be a tree or subtree" % string)

    if len(elements) == 2:
        # TODO: if the label comes from CCG parser, turn [] into ()
        root = cls._read_terminal_node(elements)
    else:
        root = SyntacticNode(elements[0])
        # Each branch is itself a tree; attach its root node as a child.
        for branch in cls._read_branches(" ".join(elements[1:])):
            root.add_child(branch._root)

    return SyntacticTree(root)
class EnglishPluralizationSimple(LearningSimulation):
    """Learning simulation over a small English-pluralization data set.

    The class attributes declare the nodes, vocabulary, rules and data;
    ``__init__`` forwards NODES, VOCABULARY, RULES and DATA to the
    LearningSimulation constructor. TARGET_GRAMMAR is built from the same
    nodes, vocabulary and rules and kept as a class attribute.
    """

    # Lexical
    DOG_NODE = SyntacticNode(SyntacticNode.TYPE_ROOT, "DOG", 1000, "dog")
    TABLE_NODE = SyntacticNode(SyntacticNode.TYPE_ROOT, "TABLE", 1001, "teibl")
    TREE_NODE = SyntacticNode(SyntacticNode.TYPE_ROOT, "TREE", 1010, "tri")
    CAR_NODE = SyntacticNode(SyntacticNode.TYPE_ROOT, "CAR", 1011, "car")
    DEER_NODE = SyntacticNode(SyntacticNode.TYPE_ROOT, "DEER", 1100, "dir")
    FISH_NODE = SyntacticNode(SyntacticNode.TYPE_ROOT, "FISH", 1101, "fish")

    # Functional
    # NOTE(review): the numeric code 1100 is also used by DEER_NODE --
    # confirm whether root and feature codes are allowed to coincide.
    PLURAL_NODE = SyntacticNode(SyntacticNode.TYPE_FEATURE, "+PL", 1100)

    NODES = [
        DOG_NODE,
        TABLE_NODE,
        TREE_NODE,
        CAR_NODE,
        DEER_NODE,
        FISH_NODE,
        PLURAL_NODE,
    ]

    VOCABULARY = ["z"]

    # Presumably: the plural feature is spelled "z" by default and is left
    # empty in the context of DEER/FISH -- verify against Rule's signature.
    RULES = [
        Rule([PLURAL_NODE], "z", []),
        Rule([PLURAL_NODE], "", [DEER_NODE, FISH_NODE]),
    ]

    TARGET_GRAMMAR = Grammar(NODES, VOCABULARY, RULES)

    # Each root contributes two data points, in this order: the bare form
    # (no features) followed by the form carrying the plural feature.
    DATA = []
    for root_node, bare_form, plural_form in (
        (DOG_NODE, "dog", "dogz"),
        (TABLE_NODE, "teibl", "teiblz"),
        (TREE_NODE, "tri", "triz"),
        (CAR_NODE, "car", "carz"),
        (DEER_NODE, "dir", "dir"),
        (FISH_NODE, "fish", "fish"),
    ):
        DATA.append(Datum(root_node, [], bare_form))
        DATA.append(Datum(root_node, [PLURAL_NODE], plural_form))
    # Keep the loop temporaries out of the class namespace.
    del root_node, bare_form, plural_form

    def __init__(self):
        """Initialise the simulation with this class's nodes, vocabulary, rules and data."""
        super().__init__(self.NODES, self.VOCABULARY, self.RULES, self.DATA)