def buildSyllable(sTree, segments, noLabel):
    """Append the segments found under sTree to the list `segments`.

    The category of each node is obtained with tb.tree_category:

    * C0..C5, Consonant or Stress: the node's first child is appended,
      followed by "_C" unless noLabel is true.
    * Vowel: the node's first child is appended, followed by "_V" unless
      noLabel is true.
    * anything else: every child (tb.tree_children) is visited recursively.

    `segments` is modified in place; nothing is returned.
    """
    consonant_like = ("C0", "C1", "C2", "C3", "C4", "C5",
                      "Consonant", "Stress")
    category = tb.tree_category(sTree)

    if category in consonant_like:
        suffix = "_C"
    elif category == "Vowel":
        suffix = "_V"
    else:
        # Not a segment-bearing node: keep looking further down.
        for subtree in tb.tree_children(sTree):
            buildSyllable(subtree, segments, noLabel)
        return

    segment = tb.tree_children(sTree)[0]
    segments.append(segment if noLabel else segment + suffix)
def buildWord(wTree, sCat, syllables, segSep, sylSep, noLabel):
    """Append one string per syllable found under wTree to `syllables`.

    Parameters:
      wTree     -- the (sub)tree to search.
      sCat      -- object with a ``search`` method (presumably a compiled
                   regex) applied to the node category; a match marks the
                   node as a syllable.  The earlier hard-coded test this
                   replaced compared against "Syllable", "SyllableIF",
                   "SyllableI" and "SyllableF".
      syllables -- list extended in place.
      segSep    -- string used to join the segments of one syllable.
      sylSep    -- accepted and passed down the recursion, not used here.
      noLabel   -- forwarded to buildSyllable.

    Returns None.  If anything goes wrong while processing wTree, the
    offending subtree is printed and the program terminates via sys.exit().
    """
    try:
        if sCat.search(tb.tree_category(wTree)):
            segments = []
            buildSyllable(wTree, segments, noLabel)
            syllables.append(segSep.join(segments))
        else:
            for child in tb.tree_children(wTree):
                buildWord(child, sCat, syllables, segSep, sylSep, noLabel)
    except Exception:
        # Deliberately not a bare ``except:``: the SystemExit raised by a
        # nested call must propagate untouched, otherwise every ancestor
        # would print itself on the way up, and Ctrl-C would be swallowed.
        # The parenthesised form prints the same text on Python 2 and 3.
        print("%s\n" % wTree)
        sys.exit()
def rightbranch(tree):
    """Transform a subtree lying on a right branch.

    Returns a list of trees.  A preterminal yields ``[flag(tree)]``.  Any
    other node must have exactly two children (they are unpacked as a
    pair); its left child is handed to the inner ``leftbranch`` together
    with a new nonterminal labelled ``make_pair(Anc, label-of-left)`` whose
    children are the transformed right child.
    """
    if tb.is_preterminal(tree):
        return [flag(tree)]

    # Label of the node being transformed; leftbranch reads it via closure.
    Anc = tb.tree_label(tree)

    def leftbranch(subtree, transformed_right):
        """Transform a subtree lying on a left branch.

        transformed_right is the transformed right material between this
        node and Anc.
        """
        node, pending = subtree, transformed_right
        # Walk down the left spine, wrapping the pending right material in
        # a fresh pair-labelled nonterminal at every step.
        while not tb.is_preterminal(node):
            left, right = tb.tree_children(node)
            pair_label = make_pair(Anc, tb.tree_label(left))
            pending = tb.make_nonterminal(pair_label,
                                          rightbranch(right) + [pending])
            node = left
        return [node, pending]

    first, second = tb.tree_children(tree)
    root_pair = make_pair(Anc, tb.tree_label(first))
    return leftbranch(first,
                      tb.make_nonterminal(root_pair, rightbranch(second)))
def visit(node, sofar):
    """Append the label derived from `node` to `sofar`, then recurse.

    `node` must be phrasal with a pair label (A, X) and a preterminal
    first child.  Which cat_* constructor builds the label depends on the
    number of children and on whether the first child is flagged:

      1 child,  flagged     -> cat_c(A, X, unflag(first))
      2 children, flagged   -> cat_b(A, X, B, unflag(first))
      2 children, unflagged -> cat_e(A, X, C, first)
      3 children, unflagged -> cat_d(A, X, B, C, first)

    Any other shape terminates the program through sys.exit with an error
    message.  That message refers to ``xtree``, which is not defined in
    this function and is expected to come from an enclosing scope.

    After the label is appended, every child except the first is visited
    in order.  Returns `sofar`.
    """
    assert tb.is_phrasal(node)
    pair_label = tb.tree_label(node)
    assert is_pair(pair_label)
    A, X = pair_categories(pair_label)

    children = tb.tree_children(node)
    arity = len(children)
    assert arity > 0
    assert tb.is_preterminal(children[0])

    if arity == 1:
        assert is_flagged(children[0])
        xf = cat_c(A, X, unflag(children[0]))
    elif arity == 2 and is_flagged(children[0]):
        A1, B = pair_categories(tb.tree_label(children[1]))
        assert A1 == A
        xf = cat_b(A, X, B, unflag(children[0]))
    elif arity == 2:
        C, a = pair_categories(tb.tree_label(children[1]))
        assert a == tb.tree_label(children[0]), \
            "error in label of node.children[1] a = {}, node = {}".format(a, node)
        xf = cat_e(A, X, C, children[0])
    elif arity == 3:
        assert not is_flagged(children[0])
        C, a = pair_categories(tb.tree_label(children[1]))
        A1, B = pair_categories(tb.tree_label(children[2]))
        assert A == A1
        xf = cat_d(A, X, B, C, children[0])
    else:
        sys.exit("error: ill-formed subtree {}\n in tree {}".format(
            node, xtree))

    sofar.append(xf)
    for subtree in children[1:]:
        sofar = visit(subtree, sofar)
    return sofar
def leftbranch(subtree, continuation):
    """Transform a subtree lying on a left branch.

    `continuation` is a list of trees that ends up after the preterminal
    found at the bottom of the left spine.  At every non-preterminal node
    (which must have exactly two children) the continuation is replaced by
    a single nonterminal labelled ``make_pair(Anc, label-of-left-child)``
    whose children are ``rightbranch(right)`` followed by the previous
    continuation.  ``Anc`` and ``rightbranch`` are taken from the
    surrounding scope.

    Returns the list ``[preterminal] + continuation``.
    """
    node, pending = subtree, continuation
    while not tb.is_preterminal(node):
        left, right = tb.tree_children(node)
        pair_label = make_pair(Anc, tb.tree_label(left))
        pending = [tb.make_nonterminal(pair_label,
                                       rightbranch(right) + pending)]
        node = left
    return [node] + pending
def leftbranch(subtree, continuation, X1):
    """Transform a subtree lying on a left branch, relabelling its leaf.

    Works down the left spine of `subtree`.  At every non-preterminal node
    (which must have exactly two children) the continuation is replaced by
    a single nonterminal labelled ``make_pair(Anc, label-of-left-child)``
    whose children are ``rightbranch(right, X2)`` followed by the previous
    continuation, where X2 is the left child's label with '>' appended.
    ``Anc`` and ``rightbranch`` are taken from the surrounding scope.

    Returns ``[relabel(preterminal, X1)] + continuation``; X1 is passed
    through unchanged to the bottom of the spine.
    """
    node, pending = subtree, continuation
    while not tb.is_preterminal(node):
        left, right = tb.tree_children(node)
        X2 = tb.tree_label(left) + '>'
        pair_label = make_pair(Anc, tb.tree_label(left))
        pending = [tb.make_nonterminal(pair_label,
                                       rightbranch(right, X2) + pending)]
        node = left
    return [relabel(node, X1)] + pending
def visit(node, wordssofar, segssofar):
    """Walk the tree depth-first and collect words.

    Terminals that do not match ignore_terminal_rex are passed through
    simplify_terminal and appended to `segssofar`.  Once all children of a
    non-terminal have been visited, if the node's label matches word_rex
    and segments are pending, the segments are concatenated into one word
    that is appended to `wordssofar`, and the pending segments start again
    from a new empty list.

    Returns the pair (wordssofar, segssofar).
    """
    if not tb.is_terminal(node):
        for subtree in tb.tree_children(node):
            wordssofar, segssofar = visit(subtree, wordssofar, segssofar)
        if word_rex.match(tb.tree_label(node)) and segssofar != []:
            wordssofar.append(''.join(segssofar))
            segssofar = []
    elif not ignore_terminal_rex.match(node):
        segssofar.append(simplify_terminal(node))
    return wordssofar, segssofar
def visit(node, wordssofar, segssofar):
    """Collect words from the tree rooted at `node`.

    The tree is traversed depth-first, children in order.  A terminal is
    simplified with simplify_terminal and queued in `segssofar` unless it
    matches ignore_terminal_rex.  A non-terminal whose label matches
    word_rex closes a word after its children are done: the queued
    segments (if any) are joined with no separator, appended to
    `wordssofar`, and the queue is replaced by a fresh empty list.

    Returns the tuple (wordssofar, segssofar).
    """
    if tb.is_terminal(node):
        keep = not ignore_terminal_rex.match(node)
        if keep:
            segssofar.append(simplify_terminal(node))
        return wordssofar, segssofar

    for subtree in tb.tree_children(node):
        wordssofar, segssofar = visit(subtree, wordssofar, segssofar)

    closes_word = word_rex.match(tb.tree_label(node))
    if closes_word and segssofar != []:
        wordssofar.append(''.join(segssofar))
        segssofar = []
    return wordssofar, segssofar
def visitTree(tree, words, wCat, sCat, segSep, sylSep, noLabel):
    """Traverse `tree` top-down and append one string per word to `words`.

    Parameters:
      tree    -- the (sub)tree to search.
      words   -- list extended in place.
      wCat    -- object with a ``search`` method (presumably a compiled
                 regex) applied to the node category; a match marks a word.
      sCat    -- same, marking syllable nodes inside a word.
      segSep  -- string joining the segments of one syllable.
      sylSep  -- string joining the syllables of one word.
      noLabel -- if true, segments are emitted without the "_C" / "_V"
                 suffix.

    Returns None.  If processing a word's subtree raises an exception, the
    offending subtree is printed and the program terminates via sys.exit().
    """

    def buildSyllable(sTree, segments, noLabel):
        """Append the segments found under sTree to `segments`."""
        c = tb.tree_category(sTree)
        if c in ("C0", "C1", "C2", "C3", "C4", "C5", "Consonant", "Stress"):
            suffix = "_C"
        elif c == "Vowel":
            suffix = "_V"
        else:
            # Not a segment-bearing node: keep looking further down.
            for child in tb.tree_children(sTree):
                buildSyllable(child, segments, noLabel)
            return
        segment = tb.tree_children(sTree)[0]
        segments.append(segment if noLabel else segment + suffix)

    def buildWord(wTree, sCat, syllables, segSep, sylSep, noLabel):
        """Append one segSep-joined string per syllable under wTree."""
        try:
            if sCat.search(tb.tree_category(wTree)):
                segments = []
                buildSyllable(wTree, segments, noLabel)
                syllables.append(segSep.join(segments))
            else:
                for child in tb.tree_children(wTree):
                    buildWord(child, sCat, syllables, segSep, sylSep, noLabel)
        except Exception:
            # Deliberately not a bare ``except:``: the SystemExit raised by
            # a nested call must propagate untouched, otherwise every
            # ancestor would print itself on the way up, and Ctrl-C would
            # be swallowed.  The parenthesised form prints the same text
            # on Python 2 and 3.
            print("%s\n" % wTree)
            sys.exit()

    if wCat.search(tb.tree_category(tree)):
        syllables = []
        buildWord(tree, sCat, syllables, segSep, sylSep, noLabel)
        words.append(sylSep.join(syllables))
    else:
        for child in tb.tree_children(tree):
            visitTree(child, words, wCat, sCat, segSep, sylSep, noLabel)
def lcx2tree_labels0(xtree):
    """Map an lcx2 tree to the corresponding list of labels.

    (The original docstring describes these as the labels "as in my 1996
    paper".)  `xtree` must have exactly two children: the first one,
    together with the root label, gives the initial label through cat_a;
    the second one is walked by the inner ``visit``, which appends one
    label per node.  Returns the resulting list.
    """

    def visit(node, sofar):
        """Append the label for `node` to `sofar`, recurse, return `sofar`."""
        assert tb.is_phrasal(node)
        pair_label = tb.tree_label(node)
        assert is_pair(pair_label)
        A, X = pair_categories(pair_label)

        children = tb.tree_children(node)
        arity = len(children)
        assert arity > 0
        assert tb.is_preterminal(children[0])

        # The constructor depends on the number of children and on whether
        # the leading preterminal is flagged.
        if arity == 1:
            assert is_flagged(children[0])
            xf = cat_c(A, X, unflag(children[0]))
        elif arity == 2 and is_flagged(children[0]):
            A1, B = pair_categories(tb.tree_label(children[1]))
            assert A1 == A
            xf = cat_b(A, X, B, unflag(children[0]))
        elif arity == 2:
            C, a = pair_categories(tb.tree_label(children[1]))
            assert a == tb.tree_label(children[0]), \
                "error in label of node.children[1] a = {}, node = {}".format(a, node)
            xf = cat_e(A, X, C, children[0])
        elif arity == 3:
            assert not is_flagged(children[0])
            C, a = pair_categories(tb.tree_label(children[1]))
            A1, B = pair_categories(tb.tree_label(children[2]))
            assert A == A1
            xf = cat_d(A, X, B, C, children[0])
        else:
            sys.exit("error: ill-formed subtree {}\n in tree {}".format(
                node, xtree))

        sofar.append(xf)
        for subtree in children[1:]:
            sofar = visit(subtree, sofar)
        return sofar

    root_label = tb.tree_label(xtree)
    top_children = tb.tree_children(xtree)
    assert len(top_children) == 2, "nonbinary xtree = {}".format(xtree)
    labels = [cat_a(root_label, top_children[0])]
    return visit(top_children[1], labels)
def visit(node, wordssofar, segssofar):
    """Walk the tree depth-first and collect words, with topics if present.

    Terminals that do not match ignore_terminal_rex are appended unchanged
    to `segssofar`.  Once all children of a non-terminal have been visited,
    its label is matched against score_cat_rex; on a match with pending
    segments, the segments are concatenated into a word and the pending
    list starts again from a new empty list.  If the match has a named
    group 'topic' with a value other than None, the tuple (word, topic) is
    appended to `wordssofar`; otherwise (group unset, or the pattern has
    no such group) just the word is appended.

    Returns the pair (wordssofar, segssofar).
    """
    if tb.is_terminal(node):
        if not ignore_terminal_rex.match(node):
            segssofar.append(node)
        return wordssofar, segssofar

    for subtree in tb.tree_children(node):
        wordssofar, segssofar = visit(subtree, wordssofar, segssofar)

    mo = score_cat_rex.match(tb.tree_label(node))
    if mo and segssofar != []:
        word = ''.join(segssofar)
        segssofar = []
        try:
            topic = mo.group('topic')
        except IndexError:
            # The pattern defines no 'topic' group at all.
            topic = None
        wordssofar.append(word if topic is None else (word, topic))
    return wordssofar, segssofar
def relabel(tree, label):
    """Return a nonterminal with `tree`'s children and an extended label.

    The new label is ``tree[0]``, a single space, then `label`.  The node
    is built with tb.make_nonterminal from that label and
    tb.tree_children(tree).
    """
    extended_label = tree[0] + ' ' + label
    children = tb.tree_children(tree)
    return tb.make_nonterminal(extended_label, children)