def pruneLeft(tree, phrase):
    """Prune, in place, what lies to the left of phrase in a parse tree.
    Scans the children left to right; at the first child for which
    nlp.tree.containsPhrase is true, every earlier sibling is popped and
    the search continues inside that child. Afterwards pruneWords is run
    on this node for the left edge.
    Arguments:
    tree - nltk parse tree (anything that is not an nltk.tree.Tree is ignored)
    phrase - indexable sequence of word strings (pruneWords lower-cases phrase[0])"""
    if not isinstance(tree, nltk.tree.Tree):
        # pruneWords would ignore a non-tree as well, so nothing is left to do.
        return
    for position, subtree in enumerate(tree):
        if not nlp.tree.containsPhrase(subtree, phrase):
            continue
        # Drop every sibling that precedes the matching child; the
        # matching child ends up at index 0.
        for _ in range(position):
            tree.pop(0)
        pruneLeft(tree[0], phrase)
        # The tree was mutated during iteration, so stop scanning here.
        break
    pruneWords(tree, phrase, True)
def pruneRight(tree, phrase):
    """Prune, in place, what lies to the right of phrase in a parse tree.
    Scans the children right to left; at the first child for which
    nlp.tree.containsPhrase is true, every later sibling is popped and
    the search continues inside that child. Afterwards pruneWords is run
    on this node for the right edge.
    Arguments:
    tree - nltk parse tree (anything that is not an nltk.tree.Tree is ignored)
    phrase - indexable sequence of word strings (pruneWords lower-cases phrase[-1])"""
    if not isinstance(tree, nltk.tree.Tree):
        # pruneWords would ignore a non-tree as well, so nothing is left to do.
        return
    for position in reversed(range(len(tree))):
        subtree = tree[position]
        if not nlp.tree.containsPhrase(subtree, phrase):
            continue
        # Drop every sibling that follows the matching child; the
        # matching child ends up as the last element.
        keep = position + 1
        while len(tree) > keep:
            tree.pop(-1)
        pruneRight(tree[-1], phrase)
        break
    pruneWords(tree, phrase, False)
def pruneWords(tree, phrase, left):
    """Strip word nodes from one edge of a parse tree, in place.
    Word children (as judged by nlp.tree.isWord) are popped from the chosen
    edge until one is found whose first element equals, ignoring case, the
    phrase's word at that edge. A non-word child at the edge is descended
    into instead, after which pruning at this level stops.
    Arguments:
    tree - nltk parse tree (anything that is not an nltk.tree.Tree is ignored)
    phrase - indexable sequence of word strings
    left - True to prune from the left edge, False to prune from the right"""
    ## TODO: won't work for repeated first/last words
    if not isinstance(tree, nltk.tree.Tree):
        return
    edge = 0 if left else -1
    target = phrase[edge].lower()
    while len(tree) != 0:
        node = tree[edge]
        if not nlp.tree.isWord(node):
            # Not a word node: continue the pruning one level down and stop here.
            pruneWords(node, phrase, left)
            break
        if node[0].lower() == target:
            # Reached the phrase's boundary word; keep it and everything past it.
            break
        tree.pop(edge)
# NOTE(review): an identical definition of removeChildren follows this one
# in the file and shadows it; consider keeping only one.
def removeChildren(tree):
    """Detach every child from a parse tree
    Arguments:
    tree - nltk parse tree (emptied in place)
    Return: list with the former children of tree, in their original order"""
    detached = []
    # One pop from the front per child that is present on entry.
    for _ in range(len(tree)):
        detached.append(tree.pop(0))
    return detached
def removeChildren(tree):
    """Empty a parse tree of its children and hand them back
    Arguments:
    tree - nltk parse tree (left with no children afterwards)
    Return: list with children of tree, first child first"""
    # Popping index 0 once per child drains the tree front to back.
    return [tree.pop(0) for _ in range(len(tree))]
def unbinarizeAndReunarize(tree):
    """Unbinarize and re-unarize a parse tree, in place.
    Replace child that contains @ with its children.
    Extend node <A>%%%%%<B> by <A> --> <B>. A longer chain such as
    <A>%%%%%<B>%%%%%<C> is expanded to <A> --> <B> --> <C>.
    Arguments:
    tree - nltk parse tree (modified in place; string children are left alone)"""
    i = 0
    while i < len(tree):
        child = tree[i]
        if not isinstance(child, str):
            if "@" in child.node:
                # unbinarize: splice the artificial node's children into
                # its place in the parent
                tree.pop(i)
                tree[i:i] = list(child)
                # Step back so the first spliced-in node is examined on
                # the next iteration (it may itself need expanding).
                i -= 1
            elif "%%%%%" in child.node:
                # re-unarize: split off only the first label. Any further
                # "%%%%%"-joined labels stay on the new child and are
                # expanded by the recursive call below instead of being
                # dropped.
                split = child.node.split("%%%%%", 1)
                child.node = split[0]
                grandChildren = removeChildren(child)
                newChild = nltk.Tree(split[1], grandChildren)
                child.append(newChild)
            unbinarizeAndReunarize(child)
        i += 1