def tgrep_positions(pattern, trees, search_leaves=True):
    """
    Yield, for each tree, the list of tree positions matching the pattern.

    Exactly one list is yielded per input tree.  An ``AttributeError``
    raised while collecting or testing positions (for example when the
    item has no ``treepositions`` method) produces an empty list for that
    item instead of propagating.

    :param pattern: a tgrep search pattern
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree positions)
    """
    # Textual patterns are compiled once, before any tree is visited.
    if isinstance(pattern, (bytes, str)):
        pattern = tgrep_compile(pattern)

    for tree in trees:
        try:
            candidates = (
                tree.treepositions()
                if search_leaves
                else treepositions_no_leaves(tree)
            )
            yield [
                candidate
                for candidate in candidates
                if pattern(tree[candidate])
            ]
        except AttributeError:
            yield []
def get_max_depth(tree: tree.Tree, factor: str = 'right') -> int:
    """
    Return the largest "turn" count over all leaf paths of ``tree``.

    The tree is first passed through ``collapse_unary()`` and
    ``chomsky_normal_form(factor=factor)``; their return values are
    ignored, so the caller's tree object is the one being transformed.

    For every leaf, the child indices on its path (without the final
    index) are written as a digit string prefixed with ``'0'``.  The score
    of the leaf is the number of non-overlapping places where a ``0`` is
    immediately followed by a non-zero digit; the result is the maximum
    score over all leaves.

    :param tree: the tree to measure
    :param factor: forwarded to ``chomsky_normal_form``
    :return: the maximum turn count found
    :raises ValueError: when no turn is found although the tree does not
        have exactly one leaf
    """
    tree.collapse_unary()
    tree.chomsky_normal_form(factor=factor)
    leaf_positions = tree.treepositions('leaves')

    max_depth = 0
    for leaf_p in leaf_positions:
        # The leading '0' lets a path that starts with a non-zero index
        # count as a turn as well.
        p_str = '0' + ''.join(str(x) for x in leaf_p[:-1])
        max_depth = max(max_depth, len(re.findall(r'0[1-9]', p_str)))

    if max_depth == 0 and len(leaf_positions) != 1:
        # Carry the diagnostics in the exception instead of printing them.
        raise ValueError(
            'no turn found in a tree with {} leaves (positions {}): {}'.format(
                len(leaf_positions), leaf_positions, tree
            )
        )
    return max_depth
def tgrep_nodes(pattern, trees, search_leaves=True):
    """
    Yield, for each tree, the list of tree nodes matching the pattern.

    Exactly one list is yielded per input tree.  An ``AttributeError``
    raised while collecting or testing positions (for example when the
    item has no ``treepositions`` method) produces a single empty list for
    that item.

    :param pattern: a tgrep search pattern
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree nodes)
    """
    if isinstance(pattern, (bytes, str)):
        pattern = tgrep_compile(pattern)

    for tree in trees:
        try:
            if search_leaves:
                positions = tree.treepositions()
            else:
                positions = treepositions_no_leaves(tree)
            # The match list is built inside the ``try`` so that a failure
            # never falls through to a second yield that would reuse an
            # unbound or stale ``positions`` from an earlier tree.
            yield [
                tree[position]
                for position in positions
                if pattern(tree[position])
            ]
        except AttributeError:
            yield []
def arbol_max_nodos(self):
    """
    Return the corpus tree with the largest number of tree positions.

    When several trees share the maximum, the first one in the order
    given by ``self.corpus.parsed_sents()`` is returned.
    """
    def node_count(candidate):
        # Size of a tree = how many positions treepositions() reports.
        return len(candidate.treepositions())

    return max(self.corpus.parsed_sents(), key=node_count)
def _after(node):
    """
    Return the list of nodes that come after ``node`` in its tree.

    A position counts as "after" when, once both positions are cut to
    their common length, it compares greater than the node's position.
    The node itself, its ancestors and its descendants compare equal and
    are therefore left out.  Objects lacking ``treeposition()`` or
    ``root()`` give an empty list.
    """
    try:
        node_position = node.treeposition()
        root = node.root()
    except AttributeError:
        return []

    following = []
    for candidate in root.treepositions():
        if candidate[:len(node_position)] > node_position[:len(candidate)]:
            following.append(root[candidate])
    return following
def _after(node):
    """
    Collect every node whose position follows that of ``node``.

    Positions are compared after truncating both to the shorter length,
    so the node, its ancestors and its descendants are never included.
    An object without ``treeposition()`` or ``root()`` yields ``[]``.
    """
    try:
        anchor = node.treeposition()
        root = node.root()
    except AttributeError:
        return []

    def follows(candidate):
        # Strictly greater once both tuples are cut to their common length.
        return candidate[:len(anchor)] > anchor[:len(candidate)]

    return [root[candidate] for candidate in filter(follows, root.treepositions())]
def _before(node):
    """
    Return the list of nodes that come before ``node`` in its tree.

    A position counts as "before" when, once both positions are cut to
    their common length, it compares smaller than the node's position.
    The node itself, its ancestors and its descendants compare equal and
    are therefore left out.  Objects lacking ``treeposition()`` or
    ``root()`` give an empty list.
    """
    try:
        node_position = node.treeposition()
        root = node.root()
    except AttributeError:
        return []

    preceding = []
    for candidate in root.treepositions():
        if candidate[:len(node_position)] < node_position[:len(candidate)]:
            preceding.append(root[candidate])
    return preceding
def traverse_tree(tree):
    """
    Print the label and tree positions of ``tree`` and of every subtree.

    The traversal is depth-first: a node's own label and positions are
    printed before any of its children are visited.  Children that are not
    trees (leaves) are skipped.

    :param tree: the tree to walk; must provide ``label()`` and
        ``treepositions()`` and be iterable over its children
    """
    print("label: ", tree.label())
    positions = tree.treepositions()
    print("treepositions:", positions)
    for subtree in tree:
        # isinstance rather than an exact type match, so that Tree
        # subclasses are descended into as well.
        if isinstance(subtree, nltk.tree.Tree):
            traverse_tree(subtree)
def _before(node):
    """
    Collect every node whose position precedes that of ``node``.

    Positions are compared after truncating both to the shorter length,
    so the node, its ancestors and its descendants are never included.
    An object without ``treeposition()`` or ``root()`` yields ``[]``.
    """
    try:
        anchor = node.treeposition()
        root = node.root()
    except AttributeError:
        return []

    def precedes(candidate):
        # Strictly smaller once both tuples are cut to their common length.
        return candidate[:len(anchor)] < anchor[:len(candidate)]

    return [root[candidate] for candidate in filter(precedes, root.treepositions())]
def get_position_and_flags(tree):
    """
    Flag every position of ``tree`` as kept (1) or dropped (0).

    All positions start out as kept.  Positions are then visited in the
    order given by ``tree.treepositions()``; a position that is already
    dropped is skipped.  For positions holding an ``nltk.tree.Tree``:

    * label ``'PP'``: the position and everything returned by
      ``find_child_positions`` for it are dropped, but only when that
      helper returns at most 15 positions;
    * label ``'DT'``: the position and everything returned by
      ``find_child_positions`` for it are always dropped.

    Positions holding anything else (such as leaf strings) keep their flag.

    :return: ``(positions, position_flags)`` where ``position_flags`` maps
        each position to 1 or 0
    """
    positions = tree.treepositions()
    # Everything is kept until one of the rules below says otherwise.
    position_flags = dict.fromkeys(positions, 1)

    def drop(position, child_positions):
        # Mark a position and the positions reported beneath it as removed.
        position_flags[position] = 0
        for child in child_positions:
            position_flags[child] = 0

    for position in positions:
        if position_flags[position] == 0:
            # Already removed by a rule applied to an earlier position.
            continue

        node = tree[position]
        if type(node) != nltk.tree.Tree:
            # No rules are defined for terminal nodes.
            continue

        label = node.label()
        if label == 'PP':
            child_positions = find_child_positions(position, positions)
            if len(child_positions) <= 15:
                drop(position, child_positions)
        elif label == 'DT':
            drop(position, find_child_positions(position, positions))

    return positions, position_flags
def treepositions_no_leaves(tree):
    """
    Return the positions of ``tree`` that are not leaves.

    A position is treated as a non-leaf exactly when it is a proper prefix
    of at least one other position.  The order of
    ``tree.treepositions()`` is preserved.
    """
    all_positions = tree.treepositions()
    # Every proper prefix of a position belongs to one of its ancestors.
    proper_prefixes = {
        position[:cut]
        for position in all_positions
        for cut in range(len(position))
    }
    return [position for position in all_positions if position in proper_prefixes]
def treepositions_no_leaves(tree):
    """
    List the non-leaf positions of ``tree`` in their original order.

    Leaves are recognised as the positions that are not a proper prefix
    of any other position in ``tree.treepositions()``.
    """
    every_position = tree.treepositions()

    internal = set()
    for position in every_position:
        # Slices shorter than the position itself are its ancestors.
        internal.update(position[:end] for end in range(len(position)))

    return list(filter(internal.__contains__, every_position))
def tgrep_positions(tree, tgrep_string, search_leaves=True):
    """
    Return the positions in ``tree`` whose node matches ``tgrep_string``.

    ``tgrep_string`` may be a ``str``/``bytes`` pattern, which is compiled
    with ``tgrep_compile``, or an already compiled predicate.  With
    ``search_leaves`` set to False, only the positions from
    ``treepositions_no_leaves`` are considered.  If collecting the
    positions raises ``AttributeError`` the result is an empty list.
    """
    try:
        candidates = (
            tree.treepositions()
            if search_leaves
            else treepositions_no_leaves(tree)
        )
    except AttributeError:
        return []

    matcher = tgrep_string
    if isinstance(matcher, (bytes, str)):
        matcher = tgrep_compile(matcher)

    return [candidate for candidate in candidates if matcher(tree[candidate])]
def tgrep_positions(tree, tgrep_string, search_leaves=True):
    """
    Find every position of ``tree`` whose node satisfies ``tgrep_string``.

    A ``str``/``bytes`` pattern is compiled with ``tgrep_compile`` first;
    anything else is called directly as the predicate.  Passing
    ``search_leaves=False`` restricts the search to the positions from
    ``treepositions_no_leaves``.  An ``AttributeError`` while gathering
    positions results in an empty list.
    """
    try:
        if not search_leaves:
            search_positions = treepositions_no_leaves(tree)
        else:
            search_positions = tree.treepositions()
    except AttributeError:
        return []

    if isinstance(tgrep_string, (bytes, str)):
        predicate = tgrep_compile(tgrep_string)
    else:
        predicate = tgrep_string

    hits = []
    for position in search_positions:
        if predicate(tree[position]):
            hits.append(position)
    return hits