Example #1
0
def tgrep_positions(pattern, trees, search_leaves=True):
    """
    Yield, for each tree, the list of tree positions that match the pattern.

    Exactly one list is produced per element of ``trees``.  If an
    ``AttributeError`` is raised while an element is being processed (for
    instance because it has no ``treepositions`` method), an empty list is
    produced for that element instead.

    :param pattern: a tgrep search pattern; ``bytes``/``str`` values are
        compiled with tgrep_compile(), anything else is called as a predicate
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree positions)
    """
    if isinstance(pattern, (bytes, str)):
        matcher = tgrep_compile(pattern)
    else:
        matcher = pattern

    for tree in trees:
        try:
            candidates = (
                tree.treepositions()
                if search_leaves
                else treepositions_no_leaves(tree)
            )
            matched = []
            for position in candidates:
                if matcher(tree[position]):
                    matched.append(position)
            yield matched
        except AttributeError:
            # Covers non-tree elements as well as predicates that raise
            # AttributeError on a node.
            yield []
Example #2
0
def get_max_depth(tree: tree.Tree, factor: str = 'right') -> int:
    """
    Return the largest per-leaf "turn" count found in the binarized tree.

    The tree is first passed through ``collapse_unary()`` and
    ``chomsky_normal_form(factor=factor)``; both are called directly on the
    argument (NLTK documents them as in-place transforms, so the caller's
    tree is modified).

    For every leaf, the path to its parent is written as a digit string
    prefixed with ``'0'``, and the number of places where a ``0`` is
    immediately followed by a non-zero digit is counted.  The maximum of
    these counts over all leaves is returned.

    :param tree: the tree to measure
    :param factor: forwarded to ``chomsky_normal_form``
    :raises Exception: if the result is 0 although the tree does not have
        exactly one leaf (the leaf positions and the tree are printed first)
    """
    tree.collapse_unary()
    tree.chomsky_normal_form(factor=factor)

    leaf_positions = tree.treepositions('leaves')

    deepest = 0
    for position in leaf_positions:
        # Drop the leaf's own index; only the path to its parent matters.
        path_digits = ''.join(map(str, position[:-1]))
        turn_count = len(re.findall('0[1-9]', '0' + path_digits))
        deepest = max(deepest, turn_count)

    if deepest == 0 and len(leaf_positions) != 1:
        print(leaf_positions)
        print(tree)
        raise Exception
    return deepest
Example #3
0
def tgrep_nodes(pattern, trees, search_leaves=True):
    """
    Yield, for each tree, the list of tree nodes that match the pattern.

    Exactly one list is produced per element of ``trees``.  An element whose
    positions cannot be enumerated because of an ``AttributeError`` (for
    example a non-tree object) produces a single empty list.

    :param pattern: a tgrep search pattern; ``bytes``/``str`` values are
        compiled with tgrep_compile(), anything else is called as a predicate
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree nodes)
    """

    if isinstance(pattern, (bytes, str)):
        pattern = tgrep_compile(pattern)

    for tree in trees:
        try:
            if search_leaves:
                positions = tree.treepositions()
            else:
                positions = treepositions_no_leaves(tree)
        except AttributeError:
            yield []
            # Without this, execution would fall through to the yield below
            # with ``positions`` unbound or left over from the previous tree.
            continue

        yield [tree[position] for position in positions
               if pattern(tree[position])]
Example #4
0
 def arbol_max_nodos(self):
     """
     Return the corpus tree that has the largest number of nodes.

     Node count is the length of ``tree.treepositions()``.  When several
     trees share the maximum, the first one encountered is returned.
     """
     def node_count(parsed):
         return len(parsed.treepositions())

     return max(self.corpus.parsed_sents(), key=node_count)
Example #5
0
def _after(node):
    """
    Returns the set of all nodes that are after the given node.
    """
    try:
        pos = node.treeposition()
        tree = node.root()
    except AttributeError:
        return []
    return [tree[x] for x in tree.treepositions() if x[: len(pos)] > pos[: len(x)]]
Example #6
0
def _after(node):
    '''
    Returns the set of all nodes that are after the given node.
    '''
    try:
        pos = node.treeposition()
        tree = node.root()
    except AttributeError:
        return []
    return [tree[x] for x in tree.treepositions() if x[: len(pos)] > pos[: len(x)]]
Example #7
0
def _before(node):
    """
    Returns the set of all nodes that are before the given node.
    """
    try:
        pos = node.treeposition()
        tree = node.root()
    except AttributeError:
        return []
    return [tree[x] for x in tree.treepositions() if x[: len(pos)] < pos[: len(x)]]
Example #8
0
def traverse_tree(tree):
    """
    Print the label and tree positions of ``tree`` and of every subtree.

    The tree is walked depth-first: for each tree node its label and the
    result of ``treepositions()`` are printed, then each child that is
    itself a tree is visited recursively.  Children that are not trees
    (leaves) are skipped.

    :param tree: the tree to walk; must provide ``label()`` and
        ``treepositions()`` and be iterable over its children
    """
    print("lable: ", tree.label())

    positions = tree.treepositions()
    print("treepositions:", positions)

    for subtree in tree:
        # isinstance (rather than an exact type comparison) so that Tree
        # subclasses such as ParentedTree are descended into as well.
        if isinstance(subtree, nltk.tree.Tree):
            traverse_tree(subtree)
Example #9
0
def _before(node):
    '''
    Returns the set of all nodes that are before the given node.
    '''
    try:
        pos = node.treeposition()
        tree = node.root()
    except AttributeError:
        return []
    return [tree[x] for x in tree.treepositions()
            if x[:len(pos)] < pos[:len(x)]]
Example #10
0
def get_position_and_flags(tree):
    """
    Compute a keep/drop flag for every position of ``tree``.

    Every position starts flagged ``1`` (keep).  Positions are then visited
    in ``treepositions()`` order and the following rules are applied to tree
    (non-terminal) nodes that are still flagged ``1``:

    * label ``'PP'``: the node and the positions returned by
      ``find_child_positions(p, positions)`` are flagged ``0``, but only
      when that helper returns at most 15 positions;
    * label ``'DT'``: the node and the positions returned by
      ``find_child_positions(p, positions)`` are flagged ``0``.

    Positions already flagged ``0`` are skipped, and leaf nodes are never
    flagged on their own.

    :param tree: the tree to analyse
    :return: ``(positions, position_flags)`` where ``positions`` is the
        result of ``tree.treepositions()`` and ``position_flags`` maps each
        position to ``1`` (keep) or ``0`` (do not keep)
    """
    positions = tree.treepositions()

    # 1: keep, 0: do not keep.
    position_flags = dict.fromkeys(positions, 1)

    for p in positions:
        if position_flags[p] == 0:
            # Already dropped as part of an earlier removed subtree.
            continue

        node = tree[p]
        # isinstance (not an exact type comparison) so that Tree subclasses
        # such as ParentedTree are handled too; leaves carry no rules.
        if not isinstance(node, nltk.tree.Tree):
            continue

        label = node.label()
        if label == 'PP':
            # presumably the descendants of p -- confirm against
            # find_child_positions
            children = find_child_positions(p, positions)
            # Only small prepositional phrases are removed.
            # NOTE(review): the reason for the limit of 15 is not visible here.
            if len(children) > 15:
                continue
        elif label == 'DT':
            children = find_child_positions(p, positions)
        else:
            continue

        position_flags[p] = 0
        for c in children:
            position_flags[c] = 0

    return positions, position_flags
Example #11
0
def treepositions_no_leaves(tree):
    """
    Return the positions of ``tree`` that do not belong to leaf nodes.

    A position is treated as a non-leaf when it is a proper prefix of at
    least one other position in the tree.  The order of
    ``tree.treepositions()`` is preserved.
    """
    all_positions = tree.treepositions()
    # Every proper prefix of a position is the position of one of its
    # ancestors, i.e. of a node that has at least one child.
    internal = {
        position[:cut]
        for position in all_positions
        for cut in range(len(position))
    }
    return [position for position in all_positions if position in internal]
Example #12
0
def treepositions_no_leaves(tree):
    '''
    Return every position in ``tree`` that is not the position of a leaf.

    Leaves are exactly the positions that are not a proper prefix of any
    other position, so the result keeps only positions that do occur as
    such a prefix, in the order given by ``tree.treepositions()``.
    '''
    positions = tree.treepositions()
    has_descendant = set()
    for position in positions:
        has_descendant.update(
            position[:depth] for depth in range(len(position))
        )
    return list(filter(has_descendant.__contains__, positions))
Example #13
0
def tgrep_positions(tree, tgrep_string, search_leaves=True):
    """
    Return the list of positions in ``tree`` whose node matches
    ``tgrep_string``.

    ``tgrep_string`` may be a ``bytes``/``str`` pattern, which is compiled
    with tgrep_compile(), or any other object, which is called as a
    predicate on each candidate node.

    If ``search_leaves`` is False, leaf positions are not considered.  If
    enumerating the positions raises ``AttributeError`` (for example when
    ``tree`` is not a tree), an empty list is returned.
    """
    try:
        candidates = (
            tree.treepositions()
            if search_leaves
            else treepositions_no_leaves(tree)
        )
    except AttributeError:
        return []

    matcher = tgrep_string
    if isinstance(matcher, (bytes, str)):
        matcher = tgrep_compile(matcher)

    matches = []
    for position in candidates:
        if matcher(tree[position]):
            matches.append(position)
    return matches
def tgrep_positions(tree, tgrep_string, search_leaves = True):
    '''
    Return a list of all positions in ``tree`` matching ``tgrep_string``.

    A ``bytes``/``str`` pattern is compiled with tgrep_compile() before
    use; any other value is called directly as a node predicate.

    With ``search_leaves`` set to False no leaf position is examined.  An
    empty list is returned when the positions cannot be enumerated because
    of an ``AttributeError``.
    '''
    try:
        if not search_leaves:
            positions = treepositions_no_leaves(tree)
        else:
            positions = tree.treepositions()
    except AttributeError:
        return []

    predicate = (tgrep_compile(tgrep_string)
                 if isinstance(tgrep_string, (bytes, str))
                 else tgrep_string)
    return list(filter(lambda position: predicate(tree[position]), positions))