Python Tree.leaf_treeposition Examples

Programming Language: Python

Namespace/Package Name: nltk.tree

Class/Type: Tree

Method/Function: leaf_treeposition

Examples at hotexamples.com: 3

Python Tree.leaf_treeposition - 3 examples found. These are the top-rated real-world Python examples of nltk.tree.Tree.leaf_treeposition, extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Tree(30)

fromstring(30)

label(26)

append(17)

parse(17)

leaves(16)

draw(10)

set_label(9)

pos(8)

subtrees(7)

insert(5)

treepositions(5)

pretty_print(3)

__init__(3)

convert(3)

__str__(2)

height(2)

span(2)

leaf_treeposition(2)

extend(2)

treeposition_spanning_leaves(1)

toString(1)

split(1)

bracket_parse(1)

replace(1)

remove(1)

delta(1)

pprint(1)

chomsky_normal_form(1)

collapse_unary(1)

keys(1)

gradb_d(1)

gradW_d(1)

copy(1)

flatten(1)

cn(1)

Example #1

Show file

File: TreebankReader.py Project: sobhe/hazm

		def traverse(node):
			"""Convert a DOM element into a Tree, recursing over its children.

			Returns None when the node has no child nodes. A node whose first
			child is a 'w' element becomes a one-leaf Tree holding a
			(word, mapped tag) pair. Any other node is converted child by child
			and then, depending on the reader's _join_clitics and
			_join_verb_parts flags, clitics and verb parts are merged into
			neighbouring leaves.
			"""
			def extract_tags(W):
				"""Return the tag attributes of word element W, 'lc' first (None if unset)."""
				pos = [W.getAttribute('lc') or None]
				clitic = W.getAttribute('clitic')
				if clitic in {'ezafe', 'pronominal', 'verb', 'prep', 'adv', 'det'}:
					pos.append(clitic)
				# The remaining attributes are appended, in this order, whenever they are non-empty.
				for name in ('ne_sort', 'n_type', 'ya_type', 'ke_type', 'type', 'kind'):
					value = W.getAttribute(name)
					if value:
						pos.append(value)
				return pos

			def clitic_join(tree, clitic):
				"""Glue the clitic's word onto the rightmost leaf below tree and relabel its parent."""
				# Descend along the last child until that child is a leaf.
				if isinstance(tree[-1], Tree):
					return clitic_join(tree[-1], clitic)
				if clitic[0][0][0] == 'ا':
					# NOTE(review): the prefix added here looks like a zero-width non-joiner; confirm.
					clitic[0] = ('‌' + clitic[0][0], clitic[0][1])
				tree[-1] = (tree[-1][0] + clitic[0][0], clitic[0][1])
				tree.set_label('CLITICS')

			if not len(node.childNodes):
				return
			first = node.childNodes[0]
			if first.tagName == 'w':
				pos = extract_tags(first)
				return Tree(node.tagName, [(first.childNodes[0].data.replace('می ', 'می‌'), self._pos_map(pos))])

			childs = node.childNodes[2:] if node.tagName == 'S' else node.childNodes
			# Filter into a new list: calling remove() on the list being iterated
			# skips the element after each removed one (leaving None entries in the
			# Tree) and also mutates node.childNodes itself.
			childs = [child for child in childs if len(child.childNodes)]
			tree = Tree(node.tagName, [traverse(child) for child in childs])

			if self._join_clitics and len(tree) > 1 and isinstance(tree[1], Tree) and tree[1].label() == 'CLITIC' and tree[1][0][1] not in {'P', 'V'}:
				clitic = tree[-1]
				# Rebuild the tree from the first child's subtrees, then attach the clitic.
				tree = Tree(tree.label(), [subtree for subtree in tree[0]])
				clitic_join(tree, clitic)

			if self._join_verb_parts and len(tree) > 1 and isinstance(tree[1], Tree) and isinstance(tree[0], Tree) and tree[0].label() == 'AUX' and tree[0][0][0] in self._tokenizer.before_verbs:
				# Prefix the verb with the auxiliary's word and drop the auxiliary subtree.
				tree[1][0] = (tree[0][0][0] + ' ' + tree[1][0][0], tree[1][0][1])
				tree.remove(tree[0])

			# The after-verb join was written out twice back to back in the original;
			# it is kept as two passes so the behaviour is unchanged.
			for _ in range(2):
				if not self._join_verb_parts:
					break
				leaves = tree.leaves()
				if len(leaves) > 1 and leaves[-1][0] in self._tokenizer.after_verbs and leaves[-2][0] in self._tokenizer.verbe:
					tree[1][0] = (tree[0].leaves()[-1][0] + ' ' + tree[1][0][0], tree[1][0][1])
					path = tree.leaf_treeposition(len(tree.leaves())-2)
					# Follow the path until only its last two steps remain, then remove
					# the one-leaf subtree of the second-to-last word from that node.
					removingtree = tree
					while len(path) > 2:
						removingtree = removingtree[path[0]]
						path = path[1:]
					word, tag = tree.pos()[-2]
					removingtree.remove(Tree(tag, [word]))
			return tree

Example #2

Show file

File: TreebankReader.py Project: theship/hazm

        def traverse(node):
            """Convert a DOM element into a Tree, recursing over its children.

            Returns None when the node has no child nodes. A node whose first
            child is a 'w' element becomes a one-leaf Tree holding a
            (word, mapped tag) pair. Any other node is converted child by
            child and then, depending on the reader's _join_clitics and
            _join_verb_parts flags, clitics and verb parts are merged into
            neighbouring leaves.
            """
            def extract_tags(W):
                """Return the tag attributes of word element W, 'lc' first (None if unset)."""
                pos = [W.getAttribute('lc') or None]
                clitic = W.getAttribute('clitic')
                if clitic in {
                        'ezafe', 'pronominal', 'verb', 'prep', 'adv', 'det'
                }:
                    pos.append(clitic)
                # The remaining attributes are appended, in this order,
                # whenever they are non-empty.
                for name in ('ne_sort', 'n_type', 'ya_type', 'ke_type',
                             'type', 'kind'):
                    value = W.getAttribute(name)
                    if value:
                        pos.append(value)
                return pos

            def clitic_join(tree, clitic):
                """Glue the clitic's word onto the rightmost leaf below tree and relabel its parent."""
                # Descend along the last child until that child is a leaf.
                if isinstance(tree[-1], Tree):
                    return clitic_join(tree[-1], clitic)
                if clitic[0][0][0] == 'ا':
                    # NOTE(review): the prefix added here looks like a
                    # zero-width non-joiner; confirm.
                    clitic[0] = ('‌' + clitic[0][0], clitic[0][1])
                tree[-1] = (tree[-1][0] + clitic[0][0], clitic[0][1])
                tree.set_label('CLITICS')

            if not len(node.childNodes):
                return
            first = node.childNodes[0]
            if first.tagName == 'w':
                pos = extract_tags(first)
                return Tree(node.tagName, [(first.childNodes[0].data.replace(
                    'می ', 'می‌'), self._pos_map(pos))])

            childs = node.childNodes[
                2:] if node.tagName == 'S' else node.childNodes
            # Filter into a new list: calling remove() on the list being
            # iterated skips the element after each removed one (leaving None
            # entries in the Tree) and also mutates node.childNodes itself.
            childs = [child for child in childs if len(child.childNodes)]
            tree = Tree(node.tagName, [traverse(child) for child in childs])

            if (self._join_clitics and len(tree) > 1
                    and isinstance(tree[1], Tree)
                    and tree[1].label() == 'CLITIC'
                    and tree[1][0][1] not in {'P', 'V'}):
                clitic = tree[-1]
                # Rebuild the tree from the first child's subtrees, then
                # attach the clitic.
                tree = Tree(tree.label(), [subtree for subtree in tree[0]])
                clitic_join(tree, clitic)

            if (self._join_verb_parts and len(tree) > 1
                    and isinstance(tree[1], Tree)
                    and isinstance(tree[0], Tree)
                    and tree[0].label() == 'AUX'
                    and tree[0][0][0] in self._tokenizer.before_verbs):
                # Prefix the verb with the auxiliary's word and drop the
                # auxiliary subtree.
                tree[1][0] = (tree[0][0][0] + ' ' + tree[1][0][0],
                              tree[1][0][1])
                tree.remove(tree[0])

            # The after-verb join was written out twice back to back in the
            # original; it is kept as two passes so the behaviour is unchanged.
            for _ in range(2):
                if not self._join_verb_parts:
                    break
                leaves = tree.leaves()
                if (len(leaves) > 1
                        and leaves[-1][0] in self._tokenizer.after_verbs
                        and leaves[-2][0] in self._tokenizer.verbe):
                    tree[1][0] = (tree[0].leaves()[-1][0] + ' ' + tree[1][0][0],
                                  tree[1][0][1])
                    path = tree.leaf_treeposition(len(tree.leaves()) - 2)
                    # Follow the path until only its last two steps remain,
                    # then remove the one-leaf subtree of the second-to-last
                    # word from that node.
                    removingtree = tree
                    while len(path) > 2:
                        removingtree = removingtree[path[0]]
                        path = path[1:]
                    word, tag = tree.pos()[-2]
                    removingtree.remove(Tree(tag, [word]))
            return tree

Example #3

Show file

def coref_replace(event_dict, key):
    """
    Function to replace pronouns with the referenced noun phrase. Iterates
    over each sentence in a news story and pulls coreference information
    from the applicable sentence, even if it is from another sentence. Also
    keeps track of any changes in indexes made by replacing pronouns, i.e.,
    the reference is longer than the pronoun so the tree index changes for
    future references. Filters coreferences on various dimensions to ensure
    only "good" coreferences are replaced. The default behavior is to do
    no replacement rather than a bad replacement. The function does not
    return a value, instead the event_dict is updated with the new parse tree
    containing the coref information.

    Parameters
    ----------

    event_dict: Dictionary.
                    Dictionary of sentence information, such as produced by
                    utilities.parse_sents().

    key: String.
            ID of the event or news story being processed.

    """
    #TODO: This could use some major refactoring.
    story_info = event_dict[key]['sent_info']
    if 'coref_info' not in story_info:
        return

    sent_info = story_info['sents']
    coref_info = story_info['coref_info']
    for sent in coref_info:
        for coref in coref_info[sent]['corefs']:
            pronoun = coref[0]
            ref = coref[1]

            # Skip pairs whose texts overlap, and pronouns spanning more than
            # one token.
            if any(word in ref[0] for word in pronoun[0].split()):
                continue
            if any(word in pronoun[0] for word in ref[0].split()):
                continue
            if pronoun[4] - pronoun[3] > 1:
                continue

            try:
                #Getting the stuff for pronouns
                pronoun_info = sent_info[pronoun[1]]
                if 'coref_tree' in pronoun_info:
                    pronoun_sent = copy.deepcopy(pronoun_info['coref_tree'])
                else:
                    pronoun_sent = Tree(copy.deepcopy(
                        pronoun_info['parse_tree']))
                pro_shift = coref_info[pronoun[1]]['shift']

                #Getting stuff for the reference
                if 'coref_tree' in sent_info[ref[1]]:
                    coref_sent = sent_info[ref[1]]['coref_tree']
                else:
                    coref_sent = Tree(sent_info[ref[1]]['parse_tree'])
                ref_shift = coref_info[ref[1]]['shift']

                #Actually replacing the pronoun
                try:
                    pronoun_pos = pronoun_sent.leaf_treeposition(
                        pronoun[3] + pro_shift)
                    #Hunting for the right pronoun: try one leaf to the left,
                    #then one to the right, and give up on this sentence if
                    #neither matches.
                    if pronoun_sent[pronoun_pos] != pronoun[0]:
                        left_pos = pronoun_sent.leaf_treeposition(
                            pronoun[3] + (pro_shift - 1))
                        if pronoun_sent[left_pos] == pronoun[0]:
                            pronoun_pos = left_pos
                            coref_info[pronoun[1]]['shift'] -= 1
                        else:
                            right_pos = pronoun_sent.leaf_treeposition(
                                pronoun[3] + (pro_shift + 1))
                            if pronoun_sent[right_pos] == pronoun[0]:
                                pronoun_pos = right_pos
                                coref_info[pronoun[1]]['shift'] += 1
                            else:
                                break

                    #Hunting for the right coref: use the unshifted position
                    #when its phrase contains the reference text (an exact
                    #match is also a containment), else the shifted one.
                    original_coref_index = coref_sent.leaf_treeposition(
                        ref[3])[:-2]
                    if ref[0] in ' '.join(
                            coref_sent[original_coref_index].leaves()):
                        coref_pos = original_coref_index
                    else:
                        coref_pos = coref_sent.leaf_treeposition(
                            ref[3] + ref_shift)[:-2]

                    # NOTE(review): the original tested whether ref[0] was
                    # missing from the chosen phrase but took no action
                    # (`pass`); confirm whether a skip was intended.

                    #Found everything, now replace
                    coref_tree = Tree('COREF', [coref_sent[coref_pos]])
                    pronoun_sent[pronoun_pos[:-1]] = coref_tree
                except IndexError:
                    #TODO: Should this use the original sentence rather
                    #than possibly bad coreferences?
                    print("""Key {}, sentence {} has a problem with the corefencing. Breaking and moving on.\n""".format(key, sent))
                    break

                #Recording the shift length for the pronoun replacement
                if len(coref_tree.leaves()) > 2:
                    coref_info[pronoun[1]]['shift'] += coref_tree.height()

                coref_info[pronoun[1]]['errors'].append(False)

                if not any(coref_info[pronoun[1]]['errors']):
                    if pronoun_sent != sent_info[sent]['parse_tree']:
                        sent_info[sent]['coref_tree'] = pronoun_sent
            except RuntimeError as e:
                print('There was an error. {}'.format(e))
                coref_info[pronoun[1]]['errors'].append(True)