Exemple #1
0
def extract_entity(parse_tree, semantic_role=''):
    """Creates an entity object given a snippet of a parse tree.

    Every subtree of parse_tree is visited in treepositions() order and its
    node label decides which part of the entity (quantifier, name or
    description) is filled from the subtree's lowercased leaves.

    Args:
        parse_tree: Tree snippet describing the entity.
        semantic_role: 'Location', 'Source' or 'Destination' produce a
            Location; any other value produces an ObjectEntity.

    Returns:
        The populated Location or ObjectEntity.
    """
    location_roles = ('Location', 'Source', 'Destination')
    entity = Location() if semantic_role in location_roles else ObjectEntity()

    # Positions, relative to parse_tree, that were already handled by a
    # recursive call or swallowed whole into a description. Kept in a set so
    # the membership test done for every node stays constant-time.
    ignore_positions = set()
    previous_node = None
    previous_leaves = None
    for position in parse_tree.treepositions():
        subtree = parse_tree[position]
        # Only Tree nodes are inspected; anything else found at a position
        # is skipped, and so is everything that was already consumed.
        if not isinstance(subtree, Tree) or position in ignore_positions:
            continue
        node = subtree.node
        leaves = ' '.join(subtree.leaves()).lower()

        # A noun phrase might have sub-parts that we need to parse
        # recursively: recurse while there are NP's below the current node
        if subtree is not parse_tree and 'NP' in node:
            entity.merge(extract_entity(subtree))
            # ignore_positions should be relative to parse_tree
            ignore_positions.update(position + subposition
                                    for subposition in subtree.treepositions())

        # The label checks below are deliberately a separate chain: they
        # still run for a node that was just handed to the recursive call.
        # A determiner node adds some information for the quantifier
        if 'DT' in node:
            entity.quantifier.fill_determiner(leaves)
        # Cardinal number sets the quantifier number
        elif node == 'CD':
            entity.quantifier.fill_cardinal(leaves)
            if entity.quantifier.number is None:
                # Not actually a number
                entity.name = leaves
        elif node == 'PRP':
            entity.name = 'Commander' if leaves in ('i', 'me') else leaves
        # Prepositional phrases only describe an entity that already has a
        # name; SBAR and JJ nodes are always taken as description
        elif ('PP' in node and entity.name) or node in ('SBAR', 'JJ'):
            entity.description.append(leaves)
            # ignore_positions should be relative to parse_tree
            ignore_positions.update(position + subposition
                                    for subposition in subtree.treepositions())
        # Two nouns in a row: the earlier one becomes a description and the
        # later one takes over as the name
        elif ('NN' in node and previous_node and 'NN' in previous_node
              and entity.name == previous_leaves):
            entity.description.append(previous_leaves)
            entity.name = leaves
        elif 'NN' in node or node == '-NONE-':
            # Fall back to the raw leaves when morphy returns None
            lemma = morphy(leaves, 'n')
            entity.name = leaves if lemma is None else lemma
        elif node == 'RB' and leaves == 'there':
            entity.name = 'there'
        previous_node = node
        previous_leaves = leaves
    return entity
Exemple #2
0
def extract_entity(parse_tree, semantic_role=''):
    """Creates an entity object given a snippet of a parse tree.

    The tree is walked in treepositions() order. For each Tree node the
    label selects what gets filled in on the entity: determiners and
    cardinals go to the quantifier, pronouns and nouns set the name, and
    PP/SBAR/JJ nodes are appended to the description.

    Args:
        parse_tree: Tree snippet to extract the entity from.
        semantic_role: When it is 'Location', 'Source' or 'Destination' a
            Location is built, otherwise an ObjectEntity.

    Returns:
        The Location or ObjectEntity that was filled in.
    """
    if semantic_role in ('Location', 'Source', 'Destination'):
        entity = Location()
    else:
        entity = ObjectEntity()

    # Ignore recursed trees and added descriptions. Positions are stored
    # relative to parse_tree; a set is used because membership is checked
    # once per visited node.
    ignore_positions = set()
    previous_node = None
    previous_leaves = None
    for position in parse_tree.treepositions():
        subtree = parse_tree[position]
        if not isinstance(subtree, Tree):
            continue
        if position in ignore_positions:
            continue

        node = subtree.node
        leaves = ' '.join(subtree.leaves()).lower()

        # A noun phrase might have sub-parts that we need to parse
        # recursively. Recurse while there are NP's below the current node.
        if 'NP' in node and subtree is not parse_tree:
            entity.merge(extract_entity(subtree))
            ignore_positions.update(
                position + subposition
                for subposition in subtree.treepositions())

        # Independent of the recursion above: classify the node by label.
        if 'DT' in node:
            # A determiner adds some information for the quantifier
            entity.quantifier.fill_determiner(leaves)
        elif node == 'CD':
            # Cardinal number sets the quantifier number
            entity.quantifier.fill_cardinal(leaves)
            if entity.quantifier.number is None:
                # Not actually a number
                entity.name = leaves
        elif node == 'PRP':
            entity.name = 'Commander' if leaves in ('i', 'me') else leaves
        elif ('PP' in node and entity.name) or node in ('SBAR', 'JJ'):
            # The whole phrase becomes a description, so none of its
            # sub-nodes should be visited again
            entity.description.append(leaves)
            ignore_positions.update(
                position + subposition
                for subposition in subtree.treepositions())
        elif ('NN' in node and previous_node and 'NN' in previous_node
              and entity.name == previous_leaves):
            # Consecutive nouns: demote the previous noun to a description
            entity.description.append(previous_leaves)
            entity.name = leaves
        elif 'NN' in node or node == '-NONE-':
            # Use the lemma when morphy finds one, the raw leaves otherwise
            lemma = morphy(leaves, 'n')
            entity.name = lemma if lemma is not None else leaves
        elif node == 'RB' and leaves == 'there':
            entity.name = 'there'

        previous_node = node
        previous_leaves = leaves
    return entity
def get_verb_frames(verb):
    """Return the verb frame objects created for the lemma of verb.

    The verb is lemmatized with morphy(verb, 'v') and the lemma is handed to
    _create_vfos; whatever that call returns is passed back unchanged.
    """
    return _create_vfos(morphy(verb, 'v'))
Exemple #4
0
def extract_frames_from_parse(parse_tree_string, verbose=False):
    """Take a string representing the parse tree as input, and print the
    semantic parse. The result list consists of a list of tuples, with each
    tuple containing the VerbNet frame and its associated tree."""
    result_list = []

    # In case we're handed an bad string, bail somewhat gracefully
    try:
        parse_tree = Tree.parse(parse_tree_string)
    except ValueError:
        print "Warning: semantics could not parse tree", repr(
            parse_tree_string)
        return result_list

    # Temporarily (and maybe permanently) disabled features:
    # 1. Clause splitting: we have not found any example where it does something
    # 2. Activizing clauses: for now, passives do not matter.

    # Split clauses to handle them separately
    #split_clause_dict = split_clauses(parse_tree)

    # Activize clauses
    #for key, (clause, conjunction) in split_clause_dict.items():
    #    activized_clause = activize_clause(clause)
    #    split_clause_dict[key] = (activized_clause, conjunction)

    #for (clause, conjunction) in split_clause_dict.values():
    for clause, conjunction in ((parse_tree, ''), ):
        # Split conjunctions and duplicate arguments if necessary
        split_tree_dict = split_conjunctions(clause)

        if conjunction != '':
            result_list.append(conjunction)

        for (split_tree, conjunction) in split_tree_dict.values():
            if conjunction != '':
                result_list.append(conjunction)

            for tree in split_tree:
                tag_list = []

                # Store whether there was an existential there
                if is_existential(str(tree)):
                    tag_list.append('ex')

                # Transformational grammar stuff
                tree = existential_there_insertion(tree)
                tree = invert_clause(tree)
                tree = wh_movement(tree)

                if EXTRACT_DEBUG:
                    print 'Transformed tree:'
                    print str(tree)

                verbs = find_verbs(tree)

                # Create VFOs for each verb, then match them to the parse tree
                for verb, negation in verbs:
                    lemmatized_verb = morphy(verb, 'v')
                    vfo_list = create_VerbFrameObjects(lemmatized_verb)
                    match_list = []

                    if EXTRACT_DEBUG:
                        print 'VFO list for %s:' % verb
                        print '\n'.join(
                            str(vfo.frame_list) for vfo in vfo_list)

                    for vfo in vfo_list:
                        match = vfo.match_parse(tree)

                        if match:
                            if EXTRACT_DEBUG:
                                print 'Matched:'
                                print '\t', str(vfo.frame_list)
                                print 'with'
                                print '\t', str(tree)
                            match_list.append((match, vfo.classid))

                    if EXTRACT_DEBUG:
                        print 'Match list:'

                        for m in match_list:
                            print 'Sense:', m[1]
                            for a, b in m[0].items():
                                print a, str(b)
                            print '\n\n'

                    (best_match, sense) = pick_best_match(match_list)

                    if EXTRACT_DEBUG:
                        print 'Chose: '
                        if best_match:
                            print sense
                            for a, b in best_match.items():
                                print a, str(b)
                        else:
                            print str(None)
                        print '\n\n'
                    if not best_match is None:
                        result_list.append((best_match, tree, tag_list, sense,
                                            verb, negation))

    return result_list
Exemple #5
0
def extract_frames_from_parse(parse_tree_string, verbose=False):
    """Take a string representing the parse tree as input, and print the
    semantic parse. The result list consists of a list of tuples, with each
    tuple containing the VerbNet frame and its associated tree."""
    result_list = []

    # In case we're handed an bad string, bail somewhat gracefully
    try:
        parse_tree = Tree.parse(parse_tree_string)
    except ValueError:
        print "Warning: semantics could not parse tree", repr(parse_tree_string)
        return result_list

    # Temporarily (and maybe permanently) disabled features:
    # 1. Clause splitting: we have not found any example where it does something
    # 2. Activizing clauses: for now, passives do not matter.

    # Split clauses to handle them separately
    #split_clause_dict = split_clauses(parse_tree)

    # Activize clauses
    #for key, (clause, conjunction) in split_clause_dict.items():
    #    activized_clause = activize_clause(clause)
    #    split_clause_dict[key] = (activized_clause, conjunction)

    #for (clause, conjunction) in split_clause_dict.values():
    for clause, conjunction in ((parse_tree, ''),):
        # Split conjunctions and duplicate arguments if necessary
        split_tree_dict = split_conjunctions(clause)

        if conjunction != '':
            result_list.append(conjunction)

        for (split_tree, conjunction) in split_tree_dict.values():
            if conjunction != '':
                result_list.append(conjunction)

            for tree in split_tree:
                tag_list = []

                # Store whether there was an existential there
                if is_existential(str(tree)):
                    tag_list.append('ex')

                # Transformational grammar stuff
                tree = existential_there_insertion(tree)
                tree = invert_clause(tree)
                tree = wh_movement(tree)

                if EXTRACT_DEBUG:
                    print 'Transformed tree:'
                    print str(tree)

                verbs = find_verbs(tree)

                # Create VFOs for each verb, then match them to the parse tree
                for verb, negation in verbs:
                    lemmatized_verb = morphy(verb, 'v')
                    vfo_list = create_VerbFrameObjects(lemmatized_verb)
                    match_list = []

                    if EXTRACT_DEBUG:
                        print 'VFO list for %s:' % verb
                        print '\n'.join(str(vfo.frame_list) for vfo in vfo_list)

                    for vfo in vfo_list:
                        match = vfo.match_parse(tree)

                        if match:
                            if EXTRACT_DEBUG:
                                print 'Matched:'
                                print '\t', str(vfo.frame_list)
                                print 'with'
                                print '\t', str(tree)
                            match_list.append((match, vfo.classid))

                    if EXTRACT_DEBUG:
                        print 'Match list:'

                        for m in match_list:
                            print 'Sense:', m[1]
                            for a, b in m[0].items():
                                print a, str(b)
                            print '\n\n'

                    (best_match, sense) = pick_best_match(match_list)

                    if EXTRACT_DEBUG:
                        print 'Chose: '
                        if best_match:
                            print sense
                            for a, b in best_match.items():
                                print a, str(b)
                        else:
                            print str(None)
                        print '\n\n'
                    if not best_match is None:
                        result_list.append((best_match, tree, tag_list, sense, verb, negation))

    return result_list