Beispiel #1
0
def extract_entity(parse_tree, semantic_role=''):
    """Create an entity object from a snippet of a parse tree.

    Every subtree of parse_tree is visited in treepositions() order and its
    node label decides which part of the entity it fills in (quantifier,
    name or description).

    Args:
        parse_tree: Tree to extract the entity from.
        semantic_role: Role label of the phrase. 'Location', 'Source' and
            'Destination' yield a Location; any other value yields an
            ObjectEntity.

    Returns:
        The populated Location or ObjectEntity.
    """
    location_roles = ('Location', 'Source', 'Destination')
    entity = Location() if semantic_role in location_roles else ObjectEntity()

    # Positions (relative to parse_tree) that were already handled by a
    # recursive call or recorded as a description. Only membership is ever
    # tested, so a set is used.
    ignore_positions = set()
    previous_node = None
    previous_leaves = None
    for position in parse_tree.treepositions():
        subtree = parse_tree[position]
        # Leaves are plain tokens, not Trees; only labelled nodes matter.
        if not isinstance(subtree, Tree):
            continue
        if position in ignore_positions:
            continue
        node = subtree.node
        leaves = ' '.join(subtree.leaves()).lower()

        # A noun phrase might have sub-parts that we need to parse
        # recursively: recurse while there are NP's below the current node.
        # The root itself is skipped to avoid infinite recursion.
        if subtree is not parse_tree and 'NP' in node:
            entity.merge(extract_entity(subtree))
            # ignore_positions must be relative to parse_tree, hence the
            # position prefix.
            ignore_positions.update(
                position + subposition
                for subposition in subtree.treepositions())

        # A determiner node adds some information for the quantifier
        if 'DT' in node:
            entity.quantifier.fill_determiner(leaves)
        # Cardinal number sets the quantifier number
        elif node == 'CD':
            entity.quantifier.fill_cardinal(leaves)
            if entity.quantifier.number is None:
                # Not actually a number, so treat the token as the name
                entity.name = leaves
        elif node == 'PRP':
            entity.name = 'Commander' if leaves in ('i', 'me') else leaves
        # Prepositional phrases only describe an entity that already has a
        # name; SBAR and JJ nodes are always recorded as descriptions.
        elif ('PP' in node and entity.name) or node in ('SBAR', 'JJ'):
            entity.description.append(leaves)
            # ignore_positions must be relative to parse_tree
            ignore_positions.update(
                position + subposition
                for subposition in subtree.treepositions())
        # Two consecutive nouns: the earlier one (currently the name) becomes
        # a description and the later one becomes the name.
        elif ('NN' in node and previous_node and 'NN' in previous_node
              and entity.name == previous_leaves):
            entity.description.append(previous_leaves)
            entity.name = leaves
        elif 'NN' in node or node == '-NONE-':
            entity.name = morphy(leaves, 'n')
            if entity.name is None:
                # morphy found no lemma; fall back to the surface form
                entity.name = leaves
        elif node == 'RB' and leaves == 'there':
            entity.name = 'there'
        previous_node = node
        previous_leaves = leaves
    return entity
Beispiel #2
0
def get_semantics_from_parse_tree(parse_tree_string):
    """Extract the best VerbNet frame match for each verb of a parse tree.

    The tree is split into clauses, each clause is activized and split on
    conjunctions, and every resulting tree is transformed and matched
    against the verb frame objects of each verb found in it.

    Args:
        parse_tree_string: String representation of the parse tree, as
            accepted by Tree.parse.

    Returns:
        A list in processing order that interleaves non-empty conjunction
        strings with (best_match, tree, tag_list, sense) tuples, one tuple
        per verb for which frames.pick_best_match returned a match.
    """
    parse_tree = Tree.parse(parse_tree_string)

    # Regex for finding verbs; loop-invariant, so compiled once up front
    verb_finder = re.compile(r'(?<=VB[ DGNPZ]) *\w*(?=\))')

    # Split clauses to handle them separately
    split_clause_dict = frames.split_clauses(parse_tree)

    # Activize clauses (only existing keys are reassigned)
    for key, (clause, clause_conjunction) in split_clause_dict.items():
        activized_clause = frames.activize_clause(clause)
        split_clause_dict[key] = (activized_clause, clause_conjunction)

    result_list = []

    for (clause, clause_conjunction) in split_clause_dict.values():
        # Split conjunctions and duplicate arguments if necessary
        split_tree_dict = frames.split_conjunctions(clause)

        if clause_conjunction != '':
            result_list.append(clause_conjunction)

        for (split_tree, tree_conjunction) in split_tree_dict.values():
            if tree_conjunction != '':
                result_list.append(tree_conjunction)

            for tree in split_tree:
                tag_list = []

                # Store whether there was an existential there; this is
                # checked before the transformations below rewrite the tree.
                if frames.is_existential(str(tree)):
                    tag_list.append('ex')

                # Transformational grammar stuff
                tree = frames.existential_there_insertion(tree)
                tree = frames.invert_clause(tree)
                tree = frames.wh_movement(tree)

                # Surface forms of the verbs in the transformed tree
                verbs = (word.strip().lower() for word in
                         verb_finder.findall(str(tree)))

                # Create VFOs for each verb, then match them to the parse tree
                for verb in verbs:
                    # NOTE(review): morphy may return None for unknown words;
                    # confirm create_VerbFrameObjects copes with that.
                    lemmatized_verb = morphy(verb, 'v')
                    vfo_list = frames.create_VerbFrameObjects(lemmatized_verb)

                    match_list = []
                    for vfo in vfo_list:
                        match = vfo.match_parse(tree)
                        if match:
                            match_list.append((match, vfo.classid))

                    (best_match, sense) = frames.pick_best_match(match_list)
                    if best_match is not None:
                        result_list.append((best_match, tree, tag_list, sense))

    return result_list
Beispiel #3
0
def extract_entity_class(parse_tree, semantic_role=''):
    """Create an EntityClass object from a snippet of a parse tree.

    Every subtree of parse_tree is visited in treepositions() order. Its node
    label decides whether it updates the quantifier or adds a predicate.
    Scanning stops at the first noun phrase that yields an object name.

    Args:
        parse_tree: Tree to extract the entity class from.
        semantic_role: Key under which object predicates are stored, and the
            first argument given to each such Predicate.

    Returns:
        An EntityClass built from the quantifier (which starts as number 1,
        definite) and the collected predicates.
    """
    quantifier = Quantifier()
    quantifier.number = 1
    quantifier.definite = True

    predicates = defaultdict(list)

    for position in parse_tree.treepositions():
        subtree = parse_tree[position]
        # Leaves are plain tokens, not Trees; only labelled nodes matter.
        if not isinstance(subtree, Tree):
            continue
        node = subtree.node

        # A determiner node adds some information for the quantifier
        if node == 'DT':
            determiner = ' '.join(subtree.leaves()).lower()
            if determiner == 'any':
                quantifier.definite = False
                quantifier.exhaustive = True
                quantifier.proportionality = 'at least'
                quantifier.number = 1
            elif determiner in ('a', 'an'):
                quantifier.plural = False
                quantifier.number = 1
                quantifier.definite = False
                quantifier.exhaustive = False
                quantifier.proportionality = 'at least'
            elif determiner == 'the':
                quantifier.definite = True
                quantifier.exhaustive = False
        # A personal pronoun adds some information for the quantifier
        elif node == 'PRP':
            pronoun = ' '.join(subtree.leaves()).lower()
            # First-person pronouns refer to the Commander
            is_commander = pronoun in ('i', 'me')
            obj = 'Commander' if is_commander else pronoun
            quantifier.definite = True
            quantifier.number = 1
            quantifier.proportionality = 'exact'
            quantifier.exhaustive = True
            if not is_commander:
                # Every pronoun other than the Commander is marked unfulfilled
                quantifier.fulfilled = False

            predicates[semantic_role].append(Predicate(semantic_role, obj))
        # Prepositional phrase generates a location predicate
        elif node == 'PP-LOC':
            predicate_type = 'Location'
            for subposition in subtree.treepositions():
                child = subtree[subposition]
                if not isinstance(child, Tree):
                    continue
                # NOTE(review): `'NP' not in child[0]` is a membership test on
                # the first child itself, not on its node label -- confirm
                # that this is the intended "lowest NP" check.
                if 'NP' in child.node and 'NP' not in child[0]:
                    predicates[predicate_type].append(
                        Predicate(predicate_type, ' '.join(child.leaves())))
        # Cardinal number sets the quantifier number
        elif node == 'CD':
            number_text = ' '.join(subtree.leaves()).lower()
            if number_text.isdigit():
                # Numerals written with digits ("3") used to be ignored,
                # leaving the default number in place.
                try:
                    quantifier.number = int(number_text)
                except ValueError:
                    # Digit-like characters int() cannot read (for example
                    # superscripts): keep the current number.
                    pass
            else:
                quantifier.number = text2int(number_text)

        # A noun phrase might have sub-parts that we need to parse separately
        elif 'NP' in node:
            obj_word_list = []
            for subposition in subtree.treepositions():
                child = subtree[subposition]
                # Only look at nodes that directly dominate a token: skip
                # leaves and nodes whose first child is itself a Tree.
                if (not isinstance(child, Tree) or
                        isinstance(child[0], Tree)):
                    continue

                theme_word = ' '.join(child.leaves()).lower()
                child_node = child.node

                # Get the actual object in question. Nouns and cardinals are
                # always kept; adjectives only if not already collected.
                # NOTE(review): the duplicate check applies to JJ only, as in
                # the original operator precedence -- confirm this is intended.
                if ('NN' in child_node or 'CD' in child_node or
                        ('JJ' in child_node and
                         theme_word not in obj_word_list)):
                    m_word = morphy(theme_word, 'n')
                    if m_word is None:
                        # morphy found no lemma; keep the surface form
                        m_word = theme_word
                    obj_word_list.append(m_word)

                # Get the quantifier info
                if obj_word_list and quantifier.plural is not None:
                    if obj_word_list[0] != theme_word:
                        quantifier.proportionality = 'at least'
                    else:
                        quantifier.proportionality = 'exact'
                    quantifier.number = 1

            # Compile object reference into lower_case_with_underscores name
            if obj_word_list:
                obj_name = "_".join(word.lower() for word in obj_word_list)
                predicates[semantic_role].append(
                    Predicate(semantic_role, obj_name))
                # The first noun phrase that names an object ends the scan
                break

        # If it's just a noun, add it as a predicate
        elif 'N' in node and 'SBJ' not in node:
            predicates[semantic_role].append(
                Predicate(semantic_role, ' '.join(subtree.leaves())))
            quantifier.definite = True
        elif 'ADV' in node:
            predicates[semantic_role].append(
                Predicate(semantic_role, ' '.join(subtree.leaves())))

    entity_class = EntityClass(quantifier, predicates)

    return entity_class
Beispiel #4
0
def extract_frames_from_parse(parse_tree_string):
    """Extract the best VerbNet frame match for each verb of a parse tree.

    The tree is split into clauses, each clause is activized and split on
    conjunctions, and every resulting tree is transformed and matched
    against the verb frame objects of each verb found by frames.find_verbs.
    When EXTRACT_DEBUG is truthy, intermediate results are printed.

    Args:
        parse_tree_string: String representation of the parse tree, as
            accepted by Tree.parse.

    Returns:
        A list in processing order that interleaves non-empty conjunction
        strings with (best_match, tree, tag_list, sense, verb, negation)
        tuples, one tuple per verb for which frames.pick_best_match returned
        a match. The list is empty if Tree.parse raises ValueError.
    """
    result_list = []

    # In case we're handed a bad string, bail somewhat gracefully
    try:
        parse_tree = Tree.parse(parse_tree_string)
    except ValueError:
        # Single-argument print calls give the same output under Python 2
        # and also parse under Python 3.
        print("Warning: semantics could not parse tree %s"
              % repr(parse_tree_string))
        return result_list

    # Split clauses to handle them separately
    split_clause_dict = frames.split_clauses(parse_tree)

    # Activize clauses (only existing keys are reassigned)
    for key, (clause, clause_conjunction) in split_clause_dict.items():
        activized_clause = frames.activize_clause(clause)
        split_clause_dict[key] = (activized_clause, clause_conjunction)

    for (clause, clause_conjunction) in split_clause_dict.values():
        # Split conjunctions and duplicate arguments if necessary
        split_tree_dict = frames.split_conjunctions(clause)

        if clause_conjunction != '':
            result_list.append(clause_conjunction)

        for (split_tree, tree_conjunction) in split_tree_dict.values():
            if tree_conjunction != '':
                result_list.append(tree_conjunction)

            for tree in split_tree:
                tag_list = []

                # Store whether there was an existential there; this is
                # checked before the transformations below rewrite the tree.
                if frames.is_existential(str(tree)):
                    tag_list.append('ex')

                # Transformational grammar stuff
                tree = frames.existential_there_insertion(tree)
                tree = frames.invert_clause(tree)
                tree = frames.wh_movement(tree)

                if EXTRACT_DEBUG:
                    print('Transformed tree:')
                    print(str(tree))

                verbs = frames.find_verbs(tree)

                # Create VFOs for each verb, then match them to the parse tree
                for verb, negation in verbs:
                    # NOTE(review): morphy may return None for unknown words;
                    # confirm create_VerbFrameObjects copes with that.
                    lemmatized_verb = morphy(verb, 'v')
                    vfo_list = frames.create_VerbFrameObjects(lemmatized_verb)
                    match_list = []

                    if EXTRACT_DEBUG:
                        print('VFO list for %s:' % verb)
                        print('\n'.join(str(vfo.frame_list)
                                        for vfo in vfo_list))

                    for vfo in vfo_list:
                        match = vfo.match_parse(tree)

                        if match:
                            if EXTRACT_DEBUG:
                                # The Python 2 print statement puts no space
                                # after a tab, so the parts are concatenated.
                                print('Matched:')
                                print('\t' + str(vfo.frame_list))
                                print('with')
                                print('\t' + str(tree))
                            match_list.append((match, vfo.classid))

                    if EXTRACT_DEBUG:
                        print('Match list:')

                        for candidate, classid in match_list:
                            print('Sense: %s' % (classid,))
                            for role, filler in candidate.items():
                                print('%s %s' % (role, str(filler)))
                            print('\n\n')

                    (best_match, sense) = frames.pick_best_match(match_list)

                    if EXTRACT_DEBUG:
                        print('Chose: ')
                        if best_match:
                            for role, filler in best_match.items():
                                print('%s %s' % (role, str(filler)))
                        else:
                            print(str(None))
                        print('\n\n')
                    if best_match is not None:
                        result_list.append(
                            (best_match, tree, tag_list, sense, verb,
                             negation))

    return result_list