Example #1
0
 def setUp(self):
     """Build the sentence / parse-tree / frame fixtures used by the tests.

     Populates self.exDict, self.crazyframes, self.th and self.matcher.
     """
     # Each entry maps a fixture name to its sentence ("sent"), its parse
     # ("tree") and, where present, the candidate frames and expected results.
     self.exDict = {"carry_from_to" :{"sent" : "Carry the meals from the kitchen to the rooms.",
                                "tree" :  Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Carry']), Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['meals'])]), Tree('PP-CLR', [Tree('IN', ['from']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['kitchen'])])]), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['rooms'])])])]), Tree('.', ['.'])]),
                                "possible_frames": [[('NP', 'Agent', '', ''), ('VERB', 'VERB', '', ''), ('NP', 'Theme', '', '')],
                                                    [('NP', 'Agent', '', ''), ('VERB', 'VERB', '', ''), ('NP', 'Theme', '', ''), ('PREP', 'to towards', '', ''), ('NP', 'Destination', '', '')],
                                                    [('NP', 'Agent', '', ''), ('VERB', 'VERB', '', ''), ('NP', 'Theme', '', ''), ('PREP', 'PREP', '', ''), ('NP', 'Source', '', ''), ('PREP', 'to towards', '', ''), ('NP', 'Destination', '', '')],    
                                                    [('NP', 'Agent', '', ''), ('VERB', 'VERB', '', ''), ('NP', 'Theme', '', ''), ('PREP', 'to towards', '', ''), ('NP', 'Destination', '', ''), ('PREP', 'PREP', '', ''), ('NP', 'Source', '', '')]],
                                "correct_frame" :   [('NP', 'Agent', '', ''), ('VERB', 'VERB', '', ''), ('NP', 'Theme', '', ''), ('PREP', 'PREP', '', ''), ('NP', 'Source', '', ''), ('PREP', 'to towards', '', ''), ('NP', 'Destination', '', '')],
                                # NOTE(review): the expected Agent leaf is tagged '-NPNONE-' while
                                # "tree" above uses '-NONE-'; presumably the matcher produces that
                                # tag itself -- confirm against the matcher.
                                "correct_entry" : {'to towards': Tree('TO', ['to']), 'Destination': Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['rooms'])]), 'Agent': Tree('NP-SBJ-A', [Tree('-NPNONE-', ['*'])]), 'Source': Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['kitchen'])]), 'Theme': Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['meals'])]), 'VERB': Tree('VB', ['Carry'])}                                    
                                },
               # Same frames as above, but in this parse the "to" PP is nested
               # inside the NP of the "from" PP instead of being its sibling.
               "carry_from_to_deep_pp":{"sent" : "Carry the meals from the kitchen to the cafeteria.",
                                "tree" :  Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Carry']), Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['meals'])]), Tree('PP-CLR', [Tree('IN', ['from']), Tree('NP-A', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['kitchen'])]), Tree('PP', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cafeteria'])])])])])]), Tree('.', ['.'])]),
                                "possible_frames": [[('NP', 'Agent', '', ''), ('VERB', 'VERB', '', ''), ('NP', 'Theme', '', '')],
                                                    [('NP', 'Agent', '', ''), ('VERB', 'VERB', '', ''), ('NP', 'Theme', '', ''), ('PREP', 'to towards', '', ''), ('NP', 'Destination', '', '')],
                                                    [('NP', 'Agent', '', ''), ('VERB', 'VERB', '', ''), ('NP', 'Theme', '', ''), ('PREP', 'PREP', '', ''), ('NP', 'Source', '', ''), ('PREP', 'to towards', '', ''), ('NP', 'Destination', '', '')],    
                                                    [('NP', 'Agent', '', ''), ('VERB', 'VERB', '', ''), ('NP', 'Theme', '', ''), ('PREP', 'to towards', '', ''), ('NP', 'Destination', '', ''), ('PREP', 'PREP', '', ''), ('NP', 'Source', '', '')]],
                                "correct_frame" :   [('NP', 'Agent', '', ''), ('VERB', 'VERB', '', ''), ('NP', 'Theme', '', ''), ('PREP', 'PREP', '', ''), ('NP', 'Source', '', ''), ('PREP', 'to towards', '', ''), ('NP', 'Destination', '', '')] 
                                },           
             # Only a "wrong_frame" is recorded here: the sentence has no object
             # NP, whereas the frame contains a Theme NP slot.
             "defuse_in"   : {"sent" : "Defuse in the hallway.",
                              "tree" : Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Defuse']), Tree('PP-CLR', [Tree('IN', ['in']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['hallway'])])])]), Tree('.', ['.'])]),
                              "wrong_frame" : [('NP', 'Agent', '', ''), ('VERB', 'VERB', '', ''), ('NP', 'Theme', '', '')]
                              }
               }
     
     # Two hand-built frames that differ only in where the ('DT', 'DT') slot
     # sits relative to the Theme NP. The key names suggest one is expected to
     # match and the other to fail -- confirm against the tests that use them.
     self.crazyframes = {"SUBPHRASE_PASS": 
                    [('NP', 'Agent', '', ''), ('VERB', 'VERB', '', ''), ('DT','DT','',''),('NP', 'Theme', '', ''), ('PREP', 'to towards', '', ''), ('NP', 'Destination', '', '')],
                    "SUBPHRASE_FAIL": 
                    [('NP', 'Agent', '', ''), ('VERB', 'VERB', '', ''), ('NP', 'Theme', '', ''),('DT','DT','',''), ('PREP', 'to towards', '', ''), ('NP', 'Destination', '', '')]
                   }
     self.th = TreeHandler()
     # NOTE(review): the meaning of the (0, 2) arguments is defined by
     # ParseMatcher, which is not visible from here.
     self.matcher = ParseMatcher(0,2)
Example #2
0
def verb_child_frame_match(child, parent, subject, vp, negated):
    """Look up the best frame for the verb in child and wrap it in a FrameMatch.

    When subject is truthy the frame is matched against a fresh
    Tree('S', [subject, vp]); otherwise it is matched against parent.
    Returns a FrameMatch, or None when best_matching_frame reports
    (None, None).
    """
    if subject:
        target_tree = Tree('S', [subject, vp])
    else:
        target_tree = parent

    # The verb is the first element of the child node, lower-cased for lookup.
    verb = child[0].lower()
    found = best_matching_frame(verb, target_tree)
    if found == (None, None):
        return None
    # FrameMatch receives the two fields of the lookup result in swapped order.
    return FrameMatch(verb, found[1], found[0], negated, target_tree)
Example #3
0
def process(parse, verbose=True):
    """Run a parse string through frame extraction and semantic structuring.

    The parsed tree is always handed to print_parse; the extracted frames and
    the resulting structures are echoed only when verbose is true.

    Returns the semantic structures when any were created, otherwise None
    (no frames matched, or no structures were produced).
    """
    # Original parse
    print_parse(Tree.parse(parse), "Parse")

    frames = extract_frames_from_parse(parse, verbose=verbose)
    if verbose:
        # A bare print statement emits an empty line (Python 2).
        print
        for frame in frames:
            print(frame.pprint())
            if frame.condition:
                print("Condition:")
                print(frame.condition.pprint())
        print

    # Nothing to build structures from
    if not frames:
        if verbose:
            print("No frames matched.")
        return

    semantic_structures = create_semantic_structures(frames)
    if not semantic_structures:
        if verbose:
            print("No semantic structures returned.")
        return None

    if verbose:
        print(semantic_structures)
    return semantic_structures
    def pos_split(self,tree,pos,possibleParents=["S","VP","NP"],desiredCC="and",ccpos="CC"):
        """Split a tree on conjunctions whose parent phrase label equals pos.

            The strategy for this is to find the CC in the VP (like the VB in matching),
            find the nearest left or right siblings and if they are heads, pop.
            
            @input tree input tree to split on (it is modified in place)
            @input pos is the part of speech we are splitting on
            @input possibleParents are the possible phrase parents currently supported
                   (NOTE(review): mutable default; it is only passed on here, never
                   modified in this method -- confirm the helpers do not mutate it)
            @input desiredCC is the lemma of the CC we are looking for
            @input ccpos is the cc part of speech we are looking for
            @return list of trees: the recursively split left branches, followed by
                    what remains of tree (unwrapped when it has a single child)
            @raise UnlevelCCSiblings when both sibling paths differ in length from
                   the CC path (see the note at the check below)
        """
        res = []        
        # cursor tells get_main_pos_path where to resume; [-1] restarts the search
        cursor = [-1]
        cccount = self.num_ccs(tree,desiredCC)
        for i in range(cccount):
            ccpath = list(self.th.get_main_pos_path(tree, ccpos, -1, cursor=cursor))
            parentPhrase = self.th.which_parent(tree, ccpath, possibleParents, -1)

            # Strip the depth and pos delimiters to get the bare parent label
            parentpos = parentPhrase.split(self.th.depthdelim)[0].split(self.th.posdelim)[0]

            if parentpos == pos:                               
                left = self.sibling_cc_path(ccpath)
                right = self.sibling_cc_path(ccpath,tree=tree);
                
                #ADDED
                #if siblings of CC are NN (e.g. "Go to office1 and office2")
                #make them NP instead
                #this is needed otherwise NNs lead to bad creation of commands
                for child_pos, child in enumerate(tree[ccpath[:-2]]):
                    if child.node.split(".")[0] == "NN":
                        # the slice is a fresh list, so ccpath itself is not changed
                        temp_path = ccpath[:-2]
                        temp_path.append(child_pos)
                        child_path = temp_path
                        new_child = Tree("NP-A", [child])
                        tree[child_path] = new_child

                # NOTE(review): a chained comparison -- this is
                # (len(left) != len(ccpath)) and (len(ccpath) != len(right)),
                # so it only raises when BOTH siblings are unlevel. Confirm
                # whether "either" was intended.
                if len(left) != len(ccpath) != len(right):
                    raise UnlevelCCSiblings
                lefttree = copy.deepcopy(tree)

                #Instead of popping, need to replace parent with correct branch 
                temp = self.th.pop_path_cc(lefttree,right,ccpath)
                if temp:
                    #If splitting on S, temp will be the parent replacement
                    lefttree = temp
                #Recurse on "," list
                lefttrees = self.pos_split(lefttree,pos,possibleParents=possibleParents,desiredCC=self.listCC,ccpos=self.listCC)

                res.extend(lefttrees) #Copy and put left branch in results and keep going
                self.th.pop_left(tree,ccpath)#Pop everything to the left of ccpath and keep looking
                #self.th.pop_path_cc(tree, left,ccpath)#Pop for real, keep looking
                cursor = [-1]
            else:
                # This CC belongs to a different phrase type: skip it and
                # continue the search from its position
                if DEBUG: print pos,' CC: ',ccpath
                cursor = ccpath
        if len(tree) == 1:
            #Only one branch-> split on S, consume
            tree = tree[0]  
        res.append(tree)               
        return res
Example #5
0
def process(parse):
    """Show a parse and the subtrees produced by splitting its conjunctions.

    The string is parsed into a Tree and printed; every subtree returned by
    split_conjunctions is then handed to print_if_diff together with the
    full tree and a label naming the conjunction.
    """
    full_tree = Tree.parse(parse)
    print_parse(full_tree, "Parse")

    for pieces, conjunction in split_conjunctions(full_tree).values():
        for piece in pieces:
            label = "Subtree ({})".format(conjunction)
            print_if_diff(piece, full_tree, label)
Example #6
0
def process(parse):
    """Print a parse, then each conjunction-split subtree via print_if_diff."""
    # Keep the parsed tree under its own name; it is the comparison baseline.
    original_tree = Tree.parse(parse)
    print_parse(original_tree, "Parse")

    split_trees = split_conjunctions(original_tree)
    for entry in split_trees.values():
        # Each value is a (list of subtrees, conjunction) pair.
        split_parse, conjunction = entry
        for subtree in split_parse:
            print_if_diff(
                subtree, original_tree, "Subtree ({})".format(conjunction))
Example #7
0
    def match_parse(self, parse_tree):
        """Align the slots of self.frame_list, in order, with subtrees of a parse.

        parse_tree is an NLTK-style tree; its subtrees are scanned once, left
        to right, and each frame slot takes the next subtree that matches it.
        Returns a dict mapping each slot's role name (second field of the
        frame tag) to the subtree that filled it, or None when the scan ran
        off the end of the subtrees or not every slot was filled.
        """
        # subtrees() is a generator; indexing needs a real list
        candidates = list(parse_tree.subtrees())
        total = len(candidates)

        roles = {}
        filled = 0
        position = 0

        for tag in self.frame_list:
            # The scan position is shared by all slots: it never moves backwards
            while position < total:
                node = candidates[position]

                # No explicit NP for the Agent at the very first subtree:
                # insert an empty subject instead.
                # NOTE: assumes Agent is the first slot of the frame
                if (position == 0 and tag[1] == 'Agent'
                        and not self.__match_subtree(node, tag)):
                    roles[tag[1]] = Tree("(NP-SBJ-A (-NONE- *))")
                    filled += 1
                    break

                if self.__match_subtree(node, tag):
                    roles[tag[1]] = node
                    filled += 1
                    break

                position += 1

        # Exhausted the subtrees, or some slot stayed empty: no match
        if position == total or filled != len(self.frame_list):
            return None
        return roles
Example #8
0
def interactive_mode(window, first_input):
    """Interactively get input from the user and parse it.

    Loops forever: reads a line of text (or uses first_input on the first
    pass when it is truthy), runs it through the parse pipeline, shows the
    parse in one window and the result of process_parse_tree in another.
    Returns None when the text entered is "q" or "quit".
    """
    input_frame, input_win, parse_win, semantic_win = setup_windows(window)

    # Initialize pipeline and knowledge base
    pipeline = PipelineClient()
    kb = KnowledgeBase()

    # Send some data through the pipeline
    # (the result is not used; presumably a warm-up call -- confirm)
    result = pipeline.parse("This is a test.")
    input_frame.addstr(1, 1, 'Enter your input, then press Ctrl+G. '
                       'Enter "quit" or press Ctrl+C to exit.')
    input_frame.refresh()

    # Until the input is q/quit, process data
    last_input = first_input
    while True:
        # Display the first input if needed
        input_win.erase()
        input_win.refresh()
        if last_input:
            input_win.addstr(0, 0, last_input)

        # Get text from the input box, removing any embedded newlines
        if first_input:
            # Consume the pre-supplied input exactly once
            text = first_input
            first_input = None
        else:
            text = get_input(input_win).replace("\n", "").strip()
        last_input = text

        # Quit if needed
        if text == "q" or text == "quit":
            return

        # Get input again if it was empty
        if not text:
            continue

        # Echo input and display status, clearing both windows
        parse_win.clear()
        parse_win.addstr(text)
        parse_win.addstr('\nParsing and restoring null elements...')
        parse_win.refresh()
        semantic_win.clear()
        semantic_win.refresh()

        # Run the parse pipeline
        result = pipeline.parse(text)
        result_tree = Tree(result)

        # Output the longest parse that will fit. We try to draw the
        # possible output in order of decreasing length.
        parse_max_width = parse_win.getmaxyx()[1]
        possible_formats = (result_tree.pprint(margin=parse_max_width, force_multiline=True),
                            result_tree.pprint(margin=parse_max_width),
                            result)

        for formatted_result in possible_formats:
            parse_win.clear()
            try:
                parse_win.addstr(text + '\n')
                parse_win.addstr(formatted_result)
            except _curses.error:
                # This format did not fit in the window: try the next one
                continue
            else:
                # We've successfully printed, stop trying formats
                break
        else:
            # for/else: reached only when no format was drawn without error
            parse_win.clear()
            parse_win.addstr("Parse too large to show.\n")
        parse_win.refresh()

        # Do the same for semantics
        # Echo input and display status, after clearing the window
        semantic_win.clear()
        semantic_win.addstr(text)
        semantic_win.addstr('\nPerforming semantic analysis...')
        semantic_win.refresh()

        frames, new_commands, kb_response = process_parse_tree(result, text, kb)
        semantic_win.clear()
        try:
            if frames:
                semantic_win.addstr("Frames matched:\n")
                for frame in frames:
                    semantic_win.addstr("\t" + str(frame) + "\n")
            if new_commands:
                semantic_win.addstr("New commands:\n")
                for command in new_commands:
                    semantic_win.addstr(str(command) + "\n")
            if kb_response:
                semantic_win.addstr("KB response:\n")
                semantic_win.addstr(str(kb_response) + "\n")
            if not any((frames, new_commands, kb_response)):
                semantic_win.addstr("No frames matched.\n")
        except _curses.error:
            # Any of the writes above overflowed the window
            semantic_win.clear()
            semantic_win.addstr("Semantic representation too large to show.")
        semantic_win.refresh()

    # Not reachable: the loop above only exits through its own return
    return
Example #9
0
    def setUp(self):
        """Create the tree handler, one parsed example and two test frames."""
        self.th = TreeHandler()

        # Frame slots shared by the two hand-built frames below
        agent = ('NP', 'Agent', '', '')
        verb = ('VERB', 'VERB', '', '')
        determiner = ('DT', 'DT', '', '')
        theme = ('NP', 'Theme', '', '')
        to_prep = ('PREP', 'to towards', '', '')
        destination = ('NP', 'Destination', '', '')

        carry_tree = Tree(
            '''(S\n  (NP-SBJ-A (-NONE- *))\n  (VP\n    (VB Carry)\n    (NP-A (DT the) (NNS meals))\n    (PP-CLR (IN from) (NP-A (DT the) (NN kitchen)))\n    (PP-CLR (TO to) (NP-A (DT the) (NNS rooms))))\n  (. .))'''
        )

        # Expected role -> subtree mapping for the sentence above
        carry_solution = {
            'Destination': Tree('NP-A', [Tree('DT', ['the']),
                                         Tree('NNS', ['rooms'])]),
            'Agent': Tree('NP-SBJ-A', [Tree('-NPNONE-', ['*'])]),
            'to towards': Tree('TO', ['to']),
            'Theme': Tree('NP-A', [Tree('DT', ['the']),
                                   Tree('NNS', ['meals'])]),
            'VERB': Tree('VB', ['Carry']),
            'DT': Tree('DT', ['the']),
        }

        self.exDict = {
            "carry_from_to": {
                "sent": "Carry the meals from the kitchen to the rooms.",
                "tree": carry_tree,
                "solution": carry_solution,
            },
        }

        # The two frames differ only in whether the DT slot comes before or
        # after the Theme NP
        self.crazyframes = {
            "SUBPHRASE_PASS": [agent, verb, determiner, theme,
                               to_prep, destination],
            "SUBPHRASE_FAIL": [agent, verb, theme, determiner,
                               to_prep, destination],
        }
Example #10
0
def extract_frames_from_parse(parse_tree_string, verbose=False):
    """Match VerbNet frames against a parse given as a bracketed string.

    Returns a list. Each verb with a best match contributes a tuple
    (best_match, tree, tag_list, sense, verb, negation); tag_list holds 'ex'
    when is_existential was true for the tree. Non-empty conjunction strings
    returned by split_conjunctions are appended as bare strings before the
    matches of their split. When the string cannot be parsed, a warning is
    printed and an empty list is returned.

    NOTE(review): verbose is accepted but never read in this function.
    """
    result_list = []

    # In case we're handed an bad string, bail somewhat gracefully
    try:
        parse_tree = Tree.parse(parse_tree_string)
    except ValueError:
        print "Warning: semantics could not parse tree", repr(
            parse_tree_string)
        return result_list

    # Temporarily (and maybe permanently) disabled features:
    # 1. Clause splitting: we have not found any example where it does something
    # 2. Activizing clauses: for now, passives do not matter.

    # Split clauses to handle them separately
    #split_clause_dict = split_clauses(parse_tree)

    # Activize clauses
    #for key, (clause, conjunction) in split_clause_dict.items():
    #    activized_clause = activize_clause(clause)
    #    split_clause_dict[key] = (activized_clause, conjunction)

    #for (clause, conjunction) in split_clause_dict.values():
    # With clause splitting disabled, the whole parse is the single clause
    # and it carries an empty conjunction
    for clause, conjunction in ((parse_tree, ''), ):
        # Split conjunctions and duplicate arguments if necessary
        split_tree_dict = split_conjunctions(clause)

        if conjunction != '':
            result_list.append(conjunction)

        # NOTE: this loop rebinds the outer loop's "conjunction" name
        for (split_tree, conjunction) in split_tree_dict.values():
            if conjunction != '':
                result_list.append(conjunction)

            for tree in split_tree:
                tag_list = []

                # Store whether there was an existential there
                # (checked before the transformations below rewrite the tree)
                if is_existential(str(tree)):
                    tag_list.append('ex')

                # Transformational grammar stuff
                tree = existential_there_insertion(tree)
                tree = invert_clause(tree)
                tree = wh_movement(tree)

                if EXTRACT_DEBUG:
                    print 'Transformed tree:'
                    print str(tree)

                verbs = find_verbs(tree)

                # Create VFOs for each verb, then match them to the parse tree
                for verb, negation in verbs:
                    lemmatized_verb = morphy(verb, 'v')
                    vfo_list = create_VerbFrameObjects(lemmatized_verb)
                    match_list = []

                    if EXTRACT_DEBUG:
                        print 'VFO list for %s:' % verb
                        print '\n'.join(
                            str(vfo.frame_list) for vfo in vfo_list)

                    # Collect every frame that matches, paired with its class id
                    for vfo in vfo_list:
                        match = vfo.match_parse(tree)

                        if match:
                            if EXTRACT_DEBUG:
                                print 'Matched:'
                                print '\t', str(vfo.frame_list)
                                print 'with'
                                print '\t', str(tree)
                            match_list.append((match, vfo.classid))

                    if EXTRACT_DEBUG:
                        print 'Match list:'

                        for m in match_list:
                            print 'Sense:', m[1]
                            for a, b in m[0].items():
                                print a, str(b)
                            print '\n\n'

                    (best_match, sense) = pick_best_match(match_list)

                    if EXTRACT_DEBUG:
                        print 'Chose: '
                        if best_match:
                            print sense
                            for a, b in best_match.items():
                                print a, str(b)
                        else:
                            print str(None)
                        print '\n\n'
                    # Only verbs for which a best match was picked are reported
                    if not best_match is None:
                        result_list.append((best_match, tree, tag_list, sense,
                                            verb, negation))

    return result_list
Example #11
0
def extract_frames_from_parse(parse_tree_string, verbose=False):
    """Match VerbNet frames against a parse given as a bracketed string.

    Returns a list. Each verb with a best match contributes a tuple
    (best_match, tree, tag_list, sense, verb, negation); tag_list holds 'ex'
    when is_existential was true for the tree. Non-empty conjunction strings
    returned by split_conjunctions are appended as bare strings before the
    matches of their split. When the string cannot be parsed, a warning is
    printed and an empty list is returned.

    NOTE(review): verbose is accepted but never read in this function.
    """
    result_list = []

    # In case we're handed an bad string, bail somewhat gracefully
    try:
        parse_tree = Tree.parse(parse_tree_string)
    except ValueError:
        print "Warning: semantics could not parse tree", repr(parse_tree_string)
        return result_list

    # Temporarily (and maybe permanently) disabled features:
    # 1. Clause splitting: we have not found any example where it does something
    # 2. Activizing clauses: for now, passives do not matter.

    # Split clauses to handle them separately
    #split_clause_dict = split_clauses(parse_tree)

    # Activize clauses
    #for key, (clause, conjunction) in split_clause_dict.items():
    #    activized_clause = activize_clause(clause)
    #    split_clause_dict[key] = (activized_clause, conjunction)

    #for (clause, conjunction) in split_clause_dict.values():
    # With clause splitting disabled, the whole parse is the single clause
    # and it carries an empty conjunction
    for clause, conjunction in ((parse_tree, ''),):
        # Split conjunctions and duplicate arguments if necessary
        split_tree_dict = split_conjunctions(clause)

        if conjunction != '':
            result_list.append(conjunction)

        # NOTE: this loop rebinds the outer loop's "conjunction" name
        for (split_tree, conjunction) in split_tree_dict.values():
            if conjunction != '':
                result_list.append(conjunction)

            for tree in split_tree:
                tag_list = []

                # Store whether there was an existential there
                # (checked before the transformations below rewrite the tree)
                if is_existential(str(tree)):
                    tag_list.append('ex')

                # Transformational grammar stuff
                tree = existential_there_insertion(tree)
                tree = invert_clause(tree)
                tree = wh_movement(tree)

                if EXTRACT_DEBUG:
                    print 'Transformed tree:'
                    print str(tree)

                verbs = find_verbs(tree)

                # Create VFOs for each verb, then match them to the parse tree
                for verb, negation in verbs:
                    lemmatized_verb = morphy(verb, 'v')
                    vfo_list = create_VerbFrameObjects(lemmatized_verb)
                    match_list = []

                    if EXTRACT_DEBUG:
                        print 'VFO list for %s:' % verb
                        print '\n'.join(str(vfo.frame_list) for vfo in vfo_list)

                    # Collect every frame that matches, paired with its class id
                    for vfo in vfo_list:
                        match = vfo.match_parse(tree)

                        if match:
                            if EXTRACT_DEBUG:
                                print 'Matched:'
                                print '\t', str(vfo.frame_list)
                                print 'with'
                                print '\t', str(tree)
                            match_list.append((match, vfo.classid))

                    if EXTRACT_DEBUG:
                        print 'Match list:'

                        for m in match_list:
                            print 'Sense:', m[1]
                            for a, b in m[0].items():
                                print a, str(b)
                            print '\n\n'

                    (best_match, sense) = pick_best_match(match_list)

                    if EXTRACT_DEBUG:
                        print 'Chose: '
                        if best_match:
                            print sense
                            for a, b in best_match.items():
                                print a, str(b)
                        else:
                            print str(None)
                        print '\n\n'
                    # Only verbs for which a best match was picked are reported
                    if not best_match is None:
                        result_list.append((best_match, tree, tag_list, sense, verb, negation))

    return result_list
 def setUp(self):
     """Build the splitter, tree handler and the parse-tree fixture dicts.

     Each fixture dict maps a descriptive name to a dict holding the sentence
     ("sent"), its parse ("tree") and, for some entries, expected results.
     """
     self.splitter = Split()
     self.th = TreeHandler()
     # One conjunction joining two complete VPs
     self.easyDict = {"Defuse_VP_VP" : {"sent": "Defuse the bomb and go to the hallway.",
                       "tree": Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VP-A', [Tree('VB', ['Defuse']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['bomb'])])]), Tree('CC', ['and']), Tree('VP-A', [Tree('VB', ['go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['hallway'])])])])]), Tree('.', ['.'])])
                       },
                 }
     
     # Trickier conjunction shapes; the key names describe the known issue
     self.exDict = {"Extracts VP, produces S trees with no verb" : {"sent" : "Go and defuse the bomb in the cellar.",
                 "tree" : Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('CC', ['and']), Tree('VB', ['defuse']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['bomb'])]), Tree('PP-MNR', [Tree('IN', ['in']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cellar'])])])]), Tree('.', ['.'])])
                 },
               "Two carrys only one source-dest because of wrongly embedded 'to' pp": {"sent" : "Carry the hostages from the kitchen and the cafeteria to the cellar.",
                                                                 "tree" : Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Carry']), Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['hostages'])]), Tree('PP-CLR', [Tree('IN', ['from']), Tree('NP-A', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['kitchen'])]), Tree('CC', ['and']), Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['cafeteria'])]), Tree('PP', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cellar'])])])])])])]), Tree('.', ['.'])])
                                                                 },
               "VP_NPNP_VP" : { "sent": "Defuse the bomb and the bomb and go to the hallway.",
                               "tree" : Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VP-A', [Tree('VB', ['Defuse']), Tree('NP-A', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['bomb'])]), Tree('CC', ['and']), Tree('NP', [Tree('DT', ['the']), Tree('NN', ['bomb'])])])]), Tree('CC', ['and']), Tree('VP-A', [Tree('VB', ['go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['hallway'])])])])]), Tree('.', ['.'])])
                               },
          # The only entry with an expected output ("correct_list"): one S tree
          # per listed destination.
          # NOTE(review): the expected trees tag the empty subject '-NPNONE-'
          # while the input tree uses '-NONE-' -- confirm where that changes.
          "go_to_np_comma_np_comma" : {
                                       "sent" : "Go to the cellar, kitchen, and bedroom.",
                                       "tree" : Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP-A', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['cellar'])]), Tree(',', [',']), Tree('NP', [Tree('NNP', ['kitchen'])]), Tree(',', [',']), Tree('CC', ['and']), Tree('NP', [Tree('NN', ['bedroom'])])])])]), Tree('.', ['.'])]),
                                       "correct_list" : [Tree('S', [Tree('NP-SBJ-A', [Tree('-NPNONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP', [Tree('DT', ['the']), Tree('NN', ['cellar'])])])]), Tree('.', ['.'])]), Tree('S', [Tree('NP-SBJ-A', [Tree('-NPNONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP', [Tree('NNP', ['kitchen'])])])]), Tree('.', ['.'])]), Tree('S', [Tree('NP-SBJ-A', [Tree('-NPNONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP', [Tree('NN', ['bedroom'])])])]), Tree('.', ['.'])])]
                                       }
               }
     # A conjunction followed by a conditional clause ("if" / "when");
     # both entries carry an empty "frames" list
     self.ccConditionalDict = {"CC and adv conditional" : {"sent" : "Go to the cellar and if you see a bomb, activate your camera.",
                                           "tree" : Tree('S', [Tree('S-A', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cellar'])])])])]), Tree('CC', ['and']), Tree('S-A', [Tree('SBAR-ADV', [Tree('IN', ['if']), Tree('S-A', [Tree('NP-SBJ-A', [Tree('PRP', ['you'])]), Tree('VP', [Tree('VBP', ['see']), Tree('NP-A', [Tree('DT', ['a']), Tree('NN', ['bomb'])])])])]), Tree(',', [',']), Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['activate']), Tree('NP-A', [Tree('PRP$', ['your']), Tree('NN', ['camera'])])])]), Tree('.', ['.'])]),
                                           "frames" : []                                      
                                       },
               "CC and tmp conditional" : {"sent" : "Go to the cellar and when you see a bomb, activate your camera.",
                                           "tree" : Tree('S', [Tree('S-A', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cellar'])])])])]), Tree('CC', ['and']), Tree('S-A', [Tree('SBAR-TMP', [Tree('WHADVP-0', [Tree('WRB', ['when'])]), Tree('S-A', [Tree('NP-SBJ-A', [Tree('PRP', ['you'])]), Tree('VP', [Tree('VBP', ['see']), Tree('NP-A', [Tree('DT', ['a']), Tree('NN', ['bomb'])]), Tree('ADVP-0', [Tree('-NONE-', ['*T*'])])])])]), Tree(',', [',']), Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['activate']), Tree('NP-A', [Tree('PRP$', ['your']), Tree('NN', ['camera'])])])]), Tree('.', ['.'])]),
                                           "frames": [],
                                           }
                          
               }
     #Examples the current methods do not correctly parse
     #(two of the entries below repeat sentences that also appear in exDict)
     self.hardDict = { "Carry_NP_NP" : {"sent" : "Carry the hostages from the kitchen and bedroom to the cellar.",
                                   "tree" : Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Carry']), Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['hostages'])]), Tree('PP-CLR', [Tree('IN', ['from']), Tree('NP-A', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['kitchen'])]), Tree('CC', ['and']), Tree('NP', [Tree('NP', [Tree('NN', ['bedroom'])]), Tree('PP', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cellar'])])])])])])]), Tree('.', ['.'])])                           
                                   },
                 "Go_NP_NP" : {"sent": "Go to the cellar and kitchen.",
                               "tree": Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cellar']), Tree('CC', ['and']), Tree('NN', ['kitchen'])])])]), Tree('.', ['.'])])
                               },
                 "Carry_NP_NP_2" : {"sent": "Carry meals to the cafeteria and lounge.",
                                    "tree": Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Carry']), Tree('NP-A', [Tree('NNS', ['meals'])]), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cafeteria']), Tree('CC', ['and']), Tree('NN', ['lounge'])])])]), Tree('.', ['.'])])
                                    },
                 "Carry_NPNP_NPNP" : {"sent" : "Carry meals from the kitchen and cafeteria to the office and lounge.",
                                      #Wrong pp attachment for this tree ("to the office" modifies cafeteria)
                                      "tree" : Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Carry']), Tree('NP-A', [Tree('NNS', ['meals'])]), Tree('PP-CLR', [Tree('IN', ['from']), Tree('NP-A', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['kitchen'])]), Tree('CC', ['and']), Tree('NP', [Tree('NP', [Tree('NN', ['cafeteria'])]), Tree('PP', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['office'])])])]), Tree('CC', ['and']), Tree('NP', [Tree('NN', ['lounge'])])])])]), Tree('.', ['.'])])
                       },
                 "Two carrys only one source-dest because of wrongly embedded 'to' pp": {"sent" : "Carry the hostages from the kitchen and the cafeteria to the cellar.",
                                                                                 "tree" : Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Carry']), Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['hostages'])]), Tree('PP-CLR', [Tree('IN', ['from']), Tree('NP-A', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['kitchen'])]), Tree('CC', ['and']), Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['cafeteria'])]), Tree('PP', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cellar'])])])])])])]), Tree('.', ['.'])])
                                                                                 },
                 "Extracts VP, produces S trees with no verb" : {"sent" : "Go and defuse the bomb in the cellar.",
                                                                 "tree" : Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('CC', ['and']), Tree('VB', ['defuse']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['bomb'])]), Tree('PP-MNR', [Tree('IN', ['in']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cellar'])])])]), Tree('.', ['.'])])
                                                                 },
                     
                 "Carry_NPNP_NP" : {"sent" : "Carry meals to the library and cellar from the kitchen.",
                                    "tree": Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Carry']), Tree('NP-A', [Tree('NP', [Tree('NNS', ['meals'])]), Tree('PP', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['library']), Tree('CC', ['and']), Tree('NN', ['cellar'])])])]), Tree('PP-CLR', [Tree('IN', ['from']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['kitchen'])])])]), Tree('.', ['.'])]),
                                    }
                 }
Example #13
0
 def append_period(self,tree):
     """Add a '.' node when the tree is non-empty and does not end in one.

     Nothing happens for an empty tree or when the last child is already
     labelled '.'. Returns None.
     """
     if len(tree) == 0 or tree[-1].node == '.':
         return
     # The label carries the depth delimiter and depth 1; the leaf carries a
     # fresh id from get_ulid() wrapped in double underscores.
     period_label = '.' + self.depthdelim + '1'
     period_leaf = '.__' + str(self.get_ulid()) + '__'
     # NOTE(review): the new node goes into the second child (tree[1]), not
     # onto tree itself -- presumably the VP; confirm this is intended.
     tree[1].append(Tree(period_label, [period_leaf]))
Example #14
0
def split_conjunctions_sparse(parse_tree):
    '''Split a parse tree on its conjunctions and return the result dictionary.

    Three kinds of input are distinguished:

    * 'if-and' phrases ('If you see a hostage and a bomb...'): the 'if'
      subtree (the first subtree whose label contains "SBAR-ADV") is kept
      whole and placed in front of every 'and'-split of the remaining
      command tree, each wrapped in a new 'S' tree.
    * 'if-or' phrases ('If you see a hostage or a bomb...'): the whole tree
      is split on 'or', then every piece is split on 'and'.
    * anything else (a plain 'if' phrase, a plain command, or an 'if'
      phrase in which no "SBAR-ADV" subtree is found): the whole tree is
      split on 'and'.

    Returns a dict with the single entry ``{0: (trees, "and")}``; the path
    to the conjunction is not tracked and only 'and' is reported.

    As a debugging aid the input tree and every resulting tree are rendered
    to PNG files under a hard-coded directory.
    '''
    splitter = Split()

    #DEBUG
    # NOTE(review): the output directory is machine-specific and the
    # rendering runs on every call.
    g1 = Digraph(format='png')
    utils.print_tree_to_png(g1, [parse_tree])
    g1.render("/home/gian/hri_project/figures/parse_trees/command.gv")

    # Stays None unless one of the 'if-and' / 'if-or' branches fills it in.
    # Previously an 'if' phrase without any "SBAR-ADV" subtree left `trees`
    # unassigned and the code below failed with an UnboundLocalError.
    trees = None

    #check whether it's 'if-and' or 'if-or' phrase
    #example: 'If you see a hostage and|or a bomb...'
    if splitter.num_ccs(parse_tree, "if") > 0:
        for tree_pos in parse_tree.treepositions():
            subtree = parse_tree[tree_pos]
            #we don't want the leaves
            if not isinstance(subtree, Tree):
                continue
            #we look for the 'if' tree
            if "SBAR-ADV" not in subtree.node:
                continue

            #separate the 'if' tree from the command tree
            if_tree = deepcopy(subtree)
            cmd_tree = deepcopy(parse_tree)
            del cmd_tree[tree_pos]

            num_ands = splitter.num_ccs(if_tree, "and")
            num_ors = splitter.num_ccs(if_tree, "or")

            if num_ands > 0:
                #it's an 'if-and' phrase: split the command on 'and' and
                #prefix every piece with the (shared) 'if' tree
                trees = [
                    Tree('S', [if_tree] + list(cmd))
                    for cmd in splitter.split_on_multiple_ccs(cmd_tree, 'and')
                ]
            elif num_ors > 0:
                #it's an 'if-or' phrase: split on 'or' first, then on 'and'
                trees = []
                for or_tree in splitter.split_on_multiple_ccs(
                        parse_tree, "or"):
                    trees.extend(
                        splitter.split_on_multiple_ccs(or_tree, "and"))
            #a simple 'if' phrase is handled by the fallback below
            #e.g. 'If you see a hostage activate the radio and the camera'

            #only the first "SBAR-ADV" subtree is considered
            break

    if trees is None:
        #simple command, simple 'if' phrase, or no 'if' subtree found
        #e.g. 'Activate the radio and the camera'
        trees = splitter.split_on_multiple_ccs(parse_tree, "and")

    #DEBUG
    for i, item in enumerate(trees):
        g2 = Digraph(format='png')
        utils.print_tree_to_png(g2, [item])
        g2.render(
            "/home/gian/hri_project/figures/parse_trees/splitted_command{}.gv".
            format(i))

    #Abstracting away path to conjunction because it isn't used anyway
    #Only support 'and' right now
    return {0: (trees, "and")}
Example #15
0
def extract_frames_from_parse(parse_tree_string, verbose=False):
    """Return the semantic parse of a parse tree supplied as a string.

    The string is parsed with ``Tree.parse``; if that raises ``ValueError``
    an error message is printed and an empty list is returned.  Otherwise
    the tree is split on its conjunctions, every resulting subtree is run
    through the transformational-grammar filters and handed to
    ``match_verb``, and each truthy match is collected in order.

    According to the original documentation, each collected entry is a tuple
    holding the VerbNet frame and its associated tree.

    With ``verbose`` set, conjunction subtrees and any tree changed by the
    filters are printed, and the flag is forwarded to ``match_verb``.
    """
    frames = []

    # A malformed string should not take the caller down with it.
    try:
        parse_tree = Tree.parse(parse_tree_string)
    except ValueError:
        print "Error: semantics could not parse tree", repr(parse_tree_string)
        return frames

    # Two preprocessing steps are switched off for now (possibly for good):
    #   1. clause splitting (split_clauses): no example was found where it
    #      does something;
    #   2. activizing clauses (activize_clause): passives do not matter yet.
    # The whole tree therefore stands in as the only clause, paired with an
    # empty conjunction.
    # TODO: this one-element loop exists because split_clauses may not work.
    clauses = [(parse_tree, '')]

    # Transformational grammar steps, applied in this order.
    transforms = (assertion_filter, existential_there_insertion,
                  invert_clause, wh_movement)

    for clause, clause_conjunction in clauses:
        # Split conjunctions and duplicate arguments if necessary.
        split_tree_dict = split_conjunctions_sparse(clause)

        if clause_conjunction != '':
            frames.append(clause_conjunction)

        for subtrees, label in split_tree_dict.values():
            for subtree in subtrees:
                if verbose and label:
                    print "Subtree ({}):".format(label)
                    print subtree.pprint(force_multiline=True)

                # TODO: existential detection (is_existential) is
                # deactivated for now.

                transformed = subtree
                for transform in transforms:
                    transformed = transform(transformed)

                if verbose and transformed != subtree:
                    print 'Transformed tree:'
                    print transformed.pprint(force_multiline=True)

                match = match_verb(transformed, verbose=verbose)
                if match:
                    frames.append(match)

    return frames