def setUp(self):
    """Build the fixtures for the parse-matching tests.

    Populates self.exDict (example sentences with their parse trees and
    candidate frames), self.crazyframes (frames that contain a DT slot),
    self.th (a TreeHandler) and self.matcher (a ParseMatcher).
    """
    # Frame slots are immutable 4-tuples, so one object per slot can be
    # shared by every frame. The frame *lists* are rebuilt on every call so
    # that no two fixture entries alias the same list.
    agent = ('NP', 'Agent', '', '')
    verb = ('VERB', 'VERB', '', '')
    theme = ('NP', 'Theme', '', '')
    source = ('NP', 'Source', '', '')
    destination = ('NP', 'Destination', '', '')
    any_prep = ('PREP', 'PREP', '', '')
    to_prep = ('PREP', 'to towards', '', '')
    determiner = ('DT', 'DT', '', '')

    def transitive():
        return [agent, verb, theme]

    def to_destination():
        return [agent, verb, theme, to_prep, destination]

    def source_then_destination():
        return [agent, verb, theme, any_prep, source, to_prep, destination]

    def destination_then_source():
        return [agent, verb, theme, to_prep, destination, any_prep, source]

    def carry_frames():
        return [
            transitive(),
            to_destination(),
            source_then_destination(),
            destination_then_source(),
        ]

    carry_from_to = {
        "sent": "Carry the meals from the kitchen to the rooms.",
        "tree": Tree('S', [
            Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]),
            Tree('VP', [
                Tree('VB', ['Carry']),
                Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['meals'])]),
                Tree('PP-CLR', [
                    Tree('IN', ['from']),
                    Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['kitchen'])]),
                ]),
                Tree('PP-CLR', [
                    Tree('TO', ['to']),
                    Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['rooms'])]),
                ]),
            ]),
            Tree('.', ['.']),
        ]),
        "possible_frames": carry_frames(),
        "correct_frame": source_then_destination(),
        "correct_entry": {
            'to towards': Tree('TO', ['to']),
            'Destination': Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['rooms'])]),
            'Agent': Tree('NP-SBJ-A', [Tree('-NPNONE-', ['*'])]),
            'Source': Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['kitchen'])]),
            'Theme': Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['meals'])]),
            'VERB': Tree('VB', ['Carry']),
        },
    }

    # Same sentence shape, but the "to" PP is nested inside the "from" NP.
    carry_from_to_deep_pp = {
        "sent": "Carry the meals from the kitchen to the cafeteria.",
        "tree": Tree('S', [
            Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]),
            Tree('VP', [
                Tree('VB', ['Carry']),
                Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['meals'])]),
                Tree('PP-CLR', [
                    Tree('IN', ['from']),
                    Tree('NP-A', [
                        Tree('NP', [Tree('DT', ['the']), Tree('NN', ['kitchen'])]),
                        Tree('PP', [
                            Tree('TO', ['to']),
                            Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cafeteria'])]),
                        ]),
                    ]),
                ]),
            ]),
            Tree('.', ['.']),
        ]),
        "possible_frames": carry_frames(),
        "correct_frame": source_then_destination(),
    }

    defuse_in = {
        "sent": "Defuse in the hallway.",
        "tree": Tree('S', [
            Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]),
            Tree('VP', [
                Tree('VB', ['Defuse']),
                Tree('PP-CLR', [
                    Tree('IN', ['in']),
                    Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['hallway'])]),
                ]),
            ]),
            Tree('.', ['.']),
        ]),
        "wrong_frame": transitive(),
    }

    self.exDict = {
        "carry_from_to": carry_from_to,
        "carry_from_to_deep_pp": carry_from_to_deep_pp,
        "defuse_in": defuse_in,
    }

    # The two frames differ only in where the DT slot sits relative to Theme.
    self.crazyframes = {
        "SUBPHRASE_PASS": [agent, verb, determiner, theme, to_prep, destination],
        "SUBPHRASE_FAIL": [agent, verb, theme, determiner, to_prep, destination],
    }

    self.th = TreeHandler()
    self.matcher = ParseMatcher(0, 2)
def verb_child_frame_match(child, parent, subject, vp, negated):
    """Find the best frame for the verb at child and wrap it in a FrameMatch.

    The tree searched is parent when subject is falsy; otherwise a new
    'S' tree is built from subject and vp. The verb is child[0] lower-cased.

    Returns a FrameMatch when best_matching_frame finds something, otherwise
    whatever best_matching_frame returned (the (None, None) pair).
    """
    if subject:
        result_tree = Tree('S', [subject, vp])
    else:
        result_tree = parent

    verb = child[0].lower()
    found = best_matching_frame(verb, result_tree)
    if found != (None, None):
        # best_matching_frame's pair is passed to FrameMatch in swapped order.
        return FrameMatch(verb, found[1], found[0], negated, result_tree)
    return found
def process(parse,verbose=True): """Show the steps of transformation for a parse.""" # Original parse parse_tree = Tree.parse(parse) print_parse(parse_tree, "Parse") frames = extract_frames_from_parse(parse, verbose=verbose) if verbose: print for frame in frames: print frame.pprint() if frame.condition: print "Condition:" print frame.condition.pprint() print # Bail if no frames matched if not frames: if verbose: print "No frames matched." return # Extract semantic structures semantic_structures = create_semantic_structures(frames) if semantic_structures: if verbose: print semantic_structures return semantic_structures else: if verbose: print "No semantic structures returned."
def pos_split(self,tree,pos,possibleParents=["S","VP","NP"],desiredCC="and",ccpos="CC"):
    """Split tree into several trees around coordinating conjunctions.

    The strategy for this is to find the CC in the VP (like the VB in
    matching), find the nearest left or right siblings and if they are
    heads, pop.

    The input tree is modified in place (NN siblings are wrapped, and the
    left branch is popped after each split); the left-hand results are
    produced from deep copies.

    @input tree input tree to split on
    @input pos is the part of speech we are splitting on
    @input possibleParents are the possible phrase parents currently supported
    @input desiredCC is the lemma of the CC we are looking for
    @input ccpos is the cc part of speech we are looking for
    @return list of trees; the (possibly reduced) input tree is always last
    @raise UnlevelCCSiblings see the review note on the length check below
    """
    res = []
    # Search position handed to get_main_pos_path; [-1] restarts the search.
    cursor = [-1]
    # One pass per occurrence of the conjunction reported by num_ccs.
    cccount = self.num_ccs(tree,desiredCC)
    for i in range(cccount):
        ccpath = list(self.th.get_main_pos_path(tree, ccpos, -1, cursor=cursor))
        parentPhrase = self.th.which_parent(tree, ccpath, possibleParents, -1)
        # Strip the depth and pos suffixes so only the bare phrase label is compared.
        parentpos = parentPhrase.split(self.th.depthdelim)[0].split(self.th.posdelim)[0]
        if parentpos == pos:
            left = self.sibling_cc_path(ccpath)
            right = self.sibling_cc_path(ccpath,tree=tree);
            #ADDED
            #if siblings of CC are NN (e.g. "Go to office1 and office2")
            #make them NP instead
            #this is needed otherwise NNs lead to bad creation of commands
            for child_pos, child in enumerate(tree[ccpath[:-2]]):
                if child.node.split(".")[0] == "NN":
                    # The slice is a fresh list, so appending does not touch ccpath.
                    temp_path = ccpath[:-2]
                    temp_path.append(child_pos)
                    child_path = temp_path
                    new_child = Tree("NP-A", [child])
                    tree[child_path] = new_child
            # NOTE(review): a chained "a != b != c" is "a != b and b != c", so this
            # raises only when BOTH siblings differ in depth from the CC path.
            # If either mismatch should raise, this needs "or" -- confirm intent.
            if len(left) != len(ccpath) != len(right):
                raise UnlevelCCSiblings
            # Work on a copy so the original still holds the right-hand material.
            lefttree = copy.deepcopy(tree)
            #Instead of popping, need to replace parent with correct branch
            temp = self.th.pop_path_cc(lefttree,right,ccpath)
            if temp:
                #If splitting on S, temp will be the parent replacement
                lefttree = temp
            #Recurse on "," list
            lefttrees = self.pos_split(lefttree,pos,possibleParents=possibleParents,desiredCC=self.listCC,ccpos=self.listCC)
            res.extend(lefttrees)
            #Copy and put left branch in results and keep going
            self.th.pop_left(tree,ccpath)#Pop everything to the left of ccpath and keep looking
            #self.th.pop_path_cc(tree, left,ccpath)#Pop for real, keep looking
            # The tree changed shape, so restart the search from the beginning.
            cursor = [-1]
        else:
            if DEBUG:
                print pos,' CC: ',ccpath
            # This CC is under a different parent: continue the search from it.
            cursor = ccpath
    if len(tree) == 1:
        #Only one branch-> split on S, consume
        tree = tree[0]
    res.append(tree)
    return res
def process(parse):
    """Show the steps of transformation for a parse.

    Parses the bracketed string, prints it, then prints each subtree
    produced by split_conjunctions via print_if_diff, labelled with its
    conjunction.
    """
    tree = Tree.parse(parse)
    print_parse(tree, "Parse")

    for subtrees, conjunction in split_conjunctions(tree).values():
        for subtree in subtrees:
            print_if_diff(subtree, tree, "Subtree ({})".format(conjunction))
def match_parse(self, parse_tree):
    """Align the slots of self.frame_list with the subtrees of parse_tree.

    parse_tree is a Treebank parse compiled into NLTK's tree structure.
    Slots are matched in order against parse_tree.subtrees(), scanning
    forward only. Returns a dict mapping each slot's role (slot[1]) to the
    subtree that matched it, or None if the subtrees run out before every
    slot is matched.
    """
    # subtrees() is a generator; indexing and len() need a real list.
    candidates = list(parse_tree.subtrees())
    total = len(candidates)

    roles = {}
    matched = 0
    position = 0

    for slot in self.frame_list:
        while position < total:
            candidate = candidates[position]

            # An Agent slot that does not match the root gets an implicit
            # null subject instead of consuming a subtree.
            # NOTE: Assumes Agent role is the first in the frame, may not be true
            if (position == 0 and slot[1] == 'Agent'
                    and not self.__match_subtree(candidate, slot)):
                roles[slot[1]] = Tree("(NP-SBJ-A (-NONE- *))")
                matched += 1
                break

            if self.__match_subtree(candidate, slot):
                roles[slot[1]] = candidate
                matched += 1
                break

            position += 1

        # Ran off the end of the parse while looking for this slot.
        if position == total:
            return None

    # Only a complete assignment counts.
    if matched != len(self.frame_list):
        return None
    return roles
def interactive_mode(window, first_input):
    """Interactively get input from the user and parse it.

    window is handed to setup_windows, which supplies the input frame and
    the input, parse and semantics windows. If first_input is truthy it is
    used as the first piece of text instead of prompting; afterwards text is
    read with get_input. The loop ends (returning None) when the text is
    "q" or "quit".
    """
    input_frame, input_win, parse_win, semantic_win = setup_windows(window)

    # Initialize pipeline and knowledge base
    pipeline = PipelineClient()
    kb = KnowledgeBase()

    # Send some data through the pipeline
    # NOTE(review): this result is never read (it is overwritten inside the
    # loop); presumably the call only warms up the pipeline -- confirm.
    result = pipeline.parse("This is a test.")

    input_frame.addstr(1, 1, 'Enter your input, then press Ctrl+G. '
                       'Enter "quit" or press Ctrl+C to exit.')
    input_frame.refresh()

    # Until the input is q/quit, process data
    last_input = first_input
    while True:
        # Display the first input if needed
        input_win.erase()
        input_win.refresh()
        if last_input:
            input_win.addstr(0, 0, last_input)

        # Get text from the input box, removing any embedded newlines
        if first_input:
            text = first_input
            # Consume it so later iterations prompt the user.
            first_input = None
        else:
            text = get_input(input_win).replace("\n", "").strip()
            last_input = text

        # Quit if needed
        if text == "q" or text == "quit":
            return

        # Get input again if it was empty
        if not text:
            continue

        # Echo input and display status, clearing both windows
        parse_win.clear()
        parse_win.addstr(text)
        parse_win.addstr('\nParsing and restoring null elements...')
        parse_win.refresh()
        semantic_win.clear()
        semantic_win.refresh()

        # Run the parse pipeline
        result = pipeline.parse(text)
        result_tree = Tree(result)

        # Output the longest parse that will fit. We try to draw the
        # possible output in order of decreasing length.
        parse_max_width = parse_win.getmaxyx()[1]
        possible_formats = (result_tree.pprint(margin=parse_max_width, force_multiline=True),
                            result_tree.pprint(margin=parse_max_width),
                            result)

        for formatted_result in possible_formats:
            parse_win.clear()
            try:
                parse_win.addstr(text + '\n')
                parse_win.addstr(formatted_result)
            except _curses.error:
                # This format could not be drawn; fall through to the next one.
                continue
            else:
                # We've successfully printed, stop trying formats
                break
        else:
            # for/else: reached only when no format was drawn without error.
            parse_win.clear()
            parse_win.addstr("Parse too large to show.\n")
        parse_win.refresh()

        # Do the same for semantics
        # Echo input and display status, after clearing the window
        semantic_win.clear()
        semantic_win.addstr(text)
        semantic_win.addstr('\nPerforming semantic analysis...')
        semantic_win.refresh()

        frames, new_commands, kb_response = process_parse_tree(result, text, kb)
        semantic_win.clear()
        try:
            if frames:
                semantic_win.addstr("Frames matched:\n")
                for frame in frames:
                    semantic_win.addstr("\t" + str(frame) + "\n")
            if new_commands:
                semantic_win.addstr("New commands:\n")
                for command in new_commands:
                    semantic_win.addstr(str(command) + "\n")
            if kb_response:
                semantic_win.addstr("KB response:\n")
                semantic_win.addstr(str(kb_response) + "\n")
            if not any((frames, new_commands, kb_response)):
                semantic_win.addstr("No frames matched.\n")
        except _curses.error:
            # Any drawing error discards the partial output for a short notice.
            semantic_win.clear()
            semantic_win.addstr("Semantic representation too large to show.")
        semantic_win.refresh()

    # Unreachable: the loop above only exits through its own return.
    return
def setUp(self):
    """Build the fixtures for the tree-handling tests.

    Sets self.th (a TreeHandler), self.exDict (one example sentence with its
    parse tree and the expected role-to-subtree solution) and
    self.crazyframes (two frames that contain a DT slot).
    """
    self.th = TreeHandler()

    # Frame slots are immutable 4-tuples and can be shared between frames.
    agent = ('NP', 'Agent', '', '')
    verb = ('VERB', 'VERB', '', '')
    theme = ('NP', 'Theme', '', '')
    determiner = ('DT', 'DT', '', '')
    to_prep = ('PREP', 'to towards', '', '')
    destination = ('NP', 'Destination', '', '')

    carry_tree = Tree(
        '''(S\n (NP-SBJ-A (-NONE- *))\n (VP\n (VB Carry)\n (NP-A (DT the) (NNS meals))\n (PP-CLR (IN from) (NP-A (DT the) (NN kitchen)))\n (PP-CLR (TO to) (NP-A (DT the) (NNS rooms))))\n (. .))'''
    )
    carry_solution = {
        'Destination': Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['rooms'])]),
        'Agent': Tree('NP-SBJ-A', [Tree('-NPNONE-', ['*'])]),
        'to towards': Tree('TO', ['to']),
        'Theme': Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['meals'])]),
        'VERB': Tree('VB', ['Carry']),
        'DT': Tree('DT', ['the']),
    }
    self.exDict = {
        "carry_from_to": {
            "sent": "Carry the meals from the kitchen to the rooms.",
            "tree": carry_tree,
            "solution": carry_solution,
        },
    }

    # The two frames differ only in where the DT slot sits relative to Theme.
    self.crazyframes = {
        "SUBPHRASE_PASS": [agent, verb, determiner, theme, to_prep, destination],
        "SUBPHRASE_FAIL": [agent, verb, theme, determiner, to_prep, destination],
    }
def extract_frames_from_parse(parse_tree_string, verbose=False): """Take a string representing the parse tree as input, and print the semantic parse. The result list consists of a list of tuples, with each tuple containing the VerbNet frame and its associated tree.""" result_list = [] # In case we're handed an bad string, bail somewhat gracefully try: parse_tree = Tree.parse(parse_tree_string) except ValueError: print "Warning: semantics could not parse tree", repr( parse_tree_string) return result_list # Temporarily (and maybe permanently) disabled features: # 1. Clause splitting: we have not found any example where it does something # 2. Activizing clauses: for now, passives do not matter. # Split clauses to handle them separately #split_clause_dict = split_clauses(parse_tree) # Activize clauses #for key, (clause, conjunction) in split_clause_dict.items(): # activized_clause = activize_clause(clause) # split_clause_dict[key] = (activized_clause, conjunction) #for (clause, conjunction) in split_clause_dict.values(): for clause, conjunction in ((parse_tree, ''), ): # Split conjunctions and duplicate arguments if necessary split_tree_dict = split_conjunctions(clause) if conjunction != '': result_list.append(conjunction) for (split_tree, conjunction) in split_tree_dict.values(): if conjunction != '': result_list.append(conjunction) for tree in split_tree: tag_list = [] # Store whether there was an existential there if is_existential(str(tree)): tag_list.append('ex') # Transformational grammar stuff tree = existential_there_insertion(tree) tree = invert_clause(tree) tree = wh_movement(tree) if EXTRACT_DEBUG: print 'Transformed tree:' print str(tree) verbs = find_verbs(tree) # Create VFOs for each verb, then match them to the parse tree for verb, negation in verbs: lemmatized_verb = morphy(verb, 'v') vfo_list = create_VerbFrameObjects(lemmatized_verb) match_list = [] if EXTRACT_DEBUG: print 'VFO list for %s:' % verb print '\n'.join( str(vfo.frame_list) for vfo in 
vfo_list) for vfo in vfo_list: match = vfo.match_parse(tree) if match: if EXTRACT_DEBUG: print 'Matched:' print '\t', str(vfo.frame_list) print 'with' print '\t', str(tree) match_list.append((match, vfo.classid)) if EXTRACT_DEBUG: print 'Match list:' for m in match_list: print 'Sense:', m[1] for a, b in m[0].items(): print a, str(b) print '\n\n' (best_match, sense) = pick_best_match(match_list) if EXTRACT_DEBUG: print 'Chose: ' if best_match: print sense for a, b in best_match.items(): print a, str(b) else: print str(None) print '\n\n' if not best_match is None: result_list.append((best_match, tree, tag_list, sense, verb, negation)) return result_list
def extract_frames_from_parse(parse_tree_string, verbose=False): """Take a string representing the parse tree as input, and print the semantic parse. The result list consists of a list of tuples, with each tuple containing the VerbNet frame and its associated tree.""" result_list = [] # In case we're handed an bad string, bail somewhat gracefully try: parse_tree = Tree.parse(parse_tree_string) except ValueError: print "Warning: semantics could not parse tree", repr(parse_tree_string) return result_list # Temporarily (and maybe permanently) disabled features: # 1. Clause splitting: we have not found any example where it does something # 2. Activizing clauses: for now, passives do not matter. # Split clauses to handle them separately #split_clause_dict = split_clauses(parse_tree) # Activize clauses #for key, (clause, conjunction) in split_clause_dict.items(): # activized_clause = activize_clause(clause) # split_clause_dict[key] = (activized_clause, conjunction) #for (clause, conjunction) in split_clause_dict.values(): for clause, conjunction in ((parse_tree, ''),): # Split conjunctions and duplicate arguments if necessary split_tree_dict = split_conjunctions(clause) if conjunction != '': result_list.append(conjunction) for (split_tree, conjunction) in split_tree_dict.values(): if conjunction != '': result_list.append(conjunction) for tree in split_tree: tag_list = [] # Store whether there was an existential there if is_existential(str(tree)): tag_list.append('ex') # Transformational grammar stuff tree = existential_there_insertion(tree) tree = invert_clause(tree) tree = wh_movement(tree) if EXTRACT_DEBUG: print 'Transformed tree:' print str(tree) verbs = find_verbs(tree) # Create VFOs for each verb, then match them to the parse tree for verb, negation in verbs: lemmatized_verb = morphy(verb, 'v') vfo_list = create_VerbFrameObjects(lemmatized_verb) match_list = [] if EXTRACT_DEBUG: print 'VFO list for %s:' % verb print '\n'.join(str(vfo.frame_list) for vfo in 
vfo_list) for vfo in vfo_list: match = vfo.match_parse(tree) if match: if EXTRACT_DEBUG: print 'Matched:' print '\t', str(vfo.frame_list) print 'with' print '\t', str(tree) match_list.append((match, vfo.classid)) if EXTRACT_DEBUG: print 'Match list:' for m in match_list: print 'Sense:', m[1] for a, b in m[0].items(): print a, str(b) print '\n\n' (best_match, sense) = pick_best_match(match_list) if EXTRACT_DEBUG: print 'Chose: ' if best_match: print sense for a, b in best_match.items(): print a, str(b) else: print str(None) print '\n\n' if not best_match is None: result_list.append((best_match, tree, tag_list, sense, verb, negation)) return result_list
def setUp(self):
    """Build the fixtures for the conjunction-splitting tests.

    Creates self.splitter (a Split) and self.th (a TreeHandler), plus four
    dictionaries of examples keyed by a descriptive name. Every example has
    a "sent" string and its parse "tree"; some also carry expected output
    ("correct_list") or an empty "frames" list.
    """
    self.splitter = Split()
    self.th = TreeHandler()
    # A single VP-and-VP coordination.
    self.easyDict = {"Defuse_VP_VP" : {"sent": "Defuse the bomb and go to the hallway.",
                                       "tree": Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VP-A', [Tree('VB', ['Defuse']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['bomb'])])]), Tree('CC', ['and']), Tree('VP-A', [Tree('VB', ['go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['hallway'])])])])]), Tree('.', ['.'])])
                                       },
                     }
    # General examples; the first two also appear in self.hardDict below.
    self.exDict = {"Extracts VP, produces S trees with no verb" : {"sent" : "Go and defuse the bomb in the cellar.",
                                                                   "tree" : Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('CC', ['and']), Tree('VB', ['defuse']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['bomb'])]), Tree('PP-MNR', [Tree('IN', ['in']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cellar'])])])]), Tree('.', ['.'])])
                                                                   },
                   "Two carrys only one source-dest because of wrongly embedded 'to' pp": {"sent" : "Carry the hostages from the kitchen and the cafeteria to the cellar.",
                                                                                           "tree" : Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Carry']), Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['hostages'])]), Tree('PP-CLR', [Tree('IN', ['from']), Tree('NP-A', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['kitchen'])]), Tree('CC', ['and']), Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['cafeteria'])]), Tree('PP', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cellar'])])])])])])]), Tree('.', ['.'])])
                                                                                           },
                   "VP_NPNP_VP" : { "sent": "Defuse the bomb and the bomb and go to the hallway.",
                                    "tree" : Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VP-A', [Tree('VB', ['Defuse']), Tree('NP-A', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['bomb'])]), Tree('CC', ['and']), Tree('NP', [Tree('DT', ['the']), Tree('NN', ['bomb'])])])]), Tree('CC', ['and']), Tree('VP-A', [Tree('VB', ['go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['hallway'])])])])]), Tree('.', ['.'])])
                                    },
                   # NOTE(review): the expected trees label the null subject '-NPNONE-'
                   # while the input trees use '-NONE-'; presumably the code under test
                   # relabels it -- confirm.
                   "go_to_np_comma_np_comma" : { "sent" : "Go to the cellar, kitchen, and bedroom.",
                                                 "tree" : Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP-A', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['cellar'])]), Tree(',', [',']), Tree('NP', [Tree('NNP', ['kitchen'])]), Tree(',', [',']), Tree('CC', ['and']), Tree('NP', [Tree('NN', ['bedroom'])])])])]), Tree('.', ['.'])]),
                                                 "correct_list" : [Tree('S', [Tree('NP-SBJ-A', [Tree('-NPNONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP', [Tree('DT', ['the']), Tree('NN', ['cellar'])])])]), Tree('.', ['.'])]),
                                                                   Tree('S', [Tree('NP-SBJ-A', [Tree('-NPNONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP', [Tree('NNP', ['kitchen'])])])]), Tree('.', ['.'])]),
                                                                   Tree('S', [Tree('NP-SBJ-A', [Tree('-NPNONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP', [Tree('NN', ['bedroom'])])])]), Tree('.', ['.'])])]
                                                 }
                   }
    # A command coordinated with a conditional clause (SBAR-ADV "if", SBAR-TMP "when").
    self.ccConditionalDict = {"CC and adv conditional" : {"sent" : "Go to the cellar and if you see a bomb, activate your camera.",
                                                          "tree" : Tree('S', [Tree('S-A', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cellar'])])])])]), Tree('CC', ['and']), Tree('S-A', [Tree('SBAR-ADV', [Tree('IN', ['if']), Tree('S-A', [Tree('NP-SBJ-A', [Tree('PRP', ['you'])]), Tree('VP', [Tree('VBP', ['see']), Tree('NP-A', [Tree('DT', ['a']), Tree('NN', ['bomb'])])])])]), Tree(',', [',']), Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['activate']), Tree('NP-A', [Tree('PRP$', ['your']), Tree('NN', ['camera'])])])]), Tree('.', ['.'])]),
                                                          "frames" : []
                                                          },
                              "CC and tmp conditional" : {"sent" : "Go to the cellar and when you see a bomb, activate your camera.",
                                                          "tree" : Tree('S', [Tree('S-A', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cellar'])])])])]), Tree('CC', ['and']), Tree('S-A', [Tree('SBAR-TMP', [Tree('WHADVP-0', [Tree('WRB', ['when'])]), Tree('S-A', [Tree('NP-SBJ-A', [Tree('PRP', ['you'])]), Tree('VP', [Tree('VBP', ['see']), Tree('NP-A', [Tree('DT', ['a']), Tree('NN', ['bomb'])]), Tree('ADVP-0', [Tree('-NONE-', ['*T*'])])])])]), Tree(',', [',']), Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['activate']), Tree('NP-A', [Tree('PRP$', ['your']), Tree('NN', ['camera'])])])]), Tree('.', ['.'])]),
                                                          "frames": [],
                                                          }
                              }
    #Examples the current methods do not correctly parse
    self.hardDict = { "Carry_NP_NP" : {"sent" : "Carry the hostages from the kitchen and bedroom to the cellar.",
                                       "tree" : Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Carry']), Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['hostages'])]), Tree('PP-CLR', [Tree('IN', ['from']), Tree('NP-A', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['kitchen'])]), Tree('CC', ['and']), Tree('NP', [Tree('NP', [Tree('NN', ['bedroom'])]), Tree('PP', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cellar'])])])])])])]), Tree('.', ['.'])])
                                       },
                      "Go_NP_NP" : {"sent": "Go to the cellar and kitchen.",
                                    "tree": Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cellar']), Tree('CC', ['and']), Tree('NN', ['kitchen'])])])]), Tree('.', ['.'])])
                                    },
                      "Carry_NP_NP_2" : {"sent": "Carry meals to the cafeteria and lounge.",
                                         "tree": Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Carry']), Tree('NP-A', [Tree('NNS', ['meals'])]), Tree('PP-CLR', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cafeteria']), Tree('CC', ['and']), Tree('NN', ['lounge'])])])]), Tree('.', ['.'])])
                                         },
                      "Carry_NPNP_NPNP" : {"sent" : "Carry meals from the kitchen and cafeteria to the office and lounge.",
                                           #Wrong pp attachment for this tree ("to the office" modifies cafeteria)
                                           "tree" : Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Carry']), Tree('NP-A', [Tree('NNS', ['meals'])]), Tree('PP-CLR', [Tree('IN', ['from']), Tree('NP-A', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['kitchen'])]), Tree('CC', ['and']), Tree('NP', [Tree('NP', [Tree('NN', ['cafeteria'])]), Tree('PP', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['office'])])])]), Tree('CC', ['and']), Tree('NP', [Tree('NN', ['lounge'])])])])]), Tree('.', ['.'])])
                                           },
                      "Two carrys only one source-dest because of wrongly embedded 'to' pp": {"sent" : "Carry the hostages from the kitchen and the cafeteria to the cellar.",
                                                                                              "tree" : Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Carry']), Tree('NP-A', [Tree('DT', ['the']), Tree('NNS', ['hostages'])]), Tree('PP-CLR', [Tree('IN', ['from']), Tree('NP-A', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['kitchen'])]), Tree('CC', ['and']), Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['cafeteria'])]), Tree('PP', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cellar'])])])])])])]), Tree('.', ['.'])])
                                                                                              },
                      "Extracts VP, produces S trees with no verb" : {"sent" : "Go and defuse the bomb in the cellar.",
                                                                      "tree" : Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Go']), Tree('CC', ['and']), Tree('VB', ['defuse']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['bomb'])]), Tree('PP-MNR', [Tree('IN', ['in']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['cellar'])])])]), Tree('.', ['.'])])
                                                                      },
                      "Carry_NPNP_NP" : {"sent" : "Carry meals to the library and cellar from the kitchen.",
                                         "tree": Tree('S', [Tree('NP-SBJ-A', [Tree('-NONE-', ['*'])]), Tree('VP', [Tree('VB', ['Carry']), Tree('NP-A', [Tree('NP', [Tree('NNS', ['meals'])]), Tree('PP', [Tree('TO', ['to']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['library']), Tree('CC', ['and']), Tree('NN', ['cellar'])])])]), Tree('PP-CLR', [Tree('IN', ['from']), Tree('NP-A', [Tree('DT', ['the']), Tree('NN', ['kitchen'])])])]), Tree('.', ['.'])]),
                                         }
                      }
def append_period(self,tree):
    """Add a period node when tree does not already end in one.

    Nothing happens for an empty tree or when the last child's node is '.'.
    Otherwise a Tree labelled '.' + self.depthdelim + '1', holding a single
    leaf built from self.get_ulid(), is appended to tree[1] (the second
    child, not the tree itself).
    """
    if len(tree) == 0 or tree[-1].node == '.':
        return

    label = '.' + self.depthdelim + str(1)
    leaf = '.' + '__' + str(self.get_ulid()) + '__'
    tree[1].append(Tree(label, [leaf]))
def split_conjunctions_sparse(parse_tree):
    '''Find conjunctions and split the parse tree around them.

    Three shapes of sentence are distinguished once an "if" is present and
    a subtree whose label contains "SBAR-ADV" is found:

    * 'if-and' ('If you see a hostage and a bomb...'): the command part is
      split on "and" and the whole if-subtree is put in front of each piece.
    * 'if-or' ('If you see a hostage or a bomb...'): the full tree is split
      on "or" and every piece is split again on "and".
    * plain 'if': the full tree is split on "and".

    Anything else is treated as a simple command and split on "and". This
    includes a sentence that contains "if" but has no "SBAR-ADV" subtree,
    which previously left the result unbound and raised UnboundLocalError.

    Returns a dictionary with the single entry {0: (trees, "and")}.
    '''
    splitter = Split()

    #DEBUG
    # NOTE(review): this renders to a hard-coded absolute path on every call.
    g1 = Digraph(format='png')
    utils.print_tree_to_png(g1, [parse_tree])
    g1.render("/home/gian/hri_project/figures/parse_trees/command.gv")

    #ADDED
    #check whether it's 'if-and' or 'if-or' phrase
    #example: 'If you see a hostage and|or a bomb...'
    if splitter.num_ccs(parse_tree, "if") > 0:
        for tree_pos in parse_tree.treepositions():
            node = parse_tree[tree_pos]
            #we don't want the leaves
            if not isinstance(node, Tree):
                continue
            if "SBAR-ADV" not in node.node:
                continue

            #separate the 'if' subtree from the command tree; both are
            #copies so the caller's tree is left untouched
            if_tree = deepcopy(node)
            cmd_tree = deepcopy(parse_tree)
            del cmd_tree[tree_pos]

            num_ands = splitter.num_ccs(if_tree, "and")
            num_ors = splitter.num_ccs(if_tree, "or")

            if num_ands > 0:
                #'if-and': prefix every split command with the if-subtree
                trees = []
                for cmd in splitter.split_on_multiple_ccs(cmd_tree, 'and'):
                    trees.append(Tree('S', [if_tree] + list(cmd)))
            elif num_ors > 0:
                #'if-or': split on "or" first, then each piece on "and"
                trees = []
                for or_tree in splitter.split_on_multiple_ccs(parse_tree, "or"):
                    trees.extend(splitter.split_on_multiple_ccs(or_tree, "and"))
            else:
                #simple 'if' phrase
                #e.g. 'If you see a hostage activate the radio and the camera'
                trees = splitter.split_on_multiple_ccs(parse_tree, "and")
            #only the first SBAR-ADV subtree is considered
            break
        else:
            #"if" occurs but no SBAR-ADV subtree was found: fall back to the
            #simple-command split instead of leaving trees unbound
            trees = splitter.split_on_multiple_ccs(parse_tree, "and")
    else:
        #it's a simple command
        #e.g. 'Activate the radio and the camera'
        trees = splitter.split_on_multiple_ccs(parse_tree, "and")

    #DEBUG
    for i, item in enumerate(trees):
        g2 = Digraph(format='png')
        utils.print_tree_to_png(g2, [item])
        g2.render(
            "/home/gian/hri_project/figures/parse_trees/splitted_command{}.gv".
            format(str(i)))

    #Abstracting away path to conjunction because it isn't used anyway
    #Only support 'and' right now
    res = {}
    res[0] = (trees, "and")
    return res
def extract_frames_from_parse(parse_tree_string, verbose=False): """Take a string representing the parse tree as input, and print the semantic parse. The result list consists of a list of tuples, with each tuple containing the VerbNet frame and its associated tree.""" # TODO: Use verbose argument result_list = [] # In case we're handed an bad string, bail somewhat gracefully try: parse_tree = Tree.parse(parse_tree_string) except ValueError: print "Error: semantics could not parse tree", repr(parse_tree_string) return result_list # Temporarily (and maybe permanently) disabled features: # 1. Clause splitting: we have not found any example where it does something # 2. Activizing clauses: for now, passives do not matter. # Split clauses to handle them separately # split_clause_dict = split_clauses(parse_tree) # Activize clauses # for key, (clause, conjunction) in split_clause_dict.items(): # activized_clause = activize_clause(clause) # split_clause_dict[key] = (activized_clause, conjunction) # TODO: This strange loop is because split_clauses may not work # for (clause, conjunction) in split_clause_dict.values(): for clause, conjunction in ((parse_tree, ''), ): # Split conjunctions and duplicate arguments if necessary #split_tree_dict = split_conjunctions(clause) split_tree_dict = split_conjunctions_sparse(clause) #DEBUG #print "SPLIT TREE DICT: ---------------" #print split_tree_dict if conjunction != '': result_list.append(conjunction) for (split_tree, conjunction) in split_tree_dict.values(): for tree in split_tree: if conjunction and verbose: print "Subtree ({}):".format(conjunction) print tree.pprint(force_multiline=True) # TODO: Deactivated for now # Store whether there was an existential there # if is_existential(str(tree)): # tag_list.append('ex') # Transformational grammar stuff orig_tree = tree tree = assertion_filter(tree) tree = existential_there_insertion(tree) tree = invert_clause(tree) tree = wh_movement(tree) if verbose and tree != orig_tree: print 
'Transformed tree:' print tree.pprint(force_multiline=True) match = match_verb(tree, verbose=verbose) if match: result_list.append(match) return result_list