def _parse_userPref(self, chktree):
    """Parse a chunk tree as a user-preference utterance.

    A "userPreference" EntitySet is filled by ``_parse_entity_pairs``. If
    that yields no entities at all, the utterance is probably not a user
    preference, so a fresh "unknown" EntitySet is used instead (for the DM).

    Returns whatever ``_expandMultiEntitySets`` produces for the chosen set.
    """
    if self.vb:
        print("Called _parse_userPref(chktree):- EntitySet")

    prefs = self._parse_entity_pairs(
        chktree, EntitySet("userPreference", self.vb))

    if prefs.count_entities() == 0:
        # No entity pairs were found: report "unknown" rather than an
        # empty user preference.
        prefs = EntitySet("unknown")

    return self._expandMultiEntitySets(prefs)
def _parse_as_when(self, chktree):
    """Parse a "when" (year) question into a "trivia" EntitySet.

    The set always carries ``type = "year"``. Each direct child of
    ``chktree`` that is a tree branch labelled TITLE or PERSON adds a
    ``movieTitle`` or ``person`` entity holding that branch's text.

    Returns a one-element list containing the EntitySet.
    """
    if self.vb:
        print("Called _parse_as_when(chktree):- EntitySet")

    entSet = EntitySet("trivia", self.vb)
    entSet.add_entity("type", "year")

    # Branch label -> name of the entity recorded for it.
    subject_entities = {"TITLE": "movieTitle", "PERSON": "person"}

    found_subject = False
    for x in chktree:
        if not self._isTreeBranch(x):
            continue
        entity_name = subject_entities.get(x.node)
        if entity_name is not None:
            found_subject = True
            entSet.add_entity(entity_name, self._getTreeTxt(x))

    # Debug trace only; like every other trace here it honours self.vb.
    if not found_subject and self.vb:
        print("_parse_as_when: no TITLE or PERSON subject found")

    return [entSet]
def _expandMultiEntitySets(self, entSet):
    """Split negated and numbered entities out into their own EntitySets.

    Entity keys are matched against ``[!]name[digits]``:

    * A key with a leading ``!`` is split off when the base set has a
      ``rating``. The new set gets the inverted rating
      (5 <-> 1, 4 <-> 2, any other value unchanged).
    * Otherwise a key with trailing digits is split off and the new set
      gets the base set's rating as-is.

    A split-off key is removed from the base set and stored in the new set
    under the bare name (without ``!`` or digits). Keys that do not match
    the pattern at all (empty, or starting with a digit) are left in the
    base set.

    Returns a list whose first element is the (reduced) copy of ``entSet``,
    followed by one EntitySet per split-off entity.
    """
    if self.vb:
        print("before expanding entSet: %s" % (entSet.toString()))
        print("")

    # Compared with == (not hashed) so any rating value type is accepted.
    inverse_rating = ((5, 1), (4, 2), (2, 4), (1, 5))

    returnVar = [entSet.copy()]
    base = returnVar[0]
    for k, v in entSet.get_entityitems():
        x = re.search(r'^(\!)?([^\d]+)(\d+)?', k)
        if x is None:
            # Nothing to split on; previously this raised AttributeError.
            continue

        # '!' only counts when there is a rating that can be inverted.
        negated = bool(x.group(1)) and base.has_entity('rating')
        if not negated and not x.group(3):
            continue

        base.remove_entity(k)
        newEntSet = EntitySet(base.get_classifier())
        newEntSet.add_entity(x.group(2), v)

        theRating = base.find_entity('rating')
        if negated:
            for rating, inverted in inverse_rating:
                if theRating == rating:
                    theRating = inverted
                    break
        newEntSet.add_entity('rating', theRating)
        returnVar.append(newEntSet)

    return returnVar
def _parse_as_TF(self, chktree):
    """Parse a true/false (yes/no) trivia question.

    Returns a one-element list holding a "trivia_yesno" EntitySet filled in
    by ``_parse_entity_pairs``.
    """
    if self.vb:
        print("Called _parse_as_TF(chktree):- EntitySet")

    entSet = EntitySet("trivia_yesno", self.vb)
    entSet = self._parse_entity_pairs(chktree, entSet)
    # The hand-written leaf/branch walk that used to follow this return was
    # unreachable and has been removed.
    return [entSet]
def _parse_as_what(self, chktree):
    """Parse a "what" question into a "trivia" EntitySet.

    Walks the direct children of ``chktree``:

    * Leaves seen before a ``type`` entity exists decide what is being
      asked for. KW_YEAR delegates to ``_parse_as_when``. KW_DIRECTOR or
      KW_STAR followed by a PERSON branch only sets the role for that
      person; otherwise it delegates to ``_parse_as_who``. KW_GENRE,
      KW_MOVIE, KW_PLOT and GNRE set ``type`` directly.
    * Leaves seen after ``type`` is set update the negation flag and may
      change the role for the next PERSON or add a genre.
    * TITLE / PERSON branches are stored with ``add_entity2`` together with
      the current negation flag, which is then reset.

    Returns a one-element list containing the EntitySet, or whatever the
    delegated parser returns.
    """
    if self.vb:
        print("Called _parse_as_what(%s):- EntitySet" % (chktree.node))

    entSet = EntitySet("trivia", self.vb)
    looking_for = 'other'
    negation = False

    for itor, x in enumerate(chktree):
        if not self._isLeaf(x):
            if x.node == "TITLE":
                entSet.add_entity2('movieTitle', self._getTreeTxt(x), negation)
            elif x.node == "PERSON":
                subject = 'person' if looking_for == 'other' else looking_for
                entSet.add_entity2(subject, self._getTreeTxt(x), negation)
            negation = False
            continue

        nodetype = self._getLeafTos(x)
        nodetxt = self._getLeafTxt(x)

        if not entSet.has_entity('type'):
            if nodetype == "KW_YEAR":
                return self._parse_as_when(chktree)
            elif nodetype == "KW_DIRECTOR" or nodetype == "KW_STAR":
                # The keyword may be the last child; treat "nothing follows"
                # like "not followed by a PERSON" instead of raising
                # IndexError.
                following = None
                if itor + 1 < len(chktree):
                    following = chktree[itor + 1]
                if (following is not None
                        and self._isTreeBranch(following)
                        and following.node == 'PERSON'):
                    # The keyword describes the person that follows.
                    if nodetype == 'KW_DIRECTOR':
                        looking_for = 'director'
                    else:
                        looking_for = 'actor'
                else:
                    return self._parse_as_who(chktree)
            elif nodetype == "KW_GENRE":
                entSet.add_entity("type", "genre")
            elif nodetype == "KW_MOVIE":
                entSet.add_entity("type", "movieTitle")
            elif nodetype == "KW_PLOT":
                entSet.add_entity("type", "plot")
            elif nodetype == "GNRE":
                entSet.add_entity("type", "movieTitle")
                entSet.add_entity("genre", nodetxt)
        else:
            negation = self._negator(nodetxt, negation)
            # TODO: a POS leaf means the type may have been mis-categorized.
            # Ex: "What was director Michael Bay's latest movie?"
            # Director is not what the user is looking for; director
            # describes Michael Bay, which describes movie. Not handled yet.
            if nodetype == "KW_DIRECTOR":
                looking_for = "director"
            elif nodetype == "KW_STAR":
                looking_for = "actor"
            elif nodetype == "KW_PLOT":
                ## TODO: TEST THIS
                entSet.add_entity('type', 'plot')
            elif nodetype == "GNRE":
                entSet.add_entity2('genre', nodetxt, negation)
                negation = False

    return [entSet]
def _parse_as_who(self, chktree):
    """Parse a "who" question into a "trivia" EntitySet.

    Leaves update the negation flag. A KW_DIRECTOR / KW_STAR leaf sets the
    ``type`` entity if there is none yet, otherwise it becomes the role
    used for the next PERSON. TITLE branches are stored as ``movieTitle``.
    A PERSON branch seen before any ``type`` sets ``type = person`` and is
    stored as ``character``; later PERSON branches are stored under the
    current role. Negation is reset after each TITLE or PERSON.

    If nothing was collected the classifier is changed to "unknown"; if
    entities exist but no ``type``, ``type = person`` is added.

    Returns a one-element list containing the EntitySet.
    """
    if self.vb:
        print("Called _parse_as_who(chktree):- EntitySet")

    # Leaf tag -> role keyword.
    role_by_tag = {"KW_DIRECTOR": 'director', "KW_STAR": 'actor'}

    result = EntitySet("trivia", self.vb)
    negated = False
    role = 'person'

    for child in chktree:
        if self._isLeaf(child):
            negated = self._negator(self._getLeafTxt(child), negated)
            keyword = role_by_tag.get(self._getLeafTos(child))
            if not keyword:
                continue
            if result.has_entity('type'):
                role = keyword
            else:
                result.add_entity('type', keyword)
            continue

        label = child.node
        if label == "TITLE":
            result.add_entity2("movieTitle", self._getTreeTxt(child), negated)
            negated = False
        elif label == "PERSON":
            name = self._getTreeTxt(child)
            if result.has_entity('type'):
                result.add_entity2(role, name, negated)
            else:
                result.add_entity('type', 'person')
                result.add_entity('character', name)
            negated = False

    if result.count_entities() == 0:
        # Empty entity set -> unable to parse utterance -> let DM know.
        result.change_classifier("unknown")
    elif not result.has_entity('type'):
        result.add_entity('type', 'person')

    return [result]
def _parse_command(self, chunked):
    """Parse a command utterance into a single EntitySet.

    The first leaf of ``chunked[0]`` whose tag starts with 'V' is classified
    with ``_classify_word`` against ``self.classifierSS_list``. A missing
    verb or a score below 0.25 yields the "unknown" classifier.

    The remaining children of ``chunked`` (index 1 onwards, leaves only) are
    scanned for the first keyword tag, which decides the ``type`` entity.
    KW_PLOT additionally forces the classifier to "trivia". If no keyword is
    found, ``type`` is stored as ``False``.

    Entity pairs are then added by ``_parse_entity_pairs`` and any
    ``rating`` entity is removed.

    Returns a one-element list containing the EntitySet.
    """
    if self.vb:
        print("Called _parse_command(chktree):- EntitySet")

    theClassifier = None
    theVerb = None  # only used by the debug trace below
    score = 0.0
    negator = False
    for x in chunked[0]:
        # ensure this isn't an adverb
        if self._isLeaf(x) and self._getLeafTos(x)[0] == 'V':
            theClassifier, score = self._classify_word(x, self.classifierSS_list)
            theVerb = self._getLeafTxt(x)
            negator = self._negator(theVerb, negator)
            break

    if self.vb:
        print("parse_cmd: %s -> %s with score %.05f" % (theVerb, theClassifier, score))
    if theClassifier is None or score < .25:
        theClassifier = "unknown"

    # Keyword tag -> value of the 'type' entity. GNRE is included to catch
    # sentences such as "I want to watch a Comedy".
    type_by_tag = {
        'KW_DIRECTOR': 'director',
        'KW_STAR': 'actor',
        'KW_GENRE': 'genre',
        'KW_PLOT': 'plot',
        'KW_MOVIE': 'movieTitle',
        'GNRE': 'movieTitle',
    }

    entity_type = False
    for itor, x in enumerate(chunked):
        if itor < 1:
            # Index 0 is the command node handled above.
            continue
        if self._isTreeBranch(x):
            ## this shouldn't happen, but just in case
            continue

        tagOfSpeech = self._getLeafTos(x)
        # NOTE(review): the negation state is tracked but not consumed
        # anywhere in this method.
        negator = self._negator(self._getLeafTxt(x), negator)

        if tagOfSpeech in type_by_tag:
            entity_type = type_by_tag[tagOfSpeech]
            if tagOfSpeech == 'KW_PLOT':
                theClassifier = 'trivia'
            # TODO: for KW_MOVIE / GNRE the previous code tried to look back
            # over preceding JJ*/NN* leaves for words like 'recent' or 'old'
            # and record a 'timespan' entity. That loop never moved its
            # index, wrote to the entity set before it was created, and
            # always stopped on its first iteration (it started on the
            # keyword leaf itself), so it had no effect. It was removed
            # rather than guessed at; reimplement once the intended
            # classifier call is known.
            break

    entSet = EntitySet(theClassifier)
    entSet.add_entity("type", entity_type)
    entSet = self._parse_entity_pairs(chunked, entSet)
    entSet.remove_entity('rating')
    return [entSet]