コード例 #1
0
ファイル: NLparser.py プロジェクト: jerroydmoore/YARB
	def _parse_userPref(self, chktree):
		"""Parse a chunk tree as a user-preference utterance.

		The entity pairs that ``_parse_entity_pairs`` extracts from
		``chktree`` are collected into a "userPreference" EntitySet. If
		that yields no entities at all, a fresh "unknown" EntitySet is
		used instead so the DM is told the utterance was not understood.

		Returns the result of ``_expandMultiEntitySets`` for the chosen set.
		"""
		if self.vb:
			print("Called _parse_userPref(chktree):- EntitySet")

		prefs = self._parse_entity_pairs(chktree, EntitySet("userPreference", self.vb))
		if prefs.count_entities() != 0:
			return self._expandMultiEntitySets(prefs)

		## No entity pairs were found, so this probably is not a userPref
		## utterance after all: report Unknown to the DM.
		return self._expandMultiEntitySets(EntitySet("unknown"))
コード例 #2
0
ファイル: NLparser.py プロジェクト: jerroydmoore/YARB
	def _parse_as_when(self, chktree):
		"""Parse a "when ...?" question into a year-typed trivia EntitySet.

		The returned set always carries ``type = "year"``. Every top-level
		TITLE branch of ``chktree`` adds a "movieTitle" entity and every
		top-level PERSON branch adds a "person" entity; leaves and other
		branches are ignored.

		Returns a one-element list holding the EntitySet.
		"""
		if self.vb:
			print("Called _parse_as_when(chktree):- EntitySet")

		entSet = EntitySet("trivia", self.vb)
		entSet.add_entity("type", "year")

		## Chunk label -> name of the entity recorded for that chunk.
		subject_keys = {"TITLE": "movieTitle", "PERSON": "person"}
		found_subject = False
		for x in chktree:
			if not self._isTreeBranch(x):
				continue
			key = subject_keys.get(x.node)
			if key is not None:
				found_subject = True
				entSet.add_entity(key, self._getTreeTxt(x))

		## Diagnostic only, so it is gated on the verbose flag like every
		## other trace in this parser. The old never-returned parameter_list
		## bookkeeping it used to describe has been removed.
		if not found_subject and self.vb:
			print("_parse_as_when: no TITLE or PERSON subject found")

		return [ entSet ]
コード例 #3
0
ファイル: NLparser.py プロジェクト: jerroydmoore/YARB
	def _expandMultiEntitySets(self, entSet):
		"""Split negated / numbered entities of ``entSet`` into separate sets.

		Every entity key is matched against the shape ``[!]name[digits]``:

		* ``!name...`` while the base set has a 'rating': the entity is
		  removed from the base set and re-added as ``name`` in a new set
		  whose rating is mirrored (5<->1, 4<->2, any other value kept).
		* otherwise ``name<digits>``: the entity is removed from the base
		  set and re-added as ``name`` in a new set that receives the base
		  set's 'rating' lookup.

		Keys that do not fit the pattern (empty, or starting with a digit)
		are left untouched in the base set.

		Returns a list: the (possibly reduced) copy of ``entSet`` first,
		followed by one new EntitySet per entity that was split off.
		"""
		if self.vb:
			print("before expanding entSet: %s" % (entSet.toString()))
			print("")

		## Compiled once; group 1 = '!' marker, 2 = entity name, 3 = numeric suffix.
		key_pattern = re.compile(r'^(\!)?([^\d]+)(\d+)?')
		## Mirror image of a rating on the 1..5 scale; 3 (and anything else) maps to itself.
		inverse_rating = {5: 1, 4: 2, 2: 4, 1: 5}

		base = entSet.copy()
		returnVar = [ base ]
		for k,v in entSet.get_entityitems():
			x = key_pattern.search(k)
			if x is None:
				## Nothing to expand; previously this crashed on x.group().
				continue

			if x.group(1) and base.has_entity('rating'):
				## found '!' and we have something we can inverse 'rating':
				base.remove_entity(k)
				newEntSet = EntitySet(base.get_classifier())
				newEntSet.add_entity(x.group(2), v)

				theRating = base.find_entity('rating')
				newEntSet.add_entity('rating', inverse_rating.get(theRating, theRating))

				returnVar.append(newEntSet)
			elif x.group(3):
				## numeric value found!
				base.remove_entity(k)
				newEntSet = EntitySet(base.get_classifier())
				newEntSet.add_entity(x.group(2), v)
				## NOTE(review): the rating is copied even when the base set
				## has none; the outcome depends on find_entity -- confirm.
				newEntSet.add_entity('rating', base.find_entity('rating'))
				returnVar.append(newEntSet)
		return returnVar
コード例 #4
0
ファイル: NLparser.py プロジェクト: jerroydmoore/YARB
	def _parse_as_TF(self, chktree):
		"""Parse a true/false (yes/no) trivia question.

		Creates a "trivia_yesno" EntitySet and lets ``_parse_entity_pairs``
		populate it from ``chktree``.

		Returns a one-element list holding the resulting EntitySet.
		"""
		if self.vb:
			print("Called _parse_as_TF(chktree):- EntitySet")

		## The hand-written leaf/branch walk that used to follow the return
		## statement was unreachable and has been dropped; entity extraction
		## is delegated entirely to _parse_entity_pairs.
		entSet = EntitySet("trivia_yesno", self.vb)
		entSet = self._parse_entity_pairs(chktree, entSet)
		return [ entSet ]
コード例 #5
0
ファイル: NLparser.py プロジェクト: jerroydmoore/YARB
	def _parse_as_what(self, chktree):
		"""Parse a "what ...?" question into a trivia EntitySet.

		Walks the top-level children of ``chktree``:

		* Until a 'type' entity has been recorded, keyword leaves decide the
		  question type. KW_YEAR hands the whole tree to ``_parse_as_when``;
		  KW_DIRECTOR / KW_STAR hand it to ``_parse_as_who`` unless the very
		  next child is a PERSON branch, in which case the keyword only
		  describes that person.
		* Once a type is known, leaves update the negation state and the
		  role ('director' / 'actor') applied to the next PERSON, and GNRE
		  leaves add a 'genre' entity.
		* TITLE branches add 'movieTitle'; PERSON branches add an entity
		  named after the current role ('person' when none was seen).

		Returns a one-element list holding the EntitySet, or whatever the
		delegated ``_parse_as_when`` / ``_parse_as_who`` call returns.
		"""
		if self.vb:
			print("Called _parse_as_what(%s):- EntitySet" % (chktree.node))

		entSet = EntitySet("trivia", self.vb)
		looking_for = 'other'
		negation = False

		for itor,x in enumerate(chktree):
			if not self._isLeaf(x):
				if x.node == "TITLE":
					entSet.add_entity2('movieTitle', self._getTreeTxt(x), negation)
				elif x.node == "PERSON":
					subject = looking_for
					if looking_for == 'other':
						subject = 'person'

					entSet.add_entity2(subject, self._getTreeTxt(x), negation)
					negation = False
				continue

			nodetype = self._getLeafTos(x)
			nodetxt = self._getLeafTxt(x)
			if not entSet.has_entity('type'):
				if nodetype == "KW_YEAR":
					return self._parse_as_when(chktree)
				elif nodetype == "KW_DIRECTOR" or nodetype == "KW_STAR":
					## Peek at the following child, if any. A keyword that
					## ends the utterance used to raise IndexError here; it
					## is now treated like any other non-PERSON follower.
					describes_person = False
					if itor + 1 < len(chktree):
						following = chktree[itor+1]
						describes_person = self._isTreeBranch(following) and following.node == 'PERSON'

					if not describes_person:
						return self._parse_as_who(chktree)

					looking_for = 'actor'
					if nodetype == 'KW_DIRECTOR':
						looking_for = 'director'
				elif nodetype == "KW_GENRE":
					entSet.add_entity("type", "genre")
				elif nodetype == "KW_MOVIE":
					entSet.add_entity("type", "movieTitle")
				elif nodetype == "KW_PLOT":
					entSet.add_entity("type", "plot")
				elif nodetype == "GNRE":
					entSet.add_entity("type", "movieTitle")
					entSet.add_entity("genre", nodetxt)
			else:
				negation = self._negator(nodetxt, negation)
				## NOTE: a 'POS' leaf here means the type was mis-categorized.
				## Ex: "What was director Michael Bay's latest movie?"
				## Director is not what the user is looking for; it describes
				## Michael Bay, which describes movie. Not handled yet.
				if nodetype == "KW_DIRECTOR":
					looking_for = "director"
				elif nodetype == "KW_STAR":
					looking_for = "actor"
				elif nodetype == "KW_PLOT":
					## TODO: TEST THIS
					entSet.add_entity('type', 'plot')
				elif nodetype == "GNRE":
					entSet.add_entity2('genre', nodetxt, negation)
					negation = False

		return [ entSet ]
コード例 #6
0
ファイル: NLparser.py プロジェクト: jerroydmoore/YARB
	def _parse_as_who(self, chktree):
		"""Parse a "who ...?" question into a trivia EntitySet.

		Leaves update the negation state; a KW_DIRECTOR / KW_STAR leaf sets
		the 'type' entity the first time and afterwards only changes the
		role used for later PERSON branches. TITLE branches add
		'movieTitle'. A PERSON branch seen before any type is known sets
		type 'person' and records the name as 'character'; otherwise the
		name is added under the current role.

		If nothing was collected the set is reclassified as "unknown"; if
		entities exist but no type was set, the type defaults to 'person'.

		Returns a one-element list holding the EntitySet.
		"""
		if self.vb:
			print("Called _parse_as_who(chktree):- EntitySet")

		result = EntitySet("trivia", self.vb)
		negated = False
		role = 'person'
		for node in chktree:
			if self._isLeaf(node):
				negated = self._negator(self._getLeafTxt(node), negated)

				tag = self._getLeafTos(node)
				if tag == "KW_DIRECTOR":
					tagged_role = 'director'
				elif tag == "KW_STAR":
					tagged_role = 'actor'
				else:
					continue

				## First role keyword names the question type; later ones
				## qualify the people that follow.
				if result.has_entity('type'):
					role = tagged_role
				else:
					result.add_entity('type', tagged_role)
				continue

			label = node.node
			if label == "TITLE":
				result.add_entity2("movieTitle", self._getTreeTxt(node), negated)
				negated = False
			elif label == "PERSON":
				name = self._getTreeTxt(node)
				if result.has_entity('type'):
					result.add_entity2(role, name, negated)
					negated = False
				else:
					result.add_entity('type', 'person')
					result.add_entity('character', name)

		if result.count_entities() == 0:
			## Nothing could be extracted: tell the DM we did not understand.
			result.change_classifier("unknown")
		elif not result.has_entity('type'):
			result.add_entity('type', 'person')

		return [ result ]
コード例 #7
0
ファイル: NLparser.py プロジェクト: jerroydmoore/YARB
	def _parse_command(self, chunked):
		"""Parse a command-style utterance into an EntitySet.

		``chunked[0]`` is scanned for its first leaf whose tag starts with
		'V'; that leaf is classified with ``_classify_word`` against
		``self.classifierSS_list``. The classifier falls back to "unknown"
		when no such leaf exists or the score is below 0.25.

		The remaining top-level leaves are then scanned for the first
		keyword tag that determines the 'type' entity (director, actor,
		genre, plot or movieTitle). A KW_PLOT keyword also forces the
		classifier to 'trivia'.

		Finally ``_parse_entity_pairs`` fills in the entities and any
		'rating' entity is removed.

		Returns a one-element list holding the EntitySet.
		"""
		if self.vb:
			print("Called _parse_command(chktree):- EntitySet")

		theClassifier = None
		theVerb = None #debugging var
		score = 0.0
		## NOTE(review): the negation state is tracked below but nothing in
		## this method reads it yet; the calls are kept in case _negator
		## matters beyond its return value -- confirm before removing.
		negator = False
		for x in chunked[0]:
			## Only leaves tagged 'V...' qualify, so adverbs are skipped.
			## startswith() also tolerates an empty tag.
			if self._isLeaf(x) and self._getLeafTos(x).startswith('V'):
				theClassifier,score = self._classify_word(x, self.classifierSS_list)
				theVerb = self._getLeafTxt(x)
				negator = self._negator(theVerb, negator)
				break

		if self.vb:
			print("parse_cmd: %s -> %s with score %.05f" % (theVerb, theClassifier, score))
		if theClassifier is None or score < .25:
			theClassifier = "unknown"

		entity_type = False
		for itor,x in enumerate(chunked):
			if itor < 1:
				## index 0 is the command chunk handled above
				continue
			if self._isTreeBranch(x): ## this shouldn't happen, but just in case
				continue

			tagOfSpeech = self._getLeafTos(x)
			theWord = self._getLeafTxt(x)
			negator = self._negator(theWord, negator)

			if tagOfSpeech == 'KW_DIRECTOR':
				entity_type = "director"
			elif tagOfSpeech == 'KW_STAR':
				entity_type = 'actor'
			elif tagOfSpeech == 'KW_GENRE':
				entity_type = 'genre'
			elif tagOfSpeech == 'KW_PLOT':
				entity_type = 'plot'
				theClassifier = 'trivia'
			elif tagOfSpeech == 'KW_MOVIE' or tagOfSpeech == 'GNRE':
				## include GNRE to catch sentences, such as "I want to watch a Comedy"
				entity_type = 'movieTitle'
				## TODO: look backwards from this keyword (chunked[itor-1],
				## chunked[itor-2], ...) for JJ*/NN* words such as 'recent',
				## 'old', 'new', 'first', 'last', 'popular', 'similar', 'more'
				## and record e.g. a 'timespan' entity once entSet exists.
				## The previous attempt never ran past the keyword itself
				## and would have crashed or looped forever if it had, so
				## it was removed.

			if entity_type:
				break

		entSet = EntitySet(theClassifier)
		## NOTE(review): when no keyword was found the 'type' entity is
		## stored as False (unchanged legacy behaviour).
		entSet.add_entity("type", entity_type)

		entSet = self._parse_entity_pairs(chunked, entSet)
		entSet.remove_entity('rating')

		return [ entSet ]