Example #1
0
	def getAction(self, actions, gameState):
		"""
		Returns the expectimax action using self.depth and self.evaluationFunction.

		Nodes where the agent to move is self.index take the maximum over
		their successors; every other agent's node is scored as the uniform
		average over its legal actions.

		The `actions` argument is not consulted; legal actions are always
		queried from the game state.

		The search starts at gameState.getCurrPlayer().  When that player is
		self.index the result is a legal action, or None if the state is
		already terminal (win, lose, no legal action, or self.depth == 0).
		NOTE(review): if the current player is not self.index the root is
		treated as an ordinary node and a numeric value is returned instead
		of an action -- confirm callers only invoke this on the agent's turn.
		"""
		def Vopt(newGameState, numAgent, depth):
			# numAgent is a running ply counter; the agent to move is its
			# remainder modulo the number of agents.
			numAgents = newGameState.getNumAgents()
			agentIndex = numAgent % numAgents
			legalActions = newGameState.getLegalActions(agentIndex)
			# Only the very first call can have numAgent == self.index when
			# the search starts on the agent's own turn (the counter only grows).
			atRoot = numAgent == self.index

			def successorValue(action, nextDepth):
				return Vopt(newGameState.generateSuccessor(action, agentIndex), numAgent + 1, nextDepth)

			if newGameState.isWin() or newGameState.isLose() or len(legalActions) == 0 or depth == 0:
				if atRoot:
					return None
				return self.evaluationFunction(newGameState, self.evaluationArgs)

			if atRoot:
				# The root returns the maximizing action itself, not its value.
				return max(legalActions, key=lambda action: successorValue(action, depth))

			if agentIndex == self.index:
				return max(successorValue(action, depth) for action in legalActions)

			# Chance node.  The remaining depth drops by one once the agent
			# that moves immediately before self.index has acted.
			if (numAgent + 1) % numAgents == self.index:
				nextDepth = depth - 1
			else:
				nextDepth = depth
			count = len(legalActions)
			# Convert before dividing so integer scores are not truncated by
			# floor division on Python 2.
			return sum([float(successorValue(action, nextDepth)) / count for action in legalActions])

		return Vopt(gameState, gameState.getCurrPlayer(), self.depth)
Example #2
0
	def getAction(self, actions, gameState):
		"""
		Returns the minimax action using self.depth and self.evaluationFunction,
		searched with alpha-beta pruning.

		With probability 0.2 the search is skipped and a uniformly random
		element of `actions` is returned instead (None when `actions` is
		empty).  Otherwise the search starts at gameState.getCurrPlayer():
		nodes where the agent to move is self.index maximize, all other
		agents minimize.

		When the current player is self.index the search returns a legal
		action, or None if the state is already terminal (win, lose, no legal
		action, or self.depth == 0).
		NOTE(review): if the current player is not self.index the root is
		treated as an ordinary node and a numeric value is returned instead
		of an action -- confirm callers only invoke this on the agent's turn.
		"""
		def Vopt(newGameState, numAgent, depth, alpha, beta):
			# numAgent is a running ply counter; the agent to move is its
			# remainder modulo the number of agents.
			numAgents = newGameState.getNumAgents()
			agentIndex = numAgent % numAgents
			legalActions = newGameState.getLegalActions(agentIndex)

			def childValue(action, nextDepth, childAlpha, childBeta):
				return Vopt(newGameState.generateSuccessor(action, agentIndex), numAgent + 1, nextDepth, childAlpha, childBeta)

			if newGameState.isWin() or newGameState.isLose() or len(legalActions) == 0 or depth == 0:
				if numAgent == self.index:
					return None
				return self.evaluationFunction(newGameState, self.evaluationArgs)

			if numAgent == self.index:
				# Root: track the maximizing action itself.
				bestAction = None
				v = float('-inf')
				for action in legalActions:
					newV = childValue(action, depth, alpha, beta)
					# Replace the best action only on strict improvement.  A
					# pruned child returns just an upper bound (<= alpha),
					# which may tie the current best while its true value is
					# lower, so ties must not displace the earlier action.
					if bestAction is None or newV > v:
						v = newV
						bestAction = action
					if v >= beta:
						return bestAction
					alpha = max(v, alpha)
				return bestAction

			if agentIndex == self.index:
				v = float('-inf')
				for action in legalActions:
					v = max(v, childValue(action, depth, alpha, beta))
					if v >= beta:
						return v
					alpha = max(v, alpha)
				return v

			# Minimizing node.  The remaining depth drops by one once the
			# agent that moves immediately before self.index has acted.
			if (numAgent + 1) % numAgents == self.index:
				nextDepth = depth - 1
			else:
				nextDepth = depth
			v = float('+inf')
			for action in legalActions:
				v = min(v, childValue(action, nextDepth, alpha, beta))
				if v <= alpha:
					return v
				beta = min(beta, v)
			return v

		# Exploration: one move in five (on average) is picked at random.
		if random.random() < .2:
			if actions:
				return random.choice(list(actions))
			return None
		return Vopt(gameState, gameState.getCurrPlayer(), self.depth, float('-inf'), float('+inf'))