def partiallyObservableAction(self, gameState):
    global possibleGhostStates
    global frontierStates
    pacmanLoc = gameState.getPacmanPosition()
    ghostLoc = gameState.getGhostPosition(1)
    room = gameState.getLoctoRoom()
    if room[int(ghostLoc[0])][int(ghostLoc[1])] in gameState.data.roomsOn:
        possibleGhostStates.clear()
        possibleGhostStates[ghostLoc] = 1.0
        frontierStates = [ghostLoc]
        if (pacmanLoc, ghostLoc) in MDPUtil.AstarPolicy.keys():
            action = self.convertTuple(MDPUtil.AstarPolicy[(pacmanLoc, ghostLoc)])
        else:
            action = self.A_star(pacmanLoc, ghostLoc, self.heuristic, gameState)
            MDPUtil.AstarPolicy[(pacmanLoc, ghostLoc)] = self.convertAction(action)
    else:
        predictLoc = (-1, -1)
        while True:
            predictLoc = util.chooseFromDistribution(possibleGhostStates)
            if len(possibleGhostStates.keys()) == 1:
                predictLoc = possibleGhostStates.keys()[0]
                break
            while (room[int(predictLoc[0])][int(predictLoc[1])] in gameState.data.roomsOn) and len(possibleGhostStates.keys()) != 1:
                del possibleGhostStates[predictLoc]
                predictLoc = util.chooseFromDistribution(possibleGhostStates)
                if len(possibleGhostStates.keys()) == 1:
                    predictLoc = possibleGhostStates.keys()[0]
                    break
            if predictLoc != pacmanLoc:
                break
        tempFrontierStates = Set()
        for frontierLoc in frontierStates:
            tempNode = self.node(int(frontierLoc[0]), int(frontierLoc[1]))
            possibleActions = self.getActions(gameState, tempNode)
            for frontierAction in possibleActions:
                actionTuple = self.convertAction(frontierAction)
                newFrontierState = (frontierLoc[0] + actionTuple[0], frontierLoc[1] + actionTuple[1])
                if newFrontierState in possibleGhostStates.keys():
                    continue
                if room[int(newFrontierState[0])][int(newFrontierState[1])] in gameState.data.roomsOn:
                    continue
                tempFrontierStates.add(newFrontierState)
                oldProb = possibleGhostStates[frontierLoc]
                possibleGhostStates[newFrontierState] += oldProb / float(len(possibleActions))
        possibleGhostStates.normalize()
        frontierStates = tempFrontierStates
        if (pacmanLoc, predictLoc) in MDPUtil.AstarPolicy.keys():
            action = self.convertTuple(MDPUtil.AstarPolicy[(pacmanLoc, predictLoc)])
        else:
            action = self.A_star(pacmanLoc, predictLoc, self.heuristic, gameState)
            MDPUtil.AstarPolicy[(pacmanLoc, predictLoc)] = self.convertAction(action)
    return action
def get_generation(self):
    #if self.id == 'i0':
    timePeriod = self.environment.get_time()
    if timePeriod != self.last_time:
        tran = self.transitions[self.last_time]
        disto = {}
        for s in tran:
            if s['from'] == self.last_generation:
                for ns in s['to']:
                    disto[ns['to']] = ns['prob']
                break
        try:
            ss = self.last_generation
            self.last_generation = util.chooseFromDistribution(disto)
        except Exception as e:
            #if self.id == 'i0':
            print 'vvvvvvvvvvvvvvvvvvvvvvvvvvvv'
            print self.last_time, ss, self.last_generation, timePeriod, disto
            print '****************************'
            raise e
        self.last_time = timePeriod
    res = self.last_generation - 18
    if res < 0:
        res = 0
    return res
def getDirectionalExpectimaxValue(self, gameState, agentIndex, depth):
    if (agentIndex == 0 and depth == 1) or gameState.isWin() or gameState.isLose():
        return self.evaluationFunction(gameState)
    legalMoves = gameState.getLegalActions(agentIndex)
    if agentIndex == 0:
        return max(
            self.getDirectionalExpectimaxValue(
                state, getNextIndexAgent(agentIndex, gameState), depth - 1)
            for state in [gameState.generatePacmanSuccessor(action)
                          for action in legalMoves])
    else:
        ghost = DirectionalGhost(index=agentIndex)
        act_prob_dict = ghost.getDistribution(gameState)
        val_prob_dict = util.Counter()
        for action in legalMoves:
            state = gameState.generateSuccessor(agentIndex, action)
            val = self.getDirectionalExpectimaxValue(
                state, getNextIndexAgent(agentIndex, gameState), depth)
            # Accumulate instead of assign: two actions can lead to the same value,
            # and assignment would silently drop probability mass.
            val_prob_dict[val] += act_prob_dict[action]
        val_prob_dict.normalize()
        return util.chooseFromDistribution(val_prob_dict)
def getAction(self, state):
    """
    Get the action the ghost will do based on a distribution of possible actions.
    """
    dist = self.getDistribution(state)
    return chooseFromDistribution(dist)
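This getAction pattern assumes a getDistribution method that returns a util.Counter keyed by legal actions. A minimal sketch of such a method, assuming a ghost that moves uniformly at random (names here are illustrative, not taken from the snippets above):

def getDistribution(self, state):
    # Sketch (assumption): uniform distribution over legal actions, in the
    # Counter form that util.chooseFromDistribution samples from.
    dist = util.Counter()
    for action in state.getLegalActions(self.index):
        dist[action] = 1.0
    dist.normalize()  # make the weights sum to 1 before sampling
    return dist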
def getAction( self, state ):
    dist = self.getDistribution(state)
    # return state.getLegalActions(self.index)[0]
    if len(dist) == 0:
        return Directions.STOP
    else:
        return util.chooseFromDistribution( dist )
def getAction( self, state ):
    dist = self.getDistribution(state)
    if 'Stop' in dist.keys() and len(dist) > 1:
        del dist['Stop']
    if len(dist) == 0:
        return Directions.STOP
    else:
        return util.chooseFromDistribution( dist )
def getThreeAction( self, state, list_of_ghost_actions):
    print evaluateHeuristic(state)
    dist = self.getDistribution(state, list_of_ghost_actions)
    if len(dist) == 0:
        return Directions.STOP
    else:
        act = util.chooseFromDistribution( dist )
        #print self.index, act
        return act
def getAction(self, state):
    if (state.isWin() or state.isLose()):  #whoami
        return Directions.STOP
    dist = self.getDistribution(state)
    if len(dist) == 0:
        return Directions.STOP
    else:
        return util.chooseFromDistribution(dist)
def sample_data(self, trainingData, trainingLabels, sample_weights):
    td = util.Counter()
    tl = util.Counter()
    for i in range(len(trainingLabels)):
        k = util.chooseFromDistribution(sample_weights)
        td[i] = trainingData[k]
        tl[i] = trainingLabels[k]
    return td, tl
    "*** YOUR CODE HERE ***"
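Besides a Counter, chooseFromDistribution also accepts a list of (probability, value) pairs, which is one way sample_weights above could be supplied for weighted resampling. A hedged usage sketch with made-up data (it assumes the same util module used by these snippets):

# Illustrative only: resample indices 0..2 in proportion to example weights.
# The (prob, value) pairs are assumed to sum to 1 before sampling.
sample_weights = [(0.5, 0), (0.3, 1), (0.2, 2)]
trainingData = ['x0', 'x1', 'x2']
trainingLabels = [0, 1, 0]
resampled = [trainingData[util.chooseFromDistribution(sample_weights)]
             for _ in range(len(trainingLabels))]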
def getPartiallyObservableLoc(self, state):
    global possiblePacmanStates
    global frontierStates
    pacmanLoc = state.getPacmanPosition()
    ghostLoc = state.getGhostPosition(self.index)
    room = state.getLoctoRoom()
    if room[int(pacmanLoc[0])][int(pacmanLoc[1])] in state.data.roomsOn:
        possiblePacmanStates.clear()
        possiblePacmanStates[pacmanLoc] = 1.0
        frontierStates = [pacmanLoc]
        return pacmanLoc
    else:
        predictLoc = util.chooseFromDistribution(possiblePacmanStates)
        while (room[int(predictLoc[0])][int(predictLoc[1])] in state.data.roomsOn) and len(possiblePacmanStates.keys()) != 1:
            del possiblePacmanStates[predictLoc]
            predictLoc = util.chooseFromDistribution(possiblePacmanStates)
            if len(possiblePacmanStates.keys()) == 1:
                predictLoc = possiblePacmanStates.keys()[0]
                break
        tempFrontierStates = Set()
        for frontierLoc in frontierStates:
            tempNode = self.node(int(frontierLoc[0]), int(frontierLoc[1]))
            possibleActions = self.getActions(state, tempNode)
            for frontierAction in possibleActions:
                actionTuple = self.convertAction(frontierAction)
                newFrontierState = (frontierLoc[0] + actionTuple[0], frontierLoc[1] + actionTuple[1])
                if room[int(newFrontierState[0])][int(newFrontierState[1])] in state.data.roomsOn:
                    continue
                tempFrontierStates.add(newFrontierState)
                oldProb = possiblePacmanStates[frontierLoc]
                possiblePacmanStates[newFrontierState] += oldProb / float(len(possibleActions))
        possiblePacmanStates.normalize()
        frontierStates = tempFrontierStates
        pacmanLoc = predictLoc
        return pacmanLoc
def getAction(self, gameState):
    if not self.xl:
        return max(gameState.getLegalActions(self.index),
                   key=lambda action: (1 - 2 * self.index) *
                       self.rawStateLib[gameState.generateSuccessor(self.index, action)])
    # add randomness
    sumprob = 0
    dist = util.Counter()
    for action in gameState.getLegalActions(self.index):
        newState = gameState.generateSuccessor(self.index, action)
        #sumprob += self.k ** self.rawStateLib[newState][self.index]
        dist[action] = (self.k ** ((1 - 2 * self.index) * self.rawStateLib[newState]))
    dist.normalize()
    return util.chooseFromDistribution( dist )
def make_decision(self):
    if self.dcopPhase == 0:
        if not self.decision_made:
            if self.last_state is not None:
                probs = {}
                for ns in self.next_states(self.last_state):
                    probs[ns] = self.probabilities[(self.last_state, ns)]
                if len(probs) > 0:
                    self.last_state = chooseFromDistribution(probs)
                    self.commit_generators(self.generatorsValues[self.last_state])
                    self.commit_children_powerLines(self.powerLineValues[self.last_state])
                    self.decision_made = True
                    for n in self.relayNode.neighbours:
                        self.send(n, {'type': 'action-taken'})
                else:
                    # Asking children for solving DCOP
                    self.printer('%d %s miss Unknown' % (self.environment.get_time(), self.name))
                    self.test_log.append(2)
                    for c in self.relayNode.neighbours:
                        self.send(c, {'type': 'request-for-dcop'})
                    self.dcopPhase = 1
            else:
                # String exceptions are invalid; wrap the message in Exception.
                raise Exception('last state is None')
        elif self.all_neighbours_took:
            # Checking for good decision
            powerLines = self.relayNode.get_powerLine_values()
            powerLineValues = tuple([powerLines[pl] for pl in powerLines])
            # check for goodness
            if self.last_state[1] == powerLineValues:
                self.done = True
            else:
                # Asking children for solving DCOP
                self.printer('%d %s miss Prediction' % (self.environment.get_time(), self.name))
                self.test_log.append(1)
                for c in self.relayNode.neighbours:
                    self.send(c, {'type': 'request-for-dcop'})
                self.dcopPhase = 1
    elif self.dcopPhase == 3:
        self.dcopPhase = 0
        self.decision_made = True
        self.done = True
def predictGhostMove(myPos, ghostPos):
    #TODO: NOTE: This actually doesn't work since it doesn't check validity of a move for the ghost
    '''
    Returns the expected position of the ghost
    :param myPos:
    :param ghostPos:
    :return:
    '''
    moves = util.Counter()
    #r = random.random()  #where r is the degree of noise (randomness)
    #if optimal:
    if myPos[1] < ghostPos[1]:
        moves[Directions.SOUTH] = 1
        if myPos[0] < ghostPos[0]:
            moves[Directions.SOUTH] = .8
            moves[Directions.WEST] = .2
        elif myPos[0] > ghostPos[0]:
            moves[Directions.SOUTH] = .8
            moves[Directions.EAST] = .2
    elif myPos[1] > ghostPos[1]:
        moves[Directions.NORTH] = 1
        if myPos[0] < ghostPos[0]:
            moves[Directions.NORTH] = .8
            moves[Directions.WEST] = .2
        elif myPos[0] > ghostPos[0]:
            moves[Directions.NORTH] = .8
            moves[Directions.EAST] = .2
    elif myPos[1] == ghostPos[1]:
        if myPos[0] < ghostPos[0]:
            moves[Directions.WEST] = .9
            moves[Directions.STOP] = .1
        elif myPos[0] > ghostPos[0]:
            moves[Directions.EAST] = 1
            moves[Directions.STOP] = .1
        else:
            moves[Directions.STOP] = 1
    #currently there is no check if ghost will move off the map or into a wall
    ## should just pick optimally for closer towards pac
    ## ghost might be using euc or man instead of maze distance though!!!
    move = util.chooseFromDistribution(moves)
    vector = Actions.directionToVector(move)
    newPos = (ghostPos[0] + vector[0], ghostPos[1] + vector[1])
    return newPos
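The TODO above notes that the predicted move is never checked against the walls. One hedged way to close that gap, assuming access to a walls grid like the one gameState.getWalls() returns; this is a sketch, not the author's fix, and buildGhostMoveDistribution stands in for the Counter construction above:

def predictLegalGhostMove(myPos, ghostPos, walls):
    # Sketch (assumption): drop directions whose target square is a wall,
    # fall back to STOP if nothing legal remains, then sample as before.
    moves = buildGhostMoveDistribution(myPos, ghostPos)  # hypothetical helper
    legalMoves = util.Counter()
    for direction, prob in moves.items():
        dx, dy = Actions.directionToVector(direction)
        nx, ny = int(ghostPos[0] + dx), int(ghostPos[1] + dy)
        if not walls[nx][ny]:
            legalMoves[direction] = prob
    if len(legalMoves) == 0:
        legalMoves[Directions.STOP] = 1.0
    legalMoves.normalize()
    move = util.chooseFromDistribution(legalMoves)
    vector = Actions.directionToVector(move)
    return (ghostPos[0] + vector[0], ghostPos[1] + vector[1])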
def getAction(self, state):
    """
    Compute the action to take in the current state. With
    probability self.epsilon, we should take a random action and
    take the best policy action otherwise. Note that if there are
    no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    totalDeno = 0
    probability_list = []
    "*** YOUR CODE HERE ***"
    if (len(legalActions) == 0):
        return None
    if (util.flipCoin(self.epsilon)):
        for action in legalActions:
            currQValue = self.getQValue(state, action)
            currProb = math.exp(self.epsilon * currQValue)
            totalDeno = totalDeno + currProb
        for action in legalActions:
            currQValue = self.getQValue(state, action)
            currProb = math.exp(self.epsilon * currQValue)
            prob = (currProb / totalDeno)
            probability_list.append((prob, action))
        action = util.chooseFromDistribution(probability_list)
    else:
        action = self.computeActionFromQValues(state)
    return action
def assignJointActions(self, state, depth=4):
    startTime = time.clock()
    pacmanPosition = state.getPacmanPosition()
    #pos = state.getGhostPosition( self.index )
    allGhostPositions = state.getGhostPositions()
    numGhosts = len(allGhostPositions)
    jointActions = self.get_all_joint_actions(numGhosts, state)
    #print jointActions
    bestJointAction = None
    bestJointActionValue = float("-inf")
    for jointAction in jointActions:
        value = evaluate_joint_action(jointAction, state, depth)
        if value > bestJointActionValue:
            bestJointActionValue = value
            bestJointActions = []
            bestJointActions.append(jointAction)
        elif value == bestJointActionValue:
            bestJointActions.append(jointAction)
        # if value(jointAction) > bestJointActionValue:
        #     bestJointActionValue = value(jointAction)
        #     bestJointAction = jointAction
        # elif value(jointAction) == bestJointActionValue:
        #     compare
    bestProb = 0.95
    distribution = util.Counter()
    for a in bestJointActions:
        distribution[tuple(a)] = bestProb / len(bestJointActions)
    for a in jointActions:
        distribution[tuple(a)] += (1 - bestProb) / len(jointActions)
    # jointAction = {}
    # for i in xrange(1, numGhosts+1):
    #     jointAction[i] = 'Stop'
    #print "bestJointAction is : ", bestJointAction
    print evaluateHeuristic(state)
    return list(util.chooseFromDistribution(distribution))
def getRandomSuccessor(self, gameState, agentIndex, currentDepth):
    # Random selection of an action from the ghost's distribution
    dist = self.getDistribution(agentIndex, gameState)
    selectedAction = util.chooseFromDistribution(dist)
    legal = gameState.getLegalActions(agentIndex)
    if Directions.STOP in legal:
        legal.remove(Directions.STOP)
    # NB: this loop expands every successor, which is not optimal.
    # In practice we could call expectiMax only on the action selected above.
    for action in legal:
        successor = gameState.generateSuccessor(agentIndex, action)
        nextDepth = (currentDepth + 1) if (agentIndex == self.numGhosts) else currentDepth
        nextAgent = (agentIndex + 1) % (self.numGhosts + 1)
        (score, oldAction) = self.expectiMax(successor, nextAgent, nextDepth)
        # Keep the score of the action that was randomly selected
        if action == selectedAction:
            selectedScore = score
    return (selectedScore, selectedAction)
def getAction(self, gameState):
    leftnum = len(gameState.getLeftPiles(self.index))
    legalaction = gameState.getLegalActions(self.index)
    sumprob = 0
    max = -99999
    dist = util.Counter()
    bestaction = legalaction[0]
    for action in legalaction:
        newgameState = gameState.generateSuccessor(self.index, action)
        #print str(newgameState.getBoard()) in self.gameStateValue[leftnum - 1].keys()
        if str(newgameState.getBoard()) in self.gameStateValue[self.index][leftnum - 1].keys():
            '''
            if self.gameStateValue[self.index][leftnum - 1][str(newgameState.getBoard())] > max:
                bestaction = action
                max = self.gameStateValue[self.index][leftnum - 1][str(newgameState.getBoard())]
            '''
            sumprob += (self.k ** self.gameStateValue[self.index][leftnum - 1][str(newgameState.getBoard())])
        else:
            '''
            if self.evalFunc(newgameState, self.index) * 1.9 > max:
                bestaction = action
                max = self.evalFunc(newgameState, self.index) * 1.9
            '''
            sumprob += (self.k ** evalFunc(newgameState, self.index))
    for action in legalaction:
        newgameState = gameState.generateSuccessor(self.index, action)
        if str(newgameState.getBoard()) in self.gameStateValue[self.index][leftnum - 1].keys():
            dist[action] = float(self.k ** self.gameStateValue[self.index][leftnum - 1][str(newgameState.getBoard())]) / float(sumprob)
        else:
            dist[action] = float(self.k ** evalFunc(newgameState, self.index)) / float(sumprob)
    dist.normalize()
    return util.chooseFromDistribution( dist )
    return bestaction
def getAction(self, state):
    dist = self.getDistribution(state)
    if len(dist) == 0:
        return Directions.STOP
    else:
        return util.chooseFromDistribution(dist)
def getAction( self, state ):
    dist = self.getDistribution(state)
    if len(dist) == 0:
        return Directions.STOP
    else:
        return util.chooseFromDistribution( dist )
def getAction( self, state ):
    dist = self.getDistribution( state )
    if len(dist) == 0:
        return Actions.STOP
    return chooseFromDistribution( dist )
def pickAction(self, state):
    "Returns the action according to probability distribution."
    # NOTE: chooseFromDistribution requires a distribution argument; the original
    # call passed none. Assuming a getDistribution(state) helper as in the other
    # agents in this listing.
    return util.chooseFromDistribution(self.getDistribution(state))
def getAction(self, state):
    dist = self.getDistribution(state)
    return chooseFromDistribution(dist)
def getAction(self, state, total_pacmen, agentIndex):
    dist = self.getDistribution(state, total_pacmen)
    if len(dist) == 0:
        return Directions.STOP
    else:
        return util.chooseFromDistribution(dist)
for i in range(21, 52):
    smallDice.append(i)
mediumDice = range(51, 256)
largeDice = range(256, 10000)
roundTenDice = range(30, 10000, 10)
roundHundredDice = range(100, 10000, 100)
score = 0
results = []
for attempt in range(numGames):
    diceDist = util.Counter()
    diceDist[1] = 5
    for number in range(1, 11):
        diceDist[number] = 10*(1/float(len(range(2,10))))
    for number in range(11, 101):
        diceDist[number] = 3*(1/float(len(range(11,100))))
    for number in range(101, 1001):
        diceDist[number] = 1*(1/float(len(range(101,1000))))
    diceDist.normalize()
    numDice = util.chooseFromDistribution( diceDist )
    sizeDist = util.Counter()
    for number in smallDice:
        sizeDist[number] = 150*(1/float(len(smallDice)))
    for number in mediumDice:
        sizeDist[number] = 40*(1/float(len(mediumDice)))
    for number in largeDice:
        sizeDist[number] = 10*(1/float(len(largeDice)))
    for number in dNDDice:
        sizeDist[number] += 350*(1/float(len(dNDDice)))
    for number in roundTenDice:
        sizeDist[number] += 40*(1/float(len(roundTenDice)))
    for number in roundHundredDice:
        sizeDist[number] += 60*(1/float(len(roundTenDice)))
    sizeDist.normalize()
    diceSize = util.chooseFromDistribution( sizeDist )
    modDist = util.Counter()
    modDist[0] = 150
def direcional_expectimax(self, gameState, agent, depth):
    if gameState.isLose() or gameState.isWin() or len(gameState.getLegalActions()) == 0:
        return (gameState.getScore(), 0)
    if depth == self.depth:
        return (self.evaluationFunction(gameState), 0)
    actions = gameState.getLegalActions(agent)
    nextAgent = agent + 1
    if agent == gameState.getNumAgents() - 1:
        nextAgent = 0
        depth = depth + 1
    if agent == 0:
        curMax = (-float('inf'), 0)
        currentActions = [curMax]
        for action in actions:
            temp = self.direcional_expectimax(
                gameState.generateSuccessor(agent, action), nextAgent, depth)
            if temp[0] == curMax[0]:
                currentActions.append((temp[0], action))
            elif temp[0] > curMax[0]:
                curMax = (temp[0], action)
                currentActions = [curMax]
        return random.choice(currentActions)
    else:
        ghostState = gameState.getGhostState(agent)
        isScared = ghostState.scaredTimer > 0
        pacmanPosition = gameState.getPacmanPosition()
        pos = gameState.getGhostPosition(agent)
        speed = 1
        if isScared:
            speed = 0.5
        actionVectors = [Actions.directionToVector(a, speed) for a in actions]
        newPositions = [(pos[0] + a[0], pos[1] + a[1]) for a in actionVectors]
        distancesToPacman = [util.manhattanDistance(pos, pacmanPosition) for pos in newPositions]
        if isScared:
            bestScore = max(distancesToPacman)
            bestProb = 0.8
        else:
            bestScore = min(distancesToPacman)
            bestProb = 0.8
        succ_actions = []
        for action in actions:
            succ_actions.append(
                self.direcional_expectimax(
                    gameState.generateSuccessor(agent, action), nextAgent, depth))
        bestActions = [
            action for action, distance in zip(succ_actions, distancesToPacman)
            if distance == bestScore
        ]
        # Construct distribution
        dist = util.Counter()
        for a in bestActions:
            dist[a] = bestProb / len(bestActions)
        for a in succ_actions:
            dist[a] += (1 - bestProb) / len(succ_actions)
        dist.normalize()
        return util.chooseFromDistribution(dist)