def generateVPIHuntersBoard(seed=None):
    width = 11
    height = 11
    foodHouseLeft = util.flipCoin(PROB_FOOD_LEFT)
    layoutTextGrid = [[' ' for _ in xrange(width)] for _ in xrange(height)]
    layoutTextGrid[0] = ['%' for _ in xrange(width)]
    layoutTextGrid[-1] = layoutTextGrid[0][:]
    for i in xrange(height):
        layoutTextGrid[i][0] = layoutTextGrid[i][-1] = '%'
    possibleLocations = pickPossibleLocations(width, height)
    # (foodX, foodY), (ghostX, ghostY) = tuple(random.sample(possibleLocations, 2))
    bottomLeft, topLeft, bottomRight, topRight = tuple(possibleLocations)
    foodX, foodY = topLeft
    ghostX, ghostY = topRight
    if not util.flipCoin(PROB_FOOD_LEFT):
        (foodX, foodY), (ghostX, ghostY) = (ghostX, ghostY), (foodX, foodY)
    layoutTextGrid[-foodY-1][foodX] = '.'
    layoutTextGrid[-ghostY-1][ghostX] = 'G'
    for foodWallX, foodWallY in buildHouseAroundCenter(foodX, foodY):
        if util.flipCoin(PROB_FOOD_RED):
            layoutTextGrid[-foodWallY-1][foodWallX] = 'R'
        else:
            layoutTextGrid[-foodWallY-1][foodWallX] = 'B'
    for ghostWallX, ghostWallY in buildHouseAroundCenter(ghostX, ghostY):
        if util.flipCoin(PROB_GHOST_RED):
            layoutTextGrid[-ghostWallY-1][ghostWallX] = 'R'
        else:
            layoutTextGrid[-ghostWallY-1][ghostWallX] = 'B'
    layoutTextGrid[5][5] = 'P'
    layoutTextRowList = [''.join(row) for row in layoutTextGrid]
    return layoutTextRowList
def mutate(crossed, prob, maxLength, threshold=1e6):
    for k, history in enumerate(crossed):
        for i, ele in enumerate(history):
            if util.flipCoin(prob):
                # if util.flipCoin(threshold * 1. / fitness(ele, s_belief, t_belief, source_M, M_proj))
                if util.flipCoin(0.8):
                    if util.flipCoin(0.5):
                        mutated = list(ele)
                        mutated[0] = random.choice(Actions)
                        crossed[k][i] = tuple(mutated)
                    else:
                        mutated = list(ele)
                        mutated[1] = Obs[np.random.choice(range(len(Obs)), 1, p=Obs_p)[0]]
                        crossed[k][i] = tuple(mutated)
                else:
                    mutated = list(ele)
                    mutated[0] = random.choice(Actions)
                    mutated[1] = Obs[np.random.choice(range(len(Obs)), 1, p=Obs_p)[0]]
                    crossed[k][i] = tuple(mutated)
        if util.flipCoin(prob):
            if util.flipCoin(0.5) and len(history) < maxLength:
                mutated = [0, 0]
                # mutated[0] = random.choice(Actions)
                # mutated[1] = Obs[np.random.choice(range(len(Obs)), 1, p=Obs_p)[0]]
                mutated = history[-1]
                crossed[k] = history + [tuple(mutated)]
            elif len(history) >= maxLength - 1:
                crossed[k].pop()
    return crossed
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    if len(legalActions) == 0:
        return None
    # pick a random one of the legal actions if
    # flipCoin returns true (exploration)
    if util.flipCoin(self.epsilon):
        return random.choice(legalActions)
    # otherwise, pick the best move determined by the policy (exploitation)
    return self.getPolicy(state)
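# Every snippet in this collection leans on util.flipCoin. For readers without the
# course util.py at hand, a minimal stand-in is sketched below. The one-argument
# signature is taken from the calls above; the body is an assumption about the
# usual implementation, not a quote of the original module.
import random

def flipCoin(p):
    # Return True with probability p, False otherwise -- a plausible
    # stand-in for util.flipCoin as used throughout these snippets.
    return random.random() < p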
def makeGrid(gridString):
    walk = False
    if gridString[0][0] == 'W':
        walk = True
    if walk:
        obstacleProb = 0.0
    else:
        obstacleProb = 0.2
    width, height = 10, 3
    grid = Grid(width, height)
    for h in range(height):
        if walk:
            grid[width-1][h] = 10
        elif gridString[0][0] == 'C':
            grid[width-1][h] = 35
        else:
            grid[width-1][h] = 2
    for x in range(0, width-1):
        for y in range(0, height):
            if util.flipCoin(obstacleProb):
                grid[x][y] = -2
            else:
                # grid[x][y] = '-1'
                grid[x][y] = ' '
    grid[0][0] = 'S'
    return grid
def chooseAction(self, state):
    # return random.choice(state.getLegalActions(self.index))
    if not self.firstTurnComplete:
        self.registerInitialState(state)
        self.firstTurnComplete = True
    """
    Picks among the actions with the highest Q(s,a).
    """
    actions = state.getLegalActions(self.index)
    if util.flipCoin(self.explorationRate):
        return random.choice(actions)
    # You can profile your evaluation time by uncommenting these lines
    # start = time.time()
    values = [(a, self.evaluate(state, a)) for a in actions]
    # print 'eval time for agent %d: %.4f' % (self.index, time.time() - start)
    # print 'VALUES: ' + str(values)
    maxValue = max(values, key=lambda val: val[1])[1]
    bestActions = [a for a, v in values if v == maxValue]
    action = random.choice(bestActions)
    self.update(state, action, self.getSuccessor(state, action))
    # print 'Features: ' + str(self.getFeatures)
    # print 'Weights: ' + str(self.weights)
    # print 'Action: ' + str(action) + ' - ' + str(self.getPosition(state)) + '--->' + str(self.getPosition(self.getSuccessor(state, action)))
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    """Description:
    If the flip of the coin is favorable it will choose a random action,
    else it will get the best.
    """
    """ YOUR CODE HERE """
    if not legalActions:
        return None
    if util.flipCoin(self.epsilon):
        action = random.choice(legalActions)
    else:
        action = self.getPolicy(state)
    """ END CODE """
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legal_actions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    # util.raiseNotDefined()
    # return action
    if not legal_actions:
        return None
    # use the flipCoin function to add randomness when choosing an action
    if util.flipCoin(self.epsilon):
        action = random.choice(legal_actions)
    else:
        action = self.computeActionFromQValues(state)
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    if len(legalActions) == 0:
        return None
    # epsilon-greedy: exploration or exploitation
    if util.flipCoin(self.epsilon):
        action = random.choice(legalActions)
    else:
        action = self.computeActionFromQValues(state)
    return action
def playDefense(self, state, offensePlayerIndex):
    if state.isWinner(offensePlayerIndex):
        return True
    if util.flipCoin(self.naiveFactor):
        return True
    else:
        return False
def getAction(self, state):
    legalActions = self.getLegalActions(state)
    if util.flipCoin(self.epsilon):
        # Epsilon chance to choose a random Hit or Stand, otherwise follow the policy.
        # Epsilon 0 = always policy.
        return random.choice(legalActions)
    return self.getPolicy(state)
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    if state not in self.LegalActions:
        self.LegalActions[state] = self.getLegalActions(state)
    legalActions = self.LegalActions[state]
    if util.flipCoin(self.epsilon):
        return random.choice(legalActions)
    return self.getPolicy(state)
    "*** YOUR CODE HERE ***"
    util.raiseNotDefined()
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    if state not in self.qTable:
        self.qTable[state] = {}
        for action in legalActions:
            self.qTable[state][action] = 0
    if len(legalActions) == 0:
        return None
    coin = util.flipCoin(self.epsilon)
    if coin:
        action = random.choice(legalActions)
    else:
        v = -9999
        for act in legalActions:
            if self.qTable[state][act] > v:
                v = self.qTable[state][act]
                action = act
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legal_actions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    if len(legal_actions) > 0:
        if util.flipCoin(self.epsilon):
            # Explore: pick the least-used action for this state
            action = min([(self.state_uses[(state, act)], act) for act in legal_actions])[1]
            print action
            if (state, action) not in self.state_uses:
                self.state_uses[(state, action)] = 0
            else:
                self.state_uses[(state, action)] += 1
        else:
            action = self.getPolicy(state)
            if (state, action) not in self.state_uses:
                self.state_uses[(state, action)] = 0
            else:
                self.state_uses[(state, action)] += 1
    return action
def getAction(self, state):
    # With some probability we take a random action;
    # otherwise, we follow the best action available.
    if util.flipCoin(self.epsilon):
        return random.choice(self.getLegalActions(state))
    else:
        return self.getPolicy(state)
def getAction(self, hitOrnot, position):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    targetlist = []
    legalDistance = []
    target = ()
    legalDistance = self.getAllpossibleDistance(position)
    # print "GA position is", position
    if legalDistance:
        if util.flipCoin(self.epsilon):
            # print "length", len(legalDistance)
            random_Distance = random.randint(0, len(legalDistance)-1)
            shoot_distance = legalDistance[random_Distance]
            # print "GA shoot_distance:", shoot_distance
            targetlist = self.findLocationWithShootDistance(position, shoot_distance)
            # print "GA TARGET LIST", targetlist, "len is", len(targetlist)
            randomTarget = random.randint(0, len(targetlist)-1)
            target = targetlist[randomTarget]
            print "shoot randomly at", target, self.q_shot_counter
        else:
            target = self.getPolicy(hitOrnot, position)
    return target
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    # Calculate probability for taking actions
    if len(legalActions) > 0:
        # Using probability from self.epsilon
        if util.flipCoin(self.epsilon):
            # Get random action from list using random.choice
            action = random.choice(legalActions)
        else:
            # Get action from the policy pi.
            action = self.getPolicy(state)
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    if len(legalActions) == 0:
        return None
    else:
        prob = util.flipCoin(self.epsilon)
        if prob:
            return random.choice(legalActions)
        else:
            q = util.Counter()
            for a in legalActions:
                """if self.getQValue(state, a) > result or action == None:
                       action = a
                       result = self.getQValue(state, a)"""
                q[state, a] = self.getQValue(state, a)
            return q.argMax()[1]
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    action = self.computeActionFromQValues(state)
    legal_actions = self.getLegalActions(state)
    if len(legal_actions) <= 1:
        return action
    # suboptimal_actions.remove(action)
    # if state not in self.visit:
    #     self.visit[state] = 0
    # self.visit[state] += 1
    if util.flipCoin(self.epsilon):  # /self.visit[state]
        return random.choice(legal_actions)
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    # check whether there are legal actions; if not, return None
    if not legalActions:
        return action
    # flip the coin with epsilon to decide which action to return: random or best policy
    if util.flipCoin(self.epsilon):
        # return a random action; if nothing comes back, return None
        return random.choice(legalActions) or None
    # return the best policy action
    return self.getPolicy(state)
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    if len(legalActions) < 1:
        return None
    else:
        randomAction = util.flipCoin(self.epsilon)  # epsilon = prob of true; 1-epsilon = prob false
        if randomAction:
            action = random.choice(legalActions)
        else:
            action = self.getPolicy(state)
    print "action", action
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    if (self.isTerminal(state)):
        return action
    prob = self.epsilon
    if util.flipCoin(prob):
        action = random.choice(legalActions)
    else:
        action = self.getPolicy(state)
    self.doAction(state, action)
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    qValLegalActions = util.Counter()
    for legalAction in legalActions:
        qValLegalAction = self.getQValue(state, legalAction)
        qValLegalActions[legalAction] = qValLegalAction
    action = None
    if len(qValLegalActions) > 0:
        action = qValLegalActions.argMax()
        if util.flipCoin(self.epsilon):
            action = random.choice(legalActions)
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    actionsToExplore = []
    for action in legalActions:
        if self.getQValue(state, action) == 0:
            actionsToExplore.append(action)
    action = None
    chooseRandom = util.flipCoin(self.epsilon)
    if (chooseRandom):
        if (len(actionsToExplore) > 0):
            action = random.choice(actionsToExplore)
        else:
            action = random.choice(legalActions)
    else:
        action = self.getPolicy(state)
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    # if terminal state, return None
    if len(legalActions) == 0:
        return None
    # check random true or false
    randomOrNot = util.flipCoin(self.epsilon)
    if randomOrNot:
        # Choose east, west, north, south? how do I get the list?
        return random.choice(legalActions)
    else:
        # best policy action: get policy or compute action from q values?
        return self.computeActionFromQValues(state)
    util.raiseNotDefined()
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    # OUR CODE HERE
    if legalActions is None or len(legalActions) == 0:
        return None
    # So do we take a random action or not?
    if util.flipCoin(self.epsilon):  # lyee says: no idea what epsilon is!
        # We will take a random action
        action = random.choice(legalActions)
    else:
        # We follow the policy
        action = self.getPolicy(state)  # lyee fix: kendall previously had just getPolicy.. I added the 'self' part. hope that's what kendall meant D:
    return action
def getAction(self, state):
    """
    What action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise.

    After you choose an action make sure to inform your parent
    self.doAction(state, action)

    This is done for you, just don't clobber it

    HINT: you might want to use util.flipCoin here..... (see util.py)
    """
    # Pick Action
    action = None
    epsilon = self.epsilon
    take_random_action = util.flipCoin(epsilon)
    list_of_actions = self.getLegalActions(state)
    if take_random_action:
        action = random.choice(list_of_actions)
    else:
        action = self.getPolicy(state)
    # return action
    # Need to inform parent of action for Pacman
    self.doAction(state, action)
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    "*** YOUR CODE HERE ***"
    s = state
    legalActions = self.getLegalActions(state)
    # print 'LEGAL:' + str(legalActions)
    if len(legalActions) == 0:
        # print 'NONE'
        return None
    action = None
    if util.flipCoin(self.epsilon):
        action = random.choice(legalActions)
    else:
        action = self.getPolicy(s)
    # print action
    # print 'return' + str(action)
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    if len(legalActions) == 0:
        # print "No legal actions"
        action = None
    elif util.flipCoin(self.epsilon):
        # print "Random Choice of Action"
        action = random.choice(legalActions)
    else:
        # print "Choice of action based on Policy"
        action = self.getPolicy(state)
    # print "Action:", action
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    actions = self.getLegalActions(state)
    bestQValue = -99999999
    bestActions = []
    for action in actions:
        q = self.getQValue(state, action)
        if q == bestQValue:
            bestActions.append(action)
        elif q > bestQValue:
            bestActions = [action]
            bestQValue = q
    if len(bestActions) == 0:
        return None
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    if legalActions:
        if util.flipCoin(self.epsilon):
            action = random.choice(legalActions)
        else:
            action = random.choice(bestActions)
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    "*** YOUR CODE HERE ***"
    # terminal state
    if not legalActions:
        return None
    else:
        # pick whether we should explore by flipping a coin
        goRandom = util.flipCoin(self.epsilon)
        if goRandom:
            # randomly choose an action
            return random.choice(legalActions)
        else:
            # choose the best action
            return self.getPolicy(state)
def chooseAction(self, gameState):
    """
    @version 1.2.1
    """
    self.observationHistory.append(gameState)
    actions = gameState.getLegalActions(self.index)
    if len(actions) == 0:
        return None
    if util.flipCoin(self.learningRate):
        action = random.choice(actions)
        print("now do exploration")
    else:
        action = self.computeActionFromQValues(gameState)
    self.stepLeft -= 1
    if self.considerBack:
        # double check actions or bestActions as argument
        action = self.updateCarryFood(
            gameState, self.backToSafetyPosition(gameState, actions))
    self.lastState = gameState
    # action = self.updateCarryFood(gameState, random.choice(bestActions))
    self.lastAction = action
    # self.updateWeights(self.lastState, self.lastAction, gameState)
    print "above all, I choose ", action
    print "============================="
    print "=============ends============"
    print "============================="
    return self.updateCarryFood(gameState, action)
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    ################################################
    # 0039026 #
    ###########
    """Instance variables you have access to
        - self.epsilon (exploration prob)
        - self.alpha (learning rate)
        - self.discount (discount rate)"""
    # util.flipCoin(p)
    # print util.flipCoin(10)
    # print self.epsilon
    takeRandom = util.flipCoin(self.epsilon)
    if not takeRandom:
        # print "COINhere false ----------------"
        action = self.computeActionFromQValues(state)
    else:
        # print "COINhere true ----------------"
        action = random.choice(legalActions)
    return action
def getAction(self, state):
    legalActions = self.getLegalActions(state)
    action = None
    "end game"
    if not self.getLegalActions(state):
        return action
    if self.new_episode:
        "Analyze state"
        self.state_discription = self.getStateDiscription(state)
        self.new_episode = False
    "Explore or Exploit"
    if util.flipCoin(self.epsilon):
        action = random.choice(legalActions)
    else:
        "predict based on current state"
        values = self.first_model.predict(np.array([self.state_discription]))
        actions = copy.deepcopy(ACTIONS)
        actions = [action for _, action in sorted(zip(values, actions), reverse=True)]
        # take the highest-ranked predicted action that is actually legal
        for a in actions:
            if a in legalActions:
                action = a
                break
        if action not in legalActions:
            action = ACTIONS[4]
    self.doAction(state, action)
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    if len(legalActions) == 0:
        return action
    if util.flipCoin(self.epsilon):
        action = random.choice(legalActions)
    else:
        action = self.computeActionFromQValues(state)
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    isHeads = util.flipCoin(self.epsilon)
    if len(legalActions) == 0:
        return None
    if isHeads:
        # print "Taking the random choice"
        return random.choice(legalActions)
    else:
        # print "Taking the known policy"
        return self.getPolicy(state)
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    # util.raiseNotDefined()
    if len(legalActions) != 0:  # if there are no legal actions, return action None
        if util.flipCoin(self.epsilon):  # probability of exploration
            action = random.choice(legalActions)  # pick an action randomly
        else:
            action = self.getPolicy(state)
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    "*** YOUR CODE HERE ***"
    # Pick Action
    random_choice = util.flipCoin(self.epsilon)
    legalActions = self.getLegalActions(state)
    action = self.getPolicy(state)
    if random_choice:
        if not legalActions:
            return None
        return random.choice(legalActions)
    return action
def getAction(self, state, withEpsilon=True):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.
    """
    # print "getAction QLearningAgentt"
    # Pick Action
    print(self.epsilon)
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    if len(legalActions) == 0:
        return action
    if withEpsilon and util.flipCoin(self.epsilon):
        action = random.choice(legalActions)
    else:
        action = self.computeActionFromQValues(state)
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    actionList = self.getLegalActions(state)
    # returns true with self.epsilon probability
    probability = util.flipCoin(self.epsilon)
    # if true, explore
    if probability:
        return random.choice(actionList)
    # if false, exploit the learned Q-values
    else:
        return self.computeActionFromQValues(state)
def chooseAction(self, state):
    # Append game state to observation history...
    self.observationHistory.append(state)
    # Pick Action
    legalActions = state.getLegalActions(self.index)
    action = None
    if (DEBUG):
        print self.newline()
        print "AGENT " + str(self.index) + " choosing action!"
    if len(legalActions):
        if util.flipCoin(self.epsilon) and self.isTraining():
            action = random.choice(legalActions)
            if (DEBUG):
                print "ACTION CHOSE FROM RANDOM: " + action
        else:
            action = self.computeActionFromQValues(state)
            if (DEBUG):
                print "ACTION CHOSE FROM Q VALUES: " + action
    self.lastAction = action
    foodLeft = len(self.getFood(state).asList())
    # Prioritize going back to start if we have <= 2 pellets left
    if foodLeft <= 2:
        bestDist = 9999
        for a in legalActions:
            successor = self.getSuccessor(state, a)
            pos2 = successor.getAgentPosition(self.index)
            dist = self.getMazeDistance(self.start, pos2)
            if dist < bestDist:
                action = a
                bestDist = dist
    if (DEBUG):
        print "AGENT " + str(self.index) + " chose action " + action + "!"
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    possible = util.flipCoin(self.epsilon)
    # print('This is the possibility', possible)
    if possible:
        action = random.choice(legalActions)
    else:
        action = self.getPolicy(state)
    # util.raiseNotDefined()
    return action
def getAction(self, state):
    """
    Compute the action to take in the current state. With probability self.epsilon,
    we should take a random action and take the best policy action otherwise. Note
    that if there are no legal actions, which is the case at the terminal state, you
    should choose None as the action.

    HINT: You might want to use util.flipCoin(prob)
    HINT: To pick randomly from a list, use random.choice(list)
    """
    "*** YOUR CODE HERE ***"
    # Local declarations
    action = None
    actionList = self.getLegalActions(state)
    # Error check: make sure the list of actions is not empty
    if not (actionList):
        return None
    # Probability check: choose an action randomly or from the computed Q-values
    if (util.flipCoin(self.epsilon)):
        action = random.choice(actionList)
    else:
        action = self.computeActionFromQValues(state)
    return action
def generateRandomHuntersBoard(seed=None, width=None, height=None):
    """Note that this is constructing a string, so indexing is [-y-1][x] rather than [x][y]"""
    random.seed(seed)
    leftHouseTop = util.flipCoin(PROB_LEFT_TOP)
    if not width or not height:
        width = random.randrange(11, 20, 4)
        height = random.randrange(11, 16, 4)
    layoutTextGrid = [[' ' for _ in range(width)] for _ in range(height)]
    layoutTextGrid[0] = ['%' for _ in range(width)]
    layoutTextGrid[-1] = layoutTextGrid[0][:]
    for i in range(height):
        layoutTextGrid[i][0] = layoutTextGrid[i][-1] = '%'
    possibleLocations = pickPossibleLocations(width, height)
    # (foodX, foodY), (ghostX, ghostY) = tuple(random.sample(possibleLocations, 2))
    bottomLeft, topLeft, bottomRight, topRight = tuple(possibleLocations)
    if leftHouseTop:
        foodX, foodY = topLeft
        ghostX, ghostY = bottomRight if util.flipCoin(PROB_OPPOSITE_CORNERS) else topRight
    else:
        foodX, foodY = bottomLeft
        ghostX, ghostY = topRight if util.flipCoin(PROB_OPPOSITE_CORNERS) else bottomRight
    if not util.flipCoin(PROB_FOOD_LEFT):
        (foodX, foodY), (ghostX, ghostY) = (ghostX, ghostY), (foodX, foodY)
    layoutTextGrid[-foodY-1][foodX] = '.'
    layoutTextGrid[-ghostY-1][ghostX] = 'G'
    for foodWallX, foodWallY in buildHouseAroundCenter(foodX, foodY):
        if util.flipCoin(PROB_FOOD_RED):
            layoutTextGrid[-foodWallY-1][foodWallX] = 'R'
        else:
            layoutTextGrid[-foodWallY-1][foodWallX] = 'B'
    for ghostWallX, ghostWallY in buildHouseAroundCenter(ghostX, ghostY):
        if util.flipCoin(PROB_GHOST_RED):
            layoutTextGrid[-ghostWallY-1][ghostWallX] = 'R'
        else:
            layoutTextGrid[-ghostWallY-1][ghostWallX] = 'B'
    layoutTextGrid[-2][1] = 'P'
    layoutTextRowList = [''.join(row) for row in layoutTextGrid]
    return layoutTextRowList
def mutate(self, p):
    newString = ""
    for i in xrange(self.n):
        if util.flipCoin(p):
            newString += str(int(not int(self.string[i])))
        else:
            newString += self.string[i]
    self.string = newString
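# The bit-flip mutation above is easy to exercise on its own. The sketch below uses
# hypothetical helper names (flip_coin, mutate_bits) rather than the original class,
# but applies the same independent per-position coin flip.
import random

def flip_coin(p):
    # Assumed behaviour of util.flipCoin: True with probability p.
    return random.random() < p

def mutate_bits(bits, p):
    # Flip each '0'/'1' character independently with probability p,
    # mirroring the loop in mutate() above.
    out = []
    for b in bits:
        out.append(('1' if b == '0' else '0') if flip_coin(p) else b)
    return ''.join(out)

print(mutate_bits('1011001110', 0.1))  # on average about one bit in ten is flipped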
def getAction(self, state): """ Compute the action to take in the current state. HINT: You might want to use util.flipCoin(prob) HINT: To pick randomly from a list, use random.choice(list) """ action = None legalActions = self.getLegalActions(state) # List of actions, excluding those that are known to simply keep the agent in the same state possibleActions = [] # List of actions and their respective Q values possibleActionQValues = util.Counter() # List of actions and the absolute values of their respective Q values, # excluding actions that would cause the agent to backtrack to the previous state possibleActionsNoBacktrack = util.Counter() absPossibleActionsNoBacktrack = util.Counter() # Assemble lists of actions that are permitted depending on the circumstances for action in legalActions: if (state, action) not in self.forbiddenActions: possibleActionQValues[action] = self.getQValue(state, action) possibleActions.append(action) if not self.isBacktrackAction(action): possibleActionsNoBacktrack[action] = self.getQValue( state, action) absPossibleActionsNoBacktrack[action] = abs( self.getQValue(state, action)) if len(possibleActionQValues) > 0: print "goalMode: ", self.goalMode print "epsilon 1: ", self.epsilon1 print "epsilon 2: ", self.epsilon2 print "meanQValue: ", self.getValue(state) print "possibleActions: ", possibleActions print "possibleActionQValues: ", possibleActionQValues # Training to populate Q table if self.goalMode == 'maxQMode': if util.flipCoin(0.5): # action = possibleActionsNoBacktrack.argMax() action = possibleActionQValues.argMax() # action = random.choice(possibleActions) else: action = random.choice(possibleActions) elif self.goalMode == 'minQMode': if util.flipCoin(self.epsilon1): # action = possibleActionsNoBacktrack.argMin() action = possibleActionQValues.argMin() # action = random.choice(possibleActions) else: action = random.choice(possibleActions) print "Random: ", action # Training to find deceptive path else: largestQValue = possibleActionQValues.argMax() print "Equilibrium state: ", state, self.isEquilibriumState( state) # If agent has already found an equidistant state with the largest-Q-value action seen so far, # then continue to the true goal if self.maxQValuePolicy: action = possibleActionsNoBacktrack.argMax() # Otherwise, keep searching for the equidistant state that has the largest-Q-value action. else: # If the agent has arrived at (what was thought to be) the LDP, and found that this state # no longer has at least one positively valued action and at least one negatively valued action, # then forget about this state. if self.lastDeceptivePoint is not None and state == self.lastDeceptivePoint[ 0] and not self.isEquilibriumState(state): self.lastDeceptivePoint = None # If an equidistant state has been found... if self.isEquilibriumState(state): # If the agent has arrived at an equidistant state that has the largest-Q-value action # seen so far (or if the agent has arrived at what is currently thought to be the LDP), # then update the details of the likeliest LDP candidate... if self.lastDeceptivePoint is None\ or possibleActionQValues.get(largestQValue) > self.lastDeceptivePoint[1]\ or state == self.lastDeceptivePoint[0]: self.lastDeceptivePoint = ( state, possibleActionQValues.get(largestQValue)) # Now head directly to the true goal, with probability epsilon2... 
if util.flipCoin(1 - self.epsilon2): self.maxQValuePolicy = True action = possibleActionsNoBacktrack.argMax() # Or continue searching for equidistant states that might have a larger Q value else: action = absPossibleActionsNoBacktrack.argMin() if self.epsilon2 >= 1.0 / float( self.phaseTwoEpisodes): self.epsilon2 -= 1.0 / float( self.phaseTwoEpisodes) # If this equidistant state does NOT have the largest-Q-value action # of all equidistant states seen so far, then keep searching for such an equidistant state else: action = absPossibleActionsNoBacktrack.argMin() # Otherwise, keep searching for an equidistant state: else: if self.getValue(state) > 0: action = possibleActionsNoBacktrack.argMin() elif self.getValue(state) < 0: action = possibleActionsNoBacktrack.argMax() else: action = absPossibleActionsNoBacktrack.argMin() print "self.lastDeceptivePoint: ", self.lastDeceptivePoint return action
def getAction(self, state):
    if util.flipCoin(self.epsilon):
        return random.choice(self.legalActions)
    return self.computeActionFromQValues(state)
def getAction(self, state): """ Compute the action to take in the current state. With probability self.epsilon, we should take a random action and take the best policy action otherwise. Note that if there are no legal actions, which is the case at the terminal state, you should choose None as the action. HINT: You might want to use util.flipCoin(prob) HINT: To pick randomly from a list, use random.choice(list) """ # Pick Action legalActions = self.getLegalActions(state) action = None "*** YOUR CODE HERE ***" if not legalActions: action = None elif util.flipCoin(self.epsilon): action = random.choice(legalActions) else: action = self.computeActionFromQValues(state) # if action not in legalActions: # print("WTFFFF") action_copy = action if self.shield: safe = False legal_qval = [] # print(legalActions) # legalActions.remove(action) for ac in legalActions: if ac != action: legal_qval.append((ac, self.getQValue(state, ac))) sorted(legal_qval, key=lambda x: x[1], reverse=True) i = 0 self.discarded = [] # print(legal_qval) while not safe and len(legalActions) != 0: # if action not in legalActions: # print("WTFFFFF") px, py = state.getPacmanPosition() if action == 'East': px += 1 elif action == 'West': px -= 1 elif action == 'North': py += 1 elif action == 'South': py -= 1 num_ghosts = len(state.data.agentStates) - 1 safe = True for j in range(num_ghosts): ghostpos = state.getGhostPosition(j + 1) dist = manhattanDistance((px, py), ghostpos) # print(dist) if dist < 2.0: safe = False self.discarded.append(action) legalActions.remove(action) break # if action == 'Stop': # safe = False # print(action) if not safe and i < len(legal_qval): action = legal_qval[i][0] i += 1 if len(legalActions) == 0: action = action_copy return action
def chooseOfAction(self, gameState): # Pick Action ghost=[] ghostIndex = 0 opAgents = CaptureAgent.getOpponents(self,gameState) currentPos = gameState.getAgentPosition(self.index) # Get ghost locations and states if observable if opAgents: for opponent in opAgents: opPos = gameState.getAgentPosition(opponent) opIsPacman = gameState.getAgentState(opponent).isPacman if opPos and not opIsPacman: dis = abs(currentPos[0]-opPos[0])+abs(currentPos[1]-opPos[1]) if dis<=6: ghost.append(opPos) ghostIndex = opponent if len(self.getFood(gameState).asList())>2: if len(ghost) ==0 : if gameState.getAgentState(self.index).numCarrying>1 and gameState.data.timeleft<200: self.weights =self.weights4 print("444444444444444444444") else: self.weights = self.weights1 print("111111111111111111111") else: if min([self.getMazeDistance(gameState.getAgentPosition(self.index),a) for a in ghost])>6: self.weights = self.weights1 print("111111111111111111111") else: if gameState.getAgentState(ghostIndex).scaredTimer<10: if gameState.data.timeleft<200 : if gameState.getAgentState(self.index).numCarrying>2: self.weights = self.weights3 print("33333333333333333333") else: self.weights = self.weights2 print("2222222222222222222222") else: if gameState.getAgentState(self.index).numCarrying>10: if self.red: middle = int((gameState.data.layout.width - 2)/2 ) else: middle = int((gameState.data.layout.width - 2)/2 + 1) if abs(gameState.getAgentPosition(self.index)[0]-middle) < middle/2: self.weights = self.weights3 print("33333333333333333333") else : self.weights = self.weights2 print("2222222222222222222222") else: self.weights = self.weights2 print("2222222222222222222222") else : self.weights = self.weights1 print("111111111111111111111") else : if len(ghost) ==0: self.weights = self.weights4 print("44444444444444444444") else: if gameState.getAgentState(ghostIndex).scaredTimer<10: self.weights = self.weights3 print("33333333333333333333") else: self.weights = self.weights4 print("44444444444444444444") legalActions = gameState.getLegalActions(self.index) legalActions.remove(Directions.STOP) action = None if len(legalActions) != 0: prob = util.flipCoin(self.epsilon) if prob: action = random.choice(legalActions) else: if self.weights ==self.weights1: action = self.getSafePolicy(gameState) else: action = self.getPolicy(gameState) if self.weights == self.weights2: food = self.getFeatures(gameState,action)["closest-food"] ghost = self.getFeatures(gameState,action)["closest-ghosts"] print(food*100,ghost*100) if not gameState.getAgentState(self.index).isPacman: if self.red: if self.finish: self.mode =2 else: if self.finish: self.mode = 2 return action
def getAction(self, state): ############## Build network map ############## # This builds a network of positions pacman can move to based on a state. # Required for search algorithms. This is specific to this maze. if not self.map_built: # If map isn't built, build it self.w_pos = self.wall_pos(state) # Get wall locations self.map_graph = self.map_graph(self.w_pos) # Get network self.map_built = True # Turn off map building # Update weights if self.played: # Have we played yet? if self.learning: ############## Get Reward of state ############## self.r = self.reward_signal(state.getScore(), self.old_score) self.old_score = state.getScore( ) # Udate the old score for next time ############## Extract legal moves ############## # Get legal actions & remove STOP legal = state.getLegalPacmanActions() if Directions.STOP in legal: legal.remove(Directions.STOP) # Convert available actions in form of int available_actions = self.AvailableActions(legal) ############## Calculate Max Q(s', a') ############## # Coordinates for grids around pacman south = (state.getPacmanPosition()[0], state.getPacmanPosition()[1] - 1) east = (state.getPacmanPosition()[0] + 1, state.getPacmanPosition()[1]) west = (state.getPacmanPosition()[0] - 1, state.getPacmanPosition()[1]) north = (state.getPacmanPosition()[0], state.getPacmanPosition()[1] + 1) directions = [south, east, west, north] possible_directions = [ ] # List with +1 moves to examine in legal direction_integer = [] # Mapping grid examined with direction # Examine available actions and store grids to look at for pacman if 0 in available_actions: possible_directions.append(east) direction_integer.append(0) if 1 in available_actions: possible_directions.append(south) direction_integer.append(1) if 2 in available_actions: possible_directions.append(west) direction_integer.append(2) if 3 in available_actions: possible_directions.append(north) direction_integer.append(3) # Stores Q(s', a') values, and their respective function scores Q_values = [] f1_score = [] f2_score = [] # Looking at possible actions and compute Q values for i in possible_directions: f1 = self.Distance_to_food(state, state.getFood(), i, self.map_graph, state.getGhostPositions()) f2 = self.Distance_to_Ghost(state, state.getGhostPositions(), i, self.map_graph) f1_score.append(f1) f2_score.append(f2) Q = self.weights[ 0] + self.weights[1] * f1 + self.weights[2] * f2 Q_values.append(Q) # Choose the best action index = Q_values.index(max(Q_values)) action = self.IntToMove(direction_integer[index]) ############## Weight Updates ############## # UPDATE THE WEIGHTS difference = self.r + self.gamma * max(Q_values) - self.Qsa self.weights[0] = self.weights[0] + self.alpha * difference self.weights[1] = self.weights[ 1] + self.alpha * difference * f1_score[index] self.weights[2] = self.weights[ 2] + self.alpha * difference * f2_score[index] # Save the chosen action's previous function scores self.Qsa = max( Q_values ) # This will be Q(s,a) after the move has been made # Save f values in case of death self.f1_death = f1_score[index] self.f2_death = f2_score[index] else: ############## Extract legal moves ############## # Get legal actions & remove STOP legal = state.getLegalPacmanActions() if Directions.STOP in legal: legal.remove(Directions.STOP) # Convert available actions in form of int available_actions = self.AvailableActions(legal) ############## Calculate Max Q(s', a') ############## # Coordinates for grids around pacman south = (state.getPacmanPosition()[0], state.getPacmanPosition()[1] - 1) east = 
(state.getPacmanPosition()[0] + 1, state.getPacmanPosition()[1]) west = (state.getPacmanPosition()[0] - 1, state.getPacmanPosition()[1]) north = (state.getPacmanPosition()[0], state.getPacmanPosition()[1] + 1) directions = [south, east, west, north] possible_directions = [] # List with +1 moves to examine in legal direction_integer = [] # Mapping grid examined with direction # Examine available actions and store grids to look at for pacman if 0 in available_actions: possible_directions.append(east) direction_integer.append(0) if 1 in available_actions: possible_directions.append(south) direction_integer.append(1) if 2 in available_actions: possible_directions.append(west) direction_integer.append(2) if 3 in available_actions: possible_directions.append(north) direction_integer.append(3) # Stores Q(s', a') values, and their respective function scores Q_values = [] f1_score = [] f2_score = [] # Looking at possible actions and compute Q values for i in possible_directions: f1 = self.Distance_to_food(state, state.getFood(), i, self.map_graph, state.getGhostPositions()) f2 = self.Distance_to_Ghost(state, state.getGhostPositions(), i, self.map_graph) f1_score.append(f1) f2_score.append(f2) Q = self.weights[ 0] + self.weights[1] * f1 + self.weights[2] * f2 Q_values.append(Q) # Choose the best action index = Q_values.index(max(Q_values)) action = self.IntToMove(direction_integer[index]) # Save the function values self.Qsa = max( Q_values) # This will be Q(s,a) after the move has been made # Begin learning if self.learning: self.played = True # Exploration function if util.flipCoin(self.epsilon) and self.learning: choices = range(0, len(Q_values)) index = random.choice(choices) action = self.IntToMove(direction_integer[index]) self.Qsa = Q_values[ index] # This will be Q(s,a) after the move has been made print self.weights return action
for i in range(20):
    nasa = open('i%d.txt' % i, 'r')
    skipLine = 9
    lineIndex = 0
    result = {}
    p = [0, 0, 0, 0]
    for line in nasa:
        if lineIndex > skipLine:
            line = line.strip()
            v = line.split(' ')
            r = [int(v[0]), int(v[1]), int(v[2]), float(v[6])]
            r[-1] *= random.choice(surface)
            if not util.flipCoin(random.choice(health_prob)):
                r[-1] = 0
            r[-1] = int(r[-1])
            if p[1] not in result:
                result[p[1]] = {}
            if p[2] not in result[p[1]]:
                result[p[1]][p[2]] = {}
            if p[3] not in result[p[1]][p[2]]:
                result[p[1]][p[2]][p[3]] = {}
            if r[3] not in result[p[1]][p[2]][p[3]]:
                result[p[1]][p[2]][p[3]][r[3]] = 0
def getAction(self, state): ############## Build network map ############## # This builds a network of positions pacman can move to based on a state. # Required for search algorithms. This is specific to this maze. if not self.map_built: # If map isn't built, build it self.w_pos = self.wall_pos(state) # Get wall locations self.map_graph = self.map_graph(self.w_pos) # Get network self.map_built = True # Turn off map building ############## Extract legal moves ############## # Get legal actions & remove STOP legal = state.getLegalPacmanActions() if Directions.STOP in legal: legal.remove(Directions.STOP) # Convert available actions in form of int available_actions = self.AvailableActions(legal) ############## Extract State information S' ############## # Get CURRENT state info S' (in form of key) self.s_cur = self.key(state.getPacmanPosition(), state.getGhostPositions(), state.getFood(), self.map_graph, state) ############## Update Q-Table ############## # If this is not the first action we make in a game. If it is, skip to "else" if self.played: # If this is the first time we have seen that state, initialize key-value pairs of all possible actions to 0. If not the dictionary will be empty and not function. Allows us to add states as we see them. for i in available_actions: if self.Q.get((self.s_cur, i)) == None: self.Q[self.s_cur, i] = 0 # Get the current reward (R') self.r_cur = self.reward_signal(state.getScore(), self.old_score) # Update old score self.old_score = state.getScore() # Increment the state/action pair that we were in previously. (Nsa += 1) self.Nsa[(self.s, self.a)] += 1 # Calculate alpha adjustment based on Nsa (if activated) if self.alpha_adjustment: self.alpha = self.adjusted_alpha(self.Nsa[(self.s, self.a)]) else: # Use regular alpha if not active self.alpha = self.alpha # Update the Q Table for previous state/action pair self.Q[(self.s, self.a)] = self.Q[(self.s, self.a)] + self.alpha * ( self.r + self.gamma * max(self.Q[(self.s_cur, i)] for i in available_actions) - self.Q[(self.s, self.a)]) else: # This code is only run once at the beginning of each game. # Initialize the current reward for starting self.r_cur = state.getScore() self.old_score = state.getScore( ) # "Old score" is the same as current score at t = 0. # Initialize playing state. We will not come here again until the new game. self.played = True # Ensure dictionary is not empty for current starting position and available actions. # They are initialized to 0. for i in available_actions: if self.Q.get((self.s_cur, i)) == None: self.Q[self.s_cur, i] = 0 ############## Update S, R ############## # Adjust state, and reward. We have already updated the Q table so this will only be relevant at the next table update (given we survived an extra move) self.s = self.s_cur self.r = self.r_cur ############## Chosing argmax Q(s', a') and updating A ############## self.scores = [ ] # Will keep track of all rewards for each action in legal ############## If using function exploration if self.function_exploration: ## Adjust action (need the arg max Q(s', a')) # Obtaining the action which maximizes the rewards. Examine all possible actions # and store their Q-values in a list for i in available_actions: # If the state I can go to hasn't been visited enough time, incentivise it properly using the large reward. 
Agent must also be in a state of learning if (self.Nsa[(self.s_cur, i)] < self.Ne) and self.learning: self.scores.append(self.L_Reward) # If it has, get the true calculated utility of that state else: self.scores.append(self.Q[(self.s_cur, i)]) # Verify that the number of scores which are equal to max score. This will be used to make a random choice if we have several unseen state-action pairs. counter = 0 # Serves as a counter and index for max score max_score_index = [] for i in self.scores: if i == max(self.scores): max_score_index.append(counter) counter += 1 # Extract the index for the highest score. Either randomly when there is more than one max score, or the first element in the list when there is only 1. This is needed to map the score back with the action which produced it. if max_score_index > 1: max_ = random.choice(max_score_index) else: max_ = max_score_index[0] # Map the index corresponding to the highest score back to its respective action in available_actions self.a = available_actions[max_] # Convert int action to actual action and return move. action = self.IntToMove(self.a) ############## If using epsilon exploration (not used) if self.epsilon_exploration: for i in available_actions: self.scores.append(self.Q[(self.s_cur, i)]) # If less than epsilon, and we're learning, make a random choice if util.flipCoin(self.epsilon) and self.learning: self.a = random.choice(available_actions) else: max_ = self.scores.index(max(self.scores)) self.a = available_actions[max_] action = self.IntToMove(self.a) ############## Return Action Arg Max Q(S', A') ############## return action
def __le__(self, other):
    return util.flipCoin(0.5)
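# A coin-flip __le__ like the one above presumably exists to randomize the outcome of
# <= comparisons between otherwise-equal items (for example, tie-breaking in a priority
# structure that orders with <=). A self-contained sketch of the idea, with a
# hypothetical wrapper name:
import random

class RandomTieBreaker(object):
    # Any <= comparison between two wrapped items is decided by a fair coin,
    # so a container that orders elements with <= breaks ties randomly.
    def __le__(self, other):
        return random.random() < 0.5

a, b = RandomTieBreaker(), RandomTieBreaker()
print(a <= b)  # True or False, each with probability 0.5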
def chooseAction(self, state): start = time.time() self.debugDraw([(0, 0)], [0, 0, 0], clear=True) self_agent = state.getAgentState(self.index) actions = state.getLegalActions(self.index) food = state.getBlueFood().count( True) if self.isOnRedTeam else state.getRedFood().count(True) # Particle filtering self.observeState() particle_filtering_time = time.time() - start # If we're carrying enough, just go home! if self_agent.numCarrying >= 3: return self.returnHome(state) # Otherwise, run minimax elif self.use_minimax: max_score = -99999 max_action = None alpha = -99999 beta = 99999 for action in actions: # Update successor ghost positions to be the max pos in our particle distributions successor = state.generateSuccessor(self.index, action) ghosts = self.getBeliefDistribution().argMax() successor = self.setGhostPositions(successor, ghosts, self.getOpponents(state)) time_depth = 1 - particle_filtering_time - 0.2 result = self.minimax(successor, start, 1, alpha, beta, 1, time_depth) if result >= max_score: max_score = result max_action = action if max_score > beta: return max_action alpha = max(alpha, max_score) action = max_action # Or compute action from q-values else: action = random.choice(actions) if util.flipCoin( self.epsilon) else self.computeActionFromQValues(state) # Q-learning if self.learn: reward = self.getReward( state.generateSuccessor(self.index, action), state) self.update(state, action, state.generateSuccessor(self.index, action), reward) # Draw particle distribution # self.drawBeliefs() # Update particles self.elapseTime(state) end = time.time() if end - start > 1: print("Overtime --> total time was ", end - start) return action
def actionSelector(self, gameState):
    if util.flipCoin(self.epsilon):
        return random.choice(self.getLegalActions(gameState))
    return self.getPolicy(gameState)
def chooseAction(self,gameState): start = time.time() actions = gameState.getLegalActions(self.index) action = None foodLeft = len(self.getFood(gameState).asList()) myCurrentPos = gameState.getAgentState(self.index).getPosition() InitialPosition = gameState.getInitialAgentPosition(self.index) enemies = [] enemyGhost = [] enemyPacman = [] for opponent in self.getOpponents(gameState): enemy = gameState.getAgentState(opponent) enemies.append(enemy) #print"enemies", enemies enemyGhost = [a for a in enemies if not a.isPacman and a.getPosition() != None] enemyPacman = [a for a in enemies if a.isPacman and a.getPosition() != None] ghostPositions = [] disToG = 6666 ranges = [] enemyGhostPosition = [Ghost.getPosition() for Ghost in enemyGhost] enemyPacmanPosition = [Pacman.getPosition() for Pacman in enemyPacman] mid = gameState.data.layout.width / 2 if gameState.isOnRedTeam(self.index): mid = mid - 1 else: mid = mid + 1 legalPositions = [p for p in gameState.getWalls().asList(False) if p[1] > 1] border = [p for p in legalPositions if p[0] == mid] if len(enemyGhostPosition) >0 and not gameState.getAgentState(self.index).isPacman: disToG = min([self.getMazeDistance(myCurrentPos, ghostPos) for ghostPos in enemyGhostPosition]) print'position in action',myCurrentPos print'If Astar111',self.aSt if disToG <5: randomPoint = random.choice(border) while randomPoint == myCurrentPos: randomPoint = random.choice(border) self.farPoint = randomPoint self.aSt = True print'If Astar',self.aSt print'current pacman position',myCurrentPos self.stopAction = True actionList = [] if myCurrentPos!= self.farPoint: if self.aSt: bestDist = 9999 #for action2 in actions: #successor = self.getSuccessor(gameState, action2,self.farPoint) #pos2 = successor.getAgentPosition(self.index) #action3 = None print'self.actionList outside if-else',self.actionList if len(self.actionList) == 0: #self.actionList.remove('Stop') self.actionList = self.aStar(gameState,self.farPoint,myCurrentPos,start) #self.actionList = action3 print'self.actionList',self.actionList if len(self.actionList) != 0: if self.actionList[0] == 9999: self.aSt = False self.skipAstar = True self.actionList.remove(9999) print'skip astar first' elif self.actionList[0] == 9999: self.aSt = False self.skipAstar = True self.actionList.remove(9999) print'skip a star' else: #actionList = self.actionList #if len(self.actionList)>0: bestAction = self.actionList[0] print 'eval time in A STAR for agent %d: %.4f' % (self.index, time.time() - start) print'bestAction',bestAction self.actionList.remove(bestAction) if myCurrentPos == InitialPosition or len(self.actionList) == 0: self.aSt = False else: #for move in actions: if bestAction in actions: return bestAction else: self.aSt = False #return bestAction else: self.aSt = False #bestDist = dist #print'bestAction',bestAction #return bestAction else: self.aSt = False if len(actions) !=0: probability = util.flipCoin(self.epsilon) if probability: #print"random" action = random.choice(actions) else: action = self.getPolicy(gameState) action = self.getPolicy(gameState) """if self.stopAction and not self.skipAstar: print'stop for a sec' action = Directions.STOP self.stopAction = False""" """elif self.aSt: action = Directions.STOP self.stopAction = False print'stop second time'""" print"final action",action print 'eval time in Q-Learning for agent %d: %.4f' % (self.index, time.time() - start) return action
def p():
    return n0tf0rn00bs.flipCoin(mutationProbability)

# Keep each gene when the coin flip succeeds, otherwise invert it (genes are 0/1).
q = np.array([z if p() else 1 - z for z in chromosome])
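For context, a self-contained sketch of the same per-gene idea on a binary chromosome, written in the more conventional direction (flip the gene when the coin comes up true); mutation_probability and the toy chromosome are assumptions for illustration only:

import random
import numpy as np

mutation_probability = 0.05          # assumed per-gene mutation rate
chromosome = np.array([0, 1, 1, 0])  # toy binary chromosome

# Flip each gene independently with probability mutation_probability.
mutated = np.array([1 - z if random.random() < mutation_probability else z
                    for z in chromosome])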
def chooseAction(self, gameState):
    """
    Picks the best of all legal actions based on their estimated Q values,
    which are computed as a linear combination of the feature values and
    their weights. This is the function that is called at every turn; any
    other functions that should be called each turn should be called from here.
    """
    agentName = "Offensive Agent" if isinstance(self, OffensiveDummyAgent) else "Defensive Agent"
    showOutput = (DEBUG_OFFENSE_ONLY and agentName == "Offensive Agent") or \
                 (DEBUG_DEFENSE_ONLY and agentName == "Defensive Agent")

    # Only update weights if we are currently training
    if TRAINING:
        # First, update weights based on the reward received from the move we just took
        s_prime = self.getCurrentObservation()
        a = self.lastAction
        if showOutput:
            print(agentName + " action just taken: " + str(a))
        s = self.getPreviousObservation() if len(self.observationHistory) > 1 else s_prime
        reward = self.getReward(s, a, s_prime)  # sets self.justDied = 25
        self.updateWeights(s, a, s_prime, reward)

    # Choose our next action!
    actions = gameState.getLegalActions(self.index)
    qValuesOfNextActions = [self.evaluatePotentialNextState(gameState, a) for a in actions]
    maxValue = max(qValuesOfNextActions)
    bestActions = [a for a, v in zip(actions, qValuesOfNextActions) if v == maxValue]

    # If there are 2 (or fewer) pellets left, the game is pretty much over, so the best
    # action is the one that moves us closer to where we initially started.
    foodLeft = len(self.getFood(gameState).asList())
    if foodLeft <= 2:
        actionChoice = self.getActionToGoBackHome(gameState, actions)
    # With probability self.epsilon, choose randomly instead of optimally, but only while TRAINING.
    elif TRAINING and util.flipCoin(self.epsilon):
        actionChoice = random.choice(actions)
    # In all other cases, choose the best action based on computed Q values,
    # breaking ties randomly.
    else:
        actionChoice = random.choice(bestActions)

    self.lastAction = actionChoice
    if showOutput:
        print(agentName + " CHOOSING ACTION: " + str(actionChoice))
    return actionChoice
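The updateWeights call above is not shown in this snippet. A minimal sketch of the standard approximate Q-learning update it plausibly performs; self.weights, self.alpha, self.discount, getFeatures, and getQValue are assumed names, not the original code:

def updateWeights(self, s, a, s_prime, reward):
    # Linear function approximation update (a sketch, not the authors' method):
    # w_i <- w_i + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a)) * f_i(s, a)
    nextActions = s_prime.getLegalActions(self.index)
    maxNextQ = max([self.getQValue(s_prime, a2) for a2 in nextActions]) if nextActions else 0.0
    difference = (reward + self.discount * maxNextQ) - self.getQValue(s, a)
    features = self.getFeatures(s, a)
    for feature in features:
        self.weights[feature] += self.alpha * difference * features[feature]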
        messageCallback = lambda x: None
    else:
        if opts.manual and opts.agent == None:
            displayCallback = lambda state: display.displayNullValues(state)
        else:
            if opts.agent == 'random':
                displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES")
            if opts.agent == 'value':
                displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES")
            if opts.agent == 'q':
                displayCallback = lambda state: display.displayQValues(a, state, "CURRENT Q-VALUES")

    # Alternate between max-Q and min-Q goals during phase one, then switch to deceptive mode.
    if episode < a.phaseOneEpisodes:
        if util.flipCoin(0.5):
            goalMode = 'maxQMode'
        else:
            goalMode = 'minQMode'
    else:
        goalMode = 'deceptiveMode'

    # Run episode according to goal mode
    returns += runEpisode(a, env, opts.discount, decisionCallback, displayCallback,
                          messageCallback, pauseCallback, episode, goalMode)
    a.episodesSoFar += 1
    a.maxQValuePolicy = False
    a.stepCount = 0

if episodes > 0:
    print
            first = False
        else:
            # alpha = (alpha) / (alpha + count / 100)  # decay learning rate
            count += 1
            status = getStatus(input, output)      # 'win', 'tie' or 'lost'
            if status == 'win':
                reward = 1
            elif status == 'tie':
                reward = 0
            else:
                reward = -1
            change = getChange(input, last_input)  # '+', '-' or 'o'
            new_state = (status, change)
            new_q = max([Q_values[(new_state, action)] for action in actions])
            # Standard Q-learning update of the previous state-action pair.
            Q_values[(state, output)] = (1.0 - alpha) * Q_values[(state, output)] + \
                alpha * (reward + gamma * new_q)
            state = new_state
            if count < 20:
                f.write('{0} {1} {2} {3}\n'.format(input, output, reward, Q_values))

        # Epsilon-greedy choice of the next output.
        if util.flipCoin(epsilon):
            output = rd.choice(actions)
        else:
            value = max([Q_values[(state, action)] for action in actions])
            output = rd.choice([action for action in actions
                                if Q_values[(state, action)] == value])
        # TODO, exploration
    f.close()
def MCTS(self, curState):
    # First, determine the tactics and set the timer
    startTime = time.time()
    QValues = util.Counter()
    Values = util.Counter()
    tactic = self.getTactics(curState, self.index)
    print(self.index, " ", tactic)  # FIXME for debug purpose

    # Do the main loop of MCTS
    fringe = PriorityQueue()
    fringe.push(curState, 0)
    tempActions = Deque()
    bestActions = Deque()
    pathAndReward = util.Stack()
    bestReward = float("-inf")
    expandedStates = util.Counter()
    nextStates = util.Counter()
    curDepth = 0

    while not fringe.isEmpty():
        state = fringe.pop()
        topPos = state.getAgentPosition(self.index)

        if curDepth >= self.depth:
            # Backpropagation: fold the rewards along the current path into
            # discounted values and keep the best action sequence seen so far.
            cumulativeReward = 0
            while not pathAndReward.isEmpty():
                state, reward = pathAndReward.pop()
                cumulativeReward = reward + cumulativeReward * self.discount
                Values[state] = cumulativeReward
            if cumulativeReward > bestReward:
                bestReward = cumulativeReward
                bestActions = tempActions
            # Rewind to the depth of the next state waiting on the fringe.
            (priority, _, _) = fringe.peekPriority()
            depthDiff = curDepth + priority
            curDepth = -priority
            for _ in range(depthDiff):
                tempActions.popBack()
        else:
            reward = self.getReward(state)
            pathAndReward.push((state, reward))

            if expandedStates[state] > 0:
                # Not only calculate Q(s, a); use V(s) for descendants that are already expanded.
                expandedStates[state] += 1
                actionProb = util.Counter()
                for action in nextStates[state]:
                    nextState = nextStates[state][action]
                    if expandedStates[nextState] > 0:
                        # If the next state is expanded, use V(s)
                        actionProb[action] = Values[nextState]
                    else:
                        # If the next state is not expanded, use Q(s, a)
                        actionProb[action] = QValues[topPos][action]
                # Calculate probability according to Q(s, a) or V(s)
                actionProb = softmax(actionProb)
            else:
                # If the state has not been expanded, expand it now.
                expandedStates[state] += 1
                legalActions = state.getLegalActions(self.index)
                actionProb = util.Counter()
                for action in legalActions:
                    if QValues[topPos] == 0:
                        QValues[topPos] = util.Counter()
                    QValues[topPos][action] = self.getQValue(state, action, tactic)[0]
                    actionProb[action] = QValues[topPos][action]
                    if nextStates[state] == 0:
                        nextStates[state] = util.Counter()
                    nextStates[state][action] = self.getNextState(state, action)
                # Calculate probability according to Q(s, a)
                actionProb = softmax(actionProb)

            # Choose an action: with probability epsilon sample from the softmax
            # distribution, otherwise take the greedy argmax.
            flip = random.random()
            cumulative = 0
            chosenAction = "Error"  # marks an error if nothing gets chosen
            if util.flipCoin(self.epsilon):
                for prob in actionProb:
                    if cumulative <= flip <= cumulative + actionProb[prob]:
                        chosenAction = prob
                        break
                    else:
                        cumulative += actionProb[prob]
            else:
                chosenAction = actionProb.argMax()

            tempActions.push((chosenAction, QValues[topPos][chosenAction]))
            nextState = nextStates[state][chosenAction]

            # Occasionally re-queue the root so the search can backtrack;
            # the probability decays exponentially with depth.
            if util.flipCoin(1 / exp(.4 * (curDepth + self.bias))):
                fringe.push(curState, -curDepth)
            curDepth += 1
            fringe.push(nextState, -curDepth)

        endTime = time.time()
        if endTime - startTime > self.timeInterval:
            break

    self.actionsChosen = bestActions
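The softmax helper used above is not defined in this snippet. A minimal sketch of a numerically stable softmax over a util.Counter, under the assumption that this is roughly what the helper does:

from math import exp

def softmax(counter):
    # Turn arbitrary scores into a probability distribution over the same keys.
    probs = util.Counter()
    if len(counter) == 0:
        return probs
    maxScore = max(counter.values())  # subtract the max for numerical stability
    total = sum(exp(v - maxScore) for v in counter.values())
    for key in counter:
        probs[key] = exp(counter[key] - maxScore) / total
    return probs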