def computeValueFromQValues(self, state):
    """
    Return the value of ``state``: the maximum of Q(state, action)
    taken over the legal actions in ``state``.

    Legal actions are obtained from ``self.getLegalActions(state)`` and
    each Q-value from ``self.getQValue(state, action)``.

    Note that if there are no legal actions, which is the case at the
    terminal state, the value returned is 0.0.
    """
    # Dispatch through the instance so subclass overrides are honoured.
    actions = self.getLegalActions(state)
    if not actions:
        return 0.0
    # Builtin max over a generator: no temporary list, and no local
    # variable shadowing the ``max`` builtin.
    return max(self.getQValue(state, action) for action in actions)
def computeActionFromQValues(self, state):
    """
    Return the best action to take in ``state``: the legal action whose
    Q-value, as reported by ``self.getQValue(state, action)``, is highest.

    When several actions share the highest Q-value, the one that appears
    first in ``self.getLegalActions(state)`` is returned.

    Note that if there are no legal actions, which is the case at the
    terminal state, None is returned. None is never returned while a
    legal action exists, even if every Q-value is -inf.
    """
    # Dispatch through the instance so subclass overrides are honoured.
    actions = self.getLegalActions(state)
    if not actions:
        return None
    # max() returns the first maximal element, which keeps the original
    # first-wins tie-break without shadowing the ``max`` builtin.
    return max(actions, key=lambda action: self.getQValue(state, action))
def getAction(self, state):
    """
    Compute the action to take in the current state.

    With probability ``self.epsilon`` a random legal action is taken;
    otherwise the best policy action, as given by
    ``self.computeActionFromQValues(state)``, is taken.

    Note that if there are no legal actions, which is the case at the
    terminal state, None is returned as the action.
    """
    # Dispatch through the instance so subclass overrides are honoured.
    legalActions = self.getLegalActions(state)
    if not legalActions:
        return None
    # Epsilon-greedy: the coin flip decides between exploring (uniform
    # random legal action) and exploiting the current Q-values.
    if util.flipCoin(self.epsilon):
        return random.choice(legalActions)
    return self.computeActionFromQValues(state)