def getFeatures(self, state, action):
    """
    Map a (state, action) pair to a dict of feature -> count.

    For indicator-style features the count is normally just 1.0.

    This is an unimplemented stub: it only delegates to
    util.raiseNotDefined().
    """
    # No implementation at this level; hand off to the shared helper.
    util.raiseNotDefined()
def computeQValueFromValues(self, state, action):
    """
    Return Q(state, action) derived from the value function held in
    self.values.

    This is an unimplemented placeholder: it only delegates to
    util.raiseNotDefined().
    """
    "*** YOUR CODE HERE ***"
    # Placeholder body; the real computation has not been written yet.
    util.raiseNotDefined()
def getValue(self, state):
    """
    Return the value of `state` when the best action is taken.

    Formally:
        V(s) = max_{a in actions} Q(s,a)

    This is an unimplemented stub: it only delegates to
    util.raiseNotDefined().
    """
    # No implementation at this level; hand off to the shared helper.
    util.raiseNotDefined()
def computeActionFromValues(self, state):
    """
    Return the policy's action for `state`, i.e. the best action
    according to the values currently held in self.values.

    Ties may be broken in any way. When the state has no legal actions
    (as happens at the terminal state) the result should be None.

    This is an unimplemented placeholder: it only delegates to
    util.raiseNotDefined().
    """
    "*** YOUR CODE HERE ***"
    # Placeholder body; the real selection logic has not been written yet.
    util.raiseNotDefined()
def getPolicy(self, state):
    """
    Return the best action to take in `state`.

    Because an agent may want to explore, this need not match what
    getAction returns. Formally:
        policy(s) = arg_max_{a in actions} Q(s,a)
    When several actions share the maximal Q-value, any one of them may
    be chosen.

    This is an unimplemented stub: it only delegates to
    util.raiseNotDefined().
    """
    # No implementation at this level; hand off to the shared helper.
    util.raiseNotDefined()
def getAction(self, state):
    """
    Pick an action for `state` and return it.

    state: supports state.getLegalActions()

    This is an unimplemented stub: it only delegates to
    util.raiseNotDefined().
    """
    # No implementation at this level; hand off to the shared helper.
    util.raiseNotDefined()
def getQValue(self, state, action):
    """
    Return Q(state,action).

    This is an unimplemented stub: it only delegates to
    util.raiseNotDefined().
    """
    # No implementation at this level; hand off to the shared helper.
    util.raiseNotDefined()
def update(self, state, action, nextState, reward):
    """
    Hook invoked by this class after a transition and its reward have
    been observed; the implementer supplies the body.

    This is an unimplemented stub: it only delegates to
    util.raiseNotDefined().
    """
    # No implementation at this level; hand off to the shared helper.
    util.raiseNotDefined()