Example #1
0
 def getFeatures(self, state, action):
     """
       Returns a dict mapping each feature to its count for the
       given state and action.  For indicator functions the count
       will usually just be 1.0.

       This is an unimplemented stub: the body only calls
       util.raiseNotDefined(), so the actual feature extraction has
       to be supplied elsewhere (presumably by an overriding
       subclass -- confirm against the rest of the file).
     """
     util.raiseNotDefined()
Example #2
0
    def computeQValueFromValues(self, state, action):
        """
          Compute the Q-value of action in state from the
          value function stored in self.values.

          Not implemented yet: the body below is a placeholder that
          only calls util.raiseNotDefined().
        """

        "*** YOUR CODE HERE ***"
        # Placeholder until the computation described above is written.
        util.raiseNotDefined()
Example #3
0
    def getValue(self, state):
        """
        Returns the value of this state under the best action.
        Concretely, this is given by

        V(s) = max_{a in actions} Q(s,a)

        This is an unimplemented stub: the body only calls
        util.raiseNotDefined().
        """
        util.raiseNotDefined()
Example #4
0
    def computeActionFromValues(self, state):
        """
          The policy is the best action in the given state
          according to the values currently stored in self.values.

          You may break ties any way you see fit.  Note that if
          there are no legal actions, which is the case at the
          terminal state, you should return None.

          Not implemented yet: the body below is a placeholder that
          only calls util.raiseNotDefined().
        """
        "*** YOUR CODE HERE ***"
        # Placeholder until the policy extraction described above is written.
        util.raiseNotDefined()
Example #5
0
    def getPolicy(self, state):
        """
        Returns the best action to take in the state.  Note that,
        because we might want to explore, this might not coincide
        with getAction.  Concretely, this is given by

        policy(s) = arg_max_{a in actions} Q(s,a)

        If many actions achieve the maximal Q-value,
        it doesn't matter which is selected.

        This is an unimplemented stub: the body only calls
        util.raiseNotDefined().
        """
        util.raiseNotDefined()
Example #6
0
 def getAction(self, state):
     """
     Choose an action for the given state and return it.

     state: the state to act in; state.getLegalActions() can be
     called on it.

     This is an unimplemented stub: the body only calls
     util.raiseNotDefined().
     """
     util.raiseNotDefined()
Example #7
0
 def getQValue(self, state, action):
     """
     Should return Q(state,action), the Q-value of taking the
     given action in the given state.

     This is an unimplemented stub: the body only calls
     util.raiseNotDefined().
     """
     util.raiseNotDefined()
Example #8
0
 def update(self, state, action, nextState, reward):
     """
     This class will call this function, which you write, after
     observing a transition and reward.

     The arguments presumably describe that transition (taking
     action in state led to nextState) and the reward received --
     confirm against the caller.

     This is an unimplemented stub: the body only calls
     util.raiseNotDefined().
     """
     util.raiseNotDefined()