def runAgent(self, moduleDict, numExperiences):
     """Train a QLearningAgent on sampled transitions and collect its outputs.

     A QLearningAgent is built from moduleDict['qlearningAgents'] with
     self.opts, then updated numExperiences times. Each experience picks a
     start state (among states with at least one possible action) and one
     of that state's actions using randObj, and asks self.env for the
     resulting (endState, reward).

     Args:
         moduleDict: mapping that provides the 'qlearningAgents' module.
         numExperiences: number of update steps to feed to the agent.

     Returns:
         A tuple (valuesPretty, qValuesPretty, actions, policyPretty,
         lastExperience) where
           * valuesPretty is self.prettyValues applied to the per-state
             results of agent.computeValueFromQValues,
           * qValuesPretty maps each action to self.prettyValues of that
             action's per-state Q-values (None where the action is not
             possible in the state),
           * actions is the list of all actions possible in any state,
           * policyPretty is self.prettyPolicy applied to the per-state
             results of agent.computeActionFromQValues,
           * lastExperience is the final (startState, action, endState,
             reward) tuple, or None when numExperiences is 0.
     """
     # Imported locally: reduce is a builtin only on Python 2, while
     # functools.reduce is available on both Python 2.6+ and Python 3.
     from functools import reduce

     agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
     # Keep only states that have at least one possible action. sorted()
     # is used instead of filter(...).sort() because filter() does not
     # return a list on Python 3.
     states = sorted(state for state in self.grid.getStates()
                     if len(self.grid.getPossibleActions(state)) > 0)
     # NOTE(review): presumably a fixed-seed generator so that runs are
     # reproducible -- confirm against FixedRandom.
     randObj = FixedRandom().random
     # choose a random start state and a random possible action from that state
     # get the next state and reward from the transition function
     lastExperience = None
     for _ in range(numExperiences):
         startState = randObj.choice(states)
         action = randObj.choice(self.grid.getPossibleActions(startState))
         (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
         lastExperience = (startState, action, endState, reward)
         agent.update(*lastExperience)
     # Union of the actions possible in any of the selected states.
     actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
     values = {}
     qValues = {}
     policy = {}
     for state in states:
         values[state] = agent.computeValueFromQValues(state)
         policy[state] = agent.computeActionFromQValues(state)
         possibleActions = self.grid.getPossibleActions(state)
         for action in actions:
             # setdefault replaces the Python-2-only dict.has_key check.
             actionQValues = qValues.setdefault(action, {})
             if action in possibleActions:
                 actionQValues[state] = agent.getQValue(state, action)
             else:
                 # Action is not available in this state.
                 actionQValues[state] = None
     valuesPretty = self.prettyValues(values)
     policyPretty = self.prettyPolicy(policy)
     qValuesPretty = {}
     for action in actions:
         qValuesPretty[action] = self.prettyValues(qValues[action])
     return (valuesPretty, qValuesPretty, actions, policyPretty, lastExperience)
 def runAgent(self, moduleDict, numExperiences):
     """Train an ApproximateQAgent on sampled transitions and collect its outputs.

     An ApproximateQAgent is built from moduleDict["qlearningAgents"] with
     self.extractor and self.opts, then updated numExperiences times. Each
     experience picks a start state (among states with at least one
     possible action) and one of that state's actions using randObj, and
     asks self.env for the resulting (endState, reward).

     Args:
         moduleDict: mapping that provides the "qlearningAgents" module.
         numExperiences: number of update steps to feed to the agent.

     Returns:
         A tuple (qValuesPretty, weights, actions, lastExperience) where
           * qValuesPretty maps each action to self.prettyValues of that
             action's per-state Q-values (None where the action is not
             possible in the state),
           * weights is the result of agent.getWeights() after training,
           * actions is the list of all actions possible in any state,
           * lastExperience is the final (startState, action, endState,
             reward) tuple, or None when numExperiences is 0.
     """
     # Imported locally: reduce is a builtin only on Python 2, while
     # functools.reduce is available on both Python 2.6+ and Python 3.
     from functools import reduce

     agent = moduleDict["qlearningAgents"].ApproximateQAgent(extractor=self.extractor, **self.opts)
     # Keep only states that have at least one possible action. sorted()
     # is used instead of filter(...).sort() because filter() does not
     # return a list on Python 3.
     states = sorted(state for state in self.grid.getStates()
                     if len(self.grid.getPossibleActions(state)) > 0)
     # NOTE(review): presumably a fixed-seed generator so that runs are
     # reproducible -- confirm against FixedRandom.
     randObj = FixedRandom().random
     # choose a random start state and a random possible action from that state
     # get the next state and reward from the transition function
     lastExperience = None
     for _ in range(numExperiences):
         startState = randObj.choice(states)
         action = randObj.choice(self.grid.getPossibleActions(startState))
         (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
         lastExperience = (startState, action, endState, reward)
         agent.update(*lastExperience)
     # Union of the actions possible in any of the selected states.
     actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
     qValues = {}
     # Weights are read before the Q-values are queried, as in the original order.
     weights = agent.getWeights()
     for state in states:
         possibleActions = self.grid.getPossibleActions(state)
         for action in actions:
             # setdefault replaces the Python-2-only dict.has_key check.
             actionQValues = qValues.setdefault(action, {})
             if action in possibleActions:
                 actionQValues[state] = agent.getQValue(state, action)
             else:
                 # Action is not available in this state.
                 actionQValues[state] = None
     qValuesPretty = {}
     for action in actions:
         qValuesPretty[action] = self.prettyValues(qValues[action])
     return (qValuesPretty, weights, actions, lastExperience)
 def runAgent(self, moduleDict):
     """Build a QLearningAgent and train it on self.numExperiences sampled transitions.

     The agent is constructed from moduleDict['qlearningAgents'] with
     self.opts. Each experience picks a start state (among states with at
     least one possible action) and one of that state's actions using
     randObj, asks self.env for the resulting (endState, reward), and
     passes the transition to agent.update.

     Args:
         moduleDict: mapping that provides the 'qlearningAgents' module.

     Returns:
         The QLearningAgent instance after all updates have been applied.
     """
     agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
     # Keep only states that have at least one possible action. sorted()
     # is used instead of filter(...).sort() because filter() does not
     # return a list on Python 3.
     states = sorted(state for state in self.grid.getStates()
                     if len(self.grid.getPossibleActions(state)) > 0)
     # NOTE(review): presumably a fixed-seed generator so that runs are
     # reproducible -- confirm against FixedRandom.
     randObj = FixedRandom().random
     # choose a random start state and a random possible action from that state
     # get the next state and reward from the transition function
     for _ in range(self.numExperiences):
         startState = randObj.choice(states)
         action = randObj.choice(self.grid.getPossibleActions(startState))
         (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
         agent.update(startState, action, endState, reward)
     return agent