def runAgent(self, moduleDict, numExperiences):
    """Feed a new QLearningAgent sampled experiences and summarize the result.

    A ``QLearningAgent`` is built from ``moduleDict['qlearningAgents']`` with
    ``self.opts`` as keyword arguments. For ``numExperiences`` iterations a
    start state and one of its possible actions are drawn with the
    ``FixedRandom`` generator, the successor state and reward come from
    ``self.env.getRandomNextState``, and the resulting
    ``(startState, action, endState, reward)`` tuple is passed to
    ``agent.update``.

    Args:
        moduleDict: mapping that provides the ``'qlearningAgents'`` module.
        numExperiences: number of experiences to sample and apply.

    Returns:
        A 5-tuple ``(valuesPretty, qValuesPretty, actions, policyPretty,
        lastExperience)``:
        - ``valuesPretty``: ``self.prettyValues`` of the per-state values.
        - ``qValuesPretty``: dict mapping each action to ``self.prettyValues``
          of that action's per-state Q-values (``None`` where the action is
          not possible in a state).
        - ``actions``: list of every action possible in at least one state.
        - ``policyPretty``: ``self.prettyPolicy`` of the per-state actions.
        - ``lastExperience``: the last tuple given to ``agent.update``, or
          ``None`` when ``numExperiences`` is 0.
    """
    agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
    grid = self.grid

    # Only states offering at least one action can start an experience.
    # The list must really be sorted: the seeded draws below index into it,
    # so its order decides which experiences are generated. (Previously the
    # result of sorted() was discarded, leaving the getStates() order.)
    states = sorted(
        state for state in grid.getStates()
        if len(grid.getPossibleActions(state)) > 0)

    randObj = FixedRandom().random

    lastExperience = None
    for _ in range(numExperiences):
        # Index with int(random() * len) instead of randObj.choice().
        # NOTE(review): presumably this keeps the draw sequence independent
        # of how choice() is implemented -- confirm before changing.
        startState = states[int(randObj.random() * len(states))]
        candidateActions = grid.getPossibleActions(startState)
        action = candidateActions[int(randObj.random() * len(candidateActions))]
        endState, reward = self.env.getRandomNextState(
            startState, action, randObj=randObj)
        lastExperience = (startState, action, endState, reward)
        agent.update(*lastExperience)

    # Union of the action sets of all states. reduce() is kept without an
    # initializer so the resulting action order matches the previous code.
    actions = list(
        reduce(lambda a, b: set(a).union(b),
               [grid.getPossibleActions(state) for state in states]))

    values = {}
    qValues = {}
    policy = {}
    for state in states:
        values[state] = agent.computeValueFromQValues(state)
        policy[state] = agent.computeActionFromQValues(state)
        possibleActions = grid.getPossibleActions(state)
        for action in actions:
            perState = qValues.setdefault(action, {})
            if action in possibleActions:
                perState[state] = agent.getQValue(state, action)
            else:
                # Action unavailable in this state: record an explicit gap.
                perState[state] = None

    valuesPretty = self.prettyValues(values)
    policyPretty = self.prettyPolicy(policy)
    qValuesPretty = {
        action: self.prettyValues(qValues[action]) for action in actions}
    return (valuesPretty, qValuesPretty, actions, policyPretty, lastExperience)