def runAgent(self, moduleDict, numExperiences):
        """Train a fresh Q-learning agent on sampled transitions and report it.

        A ``QLearningAgent`` from ``moduleDict['qlearningAgents']`` is built
        with ``self.opts`` and updated ``numExperiences`` times. Each update
        uses a start state and action drawn with the ``FixedRandom``
        generator, and the successor/reward returned by
        ``self.env.getRandomNextState``.

        Args:
            moduleDict: mapping that provides the ``qlearningAgents`` module.
            numExperiences: number of ``agent.update`` calls to perform.

        Returns:
            A 5-tuple ``(valuesPretty, qValuesPretty, actions, policyPretty,
            lastExperience)`` where ``valuesPretty`` and ``policyPretty`` are
            the results of ``self.prettyValues`` / ``self.prettyPolicy``,
            ``qValuesPretty`` maps each action to the ``self.prettyValues``
            rendering of its per-state Q-values (``None`` where the action is
            not available), ``actions`` lists every action available in at
            least one considered state, and ``lastExperience`` is the final
            ``(startState, action, endState, reward)`` tuple, or ``None`` when
            ``numExperiences`` is 0.
        """
        agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)

        # Only states that offer at least one action can start an experience.
        states = [
            state for state in self.grid.getStates()
            if len(self.grid.getPossibleActions(state)) > 0
        ]
        # The draws below index into `states`, so its order decides which
        # experiences are generated. Sort in place: the previous
        # `sorted(states)` call threw its result away and ordered nothing.
        states.sort()

        randObj = FixedRandom().random
        lastExperience = None
        for _ in range(numExperiences):
            # One draw picks the start state, the next picks the action; the
            # same generator is then handed to the transition function.
            startState = states[int(randObj.random() * len(states))]
            candidates = self.grid.getPossibleActions(startState)
            action = candidates[int(randObj.random() * len(candidates))]
            (endState, reward) = self.env.getRandomNextState(startState,
                                                             action,
                                                             randObj=randObj)
            lastExperience = (startState, action, endState, reward)
            agent.update(*lastExperience)

        # Every action available somewhere, de-duplicated in first-seen order
        # so the result does not depend on set iteration order and an empty
        # `states` list yields an empty list instead of an error.
        actions = list(dict.fromkeys(
            action
            for state in states
            for action in self.grid.getPossibleActions(state)))

        values = {}
        policy = {}
        qValues = {action: {} for action in actions}
        for state in states:
            values[state] = agent.computeValueFromQValues(state)
            policy[state] = agent.computeActionFromQValues(state)
            possibleActions = self.grid.getPossibleActions(state)
            for action in actions:
                # Actions that are not legal in this state are recorded as
                # None rather than queried from the agent.
                if action in possibleActions:
                    qValues[action][state] = agent.getQValue(state, action)
                else:
                    qValues[action][state] = None

        valuesPretty = self.prettyValues(values)
        policyPretty = self.prettyPolicy(policy)
        qValuesPretty = {
            action: self.prettyValues(qValues[action]) for action in actions
        }
        return (valuesPretty, qValuesPretty, actions, policyPretty,
                lastExperience)