def runAgent(self, moduleDict, numExperiences):
    agent = moduleDict['qlearningAgents'].ApproximateQAgent(extractor=self.extractor, **self.opts)
    states = [state for state in self.grid.getStates() if len(self.grid.getPossibleActions(state)) > 0]
    states.sort()
    randObj = FixedRandom().random
    # choose a random start state and a random possible action from that state
    # get the next state and reward from the transition function
    lastExperience = None
    for i in range(numExperiences):
        startState = randObj.choice(states)
        action = randObj.choice(self.grid.getPossibleActions(startState))
        (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
        lastExperience = (startState, action, endState, reward)
        agent.update(*lastExperience)
    actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
    qValues = {}
    weights = agent.getWeights()
    for state in states:
        possibleActions = self.grid.getPossibleActions(state)
        for action in actions:
            if action not in qValues:
                qValues[action] = {}
            if action in possibleActions:
                qValues[action][state] = agent.getQValue(state, action)
            else:
                qValues[action][state] = None
    qValuesPretty = {}
    for action in actions:
        qValuesPretty[action] = self.prettyValues(qValues[action])
    return (qValuesPretty, weights, actions, lastExperience)
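# For reference, each agent.update(...) call above exercises the rule the
# ApproximateQAgent under test is expected to implement:
#     w_i <- w_i + alpha * difference * f_i(s, a)
# A minimal sketch of one such step under that standard definition; the helper
# name and the plain-dict feature/weight interface are illustrative, not the
# graded API:
def _approximate_q_update_sketch(weights, features, alpha, discount, reward, max_next_q, current_q):
    # TD error between the one-step sample and the current estimate
    difference = (reward + discount * max_next_q) - current_q
    # nudge every active feature's weight in proportion to the error
    for name, value in features.items():
        weights[name] = weights.get(name, 0.0) + alpha * difference * value
    return weights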
def runAgent(self, moduleDict, numExperiences):
    agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
    states = [state for state in self.grid.getStates() if len(self.grid.getPossibleActions(state)) > 0]
    states.sort()
    randObj = FixedRandom().random
    # choose a random start state and a random possible action from that state
    # get the next state and reward from the transition function
    lastExperience = None
    for i in range(numExperiences):
        startState = randObj.choice(states)
        action = randObj.choice(self.grid.getPossibleActions(startState))
        (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
        lastExperience = (startState, action, endState, reward)
        agent.update(*lastExperience)
    actions = list(reduce(lambda a, b: set(a).union(b), [self.grid.getPossibleActions(state) for state in states]))
    values = {}
    qValues = {}
    policy = {}
    for state in states:
        values[state] = agent.computeValueFromQValues(state)
        policy[state] = agent.computeActionFromQValues(state)
        possibleActions = self.grid.getPossibleActions(state)
        for action in actions:
            if action not in qValues:
                qValues[action] = {}
            if action in possibleActions:
                qValues[action][state] = agent.getQValue(state, action)
            else:
                qValues[action][state] = None
    valuesPretty = self.prettyValues(values)
    policyPretty = self.prettyPolicy(policy)
    qValuesPretty = {}
    for action in actions:
        qValuesPretty[action] = self.prettyValues(qValues[action])
    return (valuesPretty, qValuesPretty, actions, policyPretty, lastExperience)
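# The test above reads back the agent's derived value function and greedy
# policy via computeValueFromQValues / computeActionFromQValues. A minimal
# sketch of those definitions, assuming a plain dict keyed by (state, action);
# random tie-breaking and the agent's real storage are elided, and the helper
# names are illustrative:
def _value_from_q_sketch(q, state, legalActions):
    # V(s) = max_a Q(s, a); a state with no legal actions has value 0.0
    return max((q.get((state, a), 0.0) for a in legalActions), default=0.0)

def _action_from_q_sketch(q, state, legalActions):
    # greedy policy: argmax_a Q(s, a); None when no legal action exists
    if not legalActions:
        return None
    return max(legalActions, key=lambda a: q.get((state, a), 0.0))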
def runAgent(self, moduleDict):
    agent = moduleDict['qlearningAgents'].QLearningAgent(**self.opts)
    states = [state for state in self.grid.getStates() if len(self.grid.getPossibleActions(state)) > 0]
    states.sort()
    randObj = FixedRandom().random
    # choose a random start state and a random possible action from that state
    # get the next state and reward from the transition function
    for i in range(self.numExperiences):
        startState = randObj.choice(states)
        action = randObj.choice(self.grid.getPossibleActions(startState))
        (endState, reward) = self.env.getRandomNextState(startState, action, randObj=randObj)
        agent.update(startState, action, endState, reward)
    return agent
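# Each agent.update(startState, action, endState, reward) above is one tabular
# Q-learning step. A minimal sketch of that step under the standard definition
#     Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (r + gamma * max_a' Q(s', a'))
# with plain-dict storage; the helper name and interface are illustrative:
def _q_update_sketch(q, state, action, nextState, nextActions, reward, alpha, discount):
    # one-step sample; a terminal nextState with no actions contributes 0.0
    sample = reward + discount * max(
        (q.get((nextState, a), 0.0) for a in nextActions), default=0.0)
    # running average between the old estimate and the new sample
    q[(state, action)] = (1 - alpha) * q.get((state, action), 0.0) + alpha * sample
    return q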
def run_agent(self, module_dict, num_experiences):
    agent = module_dict['q_learning_agents'].QLearningAgent(**self.opts)
    states = [state for state in self.grid.get_states()
              if len(self.grid.get_possible_actions(state)) > 0]
    states.sort()
    rand_obj = FixedRandom().random
    # choose a random start state and a random possible action from that state
    # get the next state and reward from the transition function
    last_experience = None
    for i in range(num_experiences):
        start_state = rand_obj.choice(states)
        action = rand_obj.choice(self.grid.get_possible_actions(start_state))
        (end_state, reward) = self.env.get_random_next_state(start_state, action, rand_obj=rand_obj)
        last_experience = (start_state, action, end_state, reward)
        agent.update(*last_experience)
    actions = list(reduce(lambda a, b: set(a).union(b),
                          [self.grid.get_possible_actions(state) for state in states]))
    values = {}
    q_values = {}
    policy = {}
    for state in states:
        values[state] = agent.compute_value_from_q_values(state)
        policy[state] = agent.compute_action_from_q_values(state)
        possible_actions = self.grid.get_possible_actions(state)
        for action in actions:
            if action not in q_values:
                q_values[action] = {}
            if action in possible_actions:
                q_values[action][state] = agent.get_q_value(state, action)
            else:
                q_values[action][state] = None
    values_pretty = self.pretty_values(values)
    policy_pretty = self.pretty_policy(policy)
    q_values_pretty = {}
    for action in actions:
        q_values_pretty[action] = self.pretty_values(q_values[action])
    return (values_pretty, q_values_pretty, actions, policy_pretty, last_experience)
def run_agent(self, module_dict):
    agent = module_dict['q_learning_agents'].QLearningAgent(**self.opts)
    states = [state for state in self.grid.get_states()
              if len(self.grid.get_possible_actions(state)) > 0]
    states.sort()
    rand_obj = FixedRandom().random
    # choose a random start state and a random possible action from that state
    # get the next state and reward from the transition function
    for i in range(self.num_experiences):
        start_state = rand_obj.choice(states)
        action = rand_obj.choice(self.grid.get_possible_actions(start_state))
        (end_state, reward) = self.env.get_random_next_state(start_state, action, rand_obj=rand_obj)
        agent.update(start_state, action, end_state, reward)
    return agent
def run_agent(self, module_dict, num_experiences):
    agent = module_dict['q_learning_agents'].ApproximateQAgent(
        extractor=self.extractor, **self.opts)
    states = [state for state in self.grid.get_states()
              if len(self.grid.get_possible_actions(state)) > 0]
    states.sort()
    rand_obj = FixedRandom().random
    # choose a random start state and a random possible action from that state
    # get the next state and reward from the transition function
    last_experience = None
    for i in range(num_experiences):
        start_state = rand_obj.choice(states)
        action = rand_obj.choice(self.grid.get_possible_actions(start_state))
        (end_state, reward) = self.env.get_random_next_state(start_state, action, rand_obj=rand_obj)
        last_experience = (start_state, action, end_state, reward)
        agent.update(*last_experience)
    actions = list(reduce(lambda a, b: set(a).union(b),
                          [self.grid.get_possible_actions(state) for state in states]))
    q_values = {}
    weights = agent.get_weights()
    for state in states:
        possible_actions = self.grid.get_possible_actions(state)
        for action in actions:
            if action not in q_values:
                q_values[action] = {}
            if action in possible_actions:
                q_values[action][state] = agent.get_q_value(state, action)
            else:
                q_values[action][state] = None
    q_values_pretty = {}
    for action in actions:
        q_values_pretty[action] = self.pretty_values(q_values[action])
    return (q_values_pretty, weights, actions, last_experience)
# Student side autograding was added by Brad Miller, Nick Hay, and Pieter
# Abbeel in Spring 2013.
# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html

# imports from python standard library
import grading
import imp
import optparse
import os
import re
import sys
import projectParams
import random
from util import FixedRandom

random.setstate(FixedRandom().random.getstate())

# register arguments and set default values
def readCommand(argv):
    parser = optparse.OptionParser(description='Run public tests on student code')
    parser.set_defaults(generateSolutions=False,
                        edxOutput=False,
                        muteOutput=False,
                        printTestCase=False)
    parser.add_option('--test-directory',
                      dest='testRoot',
                      default='test_cases',
                      help=
def __init__(self, index):
    self.index = index
    self.fixedrandom = FixedRandom()
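# FixedRandom (from util) is what makes both the autograder's setstate call and
# per-agent sampling reproducible: every instance restores the same stored
# generator state, so independent runs draw the identical sequence. A stand-in
# with the same property; util's real FixedRandom hard-codes a saved state
# rather than a seed, and the names here are illustrative:
import random

class _FixedSeedRandomSketch:
    def __init__(self, seed=188):
        # every instance starts from the same internal state
        self.random = random.Random(seed)

# two independent instances replay the same sequence of draws
assert _FixedSeedRandomSketch().random.random() == _FixedSeedRandomSketch().random.random()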