def __init__(self, question, testDict):
    super(QLearningTest, self).__init__(question, testDict)
    self.discount = float(testDict['discount'])
    self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
    if 'noise' in testDict:
        self.grid.setNoise(float(testDict['noise']))
    if 'livingReward' in testDict:
        self.grid.setLivingReward(float(testDict['livingReward']))
    self.env = gridworld.GridworldEnvironment(self.grid)
    self.epsilon = float(testDict['epsilon'])
    self.learningRate = float(testDict['learningRate'])
    self.opts = {
        'actionFn': self.env.getPossibleActions,
        'epsilon': self.epsilon,
        'gamma': self.discount,
        'alpha': self.learningRate
    }
    numExperiences = int(testDict['numExperiences'])
    maxPreExperiences = 10
    self.numsExperiencesForDisplay = list(
        range(min(numExperiences, maxPreExperiences)))
    self.testOutFile = testDict['test_out_file']
    if maxPreExperiences < numExperiences:
        self.numsExperiencesForDisplay.append(numExperiences)
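# The constructor above reads everything it needs from `testDict`.  A minimal
# sketch of such a dictionary follows; only the key names are taken from the
# code above, the concrete values (and the grid layout string) are illustrative
# assumptions about what parseGrid() and the autograder accept.
EXAMPLE_QLEARNING_TEST_DICT = {
    'discount': '0.9',
    'grid': """
        _    _    _    _   10
        _    #    _    _  -10
        S    _    _    _    _
    """,                          # assumed layout format consumed by parseGrid()
    'noise': '0.2',               # optional: transition noise
    'livingReward': '0.0',        # optional: per-step reward
    'epsilon': '0.3',
    'learningRate': '0.5',
    'numExperiences': '5000',
    'test_out_file': 'q_learning.test_output',  # assumed output path
}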
if __name__ == '__main__':
    opts = parseOptions()

    ###########################
    # GET THE GRIDWORLD
    ###########################

    import gridworld
    mdpFunction = getattr(gridworld, "get" + opts.grid)
    mdp = mdpFunction()
    mdp.setLivingReward(opts.livingReward)
    mdp.setNoise(opts.noise)
    env = gridworld.GridworldEnvironment(mdp)

    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################

    import textGridworldDisplay
    display = textGridworldDisplay.TextGridworldDisplay(mdp)
    if not opts.textDisplay:
        import graphicsGridworldDisplay
        display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
            mdp, opts.gridSize, opts.speed)
    try:
        display.start()
    except KeyboardInterrupt:
        sys.exit(0)
def main(myargs):
    sys.argv = myargs.split()
    opts = parseOptions()

    ###########################
    # GET THE GRIDWORLD
    ###########################

    if opts.grid == 'VerticalBridgeGrid':
        opts.gridSize = 120

    import gridworld
    mdpFunction = getattr(gridworld, "get" + opts.grid)
    mdp = mdpFunction()
    mdp.setLivingReward(opts.livingReward)
    mdp.setNoise(opts.noise)
    env = gridworld.GridworldEnvironment(mdp)

    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################

    import textGridworldDisplay
    display = textGridworldDisplay.TextGridworldDisplay(mdp)
    if not opts.textDisplay:
        import graphicsGridworldDisplay
        display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
            mdp, opts.gridSize, opts.speed)
    try:
        display.start()
    except KeyboardInterrupt:
        sys.exit(0)

    ###########################
    # GET THE AGENT
    ###########################

    import valueIterationAgents, qlearningAgents
    a = None
    if opts.agent == 'value':
        a = valueIterationAgents.ValueIterationAgent(mdp, opts.discount, opts.iters)
    elif opts.agent == 'q':
        # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
        # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent, state, mdp)
        gridWorldEnv = GridworldEnvironment(mdp)
        actionFn = lambda state: mdp.getPossibleActions(state)
        qLearnOpts = {'gamma': opts.discount,
                      'alpha': opts.learningRate,
                      'epsilon': opts.epsilon,
                      'actionFn': actionFn}
        a = qlearningAgents.QLearningAgent(**qLearnOpts)
    elif opts.agent == 'random':
        # No reason to use the random agent without episodes
        if opts.episodes == 0:
            opts.episodes = 10

        class RandomAgent:
            def getAction(self, state):
                return random.choice(mdp.getPossibleActions(state))
            def getValue(self, state):
                return 0.0
            def getQValue(self, state, action):
                return 0.0
            def getPolicy(self, state):
                "NOTE: 'random' is a special policy value; don't use it in your code."
                return 'random'
            def update(self, state, action, nextState, reward):
                pass

        a = RandomAgent()
    else:
        if not opts.manual:
            raise Exception('Unknown agent type: ' + opts.agent)

    ###########################
    # RUN EPISODES
    ###########################

    # DISPLAY Q/V VALUES BEFORE SIMULATION OF EPISODES
    try:
        if not opts.manual and opts.agent == 'value':
            if opts.valueSteps:
                for i in range(opts.iters):
                    tempAgent = valueIterationAgents.ValueIterationAgent(mdp, opts.discount, i)
                    display.displayValues(tempAgent, message="VALUES AFTER " + str(i) + " ITERATIONS")
                    display.pause()
            display.displayValues(a, message="VALUES AFTER " + str(opts.iters) + " ITERATIONS")
            display.pause()
            display.displayQValues(a, message="Q-VALUES AFTER " + str(opts.iters) + " ITERATIONS")
            display.pause()
    except KeyboardInterrupt:
        sys.exit(0)

    # FIGURE OUT WHAT TO DISPLAY EACH TIME STEP (IF ANYTHING)
    displayCallback = lambda x: None
    if not opts.quiet:
        if opts.manual and opts.agent == None:
            displayCallback = lambda state: display.displayNullValues(state)
        else:
            if opts.agent == 'random':
                displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES")
            if opts.agent == 'value':
                displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES")
            if opts.agent == 'q':
                displayCallback = lambda state: display.displayQValues(a, state, "CURRENT Q-VALUES")

    messageCallback = lambda x: printString(x)
    if opts.quiet:
        messageCallback = lambda x: None

    # FIGURE OUT WHETHER TO WAIT FOR A KEY PRESS AFTER EACH TIME STEP
    pauseCallback = lambda: None
    if opts.pause:
        pauseCallback = lambda: display.pause()

    # FIGURE OUT WHETHER THE USER WANTS MANUAL CONTROL (FOR DEBUGGING AND DEMOS)
    if opts.manual:
        decisionCallback = lambda state: getUserAction(state, mdp.getPossibleActions)
    else:
        decisionCallback = a.getAction

    # RUN EPISODES
    if opts.episodes > 0:
        print()
        print("RUNNING", opts.episodes, "EPISODES")
        print()
    returns = 0
    for episode in range(1, opts.episodes + 1):
        returns += runEpisode(a, env, opts.discount, decisionCallback,
                              displayCallback, messageCallback, pauseCallback, episode)
    if opts.episodes > 0:
        print()
        print("AVERAGE RETURNS FROM START STATE: " + str((returns + 0.0) / opts.episodes))
        print()
        print()

    # DISPLAY POST-LEARNING VALUES / Q-VALUES
    if opts.agent == 'q' and not opts.manual:
        try:
            display.displayQValues(a, message="Q-VALUES AFTER " + str(opts.episodes) + " EPISODES")
            display.pause()
            display.displayValues(a, message="VALUES AFTER " + str(opts.episodes) + " EPISODES")
            display.pause()
        except KeyboardInterrupt:
            sys.exit(0)
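# A minimal sketch of driving the same episode loop directly, without
# parseOptions() or a display.  The runEpisode() call signature and the
# QLearningAgent keyword arguments are taken from the code above; the grid
# name, hyperparameters, and episode count here are illustrative assumptions.
def run_headless_demo(n_episodes=10, discount=0.9):
    import gridworld, qlearningAgents
    mdp = gridworld.getBookGrid()
    env = gridworld.GridworldEnvironment(mdp)
    agent = qlearningAgents.QLearningAgent(
        gamma=discount, alpha=0.5, epsilon=0.3,
        actionFn=lambda state: mdp.getPossibleActions(state))
    noop = lambda *args: None          # no display, no messages, no pauses
    total = 0.0
    for episode in range(1, n_episodes + 1):
        total += runEpisode(agent, env, discount, agent.getAction,
                            noop, noop, noop, episode)
    return total / n_episodes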
def __init__(self, grid_name='DiscountGrid', discount=0.9, learning_rate=0.5, living_reward=0.0,
             noise=0, epsilon=0.3, display_speed=0.5,
             grid_size=150, text_only=False, n_episodes=100,
             agent_window_size=1,
             agent_max_n_experiences=1000,
             check_policy_converge=False,
             optimal_policy=None,
             expr_log_dir=None,
             agent_type="qLearningAgent",
             init_temp=1024.0,
             temp_decrease_rate=2.0,
             is_asyn_input=True):
    """
    :param agent_type: "qLearningAgent" or "TamerAgent" or "preferenceTAMERAgent"
    """
    ###########################
    # GENERAL CONTROL
    ###########################
    self.text_only = text_only
    self.display_speed = display_speed
    self.n_episodes = n_episodes
    self.discount = discount
    self.check_policy_converge = check_policy_converge
    self.optimal_policy = optimal_policy
    self.expr_log_dir = expr_log_dir
    self.save_VDBE = False

    ###########################
    # GET THE INPUT MODULE
    ###########################
    if agent_type == qlearningAgents.QLearningAgent.getAgentType():
        self.user_input_module = None
    else:
        self.user_input_module = user_input.UserInputModule(is_asyn=is_asyn_input)

    ###########################
    # GET THE GRIDWORLD
    ###########################
    # noinspection PyUnresolvedReferences
    import gridworld
    mdp_function = getattr(gridworld, "get" + grid_name)
    self.mdp = mdp_function()
    self.mdp.setLivingReward(living_reward)
    self.mdp.setNoise(noise)
    self.env = gridworld.GridworldEnvironment(self.mdp)

    ###########################
    # Variables used to store parameter values
    ###########################
    # init VDBE value records
    global VDBE_RECORDS
    VDBE_RECORDS = dict()
    for state in self.env.getGridWorld().getNonTerminalStates():
        VDBE_RECORDS[state] = list()

    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################
    import textGridworldDisplay
    self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
    if not text_only:
        import graphicsGridworldDisplay
        self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(self.mdp, grid_size, display_speed)
    try:
        self.display.start()
    except KeyboardInterrupt:
        sys.exit(0)

    ###########################
    # GET THE TAMER AGENT
    ###########################
    # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
    # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent, state, mdp)
    self.gridWorldEnv = GridworldEnvironment(self.mdp)
    action_function = lambda m_state: self.mdp.getPossibleActions(m_state)
    q_learn_opts = {
        'gamma': discount,
        'alpha': learning_rate,
        'epsilon': epsilon,
        'actionFn': action_function,
        'init_temp': init_temp,
        'temp_decrease_rate': temp_decrease_rate
    }
    if agent_type == qlearningAgents.QLearningAgent.getAgentType():
        self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
    elif agent_type == qlearningAgents.TamerQAgent.getAgentType():
        self.agent = qlearningAgents.TamerQAgent(max_n_experiences=agent_max_n_experiences,
                                                 window_size=agent_window_size,
                                                 is_asyn_input=is_asyn_input,
                                                 **q_learn_opts)
    elif agent_type == preferenceTamerAgent.PreferenceTAMERAgent.getAgentType():
        self.agent = preferenceTamerAgent.PreferenceTAMERAgent(max_n_experiences=agent_max_n_experiences,
                                                               window_size=agent_window_size,
                                                               is_asyn_input=is_asyn_input,
                                                               **q_learn_opts)
def __init__(self, grid_name='DiscountGrid', discount=0.9, learning_rate=0.5, living_reward=0.0,
             noise=0, epsilon=0.3, display_speed=0.5,
             grid_size=150, text_only=False,
             save_optimal_policy_file=None,
             init_temp=1024.0,
             temp_decrease_rate=2.0,
             delta=0.02):
    ###########################
    # GENERAL CONTROL
    ###########################
    self.text_only = text_only
    self.display_speed = display_speed
    self.discount = discount
    self.delta = delta
    self.save_optimal_policy_file = save_optimal_policy_file

    ###########################
    # GET THE GRIDWORLD
    ###########################
    # noinspection PyUnresolvedReferences
    import gridworld
    mdp_function = getattr(gridworld, "get" + grid_name)
    self.mdp = mdp_function()
    self.mdp.setLivingReward(living_reward)
    self.mdp.setNoise(noise)
    self.env = gridworld.GridworldEnvironment(self.mdp)

    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################
    import textGridworldDisplay
    self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
    if not text_only:
        import graphicsGridworldDisplay
        self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(self.mdp, grid_size, display_speed)
    try:
        self.display.start()
    except KeyboardInterrupt:
        sys.exit(0)

    ###########################
    # GET THE TAMER AGENT
    ###########################
    import qlearningAgents
    # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
    # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent, state, mdp)
    self.gridWorldEnv = gridworld.GridworldEnvironment(self.mdp)
    action_function = lambda state: self.mdp.getPossibleActions(state)
    q_learn_opts = {
        'gamma': discount,
        'alpha': learning_rate,
        'epsilon': epsilon,
        'actionFn': action_function,
        'init_temp': init_temp,
        'temp_decrease_rate': temp_decrease_rate
    }
    self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
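# A minimal construction sketch for the optimal-policy experiment above.  The
# enclosing class name is not visible in this snippet, so `OptimalPolicyExperiment`
# is a hypothetical placeholder; the keyword arguments are the ones the
# constructor actually accepts, and the output path is an assumption.
def build_example_optimal_policy_experiment():
    experiment = OptimalPolicyExperiment(              # hypothetical class name
        grid_name='DiscountGrid',
        discount=0.9,
        learning_rate=0.5,
        noise=0,
        epsilon=0.3,
        text_only=True,                                 # skip the graphics display
        save_optimal_policy_file='optimal_policy.pkl',  # assumed output path
        delta=0.02)
    return experiment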
def do_turn(self, ants):
    # track all moves, prevent collisions
    orders = {}

    def do_move_direction(loc, direction):
        # destination() takes care of wrapping around and returns the destination;
        # this issues the moving order
        new_loc = ants.destination(loc, direction)
        # orders maps the destination of each moving ant back to its source
        if (ants.unoccupied(new_loc) and new_loc not in orders):
            ants.issue_order((loc, direction))
            orders[new_loc] = loc
            return True
        else:
            return False

    targets = {}

    def do_move_location(loc, dirctn):
        # ants.direction() takes a location and a destination and returns a list of
        # the closest directions "as the crow flies".  If the target is up and to the
        # left, it returns ['n', 'w'] and we should try to move the ant in one of
        # those directions.  If the target is directly down, it returns ['s'].
        directions = dirctn
        for direction in directions:
            if do_move_direction(loc, direction):
                # targets[dest] = loc
                return True
        return False

    # -------------------------------- starts from here --------------------------------
    self.turn = self.turn + 1

    # MY HILLS
    for hill_loc in ants.my_hills():
        x, y = hill_loc
        self.grid[x][y] = self.MYHILL
        # The dummy entry doesn't need a from-location, so we just set the value to None.
        # Prevent stepping on our own hill.
        orders[hill_loc] = None

    # ENEMY HILLS
    for hill_loc, hill_owner in ants.enemy_hills():
        hillrow, hillcol = hill_loc
        self.grid[hillrow][hillcol] = self.ENEMYHILL

    # LAND, WATER, FOOD
    for i in range(ants.rows):
        for j in range(ants.cols):
            # if ((ants.visible((i, j)) == True) or (self.grid[i][j] == (self.FOOD or self.ENEMYANTS
            #         or self.BOUNDARY2 or self.ENEMYANTS2))):
            #     self.grid[i][j] = ' '
            #     self.gridu[i][j] = 'v'
            if ants.visible((i, j)) == True:
                self.gridu[i][j] = 'v'
                # NOTE: these `or` chains only compare against the first value
                # in the chain, so effectively this tests != MYHILL / == FOOD.
                if (self.grid[i][j] != (self.MYHILL or self.MYHILL2 or self.ENEMYHILL or self.WATER)):
                    self.grid[i][j] = ' '
                elif self.grid[i][j] == (self.FOOD or self.ENEMYANTS or self.BOUNDARY2
                                         or self.ENEMYANTS2 or self.MYANTS):
                    self.grid[i][j] = ' '
            if ants.map[i][j] == -3:
                self.grid[i][j] = self.FOOD
            elif ants.map[i][j] == -4:
                self.grid[i][j] = self.WATER
            # if I can't see my hill, retreat to it urgently
            if self.grid[i][j] == self.MYHILL:
                if ants.visible((i, j)) == False:
                    print('hill retreat', i, j, file=sys.stderr)
                    sys.stderr.flush()
                    self.grid[i][j] = self.MYHILL2
                else:
                    self.grid[i][j] = self.MYHILL
            if self.grid[i][j] == self.ENEMYHILL:
                print('hill attack!!!!!!!!!!!!!!!!!!', i, j, file=sys.stderr)
                sys.stderr.flush()

    # MY ANTS
    num_ants = 0
    sx = 0
    sy = 0
    for ant_loc in ants.my_ants():
        antrow, antcol = ant_loc
        sx = sx + antrow
        sy = sy + antcol
        # self.grid[antrow][antcol] = self.MYANTS
        num_ants = num_ants + 1
    sx = int(sx / num_ants)
    sy = int(sy / num_ants)
    self.grid[sx][sy] = self.MYANTS

    # change MODE ------------------------------------------ what do I do here?
    if num_ants >= 0:  # (ants.rows * ants.cols / 200)
        self.BOUNDARY = self.BOUNDARY2
    else:
        self.BOUNDARY = ' '

    # ENEMY ANTS
    for enemy_loc, enemy_owner in ants.enemy_ants():
        enemyrow, enemycol = enemy_loc
        # TODO: if own ant concentration is good near the enemy ant (enemy ant
        # concentration in the area), then a positive reward
        self.grid[enemyrow][enemycol] = self.ENEMYANTS
        # if they're near my base, retreat to base
        for hill_loc in ants.my_hills():
            x, y = hill_loc
            if ants.distance(hill_loc, enemy_loc) < 9.0:
                self.grid[enemyrow][enemycol] = self.ENEMYANTS2
        # if I can surround them, attack :)  TODO: also check the enemy density
        surround = 0
        for ant_loc in ants.my_ants():
            antrow, antcol = ant_loc
            if ants.distance(ant_loc, enemy_loc) < 6.0:
                surround = surround + 1
        if surround >= 3:
            self.grid[enemyrow][enemycol] = self.ENEMYANTS2
            print('surrounded, attack:', enemyrow, enemycol, file=sys.stderr)
            sys.stderr.flush()

    # BOUNDARY EXPANDING
    for i in range(ants.rows):
        for j in range(ants.cols):
            if (self.gridu[i][j] == 'v' and self.grid[i][j] == ' '):
                if (ants.visible(ants.destination((i, j), 'n')) == False or
                        ants.visible(ants.destination((i, j), 'e')) == False or
                        ants.visible(ants.destination((i, j), 'w')) == False or
                        ants.visible(ants.destination((i, j), 's')) == False):
                    self.grid[i][j] = self.BOUNDARY

    # ----------------------------------------------------------------- VALUE ITERATION
    # opts = {'agent': 'value', 'discount': 0.9, 'iters': 200, 'noise': 0.01,
    #         'livingReward': 0.0, 'epsilon': 0.0, 'pause': False, 'manual': False,
    #         'quiet': True, 'episodes': 100, 'learningRate': 0.5, 'grid': 'BookGrid',
    #         'gridSize': 150, 'speed': 1000.0, 'textDisplay': False}
    opts = {
        'livingReward': 0.0,
        'discount': 0.9,
        'iters': 300,
        'noise': 0.05,
        'epsilon': 0.0,
        'manual': False,
        'quiet': True,
        'agent': 'value',
        'pause': False,
        'episodes': 100,
        'learningRate': 0.5,
        'grid': 'BookGrid',
        'gridSize': 150,
        'speed': 1000.0,
        'textDisplay': False
    }
    mdp = gridworld.Gridworld(self.grid)
    mdp.setLivingReward(opts['livingReward'])
    mdp.setNoise(opts['noise'])
    env = gridworld.GridworldEnvironment(mdp)

    ###########################
    # GET THE AGENT
    ###########################
    # time_to_spare = (ants.turntime / 1000.0) - (0.00064286 + 0.0000547619 * num_ants
    #                 + 0.0000065476 * (num_ants * num_ants)) - 0.01
    if num_ants <= 60:
        time_to_spare = (ants.turntime / 1000.0) - 0.03
    else:
        time_to_spare = (ants.turntime / 1000.0) - (
            -0.003512 + 0.00047632 * num_ants - 0.00000105286 * (num_ants * num_ants)) - 0.005

    a = valueIterationAgents.ValueIterationAgent(ants.turn_start_time, time_to_spare, mdp,
                                                 opts['discount'], opts['iters'])

    # t1 = time.time()
    for ant_loc in ants.my_ants():
        antcol, antrow = ant_loc
        antcol = ants.rows - antcol - 1
        inverted_ant_loc = (antrow, antcol)
        if (a.getQValue(inverted_ant_loc, 'north') ==
                a.getQValue(inverted_ant_loc, 'south') ==
                a.getQValue(inverted_ant_loc, 'east') ==
                a.getQValue(inverted_ant_loc, 'west')):
            direct = random.choice('sewn')
            do_move_location(ant_loc, direct)
        elif a.getPolicy(inverted_ant_loc) == 'north':
            direct = 'n'
            do_move_location(ant_loc, direct)
        elif a.getPolicy(inverted_ant_loc) == 'south':
            direct = 's'
            do_move_location(ant_loc, direct)
        elif a.getPolicy(inverted_ant_loc) == 'east':
            direct = 'e'
            do_move_location(ant_loc, direct)
        elif a.getPolicy(inverted_ant_loc) == 'west':
            direct = 'w'
            do_move_location(ant_loc, direct)
        else:
            direct = random.choice('sewn')
            do_move_location(ant_loc, direct)

    # t2 = time.time() - t1
    print('turn:', self.turn, 'ants:', num_ants, 'spare:', time_to_spare,
          'time:', (time.time() - ants.turn_start_time), file=sys.stderr)
    sys.stderr.flush()

    # unblock own hill
    for hill_loc in ants.my_hills():
        if hill_loc in ants.my_ants() and hill_loc not in orders.values():
            for direction in ('s', 'e', 'w', 'n'):
                if do_move_direction(hill_loc, direction):
                    break
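# The policy-lookup loop above converts an ants-map location (row, col) into the
# coordinate frame the value iteration agent sees: the column becomes x and the
# row index is flipped, since the Gridworld built from self.grid has its y axis
# growing upward.  A standalone helper expressing that mapping (a sketch; the
# frame convention is inferred from the loop above, not from the ants API docs):
def to_gridworld_coords(ant_loc, n_rows):
    row, col = ant_loc
    # e.g. on a 10-row map, ant at (row=0, col=3) maps to gridworld state (3, 9)
    return (col, n_rows - row - 1)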
def __init__(self, grid_name='DiscountGrid', discount=0.9, learning_rate=0.5, living_reward=0.0,
             noise=0, epsilon=0.3, display_speed=0.5, grid_size=150, text_only=False,
             n_episodes=100, agent_window_size=1, agent_max_n_experiences=1000,
             check_value_converge=False, check_policy_converge=False, optimal_policy=None,
             expr_log_dir=None, delta=0.02, is_use_q_agent=False, init_temp=1024.0,
             temp_decrease_rate=2.0, is_asyn_input=True):
    ###########################
    # GENERAL CONTROL
    ###########################
    self.text_only = text_only
    self.display_speed = display_speed
    self.n_episodes = n_episodes
    self.discount = discount
    self.check_value_converge = check_value_converge
    self.check_policy_converge = check_policy_converge
    self.optimal_policy = optimal_policy
    self.expr_log_dir = expr_log_dir
    self.delta = delta

    ###########################
    # GET THE INPUT MODULE
    ###########################
    if is_use_q_agent:
        self.user_input_module = None
    else:
        self.user_input_module = user_input.UserInputModule(is_asyn=is_asyn_input)

    ###########################
    # GET THE GRIDWORLD
    ###########################
    # noinspection PyUnresolvedReferences
    import gridworld
    mdp_function = getattr(gridworld, "get" + grid_name)
    self.mdp = mdp_function()
    self.mdp.setLivingReward(living_reward)
    self.mdp.setNoise(noise)
    self.env = gridworld.GridworldEnvironment(self.mdp)

    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################
    import textGridworldDisplay
    self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
    if not text_only:
        import graphicsGridworldDisplay
        self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
            self.mdp, grid_size, display_speed)
    try:
        self.display.start()
    except KeyboardInterrupt:
        sys.exit(0)

    ###########################
    # GET THE TAMER AGENT
    ###########################
    import qlearningAgents
    # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
    # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent, state, mdp)
    self.gridWorldEnv = GridworldEnvironment(self.mdp)
    action_function = lambda state: self.mdp.getPossibleActions(state)
    q_learn_opts = {
        'gamma': discount,
        'alpha': learning_rate,
        'epsilon': epsilon,
        'actionFn': action_function,
        'init_temp': init_temp,
        'temp_decrease_rate': temp_decrease_rate
    }
    if is_use_q_agent:
        self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
    else:
        self.agent = qlearningAgents.TamerQAgent(
            max_n_experiences=agent_max_n_experiences,
            window_size=agent_window_size,
            is_asyn_input=is_asyn_input,
            **q_learn_opts)
def __init__(self, optimal_policy=None, expr_log_dir=None):
    ###########################
    # GENERAL CONTROL
    ###########################
    self.text_only = ExperimentConfigurator.experimentConfig['text_only']
    self.display_speed = ExperimentConfigurator.experimentConfig['display_speed']
    self.n_episodes = ExperimentConfigurator.gridWorldConfig['n_episodes']
    self.discount = ExperimentConfigurator.gridWorldConfig['discount']
    self.check_policy_converge = ExperimentConfigurator.experimentConfig['check_policy_converge']
    self.optimal_policy = optimal_policy
    self.expr_log_dir = expr_log_dir
    self.save_VDBE = ExperimentConfigurator.experimentConfig['save_VDBE']

    ###########################
    # GET THE INPUT MODULE
    ###########################
    if ExperimentConfigurator.experimentConfig['agent_type'] == qlearningAgents.QLearningAgent.getAgentType():
        self.user_input_module = None
    else:
        self.user_input_module = user_input.UserInputModule(
            is_asyn=ExperimentConfigurator.TamerConfig['is_asyn_input'])
        self.auto_feedback = AutoFeedback()

    ###########################
    # GET THE GRIDWORLD
    ###########################
    # noinspection PyUnresolvedReferences
    import gridworld
    mdp_function = getattr(
        gridworld, "get" + ExperimentConfigurator.gridWorldConfig['grid_name'])
    self.mdp = mdp_function()
    self.mdp.setLivingReward(ExperimentConfigurator.gridWorldConfig['living_reward'])
    self.mdp.setNoise(ExperimentConfigurator.gridWorldConfig['noise'])
    self.env = gridworld.GridworldEnvironment(self.mdp)

    ###########################
    # Variables used to store parameter values
    ###########################
    # init VDBE value records
    global VDBE_RECORDS
    VDBE_RECORDS = dict()
    for state in self.env.getGridWorld().getNonTerminalStates():
        VDBE_RECORDS[state] = list()

    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################
    import textGridworldDisplay
    self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
    if not self.text_only:
        import graphicsGridworldDisplay
        self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
            self.mdp,
            ExperimentConfigurator.gridWorldConfig['grid_size'],
            self.display_speed)
    try:
        self.display.start()
    except KeyboardInterrupt:
        sys.exit(0)

    ###########################
    # GET THE TAMER AGENT
    ###########################
    # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
    # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent, state, mdp)
    self.gridWorldEnv = GridworldEnvironment(self.mdp)
    action_function = lambda m_state: self.mdp.getPossibleActions(m_state)
    q_learn_opts = {'actionFn': action_function}
    if ExperimentConfigurator.experimentConfig['agent_type'] == qlearningAgents.QLearningAgent.getAgentType():
        self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
    elif ExperimentConfigurator.experimentConfig['agent_type'] == qlearningAgents.TamerQAgent.getAgentType():
        self.agent = qlearningAgents.TamerQAgent(
            max_n_experiences=ExperimentConfigurator.TamerConfig['agent_max_n_experiences'],
            window_size=ExperimentConfigurator.TamerConfig['agent_window_size'],
            is_asyn_input=ExperimentConfigurator.TamerConfig['is_asyn_input'],
            **q_learn_opts)
    elif ExperimentConfigurator.experimentConfig['agent_type'] == preferenceTamerAgent.PreferenceTAMERAgent.getAgentType():
        self.agent = preferenceTamerAgent.PreferenceTAMERAgent(
            max_n_experiences=ExperimentConfigurator.TamerConfig['agent_max_n_experiences'],
            window_size=ExperimentConfigurator.TamerConfig['agent_window_size'],
            is_asyn_input=ExperimentConfigurator.TamerConfig['is_asyn_input'],
            **q_learn_opts)
def __init__(self, grid_name='DiscountGrid', discount=0.9, learning_rate=0.5, living_reward=0.0,
             noise=0.2, epsilon=0.3, display_speed=0.5, grid_size=150, text_only=False,
             n_episodes=100, agent_window_size=1, agent_max_n_experiences=1000,
             is_use_q_agent=False):
    self.text_only = text_only
    self.display_speed = display_speed
    self.n_episodes = n_episodes
    self.discount = discount

    ###########################
    # GET THE INPUT MODULE
    ###########################
    if is_use_q_agent:
        self.user_input_module = None
    else:
        self.user_input_module = user_input.UserInputModule()

    ###########################
    # GET THE GRIDWORLD
    ###########################
    # noinspection PyUnresolvedReferences
    import gridworld
    mdp_function = getattr(gridworld, "get" + grid_name)
    self.mdp = mdp_function()
    self.mdp.setLivingReward(living_reward)
    self.mdp.setNoise(noise)
    self.env = gridworld.GridworldEnvironment(self.mdp)

    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################
    import textGridworldDisplay
    self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
    if not text_only:
        import graphicsGridworldDisplay
        self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
            self.mdp, grid_size, display_speed)
    try:
        self.display.start()
    except KeyboardInterrupt:
        sys.exit(0)

    ###########################
    # GET THE TAMER AGENT
    ###########################
    import qlearningAgents
    # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
    # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent, state, mdp)
    self.gridWorldEnv = GridworldEnvironment(self.mdp)
    action_function = lambda state: self.mdp.getPossibleActions(state)
    q_learn_opts = {
        'gamma': discount,
        'alpha': learning_rate,
        'epsilon': epsilon,
        'actionFn': action_function
    }
    if is_use_q_agent:
        self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
    else:
        self.agent = qlearningAgents.TamerQAgent(
            max_n_experiences=agent_max_n_experiences,
            window_size=agent_window_size,
            **q_learn_opts)
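# A minimal construction sketch for the experiment above.  The enclosing class
# name is not visible in this snippet, so `TamerExperiment` is a hypothetical
# placeholder; the keyword arguments are the ones the constructor accepts.
def build_q_learning_baseline():
    # Pure Q-learning baseline: no user input module, no TAMER feedback window.
    return TamerExperiment(            # hypothetical class name
        grid_name='DiscountGrid',
        discount=0.9,
        learning_rate=0.5,
        noise=0.2,
        epsilon=0.3,
        n_episodes=100,
        text_only=True,                # run without the graphics display
        is_use_q_agent=True)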