def __init__(self, question, test_dict):
    """Set up a Q-learning test case from its parsed description.

    Args:
        question: Forwarded unchanged to the parent class initializer.
        test_dict: Test description, indexed by string key (presumably a
            dict -- confirm against the caller).  Required keys:
            'discount', 'grid', 'epsilon', 'learning_rate',
            'num_experiences' and 'test_out_file'.  Optional keys:
            'noise' and 'living_reward'.

    Raises:
        ValueError: If a numeric entry cannot be converted by ``float`` /
            ``int``.
        KeyError: If a required key is missing (assuming ``test_dict`` is
            a plain dict).
    """
    super(QLearningTest, self).__init__(question, test_dict)

    self.discount = float(test_dict['discount'])

    self.grid = grid_world.GridWorld(parse_grid(test_dict['grid']))
    if 'noise' in test_dict:
        self.grid.set_noise(float(test_dict['noise']))
    if 'living_reward' in test_dict:
        self.grid.set_living_reward(float(test_dict['living_reward']))
    # NOTE(review): the grid is rebuilt here, so the noise / living-reward
    # values applied just above are not carried over to the grid that is
    # actually used.  Kept as-is on purpose: removing this line changes the
    # environment dynamics, and any previously recorded expected outputs
    # may have been produced with this behaviour -- confirm before removing.
    self.grid = grid_world.GridWorld(parse_grid(test_dict['grid']))
    self.env = grid_world.GridWorldEnvironment(self.grid)

    self.epsilon = float(test_dict['epsilon'])
    self.learning_rate = float(test_dict['learning_rate'])
    # Options bundle built from the parsed parameters (presumably handed to
    # the agent under test -- confirm at the use site).
    self.opts = {
        'action_fn': self.env.get_possible_actions,
        'epsilon': self.epsilon,
        'gamma': self.discount,
        'alpha': self.learning_rate,
    }

    num_experiences = int(test_dict['num_experiences'])
    max_pre_experiences = 10
    # Materialize as a real list: a Python 3 ``range`` object has no
    # ``append``, so the conditional append below would raise
    # AttributeError.  On Python 2 ``list(range(...))`` is the same list.
    self.nums_experiences_for_display = list(
        range(min(num_experiences, max_pre_experiences)))
    self.test_out_file = test_dict['test_out_file']
    # Beyond the first ``max_pre_experiences`` counts, also include the
    # final experience count itself.
    if max_pre_experiences < num_experiences:
        self.nums_experiences_for_display.append(num_experiences)
def __init__(self, question, test_dict):
    """Set up an epsilon-greedy test case from its parsed description.

    Args:
        question: Forwarded unchanged to the parent class initializer.
        test_dict: Test description, indexed by string key (presumably a
            dict -- confirm against the caller).  Required keys:
            'discount', 'grid', 'epsilon', 'learning_rate',
            'num_experiences' and 'iterations'.  Optional keys: 'noise'
            and 'living_reward'.
    """
    super(EpsilonGreedyTest, self).__init__(question, test_dict)

    self.discount = float(test_dict['discount'])

    self.grid = grid_world.GridWorld(parse_grid(test_dict['grid']))
    # Apply each optional setting only when the description provides it.
    optional_settings = (
        ('noise', 'set_noise'),
        ('living_reward', 'set_living_reward'),
    )
    for key, setter_name in optional_settings:
        if key in test_dict:
            getattr(self.grid, setter_name)(float(test_dict[key]))
    # NOTE(review): the grid is constructed a second time here, so the
    # optional settings applied above do not reach the grid that is kept.
    # Behaviour deliberately preserved -- confirm intent before changing.
    self.grid = grid_world.GridWorld(parse_grid(test_dict['grid']))
    self.env = grid_world.GridWorldEnvironment(self.grid)

    self.epsilon = float(test_dict['epsilon'])
    self.learning_rate = float(test_dict['learning_rate'])
    self.num_experiences = int(test_dict['num_experiences'])
    self.num_iterations = int(test_dict['iterations'])

    # Options bundle built from the parsed parameters.
    self.opts = dict(
        action_fn=self.env.get_possible_actions,
        epsilon=self.epsilon,
        gamma=self.discount,
        alpha=self.learning_rate,
    )
return "get_" + all_cap_re.sub(r'\1_\2', s1).lower() if __name__ == '__main__': opts = parse_options() ########################### # GET THE GRIDWORLD ########################### import grid_world mdp_function = getattr(grid_world, get_function(opts.grid)) mdp = mdp_function() mdp.set_living_reward(opts.living_reward) mdp.set_noise(opts.noise) env = grid_world.GridWorldEnvironment(mdp) ########################### # GET THE DISPLAY ADAPTER ########################### import text_grid_world_display display = text_grid_world_display.TextGridWorldDisplay(mdp) if not opts.text_display: import graphics_grid_world_display display = graphics_grid_world_display.GraphicsGridWorldDisplay(mdp, opts.grid_size, opts.speed) try: display.start() except KeyboardInterrupt: sys.exit(0) ########################### # GET THE AGENT