def main(open_plot=True):
    '''
    Run a Q-learner and a random agent on a small Taxi OO-MDP.

    Args:
        open_plot (bool): passed through to run_agents_on_mdp.
    '''
    # Taxi start state: 4x4 grid, no walls, one passenger waiting for pickup.
    taxi_start = {"x": 1, "y": 1, "has_passenger": 0}
    waiting_passengers = [{"x": 3, "y": 2, "dest_x": 2, "dest_y": 3, "in_taxi": 0}]
    mdp = TaxiOOMDP(width=4, height=4, agent=taxi_start, walls=[],
                    passengers=waiting_passengers)

    # Agents.
    ql_agent = QLearnerAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Flip to True to train the Q-learner alone and visualize it instead.
    viz = False
    if not viz:
        # Run experiment and make plot.
        run_agents_on_mdp([ql_agent, rand_agent], mdp, instances=50, episodes=1,
                          steps=2000, reset_at_terminal=True, open_plot=open_plot)
        return

    # Visualize Taxi.
    run_single_agent_on_mdp(ql_agent, mdp, episodes=50, steps=1000)
    mdp.visualize_agent(ql_agent)
def choose_mdp(mdp_name, env_name="Asteroids-v0"):
    '''
    Args:
        mdp_name (str): one of {gym, grid, four_room, chain, taxi, random,
            prison, rps, grid_game, multi}
        env_name (str): gym environment name, like 'CartPole-v0'. Only used
            when mdp_name is "gym".

    Returns:
        (MDP): for "multi", a dict of RandomMDPs keyed by probability.

    Raises:
        ValueError: if mdp_name is "gym" and GymMDP cannot be imported.
        KeyError: if mdp_name is not one of the names above.
    '''
    if mdp_name == "gym":
        # OpenAI Gym MDP. Only an import failure means gym is missing; any
        # other error should surface unchanged.
        try:
            from simple_rl.tasks.gym.GymMDPClass import GymMDP
        except ImportError:
            raise ValueError("(simple_rl) Error: OpenAI gym not installed.")
        return GymMDP(env_name, render=True)

    # Other imports
    from simple_rl.tasks import ChainMDP, GridWorldMDP, FourRoomMDP, TaxiOOMDP, RandomMDP, PrisonersDilemmaMDP, RockPaperScissorsMDP, GridGameMDP

    def _taxi():
        # Taxi MDP.
        agent = {"x": 1, "y": 1, "has_passenger": 0}
        passengers = [{"x": 4, "y": 3, "dest_x": 2, "dest_y": 2, "in_taxi": 0}]
        return TaxiOOMDP(10, 10, slip_prob=0.0, agent=agent, walls=[], passengers=passengers)

    def _multi():
        # NOTE(review): both entries use the key 0.5, so the second replaces
        # the first and the resulting dict holds a single RandomMDP. Kept
        # as-is; confirm the intended shape (e.g. keyed by MDP instead).
        return {0.5: RandomMDP(num_states=40, num_rand_trans=20),
                0.5: RandomMDP(num_states=40, num_rand_trans=5)}

    # Builders are called lazily so that only the requested MDP is constructed.
    builders = {
        "grid": lambda: GridWorldMDP(5, 5, (1, 1), goal_locs=[(5, 3), (4, 1)]),
        "four_room": lambda: FourRoomMDP(),
        "chain": lambda: ChainMDP(5),
        "taxi": _taxi,
        "random": lambda: RandomMDP(num_states=40, num_rand_trans=20),
        "prison": lambda: PrisonersDilemmaMDP(),
        "rps": lambda: RockPaperScissorsMDP(),
        "grid_game": lambda: GridGameMDP(),
        "multi": _multi,
    }
    return builders[mdp_name]()
def make_mdp(mdp_class="grid", grid_dim=7):
    '''
    Args:
        mdp_class (str): one of {four_room, upworld, chain, random, hanoi,
            taxi, trench}
        grid_dim (int): side length of the grid domains; also used as the
            number of chain states and Hanoi pegs.

    Returns:
        (MDP)

    Raises:
        KeyError: if mdp_class is not one of the names above. Note that the
            default value "grid" is not among them.
    '''
    # Grid/Hallway stuff.
    width, height = grid_dim, grid_dim

    # Do grids separately to avoid making error-prone domains.
    if mdp_class == "four_room":
        return FourRoomMDP(width=width, height=height, goal_locs=[(width, height)])

    def _upworld():
        upworld_goal_locs = [(i, width) for i in range(1, height + 1)]
        return GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                            goal_locs=upworld_goal_locs)

    def _taxi():
        # Taxi stuff. Floor division keeps the passenger on an integer cell
        # under Python 3 as well as Python 2.
        agent = {"x": 1, "y": 1, "has_passenger": 0}
        passengers = [{"x": grid_dim // 2, "y": grid_dim // 2,
                       "dest_x": grid_dim - 2, "dest_y": 2, "in_taxi": 0}]
        return TaxiOOMDP(width=grid_dim, height=grid_dim, agent=agent, walls=[],
                         passengers=passengers)

    def _trench():
        # Trench stuff: one lava cell per column, all in the same row.
        tr_agent = {"x": 1, "y": 1, "dx": 1, "dy": 0, "dest_x": grid_dim,
                    "dest_y": grid_dim, "has_block": 0}
        blocks = [{"x": grid_dim, "y": 1}]
        lava_row = (grid_dim + 1) // 2
        lavas = [{"x": x, "y": lava_row} for x in range(1, grid_dim + 1)]
        return TrenchOOMDP(width=grid_dim, height=3, agent=tr_agent, blocks=blocks,
                           lavas=lavas)

    # Builders are called lazily so that only the requested domain is built.
    builders = {
        "upworld": _upworld,
        "chain": lambda: ChainMDP(num_states=grid_dim),
        "random": lambda: RandomMDP(num_states=50, num_rand_trans=2),
        "hanoi": lambda: HanoiMDP(num_pegs=grid_dim, num_discs=3),
        "taxi": _taxi,
        "trench": _trench,
    }
    return builders[mdp_class]()
def make_custom_mdp(mdp_class, mdp_parameters):
    '''
    Build the OO-MDP named by mdp_class from a parameter mapping.

    Args:
        mdp_class (str): one of {augmented_taxi, two_goal, skateboard, taxi,
            cookie_crumb}
        mdp_parameters (dict): constructor arguments keyed by name; which
            keys are read depends on mdp_class.

    Returns:
        (MDP)

    Raises:
        Exception: if mdp_class is not one of the names above.
    '''
    def constructor_kwargs(domain_pairs, with_env_code=True):
        # Assemble keyword arguments in constructor order: shared geometry,
        # then domain-specific entries, then gamma/weights. Each pair is
        # (constructor keyword, key in mdp_parameters).
        pairs = [(key, key) for key in ('width', 'height', 'agent', 'walls')]
        pairs.extend(domain_pairs)
        pairs.extend((key, key) for key in ('gamma', 'weights'))
        if with_env_code:
            pairs.append(('env_code', 'env_code'))

        kwargs = {}
        for keyword, key in pairs:
            kwargs[keyword] = mdp_parameters[key]
        if with_env_code:
            kwargs['sample_rate'] = 1
        return kwargs

    if mdp_class == 'augmented_taxi':
        return AugmentedTaxiOOMDP(**constructor_kwargs([
            ('passengers', 'passengers'),
            ('tolls', 'tolls'),
            ('traffic', 'traffic'),
            ('fuel_stations', 'fuel_station'),
        ]))
    if mdp_class == 'two_goal':
        return TwoGoalOOMDP(**constructor_kwargs([('goals', 'goals')]))
    if mdp_class == 'skateboard':
        return SkateboardOOMDP(**constructor_kwargs([
            ('goal', 'goal'),
            ('skateboard', 'skateboard'),
        ]))
    if mdp_class == 'taxi':
        # The plain taxi domain takes neither env_code nor sample_rate.
        return TaxiOOMDP(**constructor_kwargs([('passengers', 'passengers')],
                                              with_env_code=False))
    if mdp_class == 'cookie_crumb':
        return CookieCrumbOOMDP(**constructor_kwargs([
            ('goals', 'goals'),
            ('crumbs', 'crumbs'),
        ]))

    raise Exception("Unknown MDP class.")
def make_mdp(mdp_class="grid", grid_dim=7):
    '''
    Args:
        mdp_class (str): one of {hall, pblocks_grid, grid, four_room, chain,
            random, hanoi, taxi}
        grid_dim (int): side length of the grid domains; also used as the
            number of chain states and Hanoi pegs.

    Returns:
        (MDP)

    Raises:
        KeyError: if mdp_class is not one of the names above.
    '''
    # Grid/Hallway stuff.
    width, height = grid_dim, grid_dim

    def _hall():
        hall_goal_locs = [(i, width) for i in range(1, height + 1)]
        return GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                            goal_locs=hall_goal_locs)

    def _four_room():
        four_room_goal_locs = [(width, height), (width, 1), (1, height),
                               (1, height - 2), (width - 2, height - 2),
                               (width - 2, 1)]
        four_room_goal_loc = four_room_goal_locs[5]
        return FourRoomMDP(width=width, height=height, goal_locs=[four_room_goal_loc])

    def _taxi():
        # Taxi stuff. Floor division keeps the passenger on an integer cell
        # under Python 3 as well as Python 2.
        agent = {"x": 1, "y": 1, "has_passenger": 0}
        passengers = [{
            "x": grid_dim // 2,
            "y": grid_dim // 2,
            "dest_x": grid_dim - 2,
            "dest_y": 2,
            "in_taxi": 0
        }]
        return TaxiOOMDP(width=grid_dim, height=grid_dim, slip_prob=0.0, agent=agent,
                         walls=[], passengers=passengers)

    # Builders are called lazily: only the requested domain is constructed,
    # so e.g. pblocks_grid.txt is read only when "pblocks_grid" is asked for.
    builders = {
        "hall": _hall,
        "pblocks_grid": lambda: make_grid_world_from_file("pblocks_grid.txt", randomize=True),
        "grid": lambda: GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                                     goal_locs=[(grid_dim, grid_dim)]),
        "four_room": _four_room,
        "chain": lambda: ChainMDP(num_states=grid_dim),
        "random": lambda: RandomMDP(num_states=50, num_rand_trans=2),
        "hanoi": lambda: HanoiMDP(num_pegs=grid_dim, num_discs=3),
        "taxi": _taxi,
    }
    return builders[mdp_class]()
def main(open_plot=True):
    '''
    Run a Q-learning agent and a random agent on a small Taxi OO-MDP.

    Args:
        open_plot (bool): passed through to run_agents_on_mdp.
    '''
    # Taxi start state: 4x4 grid, no walls, one passenger waiting for pickup.
    taxi_start = {"x": 1, "y": 1, "has_passenger": 0}
    waiting_passengers = [{"x": 3, "y": 2, "dest_x": 2, "dest_y": 3, "in_taxi": 0}]
    mdp = TaxiOOMDP(width=4, height=4, agent=taxi_start, walls=[],
                    passengers=waiting_passengers)

    # Agents.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Flip to True to train the Q-learner alone and visualize it instead.
    viz = False
    if not viz:
        # Run experiment and make plot.
        run_agents_on_mdp([ql_agent, rand_agent], mdp, instances=10, episodes=1,
                          steps=500, reset_at_terminal=True, open_plot=open_plot)
        return

    # Visualize Taxi.
    run_single_agent_on_mdp(ql_agent, mdp, episodes=50, steps=1000)
    mdp.visualize_agent(ql_agent)
def make_mdp(mdp_class="grid", state_size=7):
    '''
    Args:
        mdp_class (str): one of {hall, pblocks_grid, grid, four_room, chain,
            random, taxi}
        state_size (int): side length of the grid domains; also used as the
            number of chain states.

    Returns:
        (MDP)

    Raises:
        KeyError: if mdp_class is not one of the names above.
    '''
    # Grid/Hallway stuff.
    width, height = state_size, state_size

    def _hall():
        hall_goal_locs = [(i, width) for i in range(1, height + 1)]
        return GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                            goal_locs=hall_goal_locs)

    def _taxi():
        # Taxi stuff. Floor division keeps the passenger on an integer cell
        # under Python 3 as well as Python 2.
        agent = {"x": 1, "y": 1, "has_passenger": 0}
        passengers = [{
            "x": state_size // 2,
            "y": state_size // 2,
            "dest_x": state_size - 2,
            "dest_y": 2,
            "in_taxi": 0
        }]
        return TaxiOOMDP(width=state_size, height=state_size, slip_prob=0.0,
                         agent=agent, walls=[], passengers=passengers)

    # Builders are called lazily: only the requested domain is constructed,
    # so e.g. pblocks_grid.txt is read only when "pblocks_grid" is asked for.
    builders = {
        "hall": _hall,
        "pblocks_grid": lambda: make_grid_world_from_file("pblocks_grid.txt", randomize=True),
        "grid": lambda: GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                                     goal_locs=[(state_size, state_size)]),
        "four_room": lambda: FourRoomMDP(width=width, height=height,
                                         goal_locs=[(width, height)]),
        "chain": lambda: ChainMDP(num_states=state_size),
        "random": lambda: RandomMDP(num_states=50, num_rand_trans=2),
        "taxi": _taxi,
    }
    return builders[mdp_class]()
def choose_mdp(mdp_name, atari_game="centipede"):
    '''
    Args:
        mdp_name (str): one of {atari, grid, chain, taxi}
        atari_game (str): one of {centipede, breakout, etc.}; only used when
            mdp_name is "atari".

    Returns:
        (MDP)

    Raises:
        KeyError: if mdp_name is not one of the names above.
        SystemExit: if mdp_name is "atari" and AtariMDP cannot be imported
            (after printing an install hint).
    '''
    if mdp_name == "atari":
        # Atari import is here in case users don't have the Arcade Learning Environment.
        # Only the import is guarded: errors raised while constructing the
        # MDP are real failures and must not be reported as a missing install.
        try:
            from simple_rl.tasks.atari.AtariMDPClass import AtariMDP
        except ImportError:
            print("ERROR: you don't have the Arcade Learning Environment installed.")
            print("\tTry here: https://github.com/mgbellemare/Arcade-Learning-Environment.")
            # Same effect as quit(), without relying on the site module's
            # interactive helper being present.
            raise SystemExit()
        return AtariMDP(rom=atari_game, grayscale=True)

    def _taxi():
        # Taxi MDP.
        agent = {"x": 1, "y": 1, "has_passenger": 0}
        passengers = [{"x": 5, "y": 5, "dest_x": 3, "dest_y": 3, "in_taxi": 0}]
        return TaxiOOMDP(6, 6, agent_loc=agent, walls=[], passengers=passengers)

    # Builders are called lazily so that only the requested MDP is constructed.
    builders = {
        # Grid World MDP.
        "grid": lambda: GridWorldMDP(10, 10, (1, 1), (10, 10)),
        # Chain MDP.
        "chain": lambda: ChainMDP(15),
        "taxi": _taxi,
    }
    return builders[mdp_name]()
type=int, default=10, help='number of steps for incidence matrix sampling') args = parser.parse_args() dom, task = args.task.split('_') if dom == 'grid': mdp = make_grid_world_from_file('../tasks/' + task + '.txt') elif dom == 'taxi': width = 4 height = 4 agent = {"x": 1, "y": 1, "has_passenger": 0} passengers = [{"x": 3, "y": 2, "dest_x": 2, "dest_y": 3, "in_taxi": 0}] mdp = TaxiOOMDP(width, height, agent, walls=[], passengers=passengers) elif dom == 'gym': mdp = GymMDP(env_name=task, render=False) elif dom == 'hanoi': mdp = HanoiMDP(num_pegs=3, num_discs=4) elif dom == 'track': mdp = make_race_track_from_file('../tasks/' + task + '.txt') else: print('Unknown task name: ', task) assert (False) mdp.set_gamma(0.99) if args.experiment == 'online': print('test_online_agent') test_online_agent(args, mdp)
def make_mdp_distr(mdp_class="grid", grid_dim=7, horizon=0):
    '''
    Args:
        mdp_class (str): one of {hall, corridor, grid, four_room,
            pblocks_grid, chain, random, taxi}
        grid_dim (int): side length of the hall, grid and four_room domains.
        horizon (int)

    Returns:
        (MDPDistribution)

    Raises:
        KeyError: if mdp_class is not one of the names above.
    '''
    mdp_dist_dict = {}
    height, width = grid_dim, grid_dim

    # Define goal locations.

    # Corridor: the corr_goal_magnitude outermost columns at each end.
    # (Previously the left end received one column fewer and the right end
    # one column more than the magnitude.)
    corr_width = 20
    corr_goal_magnitude = random.randint(1, 5)
    corr_goal_cols = list(range(1, corr_goal_magnitude + 1)) + \
        list(range(corr_width - corr_goal_magnitude + 1, corr_width + 1))
    corr_goal_locs = list(itertools.product(corr_goal_cols, [1]))

    # Grid World
    grid_goal_locs = list(itertools.product(range(width - 4, width),
                                            range(height - 4, height)))

    # Hallway.
    hall_goal_locs = [(i, width) for i in range(1, height + 1)]

    # Four room.
    four_room_goal_locs = [(2, 2), (width, height), (width, 1), (1, height)]

    # Taxi.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    walls = []

    goal_loc_dict = {
        "four_room": four_room_goal_locs,
        "hall": hall_goal_locs,
        "grid": grid_goal_locs,
        "corridor": corr_goal_locs
    }

    def _goal(name, i):
        # The i-th goal of the named domain, cycling through its goal list.
        locs = goal_loc_dict[name]
        return [locs[i % len(locs)]]

    # One builder per domain, called with the index of the MDP being made.
    # Only the requested domain is built on each iteration.
    builders = {
        "hall": lambda i: GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                                       goal_locs=_goal("hall", i)),
        "corridor": lambda i: GridWorldMDP(width=20, height=1, init_loc=(10, 1),
                                           goal_locs=_goal("corridor", i),
                                           is_goal_terminal=True),
        "grid": lambda i: GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                                       goal_locs=_goal("grid", i),
                                       is_goal_terminal=True),
        "four_room": lambda i: FourRoomMDP(width=width, height=height,
                                           goal_locs=_goal("four_room", i)),
        # THESE GOALS ARE SPECIFIED IMPLICITLY:
        "pblocks_grid": lambda i: make_grid_world_from_file("pblocks_grid.txt", randomize=True),
        "chain": lambda i: ChainMDP(num_states=10,
                                    reset_val=random.choice([0, 0.01, 0.05, 0.1, 0.2, 0.5])),
        "random": lambda i: RandomMDP(num_states=40, num_rand_trans=random.randint(1, 10)),
        "taxi": lambda i: TaxiOOMDP(4, 4, slip_prob=0.0, agent=agent, walls=walls,
                                    passengers=[{"x": 2, "y": 2,
                                                 "dest_x": random.randint(1, 4),
                                                 "dest_y": random.randint(1, 4),
                                                 "in_taxi": 0}]),
    }
    build = builders[mdp_class]

    # MDP Probability: one MDP per goal for goal-based domains, else ten.
    num_mdps = len(goal_loc_dict[mdp_class]) if mdp_class in goal_loc_dict else 10
    mdp_prob = 1.0 / num_mdps

    for i in range(num_mdps):
        new_mdp = build(i)
        mdp_dist_dict[new_mdp] = mdp_prob

    return MDPDistribution(mdp_dist_dict, horizon=horizon)
def make_mdp_distr(mdp_class="grid", grid_dim=9, horizon=0, step_cost=0, gamma=0.99):
    '''
    Args:
        mdp_class (str): one of {hrooms, octo, hall, corridor, grid,
            four_room, pblocks_grid, chain, random, taxi}
        grid_dim (int): side length of the grid and four_room domains, and
            the height of the hall domain.
        horizon (int)
        step_cost (float)
        gamma (float)

    Returns:
        (MDPDistribution)

    Raises:
        KeyError: if mdp_class is not one of the names above.
    '''
    mdp_dist_dict = {}
    height, width = grid_dim, grid_dim

    # Define goal locations.

    # Corridor: the corr_goal_magnitude outermost columns at each end.
    corr_width = 20
    corr_goal_magnitude = 1  # random.randint(1, 5)
    corr_goal_cols = list(range(1, corr_goal_magnitude + 1)) + \
        list(range(corr_width - corr_goal_magnitude + 1, corr_width + 1))
    corr_goal_locs = list(itertools.product(corr_goal_cols, [1]))

    # Grid World
    tl_grid_goal_locs = list(itertools.product(range(width - 4, width),
                                               range(height - 4, height)))
    tr_grid_goal_locs = list(itertools.product(range(1, 4),
                                               range(height - 4, height)))
    grid_goal_locs = tl_grid_goal_locs + tr_grid_goal_locs

    # Hallway.
    hall_goal_locs = [(i, height) for i in range(1, 30)]

    # Four room.
    four_room_goal_locs = [(width, height), (width, 1), (1, height),
                           (1, height - 2), (width - 2, height - 2)]  # , (width - 2, 1)]

    # Taxi.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    walls = []

    goal_loc_dict = {
        "four_room": four_room_goal_locs,
        "hall": hall_goal_locs,
        "grid": grid_goal_locs,
        "corridor": corr_goal_locs,
    }

    def _goal(name, i):
        # The i-th goal of the named domain, cycling through its goal list.
        locs = goal_loc_dict[name]
        return [locs[i % len(locs)]]

    def _taxi(i):
        passengers = [
            {"x": 2, "y": 1, "dest_x": random.choice([2, 3]),
             "dest_y": random.choice([2, 3]), "in_taxi": 0},
            {"x": 1, "y": 2, "dest_x": random.choice([1, 2]),
             "dest_y": random.choice([1, 4]), "in_taxi": 0},
        ]
        return TaxiOOMDP(3, 4, slip_prob=0.0, agent=agent, walls=walls,
                         passengers=passengers)

    # One builder per domain, called with the index of the MDP being made.
    # Only the requested domain is built on each iteration, so the map files
    # of the other domains are never read.
    builders = {
        "hrooms": lambda i: make_grid_world_from_file("hierarch_rooms.txt", num_goals=7,
                                                      randomize=False),
        "octo": lambda i: make_grid_world_from_file("octogrid.txt", num_goals=12,
                                                    randomize=False, goal_num=i),
        # The hallway uses every hall goal at once rather than one per MDP.
        "hall": lambda i: GridWorldMDP(width=30, height=height, rand_init=False,
                                       goal_locs=goal_loc_dict["hall"], name="hallway",
                                       is_goal_terminal=True),
        "corridor": lambda i: GridWorldMDP(width=20, height=1, init_loc=(10, 1),
                                           goal_locs=_goal("corridor", i),
                                           is_goal_terminal=True, name="corridor"),
        "grid": lambda i: GridWorldMDP(width=width, height=height, rand_init=True,
                                       goal_locs=_goal("grid", i),
                                       is_goal_terminal=True),
        "four_room": lambda i: FourRoomMDP(width=width, height=height,
                                           goal_locs=_goal("four_room", i),
                                           is_goal_terminal=True),
        # THESE GOALS ARE SPECIFIED IMPLICITLY:
        "pblocks_grid": lambda i: make_grid_world_from_file("pblocks_grid.txt",
                                                            randomize=True, slip_prob=0.1),
        "chain": lambda i: ChainMDP(num_states=10,
                                    reset_val=random.choice([0, 0.01, 0.05, 0.1, 0.2, 0.5])),
        "random": lambda i: RandomMDP(num_states=40, num_rand_trans=random.randint(1, 10)),
        "taxi": _taxi,
    }
    build = builders[mdp_class]

    # MDP Probability: one MDP per goal for goal-based domains, twelve for
    # "octo", else ten.
    num_mdps = len(goal_loc_dict[mdp_class]) if mdp_class in goal_loc_dict else 10
    if mdp_class == "octo":
        num_mdps = 12
    mdp_prob = 1.0 / num_mdps

    for i in range(num_mdps):
        new_mdp = build(i)
        new_mdp.set_step_cost(step_cost)
        new_mdp.set_gamma(gamma)
        mdp_dist_dict[new_mdp] = mdp_prob

    return MDPDistribution(mdp_dist_dict, horizon=horizon)
#!/usr/bin/env python # Other imports. import srl_example_setup from simple_rl.agents import QLearnerAgent, RandomAgent from simple_rl.tasks import TaxiOOMDP, BlockDudeOOMDP from simple_rl.run_experiments import run_agents_on_mdp, run_single_agent_on_mdp # Taxi initial state attributes.. agent = {"x": 1, "y": 1, "has_passenger": 0} passengers = [{"x": 3, "y": 2, "dest_x": 2, "dest_y": 3, "in_taxi": 0}] walls = [] mdp = TaxiOOMDP(width=4, height=4, agent=agent, walls=walls, passengers=passengers) ql_agent = QLearnerAgent(actions=mdp.get_actions()) rand_agent = RandomAgent(actions=mdp.get_actions()) viz = False if viz: # Visualize Taxi. run_single_agent_on_mdp(ql_agent, mdp, episodes=50, steps=1000) mdp.visualize_agent(ql_agent) else: # Run experiment and make plot. run_agents_on_mdp([ql_agent, rand_agent], mdp, instances=10,