def make_mdp(mdp_class="grid", grid_dim=7):
    '''
    Builds a single MDP selected by name.

    Only the requested MDP is constructed; the other domains are never
    instantiated, so a domain that cannot be built for the given @grid_dim
    does not interfere with an unrelated request.

    Args:
        mdp_class (str): one of "grid", "upworld", "four_room", "chain",
            "random", "hanoi", "taxi", "trench".
        grid_dim (int): used as both width and height of the grid-based
            domains, as the number of states of the chain, as the number of
            pegs of the hanoi domain and as the width of the trench.

    Returns:
        (MDP)

    Raises:
        KeyError: if @mdp_class is not one of the names listed above.
    '''
    # Grid/Hallway stuff.
    width, height = grid_dim, grid_dim
    upworld_goal_locs = [(i, width) for i in range(1, height + 1)]

    # Other goal candidates that were tried at some point:
    # (width, 1), (1, height), (1, height - 2), (width - 2, height - 2),
    # (width - 1, height - 1), (width - 2, 1).
    four_room_goal_loc = (width, height)

    # Taxi stuff. Floor division keeps the coordinates integral on Python 3
    # (plain "/" would give e.g. 3.5 for grid_dim=7).
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    passengers = [{
        "x": grid_dim // 2,
        "y": grid_dim // 2,
        "dest_x": grid_dim - 2,
        "dest_y": 2,
        "in_taxi": 0,
    }]
    walls = []

    # Trench stuff: one lava cell per column, all on the same row.
    tr_agent = {"x": 1, "y": 1, "dx": 1, "dy": 0,
                "dest_x": grid_dim, "dest_y": grid_dim, "has_block": 0}
    blocks = [{"x": grid_dim, "y": 1}]
    lava_y = (grid_dim + 1) // 2
    lavas = [{"x": x, "y": lava_y} for x in range(1, grid_dim + 1)]

    # Factories instead of instances: building every domain up front is
    # wasteful and lets an unrelated, error-prone domain break the call.
    builders = {
        "grid": lambda: GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                                     goal_locs=[(grid_dim, grid_dim)]),
        "upworld": lambda: GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                                        goal_locs=upworld_goal_locs),
        "four_room": lambda: FourRoomMDP(width=width, height=height,
                                         goal_locs=[four_room_goal_loc]),
        "chain": lambda: ChainMDP(num_states=grid_dim),
        "random": lambda: RandomMDP(num_states=50, num_rand_trans=2),
        "hanoi": lambda: HanoiMDP(num_pegs=grid_dim, num_discs=3),
        "taxi": lambda: TaxiOOMDP(width=grid_dim, height=grid_dim, agent=agent,
                                  walls=walls, passengers=passengers),
        "trench": lambda: TrenchOOMDP(width=grid_dim, height=3, agent=tr_agent,
                                      blocks=blocks, lavas=lavas),
    }

    if mdp_class not in builders:
        raise KeyError("Unknown mdp_class %r; expected one of %s"
                       % (mdp_class, sorted(builders)))

    return builders[mdp_class]()
def make_mdp(mdp_class="grid", grid_dim=7):
    '''
    Builds a single MDP selected by name.

    Only the requested MDP is constructed. In particular the
    "pblocks_grid.txt" file is read only when @mdp_class is "pblocks_grid",
    and no other domain is instantiated as a side effect of the lookup.

    Args:
        mdp_class (str): one of "hall", "pblocks_grid", "grid", "four_room",
            "chain", "random", "hanoi", "taxi".
        grid_dim (int): used as both width and height of the grid-based
            domains, as the number of states of the chain and as the number
            of pegs of the hanoi domain.

    Returns:
        (MDP)

    Raises:
        KeyError: if @mdp_class is not one of the names listed above.
    '''
    # Grid/Hallway stuff.
    width, height = grid_dim, grid_dim
    hall_goal_locs = [(i, width) for i in range(1, height + 1)]

    four_room_goal_locs = [
        (width, height),
        (width, 1),
        (1, height),
        (1, height - 2),
        (width - 2, height - 2),
        (width - 2, 1),
    ]
    four_room_goal_loc = four_room_goal_locs[5]

    # Taxi stuff. Floor division keeps the coordinates integral on Python 3
    # (plain "/" would give e.g. 3.5 for grid_dim=7).
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    passengers = [{
        "x": grid_dim // 2,
        "y": grid_dim // 2,
        "dest_x": grid_dim - 2,
        "dest_y": 2,
        "in_taxi": 0,
    }]
    walls = []

    # Factories instead of instances: building every domain up front is
    # wasteful and makes every call depend on the grid file being readable.
    builders = {
        "hall": lambda: GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                                     goal_locs=hall_goal_locs),
        "pblocks_grid": lambda: make_grid_world_from_file("pblocks_grid.txt",
                                                          randomize=True),
        "grid": lambda: GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                                     goal_locs=[(grid_dim, grid_dim)]),
        "four_room": lambda: FourRoomMDP(width=width, height=height,
                                         goal_locs=[four_room_goal_loc]),
        "chain": lambda: ChainMDP(num_states=grid_dim),
        "random": lambda: RandomMDP(num_states=50, num_rand_trans=2),
        "hanoi": lambda: HanoiMDP(num_pegs=grid_dim, num_discs=3),
        "taxi": lambda: TaxiOOMDP(width=grid_dim, height=grid_dim, slip_prob=0.0,
                                  agent=agent, walls=walls, passengers=passengers),
    }

    if mdp_class not in builders:
        raise KeyError("Unknown mdp_class %r; expected one of %s"
                       % (mdp_class, sorted(builders)))

    return builders[mdp_class]()
args = parser.parse_args() dom, task = args.task.split('_') if dom == 'grid': mdp = make_grid_world_from_file('../tasks/' + task + '.txt') elif dom == 'taxi': width = 4 height = 4 agent = {"x": 1, "y": 1, "has_passenger": 0} passengers = [{"x": 3, "y": 2, "dest_x": 2, "dest_y": 3, "in_taxi": 0}] mdp = TaxiOOMDP(width, height, agent, walls=[], passengers=passengers) elif dom == 'gym': mdp = GymMDP(env_name=task, render=False) elif dom == 'hanoi': mdp = HanoiMDP(num_pegs=3, num_discs=4) elif dom == 'track': mdp = make_race_track_from_file('../tasks/' + task + '.txt') else: print('Unknown task name: ', task) assert (False) mdp.set_gamma(0.99) if args.experiment == 'online': print('test_online_agent') test_online_agent(args, mdp) elif args.experiment == 'offline': print('test_offline_agent') test_offline_agent(args, mdp) elif args.experiment == 'utility':