def make_mdp(mdp_class="grid", grid_dim=7):
    '''
    Build a single MDP of the requested class.

    Args:
        mdp_class (str): one of {"hall", "pblocks_grid", "grid", "four_room",
            "chain", "random", "hanoi", "taxi"}.
        grid_dim (int): width and height of the grid-like domains; also used
            as the number of chain states and the number of Hanoi pegs.

    Returns:
        (MDP)

    Raises:
        KeyError: if @mdp_class is not one of the names listed above.
    '''
    # Grid/Hallway stuff.
    width, height = grid_dim, grid_dim
    hall_goal_locs = [(i, width) for i in range(1, height + 1)]
    four_room_goal_locs = [(width, height), (width, 1), (1, height),
                           (1, height - 2), (width - 2, height - 2),
                           (width - 2, 1)]
    # Must be defined: the "four_room" entry below reads it.
    four_room_goal_loc = four_room_goal_locs[5]

    # Taxi stuff.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    # Floor division keeps the passenger coordinates integral on Python 3
    # (and is identical to "/" on ints under Python 2).
    passengers = [{"x": grid_dim // 2, "y": grid_dim // 2,
                   "dest_x": grid_dim - 2, "dest_y": 2, "in_taxi": 0}]
    walls = []

    # Each entry is a zero-argument constructor so that only the requested MDP
    # is built (no file read or other setup for the classes not asked for).
    mdp_makers = {
        "hall": lambda: GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                                     goal_locs=hall_goal_locs),
        "pblocks_grid": lambda: make_grid_world_from_file("pblocks_grid.txt",
                                                          randomize=True),
        "grid": lambda: GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                                     goal_locs=[(grid_dim, grid_dim)]),
        "four_room": lambda: FourRoomMDP(width=width, height=height,
                                         goal_locs=[four_room_goal_loc]),
        "chain": lambda: ChainMDP(num_states=grid_dim),
        "random": lambda: RandomMDP(num_states=50, num_rand_trans=2),
        "hanoi": lambda: HanoiMDP(num_pegs=grid_dim, num_discs=3),
        "taxi": lambda: TaxiOOMDP(width=grid_dim, height=grid_dim, slip_prob=0.0,
                                  agent=agent, walls=walls, passengers=passengers),
    }

    return mdp_makers[mdp_class]()
def make_mdp(mdp_class="grid", grid_dim=7):
    '''
    Build a single MDP of the requested class.

    Args:
        mdp_class (str): one of {"hall", "pblocks_grid", "grid", "four_room",
            "chain", "random", "hanoi", "taxi"}.
        grid_dim (int): width and height of the grid-like domains; also used
            as the number of chain states and the number of Hanoi pegs.

    Returns:
        (MDP)

    Raises:
        KeyError: if @mdp_class is not one of the names listed above.
    '''
    # Grid/Hallway stuff.
    width, height = grid_dim, grid_dim
    hall_goal_locs = [(i, width) for i in range(1, height + 1)]
    four_room_goal_locs = [(width, height), (width, 1), (1, height),
                           (1, height - 2), (width - 2, height - 2),
                           (width - 2, 1)]
    four_room_goal_loc = four_room_goal_locs[5]

    # Taxi stuff.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    # Floor division keeps the passenger coordinates integral on Python 3
    # (and is identical to "/" on ints under Python 2).
    passengers = [{
        "x": grid_dim // 2,
        "y": grid_dim // 2,
        "dest_x": grid_dim - 2,
        "dest_y": 2,
        "in_taxi": 0
    }]
    walls = []

    # Zero-argument constructors: only the requested MDP is actually built,
    # so asking for e.g. "grid" no longer reads "pblocks_grid.txt".
    mdp_makers = {
        "hall":
        lambda: GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                             goal_locs=hall_goal_locs),
        "pblocks_grid":
        lambda: make_grid_world_from_file("pblocks_grid.txt", randomize=True),
        "grid":
        lambda: GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                             goal_locs=[(grid_dim, grid_dim)]),
        "four_room":
        lambda: FourRoomMDP(width=width, height=height,
                            goal_locs=[four_room_goal_loc]),
        "chain":
        lambda: ChainMDP(num_states=grid_dim),
        "random":
        lambda: RandomMDP(num_states=50, num_rand_trans=2),
        "hanoi":
        lambda: HanoiMDP(num_pegs=grid_dim, num_discs=3),
        "taxi":
        lambda: TaxiOOMDP(width=grid_dim, height=grid_dim, slip_prob=0.0,
                          agent=agent, walls=walls, passengers=passengers),
    }

    return mdp_makers[mdp_class]()
def run_all_experiments():
    '''
    Run run_MIMO and run_MOMI on every grid-world domain listed below.

    Each domain name doubles as the stem of its task file ('<name>.txt'),
    which is loaded with make_grid_world_from_file.
    '''
    # TODOs
    # 1. Add Taxi
    # 2. Add ???
    domains = ['9x9grid', 'fourroom', 'Imaze']

    # The loop variable is the name handed to the runners; the calls below
    # previously referred to a name that the loop never bound.
    for domain in domains:
        fname = domain + '.txt'
        mdp = make_grid_world_from_file(fname)

        run_MIMO(domain, mdp, [1, 2, 3])
        run_MOMI(domain, mdp, [12, 11, 10, 9, 8, 7, 6, 5, 4])
def make_mdp(mdp_class="grid", state_size=7):
    '''
    Build a single MDP of the requested class.

    Args:
        mdp_class (str): one of {"hall", "pblocks_grid", "grid", "four_room",
            "chain", "random", "taxi"}.
        state_size (int): width and height of the grid-like domains; also
            used as the number of chain states.

    Returns:
        (MDP)

    Raises:
        KeyError: if @mdp_class is not one of the names listed above.
    '''
    # Grid/Hallway stuff.
    width, height = state_size, state_size
    hall_goal_locs = [(i, width) for i in range(1, height + 1)]

    # Taxi stuff.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    # Floor division keeps the passenger coordinates integral on Python 3
    # (and is identical to "/" on ints under Python 2).
    passengers = [{
        "x": state_size // 2,
        "y": state_size // 2,
        "dest_x": state_size - 2,
        "dest_y": 2,
        "in_taxi": 0
    }]
    walls = []

    # Zero-argument constructors: only the requested MDP is actually built,
    # so asking for e.g. "grid" no longer reads "pblocks_grid.txt".
    mdp_makers = {
        "hall":
        lambda: GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                             goal_locs=hall_goal_locs),
        "pblocks_grid":
        lambda: make_grid_world_from_file("pblocks_grid.txt", randomize=True),
        "grid":
        lambda: GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                             goal_locs=[(state_size, state_size)]),
        "four_room":
        lambda: FourRoomMDP(width=width, height=height,
                            goal_locs=[(width, height)]),
        "chain":
        lambda: ChainMDP(num_states=state_size),
        "random":
        lambda: RandomMDP(num_states=50, num_rand_trans=2),
        "taxi":
        lambda: TaxiOOMDP(width=state_size, height=state_size, slip_prob=0.0,
                          agent=agent, walls=walls, passengers=passengers),
    }

    return mdp_makers[mdp_class]()
def TestMatching():
    '''
    Run MinimumWeightMatching on the 5x5 grid task, print the resulting edge
    set and lower bound, and save a drawing of the matching to 'Matching.pdf'.
    '''
    task_name = '5x5grid'
    mdp = make_grid_world_from_file('../tasks/' + task_name + '.txt')

    G, intToS = GetAdjacencyMatrix(mdp)
    edge_costs = GetCost(G)

    _matrix, F, LB = MinimumWeightMatching(G, edge_costs)

    print("F'=", F)
    print('LB=', LB)

    Gnx = nx.from_edgelist(F)

    # Node index -> (x, y) of the corresponding state, used as the layout.
    layout = {idx: (state.x, state.y) for idx, state in enumerate(intToS)}

    nx.draw_networkx_nodes(Gnx, pos=layout, node_size=300, node_color='g')
    nx.draw_networkx_edges(Gnx, pos=layout)

    plt.savefig('Matching.pdf')
def get_mdp_params(args):
    '''
    Build the MDP selected by args.tasktype and report its state/action sizes.

    Args:
        args: parsed command-line namespace. Reads args.tasktype and args.task
            (and args.render for every task type except 'grid'). For the
            'atari', 'atariram' and 'mujoco' task types, args.highmethod and
            args.lowmethod are overwritten in place.

    Returns:
        (tuple): (mdp, state_dim, state_bound, num_actions, action_dim,
            action_bound). Fields that a task type does not set stay None.

    Raises:
        AssertionError: if args.tasktype is not a supported task type.
    '''
    state_dim = None
    state_bound = None
    num_actions = None
    action_dim = None
    action_bound = None

    # TODO: it is very hard to have a script which contains all
    #       discrete/continuous state/actions.
    #       Should we separate out the tasks, or refactor?
    if args.tasktype in ('pinball', 'p'):
        # TODO: Add parameter for Configuration files by --task argument
        mdp = PinballMDP(cfg=args.task, render=args.render)
        state_dim = 4
        num_actions = len(mdp.get_actions())
    elif args.tasktype in ('atari', 'atariram'):
        grayscale = False
        downscale = True
        mdp = GymMDP(env_name=args.task, grayscale=grayscale,
                     downscale=downscale, render=args.render)
        mdp.env.seed(1234)
        state_dims = mdp.env.observation_space.shape

        if args.tasktype == 'atari':
            # Flattened size of the observation space.
            state_dim = 1
            for d in state_dims:
                state_dim *= d
            if grayscale:
                state_dim = int(state_dim / 3)
            if downscale:
                # Fixed size used when downscaling; overrides the value above.
                state_dim = 105 * 80 * 3
        else:
            state_dim = 128
        print('state_dim=', state_dim)
        num_actions = mdp.env.action_space.n

        # TODO: methods are fixed to dqn/ddpg/nn right now.
        print('args.highmethod is overwritten by dqn')
        print('args.lowmethod is overwritten by dqn')
        args.highmethod = 'dqn'
        args.lowmethod = 'dqn'
    elif args.tasktype == 'mujoco':
        mdp = GymMDP(env_name=args.task, render=args.render)
        mdp.env.seed(1234)
        state_dims = mdp.env.observation_space.shape
        state_dim = 1
        for d in state_dims:
            state_dim *= d
        print('state_dim=', state_dim)

        action_dim = int(mdp.env.action_space.shape[0])
        action_bound = mdp.action_bounds()

        # Fourier does not work for high dim space.
        # TODO: methods are fixed to dqn/ddpg/nn right now.
        print('args.highmethod is overwritten by dqn')
        print('args.lowmethod is overwritten by ddpg')
        args.highmethod = 'dqn'
        args.lowmethod = 'ddpg'
    elif args.tasktype == 'grid':
        fname = '../tasks/' + args.task
        mdp = make_grid_world_from_file(fname)
        state_dim = 2
        num_actions = 4
    else:
        # Raised explicitly instead of `assert False` so the check is not
        # stripped by `python -O`; the exception type is kept so callers that
        # relied on AssertionError see no difference.
        raise AssertionError('Unknown task type: ' + str(args.tasktype))

    state_bound = mdp.bounds()

    return mdp, state_dim, state_bound, num_actions, action_dim, action_bound
if __name__ == "__main__":
    # Script entry point. Currently only the matching test runs: exit(0)
    # ends the process right after it, so the Steiner-tree demo below is not
    # executed unless the two lines that follow are removed.
    TestMatching()
    exit(0)

    # Alternative domain/goal presets; exactly one pair should be active.
    # domain = '5x5grid'
    # goals = [(1, 5), (1, 1), (5, 5), (3, 3), (5, 1)]

    domain = '9x9grid'
    goals = [(1, 1), (1, 9), (9, 1), (9, 9), (5, 5)]

    # domain = 'fourroom'
    # goals = [(1, 1), (1, 11), (11, 1), (11, 11), (5, 5), (8, 7), (5, 7)]

    fname = '../../tasks/' + domain + '.txt'
    mdp = make_grid_world_from_file(fname)

    G, intToS = GetAdjacencyMatrix(mdp)

    # c: all-ones array shaped like G; d: output of GetCost(G). Both are
    # passed to the Steiner-tree routine below.
    c = np.ones_like(G, dtype=int)
    d = GetCost(G)
    # print('d=', d)

    # TODO
    K = StatesToArray(intToS, goals)
    # K = np.random.binomial(n=1, p=0.2, size=G.shape[0])  # np.ones(G.shape[0], dtype=int)
    print('K=', K)
    D = 15

    tree, options = DiameterConstrainedSteinerTree(G, c, d, K, D, 0.1)
parser.add_argument(
    '--nsepisodes',
    type=int,
    default=10,
    help='number of episodes for incidence matrix sampling')
parser.add_argument('--nssteps',
                    type=int,
                    default=10,
                    help='number of steps for incidence matrix sampling')

args = parser.parse_args()

# --task has the form '<domain>_<task>'. Split on the first underscore only,
# so that task names which themselves contain underscores still unpack into
# exactly two parts.
dom, task = args.task.split('_', 1)

# Build the MDP for the requested domain.
if dom == 'grid':
    mdp = make_grid_world_from_file('../tasks/' + task + '.txt')
elif dom == 'taxi':
    width = 4
    height = 4
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    passengers = [{"x": 3, "y": 2, "dest_x": 2, "dest_y": 3, "in_taxi": 0}]
    mdp = TaxiOOMDP(width, height, agent, walls=[], passengers=passengers)
elif dom == 'gym':
    mdp = GymMDP(env_name=task, render=False)
elif dom == 'hanoi':
    mdp = HanoiMDP(num_pegs=3, num_discs=4)
elif dom == 'track':
    mdp = make_race_track_from_file('../tasks/' + task + '.txt')
else:
    print('Unknown task name: ', task)
    assert (False)
def make_mdp_distr(mdp_class="grid", grid_dim=7, horizon=0):
    '''
    Build a uniform distribution over MDPs of one class.

    Args:
        mdp_class (str): one of {"hall", "corridor", "grid", "four_room",
            "pblocks_grid", "chain", "random", "taxi"}.
        grid_dim (int): width and height of the "hall", "grid" and
            "four_room" domains.
        horizon (int)

    Returns:
        (MDPDistribution): every sampled MDP gets probability 1 / num_mdps,
            where num_mdps is the number of candidate goals for classes with
            an explicit goal list and 10 otherwise.

    Raises:
        KeyError: if @mdp_class is not one of the names listed above.
    '''
    mdp_dist_dict = {}
    height, width = grid_dim, grid_dim

    # Define goal locations.

    # Corridor.
    corr_width = 20
    corr_goal_magnitude = random.randint(1, 5)
    # NOTE(review): for magnitude m this gives columns 1..m-1 on the left and
    # (corr_width - m)..corr_width on the right, i.e. the two ends are not
    # symmetric -- confirm this is intended.
    corr_goal_cols = list(range(1, corr_goal_magnitude)) + \
        list(range(corr_width - corr_goal_magnitude, corr_width + 1))
    corr_goal_locs = list(itertools.product(corr_goal_cols, [1]))

    # Grid World
    grid_world_rows = list(range(width - 4, width))
    grid_world_cols = list(range(height - 4, height))
    grid_goal_locs = list(itertools.product(grid_world_rows, grid_world_cols))

    # Hallway.
    hall_goal_locs = [(i, width) for i in range(1, height + 1)]

    # Four room.
    four_room_goal_locs = [(2, 2), (width, height), (width, 1), (1, height)]

    # Taxi.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    walls = []

    goal_loc_dict = {
        "four_room": four_room_goal_locs,
        "hall": hall_goal_locs,
        "grid": grid_goal_locs,
        "corridor": corr_goal_locs
    }

    # MDP Probability.
    num_mdps = 10 if mdp_class not in goal_loc_dict else len(goal_loc_dict[mdp_class])
    mdp_prob = 1.0 / num_mdps

    def goal_for(name, i):
        # The i-th candidate goal of @name (wrapping around), as a 1-item list.
        locs = goal_loc_dict[name]
        return [locs[i % len(locs)]]

    # One constructor per class, taking the sample index. Only the requested
    # class is ever built, so no file reads or random draws happen for the
    # classes that were not asked for.
    mdp_makers = {
        "hall": lambda i: GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                                       goal_locs=goal_for("hall", i)),
        "corridor": lambda i: GridWorldMDP(width=20, height=1, init_loc=(10, 1),
                                           goal_locs=goal_for("corridor", i),
                                           is_goal_terminal=True),
        "grid": lambda i: GridWorldMDP(width=width, height=height, init_loc=(1, 1),
                                       goal_locs=goal_for("grid", i),
                                       is_goal_terminal=True),
        "four_room": lambda i: FourRoomMDP(width=width, height=height,
                                           goal_locs=goal_for("four_room", i)),
        # THESE GOALS ARE SPECIFIED IMPLICITLY:
        "pblocks_grid": lambda i: make_grid_world_from_file("pblocks_grid.txt",
                                                            randomize=True),
        "chain": lambda i: ChainMDP(
            num_states=10,
            reset_val=random.choice([0, 0.01, 0.05, 0.1, 0.2, 0.5])),
        "random": lambda i: RandomMDP(num_states=40,
                                      num_rand_trans=random.randint(1, 10)),
        "taxi": lambda i: TaxiOOMDP(
            4, 4, slip_prob=0.0, agent=agent, walls=walls,
            passengers=[{"x": 2, "y": 2,
                         "dest_x": random.randint(1, 4),
                         "dest_y": random.randint(1, 4),
                         "in_taxi": 0}]),
    }
    make_one = mdp_makers[mdp_class]

    for i in range(num_mdps):
        mdp_dist_dict[make_one(i)] = mdp_prob

    return MDPDistribution(mdp_dist_dict, horizon=horizon)
def make_mdp_distr(mdp_class="grid", grid_dim=9, horizon=0, step_cost=0, gamma=0.99):
    '''
    Build a uniform distribution over MDPs of one class.

    Args:
        mdp_class (str): one of {"hrooms", "octo", "hall", "corridor", "grid",
            "four_room", "pblocks_grid", "chain", "random", "taxi"}.
        grid_dim (int): width and height of the "grid" and "four_room"
            domains (and the height of "hall").
        horizon (int)
        step_cost (float): passed to set_step_cost on every sampled MDP.
        gamma (float): passed to set_gamma on every sampled MDP.

    Returns:
        (MDPDistribution): every sampled MDP gets probability 1 / num_mdps,
            where num_mdps is the number of candidate goals for classes with
            an explicit goal list, 12 for "octo" and 10 otherwise.

    Raises:
        KeyError: if @mdp_class is not one of the names listed above.
    '''
    mdp_dist_dict = {}
    height, width = grid_dim, grid_dim

    # Define goal locations.

    # Corridor.
    corr_width = 20
    corr_goal_magnitude = 1  # random.randint(1, 5)
    corr_goal_cols = list(range(1, corr_goal_magnitude + 1)) + \
        list(range(corr_width - corr_goal_magnitude + 1, corr_width + 1))
    corr_goal_locs = list(itertools.product(corr_goal_cols, [1]))

    # Grid World
    tl_grid_world_rows = list(range(width - 4, width))
    tl_grid_world_cols = list(range(height - 4, height))
    tl_grid_goal_locs = list(
        itertools.product(tl_grid_world_rows, tl_grid_world_cols))
    tr_grid_world_rows = list(range(1, 4))
    tr_grid_world_cols = list(range(height - 4, height))
    tr_grid_goal_locs = list(
        itertools.product(tr_grid_world_rows, tr_grid_world_cols))
    grid_goal_locs = tl_grid_goal_locs + tr_grid_goal_locs

    # Hallway.
    hall_goal_locs = [(i, height) for i in range(1, 30)]

    # Four room.
    four_room_goal_locs = [(width, height), (width, 1), (1, height),
                           (1, height - 2),
                           (width - 2, height - 2)]  # , (width - 2, 1)]

    # Taxi.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    walls = []

    goal_loc_dict = {
        "four_room": four_room_goal_locs,
        "hall": hall_goal_locs,
        "grid": grid_goal_locs,
        "corridor": corr_goal_locs,
    }

    # MDP Probability.
    num_mdps = 10 if mdp_class not in goal_loc_dict else len(goal_loc_dict[mdp_class])
    if mdp_class == "octo":
        num_mdps = 12
    mdp_prob = 1.0 / num_mdps

    def goal_for(name, i):
        # The i-th candidate goal of @name (wrapping around), as a 1-item list.
        locs = goal_loc_dict[name]
        return [locs[i % len(locs)]]

    # One constructor per class, taking the sample index. Only the requested
    # class is ever built, so no file reads or random draws happen for the
    # classes that were not asked for.
    mdp_makers = {
        "hrooms": lambda i: make_grid_world_from_file(
            "hierarch_rooms.txt", num_goals=7, randomize=False),
        "octo": lambda i: make_grid_world_from_file(
            "octogrid.txt", num_goals=12, randomize=False, goal_num=i),
        # The hallway uses the full goal list for every sample.
        "hall": lambda i: GridWorldMDP(
            width=30, height=height, rand_init=False,
            goal_locs=goal_loc_dict["hall"], name="hallway",
            is_goal_terminal=True),
        "corridor": lambda i: GridWorldMDP(
            width=20, height=1, init_loc=(10, 1),
            goal_locs=goal_for("corridor", i), is_goal_terminal=True,
            name="corridor"),
        "grid": lambda i: GridWorldMDP(
            width=width, height=height, rand_init=True,
            goal_locs=goal_for("grid", i), is_goal_terminal=True),
        "four_room": lambda i: FourRoomMDP(
            width=width, height=height, goal_locs=goal_for("four_room", i),
            is_goal_terminal=True),
        # THESE GOALS ARE SPECIFIED IMPLICITLY:
        "pblocks_grid": lambda i: make_grid_world_from_file(
            "pblocks_grid.txt", randomize=True, slip_prob=0.1),
        "chain": lambda i: ChainMDP(
            num_states=10,
            reset_val=random.choice([0, 0.01, 0.05, 0.1, 0.2, 0.5])),
        "random": lambda i: RandomMDP(
            num_states=40, num_rand_trans=random.randint(1, 10)),
        "taxi": lambda i: TaxiOOMDP(
            3, 4, slip_prob=0.0, agent=agent, walls=walls,
            passengers=[{"x": 2, "y": 1,
                         "dest_x": random.choice([2, 3]),
                         "dest_y": random.choice([2, 3]),
                         "in_taxi": 0},
                        {"x": 1, "y": 2,
                         "dest_x": random.choice([1, 2]),
                         "dest_y": random.choice([1, 4]),
                         "in_taxi": 0}]),
    }
    make_one = mdp_makers[mdp_class]

    for i in range(num_mdps):
        new_mdp = make_one(i)
        new_mdp.set_step_cost(step_cost)
        new_mdp.set_gamma(gamma)
        mdp_dist_dict[new_mdp] = mdp_prob

    return MDPDistribution(mdp_dist_dict, horizon=horizon)
def make_mdp_distr(mdp_class="grid", grid_dim=9, horizon=0, step_cost=0, gamma=0.99):
    '''
    Build a uniform distribution over MDPs of one class.

    Args:
        mdp_class (str): one of {"hrooms", "octo", "upworld", "corridor",
            "grid", "four_room", "color", "tight_four_room"}.
        grid_dim (int): width and height of the "grid", "four_room", "color"
            and "tight_four_room" domains (and the height of "upworld").
        horizon (int)
        step_cost (float): passed to set_step_cost on every sampled MDP.
        gamma (float): passed to set_gamma on every sampled MDP.

    Returns:
        (MDPDistribution): every sampled MDP gets probability 1 / num_mdps,
            where num_mdps is the number of candidate goals for classes with
            an explicit goal list, 12 for "octo" and 10 otherwise.

    Raises:
        KeyError: if @mdp_class is not one of the names listed above.
    '''
    mdp_dist_dict = {}
    height, width = grid_dim, grid_dim

    # Define goal locations.

    # Corridor.
    corr_width = 20
    corr_goal_magnitude = 1  # random.randint(1, 5)
    corr_goal_cols = list(range(1, corr_goal_magnitude + 1)) + \
        list(range(corr_width - corr_goal_magnitude + 1, corr_width + 1))
    corr_goal_locs = list(itertools.product(corr_goal_cols, [1]))

    # Grid World
    tl_grid_world_rows = list(range(width - 4, width))
    tl_grid_world_cols = list(range(height - 4, height))
    tl_grid_goal_locs = list(itertools.product(tl_grid_world_rows, tl_grid_world_cols))
    tr_grid_world_rows = list(range(1, 4))
    tr_grid_world_cols = list(range(height - 4, height))
    tr_grid_goal_locs = list(itertools.product(tr_grid_world_rows, tr_grid_world_cols))
    grid_goal_locs = tl_grid_goal_locs + tr_grid_goal_locs

    # Hallway.
    upworld_goal_locs = [(i, height) for i in range(1, 30)]

    # Four room.
    four_room_goal_locs = [(width, height), (width, 1), (1, height),
                           (1, height - 2), (width - 2, height - 2),
                           (width - 2, 1)]
    # Function-call form prints the same text on Python 2 and is valid syntax
    # on Python 3.
    print(four_room_goal_locs)

    tight_four_room_goal_locs = [(width, height), (width, height - 1),
                                 (width - 1, height), (width, height - 2),
                                 (width - 2, height), (width - 1, height - 1)]

    goal_loc_dict = {
        "four_room": four_room_goal_locs,
        "color": four_room_goal_locs,
        "upworld": upworld_goal_locs,
        "grid": grid_goal_locs,
        "corridor": corr_goal_locs,
        "tight_four_room": tight_four_room_goal_locs,
    }

    # MDP Probability.
    num_mdps = 10 if mdp_class not in goal_loc_dict else len(goal_loc_dict[mdp_class])
    if mdp_class == "octo":
        num_mdps = 12
    mdp_prob = 1.0 / num_mdps

    def goal_for(name, i):
        # The i-th candidate goal of @name (wrapping around), as a 1-item list.
        locs = goal_loc_dict[name]
        return [locs[i % len(locs)]]

    # One constructor per class, taking the sample index. Only the requested
    # class is ever built, so the grid files are read only when needed.
    mdp_makers = {
        "hrooms": lambda i: make_grid_world_from_file(
            "hierarch_rooms.txt", num_goals=7, randomize=False),
        "octo": lambda i: make_grid_world_from_file(
            "octogrid.txt", num_goals=12, randomize=False, goal_num=i),
        # Upworld uses the full goal list for every sample.
        "upworld": lambda i: GridWorldMDP(
            width=30, height=height, rand_init=False,
            goal_locs=goal_loc_dict["upworld"], name="upworld",
            is_goal_terminal=True),
        "corridor": lambda i: GridWorldMDP(
            width=20, height=1, init_loc=(10, 1),
            goal_locs=goal_for("corridor", i), is_goal_terminal=True,
            name="corridor"),
        "grid": lambda i: GridWorldMDP(
            width=width, height=height, rand_init=True,
            goal_locs=goal_for("grid", i), is_goal_terminal=True),
        "four_room": lambda i: FourRoomMDP(
            width=width, height=height, goal_locs=goal_for("four_room", i),
            is_goal_terminal=True),
        # The color domain reuses the four-room goal list.
        "color": lambda i: ColorMDP(
            width=width, height=height, num_colors=4,
            goal_locs=goal_for("four_room", i), is_goal_terminal=True),
        "tight_four_room": lambda i: FourRoomMDP(
            width=width, height=height,
            goal_locs=goal_for("tight_four_room", i), is_goal_terminal=True,
            name="tight_four_room"),
    }
    make_one = mdp_makers[mdp_class]

    for i in range(num_mdps):
        new_mdp = make_one(i)
        new_mdp.set_step_cost(step_cost)
        new_mdp.set_gamma(gamma)
        mdp_dist_dict[new_mdp] = mdp_prob

    return MDPDistribution(mdp_dist_dict, horizon=horizon)
parser.add_argument(
    '--nsepisodes',
    type=int,
    default=10,
    help='number of episodes for incidence matrix sampling')
parser.add_argument('--nssteps',
                    type=int,
                    default=10,
                    help='number of steps for incidence matrix sampling')

args = parser.parse_args()

# --task has the form '<domain>_<task>'. Split on the first underscore only,
# so that task names which themselves contain underscores still unpack into
# exactly two parts.
dom, task = args.task.split('_', 1)

# Build the MDP for the requested domain.
if dom == 'grid':
    mdp = make_grid_world_from_file('options/tasks/' + task + '.txt')
elif dom == 'taxi':
    width = 4
    height = 4
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    passengers = [{"x": 3, "y": 2, "dest_x": 2, "dest_y": 3, "in_taxi": 0}]
    mdp = TaxiOOMDP(width, height, agent, walls=[], passengers=passengers)
elif dom == 'gym':
    mdp = GymMDP(env_name=task, render=False)
elif dom == 'hanoi':
    mdp = HanoiMDP(num_pegs=3, num_discs=4)
elif dom == 'track':
    mdp = make_race_track_from_file('../tasks/' + task + '.txt')
else:
    print('Unknown task name: ', task)
    assert (False)
def make_mdp_distr(mdp_class, is_goal_terminal, mdp_size=11, horizon=0, gamma=0.99):
    '''
    Build a uniform distribution over MDPs of one class.

    Args:
        mdp_class (str): one of {"chain", "lava", "four_room", "octo",
            "corridor", "walls", "combo_lock", "spread", "tight"}.
        is_goal_terminal (bool): forwarded to the grid-style constructors
            ("lava", "four_room", "corridor", "spread", "tight").
        mdp_size (int): width and height of the "lava", "four_room", "walls"
            and "spread" domains; also passed to make_wall_permutations and
            make_lava_permutations.
        horizon (int)
        gamma (float): passed to set_gamma on every sampled MDP.

    Returns:
        (MDPDistribution): every sampled MDP gets probability 1 / num_mdps,
            where num_mdps is the number of variations listed for the class,
            12 for "octo" and 10 if the class has no such list.

    Raises:
        KeyError: if @mdp_class is not one of the names listed above.
    '''
    mdp_dist_dict = {}
    height, width = mdp_size, mdp_size

    # Corridor.
    corr_width = 20
    corr_goal_magnitude = 1  # random.randint(1, 5)
    corr_goal_cols = list(range(1, corr_goal_magnitude + 1)) + \
        list(range(corr_width - corr_goal_magnitude + 1, corr_width + 1))
    corr_goal_locs = list(itertools.product(corr_goal_cols, [1]))

    # Grid World
    tl_grid_world_rows = list(range(width - 4, width))
    tl_grid_world_cols = list(range(height - 4, height))
    tl_grid_goal_locs = list(itertools.product(tl_grid_world_rows, tl_grid_world_cols))
    tr_grid_world_rows = list(range(1, 4))
    tr_grid_world_cols = list(range(height - 4, height))
    tr_grid_goal_locs = list(itertools.product(tr_grid_world_rows, tr_grid_world_cols))
    grid_goal_locs = tl_grid_goal_locs + tr_grid_goal_locs

    # Four room.
    four_room_goal_locs = [(width, height), (width, 1), (1, height),
                           (1, height - 2),
                           (width - 2, height - 2)]  # , (width - 2, 1)]

    # SPREAD vs. TIGHT
    spread_goal_locs = [(width, height), (width, 1), (1, height),
                        (1, height - 2), (width - 2, height - 2),
                        (width - 2, 1), (2, 2)]
    tight_goal_locs = [(width, height), (width - 1, height),
                       (width, height - 1), (width, height - 2),
                       (width - 2, height), (width - 1, height - 1),
                       (width - 2, height - 2)]

    # Per-class list of the thing that varies between sampled MDPs.
    changing_entities = {
        "four_room": four_room_goal_locs,
        "grid": grid_goal_locs,
        "corridor": corr_goal_locs,
        "spread": spread_goal_locs,
        "tight": tight_goal_locs,
        "chain": [0.0, 0.01, 0.1, 0.5, 1.0],
        "combo_lock": [[3, 1, 2], [3, 2, 1], [2, 3, 1], [3, 3, 1]],
        "walls": make_wall_permutations(mdp_size),
        "lava": make_lava_permutations(mdp_size),
    }

    # MDP Probability.
    num_mdps = 10 if mdp_class not in changing_entities else len(changing_entities[mdp_class])
    if mdp_class == "octo":
        num_mdps = 12
    mdp_prob = 1.0 / num_mdps

    def entity_for(name, i):
        # The i-th variation listed for @name, wrapping around.
        options = changing_entities[name]
        return options[i % len(options)]

    # One constructor per class, taking the sample index. Only the requested
    # class is ever built, so the octogrid file is read only when needed.
    mdp_makers = {
        "chain": lambda i: ChainMDP(reset_val=entity_for("chain", i)),
        "lava": lambda i: GridWorldMDP(
            width=width, height=height, rand_init=False,
            lava_locs=entity_for("lava", i),
            goal_locs=[(mdp_size - 3, mdp_size - 3)],
            is_goal_terminal=is_goal_terminal, name="lava_world",
            slip_prob=0.1),
        "four_room": lambda i: FourRoomMDP(
            width=width, height=height,
            goal_locs=[entity_for("four_room", i)],
            is_goal_terminal=is_goal_terminal),
        "octo": lambda i: make_grid_world_from_file(
            "octogrid.txt", num_goals=12, randomize=False, goal_num=i),
        "corridor": lambda i: GridWorldMDP(
            width=20, height=1, init_loc=(10, 1),
            goal_locs=[entity_for("corridor", i)],
            is_goal_terminal=is_goal_terminal, name="corridor"),
        "walls": lambda i: ThinWallGridMDP(
            width=width, height=height, walls=entity_for("walls", i)),
        "combo_lock": lambda i: ComboLockMDP(combo=entity_for("combo_lock", i)),
        "spread": lambda i: GridWorldMDP(
            width=width, height=height, rand_init=False,
            goal_locs=[entity_for("spread", i)],
            is_goal_terminal=is_goal_terminal, name="spread_grid"),
        # NOTE(review): the grid is fixed at 10x10 while the tight goal
        # locations are derived from mdp_size -- confirm this is intended.
        "tight": lambda i: GridWorldMDP(
            width=10, height=10, rand_init=False,
            goal_locs=[entity_for("tight", i)],
            is_goal_terminal=is_goal_terminal, name="tight_grid"),
    }
    make_one = mdp_makers[mdp_class]

    for i in range(num_mdps):
        new_mdp = make_one(i)
        new_mdp.set_gamma(gamma)
        mdp_dist_dict[new_mdp] = mdp_prob

    return MDPDistribution(mdp_dist_dict, horizon=horizon)
'--nsepisodes', type=int, default=10, help='number of episodes for incidence matrix sampling') parser.add_argument('--nssteps', type=int, default=10, help='number of steps for incidence matrix sampling') args = parser.parse_args() dom, task = args.task.split('_') if dom == 'grid': mdp = make_grid_world_from_file( os.path.dirname(os.path.realpath(__file__)) + '/../tasks/' + task + '.txt') elif dom == 'taxi': width = 4 height = 4 agent = {"x": 1, "y": 1, "has_passenger": 0} passengers = [{"x": 3, "y": 2, "dest_x": 2, "dest_y": 3, "in_taxi": 0}] mdp = TaxiOOMDP(width, height, agent, walls=[], passengers=passengers) elif dom == 'gym': mdp = GymMDP(env_name=task, render=False) elif dom == 'hanoi': mdp = HanoiMDP(num_pegs=3, num_discs=4) elif dom == 'track': mdp = make_race_track_from_file( os.path.dirname(os.path.realpath(__file__)) + '/../tasks/' + task + '.txt')