Python make_grid_world_from_file Examples, simple_rl.tasks.grid_world.GridWorldMDPClass.make_grid_world_from_file Python Examples

Example #1

0

Show file

File: make_mdp.py Project: david-abel/simple_rl

def make_mdp(mdp_class="grid", grid_dim=7):
    '''
    Build the MDP registered under @mdp_class.

    Args:
        mdp_class (str): one of {"hall", "pblocks_grid", "grid", "four_room",
            "chain", "random", "hanoi", "taxi"}
        grid_dim (int): width and height of the grid-based worlds; also used
            as the number of chain states and the number of Hanoi pegs.

    Returns:
        (MDP)

    Raises:
        KeyError: if @mdp_class is not one of the names listed above.
    '''
    # Grid/Hallway stuff.
    width, height = grid_dim, grid_dim
    hall_goal_locs = [(i, width) for i in range(1, height+1)]

    four_room_goal_locs = [(width, height), (width, 1), (1, height), (1, height - 2), (width - 2, height - 2), (width - 2, 1)]
    # This assignment used to be commented out, which made the "four_room"
    # entry below raise NameError on every call, whatever @mdp_class was.
    four_room_goal_loc = four_room_goal_locs[5]

    # Taxi stuff.
    agent = {"x":1, "y":1, "has_passenger":0}
    # Floor division keeps the passenger's start on integer coordinates on
    # Python 3 too (plain "/" would give e.g. 3.5 for the default grid_dim).
    passengers = [{"x":grid_dim // 2, "y":grid_dim // 2, "dest_x":grid_dim-2, "dest_y":2, "in_taxi":0}]
    walls = []

    # NOTE: every entry of this dict is constructed before one is selected.
    mdp = {"hall":GridWorldMDP(width=width, height=height, init_loc=(1, 1), goal_locs=hall_goal_locs),
            "pblocks_grid":make_grid_world_from_file("pblocks_grid.txt", randomize=True),
            "grid":GridWorldMDP(width=width, height=height, init_loc=(1, 1), goal_locs=[(grid_dim, grid_dim)]),
            "four_room":FourRoomMDP(width=width, height=height, goal_locs=[four_room_goal_loc]),
            "chain":ChainMDP(num_states=grid_dim),
            "random":RandomMDP(num_states=50, num_rand_trans=2),
            "hanoi":HanoiMDP(num_pegs=grid_dim, num_discs=3),
            "taxi":TaxiOOMDP(width=grid_dim, height=grid_dim, slip_prob=0.0, agent=agent, walls=walls, passengers=passengers)}[mdp_class]

    return mdp

Example #2

0

Show file

def make_mdp(mdp_class="grid", grid_dim=7):
    '''
    Build the MDP registered under @mdp_class.

    Only the requested MDP is constructed; the other environments (including
    the one made from "pblocks_grid.txt") are not instantiated.

    Args:
        mdp_class (str): one of {"hall", "pblocks_grid", "grid", "four_room",
            "chain", "random", "hanoi", "taxi"}
        grid_dim (int): width and height of the grid-based worlds; also used
            as the number of chain states and the number of Hanoi pegs.

    Returns:
        (MDP)

    Raises:
        KeyError: if @mdp_class is not one of the names listed above.
    '''
    # Grid/Hallway stuff.
    width, height = grid_dim, grid_dim
    hall_goal_locs = [(i, width) for i in range(1, height + 1)]

    four_room_goal_locs = [(width, height), (width, 1), (1, height),
                           (1, height - 2), (width - 2, height - 2),
                           (width - 2, 1)]
    four_room_goal_loc = four_room_goal_locs[5]

    # Taxi stuff.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    # Floor division keeps the passenger's start on integer coordinates on
    # Python 3 too (plain "/" would give e.g. 3.5 for the default grid_dim).
    passengers = [{
        "x": grid_dim // 2,
        "y": grid_dim // 2,
        "dest_x": grid_dim - 2,
        "dest_y": 2,
        "in_taxi": 0
    }]
    walls = []

    # Each name maps to a zero-argument factory so that nothing is built
    # until the lookup below has picked the requested class.
    mdp_factories = {
        "hall":
        lambda: GridWorldMDP(width=width,
                             height=height,
                             init_loc=(1, 1),
                             goal_locs=hall_goal_locs),
        "pblocks_grid":
        lambda: make_grid_world_from_file("pblocks_grid.txt", randomize=True),
        "grid":
        lambda: GridWorldMDP(width=width,
                             height=height,
                             init_loc=(1, 1),
                             goal_locs=[(grid_dim, grid_dim)]),
        "four_room":
        lambda: FourRoomMDP(width=width,
                            height=height,
                            goal_locs=[four_room_goal_loc]),
        "chain":
        lambda: ChainMDP(num_states=grid_dim),
        "random":
        lambda: RandomMDP(num_states=50, num_rand_trans=2),
        "hanoi":
        lambda: HanoiMDP(num_pegs=grid_dim, num_discs=3),
        "taxi":
        lambda: TaxiOOMDP(width=grid_dim,
                          height=grid_dim,
                          slip_prob=0.0,
                          agent=agent,
                          walls=walls,
                          passengers=passengers),
    }

    return mdp_factories[mdp_class]()

Example #3

0

Show file

File: singletask_experiments.py Project: sguo28/covering-options

def run_all_experiments():
    '''
    Run the MIMO and MOMI experiments on every grid-world domain listed below.

    Each domain's world is built from the file "<domain>.txt".
    '''
    # TODOs
    # 1. Add Taxi
    # 2. Add ???
    domains = ['9x9grid', 'fourroom', 'Imaze']
    # The loop variable was named `d` while the calls below used `domain`,
    # which is not defined in this function.
    for domain in domains:
        fname = domain + '.txt'
        mdp = make_grid_world_from_file(fname)
        # NOTE(review): the meaning of the list arguments is defined by
        # run_MIMO / run_MOMI, which are not visible here.
        run_MIMO(domain, mdp, [1, 2, 3])

        run_MOMI(domain, mdp, [12, 11, 10, 9, 8, 7, 6, 5, 4])

Example #4

0

Show file

File: make_mdp.py Project: RoyalGuan/simple_rl

def make_mdp(mdp_class="grid", state_size=7):
    '''
    Build the MDP registered under @mdp_class.

    Only the requested MDP is constructed; the other environments (including
    the one made from "pblocks_grid.txt") are not instantiated.

    Args:
        mdp_class (str): one of {"hall", "pblocks_grid", "grid", "four_room",
            "chain", "random", "taxi"}
        state_size (int): width and height of the grid-based worlds; also
            used as the number of chain states.

    Returns:
        (MDP)

    Raises:
        KeyError: if @mdp_class is not one of the names listed above.
    '''
    # Grid/Hallway stuff.
    width, height = state_size, state_size
    hall_goal_locs = [(i, width) for i in range(1, height + 1)]

    # Taxi stuff.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    # Floor division keeps the passenger's start on integer coordinates on
    # Python 3 too (plain "/" would give e.g. 3.5 for the default size).
    passengers = [{
        "x": state_size // 2,
        "y": state_size // 2,
        "dest_x": state_size - 2,
        "dest_y": 2,
        "in_taxi": 0
    }]
    walls = []

    # Each name maps to a zero-argument factory so that nothing is built
    # until the lookup below has picked the requested class.
    mdp_factories = {
        "hall":
        lambda: GridWorldMDP(width=width,
                             height=height,
                             init_loc=(1, 1),
                             goal_locs=hall_goal_locs),
        "pblocks_grid":
        lambda: make_grid_world_from_file("pblocks_grid.txt", randomize=True),
        "grid":
        lambda: GridWorldMDP(width=width,
                             height=height,
                             init_loc=(1, 1),
                             goal_locs=[(state_size, state_size)]),
        "four_room":
        lambda: FourRoomMDP(width=width,
                            height=height,
                            goal_locs=[(width, height)]),
        "chain":
        lambda: ChainMDP(num_states=state_size),
        "random":
        lambda: RandomMDP(num_states=50, num_rand_trans=2),
        "taxi":
        lambda: TaxiOOMDP(width=state_size,
                          height=state_size,
                          slip_prob=0.0,
                          agent=agent,
                          walls=walls,
                          passengers=passengers),
    }

    return mdp_factories[mdp_class]()

Example #5

0

Show file

def TestMatching():
    '''
    Run MinimumWeightMatching on the '5x5grid' world, print the returned
    F and LB values, and save a drawing of the F edges to 'Matching.pdf'.
    '''
    domain = '5x5grid'
    mdp = make_grid_world_from_file('../tasks/' + domain + '.txt')

    adjacency, index_to_state = GetAdjacencyMatrix(mdp)
    costs = GetCost(adjacency)

    # The first returned value is not used here.
    _, matched_edges, lower_bound = MinimumWeightMatching(adjacency, costs)

    print('F\'=', matched_edges)
    print('LB=', lower_bound)

    graph = nx.from_edgelist(matched_edges)
    # Place node i at the (x, y) position of the i-th state.
    layout = {
        idx: (state.x, state.y)
        for idx, state in enumerate(index_to_state)
    }

    nx.draw_networkx_nodes(graph, pos=layout, node_size=300, node_color='g')
    nx.draw_networkx_edges(graph, pos=layout)

    plt.savefig('Matching.pdf')

Example #6

0

Show file

def _flattened_size(shape):
    '''
    Args:
        shape (iterable of int)

    Returns:
        (int): product of all entries of @shape (1 if @shape is empty).
    '''
    size = 1
    for extent in shape:
        size *= extent
    return size


def get_mdp_params(args):
    '''
    Build the MDP selected by @args.tasktype and collect its dimensions.

    Args:
        args: parsed arguments; reads .tasktype, .task and .render.
            For the 'atari', 'atariram' and 'mujoco' task types,
            .highmethod and .lowmethod are overwritten in place.

    Returns:
        (tuple): (mdp, state_dim, state_bound, num_actions, action_dim,
            action_bound). Entries that do not apply to the chosen task
            type are None.

    Raises:
        AssertionError: if @args.tasktype is not a known task type.
    '''
    state_dim = None
    num_actions = None
    action_dim = None
    action_bound = None

    # TODO: it is very hard to have a script which contains all
    #       discrete/continuous state/actions.
    #       Should we separete out the tasks, or refactor?

    if args.tasktype == 'pinball' or args.tasktype == 'p':
        # TODO: Add parameter for Configuration files by --task argument
        mdp = PinballMDP(cfg=args.task, render=args.render)
        state_dim = 4
        num_actions = len(mdp.get_actions())
    elif args.tasktype == 'atari' or args.tasktype == 'atariram':
        grayscale = False
        downscale = True
        mdp = GymMDP(env_name=args.task, grayscale=grayscale, downscale=downscale, render=args.render)
        mdp.env.seed(1234)
        state_dims = mdp.env.observation_space.shape
        if args.tasktype == 'atari':
            state_dim = _flattened_size(state_dims)
            if grayscale:
                state_dim = int(state_dim / 3)
            if downscale:
                # With downscaling on, a fixed size replaces the value
                # computed from the observation space.
                state_dim = 105 * 80 * 3
        else:
            state_dim = 128
        print('state_dim=', state_dim)
        num_actions = mdp.env.action_space.n

        # TODO: methods are fixed to dqn/ddpg/nn right now.
        print('args.highmethod is overwritten by dqn')
        print('args.lowmethod is overwritten by dqn')
        args.highmethod = 'dqn'
        args.lowmethod = 'dqn'
    elif args.tasktype == 'mujoco':
        mdp = GymMDP(env_name=args.task, render=args.render)
        mdp.env.seed(1234)
        state_dim = _flattened_size(mdp.env.observation_space.shape)
        print('state_dim=', state_dim)

        action_dim = int(mdp.env.action_space.shape[0])
        action_bound = mdp.action_bounds()

        # Fourier does not work for high dim space.

        # TODO: methods are fixed to dqn/ddpg/nn right now.
        print('args.highmethod is overwritten by dqn')
        print('args.lowmethod is overwritten by ddpg')
        args.highmethod = 'dqn'
        args.lowmethod = 'ddpg'
    elif args.tasktype == 'grid':
        fname = '../tasks/' + args.task
        mdp = make_grid_world_from_file(fname)
        state_dim = 2
        num_actions = 4
    else:
        # Raised explicitly (instead of `assert(False)`) so the check still
        # fires under `python -O`; the exception type is unchanged.
        raise AssertionError('Unknown task type: ' + str(args.tasktype))

    state_bound = mdp.bounds()

    return mdp, state_dim, state_bound, num_actions, action_dim, action_bound

Example #7

0

Show file


# Script entry point: runs TestMatching() and then exits.
if __name__ == "__main__":
    TestMatching()
    # NOTE(review): exit(0) stops the script here, so none of the
    # statements below run while this call is in place.
    exit(0)
    # domain = '5x5grid'
    # goals = [(1, 5), (1, 1), (5, 5), (3, 3), (5, 1)]

    domain = '9x9grid'
    goals = [(1, 1), (1, 9), (9, 1), (9, 9), (5, 5)]

    # domain = 'fourroom'
    # goals = [(1, 1), (1, 11), (11, 1), (11, 11), (5, 5), (8, 7), (5, 7)]

    # Build the grid world for the selected domain from its task file.
    fname = '../../tasks/' + domain + '.txt'
    mdp = make_grid_world_from_file(fname)

    G, intToS = GetAdjacencyMatrix(mdp)

    # c: array of integer ones shaped like G; d: whatever GetCost returns for G.
    c = np.ones_like(G, dtype=int)
    d = GetCost(G)
    # print('d=', d)
    # TODO
    # presumably marks which states are among `goals` -- verify against StatesToArray
    K = StatesToArray(intToS, goals)
    # K = np.random.binomial(n=1, p=0.2, size=G.shape[0]) # np.ones(G.shape[0], dtype=int)

    print('K=', K)
    # NOTE(review): D looks like the diameter bound for the tree -- confirm
    # against DiameterConstrainedSteinerTree; the meaning of 0.1 is not visible here.
    D = 15

    tree, options = DiameterConstrainedSteinerTree(G, c, d, K, D, 0.1)

Example #8

0

Show file

    parser.add_argument(
        '--nsepisodes',
        type=int,
        default=10,
        help='number of episodes for incidence matrix sampling')
    parser.add_argument('--nssteps',
                        type=int,
                        default=10,
                        help='number of steps for incidence matrix sampling')

    args = parser.parse_args()

    dom, task = args.task.split('_')

    if dom == 'grid':
        mdp = make_grid_world_from_file('../tasks/' + task + '.txt')
    elif dom == 'taxi':
        width = 4
        height = 4
        agent = {"x": 1, "y": 1, "has_passenger": 0}
        passengers = [{"x": 3, "y": 2, "dest_x": 2, "dest_y": 3, "in_taxi": 0}]
        mdp = TaxiOOMDP(width, height, agent, walls=[], passengers=passengers)
    elif dom == 'gym':
        mdp = GymMDP(env_name=task, render=False)
    elif dom == 'hanoi':
        mdp = HanoiMDP(num_pegs=3, num_discs=4)
    elif dom == 'track':
        mdp = make_race_track_from_file('../tasks/' + task + '.txt')
    else:
        print('Unknown task name: ', task)
        assert (False)

Example #9

0

Show file

File: make_mdp.py Project: RoyalGuan/simple_rl

def make_mdp_distr(mdp_class="grid", grid_dim=7, horizon=0):
    '''
    Build a distribution over MDPs of the given class.

    Args:
        mdp_class (str): one of {"hall", "corridor", "grid", "four_room",
            "pblocks_grid", "chain", "random", "taxi"}
        grid_dim (int): width and height used for the "hall", "grid" and
            "four_room" worlds.
        horizon (int): passed through to MDPDistribution.

    Returns:
        (MDPDistribution)

    Raises:
        KeyError: if @mdp_class is not one of the names listed above.
    '''
    mdp_dist_dict = {}
    height, width = grid_dim, grid_dim

    # Define goal locations.
    # NOTE: `range` is used throughout (instead of the Python 2 only
    # `xrange`) so the function runs on both Python 2 and Python 3.

    # Corridor.
    corr_width = 20
    corr_goal_magnitude = random.randint(1, 5)
    corr_goal_cols = list(range(1, corr_goal_magnitude)) + list(
        range(corr_width - corr_goal_magnitude, corr_width + 1))
    corr_goal_locs = list(itertools.product(corr_goal_cols, [1]))

    # Grid World
    grid_world_rows = list(range(width - 4, width))
    grid_world_cols = list(range(height - 4, height))
    grid_goal_locs = list(itertools.product(grid_world_rows, grid_world_cols))

    # Hallway.
    hall_goal_locs = [(i, width) for i in range(1, height + 1)]

    # Four room.
    four_room_goal_locs = [(2, 2), (width, height), (width, 1), (1, height)]

    # Taxi.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    walls = []

    goal_loc_dict = {
        "four_room": four_room_goal_locs,
        "hall": hall_goal_locs,
        "grid": grid_goal_locs,
        "corridor": corr_goal_locs
    }

    # MDP Probability.
    # One MDP per goal location for the goal-based classes, 10 otherwise.
    num_mdps = len(goal_loc_dict[mdp_class]) if mdp_class in goal_loc_dict else 10
    mdp_prob = 1.0 / num_mdps

    for i in range(num_mdps):

        # NOTE: every entry of this dict is constructed on each iteration
        # before the requested one is selected.
        new_mdp = {"hall":GridWorldMDP(width=width, height=height, init_loc=(1, 1), goal_locs=[goal_loc_dict["hall"][i % len(goal_loc_dict["hall"])]]),
                    "corridor":GridWorldMDP(width=20, height=1, init_loc=(10, 1), goal_locs=[goal_loc_dict["corridor"][i % len(goal_loc_dict["corridor"])]], is_goal_terminal=True),
                    "grid":GridWorldMDP(width=width, height=height, init_loc=(1, 1), goal_locs=[goal_loc_dict["grid"][i % len(goal_loc_dict["grid"])]], is_goal_terminal=True),
                    "four_room":FourRoomMDP(width=width, height=height, goal_locs=[goal_loc_dict["four_room"][i % len(goal_loc_dict["four_room"])]]),
                    # THESE GOALS ARE SPECIFIED IMPLICITLY:
                    "pblocks_grid":make_grid_world_from_file("pblocks_grid.txt", randomize=True),
                    "chain":ChainMDP(num_states=10, reset_val=random.choice([0, 0.01, 0.05, 0.1, 0.2, 0.5])),
                    "random":RandomMDP(num_states=40, num_rand_trans=random.randint(1,10)),
                    "taxi":TaxiOOMDP(4, 4, slip_prob=0.0, agent=agent, walls=walls, \
                                    passengers=[{"x":2, "y":2, "dest_x":random.randint(1,4), "dest_y":random.randint(1,4), "in_taxi":0}])}[mdp_class]

        mdp_dist_dict[new_mdp] = mdp_prob

    return MDPDistribution(mdp_dist_dict, horizon=horizon)

Example #10

0

Show file

def make_mdp_distr(mdp_class="grid",
                   grid_dim=9,
                   horizon=0,
                   step_cost=0,
                   gamma=0.99):
    '''
    Build a distribution over MDPs of the given class; every generated MDP
    is entered with probability 1 / (number of generated MDPs).

    Args:
        mdp_class (str): one of {"hrooms", "octo", "hall", "corridor",
            "grid", "four_room", "pblocks_grid", "chain", "random", "taxi"}
        grid_dim (int)
        horizon (int)
        step_cost (float)
        gamma (float)

    Returns:
        (MDPDistribution)
    '''
    width = height = grid_dim

    # Corridor: goal columns at both ends of a corridor of length 20.
    corr_width = 20
    corr_goal_magnitude = 1  #random.randint(1, 5)
    left_cols = list(range(1, corr_goal_magnitude + 1))
    right_cols = list(
        range(corr_width - corr_goal_magnitude + 1, corr_width + 1))
    corr_goal_locs = list(itertools.product(left_cols + right_cols, [1]))

    # Grid world: two blocks of goal cells that share the same second range.
    first_block = itertools.product(range(width - 4, width),
                                    range(height - 4, height))
    second_block = itertools.product(range(1, 4), range(height - 4, height))
    grid_goal_locs = list(first_block) + list(second_block)

    # Hallway.
    hall_goal_locs = [(col, height) for col in range(1, 30)]

    # Four room.
    four_room_goal_locs = [
        (width, height),
        (width, 1),
        (1, height),
        (1, height - 2),
        (width - 2, height - 2),
    ]  #, (width - 2, 1)]

    # Taxi.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    walls = []

    goal_loc_dict = {
        "four_room": four_room_goal_locs,
        "hall": hall_goal_locs,
        "grid": grid_goal_locs,
        "corridor": corr_goal_locs,
    }

    # Number of MDPs: 12 for "octo", one per goal location for the
    # goal-based classes, 10 for everything else.
    if mdp_class == "octo":
        num_mdps = 12
    elif mdp_class in goal_loc_dict:
        num_mdps = len(goal_loc_dict[mdp_class])
    else:
        num_mdps = 10
    mdp_prob = 1.0 / num_mdps

    for idx in range(num_mdps):
        # All candidates are constructed, in this order, before one is picked.
        hrooms_mdp = make_grid_world_from_file("hierarch_rooms.txt",
                                               num_goals=7,
                                               randomize=False)
        octo_mdp = make_grid_world_from_file("octogrid.txt",
                                             num_goals=12,
                                             randomize=False,
                                             goal_num=idx)
        hall_mdp = GridWorldMDP(width=30,
                                height=height,
                                rand_init=False,
                                goal_locs=hall_goal_locs,
                                name="hallway",
                                is_goal_terminal=True)
        corridor_mdp = GridWorldMDP(
            width=20,
            height=1,
            init_loc=(10, 1),
            goal_locs=[corr_goal_locs[idx % len(corr_goal_locs)]],
            is_goal_terminal=True,
            name="corridor")
        grid_mdp = GridWorldMDP(
            width=width,
            height=height,
            rand_init=True,
            goal_locs=[grid_goal_locs[idx % len(grid_goal_locs)]],
            is_goal_terminal=True)
        four_room_mdp = FourRoomMDP(
            width=width,
            height=height,
            goal_locs=[four_room_goal_locs[idx % len(four_room_goal_locs)]],
            is_goal_terminal=True)
        # THESE GOALS ARE SPECIFIED IMPLICITLY:
        pblocks_mdp = make_grid_world_from_file("pblocks_grid.txt",
                                                randomize=True,
                                                slip_prob=0.1)
        chain_mdp = ChainMDP(
            num_states=10,
            reset_val=random.choice([0, 0.01, 0.05, 0.1, 0.2, 0.5]))
        random_mdp = RandomMDP(num_states=40,
                               num_rand_trans=random.randint(1, 10))
        taxi_passengers = [
            {"x": 2, "y": 1, "dest_x": random.choice([2, 3]),
             "dest_y": random.choice([2, 3]), "in_taxi": 0},
            {"x": 1, "y": 2, "dest_x": random.choice([1, 2]),
             "dest_y": random.choice([1, 4]), "in_taxi": 0},
        ]
        taxi_mdp = TaxiOOMDP(3,
                             4,
                             slip_prob=0.0,
                             agent=agent,
                             walls=walls,
                             passengers=taxi_passengers)

        candidates = {
            "hrooms": hrooms_mdp,
            "octo": octo_mdp,
            "hall": hall_mdp,
            "corridor": corridor_mdp,
            "grid": grid_mdp,
            "four_room": four_room_mdp,
            "pblocks_grid": pblocks_mdp,
            "chain": chain_mdp,
            "random": random_mdp,
            "taxi": taxi_mdp,
        }
        new_mdp = candidates[mdp_class]

        new_mdp.set_step_cost(step_cost)
        new_mdp.set_gamma(gamma)

        mdp_dist_dict[new_mdp] = mdp_prob

    return MDPDistribution(mdp_dist_dict, horizon=horizon)

Example #11

0

Show file

def make_mdp_distr(mdp_class="grid", grid_dim=9, horizon=0, step_cost=0, gamma=0.99):
    '''
    Build a distribution over MDPs of the given class.

    Args:
        mdp_class (str): one of {"hrooms", "octo", "upworld", "corridor",
            "grid", "four_room", "color", "tight_four_room"}
        grid_dim (int): width and height used for the grid-based worlds.
        horizon (int): passed through to MDPDistribution.
        step_cost (float): applied to every MDP via set_step_cost.
        gamma (float): applied to every MDP via set_gamma.

    Returns:
        (MDPDistribution)

    Raises:
        KeyError: if @mdp_class is not one of the names listed above.
    '''
    mdp_dist_dict = {}
    height, width = grid_dim, grid_dim

    # Define goal locations.
    # NOTE: `range` and the call form of `print` are used (instead of the
    # Python 2 only `xrange` / print statement) so that the function runs
    # on both Python 2 and Python 3.

    # Corridor.
    corr_width = 20
    corr_goal_magnitude = 1 #random.randint(1, 5)
    corr_goal_cols = list(range(1, corr_goal_magnitude + 1)) + list(range(corr_width-corr_goal_magnitude + 1, corr_width + 1))
    corr_goal_locs  = list(itertools.product(corr_goal_cols, [1]))

    # Grid World
    tl_grid_world_rows, tl_grid_world_cols = list(range(width - 4, width)), list(range(height - 4, height))
    tl_grid_goal_locs = list(itertools.product(tl_grid_world_rows, tl_grid_world_cols))
    tr_grid_world_rows, tr_grid_world_cols = list(range(1, 4)), list(range(height - 4, height))
    tr_grid_goal_locs = list(itertools.product(tr_grid_world_rows, tr_grid_world_cols))
    grid_goal_locs = tl_grid_goal_locs + tr_grid_goal_locs

    # Hallway.
    upworld_goal_locs = [(i, height) for i in range(1, 30)]

    # Four room.
    four_room_goal_locs = [(width, height), (width, 1), (1, height), (1, height - 2), (width - 2, height - 2), (width - 2, 1)]

    # Single-argument call form prints the same text on Python 2 and 3.
    print(four_room_goal_locs)

    tight_four_room_goal_locs = [(width, height), (width, height-1), (width-1, height), (width, height - 2), (width - 2, height), (width-1, height-1)]

    # Taxi.
    agent = {"x":1, "y":1, "has_passenger":0}
    walls = []

    goal_loc_dict = {"four_room":four_room_goal_locs,
                    "color":four_room_goal_locs,
                    "upworld":upworld_goal_locs,
                    "grid":grid_goal_locs,
                    "corridor":corr_goal_locs,
                    "tight_four_room":tight_four_room_goal_locs,
                    }

    # MDP Probability.
    # One MDP per goal location for the goal-based classes, 10 otherwise;
    # "octo" is fixed at 12.
    num_mdps = len(goal_loc_dict[mdp_class]) if mdp_class in goal_loc_dict else 10
    if mdp_class == "octo":
        num_mdps = 12
    mdp_prob = 1.0 / num_mdps

    for i in range(num_mdps):

        # NOTE: every entry of this dict is constructed on each iteration
        # before the requested one is selected.
        new_mdp = {"hrooms":make_grid_world_from_file("hierarch_rooms.txt", num_goals=7, randomize=False),
                    "octo":make_grid_world_from_file("octogrid.txt", num_goals=12, randomize=False, goal_num=i),
                    "upworld":GridWorldMDP(width=30, height=height, rand_init=False, goal_locs=goal_loc_dict["upworld"], name="upworld", is_goal_terminal=True),
                    "corridor":GridWorldMDP(width=20, height=1, init_loc=(10, 1), goal_locs=[goal_loc_dict["corridor"][i % len(goal_loc_dict["corridor"])]], is_goal_terminal=True, name="corridor"),
                    "grid":GridWorldMDP(width=width, height=height, rand_init=True, goal_locs=[goal_loc_dict["grid"][i % len(goal_loc_dict["grid"])]], is_goal_terminal=True),
                    "four_room":FourRoomMDP(width=width, height=height, goal_locs=[goal_loc_dict["four_room"][i % len(goal_loc_dict["four_room"])]], is_goal_terminal=True),
                    "color":ColorMDP(width=width, height=height, num_colors=4, goal_locs=[goal_loc_dict["four_room"][i % len(goal_loc_dict["four_room"])]], is_goal_terminal=True),
                    "tight_four_room":FourRoomMDP(width=width, height=height, goal_locs=[goal_loc_dict["tight_four_room"][i % len(goal_loc_dict["tight_four_room"])]], is_goal_terminal=True, name="tight_four_room")}[mdp_class]

        new_mdp.set_step_cost(step_cost)
        new_mdp.set_gamma(gamma)

        mdp_dist_dict[new_mdp] = mdp_prob

    return MDPDistribution(mdp_dist_dict, horizon=horizon)

Example #12

0

Show file

    parser.add_argument(
        '--nsepisodes',
        type=int,
        default=10,
        help='number of episodes for incidence matrix sampling')
    parser.add_argument('--nssteps',
                        type=int,
                        default=10,
                        help='number of steps for incidence matrix sampling')

    args = parser.parse_args()

    dom, task = args.task.split('_')

    if dom == 'grid':
        mdp = make_grid_world_from_file('options/tasks/' + task + '.txt')
    elif dom == 'taxi':
        width = 4
        height = 4
        agent = {"x": 1, "y": 1, "has_passenger": 0}
        passengers = [{"x": 3, "y": 2, "dest_x": 2, "dest_y": 3, "in_taxi": 0}]
        mdp = TaxiOOMDP(width, height, agent, walls=[], passengers=passengers)
    elif dom == 'gym':
        mdp = GymMDP(env_name=task, render=False)
    elif dom == 'hanoi':
        mdp = HanoiMDP(num_pegs=3, num_discs=4)
    elif dom == 'track':
        mdp = make_race_track_from_file('../tasks/' + task + '.txt')
    else:
        print('Unknown task name: ', task)
        assert (False)

Example #13

0

Show file

File: utils.py Project: reinforcementdriving/transfer_rl_icml_2018

def make_mdp_distr(mdp_class, is_goal_terminal, mdp_size=11, horizon=0, gamma=0.99):
    '''
    Build a distribution over MDPs of the given class.

    Args:
        mdp_class (str): one of {"chain", "lava", "four_room", "octo",
            "corridor", "walls", "combo_lock", "spread", "tight"}
        is_goal_terminal (bool): forwarded to the grid-based MDPs that
            accept it.
        mdp_size (int): width and height used for the grid-based worlds.
        horizon (int): passed through to MDPDistribution.
        gamma (float): applied to every MDP via set_gamma.

    Returns:
        (MDPDistribution)

    Raises:
        KeyError: if @mdp_class is not one of the names listed above.
    '''
    mdp_dist_dict = {}

    height, width, = mdp_size, mdp_size

    # NOTE: `range` is used throughout (instead of the Python 2 only
    # `xrange`) so the function runs on both Python 2 and Python 3.

    # Corridor.
    corr_width = 20
    corr_goal_magnitude = 1 #random.randint(1, 5)
    corr_goal_cols = list(range(1, corr_goal_magnitude + 1)) + list(range(corr_width-corr_goal_magnitude + 1, corr_width + 1))
    corr_goal_locs  = list(itertools.product(corr_goal_cols, [1]))

    # Grid World
    tl_grid_world_rows, tl_grid_world_cols = list(range(width - 4, width)), list(range(height - 4, height))
    tl_grid_goal_locs = list(itertools.product(tl_grid_world_rows, tl_grid_world_cols))
    tr_grid_world_rows, tr_grid_world_cols = list(range(1, 4)), list(range(height - 4, height))
    tr_grid_goal_locs = list(itertools.product(tr_grid_world_rows, tr_grid_world_cols))
    grid_goal_locs = tl_grid_goal_locs + tr_grid_goal_locs

    # Four room.
    four_room_goal_locs = [(width, height), (width, 1), (1, height), (1, height - 2), (width - 2, height - 2)]#, (width - 2, 1)]

    # SPREAD vs. TIGHT
    spread_goal_locs = [(width, height), (width, 1), (1, height), (1, height - 2), (width - 2, height - 2), (width - 2, 1), (2,2)]
    tight_goal_locs = [(width, height), (width-1, height), (width, height-1), (width, height - 2), (width - 2, height), (width - 1, height-1), (width-2,height-2)]

    # The per-class list of things that vary from one sampled MDP to the next.
    changing_entities = {"four_room":four_room_goal_locs,
                    "grid":grid_goal_locs,
                    "corridor":corr_goal_locs,
                    "spread":spread_goal_locs,
                    "tight":tight_goal_locs,
                    "chain":[0.0, 0.01, 0.1, 0.5, 1.0],
                    "combo_lock":[[3,1,2],[3,2,1],[2,3,1],[3,3,1]],
                    "walls":make_wall_permutations(mdp_size),
                    "lava":make_lava_permutations(mdp_size)
                    }

    # MDP Probability.
    # One MDP per changing entity for the listed classes, 10 otherwise;
    # "octo" is fixed at 12.
    num_mdps = len(changing_entities[mdp_class]) if mdp_class in changing_entities else 10
    if mdp_class == "octo":
        num_mdps = 12
    mdp_prob = 1.0 / num_mdps

    for i in range(num_mdps):

        # NOTE: every entry of this dict is constructed on each iteration
        # before the requested one is selected.
        new_mdp = {"chain":ChainMDP(reset_val=changing_entities["chain"][i%len(changing_entities["chain"])]),
                    "lava":GridWorldMDP(width=width, height=height, rand_init=False, lava_locs=changing_entities["lava"][i%len(changing_entities["lava"])], goal_locs=[(mdp_size-3, mdp_size-3)], is_goal_terminal=is_goal_terminal, name="lava_world", slip_prob=0.1),
                    "four_room":FourRoomMDP(width=width, height=height, goal_locs=[changing_entities["four_room"][i % len(changing_entities["four_room"])]], is_goal_terminal=is_goal_terminal),
                    "octo":make_grid_world_from_file("octogrid.txt", num_goals=12, randomize=False, goal_num=i),
                    "corridor":GridWorldMDP(width=20, height=1, init_loc=(10, 1), goal_locs=[changing_entities["corridor"][i % len(changing_entities["corridor"])]], is_goal_terminal=is_goal_terminal, name="corridor"),
                    "walls":ThinWallGridMDP(width=width, height=height, walls=changing_entities["walls"][i%len(changing_entities["walls"])]),
                    "combo_lock":ComboLockMDP(combo=changing_entities["combo_lock"][i%len(changing_entities["combo_lock"])]),
                    "spread":GridWorldMDP(width=width, height=height, rand_init=False, goal_locs=[changing_entities["spread"][i % len(changing_entities["spread"])]], is_goal_terminal=is_goal_terminal, name="spread_grid"),
                    "tight":GridWorldMDP(width=10, height=10, rand_init=False, goal_locs=[changing_entities["tight"][i % len(changing_entities["tight"])]], is_goal_terminal=is_goal_terminal, name="tight_grid"),
                    }[mdp_class]

        new_mdp.set_gamma(gamma)

        mdp_dist_dict[new_mdp] = mdp_prob

    return MDPDistribution(mdp_dist_dict, horizon=horizon)

Example #14

0

Show file

File: rl_experiments.py Project: xczhuusetc/Optimal-Options-ICML-2019

        '--nsepisodes',
        type=int,
        default=10,
        help='number of episodes for incidence matrix sampling')
    parser.add_argument('--nssteps',
                        type=int,
                        default=10,
                        help='number of steps for incidence matrix sampling')

    args = parser.parse_args()

    dom, task = args.task.split('_')

    if dom == 'grid':
        mdp = make_grid_world_from_file(
            os.path.dirname(os.path.realpath(__file__)) + '/../tasks/' + task +
            '.txt')
    elif dom == 'taxi':
        width = 4
        height = 4
        agent = {"x": 1, "y": 1, "has_passenger": 0}
        passengers = [{"x": 3, "y": 2, "dest_x": 2, "dest_y": 3, "in_taxi": 0}]
        mdp = TaxiOOMDP(width, height, agent, walls=[], passengers=passengers)
    elif dom == 'gym':
        mdp = GymMDP(env_name=task, render=False)
    elif dom == 'hanoi':
        mdp = HanoiMDP(num_pegs=3, num_discs=4)
    elif dom == 'track':
        mdp = make_race_track_from_file(
            os.path.dirname(os.path.realpath(__file__)) + '/../tasks/' + task +
            '.txt')