예제 #1
0
def make_mdp(mdp_class="grid", grid_dim=7):
    '''
    Build the single MDP selected by @mdp_class.

    Only the requested MDP is constructed; the other domains are never
    instantiated, so a constructor problem in one domain cannot break
    requests for another.

    Args:
        mdp_class (str): one of "four_room", "upworld", "chain", "random",
            "hanoi", "taxi", "trench".
        grid_dim (int): side length of the square grid domains; also used as
            the chain length, the number of Hanoi pegs and the trench width.

    Returns:
        (MDP)

    Raises:
        KeyError: if @mdp_class is not one of the names listed above.
            NOTE(review): the default value "grid" has no entry in this
            function, so calling make_mdp() without arguments raises --
            confirm whether a plain grid world should be supported here.
    '''
    # Grid/Hallway stuff.
    width, height = grid_dim, grid_dim
    # Every cell of the top row is a goal.
    upworld_goal_locs = [(x, grid_dim) for x in range(1, grid_dim + 1)]
    four_room_goal_loc = (width, height)

    # Taxi stuff.
    # Floor division keeps the passenger on an integer grid cell; a plain
    # "/" yields a float (e.g. 3.5) under Python 3.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    passengers = [{"x": grid_dim // 2, "y": grid_dim // 2,
                   "dest_x": grid_dim - 2, "dest_y": 2, "in_taxi": 0}]
    walls = []

    # Trench stuff.
    tr_agent = {"x": 1, "y": 1, "dx": 1, "dy": 0,
                "dest_x": grid_dim, "dest_y": grid_dim, "has_block": 0}
    blocks = [{"x": grid_dim, "y": 1}]
    # One lava cell per column, all on the same row.
    # NOTE(review): the lava row is derived from grid_dim while the trench
    # height below is fixed at 3 -- confirm this is intended.
    lava_row = (grid_dim + 1) // 2
    lavas = [{"x": x, "y": lava_row} for x in range(1, grid_dim + 1)]

    # Lazy dispatch table: each entry builds its MDP only when called.
    builders = {
        "four_room": lambda: FourRoomMDP(
            width=width, height=height, goal_locs=[four_room_goal_loc]),
        "upworld": lambda: GridWorldMDP(
            width=width, height=height, init_loc=(1, 1),
            goal_locs=upworld_goal_locs),
        "chain": lambda: ChainMDP(num_states=grid_dim),
        "random": lambda: RandomMDP(num_states=50, num_rand_trans=2),
        "hanoi": lambda: HanoiMDP(num_pegs=grid_dim, num_discs=3),
        "taxi": lambda: TaxiOOMDP(
            width=grid_dim, height=grid_dim, agent=agent, walls=walls,
            passengers=passengers),
        "trench": lambda: TrenchOOMDP(
            width=grid_dim, height=3, agent=tr_agent, blocks=blocks,
            lavas=lavas),
    }

    if mdp_class not in builders:
        # Still a KeyError (as before), but with an actionable message.
        raise KeyError("Unknown mdp_class %r; expected one of: %s"
                       % (mdp_class, ", ".join(sorted(builders))))

    return builders[mdp_class]()
예제 #2
0
def make_mdp(mdp_class="grid", grid_dim=7):
    '''
    Build the single MDP selected by @mdp_class.

    Only the requested MDP is constructed. In particular the
    "pblocks_grid" world is loaded via make_grid_world_from_file only when
    it is actually asked for, instead of on every call.

    Args:
        mdp_class (str): one of "hall", "pblocks_grid", "grid", "four_room",
            "chain", "random", "hanoi", "taxi".
        grid_dim (int): side length of the square grid domains; also used as
            the chain length and the number of Hanoi pegs.

    Returns:
        (MDP)

    Raises:
        KeyError: if @mdp_class is not one of the names listed above.
    '''
    # Grid/Hallway stuff.
    width, height = grid_dim, grid_dim
    # Every cell of the top row is a goal.
    hall_goal_locs = [(x, grid_dim) for x in range(1, grid_dim + 1)]

    # Candidate four-room goals; the index below picks the one in use.
    four_room_goal_locs = [
        (width, height),
        (width, 1),
        (1, height),
        (1, height - 2),
        (width - 2, height - 2),
        (width - 2, 1),
    ]
    four_room_goal_loc = four_room_goal_locs[5]

    # Taxi stuff.
    # Floor division keeps the passenger on an integer grid cell; a plain
    # "/" yields a float (e.g. 3.5) under Python 3.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    passengers = [{
        "x": grid_dim // 2,
        "y": grid_dim // 2,
        "dest_x": grid_dim - 2,
        "dest_y": 2,
        "in_taxi": 0,
    }]
    walls = []

    # Lazy dispatch table: each entry builds its MDP only when called.
    builders = {
        "hall": lambda: GridWorldMDP(
            width=width, height=height, init_loc=(1, 1),
            goal_locs=hall_goal_locs),
        "pblocks_grid": lambda: make_grid_world_from_file(
            "pblocks_grid.txt", randomize=True),
        "grid": lambda: GridWorldMDP(
            width=width, height=height, init_loc=(1, 1),
            goal_locs=[(grid_dim, grid_dim)]),
        "four_room": lambda: FourRoomMDP(
            width=width, height=height, goal_locs=[four_room_goal_loc]),
        "chain": lambda: ChainMDP(num_states=grid_dim),
        "random": lambda: RandomMDP(num_states=50, num_rand_trans=2),
        "hanoi": lambda: HanoiMDP(num_pegs=grid_dim, num_discs=3),
        "taxi": lambda: TaxiOOMDP(
            width=grid_dim, height=grid_dim, slip_prob=0.0, agent=agent,
            walls=walls, passengers=passengers),
    }

    if mdp_class not in builders:
        # Still a KeyError (as before), but with an actionable message.
        raise KeyError("Unknown mdp_class %r; expected one of: %s"
                       % (mdp_class, ", ".join(sorted(builders))))

    return builders[mdp_class]()
예제 #3
0
    args = parser.parse_args()

    dom, task = args.task.split('_')

    if dom == 'grid':
        mdp = make_grid_world_from_file('../tasks/' + task + '.txt')
    elif dom == 'taxi':
        width = 4
        height = 4
        agent = {"x": 1, "y": 1, "has_passenger": 0}
        passengers = [{"x": 3, "y": 2, "dest_x": 2, "dest_y": 3, "in_taxi": 0}]
        mdp = TaxiOOMDP(width, height, agent, walls=[], passengers=passengers)
    elif dom == 'gym':
        mdp = GymMDP(env_name=task, render=False)
    elif dom == 'hanoi':
        mdp = HanoiMDP(num_pegs=3, num_discs=4)
    elif dom == 'track':
        mdp = make_race_track_from_file('../tasks/' + task + '.txt')
    else:
        print('Unknown task name: ', task)
        assert (False)

    mdp.set_gamma(0.99)

    if args.experiment == 'online':
        print('test_online_agent')
        test_online_agent(args, mdp)
    elif args.experiment == 'offline':
        print('test_offline_agent')
        test_offline_agent(args, mdp)
    elif args.experiment == 'utility':