Esempio n. 1
0
def main(open_plot=True):
    # Taxi initial state attributes..
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    passengers = [{"x": 3, "y": 2, "dest_x": 2, "dest_y": 3, "in_taxi": 0}]
    walls = []
    mdp = TaxiOOMDP(width=4,
                    height=4,
                    agent=agent,
                    walls=walls,
                    passengers=passengers)

    # Agents.
    ql_agent = QLearnerAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    viz = False
    if viz:
        # Visualize Taxi.
        run_single_agent_on_mdp(ql_agent, mdp, episodes=50, steps=1000)
        mdp.visualize_agent(ql_agent)
    else:
        # Run experiment and make plot.
        run_agents_on_mdp([ql_agent, rand_agent],
                          mdp,
                          instances=50,
                          episodes=1,
                          steps=2000,
                          reset_at_terminal=True,
                          open_plot=open_plot)
Esempio n. 2
0
def choose_mdp(mdp_name, env_name="Asteroids-v0"):
    '''
    Args:
        mdp_name (str): one of {gym, grid, chain, taxi, ...}
        gym_env_name (str): gym environment name, like 'CartPole-v0'

    Returns:
        (MDP)
    '''

    # Other imports
    from simple_rl.tasks import ChainMDP, GridWorldMDP, FourRoomMDP, TaxiOOMDP, RandomMDP, PrisonersDilemmaMDP, RockPaperScissorsMDP, GridGameMDP

    # Taxi MDP.
    agent = {"x":1, "y":1, "has_passenger":0}
    passengers = [{"x":4, "y":3, "dest_x":2, "dest_y":2, "in_taxi":0}]
    walls = []
    if mdp_name == "gym":
        # OpenAI Gym MDP.
        try:
            from simple_rl.tasks.gym.GymMDPClass import GymMDP
        except:
            raise ValueError("(simple_rl) Error: OpenAI gym not installed.")
        return GymMDP(env_name, render=True)
    else:
        return {"grid":GridWorldMDP(5, 5, (1, 1), goal_locs=[(5, 3), (4,1)]),
                "four_room":FourRoomMDP(),
                "chain":ChainMDP(5),
                "taxi":TaxiOOMDP(10, 10, slip_prob=0.0, agent=agent, walls=walls, passengers=passengers),
                "random":RandomMDP(num_states=40, num_rand_trans=20),
                "prison":PrisonersDilemmaMDP(),
                "rps":RockPaperScissorsMDP(),
                "grid_game":GridGameMDP(),
                "multi":{0.5:RandomMDP(num_states=40, num_rand_trans=20), 0.5:RandomMDP(num_states=40, num_rand_trans=5)}}[mdp_name]
Esempio n. 3
0
def make_mdp(mdp_class="grid", grid_dim=7):
    '''
    Returns:
        (MDP)
    '''
    # Grid/Hallway stuff.
    width, height = grid_dim, grid_dim
    upworld_goal_locs = [(i, width) for i in range(1, height+1)]

    four_room_goal_locs = [(width, height)] #, (width, 1), (1, height)] # (1, height - 2), (width - 2, height - 2), (width - 1, height - 1), (width - 2, 1)]
    four_room_goal_loc = four_room_goal_locs[0]

    # Taxi stuff.
    agent = {"x":1, "y":1, "has_passenger":0}
    passengers = [{"x":grid_dim / 2, "y":grid_dim / 2, "dest_x":grid_dim-2, "dest_y":2, "in_taxi":0}]
    walls = []

    # Trench stuff
    tr_agent = {"x": 1, "y": 1, "dx": 1, "dy": 0, "dest_x": grid_dim, "dest_y": grid_dim, "has_block": 0}
    blocks = [{"x": grid_dim, "y": 1}]
    lavas = [{"x": x, "y": y} for x, y in map(lambda z: (z + 1, (grid_dim + 1) / 2), range(grid_dim))]

    # Do grids separately to avoid making error-prone domains.
    if mdp_class == "four_room":
        mdp = FourRoomMDP(width=width, height=height, goal_locs=[four_room_goal_loc])
    else:
        mdp = {"upworld":GridWorldMDP(width=width, height=height, init_loc=(1, 1), goal_locs=upworld_goal_locs),
            "chain":ChainMDP(num_states=grid_dim),
            "random":RandomMDP(num_states=50, num_rand_trans=2),
            "hanoi":HanoiMDP(num_pegs=grid_dim, num_discs=3),
            "taxi":TaxiOOMDP(width=grid_dim, height=grid_dim, agent=agent, walls=walls, passengers=passengers),
            "trench":TrenchOOMDP(width=grid_dim, height=3, agent=tr_agent, blocks=blocks, lavas=lavas)}[mdp_class]

    return mdp
Esempio n. 4
0
def make_custom_mdp(mdp_class, mdp_parameters):
    if mdp_class == 'augmented_taxi':
        mdp_candidate = AugmentedTaxiOOMDP(
            width=mdp_parameters['width'],
            height=mdp_parameters['height'],
            agent=mdp_parameters['agent'],
            walls=mdp_parameters['walls'],
            passengers=mdp_parameters['passengers'],
            tolls=mdp_parameters['tolls'],
            traffic=mdp_parameters['traffic'],
            fuel_stations=mdp_parameters['fuel_station'],
            gamma=mdp_parameters['gamma'],
            weights=mdp_parameters['weights'],
            env_code=mdp_parameters['env_code'],
            sample_rate=1)
    elif mdp_class == 'two_goal':
        mdp_candidate = TwoGoalOOMDP(width=mdp_parameters['width'],
                                     height=mdp_parameters['height'],
                                     agent=mdp_parameters['agent'],
                                     walls=mdp_parameters['walls'],
                                     goals=mdp_parameters['goals'],
                                     gamma=mdp_parameters['gamma'],
                                     weights=mdp_parameters['weights'],
                                     env_code=mdp_parameters['env_code'],
                                     sample_rate=1)
    elif mdp_class == 'skateboard':
        mdp_candidate = SkateboardOOMDP(
            width=mdp_parameters['width'],
            height=mdp_parameters['height'],
            agent=mdp_parameters['agent'],
            walls=mdp_parameters['walls'],
            goal=mdp_parameters['goal'],
            skateboard=mdp_parameters['skateboard'],
            gamma=mdp_parameters['gamma'],
            weights=mdp_parameters['weights'],
            env_code=mdp_parameters['env_code'],
            sample_rate=1)
    elif mdp_class == 'taxi':
        mdp_candidate = TaxiOOMDP(width=mdp_parameters['width'],
                                  height=mdp_parameters['height'],
                                  agent=mdp_parameters['agent'],
                                  walls=mdp_parameters['walls'],
                                  passengers=mdp_parameters['passengers'],
                                  gamma=mdp_parameters['gamma'],
                                  weights=mdp_parameters['weights'])
    elif mdp_class == 'cookie_crumb':
        mdp_candidate = CookieCrumbOOMDP(width=mdp_parameters['width'],
                                         height=mdp_parameters['height'],
                                         agent=mdp_parameters['agent'],
                                         walls=mdp_parameters['walls'],
                                         goals=mdp_parameters['goals'],
                                         crumbs=mdp_parameters['crumbs'],
                                         gamma=mdp_parameters['gamma'],
                                         weights=mdp_parameters['weights'],
                                         env_code=mdp_parameters['env_code'],
                                         sample_rate=1)
    else:
        raise Exception("Unknown MDP class.")

    return mdp_candidate
Esempio n. 5
0
def make_mdp(mdp_class="grid", grid_dim=7):
    '''
    Returns:
        (MDP)
    '''
    # Grid/Hallway stuff.
    width, height = grid_dim, grid_dim
    hall_goal_locs = [(i, width) for i in range(1, height + 1)]

    four_room_goal_locs = [(width, height), (width, 1), (1, height),
                           (1, height - 2), (width - 2, height - 2),
                           (width - 2, 1)]
    four_room_goal_loc = four_room_goal_locs[5]

    # Taxi stuff.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    passengers = [{
        "x": grid_dim / 2,
        "y": grid_dim / 2,
        "dest_x": grid_dim - 2,
        "dest_y": 2,
        "in_taxi": 0
    }]
    walls = []

    mdp = {
        "hall":
        GridWorldMDP(width=width,
                     height=height,
                     init_loc=(1, 1),
                     goal_locs=hall_goal_locs),
        "pblocks_grid":
        make_grid_world_from_file("pblocks_grid.txt", randomize=True),
        "grid":
        GridWorldMDP(width=width,
                     height=height,
                     init_loc=(1, 1),
                     goal_locs=[(grid_dim, grid_dim)]),
        "four_room":
        FourRoomMDP(width=width, height=height,
                    goal_locs=[four_room_goal_loc]),
        "chain":
        ChainMDP(num_states=grid_dim),
        "random":
        RandomMDP(num_states=50, num_rand_trans=2),
        "hanoi":
        HanoiMDP(num_pegs=grid_dim, num_discs=3),
        "taxi":
        TaxiOOMDP(width=grid_dim,
                  height=grid_dim,
                  slip_prob=0.0,
                  agent=agent,
                  walls=walls,
                  passengers=passengers)
    }[mdp_class]

    return mdp
Esempio n. 6
0
def main(open_plot=True):
    # Taxi initial state attributes..
    agent = {"x":1, "y":1, "has_passenger":0}
    passengers = [{"x":3, "y":2, "dest_x":2, "dest_y":3, "in_taxi":0}]
    walls = []
    mdp = TaxiOOMDP(width=4, height=4, agent=agent, walls=walls, passengers=passengers)

    # Agents.
    ql_agent = QLearningAgent(actions=mdp.get_actions()) 
    rand_agent = RandomAgent(actions=mdp.get_actions())

    viz = False
    if viz:
        # Visualize Taxi.
        run_single_agent_on_mdp(ql_agent, mdp, episodes=50, steps=1000)
        mdp.visualize_agent(ql_agent)
    else:
        # Run experiment and make plot.
        run_agents_on_mdp([ql_agent, rand_agent], mdp, instances=10, episodes=1, steps=500, reset_at_terminal=True, open_plot=open_plot)
Esempio n. 7
0
def make_mdp(mdp_class="grid", state_size=7):
    '''
    Returns:
        (MDP)
    '''
    # Grid/Hallway stuff.
    width, height = state_size, state_size
    hall_goal_locs = [(i, width) for i in range(1, height + 1)]

    # Taxi stuff.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    passengers = [{
        "x": state_size / 2,
        "y": state_size / 2,
        "dest_x": state_size - 2,
        "dest_y": 2,
        "in_taxi": 0
    }]
    walls = []

    mdp = {
        "hall":
        GridWorldMDP(width=width,
                     height=height,
                     init_loc=(1, 1),
                     goal_locs=hall_goal_locs),
        "pblocks_grid":
        make_grid_world_from_file("pblocks_grid.txt", randomize=True),
        "grid":
        GridWorldMDP(width=width,
                     height=height,
                     init_loc=(1, 1),
                     goal_locs=[(state_size, state_size)]),
        "four_room":
        FourRoomMDP(width=width, height=height, goal_locs=[(width, height)]),
        "chain":
        ChainMDP(num_states=state_size),
        "random":
        RandomMDP(num_states=50, num_rand_trans=2),
        "taxi":
        TaxiOOMDP(width=state_size,
                  height=state_size,
                  slip_prob=0.0,
                  agent=agent,
                  walls=walls,
                  passengers=passengers)
    }[mdp_class]

    return mdp
Esempio n. 8
0
def choose_mdp(mdp_name, atari_game="centipede"):
    '''
    Args:
        mdp_name (str): one of {atari, grid, chain, taxi}
        atari_game (str): one of {centipede, breakout, etc.}

    Returns:
        (MDP)
    '''
    # Grid World MDP.
    grid_mdp = GridWorldMDP(10, 10, (1, 1), (10, 10))

    # Chain MDP.
    chain_mdp = ChainMDP(15)

    # Taxi MDP.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    passengers = [{"x": 5, "y": 5, "dest_x": 3, "dest_y": 3, "in_taxi": 0}]
    taxi_mdp = TaxiOOMDP(6,
                         6,
                         agent_loc=agent,
                         walls=[],
                         passengers=passengers)
    if mdp_name == "atari":
        # Atari import is here in case users don't have the Arcade Learning Environment.
        try:
            from simple_rl.tasks.atari.AtariMDPClass import AtariMDP
            return AtariMDP(rom=atari_game, grayscale=True)
        except:
            print "ERROR: you don't have the Arcade Learning Environment installed."
            print "\tTry here: https://github.com/mgbellemare/Arcade-Learning-Environment."
            quit()
    else:
        return {
            "grid": grid_mdp,
            "chain": chain_mdp,
            "taxi": taxi_mdp
        }[mdp_name]
Esempio n. 9
0
                        type=int,
                        default=10,
                        help='number of steps for incidence matrix sampling')

    args = parser.parse_args()

    dom, task = args.task.split('_')

    if dom == 'grid':
        mdp = make_grid_world_from_file('../tasks/' + task + '.txt')
    elif dom == 'taxi':
        width = 4
        height = 4
        agent = {"x": 1, "y": 1, "has_passenger": 0}
        passengers = [{"x": 3, "y": 2, "dest_x": 2, "dest_y": 3, "in_taxi": 0}]
        mdp = TaxiOOMDP(width, height, agent, walls=[], passengers=passengers)
    elif dom == 'gym':
        mdp = GymMDP(env_name=task, render=False)
    elif dom == 'hanoi':
        mdp = HanoiMDP(num_pegs=3, num_discs=4)
    elif dom == 'track':
        mdp = make_race_track_from_file('../tasks/' + task + '.txt')
    else:
        print('Unknown task name: ', task)
        assert (False)

    mdp.set_gamma(0.99)

    if args.experiment == 'online':
        print('test_online_agent')
        test_online_agent(args, mdp)
Esempio n. 10
0
def make_mdp_distr(mdp_class="grid", grid_dim=7, horizon=0):
    '''
    Args:
        mdp_class (str): one of {"grid", "random"}
        horizon (int)

    Returns:
        (MDPDistribution)
    '''
    mdp_dist_dict = {}
    height, width = grid_dim, grid_dim

    # Define goal locations.

    # Corridor.
    corr_width = 20
    corr_goal_magnitude = random.randint(1, 5)
    corr_goal_cols = [i for i in xrange(1, corr_goal_magnitude)] + [
        j for j in xrange(corr_width - corr_goal_magnitude, corr_width + 1)
    ]
    corr_goal_locs = list(itertools.product(corr_goal_cols, [1]))

    # Grid World
    grid_world_rows, grid_world_cols = [i for i in xrange(width - 4, width)], [
        j for j in xrange(height - 4, height)
    ]
    grid_goal_locs = list(itertools.product(grid_world_rows, grid_world_cols))

    # Hallway.
    hall_goal_locs = [(i, width) for i in range(1, height + 1)]

    # Four room.
    four_room_goal_locs = [(2, 2), (width, height), (width, 1), (1, height)]

    # Taxi.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    walls = []

    goal_loc_dict = {
        "four_room": four_room_goal_locs,
        "hall": hall_goal_locs,
        "grid": grid_goal_locs,
        "corridor": corr_goal_locs
    }

    # MDP Probability.
    num_mdps = 10 if mdp_class not in goal_loc_dict.keys() else len(
        goal_loc_dict[mdp_class])
    mdp_prob = 1.0 / num_mdps

    for i in range(num_mdps):

        new_mdp = {"hall":GridWorldMDP(width=width, height=height, init_loc=(1, 1), goal_locs=[goal_loc_dict["hall"][i % len(goal_loc_dict["hall"])]]),
                    "corridor":GridWorldMDP(width=20, height=1, init_loc=(10, 1), goal_locs=[goal_loc_dict["corridor"][i % len(goal_loc_dict["corridor"])]], is_goal_terminal=True),
                    "grid":GridWorldMDP(width=width, height=height, init_loc=(1, 1), goal_locs=[goal_loc_dict["grid"][i % len(goal_loc_dict["grid"])]], is_goal_terminal=True),
                    "four_room":FourRoomMDP(width=width, height=height, goal_locs=[goal_loc_dict["four_room"][i % len(goal_loc_dict["four_room"])]]),
                    # THESE GOALS ARE SPECIFIED IMPLICITLY:
                    "pblocks_grid":make_grid_world_from_file("pblocks_grid.txt", randomize=True),
                    "chain":ChainMDP(num_states=10, reset_val=random.choice([0, 0.01, 0.05, 0.1, 0.2, 0.5])),
                    "random":RandomMDP(num_states=40, num_rand_trans=random.randint(1,10)),
                    "taxi":TaxiOOMDP(4, 4, slip_prob=0.0, agent=agent, walls=walls, \
                                    passengers=[{"x":2, "y":2, "dest_x":random.randint(1,4), "dest_y":random.randint(1,4), "in_taxi":0}])}[mdp_class]

        mdp_dist_dict[new_mdp] = mdp_prob

    return MDPDistribution(mdp_dist_dict, horizon=horizon)
Esempio n. 11
0
def make_mdp_distr(mdp_class="grid",
                   grid_dim=9,
                   horizon=0,
                   step_cost=0,
                   gamma=0.99):
    '''
    Args:
        mdp_class (str): one of {"grid", "random"}
        horizon (int)
        step_cost (float)
        gamma (float)

    Returns:
        (MDPDistribution)
    '''
    mdp_dist_dict = {}
    height, width = grid_dim, grid_dim

    # Define goal locations.

    # Corridor.
    corr_width = 20
    corr_goal_magnitude = 1  #random.randint(1, 5)
    corr_goal_cols = [i for i in range(1, corr_goal_magnitude + 1)] + [
        j for j in range(corr_width - corr_goal_magnitude + 1, corr_width + 1)
    ]
    corr_goal_locs = list(itertools.product(corr_goal_cols, [1]))

    # Grid World
    tl_grid_world_rows, tl_grid_world_cols = [
        i for i in range(width - 4, width)
    ], [j for j in range(height - 4, height)]
    tl_grid_goal_locs = list(
        itertools.product(tl_grid_world_rows, tl_grid_world_cols))
    tr_grid_world_rows, tr_grid_world_cols = [i for i in range(1, 4)], [
        j for j in range(height - 4, height)
    ]
    tr_grid_goal_locs = list(
        itertools.product(tr_grid_world_rows, tr_grid_world_cols))
    grid_goal_locs = tl_grid_goal_locs + tr_grid_goal_locs

    # Hallway.
    hall_goal_locs = [(i, height) for i in range(1, 30)]

    # Four room.
    four_room_goal_locs = [(width, height), (width, 1), (1, height),
                           (1, height - 2),
                           (width - 2, height - 2)]  #, (width - 2, 1)]

    # Taxi.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    walls = []

    goal_loc_dict = {
        "four_room": four_room_goal_locs,
        "hall": hall_goal_locs,
        "grid": grid_goal_locs,
        "corridor": corr_goal_locs,
    }

    # MDP Probability.
    num_mdps = 10 if mdp_class not in goal_loc_dict.keys() else len(
        goal_loc_dict[mdp_class])
    if mdp_class == "octo":
        num_mdps = 12
    mdp_prob = 1.0 / num_mdps

    for i in range(num_mdps):

        new_mdp = {"hrooms":make_grid_world_from_file("hierarch_rooms.txt", num_goals=7, randomize=False),
                    "octo":make_grid_world_from_file("octogrid.txt", num_goals=12, randomize=False, goal_num=i),
                    "hall":GridWorldMDP(width=30, height=height, rand_init=False, goal_locs=goal_loc_dict["hall"], name="hallway", is_goal_terminal=True),
                    "corridor":GridWorldMDP(width=20, height=1, init_loc=(10, 1), goal_locs=[goal_loc_dict["corridor"][i % len(goal_loc_dict["corridor"])]], is_goal_terminal=True, name="corridor"),
                    "grid":GridWorldMDP(width=width, height=height, rand_init=True, goal_locs=[goal_loc_dict["grid"][i % len(goal_loc_dict["grid"])]], is_goal_terminal=True),
                    "four_room":FourRoomMDP(width=width, height=height, goal_locs=[goal_loc_dict["four_room"][i % len(goal_loc_dict["four_room"])]], is_goal_terminal=True),
                    # THESE GOALS ARE SPECIFIED IMPLICITLY:
                    "pblocks_grid":make_grid_world_from_file("pblocks_grid.txt", randomize=True, slip_prob=0.1),
                    "chain":ChainMDP(num_states=10, reset_val=random.choice([0, 0.01, 0.05, 0.1, 0.2, 0.5])),
                    "random":RandomMDP(num_states=40, num_rand_trans=random.randint(1,10)),
                    "taxi":TaxiOOMDP(3, 4, slip_prob=0.0, agent=agent, walls=walls, \
                                    passengers=[{"x":2, "y":1, "dest_x":random.choice([2,3]), "dest_y":random.choice([2,3]), "in_taxi":0},
                                                {"x":1, "y":2, "dest_x":random.choice([1,2]), "dest_y":random.choice([1,4]), "in_taxi":0}])}[mdp_class]

        new_mdp.set_step_cost(step_cost)
        new_mdp.set_gamma(gamma)

        mdp_dist_dict[new_mdp] = mdp_prob

    return MDPDistribution(mdp_dist_dict, horizon=horizon)
Esempio n. 12
0
#!/usr/bin/env python

# Other imports.
import srl_example_setup
from simple_rl.agents import QLearnerAgent, RandomAgent
from simple_rl.tasks import TaxiOOMDP, BlockDudeOOMDP
from simple_rl.run_experiments import run_agents_on_mdp, run_single_agent_on_mdp

# Taxi initial state attributes..
agent = {"x": 1, "y": 1, "has_passenger": 0}
passengers = [{"x": 3, "y": 2, "dest_x": 2, "dest_y": 3, "in_taxi": 0}]
walls = []
mdp = TaxiOOMDP(width=4,
                height=4,
                agent=agent,
                walls=walls,
                passengers=passengers)

ql_agent = QLearnerAgent(actions=mdp.get_actions())
rand_agent = RandomAgent(actions=mdp.get_actions())

viz = False
if viz:
    # Visualize Taxi.
    run_single_agent_on_mdp(ql_agent, mdp, episodes=50, steps=1000)
    mdp.visualize_agent(ql_agent)
else:
    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent],
                      mdp,
                      instances=10,