Code Example #1
    def __init__(self,
                 col_sq_locs_dict,
                 width=5,
                 height=3,
                 init_loc=(1, 1),
                 goal_locs=[(5, 3)]):
        '''
        Args:
            col_sq_locs_dict (dict):
                Key: int (width)
                Val: dict
                    Key: int (height)
                    Val: color
            width (int)
            height (int)
            init_loc (tuple)
            goal_locs (list of tuples)
        '''
        GridWorldMDP.__init__(self,
                              width,
                              height,
                              init_loc=init_loc,
                              goal_locs=goal_locs)

        self.init_state = ColoredGridWorldState(
            init_loc[0], init_loc[1],
            col_sq_locs_dict[init_loc[0]][init_loc[1]])
        self.col_sq_locs_dict = col_sq_locs_dict
Code Example #2
File: simple_q_mdp.py Project: thanhkaist/cherry
def main(open_plot=True):
    # Setup MDP.
    mdp = GridWorldMDP(width=4,
                       height=3,
                       init_loc=(1, 1),
                       goal_locs=[(4, 3)],
                       lava_locs=[(4, 2)],
                       gamma=0.95,
                       walls=[(2, 2)],
                       slip_prob=0.05)

    # Make agents.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())
    tabular_agent = CherryQAgent(mdp,
                                 model=lambda *x: ActionValueFunction(*x, init=1.0),
                                 name='Tabular',
                                 lr=0.7)
    linear_agent = CherryQAgent(mdp,
                                model=lambda *x: nn.Linear(*x),
                                name='Linear',
                                lr=0.1)
    mlp_agent = CherryQAgent(mdp,
                             model=lambda *x: MLP(*x),
                             name='MLP',
                             lr=0.07)

    # Run experiment and make plot.
    agents = [rand_agent, ql_agent, tabular_agent, linear_agent, mlp_agent]
    run_agents_on_mdp(agents,
                      mdp,
                      instances=10,
                      episodes=50,
                      steps=50,
                      open_plot=open_plot)
Code Example #3
def main(open_plot=True):

    # Setup MDP.
    mdp = GridWorldMDP(width=8,
                       height=3,
                       init_loc=(1, 1),
                       goal_locs=[(8, 3)],
                       lava_locs=[(4, 2)],
                       gamma=0.95,
                       walls=[(2, 2)],
                       slip_prob=0.05)

    # Make agents.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent],
                      mdp,
                      instances=20,
                      episodes=300,
                      steps=20,
                      open_plot=open_plot,
                      track_success=True,
                      success_reward=1)
Code Example #4
File: gridworld.py Project: phymucs/llrl
 def __init__(self,
              width=5,
              height=3,
              init_loc=(1, 1),
              rand_init=False,
              goal_locs=[(5, 3)],
              lava_locs=[()],
              walls=[],
              is_goal_terminal=True,
              gamma=0.99,
              slip_prob=0.0,
              step_cost=0.0,
              lava_cost=0.01,
              goal_rewards=[1.],
              name="Grid-world"):
     GridWorldMDP.__init__(self,
                           width=width,
                           height=height,
                           init_loc=init_loc,
                           rand_init=rand_init,
                           goal_locs=goal_locs,
                           lava_locs=lava_locs,
                           walls=walls,
                           is_goal_terminal=is_goal_terminal,
                           gamma=gamma,
                           slip_prob=slip_prob,
                           step_cost=step_cost,
                           lava_cost=lava_cost,
                           name=name)
     self.goal_rewards = goal_rewards
     self.slip_unidirectional = True
Code Example #5
def main(open_plot=True):
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=10, height=10, init_loc=(1, 1), goal_locs=[(10, 10)])
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())
    abstr_identity_agent = AbstractionWrapper(QLearningAgent, agent_params={"epsilon":0.9, "actions":mdp.get_actions()})

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent, abstr_identity_agent], mdp, instances=5, episodes=100, steps=150, open_plot=open_plot)
Code Example #6
def main(open_plot=True):
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=10, height=10, init_loc=(1, 1), goal_locs=[(10, 10)])
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())
    abstr_identity_agent = AbstractionWrapper(QLearningAgent, agent_params={"epsilon":0.9}, actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent, abstr_identity_agent], mdp, instances=5, episodes=100, steps=150, open_plot=open_plot)
Code Example #7
def make_mdp(mdp_class="grid", grid_dim=7):
    '''
    Returns:
        (MDP)
    '''
    # Grid/Hallway stuff.
    width, height = grid_dim, grid_dim
    hall_goal_locs = [(i, width) for i in range(1, height + 1)]

    four_room_goal_locs = [(width, height), (width, 1), (1, height),
                           (1, height - 2), (width - 2, height - 2),
                           (width - 2, 1)]
    four_room_goal_loc = four_room_goal_locs[5]

    # Taxi stuff.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    passengers = [{
        "x": grid_dim / 2,
        "y": grid_dim / 2,
        "dest_x": grid_dim - 2,
        "dest_y": 2,
        "in_taxi": 0
    }]
    walls = []

    mdp = {
        "hall":
        GridWorldMDP(width=width,
                     height=height,
                     init_loc=(1, 1),
                     goal_locs=hall_goal_locs),
        "pblocks_grid":
        make_grid_world_from_file("pblocks_grid.txt", randomize=True),
        "grid":
        GridWorldMDP(width=width,
                     height=height,
                     init_loc=(1, 1),
                     goal_locs=[(grid_dim, grid_dim)]),
        "four_room":
        FourRoomMDP(width=width, height=height,
                    goal_locs=[four_room_goal_loc]),
        "chain":
        ChainMDP(num_states=grid_dim),
        "random":
        RandomMDP(num_states=50, num_rand_trans=2),
        "hanoi":
        HanoiMDP(num_pegs=grid_dim, num_discs=3),
        "taxi":
        TaxiOOMDP(width=grid_dim,
                  height=grid_dim,
                  slip_prob=0.0,
                  agent=agent,
                  walls=walls,
                  passengers=passengers)
    }[mdp_class]

    return mdp
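
A minimal usage sketch for the make_mdp factory above; the chosen class and dimension are illustrative, not taken from the original project:

# Illustrative call to make_mdp defined above; any key of the dispatch dict works.
mdp = make_mdp(mdp_class="four_room", grid_dim=9)
print(mdp.get_actions())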
Code Example #8
def main():

    # Setup MDP.

    actual_args = {
        "width": 10,
        "height": 10,
        "init_loc": (1, 1),
        "goal_locs": [(10, 10)],
        "lava_locs": [(1, 10), (3, 10), (5, 10), (7, 10), (9, 10)],
        "gamma": 0.9,
        "walls": [
            (2, 2), (2, 3), (2, 4), (2, 5), (2, 6), (2, 7), (2, 8), (2, 9),
            (4, 2), (4, 3), (4, 4), (4, 5), (4, 6), (4, 7), (4, 8), (4, 9),
            (6, 2), (6, 3), (6, 4), (6, 5), (6, 6), (6, 7), (6, 8), (6, 9),
            (8, 2), (8, 3), (8, 4), (8, 5), (8, 6), (8, 7), (8, 8), (8, 9)
        ],
        "slip_prob": 0.01,
        "lava_cost": 1.0,
        "step_cost": 0.1
    }

    mdp = GridWorldMDP(**actual_args)

    # Initialize the custom Q function for a q-learning agent. This should be equivalent to potential shaping.
    # This should cause the Q agent to learn more quickly.
    custom_q = defaultdict(lambda: defaultdict(lambda: 0))
    custom_q[GridWorldState(5, 1)]['right'] = 1.0
    custom_q[GridWorldState(2, 1)]['right'] = 1.0

    # Make a normal q-learning agent and another initialized with the custom_q above.
    # Finally, make a random agent to compare against.
    ql_agent = QLearningAgent(actions=mdp.get_actions(),
                              epsilon=0.2,
                              alpha=0.4)
    ql_agent_pot = QLearningAgent(actions=mdp.get_actions(),
                                  epsilon=0.2,
                                  alpha=0.4,
                                  custom_q_init=custom_q,
                                  name="PotQ")
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, ql_agent_pot, rand_agent],
                      mdp,
                      instances=2,
                      episodes=60,
                      steps=200,
                      open_plot=True,
                      verbose=True)
Code Example #9
def main(open_plot=True):

    # Setup MDP.
    mdp = GridWorldMDP(width=4, height=3, init_loc=(1, 1), goal_locs=[(4, 3)], lava_locs=[(4, 2)], gamma=0.95, walls=[(2, 2)], slip_prob=0.05)

    # Make agents.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent], mdp, instances=10, episodes=50, steps=10, open_plot=open_plot)
Code Example #10
def main():
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=6, height=6, goal_locs=[(6, 6)], slip_prob=0.2)
    value_iter = ValueIteration(mdp, sample_rate=5)
    value_iter.run_vi()

    # Value Iteration.
    action_seq, state_seq = value_iter.plan(mdp.get_init_state())

    print("Plan for", mdp)
    for i in range(len(action_seq)):
        print("\t", action_seq[i], state_seq[i])
Code Example #11
File: make_mdp.py Project: RoyalGuan/simple_rl
def make_mdp(mdp_class="grid", state_size=7):
    '''
    Returns:
        (MDP)
    '''
    # Grid/Hallway stuff.
    width, height = state_size, state_size
    hall_goal_locs = [(i, width) for i in range(1, height + 1)]

    # Taxi stuff.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    passengers = [{
        "x": state_size / 2,
        "y": state_size / 2,
        "dest_x": state_size - 2,
        "dest_y": 2,
        "in_taxi": 0
    }]
    walls = []

    mdp = {
        "hall":
        GridWorldMDP(width=width,
                     height=height,
                     init_loc=(1, 1),
                     goal_locs=hall_goal_locs),
        "pblocks_grid":
        make_grid_world_from_file("pblocks_grid.txt", randomize=True),
        "grid":
        GridWorldMDP(width=width,
                     height=height,
                     init_loc=(1, 1),
                     goal_locs=[(state_size, state_size)]),
        "four_room":
        FourRoomMDP(width=width, height=height, goal_locs=[(width, height)]),
        "chain":
        ChainMDP(num_states=state_size),
        "random":
        RandomMDP(num_states=50, num_rand_trans=2),
        "taxi":
        TaxiOOMDP(width=state_size,
                  height=state_size,
                  slip_prob=0.0,
                  agent=agent,
                  walls=walls,
                  passengers=passengers)
    }[mdp_class]

    return mdp
Code Example #12
def main():
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=6, height=6, goal_locs=[(6, 6)], slip_prob=0.2)

    value_iter = ValueIteration(mdp, sample_rate=5)
    mcts = MCTS(mdp, num_rollouts_per_step=50)
    # _, val = value_iter.run_vi()

    # Value Iteration.
    vi_action_seq, vi_state_seq = value_iter.plan(mdp.get_init_state())
    mcts_action_seq, mcts_state_seq = mcts.plan(mdp.get_init_state())

    print("Plan for", mdp)
    for i in range(len(mcts_action_seq)):
        print("\t", mcts_action_seq[i], mcts_state_seq[i])
Code Example #14
def main():
    mdp = GridWorldMDP(3, 5, (1, 1), [(3, 5)])

    vi = ValueIteration(mdp)
    # print "num states:", len(vi._compute_reachable_state_space())

    visualize_mdp(mdp)
Code Example #15
def make_mdp(mdp_class="grid", grid_dim=7):
    '''
    Returns:
        (MDP)
    '''
    # Grid/Hallway stuff.
    width, height = grid_dim, grid_dim
    upworld_goal_locs = [(i, width) for i in range(1, height+1)]

    four_room_goal_locs = [(width, height)] #, (width, 1), (1, height)] # (1, height - 2), (width - 2, height - 2), (width - 1, height - 1), (width - 2, 1)]
    four_room_goal_loc = four_room_goal_locs[0]

    # Taxi stuff.
    agent = {"x":1, "y":1, "has_passenger":0}
    passengers = [{"x":grid_dim / 2, "y":grid_dim / 2, "dest_x":grid_dim-2, "dest_y":2, "in_taxi":0}]
    walls = []

    # Trench stuff
    tr_agent = {"x": 1, "y": 1, "dx": 1, "dy": 0, "dest_x": grid_dim, "dest_y": grid_dim, "has_block": 0}
    blocks = [{"x": grid_dim, "y": 1}]
    lavas = [{"x": x, "y": y} for x, y in map(lambda z: (z + 1, (grid_dim + 1) / 2), range(grid_dim))]

    # Do grids separately to avoid making error-prone domains.
    if mdp_class == "four_room":
        mdp = FourRoomMDP(width=width, height=height, goal_locs=[four_room_goal_loc])
    else:
        mdp = {"upworld":GridWorldMDP(width=width, height=height, init_loc=(1, 1), goal_locs=upworld_goal_locs),
            "chain":ChainMDP(num_states=grid_dim),
            "random":RandomMDP(num_states=50, num_rand_trans=2),
            "hanoi":HanoiMDP(num_pegs=grid_dim, num_discs=3),
            "taxi":TaxiOOMDP(width=grid_dim, height=grid_dim, agent=agent, walls=walls, passengers=passengers),
            "trench":TrenchOOMDP(width=grid_dim, height=3, agent=tr_agent, blocks=blocks, lavas=lavas)}[mdp_class]

    return mdp
Code Example #16
def main(open_plot=True):
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=10,
                       height=10,
                       init_loc=(1, 1),
                       goal_locs=[(10, 10)])
    ql_agent = QLearnerAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent],
                      mdp,
                      instances=5,
                      episodes=100,
                      steps=150,
                      open_plot=open_plot)
Code Example #17
def choose_mdp(mdp_name, env_name="Asteroids-v0"):
    '''
    Args:
        mdp_name (str): one of {gym, grid, chain, taxi, ...}
        env_name (str): gym environment name, like 'CartPole-v0'

    Returns:
        (MDP)
    '''

    # Other imports
    from simple_rl.tasks import ChainMDP, GridWorldMDP, FourRoomMDP, TaxiOOMDP, RandomMDP, PrisonersDilemmaMDP, RockPaperScissorsMDP, GridGameMDP

    # Taxi MDP.
    agent = {"x":1, "y":1, "has_passenger":0}
    passengers = [{"x":4, "y":3, "dest_x":2, "dest_y":2, "in_taxi":0}]
    walls = []
    if mdp_name == "gym":
        # OpenAI Gym MDP.
        try:
            from simple_rl.tasks.gym.GymMDPClass import GymMDP
        except:
            raise ValueError("(simple_rl) Error: OpenAI gym not installed.")
        return GymMDP(env_name, render=True)
    else:
        return {"grid":GridWorldMDP(5, 5, (1, 1), goal_locs=[(5, 3), (4,1)]),
                "four_room":FourRoomMDP(),
                "chain":ChainMDP(5),
                "taxi":TaxiOOMDP(10, 10, slip_prob=0.0, agent=agent, walls=walls, passengers=passengers),
                "random":RandomMDP(num_states=40, num_rand_trans=20),
                "prison":PrisonersDilemmaMDP(),
                "rps":RockPaperScissorsMDP(),
                "grid_game":GridGameMDP(),
                "multi":{0.5:RandomMDP(num_states=40, num_rand_trans=20), 0.5:RandomMDP(num_states=40, num_rand_trans=5)}}[mdp_name]
Code Example #18
def main(open_plot=True):
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=4,
                       height=3,
                       init_loc=(1, 1),
                       goal_locs=[(4, 3)],
                       gamma=0.95,
                       walls=[(2, 2)])

    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent],
                      mdp,
                      instances=10,
                      episodes=1,
                      steps=20,
                      open_plot=open_plot)
Code Example #19
    def __init__(self,
                 width=5,
                 height=3,
                 init_loc=(1, 1),
                 rand_init=False,
                 goal_locs=[(5, 3)],
                 lava_locs=[()],
                 walls=[],
                 safe_locs=[],
                 is_goal_terminal=True,
                 gamma=0.99,
                 slip_prob=0.0,
                 step_cost=0.0,
                 lava_cost=1,
                 name="gridworld",
                 gui=False):

        GridWorldMDP.__init__(self, width, height, init_loc, rand_init,
                              goal_locs, lava_locs, walls, is_goal_terminal,
                              gamma, slip_prob, step_cost, lava_cost, name)

        self.jump_dist = 2

        self.actions = UnsafeGridWorldMDP.ACTIONS
        self.safe_states = set()
        for state in self.get_all_states():
            if (state.x, state.y) in safe_locs:
                self.safe_states.add(state)

        self.gui = gui
        if gui:
            self.screen = pygame.display.set_mode(
                (SCREEN_HEIGHT, SCREEN_HEIGHT))
            self.agent_shape = None

            # Pygame setup.
            pygame.init()
            self.screen.fill((255, 255, 255))
            pygame.display.update()

            self.agent_shape = self._draw_state(self.init_state,
                                                draw_statics=True)
Code Example #20
def main():
    mdp1 = GridWorldMDP(width=2,
                        height=1,
                        init_loc=(1, 1),
                        goal_locs=[(2, 1)],
                        slip_prob=0.5,
                        gamma=0.5)

    vi = ValueIteration(mdp1)
    iters, value = vi.run_vi()
    print("value=", value)
Code Example #21
    def __init__(self, col_sq_locs_dict, width=5, height=3, init_loc=(1, 1), goal_locs=[(5, 3)]):
        '''
        Args:
            col_sq_locs_dict (dict):
                Key: int (width)
                Val: dict
                    Key: int (height)
                    Val: color
            width (int)
            height (int)
            init_loc (tuple)
            goal_locs (list of tuples)
        '''
        GridWorldMDP.__init__(self,
                              width,
                              height,
                              init_loc=init_loc,
                              goal_locs=goal_locs)

        self.init_state = ColoredGridWorldState(init_loc[0], init_loc[1], col_sq_locs_dict[init_loc[0]][init_loc[1]])
        self.col_sq_locs_dict = col_sq_locs_dict
Code Example #22
def main():
    test_mdp = GridWorldMDP(width=6,
                            height=6,
                            goal_locs=[(6, 6)],
                            slip_prob=0.2)
    lower_value_function = MonotoneLowerBound(test_mdp).lower_values
    upper_value_function = MonotoneUpperBound(test_mdp).upper_values
    bounded_rtdp = BoundedRTDP(test_mdp,
                               lower_values_init=lower_value_function,
                               upper_values_init=upper_value_function)
    test_policy = bounded_rtdp.plan()
    print('Derived policy:\n{}'.format(test_policy))
Code Example #23
def main(open_plot=True):
    # Setup MDP.
    mdp = GridWorldMDP(width=4,
                       height=3,
                       init_loc=(1, 1),
                       goal_locs=[(4, 3)],
                       lava_locs=[(4, 2)],
                       gamma=0.95,
                       walls=[(2, 2)])

    # Make agents.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent],
                      mdp,
                      instances=5,
                      episodes=50,
                      steps=25,
                      open_plot=open_plot,
                      track_disc_reward=False)
Code Example #24
def main(open_plot=True):
    # Setup MDP.
    mdp = GridWorldMDP(width=4,
                       height=3,
                       init_loc=(1, 1),
                       goal_locs=[(4, 3)],
                       lava_locs=[(4, 2)],
                       gamma=0.95,
                       walls=[(2, 2)])

    # Make agents.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent],
                      mdp,
                      instances=5,
                      episodes=50,
                      steps=25,
                      open_plot=open_plot)

    # Reproduce the experiment.
    reproduce_from_exp_file(exp_name=str(mdp), open_plot=open_plot)
Code Example #25
def main():
    # Simple test code. (The extra imports below are assumed; the original
    # snippet relies on module-level imports that are not shown.)
    import random as r
    from simple_rl.tasks import GridWorldMDP
    from simple_rl.mdp import MDPDistribution

    mdp_distr = {}
    height, width = 8, 8
    prob_list = [0.0, 0.1, 0.2, 0.3, 0.4]

    for i in range(len(prob_list)):
        next_mdp = GridWorldMDP(width=width, height=height, init_loc=(1, 1), goal_locs=r.sample(list(zip(range(1, width + 1), [height] * width)), 2), is_goal_terminal=True)

        mdp_distr[next_mdp] = prob_list[i]

    m = MDPDistribution(mdp_distr)
    m.sample()
Code Example #26
def generate_MDP(width, height, init_loc, goal_locs, lava_locs, gamma, walls,
                 slip_prob):
    """ Creates an MDP object based on user input """
    actual_args = {
        "width": width,
        "height": height,
        "init_loc": init_loc,
        "goal_locs": goal_locs,
        "lava_locs": lava_locs,
        "gamma": gamma,
        "walls": walls,
        "slip_prob": slip_prob,
        "lava_cost": 1.0,
        "step_cost": 0.1
    }
    return GridWorldMDP(**actual_args)
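
For reference, a short sketch of how the generate_MDP helper above might be invoked; the argument values are illustrative, not taken from the original project:

# Hypothetical invocation of generate_MDP defined above (values are illustrative only).
mdp = generate_MDP(width=10,
                   height=10,
                   init_loc=(1, 1),
                   goal_locs=[(10, 10)],
                   lava_locs=[(5, 5)],
                   gamma=0.95,
                   walls=[(2, 2)],
                   slip_prob=0.05)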
Code Example #27
def make_mdp_distr(mdp_class="grid", num_mdps=15, gamma=0.99):
    '''
    Args:
        mdp_class (str): one of {"grid", "random"}
        num_mdps (int)

    Returns:
        (MDPDistribution)
    '''
    mdp_dist_dict = {}
    mdp_prob = 1.0 / num_mdps
    height, width = 10, 10

    # Make @num_mdps MDPs.
    for i in range(num_mdps):
        next_goals = rnd.sample([(1, 7), (7, 1), (7, 7), (6, 6), (6, 1),
                                 (1, 6)], 2)
        new_mdp = {
            "grid":
            GridWorldMDP(width=width,
                         height=height,
                         init_loc=(1, 1),
                         goal_locs=rnd.sample(
                             list(zip(range(1, width + 1), [height] * width)), 1),
                         is_goal_terminal=True,
                         gamma=gamma),
            "four_room":
            FourRoomMDP(width=8, height=8, goal_locs=next_goals, gamma=gamma),
            "chain":
            ChainMDP(num_states=10,
                     reset_val=rnd.choice([0, 0.01, 0.05, 0.1]),
                     gamma=gamma),
            "random":
            RandomMDP(num_states=40,
                      num_rand_trans=rnd.randint(1, 10),
                      gamma=gamma)
        }[mdp_class]

        mdp_dist_dict[new_mdp] = mdp_prob

    return MDPDistribution(mdp_dist_dict)
Code Example #28
def choose_mdp(mdp_name, atari_game="centipede"):
    '''
    Args:
        mdp_name (str): one of {atari, grid, chain, taxi}
        atari_game (str): one of {centipede, breakout, etc.}

    Returns:
        (MDP)
    '''
    # Grid World MDP.
    grid_mdp = GridWorldMDP(10, 10, (1, 1), goal_locs=[(10, 10)])

    # Chain MDP.
    chain_mdp = ChainMDP(15)

    # Taxi MDP.
    agent = {"x": 1, "y": 1, "has_passenger": 0}
    passengers = [{"x": 5, "y": 5, "dest_x": 3, "dest_y": 3, "in_taxi": 0}]
    taxi_mdp = TaxiOOMDP(6,
                         6,
                         agent=agent,
                         walls=[],
                         passengers=passengers)
    if mdp_name == "atari":
        # Atari import is here in case users don't have the Arcade Learning Environment.
        try:
            from simple_rl.tasks.atari.AtariMDPClass import AtariMDP
            return AtariMDP(rom=atari_game, grayscale=True)
        except:
            print "ERROR: you don't have the Arcade Learning Environment installed."
            print "\tTry here: https://github.com/mgbellemare/Arcade-Learning-Environment."
            quit()
    else:
        return {
            "grid": grid_mdp,
            "chain": chain_mdp,
            "taxi": taxi_mdp
        }[mdp_name]
Code Example #29
    def planFromAtoB(self, Maps, nearestVertex, kStepConfig):

        # if not self.computedMDP:
        #     self.wallLocations = []
        #     for x in range(len(self.Maps.occupancyMap)):
        #         for y in range(len(self.Maps.occupancyMap[x])):
        #             if self.Maps.occupancyMap[x][y] == Env.WALL:
        #                 self.wallLocations.append(Loc.Location(x,y))
        #     self.computedMDP = True

        mdp = GridWorldMDP(width=len(Maps.occupancyMap),
                           height=len(Maps.occupancyMap[0]),
                           init_loc=(nearestVertex.x, nearestVertex.y),
                           goal_locs=[(kStepConfig.x, kStepConfig.y)],
                           gamma=0.95)
        vi = ValueIteration(mdp)
        vi.run_vi()
        action_seq, state_seq = vi.plan()

        #check if conflict
        for s in state_seq:
            if Maps.occupancyMap[s[0], s[1]] == env.WALL:
                return False
        return True
Code Example #30
def main():

    # Setup MDP, Agents.
    args = parse_args()
    mdp = generate_MDP(args.width, args.height, args.i_loc, args.g_loc,
                       args.l_loc, args.gamma, args.Walls, args.slip)
    mdp = GridWorldMDP(width=7,
                       height=7,
                       init_loc=(1, 1),
                       goal_locs=[(7, 7)],
                       lava_locs=[(4, 2)],
                       gamma=0.95,
                       walls=[(2, 2)],
                       slip_prob=0.1)
    ql_agent = QLearningAgent(mdp.get_actions(),
                              epsilon=args.epsilon,
                              alpha=args.alpha,
                              gamma=args.gamma,
                              explore=args.explore,
                              anneal=args.anneal)

    # Choose viz type.

    viz = args.visualization

    if viz == "value":
        # --> Color corresponds to higher value.
        # Run experiment and make plot.
        mdp.visualize_value()
    elif viz == "policy":
        # Viz policy
        value_iter = ValueIteration(mdp)
        value_iter.run_vi()
        policy = value_iter.policy
        mdp.visualize_policy(policy)
    elif viz == "agent":
        # --> Press <spacebar> to advance the agent.
        # First let the agent solve the problem and then visualize the agent's resulting policy.
        print("\n", str(ql_agent), "interacting with", str(mdp))
        run_single_agent_on_mdp(ql_agent, mdp, episodes=500, steps=200)
        mdp.visualize_agent(ql_agent)
    elif viz == "learning":
        mdp.visualize_learning(ql_agent,
                               delay=0.005,
                               num_ep=500,
                               num_steps=200)
    elif viz == "interactive":
        # Press <1>, <2>, <3>, and so on to execute action 1, action 2, etc.
        mdp.visualize_interaction()
Code Example #31
#!/usr/bin/env python
'''
NOTE: Incomplete. Planning infrastructure in development.
'''

# Other imports.
import srl_example_setup
from simple_rl.tasks import GridWorldMDP
from simple_rl.planning import ValueIteration, MCTS

# Setup MDP, Agents.
mdp = GridWorldMDP(width=6, height=6, goal_locs=[(6, 6)])
vi = ValueIteration(mdp)
vi.run_vi()

action_seq, state_seq = vi.plan(mdp.get_init_state())

for i in range(len(action_seq)):
    print(action_seq[i], state_seq[i])
Code Example #32
def make_mdp_distr(mdp_class, is_goal_terminal, mdp_size=11, horizon=0, gamma=0.99):
    '''
    Args:
        mdp_class (str): one of {"grid", "random"}
        horizon (int)
        step_cost (float)
        gamma (float)

    Returns:
        (MDPDistribution)
    '''
    mdp_dist_dict = {}

    height, width = mdp_size, mdp_size

    # Corridor.
    corr_width = 20
    corr_goal_magnitude = 1 #random.randint(1, 5)
    corr_goal_cols = [i for i in range(1, corr_goal_magnitude + 1)] + [j for j in range(corr_width-corr_goal_magnitude + 1, corr_width + 1)]
    corr_goal_locs  = list(itertools.product(corr_goal_cols, [1]))

    # Grid World
    tl_grid_world_rows, tl_grid_world_cols = [i for i in range(width - 4, width)], [j for j in range(height - 4, height)]
    tl_grid_goal_locs = list(itertools.product(tl_grid_world_rows, tl_grid_world_cols))
    tr_grid_world_rows, tr_grid_world_cols = [i for i in range(1, 4)], [j for j in range(height - 4, height)]
    tr_grid_goal_locs = list(itertools.product(tr_grid_world_rows, tr_grid_world_cols))
    grid_goal_locs = tl_grid_goal_locs + tr_grid_goal_locs

    # Four room.
    four_room_goal_locs = [(width, height), (width, 1), (1, height), (1, height - 2), (width - 2, height - 2), (width - 2, 1)]

    # SPREAD vs. TIGHT
    spread_goal_locs = [(width, height), (width, 1), (1, height), (1, height - 2), (width - 2, height - 2), (width - 2, 1), (2,2)]
    tight_goal_locs = [(width, height), (width-1, height), (width, height-1), (width, height - 2), (width - 2, height), (width - 1, height-1), (width-2,height-2)]

    changing_entities = {"four_room":four_room_goal_locs,
                    "grid":grid_goal_locs,
                    "corridor":corr_goal_locs,
                    "spread":spread_goal_locs,
                    "tight":tight_goal_locs,
                    "chain":[0.0, 0.01, 0.1, 0.5, 1.0],
                    "combo_lock":[[3,1,2],[3,2,1],[2,3,1],[3,3,1]],
                    "walls":make_wall_permutations(mdp_size),
                    "lava":make_lava_permutations(mdp_size)
                    }

    # MDP Probability.
    num_mdps = 10 if mdp_class not in changing_entities.keys() else len(changing_entities[mdp_class])
    if mdp_class == "octo":
        num_mdps = 12
    mdp_prob = 1.0 / num_mdps

    for i in range(num_mdps):

        new_mdp = {"chain":ChainMDP(reset_val=changing_entities["chain"][i%len(changing_entities["chain"])]),
                   # "lava":GridWorldMDP(width=width, height=height, rand_init=False, step_cost=-0.001, lava_cost=0.0, lava_locs=changing_entities["lava"][i%len(changing_entities["lava"])], goal_locs=[(mdp_size-3, mdp_size-3)], is_goal_terminal=is_goal_terminal, name="lava_world", slip_prob=0.1),
                    "four_room":FourRoomMDP(width=width, height=height, goal_locs=[changing_entities["four_room"][i % len(changing_entities["four_room"])]], is_goal_terminal=is_goal_terminal),
                   # "octo":make_grid_world_from_file("octogrid.txt", num_goals=12, randomize=False, goal_num=i),
                    "corridor":GridWorldMDP(width=20, height=1, init_loc=(10, 1), goal_locs=[changing_entities["corridor"][i % len(changing_entities["corridor"])]], is_goal_terminal=is_goal_terminal, name="corridor"),
                    "combo_lock":ComboLockMDP(combo=changing_entities["combo_lock"][i%len(changing_entities["combo_lock"])]),
                    "spread":GridWorldMDP(width=width, height=height, rand_init=False, goal_locs=[changing_entities["spread"][i % len(changing_entities["spread"])]], is_goal_terminal=is_goal_terminal, name="spread_grid"),
                    "tight":GridWorldMDP(width=width, height=height, rand_init=False, goal_locs=[changing_entities["tight"][i % len(changing_entities["tight"])]], is_goal_terminal=is_goal_terminal, name="tight_grid"),
                    }[mdp_class]

        new_mdp.set_gamma(gamma)
        
        mdp_dist_dict[new_mdp] = mdp_prob

    return MDPDistribution(mdp_dist_dict, horizon=horizon)
Code Example #33
def main(open_plot=True):
    mdp = GridWorldMDP(width=4, height=3, init_loc=(1, 1), goal_locs=[(4, 3)], lava_locs=[(4, 2)], gamma=0.95, walls=[(2, 2)], slip_prob=0.05)

    # The original snippet is truncated here; the lines below complete it following the pattern of the other examples.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())
    run_agents_on_mdp([ql_agent, rand_agent], mdp, instances=10, episodes=50, steps=10, open_plot=open_plot)
Code Example #34
File: NavigationMDP.py Project: roma-patel/ltl-amdp
    def __init__(self,
                 width=30,
                 height=30,
                 goal_locs=[(21, 21)],
                 cell_types=["empty", "yellow", "red", "green", "purple"],
                 cell_type_rewards=[0, 0, -10, -10, -10],
                 cell_distribution="probability",
                 # cell_type_probs: default is chosen arbitrarily larger than
                 # percolation threshold for square lattice, which is just an
                 # approximation to match cell distribution with that of the
                 # paper.
                 cell_type_probs=[0.68, 0.17, 0.05, 0.05, 0.05],
                 cell_type_forced_locations=[np.inf, np.inf,
                                             [(1,1),(5,5)], [(2,2)], [4,4]],
                 gamma=0.99,
                 slip_prob=0.00,
                 step_cost=0.0,
                 goal_rewards=[1.0],
                 is_goal_terminal=True,
                 traj_init_cell_types=[0],
                 goal_colors=["blue"],
                 init_loc=(1,1),
                 rand_init=True,
                 init_state=None,
                 name="Navigation MDP"):
        """
        Note: 1. locations and state dimensions start from 1 instead of 0.
              2. 2d locations are interpreted in (x,y) format.
        Args:
            height (int): Height of navigation grid in no. of cells.
            width (int): Width of navigation grid in no. of cells.
            goal_locs (list of tuples: [(int, int)...]): Goal locations.
            cell_types (list of cell types: [str, str, ...]): Non-goal cell types.
            cell_type_rewards (list of ints): Reward for each entry in @cell_types.
            cell_distribution (str):
                "probability" - will assign cells according
                to @cell_type_probs over the state space.
                "manual" - will use @cell_type_forced_locations to assign cells to locations.
            cell_type_probs (list of floats): Only applicable when
                @cell_distribution is set to "probability". Specifies the probability
                corresponding to each entry in @cell_types. Values must sum to 1. Each value
                signifies the probability of occurrence of a particular cell type in the grid.
                Note: the actual probabilities would be slightly off because 
                this doesn't factor in number of goals.
            cell_type_forced_locations (list of lists of tuples
            [[(x1,y1), (x2,y2)], [(x3,y3), ...], np.inf, ...]):
                Only applicable when @cell_distribution is set to "manual". Used
                to specify additional cells and their locations. If elements are 
                set to np.inf, all of them will be sampled uniformly at random.
            goal_colors (list of str/int): Color of the goal corresponding to each entry in @goal_locs.
                If the colors differ, each goal is represented with
                a unique feature; otherwise all goals are mapped to the same feature.
            traj_init_cell_types (list of ints): Specifies which cell types
                are navigable. This is used when sampling empty/drivable states
                while generating trajectories.
        Not used here, but properties of the superclass GridWorldMDP:
            init_loc (tuple: (int, int)): (x, y) initial location.
            rand_init (bool): Whether to use a random initial location.
            init_state (GridWorldState): Initial GridWorldState.
        """

        assert height > 0 and isinstance(height, int) and width > 0 \
               and isinstance(width, int), "height and width must be integers and > 0"
        assert len(goal_colors) == len(goal_locs) == len(goal_rewards)
        assert len(cell_types) == len(cell_type_rewards)
        assert cell_distribution == "manual" or len(cell_types) == len(cell_type_probs)
        assert cell_distribution == "probability" or len(cell_types) == len(cell_type_forced_locations)

        self.value_iter = None
        self._policy_invalidated = True
        self.cell_types = cell_types
        GridWorldMDP.__init__(self,
                              width=width,
                              height=height,
                              init_loc=init_loc,
                              rand_init=rand_init,
                              goal_locs=goal_locs,
                              lava_locs=[()],
                              walls=[],  # no walls in this mdp
                              is_goal_terminal=is_goal_terminal,
                              gamma=gamma,
                              init_state=init_state,
                              slip_prob=slip_prob,
                              step_cost=step_cost,
                              name=name)

        # Cell Types
        self.cells = self.__generate_cell_type_grid(
                                            height, width,
                                            cell_distribution, cell_type_probs,
                                            cell_type_forced_locations)
        # Preserve a copy without goals
        self.cells_wo_goals = self.cells.copy()

        # Cell Rewards
        self.cell_type_rewards = cell_type_rewards
        self.cell_rewards = np.asarray(
                        [[self.cell_type_rewards[item] for item in row]
                            for row in self.cells]
                        ).reshape(height,width)
        # Preserve a copy without goals
        self.cell_rewards_wo_goals = self.cell_rewards.copy()

        # Update cells and cell_rewards with goal and its rewards
        self.reset_goals(goal_locs, goal_rewards, goal_colors)

        # Find set of Empty/Navigable cells for sampling trajectory init state
        self.set_traj_init_cell_types(cell_types=traj_init_cell_types)

        # Additional book-keeping
        self.feature_cell_dist = None
        self.feature_cell_dist_normalized = None
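
As a rough usage sketch, assuming the class defined in NavigationMDP.py is named NavigationMDP and has already been imported, an instantiation echoing the defaults shown above might look like this:

# Hypothetical instantiation (class name and import are assumptions based on the file name;
# the argument values simply restate the constructor defaults shown above).
nav_mdp = NavigationMDP(width=30,
                        height=30,
                        goal_locs=[(21, 21)],
                        cell_types=["empty", "yellow", "red", "green", "purple"],
                        cell_type_rewards=[0, 0, -10, -10, -10],
                        goal_rewards=[1.0],
                        slip_prob=0.0,
                        gamma=0.99)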