Exemple #1
0
 def get_all_states(self):
     """
     Build the set of all grid cells as GridWorldState objects.

     Cells are enumerated for x in 1..self.width and y in 1..self.height
     (both inclusive). Each state's terminal flag is set from
     self._terminal_function before the state is added.

     Returns:
         (set): one GridWorldState per cell.
     """
     all_states = set()
     cell_coords = ((col, row)
                    for col in range(1, self.width + 1)
                    for row in range(1, self.height + 1))
     for col, row in cell_coords:
         cell_state = GridWorldState(col, row)
         is_terminal = self._terminal_function(cell_state)
         cell_state.set_terminal(is_terminal)
         all_states.add(cell_state)
     return all_states
Exemple #2
0
def main():
    """
    Compare three agents on a 10x10 grid world with walls and lava.

    The agents are a plain Q-learner, a Q-learner whose Q-table is seeded
    with custom values, and a random agent. Results are plotted by
    run_agents_on_mdp.
    """

    # Setup MDP.
    # Walls fill columns 2, 4, 6 and 8 for rows 2..9; lava sits on the top
    # row at the odd columns.
    wall_columns = (2, 4, 6, 8)
    lava_columns = (1, 3, 5, 7, 9)
    actual_args = dict(
        width=10,
        height=10,
        init_loc=(1, 1),
        goal_locs=[(10, 10)],
        lava_locs=[(col, 10) for col in lava_columns],
        gamma=0.9,
        walls=[(col, row) for col in wall_columns for row in range(2, 10)],
        slip_prob=0.01,
        lava_cost=1.0,
        step_cost=0.1,
    )

    mdp = GridWorldMDP(**actual_args)

    # Initialize the custom Q function for a q-learning agent. This should be equivalent to potential shaping.
    # This should cause the Q agent to learn more quickly.
    custom_q = defaultdict(lambda: defaultdict(lambda: 0))
    for seed_x, seed_y in ((5, 1), (2, 1)):
        custom_q[GridWorldState(seed_x, seed_y)]['right'] = 1.0

    # Make a normal q-learning agent and another initialized with the custom_q above.
    # Finally, make a random agent to compare against.
    ql_agent = QLearningAgent(
        actions=mdp.get_actions(),
        epsilon=0.2,
        alpha=0.4,
    )
    ql_agent_pot = QLearningAgent(
        actions=mdp.get_actions(),
        epsilon=0.2,
        alpha=0.4,
        custom_q_init=custom_q,
        name="PotQ",
    )
    rand_agent = RandomAgent(actions=mdp.get_actions())

    # Run experiment and make plot.
    competitors = [ql_agent, ql_agent_pot, rand_agent]
    run_agents_on_mdp(
        competitors,
        mdp,
        instances=2,
        episodes=60,
        steps=200,
        open_plot=True,
        verbose=True,
    )
Exemple #3
0
 def location_invariance_equivalency(self, state1, action1, state_prime1, state2, action2):
     """
     Translate an observed transition from one location to another.

     When both actions are equal, the (x, y) displacement that took state1
     to state_prime1 is applied to state2, and the resulting state is
     returned with its terminal flag set from self._terminal_function.

     Returns:
         (GridWorldState or None): the translated successor, or None when
         the two actions differ.
     """
     if not (action1 == action2):
         return None

     # Displacement observed in the first transition.
     shift_x = state_prime1.x - state1.x
     shift_y = state_prime1.y - state1.y

     translated = GridWorldState(state2.x + shift_x, state2.y + shift_y)
     translated.set_terminal(self._terminal_function(translated))
     return translated
Exemple #4
0
 def states(self):
     """
     Enumerate every cell of the grid as a GridWorldState.

     Cells are visited column by column (x from 1 to self.width, then y
     from 1 to self.height). A cell listed in self.goal_locs is marked
     terminal when self.is_goal_terminal is set.

     :return: list of states
     """
     grid_states = []
     cell_coords = ((col, row)
                    for col in range(1, self.width + 1)
                    for row in range(1, self.height + 1))
     for loc in cell_coords:
         cell = GridWorldState(loc[0], loc[1])
         ends_episode = self.is_goal_terminal and loc in self.goal_locs
         if ends_episode:
             cell.set_terminal(True)
         grid_states.append(cell)
     return grid_states
    def __init__(self,
                width=5,
                height=3,
                init_loc=(1, 1),
                rand_init=False,
                goal_locs=None,
                lava_locs=None,
                walls=None,
                is_goal_terminal=True,
                is_lava_terminal=False,
                gamma=0.99,
                slip_prob=0.0,
                step_cost=0.0,
                lava_cost=1.0,
                name="gridworld"):
        '''
        Build a grid world MDP.

        Args:
            width (int): number of columns; random starts draw x from 1..width.
            height (int): number of rows; random starts draw y from 1..height.
            init_loc (tuple: (int, int)): start cell; replaced by a random draw when rand_init is True.
            rand_init (bool): if True, draw the start cell at random, redrawing while it lies in walls.
            goal_locs (list of tuples: [(int, int)...]): defaults to [()] when omitted.
            lava_locs (list of tuples: [(int, int)...]): defaults to [()] when omitted.
            walls (list): wall cells; defaults to an empty list when omitted.
            is_goal_terminal (bool)
            is_lava_terminal (bool)
            gamma (float): forwarded to MDP.__init__.
            slip_prob (float)
            step_cost (float)
            lava_cost (float)
            name (str)

        Raises:
            ValueError: if goal_locs is not a list.
        '''
        # Use None sentinels so each instance gets its own containers; a
        # mutable default would be shared by every instance built without
        # an explicit argument.
        goal_locs = [()] if goal_locs is None else goal_locs
        lava_locs = [()] if lava_locs is None else lava_locs
        walls = [] if walls is None else walls

        # Reject bad input before any state is built.
        if not isinstance(goal_locs, list):
            raise ValueError("(simple_rl) GridWorld Error: argument @goal_locs needs to be a list of locations. For example: [(3,3), (4,3)].")

        # Setup init location.
        self.rand_init = rand_init
        if rand_init:
            init_loc = random.randint(1, width), random.randint(1, height)
            while init_loc in walls:
                init_loc = random.randint(1, width), random.randint(1, height)
        self.init_loc = init_loc
        init_state = GridWorldState(init_loc[0], init_loc[1])

        MDP.__init__(self, GridWorldMDP.ACTIONS, self._transition_func, self._reward_func, init_state=init_state, gamma=gamma)

        self.step_cost = step_cost
        self.lava_cost = lava_cost
        self.walls = walls
        self.width = width
        self.height = height
        self.goal_locs = goal_locs
        # cur_state is a separate object from the init_state given to MDP.
        self.cur_state = GridWorldState(init_loc[0], init_loc[1])
        self.is_goal_terminal = is_goal_terminal
        self.is_lava_terminal = is_lava_terminal
        self.slip_prob = slip_prob
        self.name = name
        self.lava_locs = lava_locs
def example():
    """
    Run approximate value iteration on a 5x5 grid world and print the result.

    With fancy_plot enabled the values are printed as a grid, one line per
    row with the highest y first; otherwise one line is printed per state.
    """
    size = 5
    gamma, epsilon, delta = .9, .1, .05
    fancy_plot = True

    # Create environment
    env = GridWorld(
        width=size,
        height=size,
        init_loc=(1, 1),
        goal_locs=[(size, size)],
        gamma=gamma,
        slip_prob=.1,
        goal_reward=1.0,
        is_goal_terminal=True,
    )

    # Run approximate value iteration
    value_function = approximate_value_iteration(env, gamma, epsilon, delta)

    # Print computed value function
    print('Computed value function:')
    if not fancy_plot:
        for s in value_function:
            print('Value of', str(s), ':', value_function[s])
        return

    for row in range(size, 0, -1):
        cells = ['{:>9}'.format(round(value_function[GridWorldState(col, row)], 2))
                 for col in range(1, size + 1)]
        # Every cell, including the last one, is followed by a single space.
        print(*cells, end=' ')
        print()
    def __init__(self,
                 width=5,
                 height=3,
                 init_loc=(1, 1),
                 goal_locs=None,
                 walls=None,
                 is_goal_terminal=True,
                 gamma=0.99,
                 init_state=None):
        '''
        Build a grid world MDP and validate its goal locations.

        On invalid input an error is printed and quit() is called.

        Args:
            height (int)
            width (int)
            init_loc (tuple: (int, int))
            goal_locs (list of tuples: [(int, int)...]): defaults to [(5, 3)] when omitted.
            walls (list): wall cells; defaults to an empty list when omitted.
            is_goal_terminal (bool)
            gamma (float): forwarded to MDP.__init__.
            init_state: state handed to MDP.__init__; built from init_loc when None.
        '''
        # Use None sentinels so each instance gets its own containers; a
        # mutable default would be shared by every instance.
        goal_locs = [(5, 3)] if goal_locs is None else goal_locs
        walls = [] if walls is None else walls

        init_state = GridWorldState(
            init_loc[0], init_loc[1]) if init_state is None else init_state
        MDP.__init__(self,
                     GridWorldMDP.ACTIONS,
                     self._transition_func,
                     self._reward_func,
                     init_state=init_state,
                     gamma=gamma)

        # print(...) with a single string argument is valid on both
        # Python 2 and Python 3 and prints the same text as the old
        # print statements did.
        if not isinstance(goal_locs, list):
            print("Error: argument @goal_locs needs to be a list of locations. For example: [(3,3), (4,3)].")
            quit()

        # self.walls must be set before is_wall() is used below.
        self.walls = walls
        for g in goal_locs:
            # Cells are 1-indexed, so a coordinate below 1 is off the map too.
            on_map = 1 <= g[0] <= width and 1 <= g[1] <= height
            if not on_map:
                print("Error: goal provided is off the map or overlaps with a wall..")
                print("\tGridWorld dimensions: (" + str(width) + "," + str(height) + ")")
                print("\tProblematic Goal: " + str(g))
                quit()
            if self.is_wall(g[0], g[1]):
                print("Error: goal provided is off the map or overlaps with a wall..")
                print("\tWalls: " + str(walls))
                print("\tProblematic Goal: " + str(g))
                quit()

        self.width = width
        self.height = height
        self.init_loc = init_loc
        self.goal_locs = goal_locs
        # NOTE(review): cur_state is always built from init_loc, even when an
        # explicit init_state was supplied — confirm this is intended.
        self.cur_state = GridWorldState(init_loc[0], init_loc[1])
        self.is_goal_terminal = is_goal_terminal
 def get_random_init_state(self):
     """
     Pick at random one of the cells whose value in self.cells is 0
     (an empty/white cell) and return it as a GridWorldState.
     """
     empty_rows, empty_cols = np.where(self.cells == 0)
     pick = np.random.randint(len(empty_rows))
     chosen_row = empty_rows[pick]
     chosen_col = empty_cols[pick]
     x, y = self._rowcol_to_xy(chosen_row, chosen_col)
     return GridWorldState(x, y)
Exemple #9
0
def compute_cell_values(nvmdp, SFT_full, heap_size, nA, w_r, tf_graph):
    """
    Fill a (height, width) float32 map with one value per grid cell.

    For each (row, col) the cell is converted to (x, y) with
    nvmdp._rowcol_to_xy, its entry in SFT_full is looked up by
    GridWorldState(x, y), and the [0][0] element of RHC_value(...) for
    that entry is stored in the map.

    Returns:
        (np.ndarray): array of shape (nvmdp.height, nvmdp.width).
    """
    grid_shape = (nvmdp.height, nvmdp.width)
    v_map = np.zeros(grid_shape, dtype=np.float32)
    # np.ndindex walks the indices in row-major order, like two nested loops.
    for row, col in np.ndindex(*grid_shape):
        x, y = nvmdp._rowcol_to_xy(row, col)
        cell_entry = SFT_full[GridWorldState(x, y)]
        cell_value = RHC_value(cell_entry, heap_size, nA, w_r, tf_graph)
        v_map[row, col] = cell_value[0][0]
    return v_map
    def _transition_func(self, state, action):
        '''
        Move by roughly self.delta in the chosen direction.

        The step length is self.delta plus a standard-normal sample divided
        by 100. Terminal states are returned unchanged, and an unrecognised
        action leaves the position as it is.

        Args:
            state (simple_rl.State)
            action (str)

        Returns:
            state (simple_rl.State)
        '''
        if state.is_terminal():
            return state

        jitter = np.random.randn(1)[0] / 100.0
        step = self.delta + jitter

        # Each direction is clamped only on the side it moves towards:
        # at most 1 for up/right, at least 0 for down/left.
        new_x, new_y = state.x, state.y
        if action == "up":
            new_y = min(state.y + step, 1)
        elif action == "down":
            new_y = max(state.y - step, 0)
        elif action == "right":
            new_x = min(state.x + step, 1)
        elif action == "left":
            new_x = max(state.x - step, 0)
        next_state = GridWorldState(new_x, new_y)

        # Terminality is decided from the state/action pair being left.
        if self._is_goal_state_action(state, action) and self.is_goal_terminal:
            next_state.set_terminal(True)

        return next_state
Exemple #11
0
def compute_full_SFT(nvmdp, nA, phi, h):
    """
    Build a dict with one entry per grid cell, keyed by its GridWorldState.

    Each value is element [0] of
    get_FLH(state, nA, phi, nvmdp.transition_func, nvmdp.actions, h).
    Cells are visited row by row over nvmdp.height x nvmdp.width.

    Returns:
        (dict): GridWorldState -> first element of the get_FLH result.
    """
    grid_cells = ((row, col)
                  for row in range(nvmdp.height)
                  for col in range(nvmdp.width))
    sft_by_state = {}
    for row, col in grid_cells:
        x, y = nvmdp._rowcol_to_xy(row, col)
        cell_state = GridWorldState(x, y)
        flh = get_FLH(cell_state, nA, phi, nvmdp.transition_func,
                      nvmdp.actions, h)
        sft_by_state[cell_state] = flh[0]
    return sft_by_state
Exemple #12
0
    def sample_empty_state(self, idx=None):
        """
        Return one of the candidate start cells as a GridWorldState.

        Candidates are given by the parallel sequences
        self.traj_init_cell_row_idxs and self.traj_init_cell_col_idxs.
        With idx=None a candidate is drawn at random; otherwise idx selects
        the candidate and is asserted to be in range.
        """
        if idx is not None:
            assert 0 <= idx < len(self.traj_init_cell_row_idxs)
            chosen = idx
        else:
            chosen = np.random.randint(len(self.traj_init_cell_row_idxs))

        row = self.traj_init_cell_row_idxs[chosen]
        col = self.traj_init_cell_col_idxs[chosen]
        x, y = self._rowcol_to_xy(row, col)
        return GridWorldState(x, y)
Exemple #13
0
    def _transition_func(self, state, action):
        '''
        List the possible successor states of an action with their probabilities.

        Every action has three candidate displacements: the intended one and
        one on each side of it. Targets are clipped to the grid, and a target
        that is a wall is replaced by the current position.

        Args:
            state (simple_rl)
            action (str)

        Returns
            (list, list): successor states and their probabilities, in the
            same order. A terminal state yields ([state], [1]).
        '''
        if state.is_terminal():
            return [state], [1]

        # action -> (x offsets, y offsets); the middle entry is the intended move.
        spread = [-1, 0, 1]
        offsets_by_action = {
            "up": (spread, [1, 1, 1]),
            "down": (spread, [-1, -1, -1]),
            "right": ([1, 1, 1], spread),
            "left": ([-1, -1, -1], spread),
            "jump up": (spread, [2, 2, 2]),
            "jump down": (spread, [-2, -2, -2]),
            "jump right": ([2, 2, 2], spread),
            "jump left": ([-2, -2, -2], spread),
        }
        # Unknown actions keep the agent in place in all three outcomes.
        dx, dy = offsets_by_action.get(action, ([0, 0, 0], [0, 0, 0]))

        next_states = []
        for step_x, step_y in zip(dx, dy):
            target_x = np.clip(state.x + step_x, 1, self.width)
            target_y = np.clip(state.y + step_y, 1, self.height)
            if self.is_wall(target_x, target_y):
                successor = GridWorldState(state.x, state.y)
            else:
                successor = GridWorldState(target_x, target_y)
            successor.set_terminal(self._terminal_function(successor))
            next_states.append(successor)

        # The slip probability is split evenly between the two side outcomes.
        side_prob = self.slip_prob / 2.
        p = [side_prob, 1 - self.slip_prob, side_prob]
        assert len(next_states) == len(p)
        return next_states, p
Exemple #14
0
    def _transition_func(self, state, action):
        '''
        Apply GridWorldMDP's transition function and attach a random color.

        Args:
            state (State)
            action (str)

        Returns
            (State): a ColorState at the new position, with a color drawn
            from 1..self.num_colors.
        '''
        # Only x and y are carried over into the plain grid state.
        plain_state = GridWorldState(state.x, state.y)
        moved = GridWorldMDP._transition_func(self, plain_state, action)

        # Add random color.
        color = random.randint(1, self.num_colors)
        return ColorState(moved.x, moved.y, color)
Exemple #15
0
    def _transition_func(self, state, action):
        '''
        Move the agent by a fixed step of .01 in the given direction.

        An unrecognised action leaves the position unchanged. The new state
        is marked terminal when its (x, y) is in self.goal_locs and
        self.is_goal_terminal is set.

        Args:
            state (State)
            action (str)

        Returns
            (State)
        '''
        new_x, new_y = state.x, state.y
        if action == "up":
            new_y = state.y + .01
        elif action == "down":
            new_y = state.y - .01
        elif action == "right":
            new_x = state.x + .01
        elif action == "left":
            new_x = state.x - .01
        next_state = GridWorldState(new_x, new_y)

        at_goal = (next_state.x, next_state.y) in self.goal_locs
        if at_goal and self.is_goal_terminal:
            next_state.set_terminal(True)

        return next_state
Exemple #16
0
    def transition(self, s, a):
        """
        Joint transition method: apply action a in state s and compute the reward.

        With probability self.slip_prob the action is replaced by a
        perpendicular one before the move is attempted. A move that would
        leave the grid or enter a wall keeps the agent where it is. From a
        terminal state nothing happens and the reward is 0.

        :param s: (GridWorldState) state
        :param a: (str) action
        :return: reward and resulting state (r, s_p)
        """
        if s.is_terminal():
            return 0., s

        if self.slip_prob > random.random():  # Flip direction
            # action -> (both perpendicular options, single fixed option)
            slip_table = {
                "up": (["left", "right"], "right"),
                "down": (["left", "right"], "left"),
                "left": (["up", "down"], "up"),
                "right": (["up", "down"], "down"),
            }
            if a in slip_table:
                either_side, fixed_side = slip_table[a]
                a = random.choice(either_side) if self.slip_unidirectional else fixed_side

        # Work out the target cell and whether the move is allowed.
        x, y = s.x, s.y
        if a == "up":
            target = (x, y + 1)
            allowed = y < self.height and not self.is_wall(x, y + 1)
        elif a == "down":
            target = (x, y - 1)
            allowed = y > 1 and not self.is_wall(x, y - 1)
        elif a == "right":
            target = (x + 1, y)
            allowed = x < self.width and not self.is_wall(x + 1, y)
        elif a == "left":
            target = (x - 1, y)
            allowed = x > 1 and not self.is_wall(x - 1, y)
        else:
            target = (x, y)
            allowed = False
        new_x, new_y = target if allowed else (x, y)
        s_p = GridWorldState(new_x, new_y)

        landing = (s_p.x, s_p.y)
        if landing in self.goal_locs and self.is_goal_terminal:
            s_p.set_terminal(True)

        if landing in self.goal_locs:
            # Step cost plus the reward of the first matching goal.
            r = -self.step_cost
            for goal_idx, goal in enumerate(self.goal_locs):
                if landing == goal:
                    r += self.goal_rewards[goal_idx]
                    break
        elif landing in self.lava_locs:
            r = 0. - self.lava_cost
        else:
            r = 0. - self.step_cost

        return r, s_p
Exemple #17
0
    def _transition_func(self, state, action):
        '''
        Move by roughly self.delta in the chosen direction.

        The step length is self.delta plus a standard-normal sample divided
        by 100. Terminal states are returned unchanged, and an unrecognised
        action leaves the position as it is.

        Args:
            state (simple_rl.State)
            action (str)

        Returns:
            state (simple_rl.State)
        '''
        if state.is_terminal():
            return state

        jitter = np.random.randn(1)[0] / 100.0
        step = self.delta + jitter

        # Each direction is clamped only on the side it moves towards:
        # at most 1 for up/right, at least 0 for down/left.
        new_x, new_y = state.x, state.y
        if action == "up":
            new_y = min(state.y + step, 1)
        elif action == "down":
            new_y = max(state.y - step, 0)
        elif action == "right":
            new_x = min(state.x + step, 1)
        elif action == "left":
            new_x = max(state.x - step, 0)
        next_state = GridWorldState(new_x, new_y)

        # Terminality is decided from the state/action pair being left.
        if self._is_goal_state_action(state, action) and self.is_goal_terminal:
            next_state.set_terminal(True)

        return next_state
Exemple #18
0
 def __init__(self, x, y, q):
     """
     Create a grid state at (x, y) that carries an extra component q.

     q is kept as self.q and is also appended to self.data, which must
     exist once GridWorldState.__init__ has run.
     """
     GridWorldState.__init__(self, x, y)
     self.q = q
     self.data.append(q)
 def get_init_state(self):
     """
     Draw a start state whose x and y are each picked independently from
     the fixed values 0.0, 0.2, 0.4, 0.6, 0.8 and 1.
     """
     lattice = [0.0, 0.2, 0.4, 0.6, 0.8, 1]
     start_x, start_y = random.choice(lattice), random.choice(lattice)
     return GridWorldState(start_x, start_y)
def parse_custom_q_table(q_dict, default_q):
    """
    Convert a string-keyed Q-table into one keyed by GridWorldState.

    Each key of q_dict is parsed with ast.literal_eval and unpacked into
    the GridWorldState constructor; each value is a dict mapping action
    to Q-value. Entries missing from the result read as default_q.

    Returns:
        (defaultdict): state -> (defaultdict: action -> value).
    """
    custom_q = defaultdict(lambda: defaultdict(lambda: default_q))
    entries = ((state_repr, action, value)
               for state_repr, action_values in q_dict.items()
               for action, value in action_values.items())
    for state_repr, action, value in entries:
        coords = ast.literal_eval(state_repr)
        custom_q[GridWorldState(*coords)][action] = value
    return custom_q
    def _transition_func(self, state, action):
        '''
        Move one cell in the given direction, possibly slipping sideways.

        Unless the state/action pair is a goal pair, the action is replaced
        by a random perpendicular one with probability self.slip_prob. A move
        that would leave the grid or enter a wall keeps the agent in place.
        Terminal states are returned unchanged.

        Args:
            state (State)
            action (str)

        Returns
            (State)
        '''
        if state.is_terminal():
            return state

        slipped = (not self._is_goal_state_action(state, action)
                   and self.slip_prob > random.random())
        if slipped:
            # Flip dir.
            if action in ("up", "down"):
                action = random.choice(["left", "right"])
            elif action in ("left", "right"):
                action = random.choice(["up", "down"])

        # Work out the target cell and whether the move is allowed.
        x, y = state.x, state.y
        if action == "up":
            target = (x, y + 1)
            allowed = y < self.height and not self.is_wall(x, y + 1)
        elif action == "down":
            target = (x, y - 1)
            allowed = y > 1 and not self.is_wall(x, y - 1)
        elif action == "right":
            target = (x + 1, y)
            allowed = x < self.width and not self.is_wall(x + 1, y)
        elif action == "left":
            target = (x - 1, y)
            allowed = x > 1 and not self.is_wall(x - 1, y)
        else:
            target = (x, y)
            allowed = False
        new_x, new_y = target if allowed else (x, y)
        next_state = GridWorldState(new_x, new_y)

        landing = (next_state.x, next_state.y)
        landed_in_term_goal = landing in self.goal_locs and self.is_goal_terminal
        landed_in_term_lava = landing in self.lava_locs and self.is_lava_terminal
        if landed_in_term_goal or landed_in_term_lava:
            next_state.set_terminal(True)

        # NOTE(review): this check makes every lava cell terminal whatever
        # self.is_lava_terminal says, so that flag has no effect here —
        # confirm whether this is intended.
        if landing in self.lava_locs:
            next_state.set_terminal(True)

        return next_state
    def _transition_func(self, state, action):
        '''
        Move one cell in the given direction.

        A move that would leave the grid or enter a wall keeps the agent in
        place, as does an unrecognised action. Terminal states are returned
        unchanged.

        Args:
            state (State)
            action (str)

        Returns
            (State)
        '''
        if state.is_terminal():
            return state

        # Work out the target cell and whether the move is allowed.
        x, y = state.x, state.y
        if action == "up":
            target = (x, y + 1)
            allowed = y < self.height and not self.is_wall(x, y + 1)
        elif action == "down":
            target = (x, y - 1)
            allowed = y > 1 and not self.is_wall(x, y - 1)
        elif action == "right":
            target = (x + 1, y)
            allowed = x < self.width and not self.is_wall(x + 1, y)
        elif action == "left":
            target = (x - 1, y)
            allowed = x > 1 and not self.is_wall(x - 1, y)
        else:
            target = (x, y)
            allowed = False
        new_x, new_y = target if allowed else (x, y)
        next_state = GridWorldState(new_x, new_y)

        at_goal = (next_state.x, next_state.y) in self.goal_locs
        if at_goal and self.is_goal_terminal:
            next_state.set_terminal(True)

        return next_state
 def reset(self):
     """
     Put the environment back at a starting state.

     With self.rand_init set, a start cell is drawn at random from
     x in 1..self.width and y in 1..self.height, redrawing while the cell
     is in self.walls. Otherwise self.cur_state becomes a deep copy of
     self.init_state.
     """
     if self.rand_init:
         # The grid size is read from the instance: the previous bare names
         # num_cols / num_rows were not defined in this method.
         init_loc = random.randint(1, self.width), random.randint(1, self.height)
         # Redraw until the cell is free so the agent never starts in a wall.
         while init_loc in self.walls:
             init_loc = random.randint(1, self.width), random.randint(1, self.height)
         self.cur_state = GridWorldState(init_loc[0], init_loc[1])
     else:
         # Copy so that later changes to cur_state do not alter init_state.
         self.cur_state = copy.deepcopy(self.init_state)
Exemple #24
0
    def transition(self, s, a):
        """
        Joint transition method: apply action a in state s and compute the reward.

        With probability self.slip_prob the action is replaced by a random
        perpendicular one. A move that would leave the grid or enter a wall
        keeps the agent in place. Outside goal and lava cells, when
        self.reward_span is positive, the reward is a sum of Gaussian bumps
        centred on the goals, minus the step cost.

        :param s: (GridWorldState) state
        :param a: (str) action
        :return: reward and resulting state (r, s_p)
        """
        if s.is_terminal():
            return 0., s

        if self.slip_prob > random.random():  # Flip direction
            if a in ("up", "down"):
                a = random.choice(["left", "right"])
            elif a in ("left", "right"):
                a = random.choice(["up", "down"])

        # Work out the target cell and whether the move is allowed.
        x, y = s.x, s.y
        if a == "up":
            target = (x, y + 1)
            allowed = y < self.height and not self.is_wall(x, y + 1)
        elif a == "down":
            target = (x, y - 1)
            allowed = y > 1 and not self.is_wall(x, y - 1)
        elif a == "right":
            target = (x + 1, y)
            allowed = x < self.width and not self.is_wall(x + 1, y)
        elif a == "left":
            target = (x - 1, y)
            allowed = x > 1 and not self.is_wall(x - 1, y)
        else:
            target = (x, y)
            allowed = False
        new_x, new_y = target if allowed else (x, y)
        s_p = GridWorldState(new_x, new_y)

        landing = (s_p.x, s_p.y)
        if landing in self.goal_locs and self.is_goal_terminal:
            s_p.set_terminal(True)

        if landing in self.goal_locs:
            r = self.goal_reward - self.step_cost
        elif landing in self.lava_locs:
            r = -self.lava_cost
        else:
            heat_reward = 0.
            if self.reward_span > 0.:
                # Same denominator for every goal: 2 * span^2.
                two_span_sq = 2. * self.reward_span**2
                for goal in self.goal_locs:
                    sq_dist = (s_p.x - goal[0])**2 + (s_p.y - goal[1])**2
                    heat_reward += self.goal_reward * np.exp(-(sq_dist) / two_span_sq)
            r = heat_reward - self.step_cost

        return r, s_p