Beispiel #1
0
    def _transition_func(self, state, action):
        '''
        Move the agent one noisy step along a single axis.

        The step length is ``self.delta`` plus a standard-normal sample
        divided by 100. Only the coordinate being moved is clamped, and only
        on the side the action points toward: "up"/"right" are capped at 1,
        "down"/"left" are floored at 0. Any other action keeps the position.

        Args:
            state (simple_rl.State): current state; returned untouched when
                it is already terminal.
            action (str): "up", "down", "right" or "left".

        Returns:
            state (simple_rl.State): the resulting GridWorldState, flagged
                terminal when ``self._is_goal_state_action(state, action)``
                holds and ``self.is_goal_terminal`` is set.
        '''
        # Terminal states are absorbing.
        if state.is_terminal():
            return state

        # The noise is sampled on every call, even for unrecognised actions.
        step = self.delta + np.random.randn(1)[0] / 100.0

        new_x, new_y = state.x, state.y
        if action == "up":
            new_y = min(state.y + step, 1)
        elif action == "down":
            new_y = max(state.y - step, 0)
        elif action == "right":
            new_x = min(state.x + step, 1)
        elif action == "left":
            new_x = max(state.x - step, 0)

        next_state = GridWorldState(new_x, new_y)

        # Terminality is decided from the source state and the action taken,
        # not from the coordinates of the successor.
        if self._is_goal_state_action(state, action) and self.is_goal_terminal:
            next_state.set_terminal(True)

        return next_state
Beispiel #2
0
    def _transition_func(self, state, action):
        '''
        Compute the successor of ``state`` after a noisy move.

        The distance moved is ``self.delta`` perturbed by Gaussian noise
        (one standard-normal draw scaled by 1/100). "up" and "right" cap the
        moved coordinate at 1; "down" and "left" floor it at 0. The other
        coordinate is passed through unchanged. Unknown actions do not move.

        Args:
            state (simple_rl.State): state to move from.
            action (str): "up", "down", "right" or "left".

        Returns:
            state (simple_rl.State): ``state`` itself if it is terminal,
                otherwise a new GridWorldState (marked terminal when the
                state/action pair is a goal pair and goals are terminal).
        '''
        if state.is_terminal():
            return state

        jitter = np.random.randn(1)[0] / 100.0
        stride = self.delta + jitter

        if action == "up":
            coords = (state.x, min(state.y + stride, 1))
        elif action == "down":
            coords = (state.x, max(state.y - stride, 0))
        elif action == "right":
            coords = (min(state.x + stride, 1), state.y)
        elif action == "left":
            coords = (max(state.x - stride, 0), state.y)
        else:
            # Unrecognised action: stay put (the noise sample is discarded).
            coords = (state.x, state.y)

        successor = GridWorldState(*coords)

        goal_move = self._is_goal_state_action(state, action)
        if goal_move and self.is_goal_terminal:
            successor.set_terminal(True)

        return successor
    def _transition_func(self, state, action):
        '''
        Deterministic one-cell move on the grid.

        A move succeeds only when it stays inside the grid on the axis being
        moved (rows 1..self.height, columns 1..self.width) and the target
        cell is not a wall; otherwise the agent stays where it is.

        Args:
            state (State): current state; returned as-is when terminal.
            action (str): "up", "down", "right" or "left".

        Returns:
            (State): the resulting GridWorldState, flagged terminal when it
                lies in ``self.goal_locs`` and ``self.is_goal_terminal``.
        '''
        if state.is_terminal():
            return state

        col, row = state.x, state.y
        new_col, new_row = col, row

        # The bound is tested before is_wall so the wall lookup is only made
        # for cells that passed the bound check.
        if action == "up":
            if row < self.height and not self.is_wall(col, row + 1):
                new_row = row + 1
        elif action == "down":
            if row > 1 and not self.is_wall(col, row - 1):
                new_row = row - 1
        elif action == "right":
            if col < self.width and not self.is_wall(col + 1, row):
                new_col = col + 1
        elif action == "left":
            if col > 1 and not self.is_wall(col - 1, row):
                new_col = col - 1

        next_state = GridWorldState(new_col, new_row)

        at_goal = (next_state.x, next_state.y) in self.goal_locs
        if at_goal and self.is_goal_terminal:
            next_state.set_terminal(True)

        return next_state
Beispiel #4
0
 def get_all_states(self):
     """
     Build the set of states for every cell of the grid.

     Cells are enumerated for x in 1..self.width and y in 1..self.height.
     Each state's terminal flag is taken from ``self._terminal_function``
     before the state is added to the set.

     Returns:
         (set): one GridWorldState per grid cell.
     """
     cells = [
         (col, row)
         for col in range(1, self.width + 1)
         for row in range(1, self.height + 1)
     ]

     all_states = set()
     for col, row in cells:
         cell_state = GridWorldState(col, row)
         is_done = self._terminal_function(cell_state)
         cell_state.set_terminal(is_done)
         all_states.add(cell_state)
     return all_states
Beispiel #5
0
    def _transition_func(self, state, action):
        '''
        Enumerate the possible successors of ``state`` and their
        probabilities.

        Every action yields three candidate moves: the intended one and the
        two obtained by drifting one cell to either side of it. "jump"
        variants travel two cells along the main axis instead of one.
        Candidates are clipped to the grid with ``np.clip``; a candidate
        that lands on a wall is replaced by staying in place. An
        unrecognised action produces three zero-length moves.

        Args:
            state (State): current state.
            action (str): "up", "down", "right", "left", or one of those
                prefixed with "jump " (e.g. "jump up").

        Returns:
            (tuple): ``(next_states, p)`` where ``next_states`` is a list of
                GridWorldState and ``p`` the matching list of
                probabilities. The middle entry (the intended move) has
                probability ``1 - self.slip_prob``; each outer entry has
                ``self.slip_prob / 2``. A terminal ``state`` yields
                ``([state], [1])``.
        '''
        if state.is_terminal():
            return [state], [1]

        # action -> (x offsets, y offsets); position i of both lists forms
        # the i-th candidate move.
        offsets = {
            "up": ([-1, 0, 1], [1, 1, 1]),
            "down": ([-1, 0, 1], [-1, -1, -1]),
            "right": ([1, 1, 1], [-1, 0, 1]),
            "left": ([-1, -1, -1], [-1, 0, 1]),
            "jump up": ([-1, 0, 1], [2, 2, 2]),
            "jump down": ([-1, 0, 1], [-2, -2, -2]),
            "jump right": ([2, 2, 2], [-1, 0, 1]),
            "jump left": ([-2, -2, -2], [-1, 0, 1]),
        }
        dx, dy = offsets.get(action, ([0, 0, 0], [0, 0, 0]))

        next_states = []
        for shift_x, shift_y in zip(dx, dy):
            target_x = np.clip(state.x + shift_x, 1, self.width)
            target_y = np.clip(state.y + shift_y, 1, self.height)
            # Only the landing cell is tested for a wall.
            if self.is_wall(target_x, target_y):
                landing = (state.x, state.y)
            else:
                landing = (target_x, target_y)
            successor = GridWorldState(*landing)
            successor.set_terminal(self._terminal_function(successor))
            next_states.append(successor)

        drift = self.slip_prob / 2.
        p = [drift, 1 - self.slip_prob, drift]
        assert len(next_states) == len(p)
        return next_states, p
Beispiel #6
0
    def transition(self, s, a):
        """
        Joint transition method: sample the next state and its reward.

        With probability ``self.slip_prob`` the action is replaced by a
        perpendicular one. The (possibly replaced) action then moves the
        agent one cell unless that would leave the grid or enter a wall.

        Reward: on a goal cell, ``-self.step_cost`` plus the entry of
        ``self.goal_rewards`` at the index of the first matching goal; on a
        lava cell, ``-self.lava_cost``; elsewhere, ``-self.step_cost``.

        :param s: (GridWorldState) state
        :param a: (str) action
        :return: reward and resulting state (r, s_p); ``(0., s)`` when ``s``
            is already terminal
        """
        if s.is_terminal():
            return 0., s

        if self.slip_prob > random.random():  # Flip direction
            # action -> (both perpendicular directions, single fixed one)
            # NOTE(review): a truthy slip_unidirectional picks randomly
            # between BOTH perpendicular directions, a falsy one always uses
            # the fixed direction. The flag name suggests the opposite;
            # confirm the intent.
            slip_table = {
                "up": (["left", "right"], "right"),
                "down": (["left", "right"], "left"),
                "left": (["up", "down"], "up"),
                "right": (["up", "down"], "down"),
            }
            if a in slip_table:
                either_side, fixed_side = slip_table[a]
                if self.slip_unidirectional:
                    a = random.choice(either_side)
                else:
                    a = fixed_side

        col, row = s.x, s.y
        new_col, new_row = col, row
        if a == "up":
            if row < self.height and not self.is_wall(col, row + 1):
                new_row = row + 1
        elif a == "down":
            if row > 1 and not self.is_wall(col, row - 1):
                new_row = row - 1
        elif a == "right":
            if col < self.width and not self.is_wall(col + 1, row):
                new_col = col + 1
        elif a == "left":
            if col > 1 and not self.is_wall(col - 1, row):
                new_col = col - 1
        s_p = GridWorldState(new_col, new_row)

        landing = (s_p.x, s_p.y)
        at_goal = landing in self.goal_locs

        if at_goal and self.is_goal_terminal:
            s_p.set_terminal(True)

        if at_goal:
            r = -self.step_cost
            # Only the first goal matching the landing cell contributes.
            for idx, goal in enumerate(self.goal_locs):
                if landing == goal:
                    r += self.goal_rewards[idx]
                    break
        elif landing in self.lava_locs:
            r = 0. - self.lava_cost
        else:
            r = 0. - self.step_cost

        return r, s_p
Beispiel #7
0
 def location_invariance_equivalency(self, state1, action1, state_prime1, state2, action2):
     """
     Replay the displacement of one observed transition from another state.

     When both actions are equal, the (x, y) offset between ``state1`` and
     ``state_prime1`` is applied to ``state2`` and the resulting state is
     returned with its terminal flag set from ``self._terminal_function``.
     No bounds or wall check is applied to the resulting coordinates.

     Returns:
         (GridWorldState or None): the translated state, or None when the
             two actions differ.
     """
     if not (action1 == action2):
         return None

     shift_x = state_prime1.x - state1.x
     shift_y = state_prime1.y - state1.y

     translated = GridWorldState(state2.x + shift_x, state2.y + shift_y)
     translated.set_terminal(self._terminal_function(translated))
     return translated
Beispiel #8
0
 def states(self):
     """
     Return a list of the states of the environment.

     One GridWorldState is created per cell, iterating x over
     1..self.width (outer) and y over 1..self.height (inner). Cells listed
     in ``self.goal_locs`` are marked terminal when
     ``self.is_goal_terminal`` is set.

     :return: list of states
     """
     def _build(col, row):
         # Create the state for one cell and flag it if it is a terminal goal.
         cell = GridWorldState(col, row)
         if self.is_goal_terminal and (col, row) in self.goal_locs:
             cell.set_terminal(True)
         return cell

     return [
         _build(col, row)
         for col in range(1, self.width + 1)
         for row in range(1, self.height + 1)
     ]
Beispiel #9
0
    def transition(self, s, a):
        """
        Joint transition method: sample the next state and its reward.

        With probability ``self.slip_prob`` the action is replaced by one of
        the two perpendicular directions, chosen uniformly. The agent then
        moves one cell unless that would leave the grid or enter a wall.

        Reward: on a goal cell, ``self.goal_reward - self.step_cost``; on a
        lava cell, ``-self.lava_cost``; elsewhere, ``-self.step_cost`` plus,
        when ``self.reward_span > 0``, a Gaussian "heat" term summed over
        all goals: ``goal_reward * exp(-d^2 / (2 * reward_span^2))`` with
        ``d`` the Euclidean distance from the new cell to the goal.

        :param s: (GridWorldState) state
        :param a: (str) action
        :return: reward and resulting state (r, s_p); ``(0., s)`` when ``s``
            is already terminal
        """
        if s.is_terminal():
            return 0., s

        if self.slip_prob > random.random():  # Flip direction
            if a in ("up", "down"):
                a = random.choice(["left", "right"])
            elif a in ("left", "right"):
                a = random.choice(["up", "down"])

        col, row = s.x, s.y
        new_col, new_row = col, row
        if a == "up":
            if row < self.height and not self.is_wall(col, row + 1):
                new_row = row + 1
        elif a == "down":
            if row > 1 and not self.is_wall(col, row - 1):
                new_row = row - 1
        elif a == "right":
            if col < self.width and not self.is_wall(col + 1, row):
                new_col = col + 1
        elif a == "left":
            if col > 1 and not self.is_wall(col - 1, row):
                new_col = col - 1
        s_p = GridWorldState(new_col, new_row)

        landing = (s_p.x, s_p.y)
        at_goal = landing in self.goal_locs

        if at_goal and self.is_goal_terminal:
            s_p.set_terminal(True)

        if at_goal:
            return self.goal_reward - self.step_cost, s_p
        if landing in self.lava_locs:
            return -self.lava_cost, s_p

        # Shaped reward: accumulated goal by goal, in goal_locs order.
        heat_reward = 0.
        if self.reward_span > 0.:
            for goal in self.goal_locs:
                sq_dist = (s_p.x - goal[0])**2 + (s_p.y - goal[1])**2
                heat_reward += self.goal_reward * np.exp(
                    -sq_dist / (2. * self.reward_span**2))
        return heat_reward - self.step_cost, s_p
Beispiel #10
0
    def _transition_func(self, state, action):
        '''
        Shift the agent by a fixed 0.01 along one axis.

        "up"/"down" change y, "right"/"left" change x; any other action
        keeps the position. No bounds are applied to the new coordinates.

        Args:
            state (State): current state.
            action (str): "up", "down", "right" or "left".

        Returns:
            (State): a new GridWorldState, flagged terminal when its (x, y)
                pair is in ``self.goal_locs`` and ``self.is_goal_terminal``.
        '''
        # NOTE(review): a terminal input state is not short-circuited here,
        # so it can be moved out of - confirm that this is intended.
        new_x, new_y = state.x, state.y
        if action == "up":
            new_y = state.y + .01
        elif action == "down":
            new_y = state.y - .01
        elif action == "right":
            new_x = state.x + .01
        elif action == "left":
            new_x = state.x - .01

        next_state = GridWorldState(new_x, new_y)

        # NOTE(review): this is an exact membership test on coordinates built
        # from repeated 0.01 increments; presumably goal_locs holds matching
        # float pairs - verify that goals can actually be hit.
        at_goal = (next_state.x, next_state.y) in self.goal_locs
        if at_goal and self.is_goal_terminal:
            next_state.set_terminal(True)

        return next_state
Beispiel #11
0
    def _transition_func(self, state, action):
        '''
        Sample the successor of ``state`` under ``action``.

        Unless the state/action pair is a goal pair, the action is replaced
        with probability ``self.slip_prob`` by one of the two perpendicular
        directions, chosen uniformly. The agent then moves one cell unless
        that would leave the grid or enter a wall, in which case it stays.

        The successor is marked terminal when it lands on a goal cell and
        ``self.is_goal_terminal`` is set, or on a lava cell and
        ``self.is_lava_terminal`` is set.

        Args:
            state (State): current state; returned as-is when terminal.
            action (str): "up", "down", "right" or "left".

        Returns:
            (State): the resulting GridWorldState.
        '''
        if state.is_terminal():
            return state

        # random.random() is only drawn when this is not a goal state/action
        # pair (short-circuit), so goal moves never slip.
        if not(self._is_goal_state_action(state, action)) and self.slip_prob > random.random():
            # Flip dir.
            if action in ("up", "down"):
                action = random.choice(["left", "right"])
            elif action in ("left", "right"):
                action = random.choice(["up", "down"])

        if action == "up" and state.y < self.height and not self.is_wall(state.x, state.y + 1):
            next_state = GridWorldState(state.x, state.y + 1)
        elif action == "down" and state.y > 1 and not self.is_wall(state.x, state.y - 1):
            next_state = GridWorldState(state.x, state.y - 1)
        elif action == "right" and state.x < self.width and not self.is_wall(state.x + 1, state.y):
            next_state = GridWorldState(state.x + 1, state.y)
        elif action == "left" and state.x > 1 and not self.is_wall(state.x - 1, state.y):
            next_state = GridWorldState(state.x - 1, state.y)
        else:
            next_state = GridWorldState(state.x, state.y)

        landing = (next_state.x, next_state.y)
        landed_in_term_goal = landing in self.goal_locs and self.is_goal_terminal
        landed_in_term_lava = landing in self.lava_locs and self.is_lava_terminal
        # Lava ends the episode only when is_lava_terminal is set. A former
        # unconditional "lava is always terminal" override made that flag
        # ineffective and has been removed.
        if landed_in_term_goal or landed_in_term_lava:
            next_state.set_terminal(True)

        return next_state