def _transition_func(self, state, action):
    '''
    Args:
        state (simple_rl.State)
        action (str)

    Returns:
        state (simple_rl.State)

    Summary:
        Moves the agent by self.delta plus a small Gaussian perturbation
        in the direction named by @action. A terminal @state is returned
        unchanged, and any action other than "up", "down", "left" or
        "right" yields a copy of the current position. The moved
        coordinate is always kept inside [0, 1].
    '''
    if state.is_terminal():
        return state

    # Standard-normal sample scaled by 1/100. The sample is unbounded, so
    # the resulting step can occasionally be negative.
    noise = np.random.randn(1)[0] / 100.0
    to_move = self.delta + noise

    def _clamp(value):
        # Clamp on BOTH sides: a negative step would otherwise carry the
        # coordinate past the bound opposite to the requested direction.
        return min(max(value, 0), 1)

    if action == "up":
        next_state = GridWorldState(state.x, _clamp(state.y + to_move))
    elif action == "down":
        next_state = GridWorldState(state.x, _clamp(state.y - to_move))
    elif action == "right":
        next_state = GridWorldState(_clamp(state.x + to_move), state.y)
    elif action == "left":
        next_state = GridWorldState(_clamp(state.x - to_move), state.y)
    else:
        next_state = GridWorldState(state.x, state.y)

    # Terminality is decided from the (state, action) pair that was taken,
    # not from the coordinates of the resulting state.
    if self._is_goal_state_action(state, action) and self.is_goal_terminal:
        next_state.set_terminal(True)

    return next_state
def _transition_func(self, state, action):
    '''
    Args:
        state (simple_rl.State)
        action (str)

    Returns:
        state (simple_rl.State)

    Summary:
        Takes one noisy step of nominal length self.delta along the axis
        selected by @action. Terminal states are absorbing; unrecognised
        actions leave the position as it is. Whichever coordinate moves
        is clamped to the interval [0, 1].
    '''
    if state.is_terminal():
        return state

    # Gaussian jitter (standard normal / 100). Because the sample has no
    # lower bound, the effective step may turn out negative.
    step = self.delta + np.random.randn(1)[0] / 100.0

    new_x, new_y = state.x, state.y
    if action == "up":
        new_y = state.y + step
    elif action == "down":
        new_y = state.y - step
    elif action == "right":
        new_x = state.x + step
    elif action == "left":
        new_x = state.x - step

    # Only the coordinate that actually moved is clamped, and it is clamped
    # on both sides so that a negative step cannot leave [0, 1] through the
    # bound opposite to the requested direction.
    if action in ("up", "down"):
        new_y = min(max(new_y, 0), 1)
    elif action in ("right", "left"):
        new_x = min(max(new_x, 0), 1)

    next_state = GridWorldState(new_x, new_y)

    # The goal test looks at the originating state and the action taken.
    if self._is_goal_state_action(state, action) and self.is_goal_terminal:
        next_state.set_terminal(True)

    return next_state
def _transition_func(self, state, action):
    '''
    Args:
        state (State)
        action (str)

    Returns
        (State)

    Summary:
        Deterministic one-cell move on a grid indexed from 1. The move is
        refused (the agent stays put) when it would leave the
        1..self.width by 1..self.height range or enter a wall cell.
        A terminal @state is returned unchanged.
    '''
    if state.is_terminal():
        return state

    # Start from "stay in place" and overwrite one coordinate if the
    # requested move is legal.
    new_x, new_y = state.x, state.y
    if action == "up":
        if state.y < self.height and not self.is_wall(state.x, state.y + 1):
            new_y = state.y + 1
    elif action == "down":
        if state.y > 1 and not self.is_wall(state.x, state.y - 1):
            new_y = state.y - 1
    elif action == "right":
        if state.x < self.width and not self.is_wall(state.x + 1, state.y):
            new_x = state.x + 1
    elif action == "left":
        if state.x > 1 and not self.is_wall(state.x - 1, state.y):
            new_x = state.x - 1

    next_state = GridWorldState(new_x, new_y)

    # Landing on a goal cell ends the episode only when goals are terminal.
    landed_on_goal = (next_state.x, next_state.y) in self.goal_locs
    if landed_on_goal and self.is_goal_terminal:
        next_state.set_terminal(True)

    return next_state
def get_all_states(self):
    '''
    Returns:
        (set): one GridWorldState for every (x, y) with x in
            1..self.width and y in 1..self.height, each with its terminal
            flag set from self._terminal_function.
    '''
    def _build_cell(col, row):
        # The terminal flag is set before the state is placed in the set.
        cell = GridWorldState(col, row)
        cell.set_terminal(self._terminal_function(cell))
        return cell

    return {
        _build_cell(col, row)
        for col in range(1, self.width + 1)
        for row in range(1, self.height + 1)
    }
def _transition_func(self, state, action):
    '''
    Args:
        state (simple_rl)
        action (str)

    Returns
        (list, list): the candidate next states and, in the same order,
            their probabilities.

    Summary:
        Every action has three possible outcomes: the intended
        displacement (probability 1 - self.slip_prob) and the same
        displacement shifted by one cell to either side
        (probability self.slip_prob / 2 each). "jump" actions travel two
        cells instead of one. Targets are clipped to the grid; a target
        that is a wall leaves the agent where it was. A terminal @state
        yields ([state], [1]); an unknown action yields three copies of
        the current position.
    '''
    if state.is_terminal():
        return [state], [1]

    sideways = [-1, 0, 1]
    # action -> (x offsets, y offsets) of the three outcomes.
    offsets = {
        "up": (sideways, [1, 1, 1]),
        "down": (sideways, [-1, -1, -1]),
        "right": ([1, 1, 1], sideways),
        "left": ([-1, -1, -1], sideways),
        "jump up": (sideways, [2, 2, 2]),
        "jump down": (sideways, [-2, -2, -2]),
        "jump right": ([2, 2, 2], sideways),
        "jump left": ([-2, -2, -2], sideways),
    }
    dx, dy = offsets.get(action, ([0, 0, 0], [0, 0, 0]))

    next_states = []
    for step_x, step_y in zip(dx, dy):
        target_x = np.clip(state.x + step_x, 1, self.width)
        target_y = np.clip(state.y + step_y, 1, self.height)

        # Walls block the move: the outcome is the current position.
        if self.is_wall(target_x, target_y):
            outcome = GridWorldState(state.x, state.y)
        else:
            outcome = GridWorldState(target_x, target_y)

        outcome.set_terminal(self._terminal_function(outcome))
        next_states.append(outcome)

    half_slip = self.slip_prob / 2.
    p = [half_slip, 1 - self.slip_prob, half_slip]
    assert len(next_states) == len(p)

    return next_states, p
def transition(self, s, a):
    """
    Joint transition method: sample one step and its reward.

    With probability self.slip_prob the requested direction is replaced by
    a perpendicular one. When self.slip_unidirectional is truthy the
    replacement is drawn at random from the two perpendicular directions;
    otherwise it is fixed (up->right, down->left, left->up, right->down).
    NOTE(review): the flag name suggests the opposite mapping - confirm.

    :param s: (GridWorldState) state
    :param a: (str) action
    :return: reward and resulting state (r, s_p); (0., s) if s is terminal
    """
    if s.is_terminal():
        return 0., s

    if self.slip_prob > random.random():  # Flip direction
        # action -> (random candidates, fixed replacement)
        slip_table = {
            "up": (["left", "right"], "right"),
            "down": (["left", "right"], "left"),
            "left": (["up", "down"], "up"),
            "right": (["up", "down"], "down"),
        }
        if a in slip_table:
            candidates, fixed = slip_table[a]
            a = random.choice(candidates) if self.slip_unidirectional else fixed

    # Default is to stay put; a legal move overwrites one coordinate.
    new_x, new_y = s.x, s.y
    if a == "up":
        if s.y < self.height and not self.is_wall(s.x, s.y + 1):
            new_y = s.y + 1
    elif a == "down":
        if s.y > 1 and not self.is_wall(s.x, s.y - 1):
            new_y = s.y - 1
    elif a == "right":
        if s.x < self.width and not self.is_wall(s.x + 1, s.y):
            new_x = s.x + 1
    elif a == "left":
        if s.x > 1 and not self.is_wall(s.x - 1, s.y):
            new_x = s.x - 1
    s_p = GridWorldState(new_x, new_y)

    landing = (s_p.x, s_p.y)
    at_goal = landing in self.goal_locs
    if at_goal and self.is_goal_terminal:
        s_p.set_terminal(True)

    if at_goal:
        # Step cost is still paid; the reward of the first matching goal
        # location is added on top.
        r = -self.step_cost
        for idx, goal in enumerate(self.goal_locs):
            if landing == goal:
                r += self.goal_rewards[idx]
                break
    elif landing in self.lava_locs:
        r = 0. - self.lava_cost
    else:
        r = 0. - self.step_cost

    return r, s_p
def location_invariance_equivalency(self, state1, action1, state_prime1, state2, action2):
    '''
    Transfer an observed displacement to another starting state.

    Args:
        state1: state the observed transition started from
        action1: action taken in state1
        state_prime1: state reached from state1
        state2: state to apply the same displacement to
        action2: action taken in state2

    Returns:
        GridWorldState obtained by adding (state_prime1 - state1) to
        state2's coordinates, with its terminal flag set from
        self._terminal_function; None when the two actions differ.
    '''
    if not (action1 == action2):
        return None

    shifted_x = state2.x + (state_prime1.x - state1.x)
    shifted_y = state2.y + (state_prime1.y - state1.y)

    predicted = GridWorldState(shifted_x, shifted_y)
    predicted.set_terminal(self._terminal_function(predicted))
    return predicted
def states(self):
    """
    Enumerate every cell of the grid as a state.

    Cells are visited column by column (x from 1 to self.width, and for
    each x, y from 1 to self.height). A cell listed in self.goal_locs is
    flagged terminal when self.is_goal_terminal is set.

    :return: list of states
    """
    def _cell(col, row):
        cell = GridWorldState(col, row)
        if self.is_goal_terminal and (col, row) in self.goal_locs:
            cell.set_terminal(True)
        return cell

    return [
        _cell(col, row)
        for col in range(1, self.width + 1)
        for row in range(1, self.height + 1)
    ]
def transition(self, s, a):
    """
    Joint transition method: sample one step and its reward.

    With probability self.slip_prob the requested direction is replaced by
    one of the two perpendicular directions, chosen at random. Reward is
    goal_reward - step_cost on a goal cell, -lava_cost on a lava cell, and
    otherwise -step_cost plus (when self.reward_span > 0) a sum of
    Gaussian bumps of width reward_span centred on each goal.

    :param s: (GridWorldState) state
    :param a: (str) action
    :return: reward and resulting state (r, s_p); (0., s) if s is terminal
    """
    if s.is_terminal():
        return 0., s

    if self.slip_prob > random.random():  # Flip direction
        if a in ("up", "down"):
            a = random.choice(["left", "right"])
        elif a in ("left", "right"):
            a = random.choice(["up", "down"])

    # Work out the destination; an illegal or unknown move keeps (x, y).
    x, y = s.x, s.y
    dest = (x, y)
    if a == "up":
        if y < self.height and not self.is_wall(x, y + 1):
            dest = (x, y + 1)
    elif a == "down":
        if y > 1 and not self.is_wall(x, y - 1):
            dest = (x, y - 1)
    elif a == "right":
        if x < self.width and not self.is_wall(x + 1, y):
            dest = (x + 1, y)
    elif a == "left":
        if x > 1 and not self.is_wall(x - 1, y):
            dest = (x - 1, y)
    s_p = GridWorldState(dest[0], dest[1])

    landing = (s_p.x, s_p.y)
    at_goal = landing in self.goal_locs
    if at_goal and self.is_goal_terminal:
        s_p.set_terminal(True)

    if at_goal:
        r = self.goal_reward - self.step_cost
    elif landing in self.lava_locs:
        r = -self.lava_cost
    else:
        heat_reward = 0.
        if self.reward_span > 0.:
            two_sigma_sq = 2. * self.reward_span**2
            # Accumulate goal by goal, in goal_locs order.
            for g in self.goal_locs:
                dist_sq = (s_p.x - g[0])**2 + (s_p.y - g[1])**2
                heat_reward += self.goal_reward * np.exp(-dist_sq / two_sigma_sq)
        r = heat_reward - self.step_cost

    return r, s_p
def _transition_func(self, state, action):
    '''
    Args:
        state: current state, read through its x and y attributes
        action (str)

    Returns:
        (GridWorldState): the position after a fixed .01 step in the
            direction named by @action, or a copy of the current
            position for any other action.

    Notes:
        No bounds are applied, and @state is not checked for terminality
        before moving.
    '''
    new_x, new_y = state.x, state.y
    if action == "up":
        new_y = state.y + .01
    elif action == "down":
        new_y = state.y - .01
    elif action == "right":
        new_x = state.x + .01
    elif action == "left":
        new_x = state.x - .01

    next_state = GridWorldState(new_x, new_y)

    # Exact membership test: the resulting coordinates must equal a goal
    # location exactly for the state to be flagged terminal.
    reached_goal = (next_state.x, next_state.y) in self.goal_locs
    if reached_goal and self.is_goal_terminal:
        next_state.set_terminal(True)

    return next_state
def _transition_func(self, state, action):
    '''
    Args:
        state (State)
        action (str)

    Returns
        (State)

    Summary:
        One-cell move on a grid indexed from 1, with slipping. Unless
        (state, action) is a goal state-action, the requested direction
        is replaced with probability self.slip_prob by a random
        perpendicular one. Moves off the grid or into a wall leave the
        agent in place. A terminal @state is returned unchanged.
    '''
    if state.is_terminal():
        return state

    # The goal check runs first; a random number is only drawn when the
    # pair is not a goal state-action.
    if not(self._is_goal_state_action(state, action)) and self.slip_prob > random.random():
        # Flip dir.
        if action in ("up", "down"):
            action = random.choice(["left", "right"])
        elif action in ("left", "right"):
            action = random.choice(["up", "down"])

    new_x, new_y = state.x, state.y
    if action == "up":
        if state.y < self.height and not self.is_wall(state.x, state.y + 1):
            new_y = state.y + 1
    elif action == "down":
        if state.y > 1 and not self.is_wall(state.x, state.y - 1):
            new_y = state.y - 1
    elif action == "right":
        if state.x < self.width and not self.is_wall(state.x + 1, state.y):
            new_x = state.x + 1
    elif action == "left":
        if state.x > 1 and not self.is_wall(state.x - 1, state.y):
            new_x = state.x - 1
    next_state = GridWorldState(new_x, new_y)

    hit_terminal_goal = (next_state.x, next_state.y) in self.goal_locs and self.is_goal_terminal
    hit_terminal_lava = (next_state.x, next_state.y) in self.lava_locs and self.is_lava_terminal
    if hit_terminal_goal or hit_terminal_lava:
        next_state.set_terminal(True)

    # NOTE(review): this marks every lava cell terminal regardless of
    # self.is_lava_terminal, so that flag currently has no effect on the
    # result - confirm whether this is intended.
    if (next_state.x, next_state.y) in self.lava_locs:
        next_state.set_terminal(True)

    return next_state