def __call__(self, state, action, mdp):
    '''
    Sample the successor of `state` when `action` is attempted in `mdp`.

    This needs access to the MDP parameters: `mdp.width`, `mdp.height`,
    `mdp.walls` and `mdp.goal_location` are read here.

    With probability `self.slip_prob` an UP/DOWN action is replaced by a
    random choice of LEFT/RIGHT, and a LEFT/RIGHT action by a random choice
    of UP/DOWN. The agent then moves one cell in the resulting direction
    unless that would leave the grid (the bounds checks keep x within
    1..mdp.width and y within 1..mdp.height) or enter a cell listed in
    `mdp.walls`; in that case it stays where it is.

    The input `state` is never modified: if the agent ends up in a goal
    cell without having moved, a fresh GridWorldState is marked terminal
    and returned instead of flagging the caller's object.

    Parameters:
        state:GridWorldState
        action:Enum
        mdp:GridWorldMDP

    Returns:
        state:GridWorldState
    '''
    # If terminal, do nothing
    if state.is_terminal():
        return state

    # Apply slip probability and change action if applicable
    if random.random() < self.slip_prob:
        if action in [Dir.UP, Dir.DOWN]:
            action = random.choice([Dir.LEFT, Dir.RIGHT])
        elif action in [Dir.LEFT, Dir.RIGHT]:
            action = random.choice([Dir.UP, Dir.DOWN])

    # Start from "no movement"; only a legal move replaces this.
    next_state = state

    # Calculate next state based on action. The action is fixed by now,
    # so at most one of these branches can apply.
    if action == Dir.UP:
        if state.y < mdp.height and (state.x, state.y + 1) not in mdp.walls:
            next_state = GridWorldState(state.x, state.y + 1)
    elif action == Dir.DOWN:
        if state.y > 1 and (state.x, state.y - 1) not in mdp.walls:
            next_state = GridWorldState(state.x, state.y - 1)
    elif action == Dir.LEFT:
        if state.x > 1 and (state.x - 1, state.y) not in mdp.walls:
            next_state = GridWorldState(state.x - 1, state.y)
    elif action == Dir.RIGHT:
        if state.x < mdp.width and (state.x + 1, state.y) not in mdp.walls:
            next_state = GridWorldState(state.x + 1, state.y)

    # Entering (or remaining in) a goal cell makes the next state terminal
    if (next_state.x, next_state.y) in mdp.goal_location:
        if next_state is state:
            # The move was blocked, so next_state still aliases the
            # caller's object; mark a copy to avoid mutating the input.
            next_state = GridWorldState(state.x, state.y)
        next_state.set_terminal(True)

    return next_state
def transition(self, state, action):
    '''
    Sample the state that follows `state` when `action` is attempted.

    A state for which `self.is_goal_state` is true is returned unchanged.
    Otherwise, with probability `self.slip_prob`, an UP/DOWN action is
    swapped for a random LEFT/RIGHT one and a LEFT/RIGHT action for a
    random UP/DOWN one. The agent then moves one cell in that direction,
    provided the move stays on the grid (x within 1..self.width, y within
    1..self.height) and the target cell is not in `self.walls`; otherwise
    it stays put. A resulting state located in `self.goal_location` is
    flagged terminal before being returned.

    Parameters:
        state:GridWorldState
        action:Enum

    Returns:
        state:GridWorldState
    '''
    # Once the goal has been reached no action has any effect
    if self.is_goal_state(state):
        return state

    # Slip: possibly replace the action by one along the other axis
    slipped = random.random() < self.slip_prob
    if slipped:
        if action in (Dir.UP, Dir.DOWN):
            action = random.choice([Dir.LEFT, Dir.RIGHT])
        elif action in (Dir.LEFT, Dir.RIGHT):
            action = random.choice([Dir.UP, Dir.DOWN])

    # Work out the cell the action points at; None means the move would
    # leave the grid or the action is not one of the four directions.
    col, row = state.x, state.y
    target = None
    if action == Dir.UP:
        if row < self.height:
            target = (col, row + 1)
    elif action == Dir.DOWN:
        if row > 1:
            target = (col, row - 1)
    elif action == Dir.LEFT:
        if col > 1:
            target = (col - 1, row)
    elif action == Dir.RIGHT:
        if col < self.width:
            target = (col + 1, row)

    # Walls block movement exactly like the grid boundary does
    if target is None or target in self.walls:
        result = state
    else:
        result = GridWorldState(target[0], target[1])

    # Landing on a goal cell ends the episode: mark the state terminal
    if (result.x, result.y) in self.goal_location:
        result.set_terminal(True)

    return result
def transition(self, state, action):
    '''
    Sample the state that follows `state` when `action` is attempted in
    the two-room layout.

    A state for which `self.is_goal_state` is true is returned unchanged.
    Otherwise, with probability `self.slip_prob`, an UP/DOWN action is
    replaced by a random LEFT/RIGHT one and any other action by a random
    UP/DOWN one. If `state` itself is not inside the rooms (according to
    `self.is_inside_rooms`) the action has no effect and `state` is
    returned. Otherwise the agent moves one cell in the chosen direction
    when the neighbouring cell is inside the rooms, and stays on its
    current cell (returned as a fresh GridWorldState) when it is not.
    A resulting state located in `self.goal_location` is flagged terminal.

    Parameters:
        state:GridWorldState
        action:Enum

    Returns:
        state:GridWorldState
    '''
    # If in goal state, no actions available
    if self.is_goal_state(state):
        return state

    # Apply slip probability
    if random.random() < self.slip_prob:
        if action in [Dir.UP, Dir.DOWN]:
            action = random.choice([Dir.LEFT, Dir.RIGHT])
        else:
            action = random.choice([Dir.UP, Dir.DOWN])

    # Start by assigning next_state to the current state. This way only
    # the cases where the action actually applies need handling below.
    next_state = state

    # Check if state is outside of the two rooms; if so the action
    # should have no effect
    if not self.is_inside_rooms(state):
        return next_state

    # Translate the action into a one-cell displacement. Whether the
    # displaced cell is legal (room interior or hallway) is decided
    # entirely by is_inside_rooms, so no per-direction geometry is needed.
    if action == Dir.UP:
        step = (0, 1)
    elif action == Dir.DOWN:
        step = (0, -1)
    elif action == Dir.LEFT:
        step = (-1, 0)
    elif action == Dir.RIGHT:
        step = (1, 0)
    else:
        # Not one of the four directions: leave next_state untouched
        step = None

    if step is not None:
        candidate = GridWorldState(state.x + step[0], state.y + step[1])
        if self.is_inside_rooms(candidate):
            next_state = candidate
        else:
            # Blocked: stay on the current cell
            next_state = GridWorldState(state.x, state.y)

    # If agent enters goal state, make next state terminal
    if (next_state.x, next_state.y) in self.goal_location:
        next_state.set_terminal(True)

    return next_state