def __call__(self, state, action, mdp):
    '''
    Sample the successor of `state` when `action` is attempted in `mdp`.

    The grid dimensions, walls and goal cells are read from `mdp`; only
    the slip probability comes from this object.

    Parameters:
        state:GridWorldState
        action:Enum
        mdp:GridWorldMDP

    Returns:
        state:GridWorldState
    '''
    # Terminal states absorb every action: hand the same object back
    if state.is_terminal():
        return state

    # With probability slip_prob the agent slips perpendicular to the
    # requested direction (vertical -> horizontal and vice versa)
    if random.random() < self.slip_prob:
        if action in (Dir.UP, Dir.DOWN):
            action = random.choice([Dir.LEFT, Dir.RIGHT])
        elif action in (Dir.LEFT, Dir.RIGHT):
            action = random.choice([Dir.UP, Dir.DOWN])

    # Work out which cell the (possibly slipped) action points at.
    # `target` stays None when the move would leave the grid; the bounds
    # checks treat 1 as the smallest valid coordinate.
    col, row = state.x, state.y
    target = None
    if action == Dir.UP:
        if row < mdp.height:
            target = (col, row + 1)
    elif action == Dir.DOWN:
        if row > 1:
            target = (col, row - 1)
    elif action == Dir.LEFT:
        if col > 1:
            target = (col - 1, row)
    elif action == Dir.RIGHT:
        if col < mdp.width:
            target = (col + 1, row)

    # Move only when the target cell exists and is not a wall; otherwise
    # the agent stays put and the incoming state object is reused
    if target is not None and target not in mdp.walls:
        next_state = GridWorldState(*target)
    else:
        next_state = state

    # Landing on (or remaining in) a goal cell makes the state terminal
    if (next_state.x, next_state.y) in mdp.goal_location:
        next_state.set_terminal(True)

    return next_state
예제 #2
0
    def transition(self, state, action):
        '''
        Sample the successor of `state` when `action` is attempted.

        Parameters:
            state:GridWorldState
            action:Enum

        Returns:
            state:GridWorldState
        '''
        # A goal state has no outgoing transitions: return it unchanged
        if self.is_goal_state(state):
            return state

        # With probability slip_prob, swap the action for a random
        # perpendicular one
        slipped = random.random() < self.slip_prob
        if slipped:
            if action in (Dir.UP, Dir.DOWN):
                action = random.choice([Dir.LEFT, Dir.RIGHT])
            elif action in (Dir.LEFT, Dir.RIGHT):
                action = random.choice([Dir.UP, Dir.DOWN])

        # Resolve the cell the action points at; None means the move
        # would step off the grid (the bounds checks treat 1 as the
        # smallest valid coordinate)
        x, y = state.x, state.y
        destination = None
        if action == Dir.UP:
            destination = (x, y + 1) if y < self.height else None
        elif action == Dir.DOWN:
            destination = (x, y - 1) if y > 1 else None
        elif action == Dir.LEFT:
            destination = (x - 1, y) if x > 1 else None
        elif action == Dir.RIGHT:
            destination = (x + 1, y) if x < self.width else None

        # Blocked moves (off-grid or into a wall) leave the agent where
        # it is and reuse the incoming state object
        if destination is None or destination in self.walls:
            next_state = state
        else:
            next_state = GridWorldState(*destination)

        # A state located on a goal cell is flagged terminal before it
        # is returned
        if (next_state.x, next_state.y) in self.goal_location:
            next_state.set_terminal(True)
        return next_state
예제 #3
0
    def transition(self, state, action):
        '''
        Sample the successor of `state` when `action` is attempted.

        A move succeeds only when the resulting cell passes
        `self.is_inside_rooms`; otherwise the agent stays at its
        current coordinates.

        Parameters:
            state:GridWorldState
            action:Enum

        Returns:
            state:GridWorldState
        '''
        # If in goal state, no actions available
        if self.is_goal_state(state):
            return state

        # Apply slip probability: UP/DOWN slips sideways, anything else
        # slips to UP or DOWN
        if random.random() < self.slip_prob:
            if action in [Dir.UP, Dir.DOWN]:
                action = random.choice([Dir.LEFT, Dir.RIGHT])
            else:
                action = random.choice([Dir.UP, Dir.DOWN])

        # A state outside of the two rooms is unaffected by any action
        if not self.is_inside_rooms(state):
            return state

        # Unit displacement for each direction. Legality of the move is
        # delegated entirely to is_inside_rooms, so no per-direction
        # wall/hallway arithmetic is needed here.
        moves = (
            (Dir.UP, 0, 1),
            (Dir.DOWN, 0, -1),
            (Dir.LEFT, -1, 0),
            (Dir.RIGHT, 1, 0),
        )

        # Default: an unrecognised action leaves the state untouched
        next_state = state
        for direction, dx, dy in moves:
            if action == direction:
                next_state = GridWorldState(state.x + dx, state.y + dy)
                if not self.is_inside_rooms(next_state):
                    # Blocked: stay put, as a fresh state object at the
                    # same coordinates
                    next_state = GridWorldState(state.x, state.y)
                break

        # If agent enters goal state, make next state terminal
        if (next_state.x, next_state.y) in self.goal_location:
            next_state.set_terminal(True)
        return next_state