def transition_probability(self, state: tuple, action: int,
                               next_state: tuple) -> float:
        """
        Return probability to reach `next_state` from `state` using `action`.

        Only the `DOWN_PROB` and `RIGHT_PROB` actions are treated specially;
        every other action yields `1.`. For those two actions:

        * if `next_state` lies in the direction the action names, the result
          is `1. - self.p_stochastic`;
        * if it lies in the other of the two directions, the result is
          `self.p_stochastic`;
        * if neither `ue` position check succeeds, the result is `1.`.

        The "right" check is evaluated before the "down" check, so when both
        would succeed the "right" outcome wins.

        :param state: initial position
        :param action: action to do
        :param next_state: next position reached
        :return: probability of the transition
        """

        if action == self.actions['DOWN_PROB']:
            wants_down = True
        elif action == self.actions['RIGHT_PROB']:
            wants_down = False
        else:
            # Any other action keeps the default probability.
            return 1.

        if ue.is_on_right_or_same_position(state=state,
                                           next_position=next_state):
            followed_action = not wants_down
        elif ue.is_on_down_or_same_position(state=state,
                                            next_state=next_state):
            followed_action = wants_down
        else:
            # Neither position check matched: default probability.
            return 1.

        if followed_action:
            return 1. - self.p_stochastic

        return self.p_stochastic
Example #2
0
    def transition_probability(self, state: tuple, action: int,
                               next_state: tuple) -> float:
        """
        Return probability to reach `next_state` from `state` using `action`.

        Only the `DOWN_PROB` and `RIGHT_PROB` actions are treated specially;
        every other action yields `1.`. For those two actions:

        * if `next_state` lies in the direction the action names, the result
          is `self.p_stochastic`;
        * if it lies in the other of the two directions, the result is
          `1. - self.p_stochastic`;
        * if neither `ue` position check succeeds, the result is `1.`.

        The "right" check is evaluated before the "down" check, so when both
        would succeed the "right" outcome wins.

        :param state: initial position
        :param action: action to do
        :param next_state: next position reached
        :return: probability of the transition
        """

        if action == self.actions['DOWN_PROB']:
            down_requested = True
        elif action == self.actions['RIGHT_PROB']:
            down_requested = False
        else:
            # Any other action keeps the default probability.
            return 1.

        if ue.is_on_right_or_same_position(state=state,
                                           next_position=next_state):
            matches_action = not down_requested
        elif ue.is_on_down_or_same_position(state=state,
                                            next_state=next_state):
            matches_action = down_requested
        else:
            # Neither position check matched: default probability.
            return 1.

        if matches_action:
            return self.p_stochastic

        return 1. - self.p_stochastic
Example #3
0
    def transition_probability(self, state: tuple, action: int,
                               next_state: tuple) -> float:
        """
        Return probability to reach `next_state` from `state` using `action`.

        The result is read from `self.transitions` at index
        `(len(self.actions) - action + offset) % len(self.actions)`, where
        `offset` depends on where `next_state` lies relative to `state`:
        0 when the "up" check succeeds, 1 for "right", 2 for "down" and 3
        when none of those checks succeeds (presumably "left" -- confirm
        against the callers). The checks are tried in that order and the
        first one that succeeds decides the offset.

        :param state: initial position
        :param action: action to do
        :param next_state: next position reached
        :return: probability of the transition
        """

        total_actions = len(self.actions)
        shift = total_actions - action

        # Offset of the direction actually taken.
        if ue.is_on_up_or_same_position(state=state, next_state=next_state):
            offset = 0
        elif ue.is_on_right_or_same_position(state=state,
                                             next_position=next_state):
            offset = 1
        elif ue.is_on_down_or_same_position(state=state,
                                            next_state=next_state):
            offset = 2
        else:
            offset = 3

        return self.transitions[(shift + offset) % total_actions]
Example #4
0
    def transition_probability(self, state: tuple, action: int,
                               next_state: tuple) -> float:
        """
        Return probability to reach `next_state` from `state` using `action`.

        When `action` is one of `UP`, `RIGHT`, `DOWN` or `LEFT` and the
        matching `ue` position check succeeds for `next_state`, the result is
        `self.n_transition`. In every other case the result is
        `(1. - self.n_transition) / self.action_space.n`.

        :param state: initial position
        :param action: action to do
        :param next_state: next position reached
        :return: probability of the transition
        """
        intended_probability = self.n_transition

        # (action name, position check) pairs. The checks are wrapped in
        # lambdas so each one only runs when its action matches.
        direction_checks = (
            ('UP', lambda: ue.is_on_up_or_same_position(
                state=state, next_state=next_state)),
            ('RIGHT', lambda: ue.is_on_right_or_same_position(
                state=state, next_position=next_state)),
            ('DOWN', lambda: ue.is_on_down_or_same_position(
                state=state, next_state=next_state)),
            ('LEFT', lambda: ue.is_on_left_or_same_position(
                state=state, next_state=next_state)),
        )

        for name, is_in_direction in direction_checks:
            if action == self.actions[name] and is_in_direction():
                return intended_probability

        # No action/direction pair matched.
        return (1. - self.n_transition) / self.action_space.n
    def transition_probability(self, state: tuple, action: int,
                               next_state: tuple) -> float:
        """
        Return probability to reach `next_state` from `state` using `action`.

        When `action` is one of `UP`, `RIGHT`, `DOWN` or `LEFT` and the
        matching `ue` position check succeeds for `next_state`, the result is
        `self.transitions[0]`; otherwise it is `self.transitions[1]`.

        NOTE(review): the previous docstring stated that non-stochastic
        environments always return 1; that depends on how `self.transitions`
        is configured -- confirm.

        :param state: initial position
        :param action: action to do
        :param next_state: next position reached
        :return: probability of the transition
        """

        fallback_probability = self.transitions[1]

        # (action name, position check) pairs. The checks are wrapped in
        # lambdas so each one only runs when its action matches.
        direction_checks = (
            ('UP', lambda: ue.is_on_up_or_same_position(
                state=state, next_state=next_state)),
            ('RIGHT', lambda: ue.is_on_right_or_same_position(
                state=state, next_position=next_state)),
            ('DOWN', lambda: ue.is_on_down_or_same_position(
                state=state, next_state=next_state)),
            ('LEFT', lambda: ue.is_on_left_or_same_position(
                state=state, next_state=next_state)),
        )

        went_straight = any(
            action == self.actions[name] and is_in_direction()
            for name, is_in_direction in direction_checks)

        if went_straight:
            return self.transitions[0]

        return fallback_probability
Example #6
0
    def test_transition_probability(self):

        # For every state, every action and every reachable next state, the
        # probability must be `n_transition` when the next state lies in the
        # direction named by the action, and
        # `(1 - n_transition) / action_space.n` otherwise.
        environment = self.environment

        for state in environment.states():

            # Set state as current state
            environment.current_state = state

            for action in environment.action_space:

                for next_state in environment.reachable_states(
                        state=state, action=action):

                    probability = environment.transition_probability(
                        state=state, action=action, next_state=next_state)

                    # Position check first, action comparison second.
                    moved_as_requested = (
                        (ue.is_on_up_or_same_position(
                            state=state, next_state=next_state)
                         and action == environment.actions['UP'])
                        or (ue.is_on_right_or_same_position(
                            state=state, next_position=next_state)
                            and action == environment.actions['RIGHT'])
                        or (ue.is_on_down_or_same_position(
                            state=state, next_state=next_state)
                            and action == environment.actions['DOWN'])
                        or (ue.is_on_left_or_same_position(
                            state=state, next_state=next_state)
                            and action == environment.actions['LEFT']))

                    if moved_as_requested:
                        expected = environment.n_transition
                    else:
                        expected = ((1. - environment.n_transition) /
                                    environment.action_space.n)

                    self.assertEqual(expected, probability)
    def test_transition_probability(self):

        environment = self.environment

        # Get actions
        action_up, action_down, action_right, action_left = (
            environment.actions[name]
            for name in ('UP', 'DOWN', 'RIGHT', 'LEFT'))

        # For every state, every action and every reachable next state: when
        # the next state lies in the direction named by the action, the
        # probability must equal the matching entry of `transitions`. Other
        # combinations are not checked.
        for state in environment.states():

            # Set state as current state
            environment.current_state = state

            for action in environment.action_space:

                for next_state in environment.reachable_states(
                        state=state, action=action):

                    probability = environment.transition_probability(
                        state=state, action=action, next_state=next_state)

                    n_actions = len(environment.actions)
                    coefficient = n_actions - action

                    # Offset of the direction that matches the action.
                    if action == action_up and ue.is_on_up_or_same_position(
                            state=state, next_state=next_state):
                        offset = 0
                    elif (action == action_right
                          and ue.is_on_right_or_same_position(
                              state=state, next_position=next_state)):
                        offset = 1
                    elif (action == action_down
                          and ue.is_on_down_or_same_position(
                              state=state, next_state=next_state)):
                        offset = 2
                    elif (action == action_left
                          and ue.is_on_left_or_same_position(
                              state=state, next_state=next_state)):
                        offset = 3
                    else:
                        # Nothing to verify for this combination.
                        continue

                    expected = environment.transitions[
                        (coefficient + offset) % n_actions]
                    self.assertEqual(expected, probability)
    def test_transition_probability(self):

        # For every state, every action and every reachable next state:
        #  * RIGHT_PROB / DOWN_PROB: `p_stochastic` when the next state lies
        #    in the direction named by the action, `1 - p_stochastic`
        #    otherwise;
        #  * DOWN: always 1;
        #  * any other action is rejected with ValueError.
        environment = self.environment

        for state in environment.states():

            # Set state as current state
            environment.current_state = state

            # For each action in action space
            for action in environment.action_space:

                for next_state in environment.reachable_states(
                        state=state, action=action):

                    probability = environment.transition_probability(
                        state=state, action=action, next_state=next_state)

                    if action == environment.actions['RIGHT_PROB']:
                        followed_action = ue.is_on_right_or_same_position(
                            state=state, next_position=next_state)
                    elif action == environment.actions['DOWN_PROB']:
                        followed_action = ue.is_on_down_or_same_position(
                            state=state, next_state=next_state)
                    elif action == environment.actions['DOWN']:
                        self.assertEqual(1., probability)
                        continue
                    else:
                        raise ValueError('Action invalid.')

                    if followed_action:
                        expected = environment.p_stochastic
                    else:
                        expected = 1. - environment.p_stochastic

                    self.assertEqual(expected, probability)