def transition_probability(self, state: tuple, action: int, next_state: tuple) -> float:
    """
    Return the probability of reaching `next_state` from `state` when doing `action`.

    The move is classified with the `ue.is_on_*_or_same_position` helpers,
    tried in the order up, right, down; a move matching none of them takes
    the last slot. That direction index (0..3) is shifted by `action` and
    wrapped modulo the number of actions to pick an entry of
    `self.transitions`.

    :param state: initial position
    :param action: action to do
    :param next_state: next position reached
    :return: the entry of `self.transitions` selected for this move
    """
    total_actions = len(self.actions)
    # NOTE(review): this shift puts the action's own direction on index 0 only
    # if actions are numbered in up/right/down/left order - confirm.
    shift = total_actions - action

    if ue.is_on_up_or_same_position(state=state, next_state=next_state):
        direction = 0
    elif ue.is_on_right_or_same_position(state=state, next_position=next_state):
        direction = 1
    elif ue.is_on_down_or_same_position(state=state, next_state=next_state):
        direction = 2
    else:
        # Neither up, right nor down: presumably a move to the left.
        direction = 3

    return self.transitions[(shift + direction) % total_actions]
def transition_probability(self, state: tuple, action: int, next_state: tuple) -> float:
    """
    Return the probability of reaching `next_state` from `state` when doing `action`.

    The result is `self.transitions[0]` when the move follows the direction
    of `action` (as judged by the matching `ue.is_on_*_or_same_position`
    helper), and `self.transitions[1]` otherwise.

    :param state: initial position
    :param action: action to do
    :param next_state: next position reached
    :return: `self.transitions[0]` for a move along `action`, else `self.transitions[1]`
    """
    # Value returned when the move does not follow the action's direction.
    off_direction_probability = self.transitions[1]

    # Pair every action name with its geometric check. The checks are wrapped
    # in lambdas so that one only runs once `action` equals its direction.
    direction_checks = (
        ('UP', lambda: ue.is_on_up_or_same_position(
            state=state, next_state=next_state)),
        ('RIGHT', lambda: ue.is_on_right_or_same_position(
            state=state, next_position=next_state)),
        ('DOWN', lambda: ue.is_on_down_or_same_position(
            state=state, next_state=next_state)),
        ('LEFT', lambda: ue.is_on_left_or_same_position(
            state=state, next_state=next_state)),
    )

    moved_along_action = any(
        action == self.actions[name] and matches()
        for name, matches in direction_checks
    )

    return self.transitions[0] if moved_along_action else off_direction_probability
def transition_probability(self, state: tuple, action: int, next_state: tuple) -> float:
    """
    Return the probability of reaching `next_state` from `state` when doing `action`.

    A move that follows the direction of `action` (as judged by the matching
    `ue.is_on_*_or_same_position` helper) gets `self.n_transition`. Any other
    move gets `(1 - self.n_transition) / self.action_space.n`.

    :param state: initial position
    :param action: action to do
    :param next_state: next position reached
    :return: probability assigned to this transition
    """
    intended_probability = self.n_transition

    # Guard clauses: the action comparison comes first, so a helper is only
    # called for the direction that `action` actually names.
    if action == self.actions['UP'] and ue.is_on_up_or_same_position(
            state=state, next_state=next_state):
        return intended_probability

    if action == self.actions['RIGHT'] and ue.is_on_right_or_same_position(
            state=state, next_position=next_state):
        return intended_probability

    if action == self.actions['DOWN'] and ue.is_on_down_or_same_position(
            state=state, next_state=next_state):
        return intended_probability

    if action == self.actions['LEFT'] and ue.is_on_left_or_same_position(
            state=state, next_state=next_state):
        return intended_probability

    # The move does not follow the action: share the remaining mass over the
    # size of the action space.
    return (1. - self.n_transition) / self.action_space.n
def test_transition_probability(self):
    """
    Check `transition_probability` for every state, action and reachable next state.

    A move that follows the direction of the action must have probability
    `n_transition`; any other move must have
    `(1 - n_transition) / action_space.n`.
    """
    env = self.environment

    for state in env.states():
        # Set state as current state
        env.current_state = state

        for action in env.action_space:
            for next_state in env.reachable_states(state=state, action=action):
                probability = env.transition_probability(
                    state=state, action=action, next_state=next_state)

                # True when the move goes in the direction named by the action.
                follows_action = (
                    (ue.is_on_up_or_same_position(
                        state=state, next_state=next_state)
                     and action == env.actions['UP'])
                    or (ue.is_on_right_or_same_position(
                        state=state, next_position=next_state)
                        and action == env.actions['RIGHT'])
                    or (ue.is_on_down_or_same_position(
                        state=state, next_state=next_state)
                        and action == env.actions['DOWN'])
                    or (ue.is_on_left_or_same_position(
                        state=state, next_state=next_state)
                        and action == env.actions['LEFT'])
                )

                if follows_action:
                    expected = env.n_transition
                else:
                    expected = (1. - env.n_transition) / env.action_space.n

                self.assertEqual(expected, probability)
def test_transition_probability(self):
    """
    Check `transition_probability` for moves that follow the chosen action.

    For every state, action and reachable next state, a move going in the
    direction of the action must have the probability stored in
    `transitions` at the index `(n_actions - action + offset) % n_actions`,
    where `offset` is 0, 1, 2, 3 for up, right, down, left. Moves that do not
    follow the action are not asserted.
    """
    env = self.environment

    # Get actions
    up = env.actions['UP']
    down = env.actions['DOWN']
    right = env.actions['RIGHT']
    left = env.actions['LEFT']

    for state in env.states():
        # Set state as current state
        env.current_state = state

        for action in env.action_space:
            for next_state in env.reachable_states(state=state, action=action):
                probability = env.transition_probability(
                    state=state, action=action, next_state=next_state)

                n_actions = len(env.actions)
                coefficient = n_actions - action

                # Pick the direction offset; the action comparison comes
                # first so a helper only runs for the action's own direction.
                if action == up and ue.is_on_up_or_same_position(
                        state=state, next_state=next_state):
                    offset = 0
                elif action == right and ue.is_on_right_or_same_position(
                        state=state, next_position=next_state):
                    offset = 1
                elif action == down and ue.is_on_down_or_same_position(
                        state=state, next_state=next_state):
                    offset = 2
                elif action == left and ue.is_on_left_or_same_position(
                        state=state, next_state=next_state):
                    offset = 3
                else:
                    # Move does not follow the action: nothing to check.
                    continue

                self.assertEqual(
                    env.transitions[(coefficient + offset) % n_actions],
                    probability)