Code example #1
 def _get_transition_value(
     self,
     state: TrafficLightState,
     action: TrafficLightAction,
     next_state: Optional[TrafficLightState] = None
 ) -> TransitionValue:
     """Returns the value of the transition."""
     if next_state is not None:
         # Cost is the number of cars left queueing; the reward is its negation.
         return TransitionValue(cost=next_state.cars_queueing_east + state.cars_queueing_north,
                                reward=-(next_state.cars_queueing_east + state.cars_queueing_north))
     else:
         return TransitionValue(cost=0)
Code example #2
 def _get_transition_value(
     self,
     memory: D.T_memory[D.T_state],
     action: D.T_agent[D.T_concurrency[D.T_event]],
     next_state: Optional[D.T_state] = None
 ) -> D.T_agent[TransitionValue[D.T_value]]:
     # Penalise every step by subtracting 1 from the wrapped domain's reward.
     v = super()._get_transition_value(memory, action, next_state)
     return TransitionValue(reward=v.reward - 1)
Code example #3
 def _get_transition_value(
     self,
     memory: D.T_memory[D.T_state],
     action: D.T_agent[D.T_concurrency[D.T_event]],
     next_state: Optional[D.T_state] = None
 ) -> D.T_agent[TransitionValue[D.T_value]]:
     # Every move costs its Manhattan distance (1 for a single-cell move).
     return TransitionValue(cost=abs(next_state.x - memory.x) + abs(next_state.y - memory.y))
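The step-cost formula above can be checked in isolation. The sketch below is a hypothetical stand-in: it uses a small Cell dataclass with x/y attributes instead of the domain's real state type, just to show what the Manhattan-distance cost evaluates to for a unit move and for a diagonal move.

    from dataclasses import dataclass

    @dataclass
    class Cell:
        x: int
        y: int

    def move_cost(memory: Cell, next_state: Cell) -> int:
        # Same formula as in the override above.
        return abs(next_state.x - memory.x) + abs(next_state.y - memory.y)

    assert move_cost(Cell(0, 0), Cell(1, 0)) == 1  # single-cell move
    assert move_cost(Cell(0, 0), Cell(1, 1)) == 2  # diagonal move counted as two steps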
Code example #4
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[TransitionValue[D.T_value]],
                        D.T_agent[D.T_info]]:
     # Classic Gym API: step() returns (obs, reward, done, info).
     obs, reward, done, info = self._gym_env.step(action)
     return TransitionOutcome(state=obs,
                              value=TransitionValue(reward=reward),
                              termination=done,
                              info=info)
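The unpacking above assumes the classic four-value Gym step() API. If the wrapped environment follows the newer Gymnasium convention, step() returns five values; a minimal sketch of the same override adapted to that convention (everything else unchanged) could look like this:

 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[TransitionValue[D.T_value]],
                        D.T_agent[D.T_info]]:
     # Gymnasium-style step() returns (obs, reward, terminated, truncated, info);
     # either termination flag ends the episode from the domain's point of view.
     obs, reward, terminated, truncated, info = self._gym_env.step(action)
     return TransitionOutcome(state=obs,
                              value=TransitionValue(reward=reward),
                              termination=terminated or truncated,
                              info=info)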
Code example #5
 def _sample(self, memory: D.T_memory[D.T_state], action: D.T_agent[D.T_concurrency[D.T_event]]) -> \
     EnvironmentOutcome[D.T_agent[D.T_observation], D.T_agent[TransitionValue[D.T_value]], D.T_agent[D.T_info]]:
     o = super()._sample(memory, action)
     return EnvironmentOutcome(
         observation=GymDomainStateProxy(
             state=normalize_and_round(o.observation._state),
             context=o.observation._context),
         value=TransitionValue(reward=o.value.reward - 1),
         termination=o.termination,
         info=o.info)
Code example #6
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[TransitionValue[D.T_value]],
                        D.T_agent[D.T_info]]:
     o = super()._state_step(action)
     return TransitionOutcome(
         state=GymDomainStateProxy(
             state=normalize_and_round(o.state._state),
             context=o.state._context),
         value=TransitionValue(reward=o.value.reward - 1),
         termination=o.termination,
         info=o.info)
Code example #7
    def test(self):
        dom = TrafficLightDomain()

        state = TrafficLightState(cars_queueing_north=3,
                                  cars_queueing_east=2,
                                  north_light=SingleLightState.RECENT_RED,
                                  east_light=SingleLightState.RED)
        next_state = TrafficLightState(cars_queueing_north=3,
                                       cars_queueing_east=3,
                                       north_light=SingleLightState.RED,
                                       east_light=SingleLightState.GREEN)
        action = TrafficLightAction.DO_NOT_SWITCH
        # Six cars in total are still queueing after this transition, hence a cost of 6.
        self.assertEqual(dom.get_transition_value(state, action, next_state),
                         TransitionValue(cost=6))
Code example #8
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[TransitionValue[D.T_value]],
                        D.T_agent[D.T_info]]:
     obs, reward, done, info = self._gym_env.step(action)
     if self._set_state is not None and self._get_state is not None:
         # The env's internal state can be saved/restored: carry the recorded initial env state as context.
         state = GymDomainStateProxy(state=obs,
                                     context=self._initial_env_state)
     else:
         # No state handlers available: carry the initial environment itself as context.
         state = GymDomainStateProxy(state=obs, context=self._init_env)
     return TransitionOutcome(state=state,
                              value=TransitionValue(reward=reward),
                              termination=done,
                              info=info)
Code example #9
 def _get_next_state(
         self, memory: D.T_memory[D.T_state],
         action: D.T_agent[D.T_concurrency[D.T_event]]) -> D.T_state:
     env = memory._context[0]  # Gym environment carried in the state proxy's context
     if self._set_state is None or self._get_state is None:
         # The env's internal state cannot be restored: step a copy instead.
         env = deepcopy(env)
     elif memory._context[4] != self._get_state(env):
         # Restore the env to the internal state recorded for this memory.
         self._set_state(env, memory._context[4])
     self._gym_env = env  # Just in case the simulation environment would be different from the planner's environment...
     obs, reward, done, info = env.step(action)
     outcome = TransitionOutcome(state=obs,
                                 value=TransitionValue(reward=reward),
                                 termination=done,
                                 info=info)
     # print('Transition:', str(memory._state), ' -> ', str(action), ' -> ', str(outcome.state))
     return GymDomainStateProxy(
         state=outcome.state,
         context=[
             env, memory._state, action, outcome,
             self._get_state(env) if
             (self._get_state is not None
              and self._set_state is not None) else None
         ])
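The context list built at the end of this override packs five positional items: the environment, the previous state, the action, the full outcome, and the env's internal state when it can be read. For readability one could wrap the same information in a small named structure; the sketch below is purely illustrative (the field names are assumptions), and the examples here keep using plain list indexing.

    from dataclasses import dataclass
    from typing import Any, Optional

    @dataclass
    class StepContext:
        env: Any                            # _context[0]: Gym environment used for the rollout
        previous_state: Any                 # _context[1]: memory._state
        action: Any                         # _context[2]: action that produced the outcome
        outcome: Any                        # _context[3]: full TransitionOutcome of the step
        env_internal_state: Optional[Any]   # _context[4]: self._get_state(env), if available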
Code example #10
 def _get_transition_value(self, state: GridState, action: GridAction, next_state: Optional[GridState] = None) -> TransitionValue:
     # Each action carries its own cost.
     return TransitionValue(cost=action._cost)
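The override above reads the cost straight off the action object, so the GridAction type must expose a _cost attribute. That type is not shown in these examples; one hypothetical way to provide it is an Enum whose members carry a movement delta and a cost, as sketched below (member names, deltas and costs are illustrative assumptions, not the actual domain definition).

    from enum import Enum

    class GridAction(Enum):
        # (dx, dy, cost) -- hypothetical values; the real domain may differ
        UP = (0, -1, 1.0)
        DOWN = (0, 1, 1.0)
        LEFT = (-1, 0, 1.0)
        RIGHT = (1, 0, 1.0)

        def __init__(self, dx: int, dy: int, cost: float):
            self.dx = dx
            self.dy = dy
            self._cost = cost  # read by _get_transition_value above

    assert GridAction.UP._cost == 1.0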