def _get_transition_value(
    self,
    state: TrafficLightState,
    action: TrafficLightAction,
    next_state: Optional[TrafficLightState] = None,
) -> TransitionValue:
    '''Returns the value of the transition: the cost is the total number
    of cars still queueing after the transition.'''
    if next_state is not None:
        queueing = next_state.cars_queueing_east + next_state.cars_queueing_north
        return TransitionValue(cost=queueing, reward=-queueing)
    else:
        return TransitionValue(cost=0)
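
# These snippets rely on scikit-decide's TransitionValue. To read them in
# isolation, here is a minimal stand-in (an illustrative assumption, not the
# library's actual class), assuming cost and reward default to each other's
# negation when only one is given:
from typing import Optional

class TransitionValue:
    '''Minimal illustrative stand-in; the real scikit-decide class is richer.'''

    def __init__(self, cost: Optional[float] = None, reward: Optional[float] = None):
        if cost is None and reward is None:
            cost = 0.0
        self.cost = cost if cost is not None else -reward
        self.reward = reward if reward is not None else -self.cost

    def __eq__(self, other):
        return (self.cost, self.reward) == (other.cost, other.reward)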
def _get_transition_value(
    self,
    memory: D.T_memory[D.T_state],
    action: D.T_agent[D.T_concurrency[D.T_event]],
    next_state: Optional[D.T_state] = None,
) -> D.T_agent[TransitionValue[D.T_value]]:
    # Keep the parent domain's value but add a -1 per-step penalty.
    v = super()._get_transition_value(memory, action, next_state)
    return TransitionValue(reward=v.reward - 1)
def _get_transition_value(
    self,
    memory: D.T_memory[D.T_state],
    action: D.T_agent[D.T_concurrency[D.T_event]],
    next_state: Optional[D.T_state] = None,
) -> D.T_agent[TransitionValue[D.T_value]]:
    # Every unit move costs 1: the cost of a transition is the Manhattan
    # distance between the current and next grid positions.
    return TransitionValue(
        cost=abs(next_state.x - memory.x) + abs(next_state.y - memory.y)
    )
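
# A quick check of the Manhattan-distance cost above, using a hypothetical
# GridState dataclass (the actual state class is not shown in these snippets):
from dataclasses import dataclass

@dataclass(frozen=True)
class GridState:
    x: int
    y: int

# A single-cell move costs 1; a jump of (2, 3) costs 5.
memory, next_state = GridState(x=0, y=0), GridState(x=2, y=3)
assert abs(next_state.x - memory.x) + abs(next_state.y - memory.y) == 5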
def _state_step(
    self, action: D.T_agent[D.T_concurrency[D.T_event]]
) -> TransitionOutcome[
    D.T_state, D.T_agent[TransitionValue[D.T_value]], D.T_agent[D.T_info]
]:
    # Delegate the step to the wrapped Gym environment and repackage its
    # (obs, reward, done, info) tuple as a TransitionOutcome.
    obs, reward, done, info = self._gym_env.step(action)
    return TransitionOutcome(
        state=obs,
        value=TransitionValue(reward=reward),
        termination=done,
        info=info,
    )
def _sample(
    self,
    memory: D.T_memory[D.T_state],
    action: D.T_agent[D.T_concurrency[D.T_event]],
) -> EnvironmentOutcome[
    D.T_agent[D.T_observation],
    D.T_agent[TransitionValue[D.T_value]],
    D.T_agent[D.T_info],
]:
    # Sample from the parent domain, then discretize the observation and
    # apply a -1 per-step penalty to the reward.
    o = super()._sample(memory, action)
    return EnvironmentOutcome(
        observation=GymDomainStateProxy(
            state=normalize_and_round(o.observation._state),
            context=o.observation._context,
        ),
        value=TransitionValue(reward=o.value.reward - 1),
        termination=o.termination,
        info=o.info,
    )
def _state_step(
    self, action: D.T_agent[D.T_concurrency[D.T_event]]
) -> TransitionOutcome[
    D.T_state, D.T_agent[TransitionValue[D.T_value]], D.T_agent[D.T_info]
]:
    # Same normalization and per-step penalty as in _sample, applied to the
    # state returned by the parent domain's step.
    o = super()._state_step(action)
    return TransitionOutcome(
        state=GymDomainStateProxy(
            state=normalize_and_round(o.state._state),
            context=o.state._context,
        ),
        value=TransitionValue(reward=o.value.reward - 1),
        termination=o.termination,
        info=o.info,
    )
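
# normalize_and_round is referenced above but not defined in these snippets.
# A plausible stand-in, assuming the Gym state is a NumPy vector rounded to a
# fixed number of decimals so nearby continuous states collapse onto the same
# hashable key (the real helper may differ):
import numpy as np

def normalize_and_round(state: np.ndarray, decimals: int = 3) -> tuple:
    # Round each component, then return a hashable tuple usable as a dict key.
    return tuple(np.round(state, decimals=decimals).tolist())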
def test(self):
    dom = TrafficLightDomain()
    state = TrafficLightState(
        cars_queueing_north=3,
        cars_queueing_east=2,
        north_light=SingleLightState.RECENT_RED,
        east_light=SingleLightState.RED,
    )
    next_state = TrafficLightState(
        cars_queueing_north=3,
        cars_queueing_east=3,
        north_light=SingleLightState.RED,
        east_light=SingleLightState.GREEN,
    )
    action = TrafficLightAction.DO_NOT_SWITCH
    # 3 cars queueing north + 3 cars queueing east after the transition.
    self.assertEqual(
        dom.get_transition_value(state, action, next_state),
        TransitionValue(cost=6),
    )
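
# The test above uses unittest's assertEqual; to run it standalone it would
# sit in a TestCase subclass (the class name here is hypothetical):
import unittest

class TestTrafficLightDomain(unittest.TestCase):
    ...  # the test method above goes here

if __name__ == "__main__":
    unittest.main()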
def _state_step(
    self, action: D.T_agent[D.T_concurrency[D.T_event]]
) -> TransitionOutcome[
    D.T_state, D.T_agent[TransitionValue[D.T_value]], D.T_agent[D.T_info]
]:
    obs, reward, done, info = self._gym_env.step(action)
    if self._set_state is not None and self._get_state is not None:
        # State accessors available: carry the initial raw env state as context.
        state = GymDomainStateProxy(state=obs, context=self._initial_env_state)
    else:
        # No accessors: fall back to the initial environment itself as context.
        state = GymDomainStateProxy(state=obs, context=self._init_env)
    return TransitionOutcome(
        state=state,
        value=TransitionValue(reward=reward),
        termination=done,
        info=info,
    )
def _get_next_state(
    self,
    memory: D.T_memory[D.T_state],
    action: D.T_agent[D.T_concurrency[D.T_event]],
) -> D.T_state:
    # The proxy's context is [env, previous state, action, outcome, raw env state].
    env = memory._context[0]
    if self._set_state is None or self._get_state is None:
        # No state accessors: step a deep copy so the stored env is left untouched.
        env = deepcopy(env)
    elif memory._context[4] != self._get_state(env):
        # Restore the environment to the raw state recorded in the memory proxy.
        self._set_state(env, memory._context[4])
    self._gym_env = env  # Just in case the simulation environment differs from the planner's environment...
    obs, reward, done, info = env.step(action)
    outcome = TransitionOutcome(
        state=obs, value=TransitionValue(reward=reward), termination=done, info=info
    )
    # print('Transition:', str(memory._state), ' -> ', str(action), ' -> ', str(outcome.state))
    return GymDomainStateProxy(
        state=outcome.state,
        context=[
            env,
            memory._state,
            action,
            outcome,
            self._get_state(env)
            if (self._get_state is not None and self._set_state is not None)
            else None,
        ],
    )
def _get_transition_value(
    self,
    state: GridState,
    action: GridAction,
    next_state: Optional[GridState] = None,
) -> TransitionValue:
    # Each action carries its own cost.
    return TransitionValue(cost=action._cost)
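
# Usage sketch for the per-action cost above, assuming a hypothetical
# GridAction enum whose members carry their own cost (the actual action
# class is not shown in these snippets):
from enum import Enum

class GridAction(Enum):
    MOVE = 1.0
    JUMP = 3.0

    def __init__(self, cost: float):
        # Enum passes each member's value to __init__, so the cost is
        # attached to the member itself.
        self._cost = cost

assert GridAction.JUMP._cost == 3.0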