Example 1: sample() restoring the wrapped Gym state via set_state()/get_state()
 def sample(self, state, action):
     # Restore the underlying Gym state only when the queried state differs
     # from the one cached in current_outcome.
     if state._context != self.current_outcome.observation._context:
         self._gym_env.set_state(state._context)
     outcome = super().step(action)
     observation = GymDomainStateProxy(
         state=normalize_and_round(outcome.observation),
         context=self._gym_env.get_state(),
     )
     self.current_outcome = EnvironmentOutcome(observation=observation, value=outcome.value, termination=outcome.termination, info=outcome.info)
     return self.current_outcome
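Examples 1, 7, 8, and 9 assume an underlying environment whose internal state can be read back with get_state() and restored with set_state(). Below is a minimal self-contained sketch of that caching pattern using a toy counter environment; ToyEnv, CachedDomain, and their methods are illustrative stand-ins, not code from the examples above.

 class ToyEnv:
     # Toy stand-in for a Gym environment whose full state is one integer.
     def __init__(self):
         self.counter = 0

     def get_state(self):
         return self.counter

     def set_state(self, s):
         self.counter = s

     def step(self, action):
         self.counter += action
         return self.counter

 class CachedDomain:
     def __init__(self, env):
         self.env = env
         self.last_context = env.get_state()

     def sample(self, context, action):
         # Skip the (potentially costly) restore when the caller continues
         # from the state produced by the previous call.
         if context != self.last_context:
             self.env.set_state(context)
         obs = self.env.step(action)
         self.last_context = self.env.get_state()
         return obs, self.last_context

 domain = CachedDomain(ToyEnv())
 obs, ctx = domain.sample(0, action=2)    # no restore needed, counter -> 2
 obs, ctx = domain.sample(ctx, action=3)  # continues from cache, counter -> 5
 obs, ctx = domain.sample(0, action=1)    # diverging query: restore to 0 first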
Example 2: reset() initializing the cached outcome with an empty action context
 def reset(self):
     self.current_outcome = EnvironmentOutcome(
         observation=GymDomainStateProxy(state=super().reset(), context=[]),
         value=None,
         termination=False,
         info=None,
     )
     return self.current_outcome.observation
Example 3: sample() rebuilding a state by replaying its action context
 def sample(self, state, action):
     # If the queried state is not the last state produced, rebuild it by
     # resetting and replaying its recorded action history.
     if state != self.current_outcome.observation:
         self.reset()
         for a in state._context:
             self.step(a)
     outcome = self.step(action)
     observation = GymDomainStateProxy(
         state=outcome.observation._state,
         context=state._context + [action],
     )
     self.current_outcome = EnvironmentOutcome(observation=observation, value=outcome.value, termination=outcome.termination, info=outcome.info)
     return self.current_outcome
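Examples 3 and 4 handle the opposite situation, where the environment's internal state cannot be copied: the context stores the full action history, and a queried state is rebuilt by resetting and replaying that history. A minimal self-contained sketch of the replay pattern follows; ToyReplayEnv and ReplayDomain are hypothetical names, not code from the examples above.

 class ToyReplayEnv:
     # Toy deterministic environment: the state is the running total of actions.
     def __init__(self):
         self.total = 0

     def reset(self):
         self.total = 0
         return self.total

     def step(self, action):
         self.total += action
         return self.total

 class ReplayDomain:
     def __init__(self, env):
         self.env = env
         self.last_history = []

     def sample(self, history, action):
         # Rebuild the queried state by replaying its action history,
         # unless it is exactly the state reached by the previous call.
         if history != self.last_history:
             self.env.reset()
             for a in history:
                 self.env.step(a)
         obs = self.env.step(action)
         self.last_history = history + [action]
         return obs, self.last_history

 domain = ReplayDomain(ToyReplayEnv())
 obs, h = domain.sample([], action=2)   # total -> 2, history [2]
 obs, h = domain.sample(h, action=3)    # cached path, total -> 5
 obs, h = domain.sample([2], action=4)  # replays [2] from scratch, total -> 6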
Example 4: get_next_state_distribution() via action replay
 def get_next_state_distribution(self, state, action):
     # Same replay logic as sample() above; the sampled successor is wrapped
     # in a single-point distribution since the environment is deterministic.
     if state != self.current_outcome.observation:
         self.reset()
         for a in state._context:
             self.step(a)
     outcome = self.step(action)
     observation = GymDomainStateProxy(
         state=outcome.observation._state,
         context=state._context + [action],
     )
     self.current_outcome = EnvironmentOutcome(observation=observation, value=outcome.value, termination=outcome.termination, info=outcome.info)
     return DiscreteDistribution([(observation, 1.0)])
Example 5: step() wrapping the raw observation in a GymDomainStateProxy
 def step(self, action):
     outcome = super().step(action)
     observation = GymDomainStateProxy(state=outcome.observation, context=None)
     return EnvironmentOutcome(
         observation=observation,
         value=outcome.value,
         termination=outcome.termination,
         info=outcome.info,
     )
Example 6: typed _sample() with observation normalization and a constant step cost
 def _sample(
     self,
     memory: D.T_memory[D.T_state],
     action: D.T_agent[D.T_concurrency[D.T_event]],
 ) -> EnvironmentOutcome[
     D.T_agent[D.T_observation],
     D.T_agent[TransitionValue[D.T_value]],
     D.T_agent[D.T_info],
 ]:
     o = super()._sample(memory, action)
     # Normalize the observation and apply a constant step cost of 1 to the reward.
     return EnvironmentOutcome(
         observation=GymDomainStateProxy(
             state=normalize_and_round(o.observation._state),
             context=o.observation._context,
         ),
         value=TransitionValue(reward=o.value.reward - 1),
         termination=o.termination,
         info=o.info,
     )
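normalize_and_round, used in Examples 1 and 6 through 9, is not defined in these snippets. Its apparent role is to discretize continuous observations so that states differing only by numerical noise compare equal; the following is a plausible stand-in written purely as an assumption, not the library's actual implementation.

 import numpy as np

 def normalize_and_round(obs, decimals=3):
     # Hypothetical stand-in: round each component so that near-identical
     # continuous observations map to the same hashable state.
     arr = np.asarray(obs, dtype=np.float64)
     return tuple(np.round(arr, decimals))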
Example 7: reset() caching the normalized initial observation and Gym state context
 def reset(self):
     self.current_outcome = EnvironmentOutcome(
         observation=GymDomainStateProxy(
             state=normalize_and_round(super().reset()),
             context=self._gym_env.get_state(),
         ),
         value=None,
         termination=False,
         info=None,
     )
     return self.current_outcome.observation
Example 8: step() with observation normalization and Gym state context
 def step(self, action):
     outcome = super().step(action)
     observation = GymDomainStateProxy(
         state=normalize_and_round(outcome.observation),
         context=self._gym_env.get_state(),
     )
     return EnvironmentOutcome(
         observation=observation,
         value=outcome.value,
         termination=outcome.termination,
         info=outcome.info,
     )
Example 9: get_next_state_distribution() via state restoration
 def get_next_state_distribution(self, state, action):
     # Same state-restoration logic as sample() in Example 1; the sampled
     # successor is returned as a single-point distribution.
     if state._context != self.current_outcome.observation._context:
         self._gym_env.set_state(state._context)
     outcome = super().step(action)
     observation = GymDomainStateProxy(
         state=normalize_and_round(outcome.observation),
         context=self._gym_env.get_state(),
     )
     self.current_outcome = EnvironmentOutcome(
         observation=observation,
         value=outcome.value,
         termination=outcome.termination,
         info=outcome.info,
     )
     return DiscreteDistribution([(observation, 1.0)])
Example 10: decode() rebuilding an EnvironmentOutcome from a flat shared-memory buffer
 def decode(outcome):
     # Slot 0: observation, slots 1-2: transition value, slot 3: termination flag.
     return EnvironmentOutcome(
         observation=MyShmProxy.StateProxy.decode(outcome[0]),
         value=MyShmProxy.TransitionValueProxy.decode(outcome[1:3]),
         termination=MyShmProxy.BoolProxy.decode(outcome[3]))
Example 11: decode() for a grid domain's shared-memory proxy
 def decode(outcome):
     return EnvironmentOutcome(
         observation=GridShmProxy.StateProxy.decode(outcome[0]),
         value=GridShmProxy.ValueProxy.decode(outcome[1:3]),
         termination=GridShmProxy.BoolProxy.decode(outcome[3]),
     )
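Examples 10 and 11 show only the decode half of a shared-memory proxy: slot 0 holds the observation, slots 1 and 2 the transition value, and slot 3 the termination flag. For symmetry, here is a self-contained sketch of the full round trip with a plain list standing in for the shared buffer; the Outcome dataclass, encode(), and the two-component value are assumptions that merely mirror the layout read back above.

 from dataclasses import dataclass

 @dataclass
 class Outcome:
     observation: int
     reward: float
     cost: float
     termination: bool

 def encode(o, buf):
     # Pack the outcome into a flat 4-slot buffer, mirroring the layout
     # that decode() reads back in Examples 10 and 11.
     buf[0] = o.observation
     buf[1] = o.reward
     buf[2] = o.cost
     buf[3] = 1.0 if o.termination else 0.0

 def decode(buf):
     return Outcome(
         observation=int(buf[0]),
         reward=float(buf[1]),
         cost=float(buf[2]),
         termination=bool(buf[3]),
     )

 buf = [0.0] * 4
 encode(Outcome(observation=7, reward=-1.0, cost=1.0, termination=False), buf)
 assert decode(buf) == Outcome(7, -1.0, 1.0, False)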