Example 1
 def step(self, action_e):
     # TODO implement clock_speed: step only if self.clock.to_step()
     if self.done:
         return self.reset()
     action_e = util.nanflatten(action_e)
     env_info_dict = self.u_env.step(action_e)
     reward_e, state_e, done_e = self.env_space.aeb_space.init_data_s(ENV_DATA_NAMES, e=self.e)
     for (a, b), body in util.ndenumerate_nonan(self.body_e):
         env_info_a = self.get_env_info(env_info_dict, a)
         reward_e[(a, b)] = env_info_a.rewards[b]
         state_e[(a, b)] = env_info_a.states[b]
         done_e[(a, b)] = env_info_a.local_done[b]
     self.done = (util.nonan_all(done_e) or self.clock.get('t') > self.max_timestep)
     return reward_e, state_e, done_e
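
The pattern above scatters per-body results into (agent, body)-indexed arrays and skips NaN-padded slots. A minimal sketch of that iteration, assuming a hypothetical ndenumerate_nonan with the behavior its name suggests (util's actual implementation may differ):

import numpy as np

def ndenumerate_nonan(arr):
    # Hypothetical stand-in for util.ndenumerate_nonan (assumed behavior):
    # yield (index, value) for every non-NaN entry of a NaN-padded array.
    for idx, v in np.ndenumerate(arr):
        if not np.isnan(v):
            yield idx, v

# Toy (agent, body) layout: agent 0 owns two bodies, agent 1 owns one;
# NaN marks the unused (a, b) slot.
body_e = np.array([[0.0, 1.0], [2.0, np.nan]])
reward_e = np.full_like(body_e, np.nan)
for (a, b), body in ndenumerate_nonan(body_e):
    reward_e[(a, b)] = 1.0  # stand-in for env_info_a.rewards[b]
print(reward_e)  # NaN remains only in the unused (1, 1) slot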
Example 2
 def step(self, action_e):
     assert len(action_e) == 1, 'OpenAI Gym supports only single body'
     # TODO implement clock_speed: step only if self.clock.to_step()
     if self.done:  # t will actually be 0
         return self.reset()
     action = action_e[(0, 0)]
     (state, reward, done, _info) = self.u_env.step(action)
     if util.get_lab_mode() == 'dev':
         self.u_env.render()
     reward_e, state_e, done_e = self.env_space.aeb_space.init_data_s(ENV_DATA_NAMES, e=self.e)
     for (a, b), body in util.ndenumerate_nonan(self.body_e):
         reward_e[(a, b)] = reward
         state_e[(a, b)] = state
         done_e[(a, b)] = done
     self.done = (util.nonan_all(done_e) or self.clock.get('t') > self.max_timestep)
     return reward_e, state_e, done_e
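
Stripped of the lab's (a, b) bookkeeping, Example 2 is the standard Gym step loop. A minimal sketch, assuming the classic pre-0.26 gym API whose 4-tuple return matches the unpack above (newer Gym/Gymnasium versions return 5 values):

import gym

env = gym.make('CartPole-v1')
state = env.reset()
done, t, max_timestep = False, 0, 200
while not done and t <= max_timestep:
    action = env.action_space.sample()  # stand-in for the agent's policy
    state, reward, done, _info = env.step(action)  # classic 4-tuple API
    t += 1
env.close()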
Example 3
 def space_step(self, action_e):
     # TODO implement clock_speed: step only if self.clock.to_step()
     if self.done:
         return self.space_reset()
     action_e = util.nanflatten(action_e)
     env_info_dict = self.u_env.step(action_e)
     reward_e, state_e, done_e = self.env_space.aeb_space.init_data_s(
         ENV_DATA_NAMES, e=self.e)
     for (a, b), body in util.ndenumerate_nonan(self.body_e):
         env_info_a = self._get_env_info(env_info_dict, a)
         reward_e[(a, b)] = env_info_a.rewards[b] * self.reward_scale
         state_e[(a, b)] = env_info_a.states[b]
         done_e[(a, b)] = env_info_a.local_done[b]
     self.done = (util.nonan_all(done_e) or self.clock.t > self.max_t)
     logger.debug(
         f'Env {self.e} step reward_e: {reward_e}, state_e: {state_e}, done_e: {done_e}'
     )
     return reward_e, state_e, done_e
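
Example 3 differs from Example 1 in two details: rewards are multiplied by self.reward_scale, and the timestep is read as an attribute (self.clock.t) rather than via clock.get('t'). A hypothetical minimal clock with that attribute-style interface (not the library's actual class):

class Clock:
    # Hypothetical stand-in for the clock consulted via
    # `self.clock.t > self.max_t` above.
    def __init__(self):
        self.t = 0        # timestep within the current episode
        self.total_t = 0  # timestep across the whole session
    def tick(self):
        self.t += 1
        self.total_t += 1
    def reset_episode(self):
        self.t = 0

clock = Clock()
for _ in range(3):
    clock.tick()
assert clock.t == 3 and clock.total_t == 3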
Example 4
def test_nonan_all(v, isall):
    assert util.nonan_all(v) == isall
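
The bare (v, isall) signature indicates the test is driven by pytest parametrization. A sketch of how such cases could be wired up, assuming nonan_all returns True only when v contains no NaN and every element is truthy (consistent with its use as the episode-done check in Examples 1-3); the import path and cases are illustrative:

import numpy as np
import pytest

from slm_lab.lib import util  # assumed import path

@pytest.mark.parametrize('v,isall', [
    # illustrative cases under the assumed "no NaN and all truthy" semantics
    ([1, 1], True),
    ([1, 0], False),
    ([1, np.nan], False),
    (np.ones((2, 2)), True),
])
def test_nonan_all(v, isall):
    assert util.nonan_all(v) == isall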
Example 5
 def __bool__(self):
     return util.nonan_all(self.data)
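
Defining __bool__ this way lets a data container be used directly in conditionals: the object is truthy only when its array is complete (no NaN) and all-true. A hypothetical minimal container showing the call-site effect:

import numpy as np

class DataSpace:
    # Hypothetical container mirroring the snippet: truthiness
    # delegates to a nonan_all-style check over the wrapped array.
    def __init__(self, data):
        self.data = np.asarray(data, dtype=float)
    def __bool__(self):
        # assumed semantics: no NaN and every element truthy
        return bool(np.all(~np.isnan(self.data)) and np.all(self.data))

done = DataSpace([1.0, 1.0])
if done:  # reads naturally at call sites: "all bodies are done"
    print('episode complete')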