def step(self, action_e):
    """Advance the underlying env by one step and collect per-body results.

    If the previous step left ``self.done`` truthy, the env is reset
    instead and whatever ``self.reset()`` returns is handed back (its
    shape is not visible from this method).

    Otherwise the action data is flattened with ``util.nanflatten``,
    passed to ``self.u_env.step``, and the rewards, states and done flags
    found in the resulting env info are written into freshly initialised
    containers keyed by ``(a, b)``.

    Returns:
        Tuple ``(reward_e, state_e, done_e)``.
    """
    # TODO implement clock_speed: step only if self.clock.to_step()
    if self.done:
        return self.reset()

    flat_actions = util.nanflatten(action_e)
    step_info = self.u_env.step(flat_actions)

    aeb_space = self.env_space.aeb_space
    reward_e, state_e, done_e = aeb_space.init_data_s(ENV_DATA_NAMES, e=self.e)

    # (a, b) presumably index agent and body -- confirm against aeb_space.
    for (a, b), _body in util.ndenumerate_nonan(self.body_e):
        key = (a, b)
        info_a = self.get_env_info(step_info, a)
        reward_e[key] = info_a.rewards[b]
        state_e[key] = info_a.states[b]
        done_e[key] = info_a.local_done[b]

    # Done when util.nonan_all(done_e) is truthy, or when the clock's 't'
    # has gone past the timestep limit.
    all_done = util.nonan_all(done_e)
    self.done = all_done or self.clock.get('t') > self.max_timestep
    return reward_e, state_e, done_e
def step(self, action_e):
    """Advance the single-body env by one step and broadcast the result.

    ``action_e`` must have length 1; only the entry at ``(0, 0)`` is used.
    If the previous step left ``self.done`` truthy, the env is reset
    instead and whatever ``self.reset()`` returns is handed back.

    The single ``(state, reward, done)`` triple produced by
    ``self.u_env.step`` is copied into every ``(a, b)`` slot yielded by
    ``util.ndenumerate_nonan(self.body_e)``. When the lab mode is
    ``'dev'`` the env is rendered after stepping.

    Returns:
        Tuple ``(reward_e, state_e, done_e)``.
    """
    # NOTE: this guard is an assert, so it is skipped when Python runs with -O.
    assert len(action_e) == 1, 'OpenAI Gym supports only single body'
    # TODO implement clock_speed: step only if self.clock.to_step()
    if self.done:
        # t will actually be 0
        return self.reset()

    single_action = action_e[(0, 0)]
    state, reward, done, _info = self.u_env.step(single_action)

    lab_mode = util.get_lab_mode()
    if lab_mode == 'dev':
        self.u_env.render()

    aeb_space = self.env_space.aeb_space
    reward_e, state_e, done_e = aeb_space.init_data_s(ENV_DATA_NAMES, e=self.e)

    # Every slot receives the same values: there is only one underlying body.
    for (a, b), _body in util.ndenumerate_nonan(self.body_e):
        key = (a, b)
        reward_e[key] = reward
        state_e[key] = state
        done_e[key] = done

    all_done = util.nonan_all(done_e)
    self.done = all_done or self.clock.get('t') > self.max_timestep
    return reward_e, state_e, done_e
def space_step(self, action_e):
    """Advance the underlying env by one step and collect per-body results.

    If the previous step left ``self.done`` truthy, the env is reset
    instead and whatever ``self.space_reset()`` returns is handed back.

    Otherwise the action data is flattened with ``util.nanflatten``,
    passed to ``self.u_env.step``, and the rewards (multiplied by
    ``self.reward_scale``), states and done flags are written into
    freshly initialised containers keyed by ``(a, b)``. The collected
    values are logged at debug level before returning.

    Returns:
        Tuple ``(reward_e, state_e, done_e)``.
    """
    # TODO implement clock_speed: step only if self.clock.to_step()
    if self.done:
        return self.space_reset()

    flat_actions = util.nanflatten(action_e)
    step_info = self.u_env.step(flat_actions)

    aeb_space = self.env_space.aeb_space
    reward_e, state_e, done_e = aeb_space.init_data_s(ENV_DATA_NAMES, e=self.e)

    # (a, b) presumably index agent and body -- confirm against aeb_space.
    for (a, b), _body in util.ndenumerate_nonan(self.body_e):
        key = (a, b)
        info_a = self._get_env_info(step_info, a)
        reward_e[key] = info_a.rewards[b] * self.reward_scale
        state_e[key] = info_a.states[b]
        done_e[key] = info_a.local_done[b]

    # Done when util.nonan_all(done_e) is truthy, or when the clock's t
    # has gone past max_t.
    all_done = util.nonan_all(done_e)
    self.done = all_done or self.clock.t > self.max_t

    message = f'Env {self.e} step reward_e: {reward_e}, state_e: {state_e}, done_e: {done_e}'
    logger.debug(message)
    return reward_e, state_e, done_e
def test_nonan_all(v, isall):
    """Check that ``util.nonan_all(v)`` equals the expected flag ``isall``.

    Both arguments are supplied by the caller (presumably a pytest
    parametrization that is not visible here).
    """
    outcome = util.nonan_all(v)
    assert outcome == isall
def __bool__(self):
    """Return the truth value of this object.

    Delegates to ``util.nonan_all`` applied to ``self.data``.
    """
    verdict = util.nonan_all(self.data)
    return verdict