Example #1
 def space_step(self, action_e):
     action = action_e[(0, 0)]  # single body
     if self.done:  # space envs run continually without a central reset signal
         return self.space_reset()
     if not self.is_discrete:
         action = np.array([action])
     state, reward, done, _info = self.u_env.step(action)
     reward = guard_reward(reward)
     reward *= self.reward_scale
     if util.to_render():
         self.u_env.render()
     self.done = done = done or self.clock.get('t') > self.max_timestep
     if isinstance(self.u_env.observation_space,
                   gym.spaces.discrete.Discrete):
         # one-hot encode the discrete state before it is stored per body
         state = util.to_one_hot(state, self.u_env.observation_space.n)
     reward_e, state_e, done_e = self.env_space.aeb_space.init_data_s(
         ENV_DATA_NAMES, e=self.e)
     for ab, body in util.ndenumerate_nonan(self.body_e):
         reward_e[ab] = reward
         state_e[ab] = state
         done_e[ab] = done
     logger.debug(
         f'Env {self.e} step reward_e: {reward_e}, state_e: {state_e}, done_e: {done_e}'
     )
     return reward_e, state_e, done_e
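
Every snippet on this page funnels discrete values through util.to_one_hot. As a rough mental model only (an illustrative sketch, not the library's actual implementation), the helper presumably maps an integer index, or an array of indices, to one-hot vectors of a given width:

 import numpy as np

 def to_one_hot(data, max_val):
     '''Illustrative sketch: map an int index (or array of indices) to one-hot vectors of width max_val.'''
     return np.eye(max_val)[np.asarray(data, dtype=int)]

 # e.g. observation index 3 in a Discrete(8) space -> array([0., 0., 0., 1., 0., 0., 0., 0.])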
Example #2
 def sample(self):
     '''
     Samples a batch from memory.
     Note that multitask bodies are parallelized copies running on similar envs, used only to gather larger batches
     '''
     batches = []
     for body in self.agent.nanflat_body_a:
         body_batch = body.memory.sample()
         # one-hot actions to calc q_targets
         if body.is_discrete:
             body_batch['actions'] = util.to_one_hot(
                 body_batch['actions'], body.action_space.high)
         body_batch = util.to_torch_batch(body_batch, self.net.gpu)
         batches.append(body_batch)
     # Concat states at dim=1 for feedforward
     batch = {
         'states': torch.cat([body_batch['states'] for body_batch in batches], dim=1),
         'next_states': torch.cat([body_batch['next_states'] for body_batch in batches], dim=1),
     }
     # retain body-batches for body-wise q_targets calc
     batch['body_batches'] = batches
     return batch
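
The '# Concat states at dim=1 for feedforward' step joins the per-body state batches column-wise, so a single feedforward net sees one wide input row per transition. A minimal shape sketch (the sizes below are made up for illustration):

 import torch

 states_a = torch.rand(4, 3)  # body A: batch of 4, state_dim 3
 states_b = torch.rand(4, 5)  # body B: batch of 4, state_dim 5
 states = torch.cat([states_a, states_b], dim=1)  # shape (4, 8): one wide row per transition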
Example #3
 def sample(self):
     '''Samples a batch from memory of size self.memory_spec['batch_size']'''
     batch = self.body.memory.sample()
     # one-hot actions to calc q_targets
     if self.body.is_discrete:
         batch['actions'] = util.to_one_hot(batch['actions'], self.body.action_space.high)
     if self.normalize_state:
         batch = policy_util.normalize_states_and_next_states(self.body, batch)
     batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
     return batch
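
The recurring '# one-hot actions to calc q_targets' comment refers to using the one-hot action matrix to pick out each transition's predicted Q-value with an elementwise multiply-and-sum. A minimal sketch under assumed tensor shapes (q_preds and act_q_preds are hypothetical names, not from the source):

 import torch

 q_preds = torch.rand(4, 3)  # (batch, action_dim) Q-values from the net
 one_hot_actions = torch.eye(3)[torch.tensor([0, 2, 1, 2])]  # (batch, action_dim), as from util.to_one_hot
 act_q_preds = (q_preds * one_hot_actions).sum(dim=-1)  # (batch,) Q-value of the action actually taken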
Example #4
 def sample(self):
     '''Samples a batch from memory of size self.memory_spec['batch_size']'''
     batches = []
     for body in self.agent.nanflat_body_a:
         body_batch = body.memory.sample()
         # one-hot actions to calc q_targets
         if body.is_discrete:
             body_batch['actions'] = util.to_one_hot(body_batch['actions'], body.action_space.high)
         batches.append(body_batch)
     batch = util.concat_batches(batches)
     batch = util.to_torch_batch(batch, self.net.gpu)
     return batch
Example #5
 def reset(self):
     _reward = np.nan
     state = self.u_env.reset()
     self.done = done = False
     if util.to_render():
         self.u_env.render()
     logger.debug(
         f'Env {self.e} reset reward: {_reward}, state: {state}, done: {done}'
     )
     if isinstance(self.u_env.observation_space,
                   gym.spaces.discrete.Discrete):
         state = util.to_one_hot(state, self.u_env.observation_space.n)
     return _reward, state, done
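
The reset and step examples apply the same conversion to observations: a Discrete observation space yields a bare integer index, which is one-hot encoded before being fed to a network. A small sketch of the same isinstance check against a plain gym space (assuming the classic gym API used throughout these examples):

 import gym
 import numpy as np

 obs_space = gym.spaces.Discrete(16)  # e.g. a 4x4 gridworld
 state = 5  # a raw Discrete observation is just an int index
 if isinstance(obs_space, gym.spaces.discrete.Discrete):
     # same effect as util.to_one_hot(state, obs_space.n)
     state = np.eye(obs_space.n)[state]
 # state is now a length-16 vector usable as network input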
Example #6
 def sample(self):
     '''Samples a batch from memory'''
     batches = []
     for body in self.agent.nanflat_body_a:
         body_batch = body.memory.sample()
         # one-hot actions to calc q_targets
         if body.is_discrete:
             body_batch['one_hot_actions'] = util.to_one_hot(body_batch['actions'], body.action_space.high)
         batches.append(body_batch)
     batch = util.concat_batches(batches)
     # this is safe for next_action at done since the calculated act_next_q_preds will be multiplied by (1 - batch['dones'])
     batch['next_actions'] = np.zeros_like(batch['actions'])
     batch['next_actions'][:-1] = batch['actions'][1:]
     batch = util.to_torch_batch(batch, self.net.gpu)
     return batch
Example #7
 def sample(self):
     '''Samples a batch from memory'''
     batch = self.body.memory.sample()
     # one-hot actions to calc q_targets
     if self.body.is_discrete:
         batch['one_hot_actions'] = util.to_one_hot(
             batch['actions'], self.body.action_space.high)
     # this is safe for next_action at done since the calculated act_next_q_preds will be multiplied by (1 - batch['dones'])
     batch['next_actions'] = np.zeros_like(batch['actions'])
     batch['next_actions'][:-1] = batch['actions'][1:]
     if self.normalize_state:
         batch = policy_util.normalize_states_and_next_states(
             self.body, batch)
     batch = util.to_torch_batch(batch, self.net.device,
                                 self.body.memory.is_episodic)
     return batch
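
The next_actions shift above builds SARSA-style targets from consecutive actions; the comment's point is that the stale action carried over an episode boundary cannot leak into the target, because the next-state Q term is zeroed by (1 - dones). A minimal sketch of that masking (names and numbers are illustrative):

 import torch

 rewards = torch.tensor([1.0, 0.0, 1.0])
 dones = torch.tensor([0.0, 1.0, 0.0])  # episode ends at index 1
 act_next_q_preds = torch.tensor([2.0, 5.0, 3.0])  # Q(s', a') for the shifted next_actions
 gamma = 0.99
 # at a done step, (1 - dones) = 0, so the bogus next_action contributes nothing
 q_targets = rewards + gamma * act_next_q_preds * (1 - dones)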
Example #8
 def space_reset(self):
     _reward_e, state_e, done_e = self.env_space.aeb_space.init_data_s(
         ENV_DATA_NAMES, e=self.e)
     for ab, body in util.ndenumerate_nonan(self.body_e):
         state = self.u_env.reset()
         if isinstance(self.u_env.observation_space,
                       gym.spaces.discrete.Discrete):
             # one-hot encode the discrete state before it is stored per body
             state = util.to_one_hot(state, self.u_env.observation_space.n)
         state_e[ab] = state
         done_e[ab] = self.done = False
     if util.to_render():
         self.u_env.render()
     logger.debug(
         f'Env {self.e} reset reward_e: {_reward_e}, state_e: {state_e}, done_e: {done_e}'
     )
     return _reward_e, state_e, done_e
Example #9
 def step(self, action):
     if not self.is_discrete:  # guard for continuous
         action = np.array([action])
     state, reward, done, _info = self.u_env.step(action)
     reward = guard_reward(reward)
     reward *= self.reward_scale
     if util.to_render():
         self.u_env.render()
     self.done = done = done or self.clock.get('t') > self.max_timestep
     logger.debug(
         f'Env {self.e} step reward: {reward}, state: {state}, done: {done}'
     )
     if isinstance(self.u_env.observation_space,
                   gym.spaces.discrete.Discrete):
         state = util.to_one_hot(state, self.u_env.observation_space.n)
     return reward, state, done
Example #10
 def sample(self):
     '''Samples a batch per body; each body may experience a different environment'''
     batches = []
     for body in self.agent.nanflat_body_a:
         body_batch = body.memory.sample()
         # one-hot actions to calc q_targets
         if body.is_discrete:
             body_batch['actions'] = util.to_one_hot(body_batch['actions'], body.action_space.high)
         body_batch = util.to_torch_batch(body_batch, self.net.gpu)
         batches.append(body_batch)
     # collect per body for feedforward to hydra heads
     batch = {
         'states': [body_batch['states'] for body_batch in batches],
         'next_states': [body_batch['next_states'] for body_batch in batches],
     }
     # retain body-batches for body-wise q_targets calc
     batch['body_batches'] = batches
     return batch
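
Here the per-body state batches stay as plain Python lists instead of being concatenated, presumably so each head of the multi-headed ('hydra') network receives only its own body's states. A minimal dispatch sketch (the head structure and sizes are assumed, not taken from the source):

 import torch
 import torch.nn as nn

 heads = [nn.Linear(3, 2), nn.Linear(5, 2)]  # one small head per body (illustrative sizes)
 states = [torch.rand(4, 3), torch.rand(4, 5)]  # per-body batches, as in batch['states']
 outs = [head(s) for head, s in zip(heads, states)]  # each head sees only its body's states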