Ejemplo n.º 1
0
 def update(self, action_a, reward_a, state_a, done_a):
     '''
     Per-timestep agent update, run after the env transitions.

     Stores each body's entry of action_a, reward_a, state_a and done_a in
     that body's memory, then calls the algorithm's train() followed by its
     update(). Both results are passed through util.guard_data_a before
     being returned.

     Returns:
         The tuple (loss_a, explore_var_a).
     '''
     for (e, b), body in util.ndenumerate_nonan(self.body_a):
         eb = (e, b)  # one index addresses all four per-body inputs
         body.memory.update(action_a[eb], reward_a[eb], state_a[eb], done_a[eb])

     # Statement order is kept as-is: memory first, then train(), then update().
     raw_loss = self.algorithm.train()
     loss_a = util.guard_data_a(self, raw_loss, 'loss')

     raw_explore_var = self.algorithm.update()
     explore_var_a = util.guard_data_a(self, raw_explore_var, 'explore_var')

     return loss_a, explore_var_a
Ejemplo n.º 2
0
 def update(self, action_a, reward_a, state_a, done_a):
     '''
     Per-timestep agent update, run after the env transitions.

     Stores each body's entry of action_a, reward_a, state_a and done_a in
     that body's memory, calls the algorithm's train(), records the
     resulting per-body loss on each body, and finally calls the
     algorithm's update(). The train() and update() results are passed
     through util.guard_data_a before use.

     Returns:
         The tuple (loss_a, explore_var_a).
     '''
     for (e, b), body in util.ndenumerate_nonan(self.body_a):
         eb = (e, b)  # one index addresses all four per-body inputs
         body.memory.update(action_a[eb], reward_a[eb], state_a[eb], done_a[eb])

     raw_loss = self.algorithm.train()
     loss_a = util.guard_data_a(self, raw_loss, 'loss')

     # Every body gets its loss entry assigned, with no filtering of values.
     for (e, b), body in util.ndenumerate_nonan(self.body_a):
         eb = (e, b)
         body.loss = loss_a[eb]

     raw_explore_var = self.algorithm.update()
     explore_var_a = util.guard_data_a(self, raw_explore_var, 'explore_var')

     return loss_a, explore_var_a
Ejemplo n.º 3
0
 def space_update(self, action_a, reward_a, state_a, done_a):
     '''
     Per-timestep agent update, run after the env transitions.

     For every body: calls action_pd_update() and stores its entry of
     action_a, reward_a, state_a and done_a in its memory. Then calls the
     algorithm's space_train(), copies each non-NaN loss onto its body,
     calls the algorithm's space_update(), logs both results at debug
     level, and calls epi_update() on every body whose env reports done.

     Returns:
         The tuple (loss_a, explore_var_a), each passed through
         util.guard_data_a.
     '''
     for idx, body in util.ndenumerate_nonan(self.body_a):
         body.action_pd_update()
         body.memory.update(action_a[idx], reward_a[idx], state_a[idx], done_a[idx])

     raw_loss = self.algorithm.space_train()
     loss_a = util.guard_data_a(self, raw_loss, 'loss')
     for idx, body in util.ndenumerate_nonan(self.body_a):
         if np.isnan(loss_a[idx]):
             # A NaN entry leaves the body's previous loss untouched.
             continue
         body.loss = loss_a[idx]  # set for log_summary()

     raw_explore_var = self.algorithm.space_update()
     explore_var_a = util.guard_data_a(self, raw_explore_var, 'explore_var')
     logger.debug(f'Agent {self.a} loss: {loss_a}, explore_var_a {explore_var_a}')

     # Episode-level update for each body whose env is flagged done.
     for _, body in util.ndenumerate_nonan(self.body_a):
         if body.env.done:
             body.epi_update()

     return loss_a, explore_var_a