Exemple #1
0
 def update_nets(self):
     '''
     Synchronize both target Q-networks with their online counterparts.

     Nothing happens unless util.frame_mod() reports that the current clock
     frame hits q1_net.update_frequency. The update mode is read from
     q1_net.update_type and applied to both Q-networks:
     - 'replace': net_util.copy() each online net onto its target net
     - 'polyak': net_util.polyak_update() using each online net's own polyak_coef

     Raises:
         ValueError: if q1_net.update_type is neither 'replace' nor 'polyak'
     '''
     env = self.body.env
     if not util.frame_mod(env.clock.frame, self.q1_net.update_frequency, env.num_envs):
         return  # not an update frame

     update_type = self.q1_net.update_type
     if update_type == 'replace':
         net_util.copy(self.q1_net, self.target_q1_net)
         net_util.copy(self.q2_net, self.target_q2_net)
     elif update_type == 'polyak':
         net_util.polyak_update(self.q1_net, self.target_q1_net, self.q1_net.polyak_coef)
         net_util.polyak_update(self.q2_net, self.target_q2_net, self.q2_net.polyak_coef)
     else:
         raise ValueError('Unknown q1_net.update_type. Should be "replace" or "polyak". Exiting.')
Exemple #2
0
 def to_ckpt(self, env, mode='eval'):
     '''
     Check with the env clock whether to run a log/eval checkpoint.

     Args:
         env: environment exposing clock (with get() and max_frame),
             eval_frequency, log_frequency, num_envs and done
         mode: 'eval' selects env.eval_frequency; any other value selects
             env.log_frequency

     Returns:
         False when mode is 'eval' and util.in_eval_lab_modes() is truthy;
         env.done when the selected frequency is None (episodic checkpoint);
         otherwise the result of util.frame_mod(frame, frequency, num_envs),
         or, if that is falsy, whether the frame equals clock.max_frame.
     '''
     if mode == 'eval' and util.in_eval_lab_modes():
         # avoid double-eval: eval-ckpt in eval mode
         return False
     clock = env.clock
     frame = clock.get()
     frequency = env.eval_frequency if mode == 'eval' else env.log_frequency
     if frequency is None:
         # No frequency configured: default to an episodic checkpoint rather
         # than handing None to util.frame_mod.
         return env.done
     # normal ckpt condition by mod remainder (general for venv)
     return util.frame_mod(frame, frequency, env.num_envs) or frame == clock.max_frame
Exemple #3
0
 def to_ckpt(self, env, mode='eval'):
     '''
     Decide whether a log/eval checkpoint should run at the current frame.

     Returns False right away when mode is 'eval' and util.in_eval_lab_modes()
     is truthy. Otherwise the frequency is env.eval_frequency for mode 'eval'
     and env.log_frequency for any other mode:
     - frequency is None: the checkpoint is episodic, so env.done is returned
     - else: util.frame_mod(frame, frequency, num_envs) is returned if truthy,
       falling back to whether the frame equals clock.max_frame
     '''
     is_eval = mode == 'eval'
     if is_eval and util.in_eval_lab_modes():
         return False  # avoid double-eval: eval-ckpt in eval mode

     env_clock = env.clock
     current_frame = env_clock.get()
     if is_eval:
         ckpt_frequency = env.eval_frequency
     else:
         ckpt_frequency = env.log_frequency

     if ckpt_frequency is None:
         # default episodic
         return env.done
     # normal ckpt condition by mod remainder (general for venv)
     on_frequency = util.frame_mod(current_frame, ckpt_frequency, env.num_envs)
     return on_frequency or current_frame == env_clock.max_frame