Esempio n. 1
0
File: sac.py Progetto: takuma-ynd/dl
    def save(self):
        """Write a checkpoint of the trainer state and dump the buffer.

        The checkpoint handed to ``self.ckptr.save`` holds the state dicts of
        ``pi``, ``qf1``, ``qf2``, ``vf`` and their ``opt_*`` optimizers, the
        ``log_alpha`` / ``opt_alpha`` entries, the result of
        ``misc.env_state_dict(self.env)`` and the counter ``self.t``.
        ``log_alpha`` and ``opt_alpha`` are stored as ``None`` unless
        ``self.automatic_entropy_tuning`` is truthy.

        The buffer values are not part of that checkpoint: only their nested
        structure is recorded under ``'buffer_format'``, while the flattened
        values are written to ``buffer.npz`` inside ``self.ckptr.ckptdir``.
        """
        module_names = ('pi', 'qf1', 'qf2', 'vf',
                        'opt_pi', 'opt_qf1', 'opt_qf2', 'opt_vf')
        ckpt = {name: getattr(self, name).state_dict()
                for name in module_names}

        # The entropy-temperature entries are only meaningful when tuning is
        # enabled; otherwise the keys are still present but set to None.
        if self.automatic_entropy_tuning:
            ckpt['log_alpha'] = self.log_alpha
            ckpt['opt_alpha'] = self.opt_alpha.state_dict()
        else:
            ckpt['log_alpha'] = None
            ckpt['opt_alpha'] = None

        ckpt['env'] = misc.env_state_dict(self.env)
        ckpt['t'] = self.t

        buffer_state = self.buffer.state_dict()
        ckpt['buffer_format'] = nest.get_structure(buffer_state)
        self.ckptr.save(ckpt, self.t)

        # The buffer can be huge, so it is kept out of the per-step checkpoint
        # and written separately to one fixed path. Each flattened entry is
        # keyed by its zero-padded position.
        buffer_path = os.path.join(self.ckptr.ckptdir, 'buffer.npz')
        flat_entries = {
            f'{idx:04d}': value
            for idx, value in enumerate(nest.flatten(buffer_state))
        }
        np.savez(buffer_path, **flat_entries)
Esempio n. 2
0
 def save(self):
     """Checkpoint the policy, its optimizer, the env state and ``self.t``.

     The collected dict is passed to ``self.ckptr.save`` together with the
     current value of ``self.t``.
     """
     snapshot = dict(
         pi=self.pi.state_dict(),
         opt=self.opt.state_dict(),
         env=misc.env_state_dict(self.env),
         t=self.t,
     )
     self.ckptr.save(snapshot, self.t)
Esempio n. 3
0
 def save(self):
     """Collect the trainer state into a dict and pass it to the checkpointer.

     Stored entries: state dicts of ``pi``, ``opt``, ``opt_l`` and
     ``_actor``; the raw ``self.log_lambda_`` value (under the key
     ``'lambda_'``); the result of ``misc.env_state_dict(self.env)``; and
     the counter ``self.t``, which is also given to ``self.ckptr.save``.
     """
     ckpt = {}
     ckpt['pi'] = self.pi.state_dict()
     ckpt['opt'] = self.opt.state_dict()
     # Stored as-is (no state_dict call), under a key without the "log_" prefix.
     ckpt['lambda_'] = self.log_lambda_
     ckpt['opt_l'] = self.opt_l.state_dict()
     ckpt['env'] = misc.env_state_dict(self.env)
     ckpt['_actor'] = self._actor.state_dict()
     ckpt['t'] = self.t
     self.ckptr.save(ckpt, self.t)
 def save(self):
     """Collect the trainer state into a dict and pass it to the checkpointer.

     Stored entries: state dicts of ``pi``, ``vf``, ``opt_pi``, ``opt_vf``
     and ``_actor``; the raw ``self.kl_weight`` value; the result of
     ``misc.env_state_dict(self.env)``; and the counter ``self.t``, which
     is also given to ``self.ckptr.save``.
     """
     ckpt = {
         name: getattr(self, name).state_dict()
         for name in ('pi', 'vf', 'opt_pi', 'opt_vf')
     }
     # kl_weight is stored directly; it is not asked for a state_dict.
     ckpt.update(
         kl_weight=self.kl_weight,
         env=misc.env_state_dict(self.env),
         _actor=self._actor.state_dict(),
         t=self.t,
     )
     self.ckptr.save(ckpt, self.t)
Esempio n. 5
0
    def save(self):
        """Write a checkpoint of the trainer state and dump the buffer.

        The checkpoint handed to ``self.ckptr.save`` holds the state dicts of
        ``qf``, ``qf_targ``, ``opt`` and ``_actor``, the result of
        ``misc.env_state_dict(self.env)`` and the counter ``self.t``.

        The buffer values are not part of that checkpoint: only their nested
        structure is recorded under ``'buffer_format'``, while the flattened
        values are written to ``buffer.npz`` inside ``self.ckptr.ckptdir``.
        """
        state_dict = {
            'qf': self.qf.state_dict(),
            # Save the target network's own weights. Previously this entry
            # was a second copy of ``self.qf``, so the target network was
            # never checkpointed.
            # NOTE(review): assumes the target network lives at
            # ``self.qf_targ`` (matching the checkpoint key) -- confirm
            # against the class constructor.
            'qf_targ': self.qf_targ.state_dict(),
            'opt': self.opt.state_dict(),
            '_actor': self._actor.state_dict(),
            'env': misc.env_state_dict(self.env),
            't': self.t,
        }
        buffer_dict = self.buffer.state_dict()
        state_dict['buffer_format'] = nest.get_structure(buffer_dict)
        self.ckptr.save(state_dict, self.t)

        # The buffer can be huge, so it is kept out of the per-step checkpoint
        # and written separately to one fixed path. Each flattened entry is
        # keyed by its zero-padded position.
        np.savez(os.path.join(self.ckptr.ckptdir, 'buffer.npz'),
                 **{f'{i:04d}': x for i, x in
                    enumerate(nest.flatten(buffer_dict))})