def prep_training(self, train_device):
    """Ready ``q1``, ``q2``, ``tq1``, ``tq2`` and ``log_alpha`` for training.

    Calls ``train()`` on ``q1`` and ``q2``, then passes ``q1``, ``q2``,
    ``tq1``, ``tq2`` and ``log_alpha`` (in that order) to ``to_device``.
    ``train()`` is not called on ``tq1``/``tq2``.

    Args:
        train_device: Forwarded unchanged as the second argument of
            ``to_device``.
    """
    for net in (self.q1, self.q2):
        net.train()

    # NOTE(review): to_device is defined outside this block; presumably it
    # moves its first argument onto the given device -- confirm.
    for item in (self.q1, self.q2, self.tq1, self.tq2, self.log_alpha):
        to_device(item, train_device)
def prep_training(self, train_device):
    """Ready ``pi``, ``q1``, ``q2`` and the ``tq1``/``tq2`` pair for training.

    Calls ``train()`` on ``pi``, ``q1`` and ``q2``, then passes ``pi``,
    ``q1``, ``q2``, ``tq1`` and ``tq2`` (in that order) to ``to_device``.
    ``train()`` is not called on ``tq1``/``tq2``.

    Args:
        train_device: Forwarded unchanged as the second argument of
            ``to_device``.
    """
    trainable = (self.pi, self.q1, self.q2)
    for net in trainable:
        net.train()

    # NOTE(review): to_device is defined outside this block; presumably it
    # moves its first argument onto the given device -- confirm.
    for net in trainable + (self.tq1, self.tq2):
        to_device(net, train_device)
def prep_rollout(self, rollout_device):
    """Ready ``pi``, ``q1`` and ``q2`` for rollouts.

    Calls ``eval()`` on each of the three, then passes each of them (same
    order) to ``to_device``.

    Args:
        rollout_device: Forwarded unchanged as the second argument of
            ``to_device``.
    """
    nets = (self.pi, self.q1, self.q2)

    # All eval() calls happen before any to_device call.
    for net in nets:
        net.eval()
    for net in nets:
        to_device(net, rollout_device)
def prep_training(self, train_device):
    """Call ``train()`` on the discriminator and pass it to ``to_device``.

    Args:
        train_device: Forwarded unchanged as the second argument of
            ``to_device``.
    """
    disc = self.discriminator
    disc.train()
    to_device(disc, train_device)
def prep_rollout(self, rollout_device):
    """Call ``eval()`` on the discriminator and pass it to ``to_device``.

    Args:
        rollout_device: Forwarded unchanged as the second argument of
            ``to_device``.
    """
    disc = self.discriminator
    disc.eval()
    to_device(disc, rollout_device)
def prep_training(self, train_device):
    """Call ``train()`` on the classifier and pass it to ``to_device``.

    Args:
        train_device: Forwarded unchanged as the second argument of
            ``to_device``.
    """
    clf = self.classifier
    clf.train()
    to_device(clf, train_device)
def prep_rollout(self, rollout_device):
    """Call ``eval()`` on the classifier and pass it to ``to_device``.

    Args:
        rollout_device: Forwarded unchanged as the second argument of
            ``to_device``.
    """
    clf = self.classifier
    clf.eval()
    to_device(clf, rollout_device)
def prep_training(self, train_device):
    """Ready the actor and the critic for training.

    Calls ``train()`` on ``actor`` then ``critic``, and afterwards passes
    each of them (same order) to ``to_device``.

    Args:
        train_device: Forwarded unchanged as the second argument of
            ``to_device``.
    """
    nets = (self.actor, self.critic)

    # Both train() calls happen before any to_device call.
    for net in nets:
        net.train()
    for net in nets:
        to_device(net, train_device)
def prep_rollout(self, rollout_device):
    """Call ``eval()`` on the actor and pass it to ``to_device``.

    Only ``self.actor`` is touched here.

    Args:
        rollout_device: Forwarded unchanged as the ``device`` keyword
            argument of ``to_device``.
    """
    actor = self.actor
    actor.eval()
    # Keyword form kept on purpose: it is how this call site invokes to_device.
    to_device(model=actor, device=rollout_device)
def prep_rollout(self, rollout_device):
    """Ready ``q1``, ``q2`` and ``log_alpha`` for rollouts.

    Calls ``eval()`` on ``q1`` and ``q2``, then passes ``q1``, ``q2`` and
    ``log_alpha`` (in that order) to ``to_device``.

    Args:
        rollout_device: Forwarded unchanged as the second argument of
            ``to_device``.
    """
    for net in (self.q1, self.q2):
        net.eval()

    # NOTE(review): to_device is defined outside this block; presumably it
    # moves its first argument onto the given device -- confirm.
    for item in (self.q1, self.q2, self.log_alpha):
        to_device(item, rollout_device)