def train(self, start_epoch):
    """Drive the outer training loop, one ``train_epoch`` call per epoch.

    Calls ``self.warmup()`` once up front when ``self._hp.n_warmup_steps``
    is positive, then iterates epochs ``start_epoch`` (inclusive) through
    ``self._hp.num_epochs`` (exclusive). After each epoch, if saving is
    enabled (``self.args.dont_save`` is falsy) and ``self.is_chef`` is
    truthy, it writes a checkpoint, calls ``self.agent.save_state`` and
    runs ``self.val()``.

    Args:
        start_epoch: Index of the first epoch to run.
    """
    # NOTE(review): the source formatting was flattened; ``save_state`` and
    # ``val`` are taken to belong to the save branch -- confirm against the
    # original layout.
    if self._hp.n_warmup_steps > 0:
        self.warmup()

    for epoch_idx in range(start_epoch, self._hp.num_epochs):
        print("Epoch {}".format(epoch_idx))
        self.train_epoch(epoch_idx)

        # Nothing more to do for this epoch unless saving is enabled and
        # this instance is flagged as chef.
        if self.args.dont_save or not self.is_chef:
            continue

        checkpoint = {
            'epoch': epoch_idx,
            'global_step': self.global_step,
            'state_dict': self.agent.state_dict(),
        }
        weights_dir = os.path.join(self._hp.exp_path, 'weights')
        ckpt_name = CheckpointHandler.get_ckpt_name(epoch_idx)
        save_checkpoint(checkpoint, weights_dir, ckpt_name)

        self.agent.save_state(self._hp.exp_path)
        self.val()
def train(self, start_epoch):
    """Run the epoch loop with optional checkpointing and periodic validation.

    Unless ``self.args.skip_first_val`` is set, ``self.val()`` is called once
    before any training. Epochs run from ``start_epoch`` (inclusive) through
    ``self._hp.num_epochs`` (exclusive). After each epoch a checkpoint is
    written unless ``self.args.dont_save`` is set, and ``self.val()`` is
    called whenever the epoch index is a multiple of
    ``self.args.val_interval``.

    Args:
        start_epoch: Index of the first epoch to run.
    """
    # NOTE(review): the source formatting was flattened; the interval check
    # is taken to sit at loop level, next to the save branch -- confirm
    # against the original layout.
    run_initial_val = not self.args.skip_first_val
    if run_initial_val:
        self.val()

    for epoch_idx in range(start_epoch, self._hp.num_epochs):
        self.train_epoch(epoch_idx)

        if not self.args.dont_save:
            checkpoint = {
                'epoch': epoch_idx,
                'global_step': self.global_step,
                'state_dict': self.model.state_dict(),
                'optimizer': self.optimizer.state_dict(),
            }
            weights_dir = os.path.join(self._hp.exp_path, 'weights')
            ckpt_name = CheckpointHandler.get_ckpt_name(epoch_idx)
            save_checkpoint(checkpoint, weights_dir, ckpt_name)

        # Validate only on epochs that fall on the configured interval.
        on_val_epoch = epoch_idx % self.args.val_interval == 0
        if on_val_epoch:
            self.val()