def save_checkpoint(self, filename, extra_state):
    """Write the training state to a checkpoint file from the master worker.

    When ``distributed_utils.is_master(self.args)`` is false, nothing is
    saved and ``extra_state`` is left untouched.

    Args:
        filename: Forwarded unchanged to ``utils.save_state``.
        extra_state: Mapping forwarded to ``utils.save_state``. On the master
            worker it is modified in place: ``self.meters`` is stored under
            the ``'train_meters'`` key before saving.
    """
    # Only one worker should write the checkpoint.
    if not distributed_utils.is_master(self.args):
        return

    extra_state['train_meters'] = self.meters
    state_pieces = (
        self.args,
        self.model,
        self.criterion,
        self.optimizer,
        self.lr_scheduler,
        self._num_updates,
        self._optim_history,
        extra_state,
    )
    utils.save_state(filename, *state_pieces)
def save_checkpoint(self, filename, extra_state):
    """Write the training state to a checkpoint file from the master worker.

    If ``self.args.amp`` is truthy, two entries are added to ``extra_state``
    on every worker (before the master check): ``'amp_state_dict'`` holding
    ``amp.state_dict()`` and ``'amp_master_params'`` holding a list built
    from ``amp.master_params(self.optimizer.optimizer)``.

    Args:
        filename: Forwarded unchanged to ``utils.save_state``.
        extra_state: Mapping forwarded to ``utils.save_state``; modified in
            place as described above.
    """
    if self.args.amp:
        extra_state['amp_state_dict'] = amp.state_dict()
        master_params = amp.master_params(self.optimizer.optimizer)
        extra_state['amp_master_params'] = list(master_params)

    # Only one worker should write the checkpoint.
    if not distributed_utils.is_master(self.args):
        return

    write_state = utils.save_state
    write_state(
        filename,
        self.args,
        self.get_model(),
        self.criterion,
        self.optimizer,
        self.lr_scheduler,
        self._num_updates,
        self._optim_history,
        extra_state,
    )
def save_checkpoint(self, filename, extra_state):
    """Write the training state to a checkpoint file from the master worker.

    The model is saved as ``self.get_model().state_dict()``. When
    ``self.args.sep_optim`` is truthy, the state is written through
    ``utils.save_state_for_sep_optim``, which additionally receives
    ``self.dec_optimizer``, ``self.dec_lr_scheduler`` and
    ``self._dec_optim_history``; otherwise ``utils.save_state`` is used.

    Args:
        filename: Forwarded unchanged to the selected save helper.
        extra_state: Mapping forwarded to the save helper. On the master
            worker it is modified in place: ``self.meters`` is stored under
            the ``'train_meters'`` key before saving.
    """
    # Only one worker should write the checkpoint.
    if not distributed_utils.is_master(self.args):
        return

    extra_state['train_meters'] = self.meters
    model_state = self.get_model().state_dict()

    if self.args.sep_optim:
        utils.save_state_for_sep_optim(
            filename,
            self.args,
            model_state,
            self.criterion,
            self.optimizer,
            self.lr_scheduler,
            self._num_updates,
            self.dec_optimizer,
            self.dec_lr_scheduler,
            self._optim_history,
            self._dec_optim_history,
            extra_state,
        )
    else:
        utils.save_state(
            filename,
            self.args,
            model_state,
            self.criterion,
            self.optimizer,
            self.lr_scheduler,
            self._num_updates,
            self._optim_history,
            extra_state,
        )
def save_checkpoint(self, filename, extra_state):
    """Write the training state to a checkpoint file from the master worker.

    The model is saved as ``self.get_model().state_dict()``. When
    ``distributed_utils.is_master(self.args)`` is false, nothing is saved
    and ``extra_state`` is left untouched.

    Args:
        filename: Forwarded unchanged to ``utils.save_state``.
        extra_state: Mapping forwarded to ``utils.save_state``. On the master
            worker it is modified in place: ``self.meters`` is stored under
            the ``'train_meters'`` key before saving.
    """
    # Only one worker should write the checkpoint.
    if not distributed_utils.is_master(self.args):
        return

    extra_state['train_meters'] = self.meters
    write_state = utils.save_state
    model = self.get_model()
    write_state(
        filename,
        self.args,
        model.state_dict(),
        self.criterion,
        self.optimizer,
        self.lr_scheduler,
        self._num_updates,
        self._optim_history,
        extra_state,
    )
def save_checkpoint(self, filename, extra_state):
    """Write the training state to a checkpoint file from rank 0 only.

    Workers whose ``self.args.distributed_rank`` is not equal to 0 return
    immediately without saving anything.

    Args:
        filename: Forwarded unchanged to ``utils.save_state``.
        extra_state: Forwarded unchanged to ``utils.save_state``; this method
            does not modify it.
    """
    # Only one worker should write the checkpoint.
    is_rank_zero = self.args.distributed_rank == 0
    if not is_rank_zero:
        return

    write_state = utils.save_state
    write_state(
        filename,
        self.args,
        self.model,
        self.criterion,
        self.optimizer,
        self.lr_scheduler,
        self._num_updates,
        self._optim_history,
        extra_state,
    )
def _async_save_checkpoint(self, rank, device_id, filename, extra_state):
    """Save the training state to ``filename`` via ``utils.save_state``.

    Args:
        rank: Accepted but not used by this method.
        device_id: Accepted but not used by this method.
        filename: Forwarded unchanged to ``utils.save_state``.
        extra_state: Forwarded unchanged to ``utils.save_state``.

    NOTE(review): no update counter is passed to ``utils.save_state`` here;
    confirm this matches the ``save_state`` signature used by this file.
    """
    write_state = utils.save_state
    state_pieces = (
        self.args,
        self.model,
        self.criterion,
        self.optimizer,
        self.lr_scheduler,
        self._optim_history,
        extra_state,
    )
    write_state(filename, *state_pieces)