# Assumes module-level imports of os, torch, and the project's own helpers
# (build_optimizer, build_evaluator, get_warmup_lr, get_current_lr, Timer,
# LossLogger, logger), which live outside this snippet.

def warm_up(self, scaler, model, dataloader, cfg, prefix='train'):
    """Run the warm-up phase with its own optimizer and warm-up LR schedule."""
    optimizer = build_optimizer(cfg, model)
    model.train()

    cur_iter = 0
    while cur_iter < cfg.WARMUP.ITERS:
        for i, sample in enumerate(dataloader):
            cur_iter += 1
            if cur_iter >= cfg.WARMUP.ITERS:
                break

            # Set this iteration's warm-up learning rate on every param group.
            lr = get_warmup_lr(cur_iter, cfg)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

            losses = self.run_step(scaler, model, sample, optimizer, None, None, prefix)

            if self.cfg.local_rank == 0:
                # Log the total loss followed by each individual loss term.
                template = "[iter {}/{}, lr {}] Total train loss: {:.4f} \n{}"
                logger.info(
                    template.format(
                        cur_iter,
                        cfg.WARMUP.ITERS,
                        round(get_current_lr(optimizer), 6),
                        losses["loss"].item(),
                        "\n".join(
                            ["{}: {:.4f}".format(n, l.item())
                             for n, l in losses.items() if n != "loss"]),
                    )
                )

    # The warm-up optimizer is discarded; the main training optimizer is built elsewhere.
    del optimizer
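
# The warm-up schedule itself (get_warmup_lr) is defined elsewhere in the
# project. For reference, a linear warm-up typically looks like the minimal
# sketch below; the cfg fields WARMUP.INIT_LR and WARMUP.TARGET_LR are
# hypothetical placeholders, not the project's actual config keys.
def _linear_warmup_lr_sketch(cur_iter, cfg):
    # Ramp linearly from INIT_LR to TARGET_LR over WARMUP.ITERS iterations.
    alpha = cur_iter / cfg.WARMUP.ITERS
    return cfg.WARMUP.INIT_LR + alpha * (cfg.WARMUP.TARGET_LR - cfg.WARMUP.INIT_LR)
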
def train_epoch(self, scaler, epoch, model, dataset, dataloader, optimizer, prefix="train"):
    """Train for one epoch, logging losses, throughput, and evaluation metrics."""
    model.train()

    _timer = Timer()
    lossLogger = LossLogger()
    performanceLogger = build_evaluator(self.cfg, dataset)

    num_iters = len(dataloader)
    for i, sample in enumerate(dataloader):
        self.n_iters_elapsed += 1

        _timer.tic()
        self.run_step(scaler, model, sample, optimizer, lossLogger, performanceLogger, prefix)
        # Wait for all CUDA kernels to finish so the timer measures real step time.
        torch.cuda.synchronize()
        _timer.toc()

        if (i + 1) % self.cfg.N_ITERS_TO_DISPLAY_STATUS == 0 and self.cfg.local_rank == 0:
            template = "[epoch {}/{}, iter {}/{}, lr {}] Total train loss: {:.4f} (ips = {:.2f})\n{}"
            logger.info(
                template.format(
                    epoch,
                    self.cfg.N_MAX_EPOCHS - 1,
                    i,
                    num_iters - 1,
                    round(get_current_lr(optimizer), 6),
                    lossLogger.meters["loss"].value,
                    # Throughput (images per second) over the display window.
                    self.batch_size * self.cfg.N_ITERS_TO_DISPLAY_STATUS / _timer.diff,
                    "\n".join(
                        ["{}: {:.4f}".format(n, l.value)
                         for n, l in lossLogger.meters.items() if n != "loss"]),
                )
            )

    if self.cfg.TENSORBOARD and self.cfg.local_rank == 0:
        # Log per-epoch averaged train losses.
        for n, l in lossLogger.meters.items():
            self.tb_writer.add_scalar(f"loss/{prefix}_{n}", l.global_avg, epoch)
        # Log the evaluation metrics accumulated over the epoch.
        performances = performanceLogger.evaluate()
        if performances is not None and len(performances):
            for k, v in performances.items():
                self.tb_writer.add_scalar(f"performance/{prefix}_{k}", v, epoch)

    # Per-layer weight histograms (currently hard-disabled by the `and False` guard).
    if self.cfg.TENSORBOARD_WEIGHT and False:
        for name, param in model.named_parameters():
            # Split e.g. "backbone.conv1.weight" into ("backbone.conv1", "weight").
            layer, attr = os.path.splitext(name)
            attr = attr[1:]
            self.tb_writer.add_histogram("{}/{}".format(layer, attr), param, epoch)
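
# For context, a driver loop would call these methods roughly as sketched
# below. The surrounding setup (the trainer instance, cfg.USE_AMP flag, and
# the point where the main optimizer is built) is assumed for illustration,
# not taken from this file.
def _training_driver_sketch(trainer, cfg, model, dataset, dataloader):
    scaler = torch.cuda.amp.GradScaler(enabled=cfg.USE_AMP)  # cfg.USE_AMP is a placeholder flag
    if cfg.WARMUP.ITERS > 0:
        trainer.warm_up(scaler, model, dataloader, cfg)      # warm-up uses its own optimizer
    optimizer = build_optimizer(cfg, model)                  # main optimizer for the full run
    for epoch in range(cfg.N_MAX_EPOCHS):
        trainer.train_epoch(scaler, epoch, model, dataset, dataloader, optimizer)
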