def train_epoch(self, epoch, printer=print):
    """Run one supervised training epoch over ``self.train_loader``.

    Every batch is pulled from a ``data_prefetcher``, passed through the
    model (which returns ``(logits, aux_logits)``) and used for a single
    optimizer step with gradient-norm clipping.  Running loss / top-1 /
    top-5 meters feed the progress lines and the final summary.

    Args:
        epoch: Index of the current epoch; only used in log messages.
        printer: Callable accepting one string, used for progress output.
    """
    top1 = AverageMeter()
    top5 = AverageMeter()
    losses = AverageMeter()

    # The learning rate is read once, before the first step, so every line
    # logged during this epoch reports the rate in effect at epoch start.
    cur_lr = self.optimizer.param_groups[0]['lr']
    shown_lr = round(cur_lr, 5)
    num_batches = len(self.train_loader)
    use_amp = self.opt_level != 'O0'

    self.model.train()

    prefetcher = data_prefetcher(self.train_loader)
    X, y = prefetcher.next()
    i = 0  # 1-based index of the batch currently being processed
    while X is not None:
        i += 1
        N = X.size(0)
        self.steps += 1

        logits, aux_logits = self.model(X)
        loss = self.criterion(logits, y)
        if self.use_aux:
            loss += self.config.aux_weight * self.criterion(aux_logits, y)

        self.optimizer.zero_grad()
        if use_amp:
            with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                scaled_loss.backward()
            # Under Apex AMP the optimizer may step on master copies of the
            # weights, so the clip has to act on those gradients rather
            # than on the model's own parameters.
            clip_params = amp.master_params(self.optimizer)
        else:
            loss.backward()
            clip_params = self.model.parameters()
        nn.utils.clip_grad_norm_(clip_params, self.config.grad_clip)
        self.optimizer.step()

        prec1, prec5 = accuracy(logits, y, topk=(1, 5))
        # Convert each metric to a Python number once and reuse it.
        loss_val = loss.item()
        prec1_val = prec1.item()
        prec5_val = prec5.item()
        losses.update(loss_val, N)
        top1.update(prec1_val, N)
        top5.update(prec5_val, N)

        if self.steps % self.log_step == 0 and self.rank == 0:
            self.writer.add_scalar('train/lr', shown_lr, self.steps)
            self.writer.add_scalar('train/loss', loss_val, self.steps)
            self.writer.add_scalar('train/top1', prec1_val, self.steps)
            self.writer.add_scalar('train/top5', prec5_val, self.steps)

        # ``i`` counts from 1, so the last batch is ``i == num_batches``
        # and the progress denominator is the full batch count.
        if self.gpu == 0 and (i % self.config.print_freq == 0
                              or i == num_batches):
            printer(
                f'Train: Epoch: [{epoch}][{i}/{num_batches}]\t'
                f'Step {self.steps}\t'
                f'lr {shown_lr}\t'
                f'Loss {losses.val:.4f} ({losses.avg:.4f})\t'
                f'Prec@(1,5) ({top1.avg:.1%}, {top5.avg:.1%})\t')

        X, y = prefetcher.next()

    if self.gpu == 0:
        printer("Train: [{:3d}/{}] Final Prec@1 {:.4%}".format(
            epoch, self.total_epochs - 1, top1.avg))
def val_epoch(self, epoch, printer):
    """Evaluate the model on ``self.valid_loader`` without gradients.

    The model is switched to eval mode and every batch from a
    ``data_prefetcher`` is scored with ``self.criterion`` and top-1 / top-5
    accuracy.  Rank 0 prints progress, writes the epoch averages to
    ``self.writer`` and prints a final summary.

    Args:
        epoch: Index of the current epoch; only used in log messages.
        printer: Callable accepting one string, used for progress output.

    Returns:
        The running top-1 average (``top1.avg``) accumulated over all
        batches seen by this process.
    """
    top1 = AverageMeter()
    top5 = AverageMeter()
    losses = AverageMeter()
    num_batches = len(self.valid_loader)

    self.model.eval()

    prefetcher = data_prefetcher(self.valid_loader)
    X, y = prefetcher.next()
    i = 0  # 1-based index of the batch currently being processed

    with torch.no_grad():
        while X is not None:
            N = X.size(0)
            i += 1

            # The second model output is not used during validation.
            logits, _ = self.model(X)
            loss = self.criterion(logits, y)

            prec1, prec5 = accuracy(logits, y, topk=(1, 5))
            losses.update(loss.item(), N)
            top1.update(prec1.item(), N)
            top5.update(prec5.item(), N)

            # ``i`` counts from 1, so the last batch is ``i == num_batches``
            # (comparing against ``num_batches - 1`` fired one batch early).
            if self.rank == 0 and (i % self.config.print_freq == 0
                                   or i == num_batches):
                printer(
                    f'Valid: Epoch: [{epoch}][{i}/{num_batches}]\t'
                    f'Step {self.steps}\t'
                    f'Loss {losses.avg:.4f}\t'
                    f'Prec@(1,5) ({top1.avg:.1%}, {top5.avg:.1%})')

            X, y = prefetcher.next()

    if self.rank == 0:
        self.writer.add_scalar('val/loss', losses.avg, self.steps)
        self.writer.add_scalar('val/top1', top1.avg, self.steps)
        self.writer.add_scalar('val/top5', top5.avg, self.steps)
        printer("Valid: [{:3d}/{}] Final Prec@1 {:.4%}".format(
            epoch, self.total_epochs - 1, top1.avg))

    return top1.avg
def train_epoch(self, epoch, printer=print):
    """Train for one epoch on two views extracted from every batch.

    Each batch from the prefetcher must be 5-dimensional; index 0 and 1
    along the second axis are taken as ``view1`` and ``view2`` and passed,
    together with ``self.mm``, to the model, which returns
    ``(q, target_z)``.  ``self.forward_loss`` turns those into the loss
    used for one optimizer step.  Wall-clock time of the data, forward,
    backward and logging phases is tracked with ``AverageMeter`` objects
    and reported in the progress line.

    Args:
        epoch: Index of the current epoch; forwarded to
            ``self.data_ins.set_epoch`` and shown in log messages.
        printer: Callable accepting one string, used for progress output.
    """
    batch_time = eval_util.AverageMeter()
    data_time = eval_util.AverageMeter()
    forward_time = eval_util.AverageMeter()
    backward_time = eval_util.AverageMeter()
    log_time = eval_util.AverageMeter()

    self.model.train()

    last_mark = time.time()
    self.data_ins.set_epoch(epoch)

    prefetcher = data_prefetcher(self.train_loader)
    images, _ = prefetcher.next()
    batch_idx = 0
    while images is not None:
        batch_idx += 1

        # Both schedules are evaluated with the pre-increment step count.
        self.adjust_learning_rate(self.steps)
        self.adjust_mm(self.steps)
        self.steps += 1

        assert images.dim(
        ) == 5, f"Input must have 5 dims, got: {images.dim()}"
        view1 = images[:, 0, ...].contiguous()
        view2 = images[:, 1, ...].contiguous()
        # Data time covers everything since the previous batch finished,
        # including the view extraction above.
        data_time.update(time.time() - last_mark)

        # Forward pass.
        tick = time.time()
        q, target_z = self.model(view1, view2, self.mm)
        forward_time.update(time.time() - tick)

        # Loss, backward pass and optimizer step are timed together.
        tick = time.time()
        loss = self.forward_loss(q, target_z)

        self.optimizer.zero_grad()
        if self.opt_level != 'O0':
            with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        self.optimizer.step()
        backward_time.update(time.time() - tick)

        tick = time.time()
        on_log_step = self.steps % self.log_step == 0
        # NOTE(review): the nesting of the rank / every-100-steps blocks
        # below was reconstructed from whitespace-collapsed source; confirm
        # it matches the intended structure.
        if on_log_step:
            rounded_lr = round(self.optimizer.param_groups[0]['lr'], 5)
            self.logger.update('steps', self.steps)
            self.logger.update('lr', rounded_lr)
            self.logger.update('mm', round(self.mm, 5))
            self.logger.update('loss', loss.item(), view1.size(0))

            if self.rank == 0:
                self.loss_log_tool.update(
                    self.logger.get_key_val('steps'),
                    self.logger.get_key_val('loss'))
                self.lr_log_tool.update(
                    self.logger.get_key_val('steps'),
                    self.logger.get_key_val('lr'))

                if self.steps % 100 == 0:
                    self.loss_log_tool.plot(self.loss_log_png,
                                            x_label='steps',
                                            y_label='loss',
                                            label='loss')
                    self.loss_log_tool.save_log()
                    self.lr_log_tool.plot(self.lr_log_png,
                                          x_label='steps',
                                          y_label='lr',
                                          label='lr')
                    self.lr_log_tool.save_log()
        log_time.update(time.time() - tick)

        batch_time.update(time.time() - last_mark)
        last_mark = time.time()

        # Progress line, printed on the same cadence as the logger update.
        if self.gpu == 0 and on_log_step:
            printer(
                f'Epoch: [{epoch}][{batch_idx}/{len(self.train_loader)}]\t{str(self.logger)}\t'
                f'Batch Time {batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                f'Data Time {data_time.val:.4f} ({data_time.avg:.4f})\t'
                f'forward Time {forward_time.val:.4f} ({forward_time.avg:.4f})\t'
                f'backward Time {backward_time.val:.4f} ({backward_time.avg:.4f})\t'
                f'Log Time {log_time.val:.4f} ({log_time.avg:.4f})\t')

        images, _ = prefetcher.next()
def train_epoch(self, epoch, printer=print):
    """Run one search epoch, alternating architecture and weight updates.

    For every pair of (training batch, validation batch) the method first
    performs an architecture step: ``self.architect.unrolled_backward``
    followed by ``self.alpha_optim.step()``.  It then performs a weight
    step on the training batch with gradient-norm clipping on
    ``self.model.weights()``.  Running loss / top-1 / top-5 meters feed the
    progress lines and the final summary.

    The epoch ends as soon as either prefetcher is exhausted, so a
    validation loader shorter than the training loader never feeds
    ``None`` batches into the architecture step.

    Args:
        epoch: Index of the current epoch; only used in log messages.
        printer: Callable accepting one string, used for progress output.
    """
    top1 = AverageMeter()
    top5 = AverageMeter()
    losses = AverageMeter()

    # Read once; the same value is passed to the architect and logged for
    # every step of this epoch.
    cur_lr = self.lr_scheduler.get_last_lr()[0]
    shown_lr = round(cur_lr, 5)
    # Number of batch pairs the loop below can actually process.
    num_batches = min(len(self.train_loader), len(self.valid_loader))

    self.model.print_alphas(self.logger)
    self.model.train()

    prefetcher_trn = data_prefetcher(self.train_loader)
    prefetcher_val = data_prefetcher(self.valid_loader)
    trn_X, trn_y = prefetcher_trn.next()
    val_X, val_y = prefetcher_val.next()
    i = 0  # 1-based index of the batch pair currently being processed
    while trn_X is not None and val_X is not None:
        i += 1
        N = trn_X.size(0)
        self.steps += 1

        # architect step (alpha)
        self.alpha_optim.zero_grad()
        self.architect.unrolled_backward(trn_X, trn_y, val_X, val_y,
                                         cur_lr, self.w_optim)
        self.alpha_optim.step()

        # child network step (w)
        self.w_optim.zero_grad()
        logits = self.model(trn_X)
        loss = self.model.criterion(logits, trn_y)
        loss.backward()
        nn.utils.clip_grad_norm_(self.model.weights(),
                                 self.config.w_grad_clip)
        self.w_optim.step()

        prec1, prec5 = accuracy(logits, trn_y, topk=(1, 5))
        # Convert each metric to a Python number once and reuse it.
        loss_val = loss.item()
        prec1_val = prec1.item()
        prec5_val = prec5.item()
        losses.update(loss_val, N)
        top1.update(prec1_val, N)
        top5.update(prec5_val, N)

        if self.steps % self.log_step == 0:
            self.writer.add_scalar('train/lr', shown_lr, self.steps)
            self.writer.add_scalar('train/loss', loss_val, self.steps)
            self.writer.add_scalar('train/top1', prec1_val, self.steps)
            self.writer.add_scalar('train/top5', prec5_val, self.steps)

        # ``i`` counts from 1, so the last pair is ``i == num_batches``
        # and the progress denominator is the full pair count.
        if i % self.config.print_freq == 0 or i == num_batches:
            printer(
                f'Train: Epoch: [{epoch}][{i}/{num_batches}]\t'
                f'Step {self.steps}\t'
                f'lr {shown_lr}\t'
                f'Loss {losses.val:.4f} ({losses.avg:.4f})\t'
                f'Prec@(1,5) ({top1.avg:.1%}, {top5.avg:.1%})\t')

        trn_X, trn_y = prefetcher_trn.next()
        val_X, val_y = prefetcher_val.next()

    printer("Train: [{:3d}/{}] Final Prec@1 {:.4%}".format(
        epoch, self.total_epochs - 1, top1.avg))