import logging
import time

import torch

logger = logging.getLogger(__name__)


def validate(config, val_loader, model, criterion, lr_scheduler, epoch,
             output_dir, tb_log_dir, writer_dict=None, topk=(1, 5)):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # Switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            # Compute output; train_step=-1 evaluates using MDEQ
            # (even when pre-training)
            output = model(input,
                           train_step=-1,
                           writer=writer_dict['writer'] if writer_dict else None)  # guard the None default
            target = target.cuda(non_blocking=True)
            loss = criterion(output, target)

            # Measure accuracy and record loss
            losses.update(loss.item(), input.size(0))
            prec1, prec5 = accuracy(output, target, topk=topk)
            top1.update(prec1[0], input.size(0))
            top5.update(prec5[0], input.size(0))

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

        msg = 'Test: Time {batch_time.avg:.3f}\t' \
              'Loss {loss.avg:.4f}\t' \
              'Error@1 {error1:.3f}\t' \
              'Error@5 {error5:.3f}\t' \
              'Accuracy@1 {top1.avg:.3f}\t' \
              'Accuracy@5 {top5.avg:.3f}\t'.format(
                  batch_time=batch_time, loss=losses, top1=top1, top5=top5,
                  error1=100 - top1.avg, error5=100 - top5.avg)
        logger.info(msg)

        if writer_dict:
            writer = writer_dict['writer']
            global_steps = writer_dict['valid_global_steps']
            writer.add_scalar('valid_loss', losses.avg, global_steps)
            writer.add_scalar('valid_top1', top1.avg, global_steps)
            writer_dict['valid_global_steps'] = global_steps + 1

    return top1.avg
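# ---------------------------------------------------------------------------
# `AverageMeter`, `accuracy`, and `logger` are assumed to come from the
# project's utility modules (they are not defined in this file). Below is a
# minimal sketch with the usual PyTorch ImageNet-example semantics, for
# reference only; the project's own versions may differ.
# ---------------------------------------------------------------------------
class AverageMeter:
    """Tracks the latest value, running sum, count, and running average."""

    def __init__(self):
        self.val, self.sum, self.count, self.avg = 0, 0, 0, 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Computes precision@k (as a percentage) for each k in `topk`."""
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the top-k predictions per sample, shape (maxk, batch)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # keepdim=True yields a 1-element tensor, so callers can index [0]
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res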
def train(config, train_loader, model, criterion, optimizer, lr_scheduler,
          epoch, output_dir, tb_log_dir, writer_dict, topk=(1, 5)):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # Switch to train mode
    model.train()

    end = time.time()
    total_batch_num = len(train_loader)
    effec_batch_num = int(config.PERCENT * total_batch_num)
    for i, (input, target) in enumerate(train_loader):
        # Train on partial training data (a config.PERCENT fraction)
        if i >= effec_batch_num:
            break

        # Measure data loading time
        data_time.update(time.time() - end)

        # target = target - 1  # Specific for imagenet

        # Compute output; the global step tells the model how far
        # training has progressed
        output = model(input,
                       train_step=(lr_scheduler._step_count - 1),
                       writer=writer_dict['writer'])
        target = target.cuda(non_blocking=True)
        loss = criterion(output, target)

        # Compute gradient and do update step
        optimizer.zero_grad()
        loss.backward()
        if config.TRAIN.CLIP > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), config.TRAIN.CLIP)
        optimizer.step()
        if config.TRAIN.LR_SCHEDULER != 'step':
            lr_scheduler.step()

        # Measure accuracy and record loss
        losses.update(loss.item(), input.size(0))
        prec1, prec5 = accuracy(output, target, topk=topk)
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % config.PRINT_FREQ == 0:
            msg = 'Epoch: [{0}][{1}/{2}]\t' \
                  'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
                  'Speed {speed:.1f} samples/s\t' \
                  'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                  'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \
                  'Accuracy@1 {top1.val:.3f} ({top1.avg:.3f})\t' \
                  'Accuracy@5 {top5.val:.3f} ({top5.avg:.3f})\t'.format(
                      epoch, i, effec_batch_num, batch_time=batch_time,
                      speed=input.size(0) / batch_time.val,
                      data_time=data_time, loss=losses, top1=top1, top5=top5)
            logger.info(msg)

            if writer_dict:
                writer = writer_dict['writer']
                global_steps = writer_dict['train_global_steps']
                writer.add_scalar('train_loss', losses.val, global_steps)
                writer.add_scalar('train_top1', top1.val, global_steps)
                writer_dict['train_global_steps'] = global_steps + 1
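# ---------------------------------------------------------------------------
# A minimal sketch of how `train` and `validate` are typically wired together
# in an epoch loop. The `writer_dict` layout and the scheduler handling match
# their use above; the config field names `TRAIN.BEGIN_EPOCH` and
# `TRAIN.END_EPOCH` and the surrounding setup are assumptions, not part of
# this file.
# ---------------------------------------------------------------------------
def main_loop(config, model, criterion, optimizer, lr_scheduler,
              train_loader, val_loader, output_dir, tb_log_dir):
    from torch.utils.tensorboard import SummaryWriter

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    best_top1 = 0.0
    for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH):
        train(config, train_loader, model, criterion, optimizer,
              lr_scheduler, epoch, output_dir, tb_log_dir, writer_dict)

        # `train` only steps non-'step' schedulers per batch, so a 'step'
        # schedule presumably advances once per epoch here.
        if config.TRAIN.LR_SCHEDULER == 'step':
            lr_scheduler.step()

        top1 = validate(config, val_loader, model, criterion, lr_scheduler,
                        epoch, output_dir, tb_log_dir, writer_dict)
        best_top1 = max(best_top1, top1)

    writer_dict['writer'].close()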