def test():
    test_losses = misc.AverageMeter()
    test_top1 = misc.AverageMeter()
    test_top5 = misc.AverageMeter()

    # switch to evaluate mode
    model.eval()
    prefetcher = datasets.DataPrefetcher(test_loader)

    with torch.no_grad():
        data, target = prefetcher.next()
        while data is not None:
            default_graph.clear_all_tensors()
            data, target = data.to(args.device), target.to(args.device)
            output = model(data)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = misc.accuracy(output, target, topk=(1, 5))
            test_losses.update(loss.item(), data.size(0))
            test_top1.update(prec1.item(), data.size(0))
            test_top5.update(prec5.item(), data.size(0))

            data, target = prefetcher.next()

    # fraction of gate parameters that are still non-zero
    test_sparsity = (torch.cat(gates_params) != 0).float().mean().item()
    print(' * Test set: Loss_CE: %.4f, '
          'Sparsity: %.4f, Top1 acc: %.4f, Top5 acc: %.4f\n'
          % (test_losses.avg, test_sparsity, test_top1.avg, test_top5.avg))
    return test_top1.avg, test_sparsity
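
# misc.AverageMeter is used throughout this file but defined elsewhere in the
# repo. The class below is a minimal sketch, an assumption following the
# standard PyTorch ImageNet-example meter (val/sum/count/avg with update());
# the repo's own misc.AverageMeter may differ.
class AverageMeterSketch:
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        # val: latest metric value, n: number of samples it was averaged over
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count if self.count else 0.0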
def validate(val_loader, model, criterion, epoch):
    losses = misc.AverageMeter()
    top1 = misc.AverageMeter()
    top5 = misc.AverageMeter()

    # switch to evaluate mode
    prefetcher = datasets.DataPrefetcher(val_loader)
    model.eval()

    input, target = prefetcher.next()
    i = -1
    while input is not None:
        i += 1
        with torch.no_grad():
            output = model(input)
            loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = misc.accuracy(output.data, target, topk=(1, 5))
        reduced_loss = reduce_tensor(loss.data)
        prec1 = reduce_tensor(prec1)
        prec5 = reduce_tensor(prec5)

        losses.update(to_python_float(reduced_loss), input.size(0))
        top1.update(to_python_float(prec1), input.size(0))
        top5.update(to_python_float(prec5), input.size(0))

        input, target = prefetcher.next()

    print(' * Test Epoch {0}, Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}\n'
          .format(epoch, top1=top1, top5=top5))
    return top1.avg
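
# reduce_tensor and to_python_float are used for distributed logging above but
# are not defined in this file. The definitions below are a sketch, assuming
# the common pattern from the NVIDIA Apex ImageNet example: average a metric
# tensor across processes, then pull it back to a Python float. The repo's own
# helpers may differ.
import torch.distributed as dist

def reduce_tensor(tensor):
    # Sum the tensor over all ranks, then divide by the world size so every
    # process logs the same averaged metric.
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= dist.get_world_size()
    return rt

def to_python_float(t):
    # Convert a 0-dim tensor to a Python float; this forces a host<->device sync.
    return t.item() if hasattr(t, 'item') else float(t)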
def train(train_loader, model, criterion, optimizer, epoch):
    losses = misc.AverageMeter()
    top1 = misc.AverageMeter()
    top5 = misc.AverageMeter()

    # switch to train mode
    prefetcher = datasets.DataPrefetcher(train_loader)
    model.train()

    input, target = prefetcher.next()
    i = -1
    while input is not None:
        i += 1
        output = model(input)
        loss = criterion(output, target)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % args.log_interval == 0:
            prec1, prec5 = misc.accuracy(output.data, target, topk=(1, 5))

            # Average loss and accuracy across processes for logging
            reduced_loss = reduce_tensor(loss.data)
            prec1 = reduce_tensor(prec1)
            prec5 = reduce_tensor(prec5)

            # to_python_float incurs a host<->device sync
            losses.update(to_python_float(reduced_loss), input.size(0))
            top1.update(to_python_float(prec1), input.size(0))
            top5.update(to_python_float(prec5), input.size(0))

            torch.cuda.synchronize()
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.10f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, i, len(train_loader), loss=losses, top1=top1, top5=top5))

        input, target = prefetcher.next()
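
# datasets.DataPrefetcher drives every loop in this file but lives in the
# datasets module. The class below is a sketch of an Apex-style prefetcher,
# included as an assumption: it copies the next batch to the GPU on a side CUDA
# stream while the current batch is being processed, and returns (None, None)
# once the loader is exhausted (which is what the `while data is not None`
# loops above rely on). The repo's implementation may differ, e.g. by also
# normalizing inputs.
class DataPrefetcherSketch:
    def __init__(self, loader):
        self.loader = iter(loader)
        self.stream = torch.cuda.Stream()
        self.preload()

    def preload(self):
        try:
            self.next_input, self.next_target = next(self.loader)
        except StopIteration:
            self.next_input, self.next_target = None, None
            return
        with torch.cuda.stream(self.stream):
            # asynchronous host-to-device copy on the side stream
            self.next_input = self.next_input.cuda(non_blocking=True)
            self.next_target = self.next_target.cuda(non_blocking=True)

    def next(self):
        # wait for the side-stream copy, hand out the batch, start the next copy
        torch.cuda.current_stream().wait_stream(self.stream)
        input, target = self.next_input, self.next_target
        if input is not None:
            self.preload()
        return input, target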
best_acc = 0
top1 = misc.AverageMeter()
top5 = misc.AverageMeter()

prefetcher = datasets.DataPrefetcher(train_loader)
data, target = prefetcher.next()
i = -1
while data is not None:
    i += 1
    model.train()
    optimizer.zero_grad()
    output = model(data)
    loss_ce = criterion(output, target)
    # regularizer pushes the mean absolute gate value towards the target sparsity level
    loss_reg = args.lambd * (torch.cat(gates_params).abs().mean() - args.sparsity_level) ** 2
    loss = loss_ce + loss_reg
    loss.backward()
    optimizer.step()