Example 1
def reduce_buffers(self, bs):
    # Synchronize the running statistics of each randomly wired stage across
    # workers; bs is the local batch size used as the reduction weight. The
    # reduced batch size is discarded so every stage is weighted by the same
    # local bs rather than by an already-reduced global count.
    for stage in self.stages:
        if isinstance(stage, RandomlyWiredStage) and self.training:
            _, r_mean, r_var, r_usage = batch_reduce(bs,
                                                     stage.nn_running_mean,
                                                     stage.nn_running_var,
                                                     stage.node_running_usage)
            stage.nn_running_mean.copy_(r_mean)
            stage.nn_running_var.copy_(r_var)
            stage.node_running_usage.copy_(r_usage)
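
All three examples call a batch_reduce helper that is not included in the excerpt. Below is a minimal sketch of what it could look like, assuming the semantics implied by the call sites (the batch size is summed across workers, every other tensor is averaged weighted by the local batch size) and an already-initialized torch.distributed process group; the project's actual helper may differ.

import torch.distributed as dist

def batch_reduce(bs, *tensors):
    # Hypothetical sketch, not the project's actual implementation.
    # Weight each tensor by the local batch size before summing so the
    # result is a global, batch-size-weighted average.
    weighted = [t * bs for t in tensors]
    dist.all_reduce(bs, op=dist.ReduceOp.SUM)     # bs becomes the global count
    for w in weighted:
        dist.all_reduce(w, op=dist.ReduceOp.SUM)
    return (bs, *(w / bs for w in weighted))

Note that bs is all-reduced in place, which matches the call sites above, where the caller reassigns bs to the returned global count.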
Example 2
def validate(cur_gpu, val_loader, model, criterion, epoch, hparams):
    logger = get_logger()
    model.eval()

    if logger:
        loss_meter = AverageMeter('val_loss')
        acc1_meter = AverageMeter('val_acc1')
        acc5_meter = AverageMeter('val_acc5')

    model_module = model.module if hparams.distributed_mode == 'gpus' else model

    for i, (image, target) in enumerate(val_loader):
        with torch.no_grad():
            if cur_gpu >= 0:
                image = image.cuda(cur_gpu, non_blocking=True)
                target = target.cuda(cur_gpu, non_blocking=True)

            if hparams.fp16:
                image = image.half()

            output = model(image)

            if hparams.fp16:
                output = output.float()

            loss = criterion(output,
                             target,
                             label_smoothing=hparams.label_smoothing)
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            bs = torch.tensor(image.size(0),
                              device=('cuda:%d' % cur_gpu) if cur_gpu >= 0 else None)

            if hparams.distributed_mode == 'gpus':
                bs, loss, acc1, acc5 = batch_reduce(bs, loss, acc1, acc5)

            if logger:
                loss_meter.update(loss.item(), bs.item())
                acc1_meter.update(acc1.item(), bs.item())
                acc5_meter.update(acc5.item(), bs.item())

    loss, acc1, acc5 = None, None, None
    if logger:
        metrics = [('val_loss', loss_meter.result),
                   ('val_acc1', acc1_meter.result),
                   ('val_acc5', acc5_meter.result)]
        logger.log_metrics(metrics, epoch + 1, 0, 'val')
        logger.log_summaries(model_module.get_summaries(), epoch + 1, 0, 'val')
        loss, acc1, acc5 = loss_meter.result, acc1_meter.result, acc5_meter.result
    return loss, acc1, acc5
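
Both the validation and training loops rely on an AverageMeter that is not shown in the excerpt. A minimal sketch consistent with how it is used above (a weighted update(value, n), a result property, and str() for the monitor line) might look like this; the real class may track more state:

class AverageMeter:
    # Hypothetical sketch: running, count-weighted average of a metric.
    def __init__(self, name):
        self.name = name
        self.sum = 0.0
        self.count = 0

    def update(self, value, n=1):
        # value is assumed to be a per-batch average; n the batch size.
        self.sum += value * n
        self.count += n

    @property
    def result(self):
        return self.sum / max(self.count, 1)

    def __str__(self):
        return '%s: %.4f' % (self.name, self.result)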
Example 3
def train(cur_gpu, train_loader, model, criterion, optimizer, lr_scheduler,
          params, params_clone, epoch, hparams):
    logger = get_logger()
    monitor = get_monitor()
    model.train()

    if logger:
        loss_meter = AverageMeter('train_loss')
        acc1_meter = AverageMeter('train_acc1')
        acc5_meter = AverageMeter('train_acc5')

    model_module = model.module if hparams.distributed_mode == 'gpus' else model

    for i, (image, target) in enumerate(train_loader):
        if monitor:
            monitor.before_step()

        if cur_gpu >= 0:
            image = image.cuda(cur_gpu, non_blocking=True)
            target = target.cuda(cur_gpu, non_blocking=True)

        if hparams.fp16:
            image = image.half()

        output = model(image)

        if hparams.fp16:
            output = output.float()

        loss = criterion(output, target, label_smoothing=hparams.label_smoothing)
        # Detached copy of the loss for logging and cross-worker reduction,
        # so backward() below still operates on the live graph.
        loss_ = loss.detach().clone()
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        bs = torch.tensor(image.size(0),
                          device=('cuda:%d' % cur_gpu) if cur_gpu >= 0 else None)

        if hparams.distributed_mode == 'gpus':
            model_module.reduce_buffers(bs)
            bs, loss_, acc1, acc5 = batch_reduce(bs, loss_, acc1, acc5)

        if logger:
            # Log the reduced copy; in distributed mode `loss` itself is
            # still the local, unreduced value.
            loss_meter.update(loss_.item(), bs.item())
            acc1_meter.update(acc1.item(), bs.item())
            acc5_meter.update(acc5.item(), bs.item())

        if hparams.fp16:
            # Static loss scaling with FP32 master weights: scale the loss so
            # small gradients stay representable in FP16, copy the gradients
            # to the FP32 clones, unscale them there, step the optimizer on
            # the clones, then copy the updated weights back into the model.
            loss = loss * hparams.loss_scale
            model.zero_grad()
            loss.backward()
            copy_grads(params, params_clone)
            for p in params_clone:
                p.grad.data.div_(hparams.loss_scale)
            optimizer.step()
            copy_params(params_clone, params)
            torch.cuda.synchronize()
        else:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        step = i + 1
        lr_scheduler.step(epoch=epoch + step / hparams.steps_per_epoch)

        if logger and step % hparams.print_freq == 0:
            metrics = [('train_loss', loss_meter.result),
                       ('train_acc1', acc1_meter.result),
                       ('train_acc5', acc5_meter.result),
                       ('lr', optimizer.param_groups[0]['lr'])]

            # epoch: zero-indexed for code, one-indexed for human reader
            logger.log_metrics(metrics, epoch + 1, step, 'train')
            logger.log_summaries(model_module.get_summaries(), epoch + 1, step, 'train')

        if monitor:
            monitor.after_step(str(loss_meter))

    if logger:
        metrics = [('train_loss', loss_meter.result),
                   ('train_acc1', acc1_meter.result),
                   ('train_acc5', acc5_meter.result),
                   ('lr', optimizer.param_groups[0]['lr'])]
        logger.log_metrics(metrics, epoch + 1, step, 'train')
        logger.log_summaries(model_module.get_summaries(), epoch + 1, step, 'train')
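
The FP16 branch in train depends on copy_grads and copy_params, which are also outside the excerpt. A plausible sketch, assuming params are the model's FP16 parameters and params_clone their FP32 master copies held by the optimizer, as the loss-scaling logic implies; treat the names and exact behavior as assumptions:

def copy_grads(params, params_clone):
    # Move the freshly computed FP16 gradients onto the FP32 master copies,
    # where they can be unscaled and applied without underflow.
    for p, p32 in zip(params, params_clone):
        if p32.grad is None:
            p32.grad = p.grad.detach().float()
        else:
            p32.grad.data.copy_(p.grad.data)

def copy_params(params_clone, params):
    # Copy the updated FP32 master weights back into the FP16 model.
    for p32, p in zip(params_clone, params):
        p.data.copy_(p32.data)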