def train_one_epoch(config, model, data_loader, optimizer, epoch,
                    lr_scheduler):
    """Train for one epoch on a loader that yields two views per sample.

    ``model(samples_1, samples_2)`` is expected to return the loss
    directly (``targets`` is only used as the batch size for the loss
    meter).  Uses NVIDIA apex AMP when ``config.AMP_OPT_LEVEL != "O0"``
    and optionally clips gradients to ``config.TRAIN.CLIP_GRAD``.

    Args:
        config: experiment config; fields read here: AMP_OPT_LEVEL,
            TRAIN.CLIP_GRAD, TRAIN.EPOCHS, PRINT_FREQ.
        model: network whose forward(v1, v2) returns the training loss.
        data_loader: yields (samples_1, samples_2, targets) batches.
        optimizer: optimizer stepped once per batch.
        epoch: current epoch index, used for logging and the LR schedule.
        lr_scheduler: scheduler with a per-iteration ``step_update(step)``.
    """
    model.train()
    optimizer.zero_grad()

    num_steps = len(data_loader)
    batch_time = AverageMeter()
    loss_meter = AverageMeter()
    norm_meter = AverageMeter()

    start = time.time()
    end = time.time()
    for idx, (samples_1, samples_2, targets) in enumerate(data_loader):
        samples_1 = samples_1.cuda(non_blocking=True)
        samples_2 = samples_2.cuda(non_blocking=True)
        targets = targets.cuda(non_blocking=True)

        # The model computes the loss internally from the two views.
        loss = model(samples_1, samples_2)

        optimizer.zero_grad()
        if config.AMP_OPT_LEVEL != "O0":
            # apex AMP: scale the loss before backward, then clip/measure
            # gradients on the fp32 master parameters.
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            if config.TRAIN.CLIP_GRAD:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    amp.master_params(optimizer), config.TRAIN.CLIP_GRAD)
            else:
                grad_norm = get_grad_norm(amp.master_params(optimizer))
        else:
            loss.backward()
            if config.TRAIN.CLIP_GRAD:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), config.TRAIN.CLIP_GRAD)
            else:
                grad_norm = get_grad_norm(model.parameters())
        optimizer.step()
        # Per-iteration LR schedule keyed on the global step index.
        lr_scheduler.step_update(epoch * num_steps + idx)

        torch.cuda.synchronize()

        loss_meter.update(loss.item(), targets.size(0))
        norm_meter.update(grad_norm)
        batch_time.update(time.time() - end)
        end = time.time()

        if idx % config.PRINT_FREQ == 0:
            lr = optimizer.param_groups[0]['lr']
            memory_used = torch.cuda.max_memory_allocated() / (1024.0 * 1024.0)
            # ETA estimated from the running-average step time.
            etas = batch_time.avg * (num_steps - idx)
            logger.info(
                f'Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t'
                f'eta {datetime.timedelta(seconds=int(etas))} lr {lr:.6f}\t'
                f'time {batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                f'loss {loss_meter.val:.4f} ({loss_meter.avg:.4f})\t'
                f'grad_norm {norm_meter.val:.4f} ({norm_meter.avg:.4f})\t'
                f'mem {memory_used:.0f}MB')
    epoch_time = time.time() - start
    logger.info(
        f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}"
    )
    def validate(self, net, loader, loss_fn, amp_autocast=suppress, metric_name=None):
        """Evaluate ``net`` on ``loader`` and return averaged metrics.

        For regression problems the metric is RMSE; otherwise top-1 and
        top-k accuracy with k = min(5, num_class).  When test-time
        augmentation (``self._misc_cfg.tta``) is enabled, repeated
        predictions are averaged before metrics and loss are computed.

        Args:
            net: model to evaluate (set to eval mode here).
            loader: iterable of (input, target) batches.
            loss_fn: loss applied to (output, target).
            amp_autocast: autocast context manager (no-op by default).
            metric_name: optional metric override; only 'rmse' is
                accepted for regression.

        Returns:
            dict with 'loss' plus either 'rmse' (regression) or
            'top1'/'top5' (classification).
        """
        losses_m = AverageMeter()
        top1_m = AverageMeter()
        top5_m = AverageMeter()
        rmse_m = AverageMeter()

        net.eval()

        with torch.no_grad():
            for batch_idx, (input, target) in enumerate(loader):
                if not self._misc_cfg.prefetcher:
                    input = input.to(self.ctx[0])
                    target = target.to(self.ctx[0])

                with amp_autocast():
                    output = net(input)
                    if self._problem_type == REGRESSION:
                        output = output.flatten()
                if isinstance(output, (tuple, list)):
                    output = output[0]

                # Augmentation reduction (TTA) must happen BEFORE metrics and
                # loss are computed, otherwise the metric is evaluated on the
                # un-reduced predictions while the loss sees the reduced ones.
                # (Previously this block ran after the metric computation.)
                reduce_factor = self._misc_cfg.tta
                if self._problem_type != REGRESSION and reduce_factor > 1:
                    output = output.unfold(0, reduce_factor, reduce_factor).mean(dim=2)
                    target = target[0:target.size(0):reduce_factor]

                if self._problem_type == REGRESSION:
                    if metric_name:
                        assert metric_name == 'rmse', f'{metric_name} metric not supported for regression.'
                    val_metric_score = rmse(output, target)
                else:
                    val_metric_score = accuracy(output, target, topk=(1, min(5, self.num_class)))

                loss = loss_fn(output, target)
                reduced_loss = loss.data

                if self.found_gpu:
                    torch.cuda.synchronize()

                losses_m.update(reduced_loss.item(), input.size(0))
                if self._problem_type == REGRESSION:
                    rmse_score = val_metric_score
                    rmse_m.update(rmse_score.item(), output.size(0))
                else:
                    acc1, acc5 = val_metric_score
                    # accuracy() returns percentages; store fractions.
                    acc1 /= 100
                    acc5 /= 100
                    top1_m.update(acc1.item(), output.size(0))
                    top5_m.update(acc5.item(), output.size(0))

        if self._problem_type == REGRESSION:
            self._logger.info('[Epoch %d] validation: rmse=%f', self.epoch, rmse_m.avg)
            return {'loss': losses_m.avg, 'rmse': rmse_m.avg}
        else:
            self._logger.info('[Epoch %d] validation: top1=%f top5=%f', self.epoch, top1_m.avg, top5_m.avg)
            return {'loss': losses_m.avg, 'top1': top1_m.avg, 'top5': top5_m.avg}
# Esempio n. 3 (scraped-example separator; commented out so the file parses)
def validate_multi_class(config, data_loader, model):
    """Evaluate ``model`` on ``data_loader`` and compute dataset-level F1.

    True/false-positive and false-negative counts are accumulated over the
    whole loader via ``tfpn`` and precision/recall/F1 are computed from the
    accumulated totals.

    Args:
        config: experiment config; only PRINT_FREQ is read here.
        data_loader: yields (images, target) batches.
        model: network to evaluate (set to eval mode here).

    Returns:
        (f1_score, precision, recall) over the full dataset.
    """
    criterion = SoftTargetCrossEntropy()
    model.eval()

    batch_time = AverageMeter()
    loss_meter = AverageMeter()
    # running true-positive / false-positive / false-negative counts
    tp_sum, fp_sum, fn_sum = 0, 0, 0

    end = time.time()
    with torch.no_grad():  # inference only: no autograd graph needed
        for idx, (images, target) in enumerate(data_loader):
            images = images.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(images)

            # measure accuracy and record loss
            loss = criterion(output, target)

            tp, fp, fn = tfpn(output, target)

            loss_meter.update(loss.item(), target.size(0))
            tp_sum += tp
            fp_sum += fp
            fn_sum += fn

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if idx % config.PRINT_FREQ == 0:
                memory_used = torch.cuda.max_memory_allocated() / (1024.0 * 1024.0)
                # precision/recall here are for the CURRENT batch only.
                logger.info(
                    f'Test: [{idx}/{len(data_loader)}]\t'
                    f'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    f'Loss {loss_meter.val:.4f} ({loss_meter.avg:.4f})\t'
                    f'precision {tp/(tp+fp):.3f} \t'
                    f'recall {tp/(tp+fn):.3f} \t'
                    f'Mem {memory_used:.0f}MB')
    # BUG FIX: use the ACCUMULATED counts. Previously the final metrics were
    # computed from the last batch's tp/fp/fn only, so the returned values
    # did not reflect the whole dataset.
    precision = tp_sum / (tp_sum + fp_sum)
    recall = tp_sum / (tp_sum + fn_sum)
    f1_score = 2 * precision * recall / (precision + recall)
    logger.info(f' * precision {precision:.3f} recall {recall:.3f} f1 {f1_score:.3f}')
    return f1_score, precision, recall
# Esempio n. 4 (scraped-example separator; commented out so the file parses)
def validate(model, loader, criterion, log_freq=50):
    """Evaluate ``model`` on ``loader`` and return top-1/top-5 results.

    Args:
        model: network to evaluate (set to eval mode here).
        loader: iterable of (input, target) batches; moved to CUDA here.
        criterion: loss function applied to (output, target).
        log_freq: log progress every ``log_freq`` batches.

    Returns:
        OrderedDict with keys ``top1``, ``top1_err``, ``top5``,
        ``top5_err`` (percent accuracies rounded to 4 decimals).
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.eval()
    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(loader):
            target = target.cuda()
            input = input.cuda()

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(acc1.item(), input.size(0))
            top5.update(acc5.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % log_freq == 0:
                logging.info(
                    'Test: [{0:>4d}/{1}]  '
                    'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s)  '
                    'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f})  '
                    'Acc@1: {top1.val:>7.3f} ({top1.avg:>7.3f})  '
                    'Acc@5: {top5.val:>7.3f} ({top5.avg:>7.3f})'.format(
                        i,
                        len(loader),
                        batch_time=batch_time,
                        rate_avg=input.size(0) / batch_time.avg,
                        loss=losses,
                        top1=top1,
                        top5=top5))

    results = OrderedDict(top1=round(top1.avg, 4),
                          top1_err=round(100 - top1.avg, 4),
                          top5=round(top5.avg, 4),
                          top5_err=round(100 - top5.avg, 4))

    logging.info(' * Acc@1 {:.1f} ({:.3f}) Acc@5 {:.1f} ({:.3f})'.format(
        results['top1'], results['top1_err'], results['top5'],
        results['top5_err']))
    # BUG FIX: previously the computed results dict was never returned,
    # so callers always received None.
    return results
# Esempio n. 5 (scraped-example separator; commented out so the file parses)
def train_one_epoch(epoch,
                    model,
                    loader,
                    optimizer,
                    loss_fn,
                    args,
                    lr_scheduler=None,
                    saver=None,
                    output_dir=None,
                    amp_autocast=suppress,
                    loss_scaler=None,
                    model_ema=None,
                    mixup_fn=None):
    """Train ``model`` for one epoch (timm-style training loop).

    Handles mixup shut-off after ``args.mixup_off_epoch``, prefetching
    loaders, channels-last tensors, AMP (via ``amp_autocast`` and an
    optional ``loss_scaler``), gradient clipping, model-EMA updates,
    periodic recovery checkpoints and per-step LR scheduling.

    Returns:
        OrderedDict with the epoch's average training ``loss``.
    """

    # Disable mixup once the configured epoch is reached; the switch lives
    # on the loader when prefetching, otherwise on the mixup function.
    if args.mixup_off_epoch and epoch >= args.mixup_off_epoch:
        if args.prefetcher and loader.mixup_enabled:
            loader.mixup_enabled = False
        elif mixup_fn is not None:
            mixup_fn.mixup_enabled = False

    # Second-order optimizers need create_graph=True during backward.
    second_order = hasattr(optimizer,
                           'is_second_order') and optimizer.is_second_order
    batch_time_m = AverageMeter()
    data_time_m = AverageMeter()
    losses_m = AverageMeter()

    model.train()

    end = time.time()
    last_idx = len(loader) - 1
    num_updates = epoch * len(loader)
    for batch_idx, (input, target) in enumerate(loader):
        last_batch = batch_idx == last_idx
        data_time_m.update(time.time() - end)
        if not args.prefetcher:
            input, target = input.cuda(), target.cuda()
            if mixup_fn is not None:
                input, target = mixup_fn(input, target)
        if args.channels_last:
            input = input.contiguous(memory_format=torch.channels_last)

        with amp_autocast():
            output = model(input)
            loss = loss_fn(output, target)

        # In distributed mode the loss is only recorded at log time (after
        # an all-reduce, below); otherwise record it every step.
        if not args.distributed:
            losses_m.update(loss.item(), input.size(0))

        optimizer.zero_grad()
        if loss_scaler is not None:
            # AMP path: the scaler performs backward, clipping and the
            # optimizer step in one call.
            loss_scaler(loss,
                        optimizer,
                        clip_grad=args.clip_grad,
                        clip_mode=args.clip_mode,
                        parameters=model_parameters(model,
                                                    exclude_head='agc'
                                                    in args.clip_mode),
                        create_graph=second_order)
        else:
            loss.backward(create_graph=second_order)
            if args.clip_grad is not None:
                dispatch_clip_grad(model_parameters(model,
                                                    exclude_head='agc'
                                                    in args.clip_mode),
                                   value=args.clip_grad,
                                   mode=args.clip_mode)
            optimizer.step()

        if model_ema is not None:
            model_ema.update(model)

        torch.cuda.synchronize()
        num_updates += 1
        batch_time_m.update(time.time() - end)
        if last_batch or batch_idx % args.log_interval == 0:
            # Average LR across parameter groups for logging.
            lrl = [param_group['lr'] for param_group in optimizer.param_groups]
            lr = sum(lrl) / len(lrl)

            if args.distributed:
                reduced_loss = reduce_tensor(loss.data, args.world_size)
                losses_m.update(reduced_loss.item(), input.size(0))

            if args.local_rank == 0:
                _logger.info(
                    'Train: {} [{:>4d}/{} ({:>3.0f}%)]  '
                    'Loss: {loss.val:#.4g} ({loss.avg:#.3g})  '
                    'Time: {batch_time.val:.3f}s, {rate:>7.2f}/s  '
                    '({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s)  '
                    'LR: {lr:.3e}  '
                    'Data: {data_time.val:.3f} ({data_time.avg:.3f})'.format(
                        epoch,
                        batch_idx,
                        len(loader),
                        100. * batch_idx / last_idx,
                        loss=losses_m,
                        batch_time=batch_time_m,
                        rate=input.size(0) * args.world_size /
                        batch_time_m.val,
                        rate_avg=input.size(0) * args.world_size /
                        batch_time_m.avg,
                        lr=lr,
                        data_time=data_time_m))

                if args.save_images and output_dir:
                    torchvision.utils.save_image(
                        input,
                        os.path.join(output_dir,
                                     'train-batch-%d.jpg' % batch_idx),
                        padding=0,
                        normalize=True)

        if saver is not None and args.recovery_interval and (
                last_batch or (batch_idx + 1) % args.recovery_interval == 0):
            saver.save_recovery(epoch, batch_idx=batch_idx)

        if lr_scheduler is not None:
            lr_scheduler.step_update(num_updates=num_updates,
                                     metric=losses_m.avg)

        end = time.time()
        # end for

    # Lookahead-style optimizers sync slow weights at epoch end.
    if hasattr(optimizer, 'sync_lookahead'):
        optimizer.sync_lookahead()

    return OrderedDict([('loss', losses_m.avg)])
# Esempio n. 6 (scraped-example separator; commented out so the file parses)
def validate(model,
             loader,
             loss_fn,
             args,
             amp_autocast=suppress,
             log_suffix=''):
    """Evaluate ``model`` over ``loader`` and return averaged metrics.

    Supports prefetching loaders, channels-last inputs, autocast
    inference, test-time-augmentation averaging (``args.tta``) and
    distributed all-reduce of loss/accuracy.

    Returns:
        OrderedDict with 'loss', 'top1' and 'top5' averages.
    """
    time_meter = AverageMeter()
    loss_meter = AverageMeter()
    acc1_meter = AverageMeter()
    acc5_meter = AverageMeter()

    model.eval()

    last_idx = len(loader) - 1
    end = time.time()
    with torch.no_grad():
        for step, (inp, tgt) in enumerate(loader):
            is_last = step == last_idx
            if not args.prefetcher:
                inp = inp.cuda()
                tgt = tgt.cuda()
            if args.channels_last:
                inp = inp.contiguous(memory_format=torch.channels_last)

            with amp_autocast():
                out = model(inp)
            if isinstance(out, (tuple, list)):
                out = out[0]

            # Test-time augmentation: average predictions over the
            # repeated augmentations and keep one target per sample.
            tta = args.tta
            if tta > 1:
                out = out.unfold(0, tta, tta).mean(dim=2)
                tgt = tgt[0:tgt.size(0):tta]

            loss = loss_fn(out, tgt)
            acc1, acc5 = accuracy(out, tgt, topk=(1, 5))

            if args.distributed:
                reduced_loss = reduce_tensor(loss.data, args.world_size)
                acc1 = reduce_tensor(acc1, args.world_size)
                acc5 = reduce_tensor(acc5, args.world_size)
            else:
                reduced_loss = loss.data

            torch.cuda.synchronize()

            loss_meter.update(reduced_loss.item(), inp.size(0))
            acc1_meter.update(acc1.item(), out.size(0))
            acc5_meter.update(acc5.item(), out.size(0))

            time_meter.update(time.time() - end)
            end = time.time()
            if args.local_rank == 0 and (is_last or
                                         step % args.log_interval == 0):
                log_name = 'Test' + log_suffix
                _logger.info(
                    '{0}: [{1:>4d}/{2}]  '
                    'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})  '
                    'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f})  '
                    'Acc@1: {top1.val:>7.4f} ({top1.avg:>7.4f})  '
                    'Acc@5: {top5.val:>7.4f} ({top5.avg:>7.4f})'.format(
                        log_name,
                        step,
                        last_idx,
                        batch_time=time_meter,
                        loss=loss_meter,
                        top1=acc1_meter,
                        top5=acc5_meter))

    return OrderedDict([('loss', loss_meter.avg), ('top1', acc1_meter.avg),
                        ('top5', acc5_meter.avg)])
# Esempio n. 7 (scraped-example separator; commented out so the file parses)
def main():
    """Benchmark a model's inference FPS and power draw on CIFAR-100.

    Talks to an external power-measurement service on localhost:3000
    (``/start`` returns the idle-power baseline, ``/stop`` the load
    power), times forward passes over the CIFAR-100 test set, and writes
    FPS plus net inference power to
    ``<checkpoint_dir>/<model>_fps_cifar.yaml``.
    """
    start_endpoint = "http://localhost:3000/start"
    stop_endpoint = "http://localhost:3000/stop"
    setup_default_logging()
    args = parser.parse_args()
    # might as well try to do something useful...
    args.pretrained = args.pretrained or not args.checkpoint

    # Output directory = directory containing the checkpoint file.
    output_dir = args.checkpoint.split('/')
    output_dir.pop(-1)
    output_dir = ('/').join(output_dir)

    # create model
    model = create_model(
        args.model,
        num_classes=args.num_classes,
        in_chans=3,
        pretrained=args.pretrained,
        checkpoint_path=args.checkpoint)

    logging.info('Model %s created, param count: %d' %
                 (args.model, sum([m.numel() for m in model.parameters()])))

    # config = resolve_data_config(vars(args), model=model)
    # model, test_time_pool = apply_test_time_pool(model, config, args)

    if args.num_gpu > 1:
        model = torch.nn.DataParallel(
            model, device_ids=list(range(args.num_gpu))).cuda()
    else:
        model = model.cuda()

    dataset_eval = torchvision.datasets.CIFAR100(
        root='./data', train=False, download=True)

    data_config = resolve_data_config(vars(args), model=model)

    # #CIFAR_100_MEAN = (0.5071, 0.4865, 0.4409)
    # #CIFAR_100_STD = (0.2673, 0.2564, 0.2762)
    # Override the model's default normalization with CIFAR-100 statistics.
    data_config['mean'] = (0.5071, 0.4865, 0.4409)
    data_config['std'] = (0.2673, 0.2564, 0.2762)

    loader = create_loader(
        dataset_eval,
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        is_training=False,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        crop_pct=data_config['crop_pct']
    )

    model.eval()

    batch_time = AverageMeter()

    with torch.no_grad():
        # Start power sampling; the response carries the idle baseline.
        idle_power = requests.post(url=start_endpoint)
        idle_json = idle_power.json()
        for batch_idx, (input, _) in enumerate(loader):
            input = input.cuda()

            tstart = time.time()
            output = model(input)
            tend = time.time()

            # Skip the first batch: it includes one-time warm-up overhead.
            if batch_idx != 0:
                batch_time.update(tend - tstart)

                if batch_idx % args.log_freq == 0:
                    print('Predict: [{0}/{1}] Time {batch_time.val:.6f} ({batch_time.avg:.6f})'.format(
                        batch_idx, len(loader), batch_time=batch_time), end='\r')

    load_power = requests.post(url=stop_endpoint)
    load_json = load_power.json()
    fps = 1 / batch_time.avg
    # Net power attributable to inference = load minus idle baseline.
    inference_power = float(load_json['load']) - float(idle_json['idle'])
    stats = [{'FPS': [float(fps)]},
                {'Total_Power': [float(inference_power)]}]
    with open(os.path.join(output_dir, '{}_fps_cifar.yaml'.format(args.model)), 'w') as f:
        yaml.safe_dump(stats, f)
# Esempio n. 8 (scraped-example separator; commented out so the file parses)
def train_epoch_slim_gate(epoch,
                          model,
                          loader,
                          optimizer,
                          loss_fn,
                          args,
                          lr_scheduler=None,
                          saver=None,
                          output_dir='',
                          use_amp=False,
                          model_ema=None,
                          optimizer_step=1):
    """Train the channel-gating heads of a slimmable network for one epoch.

    BatchNorm layers are frozen, per-module channel lists are truncated to
    the first 4 options, and each step optimizes the combined objective
    ``loss = gate_loss + ce_loss + 0.5 * flops_loss`` where gate targets
    are generated online from the smallest sub-network's correctness.
    Gradients are accumulated over ``optimizer_step`` batches.

    Returns:
        OrderedDict with the epoch's average training ``loss``.
    """
    start_chn_idx = 0
    num_gate = 1

    batch_time_m = AverageMeter()
    data_time_m = AverageMeter()
    losses_m = AverageMeter()
    acc_m = AverageMeter()
    flops_m = AverageMeter()
    ce_loss_m = AverageMeter()
    flops_loss_m = AverageMeter()
    # One meter per gate head for accuracy and loss.
    acc_gate_m_l = [AverageMeter() for i in range(num_gate)]
    gate_loss_m_l = [AverageMeter() for i in range(num_gate)]
    model.train()
    for n, m in model.named_modules():  # Freeze bn
        if isinstance(m, nn.BatchNorm2d) or isinstance(m, DSBatchNorm2d):
            m.eval()

    # Truncate per-module channel-choice lists to the first 4 widths and
    # cache them as CUDA float tensors.
    for n, m in model.named_modules():
        if len(getattr(m, 'in_channels_list', [])) > 4:
            m.in_channels_list = m.in_channels_list[start_chn_idx:4]
            m.in_channels_list_tensor = torch.from_numpy(
                np.array(m.in_channels_list)).float().cuda()
        if len(getattr(m, 'out_channels_list', [])) > 4:
            m.out_channels_list = m.out_channels_list[start_chn_idx:4]
            m.out_channels_list_tensor = torch.from_numpy(
                np.array(m.out_channels_list)).float().cuda()

    end = time.time()
    last_idx = len(loader) - 1
    num_updates = epoch * len(loader)
    # Hooks accumulate per-module MAC counts during forward passes.
    model.apply(lambda m: add_mac_hooks(m))
    for batch_idx, (input, target) in enumerate(loader):
        last_batch = batch_idx == last_idx
        data_time_m.update(time.time() - end)
        if not args.prefetcher:
            input, target = input.cuda(), target.cuda()

        # Gradient accumulation: only clear grads on step boundaries.
        if last_batch or (batch_idx + 1) % optimizer_step == 0:
            optimizer.zero_grad()
        # generate online labels: samples the smallest sub-network gets
        # right are labeled gate 0, the rest gate 3 (largest).
        with torch.no_grad():
            set_model_mode(model, 'smallest')
            output = model(input)
            conf_s, correct_s = accuracy(output, target, no_reduce=True)
            gate_target = [
                torch.LongTensor([0])
                if correct_s[0][idx] else torch.LongTensor([3])
                for idx in range(correct_s[0].size(0))
            ]
            gate_target = torch.stack(gate_target).squeeze(-1).cuda()
        # =============
        set_model_mode(model, 'dynamic')
        output = model(input)

        # Unwrap DataParallel/DistributedDataParallel if present.
        if hasattr(model, 'module'):
            model_ = model.module
        else:
            model_ = model

        #  SGS Loss: supervise every gate head with the online labels.
        gate_loss = 0
        gate_num = 0
        gate_loss_l = []
        gate_acc_l = []
        for n, m in model_.named_modules():
            if isinstance(m, MultiHeadGate):
                if getattr(m, 'keep_gate', None) is not None:
                    gate_num += 1
                    g_loss = loss_fn(m.keep_gate, gate_target)
                    gate_loss += g_loss
                    gate_loss_l.append(g_loss)
                    gate_acc_l.append(
                        accuracy(m.keep_gate, gate_target, topk=(1, ))[0])

        gate_loss /= gate_num

        #  MAdds Loss: quadratic penalty on estimated GFLOPs.
        running_flops = add_flops(model)
        if isinstance(running_flops, torch.Tensor):
            running_flops = running_flops.float().mean().cuda()
        else:
            running_flops = torch.FloatTensor([running_flops]).cuda()
        flops_loss = (running_flops / 1e9)**2

        #  Target Loss, back-propagate through gumbel-softmax
        ce_loss = loss_fn(output, target)

        loss = gate_loss + ce_loss + 0.5 * flops_loss
        # loss = ce_loss
        acc1 = accuracy(output, target, topk=(1, ))[0]

        if use_amp:
            # apex AMP: scale loss before backward.
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        if last_batch or (batch_idx + 1) % optimizer_step == 0:
            optimizer.step()

        torch.cuda.synchronize()
        if model_ema is not None:
            model_ema.update(model)
        num_updates += 1

        # Record metrics; in distributed mode all-reduce across ranks first.
        if not args.distributed:
            losses_m.update(loss.item(), input.size(0))
            acc_m.update(acc1.item(), input.size(0))
            flops_m.update(running_flops.item(), input.size(0))
            ce_loss_m.update(ce_loss.item(), input.size(0))
            flops_loss_m.update(flops_loss.item(), input.size(0))
        else:
            reduced_loss = reduce_tensor(loss.data, args.world_size)
            reduced_acc = reduce_tensor(acc1, args.world_size)
            reduced_flops = reduce_tensor(running_flops, args.world_size)
            reduced_loss_flops = reduce_tensor(flops_loss, args.world_size)
            reduced_ce_loss = reduce_tensor(ce_loss, args.world_size)
            reduced_acc_gate_l = reduce_list_tensor(gate_acc_l,
                                                    args.world_size)
            reduced_gate_loss_l = reduce_list_tensor(gate_loss_l,
                                                     args.world_size)
            losses_m.update(reduced_loss.item(), input.size(0))
            acc_m.update(reduced_acc.item(), input.size(0))
            flops_m.update(reduced_flops.item(), input.size(0))
            flops_loss_m.update(reduced_loss_flops.item(), input.size(0))
            ce_loss_m.update(reduced_ce_loss.item(), input.size(0))
            for i in range(num_gate):
                acc_gate_m_l[i].update(reduced_acc_gate_l[i].item(),
                                       input.size(0))
                gate_loss_m_l[i].update(reduced_gate_loss_l[i].item(),
                                        input.size(0))
        batch_time_m.update(time.time() - end)
        if (last_batch or batch_idx % args.log_interval
                == 0) and args.local_rank == 0 and batch_idx != 0:
            # Average LR across parameter groups for logging.
            lrl = [param_group['lr'] for param_group in optimizer.param_groups]
            lr = sum(lrl) / len(lrl)
            print_gate_stats(model)
            logging.info(
                'Train: {} [{:>4d}/{} ({:>3.0f}%)]  '
                'Loss: {loss.val:>9.6f} ({loss.avg:>6.4f})  '
                'CELoss: {celoss.val:>9.6f} ({celoss.avg:>6.4f})  '
                'GateLoss: {gate_loss[0].val:>6.4f} ({gate_loss[0].avg:>6.4f})  '
                'FlopsLoss: {flopsloss.val:>9.6f} ({flopsloss.avg:>6.4f})  '
                'TrainAcc: {acc.val:>9.6f} ({acc.avg:>6.4f})  '
                'GateAcc: {acc_gate[0].val:>6.4f}({acc_gate[0].avg:>6.4f})  '
                'Flops: {flops.val:>6.0f} ({flops.avg:>6.0f})  '
                'LR: {lr:.3e}  '
                'Time: {batch_time.val:.3f}s, {rate:>7.2f}/s  '
                '({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s)  '
                'DataTime: {data_time.val:.3f} ({data_time.avg:.3f})\n'.format(
                    epoch,
                    batch_idx,
                    last_idx,
                    100. * batch_idx / last_idx,
                    loss=losses_m,
                    flopsloss=flops_loss_m,
                    acc=acc_m,
                    flops=flops_m,
                    celoss=ce_loss_m,
                    batch_time=batch_time_m,
                    rate=input.size(0) * args.world_size / batch_time_m.val,
                    rate_avg=input.size(0) * args.world_size /
                    batch_time_m.avg,
                    lr=lr,
                    data_time=data_time_m,
                    gate_loss=gate_loss_m_l,
                    acc_gate=acc_gate_m_l))

        if saver is not None and args.recovery_interval and (
                last_batch or (batch_idx + 1) % args.recovery_interval == 0):
            saver.save_recovery(model,
                                optimizer,
                                args,
                                epoch,
                                model_ema=model_ema,
                                use_amp=use_amp,
                                batch_idx=batch_idx)

        if lr_scheduler is not None:
            lr_scheduler.step_update(num_updates=num_updates,
                                     metric=losses_m.avg)

        end = time.time()
        # end for

    # Lookahead-style optimizers sync slow weights at epoch end.
    if hasattr(optimizer, 'sync_lookahead'):
        optimizer.sync_lookahead()

    return OrderedDict([('loss', losses_m.avg)])
# Esempio n. 9 (scraped-example separator; commented out so the file parses)
def validate(args):
    """Validate a binary classifier and sweep a decision threshold for F1.

    Builds the model and validation loader from ``args``, runs one no-grad
    evaluation pass recording loss and top-1 accuracy, accumulates all
    logits/targets and (on the last batch) sweeps thresholds in
    [0.001, 1.0] over ``sigmoid(logit[:, 1])`` to maximise F1.  When
    ``args.temp_scaling`` is set, divides the logits by a fixed
    temperature (11) and re-runs the sweep.

    Returns:
        OrderedDict with top1/top1_err/f1/f1_err, parameter count,
        image size, crop pct and interpolation mode.
    """
    # might as well try to validate something
    args.pretrained = args.pretrained or not args.checkpoint
    args.prefetcher = not args.no_prefetcher
    amp_autocast = suppress  # do nothing
    if args.amp:
        # prefer APEX AMP when available, else fall back to native torch AMP
        if has_apex:
            args.apex_amp = True
        elif has_native_amp:
            args.native_amp = True
        else:
            _logger.warning(
                "Neither APEX or Native Torch AMP is available, using FP32.")
    assert not args.apex_amp or not args.native_amp, "Only one AMP mode should be set."
    if args.native_amp:
        amp_autocast = torch.cuda.amp.autocast

    if args.legacy_jit:
        set_jit_legacy()

    # create model
    model = create_model(args.model,
                         pretrained=args.pretrained,
                         num_classes=args.num_classes,
                         in_chans=3,
                         global_pool=args.gp,
                         scriptable=args.torchscript)

    if args.checkpoint:
        load_checkpoint(model, args.checkpoint, args.use_ema)

    param_count = sum(m.numel() for m in model.parameters())
    _logger.info('Model %s created, param count: %d' %
                 (args.model, param_count))

    data_config = resolve_data_config(vars(args), model=model)
    model, test_time_pool = (
        model, False) if args.no_test_pool else apply_test_time_pool(
            model, data_config)

    if args.torchscript:
        torch.jit.optimized_execution(True)
        model = torch.jit.script(model)

    model = model.cuda()
    if args.apex_amp:
        model = amp.initialize(model, opt_level='O1')

    if args.channels_last:
        model = model.to(memory_format=torch.channels_last)

    if args.num_gpu > 1:
        model = torch.nn.DataParallel(model,
                                      device_ids=list(range(args.num_gpu)))

    criterion = nn.CrossEntropyLoss().cuda()

    # a .tar archive gets the tar-backed dataset, anything else the
    # fold-aware validation dataset
    if os.path.splitext(args.data)[1] == '.tar' and os.path.isfile(args.data):
        dataset = DatasetTar(args.data,
                             load_bytes=args.tf_preprocessing,
                             class_map=args.class_map)
    else:
        dataset = Dataset(args.data,
                          train_mode='val',
                          fold_num=args.fold_num,
                          load_bytes=args.tf_preprocessing,
                          class_map=args.class_map)

    if args.valid_labels:
        # restrict outputs to the listed label indices (boolean mask)
        with open(args.valid_labels, 'r') as f:
            valid_labels = {int(line.rstrip()) for line in f}
            valid_labels = [i in valid_labels for i in range(args.num_classes)]
    else:
        valid_labels = None

    if args.real_labels:
        real_labels = RealLabelsImagenet(dataset.filenames(basename=True),
                                         real_json=args.real_labels)
    else:
        real_labels = None

    crop_pct = 1.0 if test_time_pool else data_config['crop_pct']
    loader = create_loader(dataset,
                           input_size=data_config['input_size'],
                           batch_size=args.batch_size,
                           use_prefetcher=args.prefetcher,
                           interpolation=data_config['interpolation'],
                           mean=data_config['mean'],
                           std=data_config['std'],
                           num_workers=args.workers,
                           crop_pct=crop_pct,
                           pin_memory=args.pin_mem,
                           tf_preprocessing=args.tf_preprocessing)

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    model.eval()
    last_idx = len(loader) - 1
    cuda = torch.device('cuda')

    m = nn.Sigmoid()

    # best threshold/F1 found over the accumulated logits; only updated on
    # the last batch (hoisted out of the loop — resetting them every batch,
    # as before, was redundant)
    best_f1 = 0.0
    best_th = 1.0

    with torch.no_grad():
        # warmup, reduce variability of first batch time, especially for comparing torchscript vs non
        input = torch.randn((args.batch_size, ) +
                            data_config['input_size']).cuda()
        if args.channels_last:
            input = input.contiguous(memory_format=torch.channels_last)
        model(input)
        end = time.time()

        logits_list = []
        target_list = []

        for batch_idx, (input, target) in enumerate(loader):
            last_batch = batch_idx == last_idx
            if args.no_prefetcher:
                target = target.cuda()
                input = input.cuda()
            if args.channels_last:
                input = input.contiguous(memory_format=torch.channels_last)

            # compute output
            with amp_autocast():
                output = model(input)

            if valid_labels is not None:
                output = output[:, valid_labels]
            loss = criterion(output, target)

            if real_labels is not None:
                real_labels.add_result(output)

            # measure accuracy and record loss
            # NOTE(review): topk=(1, 1) means the second value is top-1 again
            # and is discarded — presumably intentional for a 2-class problem
            acc1, _ = accuracy(output.detach(), target, topk=(1, 1))

            logits_list.append(output)
            target_list.append(target)

            if last_batch:
                # all batches seen: concatenate and sweep thresholds
                logits = torch.cat(logits_list).cuda()
                targets = torch.cat(target_list).cuda()

                targets_cpu = targets.cpu().numpy()
                sigmoided = m(logits)[:, 1].cpu().numpy()

                for i in range(1000, 0, -1):
                    th = i * 0.001
                    real_pred = (sigmoided >= th) * 1.0
                    f1 = f1_score(targets_cpu.squeeze(), real_pred.squeeze())

                    if f1 > best_f1:
                        best_f1 = f1
                        best_th = th

            losses.update(loss.item(), input.size(0))
            top1.update(acc1.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % args.log_freq == 0:
                _logger.info(
                    'Test: [{0:>4d}/{1}]  '
                    'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s)  '
                    'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f})  '
                    'Acc@1: {top1.val:>7.3f} ({top1.avg:>7.3f})  '
                    'thresh: {thresh:>7.4f}  '
                    'f1: {f1:>7.4f}'.format(batch_idx,
                                            len(loader),
                                            batch_time=batch_time,
                                            rate_avg=input.size(0) /
                                            batch_time.avg,
                                            loss=losses,
                                            top1=top1,
                                            thresh=best_th,
                                            f1=best_f1))

    print(best_th, best_f1)

    # Fixed temperature scaling: divide the accumulated logits by T=11 and
    # re-run the threshold sweep.  (The learned-temperature LBFGS variant
    # that previously lived here, commented out, has been removed.)
    if args.temp_scaling:
        temperature = nn.Parameter(torch.ones(1) *
                                   11).to(cuda).detach().requires_grad_(False)

        logits = logits / temperature.unsqueeze(1).expand(
            logits.size(0), logits.size(1))
        targets_cpu = targets.cpu().numpy()
        sigmoided = m(logits)[:, 1].detach().cpu().numpy()

        best_f1 = 0.0
        best_th = 1.0
        for i in range(1000, 0, -1):
            th = i * 0.001
            real_pred = (sigmoided >= th) * 1.0
            f1 = f1_score(targets_cpu.squeeze(), real_pred.squeeze())

            if f1 > best_f1:
                best_f1 = f1
                best_th = th

        print(best_th, best_f1)

    if real_labels is not None:
        # real labels mode replaces the top-1 value at the end
        top1a = real_labels.get_accuracy(k=1)
        # fix: f1a was previously left unset on this path, so building
        # `results` below raised NameError
        f1a = best_f1
    else:
        top1a, f1a = top1.avg, best_f1
    results = OrderedDict(top1=round(top1a, 4),
                          top1_err=round(100 - top1a, 4),
                          f1=f1a,
                          # NOTE(review): f1 is in [0, 1], so 100 - f1 is not
                          # a percentage error; kept for output compatibility
                          f1_err=round(100 - f1a, 4),
                          param_count=round(param_count / 1e6, 2),
                          img_size=data_config['input_size'][-1],
                          cropt_pct=crop_pct,
                          interpolation=data_config['interpolation'])

    _logger.info(' * Acc@1 {:.3f} ({:.3f}) f1 {:.3f} ({:.3f})'.format(
        results['top1'], results['top1_err'], results['f1'],
        results['f1_err']))

    return results
# Esempio n. 10
# 0
def main():
    """Predict binary labels for the test split and write TSV outputs.

    Runs the model over ``args.data``, thresholds ``sigmoid(logit[:, 1])``
    at ``args.thresh`` to get one binary prediction per image, writes
    ``prediction.tsv`` and ``probability.tsv`` next to the checkpoint, and
    copies ``prediction.tsv`` into the shared prediction directory.
    """
    setup_default_logging()
    args = parser.parse_args()
    # might as well try to do something useful...
    args.pretrained = args.pretrained or not args.checkpoint

    # create model
    model = create_model(args.model,
                         num_classes=args.num_classes,
                         in_chans=3,
                         pretrained=args.pretrained,
                         checkpoint_path=args.checkpoint)

    _logger.info('Model %s created, param count: %d' %
                 (args.model, sum(m.numel() for m in model.parameters())))

    config = resolve_data_config(vars(args), model=model)
    model, test_time_pool = (
        model,
        False) if args.no_test_pool else apply_test_time_pool(model, config)

    if args.num_gpu > 1:
        model = torch.nn.DataParallel(model,
                                      device_ids=list(range(
                                          args.num_gpu))).cuda()
    else:
        model = model.cuda()

    loader = create_loader(
        Dataset(args.data, train_mode='test', fold_num=-1),
        input_size=config['input_size'],
        batch_size=args.batch_size,
        use_prefetcher=True,
        interpolation=config['interpolation'],
        mean=config['mean'],
        std=config['std'],
        num_workers=args.workers,
        crop_pct=1.0 if test_time_pool else config['crop_pct'])

    model.eval()

    batch_time = AverageMeter()
    end = time.time()
    sig_list = []
    m = torch.nn.Sigmoid()
    with torch.no_grad():
        for batch_idx, (input, _) in enumerate(loader):
            input = input.cuda()
            labels = model(input)
            # keep only P(class 1) per sample, as an (N, 1) column
            # (previously the raw logits were also accumulated on the GPU
            # without ever being used — dropped to save memory)
            sigmoided = m(labels)
            sig_list.append(
                np.expand_dims(sigmoided[:, 1].cpu().numpy(), axis=1))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % args.log_freq == 0:
                _logger.info(
                    'Predict: [{0}/{1}] Time {batch_time.val:.3f} ({batch_time.avg:.3f})'
                    .format(batch_idx, len(loader), batch_time=batch_time))

    sig_list = np.vstack(sig_list)
    name_list = loader.dataset.filenames(basename=True)

    real_sigmoid = sig_list.squeeze()
    real_pred = ((sig_list >= args.thresh) * 1).squeeze()

    # map basename -> (binary prediction, probability)
    name_pred_dict = {}
    for idx in range(len(name_list)):
        name_pred_dict[name_list[idx]] = (real_pred[idx], real_sigmoid[idx])

    # Write outputs next to the checkpoint file.
    # fix: the old `args.checkpoint.replace(basename, "")` corrupts the path
    # whenever the basename substring also occurs earlier in it — use
    # os.path.dirname instead.
    args.output_dir = os.path.dirname(args.checkpoint)
    with open(os.path.join(args.output_dir, 'prediction.tsv'),
              'w') as out_file:
        for name in name_list:
            out_file.write('{}\n'.format(str(name_pred_dict[name][0])))
    with open(os.path.join(args.output_dir, 'probability.tsv'),
              'w') as out_file:
        for name in name_list:
            out_file.write('{}\n'.format(name_pred_dict[name][1]))

    copyfile(
        os.path.join(args.output_dir, 'prediction.tsv'),
        '/home/workspace/user-workspace/prediction/' + 'prediction_153_' +
        args.checkpoint.split('/')[-2] + '.tsv')
# Esempio n. 11
# 0
def validate_gate(model, loader, loss_fn, args, log_suffix=''):
    """Validate a gated (dynamic-width) model for one pass over ``loader``.

    For each batch, first runs the model in 'smallest' mode to derive a
    per-sample gate target (0 when the smallest sub-network already
    classifies the sample correctly, 3 otherwise — presumably the index of
    the largest width option; TODO confirm), then runs 'dynamic' mode and
    records loss, top-1/top-5 accuracy, gate-prediction accuracy and
    estimated FLOPs.

    Returns:
        OrderedDict with 'loss', 'prec1', 'prec5' and 'flops' averages.
        Note: gate accuracy meters are only updated on the distributed
        path, but are logged on both.
    """
    start_chn_idx = 0
    num_gate = 1

    batch_time_m = AverageMeter()
    data_time_m = AverageMeter()
    losses_m = AverageMeter()
    prec1_m = AverageMeter()
    prec5_m = AverageMeter()
    flops_m = AverageMeter()
    acc_gate_m_l = [AverageMeter() for i in range(num_gate)]
    model.eval()

    # Truncate each module's channel-choice lists to the first 4 options
    # (starting at start_chn_idx) and cache them as float CUDA tensors.
    for n, m in model.named_modules():
        if len(getattr(m, 'in_channels_list', [])) > 4:
            m.in_channels_list = m.in_channels_list[start_chn_idx:4]
            m.in_channels_list_tensor = torch.from_numpy(
                np.array(m.in_channels_list)).float().cuda()
        if len(getattr(m, 'out_channels_list', [])) > 4:
            m.out_channels_list = m.out_channels_list[start_chn_idx:4]
            m.out_channels_list_tensor = torch.from_numpy(
                np.array(m.out_channels_list)).float().cuda()

    end = time.time()
    last_idx = len(loader) - 1
    # attach hooks that accumulate per-module MAC counts during forward
    model.apply(lambda m: add_mac_hooks(m))
    for batch_idx, (input, target) in enumerate(loader):
        last_batch = batch_idx == last_idx
        data_time_m.update(time.time() - end)
        if not args.prefetcher:
            input, target = input.cuda(), target.cuda()
        # generate online labels
        with torch.no_grad():
            # smallest sub-network decides the gate target per sample:
            # correct -> gate 0, wrong -> gate 3
            set_model_mode(model, 'smallest')
            output = model(input)
            conf_s, correct_s = accuracy(output, target, no_reduce=True)
            gate_target = [
                torch.LongTensor([0])
                if correct_s[0][idx] else torch.LongTensor([3])
                for idx in range(correct_s[0].size(0))
            ]
            gate_target = torch.stack(gate_target).squeeze(-1).cuda()
        # =============
        # evaluated forward pass with the gates active
        set_model_mode(model, 'dynamic')
        output = model(input)

        # unwrap DataParallel/DistributedDataParallel if present
        if hasattr(model, 'module'):
            model_ = model.module
        else:
            model_ = model

        # accuracy of each gate's width decision vs. the online gate target
        gate_acc_l = []
        for n, m in model_.named_modules():
            if isinstance(m, MultiHeadGate):
                if getattr(m, 'keep_gate', None) is not None:
                    gate_acc_l.append(
                        accuracy(m.keep_gate, gate_target, topk=(1, ))[0])

        # total FLOPs accumulated by the MAC hooks for this batch; may be a
        # tensor (per-sample) or a plain number depending on the hooks
        running_flops = add_flops(model)
        if isinstance(running_flops, torch.Tensor):
            running_flops = running_flops.float().mean().cuda()
        else:
            running_flops = torch.FloatTensor([running_flops]).cuda()

        loss = loss_fn(output, target)
        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        if not args.distributed:
            losses_m.update(loss.item(), input.size(0))
            prec1_m.update(prec1.item(), input.size(0))
            prec5_m.update(prec5.item(), input.size(0))
            flops_m.update(running_flops.item(), input.size(0))
        else:
            # average metrics across ranks before recording
            reduced_loss = reduce_tensor(loss.data, args.world_size)
            reduced_prec1 = reduce_tensor(prec1, args.world_size)
            reduced_prec5 = reduce_tensor(prec5, args.world_size)
            reduced_flops = reduce_tensor(running_flops, args.world_size)
            reduced_acc_gate_l = reduce_list_tensor(gate_acc_l,
                                                    args.world_size)
            torch.cuda.synchronize()
            losses_m.update(reduced_loss.item(), input.size(0))
            prec1_m.update(reduced_prec1.item(), input.size(0))
            prec5_m.update(reduced_prec5.item(), input.size(0))
            flops_m.update(reduced_flops.item(), input.size(0))
            for i in range(num_gate):
                acc_gate_m_l[i].update(reduced_acc_gate_l[i].item(),
                                       input.size(0))
        batch_time_m.update(time.time() - end)
        # log on rank 0 at the interval and on the final batch (never batch 0)
        if (last_batch or batch_idx % args.log_interval
                == 0) and args.local_rank == 0 and batch_idx != 0:
            print_gate_stats(model)
            log_name = 'Test' + log_suffix
            logging.info(
                '{}: [{:>4d}/{} ({:>3.0f}%)]  '
                'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f})  '
                'Acc@1: {prec1.val:>9.6f} ({prec1.avg:>6.4f})  '
                'Acc@5: {prec5.val:>9.6f} ({prec5.avg:>6.4f})  '
                'GateAcc: {acc_gate[0].val:>6.4f}({acc_gate[0].avg:>6.4f})  '
                'Flops: {flops.val:>6.0f} ({flops.avg:>6.0f})  '
                'Time: {batch_time.val:.3f}s, {rate:>7.2f}/s  '
                '({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s)  '
                'DataTime: {data_time.val:.3f} ({data_time.avg:.3f})\n'.format(
                    log_name,
                    batch_idx,
                    last_idx,
                    100. * batch_idx / last_idx,
                    loss=losses_m,
                    prec1=prec1_m,
                    prec5=prec5_m,
                    flops=flops_m,
                    batch_time=batch_time_m,
                    rate=input.size(0) * args.world_size / batch_time_m.val,
                    rate_avg=input.size(0) * args.world_size /
                    batch_time_m.avg,
                    data_time=data_time_m,
                    acc_gate=acc_gate_m_l))

        end = time.time()
        # end for
    metrics = OrderedDict([('loss', losses_m.avg), ('prec1', prec1_m.avg),
                           ('prec5', prec5_m.avg), ('flops', flops_m.avg)])

    return metrics
def train_epoch(current_epoch, loss_functions, model, optimizer, scheduler,
                train_data_loader, summary_writer, conf, local_rank):
    """Train the segmentation/center model for one epoch.

    Computes a combined mask + center loss (center loss scaled by 50, the
    sum halved, and further damped 10x on epoch 0), backpropagates with
    optional apex fp16 scaling, clips gradients to norm 1, steps the
    optimizer and (for 'step'/'poly' schedules) the scheduler, and logs
    learning rates and the average loss to TensorBoard on rank 0.
    """
    losses = AverageMeter()
    c_losses = AverageMeter()
    d_losses = AverageMeter()
    dices = AverageMeter()
    iterator = tqdm(train_data_loader)
    model.train()
    if conf["optimizer"]["schedule"]["mode"] == "epoch":
        scheduler.step(current_epoch)
    for i, sample in enumerate(iterator):
        imgs = sample["image"].cuda()
        masks = sample["mask"].cuda().float()
        centers = sample["center"].cuda().float()

        seg_mask, center_mask = model(imgs)
        # dice on the first mask channel — monitoring only, no gradients
        with torch.no_grad():
            pred = torch.sigmoid(seg_mask)
            d = dice_round(pred[:, 0:1, ...].cpu(),
                           masks[:, 0:1, ...].cpu(),
                           t=0.5).item()
        dices.update(d, imgs.size(0))

        mask_loss = loss_functions["mask_loss"](seg_mask, masks)
        center_loss = loss_functions["center_loss"](center_mask, centers)
        center_loss *= 50  # rebalance the much smaller center-loss term
        loss = mask_loss + center_loss

        loss /= 2
        if current_epoch == 0:
            # warm-up: damp the loss heavily on the very first epoch
            loss /= 10
        losses.update(loss.item(), imgs.size(0))
        d_losses.update(mask_loss.item(), imgs.size(0))

        c_losses.update(center_loss.item(), imgs.size(0))
        iterator.set_postfix({
            "lr": float(scheduler.get_lr()[-1]),
            "epoch": current_epoch,
            "loss": losses.avg,
            "dice": dices.avg,
            "d_loss": d_losses.avg,
            "c_loss": c_losses.avg,
        })
        optimizer.zero_grad()
        if conf['fp16']:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            # fix: amp.master_params is only valid when apex amp is
            # initialized; previously it was called unconditionally and
            # would fail (or clip the wrong params) in the fp32 path
            torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), 1)
        else:
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()
        torch.cuda.synchronize()

        if conf["optimizer"]["schedule"]["mode"] in ("step", "poly"):
            scheduler.step(i + current_epoch * len(train_data_loader))

    if local_rank == 0:
        for idx, param_group in enumerate(optimizer.param_groups):
            lr = param_group['lr']
            summary_writer.add_scalar('group{}/lr'.format(idx),
                                      float(lr),
                                      global_step=current_epoch)
        summary_writer.add_scalar('train/loss',
                                  float(losses.avg),
                                  global_step=current_epoch)
# Esempio n. 13
# 0
def validate(args):
    """Evaluate a (possibly checkpointed) model on a validation dataset.

    Builds the model and loader from ``args``, runs one no-grad pass
    recording cross-entropy loss plus top-1/top-5 accuracy, and returns
    an OrderedDict of rounded summary metrics.
    """
    # might as well try to validate something
    args.pretrained = args.pretrained or not args.checkpoint

    # create model
    model = create_model(args.model,
                         num_classes=args.num_classes,
                         in_chans=3,
                         pretrained=args.pretrained)

    if args.checkpoint:
        load_checkpoint(model, args.checkpoint, args.use_ema)

    param_count = sum(p.numel() for p in model.parameters())
    logging.info('Model %s created, param count: %d' % (args.model, param_count))

    data_config = resolve_data_config(model, args)
    model, test_time_pool = apply_test_time_pool(model, data_config, args)

    if args.num_gpu > 1:
        model = torch.nn.DataParallel(
            model, device_ids=list(range(args.num_gpu))).cuda()
    else:
        model = model.cuda()

    criterion = nn.CrossEntropyLoss().cuda()

    loader = create_loader(
        Dataset(args.data, load_bytes=args.tf_preprocessing),
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        use_prefetcher=True,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        crop_pct=1.0 if test_time_pool else data_config['crop_pct'],
        tf_preprocessing=args.tf_preprocessing)

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.eval()
    end = time.time()
    with torch.no_grad():
        for step, (images, target) in enumerate(loader):
            target = target.cuda()
            images = images.cuda()

            # forward pass and loss
            logits = model(images)
            loss = criterion(logits, target)

            # accuracy / loss bookkeeping
            acc1, acc5 = accuracy(logits.data, target, topk=(1, 5))
            n = images.size(0)
            losses.update(loss.item(), n)
            top1.update(acc1.item(), n)
            top5.update(acc5.item(), n)

            # timing
            batch_time.update(time.time() - end)
            end = time.time()

            if step % args.log_freq == 0:
                logging.info(
                    'Test: [{0:>4d}/{1}]  '
                    'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})  '
                    'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f})  '
                    'Prec@1: {top1.val:>7.4f} ({top1.avg:>7.4f})  '
                    'Prec@5: {top5.val:>7.4f} ({top5.avg:>7.4f})'.format(
                        step,
                        len(loader),
                        batch_time=batch_time,
                        rate_avg=images.size(0) / batch_time.avg,
                        loss=losses,
                        top1=top1,
                        top5=top5))

    results = OrderedDict(top1=round(top1.avg, 3),
                          top1_err=round(100 - top1.avg, 3),
                          top5=round(top5.avg, 3),
                          top5_err=round(100 - top5.avg, 3),
                          param_count=round(param_count / 1e6, 2))

    logging.info(' * Prec@1 {:.3f} ({:.3f}) Prec@5 {:.3f} ({:.3f})'.format(
        results['top1'], results['top1_err'], results['top5'],
        results['top5_err']))

    return results
# Esempio n. 14
# 0
def main():
    """Classify images and sort them into per-class result directories.

    Runs the model over ``args.data``, takes the top-k class ids per image,
    and copies each image into ``classification_result/<top1-class-name>/``
    using the class names read from ``labels.txt`` in the current
    directory (one name per line, line order = class index).
    """
    setup_default_logging()
    args = parser.parse_args()
    # might as well try to do something useful...
    args.pretrained = args.pretrained or not args.checkpoint

    # create model
    model = create_model(args.model,
                         num_classes=args.num_classes,
                         in_chans=3,
                         pretrained=args.pretrained,
                         checkpoint_path=args.checkpoint)

    logging.info('Model %s created, param count: %d' %
                 (args.model, sum(m.numel() for m in model.parameters())))

    config = resolve_data_config(vars(args), model=model)
    model, test_time_pool = apply_test_time_pool(model, config, args)

    if args.num_gpu > 1:
        model = torch.nn.DataParallel(model,
                                      device_ids=list(range(
                                          args.num_gpu))).cuda()
    else:
        model = model.cuda()

    loader = create_loader(
        Dataset(args.data),
        input_size=config['input_size'],
        batch_size=args.batch_size,
        use_prefetcher=True,
        interpolation=config['interpolation'],
        mean=config['mean'],
        std=config['std'],
        num_workers=args.workers,
        crop_pct=1.0 if test_time_pool else config['crop_pct'])

    model.eval()

    # never ask for more classes than the model outputs
    k = min(args.topk, args.num_classes)
    batch_time = AverageMeter()
    end = time.time()
    topk_ids = []
    with torch.no_grad():
        for batch_idx, (input, _) in enumerate(loader):
            input = input.cuda()
            labels = model(input)
            topk = labels.topk(k)[1]
            topk_ids.append(topk.cpu().numpy())

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % args.log_freq == 0:
                logging.info(
                    'Predict: [{0}/{1}] Time {batch_time.val:.3f} ({batch_time.avg:.3f})'
                    .format(batch_idx, len(loader), batch_time=batch_time))

    topk_ids = np.concatenate(topk_ids, axis=0).squeeze()

    savebase = "classification_result/"
    os.makedirs(savebase, exist_ok=True)

    # class index -> human-readable class name, one name per line
    classfile = "labels.txt"
    classpath = os.path.join(os.getcwd(), classfile)
    with open(classpath) as f:
        classlist = [line.rstrip('\n') for line in f]

    filenames = loader.dataset.filenames()
    for filepath, label in zip(filenames, topk_ids):
        filename = os.path.basename(filepath)
        prediction = classlist[label[0]]
        # fix: build paths with os.path.join instead of '+' concatenation
        savedir = os.path.join(savebase, prediction)
        savepath = os.path.join(savedir, filename)
        os.makedirs(savedir, exist_ok=True)
        copyfile(filepath, savepath)
        print('{0} : {1}'.format(filename, prediction))
# Esempio n. 15
# 0
def validate(args):
    # create model
    from tinynet import tinynet
    if args.model_name == 'tinynet_a':
        args.r = 0.86
        args.w = 1.0
        args.d = 1.2
        ckpt_path = './models/tinynet_a.pth'
    elif args.model_name == 'tinynet_b':
        args.r = 0.84
        args.w = 0.75
        args.d = 1.1
        ckpt_path = './models/tinynet_b.pth'
    elif args.model_name == 'tinynet_c':
        args.r = 0.825
        args.w = 0.54
        args.d = 0.85
        ckpt_path = './models/tinynet_c.pth'
    elif args.model_name == 'tinynet_d':
        args.r = 0.68
        args.w = 0.54
        args.d = 0.695
        ckpt_path = './models/tinynet_d.pth'
    elif args.model_name == 'tinynet_e':
        args.r = 0.475
        args.w = 0.51
        args.d = 0.60
        ckpt_path = './models/tinynet_e.pth'
    else:
        raise 'Unsupported model name.'

    model = tinynet(
        r=args.r,
        w=args.w,
        d=args.d,
    )

    state_dict = torch.load(ckpt_path)
    model.load_state_dict(state_dict, strict=False)

    params = sum([param.numel() for param in model.parameters()])
    logging.info('Model %s created, #params: %d' % (args.model_name, params))

    data_config = resolve_data_config(vars(args), model=model)

    model = model.cuda()
    criterion = nn.CrossEntropyLoss().cuda()

    dataset = Dataset(args.data)
    data_loader = create_loader(dataset,
                                is_training=False,
                                input_size=data_config['input_size'],
                                batch_size=128,
                                use_prefetcher=False,
                                interpolation=data_config['interpolation'],
                                mean=data_config['mean'],
                                std=data_config['std'],
                                num_workers=4,
                                crop_pct=data_config['crop_pct'],
                                pin_memory=False)

    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.eval()
    with torch.no_grad():
        for i, (input, target) in enumerate(data_loader):
            input = input.cuda()
            target = target.cuda()

            output = model(input)
            loss = criterion(output, target)

            acc1, acc5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(acc1.item(), input.size(0))
            top5.update(acc5.item(), input.size(0))

            if i % 100 == 0:
                logging.info(
                    'Test: [{0:>4d}/{1}]  Loss: {loss.val:>7.4f} ({loss.avg:>6.4f})'
                    .format(i, len(data_loader), loss=losses))

    logging.info(' * Acc@1 {:.3f} Acc@5 {:.3f}'.format(top1.avg, top5.avg))
    def train_one_epoch(
            self, epoch, net, loader, optimizer, loss_fn,
            lr_scheduler=None, output_dir=None, amp_autocast=suppress,
            loss_scaler=None, model_ema=None, mixup_fn=None, time_limit=math.inf):
        """Train ``net`` for one epoch over ``loader`` and return summary metrics.

        Returns a dict containing the epoch's average train metric (keyed by
        the metric name, ``'accuracy'`` or ``'rmse'``), ``'train_loss'``, and a
        ``'time_limit'`` flag that is True when the cumulative elapsed time
        exceeded ``time_limit`` and the epoch was aborted mid-loop.
        """
        start_tic = time.time()
        # Disable mixup once the configured cutoff epoch is reached, either on
        # the prefetching loader or on the standalone mixup function.
        if self._augmentation_cfg.mixup_off_epoch and epoch >= self._augmentation_cfg.mixup_off_epoch:
            if self._misc_cfg.prefetcher and loader.mixup_enabled:
                loader.mixup_enabled = False
            elif mixup_fn is not None:
                mixup_fn.mixup_enabled = False

        # Second-order optimizers require create_graph=True during backward.
        second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order
        losses_m = AverageMeter()
        train_metric_score_m = AverageMeter()

        net.train()

        num_updates = epoch * len(loader)
        self._time_elapsed += time.time() - start_tic
        tic = time.time()
        last_tic = time.time()
        train_metric_name = 'accuracy'
        batch_idx = 0
        for batch_idx, (input, target) in enumerate(loader):
            b_tic = time.time()
            # Abort mid-epoch once the overall training time budget is spent.
            # NOTE(review): this early return always uses the 'train_acc' key,
            # even when the metric is RMSE — differs from the final return
            # below; confirm callers handle both key names.
            if self._time_elapsed > time_limit:
                return {'train_acc': train_metric_score_m.avg, 'train_loss': losses_m.avg, 'time_limit': True}
            if self._problem_type == REGRESSION:
                target = target.to(torch.float32)
            if not self._misc_cfg.prefetcher:
                # prefetcher would move data to cuda by default
                input, target = input.to(self.ctx[0]), target.to(self.ctx[0])
                if mixup_fn is not None:
                    input, target = mixup_fn(input, target)

            with amp_autocast():
                output = net(input)
                if self._problem_type == REGRESSION:
                    output = output.flatten()
                loss = loss_fn(output, target)
            if self._problem_type == REGRESSION:
                train_metric_name = 'rmse'
                train_metric_score = rmse(output, target)
            else:
                # Matching output/target shapes indicate a regression-style
                # target even for a non-regression problem type; fall back to
                # RMSE in that case, otherwise report top-1 accuracy in [0, 1].
                if output.shape == target.shape:
                    train_metric_name = 'rmse'
                    train_metric_score = rmse(output, target)
                else:
                    train_metric_score = accuracy(output, target)[0] / 100

            losses_m.update(loss.item(), input.size(0))
            train_metric_score_m.update(train_metric_score.item(), output.size(0))

            optimizer.zero_grad()
            if loss_scaler is not None:
                # AMP path: the scaler performs backward, (optional) gradient
                # clipping and the optimizer step in one call.
                loss_scaler(
                    loss, optimizer,
                    clip_grad=self._optimizer_cfg.clip_grad, clip_mode=self._optimizer_cfg.clip_mode,
                    parameters=model_parameters(net, exclude_head='agc' in self._optimizer_cfg.clip_mode),
                    create_graph=second_order)
            else:
                loss.backward(create_graph=second_order)
                if self._optimizer_cfg.clip_grad is not None:
                    dispatch_clip_grad(
                        model_parameters(net, exclude_head='agc' in self._optimizer_cfg.clip_mode),
                        value=self._optimizer_cfg.clip_grad, mode=self._optimizer_cfg.clip_mode)
                optimizer.step()

            if model_ema is not None:
                model_ema.update(net)

            if self.found_gpu:
                torch.cuda.synchronize()

            num_updates += 1
            if (batch_idx+1) % self._misc_cfg.log_interval == 0:
                # Average lr across all param groups, for logging only.
                lrl = [param_group['lr'] for param_group in optimizer.param_groups]
                lr = sum(lrl) / len(lrl)
                self._logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f',
                                  epoch, batch_idx,
                                  self._train_cfg.batch_size*self._misc_cfg.log_interval/(time.time()-last_tic),
                                  train_metric_name, train_metric_score_m.avg, lr)
                last_tic = time.time()

                if self._misc_cfg.save_images and output_dir:
                    torchvision.utils.save_image(
                        input,
                        os.path.join(output_dir, 'train-batch-%d.jpg' % batch_idx),
                        padding=0,
                        normalize=True)

            if lr_scheduler is not None:
                lr_scheduler.step_update(num_updates=num_updates, metric=losses_m.avg)

            self._time_elapsed += time.time() - b_tic

        throughput = int(self._train_cfg.batch_size * batch_idx / (time.time() - tic))
        self._logger.info('[Epoch %d] training: %s=%f', epoch, train_metric_name, train_metric_score_m.avg)
        self._logger.info('[Epoch %d] speed: %d samples/sec\ttime cost: %f', epoch, throughput, time.time()-tic)

        end_time = time.time()
        # Lookahead-style optimizers need slow/fast weights synced post-epoch.
        if hasattr(optimizer, 'sync_lookahead'):
            optimizer.sync_lookahead()

        self._time_elapsed += time.time() - end_time

        return {train_metric_name: train_metric_score_m.avg, 'train_loss': losses_m.avg, 'time_limit': False}
# Esempio n. 17
# 0
def validate(args):
    """Evaluate a classification model on the validation set.

    Builds the model from the ``args.model`` expression, optionally restores a
    checkpoint, runs one full pass over the eval loader and returns an
    OrderedDict with top-1/top-5 accuracy, their error complements, parameter
    count (millions), image size, crop percentage and interpolation mode.
    """
    # might as well try to validate something
    args.pretrained = False
    args.prefetcher = True

    # create model
    # SECURITY: eval() executes arbitrary code from the args.model string.
    # Only safe for trusted CLI input; prefer a name -> constructor registry
    # if this value can ever come from an untrusted source.
    model = eval(args.model)(config_path=args.config_path,
                             target_flops=args.target_flops,
                             num_classes=args.num_classes,
                             bn_momentum=args.bn_momentum,
                             activation=args.activation,
                             se=args.se)

    if args.checkpoint:
        load_checkpoint(model, args.checkpoint, True)

    param_count = sum(m.numel() for m in model.parameters())
    logging.info('Model %s created, param count: %d' %
                 (args.model, param_count))

    data_config = resolve_data_config(vars(args), model=model)

    if args.num_gpu > 1:
        model = torch.nn.DataParallel(model,
                                      device_ids=list(range(
                                          args.num_gpu))).cuda()
    else:
        model = model.cuda()

    criterion = nn.CrossEntropyLoss().cuda()

    if args.lmdb:
        eval_dir = os.path.join(args.data, 'test_lmdb', 'test.lmdb')
        dataset_eval = ImageFolderLMDB(eval_dir, None, None)
    else:
        eval_dir = os.path.join(args.data, 'val')
        dataset_eval = Dataset(eval_dir)

    # Full-image evaluation (no center-crop shrink).
    crop_pct = 1.0
    loader = create_loader(dataset_eval,
                           input_size=data_config['input_size'],
                           batch_size=args.batch_size,
                           is_training=False,
                           use_prefetcher=args.prefetcher,
                           interpolation=data_config['interpolation'],
                           mean=data_config['mean'],
                           std=data_config['std'],
                           num_workers=args.workers)

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.eval()
    end = time.time()
    with torch.no_grad():
        # Prefetcher (enabled above) moves batches to GPU; no explicit .cuda().
        for i, (input, target) in enumerate(loader):
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.log_freq == 0:
                logging.info(
                    'Test: [{0:>4d}/{1}]  '
                    'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s)  '
                    'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f})  '
                    'Prec@1: {top1.val:>7.3f} ({top1.avg:>7.3f})  '
                    'Prec@5: {top5.val:>7.3f} ({top5.avg:>7.3f})'.format(
                        i,
                        len(loader),
                        batch_time=batch_time,
                        rate_avg=input.size(0) / batch_time.avg,
                        loss=losses,
                        top1=top1,
                        top5=top5))

    results = OrderedDict(top1=round(top1.avg, 4),
                          top1_err=round(100 - top1.avg, 4),
                          top5=round(top5.avg, 4),
                          top5_err=round(100 - top5.avg, 4),
                          param_count=round(param_count / 1e6, 2),
                          img_size=data_config['input_size'][-1],
                          crop_pct=crop_pct,  # fixed key (was typo 'cropt_pct')
                          cropt_pct=crop_pct,  # legacy misspelled key kept for compatibility
                          interpolation=data_config['interpolation'])

    logging.info(' * Prec@1 {:.3f} ({:.3f}) Prec@5 {:.3f} ({:.3f})'.format(
        results['top1'], results['top1_err'], results['top5'],
        results['top5_err']))

    return results
# Esempio n. 18
# 0
def train_one_epoch(config, model, criterion, data_loader, optimizer, epoch,
                    mixup_fn, lr_scheduler, writer):
    """Train ``model`` for one epoch and log per-step metrics.

    Supports optional mixup, apex AMP (active when config.AMP_OPT_LEVEL is not
    "O0"), gradient clipping (config.TRAIN.CLIP_GRAD) and gradient
    accumulation (config.TRAIN.ACCUMULATION_STEPS > 1). Progress is reported
    through ``logger`` and scalars are written to the TensorBoard ``writer``;
    nothing is returned.
    """
    model.train()
    optimizer.zero_grad()

    num_steps = len(data_loader)
    batch_time = AverageMeter()
    loss_meter = AverageMeter()
    norm_meter = AverageMeter()

    start = time.time()
    end = time.time()
    for idx, (samples, targets) in enumerate(data_loader):
        samples = samples.cuda(non_blocking=True)
        targets = targets.cuda(non_blocking=True)

        if mixup_fn is not None:
            samples, targets = mixup_fn(samples, targets)

        outputs = model(samples)

        if config.TRAIN.ACCUMULATION_STEPS > 1:
            # Gradient accumulation: scale the loss so accumulated gradients
            # average over the accumulation window, and step only every
            # ACCUMULATION_STEPS batches.
            loss = criterion(outputs, targets)
            loss = loss / config.TRAIN.ACCUMULATION_STEPS
            if config.AMP_OPT_LEVEL != "O0":
                # apex AMP: backward through the scaled loss, then clip/measure
                # gradients on the fp32 master params.
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                if config.TRAIN.CLIP_GRAD:
                    grad_norm = torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), config.TRAIN.CLIP_GRAD)
                else:
                    grad_norm = get_grad_norm(amp.master_params(optimizer))
            else:
                loss.backward()
                if config.TRAIN.CLIP_GRAD:
                    grad_norm = torch.nn.utils.clip_grad_norm_(
                        model.parameters(), config.TRAIN.CLIP_GRAD)
                else:
                    grad_norm = get_grad_norm(model.parameters())
            if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0:
                optimizer.step()
                optimizer.zero_grad()
                lr_scheduler.step_update(epoch * num_steps + idx)
            writer.add_scalar("train/loss",
                              scalar_value=loss,
                              global_step=(epoch * num_steps + idx))
            # NOTE(review): lr is logged with global_step=epoch, so every
            # iteration in the epoch overwrites the same TensorBoard step —
            # the other scalars use the iteration index; confirm intended.
            writer.add_scalar("train/lr",
                              scalar_value=optimizer.param_groups[0]['lr'],
                              global_step=epoch)
            writer.add_scalar("train/grad_norm",
                              scalar_value=grad_norm,
                              global_step=(epoch * num_steps + idx))
        else:
            # No accumulation: backward, clip, and step every batch.
            loss = criterion(outputs, targets)
            optimizer.zero_grad()
            if config.AMP_OPT_LEVEL != "O0":
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                if config.TRAIN.CLIP_GRAD:
                    grad_norm = torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), config.TRAIN.CLIP_GRAD)
                else:
                    grad_norm = get_grad_norm(amp.master_params(optimizer))
            else:
                loss.backward()
                if config.TRAIN.CLIP_GRAD:
                    grad_norm = torch.nn.utils.clip_grad_norm_(
                        model.parameters(), config.TRAIN.CLIP_GRAD)
                else:
                    grad_norm = get_grad_norm(model.parameters())
            optimizer.step()
            lr_scheduler.step_update(epoch * num_steps + idx)
            writer.add_scalar("train/loss",
                              scalar_value=loss,
                              global_step=(epoch * num_steps + idx))
            # NOTE(review): same epoch-granularity global_step as above.
            writer.add_scalar("train/lr",
                              scalar_value=optimizer.param_groups[0]['lr'],
                              global_step=epoch)
            writer.add_scalar("train/grad_norm",
                              scalar_value=grad_norm,
                              global_step=(epoch * num_steps + idx))

        # Wait for GPU work so the per-batch timing below is accurate.
        torch.cuda.synchronize()

        loss_meter.update(loss.item(), targets.size(0))
        norm_meter.update(grad_norm)
        batch_time.update(time.time() - end)
        end = time.time()

        if idx % config.PRINT_FREQ == 0:
            lr = optimizer.param_groups[0]['lr']
            memory_used = torch.cuda.max_memory_allocated() / (1024.0 * 1024.0)
            etas = batch_time.avg * (num_steps - idx)
            logger.info(
                f'Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t'
                f'eta {datetime.timedelta(seconds=int(etas))} lr {lr:.6f}\t'
                f'time {batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                f'loss {loss_meter.val:.4f} ({loss_meter.avg:.4f})\t'
                f'grad_norm {norm_meter.val:.4f} ({norm_meter.avg:.4f})\t'
                f'mem {memory_used:.0f}MB')
    epoch_time = time.time() - start
    logger.info(
        f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}"
    )
# Esempio n. 19
# 0
def validate(args):
    """Benchmark GPU inference latency of a detection bench model.

    Builds the 'predict' bench for ``args.model``, takes one batch from the
    COCO loader, warms up the GPU, then times repeated forward passes with
    CUDA events and prints running latency/throughput. Returns the (empty)
    ``results`` list, matching the interface of the non-benchmark validate
    scripts.
    """
    setup_default_logging()

    # might as well try to validate something
    args.pretrained = args.pretrained or not args.checkpoint
    args.prefetcher = not args.no_prefetcher

    # create model
    bench = create_model(args.model,
                         bench_task='predict',
                         pretrained=args.pretrained,
                         redundant_bias=args.redundant_bias,
                         checkpoint_path=args.checkpoint,
                         checkpoint_ema=args.use_ema)
    input_size = bench.config.image_size

    param_count = sum(m.numel() for m in bench.parameters())
    print('Model %s created, param count: %d' % (args.model, param_count))

    bench = bench.cuda()
    if has_amp:
        print('Using AMP mixed precision.')
        bench = amp.initialize(bench, opt_level='O1')
    else:
        print('AMP not installed, running network in FP32.')

    if args.num_gpu > 1:
        bench = torch.nn.DataParallel(bench,
                                      device_ids=list(range(args.num_gpu)))

    # 'test' splits only ship image info; other splits have full instances.
    if 'test' in args.anno:
        annotation_path = os.path.join(args.data, 'annotations',
                                       f'image_info_{args.anno}.json')
    else:
        annotation_path = os.path.join(args.data, 'annotations',
                                       f'instances_{args.anno}.json')
    image_dir = args.anno
    dataset = CocoDetection(os.path.join(args.data, image_dir),
                            annotation_path)

    loader = create_loader(dataset,
                           input_size=input_size,
                           batch_size=args.batch_size,
                           use_prefetcher=args.prefetcher,
                           interpolation=args.interpolation,
                           fill_color=args.fill_color,
                           num_workers=args.workers,
                           mean=args.mean,
                           std=args.std,
                           pin_mem=args.pin_mem)

    img_ids = []
    results = []
    bench.eval()

    # Grab a single batch and reuse it for every timed iteration.
    dumm_inp = tisc = tisz = None
    for input, target in loader:
        dumm_inp = input
        tisc = target['img_scale']
        tisz = target['img_size']
        break
    if dumm_inp is None:
        raise RuntimeError('Benchmark data loader yielded no batches.')

    starter = torch.cuda.Event(enable_timing=True)
    ender = torch.cuda.Event(enable_timing=True)

    # GPU warm-up. no_grad also applies here so warm-up passes do not build
    # autograd graphs (the original warm-up ran outside torch.no_grad()).
    with torch.no_grad():
        for _ in range(10):
            _ = bench(dumm_inp, tisc, tisz)

    # MEASURE PERFORMANCE
    print("starting")
    num_reps = 2000
    batch_time = AverageMeter()
    with torch.no_grad():
        for rep in range(1, num_reps + 1):
            starter.record()
            _ = bench(dumm_inp, tisc, tisz)
            ender.record()
            # elapsed_time is only valid after both events have completed.
            torch.cuda.synchronize()
            curr_time = starter.elapsed_time(ender)
            batch_time.update(curr_time)

            # Report the actual repetition count (the original printed a stale
            # loop index from the single-batch grab above, which was always 1).
            print(
                'Test: [{0:>4d}/{1}]  '
                'Time: {batch_time.val:.3f}ms ({batch_time.avg:.3f}ms, {rate_avg:>7.2f}/s)  '
                .format(
                    rep,
                    num_reps,
                    batch_time=batch_time,
                    rate_avg=dumm_inp.size(0) / batch_time.avg,
                ))

    return results