Example #1
def create_datasets_and_loaders(args, model_config):
    input_config = resolve_input_config(args, model_config=model_config)

    dataset_train, dataset_eval = create_dataset(args.dataset, args.root)

    # setup labeler in loader/collate_fn if not enabled in the model bench
    labeler = None
    if not args.bench_labeler:
        labeler = AnchorLabeler(Anchors.from_config(model_config),
                                model_config.num_classes,
                                match_threshold=0.5)

    loader_train = create_loader(
        dataset_train,
        input_size=input_config['input_size'],
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        re_prob=args.reprob,
        re_mode=args.remode,
        re_count=args.recount,
        # color_jitter=args.color_jitter,
        # auto_augment=args.aa,
        interpolation=args.train_interpolation or input_config['interpolation'],
        fill_color=input_config['fill_color'],
        mean=input_config['mean'],
        std=input_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        pin_mem=args.pin_mem,
        anchor_labeler=labeler,
    )

    if args.val_skip > 1:
        dataset_eval = SkipSubset(dataset_eval, args.val_skip)
    loader_eval = create_loader(
        dataset_eval,
        input_size=input_config['input_size'],
        batch_size=args.batch_size,
        is_training=False,
        use_prefetcher=args.prefetcher,
        interpolation=input_config['interpolation'],
        fill_color=input_config['fill_color'],
        mean=input_config['mean'],
        std=input_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        pin_mem=args.pin_mem,
        anchor_labeler=labeler,
    )

    evaluator = create_evaluator(args.dataset,
                                 loader_eval.dataset,
                                 distributed=args.distributed,
                                 pred_yxyx=False)

    return loader_train, loader_eval, evaluator
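A minimal sketch of how this function might be driven from a training script. The attribute names on args are inferred from the body above, SimpleNamespace stands in for the real CLI parser, and the values are illustrative only.

# Hypothetical driver; model_config is assumed to come from a created model
# bench (e.g. bench.config), and the arg values are placeholders.
from types import SimpleNamespace

args = SimpleNamespace(
    dataset='coco', root='/data/coco', batch_size=8, prefetcher=True,
    reprob=0.0, remode='pixel', recount=1, train_interpolation=None,
    workers=4, distributed=False, pin_mem=True, bench_labeler=False,
    val_skip=0)

loader_train, loader_eval, evaluator = create_datasets_and_loaders(
    args, model_config)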
Example #2
def validate(args):
    setup_default_logging()

    if args.amp:
        if has_apex:
            args.apex_amp = True
        elif has_native_amp:
            args.native_amp = True
    assert not args.apex_amp or not args.native_amp, "Only one AMP mode should be set."
    args.pretrained = args.pretrained or not args.checkpoint  # might as well try to validate something
    args.prefetcher = not args.no_prefetcher

    # create model
    with set_layer_config(scriptable=args.torchscript):
        bench = create_model(
            args.model,
            bench_task='predict',
            num_classes=args.num_classes,
            pretrained=args.pretrained,
            redundant_bias=args.redundant_bias,
            soft_nms=args.soft_nms,
            checkpoint_path=args.checkpoint,
            checkpoint_ema=args.use_ema,
        )
    model_config = bench.config

    param_count = sum([m.numel() for m in bench.parameters()])
    print('Model %s created, param count: %d' % (args.model, param_count))

    bench = bench.cuda()

    amp_autocast = suppress
    if args.apex_amp:
        bench = amp.initialize(bench, opt_level='O1')
        print('Using NVIDIA APEX AMP. Validating in mixed precision.')
    elif args.native_amp:
        amp_autocast = torch.cuda.amp.autocast
        print('Using native Torch AMP. Validating in mixed precision.')
    else:
        print('AMP not enabled. Validating in float32.')

    if args.num_gpu > 1:
        bench = torch.nn.DataParallel(bench,
                                      device_ids=list(range(args.num_gpu)))

    dataset = create_dataset(args.dataset, args.root, args.split)
    input_config = resolve_input_config(args, model_config)
    loader = create_loader(dataset,
                           input_size=input_config['input_size'],
                           batch_size=args.batch_size,
                           use_prefetcher=args.prefetcher,
                           interpolation=input_config['interpolation'],
                           fill_color=input_config['fill_color'],
                           mean=input_config['mean'],
                           std=input_config['std'],
                           num_workers=args.workers,
                           pin_mem=args.pin_mem)

    evaluator = create_evaluator(args.dataset, dataset, pred_yxyx=False)
    bench.eval()
    batch_time = AverageMeter()
    end = time.time()
    last_idx = len(loader) - 1
    with torch.no_grad():
        for i, (input, target) in enumerate(loader):
            with amp_autocast():
                output = bench(input, img_info=target)
            evaluator.add_predictions(output, target)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.log_freq == 0 or i == last_idx:
                print(
                    'Test: [{0:>4d}/{1}]  '
                    'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s)  '
                    .format(i,
                            len(loader),
                            batch_time=batch_time,
                            rate_avg=input.size(0) / batch_time.avg))

    mean_ap = 0.
    if dataset.parser.has_labels:
        mean_ap = evaluator.evaluate()
    else:
        evaluator.save(args.results)

    return mean_ap
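One detail worth noting above: amp_autocast defaults to contextlib.suppress, which with no arguments is a no-op context manager, so the same with-block runs unchanged whether or not mixed precision is enabled. A self-contained sketch of that pattern:

# No-op context manager pattern: one with-block covers both AMP and fp32 paths.
import torch
from contextlib import suppress

use_amp = torch.cuda.is_available()
amp_autocast = torch.cuda.amp.autocast if use_amp else suppress

with amp_autocast():
    pass  # forward pass runs in reduced precision only when AMP is selected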
Example #3
def create_datasets_and_loaders(
    args,
    model_config,
    transform_train_fn=None,
    transform_eval_fn=None,
    collate_fn=None,
):
    """ Setup datasets, transforms, loaders, evaluator.

    Args:
        args: Command line args / config for training
        model_config: Model specific configuration dict / struct
        transform_train_fn: Override default image + annotation transforms (see note in loaders.py)
        transform_eval_fn: Override default image + annotation transforms (see note in loaders.py)
        collate_fn: Override default fast collate function

    Returns:
        Train loader, validation loader, evaluator
    """
    input_config = resolve_input_config(args, model_config=model_config)

    dataset_train, dataset_eval = create_dataset(args.dataset, args.root)

    # setup labeler in loader/collate_fn if not enabled in the model bench
    labeler = None
    if not args.bench_labeler:
        labeler = AnchorLabeler(Anchors.from_config(model_config),
                                model_config.num_classes,
                                match_threshold=0.5)

    loader_train = create_loader(
        dataset_train,
        input_size=input_config['input_size'],
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        re_prob=args.reprob,
        re_mode=args.remode,
        re_count=args.recount,
        # color_jitter=args.color_jitter,
        # auto_augment=args.aa,
        interpolation=args.train_interpolation or input_config['interpolation'],
        fill_color=input_config['fill_color'],
        mean=input_config['mean'],
        std=input_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        pin_mem=args.pin_mem,
        anchor_labeler=labeler,
        transform_fn=transform_train_fn,
        collate_fn=collate_fn,
    )

    if args.val_skip > 1:
        dataset_eval = SkipSubset(dataset_eval, args.val_skip)
    loader_eval = create_loader(
        dataset_eval,
        input_size=input_config['input_size'],
        batch_size=args.batch_size,
        is_training=False,
        use_prefetcher=args.prefetcher,
        interpolation=input_config['interpolation'],
        fill_color=input_config['fill_color'],
        mean=input_config['mean'],
        std=input_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        pin_mem=args.pin_mem,
        anchor_labeler=labeler,
        transform_fn=transform_eval_fn,
        collate_fn=collate_fn,
    )

    evaluator = create_evaluator(args.dataset,
                                 loader_eval.dataset,
                                 distributed=args.distributed,
                                 pred_yxyx=False)

    return loader_train, loader_eval, evaluator
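A sketch of how the transform override hooks might be used. The (image, annotations) in/out signature is an assumption based on the docstring's note about image + annotation transforms, not a documented contract.

# Hypothetical override; the exact signature expected by create_loader is assumed.
def my_eval_transform(img, annotations):
    # e.g. apply custom resizing or normalization to both image and boxes
    return img, annotations

loader_train, loader_eval, evaluator = create_datasets_and_loaders(
    args, model_config, transform_eval_fn=my_eval_transform)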
Example #4
    def __init__(self, context: PyTorchTrialContext) -> None:

        self.context = context
        self.hparam = self.context.get_hparam
        self.args = DotDict(self.context.get_hparams())
        # Create a unique download directory for each rank so they don't overwrite each other.
        self.download_directory = f"/tmp/data-rank{self.context.distributed.get_rank()}"
        self.num_slots = int(self.context.get_experiment_config()['resources']
                             ['slots_per_trial'])

        if self.args.sync_bn and self.num_slots == 1:
            print(
                'Cannot use sync_bn with a single slot; either set sync_bn to False or use distributed training.'
            )
            sys.exit()
        self.args.pretrained_backbone = not self.args.no_pretrained_backbone
        self.args.prefetcher = not self.args.no_prefetcher

        self.args.lr_noise = [float(x) for x in self.args.lr_noise.split(' ')]

        self.model = create_model(
            self.args.model,
            bench_task='train',
            num_classes=self.args.num_classes,
            pretrained=self.args.pretrained,
            pretrained_backbone=self.args.pretrained_backbone,
            redundant_bias=self.args.redundant_bias,
            label_smoothing=self.args.smoothing,
            new_focal=self.args.new_focal,
            jit_loss=self.args.jit_loss,
            bench_labeler=self.args.bench_labeler,
            checkpoint_path=self.args.initial_checkpoint,
        )
        self.model_config = self.model.config
        self.input_config = resolve_input_config(
            self.args, model_config=self.model_config)
        print('Param count:', self.args.model,
              sum(m.numel() for m in self.model.parameters()))

        if self.args.sync_bn:
            print('creating batch sync model')
            if self.args.model_ema:
                print('creating batch sync ema model')

                self.model_ema = self.context.wrap_model(deepcopy(self.model))
            self.model = self.convert_syncbn_model(self.model)

        self.model = self.context.wrap_model(self.model)
        print('Model created, param count:', self.args.model,
              sum([m.numel() for m in self.model.parameters()]))

        self.optimizer = self.context.wrap_optimizer(
            create_optimizer(self.args, self.model))
        print('Created optimizer: ', self.optimizer)

        if self.args.amp:
            print('using amp')
            if self.args.sync_bn and self.args.model_ema:
                print('using sync_bn and model_ema when creating apex_amp')
                (self.model, self.model_ema
                 ), self.optimizer = self.context.configure_apex_amp(
                     [self.model, self.model_ema],
                     self.optimizer,
                     min_loss_scale=self.hparam("min_loss_scale"))
            else:
                self.model, self.optimizer = self.context.configure_apex_amp(
                    self.model,
                    self.optimizer,
                    min_loss_scale=self.hparam("min_loss_scale"))

        if self.args.model_ema:
            print('using model ema')
            if self.args.sync_bn:
                print('using model ema with sync_bn')
                self.model_ema = ModelEma(self.model_ema,
                                          context=self.context,
                                          decay=self.args.model_ema_decay)
            else:
                # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
                self.model_ema = ModelEma(self.model,
                                          context=self.context,
                                          decay=self.args.model_ema_decay)

        self.lr_scheduler, self.num_epochs = create_scheduler(
            self.args, self.optimizer)
        self.lr_scheduler = self.context.wrap_lr_scheduler(
            self.lr_scheduler, LRScheduler.StepMode.MANUAL_STEP)

        self.cur_epoch = 0
        self.num_updates = 0

        if self.args.prefetcher:
            self.train_mean, self.train_std, self.train_random_erasing = self.calculate_means(
                mean=self.input_config['mean'],
                std=self.input_config['std'],
                re_prob=self.args.reprob,
                re_mode=self.args.remode,
                re_count=self.args.recount)

            self.val_mean, self.val_std, self.val_random_erasing = self.calculate_means(
                self.input_config['mean'], self.input_config['std'])

        self.val_reducer = self.context.wrap_reducer(self.validation_reducer,
                                                     for_training=False)
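DotDict is referenced above but not defined in this snippet. A minimal stand-in (attribute-style access over a plain dict) could look like the following; the real helper in the source may differ.

# Assumed minimal implementation of the DotDict helper used above.
class DotDict(dict):
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__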
Example #5
def validate(args):
    setup_default_logging()

    if args.amp:
        if has_apex:
            args.apex_amp = True
        elif has_native_amp:
            args.native_amp = True
    assert not args.apex_amp or not args.native_amp, "Only one AMP mode should be set."
    args.pretrained = args.pretrained or not args.checkpoint  # might as well try to validate something
    args.prefetcher = not args.no_prefetcher

    # create model
    with set_layer_config(scriptable=args.torchscript):
        bench = create_model(
            args.model,
            bench_task='predict',
            num_classes=args.num_classes,
            pretrained=args.pretrained,
            redundant_bias=args.redundant_bias,
            soft_nms=args.soft_nms,
            checkpoint_path=args.checkpoint,
            checkpoint_ema=args.use_ema,
        )
    model_config = bench.config

    param_count = sum([m.numel() for m in bench.parameters()])
    print('Model %s created, param count: %d' % (args.model, param_count))

    bench = bench.cuda()

    amp_autocast = suppress
    if args.apex_amp:
        bench = amp.initialize(bench, opt_level='O1')
        print('Using NVIDIA APEX AMP. Validating in mixed precision.')
    elif args.native_amp:
        amp_autocast = torch.cuda.amp.autocast
        print('Using native Torch AMP. Validating in mixed precision.')
    else:
        print('AMP not enabled. Validating in float32.')

    if args.num_gpu > 1:
        bench = torch.nn.DataParallel(bench,
                                      device_ids=list(range(args.num_gpu)))

    dataset = create_dataset(args.dataset, args.root, args.split)
    input_config = resolve_input_config(args, model_config)
    loader = create_loader(dataset,
                           input_size=input_config['input_size'],
                           batch_size=args.batch_size,
                           use_prefetcher=args.prefetcher,
                           interpolation=input_config['interpolation'],
                           fill_color=input_config['fill_color'],
                           mean=input_config['mean'],
                           std=input_config['std'],
                           num_workers=args.workers,
                           pin_mem=args.pin_mem)

    evaluator = create_evaluator(args.dataset, dataset, pred_yxyx=False)
    bench.eval()
    batch_time = AverageMeter()
    end = time.time()
    last_idx = len(loader) - 1
    imgs = []
    with torch.no_grad():
        for i, (input, target) in enumerate(loader):
            # keep CPU copies of the batch images for visualization below
            for b in range(input.shape[0]):
                imgs.append(input[b].cpu().numpy())
                # targets.append(target[b].cpu().numpy())

            with amp_autocast():
                output = bench(input, img_info=target)
            evaluator.add_predictions(output, target)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.log_freq == 0 or i == last_idx:
                print(
                    'Test: [{0:>4d}/{1}]  '
                    'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s)  '
                    .format(i,
                            len(loader),
                            batch_time=batch_time,
                            rate_avg=input.size(0) / batch_time.avg))

    mean_ap = 0.
    if dataset.parser.has_labels:
        # NOTE: the visualization below assumes exactly two boxes per image
        # (top-2 predictions, first two ground-truth annotations) and that
        # image ids run 0..len(imgs)-1.
        preds = [p[:2, :] for p in evaluator.predictions]
        anns = evaluator.coco_api.imgToAnns
        targets = [
            np.asarray((anns[k][0]['bbox'], anns[k][1]['bbox']))
            for k in range(len(imgs))
        ]
        mean_ap = evaluator.evaluate()
        if not os.path.exists(args.out_dir):
            os.mkdir(args.out_dir)
        for i, img in enumerate(imgs):
            # collapse to grayscale by averaging channels, then replicate to 3 channels
            img_m = np.mean(img, axis=0)
            for c in range(3):
                img[c] = img_m
            img_ = img.transpose(1, 2, 0)  # CHW -> HWC for OpenCV
            # min-max rescale to [0, 255] for writing as an 8-bit image
            m = img_.min()
            M = img_.max()
            img_ = ((img_ - m) / (M - m) * 255).astype('uint8').copy()
            img_ = draw_bbox(img_, preds[i], targets[i])
            cv2.imwrite(os.path.join(args.out_dir, '%d.jpg' % i), img_)
    else:
        evaluator.save(args.results)

    return mean_ap
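draw_bbox is likewise not defined in this snippet. A plausible minimal version with OpenCV is sketched below, assuming prediction rows of the form [x1, y1, x2, y2, score, class] (the pred_yxyx=False layout) and COCO-style [x, y, w, h] target boxes; both layouts are assumptions, since the real helper is not shown.

# Assumed helper: draws predictions in green, ground-truth boxes in red.
import cv2
import numpy as np

def draw_bbox(img, preds, targets):
    for x1, y1, x2, y2 in np.asarray(preds)[:, :4].astype(int).tolist():
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
    for x, y, w, h in np.asarray(targets).astype(int).tolist():
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
    return img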