def create_datasets_and_loaders(args, model_config):
    """Build the training loader, the evaluation loader and the evaluator.

    Args:
        args: Run options. Attributes read here: ``dataset``, ``root``,
            ``bench_labeler``, ``batch_size``, ``prefetcher``, ``reprob``,
            ``remode``, ``recount``, ``train_interpolation``, ``workers``,
            ``distributed``, ``pin_mem`` and ``val_skip``.
        model_config: Model configuration; forwarded to
            ``resolve_input_config`` and, when a labeler is built here, to
            ``Anchors.from_config``.

    Returns:
        Tuple ``(loader_train, loader_eval, evaluator)``.
    """
    input_config = resolve_input_config(args, model_config=model_config)
    train_set, eval_set = create_dataset(args.dataset, args.root)

    # The labeler lives in the loader/collate_fn only when the model bench
    # is not configured to do the labeling itself.
    if args.bench_labeler:
        labeler = None
    else:
        anchors = Anchors.from_config(model_config)
        labeler = AnchorLabeler(anchors, model_config.num_classes, match_threshold=0.5)

    # Options that are identical for the train and the eval loader.
    shared_kwargs = dict(
        input_size=input_config['input_size'],
        batch_size=args.batch_size,
        use_prefetcher=args.prefetcher,
        fill_color=input_config['fill_color'],
        mean=input_config['mean'],
        std=input_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        pin_mem=args.pin_mem,
        anchor_labeler=labeler,
    )

    # NOTE: color_jitter / auto_augment are intentionally not forwarded.
    loader_train = create_loader(
        train_set,
        is_training=True,
        re_prob=args.reprob,
        re_mode=args.remode,
        re_count=args.recount,
        # An explicit training interpolation wins over the resolved default.
        interpolation=args.train_interpolation or input_config['interpolation'],
        **shared_kwargs
    )

    # Optionally evaluate on a SkipSubset of the eval set built with val_skip.
    if args.val_skip > 1:
        eval_set = SkipSubset(eval_set, args.val_skip)

    loader_eval = create_loader(
        eval_set,
        is_training=False,
        interpolation=input_config['interpolation'],
        **shared_kwargs
    )

    evaluator = create_evaluator(
        args.dataset,
        loader_eval.dataset,
        distributed=args.distributed,
        pred_yxyx=False,
    )
    return loader_train, loader_eval, evaluator
def validate(args):
    """Run a prediction bench over one dataset split.

    Creates the model in ``bench_task='predict'`` mode, moves it to CUDA,
    optionally enables mixed precision, feeds every batch of the split to
    the bench and hands the outputs to the evaluator.

    Args:
        args: Run options (model, checkpoint, AMP flags, dataset location,
            loader settings, ``log_freq`` and ``results``). ``apex_amp``,
            ``native_amp``, ``pretrained`` and ``prefetcher`` are updated
            in place.

    Returns:
        The value of ``evaluator.evaluate()`` when the dataset parser reports
        labels; otherwise ``0.`` after the predictions have been saved to
        ``args.results``.
    """
    setup_default_logging()

    # A bare --amp picks whichever backend is available, APEX first.
    if args.amp:
        if has_apex:
            args.apex_amp = True
        elif has_native_amp:
            args.native_amp = True
    assert not (args.apex_amp and args.native_amp), "Only one AMP mode should be set."

    # Without a checkpoint, fall back to pretrained weights so that there is
    # something to validate.
    args.pretrained = args.pretrained or not args.checkpoint
    args.prefetcher = not args.no_prefetcher

    # create model
    with set_layer_config(scriptable=args.torchscript):
        bench = create_model(
            args.model,
            bench_task='predict',
            num_classes=args.num_classes,
            pretrained=args.pretrained,
            redundant_bias=args.redundant_bias,
            soft_nms=args.soft_nms,
            checkpoint_path=args.checkpoint,
            checkpoint_ema=args.use_ema,
        )
    model_config = bench.config

    param_count = sum(p.numel() for p in bench.parameters())
    print('Model %s created, param count: %d' % (args.model, param_count))

    bench = bench.cuda()

    # ``suppress()`` with no arguments acts as a no-op context manager.
    amp_autocast = suppress
    if args.apex_amp:
        bench = amp.initialize(bench, opt_level='O1')
        print('Using NVIDIA APEX AMP. Validating in mixed precision.')
    elif args.native_amp:
        amp_autocast = torch.cuda.amp.autocast
        print('Using native Torch AMP. Validating in mixed precision.')
    else:
        print('AMP not enabled. Validating in float32.')

    if args.num_gpu > 1:
        gpu_ids = list(range(args.num_gpu))
        bench = torch.nn.DataParallel(bench, device_ids=gpu_ids)

    dataset = create_dataset(args.dataset, args.root, args.split)
    input_config = resolve_input_config(args, model_config)
    loader = create_loader(
        dataset,
        input_size=input_config['input_size'],
        batch_size=args.batch_size,
        use_prefetcher=args.prefetcher,
        interpolation=input_config['interpolation'],
        fill_color=input_config['fill_color'],
        mean=input_config['mean'],
        std=input_config['std'],
        num_workers=args.workers,
        pin_mem=args.pin_mem,
    )

    evaluator = create_evaluator(args.dataset, dataset, pred_yxyx=False)
    bench.eval()

    batch_time = AverageMeter()
    log_template = (
        'Test: [{0:>4d}/{1}] '
        'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) '
    )
    end = time.time()
    num_batches = len(loader)
    last_idx = num_batches - 1
    with torch.no_grad():
        for step, (images, target) in enumerate(loader):
            with amp_autocast():
                output = bench(images, img_info=target)
            evaluator.add_predictions(output, target)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if step % args.log_freq == 0 or step == last_idx:
                print(log_template.format(
                    step,
                    num_batches,
                    batch_time=batch_time,
                    rate_avg=images.size(0) / batch_time.avg,
                ))

    # Without labels there is nothing to score: dump the predictions instead.
    if not dataset.parser.has_labels:
        evaluator.save(args.results)
        return 0.

    return evaluator.evaluate()
def create_datasets_and_loaders(
        args,
        model_config,
        transform_train_fn=None,
        transform_eval_fn=None,
        collate_fn=None,
):
    """Set up the datasets, transforms, loaders and evaluator.

    Args:
        args: Command line args / config for training the model.
        model_config: Model specific configuration dict / struct.
        transform_train_fn: Replacement for the default image + annotation
            transforms of the training loader (see note in loaders.py).
        transform_eval_fn: Replacement for the default image + annotation
            transforms of the evaluation loader (see note in loaders.py).
        collate_fn: Replacement for the default fast collate function; the
            same callable is given to both loaders.

    Returns:
        Train loader, validation loader, evaluator.
    """
    input_config = resolve_input_config(args, model_config=model_config)
    train_set, eval_set = create_dataset(args.dataset, args.root)

    # Only label inside the loader/collate_fn when the model bench does not.
    labeler = None
    if not args.bench_labeler:
        labeler = AnchorLabeler(
            Anchors.from_config(model_config),
            model_config.num_classes,
            match_threshold=0.5,
        )

    def _build_loader(dataset, **mode_kwargs):
        # Every option below is common to the train and the eval loader;
        # ``mode_kwargs`` carries the per-mode differences.
        return create_loader(
            dataset,
            input_size=input_config['input_size'],
            batch_size=args.batch_size,
            use_prefetcher=args.prefetcher,
            fill_color=input_config['fill_color'],
            mean=input_config['mean'],
            std=input_config['std'],
            num_workers=args.workers,
            distributed=args.distributed,
            pin_mem=args.pin_mem,
            anchor_labeler=labeler,
            collate_fn=collate_fn,
            **mode_kwargs
        )

    # NOTE: color_jitter / auto_augment are intentionally not forwarded.
    loader_train = _build_loader(
        train_set,
        is_training=True,
        re_prob=args.reprob,
        re_mode=args.remode,
        re_count=args.recount,
        interpolation=args.train_interpolation or input_config['interpolation'],
        transform_fn=transform_train_fn,
    )

    # Optionally evaluate on a SkipSubset of the eval set built with val_skip.
    if args.val_skip > 1:
        eval_set = SkipSubset(eval_set, args.val_skip)

    loader_eval = _build_loader(
        eval_set,
        is_training=False,
        interpolation=input_config['interpolation'],
        transform_fn=transform_eval_fn,
    )

    evaluator = create_evaluator(
        args.dataset,
        loader_eval.dataset,
        distributed=args.distributed,
        pred_yxyx=False,
    )
    return loader_train, loader_eval, evaluator
def __init__(self, context: PyTorchTrialContext) -> None:
    """Build the model, optimizer, AMP/EMA wrappers and LR scheduler.

    All settings come from the hyperparameters exposed by ``context``;
    the model, optimizer, LR scheduler and validation reducer are registered
    through the context's ``wrap_*`` methods.

    Args:
        context: Trial context providing hyperparameters, the experiment
            config, distributed rank and the wrapping helpers.
    """
    self.context = context
    self.hparam = self.context.get_hparam
    self.args = DotDict(self.context.get_hparams())

    # Create a unique download directory for each rank so they don't
    # overwrite each other.
    rank = self.context.distributed.get_rank()
    self.download_directory = f"/tmp/data-rank{rank}"

    experiment_config = self.context.get_experiment_config()
    self.num_slots = int(experiment_config['resources']['slots_per_trial'])

    if self.args.sync_bn and self.num_slots == 1:
        print(
            'Can not use sync_bn with one slot. Either set sync_bn to False or use distributed training.'
        )
        sys.exit()

    # The hyperparameters carry negated flags; derive the positive forms.
    self.args.pretrained_backbone = not self.args.no_pretrained_backbone
    self.args.prefetcher = not self.args.no_prefetcher

    # lr_noise arrives as a space-separated string of numbers.
    self.args.lr_noise = [float(token) for token in self.args.lr_noise.split(' ')]

    self.model = create_model(
        self.args.model,
        bench_task='train',
        num_classes=self.args.num_classes,
        pretrained=self.args.pretrained,
        pretrained_backbone=self.args.pretrained_backbone,
        redundant_bias=self.args.redundant_bias,
        label_smoothing=self.args.smoothing,
        new_focal=self.args.new_focal,
        jit_loss=self.args.jit_loss,
        bench_labeler=self.args.bench_labeler,
        checkpoint_path=self.args.initial_checkpoint,
    )
    self.model_config = self.model.config
    self.input_config = resolve_input_config(
        self.args, model_config=self.model_config)

    print('h: ', self.args.model,
          sum(p.numel() for p in self.model.parameters()))

    if self.args.sync_bn:
        print('creating batch sync model')
        if self.args.model_ema:
            print('creating batch sync ema model')
            # The EMA copy is taken before the SyncBN conversion below.
            self.model_ema = self.context.wrap_model(deepcopy(self.model))
        self.model = self.convert_syncbn_model(self.model)

    self.model = self.context.wrap_model(self.model)
    print('Model created, param count:', self.args.model,
          sum(p.numel() for p in self.model.parameters()))

    self.optimizer = self.context.wrap_optimizer(
        create_optimizer(self.args, self.model))
    print('Created optimizer: ', self.optimizer)

    if self.args.amp:
        print('using amp')
        min_loss_scale = self.hparam("min_loss_scale")
        if self.args.sync_bn and self.args.model_ema:
            # Both the model and its separately wrapped EMA copy go
            # through APEX together.
            print('using sync_bn and model_ema when creating apex_amp')
            amp_models, self.optimizer = self.context.configure_apex_amp(
                [self.model, self.model_ema],
                self.optimizer,
                min_loss_scale=min_loss_scale,
            )
            self.model, self.model_ema = amp_models
        else:
            self.model, self.optimizer = self.context.configure_apex_amp(
                self.model,
                self.optimizer,
                min_loss_scale=min_loss_scale,
            )

    if self.args.model_ema:
        print('using model ema')
        if self.args.sync_bn:
            print('using model ema batch syn')
            ema_source = self.model_ema
        else:
            # Important to create EMA model after cuda(), DP wrapper, and
            # AMP but before SyncBN and DDP wrapper
            ema_source = self.model
        self.model_ema = ModelEma(
            ema_source,
            context=self.context,
            decay=self.args.model_ema_decay,
        )

    scheduler, self.num_epochs = create_scheduler(self.args, self.optimizer)
    self.lr_scheduler = self.context.wrap_lr_scheduler(
        scheduler, LRScheduler.StepMode.MANUAL_STEP)

    self.cur_epoch = 0
    # Evaluates to 0 here because cur_epoch has just been set to 0.
    self.num_updates = 0 * self.cur_epoch

    if self.args.prefetcher:
        (self.train_mean,
         self.train_std,
         self.train_random_erasing) = self.calculate_means(
            mean=self.input_config['mean'],
            std=self.input_config['std'],
            re_prob=self.args.reprob,
            re_mode=self.args.remode,
            re_count=self.args.recount,
        )
        (self.val_mean,
         self.val_std,
         self.val_random_erasing) = self.calculate_means(
            self.input_config['mean'], self.input_config['std'])

    self.val_reducer = self.context.wrap_reducer(
        self.validation_reducer, for_training=False)
def validate(args):
    """Run a prediction bench over one dataset split and dump visualizations.

    Creates the model in ``bench_task='predict'`` mode, moves it to CUDA,
    optionally enables mixed precision, feeds every batch of the split to
    the bench and hands the outputs to the evaluator. When the dataset parser
    reports labels, each input image is additionally written to
    ``args.out_dir`` as ``<index>.jpg`` after being passed through
    ``draw_bbox`` together with its predictions and target boxes.

    Args:
        args: Run options (model, checkpoint, AMP flags, dataset location,
            loader settings, ``log_freq``, ``out_dir`` and ``results``).
            ``apex_amp``, ``native_amp``, ``pretrained`` and ``prefetcher``
            are updated in place.

    Returns:
        The value of ``evaluator.evaluate()`` when the dataset parser reports
        labels; otherwise ``0.`` after the predictions have been saved to
        ``args.results``.
    """
    setup_default_logging()

    # A bare --amp picks whichever backend is available, APEX first.
    if args.amp:
        if has_apex:
            args.apex_amp = True
        elif has_native_amp:
            args.native_amp = True
    assert not args.apex_amp or not args.native_amp, "Only one AMP mode should be set."

    # Without a checkpoint, fall back to pretrained weights so that there is
    # something to validate.
    args.pretrained = args.pretrained or not args.checkpoint
    args.prefetcher = not args.no_prefetcher

    # create model
    with set_layer_config(scriptable=args.torchscript):
        bench = create_model(
            args.model,
            bench_task='predict',
            num_classes=args.num_classes,
            pretrained=args.pretrained,
            redundant_bias=args.redundant_bias,
            soft_nms=args.soft_nms,
            checkpoint_path=args.checkpoint,
            checkpoint_ema=args.use_ema,
        )
    model_config = bench.config

    param_count = sum(p.numel() for p in bench.parameters())
    print('Model %s created, param count: %d' % (args.model, param_count))

    bench = bench.cuda()

    # ``suppress()`` with no arguments acts as a no-op context manager.
    amp_autocast = suppress
    if args.apex_amp:
        bench = amp.initialize(bench, opt_level='O1')
        print('Using NVIDIA APEX AMP. Validating in mixed precision.')
    elif args.native_amp:
        amp_autocast = torch.cuda.amp.autocast
        print('Using native Torch AMP. Validating in mixed precision.')
    else:
        print('AMP not enabled. Validating in float32.')

    if args.num_gpu > 1:
        bench = torch.nn.DataParallel(bench, device_ids=list(range(args.num_gpu)))

    dataset = create_dataset(args.dataset, args.root, args.split)
    input_config = resolve_input_config(args, model_config)
    loader = create_loader(
        dataset,
        input_size=input_config['input_size'],
        batch_size=args.batch_size,
        use_prefetcher=args.prefetcher,
        interpolation=input_config['interpolation'],
        fill_color=input_config['fill_color'],
        mean=input_config['mean'],
        std=input_config['std'],
        num_workers=args.workers,
        pin_mem=args.pin_mem,
    )

    evaluator = create_evaluator(args.dataset, dataset, pred_yxyx=False)
    bench.eval()

    # The raw inputs are only drawn on when labels exist, so do not hold the
    # whole split in host memory otherwise.
    has_labels = dataset.parser.has_labels
    imgs = []

    batch_time = AverageMeter()
    end = time.time()
    last_idx = len(loader) - 1
    with torch.no_grad():
        for i, (images, target) in enumerate(loader):
            if has_labels:
                # One device->host copy per batch; iterating the array yields
                # one (C, H, W) image per sample.
                imgs.extend(images.cpu().numpy())

            with amp_autocast():
                output = bench(images, img_info=target)
            evaluator.add_predictions(output, target)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.log_freq == 0 or i == last_idx:
                print(
                    'Test: [{0:>4d}/{1}] '
                    'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) '
                    .format(i, len(loader), batch_time=batch_time,
                            rate_avg=images.size(0) / batch_time.avg))

    mean_ap = 0.
    if has_labels:
        # Predictions and targets are collected BEFORE evaluate() is called,
        # matching the original order.
        # NOTE(review): presumably evaluate() may clear the stored
        # predictions - confirm against the evaluator implementation.
        preds = [p[:2, :] for p in evaluator.predictions]
        anns = evaluator.coco_api.imgToAnns
        # NOTE(review): assumes annotation keys are 0..len(imgs)-1 and that
        # every image has at least two annotations - confirm for the dataset.
        targets = [
            np.asarray((anns[k][0]['bbox'], anns[k][1]['bbox']))
            for k in range(len(imgs))
        ]
        mean_ap = evaluator.evaluate()

        # Creates missing parents and tolerates an already existing directory.
        os.makedirs(args.out_dir, exist_ok=True)

        for idx, img in enumerate(imgs):
            # Replace the first three channels with the per-pixel channel mean.
            gray = np.mean(img, axis=0)
            for c in range(3):
                img[c] = gray
            canvas = img.transpose(1, 2, 0)

            # Min-max scale to 0..255; a constant image has zero range, so
            # fall back to a unit divisor instead of dividing by zero.
            lo = canvas.min()
            span = canvas.max() - lo
            if span == 0:
                span = 1
            canvas = ((canvas - lo) / span * 255).astype('uint8').copy()

            canvas = draw_bbox(canvas, preds[idx], targets[idx])
            cv2.imwrite(os.path.join(args.out_dir, '%d.jpg' % idx), canvas)
    else:
        evaluator.save(args.results)

    return mean_ap