def get_faster_rcnn_dataloader(net, train_dataset, val_dataset, train_transform,
                               val_transform, batch_size, num_shards, args):
    """Get faster rcnn dataloader."""
    if (not args.final_fit) and (not val_dataset):
        train_dataset, val_dataset = _train_val_split(train_dataset, args.split_ratio)
    train_bfn = FasterRCNNTrainBatchify(net, num_shards)
    if hasattr(train_dataset, 'get_im_aspect_ratio'):
        im_aspect_ratio = train_dataset.get_im_aspect_ratio()
    else:
        im_aspect_ratio = [1.] * len(train_dataset)
    train_sampler = \
        gcv.nn.sampler.SplitSortedBucketSampler(im_aspect_ratio, batch_size,
                                                num_parts=1, part_index=0,
                                                shuffle=True)
    train_loader = gluon.data.DataLoader(train_dataset.transform(
        train_transform(net.short, net.max_size, net, ashape=net.ashape, multi_stage=True)),
        batch_sampler=train_sampler, batchify_fn=train_bfn, num_workers=args.num_workers)
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
    # validation use 1 sample per device
    val_loader = None
    if val_dataset:
        val_loader = gluon.data.DataLoader(
            val_dataset.transform(val_transform(short, net.max_size)), num_shards, False,
            batchify_fn=val_bfn, last_batch='keep', num_workers=args.num_workers)
    args.num_samples = len(train_dataset)
    return train_loader, val_loader

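# A minimal usage sketch for the helper above (hypothetical values; `net`, the
# dataset, and the GluonCV transform classes are assumed to exist already).
# Passing val_dataset=None with final_fit=False exercises the _train_val_split branch.
from types import SimpleNamespace

args = SimpleNamespace(final_fit=False, split_ratio=0.8, num_workers=4)
train_loader, val_loader = get_faster_rcnn_dataloader(
    net, train_dataset, None,
    FasterRCNNDefaultTrainTransform, FasterRCNNDefaultValTransform,
    batch_size=8, num_shards=4, args=args)
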
def get_dataloader(net, train_dataset, val_dataset, train_transform, val_transform,
                   batch_size, num_shards, args):
    """Get dataloader."""
    train_bfn = FasterRCNNTrainBatchify(net, num_shards)
    # train_sampler = gcv.nn.sampler.SplitSortedBucketSampler(train_dataset.get_im_aspect_ratio(),
    #                                                         batch_size,
    #                                                         num_parts=hvd.size() if args.horovod else 1,
    #                                                         part_index=hvd.rank() if args.horovod else 0,
    #                                                         shuffle=True)
    # SplitSampler yields individual indices (not batches), so its first argument
    # is the dataset length and it is passed as `sampler` alongside `batch_size`.
    train_sampler = gcv.nn.sampler.SplitSampler(
        len(train_dataset),
        num_parts=hvd.size() if args.horovod else 1,
        part_index=hvd.rank() if args.horovod else 0)
    train_loader = mx.gluon.data.DataLoader(train_dataset.transform(
        train_transform(net.short, net.max_size, net, ashape=net.ashape,
                        multi_stage=args.use_fpn)),
        batch_size=batch_size, sampler=train_sampler, batchify_fn=train_bfn,
        num_workers=args.num_workers)
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
    # validation use 1 sample per device
    val_loader = mx.gluon.data.DataLoader(val_dataset.transform(
        val_transform(short, net.max_size)), num_shards, False, batchify_fn=val_bfn,
        last_batch='keep', num_workers=args.num_workers)
    return train_loader, val_loader

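# A quick sketch of what gcv.nn.sampler.SplitSampler does: given the dataset
# length, it keeps this worker's 1/num_parts contiguous share of the indices and
# yields them shuffled, one at a time -- which is why it pairs with `sampler=`
# plus a batch_size rather than with `batch_sampler=`.
import gluoncv as gcv

sampler = gcv.nn.sampler.SplitSampler(10, num_parts=2, part_index=0)
print(len(sampler))   # 5 -- this worker's share
print(list(sampler))  # 5 shuffled indices drawn from the first half of 0..9
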
def get_dataloader(net, train_dataset, val_dataset, train_transform, val_transform,
                   batch_size, num_shards, args):
    """Get dataloader."""
    train_bfn = FasterRCNNTrainBatchify(net, num_shards)
    if hasattr(train_dataset, 'get_im_aspect_ratio'):
        im_aspect_ratio = train_dataset.get_im_aspect_ratio()
    else:
        im_aspect_ratio = [1.] * len(train_dataset)
    train_sampler = \
        gcv.nn.sampler.SplitSortedBucketSampler(im_aspect_ratio, batch_size,
                                                num_parts=hvd.size() if args.horovod else 1,
                                                part_index=hvd.rank() if args.horovod else 0,
                                                shuffle=True)
    # dataset: train_dataset.transform(train_transform(net.short, net.max_size, net,
    #                                                  ashape=net.ashape, multi_stage=args.use_fpn))
    # ashape: the predefined anchor size
    # multi_stage + ashape: used to precompute the anchors
    train_loader = mx.gluon.data.DataLoader(train_dataset.transform(
        train_transform(net.short, net.max_size, net, ashape=net.ashape,
                        multi_stage=args.use_fpn)),
        batch_sampler=train_sampler, batchify_fn=train_bfn, num_workers=args.num_workers)
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
    # validation use 1 sample per device
    # dataset: val_dataset.transform(val_transform(short, net.max_size))
    # each item returns img, bbox.astype('float32'), mx.nd.array([im_scale])
    # bbox: x1, y1, x2, y2, class_id
    # img is resized so its shorter side <= short and its longer side <= net.max_size
    # Tuple is gluoncv's batchify Tuple, not Python's built-in tuple
    # Append(): each sample stays its own ndarray, so samples need not share a shape;
    #           the returned batch is a list
    # val_bfn holds 3 Append()s; each Append() handles one attribute of a dataset item
    val_loader = mx.gluon.data.DataLoader(val_dataset.transform(
        val_transform(short, net.max_size)), num_shards, False, batchify_fn=val_bfn,
        last_batch='keep', num_workers=args.num_workers)
    return train_loader, val_loader

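# A small sketch of the Tuple/Append batchify behaviour described in the comments
# above: samples with different shapes survive batching as a list of per-sample
# NDArrays, with one Append() per item attribute (img, bbox, im_scale).
import mxnet as mx
from gluoncv.data.batchify import Tuple, Append

samples = [
    (mx.nd.zeros((3, 600, 800)), mx.nd.zeros((5, 5)), mx.nd.array([1.2])),  # 5 boxes
    (mx.nd.zeros((3, 512, 640)), mx.nd.zeros((2, 5)), mx.nd.array([1.0])),  # 2 boxes
]
imgs, bboxes, scales = Tuple(*[Append() for _ in range(3)])(samples)
print([im.shape for im in imgs])    # [(1, 3, 600, 800), (1, 3, 512, 640)]
print([bb.shape for bb in bboxes])  # [(1, 5, 5), (1, 2, 5)]
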
def get_dataloader(net, train_dataset, val_dataset, train_transform, val_transform,
                   batch_size, num_shards, args):
    """Get dataloader."""
    train_bfn = FasterRCNNTrainBatchify(net, num_shards)
    if hasattr(train_dataset, 'get_im_aspect_ratio'):
        im_aspect_ratio = train_dataset.get_im_aspect_ratio()
    else:
        im_aspect_ratio = [1.] * len(train_dataset)
    if args.horovod:
        num_parts = hvd.size()
        part_index = hvd.rank()
    elif "perseus" in args.kv_store:
        num_parts = kv.num_workers
        part_index = kv.rank
    else:
        num_parts = 1
        part_index = 0
    train_sampler = \
        gcv.nn.sampler.SplitSortedBucketSampler(im_aspect_ratio, batch_size,
                                                num_parts=num_parts,
                                                part_index=part_index,
                                                shuffle=True)
    train_loader = mx.gluon.data.DataLoader(train_dataset.transform(
        train_transform(net.short, net.max_size, net, ashape=net.ashape,
                        multi_stage=args.use_fpn)),
        batch_sampler=train_sampler, batchify_fn=train_bfn, num_workers=args.num_workers)
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
    # validation use 1 sample per device
    val_loader = mx.gluon.data.DataLoader(val_dataset.transform(
        val_transform(short, net.max_size)), num_shards, False, batchify_fn=val_bfn,
        last_batch='keep', num_workers=args.num_workers)
    return train_loader, val_loader

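# A minimal sketch of SplitSortedBucketSampler with toy aspect ratios: it sorts
# this worker's share of the dataset by the given keys and yields batches of
# indices with similar aspect ratio, keeping per-batch padding small.
import gluoncv as gcv

aspect_ratios = [0.5, 1.5, 0.75, 1.33, 0.8, 1.1, 2.0, 1.0]
sampler = gcv.nn.sampler.SplitSortedBucketSampler(
    aspect_ratios, batch_size=2, num_parts=1, part_index=0, shuffle=True)
for batch_indices in sampler:
    print(batch_indices)  # e.g. [0, 2] -- two images with close aspect ratios
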
    elif dataset.lower() == 'coco':
        train_dataset = gdata.COCODetection(root='/media/HDD_4TB/MSCOCO/images/',
                                            splits='instances_train2017', use_crowd=False)
        val_dataset = gdata.COCODetection(root='/media/HDD_4TB/MSCOCO/images/',
                                          splits='instances_val2017', skip_empty=False)
        val_metric = COCODetectionMetric(val_dataset, args.save_prefix + '_eval',
                                         cleanup=True)
    else:
        raise NotImplementedError('Dataset: {} not implemented.'.format(dataset))
    if args.mixup:
        from gluoncv.data.mixup import detection
        train_dataset = detection.MixupDetection(train_dataset)
    return train_dataset, val_dataset, val_metric


def get_dataloader(net, train_dataset, val_dataset, train_transform, val_transform,
                   batch_size, num_shards, args):
    """Get dataloader."""
    train_bfn = FasterRCNNTrainBatchify(net, num_shards)
    train_sampler = \
        gcv.nn.sampler.SplitSortedBucketSampler(train_dataset.get_im_aspect_ratio(),
                                                batch_size,
                                                num_parts=hvd.size() if args.horovod else 1,
                                                part_index=hvd.rank() if args.horovod else 0,
                                                shuffle=True)
    train_loader = mx.gluon.data.DataLoader(train_dataset.transform(
        train_transform(net.short, net.max_size, net, ashape=net.ashape,
                        multi_stage=args.use_fpn)),
        batch_sampler=train_sampler, batchify_fn=train_bfn, num_workers=args.num_workers)
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
    # validation use 1 sample per device
    val_loader = mx.gluon.data.DataLoader(
        val_dataset.transform(val_transform(short, net.max_size)), num_shards, False,
        batchify_fn=val_bfn, last_batch='keep', num_workers=args.num_workers)
    return train_loader, val_loader

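# A short follow-up sketch on the MixupDetection wrapper above: training scripts
# typically toggle mixup per epoch through set_mixup (a GluonCV API); the
# beta(0.5, 0.5) sampling here is an illustrative choice, not a fixed default.
import numpy as np

train_dataset.set_mixup(np.random.beta, 0.5, 0.5)  # draw a mixup ratio per sample
train_dataset.set_mixup(None)  # disable mixup, e.g. for the last few epochs
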
rcnn_cls_loss = mx.gluon.loss.SoftmaxCrossEntropyLoss()
# and finally the loss to penalize inaccurate proposals
rcnn_box_loss = mx.gluon.loss.HuberLoss()  # == smoothl1

##########################################################
# RPN training targets
# --------------------
# To speed up training, we let the CPU pre-compute RPN training targets.
# This is especially nice when your CPU is powerful and you can use ``-j num_workers``
# to utilize a multi-core CPU.

##############################################################################
# If we provide the network to the training transform function, it will compute
# the training targets for us.
train_transform = presets.rcnn.FasterRCNNDefaultTrainTransform(short, max_size, net)
# returns images, labels, rpn_cls_targets, rpn_box_targets, rpn_box_masks, loosely batched
batchify_fn = FasterRCNNTrainBatchify(net)
# For the next part, we only use batch size 1
batch_size = 1
train_loader = DataLoader(train_dataset.transform(train_transform), batch_size,
                          shuffle=True, batchify_fn=batchify_fn, last_batch='rollover',
                          num_workers=num_workers)

##############################################################################
# This time we can see the data loader is actually returning the training targets for us.
# It then naturally becomes a gluon training loop with a Trainer that updates the weights.
for ib, batch in enumerate(train_loader):
    if ib > 0:
        break
    with autograd.train_mode():
        for data, label, rpn_cls_targets, rpn_box_targets, rpn_box_masks in zip(*batch):
            label = label.expand_dims(0)

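# A hedged sketch of how these loader-provided targets typically feed the RPN
# losses (using the rpn_cls_loss/rpn_box_loss defined earlier in the tutorial;
# the exact network output signature varies across GluonCV versions): the class
# loss is masked to valid anchors and both losses are renormalized by the
# number of valid samples.
gt_box = label[:, :, :4]
cls_pred, box_pred, roi, samples, matches, rpn_score, rpn_box, anchors = net(data, gt_box)
num_valid = (rpn_cls_targets >= 0).sum()
rpn_loss1 = rpn_cls_loss(rpn_score.squeeze(axis=-1), rpn_cls_targets,
                         rpn_cls_targets >= 0) * rpn_cls_targets.size / num_valid
rpn_loss2 = rpn_box_loss(rpn_box, rpn_box_targets,
                         rpn_box_masks) * rpn_box.size / num_valid
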
def get_voc_iterator(rank, num_workers, net, num_shards):
    data_dir = "data-%d" % rank
    try:
        s3_client = boto3.client('s3')
        for file in ['VOCtrainval_06-Nov-2007.tar', 'VOCtest_06-Nov-2007.tar',
                     'VOCtrainval_11-May-2012.tar']:
            local_path = f'/opt/ml/code/{file}'
            s3_client.download_file(args.s3bucket, f'voc_tars/{file}', local_path)
            # extract next to the per-rank data dir so VOCdevkit ends up under it
            with tarfile.open(local_path) as tar:
                tar.extractall(path=f'/opt/ml/code/{data_dir}')
    except Exception:
        print('downloading from source')
        download_voc(data_dir)

    input_shape = (1, 256, 256, 3)
    batch_size = args.batch_size
    # might want to replace with mx.io.ImageDetRecordIter; this means you need data in RecordIO format
    # train_iter = mx.io.MNISTIter(
    #     image="%s/train-images-idx3-ubyte" % data_dir,
    #     label="%s/train-labels-idx1-ubyte" % data_dir,
    #     input_shape=input_shape,
    #     batch_size=batch_size,
    #     shuffle=True,
    #     flat=False,
    #     num_parts=hvd.size(),
    #     part_index=hvd.rank()
    # )
    train_dataset = gdata.VOCDetection(
        root=f'/opt/ml/code/data-{rank}/VOCdevkit/',
        splits=[(2007, 'trainval'), (2012, 'trainval')])
    val_dataset = gdata.VOCDetection(
        root=f'/opt/ml/code/data-{rank}/VOCdevkit/',
        splits=[(2007, 'test')])
    val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
    im_aspect_ratio = [1.] * len(train_dataset)
    train_bfn = FasterRCNNTrainBatchify(net)
    train_sampler = gluoncv.nn.sampler.SplitSortedBucketSampler(
        im_aspect_ratio, batch_size,
        num_parts=hvd.size() if args.horovod else 1,
        part_index=hvd.rank() if args.horovod else 0,
        shuffle=True)
    # had issue with multi_stage=True
    train_iter = mx.gluon.data.DataLoader(train_dataset.transform(
        FasterRCNNDefaultTrainTransform(net.short, net.max_size, net,
                                        ashape=net.ashape, multi_stage=False)),
        batch_sampler=train_sampler, batchify_fn=train_bfn, num_workers=num_workers)
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
    # validation use 1 sample per device
    val_iter = mx.gluon.data.DataLoader(val_dataset.transform(
        FasterRCNNDefaultValTransform(short, net.max_size)), num_shards, False,
        batchify_fn=val_bfn, last_batch='keep', num_workers=num_workers)
    return train_iter, val_iter

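# A hypothetical usage sketch: Horovod must be initialized before the sampler
# above can query hvd.size()/hvd.rank(); `net` is assumed to be built already.
import horovod.mxnet as hvd

hvd.init()
train_iter, val_iter = get_voc_iterator(hvd.rank(), num_workers=4, net=net, num_shards=1)
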
# loss to penalize incorrect foreground/background prediction
rpn_cls_loss = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=False)
# loss to penalize inaccurate anchor boxes
rpn_box_loss = mx.gluon.loss.HuberLoss(rho=1 / 9.)
# loss to penalize incorrect classification prediction.
rcnn_cls_loss = mx.gluon.loss.SoftmaxCrossEntropyLoss()
# loss to penalize inaccurate proposals
rcnn_box_loss = mx.gluon.loss.HuberLoss()

# if we provide the network to the training transform function, it will compute training targets
train_transform = presets.rcnn.FasterRCNNDefaultTrainTransform(net=student)
# utility to create the batches according to student parameters, speeds up learning
batchify_fn = FasterRCNNTrainBatchify(student)
# data loader used to go through the data set
train_loader = DataLoader(train_dataset.transform(train_transform), batch_size=1,
                          shuffle=True, batchify_fn=batchify_fn, last_batch='rollover')

matplotlib.use('TkAgg')
writer = SummaryWriter()

for batch_idx, batch in enumerate(train_loader):
    if batch_idx > 2000:
        break
    with autograd.record():
        loss = []
        for image_idx, (data_batch, label, rpn_cls_targets, rpn_box_targets,
                        rpn_box_masks) in enumerate(zip(*batch)):
            start = time.time()
            with autograd.pause():
                # teacher predictions

                 **kwargs, **model_cfg)
    cfg["train"]["save_prefix"] = os.path.join(cfg["train"]["save_dir"], net_name)
    if resume and resume.strip():
        net.load_parameters(resume.strip())
    else:
        for param in net.collect_params().values():
            if param._data is not None:
                continue
            param.initialize()
    net.collect_params().reset_ctx(ctx)

    # dataloader
    train_bfn = FasterRCNNTrainBatchify(net, len(ctx))
    if hasattr(train_dataset, 'get_im_aspect_ratio'):
        im_aspect_ratio = train_dataset.get_im_aspect_ratio()
    else:
        im_aspect_ratio = [1.] * len(train_dataset)
    train_sampler = gcv.nn.sampler.SplitSortedBucketSampler(
        im_aspect_ratio,
        batch_size=cfg["dataset"]["batch_size_per_device"] * len(ctx),
        num_parts=1, part_index=0, shuffle=True)
    train_loader = mx.gluon.data.DataLoader(
        train_dataset.transform(
            FasterRCNNDefaultTrainTransform(
                net.short, net.max_size,