Example #1
def get_dataloader(net, train_dataset, val_dataset, train_transform,
                   val_transform, batch_size, num_shards, args):
    """Get dataloader."""
    train_bfn = FasterRCNNTrainBatchify(net, num_shards)
    # SplitSampler yields individual sample indices, not batches, so it cannot
    # serve as a batch_sampler; SplitSortedBucketSampler batches indices sorted
    # by aspect ratio and splits them across workers.
    train_sampler = gcv.nn.sampler.SplitSortedBucketSampler(
        train_dataset.get_im_aspect_ratio(),
        batch_size,
        num_parts=hvd.size() if args.horovod else 1,
        part_index=hvd.rank() if args.horovod else 0,
        shuffle=True)
    train_loader = mx.gluon.data.DataLoader(train_dataset.transform(
        train_transform(net.short,
                        net.max_size,
                        net,
                        ashape=net.ashape,
                        multi_stage=args.use_fpn)),
                                            batch_sampler=train_sampler,
                                            batchify_fn=train_bfn,
                                            num_workers=args.num_workers)
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short,
                                        (tuple, list)) else net.short
    # validation uses 1 sample per device
    val_loader = mx.gluon.data.DataLoader(val_dataset.transform(
        val_transform(short, net.max_size)),
                                          num_shards,
                                          False,
                                          batchify_fn=val_bfn,
                                          last_batch='keep',
                                          num_workers=args.num_workers)
    return train_loader, val_loader
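
For reference, a hypothetical invocation of the helper above (the net, datasets, and transforms come from gluoncv; the argument values are illustrative only, not from the original):

from types import SimpleNamespace

# args normally comes from argparse; only the fields used above are sketched
args = SimpleNamespace(horovod=False, use_fpn=True, num_workers=4)
# train_loader, val_loader = get_dataloader(
#     net, train_dataset, val_dataset,
#     FasterRCNNDefaultTrainTransform, FasterRCNNDefaultValTransform,
#     batch_size=8, num_shards=1, args=args)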
Example #2
def get_faster_rcnn_dataloader(net, train_dataset, val_dataset, train_transform, val_transform,
                               batch_size, num_shards, args):
    """Get faster rcnn dataloader."""
    if (not args.final_fit) and (not val_dataset):
        train_dataset, val_dataset = _train_val_split(train_dataset, args.split_ratio)

    train_bfn = FasterRCNNTrainBatchify(net, num_shards)
    if hasattr(train_dataset, 'get_im_aspect_ratio'):
        im_aspect_ratio = train_dataset.get_im_aspect_ratio()
    else:
        im_aspect_ratio = [1.] * len(train_dataset)
    train_sampler = \
        gcv.nn.sampler.SplitSortedBucketSampler(im_aspect_ratio, batch_size, num_parts=1,
                                                part_index=0, shuffle=True)
    train_loader = gluon.data.DataLoader(train_dataset.transform(
        train_transform(net.short, net.max_size, net, ashape=net.ashape, multi_stage=True)),
        batch_sampler=train_sampler, batchify_fn=train_bfn, num_workers=args.num_workers)
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
    # validation uses 1 sample per device
    val_loader = None
    if val_dataset:
        val_loader = gluon.data.DataLoader(
            val_dataset.transform(val_transform(short, net.max_size)), num_shards, False,
            batchify_fn=val_bfn, last_batch='keep', num_workers=args.num_workers)
    args.num_samples = len(train_dataset)
    return train_loader, val_loader
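
Example #2 delegates the split to a _train_val_split helper that is not shown here; a minimal sketch of what such a helper might do (hypothetical, index-based random partitioning):

import random

def train_val_split_sketch(dataset, split_ratio, seed=0):
    indices = list(range(len(dataset)))
    random.Random(seed).shuffle(indices)
    n_val = int(len(indices) * split_ratio)
    # disjoint index sets; callers would wrap them back into Dataset objects
    return indices[n_val:], indices[:n_val]

train_idx, val_idx = train_val_split_sketch(range(100), split_ratio=0.2)
print(len(train_idx), len(val_idx))  # 80 20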
Example #3
def _get_dataloader(net, test_dataset, data_shape, batch_size,
                    num_workers, num_devices, args):
    """Get dataloader."""
    if args.meta_arch == 'yolo3':
        width, height = data_shape, data_shape
        val_batchify_fn = Tuple(Stack(), Pad(pad_val=-1))
        test_loader = gluon.data.DataLoader(
            test_dataset.transform(
                YOLO3DefaultValTransform(width, height)),
            batch_size,
            False,
            batchify_fn=val_batchify_fn,
            last_batch='keep',
            num_workers=num_workers)
        return test_loader
    elif args.meta_arch == 'faster_rcnn':
        # Faster R-CNN dataloader; validation uses 1 sample per device
        test_bfn = Tuple(*[Append() for _ in range(3)])
        short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
        test_loader = gluon.data.DataLoader(
            test_dataset.transform(
                FasterRCNNDefaultValTransform(short, net.max_size)),
            num_devices,
            False,
            batchify_fn=test_bfn,
            last_batch='keep',
            num_workers=args.num_workers)
        return test_loader
    else:
        raise NotImplementedError('%s not implemented.' % args.meta_arch)
Example #4
def get_dataloader(net, train_dataset, val_dataset, train_transform,
                   val_transform, batch_size, num_shards, args):
    """Get dataloader."""
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short,
                                        (tuple, list)) else net.short
    val_loader = mx.gluon.data.DataLoader(val_dataset.transform(
        val_transform(short, net.max_size)),
                                          num_shards,
                                          False,
                                          batchify_fn=val_bfn,
                                          last_batch='keep',
                                          num_workers=args.num_workers)
    return None, val_loader
Example #5
def get_dataloader(net, train_dataset, val_dataset, train_transform,
                   val_transform, batch_size, num_shards, args):
    """Get dataloader."""
    train_bfn = FasterRCNNTrainBatchify(net, num_shards)
    if hasattr(train_dataset, 'get_im_aspect_ratio'):
        im_aspect_ratio = train_dataset.get_im_aspect_ratio()
    else:
        im_aspect_ratio = [1.] * len(train_dataset)
    train_sampler = \
        gcv.nn.sampler.SplitSortedBucketSampler(im_aspect_ratio, batch_size,
                                                num_parts=hvd.size() if args.horovod else 1,
                                                part_index=hvd.rank() if args.horovod else 0,
                                                shuffle=True)
    # dataset: train_dataset.transform(train_transform(net.short, net.max_size, net, ashape=net.ashape, multi_stage=args.use_fpn))
    # ashape: the predefined anchor size
    # multi_stage + ashape: used to compute the anchors
    train_loader = mx.gluon.data.DataLoader(train_dataset.transform(
        train_transform(net.short,
                        net.max_size,
                        net,
                        ashape=net.ashape,
                        multi_stage=args.use_fpn)),
                                            batch_sampler=train_sampler,
                                            batchify_fn=train_bfn,
                                            num_workers=args.num_workers)
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short,
                                        (tuple, list)) else net.short
    # validation uses 1 sample per device
    # dataset: val_dataset.transform(val_transform(short, net.max_size))
    # each item returns img, bbox.astype('float32'), mx.nd.array([im_scale])
    # bbox: x1, y1, x2, y2, class_id
    # the image's shortest side is <= short and its longest side is <= net.max_size
    # Tuple here is gluoncv's batchify Tuple, not Python's built-in tuple
    # Append(): each sample stays its own ndarray; samples need not share a shape; the returned batch is a list
    # val_bfn has 3 Append()s; each Append() handles one field of the dataset item
    val_loader = mx.gluon.data.DataLoader(val_dataset.transform(
        val_transform(short, net.max_size)),
                                          num_shards,
                                          False,
                                          batchify_fn=val_bfn,
                                          last_batch='keep',
                                          num_workers=args.num_workers)
    return train_loader, val_loader
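
The comments above describe the validation resize rule; a minimal sketch of that rule, assuming the usual Faster R-CNN convention (gluoncv applies it inside its transforms; the helper name here is ours):

def resize_scale_sketch(h, w, short, max_size):
    # scale so the shorter side becomes `short` ...
    scale = short / min(h, w)
    # ... unless that would push the longer side past `max_size`
    if max(h, w) * scale > max_size:
        scale = max_size / max(h, w)
    return scale

print(resize_scale_sketch(800, 1200, short=600, max_size=1000))  # 0.75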
Example #6
def get_dataloader(net, train_dataset, val_dataset, train_transform,
                   val_transform, batch_size, num_shards, args):
    """Get dataloader."""
    train_bfn = FasterRCNNTrainBatchify(net, num_shards)
    if hasattr(train_dataset, 'get_im_aspect_ratio'):
        im_aspect_ratio = train_dataset.get_im_aspect_ratio()
    else:
        im_aspect_ratio = [1.] * len(train_dataset)
    if args.horovod:
        num_parts = hvd.size()
        part_index = hvd.rank()
    elif "perseus" in args.kv_store:
        num_parts = kv.num_workers
        part_index = kv.rank
    else:
        num_parts = 1
        part_index = 0
    train_sampler = \
        gcv.nn.sampler.SplitSortedBucketSampler(im_aspect_ratio, batch_size,
                                                num_parts=num_parts,
                                                part_index=part_index,
                                                shuffle=True)
    train_loader = mx.gluon.data.DataLoader(train_dataset.transform(
        train_transform(net.short,
                        net.max_size,
                        net,
                        ashape=net.ashape,
                        multi_stage=args.use_fpn)),
                                            batch_sampler=train_sampler,
                                            batchify_fn=train_bfn,
                                            num_workers=args.num_workers)
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short,
                                        (tuple, list)) else net.short
    # validation uses 1 sample per device
    val_loader = mx.gluon.data.DataLoader(val_dataset.transform(
        val_transform(short, net.max_size)),
                                          num_shards,
                                          False,
                                          batchify_fn=val_bfn,
                                          last_batch='keep',
                                          num_workers=args.num_workers)
    return train_loader, val_loader
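
The branch above only chooses num_parts/part_index; a toy illustration of what such a split means for which samples each worker sees (gluoncv's SplitSortedBucketSampler additionally sorts by aspect ratio and emits whole batches):

indices = list(range(12))
num_parts, part_index = 3, 1            # e.g. worker 1 of 3
shard = indices[part_index::num_parts]  # [1, 4, 7, 10] -- disjoint across workers
print(shard)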
Example #7

# Raw images have to be transformed to tensors
# (mxnet uses BCHW format) before they are fed into neural networks.
#
# A DataLoader is handy for applying different transforms and aggregating data into mini-batches.
#
# Because Faster-RCNN handles raw images with various aspect ratios and various shapes, we provide a
# :py:class:`gluoncv.data.batchify.Append`, which neither stacks nor pads images, but instead returns lists.
# This way, the returned image tensors and labels keep their own shapes, independent of the rest of the batch.

from gluoncv.data.batchify import Tuple, Append
from mxnet.gluon.data import DataLoader

batch_size = 2  # for the tutorial, we use a smaller batch size
num_workers = 0  # you can make it larger (if your CPU has more cores) to accelerate data loading

# behavior of batchify_fn: return images and labels as lists, without stacking or padding
batchify_fn = Tuple(Append(), Append())
train_loader = DataLoader(train_dataset.transform(train_transform),
                          batch_size,
                          shuffle=True,
                          batchify_fn=batchify_fn,
                          last_batch='rollover',
                          num_workers=num_workers)
val_loader = DataLoader(val_dataset.transform(val_transform),
                        batch_size,
                        shuffle=False,
                        batchify_fn=batchify_fn,
                        last_batch='keep',
                        num_workers=num_workers)

for ib, batch in enumerate(train_loader):
    if ib > 3:
        break
    # with Append, each field is a list of per-sample NDArrays
    print('data:', [d.shape for d in batch[0]], 'label:', [l.shape for l in batch[1]])
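
The comment block above explains why Append returns lists instead of stacked tensors; here is a minimal self-contained sketch of that behavior (the array shapes are illustrative, not from the original):

import mxnet as mx
from gluoncv.data.batchify import Tuple, Append

demo_bfn = Tuple(Append(), Append())
samples = [
    (mx.nd.zeros((3, 600, 800)), mx.nd.zeros((5, 6))),  # image, label
    (mx.nd.zeros((3, 480, 640)), mx.nd.zeros((2, 6))),  # a different shape is fine
]
imgs, labels = demo_bfn(samples)
# nothing is stacked or padded: each sample stays its own NDArray,
# with a batch axis of 1 prepended by default
print([im.shape for im in imgs])    # [(1, 3, 600, 800), (1, 3, 480, 640)]
print([lb.shape for lb in labels])  # [(1, 5, 6), (1, 2, 6)]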
Example #8
# imports and class header reconstructed so this fragment runs; the glob
# patterns are hypothetical placeholders (the constructor is called with no
# arguments below, and the real defaults are not shown)
import glob

import pandas as pd
from mxnet import image
from mxnet.gluon import data as gdata
from mxnet.gluon.data import DataLoader
from gluoncv.data.batchify import Tuple, Append


class DensemapDataset(gdata.Dataset):
    def __init__(self, imgPath='images/*.jpg', csvPath='labels/*.csv'):
        # sort both lists so image/csv pairs stay aligned
        self._imglist = sorted(glob.glob(imgPath))
        self._csvPath = sorted(glob.glob(csvPath))
        self._len = len(self._imglist)

    def __getitem__(self, idx):
        img = image.imread(self._imglist[idx])
        label = pd.read_csv(self._csvPath[idx])
        return (img, label)

    def __len__(self):
        return self._len


if __name__ == "__main__":
    batch_size = 2
    batchify_fn = Tuple(Append(), Append())
    train_dataset = DensemapDataset()
    im = train_dataset[0]

    def train_transform(*trans_data):
        img = trans_data[0]
        aug = gdata.vision.transforms.RandomFlipLeftRight()
        return (aug(img), trans_data[1])

    train_loader = DataLoader(train_dataset.transform(train_transform),
                              batch_size=2,
                              shuffle=True,
                              batchify_fn=batchify_fn)
    for ib, batch in enumerate(train_loader):
        # batch[0] is X, batch[1] is y
        # batch[0][0] is the 0th X
        break
Example #9
                   class_names=train_dataset.classes,
                   ax=ax)
plt.show()

##########################################################
# Data Loader
# -----------
# The data loader is identical to Faster R-CNN's, except for the extra mask input and output.

from gluoncv.data.batchify import Tuple, Append, MaskRCNNTrainBatchify
from mxnet.gluon.data import DataLoader

batch_size = 2  # for the tutorial, we use a smaller batch size
num_workers = 0  # you can make it larger (if your CPU has more cores) to accelerate data loading

train_bfn = Tuple(*[Append() for _ in range(3)])
train_loader = DataLoader(train_dataset.transform(train_transform),
                          batch_size,
                          shuffle=True,
                          batchify_fn=train_bfn,
                          last_batch='rollover',
                          num_workers=num_workers)
val_bfn = Tuple(*[Append() for _ in range(2)])
val_loader = DataLoader(val_dataset.transform(val_transform),
                        batch_size,
                        shuffle=False,
                        batchify_fn=val_bfn,
                        last_batch='keep',
                        num_workers=num_workers)

for ib, batch in enumerate(train_loader):
    if ib > 3:
        break
Example #10

def get_dataloader(net, train_dataset, val_dataset, train_transform, val_transform, batch_size,
                   num_shards, args):
    """Get dataloader."""
    train_bfn = FasterRCNNTrainBatchify(net, num_shards)
    train_sampler = \
        gcv.nn.sampler.SplitSortedBucketSampler(train_dataset.get_im_aspect_ratio(),
                                                batch_size,
                                                num_parts=hvd.size() if args.horovod else 1,
                                                part_index=hvd.rank() if args.horovod else 0,
                                                shuffle=True)
    train_loader = mx.gluon.data.DataLoader(train_dataset.transform(
        train_transform(net.short, net.max_size, net, ashape=net.ashape, multi_stage=args.use_fpn)),
        batch_sampler=train_sampler, batchify_fn=train_bfn, num_workers=args.num_workers)
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
    # validation uses 1 sample per device
    val_loader = mx.gluon.data.DataLoader(
        val_dataset.transform(val_transform(short, net.max_size)), num_shards, False,
        batchify_fn=val_bfn, last_batch='keep', num_workers=args.num_workers)
    return train_loader, val_loader


def save_params(net, logger, best_map, current_map, epoch, save_interval, prefix):
    current_map = float(current_map)
    if current_map > best_map[0]:
        logger.info('[Epoch {}] mAP {} higher than current best {}, saving to {}'.format(
            epoch, current_map, best_map, '{:s}_best.params'.format(prefix)))
        best_map[0] = current_map
        net.save_parameters('{:s}_best.params'.format(prefix))
Example #11
                   class_names=train_dataset.classes,
                   ax=ax)
plt.show()

##########################################################
# Data Loader
# -----------
# The data loader is identical to Faster R-CNN's, except for the extra mask input and output.

from gluoncv.data.batchify import Tuple, Append
from mxnet.gluon.data import DataLoader

batch_size = 2  # for the tutorial, we use a smaller batch size
num_workers = 0  # you can make it larger (if your CPU has more cores) to accelerate data loading

train_bfn = Tuple(*[Append() for _ in range(3)])
train_loader = DataLoader(train_dataset.transform(train_transform),
                          batch_size,
                          shuffle=True,
                          batchify_fn=train_bfn,
                          last_batch='rollover',
                          num_workers=num_workers)
val_bfn = Tuple(*[Append() for _ in range(2)])
val_loader = DataLoader(val_dataset.transform(val_transform),
                        batch_size,
                        shuffle=False,
                        batchify_fn=val_bfn,
                        last_batch='keep',
                        num_workers=num_workers)

for ib, batch in enumerate(train_loader):
    if ib > 3:
        break
Example #12
    def get_voc_iterator(rank, num_workers, net, num_shards):
        data_dir = "data-%d" % rank
        try:
            s3_client = boto3.client('s3')
            for file in [
                    'VOCtrainval_06-Nov-2007.tar', 'VOCtest_06-Nov-2007.tar',
                    'VOCtrainval_11-May-2012.tar'
            ]:
                s3_client.download_file(args.s3bucket, f'voc_tars/{file}',
                                        f'/opt/ml/code/{file}')
                # extract the tar just downloaded into this rank's data dir
                with tarfile.open(f'/opt/ml/code/{file}') as tar:
                    tar.extractall(path=f'/opt/ml/code/{data_dir}')
        except Exception:
            print('downloading from source')
            download_voc(data_dir)

        input_shape = (1, 256, 256, 3)
        batch_size = args.batch_size

        # might want to replace with mx.io.ImageDetRecordIter; this requires data in RecordIO format
        #         train_iter = mx.io.MNISTIter(
        #             image="%s/train-images-idx3-ubyte" % data_dir,
        #             label="%s/train-labels-idx1-ubyte" % data_dir,
        #             input_shape=input_shape,
        #             batch_size=batch_size,
        #             shuffle=True,
        #             flat=False,
        #             num_parts=hvd.size(),
        #             part_index=hvd.rank()
        #         )

        train_dataset = gdata.VOCDetection(
            root=f'/opt/ml/code/data-{rank}/VOCdevkit/',
            splits=[(2007, 'trainval'), (2012, 'trainval')])
        val_dataset = gdata.VOCDetection(
            root=f'/opt/ml/code/data-{rank}/VOCdevkit/',
            splits=[(2007, 'test')])
        val_metric = VOC07MApMetric(iou_thresh=0.5,
                                    class_names=val_dataset.classes)
        im_aspect_ratio = [1.] * len(train_dataset)
        train_bfn = FasterRCNNTrainBatchify(net)
        train_sampler = gluoncv.nn.sampler.SplitSortedBucketSampler(
            im_aspect_ratio,
            batch_size,
            num_parts=hvd.size() if args.horovod else 1,
            part_index=hvd.rank() if args.horovod else 0,
            shuffle=True)
        # had issue with multi_stage=True
        train_iter = mx.gluon.data.DataLoader(train_dataset.transform(
            FasterRCNNDefaultTrainTransform(net.short,
                                            net.max_size,
                                            net,
                                            ashape=net.ashape,
                                            multi_stage=False)),
                                              batch_sampler=train_sampler,
                                              batchify_fn=train_bfn,
                                              num_workers=num_workers)

        val_bfn = Tuple(*[Append() for _ in range(3)])
        short = net.short[-1] if isinstance(net.short,
                                            (tuple, list)) else net.short
        # validation uses 1 sample per device
        val_iter = mx.gluon.data.DataLoader(val_dataset.transform(
            FasterRCNNDefaultValTransform(short, net.max_size)),
                                            num_shards,
                                            False,
                                            batchify_fn=val_bfn,
                                            last_batch='keep',
                                            num_workers=num_workers)

        return train_iter, val_iter