Python ILSVRC12 Examples, tensorpack.dataset.ILSVRC12 Python Examples

Example #1

0

Show file

def get_imagenet_dataflow(datadir, name, batch_size, augmentors):
    """
    Assemble the batched ILSVRC12 dataflow for the requested split.

    See explanations in the tutorial:
    http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html

    Args:
        datadir: dataset location handed to the ILSVRC12 readers; must not
            be None.
        name: which split to load: 'train', 'val' or 'test'.
        batch_size: batch size given to ``BatchData``.
        augmentors: list of augmentors applied to each image.

    Returns:
        The final dataflow. For 'train' it is built from ``dataset.ILSVRC12``
        with shuffling, augmented and prefetched by several processes, then
        batched with ``remainder=False``. For the other splits, file names
        from ``dataset.ILSVRC12Files`` are read and augmented by a thread
        pool, batched with ``remainder=True`` and prefetched by one process.

    Raises:
        AssertionError: if ``name`` is not a known split, ``datadir`` is
            None, or ``augmentors`` is not a list.
    """
    assert name in ('train', 'val', 'test')
    assert datadir is not None
    assert isinstance(augmentors, list)

    # Use at most 30 workers, fewer on machines with fewer CPUs.
    workers = min(30, multiprocessing.cpu_count())
    metadata_path = './ilsvrc_metadata'

    if name == 'train':
        flow = dataset.ILSVRC12(
            datadir, name, meta_dir=metadata_path, shuffle=True)
        flow = AugmentImageComponent(flow, augmentors, copy=False)
        flow = PrefetchDataZMQ(flow, workers)
        return BatchData(flow, batch_size, remainder=False)

    # Evaluation: iterate over file names and decode inside the thread pool.
    flow = dataset.ILSVRC12Files(
        datadir, name, meta_dir=metadata_path, shuffle=False)
    pipeline = imgaug.AugmentorList(augmentors)

    def mapf(dp):
        # dp is a (file name, label) pair; load the image and augment it.
        path, label = dp
        image = pipeline.augment(cv2.imread(path, cv2.IMREAD_COLOR))
        return image, label

    flow = MultiThreadMapData(
        flow, workers, mapf, buffer_size=2000, strict=True)
    flow = BatchData(flow, batch_size, remainder=True)
    return PrefetchDataZMQ(flow, 1)

Example #2

0

Show file

File: imagenet_utils.py Project: zzuxzt/tensorpack

def get_imagenet_dataflow(
        datadir, name, batch_size,
        augmentors, parallel=None):
    """
    Build the batched ILSVRC12 dataflow for one split.

    See explanations in the tutorial:
    http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html

    Args:
        datadir: dataset location handed to the ILSVRC12 readers; must not
            be None.
        name: which split to load: 'train', 'val' or 'test'.
        batch_size: batch size given to ``BatchData``.
        augmentors: list of augmentors applied to each image.
        parallel: number of worker processes ('train') or threads (other
            splits). When None, defaults to half of the CPU count (assuming
            hyperthreading), capped at 40 and never less than 1.

    Returns:
        The final, batched dataflow for the requested split.

    Raises:
        AssertionError: if ``name`` is not a known split, ``datadir`` is
            None, or ``augmentors`` is not a list.
    """
    assert name in ['train', 'val', 'test']
    assert datadir is not None
    assert isinstance(augmentors, list)
    isTrain = name == 'train'
    if parallel is None:
        # assuming hyperthreading; clamp to 1 because cpu_count() // 2 is 0
        # on a single-CPU host, which would leave the pipeline without workers.
        parallel = max(1, min(40, multiprocessing.cpu_count() // 2))
    if isTrain:
        ds = dataset.ILSVRC12(datadir, name, shuffle=True)
        ds = AugmentImageComponent(ds, augmentors, copy=False)
        if parallel < 16:
            logger.warn("DataFlow may become the bottleneck when too few processes are used.")
        ds = PrefetchDataZMQ(ds, parallel)
        ds = BatchData(ds, batch_size, remainder=False)
    else:
        # Evaluation iterates over file names and decodes in a thread pool.
        ds = dataset.ILSVRC12Files(datadir, name, shuffle=False)
        aug = imgaug.AugmentorList(augmentors)

        def mapf(dp):
            # dp is a (file name, label) pair.
            fname, cls = dp
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            im = aug.augment(im)
            return im, cls
        ds = MultiThreadMapData(ds, parallel, mapf, buffer_size=2000, strict=True)
        ds = BatchData(ds, batch_size, remainder=True)
        ds = PrefetchDataZMQ(ds, 1)
    return ds

Example #3

0

Show file

File: imagenet_utils.py Project: StanfordVisionSystems/vfeedbacknet

def get_imagenet_dataflow(datadir,
                          name,
                          batch_size,
                          augmentors,
                          parallel=None):
    """
    Build the batched ILSVRC12 dataflow for one split.

    See explanations in the tutorial:
    http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html

    For the non-training splits, images are not read from loose files: each
    file name ``<parent>/<file>`` is looked up as member ``<parent_name>/<file>``
    inside the archive ``<parent>.zip``.

    Args:
        datadir: dataset location handed to the ILSVRC12 readers; must not
            be None.
        name: which split to load: 'train', 'val' or 'test'.
        batch_size: batch size given to ``BatchData``.
        augmentors: list of augmentors applied to each image.
        parallel: number of worker processes ('train') or threads (other
            splits). When None, defaults to the CPU count capped at 40.

    Returns:
        The final, batched dataflow for the requested split.

    Raises:
        AssertionError: if ``name`` is not a known split, ``datadir`` is
            None, or ``augmentors`` is not a list.
    """
    assert name in ['train', 'val', 'test']
    assert datadir is not None
    assert isinstance(augmentors, list)
    isTrain = name == 'train'
    if parallel is None:
        parallel = min(40, multiprocessing.cpu_count())
    if isTrain:
        ds = dataset.ILSVRC12(datadir, name, shuffle=True)
        ds = AugmentImageComponent(ds, augmentors, copy=False)
        if parallel < 16:
            logger.warn(
                "DataFlow may become the bottleneck when too few processes are used."
            )
        ds = PrefetchDataZMQ(ds, parallel)
        ds = BatchData(ds, batch_size, remainder=False)
    else:
        ds = dataset.ILSVRC12Files(datadir, name, shuffle=False)
        aug = imgaug.AugmentorList(augmentors)

        def mapf(dp):
            # dp is a (file name, label) pair.
            fname, cls = dp

            parent_dir = os.path.dirname(fname)
            zip_filepath = parent_dir + '.zip'
            # ZIP member names always use '/' as the separator, regardless
            # of the host OS, so do not build them with os.path.join.
            member = '/'.join((os.path.basename(parent_dir),
                               os.path.basename(fname)))

            # Close the archive after every read; this runs once per image,
            # so leaving it open would leak a file handle each time.
            with zipfile.ZipFile(zip_filepath, 'r') as archive:
                raw_jpeg = archive.read(member)

            # frombuffer is the supported way to view raw bytes as uint8
            # (binary-mode np.fromstring is deprecated).
            compress_jpeg = np.frombuffer(raw_jpeg, dtype=np.uint8)
            im = cv2.imdecode(compress_jpeg, cv2.IMREAD_COLOR)

            im = aug.augment(im)
            return im, cls

        ds = MultiThreadMapData(ds,
                                parallel,
                                mapf,
                                buffer_size=2000,
                                strict=True)
        ds = BatchData(ds, batch_size, remainder=True)
        ds = PrefetchDataZMQ(ds, 1)
    return ds

Example #4

0

Show file

File: imagenet_utils.py Project: DYJ1234567890/Adaptively-Connected-Neural-Networks-master

def get_imagenet_dataflow(datadir,
                          name,
                          batch_size,
                          augmentors,
                          parallel=None):
    """
    Build the batched ImageNet (ILSVRC12) dataflow for one split.

    See explanations in the tutorial:
    http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html

    Args:
        datadir: dataset location handed to the ILSVRC12 readers; must not
            be None.
        name: which split to load: 'train', 'val' or 'test'.
        batch_size: batch size given to ``BatchData``.
        augmentors: list of augmentors applied to each image.
        parallel: number of worker processes ('train') or threads (other
            splits). When None, defaults to half of the CPU count (assuming
            hyperthreading), capped at 40 and never less than 1.

    Returns:
        The final, batched dataflow for the requested split.

    Raises:
        AssertionError: if ``name`` is not a known split, ``datadir`` is
            None, or ``augmentors`` is not a list.
    """
    assert name in ['train', 'val', 'test']
    assert datadir is not None
    assert isinstance(augmentors, list)
    isTrain = name == 'train'
    if parallel is None:  # caller did not choose a worker count
        # Assuming hyperthreading, use half of the reported CPU count.
        # Clamp to 1 because cpu_count() // 2 is 0 on a single-CPU host,
        # which would leave the pipeline without any worker.
        parallel = max(1, min(40, multiprocessing.cpu_count() // 2))

    if isTrain:
        # Training reads from the shuffled ILSVRC12 dataflow.
        ds = dataset.ILSVRC12(datadir, name, shuffle=True)
        # Apply the augmentors to the image component of each datapoint.
        ds = AugmentImageComponent(ds, augmentors, copy=False)
        if parallel < 16:  # few workers: warn about a possible bottleneck
            logger.warn(
                "DataFlow may become the bottleneck when too few processes are used."
            )
        ds = PrefetchDataZMQ(ds, parallel)  # run the dataflow in `parallel` processes
        ds = BatchData(ds, batch_size, remainder=False)  # group into batches
    else:
        # Evaluation: ILSVRC12Files is used so that datapoints carry a file
        # name (see mapf below) and decoding happens in the thread pool.
        ds = dataset.ILSVRC12Files(datadir, name, shuffle=False)
        aug = imgaug.AugmentorList(augmentors)

        def mapf(dp):
            # dp is a (file name, label) pair.
            fname, cls = dp
            # Load the file as a colour image.
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            im = aug.augment(im)  # apply the augmentor list
            return im, cls

        ds = MultiThreadMapData(ds,
                                parallel,
                                mapf,
                                buffer_size=2000,
                                strict=True)  # map with `parallel` threads
        ds = BatchData(ds, batch_size, remainder=True)  # group into batches
        ds = PrefetchDataZMQ(ds, 1)
    return ds

Example #5

0

Show file

def get_train_dataflow(datadir, batch, augmentors=None):
    """
    Build the shuffled, augmented and batched ILSVRC12 training dataflow.

    Sec 3, Remark 4:
    Use a single random shuffling of the training data (per epoch)
    that is divided amongst all k workers.

    NOTE: Here we do not follow the paper which makes some differences.
    Here, each machine shuffles independently.

    Args:
        datadir: dataset location handed to ``dataset.ILSVRC12``.
        batch: batch size given to ``BatchData``.
        augmentors: augmentors applied to each image; when None, the result
            of ``fbresnet_augmentor(True)`` is used.

    Returns:
        The dataflow wrapped in ``MultiProcessRunnerZMQ``.
    """
    chosen_augmentors = (
        fbresnet_augmentor(True) if augmentors is None else augmentors)

    flow = dataset.ILSVRC12(datadir, 'train', shuffle=True)
    flow = AugmentImageComponent(flow, chosen_augmentors, copy=False)
    # Batching happens before the multi-process wrapper is applied.
    flow = BatchData(flow, batch, remainder=False)

    # Use at most 50 processes, fewer on machines with fewer CPUs.
    num_procs = min(50, mp.cpu_count())
    return MultiProcessRunnerZMQ(flow, num_procs)

Example #6

0

Show file

File: imagenet_benchmarks.py Project: sherrycattt/Efficient-PyTorch

def get_tp_loader(data_dir, name, batch_size, parallel=None):
    """
    Build a tensorpack loader for one ILSVRC12 split.

    Args:
        data_dir: dataset location. If it ends with 'lmdb', the split is
            loaded from ``ILSVRC-<name>.lmdb`` inside it; otherwise it is
            passed to the ILSVRC12 readers directly.
        name: split name; 'train' selects the training configuration.
        batch_size: forwarded to the loader helper.
        parallel: forwarded to the loader helper.

    Returns:
        The result of ``get_sequential_loader`` (LMDB input) or
        ``get_random_loader`` (any other input).
    """
    is_train = name == 'train'
    augmentors = get_tp_augmentor(is_train)

    if data_dir.endswith('lmdb'):
        # Benchmark note kept from the original: 500000[70:87:20, 1.95it/s]
        lmdb_path = os.path.join(data_dir, 'ILSVRC-%s.lmdb' % name)
        source = LMDBSerializer.load(lmdb_path, shuffle=False)
        return get_sequential_loader(source, is_train, batch_size,
                                     augmentors, parallel)

    # Benchmark note kept from the original: 500000[27:11:03, 5.11it/s]
    # Training uses the shuffled ILSVRC12 reader, other splits the
    # unshuffled ILSVRC12Files reader.
    reader = dataset.ILSVRC12 if is_train else dataset.ILSVRC12Files
    source = reader(data_dir, name, shuffle=is_train)
    return get_random_loader(source, is_train, batch_size, augmentors,
                             parallel)