Beispiel #1
0
def test_wrong_feature_shape():
    """Expect ``pipe.run`` to raise ``RuntimeError`` for too-small feature shapes.

    The bbox and label features are declared as ``FixedLenFeature`` with an
    empty shape; the test asserts that running the pipeline over the dummy
    COCO TFRecord raises an error whose message matches the glob below.
    """
    features = {
        'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
        'image/object/bbox': tfrec.FixedLenFeature([], tfrec.float32, -1.0),
        'image/object/class/label': tfrec.FixedLenFeature([], tfrec.int64, -1),
    }
    data_dir = os.path.join(get_dali_extra_path(), 'db', 'coco_dummy')
    record_file = os.path.join(data_dir, 'small_coco.tfrecord')
    index_file = os.path.join(data_dir, 'small_coco_index.idx')

    pipe = Pipeline(1, 1, 0)
    with pipe:
        records = fn.readers.tfrecord(path=record_file,
                                      index_path=index_file,
                                      features=features)
    outputs = (
        records['image/encoded'],
        records['image/object/class/label'],
        records['image/object/bbox'],
    )
    pipe.set_outputs(*outputs)
    pipe.build()

    # The error is raised because FixedLenFeature is used with a shape that is
    # too small to house the input.
    expected_message = (
        "Error when executing CPU operator*readers*tfrecord*"
        "Output tensor shape is too small*[]*Expected at least 4 elements")
    assert_raises(RuntimeError, pipe.run, glob=expected_message)
 def __init__(self, batch_size, num_threads, device_id, num_gpus, data, data_idx):
     """Create the pipeline and attach a TFRecord reader as ``self.input``.

     ``data`` and ``data_idx`` are forwarded as the reader's ``path`` and
     ``index_path``. ``num_gpus`` is accepted but not used in this constructor.
     """
     super(TFRecordPipeline, self).__init__(batch_size, num_threads, device_id)
     record_features = {
         "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
         "image/class/label": tfrec.FixedLenFeature([1], tfrec.int64, -1),
     }
     self.input = ops.TFRecordReader(
         path=data,
         index_path=data_idx,
         features=record_features,
     )
Beispiel #3
0
 def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False):
     """Build the operators of the hybrid training pipeline.

     The reader opens ``data_dir + ".tfrecord"`` with the index file
     ``data_dir + ".index"``. The pipeline seed is ``12 + device_id``.
     ``dali_cpu`` is accepted but not consulted here: the decoder is created
     on "cpu", the resize/normalize operators on "gpu", and the printed
     variant is always "gpu".
     """
     super(HybridTrainPipe, self).__init__(
         batch_size, num_threads, device_id, seed=12 + device_id)

     record_features = {
         "image": tfrec.FixedLenFeature([], tfrec.string, ""),
         'label': tfrec.FixedLenFeature([], tfrec.float32, 0.0),
         'index': tfrec.FixedLenFeature([], tfrec.int64, 0),
     }
     self.input = ops.TFRecordReader(path=data_dir + ".tfrecord",
                                     index_path=data_dir + ".index",
                                     features=record_features)
     self.decode = ops.ImageDecoder(device="cpu",
                                    output_type=types.RGB,
                                    split_stages=True)
     self.res = ops.RandomResizedCrop(device="gpu", size=(224, 224))

     # Per-channel statistics scaled to the 0-255 pixel range.
     channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
     channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
     self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                         output_dtype=types.FLOAT,
                                         output_layout=types.NCHW,
                                         crop=(crop, crop),
                                         image_type=types.RGB,
                                         mean=channel_mean,
                                         std=channel_std)
     self.coin = ops.CoinFlip(probability=0.5)
     print('DALI "{0}" variant'.format('gpu'))
 def __init__(self, batch_size, num_threads, device_id):
     """Create the reader, decoder, resize and normalize operators.

     NOTE(review): ``tfrecord`` and ``tfrecord_idx`` are not parameters; they
     are resolved from an enclosing scope that is not shown in this snippet.
     """
     super(TFRecordPipeline, self).__init__(batch_size, num_threads, device_id)

     # Fixed-length features first, then the variable-length bounding boxes.
     record_features = {
         "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
         'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64, -1),
         'image/class/text': tfrec.FixedLenFeature([], tfrec.string, ''),
         'image/object/bbox/xmin': tfrec.VarLenFeature(tfrec.float32, 0.0),
         'image/object/bbox/ymin': tfrec.VarLenFeature(tfrec.float32, 0.0),
         'image/object/bbox/xmax': tfrec.VarLenFeature(tfrec.float32, 0.0),
         'image/object/bbox/ymax': tfrec.VarLenFeature(tfrec.float32, 0.0),
     }
     self.input = ops.TFRecordReader(path=tfrecord,
                                     index_path=tfrecord_idx,
                                     features=record_features)
     self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
     self.resize = ops.Resize(device="gpu", resize_shorter=256.)

     normalize_options = dict(
         device="gpu",
         output_dtype=types.FLOAT,
         crop=(224, 224),
         image_type=types.RGB,
         mean=[0., 0., 0.],
         std=[1., 1., 1.],
     )
     self.cmnp = ops.CropMirrorNormalize(**normalize_options)
     self.uniform = ops.Uniform(range=(0.0, 1.0))
     self.iter = 0
Beispiel #5
0
 def __init__(self, batch_size, num_threads, device_id, **kwargs):
     """Configure the training pipeline from keyword options.

     Keys read from ``kwargs``: "dim", "seed", "oversampling", "tfrecords",
     "tfrecords_idx", "gpus" and "patch_size". The reader is sharded with
     ``num_shards=kwargs["gpus"]`` and ``shard_id=device_id``.
     """
     super(TFRecordTrain, self).__init__(batch_size, num_threads, device_id)
     self.dim = kwargs["dim"]
     self.seed = kwargs["seed"]
     self.oversampling = kwargs["oversampling"]

     # The shape features hold one more entry than the spatial dimensionality.
     shape_len = self.dim + 1
     record_features = {
         "X_shape": tfrec.FixedLenFeature([shape_len], tfrec.int64, 0),
         "Y_shape": tfrec.FixedLenFeature([shape_len], tfrec.int64, 0),
         "X": tfrec.VarLenFeature([], tfrec.float32, 0.0),
         "Y": tfrec.FixedLenFeature([], tfrec.string, ""),
         "fname": tfrec.FixedLenFeature([], tfrec.string, ""),
     }
     self.input = ops.TFRecordReader(path=kwargs["tfrecords"],
                                     index_path=kwargs["tfrecords_idx"],
                                     features=record_features,
                                     num_shards=kwargs["gpus"],
                                     shard_id=device_id,
                                     random_shuffle=True,
                                     pad_last_batch=True,
                                     read_ahead=True,
                                     seed=self.seed)

     self.patch_size = kwargs["patch_size"]
     # The same patch size is kept as both an integer and a float constant.
     self.crop_shape = types.Constant(np.array(self.patch_size), dtype=types.INT64)
     self.crop_shape_float = types.Constant(np.array(self.patch_size), dtype=types.FLOAT)

     if self.dim == 3:
         self.layout = "CDHW"
         self.axis_name = "DHW"
     else:
         self.layout = "CHW"
         self.axis_name = "HW"
Beispiel #6
0
 def __init__(self, batch_size, num_threads, device_id, tfrecord, tfrecord_idx):
     """Create the reader, nvJPEG decoder, resize and normalize operators.

     ``tfrecord`` and ``tfrecord_idx`` are forwarded to the reader as ``path``
     and ``index_path``.
     """
     super(TFRecordPipeline, self).__init__(batch_size, num_threads, device_id)

     record_features = {
         "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
         'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64, -1),
         'image/class/text': tfrec.FixedLenFeature([], tfrec.string, ''),
     }
     self.input = ops.TFRecordReader(
         path=tfrecord,
         index_path=tfrecord_idx,
         features=record_features,
     )
     self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)
     self.resize = ops.Resize(device="gpu", resize_a=256, resize_b=256)
     self.cmnp = ops.CropMirrorNormalize(
         device="gpu",
         output_dtype=types.FLOAT,
         crop=(224, 224),
         image_type=types.RGB,
         mean=[0., 0., 0.],
         std=[1., 1., 1.],
         output_layout=types.NHWC,
     )
     self.uniform = ops.Uniform(range=(0.0, 1.0))
     self.iter = 0
Beispiel #7
0
 def __init__(self, tfrecord_files, tfrecord_index_files, num_shards, shard_id,
              dataset_size=None, stick_to_shard=False, pad_last_batch=False,
              initial_fill=1024, lazy_init=False, read_ahead=False, prefetch_queue_depth=1,
              seed=-1, random_shuffle=False, skip_cached_images=False):
     """Store the dataset location and create the TFRecord reader and cast ops.

     The file lists and sharding arguments are kept on the instance; all
     remaining options are forwarded unchanged to ``dali.ops.TFRecordReader``.
     ``dataset_size`` is stored as ``self._size``.
     """
     self.tfrecord_files = tfrecord_files
     self.tfrecord_index_files = tfrecord_index_files
     self.num_shards = num_shards
     self.shard_id = shard_id

     features = {
         'image/encoded': tfrec.FixedLenFeature([], tfrec.string, ""),
         'image/id': tfrec.FixedLenFeature([], tfrec.int64, -1),
         'image/shape': tfrec.FixedLenFeature([3], tfrec.int64, -1),
         'image/object/label': tfrec.VarLenFeature([1], tfrec.int64, -1),
         'image/object/bbox': tfrec.VarLenFeature([4], tfrec.float32, 0.0),
     }

     # Note that (shuffle_after_epoch, skip_empty, ltrb, ratio, size_threshold) arguments
     # are not supported in the TFRecord reader, but some of them can be passed to the
     # TFRecord creation script.
     reader_options = dict(
         path=self.tfrecord_files,
         index_path=self.tfrecord_index_files,
         num_shards=self.num_shards,
         shard_id=self.shard_id,
         stick_to_shard=stick_to_shard,
         pad_last_batch=pad_last_batch,
         initial_fill=initial_fill,
         lazy_init=lazy_init,
         read_ahead=read_ahead,
         prefetch_queue_depth=prefetch_queue_depth,
         seed=seed,
         random_shuffle=random_shuffle,
         skip_cached_images=skip_cached_images,
         features=features,
     )
     self.dataset_reader = dali.ops.TFRecordReader(**reader_options)
     self._size = dataset_size
     self.cast_int32 = dali.ops.Cast(device="cpu", dtype=dali.types.INT32)
Beispiel #8
0
 def __init__(self, tfrecord_files, idx_files,
              batch_size, device_id=0, rank=0,
              total_devices=1, num_threads=4):
     """Create a sharded, shuffled TFRecord reader plus augmentation operators.

     ``rank`` and ``total_devices`` are passed to the reader as ``shard_id``
     and ``num_shards``.
     """
     super(TFRecordPipeline, self).__init__(batch_size, num_threads, device_id)

     record_features = {
         "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
         'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64, -1),
     }
     self.input = ops.TFRecordReader(
         path=tfrecord_files,
         index_path=idx_files,
         shard_id=rank,
         num_shards=total_devices,
         random_shuffle=True,
         features=record_features,
     )
     self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
     self.resize = ops.Resize(device="gpu", resize_shorter=256)
     self.cmnp = ops.CropMirrorNormalize(
         device="gpu",
         output_dtype=types.FLOAT16,
         crop=(224, 224),
         image_type=types.RGB,
         mean=[0, 0, 0],
         std=[1., 1., 1.],
         output_layout='HWC',
     )

     # Random-number sources for the augmentations.
     self.uniform = ops.Uniform(range=(0.0, 1.0))
     self.flip = ops.CoinFlip()
     self.brightness = ops.Uniform(range=(0.5, 1.5))
     self.contrast = ops.Uniform(range=(0.8, 1.3))

     self.cast = ops.Cast(device="gpu", dtype=types.FLOAT16)
     self.iter = 0
Beispiel #9
0
 def __init__(self, tfrec_filenames, tfrec_idx_filenames, batch_size,
              num_threads, device_id, set_affinity, prefetch_queue_depth):
     """Create a pipeline whose resize and normalize operators run on "cpu".

     ``set_affinity`` and ``prefetch_queue_depth`` are forwarded to the base
     pipeline constructor. The resize target, crop size and channel means
     come from the names ``_RESIZE_MIN``, ``INPUT_SIZE`` and
     ``_CHANNEL_MEANS``, which are defined outside this snippet.
     """
     super(Dali_CPU_Pipe, self).__init__(
         batch_size,
         num_threads,
         device_id,
         set_affinity=set_affinity,
         prefetch_queue_depth=prefetch_queue_depth,
     )

     record_features = {
         "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
         'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64, -1),
     }
     self.input = ops.TFRecordReader(
         path=tfrec_filenames,
         index_path=tfrec_idx_filenames,
         initial_fill=10000,
         features=record_features,
     )
     self.decode = ops.HostDecoder(output_type=types.RGB)
     self.resize = ops.Resize(device="cpu", resize_shorter=_RESIZE_MIN)
     self.cmnp = ops.CropMirrorNormalize(
         device="cpu",
         output_dtype=types.FLOAT,
         crop=(INPUT_SIZE, INPUT_SIZE),
         image_type=types.RGB,
         mean=_CHANNEL_MEANS,
         std=[58.395, 57.120, 57.375],
         output_layout=dali.types.NCHW,
     )
     self.iter = 0
Beispiel #10
0
def tfrecord_pipe(tfrecord_op, path, index_path):
    """Call ``tfrecord_op`` on the given files and return the encoded images.

    ``tfrecord_op`` is invoked with ``path``, ``index_path`` and a feature
    description containing "image/encoded" and "image/class/label"; only the
    "image/encoded" entry of its result is returned.
    """
    record_features = {
        "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
        "image/class/label": tfrec.FixedLenFeature([1], tfrec.int64, -1),
    }
    records = tfrecord_op(path=path, index_path=index_path, features=record_features)
    return records["image/encoded"]
 def __init__(self, batch_size, num_threads, device_id, data_dir, crop, size,
              index_dir="/home/guojia/idx_files/val"):
     """Build the operators of the validation pipeline.

     Args:
         batch_size, num_threads, device_id: forwarded to the base pipeline;
             the pipeline seed is ``12 + device_id``.
         data_dir: passed to the TFRecord reader as ``path``.
         crop: side length of the square crop used by the normalize operator.
         size: ``resize_shorter`` target of the resize operator.
         index_dir: directory whose entries are used, in sorted order, as the
             reader's ``index_path``. Defaults to the location that was
             previously hard-coded, so existing callers are unaffected.

     The shard id and shard count are read from a module-level ``args``
     object (``args.local_rank`` / ``args.world_size``) that is defined
     outside this snippet.
     """
     super(HybridValPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)

     # Every entry of index_dir is treated as an index file; sorting keeps the
     # order deterministic regardless of what os.listdir returns.
     index_path = sorted(os.path.join(index_dir, name) for name in os.listdir(index_dir))

     record_features = {
         'image/height': tfrec.FixedLenFeature([1], tfrec.int64, -1),
         'image/width': tfrec.FixedLenFeature([1], tfrec.int64, -1),
         'image/colorspace': tfrec.FixedLenFeature([], tfrec.string, ''),
         'image/channels': tfrec.FixedLenFeature([], tfrec.int64, -1),
         'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64, -1),
         'image/class/synset': tfrec.FixedLenFeature([], tfrec.string, ''),
         'image/format': tfrec.FixedLenFeature((), tfrec.string, ""),
         'image/filename': tfrec.FixedLenFeature((), tfrec.string, ""),
         'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
     }
     self.input = ops.TFRecordReader(path=data_dir, index_path=index_path, shard_id=args.local_rank,
                                     num_shards=args.world_size, random_shuffle=True,
                                     features=record_features)
     self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
     self.res = ops.Resize(device="gpu", resize_shorter=size, interp_type=types.INTERP_TRIANGULAR)
     self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                         output_dtype=types.FLOAT,
                                         output_layout=types.NCHW,
                                         crop=(crop, crop),
                                         image_type=types.RGB,
                                         mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                         std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
Beispiel #12
0
 def __init__(self, batch_size, num_threads, device_id, num_gpus, data_paths, dont_use_mmap):
     """Create a sharded TFRecord reader from two glob patterns.

     ``data_paths[0]`` is globbed for the record files and ``data_paths[1]``
     for the index files; both result lists are sorted. The reader uses
     ``device_id`` as its shard id and ``num_gpus`` as the shard count.
     """
     super(TFRecordPipeline, self).__init__(batch_size, num_threads, device_id)
     record_files = sorted(glob.glob(data_paths[0]))
     index_files = sorted(glob.glob(data_paths[1]))
     record_features = {
         "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
         "image/class/label": tfrec.FixedLenFeature([1], tfrec.int64, -1),
     }
     self.input = ops.readers.TFRecord(
         path=record_files,
         index_path=index_files,
         shard_id=device_id,
         num_shards=num_gpus,
         features=record_features,
         dont_use_mmap=dont_use_mmap,
     )
 def __init__(self, **kwargs):
     """Create a sharded TFRecord reader configured entirely from ``kwargs``.

     All keyword arguments are forwarded to the base constructor. Keys read
     here: "data_paths" (two glob patterns: records, then indices),
     "decoder_cache_params" (its "cache_enabled" entry), "device_id" and
     "num_gpus".
     """
     super(TFRecordPipeline, self).__init__(**kwargs)
     record_files = sorted(glob.glob(kwargs['data_paths'][0]))
     index_files = sorted(glob.glob(kwargs['data_paths'][1]))
     cache_enabled = kwargs['decoder_cache_params']['cache_enabled']
     record_features = {
         "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
         "image/class/label": tfrec.FixedLenFeature([1], tfrec.int64, -1),
     }
     # stick_to_shard follows the decoder-cache flag; skip_cached_images is
     # deliberately left unset here (it was disabled in the original code).
     self.input = ops.TFRecordReader(
         path=record_files,
         index_path=index_files,
         shard_id=kwargs['device_id'],
         num_shards=kwargs['num_gpus'],
         stick_to_shard=cache_enabled,
         features=record_features,
     )
Beispiel #14
0
def tfrecord_pipeline(dspath, batch_size, num_threads, device="cpu", device_id=None,
                        shard_id=0, num_shards=1, reader_name="Reader",
                        seq=True, chroms=False, chroms_vlog=False, target=True, target_vlog=True, label=False, random_shuffle=True):
    """Build a DALI pipeline that reads sequence/chromatin samples from TFRecords.

    Args:
        dspath: mapping with the keys 'TFRecord', 'TFRecord_idx' and
            'chromatin_tracks' (an iterable of feature names).
        batch_size, num_threads, device_id: forwarded to ``Pipeline``.
        device: when "gpu", the reader outputs are moved with ``.gpu()``
            before further processing; also passed as ``device=`` to the
            shape operators.
        shard_id, num_shards, reader_name, random_shuffle: forwarded to the
            TFRecord reader.
        seq, chroms, target, label: select which outputs are emitted, in
            that order.
        chroms_vlog, target_vlog: when true, the corresponding output is
            ``log(x + 1)`` instead of ``x``.

    Returns:
        The ``Pipeline`` with its outputs set; it is not built here.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id)

    tracks = dspath["chromatin_tracks"]

    feature_description = {
        "seq": tfrec.VarLenFeature(tfrec.float32, -1.0),
        "label": tfrec.FixedLenFeature([], tfrec.int64, -1),
        "target": tfrec.FixedLenFeature([], tfrec.float32, -1.0),
    }
    for ct in tracks:
        feature_description[ct] = tfrec.VarLenFeature(tfrec.float32, -1.0)

    with pipe:
        inputs = fn.readers.tfrecord(
            name=reader_name,
            path=dspath['TFRecord'],
            index_path=dspath['TFRecord_idx'],
            features=feature_description,
            shard_id=shard_id,
            num_shards=num_shards,
            random_shuffle=random_shuffle,
            read_ahead=True,
            prefetch_queue_depth=20,
            pad_last_batch=True)
        if device == "gpu":
            inputs['seq'] = inputs['seq'].gpu()
            for ct in tracks:
                inputs[ct] = inputs[ct].gpu()
            inputs['target'] = inputs['target'].gpu()
            inputs['label'] = inputs['label'].gpu()

        seqdata = fn.expand_dims(inputs['seq'], axes=1, device=device)
        seqdata = fn.reshape(seqdata, shape=(4, -1), device=device)

        sample = []
        if seq:
            sample.append(seqdata)
        if chroms:
            # Build the concatenation only when it is requested: fn.cat needs
            # at least one input, so constructing it unconditionally made the
            # function fail for an empty 'chromatin_tracks' even when the
            # chromatin output was not wanted.
            chromsdata = fn.cat(
                *[fn.expand_dims(inputs[ct], axes=0, device=device) for ct in tracks],
                axis=0, device=device)
            sample.append(log(chromsdata + 1) if chroms_vlog else chromsdata)
        if target:
            sample.append(log(inputs['target'] + 1) if target_vlog else inputs['target'])
        if label:
            sample.append(inputs['label'])

        pipe.set_outputs(*sample)
    return pipe
Beispiel #15
0
    def __init__(self, batch_size, num_threads, device_id, tfrecords, idx_paths):
        """Create the reader and the image-processing operators.

        ``tfrecords`` and ``idx_paths`` are forwarded to the reader as
        ``path`` and ``index_path``.
        """
        super(ResnetPipeline, self).__init__(batch_size, num_threads, device_id)

        # Transformation operations below.
        # From https://docs.nvidia.com/deeplearning/sdk/dali-developer-guide/docs/supported_ops.html
        record_features = {
            "image/encoded": tfrec.FixedLenFeature([], tfrec.string, ""),
            "image/class/label": tfrec.FixedLenFeature([1], tfrec.float32, 0.0),
            "image/class/text": tfrec.FixedLenFeature([], tfrec.string, ""),
            "image/object/bbox/xmin": tfrec.VarLenFeature(tfrec.float32, 0.0),
            "image/object/bbox/ymin": tfrec.VarLenFeature(tfrec.float32, 0.0),
            "image/object/bbox/xmax": tfrec.VarLenFeature(tfrec.float32, 0.0),
            "image/object/bbox/ymax": tfrec.VarLenFeature(tfrec.float32, 0.0),
        }
        self.input = ops.TFRecordReader(path=tfrecords,
                                        index_path=idx_paths,
                                        features=record_features)

        self.decode = ops.nvJPEGDecoder(
            device="mixed", cache_debug=True, output_type=types.RGB)

        self.resize = ops.Resize(
            device="gpu",
            image_type=types.RGB,
            interp_type=types.INTERP_LINEAR,
            resize_shorter=256.,
        )

        self.cmn = ops.CropMirrorNormalize(
            device="gpu",
            output_dtype=types.FLOAT,
            crop=(224, 224),
            image_type=types.RGB,
            mean=[0., 0., 0.],
            std=[1., 1., 1],
        )

        self.uniform = ops.Uniform(range=(0.0, 1.0))
        self.transpose = ops.Transpose(device="gpu", perm=[0, 3, 1, 2])
        self.cast = ops.Cast(device="gpu", dtype=types.INT32)
        self.iter = 0
Beispiel #16
0
def tfr_properties(root_path, index_path, device):
    """Return the "source_info" property of the image and label reader outputs.

    Both results are moved with ``.gpu()`` when ``device`` equals 'gpu';
    otherwise they are returned as produced by ``fn.get_property``.
    """
    import nvidia.dali.tfrecord as tfrec

    records = fn.readers.tfrecord(
        path=root_path,
        index_path=index_path,
        features={
            "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
            "image/class/label": tfrec.FixedLenFeature([1], tfrec.int64, -1),
        },
    )
    enc = fn.get_property(records["image/encoded"], key="source_info")
    lab = fn.get_property(records["image/class/label"], key="source_info")
    if device != 'gpu':
        return enc, lab
    enc = enc.gpu()
    lab = lab.gpu()
    return enc, lab
 def __init__(self, **kwargs):
     """Create a sharded TFRecord reader configured entirely from ``kwargs``.

     All keyword arguments are forwarded to the base constructor. Keys read
     here: "data_paths" (two glob patterns: records, then indices),
     "decoder_cache_params" (its "cache_enabled" entry), "shard_id",
     "num_shards", "random_shuffle" and "dont_use_mmap".
     """
     super(TFRecordPipeline, self).__init__(**kwargs)
     record_files = sorted(glob.glob(kwargs['data_paths'][0]))
     index_files = sorted(glob.glob(kwargs['data_paths'][1]))
     cache_enabled = kwargs['decoder_cache_params']['cache_enabled']
     reader_options = dict(
         path=record_files,
         index_path=index_files,
         shard_id=kwargs['shard_id'],
         num_shards=kwargs['num_shards'],
         random_shuffle=kwargs['random_shuffle'],
         dont_use_mmap=kwargs['dont_use_mmap'],
         # stick_to_shard follows the decoder-cache flag.
         stick_to_shard=cache_enabled,
         features={
             "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
             "image/class/label": tfrec.FixedLenFeature([1], tfrec.int64, -1),
         },
     )
     self.input = ops.readers.TFRecord(**reader_options)
Beispiel #18
0
 def __init__(self, batch_size, num_threads, device_id, **kwargs):
     """Create the test-time reader: sharded, unshuffled, last batch padded.

     Keys read from ``kwargs``: "tfrecords", "tfrecords_idx" and "gpus"
     (used as the shard count; ``device_id`` is the shard id).
     """
     super(TFRecordTest, self).__init__(batch_size, num_threads, device_id)
     record_files = kwargs["tfrecords"]
     index_files = kwargs["tfrecords_idx"]
     record_features = {
         "X_shape": tfrec.FixedLenFeature([4], tfrec.int64, 0),
         "X": tfrec.VarLenFeature([], tfrec.float32, 0.0),
         "fname": tfrec.FixedLenFeature([], tfrec.string, ""),
     }
     self.input = ops.TFRecordReader(path=record_files,
                                     index_path=index_files,
                                     features=record_features,
                                     shard_id=device_id,
                                     num_shards=kwargs["gpus"],
                                     read_ahead=True,
                                     random_shuffle=False,
                                     pad_last_batch=True)
Beispiel #19
0
    def __init__(self, args):
        """Create the reader, decoder, cast and box-encoder operators.

        ``args.batch_size`` and ``args.num_workers`` are forwarded to the
        base constructor followed by two literal zeros.
        NOTE(review): those zeros are presumably device_id and seed; confirm
        against the base class signature. ``test_dummy_data_path`` and
        ``coco_anchors`` are defined outside this snippet.
        """
        super(TFRecordDetectionPipeline, self).__init__(
            args.batch_size, args.num_workers, 0, 0)

        record_file = os.path.join(test_dummy_data_path, 'small_coco.tfrecord')
        index_file = os.path.join(test_dummy_data_path, 'small_coco_index.idx')
        record_features = {
            'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
            'image/object/class/label': tfrec.VarLenFeature([1], tfrec.int64, 0),
            'image/object/bbox': tfrec.VarLenFeature([4], tfrec.float32, 0.0),
        }
        self.input = ops.TFRecordReader(path=record_file,
                                        index_path=index_file,
                                        features=record_features,
                                        shard_id=0,
                                        num_shards=1,
                                        random_shuffle=False)

        self.decode_gpu = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        self.cast = ops.Cast(dtype=types.INT32)
        self.box_encoder = ops.BoxEncoder(
            device="cpu", criteria=0.5, anchors=coco_anchors())
    def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False,
                 index_dir="/home/guojia/idx_files/train"):
        """Build the operators of the training pipeline.

        Args:
            batch_size, num_threads, device_id: forwarded to the base
                pipeline; the pipeline seed is ``12 + device_id``.
            data_dir: passed to the TFRecord reader as ``path``.
            crop: output side length used by the resize and normalize ops.
            dali_cpu: when true, the decoder and resize run on "cpu";
                otherwise the decoder is "mixed" and the resize is "gpu".
                The normalize operator is created on "gpu" in both cases.
            index_dir: directory whose entries are used, in sorted order, as
                the reader's ``index_path``. Defaults to the location that
                was previously hard-coded, so existing callers are
                unaffected.

        The shard id and shard count are read from a module-level ``args``
        object (``args.local_rank`` / ``args.world_size``) that is defined
        outside this snippet.
        """
        super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)

        # Every entry of index_dir is treated as an index file; sorting keeps
        # the order deterministic regardless of what os.listdir returns.
        index_path = sorted(os.path.join(index_dir, name) for name in os.listdir(index_dir))

        # The 'image/class/text' and 'image/object/bbox/*' features were
        # disabled (commented out) by the original author and are not
        # requested here.
        record_features = {
            'image/height': tfrec.FixedLenFeature([1], tfrec.int64, -1),
            'image/width': tfrec.FixedLenFeature([1], tfrec.int64, -1),
            'image/colorspace': tfrec.FixedLenFeature([], tfrec.string, ''),
            'image/channels': tfrec.FixedLenFeature([], tfrec.int64, -1),
            'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64, -1),
            'image/class/synset': tfrec.FixedLenFeature([], tfrec.string, ''),
            'image/format': tfrec.FixedLenFeature((), tfrec.string, ""),
            'image/filename': tfrec.FixedLenFeature((), tfrec.string, ""),
            'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
        }
        self.input = ops.TFRecordReader(path=data_dir, index_path=index_path, shard_id=args.local_rank,
                                        num_shards=args.world_size, random_shuffle=True,
                                        features=record_features)

        # Let the user decide which pipeline works best for the RN version being run.
        dali_device = 'cpu' if dali_cpu else 'gpu'
        decoder_device = 'cpu' if dali_cpu else 'mixed'
        # This padding sets the size of the internal nvJPEG buffers to be able to handle
        # all images from full-sized ImageNet without additional reallocations.
        device_memory_padding = 211025920 if decoder_device == 'mixed' else 0
        host_memory_padding = 140544512 if decoder_device == 'mixed' else 0
        self.decode = ops.ImageDecoderRandomCrop(device=decoder_device, output_type=types.RGB,
                                                 device_memory_padding=device_memory_padding,
                                                 host_memory_padding=host_memory_padding,
                                                 random_aspect_ratio=[0.8, 1.25],
                                                 random_area=[0.1, 1.0],
                                                 num_attempts=100)
        self.res = ops.Resize(device=dali_device, resize_x=crop, resize_y=crop, interp_type=types.INTERP_TRIANGULAR)
        self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                            output_dtype=types.FLOAT,
                                            output_layout=types.NCHW,
                                            crop=(crop, crop),
                                            image_type=types.RGB,
                                            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                            std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
        self.coin = ops.CoinFlip(probability=0.5)
        print('DALI "{0}" variant'.format(dali_device))
Beispiel #21
0
 def tfrecord_pipe_scalars():
     """Read the scalar 'image/height' feature from the dummy COCO TFRecord.

     ``test_dummy_data_path`` is resolved from a scope outside this snippet.
     """
     record_file = os.path.join(test_dummy_data_path, 'small_coco.tfrecord')
     index_file = os.path.join(test_dummy_data_path, 'small_coco_index.idx')
     scalar_features = {
         'image/height': tfrec.FixedLenFeature((), tfrec.int64, -1),
     }
     records = fn.readers.tfrecord(path=record_file,
                                   index_path=index_file,
                                   features=scalar_features)
     return records['image/height']
Beispiel #22
0
def tfrecord_pipe_empty_fields(path, index_path):
    """Read a TFRecord while also requesting two "does/not/exists*" features.

    Returns a 3-tuple: the "image/encoded" output followed by the outputs for
    "does/not/exists" (variable length) and "does/not/exists/as/well"
    (fixed length).
    """
    record_features = {
        "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
        "image/class/label": tfrec.FixedLenFeature([1], tfrec.int64, -1),
        "does/not/exists": tfrec.VarLenFeature(tfrec.int64, -1),
        "does/not/exists/as/well": tfrec.FixedLenFeature([1], tfrec.float32, .0),
    }
    records = fn.readers.tfrecord(path=path,
                                  index_path=index_path,
                                  features=record_features)
    encoded = records["image/encoded"]
    missing_var = records["does/not/exists"]
    missing_fixed = records["does/not/exists/as/well"]
    return encoded, missing_var, missing_fixed
Beispiel #23
0
def test_tfrecord_reader_cpu():
    """Smoke-test the TFRecord reader in a pipeline built with ``device_id=None``.

    Builds the pipeline and runs it three times; the test passes as long as
    nothing raises. ``batch_size`` and ``tfrecord_dir`` come from module scope.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)

    # Data files: names whose last three characters are not 'i', 'd', 'x'
    # (position-wise); index files: names ending in "idx".
    data_files = sorted(glob.glob(os.path.join(tfrecord_dir, '*[!i][!d][!x]')))
    index_files = sorted(glob.glob(os.path.join(tfrecord_dir, '*idx')))

    feature_spec = {
        "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
        "image/class/label": tfrec.FixedLenFeature([1], tfrec.int64, -1),
    }
    records = fn.tfrecord_reader(path=data_files,
                                 index_path=index_files,
                                 shard_id=0,
                                 num_shards=1,
                                 features=feature_spec)

    # Only the encoded image is exposed as a pipeline output.
    pipe.set_outputs(records["image/encoded"])
    pipe.build()
    for _iteration in range(3):
        pipe.run()
Beispiel #24
0
 def __init__(self, batch_size, num_threads, device_id, **kwargs):
     """Create the TFRecord reader and store the benchmark settings.

     Args:
         batch_size: Forwarded to the base-class initializer.
         num_threads: Forwarded to the base-class initializer.
         device_id: Forwarded to the base-class initializer and also used as
             the reader's ``shard_id``.
         **kwargs: Must contain ``dim``, ``tfrecords``, ``tfrecords_idx``,
             ``gpus`` and ``patch_size``.

     Raises:
         KeyError: If one of the required keyword arguments is missing.
     """
     super(TFRecordBenchmark, self).__init__(batch_size, num_threads, device_id)
     self.dim = kwargs["dim"]

     data_files = kwargs["tfrecords"]
     index_files = kwargs["tfrecords_idx"]

     # Both shape features are declared with ``dim + 1`` entries.
     shape_len = self.dim + 1
     feature_spec = {
         "X_shape": tfrec.FixedLenFeature([shape_len], tfrec.int64, 0),
         "Y_shape": tfrec.FixedLenFeature([shape_len], tfrec.int64, 0),
         "X": tfrec.VarLenFeature([], tfrec.float32, 0.0),
         "Y": tfrec.FixedLenFeature([], tfrec.string, ""),
         "fname": tfrec.FixedLenFeature([], tfrec.string, ""),
     }
     self.input = ops.TFRecordReader(path=data_files,
                                     index_path=index_files,
                                     features=feature_spec,
                                     shard_id=device_id,
                                     num_shards=kwargs["gpus"],
                                     read_ahead=True)

     self.patch_size = kwargs["patch_size"]
     # Layout string carries an extra "D" axis only when dim is 3.
     if self.dim == 3:
         self.layout = "CDHW"
     else:
         self.layout = "CHW"
Beispiel #25
0
    def __init__(self, file_tfrecord, batch_size, num_workers, device_id=0):
        """Create the reader, decoder and normalization operators.

        Args:
            file_tfrecord: Path-like object for the TFRecord file; it must
                provide ``as_posix()``, ``parent`` and ``stem``.
            batch_size: Forwarded to the base-class initializer.
            num_workers: Forwarded to the base-class initializer as its
                second positional argument.
            device_id: Forwarded to the base-class initializer.
        """
        super().__init__(batch_size, num_workers, device_id)

        record_path = file_tfrecord.as_posix()
        # The index file sits next to the record file: same stem, ".idx" suffix.
        index_file = file_tfrecord.parent / (file_tfrecord.stem + '.idx')
        feature_spec = {
            'encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
        }
        self.input = ops.TFRecordReader(path=record_path,
                                        index_path=index_file.as_posix(),
                                        features=feature_spec)

        self.decode = ops.ImageDecoder(device='mixed', output_type=types.RGB)

        channel_mean = [124, 116, 104]
        channel_std = [58, 57, 57]
        self.cmnp = ops.CropMirrorNormalize(device='gpu',
                                            output_dtype=types.FLOAT,
                                            output_layout=types.NCHW,
                                            image_type=types.RGB,
                                            mean=channel_mean,
                                            std=channel_std)
Beispiel #26
0
 def __init__(
     self,
     batch_size,
     num_threads,
     device_id,
     size=1024,
     path="/home/guyuchao/ssd/dataset/cityscape/leftImg8bit/dalirecord/dataset-r10.tfrecords",
     index_path="/home/guyuchao/ssd/dataset/cityscape/leftImg8bit/dalirecord/dataset-r10.idx"
 ):
     """Create a TFRecord reader that exposes only ``image/encoded``.

     Args:
         batch_size: Forwarded to the base-class initializer.
         num_threads: Forwarded to the base-class initializer.
         device_id: Forwarded to the base-class initializer.
         size: Forwarded to the base-class initializer as its fourth
             positional argument.
         path: TFRecord file to read.
         index_path: Index file matching ``path``.
     """
     # NOTE(review): what the fourth positional argument means depends on the
     # base class, which is not visible here -- confirm ``size`` is intended.
     super(TFRecordPipeline, self).__init__(batch_size, num_threads,
                                            device_id, size)

     feature_spec = {
         "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
     }
     self.input = ops.TFRecordReader(path=path,
                                     index_path=index_path,
                                     features=feature_spec)
 def _input(self, tfrecord_path, index_path, shard_id=0):
     """Build a shuffling TFRecord reader for image/segmentation records.

     Args:
         tfrecord_path: Forwarded to the reader as ``path``.
         index_path: Forwarded to the reader as ``index_path``.
         shard_id: Accepted but not used by this method.

     Returns:
         The constructed ``ops.TFRecordReader`` operator.
     """
     # NOTE(review): ``shard_id`` is never passed to the reader -- confirm
     # whether sharding was meant to be configured here.

     def string_scalar():
         # Scalar string feature defaulting to the empty string.
         return tfrec.FixedLenFeature((), tfrec.string, "")

     def single_int64():
         # One-element int64 feature defaulting to -1.
         return tfrec.FixedLenFeature([1], tfrec.int64, -1)

     feature_spec = {
         'image/encoded': string_scalar(),
         'image/filename': string_scalar(),
         'image/format': string_scalar(),
         'image/height': single_int64(),
         'image/width': single_int64(),
         'image/channels': single_int64(),
         'image/segmentation/class/encoded': string_scalar(),
         'image/segmentation/class/format': string_scalar(),
     }
     reader = ops.TFRecordReader(path=tfrecord_path,
                                 index_path=index_path,
                                 random_shuffle=True,
                                 features=feature_spec)
     return reader
    def __init__(self, batch_size, num_threads, device_id):
        """Create the TFRecord reader and the CPU augmentation operators.

        The record and index paths are taken from ``tfrecord`` and
        ``tfrecord_idx``, names resolved from the enclosing scope.

        Args:
            batch_size: Forwarded to the base-class initializer.
            num_threads: Forwarded to the base-class initializer.
            device_id: Forwarded to the base-class initializer.
        """
        super(TFRecordPipeline, self).__init__(batch_size, num_threads,
                                               device_id)

        def fixed_string(shape):
            # Fixed-length string feature defaulting to the empty string.
            return tfrec.FixedLenFeature(shape, tfrec.string, '')

        def single_int64():
            # One-element int64 feature defaulting to -1.
            return tfrec.FixedLenFeature([1], tfrec.int64, -1)

        def var_float32():
            # Variable-length float32 feature defaulting to 0.0.
            return tfrec.VarLenFeature(tfrec.float32, 0.0)

        feature_spec = {
            "image/encoded": fixed_string(()),
            'image/filename': fixed_string([]),
            'image/height': single_int64(),
            'image/width': single_int64(),
            'image/colorspace': fixed_string([]),
            'image/channels': single_int64(),
            'image/format': fixed_string([]),
            'image/class/label': single_int64(),
            'image/class/synset': fixed_string([]),
            'image/class/text': fixed_string([]),
            'image/object/bbox/xmin': var_float32(),
            'image/object/bbox/ymin': var_float32(),
            'image/object/bbox/xmax': var_float32(),
            'image/object/bbox/ymax': var_float32(),
            'image/object/bbox/label': single_int64(),
        }
        self.input = ops.TFRecordReader(path=tfrecord,
                                        index_path=tfrecord_idx,
                                        features=feature_spec)

        # Decoding and geometric operators, all placed on the CPU.
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

        self.resize = ops.Resize(device="cpu", resize_x=512., resize_y=512.)
        self.vert_flip = ops.Flip(device="cpu", horizontal=0)
        self.vert_coin = ops.CoinFlip(probability=0.5)
        self.rotate = ops.Rotate(device='cpu', interp_type=types.INTERP_NN)
        self.rotate_range = ops.Uniform(range=(-7, 7))
        self.rotate_coin = ops.CoinFlip(probability=0.2)

        # Zero mean and unit std: the normalization step leaves values unscaled.
        zero_mean = [0., 0., 0.]
        unit_std = [1., 1., 1.]
        self.cmnp = ops.CropMirrorNormalize(device="cpu",
                                            output_dtype=types.FLOAT,
                                            crop=(512, 512),
                                            image_type=types.RGB,
                                            mean=zero_mean,
                                            std=unit_std)
        self.mirror_coin = ops.CoinFlip(probability=0.5)
        self.uniform = ops.Uniform(range=(0.0, 1.0))
        self.iter = 0
def dali_dataloader(
        tfrec_filenames,
        tfrec_idx_filenames,
        shard_id=0, num_shards=1,
        batch_size=128, num_threads=os.cpu_count(),
        image_size=224, num_workers=1, training=True):
    """Build a DALI classification iterator over TFRecord files.

    Args:
        tfrec_filenames: TFRecord file path(s) passed to the reader.
        tfrec_idx_filenames: Matching index file path(s).
        shard_id: Shard read by this loader.
        num_shards: Total number of shards.
        batch_size: Pipeline batch size.
        num_threads: Pipeline thread count; the default is evaluated once,
            when the function is defined.
        image_size: Side length of the square output crop.
        num_workers: Passed to the reader as ``prefetch_queue_depth``.
        training: Selects random-crop decoding, shuffling, random mirroring
            and ``LastBatchPolicy.DROP``; otherwise plain decoding, a
            "not_smaller" resize, no mirroring and
            ``LastBatchPolicy.PARTIAL``.

    Returns:
        A ``DALIClassificationIterator`` wrapping the built pipeline.
    """
    crop_shape = (image_size, image_size)
    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    pipe = Pipeline(batch_size=batch_size,
                    num_threads=num_threads, device_id=0)
    with pipe:
        feature_spec = {
            'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
            'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64,  -1),
        }
        records = fn.readers.tfrecord(
            path=tfrec_filenames,
            index_path=tfrec_idx_filenames,
            random_shuffle=training,
            shard_id=shard_id,
            num_shards=num_shards,
            initial_fill=10000,
            read_ahead=True,
            pad_last_batch=True,
            prefetch_queue_depth=num_workers,
            name='Reader',
            features=feature_spec)
        encoded = records["image/encoded"]

        if training:
            # Random-crop while decoding, then resize to the target square.
            decoded = fn.decoders.image_random_crop(
                encoded,
                device="mixed",
                output_type=types.RGB,
                random_aspect_ratio=[0.8, 1.25],
                random_area=[0.1, 1.0],
                num_attempts=100)
            resized = fn.resize(decoded,
                                device='gpu',
                                resize_x=image_size,
                                resize_y=image_size,
                                interp_type=types.INTERP_TRIANGULAR)
            mirror = fn.random.coin_flip(probability=0.5)
        else:
            # Decode fully, resize to image_size / 0.875, crop happens below.
            decoded = fn.decoders.image(encoded,
                                        device='mixed',
                                        output_type=types.RGB)
            resized = fn.resize(decoded,
                                device='gpu',
                                size=int(image_size / 0.875),
                                mode="not_smaller",
                                interp_type=types.INTERP_TRIANGULAR)
            mirror = False

        normalized = fn.crop_mirror_normalize(
            resized.gpu(),
            dtype=types.FLOAT,
            crop=crop_shape,
            mean=channel_mean,
            std=channel_std,
            mirror=mirror)

        zero_based = records["image/class/label"] - 1  # 0-999
        flat_label = fn.element_extract(zero_based, element_map=0)  # Flatten
        gpu_label = flat_label.gpu()
        pipe.set_outputs(normalized, gpu_label)

    pipe.build()

    if training:
        last_batch_policy = LastBatchPolicy.DROP
    else:
        last_batch_policy = LastBatchPolicy.PARTIAL
    return DALIClassificationIterator(
        pipe, reader_name="Reader", auto_reset=True,
        last_batch_policy=last_batch_policy)
Beispiel #30
0
def get_dali_pipeline(tfrec_filenames,
                      tfrec_idx_filenames,
                      height,
                      width,
                      shard_id,
                      num_gpus,
                      dali_cpu=True,
                      training=True):
    """Define the TFRecord read / decode / resize / normalize graph.

    Args:
        tfrec_filenames: TFRecord file path(s) passed to the reader.
        tfrec_idx_filenames: Matching index file path(s).
        height: Output crop height; also the resize height when training.
        width: Output crop width; also the resize width when training.
        shard_id: Shard read by this pipeline.
        num_gpus: Passed to the reader as ``num_shards``.
        dali_cpu: If true, decode and resize on "cpu"; otherwise decode on
            "mixed" and resize on "gpu".
        training: Selects shuffling and random-crop decoding; otherwise the
            image is decoded whole and resized with ``resize_shorter=256``.

    Returns:
        A ``(images, labels)`` pair of pipeline outputs.
    """
    feature_spec = {
        'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
        'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64, -1),
        'image/class/text': tfrec.FixedLenFeature([], tfrec.string, ''),
        'image/object/bbox/xmin': tfrec.VarLenFeature(tfrec.float32, 0.0),
        'image/object/bbox/ymin': tfrec.VarLenFeature(tfrec.float32, 0.0),
        'image/object/bbox/xmax': tfrec.VarLenFeature(tfrec.float32, 0.0),
        'image/object/bbox/ymax': tfrec.VarLenFeature(tfrec.float32, 0.0),
    }
    records = fn.readers.tfrecord(path=tfrec_filenames,
                                  index_path=tfrec_idx_filenames,
                                  random_shuffle=training,
                                  shard_id=shard_id,
                                  num_shards=num_gpus,
                                  initial_fill=10000,
                                  features=feature_spec)

    if dali_cpu:
        decode_device, resize_device = "cpu", "cpu"
    else:
        decode_device, resize_device = "mixed", "gpu"

    if training:
        decoded = fn.decoders.image_random_crop(
            records["image/encoded"],
            device=decode_device,
            output_type=types.RGB,
            random_aspect_ratio=[0.75, 1.25],
            random_area=[0.05, 1.0],
            num_attempts=100)
        resized = fn.resize(decoded,
                            device=resize_device,
                            resize_x=width,
                            resize_y=height)
    else:
        decoded = fn.decoders.image(records["image/encoded"],
                                    device=decode_device,
                                    output_type=types.RGB)
        # Make sure that every image > 224 for CropMirrorNormalize
        resized = fn.resize(decoded, device=resize_device, resize_shorter=256)

    images = fn.crop_mirror_normalize(resized.gpu(),
                                      dtype=types.FLOAT,
                                      crop=(height, width),
                                      mean=[123.68, 116.78, 103.94],
                                      std=[58.4, 57.12, 57.3],
                                      output_layout="HWC",
                                      mirror=fn.random.coin_flip())

    labels = records["image/class/label"].gpu()
    labels -= 1  # Change to 0-based (don't use background class)
    return images, labels