예제 #1
0
def test_tfrecord_reader_cpu():
    """Smoke-test the TFRecord reader in a pipeline created without a GPU.

    Builds a pipeline with ``device_id=None``, reads the TFRecord files
    found under ``tfrecord_dir``, outputs the ``"image/encoded"`` feature
    and runs three iterations.  Nothing is asserted: the test passes as
    long as building and running the pipeline does not raise.
    """
    cpu_pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)

    # Data files: names whose last three characters are, position by
    # position, not 'i', not 'd' and not 'x'.  Index files: names ending
    # in "idx".  Both lists are sorted, presumably so that each data file
    # lines up with its index file by position.
    data_files = glob.glob(os.path.join(tfrecord_dir, '*[!i][!d][!x]'))
    data_files.sort()
    index_files = glob.glob(os.path.join(tfrecord_dir, '*idx'))
    index_files.sort()

    feature_spec = {
        "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
        "image/class/label": tfrec.FixedLenFeature([1], tfrec.int64, -1),
    }
    records = fn.tfrecord_reader(path=data_files,
                                 index_path=index_files,
                                 shard_id=0,
                                 num_shards=1,
                                 features=feature_spec)

    # Only the encoded image is exposed; the label feature is parsed but
    # not returned.
    cpu_pipe.set_outputs(records["image/encoded"])
    cpu_pipe.build()
    for _ in range(3):
        cpu_pipe.run()
예제 #2
0
def get_dali_pipeline(tfrec_filenames,
                      tfrec_idx_filenames,
                      height,
                      width,
                      batch_size,
                      num_threads,
                      device_id,
                      shard_id,
                      num_gpus,
                      deterministic=False,
                      dali_cpu=True,
                      training=True):
    """Define a DALI pipeline that reads TFRecords and outputs (images, labels).

    Args:
        tfrec_filenames: TFRecord file paths passed to the reader as ``path``.
        tfrec_idx_filenames: Index file paths passed as ``index_path``.
        height: Output crop height; also the resize height when training.
        width: Output crop width; also the resize width when training.
        batch_size: Batch size given to ``Pipeline``.
        num_threads: Thread count given to ``Pipeline``.
        device_id: Device id given to ``Pipeline``.
        shard_id: Shard index read by this pipeline.
        num_gpus: Total number of shards (``num_shards`` of the reader).
        deterministic: When true, the pipeline is seeded with
            ``7 * (1 + hvd.rank())``.
        dali_cpu: When true, decoding and resizing use the ``"cpu"`` device;
            otherwise ``"mixed"`` decoding and ``"gpu"`` resizing are used.
        training: When true, the reader shuffles and images go through a
            random-crop decode followed by a resize to ``width`` x
            ``height``; otherwise images are decoded whole and resized so
            the shorter side is 256.

    Returns:
        The ``Pipeline`` with its outputs set to ``(images, labels)``.  The
        pipeline is not built here.
    """
    pipeline_kwargs = {}
    if deterministic:
        pipeline_kwargs['seed'] = 7 * (1 + hvd.rank())
    pipeline = Pipeline(batch_size, num_threads, device_id,
                        **pipeline_kwargs)

    with pipeline:
        feature_spec = {
            'image/encoded':
            tfrec.FixedLenFeature((), tfrec.string, ""),
            'image/class/label':
            tfrec.FixedLenFeature([1], tfrec.int64, -1),
            'image/class/text':
            tfrec.FixedLenFeature([], tfrec.string, ''),
            'image/object/bbox/xmin':
            tfrec.VarLenFeature(tfrec.float32, 0.0),
            'image/object/bbox/ymin':
            tfrec.VarLenFeature(tfrec.float32, 0.0),
            'image/object/bbox/xmax':
            tfrec.VarLenFeature(tfrec.float32, 0.0),
            'image/object/bbox/ymax':
            tfrec.VarLenFeature(tfrec.float32, 0.0),
        }
        inputs = fn.tfrecord_reader(path=tfrec_filenames,
                                    index_path=tfrec_idx_filenames,
                                    random_shuffle=training,
                                    shard_id=shard_id,
                                    num_shards=num_gpus,
                                    initial_fill=10000,
                                    features=feature_spec)

        if dali_cpu:
            decode_device, resize_device = "cpu", "cpu"
        else:
            decode_device, resize_device = "mixed", "gpu"

        encoded = inputs["image/encoded"]
        if not training:
            decoded = fn.image_decoder(encoded,
                                       device=decode_device,
                                       output_type=types.RGB)
            # Make sure that every image > 224 for CropMirrorNormalize
            images = fn.resize(decoded,
                               device=resize_device,
                               resize_shorter=256)
        else:
            cropped = fn.image_decoder_random_crop(
                encoded,
                device=decode_device,
                output_type=types.RGB,
                random_aspect_ratio=[0.75, 1.25],
                random_area=[0.05, 1.0],
                num_attempts=100)
            images = fn.resize(cropped,
                               device=resize_device,
                               resize_x=width,
                               resize_y=height)

        # The final crop/normalize step always receives GPU data, whatever
        # device did the decoding and resizing.
        gpu_images = images.gpu()
        # NOTE(review): the mirror flag is random for evaluation
        # (training=False) as well -- confirm this is intended.
        mirror_flag = fn.random.coin_flip()
        images = fn.crop_mirror_normalize(gpu_images,
                                          dtype=types.FLOAT,
                                          crop=(height, width),
                                          mean=[123.68, 116.78, 103.94],
                                          std=[58.4, 57.12, 57.3],
                                          output_layout="HWC",
                                          mirror=mirror_flag)

        labels = inputs["image/class/label"].gpu()
        labels -= 1  # Change to 0-based (don't use background class)

        pipeline.set_outputs(images, labels)
    return pipeline
예제 #3
0
def _decode_frame_hwc(encoded):
    """Decode one encoded frame to a float RGB image with channels last."""
    frame = fn.image_decoder(encoded, device="mixed", output_type=types.RGB)
    # Zero mean / unit std and no crop: this step only converts to float.
    # No output layout is requested, so the operator's default applies
    # (channel-first per the DALI documentation).
    frame = fn.crop_mirror_normalize(frame,
                                     device="gpu",
                                     dtype=types.FLOAT,
                                     mean=[0., 0., 0.],
                                     std=[1., 1., 1.])
    # Move axis 0 (channels) to the end.
    return fn.transpose(frame, device="gpu", perm=[1, 2, 0])


def load_tfrecord(directory, batch_size, training):
    """Create a ``DALIDataset`` over the TFRecord files in ``directory``.

    Regular files ending in ``.tfrecord`` are used as data files and those
    ending in ``.idx`` as index files.  Both lists are sorted, so data and
    index files are paired by their sorted position.

    The dataset yields three float32 outputs per batch:

    * ``image``: ``frame_one`` and ``frame_two`` decoded as RGB and
      concatenated along the last (channel) axis, i.e. 6 channels.
    * ``pose``: ``plus_one_position`` followed by ``plus_one_orientation``
      (3 + 3 = 6 values).
    * ``speed``: the ``speed`` feature.

    Args:
        directory: Directory scanned (non-recursively) for input files.
        batch_size: Batch size of the pipeline and of the dataset.
        training: Currently unused; kept so existing callers keep working.

    Returns:
        A ``dali_tf.DALIDataset`` bound to device 0.
    """
    files = [
        path
        for path in (os.path.join(directory, name)
                     for name in os.listdir(directory))
        if os.path.isfile(path)
    ]
    tfrecord = sorted(p for p in files if p.endswith(".tfrecord"))
    tfrecord_idx = sorted(p for p in files if p.endswith(".idx"))

    pipe = Pipeline(batch_size=batch_size, num_threads=32, device_id=0)
    with pipe:
        inputs = fn.tfrecord_reader(
            path=tfrecord,
            index_path=tfrecord_idx,
            features={
                "frame_one":
                tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_two":
                tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_three":
                tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_four":
                tfrec.FixedLenFeature((), tfrec.string, ""),
                "plus_one_position":
                tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_one_orientation":
                tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_two_position":
                tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_two_orientation":
                tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_three_position":
                tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_three_orientation":
                tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "speed":
                tfrec.FixedLenFeature([], tfrec.float32, 0.0),
            })

        # Only the first two frames and the "plus_one" pose are used; the
        # remaining features are parsed but not output.
        frame1 = _decode_frame_hwc(inputs["frame_one"])
        frame2 = _decode_frame_hwc(inputs["frame_two"])

        position = inputs["plus_one_position"].gpu()
        orientation = inputs["plus_one_orientation"].gpu()
        speed = inputs["speed"].gpu()

        image = fn.cat(frame1, frame2, device="gpu", axis=2)
        pose = fn.cat(position, orientation, device="gpu", axis=0)
        pipe.set_outputs(image, pose, speed)

    # One shape and one dtype per pipeline output (image, pose, speed).
    # The image carries a trailing channel axis of 2 frames x 3 (RGB) = 6.
    # NOTE(review): 480x640 is the frame size assumed by the original
    # code, and the speed shape assumes one scalar per sample -- confirm
    # both against the data.
    shapes = ((batch_size, 480, 640, 6), (batch_size, 6), (batch_size,))
    dtypes = (tf.float32, tf.float32, tf.float32)

    # Create dataset
    return dali_tf.DALIDataset(pipeline=pipe,
                               batch_size=batch_size,
                               output_shapes=shapes,
                               output_dtypes=dtypes,
                               device_id=0)