def test_tfrecord_reader_cpu():
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    tfrecord = sorted(glob.glob(os.path.join(tfrecord_dir, '*[!i][!d][!x]')))
    tfrecord_idx = sorted(glob.glob(os.path.join(tfrecord_dir, '*idx')))
    input = fn.tfrecord_reader(
        path=tfrecord,
        index_path=tfrecord_idx,
        shard_id=0,
        num_shards=1,
        features={
            "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
            "image/class/label": tfrec.FixedLenFeature([1], tfrec.int64, -1)
        })
    out = input["image/encoded"]
    pipe.set_outputs(out)
    pipe.build()
    for _ in range(3):
        pipe.run()
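# The reader above expects one index (.idx) file per TFRecord shard so it can
# seek into the records and shard the data. A minimal sketch for generating
# missing index files, assuming the `tfrecord2idx` script shipped with DALI is
# on PATH and that shards end in ".tfrecord" (the helper name and directory
# layout are illustrative, not part of the original example):
import os
import subprocess

def ensure_tfrecord_indexes(data_dir):
    for name in sorted(os.listdir(data_dir)):
        if not name.endswith(".tfrecord"):
            continue
        tfrecord_path = os.path.join(data_dir, name)
        idx_path = tfrecord_path + ".idx"
        if not os.path.exists(idx_path):
            # Usage: tfrecord2idx <tfrecord file> <index file>
            subprocess.run(["tfrecord2idx", tfrecord_path, idx_path], check=True)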
def get_dali_pipeline(tfrec_filenames, tfrec_idx_filenames, height, width,
                      batch_size, num_threads, device_id, shard_id, num_gpus,
                      deterministic=False, dali_cpu=True, training=True):
    kwargs = dict()
    if deterministic:
        kwargs['seed'] = 7 * (1 + hvd.rank())
    pipeline = Pipeline(batch_size, num_threads, device_id, **kwargs)

    with pipeline:
        inputs = fn.tfrecord_reader(
            path=tfrec_filenames,
            index_path=tfrec_idx_filenames,
            random_shuffle=training,
            shard_id=shard_id,
            num_shards=num_gpus,
            initial_fill=10000,
            features={
                'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
                'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64, -1),
                'image/class/text': tfrec.FixedLenFeature([], tfrec.string, ''),
                'image/object/bbox/xmin': tfrec.VarLenFeature(tfrec.float32, 0.0),
                'image/object/bbox/ymin': tfrec.VarLenFeature(tfrec.float32, 0.0),
                'image/object/bbox/xmax': tfrec.VarLenFeature(tfrec.float32, 0.0),
                'image/object/bbox/ymax': tfrec.VarLenFeature(tfrec.float32, 0.0)
            })

        decode_device = "cpu" if dali_cpu else "mixed"
        resize_device = "cpu" if dali_cpu else "gpu"

        if training:
            images = fn.image_decoder_random_crop(
                inputs["image/encoded"],
                device=decode_device,
                output_type=types.RGB,
                random_aspect_ratio=[0.75, 1.25],
                random_area=[0.05, 1.0],
                num_attempts=100)
            images = fn.resize(images, device=resize_device,
                               resize_x=width, resize_y=height)
        else:
            images = fn.image_decoder(inputs["image/encoded"],
                                      device=decode_device,
                                      output_type=types.RGB)
            # Make sure that every image > 224 for CropMirrorNormalize
            images = fn.resize(images, device=resize_device, resize_shorter=256)

        images = fn.crop_mirror_normalize(
            images.gpu(),
            dtype=types.FLOAT,
            crop=(height, width),
            mean=[123.68, 116.78, 103.94],
            std=[58.4, 57.12, 57.3],
            output_layout="HWC",
            mirror=fn.random.coin_flip())

        labels = inputs["image/class/label"].gpu()
        labels -= 1  # Change to 0-based (don't use background class)
        pipeline.set_outputs(images, labels)

    return pipeline
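# A minimal usage sketch for the pipeline above, wrapping it in a
# tf.data-compatible DALIDataset with one shard per Horovod process.
# The file paths, image size, batch size, and output shapes below are
# placeholder assumptions, not part of the original example.
import glob
import horovod.tensorflow as hvd
import nvidia.dali.plugin.tf as dali_tf
import tensorflow as tf

hvd.init()

train_files = sorted(glob.glob("/data/train/*.tfrecord"))   # placeholder paths
train_idx_files = sorted(glob.glob("/data/train/*.idx"))

pipe = get_dali_pipeline(
    tfrec_filenames=train_files,
    tfrec_idx_filenames=train_idx_files,
    height=224, width=224,
    batch_size=256, num_threads=4,
    device_id=hvd.local_rank(),
    shard_id=hvd.rank(),
    num_gpus=hvd.size(),
    dali_cpu=False,
    training=True)

dataset = dali_tf.DALIDataset(
    pipeline=pipe,
    batch_size=256,
    output_shapes=((256, 224, 224, 3), (256, 1)),  # HWC images, [1]-shaped labels
    output_dtypes=(tf.float32, tf.int64),
    device_id=hvd.local_rank())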
def load_tfrecord(directory, batch_size, training):
    tfrecord = []
    tfrecord_idx = []
    for f in os.listdir(directory):
        fullpath = os.path.join(directory, f)
        if not os.path.isfile(fullpath):
            continue
        if f.endswith(".tfrecord"):
            tfrecord.append(fullpath)
        if f.endswith(".idx"):
            tfrecord_idx.append(fullpath)
    tfrecord.sort()
    tfrecord_idx.sort()

    pipe = Pipeline(batch_size=batch_size, num_threads=32, device_id=0)
    with pipe:
        inputs = fn.tfrecord_reader(
            path=tfrecord,
            index_path=tfrecord_idx,
            features={
                "frame_one": tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_two": tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_three": tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_four": tfrec.FixedLenFeature((), tfrec.string, ""),
                "plus_one_position": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_one_orientation": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_two_position": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_two_orientation": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_three_position": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_three_orientation": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "speed": tfrec.FixedLenFeature([], tfrec.float32, 0.0),
            })

        # Decode, normalize, and transpose back to HWC (crop_mirror_normalize outputs CHW by default)
        frame1 = inputs["frame_one"]
        frame1 = fn.image_decoder(frame1, device="mixed", output_type=types.RGB)
        # frame1 = fn.resize(frame1, device="gpu", resize_shorter=256.)
        frame1 = fn.crop_mirror_normalize(frame1, device="gpu", dtype=types.FLOAT,
                                          mean=[0., 0., 0.], std=[1., 1., 1.])
        frame1 = fn.transpose(frame1, device="gpu", perm=[1, 2, 0])

        frame2 = inputs["frame_two"]
        frame2 = fn.image_decoder(frame2, device="mixed", output_type=types.RGB)
        # frame2 = fn.resize(frame2, device="gpu", resize_shorter=256.)
        frame2 = fn.crop_mirror_normalize(frame2, device="gpu", dtype=types.FLOAT,
                                          mean=[0., 0., 0.], std=[1., 1., 1.])
        frame2 = fn.transpose(frame2, device="gpu", perm=[1, 2, 0])

        position = inputs["plus_one_position"].gpu()
        orientation = inputs["plus_one_orientation"].gpu()
        speed = inputs["speed"].gpu()

        image = fn.cat(frame1, frame2, device="gpu", axis=2)  # stack the two frames along channels
        pose = fn.cat(position, orientation, device="gpu", axis=0)
        pipe.set_outputs(image, pose, speed)

    # Define shapes and types of the three outputs: two 480x640 RGB frames
    # concatenated along the channel axis, a 6-element pose, and a scalar speed.
    shapes = ((batch_size, 480, 640, 6),
              (batch_size, 6),
              (batch_size,))
    dtypes = (tf.float32, tf.float32, tf.float32)

    # Create dataset
    return dali_tf.DALIDataset(
        pipeline=pipe,
        batch_size=batch_size,
        output_shapes=shapes,
        output_dtypes=dtypes,
        device_id=0)
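# A small consumption sketch for load_tfrecord(); the directory path is a
# placeholder. Since the pipeline above runs on GPU 0, the dataset is created
# and iterated under that device.
import tensorflow as tf

with tf.device("/gpu:0"):
    train_data = load_tfrecord("/data/train_records", batch_size=32, training=True)
    for image, pose, speed in train_data.take(2):
        print(image.shape, pose.shape, speed.shape)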