def get_pipeline(folder="train", custom_reader=None):
    pipe = Pipeline(batch_size=64, num_threads=1, device_id=1)

    if custom_reader:
        raw_files, labels = custom_reader
    else:
        raw_files, labels = fn.file_reader(file_root=folder, random_shuffle=True)

    decode = fn.image_decoder(raw_files, device="mixed", output_type=types.GRAY)
    resize = fn.resize(decode, device="gpu", image_type=types.RGB,
                       interp_type=types.INTERP_LINEAR, resize_x=WIDTH, resize_y=HEIGHT)

    hsv = fn.hsv(resize, hue=fn.uniform(range=(-10, 10)),
                 saturation=fn.uniform(range=(-.5, .5)),
                 value=fn.uniform(range=(0.9, 1.2)), device="gpu", dtype=types.UINT8)
    bc = fn.brightness_contrast(hsv, device="gpu", brightness=fn.uniform(range=(.9, 1.1)))
    cmn = fn.crop_mirror_normalize(bc, device="gpu", output_dtype=types.FLOAT,
                                   output_layout=types.NHWC, image_type=types.GRAY,
                                   mean=[255 // 2], std=[255 // 2])

    rot = fn.rotate(cmn, angle=fn.uniform(range=(-40, 40)), device="gpu", keep_size=True)
    tpose = fn.transpose(rot, perm=(2, 0, 1), device="gpu")  # Reshaping to a format PyTorch likes

    pipe.set_outputs(tpose, labels)
    pipe.build()
    dali_iter = DALIClassificationIterator([pipe], -1)
    return dali_iter
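A brief usage sketch, added here as an assumption rather than taken from the source: the returned DALIClassificationIterator yields, per step, a list with one dict per pipeline whose "data" and "label" entries can be consumed directly in a PyTorch training loop.

train_iter = get_pipeline(folder="train")
for step, batch in enumerate(train_iter):
    images = batch[0]["data"]    # GPU tensor, float CHW output of the pipeline
    labels = batch[0]["label"]
    # ... forward / backward pass ...
    if step == 100:              # size=-1 above, so the caller controls epoch length
        break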
def vid_pipeline(num_frames, width, height, seq_layout):
    vid, _ = fn.readers.video_resize(filenames=[vid_file], labels=[], name='video reader',
                                     sequence_length=num_frames,
                                     file_list_include_preceding_frame=True,
                                     device='gpu', seed=42,
                                     resize_x=width, resize_y=height)
    if seq_layout == "FCHW":
        vid = fn.transpose(vid, perm=[0, 3, 1, 2])
    elif seq_layout == "CFHW":
        vid = fn.transpose(vid, perm=[3, 0, 1, 2])
    else:
        assert seq_layout == "FHWC"
    return vid
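A minimal sketch of how vid_pipeline might be instantiated; this is an assumption, since the snippet relies on a module-level vid_file and uses DALI's functional API, which is typically wrapped with the pipeline_def decorator before being built.

from nvidia.dali import pipeline_def

vid_file = "/path/to/video.mp4"  # hypothetical input video

pipe = pipeline_def(vid_pipeline)(num_frames=8, width=224, height=224, seq_layout="FCHW",
                                  batch_size=2, num_threads=2, device_id=0)
pipe.build()
(sequences,) = pipe.run()  # GPU batch of 8-frame sequences in FCHW layout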
def pipe_salt_and_pepper_noise(prob, salt_vs_pepper, channel_first, per_channel,
                               salt_val, pepper_val, device='cpu'):
    encoded, _ = fn.readers.file(file_root=images_dir)
    in_data = fn.decoders.image(encoded, output_type=types.RGB)
    if device == 'gpu':
        in_data = in_data.gpu()
    if channel_first:
        in_data = fn.transpose(in_data, perm=[2, 0, 1])
    prob_arg = prob or fn.random.uniform(range=(0.05, 0.5))
    salt_vs_pepper_arg = salt_vs_pepper or fn.random.uniform(range=(0.25, 0.75))
    out_data = fn.noise.salt_and_pepper(
        in_data, per_channel=per_channel, prob=prob_arg,
        salt_vs_pepper=salt_vs_pepper_arg,
        salt_val=salt_val, pepper_val=pepper_val
    )
    return in_data, out_data, prob_arg, salt_vs_pepper_arg
import os

import cv2
import numpy as np
import nvidia.dali as dali
import nvidia.dali.fn as fn


def setup_dali(
    image_file='/mnt/data/DATASETS/samples/images/image_110.jpg',
    image_dim=[800, 1600],
    batch_size=1,
    num_threads=4,
    device='mixed',
    device_id=0,
    output_dir='./out/',
):
    os.makedirs(os.path.dirname(output_dir), exist_ok=True)

    pipeline = dali.pipeline.Pipeline(batch_size=batch_size,
                                      num_threads=num_threads,
                                      device_id=device_id)
    with pipeline:
        data, _ = fn.file_reader(files=[image_file])

        # image preprocess
        images = fn.image_decoder(data, device=device)
        images = fn.resize(images, size=image_dim, mode="not_larger", max_size=image_dim)
        images = fn.pad(images, fill_value=0, shape=[image_dim[0], image_dim[1], 1])
        images = fn.transpose(images, perm=[2, 0, 1])
        images = fn.cast(images, dtype=dali.types.FLOAT)
        images = images / 255.

        # input shape
        input_shape = np.float32((image_dim[0], image_dim[1], 1))

        # original shape
        shapes = fn.peek_image_shape(data)
        shapes = fn.cast(shapes, dtype=dali.types.FLOAT)

        # gather outputs
        out = [images, input_shape, shapes]
        pipeline.set_outputs(*out)

    pipeline.build()
    output = pipeline.run()

    img = output[0].at(0) if device == 'cpu' else output[0].as_cpu().at(0)
    img = img.transpose(1, 2, 0)  # HWC
    img = img[:, :, ::-1]         # BGR
    # Note: pixel values are in [0, 1] after the division by 255 above.
    cv2.imwrite(os.path.join(output_dir, 'dali_image.jpg'), img)
def ExternalSourcePipeline(params, num_threads, device_id, external_data, seed):
    pipe = Pipeline(params.batch_size, num_threads, device_id, seed=seed)
    with pipe:
        jpegs, labels = fn.external_source(source=external_data, num_outputs=2)
        images = fn.image_decoder(jpegs, device="mixed", output_type=types.RGB)
        images = fn.resize(images, resize_x=224, resize_y=224)
        images = fn.cast(images, dtype=types.UINT8) / 255
        images = fn.normalize(images, axes=[0, 1], mean=params.mean, stddev=params.std,
                              device='gpu', batch=False)
        output = fn.transpose(images, perm=[2, 0, 1], device='gpu')
        pipe.set_outputs(output, labels)
    return pipe
def create_video_reader_pipeline(sequence_length, files, crop_size):
    images = fn.readers.video(device="gpu", filenames=files, sequence_length=sequence_length,
                              normalized=False, random_shuffle=True, image_type=types.RGB,
                              dtype=types.UINT8, initial_fill=16, pad_last_batch=True,
                              name="Reader")
    images = fn.crop(images, crop=crop_size, dtype=types.FLOAT,
                     crop_pos_x=fn.random.uniform(range=(0.0, 1.0)),
                     crop_pos_y=fn.random.uniform(range=(0.0, 1.0)))
    images = fn.transpose(images, perm=[3, 0, 1, 2])
    return images
import os

import numpy as np
import nvidia.dali as dali
import nvidia.dali.fn as fn


def setup_dali(
    input_name='DALI_INPUT_0',
    image_dim=[896, 1536],
    batch_size=1,
    num_threads=4,
    device='cpu',
    device_id=0,
    output_dir='./out/',
):
    pipeline = dali.pipeline.Pipeline(batch_size=batch_size,
                                      num_threads=num_threads,
                                      device_id=device_id)
    with pipeline:
        data = fn.external_source(name=input_name, device="cpu")

        # image preprocess
        images = fn.image_decoder(data, device=device)
        images = fn.resize(images, size=image_dim, mode="not_larger", max_size=image_dim)
        images = fn.pad(images, fill_value=0, shape=[image_dim[0], image_dim[1], 1])
        images = fn.transpose(images, perm=[2, 0, 1])
        images = fn.cast(images, dtype=dali.types.FLOAT)
        images = images / 255.

        # input shape
        input_shape = np.float32((image_dim[0], image_dim[1], 1))

        # original shape
        shapes = fn.peek_image_shape(data)
        shapes = fn.cast(shapes, dtype=dali.types.FLOAT)

        # gather outputs
        out = [images, input_shape, shapes]
        pipeline.set_outputs(*out)

    os.makedirs(os.path.dirname(output_dir), exist_ok=True)
    pipeline.serialize(filename=os.path.join(output_dir, 'model.dali'))
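The serialized pipeline above is usually consumed by another process (for example the Triton DALI backend). A minimal sketch of loading and driving it back in Python, assuming DALI's Pipeline.deserialize and feed_input APIs and a hypothetical sample image path:

import numpy as np
from nvidia.dali.pipeline import Pipeline

pipe = Pipeline.deserialize(filename='./out/model.dali',
                            batch_size=1, num_threads=4, device_id=0)
pipe.build()
# Feed one encoded JPEG to the external_source named DALI_INPUT_0.
raw = np.fromfile('/path/to/sample.jpg', dtype=np.uint8)
pipe.feed_input('DALI_INPUT_0', [raw])
images, input_shape, shapes = pipe.run()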
def create_video_reader_pipeline(batch_size, sequence_length, num_threads, device_id,
                                 files, crop_size):
    pipeline = Pipeline(batch_size, num_threads, device_id, seed=12)
    with pipeline:
        images = fn.video_reader(device="gpu", filenames=files, sequence_length=sequence_length,
                                 normalized=False, random_shuffle=True, image_type=types.RGB,
                                 dtype=types.UINT8, initial_fill=16, pad_last_batch=True,
                                 name="Reader")
        images = fn.crop(images, crop=crop_size, dtype=types.FLOAT,
                         crop_pos_x=fn.uniform(range=(0.0, 1.0)),
                         crop_pos_y=fn.uniform(range=(0.0, 1.0)))
        images = fn.transpose(images, perm=[3, 0, 1, 2])
        pipeline.set_outputs(images)
    return pipeline
def transpose_fn(self, img, lbl):
    img, lbl = fn.transpose(img, perm=(1, 0, 2, 3)), fn.transpose(lbl, perm=(1, 0, 2, 3))
    return img, lbl
                           bytes_per_sample_hint=ImageBytes,
                           brightness=fn.uniform(range=(0.5, 1.5)),
                           contrast=fn.uniform(range=(0.5, 2.5)),
                           saturation=fn.uniform(range=(0.1, 2)))
images = fn.cast(images, device='gpu', bytes_per_sample_hint=ImageBytes,
                 dtype=DALIDataType.FLOAT)
images = fn.normalize(
    images, device='gpu', bytes_per_sample_hint=ImageBytes,
    mean=Constant(numpy.array([[[190.6380, 207.2640, 202.5720]]])),
    stddev=Constant(numpy.array([[[85.2720, 68.6970, 81.4215]]])))
images = fn.transpose(images, device='gpu', bytes_per_sample_hint=ImageBytes,
                      perm=[2, 0, 1])
TestingPipe.set_outputs(images, labels)
TestingLoader = DALIClassificationIterator(TestingPipe, size=1000 * args.bs)

model_top = torch.load(args.top)
model_top.eval()
model_bottom = torch.load(args.bottom)
model_bottom.eval()
torch.backends.cudnn.benchmark = True

label_mean = torch.Tensor([57.9607, 134.3954, 55.0105, 378.1777]).cuda()
label_std = torch.Tensor([7.7420, 14.6187, 24.0051, 7.5642]).cuda()

outputs = [['images', 'x1', 'y1', 'x2', 'y2']]
index = 1
def load_tfrecord(directory, batch_size, training):
    tfrecord = []
    tfrecord_idx = []
    for f in os.listdir(directory):
        fullpath = os.path.join(directory, f)
        if not os.path.isfile(fullpath):
            continue
        if f.endswith(".tfrecord"):
            tfrecord.append(fullpath)
        if f.endswith(".idx"):
            tfrecord_idx.append(fullpath)
    tfrecord.sort()
    tfrecord_idx.sort()

    pipe = Pipeline(batch_size=batch_size, num_threads=32, device_id=0)
    with pipe:
        inputs = fn.tfrecord_reader(
            path=tfrecord,
            index_path=tfrecord_idx,
            features={
                "frame_one": tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_two": tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_three": tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_four": tfrec.FixedLenFeature((), tfrec.string, ""),
                "plus_one_position": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_one_orientation": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_two_position": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_two_orientation": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_three_position": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_three_orientation": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "speed": tfrec.FixedLenFeature([], tfrec.float32, 0.0),
            })

        frame1 = inputs["frame_one"]
        frame1 = fn.image_decoder(frame1, device="mixed", output_type=types.RGB)
        # frame1 = fn.resize(frame1, device="gpu", resize_shorter=256.)
        frame1 = fn.crop_mirror_normalize(frame1, device="gpu", dtype=types.FLOAT,
                                          mean=[0., 0., 0.], std=[1., 1., 1.])
        frame1 = fn.transpose(frame1, device="gpu", perm=[1, 2, 0])

        frame2 = inputs["frame_two"]
        frame2 = fn.image_decoder(frame2, device="mixed", output_type=types.RGB)
        # frame2 = fn.resize(frame2, device="gpu", resize_shorter=256.)
        frame2 = fn.crop_mirror_normalize(frame2, device="gpu", dtype=types.FLOAT,
                                          mean=[0., 0., 0.], std=[1., 1., 1.])
        frame2 = fn.transpose(frame2, device="gpu", perm=[1, 2, 0])

        position = inputs["plus_one_position"].gpu()
        orientation = inputs["plus_one_orientation"].gpu()
        speed = inputs["speed"].gpu()

        image = fn.cat(frame1, frame2, device="gpu", axis=2)
        pose = fn.cat(position, orientation, device="gpu", axis=0)

        pipe.set_outputs(image, pose, speed)

    # Define shapes and types of the three outputs
    # (two RGB frames concatenated along the channel axis -> 6 channels)
    shapes = ((batch_size, 480, 640, 6),
              (batch_size, 6),
              (batch_size,))
    dtypes = (tf.float32, tf.float32, tf.float32)

    # Create dataset
    return dali_tf.DALIDataset(
        pipeline=pipe,
        batch_size=batch_size,
        output_shapes=shapes,
        output_dtypes=dtypes,
        device_id=0)
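A short usage sketch (assumed, not part of the source): the function returns a tf.data-compatible DALIDataset, so it can be iterated like any other TensorFlow dataset; the directory path below is hypothetical.

dataset = load_tfrecord("/path/to/tfrecords", batch_size=8, training=True)
for image, pose, speed in dataset.take(1):
    print(image.shape, pose.shape, speed.shape)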