def __init__(self, device, batch_size, num_threads=1, device_id=0):
    """Create the operators of the multichannel image pipeline.

    Args:
        device: Backend used for resize/crop/transpose/normalize. When it is
            ``'gpu'`` the decoder runs on the ``'mixed'`` backend, otherwise
            on ``'cpu'``.
        batch_size: Forwarded to the base pipeline constructor.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor.
    """
    super(MultichannelPipeline, self).__init__(batch_size, num_threads, device_id)
    self.device = device

    # The file list is taken from the module-level ``multichannel_tiff_files``.
    self.reader = ops.readers.File(files=multichannel_tiff_files)

    if self.device == 'gpu':
        decoder_device = 'mixed'
    else:
        decoder_device = 'cpu'
    self.decoder = ops.decoders.Image(
        device=decoder_device,
        output_type=types.ANY_DATA,
    )

    self.resize = ops.Resize(
        device=self.device,
        resize_y=900,
        resize_x=300,
        min_filter=types.DALIInterpType.INTERP_LINEAR,
    )
    self.crop = ops.Crop(
        device=self.device,
        crop_h=220,
        crop_w=224,
        crop_pos_x=0.3,
        crop_pos_y=0.2,
    )
    self.transpose = ops.Transpose(
        device=self.device,
        perm=(1, 0, 2),
        transpose_layout=False,
    )
    self.cmn = ops.CropMirrorNormalize(
        device=self.device,
        std=255.,
        mean=0.,
        output_layout="HWC",
        dtype=types.FLOAT,
    )
def __init__(self, DATA_PATH, input_height, batch_size, num_threads, device_id):
    """Create the operators for the SimCLR fine-tuning training transform.

    Args:
        DATA_PATH: Root directory handed to the file reader.
        input_height: Output size of the random resized crop; also used to
            derive the Gaussian blur kernel diameter (about 10% of it).
        batch_size: Forwarded to the base pipeline and used as the crop
            operator's ``minibatch_size``.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor.
    """
    super(SimCLRFinetuneTrainDataTransform, self).__init__(batch_size, num_threads, device_id, seed=12)

    self.COPIES = 1
    self.input_height = input_height

    self.input = ops.FileReader(file_root=DATA_PATH, random_shuffle=True, seed=12)

    # Random sources feeding the augmentation arguments below.
    self.coin = ops.CoinFlip(probability=0.5)
    self.uniform = ops.Uniform(range=[0.7, 1.3])

    # Files are read on the CPU; the 'mixed' decoder outputs to the GPU.
    self.decode = ops.ImageDecoder(device='mixed', output_type=types.RGB)
    self.crop = ops.RandomResizedCrop(size=self.input_height, minibatch_size=batch_size, device="gpu")
    self.flip = ops.Flip(vertical=self.coin(), horizontal=self.coin(), device="gpu")
    self.colorjit_gray = ops.ColorTwist(brightness=self.uniform(),
                                        contrast=self.uniform(),
                                        hue=self.uniform(),
                                        saturation=self.uniform(),
                                        device="gpu")

    # GaussianBlur requires an odd kernel diameter. int(0.1 * height) is
    # frequently even (e.g. 22 for height 224) and is 0 for tiny inputs, so
    # round up to the nearest odd value.
    blur_window = int(0.1 * self.input_height) | 1
    self.blur = ops.GaussianBlur(window_size=blur_window, device="gpu", dtype=types.FLOAT)

    self.swapaxes = ops.Transpose(perm=[2, 0, 1], device="gpu")
    self.to_int64 = ops.Cast(dtype=types.INT64, device="gpu")
def __init__(self, params, num_threads, device_id):
    """Create a GPU external-source pipeline with optional 3-axis rotation.

    Args:
        params: Configuration object; ``batch_size``, ``no_copy`` and
            ``rotate_input`` are read here, and it is also passed to
            ``DaliInputIterator``.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline and the input iterator.
    """
    super(DaliPipeline, self).__init__(params.batch_size, num_threads, device_id, seed=12)

    input_iterator = DaliInputIterator(params, device_id)

    self.no_copy = params.no_copy
    if self.no_copy:
        print("Use Zero Copy ES")

    self.source = ops.ExternalSource(
        device="gpu",
        source=input_iterator,
        num_outputs=2,
        layout=["DHWC", "DHWC"],
        no_copy=self.no_copy,
    )

    # Rotation is enabled only when the flag is exactly 1.
    self.do_rotate = bool(params.rotate_input == 1)
    if self.do_rotate:
        print("Enable Rotation")
    else:
        print("Disable Rotation")

    self.rng_angle = ops.Uniform(device="cpu", range=[-1.5, 2.5])
    self.icast = ops.Cast(device="cpu", dtype=types.INT32)
    self.fcast = ops.Cast(device="cpu", dtype=types.FLOAT)

    # One rotation operator per coordinate axis.
    self.rotate1 = ops.Rotate(device="gpu", axis=(1, 0, 0), interp_type=types.INTERP_LINEAR)
    self.rotate2 = ops.Rotate(device="gpu", axis=(0, 1, 0), interp_type=types.INTERP_LINEAR)
    self.rotate3 = ops.Rotate(device="gpu", axis=(0, 0, 1), interp_type=types.INTERP_LINEAR)

    # Moves the last axis of the 4-D sample to the front.
    self.transpose = ops.Transpose(device="gpu", perm=[3, 0, 1, 2])
def __init__(self, DATA_PATH, input_height, batch_size, copies, stage, num_threads, device_id, seed=1729):
    """Create the operators for the SimCLR augmentation pipeline.

    Args:
        DATA_PATH: Dataset root; used by the file reader and to count the
            samples through ``ImageFolder``.
        input_height: Target size for crop/resize; also bounds the blur
            kernel sizes (odd values below 10% of it).
        batch_size: Forwarded to the base pipeline and used as the crop
            operator's ``minibatch_size``.
        copies: Stored as ``self.copies``.
        stage: Stored as ``self.stage``.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor.
        seed: Seed for the base pipeline and the file reader.
    """
    super(SimCLRTransform, self).__init__(batch_size, num_threads, device_id, seed=seed)

    # Lets the PyTorch compatibility wrapper find the dataset length.
    self.num_samples = len(ImageFolder(DATA_PATH))

    self.copies = copies
    self.input_height = input_height
    self.stage = stage

    self.input = ops.FileReader(file_root=DATA_PATH, random_shuffle=True, seed=seed)
    self.to_int64 = ops.Cast(dtype=types.INT64, device="gpu")
    self.to_int32_cpu = ops.Cast(dtype=types.INT32, device="cpu")

    # Random sources for the augmentations.
    self.coin = ops.random.CoinFlip(probability=0.5)
    self.uniform = ops.random.Uniform(range=[0.6, 0.9])
    odd_kernel_sizes = [
        float(size) for size in range(1, int(0.1 * self.input_height), 2)
    ]
    self.blur_amt = ops.random.Uniform(values=odd_kernel_sizes)
    self.angles = ops.random.Uniform(range=[0, 360])

    self.cast = ops.Cast(dtype=types.FLOAT, device='gpu')
    self.decode = ops.ImageDecoder(device='mixed', output_type=types.RGB)
    self.crop = ops.RandomResizedCrop(
        size=self.input_height,
        minibatch_size=batch_size,
        random_area=[0.75, 1.0],
        device="gpu",
    )
    self.resize = ops.Resize(
        resize_x=self.input_height,
        resize_y=self.input_height,
        device="gpu",
    )
    self.flip = ops.Flip(vertical=self.coin(), horizontal=self.coin(), device="gpu")
    self.colorjit_gray = ops.ColorTwist(
        brightness=self.uniform(),
        contrast=self.uniform(),
        hue=self.uniform(),
        saturation=self.uniform(),
        device="gpu",
    )
    self.blur = ops.GaussianBlur(
        window_size=self.to_int32_cpu(self.blur_amt()),
        device="gpu",
    )
    self.rotate = ops.Rotate(
        angle=self.angles(),
        keep_size=True,
        interp_type=types.DALIInterpType.INTERP_LINEAR,
        device="gpu",
    )
    self.swapaxes = ops.Transpose(perm=[2, 0, 1], device="gpu")
def __init__(self, params, device_id, files, labels):
    """Create the operators for the augmented training image pipeline.

    Args:
        params: Configuration object; ``batch_size``, ``num_gpus``, ``seed``,
            ``mean`` and ``std`` are read here.
        device_id: Forwarded to the base pipeline constructor.
        files: File paths handed to the reader.
        labels: Labels handed to the reader alongside ``files``.
    """
    super().__init__(params.batch_size, params.num_gpus * 8, device_id, seed=params.seed)

    # ``file_root`` has a pitfall: the folder name is NOT used as the label.
    # Labels 0, 1, 2, 3, 4, ... are assigned following the folder order
    # (1, 10, 11, 2, 20, 21, ...), so explicit files/labels are passed instead.
    self.input = ops.FileReader(files=files, labels=labels, random_shuffle=True)
    self.decocer = ops.ImageDecoder(device='mixed', output_type=types.RGB)
    self.resize = ops.Resize(device='gpu', resize_shorter=224)

    # Random crop position generators.
    self.pos_rng_x = ops.random.Uniform(range=(0.0, 1.0))
    self.pos_rng_y = ops.random.Uniform(range=(0.0, 1.0))
    self.crop = ops.Crop(device='gpu', crop_h=224, crop_w=224)

    self.flip = ops.Flip(device='gpu')
    self.coinflip = ops.random.CoinFlip(probability=0.5)

    self.hsv = ops.Hsv(device='gpu')
    self.saturation = ops.random.Uniform(range=(0.8, 1.0))
    self.value = ops.random.Uniform(range=(0.8, 1.0))

    # Statistics are scaled by 255 and given two leading singleton axes.
    channel_mean = torch.Tensor(params.mean).unsqueeze(0).unsqueeze(0) * 255
    channel_std = torch.Tensor(params.std).unsqueeze(0).unsqueeze(0) * 255
    self.normalize = ops.Normalize(
        axes=[0, 1],
        mean=channel_mean,
        stddev=channel_std,
        device='gpu',
        batch=False,
    )
    self.transpose = ops.Transpose(device='gpu', perm=[2, 0, 1])
def __init__(self,
             batch_size,
             num_threads,
             device_id,
             file_list,
             sequence_length,
             seg_num,
             seg_length,
             resize_shorter_scale,
             crop_target_size,
             is_training=False,
             initial_prefetch_size=10,
             num_shards=1,
             shard_id=0,
             dali_mean=0.,
             dali_std=1.0):
    """Create the operators for the segmented video pipeline.

    Args:
        batch_size, num_threads, device_id: Forwarded to the base pipeline.
        file_list: File list handed to the video reader.
        sequence_length: Frames per sequence read by the video reader.
        seg_num: Number of segments; also the leading dimension of the
            final reshape.
        seg_length: Frames per segment; the final reshape uses
            ``seg_length * 3`` as its second dimension.
        resize_shorter_scale: Target length of the shorter side for resize.
        crop_target_size: Side length of the square crop.
        is_training: Passed to the reader and also enables shuffling.
        initial_prefetch_size: Passed to the reader as ``initial_fill``.
        num_shards, shard_id: Sharding arguments for the reader.
        dali_mean, dali_std: Mean/std for ``CropMirrorNormalize``.
    """
    super(VideoPipe, self).__init__(batch_size, num_threads, device_id)

    self.input = ops.VideoReader(
        device="gpu",
        file_list=file_list,
        sequence_length=sequence_length,
        seg_num=seg_num,
        seg_length=seg_length,
        is_training=is_training,
        num_shards=num_shards,
        shard_id=shard_id,
        random_shuffle=is_training,
        initial_fill=initial_prefetch_size,
    )

    # The sequence data read by ops.VideoReader has shape [F, H, W, C].
    # ops.Resize does not support sequence data, so the sequence is
    # transposed to [H, W, F, C], reshaped to [H, W, F*C] and then resized
    # like an ordinary 2-D image.
    self.transpose = ops.Transpose(device="gpu", perm=[1, 2, 0, 3])
    self.reshape = ops.Reshape(device="gpu", rel_shape=[1.0, 1.0, -1], layout='HWC')
    self.resize = ops.Resize(device="gpu", resize_shorter=resize_shorter_scale)

    # Crop and mirror are applied by ops.CropMirrorNormalize.
    # Normalization is left to a Paddle op because dimension broadcasting is
    # hard to get right here, and it is unclear whether DALI handles it
    # correctly.
    self.pos_rng_x = ops.Uniform(range=(0.0, 1.0))
    self.pos_rng_y = ops.Uniform(range=(0.0, 1.0))
    self.mirror_generator = ops.Uniform(range=(0.0, 1.0))
    self.cast_mirror = ops.Cast(dtype=types.DALIDataType.INT32)

    crop_shape = [crop_target_size, crop_target_size]
    self.crop_mirror_norm = ops.CropMirrorNormalize(
        device="gpu",
        crop=crop_shape,
        mean=dali_mean,
        std=dali_std,
    )

    output_shape = [seg_num, seg_length * 3, crop_target_size, crop_target_size]
    self.reshape_back = ops.Reshape(device="gpu", shape=output_shape, layout='FCHW')
    self.cast_label = ops.Cast(device="gpu", dtype=types.DALIDataType.INT64)
def __init__(self, device, batch_size, layout, iterator, num_threads=1, device_id=0,
             permutation = (1, 0, 2), transpose_layout=False, out_layout_arg=None):
    """Create an external-source pipeline around a configurable Transpose.

    Args:
        device: Backend for the transpose operator.
        batch_size, num_threads, device_id: Forwarded to the base pipeline.
        layout: Stored as ``self.layout``.
        iterator: Stored as ``self.iterator``.
        permutation: Axis permutation passed as ``perm``.
        transpose_layout: Passed through to the operator.
        out_layout_arg: When truthy, passed as ``output_layout``; otherwise
            the argument is omitted entirely.
    """
    super(TransposePipeline, self).__init__(batch_size, num_threads, device_id)
    self.device = device
    self.layout = layout
    self.iterator = iterator
    self.inputs = ops.ExternalSource()

    transpose_kwargs = {
        'device': self.device,
        'perm': permutation,
        'transpose_layout': transpose_layout,
    }
    # Only pass output_layout when the caller asked for one.
    if out_layout_arg:
        transpose_kwargs['output_layout'] = out_layout_arg
    self.transpose = ops.Transpose(**transpose_kwargs)
def __init__(self, device, batch_size, layout, iterator, num_threads=1, device_id=0):
    """Create an external-source pipeline with a fixed (1, 0, 2) transpose.

    Args:
        device: Backend for the transpose operator.
        batch_size, num_threads, device_id: Forwarded to the base pipeline.
        layout: Stored as ``self.layout``.
        iterator: Stored as ``self.iterator``.
    """
    super(TransposePipeline, self).__init__(batch_size, num_threads, device_id)

    self.device = device
    self.layout = layout
    self.iterator = iterator

    self.inputs = ops.ExternalSource()
    # Swaps the first two axes and keeps the third in place.
    self.transpose = ops.Transpose(
        device=self.device,
        perm=(1, 0, 2),
    )
def __init__(self, image_dir, batch_size, num_threads, device_id):
    """Create a minimal read / decode / transpose image pipeline.

    Args:
        image_dir: Root directory handed to the file reader.
        batch_size, num_threads, device_id: Forwarded to the base pipeline.
    """
    super(SimplePipeline, self).__init__(batch_size, num_threads, device_id, seed=12)

    # A list file with "image_name image_label_value" pairs can be supplied
    # instead of relying on the directory alone, e.g.:
    #   ops.FileReader(file_root=image_dir, file_list=image_dir + '/file_list.txt')
    self.input = ops.FileReader(file_root=image_dir)

    self.decode = ops.ImageDecoder(
        device='mixed',
        output_type=types.RGB,
    )
    self.transpose = ops.Transpose(
        device='gpu',
        perm=[2, 0, 1],
    )
def __init__(self, device_id, n_devices, file_root, file_list, batch_size,
             sample_rate=16000, window_size=.02, window_stride=.01, nfeatures=64,
             nfft=512, frame_splicing_factor=3, silence_threshold=-80, dither=.00001,
             preemph_coeff=.97, lowfreq=0.0, highfreq=0.0, num_threads=1):
    """Create the CPU operators of the audio feature-extraction pipeline.

    Args:
        device_id: Forwarded to the base pipeline and used as the reader's
            ``shard_id``.
        n_devices: Number of reader shards.
        file_root, file_list: Dataset location handed to the file reader.
        batch_size: Forwarded to the base pipeline constructor.
        sample_rate: Used for the mel filter bank and to turn the window
            size/stride into sample counts.
        window_size, window_stride: Multiplied by ``sample_rate`` to obtain
            the spectrogram window length and step.
        nfeatures: Number of mel filters.
        nfft: FFT size of the spectrogram.
        frame_splicing_factor: Stored on the instance and used by the
            splicing reshape/pad operators.
        silence_threshold: ``cutoff_db`` of the non-silent region detector.
        dither: Stored as ``self.dither``.
        preemph_coeff: Pre-emphasis filter coefficient.
        lowfreq, highfreq: Mel filter bank frequency bounds.
        num_threads: Forwarded to the base pipeline constructor.
    """
    super().__init__(batch_size, num_threads, device_id, seed=42)

    self.dither = dither
    self.frame_splicing_factor = frame_splicing_factor

    self.read = ops.readers.File(
        file_root=file_root,
        file_list=file_list,
        device="cpu",
        shard_id=device_id,
        num_shards=n_devices,
    )
    self.decode = ops.AudioDecoder(device="cpu", dtype=types.FLOAT, downmix=True)
    self.normal_distribution = ops.random.Normal(device="cpu")
    self.preemph = ops.PreemphasisFilter(preemph_coeff=preemph_coeff)

    # Window length and step expressed in samples.
    samples_per_window = window_size * sample_rate
    samples_per_step = window_stride * sample_rate
    self.spectrogram = ops.Spectrogram(
        device="cpu",
        nfft=nfft,
        window_length=samples_per_window,
        window_step=samples_per_step,
    )
    self.mel_fbank = ops.MelFilterBank(
        device="cpu",
        sample_rate=sample_rate,
        nfilter=nfeatures,
        normalize=True,
        freq_low=lowfreq,
        freq_high=highfreq,
    )
    self.log_features = ops.ToDecibels(
        device="cpu",
        multiplier=np.log(10),
        reference=1.0,
        cutoff_db=-80,
    )
    self.get_shape = ops.Shapes(device="cpu")
    self.normalize = ops.Normalize(axes=[0], device="cpu")

    # Frame-splicing helpers.
    self.splicing_transpose = ops.Transpose(device="cpu", perm=[1, 0])
    self.splicing_reshape = ops.Reshape(device="cpu", rel_shape=[-1, frame_splicing_factor])
    self.splicing_pad = ops.Pad(
        axes=[0],
        fill_value=0,
        align=frame_splicing_factor,
        shape=[1],
        device="cpu",
    )

    # Silence trimming.
    self.get_nonsilent_region = ops.NonsilentRegion(device="cpu", cutoff_db=silence_threshold)
    self.trim_silence = ops.Slice(device="cpu", axes=[0])

    self.to_float = ops.Cast(dtype=types.FLOAT)
def __init__(self, batch_size, sequence_length, num_threads, device_id, files):
    """Create a GPU video reader followed by a (0, 3, 1, 2) transpose.

    Args:
        batch_size, num_threads, device_id: Forwarded to the base pipeline.
        sequence_length: Frames per sequence read by the video reader.
        files: Video file names handed to the reader.
    """
    super(VideoReaderPipeline, self).__init__(batch_size, num_threads, device_id, seed=12)

    reader_options = dict(
        device="gpu",
        filenames=files,
        sequence_length=sequence_length,
        normalized=False,
        image_type=types.RGB,
        dtype=types.UINT8,
    )
    self.reader = ops.VideoReader(**reader_options)

    # Keeps the first axis and moves the last axis to position 1.
    self.transpose = ops.Transpose(device="gpu", perm=[0, 3, 1, 2])
def __init__(self, params, num_threads, device_id):
    """Create a pipeline whose inputs are constants loaded from an HDF5 file.

    Args:
        params: Configuration object; ``batch_size``, ``data_path``,
            ``rotate_input`` and ``data_size`` are read here.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor.
    """
    super(DaliPipeline, self).__init__(params.batch_size, num_threads, device_id, seed=12)

    with h5py.File(params.data_path, 'r') as h5_file:
        # Load the 'Hydro' dataset, wrap it as a constant, then drop the
        # local reference to the array.
        hydro_array = h5_file['Hydro'][...]
        self.Hydro = types.Constant(
            hydro_array,
            shape=hydro_array.shape,
            layout="DHWC",
            device="cpu",
        )
        del hydro_array

        # Same treatment for the 'Nbody' dataset.
        nbody_array = h5_file['Nbody'][...]
        self.Nbody = types.Constant(
            nbody_array,
            shape=nbody_array.shape,
            layout="DHWC",
            device="cpu",
        )
        del nbody_array

    # Rotation is enabled only when the flag is exactly 1.
    self.do_rotate = bool(params.rotate_input == 1)
    if self.do_rotate:
        print("Enable Rotation")
    else:
        print("Disable Rotation")

    self.rng_angle = ops.Uniform(device="cpu", range=[-1.5, 2.5])
    self.rng_pos = ops.Uniform(device="cpu", range=[0., 1.])
    self.icast = ops.Cast(device="cpu", dtype=types.INT32)
    self.fcast = ops.Cast(device="cpu", dtype=types.FLOAT)

    # Cubic crop with edge length ``params.data_size``.
    edge = params.data_size
    self.crop = ops.Crop(device="cpu", crop_d=edge, crop_h=edge, crop_w=edge)

    # One rotation operator per coordinate axis.
    self.rotate1 = ops.Rotate(device="gpu", axis=(1, 0, 0), interp_type=types.INTERP_LINEAR)
    self.rotate2 = ops.Rotate(device="gpu", axis=(0, 1, 0), interp_type=types.INTERP_LINEAR)
    self.rotate3 = ops.Rotate(device="gpu", axis=(0, 0, 1), interp_type=types.INTERP_LINEAR)

    self.transpose = ops.Transpose(device="gpu", perm=[3, 0, 1, 2])
def __init__(self, batch_size, num_threads, device_id):
    """Create a Caffe-reader pipeline: decode, crop to 224x224, transpose.

    Args:
        batch_size, num_threads: Forwarded to the base pipeline.
        device_id: Forwarded to the base pipeline and used as the reader's
            ``shard_id``.
    """
    super(TransposePipe, self).__init__(batch_size, num_threads, device_id, seed=12)

    # The database location comes from the module-level ``caffe_db_folder``.
    self.input = ops.CaffeReader(
        path=caffe_db_folder,
        shard_id=device_id,
        num_shards=1,
    )
    self.decode = ops.nvJPEGDecoder(
        device="mixed",
        output_type=types.RGB,
    )
    self.crop = ops.Crop(
        device="gpu",
        crop=(224, 224),
        image_type=types.RGB,
    )
    self.transpose = ops.Transpose(
        device="gpu",
        perm=[2, 0, 1],
    )
def __init__(self, batch_size, num_threads, device_id, tfrecords, idx_paths):
    """Create the operators of the TFRecord-based ResNet input pipeline.

    Args:
        batch_size, num_threads, device_id: Forwarded to the base pipeline.
        tfrecords: TFRecord path(s) handed to the reader.
        idx_paths: Matching index path(s) handed to the reader.
    """
    super(ResnetPipeline, self).__init__(batch_size, num_threads, device_id)

    # Transformation operations below; see
    # https://docs.nvidia.com/deeplearning/sdk/dali-developer-guide/docs/supported_ops.html
    record_features = {
        "image/encoded": tfrec.FixedLenFeature([], tfrec.string, ""),
        "image/class/label": tfrec.FixedLenFeature([1], tfrec.float32, 0.0),
        "image/class/text": tfrec.FixedLenFeature([], tfrec.string, ""),
        "image/object/bbox/xmin": tfrec.VarLenFeature(tfrec.float32, 0.0),
        "image/object/bbox/ymin": tfrec.VarLenFeature(tfrec.float32, 0.0),
        "image/object/bbox/xmax": tfrec.VarLenFeature(tfrec.float32, 0.0),
        "image/object/bbox/ymax": tfrec.VarLenFeature(tfrec.float32, 0.0),
    }
    self.input = ops.TFRecordReader(
        path=tfrecords,
        index_path=idx_paths,
        features=record_features,
    )

    self.decode = ops.nvJPEGDecoder(
        device="mixed",
        cache_debug=True,
        output_type=types.RGB,
    )
    self.resize = ops.Resize(
        device="gpu",
        image_type=types.RGB,
        interp_type=types.INTERP_LINEAR,
        resize_shorter=256.,
    )
    self.cmn = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        crop=(224, 224),
        image_type=types.RGB,
        mean=[0., 0., 0.],
        std=[1., 1., 1],
    )
    self.uniform = ops.Uniform(range=(0.0, 1.0))
    self.transpose = ops.Transpose(device="gpu", perm=[0, 3, 1, 2])
    self.cast = ops.Cast(device="gpu", dtype=types.INT32)

    # Counter initialised to zero; it is not used within this constructor.
    self.iter = 0
def __init__(self, batch_size, sequence_length, num_threads, device_id, files, crop_size):
    """Create a shuffled GPU video reader with a (3, 0, 1, 2) transpose.

    Args:
        batch_size, num_threads, device_id: Forwarded to the base pipeline.
        sequence_length: Frames per sequence read by the video reader.
        files: Video file names handed to the reader.
        crop_size: Accepted but unused here (the crop operator is disabled).
    """
    super(VideoReaderPipeline, self).__init__(batch_size, num_threads, device_id, seed=12)

    reader_options = dict(
        device="gpu",
        filenames=files,
        sequence_length=sequence_length,
        normalized=False,
        random_shuffle=True,
        image_type=types.RGB,
        dtype=types.UINT8,
        initial_fill=16,
    )
    self.reader = ops.VideoReader(**reader_options)

    # The crop operator is intentionally disabled:
    # self.crop = ops.Crop(device="gpu", crop=crop_size, output_dtype=types.FLOAT)
    self.uniform = ops.Uniform(range=(0.0, 1.0))
    self.transpose = ops.Transpose(device="gpu", perm=[3, 0, 1, 2])
def __init__(self, files, labels):
    """Create a fixed-configuration evaluation image pipeline.

    The base pipeline is built with batch size 256, 8 threads, device 0 and
    seed 42.

    Args:
        files: File paths handed to the reader.
        labels: Labels handed to the reader alongside ``files``.
    """
    super().__init__(256, 8, 0, seed=42)

    self.input = ops.FileReader(files=files, labels=labels, random_shuffle=False)
    self.decocer = ops.ImageDecoder(device='mixed', output_type=types.RGB)
    self.resize = ops.Resize(device='gpu', resize_shorter=224)
    self.crop = ops.Crop(device='gpu', crop_h=224, crop_w=224)

    # Per-channel statistics scaled by 255, with two leading singleton axes.
    channel_mean = torch.Tensor([0.485, 0.456, 0.406]).unsqueeze(0).unsqueeze(0) * 255
    channel_std = torch.Tensor([0.229, 0.224, 0.225]).unsqueeze(0).unsqueeze(0) * 255
    self.normalize = ops.Normalize(
        axes=[0, 1],
        mean=channel_mean,
        stddev=channel_std,
        device='gpu',
        batch=False,
    )
    self.transpose = ops.Transpose(device='gpu', perm=[2, 0, 1])
def __init__(self, params, device_id, files, labels):
    """Create the evaluation image pipeline (no shuffling, no augmentation).

    Args:
        params: Configuration object; ``batch_size``, ``num_gpus``, ``seed``,
            ``mean`` and ``std`` are read here.
        device_id: Forwarded to the base pipeline constructor.
        files: File paths handed to the reader.
        labels: Labels handed to the reader alongside ``files``.
    """
    super().__init__(params.batch_size, params.num_gpus * 8, device_id, seed=params.seed)

    self.input = ops.FileReader(files=files, labels=labels, random_shuffle=False)
    self.decocer = ops.ImageDecoder(device='mixed', output_type=types.RGB)
    self.resize = ops.Resize(device='gpu', resize_shorter=224)
    self.crop = ops.Crop(device='gpu', crop_h=224, crop_w=224)

    # Statistics are scaled by 255 and given two leading singleton axes.
    channel_mean = torch.Tensor(params.mean).unsqueeze(0).unsqueeze(0) * 255
    channel_std = torch.Tensor(params.std).unsqueeze(0).unsqueeze(0) * 255
    self.normalize = ops.Normalize(
        axes=[0, 1],
        mean=channel_mean,
        stddev=channel_std,
        device='gpu',
        batch=False,
    )
    self.transpose = ops.Transpose(device='gpu', perm=[2, 0, 1])
def __init__(self, device, batch_size, iterator, nfilter, sample_rate, freq_low, freq_high,
             normalize, mel_formula, layout='ft', num_threads=1, device_id=0,
             func=mel_fbank_func):
    """Create a reference mel filter bank pipeline backed by a Python function.

    Args:
        device: Accepted but not used; ``self.device`` is always ``"cpu"``.
        batch_size, num_threads, device_id: Forwarded to the base pipeline.
        iterator: Stored as ``self.iterator``.
        nfilter, sample_rate, freq_low, freq_high, normalize, mel_formula:
            Bound, in this order, as leading positional arguments of ``func``.
        layout: Layout string; the position of ``'f'`` decides whether a
            transpose operator is created.
        func: Callable wrapped by the ``PythonFunction`` operator.
    """
    super(MelFilterBankPythonPipeline, self).__init__(
        batch_size, num_threads, device_id,
        seed=12345, exec_async=False, exec_pipelined=False)

    self.device = "cpu"
    self.iterator = iterator
    self.inputs = ops.ExternalSource()

    bound_func = partial(func, nfilter, sample_rate, freq_low, freq_high, normalize, mel_formula)
    self.mel_fbank = ops.PythonFunction(function=bound_func)

    self.layout=layout
    ndim = len(layout)
    freq_axis = layout.find('f')

    # True whenever 'f' is not the last character of the layout.
    self.freq_major = freq_axis != ndim - 1
    self.need_transpose = not self.freq_major and ndim > 1

    if self.need_transpose:
        # Swap the 'f' axis with the second-to-last axis.
        axis_order = list(range(ndim))
        axis_order[freq_axis] = ndim - 2
        axis_order[-2] = freq_axis
        self.transpose = ops.Transpose(perm=axis_order)
def __init__(self, params, num_threads, device_id):
    """Create a pipeline fed by a data iterator and a parameter iterator.

    Args:
        params: Configuration object; ``batch_size``, ``no_copy`` and
            ``rotate_input`` are read here, and it is passed to both
            iterators.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline and the data iterator.
    """
    super(DaliPipeline, self).__init__(params.batch_size, num_threads, device_id, seed=12)

    data_iterator = DaliInputIterator(params, device_id)
    param_iterator = DaliParameterIterator(params)

    self.no_copy = params.no_copy
    if self.no_copy:
        print("Use Zero Copy ES")

    # GPU source for the data, CPU source for the per-sample parameters.
    self.source = ops.ExternalSource(
        device="gpu",
        source=data_iterator,
        num_outputs=2,
        layout=["DHWC", "DHWC"],
        no_copy=self.no_copy,
    )
    self.params = ops.ExternalSource(
        device="cpu",
        source=param_iterator,
        num_outputs=2,
    )

    # Rotation is enabled only when the flag is exactly 1.
    self.do_rotate = bool(params.rotate_input == 1)
    if self.do_rotate:
        print("Enable Rotation")
    else:
        print("Disable Rotation")

    self.rotate = ops.Rotate(device="gpu", interp_type=types.INTERP_LINEAR)
    self.transpose = ops.Transpose(device="gpu", perm=[3, 0, 1, 2])
def __init__(self, DATA_PATH, input_height, batch_size, num_threads, device_id):
    """Create the operators for the SimCLR fine-tuning validation transform.

    Args:
        DATA_PATH: Root directory handed to the file reader.
        input_height: Output size of the resized crop.
        batch_size: Forwarded to the base pipeline and used as the crop
            operator's ``minibatch_size``.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor.
    """
    super(SimCLRFinetuneValDataTransform, self).__init__(batch_size, num_threads, device_id, seed=12)

    self.COPIES = 1
    self.input_height = input_height

    self.input = ops.FileReader(
        file_root=DATA_PATH,
        random_shuffle=True,
        seed=12,
    )
    self.decode = ops.ImageDecoder(
        device='mixed',
        output_type=types.RGB,
    )
    # random_area and random_aspect_ratio are both pinned to 1.
    self.crop = ops.RandomResizedCrop(
        size=self.input_height,
        random_area=1,
        random_aspect_ratio=1,
        minibatch_size=batch_size,
        device="gpu",
        dtype=types.FLOAT,
    )
    self.swapaxes = ops.Transpose(perm=[2, 0, 1], device="gpu")
    self.to_int64 = ops.Cast(dtype=types.INT64, device="gpu")
def __init__(self, device, batch_size, layout, iterator, num_threads=1, device_id=0,
             tested_operator=None):
    """Create a synthetic-input pipeline for one or all multichannel operators.

    Args:
        device: Backend for the created operators.
        batch_size, num_threads, device_id: Forwarded to the base pipeline.
        layout: Stored as ``self.layout``.
        iterator: Stored as ``self.iterator``.
        tested_operator: One of ``'resize'``, ``'crop'``, ``'transpose'`` or
            ``'normalize'`` to create only that operator; a falsy value
            creates all four.
    """
    super(MultichannelSynthPipeline, self).__init__(batch_size, num_threads, device_id)
    self.device = device
    self.layout = layout
    self.iterator = iterator
    self.inputs = ops.ExternalSource()
    self.tested_operator = tested_operator

    selected = self.tested_operator
    build_all = not selected

    if build_all or selected == 'resize':
        self.resize = ops.Resize(
            device=self.device,
            resize_y=900,
            resize_x=300,
            min_filter=types.DALIInterpType.INTERP_LINEAR,
        )

    if build_all or selected == 'crop':
        self.crop = ops.Crop(
            device=self.device,
            crop=(220, 224),
            crop_pos_x=0.3,
            crop_pos_y=0.2,
            image_type=types.ANY_DATA,
        )

    if build_all or selected == 'transpose':
        self.transpose = ops.Transpose(
            device=self.device,
            perm=(1, 0, 2),
            transpose_layout=False,
        )

    if build_all or selected == 'normalize':
        self.cmn = ops.CropMirrorNormalize(
            device=self.device,
            std=255.,
            mean=0.,
            output_layout="HWC",
            output_dtype=types.FLOAT,
        )
def __init__(self, params, num_threads, device_id):
    """Create an external-source pipeline with random-axis rotation and crop.

    Args:
        params: Configuration object; ``batch_size``, ``no_copy`` and
            ``rotate_input`` are read here, and it is passed to
            ``DaliInputIterator``.
        num_threads: Forwarded to the base pipeline constructor.
        device_id: Forwarded to the base pipeline constructor.
    """
    super(DaliPipeline, self).__init__(params.batch_size, num_threads, device_id, seed=12)

    input_iterator = DaliInputIterator(params)

    self.no_copy = params.no_copy
    if self.no_copy:
        print("Use Zero Copy ES")

    self.source = ops.ExternalSource(
        source=input_iterator,
        num_outputs=2,
        layout=["DHWC", "DHWC"],
        no_copy=self.no_copy,
    )

    # Rotation is enabled only when the flag is exactly 1.
    self.do_rotate = bool(params.rotate_input == 1)
    if self.do_rotate:
        print("Enable Rotation")
    else:
        print("Disable Rotation")

    # Random rotation angle and a random 3-component axis.
    self.rng_angle = ops.Uniform(device="cpu", range=[0., 180.])
    self.rng_axis = ops.Uniform(device="cpu", range=[-1., 1.], shape=(3))
    self.rotate = ops.Rotate(device="gpu", interp_type=types.INTERP_LINEAR, keep_size=True)

    self.transpose = ops.Transpose(device="gpu", perm=[3, 0, 1, 2])

    # Cubic crop whose edge length is taken from the iterator's ``size``.
    edge = input_iterator.size
    self.crop = ops.Crop(device="gpu", crop=(edge, edge, edge))
def __init__(self, batch_size, sequence_length, num_threads, files, gt_files,
             crop_size, random_shuffle=True, step=-1, device_id=-1, seed=12):
    """Create paired video readers (input and ground truth) plus crop/transpose.

    Args:
        batch_size, num_threads, device_id: Forwarded to the base pipeline.
        sequence_length: Frames per sequence for both readers.
        files: Input video file names.
        gt_files: Ground-truth video file names.
        crop_size: Side length of the square crop.
        random_shuffle: Shuffle flag for both readers.
        step: Frame step for both readers.
        seed: Seed for the base pipeline and both readers.
    """
    super(VideoReaderPipeline, self).__init__(batch_size, num_threads, device_id, seed=seed)

    # Both readers share every option except the file list, including the
    # seed.
    shared_reader_options = dict(
        sequence_length=sequence_length,
        normalized=False,
        random_shuffle=random_shuffle,
        image_type=types.RGB,
        dtype=types.UINT8,
        step=step,
        initial_fill=16,
        seed=seed,
    )
    self.reader = ops.VideoReader(device="gpu", filenames=files, **shared_reader_options)
    self.gt_reader = ops.VideoReader(device="gpu", filenames=gt_files, **shared_reader_options)

    # Crop and permute operations applied to every sequence.
    self.crop = ops.Crop(
        device="gpu",
        crop=[crop_size, crop_size],
        output_dtype=types.FLOAT,
    )
    self.uniform = ops.Uniform(range=(0.0, 1.0))  # used for random crop
    self.transpose = ops.Transpose(device="gpu", perm=[3, 0, 1, 2])
def __init__(self, file_list, batch_size, sequence_length, num_threads, device_id,
             crop_size, step=-1, stride=1, random_shuffle=True):
    """Create a file-list video reader with crop and (0, 3, 1, 2) transpose.

    Args:
        file_list: File list handed to the video reader.
        batch_size, num_threads, device_id: Forwarded to the base pipeline.
        sequence_length: Frames per sequence read by the video reader.
        crop_size: Passed unchanged as the crop operator's ``crop``.
        step: Frame step between sequences.
        stride: Frame stride within a sequence.
        random_shuffle: Shuffle flag for the reader.
    """
    super(VideoReaderPipeline, self).__init__(batch_size, num_threads, device_id, seed=12)

    # Video reader definition.
    reader_options = dict(
        device="gpu",
        file_list=file_list,
        sequence_length=sequence_length,
        normalized=False,
        random_shuffle=random_shuffle,
        image_type=types.RGB,
        dtype=types.UINT8,
        step=step,
        stride=stride,
        initial_fill=16,
    )
    self.reader = ops.VideoReader(**reader_options)

    # Operators applied to every sequence.
    self.crop = ops.Crop(device="gpu", crop=crop_size, output_dtype=types.FLOAT)
    self.transpose = ops.Transpose(device="gpu", perm=[0, 3, 1, 2])  # [N F C H W]
    self.uniform = ops.Uniform(range=(0.2, 1.0))  # used for random crop
def __init__(self,
             directory,
             supervised: bool = True,
             sequence_length: int = 11,
             batch_size: int = 1,
             num_workers: int = 1,
             gpu_id: int = 0,
             shuffle: bool = True,
             crop_size: tuple = (256, 256),
             resize: tuple = None,
             brightness: float = 0.25,
             contrast: float = 0.1,
             mean: list = None,
             std: list = None,
             conv_mode='3d',
             image_shape=(256, 256),
             validate: bool = False):
    """Create the operators of the video loading/augmentation pipeline.

    Args:
        directory: Root directory handed to the video reader.
        supervised: Stored as ``self.supervised``.
        sequence_length: Frames per sequence read by the video reader.
        batch_size: Forwarded to the base pipeline constructor.
        num_workers: Forwarded to the base pipeline as the thread count.
        gpu_id: Forwarded to the base pipeline as the device id.
        shuffle: Shuffle flag for the reader.
        crop_size: Crop passed to ``CropMirrorNormalize``; when not ``None``
            it also supplies (H, W) for the 2-D reshape.
        resize: Accepted but not used in this constructor.
        brightness: Half-width of the brightness range around 1.
        contrast: Half-width of the contrast range around 1.
        mean: Normalization mean; ``None`` means ``[0.5, 0.5, 0.5]``.
        std: Normalization std; ``None`` means ``[0.5, 0.5, 0.5]``.
        conv_mode: ``'3d'`` or ``'2d'``; selects the transpose permutation
            and whether a reshape operator is created.
        image_shape: Fallback (H, W) used when ``crop_size`` is ``None``.
        validate: Stored as ``self.validate``.
    """
    # The defaults used to be shared list objects in the signature; build
    # fresh lists per call so no instance can mutate another's defaults.
    if mean is None:
        mean = [0.5, 0.5, 0.5]
    if std is None:
        std = [0.5, 0.5, 0.5]

    super().__init__(batch_size, num_workers, gpu_id, prefetch_queue_depth=1)

    self.input = ops.VideoReader(additional_decode_surfaces=1,
                                 channels=3,
                                 device="gpu",
                                 dtype=types.FLOAT,
                                 enable_frame_num=False,
                                 enable_timestamps=False,
                                 file_root=directory,
                                 image_type=types.RGB,
                                 initial_fill=1,
                                 lazy_init=False,
                                 normalized=True,
                                 num_shards=1,
                                 pad_last_batch=False,
                                 prefetch_queue_depth=1,
                                 random_shuffle=shuffle,
                                 sequence_length=sequence_length,
                                 skip_vfr_check=True,
                                 step=-1,
                                 shard_id=0,
                                 stick_to_shard=False,
                                 stride=1)

    self.uniform = ops.Uniform(range=(0.0, 1.0))
    self.cmn = ops.CropMirrorNormalize(device='gpu', crop=crop_size, mean=mean, std=std,
                                       output_layout=types.NFHWC)
    self.coin = ops.CoinFlip(probability=0.5)
    self.brightness_val = ops.Uniform(range=[1 - brightness, 1 + brightness])
    self.contrast_val = ops.Uniform(range=[1 - contrast, 1 + contrast])
    self.supervised = supervised
    self.half = ops.Constant(fdata=0.5)
    self.zero = ops.Constant(idata=0)
    self.cast_to_long = ops.Cast(device='gpu', dtype=types.INT64)

    # Spatial size used by the 2-D reshape: the crop size when cropping,
    # otherwise the default image shape.
    if crop_size is not None:
        H, W = crop_size
    else:
        H, W = image_shape

    # NOTE: any other conv_mode leaves ``self.transpose`` and
    # ``self.reshape`` undefined, exactly as before.
    if conv_mode == '3d':
        self.transpose = ops.Transpose(device="gpu", perm=[3, 0, 1, 2])
        self.reshape = None
    elif conv_mode == '2d':
        self.transpose = ops.Transpose(device='gpu', perm=[0, 3, 1, 2])
        self.reshape = ops.Reshape(device='gpu', shape=[-1, H, W])

    self.validate = validate
def __init__(self,
             device_id,
             num_threads,
             resample_range: list,
             sample_rate=16000,
             window_size=0.02,
             window_stride=0.01,
             window="hann",
             normalize="per_feature",
             n_fft=None,
             preemph=0.97,
             nfilt=64,
             lowfreq=0,
             highfreq=0,
             log=True,
             dither=constant,
             pad_to=8,
             max_duration=15.0,
             frame_splicing=3,
             batch_size=1,
             total_samples=16,
             audio_fp16_input=True,
             device='gpu'):
    """Create the operators of the audio feature-extraction pipeline.

    Args:
        device_id, num_threads, batch_size: Forwarded to the base pipeline.
        resample_range: Accepted but not used in this constructor.
        sample_rate: Sampling rate; converts window size/stride to samples
            and is passed to the mel filter bank.
        window_size, window_stride: Window length and hop, multiplied by
            ``sample_rate`` and truncated to whole samples.
        window: One of ``'hann'``, ``'hamming'``, ``'blackman'``,
            ``'bartlett'`` or ``'none'``. ``'none'`` and unrecognised names
            use a rectangular (all-ones) window.
        normalize: Passed to the mel filter bank. ``self.normalize`` ends up
            holding the ``Normalize`` operator, not this value.
        n_fft: FFT size; defaults to the next power of two at or above the
            window length.
        preemph: Pre-emphasis coefficient. ``self.preemph`` ends up holding
            the ``PreemphasisFilter`` operator, not this value.
        nfilt: Number of mel filters.
        lowfreq: Lower bound of the mel filter bank.
        highfreq: Upper bound of the mel filter bank; a falsy value means
            ``sample_rate / 2``.
        log, dither, pad_to, frame_splicing, total_samples,
        audio_fp16_input: Stored on the instance.
        max_duration: Accepted but not used in this constructor.
        device: Backend for every operator created here.
    """
    super().__init__(batch_size, num_threads, device_id, exec_async=True,
                     exec_pipelined=True, seed =12, prefetch_queue_depth=1)
    # Must run before any new local is bound so that locals() only holds the
    # constructor arguments (plus the implicit names).
    self._dali_init_log(locals())

    if torch.distributed.is_initialized():
        shard_id = torch.distributed.get_rank()
        n_shards = torch.distributed.get_world_size()
    else:
        shard_id = 0
        n_shards = 1

    torch_windows = {
        'hann': torch.hann_window,
        'hamming': torch.hamming_window,
        'blackman': torch.blackman_window,
        'bartlett': torch.bartlett_window,
        'none': None,
    }

    self.audio_fp16_input=audio_fp16_input
    self.total_samples = total_samples
    self.win_length = int(sample_rate * window_size)  # frame size
    self.hop_length = int(sample_rate * window_stride)
    self.n_fft = n_fft or 2 ** math.ceil(math.log2(self.win_length))

    self.normalize = normalize
    self.log = log
    self.dither = dither
    self.frame_splicing = frame_splicing
    self.nfilt = nfilt
    self.preemph = preemph
    self.pad_to = pad_to
    self.highfreq = highfreq or sample_rate / 2

    window_fn = torch_windows.get(window, None)
    window_tensor = window_fn(self.win_length, periodic=False) if window_fn else None

    self.sample_rate = sample_rate
    self.window_size = window_size
    self.window_stride = window_stride
    self.window = window_tensor
    self.lowfreq = lowfreq
    self.log = log
    self.device = device

    # Without a window function there is no tensor to convert; previously
    # this raised AttributeError on ``None.tolist()``. Use a rectangular
    # window instead, i.e. no tapering.
    if self.window is not None:
        win_unpadded = self.window.tolist()
    else:
        win_unpadded = [1.0] * self.win_length
    # Zero-pad the window up to the FFT size.
    win_padded = win_unpadded + [0] * (self.n_fft - len(win_unpadded))

    print("self.n_fft = {}".format(self.n_fft))
    print("self.hop_length = {}".format(self.hop_length))
    print("self.win_length = {}".format(self.win_length))
    print("self.window_tensor = {}".format(self.window))
    print("self.sample_rate = {}".format(self.sample_rate))
    print("self.window_size = {}".format(self.window_size))
    print("self.window_stride = {}".format(self.window_stride))
    print("self.lowfreq = {}".format(self.lowfreq))
    print("self.device = {}".format(self.device))

    self.extsrc = ops.ExternalSource(name="INPUT_0", device=self.device, no_copy=True)

    # From here on self.preemph refers to the operator, not the coefficient.
    self.preemph = ops.PreemphasisFilter(preemph_coeff=preemph, device=self.device)

    self.spectrogram = ops.Spectrogram(device=self.device,
                                       nfft=self.n_fft,
                                       center_windows=True,
                                       window_fn=win_padded,
                                       window_length=len(win_padded),
                                       window_step=self.hop_length
                                       )
    self.mel_fbank = ops.MelFilterBank(device=self.device,
                                       sample_rate=self.sample_rate,
                                       nfilter=self.nfilt,
                                       freq_high=self.highfreq,
                                       freq_low=self.lowfreq,
                                       normalize=normalize
                                       )

    self.log_features = ops.ToDecibels(device=self.device, multiplier=np.log(10), reference=1.0,
                                       cutoff_db=math.log(1e-20))

    self.get_shape = ops.Shapes(device=self.device)

    # From here on self.normalize refers to the operator, not the mode string.
    self.normalize = ops.Normalize(axes=[0], device=self.device, ddof=1)

    self.pad = ops.Pad(axes=[0,1], fill_value=0, shape=[502,240], device=self.device)

    # Frame splicing
    self.splicing_transpose = ops.Transpose(device=self.device, perm=[1, 0])
    self.splicing_reshape = ops.Reshape(device=self.device, rel_shape=[-1, self.frame_splicing])
    self.splicing_pad = ops.Pad(axes=[0], fill_value=0, align=self.frame_splicing, shape=[1],
                                device=self.device)

    self.to_float16 = ops.Cast(dtype=types.FLOAT16, device=self.device)
    self.to_float32 = ops.Cast(dtype=types.FLOAT, device=self.device)

    self.samples_done = 0