예제 #1
0
 def __init__(self,
              device,
              batch_size,
              relative,
              use_wildcard,
              num_threads=3,
              device_id=0,
              num_gpus=1):
     """Create the reader, decoder, resize and reshape operators.

     Args:
         device: Device string given to the ``Reshape`` operator; also
             stored on ``self.device``.
         batch_size: Forwarded to the base pipeline constructor.
         relative: When true, ``Reshape`` is configured with ``rel_shape``;
             otherwise with an absolute ``shape``.
         use_wildcard: When true, one extent of the requested shape is
             ``-1`` instead of an explicit value.
         num_threads: Forwarded to the base pipeline constructor.
         device_id: Forwarded to the base constructor and used as the
             reader's ``shard_id``.
         num_gpus: Used as the reader's ``num_shards``.
     """
     super(ReshapePipeline, self).__init__(
         batch_size, num_threads, device_id,
         seed=7865, exec_async=True, exec_pipelined=True)
     self.device = device
     self.input = ops.readers.Caffe(
         path=caffe_db_folder, shard_id=device_id, num_shards=num_gpus)
     self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

     # Images are resized to a fixed size, so the absolute target shape
     # used below can be written down in advance.
     width, height = 320, 224
     self.resize = ops.Resize(device="cpu", resize_x=width, resize_y=height)

     if relative:
         shape_arg = {"rel_shape": (-1, 3) if use_wildcard else (1, 3)}
     else:
         row_length = -1 if use_wildcard else width * 3
         shape_arg = {"shape": (height, row_length)}
     self.reshape = ops.Reshape(device=device, layout="ab", **shape_arg)
예제 #2
0
 def __init__(self,
              batch_size,
              num_threads,
              device_id,
              file_list,
              sequence_length,
              seg_num,
              seg_length,
              resize_shorter_scale,
              crop_target_size,
              is_training=False,
              initial_prefetch_size=10,
              num_shards=1,
              shard_id=0,
              dali_mean=0.,
              dali_std=1.0):
     """Create the GPU operators used to load and preprocess video clips.

     Args:
         batch_size: Forwarded to the base pipeline constructor.
         num_threads: Forwarded to the base pipeline constructor.
         device_id: Forwarded to the base pipeline constructor.
         file_list: Forwarded to ``ops.VideoReader``.
         sequence_length: Forwarded to ``ops.VideoReader``.
         seg_num: Forwarded to the reader; also the first extent of the
             final reshape.
         seg_length: Forwarded to the reader; ``seg_length * 3`` is the
             second extent of the final reshape.
         resize_shorter_scale: ``resize_shorter`` value for ``ops.Resize``.
         crop_target_size: Side of the square crop; also the last two
             extents of the final reshape.
         is_training: Forwarded to the reader, where it also switches
             ``random_shuffle`` on or off.
         initial_prefetch_size: Reader ``initial_fill``.
         num_shards: Reader sharding argument.
         shard_id: Reader sharding argument.
         dali_mean: ``mean`` for ``ops.CropMirrorNormalize``.
         dali_std: ``std`` for ``ops.CropMirrorNormalize``.
     """
     super(VideoPipe, self).__init__(batch_size, num_threads, device_id)

     reader_args = dict(file_list=file_list,
                        sequence_length=sequence_length,
                        seg_num=seg_num,
                        seg_length=seg_length,
                        is_training=is_training,
                        num_shards=num_shards,
                        shard_id=shard_id,
                        random_shuffle=is_training,
                        initial_fill=initial_prefetch_size)
     self.input = ops.VideoReader(device="gpu", **reader_args)

     # The sequence data read by ops.VideoReader has shape [F, H, W, C].
     # ops.Resize does not support sequence data, so the data is first
     # transposed to [H, W, F, C], then reshaped to [H, W, F*C], and
     # finally resized like an ordinary 2-D image.
     self.transpose = ops.Transpose(device="gpu", perm=[1, 2, 0, 3])
     self.reshape = ops.Reshape(
         device="gpu", rel_shape=[1.0, 1.0, -1], layout='HWC')
     self.resize = ops.Resize(
         device="gpu", resize_shorter=resize_shorter_scale)

     # Crop and mirror are applied by ops.CropMirrorNormalize.
     # Normalization is left to a Paddle op because broadcasting across
     # dimensions is awkward here and it is unclear whether DALI would
     # broadcast correctly.
     self.pos_rng_x = ops.Uniform(range=(0.0, 1.0))
     self.pos_rng_y = ops.Uniform(range=(0.0, 1.0))
     self.mirror_generator = ops.Uniform(range=(0.0, 1.0))
     self.cast_mirror = ops.Cast(dtype=types.DALIDataType.INT32)

     crop_window = [crop_target_size, crop_target_size]
     self.crop_mirror_norm = ops.CropMirrorNormalize(
         device="gpu", crop=crop_window, mean=dali_mean, std=dali_std)

     final_shape = [
         seg_num, seg_length * 3, crop_target_size, crop_target_size
     ]
     self.reshape_back = ops.Reshape(
         device="gpu", shape=final_shape, layout='FCHW')
     self.cast_label = ops.Cast(
         device="gpu", dtype=types.DALIDataType.INT64)
예제 #3
0
 def define_graph(self):
     """Return two ``Reshape`` outputs built from constant inputs.

     The first output reshapes the scalar constant ``1.25`` to ``[1]``.
     The second passes a 2x2 ``uint16`` constant through ``Reshape`` with
     the target shape ``[4]`` supplied as a call-time argument.
     """
     device = self.device

     # no-op
     noop_reshape = ops.Reshape(device=device, shape=[1])
     noop_out = noop_reshape(types.Constant(1.25))

     # flatten with reshape op
     flatten_reshape = ops.Reshape(device=device)
     matrix = types.Constant(
         np.array([[1, 2], [3, 4]], dtype=np.uint16), device=device)
     flat_out = flatten_reshape(matrix, shape=types.Constant([4]))

     return [noop_out, flat_out]
예제 #4
0
 def __init__(self,
              batch_size,
              output_type,
              input_type,
              use_input,
              num_threads=3,
              device_id=0,
              num_gpus=1):
     """Create the operators of the Python-function warp pipeline.

     Args:
         batch_size: Forwarded to the base pipeline constructor.
         output_type: Passed to ``CVWarp``.
         input_type: Passed to ``CVWarp``.
         use_input: When true, an ``ExternalSource`` fed by
             ``gen_transforms`` is created and ``CVWarp`` is built without
             a fixed matrix; otherwise ``CVWarp`` receives a hard-coded
             2x3 matrix.
         num_threads: Forwarded to the base pipeline constructor.
         device_id: Forwarded to the base constructor and used as the
             reader's ``shard_id``.
         num_gpus: Used as the reader's ``num_shards``.
     """
     super(CVPipeline, self).__init__(
         batch_size, num_threads, device_id,
         seed=7865, exec_async=False, exec_pipelined=False)
     self.use_input = use_input
     self.name = "cv"
     self.input = ops.CaffeReader(
         path=caffe_db_folder, shard_id=device_id, num_shards=num_gpus)
     self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

     if self.use_input:
         # ``self.transform_source`` only exists in this mode.
         self.transform_source = ops.ExternalSource(
             lambda: gen_transforms(self.batch_size, 10))
         warp_callable = CVWarp(output_type, input_type)
     else:
         fixed_matrix = [[0.1, 0.9, 10], [0.8, -0.2, -20]]
         warp_callable = CVWarp(output_type, input_type, fixed_matrix)
     self.warp = ops.PythonFunction(function=warp_callable)

     self.set_layout = ops.Reshape(layout="HWC")
     self.iter = 0
예제 #5
0
    def __init__(self,
                 batch_size,
                 layout,
                 iterator,
                 pos_size_iter,
                 num_threads=1,
                 device_id=0,
                 num_gpus=1,
                 axes=None,
                 axis_names=None,
                 normalized_anchor=True,
                 normalized_shape=True):
        """Create a Python-function slice pipeline fed by external sources.

        Args:
            batch_size: Forwarded to the base pipeline constructor.
            layout: Stored on ``self.layout``, bound into the slice helper
                and used as the layout set by ``self.set_layout``.
            iterator: Stored on ``self.iterator``.
            pos_size_iter: Stored on ``self.pos_size_iter``.
            num_threads: Forwarded to the base pipeline constructor.
            device_id: Forwarded to the base pipeline constructor.
            num_gpus: Not used by this constructor.
            axes: Bound into ``slice_func_helper``.
            axis_names: Bound into ``slice_func_helper``.
            normalized_anchor: Bound into ``slice_func_helper``.
            normalized_shape: Bound into ``slice_func_helper``.
        """
        super(SliceSynthDataPipelinePythonOp, self).__init__(
            batch_size, num_threads, device_id,
            seed=12345, exec_async=False, exec_pipelined=False)
        self.device = "cpu"
        self.layout = layout
        self.iterator = iterator
        self.pos_size_iter = pos_size_iter

        # Three external inputs: the data, the crop anchor, the crop size.
        self.inputs = ops.ExternalSource()
        self.input_crop_pos = ops.ExternalSource()
        self.input_crop_size = ops.ExternalSource()

        slice_callable = partial(slice_func_helper,
                                 axes,
                                 axis_names,
                                 self.layout,
                                 normalized_anchor,
                                 normalized_shape)
        self.slice = ops.PythonFunction(function=slice_callable)
        self.set_layout = ops.Reshape(layout=layout)
예제 #6
0
    def __init__(self,
                 function,
                 batch_size,
                 iterator,
                 data_shape,
                 data_layout,
                 num_threads=1,
                 device_id=0):
        """Create a pipeline that crops through a user-supplied Python function.

        Args:
            function: Callable invoked as
                ``function(image, layout=..., shape=...)``.
            batch_size: Forwarded to the base pipeline constructor.
            iterator: Stored on ``self.iterator``.
            data_shape: Stored on ``self.data_shape`` and passed to
                ``function`` as ``shape``.
            data_layout: Stored on ``self.data_layout``, passed to
                ``function`` as ``layout`` and set by ``self.set_layout``.
            num_threads: Forwarded to the base pipeline constructor.
            device_id: Forwarded to the base pipeline constructor.
        """
        super(Crop3dPythonOpPipeline, self).__init__(
            batch_size, num_threads, device_id,
            exec_async=False, exec_pipelined=False)
        self.iterator = iterator
        self.inputs = ops.ExternalSource()
        self.data_shape = data_shape
        self.data_layout = data_layout

        def crop_func(image):
            # Layout and shape are looked up on the instance at call time,
            # not frozen when the pipeline is constructed.
            call_kwargs = {"layout": self.data_layout,
                           "shape": self.data_shape}
            return function(image, **call_kwargs)

        self.crop = ops.PythonFunction(function=crop_func)
        self.set_layout = ops.Reshape(layout=data_layout)
예제 #7
0
    def __init__(self,
                 batch_size,
                 pos_size_iter,
                 num_threads=1,
                 device_id=0,
                 num_gpus=1,
                 axes=None,
                 axis_names=None,
                 normalized_anchor=True,
                 normalized_shape=True):
        """Create a Python-function slice pipeline reading from a Caffe DB.

        Args:
            batch_size: Forwarded to the base pipeline constructor.
            pos_size_iter: Stored on ``self.pos_size_iter``.
            num_threads: Forwarded to the base pipeline constructor.
            device_id: Forwarded to the base pipeline constructor.
            num_gpus: Not used by this constructor.
            axes: Bound into ``slice_func_helper``.
            axis_names: Bound into ``slice_func_helper``.
            normalized_anchor: Bound into ``slice_func_helper``.
            normalized_shape: Bound into ``slice_func_helper``.
        """
        super(SlicePythonOp, self).__init__(
            batch_size, num_threads, device_id,
            seed=12345, exec_async=False, exec_pipelined=False)
        self.device = "cpu"
        self.layout = "HWC"
        self.pos_size_iter = pos_size_iter

        self.input = ops.CaffeReader(
            path=caffe_db_folder, random_shuffle=False)
        self.decode = ops.ImageDecoder(device='cpu', output_type=types.RGB)

        # Crop anchor and crop size are fed in from outside the pipeline.
        self.input_crop_pos = ops.ExternalSource()
        self.input_crop_size = ops.ExternalSource()

        slice_callable = partial(slice_func_helper,
                                 axes,
                                 axis_names,
                                 self.layout,
                                 normalized_anchor,
                                 normalized_shape)
        self.slice = ops.PythonFunction(function=slice_callable)
        self.set_layout = ops.Reshape(layout="HWC")
예제 #8
0
    def __init__(self,
                 function,
                 batch_size,
                 data_layout,
                 iterator,
                 anchor,
                 shape,
                 axis_names,
                 axes,
                 fill_value,
                 erase_func=erase_func,
                 num_threads=1,
                 device_id=0):
        """Create a pipeline that erases regions through a Python function.

        Args:
            function: Accepted for interface compatibility; this
                constructor does not read it.
            batch_size: Forwarded to the base pipeline constructor.
            data_layout: Stored on ``self.data_layout``, bound into
                ``erase_func`` and set by ``self.set_layout``.
            iterator: Stored on ``self.iterator``.
            anchor: Bound into ``erase_func``.
            shape: Bound into ``erase_func``.
            axis_names: Bound into ``erase_func``.
            axes: Bound into ``erase_func``.
            fill_value: Bound into ``erase_func``.
            erase_func: Callable that performs the erase; its leading
                arguments are pre-bound here.
            num_threads: Forwarded to the base pipeline constructor.
            device_id: Forwarded to the base pipeline constructor.
        """
        super(ErasePythonPipeline, self).__init__(
            batch_size, num_threads, device_id,
            exec_async=False, exec_pipelined=False)
        self.iterator = iterator
        self.inputs = ops.ExternalSource()
        self.data_layout = data_layout

        # NOTE(review): the ``function`` argument is never used; the
        # operator always wraps ``erase_func``. Confirm this is intended.
        bound_erase = partial(erase_func,
                              anchor,
                              shape,
                              axis_names,
                              axes,
                              data_layout,
                              fill_value)

        self.erase = ops.PythonFunction(function=bound_erase)
        self.set_layout = ops.Reshape(layout=data_layout)
예제 #9
0
 def __init__(self,
              batch_size,
              num_threads,
              device,
              device_id=0,
              shard_id=0,
              num_shards=1,
              seed=0):
     """Create the COCO reader, decoder and image-processing operators.

     Args:
         batch_size: Forwarded to the base pipeline constructor.
         num_threads: Forwarded to the base pipeline constructor.
         device: Device string for the resize, crop/normalize, cast and
             reshape operators. When equal to ``'gpu'`` the decoder runs
             as ``'mixed'``; for any other value it runs as ``'cpu'``.
         device_id: Forwarded to the base pipeline constructor.
         shard_id: Reader ``shard_id``.
         num_shards: Reader ``num_shards``.
         seed: Forwarded to the base pipeline constructor.
     """
     super(TestPipeline, self).__init__(batch_size, num_threads, device_id,
                                        seed)
     self.device = device
     self.input = ops.COCOReader(file_root=file_root,
                                 annotations_file=annotations_file,
                                 shard_id=shard_id,
                                 num_shards=num_shards,
                                 ratio=False,
                                 save_img_ids=True)
     # Compare by value: ``is`` on a string literal only works when the
     # interpreter happens to intern both strings, and it raises a
     # SyntaxWarning on Python 3.8+.
     decoder_device = 'mixed' if device == 'gpu' else 'cpu'
     self.decode = ops.ImageDecoder(device=decoder_device,
                                    output_type=types.RGB)
     self.resize = ops.Resize(device=device,
                              image_type=types.RGB,
                              resize_x=224,
                              resize_y=224,
                              interp_type=types.INTERP_LINEAR)
     self.cmn = ops.CropMirrorNormalize(device=device,
                                        output_dtype=types.FLOAT,
                                        image_type=types.RGB,
                                        mean=[128., 128., 128.],
                                        std=[1., 1., 1.])
     self.cast = ops.Cast(device=device, dtype=types.INT16)
     self.reshape = ops.Reshape(device=device, shape=[1, 1])
예제 #10
0
    def __init__(self,
                 function,
                 batch_size,
                 iterator,
                 data_shape,
                 data_layout,
                 num_threads=1,
                 device_id=0,
                 dictionary=None,
                 default_value=0.0):
        """Create a pipeline that applies a lookup table through a Python function.

        Args:
            function: Callable invoked as ``function(input_data,
                shape=..., dictionary=..., default_value=...)``.
            batch_size: Forwarded to the base pipeline constructor.
            iterator: Stored on ``self.iterator``.
            data_shape: Stored on ``self.data_shape`` and passed to
                ``function`` as ``shape``.
            data_layout: Stored on ``self.data_layout`` and set by
                ``self.set_layout``.
            num_threads: Forwarded to the base pipeline constructor.
            device_id: Forwarded to the base pipeline constructor.
            dictionary: Mapping passed to ``function``. ``None`` (the
                default) means a fresh empty dict for this instance.
            default_value: Passed to ``function`` as ``default_value``.
        """
        super(LookupTablePythonOpPipeline, self).__init__(
            batch_size, num_threads, device_id,
            exec_async=False, exec_pipelined=False)
        # A literal ``{}`` default would be one dict shared by every
        # instance; ``function`` receives it and could mutate it.
        if dictionary is None:
            dictionary = {}

        self.iterator = iterator
        self.inputs = ops.ExternalSource()
        self.data_shape = data_shape
        self.data_layout = data_layout

        def lookup_table_func(input_data):
            return function(input_data,
                            shape=data_shape,
                            dictionary=dictionary,
                            default_value=default_value)

        self.lookup = ops.PythonFunction(function=lookup_table_func)
        self.set_layout = ops.Reshape(layout=data_layout)
 def __init__(self, batch_size, num_threads, device_id, _seed, image_dir):
     """Create the file reader, decoder and Python resize operators.

     Args:
         batch_size: Forwarded to the base pipeline constructor.
         num_threads: Forwarded to the base pipeline constructor.
         device_id: Forwarded to the base pipeline constructor.
         _seed: Passed to the base constructor as ``seed``.
         image_dir: ``file_root`` of the file reader.
     """
     super(CommonPipeline, self).__init__(
         batch_size,
         num_threads,
         device_id,
         seed=_seed,
         exec_async=False,
         exec_pipelined=False,
     )
     self.input = ops.FileReader(file_root=image_dir)
     self.decode = ops.ImageDecoder(device='cpu', output_type=types.RGB)
     self.resize = ops.PythonFunction(function=resize)
     self.set_layout = ops.Reshape(layout="HWC")
예제 #12
0
 def __init__(self, batch_size, num_threads, device_id, data_dir, crop):
     """Create the augmentation operators of the hybrid training pipeline.

     Args:
         batch_size: Forwarded to the parent constructor.
         num_threads: Forwarded to the parent constructor.
         device_id: Forwarded to the parent constructor.
         data_dir: Forwarded to the parent constructor.
         crop: Forwarded to the parent constructor; also used as the
             ``min_canvas_size`` of ``Paste`` and the ``size`` of
             ``RandomResizedCrop``.
     """
     super(HybridTrainPipe, self).__init__(
         batch_size, num_threads, device_id, data_dir, crop)

     # Geometry.
     self.pad = ops.Paste(
         device="gpu", fill_value=0, ratio=1.1, min_canvas_size=crop)
     self.res = ops.RandomResizedCrop(
         device="gpu",
         size=crop,
         random_area=[0.9, 1.1],
         random_aspect_ratio=1.33333)

     self.cutmix = ops.PythonFunction(
         function=cut_mixe_image, num_outputs=2, device='gpu')

     # Per-channel statistics, expressed on the 0-255 scale.
     channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
     channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
     self.cmnp = ops.CropMirrorNormalize(
         device="gpu",
         output_dtype=types.FLOAT,
         output_layout=types.NCHW,
         image_type=types.RGB,
         mean=channel_mean,
         std=channel_std)

     # Random generators and the operators they are named after.
     self.coin = ops.CoinFlip(probability=0.5)
     self.rotated = ops.Rotate(device="gpu", keep_size=True)
     self.rotated_rng = ops.Uniform(range=(-5.0, 5.0))
     self.brightness = ops.Brightness(device="gpu")
     self.brightness_rng = ops.Uniform(range=(0.8, 1.2))
     self.reshape = ops.Reshape(device="gpu", layout="HWC")
     self.one_hot = ops.OneHot(
         num_classes=3, dtype=types.INT32, device="cpu")
     self.jitter_rng = ops.CoinFlip(probability=0.3)
     self.jittered = ops.Jitter(device="gpu")
    def __init__(self, DATA_PATH, input_height, batch_size, num_threads,
                 device_id):
        """Create the reader, decoder and GPU augmentation operators.

        Args:
            DATA_PATH: ``file_root`` of the file reader.
            input_height: Stored on ``self.input_height``; used as the
                random-resized-crop size, to derive the blur window and
                as both trailing extents of the final reshape.
            batch_size: Forwarded to the base pipeline constructor.
            num_threads: Forwarded to the base pipeline constructor.
            device_id: Forwarded to the base pipeline constructor.
        """
        super(SimCLRTrainDataTransform, self).__init__(
            batch_size, num_threads, device_id, seed=12)

        self.COPIES = 3

        self.input_height = input_height
        self.input = ops.FileReader(
            file_root=DATA_PATH, random_shuffle=True, seed=12)

        self.coin = ops.CoinFlip(probability=0.5)
        # Uniform samples drawn from the range [0.7, 1.3].
        self.uniform = ops.Uniform(range=[0.7, 1.3])

        # Files are read on the CPU; the 'mixed' decoder is used to get
        # the decoded image onto the GPU.
        self.decode = ops.ImageDecoder(device='mixed', output_type=types.RGB)
        self.crop = ops.RandomResizedCrop(size=self.input_height, device="gpu")

        # The random generators are invoked here, one call per argument,
        # and the results are handed to the operator constructors.
        flip_vertical = self.coin()
        flip_horizontal = self.coin()
        self.flip = ops.Flip(
            vertical=flip_vertical, horizontal=flip_horizontal, device="gpu")

        brightness = self.uniform()
        contrast = self.uniform()
        hue = self.uniform()
        saturation = self.uniform()
        self.colorjit_gray = ops.ColorTwist(brightness=brightness,
                                            contrast=contrast,
                                            hue=hue,
                                            saturation=saturation,
                                            device="gpu")

        blur_window = int(0.1 * self.input_height)
        self.blur = ops.GaussianBlur(window_size=blur_window, device="gpu")

        target_shape = [-1, self.input_height, self.input_height]
        self.swapaxes = ops.Reshape(shape=target_shape, device="gpu")
예제 #14
0
 def __init__(self, device, batch_size, use_wildcard, num_threads=3, device_id=0, num_gpus=1):
     """Create a pipeline whose reshape target comes from a Python function.

     Args:
         device: Device string for the ``Reshape`` operator; also stored
             on ``self.device``.
         batch_size: Forwarded to the base pipeline constructor.
         use_wildcard: Selects ``CollapseChannelsWildcard`` instead of
             ``CollapseChannels`` as the shape-generating function.
         num_threads: Forwarded to the base pipeline constructor.
         device_id: Forwarded to the base constructor and used as the
             reader's ``shard_id``.
         num_gpus: Used as the reader's ``num_shards``.
     """
     super(ReshapeWithInput, self).__init__(
         batch_size,
         num_threads,
         device_id,
         seed=7865,
         exec_async=False,
         exec_pipelined=False)
     self.device = device
     self.input = ops.CaffeReader(
         path=caffe_db_folder, shard_id=device_id, num_shards=num_gpus)
     self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

     if use_wildcard:
         shape_fn = CollapseChannelsWildcard
     else:
         shape_fn = CollapseChannels
     self.gen_shapes = ops.PythonFunction(function=shape_fn)
     self.reshape = ops.Reshape(device=device, layout="ab")
예제 #15
0
 def __init__(self, device, batch_size, relative, use_wildcard, num_threads=3, device_id=0, num_gpus=1):
     """Create a pipeline whose reshape target is supplied as an argument input.

     Args:
         device: Device string for the ``Reshape`` operator; also stored
             on ``self.device``.
         batch_size: Forwarded to the base pipeline constructor.
         relative: Passed to ``MakeTallFunc`` and stored on
             ``self.relative``.
         use_wildcard: Passed to ``MakeTallFunc``.
         num_threads: Forwarded to the base pipeline constructor.
         device_id: Forwarded to the base constructor and used as the
             reader's ``shard_id``.
         num_gpus: Used as the reader's ``num_shards``.
     """
     super(ReshapeWithArgInput, self).__init__(
         batch_size,
         num_threads,
         device_id,
         seed=7865,
         exec_async=False,
         exec_pipelined=False)
     self.device = device
     self.input = ops.CaffeReader(
         path=caffe_db_folder, shard_id=device_id, num_shards=num_gpus)
     self.resize = ops.Resize(device="cpu")
     self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

     shape_fn = MakeTallFunc(relative, use_wildcard)
     self.gen_shapes = ops.PythonFunction(function=shape_fn)
     self.reshape = ops.Reshape(device=device)
     self.relative = relative
예제 #16
0
 def __init__(self,  batch_size,function,  num_threads=1, device_id=0, num_gpus=1 ):
     """Create a pipeline that applies ``function`` to decoded images.

     Args:
         batch_size: Forwarded to the base pipeline constructor.
         function: Callable wrapped in ``ops.PythonFunction``.
         num_threads: Forwarded to the base pipeline constructor.
         device_id: Forwarded to the base constructor and used as the
             reader's ``shard_id``.
         num_gpus: Used as the reader's ``num_shards``.
     """
     super(WaterPythonPipeline, self).__init__(
         batch_size, num_threads, device_id,
         exec_async=False, exec_pipelined=False)
     self.input = ops.CaffeReader(
         path=caffe_db_folder, shard_id=device_id, num_shards=num_gpus)
     self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)
     self.water = ops.PythonFunction(function=function)
     self.set_layout = ops.Reshape(layout="HWC")
예제 #17
0
    def __init__(self,
                 device_id,
                 n_devices,
                 file_root,
                 file_list,
                 batch_size,
                 sample_rate=16000,
                 window_size=.02,
                 window_stride=.01,
                 nfeatures=64,
                 nfft=512,
                 frame_splicing_factor=3,
                 silence_threshold=-80,
                 dither=.00001,
                 preemph_coeff=.97,
                 lowfreq=0.0,
                 highfreq=0.0,
                 num_threads=1):
        """Create the CPU operators of the audio feature pipeline.

        Args:
            device_id: Forwarded to the base constructor and used as the
                reader's ``shard_id``.
            n_devices: Used as the reader's ``num_shards``.
            file_root: Forwarded to the file reader.
            file_list: Forwarded to the file reader.
            batch_size: Forwarded to the base pipeline constructor.
            sample_rate: Given to the mel filter bank and multiplied with
                the window parameters below.
            window_size: ``window_size * sample_rate`` is the spectrogram
                ``window_length``.
            window_stride: ``window_stride * sample_rate`` is the
                spectrogram ``window_step``.
            nfeatures: ``nfilter`` of the mel filter bank.
            nfft: ``nfft`` of the spectrogram.
            frame_splicing_factor: Stored on the instance; used by the
                splicing reshape and as the pad alignment.
            silence_threshold: ``cutoff_db`` of ``NonsilentRegion``.
            dither: Stored on ``self.dither``.
            preemph_coeff: Coefficient of the pre-emphasis filter.
            lowfreq: ``freq_low`` of the mel filter bank.
            highfreq: ``freq_high`` of the mel filter bank.
            num_threads: Forwarded to the base pipeline constructor.
        """
        super().__init__(batch_size, num_threads, device_id, seed=42)

        self.dither = dither
        self.frame_splicing_factor = frame_splicing_factor

        # Loading and decoding.
        self.read = ops.readers.File(file_root=file_root,
                                     file_list=file_list,
                                     device="cpu",
                                     shard_id=device_id,
                                     num_shards=n_devices)
        self.decode = ops.AudioDecoder(
            device="cpu", dtype=types.FLOAT, downmix=True)

        self.normal_distribution = ops.random.Normal(device="cpu")
        self.preemph = ops.PreemphasisFilter(preemph_coeff=preemph_coeff)

        # Spectral features; window parameters are scaled by the sample rate.
        window_length = window_size * sample_rate
        window_step = window_stride * sample_rate
        self.spectrogram = ops.Spectrogram(device="cpu",
                                           nfft=nfft,
                                           window_length=window_length,
                                           window_step=window_step)
        self.mel_fbank = ops.MelFilterBank(device="cpu",
                                           sample_rate=sample_rate,
                                           nfilter=nfeatures,
                                           normalize=True,
                                           freq_low=lowfreq,
                                           freq_high=highfreq)
        self.log_features = ops.ToDecibels(device="cpu",
                                           multiplier=np.log(10),
                                           reference=1.0,
                                           cutoff_db=-80)

        self.get_shape = ops.Shapes(device="cpu")
        self.normalize = ops.Normalize(axes=[0], device="cpu")

        # Frame splicing.
        self.splicing_transpose = ops.Transpose(device="cpu", perm=[1, 0])
        self.splicing_reshape = ops.Reshape(
            device="cpu", rel_shape=[-1, frame_splicing_factor])
        self.splicing_pad = ops.Pad(axes=[0],
                                    fill_value=0,
                                    align=frame_splicing_factor,
                                    shape=[1],
                                    device="cpu")

        # Silence trimming.
        self.get_nonsilent_region = ops.NonsilentRegion(
            device="cpu", cutoff_db=silence_threshold)
        self.trim_silence = ops.Slice(device="cpu", axes=[0])
        self.to_float = ops.Cast(dtype=types.FLOAT)
예제 #18
0
 def __init__(self, function, batch_size, num_threads=1, device_id=0):
     """Create a pipeline that applies ``function`` to decoded TIFF images.

     Args:
         function: Callable wrapped in ``ops.PythonFunction``.
         batch_size: Forwarded to the base pipeline constructor.
         num_threads: Forwarded to the base pipeline constructor.
         device_id: Forwarded to the base pipeline constructor.
     """
     super(MultichannelPythonOpPipeline, self).__init__(
         batch_size, num_threads, device_id,
         exec_async=False, exec_pipelined=False)
     self.reader = ops.FileReader(file_root=multichannel_tiff_root)
     # ANY_DATA is requested from the decoder rather than a fixed
     # colour format.
     self.decoder = ops.ImageDecoder(
         device='cpu', output_type=types.ANY_DATA)
     self.oper = ops.PythonFunction(function=function)
     self.set_layout = ops.Reshape(layout="HWC")
예제 #19
0
 def __init__(self, batch_size, layout, data_iterator):
     """Create a pipeline that flips external data with a NumPy function.

     Args:
         batch_size: Forwarded to the base pipeline constructor.
         layout: Stored on ``self.layout``, passed to ``find_dims`` and
             set by ``self.set_layout``.
         data_iterator: Stored on ``self.iterator``.
     """
     super(SynthPythonFlipPipeline, self).__init__(
         batch_size,
         seed=1234,
         num_threads=4,
         device_id=0,
         exec_async=False,
         exec_pipelined=False)
     self.iterator = data_iterator
     self.layout = layout
     self.input = ops.ExternalSource()
     self.coin = ops.CoinFlip(seed=1234)

     h_dim, v_dim, d_dim = find_dims(layout)

     def flip_sample(d, hor, ver, depth):
         # The axis indices are fixed here; only the data and the three
         # flip flags vary per call.
         return numpy_flip(d, h_dim, v_dim, d_dim, hor, ver, depth)

     self.python_flip = ops.PythonFunction(function=flip_sample)
     self.set_layout = ops.Reshape(layout=layout)
예제 #20
0
    def __init__(self, device, batch_size, dims, axes, axis_names, batch=False,
                 out_type=None, in_type=None, shift=None, scale=None,
                 num_threads=3, device_id=0, num_gpus=1):
        """Create the ``Normalize`` variants and Python reference operators.

        Args:
            device: Device string for the ``Normalize`` operators.
            batch_size: Forwarded to the base pipeline constructor.
            dims: Number of dimensions; decides how many letters the
                generated layout string has.
            axes: Axis indices passed to ``Normalize``. When
                ``axis_names`` is given, ``self.axes`` is derived from the
                names instead.
            axis_names: Axis letters passed to ``Normalize``; each letter
                maps to an index by its offset from ``'a'``.
            batch: Passed to ``Normalize`` and to the reference functions.
            out_type: Converted with ``dali_type`` for ``Normalize``.
            in_type: Stored on ``self.in_type``.
            shift: Passed to ``Normalize``.
            scale: Passed to ``Normalize``.
            num_threads: Forwarded to the base pipeline constructor.
            device_id: Forwarded to the base pipeline constructor.
            num_gpus: Not used by this constructor.
        """
        super(NormalizePipeline, self).__init__(
            batch_size, num_threads, device_id, seed=7865,
            exec_async=False, exec_pipelined=False)

        # Captured before ``axes`` is rebound below, so the operators get
        # the caller's original ``axes`` value.
        common_args = dict(device=device,
                           axes=axes,
                           axis_names=axis_names,
                           batch=batch,
                           dtype=dali_type(out_type),
                           shift=shift,
                           scale=scale)

        self.in_type = in_type
        self.out_type = out_type
        self.device = device
        self.input = ops.ExternalSource()
        self.batch = batch
        self.dims = dims
        self.has_axes = axes is not None or axis_names is not None
        self.scale = scale
        self.shift = shift
        self.is_integral = out_type is not None and out_type is not np.float32

        self.add_layout = None
        if axis_names is not None:
            # Layout is the first ``dims`` lowercase letters: "a", "ab", ...
            layout = ''.join(chr(ord('a') + i) for i in range(dims))
            self.add_layout = ops.Reshape(layout=layout)
            # Translate axis letters back to indices.
            axes = [ord(letter) - ord('a') for letter in axis_names]

        self.axes = axes
        self.axis_names = axis_names
        self.ddof = 2 if axes is not None and len(axes) > 0 else 0
        self.eps = 0.25

        mean_fn = custom_mean(batch, axes)
        stddev_fn = custom_stddev(batch, axes)
        self.mean = ops.PythonFunction(function=mean_fn, batch_processing=True)
        self.stddev = ops.PythonFunction(function=stddev_fn, batch_processing=True)

        self.normalize = ops.Normalize(**common_args, ddof=self.ddof)
        self.scalar_mean = ops.Normalize(
            **common_args, mean=1, ddof=self.ddof, epsilon=self.eps)
        self.scalar_stddev = ops.Normalize(
            **common_args, stddev=2, epsilon=self.eps)
        self.scalar_params = ops.Normalize(**common_args, mean=1, stddev=2)
예제 #21
0
    def __new__(cls, shape=None, **kwargs):
        """Build and return a DALI ``Reshape`` operator.

        The device is taken from ``context.get_device_type()``; all other
        keyword arguments are forwarded unchanged.

        Parameters
        ----------
        shape : Sequence[int], optional
            The requested output shape, forwarded as ``shape``.
        **kwargs
            Extra keyword arguments forwarded to ``ops.Reshape``.

        Returns
        -------
        nvidia.dali.ops.Reshape
            The newly created operator.

        """
        device_type = context.get_device_type()
        return ops.Reshape(shape=shape, device=device_type, **kwargs)
예제 #22
0
 def __init__(self,
              function,
              batch_size,
              layout,
              iterator,
              num_threads=1,
              device_id=0):
     """Create a pipeline that crops sequences through a Python function.

     Args:
         function: Callable wrapped in ``ops.PythonFunction``.
         batch_size: Forwarded to the base pipeline constructor.
         layout: Stored on ``self.layout`` and set by ``self.set_layout``.
         iterator: Stored on ``self.iterator``.
         num_threads: Forwarded to the base pipeline constructor.
         device_id: Forwarded to the base pipeline constructor.
     """
     super(CropSequencePythonOpPipeline, self).__init__(
         batch_size, num_threads, device_id,
         exec_async=False, exec_pipelined=False)
     self.layout = layout
     self.iterator = iterator
     self.inputs = ops.ExternalSource()
     self.crop = ops.PythonFunction(function=function)
     self.set_layout = ops.Reshape(layout=layout)
예제 #23
0
 def __init__(self,
              device,
              batch_size,
              num_threads=3,
              device_id=0,
              num_gpus=1):
     """Create a read -> decode -> resize -> reshape pipeline with fixed sizes.

     Args:
         device: Device string for the ``Reshape`` operator; also stored
             on ``self.device``.
         batch_size: Forwarded to the base pipeline constructor.
         num_threads: Forwarded to the base pipeline constructor.
         device_id: Forwarded to the base constructor and used as the
             reader's ``shard_id``.
         num_gpus: Used as the reader's ``num_shards``.
     """
     super(ReshapePipeline, self).__init__(
         batch_size, num_threads, device_id,
         seed=7865, exec_async=True, exec_pipelined=True)
     self.device = device
     self.input = ops.CaffeReader(
         path=caffe_db_folder, shard_id=device_id, num_shards=num_gpus)
     self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

     # Images are resized to side x side; the reshape target is
     # (side, side * 3).
     side = 224
     self.resize = ops.Resize(device="cpu", resize_x=side, resize_y=side)
     self.reshape = ops.Reshape(
         device=device, shape=(side, side * 3), layout="ab")
예제 #24
0
 def __init__(self,
              batch_size,
              output_type,
              input_type,
              fixed_size,
              num_threads=3,
              device_id=0,
              num_gpus=1):
     """Create the operators of the Python-function rotate pipeline.

     Args:
         batch_size: Forwarded to the base pipeline constructor.
         output_type: Passed to ``CVRotate``.
         input_type: Passed to ``CVRotate``.
         fixed_size: Passed to ``CVRotate``.
         num_threads: Forwarded to the base pipeline constructor.
         device_id: Forwarded to the base constructor and used as the
             reader's ``shard_id``.
         num_gpus: Used as the reader's ``num_shards``.
     """
     super(CVPipeline, self).__init__(
         batch_size, num_threads, device_id,
         seed=7865, exec_async=False, exec_pipelined=False)
     self.name = "cv"
     self.input = ops.CaffeReader(
         path=caffe_db_folder, shard_id=device_id, num_shards=num_gpus)
     self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

     rotate_callable = CVRotate(output_type, input_type, fixed_size)
     self.rotate = ops.PythonFunction(function=rotate_callable)
     self.set_layout = ops.Reshape(layout="HWC")
     # Uniform generator over (-180, 180) with a fixed seed.
     self.uniform = ops.Uniform(range=(-180.0, 180.0), seed=42)
     self.iter = 0
예제 #25
0
    def __init__(self,
                 directory,
                 supervised: bool = True,
                 sequence_length: int = 11,
                 batch_size: int = 1,
                 num_workers: int = 1,
                 gpu_id: int = 0,
                 shuffle: bool = True,
                 crop_size: tuple = (256, 256),
                 resize: tuple = None,
                 brightness: float = 0.25,
                 contrast: float = 0.1,
                 mean: list = None,
                 std: list = None,
                 conv_mode='3d',
                 image_shape=(256, 256),
                 validate: bool = False):
        """Create the GPU video reader and augmentation operators.

        Args:
            directory: ``file_root`` of the video reader.
            supervised: Stored on ``self.supervised``.
            sequence_length: Reader ``sequence_length``.
            batch_size: Forwarded to the base pipeline constructor.
            num_workers: Forwarded to the base constructor as the thread
                count argument.
            gpu_id: Forwarded to the base constructor as the device id.
            shuffle: Reader ``random_shuffle``.
            crop_size: ``crop`` of ``CropMirrorNormalize``; when not
                ``None`` it also supplies ``H, W`` for the 2-D reshape.
            resize: Not used by this constructor.
            brightness: Half-width around 1 of the brightness range.
            contrast: Half-width around 1 of the contrast range.
            mean: Normalization mean; ``None`` means ``[0.5, 0.5, 0.5]``.
            std: Normalization std; ``None`` means ``[0.5, 0.5, 0.5]``.
            conv_mode: ``'3d'`` or ``'2d'``; selects the transpose
                permutation and whether a reshape operator is created.
            image_shape: Fallback ``(H, W)`` when ``crop_size`` is ``None``.
            validate: Stored on ``self.validate``.
        """
        # Build the default lists per call instead of sharing one mutable
        # list object between all instances.
        if mean is None:
            mean = [0.5, 0.5, 0.5]
        if std is None:
            std = [0.5, 0.5, 0.5]

        super().__init__(batch_size,
                         num_workers,
                         gpu_id,
                         prefetch_queue_depth=1)
        self.input = ops.VideoReader(additional_decode_surfaces=1,
                                     channels=3,
                                     device="gpu",
                                     dtype=types.FLOAT,
                                     enable_frame_num=False,
                                     enable_timestamps=False,
                                     file_root=directory,
                                     image_type=types.RGB,
                                     initial_fill=1,
                                     lazy_init=False,
                                     normalized=True,
                                     num_shards=1,
                                     pad_last_batch=False,
                                     prefetch_queue_depth=1,
                                     random_shuffle=shuffle,
                                     sequence_length=sequence_length,
                                     skip_vfr_check=True,
                                     step=-1,
                                     shard_id=0,
                                     stick_to_shard=False,
                                     stride=1)

        self.uniform = ops.Uniform(range=(0.0, 1.0))
        self.cmn = ops.CropMirrorNormalize(device='gpu',
                                           crop=crop_size,
                                           mean=mean,
                                           std=std,
                                           output_layout=types.NFHWC)

        self.coin = ops.CoinFlip(probability=0.5)
        self.brightness_val = ops.Uniform(
            range=[1 - brightness, 1 + brightness])
        self.contrast_val = ops.Uniform(range=[1 - contrast, 1 + contrast])
        self.supervised = supervised
        self.half = ops.Constant(fdata=0.5)
        self.zero = ops.Constant(idata=0)
        self.cast_to_long = ops.Cast(device='gpu', dtype=types.INT64)

        # Spatial size for the 2-D reshape: the crop size when cropping,
        # otherwise the declared image shape.
        if crop_size is not None:
            H, W = crop_size
        else:
            H, W = image_shape

        # NOTE(review): for any ``conv_mode`` other than '3d' or '2d',
        # ``self.transpose`` and ``self.reshape`` are never assigned.
        # Confirm whether that should raise instead.
        if conv_mode == '3d':
            self.transpose = ops.Transpose(device="gpu", perm=[3, 0, 1, 2])
            self.reshape = None
        elif conv_mode == '2d':
            self.transpose = ops.Transpose(device='gpu', perm=[0, 3, 1, 2])
            self.reshape = ops.Reshape(device='gpu', shape=[-1, H, W])
        self.validate = validate
예제 #26
0
    def __init__(self,
                 device_id,
                 num_threads,
                 resample_range: list,
                 sample_rate=16000,
                 window_size=0.02,
                 window_stride=0.01,
                 window="hann",
                 normalize="per_feature",
                 n_fft=None,
                 preemph=0.97,
                 nfilt=64,
                 lowfreq=0,
                 highfreq=0,
                 log=True,
                 dither=constant,
                 pad_to=8,
                 max_duration=15.0,
                 frame_splicing=3,
                 batch_size=1,
                 total_samples=16,
                 audio_fp16_input=True,
                 device='gpu'):
        """Create the DALI operators used for audio feature extraction.

        Args:
            device_id: Forwarded to the base pipeline constructor.
            num_threads: Forwarded to the base pipeline constructor.
            resample_range: Not used by this constructor beyond being
                captured by the ``locals()`` snapshot that is logged.
            sample_rate: Sampling rate; scales the window/hop lengths and is
                passed to the mel filter bank.
            window_size: Window length as a multiple of ``sample_rate``
                (i.e. ``win_length = int(sample_rate * window_size)``).
            window_stride: Hop length as a multiple of ``sample_rate``.
            window: Name of the window function: ``'hann'``, ``'hamming'``,
                ``'blackman'``, ``'bartlett'`` or ``'none'``.  ``'none'``
                (or an unrecognised name) selects a rectangular window.
            normalize: Passed as ``normalize`` to ``ops.MelFilterBank``.
            n_fft: FFT size.  When falsy, the smallest power of two that is
                not below ``win_length`` is used.
            preemph: Coefficient passed to ``ops.PreemphasisFilter``.
            nfilt: Number of mel filters.
            lowfreq: Lower frequency bound of the mel filter bank.
            highfreq: Upper frequency bound of the mel filter bank.  When
                falsy, ``sample_rate / 2`` is used.
            log: Stored on the instance as ``self.log``.
            dither: Stored on the instance as ``self.dither``.
            pad_to: Stored on the instance as ``self.pad_to``.
            max_duration: Not used by this constructor.
            frame_splicing: Used as the trailing relative dimension of the
                splicing reshape and as the alignment of the splicing pad.
            batch_size: Forwarded to the base pipeline constructor.
            total_samples: Stored on the instance as ``self.total_samples``.
            audio_fp16_input: Stored on the instance as
                ``self.audio_fp16_input``.
            device: Device string handed to every operator created here.
        """
        super().__init__(batch_size, num_threads, device_id,
                         exec_async=True, exec_pipelined=True, seed=12,
                         prefetch_queue_depth=1)

        # Must stay directly after super().__init__ so that the snapshot only
        # contains the constructor arguments (no later locals).
        self._dali_init_log(locals())

        torch_windows = {
            'hann': torch.hann_window,
            'hamming': torch.hamming_window,
            'blackman': torch.blackman_window,
            'bartlett': torch.bartlett_window,
            'none': None,
        }

        # Plain configuration values kept on the instance.
        self.audio_fp16_input = audio_fp16_input
        self.total_samples = total_samples
        self.sample_rate = sample_rate
        self.window_size = window_size
        self.window_stride = window_stride
        self.win_length = int(sample_rate * window_size)  # frame size
        self.hop_length = int(sample_rate * window_stride)
        self.n_fft = n_fft or 2 ** math.ceil(math.log2(self.win_length))
        self.log = log
        self.dither = dither
        self.frame_splicing = frame_splicing
        self.nfilt = nfilt
        self.pad_to = pad_to
        self.lowfreq = lowfreq
        self.highfreq = highfreq or sample_rate / 2
        self.device = device

        window_fn = torch_windows.get(window)
        if window_fn is None:
            # No window function selected: previously this path crashed on
            # ``None.tolist()``.  Use an all-ones (rectangular) window, which
            # is what torch.stft applies when it is given ``window=None``.
            window_tensor = None
            win_unpadded = [1.0] * self.win_length
        else:
            window_tensor = window_fn(self.win_length, periodic=False)
            win_unpadded = window_tensor.tolist()
        self.window = window_tensor

        # Right-pad the window coefficients with zeros up to the FFT size.
        win_padded = win_unpadded + [0] * (self.n_fft - len(win_unpadded))

        print("self.n_fft = {}".format(self.n_fft))
        print("self.hop_length = {}".format(self.hop_length))
        print("self.win_length = {}".format(self.win_length))
        print("self.window_tensor = {}".format(self.window))
        print("self.sample_rate = {}".format(self.sample_rate))
        print("self.window_size = {}".format(self.window_size))
        print("self.window_stride = {}".format(self.window_stride))
        print("self.lowfreq = {}".format(self.lowfreq))
        print("self.device = {}".format(self.device))

        self.extsrc = ops.ExternalSource(name="INPUT_0", device=self.device,
                                         no_copy=True)

        # NOTE: self.preemph holds the operator, not the scalar coefficient.
        self.preemph = ops.PreemphasisFilter(preemph_coeff=preemph,
                                             device=self.device)

        self.spectrogram = ops.Spectrogram(device=self.device,
                                           nfft=self.n_fft,
                                           center_windows=True,
                                           window_fn=win_padded,
                                           window_length=len(win_padded),
                                           window_step=self.hop_length)
        self.mel_fbank = ops.MelFilterBank(device=self.device,
                                           sample_rate=self.sample_rate,
                                           nfilter=self.nfilt,
                                           freq_high=self.highfreq,
                                           freq_low=self.lowfreq,
                                           normalize=normalize)

        self.log_features = ops.ToDecibels(device=self.device,
                                           multiplier=np.log(10),
                                           reference=1.0,
                                           cutoff_db=math.log(1e-20))

        self.get_shape = ops.Shapes(device=self.device)

        # NOTE: self.normalize holds the operator, not the ``normalize``
        # argument (that one is only forwarded to the mel filter bank above).
        self.normalize = ops.Normalize(axes=[0], device=self.device, ddof=1)

        self.pad = ops.Pad(axes=[0, 1], fill_value=0, shape=[502, 240],
                           device=self.device)

        # Frame splicing
        self.splicing_transpose = ops.Transpose(device=self.device,
                                                perm=[1, 0])
        self.splicing_reshape = ops.Reshape(
            device=self.device, rel_shape=[-1, self.frame_splicing])
        self.splicing_pad = ops.Pad(axes=[0], fill_value=0,
                                    align=self.frame_splicing, shape=[1],
                                    device=self.device)

        self.to_float16 = ops.Cast(dtype=types.FLOAT16, device=self.device)
        self.to_float32 = ops.Cast(dtype=types.FLOAT, device=self.device)

        self.samples_done = 0