Exemple #1
0
    def __init__(self,
                 device,
                 batch_size,
                 pos_size_iter,
                 num_threads=1,
                 device_id=0,
                 is_fused_decoder=False,
                 axes=None,
                 axis_names=None,
                 normalized_anchor=True,
                 normalized_shape=True):
        """Create the reader, the crop-window inputs and the decode/slice ops.

        With ``is_fused_decoder`` set, a single ``ImageDecoderSlice`` operator
        is built; otherwise a plain ``ImageDecoder`` plus a separate ``Slice``
        operator placed on ``device`` are built.

        ``axis_names`` wins over ``axes`` when both are truthy; when both are
        falsy neither argument is forwarded to the operator.
        """
        super(SlicePipeline, self).__init__(
            batch_size, num_threads, device_id, seed=1234)

        self.is_fused_decoder = is_fused_decoder
        self.pos_size_iter = pos_size_iter
        self.device = device

        # Data source plus the two externally fed crop-window inputs.
        self.input = ops.CaffeReader(path=caffe_db_folder, random_shuffle=False)
        self.input_crop_pos = ops.ExternalSource()
        self.input_crop_size = ops.ExternalSource()

        # Slicing arguments common to the fused and the stand-alone operator.
        slice_args = {
            "normalized_anchor": normalized_anchor,
            "normalized_shape": normalized_shape,
        }
        if axis_names:
            slice_args["axis_names"] = axis_names
        elif axes:
            slice_args["axes"] = axes

        if self.is_fused_decoder:
            # Decoding and slicing happen in one CPU operator.
            self.decode = ops.ImageDecoderSlice(
                device="cpu", output_type=types.RGB, **slice_args)
        else:
            # Decode on CPU, then slice on the requested device.
            self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)
            self.slice = ops.Slice(device=self.device, **slice_args)
Exemple #2
0
    def __init__(self,
                 device,
                 batch_size,
                 layout,
                 iterator,
                 pos_size_iter,
                 num_threads=1,
                 device_id=0,
                 num_gpus=1,
                 axes=None,
                 axis_names=None,
                 normalized_anchor=True,
                 normalized_shape=True):
        """Create the external inputs and the ``Slice`` operator.

        ``axis_names`` takes precedence over ``axes``; if both are falsy the
        operator is created without either argument.  ``num_gpus`` is accepted
        but not used inside this constructor.
        """
        super(SliceSynthDataPipeline, self).__init__(batch_size,
                                                     num_threads,
                                                     device_id,
                                                     seed=1234)
        self.device = device
        self.layout = layout
        self.iterator = iterator
        self.pos_size_iter = pos_size_iter

        # Data and crop window are all fed from outside the pipeline.
        self.inputs = ops.ExternalSource()
        self.input_crop_pos = ops.ExternalSource()
        self.input_crop_size = ops.ExternalSource()

        # Pick at most one way of naming the sliced axes.
        axis_args = {}
        if axis_names:
            axis_args["axis_names"] = axis_names
        elif axes:
            axis_args["axes"] = axes

        self.slice = ops.Slice(device=self.device,
                               normalized_anchor=normalized_anchor,
                               normalized_shape=normalized_shape,
                               **axis_args)
Exemple #3
0
    def __init__(self, batch_size, num_threads, device_id, prefetch, seed):
        """Instantiate the decode, crop, slice and flip operators.

        ``prefetch`` is forwarded to the base class as
        ``prefetch_queue_depth``; ``seed`` is handed only to the two random
        crop operators (it is not passed to the base class here).
        """
        super(CommonPipeline, self).__init__(
            batch_size, num_threads, device_id, prefetch_queue_depth=prefetch)

        # Host-side decoders: full image and fused decode+slice.
        rgb = types.RGB
        self.decode_cpu = ops.HostDecoder(device="cpu", output_type=rgb)
        self.decode_crop = ops.HostDecoderSlice(device="cpu", output_type=rgb)

        # Two alternative random-crop generators, both seeded explicitly.
        self.crop = ops.SSDRandomCrop(device="cpu", num_attempts=1, seed=seed)
        bbox_crop_args = {
            "device": "cpu",
            "aspect_ratio": [0.5, 2.0],
            "thresholds": [0, 0.1, 0.3, 0.5, 0.7, 0.9],
            "scaling": [0.3, 1.0],
            "ltrb": True,
            "seed": seed,
        }
        self.crop2 = ops.RandomBBoxCrop(**bbox_crop_args)

        # Slicing on either backend.
        self.slice_cpu = ops.Slice(device="cpu")
        self.slice_gpu = ops.Slice(device="gpu")

        # Image flip and matching bounding-box flip, CPU flavour ...
        self.flip_cpu = ops.Flip(device="cpu")
        self.bb_flip_cpu = ops.BbFlip(device="cpu", ltrb=True)

        # ... and GPU flavour.
        self.flip_gpu = ops.Flip(device="gpu")
        self.bb_flip_gpu = ops.BbFlip(device="gpu", ltrb=True)
Exemple #4
0
 def __init__(self, device, batch_size, layout, iterator, pos_size_iter,
              num_threads=1, device_id=0, num_gpus=1,
              axes=None, axis_names=None,
              normalized_anchor=True, normalized_shape=True,
              extra_outputs=False, out_of_bounds_policy=None,
              fill_values=None):
     """Create the external inputs and a fully parameterised ``Slice`` op.

     All slicing options (``axes``, ``axis_names``, the two normalisation
     flags, ``out_of_bounds_policy`` and ``fill_values``) are forwarded to
     ``ops.Slice`` unconditionally, including when they are ``None``.
     ``num_gpus`` is accepted but not used inside this constructor.
     """
     super(SliceSynthDataPipeline, self).__init__(
         batch_size, num_threads, device_id, seed=1234)

     # Plain configuration kept for later use by the pipeline.
     self.device = device
     self.layout = layout
     self.iterator = iterator
     self.pos_size_iter = pos_size_iter

     # Data and crop window are all fed from outside the pipeline.
     self.inputs = ops.ExternalSource()
     self.input_crop_pos = ops.ExternalSource()
     self.input_crop_size = ops.ExternalSource()
     self.extra_outputs = extra_outputs

     slice_options = {
         "normalized_anchor": normalized_anchor,
         "normalized_shape": normalized_shape,
         "axes": axes,
         "axis_names": axis_names,
         "out_of_bounds_policy": out_of_bounds_policy,
         "fill_values": fill_values,
     }
     self.slice = ops.Slice(device=self.device, **slice_options)
    def __init__(self,
                 device,
                 batch_size,
                 pos_size_iter,
                 num_threads=1,
                 device_id=0,
                 is_fused_decoder=False):
        """Create the reader, the crop-window inputs and the decode/slice ops.

        With ``is_fused_decoder`` set only an ``ImageDecoderSlice`` operator
        is created; otherwise an ``ImageDecoder`` and a ``Slice`` operator on
        ``device`` are created.
        """
        super(SlicePipeline, self).__init__(
            batch_size, num_threads, device_id, seed=1234)

        self.is_fused_decoder = is_fused_decoder
        self.pos_size_iter = pos_size_iter
        self.device = device

        # Data source plus the two externally fed crop-window inputs.
        self.input = ops.CaffeReader(path=caffe_db_folder, random_shuffle=False)
        self.input_crop_pos = ops.ExternalSource()
        self.input_crop_size = ops.ExternalSource()

        if not self.is_fused_decoder:
            # Separate decode (CPU) and slice (requested device) stages.
            self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)
            self.slice = ops.Slice(device=device, image_type=types.RGB)
        else:
            # One CPU operator that decodes and slices together.
            self.decode = ops.ImageDecoderSlice(device='cpu',
                                                output_type=types.RGB)
Exemple #6
0
    def __new__(cls,
                axes=(1, 0),
                normalized_anchor=True,
                normalized_shape=True,
                **kwargs):
        """Create a ``Slice`` operator.

        Parameters
        ----------
        axes : Sequence[int], optional
            The axis to select.
        normalized_anchor : bool, optional, default=True
            Whether the begin of interval is normalized.
        normalized_shape : bool, optional, default=True
            Whether the size of interval is normalized.
        **kwargs
            Any further keyword arguments, forwarded unchanged to
            ``ops.Slice``.

        Returns
        -------
        nvidia.dali.ops.Slice
            The operator.

        """
        # ``normalized_shape`` must be forwarded explicitly; otherwise the
        # caller's choice is dropped and the operator's own default is used.
        return ops.Slice(axes=axes,
                         normalized_anchor=normalized_anchor,
                         normalized_shape=normalized_shape,
                         device=context.get_device_type(),
                         **kwargs)
    def __init__(self,
                 device_id,
                 n_devices,
                 file_root,
                 file_list,
                 batch_size,
                 sample_rate=16000,
                 window_size=.02,
                 window_stride=.01,
                 nfeatures=64,
                 nfft=512,
                 frame_splicing_factor=3,
                 silence_threshold=-80,
                 dither=.00001,
                 preemph_coeff=.97,
                 lowfreq=0.0,
                 highfreq=0.0,
                 num_threads=1):
        """Instantiate the CPU operators of an audio feature pipeline.

        The reader is sharded with ``shard_id=device_id`` and
        ``num_shards=n_devices``.  ``window_size`` and ``window_stride`` are
        multiplied by ``sample_rate`` to obtain the spectrogram window length
        and step.  ``dither`` and ``frame_splicing_factor`` are stored on the
        instance; the remaining arguments configure individual operators.
        """
        super().__init__(batch_size, num_threads, device_id, seed=42)

        self.dither = dither
        self.frame_splicing_factor = frame_splicing_factor

        # File reading and audio decoding.
        self.read = ops.readers.File(device="cpu",
                                     file_root=file_root,
                                     file_list=file_list,
                                     shard_id=device_id,
                                     num_shards=n_devices)
        self.decode = ops.AudioDecoder(device="cpu",
                                       dtype=types.FLOAT,
                                       downmix=True)

        # Noise source and pre-emphasis.
        self.normal_distribution = ops.random.Normal(device="cpu")
        self.preemph = ops.PreemphasisFilter(preemph_coeff=preemph_coeff)

        # Spectrogram -> mel filter bank -> decibel scale.
        self.spectrogram = ops.Spectrogram(
            device="cpu",
            nfft=nfft,
            window_length=window_size * sample_rate,
            window_step=window_stride * sample_rate)
        self.mel_fbank = ops.MelFilterBank(device="cpu",
                                           sample_rate=sample_rate,
                                           nfilter=nfeatures,
                                           normalize=True,
                                           freq_low=lowfreq,
                                           freq_high=highfreq)
        self.log_features = ops.ToDecibels(device="cpu",
                                           multiplier=np.log(10),
                                           reference=1.0,
                                           cutoff_db=-80)

        self.get_shape = ops.Shapes(device="cpu")
        self.normalize = ops.Normalize(axes=[0], device="cpu")

        # Operators used for frame splicing.
        self.splicing_transpose = ops.Transpose(device="cpu", perm=[1, 0])
        self.splicing_reshape = ops.Reshape(
            device="cpu", rel_shape=[-1, frame_splicing_factor])
        self.splicing_pad = ops.Pad(axes=[0],
                                    fill_value=0,
                                    align=frame_splicing_factor,
                                    shape=[1],
                                    device="cpu")

        # Silence detection and trimming.
        self.get_nonsilent_region = ops.NonsilentRegion(
            device="cpu", cutoff_db=silence_threshold)
        self.trim_silence = ops.Slice(device="cpu", axes=[0])
        self.to_float = ops.Cast(dtype=types.FLOAT)
    def __init__(self, batch_size, num_threads, device_id):
        """Instantiate the operators of a COCO pipeline.

        The base pipeline is created with asynchronous and pipelined
        execution both disabled.  The reader is sharded by ``device_id`` over
        the module-level ``num_gpus`` and reads from the module-level
        ``file_root`` / ``annotations_file``.
        """
        super(COCOPipeline, self).__init__(batch_size,
                                           num_threads,
                                           device_id,
                                           exec_async=False,
                                           exec_pipelined=False,
                                           seed=15)

        # Input and decoding.
        self.input = ops.COCOReader(file_root=file_root,
                                    annotations_file=annotations_file,
                                    shard_id=device_id,
                                    num_shards=num_gpus,
                                    ratio=True,
                                    ltrb=True)
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)

        # Flip for images (GPU) and for boxes (CPU).
        self.flip = ops.Flip(device="gpu")
        self.bbflip = ops.BbFlip(device="cpu", ltrb=True)

        # Random generators.
        self.paste_pos = ops.Uniform(range=(0, 1))
        self.paste_ratio = ops.Uniform(range=(1, 2))
        self.coin = ops.CoinFlip(probability=0.5)
        self.coin2 = ops.CoinFlip(probability=0.5)

        # Paste for images (GPU) and for boxes (CPU).
        self.paste = ops.Paste(device="gpu", fill_value=(32, 64, 128))
        self.bbpaste = ops.BBoxPaste(device="cpu", ltrb=True)

        # Box-aware random crop followed by the actual slicing.
        self.prospective_crop = ops.RandomBBoxCrop(
            device="cpu",
            aspect_ratio=[0.5, 2.0],
            thresholds=[0.1, 0.3, 0.5],
            scaling=[0.8, 1.0],
            ltrb=True)
        self.slice = ops.Slice(device="gpu")

        # resize
        self.resize = ops.Resize(device="gpu",
                                 interp_type=types.INTERP_LINEAR,
                                 resize_shorter=800,
                                 max_size=1200)

        self.shape = ops.Shapes(device="gpu")

        # normalize and convert hwc to chw
        channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
        channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
        self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                            output_dtype=types.FLOAT,
                                            output_layout=types.NCHW,
                                            image_type=types.RGB,
                                            mean=channel_mean,
                                            std=channel_std)

        # padding axes=(0,1) -> hwc, axes=(1,2) -> chw
        self.padding = ops.Pad(device="gpu",
                               fill_value=0,
                               axes=(1, 2),
                               shape=(800, 1200))
Exemple #9
0
    def __init__(self,
                 batch_size,
                 file_root,
                 annotations_file,
                 default_boxes,
                 seed,
                 device_id=0,
                 num_threads=4):
        """Instantiate the operators of a 300x300 COCO detection pipeline.

        ``default_boxes`` must provide ``as_ltrb_list()``; its result is used
        as the anchor list of the box encoder.
        """
        super(COCOPipeline, self).__init__(batch_size=batch_size,
                                           device_id=device_id,
                                           num_threads=num_threads,
                                           seed=seed)

        # Shuffled COCO reader and mixed-device JPEG decoder.
        self.input = ops.COCOReader(file_root=file_root,
                                    annotations_file=annotations_file,
                                    ratio=True,
                                    ltrb=True,
                                    random_shuffle=True)
        self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)

        # Augmentation techniques
        self.crop = ops.RandomBBoxCrop(
            device="cpu",
            aspect_ratio=[0.5, 2.0],
            thresholds=[0.1, 0.3, 0.5, 0.7, 0.9],
            scaling=[0.8, 1.0],
            ltrb=True)
        self.slice = ops.Slice(device="gpu")
        self.twist = ops.ColorTwist(device="gpu")
        self.resize = ops.Resize(device="gpu", resize_x=300, resize_y=300)

        channel_mean = [0.485 * 255., 0.456 * 255., 0.406 * 255.]
        channel_std = [0.229 * 255., 0.224 * 255., 0.225 * 255.]
        self.normalize = ops.CropMirrorNormalize(device="gpu",
                                                 crop=(300, 300),
                                                 mean=channel_mean,
                                                 std=channel_std)

        # Random variables
        self.rng1 = ops.Uniform(range=[0.5, 1.5])
        self.rng2 = ops.Uniform(range=[0.875, 1.125])
        self.rng3 = ops.Uniform(range=[-0.5, 0.5])

        # Flip for images (GPU) and boxes (CPU), driven by a fair coin.
        self.flip = ops.Flip(device="gpu")
        self.bbflip = ops.BbFlip(device="cpu", ltrb=True)
        self.flip_coin = ops.CoinFlip(probability=0.5)

        anchors = default_boxes.as_ltrb_list()
        self.box_encoder = ops.BoxEncoder(device="cpu",
                                          criteria=0.5,
                                          anchors=anchors)
Exemple #10
0
    def __init__(self,
                 batch_size,
                 num_threads,
                 device_id,
                 external_data,
                 is_train=True):
        """Instantiate the operators and open an iterator on the data source.

        ``external_data`` must be iterable; an iterator over it is created
        immediately and stored as ``self.iterator``.
        """
        super(ExternalSourcePipeline, self).__init__(
            batch_size, num_threads, device_id, seed=12)
        self.is_train = is_train

        # Data is fed from outside the pipeline.
        self.input = ops.ExternalSource()

        # Random rotation: angle generator plus the GPU operator.
        self.angle_rng = ops.Uniform(range=(-10.0, 10.0))
        self.rotate = ops.Rotate(device="gpu")

        # Random flip: coin generator plus the GPU operator.
        self.flip_rng = ops.CoinFlip(probability=0.5)
        self.flip = ops.Flip(device='gpu')

        self.slice = ops.Slice()

        # Keep the source itself and an iterator positioned at its start.
        self.external_data = external_data
        self.iterator = iter(self.external_data)
Exemple #11
0
    def __init__(self, default_boxes, args, seed):
        """Instantiate the operators of a 300x300 COCO training pipeline.

        Reads ``batch_size``, ``local_rank``, ``num_workers``,
        ``train_coco_root``, ``train_annotate`` and ``fp16`` from ``args``.
        The reader shard is taken from ``torch.distributed``; if querying it
        raises ``RuntimeError`` a single shard is used.
        """
        super(COCOPipeline, self).__init__(batch_size=args.batch_size,
                                           device_id=args.local_rank,
                                           num_threads=args.num_workers,
                                           seed=seed)

        # Fall back to one shard when the rank/world size cannot be queried.
        try:
            shard = (torch.distributed.get_rank(),
                     torch.distributed.get_world_size())
        except RuntimeError:
            shard = (0, 1)
        shard_id, num_shards = shard

        self.input = ops.COCOReader(
            file_root=args.train_coco_root,
            annotations_file=args.train_annotate,
            skip_empty=True,
            shard_id=shard_id,
            num_shards=num_shards,
            ratio=True,
            ltrb=True,
            random_shuffle=False,
            shuffle_after_epoch=True)

        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

        # Augmentation techniques
        self.crop = ops.RandomBBoxCrop(
            device="cpu",
            aspect_ratio=[0.5, 2.0],
            thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
            scaling=[0.3, 1.0],
            ltrb=True,
            allow_no_crop=True,
            num_attempts=1)
        self.slice = ops.Slice(device="cpu")

        # use float to avoid clipping and quantizing the intermediate result
        self.hsv = ops.Hsv(device="gpu", dtype=types.FLOAT)
        # input is in float, but in 0..255 range
        self.bc = ops.BrightnessContrast(device="gpu",
                                         contrast_center=128,
                                         dtype=types.UINT8)

        self.resize = ops.Resize(
            device="cpu",
            resize_x=300,
            resize_y=300,
            min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

        # Output precision follows the fp16 switch.
        if args.fp16:
            dtype = types.FLOAT16
        else:
            dtype = types.FLOAT

        channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
        channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
        self.normalize = ops.CropMirrorNormalize(device="gpu",
                                                 crop=(300, 300),
                                                 mean=channel_mean,
                                                 std=channel_std,
                                                 mirror=0,
                                                 dtype=dtype,
                                                 output_layout=types.NCHW,
                                                 pad_output=False)

        # Random variables
        self.rng1 = ops.Uniform(range=[0.5, 1.5])
        self.rng2 = ops.Uniform(range=[0.875, 1.125])
        self.rng3 = ops.Uniform(range=[-0.5, 0.5])

        # Flip for images and boxes, driven by a fair coin.
        self.flip = ops.Flip(device="cpu")
        self.bbflip = ops.BbFlip(device="cpu", ltrb=True)
        self.flip_coin = ops.CoinFlip(probability=0.5)

        anchors = default_boxes.as_ltrb_list()
        self.box_encoder = ops.BoxEncoder(device="cpu",
                                          criteria=0.5,
                                          anchors=anchors)
Exemple #12
0
    def __init__(self, args, device_id, file_root, annotations_file):
        """Instantiate CPU and GPU variants of every detection operator.

        Reads ``batch_size``, ``num_workers``, ``prefetch``, ``seed`` and
        ``num_gpus`` from ``args``.  Most operators are created twice, once
        per backend, with otherwise identical arguments.
        """
        super(DetectionPipeline, self).__init__(
            batch_size=args.batch_size,
            num_threads=args.num_workers,
            device_id=device_id,
            prefetch_queue_depth=args.prefetch,
            seed=args.seed)

        # Reading COCO dataset
        self.input = ops.readers.COCO(
            file_root=file_root,
            annotations_file=annotations_file,
            shard_id=device_id,
            num_shards=args.num_gpus,
            ratio=True,
            ltrb=True,
            random_shuffle=True)

        # Decoders: plain and fused decode+slice, on CPU ...
        self.decode_cpu = ops.decoders.Image(device="cpu",
                                             output_type=types.RGB)
        self.decode_crop = ops.decoders.ImageSlice(device="cpu",
                                                   output_type=types.RGB)

        # ... and on the mixed backend with the hardware decoder load at 0.
        mixed_decoder_args = {
            "device": "mixed",
            "output_type": types.RGB,
            "hw_decoder_load": 0,
        }
        self.decode_gpu = ops.decoders.Image(**mixed_decoder_args)
        self.decode_gpu_crop = ops.decoders.ImageSlice(**mixed_decoder_args)

        # Two alternative random-crop generators, both seeded from args.
        self.ssd_crop = ops.SSDRandomCrop(device="cpu",
                                          num_attempts=1,
                                          seed=args.seed)
        self.random_bbox_crop = ops.RandomBBoxCrop(
            device="cpu",
            aspect_ratio=[0.5, 2.0],
            thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
            scaling=[0.3, 1.0],
            bbox_layout="xyXY",
            seed=args.seed)

        self.slice_cpu = ops.Slice(device="cpu")
        self.slice_gpu = ops.Slice(device="gpu")

        # Resize to 300x300.
        resize_args = {
            "resize_x": 300,
            "resize_y": 300,
            "min_filter": types.DALIInterpType.INTERP_TRIANGULAR,
        }
        self.resize_cpu = ops.Resize(device="cpu", **resize_args)
        self.resize_gpu = ops.Resize(device="gpu", **resize_args)

        # Crop and normalise.
        mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
        std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
        crop_size = (300, 300)
        normalize_args = {
            "crop": crop_size,
            "mean": mean,
            "std": std,
            "mirror": 0,
            "dtype": types.FLOAT,
        }
        self.normalize_cpu = ops.CropMirrorNormalize(device="cpu",
                                                     **normalize_args)
        self.normalize_gpu = ops.CropMirrorNormalize(device="gpu",
                                                     **normalize_args)

        # Colour augmentations.
        self.twist_cpu = ops.ColorTwist(device="cpu")
        self.twist_gpu = ops.ColorTwist(device="gpu")

        self.hsv_cpu = ops.Hsv(device="cpu", dtype=types.FLOAT)
        self.hsv_gpu = ops.Hsv(device="gpu", dtype=types.FLOAT)

        bc_args = {"dtype": types.UINT8, "contrast_center": 128}
        self.bc_cpu = ops.BrightnessContrast(device="cpu", **bc_args)
        self.bc_gpu = ops.BrightnessContrast(device="gpu", **bc_args)

        # Image flip and matching bounding-box flip.
        self.flip_cpu = ops.Flip(device="cpu")
        self.bbox_flip_cpu = ops.BbFlip(device="cpu", ltrb=True)

        self.flip_gpu = ops.Flip(device="gpu")
        self.bbox_flip_gpu = ops.BbFlip(device="gpu", ltrb=True)

        # Box encoders: plain and offset-producing, one of each per backend.
        default_boxes = coco_anchors()
        self.box_encoder_cpu = ops.BoxEncoder(device="cpu",
                                              criteria=0.5,
                                              anchors=default_boxes)
        self.box_encoder_gpu = ops.BoxEncoder(device="gpu",
                                              criteria=0.5,
                                              anchors=default_boxes)
        offset_encoder_args = {
            "criteria": 0.5,
            "offset": True,
            "scale": 2,
            "stds": [0.1, 0.1, 0.2, 0.2],
            "anchors": default_boxes,
        }
        self.box_encoder_cpu_offsets = ops.BoxEncoder(device="cpu",
                                                      **offset_encoder_args)
        self.box_encoder_gpu_offsets = ops.BoxEncoder(device="gpu",
                                                      **offset_encoder_args)

        # Random variables
        self.saturation_rng = ops.random.Uniform(range=[0.8, 1.2])
        self.contrast_rng = ops.random.Uniform(range=[0.5, 1.5])
        # NOTE: the attribute name keeps its existing spelling ("brighness").
        self.brighness_rng = ops.random.Uniform(range=[0.875, 1.125])
        self.hue_rng = ops.random.Uniform(range=[-45, 45])
Exemple #13
0
    def __init__(
            self,
            *,
            train_pipeline: bool,  # True if train pipeline, False if validation pipeline
            device_id,
            num_threads,
            batch_size,
            file_root: str,
            file_list: str,
            sample_rate,
            discrete_resample_range: bool,
            resample_range: list,
            window_size,
            window_stride,
            nfeatures,
            nfft,
            frame_splicing_factor,
            dither_coeff,
            silence_threshold,
            preemph_coeff,
            pad_align,
            max_duration,
            mask_time_num_regions,
            mask_time_min,
            mask_time_max,
            mask_freq_num_regions,
            mask_freq_min,
            mask_freq_max,
            mask_both_num_regions,
            mask_both_min_time,
            mask_both_max_time,
            mask_both_min_freq,
            mask_both_max_freq,
            preprocessing_device="gpu"):
        """Instantiate the operators of the audio preprocessing pipeline.

        All arguments are keyword-only.

        * ``train_pipeline`` is stored as ``self.train`` and also controls
          ``shuffle_after_epoch`` of the file reader.
        * ``resample_range`` / ``discrete_resample_range`` select the source
          of speed-perturbation coefficients: an external generator, a
          uniform random operator, or none when ``resample_range`` is
          ``None`` (in which case the decoder resamples to ``sample_rate``).
        * ``window_size`` and ``window_stride`` are multiplied by
          ``sample_rate`` to obtain the spectrogram window length and step.
        * ``silence_threshold`` is the cutoff of the non-silent-region
          detector; ``self.do_remove_silence`` is true unless it is ``None``.
        * The ``mask_*`` arguments are collected in ``self.mask_params``.
        * ``preprocessing_device`` must be ``"cpu"`` or ``"gpu"`` in any
          letter case; the lower-cased value is what the operators receive.

        Raises ``AssertionError`` for an unknown preprocessing device or when
        ``frame_splicing_factor`` is not 1.
        """
        super().__init__(batch_size, num_threads, device_id)

        # Must stay directly after the base-class call so that locals()
        # contains only the constructor arguments.
        self._dali_init_log(locals())

        if torch.distributed.is_initialized():
            shard_id = torch.distributed.get_rank()
            n_shards = torch.distributed.get_world_size()
        else:
            shard_id = 0
            n_shards = 1

        self.preprocessing_device = preprocessing_device.lower()
        assert self.preprocessing_device in ("cpu", "gpu"), \
            "Incorrect preprocessing device. Please choose either 'cpu' or 'gpu'"
        # Hand the validated, lower-cased name to every operator so that a
        # value such as "GPU" is not passed on unnormalised after having
        # passed the check above.
        device = self.preprocessing_device

        self.frame_splicing_factor = frame_splicing_factor
        assert frame_splicing_factor == 1, "DALI doesn't support frame splicing operation"

        self.resample_range = resample_range
        self.discrete_resample_range = discrete_resample_range

        self.train = train_pipeline
        self.sample_rate = sample_rate
        self.dither_coeff = dither_coeff
        self.nfeatures = nfeatures
        self.max_duration = max_duration
        self.mask_params = {
            'time_num_regions': mask_time_num_regions,
            'time_min': mask_time_min,
            'time_max': mask_time_max,
            'freq_num_regions': mask_freq_num_regions,
            'freq_min': mask_freq_min,
            'freq_max': mask_freq_max,
            'both_num_regions': mask_both_num_regions,
            'both_min_time': mask_both_min_time,
            'both_max_time': mask_both_max_time,
            'both_min_freq': mask_both_min_freq,
            'both_max_freq': mask_both_max_freq,
        }
        self.do_remove_silence = silence_threshold is not None

        self.read = ops.FileReader(device="cpu",
                                   file_root=file_root,
                                   file_list=file_list,
                                   shard_id=shard_id,
                                   num_shards=n_shards,
                                   shuffle_after_epoch=train_pipeline)

        # TODO change ExternalSource to Uniform for new DALI release
        if discrete_resample_range and resample_range is not None:
            self.speed_perturbation_coeffs = ops.ExternalSource(
                device="cpu",
                cycle=True,
                source=self._discrete_resample_coeffs_generator)
        elif resample_range is not None:
            self.speed_perturbation_coeffs = random.Uniform(
                device="cpu", range=resample_range)
        else:
            self.speed_perturbation_coeffs = None

        # Let the decoder resample only when no speed perturbation is used.
        self.decode = ops.AudioDecoder(
            device="cpu",
            sample_rate=self.sample_rate if resample_range is None else None,
            dtype=types.FLOAT,
            downmix=True)

        self.normal_distribution = random.Normal(device=device)

        self.preemph = ops.PreemphasisFilter(device=device,
                                             preemph_coeff=preemph_coeff)

        self.spectrogram = ops.Spectrogram(
            device=device,
            nfft=nfft,
            window_length=window_size * sample_rate,
            window_step=window_stride * sample_rate)

        self.mel_fbank = ops.MelFilterBank(device=device,
                                           sample_rate=sample_rate,
                                           nfilter=self.nfeatures,
                                           normalize=True)

        self.log_features = ops.ToDecibels(device=device,
                                           multiplier=np.log(10),
                                           reference=1.0,
                                           cutoff_db=math.log(1e-20))

        self.get_shape = ops.Shapes(device=device)

        self.normalize = ops.Normalize(device=device, axes=[1])

        self.pad = ops.Pad(device=device,
                           axes=[1],
                           fill_value=0,
                           align=pad_align)

        # Silence trimming
        self.get_nonsilent_region = ops.NonsilentRegion(
            device="cpu", cutoff_db=silence_threshold)
        self.trim_silence = ops.Slice(device="cpu",
                                      normalized_anchor=False,
                                      normalized_shape=False,
                                      axes=[0])
        self.to_float = ops.Cast(device="cpu", dtype=types.FLOAT)

        # Spectrogram masking
        self.spectrogram_cutouts = ops.ExternalSource(
            source=self._cutouts_generator, num_outputs=2, cycle=True)
        self.mask_spectrogram = ops.Erase(device=device,
                                          axes=[0, 1],
                                          fill_value=0,
                                          normalized_anchor=True)
Exemple #14
0
    def __init__(self, default_boxes, root, annFile, batch_size, mean, std,
                 local_rank, num_workers, seed):
        """Create the operators of a COCO pipeline that outputs 320x320 images.

        Args:
            default_boxes: Object providing ``as_ltrb_list()``; the returned
                value is handed to the box encoder as its anchors.
            root: Passed to the COCO reader as ``file_root``.
            annFile: Passed to the COCO reader as ``annotations_file``.
            batch_size: Forwarded to the base pipeline.
            mean: Normalization mean; also used (as a tuple) as the fill
                value of the paste operator.
            std: Normalization standard deviation.
            local_rank: Forwarded to the base pipeline as ``device_id``.
            num_workers: Forwarded to the base pipeline as ``num_threads``.
            seed: Forwarded to the base pipeline.
        """
        super(COCOPipeline, self).__init__(batch_size=batch_size,
                                           num_threads=num_workers,
                                           device_id=local_rank,
                                           seed=seed)

        # The reader is pinned to shard 0 of 1; no distributed rank lookup
        # is performed in this constructor.
        self.input = ops.COCOReader(
            file_root=root,
            annotations_file=annFile,
            skip_empty=True,
            shard_id=0,
            num_shards=1,
            ratio=True,
            ltrb=True,
            random_shuffle=False,
            shuffle_after_epoch=True,
        )

        self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)

        # Paste ("expand") augmentation: ratio sampled from [1, 2] and
        # position sampled from [0, 1].
        self.paste_ratio = ops.Uniform(range=[1, 2])
        self.paste_pos = ops.Uniform(range=[0, 1])
        self.paste = ops.Paste(device="gpu", fill_value=tuple(mean))
        self.bbpaste = ops.BBoxPaste(device="cpu", ltrb=True)

        # Random crop driven by the bounding boxes, followed by a GPU slice.
        self.crop = ops.RandomBBoxCrop(
            device="cpu",
            aspect_ratio=[0.5, 2.0],
            thresholds=[0.1, 0.3, 0.5, 0.7, 0.9],
            scaling=[0.3, 1.0],
            ltrb=True,
            allow_no_crop=True,
            num_attempts=50,
        )
        self.slice = ops.Slice(device="gpu")
        self.twist = ops.ColorTwist(device="gpu")

        # Resize and normalization share the same square output size.
        side = 320
        self.resize = ops.Resize(
            device="gpu",
            resize_x=side,
            resize_y=side,
            min_filter=types.DALIInterpType.INTERP_TRIANGULAR,
        )
        self.normalize = ops.CropMirrorNormalize(
            device="gpu",
            crop=(side, side),
            mean=mean,
            std=std,
            mirror=0,
            output_dtype=types.FLOAT,
            output_layout=types.NCHW,
            pad_output=False,
        )

        # Uniform random number generators.
        self.rng1 = ops.Uniform(range=[0.5, 1.5])
        self.rng2 = ops.Uniform(range=[0.875, 1.125])
        self.rng3 = ops.Uniform(range=[-0.5, 0.5])

        # Image flip, matching bounding-box flip, and the coin deciding it.
        self.flip = ops.Flip(device="gpu")
        self.bbflip = ops.BbFlip(device="cpu", ltrb=True)
        self.flip_coin = ops.CoinFlip(probability=0.5)

        anchors = default_boxes.as_ltrb_list()
        self.box_encoder = ops.BoxEncoder(device="cpu",
                                          criteria=0.5,
                                          anchors=anchors)
Exemple #15
0
    def __init__(self, args, device_id, file_root, annotations_file):
        """Create CPU and GPU variants of the operators used for detection.

        Args:
            args: Options object; ``batch_size``, ``num_workers``,
                ``prefetch``, ``seed`` and ``num_gpus`` are read from it.
            device_id: Forwarded to the base pipeline and also used as the
                reader's ``shard_id``.
            file_root: Passed to the COCO reader as ``file_root``.
            annotations_file: Passed to the COCO reader as
                ``annotations_file``.
        """
        # Keyword arguments are deliberate. Assuming the base class is DALI's
        # ``Pipeline``, its 4th and 5th positional parameters are ``seed`` and
        # ``exec_pipelined``, so passing ``args.prefetch, args.seed``
        # positionally would use the prefetch depth as the seed and the seed
        # as the ``exec_pipelined`` flag.
        super(DetectionPipeline, self).__init__(
            batch_size=args.batch_size,
            num_threads=args.num_workers,
            device_id=device_id,
            prefetch_queue_depth=args.prefetch,
            seed=args.seed)

        # Reading COCO dataset
        self.input = ops.COCOReader(file_root=file_root,
                                    annotations_file=annotations_file,
                                    shard_id=device_id,
                                    num_shards=args.num_gpus,
                                    ratio=True,
                                    ltrb=True,
                                    random_shuffle=True)

        # Decoders: plain and fused decode+slice, on host and via nvJPEG.
        self.decode_cpu = ops.HostDecoder(device="cpu", output_type=types.RGB)
        self.decode_crop = ops.HostDecoderSlice(device="cpu",
                                                output_type=types.RGB)
        self.decode_gpu = ops.nvJPEGDecoder(device="mixed",
                                            output_type=types.RGB)
        self.decode_gpu_crop = ops.nvJPEGDecoderSlice(device="mixed",
                                                      output_type=types.RGB)

        # Two alternative random croppers, both seeded with args.seed.
        self.ssd_crop = ops.SSDRandomCrop(device="cpu",
                                          num_attempts=1,
                                          seed=args.seed)
        self.random_bbox_crop = ops.RandomBBoxCrop(
            device="cpu",
            aspect_ratio=[0.5, 2.0],
            thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
            scaling=[0.3, 1.0],
            ltrb=True,
            seed=args.seed)

        self.slice_cpu = ops.Slice(device="cpu")
        self.slice_gpu = ops.Slice(device="gpu")

        # Settings shared by the CPU and GPU resize operators.
        resize_args = dict(resize_x=300,
                           resize_y=300,
                           min_filter=types.DALIInterpType.INTERP_TRIANGULAR)
        self.resize_cpu = ops.Resize(device="cpu", **resize_args)
        self.resize_gpu = ops.Resize(device="gpu", **resize_args)

        # Settings shared by the CPU and GPU normalization operators.
        normalize_args = dict(
            crop=(300, 300),
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
            mirror=0,
            output_dtype=types.FLOAT)
        self.normalize_cpu = ops.CropMirrorNormalize(device="cpu",
                                                     **normalize_args)
        self.normalize_gpu = ops.CropMirrorNormalize(device="gpu",
                                                     **normalize_args)

        self.twist_cpu = ops.ColorTwist(device="cpu")
        self.twist_gpu = ops.ColorTwist(device="gpu")

        self.flip_cpu = ops.Flip(device="cpu")
        self.bbox_flip_cpu = ops.BbFlip(device="cpu", ltrb=True)

        self.flip_gpu = ops.Flip(device="gpu")
        self.bbox_flip_gpu = ops.BbFlip(device="gpu", ltrb=True)

        # Box encoders: plain and offset-producing, each on CPU and GPU,
        # all using the same anchors.
        default_boxes = coco_anchors()
        self.box_encoder_cpu = ops.BoxEncoder(device="cpu",
                                              criteria=0.5,
                                              anchors=default_boxes)
        self.box_encoder_gpu = ops.BoxEncoder(device="gpu",
                                              criteria=0.5,
                                              anchors=default_boxes)
        offset_encoder_args = dict(criteria=0.5,
                                   offset=True,
                                   scale=2,
                                   stds=[0.1, 0.1, 0.2, 0.2],
                                   anchors=default_boxes)
        self.box_encoder_cpu_offsets = ops.BoxEncoder(device="cpu",
                                                      **offset_encoder_args)
        self.box_encoder_gpu_offsets = ops.BoxEncoder(device="gpu",
                                                      **offset_encoder_args)

        # Random variables
        self.rng1 = ops.Uniform(range=[0.5, 1.5])
        self.rng2 = ops.Uniform(range=[0.875, 1.125])
        self.rng3 = ops.Uniform(range=[-0.5, 0.5])
Exemple #16
0
    def __init__(self,
                 batch_size,
                 device_id,
                 file_root,
                 annotations_file,
                 num_gpus,
                 output_fp16=False,
                 output_nhwc=False,
                 pad_output=False,
                 num_threads=1,
                 seed=15,
                 dali_cache=-1,
                 dali_async=True,
                 use_nvjpeg=False,
                 use_roi=False):
        """Create the operators of a COCO pipeline that outputs 300x300 images.

        Args:
            batch_size: Forwarded to the base pipeline.
            device_id: Forwarded to the base pipeline.
            file_root: Passed to the COCO reader as ``file_root``.
            annotations_file: Passed to the COCO reader as
                ``annotations_file``.
            num_gpus: Used as the reader's ``num_shards``.
            output_fp16: Select ``types.FLOAT16`` instead of ``types.FLOAT``
                as the normalization output dtype.
            output_nhwc: Select ``types.NHWC`` instead of ``types.NCHW`` as
                the normalization output layout.
            pad_output: Forwarded to the normalization operator.
            num_threads: Forwarded to the base pipeline.
            seed: Forwarded to the base pipeline.
            dali_cache: When positive, the reader shuffles randomly and
                sticks to its shard instead of shuffling after each epoch,
                and the non-ROI nvJPEG decoder is created with a cache of
                ``dali_cache * 1024``.
            dali_async: Used for both ``exec_pipelined`` and ``exec_async``.
            use_nvjpeg: Use the mixed-device nvJPEG decoder with
                ``RandomBBoxCrop``; otherwise the host decoder with
                ``SSDRandomCrop``.
            use_roi: With ``use_nvjpeg``, use the fused decode+slice operator
                and create no separate slice operator. Not consulted when
                ``use_nvjpeg`` is false.
        """
        super(COCOPipeline, self).__init__(batch_size=batch_size,
                                           device_id=device_id,
                                           num_threads=num_threads,
                                           seed=seed,
                                           exec_pipelined=dali_async,
                                           exec_async=dali_async)

        self.use_roi = use_roi
        self.use_nvjpeg = use_nvjpeg

        # Fall back to shard 0 when the rank cannot be obtained. Both
        # exception types are caught because the failure has been reported
        # as RuntimeError by some releases and AssertionError by others.
        try:
            shard_id = torch.distributed.get_rank()
        except (RuntimeError, AssertionError):
            shard_id = 0

        self.input = ops.COCOReader(file_root=file_root,
                                    annotations_file=annotations_file,
                                    shard_id=shard_id,
                                    num_shards=num_gpus,
                                    ratio=True,
                                    ltrb=True,
                                    skip_empty=True,
                                    random_shuffle=(dali_cache > 0),
                                    stick_to_shard=(dali_cache > 0),
                                    shuffle_after_epoch=(dali_cache <= 0))

        if use_nvjpeg:
            if use_roi:
                # Slicing is handled inside the ROI decoder.
                self.decode = ops.nvJPEGDecoderSlice(device="mixed",
                                                     output_type=types.RGB)
                self.slice = None
            else:
                decoder_args = dict(device="mixed", output_type=types.RGB)
                if dali_cache > 0:
                    decoder_args.update(cache_size=dali_cache * 1024,
                                        cache_type="threshold",
                                        cache_threshold=10000)
                self.decode = ops.nvJPEGDecoder(**decoder_args)
                self.slice = ops.Slice(device="gpu")
            self.crop = ops.RandomBBoxCrop(
                device="cpu",
                aspect_ratio=[0.5, 2.0],
                thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
                scaling=[0.3, 1.0],
                ltrb=True,
                allow_no_crop=True,
                num_attempts=1)
        else:
            # Slicing is handled inside the cropper.
            self.decode = ops.HostDecoder(device="cpu", output_type=types.RGB)
            self.slice = None
            self.crop = ops.SSDRandomCrop(device="cpu", num_attempts=1)

        # Augmentation techniques (in addition to random crop)
        self.twist = ops.ColorTwist(device="gpu")

        self.resize = ops.Resize(
            device="gpu",
            resize_x=300,
            resize_y=300,
            min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

        output_dtype = types.FLOAT16 if output_fp16 else types.FLOAT
        output_layout = types.NHWC if output_nhwc else types.NCHW

        # Mean/std constants scaled by 255.
        mean_val = list(np.array([0.485, 0.456, 0.406]) * 255.)
        std_val = list(np.array([0.229, 0.224, 0.225]) * 255.)
        self.normalize = ops.CropMirrorNormalize(device="gpu",
                                                 crop=(300, 300),
                                                 mean=mean_val,
                                                 std=std_val,
                                                 mirror=0,
                                                 output_dtype=output_dtype,
                                                 output_layout=output_layout,
                                                 pad_output=pad_output)

        # Random variables
        self.rng1 = ops.Uniform(range=[0.5, 1.5])
        self.rng2 = ops.Uniform(range=[0.875, 1.125])
        self.rng3 = ops.Uniform(range=[-0.5, 0.5])
    def __init__(self,
                 batch_size,
                 device_id,
                 file_root,
                 meta_files_path,
                 annotations_file,
                 num_gpus,
                 anchors_ltrb_list,
                 output_fp16=False,
                 output_nhwc=False,
                 pad_output=False,
                 num_threads=1,
                 seed=15,
                 dali_cache=-1,
                 dali_async=True,
                 use_nvjpeg=False):
        """Create the operators of a COCO pipeline with on-GPU box encoding.

        Attribute prefixes follow the device each operator is created for:
        ``c_`` for CPU, ``g_`` for GPU and ``m_`` for the decoder.

        Args:
            batch_size: Forwarded to the base pipeline.
            device_id: Forwarded to the base pipeline.
            file_root: Passed to the COCO reader as ``file_root``.
            meta_files_path: When not ``None``, given to the reader instead
                of ``annotations_file``.
            annotations_file: Given to the reader only when
                ``meta_files_path`` is ``None``.
            num_gpus: Used as the reader's ``num_shards``.
            anchors_ltrb_list: Anchors handed to the box encoder.
            output_fp16: Select ``types.FLOAT16`` instead of ``types.FLOAT``
                as the normalization output dtype.
            output_nhwc: Select ``types.NHWC`` instead of ``types.NCHW`` as
                the normalization output layout.
            pad_output: Forwarded to the normalization operator.
            num_threads: Forwarded to the base pipeline.
            seed: Forwarded to the base pipeline.
            dali_cache: When positive, the reader shuffles randomly and
                sticks to its shard instead of shuffling after each epoch;
                together with ``use_nvjpeg`` it also selects the caching
                decoder.
            dali_async: Used for both ``exec_pipelined`` and ``exec_async``.
            use_nvjpeg: Create the decoder on the ``'mixed'`` device instead
                of ``'cpu'``.
        """
        super(COCOPipeline, self).__init__(batch_size=batch_size,
                                           device_id=device_id,
                                           num_threads=num_threads,
                                           seed=seed,
                                           exec_pipelined=dali_async,
                                           exec_async=dali_async)

        self.use_nvjpeg = use_nvjpeg
        try:
            shard_id = torch.distributed.get_rank()
        # Note: <= 19.05 was a RuntimeError, 19.06 is now throwing AssertionError
        except (RuntimeError, AssertionError):
            shard_id = 0

        # Reader arguments common to both input modes.
        reader_args = dict(file_root=file_root,
                           shard_id=shard_id,
                           num_shards=num_gpus,
                           random_shuffle=(dali_cache > 0),
                           stick_to_shard=(dali_cache > 0),
                           lazy_init=True,
                           shuffle_after_epoch=(dali_cache <= 0))
        if meta_files_path is None:
            reader_args.update(annotations_file=annotations_file,
                               ratio=True,
                               ltrb=True,
                               skip_empty=True)
        else:
            reader_args.update(meta_files_path=meta_files_path)
        self.c_input = ops.COCOReader(**reader_args)

        self.c_crop = ops.RandomBBoxCrop(
            device="cpu",
            aspect_ratio=[0.5, 2.0],
            thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
            scaling=[0.3, 1.0],
            ltrb=True,
            allow_no_crop=True,
            num_attempts=1)

        if dali_cache > 0 and use_nvjpeg:
            # special case for using dali decode caching: the caching decoder
            # can't be fused with slicing (because we need to slice the
            # decoded image differently every epoch), so decode and slice are
            # separate operators here.
            self.m_decode = ops.ImageDecoder(device='mixed',
                                             output_type=types.RGB,
                                             cache_size=dali_cache * 1024,
                                             cache_type="threshold",
                                             cache_threshold=10000)
            self.g_slice = ops.Slice(device="gpu")
        else:
            # fused decode and slice.  This is "region-of-interest" (roi)
            # decoding, so no separate slice operator is needed.
            decoder_device = 'mixed' if use_nvjpeg else 'cpu'
            self.m_decode = ops.ImageDecoderSlice(device=decoder_device,
                                                  output_type=types.RGB)
            self.g_slice = None

        # Augmentation techniques (in addition to random crop)
        self.g_twist = ops.ColorTwist(device="gpu")

        self.g_resize = ops.Resize(
            device="gpu",
            resize_x=300,
            resize_y=300,
            min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

        output_dtype = types.FLOAT16 if output_fp16 else types.FLOAT
        output_layout = types.NHWC if output_nhwc else types.NCHW

        # Mean/std constants scaled by 255.
        mean_val = list(np.array([0.485, 0.456, 0.406]) * 255.)
        std_val = list(np.array([0.229, 0.224, 0.225]) * 255.)
        self.g_normalize = ops.CropMirrorNormalize(device="gpu",
                                                   crop=(300, 300),
                                                   mean=mean_val,
                                                   std=std_val,
                                                   output_dtype=output_dtype,
                                                   output_layout=output_layout,
                                                   pad_output=pad_output)

        # Random variables
        self.c_rng1 = ops.Uniform(range=[0.5, 1.5])
        self.c_rng2 = ops.Uniform(range=[0.875, 1.125])
        self.c_rng3 = ops.Uniform(range=[-0.5, 0.5])

        # Coin flip with probability 0.5, plus the bounding-box flip operator.
        self.c_flip_coin = ops.CoinFlip(probability=0.5)
        self.c_bbflip = ops.BbFlip(device="cpu", ltrb=True)

        self.g_box_encoder = ops.BoxEncoder(device="gpu",
                                            criteria=0.5,
                                            anchors=anchors_ltrb_list,
                                            offset=True,
                                            stds=[0.1, 0.1, 0.2, 0.2],
                                            scale=300)

        self.g_cast = ops.Cast(device="gpu", dtype=types.FLOAT)
Exemple #18
0
    def __init__(self,
                 *,
                 pipeline_type,
                 device_id,
                 num_threads,
                 batch_size,
                 file_root: str,
                 sampler,
                 sample_rate,
                 resample_range: list,
                 window_size,
                 window_stride,
                 nfeatures,
                 nfft,
                 dither_coeff,
                 silence_threshold,
                 preemph_coeff,
                 max_duration,
                 preprocessing_device="gpu"):
        """Create the operators of the audio feature-extraction pipeline.

        All arguments are keyword-only.

        Args:
            pipeline_type: ``'train'`` marks a training pipeline; ``'val'``
                enables ``pad_last_batch`` on the reader.
            device_id: Forwarded to the base pipeline.
            num_threads: Forwarded to the base pipeline.
            batch_size: Forwarded to the base pipeline.
            file_root: Passed to the file reader as ``file_root``.
            sampler: Object providing ``is_sampler_random()`` and
                ``get_file_list_path()``.
            sample_rate: Sample rate used by the mel filter bank and to scale
                the spectrogram window; also given to the decoder when
                ``resample_range`` is ``None``.
            resample_range: Range of the ``Uniform`` speed-perturbation
                operator, or ``None`` to create no such operator.
            window_size: Multiplied by ``sample_rate`` to give the
                spectrogram ``window_length``.
            window_stride: Multiplied by ``sample_rate`` to give the
                spectrogram ``window_step``.
            nfeatures: Number of mel filters (``nfilter``).
            nfft: Forwarded to the spectrogram operator.
            dither_coeff: Stored on the instance.
            silence_threshold: ``cutoff_db`` of the non-silent-region
                operator; ``do_remove_silence`` is true when it is not
                ``None``.
            preemph_coeff: Forwarded to the pre-emphasis filter.
            max_duration: Stored on the instance.
            preprocessing_device: ``"cpu"`` or ``"gpu"`` (case-insensitive);
                device of the feature-extraction operators.

        Raises:
            AssertionError: If ``preprocessing_device`` is neither ``"cpu"``
                nor ``"gpu"``.
        """
        super().__init__(batch_size, num_threads, device_id)

        # Must stay ahead of any extra local binding: it receives locals().
        self._dali_init_log(locals())

        if torch.distributed.is_initialized():
            shard_id = torch.distributed.get_rank()
            n_shards = torch.distributed.get_world_size()
        else:
            shard_id = 0
            n_shards = 1

        self.preprocessing_device = preprocessing_device.lower()
        assert self.preprocessing_device in ("cpu", "gpu"), \
            "Incorrect preprocessing device. Please choose either 'cpu' or 'gpu'"
        # Use the validated, lower-cased name for every operator so that an
        # input such as "GPU" is not passed on unnormalized.
        device = self.preprocessing_device

        self.resample_range = resample_range

        is_train = pipeline_type == 'train'
        self.train = is_train
        self.sample_rate = sample_rate
        self.dither_coeff = dither_coeff
        self.nfeatures = nfeatures
        self.max_duration = max_duration
        self.do_remove_silence = silence_threshold is not None

        # The sampler is only queried for training pipelines.
        shuffle = is_train and not sampler.is_sampler_random()
        self.read = ops.FileReader(name="Reader",
                                   pad_last_batch=(pipeline_type == 'val'),
                                   device="cpu",
                                   file_root=file_root,
                                   file_list=sampler.get_file_list_path(),
                                   shard_id=shard_id,
                                   num_shards=n_shards,
                                   shuffle_after_epoch=shuffle)

        # Speed perturbation coefficients are only generated when a resample
        # range is given.
        if resample_range is not None:
            self.speed_perturbation_coeffs = ops.Uniform(device="cpu",
                                                         range=resample_range)
        else:
            self.speed_perturbation_coeffs = None

        # With a resample range the decoder is given no target sample rate.
        decoder_rate = self.sample_rate if resample_range is None else None
        self.decode = ops.AudioDecoder(device="cpu",
                                       sample_rate=decoder_rate,
                                       dtype=types.FLOAT,
                                       downmix=True)

        self.normal_distribution = ops.NormalDistribution(device=device)

        self.preemph = ops.PreemphasisFilter(device=device,
                                             preemph_coeff=preemph_coeff)

        self.spectrogram = ops.Spectrogram(
            device=device,
            nfft=nfft,
            window_length=window_size * sample_rate,
            window_step=window_stride * sample_rate)

        self.mel_fbank = ops.MelFilterBank(device=device,
                                           sample_rate=sample_rate,
                                           nfilter=self.nfeatures,
                                           normalize=True)

        self.log_features = ops.ToDecibels(device=device,
                                           multiplier=np.log(10),
                                           reference=1.0,
                                           cutoff_db=math.log(1e-20))

        self.get_shape = ops.Shapes(device=device)

        self.normalize = ops.Normalize(device=device, axes=[1])

        self.pad = ops.Pad(device=device, fill_value=0)

        # Silence trimming
        self.get_nonsilent_region = ops.NonsilentRegion(
            device="cpu", cutoff_db=silence_threshold)
        self.trim_silence = ops.Slice(device="cpu",
                                      normalized_anchor=False,
                                      normalized_shape=False,
                                      axes=[0])
        self.to_float = ops.Cast(device="cpu", dtype=types.FLOAT)
    def __init__(self, batch_size, device_id, file_root, annotations_file, num_gpus,
                 output_fp16=False, output_nhwc=False, pad_output=False, num_threads=1, seed=15):
        """Create the operators of a COCO pipeline with extra augmentations.

        Args:
            batch_size: Forwarded to the base pipeline.
            device_id: Forwarded to the base pipeline.
            file_root: Passed to the COCO reader as ``file_root``.
            annotations_file: Passed to the COCO reader as
                ``annotations_file``.
            num_gpus: Used as the reader's ``num_shards``.
            output_fp16: Select ``types.FLOAT16`` instead of ``types.FLOAT``
                as the normalization output dtype.
            output_nhwc: Select ``types.NHWC`` instead of ``types.NCHW`` as
                the normalization output layout.
            pad_output: Forwarded to the normalization operator.
            num_threads: Forwarded to the base pipeline.
            seed: Forwarded to the base pipeline.
        """
        super(COCOPipeline, self).__init__(
            batch_size=batch_size,
            device_id=device_id,
            num_threads=num_threads,
            seed=seed,
        )

        # Shard by distributed rank when a process group exists, else shard 0.
        shard_id = 0
        if torch.distributed.is_initialized():
            shard_id = torch.distributed.get_rank()

        self.input = ops.COCOReader(
            file_root=file_root,
            annotations_file=annotations_file,
            shard_id=shard_id,
            num_shards=num_gpus,
            ratio=True,
            ltrb=True,
            random_shuffle=True,
            skip_empty=True,
        )
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

        # Augmentation operators: rotation, SSD random crop, color twist.
        self.rotate = ops.Rotate(
            device="gpu",
            angle=30,
            interp_type=types.INTERP_LINEAR,
            fill_value=0,
        )
        self.crop = ops.SSDRandomCrop(device="cpu", num_attempts=1)
        self.twist = ops.ColorTwist(device="gpu")

        self.resize = ops.Resize(device="gpu", resize_x=300, resize_y=300)

        # Flip operator plus two coin flips (probability 0.1 each), stored
        # as the "v" and "h" coins.
        self.flip = ops.Flip(device='gpu')
        self.coin_flip_v = ops.CoinFlip(probability=0.1)
        self.coin_flip_h = ops.CoinFlip(probability=0.1)
        # Matching bounding-box flip.
        self.bbflip = ops.BbFlip(device='gpu', ltrb=True)

        # Paste operator with its random position/ratio and the bbox paste.
        self.paste = ops.Paste(device='gpu', fill_value=0)
        self.paste_pos = ops.Uniform(range=(0, 1))
        self.paste_ratio = ops.Uniform(range=(1, 2))
        self.bbpaste = ops.BBoxPaste(device='cpu', ltrb=True)

        # Bounding-box-aware random crop ("prospective" crop) ...
        self.prospective_crop = ops.RandomBBoxCrop(
            device='cpu',
            aspect_ratio=[0.5, 2.0],
            thresholds=[0.1, 0.3, 0.5],
            scaling=[0.8, 1.0],
            ltrb=True,
        )
        # ... and the slice operator applied after it.
        self.slice = ops.Slice(device='gpu')

        # Distortion operators. No BrightnessContrast or Hsv operator is
        # created here.
        self.water = ops.Water(device='gpu')
        self.sphere = ops.Sphere(device='gpu')

        self.warpaffine = ops.WarpAffine(
            device="gpu",
            matrix=[1.0, 0.8, 0.0, 0.0, 1.2, 0.0],
            interp_type=types.INTERP_LINEAR,
        )

        if output_fp16:
            output_dtype = types.FLOAT16
        else:
            output_dtype = types.FLOAT
        if output_nhwc:
            output_layout = types.NHWC
        else:
            output_layout = types.NCHW

        # Normalization with zero mean and std 255 on every channel.
        self.normalize = ops.CropMirrorNormalize(
            device="gpu",
            crop=(300, 300),
            mean=[0.0, 0.0, 0.0],
            std=[255.0, 255.0, 255.0],
            mirror=0,
            output_dtype=output_dtype,
            output_layout=output_layout,
            pad_output=pad_output,
        )

        # Uniform random number generators.
        self.rng1 = ops.Uniform(range=[0.5, 1.5])
        self.rng2 = ops.Uniform(range=[0.875, 1.125])
        self.rng3 = ops.Uniform(range=[-0.5, 0.5])