Ejemplo n.º 1
0
    def __init__(self, data_paths, num_gpus, batch_size, num_threads,
                 device_id, prefetch, fp16, nhwc, decoder_type,
                 decoder_cache_params, reader_queue_depth):
        """Create the decode, resize and normalize operators for the pipeline.

        ``decoder_type`` selects how JPEGs are decoded and resized:

        * ``'roi'``: random-crop decoder, then a fixed 224x224 resize.
        * ``'roi_split'``: as ``'roi'`` but with ``split_stages=True``.
        * ``'cached'``: plain decoder configured from ``decoder_cache_params``
          (``cache_enabled`` must equal ``True``; ``cache_size``,
          ``cache_threshold`` and ``cache_type`` are forwarded), then a
          random resized crop to 224x224.
        * ``'split'``: plain decoder with ``split_stages=True``, then a
          random resized crop.
        * any other value: plain decoder with default options, then a
          random resized crop.

        ``fp16`` switches the normalized output to ``types.FLOAT16`` and
        ``nhwc`` switches its layout to ``types.NHWC``. ``prefetch`` is
        forwarded to the base class as ``prefetch_queue_depth``.
        ``data_paths``, ``num_gpus`` and ``reader_queue_depth`` are not used
        by this constructor.
        """
        super(CommonPipeline, self).__init__(
            batch_size, num_threads, device_id,
            prefetch_queue_depth=prefetch)

        target_hw = (224, 224)
        # Options common to every decoder variant; branches add their extras.
        decoder_args = dict(device="mixed", output_type=types.RGB)

        if decoder_type in ('roi', 'roi_split'):
            if decoder_type == 'roi':
                print('Using nvJPEG with ROI decoding')
            else:
                print('Using nvJPEG with ROI decoding and split CPU/GPU stages')
                decoder_args['split_stages'] = True
            # The crop happens inside the decoder, so only a resize follows.
            self.decode_gpu = ops.nvJPEGDecoderRandomCrop(**decoder_args)
            self.res = ops.Resize(device="gpu", resize_x=224, resize_y=224)
        else:
            if decoder_type == 'cached':
                assert decoder_cache_params['cache_enabled'] == True
                cache_size = decoder_cache_params['cache_size']
                cache_threshold = decoder_cache_params['cache_threshold']
                cache_type = decoder_cache_params['cache_type']
                banner = ('Using nvJPEG with cache '
                          '(size : {} threshold: {}, type: {})')
                print(banner.format(cache_size, cache_threshold, cache_type))
                decoder_args.update(cache_size=cache_size,
                                    cache_threshold=cache_threshold,
                                    cache_type=cache_type,
                                    cache_debug=False)
            elif decoder_type == 'split':
                print('Using nvJPEG with split CPU/GPU stages')
                decoder_args['split_stages'] = True
            else:
                print('Using nvJPEG')
            # Full-image decode; the random crop is a separate GPU operator.
            self.decode_gpu = ops.nvJPEGDecoder(**decoder_args)
            self.res = ops.RandomResizedCrop(device="gpu", size=target_hw)

        if nhwc:
            layout = types.NHWC
        else:
            layout = types.NCHW
        if fp16:
            out_type = types.FLOAT16
        else:
            out_type = types.FLOAT

        # Per-channel mean/std expressed on the 0-255 pixel scale.
        channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
        channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
        self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                            output_dtype=out_type,
                                            output_layout=layout,
                                            crop=target_hw,
                                            image_type=types.RGB,
                                            mean=channel_mean,
                                            std=channel_std)
        self.coin = ops.CoinFlip(probability=0.5)
Ejemplo n.º 2
0
 def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False):
     """Create the reader, decoder, resize and normalize operators for training.

     Args:
         batch_size, num_threads, device_id: forwarded to the base pipeline;
             ``device_id`` also offsets the seed (``12 + device_id``).
         data_dir: passed to the file reader as ``file_root``.
         crop: edge length used for both the resize target and the
             normalize crop (``crop`` x ``crop``).
         dali_cpu: when true, decode with the host random-crop decoder;
             otherwise use the mixed-device nvJPEG random-crop decoder.

     Sharding is read from the module-level ``args`` object
     (``args.local_rank`` / ``args.world_size``).
     """
     super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)
     self.input = ops.FileReader(file_root=data_dir, shard_id=args.local_rank, num_shards=args.world_size, random_shuffle=True)
     # Let the user decide which decoder placement works best for the ResNet version they run.
     if dali_cpu:
         dali_device = "cpu"
         self.decode = ops.HostDecoderRandomCrop(device=dali_device, output_type=types.RGB,
                                                 random_aspect_ratio=[0.8, 1.25],
                                                 random_area=[0.1, 1.0],
                                                 num_attempts=100)
     else:
         dali_device = "gpu"
         # This padding sets the size of the internal nvJPEG buffers to be able to handle all images from full-sized ImageNet
         # without additional reallocations
         self.decode = ops.nvJPEGDecoderRandomCrop(device="mixed", output_type=types.RGB, device_memory_padding=211025920, host_memory_padding=140544512,
                                                   random_aspect_ratio=[0.8, 1.25],
                                                   random_area=[0.1, 1.0],
                                                   num_attempts=100)
     # The device is passed by keyword, like every other operator built here,
     # instead of positionally.
     # NOTE(review): the resize stays on the GPU even when dali_cpu is set;
     # confirm the graph moves decoded images to the GPU before this operator.
     self.res = ops.Resize(device="gpu", resize_x=crop, resize_y=crop, interp_type=types.INTERP_TRIANGULAR)
     self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                         output_dtype=types.FLOAT,
                                         output_layout=types.NCHW,
                                         crop=(crop, crop),
                                         image_type=types.RGB,
                                         mean=[0.485 * 255,0.456 * 255,0.406 * 255],
                                         std=[0.229 * 255,0.224 * 255,0.225 * 255])
     self.coin = ops.CoinFlip(probability=0.5)
     print('DALI "{0}" variant'.format(dali_device))
Ejemplo n.º 3
0
    def __init__(self,
                 batch_size,
                 num_threads,
                 device_id,
                 data_dir,
                 crop,
                 dali_cpu=False):
        """Set up the sharded reader, decoder, resize and normalize operators.

        The shard id and shard count come from ``torch.distributed`` when it
        is initialized; otherwise a single shard (id 0 of 1) is used.

        Args:
            batch_size, num_threads, device_id: forwarded to the base
                pipeline; the seed is ``12 + device_id``.
            data_dir: passed to the file reader as ``file_root``.
            crop: edge length of the resize target and of the normalize crop.
            dali_cpu: choose the host random-crop decoder and a CPU resize
                instead of the mixed nvJPEG decoder and a GPU resize.
        """
        super(HybridTrainPipe, self).__init__(
            batch_size, num_threads, device_id, seed=12 + device_id)

        # Fall back to a single shard when no process group exists.
        if torch.distributed.is_initialized():
            shard, shard_count = (torch.distributed.get_rank(),
                                  torch.distributed.get_world_size())
        else:
            shard, shard_count = 0, 1

        self.input = ops.FileReader(file_root=data_dir,
                                    shard_id=shard,
                                    num_shards=shard_count,
                                    random_shuffle=True)

        # Random-crop policy shared by both decoder variants.
        crop_policy = dict(random_aspect_ratio=[0.75, 4. / 3.],
                           random_area=[0.08, 1.0],
                           num_attempts=100)
        dali_device = "cpu" if dali_cpu else "gpu"
        if dali_cpu:
            self.decode = ops.HostDecoderRandomCrop(device=dali_device,
                                                    output_type=types.RGB,
                                                    **crop_policy)
        else:
            # This padding sizes the internal nvJPEG buffers so they can
            # handle all images from full-sized ImageNet without additional
            # reallocations.
            self.decode = ops.nvJPEGDecoderRandomCrop(
                device="mixed",
                output_type=types.RGB,
                device_memory_padding=211025920,
                host_memory_padding=140544512,
                **crop_policy)

        self.res = ops.Resize(device=dali_device,
                              resize_x=crop,
                              resize_y=crop,
                              interp_type=types.INTERP_TRIANGULAR)

        # Per-channel mean/std expressed on the 0-255 pixel scale.
        channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
        channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
        self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                            output_dtype=types.FLOAT,
                                            output_layout=types.NCHW,
                                            crop=(crop, crop),
                                            image_type=types.RGB,
                                            mean=channel_mean,
                                            std=channel_std)
        self.coin = ops.CoinFlip(probability=0.5)
Ejemplo n.º 4
0
 def __init__(self, batch_size, num_threads, device_id, data_dir, crop,
              dali_cpu, file_list):
     """Set up the reader, decoder, resize and normalize operators.

     Args:
         batch_size, num_threads, device_id: forwarded to the base pipeline;
             the seed is ``12 + device_id``.
         data_dir: passed to the file reader as ``file_root``.
         crop: edge length of the resize target and of the normalize crop.
         dali_cpu: choose the host random-crop decoder and a CPU resize
             instead of the mixed nvJPEG decoder and a GPU resize.
         file_list: passed to the file reader as ``file_list``.

     Sharding comes from the module-level ``args`` object, and the
     normalization constants from the module-level ``mean`` and ``std``.
     """
     super(HybridTrainPipe, self).__init__(
         batch_size, num_threads, device_id, seed=12 + device_id)

     self.input = ops.FileReader(
         file_root=data_dir,
         shard_id=args.local_rank,
         num_shards=args.world_size,
         random_shuffle=True,
         file_list=file_list)

     # Random-crop policy shared by both decoder variants.
     crop_policy = dict(random_aspect_ratio=[0.8, 1.25],
                        random_area=[0.1, 1.0],
                        num_attempts=100)
     dali_device = "cpu" if dali_cpu else "gpu"
     if dali_cpu:
         self.decode = ops.HostDecoderRandomCrop(device=dali_device,
                                                 output_type=types.RGB,
                                                 **crop_policy)
     else:
         self.decode = ops.nvJPEGDecoderRandomCrop(
             device="mixed",
             output_type=types.RGB,
             device_memory_padding=211025920,
             host_memory_padding=140544512,
             **crop_policy)

     self.res = ops.Resize(
         device=dali_device,
         resize_x=crop,
         resize_y=crop,
         interp_type=types.INTERP_TRIANGULAR)
     self.cmnp = ops.CropMirrorNormalize(
         device="gpu",
         output_dtype=types.FLOAT,
         output_layout=types.NCHW,
         crop=(crop, crop),
         image_type=types.RGB,
         mean=mean,
         std=std)
     self.coin = ops.CoinFlip(probability=0.5)
     print('DALI "{0}" variant'.format(dali_device))
Ejemplo n.º 5
0
    def __init__(self,
                 batch_size,
                 num_threads,
                 device_id,
                 rec_path,
                 idx_path,
                 shard_id,
                 num_shards,
                 crop_shape,
                 min_random_area,
                 max_random_area,
                 min_random_aspect_ratio,
                 max_random_aspect_ratio,
                 nvjpeg_padding,
                 prefetch_queue=3,
                 seed=12,
                 output_layout=types.NCHW,
                 pad_output=True,
                 dtype='float16',
                 mlperf_print=True,
                 use_roi_decode=False,
                 cache_size=0):
        """Create the RecordIO reader, decoder, crop and normalize operators.

        Args:
            batch_size, num_threads, device_id: forwarded to the base
                pipeline; the pipeline seed is ``seed + device_id``.
            rec_path, idx_path: RecordIO file and its index, each wrapped in
                a one-element list for the reader.
            shard_id, num_shards: sharding passed to the reader.
            crop_shape: two-element output size, passed unchanged to
                ``CropMirrorNormalize(crop=...)`` and
                ``RandomResizedCrop(size=...)``.
                NOTE(review): assumed to be (height, width) - confirm
                against callers.
            min_random_area, max_random_area: bounds of ``random_area``.
            min_random_aspect_ratio, max_random_aspect_ratio: bounds of
                ``random_aspect_ratio``.
            nvjpeg_padding: used for both ``device_memory_padding`` and
                ``host_memory_padding`` of the decoder.
            prefetch_queue: forwarded as ``prefetch_queue_depth``.
            seed: base seed, offset by ``device_id``.
            output_layout, pad_output: forwarded to ``CropMirrorNormalize``.
            dtype: ``'float16'`` selects ``types.FLOAT16`` output; any other
                value selects ``types.FLOAT``.
            mlperf_print: accepted but not used by this constructor.
            use_roi_decode: use the fused random-crop decoder; only honoured
                when ``cache_size`` is 0.
            cache_size: when positive, enables the decoder cache and the
                cache-aware reader options.

        Normalization constants come from the module-level ``_mean_pixel``
        and ``_std_pixel``.
        """
        super(HybridTrainPipe,
              self).__init__(batch_size,
                             num_threads,
                             device_id,
                             seed=seed + device_id,
                             prefetch_queue_depth=prefetch_queue)

        if cache_size > 0:
            self.input = ops.MXNetReader(path=[rec_path],
                                         index_path=[idx_path],
                                         random_shuffle=True,
                                         shard_id=shard_id,
                                         num_shards=num_shards,
                                         stick_to_shard=True,
                                         lazy_init=True,
                                         skip_cached_images=True)
        else:  # stick_to_shard might not exist in this version of DALI.
            self.input = ops.MXNetReader(path=[rec_path],
                                         index_path=[idx_path],
                                         random_shuffle=True,
                                         shard_id=shard_id,
                                         num_shards=num_shards)

        random_area = [min_random_area, max_random_area]
        random_aspect_ratio = [
            min_random_aspect_ratio, max_random_aspect_ratio
        ]

        if use_roi_decode and cache_size == 0:
            # Fused decode + random crop; only a plain resize is left to do.
            self.decode = ops.nvJPEGDecoderRandomCrop(
                device="mixed",
                output_type=types.RGB,
                device_memory_padding=nvjpeg_padding,
                host_memory_padding=nvjpeg_padding,
                random_area=random_area,
                random_aspect_ratio=random_aspect_ratio)
            # resize_x is the output width and resize_y the output height.
            # crop_shape is assumed to be (height, width), the order DALI
            # documents for `crop` / `size`, so the width is index 1.
            # Square shapes behave exactly as before.
            self.rrc = ops.Resize(device="gpu",
                                  resize_x=crop_shape[1],
                                  resize_y=crop_shape[0])
        else:
            if cache_size > 0:
                # threshold 0 with the 'threshold' cache type is forwarded
                # as-is; the cache capacity is cache_size.
                self.decode = ops.nvJPEGDecoder(
                    device="mixed",
                    output_type=types.RGB,
                    device_memory_padding=nvjpeg_padding,
                    host_memory_padding=nvjpeg_padding,
                    cache_type='threshold',
                    cache_size=cache_size,
                    cache_threshold=0,
                    cache_debug=False)
            else:
                self.decode = ops.nvJPEGDecoder(
                    device="mixed",
                    output_type=types.RGB,
                    device_memory_padding=nvjpeg_padding,
                    host_memory_padding=nvjpeg_padding)

            # Full images are decoded, so the random crop runs on the GPU.
            self.rrc = ops.RandomResizedCrop(
                device="gpu",
                random_area=random_area,
                random_aspect_ratio=random_aspect_ratio,
                size=crop_shape)

        self.cmnp = ops.CropMirrorNormalize(
            device="gpu",
            output_dtype=types.FLOAT16 if dtype == 'float16' else types.FLOAT,
            output_layout=output_layout,
            crop=crop_shape,
            pad_output=pad_output,
            image_type=types.RGB,
            mean=_mean_pixel,
            std=_std_pixel)
        self.coin = ops.CoinFlip(probability=0.5)
0
    def __init__(self,
                 args,
                 batch_size,
                 num_threads,
                 device_id,
                 rec_path,
                 idx_path,
                 shard_id,
                 num_shards,
                 crop_shape,
                 nvjpeg_padding,
                 prefetch_queue=3,
                 output_layout=types.NCHW,
                 pad_output=True,
                 dtype='float16',
                 dali_cpu=False):
        """Create the RecordIO reader, decoder, resize and normalize operators.

        Args:
            args: options object; ``dali_fuse_decoder``, ``rgb_mean`` and
                ``rgb_std`` are read from it.
            batch_size, num_threads, device_id: forwarded to the base
                pipeline; the seed is ``12 + device_id``.
            rec_path, idx_path: RecordIO file and its index, each wrapped in
                a one-element list for the reader.
            shard_id, num_shards: sharding passed to the reader.
            crop_shape: two-element output size; index 1 feeds ``resize_x``
                and index 0 feeds ``resize_y`` in the fused-decoder case.
            nvjpeg_padding: used for both device and host memory padding of
                the nvJPEG decoders.
            prefetch_queue: forwarded as ``prefetch_queue_depth``.
            output_layout, pad_output: forwarded to ``CropMirrorNormalize``.
            dtype: ``'float16'`` selects ``types.FLOAT16`` output; any other
                value selects ``types.FLOAT``.
            dali_cpu: decode and resize on the CPU instead of the GPU.
        """
        super(HybridTrainPipe, self).__init__(
            batch_size,
            num_threads,
            device_id,
            seed=12 + device_id,
            prefetch_queue_depth=prefetch_queue)

        self.input = ops.MXNetReader(
            path=[rec_path],
            index_path=[idx_path],
            random_shuffle=True,
            shard_id=shard_id,
            num_shards=num_shards)

        dali_device = "cpu" if dali_cpu else "gpu"
        if dali_cpu:
            # Host decoders: fused random crop or plain decode.
            if args.dali_fuse_decoder:
                host_decoder = ops.HostDecoderRandomCrop
            else:
                host_decoder = ops.HostDecoder
            self.decode = host_decoder(device=dali_device,
                                       output_type=types.RGB)
        else:
            # nvJPEG decoders always run as "mixed" operators.
            if args.dali_fuse_decoder:
                gpu_decoder = ops.nvJPEGDecoderRandomCrop
            else:
                gpu_decoder = ops.nvJPEGDecoder
            self.decode = gpu_decoder(device="mixed",
                                      output_type=types.RGB,
                                      device_memory_padding=nvjpeg_padding,
                                      host_memory_padding=nvjpeg_padding)

        if not args.dali_fuse_decoder:
            # Nothing was cropped during decoding, so crop and resize here.
            self.resize = ops.RandomResizedCrop(device=dali_device,
                                                size=crop_shape)
        else:
            # The decoder already cropped; only scale to the target size.
            self.resize = ops.Resize(device=dali_device,
                                     resize_x=crop_shape[1],
                                     resize_y=crop_shape[0])

        if dtype == 'float16':
            out_dtype = types.FLOAT16
        else:
            out_dtype = types.FLOAT
        self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                            output_dtype=out_dtype,
                                            output_layout=output_layout,
                                            crop=crop_shape,
                                            pad_output=pad_output,
                                            image_type=types.RGB,
                                            mean=args.rgb_mean,
                                            std=args.rgb_std)
        self.coin = ops.CoinFlip(probability=0.5)