Beispiel #1
0
    def __init__(self, data_paths, num_shards, batch_size, num_threads,
                 device_id, prefetch, fp16, random_shuffle, nhwc,
                 dont_use_mmap, decoder_type, decoder_cache_params,
                 reader_queue_depth, shard_id):
        """Create the decode, resize and normalize operators of the pipeline.

        Args:
            batch_size, num_threads, device_id: Forwarded positionally to the
                base pipeline constructor.
            prefetch: Forwarded as ``prefetch_queue_depth``.
            fp16: When truthy the normalize operator emits ``types.FLOAT16``,
                otherwise ``types.FLOAT``.
            random_shuffle: Forwarded as the fourth positional argument of
                the base constructor.
            nhwc: When truthy the output layout is ``types.NHWC``, otherwise
                ``types.NCHW``.
            decoder_type: ``'roi'``, ``'roi_split'``, ``'cached'`` or
                ``'split'``; any other value selects the plain decoder.
            decoder_cache_params: Mapping read only for ``'cached'``; it must
                provide ``cache_enabled``, ``cache_size``,
                ``cache_threshold`` and ``cache_type``.
            data_paths, num_shards, dont_use_mmap, reader_queue_depth,
                shard_id: Not used by this constructor.
        """
        # NOTE(review): random_shuffle lands in the fourth positional slot of
        # the base constructor -- confirm that this is the intended parameter.
        super(CommonPipeline, self).__init__(
            batch_size, num_threads, device_id, random_shuffle,
            prefetch_queue_depth=prefetch)

        # Every variant decodes on the "mixed" device into RGB; the branches
        # below only add variant-specific keyword arguments.
        decoder_args = dict(device="mixed", output_type=types.RGB)

        if decoder_type in ('roi', 'roi_split'):
            # Fused decode + random crop, followed by a plain resize.
            if decoder_type == 'roi':
                print('Using nvJPEG with ROI decoding')
            else:
                print('Using nvJPEG with ROI decoding and split CPU/GPU stages')
                decoder_args['split_stages'] = True
            self.decode_gpu = ops.ImageDecoderRandomCrop(**decoder_args)
            self.res = ops.Resize(device="gpu", resize_x=224, resize_y=224)
        else:
            # Full decode, followed by a random resized crop.
            if decoder_type == 'cached':
                assert decoder_cache_params['cache_enabled'] == True
                cache_size = decoder_cache_params['cache_size']
                cache_threshold = decoder_cache_params['cache_threshold']
                cache_type = decoder_cache_params['cache_type']
                print(
                    'Using nvJPEG with cache (size : {} threshold: {}, type: {})'.
                    format(cache_size, cache_threshold, cache_type))
                decoder_args.update(cache_size=cache_size,
                                    cache_threshold=cache_threshold,
                                    cache_type=cache_type,
                                    cache_debug=False)
            elif decoder_type == 'split':
                print('Using nvJPEG with split CPU/GPU stages')
                decoder_args['split_stages'] = True
            else:
                print('Using nvJPEG')
            self.decode_gpu = ops.ImageDecoder(**decoder_args)
            self.res = ops.RandomResizedCrop(device="gpu", size=(224, 224))

        # Per-channel statistics are given in the 0-1 range and scaled by 255.
        self.cmnp = ops.CropMirrorNormalize(
            device="gpu",
            dtype=types.FLOAT16 if fp16 else types.FLOAT,
            output_layout=types.NHWC if nhwc else types.NCHW,
            crop=(224, 224),
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
        self.coin = ops.random.CoinFlip(probability=0.5)
Beispiel #2
0
    def __init__(self,
                 batch_size,
                 num_threads,
                 device_id,
                 data_dir,
                 crop,
                 dali_cpu=False):
        """Create the reader and augmentation operators for training.

        Args:
            batch_size, num_threads, device_id: Forwarded to the base
                pipeline constructor; the seed is ``12 + device_id``.
            data_dir: Passed to the file reader as ``file_root``.
            crop: Edge length used for both the resize and the crop.
            dali_cpu: When truthy the decoder and resize run on "cpu";
                otherwise the decoder is "mixed" and the resize is "gpu".
        """
        super(HybridTrainPipe, self).__init__(batch_size,
                                              num_threads,
                                              device_id,
                                              seed=12 + device_id)

        # Shard by the torch.distributed rank when a process group exists,
        # otherwise read the whole dataset as a single shard.
        if torch.distributed.is_initialized():
            shard_index = torch.distributed.get_rank()
            shard_count = torch.distributed.get_world_size()
        else:
            shard_index, shard_count = 0, 1

        self.input = ops.FileReader(file_root=data_dir,
                                    shard_id=shard_index,
                                    num_shards=shard_count,
                                    random_shuffle=True)

        # Random-crop settings common to both decoder variants.
        crop_settings = dict(output_type=types.RGB,
                             random_aspect_ratio=[0.75, 4. / 3.],
                             random_area=[0.08, 1.0],
                             num_attempts=100)
        if dali_cpu:
            dali_device = "cpu"
            self.decode = ops.ImageDecoderRandomCrop(device=dali_device,
                                                     **crop_settings)
        else:
            dali_device = "gpu"
            # This padding sets the size of the internal nvJPEG buffers to be
            # able to handle all images from full-sized ImageNet without
            # additional reallocations.
            self.decode = ops.ImageDecoderRandomCrop(
                device="mixed",
                device_memory_padding=211025920,
                host_memory_padding=140544512,
                **crop_settings)

        self.res = ops.Resize(device=dali_device,
                              resize_x=crop,
                              resize_y=crop,
                              interp_type=types.INTERP_TRIANGULAR)
        # Normalization is created on "gpu" regardless of dali_cpu.
        self.cmnp = ops.CropMirrorNormalize(
            device="gpu",
            output_dtype=types.FLOAT,
            output_layout=types.NCHW,
            crop=(crop, crop),
            image_type=types.RGB,
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
        self.coin = ops.CoinFlip(probability=0.5)
Beispiel #3
0
      def __init__(self, split, args=None):
          """Create the reader and image operators for one dataset split.

          Args:
              split: Name of the split; ``'train'`` selects the random-crop
                  decoder and shuffling, any other value selects the plain
                  decoder with a shorter-side resize of 256.
              args: Object providing ``batch_size``, ``val_batch_size``,
                  ``root``, ``input_size`` and ``workers``. It is required
                  despite defaulting to None.

          Raises:
              ValueError: If ``args`` is None.
          """
          # The default exists only for signature compatibility; fail with a
          # clear message rather than an AttributeError on NoneType.
          if args is None:
              raise ValueError(
                  "HybridPipe requires 'args' (batch_size, val_batch_size, "
                  "root, input_size, workers); got None")

          is_train = split == 'train'
          self.split = split
          self.bs = args.batch_size if is_train else args.val_batch_size
          self.shuffle = is_train
          self.data_dir = os.path.join(args.root, split)
          self.crop = args.input_size
          super(HybridPipe, self).__init__(self.bs, args.workers, 0, seed=12)
          self.input = ops.FileReader(file_root=self.data_dir,
                                      shard_id=0,
                                      num_shards=1,
                                      random_shuffle=self.shuffle)
          dali_device = "gpu"
          if is_train:
              self.decode = ops.ImageDecoderRandomCrop(
                  device="mixed",
                  output_type=types.RGB,
                  device_memory_padding=211025920,
                  host_memory_padding=140544512,
                  random_aspect_ratio=[0.75, 1.333],
                  random_area=[0.08, 1.0],
                  num_attempts=100)
              # Training resizes the shorter side to the input size.
              self.res = ops.Resize(device=dali_device,
                                    resize_shorter=args.input_size,
                                    interp_type=types.INTERP_TRIANGULAR)
          else:
              self.decode = ops.ImageDecoder(device="mixed",
                                             output_type=types.RGB)
              self.res = ops.Resize(device=dali_device,
                                    resize_shorter=256,
                                    interp_type=types.INTERP_TRIANGULAR)

          self.cmnp = ops.CropMirrorNormalize(
              device="gpu",
              output_dtype=types.FLOAT,
              output_layout=types.NCHW,
              crop=(self.crop, self.crop),
              image_type=types.RGB,
              mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
              std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
          self.coin = ops.CoinFlip(probability=0.5)
    def __init__(self, cfg):
        """Create the external source and augmentation operators.

        Args:
            cfg: Configuration object. This constructor reads
                ``cfg.dataset.loader.batch_size``,
                ``cfg.dataset.loader.num_workers``,
                ``cfg.device.local_rank``,
                ``cfg.dataset.transform.image_size`` and
                ``cfg.dataset.transform.max_rotate_angle``; the whole object
                is also handed to ``ClsInputIterator``.
        """
        loader_cfg = cfg.dataset.loader
        super(ClsTrainPipe, self).__init__(
            batch_size=loader_cfg.batch_size,
            num_threads=loader_cfg.num_workers,
            device_id=cfg.device.local_rank)

        # The iterator feeds an external source that yields two outputs.
        self.eii = ClsInputIterator(cfg=cfg, is_train=True)
        self.source = ops.ExternalSource(source=self.eii, num_outputs=2)

        self.decode = ops.ImageDecoderRandomCrop(
            device='mixed',
            output_type=types.RGB,
            random_aspect_ratio=[0.8, 1.25],
            random_area=[0.3, 1.0],
            num_attempts=100)
        self.rotate = ops.Rotate(device='gpu', fill_value=127.5)

        # The same edge length drives both the resize and the crop.
        side = cfg.dataset.transform.image_size
        self.res = ops.Resize(device='gpu',
                              resize_x=side,
                              resize_y=side,
                              interp_type=types.INTERP_TRIANGULAR)
        self.cmnp = ops.CropMirrorNormalize(
            device='gpu',
            dtype=types.FLOAT,
            output_layout=types.NCHW,
            crop=(side, side),
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
        self.coin = ops.CoinFlip(probability=0.5)

        # Rotation angles are drawn from a range symmetric around zero.
        max_angle = cfg.dataset.transform.max_rotate_angle
        self.angle = ops.Uniform(range=(-1 * max_angle, max_angle))
Beispiel #5
0
 def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False):
     """Create the reader and augmentation operators for training.

     Args:
         batch_size, num_threads, device_id: Forwarded to the base pipeline
             constructor; the seed is ``12 + device_id``.
         data_dir: Passed to the file reader as ``file_root``.
         crop: Edge length used for both the resize and the crop.
         dali_cpu: When truthy the decoder and resize run on 'cpu';
             otherwise the decoder is 'mixed' and the resize is 'gpu'.

     Sharding is taken from ``args.local_rank`` and ``args.world_size``,
     where ``args`` is a name resolved outside this method.
     """
     super(HybridTrainPipe, self).__init__(batch_size,
                                           num_threads,
                                           device_id,
                                           seed=12 + device_id)
     self.input = ops.FileReader(file_root=data_dir,
                                 shard_id=args.local_rank,
                                 num_shards=args.world_size,
                                 random_shuffle=True)

     # Let the caller decide which device variant suits the model being run.
     if dali_cpu:
         dali_device = decoder_device = 'cpu'
         device_memory_padding = host_memory_padding = 0
     else:
         dali_device, decoder_device = 'gpu', 'mixed'
         # This padding sets the size of the internal nvJPEG buffers to be
         # able to handle all images from full-sized ImageNet without
         # additional reallocations.
         device_memory_padding = 211025920
         host_memory_padding = 140544512

     self.decode = ops.ImageDecoderRandomCrop(
         device=decoder_device,
         output_type=types.RGB,
         device_memory_padding=device_memory_padding,
         host_memory_padding=host_memory_padding,
         random_aspect_ratio=[0.8, 1.25],
         random_area=[0.1, 1.0],
         num_attempts=100)
     self.res = ops.Resize(device=dali_device,
                           resize_x=crop,
                           resize_y=crop,
                           interp_type=types.INTERP_TRIANGULAR)
     # Normalization is created on "gpu" regardless of dali_cpu.
     self.cmnp = ops.CropMirrorNormalize(
         device="gpu",
         output_dtype=types.FLOAT,
         output_layout=types.NCHW,
         crop=(crop, crop),
         image_type=types.RGB,
         mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
         std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
     self.coin = ops.CoinFlip(probability=0.5)
Beispiel #6
0
def imagenet_train_graph(data_dir,
                         size,
                         random_aspect_ratio,
                         random_area,
                         interp_type=types.INTERP_TRIANGULAR,
                         stats=imagenet_stats):
    """Build the training operators and return a graph-definition callable.

    Args:
        data_dir: Passed to the file reader as ``file_root``.
        size: Edge length used for both the resize and the crop.
        random_aspect_ratio: Forwarded to the random-crop decoder.
        random_area: Forwarded to the random-crop decoder.
        interp_type: Interpolation type handed to the resize operator.
        stats: Iterable of exactly two per-channel sequences (mean, std);
            every value is multiplied by 255 before use.

    Returns:
        A zero-argument function that wires the operators together and
        returns ``[output, labels]``.
    """
    reader = ops.FileReader(file_root=data_dir, random_shuffle=True)
    decoder = ops.ImageDecoderRandomCrop(
        device='mixed',
        random_aspect_ratio=random_aspect_ratio,
        random_area=random_area)
    resizer = ops.Resize(device='gpu',
                         resize_x=size,
                         resize_y=size,
                         interp_type=interp_type)
    # Scale both statistic vectors by 255 before handing them to the op.
    mean, std = [[value * 255 for value in channel_stats]
                 for channel_stats in stats]
    normalizer = ops.CropMirrorNormalize(device='gpu',
                                         output_dtype=types.FLOAT16,
                                         crop=(size, size),
                                         mean=mean,
                                         std=std)
    flip_coin = ops.CoinFlip(probability=0.5)

    def define_graph():
        # Read, decode, resize, then crop/normalize with a per-sample mirror.
        jpegs, labels = reader(name='Reader')
        decoded = decoder(jpegs)
        resized = resizer(decoded)
        mirror = flip_coin()
        return [normalizer(resized, mirror=mirror), labels]

    return define_graph
Beispiel #7
0
 def __init__(self,
              batch_size,
              num_threads,
              device_id,
              data_dir="./",
              dali_cpu=False):
     """Create the reader, random-crop decoder and resize operators.

     Args:
         batch_size, num_threads, device_id: Forwarded to the base pipeline
             constructor, which is seeded with 12.
         data_dir: Passed to the file reader as ``file_root``.
         dali_cpu: When truthy the decoder and resize run on "cpu";
             otherwise the decoder is "mixed" and the resize is "gpu".
     """
     super(DecodeCropResizePipeline, self).__init__(batch_size,
                                                    num_threads,
                                                    device_id,
                                                    seed=12)
     self.input = ops.FileReader(file_root=data_dir, random_shuffle=True)

     # The memory paddings are non-zero only for the "mixed" decoder.
     if dali_cpu:
         decoder_device, dali_device = "cpu", "cpu"
         device_memory_padding, host_memory_padding = 0, 0
     else:
         decoder_device, dali_device = "mixed", "gpu"
         device_memory_padding, host_memory_padding = 211025920, 140544512

     self.decode = ops.ImageDecoderRandomCrop(
         device=decoder_device,
         output_type=types.RGB,
         device_memory_padding=device_memory_padding,
         host_memory_padding=host_memory_padding,
         random_aspect_ratio=[0.8, 1.25],
         random_area=[0.1, 1.0],
         num_attempts=100)
     self.res = ops.Resize(device=dali_device,
                           resize_x=224,
                           resize_y=224,
                           interp_type=types.INTERP_TRIANGULAR)
Beispiel #8
0
    def __init__(self,
                 args,
                 batch_size,
                 num_threads,
                 device_id,
                 rec_path,
                 idx_path,
                 shard_id,
                 num_shards,
                 crop_shape,
                 nvjpeg_padding,
                 prefetch_queue=3,
                 output_layout=types.NCHW,
                 pad_output=True,
                 dtype='float16',
                 dali_cpu=False):
        """Create the record reader and augmentation operators for training.

        Args:
            args: Object providing ``dali_fuse_decoder``, ``rgb_mean`` and
                ``rgb_std``.
            batch_size, num_threads, device_id: Forwarded to the base
                pipeline constructor; the seed is ``12 + device_id``.
            rec_path, idx_path: Record and index file, each wrapped in a
                one-element list for the reader.
            shard_id, num_shards: Forwarded to the reader.
            crop_shape: Indexable pair; element 0 is used as ``resize_y``
                and element 1 as ``resize_x``. Also used as the crop size.
            nvjpeg_padding: Used for both device and host memory padding.
            prefetch_queue: Forwarded as ``prefetch_queue_depth``.
            output_layout, pad_output: Forwarded to the normalize operator.
            dtype: ``'float16'`` selects ``types.FLOAT16``; anything else
                selects ``types.FLOAT``.
            dali_cpu: Passed to ``get_device_names`` to pick the devices.
        """
        super(HybridTrainPipe, self).__init__(
            batch_size,
            num_threads,
            device_id,
            seed=12 + device_id,
            prefetch_queue_depth=prefetch_queue)
        self.input = ops.MXNetReader(path=[rec_path],
                                     index_path=[idx_path],
                                     random_shuffle=True,
                                     shard_id=shard_id,
                                     num_shards=num_shards)

        dali_device, decoder_device = get_device_names(dali_cpu)
        decoder_settings = dict(device=decoder_device,
                                output_type=types.RGB,
                                device_memory_padding=nvjpeg_padding,
                                host_memory_padding=nvjpeg_padding)
        if args.dali_fuse_decoder:
            # Fused path: the decoder crops randomly, so a plain resize
            # follows.
            self.decode = ops.ImageDecoderRandomCrop(**decoder_settings)
            self.resize = ops.Resize(device=dali_device,
                                     resize_x=crop_shape[1],
                                     resize_y=crop_shape[0])
        else:
            # Unfused path: full decode followed by a random resized crop.
            self.decode = ops.ImageDecoder(**decoder_settings)
            self.resize = ops.RandomResizedCrop(device=dali_device,
                                                size=crop_shape)

        if dtype == 'float16':
            output_type = types.FLOAT16
        else:
            output_type = types.FLOAT
        self.cmnp = ops.CropMirrorNormalize(
            device="gpu",
            dtype=output_type,
            output_layout=output_layout,
            crop=crop_shape,
            pad_output=pad_output,
            mean=args.rgb_mean,
            std=args.rgb_std)
        self.coin = ops.random.CoinFlip(probability=0.5)
Beispiel #9
0
    def __init__(self,
                 batch_size,
                 num_threads,
                 device_id,
                 root,
                 list_path,
                 crop,
                 shard_id,
                 num_shards,
                 coji=False,
                 dali_cpu=False):
        """Create the list-based reader and augmentation operators.

        Args:
            batch_size, num_threads, device_id: Forwarded to the base
                pipeline constructor; the seed is ``12 + device_id``.
            root: Passed to the file reader as ``file_root``.
            list_path: Passed to the file reader as ``file_list``.
            crop: Edge length used for both the resize and the crop.
            shard_id, num_shards: Forwarded to the reader.
            coji: When truthy a ColorTwist operator and three uniform
                generators over [0.6, 1.4] are created as well.
            dali_cpu: When truthy every operator here runs on 'cpu';
                otherwise the decoder is 'mixed' and the rest 'gpu'.
        """
        super(HybridTrainPipe, self).__init__(batch_size,
                                              num_threads,
                                              device_id,
                                              seed=12 + device_id)
        self.read = ops.FileReader(file_root=root,
                                   file_list=list_path,
                                   shard_id=shard_id,
                                   num_shards=num_shards,
                                   random_shuffle=True,
                                   initial_fill=1024)

        # Let the caller decide which device variant works.
        if dali_cpu:
            dali_device = decoder_device = 'cpu'
            device_memory_padding = host_memory_padding = 0
        else:
            dali_device, decoder_device = 'gpu', 'mixed'
            # This padding sets the size of the internal nvJPEG buffers to
            # be able to handle all images from full-sized ImageNet without
            # additional reallocations.
            device_memory_padding = 211025920
            host_memory_padding = 140544512

        self.decode = ops.ImageDecoderRandomCrop(
            device=decoder_device,
            output_type=types.RGB,
            device_memory_padding=device_memory_padding,
            host_memory_padding=host_memory_padding,
            random_aspect_ratio=[0.75, 1.33333333],
            random_area=[0.08, 1.0],
            num_attempts=100)
        self.resize = ops.Resize(device=dali_device,
                                 resize_x=crop,
                                 resize_y=crop,
                                 interp_type=types.INTERP_TRIANGULAR)
        self.cmnp = ops.CropMirrorNormalize(
            device=dali_device,
            output_dtype=types.FLOAT,
            output_layout=types.NCHW,
            crop=(crop, crop),
            image_type=types.RGB,
            mean=[channel * 255 for channel in IMAGENET_MEAN],
            std=[channel * 255 for channel in IMAGENET_STD])
        self.coin = ops.CoinFlip(probability=0.5)

        self.coji = coji
        if not self.coji:
            return

        # Color jitter: one generator each for brightness, contrast and
        # saturation, all drawing from the same range around 1.0.
        jitter = 0.4
        self.twist = ops.ColorTwist(device=dali_device)
        self.brightness_rng = ops.Uniform(range=[1.0 - jitter, 1.0 + jitter])
        self.contrast_rng = ops.Uniform(range=[1.0 - jitter, 1.0 + jitter])
        self.saturation_rng = ops.Uniform(range=[1.0 - jitter, 1.0 + jitter])
Beispiel #10
0
 def __init__(self,
              file_root,
              file_list,
              batch_size,
              resize_shorter,
              crop,
              min_area,
              lower,
              upper,
              interp,
              mean,
              std,
              device_id,
              shard_id=0,
              num_shards=1,
              random_shuffle=True,
              num_threads=4,
              seed=42,
              pad_output=False,
              output_dtype=types.FLOAT):
     """Create the reader and GPU augmentation operators for training.

     Args:
         file_root, file_list: Forwarded to the file reader.
         batch_size, num_threads, device_id, seed: Forwarded to the base
             pipeline constructor.
         resize_shorter: Not used by this constructor.
         crop: Edge length used for both the resize and the crop.
         min_area: Lower bound of the random crop area; the upper bound
             is 1.0.
         lower, upper: Bounds of the random aspect ratio.
         interp: Interpolation type handed to the resize operator.
         mean, std: Forwarded unchanged to the normalize operator.
         shard_id, num_shards, random_shuffle: Forwarded to the reader.
         pad_output, output_dtype: Forwarded to the normalize operator.
     """
     super(HybridTrainPipe, self).__init__(batch_size,
                                           num_threads,
                                           device_id,
                                           seed=seed)
     self.input = ops.FileReader(file_root=file_root,
                                 file_list=file_list,
                                 shard_id=shard_id,
                                 num_shards=num_shards,
                                 random_shuffle=random_shuffle)
     # Set internal nvJPEG buffer sizes to handle full-sized ImageNet images
     # without additional reallocations.
     self.decode = ops.ImageDecoderRandomCrop(
         device='mixed',
         output_type=types.RGB,
         device_memory_padding=211025920,
         host_memory_padding=140544512,
         random_aspect_ratio=[lower, upper],
         random_area=[min_area, 1.0],
         num_attempts=100)
     self.res = ops.Resize(device='gpu',
                           resize_x=crop,
                           resize_y=crop,
                           interp_type=interp)
     self.cmnp = ops.CropMirrorNormalize(
         device="gpu",
         output_dtype=output_dtype,
         output_layout=types.NCHW,
         crop=(crop, crop),
         image_type=types.RGB,
         mean=mean,
         std=std,
         pad_output=pad_output)
     self.coin = ops.CoinFlip(probability=0.5)
     self.to_int64 = ops.Cast(dtype=types.INT64, device="gpu")
Beispiel #11
0
    def __init__(self,
                 batch_size,
                 num_threads,
                 device_id,
                 data_dir,
                 crop,
                 dali_cpu=False,
                 args_new=None,
                 shuffle_seed=0):
        """Create the sharded reader and augmentation operators for training.

        Args:
            batch_size, num_threads, device_id: Forwarded to the base
                pipeline constructor; the seed is ``12 + device_id`` and
                the prefetch queue depth is 3.
            data_dir: Passed to the file reader as ``file_root``.
            crop: Edge length used for both the resize and the crop.
            dali_cpu: When truthy every operator here runs on 'cpu';
                otherwise the decoder is 'mixed' and the rest 'gpu'.
            args_new: Object providing ``local_rank_per_job`` and
                ``world_size``. It is required despite defaulting to None.
            shuffle_seed: Forwarded to the file reader.

        Raises:
            ValueError: If ``args_new`` is None.
        """
        # The default exists only for signature compatibility; fail with a
        # clear message rather than an AttributeError on NoneType.
        if args_new is None:
            raise ValueError(
                "HybridTrainPipe requires 'args_new' (local_rank_per_job, "
                "world_size); got None")

        super(HybridTrainPipe, self).__init__(batch_size,
                                              num_threads,
                                              device_id,
                                              seed=12 + device_id,
                                              prefetch_queue_depth=3)
        # The reader is configured with shuffle_after_epoch (rather than
        # random_shuffle) together with an explicit shuffle seed.
        self.input = ops.FileReader(file_root=data_dir,
                                    shard_id=args_new.local_rank_per_job,
                                    num_shards=args_new.world_size,
                                    shuffle_after_epoch=True,
                                    shuffle_seed=shuffle_seed)

        dali_device = 'cpu' if dali_cpu else 'gpu'
        decoder_device = 'cpu' if dali_cpu else 'mixed'
        # This padding sets the size of the internal nvJPEG buffers to be
        # able to handle all images from full-sized ImageNet without
        # additional reallocations.
        device_memory_padding = 211025920 if decoder_device == 'mixed' else 0
        host_memory_padding = 140544512 if decoder_device == 'mixed' else 0
        self.decode = ops.ImageDecoderRandomCrop(
            device=decoder_device,
            output_type=types.RGB,
            device_memory_padding=device_memory_padding,
            host_memory_padding=host_memory_padding,
            random_aspect_ratio=[0.8, 1.25],
            random_area=[0.1, 1.0],
            num_attempts=100)
        self.res = ops.Resize(device=dali_device,
                              resize_x=crop,
                              resize_y=crop,
                              interp_type=types.INTERP_TRIANGULAR)
        self.cmnp = ops.CropMirrorNormalize(
            device=dali_device,
            output_dtype=types.FLOAT,
            output_layout=types.NCHW,
            crop=(crop, crop),
            image_type=types.RGB,
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
        self.coin = ops.CoinFlip(probability=0.5)
        print('DALI "{0}" variant, shards={1}'.format(dali_device,
                                                      args_new.world_size))
Beispiel #12
0
    def __init__(self, batch_size, num_threads, device_id, data_dir, crop,
                 mean, std, local_rank=0, world_size=1, dali_cpu=False, shuffle=True, fp16=False,
                 min_crop_size=0.08, color_jitter=False):
        """Create the reader and augmentation operators for training.

        Args:
            batch_size, num_threads, device_id: Forwarded to the base
                pipeline constructor.
            data_dir: Passed to the file reader as ``file_root``.
            crop: Edge length used for both the resize and the crop.
            mean, std: Forwarded unchanged to the normalize operator.
            local_rank, world_size: Used as the reader's ``shard_id`` and
                ``num_shards``.
            dali_cpu: When truthy all operators run on "cpu" and an extra
                Flip operator is created; otherwise the decoder is "mixed"
                and the remaining operators "gpu".
            shuffle: Forwarded to the reader as ``random_shuffle``.
            fp16: Selects ``types.FLOAT16`` output instead of
                ``types.FLOAT``.
            min_crop_size: Lower bound of the random crop area.
            color_jitter: Not used by this constructor.
        """
        # As the Pipeline is recreated at every epoch, the seed must be -1
        # (random seed).
        super(HybridTrainPipe, self).__init__(batch_size,
                                              num_threads,
                                              device_id,
                                              seed=-1)

        # Enabling read_ahead slowed down processing ~40%
        self.input = ops.FileReader(file_root=data_dir,
                                    shard_id=local_rank,
                                    num_shards=world_size,
                                    random_shuffle=shuffle)

        # Let the caller decide which variant works best with the model.
        on_gpu = not dali_cpu
        decode_device = "mixed" if on_gpu else "cpu"
        self.dali_device = "gpu" if on_gpu else "cpu"
        if not on_gpu:
            # The standalone flip operator is only created on the CPU path.
            self.flip = ops.Flip(device=self.dali_device)

        self.cmn = ops.CropMirrorNormalize(
            device=self.dali_device,
            output_dtype=types.FLOAT16 if fp16 else types.FLOAT,
            output_layout=types.NCHW,
            crop=(crop, crop),
            image_type=types.RGB,
            mean=mean,
            std=std)

        # To be able to handle all images from full-sized ImageNet, this
        # padding sets the size of the internal nvJPEG buffers without
        # additional reallocations.
        if on_gpu:
            device_memory_padding, host_memory_padding = 211025920, 140544512
        else:
            device_memory_padding, host_memory_padding = 0, 0
        self.decode = ops.ImageDecoderRandomCrop(
            device=decode_device,
            output_type=types.RGB,
            device_memory_padding=device_memory_padding,
            host_memory_padding=host_memory_padding,
            random_aspect_ratio=[0.8, 1.25],
            random_area=[min_crop_size, 1.0],
            num_attempts=100)

        # Resize as desired. To match the torchvision data loader, use
        # triangular interpolation.
        self.res = ops.Resize(device=self.dali_device,
                              resize_x=crop,
                              resize_y=crop,
                              interp_type=types.INTERP_TRIANGULAR)

        self.coin = ops.CoinFlip(probability=0.5)
        print('DALI "{0}" variant'.format(self.dali_device))
    def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False):
        """Create the TFRecord reader and augmentation operators.

        Args:
            batch_size, num_threads, device_id: Forwarded to the base
                pipeline constructor; the seed is ``12 + device_id``.
            data_dir: Passed to the TFRecord reader as ``path``.
            crop: Edge length used for both the resize and the crop.
            dali_cpu: When truthy the decoder and resize run on 'cpu';
                otherwise the decoder is 'mixed' and the resize is 'gpu'.

        Sharding is taken from ``args.local_rank`` and ``args.world_size``,
        where ``args`` is a name resolved outside this method. Index files
        are listed from a hard-coded directory.
        """
        super(HybridTrainPipe, self).__init__(batch_size,
                                              num_threads,
                                              device_id,
                                              seed=12 + device_id)

        # Collect every entry of the index directory, in sorted order.
        index_dir = "/home/guojia/idx_files/train"
        index_path = sorted(os.path.join(index_dir, entry)
                            for entry in os.listdir(index_dir))

        # Only the features listed here are requested from the records.
        self.input = ops.TFRecordReader(
            path=data_dir,
            index_path=index_path,
            shard_id=args.local_rank,
            num_shards=args.world_size,
            random_shuffle=True,
            features={
                'image/height': tfrec.FixedLenFeature([1], tfrec.int64, -1),
                'image/width': tfrec.FixedLenFeature([1], tfrec.int64, -1),
                'image/colorspace': tfrec.FixedLenFeature([], tfrec.string, ''),
                'image/channels': tfrec.FixedLenFeature([], tfrec.int64, -1),
                'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64, -1),
                'image/class/synset': tfrec.FixedLenFeature([], tfrec.string, ''),
                'image/format': tfrec.FixedLenFeature((), tfrec.string, ""),
                'image/filename': tfrec.FixedLenFeature((), tfrec.string, ""),
                'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
            })

        # Let the caller decide which device variant suits the model.
        if dali_cpu:
            dali_device = decoder_device = 'cpu'
            device_memory_padding = host_memory_padding = 0
        else:
            dali_device, decoder_device = 'gpu', 'mixed'
            # This padding sets the size of the internal nvJPEG buffers to
            # be able to handle all images from full-sized ImageNet without
            # additional reallocations.
            device_memory_padding = 211025920
            host_memory_padding = 140544512

        self.decode = ops.ImageDecoderRandomCrop(
            device=decoder_device,
            output_type=types.RGB,
            device_memory_padding=device_memory_padding,
            host_memory_padding=host_memory_padding,
            random_aspect_ratio=[0.8, 1.25],
            random_area=[0.1, 1.0],
            num_attempts=100)
        self.res = ops.Resize(device=dali_device,
                              resize_x=crop,
                              resize_y=crop,
                              interp_type=types.INTERP_TRIANGULAR)
        # Normalization is created on "gpu" regardless of dali_cpu.
        self.cmnp = ops.CropMirrorNormalize(
            device="gpu",
            output_dtype=types.FLOAT,
            output_layout=types.NCHW,
            crop=(crop, crop),
            image_type=types.RGB,
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
        self.coin = ops.CoinFlip(probability=0.5)
        print('DALI "{0}" variant'.format(dali_device))
Beispiel #14
0
    def __init__(self, device_id, crop, size, rank, seed_rank, options):
        """Create the record reader and seeded augmentation operators.

        Args:
            device_id: Forwarded to the base pipeline constructor.
            crop: Edge length used for both the resize and the crop.
            size: Used as the reader's ``num_shards``.
            rank: Used as the reader's ``shard_id``.
            seed_rank: Added to ``options.seed`` to form the base seed.
            options: Object providing ``batchsize``, ``seed``,
                ``train_dir`` and ``fp16``.
        """
        # Every explicit seed below is the base seed plus a fixed offset.
        base_seed = options.seed + seed_rank
        super(TrainPipeline, self).__init__(options.batchsize,
                                            4,
                                            device_id,
                                            prefetch_queue_depth=3,
                                            set_affinity=True,
                                            seed=base_seed)

        # The record file and its index sit side by side in train_dir.
        self.input = ops.MXNetReader(
            path=[os.path.join(options.train_dir, "data.rec")],
            index_path=[os.path.join(options.train_dir, "data.idx")],
            random_shuffle=True,
            shard_id=rank,
            num_shards=size)

        # This padding sets the size of the internal nvJPEG buffers to be
        # able to handle all images from full-sized ImageNet without
        # additional reallocations.
        self.decode = ops.ImageDecoderRandomCrop(
            device="mixed",
            output_type=types.RGB,
            device_memory_padding=211025920,
            host_memory_padding=140544512,
            random_aspect_ratio=[0.75, 4. / 3.],
            random_area=[0.08, 1.0],
            num_attempts=100,
            seed=base_seed + 1641)

        self.res = ops.Resize(device="gpu",
                              resize_x=crop,
                              resize_y=crop,
                              interp_type=types.INTERP_TRIANGULAR)
        self.cmnp = ops.CropMirrorNormalize(
            device="gpu",
            output_dtype=types.FLOAT16 if options.fp16 else types.FLOAT,
            output_layout=types.NCHW,
            crop=(crop, crop),
            image_type=types.RGB,
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
            pad_output=False,
            seed=base_seed + 1223)
        self.coin = ops.CoinFlip(probability=0.5, seed=base_seed + 412)
Beispiel #15
0
    def __new__(
        cls,
        output_type='BGR',
        host_memory_padding=8388608,
        device_memory_padding=16777216,
        random_area=(0.08, 1.),
        random_aspect_ratio=(0.75, 1.33),
        num_attempts=10,
        **kwargs
    ):
        """Create an ``ImageDecoderRandomCrop`` operator.

        Parameters
        ----------
        output_type : {'BGR', 'RGB'}, optional
            The output color space. A string is resolved to the attribute
            of the same name on ``types``; any other value is forwarded
            unchanged.
        host_memory_padding : int, optional, default=8388608
            The number of bytes for the host buffer.
        device_memory_padding : int, optional, default=16777216
            The number of bytes for the device buffer.
        random_area : Sequence[float], optional, default=(0.08, 1.)
            The range of scale for sampling.
        random_aspect_ratio : Sequence[float], optional, default=(0.75, 1.33)
            The range of aspect ratio for sampling.
        num_attempts : int, optional, default=10
            The maximum number of sampling trials.
        **kwargs
            Extra keyword arguments forwarded to the operator.

        Returns
        -------
        nvidia.dali.ops.ImageDecoderRandomCrop
            The operator.

        """
        # Accept the color space either by name or as an already-resolved value.
        color_space = (getattr(types, output_type)
                       if isinstance(output_type, six.string_types)
                       else output_type)
        return ops.ImageDecoderRandomCrop(
            output_type=color_space,
            host_memory_padding=host_memory_padding,
            device_memory_padding=device_memory_padding,
            random_area=random_area,
            random_aspect_ratio=random_aspect_ratio,
            num_attempts=num_attempts,
            device=context.get_device_type(mixed=True),
            **kwargs
        )
Beispiel #16
0
    def __init__(self, args, batch_size, num_threads, device_id, rec_path, idx_path,
                 shard_id, num_shards, crop_shape, nvjpeg_padding, prefetch_queue=3,
                 output_layout=types.NCHW, pad_output=True, dtype='float16', dali_cpu=False,
                 nvjpeg_width_hint=5980, nvjpeg_height_hint=6430,
                 ):
        """Set up reader, decoder, resize and normalization operators.

        ``args`` must provide ``dali_dont_use_mmap``, ``dali_fuse_decoder``,
        ``rgb_mean`` and ``rgb_std``. When ``args.dali_fuse_decoder`` is set the
        random crop is performed by the decoder and followed by a plain resize;
        otherwise the full image is decoded and ``RandomResizedCrop`` is used.
        ``dali_cpu`` moves the decoder and resize operators to the CPU.
        """
        super(HybridTrainPipe, self).__init__(batch_size,
                                              num_threads,
                                              device_id,
                                              seed=12 + device_id,
                                              prefetch_queue_depth=prefetch_queue)
        self.input = ops.MXNetReader(path=[rec_path],
                                     index_path=[idx_path],
                                     random_shuffle=True,
                                     shard_id=shard_id,
                                     num_shards=num_shards,
                                     dont_use_mmap=args.dali_dont_use_mmap)

        dali_device, decoder_device = ("cpu", "cpu") if dali_cpu else ("gpu", "mixed")

        # The preallocation hints are only forwarded for DALI >= 1.2.0.
        size_hints = {}
        if Version(dali.__version__) >= Version("1.2.0"):
            size_hints["preallocate_width_hint"] = nvjpeg_width_hint
            size_hints["preallocate_height_hint"] = nvjpeg_height_hint

        # Both decoder flavours receive exactly the same arguments.
        decoder_kwargs = dict(device=decoder_device,
                              output_type=types.RGB,
                              device_memory_padding=nvjpeg_padding,
                              host_memory_padding=nvjpeg_padding,
                              **size_hints)
        if args.dali_fuse_decoder:
            self.decode = ops.ImageDecoderRandomCrop(**decoder_kwargs)
            self.resize = ops.Resize(device=dali_device,
                                     resize_x=crop_shape[1],
                                     resize_y=crop_shape[0])
        else:
            self.decode = ops.ImageDecoder(**decoder_kwargs)
            self.resize = ops.RandomResizedCrop(device=dali_device, size=crop_shape)

        if dtype == 'float16':
            out_dtype = types.FLOAT16
        else:
            out_dtype = types.FLOAT
        self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                            output_dtype=out_dtype,
                                            output_layout=output_layout,
                                            crop=crop_shape,
                                            pad_output=pad_output,
                                            image_type=types.RGB,
                                            mean=args.rgb_mean,
                                            std=args.rgb_std)
        self.coin = ops.CoinFlip(probability=0.5)
Beispiel #17
0
    def __init__(self,
                 batch_size,
                 num_threads,
                 data_dir,
                 crop,
                 dali_cpu=False):
        """Build a training pipeline reading ``train.rec``/``train.idx``.

        The base pipeline is always created for device 0 with seed 12.
        ``dali_cpu`` selects a CPU random-crop decoder plus resize; otherwise a
        "mixed" decoder followed by a GPU ``RandomResizedCrop`` is used.
        ``crop`` is the edge length of the square output.
        """
        super(HybridTrainPipe, self).__init__(batch_size, num_threads, 0, seed=12)

        rec_file = osp.join(data_dir, 'train.rec')
        idx_file = osp.join(data_dir, 'train.idx')
        self.input = ops.MXNetReader(path=rec_file,
                                     index_path=idx_file,
                                     random_shuffle=True)

        # The caller picks the variant that suits their setup best.
        dali_device = "cpu" if dali_cpu else "gpu"
        if dali_cpu:
            self.decode = ops.ImageDecoderRandomCrop(device=dali_device,
                                                     output_type=types.RGB)
            self.res = ops.Resize(resize_x=crop, resize_y=crop)
        else:
            # The paddings size the internal nvJPEG buffers so that all images
            # from full-sized ImageNet are handled without reallocations.
            self.decode = ops.ImageDecoder(device="mixed",
                                           output_type=types.RGB,
                                           device_memory_padding=211025920,
                                           host_memory_padding=140544512)
            self.res = ops.RandomResizedCrop(device=dali_device,
                                             size=(crop, crop))

        channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
        channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
        self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                            output_dtype=types.FLOAT,
                                            output_layout=types.NCHW,
                                            crop=(crop, crop),
                                            image_type=types.RGB,
                                            mean=channel_mean,
                                            std=channel_std)
        self.coin = ops.CoinFlip(probability=0.5)
        print('DALI "{0}" variant'.format(dali_device))
    def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False):
        """Create the reader, decoder and augmentation operators.

        Sharding uses the module-level ``args.local_rank`` and
        ``args.world_size``. ``dali_cpu`` moves decoding and resizing to the
        CPU; the remaining augmentation operators that name a device are
        created on the GPU. ``crop`` is the edge length of the square output.
        """
        super(HybridTrainPipe, self).__init__(batch_size,
                                              num_threads,
                                              device_id,
                                              seed=12 + device_id)
        self.input = ops.FileReader(file_root=data_dir,
                                    shard_id=args.local_rank,
                                    num_shards=args.world_size,
                                    random_shuffle=True)

        # The caller picks the variant that suits their setup best.
        if dali_cpu:
            dali_device, decoder_device = 'cpu', 'cpu'
        else:
            dali_device, decoder_device = 'gpu', 'mixed'

        # The paddings size the internal nvJPEG buffers so that all images from
        # full-sized ImageNet are handled without additional reallocations;
        # they are zero unless the "mixed" decoder is used.
        uses_mixed_decoder = decoder_device == 'mixed'
        device_memory_padding = 211025920 if uses_mixed_decoder else 0
        host_memory_padding = 140544512 if uses_mixed_decoder else 0

        # Random crop sampling happens in the decoder, followed by a resize.
        self.decode = ops.ImageDecoderRandomCrop(
            device=decoder_device,
            output_type=types.RGB,
            device_memory_padding=device_memory_padding,
            host_memory_padding=host_memory_padding,
            random_aspect_ratio=[0.75, 4/3.0],
            random_area=[0.08, 1.0],
            num_attempts=10)
        self.res = ops.Resize(device=dali_device,
                              resize_x=crop,
                              resize_y=crop,
                              interp_type=types.INTERP_TRIANGULAR)

        self.vert_flip = ops.Flip(device='gpu', horizontal=0)
        self.vert_coin = ops.CoinFlip(probability=0.075)

        # Color jitter, see https://github.com/NVIDIA/DALI/issues/336
        self.twist = ops.ColorTwist(device="gpu")
        self.rng1 = ops.Uniform(range=[0.6, 1.4])
        # Factor 0.4: range is -0.4 * 255 .. 0.4 * 255.
        self.rng2 = ops.Uniform(range=[-102, 102])

        self.flip = ops.Flip(device="gpu", vertical=1, horizontal=0)
        self.color_jitter = ops.ColorTwist(device="gpu",
                                           hue=0.2,
                                           brightness=0.4,
                                           contrast=0.4,
                                           saturation=0.4)
        self.cmnp = ops.CropMirrorNormalize(
            device="gpu",
            output_dtype=types.FLOAT,
            output_layout=types.NCHW,
            crop=(crop, crop),
            image_type=types.RGB,
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255])

        # Counterpart of torch's RandomHorizontalFlip.
        self.mirrorcoin = ops.CoinFlip(probability=0.5)
        self.uniform = ops.Uniform(range=(0.0, 1.0))
        print('DALI "{0}" variant'.format(dali_device))
Beispiel #19
0
 def __init__(self, db_prefix, for_train, input_size, batch_size,
              num_threads, device_id, num_gpus):
     """Create operators for a RecordIO pipeline sharded per device.

     ``db_prefix`` is the common path prefix of the ``.rec``/``.idx`` pair.
     With ``for_train`` set, reading is shuffled and a random-crop decoder
     plus color/mirror random generators are created; otherwise only a
     plain decoder is created. ``input_size`` is the square output edge.
     """
     super(HybridRecPipe, self).__init__(
         batch_size,
         num_threads,
         device_id,
         seed=12 + device_id,
         prefetch_queue_depth=2)
     self.for_train = for_train

     rec_file = db_prefix + ".rec"
     idx_file = db_prefix + ".idx"
     self.input = ops.MXNetReader(
         path=[rec_file],
         index_path=[idx_file],
         random_shuffle=for_train,
         shard_id=device_id,
         num_shards=num_gpus)
     self.resize = ops.Resize(
         device="gpu", resize_x=input_size, resize_y=input_size)
     self.cmnp = ops.CropMirrorNormalize(
         device="gpu",
         output_dtype=types.FLOAT,
         output_layout=types.NCHW,
         crop=(input_size, input_size),
         image_type=types.RGB,
         mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
         std=[0.229 * 255, 0.224 * 255, 0.225 * 255])

     if not self.for_train:
         # Evaluation: decode the full image, no random augmentation.
         self.decode = ops.ImageDecoder(device="mixed",
                                        output_type=types.RGB)
     else:
         self.decode = ops.ImageDecoderRandomCrop(
             device="mixed",
             output_type=types.RGB,
             random_aspect_ratio=[3 / 4, 4 / 3],
             random_area=[0.08, 1.0],
             num_attempts=100)
         self.color = ops.ColorTwist(device='gpu')
         # Three independent generators drawing from the same range.
         jitter_range = (0.6, 1.4)
         self.rng_brightness = ops.Uniform(range=jitter_range)
         self.rng_contrast = ops.Uniform(range=jitter_range)
         self.rng_saturation = ops.Uniform(range=jitter_range)
         self.mirror_coin = ops.CoinFlip(probability=0.5)
Beispiel #20
0
 def __init__(self,
              batch_size,
              num_threads,
              device_id,
              data_dir="./",
              dali_cpu=False):
     """Create a file-reader pipeline with a fixed 224x224 output.

     ``dali_cpu`` places the decoder, resize and normalization operators
     on the CPU; otherwise the decoder is "mixed" and the rest run on GPU.
     """
     super(DecodeFullPipeline, self).__init__(
         batch_size, num_threads, device_id, seed=12)
     self.input = ops.FileReader(file_root=data_dir, random_shuffle=True)

     decoder_device = "cpu" if dali_cpu else "mixed"
     # Buffer paddings are only non-zero for the "mixed" decoder.
     if decoder_device == 'mixed':
         device_memory_padding = 211025920
         host_memory_padding = 140544512
     else:
         device_memory_padding = 0
         host_memory_padding = 0
     if dali_cpu:
         dali_device = "cpu"
     else:
         dali_device = "gpu"

     self.decode = ops.ImageDecoderRandomCrop(
         device=decoder_device,
         output_type=types.RGB,
         device_memory_padding=device_memory_padding,
         host_memory_padding=host_memory_padding,
         random_aspect_ratio=[0.8, 1.25],
         random_area=[0.1, 1.0],
         num_attempts=100)
     self.res = ops.Resize(
         device=dali_device,
         resize_x=224,
         resize_y=224,
         interp_type=types.INTERP_TRIANGULAR)
     print("Device = {}".format(dali_device))

     channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
     channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
     self.cmnp = ops.CropMirrorNormalize(
         device=dali_device,
         output_dtype=types.FLOAT,
         output_layout=types.NCHW,
         crop=(224, 224),
         mean=channel_mean,
         std=channel_std)
     self.coin = ops.CoinFlip(probability=0.5)
Beispiel #21
0
 def __init__(self):
     """Create the training operators from the module-level ``FLAGS``.

     Reads ``batch_size``, ``num_threads``, ``device_id``, ``data``,
     ``local_rank`` and ``world_size`` from ``FLAGS``. Images come from the
     ``train`` sub-directory of ``FLAGS.data`` and are cropped to 224x224.
     """
     super(HybridTrainPipe, self).__init__(
         FLAGS.batch_size,
         FLAGS.num_threads,
         FLAGS.device_id,
         seed=42 + FLAGS.device_id)

     crop = 224
     self.input = ops.FileReader(
         file_root=os.path.join(FLAGS.data, 'train'),
         shard_id=FLAGS.local_rank,
         num_shards=FLAGS.world_size,
         random_shuffle=True,
         pad_last_batch=True)

     # The paddings size the internal nvJPEG buffers so that full-sized
     # ImageNet images are handled without additional reallocations.
     self.decode = ops.ImageDecoderRandomCrop(
         device='mixed',
         output_type=types.RGB,
         device_memory_padding=211025920,
         host_memory_padding=140544512,
         random_aspect_ratio=[0.8, 1.25],
         random_area=[0.1, 1.0],
         num_attempts=100)
     self.res = ops.Resize(
         device='gpu',
         resize_x=crop,
         resize_y=crop,
         interp_type=types.INTERP_TRIANGULAR)

     channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
     channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
     self.cmnp = ops.CropMirrorNormalize(
         device="gpu",
         output_dtype=types.FLOAT,
         output_layout=types.NCHW,
         crop=(crop, crop),
         image_type=types.RGB,
         mean=channel_mean,
         std=channel_std)
     self.coin = ops.CoinFlip(probability=0.5)
     self.to_int64 = ops.Cast(dtype=types.INT64, device="gpu")
    def __init__(
        self,
        batch_size,
        num_threads,
        device_id,
        data_dir,
        label_dir,
        crop,
        dali_cpu=False,
        training=True,
    ):
        """Create the operators of a TFRecord pipeline sharded over MPI ranks.

        Args:
            batch_size: batch size forwarded to the base pipeline.
            num_threads: worker thread count forwarded to the base pipeline.
            device_id: device ordinal forwarded to the base pipeline.
            data_dir: directory whose entries are the TFRecord data files.
            label_dir: directory whose entries are the matching index files.
            crop: edge length of the final square crop.
            dali_cpu: only influences the padding values and the device named
                in the final log message; every operator here is created on
                "cpu" regardless.
            training: selects shuffled reading with a random-crop decoder
                (True) or plain decoding with a shorter-side resize (False).

        The shard id and shard count are taken from ``ht.MPI_WORLD``.
        """
        shard_id = ht.MPI_WORLD.rank
        num_shards = ht.MPI_WORLD.size
        super(HybridPipe, self).__init__(batch_size,
                                         num_threads,
                                         device_id,
                                         seed=68 + shard_id)

        # os.listdir returns entries in arbitrary order. Sorting makes the
        # file list identical on every rank and keeps data and index files
        # paired by position (assumes an index file sorts to the same position
        # as its data file -- TODO confirm the naming scheme). os.path.join
        # also works when the directories lack a trailing separator.
        data_dir_list = [
            os.path.join(data_dir, d) for d in sorted(os.listdir(data_dir))
        ]
        label_dir_list = [
            os.path.join(label_dir, d) for d in sorted(os.listdir(label_dir))
        ]

        self.input = dali.ops.TFRecordReader(
            path=data_dir_list,
            index_path=label_dir_list,
            random_shuffle=True if training else False,
            shard_id=shard_id,
            num_shards=num_shards,
            initial_fill=10000,
            features={
                "image/encoded":
                dali.tfrecord.FixedLenFeature((), dali.tfrecord.string, ""),
                "image/class/label":
                dali.tfrecord.FixedLenFeature([1], dali.tfrecord.int64, -1),
                "image/class/text":
                dali.tfrecord.FixedLenFeature([], dali.tfrecord.string, ""),
                "image/object/bbox/xmin":
                dali.tfrecord.VarLenFeature(dali.tfrecord.float32, 0.0),
                "image/object/bbox/ymin":
                dali.tfrecord.VarLenFeature(dali.tfrecord.float32, 0.0),
                "image/object/bbox/xmax":
                dali.tfrecord.VarLenFeature(dali.tfrecord.float32, 0.0),
                "image/object/bbox/ymax":
                dali.tfrecord.VarLenFeature(dali.tfrecord.float32, 0.0),
            },
        )
        # The caller picks the variant that suits their setup best.
        dali_device = "cpu" if dali_cpu else "gpu"
        decoder_device = "cpu" if dali_cpu else "mixed"
        # The paddings size the internal nvJPEG buffers so that all images from
        # full-sized ImageNet are handled without additional reallocations.
        # They are kept for now to allow for loading on GPUs later.
        # todo: move info to GPUs
        device_memory_padding = 211025920 if decoder_device == "mixed" else 0
        host_memory_padding = 140544512 if decoder_device == "mixed" else 0
        if training:
            self.decode = ops.ImageDecoderRandomCrop(
                device="cpu",  # decoder_device,
                output_type=dali.types.RGB,
                device_memory_padding=device_memory_padding,
                host_memory_padding=host_memory_padding,
                random_aspect_ratio=[0.75, 1.33],
                random_area=[0.05, 1.0],
                num_attempts=100,
            )
            self.resize = ops.Resize(
                device="cpu",  # dali_device,
                resize_x=crop,
                resize_y=crop,
                interp_type=dali.types.INTERP_TRIANGULAR,
            )
        else:
            self.decode = dali.ops.ImageDecoder(device="cpu",
                                                output_type=dali.types.RGB)
            self.resize = ops.Resize(device="cpu",
                                     resize_shorter=crop,
                                     interp_type=dali.types.INTERP_TRIANGULAR)
        # should this be CPU or GPU? -> if prefetching then do it on CPU before sending
        self.normalize = ops.CropMirrorNormalize(
            device="cpu",  # need to make this work with the define graph
            # dtype=dali.types.FLOAT,  # todo: not implemented on test system (old version of DALI)
            output_layout=dali.types.NCHW,
            crop=(crop, crop),
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        )
        self.coin = ops.CoinFlip(probability=0.5)
        self.training = training
        print0(
            f"Completed init of DALI Dataset on '{dali_device}', is training set? -> {training}"
        )
    def __init__(self,
                 batch_size,
                 num_threads,
                 device_id,
                 data_dir,
                 crop,
                 dali_cpu=False,
                 resume_index=0,
                 resume_epoch=0):
        """Create a sharded file-reader training pipeline with resume support.

        The shard id and reader options come from the module-level ``args``
        (``node_rank``, ``world_size``, ``nnodes``, ``local_rank``, ``mint``,
        ``cache_size``, ``cf_iterator``). ``resume_index``/``resume_epoch`` are
        forwarded to the reader only when one of them or ``args.cf_iterator``
        is set and ``args.mint`` is not. ``dali_cpu`` moves decoding and
        resizing to the CPU; ``crop`` is the edge of the square output.
        """
        super(HybridTrainPipe, self).__init__(batch_size,
                                              num_threads,
                                              device_id,
                                              seed=12 + device_id)

        shard = int(args.node_rank * args.world_size / args.nnodes +
                    args.local_rank)
        # Arguments shared by every reader variant below.
        reader_kwargs = dict(file_root=data_dir,
                             shard_id=shard,
                             num_shards=args.world_size,
                             shuffle_after_epoch=True)
        if args.mint:
            self.input = ops.FileReader(
                **dict(reader_kwargs, cache_size=args.cache_size))
        else:
            # Deterministic shuffling is requested whenever we resume or the
            # cf iterator is in use.
            cf_det = bool(resume_index or resume_epoch or args.cf_iterator)
            if cf_det:
                reader_kwargs = dict(reader_kwargs,
                                     resume_index=resume_index,
                                     resume_epoch=resume_epoch,
                                     cf_det=cf_det)
            self.input = ops.FileReader(**reader_kwargs)

            print("CF deterministic shuffling is {}".format(cf_det))

        # The caller picks the variant that suits their setup best.
        if dali_cpu:
            dali_device, decoder_device = 'cpu', 'cpu'
        else:
            dali_device, decoder_device = 'gpu', 'mixed'
        # The paddings size the internal nvJPEG buffers so that all images from
        # full-sized ImageNet are handled without additional reallocations.
        uses_mixed_decoder = decoder_device == 'mixed'
        device_memory_padding = 211025920 if uses_mixed_decoder else 0
        host_memory_padding = 140544512 if uses_mixed_decoder else 0
        self.decode = ops.ImageDecoderRandomCrop(
            device=decoder_device,
            output_type=types.RGB,
            device_memory_padding=device_memory_padding,
            host_memory_padding=host_memory_padding,
            random_aspect_ratio=[0.8, 1.25],
            random_area=[0.1, 1.0],
            num_attempts=100)
        self.res = ops.Resize(device=dali_device,
                              resize_x=crop,
                              resize_y=crop,
                              interp_type=types.INTERP_TRIANGULAR)

        channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
        channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
        self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                            output_dtype=types.FLOAT,
                                            output_layout=types.NCHW,
                                            crop=(crop, crop),
                                            image_type=types.RGB,
                                            mean=channel_mean,
                                            std=channel_std)
        self.coin = ops.CoinFlip(probability=0.5)
        print('DALI "{0}" variant'.format(dali_device))
Beispiel #24
0
    def __init__(self,
                 batch_size,
                 num_threads,
                 device_id,
                 rec_path,
                 idx_path,
                 shard_id,
                 num_shards,
                 crop_shape,
                 min_random_area,
                 max_random_area,
                 min_random_aspect_ratio,
                 max_random_aspect_ratio,
                 nvjpeg_padding,
                 prefetch_queue=3,
                 seed=12,
                 output_layout=types.NCHW,
                 pad_output=True,
                 dtype='float16',
                 mlperf_print=True,
                 use_roi_decode=False,
                 cache_size=0):
        """Create reader, decoder, crop/resize and normalization operators.

        Args:
            batch_size, num_threads, device_id: forwarded to the base pipeline.
            rec_path, idx_path: RecordIO data file and its index file.
            shard_id, num_shards: reader sharding.
            crop_shape: output shape, passed unchanged as ``crop`` and ``size``
                to the DALI operators, i.e. ordered (height, width).
            min_random_area, max_random_area: random crop area range.
            min_random_aspect_ratio, max_random_aspect_ratio: random crop
                aspect-ratio range.
            nvjpeg_padding: used for both device and host decoder padding.
            prefetch_queue: prefetch queue depth of the base pipeline.
            seed: base seed; ``device_id`` is added to it.
            output_layout, pad_output: forwarded to ``CropMirrorNormalize``.
            dtype: ``'float16'`` selects FLOAT16 output, anything else FLOAT.
            mlperf_print: accepted but not used by this constructor.
            use_roi_decode: use the random-crop decoder plus a plain resize;
                only honoured when ``cache_size`` is 0.
            cache_size: when positive, enables the decoder cache and the
                shard-sticky, lazily initialised reader.
        """
        super(HybridTrainPipe,
              self).__init__(batch_size,
                             num_threads,
                             device_id,
                             seed=seed + device_id,
                             prefetch_queue_depth=prefetch_queue)

        if cache_size > 0:
            self.input = ops.MXNetReader(path=[rec_path],
                                         index_path=[idx_path],
                                         random_shuffle=True,
                                         shard_id=shard_id,
                                         num_shards=num_shards,
                                         stick_to_shard=True,
                                         lazy_init=True,
                                         skip_cached_images=True)
        else:  # stick_to_shard might not exist in this version of DALI.
            self.input = ops.MXNetReader(path=[rec_path],
                                         index_path=[idx_path],
                                         random_shuffle=True,
                                         shard_id=shard_id,
                                         num_shards=num_shards)

        if use_roi_decode and cache_size == 0:
            self.decode = ops.ImageDecoderRandomCrop(
                device="mixed",
                output_type=types.RGB,
                device_memory_padding=nvjpeg_padding,
                host_memory_padding=nvjpeg_padding,
                random_area=[min_random_area, max_random_area],
                random_aspect_ratio=[
                    min_random_aspect_ratio, max_random_aspect_ratio
                ])
            # crop_shape is (height, width): resize_x is the width and
            # resize_y the height. The previous code had the indices swapped,
            # which only went unnoticed for square crops.
            self.rrc = ops.Resize(device="gpu",
                                  resize_x=crop_shape[1],
                                  resize_y=crop_shape[0])
        else:
            if cache_size > 0:
                self.decode = ops.ImageDecoder(
                    device="mixed",
                    output_type=types.RGB,
                    device_memory_padding=nvjpeg_padding,
                    host_memory_padding=nvjpeg_padding,
                    cache_type='threshold',
                    cache_size=cache_size,
                    cache_threshold=0,
                    cache_debug=False)
            else:
                self.decode = ops.ImageDecoder(
                    device="mixed",
                    output_type=types.RGB,
                    device_memory_padding=nvjpeg_padding,
                    host_memory_padding=nvjpeg_padding)

            # Full-image decode: the random crop is done by a separate op.
            self.rrc = ops.RandomResizedCrop(
                device="gpu",
                random_area=[min_random_area, max_random_area],
                random_aspect_ratio=[
                    min_random_aspect_ratio, max_random_aspect_ratio
                ],
                size=crop_shape)

        self.cmnp = ops.CropMirrorNormalize(
            device="gpu",
            output_dtype=types.FLOAT16 if dtype == 'float16' else types.FLOAT,
            output_layout=output_layout,
            crop=crop_shape,
            pad_output=pad_output,
            image_type=types.RGB,
            mean=_mean_pixel,
            std=_std_pixel)
        self.coin = ops.CoinFlip(probability=0.5)
    def __init__(self,
                 batch_size,
                 num_threads,
                 device_id,
                 data_dir,
                 crop,
                 dali_cpu=False):
        """Create a training pipeline sharded via ``torch.distributed``.

        When ``torch.distributed`` is initialized the reader is sharded by
        rank and world size; otherwise a single shard is used. ``dali_cpu``
        moves decoding and resizing to the CPU; ``crop`` is the edge length
        of the square output.
        """
        super(HybridTrainPipe, self).__init__(batch_size,
                                              num_threads,
                                              device_id,
                                              seed=12 + device_id)

        # Distributed sharding, falling back to one shard when not distributed.
        local_rank, world_size = 0, 1
        if torch.distributed.is_initialized():
            local_rank = torch.distributed.get_rank()
            world_size = torch.distributed.get_world_size()

        self.input = ops.FileReader(file_root=data_dir,
                                    shard_id=local_rank,
                                    num_shards=world_size,
                                    random_shuffle=True)

        # Crop-sampling arguments shared by both decoder variants.
        crop_sampling = dict(output_type=types.RGB,
                             random_aspect_ratio=[0.8, 1.25],
                             random_area=[0.1, 1.0],
                             num_attempts=100)
        # The caller picks the variant that suits their setup best.
        if dali_cpu:
            dali_device = "cpu"
            self.decode = ops.ImageDecoderRandomCrop(device=dali_device,
                                                     **crop_sampling)
        else:
            dali_device = "gpu"
            # The paddings size the internal nvJPEG buffers so that all images
            # from full-sized ImageNet are handled without reallocations.
            self.decode = ops.ImageDecoderRandomCrop(
                device="mixed",
                device_memory_padding=211025920,
                host_memory_padding=140544512,
                **crop_sampling)

        self.res = ops.Resize(device=dali_device,
                              resize_x=crop,
                              resize_y=crop,
                              interp_type=types.INTERP_TRIANGULAR)

        # Statistics taken from
        # https://github.com/Armour/pytorch-nn-practice/blob/master/utils/meanstd.py
        channel_mean = [0.50707516 * 255, 0.48654887 * 255, 0.44091784 * 255]
        channel_std = [0.26733429 * 255, 0.25643846 * 255, 0.27615047 * 255]
        self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                            output_dtype=types.FLOAT,
                                            output_layout=types.NCHW,
                                            crop=(crop, crop),
                                            image_type=types.RGB,
                                            mean=channel_mean,
                                            std=channel_std)

        self.coin = ops.CoinFlip(probability=0.5)

        self.to_int64 = ops.Cast(dtype=types.INT64, device="gpu")

        pipeline_logger.info('DALI "{0}" variant'.format(dali_device))
Beispiel #26
0
 def __init__(
     self,
     train=False,
     batch_size=16,
     size=384,
     num_threads=4,
     device_id=0
 ):
     """Create operators for an external-source image/mask pipeline.

     ``train`` selects a random-crop decoder (training) or a fixed-size
     crop decoder (otherwise). ``size`` is the square edge used for the
     crop and both resize operators. Data is pulled from an
     ``ExternalInputIterator`` built from ``train`` and ``batch_size``.
     """
     super(ExternalSourcePipeline, self).__init__(
         batch_size, num_threads, device_id, seed=42)
     source = ExternalInputIterator(train, batch_size)
     self.eii = iter(source)
     self.images_input = ops.ExternalSource()
     self.masks_input = ops.ExternalSource()

     if not train:
         self.decode = ops.ImageDecoderCrop(device="mixed",
                                            crop=(size, size))
     else:
         # Crop area is sampled within +/-30% of this reference fraction.
         fixed_area = (size / 784)**2
         area_range = [fixed_area*0.7, fixed_area*1.3]
         self.decode = ops.ImageDecoderRandomCrop(
             device="mixed",
             random_area=area_range,
             random_aspect_ratio=[0.7, 1.3])

     # Images use triangular interpolation, masks nearest neighbour.
     self.resize = ops.Resize(device="gpu",
                              interp_type=types.INTERP_TRIANGULAR,
                              resize_x=size,
                              resize_y=size)
     self.mask_resize = ops.Resize(device="gpu",
                                   interp_type=types.INTERP_NN,
                                   resize_x=size,
                                   resize_y=size)

     # Single-value mean/std for the image normalization.
     self.normalize = ops.CropMirrorNormalize(device="gpu",
                                              mean=[0.5 * 255],
                                              std=[0.5 * 255],
                                              output_layout=types.NCHW)
     self.mask_normalize = ops.CropMirrorNormalize(
         device="gpu",
         mean=[0],
         std=[255],
         output_layout=types.NCHW,
         image_type=types.GRAY)

     # Extra augmentations.
     self.to_gray = ops.ColorSpaceConversion(device="gpu",
                                             image_type=types.RGB,
                                             output_type=types.GRAY)
     self.contrast = ops.BrightnessContrast(device="gpu")
     self.hsv = ops.Hsv(device="gpu")
     self.jitter = ops.Jitter(device="gpu")
     self.rng2 = ops.Uniform(range=[0.8, 1.2])
     self.rng3 = ops.Uniform(range=[-30, 30])  # for hue
     self.coin03 = ops.CoinFlip(probability=0.3)
     self.train = train