Example #1
def create_video_pipe(video_files,
                      sequence_length=8,
                      target_size=224,
                      stride=30):
    pipeline = Pipeline(1, 4, 0, seed=42)
    with pipeline:
        images = fn.readers.video(device="gpu",
                                  filenames=video_files,
                                  sequence_length=sequence_length,
                                  stride=stride,
                                  shard_id=0,
                                  num_shards=1,
                                  random_shuffle=False,
                                  pad_last_batch=True,
                                  name="Reader")
        images = fn.crop_mirror_normalize(
            images,
            dtype=types.FLOAT,
            output_layout="FCHW",
            crop=(target_size, target_size),
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255])

        pipeline.set_outputs(images)
    return pipeline
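A minimal usage sketch for the pipeline above; the file names and the way the first batch is inspected are illustrative assumptions, not part of the original example.

video_files = ["clip_0.mp4", "clip_1.mp4"]   # hypothetical paths
pipe = create_video_pipe(video_files)
pipe.build()
(sequences,) = pipe.run()                    # TensorListGPU of FCHW float sequences
print(sequences.as_cpu().at(0).shape)        # expected: (8, 3, 224, 224)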
Example #2
def val_pipeline(cfg: ValLoaderConfig):
    jpeg, label = fn.readers.file(
        file_root=ROOT_DATA_DIR + "/val/",
        shard_id=env_rank(),
        num_shards=env_world_size(),
        name="Reader",
    )

    image = fn.decoders.image(jpeg, device="mixed", output_type=types.RGB)

    crop_size = cfg.image_size if cfg.full_crop else math.ceil(
        (cfg.image_size * 1.14 + 8) // 16 * 16)
    image = fn.resize(image,
                      device="gpu",
                      interp_type=types.INTERP_TRIANGULAR,
                      resize_shorter=crop_size)

    image = fn.crop_mirror_normalize(
        image,
        device="gpu",
        crop=(cfg.image_size, cfg.image_size),
        mean=DATA_MEAN,
        std=DATA_STD,
        dtype=types.FLOAT,
        output_layout=types.NCHW,
    )
    label = fn.one_hot(label, num_classes=cfg.num_classes).gpu()
    return image, label
Example #3
def get_pipeline(folder="train", custom_reader=None):
    pipe = Pipeline(batch_size=64, num_threads=1, device_id=1)

    if custom_reader:
        raw_files, labels = custom_reader
    else:
        raw_files, labels = fn.file_reader(file_root="%s" % folder,
                                           random_shuffle=True)

    # decode to RGB so the hsv/brightness_contrast stages below receive 3-channel input
    decode = fn.image_decoder(raw_files, device="mixed", output_type=types.RGB)
    resize = fn.resize(decode, device="gpu", image_type=types.RGB,
                       interp_type=types.INTERP_LINEAR, resize_x=WIDTH, resize_y=HEIGHT)

    hsv = fn.hsv(resize, hue=fn.uniform(range=(-10, 10)), saturation=fn.uniform(range=(-.5, .5)),
                 value=fn.uniform(range=(0.9, 1.2)), device="gpu", dtype=types.UINT8)
    bc = fn.brightness_contrast(hsv, device="gpu", brightness=fn.uniform(range=(.9, 1.1)))

    cmn = fn.crop_mirror_normalize(bc, device="gpu", output_dtype=types.FLOAT,
                                   output_layout=types.NHWC,
                                   image_type=types.GRAY,
                                   mean=[255 // 2],
                                   std=[255 // 2])

    rot = fn.rotate(cmn, angle=fn.uniform(range=(-40, 40)), device="gpu", keep_size=True)

    tpose = fn.transpose(rot, perm=(2, 0, 1), device="gpu")  # Reshaping to a format PyTorch likes

    pipe.set_outputs(tpose, labels)
    pipe.build()

    dali_iter = DALIClassificationIterator([pipe], -1)

    return dali_iter
Example #4
def check_gaussian_blur_output(batch_size, sigma, window_size, op_type="cpu"):
    decoder_device = "cpu" if op_type == "cpu" else "mixed"
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    with pipe:
        input, _ = fn.file_reader(file_root=images_dir,
                                  shard_id=0,
                                  num_shards=1)
        decoded = fn.image_decoder(input,
                                   device=decoder_device,
                                   output_type=types.RGB)
        blurred = fn.gaussian_blur(decoded,
                                   device=op_type,
                                   sigma=sigma,
                                   window_size=window_size)
        normalized = fn.crop_mirror_normalize(blurred,
                                              device=op_type,
                                              dtype=types.FLOAT,
                                              output_layout="HWC",
                                              mean=[128.0, 128.0, 128.0],
                                              std=[100.0, 100.0, 100.0])
        pipe.set_outputs(normalized)
    pipe.build()

    for _ in range(3):
        result = pipe.run()
Example #5
def mnist_pipeline(num_threads,
                   path,
                   device,
                   device_id=0,
                   shard_id=0,
                   num_shards=1,
                   seed=0):
    pipeline = Pipeline(BATCH_SIZE, num_threads, device_id, seed)
    with pipeline:
        jpegs, labels = fn.readers.caffe2(path=path,
                                          random_shuffle=True,
                                          shard_id=shard_id,
                                          num_shards=num_shards)
        images = fn.image_decoder(jpegs,
                                  device='mixed' if device == 'gpu' else 'cpu',
                                  output_type=types.GRAY)
        if device == 'gpu':
            labels = labels.gpu()
        images = fn.crop_mirror_normalize(images,
                                          dtype=types.FLOAT,
                                          mean=[0.],
                                          std=[255.],
                                          output_layout="CHW")

        pipeline.set_outputs(images, labels)

    return pipeline
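A minimal sketch of driving the MNIST pipeline above; the LMDB path and the BATCH_SIZE value are assumptions for illustration.

BATCH_SIZE = 64                                   # assumed; defined elsewhere in the original
pipe = mnist_pipeline(num_threads=2, path="/data/mnist/train_lmdb", device="gpu")
pipe.build()
images, labels = pipe.run()                       # one batch of normalized CHW images and labels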
Example #6
def order_change_pipeline():
    # `order_change_pipeline.change` is expected to be initialized (e.g. to False)
    # before the first call; it toggles whether `mirror` gets a random or a fixed value.
    if order_change_pipeline.change:
        rng = 0
    else:
        order_change_pipeline.change = True
        rng = fn.random.coin_flip(probability=0.5, seed=47)
    jpegs, labels = fn.readers.file(file_root=file_root,
                                    shard_id=0,
                                    num_shards=2)
    images = fn.decoders.image(jpegs, device='mixed', output_type=types.RGB)
    resized_images = fn.random_resized_crop(images,
                                            device="gpu",
                                            size=(224, 224),
                                            seed=27)
    out_type = types.FLOAT16

    output = fn.crop_mirror_normalize(
        resized_images.gpu(),
        mirror=rng,
        device="gpu",
        dtype=out_type,
        crop=(224, 224),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    return rng, jpegs, labels, images, resized_images, output
Example #7
def create_coco_pipeline(file_root,
                         annotations_file,
                         batch_size=1,
                         device_id=0,
                         num_threads=4,
                         local_rank=0,
                         world_size=1):
    pipeline = Pipeline(batch_size, num_threads,
                        local_rank, seed=42 + device_id)

    with pipeline:
        images, bboxes, labels = fn.coco_reader(file_root=file_root,
                                                annotations_file=annotations_file,
                                                skip_empty=True,
                                                shard_id=local_rank,
                                                num_shards=world_size,
                                                ratio=True,
                                                ltrb=True,
                                                random_shuffle=False,
                                                shuffle_after_epoch=True,
                                                name="Reader")

        crop_begin, crop_size, bboxes, labels = fn.random_bbox_crop(bboxes, labels,
                                                                    device="cpu",
                                                                    aspect_ratio=[0.5, 2.0],
                                                                    thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
                                                                    scaling=[0.3, 1.0],
                                                                    bbox_layout="xyXY",
                                                                    allow_no_crop=True,
                                                                    num_attempts=50)
        images = fn.image_decoder_slice(images, crop_begin, crop_size, device="mixed", output_type=types.RGB)
        flip_coin = fn.coin_flip(probability=0.5)
        images = fn.resize(images,
                           resize_x=300,
                           resize_y=300,
                           min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

        # use float to avoid clipping and quantizing the intermediate result
        images = fn.hsv(images, dtype=types.FLOAT, hue=fn.uniform(range=[-0.5, 0.5]),
                        saturation=fn.uniform(range=[0.5, 1.5]))

        images = fn.brightness_contrast(images,
                                        contrast_center=128,  # input is in float, but in 0..255 range
                                        dtype=types.UINT8,
                                        brightness=fn.uniform(range=[0.875, 1.125]),
                                        contrast=fn.uniform(range=[0.5, 1.5]))

        bboxes = fn.bb_flip(bboxes, ltrb=True, horizontal=flip_coin)
        images = fn.crop_mirror_normalize(images,
                                          mean=[104., 117., 123.],
                                          std=[1., 1., 1.],
                                          mirror=flip_coin,
                                          dtype=types.FLOAT,
                                          output_layout="CHW",
                                          pad_output=False)

        pipeline.set_outputs(images, bboxes, labels)
    return pipeline
Example #8
def dali_data_iter(batch_size: int,
                   rec_file: str,
                   idx_file: str,
                   num_threads: int,
                   initial_fill=32768,
                   random_shuffle=True,
                   prefetch_queue_depth=1,
                   local_rank=0,
                   name="reader",
                   mean=(127.5, 127.5, 127.5),
                   std=(127.5, 127.5, 127.5)):
    """
    Parameters:
    ----------
    initial_fill: int
        Size of the buffer that is used for shuffling. If random_shuffle is False, this parameter is ignored.

    """
    rank: int = distributed.get_rank()
    world_size: int = distributed.get_world_size()
    import nvidia.dali.fn as fn
    import nvidia.dali.types as types
    from nvidia.dali.pipeline import Pipeline
    from nvidia.dali.plugin.pytorch import DALIClassificationIterator

    pipe = Pipeline(
        batch_size=batch_size,
        num_threads=num_threads,
        device_id=local_rank,
        prefetch_queue_depth=prefetch_queue_depth,
    )
    condition_flip = fn.random.coin_flip(probability=0.5)
    with pipe:
        jpegs, labels = fn.readers.mxnet(path=rec_file,
                                         index_path=idx_file,
                                         initial_fill=initial_fill,
                                         num_shards=world_size,
                                         shard_id=rank,
                                         random_shuffle=random_shuffle,
                                         pad_last_batch=False,
                                         name=name)
        images = fn.decoders.image(jpegs,
                                   device="mixed",
                                   output_type=types.RGB)
        images = fn.crop_mirror_normalize(images,
                                          dtype=types.FLOAT,
                                          mean=mean,
                                          std=std,
                                          mirror=condition_flip)
        pipe.set_outputs(images, labels)
    pipe.build()
    return DALIWarper(
        DALIClassificationIterator(
            pipelines=[pipe],
            reader_name=name,
        ))
Example #9
def get_pipe():
    # input_shape, batch_size and device come from the enclosing (elided) scope
    def get_data():
        out = [
            np.zeros(input_shape, dtype=np.uint8)
            for _ in range(batch_size)
        ]
        return out

    data = fn.external_source(source=get_data, device=device)
    return fn.crop_mirror_normalize(data, crop_h=10, crop_w=20)
Example #10
def create_dali_pipeline(data_dir,
                         crop,
                         size,
                         shard_id,
                         num_shards,
                         dali_cpu=False,
                         is_training=True):
    images, labels = fn.readers.file(file_root=data_dir,
                                     shard_id=shard_id,
                                     num_shards=num_shards,
                                     random_shuffle=is_training,
                                     pad_last_batch=True,
                                     name="Reader")
    dali_device = 'cpu' if dali_cpu else 'gpu'
    decoder_device = 'cpu' if dali_cpu else 'mixed'
    device_memory_padding = 211025920 if decoder_device == 'mixed' else 0
    host_memory_padding = 140544512 if decoder_device == 'mixed' else 0
    if is_training:
        images = fn.decoders.image_random_crop(
            images,
            device=decoder_device,
            output_type=types.RGB,
            device_memory_padding=device_memory_padding,
            host_memory_padding=host_memory_padding,
            random_aspect_ratio=[0.8, 1.25],
            random_area=[0.1, 1.0],
            num_attempts=100)
        images = fn.resize(images,
                           device=dali_device,
                           resize_x=crop,
                           resize_y=crop,
                           interp_type=types.INTERP_TRIANGULAR)
        mirror = fn.random.coin_flip(probability=0.5)
    else:
        images = fn.decoders.image(images,
                                   device=decoder_device,
                                   output_type=types.RGB)
        images = fn.resize(images,
                           device=dali_device,
                           size=size,
                           mode="not_smaller",
                           interp_type=types.INTERP_TRIANGULAR)
        mirror = False

    images = fn.crop_mirror_normalize(
        images.gpu(),
        dtype=types.FLOAT,
        output_layout="CHW",
        crop=(crop, crop),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        mirror=mirror)
    labels = labels.gpu()
    return images, labels
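This function only defines the processing graph and returns its outputs; in DALI such graph functions are normally wrapped with pipeline_def to obtain a runnable Pipeline. A minimal sketch under that assumption (the data path and sizes are illustrative):

from nvidia.dali import pipeline_def

pipe = pipeline_def(create_dali_pipeline)(
    data_dir="/data/imagenet/train",            # hypothetical path
    crop=224, size=256,
    shard_id=0, num_shards=1,
    dali_cpu=False, is_training=True,
    batch_size=256, num_threads=4, device_id=0, seed=12)
pipe.build()
images, labels = pipe.run()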
Example #11
def create_dali_pipeline(batch_size, num_threads, device_id, data_dir):
    with open(join(data_dir, "file_list.txt"), "r") as f:
        files = [line.rstrip() for line in f if line.strip()]

    shuffle(files)

    img_files = []
    seg_files = []

    for prefix in files:
        img_files.append(join(data_dir, "leftImg8bit/train", prefix + "_leftImg8bit.png"))
        seg_files.append(join(data_dir, "gtFine/train", prefix + "_gtFine_labelIds.png"))

    pipeline = Pipeline(batch_size, num_threads, device_id, seed=12 + device_id)
    with pipeline:
        imgs, _ = fn.file_reader(files=img_files,
                                 shard_id=0, num_shards=1,
                                 random_shuffle=False, pad_last_batch=True)

        segs, _ = fn.file_reader(files=seg_files,
                                 shard_id=0, num_shards=1,
                                 random_shuffle=False, pad_last_batch=True)

        dali_device = 'gpu'
        decoder_device = 'mixed'
        # device_memory_padding = 211025920 if decoder_device == 'mixed' else 0
        # host_memory_padding = 140544512 if decoder_device == 'mixed' else 0
        device_memory_padding = 0
        host_memory_padding = 0

        imgs = fn.image_decoder(imgs, device=decoder_device, output_type=types.RGB,
                                device_memory_padding=device_memory_padding,
                                host_memory_padding=host_memory_padding,
                                hybrid_huffman_threshold=250000)

        segs = fn.image_decoder(segs, device=decoder_device, output_type=types.GRAY,
                                 device_memory_padding=device_memory_padding,
                                 host_memory_padding=host_memory_padding,
                                 hybrid_huffman_threshold=250000)

        imgs = fn.crop_mirror_normalize(imgs, device=dali_device,
                                        crop=(512, 512),
                                        dtype=types.FLOAT,
                                        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
                                        output_layout="CHW")

        segs = fn.crop(segs, device=dali_device,
                       dtype=types.UINT8, crop=(512, 512))

        pipeline.set_outputs(imgs, segs)

    return pipeline
Example #12
def rn50_pipeline_2(data_path):
    uniform = fn.random.uniform(range=(0., 1.), shape=2)
    resize_uniform = fn.random.uniform(range=(256., 480.))
    mirror = fn.random.coin_flip(probability=0.5)
    jpegs, _ = fn.readers.file(file_root=data_path)
    images = fn.decoders.image(jpegs, device='mixed', output_type=types.RGB)
    resized_images = fn.resize(images, device='gpu', interp_type=types.INTERP_LINEAR,
                               resize_shorter=resize_uniform)
    output = fn.crop_mirror_normalize(resized_images, device='gpu', dtype=types.FLOAT16,
                                      crop=(224, 224), mean=[128., 128., 128.], std=[1., 1., 1.],
                                      mirror=mirror, crop_pos_x=uniform[0], crop_pos_y=uniform[1])
    return output
Example #13
def create_dali_pipeline(data_dir, crop, size, shard_id, num_shards, dali_cpu=False, is_training=True):
    images, labels = fn.readers.file(file_root=data_dir,
                                     shard_id=shard_id,
                                     num_shards=num_shards,
                                     random_shuffle=is_training,
                                     pad_last_batch=True,
                                     name="Reader")
    dali_device = 'cpu' if dali_cpu else 'gpu'
    decoder_device = 'cpu' if dali_cpu else 'mixed'
    # ask nvJPEG to preallocate memory for the biggest sample in ImageNet for CPU and GPU to avoid reallocations in runtime
    device_memory_padding = 211025920 if decoder_device == 'mixed' else 0
    host_memory_padding = 140544512 if decoder_device == 'mixed' else 0
    # ask HW NVJPEG to allocate memory ahead for the biggest image in the data set to avoid reallocations in runtime
    preallocate_width_hint = 5980 if decoder_device == 'mixed' else 0
    preallocate_height_hint = 6430 if decoder_device == 'mixed' else 0
    if is_training:
        images = fn.decoders.image_random_crop(images,
                                               device=decoder_device, output_type=types.RGB,
                                               device_memory_padding=device_memory_padding,
                                               host_memory_padding=host_memory_padding,
                                               preallocate_width_hint=preallocate_width_hint,
                                               preallocate_height_hint=preallocate_height_hint,
                                               random_aspect_ratio=[0.8, 1.25],
                                               random_area=[0.1, 1.0],
                                               num_attempts=100)
        images = fn.resize(images,
                           device=dali_device,
                           resize_x=crop,
                           resize_y=crop,
                           interp_type=types.INTERP_TRIANGULAR)
        mirror = fn.random.coin_flip(probability=0.5)
    else:
        images = fn.decoders.image(images,
                                   device=decoder_device,
                                   output_type=types.RGB)
        images = fn.resize(images,
                           device=dali_device,
                           size=size,
                           mode="not_smaller",
                           interp_type=types.INTERP_TRIANGULAR)
        mirror = False

    images = fn.crop_mirror_normalize(images.gpu(),
                                      dtype=types.FLOAT,
                                      output_layout="CHW",
                                      crop=(crop, crop),
                                      mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                      std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
                                      mirror=mirror)
    labels = labels.gpu()
    return images, labels
Example #14
def es_pipeline_debug():
    images = fn.external_source(name='input')
    labels = fn.external_source(name='labels')
    rng = fn.random.coin_flip(probability=0.5, seed=47)
    images = fn.random_resized_crop(images, size=(224, 224), seed=27)
    out_type = types.FLOAT16

    output = fn.crop_mirror_normalize(
        images.gpu(),
        mirror=rng,
        device="gpu",
        dtype=out_type,
        crop=(224, 224),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    return rng, images, output, labels
Example #15
def get_image_pipeline(batch_size, num_threads, device, device_id=0, shard_id=0, num_shards=1,
        def_for_dataset=False):
    test_data_root = get_dali_extra_path()
    file_root = os.path.join(test_data_root, 'db', 'coco_dummy', 'images')
    annotations_file = os.path.join(
        test_data_root, 'db', 'coco_dummy', 'instances.json')

    pipe = Pipeline(batch_size, num_threads, device_id)
    with pipe:
        jpegs, _, _, image_ids = fn.readers.coco(
            file_root=file_root,
            annotations_file=annotations_file,
            shard_id=shard_id,
            num_shards=num_shards,
            ratio=False,
            image_ids=True)
        images = fn.decoders.image(
            jpegs,
            device=('mixed' if device == 'gpu' else 'cpu'),
            output_type=types.RGB)
        images = fn.resize(
            images,
            resize_x=224,
            resize_y=224,
            interp_type=types.INTERP_LINEAR)
        images = fn.crop_mirror_normalize(
            images,
            dtype=types.FLOAT,
            mean=[128., 128., 128.],
            std=[1., 1., 1.])
        if device == 'gpu':
            image_ids = image_ids.gpu()
        ids_reshaped = fn.reshape(image_ids, shape=[1, 1])
        ids_int16 = fn.cast(image_ids, dtype=types.INT16)

        pipe.set_outputs(images, ids_reshaped, ids_int16)

    shapes = (
        (batch_size, 3, 224, 224),
        (batch_size, 1, 1),
        (batch_size, 1))
    dtypes = (
        tf.float32,
        tf.int32,
        tf.int16)

    return pipe, shapes, dtypes
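A hedged sketch of consuming the returned (pipe, shapes, dtypes) triple with DALI's TensorFlow plugin, mirroring the DALIDataset usage shown in Example #28 below; it assumes nvidia-dali-tf-plugin is installed.

import nvidia.dali.plugin.tf as dali_tf

pipe, shapes, dtypes = get_image_pipeline(batch_size=16, num_threads=2, device='gpu')
dataset = dali_tf.DALIDataset(pipeline=pipe,
                              batch_size=16,
                              output_shapes=shapes,
                              output_dtypes=dtypes,
                              device_id=0)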
Example #16
def injection_pipeline(callback, device='cpu'):
    rng = fn.random.coin_flip(probability=0.5, seed=47)
    images = fn.random_resized_crop(callback(),
                                    device=device,
                                    size=(224, 224),
                                    seed=27)
    out_type = types.FLOAT16

    output = fn.crop_mirror_normalize(
        images.gpu(),
        mirror=rng,
        device="gpu",
        dtype=out_type,
        crop=(224, 224),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    return rng, images, output
Example #17
def es_pipeline_standard():
    jpegs, labels = fn.readers.file(file_root=file_root,
                                    shard_id=0,
                                    num_shards=2)
    images = fn.decoders.image(jpegs, output_type=types.RGB)
    rng = fn.random.coin_flip(probability=0.5, seed=47)
    images = fn.random_resized_crop(images, size=(224, 224), seed=27)
    out_type = types.FLOAT16

    output = fn.crop_mirror_normalize(
        images.gpu(),
        mirror=rng,
        device="gpu",
        dtype=out_type,
        crop=(224, 224),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    return rng, images, output, labels
Example #18
def pipe():
    # device, rand_mean, rand_stdev, scale and shift come from the enclosing (elided) scope
    image_like = fn.random.uniform(device=device,
                                   range=(0, 255),
                                   shape=(80, 120, 3))
    image_like = fn.reshape(image_like, layout="HWC")
    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    if rand_mean:
        mean = fn.random.uniform(range=(100, 125), shape=(3, ))
    if rand_stdev:
        std = fn.random.uniform(range=(55, 60), shape=(3, ))
    out = fn.crop_mirror_normalize(image_like,
                                   dtype=types.FLOAT,
                                   output_layout="HWC",
                                   mean=mean,
                                   std=std,
                                   scale=scale,
                                   shift=shift,
                                   pad_output=False)
    return out, image_like, mean, std
Example #19
def rn50_pipeline():
    rng = fn.random.coin_flip(probability=0.5, seed=47)
    print(f'rng: {rng.get().as_array()}')
    tmp = rng ^ 1
    print(f'rng xor: {tmp.get().as_array()}')
    jpegs, labels = fn.readers.file(file_root=file_root,
                                    shard_id=0,
                                    num_shards=2)
    if jpegs.get().is_dense_tensor():
        print(f'jpegs: {jpegs.get().as_array()}')
    else:
        print('jpegs shapes:')
        for j in jpegs.get():
            print(j.shape())
    print(f'labels: {labels.get().as_array()}')
    images = fn.decoders.image(jpegs, device='mixed', output_type=types.RGB)
    for i in images.get().as_cpu():
        print(i)
    for i in images.get():
        print(i.shape())
    images = fn.random_resized_crop(images,
                                    device="gpu",
                                    size=(224, 224),
                                    seed=27)
    for i in images.get():
        print(i.shape())
    print(np.array(images.get().as_cpu()[0]))
    images += 1
    print(np.array(images.get().as_cpu()[0]))
    out_type = types.FLOAT16

    output = fn.crop_mirror_normalize(
        images.gpu(),
        mirror=rng,
        device="gpu",
        dtype=out_type,
        crop=(224, 224),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    return (output, labels.gpu())
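The .get() calls in this example only work in DALI's debug (immediate execution) mode; the sketch below shows one way such a function is typically instantiated, assuming the experimental debug entry point. Batch size and seed are illustrative.

from nvidia.dali.pipeline.experimental import pipeline_def  # debug-capable variant (assumed)

debug_pipe = pipeline_def(debug=True)(rn50_pipeline)(
    batch_size=8, num_threads=2, device_id=0, seed=42)
debug_pipe.build()
output, labels = debug_pipe.run()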
Example #20
def get_pipeline(batch_size,
                 num_threads,
                 device,
                 device_id=0,
                 shard_id=0,
                 num_shards=1):
    test_data_root = os.environ['DALI_EXTRA_PATH']
    file_root = os.path.join(test_data_root, 'db', 'coco_dummy', 'images')
    annotations_file = os.path.join(test_data_root, 'db', 'coco_dummy',
                                    'instances.json')

    pipe = Pipeline(batch_size, num_threads, device_id)
    with pipe:
        jpegs, _, _, image_ids = fn.coco_reader(
            file_root=file_root,
            annotations_file=annotations_file,
            shard_id=shard_id,
            num_shards=num_shards,
            ratio=False,
            image_ids=True)
        images = fn.image_decoder(
            jpegs,
            device=('mixed' if device == 'gpu' else 'cpu'),
            output_type=types.RGB)
        images = fn.resize(images,
                           resize_x=224,
                           resize_y=224,
                           interp_type=types.INTERP_LINEAR)
        images = fn.crop_mirror_normalize(images,
                                          dtype=types.FLOAT,
                                          mean=[128., 128., 128.],
                                          std=[1., 1., 1.])
        if device == 'gpu':
            image_ids = image_ids.gpu()
        ids_reshaped = fn.reshape(image_ids, shape=[1, 1])
        ids_int16 = fn.cast(image_ids, dtype=types.INT16)

        pipe.set_outputs(images, ids_reshaped, ids_int16)

    return pipe
Example #21
def RN50Pipeline():
    device = 'mixed' if args.device == 'gpu' else 'cpu'
    jpegs, _ = fn.readers.file(file_root=args.images_dir)
    images = fn.decoders.image_random_crop(
        jpegs,
        device=device,
        output_type=types.RGB,
        hw_decoder_load=args.hw_load,
        preallocate_width_hint=args.width_hint,
        preallocate_height_hint=args.height_hint)
    images = fn.resize(images, resize_x=224, resize_y=224)
    layout = types.NCHW
    out_type = types.FLOAT16
    coin_flip = fn.random.coin_flip(probability=0.5)
    images = fn.crop_mirror_normalize(
        images,
        dtype=out_type,
        output_layout=layout,
        crop=(224, 224),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        mirror=coin_flip)
    return images
Example #22
        def create_image_pipeline(
            batch_size,
            num_threads,
            device_id,
            image0_list,
            image1_list,
            flow_list,
            valBool,
        ):
            pipeline = Pipeline(batch_size, num_threads, device_id, seed=2)
            with pipeline:
                if valBool:
                    shuffleBool = False
                else:
                    shuffleBool = True
                """ READ FILES """
                image0, _ = fn.readers.file(
                    file_root=args.data,
                    files=image0_list,
                    random_shuffle=shuffleBool,
                    name="Reader",
                    seed=1,
                )
                image1, _ = fn.readers.file(
                    file_root=args.data,
                    files=image1_list,
                    random_shuffle=shuffleBool,
                    seed=1,
                )
                flo = fn.readers.numpy(
                    file_root=args.data,
                    files=flow_list,
                    random_shuffle=shuffleBool,
                    seed=1,
                )
                """ DECODE AND RESHAPE """
                image0 = fn.decoders.image(image0, device="cpu")
                image0 = fn.reshape(image0, layout="HWC")
                image1 = fn.decoders.image(image1, device="cpu")
                image1 = fn.reshape(image1, layout="HWC")
                images = fn.cat(image0, image1, axis=2)
                flo = fn.reshape(flo, layout="HWC")

                if valBool:
                    images = fn.resize(images, resize_x=162, resize_y=122)
                else:
                    """ CO-TRANSFORM """
                    # random translate
                    # angle_rng = fn.random.uniform(range=(-90, 90))
                    # images = fn.rotate(images, angle=angle_rng, fill_value=0)
                    # flo = fn.rotate(flo, angle=angle_rng, fill_value=0)

                    images = fn.random_resized_crop(
                        images,
                        size=[122, 162],  # 122, 162
                        random_aspect_ratio=[1.3, 1.4],
                        random_area=[0.8, 0.9],
                        seed=1,
                    )
                    flo = fn.random_resized_crop(
                        flo,
                        size=[122, 162],
                        random_aspect_ratio=[1.3, 1.4],
                        random_area=[0.8, 0.9],
                        seed=1,
                    )

                    # coin1 = fn.random.coin_flip(dtype=types.DALIDataType.BOOL, seed=10)
                    # coin1_n = coin1 ^ True
                    # coin2 = fn.random.coin_flip(dtype=types.DALIDataType.BOOL, seed=20)
                    # coin2_n = coin2 ^ True

                    # images = (
                    #     fn.flip(images, horizontal=1, vertical=1) * coin1 * coin2
                    #     + fn.flip(images, horizontal=1) * coin1 * coin2_n
                    #     + fn.flip(images, vertical=1) * coin1_n * coin2
                    #     + images * coin1_n * coin2_n
                    # )
                    # flo = (
                    #     fn.flip(flo, horizontal=1, vertical=1) * coin1 * coin2
                    #     + fn.flip(flo, horizontal=1) * coin1 * coin2_n
                    #     + fn.flip(flo, vertical=1) * coin1_n * coin2
                    #     + flo * coin1_n * coin2_n
                    # )
                    # _flo = flo
                    # flo_0 = fn.slice(_flo, axis_names="C", start=0, shape=1)
                    # flo_1 = fn.slice(_flo, axis_names="C", start=1, shape=1)
                    # flo_0 = flo_0 * coin1 * -1 + flo_0 * coin1_n
                    # flo_1 = flo_1 * coin2 * -1 + flo_1 * coin2_n
                    # # flo  = noflip + vertical flip + horizontal flip + both_flip

                    # # A horizontal flip is around the vertical axis (switch left and right)
                    # # So for a vertical flip coin1 is activated and needs to give +1, coin2 is activated needs to give -1
                    # # for a horizontal flip coin1 is activated and needs to be -1, coin2_n needs +1
                    # # no flip coin coin1_n +1, coin2_n +1

                    # flo = fn.cat(flo_0, flo_1, axis_name="C")
                """ NORMALIZE """
                images = fn.crop_mirror_normalize(
                    images,
                    mean=[0, 0, 0, 0, 0, 0],
                    std=[255, 255, 255, 255, 255, 255])
                images = fn.crop_mirror_normalize(
                    images,
                    mean=[0.45, 0.432, 0.411, 0.45, 0.432, 0.411],
                    std=[1, 1, 1, 1, 1, 1],
                )
                flo = fn.crop_mirror_normalize(
                    flo, mean=[0, 0], std=[args.div_flow, args.div_flow])

                pipeline.set_outputs(images, flo)
            return pipeline
Example #23
import matplotlib.pylab as plt
import nvidia.dali.fn as fn
import nvidia.dali.types as types
from nvidia.dali.pipeline import Pipeline

pipe = Pipeline(batch_size=64, num_threads=1, device_id=0)

raw_files, labels = fn.file_reader(file_root="data/resized", random_shuffle=True)

decode = fn.image_decoder(raw_files, device="mixed", output_type=types.GRAY)
resize = fn.resize(decode, device="gpu", image_type=types.GRAY,
                   interp_type=types.INTERP_LINEAR, resize_x=WIDTH, resize_y=HEIGHT)
cmn = fn.crop_mirror_normalize(resize, device="gpu", output_dtype=types.FLOAT,
                               output_layout=types.NCHW,
                               image_type=types.GRAY,
                               mean=[255 // 2],
                               std=[255 // 2])

pipe.set_outputs(cmn, labels)
pipe.build()



from nvidia.dali.plugin.pytorch import DALIClassificationIterator
dali_iter = DALIClassificationIterator([pipe], -1)

output = next(dali_iter)[0]
output["data"].shape, output["label"].shape

Example #24
def get_dali_pipeline(tfrec_filenames,
                      tfrec_idx_filenames,
                      height,
                      width,
                      shard_id,
                      num_gpus,
                      dali_cpu=True,
                      training=True):

    inputs = fn.readers.tfrecord(path=tfrec_filenames,
                                 index_path=tfrec_idx_filenames,
                                 random_shuffle=training,
                                 shard_id=shard_id,
                                 num_shards=num_gpus,
                                 initial_fill=10000,
                                 features={
                                     'image/encoded':
                                     tfrec.FixedLenFeature((), tfrec.string,
                                                           ""),
                                     'image/class/label':
                                     tfrec.FixedLenFeature([1], tfrec.int64,
                                                           -1),
                                     'image/class/text':
                                     tfrec.FixedLenFeature([], tfrec.string,
                                                           ''),
                                     'image/object/bbox/xmin':
                                     tfrec.VarLenFeature(tfrec.float32, 0.0),
                                     'image/object/bbox/ymin':
                                     tfrec.VarLenFeature(tfrec.float32, 0.0),
                                     'image/object/bbox/xmax':
                                     tfrec.VarLenFeature(tfrec.float32, 0.0),
                                     'image/object/bbox/ymax':
                                     tfrec.VarLenFeature(tfrec.float32, 0.0)
                                 })

    decode_device = "cpu" if dali_cpu else "mixed"
    resize_device = "cpu" if dali_cpu else "gpu"
    if training:
        images = fn.decoders.image_random_crop(
            inputs["image/encoded"],
            device=decode_device,
            output_type=types.RGB,
            random_aspect_ratio=[0.75, 1.25],
            random_area=[0.05, 1.0],
            num_attempts=100)
        images = fn.resize(images,
                           device=resize_device,
                           resize_x=width,
                           resize_y=height)
    else:
        images = fn.decoders.image(inputs["image/encoded"],
                                   device=decode_device,
                                   output_type=types.RGB)
        # Make sure that every image > 224 for CropMirrorNormalize
        images = fn.resize(images, device=resize_device, resize_shorter=256)

    images = fn.crop_mirror_normalize(images.gpu(),
                                      dtype=types.FLOAT,
                                      crop=(height, width),
                                      mean=[123.68, 116.78, 103.94],
                                      std=[58.4, 57.12, 57.3],
                                      output_layout="HWC",
                                      mirror=fn.random.coin_flip())
    labels = inputs["image/class/label"].gpu()

    labels -= 1  # Change to 0-based (don't use background class)
    return images, labels
Example #25
def dali_dataloader(
        tfrec_filenames,
        tfrec_idx_filenames,
        shard_id=0, num_shards=1,
        batch_size=128, num_threads=os.cpu_count(),
        image_size=224, num_workers=1, training=True):
    pipe = Pipeline(batch_size=batch_size,
                    num_threads=num_threads, device_id=0)
    with pipe:
        inputs = fn.readers.tfrecord(
            path=tfrec_filenames,
            index_path=tfrec_idx_filenames,
            random_shuffle=training,
            shard_id=shard_id,
            num_shards=num_shards,
            initial_fill=10000,
            read_ahead=True,
            pad_last_batch=True,
            prefetch_queue_depth=num_workers,
            name='Reader',
            features={
                'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
                'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64,  -1),
            })
        jpegs = inputs["image/encoded"]
        if training:
            images = fn.decoders.image_random_crop(
                jpegs,
                device="mixed",
                output_type=types.RGB,
                random_aspect_ratio=[0.8, 1.25],
                random_area=[0.1, 1.0],
                num_attempts=100)
            images = fn.resize(images,
                               device='gpu',
                               resize_x=image_size,
                               resize_y=image_size,
                               interp_type=types.INTERP_TRIANGULAR)
            mirror = fn.random.coin_flip(probability=0.5)
        else:
            images = fn.decoders.image(jpegs,
                                       device='mixed',
                                       output_type=types.RGB)
            images = fn.resize(images,
                               device='gpu',
                               size=int(image_size / 0.875),
                               mode="not_smaller",
                               interp_type=types.INTERP_TRIANGULAR)
            mirror = False

        images = fn.crop_mirror_normalize(
            images.gpu(),
            dtype=types.FLOAT,
            crop=(image_size, image_size),
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
            mirror=mirror)
        label = inputs["image/class/label"] - 1  # 0-999
        label = fn.element_extract(label, element_map=0)  # Flatten
        label = label.gpu()
        pipe.set_outputs(images, label)

    pipe.build()
    last_batch_policy = LastBatchPolicy.DROP if training else LastBatchPolicy.PARTIAL
    loader = DALIClassificationIterator(
        pipe, reader_name="Reader", auto_reset=True, last_batch_policy=last_batch_policy)
    return loader
Example #26
def train_pipeline(cfg: TrainLoaderConfig):

    jpeg, label = fn.readers.file(
        file_root=ROOT_DATA_DIR + "/train/",
        random_shuffle=True,
        shard_id=env_rank(),
        num_shards=env_world_size(),
        name="Reader",
    )
    image = fn.decoders.image_random_crop(
        jpeg,
        device="mixed",
        random_aspect_ratio=[0.75, 1.25],
        random_area=[cfg.min_area, 1.0],
        num_attempts=100,
        output_type=types.RGB,
    )

    image_tr = fn.resize(image,
                         device="gpu",
                         size=cfg.image_size,
                         interp_type=types.INTERP_TRIANGULAR)
    if cfg.random_interpolation:
        image_cub = fn.resize(image,
                              device="gpu",
                              size=cfg.image_size,
                              interp_type=types.INTERP_CUBIC)
        image = mix(fn.random.coin_flip(probability=0.5), image_cub, image_tr)
    else:
        image = image_tr

    if cfg.blur_prob > 0:
        blur_image = fn.gaussian_blur(
            image,
            device="gpu",
            window_size=11,
            sigma=fn.random.uniform(range=[0.5, 1.1]))
        image = mix(
            fn.random.coin_flip(probability=cfg.blur_prob, dtype=types.BOOL),
            blur_image, image)

    if cfg.color_twist_prob > 0:
        image_ct = fn.color_twist(
            image,
            device="gpu",
            contrast=fn.random.uniform(range=[0.7, 1.3]),
            brightness=fn.random.uniform(range=[0.7, 1.3]),
            hue=fn.random.uniform(range=[-20, 20]),  # in degrees
            saturation=fn.random.uniform(range=[0.7, 1.3]),
        )
        image = mix(
            fn.random.coin_flip(probability=cfg.color_twist_prob,
                                dtype=types.BOOL), image_ct, image)

    if cfg.gray_prob > 0:
        grayscale_coin = fn.cast(
            fn.random.coin_flip(probability=cfg.gray_prob), dtype=types.FLOAT)
        image = fn.hsv(image, device="gpu", saturation=grayscale_coin)

    if cfg.re_prob:  # random erasing
        image_re = fn.erase(
            image,
            device="gpu",
            anchor=fn.random.uniform(range=(0.0, 1), shape=cfg.re_count * 2),
            shape=fn.random.uniform(range=(0.05, 0.25),
                                    shape=cfg.re_count * 2),
            axis_names="HW",
            fill_value=DATA_MEAN,
            normalized_anchor=True,
            normalized_shape=True,
        )
        image = mix(
            fn.random.coin_flip(probability=cfg.re_prob, dtype=types.BOOL),
            image_re, image)

    image = fn.crop_mirror_normalize(
        image,
        device="gpu",
        crop=(cfg.image_size, cfg.image_size),
        mirror=fn.random.coin_flip(probability=0.5),
        mean=DATA_MEAN,
        std=DATA_STD,
        dtype=types.FLOAT,
        output_layout=types.NCHW,
    )
    label = fn.one_hot(label, num_classes=cfg.num_classes).gpu()
    return image, label
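The mix helper is not defined in this snippet. A plausible definition, following the common DALI pattern of multiplexing two batches with a per-sample boolean predicate; this is an assumption about the original helper, not its confirmed implementation.

import nvidia.dali.fn as fn
import nvidia.dali.types as types

def mix(condition, true_case, false_case):
    # Per-sample multiplex: blend the two inputs using a 0/1 (or BOOL) scalar per sample.
    condition = fn.cast(condition, dtype=types.BOOL)  # accept both int and BOOL coin flips
    neg = condition ^ True                            # logical negation of the predicate
    return condition * true_case + neg * false_case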
Example #27
def create_coco_pipeline(default_boxes, args):
    try:
        shard_id = torch.distributed.get_rank()
        num_shards = torch.distributed.get_world_size()
    except RuntimeError:
        shard_id = 0
        num_shards = 1

    images, bboxes, labels = fn.readers.coco(
        file_root=args.train_coco_root,
        annotations_file=args.train_annotate,
        skip_empty=True,
        shard_id=shard_id,
        num_shards=num_shards,
        ratio=True,
        ltrb=True,
        random_shuffle=False,
        shuffle_after_epoch=True,
        name="Reader")

    crop_begin, crop_size, bboxes, labels = fn.random_bbox_crop(
        bboxes,
        labels,
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        bbox_layout="xyXY",
        allow_no_crop=True,
        num_attempts=50)
    images = fn.image_decoder_slice(images,
                                    crop_begin,
                                    crop_size,
                                    device="mixed",
                                    output_type=types.RGB)
    flip_coin = fn.random.coin_flip(probability=0.5)
    images = fn.resize(images,
                       resize_x=300,
                       resize_y=300,
                       min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

    saturation = fn.uniform(range=[0.5, 1.5])
    contrast = fn.uniform(range=[0.5, 1.5])
    brightness = fn.uniform(range=[0.875, 1.125])
    hue = fn.uniform(range=[-0.5, 0.5])

    images = fn.hsv(images, dtype=types.FLOAT, hue=hue,
                    saturation=saturation)  # use float to avoid clipping and
    # quantizing the intermediate result
    images = fn.brightness_contrast(
        images,
        contrast_center=128,  # input is in float, but in 0..255 range
        dtype=types.UINT8,
        brightness=brightness,
        contrast=contrast)

    dtype = types.FLOAT16 if args.fp16 else types.FLOAT

    bboxes = fn.bb_flip(bboxes, ltrb=True, horizontal=flip_coin)
    images = fn.crop_mirror_normalize(
        images,
        crop=(300, 300),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        mirror=flip_coin,
        dtype=dtype,
        output_layout="CHW",
        pad_output=False)

    bboxes, labels = fn.box_encoder(bboxes,
                                    labels,
                                    criteria=0.5,
                                    anchors=default_boxes.as_ltrb_list())

    labels = labels.gpu()
    bboxes = bboxes.gpu()

    return images, bboxes, labels
Example #28
def load_tfrecord(directory, batch_size, training):
    tfrecord = []
    tfrecord_idx = []
    for f in os.listdir(directory):
        fullpath = os.path.join(directory, f)
        if not os.path.isfile(fullpath):
            continue

        if f.endswith(".tfrecord"):
            tfrecord.append(fullpath)

        if f.endswith(".idx"):
            tfrecord_idx.append(fullpath)

    tfrecord.sort()
    tfrecord_idx.sort()

    pipe = Pipeline(batch_size=batch_size, num_threads=32, device_id=0)
    with pipe:
        inputs = fn.tfrecord_reader(
            path=tfrecord,
            index_path=tfrecord_idx,
            features={
                "frame_one":
                tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_two":
                tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_three":
                tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_four":
                tfrec.FixedLenFeature((), tfrec.string, ""),
                "plus_one_position":
                tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_one_orientation":
                tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_two_position":
                tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_two_orientation":
                tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_three_position":
                tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_three_orientation":
                tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "speed":
                tfrec.FixedLenFeature([], tfrec.float32, 0.0),
            })
        frame1 = inputs["frame_one"]
        frame1 = fn.image_decoder(frame1,
                                  device="mixed",
                                  output_type=types.RGB)
        # frame1 = fn.resize(frame1, device="gpu", resize_shorter=256.)
        frame1 = fn.crop_mirror_normalize(frame1,
                                          device="gpu",
                                          dtype=types.FLOAT,
                                          mean=[0., 0., 0.],
                                          std=[1., 1., 1.])
        frame1 = fn.transpose(frame1, device="gpu", perm=[1, 2, 0])

        frame2 = inputs["frame_two"]
        frame2 = fn.image_decoder(frame2,
                                  device="mixed",
                                  output_type=types.RGB)
        # frame2 = fn.resize(frame2, device="gpu", resize_shorter=256.)
        frame2 = fn.crop_mirror_normalize(frame2,
                                          device="gpu",
                                          dtype=types.FLOAT,
                                          mean=[0., 0., 0.],
                                          std=[1., 1., 1.])
        frame2 = fn.transpose(frame2, device="gpu", perm=[1, 2, 0])

        position = inputs["plus_one_position"].gpu()
        orientation = inputs["plus_one_orientation"].gpu()
        speed = inputs["speed"].gpu()

        image = fn.cat(frame1, frame2, device="gpu", axis=2)
        pose = fn.cat(position, orientation, device="gpu", axis=0)
        pipe.set_outputs(image, pose, speed)

    # Define shapes and types of the three outputs
    # (two RGB frames concatenated along the channel axis -> 6 channels)
    shapes = ((batch_size, 480, 640, 6), (batch_size, 6), (batch_size,))
    dtypes = (tf.float32, tf.float32, tf.float32)

    # Create dataset
    return dali_tf.DALIDataset(pipeline=pipe,
                               batch_size=batch_size,
                               output_shapes=shapes,
                               output_dtypes=dtypes,
                               device_id=0)