コード例 #1
0
def create_coco_pipeline(file_root,
                         annotations_file,
                         batch_size=1,
                         device_id=0,
                         num_threads=4,
                         local_rank=0,
                         world_size=1):
    pipeline = Pipeline(batch_size, num_threads,
                        local_rank, seed=42 + device_id)

    with pipeline:
        images, bboxes, labels = fn.coco_reader(file_root=file_root,
                                                annotations_file=annotations_file,
                                                skip_empty=True,
                                                shard_id=local_rank,
                                                num_shards=world_size,
                                                ratio=True,
                                                ltrb=True,
                                                random_shuffle=False,
                                                shuffle_after_epoch=True,
                                                name="Reader")

        crop_begin, crop_size, bboxes, labels = fn.random_bbox_crop(bboxes, labels,
                                                                    device="cpu",
                                                                    aspect_ratio=[0.5, 2.0],
                                                                    thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
                                                                    scaling=[0.3, 1.0],
                                                                    bbox_layout="xyXY",
                                                                    allow_no_crop=True,
                                                                    num_attempts=50)
        images = fn.image_decoder_slice(images, crop_begin, crop_size, device="mixed", output_type=types.RGB)
        flip_coin = fn.coin_flip(probability=0.5)
        images = fn.resize(images,
                           resize_x=300,
                           resize_y=300,
                           min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

        # use float to avoid clipping and quantizing the intermediate result
        images = fn.hsv(images, dtype=types.FLOAT, hue=fn.uniform(range=[-0.5, 0.5]),
                        saturation=fn.uniform(range=[0.5, 1.5]))

        images = fn.brightness_contrast(images,
                                        contrast_center = 128,  # input is in float, but in 0..255 range
                                        dtype = types.UINT8,
                                        brightness = fn.uniform(range=[0.875, 1.125]),
                                        contrast = fn.uniform(range=[0.5, 1.5]))

        bboxes = fn.bb_flip(bboxes, ltrb=True, horizontal=flip_coin)
        images = fn.crop_mirror_normalize(images,
                                          mean=[104., 117., 123.],
                                          std=[1., 1., 1.],
                                          mirror=flip_coin,
                                          dtype=types.FLOAT,
                                          output_layout="CHW",
                                          pad_output=False)

        pipeline.set_outputs(images, bboxes, labels)
    return pipeline
コード例 #2
0
ファイル: test_dali_cpu_only.py プロジェクト: szalpal/DALI
def test_bb_flip_cpu():
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    test_data_shape = [200, 4]

    def get_data():
        out = [
            (np.random.randint(0, 255, size=test_data_shape, dtype=np.uint8) /
             255).astype(dtype=np.float32) for _ in range(batch_size)
        ]
        return out

    data = fn.external_source(source=get_data)
    processed = fn.bb_flip(data)
    pipe.set_outputs(processed)
    pipe.build()
    for _ in range(3):
        pipe.run()
コード例 #3
0
ファイル: coco_pipeline.py プロジェクト: xvdp/DALI
def create_coco_pipeline(default_boxes, args):
    try:
        shard_id = torch.distributed.get_rank()
        num_shards = torch.distributed.get_world_size()
    except RuntimeError:
        shard_id = 0
        num_shards = 1

    images, bboxes, labels = fn.readers.coco(
        file_root=args.train_coco_root,
        annotations_file=args.train_annotate,
        skip_empty=True,
        shard_id=shard_id,
        num_shards=num_shards,
        ratio=True,
        ltrb=True,
        random_shuffle=False,
        shuffle_after_epoch=True,
        name="Reader")

    crop_begin, crop_size, bboxes, labels = fn.random_bbox_crop(
        bboxes,
        labels,
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        bbox_layout="xyXY",
        allow_no_crop=True,
        num_attempts=50)
    images = fn.image_decoder_slice(images,
                                    crop_begin,
                                    crop_size,
                                    device="mixed",
                                    output_type=types.RGB)
    flip_coin = fn.random.coin_flip(probability=0.5)
    images = fn.resize(images,
                       resize_x=300,
                       resize_y=300,
                       min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

    saturation = fn.uniform(range=[0.5, 1.5])
    contrast = fn.uniform(range=[0.5, 1.5])
    brightness = fn.uniform(range=[0.875, 1.125])
    hue = fn.uniform(range=[-0.5, 0.5])

    images = fn.hsv(images, dtype=types.FLOAT, hue=hue,
                    saturation=saturation)  # use float to avoid clipping and
    # quantizing the intermediate result
    images = fn.brightness_contrast(
        images,
        contrast_center=128,  # input is in float, but in 0..255 range
        dtype=types.UINT8,
        brightness=brightness,
        contrast=contrast)

    dtype = types.FLOAT16 if args.fp16 else types.FLOAT

    bboxes = fn.bb_flip(bboxes, ltrb=True, horizontal=flip_coin)
    images = fn.crop_mirror_normalize(
        images,
        crop=(300, 300),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        mirror=flip_coin,
        dtype=dtype,
        output_layout="CHW",
        pad_output=False)

    bboxes, labels = fn.box_encoder(bboxes,
                                    labels,
                                    criteria=0.5,
                                    anchors=default_boxes.as_ltrb_list())

    labels = labels.gpu()
    bboxes = bboxes.gpu()

    return images, bboxes, labels