Example #1
File: transform.py Project: gadm21/AI
import albumentations as A
from albumentations import BboxParams, Compose


def get_albumentations_transforms(mode):
    """
    Composes albumentations transforms.
    Returns the full list of transforms when mode is "train".
    mode should be one of "train", "val".
    """
    # compose validation transforms
    if mode == "val":
        transforms = Compose(
            [],
            bbox_params=BboxParams(
                format="pascal_voc",
                min_area=0.0,
                min_visibility=0.0,
                label_fields=["category_id"],
            ),
        )
    # compose train transforms
    # TODO: make transformation parameters configurable from yml
    elif mode == "train":
        transforms = Compose(
            [
                # A.Normalize(),
                # A.Blur(p=0.5),
                # A.ColorJitter(p=0.5),
                # A.Downscale(p=0.3),
                # A.Superpixels(p=0.3),
                A.RandomContrast(p=0.5),
                A.ShiftScaleRotate(p=0.8),
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.5),
                A.RandomBrightnessContrast(p=0.5),
                A.Sharpen(p=0.5),
                # A.RGBShift(p=0.5),
                # A.RandomRain(p=0.3),
                # A.RandomFog(p=0.3)
            ],
            bbox_params=BboxParams(
                format="pascal_voc",
                min_area=0.0,
                min_visibility=0.0,
                label_fields=["category_id"],
            ),
        )
    return transforms
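A minimal usage sketch (not part of the original file; the dummy image and box values below are assumptions): the returned Compose expects keyword targets matching the declared bbox_params, i.e. pascal_voc boxes plus a category_id list.

import numpy as np

transforms = get_albumentations_transforms(mode="train")
out = transforms(
    image=np.zeros((480, 640, 3), dtype=np.uint8),  # dummy image (illustrative)
    bboxes=[(50, 60, 200, 220)],                    # pascal_voc: x_min, y_min, x_max, y_max
    category_id=[1],                                # one label per box
)
augmented_image, augmented_boxes = out["image"], out["bboxes"]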
Example #2
import numpy as np
from albumentations import BboxParams, RandomCrop, ReplayCompose


def test_crop_boxes_replay_compose():
    image = np.ones((512, 384, 3))
    bboxes = [(78, 42, 142, 80), (32, 12, 42, 72), (200, 100, 300, 200)]
    labels = [0, 1, 2]
    transform = ReplayCompose(
        [RandomCrop(256, 256, p=1.0)],
        bbox_params=BboxParams(format="pascal_voc", min_area=16, label_fields=["labels"]),
    )

    input_data = dict(image=image, bboxes=bboxes, labels=labels)
    transformed = transform(**input_data)
    transformed2 = ReplayCompose.replay(transformed["replay"], **input_data)

    np.testing.assert_almost_equal(transformed["bboxes"], transformed2["bboxes"])
Example #3
def strong_aug(p=0.75):
    return Compose(
        [
            ShiftScaleRotate(scale_limit=0.1, rotate_limit=90),
            Transpose(),
            #IAAAffine(shear=0.1),
            #IAAPerspective(),
            Cutout(num_holes=20, max_h_size=8, max_w_size=8),
            HorizontalFlip(),
            VerticalFlip(),
            GaussNoise(),
            JpegCompression(),
            #RandomShadow(shadow_roi=(0, 0, 1, 1), p=0.75),
            OneOf([MotionBlur(), GaussianBlur()]),
            OneOf([ToGray(), ToSepia()]),
            RandomBrightnessContrast(brightness_limit=0.75, p=0.75)
        ],
        bbox_params=BboxParams("pascal_voc",
                               label_fields=["category_id"],
                               min_area=0.0,
                               min_visibility=0.5),
        p=p)
Example #4
import albumentations
from albumentations import BboxParams, Compose


def augment_objects(objects, per_obj):
    """
    Perform image augmentation on objects and their associated bboxes

    Parameters
    ----------
    objects : list
        a list of objects (dictionaries)
    per_obj : int
        number of augmented objects to create per object

    Returns
    -------
    A list containing len(objects)*per_obj augmented objects


    """

    # schedule augmentation of overlays
    rotate = albumentations.augmentations.transforms.RandomRotate90(
        p=0.0
    )  # TODO: allow user to define augmentations (e.g. through a config or yaml file)
    aug = Compose(
        [rotate],
        bbox_params=BboxParams(
            format="yolo",
            min_visibility=0.5,
            label_fields=["class_id"],
        ),
    )
    # perform augmentation on overlays
    # obj_per_bkg = max(len(objects), np.random.choice(1))
    aug_per_obj = int(round(per_obj / len(objects)))
    augmented = []
    for obj in objects:
        temp = [aug(**obj) for _ in range(aug_per_obj)]
        augmented = augmented + temp

    return augmented
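A hedged sketch of the input this function appears to expect: each object is a dict that Compose can unpack directly, i.e. an image, yolo-format bboxes and a class_id list. The values below are illustrative assumptions, and the sketch assumes the imports of the original module are available.

import numpy as np

example_object = {
    "image": np.zeros((64, 64, 4), dtype=np.uint8),  # RGBA crop (illustrative)
    "bboxes": [(0.5, 0.5, 1.0, 1.0)],                # yolo: x_center, y_center, width, height (normalized)
    "class_id": [0],
}
augmented = augment_objects([example_object], per_obj=4)
print(len(augmented))  # 4 augmented copies of the single input object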
Example #5
import albumentations
import cv2
import numpy as np
from albumentations import BboxParams, Compose


def overlay(objects, bkg):
    """
    overlay a source image containing bounding boxes onto a background

    Parameters:
    -----
    objects : list 
        list of the source image(s), potentially augmented (rotated, scaled, ...) with their associated data
    bkg : numpy array
        the background on which the objects will be overlayed

    Returns:
    --------
    bkg_with_objects : numpy array
        the background with overlayed objects
    target_bboxes : list 
        list of imgaug.augmentables.bbs.BoundingBoxesOnImage : the bounding boxes of the sources images in the background image coordinate reference frame

    Details about the overlay function
    -------------------
    The overlay function works in conjunction with the imgaug package.
    Its ojective is to allow the user to overlay an object represented by a source
    image and an associated bounding box into a bigger picture called the background
    it takes as input a list of cv2 images containing (or not) an alpha channel,
    a list of corresponding bounding boxes and a background.

    The script then ensures that the overlayed source will have adequate proportions
    when overlayed into the background i.e. it restricts the overlayed source
    to be between 1% and 5% of the background x-dimension. The y dimension is
    then scaled accordingly to keep the aspect ratio of the original source image.
    The associated bounding box are recomputed accordingly.

    The script overlays the source randomly into the background and allows the
    overlayed source to sit on the edge of the background i.e. allows for an
    incomplete overlay. The bounding box associated to this overlayed source
    are only kept if more than 25% of the source sits within the background.
    Note that only the bounding box is removed, not the overlayed source i.e.
    the overlayed source will appear in the picture but no bounding box will be
    associated to it.
    """
    bkg_with_objects = {key: None for key in ["image", "bboxes", "class_id"]}

    if bkg.shape[2] != 4:
        bkg = cv2.cvtColor(bkg, cv2.COLOR_RGB2RGBA)

    for object in objects:

        # reshape source in order to limit its size on the bkg
        # ratio is chosen based on the width i.e. x-axis
        # the figures are based on analysis performed on the desired
        # detections
        ratioChoice = np.arange(0.2, 0.3, step=0.01)
        ratioChosen = np.random.choice(ratioChoice, 1)[0]

        resize = albumentations.augmentations.transforms.LongestMaxSize(
            max_size=int(ratioChosen * bkg.shape[1]), always_apply=True
        )
        aug = Compose(
            [resize],
            bbox_params=BboxParams(
                format="yolo",
                min_visibility=0.5,
                label_fields=["class_id"],
            ),
        )

        object = aug(**object)

        # create a padded version of the background
        # allowing the source picture to sit on the edges i.e.
        # to be partially outside the picture
        # the padding, equal to the source width and height,
        # actually allows for the source to be located completely outside
        # the background
        hO = object["image"].shape[0]
        wO = object["image"].shape[1]
        hB = bkg.shape[0]
        wB = bkg.shape[1]
        bkg_padded = cv2.copyMakeBorder(
            bkg,
            hO,
            hO,
            wO,
            wO,
            cv2.BORDER_CONSTANT,
        )

        # Anchor is chosen randomly within the padded image
        # but without allowing the picture to lie outside the original image;
        # in other words, the padding is not used at present.
        # Padding shall be used in a further step to allow partial images to be
        # detected (this supposedly would help training) but this requires
        # recomputing bounding boxes after inclusion (otherwise, the center of
        # the bbox could be outside the picture!)
        easy = False
        if easy:
            xAnchor = round(bkg_padded.shape[1] / 2)
            yAnchor = round(bkg_padded.shape[0] / 2)
        else:
            xAnchor = np.random.choice(
                np.arange(
                    round(1.5 * wO),
                    bkg_padded.shape[1] - round(1.5 * wO),
                )
            )  # anchor represents the center. Therefore, only allowed between these bounds in the padded image
            yAnchor = np.random.choice(
                np.arange(
                    round(1.5 * hO),
                    bkg_padded.shape[0] - round(1.5 * hO),
                )
            )
        # note: the rounds above are for the future, when widthSource/2 will have to be used
        # to allow partial inclusion of bbox

        # compute the boundaries of the source image inside the total image
        # This is performed assuming that in the source image, the
        # object center is perfectly in the middle of the source image &
        # associated bounding box
        # Therefore, one only has to add/remove half the height and width with respect
        # to the chosen anchor to find the two opposite corners of the
        # source/bounding box inside the padded background
        xLeft = int(round(xAnchor - wO / 2))
        xRight = int(round(xAnchor + wO / 2))
        yBot = int(round(yAnchor - hO / 2))
        yTop = int(round(yAnchor + hO / 2))

        # it can happen, due to rounding, that the shape of the image at the
        # computed location exceeds / is smaller than the original image;
        # correct that
        if xRight - xLeft != wO:
            xLeft = xRight - wO
        if yTop - yBot != hO:
            yBot = yTop - hO

        # it can also happen that, when recomputing the location,
        # the coordinates go negative, which is impossible;
        # therefore, make the coordinate positive and then
        # translate by an amount equal to the negative value of the
        # coordinate
        if xLeft < 0:
            xLeft = abs(xLeft)
            delta = 2 * xLeft
            xRight = xRight + delta
        if yBot < 0:
            yBot = abs(yBot)
            delta = 2 * yBot
            yTop = yTop + delta

        # perform overlay at chosen location
        # takes into account alpha channel of the source
        alpha = object["image"][:, :, -1]
        alpha_rgb = np.expand_dims(alpha, 2)
        alpha_rgb = np.repeat(alpha_rgb, 3, axis=2)
        alpha_rgb = alpha_rgb.astype(float) / 255
        foreground = cv2.multiply(
            alpha_rgb, object["image"][:, :, :-1], dtype=cv2.CV_64F
        )
        background = cv2.multiply(
            1 - alpha_rgb,
            bkg_padded[yBot:yTop, xLeft:xRight][:, :, :-1],
            dtype=cv2.CV_32F,
        )
        bkg_padded[yBot:yTop, xLeft:xRight][:, :, :-1] = cv2.add(
            foreground, background, dtype=cv2.CV_64F
        )
        # bkg_padded[yBot:yTop, xLeft:xRight][:, :, -1] = alpha # WRONG
        # Keep only the original picture without the padding
        bkg = bkg_padded[
            hO : hO + bkg.shape[0],
            wO : wO + bkg.shape[1],
        ]
        # In source image, bounding box is around the entire image
        # and assumed at the middle therefore one computes the distance between
        # the center of the bounding box and its edges
        # note the convention in imgAug : x-axis grows from left to right
        # y-axis grows from top to bottom
        # xCtr = wO/2
        # yCtr = hO/2
        # deltaX = abs(sourceBbox.bounding_boxes[0].x1_int - xCtr)
        # deltaY = abs(sourceBbox.bounding_boxes[0].y1_int - yCtr)

        # Now define the new bounding box associated to the overlayed source
        # inside the background image. The associated bboxes will be placed
        # in the original (non-padded) background referential so
        # define the bbox coordinates consistently i.e. removing
        # widthSource and heightSource from the coordinates
        # make a drawing to visualize if needed for understanding
        bkg_with_objects["image"] = bkg
        try:
            bkg_with_objects["bboxes"].append(
                [(xAnchor - wO) / wB, (yAnchor - hO) / hB, wO / wB, hO / hB]
            )
            bkg_with_objects["class_id"].append(object["class_id"][0])
        except AttributeError:
            bkg_with_objects["bboxes"] = [
                [(xAnchor - wO) / wB, (yAnchor - hO) / hB, wO / wB, hO / hB]
            ]
            bkg_with_objects["class_id"] = [object["class_id"][0]]

    # # view
    # # imgaug.imshow(BoundingBoxesOnImage(targetBboxes, bkg.shape).draw_on_image(bkg, color = (0,255,0,255)))
    # # check if bounding boxes are within the picture
    # i = 0
    # for bbox in targetBboxes:
    #     originalArea = bbox.area
    #     remainingArea = bbox.clip_out_of_image(
    #         bkg.shape
    #     ).area  # this computes the remaining area when cut
    #     if remainingArea / originalArea < 0.4:
    #         targetBboxes.pop(i)
    #         # don't increment i if popping element
    #     else:
    #         targetBboxes[i] = bbox.clip_out_of_image(bkg.shape)
    #         i += 1

    return bkg_with_objects
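A hedged end-to-end sketch of calling overlay on the object format used above; the background size, the object crop and the class id are illustrative assumptions, and the sketch assumes the imports of the original module are available.

import numpy as np

background = np.full((600, 800, 3), 255, dtype=np.uint8)  # blank RGB background (illustrative)
example_object = {
    "image": np.zeros((64, 64, 4), dtype=np.uint8),       # RGBA crop, alpha in the last channel
    "bboxes": [(0.5, 0.5, 1.0, 1.0)],                      # yolo format, normalized
    "class_id": [0],
}
composited = overlay([example_object], background)
print(composited["image"].shape, composited["bboxes"], composited["class_id"])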
Example #6
 [
     {
         "keypoints": [[10, 10], [70, 70], [10, 70], [70, 10]]
     },
     None,
     KeypointParams("xy", check_each_transform=True),
     {
         "keypoints": np.array([[10, 10]]) + 25
     },
 ],
 [
     {
         "bboxes": [[0, 0, 10, 10, 0], [5, 5, 70, 70, 0],
                    [60, 60, 70, 70, 0]]
     },
     BboxParams("pascal_voc", check_each_transform=False),
     None,
     {
         "bboxes": [[25, 25, 35, 35, 0], [30, 30, 95, 95, 0],
                    [85, 85, 95, 95, 0]]
     },
 ],
 [
     {
         "bboxes": [[0, 0, 10, 10, 0], [5, 5, 70, 70, 0],
                    [60, 60, 70, 70, 0]]
     },
     BboxParams("pascal_voc", check_each_transform=True),
     None,
     {
         "bboxes": [[25, 25, 35, 35, 0], [30, 30, 75, 75, 0]]
Example #7
 def __init__(self, **kwargs):
     self.transforms = Compose([v for v in kwargs.values()],
                               bbox_params=BboxParams(
                                   format="coco", label_fields=["types"]))
Example #8
def get_transforms(config, mode: str = "train") -> Compose:
    """
    Composes albumentations transforms.
    Returns the full list of transforms when mode is "train".
    mode should be one of "train", "val".
    """
    # compose validation transforms
    if mode == "val":
        transforms = Compose(
            [],
            bbox_params=BboxParams(
                format="pascal_voc",
                min_area=0.0,
                min_visibility=0.0,
                label_fields=["category_id"],
            ),
        )
    # compose train transforms
    # TODO: make transformation parameters configurable from yml
    elif mode == "train":
        transforms = Compose(
            [
                LongestMaxSize(
                    max_size=config["LONGESTMAXSIZE_MAXSIZE"],
                    p=config["LONGESTMAXSIZE_P"],
                ),
                # PadIfNeeded(min_height=768, min_width=768, border_mode=0, p=1),
                RandomSizedBBoxSafeCrop(
                    height=config["RANDOMSIZEDBBOXSAFECROP_HEIGHT"],
                    width=config["RANDOMSIZEDBBOXSAFECROP_WIDTH"],
                    p=config["LONGESTMAXSIZE_P"],
                ),
                ShiftScaleRotate(
                    shift_limit=config["SHIFTSCALEROTATE_SHIFTLIMIT"],
                    scale_limit=config["SHIFTSCALEROTATE_SCALELIMIT"],
                    rotate_limit=config["SHIFTSCALEROTATE_ROTATELIMIT"],
                    p=config["SHIFTSCALEROTATE_P"],
                ),
                HorizontalFlip(p=config["HORIZONTALFLIP_P"]),
                RandomRotate90(p=config["RANDOMROTATE90_P"]),
                RandomBrightnessContrast(
                    brightness_limit=config[
                        "RANDOMBRIGHTNESSCONTRAST_BRIGHTNESSLIMIT"],
                    contrast_limit=config[
                        "RANDOMBRIGHTNESSCONTRAST_CONTRASTLIMIT"],
                    p=config["RANDOMBRIGHTNESSCONTRAST_P"],
                ),
                RandomGamma(
                    gamma_limit=config["RANDOMGAMMA_GAMMALIMIT"],
                    p=config["RANDOMGAMMA_P"],
                ),
                HueSaturationValue(
                    hue_shift_limit=config["HUESATURATIONVALUE_HUESHIFTLIMIT"],
                    sat_shift_limit=config["HUESATURATIONVALUE_SATSHIFTLIMIT"],
                    val_shift_limit=config["HUESATURATIONVALUE_VALSHIFTLIMIT"],
                    p=config["HUESATURATIONVALUE_P"],
                ),
                MotionBlur(
                    blur_limit=tuple(config["MOTIONBLUR_BLURLIMIT"]),
                    p=config["MOTIONBLUR_P"],
                ),
                JpegCompression(
                    quality_lower=config["JPEGCOMPRESSION_QUALITYLOWER"],
                    quality_upper=config["JPEGCOMPRESSION_QUALITYUPPER"],
                    p=config["JPEGCOMPRESSION_P"],
                ),
            ],
            bbox_params=BboxParams(
                format="pascal_voc",
                min_area=0.0,
                min_visibility=0.0,
                label_fields=["category_id"],
            ),
        )
    return transforms
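A hedged sketch of the config mapping this function expects: the keys below are exactly the ones read above, while every value is only an illustrative guess. The sketch assumes the transforms used above are imported in the original module.

example_config = {
    "LONGESTMAXSIZE_MAXSIZE": 768,
    "LONGESTMAXSIZE_P": 1.0,
    "RANDOMSIZEDBBOXSAFECROP_HEIGHT": 512,
    "RANDOMSIZEDBBOXSAFECROP_WIDTH": 512,
    "SHIFTSCALEROTATE_SHIFTLIMIT": 0.1,
    "SHIFTSCALEROTATE_SCALELIMIT": 0.1,
    "SHIFTSCALEROTATE_ROTATELIMIT": 15,
    "SHIFTSCALEROTATE_P": 0.5,
    "HORIZONTALFLIP_P": 0.5,
    "RANDOMROTATE90_P": 0.5,
    "RANDOMBRIGHTNESSCONTRAST_BRIGHTNESSLIMIT": 0.2,
    "RANDOMBRIGHTNESSCONTRAST_CONTRASTLIMIT": 0.2,
    "RANDOMBRIGHTNESSCONTRAST_P": 0.5,
    "RANDOMGAMMA_GAMMALIMIT": (80, 120),
    "RANDOMGAMMA_P": 0.5,
    "HUESATURATIONVALUE_HUESHIFTLIMIT": 20,
    "HUESATURATIONVALUE_SATSHIFTLIMIT": 30,
    "HUESATURATIONVALUE_VALSHIFTLIMIT": 20,
    "HUESATURATIONVALUE_P": 0.5,
    "MOTIONBLUR_BLURLIMIT": [3, 7],
    "MOTIONBLUR_P": 0.3,
    "JPEGCOMPRESSION_QUALITYLOWER": 80,
    "JPEGCOMPRESSION_QUALITYUPPER": 100,
    "JPEGCOMPRESSION_P": 0.3,
}

train_transforms = get_transforms(example_config, mode="train")  # values above are guesses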