import albumentations as A
from albumentations import BboxParams


def get_albumentations_transforms(mode):
    """
    Composes albumentations transforms.
    Returns the full list of transforms when mode is "train".
    mode should be one of "train", "val".
    """
    # compose validation transforms
    if mode == "val":
        transforms = A.Compose(
            [],
            bbox_params=BboxParams(
                format="pascal_voc",
                min_area=0.0,
                min_visibility=0.0,
                label_fields=["category_id"],
            ),
        )
    # compose train transforms
    # TODO: make transformation parameters configurable from yml
    elif mode == "train":
        transforms = A.Compose(
            [
                # A.Normalize(),
                # A.Blur(p=0.5),
                # A.ColorJitter(p=0.5),
                # A.Downscale(p=0.3),
                # A.Superpixels(p=0.3),
                A.RandomContrast(p=0.5),
                A.ShiftScaleRotate(p=0.8),
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.5),
                A.RandomBrightnessContrast(p=0.5),
                A.Sharpen(p=0.5),
                # A.RGBShift(p=0.5),
                # A.RandomRain(p=0.3),
                # A.RandomFog(p=0.3),
            ],
            bbox_params=BboxParams(
                format="pascal_voc",
                min_area=0.0,
                min_visibility=0.0,
                label_fields=["category_id"],
            ),
        )
    return transforms

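# Minimal usage sketch of the composed pipeline (assumptions: a dummy uint8
# image and a single illustrative pascal_voc box; the "category_id" keyword
# must match the label_fields declared in BboxParams above).
import numpy as np

image = np.zeros((480, 640, 3), dtype=np.uint8)          # placeholder RGB image
transforms = get_albumentations_transforms(mode="train")
out = transforms(
    image=image,
    bboxes=[[25, 60, 140, 200]],                          # [x_min, y_min, x_max, y_max]
    category_id=[3],                                      # one label per box
)
aug_image, aug_bboxes = out["image"], out["bboxes"]
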
def test_crop_boxes_replay_compose():
    image = np.ones((512, 384, 3))
    bboxes = [(78, 42, 142, 80), (32, 12, 42, 72), (200, 100, 300, 200)]
    labels = [0, 1, 2]
    transform = ReplayCompose(
        [RandomCrop(256, 256, p=1.0)],
        bbox_params=BboxParams(format="pascal_voc", min_area=16, label_fields=["labels"]),
    )
    input_data = dict(image=image, bboxes=bboxes, labels=labels)

    transformed = transform(**input_data)
    transformed2 = ReplayCompose.replay(transformed["replay"], **input_data)

    np.testing.assert_almost_equal(transformed["bboxes"], transformed2["bboxes"])

def strong_aug(p=0.75):
    return Compose(
        [
            ShiftScaleRotate(scale_limit=0.1, rotate_limit=90),
            Transpose(),
            # IAAAffine(shear=0.1),
            # IAAPerspective(),
            Cutout(num_holes=20, max_h_size=8, max_w_size=8),
            HorizontalFlip(),
            VerticalFlip(),
            GaussNoise(),
            JpegCompression(),
            # RandomShadow(shadow_roi=(0, 0, 1, 1), p=0.75),
            OneOf([MotionBlur(), GaussianBlur()]),
            OneOf([ToGray(), ToSepia()]),
            RandomBrightnessContrast(brightness_limit=0.75, p=0.75),
        ],
        bbox_params=BboxParams(
            "pascal_voc",
            label_fields=["category_id"],
            min_area=0.0,
            min_visibility=0.5,
        ),
        p=p,
    )

import cv2
import numpy as np

import albumentations
from albumentations import BboxParams, Compose


def augment_objects(objects, per_obj):
    """
    Perform image augmentation on objects and their associated bboxes.

    Parameters
    ----------
    objects : list
        a list of objects (dictionaries)
    per_obj : int
        number of augmented objects to create per object

    Returns
    -------
    A list containing len(objects)*per_obj augmented objects
    """
    # schedule the augmentation of the overlays
    rotate = albumentations.augmentations.transforms.RandomRotate90(
        p=0.0
    )  # TODO: allow the user to define augmentations (through e.g. a config or yaml file)
    aug = Compose(
        [rotate],
        bbox_params=BboxParams(
            format="yolo",
            min_visibility=0.5,
            label_fields=["class_id"],
        ),
    )
    # perform augmentation on the overlays
    # obj_per_bkg = max(len(objects), np.random.choice(1))
    aug_per_obj = int(round(per_obj / len(objects)))
    augmented = []
    for object in objects:
        temp = [aug(**object) for i in range(aug_per_obj)]
        augmented = augmented + temp
    return augmented

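# Usage sketch for augment_objects (assumption: each object dict carries the
# keys consumed by aug(**object) above -- an RGBA "image", yolo-format
# "bboxes" covering the object, and a "class_id" list; the values here are
# illustrative only, not project data).
obj = {
    "image": np.zeros((64, 64, 4), dtype=np.uint8),       # placeholder RGBA cut-out
    "bboxes": [[0.5, 0.5, 0.9, 0.9]],                     # yolo: normalized [x_c, y_c, w, h]
    "class_id": [0],
}
augmented_objects = augment_objects([obj], per_obj=4)     # ~4 augmented copies of obj
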
def overlay(objects, bkg):
    """
    Overlay source images containing bounding boxes onto a background.

    Parameters
    ----------
    objects : list
        list of the source image(s), potentially augmented (rotated, scaled, ...)
        with their associated data
    bkg : numpy array
        the background on which the objects will be overlayed

    Returns
    -------
    bkg_with_objects : dict
        dictionary with keys "image" (the background with the overlayed objects),
        "bboxes" (the bounding boxes of the overlayed sources, expressed in the
        background image coordinate reference frame) and "class_id"

    Details about the overlay function
    ----------------------------------
    The overlay function works in conjunction with the imgaug package. Its
    objective is to allow the user to overlay an object, represented by a
    source image and an associated bounding box, into a bigger picture called
    the background. It takes as input a list of cv2 images containing (or not)
    an alpha channel, a list of corresponding bounding boxes and a background.

    The script then ensures that the overlayed source has adequate proportions
    when overlayed onto the background, i.e. it restricts the overlayed source
    to be between 1% and 5% of the background x-dimension. The y-dimension is
    then scaled accordingly to keep the aspect ratio of the original source
    image. The associated bounding boxes are recomputed accordingly.

    The script overlays the source randomly onto the background and allows the
    overlayed source to sit on the edge of the background, i.e. allows for an
    incomplete overlay. The bounding box associated with an overlayed source is
    only kept if more than 25% of the source sits within the background. Note
    that only the bounding box is removed, not the overlayed source, i.e. the
    overlayed source will appear in the picture but no bounding box will be
    associated with it.
    """
    bkg_with_objects = {key: None for key in ["image", "bboxes", "class_id"]}
    if bkg.shape[2] != 4:
        bkg = cv2.cvtColor(bkg, cv2.COLOR_RGB2RGBA)
    for object in objects:
        # reshape the source in order to limit its size on the bkg;
        # the ratio is chosen based on the width, i.e. the x-axis;
        # the figures are based on analysis performed on the desired detections
        ratioChoice = np.arange(0.2, 0.3, step=0.01)
        ratioChosen = np.random.choice(ratioChoice, 1)[0]
        resize = albumentations.augmentations.transforms.LongestMaxSize(
            max_size=int(ratioChosen * bkg.shape[1]), always_apply=True
        )
        aug = Compose(
            [resize],
            bbox_params=BboxParams(
                format="yolo",
                min_visibility=0.5,
                label_fields=["class_id"],
            ),
        )
        object = aug(**object)

        # create a padded version of the background,
        # allowing the source picture to sit on the edges, i.e.
        # to be partially outside the picture.
        # The padding is equal to the source width and height, i.e. it
        # actually allows for the source to be located completely outside
        # the background
        hO = object["image"].shape[0]
        wO = object["image"].shape[1]
        hB = bkg.shape[0]
        wB = bkg.shape[1]
        bkg_padded = cv2.copyMakeBorder(
            bkg,
            hO,
            hO,
            wO,
            wO,
            cv2.BORDER_CONSTANT,
        )

        # The anchor is chosen randomly within the padded image,
        # but without allowing the picture to lie outside the original image;
        # in other words, the padding is not used at present.
        # Padding shall be used in a further step to allow partial images to be
        # detected (this supposedly would help training), but this requires
        # recomputing bounding boxes after inclusion (otherwise, the center of a
        # bbox could be outside the picture!)
        easy = False
        if easy:
            xAnchor = round(bkg_padded.shape[1] / 2)
            yAnchor = round(bkg_padded.shape[0] / 2)
        else:
            xAnchor = np.random.choice(
                np.arange(
                    round(1.5 * wO),
                    bkg_padded.shape[1] - round(1.5 * wO),
                )
            )  # the anchor represents the center; it is therefore only allowed between these bounds in the padded image
            yAnchor = np.random.choice(
                np.arange(
                    round(1.5 * hO),
                    bkg_padded.shape[0] - round(1.5 * hO),
                )
            )
        # note: the round() calls above are for a future step, when widthSource/2
        # will have to be used to allow partial inclusion of the bbox

        # compute the boundaries of the source image inside the total image.
        # This is performed assuming that, in the source image, the
        # object center is perfectly in the middle of the source image &
        # associated bounding box.
        # Therefore, one only needs to add/remove half the height and width with
        # respect to the chosen anchor to find the two opposite corners of the
        # source/bounding box inside the padded background
        xLeft = int(round(xAnchor - wO / 2))
        xRight = int(round(xAnchor + wO / 2))
        yBot = int(round(yAnchor - hO / 2))
        yTop = int(round(yAnchor + hO / 2))

        # due to rounding, the shape of the image at the computed location can
        # exceed/be smaller than the original image; correct that
        if (xRight - xLeft > wO) | (xRight - xLeft < wO):
            xLeft = xRight - wO
        if (yTop - yBot > hO) | (yTop - yBot < hO):
            yBot = yTop - hO

        # it can also happen that, when recomputing the location, the
        # coordinates go negative, which is impossible; therefore, make the
        # coordinate positive and then translate by an amount equal to the
        # negative value of the coordinate
        if xLeft < 0:
            xLeft = abs(xLeft)
            delta = 2 * xLeft
            xRight = xRight + delta
        if yBot < 0:
            yBot = abs(yBot)
            delta = 2 * yBot
            yTop = yTop + delta

        # perform the overlay at the chosen location,
        # taking into account the alpha channel of the source
        alpha = object["image"][:, :, -1]
        alpha_rgb = np.expand_dims(alpha, 2)
        alpha_rgb = np.repeat(alpha_rgb, 3, axis=2)
        alpha_rgb = alpha_rgb.astype(float) / 255
        foreground = cv2.multiply(
            alpha_rgb, object["image"][:, :, :-1], dtype=cv2.CV_64F
        )
        background = cv2.multiply(
            1 - alpha_rgb,
            bkg_padded[yBot:yTop, xLeft:xRight][:, :, :-1],
            dtype=cv2.CV_32F,
        )
        bkg_padded[yBot:yTop, xLeft:xRight][:, :, :-1] = cv2.add(
            foreground, background, dtype=cv2.CV_64F
        )
        # bkg_padded[yBot:yTop, xLeft:xRight][:, :, -1] = alpha  # WRONG

        # keep only the original picture, without the padding
        bkg = bkg_padded[
            hO : hO + bkg.shape[0],
            wO : wO + bkg.shape[1],
        ]

        # In the source image, the bounding box is around the entire image and
        # assumed at the middle; therefore one computes the distance between
        # the center of the bounding box and its edges.
        # Note the convention in imgAug: the x-axis grows from left to right,
        # the y-axis grows from top to bottom
        # xCtr = wO/2
        # yCtr = hO/2
        # deltaX = abs(sourceBbox.bounding_boxes[0].x1_int - xCtr)
        # deltaY = abs(sourceBbox.bounding_boxes[0].y1_int - yCtr)

        # Now define the new bounding box associated to the overlayed source
        # inside the background image. The associated bboxes will be placed
        # in the original (non-padded) background referential, so
        # define the bbox coordinates consistently, i.e. removing
        # widthSource and heightSource from the coordinates
        # (make a drawing to visualize if needed for understanding)
        bkg_with_objects["image"] = bkg
        try:
            bkg_with_objects["bboxes"].append(
                [(xAnchor - wO) / wB, (yAnchor - hO) / hB, wO / wB, hO / hB]
            )
            bkg_with_objects["class_id"].append(object["class_id"][0])
        except AttributeError:
            bkg_with_objects["bboxes"] = [
                [(xAnchor - wO) / wB, (yAnchor - hO) / hB, wO / wB, hO / hB]
            ]
            bkg_with_objects["class_id"] = [object["class_id"][0]]

    # # view
    # # imgaug.imshow(BoundingBoxesOnImage(targetBboxes, bkg.shape).draw_on_image(bkg, color=(0, 255, 0, 255)))

    # # check if bounding boxes are within the picture
    # i = 0
    # for bbox in targetBboxes:
    #     originalArea = bbox.area
    #     remainingArea = bbox.clip_out_of_image(
    #         bkg.shape
    #     ).area  # this computes the remaining area when cut
    #     if remainingArea / originalArea < 0.4:
    #         targetBboxes.pop(i)
    #         # don't increment i if popping element
    #     else:
    #         targetBboxes[i] = bbox.clip_out_of_image(bkg.shape)
    #         i += 1

    return bkg_with_objects

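# End-to-end sketch chaining the two helpers (assumptions: "background.jpg" and
# "object.png" are placeholder file names, the object PNG has an alpha channel,
# and the object dict layout matches the one shown after augment_objects).
bkg = cv2.imread("background.jpg")                             # hypothetical background
obj = {
    "image": cv2.imread("object.png", cv2.IMREAD_UNCHANGED),   # hypothetical RGBA cut-out
    "bboxes": [[0.5, 0.5, 0.9, 0.9]],
    "class_id": [0],
}
sample = overlay(augment_objects([obj], per_obj=3), bkg)
# sample is a dict with keys "image", "bboxes" and "class_id"
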
[ { "keypoints": [[10, 10], [70, 70], [10, 70], [70, 10]] }, None, KeypointParams("xy", check_each_transform=True), { "keypoints": np.array([[10, 10]]) + 25 }, ], [ { "bboxes": [[0, 0, 10, 10, 0], [5, 5, 70, 70, 0], [60, 60, 70, 70, 0]] }, BboxParams("pascal_voc", check_each_transform=False), None, { "bboxes": [[25, 25, 35, 35, 0], [30, 30, 95, 95, 0], [85, 85, 95, 95, 0]] }, ], [ { "bboxes": [[0, 0, 10, 10, 0], [5, 5, 70, 70, 0], [60, 60, 70, 70, 0]] }, BboxParams("pascal_voc", check_each_transform=True), None, { "bboxes": [[25, 25, 35, 35, 0], [30, 30, 75, 75, 0]]
def __init__(self, **kwargs):
    self.transforms = Compose(
        [v for v in kwargs.values()],
        bbox_params=BboxParams(format="coco", label_fields=["types"]),
    )

from albumentations import (
    BboxParams,
    Compose,
    HorizontalFlip,
    HueSaturationValue,
    JpegCompression,
    LongestMaxSize,
    MotionBlur,
    RandomBrightnessContrast,
    RandomGamma,
    RandomRotate90,
    RandomSizedBBoxSafeCrop,
    ShiftScaleRotate,
)


def get_transforms(config, mode: str = "train") -> Compose:
    """
    Composes albumentations transforms.
    Returns the full list of transforms when mode is "train".
    mode should be one of "train", "val".
    """
    # compose validation transforms
    if mode == "val":
        transforms = Compose(
            [],
            bbox_params=BboxParams(
                format="pascal_voc",
                min_area=0.0,
                min_visibility=0.0,
                label_fields=["category_id"],
            ),
        )
    # compose train transforms
    # TODO: make transformation parameters configurable from yml
    elif mode == "train":
        transforms = Compose(
            [
                LongestMaxSize(
                    max_size=config["LONGESTMAXSIZE_MAXSIZE"],
                    p=config["LONGESTMAXSIZE_P"],
                ),
                # PadIfNeeded(min_height=768, min_width=768, border_mode=0, p=1),
                RandomSizedBBoxSafeCrop(
                    height=config["RANDOMSIZEDBBOXSAFECROP_HEIGHT"],
                    width=config["RANDOMSIZEDBBOXSAFECROP_WIDTH"],
                    p=config["LONGESTMAXSIZE_P"],  # note: reuses LONGESTMAXSIZE_P
                ),
                ShiftScaleRotate(
                    shift_limit=config["SHIFTSCALEROTATE_SHIFTLIMIT"],
                    scale_limit=config["SHIFTSCALEROTATE_SCALELIMIT"],
                    rotate_limit=config["SHIFTSCALEROTATE_ROTATELIMIT"],
                    p=config["SHIFTSCALEROTATE_P"],
                ),
                HorizontalFlip(p=config["HORIZONTALFLIP_P"]),
                RandomRotate90(p=config["RANDOMROTATE90_P"]),
                RandomBrightnessContrast(
                    brightness_limit=config["RANDOMBRIGHTNESSCONTRAST_BRIGHTNESSLIMIT"],
                    contrast_limit=config["RANDOMBRIGHTNESSCONTRAST_CONTRASTLIMIT"],
                    p=config["RANDOMBRIGHTNESSCONTRAST_P"],
                ),
                RandomGamma(
                    gamma_limit=config["RANDOMGAMMA_GAMMALIMIT"],
                    p=config["RANDOMGAMMA_P"],
                ),
                HueSaturationValue(
                    hue_shift_limit=config["HUESATURATIONVALUE_HUESHIFTLIMIT"],
                    sat_shift_limit=config["HUESATURATIONVALUE_SATSHIFTLIMIT"],
                    val_shift_limit=config["HUESATURATIONVALUE_VALSHIFTLIMIT"],
                    p=config["HUESATURATIONVALUE_P"],
                ),
                MotionBlur(
                    blur_limit=tuple(config["MOTIONBLUR_BLURLIMIT"]),
                    p=config["MOTIONBLUR_P"],
                ),
                JpegCompression(
                    quality_lower=config["JPEGCOMPRESSION_QUALITYLOWER"],
                    quality_upper=config["JPEGCOMPRESSION_QUALITYUPPER"],
                    p=config["JPEGCOMPRESSION_P"],
                ),
            ],
            bbox_params=BboxParams(
                format="pascal_voc",
                min_area=0.0,
                min_visibility=0.0,
                label_fields=["category_id"],
            ),
        )
    return transforms

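# Illustrative config for get_transforms: the key names come from the function
# above, but the numeric values below are placeholders chosen for this sketch,
# not tuned or project-provided defaults.
config = {
    "LONGESTMAXSIZE_MAXSIZE": 1024,
    "LONGESTMAXSIZE_P": 1.0,
    "RANDOMSIZEDBBOXSAFECROP_HEIGHT": 768,
    "RANDOMSIZEDBBOXSAFECROP_WIDTH": 768,
    "SHIFTSCALEROTATE_SHIFTLIMIT": 0.0625,
    "SHIFTSCALEROTATE_SCALELIMIT": 0.1,
    "SHIFTSCALEROTATE_ROTATELIMIT": 15,
    "SHIFTSCALEROTATE_P": 0.5,
    "HORIZONTALFLIP_P": 0.5,
    "RANDOMROTATE90_P": 0.5,
    "RANDOMBRIGHTNESSCONTRAST_BRIGHTNESSLIMIT": 0.2,
    "RANDOMBRIGHTNESSCONTRAST_CONTRASTLIMIT": 0.2,
    "RANDOMBRIGHTNESSCONTRAST_P": 0.5,
    "RANDOMGAMMA_GAMMALIMIT": [80, 120],
    "RANDOMGAMMA_P": 0.5,
    "HUESATURATIONVALUE_HUESHIFTLIMIT": 20,
    "HUESATURATIONVALUE_SATSHIFTLIMIT": 30,
    "HUESATURATIONVALUE_VALSHIFTLIMIT": 20,
    "HUESATURATIONVALUE_P": 0.5,
    "MOTIONBLUR_BLURLIMIT": [3, 7],
    "MOTIONBLUR_P": 0.3,
    "JPEGCOMPRESSION_QUALITYLOWER": 80,
    "JPEGCOMPRESSION_QUALITYUPPER": 100,
    "JPEGCOMPRESSION_P": 0.3,
}
train_transforms = get_transforms(config, mode="train")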