Code example #1
    def test_flip_keypoints(self):
        transforms = T.TransformList([T.HFlipTransform(400)])
        anno = {
            "bbox": np.asarray([10, 10, 200, 300]),
            "bbox_mode": BoxMode.XYXY_ABS,
            "keypoints": np.random.rand(17, 3) * 50 + 15,
        }

        output = detection_utils.transform_instance_annotations(
            copy.deepcopy(anno),
            transforms,
            (400, 400),
            keypoint_hflip_indices=detection_utils.create_keypoint_hflip_indices(
                ["keypoints_coco_2017_train"]
            ),
        )
        # The first keypoint is nose
        self.assertTrue(np.allclose(output["keypoints"][0, 0], 400 - anno["keypoints"][0, 0]))
        # The last 16 keypoints are 8 left-right pairs
        self.assertTrue(
            np.allclose(
                output["keypoints"][1:, 0].reshape(-1, 2)[:, ::-1],
                400 - anno["keypoints"][1:, 0].reshape(-1, 2),
            )
        )
        self.assertTrue(
            np.allclose(
                output["keypoints"][1:, 1:].reshape(-1, 2, 2)[:, ::-1, :],
                anno["keypoints"][1:, 1:].reshape(-1, 2, 2),
            )
        )
Code example #2
    def test_transform_simple_annotation(self):
        transforms = T.TransformList([T.HFlipTransform(400)])
        anno = {
            "bbox": np.asarray([10, 10, 200, 300]),
            "bbox_mode": BoxMode.XYXY_ABS,
            "category_id": 3,
            "segmentation": [[10, 10, 100, 100, 100, 10], [150, 150, 200, 150, 200, 200]],
        }

        output = detection_utils.transform_instance_annotations(anno, transforms, (400, 400))
        self.assertTrue(np.allclose(output["bbox"], [200, 10, 390, 300]))
        self.assertEqual(len(output["segmentation"]), len(anno["segmentation"]))
        self.assertTrue(np.allclose(output["segmentation"][0], [390, 10, 300, 100, 300, 10]))

        detection_utils.annotations_to_instances([output, output], (400, 400))
Code example #3
    def test_crop(self):
        transforms = T.TransformList([T.CropTransform(300, 300, 10, 10)])
        keypoints = np.random.rand(17, 3) * 50 + 15
        keypoints[:, 2] = 2
        anno = {
            "bbox": np.asarray([10, 10, 200, 300]),
            "bbox_mode": BoxMode.XYXY_ABS,
            "keypoints": keypoints,
        }

        output = detection_utils.transform_instance_annotations(
            copy.deepcopy(anno), transforms, (400, 400)
        )
        self.assertTrue((output["bbox"] == np.asarray([-290, -290, -100, 0])).all())
        # keypoints are no longer visible
        self.assertTrue((output["keypoints"][:, 2] == 0).all())
Code example #4
File: dataset_mapper.py (project: SeanWangJS/txtdet)
    def transform_instance_annotations_rotated(self, annotation, transforms,
                                               image_size):
        if isinstance(transforms, (tuple, list)):
            transforms = T.TransformList(transforms)

        bbox = annotation["bbox"]
        alpha = bbox[-1]
        bbox = bbox[:4]
        bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
        bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0)
        bbox = bbox.tolist()
        bbox = BoxMode.convert(bbox, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
        bbox = np.minimum(bbox, list(image_size + image_size)[::-1])
        bbox = np.append(bbox, alpha)
        annotation["bbox"] = bbox
        annotation["bbox_mode"] = BoxMode.XYWHA_ABS

        return annotation
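
Usage note (not from the project above): since the method body never touches `self`, the following minimal sketch calls it as a plain function of the same name; the box values and the horizontal flip are illustrative assumptions.

import numpy as np
import detectron2.data.transforms as T
from detectron2.structures import BoxMode

transforms = T.TransformList([T.HFlipTransform(400)])
anno = {
    "bbox": [50.0, 40.0, 120.0, 60.0, 15.0],  # x, y, w, h, angle in degrees
    "bbox_mode": BoxMode.XYWHA_ABS,
}
# None stands in for the unused `self` argument
out = transform_instance_annotations_rotated(None, anno, transforms, (400, 400))
# out["bbox"] -> [230., 40., 120., 60., 15.]; the angle is passed through unchanged
# and out["bbox_mode"] is set to BoxMode.XYWHA_ABS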
Code example #5
    def test_transform_RLE_resize(self):
        transforms = T.TransformList(
            [T.HFlipTransform(400), T.ScaleTransform(300, 400, 400, 400, "bilinear")]
        )
        mask = np.zeros((300, 400), order="F").astype("uint8")
        mask[:, :200] = 1

        anno = {
            "bbox": np.asarray([10, 10, 200, 300]),
            "bbox_mode": BoxMode.XYXY_ABS,
            "segmentation": mask_util.encode(mask[:, :, None])[0],
            "category_id": 3,
        }
        output = detection_utils.transform_instance_annotations(
            copy.deepcopy(anno), transforms, (400, 400)
        )

        inst = detection_utils.annotations_to_instances(
            [output, output], (400, 400), mask_format="bitmask"
        )
        self.assertTrue(isinstance(inst.gt_masks, BitMasks))
Code example #6
def transform_instance_annotations(
        annotation, transforms, image_size, *, add_meta_infos=False
):
    """
    Apply transforms to box and meta_infos annotations of a single instance.

    It will use `transforms.apply_box` for the box and, when `add_meta_infos`
    is set, `transforms.apply_meta_infos` for the meta_infos dict.
    If you need anything more specially designed for each data structure,
    you'll need to implement your own version of this function or the transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
            It will be modified in-place.
        transforms (TransformList or list[Transform]):
        image_size (tuple): the height, width of the transformed image
        add_meta_infos (bool): whether to add a transformed "meta_infos" field
            to the annotation.

    Returns:
        dict:
            the same input dict with fields "bbox", "meta_infos"
            transformed according to `transforms`.
            The "bbox_mode" field will be set to XYXY_ABS.
    """
    if isinstance(transforms, (tuple, list)):
        transforms = T.TransformList(transforms)
    # bbox is 1d (per-instance bounding box)
    bbox = BoxMode.convert(
        annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS)
    # clip transformed bbox to image size
    bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0)
    annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1])
    annotation["bbox_mode"] = BoxMode.XYXY_ABS

    # add meta_infos
    if add_meta_infos:
        meta_infos = dict()
        meta_infos = transforms.apply_meta_infos(meta_infos)
        annotation["meta_infos"] = meta_infos
    return annotation
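
A minimal usage sketch for this variant with plain detectron2 transforms (illustrative values; `add_meta_infos=True` additionally requires the transforms to implement the project-specific `apply_meta_infos` method, so it is left at its default here):

import copy
import numpy as np
import detectron2.data.transforms as T
from detectron2.structures import BoxMode

anno = {"bbox": np.asarray([10, 10, 200, 300]), "bbox_mode": BoxMode.XYXY_ABS}
transforms = T.TransformList([T.HFlipTransform(400)])
out = transform_instance_annotations(copy.deepcopy(anno), transforms, (400, 400))
# out["bbox"] -> [200., 10., 390., 300.] and out["bbox_mode"] == BoxMode.XYXY_ABS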
Code example #7
    def test_transform_uncompressed_RLE(self):
        transforms = T.TransformList([T.HFlipTransform(400)])
        mask = np.zeros((300, 400)).astype("uint8")
        mask[:, :200] = 1

        anno = {
            "bbox": np.asarray([10, 10, 200, 300]),
            "bbox_mode": BoxMode.XYXY_ABS,
            "segmentation": binary_mask_to_uncompressed_rle(mask),
            "category_id": 3,
        }
        output = detection_utils.transform_instance_annotations(
            copy.deepcopy(anno), transforms, (300, 400)
        )
        mask = output["segmentation"]
        self.assertTrue((mask[:, 200:] == 1).all())
        self.assertTrue((mask[:, :200] == 0).all())

        inst = detection_utils.annotations_to_instances(
            [output, output], (400, 400), mask_format="bitmask"
        )
        self.assertTrue(isinstance(inst.gt_masks, BitMasks))
Code example #8
  def train_mapper(self, dataset_dict):
    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    # Create a copy of the dataset dict
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below


    ##### Image Transformations #####
    # Read in the image
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    # fileName = dataset_dict["file_name"]

    
    ## Crop to bounding box ##
    # Crop for all but comparison
    if(self.dataset_used != "comparison" and self.is_crop_to_bbox):
      # Get the bounding box
      bbox = ((dataset_dict["annotations"])[0])["bbox"]
      xmin,ymin,xmax,ymax = bbox
      w = xmax-xmin
      h = ymax-ymin

      # IsCropToBBox = True
      # if(IsCropToBBox):
      # Nudge the crop to be slightly outside of the bounding box
      nudgedXMin = xmin-15
      nudgedYMin = ymin-15
      nudgedW = w+50
      nudgedH = h+50

      # If the bounding boxes go outside of the image dimensions, fix this
      imageHeight = image.shape[0]
      imageWidth  = image.shape[1]
      if(nudgedXMin < 0): nudgedXMin = 0
      if(nudgedYMin < 0): nudgedYMin = 0
      # Clamp the crop so it stays inside the image bounds
      if(nudgedXMin+nudgedW >= imageWidth):  nudgedW = imageWidth-1-nudgedXMin
      if(nudgedYMin+nudgedH >= imageHeight): nudgedH = imageHeight-1-nudgedYMin

      # Apply the crop
      cropT = T.CropTransform(nudgedXMin,nudgedYMin,nudgedW,nudgedH)
      image = cropT.apply_image(image)
      
      transforms = T.TransformList([cropT])
    # Comparison has bbox the size of the image, so dont bother cropping
    else:
      # scaled between 0.5 and 1; shifted up to 0.5 in each dimension
      # randomExtant = T.RandomExtent( (0.5,1),(0.5,0.5) )
      # transforms = T.TransformList([randomExtant])
      transforms = T.TransformList([])

    # Apply the crop to the bbox as well
    # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

    dataset_dict["height"] = image.shape[0]
    dataset_dict["width"]  = image.shape[1]
    
    # Add to the list of transforms
    
    # else:
    #   nudgedH = dataset_dict["height"]
    #   nudgedW = dataset_dict["width"]
      

    ## Scale the image size ##
    # thresholdDimension = 1000
    # if(dataset_used == "large"):
      # thresholdDimension = 500
    # thresholdDimension = 800
    # thresholdDimension = 600
    thresholdDimension = self.threshold_dimension

    currWidth  = dataset_dict["width"]
    currHeight = dataset_dict["height"]

    # NOTE: YOLO input size must be multiple of 32
    if(self.modelLink in ["VGG19_BN","YOLOV3"]):
      vgg_im_size = thresholdDimension
      # Apply the scaling transform
      scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=vgg_im_size,new_w=vgg_im_size,interp="nearest") 
      image = scaleT.apply_image(image.copy())

      # Apply the scaling to the bbox
      # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

      # Add this to the list of transforms
      transforms = transforms + scaleT

      # Set the dimensions
      dataset_dict["height"] = image.shape[0]
      dataset_dict["width"]  = image.shape[1]
    else:# Downscale only at this threshold
      if(currHeight > thresholdDimension or currWidth > thresholdDimension):
        myNewH = 0
        myNewW = 0
        # Scale the longest dimension to threshold, other in proportion
        if(currHeight > currWidth): 
          myNewH = thresholdDimension
          ratio = currHeight/float(myNewH)
          myNewW = currWidth/float(ratio)
          myNewW = int(round(myNewW))
          # myNewW = 800
        else:
          # myNewH = 800
          
          myNewW = thresholdDimension
          ratio = currWidth/float(myNewW)
          myNewH = currHeight/float(ratio)
          myNewH = int(round(myNewH))
 

        # Apply the scaling transform
        if(self.fixed_wh):
          scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewH,new_w=myNewW,interp="nearest") 
        else:
          scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewW,new_w=myNewH,interp="nearest") 
        image = scaleT.apply_image(image.copy())

        # Apply the scaling to the bbox
        # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

        # Add this to the list of transforms
        transforms = transforms + scaleT

        # Set the dimensions
        dataset_dict["height"] = image.shape[0]
        dataset_dict["width"]  = image.shape[1]
    
    ## Apply a random flip ##
    image, tfms = T.apply_transform_gens([T.RandomFlip()], image)
    transforms = transforms + tfms

    # Apply Other Transforms ##
    # Standard random image mods
    if(self.dataset_used != "comparison"):
      image, tfms = T.apply_transform_gens([T.RandomBrightness(0.4,1.6),T.RandomContrast(0.4,1.6),T.RandomSaturation(0.5,1.5),T.RandomLighting(1.2)], image)
    # More extreme for comparison set
    else:
      image, tfms = T.apply_transform_gens([T.RandomBrightness(0.2,1.8),T.RandomContrast(0.2,1.8),T.RandomSaturation(0.3,1.7),T.RandomLighting(1.5)], image)
    transforms = transforms + tfms

    ## Apply random affine (actually just a shear) ##
    # Pass in the image size
    PILImage = Image.fromarray(image)

    # Standard affine
    if(self.dataset_used != "comparison"):
      shear_range = 8
      angle_range = 30
      # rand_shear = (np.random.uniform(-shear_range,shear_range),np.random.uniform(-8,8))
      # rand_angle = np.random.uniform(-30,30)
    # More extreme random affine for comparison
    else:
      shear_range = 50
      angle_range = 30
      # rand_shear = (np.random.uniform(-30,30),np.random.uniform(-30,30))
      # rand_angle = np.random.uniform(-70,70)

    rand_shear = (np.random.uniform(-shear_range,shear_range),np.random.uniform(-shear_range,shear_range))
    rand_angle = np.random.uniform(-angle_range,angle_range)

    RandAffT = RandomAffineTransform(PILImage.size,shear=rand_shear,angle=rand_angle)
    # Apply affine to image
    image = RandAffT.apply_image(image.copy())
    # Append to transforms
    transforms = transforms + RandAffT

    ##### END Image Transformations #####

    # Sanity check: the transformed image must not be empty
    if(image.shape[0] == 0):
      raise ValueError("image shape[0] is 0!: {} {}".format(image.shape, dataset_dict["file_name"]))
    if(image.shape[1] == 0):
      raise ValueError("image shape[1] is 0!: {} {}".format(image.shape, dataset_dict["file_name"]))

    # Set the image in the dictionary
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))


    # Do remainder of dictionary
    classID = ((dataset_dict["annotations"])[0])["category_id"]
    dataset_dict["classID"] = classID

    # bboxes
    # if(self.dataset_used != "comparison"):
    annos = \
    [
      utils.transform_instance_annotations(obj, transforms, image.shape[:2])
      for obj in dataset_dict.pop("annotations")
      if obj.get("iscrowd", 0) == 0
    ]

    # transformNames = [transforms.__name__ for x in transforms]
    # transformNames = ", ".join(transformNames)

    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    # # no bboxes
    # else:
    #   instances = Instances(  (dataset_dict["height"],dataset_dict["width"])  )
    #   instances.gt_classes = torch.tensor([dataset_dict["classID"]])
    #   dataset_dict["instances"] = instances

    dataset_dict["transforms"] = transforms

    return dataset_dict
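
Usage note (an assumption, not shown in the project code): a mapper method like `train_mapper` is typically wired into detectron2 by passing it as the `mapper` callable when building the data loader; `cfg` and `my_mapper` below are hypothetical names.

from detectron2.data import build_detection_train_loader

# my_mapper is an instance of the class defining train_mapper above,
# cfg is a detectron2 config object
train_loader = build_detection_train_loader(cfg, mapper=my_mapper.train_mapper)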
Code example #9
  def test_mapper(self, dataset_dict):
    # If we're mapping at test time
    if(self.is_test_time_mapping):
      return self.train_mapper(dataset_dict)



    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    # Create a copy of the dataset dict
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below


    ##### Image Transformations #####
    # Read in the image
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    # fileName = dataset_dict["file_name"]

    
    ## Crop to bounding box ##
    if(self.dataset_used != "comparison" and self.is_crop_to_bbox):
      # Get the bounding box
      bbox = ((dataset_dict["annotations"])[0])["bbox"]
      xmin,ymin,xmax,ymax = bbox
      w = xmax-xmin
      h = ymax-ymin

      # IsCropToBBox = True
      # if(IsCropToBBox):
      # Nudge the crop to be slightly outside of the bounding box
      nudgedXMin = xmin-15
      nudgedYMin = ymin-15
      nudgedW = w+50
      nudgedH = h+50

      # If the bounding boxes go outside of the image dimensions, fix this
      imageHeight = image.shape[0]
      imageWidth  = image.shape[1]
      if(nudgedXMin < 0): nudgedXMin = 0
      if(nudgedYMin < 0): nudgedYMin = 0
      # Clamp the crop so it stays inside the image bounds
      if(nudgedXMin+nudgedW >= imageWidth):  nudgedW = imageWidth-1-nudgedXMin
      if(nudgedYMin+nudgedH >= imageHeight): nudgedH = imageHeight-1-nudgedYMin

      # Apply the crop
      cropT = T.CropTransform(nudgedXMin,nudgedYMin,nudgedW,nudgedH)
      image = cropT.apply_image(image)
      
      transforms = T.TransformList([cropT])
    # else:
      # nudgedH = dataset_dict["height"]
      # nudgedW = dataset_dict["width"]
    else:
      transforms = T.TransformList([])

      
    # Apply the crop to the bbox as well
    # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

    dataset_dict["height"] = image.shape[0]
    dataset_dict["width"]  = image.shape[1]
    
    # Add to the list of transforms
    
    # else:
    #   nudgedH = dataset_dict["height"]
    #   nudgedW = dataset_dict["width"]
      

    ## Scale the image size ##
    # thresholdDimension = 1000
    # if(dataset_used == "large"):
      # thresholdDimension = 500
    # thresholdDimension = 800
    # thresholdDimension = 600
    thresholdDimension = self.threshold_dimension

    currWidth  = dataset_dict["width"]
    currHeight = dataset_dict["height"]

    # VGG and YOLO are set up here to take fixed, equal-sized square inputs
    if(self.modelLink in ["VGG19_BN","YOLOV3"]):
      vgg_im_size = thresholdDimension
      # Apply the scaling transform
      scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=vgg_im_size,new_w=vgg_im_size,interp="nearest") 
      image = scaleT.apply_image(image.copy())

      # Apply the scaling to the bbox
      # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

      # Add this to the list of transforms
      transforms = transforms + scaleT

      # Set the dimensions
      dataset_dict["height"] = image.shape[0]
      dataset_dict["width"]  = image.shape[1]
    # not vgg or yolo
    else:
      # Downscale only at this threshold
      if(currHeight > thresholdDimension or currWidth > thresholdDimension):
        myNewH = 0
        myNewW = 0
        # Scale the longest dimension to the threshold, the other in proportion
        if(currHeight > currWidth): 
          myNewH = thresholdDimension
          ratio = currHeight/float(myNewH)
          myNewW = currWidth/float(ratio)
          myNewW = int(round(myNewW))
          # myNewW = 800
        else:
          # myNewH = 800
          myNewW = thresholdDimension
          ratio = currWidth/float(myNewW)
          myNewH = currHeight/float(ratio)
          myNewH = int(round(myNewH))

        # Apply the scaling transform
        # scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewH,new_w=myNewW,interp="nearest") 
        # scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewW,new_w=myNewH,interp="nearest") 
        if(self.fixed_wh):
          scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewH,new_w=myNewW,interp="nearest") 
        else:
          scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewW,new_w=myNewH,interp="nearest") 
        image = scaleT.apply_image(image.copy())

        # Apply the scaling to the bbox
        # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

        # Add this to the list of transforms
        transforms = transforms + scaleT

        # Set the dimensions
        dataset_dict["height"] = image.shape[0]
        dataset_dict["width"]  = image.shape[1]
    
    ## Apply a random flip ##
    # image, tfms = T.apply_transform_gens([T.RandomFlip()], image)
    # transforms = transforms + tfms

    # Apply Other Transforms ##
    # image, tfms = T.apply_transform_gens([T.RandomBrightness(0.4,1.6),T.RandomContrast(0.4,1.6),T.RandomSaturation(0.5,1.5),T.RandomLighting(1.2)], image)
    # transforms = transforms + tfms

    ## Apply random affine (actually just a shear) ##
    # Pass in the image size
    # PILImage = Image.fromarray(image)
    # RandAffT = RandomAffineTransform(PILImage.size)
    # Apply affine to image
    # image = RandAffT.apply_image(image.copy())
    # Append to transforms
    # transforms = transforms + RandAffT

    ##### END Image Transformations #####

    # Sanity check: the transformed image must not be empty
    if(image.shape[0] == 0):
      raise ValueError("image shape[0] is 0!: {} {}".format(image.shape, dataset_dict["file_name"]))
    if(image.shape[1] == 0):
      raise ValueError("image shape[1] is 0!: {} {}".format(image.shape, dataset_dict["file_name"]))

    # Set the image in the dictionary
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))


    # Do remainder of dictionary
    classID = ((dataset_dict["annotations"])[0])["category_id"]
    dataset_dict["classID"] = classID

    annos = \
    [
      utils.transform_instance_annotations(obj, transforms, image.shape[:2])
      for obj in dataset_dict.pop("annotations")
      if obj.get("iscrowd", 0) == 0
    ]

    # transformNames = [transforms.__name__ for x in transforms]
    # transformNames = ", ".join(transformNames)

    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)

    dataset_dict["transforms"] = transforms

    return dataset_dict

  # # Small mappers
  # def small_train_mapper(self,dataset_dict):
  #   return self.train_mapper(dataset_dict,"small")

  # def small_test_mapper(self,dataset_dict):
  #   if(self.is_test_time_mapping):
  #     return self.test_mapper(dataset_dict,"small")
  #   else:
  #     return self.train_mapper(dataset_dict,"small")

  # # Large mappers
  # def large_train_mapper(self,dataset_dict):
  #   return self.train_mapper(dataset_dict,"large")

  # def large_test_mapper(self,dataset_dict):
  #   if(self.is_test_time_mapping):
  #     return self.test_mapper(dataset_dict,"large")
  #   else:
  #     return self.train_mapper(dataset_dict,"large")

  # # Full mappers
  # def full_train_mapper(self,dataset_dict):
  #   return self.train_mapper(dataset_dict,"full")

  # def full_test_mapper(self,dataset_dict):
  #   if(self.is_test_time_mapping):
  #     return self.test_mapper(dataset_dict,"full")
  #   else:
  #     return self.train_mapper(dataset_dict,"full")

  # # Comparison mappers
  # def comparison_train_mapper(self,dataset_dict):
  #   return self.train_mapper(dataset_dict,"comparison")

  # def comparison_test_mapper(self,dataset_dict):
  #   if(self.is_test_time_mapping):
  #     return self.test_mapper(dataset_dict,"comparison")
  #   else:
  #     return self.train_mapper(dataset_dict,"comparison")
Code example #10
    def __call__(self, aug_input):
        # aug_input.image is (H, W, C); remember the original size so we can resize back
        old_h, old_w, _ = aug_input.image.shape
        # Calling an Augmentation on an AugInput applies it in place and returns the Transform used
        scaler = T.RandomExtent(self.scale_range, self.shift_range)(aug_input)
        resizer = T.Resize((old_h, old_w))(aug_input)
        return T.TransformList([scaler, resizer])
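
A hedged usage sketch, assuming this `__call__` lives on a custom `T.Augmentation` subclass (the name `CropAndResize` and its constructor arguments are hypothetical) and a detectron2 version that provides `T.AugInput`:

import numpy as np
import detectron2.data.transforms as T

image = np.zeros((480, 640, 3), dtype=np.uint8)   # dummy (H, W, C) image
aug_input = T.AugInput(image)
tfl = CropAndResize(scale_range=(0.8, 1.0), shift_range=(0.2, 0.2))(aug_input)
# aug_input.image has been randomly cropped and resized back to 480x640 in place;
# tfl is a TransformList that can also be applied to boxes, e.g. tfl.apply_box(boxes)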
Code example #11
File: detection_utils.py (project: madhutry/tableocr)
def transform_instance_annotations(annotation,
                                   transforms,
                                   image_size,
                                   *,
                                   keypoint_hflip_indices=None):
    """
    Apply transforms to box, segmentation and keypoints annotations of a single instance.

    It will use `transforms.apply_box` for the box, and
    `transforms.apply_coords` for segmentation polygons & keypoints.
    If you need anything more specially designed for each data structure,
    you'll need to implement your own version of this function or the transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
            It will be modified in-place.
        transforms (TransformList or list[Transform]):
        image_size (tuple): the height, width of the transformed image
        keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`.

    Returns:
        dict:
            the same input dict with fields "bbox", "segmentation", "keypoints"
            transformed according to `transforms`.
            The "bbox_mode" field will be set to XYXY_ABS.
    """
    if isinstance(transforms, (tuple, list)):
        transforms = T.TransformList(transforms)
    # bbox is 1d (per-instance bounding box)
    bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"],
                           BoxMode.XYXY_ABS)
    # clip transformed bbox to image size
    bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0)
    annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1])
    annotation["bbox_mode"] = BoxMode.XYXY_ABS

    if "segmentation" in annotation:
        # each instance contains 1 or more polygons
        segm = annotation["segmentation"]
        if isinstance(segm, list):
            # polygons
            polygons = [np.asarray(p).reshape(-1, 2) for p in segm]
            annotation["segmentation"] = [
                p.reshape(-1) for p in transforms.apply_polygons(polygons)
            ]
        elif isinstance(segm, dict):
            # RLE
            mask = mask_util.decode(segm)
            mask = transforms.apply_segmentation(mask)
            assert tuple(mask.shape[:2]) == image_size
            annotation["segmentation"] = mask
        else:
            raise ValueError(
                "Cannot transform segmentation of type '{}'!"
                "Supported types are: polygons as list[list[float] or ndarray],"
                " COCO-style RLE as a dict.".format(type(segm)))

    if "keypoints" in annotation:
        keypoints = transform_keypoint_annotations(annotation["keypoints"],
                                                   transforms, image_size,
                                                   keypoint_hflip_indices)
        annotation["keypoints"] = keypoints

    return annotation
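
Pulling it together, a brief usage sketch of this function with a polygon segmentation, matching the behaviour asserted in code example #2 above (illustrative values):

import copy
import numpy as np
import detectron2.data.transforms as T
from detectron2.structures import BoxMode

transforms = T.TransformList([T.HFlipTransform(400)])
anno = {
    "bbox": np.asarray([10, 10, 200, 300]),
    "bbox_mode": BoxMode.XYXY_ABS,
    "segmentation": [[10, 10, 100, 100, 100, 10]],
    "category_id": 3,
}
out = transform_instance_annotations(copy.deepcopy(anno), transforms, (400, 400))
# out["bbox"]         -> [200., 10., 390., 300.]
# out["segmentation"] -> [array([390., 10., 300., 100., 300., 10.])]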