Example #1
def gen_crop_transform_with_instance(crop_size,
                                     image_size,
                                     instances,
                                     crop_box=True):
    """
    Generate a CropTransform so that the cropping region contains
    the center of the given instance.

    Args:
        crop_size (tuple): h, w in pixels
        image_size (tuple): h, w
        instances (list[dict]): annotation dicts of the instances in the image,
            in Detectron2's dataset format; one instance is chosen at random
            and the crop is centered on it.
        crop_box (bool): if False, iteratively adjust the crop region (via
            adjust_crop) so that no instance box is cut by the crop boundary.
    """
    instance = np.random.choice(instances)
    crop_size = np.asarray(crop_size, dtype=np.int32)
    bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"],
                           BoxMode.XYXY_ABS)
    center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5
    assert (image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1]
            ), "The annotation bounding box is outside of the image!"
    assert (image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1]
            ), "Crop size is larger than image size!"

    min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0)
    max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0)
    max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32))

    y0 = np.random.randint(min_yx[0], max_yx[0] + 1)
    x0 = np.random.randint(min_yx[1], max_yx[1] + 1)

    # If some instance is cut by the crop, extend the crop region
    if not crop_box:
        num_modifications = 0
        modified = True

        # convert crop_size to float
        crop_size = crop_size.astype(np.float32)
        while modified:
            modified, x0, y0, crop_size = adjust_crop(x0, y0, crop_size,
                                                      instances)
            num_modifications += 1
            if num_modifications > 100:
                logger.info(
                    "Cannot finished cropping adjustment within 100 tries (#instances {})."
                    .format(len(instances)))
                return T.CropTransform(0, 0, image_size[1], image_size[0])

    return T.CropTransform(*map(int, (x0, y0, crop_size[1], crop_size[0])))
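A minimal usage sketch (not part of the original snippet) of how this function might be called: it assumes the function above and Detectron2's detection_utils are importable, that the dataset dict follows Detectron2's dataset format, and that the image is at least crop_size in each dimension. The helper name crop_around_random_instance is hypothetical.

import copy
import numpy as np
from detectron2.data import detection_utils as utils

def crop_around_random_instance(dataset_dict, crop_size=(512, 512)):
    # Hypothetical helper, for illustration only.
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    crop_t = gen_crop_transform_with_instance(
        crop_size, image.shape[:2], dataset_dict["annotations"], crop_box=True
    )
    # Apply the same transform to the image and to each annotation.
    cropped = crop_t.apply_image(image)
    annos = [
        utils.transform_instance_annotations(copy.deepcopy(obj), [crop_t], cropped.shape[:2])
        for obj in dataset_dict["annotations"]
    ]
    return cropped, annos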
Example #2
def gen_crop_transform_with_instance(crop_size, image_size, instance):
    """
    Generate a CropTransform so that the cropping region contains
    the center of the given instance.

    Args:
        crop_size (tuple): h, w in pixels
        image_size (tuple): h, w
        instance (dict): an annotation dict of one instance, in Detectron2's
            dataset format.
    """
    crop_size = np.asarray(crop_size, dtype=np.int32)
    bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"],
                           BoxMode.XYXY_ABS)
    center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5
    assert (image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1]
            ), "The annotation bounding box is outside of the image!"
    assert (image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1]
            ), "Crop size is larger than image size!"

    min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0)
    max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0)
    max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32))

    y0 = np.random.randint(min_yx[0], max_yx[0] + 1)
    x0 = np.random.randint(min_yx[1], max_yx[1] + 1)
    return T.CropTransform(x0, y0, crop_size[1], crop_size[0])
Example #3
    def test_crop(self):
        transforms = T.TransformList([T.CropTransform(300, 300, 10, 10)])
        keypoints = np.random.rand(17, 3) * 50 + 15
        keypoints[:, 2] = 2
        anno = {
            "bbox": np.asarray([10, 10, 200, 300]),
            "bbox_mode": BoxMode.XYXY_ABS,
            "keypoints": keypoints,
        }

        output = detection_utils.transform_instance_annotations(
            copy.deepcopy(anno), transforms, (400, 400)
        )
        self.assertTrue((output["bbox"] == np.asarray([-290, -290, -100, 0])).all())
        # keypoints are no longer visible
        self.assertTrue((output["keypoints"][:, 2] == 0).all())
Example #4
  def train_mapper(self, dataset_dict):
    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    # Create a copy of the dataset dict
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below


    ##### Image Transformations #####
    # Read in the image
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    # fileName = dataset_dict["file_name"]

    
    ## Crop to bounding box ##
    # Crop for all but comparison
    if(self.dataset_used != "comparison" and self.is_crop_to_bbox):
      # Get the bounding box
      bbox = ((dataset_dict["annotations"])[0])["bbox"]
      xmin,ymin,xmax,ymax = bbox
      w = xmax-xmin
      h = ymax-ymin

      # IsCropToBBox = True
      # if(IsCropToBBox):
      # Nudge the crop to be slightly outside of the bounding box
      nudgedXMin = xmin-15
      nudgedYMin = ymin-15
      nudgedW = w+50
      nudgedH = h+50

      # If the nudged crop goes outside of the image dimensions, clamp it
      imageHeight = image.shape[0]
      imageWidth  = image.shape[1]
      if(nudgedXMin < 0): nudgedXMin = 0
      if(nudgedYMin < 0): nudgedYMin = 0
      # Clamp relative to the crop origin so the crop stays inside the image
      if(nudgedXMin+nudgedW >= imageWidth):  nudgedW = imageWidth-1-nudgedXMin
      if(nudgedYMin+nudgedH >= imageHeight): nudgedH = imageHeight-1-nudgedYMin

      # Apply the crop
      cropT = T.CropTransform(nudgedXMin,nudgedYMin,nudgedW,nudgedH)
      image = cropT.apply_image(image)
      
      transforms = T.TransformList([cropT])
    # Comparison has a bbox the size of the image, so don't bother cropping
    else:
      # scaled between 0.5 and 1; shifted up to 0.5 in each dimension
      # randomExtant = T.RandomExtent( (0.5,1),(0.5,0.5) )
      # transforms = T.TransformList([randomExtant])
      transforms = T.TransformList([])

    # Apply the crop to the bbox as well
    # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

    dataset_dict["height"] = image.shape[0]
    dataset_dict["width"]  = image.shape[1]
    
    # Add to the list of transforms
    
    # else:
    #   nudgedH = dataset_dict["height"]
    #   nudgedW = dataset_dict["width"]
      

    ## Scale the image size ##
    # thresholdDimension = 1000
    # if(dataset_used == "large"):
      # thresholdDimension = 500
    # thresholdDimension = 800
    # thresholdDimension = 600
    thresholdDimension = self.threshold_dimension

    currWidth  = dataset_dict["width"]
    currHeight = dataset_dict["height"]

    # NOTE: YOLO input size must be a multiple of 32
    if(self.modelLink in ["VGG19_BN","YOLOV3"]):
      vgg_im_size = thresholdDimension
      # Apply the scaling transform
      scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=vgg_im_size,new_w=vgg_im_size,interp="nearest") 
      image = scaleT.apply_image(image.copy())

      # Apply the scaling to the bbox
      # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

      # Add this to the list of transforms
      transforms = transforms + scaleT

      # Set the dimensions
      dataset_dict["height"] = image.shape[0]
      dataset_dict["width"]  = image.shape[1]
    else:  # Downscale only at this threshold
      if(currHeight > thresholdDimension or currWidth > thresholdDimension):
        myNewH = 0
        myNewW = 0
        # Scale the longest dimension to threshold, other in proportion
        if(currHeight > currWidth): 
          myNewH = thresholdDimension
          ratio = currHeight/float(myNewH)
          myNewW = currWidth/float(ratio)
          myNewW = int(round(myNewW))
          # myNewW = 800
        else:
          # myNewH = 800
          
          myNewW = thresholdDimension
          ratio = currWidth/float(myNewW)
          myNewH = currHeight/float(ratio)
          myNewH = int(round(myNewH))
 

        # Apply the scaling transform
        if(self.fixed_wh):
          scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewH,new_w=myNewW,interp="nearest") 
        else:
          scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewW,new_w=myNewH,interp="nearest") 
        image = scaleT.apply_image(image.copy())

        # Apply the scaling to the bbox
        # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

        # Add this to the list of transforms
        transforms = transforms + scaleT

        # Set the dimensions
        dataset_dict["height"] = image.shape[0]
        dataset_dict["width"]  = image.shape[1]
    
    ## Apply a random flip ##
    image, tfms = T.apply_transform_gens([T.RandomFlip()], image)
    transforms = transforms + tfms

    ## Apply Other Transforms ##
    # Standard random image mods
    if(self.dataset_used != "comparison"):
      image, tfms = T.apply_transform_gens([T.RandomBrightness(0.4,1.6),T.RandomContrast(0.4,1.6),T.RandomSaturation(0.5,1.5),T.RandomLighting(1.2)], image)
    # More extreme for comparison set
    else:
      image, tfms = T.apply_transform_gens([T.RandomBrightness(0.2,1.8),T.RandomContrast(0.2,1.8),T.RandomSaturation(0.3,1.7),T.RandomLighting(1.5)], image)
    transforms = transforms + tfms

    ## Apply a random affine (shear + rotation) ##
    # Pass in the image size
    PILImage = Image.fromarray(image)

    # Standard affine
    if(self.dataset_used != "comparison"):
      shear_range = 8
      angle_range = 30
      # rand_shear = (np.random.uniform(-shear_range,shear_range),np.random.uniform(-8,8))
      # rand_angle = np.random.uniform(-30,30)
    # More extreme random affine for comparison
    else:
      shear_range = 50
      angle_range = 30
      # rand_shear = (np.random.uniform(-30,30),np.random.uniform(-30,30))
      # rand_angle = np.random.uniform(-70,70)

    rand_shear = (np.random.uniform(-shear_range,shear_range),np.random.uniform(-shear_range,shear_range))
    rand_angle = np.random.uniform(-angle_range,angle_range)

    RandAffT = RandomAffineTransform(PILImage.size,shear=rand_shear,angle=rand_angle)
    # Apply affine to image
    image = RandAffT.apply_image(image.copy())
    # Append to transforms
    transforms = transforms + RandAffT

    ##### END Image Transformations #####

    # Keep these in for now I suppose
    if(image.shape[0] == 0):
      raise ValueError("image shape[0] is 0!: {} {}".format(image.shape, dataset_dict["file_name"]))
    if(image.shape[1] == 0):
      raise ValueError("image shape[1] is 0!: {} {}".format(image.shape, dataset_dict["file_name"]))

    # Set the image in the dictionary
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))


    # Do remainder of dictionary
    classID = ((dataset_dict["annotations"])[0])["category_id"]
    dataset_dict["classID"] = classID

    # bboxes
    # if(self.dataset_used != "comparison"):
    annos = \
    [
      utils.transform_instance_annotations(obj, transforms, image.shape[:2])
      for obj in dataset_dict.pop("annotations")
      if obj.get("iscrowd", 0) == 0
    ]

    # transformNames = [transforms.__name__ for x in transforms]
    # transformNames = ", ".join(transformNames)

    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    # # no bboxes
    # else:
    #   instances = Instances(  (dataset_dict["height"],dataset_dict["width"])  )
    #   instances.gt_classes = torch.tensor([dataset_dict["classID"]])
    #   dataset_dict["instances"] = instances

    dataset_dict["transforms"] = transforms

    return dataset_dict
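For context, a hedged sketch (not part of the original code) of how a custom mapper like train_mapper is typically plugged into Detectron2's training data loader. MyMapper is a hypothetical stand-in for whatever class owns train_mapper (with dataset_used, is_crop_to_bbox, threshold_dimension, modelLink and fixed_wh set on it); the Detectron2 calls themselves are the standard API.

from detectron2.config import get_cfg
from detectron2.data import build_detection_train_loader

cfg = get_cfg()
# ... merge model config and point cfg.DATASETS.TRAIN at a registered dataset first ...
mapper = MyMapper()  # hypothetical owner of train_mapper / test_mapper
train_loader = build_detection_train_loader(cfg, mapper=mapper.train_mapper)
for batched_inputs in train_loader:
    # Each element carries the keys set by the mapper: "image", "instances",
    # "classID", "transforms", plus the usual "file_name", "height", "width".
    break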
Example #5
  def test_mapper(self, dataset_dict):
    # If we're mapping at test time
    if(self.is_test_time_mapping):
      return self.train_mapper(dataset_dict)



    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    # Create a copy of the dataset dict
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below


    ##### Image Transformations #####
    # Read in the image
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    # fileName = dataset_dict["file_name"]

    
    ## Crop to bounding box ##
    if(self.dataset_used != "comparison" and self.is_crop_to_bbox):
      # Get the bounding box
      bbox = ((dataset_dict["annotations"])[0])["bbox"]
      xmin,ymin,xmax,ymax = bbox
      w = xmax-xmin
      h = ymax-ymin

      # IsCropToBBox = True
      # if(IsCropToBBox):
      # Nudge the crop to be slightly outside of the bounding box
      nudgedXMin = xmin-15
      nudgedYMin = ymin-15
      nudgedW = w+50
      nudgedH = h+50

      # If the nudged crop goes outside of the image dimensions, clamp it
      imageHeight = image.shape[0]
      imageWidth  = image.shape[1]
      if(nudgedXMin < 0): nudgedXMin = 0
      if(nudgedYMin < 0): nudgedYMin = 0
      # Clamp relative to the crop origin so the crop stays inside the image
      if(nudgedXMin+nudgedW >= imageWidth):  nudgedW = imageWidth-1-nudgedXMin
      if(nudgedYMin+nudgedH >= imageHeight): nudgedH = imageHeight-1-nudgedYMin

      # Apply the crop
      cropT = T.CropTransform(nudgedXMin,nudgedYMin,nudgedW,nudgedH)
      image = cropT.apply_image(image)
      
      transforms = T.TransformList([cropT])
    # Comparison has a bbox the size of the image, so don't bother cropping
    else:
      transforms = T.TransformList([])

      
    # Apply the crop to the bbox as well
    # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

    dataset_dict["height"] = image.shape[0]
    dataset_dict["width"]  = image.shape[1]
    
    # Add to the list of transforms
    
    # else:
    #   nudgedH = dataset_dict["height"]
    #   nudgedW = dataset_dict["width"]
      

    ## Scale the image size ##
    # thresholdDimension = 1000
    # if(dataset_used == "large"):
      # thresholdDimension = 500
    # thresholdDimension = 800
    # thresholdDimension = 600
    thresholdDimension = self.threshold_dimension

    currWidth  = dataset_dict["width"]
    currHeight = dataset_dict["height"]

    # The way I've done VGG and YOLO means they need the same-size input images
    if(self.modelLink in ["VGG19_BN","YOLOV3"]):
      vgg_im_size = thresholdDimension
      # Apply the scaling transform
      scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=vgg_im_size,new_w=vgg_im_size,interp="nearest") 
      image = scaleT.apply_image(image.copy())

      # Apply the scaling to the bbox
      # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

      # Add this to the list of transforms
      transforms = transforms + scaleT

      # Set the dimensions
      dataset_dict["height"] = image.shape[0]
      dataset_dict["width"]  = image.shape[1]
    # Not VGG or YOLO: downscale only at this threshold
    else:
      if(currHeight > thresholdDimension or currWidth > thresholdDimension):
        myNewH = 0
        myNewW = 0
        # Scale the longest dimension to the threshold, the other in proportion
        if(currHeight > currWidth): 
          myNewH = thresholdDimension
          ratio = currHeight/float(myNewH)
          myNewW = currWidth/float(ratio)
          myNewW = int(round(myNewW))
          # myNewW = 800
        else:
          # myNewH = 800
          myNewW = thresholdDimension
          ratio = currWidth/float(myNewW)
          myNewH = currHeight/float(ratio)
          myNewH = int(round(myNewH))

        # Apply the scaling transform
        # scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewH,new_w=myNewW,interp="nearest") 
        # scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewW,new_w=myNewH,interp="nearest") 
        if(self.fixed_wh):
          scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewH,new_w=myNewW,interp="nearest") 
        else:
          scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewW,new_w=myNewH,interp="nearest") 
        image = scaleT.apply_image(image.copy())

        # Apply the scaling to the bbox
        # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

        # Add this to the list of transforms
        transforms = transforms + scaleT

        # Set the dimensions
        dataset_dict["height"] = image.shape[0]
        dataset_dict["width"]  = image.shape[1]
    
    ## Apply a random flip ##
    # image, tfms = T.apply_transform_gens([T.RandomFlip()], image)
    # transforms = transforms + tfms

    ## Apply Other Transforms ##
    # image, tfms = T.apply_transform_gens([T.RandomBrightness(0.4,1.6),T.RandomContrast(0.4,1.6),T.RandomSaturation(0.5,1.5),T.RandomLighting(1.2)], image)
    # transforms = transforms + tfms

    ## Apply random affine (actually just a shear) ##
    # Pass in the image size
    # PILImage = Image.fromarray(image)
    # RandAffT = RandomAffineTransform(PILImage.size)
    # Apply affine to image
    # image = RandAffT.apply_image(image.copy())
    # Append to transforms
    # transforms = transforms + RandAffT

    ##### END Image Transformations #####

    # Keep these in for now I suppose
    if(image.shape[0] == 0):
      raise ValueError("image shape[0] is 0!: {} {}".format(image.shape, dataset_dict["file_name"]))
    if(image.shape[1] == 0):
      raise ValueError("image shape[1] is 0!: {} {}".format(image.shape, dataset_dict["file_name"]))

    # Set the image in the dictionary
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))


    # Do remainder of dictionary
    classID = ((dataset_dict["annotations"])[0])["category_id"]
    dataset_dict["classID"] = classID

    annos = \
    [
      utils.transform_instance_annotations(obj, transforms, image.shape[:2])
      for obj in dataset_dict.pop("annotations")
      if obj.get("iscrowd", 0) == 0
    ]

    # transformNames = [transforms.__name__ for x in transforms]
    # transformNames = ", ".join(transformNames)

    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)

    dataset_dict["transforms"] = transforms

    return dataset_dict

  # # Small mappers
  # def small_train_mapper(self,dataset_dict):
  #   return self.train_mapper(dataset_dict,"small")

  # def small_test_mapper(self,dataset_dict):
  #   if(self.is_test_time_mapping):
  #     return self.test_mapper(dataset_dict,"small")
  #   else:
  #     return self.train_mapper(dataset_dict,"small")

  # # Large mappers
  # def large_train_mapper(self,dataset_dict):
  #   return self.train_mapper(dataset_dict,"large")

  # def large_test_mapper(self,dataset_dict):
  #   if(self.is_test_time_mapping):
  #     return self.test_mapper(dataset_dict,"large")
  #   else:
  #     return self.train_mapper(dataset_dict,"large")

  # # Full mappers
  # def full_train_mapper(self,dataset_dict):
  #   return self.train_mapper(dataset_dict,"full")

  # def full_test_mapper(self,dataset_dict):
  #   if(self.is_test_time_mapping):
  #     return self.test_mapper(dataset_dict,"full")
  #   else:
  #     return self.train_mapper(dataset_dict,"full")

  # # Comparison mappers
  # def comparison_train_mapper(self,dataset_dict):
  #   return self.train_mapper(dataset_dict,"comparison")

  # def comparison_test_mapper(self,dataset_dict):
  #   if(self.is_test_time_mapping):
  #     return self.test_mapper(dataset_dict,"comparison")
  #   else:
  #     return self.train_mapper(dataset_dict,"comparison")
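Likewise, a hedged sketch (illustrative names, standard Detectron2 calls) of pairing test_mapper with the evaluation loader; note that when is_test_time_mapping is True, test_mapper simply delegates to train_mapper, as shown at the top of Example #5.

from detectron2.data import build_detection_test_loader

# `cfg` and `mapper` as in the previous sketch; "my_dataset_val" is assumed to
# be a dataset name already registered in DatasetCatalog.
test_loader = build_detection_test_loader(cfg, "my_dataset_val",
                                          mapper=mapper.test_mapper)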