def test_flip_keypoints(self):
    transforms = T.TransformList([T.HFlipTransform(400)])
    anno = {
        "bbox": np.asarray([10, 10, 200, 300]),
        "bbox_mode": BoxMode.XYXY_ABS,
        "keypoints": np.random.rand(17, 3) * 50 + 15,
    }

    output = detection_utils.transform_instance_annotations(
        copy.deepcopy(anno),
        transforms,
        (400, 400),
        keypoint_hflip_indices=detection_utils.create_keypoint_hflip_indices(
            ["keypoints_coco_2017_train"]
        ),
    )
    # The first keypoint is the nose: its x coordinate is simply mirrored.
    self.assertTrue(np.allclose(output["keypoints"][0, 0], 400 - anno["keypoints"][0, 0]))
    # The last 16 keypoints form 8 left-right pairs: after the flip, each pair
    # is swapped and the x coordinates are mirrored ...
    self.assertTrue(
        np.allclose(
            output["keypoints"][1:, 0].reshape(-1, 2)[:, ::-1],
            400 - anno["keypoints"][1:, 0].reshape(-1, 2),
        )
    )
    # ... while the y coordinates and visibility are swapped within each pair
    # but otherwise unchanged.
    self.assertTrue(
        np.allclose(
            output["keypoints"][1:, 1:].reshape(-1, 2, 2)[:, ::-1, :],
            anno["keypoints"][1:, 1:].reshape(-1, 2, 2),
        )
    )
def test_transform_simple_annotation(self):
    transforms = T.TransformList([T.HFlipTransform(400)])
    anno = {
        "bbox": np.asarray([10, 10, 200, 300]),
        "bbox_mode": BoxMode.XYXY_ABS,
        "category_id": 3,
        "segmentation": [[10, 10, 100, 100, 100, 10], [150, 150, 200, 150, 200, 200]],
    }

    output = detection_utils.transform_instance_annotations(anno, transforms, (400, 400))
    self.assertTrue(np.allclose(output["bbox"], [200, 10, 390, 300]))
    self.assertEqual(len(output["segmentation"]), len(anno["segmentation"]))
    self.assertTrue(np.allclose(output["segmentation"][0], [390, 10, 300, 100, 300, 10]))

    # Sanity check: the transformed annotations can still be batched into Instances.
    detection_utils.annotations_to_instances([output, output], (400, 400))
def test_crop(self):
    transforms = T.TransformList([T.CropTransform(300, 300, 10, 10)])
    keypoints = np.random.rand(17, 3) * 50 + 15
    keypoints[:, 2] = 2
    anno = {
        "bbox": np.asarray([10, 10, 200, 300]),
        "bbox_mode": BoxMode.XYXY_ABS,
        "keypoints": keypoints,
    }

    output = detection_utils.transform_instance_annotations(
        copy.deepcopy(anno), transforms, (400, 400)
    )
    # The crop shifts the box by the crop origin; coordinates may go negative.
    self.assertTrue((output["bbox"] == np.asarray([-290, -290, -100, 0])).all())
    # All keypoints fall outside the crop, so they are no longer visible.
    self.assertTrue((output["keypoints"][:, 2] == 0).all())
def transform_instance_annotations_rotated(self, annotation, transforms, image_size):
    if isinstance(transforms, (tuple, list)):
        transforms = T.TransformList(transforms)

    # Split the rotated box into its axis-aligned part and its angle. Only the
    # axis-aligned part is transformed; the angle is reattached unchanged, so
    # transforms that should alter the angle (e.g. flips) are only approximated.
    bbox = annotation["bbox"]
    alpha = bbox[-1]
    bbox = bbox[:4]

    bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
    # Clip the transformed box to the image size.
    bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0)
    bbox = bbox.tolist()
    bbox = BoxMode.convert(bbox, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    # image_size is (h, w), so (h, w, h, w) reversed gives (w, h, w, h).
    bbox = np.minimum(bbox, list(image_size + image_size)[::-1])

    annotation["bbox"] = np.append(bbox, alpha)
    annotation["bbox_mode"] = BoxMode.XYWHA_ABS
    return annotation
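# Illustrative usage sketch, not from the original source: the method never
# touches `self`, so a throwaway object is bound here just to call it. The
# 5-element (x, y, w, h, angle) bbox layout is an assumption read off the code above.
import types

rotated_anno = {
    "bbox": np.asarray([10.0, 10.0, 100.0, 50.0, 30.0]),
    "bbox_mode": BoxMode.XYWHA_ABS,  # ignored by the method, shown for context
}
out = transform_instance_annotations_rotated(
    types.SimpleNamespace(),  # stand-in for the owning object
    rotated_anno,
    T.TransformList([T.HFlipTransform(400)]),
    (400, 400),
)
# The axis-aligned part was flipped to [290, 10, 100, 50]; the angle is
# carried through unchanged (it is NOT mirrored).
assert out["bbox"][-1] == 30.0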
def test_transform_RLE_resize(self):
    transforms = T.TransformList(
        [T.HFlipTransform(400), T.ScaleTransform(300, 400, 400, 400, "bilinear")]
    )
    mask = np.zeros((300, 400), order="F").astype("uint8")
    mask[:, :200] = 1

    anno = {
        "bbox": np.asarray([10, 10, 200, 300]),
        "bbox_mode": BoxMode.XYXY_ABS,
        "segmentation": mask_util.encode(mask[:, :, None])[0],
        "category_id": 3,
    }
    output = detection_utils.transform_instance_annotations(
        copy.deepcopy(anno), transforms, (400, 400)
    )

    inst = detection_utils.annotations_to_instances(
        [output, output], (400, 400), mask_format="bitmask"
    )
    self.assertTrue(isinstance(inst.gt_masks, BitMasks))
def transform_instance_annotations(
    annotation, transforms, image_size, *, add_meta_infos=False
):
    """
    Apply transforms to the box and (optionally) the meta_infos of a single instance.

    It will use `transforms.apply_box` for the box and `transforms.apply_meta_infos`
    for the meta_infos. If you need anything more specially designed for each data
    structure, you'll need to implement your own version of this function or the
    transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
            It will be modified in-place.
        transforms (TransformList or list[Transform]):
        image_size (tuple): the height, width of the transformed image
        add_meta_infos (bool): whether to compute and attach a "meta_infos" field.

    Returns:
        dict: the same input dict with fields "bbox" (and "meta_infos", if requested)
            transformed according to `transforms`.
            The "bbox_mode" field will be set to XYXY_ABS.
    """
    if isinstance(transforms, (tuple, list)):
        transforms = T.TransformList(transforms)

    # bbox is 1d (per-instance bounding box)
    bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS)
    # clip transformed bbox to image size
    bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0)
    annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1])
    annotation["bbox_mode"] = BoxMode.XYXY_ABS

    # Accumulate meta_infos through each transform, starting from an empty dict.
    if add_meta_infos:
        annotation["meta_infos"] = transforms.apply_meta_infos(dict())

    return annotation
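# Illustrative sketch, not from the original source: `apply_meta_infos` is a
# project-specific hook (it is not part of stock detectron2), so a toy Transform
# carrying that hook is defined here purely to show the call path.
class _MetaHFlip(T.HFlipTransform):
    def apply_meta_infos(self, meta_infos):
        meta_infos["hflipped"] = True  # record that this transform ran
        return meta_infos

anno = {"bbox": np.asarray([10, 10, 200, 300]), "bbox_mode": BoxMode.XYXY_ABS}
out = transform_instance_annotations(anno, [_MetaHFlip(400)], (400, 400), add_meta_infos=True)
assert out["meta_infos"] == {"hflipped": True}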
def test_transform_uncompressed_RLE(self):
    transforms = T.TransformList([T.HFlipTransform(400)])
    mask = np.zeros((300, 400)).astype("uint8")
    mask[:, :200] = 1

    anno = {
        "bbox": np.asarray([10, 10, 200, 300]),
        "bbox_mode": BoxMode.XYXY_ABS,
        "segmentation": binary_mask_to_uncompressed_rle(mask),
        "category_id": 3,
    }
    output = detection_utils.transform_instance_annotations(
        copy.deepcopy(anno), transforms, (300, 400)
    )
    # After the flip, the foreground half of the mask has moved to the right side.
    mask = output["segmentation"]
    self.assertTrue((mask[:, 200:] == 1).all())
    self.assertTrue((mask[:, :200] == 0).all())

    inst = detection_utils.annotations_to_instances(
        [output, output], (400, 400), mask_format="bitmask"
    )
    self.assertTrue(isinstance(inst.gt_masks, BitMasks))
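# `binary_mask_to_uncompressed_rle` is used above but not shown in this section.
# A minimal sketch of what it plausibly does, assuming COCO's uncompressed RLE
# format: {"size": [h, w], "counts": [ints]}, with runs counted in column-major
# (Fortran) order and the first count covering zeros.
def binary_mask_to_uncompressed_rle(mask):
    pixels = np.asarray(mask, dtype=np.uint8).flatten(order="F")
    counts = []
    prev, run = 0, 0
    for v in pixels:
        if v == prev:
            run += 1
        else:
            counts.append(run)
            prev, run = int(v), 1
    counts.append(run)
    return {"size": list(mask.shape), "counts": counts}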
def train_mapper(self, dataset_dict):
    # Implement a mapper similar to the default DatasetMapper, with custom behaviour.
    # Work on a copy: the dict is modified by the code below.
    dataset_dict = copy.deepcopy(dataset_dict)

    ##### Image Transformations #####
    # Read in the image.
    image = utils.read_image(dataset_dict["file_name"], format="BGR")

    ## Crop to bounding box ##
    # Crop for all datasets except "comparison".
    if self.dataset_used != "comparison" and self.is_crop_to_bbox:
        # Get the first annotation's bounding box (assumed XYXY).
        bbox = dataset_dict["annotations"][0]["bbox"]
        xmin, ymin, xmax, ymax = bbox
        w = xmax - xmin
        h = ymax - ymin

        # Nudge the crop to extend slightly outside the bounding box.
        nudgedXMin = max(xmin - 15, 0)
        nudgedYMin = max(ymin - 15, 0)
        nudgedW = w + 50
        nudgedH = h + 50

        # Clamp the crop region to the image bounds.
        imageHeight, imageWidth = image.shape[:2]
        if nudgedXMin + nudgedW >= imageWidth:
            nudgedW = imageWidth - 1 - nudgedXMin
        if nudgedYMin + nudgedH >= imageHeight:
            nudgedH = imageHeight - 1 - nudgedYMin

        # Apply the crop. The crop is also applied to the bbox in
        # transform_instance_annotations, as long as cropT is kept in `transforms`.
        cropT = T.CropTransform(nudgedXMin, nudgedYMin, nudgedW, nudgedH)
        image = cropT.apply_image(image)
        transforms = T.TransformList([cropT])
    else:
        # The comparison set's bbox is the size of the image, so don't bother cropping.
        transforms = T.TransformList([])

    dataset_dict["height"] = image.shape[0]
    dataset_dict["width"] = image.shape[1]

    ## Scale the image size ##
    thresholdDimension = self.threshold_dimension
    currWidth = dataset_dict["width"]
    currHeight = dataset_dict["height"]

    # NOTE: YOLO input size must be a multiple of 32.
    if self.modelLink in ["VGG19_BN", "YOLOV3"]:
        # VGG/YOLO expect a fixed square input: scale both dimensions to the threshold.
        vgg_im_size = thresholdDimension
        scaleT = T.ScaleTransform(
            h=currHeight, w=currWidth, new_h=vgg_im_size, new_w=vgg_im_size, interp="nearest"
        )
        image = scaleT.apply_image(image.copy())
        # The scaling is applied to the bbox in transform_instance_annotations,
        # as long as scaleT is appended to `transforms`.
        transforms = transforms + T.TransformList([scaleT])
        dataset_dict["height"] = image.shape[0]
        dataset_dict["width"] = image.shape[1]
    else:
        # Downscale only when a dimension exceeds the threshold; scale the longest
        # dimension to the threshold and the other in proportion.
        if currHeight > thresholdDimension or currWidth > thresholdDimension:
            if currHeight > currWidth:
                myNewH = thresholdDimension
                ratio = currHeight / float(myNewH)
                myNewW = int(round(currWidth / ratio))
            else:
                myNewW = thresholdDimension
                ratio = currWidth / float(myNewW)
                myNewH = int(round(currHeight / ratio))

            # fixed_wh toggles the (new_h, new_w) argument order, a workaround
            # kept from the original code.
            if self.fixed_wh:
                scaleT = T.ScaleTransform(
                    h=currHeight, w=currWidth, new_h=myNewH, new_w=myNewW, interp="nearest"
                )
            else:
                scaleT = T.ScaleTransform(
                    h=currHeight, w=currWidth, new_h=myNewW, new_w=myNewH, interp="nearest"
                )
            image = scaleT.apply_image(image.copy())
            transforms = transforms + T.TransformList([scaleT])
            dataset_dict["height"] = image.shape[0]
            dataset_dict["width"] = image.shape[1]

    ## Apply a random flip ##
    image, tfms = T.apply_transform_gens([T.RandomFlip()], image)
    transforms = transforms + tfms

    ## Apply other random transforms ##
    # Standard photometric jitter for most datasets ...
    if self.dataset_used != "comparison":
        image, tfms = T.apply_transform_gens(
            [
                T.RandomBrightness(0.4, 1.6),
                T.RandomContrast(0.4, 1.6),
                T.RandomSaturation(0.5, 1.5),
                T.RandomLighting(1.2),
            ],
            image,
        )
    # ... and a more extreme jitter for the comparison set.
    else:
        image, tfms = T.apply_transform_gens(
            [
                T.RandomBrightness(0.2, 1.8),
                T.RandomContrast(0.2, 1.8),
                T.RandomSaturation(0.3, 1.7),
                T.RandomLighting(1.5),
            ],
            image,
        )
    transforms = transforms + tfms

    ## Apply a random affine (a shear plus a rotation) ##
    # A PIL image is only needed to pass the image size to the transform.
    PILImage = Image.fromarray(image)
    if self.dataset_used != "comparison":
        shear_range = 8
        angle_range = 30
    else:
        # More extreme affine for the comparison set.
        shear_range = 50
        angle_range = 30
    rand_shear = (
        np.random.uniform(-shear_range, shear_range),
        np.random.uniform(-shear_range, shear_range),
    )
    rand_angle = np.random.uniform(-angle_range, angle_range)
    RandAffT = RandomAffineTransform(PILImage.size, shear=rand_shear, angle=rand_angle)
    image = RandAffT.apply_image(image.copy())
    transforms = transforms + T.TransformList([RandAffT])
    ##### END Image Transformations #####

    # Sanity checks on the transformed image dimensions.
    if image.shape[0] == 0 or image.shape[1] == 0:
        raise ValueError(
            "image has a zero dimension: {}, {}".format(image.shape, dataset_dict["file_name"])
        )

    # Set the image in the dictionary (HWC uint8 -> CHW float32 tensor).
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    # Fill in the remainder of the dictionary.
    classID = dataset_dict["annotations"][0]["category_id"]
    dataset_dict["classID"] = classID

    # Transform the annotations with the accumulated transforms, then build Instances.
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    dataset_dict["transforms"] = transforms
    return dataset_dict
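# Illustrative wiring sketch, not from the original source: a custom mapper like
# the one above replaces detectron2's default DatasetMapper when the train loader
# is built. `cfg` and `mapper_obj` are placeholders for the caller's config and
# the instance that owns train_mapper:
#
#   from detectron2.data import build_detection_train_loader
#   train_loader = build_detection_train_loader(cfg, mapper=mapper_obj.train_mapper)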
def test_mapper(self, dataset_dict):
    # If we're mapping at test time, reuse the training mapper.
    if self.is_test_time_mapping:
        return self.train_mapper(dataset_dict)

    # Implement a mapper similar to the default DatasetMapper, with custom behaviour.
    # Work on a copy: the dict is modified by the code below.
    dataset_dict = copy.deepcopy(dataset_dict)

    ##### Image Transformations #####
    # Read in the image.
    image = utils.read_image(dataset_dict["file_name"], format="BGR")

    ## Crop to bounding box ##
    if self.dataset_used != "comparison" and self.is_crop_to_bbox:
        # Get the first annotation's bounding box (assumed XYXY).
        bbox = dataset_dict["annotations"][0]["bbox"]
        xmin, ymin, xmax, ymax = bbox
        w = xmax - xmin
        h = ymax - ymin

        # Nudge the crop to extend slightly outside the bounding box.
        nudgedXMin = max(xmin - 15, 0)
        nudgedYMin = max(ymin - 15, 0)
        nudgedW = w + 50
        nudgedH = h + 50

        # Clamp the crop region to the image bounds.
        imageHeight, imageWidth = image.shape[:2]
        if nudgedXMin + nudgedW >= imageWidth:
            nudgedW = imageWidth - 1 - nudgedXMin
        if nudgedYMin + nudgedH >= imageHeight:
            nudgedH = imageHeight - 1 - nudgedYMin

        # Apply the crop. The crop is also applied to the bbox in
        # transform_instance_annotations, as long as cropT is kept in `transforms`.
        cropT = T.CropTransform(nudgedXMin, nudgedYMin, nudgedW, nudgedH)
        image = cropT.apply_image(image)
        transforms = T.TransformList([cropT])
    else:
        transforms = T.TransformList([])

    dataset_dict["height"] = image.shape[0]
    dataset_dict["width"] = image.shape[1]

    ## Scale the image size ##
    thresholdDimension = self.threshold_dimension
    currWidth = dataset_dict["width"]
    currHeight = dataset_dict["height"]

    # VGG and YOLO are fed fixed-size square inputs.
    if self.modelLink in ["VGG19_BN", "YOLOV3"]:
        vgg_im_size = thresholdDimension
        scaleT = T.ScaleTransform(
            h=currHeight, w=currWidth, new_h=vgg_im_size, new_w=vgg_im_size, interp="nearest"
        )
        image = scaleT.apply_image(image.copy())
        transforms = transforms + T.TransformList([scaleT])
        dataset_dict["height"] = image.shape[0]
        dataset_dict["width"] = image.shape[1]
    else:
        # Downscale only when a dimension exceeds the threshold; scale the longest
        # dimension to the threshold and the other in proportion.
        if currHeight > thresholdDimension or currWidth > thresholdDimension:
            if currHeight > currWidth:
                myNewH = thresholdDimension
                ratio = currHeight / float(myNewH)
                myNewW = int(round(currWidth / ratio))
            else:
                myNewW = thresholdDimension
                ratio = currWidth / float(myNewW)
                myNewH = int(round(currHeight / ratio))

            # fixed_wh toggles the (new_h, new_w) argument order, a workaround
            # kept from the original code.
            if self.fixed_wh:
                scaleT = T.ScaleTransform(
                    h=currHeight, w=currWidth, new_h=myNewH, new_w=myNewW, interp="nearest"
                )
            else:
                scaleT = T.ScaleTransform(
                    h=currHeight, w=currWidth, new_h=myNewW, new_w=myNewH, interp="nearest"
                )
            image = scaleT.apply_image(image.copy())
            transforms = transforms + T.TransformList([scaleT])
            dataset_dict["height"] = image.shape[0]
            dataset_dict["width"] = image.shape[1]

    # No random flip, photometric jitter, or random affine at test time.
    ##### END Image Transformations #####

    # Sanity checks on the transformed image dimensions.
    if image.shape[0] == 0 or image.shape[1] == 0:
        raise ValueError(
            "image has a zero dimension: {}, {}".format(image.shape, dataset_dict["file_name"])
        )

    # Set the image in the dictionary (HWC uint8 -> CHW float32 tensor).
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    # Fill in the remainder of the dictionary.
    classID = dataset_dict["annotations"][0]["category_id"]
    dataset_dict["classID"] = classID

    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    dataset_dict["transforms"] = transforms
    return dataset_dict
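# Illustrative wiring sketch, not from the original source: the test-time
# counterpart of the loader wiring above. `cfg`, the dataset name, and
# `mapper_obj` are placeholders:
#
#   from detectron2.data import build_detection_test_loader
#   test_loader = build_detection_test_loader(
#       cfg, "my_dataset_test", mapper=mapper_obj.test_mapper
#   )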
def __call__(self, aug_input):
    # aug_input.image is HxWxC; remember the original spatial size.
    old_h, old_w, _ = aug_input.image.shape
    # Applying an Augmentation to an AugInput transforms it in place and
    # returns the concrete Transform that was applied.
    scaler = T.RandomExtent(self.scale_range, self.shift_range)(aug_input)
    resizer = T.Resize((old_h, old_w))(aug_input)
    return T.TransformList([scaler, resizer])
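# Illustrative usage sketch (the enclosing class is assumed to subclass
# T.Augmentation and expose scale_range/shift_range; "RandomScaledExtent" is a
# name invented here). Calling the augmentation mutates aug_input.image in place
# and returns the transforms, which can then be applied to annotations:
#
#   aug = RandomScaledExtent(scale_range=(0.5, 1.0), shift_range=(0.5, 0.5))
#   aug_input = T.AugInput(image)      # image: HxWxC ndarray
#   tfms = aug(aug_input)              # TransformList of the two applied transforms
#   new_image = aug_input.image        # random sub-extent, resized back to (H, W)
#   new_boxes = tfms.apply_box(boxes)  # the same geometry applied to boxes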
def transform_instance_annotations(
    annotation, transforms, image_size, *, keypoint_hflip_indices=None
):
    """
    Apply transforms to box, segmentation and keypoints annotations of a single instance.

    It will use `transforms.apply_box` for the box, and
    `transforms.apply_coords` for segmentation polygons & keypoints.
    If you need anything more specially designed for each data structure,
    you'll need to implement your own version of this function or the transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
            It will be modified in-place.
        transforms (TransformList or list[Transform]):
        image_size (tuple): the height, width of the transformed image
        keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`.

    Returns:
        dict: the same input dict with fields "bbox", "segmentation", "keypoints"
            transformed according to `transforms`.
            The "bbox_mode" field will be set to XYXY_ABS.
    """
    if isinstance(transforms, (tuple, list)):
        transforms = T.TransformList(transforms)

    # bbox is 1d (per-instance bounding box)
    bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS)
    # clip transformed bbox to image size
    bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0)
    annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1])
    annotation["bbox_mode"] = BoxMode.XYXY_ABS

    if "segmentation" in annotation:
        # each instance contains 1 or more polygons
        segm = annotation["segmentation"]
        if isinstance(segm, list):
            # polygons
            polygons = [np.asarray(p).reshape(-1, 2) for p in segm]
            annotation["segmentation"] = [
                p.reshape(-1) for p in transforms.apply_polygons(polygons)
            ]
        elif isinstance(segm, dict):
            # RLE
            mask = mask_util.decode(segm)
            mask = transforms.apply_segmentation(mask)
            assert tuple(mask.shape[:2]) == image_size
            annotation["segmentation"] = mask
        else:
            raise ValueError(
                "Cannot transform segmentation of type '{}'! "
                "Supported types are: polygons as list[list[float] or ndarray], "
                "COCO-style RLE as a dict.".format(type(segm))
            )

    if "keypoints" in annotation:
        keypoints = transform_keypoint_annotations(
            annotation["keypoints"], transforms, image_size, keypoint_hflip_indices
        )
        annotation["keypoints"] = keypoints

    return annotation
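# `transform_keypoint_annotations` is referenced above but not shown. A minimal
# sketch consistent with detectron2's behaviour and with the assertions in the
# tests above: coordinates go through apply_coords, left/right keypoints are
# swapped after an odd number of horizontal flips, and keypoints that land
# outside the image lose their visibility.
def transform_keypoint_annotations(keypoints, transforms, image_size, keypoint_hflip_indices=None):
    keypoints = np.asarray(keypoints, dtype="float64").reshape(-1, 3)
    keypoints[:, :2] = transforms.apply_coords(keypoints[:, :2])

    # Swap left/right keypoint pairs if an odd number of hflips was applied.
    do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
    if do_hflip:
        assert keypoint_hflip_indices is not None
        keypoints = keypoints[keypoint_hflip_indices, :]

    # Zero out the visibility of keypoints outside the (h, w) image.
    inside = (keypoints[:, :2] >= np.array([0, 0])) & (
        keypoints[:, :2] <= np.array(image_size[::-1])
    )
    inside = inside.all(axis=1)
    keypoints[~inside, 2] = 0
    return keypoints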