def gen_crop_transform_with_instance(crop_size, image_size, instances, crop_box=True):
    """
    Generate a CropTransform so that the cropping region contains
    the center of one instance drawn at random from ``instances``.

    Args:
        crop_size (tuple): h, w in pixels
        image_size (tuple): h, w
        instances (list[dict]): annotation dicts of the image's instances;
            the sampled one must provide "bbox" and "bbox_mode". One entry is
            picked with ``np.random.choice``.
        crop_box (bool): if True (default), the sampled crop is returned as is.
            If False, the crop is repeatedly passed through ``adjust_crop``
            together with all ``instances`` until it reports no further
            modification.

    Returns:
        T.CropTransform: the sampled (and possibly adjusted) crop. If the
        adjustment has not finished after more than 100 calls, a crop covering
        the whole image is returned instead.

    Raises:
        AssertionError: if the box center lies outside the image, or the crop
            size exceeds the image size.
    """
    instance = np.random.choice(instances)
    crop_size = np.asarray(crop_size, dtype=np.int32)
    bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS)
    center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5
    assert (
        image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1]
    ), "The annotation bounding box is outside of the image!"
    assert (
        image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1]
    ), "Crop size is larger than image size!"

    # Valid range for the crop's top-left corner: it must not start after the
    # box center, must end after it, and must keep the crop inside the image.
    min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0)
    max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0)
    max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32))

    y0 = np.random.randint(min_yx[0], max_yx[0] + 1)
    x0 = np.random.randint(min_yx[1], max_yx[1] + 1)

    # if some instance is cropped extend the box
    if not crop_box:
        num_modifications = 0
        modified = True

        # convert crop_size to float
        crop_size = crop_size.astype(np.float32)
        while modified:
            modified, x0, y0, crop_size = adjust_crop(x0, y0, crop_size, instances)
            num_modifications += 1
            if num_modifications > 100:
                # Give up and fall back to the full image rather than looping
                # forever.
                logger.info(
                    "Cannot finish cropping adjustment within 100 tries (#instances %d).",
                    len(instances),
                )
                return T.CropTransform(0, 0, image_size[1], image_size[0])

    return T.CropTransform(*map(int, (x0, y0, crop_size[1], crop_size[0])))
def gen_crop_transform_with_instance(crop_size, image_size, instance):
    """
    Build a CropTransform whose window contains the center of the bounding
    box of the given instance.

    Args:
        crop_size (tuple): crop height and width, in pixels.
        image_size (tuple): image height and width.
        instance (dict): annotation dict of a single instance, in Detectron2's
            dataset format; its "bbox" and "bbox_mode" entries are read.

    Returns:
        T.CropTransform: a crop of size ``crop_size`` whose top-left corner is
        drawn at random among the positions that keep the box center inside.
    """
    crop_hw = np.asarray(crop_size, dtype=np.int32)
    box = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS)
    center_y = (box[1] + box[3]) * 0.5
    center_x = (box[0] + box[2]) * 0.5
    center_yx = (center_y, center_x)

    assert (
        image_size[0] >= center_y and image_size[1] >= center_x
    ), "The annotation bounding box is outside of the image!"
    assert (
        image_size[0] >= crop_hw[0] and image_size[1] >= crop_hw[1]
    ), "Crop size is larger than image size!"

    # Lowest admissible corner: the crop must still reach the box center.
    lowest_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_hw, 0)
    # Highest admissible corner: inside the image and not past the center.
    image_limit_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_hw, 0)
    highest_yx = np.minimum(image_limit_yx, np.ceil(center_yx).astype(np.int32))

    # y is sampled before x (keeps the random stream order).
    y0 = np.random.randint(lowest_yx[0], highest_yx[0] + 1)
    x0 = np.random.randint(lowest_yx[1], highest_yx[1] + 1)
    return T.CropTransform(x0, y0, crop_hw[1], crop_hw[0])
def test_crop(self):
    """Check bbox and keypoint annotations after a crop that lies away from them."""
    crop = T.CropTransform(300, 300, 10, 10)
    transforms = T.TransformList([crop])

    kpts = np.random.rand(17, 3) * 50 + 15
    kpts[:, 2] = 2  # third column forced to 2 for every keypoint
    anno = {
        "bbox": np.asarray([10, 10, 200, 300]),
        "bbox_mode": BoxMode.XYXY_ABS,
        "keypoints": kpts,
    }

    # Transform a deep copy so ``anno`` itself stays untouched.
    output = detection_utils.transform_instance_annotations(
        copy.deepcopy(anno), transforms, (400, 400)
    )

    expected_bbox = np.asarray([-290, -290, -100, 0])
    bbox_matches = (output["bbox"] == expected_bbox).all()
    self.assertTrue(bbox_matches)

    # keypoints are no longer visible
    visibility_cleared = (output["keypoints"][:, 2] == 0).all()
    self.assertTrue(visibility_cleared)
def train_mapper(self, dataset_dict):
    """
    Map one dataset record to a training input, similar to the default
    DatasetMapper but with custom cropping, scaling and augmentation.

    Steps, in order:
      1. Read the image (BGR) from ``dataset_dict["file_name"]``.
      2. Unless ``self.dataset_used == "comparison"`` or
         ``self.is_crop_to_bbox`` is falsy, crop around the first
         annotation's bbox (15 px margin before, 50 px added to the size).
      3. Rescale: to a square of ``self.threshold_dimension`` when
         ``self.modelLink`` is "VGG19_BN" or "YOLOV3"; otherwise downscale
         only when a side exceeds the threshold.
      4. Random flip, random brightness/contrast/saturation/lighting and a
         random affine transform (stronger ranges for "comparison").
      5. Apply the accumulated transforms to the annotations.

    Args:
        dataset_dict (dict): one record; "file_name" and a non-empty
            "annotations" list (entries with "bbox" and "category_id") are
            read. The record is deep-copied; the caller's dict is not modified.

    Returns:
        dict: the copied record with "image" (float32 tensor, last image axis
        moved to the front), "height", "width", "classID", "instances" and
        "transforms" set, and "annotations" removed.

    Raises:
        ValueError: if the transformed image has a zero-sized first or second
            dimension.
    """
    # Work on a copy: the record is modified by the code below.
    dataset_dict = copy.deepcopy(dataset_dict)

    ##### Image Transformations #####
    image = utils.read_image(dataset_dict["file_name"], format="BGR")

    ## Crop to bounding box ##
    # Comparison has bbox the size of the image, so don't bother cropping it.
    if self.dataset_used != "comparison" and self.is_crop_to_bbox:
        # NOTE(review): the bbox is unpacked as (xmin, ymin, xmax, ymax)
        # without consulting "bbox_mode" -- confirm the dataset is XYXY.
        xmin, ymin, xmax, ymax = dataset_dict["annotations"][0]["bbox"]
        image_h = image.shape[0]
        image_w = image.shape[1]

        # Nudge the crop to be slightly outside of the bounding box, without
        # starting before the image origin.
        crop_x = max(xmin - 15, 0)
        crop_y = max(ymin - 15, 0)
        crop_w = (xmax - xmin) + 50
        crop_h = (ymax - ymin) + 50

        # Keep the crop inside the image: limit its size to what remains
        # after the crop origin, so the recorded CropTransform matches the
        # pixels that are actually cut out.
        crop_w = min(crop_w, image_w - crop_x)
        crop_h = min(crop_h, image_h - crop_y)

        cropT = T.CropTransform(crop_x, crop_y, crop_w, crop_h)
        image = cropT.apply_image(image)
        transforms = T.TransformList([cropT])
    else:
        transforms = T.TransformList([])

    # The bbox itself is transformed further down, when the accumulated
    # transform list is applied to the annotations.
    dataset_dict["height"] = image.shape[0]
    dataset_dict["width"] = image.shape[1]

    ## Scale the image size ##
    thresholdDimension = self.threshold_dimension
    currWidth = dataset_dict["width"]
    currHeight = dataset_dict["height"]

    scaleT = None
    # NOTE: YOLO input size must be multiple of 32
    if self.modelLink in ["VGG19_BN", "YOLOV3"]:
        # These models get a fixed square input.
        scaleT = T.ScaleTransform(
            h=currHeight, w=currWidth,
            new_h=thresholdDimension, new_w=thresholdDimension,
            interp="nearest",
        )
    elif currHeight > thresholdDimension or currWidth > thresholdDimension:
        # Downscale only above the threshold: the longest side becomes the
        # threshold, the other one is scaled in proportion.
        if currHeight > currWidth:
            myNewH = thresholdDimension
            ratio = currHeight / float(myNewH)
            myNewW = int(round(currWidth / float(ratio)))
        else:
            myNewW = thresholdDimension
            ratio = currWidth / float(myNewW)
            myNewH = int(round(currHeight / float(ratio)))

        if self.fixed_wh:
            scaleT = T.ScaleTransform(
                h=currHeight, w=currWidth, new_h=myNewH, new_w=myNewW, interp="nearest"
            )
        else:
            # NOTE(review): height and width are passed swapped here; this
            # looks like legacy behaviour kept behind ``fixed_wh`` -- confirm.
            scaleT = T.ScaleTransform(
                h=currHeight, w=currWidth, new_h=myNewW, new_w=myNewH, interp="nearest"
            )

    if scaleT is not None:
        image = scaleT.apply_image(image.copy())
        transforms = transforms + scaleT
        dataset_dict["height"] = image.shape[0]
        dataset_dict["width"] = image.shape[1]

    ## Apply a random flip ##
    image, tfms = T.apply_transform_gens([T.RandomFlip()], image)
    transforms = transforms + tfms

    ## Apply other transforms ##
    is_comparison = self.dataset_used == "comparison"
    if not is_comparison:
        # Standard random image mods
        color_gens = [
            T.RandomBrightness(0.4, 1.6),
            T.RandomContrast(0.4, 1.6),
            T.RandomSaturation(0.5, 1.5),
            T.RandomLighting(1.2),
        ]
    else:
        # More extreme for comparison set
        color_gens = [
            T.RandomBrightness(0.2, 1.8),
            T.RandomContrast(0.2, 1.8),
            T.RandomSaturation(0.3, 1.7),
            T.RandomLighting(1.5),
        ]
    image, tfms = T.apply_transform_gens(color_gens, image)
    transforms = transforms + tfms

    ## Apply random affine ##
    # The transform is given the image size as reported by PIL.
    PILImage = Image.fromarray(image)
    # More extreme shear for comparison; the angle range is the same.
    shear_range = 50 if is_comparison else 8
    angle_range = 30
    rand_shear = (
        np.random.uniform(-shear_range, shear_range),
        np.random.uniform(-shear_range, shear_range),
    )
    rand_angle = np.random.uniform(-angle_range, angle_range)
    RandAffT = RandomAffineTransform(PILImage.size, shear=rand_shear, angle=rand_angle)
    image = RandAffT.apply_image(image.copy())
    transforms = transforms + RandAffT
    ##### END Image Transformations #####

    # Sanity check: refuse degenerate images, reporting shape and file.
    for axis in (0, 1):
        if image.shape[axis] == 0:
            raise ValueError(
                "image shape[{}] is 0!: shape={}, file={}".format(
                    axis, image.shape, dataset_dict["file_name"]
                )
            )

    # Set the image in the dictionary
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    # The class id is taken from the first annotation; read it before the
    # annotations are popped below.
    dataset_dict["classID"] = dataset_dict["annotations"][0]["category_id"]

    # Apply every recorded transform to the non-crowd annotations.
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)

    dataset_dict["transforms"] = transforms

    return dataset_dict
def test_mapper(self, dataset_dict):
    """
    Map one dataset record to a test-time input: optional crop to the first
    annotation's bbox and rescale, with no random augmentation.

    When ``self.is_test_time_mapping`` is truthy the record is handed to
    ``self.train_mapper`` instead, i.e. the training augmentations are applied.
    NOTE(review): the legacy wrappers kept in the comment block after this
    method dispatched the other way round (flag set -> test_mapper) --
    confirm the current direction is intended.

    Args:
        dataset_dict (dict): one record; "file_name" and a non-empty
            "annotations" list (entries with "bbox" and "category_id") are
            read. The record is deep-copied; the caller's dict is not modified.

    Returns:
        dict: the copied record with "image" (float32 tensor, last image axis
        moved to the front), "height", "width", "classID", "instances" and
        "transforms" set, and "annotations" removed.

    Raises:
        ValueError: if the transformed image has a zero-sized first or second
            dimension.
    """
    if self.is_test_time_mapping:
        return self.train_mapper(dataset_dict)

    # Work on a copy: the record is modified by the code below.
    dataset_dict = copy.deepcopy(dataset_dict)

    ##### Image Transformations #####
    image = utils.read_image(dataset_dict["file_name"], format="BGR")

    ## Crop to bounding box ##
    if self.dataset_used != "comparison" and self.is_crop_to_bbox:
        # NOTE(review): the bbox is unpacked as (xmin, ymin, xmax, ymax)
        # without consulting "bbox_mode" -- confirm the dataset is XYXY.
        xmin, ymin, xmax, ymax = dataset_dict["annotations"][0]["bbox"]
        image_h = image.shape[0]
        image_w = image.shape[1]

        # Nudge the crop to be slightly outside of the bounding box, without
        # starting before the image origin.
        crop_x = max(xmin - 15, 0)
        crop_y = max(ymin - 15, 0)
        crop_w = (xmax - xmin) + 50
        crop_h = (ymax - ymin) + 50

        # Keep the crop inside the image: limit its size to what remains
        # after the crop origin, so the recorded CropTransform matches the
        # pixels that are actually cut out.
        crop_w = min(crop_w, image_w - crop_x)
        crop_h = min(crop_h, image_h - crop_y)

        cropT = T.CropTransform(crop_x, crop_y, crop_w, crop_h)
        image = cropT.apply_image(image)
        transforms = T.TransformList([cropT])
    else:
        transforms = T.TransformList([])

    # The bbox itself is transformed further down, when the accumulated
    # transform list is applied to the annotations.
    dataset_dict["height"] = image.shape[0]
    dataset_dict["width"] = image.shape[1]

    ## Scale the image size ##
    thresholdDimension = self.threshold_dimension
    currWidth = dataset_dict["width"]
    currHeight = dataset_dict["height"]

    scaleT = None
    # the way ive done vgg and yolo means they need the same size images
    if self.modelLink in ["VGG19_BN", "YOLOV3"]:
        scaleT = T.ScaleTransform(
            h=currHeight, w=currWidth,
            new_h=thresholdDimension, new_w=thresholdDimension,
            interp="nearest",
        )
    elif currHeight > thresholdDimension or currWidth > thresholdDimension:
        # Downscale only above the threshold: the longest side becomes the
        # threshold, the other one is scaled in proportion.
        if currHeight > currWidth:
            myNewH = thresholdDimension
            ratio = currHeight / float(myNewH)
            myNewW = int(round(currWidth / float(ratio)))
        else:
            myNewW = thresholdDimension
            ratio = currWidth / float(myNewW)
            myNewH = int(round(currHeight / float(ratio)))

        if self.fixed_wh:
            scaleT = T.ScaleTransform(
                h=currHeight, w=currWidth, new_h=myNewH, new_w=myNewW, interp="nearest"
            )
        else:
            # NOTE(review): height and width are passed swapped here; this
            # looks like legacy behaviour kept behind ``fixed_wh`` -- confirm.
            scaleT = T.ScaleTransform(
                h=currHeight, w=currWidth, new_h=myNewW, new_w=myNewH, interp="nearest"
            )

    if scaleT is not None:
        image = scaleT.apply_image(image.copy())
        transforms = transforms + scaleT
        dataset_dict["height"] = image.shape[0]
        dataset_dict["width"] = image.shape[1]

    # No random flip, colour jitter or affine transform is applied here,
    # unlike in train_mapper.
    ##### END Image Transformations #####

    # Sanity check: refuse degenerate images, reporting shape and file.
    for axis in (0, 1):
        if image.shape[axis] == 0:
            raise ValueError(
                "image shape[{}] is 0!: shape={}, file={}".format(
                    axis, image.shape, dataset_dict["file_name"]
                )
            )

    # Set the image in the dictionary
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    # The class id is taken from the first annotation; read it before the
    # annotations are popped below.
    dataset_dict["classID"] = dataset_dict["annotations"][0]["category_id"]

    # Apply every recorded transform to the non-crowd annotations.
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)

    dataset_dict["transforms"] = transforms

    return dataset_dict

# Legacy per-dataset wrappers, kept for reference (they use an older
# two-argument mapper signature):
# # Small mappers
# def small_train_mapper(self,dataset_dict):
#     return self.train_mapper(dataset_dict,"small")
# def small_test_mapper(self,dataset_dict):
#     if(self.is_test_time_mapping):
#         return self.test_mapper(dataset_dict,"small")
#     else:
#         return self.train_mapper(dataset_dict,"small")
# # Large mappers
# def large_train_mapper(self,dataset_dict):
#     return self.train_mapper(dataset_dict,"large")
# def large_test_mapper(self,dataset_dict):
#     if(self.is_test_time_mapping):
#         return self.test_mapper(dataset_dict,"large")
#     else:
#         return self.train_mapper(dataset_dict,"large")
# # Full mappers
# def full_train_mapper(self,dataset_dict):
#     return self.train_mapper(dataset_dict,"full")
# def full_test_mapper(self,dataset_dict):
#     if(self.is_test_time_mapping):
#         return self.test_mapper(dataset_dict,"full")
#     else:
#         return self.train_mapper(dataset_dict,"full")
# # Comparison mappers
# def comparison_train_mapper(self,dataset_dict):
#     return self.train_mapper(dataset_dict,"comparison")
# def comparison_test_mapper(self,dataset_dict):
#     if(self.is_test_time_mapping):
#         return self.test_mapper(dataset_dict,"comparison")
#     else:
#         return self.train_mapper(dataset_dict,"comparison")