def test_augmentation_input_args(self):
    input_shape = (100, 100)
    output_shape = (50, 50)

    # define two augmentations with different args
    class TG1(T.Augmentation):
        input_args = ("image", "sem_seg")

        def get_transform(self, image, sem_seg):
            return T.ResizeTransform(
                input_shape[0], input_shape[1], output_shape[0], output_shape[1]
            )

    class TG2(T.Augmentation):
        def get_transform(self, image):
            assert image.shape[:2] == output_shape  # check that TG1 is applied
            return T.HFlipTransform(output_shape[1])

    image = np.random.rand(*input_shape).astype("float32")
    sem_seg = (np.random.rand(*input_shape) < 0.5).astype("uint8")
    inputs = T.StandardAugInput(image, sem_seg=sem_seg)  # provide two args
    tfms = inputs.apply_augmentations([TG1(), TG2()])
    self.assertIsInstance(tfms[0], T.ResizeTransform)
    self.assertIsInstance(tfms[1], T.HFlipTransform)
    self.assertTrue(inputs.image.shape[:2] == output_shape)
    self.assertTrue(inputs.sem_seg.shape[:2] == output_shape)

    class TG3(T.Augmentation):
        input_args = ("image", "nonexist")

        def get_transform(self, image, nonexist):
            pass

    with self.assertRaises(AttributeError):
        inputs.apply_augmentations([TG3()])

def test_augmentation_list(self):
    input_shape = (100, 100)
    image = np.random.rand(*input_shape).astype("float32")
    sem_seg = (np.random.rand(*input_shape) < 0.5).astype("uint8")
    inputs = T.StandardAugInput(image, sem_seg=sem_seg)  # provide two args

    augs = T.AugmentationList([T.RandomFlip(), T.Resize(20)])
    _ = T.AugmentationList([augs, T.Resize(30)])(inputs)

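# NOTE: a minimal sketch (not part of the tests above) of why the mapper
# implementations below hold on to the TransformList returned by
# apply_augmentations: the recorded transforms can be re-applied to other
# geometry (boxes, proposals, polygons) of the same image. The augmentation
# choices here are illustrative assumptions, not from the original code.
def _example_reuse_transforms():
    import numpy as np
    from detectron2.data import transforms as T

    image = np.random.rand(100, 100, 3).astype("float32")
    boxes = np.array([[10.0, 10.0, 60.0, 60.0]], dtype="float32")  # XYXY_ABS

    aug_input = T.StandardAugInput(image, boxes=boxes)
    transforms = aug_input.apply_augmentations([T.Resize((50, 50)), T.RandomFlip()])

    # Re-apply the same recorded transforms to geometry not stored in the AugInput.
    extra_boxes = np.array([[0.0, 0.0, 30.0, 30.0]], dtype="float32")
    return aug_input.image, transforms.apply_box(extra_boxes)
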
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    image = transform(image=image)["image"]
    ############################################################################
    '''
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    h, w, _ = image.shape
    utils.check_image_size(dataset_dict, image)

    bboxes = [ann["bbox"] for ann in dataset_dict['annotations']]
    labels = [ann['category_id'] for ann in dataset_dict['annotations']]
    class_labels = [CLASSES[label] for label in labels]

    segmentations = [ann["segmentation"] for ann in dataset_dict['annotations']]
    #cprint("before :" , segmentations)
    masks = convert_coco_poly_to_mask(segmentations, h, w)
    masks = [mask.numpy() for mask in masks]

    transformed = transform(image=image, bboxes=bboxes, class_labels=class_labels, masks=masks)

    image = transformed["image"]
    bboxes = transformed["bboxes"]
    class_labels = transformed["class_labels"]
    labels = [CLASSES.index(cl) for cl in class_labels]

    filtered_masks = []
    for mask in transformed["masks"]:
        #if len(np.unique(mask)) > 1:
        filtered_masks.append(mask)

    if len(bboxes) != len(filtered_masks):
        print(len(bboxes), len(filtered_masks), len(labels))
    #print(len(bboxes), len(masks), len(labels))

    seg_masks = [binary_mask_to_polygon(mask, tolerance=2) for mask in masks]

    for idx in range(len(labels)):
        dataset_dict['annotations'][idx]["bbox"] = bboxes[idx]
        dataset_dict['annotations'][idx]["labels"] = labels[idx]
        dataset_dict['annotations'][idx]["segmentation"] = seg_masks[idx]
    dataset_dict['annotations'] = dataset_dict['annotations'][:len(labels)]
    '''

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
    else:
        sem_seg_gt = None

    aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
    transforms = aug_input.apply_augmentations(self.augmentations)
    image, sem_seg_gt = aug_input.image, aug_input.sem_seg

    image_shape = image.shape[:2]  # h, w
    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    if sem_seg_gt is not None:
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

    # USER: Remove if you don't use pre-computed proposals.
    # Most users would not need this feature.
    if self.proposal_topk is not None:
        utils.transform_proposals(
            dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk
        )

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.use_instance_mask:
                anno.pop("segmentation", None)
            if not self.use_keypoint:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.instance_mask_format
        )

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.
        if self.recompute_boxes:
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    return dataset_dict

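# NOTE: for context, a __call__ like the one above is invoked by detectron2's
# data loader once per dataset dict. A minimal sketch of wiring a mapper into a
# train loader with the stock API; the dataset name and use of the default
# DatasetMapper are assumptions for illustration only.
def _example_build_loader():
    from detectron2.config import get_cfg
    from detectron2.data import DatasetMapper, build_detection_train_loader

    cfg = get_cfg()
    cfg.DATASETS.TRAIN = ("coco_2017_train",)  # assumes this dataset is registered
    # DatasetMapper is the stock mapper; a custom class exposing a __call__ like
    # the one above can be passed in the same way.
    return build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, is_train=True))
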
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
    else:
        sem_seg_gt = None

    aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
    transforms = aug_input.apply_augmentations(self.augmentations)
    image, sem_seg_gt = aug_input.image, aug_input.sem_seg

    image_shape = image.shape[:2]  # h, w
    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    if sem_seg_gt is not None:
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

    # USER: Remove if you don't use pre-computed proposals.
    # Most users would not need this feature.
    if self.proposal_topk is not None:
        utils.transform_proposals(
            dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk
        )

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.use_instance_mask:
                anno.pop("segmentation", None)
            if not self.use_keypoint:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.instance_mask_format
        )

        # @ Will Lee: fine-grained category labels: non-standard, basically standard, or standard
        standard_ids = [obj["standard_id"] for obj in annos]
        standard_ids = torch.tensor(standard_ids, dtype=torch.int64)
        instances.gt_standards = standard_ids

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.
        if self.recompute_boxes:
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    return dataset_dict

def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # print("BELOW IS THE dataset_dict (FOR DEBUGGING)")
    # print(dataset_dict)
    # USER: Write your own image loading if it's not from a file
    try:
        image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    except Exception as e:
        print(dataset_dict["file_name"])
        print(e)
        raise e
    try:
        utils.check_image_size(dataset_dict, image)
    except SizeMismatchError as e:
        expected_wh = (dataset_dict["width"], dataset_dict["height"])
        image_wh = (image.shape[1], image.shape[0])
        if (image_wh[1], image_wh[0]) == expected_wh:
            print("transposing image {}".format(dataset_dict["file_name"]))
            image = image.transpose(1, 0, 2)
        else:
            raise e

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
    else:
        sem_seg_gt = None

    boxes = np.asarray(
        [
            BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS)
            for instance in dataset_dict["annotations"]
        ]
    )
    aug_input = T.StandardAugInput(image, boxes=boxes, sem_seg=sem_seg_gt)
    transforms = aug_input.apply_augmentations(self.augmentation)
    image, sem_seg_gt = aug_input.image, aug_input.sem_seg

    image_shape = image.shape[:2]  # h, w
    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    if sem_seg_gt is not None:
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

    # USER: Remove if you don't use pre-computed proposals.
    # Most users would not need this feature.
    if self.proposal_topk:
        utils.transform_proposals(
            dataset_dict,
            image_shape,
            transforms,
            proposal_topk=self.proposal_topk,
            min_box_size=self.proposal_min_box_size,
        )

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        dataset_dict.pop("pano_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.use_instance_mask:
                anno.pop("segmentation", None)
            if not self.use_keypoint:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = annotations_to_instances(
            annos, image_shape, mask_format=self.instance_mask_format
        )

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format).
        # The tight bounding box of the cropped triangle should be [(1,0),(2,1)],
        # which is not equal to the intersection of original bounding box and the cropping box.
        if self.recompute_boxes:
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    if self.basis_loss_on and self.is_train:
        # load basis supervisions
        if self.ann_set == "coco":
            basis_sem_path = (
                dataset_dict["file_name"]
                .replace("train2017", "thing_train2017")
                .replace("image/train", "thing_train")
            )
        else:
            basis_sem_path = (
                dataset_dict["file_name"]
                .replace("coco", "lvis")
                .replace("train2017", "thing_train")
            )
        # change extension to npz
        basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz"
        basis_sem_gt = np.load(basis_sem_path)["mask"]
        basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
        basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
        dataset_dict["basis_sem"] = basis_sem_gt

    return dataset_dict

def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    category = dataset_dict["annotations"][0]["category_id"]
    try:
        image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    except Exception as e:
        print(dataset_dict["file_name"])
        print(e)
        raise e
    try:
        utils.check_image_size(dataset_dict, image)
    except SizeMismatchError as e:
        expected_wh = (dataset_dict["width"], dataset_dict["height"])
        image_wh = (image.shape[1], image.shape[0])
        if (image_wh[1], image_wh[0]) == expected_wh:
            print("transposing image {}".format(dataset_dict["file_name"]))
            image = image.transpose(1, 0, 2)
        else:
            raise e

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
    else:
        sem_seg_gt = None

    if int(category) != 5:
        boxes = np.asarray(
            [
                BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS)
                for instance in dataset_dict["annotations"]
            ]
        )
        aug_input = T.StandardAugInput(image, boxes=boxes, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(self.augmentation)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg
    else:
        boxes = np.asarray([0])
        # aug_input = T.StandardAugInput(image, boxes=boxes, sem_seg=sem_seg_gt)
        # transforms = aug_input.apply_augmentations(self.augmentation)
        # image, sem_seg_gt = aug_input.image, aug_input.sem_seg

    image_shape = image.shape[:2]  # h, w
    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    if sem_seg_gt is not None:
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

    # USER: Remove if you don't use pre-computed proposals.
    # Most users would not need this feature.
    if self.proposal_topk:
        if int(category) != 5:
            utils.transform_proposals(
                dataset_dict,
                image_shape,
                transforms,
                proposal_topk=self.proposal_topk,
                min_box_size=self.proposal_min_box_size,
            )

    if not self.is_train:
        if int(category) != 5:
            dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        dataset_dict.pop("pano_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.use_instance_mask:
                anno.pop("segmentation", None)
            if not self.use_keypoint:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        if int(category) != 5:
            annos = [
                transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices,
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]

        segment_transform = transf.Compose([
            myTransform.FreeScaleMask((60, 100)),
            myTransform.MaskToTensor(),
        ])
        img_transform = transf.Compose([
            transf.Resize((288, 800)),
            transf.ToTensor(),
            transf.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])
        if self.is_train:
            use_ax = True
        else:
            use_ax = False

        if int(category) != 5:
            # dataset_dict['seg_label'] = torch.zeros([36, 100, 3])
            # dataset_dict['cls_label'] = [[-1 for _ in range(4)] for _ in range(18)]
            instances = annotations_to_instances(
                annos, image_shape, mask_format=self.instance_mask_format
            )
            if self.recompute_boxes:
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)
        else:
            cl = LaneClsDataset(
                '/home/ghr/hdd/traffic_sign/only_lane/images/CULANE_288',
                img_path=dataset_dict['file_name'],
                row_anchor=culane_row_anchor,
                seg_path=dataset_dict['annotations'][0]['lanefilepath'],
                segment_transform=segment_transform,
                use_aux=use_ax,
            )
            if use_ax:
                img, cls, seg = cl.get_item()
            else:
                img, cls = cl.get_item()
                seg = 0
            # import pdb; pdb.set_trace()
            # dataset_dict["image"] = img
            dataset_dict['seg_label'] = seg
            dataset_dict['cls_label'] = cls
            # instances = annotations_to_instances(dataset_dict['annotations'], image_shape, mask_format=self.instance_mask_format)
            # Call lane class return label,...

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.
        # if self.recompute_boxes:
        #     instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        # dataset_dict["instances"] = utils.filter_empty_instances(instances)

    if self.basis_loss_on and self.is_train:
        # load basis supervisions
        if self.ann_set == "coco":
            basis_sem_path = (
                dataset_dict["file_name"]
                .replace("train2017", "thing_train2017")
                .replace("image/train", "thing_train")
            )
        else:
            basis_sem_path = (
                dataset_dict["file_name"]
                .replace("coco", "lvis")
                .replace("train2017", "thing_train")
            )
        # change extension to npz
        basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz"
        basis_sem_gt = np.load(basis_sem_path)["mask"]
        basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
        basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
        dataset_dict["basis_sem"] = basis_sem_gt

    return dataset_dict

def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    try:
        utils.check_image_size(dataset_dict, image)
    except Exception as e:
        print(e)
        import moxing as mox
        mox.file.copy_parallel(
            dataset_dict["file_name"],
            's3://bucket-6756/liangxiwen/result/haitian_semi/unbiased-teacher/wrong_imgs/'
            + dataset_dict["file_name"].split('/')[-1],
        )
        print(image.shape)
        image = np.rot90(image)
        print(image.shape)
        utils.check_image_size(dataset_dict, image)

    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
    else:
        sem_seg_gt = None

    aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
    transforms = aug_input.apply_augmentations(self.augmentation)
    image_weak_aug, sem_seg_gt = aug_input.image, aug_input.sem_seg
    image_shape = image_weak_aug.shape[:2]  # h, w

    if sem_seg_gt is not None:
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

    if self.load_proposals:
        utils.transform_proposals(
            dataset_dict,
            image_shape,
            transforms,
            proposal_topk=self.proposal_topk,
            min_box_size=self.proposal_min_box_size,
        )

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format
        )

        if self.compute_tight_boxes and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()

        bboxes_d2_format = utils.filter_empty_instances(instances)
        dataset_dict["instances"] = bboxes_d2_format

    # apply strong augmentation
    # We use torchvision augmentation, which is not compatible with detectron2,
    # which uses numpy format for images. Thus, we need to convert to PIL format first.
    image_pil = Image.fromarray(image_weak_aug.astype("uint8"), "RGB")
    image_strong_aug = np.array(self.strong_augmentation(image_pil))
    dataset_dict["image"] = torch.as_tensor(
        np.ascontiguousarray(image_strong_aug.transpose(2, 0, 1))
    )

    dataset_dict_key = copy.deepcopy(dataset_dict)
    dataset_dict_key["image"] = torch.as_tensor(
        np.ascontiguousarray(image_weak_aug.transpose(2, 0, 1))
    )
    assert dataset_dict["image"].size(1) == dataset_dict_key["image"].size(1)
    assert dataset_dict["image"].size(2) == dataset_dict_key["image"].size(2)
    return (dataset_dict, dataset_dict_key)

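# NOTE: the mapper above expects self.strong_augmentation to be a PIL-in /
# PIL-out callable, since its result is converted back to numpy with np.array.
# A minimal torchvision sketch; the exact list of ops is an assumption, not the
# original pipeline.
def _example_strong_augmentation():
    import torchvision.transforms as tv_transforms

    return tv_transforms.Compose([
        tv_transforms.RandomApply(
            [tv_transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8
        ),
        tv_transforms.RandomGrayscale(p=0.2),
    ])
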
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    imgh, imgw = image.shape[:2]
    utils.check_image_size(dataset_dict, image)

    transform = train_transforms()
    country = dataset_dict['file_name'].split('/')[-1].split('_')[0]

    # Augment the image
    for category_id, sample_prob in enumerate(self.sample_probs[country]):
        if np.random.random() <= sample_prob:
            damage_obj = sample_a_damage_of_type(self.dataset_dicts_to_sample, category_id)
            if "annotations" not in dataset_dict:
                dataset_dict["annotations"] = []

            # Duplicate the damage at the index
            image = copy.deepcopy(image)
            image.setflags(write=1)
            # damage = damage_obj['damage_masked']
            damage = damage_obj['damage']

            # the place to put it
            posx, posy = random.sample(self.all_locs, 1)[0]
            dh, dw = damage.shape[:2]
            bboxes = np.array([obj['bbox'] for obj in dataset_dict['annotations']])
            counter = 0
            while len(bboxes) > 0 and check_conflict_boxes(
                    [posx, posy, posx + dw, posy + dh], bboxes):
                posx, posy = random.sample(self.all_locs, 1)[0]
                counter += 1
                # only try for 1000 times maximum
                if counter > 1000:
                    break

            # make sure that we don't place it out of the picture
            posy = min(posy, imgh - dh)
            posx = min(posx, imgw - dw)
            # make sure the damage is not out of bounds
            posy = 0 if posy < 0 else posy
            posx = 0 if posx < 0 else posx
            dh = imgh - posy if posy + dh > imgh else dh
            dw = imgw - posx if posx + dw > imgw else dw
            damage = damage[0:imgh, 0:imgw]

            # scale its color to its underlying range
            area_tobe_replaced = image[posy:posy + dh, posx:posx + dw]
            # Also transfer the color from the original picture to this
            damage = color_transfer(area_tobe_replaced, damage)

            # rotate it
            if category_id == 0 or category_id == 1:
                damage = rotate_image(damage, random.randint(-5, 5))
            if category_id == 2 or category_id == 3:
                damage = rotate_image(damage, random.randint(-30, 30))
            dh, dw = damage.shape[:2]

            # Build the mask to avoid the black due to rotation
            mask = np.full((imgh, imgw), False)  # default to not set at all
            mask1 = damage.max(axis=2) > 0
            mask[posy:posy + dh, posx:posx + dw] = mask1
            image[mask] = damage[mask1]
            image.setflags(write=0)

            # change the box location of the annotation
            damage_obj['annotation']['bbox'] = [posx, posy, posx + dw, posy + dh]
            # Add the annotation to the set
            dataset_dict["annotations"].append(damage_obj['annotation'])

    # TODO: Augmentation comes here
    if "annotations" in dataset_dict and len(dataset_dict['annotations']) > 0:
        bboxes = np.array([obj['bbox'] for obj in dataset_dict['annotations']])
        # Make sure the bounding boxes are not out of range
        bw = bboxes[:, 2] - bboxes[:, 0]
        bh = bboxes[:, 3] - bboxes[:, 1]
        bw[bw <= 0] = 1
        bh[bh <= 0] = 1
        bboxes[:, 0] = np.maximum(bboxes[:, 0], 0)
        bboxes[:, 0] = np.minimum(bboxes[:, 0], imgw - 1)
        bboxes[:, 1] = np.maximum(bboxes[:, 1], 0)
        bboxes[:, 1] = np.minimum(bboxes[:, 1], imgh - 1)
        bboxes[:, 2] = bboxes[:, 0] + bw
        bboxes[:, 3] = bboxes[:, 1] + bh
        class_labels = np.array([obj['category_id'] for obj in dataset_dict['annotations']])

        if transform:
            for i in range(10):
                sample = {
                    'image': image,
                    'bboxes': bboxes,
                    'class_labels': class_labels,
                }
                sample = transform(**sample)
                if len(sample['bboxes']) > 0:
                    image = sample['image']
                    bboxes = torch.stack(
                        tuple(map(torch.tensor, zip(*sample['bboxes'])))
                    ).permute(1, 0).numpy()
                    class_labels = sample['class_labels']
                    break

        # Update the annotations
        annotations = []
        bbox_mode = dataset_dict.pop("annotations")[0]['bbox_mode']
        for i in range(len(bboxes)):
            annotations.append({
                'bbox': bboxes[i],
                'bbox_mode': bbox_mode,
                'category_id': class_labels[i],
            })
        dataset_dict["annotations"] = annotations

    if "annotations" in dataset_dict and len(dataset_dict["annotations"]) > 0:
        bboxes = np.array([obj['bbox'] for obj in dataset_dict['annotations']])
        aug_input = T.StandardAugInput(image, boxes=bboxes)
        apply_augmentations(self.augmentations, aug_input)
        image = aug_input.image
        image_shape = image.shape[:2]  # height, width

        # USER: Implement additional transformations if you have other types of data
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

        for i, obj in enumerate(dataset_dict["annotations"]):
            if obj.get("iscrowd", 0) == 0:
                obj['bbox'] = aug_input.boxes[i]

        annos = [obj for obj in dataset_dict["annotations"]]  # keep for visualization purposes
        if not self.for_vis:
            # remove annotations if we don't need them for visualization
            dataset_dict.pop('annotations')

        instances = utils.annotations_to_instances(annos, image_shape)
        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    return dataset_dict

def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)
    original_image = image

    if self.crop_gen is None or np.random.rand() > 0.5:
        tfm_gens = self.tfm_gens
    else:
        tfm_gens = self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:]

    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))
    else:
        sem_seg_gt = None

    aug_input = T.StandardAugInput(original_image, sem_seg=sem_seg_gt)
    transforms = aug_input.apply_augmentations(tfm_gens)
    image, sem_seg_gt = aug_input.image, aug_input.sem_seg

    image_shape = image.shape[:2]  # h, w
    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    if self.proposal_topk is not None:
        utils.transform_proposals(
            dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk
        )

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        return dataset_dict

    if type(transforms[0]) is FT.NoOpTransform:
        flip = 0
    elif type(transforms[0]) is FT.HFlipTransform:
        flip = 1
    else:
        flip = 2
    dataset_dict["flip"] = flip

    if sem_seg_gt is not None:
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        if self.sem_seg_unlabeled_region_on:
            sem_seg_gt[sem_seg_gt == self.ignore_value] = self.num_sem_seg_classes
        dataset_dict["sem_seg"] = sem_seg_gt

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image_shape)
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
        if self.unseen_label_set is not None:
            dataset_dict["instances"] = filter_unseen_class(
                dataset_dict["instances"], self.unseen_label_set
            )

    if self.unlabeled_region_on:
        if self.sem_seg_unlabeled_region_on:
            cum_sem_seg = cum_map(dataset_dict["sem_seg"], self.num_sem_seg_classes)
        else:
            cum_sem_seg = cum_map(dataset_dict["sem_seg"], self.ignore_value)
        dataset_dict["integral_sem_seg"] = cum_sem_seg

    return dataset_dict