def adjust_for_zoom(self) -> (np.ndarray, COCO_Annotation):
    assert self.src_img is not None and self.magnitude is not None
    if self.center is None:
        self.center_to_cener_of(img=self.src_img)
    img_h, img_w = self.src_img.shape[:2]
    target_crop_h, target_crop_w = int(img_h / self.magnitude), int(img_w / self.magnitude)
    zoom_cx, crop_xmin, crop_xmax = self._adjust_for_zoom(
        target_zoom_c=self.center.x,
        target_crop_length=target_crop_w,
        image_side_length=img_w)
    zoom_cy, crop_ymin, crop_ymax = self._adjust_for_zoom(
        target_zoom_c=self.center.y,
        target_crop_length=target_crop_h,
        image_side_length=img_h)
    crop_bbox = BBox(xmin=crop_xmin, ymin=crop_ymin, xmax=crop_xmax, ymax=crop_ymax)
    cropped_img = crop_bbox.crop_from(img=self.src_img)
    if self.src_coco_ann is not None:
        new_coco_ann = self.src_coco_ann.copy()
        new_coco_ann.bbox = self.src_coco_ann.bbox - crop_bbox.pmin
        new_coco_ann.segmentation = self.src_coco_ann.segmentation - crop_bbox.pmin
        new_coco_ann.keypoints = self.src_coco_ann.keypoints - crop_bbox.pmin
        return cropped_img, new_coco_ann
    else:
        return cropped_img, None
def from_dict(cls, ann_dict: dict) -> Detectron2_Annotation:
    check_required_keys(
        ann_dict,
        required_keys=['iscrowd', 'bbox', 'category_id', 'bbox_mode'])
    if ann_dict['bbox_mode'] == BoxMode.XYWH_ABS:
        bbox = BBox.from_list(ann_dict['bbox'], input_format='pminsize')
    elif ann_dict['bbox_mode'] == BoxMode.XYXY_ABS:
        bbox = BBox.from_list(ann_dict['bbox'], input_format='pminpmax')
    else:
        raise NotImplementedError
    if 'keypoints' in ann_dict:
        keypoints = Keypoint2D_List.from_list(
            value_list=ann_dict['keypoints'], demarcation=False)
    else:
        keypoints = Keypoint2D_List()
    return Detectron2_Annotation(
        iscrowd=ann_dict['iscrowd'],
        bbox=bbox,
        keypoints=keypoints,
        category_id=ann_dict['category_id'],
        segmentation=Segmentation.from_list(
            points_list=ann_dict['segmentation'] if 'segmentation' in ann_dict else [],
            demarcation=False),
        bbox_mode=ann_dict['bbox_mode'])
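# Usage sketch (not from the source): builds a hypothetical Detectron2-style record and parses
# it with the converter above. Assumes from_dict is exposed as a classmethod and that BoxMode
# comes from detectron2.structures; all field values below are illustrative only.
from detectron2.structures import BoxMode

example_ann_dict = {
    'iscrowd': 0,
    'bbox': [10.0, 20.0, 30.0, 40.0],             # [xmin, ymin, w, h] because bbox_mode is XYWH_ABS
    'bbox_mode': BoxMode.XYWH_ABS,
    'category_id': 0,
    'keypoints': [15.0, 25.0, 2, 35.0, 55.0, 2],  # flat [x, y, v, ...] list (demarcation=False)
    'segmentation': [],
}
ann = Detectron2_Annotation.from_dict(example_ann_dict)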
def from_dict(cls, ann_dict: dict, strict: bool = True) -> COCO_Annotation:
    if strict:
        check_required_keys(
            ann_dict,
            required_keys=[
                'segmentation', 'num_keypoints', 'area', 'iscrowd',
                'keypoints', 'image_id', 'bbox', 'category_id', 'id'
            ])
        return COCO_Annotation(
            segmentation=Segmentation.from_list(ann_dict['segmentation'], demarcation=False),
            num_keypoints=ann_dict['num_keypoints'],
            area=ann_dict['area'],
            iscrowd=ann_dict['iscrowd'],
            keypoints=Keypoint2D_List.from_list(ann_dict['keypoints'], demarcation=False),
            image_id=ann_dict['image_id'],
            bbox=BBox.from_list(ann_dict['bbox'], input_format='pminsize'),
            category_id=ann_dict['category_id'],
            id=ann_dict['id'],
            keypoints_3d=Keypoint3D_List.from_list(
                ann_dict['keypoints_3d'], demarcation=False)
            if 'keypoints_3d' in ann_dict else None,
            camera=Camera.from_dict(ann_dict['camera_params'])
            if 'camera_params' in ann_dict else None)
    else:
        check_required_keys(
            ann_dict, required_keys=['id', 'category_id', 'image_id'])
        return COCO_Annotation(
            segmentation=Segmentation.from_list(ann_dict['segmentation'], demarcation=False)
            if 'segmentation' in ann_dict else None,
            num_keypoints=ann_dict['num_keypoints'] if 'num_keypoints' in ann_dict else None,
            area=ann_dict['area'] if 'area' in ann_dict else None,
            iscrowd=ann_dict['iscrowd'] if 'iscrowd' in ann_dict else None,
            keypoints=Keypoint2D_List.from_list(ann_dict['keypoints'], demarcation=False)
            if 'keypoints' in ann_dict else None,
            image_id=ann_dict['image_id'],
            bbox=BBox.from_list(ann_dict['bbox'], input_format='pminsize')
            if 'bbox' in ann_dict else None,
            category_id=ann_dict['category_id'],
            id=ann_dict['id'],
            keypoints_3d=Keypoint3D_List.from_list(
                ann_dict['keypoints_3d'], demarcation=False)
            if 'keypoints_3d' in ann_dict else None,
            camera=Camera.from_dict(ann_dict['camera_params'])
            if 'camera_params' in ann_dict else None)
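# Usage sketch (not from the source): strict parsing requires the full COCO key set, while
# strict=False only needs 'id', 'category_id' and 'image_id' and leaves missing fields as None.
# Assumes from_dict is exposed as a classmethod; the values below are illustrative only.
minimal_ann = COCO_Annotation.from_dict(
    {'id': 0, 'category_id': 1, 'image_id': 2, 'bbox': [5, 5, 20, 20]},
    strict=False)
assert minimal_ann.num_keypoints is None and minimal_ann.area is None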
def process_shape(shape: Shape, bbox: BBox, new_shape_handler: ShapeHandler):
    points = [Point.from_list(point) for point in shape.points]
    contained_count = 0
    for point in points:
        if bbox.contains(point):
            contained_count += 1
    if contained_count == 0:
        return
    elif contained_count == len(points):
        pass
    else:
        logger.error(
            f"Found a shape that is only partially contained by a bbox.")
        logger.error(f"Shape: {shape}")
        logger.error(f"BBox: {bbox}")
    cropped_points = [
        Point(x=point.x - bbox.xmin, y=point.y - bbox.ymin) for point in points
    ]
    for point in cropped_points:
        if point.x < 0 or point.y < 0:
            logger.error(f"Encountered negative point after crop: {point}")
            raise Exception
    new_shape = shape.copy()
    new_shape.points = [
        cropped_point.to_list() for cropped_point in cropped_points
    ]
    new_shape_handler.add(new_shape)
def from_dict(self, object_dict: dict) -> NDDS_Annotation_Object:
    check_required_keys(
        object_dict,
        required_keys=[
            'class', 'instance_id', 'visibility', 'location',
            'quaternion_xyzw', 'pose_transform', 'cuboid_centroid',
            'projected_cuboid_centroid', 'bounding_box', 'cuboid',
            'projected_cuboid'
        ])
    check_required_keys(
        object_dict['bounding_box'],
        required_keys=['top_left', 'bottom_right'])
    return NDDS_Annotation_Object(
        class_name=object_dict['class'],
        instance_id=object_dict['instance_id'],
        visibility=object_dict['visibility'],
        location=Point3D.from_list(object_dict['location']),
        quaternion_xyzw=Quaternion.from_list(object_dict['quaternion_xyzw']),
        pose_transform=np.array(object_dict['pose_transform']),
        cuboid_centroid=Point3D.from_list(object_dict['cuboid_centroid']),
        projected_cuboid_centroid=Point2D.from_list(
            object_dict['projected_cuboid_centroid']),
        bounding_box=BBox.from_list(
            object_dict['bounding_box']['top_left'] +
            object_dict['bounding_box']['bottom_right'],
            input_format='pminpmax'),
        cuboid=Cuboid3D.from_list(object_dict['cuboid'], demarcation=True),
        projected_cuboid=Cuboid2D.from_list(
            object_dict['projected_cuboid'], demarcation=True))
def from_dict(self, item_dict: dict) -> BBoxResult:
    check_required_keys(
        item_dict,
        required_keys=['image_id', 'category_id', 'bbox', 'score'])
    return BBoxResult(
        image_id=item_dict['image_id'],
        category_id=item_dict['category_id'],
        bbox=BBox.from_list(bbox=item_dict['bbox'], input_format='pminsize'),
        score=item_dict['score'])
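# Usage sketch (not from the source): a COCO-style detection result stores its bbox as
# [xmin, ymin, width, height] ('pminsize'), which is converted into a BBox as above.
# The values below are illustrative only.
result = BBoxResult(
    image_id=1,
    category_id=3,
    bbox=BBox.from_list([12.0, 34.0, 56.0, 78.0], input_format='pminsize'),
    score=0.92)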
def from_dict(self, item_dict: dict) -> BBoxKeypointResult:
    return BBoxKeypointResult(
        image_id=item_dict['image_id'],
        category_id=item_dict['category_id'],
        keypoints=Keypoint2D_List.from_list(
            item_dict['keypoints'], demarcation=False),
        bbox=BBox.from_list(item_dict['bbox'], input_format='pminsize'),
        score=item_dict['score'])
def write_cropped_image(src_path: str, dst_path: str, bbox: BBox,
                        verbose: bool = False):
    check_input_path_and_output_dir(input_path=src_path, output_path=dst_path)
    img = cv2.imread(src_path)
    int_bbox = bbox.to_int()
    cropped_img = img[int_bbox.ymin:int_bbox.ymax,
                      int_bbox.xmin:int_bbox.xmax, :]
    cv2.imwrite(filename=dst_path, img=cropped_img)
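# Usage sketch (not from the source): crops a rectangular region out of one image file and
# writes it to another. The paths below are placeholders; the BBox import path follows the
# other snippets in this collection.
from common_utils.common_types.bbox import BBox

crop_region = BBox(xmin=100, ymin=50, xmax=400, ymax=300)
write_cropped_image(src_path='/path/to/src.jpg',
                    dst_path='/path/to/cropped.jpg',
                    bbox=crop_region)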
def from_dict(self, object_dict: dict) -> NDDS_Annotation_Object:
    return NDDS_Annotation_Object(
        class_name=object_dict['class'],
        instance_id=object_dict['instance_id'],
        visibility=object_dict['visibility'],
        location=Point3D.from_list(object_dict['location']),
        quaternion_xyzw=Quaternion.from_list(object_dict['quaternion_xyzw']),
        pose_transform=np.array(object_dict['pose_transform']),
        cuboid_centroid=Point3D.from_list(object_dict['cuboid_centroid']),
        projected_cuboid_centroid=Point2D.from_list(
            object_dict['projected_cuboid_centroid']),
        bounding_box=BBox.from_list(
            object_dict['bounding_box']['top_left'] +
            object_dict['bounding_box']['bottom_right']),
        cuboid=Cuboid3D.from_list(object_dict['cuboid']),
        projected_cuboid=Cuboid2D.from_list(object_dict['projected_cuboid']))
def mapper(dataset_dict):
    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    # print(dataset_dict["file_name"])
    # cv2.imwrite(os.path.join(aug_save_path, f'{str(count.count)}_asd.jpg'), image)
    handler = load_augmentation_settings(handler_save_path='test_handler.json')
    for i in range(len(dataset_dict["annotations"])):
        dataset_dict["annotations"][i]["segmentation"] = []
    image, dataset_dict = handler(image=image, dataset_dict_detectron=dataset_dict)
    annots = []
    for item in dataset_dict["annotations"]:
        annots.append(item)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    instances = utils.annotations_to_instances(annots, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    if True:
        vis_img = image.copy()
        bbox_list = [
            BBox.from_list(vals)
            for vals in dataset_dict["instances"].gt_boxes.tensor.numpy().tolist()
        ]
        # seg_list = [Segmentation([Polygon.from_list(poly.tolist(), demarcation=False) for poly in seg_polys]) for seg_polys in dataset_dict["instances"].gt_masks.polygons]
        for bbox in bbox_list:
            # if len(seg) > 0 and False:
            #     vis_img = draw_segmentation(img=vis_img, segmentation=seg, transparent=True)
            vis_img = draw_bbox(img=vis_img, bbox=bbox)
        height, width, channels = vis_img.shape
        print(height, width)
        if count.count < 100:
            cv2.imwrite(os.path.join(aug_save_path, f'{str(count.count)}.jpg'), vis_img)
            count.count_add()
        aug_vis.step(vis_img)
    return dataset_dict
def _predict(self, img: np.ndarray, bbox: BBox = None) -> Dict[str, torch.cuda.Tensor]:
    # Convert to JpegImageFile
    if isinstance(img, JpegImageFile):
        img0 = img.copy()
    elif isinstance(img, np.ndarray):
        img0 = img.copy()
        img0 = cv2.cvtColor(img0, cv2.COLOR_BGR2RGB)
        img0 = Image.fromarray(img0)
    else:
        acceptable_types = ""
        for i, acceptable_type in enumerate([np.ndarray, JpegImageFile]):
            if i == 0:
                acceptable_types += f"\t{acceptable_type.__name__}"
            else:
                acceptable_types += f"\n\t{acceptable_type.__name__}"
        raise TypeError(
            get_type_error_message(
                func_name="PVNetInferer._predict",
                acceptable_types=[np.ndarray, JpegImageFile],
                unacceptable_type=type(img),
                param_name="img",
            ))

    # Blackout Region Outside BBox If Given BBox
    if bbox is not None:
        img0 = np.array(img0)
        img0 = bbox.crop_and_paste(src_img=img0, dst_img=np.zeros_like(img0))
        img0 = Image.fromarray(img0)

    # Infer
    img0 = self.transforms.on_image(img0)
    img0 = img0.reshape(tuple([1] + list(img0.shape)))
    img0 = torch.from_numpy(img0)
    img0 = img0.cuda()
    return self.network(img0)
def get_bounding_box(self) -> BBox:
    return BBox.from_list([
        self.bbox[0], self.bbox[1], self.bbox[0] + self.bbox[2],
        self.bbox[1] + self.bbox[3]
    ])
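# Worked example (not from the source): self.bbox is stored as [xmin, ymin, width, height],
# so the corners are recovered as xmax = xmin + width and ymax = ymin + height,
# e.g. [10, 20, 30, 40] -> BBox(xmin=10, ymin=20, xmax=40, ymax=60).
from common_utils.common_types.bbox import BBox

xywh = [10, 20, 30, 40]
corners = BBox.from_list([xywh[0], xywh[1], xywh[0] + xywh[2], xywh[1] + xywh[3]])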
def is_in_frame(self, frame_shape: List[int]) -> bool:
    frame_h, frame_w = frame_shape[:2]
    frame_bbox = BBox(xmin=0, ymin=0, xmax=frame_w, ymax=frame_h)
    return self.bounding_box.within(frame_bbox)
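# Usage sketch (not from the source): the frame itself is modeled as a BBox spanning the
# whole image, and the object box must lie entirely within it. Sizes are illustrative only.
from common_utils.common_types.bbox import BBox

frame_h, frame_w = 480, 640  # e.g. taken from img.shape[:2]
frame_bbox = BBox(xmin=0, ymin=0, xmax=frame_w, ymax=frame_h)
obj_bbox = BBox(xmin=10, ymin=10, xmax=100, ymax=100)
in_frame = obj_bbox.within(frame_bbox)  # True when the box is fully contained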
from annotation_utils.labelme.structs import LabelmeAnnotationHandler
from common_utils.common_types.bbox import BBox
from common_utils.common_types.point import Point2D_List

handler = LabelmeAnnotationHandler.load_from_dir(
    '/home/clayton/workspace/prj/data_keep/data/toyota/dataset/real/phone_videos/new/sampled_data/VID_20200217_161043/json'
)

bbox_scale_factor = 1.4

for ann in handler:
    for shape in ann.shapes:
        if shape.shape_type == 'rectangle':
            bbox = BBox.from_p0p1(shape.points.to_list(demarcation=2))
            bbox = bbox.scale_about_center(
                scale_factor=bbox_scale_factor,
                frame_shape=[ann.img_h, ann.img_w])
            shape.points = Point2D_List.from_list(
                value_list=bbox.to_list(), demarcation=False)

handler.save_to_dir(
    json_save_dir='/home/clayton/workspace/test/labelme_testing/temp/json',
    src_img_dir='/home/clayton/workspace/prj/data_keep/data/toyota/dataset/real/phone_videos/new/sampled_data/VID_20200217_161043/img',
    dst_img_dir='/home/clayton/workspace/test/labelme_testing/temp/img',
    overwrite=True)
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    for i in range(len(dataset_dict["annotations"])):
        dataset_dict["annotations"][i]["segmentation"] = []

    ### my code ##
    image, dataset_dict = self.aug_handler(image=image, dataset_dict_detectron=dataset_dict)
    ### my code ##

    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
        )
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    # USER: Remove if you don't use pre-computed proposals.
    if self.load_proposals:
        utils.transform_proposals(
            dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk
        )

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format
        )
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
            sem_seg_gt = Image.open(f)
            sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        dataset_dict["sem_seg"] = sem_seg_gt

    #### this uses local variable, use with caution ######
    if True:
        vis_img = image.copy()
        bbox_list = [
            BBox.from_list(vals)
            for vals in dataset_dict["instances"].gt_boxes.tensor.numpy().tolist()
        ]
        # seg_list = [Segmentation([Polygon.from_list(poly.tolist(), demarcation=False) for poly in seg_polys]) for seg_polys in dataset_dict["instances"].gt_masks.polygons]
        for bbox in bbox_list:
            # if len(seg) > 0 and False:
            #     vis_img = draw_segmentation(img=vis_img, segmentation=seg, transparent=True)
            vis_img = draw_bbox(img=vis_img, bbox=bbox)
        aug_vis.step(vis_img)

    return dataset_dict
def write_cropped_json(src_img_path: str,
                       src_json_path: str,
                       dst_img_path: str,
                       dst_json_path: str,
                       bound_type='rect',
                       verbose: bool = False):
    def process_shape(shape: Shape, bbox: BBox,
                      new_shape_handler: ShapeHandler):
        points = [Point.from_list(point) for point in shape.points]
        contained_count = 0
        for point in points:
            if bbox.contains(point):
                contained_count += 1
        if contained_count == 0:
            return
        elif contained_count == len(points):
            pass
        else:
            logger.error(
                f"Found a shape that is only partially contained by a bbox.")
            logger.error(f"Shape: {shape}")
            logger.error(f"BBox: {bbox}")
        cropped_points = [
            Point(x=point.x - bbox.xmin, y=point.y - bbox.ymin)
            for point in points
        ]
        for point in cropped_points:
            if point.x < 0 or point.y < 0:
                logger.error(f"Encountered negative point after crop: {point}")
                raise Exception
        new_shape = shape.copy()
        new_shape.points = [
            cropped_point.to_list() for cropped_point in cropped_points
        ]
        new_shape_handler.add(new_shape)

    check_input_path_and_output_dir(input_path=src_img_path,
                                    output_path=dst_img_path)
    check_input_path_and_output_dir(input_path=src_json_path,
                                    output_path=dst_json_path)
    output_img_dir = get_dirpath_from_filepath(dst_img_path)
    annotation = LabelMeAnnotation(annotation_path=src_img_path,
                                   img_dir=dst_img_path,
                                   bound_type=bound_type)
    parser = LabelMeAnnotationParser(annotation_path=src_json_path)
    parser.load()
    bbox_list = []
    for rect in parser.shape_handler.rectangles:
        numpy_array = np.array(rect.points)
        if numpy_array.shape != (2, 2):
            logger.error(
                f"Encountered rectangle with invalid shape: {numpy_array.shape}")
            logger.error(f"rect: {rect}")
            raise Exception
        xmin, xmax = numpy_array.T[0].min(), numpy_array.T[0].max()
        ymin, ymax = numpy_array.T[1].min(), numpy_array.T[1].max()
        bbox_list.append(BBox.from_list([xmin, ymin, xmax, ymax]))
    img = cv2.imread(src_img_path)
    img_h, img_w = img.shape[:2]
    for i, bbox in enumerate(bbox_list):
        bbox = BBox.buffer(bbox)
        new_shape_handler = ShapeHandler()
        for shape_group in [
                parser.shape_handler.points, parser.shape_handler.rectangles,
                parser.shape_handler.polygons
        ]:
            for shape in shape_group:
                process_shape(shape=shape,
                              bbox=bbox,
                              new_shape_handler=new_shape_handler)
        new_shape_list = new_shape_handler.to_shape_list()
        if len(new_shape_list) > 0:
            img_rootname, json_rootname = get_rootname_from_path(
                dst_img_path), get_rootname_from_path(dst_json_path)
            dst_img_dir, dst_json_dir = get_dirpath_from_filepath(
                dst_img_path), get_dirpath_from_filepath(dst_json_path)
            dst_img_extension = get_extension_from_path(dst_img_path)
            dst_cropped_img_path = f"{dst_img_dir}/{img_rootname}_{i}.{dst_img_extension}"
            dst_cropped_json_path = f"{dst_json_dir}/{json_rootname}_{i}.json"
            write_cropped_image(src_path=src_img_path,
                                dst_path=dst_cropped_img_path,
                                bbox=bbox,
                                verbose=verbose)
            cropped_labelme_ann = annotation.copy()
            cropped_labelme_ann.annotation_path = dst_cropped_json_path
            cropped_labelme_ann.img_dir = dst_img_dir
            cropped_labelme_ann.img_path = dst_cropped_img_path
            cropped_img = cv2.imread(dst_cropped_img_path)
            cropped_img_h, cropped_img_w = cropped_img.shape[:2]
            cropped_labelme_ann.img_height = cropped_img_h
            cropped_labelme_ann.img_width = cropped_img_w
            cropped_labelme_ann.shapes = new_shape_list
            cropped_labelme_ann.shape_handler = new_shape_handler
            writer = LabelMeAnnotationWriter(cropped_labelme_ann)
            writer.write()
            if verbose:
                logger.info(f"Wrote {dst_cropped_json_path}")
def to_coco(self,
            img_dir: str = None,
            mask_dir: str = None,
            coco_license: COCO_License = None,
            check_paths: bool = True,
            mask_lower_bgr: Tuple[int] = None,
            mask_upper_bgr: Tuple[int] = (255, 255, 255),
            show_pbar: bool = True) -> COCO_Dataset:
    dataset = COCO_Dataset.new(
        description='Dataset converted from Linemod to COCO format.')
    dataset.licenses.append(
        coco_license if coco_license is not None else COCO_License(
            url='https://github.com/cm107/annotation_utils/blob/master/LICENSE',
            name='MIT License',
            id=len(dataset.licenses)))
    coco_license0 = dataset.licenses[-1]
    for linemod_image in self.images:
        file_name = get_filename(linemod_image.file_name)
        img_path = linemod_image.file_name if img_dir is None else f'{img_dir}/{file_name}'
        if file_exists(img_path):
            date_captured = get_ctime(img_path)
        else:
            if check_paths:
                raise FileNotFoundError(f"Couldn't find image at {img_path}")
            date_captured = ''
        coco_image = COCO_Image(license_id=coco_license0.id,
                                file_name=file_name,
                                coco_url=img_path,
                                width=linemod_image.width,
                                height=linemod_image.height,
                                date_captured=date_captured,
                                flickr_url=None,
                                id=linemod_image.id)
        dataset.images.append(coco_image)

    pbar = tqdm(total=len(self.annotations),
                unit='annotation(s)') if show_pbar else None
    if pbar is not None:
        pbar.set_description('Converting Linemod to COCO')
    for linemod_ann in self.annotations:
        mask_filename = get_filename(linemod_ann.mask_path)
        if mask_dir is not None:
            mask_path = f'{mask_dir}/{mask_filename}'
            if not file_exists(mask_path):
                if check_paths:
                    raise FileNotFoundError(f"Couldn't find mask at {mask_path}")
                else:
                    seg = Segmentation()
            else:
                seg = Segmentation.from_mask_path(mask_path,
                                                  lower_bgr=mask_lower_bgr,
                                                  upper_bgr=mask_upper_bgr)
        elif file_exists(linemod_ann.mask_path):
            seg = Segmentation.from_mask_path(linemod_ann.mask_path,
                                              lower_bgr=mask_lower_bgr,
                                              upper_bgr=mask_upper_bgr)
        elif img_dir is not None and file_exists(f'{img_dir}/{mask_filename}'):
            seg = Segmentation.from_mask_path(f'{img_dir}/{mask_filename}',
                                              lower_bgr=mask_lower_bgr,
                                              upper_bgr=mask_upper_bgr)
        elif not check_paths:
            seg = Segmentation()
        else:
            raise FileNotFoundError(f"""
                Couldn't resolve mask_path for calculating segmentation.
                Please either specify mask_dir or correct the mask paths in your linemod dataset.
                linemod_ann.id: {linemod_ann.id}
                linemod_ann.mask_path: {linemod_ann.mask_path}
                """)
        if len(seg) > 0:
            bbox = seg.to_bbox()
        else:
            xmin = int(linemod_ann.corner_2d.to_numpy(demarcation=True)[:, 0].min())
            xmax = int(linemod_ann.corner_2d.to_numpy(demarcation=True)[:, 0].max())
            ymin = int(linemod_ann.corner_2d.to_numpy(demarcation=True)[:, 1].min())
            ymax = int(linemod_ann.corner_2d.to_numpy(demarcation=True)[:, 1].max())
            bbox = BBox(xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax)
        keypoints = Keypoint2D_List.from_point_list(linemod_ann.fps_2d, visibility=2)
        keypoints_3d = Keypoint3D_List.from_point_list(linemod_ann.fps_3d, visibility=2)
        num_keypoints = len(keypoints)
        if linemod_ann.category_id not in [cat.id for cat in dataset.categories]:
            linemod_cat = self.categories.get_obj_from_id(linemod_ann.category_id)
            cat_keypoints = list('abcdefghijklmnopqrstuvwxyz'.upper())[:num_keypoints]
            cat_keypoints_idx_list = [idx for idx in range(len(cat_keypoints))]
            cat_keypoints_idx_list_shift_left = \
                cat_keypoints_idx_list[1:] + cat_keypoints_idx_list[:1]
            dataset.categories.append(
                COCO_Category(
                    id=linemod_ann.category_id,
                    supercategory=linemod_cat.supercategory,
                    name=linemod_cat.name,
                    keypoints=cat_keypoints,
                    skeleton=[[start_idx, end_idx]
                              for start_idx, end_idx in zip(
                                  cat_keypoints_idx_list,
                                  cat_keypoints_idx_list_shift_left)]))
        coco_ann = COCO_Annotation(
            id=linemod_ann.id,
            category_id=linemod_ann.category_id,
            image_id=linemod_ann.image_id,
            segmentation=seg,
            bbox=bbox,
            area=bbox.area(),
            keypoints=keypoints,
            num_keypoints=num_keypoints,
            iscrowd=0,
            keypoints_3d=keypoints_3d,
            camera=COCO_Camera(f=[linemod_ann.K.fx, linemod_ann.K.fy],
                               c=[linemod_ann.K.cx, linemod_ann.K.cy],
                               T=[0, 0]))
        dataset.annotations.append(coco_ann)
        if pbar is not None:
            pbar.update()
    if pbar is not None:
        pbar.close()
    return dataset
def from_dict(cls, ann_dict: dict, strict: bool = True) -> COCO_Annotation:
    if strict:
        check_required_keys(
            ann_dict,
            required_keys=[
                'segmentation', 'num_keypoints', 'area', 'iscrowd',
                'keypoints', 'image_id', 'bbox', 'category_id', 'id'
            ])
        return COCO_Annotation(
            segmentation=Segmentation.from_list(ann_dict['segmentation'], demarcation=False),
            num_keypoints=ann_dict['num_keypoints'],
            area=ann_dict['area'],
            iscrowd=ann_dict['iscrowd'],
            keypoints=Keypoint2D_List.from_list(ann_dict['keypoints'], demarcation=False),
            image_id=ann_dict['image_id'],
            bbox=BBox.from_list(ann_dict['bbox'], input_format='pminsize'),
            category_id=ann_dict['category_id'],
            id=ann_dict['id'],
            keypoints_3d=Keypoint3D_List.from_list(
                ann_dict['keypoints_3d'], demarcation=False)
            if 'keypoints_3d' in ann_dict else None,
            camera=Camera.from_dict(ann_dict['camera_params'])
            if 'camera_params' in ann_dict else None)
    else:
        check_required_keys(
            ann_dict, required_keys=['id', 'category_id', 'image_id'])
        if 'segmentation' not in ann_dict:
            seg = None
        elif ann_dict['iscrowd'] == 1 and type(ann_dict['segmentation']) == dict:
            # Crowd annotations store the segmentation as compressed RLE;
            # decode it to a binary mask and extract the outer contours as polygons.
            compressed_rle = mask.frPyObjects(
                ann_dict['segmentation'],
                ann_dict['segmentation']['size'][0],
                ann_dict['segmentation']['size'][1])
            seg_mask = mask.decode(compressed_rle)
            contours, _ = cv2.findContours(seg_mask, cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_SIMPLE)
            seg = Segmentation.from_contour(contour_list=contours)
        else:
            seg = Segmentation.from_list(ann_dict['segmentation'], demarcation=False)
        return COCO_Annotation(
            segmentation=seg,
            num_keypoints=ann_dict['num_keypoints'] if 'num_keypoints' in ann_dict else None,
            area=ann_dict['area'] if 'area' in ann_dict else None,
            iscrowd=ann_dict['iscrowd'] if 'iscrowd' in ann_dict else None,
            keypoints=Keypoint2D_List.from_list(ann_dict['keypoints'], demarcation=False)
            if 'keypoints' in ann_dict else None,
            image_id=ann_dict['image_id'],
            bbox=BBox.from_list(ann_dict['bbox'], input_format='pminsize')
            if 'bbox' in ann_dict else None,
            category_id=ann_dict['category_id'],
            id=ann_dict['id'],
            keypoints_3d=Keypoint3D_List.from_list(
                ann_dict['keypoints_3d'], demarcation=False)
            if 'keypoints_3d' in ann_dict else None,
            camera=Camera.from_dict(ann_dict['camera_params'])
            if 'camera_params' in ann_dict else None)
def infer_linemod_dataset(
    self,
    dataset: Linemod_Dataset,
    img_dir: str,
    blackout: bool = False,
    show_pbar: bool = True,
    show_preview: bool = False,
    video_save_path: str = None,
    dump_dir: str = None,
    accumulate_pred_dump: bool = True,
    pred_dump_path: str = None,
    test_name: str = None,
) -> PVNetFrameResultList:
    stream_writer = StreamWriter(
        show_preview=show_preview,
        video_save_path=video_save_path,
        dump_dir=dump_dir,
    )
    pbar = tqdm(total=len(dataset.images), unit="image(s)") if show_pbar else None
    frame_result_list = (PVNetFrameResultList()
                         if pred_dump_path is not None or accumulate_pred_dump
                         else None)
    for linemod_image in dataset.images:
        file_name = get_filename(linemod_image.file_name)
        if pbar is not None:
            pbar.set_description(file_name)
        img_path = f"{img_dir}/{file_name}"
        img = Image.open(img_path)
        orig_img = np.asarray(img)
        orig_img = cv2.cvtColor(orig_img, cv2.COLOR_RGB2BGR)
        result = orig_img.copy()
        gt_ann = dataset.annotations.get(image_id=linemod_image.id)[0]
        if blackout:
            # Black out everything outside the ground-truth 2D corner bounds.
            xmin = int(gt_ann.corner_2d.to_numpy(demarcation=True)[:, 0].min())
            xmax = int(gt_ann.corner_2d.to_numpy(demarcation=True)[:, 0].max())
            ymin = int(gt_ann.corner_2d.to_numpy(demarcation=True)[:, 1].min())
            ymax = int(gt_ann.corner_2d.to_numpy(demarcation=True)[:, 1].max())
            bbox = BBox(xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax)
            bbox = bbox.clip_at_bounds(frame_shape=result.shape)
            result = bbox.crop_and_paste(src_img=result,
                                         dst_img=np.zeros_like(result))
        else:
            bbox = None
        pred = self.predict(img=img, bbox=bbox)
        if frame_result_list is not None:
            frame_result = PVNetFrameResult(
                frame=file_name,
                pred_list=PVNetPredictionList([pred]),
                test_name=test_name,
            )
            frame_result_list.append(frame_result)
        gt_kpt_3d = gt_ann.fps_3d.copy()
        gt_kpt_3d.append(gt_ann.center_3d)
        pose_pred = pred.to_pose_pred(gt_kpt_3d=gt_kpt_3d, K=gt_ann.K)
        corner_2d_gt = pvnet_pose_utils.project(
            gt_ann.corner_3d.to_numpy(),
            gt_ann.K.to_matrix(),
            np.array(gt_ann.pose.to_list()),
        )
        corner_2d_pred = pred.to_corner_2d_pred(gt_corner_3d=gt_ann.corner_3d,
                                                K=gt_ann.K,
                                                pose_pred=pose_pred)
        # Ground truth in green, predictions in red.
        result = draw_pts2d(img=result, pts2d=gt_ann.fps_2d.to_numpy(),
                            color=(0, 255, 0), radius=3)
        result = draw_corners(img=result, corner_2d=corner_2d_gt,
                              color=(0, 255, 0), thickness=2)
        result = draw_pts2d(img=result, pts2d=pred.kpt_2d,
                            color=(0, 0, 255), radius=2)
        result = draw_corners(img=result, corner_2d=corner_2d_pred,
                              color=(0, 0, 255), thickness=2)
        stream_writer.step(img=result, file_name=file_name)
        if pbar is not None:
            pbar.update()
    stream_writer.close()
    if pred_dump_path is not None:
        frame_result_list.save_to_path(pred_dump_path, overwrite=True)
    if pbar is not None:
        pbar.close()
    return frame_result_list
        COCO_Image(license_id=i % len(dataset.licenses),
                   file_name=f'{i}.jpg',
                   coco_url=f'/path/to/{i}.jpg',
                   height=500,
                   width=500,
                   date_captured='N/A',
                   flickr_url=None,
                   id=len(dataset.images)))

for i in range(len(dataset.images)):
    if i % 2 == 0:
        dataset.annotations.append(
            COCO_Annotation(
                category_id=dataset.categories.get_unique_category_from_name('category_a').id,
                image_id=dataset.images[i].id,
                bbox=BBox(xmin=0, ymin=0, xmax=100, ymax=100),
                id=len(dataset.annotations)))
        dataset.annotations.append(
            COCO_Annotation(
                category_id=dataset.categories.get_unique_category_from_name('category_b').id,
                image_id=dataset.images[i].id,
                bbox=BBox(xmin=0, ymin=0, xmax=100, ymax=100),
                id=len(dataset.annotations)))
    else:
        dataset.annotations.append(
            COCO_Annotation(
                category_id=dataset.categories.get_unique_category_from_name('category_b').id,
                image_id=dataset.images[i].id,
                bbox=BBox(xmin=0, ymin=0, xmax=100, ymax=100),
# cv2.waitKey(1000)
i = 0
for filename in glob.glob(test_data_path):
    ima = cv2.imread(filename)
    # ima = cv2.resize(ima, (1024, 1024))
    outputs = predictor(ima)
    # v = Visualizer(ima[:, :, ::-1], metadata=box_metadata, scale=0.8, instance_mode=ColorMode.IMAGE_BW)
    v = Visualizer(ima[:, :, ::-1], metadata=box_metadata, scale=1,
                   instance_mode=ColorMode.SEGMENTATION)  # remove the colors of unsegmented pixels
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))

    from common_utils.common_types.bbox import BBox
    instances = outputs['instances']
    pred_boxes = [
        BBox.from_list(val_list).to_int()
        for val_list in instances.pred_boxes.tensor.cpu().numpy()
    ]
    scores = [float(val) for val in instances.scores.cpu().numpy()]
    print(scores)
    pred_classes = instances.pred_classes.cpu().numpy().tolist()
    test_bbox = BBox(xmin=0, ymin=0, xmax=1, ymax=1)
    for bbox in pred_boxes:
        from typing import cast
        bbox = cast(BBox, bbox)
        hshape = bbox.xmin
        print(hshape)
        crop_image = bbox.crop_from(ima)
        cv2.imshow("predictions1", crop_image)
        cv2.imwrite(os.path.join(save_cropimg_path, str(i) + '.jpg'), crop_image)
assert kpt3d_list_0 + const_int == Keypoint3D_List(
    [kpt + const_int for kpt in kpt3d_list_0])
assert kpt3d_list_0 - kpt3d_1 == Keypoint3D_List(
    [kpt - kpt3d_1 for kpt in kpt3d_list_0])
assert kpt3d_list_0 - pt3d_1 == Keypoint3D_List(
    [kpt - pt3d_1 for kpt in kpt3d_list_0])
assert kpt3d_list_0 - const_int == Keypoint3D_List(
    [kpt - const_int for kpt in kpt3d_list_0])
assert kpt3d_list_0 * const_int == Keypoint3D_List(
    [kpt * const_int for kpt in kpt3d_list_0])
assert kpt3d_list_0 / const_int == Keypoint3D_List(
    [kpt / const_int for kpt in kpt3d_list_0])
print(f'Keypoint3D_List Test Passed')

bbox0 = BBox(xmin=0, ymin=1, xmax=2, ymax=3)
bbox1 = BBox(xmin=4, ymin=5, xmax=6, ymax=7)
assert bbox0 + bbox1 == BBox(xmin=0, ymin=1, xmax=6, ymax=7)
assert bbox0 + const_int == BBox(xmin=bbox0.xmin + const_int,
                                 ymin=bbox0.ymin + const_int,
                                 xmax=bbox0.xmax + const_int,
                                 ymax=bbox0.ymax + const_int)
assert bbox0 + const_float == BBox(xmin=bbox0.xmin + const_float,
                                   ymin=bbox0.ymin + const_float,
                                   xmax=bbox0.xmax + const_float,
                                   ymax=bbox0.ymax + const_float)
assert bbox0 + pt2d_0 == BBox(xmin=bbox0.xmin + pt2d_0.x,
                              ymin=bbox0.ymin + pt2d_0.y,
                              xmax=bbox0.xmax + pt2d_0.x,
                              ymax=bbox0.ymax + pt2d_0.y)
assert bbox0 + kpt2d_0 == BBox(xmin=bbox0.xmin + kpt2d_0.point.x,