Example #1
    def adjust_for_zoom(self) -> Tuple[np.ndarray, COCO_Annotation]:
        assert self.src_img is not None and self.magnitude is not None
        if self.center is None:
            self.center_to_center_of(img=self.src_img)
        img_h, img_w = self.src_img.shape[:2]
        target_crop_h, target_crop_w = int(img_h / self.magnitude), int(
            img_w / self.magnitude)
        zoom_cx, crop_xmin, crop_xmax = self._adjust_for_zoom(
            target_zoom_c=self.center.x,
            target_crop_length=target_crop_w,
            image_side_length=img_w)
        zoom_cy, crop_ymin, crop_ymax = self._adjust_for_zoom(
            target_zoom_c=self.center.y,
            target_crop_length=target_crop_h,
            image_side_length=img_h)
        crop_bbox = BBox(xmin=crop_xmin,
                         ymin=crop_ymin,
                         xmax=crop_xmax,
                         ymax=crop_ymax)

        cropped_img = crop_bbox.crop_from(img=self.src_img)
        if self.src_coco_ann is not None:
            new_coco_ann = self.src_coco_ann.copy()
            new_coco_ann.bbox = self.src_coco_ann.bbox - crop_bbox.pmin
            new_coco_ann.segmentation = self.src_coco_ann.segmentation - crop_bbox.pmin
            new_coco_ann.keypoints = self.src_coco_ann.keypoints - crop_bbox.pmin
            return cropped_img, new_coco_ann
        else:
            return cropped_img, None
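A minimal usage sketch; `zoomer` stands in for an instance of the (unshown) owning class, with its fields set as the assertions above require:

zoomer.src_img = cv2.imread('frame.jpg')  # source image as np.ndarray
zoomer.magnitude = 2.0                    # 2x zoom -> crop half of each side length
zoomer.center = None                      # None -> recentered on the image center
zoomed_img, zoomed_ann = zoomer.adjust_for_zoom()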
Example #2
    def from_dict(cls, ann_dict: dict) -> Detectron2_Annotation:
        check_required_keys(
            ann_dict,
            required_keys=['iscrowd', 'bbox', 'category_id', 'bbox_mode'])

        if ann_dict['bbox_mode'] == BoxMode.XYWH_ABS:
            bbox = BBox.from_list(ann_dict['bbox'], input_format='pminsize')
        elif ann_dict['bbox_mode'] == BoxMode.XYXY_ABS:
            bbox = BBox.from_list(ann_dict['bbox'], input_format='pminpmax')
        else:
            raise NotImplementedError
        if 'keypoints' in ann_dict:
            keypoints = Keypoint2D_List.from_list(
                value_list=ann_dict['keypoints'], demarcation=False)
        else:
            keypoints = Keypoint2D_List()

        return Detectron2_Annotation(iscrowd=ann_dict['iscrowd'],
                                     bbox=bbox,
                                     keypoints=keypoints,
                                     category_id=ann_dict['category_id'],
                                     segmentation=Segmentation.from_list(
                                         points_list=ann_dict['segmentation']
                                         if 'segmentation' in ann_dict else [],
                                         demarcation=False),
                                     bbox_mode=ann_dict['bbox_mode'])
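For reference, a dict satisfying the required keys checked above might look like this (values are illustrative, and from_dict is assumed to be exposed as a classmethod; BoxMode comes from detectron2.structures):

from detectron2.structures import BoxMode

ann_dict = {
    'iscrowd': 0,
    'bbox': [10, 20, 100, 50],     # XYWH_ABS, parsed with input_format='pminsize'
    'bbox_mode': BoxMode.XYWH_ABS,
    'category_id': 1,
    # 'keypoints' and 'segmentation' are optional and default to empty
}
ann = Detectron2_Annotation.from_dict(ann_dict)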
Example #3
 def from_dict(cls, ann_dict: dict, strict: bool = True) -> COCO_Annotation:
     if strict:
         check_required_keys(ann_dict,
                             required_keys=[
                                 'segmentation', 'num_keypoints', 'area',
                                 'iscrowd', 'keypoints', 'image_id', 'bbox',
                                 'category_id', 'id'
                             ])
         return COCO_Annotation(
             segmentation=Segmentation.from_list(ann_dict['segmentation'],
                                                 demarcation=False),
             num_keypoints=ann_dict['num_keypoints'],
             area=ann_dict['area'],
             iscrowd=ann_dict['iscrowd'],
             keypoints=Keypoint2D_List.from_list(ann_dict['keypoints'],
                                                 demarcation=False),
             image_id=ann_dict['image_id'],
             bbox=BBox.from_list(ann_dict['bbox'], input_format='pminsize'),
             category_id=ann_dict['category_id'],
             id=ann_dict['id'],
             keypoints_3d=Keypoint3D_List.from_list(
                 ann_dict['keypoints_3d'], demarcation=False)
             if 'keypoints_3d' in ann_dict else None,
             camera=Camera.from_dict(ann_dict['camera_params'])
             if 'camera_params' in ann_dict else None)
     else:
         check_required_keys(
             ann_dict, required_keys=['id', 'category_id', 'image_id'])
         return COCO_Annotation(
             segmentation=Segmentation.from_list(ann_dict['segmentation'],
                                                 demarcation=False)
             if 'segmentation' in ann_dict else None,
             num_keypoints=ann_dict['num_keypoints']
             if 'num_keypoints' in ann_dict else None,
             area=ann_dict['area'] if 'area' in ann_dict else None,
             iscrowd=ann_dict['iscrowd'] if 'iscrowd' in ann_dict else None,
             keypoints=Keypoint2D_List.from_list(ann_dict['keypoints'],
                                                 demarcation=False)
             if 'keypoints' in ann_dict else None,
             image_id=ann_dict['image_id'],
             bbox=BBox.from_list(ann_dict['bbox'], input_format='pminsize')
             if 'bbox' in ann_dict else None,
             category_id=ann_dict['category_id'],
             id=ann_dict['id'],
             keypoints_3d=Keypoint3D_List.from_list(
                 ann_dict['keypoints_3d'], demarcation=False)
             if 'keypoints_3d' in ann_dict else None,
             camera=Camera.from_dict(ann_dict['camera_params'])
             if 'camera_params' in ann_dict else None)
Example #4
    def process_shape(shape: Shape, bbox: BBox,
                      new_shape_handler: ShapeHandler):
        points = [Point.from_list(point) for point in shape.points]

        contained_count = 0
        for point in points:
            if bbox.contains(point):
                contained_count += 1
        if contained_count == 0:
            return
        elif contained_count == len(points):
            pass
        else:
            logger.error(
                "Found a shape that is only partially contained by a bbox.")
            logger.error(f"Shape: {shape}")
            logger.error(f"BBox: {bbox}")

        cropped_points = [
            Point(x=point.x - bbox.xmin, y=point.y - bbox.ymin)
            for point in points
        ]
        for point in cropped_points:
            if point.x < 0 or point.y < 0:
                logger.error(f"Encountered negative point after crop: {point}")
                raise Exception
        new_shape = shape.copy()
        new_shape.points = [
            cropped_point.to_list() for cropped_point in cropped_points
        ]
        new_shape_handler.add(new_shape)
Example #5
 def from_dict(self, object_dict: dict) -> NDDS_Annotation_Object:
     check_required_keys(object_dict,
                         required_keys=[
                             'class', 'instance_id', 'visibility',
                             'location', 'quaternion_xyzw',
                             'pose_transform', 'cuboid_centroid',
                             'projected_cuboid_centroid', 'bounding_box',
                             'cuboid', 'projected_cuboid'
                         ])
     check_required_keys(object_dict['bounding_box'],
                         required_keys=['top_left', 'bottom_right'])
     return NDDS_Annotation_Object(
         class_name=object_dict['class'],
         instance_id=object_dict['instance_id'],
         visibility=object_dict['visibility'],
         location=Point3D.from_list(object_dict['location']),
         quaternion_xyzw=Quaternion.from_list(
             object_dict['quaternion_xyzw']),
         pose_transform=np.array(object_dict['pose_transform']),
         cuboid_centroid=Point3D.from_list(object_dict['cuboid_centroid']),
         projected_cuboid_centroid=Point2D.from_list(
             object_dict['projected_cuboid_centroid']),
         bounding_box=BBox.from_list(
             object_dict['bounding_box']['top_left'] +
             object_dict['bounding_box']['bottom_right'],
             input_format='pminpmax'),
         cuboid=Cuboid3D.from_list(object_dict['cuboid'], demarcation=True),
         projected_cuboid=Cuboid2D.from_list(
             object_dict['projected_cuboid'], demarcation=True))
Example #6
 def from_dict(self, item_dict: dict) -> BBoxResult:
     check_required_keys(
         item_dict,
         required_keys=['image_id', 'category_id', 'bbox', 'score'])
     return BBoxResult(image_id=item_dict['image_id'],
                       category_id=item_dict['category_id'],
                       bbox=BBox.from_list(bbox=item_dict['bbox'],
                                           input_format='pminsize'),
                       score=item_dict['score'])
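A COCO-style detection result can then be parsed directly; values are illustrative, again assuming from_dict is exposed as a classmethod:

result = BBoxResult.from_dict({
    'image_id': 0,
    'category_id': 1,
    'bbox': [5, 5, 50, 80],   # [xmin, ymin, width, height] ('pminsize')
    'score': 0.92,
})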
Example #7
 def from_dict(self, item_dict: dict) -> BBoxKeypointResult:
     return BBoxKeypointResult(image_id=item_dict['image_id'],
                               category_id=item_dict['category_id'],
                               keypoints=Keypoint2D_List.from_list(
                                   item_dict['keypoints'],
                                   demarcation=False),
                               bbox=BBox.from_list(item_dict['bbox'],
                                                   input_format='pminsize'),
                               score=item_dict['score'])
Example #8
def write_cropped_image(src_path: str,
                        dst_path: str,
                        bbox: BBox,
                        verbose: bool = False):
    check_input_path_and_output_dir(input_path=src_path, output_path=dst_path)
    img = cv2.imread(src_path)
    int_bbox = bbox.to_int()
    cropped_img = img[int_bbox.ymin:int_bbox.ymax,
                      int_bbox.xmin:int_bbox.xmax, :]
    cv2.imwrite(filename=dst_path, img=cropped_img)
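Example call (paths and coordinates are illustrative):

write_cropped_image(src_path='full.jpg',
                    dst_path='out/crop.jpg',
                    bbox=BBox(xmin=10, ymin=10, xmax=210, ymax=160))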
Example #9
 def from_dict(self, object_dict: dict) -> NDDS_Annotation_Object:
     return NDDS_Annotation_Object(
         class_name=object_dict['class'],
         instance_id=object_dict['instance_id'],
         visibility=object_dict['visibility'],
         location=Point3D.from_list(object_dict['location']),
         quaternion_xyzw=Quaternion.from_list(
             object_dict['quaternion_xyzw']),
         pose_transform=np.array(object_dict['pose_transform']),
         cuboid_centroid=Point3D.from_list(object_dict['cuboid_centroid']),
         projected_cuboid_centroid=Point2D.from_list(
             object_dict['projected_cuboid_centroid']),
         bounding_box=BBox.from_list(
             object_dict['bounding_box']['top_left'] +
             object_dict['bounding_box']['bottom_right']),
         cuboid=Cuboid3D.from_list(object_dict['cuboid']),
         projected_cuboid=Cuboid2D.from_list(
             object_dict['projected_cuboid']))
Example #10
def mapper(dataset_dict):
    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    #print(dataset_dict["file_name"])
    #cv2.imwrite(os.path.join(aug_save_path, f'{str(count.count)}_asd.jpg'),image)

    handler = load_augmentation_settings(handler_save_path='test_handler.json')
    for i in range(len(dataset_dict["annotations"])):
        dataset_dict["annotations"][i]["segmentation"] = []
    image, dataset_dict = handler(image=image, dataset_dict_detectron=dataset_dict)

    annots = [item for item in dataset_dict["annotations"]]

    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    instances = utils.annotations_to_instances(annots, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    if True:
        vis_img = image.copy()
        bbox_list = [BBox.from_list(vals) for vals in dataset_dict["instances"].gt_boxes.tensor.numpy().tolist()]
        # seg_list = [Segmentation([Polygon.from_list(poly.tolist(), demarcation=False) for poly in seg_polys]) for seg_polys in dataset_dict["instances"].gt_masks.polygons]
        for bbox in bbox_list:
            # if len(seg) > 0 and False:
            #     vis_img = draw_segmentation(img=vis_img, segmentation=seg, transparent=True)
            vis_img = draw_bbox(img=vis_img, bbox=bbox)

        height, width, channels = vis_img.shape
        print(height, width)
        if count.count < 100:
            cv2.imwrite(os.path.join(aug_save_path, f'{count.count}.jpg'), vis_img)
        count.count_add()
        aug_vis.step(vis_img)


    return dataset_dict
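A mapper like this is typically plugged into detectron2's training loader; a sketch, assuming cfg is an already-populated detectron2 config:

from detectron2.data import build_detection_train_loader

train_loader = build_detection_train_loader(cfg, mapper=mapper)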
Example #11
    def _predict(self,
                 img: np.ndarray,
                 bbox: BBox = None) -> Dict[str, torch.Tensor]:
        # Convert to a PIL image if necessary
        if isinstance(img, JpegImageFile):
            img0 = img.copy()
        elif isinstance(img, np.ndarray):
            img0 = img.copy()
            img0 = cv2.cvtColor(img0, cv2.COLOR_BGR2RGB)
            img0 = Image.fromarray(img0)
        else:
            raise TypeError(
                get_type_error_message(
                    func_name="PVNetInferer._predict",
                    acceptable_types=[np.ndarray, JpegImageFile],
                    unacceptable_type=type(img),
                    param_name="img",
                ))

        # Blackout Region Outside BBox If Given BBox
        if bbox is not None:
            img0 = np.array(img0)
            img0 = bbox.crop_and_paste(src_img=img0,
                                       dst_img=np.zeros_like(img0))
            img0 = Image.fromarray(img0)

        # Infer
        img0 = self.transforms.on_image(img0)
        img0 = img0.reshape(tuple([1] + list(img0.shape)))
        img0 = torch.from_numpy(img0)
        img0 = img0.cuda()
        return self.network(img0)
Example #12
 def get_bounding_box(self) -> BBox:
     return BBox.from_list([
         self.bbox[0], self.bbox[1], self.bbox[0] + self.bbox[2],
         self.bbox[1] + self.bbox[3]
     ])
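Note that this hand-rolled [x, y, w, h] -> [xmin, ymin, xmax, ymax] conversion is equivalent to the 'pminsize' parsing used in the other examples, i.e. (assuming BBox.from_list accepts the input_format keyword shown above):

return BBox.from_list(self.bbox, input_format='pminsize')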
Example #13
 def is_in_frame(self, frame_shape: List[int]) -> bool:
     frame_h, frame_w = frame_shape[:2]
     frame_bbox = BBox(xmin=0, ymin=0, xmax=frame_w, ymax=frame_h)
     return self.bounding_box.within(frame_bbox)
Example #14
from annotation_utils.labelme.structs import LabelmeAnnotationHandler
from common_utils.common_types.bbox import BBox
from common_utils.common_types.point import Point2D_List

handler = LabelmeAnnotationHandler.load_from_dir(
    '/home/clayton/workspace/prj/data_keep/data/toyota/dataset/real/phone_videos/new/sampled_data/VID_20200217_161043/json'
)
bbox_scale_factor = 1.4
for ann in handler:
    for shape in ann.shapes:
        if shape.shape_type == 'rectangle':
            bbox = BBox.from_p0p1(shape.points.to_list(demarcation=2))
            bbox = bbox.scale_about_center(scale_factor=bbox_scale_factor,
                                           frame_shape=[ann.img_h, ann.img_w])
            shape.points = Point2D_List.from_list(value_list=bbox.to_list(),
                                                  demarcation=False)

handler.save_to_dir(
    json_save_dir='/home/clayton/workspace/test/labelme_testing/temp/json',
    src_img_dir=
    '/home/clayton/workspace/prj/data_keep/data/toyota/dataset/real/phone_videos/new/sampled_data/VID_20200217_161043/img',
    dst_img_dir='/home/clayton/workspace/test/labelme_testing/temp/img',
    overwrite=True)
Example #15
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        for i in range(len(dataset_dict["annotations"])):
            dataset_dict["annotations"][i]["segmentation"] = []

        ### my code ##
        image, dataset_dict = self.aug_handler(image=image, dataset_dict_detectron=dataset_dict)
        ### my code ##

        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
            )
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(
                dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk
            )

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format
            )
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
                sem_seg_gt = Image.open(f)
                sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_seg_gt
        
        #### this uses local variable, use with caution ######
        if True:
            vis_img = image.copy()
            bbox_list = [BBox.from_list(vals) for vals in dataset_dict["instances"].gt_boxes.tensor.numpy().tolist()]
            # seg_list = [Segmentation([Polygon.from_list(poly.tolist(), demarcation=False) for poly in seg_polys]) for seg_polys in dataset_dict["instances"].gt_masks.polygons]
            for bbox in (bbox_list):
                # if len(seg) > 0 and False:
                #     vis_img = draw_segmentation(img=vis_img, segmentation=seg, transparent=True)
                vis_img = draw_bbox(img=vis_img, bbox=bbox)
            aug_vis.step(vis_img)    
        
        return dataset_dict
Example #16
def write_cropped_json(src_img_path: str,
                       src_json_path: str,
                       dst_img_path: str,
                       dst_json_path: str,
                       bound_type='rect',
                       verbose: bool = False):
    def process_shape(shape: Shape, bbox: BBox,
                      new_shape_handler: ShapeHandler):
        points = [Point.from_list(point) for point in shape.points]

        contained_count = 0
        for point in points:
            if bbox.contains(point):
                contained_count += 1
        if contained_count == 0:
            return
        elif contained_count == len(points):
            pass
        else:
            logger.error(
                "Found a shape that is only partially contained by a bbox.")
            logger.error(f"Shape: {shape}")
            logger.error(f"BBox: {bbox}")

        cropped_points = [
            Point(x=point.x - bbox.xmin, y=point.y - bbox.ymin)
            for point in points
        ]
        for point in cropped_points:
            if point.x < 0 or point.y < 0:
                logger.error(f"Encountered negative point after crop: {point}")
                raise Exception
        new_shape = shape.copy()
        new_shape.points = [
            cropped_point.to_list() for cropped_point in cropped_points
        ]
        new_shape_handler.add(new_shape)

    check_input_path_and_output_dir(input_path=src_img_path,
                                    output_path=dst_img_path)
    check_input_path_and_output_dir(input_path=src_json_path,
                                    output_path=dst_json_path)
    output_img_dir = get_dirpath_from_filepath(dst_img_path)

    annotation = LabelMeAnnotation(annotation_path=src_json_path,
                                   img_dir=output_img_dir,
                                   bound_type=bound_type)
    parser = LabelMeAnnotationParser(annotation_path=src_json_path)
    parser.load()

    bbox_list = []
    for rect in parser.shape_handler.rectangles:
        numpy_array = np.array(rect.points)
        if numpy_array.shape != (2, 2):
            logger.error(
                f"Encountered rectangle with invalid shape: {numpy_array.shape}"
            )
            logger.error(f"rect: {rect}")
            raise Exception
        xmin, xmax = numpy_array.T[0].min(), numpy_array.T[0].max()
        ymin, ymax = numpy_array.T[1].min(), numpy_array.T[1].max()
        bbox_list.append(BBox.from_list([xmin, ymin, xmax, ymax]))

    img = cv2.imread(src_img_path)
    img_h, img_w = img.shape[:2]

    for i, bbox in enumerate(bbox_list):
        bbox = BBox.buffer(bbox)
        new_shape_handler = ShapeHandler()
        for shape_group in [
                parser.shape_handler.points, parser.shape_handler.rectangles,
                parser.shape_handler.polygons
        ]:
            for shape in shape_group:
                process_shape(shape=shape,
                              bbox=bbox,
                              new_shape_handler=new_shape_handler)
        new_shape_list = new_shape_handler.to_shape_list()
        if len(new_shape_list) > 0:
            img_rootname, json_rootname = get_rootname_from_path(
                dst_img_path), get_rootname_from_path(dst_json_path)
            dst_img_dir, dst_json_dir = get_dirpath_from_filepath(
                dst_img_path), get_dirpath_from_filepath(dst_json_path)
            dst_img_extension = get_extension_from_path(dst_img_path)
            dst_cropped_img_path = f"{dst_img_dir}/{img_rootname}_{i}.{dst_img_extension}"
            dst_cropped_json_path = f"{dst_json_dir}/{json_rootname}_{i}.json"
            write_cropped_image(src_path=src_img_path,
                                dst_path=dst_cropped_img_path,
                                bbox=bbox,
                                verbose=verbose)
            cropped_labelme_ann = annotation.copy()
            cropped_labelme_ann.annotation_path = dst_cropped_json_path
            cropped_labelme_ann.img_dir = dst_img_dir
            cropped_labelme_ann.img_path = dst_cropped_img_path
            cropped_img = cv2.imread(dst_cropped_img_path)
            cropped_img_h, cropped_img_w = cropped_img.shape[:2]
            cropped_labelme_ann.img_height = cropped_img_h
            cropped_labelme_ann.img_width = cropped_img_w
            cropped_labelme_ann.shapes = new_shape_list
            cropped_labelme_ann.shape_handler = new_shape_handler
            writer = LabelMeAnnotationWriter(cropped_labelme_ann)
            writer.write()
            if verbose:
                logger.info(f"Wrote {dst_cropped_json_path}")
Example #17
    def to_coco(self,
                img_dir: str = None,
                mask_dir: str = None,
                coco_license: COCO_License = None,
                check_paths: bool = True,
                mask_lower_bgr: Tuple[int] = None,
                mask_upper_bgr: Tuple[int] = (255, 255, 255),
                show_pbar: bool = True) -> COCO_Dataset:
        dataset = COCO_Dataset.new(
            description='Dataset converted from Linemod to COCO format.')
        dataset.licenses.append(
            coco_license if coco_license is not None else COCO_License(
                url=
                'https://github.com/cm107/annotation_utils/blob/master/LICENSE',
                name='MIT License',
                id=len(dataset.licenses)))
        coco_license0 = dataset.licenses[-1]
        for linemod_image in self.images:
            file_name = get_filename(linemod_image.file_name)
            img_path = linemod_image.file_name if img_dir is None else f'{img_dir}/{file_name}'
            if file_exists(img_path):
                date_captured = get_ctime(img_path)
            else:
                if check_paths:
                    raise FileNotFoundError(
                        f"Couldn't find image at {img_path}")
                date_captured = ''
            coco_image = COCO_Image(license_id=coco_license0.id,
                                    file_name=file_name,
                                    coco_url=img_path,
                                    width=linemod_image.width,
                                    height=linemod_image.height,
                                    date_captured=date_captured,
                                    flickr_url=None,
                                    id=linemod_image.id)
            dataset.images.append(coco_image)

        pbar = tqdm(total=len(self.annotations),
                    unit='annotation(s)') if show_pbar else None
        if pbar is not None:
            pbar.set_description('Converting Linemod to COCO')
        for linemod_ann in self.annotations:
            mask_filename = get_filename(linemod_ann.mask_path)
            if mask_dir is not None:
                mask_path = f'{mask_dir}/{mask_filename}'
                if not file_exists(mask_path):
                    if check_paths:
                        raise FileNotFoundError(
                            f"Couldn't find mask at {mask_path}")
                    else:
                        seg = Segmentation()
                else:
                    seg = Segmentation.from_mask_path(mask_path,
                                                      lower_bgr=mask_lower_bgr,
                                                      upper_bgr=mask_upper_bgr)
            elif file_exists(linemod_ann.mask_path):
                seg = Segmentation.from_mask_path(linemod_ann.mask_path,
                                                  lower_bgr=mask_lower_bgr,
                                                  upper_bgr=mask_upper_bgr)
            elif img_dir is not None and file_exists(
                    f'{img_dir}/{mask_filename}'):
                seg = Segmentation.from_mask_path(f'{img_dir}/{mask_filename}',
                                                  lower_bgr=mask_lower_bgr,
                                                  upper_bgr=mask_upper_bgr)
            elif not check_paths:
                seg = Segmentation()
            else:
                raise FileNotFoundError(f"""
                    Couldn't resolve mask_path for calculating segmentation.
                    Please either specify mask_dir or correct the mask paths
                    in your linemod dataset.
                    linemod_ann.id: {linemod_ann.id}
                    linemod_ann.mask_path: {linemod_ann.mask_path}
                    """)
            if len(seg) > 0:
                bbox = seg.to_bbox()
            else:
                corner_2d = linemod_ann.corner_2d.to_numpy(demarcation=True)
                xmin = int(corner_2d[:, 0].min())
                xmax = int(corner_2d[:, 0].max())
                ymin = int(corner_2d[:, 1].min())
                ymax = int(corner_2d[:, 1].max())
                bbox = BBox(xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax)

            keypoints = Keypoint2D_List.from_point_list(linemod_ann.fps_2d,
                                                        visibility=2)
            keypoints_3d = Keypoint3D_List.from_point_list(linemod_ann.fps_3d,
                                                           visibility=2)
            num_keypoints = len(keypoints)

            if linemod_ann.category_id not in [
                    cat.id for cat in dataset.categories
            ]:
                linemod_cat = self.categories.get_obj_from_id(
                    linemod_ann.category_id)
                cat_keypoints = list(
                    'abcdefghijklmnopqrstuvwxyz'.upper())[:num_keypoints]
                cat_keypoints_idx_list = [
                    idx for idx in range(len(cat_keypoints))
                ]
                cat_keypoints_idx_list_shift_left = cat_keypoints_idx_list[
                    1:] + cat_keypoints_idx_list[:1]
                dataset.categories.append(
                    COCO_Category(
                        id=linemod_ann.category_id,
                        supercategory=linemod_cat.supercategory,
                        name=linemod_cat.name,
                        keypoints=cat_keypoints,
                        skeleton=[[start_idx, end_idx]
                                  for start_idx, end_idx in zip(
                                      cat_keypoints_idx_list,
                                      cat_keypoints_idx_list_shift_left)]))

            coco_ann = COCO_Annotation(
                id=linemod_ann.id,
                category_id=linemod_ann.category_id,
                image_id=linemod_ann.image_id,
                segmentation=seg,
                bbox=bbox,
                area=bbox.area(),
                keypoints=keypoints,
                num_keypoints=num_keypoints,
                iscrowd=0,
                keypoints_3d=keypoints_3d,
                camera=COCO_Camera(f=[linemod_ann.K.fx, linemod_ann.K.fy],
                                   c=[linemod_ann.K.cx, linemod_ann.K.cy],
                                   T=[0, 0]))
            dataset.annotations.append(coco_ann)
            if pbar is not None:
                pbar.update()
        if pbar is not None:
            pbar.close()
        return dataset
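A sketch of how the conversion might be invoked, assuming linemod_dataset is an already-loaded Linemod_Dataset instance (paths are illustrative):

coco_dataset = linemod_dataset.to_coco(
    img_dir='data/linemod/rgb',
    mask_dir='data/linemod/mask',
    check_paths=False,  # fall back to empty values instead of raising FileNotFoundError
    show_pbar=True)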
Example #18
    def from_dict(cls, ann_dict: dict, strict: bool = True) -> COCO_Annotation:
        if strict:
            check_required_keys(ann_dict,
                                required_keys=[
                                    'segmentation', 'num_keypoints', 'area',
                                    'iscrowd', 'keypoints', 'image_id', 'bbox',
                                    'category_id', 'id'
                                ])
            return COCO_Annotation(
                segmentation=Segmentation.from_list(ann_dict['segmentation'],
                                                    demarcation=False),
                num_keypoints=ann_dict['num_keypoints'],
                area=ann_dict['area'],
                iscrowd=ann_dict['iscrowd'],
                keypoints=Keypoint2D_List.from_list(ann_dict['keypoints'],
                                                    demarcation=False),
                image_id=ann_dict['image_id'],
                bbox=BBox.from_list(ann_dict['bbox'], input_format='pminsize'),
                category_id=ann_dict['category_id'],
                id=ann_dict['id'],
                keypoints_3d=Keypoint3D_List.from_list(
                    ann_dict['keypoints_3d'], demarcation=False)
                if 'keypoints_3d' in ann_dict else None,
                camera=Camera.from_dict(ann_dict['camera_params'])
                if 'camera_params' in ann_dict else None)
        else:
            check_required_keys(
                ann_dict, required_keys=['id', 'category_id', 'image_id'])
            if 'segmentation' not in ann_dict:
                seg = None
            elif ann_dict['iscrowd'] == 1 and isinstance(
                    ann_dict['segmentation'], dict):
                compressed_rle = mask.frPyObjects(
                    ann_dict['segmentation'],
                    ann_dict['segmentation']['size'][0],
                    ann_dict['segmentation']['size'][1])
                seg_mask = mask.decode(compressed_rle)
                contours, _ = cv2.findContours(seg_mask, cv2.RETR_EXTERNAL,
                                               cv2.CHAIN_APPROX_SIMPLE)
                seg = Segmentation.from_contour(contour_list=contours)
            else:
                seg = Segmentation.from_list(ann_dict['segmentation'],
                                             demarcation=False)

            return COCO_Annotation(
                segmentation=seg,
                num_keypoints=ann_dict['num_keypoints']
                if 'num_keypoints' in ann_dict else None,
                area=ann_dict['area'] if 'area' in ann_dict else None,
                iscrowd=ann_dict['iscrowd'] if 'iscrowd' in ann_dict else None,
                keypoints=Keypoint2D_List.from_list(ann_dict['keypoints'],
                                                    demarcation=False)
                if 'keypoints' in ann_dict else None,
                image_id=ann_dict['image_id'],
                bbox=BBox.from_list(ann_dict['bbox'], input_format='pminsize')
                if 'bbox' in ann_dict else None,
                category_id=ann_dict['category_id'],
                id=ann_dict['id'],
                keypoints_3d=Keypoint3D_List.from_list(
                    ann_dict['keypoints_3d'], demarcation=False)
                if 'keypoints_3d' in ann_dict else None,
                camera=Camera.from_dict(ann_dict['camera_params'])
                if 'camera_params' in ann_dict else None)
Example #19
    def infer_linemod_dataset(
        self,
        dataset: Linemod_Dataset,
        img_dir: str,
        blackout: bool = False,
        show_pbar: bool = True,
        show_preview: bool = False,
        video_save_path: str = None,
        dump_dir: str = None,
        accumulate_pred_dump: bool = True,
        pred_dump_path: str = None,
        test_name: str = None,
    ) -> PVNetFrameResultList:
        stream_writer = StreamWriter(
            show_preview=show_preview,
            video_save_path=video_save_path,
            dump_dir=dump_dir,
        )
        pbar = tqdm(total=len(dataset.images), unit="image(s)") \
            if show_pbar else None
        frame_result_list = (PVNetFrameResultList() if pred_dump_path
                             is not None or accumulate_pred_dump else None)
        for linemod_image in dataset.images:
            file_name = get_filename(linemod_image.file_name)
            if pbar is not None:
                pbar.set_description(file_name)
            img_path = f"{img_dir}/{file_name}"
            img = Image.open(img_path)

            orig_img = np.asarray(img)
            orig_img = cv2.cvtColor(orig_img, cv2.COLOR_RGB2BGR)
            result = orig_img.copy()

            gt_ann = dataset.annotations.get(image_id=linemod_image.id)[0]
            if blackout:
                corner_2d = gt_ann.corner_2d.to_numpy(demarcation=True)
                xmin = int(corner_2d[:, 0].min())
                xmax = int(corner_2d[:, 0].max())
                ymin = int(corner_2d[:, 1].min())
                ymax = int(corner_2d[:, 1].max())
                bbox = BBox(xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax)
                bbox = bbox.clip_at_bounds(frame_shape=result.shape)
                result = bbox.crop_and_paste(src_img=result,
                                             dst_img=np.zeros_like(result))
            else:
                bbox = None
            pred = self.predict(img=img, bbox=bbox)
            if frame_result_list is not None:
                frame_result = PVNetFrameResult(
                    frame=file_name,
                    pred_list=PVNetPredictionList([pred]),
                    test_name=test_name,
                )
                frame_result_list.append(frame_result)

            gt_kpt_3d = gt_ann.fps_3d.copy()
            gt_kpt_3d.append(gt_ann.center_3d)
            pose_pred = pred.to_pose_pred(gt_kpt_3d=gt_kpt_3d, K=gt_ann.K)
            corner_2d_gt = pvnet_pose_utils.project(
                gt_ann.corner_3d.to_numpy(),
                gt_ann.K.to_matrix(),
                np.array(gt_ann.pose.to_list()),
            )
            corner_2d_pred = pred.to_corner_2d_pred(
                gt_corner_3d=gt_ann.corner_3d, K=gt_ann.K, pose_pred=pose_pred)

            result = draw_pts2d(img=result,
                                pts2d=gt_ann.fps_2d.to_numpy(),
                                color=(0, 255, 0),
                                radius=3)
            result = draw_corners(img=result,
                                  corner_2d=corner_2d_gt,
                                  color=(0, 255, 0),
                                  thickness=2)
            result = draw_pts2d(img=result,
                                pts2d=pred.kpt_2d,
                                color=(0, 0, 255),
                                radius=2)
            result = draw_corners(img=result,
                                  corner_2d=corner_2d_pred,
                                  color=(0, 0, 255),
                                  thickness=2)
            stream_writer.step(img=result, file_name=file_name)
            if pbar is not None:
                pbar.update()
        stream_writer.close()
        if pred_dump_path is not None:
            frame_result_list.save_to_path(pred_dump_path, overwrite=True)
        if pbar is not None:
            pbar.close()
        return frame_result_list
Example #20
    dataset.images.append(
        COCO_Image(license_id=i % len(dataset.licenses),
                   file_name=f'{i}.jpg',
                   coco_url=f'/path/to/{i}.jpg',
                   height=500,
                   width=500,
                   date_captured='N/A',
                   flickr_url=None,
                   id=len(dataset.images)))
for i in range(len(dataset.images)):
    if i % 2 == 0:
        dataset.annotations.append(
            COCO_Annotation(
                category_id=dataset.categories.get_unique_category_from_name(
                    'category_a').id,
                image_id=dataset.images[i].id,
                bbox=BBox(xmin=0, ymin=0, xmax=100, ymax=100),
                id=len(dataset.annotations)))
        dataset.annotations.append(
            COCO_Annotation(
                category_id=dataset.categories.get_unique_category_from_name(
                    'category_b').id,
                image_id=dataset.images[i].id,
                bbox=BBox(xmin=0, ymin=0, xmax=100, ymax=100),
                id=len(dataset.annotations)))
    else:
        dataset.annotations.append(
            COCO_Annotation(
                category_id=dataset.categories.get_unique_category_from_name(
                    'category_b').id,
                image_id=dataset.images[i].id,
                bbox=BBox(xmin=0, ymin=0, xmax=100, ymax=100),
                id=len(dataset.annotations)))
Example #21
# cv2.waitKey(1000)

i = 0
for filename in glob.glob(test_data_path):
    ima = cv2.imread(filename)
    #ima = cv2.resize(ima,(1024,1024))
    outputs = predictor(ima)
    #v = Visualizer(ima[:, :, ::-1],metadata=box_metadata,scale=0.8,instance_mode=ColorMode.IMAGE_BW)
    v = Visualizer(ima[:, :, ::-1],metadata=box_metadata,scale=1,instance_mode=ColorMode.SEGMENTATION)   # remove the colors of unsegmented pixels
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))


    from common_utils.common_types.bbox import BBox

    instances = outputs['instances']
    pred_boxes = [BBox.from_list(val_list).to_int() for val_list in instances.pred_boxes.tensor.cpu().numpy()]
    scores = [float(val) for val in instances.scores.cpu().numpy()]
    print(scores)
    pred_classes = instances.pred_classes.cpu().numpy().tolist()

    test_bbox = BBox(xmin=0, ymin=0, xmax=1, ymax=1)

    for bbox in pred_boxes:
        from typing import cast

        bbox = cast(BBox, bbox)
        hshape = bbox.xmin
        print(hshape)
        crop_image = bbox.crop_from(ima)
        cv2.imshow("predictions1", crop_image)
        cv2.imwrite(os.path.join(save_cropimg_path, str(i) + '.jpg'), crop_image)
        i += 1  # advance the output filename index so crops are not overwritten
Example #22
assert kpt3d_list_0 + const_int == Keypoint3D_List(
    [kpt + const_int for kpt in kpt3d_list_0])
assert kpt3d_list_0 - kpt3d_1 == Keypoint3D_List(
    [kpt - kpt3d_1 for kpt in kpt3d_list_0])
assert kpt3d_list_0 - pt3d_1 == Keypoint3D_List(
    [kpt - pt3d_1 for kpt in kpt3d_list_0])
assert kpt3d_list_0 - const_int == Keypoint3D_List(
    [kpt - const_int for kpt in kpt3d_list_0])
assert kpt3d_list_0 * const_int == Keypoint3D_List(
    [kpt * const_int for kpt in kpt3d_list_0])
assert kpt3d_list_0 / const_int == Keypoint3D_List(
    [kpt / const_int for kpt in kpt3d_list_0])

print('Keypoint3D_List Test Passed')

bbox0 = BBox(xmin=0, ymin=1, xmax=2, ymax=3)
bbox1 = BBox(xmin=4, ymin=5, xmax=6, ymax=7)
assert bbox0 + bbox1 == BBox(xmin=0, ymin=1, xmax=6, ymax=7)
assert bbox0 + const_int == BBox(xmin=bbox0.xmin + const_int,
                                 ymin=bbox0.ymin + const_int,
                                 xmax=bbox0.xmax + const_int,
                                 ymax=bbox0.ymax + const_int)
assert bbox0 + const_float == BBox(xmin=bbox0.xmin + const_float,
                                   ymin=bbox0.ymin + const_float,
                                   xmax=bbox0.xmax + const_float,
                                   ymax=bbox0.ymax + const_float)
assert bbox0 + pt2d_0 == BBox(xmin=bbox0.xmin + pt2d_0.x,
                              ymin=bbox0.ymin + pt2d_0.y,
                              xmax=bbox0.xmax + pt2d_0.x,
                              ymax=bbox0.ymax + pt2d_0.y)
assert bbox0 + kpt2d_0 == BBox(xmin=bbox0.xmin + kpt2d_0.point.x,
                               ymin=bbox0.ymin + kpt2d_0.point.y,
                               xmax=bbox0.xmax + kpt2d_0.point.x,
                               ymax=bbox0.ymax + kpt2d_0.point.y)