def get_annot_from_image_data(self, image_data: ImageData) -> Dict:
    """Build a Supervisely-style annotation dict for the given image data.

    The image is filtered via ``self.filter_image_data`` first; every bbox in
    ``image_data.bboxes_data`` becomes one rectangle object whose exterior is
    the pair of corner points [[xmin, ymin], [xmax, ymax]].
    """
    image_data = self.filter_image_data(image_data)
    height, width, _ = image_data.open_image().shape
    objects = []
    for bbox_data in image_data.bboxes_data:
        top_left = [int(bbox_data.xmin), int(bbox_data.ymin)]
        bottom_right = [int(bbox_data.xmax), int(bbox_data.ymax)]
        objects.append({
            "description": "",
            "geometryType": "rectangle",
            "tags": [{
                "name": str(bbox_data.label),
                "value": None,
            }],
            "classTitle": "bbox",
            "points": {
                "exterior": [top_left, bottom_right],
                "interior": []
            }
        })
    return {
        "description": "",
        "tags": [],
        "size": {
            "height": height,
            "width": width
        },
        "objects": objects
    }
def apply_perspective_transform_to_image_data(
        image_data: ImageData, perspective_matrix: np.ndarray,
        result_width: int, result_height: int,
        allow_negative_and_large_coords: bool,
        remove_bad_coords: bool) -> ImageData:
    """Warp the image and all of its annotations with a perspective matrix.

    The pixels are warped with ``cv2.warpPerspective``; keypoints and bboxes
    are transformed with the corresponding project helpers. Bboxes that the
    helper rejects (returns None for) are dropped from the result.
    """
    warped_image = cv2.warpPerspective(image_data.open_image(),
                                       perspective_matrix,
                                       (result_width, result_height))
    result = copy.deepcopy(image_data)
    result.keypoints = apply_perspective_transform_to_points(
        result.keypoints, perspective_matrix, result_width, result_height,
        allow_negative_and_large_coords, remove_bad_coords)
    # Transform each bbox; the helper may return None for bad coords,
    # in which case the bbox is discarded.
    surviving_bboxes = []
    for bbox_data in result.bboxes_data:
        transformed = _apply_perspective_transform_to_bbox_data(
            bbox_data, perspective_matrix, result_width, result_height,
            allow_negative_and_large_coords, remove_bad_coords)
        if transformed is not None:
            surviving_bboxes.append(transformed)
    result.bboxes_data = surviving_bboxes
    # The warped pixels replace the original file reference.
    result.image_path = None
    result.image = warped_image
    return result
def thumbnail_image_data(image_data: ImageData,
                         size: Tuple[int, int],
                         resample: Optional[int] = None) -> ImageData:
    """Resize the image data to fit inside ``size`` keeping the aspect ratio.

    The target dimensions are computed with ``get_thumbnail_resize`` and the
    actual resize (image + annotations) is delegated to ``resize_image_data``.
    """
    pil_image = Image.fromarray(image_data.open_image())
    thumb_width, thumb_height = get_thumbnail_resize(pil_image, size)
    return resize_image_data(image_data, (thumb_width, thumb_height),
                             resample=resample)
def convert_image_data_to_polygon_label(
    image_data: ImageData,
    from_name: str,
    polygonlabels: str,
) -> List[Dict]:
    """Convert each bbox's keypoints into a Label Studio polygon-label result.

    Fixes: the return annotation was ``Dict`` although the function returns a
    list of result dicts; the local accumulator was misleadingly named
    ``rectangle_labels``.

    Args:
        image_data: source image data; its bboxes' keypoints become polygons.
        from_name: Label Studio "from_name" of the labeling control.
        polygonlabels: label value attached to every polygon.

    Returns:
        A list of Label Studio "polygonlabels" result dicts with points
        expressed as percentages of the image size.
    """
    if image_data.image_path is not None:
        # Read dimensions from the file header without decoding the pixels.
        im_width, im_height = imagesize.get(image_data.image_path)
    else:
        im_height, im_width, _ = image_data.open_image().shape
    polygon_labels = []
    for bbox_data in image_data.bboxes_data:
        polygon_labels.append({
            "original_width": im_width,
            "original_height": im_height,
            "image_rotation": 0,
            "value": {
                # Label Studio expects points in percent of image size.
                "points": [[x * 100 / im_width, y * 100 / im_height]
                           for x, y in bbox_data.keypoints],
                "polygonlabels": [polygonlabels]
            },
            "from_name": from_name,
            "to_name": "image",
            "type": "polygonlabels"
        })
    return polygon_labels
def convert_image_data_to_rectangle_labels(
    image_data: ImageData,
    from_name: str,
    to_name: str,
) -> List[Dict]:
    """Convert each bbox into a Label Studio rectangle-label result.

    Fixes: the return annotation was ``Dict`` although the function returns a
    list of result dicts.

    Args:
        image_data: source image data whose bboxes are converted.
        from_name: Label Studio "from_name" of the labeling control.
        to_name: Label Studio "to_name" of the labeled object.

    Returns:
        A list of Label Studio "rectanglelabels" result dicts with x/y/width/
        height expressed as percentages of the image size.
    """
    if image_data.image_path is not None:
        # Read dimensions from the file header without decoding the pixels.
        im_width, im_height = imagesize.get(image_data.image_path)
    else:
        im_height, im_width, _ = image_data.open_image().shape
    rectangle_labels = []
    for bbox_data in image_data.bboxes_data:
        rectangle_labels.append({
            "original_width": im_width,
            "original_height": im_height,
            "image_rotation": 0,
            "value": {
                # Label Studio expects coordinates in percent of image size.
                "x": bbox_data.xmin / im_width * 100,
                "y": bbox_data.ymin / im_height * 100,
                "width": (bbox_data.xmax - bbox_data.xmin) / im_width * 100,
                "height": (bbox_data.ymax - bbox_data.ymin) / im_height * 100,
                "rotation": 0,
                "rectanglelabels": [bbox_data.label]
            },
            "from_name": from_name,
            "to_name": to_name,
            "type": "rectanglelabels"
        })
    return rectangle_labels
def convert_image_data_to_keypoint_label(
    image_data: ImageData,
    from_name: str,
    keypointlabels: str,
) -> List[Dict]:
    """Convert every keypoint of every bbox into a Label Studio keypoint result.

    Fixes: the return annotation was ``Dict`` although the function returns a
    list of result dicts; the local accumulator was misleadingly named
    ``rectangle_labels``.

    Args:
        image_data: source image data; each keypoint of each bbox is emitted.
        from_name: Label Studio "from_name" of the labeling control.
        keypointlabels: label value attached to every keypoint.

    Returns:
        A list of Label Studio "keypointlabels" result dicts with x/y in
        percent of the image size and a fixed point width of 0.55.
    """
    if image_data.image_path is not None:
        # Read dimensions from the file header without decoding the pixels.
        im_width, im_height = imagesize.get(image_data.image_path)
    else:
        im_height, im_width, _ = image_data.open_image().shape
    keypoint_labels = []
    for bbox_data in image_data.bboxes_data:
        for keypoint in bbox_data.keypoints:
            x, y = keypoint[0], keypoint[1]
            keypoint_labels.append({
                "original_width": im_width,
                "original_height": im_height,
                "image_rotation": 0,
                "value": {
                    "x": x * 100 / im_width,
                    "y": y * 100 / im_height,
                    # Fixed marker size used by the labeling UI.
                    "width": 0.55,
                    "keypointlabels": [keypointlabels]
                },
                "from_name": from_name,
                "to_name": "image",
                "type": "keypointlabels"
            })
    return keypoint_labels
def rotate_image_data(image_data: ImageData,
                      angle: float,
                      border_mode: Optional[int] = None,
                      border_value: Optional[Tuple[int, int, int]] = None):
    """Rotate the image and its annotations by ``angle`` degrees.

    Exact multiples of 90 degrees are rotated losslessly with ``np.rot90``;
    any other angle goes through ``cv2.warpAffine`` with an output canvas
    enlarged to fit the whole rotated image.

    Args:
        image_data: source image data (image, keypoints, bboxes).
        angle: rotation angle in degrees; near-zero angles are a no-op.
        border_mode: passed to ``cv2.warpAffine`` as ``borderMode``.
        border_value: passed to ``cv2.warpAffine`` as ``borderValue``.

    Returns:
        A new ImageData with rotated pixels and annotations; the original
        object is not modified.
    """
    # Near-zero rotation: return the input unchanged (same object).
    if abs(angle) <= 1e-6:
        return image_data
    image = image_data.open_image()
    height, width, _ = image.shape
    image_center = width // 2, height // 2
    # Multiples of 90 degrees map to np.rot90 quarter-turn factors.
    angle_to_factor = {0: 0, 90: 1, 180: 2, 270: 3}
    angle = angle % 360
    rotated_image_data = copy.deepcopy(image_data)
    if angle in angle_to_factor:
        # Lossless quarter-turn path: no interpolation, exact coordinates.
        factor = angle_to_factor[angle]
        rotated_image = np.rot90(image, factor)
        rotated_image_data.keypoints = rotate_keypoints90(
            image_data.keypoints, factor, width, height)
        rotated_image_data.bboxes_data = [
            _rotate_bbox_data90(bbox_data, factor, width, height)
            for bbox_data in rotated_image_data.bboxes_data
        ]
    else:
        # grab the rotation matrix
        rotation_mat = cv2.getRotationMatrix2D(image_center, angle, 1.)
        # compute the new bounding dimensions of the image
        abs_cos = abs(rotation_mat[0, 0])
        abs_sin = abs(rotation_mat[0, 1])
        bound_w = int(height * abs_sin + width * abs_cos)
        bound_h = int(height * abs_cos + width * abs_sin)
        # adjust the rotation matrix to take into account translation
        rotation_mat[0, 2] += bound_w / 2 - image_center[0]
        rotation_mat[1, 2] += bound_h / 2 - image_center[1]
        rotated_image = cv2.warpAffine(image, rotation_mat,
                                       (bound_w, bound_h),
                                       borderMode=border_mode,
                                       borderValue=border_value)
        new_height, new_width, _ = rotated_image.shape
        rotated_image_data = copy.deepcopy(image_data)
        rotated_image_data.keypoints = rotate_keypoints(
            image_data.keypoints, rotation_mat, new_height, new_width)
        # Clamp rotated keypoints into the enlarged canvas bounds.
        keypoints = []
        for (x, y) in rotated_image_data.keypoints:
            x = max(0, min(x, new_width - 1))
            y = max(0, min(y, new_height - 1))
            keypoints.append([x, y])
        rotated_image_data.keypoints = np.array(keypoints).reshape(-1, 2)
        rotated_image_data.bboxes_data = [
            _rotate_bbox_data(bbox_data, rotation_mat, new_height, new_width)
            for bbox_data in rotated_image_data.bboxes_data
        ]
    rotated_image_data.image_path = None  # It applies to all bboxes_data inside
    rotated_image_data.image = rotated_image
    return rotated_image_data
def resize_image_data(image_data: ImageData,
                      size: Tuple[int, int],
                      resample: Optional[int] = None) -> ImageData:
    """Resize the image to ``size`` (width, height) and rescale annotations.

    Bboxes and keypoints (including nested ``additional_bboxes_data``,
    handled recursively) are scaled by the new/old size ratio and clamped
    into the new image bounds.

    Args:
        image_data: source image data; a deep copy is modified and returned.
        size: target (width, height) passed to ``PIL.Image.resize``.
        resample: PIL resampling filter, forwarded to ``Image.resize``.

    Returns:
        A new ImageData with resized pixels and rescaled annotations.
    """
    image_data = copy.deepcopy(image_data)
    image = image_data.open_image()
    old_height, old_width, _ = image.shape
    image = Image.fromarray(image)
    image = image.resize(size, resample=resample)
    image = np.array(image)
    new_height, new_width, _ = image.shape

    def resize_coords(bbox_data: BboxData):
        # Scale bbox corners by the size ratio, clamped into the new bounds.
        bbox_data.xmin = max(
            0, min(int(bbox_data.xmin * (new_width / old_width)),
                   new_width - 1))
        bbox_data.ymin = max(
            0, min(int(bbox_data.ymin * (new_height / old_height)),
                   new_height - 1))
        bbox_data.xmax = max(
            0, min(int(bbox_data.xmax * (new_width / old_width)),
                   new_width - 1))
        bbox_data.ymax = max(
            0, min(int(bbox_data.ymax * (new_height / old_height)),
                   new_height - 1))
        # NOTE(review): assumes keypoints is an (N, 2) numpy array of
        # [x, y] rows — verify against BboxData.
        bbox_data.keypoints[:, 0] = (bbox_data.keypoints[:, 0] *
                                     (new_width / old_width)).astype(int)
        bbox_data.keypoints[:, 1] = (bbox_data.keypoints[:, 1] *
                                     (new_height / old_height)).astype(int)
        bbox_data.keypoints = bbox_data.keypoints.astype(int)
        # Cached crop is stale after resizing.
        bbox_data.cropped_image = None
        # Clamp every keypoint into the new image bounds.
        keypoints = []
        for (x, y) in bbox_data.keypoints:
            x = max(0, min(x, new_width - 1))
            y = max(0, min(y, new_height - 1))
            keypoints.append([x, y])
        bbox_data.keypoints = np.array(keypoints).reshape(-1, 2)
        # Recurse into nested bboxes so they are rescaled too.
        for additional_bbox_data in bbox_data.additional_bboxes_data:
            resize_coords(additional_bbox_data)

    for bbox_data in image_data.bboxes_data:
        resize_coords(bbox_data)
    # Rescale and clamp the image-level keypoints as well.
    image_data.keypoints[:, 0] = (image_data.keypoints[:, 0] *
                                  (new_width / old_width)).astype(int)
    image_data.keypoints[:, 1] = (image_data.keypoints[:, 1] *
                                  (new_height / old_height)).astype(int)
    keypoints = []
    for (x, y) in image_data.keypoints:
        x = max(0, min(x, new_width - 1))
        y = max(0, min(y, new_height - 1))
        keypoints.append([x, y])
    image_data.keypoints = np.array(keypoints).reshape(-1, 2)
    # The resized pixels replace the original file reference.
    image_data.image_path = None
    image_data.image = image
    return image_data
def tf_record_from_image_data(image_data: ImageData,
                              label_map: Dict[str, int],
                              use_thumbnail: Tuple[int, int] = None):
    """Build a TF record (via ``create_tf_record``) from image data.

    Bboxes are normalized to [0, 1] by the original image size; pixels are
    re-encoded as JPEG, optionally thumbnailed first.

    Args:
        image_data: source image data; its bboxes and labels are serialized.
        label_map: maps each bbox label string to an integer class id;
            raises KeyError for labels missing from the map.
        use_thumbnail: optional (width, height) passed to PIL's
            ``Image.thumbnail`` before JPEG encoding.

    Returns:
        Whatever ``create_tf_record`` returns for the assembled fields.
    """
    # NOTE(review): image_path may be None (e.g. after in-memory transforms),
    # which serializes the filename as the bytes b'None' — confirm intended.
    filename = image_data.image_path
    encoded_filename = str(filename).encode('utf8')
    true_bboxes = np.array(
        [[bbox_data.xmin, bbox_data.ymin, bbox_data.xmax, bbox_data.ymax]
         for bbox_data in image_data.bboxes_data],
        dtype=float)
    image = image_data.open_image()
    height, width, _ = image.shape
    if len(true_bboxes) > 0:
        # Normalize pixel coordinates to [0, 1] by the original image size.
        normalized_true_bboxes = true_bboxes.copy()
        normalized_true_bboxes[:, [0, 2]] /= width
        normalized_true_bboxes[:, [1, 3]] /= height
        xmins = normalized_true_bboxes[:, 0]
        ymins = normalized_true_bboxes[:, 1]
        xmaxs = normalized_true_bboxes[:, 2]
        ymaxs = normalized_true_bboxes[:, 3]
    else:
        ymins, xmins, ymaxs, xmaxs = [], [], [], []
    encoded_jpg = BytesIO()
    image = Image.fromarray(image)
    if use_thumbnail:
        # NOTE(review): the record still stores the pre-thumbnail height/width
        # below; normalized bboxes stay valid, but absolute sizes describe the
        # original image, not the stored JPEG — confirm this is intended.
        image.thumbnail(use_thumbnail)
    image.save(encoded_jpg, format='JPEG')
    encoded_jpg = encoded_jpg.getvalue()
    image_format = b'jpg'
    class_names = [bbox_data.label for bbox_data in image_data.bboxes_data]
    encoded_class_names = [
        class_name.encode('utf-8') for class_name in class_names
    ]
    classes = [label_map[class_name] for class_name in class_names]
    tf_record = create_tf_record(height=height,
                                 width=width,
                                 encoded_filename=encoded_filename,
                                 encoded_jpg=encoded_jpg,
                                 image_format=image_format,
                                 xmins=xmins,
                                 ymins=ymins,
                                 xmaxs=xmaxs,
                                 ymaxs=ymaxs,
                                 encoded_class_names=encoded_class_names,
                                 classes=classes)
    return tf_record
def concat_images_data(image_data_a: ImageData,
                       image_data_b: ImageData,
                       background_color_a: Tuple[int, int, int, int] = None,
                       background_color_b: Tuple[int, int, int, int] = None,
                       thumbnail_size_a: Tuple[int, int] = None,
                       thumbnail_size_b: Tuple[int, int] = None,
                       how: Literal['horizontally',
                                    'vertically'] = 'horizontally',
                       mode: Literal['L', 'RGB', 'RGBA'] = 'RGBA',
                       background_edge_width: int = 3,
                       between_edge_width: int = 0) -> ImageData:
    """Concatenate two ImageData side by side and merge their annotations.

    Pixels are joined with ``concat_images``; every bbox and keypoint of each
    source is shifted by the offset of its image inside the combined canvas.
    Each source's bboxes are additionally wrapped into a synthetic bbox
    labeled ``concat_images_data__image_data`` covering that image's region
    (unless such a wrapper already exists from a previous concatenation).

    Fixes: ``keypoints_b`` was taken from ``image_data_a`` (copy-paste bug),
    which dropped image B's keypoints and offset A's keypoints twice.

    Args:
        image_data_a, image_data_b: sources; if one is None the other is
            returned as-is (deep-copied).
        background_color_a/b, thumbnail_size_a/b, how, mode,
        background_edge_width, between_edge_width: forwarded to
            ``concat_images``; ``how`` and ``between_edge_width`` also drive
            the annotation offsets.

    Returns:
        A new ImageData holding the combined image and shifted annotations.
    """
    image_data_a = copy.deepcopy(image_data_a)
    image_data_b = copy.deepcopy(image_data_b)
    if image_data_a is None and image_data_b is not None:
        return image_data_b
    if image_data_a is not None and image_data_b is None:
        return image_data_a
    image_a = image_data_a.open_image()
    image_b = image_data_b.open_image()
    ha, wa = image_a.shape[:2]
    hb, wb = image_b.shape[:2]
    image = concat_images(image_a=image_a,
                          image_b=image_b,
                          background_color_a=background_color_a,
                          background_color_b=background_color_b,
                          thumbnail_size_a=thumbnail_size_a,
                          thumbnail_size_b=thumbnail_size_b,
                          how=how,
                          mode=mode,
                          background_edge_width=background_edge_width,
                          between_edge_width=between_edge_width)
    image_data_a_new_xmin, image_data_a_new_ymin = None, None
    image_data_b_new_xmin, image_data_b_new_ymin = None, None
    if how == 'horizontally':
        # Both images are vertically centered on the tallest one.
        max_height = np.max([ha, hb])
        min_ha = max_height // 2 - ha // 2
        max_ha = max_height // 2 + ha // 2
        min_hb = max_height // 2 - hb // 2
        max_hb = max_height // 2 + hb // 2
        image_data_a_new_xmin = 0
        image_data_a_new_ymin = min_ha
        image_data_a_new_xmax = wa
        image_data_a_new_ymax = max_ha
        image_data_b_new_xmin = wa + between_edge_width
        image_data_b_new_ymin = min_hb
        image_data_b_new_xmax = wa + between_edge_width + wb
        image_data_b_new_ymax = max_hb
    elif how == 'vertically':
        # Both images are horizontally centered on the widest one.
        max_width = np.max([wa, wb])
        min_wa = max_width // 2 - wa // 2
        max_wa = max_width // 2 + wa // 2
        min_wb = max_width // 2 - wb // 2
        max_wb = max_width // 2 + wb // 2
        image_data_a_new_xmin = min_wa
        image_data_a_new_ymin = 0
        image_data_a_new_xmax = max_wa
        image_data_a_new_ymax = ha
        image_data_b_new_xmin = min_wb
        image_data_b_new_ymin = ha + between_edge_width
        image_data_b_new_xmax = max_wb
        image_data_b_new_ymax = ha + between_edge_width + hb
    keypoints_a = image_data_a.keypoints
    # BUGFIX: was image_data_a.keypoints — B's keypoints were lost and A's
    # were shifted twice (both names aliased the same array).
    keypoints_b = image_data_b.keypoints
    keypoints_a[:, 0] += image_data_a_new_xmin
    keypoints_a[:, 1] += image_data_a_new_ymin
    keypoints_b[:, 0] += image_data_b_new_xmin
    keypoints_b[:, 1] += image_data_b_new_ymin

    def _get_new_coords_for_bbox_data(bbox_data: BboxData, xmin: int,
                                      ymin: int):
        # Shift a bbox (and its nested bboxes) by the image's offset in the
        # combined canvas; cached pixel data becomes stale.
        bbox_data.keypoints[:, 0] += xmin
        bbox_data.keypoints[:, 1] += ymin
        bbox_data.xmin += xmin
        bbox_data.ymin += ymin
        bbox_data.xmax += xmin
        bbox_data.ymax += ymin
        bbox_data.image = None
        bbox_data.image_path = None
        bbox_data.cropped_image = None
        for additional_bbox_data in bbox_data.additional_bboxes_data:
            _get_new_coords_for_bbox_data(additional_bbox_data, xmin, ymin)

    for bbox_data in image_data_a.bboxes_data:
        _get_new_coords_for_bbox_data(bbox_data, image_data_a_new_xmin,
                                      image_data_a_new_ymin)
    # Wrap A's bboxes into a synthetic region bbox unless one already exists.
    if 'concat_images_data__image_data' not in [
            bbox_data.label for bbox_data in image_data_a.bboxes_data
    ]:
        bbox_data_a_into = [
            BboxData(xmin=image_data_a_new_xmin,
                     ymin=image_data_a_new_ymin,
                     xmax=image_data_a_new_xmax,
                     ymax=image_data_a_new_ymax,
                     label='concat_images_data__image_data',
                     additional_bboxes_data=[
                         bbox_data for bbox_data in image_data_a.bboxes_data
                         if 'concat_images_data__image_data' != bbox_data.label
                     ])
        ]
    else:
        bbox_data_a_into = []
    image_data_a.bboxes_data = [
        bbox_data for bbox_data in image_data_a.bboxes_data
        if 'concat_images_data__image_data' == bbox_data.label
    ] + bbox_data_a_into
    for bbox_data in image_data_b.bboxes_data:
        _get_new_coords_for_bbox_data(bbox_data, image_data_b_new_xmin,
                                      image_data_b_new_ymin)
    # Wrap B's bboxes into a synthetic region bbox unless one already exists.
    if 'concat_images_data__image_data' not in [
            bbox_data.label for bbox_data in image_data_b.bboxes_data
    ]:
        bbox_data_b_into = [
            BboxData(xmin=image_data_b_new_xmin,
                     ymin=image_data_b_new_ymin,
                     xmax=image_data_b_new_xmax,
                     ymax=image_data_b_new_ymax,
                     label='concat_images_data__image_data',
                     additional_bboxes_data=[
                         bbox_data for bbox_data in image_data_b.bboxes_data
                         if 'concat_images_data__image_data' != bbox_data.label
                     ])
        ]
    else:
        bbox_data_b_into = []
    image_data_b.bboxes_data = [
        bbox_data for bbox_data in image_data_b.bboxes_data
        if 'concat_images_data__image_data' == bbox_data.label
    ] + bbox_data_b_into
    image_data = ImageData(image_path=None,
                           image=image,
                           bboxes_data=image_data_a.bboxes_data +
                           image_data_b.bboxes_data,
                           label=None,
                           keypoints=np.concatenate(
                               [keypoints_a, keypoints_b], axis=0),
                           additional_info={
                               **image_data_a.additional_info,
                               **image_data_b.additional_info
                           })
    return image_data
def crop_image_data(
    image_data: ImageData,
    xmin: int,
    ymin: int,
    xmax: int,
    ymax: int,
    allow_negative_and_large_coords: bool,
    remove_bad_coords: bool,
) -> ImageData:
    """Crop the image to [xmin:xmax, ymin:ymax] and shift annotations.

    Bboxes and keypoints (including nested ``additional_bboxes_data``) are
    translated into the crop's coordinate frame; optionally clamped into the
    crop bounds, and optionally filtered out when they fall outside the crop.

    Fixes: the keypoint in-bounds filters compared x against ``new_height``
    and y against ``new_width`` (swapped). The clamping code establishes that
    keypoint column 0 is x (bounded by width) and column 1 is y (bounded by
    height), so out-of-crop keypoints were filtered against the wrong axis
    on non-square crops.

    Args:
        image_data: source image data; a deep copy is modified and returned.
        xmin, ymin, xmax, ymax: crop rectangle; must lie inside the image.
        allow_negative_and_large_coords: if False, bbox corners and keypoints
            are clamped into the crop bounds.
        remove_bad_coords: if True, bboxes and keypoints outside the crop are
            removed.

    Returns:
        A new ImageData holding the cropped pixels and shifted annotations.
    """
    assert 0 <= xmin and 0 <= ymin
    assert xmin <= xmax and ymin <= ymax
    image_data = copy.deepcopy(image_data)
    image = image_data.open_image()
    height, width, _ = image.shape
    assert xmax <= width and ymax <= height
    image = image[ymin:ymax, xmin:xmax]
    new_height, new_width, _ = image.shape

    def resize_coords(bbox_data: BboxData):
        # Translate the bbox (and nested bboxes) into the crop's frame.
        bbox_data.xmin = bbox_data.xmin - xmin
        bbox_data.ymin = bbox_data.ymin - ymin
        bbox_data.xmax = bbox_data.xmax - xmin
        bbox_data.ymax = bbox_data.ymax - ymin
        bbox_data.keypoints[:, 0] -= xmin
        bbox_data.keypoints[:, 1] -= ymin
        bbox_data.cropped_image = None
        if not allow_negative_and_large_coords:
            # Clamp corners and keypoints into the crop bounds.
            bbox_data.xmin = max(0, min(bbox_data.xmin, new_width - 1))
            bbox_data.ymin = max(0, min(bbox_data.ymin, new_height - 1))
            bbox_data.xmax = max(0, min(bbox_data.xmax, new_width - 1))
            bbox_data.ymax = max(0, min(bbox_data.ymax, new_height - 1))
            keypoints = []
            for (x, y) in bbox_data.keypoints:
                x = max(0, min(x, new_width - 1))
                y = max(0, min(y, new_height - 1))
                keypoints.append([x, y])
            bbox_data.keypoints = np.array(keypoints).reshape(-1, 2)
        for additional_bbox_data in bbox_data.additional_bboxes_data:
            resize_coords(additional_bbox_data)

    for bbox_data in image_data.bboxes_data:
        resize_coords(bbox_data)
    # Shift and clamp the image-level keypoints into the crop bounds.
    keypoints = []
    for (x, y) in image_data.keypoints:
        x = max(0, min(x - xmin, new_width - 1))
        y = max(0, min(y - ymin, new_height - 1))
        keypoints.append([x, y])
    image_data.keypoints = np.array(keypoints).reshape(-1, 2)

    def if_bbox_data_inside_crop(bbox_data: BboxData):
        # Drop out-of-crop keypoints; recurse into nested bboxes and drop
        # those that are fully outside the crop.
        # BUGFIX: x (column 0) is bounded by new_width and y (column 1) by
        # new_height; the comparisons were swapped.
        bbox_data.keypoints = bbox_data.keypoints[(
            (bbox_data.keypoints[:, 0] >= 0) &
            (bbox_data.keypoints[:, 1] >= 0) &
            (bbox_data.keypoints[:, 0] < new_width) &
            (bbox_data.keypoints[:, 1] < new_height))]
        bbox_data.additional_bboxes_data = [
            additional_bbox_data
            for additional_bbox_data in bbox_data.additional_bboxes_data
            if if_bbox_data_inside_crop(additional_bbox_data)
        ]
        return (bbox_data.xmin >= 0 and bbox_data.ymin >= 0
                and bbox_data.xmax < new_width
                and bbox_data.ymax < new_height
                and bbox_data.xmin < bbox_data.xmax
                and bbox_data.ymin < bbox_data.ymax)

    if remove_bad_coords:
        image_data.bboxes_data = [
            bbox_data for bbox_data in image_data.bboxes_data
            if if_bbox_data_inside_crop(bbox_data)
        ]
        # BUGFIX: same swapped width/height comparison as above.
        image_data.keypoints = image_data.keypoints[(
            (image_data.keypoints[:, 0] >= 0) &
            (image_data.keypoints[:, 1] >= 0) &
            (image_data.keypoints[:, 0] < new_width) &
            (image_data.keypoints[:, 1] < new_height))]
    # The cropped pixels replace the original file reference.
    image_data.image_path = None
    image_data.image = image
    return image_data
def visualize_image_data(
    image_data: ImageData,
    use_labels: bool = False,
    score_type: Literal['detection', 'classification'] = None,
    filter_by_labels: List[str] = None,
    known_labels: List[str] = None,
    draw_base_labels_with_given_label_to_base_label_image: Callable[
        [str], np.ndarray] = None,
    keypoints_radius: int = 5,
    include_additional_bboxes_data: bool = False,
    additional_bboxes_data_depth: Optional[int] = None,
    fontsize: int = 24,
    thickness: int = 4,
    return_as_pil_image: bool = False
) -> Union[np.ndarray, Image.Image]:
    """Render the image with its bboxes, keypoints and labels drawn on top.

    Args:
        image_data: source image data to visualize.
        use_labels: draw label text next to each bbox.
        score_type: which score to show per bbox ('detection' or
            'classification'); None hides scores.
        filter_by_labels: keep only bboxes with these labels (applied via
            ``get_image_data_filtered_by_labels``).
        known_labels: label universe used for consistent coloring; defaults
            to the distinct labels present.
        draw_base_labels_with_given_label_to_base_label_image: optional
            callable mapping a label to an image drawn over the bbox.
        keypoints_radius: radius in pixels for keypoint circles.
        include_additional_bboxes_data: also draw nested
            ``additional_bboxes_data``, recursively.
        additional_bboxes_data_depth: max recursion depth for nested bboxes;
            None means unlimited.
        fontsize, thickness: text size and box line width.
        return_as_pil_image: return a PIL Image instead of a numpy array.

    Returns:
        The rendered image as a numpy array, or a PIL Image if
        ``return_as_pil_image`` is True.
    """
    image_data = get_image_data_filtered_by_labels(
        image_data=image_data,
        filter_by_labels=filter_by_labels
    )
    image = image_data.open_image()
    if include_additional_bboxes_data:
        # Flatten nested bboxes up to the requested depth (depth 0 = top
        # level). Note: the loop variable shadows the outer bbox_data.
        bboxes_data = []
        def recursive_get_bboxes_data(bbox_data, depth):
            if additional_bboxes_data_depth is not None and depth > additional_bboxes_data_depth:
                return
            bboxes_data.append(bbox_data)
            for bbox_data in bbox_data.additional_bboxes_data:
                recursive_get_bboxes_data(bbox_data, depth=depth+1)
        for bbox_data in image_data.bboxes_data:
            recursive_get_bboxes_data(bbox_data, depth=0)
    else:
        bboxes_data = image_data.bboxes_data
    labels = [bbox_data.label for bbox_data in bboxes_data]
    if known_labels is None:
        known_labels = list(set(labels))
    k_keypoints = [bbox_data.keypoints for bbox_data in bboxes_data]
    # Boxes are passed in (ymin, xmin, ymax, xmax) order.
    bboxes = np.array([
        (bbox_data.ymin, bbox_data.xmin, bbox_data.ymax, bbox_data.xmax)
        for bbox_data in bboxes_data
    ])
    # All boxes are drawn axis-aligned (zero rotation).
    angles = np.array([0.
                       for _ in bboxes_data])
    if score_type == 'detection':
        scores = np.array([bbox_data.detection_score for bbox_data in bboxes_data])
        skip_scores = False
    elif score_type == 'classification':
        scores = np.array([bbox_data.classification_score for bbox_data in bboxes_data])
        skip_scores = False
    else:
        scores = None
        skip_scores = True
    image = visualize_boxes_and_labels_on_image_array(
        image=image,
        bboxes=bboxes,
        angles=angles,
        scores=scores,
        k_keypoints=k_keypoints,
        labels=labels,
        use_normalized_coordinates=False,
        skip_scores=skip_scores,
        skip_labels=not use_labels,
        groundtruth_box_visualization_color='lime',
        known_labels=known_labels,
        keypoints_radius=keypoints_radius,
        fontsize=fontsize,
        thickness=thickness
    )
    if len(image_data.keypoints) > 0:
        # Draw image-level keypoints as filled circles, cycling the palette.
        image_pil = Image.fromarray(image)
        draw = ImageDraw.Draw(image_pil)
        for idx, (x, y) in enumerate(image_data.keypoints):
            draw.pieslice(
                [(x-keypoints_radius, y-keypoints_radius),
                 (x+keypoints_radius, y+keypoints_radius)],
                start=0, end=360,
                fill=STANDARD_COLORS_RGB[idx % len(STANDARD_COLORS_RGB)]
            )
        image = np.array(image_pil)
    if draw_base_labels_with_given_label_to_base_label_image is not None:
        # Overlay a per-label base image onto each bbox, in place.
        for bbox_data in image_data.bboxes_data:
            base_label_image = draw_base_labels_with_given_label_to_base_label_image(bbox_data.label)
            draw_label_image(
                image=image,
                base_label_image=base_label_image,
                bbox_data=bbox_data,
                inplace=True
            )
    if return_as_pil_image:
        return Image.fromarray(image)
    return image