Example #1
def non_max_suppression_image_data(image_data: ImageData,
                                   iou: float) -> ImageData:
    image_data = copy.deepcopy(image_data)
    current_bboxes_data = image_data.bboxes_data.copy()
    new_bboxes_data = []
    while len(current_bboxes_data) != 0:
        current_bbox_data = current_bboxes_data[0]
        success = True
        if len(current_bboxes_data) > 1:
            for idx, bbox_data in enumerate(current_bboxes_data):
                if idx == 0:
                    continue
                bbox_iou = intersection_over_union(current_bbox_data,
                                                   bbox_data)

                if bbox_iou >= iou:
                    pairs_bboxes_data = [bbox_data, current_bbox_data]
                    pairs_scores = [
                        possible_bbox_data.detection_score if
                        possible_bbox_data.detection_score is not None else 1.
                        for possible_bbox_data in pairs_bboxes_data
                    ]
                    top_score_idx = np.argmax(pairs_scores)
                    top_bbox_data = pairs_bboxes_data[top_score_idx]
                    current_bboxes_data.pop(idx)
                    current_bboxes_data.pop(0)
                    current_bboxes_data.append(
                        BboxData(
                            xmin=min(bbox_data.xmin, current_bbox_data.xmin),
                            ymin=min(bbox_data.ymin, current_bbox_data.ymin),
                            xmax=max(bbox_data.xmax, current_bbox_data.xmax),
                            ymax=max(bbox_data.ymax, current_bbox_data.ymax),
                            detection_score=top_bbox_data.detection_score,
                            label=top_bbox_data.label,
                            keypoints=top_bbox_data.keypoints,
                            additional_bboxes_data=top_bbox_data.additional_bboxes_data,
                            additional_info=top_bbox_data.additional_info))
                    success = False
                    break
        if success:
            new_bboxes_data.append(current_bboxes_data.pop(0))

    image_data.bboxes_data = new_bboxes_data
    return image_data
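
The intersection_over_union helper is not shown in these examples; a minimal sketch of the standard IoU computation it presumably implements, assuming only the (xmin, ymin, xmax, ymax) attributes used throughout:

def intersection_over_union(bbox_data_a, bbox_data_b) -> float:
    # Intersection rectangle; zero area if the boxes do not overlap.
    ixmin = max(bbox_data_a.xmin, bbox_data_b.xmin)
    iymin = max(bbox_data_a.ymin, bbox_data_b.ymin)
    ixmax = min(bbox_data_a.xmax, bbox_data_b.xmax)
    iymax = min(bbox_data_a.ymax, bbox_data_b.ymax)
    intersection = max(0, ixmax - ixmin) * max(0, iymax - iymin)
    area_a = (bbox_data_a.xmax - bbox_data_a.xmin) * (bbox_data_a.ymax - bbox_data_a.ymin)
    area_b = (bbox_data_b.xmax - bbox_data_b.xmin) * (bbox_data_b.ymax - bbox_data_b.ymin)
    union = area_a + area_b - intersection
    return intersection / union if union > 0 else 0.
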
Example #2
    def _inference_detection_and_get_metrics(
            self,
            model_spec: DetectionModelSpec,
            true_images_data: List[ImageData],
            score_threshold: float,
            minimum_iou: float,
            extra_bbox_label: str = None,
            batch_size: int = 16) -> Tuple[pd.DataFrame, pd.DataFrame]:
        detection_model = model_spec.load()
        inferencer = DetectionInferencer(detection_model)
        images_data_gen = BatchGeneratorImageData(
            true_images_data,
            batch_size=batch_size,
            use_not_caught_elements_as_last_batch=True)
        raw_pred_images_data = inferencer.predict(images_data_gen,
                                                  score_threshold=0.)
        pred_images_data = [
            ImageData(image_path=image_data.image_path,
                      bboxes_data=[
                          bbox_data for bbox_data in image_data.bboxes_data
                          if bbox_data.detection_score >= score_threshold
                      ]) for image_data in raw_pred_images_data
        ]
        df_detection_metrics = get_df_detection_metrics(
            true_images_data=true_images_data,
            pred_images_data=pred_images_data,
            minimum_iou=minimum_iou,
            raw_pred_images_data=raw_pred_images_data)
        df_detection_recall_per_class = get_df_detection_recall_per_class(
            true_images_data=true_images_data,
            pred_images_data=pred_images_data,
            minimum_iou=minimum_iou,
        )
        return df_detection_metrics, df_detection_recall_per_class
Example #3
def convert_image_data_to_polygon_label(
    image_data: ImageData,
    from_name: str,
    polygonlabels: str,
) -> List[Dict]:
    if image_data.image_path is not None:
        im_width, im_height = imagesize.get(image_data.image_path)
    else:
        im_height, im_width, _ = image_data.open_image().shape
    polygon_labels = []
    for bbox_data in image_data.bboxes_data:
        polygon_labels.append({
            "original_width": im_width,
            "original_height": im_height,
            "image_rotation": 0,
            "value": {
                "points": [[x * 100 / im_width, y * 100 / im_height]
                           for x, y in bbox_data.keypoints],
                "polygonlabels": [polygonlabels]
            },
            "from_name": from_name,
            "to_name": "image",
            "type": "polygonlabels"
        })
    return polygon_labels
Example #4
def convert_image_data_to_rectangle_labels(
    image_data: ImageData,
    from_name: str,
    to_name: str,
) -> List[Dict]:
    if image_data.image_path is not None:
        im_width, im_height = imagesize.get(image_data.image_path)
    else:
        im_height, im_width, _ = image_data.open_image().shape
    rectangle_labels = []
    for bbox_data in image_data.bboxes_data:
        rectangle_labels.append({
            "original_width": im_width,
            "original_height": im_height,
            "image_rotation": 0,
            "value": {
                "x": bbox_data.xmin / im_width * 100,
                "y": bbox_data.ymin / im_height * 100,
                "width": (bbox_data.xmax - bbox_data.xmin) / im_width * 100,
                "height": (bbox_data.ymax - bbox_data.ymin) / im_height * 100,
                "rotation": 0,
                "rectanglelabels": [bbox_data.label]
            },
            "from_name": from_name,
            "to_name": to_name,
            "type": "rectanglelabels"
        })
    return rectangle_labels
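
These dicts target Label Studio's annotation format, which stores rectangle coordinates as percentages of the image size. A minimal sketch of the inverse conversion (a hypothetical helper, not part of the code above) that recovers pixel coordinates from one such "value" dict:

def rectangle_label_value_to_pixels(value: dict, im_width: int,
                                    im_height: int) -> tuple:
    # Inverse of the conversion above: percentages back to pixels.
    xmin = value["x"] * im_width / 100
    ymin = value["y"] * im_height / 100
    xmax = xmin + value["width"] * im_width / 100
    ymax = ymin + value["height"] * im_height / 100
    return xmin, ymin, xmax, ymax
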
Example #5
def convert_image_data_to_keypoint_label(
    image_data: ImageData,
    from_name: str,
    keypointlabels: str,
) -> List[Dict]:
    if image_data.image_path is not None:
        im_width, im_height = imagesize.get(image_data.image_path)
    else:
        im_height, im_width, _ = image_data.open_image().shape
    keypoint_labels = []
    for bbox_data in image_data.bboxes_data:
        for keypoint in bbox_data.keypoints:
            x, y = keypoint[0], keypoint[1]
            keypoint_labels.append({
                "original_width": im_width,
                "original_height": im_height,
                "image_rotation": 0,
                "value": {
                    "x": x * 100 / im_width,
                    "y": y * 100 / im_height,
                    "width": 0.55,
                    "keypointlabels": [keypointlabels]
                },
                "from_name": from_name,
                "to_name": "image",
                "type": "keypointlabels"
            })
    return keypoint_labels
Example #6
    def get_image_data_from_annot(
        self, image_path: Union[str, Path],
        annot: Union[Path, str, fsspec.core.OpenFile, List[str]]
    ) -> ImageData:
        if isinstance(annot, str) or isinstance(annot, Path):
            with fsspec.open(annot, 'r', encoding='utf8') as f:
                annots = f.read()
        elif isinstance(annot, fsspec.core.OpenFile):
            with annot as f:
                annots = f.read()
        elif isinstance(annot, List):
            annots = '\n'.join(annot)

        width, height = get_image_size(image_path)
        bboxes_data = []
        for line in annots.strip().split('\n'):
            idx, xcenter, ycenter, w, h = line.split(' ')
            label = self.idx_to_class_name[int(idx)]
            xcenter, ycenter, w, h = (float(xcenter), float(ycenter),
                                      float(w), float(h))
            xcenter, w = xcenter * width, w * width
            ycenter, h = ycenter * height, h * height
            bboxes_data.append(
                BboxData(xmin=xcenter - w / 2,
                         ymin=ycenter - h / 2,
                         xmax=xcenter + w / 2,
                         ymax=ycenter + h / 2,
                         label=label))

        return ImageData(image_path=image_path, bboxes_data=bboxes_data)
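
Each YOLO annotation line holds a class index and a center/size box normalized to [0, 1]. A standalone sketch of the same corner conversion on a single line, assuming only the standard YOLO txt format:

def yolo_line_to_corners(line: str, width: int, height: int) -> tuple:
    # "idx xcenter ycenter w h", all box values normalized to [0, 1].
    idx, xcenter, ycenter, w, h = line.split()
    xcenter, w = float(xcenter) * width, float(w) * width
    ycenter, h = float(ycenter) * height, float(h) * height
    return (int(idx), xcenter - w / 2, ycenter - h / 2,
            xcenter + w / 2, ycenter + h / 2)

# A centered half-size box in a 640x480 image:
print(yolo_line_to_corners("0 0.5 0.5 0.5 0.5", 640, 480))
# -> (0, 160.0, 120.0, 480.0, 360.0)
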
Example #7
    def _postprocess_images_data(
            self, images_data: List[ImageData],
            pred_labels_top_n: List[List[str]],
            pred_scores_top_n: List[List[float]],
            open_images_in_images_data: bool) -> List[ImageData]:
        images_data_res = []
        for (image_data, pred_label_top_n,
             pred_classification_score_top_n) in zip(images_data,
                                                     pred_labels_top_n,
                                                     pred_scores_top_n):
            image = image_data.image if open_images_in_images_data else None
            images_data_res.append(
                ImageData(
                    image_path=image_data.image_path,
                    image=image,
                    label=pred_label_top_n[0],
                    bboxes_data=image_data.bboxes_data,
                    keypoints=image_data.keypoints,
                    additional_info={
                        **image_data.additional_info,
                        'pred_classification_score': pred_classification_score_top_n[0],
                        'pred_label_top_n': pred_label_top_n,
                        'pred_classification_scores_top_n': pred_classification_score_top_n
                    }))

        return images_data_res
Example #8
def thumbnail_image_data(image_data: ImageData,
                         size: Tuple[int, int],
                         resample: Optional[int] = None) -> ImageData:
    image = image_data.open_image()
    new_width, new_height = get_thumbnail_resize(Image.fromarray(image), size)
    return resize_image_data(image_data, (new_width, new_height),
                             resample=resample)
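
get_thumbnail_resize is external to these examples. PIL's Image.thumbnail shrinks an image in place while preserving its aspect ratio (it never enlarges), so a plausible sketch of the helper, offered as an assumption rather than the library's actual code:

from PIL import Image

def get_thumbnail_resize(image: Image.Image, size) -> tuple:
    # Let PIL compute the aspect-preserving target size on a throwaway copy.
    thumbnail = image.copy()
    thumbnail.thumbnail(size)
    return thumbnail.width, thumbnail.height
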
Example #9
    def get_annot_from_image_data(self, image_data: ImageData) -> Dict:
        image_data = self.filter_image_data(image_data)
        image = image_data.open_image()
        height, width, _ = image.shape
        annot = {
            "description": "",
            "tags": [],
            "size": {
                "height": height,
                "width": width
            },
            "objects": [{
                "description": "",
                "geometryType": "rectangle",
                "tags": [{
                    "name": str(bbox_data.label),
                    "value": None,
                }],
                "classTitle": "bbox",
                "points": {
                    "exterior": [[int(bbox_data.xmin),
                                  int(bbox_data.ymin)],
                                 [int(bbox_data.xmax),
                                  int(bbox_data.ymax)]],
                    "interior": []
                }
            } for bbox_data in image_data.bboxes_data]
        }
        return annot
Example #10
    def get_image_data_from_annot(
            self, image_path: Union[str, Path, fsspec.core.OpenFile],
            annot: Union[Path, str, Dict, fsspec.core.OpenFile]) -> ImageData:
        if isinstance(annot, str) or isinstance(annot, Path):
            with fsspec.open(annot, 'r', encoding='utf8') as f:
                annot = json.load(f)
        if isinstance(annot, fsspec.core.OpenFile):
            with annot as f:
                annot = json.load(f)

        bboxes_data = []
        for obj in annot['objects']:
            (xmin, ymin), (xmax, ymax) = obj['points']['exterior']
            label = obj['tags'][0]['name'] if obj['tags'] else None
            bboxes_data.append(
                BboxData(image_path=image_path,
                         xmin=xmin,
                         ymin=ymin,
                         xmax=xmax,
                         ymax=ymax,
                         label=label))

        image_data = ImageData(image_path=image_path, bboxes_data=bboxes_data)

        return image_data
Example #11
def rotate_image_data(image_data: ImageData,
                      angle: float,
                      border_mode: Optional[int] = None,
                      border_value: Tuple[int, int, int] = None) -> ImageData:
    if abs(angle) <= 1e-6:
        return image_data

    image = image_data.open_image()
    height, width, _ = image.shape
    image_center = width // 2, height // 2

    angle_to_factor = {0: 0, 90: 1, 180: 2, 270: 3}
    angle = angle % 360
    rotated_image_data = copy.deepcopy(image_data)

    if angle in angle_to_factor:
        factor = angle_to_factor[angle]
        rotated_image = np.rot90(image, factor)
        rotated_image_data.keypoints = rotate_keypoints90(
            image_data.keypoints, factor, width, height)
        rotated_image_data.bboxes_data = [
            _rotate_bbox_data90(bbox_data, factor, width, height)
            for bbox_data in rotated_image_data.bboxes_data
        ]
    else:
        # grab the rotation matrix
        rotation_mat = cv2.getRotationMatrix2D(image_center, angle, 1.)
        # compute the new bounding dimensions of the image
        abs_cos = abs(rotation_mat[0, 0])
        abs_sin = abs(rotation_mat[0, 1])
        bound_w = int(height * abs_sin + width * abs_cos)
        bound_h = int(height * abs_cos + width * abs_sin)
        # adjust the rotation matrix to take into account translation
        rotation_mat[0, 2] += bound_w / 2 - image_center[0]
        rotation_mat[1, 2] += bound_h / 2 - image_center[1]

        rotated_image = cv2.warpAffine(image,
                                       rotation_mat, (bound_w, bound_h),
                                       borderMode=border_mode,
                                       borderValue=border_value)
        new_height, new_width, _ = rotated_image.shape
        rotated_image_data.keypoints = rotate_keypoints(
            image_data.keypoints, rotation_mat, new_height, new_width)
        keypoints = []
        for (x, y) in rotated_image_data.keypoints:
            x = max(0, min(x, new_width - 1))
            y = max(0, min(y, new_height - 1))
            keypoints.append([x, y])
        rotated_image_data.keypoints = np.array(keypoints).reshape(-1, 2)
        rotated_image_data.bboxes_data = [
            _rotate_bbox_data(bbox_data, rotation_mat, new_height, new_width)
            for bbox_data in rotated_image_data.bboxes_data
        ]

    rotated_image_data.image_path = None  # It applies to all bboxes_data inside
    rotated_image_data.image = rotated_image

    return rotated_image_data
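
rotate_keypoints is also external. Applying a 2x3 affine matrix from cv2.getRotationMatrix2D to a point set reduces to one matrix product in homogeneous coordinates, so a minimal sketch under that assumption (the caller above does the clamping to the new bounds, so new_height/new_width are unused here):

import numpy as np

def rotate_keypoints(keypoints: np.ndarray, rotation_mat: np.ndarray,
                     new_height: int, new_width: int) -> np.ndarray:
    # keypoints: (N, 2) array of (x, y); rotation_mat: 2x3 affine matrix.
    if len(keypoints) == 0:
        return np.zeros((0, 2))
    ones = np.ones((len(keypoints), 1))
    points = np.hstack([keypoints, ones])  # homogeneous coordinates, (N, 3)
    return points @ rotation_mat.T         # back to (N, 2)
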
Example #12
def get_true_and_pred_images_data_with_visualized_labels(
        image_data_matching: ImageDataMatching,
        error_type: Literal['detection', 'pipeline'],
        label: str = None) -> Tuple[ImageData, ImageData]:
    """
    Create true and pred ImageData with changed label for visualization

    For detection error_type, the label will be one of ["TP", "FP", "FN"]

    For pipeline error_type, the label will be in format "label {matching_error_type}"
    (where matching_error_type is one of ["TP", "FP", "FN", "TP (extra bbox)", "FP (extra bbox)"])
    """
    for tag, tag_image_data in [('true', image_data_matching.true_image_data),
                                ('pred', image_data_matching.pred_image_data)]:
        tag_bboxes_data_with_visualized_label = []
        for tag_bbox_data_matching in image_data_matching.bboxes_data_matchings:
            if tag == 'true':
                tag_bbox_data = tag_bbox_data_matching.true_bbox_data
            elif tag == 'pred':
                tag_bbox_data = tag_bbox_data_matching.pred_bbox_data

            if tag_bbox_data is None:
                continue

            if error_type == 'detection':
                label_caption = f"[{tag_bbox_data_matching.get_detection_error_type(label=label)}]"
            elif error_type == 'pipeline':
                pipeline_error_type = tag_bbox_data_matching.get_pipeline_error_type(
                    label=label)
                if label is None:
                    label_caption = f"{tag_bbox_data.label} [{pipeline_error_type}]"
                else:
                    label_caption = f"{tag_bbox_data.label} [{pipeline_error_type}, cls. '{label}']"

            tag_bbox_data_with_visualized_label = BboxData(
                image_path=tag_bbox_data.image_path,
                cropped_image=tag_bbox_data.cropped_image,
                xmin=tag_bbox_data.xmin,
                ymin=tag_bbox_data.ymin,
                xmax=tag_bbox_data.xmax,
                ymax=tag_bbox_data.ymax,
                detection_score=tag_bbox_data.detection_score,
                label=label_caption,
                classification_score=tag_bbox_data.classification_score)
            tag_bboxes_data_with_visualized_label.append(
                tag_bbox_data_with_visualized_label)

        tag_image_data_with_visualized_labels = ImageData(
            image_path=tag_image_data.image_path,
            image=tag_image_data.image,
            bboxes_data=tag_bboxes_data_with_visualized_label)
        if tag == 'true':
            true_image_data_with_visualized_labels = tag_image_data_with_visualized_labels
        elif tag == 'pred':
            pred_image_data_with_visualized_labels = tag_image_data_with_visualized_labels

    return true_image_data_with_visualized_labels, pred_image_data_with_visualized_labels
Example #13
    def get_images_data_from_annots(
        self,
        images_dir: Union[str, Path, fsspec.core.OpenFile],
        annots: Union[Path, str, Dict, fsspec.core.OpenFile]
    ) -> List[ImageData]:
        if isinstance(annots, str) or isinstance(annots, Path):
            with fsspec.open(annots, 'r', encoding='utf8') as f:
                annots = json.load(f)
        elif isinstance(annots, fsspec.core.OpenFile):
            with annots as f:
                annots = json.load(f)
        images_dir = Pathy(images_dir)

        images_data = []
        for annot in annots:
            image_name = annot['filename']
            additional_info = {}
            for key in annot:
                if key not in ['objects', 'filename']:
                    additional_info[key] = annot[key]
            bboxes_data = []
            for obj in annot['objects']:
                if 'bbox' in obj:
                    xmin, ymin, xmax, ymax = obj['bbox']
                else:
                    xmin, ymin, xmax, ymax = obj
                xmin, ymin, xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax)
                label = obj['label'] if 'label' in obj else None
                labels_top_n = obj['labels_top_n'] if 'labels_top_n' in obj else None
                top_n = len(labels_top_n) if labels_top_n is not None else None
                keypoints = obj['keypoints'] if 'keypoints' in obj else []
                bbox_additional_info = {}
                if isinstance(obj, dict):
                    for key in obj:
                        if key not in ['bbox', 'label', 'labels_top_n', 'top_n', 'keypoints']:
                            bbox_additional_info[key] = obj[key]
                bboxes_data.append(BboxData(
                    image_path=images_dir / image_name,
                    xmin=xmin,
                    ymin=ymin,
                    xmax=xmax,
                    ymax=ymax,
                    keypoints=keypoints,
                    label=label,
                    labels_top_n=labels_top_n,
                    top_n=top_n,
                    additional_info=bbox_additional_info
                ))

            images_data.append(ImageData(
                image_path=images_dir / image_name,
                bboxes_data=bboxes_data,
                additional_info=additional_info
            ))

        return images_data
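
From the parsing logic, one annotation entry looks roughly like the following (a hand-made illustration, not a file from the source project). Any key besides 'filename' and 'objects' lands in additional_info, and each object may be a dict or a bare 4-tuple:

annot = {
    "filename": "image.jpg",
    "source": "camera-1",  # goes to additional_info
    "objects": [
        {"bbox": [10, 20, 110, 220], "label": "cat",
         "keypoints": [[60, 120]]},
        [15, 25, 115, 225],  # bare (xmin, ymin, xmax, ymax) form
    ],
}
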
Example #14
    def get_annot_from_n_bboxes_data(
        self,
        image_paths: List[Union[str, Path]],
        n_bboxes_data: List[List[BboxData]],
    ) -> List[Dict]:
        images_data = [
            ImageData(image_path=image_path, bboxes_data=bboxes_data)
            for image_path, bboxes_data in zip(image_paths, n_bboxes_data)
        ]
        return self.get_annot_from_images_data(images_data)
Example #15
def apply_perspective_transform_to_image_data(
        image_data: ImageData, perspective_matrix: np.ndarray,
        result_width: int, result_height: int,
        allow_negative_and_large_coords: bool,
        remove_bad_coords: bool) -> ImageData:
    image = image_data.open_image()
    image = cv2.warpPerspective(image, perspective_matrix,
                                (result_width, result_height))

    image_data = copy.deepcopy(image_data)
    image_data.keypoints = apply_perspective_transform_to_points(
        image_data.keypoints, perspective_matrix, result_width, result_height,
        allow_negative_and_large_coords, remove_bad_coords)
    image_data.bboxes_data = [
        _apply_perspective_transform_to_bbox_data(
            bbox_data, perspective_matrix, result_width, result_height,
            allow_negative_and_large_coords, remove_bad_coords)
        for bbox_data in image_data.bboxes_data
    ]
    image_data.bboxes_data = [
        bbox_data for bbox_data in image_data.bboxes_data
        if bbox_data is not None
    ]
    image_data.image_path = None
    image_data.image = image

    return image_data
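
apply_perspective_transform_to_points is external; the core of any such helper is the homogeneous divide that a 3x3 perspective matrix requires. A minimal sketch under that assumption (the clamping/removal flags are left to the caller):

import numpy as np

def perspective_transform_points(points: np.ndarray,
                                 perspective_matrix: np.ndarray) -> np.ndarray:
    # points: (N, 2); perspective_matrix: 3x3.
    if len(points) == 0:
        return np.zeros((0, 2))
    ones = np.ones((len(points), 1))
    projected = np.hstack([points, ones]) @ perspective_matrix.T  # (N, 3)
    return projected[:, :2] / projected[:, 2:3]  # homogeneous divide
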
Example #16
    def _postprocess_predictions(
            self, images_data: List[ImageData],
            n_pred_bboxes: List[List[Tuple[int, int, int, int]]],
            n_k_pred_keypoints: List[List[Tuple[int, int]]],
            n_pred_detection_scores: List[List[float]],
            n_pred_labels_top_n: List[List[List[str]]],
            n_pred_classification_scores_top_n: List[List[List[float]]],
            open_images_in_images_data: bool,
            open_cropped_images_in_bboxes_data: bool) -> List[ImageData]:
        pred_images_data = []
        for (image_data, pred_bboxes, k_pred_keypoints, pred_detection_scores,
             pred_labels_top_n, pred_classification_scores_top_n) in zip(
                 images_data, n_pred_bboxes, n_k_pred_keypoints,
                 n_pred_detection_scores, n_pred_labels_top_n,
                 n_pred_classification_scores_top_n):
            bboxes_data = []
            for (pred_bbox, pred_keypoints, pred_detection_score,
                 pred_label_top_n, pred_classification_score_top_n) in zip(
                     pred_bboxes, k_pred_keypoints, pred_detection_scores,
                     pred_labels_top_n, pred_classification_scores_top_n):
                xmin, ymin, xmax, ymax = pred_bbox
                bboxes_data.append(
                    BboxData(
                        image_path=image_data.image_path,
                        xmin=xmin,
                        ymin=ymin,
                        xmax=xmax,
                        ymax=ymax,
                        keypoints=pred_keypoints,
                        detection_score=pred_detection_score,
                        label=pred_label_top_n[0],
                        classification_score=pred_classification_score_top_n[0],
                        top_n=len(pred_label_top_n),
                        labels_top_n=pred_label_top_n,
                        classification_scores_top_n=pred_classification_score_top_n))
            if open_cropped_images_in_bboxes_data:
                for bbox_data in bboxes_data:
                    bbox_data.open_cropped_image(source_image=image_data.image,
                                                 inplace=True)
            image = image_data.image if open_images_in_images_data else None
            pred_images_data.append(
                ImageData(
                    image_path=image_data.image_path,
                    image=image,
                    bboxes_data=bboxes_data,
                    label=image_data.label,
                    keypoints=image_data.keypoints,
                    additional_info=image_data.additional_info,
                ))
        return pred_images_data
Example #17
    def get_annot_from_image_data(self, image_data: ImageData) -> List[str]:
        image_data = self.filter_image_data(image_data)
        width, height = image_data.get_image_size()
        txt_results = []
        for bbox_data in image_data.bboxes_data:
            w = bbox_data.xmax - bbox_data.xmin
            h = bbox_data.ymax - bbox_data.ymin
            xcenter = bbox_data.xmin + w / 2
            ycenter = bbox_data.ymin + h / 2
            xcenter, w = round(xcenter / width, 6), round(w / width, 6)
            ycenter, h = round(ycenter / height, 6), round(h / height, 6)
            idx = self.class_name_to_idx[bbox_data.label]
            txt_results.append(f"{idx} {xcenter} {ycenter} {w} {h}")
        return txt_results
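
This writer is the inverse of the parser in Example #6: pixel corners go back to normalized center/size values. A quick round-trip check, assuming a plain object with the four corner attributes:

from types import SimpleNamespace

bbox = SimpleNamespace(xmin=160, ymin=120, xmax=480, ymax=360)
width, height = 640, 480
w, h = bbox.xmax - bbox.xmin, bbox.ymax - bbox.ymin
xcenter, ycenter = bbox.xmin + w / 2, bbox.ymin + h / 2
print(round(xcenter / width, 6), round(ycenter / height, 6),
      round(w / width, 6), round(h / height, 6))
# -> 0.5 0.5 0.5 0.5, matching the line parsed in Example #6
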
Example #18
def resize_image_data(image_data: ImageData,
                      size: Tuple[int, int],
                      resample: Optional[int] = None) -> ImageData:
    image_data = copy.deepcopy(image_data)
    image = image_data.open_image()
    old_height, old_width, _ = image.shape
    image = Image.fromarray(image)
    image = image.resize(size, resample=resample)
    image = np.array(image)
    new_height, new_width, _ = image.shape

    def resize_coords(bbox_data: BboxData):
        bbox_data.xmin = max(
            0, min(int(bbox_data.xmin * (new_width / old_width)),
                   new_width - 1))
        bbox_data.ymin = max(
            0,
            min(int(bbox_data.ymin * (new_height / old_height)),
                new_height - 1))
        bbox_data.xmax = max(
            0, min(int(bbox_data.xmax * (new_width / old_width)),
                   new_width - 1))
        bbox_data.ymax = max(
            0,
            min(int(bbox_data.ymax * (new_height / old_height)),
                new_height - 1))
        bbox_data.keypoints[:, 0] = (bbox_data.keypoints[:, 0] *
                                     (new_width / old_width)).astype(int)
        bbox_data.keypoints[:, 1] = (bbox_data.keypoints[:, 1] *
                                     (new_height / old_height)).astype(int)
        bbox_data.keypoints = bbox_data.keypoints.astype(int)
        bbox_data.cropped_image = None
        keypoints = []
        for (x, y) in bbox_data.keypoints:
            x = max(0, min(x, new_width - 1))
            y = max(0, min(y, new_height - 1))
            keypoints.append([x, y])
        bbox_data.keypoints = np.array(keypoints).reshape(-1, 2)
        for additional_bbox_data in bbox_data.additional_bboxes_data:
            resize_coords(additional_bbox_data)

    for bbox_data in image_data.bboxes_data:
        resize_coords(bbox_data)
    image_data.keypoints[:, 0] = (image_data.keypoints[:, 0] *
                                  (new_width / old_width)).astype(int)
    image_data.keypoints[:, 1] = (image_data.keypoints[:, 1] *
                                  (new_height / old_height)).astype(int)
    keypoints = []
    for (x, y) in image_data.keypoints:
        x = max(0, min(x, new_width - 1))
        y = max(0, min(y, new_height - 1))
        keypoints.append([x, y])
    image_data.keypoints = np.array(keypoints).reshape(-1, 2)
    image_data.image_path = None
    image_data.image = image

    return image_data
Example #19
def tf_record_from_image_data(image_data: ImageData,
                              label_map: Dict[str, int],
                              use_thumbnail: Tuple[int, int] = None):
    filename = image_data.image_path
    encoded_filename = str(filename).encode('utf8')

    true_bboxes = np.array(
        [[bbox_data.xmin, bbox_data.ymin, bbox_data.xmax, bbox_data.ymax]
         for bbox_data in image_data.bboxes_data],
        dtype=float)
    image = image_data.open_image()
    height, width, _ = image.shape
    if len(true_bboxes) > 0:
        normalized_true_bboxes = true_bboxes.copy()
        normalized_true_bboxes[:, [0, 2]] /= width
        normalized_true_bboxes[:, [1, 3]] /= height
        xmins = normalized_true_bboxes[:, 0]
        ymins = normalized_true_bboxes[:, 1]
        xmaxs = normalized_true_bboxes[:, 2]
        ymaxs = normalized_true_bboxes[:, 3]
    else:
        ymins, xmins, ymaxs, xmaxs = [], [], [], []

    encoded_jpg = BytesIO()
    image = Image.fromarray(image)
    if use_thumbnail:
        image.thumbnail(use_thumbnail)
    image.save(encoded_jpg, format='JPEG')
    encoded_jpg = encoded_jpg.getvalue()
    image_format = b'jpg'

    class_names = [bbox_data.label for bbox_data in image_data.bboxes_data]
    encoded_class_names = [
        class_name.encode('utf-8') for class_name in class_names
    ]
    classes = [label_map[class_name] for class_name in class_names]

    tf_record = create_tf_record(height=height,
                                 width=width,
                                 encoded_filename=encoded_filename,
                                 encoded_jpg=encoded_jpg,
                                 image_format=image_format,
                                 xmins=xmins,
                                 ymins=ymins,
                                 xmaxs=xmaxs,
                                 ymaxs=ymaxs,
                                 encoded_class_names=encoded_class_names,
                                 classes=classes)
    return tf_record
Example #20
def get_image_data_filtered_by_labels(image_data: ImageData,
                                      filter_by_labels: List[str] = None,
                                      include: bool = True) -> ImageData:
    if filter_by_labels is None or len(filter_by_labels) == 0:
        return image_data

    filter_by_labels = set(filter_by_labels)

    bboxes_data = [
        bbox_data for bbox_data in image_data.bboxes_data
        if (include and bbox_data.label in filter_by_labels) or (
            not include and bbox_data.label not in filter_by_labels)
    ]
    return ImageData(image_path=image_data.image_path,
                     image=image_data.image,
                     bboxes_data=bboxes_data)
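
A small usage sketch, assuming the ImageData/BboxData constructors behave as in the rest of these examples:

image_data = ImageData(
    image_path='image.jpg',
    bboxes_data=[
        BboxData(xmin=0, ymin=0, xmax=10, ymax=10, label='cat'),
        BboxData(xmin=20, ymin=20, xmax=30, ymax=30, label='dog'),
    ])
only_cats = get_image_data_filtered_by_labels(image_data, ['cat'])
no_cats = get_image_data_filtered_by_labels(image_data, ['cat'], include=False)
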
Example #21
    def _postprocess_images_data(
            self, images_data: List[ImageData],
            pred_n_keypoints: List[List[Tuple[float, float]]],
            open_images_in_images_data: bool) -> List[ImageData]:
        images_data_res = []
        pred_n_keypoints = np.array(pred_n_keypoints)
        for image_data, pred_keypoints in zip(images_data, pred_n_keypoints):
            width, height = image_data.get_image_size()
            pred_keypoints[:, 0] *= width
            pred_keypoints[:, 1] *= height
            image = image_data.image if open_images_in_images_data else None
            images_data_res.append(
                ImageData(image_path=image_data.image_path,
                          image=image,
                          bboxes_data=image_data.bboxes_data,
                          label=image_data.label,
                          keypoints=pred_keypoints,
                          additional_info=image_data.additional_info))

        return images_data_res
Example #22
def flatten_additional_bboxes_data_in_image_data(
    image_data: ImageData,
    additional_bboxes_data_depth: Optional[int] = None,
) -> ImageData:
    image_data = copy.deepcopy(image_data)
    bboxes_data = []

    def _append_bbox_data(bbox_data: BboxData, depth: int):
        if additional_bboxes_data_depth is not None and depth > additional_bboxes_data_depth:
            return
        bboxes_data.append(bbox_data)
        for additional_bbox_data in bbox_data.additional_bboxes_data:
            _append_bbox_data(additional_bbox_data, depth + 1)
        bbox_data.additional_bboxes_data = []

    for bbox_data in image_data.bboxes_data:
        _append_bbox_data(bbox_data, depth=0)

    image_data.bboxes_data = bboxes_data
    return image_data
Example #23
def non_max_suppression_image_data_using_tf(image_data: ImageData,
                                            iou: float) -> ImageData:
    import tensorflow as tf
    image_data = copy.deepcopy(image_data)
    if len(image_data.bboxes_data) <= 1:
        return image_data
    bboxes = [(bbox_data.ymin, bbox_data.xmin, bbox_data.ymax, bbox_data.xmax)
              for bbox_data in image_data.bboxes_data]
    scores = [
        bbox_data.detection_score
        if bbox_data.detection_score is not None else 1.
        for bbox_data in image_data.bboxes_data
    ]
    result = tf.image.non_max_suppression(bboxes,
                                          scores,
                                          len(image_data.bboxes_data),
                                          iou_threshold=iou)
    image_data.bboxes_data = [
        image_data.bboxes_data[i] for i in result.numpy()
    ]
    return image_data
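
tf.image.non_max_suppression takes boxes as [ymin, xmin, ymax, xmax] rows plus per-box scores and returns the indices of the boxes to keep. A standalone sketch with two heavily overlapping boxes:

import tensorflow as tf

boxes = [[0., 0., 10., 10.],   # [ymin, xmin, ymax, xmax]
         [0., 1., 10., 11.],   # overlaps the first box (IoU ~ 0.82)
         [20., 20., 30., 30.]]
scores = [0.9, 0.8, 0.7]
kept = tf.image.non_max_suppression(boxes, scores,
                                    max_output_size=len(boxes),
                                    iou_threshold=0.5)
print(kept.numpy())  # -> [0 2]: the lower-scored duplicate is suppressed
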
Example #24
    def draw_overlay(self,
                     frame: np.ndarray,
                     tracked_bboxes: np.ndarray,
                     tracked_ids: List[int],
                     ready_frames_at_the_moment: List['FrameResult'],
                     filter_by_labels: List[str] = None) -> np.ndarray:
        image = frame.copy()
        tracked_bboxes = tracked_bboxes.astype(int)
        ready_tracks_ids_at_the_moment = [
            ready_frame.track_id for ready_frame in ready_frames_at_the_moment
        ]

        current_bboxes_data = []
        for bbox, track_id in zip(tracked_bboxes, tracked_ids):
            if track_id not in ready_tracks_ids_at_the_moment:
                continue

            xmin, ymin, xmax, ymax = bbox
            ready_frame = ready_frames_at_the_moment[
                ready_tracks_ids_at_the_moment.index(track_id)]
            label = ready_frame.label
            current_bbox_data = BboxData(image=image,
                                         xmin=xmin,
                                         ymin=ymin,
                                         xmax=xmax,
                                         ymax=ymax,
                                         label=label)
            current_bboxes_data.append(current_bbox_data)

        image_data = ImageData(image=frame, bboxes_data=current_bboxes_data)

        image = visualize_image_data(
            image_data=image_data,
            use_labels=self.write_labels,
            filter_by_labels=filter_by_labels,
            draw_base_labels_with_given_label_to_base_label_image=(
                self.draw_base_labels_with_given_label_to_base_label_image),
            known_labels=self.classification_inferencer.class_names)

        return image
Example #25
    def _inference_pipeline_and_get_metrics(
            self, model_spec: PipelineModelSpec,
            true_images_data: List[ImageData],
            detection_score_threshold: float, minimum_iou: float,
            extra_bbox_label: str, batch_size: int,
            pseudo_class_names: List[str]
    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        pipeline_model = model_spec.load()
        inferencer = PipelineInferencer(pipeline_model)
        images_data_gen = BatchGeneratorImageData(
            true_images_data,
            batch_size=batch_size,
            use_not_caught_elements_as_last_batch=True)
        raw_pred_images_data = inferencer.predict(images_data_gen,
                                                  detection_score_threshold=0.)
        pred_images_data = [
            ImageData(
                image_path=image_data.image_path,
                bboxes_data=[
                    bbox_data for bbox_data in image_data.bboxes_data
                    if bbox_data.detection_score >= detection_score_threshold
                ]) for image_data in raw_pred_images_data
        ]
        df_detection_metrics = get_df_detection_metrics(
            true_images_data=true_images_data,
            pred_images_data=pred_images_data,
            minimum_iou=minimum_iou,
            raw_pred_images_data=raw_pred_images_data)
        df_pipeline_metrics = get_df_pipeline_metrics(
            true_images_data=true_images_data,
            pred_images_data=pred_images_data,
            minimum_iou=minimum_iou,
            extra_bbox_label=extra_bbox_label,
            pseudo_class_names=pseudo_class_names,
            known_class_names=pipeline_model.class_names)
        return df_detection_metrics, df_pipeline_metrics
Example #26
    def filter_image_data(self, image_data: ImageData) -> ImageData:
        if image_data is None:
            return None
        looked_bboxes = set()
        new_bboxes_data = []
        for bbox_data in image_data.bboxes_data:
            xmin, ymin, xmax, ymax = bbox_data.xmin, bbox_data.ymin, bbox_data.xmax, bbox_data.ymax
            xmin, ymin, xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax)

            if (xmin, ymin, xmax, ymax) in looked_bboxes:
                logger.warning(
                    f'Repeated bbox detected at image {bbox_data.image_path}: '
                    f'(xmin, ymin, xmax, ymax) = {(xmin, ymin, xmax, ymax)}. Skipping.'
                )
                continue
            else:
                looked_bboxes.add((xmin, ymin, xmax, ymax))

            if xmin >= xmax or ymin >= ymax or xmin < 0 or ymin < 0:
                logger.warning(
                    f"Wrong annotation at image {bbox_data.image_path}: "
                    f"incorrect bbox (xmin, ymin, xmax, ymax): {(xmin, ymin, xmax, ymax)} "
                    "(xmin >= xmax or ymin >= ymax or xmin < 0 or ymin < 0). Skipping."
                )
                continue

            new_bboxes_data.append(bbox_data)

        image_data = ImageData(image_path=image_data.image_path,
                               image=image_data.image,
                               bboxes_data=new_bboxes_data,
                               additional_info=image_data.additional_info,
                               keypoints=image_data.keypoints,
                               label=image_data.label)

        return image_data
Example #27
def visualize_image_data(
    image_data: ImageData,
    use_labels: bool = False,
    score_type: Literal['detection', 'classification'] = None,
    filter_by_labels: List[str] = None,
    known_labels: List[str] = None,
    draw_base_labels_with_given_label_to_base_label_image: Callable[[str], np.ndarray] = None,
    keypoints_radius: int = 5,
    include_additional_bboxes_data: bool = False,
    additional_bboxes_data_depth: Optional[int] = None,
    fontsize: int = 24,
    thickness: int = 4,
    return_as_pil_image: bool = False
) -> Union[np.ndarray, Image.Image]:
    image_data = get_image_data_filtered_by_labels(
        image_data=image_data,
        filter_by_labels=filter_by_labels
    )
    image = image_data.open_image()
    if include_additional_bboxes_data:
        bboxes_data = []

        def recursive_get_bboxes_data(bbox_data, depth):
            if additional_bboxes_data_depth is not None and depth > additional_bboxes_data_depth:
                return
            bboxes_data.append(bbox_data)
            for additional_bbox_data in bbox_data.additional_bboxes_data:
                recursive_get_bboxes_data(additional_bbox_data, depth=depth + 1)
        for bbox_data in image_data.bboxes_data:
            recursive_get_bboxes_data(bbox_data, depth=0)
    else:
        bboxes_data = image_data.bboxes_data
    labels = [bbox_data.label for bbox_data in bboxes_data]
    if known_labels is None:
        known_labels = list(set(labels))
    k_keypoints = [bbox_data.keypoints for bbox_data in bboxes_data]
    bboxes = np.array([
        (bbox_data.ymin, bbox_data.xmin, bbox_data.ymax, bbox_data.xmax)
        for bbox_data in bboxes_data
    ])
    angles = np.array([0. for _ in bboxes_data])
    if score_type == 'detection':
        scores = np.array([bbox_data.detection_score for bbox_data in bboxes_data])
        skip_scores = False
    elif score_type == 'classification':
        scores = np.array([bbox_data.classification_score for bbox_data in bboxes_data])
        skip_scores = False
    else:
        scores = None
        skip_scores = True

    image = visualize_boxes_and_labels_on_image_array(
        image=image,
        bboxes=bboxes,
        angles=angles,
        scores=scores,
        k_keypoints=k_keypoints,
        labels=labels,
        use_normalized_coordinates=False,
        skip_scores=skip_scores,
        skip_labels=not use_labels,
        groundtruth_box_visualization_color='lime',
        known_labels=known_labels,
        keypoints_radius=keypoints_radius,
        fontsize=fontsize,
        thickness=thickness
    )
    if len(image_data.keypoints) > 0:
        image_pil = Image.fromarray(image)
        draw = ImageDraw.Draw(image_pil)
        for idx, (x, y) in enumerate(image_data.keypoints):
            draw.pieslice(
                [(x-keypoints_radius, y-keypoints_radius), (x+keypoints_radius, y+keypoints_radius)], start=0, end=360,
                fill=STANDARD_COLORS_RGB[idx % len(STANDARD_COLORS_RGB)]
            )
        image = np.array(image_pil)
    if draw_base_labels_with_given_label_to_base_label_image is not None:
        for bbox_data in image_data.bboxes_data:
            base_label_image = draw_base_labels_with_given_label_to_base_label_image(bbox_data.label)
            draw_label_image(
                image=image,
                base_label_image=base_label_image,
                bbox_data=bbox_data,
                inplace=True
            )

    if return_as_pil_image:
        return Image.fromarray(image)

    return image
Example #28
def concat_images_data(image_data_a: ImageData,
                       image_data_b: ImageData,
                       background_color_a: Tuple[int, int, int, int] = None,
                       background_color_b: Tuple[int, int, int, int] = None,
                       thumbnail_size_a: Tuple[int, int] = None,
                       thumbnail_size_b: Tuple[int, int] = None,
                       how: Literal['horizontally',
                                    'vertically'] = 'horizontally',
                       mode: Literal['L', 'RGB', 'RGBA'] = 'RGBA',
                       background_edge_width: int = 3,
                       between_edge_width: int = 0) -> ImageData:

    if image_data_a is None:
        return copy.deepcopy(image_data_b)
    if image_data_b is None:
        return copy.deepcopy(image_data_a)

    image_data_a = copy.deepcopy(image_data_a)
    image_data_b = copy.deepcopy(image_data_b)

    image_a = image_data_a.open_image()
    image_b = image_data_b.open_image()

    ha, wa = image_a.shape[:2]
    hb, wb = image_b.shape[:2]

    image = concat_images(image_a=image_a,
                          image_b=image_b,
                          background_color_a=background_color_a,
                          background_color_b=background_color_b,
                          thumbnail_size_a=thumbnail_size_a,
                          thumbnail_size_b=thumbnail_size_b,
                          how=how,
                          mode=mode,
                          background_edge_width=background_edge_width,
                          between_edge_width=between_edge_width)
    image_data_a_new_xmin, image_data_a_new_ymin = None, None
    image_data_b_new_xmin, image_data_b_new_ymin = None, None

    if how == 'horizontally':
        max_height = np.max([ha, hb])
        min_ha = max_height // 2 - ha // 2
        max_ha = max_height // 2 + ha // 2
        min_hb = max_height // 2 - hb // 2
        max_hb = max_height // 2 + hb // 2
        image_data_a_new_xmin = 0
        image_data_a_new_ymin = min_ha
        image_data_a_new_xmax = wa
        image_data_a_new_ymax = max_ha
        image_data_b_new_xmin = wa + between_edge_width
        image_data_b_new_ymin = min_hb
        image_data_b_new_xmax = wa + between_edge_width + wb
        image_data_b_new_ymax = max_hb

    elif how == 'vertically':
        max_width = np.max([wa, wb])
        min_wa = max_width // 2 - wa // 2
        max_wa = max_width // 2 + wa // 2
        min_wb = max_width // 2 - wb // 2
        max_wb = max_width // 2 + wb // 2
        image_data_a_new_xmin = min_wa
        image_data_a_new_ymin = 0
        image_data_a_new_xmax = max_wa
        image_data_a_new_ymax = ha
        image_data_b_new_xmin = min_wb
        image_data_b_new_ymin = ha + between_edge_width
        image_data_b_new_xmax = max_wb
        image_data_b_new_ymax = ha + between_edge_width + hb

    keypoints_a = image_data_a.keypoints
    keypoints_b = image_data_b.keypoints
    keypoints_a[:, 0] += image_data_a_new_xmin
    keypoints_a[:, 1] += image_data_a_new_ymin
    keypoints_b[:, 0] += image_data_b_new_xmin
    keypoints_b[:, 1] += image_data_b_new_ymin

    def _get_new_coords_for_bbox_data(bbox_data: BboxData, xmin: int,
                                      ymin: int):
        bbox_data.keypoints[:, 0] += xmin
        bbox_data.keypoints[:, 1] += ymin
        bbox_data.xmin += xmin
        bbox_data.ymin += ymin
        bbox_data.xmax += xmin
        bbox_data.ymax += ymin
        bbox_data.image = None
        bbox_data.image_path = None
        bbox_data.cropped_image = None
        for additional_bbox_data in bbox_data.additional_bboxes_data:
            _get_new_coords_for_bbox_data(additional_bbox_data, xmin, ymin)

    for bbox_data in image_data_a.bboxes_data:
        _get_new_coords_for_bbox_data(bbox_data, image_data_a_new_xmin,
                                      image_data_a_new_ymin)

    if 'concat_images_data__image_data' not in [
            bbox_data.label for bbox_data in image_data_a.bboxes_data
    ]:
        bbox_data_a_into = [
            BboxData(xmin=image_data_a_new_xmin,
                     ymin=image_data_a_new_ymin,
                     xmax=image_data_a_new_xmax,
                     ymax=image_data_a_new_ymax,
                     label='concat_images_data__image_data',
                     additional_bboxes_data=[
                         bbox_data for bbox_data in image_data_a.bboxes_data
                         if 'concat_images_data__image_data' != bbox_data.label
                     ])
        ]
    else:
        bbox_data_a_into = []
    image_data_a.bboxes_data = [
        bbox_data for bbox_data in image_data_a.bboxes_data
        if 'concat_images_data__image_data' == bbox_data.label
    ] + bbox_data_a_into

    for bbox_data in image_data_b.bboxes_data:
        _get_new_coords_for_bbox_data(bbox_data, image_data_b_new_xmin,
                                      image_data_b_new_ymin)
    if 'concat_images_data__image_data' not in [
            bbox_data.label for bbox_data in image_data_b.bboxes_data
    ]:
        bbox_data_b_into = [
            BboxData(xmin=image_data_b_new_xmin,
                     ymin=image_data_b_new_ymin,
                     xmax=image_data_b_new_xmax,
                     ymax=image_data_b_new_ymax,
                     label='concat_images_data__image_data',
                     additional_bboxes_data=[
                         bbox_data for bbox_data in image_data_b.bboxes_data
                         if 'concat_images_data__image_data' != bbox_data.label
                     ])
        ]
    else:
        bbox_data_b_into = []
    image_data_b.bboxes_data = [
        bbox_data for bbox_data in image_data_b.bboxes_data
        if 'concat_images_data__image_data' == bbox_data.label
    ] + bbox_data_b_into

    image_data = ImageData(image_path=None,
                           image=image,
                           bboxes_data=image_data_a.bboxes_data +
                           image_data_b.bboxes_data,
                           label=None,
                           keypoints=np.concatenate([keypoints_a, keypoints_b],
                                                    axis=0),
                           additional_info={
                               **image_data_a.additional_info,
                               **image_data_b.additional_info
                           })

    return image_data
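
When concatenating horizontally, each image is centered vertically and image B's annotations shift right by image A's width plus the separator. A tiny check of that offset arithmetic:

wa, ha, wb, hb = 100, 40, 60, 80
between_edge_width = 3
max_height = max(ha, hb)
print(max_height // 2 - ha // 2)  # -> 20: image A's new ymin
print(wa + between_edge_width)    # -> 103: image B's new xmin
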
Example #29
    def run_pipeline_on_frame(
            self, frame: np.ndarray, frame_idx: int, fps: float,
            detection_delay: int, classification_delay: int,
            detection_score_threshold: float, batch_size: int
    ) -> Tuple[List[Tuple[int, int, int, int]], List[int]]:
        frame = frame.copy()
        image_data = ImageData(image=frame)
        image_data_gen = BatchGeneratorImageData(
            [image_data],
            batch_size=batch_size,
            use_not_caught_elements_as_last_batch=True)

        pred_image_data = self.detection_inferencer.predict(
            images_data_gen=image_data_gen,
            score_threshold=detection_score_threshold)[0]
        bboxes = np.array([(bbox_data.xmin, bbox_data.ymin, bbox_data.xmax,
                            bbox_data.ymax)
                           for bbox_data in pred_image_data.bboxes_data])
        detection_scores = np.array([
            bbox_data.detection_score
            for bbox_data in pred_image_data.bboxes_data
        ])

        self.opencv_tracker = OpenCVTracker(bboxes, frame)
        tracked_bboxes, tracked_ids = self.update_sort_tracker(
            bboxes=bboxes, scores=detection_scores)

        # detection_delay_frames = int(round(detection_delay * fps / 1000))
        # classification_delay_frames = int(round(classification_delay * fps / 1000))

        current_tracks_ids = [
            frame_result.track_id
            for frame_result in self.current_ready_frames_queue
        ]
        current_not_tracked_items_idxs = [
            idx for idx, tracked_id in enumerate(tracked_ids)
            if tracked_id not in current_tracks_ids
        ]
        if current_not_tracked_items_idxs:
            current_not_tracked_bboxes = tracked_bboxes[
                current_not_tracked_items_idxs]
            current_not_tracked_ids = tracked_ids[
                current_not_tracked_items_idxs]
            bboxes_data = [
                BboxData(image=frame,
                         xmin=xmin,
                         ymin=ymin,
                         xmax=xmax,
                         ymax=ymax)
                for (xmin, ymin, xmax, ymax) in current_not_tracked_bboxes
            ]
            bboxes_data_gen = BatchGeneratorBboxData(
                [bboxes_data],
                batch_size=batch_size,
                use_not_caught_elements_as_last_batch=True)
            pred_bboxes_data = self.classification_inferencer.predict(
                bboxes_data_gen)[0]

            for bbox_data, tracked_id in zip(pred_bboxes_data,
                                             current_not_tracked_ids):
                ready_at_frame = frame_idx
                frame_result = FrameResult(label=bbox_data.label,
                                           track_id=tracked_id,
                                           ready_at_frame=ready_at_frame)
                self.current_ready_frames_queue.append(frame_result)

        return tracked_bboxes, tracked_ids
Example #30
def crop_image_data(
    image_data: ImageData,
    xmin: int,
    ymin: int,
    xmax: int,
    ymax: int,
    allow_negative_and_large_coords: bool,
    remove_bad_coords: bool,
) -> ImageData:

    assert 0 <= xmin and 0 <= ymin
    assert xmin <= xmax and ymin <= ymax

    image_data = copy.deepcopy(image_data)
    image = image_data.open_image()
    height, width, _ = image.shape

    assert xmax <= width and ymax <= height

    image = image[ymin:ymax, xmin:xmax]
    new_height, new_width, _ = image.shape

    def resize_coords(bbox_data: BboxData):
        bbox_data.xmin = bbox_data.xmin - xmin
        bbox_data.ymin = bbox_data.ymin - ymin
        bbox_data.xmax = bbox_data.xmax - xmin
        bbox_data.ymax = bbox_data.ymax - ymin
        bbox_data.keypoints[:, 0] -= xmin
        bbox_data.keypoints[:, 1] -= ymin
        bbox_data.cropped_image = None
        if not allow_negative_and_large_coords:
            bbox_data.xmin = max(0, min(bbox_data.xmin, new_width - 1))
            bbox_data.ymin = max(0, min(bbox_data.ymin, new_height - 1))
            bbox_data.xmax = max(0, min(bbox_data.xmax, new_width - 1))
            bbox_data.ymax = max(0, min(bbox_data.ymax, new_height - 1))
            keypoints = []
            for (x, y) in bbox_data.keypoints:
                x = max(0, min(x, new_width - 1))
                y = max(0, min(y, new_height - 1))
                keypoints.append([x, y])
            bbox_data.keypoints = np.array(keypoints).reshape(-1, 2)
        for additional_bbox_data in bbox_data.additional_bboxes_data:
            resize_coords(additional_bbox_data)

    for bbox_data in image_data.bboxes_data:
        resize_coords(bbox_data)

    keypoints = []
    for (x, y) in image_data.keypoints:
        x = max(0, min(x - xmin, new_width - 1))
        y = max(0, min(y - ymin, new_height - 1))
        keypoints.append([x, y])
    image_data.keypoints = np.array(keypoints).reshape(-1, 2)

    def if_bbox_data_inside_crop(bbox_data: BboxData):
        bbox_data.keypoints = bbox_data.keypoints[(
            (bbox_data.keypoints[:, 0] >= 0) & (bbox_data.keypoints[:, 1] >= 0)
            & (bbox_data.keypoints[:, 0] < new_width) &
            (bbox_data.keypoints[:, 1] < new_height))]
        bbox_data.additional_bboxes_data = [
            additional_bbox_data
            for additional_bbox_data in bbox_data.additional_bboxes_data
            if if_bbox_data_inside_crop(additional_bbox_data)
        ]
        return (bbox_data.xmin >= 0 and bbox_data.ymin >= 0
                and bbox_data.xmax < new_width and bbox_data.ymax < new_height
                and bbox_data.xmin < bbox_data.xmax
                and bbox_data.ymin < bbox_data.ymax)

    if remove_bad_coords:
        image_data.bboxes_data = [
            bbox_data for bbox_data in image_data.bboxes_data
            if if_bbox_data_inside_crop(bbox_data)
        ]
        image_data.keypoints = image_data.keypoints[(
            (image_data.keypoints[:, 0] >= 0) &
            (image_data.keypoints[:, 1] >= 0) &
            (image_data.keypoints[:, 0] < new_width) &
            (image_data.keypoints[:, 1] < new_height))]

    image_data.image_path = None
    image_data.image = image

    return image_data
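
The coordinate bookkeeping above reduces to shifting everything by the crop origin and then clamping or dropping what falls outside. A standalone sketch of that shift for a single box:

def shift_box_into_crop(box, crop_xmin, crop_ymin):
    # Translate an (xmin, ymin, xmax, ymax) box into crop-local coordinates.
    xmin, ymin, xmax, ymax = box
    return (xmin - crop_xmin, ymin - crop_ymin,
            xmax - crop_xmin, ymax - crop_ymin)

print(shift_box_into_crop((50, 60, 70, 80), 40, 40))
# -> (10, 20, 30, 40)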