Ejemplo n.º 1
0
 def find_figures_and_captions(self, diff_im: np.ndarray, im: np.ndarray,
                               page_num: int) -> List[Figure]:
     """Extract figure/table regions and their captions from a colour-coded image.

     Pixels of ``diff_im`` whose colour is exactly ``FIGURE_BOX_COLOR`` or
     ``TABLE_BOX_COLOR`` are grouped into connected components.  For each
     component, the bounding box of the caption-coloured pixels
     (``CAPTION_LABEL_COLOR`` / ``CAPTION_TEXT_COLOR``) that lie inside the
     component's bounding box is used as the caption box, and
     ``self.get_figure_box`` derives the figure box from the two.

     Args:
         diff_im: Colour-coded image indexed as ``[row, column, channel]``.
         im: Image forwarded unchanged to ``self.get_figure_box``.
         page_num: Page number stored on every returned ``Figure``.

     Returns:
         One ``Figure`` per component that contains caption pixels and for
         which ``self.get_figure_box`` returns a box.  ``name`` and
         ``caption`` are left empty; the page size is taken from
         ``diff_im.shape``.
     """

     def exact_color_mask(color) -> np.ndarray:
         # A pixel matches only when *every* channel equals ``color``.
         return (diff_im == color).all(axis=2)

     # Reduce over the channel axis per colour *before* OR-ing.  OR-ing the
     # per-channel comparisons first would also accept pixels whose channels
     # are a mix of the two colours.
     all_box_mask = np.logical_or(exact_color_mask(self.FIGURE_BOX_COLOR),
                                  exact_color_mask(self.TABLE_BOX_COLOR))
     all_caption_mask = np.logical_or(
         exact_color_mask(self.CAPTION_LABEL_COLOR),
         exact_color_mask(self.CAPTION_TEXT_COLOR))
     (page_height, page_width) = diff_im.shape[:2]

     figures = []
     components = measure.label(all_box_mask)
     component_ids = np.unique(components)
     # Label 0 is the background.  Filter it out by value rather than by
     # position: if the mask has no background pixels, 0 is absent and
     # slicing off the first entry would drop a real component.
     for component_id in component_ids[component_ids != 0]:
         (box_ys, box_xs) = np.where(components == component_id)
         left, right = int(box_xs.min()), int(box_xs.max()) + 1
         top, bottom = int(box_ys.min()), int(box_ys.max()) + 1
         full_box = BoxClass(x1=float(left),
                             y1=float(top),
                             x2=float(right),
                             y2=float(bottom))
         # Only caption pixels inside the component's bounding box count.
         # Cropping gives the same pixels as blanking everything outside
         # the box on a full-size copy of the mask, without the copy.
         (cap_ys, cap_xs) = np.where(all_caption_mask[top:bottom,
                                                      left:right])
         if len(cap_ys) == 0:
             continue  # Ignore boxes with no captions
         # Shift the cropped coordinates back into page coordinates.
         cap_box = BoxClass(
             x1=float(cap_xs.min() + left),
             y1=float(cap_ys.min() + top),
             x2=float(cap_xs.max() + left + 1),
             y2=float(cap_ys.max() + top + 1),
         )
         fig_box = self.get_figure_box(full_box, cap_box, im)
         if fig_box is None:
             continue
         # Any pixel of the component tells us whether it is a figure or a
         # table; use the first one found.
         box_color = diff_im[box_ys[0], box_xs[0], :]
         if np.all(box_color == self.FIGURE_BOX_COLOR):
             figure_type = 'Figure'
         else:
             # Internal invariant: the mask only admits the two box colours.
             assert np.all(box_color == self.TABLE_BOX_COLOR), (
                 'Bad box color: %s' % str(box_color))
             figure_type = 'Table'
         figures.append(
             Figure(figure_boundary=fig_box,
                    caption_boundary=cap_box,
                    figure_type=figure_type,
                    name='',
                    page=page_num,
                    caption='',
                    dpi=settings.DEFAULT_INFERENCE_DPI,
                    page_width=page_width,
                    page_height=page_height))
     return figures
Ejemplo n.º 2
0
 def augment_images(self, image_path, figures) -> Optional[List[Figure]]:
     """Augment an image on disk together with its figure bounding boxes.

     The image at ``image_path`` is read and, when
     ``self.image_augmentation_transform_sequence`` is truthy, passed through
     it together with the figures' bounding boxes.  The resulting image is
     written back to ``image_path``, replacing the original file.

     Args:
         image_path: Path of the image to read and then overwrite.
         figures: Figures whose ``figure_boundary`` boxes refer to the image.

     Returns:
         A new list holding the same figure objects, each with its
         ``figure_boundary`` replaced by the augmented box (the input
         objects are modified in place).  If ``figures`` is empty it is
         returned as-is and the image file is not touched.
     """
     if len(figures) == 0:
         return figures
     image = imageio.imread(image_path)
     bbs = [
         ia.BoundingBox(x1=boundary.x1,
                        y1=boundary.y1,
                        x2=boundary.x2,
                        y2=boundary.y2)
         for boundary in (figure.figure_boundary for figure in figures)
     ]
     if self.image_augmentation_transform_sequence:
         images_aug, bbs_aug = self.image_augmentation_transform_sequence(
             images=[image], bounding_boxes=[bbs])
     else:
         # No augmentation configured: pass image and boxes through as-is.
         images_aug, bbs_aug = [image], [bbs]
     # Overwrites the source image with the (possibly augmented) result.
     imageio.imwrite(image_path, images_aug[0])
     # NOTE(review): only ``figure_boundary`` is rewritten below; any other
     # box stored on the figure (e.g. a caption boundary) keeps its original
     # coordinates -- confirm this is intended for geometric augmentations.
     figures_aug = []
     for idx, figure in enumerate(figures):
         # A single image was augmented, so its boxes are at index 0.
         bb = bbs_aug[0][idx]
         figure.figure_boundary = BoxClass.from_tuple(
             (float(bb.x1), float(bb.y1), float(bb.x2), float(bb.y2)))
         figures_aug.append(figure)
     return figures_aug
Ejemplo n.º 3
0
 def proposal_right(self, full_box: BoxClass,
                    caption_box: BoxClass) -> BoxClass:
     """Build the candidate box on the ``x2`` side of the caption.

     The result keeps ``full_box``'s ``y1``, ``x2`` and ``y2``; its ``x1`` is
     the caption's ``x2`` shifted by ``self.CAPTION_OFFSET``.
     """
     left_edge = caption_box.x2 + self.CAPTION_OFFSET
     top_edge = full_box.y1
     right_edge = full_box.x2
     bottom_edge = full_box.y2
     return BoxClass(x1=left_edge, y1=top_edge, x2=right_edge, y2=bottom_edge)
Ejemplo n.º 4
0
 def proposal_left(self, full_box: BoxClass,
                   caption_box: BoxClass) -> BoxClass:
     """Build the candidate box on the ``x1`` side of the caption.

     The result keeps ``full_box``'s ``x1``, ``y1`` and ``y2``; its ``x2`` is
     the caption's ``x1`` pulled back by ``self.CAPTION_OFFSET``.
     """
     left_edge, top_edge = full_box.x1, full_box.y1
     right_edge = caption_box.x1 - self.CAPTION_OFFSET
     bottom_edge = full_box.y2
     return BoxClass(x1=left_edge, y1=top_edge, x2=right_edge, y2=bottom_edge)
def proposal_down(full_box: BoxClass, caption_box: BoxClass) -> BoxClass:
    """Build the candidate box on the ``y2`` side of the caption.

    The result keeps ``full_box``'s ``x1``, ``x2`` and ``y2``; its ``y1`` is
    the caption's ``y2`` shifted by the module-level ``CAPTION_OFFSET``.
    """
    left_edge = full_box.x1
    top_edge = caption_box.y2 + CAPTION_OFFSET
    right_edge, bottom_edge = full_box.x2, full_box.y2
    return BoxClass(x1=left_edge, y1=top_edge, x2=right_edge, y2=bottom_edge)
def proposal_up(full_box: BoxClass, caption_box: BoxClass) -> BoxClass:
    """Build the candidate box on the ``y1`` side of the caption.

    The result keeps ``full_box``'s ``x1``, ``y1`` and ``x2``; its ``y2`` is
    the caption's ``y1`` pulled back by the module-level ``CAPTION_OFFSET``.
    """
    left_edge, top_edge, right_edge = full_box.x1, full_box.y1, full_box.x2
    bottom_edge = caption_box.y1 - CAPTION_OFFSET
    return BoxClass(x1=left_edge, y1=top_edge, x2=right_edge, y2=bottom_edge)