def find_figures_and_captions(self, diff_im: np.ndarray, im: np.ndarray,
                              page_num: int) -> List[Figure]:
    """Find the colour-coded figure/table boxes on a page and their captions.

    Pixels of ``diff_im`` are compared against ``self.FIGURE_BOX_COLOR``,
    ``self.TABLE_BOX_COLOR``, ``self.CAPTION_LABEL_COLOR`` and
    ``self.CAPTION_TEXT_COLOR``. Each connected component of box-coloured
    pixels yields at most one ``Figure``.

    Args:
        diff_im: Array indexed as ``[row, column, channel]`` holding the
            colour-coded markers.
        im: Page image; only forwarded to ``self.get_figure_box``.
        page_num: Page number stored on every returned ``Figure``.

    Returns:
        One ``Figure`` per box component that contains caption pixels and
        for which ``self.get_figure_box`` returns a box. ``name`` and
        ``caption`` are left empty and ``dpi`` is
        ``settings.DEFAULT_INFERENCE_DPI``.

    Raises:
        AssertionError: If the first pixel of a component is neither the
            figure colour nor the table colour.
    """

    def pixels_matching(color) -> np.ndarray:
        # A pixel matches only when *every* channel equals the colour.
        return (diff_im == color).all(axis=2)

    # Reduce over channels per colour BEFORE or-ing the two colours.
    # Or-ing first would also accept pixels that mix channels of both.
    all_box_mask = np.logical_or(pixels_matching(self.FIGURE_BOX_COLOR),
                                 pixels_matching(self.TABLE_BOX_COLOR))
    all_caption_mask = np.logical_or(
        pixels_matching(self.CAPTION_LABEL_COLOR),
        pixels_matching(self.CAPTION_TEXT_COLOR))

    # Page size does not depend on the component, so compute it once.
    page_height, page_width = diff_im.shape[:2]

    components = measure.label(all_box_mask)
    component_ids = np.unique(components)
    # Label 0 is the background. Filter it by value instead of slicing off
    # the first entry, which would drop a real component whenever the mask
    # has no background pixel at all.
    component_ids = component_ids[component_ids != 0]

    figures = []
    for component_id in component_ids:
        box_ys, box_xs = np.nonzero(components == component_id)

        # Bounding box of the component; right/bottom edges are exclusive.
        left, right = int(box_xs.min()), int(box_xs.max()) + 1
        top, bottom = int(box_ys.min()), int(box_ys.max()) + 1
        full_box = BoxClass(x1=float(left),
                            y1=float(top),
                            x2=float(right),
                            y2=float(bottom))

        # Only caption pixels inside the bounding box belong to this box.
        # Indexing the window avoids copying the whole mask per component.
        cap_ys, cap_xs = np.nonzero(all_caption_mask[top:bottom, left:right])
        if cap_ys.size == 0:
            continue  # Ignore boxes with no captions
        cap_box = BoxClass(
            x1=float(cap_xs.min() + left),
            y1=float(cap_ys.min() + top),
            x2=float(cap_xs.max() + left + 1),
            y2=float(cap_ys.max() + top + 1),
        )

        fig_box = self.get_figure_box(full_box, cap_box, im)
        if fig_box is None:
            continue

        # The colour of any pixel of the component decides the type.
        box_color = diff_im[box_ys[0], box_xs[0], :]
        if np.all(box_color == self.FIGURE_BOX_COLOR):
            figure_type = 'Figure'
        else:
            assert np.all(box_color == self.TABLE_BOX_COLOR), (
                'Bad box color: %s' % str(box_color))
            figure_type = 'Table'

        figures.append(
            Figure(figure_boundary=fig_box,
                   caption_boundary=cap_box,
                   figure_type=figure_type,
                   name='',
                   page=page_num,
                   caption='',
                   dpi=settings.DEFAULT_INFERENCE_DPI,
                   page_width=page_width,
                   page_height=page_height))
    return figures
def augment_images(self, image_path, figures) -> Optional[List[Figure]]:
    """Augment the image at ``image_path`` and move figure boxes with it.

    The image is read, optionally passed through
    ``self.image_augmentation_transform_sequence`` together with one
    bounding box per figure, and written back to ``image_path``. Each
    figure's ``figure_boundary`` is then replaced in place by a
    ``BoxClass`` built from the corresponding resulting bounding box.

    Args:
        image_path: Path of the image; the file is overwritten.
        figures: Figures whose ``figure_boundary`` describes boxes on the
            image. If empty, it is returned unchanged and no file is read
            or written.

    Returns:
        A new list holding the same (mutated) figure objects, or
        ``figures`` itself when it is empty.
    """
    if len(figures) == 0:
        return figures

    image = imageio.imread(image_path)

    bbs = []
    for figure in figures:
        boundary = figure.figure_boundary
        bbs.append(
            ia.BoundingBox(x1=boundary.x1,
                           y1=boundary.y1,
                           x2=boundary.x2,
                           y2=boundary.y2))

    transform = self.image_augmentation_transform_sequence
    if transform:
        # One image, with one list of boxes belonging to it.
        images_aug, bbs_aug = transform(images=[image],
                                        bounding_boxes=[bbs])
    else:
        # No transform configured: image and boxes pass through as they are.
        images_aug, bbs_aug = [image], [bbs]

    # Replace the original file with the (possibly unchanged) image.
    imageio.imwrite(image_path, images_aug[0])

    # NOTE(review): assumes the boxes come back in the same order as the
    # input figures -- confirm against the transform in use.
    boxes_for_image = bbs_aug[0]
    figures_aug = []
    for idx, fig in enumerate(figures):
        bb = boxes_for_image[idx]
        corners = (float(bb.x1), float(bb.y1), float(bb.x2), float(bb.y2))
        fig.figure_boundary = BoxClass.from_tuple(corners)
        figures_aug.append(fig)
    return figures_aug
def proposal_right(self, full_box: BoxClass, caption_box: BoxClass) -> BoxClass:
    """Return ``full_box`` with its ``x1`` moved past the caption.

    The new ``x1`` is ``caption_box.x2 + self.CAPTION_OFFSET``; ``y1``,
    ``x2`` and ``y2`` are taken from ``full_box`` unchanged.
    """
    new_x1 = caption_box.x2 + self.CAPTION_OFFSET
    return BoxClass(
        x1=new_x1,
        y1=full_box.y1,
        x2=full_box.x2,
        y2=full_box.y2,
    )
def proposal_left(self, full_box: BoxClass, caption_box: BoxClass) -> BoxClass:
    """Return ``full_box`` with its ``x2`` pulled back before the caption.

    The new ``x2`` is ``caption_box.x1 - self.CAPTION_OFFSET``; ``x1``,
    ``y1`` and ``y2`` are taken from ``full_box`` unchanged.
    """
    bounds = dict(
        x1=full_box.x1,
        y1=full_box.y1,
        x2=caption_box.x1 - self.CAPTION_OFFSET,
        y2=full_box.y2,
    )
    return BoxClass(**bounds)
def proposal_down(full_box: BoxClass, caption_box: BoxClass) -> BoxClass:
    """Return ``full_box`` with its ``y1`` moved past the caption.

    The new ``y1`` is ``caption_box.y2 + CAPTION_OFFSET``; ``x1``, ``x2``
    and ``y2`` are taken from ``full_box`` unchanged.
    """
    # NOTE(review): unlike proposal_right/proposal_left this takes no
    # ``self`` and reads a bare ``CAPTION_OFFSET`` -- confirm that name is
    # defined in the scope this function lives in.
    new_y1 = caption_box.y2 + CAPTION_OFFSET
    return BoxClass(
        x1=full_box.x1,
        y1=new_y1,
        x2=full_box.x2,
        y2=full_box.y2,
    )
def proposal_up(full_box: BoxClass, caption_box: BoxClass) -> BoxClass:
    """Return ``full_box`` with its ``y2`` pulled back before the caption.

    The new ``y2`` is ``caption_box.y1 - CAPTION_OFFSET``; ``x1``, ``y1``
    and ``x2`` are taken from ``full_box`` unchanged.
    """
    # NOTE(review): unlike proposal_right/proposal_left this takes no
    # ``self`` and reads a bare ``CAPTION_OFFSET`` -- confirm that name is
    # defined in the scope this function lives in.
    bounds = dict(
        x1=full_box.x1,
        y1=full_box.y1,
        x2=full_box.x2,
        y2=caption_box.y1 - CAPTION_OFFSET,
    )
    return BoxClass(**bounds)