def page_extraction(model_dir: str,
                    filenames_to_process: List[str],
                    output_dir: str,
                    draw_extractions: bool=False,
                    config: tf.compat.v1.ConfigProto=None):
    """
    Runs page detection on a list of images and writes one PAGE XML file per image.

    For every file the model prediction is normalised, post-processed, upscaled to the
    original image size and reduced to a single enclosing rectangle, which is stored as
    the page border. When no rectangle is found, an empty border is written instead.

    :param model_dir: directory of the exported model, passed to ``LoadedModel``
    :param filenames_to_process: filenames of the images to process
    :param output_dir: directory receiving the ``<basename>.xml`` files (created if missing)
    :param draw_extractions: if True, the detected box is drawn on the image and saved as
        ``<basename>_boxes.jpg`` in ``<output_dir>/drawings``
    :param config: session configuration forwarded to ``tf.compat.v1.Session``
    """
    os.makedirs(output_dir, exist_ok=True)
    if draw_extractions:
        drawing_dir = os.path.join(output_dir, 'drawings')
        # exist_ok keeps re-runs on the same output directory from failing
        os.makedirs(drawing_dir, exist_ok=True)

    with tf.compat.v1.Session(config=config):
        # Load the model
        m = LoadedModel(model_dir, predict_mode='filename')

        for filename in tqdm(filenames_to_process, desc='Prediction'):
            # The basename names both the optional drawing and the XML file, so it must be
            # computed for every image, whether or not a box is found or drawn.
            basename = os.path.basename(filename).split('.')[0]

            # Inference
            prediction = m.predict(filename)
            probs = prediction['probs'][0]
            original_shape = prediction['original_shape']

            probs = probs / np.max(probs)  # Normalize to be in [0, 1]
            # Binarize the predictions
            page_bin = page_post_processing_fn(probs, threshold=-1)

            # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)
            bin_upscaled = cv2.resize(page_bin.astype(np.uint8, copy=False),
                                      tuple(original_shape[::-1]),
                                      interpolation=cv2.INTER_NEAREST)

            # Find quadrilateral enclosing the page
            pred_page_coords = find_boxes(bin_upscaled.astype(np.uint8, copy=False),
                                          mode='min_rectangle', min_area=0.2, n_max_boxes=1)

            if pred_page_coords is not None:
                # Create page region
                page_border = PAGE.Border(
                    coords=PAGE.Point.cv2_to_point_list(pred_page_coords[:, None, :]))

                if draw_extractions:
                    # Draw page box on original image and export it
                    original_img = imread(filename, pilmode='RGB')
                    cv2.polylines(original_img, [pred_page_coords[:, None, :]], True,
                                  (0, 0, 255), thickness=5)
                    imsave(os.path.join(drawing_dir, '{}_boxes.jpg'.format(basename)),
                           original_img)
            else:
                print('No box found in {}'.format(filename))
                page_border = PAGE.Border()

            # Export the (possibly empty) border to the XML file
            page_xml = PAGE.Page(image_filename=filename,
                                 image_width=original_shape[1],
                                 image_height=original_shape[0],
                                 page_border=page_border)
            xml_filename = os.path.join(output_dir, '{}.xml'.format(basename))
            page_xml.write_to_file(xml_filename, creator_name='PageExtractor')
def extract_page(prediction: np.ndarray, min_area: float=0.2, post_process_params: dict=None) -> list():
    """
    Extracts a single box from a probability image, optionally post-processing it first.

    :param prediction: probability mask [0, 1]
    :param min_area: minimum area for an extraction to be considered valid
    :param post_process_params: keyword arguments for the page post-processing function;
        when empty or None, the prediction is used as is
    :return: the result of ``find_boxes`` in 'quadrilateral' mode limited to one box
    """
    # NOTE(review): the return annotation `list()` evaluates to an empty list instance,
    # not a type; kept as is to leave the signature untouched.
    mask = (page_post_processing_fn(prediction, **post_process_params)
            if post_process_params else prediction)
    return find_boxes(np.uint8(mask), mode='quadrilateral', min_area=min_area, n_max_boxes=1)
def post_process_probs_ornament(probability_maps):
    """
    Binarises the ornament probability map and fills the detected boxes into a mask.

    :param probability_maps: array indexed as [row, column, class]; class 1 is used as
        the ornament probability map
    :return: tuple ``(binary_maps, boxes)`` where ``binary_maps`` is a uint8 array with one
        channel fewer than ``probability_maps`` whose channel 0 holds the filled boxes, and
        ``boxes`` is whatever ``boxes_detection.find_boxes`` returned
    """
    # Output has the input's shape minus its first channel
    binary_maps = np.zeros_like(probability_maps, np.uint8)
    binary_maps = np.delete(binary_maps, 0, 2)

    # Ornament
    binary_image = binarization.thresholding(probability_maps[:, :, 1], threshold=0.75)
    binary_image = binarization.cleaning_binary(binary_image, kernel_size=3)
    boxes = boxes_detection.find_boxes(binary_image, mode='rectangle', min_area=0.)

    # find_boxes may yield no boxes (None or an empty list); the mask then stays all
    # zeros instead of handing an invalid polygon list to cv2.fillPoly.
    if boxes is not None and len(boxes) > 0:
        bin_map = np.zeros_like(binary_maps)
        binary_maps[:, :, 0] = cv2.fillPoly(bin_map, boxes, (255, 0, 0))[:, :, 0]

    return binary_maps, boxes
probs = probs[:, :, 1] # Take only class '1' (class 0 is the background, class 1 is the page) probs = probs / np.max(probs) # Normalize to be in [0, 1] # Binarize the predictions page_bin = page_make_binary_mask(probs) # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes) bin_upscaled = cv2.resize(page_bin.astype(np.uint8, copy=False), tuple(original_shape[::-1]), interpolation=cv2.INTER_NEAREST) # Find quadrilateral enclosing the page pred_page_coords = boxes_detection.find_boxes(bin_upscaled.astype( np.uint8, copy=False), mode='min_rectangle', min_area=0.2, n_max_boxes=1) # Draw page box on original image and export it. Add also box coordinates to the txt file original_img = imread(filename, pilmode='RGB') if pred_page_coords is not None: cv2.polylines(original_img, [pred_page_coords[:, None, :]], True, (0, 0, 255), thickness=5) # Write corners points into a .txt file txt_coordinates += '{},{}\n'.format( filename, format_quad_to_string(pred_page_coords)) # Create page region and XML file page_border = PAGE.Border(coords=PAGE.Point.cv2_to_point_list(
bin_upscaled = cv2.resize(page_bin.astype(np.uint8, copy=False), tuple(original_shape[::-1]), interpolation=cv2.INTER_NEAREST) # plt.imshow(bin_upscaled, interpolation='nearest') # plt.show() # plt.imshow(bin_upscaled2, interpolation='nearest') # plt.show() # plt.imshow(bin_upscaled3, interpolation='nearest') # plt.show() pred_page_coords0 = boxes_detection.find_boxes(bin_upscaled0.astype(np.uint8, copy=False), mode='min_rectangle') # Find quadrilateral enclosing the page pred_page_coords = boxes_detection.find_boxes(bin_upscaled.astype(np.uint8, copy=False), mode='min_rectangle') # Draw page box on original image and export it. Add also box coordinates to the txt file original_img = imread(filename, pilmode='RGB') if pred_page_coords is not None: for x in range(0, len(pred_page_coords0)): pred_page_coords_element0 = pred_page_coords0[x] # cv2.polylines(original_img, [pred_page_coords_element0[:, None, :]], True, (0, 0, 0), thickness=10) # Write corners points into a .txt file
async def run(self):
    """
    Post-process predictions taken from the work queue, one page per iteration, forever.

    For each page: per-label regions are extracted from the probability maps, their
    features are stored through ``self.feat``, and in production mode the post-model
    classifies the page. For a page classified as article start (type 1), the title and
    author text is extracted and, if the quality checks pass, appended as a JSON line to
    ``<output_dir>/<basename>.json``. With ``self.enable_debug`` set, visualisation images
    and a text summary are written to ``self.output_dir`` as well.

    NOTE(review): the loop contains no ``await``, so this coroutine never yields control
    once started, and it never returns.
    NOTE(review): ``label_bins`` is not defined in this method; presumably a module-level
    sequence of label values — confirm.
    """
    while True:
        #
        # get item off work queue
        #
        start_wait = time.time()
        g = self.work_queue.pop()
        finish_wait = time.time()
        self.counter += 1
        (labels_all, probs_all, filename, original_shape,
         inference_time_sec, page_number) = self.work_queue.ungroup(g)
        basename = os.path.basename(filename).split('.')[0]
        self.feat.start(basename)

        if self.enable_debug:
            # write out an image of the per pixel labels
            label_viz = np.zeros(
                (labels_all.shape[0], labels_all.shape[1], 3), np.uint8)
            for h in range(0, labels_all.shape[0]):
                for w in range(0, labels_all.shape[1]):
                    color = self.label_val_to_color(labels_all[h, w])
                    label_viz[h, w, 0] = color[0]
                    label_viz[h, w, 1] = color[1]
                    label_viz[h, w, 2] = color[2]
            imsave(
                os.path.join(self.output_dir, f"{basename}_label_viz.png"),
                label_viz)

        # what pixel labels do we have?
        # minlength pads the histogram so every label in label_bins has a count
        hist_label_counts = np.bincount(
            labels_all.flatten(), minlength=max(label_bins) + 1).tolist()

        self._put_results_log(
            f"processing: file={filename} histogram={hist_label_counts} "
            f"infer_timing={inference_time_sec} original_shape={original_shape}"
        )

        original_img = imread(filename, pilmode='RGB')
        if self.enable_debug:
            original_img_box_viz = np.array(original_img)
            original_img_box_viz_modified = False

        #
        # handle rectangles here!
        #
        for label_slice in label_bins:
            if label_slice == 0:
                continue  # skip background

            color_tuple = self.label_val_to_color(label_slice)

            # area of all the pixel labels for a particular class, might be multiple regions
            area = hist_label_counts[label_slice]
            if area < 500:  # minimum size
                # reject small label areas
                continue

            probs = probs_all[:, :, label_slice]

            # make an image showing probability map for this label before postprocessing
            # (it can include multiple blobs)
            if self.enable_debug:
                prob_img = np.zeros((probs.shape[0], probs.shape[1], 3), np.uint8)
                # same grey value in all three channels
                prob_img[:, :, :] = (probs * 255)[:, :, None]
                imsave(
                    os.path.join(
                        self.output_dir,
                        f"{basename}_{label_slice}_label_prob.png"),
                    prob_img)

            # Binarize the predictions
            page_bin = self.page_make_binary_mask(probs)

            # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)
            bin_upscaled = cv2.resize(page_bin.astype(np.uint8, copy=False),
                                      tuple(original_shape[::-1]),
                                      interpolation=cv2.INTER_NEAREST)

            # upscale probs the same way so we can calculate confidence later
            probs_upscaled = cv2.resize(probs.astype(np.float32, casting='same_kind'),
                                        tuple(original_shape[::-1]),
                                        interpolation=cv2.INTER_NEAREST)

            # Find quadrilateral(s) enclosing the label area(s).
            # allow more than reasonable number of boxes so we can use spurious boxes as a reject signal
            pred_region_coords_list = boxes_detection.find_boxes(
                bin_upscaled.astype(np.uint8, copy=False),
                mode='rectangle',
                min_area=0.001,
                n_max_boxes=4)

            # coord is [[a,b], [c,b], [c,d], [a,d]] (a path for drawing a polygon, clockwise)
            # origin is upper left [x,y]:
            # [a,b]        [c,b]
            #     rectangle
            # [a,d]        [c,d]
            # which means a<c and b<d

            if pred_region_coords_list is None:
                # No box found for label
                continue

            for count, pred_region_coords in enumerate(pred_region_coords_list):
                # cut out rectangle for region based on original image size
                a = pred_region_coords[0, 0]
                b = pred_region_coords[0, 1]
                c = pred_region_coords[1, 0]
                d = pred_region_coords[2, 1]
                probs_rectangle = probs_upscaled[
                    b:d + 1, a:c + 1]  # values are in range [0,1]
                overall_confidence = (sum(sum(probs_rectangle))) / (
                    (c - a) * (d - b))
                aspect_ratio = (c - a) / (d - b)  # w/h
                # NOTE(review): original_shape is used as (h, w) by the resize calls above,
                # so these divide x-extents by the height and y-extents by the width.
                # Left unchanged because the values feed the post-model feature vector —
                # confirm against the trained post-model before fixing.
                page_width_fraction = (c - a) / original_shape[0]
                page_height_fraction = (d - b) / original_shape[1]
                normalized_x = a / original_shape[0]
                normalized_y = b / original_shape[1]
                region_size = page_width_fraction * page_height_fraction
                cmts = (
                    f"Prediction {a},{b},{c},{d} confidence={overall_confidence} "
                    f"aspect={aspect_ratio} widthfrac={page_width_fraction} "
                    f"heightfrac={page_height_fraction} normalized_x={normalized_x} "
                    f"normalized_y={normalized_y} dimensions={c - a}x{d - b} "
                    f"spec={basename}_{label_slice}-{count}")
                self._put_results_log(cmts)

                img_rectangle = original_img[b:d + 1, a:c + 1]
                tag_rect_x0 = a
                tag_rect_y0 = b
                tag_rect_x1 = c
                tag_rect_y1 = d

                if self.enable_debug:
                    # draw box to visualize rectangle
                    cv2.polylines(original_img_box_viz,
                                  [pred_region_coords[:, None, :]],
                                  True,
                                  (color_tuple[0], color_tuple[1], color_tuple[2]),
                                  thickness=5)
                    original_img_box_viz_modified = True
                    imsave(
                        os.path.join(
                            self.output_dir,
                            f"{basename}_{label_slice}-{count}_{overall_confidence}_rect.jpg"
                        ), img_rectangle)

                # store info on area for use after all areas in image are gathered
                self.feat.put(label_slice, count, region_size,
                              overall_confidence, aspect_ratio,
                              page_width_fraction, page_height_fraction,
                              normalized_x, normalized_y,
                              tag_rect_x0, tag_rect_y0, tag_rect_x1, tag_rect_y1,
                              img_rectangle, cmts)

                # Create page region
                # NOTE(review): page_border is not used afterwards in this method.
                page_border = PAGE.Border(
                    coords=PAGE.Point.cv2_to_point_list(
                        pred_region_coords[:, None, :]))

        if self.enable_debug:
            # boxes for all labels, using mask colors
            if original_img_box_viz_modified:
                imsave(
                    os.path.join(self.output_dir, f"{basename}__boxes.jpg"),
                    original_img_box_viz)

        self.feat.finish(
        )  # finish image, in non-production this saves feature vector for post model

        page_prediction_msg = ""
        prediction_summary_txt = ""
        # When not in production mode (gathering training data for the post model),
        # nothing more is done for this page.
        if self.production_mode:
            #
            # apply post-model to determine page type
            #
            v = np.zeros((1, self.feat.vec_length()))
            v[0] = self.feat.get_post_model_vec()
            y = self.post_model.predict(v)
            page_type = int(y[0])
            page_prediction_msg = f"PagePrediction: {basename} "

            #
            # take actions
            #
            if page_type == 0:
                # other page, skip
                page_prediction_msg += "type=0"
            elif page_type == 1:
                # start page of article, save info
                page_prediction_msg += "type=1"

                # NOTE(review): the factor 2 presumably maps image pixels to the
                # coordinate space expected by self.extractor — confirm.
                title_info = self.feat.get_label_instance(1, 0)
                title_rect_x0 = 2 * title_info["tag_rect_x0"]
                title_rect_y0 = 2 * title_info["tag_rect_y0"]
                title_rect_x1 = 2 * title_info["tag_rect_x1"]
                title_rect_y1 = 2 * title_info["tag_rect_y1"]
                title_normalized_y = title_info["normalized_y"]

                author_info = self.feat.get_label_instance(2, 0)
                author_rect_x0 = 2 * author_info["tag_rect_x0"]
                author_rect_y0 = 2 * author_info["tag_rect_y0"]
                author_rect_x1 = 2 * author_info["tag_rect_x1"]
                author_rect_y1 = 2 * author_info["tag_rect_y1"]
                author_normalized_y = author_info["normalized_y"]

                acceptable = True

                # qualifications
                # Both confidences are compared against the threshold; a bare
                # truthiness test on the author confidence would reject every page
                # that has any author confidence at all.
                if (title_info["confidence"] < .5
                        or author_info["confidence"] < .5):
                    # too low, could be 0 (missing)
                    msg = " REJECT confidence too low "
                    self._put_results_log(msg)
                    prediction_summary_txt += msg + "\n"
                    acceptable = False

                if title_rect_y0 > author_rect_y0:
                    # unusual, author appears above title
                    msg = " REJECT author appears above title "
                    self._put_results_log(msg)
                    prediction_summary_txt += msg + "\n"
                    acceptable = False

                if title_normalized_y > 0.5 or author_normalized_y > 0.5:
                    msg = " REJECT: title or author appears in lower half of page "
                    self._put_results_log(msg)
                    prediction_summary_txt += msg + "\n"
                    acceptable = False

                # Text is extracted even for rejected pages so that it shows up in
                # the results log and the debug summary.
                title = self.extractor.find_bbox_text(
                    page_number, title_rect_x0, title_rect_y0,
                    title_rect_x1, title_rect_y1)
                title = self.cleaner.one_line(title)
                authors = self.extractor.find_bbox_text(
                    page_number, author_rect_x0, author_rect_y0,
                    author_rect_x1, author_rect_y1)
                authors = self.cleaner.cleanAuthors(authors)

                smsg = f"{basename}: page={page_number} TITLE={title} AUTHORS={authors}"
                self._put_results_log(smsg)
                prediction_summary_txt += smsg
                prediction_summary_txt += f"\nTITLE({title_info['comments']})\n"
                prediction_summary_txt += f"AUTHOR({author_info['comments']})\n"

                if acceptable:
                    json_per_image = os.path.join(self.output_dir,
                                                  f"{basename}.json")
                    json_dict = {
                        "page_number": page_number,
                        "basename": basename,
                        "type": "start_article",
                        "title": title,
                        "authors": authors,
                    }
                    json_txt = json.dumps(json_dict)
                    with open(json_per_image, "a") as f:
                        f.write(f"{json_txt}\n")
            elif page_type == 2:
                # references page, save info
                page_prediction_msg += "type=2"
            else:
                # toc page, save info (page_type == 3)
                page_prediction_msg += "type=3"

        finish_post = time.time()
        self._put_results_log(
            f"TIMING: wait={finish_wait - start_wait} post={finish_post - finish_wait} {page_prediction_msg}"
        )

        # if debug, emit a txt summary
        if self.enable_debug:
            if len(prediction_summary_txt) > 0:
                debug_per_image = os.path.join(self.output_dir,
                                               f"{basename}.txt")
                with open(debug_per_image, "a") as f:
                    f.write(f"{prediction_summary_txt}\n")
def page_evaluate_folder(output_folder: str, validation_dir: str, pixel_wise: bool = True,
                         debug_folder: str = None, verbose: bool = False) -> dict:
    """
    Evaluates post-processed page masks against the ground-truth label images.

    :param output_folder: contains the *.png files from the post_processing
    :param validation_dir: directory containing the gt label images in ``labels/<basename>.png``
        (and, when ``debug_folder`` is given, the images in ``images/<basename>.jpg``)
    :param pixel_wise: if True, additionally accumulates the pixel-wise metrics and computes
        precision / recall / f-measure; the IOU of the boxes is computed in both cases
    :param debug_folder: if given, the binary masks, the images with the label and predicted
        boxes drawn on them and a ``predicted_boxes.txt`` file are written there
    :param verbose: if True, prints a message when the IOU cannot be computed for an image
    :return: dict with keys 'precision', 'recall', 'f_measure' and 'mIOU'
    """
    if debug_folder is not None:
        os.makedirs(debug_folder, exist_ok=True)

    filenames_binary_masks = glob(os.path.join(output_folder, '*.png'))

    global_metrics = Metrics()
    list_boxes = list()
    for filename in tqdm(filenames_binary_masks, desc='Evaluation'):
        basename = os.path.basename(filename).split('.')[0]

        # Open post_processed and label image
        post_processed_img = imread(filename)
        post_processed_img = post_processed_img / np.maximum(
            np.max(post_processed_img), 1)

        label_image = imread(os.path.join(validation_dir, 'labels',
                                          '{}.png'.format(basename)), mode='L')
        # An all-zero label image (no labelled page) must not trigger a division by zero
        label_max = np.max(label_image)
        label_image = label_image / (label_max if label_max > 0 else 1)

        # Upsample processed image to compare it to original image
        target_shape = (label_image.shape[1], label_image.shape[0])
        bin_upscaled = cv2.resize(np.uint8(post_processed_img), target_shape,
                                  interpolation=cv2.INTER_NEAREST)

        if pixel_wise:
            metric = compare_bin_prediction_to_label(bin_upscaled, label_image)
            global_metrics += metric

        pred_box = find_boxes(np.uint8(bin_upscaled), mode='quadrilateral')
        label_box = find_boxes(np.uint8(label_image), mode='quadrilateral', min_area=0.0)

        if debug_folder is not None:
            imsave(os.path.join(debug_folder, '{}_bin.png'.format(basename)),
                   np.uint8(bin_upscaled * 255))
            orig_img = imread(
                os.path.join(validation_dir, 'images', '{}.jpg'.format(basename)))
            if label_box is not None:
                cv2.polylines(orig_img, [label_box[:, None, :]], True, (0, 255, 0),
                              thickness=15)
            else:
                print('There is no labelled page in {}'.format(basename))
            if pred_box is not None:
                cv2.polylines(orig_img, [pred_box[:, None, :]], True, (0, 0, 255),
                              thickness=15)
            else:
                print('No box found in {}'.format(basename))
            imsave(os.path.join(debug_folder, '{}_boxes.jpg'.format(basename)), orig_img)

        list_boxes.append((basename, pred_box))

        # Compute IOU; images without a predicted or labelled box count as 0
        if pred_box is not None and label_box is not None:
            iou = intersection_over_union(label_box[:, None, :], pred_box[:, None, :],
                                          label_image.shape)
            global_metrics.IOU_list.append(iou)
        else:
            global_metrics.IOU_list.append(0)
            if verbose:
                print('No box found for {}'.format(basename))

    if debug_folder:
        with open(os.path.join(debug_folder, 'predicted_boxes.txt'), 'w') as f:
            for box_basename, box in list_boxes:
                if box is None:
                    # Nothing to format for images without a predicted box
                    continue
                # Only the quadrilateral goes to the formatter, not the (basename, box) pair
                f.write('{},{}\n'.format(box_basename, format_quad_to_string(box)))

    if pixel_wise:
        # Compute metrics over the whole set of images
        global_metrics.compute_prf()
        print('EVAL --- R : {}, P : {}, FM : {}\n'.format(
            global_metrics.recall, global_metrics.precision, global_metrics.f_measure))

    global_metrics.compute_miou()
    print('EVAL --- mIOU : {}\n'.format(global_metrics.mIOU))

    return {
        'precision': global_metrics.precision,
        'recall': global_metrics.recall,
        'f_measure': global_metrics.f_measure,
        'mIOU': global_metrics.mIOU
    }
def main(input_dir, model_dir, out_dir, raw_out_dir=None, min_area=0.0005,
         overlay_alpha=127, box_color=(255, 0, 0)):
    """
    Predicts annotation regions for every file in ``input_dir`` and saves each image
    with the detected boxes drawn on it.

    :param input_dir: directory whose entries are all passed to the model
    :param model_dir: directory of the exported model, passed to ``LoadedModel``
    :param out_dir: directory receiving the ``<basename>_boxes.jpg`` images (created if missing)
    :param raw_out_dir: if set, the binary mask is pasted over the image as an RGBA overlay
        and saved there as ``<basename>_raw.png``
    :param min_area: ``min_area`` argument forwarded to ``boxes_detection.find_boxes``
    :param overlay_alpha: value of the alpha channel of the overlay
    :param box_color: colour used for both the overlay and the drawn boxes
    """
    os.makedirs(out_dir, exist_ok=True)
    if raw_out_dir:
        os.makedirs(raw_out_dir, exist_ok=True)

    image_paths = glob('{}/*'.format(input_dir))

    with tf.Session():
        # The model is loaded once and reused for all files
        model = LoadedModel(model_dir, predict_mode='filename')

        for filename in tqdm(image_paths, desc='Processed files'):
            stem = os.path.basename(filename).split('.')[0]

            # Per-pixel label prediction for the image
            outputs = model.predict(filename)
            class_probs = outputs['probs'][0]
            full_shape = outputs['original_shape']

            # Keep class '1' only (class 0 is the background, class 1 is the
            # annotation) and normalize it to [0, 1]
            annotation_probs = class_probs[:, :, 1]
            annotation_probs = annotation_probs / np.max(annotation_probs)

            # Binary mask, brought back to full resolution
            # (cv2 expects (w,h) where the shape is (h,w))
            mask = make_binary_mask(annotation_probs)
            mask_full = cv2.resize(mask.astype(np.uint8, copy=False),
                                   tuple(full_shape[::-1]),
                                   interpolation=cv2.INTER_NEAREST)

            if raw_out_dir:
                # Optional export of the raw mask as an overlay on the image
                rgba_img = Image.open(filename).convert('RGBA')
                layers = [mask_full * box_color[i] for i in range(3)]  # R, G, B
                layers.append(np.ones_like(mask_full) * overlay_alpha)  # A
                tint = Image.fromarray(np.stack(layers, axis=2), mode='RGBA')
                rgba_img.paste(tint, (0, 0), tint)
                raw_path = os.path.join(raw_out_dir, '{}_raw.png'.format(stem))
                rgba_img.save(raw_path, 'PNG')

            # Boxes enclosing the annotations
            found = boxes_detection.find_boxes(
                mask_full.astype(np.uint8, copy=False),
                min_area=min_area,
                mode='min_rectangle',
            )

            # Draw boxes on original image.
            canvas = imread(filename, pilmode='RGB')
            if found is None:
                print('No annotation found in {}'.format(filename))
            else:
                cv2.polylines(canvas, found, True, box_color, thickness=5)
            imsave(os.path.join(out_dir, '{}_boxes.jpg'.format(stem)), canvas)