예제 #1
0
def page_extraction(model_dir: str,
                    filenames_to_process: List[str],
                    output_dir: str,
                    draw_extractions: bool=False,
                    config: tf.compat.v1.ConfigProto=None):
    """
    Predicts the page region of each image and exports it as a PAGE XML file.

    For every file, the model prediction is binarized, upscaled to the original image size and the
    enclosing box is searched with ``find_boxes``. One ``<basename>.xml`` file is written per image in
    ``output_dir``; when no box is found an empty ``PAGE.Border`` is exported and a message is printed.

    :param model_dir: directory of the exported model, passed to ``LoadedModel``
    :param filenames_to_process: filenames of the images to process
    :param output_dir: directory where the XML files are written (created if missing)
    :param draw_extractions: if True, the found box is drawn on the original image, which is saved as
        ``<output_dir>/drawings/<basename>_boxes.jpg``
    :param config: session configuration forwarded to ``tf.compat.v1.Session``
    """
    os.makedirs(output_dir, exist_ok=True)
    if draw_extractions:
        drawing_dir = os.path.join(output_dir, 'drawings')
        # exist_ok: re-running on an existing output directory must not fail
        os.makedirs(drawing_dir, exist_ok=True)

    with tf.compat.v1.Session(config=config):
        # Load the model
        m = LoadedModel(model_dir, predict_mode='filename')
        for filename in tqdm(filenames_to_process, desc='Prediction'):
            # Needed for the XML filename in every case, so it is computed before any branching
            basename = os.path.basename(filename).split('.')[0]

            # Inference
            prediction = m.predict(filename)
            probs = prediction['probs'][0]
            original_shape = prediction['original_shape']

            probs = probs / np.max(probs)  # Normalize to be in [0, 1]
            # Binarize the predictions
            page_bin = page_post_processing_fn(probs, threshold=-1)

            # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)
            bin_upscaled = cv2.resize(page_bin.astype(np.uint8, copy=False),
                                      tuple(original_shape[::-1]), interpolation=cv2.INTER_NEAREST)

            # Find quadrilateral enclosing the page
            pred_page_coords = find_boxes(bin_upscaled.astype(np.uint8, copy=False),
                                          mode='min_rectangle', min_area=0.2, n_max_boxes=1)

            if pred_page_coords is not None:
                # Create page region from the box corners
                page_border = PAGE.Border(coords=PAGE.Point.cv2_to_point_list(pred_page_coords[:, None, :]))

                if draw_extractions:
                    # Draw page box on original image and export it
                    original_img = imread(filename, pilmode='RGB')
                    cv2.polylines(original_img, [pred_page_coords[:, None, :]], True, (0, 0, 255), thickness=5)
                    imsave(os.path.join(drawing_dir, '{}_boxes.jpg'.format(basename)), original_img)

            else:
                print('No box found in {}'.format(filename))
                page_border = PAGE.Border()

            # Export the XML file (with an empty border when no page was found)
            page_xml = PAGE.Page(image_filename=filename, image_width=original_shape[1], image_height=original_shape[0],
                                 page_border=page_border)
            xml_filename = os.path.join(output_dir, '{}.xml'.format(basename))
            page_xml.write_to_file(xml_filename, creator_name='PageExtractor')
예제 #2
0
def extract_page(prediction: np.ndarray, min_area: float=0.2, post_process_params: dict=None) -> np.ndarray:
    """
    Given an image with probabilities, post-processes it and extracts one box.

    :param prediction: probability mask [0, 1]
    :param min_area: minimum area to be considered as a valid extraction (forwarded to ``find_boxes``)
    :param post_process_params: keyword arguments for the page post-processing function; when None or
        empty, ``prediction`` is used without post-processing
    :return: the result of ``find_boxes`` in 'quadrilateral' mode with ``n_max_boxes=1``, i.e. the
        coordinates of one box (presumably None when no box is found -- confirm against ``find_boxes``)
    """
    if post_process_params:
        post_pred = page_post_processing_fn(prediction, **post_process_params)
    else:
        post_pred = prediction
    # NOTE(review): the uint8 cast truncates, so without post-processing the input presumably has to be
    # an already binarized mask -- confirm with callers.
    pred_box = find_boxes(np.uint8(post_pred), mode='quadrilateral', min_area=min_area, n_max_boxes=1)

    return pred_box
예제 #3
0
def post_process_probs_ornament(probability_maps):
    """
    Extracts ornament boxes from the probability maps and rasterizes them into a binary map.

    Channel 1 of ``probability_maps`` is thresholded at 0.75, cleaned with a kernel of size 3 and passed
    to ``boxes_detection.find_boxes`` in 'rectangle' mode; the resulting boxes are filled with the value
    255 into channel 0 of the output.

    :param probability_maps: array indexed as [row, column, channel]
    :return: tuple (binary maps, boxes); the binary maps are uint8, shaped like the input without its
        first channel (presumably the background class -- confirm)
    """
    # Zero-initialised output: same layout as the input with channel 0 removed
    output_maps = np.delete(np.zeros_like(probability_maps, np.uint8), 0, 2)

    # Ornament: threshold, clean, then look for rectangles
    ornament_mask = binarization.cleaning_binary(
        binarization.thresholding(probability_maps[:, :, 1], threshold=0.75),
        kernel_size=3)
    ornament_boxes = boxes_detection.find_boxes(ornament_mask,
                                                mode='rectangle',
                                                min_area=0.)

    # Fill the boxes on a blank canvas and keep only its first channel
    canvas = np.zeros_like(output_maps)
    filled = cv2.fillPoly(canvas, ornament_boxes, (255, 0, 0))
    output_maps[:, :, 0] = filled[:, :, 0]

    return output_maps, ornament_boxes
예제 #4
0
            probs = probs[:, :,
                          1]  # Take only class '1' (class 0 is the background, class 1 is the page)
            probs = probs / np.max(probs)  # Normalize to be in [0, 1]

            # Binarize the predictions
            page_bin = page_make_binary_mask(probs)

            # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)
            bin_upscaled = cv2.resize(page_bin.astype(np.uint8, copy=False),
                                      tuple(original_shape[::-1]),
                                      interpolation=cv2.INTER_NEAREST)

            # Find quadrilateral enclosing the page
            pred_page_coords = boxes_detection.find_boxes(bin_upscaled.astype(
                np.uint8, copy=False),
                                                          mode='min_rectangle',
                                                          min_area=0.2,
                                                          n_max_boxes=1)

            # Draw page box on original image and export it. Add also box coordinates to the txt file
            original_img = imread(filename, pilmode='RGB')
            if pred_page_coords is not None:
                cv2.polylines(original_img, [pred_page_coords[:, None, :]],
                              True, (0, 0, 255),
                              thickness=5)
                # Write corners points into a .txt file
                txt_coordinates += '{},{}\n'.format(
                    filename, format_quad_to_string(pred_page_coords))

                # Create page region and XML file
                page_border = PAGE.Border(coords=PAGE.Point.cv2_to_point_list(
예제 #5
0
            bin_upscaled = cv2.resize(page_bin.astype(np.uint8, copy=False),
                                      tuple(original_shape[::-1]), interpolation=cv2.INTER_NEAREST)



            # plt.imshow(bin_upscaled, interpolation='nearest')
            # plt.show()

            # plt.imshow(bin_upscaled2, interpolation='nearest')
            # plt.show()

            # plt.imshow(bin_upscaled3, interpolation='nearest')
            # plt.show()

            pred_page_coords0 = boxes_detection.find_boxes(bin_upscaled0.astype(np.uint8, copy=False),
                                                           mode='min_rectangle')

            # Find quadrilateral enclosing the page
            pred_page_coords = boxes_detection.find_boxes(bin_upscaled.astype(np.uint8, copy=False),
                                                          mode='min_rectangle')


            # Draw page box on original image and export it. Add also box coordinates to the txt file
            original_img = imread(filename, pilmode='RGB')
            if pred_page_coords is not None:

                for x in range(0, len(pred_page_coords0)):
                    pred_page_coords_element0 = pred_page_coords0[x]
                    # cv2.polylines(original_img, [pred_page_coords_element0[:, None, :]], True, (0, 0, 0), thickness=10)

                # Write corners points into a .txt file
예제 #6
0
    async def run(self):
        """
        Consume prediction groups from ``self.work_queue`` forever and post-process each one.

        For every dequeued item the method:

        1. optionally (``self.enable_debug``) writes visualisation images into ``self.output_dir``;
        2. for each non-background label of ``label_bins`` with at least 500 labelled pixels, binarizes the
           label's probability map, upscales it to the original image size, requests rectangles from
           ``boxes_detection.find_boxes`` (``n_max_boxes=4``) and records the features of every rectangle
           with ``self.feat.put``;
        3. in production mode (``self.production_mode``) runs ``self.post_model`` on the feature vector
           and, for pages predicted as type 1 (start page of an article), extracts title/author text and
           appends a JSON record to ``<basename>.json`` when all sanity checks pass.

        The loop has no exit condition, so the coroutine never returns normally.

        NOTE(review): the body contains no ``await``, so this coroutine never yields control to the
        event loop -- confirm that this is intended.
        """
        while True:
            #
            #   get item off work queue (and measure how long we waited for it)
            #
            start_wait = time.time()
            g = self.work_queue.pop()
            finish_wait = time.time()
            self.counter += 1
            (labels_all, probs_all, filename, original_shape,
             inference_time_sec, page_number) = self.work_queue.ungroup(g)
            basename = os.path.basename(filename).split('.')[0]
            self.feat.start(basename)
            if self.enable_debug:
                # write out an image of the per pixel labels
                label_viz = np.zeros(
                    (labels_all.shape[0], labels_all.shape[1], 3), np.uint8)
                for h in range(labels_all.shape[0]):
                    for w in range(labels_all.shape[1]):
                        color = self.label_val_to_color(labels_all[h, w])
                        label_viz[h, w, 0] = color[0]
                        label_viz[h, w, 1] = color[1]
                        label_viz[h, w, 2] = color[2]
                imsave(
                    os.path.join(self.output_dir, f"{basename}_label_viz.png"),
                    label_viz)
            # what pixel labels do we have? minlength pads with zeros so that every label
            # of label_bins has an entry even when it does not occur in the image
            hist_label_counts = np.bincount(
                labels_all.flatten(), minlength=max(label_bins) + 1).tolist()

            self._put_results_log(
                f"processing: file={filename} histogram={hist_label_counts}  "
                f"infer_timing={inference_time_sec} original_shape={original_shape}"
            )

            original_img = imread(filename, pilmode='RGB')
            if self.enable_debug:
                original_img_box_viz = np.array(original_img)
                original_img_box_viz_modified = False

            #
            #    handle rectangles here!
            #
            for label_slice in label_bins:
                if label_slice == 0:
                    continue  # skip background
                color_tuple = self.label_val_to_color(label_slice)
                #  area of all the pixel labels for a particular class, might be multiple regions
                area = hist_label_counts[label_slice]
                if area < 500:  # minimum size
                    # reject small label areas
                    continue

                probs = probs_all[:, :, label_slice]

                #        make an image showing probability map for this label before postprocessing
                #            (it can include multiple blobs)
                if self.enable_debug:
                    # same grey value in the three channels, computed without a per-pixel Python loop
                    gray = (probs * 255).astype(np.uint8)
                    prob_img = np.stack([gray, gray, gray], axis=2)
                    imsave(
                        os.path.join(
                            self.output_dir,
                            f"{basename}_{label_slice}_label_prob.png"),
                        prob_img)

                # Binarize the predictions
                page_bin = self.page_make_binary_mask(probs)

                # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)
                bin_upscaled = cv2.resize(page_bin.astype(np.uint8,
                                                          copy=False),
                                          tuple(original_shape[::-1]),
                                          interpolation=cv2.INTER_NEAREST)
                # upscale probs the same way so we can calculate confidence later
                probs_upscaled = cv2.resize(probs.astype(np.float32,
                                                         casting='same_kind'),
                                            tuple(original_shape[::-1]),
                                            interpolation=cv2.INTER_NEAREST)

                # Find quadrilateral(s) enclosing the label area(s).
                #  allow more than reasonable number of boxes so we can use spurious boxes as a reject signal
                pred_region_coords_list = boxes_detection.find_boxes(
                    bin_upscaled.astype(np.uint8, copy=False),
                    mode='rectangle',
                    min_area=0.001,
                    n_max_boxes=4)

                if pred_region_coords_list is None:
                    # No box found for label
                    continue

                # coord is [[x0,y0], [x1,y0], [x1,y1], [x0,y1]]  (a path for drawing a polygon, clockwise)
                #  origin is upper left [x,y]:
                #  [x0,y0]         [x1,y0]
                #        rectangle
                #  [x0,y1]         [x1,y1]
                # which means x0<x1 and y0<y1
                for count, pred_region_coords in enumerate(pred_region_coords_list):
                    #  cut out rectangle for region based on original image size
                    x0 = pred_region_coords[0, 0]
                    y0 = pred_region_coords[0, 1]
                    x1 = pred_region_coords[1, 0]
                    y1 = pred_region_coords[2, 1]
                    probs_rectangle = probs_upscaled[
                        y0:y1 + 1, x0:x1 + 1]  # values are in range [0,1]
                    # NOTE(review): the slice is inclusive of x1/y1 while the denominator is not, and a
                    # degenerate box (x1 == x0 or y1 == y0) divides by zero. Left unchanged because the
                    # value is a feature consumed by self.post_model.
                    overall_confidence = (sum(sum(probs_rectangle))) / (
                        (x1 - x0) * (y1 - y0))
                    aspect_ratio = (x1 - x0) / (y1 - y0)  # w/h
                    # NOTE(review): original_shape is reversed to obtain (w, h) for cv2.resize above, so
                    # index 0 looks like the height and index 1 like the width; widths/x are therefore
                    # normalised by the height and heights/y by the width. Left unchanged because these
                    # features feed self.post_model -- confirm before swapping.
                    page_width_fraction = (x1 - x0) / original_shape[0]
                    page_height_fraction = (y1 - y0) / original_shape[1]
                    normalized_x = x0 / original_shape[0]
                    normalized_y = y0 / original_shape[1]
                    region_size = page_width_fraction * page_height_fraction
                    cmts = f"Prediction {x0},{y0},{x1},{y1} confidence={overall_confidence} aspect={aspect_ratio} widthfrac={page_width_fraction} heightfrac={page_height_fraction} normalized_x={normalized_x} normalized_y={normalized_y} dimensions={x1 - x0}x{y1 - y0} spec={basename}_{label_slice}-{count}"
                    self._put_results_log(cmts)
                    img_rectangle = original_img[y0:y1 + 1, x0:x1 + 1]
                    if self.enable_debug:
                        # draw box to visualize rectangle
                        cv2.polylines(original_img_box_viz,
                                      [pred_region_coords[:, None, :]],
                                      True,
                                      (color_tuple[0], color_tuple[1],
                                       color_tuple[2]),
                                      thickness=5)
                        original_img_box_viz_modified = True
                        imsave(
                            os.path.join(
                                self.output_dir,
                                f"{basename}_{label_slice}-{count}_{overall_confidence}_rect.jpg"
                            ), img_rectangle)

                    # store info on area for use after all areas in image are gathered
                    # (the rectangle corners are passed as tag_rect x0, y0, x1, y1)
                    self.feat.put(label_slice, count, region_size,
                                  overall_confidence, aspect_ratio,
                                  page_width_fraction,
                                  page_height_fraction, normalized_x,
                                  normalized_y, x0, y0, x1, y1,
                                  img_rectangle, cmts)

            if self.enable_debug and original_img_box_viz_modified:
                # boxes for all labels, using mask colors
                imsave(
                    os.path.join(self.output_dir, f"{basename}__boxes.jpg"),
                    original_img_box_viz)

            # finish image, in non-production this saves feature vector for post model
            self.feat.finish()
            page_prediction_msg = ""
            prediction_summary_txt = ""
            if self.production_mode:
                #
                #    apply post-model to determine page type
                #
                v = np.zeros((1, self.feat.vec_length()))
                v[0] = self.feat.get_post_model_vec()
                y = self.post_model.predict(v)
                page_type = int(y[0])
                page_prediction_msg = f"PagePrediction: {basename} "

                #
                #    take actions
                #
                if page_type == 0:  # other page, skip
                    page_prediction_msg += "type=0"
                elif page_type == 1:  # start page of article, save info
                    page_prediction_msg += "type=1"
                    # NOTE(review): the factor 2 presumably converts image pixels into the coordinate
                    # space expected by self.extractor -- confirm.
                    title_info = self.feat.get_label_instance(1, 0)
                    title_rect_x0 = 2 * title_info["tag_rect_x0"]
                    title_rect_y0 = 2 * title_info["tag_rect_y0"]
                    title_rect_x1 = 2 * title_info["tag_rect_x1"]
                    title_rect_y1 = 2 * title_info["tag_rect_y1"]
                    title_normalized_y = title_info["normalized_y"]

                    author_info = self.feat.get_label_instance(2, 0)
                    author_rect_x0 = 2 * author_info["tag_rect_x0"]
                    author_rect_y0 = 2 * author_info["tag_rect_y0"]
                    author_rect_x1 = 2 * author_info["tag_rect_x1"]
                    author_rect_y1 = 2 * author_info["tag_rect_y1"]
                    author_normalized_y = author_info["normalized_y"]

                    # qualifications: collect every reason for rejecting the page
                    reject_msgs = []
                    if (title_info["confidence"] < .5
                            or author_info["confidence"] < .5):
                        # too low, could be 0 (missing)
                        reject_msgs.append("  REJECT confidence too low  ")
                    if title_rect_y0 > author_rect_y0:
                        # unusual, author appears above title
                        reject_msgs.append("  REJECT author appears above title  ")
                    if title_normalized_y > 0.5 or author_normalized_y > 0.5:
                        reject_msgs.append(
                            "  REJECT: title or author appears in lower half of page ")
                    for msg in reject_msgs:
                        self._put_results_log(msg)
                        prediction_summary_txt += msg + "\n"
                    acceptable = not reject_msgs

                    # text is extracted and logged even for rejected pages
                    title = self.extractor.find_bbox_text(
                        page_number, title_rect_x0, title_rect_y0,
                        title_rect_x1, title_rect_y1)
                    title = self.cleaner.one_line(title)
                    authors = self.extractor.find_bbox_text(
                        page_number, author_rect_x0, author_rect_y0,
                        author_rect_x1, author_rect_y1)
                    authors = self.cleaner.cleanAuthors(authors)
                    smsg = f"{basename}: page={page_number} TITLE={title} AUTHORS={authors}"
                    self._put_results_log(smsg)
                    prediction_summary_txt += smsg
                    prediction_summary_txt += f"\nTITLE({title_info['comments']})\n"
                    prediction_summary_txt += f"AUTHOR({author_info['comments']})\n"
                    if acceptable:
                        json_per_image = os.path.join(self.output_dir,
                                                      f"{basename}.json")
                        json_dict = {
                            "page_number": page_number,
                            "basename": basename,
                            "type": "start_article",
                            "title": title,
                            "authors": authors,
                        }
                        json_txt = json.dumps(json_dict)
                        with open(json_per_image, "a") as f:
                            f.write(f"{json_txt}\n")
                elif page_type == 2:  # references page, save info
                    page_prediction_msg += "type=2"
                else:  # toc page (page_type == 3), save info
                    page_prediction_msg += "type=3"
            # (when not in production mode nothing more is done here: that mode only gathers
            #  training data for the post model)
            finish_post = time.time()
            self._put_results_log(
                f"TIMING: wait={finish_wait - start_wait} post={finish_post - finish_wait} {page_prediction_msg}"
            )

            #   if debug, emit a txt summary
            if self.enable_debug and len(prediction_summary_txt) > 0:
                debug_per_image = os.path.join(self.output_dir,
                                               f"{basename}.txt")
                with open(debug_per_image, "a") as f:
                    f.write(f"{prediction_summary_txt}\n")
예제 #7
0
def page_evaluate_folder(output_folder: str,
                         validation_dir: str,
                         pixel_wise: bool = True,
                         debug_folder: str = None,
                         verbose: bool = False) -> dict:
    """
    Evaluates post-processed page masks against the ground-truth label images.

    :param output_folder: contains the *.png files from the post_processing
    :param validation_dir: directory containing the ground truth; label images are read from
        ``<validation_dir>/labels/<basename>.png`` and, when debugging, the original images from
        ``<validation_dir>/images/<basename>.jpg``
    :param pixel_wise: if True, pixel-wise precision / recall / F-measure are computed in addition to the
        IOU of the extracted boxes (the IOU is always computed)
    :param debug_folder: if given, binary masks, images with the drawn label box (green) and predicted
        box (blue) and a ``predicted_boxes.txt`` file are exported to this folder (created if missing)
    :param verbose: if True, prints a message for every image whose IOU is counted as 0 because the
        predicted or the label box is missing
    :return: dict with the keys 'precision', 'recall', 'f_measure' and 'mIOU' taken from the accumulated
        ``Metrics`` object
    """
    if debug_folder is not None:
        os.makedirs(debug_folder, exist_ok=True)

    filenames_binary_masks = glob(os.path.join(output_folder, '*.png'))

    global_metrics = Metrics()
    list_boxes = list()
    for filename in tqdm(filenames_binary_masks, desc='Evaluation'):
        basename = os.path.basename(filename).split('.')[0]

        # Open post_processed and label image, both rescaled by their maximum.
        # np.maximum(..., 1) keeps an all-zero image at zero instead of dividing by zero.
        post_processed_img = imread(filename)
        post_processed_img = post_processed_img / np.maximum(
            np.max(post_processed_img), 1)

        label_image = imread(os.path.join(validation_dir, 'labels',
                                          '{}.png'.format(basename)),
                             mode='L')
        label_image = label_image / np.maximum(np.max(label_image), 1)

        # Upsample processed image to compare it to original image
        # (cv2 expects the target size as (width, height))
        target_shape = (label_image.shape[1], label_image.shape[0])
        bin_upscaled = cv2.resize(np.uint8(post_processed_img),
                                  target_shape,
                                  interpolation=cv2.INTER_NEAREST)

        if pixel_wise:
            metric = compare_bin_prediction_to_label(bin_upscaled, label_image)
            global_metrics += metric

        pred_box = find_boxes(np.uint8(bin_upscaled), mode='quadrilateral')
        label_box = find_boxes(np.uint8(label_image),
                               mode='quadrilateral',
                               min_area=0.0)

        if debug_folder is not None:
            imsave(os.path.join(debug_folder, '{}_bin.png'.format(basename)),
                   np.uint8(bin_upscaled * 255))
            orig_img = imread(
                os.path.join(validation_dir, 'images',
                             '{}.jpg'.format(basename)))
            if label_box is not None:
                cv2.polylines(orig_img, [label_box[:, None, :]],
                              True, (0, 255, 0),
                              thickness=15)
            else:
                print('There is no labelled page in {}'.format(basename))
            if pred_box is not None:
                cv2.polylines(orig_img, [pred_box[:, None, :]],
                              True, (0, 0, 255),
                              thickness=15)
            else:
                print('No box found in {}'.format(basename))
            imsave(os.path.join(debug_folder, '{}_boxes.jpg'.format(basename)),
                   orig_img)

            list_boxes.append((basename, pred_box))

        if pred_box is not None and label_box is not None:
            iou = intersection_over_union(label_box[:, None, :],
                                          pred_box[:, None, :],
                                          label_image.shape)
            global_metrics.IOU_list.append(iou)
        else:
            # A missing box counts as a complete miss
            global_metrics.IOU_list.append(0)
            if verbose:
                print('No box found for {}'.format(basename))

    if debug_folder is not None:
        with open(os.path.join(debug_folder, 'predicted_boxes.txt'), 'w') as f:
            for box_basename, box in list_boxes:
                if box is None:
                    # Nothing to export; the missing box was already reported above
                    continue
                f.write('{},{}\n'.format(box_basename,
                                         format_quad_to_string(box)))

    if pixel_wise:
        global_metrics.compute_prf()

        print('EVAL --- R : {}, P : {}, FM : {}\n'.format(
            global_metrics.recall, global_metrics.precision,
            global_metrics.f_measure))

    global_metrics.compute_miou()
    print('EVAL --- mIOU : {}\n'.format(global_metrics.mIOU))
    # Export txt similar to test txt ?

    return {
        'precision': global_metrics.precision,
        'recall': global_metrics.recall,
        'f_measure': global_metrics.f_measure,
        'mIOU': global_metrics.mIOU
    }
def main(input_dir,
         model_dir,
         out_dir,
         raw_out_dir=None,
         min_area=0.0005,
         overlay_alpha=127,
         box_color=(255, 0, 0)):
    """
    Runs the model on every file of a directory and saves the images with the found boxes drawn.

    :param input_dir: directory whose entries (``<input_dir>/*``) are processed
    :param model_dir: directory of the exported model, passed to ``LoadedModel``
    :param out_dir: directory where ``<basename>_boxes.jpg`` is written for every input (created if
        missing)
    :param raw_out_dir: if given, the binary mask is additionally saved as a coloured overlay on the
        image, as ``<raw_out_dir>/<basename>_raw.png`` (created if missing)
    :param min_area: minimum area of a box, forwarded to ``boxes_detection.find_boxes``
    :param overlay_alpha: value of the alpha channel of the overlay
    :param box_color: RGB colour used for the overlay and for the drawn boxes
    """
    os.makedirs(out_dir, exist_ok=True)
    if raw_out_dir:
        os.makedirs(raw_out_dir, exist_ok=True)
    input_files = glob('{}/*'.format(input_dir))
    # NOTE(review): tf.Session is the TensorFlow 1.x API (tf.compat.v1.Session in TensorFlow 2) --
    # confirm which version this script targets.
    with tf.Session():
        # Load the model
        m = LoadedModel(model_dir, predict_mode='filename')
        for filename in tqdm(input_files, desc='Processed files'):
            basename = os.path.basename(filename).split('.')[0]

            # For each image, predict each pixel's label
            prediction_outputs = m.predict(filename)
            probs = prediction_outputs['probs'][0]
            original_shape = prediction_outputs['original_shape']
            # Take only class '1'
            # (class 0 is the background, class 1 is the annotation.)
            probs = probs[:, :, 1]
            probs = probs / np.max(probs)  # Normalize to be in [0, 1]

            # Binarize the predictions
            preds_bin = make_binary_mask(probs)

            # Upscale to have full resolution image
            # (cv2 uses (w,h) and not (h,w) for giving shapes)
            bin_upscaled = cv2.resize(preds_bin.astype(np.uint8, copy=False),
                                      tuple(original_shape[::-1]),
                                      interpolation=cv2.INTER_NEAREST)

            if raw_out_dir:
                # If requested, draw the binary mask as an overlay
                # over the image and save it.
                img = Image.open(filename)
                img = img.convert('RGBA')
                overlay_arr = np.stack(
                    [
                        bin_upscaled * box_color[0],  # R
                        bin_upscaled * box_color[1],  # G
                        bin_upscaled * box_color[2],  # B
                        np.ones_like(bin_upscaled) * overlay_alpha  # A
                    ],
                    axis=2)
                overlay = Image.fromarray(overlay_arr, mode='RGBA')
                img.paste(overlay, (0, 0), overlay)
                img.save(
                    os.path.join(raw_out_dir, '{}_raw.png'.format(basename)),
                    'PNG')

            # Find the boxes enclosing the annotations
            boxes = boxes_detection.find_boxes(
                bin_upscaled.astype(np.uint8, copy=False),
                min_area=min_area,
                mode='min_rectangle',
            )

            # Draw boxes on original image.
            original_img = imread(filename, pilmode='RGB')
            # An empty result is "nothing found" too: without the length check an empty
            # list of boxes would never be reported.
            if boxes is not None and len(boxes) > 0:
                cv2.polylines(original_img,
                              boxes,
                              True,
                              box_color,
                              thickness=5)
            else:
                print('No annotation found in {}'.format(filename))

            # The image is saved in every case, with or without boxes
            imsave(os.path.join(out_dir, '{}_boxes.jpg'.format(basename)),
                   original_img)