Example #1
def page_extraction(model_dir: str,
                    filenames_to_process: List[str],
                    output_dir: str,
                    draw_extractions: bool=False,
                    config: tf.compat.v1.ConfigProto=None):

    os.makedirs(output_dir, exist_ok=True)
    if draw_extractions:
        drawing_dir = os.path.join(output_dir, 'drawings')
        os.makedirs(drawing_dir, exist_ok=True)

    with tf.compat.v1.Session(config=config):
        # Load the model
        m = LoadedModel(model_dir, predict_mode='filename')
        for filename in tqdm(filenames_to_process, desc='Prediction'):
            # Inference
            prediction = m.predict(filename)
            probs = prediction['probs'][0]
            original_shape = prediction['original_shape']

            probs = probs / np.max(probs)  # Normalize to be in [0, 1]
            # Binarize the predictions
            page_bin = page_post_processing_fn(probs, threshold=-1)

            # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)
            bin_upscaled = cv2.resize(page_bin.astype(np.uint8, copy=False),
                                      tuple(original_shape[::-1]), interpolation=cv2.INTER_NEAREST)

            # Find quadrilateral enclosing the page
            pred_page_coords = find_boxes(bin_upscaled.astype(np.uint8, copy=False),
                                          mode='min_rectangle', min_area=0.2, n_max_boxes=1)

            basename = os.path.basename(filename).split('.')[0]
            if pred_page_coords is not None:
                # Create page region and XML file
                page_border = PAGE.Border(coords=PAGE.Point.cv2_to_point_list(pred_page_coords[:, None, :]))

                if draw_extractions:
                    # Draw the page box on the original image and export it
                    original_img = imread(filename, pilmode='RGB')
                    cv2.polylines(original_img, [pred_page_coords[:, None, :]], True, (0, 0, 255), thickness=5)

                    imsave(os.path.join(drawing_dir, '{}_boxes.jpg'.format(basename)), original_img)

            else:
                print('No box found in {}'.format(filename))
                page_border = PAGE.Border()

            page_xml = PAGE.Page(image_filename=filename, image_width=original_shape[1], image_height=original_shape[0],
                                 page_border=page_border)
            xml_filename = os.path.join(output_dir, '{}.xml'.format(basename))
            page_xml.write_to_file(xml_filename, creator_name='PageExtractor')
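
A minimal usage sketch for page_extraction (the model and data paths below are placeholders):

from glob import glob

input_files = glob('data/pages/test/images/*.jpg')
page_extraction(model_dir='models/page_model/export',
                filenames_to_process=input_files,
                output_dir='processed_pages',
                draw_extractions=True)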
Example #2
def extract_lines(npy_filename: str,
                  output_dir: str,
                  original_shape: list,
                  post_process_params: dict,
                  channel_baselines: int = 1,
                  mask_dir: str = None,
                  debug: bool = False):
    """
    From the prediction files (probs) (.npy) finds and extracts the lines into PAGE-XML format.

    :param npy_filename: filename of saved predictions (probs) in range (0,255)
    :param output_dir: output direcoty to save the xml files
    :param original_shape: shpae of the original input image (to rescale the extracted lines if necessary)
    :param post_process_params: pramas for lines detection (sigma, thresholds, ...)
    :param channel_baselines: channel where the baseline class is detected
    :param mask_dir: directory containing masks of the page in order to improve the line extraction
    :param debug: if True will output the binary image of the extracted lines
    :return: contours of lines (open cv format), binary image of lines (lines mask)
    """

    os.makedirs(output_dir, exist_ok=True)

    basename = os.path.basename(npy_filename).split('.')[0]

    pred = np.load(npy_filename) / 255  # type: np.ndarray
    lines_prob = pred[:, :, channel_baselines]

    if mask_dir is not None:
        mask = imread(os.path.join(mask_dir, basename + '.png'), pilmode='L')
        # PIL's resize expects the target size as (width, height), hence the
        # reversed shape
        mask = np.array(
            PIL.Image.fromarray(mask,
                                mode='L').resize(lines_prob.shape[::-1],
                                                 resample=PIL.Image.BILINEAR))
        lines_prob[mask == 0] = 0.

    contours, lines_mask = line_extraction_v1(lines_prob,
                                              **post_process_params)

    if debug:
        imsave(os.path.join(output_dir, '{}_bin.jpg'.format(basename)),
               lines_mask)

    ratio = (original_shape[0] / pred.shape[0],
             original_shape[1] / pred.shape[1])
    xml_filename = os.path.join(output_dir, basename + '.xml')
    PAGE.save_baselines(xml_filename,
                        contours,
                        ratio,
                        predictions_shape=pred.shape[:2])

    return contours, lines_mask
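
A hedged usage sketch; the post-processing parameters and paths are placeholders to be tuned per model (the parameter names match the line_extraction_v1 call in Example #3):

from glob import glob

post_process_params = {'low_threshold': 0.2, 'high_threshold': 0.4, 'sigma': 1.5}
for npy_file in glob('predictions/*.npy'):
    # original_shape would normally be read from the ground-truth XML (see Example #8)
    contours, lines_mask = extract_lines(npy_file,
                                         output_dir='processed',
                                         original_shape=[2000, 1500],
                                         post_process_params=post_process_params)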
Example #3
def baseline_extraction(model_dir: str,
                        filenames_to_process: List[str],
                        output_dir: str,
                        draw_extractions: bool = False,
                        config: tf.compat.v1.ConfigProto = None) -> None:
    """
    Given a model directory this function will load the model and apply it to the given files.

    :param model_dir: Directory containing the saved model
    :param filenames_to_process: filenames of the images to process
    :param output_dir: output directory to save the predictions (probability images)
    :param draw_extractions:
    :param config: ``ConfigProto`` object for ``tf.Session``.
    :return:
    """

    os.makedirs(output_dir, exist_ok=True)
    if draw_extractions:
        drawing_dir = os.path.join(output_dir, 'drawings')
        os.makedirs(drawing_dir, exist_ok=True)

    with tf.compat.v1.Session(config=config):
        # Load the model
        m = LoadedModel(model_dir, predict_mode='filename_original_shape')
        for filename in tqdm(filenames_to_process, desc='Prediction'):
            # Inference
            prediction = m.predict(filename)
            # Take the first element of the 'probs' dictionary (batch size = 1)
            probs = prediction['probs'][0]
            original_shape = probs.shape

            # The baselines probs are on the second channel
            baseline_probs = probs[:, :, 1]
            contours, _ = line_extraction_v1(baseline_probs,
                                             low_threshold=0.2,
                                             high_threshold=0.4,
                                             sigma=1.5)

            basename = os.path.basename(filename).split('.')[0]

            # Compute the ratio to save the coordinates in the original image coordinate reference.
            # (In 'filename_original_shape' mode the prediction already has the original size, so this is (1.0, 1.0).)
            ratio = (original_shape[0] / probs.shape[0],
                     original_shape[1] / probs.shape[1])
            xml_filename = os.path.join(output_dir, basename + '.xml')
            page_object = PAGE.save_baselines(
                xml_filename,
                contours,
                ratio,
                predictions_shape=probs.shape[:2])

            # If specified, saves the images with the annotated baselines
            if draw_extractions:
                image = imread(filename)
                page_object.draw_baselines(image,
                                           color=(255, 0, 0),
                                           thickness=5)

                basename = os.path.basename(filename)
                imsave(os.path.join(drawing_dir, basename), image)
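
As a usage sketch, the optional config can cap TensorFlow's GPU memory usage (model and image paths are placeholders):

from glob import glob
import tensorflow as tf

session_config = tf.compat.v1.ConfigProto()
session_config.gpu_options.allow_growth = True  # do not grab all GPU memory upfront
baseline_extraction(model_dir='models/baseline_model/export',
                    filenames_to_process=glob('data/images/*.jpg'),
                    output_dir='predictions',
                    draw_extractions=True,
                    config=session_config)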
Example #4
def get_original_shape_from_image_file_name(filename):
    """
    I think it is a hard code, filename is "../../data/cbad-mask/complex/test/images\\basename.jpg"
    I want to get image original size from "../../data/cbad-mask/complex/test/gt/basename.xml"
    :param filename:
    :return: original h, w of image
    """
    # get xml file name
    base_name = filename.split("\\")[-1].split(".")[0] + ".xml"
    dir_name = filename.split("\\")[0].replace('images', 'gt')
    gt_xml_filename = os.path.join(dir_name, base_name)
    gt_page_xml = PAGE.parse_file(gt_xml_filename)

    original_shape = [gt_page_xml.image_height, gt_page_xml.image_width]
    return original_shape
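
This helper pairs naturally with extract_lines from Example #2, which needs the original image shape. A hedged sketch, assuming the directory layout described in the docstring:

from glob import glob

for image_file in glob('../../data/cbad-mask/complex/test/images/*.jpg'):
    # [height, width] read from the matching ground-truth XML
    original_shape = get_original_shape_from_image_file_name(image_file)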
Example #5
                                                          mode='min_rectangle',
                                                          min_area=0.2,
                                                          n_max_boxes=1)

            # Draw the page box on the original image and export it. Also add the box coordinates to the txt file
            original_img = imread(filename, pilmode='RGB')
            if pred_page_coords is not None:
                cv2.polylines(original_img, [pred_page_coords[:, None, :]],
                              True, (0, 0, 255),
                              thickness=5)
                # Write the corner points into a .txt file
                txt_coordinates += '{},{}\n'.format(
                    filename, format_quad_to_string(pred_page_coords))

                # Create page region and XML file
                page_border = PAGE.Border(coords=PAGE.Point.cv2_to_point_list(
                    pred_page_coords[:, None, :]))
            else:
                print('No box found in {}'.format(filename))
                page_border = PAGE.Border()

            basename = os.path.basename(filename).split('.')[0]
            imsave(os.path.join(output_dir, '{}_boxes.jpg'.format(basename)),
                   original_img)

            page_xml = PAGE.Page(image_filename=filename,
                                 image_width=original_shape[1],
                                 image_height=original_shape[0],
                                 page_border=page_border)
            xml_filename = os.path.join(output_pagexml_dir,
                                        '{}.xml'.format(basename))
            page_xml.write_to_file(xml_filename, creator_name='PageExtractor')
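
The snippet calls format_quad_to_string, which is not shown here; a minimal sketch of what such a helper could look like, assuming it serializes the four corner points as comma-separated values:

def format_quad_to_string(quad):
    # Serialize corner points as "x1,y1,x2,y2,..." (assumed format)
    s = ''
    for corner in quad:
        s += '{},{},'.format(corner[0], corner[1])
    return s[:-1]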
Example #6
            imsave("prob_masks/" + name, probs[:, :, 1])
            imsave("bin_masks/" + name, binary_map[:, :, 0])

            # Draw the page box on the original image and export it. Also add the box coordinates to the txt file
            original_img = imread(filename, pilmode='RGB')
            if boxes_resized is not None:
                for box in boxes_resized:
                    cv2.polylines(original_img, [box[:, None, :]],
                                  True, (0, 0, 255),
                                  thickness=5)
                # Write the corner points into a .txt file
                txt_coordinates += '{},{}\n'.format(
                    filename, format_quad_to_string(boxes_resized))

                # Create page region from the last detected box
                mark_border = PAGE.Border(
                    coords=PAGE.Point.cv2_to_point_list(box[:, None, :]))
            else:
                print('No box found in {}'.format(filename))
                mark_border = PAGE.Border()

            basename = os.path.basename(filename).split('.')[0]
            imsave(os.path.join(output_dir, '{}_boxes.jpg'.format(basename)),
                   original_img)

            # export (guard against the case where no boxes were found)
            text_regions = [
                PAGE.TextRegion(id='txt-reg-{}'.format(i),
                                coords=PAGE.Point.array_to_point(coords),
                                custom_attribute="structure{type:drop-cap;}")
                for i, coords in enumerate(boxes_resized
                                           if boxes_resized is not None else [])
            ]
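
A hedged continuation sketch for the export step, assuming PAGE.Page accepts a text_regions argument (Example #7 reads page.text_regions, which suggests the attribute exists):

page_xml = PAGE.Page(image_filename=filename,
                     image_width=original_img.shape[1],
                     image_height=original_img.shape[0],
                     page_border=mark_border,
                     text_regions=text_regions)  # text_regions kwarg is an assumption
page_xml.write_to_file(os.path.join(output_dir, '{}.xml'.format(basename)),
                       creator_name='DropCapExtractor')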
Example #7
def annotate_one_page(image_filename: str,
                      output_dir: str,
                      size: int = None,
                      draw_baselines: bool = True,
                      draw_lines: bool = False,
                      draw_endpoints: bool = False,
                      baseline_thickness: float = 0.2,
                      diameter_endpoint: int = 20) -> Tuple[str, str]:
    """
    Creates an annotated mask and corresponding original image and saves it in 'labels' and 'images' folders.
    Also copies the corresponding .xml file into 'gt' folder.

    :param image_filename: filename of the image to process
    :param output_dir: directory to output the annotated label image
    :param size: Size of the resized image (# pixels)
    :param draw_baselines: Draws the baselines (boolean)
    :param draw_lines: Draws the polygon's lines (boolean)
    :param draw_endpoints: Predict beginning and end of baselines (True, False)
    :param baseline_thickness: Thickness of annotated baseline (percentage of the line's height)
    :param diameter_endpoint: Diameter of annotated start/end points
    :return: (output_image_path, output_label_path)
    """

    page_filename = get_page_filename(image_filename)
    # Parse xml file and get TextLines
    page = PAGE.parse_file(page_filename)
    text_lines = [tl for tr in page.text_regions for tl in tr.text_lines]
    img = imread(image_filename, pilmode='RGB')
    # Create empty mask
    gt = np.zeros_like(img)

    if text_lines:
        if draw_baselines:
            # Thickness : should be a percentage of the line height, for example 0.2
            # First, get the mean line height.
            mean_line_height, _, _ = _compute_statistics_line_height(page)
            absolute_baseline_thickness = int(
                max(gt.shape[0] * 0.002,
                    baseline_thickness * mean_line_height))

            # Draw the baselines
            gt_baselines = np.zeros_like(img[:, :, 0])
            gt_baselines = cv2.polylines(
                gt_baselines,
                [PAGE.Point.list_to_cv2poly(tl.baseline) for tl in text_lines],
                isClosed=False,
                color=255,
                thickness=absolute_baseline_thickness)
            gt[:, :, np.argmax(DRAWING_COLOR_BASELINES)] = gt_baselines

        if draw_lines:
            # Draw the lines
            gt_lines = np.zeros_like(img[:, :, 0])
            for tl in text_lines:
                gt_lines = cv2.fillPoly(
                    gt_lines, [PAGE.Point.list_to_cv2poly(tl.coords)],
                    color=255)
            gt[:, :, np.argmax(DRAWING_COLOR_LINES)] = gt_lines

        if draw_endpoints:
            # Draw endpoints of baselines
            gt_points = np.zeros_like(img[:, :, 0])
            for tl in text_lines:
                try:
                    gt_points = cv2.circle(
                        gt_points, (tl.baseline[0].x, tl.baseline[0].y),
                        radius=int((diameter_endpoint / 2 *
                                    (gt_points.shape[0] / TARGET_HEIGHT))),
                        color=255,
                        thickness=-1)
                    gt_points = cv2.circle(
                        gt_points, (tl.baseline[-1].x, tl.baseline[-1].y),
                        radius=int((diameter_endpoint / 2 *
                                    (gt_points.shape[0] / TARGET_HEIGHT))),
                        color=255,
                        thickness=-1)
                except IndexError:
                    print('Length of baseline is {}'.format(len(tl.baseline)))
            gt[:, :, np.argmax(DRAWING_COLOR_POINTS)] = gt_points

    # Make output filenames
    image_label_basename = get_image_label_basename(image_filename)
    output_image_path = os.path.join(output_dir, 'images',
                                     '{}.jpg'.format(image_label_basename))
    output_label_path = os.path.join(output_dir, 'labels',
                                     '{}.png'.format(image_label_basename))
    # Resize (if necessary) and save image and label
    save_and_resize(img, output_image_path, size=size)
    save_and_resize(gt, output_label_path, size=size, nearest=True)
    # Copy XML file to 'gt' folder
    shutil.copy(
        page_filename,
        os.path.join(output_dir, 'gt', '{}.xml'.format(image_label_basename)))

    return os.path.abspath(output_image_path), os.path.abspath(
        output_label_path)
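
A minimal usage sketch; the output subfolders ('images', 'labels', 'gt') are assumed not to be created by the function itself, and the paths and target size are placeholders:

from glob import glob

for sub in ('images', 'labels', 'gt'):
    os.makedirs(os.path.join('annotated_set', sub), exist_ok=True)
for image_filename in glob('data/pages/*.jpg'):
    img_path, label_path = annotate_one_page(image_filename,
                                             output_dir='annotated_set',
                                             size=int(1e6),
                                             draw_baselines=True,
                                             draw_endpoints=False)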
Example #8
def eval_fn(input_dir: str,
            groundtruth_dir: str,
            output_dir: str = None,
            post_process_params: dict = PP_PARAMS,
            channel_baselines: int = 1,
            jar_tool_path: str = CBAD_JAR,
            masks_dir: str = None) -> dict:
    """
    Evaluates a model against the selected set ('groundtruth_dir' contains the XML files)

    :param input_dir: input directory containing the probability maps (.npy)
    :param groundtruth_dir: directory containing the ground-truth XML files
    :param output_dir: output directory for the results
    :param post_process_params: parameters for the post-processing of the probability maps
    :param channel_baselines: channel of the baseline class
    :param jar_tool_path: path to the cBAD evaluation tool (.jar file)
    :param masks_dir: optional, directory where binary masks of the page are stored (.png)
    :return: dict with keys 'avg_precision', 'avg_recall' and 'f_measure'
    """

    if output_dir is None:
        output_dir = input_dir

    # Apply post processing and find lines
    for file in tqdm(glob(os.path.join(input_dir, '*.npy'))):
        basename = os.path.basename(file).split('.')[0]
        gt_xml_filename = os.path.join(groundtruth_dir, basename + '.xml')
        gt_page_xml = PAGE.parse_file(gt_xml_filename)

        original_shape = [gt_page_xml.image_height, gt_page_xml.image_width]

        _, _ = extract_lines(file,
                             output_dir,
                             original_shape,
                             post_process_params,
                             channel_baselines=channel_baselines,
                             mask_dir=masks_dir)

    # Create pairs predicted XML - groundtruth XML to be evaluated
    xml_pred_filenames_list = glob(os.path.join(output_dir, '*.xml'))
    xml_filenames_tuples = list()
    for xml_filename in xml_pred_filenames_list:
        basename = os.path.basename(xml_filename)
        gt_xml_filename = os.path.join(groundtruth_dir, basename)

        xml_filenames_tuples.append((gt_xml_filename, xml_filename))

    gt_pages_list_filename = os.path.join(output_dir, 'gt_pages_simple.lst')
    generated_pages_list_filename = os.path.join(output_dir,
                                                 'generated_pages_simple.lst')
    with open(gt_pages_list_filename, 'w') as f:
        f.writelines('\n'.join([s[0] for s in xml_filenames_tuples]))
    with open(generated_pages_list_filename, 'w') as f:
        f.writelines('\n'.join([s[1] for s in xml_filenames_tuples]))

    # Evaluation using JAVA Tool
    cmd = 'java -jar {} {} {}'.format(jar_tool_path, gt_pages_list_filename,
                                      generated_pages_list_filename)
    result = subprocess.check_output(cmd, shell=True).decode()
    with open(os.path.join(output_dir, 'scores.txt'), 'w') as f:
        f.write(result)
    parse_score_txt(result, os.path.join(output_dir, 'scores.csv'))

    # Parse results from output of tool
    lines = result.splitlines()
    avg_precision = float(
        next(filter(lambda l: 'Avg (over pages) P value:' in l,
                    lines)).split()[-1])
    avg_recall = float(
        next(filter(lambda l: 'Avg (over pages) R value:' in l,
                    lines)).split()[-1])
    f_measure = float(
        next(filter(lambda l: 'Resulting F_1 value:' in l, lines)).split()[-1])

    print('P {}, R {}, F {}'.format(avg_precision, avg_recall, f_measure))

    return {
        'avg_precision': avg_precision,
        'avg_recall': avg_recall,
        'f_measure': f_measure
    }
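
A hedged usage sketch (the paths are placeholders; PP_PARAMS and CBAD_JAR default from the surrounding module):

scores = eval_fn(input_dir='predictions/cbad_test',
                 groundtruth_dir='data/cbad/test/gt',
                 output_dir='eval_cbad_test',
                 masks_dir='data/cbad/test/masks')
print('F-measure: {}'.format(scores['f_measure']))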
Example #9
    async def run(self):
        while True:
            #
            #   get item off work queue
            #
            start_wait = time.time()
            g = self.work_queue.pop()
            finish_wait = time.time()
            self.counter += 1
            labels_all, probs_all, filename, original_shape, inference_time_sec, page_number = self.work_queue.ungroup(
                g)
            basename = os.path.basename(filename).split('.')[0]
            self.feat.start(basename)
            if self.enable_debug:
                # write out an image of the per-pixel labels
                label_viz = np.zeros(
                    (labels_all.shape[0], labels_all.shape[1], 3), np.uint8)
                for h in range(labels_all.shape[0]):
                    for w in range(labels_all.shape[1]):
                        # map each pixel label to its RGB color
                        label_viz[h, w] = self.label_val_to_color(labels_all[h, w])
                imsave(
                    os.path.join(self.output_dir, f"{basename}_label_viz.png"),
                    label_viz)
            # what pixel labels do we have?
            hist_label_counts = np.bincount(labels_all.flatten()).tolist()
            while len(hist_label_counts) < max(label_bins) + 1:
                hist_label_counts.append(0)
            # now hist_label_counts contains counts of pixel labels

            self._put_results_log(
                f"processing: file={filename} histogram={hist_label_counts}  "
                f"infer_timing={inference_time_sec} original_shape={original_shape}"
            )

            original_img = imread(filename, pilmode='RGB')
            if self.enable_debug:
                original_img_box_viz = np.array(original_img)
                original_img_box_viz_modified = False

            #
            #    handle rectangles here!
            #
            for label_slice in label_bins:
                if label_slice == 0:
                    continue  # skip background
                color_tuple = self.label_val_to_color(label_slice)
                #  area of all the pixel labels for a particular class, might be multiple regions
                area = hist_label_counts[label_slice]
                if area < 500:  # minimum size
                    # reject small label areas
                    continue

                probs = probs_all[:, :, label_slice]

                # Make an image showing the probability map for this label before
                # post-processing (it can include multiple blobs)
                if self.enable_debug:
                    # replicate the scaled probability map across the three RGB channels
                    prob_img = np.repeat((probs * 255).astype(np.uint8)[:, :, None],
                                         3, axis=2)
                    imsave(
                        os.path.join(
                            self.output_dir,
                            f"{basename}_{label_slice}_label_prob.png"),
                        prob_img)

                # Binarize the predictions
                page_bin = self.page_make_binary_mask(probs)

                # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)
                bin_upscaled = cv2.resize(page_bin.astype(np.uint8,
                                                          copy=False),
                                          tuple(original_shape[::-1]),
                                          interpolation=cv2.INTER_NEAREST)
                # upscale probs the same way so we can calculate confidence later
                probs_upscaled = cv2.resize(probs.astype(np.float32,
                                                         casting='same_kind'),
                                            tuple(original_shape[::-1]),
                                            interpolation=cv2.INTER_NEAREST)

                # Find quadrilateral(s) enclosing the label area(s).
                #  allow more than reasonable number of boxes so we can use spurious boxes as a reject signal
                pred_region_coords_list = boxes_detection.find_boxes(
                    bin_upscaled.astype(np.uint8, copy=False),
                    mode='rectangle',
                    min_area=0.001,
                    n_max_boxes=4)

                # coord is [[a,b], [c,b], [c,d], [a,d]]  (a path for drawing a polygon, clockwise)
                #  origin is upper left [x,y]:
                #  [a,b]         [c,b]
                #       rectangle
                #  [a,d]         [c,d]
                # which means a<c and b<d

                if pred_region_coords_list is not None:
                    # Draw the region box on the original image and export it. Also add the box coordinates to the txt file
                    region_count = len(pred_region_coords_list)
                    count = 0
                    for pred_region_coords in pred_region_coords_list:
                        #  cut out rectangle for region based on original image size
                        a = pred_region_coords[0, 0]
                        b = pred_region_coords[0, 1]
                        c = pred_region_coords[1, 0]
                        d = pred_region_coords[2, 1]
                        probs_rectangle = probs_upscaled[
                            b:d + 1, a:c + 1]  # values are in range [0,1]
                        overall_confidence = probs_rectangle.sum() / (
                            (c - a) * (d - b))
                        aspect_ratio = (c - a) / (d - b)  # w/h
                        # original_shape is (height, width): normalize x/width
                        # quantities by index 1 and y/height quantities by index 0
                        page_width_fraction = (c - a) / original_shape[1]
                        page_height_fraction = (d - b) / original_shape[0]
                        normalized_x = a / original_shape[1]
                        normalized_y = b / original_shape[0]
                        region_size = page_width_fraction * page_height_fraction
                        cmts = f"Prediction {a},{b},{c},{d} confidence={overall_confidence} aspect={aspect_ratio} widthfrac={page_width_fraction} heightfrac={page_height_fraction} normalized_x={normalized_x} normalized_y={normalized_y} dimensions={c - a}x{d - b} spec={basename}_{label_slice}-{count}"
                        self._put_results_log(cmts)
                        img_rectangle = original_img[b:d + 1, a:c + 1]
                        tag_rect_x0 = a
                        tag_rect_y0 = b
                        tag_rect_x1 = c
                        tag_rect_y1 = d
                        if self.enable_debug:
                            # draw box to visualize rectangle
                            cv2.polylines(original_img_box_viz,
                                          [pred_region_coords[:, None, :]],
                                          True,
                                          (color_tuple[0], color_tuple[1],
                                           color_tuple[2]),
                                          thickness=5)
                            original_img_box_viz_modified = True
                            imsave(
                                os.path.join(
                                    self.output_dir,
                                    f"{basename}_{label_slice}-{count}_{overall_confidence}_rect.jpg"
                                ), img_rectangle)
                        # Write the corner points into a .txt file
                        # txt_coordinates += '{},{}\n'.format(filename, self.format_quad_to_string(pred_region_coords))

                        # store info on area for use after all areas in image are gathered
                        self.feat.put(label_slice, count, region_size,
                                      overall_confidence, aspect_ratio,
                                      page_width_fraction,
                                      page_height_fraction, normalized_x,
                                      normalized_y, tag_rect_x0, tag_rect_y0,
                                      tag_rect_x1, tag_rect_y1, img_rectangle,
                                      cmts)

                        # Create page region (kept for parity with the page
                        # extraction demo; not used further in this worker)
                        page_border = PAGE.Border(
                            coords=PAGE.Point.cv2_to_point_list(
                                pred_region_coords[:, None, :]))

                        count += 1
                else:
                    # No box found for label
                    # page_border = PAGE.Border()
                    continue
            if self.enable_debug:
                # boxes for all labels, using mask colors
                if original_img_box_viz_modified:
                    imsave(
                        os.path.join(self.output_dir,
                                     f"{basename}__boxes.jpg"),
                        original_img_box_viz)

            self.feat.finish(
            )  # finish image, in non-production this saves feature vector for post model
            page_prediction_msg = ""
            prediction_summary_txt = ""
            if self.production_mode:
                #
                #    apply post-model to determine page type
                #
                v = np.zeros((1, self.feat.vec_length()))
                v[0] = self.feat.get_post_model_vec()
                y = self.post_model.predict(v)
                page_type = int(y[0])
                page_prediction_msg = f"PagePrediction: {basename} "

                #
                #    take actions
                #
                if page_type == 0:  # other page, skip
                    page_prediction_msg += f"type=0"
                    pass
                elif page_type == 1:  # start page of article, save info
                    page_prediction_msg += f"type=1"
                    title_info = self.feat.get_label_instance(1, 0)
                    title_rect_x0 = 2 * title_info["tag_rect_x0"]
                    title_rect_y0 = 2 * title_info["tag_rect_y0"]
                    title_rect_x1 = 2 * title_info["tag_rect_x1"]
                    title_rect_y1 = 2 * title_info["tag_rect_y1"]
                    title_normalized_y = title_info["normalized_y"]

                    author_info = self.feat.get_label_instance(2, 0)
                    author_rect_x0 = 2 * author_info["tag_rect_x0"]
                    author_rect_y0 = 2 * author_info["tag_rect_y0"]
                    author_rect_x1 = 2 * author_info["tag_rect_x1"]
                    author_rect_y1 = 2 * author_info["tag_rect_y1"]
                    author_normalized_y = author_info["normalized_y"]

                    acceptable = True
                    # qualifications
                    if title_info["confidence"] < .5 or author_info[
                            "confidence"]:
                        # too low, could be 0 (missing)
                        msg = f"  REJECT confidence too low  "
                        self._put_results_log(msg)
                        prediction_summary_txt += msg + "\n"
                        acceptable = False
                    if title_rect_y0 > author_rect_y0:
                        # unusual, author appears above title
                        msg = f"  REJECT author appears above title  "
                        self._put_results_log(msg)
                        prediction_summary_txt += msg + "\n"
                        acceptable = False
                    if title_normalized_y > 0.5 or author_normalized_y > 0.5:
                        msg = f"  REJECT: title or author appears in lower half of page "
                        self._put_results_log(msg)
                        prediction_summary_txt += msg + "\n"
                        acceptable = False
                    title = self.extractor.find_bbox_text(
                        page_number, title_rect_x0, title_rect_y0,
                        title_rect_x1, title_rect_y1)
                    title = self.cleaner.one_line(title)
                    authors = self.extractor.find_bbox_text(
                        page_number, author_rect_x0, author_rect_y0,
                        author_rect_x1, author_rect_y1)
                    authors = self.cleaner.cleanAuthors(authors)
                    smsg = f"{basename}: page={page_number} TITLE={title} AUTHORS={authors}"
                    self._put_results_log(smsg)
                    prediction_summary_txt += smsg
                    prediction_summary_txt += f"\nTITLE({title_info['comments']})\n"
                    prediction_summary_txt += f"AUTHOR({title_info['comments']})\n"
                    if acceptable:
                        json_per_image = os.path.join(self.output_dir,
                                                      f"{basename}.json")
                        json_dict = {}
                        json_dict["page_number"] = page_number
                        json_dict["basename"] = basename
                        json_dict["type"] = "start_article"
                        json_dict["title"] = title
                        json_dict["authors"] = authors
                        json_txt = json.dumps(json_dict)
                        with open(json_per_image, "a") as f:
                            f.write(f"{json_txt}\n")
                elif page_type == 2:  # references page, save info
                    page_prediction_msg += f"type=2"
                    pass
                else:  # toc page, save info
                    # page_type == 3
                    page_prediction_msg += f"type=3"
                    pass
            else:  # mode for gathering of training data for post model
                pass
            finish_post = time.time()
            self._put_results_log(
                f"TIMING: wait={finish_wait - start_wait} post={finish_post - finish_wait} {page_prediction_msg}"
            )

            #   if debug, emit a txt summary
            if self.enable_debug:
                if len(prediction_summary_txt) > 0:
                    debug_per_image = os.path.join(self.output_dir,
                                                   f"{basename}.txt")
                    with open(debug_per_image, "a") as f:
                        f.write(f"{prediction_summary_txt}\n")
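
run() loops forever popping items off a shared work queue, so several worker instances could be driven concurrently. A hypothetical driver sketch (the worker class and its constructor are not shown above):

import asyncio

async def main(workers):
    # drive all workers concurrently on the shared work queue
    await asyncio.gather(*(w.run() for w in workers))

# asyncio.run(main([...]))  # worker construction omitted; depends on the class not shown here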