Example 1
def prediction_fn(model_dir: str,
                  input_dir: str,
                  output_dir: str = None,
                  config: tf.ConfigProto = None) -> None:
    """
    Given a model directory this function will load the model and apply it to the files (.jpg, .png) found in input_dir.
    The predictions will be saved in output_dir as .npy files (values in the range [0, 255]).

    :param model_dir: Directory containing the saved model
    :param input_dir: input directory where the images to predict are
    :param output_dir: output directory to save the predictions (probability images)
    :param config: ConfigProto object to pass to the session in order to define which GPU to use
    :return:
    """
    if not output_dir:
        # For a model_dir of the form model_name/export/timestamp/, this creates a folder model_name/predictions
        output_dir = os.path.sep.join(
            model_dir.split(os.path.sep)[:-3] + ['predictions'])

    os.makedirs(output_dir, exist_ok=True)
    filenames_to_predict = glob(os.path.join(input_dir, '*.jp*g')) + glob(
        os.path.join(input_dir, '*.png'))

    with tf.Session(config=config):
        m = LoadedModel(model_dir, predict_mode='filename_original_shape')
        for filename in tqdm(filenames_to_predict, desc='Prediction'):
            pred = m.predict(filename)['probs'][0]
            np.save(
                os.path.join(output_dir,
                             os.path.basename(filename).split('.')[0]),
                np.uint8(255 * pred))
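
The `config` parameter is what lets you pin the prediction to a specific GPU. A minimal usage sketch (the model and data paths are hypothetical):

import tensorflow as tf

# Restrict TensorFlow to GPU 0 and allocate memory on demand
config = tf.ConfigProto()
config.gpu_options.visible_device_list = '0'
config.gpu_options.allow_growth = True

prediction_fn(model_dir='my_model/export/1564890412/',  # hypothetical export dir
              input_dir='data/images',                  # hypothetical input dir
              config=config)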
Example 2
def baseline_extraction(model_dir: str,
                        filenames_to_process: List[str],
                        output_dir: str,
                        draw_extractions: bool = False,
                        config: tf.compat.v1.ConfigProto = None) -> None:
    """
    Given a model directory this function will load the model and apply it to the given files.

    :param model_dir: Directory containing the saved model
    :param filenames_to_process: filenames of the images to process
    :param output_dir: output directory to save the predictions (probability images)
    :param draw_extractions: if True, also save the input images with the extracted baselines drawn on them
    :param config: ``ConfigProto`` object for ``tf.Session``.
    :return:
    """

    os.makedirs(output_dir, exist_ok=True)
    if draw_extractions:
        drawing_dir = os.path.join(output_dir, 'drawings')
        os.makedirs(drawing_dir, exist_ok=True)

    with tf.compat.v1.Session(config=config):
        # Load the model
        m = LoadedModel(model_dir, predict_mode='filename_original_shape')
        for filename in tqdm(filenames_to_process, desc='Prediction'):
            # Inference
            prediction = m.predict(filename)
            # Take the first element of the 'probs' dictionary (batch size = 1)
            probs = prediction['probs'][0]
            original_shape = probs.shape

            # The baselines probs are on the second channel
            baseline_probs = probs[:, :, 1]
            contours, _ = line_extraction_v1(baseline_probs,
                                             low_threshold=0.2,
                                             high_threshold=0.4,
                                             sigma=1.5)

            basename = os.path.basename(filename).split('.')[0]

            # Ratio used to save the coordinates in the original image reference.
            # With predict_mode='filename_original_shape' the probabilities are
            # already at the original resolution, so this ratio is simply (1, 1).
            ratio = (original_shape[0] / probs.shape[0],
                     original_shape[1] / probs.shape[1])
            xml_filename = os.path.join(output_dir, basename + '.xml')
            page_object = PAGE.save_baselines(
                xml_filename,
                contours,
                ratio,
                predictions_shape=probs.shape[:2])

            # If specified, save the image with the extracted baselines drawn on it
            if draw_extractions:
                image = imread(filename)
                page_object.draw_baselines(image,
                                           color=(255, 0, 0),
                                           thickness=5)

                basename = os.path.basename(filename)
                imsave(os.path.join(drawing_dir, basename), image)
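
A usage sketch for this function, gathering the input files the same way Example 1 does (directory names are hypothetical):

filenames = glob(os.path.join('data/images', '*.jpg')) + \
            glob(os.path.join('data/images', '*.png'))
baseline_extraction(model_dir='my_model/export/1564890412/',
                    filenames_to_process=filenames,
                    output_dir='output/baselines',
                    draw_extractions=True)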
Example 3
def main(input_dir, output_dir, model_dir, classes_file):
    if not os.path.isdir(input_dir):
        print('No such input directory: {}'.format(input_dir))
        sys.exit(1)

    if not os.path.isdir(model_dir):
        print('No such model directory: {}'.format(model_dir))
        sys.exit(2)

    if not os.path.isfile(classes_file):
        print('No such classes file: {}'.format(classes_file))
        sys.exit(3)

    input_files = glob('{}/*'.format(input_dir))

    raw_dir = os.path.join(output_dir, 'raw')
    raw_overlays_dir = os.path.join(output_dir, 'raw_overlays')
    os.makedirs(raw_dir, exist_ok=True)
    os.makedirs(raw_overlays_dir, exist_ok=True)

    # Shape [num_classes, 3] (3 is for RGB)
    class_colors = np.array(get_classes_color_from_file(classes_file),
                            dtype=np.uint8)
    num_classes = class_colors.shape[0]

    with tf.Session():
        m = LoadedModel(model_dir, predict_mode='filename')

        for filename in tqdm(input_files, desc='Processed files'):
            rootname, _ = os.path.splitext(filename)
            basename = os.path.basename(rootname + '.png')

            # For each image, predict each pixel's label
            prediction_outputs = m.predict(filename)
            probs = prediction_outputs['probs'][0]
            original_shape = prediction_outputs['original_shape']
            assert probs.shape[2] == num_classes

            # Shape: (h, w)
            class_map = probs.argmax(axis=-1)

            # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)
            class_map_upscaled = cv2.resize(class_map.astype(np.uint8,
                                                             copy=False),
                                            tuple(original_shape[::-1]),
                                            interpolation=cv2.INTER_NEAREST)
            # Shape: (h', w', 3)
            color_map = np.take(class_colors, class_map_upscaled, axis=0)
            raw = Image.fromarray(color_map)
            raw.save(os.path.join(raw_dir, basename), 'PNG')

            original_img = Image.open(filename).convert('RGBA')
            predicted_mask = Image.fromarray(color_map).convert('RGBA')
            raw_overlay = Image.blend(original_img, predicted_mask, 0.5)
            raw_overlay.save(os.path.join(raw_overlays_dir, basename), 'PNG')
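
The per-pixel coloring in this example reduces to indexing the color table with the class map. A self-contained illustration of the np.take lookup (toy colors and map, not from the original):

import numpy as np

class_colors = np.array([[0, 0, 0],      # class 0: black
                         [255, 0, 0],    # class 1: red
                         [0, 255, 0]],   # class 2: green
                        dtype=np.uint8)  # shape (num_classes, 3)
class_map = np.array([[0, 1],
                      [2, 1]], dtype=np.uint8)        # shape (h, w)
color_map = np.take(class_colors, class_map, axis=0)  # shape (h, w, 3)
assert color_map.shape == (2, 2, 3)
assert (color_map[0, 1] == [255, 0, 0]).all()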
Example 4
def page_extraction(model_dir: str,
                    filenames_to_process: List[str],
                    output_dir: str,
                    draw_extractions: bool = False,
                    config: tf.compat.v1.ConfigProto = None):

    os.makedirs(output_dir, exist_ok=True)
    if draw_extractions:
        drawing_dir = os.path.join(output_dir, 'drawings')
        os.makedirs(drawing_dir, exist_ok=True)

    with tf.compat.v1.Session(config=config):
        # Load the model
        m = LoadedModel(model_dir, predict_mode='filename')
        for filename in tqdm(filenames_to_process, desc='Prediction'):
            # Inference
            prediction = m.predict(filename)
            probs = prediction['probs'][0]
            original_shape = prediction['original_shape']
            # basename is needed for the XML output even when nothing is drawn
            basename = os.path.basename(filename).split('.')[0]

            probs = probs / np.max(probs)  # Normalize to be in [0, 1]
            # Binarize the predictions
            page_bin = page_post_processing_fn(probs, threshold=-1)

            # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)
            bin_upscaled = cv2.resize(page_bin.astype(np.uint8, copy=False),
                                      tuple(original_shape[::-1]), interpolation=cv2.INTER_NEAREST)

            # Find quadrilateral enclosing the page
            pred_page_coords = find_boxes(bin_upscaled.astype(np.uint8, copy=False),
                                          mode='min_rectangle', min_area=0.2, n_max_boxes=1)

            if pred_page_coords is not None:
                # Create the page region for the PAGE XML file
                page_border = PAGE.Border(coords=PAGE.Point.cv2_to_point_list(pred_page_coords[:, None, :]))

                if draw_extractions:
                    # Draw the page box on the original image and export it
                    original_img = imread(filename, pilmode='RGB')
                    cv2.polylines(original_img, [pred_page_coords[:, None, :]], True, (0, 0, 255), thickness=5)
                    imsave(os.path.join(drawing_dir, '{}_boxes.jpg'.format(basename)), original_img)

            else:
                print('No box found in {}'.format(filename))
                page_border = PAGE.Border()

            page_xml = PAGE.Page(image_filename=filename, image_width=original_shape[1], image_height=original_shape[0],
                                 page_border=page_border)
            xml_filename = os.path.join(output_dir, '{}.xml'.format(basename))
            page_xml.write_to_file(xml_filename, creator_name='PageExtractor')
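
Several of these examples reverse original_shape before handing it to cv2.resize. The reason is that original_shape is stored as (height, width) while cv2.resize expects dsize as (width, height). A quick self-contained check:

import cv2
import numpy as np

page_bin = np.zeros((300, 200), dtype=np.uint8)  # prediction at reduced size (h, w)
original_shape = (600, 400)                      # original image size (h, w)

bin_upscaled = cv2.resize(page_bin,
                          tuple(original_shape[::-1]),  # dsize = (w, h) = (400, 600)
                          interpolation=cv2.INTER_NEAREST)
assert bin_upscaled.shape == (600, 400)          # back to (h, w)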
Example 5
def run(testDir, modelDir, outDir, gpu, _config):

    # Create output directory
    os.makedirs(outDir, exist_ok=True)

    # I/O
    files = glob(testDir + '/*')

    mask_unused_gpus(2)

    with tf.Session():  # Start a tensorflow session

        # Load the model
        # model = LoadedModel(modelDir, modelName, predict_mode='filename')
        model = LoadedModel(modelDir, predict_mode='filename')

        for filename in tqdm(files, desc='Processed files'):

            basename = os.path.basename(filename).split('.')[0]

            if os.path.exists(os.path.join(outDir, basename + '.xml')):
                print(basename + " skipped...")
                continue

            # print("predict filename:" + filename)
            prediction_outputs = model.predict(filename)

            probs = prediction_outputs['probs'][0]
            probs = probs.astype(float)

            imsave(os.path.join(outDir, basename + '-articles.png'),
                   convert_image(probs))
Example 6
    output_dir = 'demo/processed_images'
    os.makedirs(output_dir, exist_ok=True)
    # PAGE XML format output
    output_pagexml_dir = os.path.join(output_dir, PAGE_XML_DIR)
    os.makedirs(output_pagexml_dir, exist_ok=True)

    # Store coordinates of page in a .txt file
    txt_coordinates = ''

    with tf.compat.v1.Session():  # Start a tensorflow session
        # Load the model
        m = LoadedModel(model_dir, predict_mode='filename')

        for filename in tqdm(input_files, desc='Processed files'):
            # For each image, predict each pixel's label
            prediction_outputs = m.predict(filename)
            probs = prediction_outputs['probs'][0]
            original_shape = prediction_outputs['original_shape']
            # Take only class '1' (class 0 is the background, class 1 is the page)
            probs = probs[:, :, 1]
            probs = probs / np.max(probs)  # Normalize to be in [0, 1]

            # Binarize the predictions
            page_bin = page_make_binary_mask(probs)

            # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)
            bin_upscaled = cv2.resize(page_bin.astype(np.uint8, copy=False),
                                      tuple(original_shape[::-1]),
                                      interpolation=cv2.INTER_NEAREST)

            # Find quadrilateral enclosing the page
            # (this example is truncated here; Example 4 shows the find_boxes call that typically follows)
Example 7
            # Read image
            img = cv2.imread(input_path)

            # Parse filename
            basename = os.path.basename(input_path)
            basename_wo_ext = os.path.splitext(basename)[0]
            fl.write("input_path\t\t: {}\n".format(input_path))
            fl.write("basename\t\t: {}\n".format(basename))
            fl.write("basename (w/o ext)\t: {}\n".format(basename_wo_ext))
            """
            2. Main

            Run prediction
            """
            # Run prediction
            prediction_outputs = m.predict(input_path)
            pred_labels = np.copy(prediction_outputs['labels'][0]).astype(
                np.uint8)
            """
            2. Main

            Get basic attributes
            """
            oriH, oriW = np.shape(img)[:2]
            newH, newW = np.shape(pred_labels)

            data_page.set("HEIGHT", str(oriH))
            data_page.set("WIDTH", str(oriW))
            """
            3. Postprocessing
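
The fragment stops mid-postprocessing, but the pairs (oriH, oriW) and (newH, newW) suggest the usual next step: scale factors for mapping coordinates found in pred_labels back to the original image. A sketch of that assumed continuation:

# Hypothetical continuation: map prediction coordinates back to the original resolution
scale_h = oriH / newH
scale_w = oriW / newW
# A point (y, x) in pred_labels corresponds to roughly (y * scale_h, x * scale_w) in img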
Example 8
class TextLineDetector:
    def __init__(self, model_dir, debug=True):
        self.sess = tf.Session()
        with self.sess.as_default():
            self.model = LoadedModel(model_dir, predict_mode="image")
        self.debug = debug
        self.debug_dir = Path("./dhSegment_debug/")
        self.debug_dir.mkdir(exist_ok=True)

    def __exit__(self, exc_type, exc_value, traceback):
        self.sess.close()

    def detect(self, img, model_predict_h_w=None):
        '''
            Input:
                img: a BGR image (np.ndarray)
            Return:
                cv2 style contours

            Reference:
                https://github.com/dhlab-epfl/fdh-tutorials/blob/master/computer-vision-deep-learning/3-applications/dl-document-processing-textlines/fdh_document_processing.ipynb
        '''
        assert isinstance(img, np.ndarray) and len(img.shape) == 3
        assert model_predict_h_w is None or isinstance(model_predict_h_w,
                                                       tuple)

        with self.sess.as_default():
            # Deep Learning based textline detection
            start = time.time()
            # Note: the model takes RGB image as input
            output_textline = self.model.predict(img[:, :, [2, 1, 0]])
            if self.debug:
                print("[!] The model took {} to predict this image".format(
                    time.time() - start))
            textline_probs = output_textline['probs'][0, :, :, 1]
            if self.debug:
                plt.imshow(textline_probs)
                plt.savefig(str(self.debug_dir / "textline_probability.png"))

            # The higher the sigma, the fewer textlines we get
            textline_probs2 = cleaning_probs(textline_probs, sigma=1)
            textline_mask = hysteresis_thresholding(textline_probs2,
                                                    low_threshold=0.3,
                                                    high_threshold=0.6,
                                                    candidates_mask=None)
            if self.debug:
                plt.imshow(textline_mask)
                plt.savefig(str(self.debug_dir / "textline_mask.png"))

            start = time.time()
            line_contours = line_vectorization.find_lines(
                resize(textline_mask, img.shape[0:2]))
            if self.debug:
                print("[!] Find lines took {} secs".format(time.time() -
                                                           start))

            if self.debug:
                drawn_line_img = cv2.drawContours(img.copy(),
                                                  line_contours,
                                                  -1, (0, 255, 0),
                                                  thickness=3)
                cv2.imwrite(str(self.debug_dir / "drawn_line_img.png"),
                            drawn_line_img)

        return line_contours
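
A minimal sketch of how this class would be used (model path and image file are hypothetical). Note that detect() expects a BGR image, which is exactly what cv2.imread returns:

import cv2

detector = TextLineDetector('path/to/textline_model', debug=False)
img = cv2.imread('page.jpg')  # BGR, as detect() expects
contours = detector.detect(img)
print('Found {} textlines'.format(len(contours)))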
Example 9
"""Download a single test image using URL to local server"""

# Download image
!wget -O 'my_test_image.jp2' -A jpeg,jpg,jp2 $TEST_IMAGE_URL
# Convert format from jp2 to jpg
img = cv2.imread('./my_test_image.jp2', cv2.IMREAD_GRAYSCALE)
cv2.imwrite('./images/my_test_image.jpg', img)
# Remove jp2
!rm my_test_image.jp2

"""Load a test image"""
file_to_process = './dataset/ENP_500/train/images/00674399.jpg'
img = cv2.imread(file_to_process)

"""Run prediction"""
prediction_outputs = m.predict(file_to_process)

"""POST PROCESSING"""
# Generate polygon regions
CONNECTIVITY = 8               # connectivity used for connected components
THRESHOLD_SM_ZONE = 200 * 200  # area threshold for small zones (200x200 px)

# Prepare copies of the image for drawing (bounding boxes on img_bb, polygons on img_pg)
img_bb = np.copy(img)
img_pg = np.copy(img)
newH, newW = np.shape(img)[:2]

# Build figure mask from the prediction (channel 2 of 'probs' holds the figure class,
# judging by the variable names)
pred_figures = np.copy(prediction_outputs['labels'][0]).astype(np.uint8)
prob_figures = np.copy(prediction_outputs['probs'][0][:, :, 2])
mask_figures = np.copy(pred_figures)
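
The snippet is cut off before CONNECTIVITY and THRESHOLD_SM_ZONE are used. Judging by the names, the intent is to drop connected components smaller than 200x200 pixels; a plausible continuation using OpenCV's connected-components API (an assumption, not the original code):

# Keep only the figure class in the mask (assuming label 2 = figures, per prob_figures above)
mask_figures = (mask_figures == 2).astype(np.uint8)

# Remove connected components smaller than THRESHOLD_SM_ZONE (assumed post-processing)
num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
    mask_figures, connectivity=CONNECTIVITY)
for label in range(1, num_labels):  # label 0 is the background
    if stats[label, cv2.CC_STAT_AREA] < THRESHOLD_SM_ZONE:
        mask_figures[labels == label] = 0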
Example 10
     print("")
     print("                    Post-Model Training")
 print("")
 #  For each image
 for filename in tqdm(input_file_list, desc='Processed files'):
     basename = os.path.basename(filename).split('.')[0]
     page_number_parsed = -1
     try:
         page_number_parsed = int(basename.split("_")[-1])
     except Exception:
         pass
     start_time_sec = time.time()
     #
     #       predict each pixel's label
     #
     prediction_outputs = tf_model.predict(filename)
     finish_time_sec = time.time()
     # labels_all has shape (h,w) which is like (976, 737)
     labels_all = prediction_outputs['labels'][0]
     probs_all = prediction_outputs['probs'][0]
     # probs_all have shape like (976, 737, 4) corresponding to (H, W, Nclasses)
     original_shape = prediction_outputs['original_shape']
     if (page_number_counted != page_number_parsed):
         page_number = page_number_parsed
         if not warned_on_file_ordering:
             warned_on_file_ordering = True
             print(
                 f"  WARN: page number mismatch: parse={page_number_parsed} vs. count={page_number_counted}"
             )
     else:
         page_number = page_number_counted
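
As the comments note, labels_all has shape (h, w) while probs_all has shape (h, w, n_classes). The per-pixel label should simply be the most probable class at that pixel, which can be sanity-checked:

import numpy as np

assert labels_all.shape == probs_all.shape[:2]
# Each pixel's label should be the argmax over the class axis (an assumption about the model output)
np.testing.assert_array_equal(labels_all, probs_all.argmax(axis=-1))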
Example 11
def evaluate_fnc(model_dir):

    model_name = os.path.basename(os.path.normpath(model_dir))

    model_json_file = os.path.join(model_dir, 'config.json')
    if not os.path.isfile(model_json_file):
        print('Could not find the config file: ' + model_json_file)
        return

    with open(model_json_file) as f:
        data = json.load(f)
        train_data_dir = data["train_data"]
        print('train data: ' + train_data_dir)
        # Generate the test data dir by replacing the last occurrence of
        # /train with /train_and_val
        test_data_dir = re.sub('(/train)(?!.*/train)', '/train_and_val',
                               train_data_dir)
        # Alternative: test_data_dir = re.sub('(/train)(?!.*/train)', '/test', train_data_dir)
        print('saving to: ' + test_data_dir)
        test_data_dir = os.path.join(test_data_dir, 'images')
        print('test_data_dir: ' + test_data_dir)
        if not os.path.isdir(test_data_dir):
            print('Sorry, the test folder does not exist: ' + test_data_dir)
            return

        # simply generate the results dir, by using a 'result' child of the parent folder of the train_data_dir
        result_parent_dir = os.path.join(os.path.dirname(train_data_dir),
                                         'results_train_and_val')
        output_dir = os.path.join(result_parent_dir, model_name)

        # TODO: read this from json
        use_ms = True
        # TODO: is this necessary?
        model_dir = os.path.join(model_dir, 'export')

        input_image_filenames = glob(os.path.join(test_data_dir, '*.jpg'),
                                     recursive=False) + \
            glob(os.path.join(test_data_dir, '*.png'),
                 recursive=False)
        if use_ms:
            # Strip the MSI channel suffix (e.g. '_2', '_12') to get one entry per image
            input_image_filenames = [
                re.sub(r'_\d\d?', '', f) for f in input_image_filenames
            ]
            input_files = set(input_image_filenames)
            print('Found {} MSI images'.format(len(input_files)))
        else:
            input_files = input_image_filenames
            print('Found {} images'.format(len(input_files)))

        mask_unused_gpus.mask_unused_gpus(2)

        os.makedirs(output_dir, exist_ok=True)

        with tf.Session():  # Start a tensorflow session
            # Load the model

            m = LoadedModel(model_dir, predict_mode='filename')

            # Count the number of trainable parameters
            total_parameters = 0
            for variable in tf.trainable_variables():
                # shape is an array of tf.Dimension
                shape = variable.get_shape()
                variable_parameters = 1
                for dim in shape:
                    variable_parameters *= dim.value
                total_parameters += variable_parameters
            print('number of network parameters: ' + str(total_parameters))

            # Iterate over the images
            for filename in tqdm(input_files, desc='Processed files'):

                prediction_outputs = m.predict(filename)
                probs = prediction_outputs['probs'][0]

                original_shape = prediction_outputs['original_shape']

                if use_ms:
                    # Read one channel of the MSI stack (escape the dot so only
                    # the '.png' extension is replaced)
                    filename = re.sub(r'\.png', '_2.png', filename)

                img = cv.imread(filename, cv.IMREAD_COLOR)

                # just use the fg class:
                p = probs[:, :, 1] * 255
                bin_upscaled = cv.resize(p.astype(np.uint8, copy=False),
                                         tuple(original_shape[::-1]),
                                         interpolation=cv.INTER_CUBIC)

                b = (bin_upscaled > 127) * 255
                b = np.array(b, dtype=np.uint8)

                b_rgb = np.zeros(
                    (bin_upscaled.shape[0], bin_upscaled.shape[1], 3),
                    dtype=np.uint8)
                b_rgb[:, :, 1] = b

                # Do we have a second foreground class?
                if probs.shape[2] == 3:
                    p_fg2 = probs[:, :, 2] * 255
                    bin_upscaled_fg2 = cv.resize(p_fg2.astype(np.uint8,
                                                              copy=False),
                                                 tuple(original_shape[::-1]),
                                                 interpolation=cv.INTER_CUBIC)
                    b_fg2 = (bin_upscaled_fg2 > 127) * 255
                    b_fg2 = np.array(b_fg2, dtype=np.uint8)
                    b_rgb[:, :, 2] = b_fg2

                filename = re.sub(r'_\d\.png', '.png',
                                  os.path.basename(filename))
                full_filename = os.path.join(output_dir, filename)

                cv.imwrite(full_filename, b_rgb)

def main(input_dir,
         model_dir,
         out_dir,
         raw_out_dir=None,
         min_area=0.0005,
         overlay_alpha=127,
         box_color=(255, 0, 0)):
    os.makedirs(out_dir, exist_ok=True)
    if raw_out_dir:
        os.makedirs(raw_out_dir, exist_ok=True)
    input_files = glob('{}/*'.format(input_dir))
    with tf.Session():
        # Load the model
        m = LoadedModel(model_dir, predict_mode='filename')
        for filename in tqdm(input_files, desc='Processed files'):
            basename = os.path.basename(filename).split('.')[0]

            # For each image, predict each pixel's label
            prediction_outputs = m.predict(filename)
            probs = prediction_outputs['probs'][0]
            original_shape = prediction_outputs['original_shape']
            # Take only class '1'
            # (class 0 is the background, class 1 is the annotation.)
            probs = probs[:, :, 1]
            probs = probs / np.max(probs)  # Normalize to be in [0, 1]

            # Binarize the predictions
            preds_bin = make_binary_mask(probs)

            # Upscale to have full resolution image
            # (cv2 uses (w,h) and not (h,w) for giving shapes)
            bin_upscaled = cv2.resize(preds_bin.astype(np.uint8, copy=False),
                                      tuple(original_shape[::-1]),
                                      interpolation=cv2.INTER_NEAREST)

            if raw_out_dir:
                # If requested, draw the binary mask as an overlay
                # over the image and save it.
                img = Image.open(filename)
                img = img.convert('RGBA')
                overlay_arr = np.stack(
                    [
                        bin_upscaled * box_color[0],  # R
                        bin_upscaled * box_color[1],  # G
                        bin_upscaled * box_color[2],  # B
                        np.ones_like(bin_upscaled) * overlay_alpha  # A
                    ],
                    axis=2)
                overlay = Image.fromarray(overlay_arr, mode='RGBA')
                img.paste(overlay, (0, 0), overlay)
                img.save(
                    os.path.join(raw_out_dir, '{}_raw.png'.format(basename)),
                    'PNG')

            # Find quadrilateral enclosing the page
            boxes = boxes_detection.find_boxes(
                bin_upscaled.astype(np.uint8, copy=False),
                min_area=min_area,
                mode='min_rectangle',
            )

            # Draw boxes on original image.
            original_img = imread(filename, pilmode='RGB')
            if boxes is not None:
                cv2.polylines(original_img,
                              boxes,
                              True,
                              box_color,
                              thickness=5)
            else:
                print('No annotation found in {}'.format(filename))

            imsave(os.path.join(out_dir, '{}_boxes.jpg'.format(basename)),
                   original_img)
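
A sketch of invoking this main() (directories are hypothetical): the semi-transparent red masks go to raw_out_dir and the boxed images to out_dir:

main(input_dir='data/test_images',
     model_dir='models/annotations/export',
     out_dir='results/boxes',
     raw_out_dir='results/raw_overlays',
     min_area=0.001,
     overlay_alpha=127,
     box_color=(255, 0, 0))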