Example #1
def prediction_fn(model_dir: str,
                  input_dir: str,
                  output_dir: str = None,
                  config: tf.ConfigProto = None) -> None:
    """
    Given a model directory, this function loads the model and applies it to the image files (.jpg, .png) found in input_dir.
    The predictions are saved in output_dir as .npy files (values in [0, 255]).

    :param model_dir: Directory containing the saved model
    :param input_dir: input directory containing the images to predict
    :param output_dir: output directory to save the predictions (probability images)
    :param config: ConfigProto object to pass to the session in order to define which GPU to use
    :return:
    """
    if not output_dir:
        # For a model_dir like model_name/export/timestamp/ this creates the folder model_name/predictions
        output_dir = os.path.sep.join(
            model_dir.split(os.path.sep)[:-3] + ['predictions'])

    os.makedirs(output_dir, exist_ok=True)
    filenames_to_predict = glob(os.path.join(input_dir, '*.jp*g')) + glob(
        os.path.join(input_dir, '*.png'))

    with tf.Session(config=config):
        m = LoadedModel(model_dir, predict_mode='filename_original_shape')
        for filename in tqdm(filenames_to_predict, desc='Prediction'):
            pred = m.predict(filename)['probs'][0]
            np.save(
                os.path.join(output_dir,
                             os.path.basename(filename).split('.')[0]),
                np.uint8(255 * pred))
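
A minimal driver sketch for prediction_fn (paths are placeholders): the ConfigProto pins the session to a single GPU as the docstring suggests, and the saved .npy files hold uint8 values that can be rescaled back to probabilities.

import numpy as np
import tensorflow as tf

config = tf.ConfigProto()
config.gpu_options.visible_device_list = '0'  # run on GPU 0 only

prediction_fn('model_name/export/1564890842/', 'data/images/', config=config)

# Predictions are stored as uint8 in [0, 255]; rescale to [0, 1] probabilities.
probs = np.load('model_name/predictions/some_image.npy') / 255.0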
Example #2
def __init__(self, model_dir, debug=True):
    self.sess = tf.Session()
    with self.sess.as_default():
        self.model = LoadedModel(model_dir, predict_mode="image")
    self.debug = debug
    self.debug_dir = Path("./dhSegment_debug/")
    self.debug_dir.mkdir(exist_ok=True)
Example #3
def baseline_extraction(model_dir: str,
                        filenames_to_process: List[str],
                        output_dir: str,
                        draw_extractions: bool = False,
                        config: tf.compat.v1.ConfigProto = None) -> None:
    """
    Given a model directory this function will load the model and apply it to the given files.

    :param model_dir: Directory containing the saved model
    :param filenames_to_process: filenames of the images to process
    :param output_dir: output directory to save the predictions (probability images)
    :param draw_extractions: if True, saves images with the extracted baselines drawn on them
    :param config: ``ConfigProto`` object for ``tf.Session``.
    :return:
    """

    os.makedirs(output_dir, exist_ok=True)
    if draw_extractions:
        drawing_dir = os.path.join(output_dir, 'drawings')
        os.makedirs(drawing_dir)

    with tf.compat.v1.Session(config=config):
        # Load the model
        m = LoadedModel(model_dir, predict_mode='filename_original_shape')
        for filename in tqdm(filenames_to_process, desc='Prediction'):
            # Inference
            prediction = m.predict(filename)
            # Take the first element of the 'probs' dictionary (batch size = 1)
            probs = prediction['probs'][0]
            original_shape = probs.shape

            # The baselines probs are on the second channel
            baseline_probs = probs[:, :, 1]
            contours, _ = line_extraction_v1(baseline_probs,
                                             low_threshold=0.2,
                                             high_threshold=0.4,
                                             sigma=1.5)

            basename = os.path.basename(filename).split('.')[0]

            # Ratio to map the coordinates back to the original image reference.
            # With predict_mode='filename_original_shape' the probabilities already
            # have the original image size, so this ratio is (1.0, 1.0) here.
            ratio = (original_shape[0] / probs.shape[0],
                     original_shape[1] / probs.shape[1])
            xml_filename = os.path.join(output_dir, basename + '.xml')
            page_object = PAGE.save_baselines(
                xml_filename,
                contours,
                ratio,
                predictions_shape=probs.shape[:2])

            # If specified, save the images with the extracted baselines drawn on them
            if draw_extractions:
                image = imread(filename)
                page_object.draw_baselines(image,
                                           color=(255, 0, 0),
                                           thickness=5)

                basename = os.path.basename(filename)
                imsave(os.path.join(drawing_dir, basename), image)
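
A short driver sketch for baseline_extraction, with placeholder paths. With draw_extractions=True it writes one PAGE XML file per image into output_dir plus annotated copies under output_dir/drawings.

from glob import glob

filenames = glob('data/images/*.jpg') + glob('data/images/*.png')
baseline_extraction('model_name/export/1564890842/', filenames, 'out/',
                    draw_extractions=True)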
Example #4
def main(input_dir, output_dir, model_dir, classes_file):
    if not os.path.isdir(input_dir):
        print('No such input directory: {}'.format(input_dir))
        sys.exit(1)

    if not os.path.isdir(model_dir):
        print('No such model directory: {}'.format(model_dir))
        sys.exit(2)

    if not os.path.isfile(classes_file):
        print('No such classes file: {}'.format(classes_file))
        sys.exit(3)

    input_files = glob('{}/*'.format(input_dir))

    raw_dir = os.path.join(output_dir, 'raw')
    raw_overlays_dir = os.path.join(output_dir, 'raw_overlays')
    os.makedirs(raw_dir, exist_ok=True)
    os.makedirs(raw_overlays_dir, exist_ok=True)

    # Shape [num_classes, 3] (3 is for RGB)
    class_colors = np.array(get_classes_color_from_file(classes_file),
                            dtype=np.uint8)
    num_classes = class_colors.shape[0]

    with tf.Session():
        m = LoadedModel(model_dir, predict_mode='filename')

        for filename in tqdm(input_files, desc='Processed files'):
            rootname, _ = os.path.splitext(filename)
            basename = os.path.basename(rootname + '.png')

            # For each image, predict each pixel's label
            prediction_outputs = m.predict(filename)
            probs = prediction_outputs['probs'][0]
            original_shape = prediction_outputs['original_shape']
            assert probs.shape[2] == num_classes

            # Shape: (h, w)
            class_map = probs.argmax(axis=-1)

            # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)
            class_map_upscaled = cv2.resize(class_map.astype(np.uint8,
                                                             copy=False),
                                            tuple(original_shape[::-1]),
                                            interpolation=cv2.INTER_NEAREST)
            # Shape: (h', w', 3)
            color_map = np.take(class_colors, class_map_upscaled, axis=0)
            raw = Image.fromarray(color_map)
            raw.save(os.path.join(raw_dir, basename), 'PNG')

            original_img = Image.open(filename).convert('RGBA')
            predicted_mask = Image.fromarray(color_map).convert('RGBA')
            raw_overlay = Image.blend(original_img, predicted_mask, 0.5)
            raw_overlay.save(os.path.join(raw_overlays_dir, basename), 'PNG')
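
The classes file consumed by get_classes_color_from_file is not shown here. Judging from Example #13, which parses a classes.txt of whitespace-separated integers, a plausible format is one 'R G B' line per class; a minimal reader under that assumption:

import numpy as np

def get_classes_color_from_file(classes_file: str) -> np.ndarray:
    """Hypothetical reader: one 'R G B' line per class, e.g. '0 0 0' for background."""
    with open(classes_file) as f:
        rows = [line.split() for line in f if line.strip()]
    return np.array(rows, dtype=np.uint8)  # shape [num_classes, 3]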
Example #5
def page_extraction(model_dir: str,
                    filenames_to_process: List[str],
                    output_dir: str,
                    draw_extractions: bool = False,
                    config: tf.compat.v1.ConfigProto = None):

    os.makedirs(output_dir, exist_ok=True)
    if draw_extractions:
        drawing_dir = os.path.join(output_dir, 'drawings')
        os.makedirs(drawing_dir)

    with tf.compat.v1.Session(config=config):
        # Load the model
        m = LoadedModel(model_dir, predict_mode='filename')
        for filename in tqdm(filenames_to_process, desc='Prediction'):
            basename = os.path.basename(filename).split('.')[0]
            # Inference
            prediction = m.predict(filename)
            probs = prediction['probs'][0]
            original_shape = prediction['original_shape']

            probs = probs / np.max(probs)  # Normalize to be in [0, 1]
            # Binarize the predictions
            page_bin = page_post_processing_fn(probs, threshold=-1)

            # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)
            bin_upscaled = cv2.resize(page_bin.astype(np.uint8, copy=False),
                                      tuple(original_shape[::-1]), interpolation=cv2.INTER_NEAREST)

            # Find quadrilateral enclosing the page
            pred_page_coords = find_boxes(bin_upscaled.astype(np.uint8, copy=False),
                                          mode='min_rectangle', min_area=0.2, n_max_boxes=1)

            if pred_page_coords is not None:
                # Create the page region for the PAGE XML file
                page_border = PAGE.Border(coords=PAGE.Point.cv2_to_point_list(pred_page_coords[:, None, :]))

                if draw_extractions:
                    # Draw the page box on the original image and export it
                    original_img = imread(filename, pilmode='RGB')
                    cv2.polylines(original_img, [pred_page_coords[:, None, :]], True, (0, 0, 255), thickness=5)

                    imsave(os.path.join(drawing_dir, '{}_boxes.jpg'.format(basename)), original_img)

            else:
                print('No box found in {}'.format(filename))
                page_border = PAGE.Border()

            page_xml = PAGE.Page(image_filename=filename, image_width=original_shape[1], image_height=original_shape[0],
                                 page_border=page_border)
            xml_filename = os.path.join(output_dir, '{}.xml'.format(basename))
            page_xml.write_to_file(xml_filename, creator_name='PageExtractor')
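
The config parameter of page_extraction (and baseline_extraction above) accepts any tf.compat.v1.ConfigProto. A common choice, sketched below with placeholder paths, is to let TensorFlow grow GPU memory on demand instead of reserving all of it up front:

import tensorflow as tf

config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True  # allocate GPU memory as needed

page_extraction('model_name/export/1564890842/', filenames, 'out/', config=config)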
Example #6
def run(testDir, modelDir, outDir, gpu, _config):

    # Create output directory
    os.makedirs(outDir, exist_ok=True)

    # I/O
    files = glob(testDir + '/*')

    mask_unused_gpus(2)

    with tf.Session():  # Start a tensorflow session

        # Load the model
        # model = LoadedModel(modelDir, modelName, predict_mode='filename')
        model = LoadedModel(modelDir, predict_mode='filename')

        for filename in tqdm(files, desc='Processed files'):

            basename = os.path.basename(filename).split('.')[0]

            if os.path.exists(os.path.join(outDir, basename + '.xml')):
                print(basename + " skipped...")
                continue

            prediction_outputs = model.predict(filename)

            probs = prediction_outputs['probs'][0]
            probs = probs.astype(float)

            imsave(os.path.join(outDir, basename + '-articles.png'),
                   convert_image(probs))
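
mask_unused_gpus is not defined in these examples. A common implementation of the pattern, assumed here, queries nvidia-smi for free memory and exposes only the least-loaded GPUs through CUDA_VISIBLE_DEVICES; the command and selection rule below are illustrative:

import os
import subprocess

def mask_unused_gpus(leave_unmasked: int = 1) -> None:
    """Hypothetical helper: expose only the GPUs with the most free memory."""
    output = subprocess.check_output(
        ['nvidia-smi', '--query-gpu=memory.free',
         '--format=csv,noheader,nounits']).decode()
    free_mem = [int(x) for x in output.strip().split('\n')]
    # Sort GPU indices by free memory (descending) and keep the top ones.
    best = sorted(range(len(free_mem)), key=lambda i: -free_mem[i])[:leave_unmasked]
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(map(str, best))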
Example #7
        model_dir = 'demo/model/'

    input_files = glob('demo/pages/test_a1/images/*')

    output_dir = 'demo/processed_images'
    os.makedirs(output_dir, exist_ok=True)
    # PAGE XML format output
    output_pagexml_dir = os.path.join(output_dir, PAGE_XML_DIR)
    os.makedirs(output_pagexml_dir, exist_ok=True)

    # Store coordinates of page in a .txt file
    txt_coordinates = ''

    with tf.compat.v1.Session():  # Start a tensorflow session
        # Load the model
        m = LoadedModel(model_dir, predict_mode='filename')

        for filename in tqdm(input_files, desc='Processed files'):
            # For each image, predict each pixel's label
            prediction_outputs = m.predict(filename)
            probs = prediction_outputs['probs'][0]
            original_shape = prediction_outputs['original_shape']
            # Take only class '1' (class 0 is the background, class 1 is the page)
            probs = probs[:, :, 1]
            probs = probs / np.max(probs)  # Normalize to be in [0, 1]

            # Binarize the predictions
            page_bin = page_make_binary_mask(probs)

            # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)
            bin_upscaled = cv2.resize(page_bin.astype(np.uint8, copy=False),
                                      tuple(original_shape[::-1]),
                                      interpolation=cv2.INTER_NEAREST)
Example #8
class TextLineDetector:
    def __init__(self, model_dir, debug=True):
        self.sess = tf.Session()
        with self.sess.as_default():
            self.model = LoadedModel(model_dir, predict_mode="image")
        self.debug = debug
        self.debug_dir = Path("./dhSegment_debug/")
        self.debug_dir.mkdir(exist_ok=True)

    def __exit__(self, exc_type, exc_value, traceback):
        self.sess.close()

    def detect(self, img, model_predict_h_w=None):
        '''
            Input:
                img: a BGR image (np.ndarray)
            Return:
                cv2 style contours

            Reference:
                https://github.com/dhlab-epfl/fdh-tutorials/blob/master/computer-vision-deep-learning/3-applications/dl-document-processing-textlines/fdh_document_processing.ipynb
        '''
        assert isinstance(img, np.ndarray) and len(img.shape) == 3
        assert model_predict_h_w is None or isinstance(model_predict_h_w,
                                                       tuple)

        with self.sess.as_default():
            # Deep Learning based textline detection
            start = time.time()
            # Note: the model takes RGB image as input
            output_textline = self.model.predict(img[:, :, [2, 1, 0]])
            if self.debug:
                print("[!] The model took {} to predict this image".format(
                    time.time() - start))
            textline_probs = output_textline['probs'][0, :, :, 1]
            if self.debug:
                plt.imshow(textline_probs)
                plt.savefig(str(self.debug_dir / "textline_probability.png"))

            # The higher the sigma, the fewer textlines we get
            textline_probs2 = cleaning_probs(textline_probs, sigma=1)
            textline_mask = hysteresis_thresholding(textline_probs2,
                                                    low_threshold=0.3,
                                                    high_threshold=0.6,
                                                    candidates_mask=None)
            if self.debug:
                plt.imshow(textline_mask)
                plt.savefig(str(self.debug_dir / "textline_mask.png"))

            start = time.time()
            line_contours = line_vectorization.find_lines(
                resize(textline_mask, img.shape[0:2]))
            if self.debug:
                print("[!] Find lines took {} secs".format(time.time() -
                                                           start))

            if self.debug:
                drawn_line_img = cv2.drawContours(img.copy(),
                                                  line_contours,
                                                  -1, (0, 255, 0),
                                                  thickness=3)
                cv2.imwrite(str(self.debug_dir / "drawn_line_img.png"),
                            drawn_line_img)

        return line_contours
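
A usage sketch for TextLineDetector with placeholder paths; the input must be a BGR image as produced by cv2.imread, and the returned contours are in cv2 format:

import cv2

detector = TextLineDetector('model_name/export/1564890842/', debug=False)
img = cv2.imread('data/page.jpg')  # BGR image, shape (h, w, 3)
contours = detector.detect(img)
print('found {} text lines'.format(len(contours)))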
Example #9
def _signature_def_to_tensors(signature_def):
    g = tf.get_default_graph()
    return {k: g.get_tensor_by_name(v.name) for k, v in signature_def.inputs.items()}, \
           {k: g.get_tensor_by_name(v.name) for k, v in signature_def.outputs.items()}



"""Set gpu device"""
os.environ["CUDA_VISIBLE_DEVICES"]="0"

"""Start session"""
sess = tf.InteractiveSession()

"""Load model"""
model_dir = 'models/ENP_500_model_v3/export/1564890842'
m = LoadedModel(model_dir, predict_mode='filename')

"""Localize latent space"""
dhSegment_graph = tf.get_default_graph()
for op in dhSegment_graph.get_operations():
    print(op.values())

latent_space = dhSegment_graph.get_tensor_by_name('resnet_v1_50/block4/unit_3/bottleneck_v1/Relu:0')
print(latent_space)

loaded_model = tf.saved_model.loader.load(sess, ['serve'], model_dir)
print(list(loaded_model.signature_def))
input_dict_key = 'filename'
signature_def_key = 'serving_default'
input_dict, output_dict = _signature_def_to_tensors(loaded_model.signature_def[signature_def_key])
output_dict.update({'latent': latent_space})
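
With the latent tensor added to output_dict, a single sess.run call now returns the regular model outputs together with the latent features. A sketch, assuming the 'serving_default' signature takes a filename string as input (as predict_mode='filename' suggests):

out = sess.run(output_dict, feed_dict={input_dict['filename']: 'data/page.jpg'})
print(out['latent'].shape)  # feature map from resnet_v1_50/block4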
Example #10
def evaluate_fnc(model_dir):

    model_name = os.path.basename(os.path.normpath(model_dir))

    model_json_file = os.path.join(model_dir, 'config.json')
    if not os.path.isfile(model_json_file):
        print('Sorry, I could not load the config.json file')
        return

    with open(model_json_file) as f:
        data = json.load(f)
        train_data_dir = data["train_data"]
        print('train data: ' + train_data_dir)
        # generate the test data dir by replacing the last occurrence of /train with /train_and_val
        test_data_dir = re.sub('(/train)(?!.*/train)', '/train_and_val',
                               train_data_dir)
        print('saving to : ' + test_data_dir)
        test_data_dir = os.path.join(test_data_dir, 'images')
        print('test_data_dir: ' + test_data_dir)
        if not os.path.isdir(test_data_dir):
            print('Sorry, the test folder does not exist: ' + test_data_dir)
            return

        # generate the results dir as a 'results_train_and_val' child of the parent folder of train_data_dir
        result_parent_dir = os.path.join(os.path.dirname(train_data_dir),
                                         'results_train_and_val')
        output_dir = os.path.join(result_parent_dir, model_name)

        # TODO: read this from json
        use_ms = True
        # TODO: is this necessary?
        model_dir = os.path.join(model_dir, 'export')

        input_image_filenames = glob(os.path.join(test_data_dir, '*.jpg')) + \
            glob(os.path.join(test_data_dir, '*.png'))

        if use_ms:
            # Strip the channel suffix (e.g. '_2') so each multispectral page is listed once
            input_files = set(
                re.sub(r'_\d\d?', '', f) for f in input_image_filenames)
            print('Found {} MSI images'.format(len(input_files)))
        else:
            input_files = input_image_filenames
            print('Found {} images'.format(len(input_files)))

        mask_unused_gpus.mask_unused_gpus(2)

        os.makedirs(output_dir, exist_ok=True)

        with tf.Session():  # Start a tensorflow session
            # Load the model

            m = LoadedModel(model_dir, predict_mode='filename')

            total_parameters = 0
            for variable in tf.trainable_variables():
                # shape is an array of tf.Dimension
                shape = variable.get_shape()
                variable_parameters = 1
                for dim in shape:
                    variable_parameters *= dim.value
                total_parameters += variable_parameters

            print('number of network parameters: ' + str(total_parameters))

            # Iterate over the images
            for filename in tqdm(input_files, desc='Processed files'):

                prediction_outputs = m.predict(filename)
                probs = prediction_outputs['probs'][0]

                original_shape = prediction_outputs['original_shape']

                if use_ms:
                    # pick a fixed channel (suffix '_2') for reading the image
                    filename = re.sub(r'\.png', '_2.png', filename)

                img = cv.imread(filename, cv.IMREAD_COLOR)

                # just use the fg class:
                p = probs[:, :, 1] * 255
                bin_upscaled = cv.resize(p.astype(np.uint8, copy=False),
                                         tuple(original_shape[::-1]),
                                         interpolation=cv.INTER_CUBIC)

                b = (bin_upscaled > 127) * 255
                b = np.array(b, dtype=np.uint8)

                b_rgb = np.zeros(
                    (bin_upscaled.shape[0], bin_upscaled.shape[1], 3),
                    dtype=np.uint8)
                b_rgb[:, :, 1] = b

                # do we have a second fg class?
                if (probs.shape[2] == 3):
                    p_fg2 = probs[:, :, 2] * 255
                    bin_upscaled_fg2 = cv.resize(p_fg2.astype(np.uint8,
                                                              copy=False),
                                                 tuple(original_shape[::-1]),
                                                 interpolation=cv.INTER_CUBIC)
                    b_fg2 = (bin_upscaled_fg2 > 127) * 255
                    b_fg2 = np.array(b_fg2, dtype=np.uint8)
                    b_rgb[:, :, 2] = b_fg2

                filename = re.sub(r'_\d\.png', '.png',
                                  os.path.basename(filename))
                full_filename = os.path.join(output_dir, filename)

                cv.imwrite(full_filename, b_rgb)
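
The multispectral handling above hinges on two small regex rewrites: stripping a one- or two-digit channel suffix to deduplicate filenames, and substituting a fixed channel back in for reading. A quick illustration:

import re

print(re.sub(r'_\d\d?', '', 'page_12.png'))    # -> 'page.png'
print(re.sub(r'\.png', '_2.png', 'page.png'))  # -> 'page_2.png'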
Example #11
def main(input_dir,
         model_dir,
         out_dir,
         raw_out_dir=None,
         min_area=0.0005,
         overlay_alpha=127,
         box_color=(255, 0, 0)):
    os.makedirs(out_dir, exist_ok=True)
    if raw_out_dir:
        os.makedirs(raw_out_dir, exist_ok=True)
    input_files = glob('{}/*'.format(input_dir))
    with tf.Session():
        # Load the model
        m = LoadedModel(model_dir, predict_mode='filename')
        for filename in tqdm(input_files, desc='Processed files'):
            basename = os.path.basename(filename).split('.')[0]

            # For each image, predict each pixel's label
            prediction_outputs = m.predict(filename)
            probs = prediction_outputs['probs'][0]
            original_shape = prediction_outputs['original_shape']
            # Take only class '1'
            # (class 0 is the background, class 1 is the annotation.)
            probs = probs[:, :, 1]
            probs = probs / np.max(probs)  # Normalize to be in [0, 1]

            # Binarize the predictions
            preds_bin = make_binary_mask(probs)

            # Upscale to have full resolution image
            # (cv2 uses (w,h) and not (h,w) for giving shapes)
            bin_upscaled = cv2.resize(preds_bin.astype(np.uint8, copy=False),
                                      tuple(original_shape[::-1]),
                                      interpolation=cv2.INTER_NEAREST)

            if raw_out_dir:
                # If requested, draw the binary mask as an overlay
                # over the image and save it.
                img = Image.open(filename)
                img = img.convert('RGBA')
                overlay_arr = np.stack(
                    [
                        bin_upscaled * box_color[0],  # R
                        bin_upscaled * box_color[1],  # G
                        bin_upscaled * box_color[2],  # B
                        np.ones_like(bin_upscaled) * overlay_alpha  # A
                    ],
                    axis=2)
                overlay = Image.fromarray(overlay_arr, mode='RGBA')
                img.paste(overlay, (0, 0), overlay)
                img.save(
                    os.path.join(raw_out_dir, '{}_raw.png'.format(basename)),
                    'PNG')

            # Find quadrilaterals enclosing the annotated regions
            boxes = boxes_detection.find_boxes(
                bin_upscaled.astype(np.uint8, copy=False),
                min_area=min_area,
                mode='min_rectangle',
            )

            # Draw boxes on original image.
            original_img = imread(filename, pilmode='RGB')
            if boxes is not None:
                cv2.polylines(original_img,
                              boxes,
                              True,
                              box_color,
                              thickness=5)
            else:
                print('No annotation found in {}'.format(filename))

            imsave(os.path.join(out_dir, '{}_boxes.jpg'.format(basename)),
                   original_img)
Example #12
    # Store coordinates of page in a .txt file
    txt_coordinates = ''

    tuple_models = list()  # list of tuples containing (Session, Graph, Model) for each model

    # For each model to be loaded, create a new graph and session. Then load the model.
    for mn in modelnames:
        graph = tf.Graph()
        session = tf.Session(graph=graph)
        with session.as_default(), graph.as_default():  # This is the important line!
            model = LoadedModel(bp + 'model/', mn, predict_mode='filename')
        tuple_models.append((session, graph, model))

    for filename in tqdm(input_files, desc='Processed files'):

        probList = list()
        basename = os.path.basename(filename).split('.')[0]

        if os.path.exists(os.path.join(output_dir, basename + '-probs.png')):
            print(basename + " skipped...")
            continue

        # For each image, predict each pixel's label
        for sess, g, model in tuple_models:
            # You need to call 'predict' method in the same Graph and Session 'environment' as you loaded it
            with sess.as_default(), g.as_default():
                #probs = model.predict(filename, prediction_key='probs')
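                # Hypothetical continuation: the original snippet is truncated here.
                # Run the prediction inside this session/graph and collect the
                # per-model probability map (batch size 1).
                probs = model.predict(filename, prediction_key='probs')[0]
                probList.append(probs)

        # Assumed ensemble step, not shown in the original: average the
        # probability maps of all models into a single prediction.
        mean_probs = np.mean(probList, axis=0)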
Example #13
def evaluate(layout_name, model_dir, data_dir):

	evaluation_path = data_dir / "evaluation.json"

	if (not args.force) and evaluation_path.exists():
		return evaluation_path

	n_classes = 0
	classes = []
	with open(data_dir / "classes.txt") as f:
		classes = [line.strip() for line in f.readlines()]
		classes = [tuple(map(int, line.split())) for line in classes if line]
		n_classes = len(classes)

	data = []
	with open(data_dir / "val.csv", "r") as f:
		for line in f.readlines():
			if line.strip():
				im, gt = line.strip().split(",")
				data.append((Path(im), Path(gt)))

	def colorize(pixels):
		colors = plt.get_cmap("tab10").colors
		colors = (np.array(colors).flatten() * 255).astype(np.uint8)
		im = PIL.Image.fromarray(pixels, "P")
		palette = np.zeros((768,), dtype=np.uint8)
		palette[:len(colors)] = colors
		im.putpalette(palette)
		return np.array(im.convert("RGB"))


	LongShelhamerDarrellMetrics = namedtuple(
		'LongShelhamerDarrellMetrics',
		['pixel_accuracy', 'mean_accuracy', 'mean_IU', 'frequency_weighted_IU'])


	def lsd_metrics(
		prediction: np.ndarray,
		truth: np.ndarray,
		n_classes: int) -> LongShelhamerDarrellMetrics:

		"""This computes the evaluation metrics given for semantic segmentation given in:
		[1] J. Long, E. Shelhamer, and T. Darrell, "Fully Convolutional Networks for
		Semantic Segmentation", 2014. (available at https://arxiv.org/abs/1411.4038).

		Note:
			Modified to exclude empty classes.

		Args:
			prediction: integer array of predicted classes for each pixel.
			truth: integer array of ground truth for each pixel.
			n_classes: defines the pixel classes [0, 1, ..., n_classes - 1].

		Returns:
			LongShelhamerDarrellMetrics: The computed metrics.
		"""

		def _check_array(name, a):
			if not np.issubdtype(a.dtype, np.integer):
				raise ValueError("given %s-array must be of type integer" % name)

			if not (0 <= np.min(a) < n_classes and 0 <= np.max(a) < n_classes):
				raise ValueError("non-class values in given %s-array" % name)

		_check_array('prediction', prediction)
		_check_array('truth', truth)

		classes = list(range(n_classes))

		@lru_cache(maxsize=None)
		def n(i: int, j: int) -> Fraction:
			# n(i, j) is "the number of pixels of class i predicted to belong to
			# class j", see [1].
			return Fraction(int(np.sum(np.logical_and(
				truth == i, prediction == j).astype(np.uint8), dtype=np.uint64)))

		@lru_cache(maxsize=None)
		def t(i: int) -> Fraction:
			# t(i) is "the total number of pixels of class i", see [1].
			return sum(n(j, i) for j in classes)

		non_empty_classes = [i for i in classes if t(i) > 0]

		return LongShelhamerDarrellMetrics(
			pixel_accuracy=sum(n(i, i) for i in classes) / sum(t(i) for i in classes),

			mean_accuracy=(Fraction(1) / len(non_empty_classes)) * sum(
				(n(i, i) / t(i)) for i in non_empty_classes),

			mean_IU=(Fraction(1) / len(non_empty_classes)) * sum(
				(
					n(i, i) / (
						t(i) + sum(n(j, i) for j in non_empty_classes) - n(i, i))
				) for i in non_empty_classes),

			frequency_weighted_IU=(Fraction(1) / sum(t(k) for k in non_empty_classes)) * sum(
				(
					(t(i) * n(i, i)) / (
						t(i) + sum(n(j, i) for j in non_empty_classes) - n(i, i))
				) for i in non_empty_classes)
		)


	decimal.getcontext().prec = 30

	tf.reset_default_graph()
	session = tf.InteractiveSession()

	# see dhSegment/dh_segment/inference/loader.py
	model = LoadedModel(model_dir, predict_mode='filename_original_shape')
	tile_loader = TileLoader(model, classes)
	documents = load_documents(layout_name, data, tile_loader)

	print("found %d documents." % len(documents), flush=True)

	images_path = data_dir / "inference"
	images_path.mkdir(exist_ok=True)

	metrics_results = collections.defaultdict(list)
	for name, y_pred, y_true in tqdm(documents, desc="computing metrics"):
		
		#print("y_pred", y_pred.shape, y_pred.dtype)
		#print("y_true", y_true.shape, y_true.dtype)
		#print("original shape", prediction_outputs['original_shape'])

		#original_shape = prediction_outputs['original_shape']

		lsdm = lsd_metrics(y_pred, y_true, n_classes)
		for k in lsdm._fields:
			x = getattr(lsdm, k)
			metrics_results[k].append(Decimal(x.numerator) / Decimal(x.denominator))


		y_true_flat = y_true.reshape((y_true.size, ))
		y_pred_flat = y_pred.reshape((y_pred.size, ))


		for k in ('micro', 'macro', 'weighted'):
			metrics_results['precision-%s' % k].append(
				Decimal(sklearn.metrics.precision_score(y_true_flat, y_pred_flat, average=k)))
			metrics_results['recall-%s' % k].append(
				Decimal(sklearn.metrics.recall_score(y_true_flat, y_pred_flat, average=k)))
			metrics_results['jaccard-%s' % k].append(
				Decimal(sklearn.metrics.jaccard_score(y_true_flat, y_pred_flat, average=k)))

		metrics_results['matthews'].append(
			Decimal(sklearn.metrics.matthews_corrcoef(y_true_flat, y_pred_flat)))

		PIL.Image.fromarray(colorize(y_pred), "RGB").save(
			images_path / ("%s-pred.png" % name))
		PIL.Image.fromarray(colorize(y_true), "RGB").save(
			images_path / ("%s-true.png" % name))

	with open(evaluation_path, "w") as result_file:
		result_data = dict()
		for k, v in metrics_results.items():
			result_data[k] = str(statistics.mean(v).quantize(Decimal('.0001'), rounding=decimal.ROUND_DOWN))
		result_file.write(json.dumps(result_data))

		#result_file.write("metric;value;number of samples\n")
		#for k, v in metrics_results.items():
		#	result_file.write("%s;%s;%d\n" % (
		#		k, str(statistics.mean(v).quantize(Decimal('.0001'), rounding=decimal.ROUND_DOWN)), len(v)))

	session.close()

	return evaluation_path
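
The four Long-Shelhamer-Darrell metrics computed above can be cross-checked from a plain confusion matrix. The toy sketch below restates the same definitions (restricted to non-empty classes via the t > 0 mask); it is illustrative code, not part of the original script:

import numpy as np
from sklearn.metrics import confusion_matrix

y_true = np.array([0, 0, 1, 1, 2, 2])
y_pred = np.array([0, 1, 1, 1, 2, 0])

cm = confusion_matrix(y_true, y_pred)    # cm[i, j] = n(i, j)
t = cm.sum(axis=1).astype(float)         # t(i): total pixels of class i
diag = np.diag(cm).astype(float)         # n(i, i)
iu = diag / (t + cm.sum(axis=0) - diag)  # per-class intersection over union

pixel_accuracy = diag.sum() / cm.sum()
mean_accuracy = np.mean(diag[t > 0] / t[t > 0])
mean_IU = np.mean(iu[t > 0])
frequency_weighted_IU = (t[t > 0] * iu[t > 0]).sum() / t.sum()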