Example #1
0
 def test_segment_color(self):
     """
     Test correct handling of color input.

     Segmenting the 'input.jpg' resource is expected to raise
     KrakenInputException.
     """
     with raises(KrakenInputException), \
             Image.open(resources / 'input.jpg') as color_im:
         segment(color_im)
Example #2
0
def segmenter(input_image, text_direction, script_detect, allowed_scripts,
              scale, maxcolseps, black_colseps, remove_hlines, pad,
              mask_filepath) -> Image:
    """
    Segment a page image, optionally with a mask and script detection.

    The mask is opened from ``mask_filepath`` when that argument is truthy.
    The page is then passed to ``pageseg.segment``; when ``script_detect``
    is truthy the result is additionally run through
    ``kraken.pageseg.detect_scripts`` restricted to ``allowed_scripts``.

    A short message is printed before any failure is re-raised, both for
    an unreadable mask (IOError) and for errors during segmentation.

    Returns:
        Whatever ``pageseg.segment`` / ``detect_scripts`` return.
    """
    mask = None
    if mask_filepath:
        try:
            mask = Image.open(mask_filepath)
        except IOError:
            print('Failed to load a mask, {}.'.format(mask_filepath))
            raise

    try:
        segmentation = pageseg.segment(input_image, text_direction, scale,
                                       maxcolseps, black_colseps,
                                       no_hlines=remove_hlines, pad=pad,
                                       mask=mask)
        if not script_detect:
            return segmentation
        return kraken.pageseg.detect_scripts(input_image, segmentation,
                                             valid_scripts=allowed_scripts)
    except Exception:
        print('Page segmentation error.')
        raise
Example #3
0
def segmenter(text_direction, script_detect, allowed_scripts, scale,
              maxcolseps, black_colseps, remove_hlines, pad, mask, base_image, input,
              output) -> None:
    """
    Segment the image at ``input`` and dump the result as JSON to ``output``.

    ``mask``, when truthy, is opened as an image and handed to the segmenter.
    Images that cannot be opened are reported as ``click.BadParameter``.
    Script detection limited to ``allowed_scripts`` runs on the segmentation
    when ``script_detect`` is truthy. A red cross is emitted before any
    segmentation error is re-raised, a green check mark after success.
    """
    import json

    from kraken import pageseg

    def _open_image(path):
        # Surface unreadable files as a parameter error.
        try:
            return Image.open(path)
        except IOError as err:
            raise click.BadParameter(str(err))

    page = _open_image(input)
    if mask:
        mask = _open_image(mask)
    message('Segmenting\t', nl=False)
    try:
        segmentation = pageseg.segment(page, text_direction, scale, maxcolseps,
                                       black_colseps, no_hlines=remove_hlines,
                                       pad=pad, mask=mask)
        if script_detect:
            segmentation = pageseg.detect_scripts(
                page, segmentation, valid_scripts=allowed_scripts)
    except Exception:
        message('\u2717', fg='red')
        raise
    with open_file(output, 'w') as handle:
        json.dump(segmentation, cast(IO[Any], handle))
    message('\u2713', fg='green')
Example #4
0
def segmentation_kraken(doc, method=u'segment_kraken', black_colseps=False):
    """
    Performs page segmentation using kraken's built-in algorithm and writes a
    skeleton TEI file.

    Args:
        doc (unicode, unicode): The input document tuple
        method (unicode): The suffix string appended to all output files
        black_colseps (bool): Assume black column separators instead of white
                              ones.

    Returns:
        The value of ``storage.get_storage_path`` for the written ``.xml``
        file containing the segmentation.
    """

    input_path = storage.get_abs_path(*doc)
    # Only the stem is kept; the original extension is replaced by '.xml'.
    output_stem = os.path.splitext(
        storage.insert_suffix(input_path, method))[0]
    xml_path = output_stem + '.xml'
    logger.debug('Reading image using PIL')
    # Both the image and the output file are closed on exit, including when
    # segmentation or serialization raises.
    with Image.open(input_path) as img, open(xml_path, 'w') as fp:
        logger.debug('Initializing TEI with {} ({} {})'.format(
            doc[1], *img.size))
        tei = OCRRecord()
        tei.img = storage.get_url(*doc)
        tei.dimensions = img.size
        tei.title = os.path.basename(doc[1])
        tei.add_respstmt('kraken', 'page segmentation')
        for seg in pageseg.segment(img, black_colseps=black_colseps)['boxes']:
            logger.debug('Found line at {} {} {} {}'.format(*seg))
            tei.add_line(seg)
        logger.debug('Write segmentation to {}'.format(fp.name))
        tei.write_tei(fp)
    return storage.get_storage_path(xml_path)
Example #5
0
def segmenter(text_direction, script_detect, allowed_scripts, scale,
              maxcolseps, black_colseps, remove_hlines, pad, base_image, input,
              output) -> None:
    """
    Segment the image at ``input`` and dump the result as JSON to ``output``.

    An unreadable input is reported as ``click.BadParameter``. Script
    detection limited to ``allowed_scripts`` runs on the segmentation when
    ``script_detect`` is truthy. A red cross is emitted before any
    segmentation error is re-raised, a green check mark after success.
    """
    import json

    from kraken import pageseg

    try:
        page = Image.open(input)
    except IOError as err:
        raise click.BadParameter(str(err))

    message('Segmenting\t', nl=False)
    try:
        segmentation = pageseg.segment(page, text_direction, scale,
                                       maxcolseps, black_colseps,
                                       no_hlines=remove_hlines, pad=pad)
        if script_detect:
            segmentation = pageseg.detect_scripts(
                page, segmentation, valid_scripts=allowed_scripts)
    except Exception:
        message('\u2717', fg='red')
        raise

    with open_file(output, 'w') as handle:
        json.dump(segmentation, cast(IO[Any], handle))
    message('\u2713', fg='green')
Example #6
0
def segmentation_kraken(doc, method=u'segment_kraken', black_colseps=False):
    """
    Performs page segmentation using kraken's built-in algorithm and writes a
    skeleton TEI file.

    Args:
        doc (unicode, unicode): The input document tuple
        method (unicode): The suffix string appended to all output files
        black_colseps (bool): Assume black column separators instead of white
                              ones.

    Returns:
        The value of ``storage.get_storage_path`` for the written ``.xml``
        file containing the segmentation.
    """

    input_path = storage.get_abs_path(*doc)
    # Only the stem is kept; the original extension is replaced by '.xml'.
    output_stem = os.path.splitext(storage.insert_suffix(input_path,
                                                         method))[0]
    xml_path = output_stem + '.xml'
    logger.debug('Reading image using PIL')
    # Both the image and the output file are closed on exit, including when
    # segmentation or serialization raises.
    with Image.open(input_path) as img, open(xml_path, 'w') as fp:
        logger.debug('Initializing TEI with {} ({} {})'.format(doc[1], *img.size))
        tei = OCRRecord()
        tei.img = storage.get_url(*doc)
        tei.dimensions = img.size
        tei.title = os.path.basename(doc[1])
        tei.add_respstmt('kraken', 'page segmentation')
        for seg in pageseg.segment(img, black_colseps=black_colseps)['boxes']:
            logger.debug('Found line at {} {} {} {}'.format(*seg))
            tei.add_line(seg)
        logger.debug('Write segmentation to {}'.format(fp.name))
        tei.write_tei(fp)
    return storage.get_storage_path(xml_path)
Example #7
0
def transcription(ctx, text_direction, scale, maxcolseps, black_colseps, font,
                  font_style, prefill, output, images):
    """
    Build a transcription interface from page images and write it out.

    Every image is binarized unless it is already bitonal, segmented with
    ``pageseg.segment`` and added to a ``TranscriptionInterface`` that is
    finally written to ``output``. When ``prefill`` is truthy it is loaded
    with ``models.load_any`` and its predictions are attached to each page.

    Progress is reported as timestamped lines when ``ctx.meta['verbose']``
    is positive, and through the spinner / check mark otherwise.
    """
    st_time = time.time()
    ti = transcribe.TranscriptionInterface(font, font_style)

    def chatty():
        return ctx.meta['verbose'] > 0

    def stamp(text):
        # Progress line prefixed with the seconds elapsed since start.
        click.echo(u'[{:2.4f}] {}'.format(time.time() - st_time, text))

    def check_mark():
        # Green tick over the spinner, then the cursor-show escape.
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)

    if prefill:
        if chatty():
            stamp(u'Loading model {}'.format(prefill))
        else:
            spin('Loading RNN')
        prefill = models.load_any(prefill.encode('utf-8'))
        if not ctx.meta['verbose']:
            check_mark()

    for fp in images:
        if chatty():
            stamp(u'Reading {}'.format(fp.name))
        else:
            spin('Reading images')
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            if chatty():
                stamp(u'Binarizing page')
            im = binarization.nlbin(im)
        if chatty():
            stamp(u'Segmenting page')
        res = pageseg.segment(im, text_direction, scale, maxcolseps, black_colseps)
        if not prefill:
            ti.add_page(im, res)
        else:
            preds = []
            for pred in rpred.rpred(prefill, im, res):
                if chatty():
                    stamp(pred.prediction)
                else:
                    spin('Recognizing')
                preds.append(pred)
            if chatty():
                click.echo(u'Execution time: {}s'.format(time.time() - st_time))
            else:
                check_mark()
            ti.add_page(im, res, records=preds)
        fp.close()

    if not ctx.meta['verbose']:
        check_mark()
    if chatty():
        stamp(u'Writing transcription to {}'.format(output.name))
    else:
        spin('Writing output')
    ti.write(output)
    if not ctx.meta['verbose']:
        check_mark()
Example #8
0
def transcription(ctx, font, font_style, prefill, output, images):
    """
    Build a transcription interface from page images and write it out.

    Every image is binarized unless it is already bitonal, segmented with
    ``pageseg.segment`` and added to a ``TranscriptionInterface`` that is
    finally written to ``output``. When ``prefill`` is truthy it is loaded
    with ``models.load_any`` and the pages are added with its predictions
    as records.

    Progress is reported as timestamped lines when ``ctx.meta['verbose']``
    is positive, and through the spinner / check mark otherwise.
    """
    st_time = time.time()
    ti = transcrib.TranscriptionInterface(font, font_style)

    def chatty():
        return ctx.meta['verbose'] > 0

    def stamp(text):
        # Progress line prefixed with the seconds elapsed since start.
        click.echo(u'[{:2.4f}] {}'.format(time.time() - st_time, text))

    def check_mark():
        # Green tick over the spinner, then the cursor-show escape.
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)

    if prefill:
        if chatty():
            stamp(u'Loading model {}'.format(prefill))
        else:
            spin('Loading RNN')
        prefill = models.load_any(prefill)
        if not ctx.meta['verbose']:
            check_mark()

    for fp in images:
        if chatty():
            stamp(u'Reading {}'.format(fp.name))
        else:
            spin('Reading images')
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            if chatty():
                stamp(u'Binarizing page')
            im = binarization.nlbin(im)
        if chatty():
            stamp(u'Segmenting page')
        res = pageseg.segment(im)
        if not prefill:
            ti.add_page(im, res)
        else:
            preds = []
            for pred in rpred.rpred(prefill, im, res):
                if chatty():
                    stamp(pred.prediction)
                else:
                    spin('Recognizing')
                preds.append(pred)
            if chatty():
                click.echo(u'Execution time: {}s'.format(time.time() - st_time))
            else:
                check_mark()
            ti.add_page(im, records=preds)

    if not ctx.meta['verbose']:
        check_mark()
    if chatty():
        stamp(u'Writing transcription to {}'.format(output.name))
    else:
        spin('Writing output')
    ti.write(output)
    if not ctx.meta['verbose']:
        check_mark()
Example #9
0
def segmenter(legacy, model, text_direction, scale, maxcolseps, black_colseps,
              remove_hlines, pad, mask, device, input, output) -> None:
    """
    Segment the image at ``input`` and dump the result as JSON to ``output``.

    On the first processing step a non-image input is run through the
    matching input parser and replaced by its ``'image'`` entry; the input
    is also recorded as ``ctx.meta['base_image']`` if none is set yet.
    ``legacy`` selects ``pageseg.segment``; otherwise ``blla.segment`` is
    called with ``model`` and ``device``. Images that cannot be opened are
    reported as ``click.BadParameter``. A red cross is emitted before any
    segmentation error is re-raised, a green check mark after success.
    """
    import json

    from kraken import pageseg
    from kraken import blla

    meta = click.get_current_context().meta

    if meta['first_process']:
        input_type = meta['input_format_type']
        if input_type != 'image':
            input = get_input_parser(input_type)(input)['image']
        meta['first_process'] = False

    meta.setdefault('base_image', input)

    def _open_image(path):
        # Surface unreadable files as a parameter error.
        try:
            return Image.open(path)
        except IOError as err:
            raise click.BadParameter(str(err))

    page = _open_image(input)
    if mask:
        mask = _open_image(mask)
    message('Segmenting\t', nl=False)
    try:
        if not legacy:
            segmentation = blla.segment(page, text_direction, mask=mask,
                                        model=model, device=device)
        else:
            segmentation = pageseg.segment(page, text_direction, scale,
                                           maxcolseps, black_colseps,
                                           no_hlines=remove_hlines,
                                           pad=pad, mask=mask)
    except Exception:
        message('\u2717', fg='red')
        raise
    with open_file(output, 'w') as handle:
        json.dump(segmentation, cast(IO[Any], handle))
    message('\u2713', fg='green')
Example #10
0
def bound_image(cv2image):
    """
    Detect candidate text regions and merge boxes that touch or overlap.

    :param cv2image: Numpy array representing the text section of the cardboard
    :return: The detected regions that might contain text according to the
        kraken page segmenter, as a list of integer corner-point arrays (one
        ``cv2.boxPoints`` result per region, grown by 10 pixels per axis)
    """
    binary = _binarize(cv2image)
    # NOTE(review): assumes pageseg.segment yields (x1, y1, x2, y2) boxes
    # directly -- confirm against the installed kraken version.
    bounds = list(pageseg.segment(binary))
    count = len(bounds)
    # Proximity tolerance: 3% of the matching image dimension.
    x_tol = 0.03 * cv2image.shape[1]
    y_tol = 0.03 * cv2image.shape[0]

    common = []
    for i in range(count):
        rect_a = bounds[i]
        for j in range(i + 1, count):
            rect_b = bounds[j]
            # TODO Fix merging algorithm
            # a's edge 3 lies close to b's edge 1 and edge 0 is aligned.
            stacked = (abs(rect_a[0] - rect_b[0]) < x_tol
                       and abs(rect_a[3] - rect_b[1]) < y_tol)
            # a's edge 2 lies close to b's edge 0 and edge 1 is aligned.
            adjacent = (abs(rect_a[1] - rect_b[1]) < y_tol
                        and abs(rect_a[2] - rect_b[0]) < x_tol)
            overlapping = (rect_a[0] < rect_b[2] and rect_a[2] > rect_b[0]
                           and rect_a[1] < rect_b[3]
                           and rect_a[3] > rect_b[1])
            if stacked or adjacent or overlapping:
                common.append((i, j))

    # Add the union box of every pair. All pair indices are < count, so the
    # appended boxes never shift the originals being referenced.
    bounds.extend(
        [min(bounds[f][0], bounds[s][0]),
         min(bounds[f][1], bounds[s][1]),
         max(bounds[f][2], bounds[s][2]),
         max(bounds[f][3], bounds[s][3])]
        for f, s in common)

    # Drop each merged original exactly once. A box can take part in several
    # pairs; deleting its index repeatedly would remove unrelated boxes
    # (or run past the end of the list).
    merged_indices = {idx for pair in common for idx in pair}
    for i in sorted(merged_indices, reverse=True):
        del bounds[i]

    boxes = []
    for x1, y1, x2, y2 in bounds:
        mid = ((x1 + x2) / 2, (y1 + y2) / 2)
        # Adding 10 to grow borders a bit
        rect = (mid, (x2 - x1 + 10, y2 - y1 + 10), 0)
        boxes.append(np.int0(cv2.boxPoints(rect)))
    return boxes
def get_text(image):
    '''Return the text found in a PIL Image.

    The image is converted to mode '1' and segmented with kraken's
    ``pageseg.segment``; every resulting box is cropped out of the original
    image and passed to ``pytesseract.image_to_string``.

    :param image: A PIL Image object
    :return: A string in which every recognized chunk is preceded by a
        single space (empty when no boxes are found)
    '''
    # get the list of the coordinates of the boxes that contain text
    page_boxes = pageseg.segment(image.convert('1'))['boxes']
    # Each box is handed to crop() unchanged; join avoids rebuilding the
    # string once per box.
    return ''.join(
        ' ' + pytesseract.image_to_string(image.crop(box))
        for box in page_boxes)
Example #12
0
def segmenter(scale, black_colseps, base_image, input, output):
    """
    Segment the image at ``input`` and write one box per line to ``output``.

    Every box is written as four comma-separated values. An unreadable input
    is reported as ``click.BadParameter``; a red cross is printed before any
    segmentation error is re-raised, a green check mark after success.
    """
    try:
        page = Image.open(input)
    except IOError as err:
        raise click.BadParameter(str(err))

    click.echo('Segmenting\t', nl=False)
    try:
        boxes = pageseg.segment(page, scale, black_colseps)
    except:
        click.secho(u'\u2717', fg='red')
        raise

    row = u'{},{},{},{}\n'
    with open_file(output, 'w') as sink:
        for box in boxes:
            sink.write(row.format(*box))
    click.secho(u'\u2713', fg='green')
Example #13
0
def segmenter(scale, black_colseps, base_image, input, output):
    """
    Segment the image at ``input`` and write one box per line to ``output``.

    Every box is written as four comma-separated values. An unreadable input
    is reported as ``click.BadParameter``; a red cross is printed before any
    segmentation error is re-raised, a green check mark after success.
    """
    try:
        page = Image.open(input)
    except IOError as err:
        raise click.BadParameter(str(err))

    click.echo('Segmenting\t', nl=False)
    try:
        boxes = pageseg.segment(page, scale, black_colseps)
    except:
        click.secho(u'\u2717', fg='red')
        raise

    row = u'{},{},{},{}\n'
    with open_file(output, 'w') as sink:
        for box in boxes:
            sink.write(row.format(*box))
    click.secho(u'\u2713', fg='green')
Example #14
0
 def test_segment_bw(self):
     """
     Tests segmentation of bi-level input.

     Segments the 'bw.png' resource, expects roughly 30 lines (within 5)
     and checks every box against the image bounds.
     """
     with Image.open(os.path.join(resources, 'bw.png')) as im:
         found = segment(im)
         # the line count only has to be approximately right
         self.assertAlmostEqual(
             len(found), 30, delta=5,
             msg='Segmentation differs wildly from true line count')
         # no box may reach or cross the image borders
         width, height = im.size
         for x0, y0, x1, y1 in (line[:4] for line in found):
             self.assertLess(0, x0, msg='Line x0 < 0')
             self.assertLess(0, y0, msg='Line y0 < 0')
             self.assertGreater(width, x1, msg='Line x1 > {}'.format(width))
             self.assertGreater(height, y1, msg='Line y1 > {}'.format(height))
Example #15
0
 def test_segment_bw(self):
     """
     Tests segmentation of bi-level input.

     Segments the 'bw.png' resource, expects roughly 30 boxes (within 5)
     and checks every box against the image bounds.
     """
     with Image.open(os.path.join(resources, 'bw.png')) as im:
         found = segment(im)
         # the line count only has to be approximately right
         self.assertAlmostEqual(
             len(found['boxes']), 30, delta=5,
             msg='Segmentation differs wildly from true line count')
         # no box may reach or cross the image borders
         width, height = im.size
         for x0, y0, x1, y1 in (line[:4] for line in found['boxes']):
             self.assertLess(0, x0, msg='Line x0 < 0')
             self.assertLess(0, y0, msg='Line y0 < 0')
             self.assertGreater(width, x1, msg='Line x1 > {}'.format(width))
             self.assertGreater(height, y1, msg='Line y1 > {}'.format(height))
Example #16
0
def segmenter(text_direction, script_detect, scale, maxcolseps, black_colseps, base_image, input, output):
    """
    Segment the image at ``input`` and dump the result as JSON to ``output``.

    Script detection runs on the segmentation when ``script_detect`` is
    truthy. An unreadable input is reported as ``click.BadParameter``; a red
    cross is printed before any segmentation error is re-raised, a green
    check mark after success.
    """
    try:
        page = Image.open(input)
    except IOError as err:
        raise click.BadParameter(str(err))

    click.echo('Segmenting\t', nl=False)
    try:
        segmentation = pageseg.segment(page, text_direction, scale,
                                       maxcolseps, black_colseps)
        if script_detect:
            segmentation = pageseg.detect_scripts(page, segmentation)
    except:
        click.secho(u'\u2717', fg='red')
        raise

    with open_file(output, 'w') as sink:
        json.dump(segmentation, sink)
    click.secho(u'\u2713', fg='green')
def calculate_line_height(img):
    '''Calculates the average height of a line from a given image
    :param img: A PIL.Image object
    :return: The average line height in pixels, truncated to an integer;
        0 when the segmenter finds no lines
    '''
    # Lets get a list of bounding boxes for this image
    bounding_boxes = pageseg.segment(img.convert('1'))['boxes']
    # Without any detected line there is nothing to average; returning 0
    # avoids a division by zero.
    if not bounding_boxes:
        return 0
    # The height of a box is box[3] - box[1]. PIL counts from the upper left
    # corner, so the larger value is the lower edge.
    total_height = sum(box[3] - box[1] for box in bounding_boxes)
    # Average over all boxes, cut down to a whole pixel.
    return int(total_height / len(bounding_boxes))
Example #18
0
def transcription(ctx, text_direction, scale, maxcolseps, black_colseps, font,
                  font_style, prefill, output, images, segment_page):
    """
    Build a transcription interface from page images and write it out.

    Every image is binarized unless it is already bitonal. With
    ``segment_page`` it is segmented through ``pageseg.segment``; otherwise
    a single box spanning the whole image is used. When ``prefill`` is
    truthy it is loaded with ``models.load_any`` and its predictions are
    attached to each page. The interface is finally written to ``output``.
    """
    ti = transcribe.TranscriptionInterface(font, font_style)

    def check_mark():
        # Green tick over the spinner, then the cursor-show escape.
        message(u'\b\u2713', fg='green', nl=False)
        message('\033[?25h\n', nl=False)

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        spin('Loading RNN')
        prefill = models.load_any(prefill.encode('utf-8'))
        check_mark()

    for fp in images:
        logger.info('Reading {}'.format(fp.name))
        spin('Reading images')
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            logger.info(u'Binarizing page')
            im = binarization.nlbin(im)
        if not segment_page:
            # Treat the whole page as one horizontal line.
            res = {'text_direction': 'horizontal-tb',
                   'boxes': [(0, 0) + im.size]}
        else:
            logger.info(u'Segmenting page')
            res = pageseg.segment(im, text_direction, scale, maxcolseps,
                                  black_colseps)
        if not prefill:
            ti.add_page(im, res)
        else:
            preds = []
            for pred in rpred.rpred(prefill, im, res):
                logger.info('{}'.format(pred.prediction))
                spin('Recognizing')
                preds.append(pred)
            check_mark()
            ti.add_page(im, res, records=preds)
        fp.close()

    check_mark()
    logger.info(u'Writing transcription to {}'.format(output.name))
    spin('Writing output')
    ti.write(output)
    check_mark()
def show_boxes(img):
    '''Draws the bounding boxes found by kraken onto the passed image

    The image is modified in place: every box reported by pageseg.segment
    for the mode '1' version of the image is outlined in red.

    :param img: A PIL.Image object
    :return img: The modified PIL.Image object
    '''
    from PIL import ImageDraw
    # Drawing happens directly on the caller's image
    canvas = ImageDraw.Draw(img)
    # Segmentation runs on a mode '1' copy of the image
    for region in pageseg.segment(img.convert('1'))['boxes']:
        canvas.rectangle(region, fill=None, outline='red')
    return img
Example #20
0
def show_boxes(img):
    '''Draws the bounding boxes found by kraken onto the passed image

    The image is modified in place: every box reported by pageseg.segment
    for the mode '1' version of the image is outlined in red.

    :param img: A PIL.Image object
    :return img: The modified PIL.Image object
    '''
    # Drawing happens directly on the caller's image
    canvas = ImageDraw.Draw(img)
    # black_colseps=True: kraken will assume that columns are separated by
    # black lines.
    segmentation = pageseg.segment(img.convert('1'), black_colseps=True)
    for region in segmentation['boxes']:
        canvas.rectangle(region, fill=None, outline='red')
    return img
Example #21
0
def bound_image(cv2image):
    """
    Detect candidate text regions and merge boxes that touch or overlap.

    :param cv2image: Numpy array representing the text section of the cardboard
    :return: The detected regions that might contain text according to the
        kraken page segmenter, as a list of integer corner-point arrays (one
        ``cv2.boxPoints`` result per region, grown by 10 pixels per axis)
    """
    binary = _binarize(cv2image)
    # NOTE(review): assumes pageseg.segment yields (x1, y1, x2, y2) boxes
    # directly -- confirm against the installed kraken version.
    bounds = list(pageseg.segment(binary))
    count = len(bounds)
    # Proximity tolerance: 3% of the matching image dimension.
    x_tol = 0.03 * cv2image.shape[1]
    y_tol = 0.03 * cv2image.shape[0]

    common = []
    for i in range(count):
        rect_a = bounds[i]
        for j in range(i + 1, count):
            rect_b = bounds[j]
            # a's edge 3 lies close to b's edge 1 and edge 0 is aligned.
            stacked = (abs(rect_a[0] - rect_b[0]) < x_tol
                       and abs(rect_a[3] - rect_b[1]) < y_tol)
            # a's edge 2 lies close to b's edge 0 and edge 1 is aligned.
            adjacent = (abs(rect_a[1] - rect_b[1]) < y_tol
                        and abs(rect_a[2] - rect_b[0]) < x_tol)
            overlapping = (rect_a[0] < rect_b[2] and rect_a[2] > rect_b[0]
                           and rect_a[1] < rect_b[3]
                           and rect_a[3] > rect_b[1])
            # Record a pair once, even when several criteria match;
            # otherwise the same merged box would be emitted repeatedly.
            if stacked or adjacent or overlapping:
                common.append((i, j))

    # Add the union box of every pair. All pair indices are < count, so the
    # appended boxes never shift the originals being referenced.
    bounds.extend(
        [min(bounds[f][0], bounds[s][0]),
         min(bounds[f][1], bounds[s][1]),
         max(bounds[f][2], bounds[s][2]),
         max(bounds[f][3], bounds[s][3])]
        for f, s in common)

    # Drop each merged original exactly once. A box can take part in several
    # pairs; deleting its index repeatedly would remove unrelated boxes
    # (or run past the end of the list).
    merged_indices = {idx for pair in common for idx in pair}
    for i in sorted(merged_indices, reverse=True):
        del bounds[i]

    boxes = []
    for x1, y1, x2, y2 in bounds:
        mid = ((x1 + x2) / 2, (y1 + y2) / 2)
        # Adding 10 to grow borders a bit
        rect = (mid, (x2 - x1 + 10, y2 - y1 + 10), 0)
        boxes.append(np.int0(cv2.boxPoints(rect)))
    return boxes
Example #22
0
    def process(self):
        """
        Segment with kraken

        For every input file the PAGE document is loaded, its image is
        segmented into lines (with optional script detection), the lines are
        added to a new text region and the document is stored as XML in the
        output file group.
        """
        log = getLogger('processor.KrakenSegment')
        for n, input_file in enumerate(self.input_files):
            log.info("INPUT FILE %i / %s", n, input_file)
            downloaded_file = self.workspace.download_file(input_file)
            log.info("downloaded_file %s", downloaded_file)
            pcgts = page_from_file(downloaded_file)
            # TODO binarized variant from get_AlternativeImage()
            image_url = pcgts.get_Page().imageFilename
            log.info("pcgts %s", pcgts)

            im = self.workspace.resolve_image_as_pil(image_url)

            log.info('Segmenting')
            params = self.parameter
            log.info('Params %s', params)
            res = segment(im,
                          params['text_direction'],
                          params['scale'],
                          params['maxcolseps'],
                          params['black_colseps'])
            if params['script_detect']:
                res = detect_scripts(im, res)

            # All detected lines are collected in one placeholder region.
            region = TextRegionType()
            pcgts.get_Page().add_TextRegion(region)
            for lineno, box in enumerate(res['boxes']):
                coords = CoordsType(points=points_from_x0y0x1y1(box))
                region.add_TextLine(
                    TextLineType(id=concat_padded("line", lineno),
                                 Coords=coords))

            file_id = concat_padded(self.output_file_grp, n)
            self.workspace.add_file(
                self.output_file_grp,
                pageId=input_file.pageId,
                ID=file_id,
                mimetype=MIMETYPE_PAGE,
                local_filename="%s/%s.xml" % (self.output_file_grp, file_id),
                content=to_xml(pcgts).encode('utf-8'))
Example #23
0
def transcribe(book_pages_png: list,
               book_path: str = "book",
               name_of_transcribed_file="html_transcribe"):
    """
    A wrapper for transcribing book pages

    :param book_pages_png: List with all of the pages that need to be transcribed
    :param book_path: The path of the pages
    :param name_of_transcribed_file: Name of the output file, without the
        ".html" extension that is appended here
    :return: None
    """
    t_interface = tr.TranscriptionInterface()
    t_interface.text_direction = "rl"
    for page in book_pages_png:
        # Close the source file as soon as the mode '1' copy exists.
        with PIL.Image.open(f"{book_path}/{page}") as source:
            im = source.convert(mode='1')
        # Create segments with the basic segmenter
        segments = segment(im, 'horizontal-rl')
        # Manually fix some errors in the segmentation
        segments['boxes'] = fix_overlapping_boxes(segments['boxes'])
        t_interface.add_page(im, segments)
    # The context manager closes the output even if write() raises.
    with open(f"{name_of_transcribed_file}.html", "wb+") as f:
        t_interface.write(f)
Example #24
0
def transcription(ctx, text_direction, scale, bw, maxcolseps,
                  black_colseps, font, font_style, prefill, pad, lines, output,
                  images):
    """
    Creates transcription environments for ground truth generation.

    Every image is binarized and then either segmented with
    ``pageseg.segment`` or, when ``lines`` is given, paired with the
    segmentation loaded as JSON from that file. When ``prefill`` is truthy
    it is loaded with ``models.load_any`` and its predictions are attached
    to each page. The interface is finally written to ``output``.

    Raises:
        click.UsageError: if ``lines`` is combined with more than one image,
            or if the ``lines`` file does not contain valid JSON.
    """
    from PIL import Image

    from kraken import rpred
    from kraken import pageseg
    from kraken import transcribe
    from kraken import binarization

    from kraken.lib import models
    from kraken.lib.util import is_bitonal

    ti = transcribe.TranscriptionInterface(font, font_style)

    if len(images) > 1 and lines:
        raise click.UsageError('--lines option is incompatible with multiple image files')

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        message('Loading RNN', nl=False)
        prefill = models.load_any(prefill)
        message('\u2713', fg='green')

    with log.progressbar(images, label='Reading images') as bar:
        for fp in bar:
            logger.info('Reading {}'.format(fp.name))
            im = Image.open(fp)
            if im.mode not in ['1', 'L', 'P', 'RGB']:
                logger.warning('Input {} is in {} color mode. Converting to RGB'.format(fp.name, im.mode))
                im = im.convert('RGB')
            logger.info('Binarizing page')
            im_bin = binarization.nlbin(im)
            im_bin = im_bin.convert('1')
            logger.info('Segmenting page')
            if not lines:
                res = pageseg.segment(im_bin, text_direction, scale, maxcolseps, black_colseps, pad=pad)
            else:
                # A separate name keeps the image handle `fp` intact, so the
                # fp.close() below still closes the image file and not the
                # already closed line-definition file.
                with open_file(lines, 'r') as lines_fp:
                    try:
                        lines_fp = cast(IO[Any], lines_fp)
                        res = json.load(lines_fp)
                    except ValueError as e:
                        raise click.UsageError('{} invalid segmentation: {}'.format(lines, str(e)))
            if prefill:
                it = rpred.rpred(prefill, im_bin, res)
                preds = []
                logger.info('Recognizing')
                for pred in it:
                    logger.debug('{}'.format(pred.prediction))
                    preds.append(pred)
                ti.add_page(im, res, records=preds)
            else:
                ti.add_page(im, res)
            fp.close()
    logger.info('Writing transcription to {}'.format(output.name))
    message('Writing output', nl=False)
    ti.write(output)
    message('\u2713', fg='green')
Example #25
0
# Usage: <script> MODEL IMAGE [IMAGE ...]
# Overlays the cuts of every recognized line on each page image and saves
# the result as 'high_<image file name>' in the current directory.
import os
import sys
from itertools import cycle

from PIL import Image, ImageDraw

from kraken.pageseg import segment
from kraken.binarization import nlbin
from kraken.rpred import rpred
from kraken.lib import models

# Endless rotation of half-transparent RGBA fill colours, one per cut.
cmap = cycle([(230, 25, 75, 127), (60, 180, 75, 127), (255, 225, 25, 127),
              (0, 130, 200, 127), (245, 130, 48, 127), (145, 30, 180, 127),
              (70, 240, 240, 127)])

net = models.load_any(sys.argv[1])

for fname in sys.argv[2:]:
    im = Image.open(fname)
    print(fname)
    im = nlbin(im)
    res = segment(im, maxcolseps=0)
    pred = rpred(net, im, res)
    im = im.convert('RGBA')
    # The boxes are drawn on a transparent layer and composited afterwards
    # so the page stays visible underneath.
    tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(tmp)
    for line in pred:
        for box in line.cuts:
            draw.rectangle(box, fill=next(cmap))
    im = Image.alpha_composite(im, tmp)
    # `os` was used here without being imported.
    im.save('high_{}'.format(os.path.basename(fname)))
Example #26
0
# Usage: <script> MODEL IMAGE [IMAGE ...]
# Overlays the cuts of every recognized line on each page image and saves
# the result as 'high_<image file name>' in the current directory.
#
# os, sys and the PIL names are used below; they are imported here because
# this snippet did not import them itself.
import os
import sys
from itertools import cycle

from PIL import Image, ImageDraw

from kraken.pageseg import segment
from kraken.binarization import nlbin
from kraken.rpred import rpred
from kraken.lib import models

# Endless rotation of half-transparent RGBA fill colours, one per cut.
cmap = cycle([(230, 25, 75, 127),
              (60, 180, 75, 127),
              (255, 225, 25, 127),
              (0, 130, 200, 127),
              (245, 130, 48, 127),
              (145, 30, 180, 127),
              (70, 240, 240, 127)])

net = models.load_any(sys.argv[1])

for fname in sys.argv[2:]:
    im = Image.open(fname)
    print(fname)
    im = nlbin(im)
    res = segment(im, maxcolseps=0)
    pred = rpred(net, im, res)
    im = im.convert('RGBA')
    # The boxes are drawn on a transparent layer and composited afterwards
    # so the page stays visible underneath.
    tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(tmp)
    for line in pred:
        for box in line.cuts:
            draw.rectangle(box, fill=next(cmap))
    im = Image.alpha_composite(im, tmp)
    im.save('high_{}'.format(os.path.basename(fname)))
Example #27
0
def segmenter(legacy, model, text_direction, scale, maxcolseps, black_colseps,
              remove_hlines, pad, mask, device, input, output) -> None:
    """
    Segment one page image and write the result to ``output``.

    The legacy box segmenter (``pageseg.segment``) is used when ``legacy`` is
    truthy, the baseline segmenter (``blla.segment``) otherwise.  When this is
    the last processing step and the output mode is not ``'native'`` the
    result is serialized through ``kraken.serialization``; in every other
    case the raw segmentation is dumped as JSON.

    Raises:
        click.BadParameter: if the input image or the mask cannot be opened.
        Exception: anything raised by the segmenter is re-raised after a
            failure mark has been printed.
    """
    import json

    from kraken import pageseg
    from kraken import blla

    ctx = click.get_current_context()
    meta = ctx.meta

    # Only the first step of a processing chain may receive a non-image
    # input; resolve it to the image it references.
    if meta['first_process']:
        input_format = meta['input_format_type']
        if input_format != 'image':
            input = get_input_parser(input_format)(input)['image']
        meta['first_process'] = False

    # Remember the original image for later serialization.
    meta.setdefault('base_image', input)

    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e))

    if mask:
        try:
            mask = Image.open(mask)
        except IOError as e:
            raise click.BadParameter(str(e))

    message('Segmenting\t', nl=False)
    try:
        if not legacy:
            res = blla.segment(im,
                               text_direction,
                               mask=mask,
                               model=model,
                               device=device)
        else:
            res = pageseg.segment(im,
                                  text_direction,
                                  scale,
                                  maxcolseps,
                                  black_colseps,
                                  no_hlines=remove_hlines,
                                  pad=pad,
                                  mask=mask)
    except Exception:
        message('\u2717', fg='red')
        raise

    write_native = not meta['last_process'] or meta['output_mode'] == 'native'
    if write_native:
        with open_file(output, 'w') as fp:
            fp = cast(IO[Any], fp)
            json.dump(res, fp)
    else:
        with open_file(output, 'w', encoding='utf-8') as fp:
            fp = cast(IO[Any], fp)
            logger.info('Serializing as {} into {}'.format(
                meta['output_mode'], output))
            from kraken import serialization
            from kraken.rpred import ocr_record

            def _box_record(box):
                # Turn a flat coordinate sequence into a four-corner outline
                # spanning its extreme x (even indices) and y (odd indices).
                xs, ys = box[::2], box[1::2]
                left, right = min(xs), max(xs)
                top, bottom = min(ys), max(ys)
                return ocr_record('', [], [], [[left, top], [left, bottom],
                                               [right, bottom], [right, top]])

            if res.get('type') == 'baselines':
                records = [ocr_record('', '', '', bl) for bl in res['lines']]
            else:
                records = [_box_record(box) for box in res['boxes']]

            serialized = serialization.serialize(
                records,
                image_name=meta['base_image'],
                image_size=im.size,
                regions=res.get('regions'),
                template=meta['output_mode'])
            fp.write(serialized)
    message('\u2713', fg='green')
Example #28
0
def transcription(ctx, text_direction, scale, bw, maxcolseps, black_colseps,
                  font, font_style, prefill, pad, lines, output, images):
    """
    Creates transcription environments for ground truth generation.

    Every input image is binarized and split into lines, either by the
    legacy page segmenter or by loading a precomputed JSON segmentation from
    ``lines``.  If ``prefill`` is given, the lines are recognized with that
    model and the predictions are added alongside the page.  All pages end
    up in one transcription interface which is finally written to
    ``output``.

    Args:
        ctx: Not used in this function.
        text_direction, scale, maxcolseps, black_colseps, pad: Passed
            through unchanged to ``pageseg.segment``.
        bw: Not used in this function.
        font, font_style: Passed to ``transcribe.TranscriptionInterface``.
        prefill: Optional model location; loaded with ``models.load_any``
            when truthy.
        lines: Optional path to a JSON segmentation; only valid together
            with a single image.
        output: Open output file (its ``name`` is logged) handed to
            ``TranscriptionInterface.write``.
        images: Sequence of open image files; each one is closed once its
            page has been added.

    Raises:
        click.UsageError: If ``lines`` is combined with several images or
            the segmentation file does not contain valid JSON.
    """
    import json

    from PIL import Image

    from kraken import rpred
    from kraken import pageseg
    from kraken import transcribe
    from kraken import binarization

    from kraken.lib import models
    from kraken.lib.util import is_bitonal

    ti = transcribe.TranscriptionInterface(font, font_style)

    if len(images) > 1 and lines:
        raise click.UsageError(
            '--lines option is incompatible with multiple image files')

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        message('Loading RNN', nl=False)
        prefill = models.load_any(prefill)
        message('\u2713', fg='green')

    with log.progressbar(images, label='Reading images') as bar:
        for fp in bar:
            logger.info('Reading {}'.format(fp.name))
            im = Image.open(fp)
            if im.mode not in ['1', 'L', 'P', 'RGB']:
                logger.warning(
                    'Input {} is in {} color mode. Converting to RGB'.format(
                        fp.name, im.mode))
                im = im.convert('RGB')
            logger.info('Binarizing page')
            im_bin = binarization.nlbin(im)
            im_bin = im_bin.convert('1')
            logger.info('Segmenting page')
            if not lines:
                res = pageseg.segment(im_bin,
                                      text_direction,
                                      scale,
                                      maxcolseps,
                                      black_colseps,
                                      pad=pad)
            else:
                # Use a separate name for the segmentation file so the image
                # file handle ``fp`` stays bound and can be closed below.
                with open_file(lines, 'r') as lines_fp:
                    try:
                        lines_fp = cast(IO[Any], lines_fp)
                        res = json.load(lines_fp)
                    except ValueError as e:
                        raise click.UsageError(
                            '{} invalid segmentation: {}'.format(
                                lines, str(e))) from e
            if prefill:
                it = rpred.rpred(prefill, im_bin, res)
                preds = []
                logger.info('Recognizing')
                for pred in it:
                    logger.debug('{}'.format(pred.prediction))
                    preds.append(pred)
                ti.add_page(im, res, records=preds)
            else:
                ti.add_page(im, res)
            # Release the image file now that its page has been added.
            fp.close()
    logger.info('Writing transcription to {}'.format(output.name))
    message('Writing output', nl=False)
    ti.write(output)
    message('\u2713', fg='green')
Example #29
0
 def test_segment_color(self):
     """
     Run ``segment`` on the ``input.jpg`` resource to exercise color input.
     """
     color_page = os.path.join(resources, 'input.jpg')
     with Image.open(color_page) as page:
         segment(page)
# function looks particularly appropriate. I love how expressive this library is on the
# documentation front -- I can see immediately that we are working with PIL.Image files,
# and the author has even indicated that we need to pass in either a binarized (e.g. '1')
# or grayscale (e.g. 'L') image. We can also see that the return value is a dictionary
# object with two keys, "text_direction" which will return to us a string of the
# direction of the text, and "boxes" which appears to be a list of tuples, where each
# tuple is a box in the original image.
#
# Let's try this on an image of text. I have a simple bit of text in a file called
# two_col.png, which is taken from a newspaper on campus here.
from PIL import Image
im = Image.open("readonly/two_col.png")
# Let's display the image inline (display is not imported in this span;
# presumably it is the notebook's built-in helper)
display(im)
# Now let's convert it to 1-bit black and white, segment it into lines with kraken,
# and keep only the 'boxes' entry of the returned dictionary
bounding_boxes = pageseg.segment(im.convert('1'))['boxes']
# And let's print those boxes to the screen
print(bounding_boxes)

# In[41]:


# OK, a pretty simple two-column text, and then a list of lists which are the bounding boxes
# of the lines of that text. Let's write a little routine to see the effects a bit more
# clearly. I'm going to clean up my act a bit and write real documentation too; it's good
# practice.
def show_boxes(img):
    '''Modifies the passed image to show a series of bounding boxes on an image as run by kraken
    
    :param img: A PIL.Image object
    :return img: The modified PIL.Image object
Example #31
0
def segmenter(legacy, model, text_direction, scale, maxcolseps, black_colseps,
              remove_hlines, pad, mask, device, input, output) -> None:
    """
    Segment one page image and write the result to ``output``.

    The legacy box segmenter (``pageseg.segment``) is used when ``legacy`` is
    truthy, the baseline segmenter (``blla.segment``) otherwise.  When this is
    the last processing step and the output mode is not ``'native'`` the
    result goes through ``serialization.serialize_segmentation``; in every
    other case the raw segmentation is dumped as JSON.

    On a segmentation failure the exception is re-raised if the context's
    ``raise_failed`` flag is set; otherwise a failure mark is printed and the
    command exits with status 1.

    Raises:
        click.BadParameter: if the input image or the mask cannot be opened.
    """
    import json

    from kraken import pageseg
    from kraken import blla

    ctx = click.get_current_context()
    meta = ctx.meta

    # Only the first step of a processing chain may receive a non-image
    # input; resolve it to the image it references.
    if meta['first_process']:
        input_format = meta['input_format_type']
        if input_format != 'image':
            input = get_input_parser(input_format)(input)['image']
        meta['first_process'] = False

    # Remember the original image for later serialization.
    meta.setdefault('base_image', input)

    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e))

    if mask:
        try:
            mask = Image.open(mask)
        except IOError as e:
            raise click.BadParameter(str(e))

    message('Segmenting\t', nl=False)
    try:
        if not legacy:
            res = blla.segment(im,
                               text_direction,
                               mask=mask,
                               model=model,
                               device=device)
        else:
            res = pageseg.segment(im,
                                  text_direction,
                                  scale,
                                  maxcolseps,
                                  black_colseps,
                                  no_hlines=remove_hlines,
                                  pad=pad,
                                  mask=mask)
    except Exception:
        if meta['raise_failed']:
            raise
        message('\u2717', fg='red')
        ctx.exit(1)

    write_native = not meta['last_process'] or meta['output_mode'] == 'native'
    if write_native:
        with click.open_file(output, 'w') as fp:
            fp = cast(IO[Any], fp)
            json.dump(res, fp)
    else:
        with click.open_file(output, 'w', encoding='utf-8') as fp:
            fp = cast(IO[Any], fp)
            logger.info('Serializing as {} into {}'.format(
                meta['output_mode'], output))
            from kraken import serialization
            from kraken.rpred import ocr_record
            serialized = serialization.serialize_segmentation(
                res,
                image_name=meta['base_image'],
                image_size=im.size,
                template=meta['output_mode'])
            fp.write(serialized)
    message('\u2713', fg='green')
Example #32
0
 def test_segment_color(self):
     """
     Run ``segment`` on the ``input.jpg`` resource to exercise color input.
     """
     color_page = os.path.join(resources, 'input.jpg')
     with Image.open(color_page) as page:
         segment(page)