def test_segment_color(self):
    """
    Color input must be rejected by the segmenter.

    Opens the 'input.jpg' resource and expects segment() to raise
    KrakenInputException for it.
    """
    with raises(KrakenInputException), Image.open(resources / 'input.jpg') as color_im:
        segment(color_im)
def segmenter(input_image, text_direction, script_detect, allowed_scripts, scale, maxcolseps, black_colseps, remove_hlines, pad, mask_filepath) -> Image:
    """
    Segment a page image into lines, optionally followed by script detection.

    Args:
        input_image: Image handed to pageseg.segment.
        text_direction, scale, maxcolseps, black_colseps: Forwarded
            positionally to pageseg.segment.
        script_detect: When truthy, the segmentation is post-processed with
            kraken.pageseg.detect_scripts.
        allowed_scripts: Forwarded as ``valid_scripts`` to detect_scripts.
        remove_hlines: Forwarded as ``no_hlines``.
        pad: Forwarded as ``pad``.
        mask_filepath: Optional path of a mask image; falsy means no mask.

    Returns:
        The segmentation result (after script detection when requested).

    Raises:
        IOError: Re-raised when the mask cannot be opened.
        Exception: Any segmentation failure is re-raised after a message has
            been printed.
    """
    mask = None
    if mask_filepath:
        try:
            mask = Image.open(mask_filepath)
        except IOError:
            print('Failed to load a mask, {}.'.format(mask_filepath))
            raise

    try:
        segmentation = pageseg.segment(input_image,
                                       text_direction,
                                       scale,
                                       maxcolseps,
                                       black_colseps,
                                       no_hlines=remove_hlines,
                                       pad=pad,
                                       mask=mask)
        if not script_detect:
            return segmentation
        return kraken.pageseg.detect_scripts(input_image,
                                             segmentation,
                                             valid_scripts=allowed_scripts)
    except Exception:
        print('Page segmentation error.')
        raise
def segmenter(text_direction, script_detect, allowed_scripts, scale, maxcolseps, black_colseps, remove_hlines, pad, mask, base_image, input, output) -> None:
    """
    Segment the image at ``input`` and dump the result as JSON to ``output``.

    ``mask`` is an optional path of a mask image. Files that cannot be opened
    as images are reported as click.BadParameter. Progress is reported
    through message(); a red cross is emitted before any segmentation error
    is re-raised.
    """
    import json

    from kraken import pageseg

    def _open_image(path):
        # Turn unreadable image files into a CLI parameter error.
        try:
            return Image.open(path)
        except IOError as err:
            raise click.BadParameter(str(err))

    page = _open_image(input)
    if mask:
        mask = _open_image(mask)

    message('Segmenting\t', nl=False)
    try:
        segmentation = pageseg.segment(page,
                                       text_direction,
                                       scale,
                                       maxcolseps,
                                       black_colseps,
                                       no_hlines=remove_hlines,
                                       pad=pad,
                                       mask=mask)
        if script_detect:
            segmentation = pageseg.detect_scripts(page,
                                                  segmentation,
                                                  valid_scripts=allowed_scripts)
    except Exception:
        message('\u2717', fg='red')
        raise

    with open_file(output, 'w') as fp:
        json.dump(segmentation, cast(IO[Any], fp))
    message('\u2713', fg='green')
def segmentation_kraken(doc, method=u'segment_kraken', black_colseps=False):
    """
    Performs page segmentation using kraken's built-in algorithm and writes a
    skeleton TEI file.

    Args:
        doc (unicode, unicode): The input document tuple
        method (unicode): The suffix string append to all output files
        black_colseps (bool): Assume black column separator instead of white
                              ones.

    Returns:
        The result of storage.get_storage_path() for the written '.xml'
        file.
    """
    input_path = storage.get_abs_path(*doc)
    # Only the stem is needed; the original extension is replaced by '.xml'.
    output_path = os.path.splitext(storage.insert_suffix(input_path, method))[0]
    xml_path = output_path + '.xml'
    logger.debug('Reading image using PIL')
    # Both the image and the output file are released when the block exits,
    # including on errors.
    with Image.open(input_path) as img, open(xml_path, 'w') as fp:
        logger.debug('Initializing TEI with {} ({} {})'.format(doc[1], *img.size))
        tei = OCRRecord()
        tei.img = storage.get_url(*doc)
        tei.dimensions = img.size
        tei.title = os.path.basename(doc[1])
        tei.add_respstmt('kraken', 'page segmentation')
        for seg in pageseg.segment(img, black_colseps=black_colseps)['boxes']:
            logger.debug('Found line at {} {} {} {}'.format(*seg))
            tei.add_line(seg)
        logger.debug('Write segmentation to {}'.format(fp.name))
        tei.write_tei(fp)
    return storage.get_storage_path(xml_path)
def segmenter(text_direction, script_detect, allowed_scripts, scale, maxcolseps, black_colseps, remove_hlines, pad, base_image, input, output) -> None:
    """
    Segment the image at ``input`` and dump the result as JSON to ``output``.

    An unreadable input image is reported as click.BadParameter. When
    ``script_detect`` is truthy the segmentation is additionally passed
    through pageseg.detect_scripts with ``allowed_scripts`` as valid scripts.
    A red cross is emitted before any segmentation error is re-raised.
    """
    import json

    from kraken import pageseg

    try:
        page = Image.open(input)
    except IOError as err:
        raise click.BadParameter(str(err))

    message('Segmenting\t', nl=False)
    try:
        segmentation = pageseg.segment(page,
                                       text_direction,
                                       scale,
                                       maxcolseps,
                                       black_colseps,
                                       no_hlines=remove_hlines,
                                       pad=pad)
        if script_detect:
            segmentation = pageseg.detect_scripts(page,
                                                  segmentation,
                                                  valid_scripts=allowed_scripts)
    except Exception:
        message('\u2717', fg='red')
        raise

    with open_file(output, 'w') as fp:
        json.dump(segmentation, cast(IO[Any], fp))
    message('\u2713', fg='green')
def segmentation_kraken(doc, method=u'segment_kraken', black_colseps=False):
    """
    Performs page segmentation using kraken's built-in algorithm and writes a
    skeleton TEI file.

    Args:
        doc (unicode, unicode): The input document tuple
        method (unicode): The suffix string append to all output files
        black_colseps (bool): Assume black column separator instead of white
                              ones.

    Returns:
        The result of storage.get_storage_path() for the written '.xml'
        file.
    """
    input_path = storage.get_abs_path(*doc)
    # The extension of the suffixed path is discarded; output is always '.xml'.
    stem = os.path.splitext(storage.insert_suffix(input_path, method))[0]
    tei_path = stem + '.xml'
    logger.debug('Reading image using PIL')
    # Context managers guarantee the image and the TEI file are closed even
    # when segmentation or serialization fails.
    with Image.open(input_path) as img, open(tei_path, 'w') as fp:
        logger.debug('Initializing TEI with {} ({} {})'.format(doc[1], *img.size))
        tei = OCRRecord()
        tei.img = storage.get_url(*doc)
        tei.dimensions = img.size
        tei.title = os.path.basename(doc[1])
        tei.add_respstmt('kraken', 'page segmentation')
        for seg in pageseg.segment(img, black_colseps=black_colseps)['boxes']:
            logger.debug('Found line at {} {} {} {}'.format(*seg))
            tei.add_line(seg)
        logger.debug('Write segmentation to {}'.format(fp.name))
        tei.write_tei(fp)
    return storage.get_storage_path(tei_path)
def transcription(ctx, text_direction, scale, maxcolseps, black_colseps, font, font_style, prefill, output, images):
    """
    Build a transcription interface from a set of page images.

    Each image is binarized when it is not bitonal, segmented, optionally
    pre-filled with predictions of the model named by ``prefill`` and added
    to the interface, which is finally written to ``output``. With
    ctx.meta['verbose'] > 0 timestamped progress lines are echoed; otherwise
    a spinner is shown per stage.
    """
    st_time = time.time()

    def _finish_spinner():
        # Replace the spinner glyph with a green check mark, then emit the
        # '\033[?25h' escape sequence and a newline.
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)

    ti = transcribe.TranscriptionInterface(font, font_style)

    if prefill:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Loading model {}'.format(time.time() - st_time, prefill))
        else:
            spin('Loading RNN')
        prefill = models.load_any(prefill.encode('utf-8'))
        if not ctx.meta['verbose']:
            _finish_spinner()

    for fp in images:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Reading {}'.format(time.time() - st_time, fp.name))
        else:
            spin('Reading images')
        im = Image.open(fp)

        if not binarization.is_bitonal(im):
            if ctx.meta['verbose'] > 0:
                click.echo(u'[{:2.4f}] Binarizing page'.format(time.time() - st_time))
            im = binarization.nlbin(im)

        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Segmenting page'.format(time.time() - st_time))
        res = pageseg.segment(im, text_direction, scale, maxcolseps, black_colseps)

        if not prefill:
            ti.add_page(im, res)
        else:
            preds = []
            for pred in rpred.rpred(prefill, im, res):
                if ctx.meta['verbose'] > 0:
                    click.echo(u'[{:2.4f}] {}'.format(time.time() - st_time, pred.prediction))
                else:
                    spin('Recognizing')
                preds.append(pred)
            if ctx.meta['verbose'] > 0:
                click.echo(u'Execution time: {}s'.format(time.time() - st_time))
            else:
                _finish_spinner()
            ti.add_page(im, res, records=preds)
        fp.close()

    if not ctx.meta['verbose']:
        _finish_spinner()

    if ctx.meta['verbose'] > 0:
        click.echo(u'[{:2.4f}] Writing transcription to {}'.format(time.time() - st_time, output.name))
    else:
        spin('Writing output')
    ti.write(output)
    if not ctx.meta['verbose']:
        _finish_spinner()
def transcription(ctx, font, font_style, prefill, output, images):
    """
    Build a transcription interface from a set of page images.

    Each image is binarized when it is not bitonal, segmented with the
    default pageseg.segment settings, optionally pre-filled with predictions
    of the model named by ``prefill`` and added to the interface, which is
    finally written to ``output``. With ctx.meta['verbose'] > 0 timestamped
    progress lines are echoed; otherwise a spinner is shown per stage.
    """
    st_time = time.time()

    def _finish_spinner():
        # Replace the spinner glyph with a green check mark, then emit the
        # '\033[?25h' escape sequence and a newline.
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)

    ti = transcrib.TranscriptionInterface(font, font_style)

    if prefill:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Loading model {}'.format(time.time() - st_time, prefill))
        else:
            spin('Loading RNN')
        prefill = models.load_any(prefill)
        if not ctx.meta['verbose']:
            _finish_spinner()

    for fp in images:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Reading {}'.format(time.time() - st_time, fp.name))
        else:
            spin('Reading images')
        im = Image.open(fp)

        if not binarization.is_bitonal(im):
            if ctx.meta['verbose'] > 0:
                click.echo(u'[{:2.4f}] Binarizing page'.format(time.time() - st_time))
            im = binarization.nlbin(im)

        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Segmenting page'.format(time.time() - st_time))
        res = pageseg.segment(im)

        if not prefill:
            ti.add_page(im, res)
        else:
            preds = []
            for pred in rpred.rpred(prefill, im, res):
                if ctx.meta['verbose'] > 0:
                    click.echo(u'[{:2.4f}] {}'.format(time.time() - st_time, pred.prediction))
                else:
                    spin('Recognizing')
                preds.append(pred)
            if ctx.meta['verbose'] > 0:
                click.echo(u'Execution time: {}s'.format(time.time() - st_time))
            else:
                _finish_spinner()
            # Only the prediction records are handed over in this branch.
            ti.add_page(im, records=preds)

    if not ctx.meta['verbose']:
        _finish_spinner()

    if ctx.meta['verbose'] > 0:
        click.echo(u'[{:2.4f}] Writing transcription to {}'.format(time.time() - st_time, output.name))
    else:
        spin('Writing output')
    ti.write(output)
    if not ctx.meta['verbose']:
        _finish_spinner()
def segmenter(legacy, model, text_direction, scale, maxcolseps, black_colseps, remove_hlines, pad, mask, device, input, output) -> None:
    """
    Segment the image behind ``input`` and dump the result as JSON to
    ``output``.

    On the first processing step a non-image input format is resolved to its
    image through get_input_parser. ``legacy`` selects pageseg.segment;
    otherwise blla.segment is run with ``model`` on ``device``. Unreadable
    image or mask files are reported as click.BadParameter, and a red cross
    is emitted before any segmentation error is re-raised.
    """
    import json

    from kraken import pageseg
    from kraken import blla

    def _open_image(path):
        # Turn unreadable image files into a CLI parameter error.
        try:
            return Image.open(path)
        except IOError as err:
            raise click.BadParameter(str(err))

    ctx = click.get_current_context()
    if ctx.meta['first_process']:
        if ctx.meta['input_format_type'] != 'image':
            parse_input = get_input_parser(ctx.meta['input_format_type'])
            input = parse_input(input)['image']
        ctx.meta['first_process'] = False

    # Remember the first image seen as the base image of the pipeline.
    if 'base_image' not in ctx.meta:
        ctx.meta['base_image'] = input

    im = _open_image(input)
    if mask:
        mask = _open_image(mask)

    message('Segmenting\t', nl=False)
    try:
        if legacy:
            res = pageseg.segment(im,
                                  text_direction,
                                  scale,
                                  maxcolseps,
                                  black_colseps,
                                  no_hlines=remove_hlines,
                                  pad=pad,
                                  mask=mask)
        else:
            res = blla.segment(im, text_direction, mask=mask, model=model, device=device)
    except Exception:
        message('\u2717', fg='red')
        raise

    with open_file(output, 'w') as fp:
        json.dump(res, cast(IO[Any], fp))
    message('\u2713', fg='green')
def bound_image(cv2image):
    """
    Detect candidate text regions and merge neighbouring/overlapping ones.

    :param cv2image: Numpy array representing the text section of the cardboard
    :return: The detected regions that might contain text according to the
             kraken page segmenter, as a list of integer corner-point arrays
             produced by cv2.boxPoints (each grown by 10 pixels per axis)
    """
    binary = _binarize(cv2image)
    bounds = list(pageseg.segment(binary))
    count = len(bounds)

    # Two boxes count as neighbours when their edges are closer than 3% of
    # the image height (y) / width (x).
    tol_y = 0.03 * cv2image.shape[0]
    tol_x = 0.03 * cv2image.shape[1]

    common = []
    for i in range(count):
        for j in range(i + 1, count):
            rect_a = bounds[i]
            rect_b = bounds[j]
            # TODO Fix merging algorithm
            stacked = (abs(rect_a[0] - rect_b[0]) < tol_x
                       and abs(rect_a[3] - rect_b[1]) < tol_y)
            side_by_side = (abs(rect_a[1] - rect_b[1]) < tol_y
                            and abs(rect_a[2] - rect_b[0]) < tol_x)
            overlapping = (rect_a[0] < rect_b[2] and rect_a[2] > rect_b[0]
                           and rect_a[1] < rect_b[3] and rect_a[3] > rect_b[1])
            if stacked or side_by_side or overlapping:
                common.append((i, j))

    # Append the union box of every matched pair after the original boxes.
    for (f, s) in common:
        b1 = bounds[f]
        b2 = bounds[s]
        bounds.append([
            min(b1[0], b2[0]),
            min(b1[1], b2[1]),
            max(b1[2], b2[2]),
            max(b1[3], b2[3]),
        ])

    # A box may take part in several pairs. Deleting its index more than once
    # would remove unrelated boxes that shifted into that position, so every
    # merged source index is deleted exactly once, highest first.
    merged_indices = {idx for pair in common for idx in pair}
    for i in sorted(merged_indices, reverse=True):
        del bounds[i]

    boxes = []
    for x1, y1, x2, y2 in bounds:
        mid = ((x1 + x2) / 2, (y1 + y2) / 2)
        # Adding 10 to grow borders a bit
        rect = (mid, (x2 - x1 + 10, y2 - y1 + 10), 0)
        # np.int0 was an alias of np.intp and no longer exists in NumPy 2.
        boxes.append(cv2.boxPoints(rect).astype(np.intp))
    return boxes
def get_text(image):
    '''Return the text found inside a PIL Image.

    The image is converted to mode '1' and segmented with kraken's
    pageseg.segment; every resulting box is cropped from the original image
    and recognized with pytesseract.

    :param image: A PIL.Image object
    :return: A string in which each recognized box is preceded by a single
             space (the empty string when no boxes are found)
    '''
    # get the list of the coordinates of the boxes that contain text
    page_boxes = pageseg.segment(image.convert('1'))['boxes']
    # Each box is handed to Image.crop unchanged; the pieces are joined in one
    # pass instead of growing a string with repeated concatenation.
    return ''.join(
        ' ' + pytesseract.image_to_string(image.crop(box))
        for box in page_boxes
    )
def segmenter(scale, black_colseps, base_image, input, output):
    """
    Segment the image at ``input`` and write one box per line to ``output``.

    Every box is written as four comma-separated values. An unreadable input
    image is reported as click.BadParameter; a red cross is echoed before a
    segmentation error is re-raised.
    """
    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e))
    click.echo('Segmenting\t', nl=False)
    try:
        res = pageseg.segment(im, scale, black_colseps)
    # Catch Exception rather than everything, so KeyboardInterrupt and
    # SystemExit propagate without being reported as segmentation failures.
    except Exception:
        click.secho(u'\u2717', fg='red')
        raise
    with open_file(output, 'w') as fp:
        for box in res:
            fp.write(u'{},{},{},{}\n'.format(*box))
    click.secho(u'\u2713', fg='green')
def test_segment_bw(self):
    """
    Bi-level input yields a plausible number of lines, all inside the image.
    """
    with Image.open(os.path.join(resources, 'bw.png')) as im:
        detected = segment(im)
        # the line count only has to be in the right ballpark
        self.assertAlmostEqual(len(detected),
                               30,
                               msg='Segmentation differs wildly from true line count',
                               delta=5)
        # every box must lie strictly inside the image
        width, height = im.size
        for bbox in detected:
            self.assertLess(0, bbox[0], msg='Line x0 < 0')
            self.assertLess(0, bbox[1], msg='Line y0 < 0')
            self.assertGreater(width, bbox[2], msg='Line x1 > {}'.format(width))
            self.assertGreater(height, bbox[3], msg='Line y1 > {}'.format(height))
def test_segment_bw(self):
    """
    Bi-level input yields a plausible number of lines, all inside the image.
    """
    with Image.open(os.path.join(resources, 'bw.png')) as im:
        detected = segment(im)
        # the line count only has to be in the right ballpark
        self.assertAlmostEqual(len(detected['boxes']),
                               30,
                               msg='Segmentation differs wildly from true line count',
                               delta=5)
        # every box must lie strictly inside the image
        width, height = im.size
        for bbox in detected['boxes']:
            self.assertLess(0, bbox[0], msg='Line x0 < 0')
            self.assertLess(0, bbox[1], msg='Line y0 < 0')
            self.assertGreater(width, bbox[2], msg='Line x1 > {}'.format(width))
            self.assertGreater(height, bbox[3], msg='Line y1 > {}'.format(height))
def segmenter(text_direction, script_detect, scale, maxcolseps, black_colseps, base_image, input, output):
    """
    Segment the image at ``input`` and dump the result as JSON to ``output``.

    When ``script_detect`` is truthy the segmentation is additionally passed
    through pageseg.detect_scripts. An unreadable input image is reported as
    click.BadParameter; a red cross is echoed before a segmentation error is
    re-raised.
    """
    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e))
    click.echo('Segmenting\t', nl=False)
    try:
        res = pageseg.segment(im, text_direction, scale, maxcolseps, black_colseps)
        if script_detect:
            res = pageseg.detect_scripts(im, res)
    # Catch Exception rather than everything, so KeyboardInterrupt and
    # SystemExit propagate without being reported as segmentation failures.
    except Exception:
        click.secho(u'\u2717', fg='red')
        raise
    with open_file(output, 'w') as fp:
        json.dump(res, fp)
    click.secho(u'\u2713', fg='green')
def calculate_line_height(img):
    '''Calculates the average height of a line from a given image

    :param img: A PIL.Image object
    :return: The average line height in pixels, truncated to an integer;
             0 when the segmenter finds no lines
    '''
    # Let's get a list of bounding boxes for this image
    bounding_boxes = pageseg.segment(img.convert('1'))['boxes']
    # Without any detected line there is nothing to average; return 0 instead
    # of failing with a division by zero.
    if not bounding_boxes:
        return 0
    # The height of a box is box[3] - box[1]. PIL counts from the upper left
    # corner, so the larger y value is the lower edge.
    total_height = sum(box[3] - box[1] for box in bounding_boxes)
    # Change the average to a full pixel count by making it an integer
    return int(total_height / len(bounding_boxes))
def transcription(ctx, text_direction, scale, maxcolseps, black_colseps, font, font_style, prefill, output, images, segment_page):
    """
    Build a transcription interface from a set of page images.

    Each image is binarized when it is not bitonal and then either segmented
    (``segment_page`` truthy) or treated as one box spanning the whole image.
    When ``prefill`` names a model, its predictions are added as records.
    The interface is finally written to ``output``.
    """
    def _finish_spinner():
        # Replace the spinner glyph with a green check mark, then emit the
        # '\033[?25h' escape sequence and a newline.
        message(u'\b\u2713', fg='green', nl=False)
        message('\033[?25h\n', nl=False)

    ti = transcribe.TranscriptionInterface(font, font_style)

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        spin('Loading RNN')
        prefill = models.load_any(prefill.encode('utf-8'))
        _finish_spinner()

    for fp in images:
        logger.info('Reading {}'.format(fp.name))
        spin('Reading images')
        im = Image.open(fp)

        if not binarization.is_bitonal(im):
            logger.info(u'Binarizing page')
            im = binarization.nlbin(im)

        if not segment_page:
            # no segmentation requested: a single box covering the full page
            res = {
                'text_direction': 'horizontal-tb',
                'boxes': [(0, 0) + im.size],
            }
        else:
            logger.info(u'Segmenting page')
            res = pageseg.segment(im, text_direction, scale, maxcolseps, black_colseps)

        if not prefill:
            ti.add_page(im, res)
        else:
            preds = []
            for pred in rpred.rpred(prefill, im, res):
                logger.info('{}'.format(pred.prediction))
                spin('Recognizing')
                preds.append(pred)
            _finish_spinner()
            ti.add_page(im, res, records=preds)
        fp.close()

    _finish_spinner()
    logger.info(u'Writing transcription to {}'.format(output.name))
    spin('Writing output')
    ti.write(output)
    _finish_spinner()
def show_boxes(img):
    '''Outline in red every line box kraken finds on the passed image

    :param img: A PIL.Image object; it is drawn on directly
    :return img: The same PIL.Image object, now carrying the outlines
    '''
    from PIL import ImageDraw

    # drawing handle bound to the image that was passed in
    canvas = ImageDraw.Draw(img)
    # segment a mode '1' copy and outline each reported box
    for rect in pageseg.segment(img.convert('1'))['boxes']:
        canvas.rectangle(rect, fill=None, outline='red')
    return img
def show_boxes(img):
    '''Outline in red every line box kraken finds on the passed image

    :param img: A PIL.Image object; it is drawn on directly
    :return img: The same PIL.Image object, now carrying the outlines
    '''
    # drawing handle bound to the image that was passed in
    canvas = ImageDraw.Draw(img)
    # black_colseps=True asks kraken to assume that columns are separated by
    # black lines
    segmentation = pageseg.segment(img.convert('1'), black_colseps=True)
    for rect in segmentation['boxes']:
        canvas.rectangle(rect, fill=None, outline='red')
    return img
def bound_image(cv2image):
    """
    Detect candidate text regions and merge neighbouring/overlapping ones.

    :param cv2image: Numpy array representing the text section of the cardboard
    :return: The detected regions that might contain text according to the
             kraken page segmenter, as a list of integer corner-point arrays
             produced by cv2.boxPoints (each grown by 10 pixels per axis)
    """
    binary = _binarize(cv2image)
    bounds = list(pageseg.segment(binary))
    count = len(bounds)

    # Two boxes count as neighbours when their edges are closer than 3% of
    # the image height (y) / width (x).
    tol_y = 0.03 * cv2image.shape[0]
    tol_x = 0.03 * cv2image.shape[1]

    common = []
    for i in range(count):
        for j in range(i + 1, count):
            rect_a = bounds[i]
            rect_b = bounds[j]
            stacked = (abs(rect_a[0] - rect_b[0]) < tol_x
                       and abs(rect_a[3] - rect_b[1]) < tol_y)
            side_by_side = (abs(rect_a[1] - rect_b[1]) < tol_y
                            and abs(rect_a[2] - rect_b[0]) < tol_x)
            overlapping = (rect_a[0] < rect_b[2] and rect_a[2] > rect_b[0]
                           and rect_a[1] < rect_b[3] and rect_a[3] > rect_b[1])
            # A pair is recorded once even when several criteria match, so
            # its merged box is not appended multiple times.
            if stacked or side_by_side or overlapping:
                common.append((i, j))

    # Append the union box of every matched pair after the original boxes.
    for (f, s) in common:
        b1 = bounds[f]
        b2 = bounds[s]
        bounds.append([
            min(b1[0], b2[0]),
            min(b1[1], b2[1]),
            max(b1[2], b2[2]),
            max(b1[3], b2[3]),
        ])

    # A box may take part in several pairs. Deleting its index more than once
    # would remove unrelated boxes that shifted into that position, so every
    # merged source index is deleted exactly once, highest first.
    merged_indices = {idx for pair in common for idx in pair}
    for i in sorted(merged_indices, reverse=True):
        del bounds[i]

    boxes = []
    for x1, y1, x2, y2 in bounds:
        mid = ((x1 + x2) / 2, (y1 + y2) / 2)
        # Adding 10 to grow borders a bit
        rect = (mid, (x2 - x1 + 10, y2 - y1 + 10), 0)
        # np.int0 was an alias of np.intp and no longer exists in NumPy 2.
        boxes.append(cv2.boxPoints(rect).astype(np.intp))
    return boxes
def process(self):
    """
    Segment with kraken

    For every input file the page document is loaded, its image is segmented
    (and optionally run through script detection), the detected line boxes
    are attached as text lines to a single new text region, and the page is
    added as XML to the output file group.
    """
    log = getLogger('processor.KrakenSegment')
    segment_keys = ('text_direction', 'scale', 'maxcolseps', 'black_colseps')

    for (n, input_file) in enumerate(self.input_files):
        log.info("INPUT FILE %i / %s", n, input_file)
        downloaded_file = self.workspace.download_file(input_file)
        log.info("downloaded_file %s", downloaded_file)
        pcgts = page_from_file(downloaded_file)
        # TODO binarized variant from get_AlternativeImage()
        image_url = pcgts.get_Page().imageFilename
        log.info("pcgts %s", pcgts)
        im = self.workspace.resolve_image_as_pil(image_url)

        log.info('Segmenting')
        log.info('Params %s', self.parameter)
        res = segment(im, *[self.parameter[key] for key in segment_keys])
        if self.parameter['script_detect']:
            res = detect_scripts(im, res)

        # All detected lines are collected in one placeholder region.
        dummyRegion = TextRegionType()
        pcgts.get_Page().add_TextRegion(dummyRegion)
        for lineno, box in enumerate(res['boxes']):
            coords = CoordsType(points=points_from_x0y0x1y1(box))
            dummyRegion.add_TextLine(
                TextLineType(id=concat_padded("line", lineno), Coords=coords))

        ID = concat_padded(self.output_file_grp, n)
        self.workspace.add_file(
            self.output_file_grp,
            pageId=input_file.pageId,
            ID=ID,
            mimetype=MIMETYPE_PAGE,
            local_filename="%s/%s.xml" % (self.output_file_grp, ID),
            content=to_xml(pcgts).encode('utf-8'),
        )
def transcribe(book_pages_png: list, book_path: str = "book", name_of_transcribed_file="html_transcribe"):
    """
    A wrapper for transcribing book pages

    :param book_pages_png: List with all of the pages that need to be transcribed
    :param book_path: The path of the pages
    :param name_of_transcribed_file: Name of the output file without the
        '.html' extension
    :return: None
    """
    t_interface = tr.TranscriptionInterface()
    t_interface.text_direction = "rl"

    for page in book_pages_png:
        im = PIL.Image.open(f"{book_path}/{page}").convert(mode='1')
        # Create segments with the basic segmentor
        segments = segment(im, 'horizontal-rl')
        # Manually fix some errors in the segmentation
        segments['boxes'] = fix_overlapping_boxes(segments['boxes'])
        t_interface.add_page(im, segments)

    # The context manager closes the output file even when writing fails.
    with open(f"{name_of_transcribed_file}.html", "wb+") as f:
        t_interface.write(f)
def transcription(ctx, text_direction, scale, bw, maxcolseps, black_colseps, font, font_style, prefill, pad, lines, output, images):
    """
    Creates transcription environments for ground truth generation.

    Every image is binarized and then either segmented with pageseg.segment
    or, when ``lines`` is given, paired with the segmentation loaded from
    that JSON file. When ``prefill`` names a model, its predictions are added
    as records. The interface is finally written to ``output``.

    Raises:
        click.UsageError: If ``lines`` is combined with more than one image,
            or the ``lines`` file is not valid JSON.
    """
    from PIL import Image
    from kraken import rpred
    from kraken import pageseg
    from kraken import transcribe
    from kraken import binarization
    from kraken.lib import models
    from kraken.lib.util import is_bitonal

    ti = transcribe.TranscriptionInterface(font, font_style)

    if len(images) > 1 and lines:
        raise click.UsageError('--lines option is incompatible with multiple image files')

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        message('Loading RNN', nl=False)
        prefill = models.load_any(prefill)
        message('\u2713', fg='green')

    with log.progressbar(images, label='Reading images') as bar:
        for fp in bar:
            logger.info('Reading {}'.format(fp.name))
            im = Image.open(fp)
            if im.mode not in ['1', 'L', 'P', 'RGB']:
                logger.warning('Input {} is in {} color mode. Converting to RGB'.format(fp.name, im.mode))
                im = im.convert('RGB')
            logger.info('Binarizing page')
            im_bin = binarization.nlbin(im)
            im_bin = im_bin.convert('1')
            logger.info('Segmenting page')
            if not lines:
                res = pageseg.segment(im_bin, text_direction, scale, maxcolseps, black_colseps, pad=pad)
            else:
                # A separate name keeps the loop variable ``fp`` bound to the
                # image file, so the close() below targets the right handle.
                with open_file(lines, 'r') as lines_fp:
                    try:
                        res = json.load(cast(IO[Any], lines_fp))
                    except ValueError as e:
                        raise click.UsageError('{} invalid segmentation: {}'.format(lines, str(e))) from e
            if prefill:
                it = rpred.rpred(prefill, im_bin, res)
                preds = []
                logger.info('Recognizing')
                for pred in it:
                    logger.debug('{}'.format(pred.prediction))
                    preds.append(pred)
                ti.add_page(im, res, records=preds)
            else:
                ti.add_page(im, res)
            fp.close()

    logger.info('Writing transcription to {}'.format(output.name))
    message('Writing output', nl=False)
    ti.write(output)
    message('\u2713', fg='green')
# Script: recognize each image given on the command line with the model named
# by the first argument and save a copy with the character cuts highlighted.
import os
import sys
from PIL import Image, ImageDraw
from kraken.pageseg import segment
from kraken.binarization import nlbin
from kraken.rpred import rpred
from itertools import cycle
from kraken.lib import models

# Semi-transparent RGBA fill colors, used in rotation for successive cuts.
cmap = cycle([(230, 25, 75, 127),
              (60, 180, 75, 127),
              (255, 225, 25, 127),
              (0, 130, 200, 127),
              (245, 130, 48, 127),
              (145, 30, 180, 127),
              (70, 240, 240, 127)])

net = models.load_any(sys.argv[1])

for fname in sys.argv[2:]:
    im = Image.open(fname)
    print(fname)
    im = nlbin(im)
    res = segment(im, maxcolseps=0)
    pred = rpred(net, im, res)
    im = im.convert('RGBA')
    # Draw on a transparent layer so the fills blend with the page.
    tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(tmp)
    for line in pred:
        for box in line.cuts:
            draw.rectangle(box, fill=next(cmap))
    im = Image.alpha_composite(im, tmp)
    # The result is saved in the working directory with a 'high_' prefix;
    # os must be imported for the basename lookup.
    im.save('high_{}'.format(os.path.basename(fname)))
# Script: recognize each image given on the command line with the model named
# by the first argument and save a copy with the character cuts highlighted.
from kraken.pageseg import segment
from kraken.binarization import nlbin
from kraken.rpred import rpred
from itertools import cycle
from kraken.lib import models

# Semi-transparent RGBA fill colors, used in rotation for successive cuts.
cmap = cycle([
    (230, 25, 75, 127),
    (60, 180, 75, 127),
    (255, 225, 25, 127),
    (0, 130, 200, 127),
    (245, 130, 48, 127),
    (145, 30, 180, 127),
    (70, 240, 240, 127),
])

net = models.load_any(sys.argv[1])

for fname in sys.argv[2:]:
    im = Image.open(fname)
    print(fname)
    im = nlbin(im)
    res = segment(im, maxcolseps=0)
    pred = rpred(net, im, res)
    im = im.convert('RGBA')
    # Transparent layer of the same size that receives the colored fills.
    tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(tmp)
    for line in pred:
        for box in line.cuts:
            draw.rectangle(box, fill=next(cmap))
    im = Image.alpha_composite(im, tmp)
    target = 'high_{}'.format(os.path.basename(fname))
    im.save(target)
def segmenter(legacy, model, text_direction, scale, maxcolseps, black_colseps, remove_hlines, pad, mask, device, input, output) -> None:
    """
    Segment the image behind ``input`` and write the result to ``output``.

    On the first processing step a non-image input format is resolved to its
    image through get_input_parser. ``legacy`` selects pageseg.segment;
    otherwise blla.segment is run with ``model`` on ``device``. When this is
    the last processing step and the output mode is not 'native', the
    segmentation is serialized through kraken.serialization with empty
    records; otherwise it is dumped as JSON. Unreadable image or mask files
    are reported as click.BadParameter.
    """
    import json

    from kraken import pageseg
    from kraken import blla

    def _open_image(path):
        # Turn unreadable image files into a CLI parameter error.
        try:
            return Image.open(path)
        except IOError as err:
            raise click.BadParameter(str(err))

    ctx = click.get_current_context()
    if ctx.meta['first_process']:
        if ctx.meta['input_format_type'] != 'image':
            parse_input = get_input_parser(ctx.meta['input_format_type'])
            input = parse_input(input)['image']
        ctx.meta['first_process'] = False

    # Remember the first image seen as the base image of the pipeline.
    if 'base_image' not in ctx.meta:
        ctx.meta['base_image'] = input

    im = _open_image(input)
    if mask:
        mask = _open_image(mask)

    message('Segmenting\t', nl=False)
    try:
        if legacy:
            res = pageseg.segment(im,
                                  text_direction,
                                  scale,
                                  maxcolseps,
                                  black_colseps,
                                  no_hlines=remove_hlines,
                                  pad=pad,
                                  mask=mask)
        else:
            res = blla.segment(im, text_direction, mask=mask, model=model, device=device)
    except Exception:
        message('\u2717', fg='red')
        raise

    if ctx.meta['last_process'] and ctx.meta['output_mode'] != 'native':
        with open_file(output, 'w', encoding='utf-8') as fp:
            fp = cast(IO[Any], fp)
            logger.info('Serializing as {} into {}'.format(ctx.meta['output_mode'], output))
            from kraken import serialization
            from kraken.rpred import ocr_record

            def _box_record(line):
                # Empty record whose outline is the rectangle spanned by the
                # even-indexed (x) and odd-indexed (y) values of the box.
                xmin, xmax = min(line[::2]), max(line[::2])
                ymin, ymax = min(line[1::2]), max(line[1::2])
                return ocr_record('', [], [], [[xmin, ymin],
                                               [xmin, ymax],
                                               [xmax, ymax],
                                               [xmax, ymin]])

            if 'type' in res and res['type'] == 'baselines':
                records = [ocr_record('', '', '', bl) for bl in res['lines']]
            else:
                records = [_box_record(line) for line in res['boxes']]

            regions = res['regions'] if 'regions' in res else None
            fp.write(serialization.serialize(records,
                                             image_name=ctx.meta['base_image'],
                                             image_size=im.size,
                                             regions=regions,
                                             template=ctx.meta['output_mode']))
    else:
        with open_file(output, 'w') as fp:
            json.dump(res, cast(IO[Any], fp))
    message('\u2713', fg='green')
def transcription(ctx, text_direction, scale, bw, maxcolseps, black_colseps, font, font_style, prefill, pad, lines, output, images):
    """
    Creates transcription environments for ground truth generation.

    Every image is binarized and then either segmented with pageseg.segment
    or, when ``lines`` is given, paired with the segmentation loaded from
    that JSON file. When ``prefill`` names a model, its predictions are added
    as records. The interface is finally written to ``output``.

    Raises:
        click.UsageError: If ``lines`` is combined with more than one image,
            or the ``lines`` file is not valid JSON.
    """
    from PIL import Image
    from kraken import rpred
    from kraken import pageseg
    from kraken import transcribe
    from kraken import binarization
    from kraken.lib import models
    from kraken.lib.util import is_bitonal

    ti = transcribe.TranscriptionInterface(font, font_style)

    if len(images) > 1 and lines:
        raise click.UsageError(
            '--lines option is incompatible with multiple image files')

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        message('Loading RNN', nl=False)
        prefill = models.load_any(prefill)
        message('\u2713', fg='green')

    with log.progressbar(images, label='Reading images') as bar:
        for fp in bar:
            logger.info('Reading {}'.format(fp.name))
            im = Image.open(fp)
            if im.mode not in ['1', 'L', 'P', 'RGB']:
                logger.warning(
                    'Input {} is in {} color mode. Converting to RGB'.format(
                        fp.name, im.mode))
                im = im.convert('RGB')
            logger.info('Binarizing page')
            im_bin = binarization.nlbin(im)
            im_bin = im_bin.convert('1')
            logger.info('Segmenting page')
            if not lines:
                res = pageseg.segment(im_bin,
                                      text_direction,
                                      scale,
                                      maxcolseps,
                                      black_colseps,
                                      pad=pad)
            else:
                # A separate name keeps the loop variable ``fp`` bound to the
                # image file, so the close() below targets the right handle.
                with open_file(lines, 'r') as lines_fp:
                    try:
                        res = json.load(cast(IO[Any], lines_fp))
                    except ValueError as e:
                        raise click.UsageError(
                            '{} invalid segmentation: {}'.format(
                                lines, str(e))) from e
            if prefill:
                it = rpred.rpred(prefill, im_bin, res)
                preds = []
                logger.info('Recognizing')
                for pred in it:
                    logger.debug('{}'.format(pred.prediction))
                    preds.append(pred)
                ti.add_page(im, res, records=preds)
            else:
                ti.add_page(im, res)
            fp.close()

    logger.info('Writing transcription to {}'.format(output.name))
    message('Writing output', nl=False)
    ti.write(output)
    message('\u2713', fg='green')
def test_segment_color(self):
    """
    Segmenting the color resource 'input.jpg' must run without raising.
    """
    color_path = os.path.join(resources, 'input.jpg')
    with Image.open(color_path) as color_im:
        segment(color_im)
# function looks particularly appropriate. I love how expressive this library is on the # documentation front -- I can see immediately that we are working with PIL.Image files, # and the author has even indicated that we need to pass in either a binarized (e.g. '1') # or grayscale (e.g. 'L') image. We can also see that the return value is a dictionary # object with two keys, "text_direction" which will return to us a string of the # direction of the text, and "boxes" which appears to be a list of tuples, where each # tuple is a box in the original image. # # Let's try this on the image of text. I have a simple bit of text in a file called # two_col.png which is from a newspaper on campus here from PIL import Image im = Image.open("readonly/two_col.png") # Let's display the image inline display(im) # Let's now convert it to black and white and segment it up into lines with kraken bounding_boxes = pageseg.segment(im.convert('1'))['boxes'] # And let's print those lines to the screen print(bounding_boxes) # In[41]: # Ok, pretty simple two-column text and then a list of lists which are the bounding boxes of # lines of that text. Let's write a little routine to try and see the effects a bit more # clearly. I'm going to clean up my act a bit and write real documentation too, it's a good # practice def show_boxes(img): '''Modifies the passed image to show a series of bounding boxes on an image as run by kraken :param img: A PIL.Image object :return img: The modified PIL.Image object
def segmenter(legacy, model, text_direction, scale, maxcolseps, black_colseps, remove_hlines, pad, mask, device, input, output) -> None:
    """
    Segment the image behind ``input`` and write the result to ``output``.

    On the first processing step a non-image input format is resolved to its
    image through get_input_parser. ``legacy`` selects pageseg.segment;
    otherwise blla.segment is run with ``model`` on ``device``. A failing
    segmentation is re-raised when ctx.meta['raise_failed'] is set; otherwise
    a red cross is shown and the context exits with status 1. When this is
    the last processing step and the output mode is not 'native', the result
    is written through serialization.serialize_segmentation; otherwise it is
    dumped as JSON.
    """
    import json

    from kraken import pageseg
    from kraken import blla

    def _open_image(path):
        # Turn unreadable image files into a CLI parameter error.
        try:
            return Image.open(path)
        except IOError as err:
            raise click.BadParameter(str(err))

    ctx = click.get_current_context()
    if ctx.meta['first_process']:
        if ctx.meta['input_format_type'] != 'image':
            parse_input = get_input_parser(ctx.meta['input_format_type'])
            input = parse_input(input)['image']
        ctx.meta['first_process'] = False

    # Remember the first image seen as the base image of the pipeline.
    if 'base_image' not in ctx.meta:
        ctx.meta['base_image'] = input

    im = _open_image(input)
    if mask:
        mask = _open_image(mask)

    message('Segmenting\t', nl=False)
    try:
        if legacy:
            res = pageseg.segment(im,
                                  text_direction,
                                  scale,
                                  maxcolseps,
                                  black_colseps,
                                  no_hlines=remove_hlines,
                                  pad=pad,
                                  mask=mask)
        else:
            res = blla.segment(im, text_direction, mask=mask, model=model, device=device)
    except Exception:
        if ctx.meta['raise_failed']:
            raise
        message('\u2717', fg='red')
        ctx.exit(1)

    if ctx.meta['last_process'] and ctx.meta['output_mode'] != 'native':
        with click.open_file(output, 'w', encoding='utf-8') as fp:
            fp = cast(IO[Any], fp)
            logger.info('Serializing as {} into {}'.format(ctx.meta['output_mode'], output))
            from kraken import serialization
            from kraken.rpred import ocr_record
            serialized = serialization.serialize_segmentation(
                res,
                image_name=ctx.meta['base_image'],
                image_size=im.size,
                template=ctx.meta['output_mode'])
            fp.write(serialized)
    else:
        with click.open_file(output, 'w') as fp:
            json.dump(res, cast(IO[Any], fp))
    message('\u2713', fg='green')