def segmenter(
    text_direction,
    script_detect,
    allowed_scripts,
    scale,
    maxcolseps,
    black_colseps,
    remove_hlines,
    pad,
    base_image,
    input,
    output,
) -> None:
    """Segment the image at ``input`` and dump the result as JSON to ``output``.

    The image is opened with ``Image.open``, handed to ``pageseg.segment``
    and, when ``script_detect`` is truthy, post-processed by
    ``pageseg.detect_scripts`` restricted to ``allowed_scripts``.  Progress
    is reported through ``message``: a red cross if segmentation raises, a
    green check mark once the JSON has been written.

    ``base_image`` is accepted but not used by this function.

    Raises:
        click.BadParameter: if the input image cannot be opened (IOError).
        Exception: anything raised during segmentation or script detection
            is re-raised unchanged after the failure marker is printed.
    """
    import json

    from kraken import pageseg

    try:
        page_image = Image.open(input)
    except IOError as err:
        raise click.BadParameter(str(err))

    message('Segmenting\t', nl=False)
    try:
        segmentation = pageseg.segment(
            page_image,
            text_direction,
            scale,
            maxcolseps,
            black_colseps,
            no_hlines=remove_hlines,
            pad=pad,
        )
        if script_detect:
            segmentation = pageseg.detect_scripts(
                page_image, segmentation, valid_scripts=allowed_scripts
            )
    except Exception:
        # Close the progress line with a failure marker, then propagate.
        message('\u2717', fg='red')
        raise

    with open_file(output, 'w') as handle:
        # cast() only informs the type checker; it is a no-op at runtime.
        json.dump(segmentation, cast(IO[Any], handle))
    message('\u2713', fg='green')
def segmenter(
    text_direction,
    script_detect,
    allowed_scripts,
    scale,
    maxcolseps,
    black_colseps,
    remove_hlines,
    pad,
    mask,
    base_image,
    input,
    output,
) -> None:
    """Segment the image at ``input`` and dump the result as JSON to ``output``.

    The image is opened with ``Image.open`` and handed to
    ``pageseg.segment``.  If ``mask`` is truthy it is opened the same way
    and the resulting image is forwarded as the ``mask`` keyword; otherwise
    the falsy value is forwarded untouched.  When ``script_detect`` is
    truthy the result is post-processed by ``pageseg.detect_scripts``
    restricted to ``allowed_scripts``.  Progress is reported through
    ``message``: a red cross if segmentation raises, a green check mark once
    the JSON has been written.

    ``base_image`` is accepted but not used by this function.

    Raises:
        click.BadParameter: if the input image or the mask cannot be opened
            (IOError).
        Exception: anything raised during segmentation or script detection
            is re-raised unchanged after the failure marker is printed.
    """
    import json

    from kraken import pageseg

    try:
        page_image = Image.open(input)
    except IOError as err:
        raise click.BadParameter(str(err))

    # Keep the caller's falsy value as-is; only a truthy mask is opened.
    mask_image = mask
    if mask:
        try:
            mask_image = Image.open(mask)
        except IOError as err:
            raise click.BadParameter(str(err))

    message('Segmenting\t', nl=False)
    try:
        segmentation = pageseg.segment(
            page_image,
            text_direction,
            scale,
            maxcolseps,
            black_colseps,
            no_hlines=remove_hlines,
            pad=pad,
            mask=mask_image,
        )
        if script_detect:
            segmentation = pageseg.detect_scripts(
                page_image, segmentation, valid_scripts=allowed_scripts
            )
    except Exception:
        # Close the progress line with a failure marker, then propagate.
        message('\u2717', fg='red')
        raise

    with open_file(output, 'w') as handle:
        # cast() only informs the type checker; it is a no-op at runtime.
        json.dump(segmentation, cast(IO[Any], handle))
    message('\u2713', fg='green')
def segmenter(text_direction, script_detect, scale, maxcolseps,
              black_colseps, base_image, input, output):
    """Segment the image at ``input`` and dump the result as JSON to ``output``.

    The image is opened with ``Image.open`` and handed to
    ``pageseg.segment``; when ``script_detect`` is truthy the result is
    post-processed by ``pageseg.detect_scripts``.  Progress is echoed with
    click: a red cross if segmentation raises, a green check mark once the
    JSON has been written.

    ``base_image`` is accepted but not used by this function.

    Raises:
        click.BadParameter: if the input image cannot be opened (IOError).
        Exception: anything raised during segmentation or script detection
            is re-raised unchanged after the failure marker is printed.
    """
    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e))

    click.echo('Segmenting\t', nl=False)
    try:
        res = pageseg.segment(im, text_direction, scale, maxcolseps,
                              black_colseps)
        if script_detect:
            res = pageseg.detect_scripts(im, res)
    except Exception:
        # Only genuine errors are reported as a failed segmentation;
        # KeyboardInterrupt / SystemExit propagate without the marker.
        click.secho(u'\u2717', fg='red')
        raise

    with open_file(output, 'w') as fp:
        json.dump(res, fp)
    click.secho(u'\u2713', fg='green')
def process(self):
    """
    Segment every input file with kraken and register the result as PAGE XML.

    For each entry of ``self.input_files`` the file is downloaded, parsed
    with ``page_from_file``, and its page image is segmented via
    ``segment`` (optionally followed by ``detect_scripts``).  All returned
    boxes are added as text lines of one new text region, and the
    serialised document is added to the workspace under
    ``self.output_file_grp``.
    """
    logger = getLogger('processor.KrakenSegment')
    for (index, source_file) in enumerate(self.input_files):
        logger.info("INPUT FILE %i / %s", index, source_file)
        local_copy = self.workspace.download_file(source_file)
        logger.info("downloaded_file %s", local_copy)
        document = page_from_file(local_copy)
        # TODO binarized variant from get_AlternativeImage()
        image_ref = document.get_Page().imageFilename
        logger.info("pcgts %s", document)
        page_image = self.workspace.resolve_image_as_pil(image_ref)

        logger.info('Segmenting')
        logger.info('Params %s', self.parameter)
        segmentation = segment(
            page_image,
            self.parameter['text_direction'],
            self.parameter['scale'],
            self.parameter['maxcolseps'],
            self.parameter['black_colseps'],
        )
        if self.parameter['script_detect']:
            # NOTE(review): the loop below assumes the entries of 'boxes' are
            # still accepted by points_from_x0y0x1y1 after detect_scripts --
            # confirm against the kraken version in use.
            segmentation = detect_scripts(page_image, segmentation)

        # All detected lines are collected in one placeholder region.
        region = TextRegionType()
        document.get_Page().add_TextRegion(region)
        for line_index, bbox in enumerate(segmentation['boxes']):
            line_id = concat_padded("line", line_index)
            line_coords = CoordsType(points=points_from_x0y0x1y1(bbox))
            region.add_TextLine(TextLineType(id=line_id, Coords=line_coords))

        file_id = concat_padded(self.output_file_grp, index)
        target_path = "%s/%s.xml" % (self.output_file_grp, file_id)
        serialized = to_xml(document).encode('utf-8')
        self.workspace.add_file(
            self.output_file_grp,
            pageId=source_file.pageId,
            ID=file_id,
            mimetype=MIMETYPE_PAGE,
            local_filename=target_path,
            content=serialized,
        )