def kraken_nlbin(input_path, output_path, threshold=0.5, zoom=0.5, escale=1.0,
                 border=0.1, perc=80, range=20, low=5, high=90):
    """
    Binarizes an input document utilizing ocropus'/kraken's nlbin algorithm.

    The image at ``input_path`` is opened, handed to ``binarization.nlbin``
    together with the eight tuning values (forwarded positionally and
    unchanged) and the result is saved to ``output_path``.

    Args:
        input_path (unicode): Path to the input image
        output_path (unicode): Path to the output image
        threshold (float): Forwarded to nlbin.
        zoom (float): Forwarded to nlbin.
        escale (float): Forwarded to nlbin.
        border (float): Forwarded to nlbin.
        perc (int): Forwarded to nlbin.
        range (int): Forwarded to nlbin.
        low (int): Forwarded to nlbin.
        high (int): Forwarded to nlbin.

    Raises:
        Nothing is validated or caught in this function; whatever
        ``Image.open``, ``binarization.nlbin`` or ``save`` raise propagates
        to the caller.
    """
    # Image.open keeps the underlying file open until the image is loaded or
    # closed, so scope it with a context manager. The save stays inside the
    # block because nlbin may hand back the input object itself.
    with Image.open(input_path) as img:
        result = binarization.nlbin(img, threshold, zoom, escale, border,
                                    perc, range, low, high)
        result.save(output_path)
def binarize(directory, extension, new_extension, new_directory) -> None:
    """Function that binarizes images in a directory and saves the new files
    in another directory.

    Every entry of ``directory`` whose name ends with ``extension`` is opened
    with PIL, binarized with ``binarization.nlbin`` and saved under
    ``new_directory`` with the trailing ``extension`` swapped for
    ``new_extension``. Entries that PIL cannot identify, or that turn out to
    be directories, are reported on stdout and skipped.

    :param directory: path to the directory where the images are located.
    :type directory: str
    :param extension: images' extension.
    :type extension: str
    :param new_extension: binarized images extension
    :type new_extension: str
    :param new_directory: path to the directory where the binarized images
        will be saved.
    :type new_directory: str
    :return: none
    """
    for file in os.listdir(directory):
        if not file.endswith(extension):
            continue
        try:
            # Binarize exactly once; the original ran nlbin a second time
            # after the try block and threw the first result away.
            binarized = binarization.nlbin(
                PIL.Image.open(os.path.join(directory, file)))
        except (PIL.UnidentifiedImageError, IsADirectoryError) as err:
            print('--//--> %s' % err)
            continue
        # Swap only the trailing extension. str.replace would also rewrite
        # any earlier occurrence of the same text inside the file name.
        filename = file[:len(file) - len(extension)] + new_extension
        binarized.save(os.path.join(new_directory, filename))
        print("------> {} binarized !".format(os.path.join(directory, filename)))
def nlbin(imagepath, resultpath, threshold=0.5, zoom=0.5, escale=1.0,
          border=0.1, perc=80, range=20, low=5, high=90):
    """
    Converts an 8bpp grayscale image into a black and white one using the
    non-linear processing algorithm from ocropus/kraken.

    The image is opened from ``imagepath``, passed with the eight tuning
    values (positionally, unchanged) to ``binarization.nlbin`` and the result
    is saved to ``resultpath``.

    Args:
        imagepath: Path of the input image
        resultpath: Path of the output image
        threshold (float): Forwarded to nlbin.
        zoom (float): Forwarded to nlbin.
        escale (float): Forwarded to nlbin.
        border (float): Forwarded to nlbin.
        perc (int): Forwarded to nlbin.
        range (int): Forwarded to nlbin.
        low (int): Forwarded to nlbin.
        high (int): Forwarded to nlbin.

    Returns:
        unicode: Path of the output file (``resultpath``, returned as given)
    """
    # Scope the source file with a context manager so it is closed even when
    # binarization or saving fails. The save stays inside the block because
    # nlbin may return the input object itself.
    with Image.open(imagepath) as img:
        binarized = binarization.nlbin(img, threshold, zoom, escale, border,
                                       perc, range, low, high)
        binarized.save(resultpath)
    return resultpath
def preprocessing_tabular(path):
    """Load an image and prepare it for table-cell extraction.

    Steps, in order:
      1. read the image with OpenCV and binarize a PIL copy with
         ``binarization.nlbin``;
      2. grayscale + inverted Otsu threshold;
      3. morphological open, then blank every contour whose area is < 500;
      4. morphological close, invert, find contours again and sort them
         top-to-bottom with ``contours.sort_contours``.

    :param path: path of the image file to read.
    :return: tuple ``(img, genrator_image, cnts)`` - the array returned by
        ``cv2.imread``, the nlbin result, and the sorted contours.
    :raises OSError: if OpenCV cannot read ``path``.
    """
    # Load image. cv2.imread signals failure by returning None, not raising.
    img = cv2.imread(path)
    if img is None:
        raise OSError("could not read image: {}".format(path))

    # ---Binarization of image---
    # OpenCV arrays are BGR while PIL interprets a 3-channel array as RGB, so
    # convert first; otherwise the grayscale conversion inside nlbin weights
    # the red and blue channels the wrong way round.
    genrator_image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    genrator_image = binarization.nlbin(genrator_image)

    # ----Grayscaling Image----
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # --- performing Otsu threshold ---
    thresh = cv2.threshold(gray, 0, 255,
                           cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Remove text characters with morph open and contour filtering
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
    cnts = cv2.findContours(opening, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 or 3 values depending on the OpenCV version
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    for c in cnts:
        if cv2.contourArea(c) < 500:
            cv2.drawContours(opening, [c], -1, (0, 0, 0), -1)

    # Repair table lines, sort contours, and extract ROI
    close = 255 - cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel, iterations=1)
    cnts = cv2.findContours(close, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    cnts, _ = contours.sort_contours(cnts, method="top-to-bottom")
    return img, genrator_image, cnts
def build_binarized(img_path):
    """Write an inverted, 1-bit PNG binarization of ``img_path``.

    The output goes to ``<img_path.parent.parent>/bin/<stem>.png``. Nothing
    is done when that file already exists.

    :param img_path: path object of the source image (``.parent``, ``.stem``
        and ``/`` are used, so a ``pathlib.Path`` is expected).
    :raises FileExistsError: if ``.../bin`` exists but is not a directory
        (the original used an ``assert`` for this, which is stripped under
        ``python -O``).
    """
    from kraken.binarization import nlbin

    bin_path = img_path.parent.parent / "bin"
    # exist_ok removes the exists()/mkdir() race; mkdir still raises when the
    # path is occupied by something that is not a directory.
    bin_path.mkdir(exist_ok=True)

    out_bin_path = bin_path / (img_path.stem + ".png")
    if out_bin_path.is_file():
        return

    # Keep every use of the image inside the with-block so the source file
    # is closed afterwards, including on failure.
    with PIL.Image.open(img_path) as image:
        binarized = nlbin(image, threshold=0.6, escale=5, border=0.1,
                          perc=80, range=20, low=20, high=80)
        binarized = PIL.ImageOps.invert(binarized)
        binarized = binarized.convert('1')
        binarized.save(out_bin_path, "PNG")
def QR_decode():
    """Save the uploaded ``file`` and try to read a code from it.

    The upload is stored under ``app.config['UPLOAD_FOLDER']`` and decoded
    with a sequence of progressively pre-processed variants (raw,
    nlbin-binarized, brightened, contrast-enhanced, sharpened, grayscale of
    the contrast-enhanced image). The first variant that yields a result wins.

    The original collected all attempts in a list but then overwrote that
    list with the last attempt only, so earlier successes were lost.

    :return: the decoded payload of the first hit as lower-cased UTF-8 text,
        or ``None`` when nothing decodes or any error occurs.
    """
    def _candidates(img):
        # Yield (image, decode kwargs) lazily so later, more expensive
        # variants are only built when the earlier ones fail.
        yield img, {}
        # 1. binarization
        yield binarization.nlbin(img, zoom=0.5), {'symbols': [ZBarSymbol.QRCODE]}
        # 2. increase brightness
        yield ImageEnhance.Brightness(img).enhance(2.0), {}
        # 3. increase contrast
        contrasted = ImageEnhance.Contrast(img).enhance(3.0)
        yield contrasted, {}
        # 4. sharpen
        yield ImageEnhance.Sharpness(img).enhance(17.0), {}
        # 5. grayscale of the contrast-enhanced image
        yield contrasted.convert('L'), {}

    try:
        file = request.files['file']
        # The file name comes from the client: keep only its last path
        # component so it cannot point outside the upload folder.
        # NOTE(review): werkzeug's secure_filename is the stricter option.
        filename = os.path.basename(file.filename)
        print("Saving :", filename)
        saved_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        file.save(saved_path)

        result = []
        # Open exactly the path that was just written (the original rebuilt
        # it by string concatenation from a separate global).
        with Image.open(saved_path) as img:
            for candidate, options in _candidates(img):
                result = decode(candidate, **options)
                if result:
                    break
        print(result)
        if result:
            return result[0].data.decode("utf-8").lower()
        return None
    except Exception:
        # Best effort by design: report and signal "nothing decoded".
        print(sys.exc_info())
        return None
def preprocessing_non_tabular(path):
    """Load an image and find text-block contours for non-tabular pages.

    Steps, in order:
      1. read the image with OpenCV and binarize a PIL copy with
         ``binarization.nlbin``;
      2. grayscale + inverted Otsu threshold, written to
         ``processed_image/threshold.png``;
      3. dilation with a 15x3 rectangular kernel, written to
         ``processed_image/dilation.png``;
      4. external contour detection on the dilated image.

    :param path: path of the image file to read.
    :return: tuple ``(img, genrator_image, contours)`` - the array returned
        by ``cv2.imread``, the nlbin result, and the contours in reverse of
        the order OpenCV reported them.
    :raises OSError: if OpenCV cannot read ``path``.
    """
    # cv2.imread signals failure by returning None, not by raising.
    img = cv2.imread(path)
    if img is None:
        raise OSError("could not read image: {}".format(path))

    # ---Binarization of image---
    # OpenCV arrays are BGR while PIL interprets a 3-channel array as RGB, so
    # convert first; otherwise the grayscale conversion inside nlbin weights
    # the red and blue channels the wrong way round.
    genrator_image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    genrator_image = binarization.nlbin(genrator_image)

    # ----Grayscaling Image----
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # --- performing Otsu threshold ---
    ret, thresh1 = cv2.threshold(gray, 0, 255,
                                 cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
    # NOTE(review): cv2.imwrite's return value is ignored here, as before.
    cv2.imwrite("processed_image/threshold.png", thresh1)

    # ----Image dilation----
    rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 3))
    dilation = cv2.dilate(thresh1, rect_kernel, iterations=1)
    cv2.imwrite("processed_image/dilation.png", dilation)

    # ---Finding contours ---
    # findContours returns 2 or 3 values depending on the OpenCV version;
    # pick the contour list the same way preprocessing_tabular does.
    found = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    found_contours = found[0] if len(found) == 2 else found[1]
    return img, genrator_image, found_contours[::-1]
def _read(cv2_image):
    """Scan an image array for a code39 barcode.

    The array is wrapped in a PIL image, binarized with
    ``binarization.nlbin`` (``zoom=1.0``) and passed to
    ``zbarlight.scan_codes``.

    :param cv2_image: image array accepted by ``Image.fromarray``.
    :return: the first scanned code decoded as UTF-8, or ``''`` when the
        scan result is empty/falsy.
    """
    pil_image = Image.fromarray(cv2_image)
    barcode_binary = binarization.nlbin(pil_image, zoom=1.0)
    codes = zbarlight.scan_codes('code39', barcode_binary)
    if not codes:
        return ''
    return codes[0].decode('utf-8')
def pagexmllineseg(xmlfile, text_direction='horizontal-lr', outputfile=None):
    """
    Opens file 'xmlfile', converts to newest pagexml version 2017, segments the
    text regions and writes xml to file. Output is written to input file if
    outfile is 'None'.

    For every ``TextRegion`` the region polygon is cut out of the page image
    (``cutout``), binarized with nlbin unless already bitonal, segmented with
    ``segment`` and one ``TextLine``/``Coords`` element per detected line is
    appended to the region. Line polygons are shifted by the region's
    top-left offset and their two coordinates are swapped on output.

    Args:
        xmlfile: Path of the PAGE XML file to read.
        text_direction: Passed through to ``segment``.
        outputfile: Path to write to; falls back to ``xmlfile`` when falsy.
    """
    if not outputfile:
        outputfile = xmlfile

    root = etree.parse(xmlfile).getroot()
    ns = {"ns": root.nsmap[None]}

    # convert point notation from older pagexml versions
    for c in root.xpath("//ns:Coords[not(@points)]", namespaces=ns):
        cc = []
        for point in c.xpath("./ns:Point", namespaces=ns):
            cx = point.attrib["x"]
            cy = point.attrib["y"]
            c.remove(point)
            cc.append(cx + "," + cy)
        c.attrib["points"] = " ".join(cc)

    # region id -> {"type": ..., "coords": [[x, y], ...]}
    coordmap = {}
    for r in root.xpath('//ns:TextRegion', namespaces=ns):
        rid = r.attrib["id"]
        coordmap[rid] = {"type": r.attrib["type"]}
        coordmap[rid]["coords"] = []
        for c in r.xpath("./ns:Coords", namespaces=ns) + r.xpath("./Coords"):
            coordstrings = [x.split(",") for x in c.attrib["points"].split()]
            coordmap[rid]["coords"] += [[int(x[0]), int(x[1])] for x in coordstrings]

    filename = root.xpath('//ns:Page', namespaces=ns)[0].attrib["imageFilename"]

    # Keep the page image open only while regions are being cut out of it.
    with Image.open(filename) as im:
        for region_id in sorted(coordmap):
            coords = coordmap[region_id]['coords']
            cropped = cutout(im, coords)
            offset = (min([x[0] for x in coords]), min([x[1] for x in coords]))
            # Identity test: "!= None" goes through the object's __eq__/__ne__
            # and is ambiguous for array-like values.
            if cropped is not None:
                if not binarization.is_bitonal(cropped):
                    cropped = binarization.nlbin(cropped)
                lines = segment(cropped, text_direction=text_direction,
                                maxcolseps=0)['lines']
            else:
                lines = []
            if not lines:
                continue

            textregion = root.xpath('//ns:TextRegion[@id="' + region_id + '"]',
                                    namespaces=ns)[0]
            # A dedicated index name: the original reused the outer loop's
            # variable for the line counter.
            for line_idx, line in enumerate(lines):
                shifted = ((x[1] + offset[0], x[0] + offset[1]) for x in line.polygon)
                coordstrg = " ".join(str(x[0]) + "," + str(x[1]) for x in shifted)
                linexml = etree.SubElement(
                    textregion, "TextLine",
                    attrib={"id": region_id + "_l{:03d}".format(line_idx + 1)})
                etree.SubElement(linexml, "Coords", attrib={"points": coordstrg})

    xmlstring = etree.tounicode(root.getroottree()).replace(
        "http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19",
        "http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15"
    )
    # Write UTF-8 explicitly; the locale default can fail on non-ASCII text
    # and the serialized tree carries no encoding declaration.
    with open(outputfile, "w", encoding="utf-8") as f:
        f.write(xmlstring)
def binarizer(threshold, zoom, escale, border, perc, range, low, high, input, output) -> None:
    """Binarize ``input`` with nlbin and save the result to ``output``.

    Must run as the first processing step (``ctx.meta['first_process']``);
    otherwise a ``click.UsageError`` is raised. For non-image input formats
    the image path is taken from the parsed input document. Outputs with a
    JPEG or missing extension are saved as PNG. On success
    ``ctx.meta['base_image']`` is set to ``output``.
    """
    from kraken import binarization

    ctx = click.get_current_context()
    if not ctx.meta['first_process']:
        raise click.UsageError('Binarization has to be the initial process.')
    if ctx.meta['input_format_type'] != 'image':
        parse_input = get_input_parser(ctx.meta['input_format_type'])
        input = parse_input(input)['image']
    ctx.meta['first_process'] = False

    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e))

    message('Binarizing\t', nl=False)
    try:
        res = binarization.nlbin(im, threshold, zoom, escale, border, perc,
                                 range, low, high)
        ext = os.path.splitext(output)[1]
        # JPEG extensions and "no extension" are both written as PNG; only a
        # real JPEG extension triggers the warning.
        force_png = ext in ('.jpg', '.jpeg', '.JPG', '.JPEG', '')
        if force_png and ext:
            logger.warning(
                'jpeg does not support 1bpp images. Forcing to png.')
        res.save(output, format='png' if force_png else None)
        ctx.meta['base_image'] = output
    except Exception:
        message('\u2717', fg='red')
        raise
    message('\u2713', fg='green')
def cli(format_type, model, repolygonize, files):
    """
    A small script extracting rectified line polygons as defined in either ALTO or
    PageXML files or run a model to do the same.
    """
    # The docstring above is kept verbatim: presumably it is what click shows
    # as the command help - verify against the decorators.
    #
    # Without a model, every XML document is parsed, its page image is
    # binarized with nlbin and each line polygon is written as
    # "<image stem>.<id>.png" plus "<image stem>.<id>.gt.txt", where <id> is
    # the line id from the XML or the running index when there is none.
    # With a model, each file is segmented with blla and only the line
    # images "<file stem>.<idx>.png" are written.
    if not files:
        ctx = click.get_current_context()
        click.echo(ctx.get_help())
        ctx.exit()

    from PIL import Image
    from os.path import splitext
    from kraken import binarization, blla
    from kraken.lib import dataset, segmentation, vgsl

    if model is None:
        for doc in files:
            click.echo(f'Processing {doc} ', nl=False)
            data = dataset.preparse_xml_data([doc], format_type,
                                             repolygonize=repolygonize)
            if len(data) == 0:
                continue
            bounds = {
                'type': 'baselines',
                'lines': [{
                    'boundary': t['boundary'],
                    'baseline': t['baseline'],
                    'text': t['text'],
                    'ID': t['line_id'] if 'line_id' in t else ''
                } for t in data]
            }
            image_path = data[0]['image']
            stem = splitext(image_path)[0]
            bw_im = binarization.nlbin(Image.open(image_path))
            for idx, (im, box) in enumerate(
                    segmentation.extract_polygons(bw_im, bounds)):
                id_lin = box['ID'] if box['ID'] else idx
                click.echo('.', nl=False)
                im.save('{}.{}.png'.format(stem, id_lin))
                # Ground truth is written as UTF-8 explicitly so the result
                # does not depend on the platform's locale encoding.
                with open('{}.{}.gt.txt'.format(stem, id_lin), 'w',
                          encoding='utf-8') as fp:
                    fp.write(box['text'])
    else:
        net = vgsl.TorchVGSLModel.load_model(model)
        for doc in files:
            click.echo(f'Processing {doc} ', nl=False)
            full_im = Image.open(doc)
            bounds = blla.segment(full_im, model=net)
            for idx, (im, box) in enumerate(
                    segmentation.extract_polygons(full_im, bounds)):
                click.echo('.', nl=False)
                im.save('{}.{}.png'.format(splitext(doc)[0], idx))
def extract(ctx, binarize, normalization, reorder, rotate, output, transcriptions):
    """
    Extracts image-text pairs from a transcription environment created using
    ``ketos transcribe``.
    """
    # The docstring above is kept verbatim (presumably click help text).
    #
    # For every <section> of every transcription file the embedded base64
    # page image is decoded, optionally binarized, and each editable <li>
    # with non-blank text is cropped by its data-bbox and written to
    # "<output>/<idx>.png" with its text in "<output>/<idx>.gt.txt".
    # A manifest of all line image names is written at the end.
    try:
        os.mkdir(output)
    except OSError:
        # Best effort as before (typically "already exists"); a directory
        # that is really unusable fails on the first save below.
        pass

    idx = 0
    manifest = []
    for fp in transcriptions:
        logger.info(u'Reading {}'.format(fp.name))
        spin('Reading transcription')
        doc = html.parse(fp)
        td = doc.find(".//meta[@itemprop='text_direction']")
        if td is None:
            td = 'horizontal-lr'
        else:
            td = td.attrib['content']

        im = None
        for section in doc.xpath('//section'):
            img = section.xpath('.//img')[0].get('src')
            fd = BytesIO(base64.b64decode(img.split(',')[1]))
            im = Image.open(fd)
            if not im:
                logger.info(u'Skipping {} because image not found'.format(
                    fp.name))
                break
            if binarize:
                im = binarization.nlbin(im)
            for line in section.iter('li'):
                raw_text = u''.join(line.itertext())
                # Skip non-editable lines and lines that are empty or
                # whitespace-only. The original "not isspace() or not text"
                # test let empty lines through.
                if not line.get('contenteditable') or not raw_text.strip():
                    continue
                line_im = im.crop(
                    [int(x) for x in line.get('data-bbox').split(',')])
                if rotate and td.startswith('vertical'):
                    # Image.rotate returns a new image. The original called it
                    # on the whole page and dropped the result, so vertical
                    # lines were never rotated.
                    line_im = line_im.rotate(90, expand=True)
                line_im.save('{}/{:06d}.png'.format(output, idx))
                manifest.append('{:06d}.png'.format(idx))
                text = raw_text.strip()
                if normalization:
                    text = unicodedata.normalize(normalization, text)
                with open('{}/{:06d}.gt.txt'.format(output, idx), 'wb') as t:
                    if reorder:
                        t.write(get_display(text).encode('utf-8'))
                    else:
                        t.write(text.encode('utf-8'))
                idx += 1

    logger.info(u'Extracted {} lines'.format(idx))
    with open('{}/manifest.txt'.format(output), 'w') as fp:
        fp.write('\n'.join(manifest))
    message(u'\b\u2713', fg='green', nl=False)
    message('\033[?25h\n', nl=False)
def binarizer(input_image, threshold, zoom, escale, border, perc, range, low, high) -> Image:
    """Run ``binarization.nlbin`` on ``input_image`` and return its result.

    The eight tuning values are forwarded positionally and unchanged. Any
    exception is announced on stdout and then re-raised.
    """
    nlbin_args = (threshold, zoom, escale, border, perc, range, low, high)
    try:
        return binarization.nlbin(input_image, *nlbin_args)
    except Exception:
        print('Binarization error.')
        raise
def transcription(ctx, text_direction, scale, maxcolseps, black_colseps, font, font_style, prefill, output, images):
    # Builds a transcription environment: every image is opened, binarized
    # unless already bitonal, segmented, optionally pre-filled with the
    # predictions of a recognition model, and added as a page; the result is
    # written to `output`.
    # Progress is reported as timestamped lines when ctx.meta['verbose'] > 0
    # and as spinner + check marks otherwise.
    # (Comments rather than a docstring: a docstring on a click command would
    # change its help text.)
    st_time = time.time()
    ti = transcribe.TranscriptionInterface(font, font_style)

    def _verbose():
        return ctx.meta['verbose'] > 0

    def _timed(template, *values):
        # Every timestamped template takes the elapsed seconds first.
        click.echo(template.format(time.time() - st_time, *values))

    def _check_mark():
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)

    def _check_mark_if_quiet():
        if not ctx.meta['verbose']:
            _check_mark()

    if prefill:
        if _verbose():
            _timed(u'[{:2.4f}] Loading model {}', prefill)
        else:
            spin('Loading RNN')
        prefill = models.load_any(prefill.encode('utf-8'))
        _check_mark_if_quiet()

    for fp in images:
        if _verbose():
            _timed(u'[{:2.4f}] Reading {}', fp.name)
        else:
            spin('Reading images')
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            if _verbose():
                _timed(u'[{:2.4f}] Binarizing page')
            im = binarization.nlbin(im)
        if _verbose():
            _timed(u'[{:2.4f}] Segmenting page')
        res = pageseg.segment(im, text_direction, scale, maxcolseps, black_colseps)
        if not prefill:
            ti.add_page(im, res)
        else:
            preds = []
            for pred in rpred.rpred(prefill, im, res):
                if _verbose():
                    _timed(u'[{:2.4f}] {}', pred.prediction)
                else:
                    spin('Recognizing')
                preds.append(pred)
            if _verbose():
                _timed(u'Execution time: {}s')
            else:
                _check_mark()
            ti.add_page(im, res, records=preds)
        fp.close()
    _check_mark_if_quiet()

    if _verbose():
        _timed(u'[{:2.4f}] Writing transcription to {}', output.name)
    else:
        spin('Writing output')
    ti.write(output)
    _check_mark_if_quiet()
def test_binarize_grayscale(self):
    """
    Test binarization of mode 'L' images.
    """
    source = os.path.join(resources, 'input.tif')
    with Image.open(source) as im:
        grayscale = im.convert('L')
        res = nlbin(grayscale)
        # expect exactly 254 histogram entries to be zero, i.e. only two
        # pixel values occur in the output
        empty_bins = res.histogram().count(0)
        self.assertEqual(254, empty_bins, msg='Output not binarized')
def test_binarize_no_bw(self):
    """
    Tests binarization of image formats without a 1bpp mode (JPG).
    """
    source = os.path.join(resources, 'input.jpg')
    with Image.open(source) as im:
        res = nlbin(im)
        # expect exactly 254 histogram entries to be zero, i.e. only two
        # pixel values occur in the output
        empty_bins = res.histogram().count(0)
        self.assertEqual(254, empty_bins, msg='Output not binarized')
def KrakenizeBox(img1, img2):
    """Binarize two OpenCV images with nlbin.

    Each input is converted BGR -> RGB, wrapped in a PIL image, binarized
    with ``binarization.nlbin``, converted back to an RGB PIL image and
    returned as a numpy array.

    :return: tuple ``(array_for_img1, array_for_img2)``.
    """
    def _binarized_rgb(frame):
        pil_frame = Image.fromarray(cv.cvtColor(frame, cv.COLOR_BGR2RGB))
        return np.array(binarization.nlbin(pil_frame).convert('RGB'))

    return _binarized_rgb(img1), _binarized_rgb(img2)
def transcription(ctx, font, font_style, prefill, output, images):
    # Builds a transcription environment: every image is opened, binarized
    # unless already bitonal, segmented with default settings, optionally
    # pre-filled with the predictions of a recognition model, and added as a
    # page; the result is written to `output`.
    # Progress is reported as timestamped lines when ctx.meta['verbose'] > 0
    # and as spinner + check marks otherwise.
    # (Comments rather than a docstring: a docstring on a click command would
    # change its help text.)
    st_time = time.time()
    ti = transcrib.TranscriptionInterface(font, font_style)

    def report(template, spinner_label, *values):
        # Timestamped line in verbose mode, spinner otherwise.
        if ctx.meta['verbose'] > 0:
            click.echo(template.format(time.time() - st_time, *values))
        else:
            spin(spinner_label)

    def report_verbose_only(template):
        if ctx.meta['verbose'] > 0:
            click.echo(template.format(time.time() - st_time))

    def tick():
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)

    if prefill:
        report(u'[{:2.4f}] Loading model {}', 'Loading RNN', prefill)
        prefill = models.load_any(prefill)
        if not ctx.meta['verbose']:
            tick()

    for fp in images:
        report(u'[{:2.4f}] Reading {}', 'Reading images', fp.name)
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            report_verbose_only(u'[{:2.4f}] Binarizing page')
            im = binarization.nlbin(im)
        report_verbose_only(u'[{:2.4f}] Segmenting page')
        res = pageseg.segment(im)
        if prefill:
            preds = []
            for pred in rpred.rpred(prefill, im, res):
                report(u'[{:2.4f}] {}', 'Recognizing', pred.prediction)
                preds.append(pred)
            if ctx.meta['verbose'] > 0:
                click.echo(u'Execution time: {}s'.format(time.time() - st_time))
            else:
                tick()
            ti.add_page(im, records=preds)
        else:
            ti.add_page(im, res)
    if not ctx.meta['verbose']:
        tick()

    report(u'[{:2.4f}] Writing transcription to {}', 'Writing output', output.name)
    ti.write(output)
    if not ctx.meta['verbose']:
        tick()
def pred(model, device, images):
    # Classifies superpixel patches of each image with a PatchNet and writes,
    # next to the input file:
    #   <stem>_labels.png     per-pixel class of foreground (nlbin) pixels
    #   <stem>_class_<n>.png  one mask per class in `cmap`
    #   <stem>_overlay.png    grayscale page with a coloured class overlay
    # (Comments rather than a docstring: a docstring on a click command would
    # change its help text.)
    from kraken.binarization import nlbin

    device = torch.device(device)
    m = PatchNet()
    # map_location lets weights saved on another device (e.g. CUDA) be loaded
    # on the device requested here.
    m.load_state_dict(torch.load(model, map_location=device))
    m.to(device)
    # NOTE(review): the model is not switched to eval() mode, as before -
    # confirm whether PatchNet contains dropout/batch-norm layers.

    transform = ToTensor()
    # class index -> RGBA overlay colour
    cmap = {
        0: (230, 25, 75, 127),
        1: (60, 180, 75, 127),
        2: (255, 225, 25, 127),
        3: (0, 130, 200, 127)
    }
    siz = 14  # half edge length of the patch cropped around each centroid

    for img in images:
        im = Image.open(img)
        gray_unscaled = im.convert('L')
        gray = gray_unscaled.resize((im.size[0] // 8, im.size[1] // 8))
        sp = slic(gray, n_segments=3000)
        props = regionprops(sp)
        cls = np.zeros(sp.shape)
        # Inference only: skip autograd bookkeeping for every patch.
        with torch.no_grad(), click.progressbar(props, label='patches') as bar:
            for prop in bar:
                y = int(prop.centroid[0])
                x = int(prop.centroid[1])
                patch = gray.crop((x - siz, y - siz, x + siz, y + siz))
                o = m(transform(patch).unsqueeze(0).to(device))
                # downscaled label map
                cls[sp == prop.label] = o.argmax().item()
        # upscale the label map to the size of the unscaled grayscale page
        cls = np.array(
            Image.fromarray(cls).resize(gray_unscaled.size,
                                        resample=Image.NEAREST))
        bin_im = np.array(nlbin(gray_unscaled))
        bin_im = 1 - (bin_im / bin_im.max())
        overlay = np.zeros(bin_im.shape + (4, ))
        fg_labels = bin_im * cls
        stem = os.path.splitext(img)[0]
        Image.fromarray(fg_labels.astype('uint8')).resize(
            im.size).save(stem + '_labels.png')
        for idx, val in cmap.items():
            overlay[cls == idx] = val
            layer = np.full(bin_im.shape, 255)
            layer[fg_labels == idx] = 0
            Image.fromarray(layer.astype('uint8')).resize(im.size).save(
                stem + '_class_{}.png'.format(idx))
        im = Image.alpha_composite(
            gray_unscaled.convert('RGBA'),
            Image.fromarray(overlay.astype('uint8'))).resize(im.size)
        im.save(stem + '_overlay.png')
def binarizer(threshold, zoom, escale, border, perc, range, low, high, base_image, input, output):
    """Binarize ``input`` with nlbin and save the result to ``output`` as PNG.

    An ``IOError`` while opening the input is turned into
    ``click.BadParameter``. Any failure during binarization or saving prints
    a red cross and is re-raised; success prints a green check mark.
    ``base_image`` is accepted but not used.
    """
    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e))

    click.echo('Binarizing\t', nl=False)
    nlbin_args = (threshold, zoom, escale, border, perc, range, low, high)
    try:
        res = binarization.nlbin(im, *nlbin_args)
        res.save(output, format='png')
    except BaseException:
        # same reach as the original bare except; always re-raised
        click.secho(u'\u2717', fg='red')
        raise
    else:
        click.secho(u'\u2713', fg='green')
def binarizer(threshold, zoom, escale, border, perc, range, low, high, input, output) -> None:
    """Binarize ``input`` with nlbin and write the result.

    Must run as the first processing step (``ctx.meta['first_process']``);
    otherwise a ``click.UsageError`` is raised. When this is also the last
    step and the output mode is not ``native``, the image is saved as
    ``<output>.png`` and an empty serialization referencing it is written to
    ``output``. Otherwise the image is saved to ``output`` (JPEG or missing
    extensions are forced to PNG) and recorded in ``ctx.meta['base_image']``.
    On failure the exception is re-raised if ``ctx.meta['raise_failed']`` is
    set; otherwise a red cross is printed and the context exits with 1.
    """
    from kraken import binarization

    ctx = click.get_current_context()
    if not ctx.meta['first_process']:
        raise click.UsageError('Binarization has to be the initial process.')
    if ctx.meta['input_format_type'] != 'image':
        parse_input = get_input_parser(ctx.meta['input_format_type'])
        input = parse_input(input)['image']
    ctx.meta['first_process'] = False

    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e))

    message('Binarizing\t', nl=False)
    try:
        res = binarization.nlbin(im, threshold, zoom, escale, border, perc,
                                 range, low, high)
        serialize_only = (ctx.meta['last_process']
                          and ctx.meta['output_mode'] != 'native')
        if not serialize_only:
            ext = os.path.splitext(output)[1]
            force_png = ext in ('.jpg', '.jpeg', '.JPG', '.JPEG', '')
            if force_png and ext:
                logger.warning(
                    'jpeg does not support 1bpp images. Forcing to png.')
            res.save(output, format='png' if force_png else None)
            ctx.meta['base_image'] = output
        else:
            with click.open_file(output, 'w', encoding='utf-8') as fp:
                fp = cast(IO[Any], fp)
                logger.info('Serializing as {} into {}'.format(
                    ctx.meta['output_mode'], output))
                png_name = f'{output}.png'
                res.save(png_name)
                from kraken import serialization
                document = serialization.serialize(
                    [],
                    image_name=png_name,
                    image_size=res.size,
                    template=ctx.meta['output_mode'])
                fp.write(document)
    except Exception:
        if ctx.meta['raise_failed']:
            raise
        message('\u2717', fg='red')
        ctx.exit(1)
    message('\u2713', fg='green')
def transcription(ctx, text_direction, scale, maxcolseps, black_colseps, font, font_style, prefill, output, images, segment_page):
    # Builds a transcription environment: every image is opened, binarized
    # unless already bitonal, then either segmented with pageseg or treated
    # as a single box covering the whole image (segment_page falsy),
    # optionally pre-filled with the predictions of a recognition model, and
    # added as a page; the result is written to `output`.
    # (Comments rather than a docstring: a docstring on a click command would
    # change its help text.)
    def done():
        # check mark that closes the current spinner
        message(u'\b\u2713', fg='green', nl=False)
        message('\033[?25h\n', nl=False)

    ti = transcribe.TranscriptionInterface(font, font_style)

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        spin('Loading RNN')
        prefill = models.load_any(prefill.encode('utf-8'))
        done()

    for fp in images:
        logger.info('Reading {}'.format(fp.name))
        spin('Reading images')
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            logger.info(u'Binarizing page')
            im = binarization.nlbin(im)

        if not segment_page:
            whole_page = (0, 0) + im.size
            res = {'text_direction': 'horizontal-tb', 'boxes': [whole_page]}
        else:
            logger.info(u'Segmenting page')
            res = pageseg.segment(im, text_direction, scale, maxcolseps,
                                  black_colseps)

        if not prefill:
            ti.add_page(im, res)
        else:
            preds = []
            for pred in rpred.rpred(prefill, im, res):
                logger.info('{}'.format(pred.prediction))
                spin('Recognizing')
                preds.append(pred)
            done()
            ti.add_page(im, res, records=preds)
        fp.close()
    done()

    logger.info(u'Writing transcription to {}'.format(output.name))
    spin('Writing output')
    ti.write(output)
    done()
def nlbin(doc, method=u'nlbin', threshold=0.5, zoom=0.5, escale=1.0,
          border=0.1, perc=80, range=20, low=5, high=90):
    """
    Binarizes an input document utilizing ocropus'/kraken's nlbin algorithm.

    The output path is derived from the input path by inserting ``method``
    and the string form of every tuning value as suffixes.

    Args:
        doc (unicode, unicode): The input document tuple.
        method (unicode): The suffix string appended to all output files.
        threshold (float): Forwarded to nlbin.
        zoom (float): Forwarded to nlbin.
        escale (float): Forwarded to nlbin.
        border (float): Forwarded to nlbin.
        perc (int): Forwarded to nlbin.
        range (int): Forwarded to nlbin.
        low (int): Forwarded to nlbin.
        high (int): Forwarded to nlbin.

    Returns:
        (unicode, unicode): Storage tuple of the output file

    Raises:
        Nothing is validated or caught in this function; exceptions from the
        storage helpers, ``Image.open``, ``binarization.nlbin`` or ``save``
        propagate to the caller.
    """
    input_path = storage.get_abs_path(*doc)
    output_path = storage.insert_suffix(input_path, method, unicode(threshold),
                                        unicode(zoom), unicode(escale),
                                        unicode(border), unicode(perc),
                                        unicode(range), unicode(low),
                                        unicode(high))
    # Scope the source file with a context manager so it is closed even when
    # binarization or saving fails. The save stays inside the block because
    # nlbin may return the input object itself.
    with Image.open(input_path) as img:
        o_img = binarization.nlbin(img, threshold, zoom, escale, border, perc,
                                   range, low, high)
        o_img.save(output_path)
    return storage.get_storage_path(output_path)
def ExtractKK(path):
    """Run the extraction pipeline on the document image at ``path``.

    The page is binarized with nlbin, two tables are located with
    ``box_extraction`` and binarized with ``KrakenizeBox``, their column
    positions are clustered and averaged, the tables are written to
    ``tabel1.jpg`` / ``tabel2.jpg``, and the cropped header, number and cell
    texts are passed to ``CleanHeader``, ``getKKNo`` and ``Clean``.
    Nothing is returned.
    """
    source = cv.imread(path)
    height, width = source.shape[:2]

    # Binarize the full page (used for the header crop below)
    page_pil = Image.fromarray(cv.cvtColor(source, cv.COLOR_BGR2RGB))
    img = np.array(binarization.nlbin(page_pil).convert('RGB'))

    table1, table2, y2ForHeader = box_extraction(path)
    krtable1, krtable2 = KrakenizeBox(table1, table2)

    x1, height1, y1, width1 = markVertical(krtable1)
    # the row positions are sorted, clustered and averaged but the result is
    # not used afterwards; the calls are kept as they were
    y1.sort()
    average(cluster(y1, 10))
    x2, height2, y2, width2 = markVertical(krtable2)

    groupedX1 = cluster(x1, 10)
    groupedX2 = cluster(x2, 10)
    xPoints1 = average(groupedX1)
    xPoints2 = average(groupedX2)

    imageToBeCropped1 = main(krtable1)
    cv.imwrite('tabel1.jpg', imageToBeCropped1)
    imageToBeCropped2 = main(krtable2)
    cv.imwrite('tabel2.jpg', imageToBeCropped2)

    textHeader, textNo = cropHeader(xPoints1, y2ForHeader + 5, img, 1, width)
    text1 = crop(xPoints1, height1, imageToBeCropped1, 0.25)
    text2 = crop(xPoints2, height2, imageToBeCropped2, 0.25)

    Clean(text1 + text2)
    CleanHeader(textHeader)
    getKKNo(textNo)
def binarizer(threshold, zoom, escale, border, perc, range, low, high, base_image, input, output) -> None:
    """Binarize ``input`` with nlbin and save the result to ``output``.

    An ``IOError`` while opening the input becomes ``click.BadParameter``.
    Outputs with a JPEG or missing extension are saved as PNG, with a warning
    in the JPEG case. Failures print a red cross and are re-raised; success
    prints a green check mark. ``base_image`` is accepted but not used.
    """
    from kraken import binarization

    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e))

    message('Binarizing\t', nl=False)
    try:
        res = binarization.nlbin(im, threshold, zoom, escale, border, perc,
                                 range, low, high)
        ext = os.path.splitext(output)[1]
        force_png = ext in ('.jpg', '.jpeg', '.JPG', '.JPEG', '')
        if force_png and ext:
            logger.warning('jpeg does not support 1bpp images. Forcing to png.')
        res.save(output, format='png' if force_png else None)
    except Exception:
        message('\u2717', fg='red')
        raise
    else:
        message('\u2713', fg='green')
def binarizer(threshold, zoom, escale, border, perc, range, low, high, base_image, input, output) -> None:
    """Binarize ``input`` with nlbin and save the result to ``output``.

    An ``IOError`` while opening the input becomes ``click.BadParameter``.
    Outputs with a JPEG or missing extension are saved as PNG, with a warning
    in the JPEG case. Failures print a red cross and are re-raised; success
    prints a green check mark. ``base_image`` is accepted but not used.
    """
    from kraken import binarization

    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e))

    message('Binarizing\t', nl=False)
    jpeg_or_missing = ['.jpg', '.jpeg', '.JPG', '.JPEG', '']
    try:
        res = binarization.nlbin(im, threshold, zoom, escale, border, perc,
                                 range, low, high)
        _, ext = os.path.splitext(output)
        form = 'png' if ext in jpeg_or_missing else None
        if form and ext:
            logger.warning(
                'jpeg does not support 1bpp images. Forcing to png.')
        res.save(output, format=form)
    except Exception:
        message('\u2717', fg='red')
        raise
    message('\u2713', fg='green')
def transcription(ctx, text_direction, scale, bw, maxcolseps, black_colseps, font, font_style, prefill, pad, lines, output, images):
    """
    Creates transcription environments for ground truth generation.
    """
    # The docstring above is kept verbatim (presumably click help text).
    #
    # Every image is opened, converted to RGB when its mode is not one of
    # 1/L/P/RGB, binarized, and either segmented with pageseg or given the
    # segmentation loaded from the `lines` JSON file. With `prefill` set, the
    # recognition model's predictions are attached to the page.
    from PIL import Image
    from kraken import rpred
    from kraken import pageseg
    from kraken import transcribe
    from kraken import binarization
    from kraken.lib import models

    ti = transcribe.TranscriptionInterface(font, font_style)

    if len(images) > 1 and lines:
        raise click.UsageError(
            '--lines option is incompatible with multiple image files')

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        message('Loading RNN', nl=False)
        prefill = models.load_any(prefill)
        message('\u2713', fg='green')

    with log.progressbar(images, label='Reading images') as bar:
        for fp in bar:
            logger.info('Reading {}'.format(fp.name))
            im = Image.open(fp)
            if im.mode not in ['1', 'L', 'P', 'RGB']:
                logger.warning(
                    'Input {} is in {} color mode. Converting to RGB'.format(
                        fp.name, im.mode))
                im = im.convert('RGB')
            logger.info('Binarizing page')
            im_bin = binarization.nlbin(im)
            im_bin = im_bin.convert('1')
            logger.info('Segmenting page')
            if not lines:
                res = pageseg.segment(im_bin, text_direction, scale,
                                      maxcolseps, black_colseps, pad=pad)
            else:
                # A separate name for the segmentation file: the original
                # rebound `fp` here, so the fp.close() below closed the JSON
                # file a second time and left the image handle open.
                with open_file(lines, 'r') as lines_fp:
                    try:
                        lines_fp = cast(IO[Any], lines_fp)
                        res = json.load(lines_fp)
                    except ValueError as e:
                        raise click.UsageError(
                            '{} invalid segmentation: {}'.format(
                                lines, str(e)))
            if prefill:
                it = rpred.rpred(prefill, im_bin, res)
                preds = []
                logger.info('Recognizing')
                for pred in it:
                    logger.debug('{}'.format(pred.prediction))
                    preds.append(pred)
                ti.add_page(im, res, records=preds)
            else:
                ti.add_page(im, res)
            fp.close()

    logger.info('Writing transcription to {}'.format(output.name))
    message('Writing output', nl=False)
    ti.write(output)
    message('\u2713', fg='green')
def extract(ctx, binarize, normalization, normalize_whitespace, reorder, rotate, output, format, transcriptions):
    """
    Extracts image-text pairs from a transcription environment created using
    ``ketos transcribe``.
    """
    # The docstring above is kept verbatim (presumably click help text).
    #
    # For every <section> of every transcription file the embedded base64
    # page image is decoded, optionally binarized, and each editable <li>
    # with non-blank text is cropped by its data-bbox. `format` is a
    # str.format template over the keys output/idx/src/uuid that names the
    # line files ("<output>/<format>.png" and "<output>/<format>.gt.txt").
    import regex
    import base64
    from io import BytesIO
    from PIL import Image
    from lxml import html, etree
    from kraken import binarization

    try:
        os.mkdir(output)
    except OSError:
        # Best effort as before (typically "already exists"); a directory
        # that is really unusable fails on the first save below.
        pass

    # text clean-up steps, applied in this order to every line
    text_transforms = []
    if normalization:
        text_transforms.append(
            lambda x: unicodedata.normalize(normalization, x))
    if normalize_whitespace:
        # raw string: '\s' in a plain literal is an invalid escape sequence
        text_transforms.append(lambda x: regex.sub(r'\s', ' ', x))
    if reorder:
        text_transforms.append(get_display)

    idx = 0
    manifest = []
    with log.progressbar(transcriptions, label='Reading transcriptions') as bar:
        for fp in bar:
            logger.info('Reading {}'.format(fp.name))
            doc = html.parse(fp)
            etree.strip_tags(doc, etree.Comment)
            td = doc.find(".//meta[@itemprop='text_direction']")
            if td is None:
                td = 'horizontal-lr'
            else:
                td = td.attrib['content']

            im = None
            dest_dict = {
                'output': output,
                'idx': 0,
                'src': fp.name,
                'uuid': str(uuid.uuid4())
            }
            for section in doc.xpath('//section'):
                img = section.xpath('.//img')[0].get('src')
                fd = BytesIO(base64.b64decode(img.split(',')[1]))
                im = Image.open(fd)
                if not im:
                    logger.info('Skipping {} because image not found'.format(
                        fp.name))
                    break
                if binarize:
                    im = binarization.nlbin(im)
                for line in section.iter('li'):
                    raw_text = u''.join(line.itertext())
                    # only editable lines with non-empty, non-blank text
                    if not line.get('contenteditable') or not raw_text.strip():
                        continue
                    dest_dict['idx'] = idx
                    dest_dict['uuid'] = str(uuid.uuid4())
                    logger.debug('Writing line {:06d}'.format(idx))
                    l_img = im.crop(
                        [int(x) for x in line.get('data-bbox').split(',')])
                    if rotate and td.startswith('vertical'):
                        # Image.rotate returns a new image. The original
                        # called it on the whole page and dropped the result,
                        # so vertical lines were never rotated.
                        l_img = l_img.rotate(90, expand=True)
                    l_img.save(('{output}/' + format + '.png').format(**dest_dict))
                    manifest.append((format + '.png').format(**dest_dict))
                    text = raw_text.strip()
                    for func in text_transforms:
                        text = func(text)
                    with open(('{output}/' + format + '.gt.txt').format(**dest_dict),
                              'wb') as t:
                        t.write(text.encode('utf-8'))
                    idx += 1
        logger.info('Extracted {} lines'.format(idx))
        with open('{}/manifest.txt'.format(output), 'w') as fp:
            fp.write('\n'.join(manifest))
def SimExtract(input, filename):
    """OCR the fields of a SIM card image and return them as a dict.

    The image is binarized with nlbin, dilated, resized to 800x480 and cut
    into fixed regions (name/address, licence type, gender, detail block,
    second address line) that are each read with pytesseract.

    Fixes relative to the original:
      * ``if 'PRI' or 'PRIA' in text3`` and the matching job test were always
        true, so every card came out as ``PRIA`` / ``KARYAWAN SWASTA``; they
        are now real membership tests.
      * a stray ``return`` after stripping the ``NAMA`` / ``ALAMAT`` label
        made the function return ``None`` for exactly those inputs.
      * the job is taken from the filtered field list instead of being
        re-read from the unfiltered line list.
      * short OCR output no longer raises ``IndexError``.

    :param input: path or file object accepted by ``Image.open``.
    :param filename: name stored under ``img.sim`` and returned unchanged.
    :return: tuple ``(result_dict, filename)``.
    """
    # Scope the source file; everything needed is copied into a numpy array.
    with Image.open(input) as img:
        result = binarization.nlbin(img)
        open_cv_image = np.array(result.convert('RGB'))

    # Thicken strokes, normalise the size, then cut the fixed field regions.
    kernel = np.ones((2, 2), np.uint8)
    img = cv.dilate(open_cv_image, kernel, iterations=2)
    image = cv.resize(img, (800, 480))
    namaAlamat = image[143:192, 0:660]
    tipe = image[0:109, 609:780]
    kelamin = image[142:180, 610:765]
    block = image[220:343, 397:649]
    rt = image[190:228, 244:646]
    tipe = cv.dilate(tipe, kernel, iterations=4)

    # --- name and first address line ---
    text1 = pytesseract.image_to_string(namaAlamat, lang="ind", config='--psm 6 --oem 3')
    print(text1)
    text1 = text1.split("\n")
    text1[0] = text1[0].replace('1', 'I')
    nama = re.sub(r'[^a-zA-Z .:]', r'', text1[0])
    # OCR may return a single line; treat a missing address line as empty
    alamat = re.sub(r'[^a-zA-Z0-9 .:]', r'', text1[1] if len(text1) > 1 else '')
    if 'NAMA' in nama:
        nama = nama.split('NAMA')[1]
    elif 'nama' in nama:
        nama = nama.replace('nama', '')
    if 'ALAMAT' in alamat:
        alamat = alamat.split('ALAMAT')[1]
    elif 'alamat' in alamat:
        alamat = alamat.replace('alamat', '')
    if ':' in alamat:
        alamat = alamat.split(':')[1]

    # --- second address line ---
    text4 = pytesseract.image_to_string(rt, lang="ind", config='--psm 6 --oem 3')
    newText4 = ''.join(' ' + part for part in text4.split("\n"))
    if ':' in newText4:
        rt = newText4.split(':')[1]
    else:
        rt = newText4

    # --- detail block: one field per non-empty line ---
    text5 = pytesseract.image_to_string(block, lang="ind", config='--oem 3')
    print(text5)
    # common misreads of the height unit
    if 'om' in text5:
        text5 = text5.replace('om', 'cm')
    if 'mm' in text5:
        text5 = text5.replace('mm', 'cm')
    text5 = text5.split('\n')
    print('oiiiiiii')
    print(text5)
    newText5 = [i for i in text5 if i != '']
    print(newText5)
    if 'cm' in newText5:
        # the unit was read as a line of its own: re-attach it to the height
        newText5.remove('cm')
        if len(newText5) > 2:
            newText5[2] += ' cm'
    print(newText5)
    datas = list(newText5)
    # pad so the six positional fields below always exist
    if len(datas) < 6:
        datas.extend([''] * (6 - len(datas)))
    print(datas)

    # --- gender ---
    text3 = pytesseract.image_to_string(kelamin, lang="ind")
    print(text3)
    if 'PRI' in text3:  # also matches 'PRIA'
        text3 = 'PRIA'
        print('Jenis Kelamin :' + text3)
    elif 'W' in text3:  # also matches 'WAN' / 'WANITA'
        text3 = 'WANITA'
        print('Jenis Kelamin :' + text3)
    kelamin = text3

    # from datas
    teL = datas[0].strip().upper()
    taL = datas[1].strip().upper()
    tinggi = datas[2].strip()
    pekerjaan = datas[3].strip().upper()
    noSIM = re.sub(r'[^0-9]', r'', datas[4]).strip()
    mb = datas[5].strip()

    # --- licence type ---
    text2 = pytesseract.image_to_string(
        tipe, lang="ind",
        config='-c tessedit_char_whitelist=AaBbCc --psm 6 --oem 3')
    print(text2)
    jenisSim = text2.strip().upper()

    # Cleaning data for JSON
    nama = re.sub(r'[^a-zA-Z .]', r'', nama).strip().upper()
    if 'NAMA' in nama:
        nama = nama.replace('NAMA', '').strip()
    alamat = alamat.upper()
    if 'ALAMAT' in alamat:
        alamat = alamat.replace('ALAMAT', '')
    rt = rt.upper()
    alamat = (alamat + ' ' + rt).strip()
    kelamin = text3.strip().upper()
    # a leading '1' is prepended when missing (also covers an empty string)
    if not tinggi.startswith('1'):
        tinggi = '1' + tinggi
    # normalise partial reads of the most common job title
    if any(fragment in pekerjaan for fragment in ('ARYAWAN SW', 'SWASTA', 'AWAN')):
        pekerjaan = 'KARYAWAN SWASTA'

    x = {
        "message": "OCR Success",
        "success": True,
        "documentType": "SIMprocessed",
        "data": {
            "Nama": nama,
            "Alamat": alamat,
            "JenisKelamin": kelamin,
            "TempatLahir": teL,
            "TanggalLahir": taL,
            "Tinggi": tinggi,
            "Pekerjaan": pekerjaan,
            "NoSIM": noSIM,
            "JenisSIM": jenisSim,
            "mb": mb
        },
        "img": {
            "sim": filename,
        }
    }
    return x, filename
def test_not_binarize_empty(self):
    """
    Feed a freshly created 1000x1000 mode '1' image to nlbin.

    The body makes no assertion of its own; any expected-exception
    marker would have to live outside this block.
    """
    blank_size = (1000, 1000)
    with Image.new('1', blank_size) as blank:
        nlbin(blank)
def test_not_binarize_bw(self):
    """
    Check that nlbin hands back a mode '1' image that compares equal to
    its input, i.e. bitonal images aren't binarized again.
    """
    with Image.new('1', (1000, 1000)) as bitonal:
        expected = bitonal
        actual = nlbin(bitonal)
        self.assertEqual(expected, actual)
def line_generator(ctx, font, maxlines, encoding, normalization, renormalize,
                   font_size, language, max_length, strip, disable_degradation,
                   binarize, mean, sigma, density, distort, distortion_sigma,
                   legacy, output, text):
    """
    Generates artificial text line training data.

    Reads every file in `text`, de-duplicates (and optionally normalizes,
    strips, length-filters and samples) the lines, prints the resulting
    alphabet, then renders each line with `linegen.LineGenerator` and writes
    `<output>/<idx>.png` plus `<output>/<idx>.gt.txt`.  Returns early, doing
    nothing, when `text` is empty.
    """
    corpus = set()
    if not text:
        return
    verbosity = ctx.meta['verbose']
    started = time.time()

    # Collect the raw lines of every input file.
    for source in text:
        with click.open_file(source, encoding=encoding) as handle:
            if verbosity > 0:
                click.echo(u'[{:2.4f}] Reading {}'.format(time.time() - started, source))
            else:
                spin('Reading texts')
            corpus.update(handle.readlines())

    # Optional per-line clean-up; each step keeps the collection a set.
    if normalization:
        corpus = {unicodedata.normalize(normalization, entry) for entry in corpus}
    if strip:
        corpus = {entry.strip() for entry in corpus}
    if max_length:
        corpus = {entry for entry in corpus if len(entry) < max_length}

    if verbosity > 0:
        click.echo(u'[{:2.4f}] Read {} lines'.format(time.time() - started, len(corpus)))
    else:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
        click.echo('Read {} unique lines'.format(len(corpus)))

    # Random sample (indices drawn by np.random.randint) when capped.
    if maxlines and maxlines < len(corpus):
        click.echo('Sampling {} lines\t'.format(maxlines), nl=False)
        pool = list(corpus)
        picks = np.random.randint(0, len(pool), maxlines)
        corpus = [pool[pick] for pick in picks]
        click.secho(u'\u2713', fg='green')

    # An already existing output directory is fine; anything else is fatal.
    try:
        os.makedirs(output)
    except OSError as err:
        if err.errno != errno.EEXIST:
            raise

    corpus = [entry.strip() for entry in corpus]

    # calculate the alphabet and print it for verification purposes
    alphabet = {symbol for entry in corpus for symbol in entry}
    ordered = sorted(alphabet)
    combining = [unicodedata.name(symbol) for symbol in ordered
                 if unicodedata.combining(symbol)]
    chars = [symbol for symbol in ordered if not unicodedata.combining(symbol)]
    click.echo(u'Σ (len: {})'.format(len(alphabet)))
    click.echo(u'Symbols: {}'.format(''.join(chars)))
    if combining:
        click.echo(u'Combining Characters: {}'.format(', '.join(combining)))

    renderer = linegen.LineGenerator(font, font_size, language)
    for idx, entry in enumerate(corpus):
        if verbosity > 0:
            click.echo(u'[{:2.4f}] {}'.format(time.time() - started, entry))
        else:
            spin('Writing images')

        # Render; lines the renderer cannot draw are reported and skipped.
        try:
            to_render = unicodedata.normalize(renormalize, entry) if renormalize else entry
            im = renderer.render_line(to_render)
        except KrakenCairoSurfaceException as err:
            if verbosity > 0:
                click.echo('[{:2.4f}] {}: {} {}'.format(time.time() - started, err.message, err.width, err.height))
            else:
                click.secho(u'\b\u2717', fg='red')
                click.echo('{}: {} {}'.format(err.message, err.width, err.height))
            continue

        # Legacy degradation wins; otherwise distort + degrade unless disabled.
        if legacy:
            im = linegen.ocropy_degrade(im)
        elif not disable_degradation:
            im = linegen.distort_line(im, np.random.normal(distort), np.random.normal(distortion_sigma))
            im = linegen.degrade_line(im, np.random.normal(mean), np.random.normal(sigma), np.random.normal(density))

        if binarize:
            im = binarization.nlbin(im)

        im.save('{}/{:06d}.png'.format(output, idx))
        with open('{}/{:06d}.gt.txt'.format(output, idx), 'wb') as gt_file:
            gt_file.write(entry.encode('utf-8'))

    if verbosity == 0:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
def _binarize(cv2image):
    """Wrap an array as a PIL image and binarize it with nlbin at zoom=1.0."""
    return binarization.nlbin(Image.fromarray(cv2image), zoom=1.0)
def read(self, page):
    """Performs OCR with Kraken.

    Takes the "clean" stage of `page`, binarizes it with `nlbin`, and runs
    `rpred` on every line of the "main" band of every block.  The results
    are stored in `page.stages` under the keys "char", "word" and "line",
    after which `page.write(stage="line,word,char")` is called.

    Parameters
    ----------
    page: object
        Must offer `stages` (dict), `blocks` (dict keyed by
        `(stripe, block)`) and a `write` method.

    Returns
    -------
    None
        Also returns None, without doing anything, when the page has no
        "clean" stage.
    """
    stages = page.stages
    scan = stages.get("clean", None)
    if scan is None:
        return None

    nonLetter = self.nonLetter
    model = self.ensureLoaded()
    blocks = page.blocks

    # The result lists are registered on the page up front and filled below.
    ocrChars = []
    ocrWords = []
    ocrLines = []
    stages["char"] = ocrChars
    stages["word"] = ocrWords
    stages["line"] = ocrLines

    # Binarize the whole page once; blocks and lines are sliced out of it.
    binary = pil2array(nlbin(array2pil(scan)))

    for ((stripe, block), data) in blocks.items():
        (left, top, right, bottom) = data["inner"]
        thisBinary = binary[top:bottom, left:right]
        lines = data["bands"]["main"]["lines"]

        for (ln, (up, lo)) in enumerate(lines):
            # line numbers are reported 1-based
            lln = ln + 1
            roi = thisBinary[up : lo + 1]
            (b, e, roi) = removeMargins(roi, keep=16)

            # line record: block-relative offsets turned into page coordinates
            ocrLines.append((stripe, block, lln, left + b, top + up, left + e, top + lo))

            (roiH, roiW) = roi.shape[0:2]
            roi = array2pil(roi)
            # a single box spanning the whole line image
            bounds = dict(boxes=([0, 0, roiW, roiH],), text_direction=RL)

            # Adapt the boxes, because they correspond to peaks of
            # recognition, not to character extents.
            #
            # See https://github.com/mittagessen/kraken/issues/184
            #
            # Each prediction is stored as a mutable [char, box, conf] list so
            # that the box of the previous prediction can still be adjusted.
            adaptedPreds = []
            for (c, (le, to, ri, bo), conf) in chain.from_iterable(
                rpred(model, roi, bounds, pad=0, bidi_reordering=True)
            ):
                if adaptedPreds:
                    prevPred = adaptedPreds[-1]
                    prevEdge = prevPred[1][0]
                else:
                    # first prediction: measure against the full line width
                    # (presumably the right edge of right-to-left text)
                    prevEdge = roiW
                # Split the gap between the previous box and this one evenly.
                correction = int(round((prevEdge - ri) / 2))
                thisRi = ri + correction
                if adaptedPreds:
                    adaptedPreds[-1][1][0] -= correction
                adaptedPreds.append([c, [le, to, thisRi, bo], conf])
            if adaptedPreds:
                # the last prediction is stretched to the start of the line image
                adaptedPreds[-1][1][0] = 0

            # Divide into words, not only on spaces, but also on punctuation.
            # curWord[0] collects the letters, curWord[1] the non-letters
            # (and the space) that trail them.
            curWord = [[], []]
            inWord = True

            for (c, (le, to, ri, bo), conf) in adaptedPreds:
                offsetW = left + b
                offsetH = top + up
                pos = (le + offsetW, to + offsetH, ri + offsetW, bo + offsetH)
                # confidence is stored as an integer percentage
                conf = int(round(conf * 100))
                ocrChars.append((stripe, block, lln, *pos, conf, c))

                spaceSeen = c == " "
                changeWord = not inWord and c not in nonLetter
                element = (c, pos, conf)

                if spaceSeen:
                    curWord[1].append(element)
                if spaceSeen or changeWord:
                    if curWord[0] or curWord[1]:
                        ocrWords.append((stripe, block, lln, *addWord(curWord)))
                    curWord = [[], []]
                    inWord = True
                    # NOTE(review): when this branch is entered through
                    # changeWord (a letter directly after punctuation), the
                    # `continue` skips adding the current letter to the new
                    # word -- confirm whether that is intended.
                    continue

                if inWord:
                    if c in nonLetter:
                        inWord = False
                dest = 0 if inWord else 1
                curWord[dest].append(element)

            # flush the word that is still pending at the end of the line
            if curWord[0] or curWord[1]:
                ocrWords.append((stripe, block, lln, *addWord(curWord)))

    page.write(stage="line,word,char")
def extract(ctx, binarize, normalization, normalize_whitespace, reorder,
            rotate, output, format, transcriptions):
    """
    Extracts image-text pairs from a transcription environment created using
    ``ketos transcribe``.

    For every ``<section>`` of every transcription file the embedded base64
    page image is decoded (and optionally binarized); each non-empty,
    ``contenteditable`` ``<li>`` is cropped to its ``data-bbox`` and written
    as ``<output>/<format>.png`` next to ``<output>/<format>.gt.txt``.
    Finally ``<output>/manifest.txt`` lists all image names.

    Args:
        ctx: Unused in this function.
        binarize (bool): Run ``binarization.nlbin`` on each page image.
        normalization (str): Unicode normalization form applied to the text,
            or a falsy value to skip.
        normalize_whitespace (bool): Replace every whitespace character with
            a plain space.
        reorder (bool): Apply ``get_display`` to the text.
        rotate (bool): Rotate line images by 90 degrees when the document's
            text direction starts with ``vertical``.
        output (str): Destination directory; created when missing.
        format (str): ``str.format`` template for the file stem; may use
            ``{idx}``, ``{src}``, ``{uuid}`` (and ``{output}``).
        transcriptions: Iterable of open transcription files (need ``.name``).
    """
    import regex
    import base64

    from io import BytesIO
    from PIL import Image
    from lxml import html, etree

    from kraken import binarization

    # Previously every error of os.mkdir was silently swallowed; only an
    # already existing directory is acceptable.
    os.makedirs(output, exist_ok=True)

    text_transforms = []
    if normalization:
        text_transforms.append(lambda x: unicodedata.normalize(normalization, x))
    if normalize_whitespace:
        # raw string: '\s' in a plain literal is an invalid escape sequence
        text_transforms.append(lambda x: regex.sub(r'\s', ' ', x))
    if reorder:
        text_transforms.append(get_display)

    idx = 0
    manifest = []
    with log.progressbar(transcriptions, label='Reading transcriptions') as bar:
        for fp in bar:
            logger.info('Reading {}'.format(fp.name))
            doc = html.parse(fp)
            etree.strip_tags(doc, etree.Comment)
            td = doc.find(".//meta[@itemprop='text_direction']")
            td = 'horizontal-lr' if td is None else td.attrib['content']

            dest_dict = {'output': output, 'idx': 0, 'src': fp.name, 'uuid': str(uuid.uuid4())}
            for section in doc.xpath('//section'):
                # The old `if not im` test after Image.open could never fire;
                # a missing <img> raised IndexError instead of being skipped.
                img_nodes = section.xpath('.//img')
                if not img_nodes:
                    logger.info('Skipping {} because image not found'.format(fp.name))
                    break
                src = img_nodes[0].get('src')
                im = Image.open(BytesIO(base64.b64decode(src.split(',')[1])))
                if binarize:
                    im = binarization.nlbin(im)

                for line in section.iter('li'):
                    content = u''.join(line.itertext())
                    if not line.get('contenteditable') or not content or content.isspace():
                        continue
                    dest_dict['idx'] = idx
                    dest_dict['uuid'] = str(uuid.uuid4())
                    logger.debug('Writing line {:06d}'.format(idx))
                    l_img = im.crop([int(x) for x in line.get('data-bbox').split(',')])
                    if rotate and td.startswith('vertical'):
                        # Image.rotate returns a new image; the result used to
                        # be discarded (and was computed on the whole page).
                        l_img = l_img.rotate(90, expand=True)
                    l_img.save(('{output}/' + format + '.png').format(**dest_dict))
                    manifest.append((format + '.png').format(**dest_dict))

                    text = content.strip()
                    for func in text_transforms:
                        text = func(text)
                    with open(('{output}/' + format + '.gt.txt').format(**dest_dict), 'wb') as t:
                        t.write(text.encode('utf-8'))
                    idx += 1

    logger.info('Extracted {} lines'.format(idx))
    with open('{}/manifest.txt'.format(output), 'w') as fp:
        fp.write('\n'.join(manifest))
from itertools import cycle

from kraken.pageseg import segment
from kraken.binarization import nlbin
from kraken.rpred import rpred
from kraken.lib import models

# Script: for every image given on the command line, binarize and segment it,
# run recognition with the model named by the first argument, and save a copy
# ('high_<basename>') in which every cut box of every recognized line is
# filled with the next colour of an endlessly repeating palette.

# Half-transparent RGBA fill colours.
cmap = cycle([
    (230, 25, 75, 127),
    (60, 180, 75, 127),
    (255, 225, 25, 127),
    (0, 130, 200, 127),
    (245, 130, 48, 127),
    (145, 30, 180, 127),
    (70, 240, 240, 127),
])

net = models.load_any(sys.argv[1])

for fname in sys.argv[2:]:
    im = Image.open(fname)
    print(fname)
    im = nlbin(im)
    res = segment(im, maxcolseps=0)
    # The recognizer is set up on the binarized image, before the RGBA
    # conversion below rebinds `im`.
    pred = rpred(net, im, res)
    im = im.convert('RGBA')

    # Paint onto a fully transparent layer of the same size, then blend.
    tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(tmp)
    for box in (cut for line in pred for cut in line.cuts):
        draw.rectangle(box, fill=next(cmap))

    im = Image.alpha_composite(im, tmp)
    target = 'high_{}'.format(os.path.basename(fname))
    im.save(target)
def transcription(ctx, text_direction, scale, bw, maxcolseps, black_colseps,
                  font, font_style, prefill, pad, lines, output, images):
    """
    Creates transcription environments for ground truth generation.

    Every image is binarized, segmented (or given the segmentation loaded
    from the JSON file `lines`), optionally pre-filled with the output of a
    recognition model, and added as a page to a
    `transcribe.TranscriptionInterface`, which is finally written to
    `output`.

    Args:
        ctx: Unused in this function.
        text_direction, scale, maxcolseps, black_colseps, pad: Passed
            through to `pageseg.segment`.
        bw: Unused in this function.
        font, font_style: Passed to `transcribe.TranscriptionInterface`.
        prefill: Model location for `models.load_any`, or a falsy value to
            leave the transcription empty.
        lines: Path of a JSON segmentation used instead of running the
            segmenter; only valid with a single image.
        output: Open file the interface is written to (needs `.name`).
        images: Sequence of open image files (need `.name`).

    Raises:
        click.UsageError: `lines` is combined with several images, or the
            segmentation file is not valid JSON.
    """
    from PIL import Image

    from kraken import rpred
    from kraken import pageseg
    from kraken import transcribe
    from kraken import binarization

    from kraken.lib import models
    from kraken.lib.util import is_bitonal  # NOTE(review): unused in this block

    ti = transcribe.TranscriptionInterface(font, font_style)

    if len(images) > 1 and lines:
        raise click.UsageError('--lines option is incompatible with multiple image files')

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        message('Loading RNN', nl=False)
        prefill = models.load_any(prefill)
        message('\u2713', fg='green')

    with log.progressbar(images, label='Reading images') as bar:
        for fp in bar:
            try:
                logger.info('Reading {}'.format(fp.name))
                im = Image.open(fp)
                if im.mode not in ['1', 'L', 'P', 'RGB']:
                    logger.warning('Input {} is in {} color mode. Converting to RGB'.format(fp.name, im.mode))
                    im = im.convert('RGB')

                logger.info('Binarizing page')
                im_bin = binarization.nlbin(im)
                im_bin = im_bin.convert('1')

                logger.info('Segmenting page')
                if not lines:
                    res = pageseg.segment(im_bin, text_direction, scale, maxcolseps, black_colseps, pad=pad)
                else:
                    # A separate name for the segmentation handle: it used to
                    # rebind `fp`, so the image file was never closed below.
                    with open_file(lines, 'r') as seg_fp:
                        try:
                            res = json.load(cast(IO[Any], seg_fp))
                        except ValueError as e:
                            raise click.UsageError('{} invalid segmentation: {}'.format(lines, str(e))) from e

                if prefill:
                    logger.info('Recognizing')
                    preds = []
                    for pred in rpred.rpred(prefill, im_bin, res):
                        logger.debug('{}'.format(pred.prediction))
                        preds.append(pred)
                    ti.add_page(im, res, records=preds)
                else:
                    ti.add_page(im, res)
            finally:
                # Release the image file even when a step above fails.
                fp.close()

    logger.info('Writing transcription to {}'.format(output.name))
    message('Writing output', nl=False)
    ti.write(output)
    message('\u2713', fg='green')
def test_not_binarize_bw(self):
    """
    Check that nlbin hands back the 'bw.png' resource image unchanged,
    i.e. mode '1' images aren't binarized again.
    """
    bw_path = os.path.join(resources, 'bw.png')
    with Image.open(bw_path) as bitonal:
        expected = bitonal
        actual = nlbin(bitonal)
        self.assertEqual(expected, actual)