Code example #1
def kraken_nlbin(input_path,
                 output_path,
                 threshold=0.5,
                 zoom=0.5,
                 escale=1.0,
                 border=0.1,
                 perc=80,
                 range=20,
                 low=5,
                 high=90):
    """
    Binarizes an input document utilizing ocropus'/kraken's nlbin algorithm.

    Args:
        input_path (unicode): Path to the input image
        output_path (unicode): Path to the output image
        threshold (float):
        zoom (float):
        escale (float):
        border (float):
        perc (int):
        range (int):
        low (int):
        high (int):

    Raises:
        NidabaInvalidParameterException: Input parameters are outside the valid
                                         range.

    """
    img = Image.open(input_path)
    binarization.nlbin(img, threshold, zoom, escale, border, perc, range, low,
                       high).save(output_path)
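
All of the examples on this page follow the same core pattern: open an image with Pillow, pass it to kraken's binarization.nlbin, and save the result. For orientation, here is a minimal self-contained sketch of that pattern (kraken and Pillow are assumed to be installed; the file paths are placeholders):

# Minimal sketch of the shared pattern; 'page.png' and 'page_bw.png' are
# placeholder paths, and nlbin is called with its default parameters.
from PIL import Image
from kraken import binarization

img = Image.open('page.png')
bw = binarization.nlbin(img)
bw.save('page_bw.png')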
Code example #2
def binarize(directory, extension, new_extension, new_directory) -> None:
    """Binarizes images in a directory and saves the new files
    in another directory.
    :param directory: path to the directory where the images are located.
    :type directory: str
    :param extension: images' extension.
    :type extension: str
    :param new_extension: binarized images extension
    :type new_extension: str
    :param new_directory: path to the directory where the binarized images will be saved.
    :type new_directory: str
    :return: none
    """
    for file in os.listdir(directory):
        if file.endswith(extension):
            try:
                binarized = binarization.nlbin(PIL.Image.open(os.path.join(directory, file)))
            except PIL.UnidentifiedImageError:
                print('--//--> %s' % sys.exc_info()[1])
                continue
            except IsADirectoryError:
                print('--//--> %s' % sys.exc_info()[1])
                continue
            filename = file.replace(extension, new_extension)
            binarized.save(os.path.join(new_directory, filename))
            print("------> {} binarized!".format(os.path.join(new_directory, filename)))
Code example #3
File: kraken.py Project: kursataker/nidaba
def nlbin(imagepath, resultpath, threshold=0.5, zoom=0.5, escale=1.0,
          border=0.1, perc=80, range=20, low=5, high=90):
    """
    Converts an 8bpp grayscale image into a black and white one using the
    non-linear processing algorithm from ocropus/kraken.

    Args:
        imagepath: Path of the input image
        resultpath: Path of the output image
        threshold (float):
        zoom (float):
        escale (float):
        border (float):
        perc (int):
        range (int):
        low (int):
        high (int):

    Returns:
        unicode: Path of the output file
    """
    img = Image.open(imagepath)
    binarization.nlbin(img, threshold, zoom, escale, border, perc, range, low,
                       high).save(resultpath)
    return resultpath
Code example #4
File: kraken.py Project: amitdo/nidaba
def kraken_nlbin(input_path, output_path, threshold=0.5, zoom=0.5, escale=1.0,
                 border=0.1, perc=80, range=20, low=5, high=90):
    """
    Binarizes an input document utilizing ocropus'/kraken's nlbin algorithm.

    Args:
        input_path (unicode): Path to the input image
        output_path (unicode): Path to the output image
        threshold (float):
        zoom (float):
        escale (float):
        border (float):
        perc (int):
        range (int):
        low (int):
        high (int):

    Raises:
        NidabaInvalidParameterException: Input parameters are outside the valid
                                         range.

    """
    img = Image.open(input_path)
    binarization.nlbin(img, threshold, zoom, escale, border, perc, range, low,
                       high).save(output_path)
Code example #5
def preprocessing_tabular(path):
    # Load image
    img = cv2.imread(path)

    ## ---Binarization of image---
    genrator_image = Image.fromarray(img)
    genrator_image = binarization.nlbin(genrator_image)

    # ----Grayscaling Image----
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # --- performing Otsu threshold ---
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Remove text characters with morph open and contour filtering
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)

    cnts = cv2.findContours(opening, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    for c in cnts:
        area = cv2.contourArea(c)
        if area < 500:
            cv2.drawContours(opening, [c], -1, (0, 0, 0), -1)

    # Repair table lines, sort contours, and extract ROI
    close = 255 - cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel, iterations=1)

    cnts = cv2.findContours(close, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    cnts, _ = contours.sort_contours(cnts, method="top-to-bottom")
    return img, genrator_image, cnts
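
The function above returns the nlbin result as a PIL image while the rest of the pipeline works on OpenCV arrays. If the binarized page is also needed as a NumPy array, a small conversion helper could look like the following sketch (to_binarized_array is illustrative and not part of the project above):

# Sketch: run a cv2 (BGR) image through kraken's nlbin and return a NumPy array.
# Illustrative only; not taken from the project shown above.
import cv2
import numpy as np
from PIL import Image
from kraken import binarization

def to_binarized_array(bgr_image):
    pil_img = Image.fromarray(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))
    bw = binarization.nlbin(pil_img)      # PIL image with black/white pixels
    return np.array(bw.convert('L'))      # back to a uint8 array for OpenCV use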
Code example #6
def build_binarized(img_path):
    from kraken.binarization import nlbin

    bin_path = img_path.parent.parent / "bin"
    if not bin_path.exists():
        bin_path.mkdir()
    assert bin_path.is_dir()

    out_bin_path = bin_path / (img_path.stem + ".png")

    if not out_bin_path.is_file():
        image = PIL.Image.open(img_path)

        binarized = nlbin(image,
                          threshold=0.6,
                          escale=5,
                          border=0.1,
                          perc=80,
                          range=20,
                          low=20,
                          high=80)

        binarized = PIL.ImageOps.invert(binarized)
        binarized = binarized.convert('1')

        binarized.save(out_bin_path, "PNG")
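
build_binarized above expects a pathlib.Path (it relies on .parent and .stem) and writes its output into a bin directory that is a sibling of the image directory. A hedged usage sketch with placeholder directory names:

# Usage sketch for build_binarized, assuming a layout like
#   project/img/page1.jpg  ->  project/bin/page1.png
# The directory names and glob pattern are placeholders.
from pathlib import Path

for img_path in sorted(Path('project/img').glob('*.jpg')):
    build_binarized(img_path)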
Code example #7
File: qr_decoder.py Project: javimartin22/ecobot
def QR_decode():
    try:
        file = request.files['file']
        filename = file.filename
        print("Saving :", filename)
        file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))

        result = []
        img = Image.open(UPLOAD_FOLDER + filename)
        result.append(decode(img))
        bw_im = binarization.nlbin(img, zoom=0.5)  # 1. binarization
        result.append(decode(bw_im, symbols=[ZBarSymbol.QRCODE]))
        imgb = ImageEnhance.Brightness(img).enhance(2.0)  # 2. increase brightness
        result.append(decode(imgb))
        imgc = ImageEnhance.Contrast(img).enhance(3.0)  # 3. increase contrast
        result.append(decode(imgc))
        imgs = ImageEnhance.Sharpness(img).enhance(17.0)  # 4. increase sharpness
        result.append(decode(imgs))
        img = imgc.convert('L')  # 5. convert to grayscale
        result.append(decode(img))
        result = decode(img)
        print(result)
        if len(result) > 0:
            return result[0].data.decode("utf-8").lower()
        else:
            return None
    except:
        print(sys.exc_info())
        return None
Code example #8
def preprocessing_non_tabular(path):
    img = cv2.imread(path)

    ## ---Binarization of image---
    genrator_image = Image.fromarray(img)
    genrator_image = binarization.nlbin(genrator_image)

    # ----Grayscaling Image----
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # --- performing Otsu threshold ---
    ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
    cv2.imwrite("processed_image/threshold.png", thresh1)
    # cv2.imshow('thresh1', thresh1)
    # cv2.waitKey(0)

    # ----Image dialation----
    rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 3))
    dilation = cv2.dilate(thresh1, rect_kernel, iterations=1)
    cv2.imwrite("processed_image/dilation.png", dilation)
    # cv2.imshow('dilation', dilation)
    # cv2.waitKey(0)

    # ---Finding contours ---
    contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    return img, genrator_image, contours[::-1]
Code example #9
File: barcode.py Project: kingwpf/Cini-OCR
def _read(cv2_image):
    barcode_binary = binarization.nlbin(Image.fromarray(cv2_image), zoom=1.0)
    codes = zbarlight.scan_codes('code39', barcode_binary)
    if codes:
        return codes[0].decode('utf-8')
    else:
        return ''
Code example #10
def pagexmllineseg(xmlfile, text_direction='horizontal-lr', outputfile=None):
	"""
	Opens the file 'xmlfile', converts it to PageXML version 2017,
	segments the text regions, and writes the XML back to file.
	Output is written to the input file if outputfile is None.
	"""
	if not outputfile:
		outputfile = xmlfile
		
	root = etree.parse(xmlfile).getroot()
	ns = {"ns":root.nsmap[None]}

	#convert point notation from older pagexml versions
	for c in root.xpath("//ns:Coords[not(@points)]", namespaces=ns):
		cc = []
		for point in c.xpath("./ns:Point", namespaces=ns):
		#coordstrings = [x.split(",") for x in c.attrib["points"].split()]
			cx = point.attrib["x"]
			cy = point.attrib["y"]
			c.remove(point)
			cc.append(cx+","+cy)
		c.attrib["points"] = " ".join(cc)    

	coordmap = {}
	for r in root.xpath('//ns:TextRegion', namespaces=ns):
		rid = r.attrib["id"]
		coordmap[rid] = {"type":r.attrib["type"]}
		coordmap[rid]["coords"] = []
		for c in r.xpath("./ns:Coords", namespaces=ns) + r.xpath("./Coords"):
			coordstrings = [x.split(",") for x in c.attrib["points"].split()]
			coordmap[rid]["coords"] += [[int(x[0]), int(x[1])] for x in coordstrings ]

	filename = root.xpath('//ns:Page', namespaces=ns)[0].attrib["imageFilename"]
	
	im = Image.open(filename)
	for n, c in enumerate(sorted(coordmap)):
		coords = coordmap[c]['coords']
		cropped = cutout(im, coords)
		offset = (min([x[0] for x in coords]), min([x[1] for x in coords]))
		if cropped is not None:
			if not binarization.is_bitonal(cropped):
				cropped = binarization.nlbin(cropped)
			lines = segment(cropped, text_direction=text_direction, maxcolseps=0)['lines']
		else:
			lines = []

		for n, l in enumerate(lines):
			coords = ((x[1]+offset[0], x[0]+offset[1]) for x in l.polygon)
			coordstrg = " ".join(str(x[0])+","+str(x[1]) for x in coords)
			textregion = root.xpath('//ns:TextRegion[@id="'+c+'"]', namespaces=ns)[0]
			linexml = etree.SubElement(textregion, "TextLine", 
									   attrib={"id":c+"_l{:03d}".format(n + 1)})
			coordsxml = etree.SubElement(linexml, "Coords", 
									   attrib={"points":coordstrg})
	xmlstring = etree.tounicode(root.getroottree()).replace(
			 "http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19",
			 "http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15"
			)
	with open(outputfile, "w") as f:
		f.write(xmlstring)
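
The is_bitonal guard used above (binarize only when the crop is not already black and white) also appears in several of the transcription examples further down. As a standalone sketch with a placeholder path:

# Minimal sketch of the guard: skip nlbin for images that are already bitonal.
# 'scan.png' is a placeholder path.
from PIL import Image
from kraken import binarization

im = Image.open('scan.png')
if not binarization.is_bitonal(im):
    im = binarization.nlbin(im)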
Code example #11
def binarizer(threshold, zoom, escale, border, perc, range, low, high, input,
              output) -> None:
    from kraken import binarization

    ctx = click.get_current_context()
    if ctx.meta['first_process']:
        if ctx.meta['input_format_type'] != 'image':
            input = get_input_parser(
                ctx.meta['input_format_type'])(input)['image']
        ctx.meta['first_process'] = False
    else:
        raise click.UsageError('Binarization has to be the initial process.')

    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e))
    message('Binarizing\t', nl=False)
    try:
        res = binarization.nlbin(im, threshold, zoom, escale, border, perc,
                                 range, low, high)
        form = None
        ext = os.path.splitext(output)[1]
        if ext in ['.jpg', '.jpeg', '.JPG', '.JPEG', '']:
            form = 'png'
            if ext:
                logger.warning(
                    'jpeg does not support 1bpp images. Forcing to png.')
        res.save(output, format=form)
        ctx.meta['base_image'] = output
    except Exception:
        message('\u2717', fg='red')
        raise
    message('\u2713', fg='green')
Code example #12
def cli(format_type, model, repolygonize, files):
    """
    A small script extracting rectified line polygons as defined in either ALTO or
    PageXML files or run a model to do the same.
    """
    if len(files) == 0:
        ctx = click.get_current_context()
        click.echo(ctx.get_help())
        ctx.exit()

    from PIL import Image
    from os.path import splitext
    from kraken import binarization, blla  # AHT
    from kraken.lib import dataset, segmentation, vgsl

    if model is None:
        for doc in files:
            click.echo(f'Processing {doc} ', nl=False)
            data = dataset.preparse_xml_data([doc],
                                             format_type,
                                             repolygonize=repolygonize)
            if len(data) > 0:
                #bounds = {'type': 'baselines', 'lines': [{'boundary': t['boundary'], 'baseline': t['baseline'], 'text': t['text']} for t in data]}
                bounds = {
                    'type':
                    'baselines',
                    'lines': [{
                        'boundary': t['boundary'],
                        'baseline': t['baseline'],
                        'text': t['text'],
                        'ID': t['line_id'] if 'line_id' in t else ''
                    } for t in data]
                }  # AHT
                bw_im = binarization.nlbin(Image.open(data[0]['image']))  # AHT
                #print(bw_im.getcolors()); break # AHT
                #for idx, (im, box) in enumerate(segmentation.extract_polygons(Image.open(data[0]['image']), bounds)):
                for idx, (im, box) in enumerate(
                        segmentation.extract_polygons(bw_im, bounds)):  # AHT
                    idLin = box['ID'] if box['ID'] else idx
                    click.echo('.', nl=False)
                    #im.save('{}.{}.png'.format(splitext(data[0]['image'])[0], idx))
                    im.save('{}.{}.png'.format(
                        splitext(data[0]['image'])[0], idLin))
                    #with open('{}.{}.gt.txt'.format(splitext(data[0]['image'])[0], idx), 'w') as fp:
                    with open(
                            '{}.{}.gt.txt'.format(
                                splitext(data[0]['image'])[0], idLin),
                            'w') as fp:
                        fp.write(box['text'])
    else:
        net = vgsl.TorchVGSLModel.load_model(model)
        for doc in files:
            click.echo(f'Processing {doc} ', nl=False)
            full_im = Image.open(doc)
            bounds = blla.segment(full_im, model=net)
            for idx, (im, box) in enumerate(
                    segmentation.extract_polygons(full_im, bounds)):
                click.echo('.', nl=False)
                im.save('{}.{}.png'.format(splitext(doc)[0], idx))
Code example #13
def extract(ctx, binarize, normalization, reorder, rotate, output,
            transcriptions):
    """
    Extracts image-text pairs from a transcription environment created using
    ``ketos transcribe``.
    """
    try:
        os.mkdir(output)
    except:
        pass
    idx = 0
    manifest = []
    for fp in transcriptions:
        logger.info(u'Reading {}'.format(fp.name))
        spin('Reading transcription')
        doc = html.parse(fp)
        td = doc.find(".//meta[@itemprop='text_direction']")
        if td is None:
            td = 'horizontal-lr'
        else:
            td = td.attrib['content']

        im = None
        for section in doc.xpath('//section'):
            img = section.xpath('.//img')[0].get('src')
            fd = BytesIO(base64.b64decode(img.split(',')[1]))
            im = Image.open(fd)
            if not im:
                logger.info(u'Skipping {} because image not found'.format(
                    fp.name))
                break
            if binarize:
                im = binarization.nlbin(im)
            for line in section.iter('li'):
                if line.get('contenteditable') and (
                        not u''.join(line.itertext()).isspace()
                        or not u''.join(line.itertext())):
                    l = im.crop(
                        [int(x) for x in line.get('data-bbox').split(',')])
                    if rotate and td.startswith('vertical'):
                        im.rotate(90, expand=True)
                    l.save('{}/{:06d}.png'.format(output, idx))
                    manifest.append('{:06d}.png'.format(idx))
                    text = u''.join(line.itertext()).strip()
                    if normalization:
                        text = unicodedata.normalize(normalization, text)
                    with open('{}/{:06d}.gt.txt'.format(output, idx),
                              'wb') as t:
                        if reorder:
                            t.write(get_display(text).encode('utf-8'))
                        else:
                            t.write(text.encode('utf-8'))
                    idx += 1
    logger.info(u'Extracted {} lines'.format(idx))
    with open('{}/manifest.txt'.format(output), 'w') as fp:
        fp.write('\n'.join(manifest))
    message(u'\b\u2713', fg='green', nl=False)
    message('\033[?25h\n', nl=False)
Code example #14
def binarizer(input_image, threshold, zoom, escale, border, perc, range, low,
              high) -> Image:
    try:
        res = binarization.nlbin(input_image, threshold, zoom, escale, border,
                                 perc, range, low, high)
        return res
    except Exception:
        print('Binarization error.')
        raise
Code example #15
File: ketos.py Project: yufish/kraken
def transcription(ctx, text_direction, scale, maxcolseps, black_colseps, font,
                  font_style, prefill, output, images):
    st_time = time.time()
    ti = transcribe.TranscriptionInterface(font, font_style)

    if prefill:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Loading model {}'.format(time.time() - st_time, prefill))
        else:
            spin('Loading RNN')
        prefill = models.load_any(prefill.encode('utf-8'))
        if not ctx.meta['verbose']:
            click.secho(u'\b\u2713', fg='green', nl=False)
            click.echo('\033[?25h\n', nl=False)

    for fp in images:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Reading {}'.format(time.time() - st_time, fp.name))
        else:
            spin('Reading images')
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            if ctx.meta['verbose'] > 0:
                click.echo(u'[{:2.4f}] Binarizing page'.format(time.time() - st_time))
            im = binarization.nlbin(im)
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Segmenting page'.format(time.time() - st_time))
        res = pageseg.segment(im, text_direction, scale, maxcolseps, black_colseps)
        if prefill:
            it = rpred.rpred(prefill, im, res)
            preds = []
            for pred in it: 
                if ctx.meta['verbose'] > 0:
                    click.echo(u'[{:2.4f}] {}'.format(time.time() - st_time, pred.prediction))
                else:
                    spin('Recognizing')
                preds.append(pred)
            if ctx.meta['verbose'] > 0:
                click.echo(u'Execution time: {}s'.format(time.time() - st_time))
            else:
                click.secho(u'\b\u2713', fg='green', nl=False)
                click.echo('\033[?25h\n', nl=False)
            ti.add_page(im, res, records=preds)
        else:
            ti.add_page(im, res)
        fp.close()
    if not ctx.meta['verbose']:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
    if ctx.meta['verbose'] > 0:
        click.echo(u'[{:2.4f}] Writing transcription to {}'.format(time.time() - st_time, output.name))
    else:
        spin('Writing output')
    ti.write(output)
    if not ctx.meta['verbose']:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
Code example #16
 def test_binarize_grayscale(self):
     """
     Test binarization of mode 'L' images.
     """
     with Image.open(os.path.join(resources, 'input.tif')) as im:
         res = nlbin(im.convert('L'))
         # calculate histogram and check if only pixels of value 0/255 exist
         self.assertEqual(254, res.histogram().count(0), msg='Output not '
                          'binarized')
Code example #17
 def test_binarize_no_bw(self):
     """
     Tests binarization of image formats without a 1bpp mode (JPG).
     """
     with Image.open(os.path.join(resources, 'input.jpg')) as im:
         res = nlbin(im)
         # calculate histogram and check if only pixels of value 0/255 exist
         self.assertEqual(254, res.histogram().count(0), msg='Output not '
                          'binarized')
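
The two tests above depend on resource images shipped with kraken. A self-contained variant can synthesize a noisy page instead and assert that only black and white pixels remain after binarization; the following is a sketch (not part of kraken's test suite, and not verified against a specific kraken version):

# Sketch: binarize a synthetic noisy page and check that the output is bitonal.
# Purely illustrative; not part of kraken's tests.
import numpy as np
from PIL import Image
from kraken.binarization import nlbin

rng = np.random.default_rng(0)
page = rng.integers(200, 240, size=(400, 600), dtype=np.uint8)  # noisy light background
for y in range(40, 360, 40):
    page[y:y + 10, 40:560] = 30  # dark bars standing in for text lines
res = nlbin(Image.fromarray(page))
values = {value for _, value in res.convert('L').getcolors()}
assert values <= {0, 255}, 'Output not binarized'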
Code example #18
def KrakenizeBox(img1, img2):
    image1 = cv.cvtColor(img1, cv.COLOR_BGR2RGB)
    im_pil1 = Image.fromarray(image1)
    image1 = im_pil1

    image2 = cv.cvtColor(img2, cv.COLOR_BGR2RGB)
    im_pil2 = Image.fromarray(image2)
    image2 = im_pil2

    result1 = binarization.nlbin(image1)
    result2 = binarization.nlbin(image2)

    rgb_im1 = result1.convert('RGB')
    rgb_im2 = result2.convert('RGB')

    open_cv_image1 = np.array(rgb_im1)
    open_cv_image2 = np.array(rgb_im2)

    return open_cv_image1, open_cv_image2
Code example #19
File: ketos.py Project: QuLogic/ocropy
def transcription(ctx, font, font_style, prefill, output, images):
    st_time = time.time()
    ti = transcrib.TranscriptionInterface(font, font_style)

    if prefill:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Loading model {}'.format(time.time() - st_time, prefill))
        else:
            spin('Loading RNN')
        prefill = models.load_any(prefill)
        if not ctx.meta['verbose']:
            click.secho(u'\b\u2713', fg='green', nl=False)
            click.echo('\033[?25h\n', nl=False)

    for fp in images:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Reading {}'.format(time.time() - st_time, fp.name))
        else:
            spin('Reading images')
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            if ctx.meta['verbose'] > 0:
                click.echo(u'[{:2.4f}] Binarizing page'.format(time.time() - st_time))
            im = binarization.nlbin(im)
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Segmenting page'.format(time.time() - st_time))
        res = pageseg.segment(im)
        if prefill:
            it = rpred.rpred(prefill, im, res)
            preds = []
            for pred in it: 
                if ctx.meta['verbose'] > 0:
                    click.echo(u'[{:2.4f}] {}'.format(time.time() - st_time, pred.prediction))
                else:
                    spin('Recognizing')
                preds.append(pred)
            if ctx.meta['verbose'] > 0:
                click.echo(u'Execution time: {}s'.format(time.time() - st_time))
            else:
                click.secho(u'\b\u2713', fg='green', nl=False)
                click.echo('\033[?25h\n', nl=False)
            ti.add_page(im, records=preds)
        else:
            ti.add_page(im, res)
    if not ctx.meta['verbose']:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
    if ctx.meta['verbose'] > 0:
        click.echo(u'[{:2.4f}] Writing transcription to {}'.format(time.time() - st_time, output.name))
    else:
        spin('Writing output')
    ti.write(output)
    if not ctx.meta['verbose']:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
Code example #20
File: main.py Project: mittagessen/patchseg
def pred(model, device, images):
    from kraken.binarization import nlbin
    m = PatchNet()
    m.load_state_dict(torch.load(model))
    device = torch.device(device)
    m.to(device)

    transform = ToTensor()

    cmap = {
        0: (230, 25, 75, 127),
        1: (60, 180, 75, 127),
        2: (255, 225, 25, 127),
        3: (0, 130, 200, 127)
    }

    for img in images:
        im = Image.open(img)
        gray_unscaled = im.convert('L')
        gray = gray_unscaled.resize((im.size[0] // 8, im.size[1] // 8))
        sp = slic(gray, n_segments=3000)
        props = regionprops(sp)
        cls = np.zeros(sp.shape)
        with click.progressbar(props, label='patches') as bar:
            for prop in bar:
                y = int(prop.centroid[0])
                x = int(prop.centroid[1])
                siz = 14
                patch = gray.crop((x - siz, y - siz, x + siz, y + siz))
                o = m.forward(transform(patch).unsqueeze(0).to(device))
                # downscaled label map
                cls[sp == prop.label] = o.argmax().item()
        cls = np.array(
            Image.fromarray(cls).resize(gray_unscaled.size,
                                        resample=Image.NEAREST))
        bin_im = nlbin(gray_unscaled)
        bin_im = np.array(bin_im)
        bin_im = 1 - (bin_im / bin_im.max())
        overlay = np.zeros(bin_im.shape + (4, ))
        fg_labels = bin_im * cls
        Image.fromarray(fg_labels.astype('uint8')).resize(
            im.size).save(os.path.splitext(img)[0] + '_labels.png')
        for idx, val in cmap.items():
            overlay[cls == idx] = val
            layer = np.full(bin_im.shape, 255)
            layer[fg_labels == idx] = 0
            Image.fromarray(layer.astype('uint8')).resize(im.size).save(
                os.path.splitext(img)[0] + '_class_{}.png'.format(idx))
        im = Image.alpha_composite(
            gray_unscaled.convert('RGBA'),
            Image.fromarray(overlay.astype('uint8'))).resize(im.size)
        im.save(os.path.splitext(img)[0] + '_overlay.png')
Code example #21
File: kraken.py Project: tianyaqu/kraken
def binarizer(threshold, zoom, escale, border, perc, range, low, high, base_image, input, output):
    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e))
    click.echo('Binarizing\t', nl=False)
    try:
        res = binarization.nlbin(im, threshold, zoom, escale, border, perc, range,
                                 low, high)
        res.save(output, format='png')
    except:
        click.secho(u'\u2717', fg='red')
        raise
    click.secho(u'\u2713', fg='green')
Code example #22
File: kraken.py Project: asgundogdu/kraken
def binarizer(threshold, zoom, escale, border, perc, range, low, high, base_image, input, output):
    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e))
    click.echo('Binarizing\t', nl=False)
    try:
        res = binarization.nlbin(im, threshold, zoom, escale, border, perc, range,
                                 low, high)
        res.save(output, format='png')
    except:
        click.secho(u'\u2717', fg='red')
        raise
    click.secho(u'\u2713', fg='green')
Code example #23
def binarizer(threshold, zoom, escale, border, perc, range, low, high, input,
              output) -> None:
    from kraken import binarization

    ctx = click.get_current_context()
    if ctx.meta['first_process']:
        if ctx.meta['input_format_type'] != 'image':
            input = get_input_parser(
                ctx.meta['input_format_type'])(input)['image']
        ctx.meta['first_process'] = False
    else:
        raise click.UsageError('Binarization has to be the initial process.')

    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e))
    message('Binarizing\t', nl=False)
    try:
        res = binarization.nlbin(im, threshold, zoom, escale, border, perc,
                                 range, low, high)
        if ctx.meta['last_process'] and ctx.meta['output_mode'] != 'native':
            with click.open_file(output, 'w', encoding='utf-8') as fp:
                fp = cast(IO[Any], fp)
                logger.info('Serializing as {} into {}'.format(
                    ctx.meta['output_mode'], output))
                res.save(f'{output}.png')
                from kraken import serialization
                fp.write(
                    serialization.serialize([],
                                            image_name=f'{output}.png',
                                            image_size=res.size,
                                            template=ctx.meta['output_mode']))
        else:
            form = None
            ext = os.path.splitext(output)[1]
            if ext in ['.jpg', '.jpeg', '.JPG', '.JPEG', '']:
                form = 'png'
                if ext:
                    logger.warning(
                        'jpeg does not support 1bpp images. Forcing to png.')
            res.save(output, format=form)
        ctx.meta['base_image'] = output
    except Exception:
        if ctx.meta['raise_failed']:
            raise
        message('\u2717', fg='red')
        ctx.exit(1)
    message('\u2713', fg='green')
Code example #24
File: ketos.py Project: tewhalen/kraken
def transcription(ctx, text_direction, scale, maxcolseps, black_colseps, font,
                  font_style, prefill, output, images, segment_page):
    ti = transcribe.TranscriptionInterface(font, font_style)

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        spin('Loading RNN')
        prefill = models.load_any(prefill.encode('utf-8'))
        message(u'\b\u2713', fg='green', nl=False)
        message('\033[?25h\n', nl=False)

    for fp in images:
        logger.info('Reading {}'.format(fp.name))
        spin('Reading images')
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            logger.info(u'Binarizing page')
            im = binarization.nlbin(im)
        if segment_page:
            logger.info(u'Segmenting page')
            res = pageseg.segment(im, text_direction, scale, maxcolseps,
                                  black_colseps)
        else:
            res = {
                'text_direction': 'horizontal-tb',
                'boxes': [(0, 0) + im.size]
            }
        if prefill:
            it = rpred.rpred(prefill, im, res)
            preds = []
            for pred in it:
                logger.info('{}'.format(pred.prediction))
                spin('Recognizing')
                preds.append(pred)
            message(u'\b\u2713', fg='green', nl=False)
            message('\033[?25h\n', nl=False)
            ti.add_page(im, res, records=preds)
        else:
            ti.add_page(im, res)
        fp.close()
    message(u'\b\u2713', fg='green', nl=False)
    message('\033[?25h\n', nl=False)
    logger.info(u'Writing transcription to {}'.format(output.name))
    spin('Writing output')
    ti.write(output)
    message(u'\b\u2713', fg='green', nl=False)
    message('\033[?25h\n', nl=False)
Code example #25
File: kraken.py Project: brunsgaard/nidaba
def nlbin(doc,
          method=u'nlbin',
          threshold=0.5,
          zoom=0.5,
          escale=1.0,
          border=0.1,
          perc=80,
          range=20,
          low=5,
          high=90):
    """
    Binarizes an input document utilizing ocropus'/kraken's nlbin algorithm.

    Args:
        doc (unicode, unicode): The input document tuple.
        method (unicode): The suffix string appended to all output files.
        threshold (float):
        zoom (float):
        escale (float):
        border (float):
        perc (int):
        range (int):
        low (int):
        high (int):

    Returns:
        (unicode, unicode): Storage tuple of the output file

    Raises:
        NidabaInvalidParameterException: Input parameters are outside the valid
                                         range.

    """
    input_path = storage.get_abs_path(*doc)
    output_path = storage.insert_suffix(input_path, method, unicode(threshold),
                                        unicode(zoom), unicode(escale),
                                        unicode(border), unicode(perc),
                                        unicode(range), unicode(low),
                                        unicode(high))
    img = Image.open(input_path)
    o_img = binarization.nlbin(img, threshold, zoom, escale, border, perc,
                               range, low, high)
    o_img.save(output_path)
    return storage.get_storage_path(output_path)
Code example #26
def ExtractKK(path):
    img = cv.imread(path)
    height, width = img.shape[:2]
    # Binarize
    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    im_pil1 = Image.fromarray(img)
    img = im_pil1
    result1 = binarization.nlbin(img)
    rgb_im1 = result1.convert('RGB')
    img = np.array(rgb_im1)

    table1, table2, y2ForHeader = box_extraction(path)
    krtable1, krtable2 = KrakenizeBox(table1, table2)
    name = 1
    x1, height1, y1, width1 = markVertical(krtable1)

    y1.sort()
    y1 = cluster(y1, 10)
    y1 = average(y1)
    # print(y1)

    x2, height2, y2, width2 = markVertical(krtable2)
    groupedX1 = cluster(x1, 10)
    groupedX2 = cluster(x2, 10)

    xPoints1 = average(groupedX1)
    xPoints2 = average(groupedX2)

    imageToBeCropped1 = main(krtable1)
    cv.imwrite('tabel1.jpg', imageToBeCropped1)
    imageToBeCropped2 = main(krtable2)
    cv.imwrite('tabel2.jpg', imageToBeCropped2)

    textHeader, textNo = cropHeader(xPoints1, y2ForHeader + 5, img, 1,
                                    width)  #height1
    text1 = crop(xPoints1, height1, imageToBeCropped1, 0.25)
    text2 = crop(xPoints2, height2, imageToBeCropped2, 0.25)

    # print(textHeader)

    rawText = text1 + text2
    Clean(rawText)
    CleanHeader(textHeader)
    getKKNo(textNo)
Code example #27
File: kraken.py Project: mittagessen/kraken
def binarizer(threshold, zoom, escale, border, perc, range, low, high, base_image, input, output) -> None:
    from kraken import binarization

    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e))
    message('Binarizing\t', nl=False)
    try:
        res = binarization.nlbin(im, threshold, zoom, escale, border, perc, range,
                                 low, high)
        form = None
        ext = os.path.splitext(output)[1]
        if ext in ['.jpg', '.jpeg', '.JPG', '.JPEG', '']:
            form = 'png'
            if ext:
                logger.warning('jpeg does not support 1bpp images. Forcing to png.')
        res.save(output, format=form)
    except Exception:
        message('\u2717', fg='red')
        raise
    message('\u2713', fg='green')
Code example #28
File: kraken.py Project: Doreenruirui/kraken
def binarizer(threshold, zoom, escale, border, perc, range, low, high,
              base_image, input, output) -> None:
    from kraken import binarization

    try:
        im = Image.open(input)
    except IOError as e:
        raise click.BadParameter(str(e))
    message('Binarizing\t', nl=False)
    try:
        res = binarization.nlbin(im, threshold, zoom, escale, border, perc,
                                 range, low, high)
        form = None
        ext = os.path.splitext(output)[1]
        if ext in ['.jpg', '.jpeg', '.JPG', '.JPEG', '']:
            form = 'png'
            if ext:
                logger.warning(
                    'jpeg does not support 1bpp images. Forcing to png.')
        res.save(output, format=form)
    except Exception:
        message('\u2717', fg='red')
        raise
    message('\u2713', fg='green')
Code example #29
File: kraken.py Project: ryanfb/nidaba
def nlbin(doc, method=u'nlbin', threshold=0.5, zoom=0.5, escale=1.0,
          border=0.1, perc=80, range=20, low=5, high=90):
    """
    Binarizes an input document utilizing ocropus'/kraken's nlbin algorithm.

    Args:
        doc (unicode, unicode): The input document tuple.
        method (unicode): The suffix string appended to all output files.
        threshold (float):
        zoom (float):
        escale (float):
        border (float):
        perc (int):
        range (int):
        low (int):
        high (int):

    Returns:
        (unicode, unicode): Storage tuple of the output file

    Raises:
        NidabaInvalidParameterException: Input parameters are outside the valid
                                         range.

    """
    input_path = storage.get_abs_path(*doc)
    output_path = storage.insert_suffix(input_path, method, unicode(threshold),
                                        unicode(zoom), unicode(escale),
                                        unicode(border), unicode(perc),
                                        unicode(range), unicode(low),
                                        unicode(high))
    img = Image.open(input_path)
    o_img = binarization.nlbin(img, threshold, zoom, escale, border, perc, range, low,
                       high)
    o_img.save(output_path)
    return storage.get_storage_path(output_path)
Code example #30
File: ketos.py Project: rsharmapty/kraken
def transcription(ctx, text_direction, scale, bw, maxcolseps, black_colseps,
                  font, font_style, prefill, pad, lines, output, images):
    """
    Creates transcription environments for ground truth generation.
    """
    from PIL import Image

    from kraken import rpred
    from kraken import pageseg
    from kraken import transcribe
    from kraken import binarization

    from kraken.lib import models
    from kraken.lib.util import is_bitonal

    ti = transcribe.TranscriptionInterface(font, font_style)

    if len(images) > 1 and lines:
        raise click.UsageError(
            '--lines option is incompatible with multiple image files')

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        message('Loading RNN', nl=False)
        prefill = models.load_any(prefill)
        message('\u2713', fg='green')

    with log.progressbar(images, label='Reading images') as bar:
        for fp in bar:
            logger.info('Reading {}'.format(fp.name))
            im = Image.open(fp)
            if im.mode not in ['1', 'L', 'P', 'RGB']:
                logger.warning(
                    'Input {} is in {} color mode. Converting to RGB'.format(
                        fp.name, im.mode))
                im = im.convert('RGB')
            logger.info('Binarizing page')
            im_bin = binarization.nlbin(im)
            im_bin = im_bin.convert('1')
            logger.info('Segmenting page')
            if not lines:
                res = pageseg.segment(im_bin,
                                      text_direction,
                                      scale,
                                      maxcolseps,
                                      black_colseps,
                                      pad=pad)
            else:
                with open_file(lines, 'r') as fp:
                    try:
                        fp = cast(IO[Any], fp)
                        res = json.load(fp)
                    except ValueError as e:
                        raise click.UsageError(
                            '{} invalid segmentation: {}'.format(
                                lines, str(e)))
            if prefill:
                it = rpred.rpred(prefill, im_bin, res)
                preds = []
                logger.info('Recognizing')
                for pred in it:
                    logger.debug('{}'.format(pred.prediction))
                    preds.append(pred)
                ti.add_page(im, res, records=preds)
            else:
                ti.add_page(im, res)
            fp.close()
    logger.info('Writing transcription to {}'.format(output.name))
    message('Writing output', nl=False)
    ti.write(output)
    message('\u2713', fg='green')
Code example #31
File: ketos.py Project: rsharmapty/kraken
def extract(ctx, binarize, normalization, normalize_whitespace, reorder,
            rotate, output, format, transcriptions):
    """
    Extracts image-text pairs from a transcription environment created using
    ``ketos transcribe``.
    """
    import regex
    import base64

    from io import BytesIO
    from PIL import Image
    from lxml import html, etree

    from kraken import binarization

    try:
        os.mkdir(output)
    except Exception:
        pass

    text_transforms = []
    if normalization:
        text_transforms.append(
            lambda x: unicodedata.normalize(normalization, x))
    if normalize_whitespace:
        text_transforms.append(lambda x: regex.sub(r'\s', ' ', x))
    if reorder:
        text_transforms.append(get_display)

    idx = 0
    manifest = []
    with log.progressbar(transcriptions,
                         label='Reading transcriptions') as bar:
        for fp in bar:
            logger.info('Reading {}'.format(fp.name))
            doc = html.parse(fp)
            etree.strip_tags(doc, etree.Comment)
            td = doc.find(".//meta[@itemprop='text_direction']")
            if td is None:
                td = 'horizontal-lr'
            else:
                td = td.attrib['content']

            im = None
            dest_dict = {
                'output': output,
                'idx': 0,
                'src': fp.name,
                'uuid': str(uuid.uuid4())
            }
            for section in doc.xpath('//section'):
                img = section.xpath('.//img')[0].get('src')
                fd = BytesIO(base64.b64decode(img.split(',')[1]))
                im = Image.open(fd)
                if not im:
                    logger.info('Skipping {} because image not found'.format(
                        fp.name))
                    break
                if binarize:
                    im = binarization.nlbin(im)
                for line in section.iter('li'):
                    if line.get('contenteditable') and (
                            not u''.join(line.itertext()).isspace()
                            and u''.join(line.itertext())):
                        dest_dict['idx'] = idx
                        dest_dict['uuid'] = str(uuid.uuid4())
                        logger.debug('Writing line {:06d}'.format(idx))
                        l_img = im.crop(
                            [int(x) for x in line.get('data-bbox').split(',')])
                        if rotate and td.startswith('vertical'):
                            im.rotate(90, expand=True)
                        l_img.save(('{output}/' + format +
                                    '.png').format(**dest_dict))
                        manifest.append((format + '.png').format(**dest_dict))
                        text = u''.join(line.itertext()).strip()
                        for func in text_transforms:
                            text = func(text)
                        with open(('{output}/' + format +
                                   '.gt.txt').format(**dest_dict), 'wb') as t:
                            t.write(text.encode('utf-8'))
                        idx += 1
    logger.info('Extracted {} lines'.format(idx))
    with open('{}/manifest.txt'.format(output), 'w') as fp:
        fp.write('\n'.join(manifest))
Code example #32
def SimExtract(input, filename):
    # src = input
    img = Image.open(input)

    result = binarization.nlbin(img)
    rgb_im = result.convert('RGB')
    #rgb_im = rgb_im.rotate(270)

    open_cv_image = np.array(rgb_im)
    # cv.imwrite('converted.jpg',open_cv_image)
    # Convert RGB to BGR

    #cv.imwrite('new2.jpg', frame) ##CROPPED

    kernel = np.ones((2, 2), np.uint8)

    img = open_cv_image

    img = cv.dilate(img, kernel, iterations=2)

    #img -> eroded

    image = cv.resize(img, (800, 480))

    namaAlamat = image[143:192, 0:660]
    tipe = image[0:109, 609:780]
    kelamin = image[142:180, 610:765]
    block = image[220:343, 397:649]
    rt = image[190:228, 244:646]

    kernel = np.ones((2, 2), np.uint8)
    tipe = cv.dilate(tipe, kernel, iterations=4)

    # cv.imwrite('block.jpg', block)
    # cv.imwrite('tipe.jpg', tipe)
    # cv.imwrite('kelamin.jpg', kelamin)
    # cv.imwrite('namaAlamat.jpg', namaAlamat)
    # cv.imwrite('rt.jpg', rt)

    #Cropped for each part

    text1 = pytesseract.image_to_string(namaAlamat,
                                        lang="ind",
                                        config='--psm 6 --oem 3')
    print(text1)
    text1 = text1.split("\n")
    text1[0] = text1[0].replace('1', 'I')
    nama = re.sub(r'[^a-zA-Z .:]', r'', text1[0])
    alamat = re.sub(r'[^a-zA-Z0-9 .:]', r'', text1[1])
    if 'NAMA' in nama:
        nama = nama.split('NAMA')[1]
        return
    elif 'nama' in nama:
        nama = nama.replace('nama', '')
    if 'ALAMAT' in alamat:
        alamat = alamat.split('ALAMAT')[1]
        return
    elif 'alamat' in alamat:
        alamat = alamat.replace('alamat', '')

    if ':' in alamat:
        alamat = alamat.split(':')[1]

    text4 = pytesseract.image_to_string(rt,
                                        lang="ind",
                                        config='--psm 6 --oem 3')
    text4 = text4.split("\n")
    newText4 = ''
    for i in text4:
        newText4 += ' ' + i
    if ':' in newText4:
        rt = newText4.split(':')[1]
    else:
        rt = newText4

    text5 = pytesseract.image_to_string(block, lang="ind", config='--oem 3')
    print(text5)
    if 'om' in text5:
        text5 = text5.replace('om', 'cm')
    if 'mm' in text5:
        text5 = text5.replace('mm', 'cm')
    text5 = text5.split('\n')
    print('oiiiiiii')
    print(text5)
    newText5 = [i for i in text5 if i != '']
    print(newText5)
    if 'cm' in newText5:
        newText5.remove('cm')
        newText5[2] += ' cm'
    print(newText5)
    datas = []
    datas.extend(newText5)
    if len(datas) != 6:
        datas.append('')
    print(datas)

    text3 = pytesseract.image_to_string(kelamin, lang="ind")
    print(text3)
    if 'PRI' in text3 or 'PRIA' in text3:
        text3 = 'PRIA'
        print('Jenis Kelamin :' + text3)
    elif 'W' in text3 or 'WANITA' in text3 or 'WAN' in text3:
        text3 = 'WANITA'
        print('Jenis Kelamin :' + text3)
    kelamin = text3

    # from datas
    teL = datas[0].strip().upper()
    taL = datas[1].strip().upper()
    tinggi = datas[2].strip()
    pekerjaan = datas[3].strip().upper()
    noSIM = datas[4]
    noSIM = re.sub(r'[^0-9]', r'', noSIM).strip()
    mb = datas[5].strip()

    text2 = pytesseract.image_to_string(
        tipe,
        lang="ind",
        config='-c tessedit_char_whitelist=AaBbCc --psm 6 --oem 3')
    print(text2)

    jenisSim = text2.strip().upper()

    # Cleaning data for JSON

    nama = re.sub(r'[^a-zA-Z .]', r'', nama).strip().upper()
    if 'NAMA' in nama:
        nama = nama.replace('NAMA', '').strip()

    alamat = alamat.upper()
    if 'ALAMAT' in alamat:
        alamat = alamat.replace('ALAMAT', '')
    rt = rt.upper()
    alamat = alamat + ' ' + rt
    alamat = alamat.strip()

    kelamin = text3.strip().upper()

    if tinggi[0] != '1':
        tinggi = '1' + tinggi
    pekerjaan = text5[3].strip().upper()
    if 'ARYAWAN SW' in pekerjaan or 'SWASTA' in pekerjaan or 'AWAN' in pekerjaan:
        pekerjaan = 'KARYAWAN SWASTA'

    x = {
        "message": "OCR Success",
        "success": True,
        "documentType": "SIMprocessed",
        "data": {
            "Nama": nama,
            "Alamat": alamat,
            "JenisKelamin": kelamin,
            "TempatLahir": teL,
            "TanggalLahir": taL,
            "Tinggi": tinggi,
            "Pekerjaan": pekerjaan,
            "NoSIM": noSIM,
            "JenisSIM": jenisSim,
            "mb": mb
        },
        "img": {
            "sim": filename,
        }
    }

    # convert into JSON:
    return x, filename
Code example #33
 def test_not_binarize_empty(self):
     """
     Test that mode '1' images aren't binarized again.
     """
     with Image.new('1', (1000,1000)) as im:
         nlbin(im)
Code example #34
File: test_binarization.py Project: QuLogic/ocropy
 def test_not_binarize_bw(self):
     """
     Test that mode '1' images aren't binarized again.
     """
     with Image.new('1', (1000,1000)) as im:
         self.assertEqual(im, nlbin(im))
Code example #35
File: ketos.py Project: QuLogic/ocropy
def line_generator(ctx, font, maxlines, encoding, normalization, renormalize,
                   font_size, language, max_length, strip, disable_degradation,
                   binarize, mean, sigma, density, distort, distortion_sigma,
                   legacy, output, text):
    """
    Generates artificial text line training data.
    """
    lines = set()
    if not text:
        return
    st_time = time.time()
    for t in text:
        with click.open_file(t, encoding=encoding) as fp:
            if ctx.meta['verbose'] > 0:
                click.echo(u'[{:2.4f}] Reading {}'.format(time.time() - st_time, t))
            else:
                spin('Reading texts')
            lines.update(fp.readlines())
    if normalization:
        lines = set([unicodedata.normalize(normalization, line) for line in lines])
    if strip:
        lines = set([line.strip() for line in lines])
    if max_length:
        lines = set([line for line in lines if len(line) < max_length])
    if ctx.meta['verbose'] > 0:
        click.echo(u'[{:2.4f}] Read {} lines'.format(time.time() - st_time, len(lines)))
    else:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
        click.echo('Read {} unique lines'.format(len(lines)))
    if maxlines and maxlines < len(lines):
        click.echo('Sampling {} lines\t'.format(maxlines), nl=False)
        lines = list(lines)
        lines = [lines[idx] for idx in np.random.randint(0, len(lines), maxlines)]
        click.secho(u'\u2713', fg='green')
    try:
        os.makedirs(output)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    lines = [line.strip() for line in lines]

    # calculate the alphabet and print it for verification purposes
    alphabet = set()
    for line in lines:
        alphabet.update(line)
    chars = []
    combining = []
    for char in sorted(alphabet):
        if unicodedata.combining(char):
            combining.append(unicodedata.name(char))
        else:
            chars.append(char)
    click.echo(u'Σ (len: {})'.format(len(alphabet)))
    click.echo(u'Symbols: {}'.format(''.join(chars)))
    if combining:
        click.echo(u'Combining Characters: {}'.format(', '.join(combining)))
    lg = linegen.LineGenerator(font, font_size, language)
    for idx, line in enumerate(lines):
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] {}'.format(time.time() - st_time, line))
        else:
            spin('Writing images')
        try:
            if renormalize:
                im = lg.render_line(unicodedata.normalize(renormalize, line))
            else:
                im = lg.render_line(line)
        except KrakenCairoSurfaceException as e:
            if ctx.meta['verbose'] > 0:
                click.echo('[{:2.4f}] {}: {} {}'.format(time.time() - st_time, e.message, e.width, e.height))
            else:
                click.secho(u'\b\u2717', fg='red')
                click.echo('{}: {} {}'.format(e.message, e.width, e.height))
            continue
        if not disable_degradation and not legacy:
            im = linegen.distort_line(im, np.random.normal(distort), np.random.normal(distortion_sigma))
            im = linegen.degrade_line(im, np.random.normal(mean), np.random.normal(sigma), np.random.normal(density))
        elif legacy:
            im = linegen.ocropy_degrade(im)
        if binarize:
            im = binarization.nlbin(im)
        im.save('{}/{:06d}.png'.format(output, idx))
        with open('{}/{:06d}.gt.txt'.format(output, idx), 'wb') as fp:
            fp.write(line.encode('utf-8'))
    if ctx.meta['verbose'] == 0:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
Code example #36
File: extractor.py Project: Atanahel/Cini-OCR
def _binarize(cv2image):
    pil_image = Image.fromarray(cv2image)
    return binarization.nlbin(pil_image, zoom=1.0)
Code example #37
File: ocr.py Project: among/fusus
    def read(self, page):
        """Performs OCR with Kraken."""

        stages = page.stages
        scan = stages.get("clean", None)
        if scan is None:
            return None

        nonLetter = self.nonLetter

        model = self.ensureLoaded()

        blocks = page.blocks
        ocrChars = []
        ocrWords = []
        ocrLines = []
        stages["char"] = ocrChars
        stages["word"] = ocrWords
        stages["line"] = ocrLines
        binary = pil2array(nlbin(array2pil(scan)))

        for ((stripe, block), data) in blocks.items():
            (left, top, right, bottom) = data["inner"]
            thisBinary = binary[top:bottom, left:right]
            lines = data["bands"]["main"]["lines"]
            for (ln, (up, lo)) in enumerate(lines):
                lln = ln + 1
                roi = thisBinary[up : lo + 1]
                (b, e, roi) = removeMargins(roi, keep=16)
                ocrLines.append((stripe, block, lln, left + b, top + up, left + e, top + lo))
                (roiH, roiW) = roi.shape[0:2]
                roi = array2pil(roi)
                bounds = dict(boxes=([0, 0, roiW, roiH],), text_direction=RL)

                # adapt the boxes, because they correspond to peaks of recognition,
                # not to character extents
                #
                # See https://github.com/mittagessen/kraken/issues/184

                adaptedPreds = []
                for (c, (le, to, ri, bo), conf) in chain.from_iterable(
                    rpred(model, roi, bounds, pad=0, bidi_reordering=True)
                ):
                    if adaptedPreds:
                        prevPred = adaptedPreds[-1]
                        prevEdge = prevPred[1][0]
                    else:
                        prevEdge = roiW
                    correction = int(round((prevEdge - ri) / 2))
                    thisRi = ri + correction
                    if adaptedPreds:
                        adaptedPreds[-1][1][0] -= correction
                    adaptedPreds.append([c, [le, to, thisRi, bo], conf])
                if adaptedPreds:
                    adaptedPreds[-1][1][0] = 0

                # divide into words, not only on spaces, but also on punctuation

                curWord = [[], []]
                inWord = True

                for (c, (le, to, ri, bo), conf) in adaptedPreds:
                    offsetW = left + b
                    offsetH = top + up
                    pos = (le + offsetW, to + offsetH, ri + offsetW, bo + offsetH)
                    conf = int(round(conf * 100))
                    ocrChars.append((stripe, block, lln, *pos, conf, c))

                    spaceSeen = c == " "
                    changeWord = not inWord and c not in nonLetter
                    element = (c, pos, conf)

                    if spaceSeen:
                        curWord[1].append(element)
                    if spaceSeen or changeWord:
                        if curWord[0] or curWord[1]:
                            ocrWords.append((stripe, block, lln, *addWord(curWord)))
                            curWord = [[], []]
                            inWord = True
                            continue

                    if inWord:
                        if c in nonLetter:
                            inWord = False
                    dest = 0 if inWord else 1
                    curWord[dest].append(element)
                if curWord[0] or curWord[1]:
                    ocrWords.append((stripe, block, lln, *addWord(curWord)))

        page.write(stage="line,word,char")
Code example #38
def _binarize(cv2image):
    pil_image = Image.fromarray(cv2image)
    return binarization.nlbin(pil_image, zoom=1.0)
Code example #39
File: ketos.py Project: mittagessen/kraken
def extract(ctx, binarize, normalization, normalize_whitespace, reorder,
            rotate, output, format, transcriptions):
    """
    Extracts image-text pairs from a transcription environment created using
    ``ketos transcribe``.
    """
    import regex
    import base64

    from io import BytesIO
    from PIL import Image
    from lxml import html, etree

    from kraken import binarization

    try:
        os.mkdir(output)
    except Exception:
        pass

    text_transforms = []
    if normalization:
        text_transforms.append(lambda x: unicodedata.normalize(normalization, x))
    if normalize_whitespace:
        text_transforms.append(lambda x: regex.sub(r'\s', ' ', x))
    if reorder:
        text_transforms.append(get_display)

    idx = 0
    manifest = []
    with log.progressbar(transcriptions, label='Reading transcriptions') as bar:
        for fp in bar:
            logger.info('Reading {}'.format(fp.name))
            doc = html.parse(fp)
            etree.strip_tags(doc, etree.Comment)
            td = doc.find(".//meta[@itemprop='text_direction']")
            if td is None:
                td = 'horizontal-lr'
            else:
                td = td.attrib['content']

            im = None
            dest_dict = {'output': output, 'idx': 0, 'src': fp.name, 'uuid': str(uuid.uuid4())}
            for section in doc.xpath('//section'):
                img = section.xpath('.//img')[0].get('src')
                fd = BytesIO(base64.b64decode(img.split(',')[1]))
                im = Image.open(fd)
                if not im:
                    logger.info('Skipping {} because image not found'.format(fp.name))
                    break
                if binarize:
                    im = binarization.nlbin(im)
                for line in section.iter('li'):
                    if line.get('contenteditable') and (not u''.join(line.itertext()).isspace() and u''.join(line.itertext())):
                        dest_dict['idx'] = idx
                        dest_dict['uuid'] = str(uuid.uuid4())
                        logger.debug('Writing line {:06d}'.format(idx))
                        l_img = im.crop([int(x) for x in line.get('data-bbox').split(',')])
                        if rotate and td.startswith('vertical'):
                            im.rotate(90, expand=True)
                        l_img.save(('{output}/' + format + '.png').format(**dest_dict))
                        manifest.append((format + '.png').format(**dest_dict))
                        text = u''.join(line.itertext()).strip()
                        for func in text_transforms:
                            text = func(text)
                        with open(('{output}/' + format + '.gt.txt').format(**dest_dict), 'wb') as t:
                            t.write(text.encode('utf-8'))
                        idx += 1
    logger.info('Extracted {} lines'.format(idx))
    with open('{}/manifest.txt'.format(output), 'w') as fp:
        fp.write('\n'.join(manifest))
Code example #40
import os
import sys

from PIL import Image, ImageDraw
from kraken.pageseg import segment
from kraken.binarization import nlbin
from kraken.rpred import rpred
from itertools import cycle
from kraken.lib import models

cmap = cycle([(230, 25, 75, 127),
              (60, 180, 75, 127),
              (255, 225, 25, 127),
              (0, 130, 200, 127),
              (245, 130, 48, 127),
              (145, 30, 180, 127),
              (70, 240, 240, 127)])

net = models.load_any(sys.argv[1])

for fname in sys.argv[2:]:
    im = Image.open(fname)
    print(fname)
    im = nlbin(im)
    res = segment(im, maxcolseps=0)
    pred = rpred(net, im, res)
    im = im.convert('RGBA')
    tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(tmp)
    for line in pred:
        for box in line.cuts:
            draw.rectangle(box, fill=next(cmap))
    im = Image.alpha_composite(im, tmp)
    im.save('high_{}'.format(os.path.basename(fname)))
Code example #41
File: ketos.py Project: mittagessen/kraken
def transcription(ctx, text_direction, scale, bw, maxcolseps,
                  black_colseps, font, font_style, prefill, pad, lines, output,
                  images):
    """
    Creates transcription environments for ground truth generation.
    """
    from PIL import Image

    from kraken import rpred
    from kraken import pageseg
    from kraken import transcribe
    from kraken import binarization

    from kraken.lib import models
    from kraken.lib.util import is_bitonal

    ti = transcribe.TranscriptionInterface(font, font_style)

    if len(images) > 1 and lines:
        raise click.UsageError('--lines option is incompatible with multiple image files')

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        message('Loading RNN', nl=False)
        prefill = models.load_any(prefill)
        message('\u2713', fg='green')

    with log.progressbar(images, label='Reading images') as bar:
        for fp in bar:
            logger.info('Reading {}'.format(fp.name))
            im = Image.open(fp)
            if im.mode not in ['1', 'L', 'P', 'RGB']:
                logger.warning('Input {} is in {} color mode. Converting to RGB'.format(fp.name, im.mode))
                im = im.convert('RGB')
            logger.info('Binarizing page')
            im_bin = binarization.nlbin(im)
            im_bin = im_bin.convert('1')
            logger.info('Segmenting page')
            if not lines:
                res = pageseg.segment(im_bin, text_direction, scale, maxcolseps, black_colseps, pad=pad)
            else:
                with open_file(lines, 'r') as fp:
                    try:
                        fp = cast(IO[Any], fp)
                        res = json.load(fp)
                    except ValueError as e:
                        raise click.UsageError('{} invalid segmentation: {}'.format(lines, str(e)))
            if prefill:
                it = rpred.rpred(prefill, im_bin, res)
                preds = []
                logger.info('Recognizing')
                for pred in it:
                    logger.debug('{}'.format(pred.prediction))
                    preds.append(pred)
                ti.add_page(im, res, records=preds)
            else:
                ti.add_page(im, res)
            fp.close()
    logger.info('Writing transcription to {}'.format(output.name))
    message('Writing output', nl=False)
    ti.write(output)
    message('\u2713', fg='green')
Code example #42
 def test_not_binarize_bw(self):
     """
     Test that mode '1' images aren't binarized again.
     """
     with Image.open(os.path.join(resources, 'bw.png')) as im:
         self.assertEqual(im, nlbin(im))