def degrade_line(im, eta=0, alpha=1.7, beta=1.7, alpha_0=1, beta_0=1):
    """
    Degrades a line image by randomly flipping pixels.

    The image is inverted and scaled so that its darkest value becomes 1 and
    its brightest value 0. Two flip masks are then drawn, each with a
    per-pixel probability of ``scale * exp(-rate * distance**2) + eta`` where
    ``distance`` is a taxicab chamfer distance transform.

    Args:
        im (PIL.Image): Input image
        eta (float): Constant added to both flip probabilities
        alpha (float): Decay rate of the probability of clearing a non-zero
                       pixel
        beta (float): Decay rate of the probability of setting a pixel that
                      is not at the maximum
        alpha_0 (float): Scale of the clearing probability
        beta_0 (float): Scale of the setting probability

    Returns:
        PIL.Image in mode 'L'
    """
    im = pil2array(im)
    im = np.amax(im) - im
    im = im * 1.0 / np.amax(im)

    # foreground distance transform and flipping to white probability
    fg_dist = distance_transform_cdt(im, metric='taxicab')
    # np.random.binomial rejects probabilities outside [0, 1], which the
    # formula can produce for large eta/alpha_0 or negative eta.
    fg_prob = np.clip(alpha_0 * np.exp(-alpha * (fg_dist**2)) + eta, 0, 1)
    fg_prob[im == 0] = 0
    fg_flip = np.random.binomial(1, fg_prob)

    # background distance transform and flipping to black probability
    bg_dist = distance_transform_cdt(1 - im, metric='taxicab')
    bg_prob = np.clip(beta_0 * np.exp(-beta * (bg_dist**2)) + eta, 0, 1)
    bg_prob[im == 1] = 0
    bg_flip = np.random.binomial(1, bg_prob)

    # flip
    im -= fg_flip
    im += bg_flip

    # close small holes with a 2x2 square structuring element
    sel = np.array([[1, 1], [1, 1]])
    im = binary_closing(im, sel)
    return array2pil(255 - im.astype('B') * 255)
def nlbin(im, threshold=0.5, zoom=0.5, escale=1.0, border=0.1, perc=80,
          range=20, low=5, high=90):
    """
    Performs binarization using non-linear processing.

    Args:
        im (PIL.Image): Input image; returned unchanged if its mode is '1'
        threshold (float): Cut-off applied to the flattened and
                           contrast-stretched image; pixels above it are set
                           to 255, all others to 0
        zoom (float): Zoom for background page estimation
        escale (float): Scale for estimating a mask over the text region
        border (float): Ignore this much of the border
        perc (int): Percentage for filters
        range (int): Range for filters
        low (int): Percentile for black estimation
        high (int): Percentile for white estimation

    Returns:
        PIL.Image containing the binarized image

    Raises:
        KrakenInputException: if all pixels of the image have the same value.
    """
    if im.mode == '1':
        return im

    raw = pil2array(im)
    # rescale image to between -1 or 0 and 1
    # (builtin float: the np.float alias no longer exists in current NumPy)
    raw = raw / float(np.iinfo(raw.dtype).max)
    if raw.ndim == 3:
        raw = np.mean(raw, 2)

    # perform image normalization
    if np.amax(raw) == np.amin(raw):
        raise KrakenInputException('Image is empty')
    image = raw - np.amin(raw)
    image /= np.amax(image)

    # estimate the background on a zoomed copy and subtract it
    m = interpolation.zoom(image, zoom)
    m = filters.percentile_filter(m, perc, size=(range, 2))
    m = filters.percentile_filter(m, perc, size=(2, range))
    m = interpolation.zoom(m, 1.0 / zoom)
    # zooming back may not reproduce the exact shape; use the common extent
    rows, cols = np.minimum(np.array(image.shape), np.array(m.shape))
    flat = np.clip(image[:rows, :cols] - m[:rows, :cols] + 1, 0, 1)

    # estimate low and high thresholds
    d0, d1 = flat.shape
    o0, o1 = int(border * d0), int(border * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    # by default, we use only regions that contain
    # significant variance; this makes the percentile
    # based low and high estimates more reliable
    v = est - filters.gaussian_filter(est, escale * 20.0)
    v = filters.gaussian_filter(v**2, escale * 20.0)**0.5
    v = (v > 0.3 * np.amax(v))
    # array shapes must be integers; escale is a float
    extent = int(escale * 50)
    v = morphology.binary_dilation(v, structure=np.ones((extent, 1)))
    v = morphology.binary_dilation(v, structure=np.ones((1, extent)))
    est = est[v]
    lo = np.percentile(est.ravel(), low)
    hi = np.percentile(est.ravel(), high)

    # stretch contrast between the two estimates and threshold
    flat -= lo
    flat /= (hi - lo)
    flat = np.clip(flat, 0, 1)
    binarized = np.array(255 * (flat > threshold), 'B')
    return array2pil(binarized)
def distort_line(im, distort=3.0, sigma=10, eps=0.03, delta=0.3):
    """
    Distorts a line image.

    The image is pasted onto a larger white canvas, sent through a randomly
    perturbed affine transformation followed by a smooth random displacement
    field, and finally cropped to the bounding box of its non-white content.

    NOTE(review): earlier documentation stated that a white border of 5
    pixels is added and that this should run BEFORE degrade_line; the code
    below crops to the content box without padding - confirm.

    Args:
        im (PIL.Image): Input image
        distort (float): Maximum value of each displacement field
        sigma (float): Sigma of the Gaussian smoothing the displacement fields
        eps (float): Scale of the random perturbation of the affine matrix
        delta (float): Scale of the random translation

    Returns:
        PIL.Image in mode 'L'
    """
    width, height = im.size
    # XXX: determine correct output shape from transformation matrices instead
    # of guesstimating.
    canvas_w = int(1.5 * width)
    canvas_h = 4 * height
    logger.debug('Pasting source image into canvas')
    canvas = Image.new('L', (canvas_w, canvas_h), 255)
    paste_at = (int((canvas_w - width) / 2), int((canvas_h - height) / 2))
    canvas.paste(im, paste_at)
    line = pil2array(canvas.convert('L'))

    # shear in y direction with factor eps * randn(), scaling with 1 + eps *
    # randn() in x/y axis (all offset at d)
    logger.debug('Performing affine transformation')
    m00 = 1 + eps * np.random.randn()
    m10 = eps * np.random.randn()
    m11 = 1.0 + eps * np.random.randn()
    matrix = np.array([[m00, 0.0], [m10, m11]])
    centre = np.array([width / 2.0, height / 2])
    jitter = np.array([np.random.randn() * delta, np.random.randn() * delta])
    offset = centre - np.dot(matrix, centre) + jitter
    line = affine_transform(line, matrix, offset=offset, order=1,
                            mode='constant', cval=255)

    # two smooth random fields, one per array axis, scaled to peak at distort
    field_shape = (4 * height, int(1.5 * width))
    shift_0 = gaussian_filter(np.random.randn(*field_shape), sigma)
    shift_1 = gaussian_filter(np.random.randn(*field_shape), sigma)
    shift_0 *= distort / np.amax(shift_0)
    shift_1 *= distort / np.amax(shift_1)

    def _displace(coords):
        i, j = coords[0], coords[1]
        return (i + shift_0[i, j], j + shift_1[i, j])

    logger.debug('Performing geometric transformation')
    warped = geometric_transform(line, _displace, order=1, mode='nearest')
    im = array2pil(warped)
    logger.debug('Cropping canvas to content box')
    content_box = ImageOps.invert(im).getbbox()
    return im.crop(content_box)
def ocropy_degrade(im, distort=1.0, dsigma=20.0, eps=0.03, delta=0.3,
                   degradations=((0.5, 0.0, 0.5, 0.0),)):
    """
    Degrades and distorts a line using the same noise model used by ocropus.

    Args:
        im (PIL.Image): Input image
        distort (float): Maximum value of each displacement field; values
                         <= 0 skip the geometric distortion
        dsigma (float): Sigma of the Gaussian smoothing the displacement
                        fields
        eps (float): Scale of the random perturbation of the affine matrix
        delta (float): Scale of the random translation
        degradations (sequence): sequence of 4-tuples
                                 ``(sigma, ssigma, threshold, sthreshold)``
                                 corresponding to the degradations argument
                                 of ocropus-linegen. One entry is picked at
                                 random per call.

    Returns:
        PIL.Image in mode 'L'
    """
    w, h = im.size
    # XXX: determine correct output shape from transformation matrices instead
    # of guesstimating.
    image = Image.new('L', (int(1.5 * w), 4 * h), 255)
    image.paste(im, (int((image.size[0] - w) / 2),
                     int((image.size[1] - h) / 2)))
    a = pil2array(image.convert('L'))

    # pick one degradation and jitter its blur sigma and threshold
    (sigma, ssigma, threshold, sthreshold) = \
        degradations[np.random.choice(len(degradations))]
    sigma += (2 * np.random.rand() - 1) * ssigma
    threshold += (2 * np.random.rand() - 1) * sthreshold

    a = a * 1.0 / np.amax(a)
    if sigma > 0.0:
        a = gaussian_filter(a, sigma)
    a += np.clip(np.random.randn(*a.shape) * 0.2, -0.25, 0.25)

    m = np.array([[1 + eps * np.random.randn(), 0.0],
                  [eps * np.random.randn(), 1.0 + eps * np.random.randn()]])
    rows, cols = a.shape
    centre = np.array([rows / 2.0, cols / 2])
    d = centre - np.dot(m, centre) + np.array([np.random.randn() * delta,
                                               np.random.randn() * delta])
    a = affine_transform(a, m, offset=d, order=1, mode='constant',
                         cval=a[0, 0])
    a = np.array(a > threshold, 'f')

    # crop to the zero-valued pixels plus a margin of up to 5 pixels
    [[r, c]] = find_objects(np.array(a == 0, 'i'))
    r0 = r.start
    r1 = r.stop
    c0 = c.start
    c1 = c.stop
    # clamp the start indices: a negative slice start would wrap around to
    # the far edge and produce an empty crop
    a = a[max(r0 - 5, 0):r1 + 5, max(c0 - 5, 0):c1 + 5]

    if distort > 0:
        h, w = a.shape
        hs = np.random.randn(h, w)
        ws = np.random.randn(h, w)
        hs = gaussian_filter(hs, dsigma)
        ws = gaussian_filter(ws, dsigma)
        hs *= distort / np.amax(hs)
        ws *= distort / np.amax(ws)

        def f(p):
            return (p[0] + hs[p[0], p[1]], p[1] + ws[p[0], p[1]])

        a = geometric_transform(a, f, output_shape=(h, w), order=1,
                                mode='constant', cval=np.amax(a))
    im = array2pil(a).convert('L')
    return im
def degrade_line(im, eta=0.0, alpha=1.5, beta=1.5, alpha_0=1.0, beta_0=1.0):
    """
    Degrades a line image by adding noise.

    The image is inverted and scaled to the range [0, 1]. Two random flip
    masks are drawn with per-pixel probability
    ``scale * exp(-rate * distance**2) + eta`` (``distance`` being a taxicab
    chamfer distance transform), applied to the image, and the result is
    cleaned up with a binary closing.

    For parameter meanings consult [1].

    Args:
        im (PIL.Image): Input image
        eta (float): Constant added to both flip probabilities
        alpha (float): Decay rate for the mask that is added to the image
        beta (float): Decay rate for the mask that is subtracted from the
                      image
        alpha_0 (float): Scale for the mask that is added
        beta_0 (float): Scale for the mask that is subtracted

    Returns:
        PIL.Image created by array2pil from a uint8 array holding only 0 and
        255 (documented as mode '1' - TODO confirm against array2pil).
    """
    def _draw_flips(distance, scale, rate, excluded):
        # Bernoulli draw per pixel; excluded pixels never flip
        prob = scale * np.exp(-rate * (distance**2)) + eta
        prob[excluded] = 0
        return np.random.binomial(1, prob)

    logger.debug(u'Inverting and normalizing input image')
    pixels = pil2array(im)
    pixels = np.amax(pixels) - pixels
    pixels = pixels * 1.0 / np.amax(pixels)

    logger.debug(u'Calculating foreground distance transform')
    dist_of_inverse = distance_transform_cdt(1 - pixels, metric='taxicab')
    logger.debug(u'Calculating flip to white probability')
    to_add = _draw_flips(dist_of_inverse, alpha_0, alpha, pixels == 1)

    logger.debug(u'Calculating background distance transform')
    dist_of_pixels = distance_transform_cdt(pixels, metric='taxicab')
    logger.debug(u'Calculating flip to black probability')
    to_remove = _draw_flips(dist_of_pixels, beta_0, beta, pixels == 0)

    # flip
    logger.debug(u'Flipping')
    pixels -= to_remove
    pixels += to_add

    logger.debug(u'Binary closing')
    closed = binary_closing(pixels, np.array([[1, 1], [1, 1]]))
    logger.debug(u'Converting to image')
    return array2pil(255 - closed.astype('B') * 255)
def degrade_line(im, eta=0.0, alpha=1.5, beta=1.5, alpha_0=1.0, beta_0=1.0):
    """
    Degrades a line image by adding noise.

    Works on an inverted copy of the image scaled to [0, 1]: pixels are
    flipped at random with a probability that decays exponentially with the
    squared taxicab chamfer distance, then a 2x2 binary closing is applied.

    For parameter meanings consult [1].

    Args:
        im (PIL.Image): Input image
        eta (float): Constant added to both flip probabilities
        alpha (float): Decay rate of the probability for the added mask
        beta (float): Decay rate of the probability for the subtracted mask
        alpha_0 (float): Scale of the probability for the added mask
        beta_0 (float): Scale of the probability for the subtracted mask

    Returns:
        PIL.Image created by array2pil from a uint8 array holding only 0 and
        255 (documented as mode '1' - TODO confirm against array2pil).
    """
    logger.debug('Inverting and normalizing input image')
    arr = pil2array(im)
    arr = np.amax(arr) - arr
    arr = arr * 1.0 / np.amax(arr)
    at_max = arr == 1
    at_zero = arr == 0

    logger.debug('Calculating foreground distance transform')
    dist_a = distance_transform_cdt(1 - arr, metric='taxicab')
    logger.debug('Calculating flip to white probability')
    prob_a = alpha_0 * np.exp(-alpha * (dist_a**2)) + eta
    prob_a[at_max] = 0
    added = np.random.binomial(1, prob_a)

    logger.debug('Calculating background distance transform')
    dist_b = distance_transform_cdt(arr, metric='taxicab')
    logger.debug('Calculating flip to black probability')
    prob_b = beta_0 * np.exp(-beta * (dist_b**2)) + eta
    prob_b[at_zero] = 0
    removed = np.random.binomial(1, prob_b)

    # flip
    logger.debug('Flipping')
    arr -= removed
    arr += added

    logger.debug('Binary closing')
    square = np.array([[1, 1], [1, 1]])
    arr = binary_closing(arr, square)

    logger.debug('Converting to image')
    as_bytes = arr.astype('B') * 255
    return array2pil(255 - as_bytes)
def distort_line(im, distort=3.0, sigma=10, eps=0.03, delta=0.3):
    """
    Distorts a line image.

    Steps: paste the image centred on a white canvas of 1.5x its width and
    4x its height, apply a randomly perturbed affine transformation, warp
    with two Gaussian-smoothed random displacement fields, then crop to the
    bounding box of the non-white content.

    NOTE(review): earlier documentation stated that a white border of 5
    pixels is added and that this should run BEFORE degrade_line; no padding
    is added by the code below - confirm.

    Args:
        im (PIL.Image): Input image
        distort (float): Maximum value of each displacement field
        sigma (float): Sigma of the Gaussian smoothing the displacement fields
        eps (float): Scale of the random perturbation of the affine matrix
        delta (float): Scale of the random translation

    Returns:
        PIL.Image in mode 'L'
    """
    w, h = im.size
    # XXX: determine correct output shape from transformation matrices instead
    # of guesstimating.
    canvas_size = (int(1.5*w), 4*h)
    logger.debug(u'Pasting source image into canvas')
    canvas = Image.new('L', canvas_size, 255)
    left = int((canvas.size[0] - w) / 2)
    upper = int((canvas.size[1] - h) / 2)
    canvas.paste(im, (left, upper))
    pixels = pil2array(canvas.convert('L'))

    # shear in y direction with factor eps * randn(), scaling with 1 + eps *
    # randn() in x/y axis (all offset at d)
    logger.debug(u'Performing affine transformation')
    first_row = [1 + eps * np.random.randn(), 0.0]
    second_row = [eps * np.random.randn(), 1.0 + eps * np.random.randn()]
    transform = np.array([first_row, second_row])
    pivot = np.array([w/2.0, h/2])
    shift = np.array([np.random.randn() * delta, np.random.randn() * delta])
    d = pivot - np.dot(transform, pivot) + shift
    pixels = affine_transform(pixels, transform, offset=d, order=1,
                              mode='constant', cval=255)

    # smooth random displacement per array axis, scaled to peak at distort
    hs = gaussian_filter(np.random.randn(4*h, int(1.5*w)), sigma)
    ws = gaussian_filter(np.random.randn(4*h, int(1.5*w)), sigma)
    hs *= distort/np.amax(hs)
    ws *= distort/np.amax(ws)

    def _shifted(point):
        first, second = point[0], point[1]
        return (first + hs[first, second], second + ws[first, second])

    logger.debug(u'Performing geometric transformation')
    result = array2pil(geometric_transform(pixels, _shifted, order=1,
                                           mode='nearest'))
    logger.debug(u'Cropping canvas to content box')
    bbox = ImageOps.invert(result).getbbox()
    result = result.crop(bbox)
    return result
def dewarp(normalizer: CenterNormalizer, im: Image.Image) -> Image.Image:
    """
    Dewarps an image of a line using a kraken.lib.lineest.CenterNormalizer
    instance.

    The normalizer first measures an inverted copy of the line scaled to
    [0, 1], then normalizes the unmodified pixel data, filling with the
    line's maximum value.

    Args:
        normalizer (kraken.lib.lineest.CenterNormalizer): A line normalizer
                                                          instance
        im (PIL.Image.Image): Image to dewarp

    Returns:
        PIL.Image containing the dewarped image.
    """
    pixels = pil2array(im)
    peak = np.amax(pixels)
    inverted = peak - pixels
    normalizer.measure(inverted * 1.0 / np.amax(inverted))
    return array2pil(normalizer.normalize(pixels, cval=peak))
def dewarp(normalizer, im):
    """
    Dewarps an image of a line using a kraken.lib.lineest.CenterNormalizer
    instance.

    An inverted, [0, 1]-scaled copy of the line is handed to
    ``normalizer.measure``; the original pixel data is then passed through
    ``normalizer.normalize`` with its maximum value as fill value.

    Args:
        normalizer (kraken.lib.lineest.CenterNormalizer): A line normalizer
                                                          instance
        im (PIL.Image): Image to dewarp

    Returns:
        PIL.Image containing the dewarped image.
    """
    source = pil2array(im)
    fill_value = np.amax(source)

    # measurement runs on the inverted, normalized copy
    measured = fill_value - source
    measured = measured * 1.0 / np.amax(measured)
    normalizer.measure(measured)

    dewarped = normalizer.normalize(source, cval=fill_value)
    return array2pil(dewarped)
def degrade_line(im, mean=0.0, sigma=0.001, density=0.002):
    """
    Degrades a line image by adding several kinds of noise.

    The image is scaled by its maximum value, slightly blurred, overlaid with
    Gaussian noise, and sprinkled with salt and pepper noise before being
    scaled back and clipped to [0, 255].

    Args:
        im (PIL.Image): Input image
        mean (float): Mean of distribution for Gaussian noise
        sigma (float): Standard deviation for Gaussian noise
        density (float): Noise density for Salt and Pepper noise; half of the
                         draws are salt, half pepper

    Returns:
        PIL.Image in mode 'L'
    """
    im = pil2array(im)
    m = np.amax(im)
    im = gaussian_filter(im.astype('f') / m, 0.5)
    im += np.random.normal(mean, sigma, im.shape)

    flipped = int(np.ceil(density / 2 * im.size))
    # randint's upper bound is exclusive, so pass the full axis length to be
    # able to reach the last row/column. The per-axis index arrays must be
    # combined into a tuple to address individual pixels; a list of arrays
    # is treated as a single array index along the first axis.
    coords = tuple(np.random.randint(0, i, flipped) for i in im.shape)
    # salt: far above the scaled range, saturates to 255 in the final clip
    im[coords] = 255
    coords = tuple(np.random.randint(0, i, flipped) for i in im.shape)
    # pepper
    im[coords] = 0
    return array2pil(np.clip(im * m, 0, 255).astype('uint8'))
def degrade_line(im, mean=0.0, sigma=0.001, density=0.002):
    """
    Degrades a line image by adding several kinds of noise.

    The image is scaled by its maximum value, blurred with a Gaussian of
    sigma 0.5, overlaid with Gaussian noise, and sprinkled with salt and
    pepper noise before being scaled back and clipped to [0, 255].

    Args:
        im (PIL.Image): Input image
        mean (float): Mean of distribution for Gaussian noise
        sigma (float): Standard deviation for Gaussian noise
        density (float): Noise density for Salt and Pepper noise; half of the
                         draws are salt, half pepper

    Returns:
        PIL.Image in mode 'L'
    """
    im = pil2array(im)
    m = np.amax(im)
    im = gaussian_filter(im.astype('f') / m, 0.5)
    im += np.random.normal(mean, sigma, im.shape)

    flipped = int(np.ceil(density / 2 * im.size))

    def _random_pixels():
        # One index array per axis, combined into a tuple so that NumPy
        # addresses individual pixels. randint's upper bound is exclusive,
        # hence the full axis length.
        return tuple(np.random.randint(0, i, flipped) for i in im.shape)

    # salt: far above the scaled range, saturates to 255 in the final clip
    im[_random_pixels()] = 255
    # pepper
    im[_random_pixels()] = 0
    return array2pil(np.clip(im * m, 0, 255).astype('uint8'))
def nlbin(im: Image.Image,
          threshold: float = 0.5,
          zoom: float = 0.5,
          escale: float = 1.0,
          border: float = 0.1,
          perc: int = 80,
          range: int = 20,
          low: int = 5,
          high: int = 90) -> Image.Image:
    """
    Performs binarization using non-linear processing.

    Args:
        im (PIL.Image.Image): Input image; returned unchanged when
                              ``is_bitonal`` reports it as bitonal
        threshold (float): Cut-off applied to the flattened and
                           contrast-stretched image; pixels above it are set
                           to 255, all others to 0
        zoom (float): Zoom for background page estimation
        escale (float): Scale for estimating a mask over the text region
        border (float): Ignore this much of the border
        perc (int): Percentage for filters
        range (int): Range for filters
        low (int): Percentile for black estimation
        high (int): Percentile for white estimation

    Returns:
        PIL.Image containing the binarized image

    Raises:
        KrakenInputException when trying to binarize an empty image.
    """
    im_str = get_im_str(im)
    logger.info(f'Binarizing {im_str}')
    if is_bitonal(im):
        logger.info(f'Skipping binarization because {im_str} is bitonal.')
        return im
    # convert to grayscale first
    logger.debug(f'Converting {im_str} to grayscale')
    im = im.convert('L')
    raw = pil2array(im)
    logger.debug('Scaling and normalizing')
    # rescale image to between -1 or 0 and 1
    # (builtin float: the np.float alias no longer exists in current NumPy)
    raw = raw / float(np.iinfo(raw.dtype).max)
    # perform image normalization
    if np.amax(raw) == np.amin(raw):
        logger.warning(f'Trying to binarize empty image {im_str}')
        raise KrakenInputException('Image is empty')
    image = raw - np.amin(raw)
    image /= np.amax(image)

    logger.debug('Interpolation and percentile filtering')
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        m = interpolation.zoom(image, zoom)
        m = filters.percentile_filter(m, perc, size=(range, 2))
        m = filters.percentile_filter(m, perc, size=(2, range))
        # scale the background estimate back to the shape of the input
        mh, mw = m.shape
        oh, ow = image.shape
        scale = np.diag([mh * 1.0 / oh, mw * 1.0 / ow])
        m = affine_transform(m, scale, output_shape=image.shape)
    w, h = np.minimum(np.array(image.shape), np.array(m.shape))
    flat = np.clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)

    # estimate low and high thresholds
    d0, d1 = flat.shape
    o0, o1 = int(border * d0), int(border * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    logger.debug('Threshold estimates {}'.format(est))
    # by default, we use only regions that contain
    # significant variance; this makes the percentile
    # based low and high estimates more reliable
    logger.debug('Refine estimates')
    v = est - filters.gaussian_filter(est, escale * 20.0)
    v = filters.gaussian_filter(v**2, escale * 20.0)**0.5
    v = (v > 0.3 * np.amax(v))
    v = morphology.binary_dilation(v, structure=np.ones((int(escale * 50), 1)))
    v = morphology.binary_dilation(v, structure=np.ones((1, int(escale * 50))))
    est = est[v]
    lo = np.percentile(est.ravel(), low)
    hi = np.percentile(est.ravel(), high)

    # stretch contrast between the two estimates and threshold
    flat -= lo
    flat /= (hi - lo)
    flat = np.clip(flat, 0, 1)
    logger.debug(f'Thresholding at {threshold}')
    bin = np.array(255 * (flat > threshold), 'B')
    return array2pil(bin)
def read(self, page):
    """Performs OCR with Kraken.

    Reads the "clean" stage of the page, binarizes it, runs the recognizer
    on every line of the "main" band of every block, and stores the results
    as lists of tuples in the page stages "line", "word" and "char", which
    are then written out through ``page.write``.

    Args:
        page: page object providing ``stages``, ``blocks`` and ``write``.

    Returns:
        None. The early return when there is no "clean" stage also yields
        None.
    """
    stages = page.stages
    scan = stages.get("clean", None)
    if scan is None:
        return None
    nonLetter = self.nonLetter
    model = self.ensureLoaded()
    blocks = page.blocks
    # result containers, registered on the page before they are filled
    ocrChars = []
    ocrWords = []
    ocrLines = []
    stages["char"] = ocrChars
    stages["word"] = ocrWords
    stages["line"] = ocrLines
    # binarize the whole scan once; blocks and lines are sliced out of it
    binary = pil2array(nlbin(array2pil(scan)))
    for ((stripe, block), data) in blocks.items():
        (left, top, right, bottom) = data["inner"]
        thisBinary = binary[top:bottom, left:right]
        lines = data["bands"]["main"]["lines"]
        for (ln, (up, lo)) in enumerate(lines):
            # line numbers are recorded 1-based
            lln = ln + 1
            roi = thisBinary[up : lo + 1]
            # presumably b and e are the horizontal bounds that remain after
            # trimming the margins - verify against removeMargins
            (b, e, roi) = removeMargins(roi, keep=16)
            ocrLines.append((stripe, block, lln, left + b, top + up, left + e, top + lo))
            (roiH, roiW) = roi.shape[0:2]
            roi = array2pil(roi)
            # a single box spanning the whole line image
            bounds = dict(boxes=([0, 0, roiW, roiH],), text_direction=RL)
            # adapt the boxes, because they correspond to peaks of recognition,
            # not to character extents
            #
            # See https://github.com/mittagessen/kraken/issues/184
            #
            # For each prediction, half of the gap between the previous
            # prediction's left edge (the line width for the first one) and
            # this prediction's right edge is added to this right edge and
            # subtracted from the previous left edge.
            adaptedPreds = []
            for (c, (le, to, ri, bo), conf) in chain.from_iterable(
                rpred(model, roi, bounds, pad=0, bidi_reordering=True)
            ):
                if adaptedPreds:
                    prevPred = adaptedPreds[-1]
                    prevEdge = prevPred[1][0]
                else:
                    prevEdge = roiW
                correction = int(round((prevEdge - ri) / 2))
                thisRi = ri + correction
                if adaptedPreds:
                    adaptedPreds[-1][1][0] -= correction
                adaptedPreds.append([c, [le, to, thisRi, bo], conf])
            # the last prediction is stretched to the left border of the line
            if adaptedPreds:
                adaptedPreds[-1][1][0] = 0
            # divide into words, not only on spaces, but also on punctuation
            #
            # curWord[0] collects the characters seen while inWord is true,
            # curWord[1] the nonLetter characters and spaces that follow.
            curWord = [[], []]
            inWord = True
            for (c, (le, to, ri, bo), conf) in adaptedPreds:
                # translate line-local coordinates to page coordinates
                offsetW = left + b
                offsetH = top + up
                pos = (le + offsetW, to + offsetH, ri + offsetW, bo + offsetH)
                # confidence is stored as an integer percentage
                conf = int(round(conf * 100))
                ocrChars.append((stripe, block, lln, *pos, conf, c))
                spaceSeen = c == " "
                changeWord = not inWord and c not in nonLetter
                element = (c, pos, conf)
                if spaceSeen:
                    curWord[1].append(element)
                if spaceSeen or changeWord:
                    if curWord[0] or curWord[1]:
                        ocrWords.append((stripe, block, lln, *addWord(curWord)))
                        curWord = [[], []]
                        inWord = True
                        # NOTE(review): on changeWord the current character
                        # is skipped here and never added to the new word
                        # (it is still recorded in ocrChars) - confirm this
                        # is intended.
                        continue
                if inWord:
                    if c in nonLetter:
                        inWord = False
                dest = 0 if inWord else 1
                curWord[dest].append(element)
            # flush the word that is still pending at the end of the line
            if curWord[0] or curWord[1]:
                ocrWords.append((stripe, block, lln, *addWord(curWord)))
    page.write(stage="line,word,char")
def ocropy_degrade(im, distort=1.0, dsigma=20.0, eps=0.03, delta=0.3,
                   degradations=((0.5, 0.0, 0.5, 0.0), )):
    """
    Degrades and distorts a line using the same noise model used by ocropus.

    Args:
        im (PIL.Image): Input image
        distort (float): Maximum value of each displacement field; values
                         <= 0 skip the geometric distortion
        dsigma (float): Sigma of the Gaussian smoothing the displacement
                        fields
        eps (float): Scale of the random perturbation of the affine matrix
        delta (float): Scale of the random translation
        degradations (sequence): sequence of 4-tuples
                                 ``(sigma, ssigma, threshold, sthreshold)``
                                 corresponding to the degradations argument
                                 of ocropus-linegen. One entry is picked at
                                 random per call.

    Returns:
        PIL.Image in mode 'L'
    """
    w, h = im.size
    # XXX: determine correct output shape from transformation matrices instead
    # of guesstimating.
    logger.debug('Pasting source image into canvas')
    image = Image.new('L', (int(1.5 * w), 4 * h), 255)
    image.paste(im, (int((image.size[0] - w) / 2),
                     int((image.size[1] - h) / 2)))
    a = pil2array(image.convert('L'))

    logger.debug('Selecting degradations')
    (sigma, ssigma, threshold, sthreshold) = \
        degradations[np.random.choice(len(degradations))]
    sigma += (2 * np.random.rand() - 1) * ssigma
    threshold += (2 * np.random.rand() - 1) * sthreshold

    a = a * 1.0 / np.amax(a)
    if sigma > 0.0:
        logger.debug('Apply Gaussian filter')
        a = gaussian_filter(a, sigma)
    logger.debug('Adding noise')
    a += np.clip(np.random.randn(*a.shape) * 0.2, -0.25, 0.25)

    logger.debug('Perform affine transformation and resize')
    m = np.array([[1 + eps * np.random.randn(), 0.0],
                  [eps * np.random.randn(), 1.0 + eps * np.random.randn()]])
    rows, cols = a.shape
    centre = np.array([rows / 2.0, cols / 2])
    d = centre - np.dot(m, centre) + np.array([np.random.randn() * delta,
                                               np.random.randn() * delta])
    a = affine_transform(a, m, offset=d, order=1, mode='constant',
                         cval=a[0, 0])
    a = np.array(a > threshold, 'f')

    # crop to the zero-valued pixels plus a margin of up to 5 pixels
    [[r, c]] = find_objects(np.array(a == 0, 'i'))
    r0 = r.start
    r1 = r.stop
    c0 = c.start
    c1 = c.stop
    # clamp the start indices: a negative slice start would wrap around to
    # the far edge and produce an empty crop
    a = a[max(r0 - 5, 0):r1 + 5, max(c0 - 5, 0):c1 + 5]

    if distort > 0:
        logger.debug('Perform geometric transformation')
        h, w = a.shape
        hs = np.random.randn(h, w)
        ws = np.random.randn(h, w)
        hs = gaussian_filter(hs, dsigma)
        ws = gaussian_filter(ws, dsigma)
        hs *= distort / np.amax(hs)
        ws *= distort / np.amax(ws)

        def _f(p):
            return (p[0] + hs[p[0], p[1]], p[1] + ws[p[0], p[1]])

        a = geometric_transform(a, _f, output_shape=(h, w), order=1,
                                mode='constant', cval=np.amax(a))
    im = array2pil(a).convert('L')
    return im
def nlbin(im, threshold=0.5, zoom=0.5, escale=1.0, border=0.1, perc=80,
          range=20, low=5, high=90):
    """
    Performs binarization using non-linear processing.

    Args:
        im (PIL.Image): Input image; returned unchanged if its mode is '1'
        threshold (float): Cut-off applied to the flattened and
                           contrast-stretched image; pixels above it are set
                           to 255, all others to 0
        zoom (float): Zoom for background page estimation
        escale (float): Scale for estimating a mask over the text region
        border (float): Ignore this much of the border
        perc (int): Percentage for filters
        range (int): Range for filters
        low (int): Percentile for black estimation
        high (int): Percentile for white estimation

    Returns:
        PIL.Image containing the binarized image

    Raises:
        KrakenInputException: if all pixels of the image have the same value.
    """
    if im.mode == '1':
        return im

    raw = pil2array(im)
    # rescale image to between -1 or 0 and 1
    # (builtin float: the np.float alias no longer exists in current NumPy)
    raw = raw / float(np.iinfo(raw.dtype).max)
    if raw.ndim == 3:
        raw = np.mean(raw, 2)
    # perform image normalization
    if np.amax(raw) == np.amin(raw):
        raise KrakenInputException('Image is empty')
    image = raw - np.amin(raw)
    image /= np.amax(image)

    # estimate the background on a zoomed copy
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        m = interpolation.zoom(image, zoom)
        m = filters.percentile_filter(m, perc, size=(range, 2))
        m = filters.percentile_filter(m, perc, size=(2, range))
        m = interpolation.zoom(m, 1.0 / zoom)
    # zooming back may not reproduce the exact shape; use the common extent
    w, h = np.minimum(np.array(image.shape), np.array(m.shape))
    flat = np.clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)

    # estimate low and high thresholds
    d0, d1 = flat.shape
    o0, o1 = int(border * d0), int(border * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    # by default, we use only regions that contain
    # significant variance; this makes the percentile
    # based low and high estimates more reliable
    v = est - filters.gaussian_filter(est, escale * 20.0)
    v = filters.gaussian_filter(v**2, escale * 20.0)**0.5
    v = (v > 0.3 * np.amax(v))
    v = morphology.binary_dilation(v, structure=np.ones((int(escale * 50), 1)))
    v = morphology.binary_dilation(v, structure=np.ones((1, int(escale * 50))))
    est = est[v]
    lo = np.percentile(est.ravel(), low)
    hi = np.percentile(est.ravel(), high)

    # stretch contrast between the two estimates and threshold
    flat -= lo
    flat /= (hi - lo)
    flat = np.clip(flat, 0, 1)
    bin = np.array(255 * (flat > threshold), 'B')
    return array2pil(bin)
def nlbin(im: Image.Image,
          threshold: float = 0.5,
          zoom: float = 0.5,
          escale: float = 1.0,
          border: float = 0.1,
          perc: int = 80,
          range: int = 20,
          low: int = 5,
          high: int = 90) -> Image.Image:
    """
    Performs binarization using non-linear processing.

    Args:
        im (PIL.Image.Image): Input image; returned unchanged when
                              ``is_bitonal`` reports it as bitonal
        threshold (float): Cut-off applied to the flattened and
                           contrast-stretched image; pixels above it are set
                           to 255, all others to 0
        zoom (float): Zoom for background page estimation
        escale (float): Scale for estimating a mask over the text region
        border (float): Ignore this much of the border
        perc (int): Percentage for filters
        range (int): Range for filters
        low (int): Percentile for black estimation
        high (int): Percentile for white estimation

    Returns:
        PIL.Image containing the binarized image

    Raises:
        KrakenInputException when trying to binarize an empty image.
    """
    im_str = get_im_str(im)
    logger.info('Binarizing {}'.format(im_str))
    if is_bitonal(im):
        logger.info('Skipping binarization because {} is bitonal.'.format(im_str))
        return im
    # convert to grayscale first
    logger.debug('Converting {} to grayscale'.format(im_str))
    im = im.convert('L')
    raw = pil2array(im)
    logger.debug('Scaling and normalizing')
    # rescale image to between -1 or 0 and 1
    # (builtin float: the np.float alias no longer exists in current NumPy)
    raw = raw / float(np.iinfo(raw.dtype).max)
    # perform image normalization
    if np.amax(raw) == np.amin(raw):
        logger.warning('Trying to binarize empty image {}'.format(im_str))
        raise KrakenInputException('Image is empty')
    image = raw - np.amin(raw)
    image /= np.amax(image)

    logger.debug('Interpolation and percentile filtering')
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        m = interpolation.zoom(image, zoom)
        m = filters.percentile_filter(m, perc, size=(range, 2))
        m = filters.percentile_filter(m, perc, size=(2, range))
        # scale the background estimate back to the shape of the input
        mh, mw = m.shape
        oh, ow = image.shape
        scale = np.diag([mh * 1.0 / oh, mw * 1.0 / ow])
        m = affine_transform(m, scale, output_shape=image.shape)
    w, h = np.minimum(np.array(image.shape), np.array(m.shape))
    flat = np.clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)

    # estimate low and high thresholds
    d0, d1 = flat.shape
    o0, o1 = int(border * d0), int(border * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    logger.debug('Threshold estimates {}'.format(est))
    # by default, we use only regions that contain
    # significant variance; this makes the percentile
    # based low and high estimates more reliable
    logger.debug('Refine estimates')
    v = est - filters.gaussian_filter(est, escale * 20.0)
    v = filters.gaussian_filter(v**2, escale * 20.0)**0.5
    v = (v > 0.3 * np.amax(v))
    v = morphology.binary_dilation(v, structure=np.ones((int(escale * 50), 1)))
    v = morphology.binary_dilation(v, structure=np.ones((1, int(escale * 50))))
    est = est[v]
    lo = np.percentile(est.ravel(), low)
    hi = np.percentile(est.ravel(), high)

    # stretch contrast between the two estimates and threshold
    flat -= lo
    flat /= (hi - lo)
    flat = np.clip(flat, 0, 1)
    logger.debug('Thresholding at {}'.format(threshold))
    bin = np.array(255 * (flat > threshold), 'B')
    return array2pil(bin)