Example 1
def degrade_line(im, eta=0, alpha=1.7, beta=1.7, alpha_0=1, beta_0=1):
    """
    Degrades a line image by adding noise

    Args:
        im (PIL.Image): Input image

    Returns:
        PIL.Image in mode 'L'
    """
    im = pil2array(im)
    im = np.amax(im) - im
    im = im * 1.0 / np.amax(im)

    # foreground distance transform and flipping to white probability
    fg_dist = distance_transform_cdt(im, metric='taxicab')
    fg_prob = alpha_0 * np.exp(-alpha * (fg_dist**2)) + eta
    fg_prob[im == 0] = 0
    fg_flip = np.random.binomial(1, fg_prob)

    # background distance transform and flipping to black probability
    bg_dist = distance_transform_cdt(1 - im, metric='taxicab')
    bg_prob = beta_0 * np.exp(-beta * (bg_dist**2)) + eta
    bg_prob[im == 1] = 0
    bg_flip = np.random.binomial(1, bg_prob)

    # flip
    im -= fg_flip
    im += bg_flip
    # close with a 2x2 structuring element
    sel = np.array([[1, 1], [1, 1]])
    im = binary_closing(im, sel)
    return array2pil(255 - im.astype('B') * 255)
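A minimal usage sketch for this variant (import path assumed; in kraken these augmentation helpers have lived in a line generation module, but the exact location differs between releases):

from PIL import Image
from kraken.linegen import degrade_line  # assumed import path

line = Image.open('line.png').convert('L')           # hypothetical grayscale line image
noisy = degrade_line(line, eta=0.0, alpha=1.7, beta=1.7)
noisy.save('line_degraded.png')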
Example 2
def nlbin(im, threshold=0.5, zoom=0.5, escale=1.0, border=0.1, perc=80,
          range=20, low=5, high=90):
    """
    Performs binarization using non-linear processing.

    Args:
        im (PIL.Image):
        threshold (float):
        zoom (float): Zoom for background page estimation
        escale (float): Scale for estimating a mask over the text region
        border (float): Ignore this much of the border
        perc (int): Percentage for filters
        range (int): Range for filters
        low (int): Percentile for black estimation
        high (int): Percentile for white estimation

    Returns:
        PIL.Image containing the binarized image
    """
    if im.mode == '1':
        return im
    raw = pil2array(im)
    # rescale image to between -1 or 0 and 1
    raw = raw/float(np.iinfo(raw.dtype).max)
    if raw.ndim == 3:
        raw = np.mean(raw, 2)
    # perform image normalization
    if np.amax(raw) == np.amin(raw):
        raise KrakenInputException('Image is empty')
    image = raw-np.amin(raw)
    image /= np.amax(image)

    m = interpolation.zoom(image, zoom)
    m = filters.percentile_filter(m, perc, size=(range, 2))
    m = filters.percentile_filter(m, perc, size=(2, range))
    m = interpolation.zoom(m, 1.0/zoom)
    w, h = np.minimum(np.array(image.shape), np.array(m.shape))
    flat = np.clip(image[:w, :h]-m[:w, :h]+1, 0, 1)

    # estimate low and high thresholds
    d0, d1 = flat.shape
    o0, o1 = int(border*d0), int(border*d1)
    est = flat[o0:d0-o0, o1:d1-o1]
    # by default, we use only regions that contain
    # significant variance; this makes the percentile
    # based low and high estimates more reliable
    v = est-filters.gaussian_filter(est, escale*20.0)
    v = filters.gaussian_filter(v**2, escale*20.0)**0.5
    v = (v > 0.3*np.amax(v))
    v = morphology.binary_dilation(v, structure=np.ones((int(escale*50), 1)))
    v = morphology.binary_dilation(v, structure=np.ones((1, int(escale*50))))
    est = est[v]
    lo = np.percentile(est.ravel(), low)
    hi = np.percentile(est.ravel(), high)

    flat -= lo
    flat /= (hi-lo)
    flat = np.clip(flat, 0, 1)
    bin = np.array(255*(flat > threshold), 'B')
    return array2pil(bin)
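A usage sketch, assuming the function is exposed as kraken.binarization.nlbin:

from PIL import Image
from kraken.binarization import nlbin  # assumed import path

im = Image.open('page.png').convert('L')   # hypothetical page scan
bw = nlbin(im, threshold=0.5, zoom=0.5)    # returns a black-and-white PIL image
bw.save('page_bw.png')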
Example 3
def distort_line(im, distort=3.0, sigma=10, eps=0.03, delta=0.3):
    """
    Distorts a line image.

    Run BEFORE degrade_line as a white border of 5 pixels will be added.

    Args:
        im (PIL.Image): Input image
        distort (float):
        sigma (float):
        eps (float):
        delta (float):

    Returns:
        PIL.Image in mode 'L'
    """
    w, h = im.size
    # XXX: determine correct output shape from transformation matrices instead
    # of guesstimating.
    logger.debug('Pasting source image into canvas')
    image = Image.new('L', (int(1.5 * w), 4 * h), 255)
    image.paste(im, (int((image.size[0] - w) / 2), int(
        (image.size[1] - h) / 2)))
    line = pil2array(image.convert('L'))

    # shear in y direction with factor eps * randn(), scaling with 1 + eps *
    # randn() in x/y axis (all offset at d)
    logger.debug('Performing affine transformation')
    m = np.array([[1 + eps * np.random.randn(), 0.0],
                  [eps * np.random.randn(), 1.0 + eps * np.random.randn()]])
    c = np.array([w / 2.0, h / 2])
    d = c - np.dot(m, c) + np.array(
        [np.random.randn() * delta,
         np.random.randn() * delta])
    line = affine_transform(line,
                            m,
                            offset=d,
                            order=1,
                            mode='constant',
                            cval=255)

    hs = gaussian_filter(np.random.randn(4 * h, int(1.5 * w)), sigma)
    ws = gaussian_filter(np.random.randn(4 * h, int(1.5 * w)), sigma)
    hs *= distort / np.amax(hs)
    ws *= distort / np.amax(ws)

    def _f(p):
        return (p[0] + hs[p[0], p[1]], p[1] + ws[p[0], p[1]])

    logger.debug('Performing geometric transformation')
    im = array2pil(geometric_transform(line, _f, order=1, mode='nearest'))
    logger.debug('Cropping canvas to content box')
    im = im.crop(ImageOps.invert(im).getbbox())
    return im
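Because a white border is added, the docstring asks for distortion to run before degradation. A sketch of that ordering (import paths assumed):

from PIL import Image
from kraken.linegen import distort_line, degrade_line  # assumed import paths

line = Image.open('line.png').convert('L')          # hypothetical line image
line = distort_line(line, distort=3.0, sigma=10)    # geometric jitter first
line = degrade_line(line)                           # then pixel-flip noise
line.save('line_augmented.png')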
Example 4
def ocropy_degrade(im, distort=1.0, dsigma=20.0, eps=0.03, delta=0.3, degradations=[(0.5, 0.0, 0.5, 0.0)]):
    """
    Degrades and distorts a line using the same noise model used by ocropus.

    Args:
        im (PIL.Image): Input image
        distort (float):
        dsigma (float):
        eps (float):
        delta (float): 
        degradations (list): list returning 4-tuples corresponding to
                             the degradations argument of ocropus-linegen.

    Returns:
        PIL.Image in mode 'L'
    """
    w, h = im.size
    # XXX: determine correct output shape from transformation matrices instead
    # of guesstimating.
    image = Image.new('L', (int(1.5*w), 4*h), 255)
    image.paste(im, (int((image.size[0] - w) / 2), int((image.size[1] - h) / 2)))
    a = pil2array(image.convert('L'))
    (sigma, ssigma, threshold, sthreshold) = degradations[np.random.choice(len(degradations))]
    sigma += (2 * np.random.rand() - 1) * ssigma
    threshold += (2 * np.random.rand() - 1) * sthreshold
    a = a * 1.0 / np.amax(a)
    if sigma > 0.0:
        a = gaussian_filter(a, sigma)
    a += np.clip(np.random.randn(*a.shape) * 0.2, -0.25, 0.25)
    m = np.array([[1 + eps * np.random.randn(), 0.0],
                  [eps * np.random.randn(), 1.0 + eps * np.random.randn()]])
    w, h = a.shape
    c = np.array([w / 2.0, h / 2])
    d = c - np.dot(m, c) + np.array([np.random.randn() * delta, np.random.randn() * delta])
    a = affine_transform(a, m, offset=d, order=1, mode='constant', cval=a[0, 0])
    a = np.array(a > threshold, 'f')
    [[r, c]] = find_objects(np.array(a == 0, 'i'))
    r0 = r.start
    r1 = r.stop
    c0 = c.start
    c1 = c.stop
    a = a[r0 - 5:r1 + 5, c0 - 5:c1 + 5]
    if distort > 0:
        h, w = a.shape
        hs = np.random.randn(h, w)
        ws = np.random.randn(h, w)
        hs = gaussian_filter(hs, dsigma)
        ws = gaussian_filter(ws, dsigma)
        hs *= distort / np.amax(hs)
        ws *= distort / np.amax(ws)

        def f(p):
            return (p[0] + hs[p[0], p[1]], p[1] + ws[p[0], p[1]])

        a = geometric_transform(a, f, output_shape=(h, w), order=1, mode='constant', cval=np.amax(a))
    im = array2pil(a).convert('L')
    return im
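As the unpacking above shows, each entry of degradations is a (sigma, sigma_spread, threshold, threshold_spread) tuple: the blur sigma and the binarization threshold are each jittered uniformly by up to ± their spread. A usage sketch (import path assumed):

from PIL import Image
from kraken.linegen import ocropy_degrade  # assumed import path

line = Image.open('line.png').convert('L')   # hypothetical line image
out = ocropy_degrade(line, degradations=[(0.5, 0.0, 0.5, 0.0),
                                         (1.0, 0.3, 0.4, 0.1)])
out.save('line_ocropy.png')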
Example 5
def degrade_line(im, eta=0.0, alpha=1.5, beta=1.5, alpha_0=1.0, beta_0=1.0):
    """
    Degrades a line image by adding noise.

    For parameter meanings consult [1].

    Args:
        im (PIL.Image): Input image
        eta (float):
        alpha (float):
        beta (float):
        alpha_0 (float):
        beta_0 (float):

    Returns:
        PIL.Image in mode '1'
    """
    logger.debug(u'Inverting and normalizing input image')
    im = pil2array(im)
    im = np.amax(im)-im
    im = im*1.0/np.amax(im)

    logger.debug(u'Calculating foreground distance transform')
    fg_dist = distance_transform_cdt(1-im, metric='taxicab')
    logger.debug(u'Calculating flip to white probability')
    fg_prob = alpha_0 * np.exp(-alpha * (fg_dist**2)) + eta
    fg_prob[im == 1] = 0
    fg_flip = np.random.binomial(1, fg_prob)

    logger.debug(u'Calculating background distance transform')
    bg_dist = distance_transform_cdt(im, metric='taxicab')
    logger.debug(u'Calculating flip to black probability')
    bg_prob = beta_0 * np.exp(-beta * (bg_dist**2)) + eta
    bg_prob[im == 0] = 0
    bg_flip = np.random.binomial(1, bg_prob)

    # flip
    logger.debug(u'Flipping')
    im -= bg_flip
    im += fg_flip

    logger.debug(u'Binary closing')
    sel = np.array([[1, 1], [1, 1]])
    im = binary_closing(im, sel)
    logger.debug(u'Converting to image')
    return array2pil(255-im.astype('B')*255)
Example 6
def degrade_line(im, eta=0.0, alpha=1.5, beta=1.5, alpha_0=1.0, beta_0=1.0):
    """
    Degrades a line image by adding noise.

    For parameter meanings consult [1].

    Args:
        im (PIL.Image): Input image
        eta (float):
        alpha (float):
        beta (float):
        alpha_0 (float):
        beta_0 (float):

    Returns:
        PIL.Image in mode '1'
    """
    logger.debug('Inverting and normalizing input image')
    im = pil2array(im)
    im = np.amax(im) - im
    im = im * 1.0 / np.amax(im)

    logger.debug('Calculating foreground distance transform')
    fg_dist = distance_transform_cdt(1 - im, metric='taxicab')
    logger.debug('Calculating flip to white probability')
    fg_prob = alpha_0 * np.exp(-alpha * (fg_dist**2)) + eta
    fg_prob[im == 1] = 0
    fg_flip = np.random.binomial(1, fg_prob)

    logger.debug('Calculating background distance transform')
    bg_dist = distance_transform_cdt(im, metric='taxicab')
    logger.debug('Calculating flip to black probability')
    bg_prob = beta_0 * np.exp(-beta * (bg_dist**2)) + eta
    bg_prob[im == 0] = 0
    bg_flip = np.random.binomial(1, bg_prob)

    # flip
    logger.debug('Flipping')
    im -= bg_flip
    im += fg_flip

    logger.debug('Binary closing')
    sel = np.array([[1, 1], [1, 1]])
    im = binary_closing(im, sel)
    logger.debug('Converting to image')
    return array2pil(255 - im.astype('B') * 255)
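The flip model can be read directly off the code: a pixel at taxicab distance d from the foreground/background boundary is flipped with probability alpha_0 * exp(-alpha * d**2) + eta (beta_0 and beta play the same role for the background). A small illustration of how quickly that probability decays:

import numpy as np

d = np.arange(5)                               # taxicab distance from the boundary
alpha_0, alpha, eta = 1.0, 1.5, 0.0
p_flip = alpha_0 * np.exp(-alpha * d**2) + eta
print(np.round(p_flip, 4))                     # ~[1.0, 0.22, 0.0025, 0.0, 0.0]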
Example 7
def distort_line(im, distort=3.0, sigma=10, eps=0.03, delta=0.3):
    """
    Distorts a line image.

    Run BEFORE degrade_line as a white border of 5 pixels will be added.

    Args:
        im (PIL.Image): Input image
        distort (float):
        sigma (float):
        eps (float):
        delta (float):

    Returns:
        PIL.Image in mode 'L'
    """
    w, h = im.size
    # XXX: determine correct output shape from transformation matrices instead
    # of guesstimating.
    logger.debug(u'Pasting source image into canvas')
    image = Image.new('L', (int(1.5*w), 4*h), 255)
    image.paste(im, (int((image.size[0] - w) / 2), int((image.size[1] - h) / 2)))
    line = pil2array(image.convert('L'))

    # shear in y direction with factor eps * randn(), scaling with 1 + eps *
    # randn() in x/y axis (all offset at d)
    logger.debug(u'Performing affine transformation')
    m = np.array([[1 + eps * np.random.randn(), 0.0], [eps * np.random.randn(), 1.0 + eps * np.random.randn()]])
    c = np.array([w/2.0, h/2])
    d = c - np.dot(m, c) + np.array([np.random.randn() * delta, np.random.randn() * delta])
    line = affine_transform(line, m, offset=d, order=1, mode='constant', cval=255)

    hs = gaussian_filter(np.random.randn(4*h, int(1.5*w)), sigma)
    ws = gaussian_filter(np.random.randn(4*h, int(1.5*w)), sigma)
    hs *= distort/np.amax(hs)
    ws *= distort/np.amax(ws)

    def _f(p):
        return (p[0] + hs[p[0], p[1]], p[1] + ws[p[0], p[1]])

    logger.debug(u'Performing geometric transformation')
    im = array2pil(geometric_transform(line, _f, order=1, mode='nearest'))
    logger.debug(u'Cropping canvas to content box')
    im = im.crop(ImageOps.invert(im).getbbox())
    return im
Example 8
def dewarp(normalizer: CenterNormalizer, im: Image.Image) -> Image.Image:
    """
    Dewarps an image of a line using a kraken.lib.lineest.CenterNormalizer
    instance.

    Args:
        normalizer (kraken.lib.lineest.CenterNormalizer): A line normalizer
                                                          instance
        im (PIL.Image.Image): Image to dewarp

    Returns:
        PIL.Image containing the dewarped image.
    """
    line = pil2array(im)
    temp = np.amax(line)-line
    temp = temp*1.0/np.amax(temp)
    normalizer.measure(temp)
    line = normalizer.normalize(line, cval=np.amax(line))
    return array2pil(line)
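A usage sketch (import paths and the default CenterNormalizer constructor are assumed):

from PIL import Image
from kraken.lib.lineest import CenterNormalizer, dewarp  # assumed import paths

lnorm = CenterNormalizer()                  # default target height assumed
line = Image.open('line.png').convert('L')  # hypothetical line image
dewarped = dewarp(lnorm, line)              # measure() and normalize() run internally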
Example 9
def dewarp(normalizer, im):
    """
    Dewarps an image of a line using a kraken.lib.lineest.CenterNormalizer
    instance.

    Args:
        normalizer (kraken.lib.lineest.CenterNormalizer): A line normalizer
                                                          instance
        im (PIL.Image): Image to dewarp

    Returns:
        PIL.Image containing the dewarped image.
    """
    line = pil2array(im)
    temp = np.amax(line) - line
    temp = temp * 1.0 / np.amax(temp)
    normalizer.measure(temp)
    line = normalizer.normalize(line, cval=np.amax(line))
    return array2pil(line)
Example 10
def degrade_line(im, mean=0.0, sigma=0.001, density=0.002):
    """
    Degrades a line image by adding several kinds of noise.

    Args:
        im (PIL.Image): Input image
        mean (float): Mean of distribution for Gaussian noise
        sigma (float): Standard deviation for Gaussian noise
        density (float): Noise density for Salt and Pepper noise

    Returns:
        PIL.Image in mode 'L'
    """
    im = pil2array(im)
    m = np.amax(im)
    im = gaussian_filter(im.astype('f')/m, 0.5)
    im += np.random.normal(mean, sigma, im.shape)
    flipped = np.ceil(density/2 * im.size)
    coords = [np.random.randint(0, i - 1, int(flipped)) for i in im.shape]
    im[tuple(coords)] = 255
    coords = [np.random.randint(0, i - 1, int(flipped)) for i in im.shape]
    im[tuple(coords)] = 0
    return array2pil(np.clip(im * m, 0, 255).astype('uint8'))
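density controls how many pixels are forced to pure white and pure black: ceil(density/2 * N) pixels each way, where N is the pixel count. For a hypothetical 48 x 1200 line that is only a few dozen pixels:

import numpy as np

h, w, density = 48, 1200, 0.002                 # hypothetical line dimensions
flipped = int(np.ceil(density / 2 * h * w))
print(flipped)                                  # 58 pixels set to white, another 58 to black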
Example 11
def degrade_line(im, mean=0.0, sigma=0.001, density=0.002):
    """
    Degrades a line image by adding several kinds of noise.

    Args:
        im (PIL.Image): Input image
        mean (float): Mean of distribution for Gaussian noise
        sigma (float): Standard deviation for Gaussian noise
        density (float): Noise density for Salt and Pepper noise

    Returns:
        PIL.Image in mode 'L'
    """
    im = pil2array(im)
    m = np.amax(im)
    im = gaussian_filter(im.astype('f') / m, 0.5)
    im += np.random.normal(mean, sigma, im.shape)
    flipped = np.ceil(density / 2 * im.size)
    coords = [np.random.randint(0, i - 1, int(flipped)) for i in im.shape]
    im[tuple(coords)] = 255
    coords = [np.random.randint(0, i - 1, int(flipped)) for i in im.shape]
    im[tuple(coords)] = 0
    return array2pil(np.clip(im * m, 0, 255).astype('uint8'))
Example 12
def nlbin(im: Image.Image,
          threshold: float = 0.5,
          zoom: float = 0.5,
          escale: float = 1.0,
          border: float = 0.1,
          perc: int = 80,
          range: int = 20,
          low: int = 5,
          high: int = 90) -> Image:
    """
    Performs binarization using non-linear processing.

    Args:
        im (PIL.Image.Image):
        threshold (float):
        zoom (float): Zoom for background page estimation
        escale (float): Scale for estimating a mask over the text region
        border (float): Ignore this much of the border
        perc (int): Percentage for filters
        range (int): Range for filters
        low (int): Percentile for black estimation
        high (int): Percentile for white estimation

    Returns:
        PIL.Image containing the binarized image

    Raises:
        KrakenInputException when trying to binarize an empty image.
    """
    im_str = get_im_str(im)
    logger.info(f'Binarizing {im_str}')
    if is_bitonal(im):
        logger.info(f'Skipping binarization because {im_str} is bitonal.')
        return im
    # convert to grayscale first
    logger.debug(f'Converting {im_str} to grayscale')
    im = im.convert('L')
    raw = pil2array(im)
    logger.debug('Scaling and normalizing')
    # rescale image to between -1 or 0 and 1
    raw = raw / float(np.iinfo(raw.dtype).max)
    # perform image normalization
    if np.amax(raw) == np.amin(raw):
        logger.warning(f'Trying to binarize empty image {im_str}')
        raise KrakenInputException('Image is empty')
    image = raw - np.amin(raw)
    image /= np.amax(image)

    logger.debug('Interpolation and percentile filtering')
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        m = interpolation.zoom(image, zoom)
        m = filters.percentile_filter(m, perc, size=(range, 2))
        m = filters.percentile_filter(m, perc, size=(2, range))
        mh, mw = m.shape
        oh, ow = image.shape
        scale = np.diag([mh * 1.0 / oh, mw * 1.0 / ow])
        m = affine_transform(m, scale, output_shape=image.shape)
    w, h = np.minimum(np.array(image.shape), np.array(m.shape))
    flat = np.clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)

    # estimate low and high thresholds
    d0, d1 = flat.shape
    o0, o1 = int(border * d0), int(border * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    logger.debug('Threshold estimates {}'.format(est))
    # by default, we use only regions that contain
    # significant variance; this makes the percentile
    # based low and high estimates more reliable
    logger.debug('Refine estimates')
    v = est - filters.gaussian_filter(est, escale * 20.0)
    v = filters.gaussian_filter(v**2, escale * 20.0)**0.5
    v = (v > 0.3 * np.amax(v))
    v = morphology.binary_dilation(v, structure=np.ones((int(escale * 50), 1)))
    v = morphology.binary_dilation(v, structure=np.ones((1, int(escale * 50))))
    est = est[v]
    lo = np.percentile(est.ravel(), low)
    hi = np.percentile(est.ravel(), high)
    flat -= lo
    flat /= (hi - lo)
    flat = np.clip(flat, 0, 1)
    logger.debug(f'Thresholding at {threshold}')
    bin = np.array(255 * (flat > threshold), 'B')
    return array2pil(bin)
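This variant raises KrakenInputException on empty input, so callers may want to guard the call. A sketch (exception import path assumed):

from PIL import Image
from kraken.binarization import nlbin                    # assumed import path
from kraken.lib.exceptions import KrakenInputException   # assumed import path

try:
    bw = nlbin(Image.open('page.png'))   # hypothetical page scan
except KrakenInputException:
    bw = None                            # empty page, nothing to binarize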
Example 13
File: ocr.py Project: among/fusus
    def read(self, page):
        """Perfoms OCR with Kraken."""

        stages = page.stages
        scan = stages.get("clean", None)
        if scan is None:
            return None

        nonLetter = self.nonLetter

        model = self.ensureLoaded()

        blocks = page.blocks
        ocrChars = []
        ocrWords = []
        ocrLines = []
        stages["char"] = ocrChars
        stages["word"] = ocrWords
        stages["line"] = ocrLines
        binary = pil2array(nlbin(array2pil(scan)))

        for ((stripe, block), data) in blocks.items():
            (left, top, right, bottom) = data["inner"]
            thisBinary = binary[top:bottom, left:right]
            lines = data["bands"]["main"]["lines"]
            for (ln, (up, lo)) in enumerate(lines):
                lln = ln + 1
                roi = thisBinary[up : lo + 1]
                (b, e, roi) = removeMargins(roi, keep=16)
                ocrLines.append((stripe, block, lln, left + b, top + up, left + e, top + lo))
                (roiH, roiW) = roi.shape[0:2]
                roi = array2pil(roi)
                bounds = dict(boxes=([0, 0, roiW, roiH],), text_direction=RL)

                # adapt the boxes, because they correspond to recognition peaks,
                # not to character extents
                #
                # See https://github.com/mittagessen/kraken/issues/184

                adaptedPreds = []
                for (c, (le, to, ri, bo), conf) in chain.from_iterable(
                    rpred(model, roi, bounds, pad=0, bidi_reordering=True)
                ):
                    if adaptedPreds:
                        prevPred = adaptedPreds[-1]
                        prevEdge = prevPred[1][0]
                    else:
                        prevEdge = roiW
                    correction = int(round((prevEdge - ri) / 2))
                    thisRi = ri + correction
                    if adaptedPreds:
                        adaptedPreds[-1][1][0] -= correction
                    adaptedPreds.append([c, [le, to, thisRi, bo], conf])
                if adaptedPreds:
                    adaptedPreds[-1][1][0] = 0

                # divide into words, not only on spaces, but also on punctuation

                curWord = [[], []]
                inWord = True

                for (c, (le, to, ri, bo), conf) in adaptedPreds:
                    offsetW = left + b
                    offsetH = top + up
                    pos = (le + offsetW, to + offsetH, ri + offsetW, bo + offsetH)
                    conf = int(round(conf * 100))
                    ocrChars.append((stripe, block, lln, *pos, conf, c))

                    spaceSeen = c == " "
                    changeWord = not inWord and c not in nonLetter
                    element = (c, pos, conf)

                    if spaceSeen:
                        curWord[1].append(element)
                    if spaceSeen or changeWord:
                        if curWord[0] or curWord[1]:
                            ocrWords.append((stripe, block, lln, *addWord(curWord)))
                            curWord = [[], []]
                            inWord = True
                            continue

                    if inWord:
                        if c in nonLetter:
                            inWord = False
                    dest = 0 if inWord else 1
                    curWord[dest].append(element)
                if curWord[0] or curWord[1]:
                    ocrWords.append((stripe, block, lln, *addWord(curWord)))

        page.write(stage="line,word,char")
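The adaptation loop above widens each box toward the previous character's left edge so that neighbouring boxes meet halfway between recognition peaks (kraken reports peaks rather than character extents; see issue #184 referenced above). A standalone illustration of the same arithmetic on hypothetical right-to-left predictions:

roiW = 120                                              # hypothetical line width
preds = [('a', [90, 0, 100, 10], 0.9),                  # hypothetical (char, [le, to, ri, bo], conf)
         ('b', [60, 0, 70, 10], 0.8)]
adapted = []
for c, (le, to, ri, bo), conf in preds:
    prev_edge = adapted[-1][1][0] if adapted else roiW
    corr = int(round((prev_edge - ri) / 2))
    if adapted:
        adapted[-1][1][0] -= corr                       # pull the previous left edge down to meet this box
    adapted.append([c, [le, to, ri + corr, bo], conf])
if adapted:
    adapted[-1][1][0] = 0                               # leftmost character extends to the line start
print(adapted)   # [['a', [80, 0, 110, 10], 0.9], ['b', [0, 0, 80, 10], 0.8]]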
Example 14
def ocropy_degrade(im,
                   distort=1.0,
                   dsigma=20.0,
                   eps=0.03,
                   delta=0.3,
                   degradations=((0.5, 0.0, 0.5, 0.0), )):
    """
    Degrades and distorts a line using the same noise model used by ocropus.

    Args:
        im (PIL.Image): Input image
        distort (float):
        dsigma (float):
        eps (float):
        delta (float):
        degradations (list): list returning 4-tuples corresponding to
                             the degradations argument of ocropus-linegen.

    Returns:
        PIL.Image in mode 'L'
    """
    w, h = im.size
    # XXX: determine correct output shape from transformation matrices instead
    # of guesstimating.
    logger.debug('Pasting source image into canvas')
    image = Image.new('L', (int(1.5 * w), 4 * h), 255)
    image.paste(im, (int((image.size[0] - w) / 2), int(
        (image.size[1] - h) / 2)))
    a = pil2array(image.convert('L'))
    logger.debug('Selecting degradations')
    (sigma, ssigma, threshold,
     sthreshold) = degradations[np.random.choice(len(degradations))]
    sigma += (2 * np.random.rand() - 1) * ssigma
    threshold += (2 * np.random.rand() - 1) * sthreshold
    a = a * 1.0 / np.amax(a)
    if sigma > 0.0:
        logger.debug('Apply Gaussian filter')
        a = gaussian_filter(a, sigma)
    logger.debug('Adding noise')
    a += np.clip(np.random.randn(*a.shape) * 0.2, -0.25, 0.25)
    logger.debug('Perform affine transformation and resize')
    m = np.array([[1 + eps * np.random.randn(), 0.0],
                  [eps * np.random.randn(), 1.0 + eps * np.random.randn()]])
    w, h = a.shape
    c = np.array([w / 2.0, h / 2])
    d = c - np.dot(m, c) + np.array(
        [np.random.randn() * delta,
         np.random.randn() * delta])
    a = affine_transform(a,
                         m,
                         offset=d,
                         order=1,
                         mode='constant',
                         cval=a[0, 0])
    a = np.array(a > threshold, 'f')
    [[r, c]] = find_objects(np.array(a == 0, 'i'))
    r0 = r.start
    r1 = r.stop
    c0 = c.start
    c1 = c.stop
    a = a[r0 - 5:r1 + 5, c0 - 5:c1 + 5]
    if distort > 0:
        logger.debug('Perform geometric transformation')
        h, w = a.shape
        hs = np.random.randn(h, w)
        ws = np.random.randn(h, w)
        hs = gaussian_filter(hs, dsigma)
        ws = gaussian_filter(ws, dsigma)
        hs *= distort / np.amax(hs)
        ws *= distort / np.amax(ws)

        def _f(p):
            return (p[0] + hs[p[0], p[1]], p[1] + ws[p[0], p[1]])

        a = geometric_transform(a,
                                _f,
                                output_shape=(h, w),
                                order=1,
                                mode='constant',
                                cval=np.amax(a))
    im = array2pil(a).convert('L')
    return im
Example 15
def nlbin(im,
          threshold=0.5,
          zoom=0.5,
          escale=1.0,
          border=0.1,
          perc=80,
          range=20,
          low=5,
          high=90):
    """
    Performs binarization using non-linear processing.

    Args:
        im (PIL.Image):
        threshold (float):
        zoom (float): Zoom for background page estimation
        escale (float): Scale for estimating a mask over the text region
        border (float): Ignore this much of the border
        perc (int): Percentage for filters
        range (int): Range for filters
        low (int): Percentile for black estimation
        high (int): Percentile for white estimation

    Returns:
        PIL.Image containing the binarized image
    """
    if im.mode == '1':
        return im
    raw = pil2array(im)
    # rescale image to between -1 or 0 and 1
    raw = raw / float(np.iinfo(raw.dtype).max)
    if raw.ndim == 3:
        raw = np.mean(raw, 2)
    # perform image normalization
    if np.amax(raw) == np.amin(raw):
        raise KrakenInputException('Image is empty')
    image = raw - np.amin(raw)
    image /= np.amax(image)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        m = interpolation.zoom(image, zoom)
        m = filters.percentile_filter(m, perc, size=(range, 2))
        m = filters.percentile_filter(m, perc, size=(2, range))
        m = interpolation.zoom(m, 1.0 / zoom)
    w, h = np.minimum(np.array(image.shape), np.array(m.shape))
    flat = np.clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)

    # estimate low and high thresholds
    d0, d1 = flat.shape
    o0, o1 = int(border * d0), int(border * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    # by default, we use only regions that contain
    # significant variance; this makes the percentile
    # based low and high estimates more reliable
    v = est - filters.gaussian_filter(est, escale * 20.0)
    v = filters.gaussian_filter(v**2, escale * 20.0)**0.5
    v = (v > 0.3 * np.amax(v))
    v = morphology.binary_dilation(v, structure=np.ones((int(escale * 50), 1)))
    v = morphology.binary_dilation(v, structure=np.ones((1, int(escale * 50))))
    est = est[v]
    lo = np.percentile(est.ravel(), low)
    hi = np.percentile(est.ravel(), high)

    flat -= lo
    flat /= (hi - lo)
    flat = np.clip(flat, 0, 1)
    bin = np.array(255 * (flat > threshold), 'B')
    return array2pil(bin)
Example 16
def nlbin(im: Image.Image,
          threshold: float = 0.5,
          zoom: float = 0.5,
          escale: float = 1.0,
          border: float = 0.1,
          perc: int = 80,
          range: int = 20,
          low: int = 5,
          high: int = 90) -> Image:
    """
    Performs binarization using non-linear processing.

    Args:
        im (PIL.Image.Image):
        threshold (float):
        zoom (float): Zoom for background page estimation
        escale (float): Scale for estimating a mask over the text region
        border (float): Ignore this much of the border
        perc (int): Percentage for filters
        range (int): Range for filters
        low (int): Percentile for black estimation
        high (int): Percentile for white estimation

    Returns:
        PIL.Image containing the binarized image

    Raises:
        KrakenInputException when trying to binarize an empty image.
    """
    im_str = get_im_str(im)
    logger.info('Binarizing {}'.format(im_str))
    if is_bitonal(im):
        logger.info('Skipping binarization because {} is bitonal.'.format(im_str))
        return im
    # convert to grayscale first
    logger.debug('Converting {} to grayscale'.format(im_str))
    im = im.convert('L')
    raw = pil2array(im)
    logger.debug('Scaling and normalizing')
    # rescale image to between -1 or 0 and 1
    raw = raw/float(np.iinfo(raw.dtype).max)
    # perform image normalization
    if np.amax(raw) == np.amin(raw):
        logger.warning('Trying to binarize empty image {}'.format(im_str))
        raise KrakenInputException('Image is empty')
    image = raw-np.amin(raw)
    image /= np.amax(image)

    logger.debug('Interpolation and percentile filtering')
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        m = interpolation.zoom(image, zoom)
        m = filters.percentile_filter(m, perc, size=(range, 2))
        m = filters.percentile_filter(m, perc, size=(2, range))
        mh, mw = m.shape
        oh, ow = image.shape
        scale = np.diag([mh * 1.0/oh, mw * 1.0/ow])
        m = affine_transform(m, scale, output_shape=image.shape)
    w, h = np.minimum(np.array(image.shape), np.array(m.shape))
    flat = np.clip(image[:w, :h]-m[:w, :h]+1, 0, 1)

    # estimate low and high thresholds
    d0, d1 = flat.shape
    o0, o1 = int(border*d0), int(border*d1)
    est = flat[o0:d0-o0, o1:d1-o1]
    logger.debug('Threshold estimates {}'.format(est))
    # by default, we use only regions that contain
    # significant variance; this makes the percentile
    # based low and high estimates more reliable
    logger.debug('Refine estimates')
    v = est-filters.gaussian_filter(est, escale*20.0)
    v = filters.gaussian_filter(v**2, escale*20.0)**0.5
    v = (v > 0.3*np.amax(v))
    v = morphology.binary_dilation(v, structure=np.ones((int(escale * 50), 1)))
    v = morphology.binary_dilation(v, structure=np.ones((1, int(escale * 50))))
    est = est[v]
    lo = np.percentile(est.ravel(), low)
    hi = np.percentile(est.ravel(), high)
    flat -= lo
    flat /= (hi-lo)
    flat = np.clip(flat, 0, 1)
    logger.debug('Thresholding at {}'.format(threshold))
    bin = np.array(255*(flat > threshold), 'B')
    return array2pil(bin)