예제 #1
0
def binarize(image_filepath):
    """Binarize a page image and return it with its normalized gray version.

    The image is read as grayscale, normalized, flattened against an
    estimate of the local white level, passed through skew estimation, and
    rescaled between the estimated low/high levels before thresholding.

    Args:
        image_filepath: Path handed to ``ocrolib.read_image_gray``.

    Returns:
        A tuple ``(binary, flat)`` where ``flat`` is the rescaled image
        clipped to [0, 1] and ``binary`` is ``1 * (flat > threshold)``.
    """
    raw = ocrolib.read_image_gray(image_filepath)

    # Perform image normalization.
    image = normalize_raw_image(raw)

    threshold = 0.5  # Threshold, determines lightness.
    zoom = 0.5  # Zoom for page background estimation, smaller=faster.
    escale = 1.0  # Scale for estimating a mask over the text region.
    bignore = 0.1  # Ignore this much of the border for threshold estimation.
    perc = 80  # Percentage for filters.
    filter_range = 20  # Range for filters (named to avoid shadowing range()).
    maxskew = 2  # Skew angle estimation parameters (degrees).
    lo_percentile = 5  # Percentile for black estimation.
    hi_percentile = 90  # Percentile for white estimation.
    skewsteps = 8  # Steps for skew angle estimation (per degree).
    debug = 0  # Display intermediate results.

    # Flatten it by estimating the local whitelevel.
    flat = estimate_local_whitelevel(image, zoom, perc, filter_range, debug)

    # Estimate skew angle and rotate; the angle itself is not needed here.
    flat, _ = estimate_skew(flat, bignore, maxskew, skewsteps, debug)

    # Estimate low and high thresholds from the requested percentiles.
    lo, hi = estimate_thresholds(flat, bignore, escale, lo_percentile,
                                 hi_percentile, debug)

    # Rescale the image to get the gray scale image.
    # NOTE(review): hi == lo would divide by zero here; confirm whether
    # estimate_thresholds can return equal levels.
    flat -= lo
    flat /= (hi - lo)
    flat = np.clip(flat, 0, 1)

    binary = 1 * (flat > threshold)

    return binary, flat
예제 #2
0
def process1(job):
    """Normalize and binarize one page image.

    ``job`` is a ``(fname, i)`` pair. The page is read as grayscale, scaled
    to [0, 1], optionally flattened against a percentile-filtered background
    estimate, rescaled between two percentile levels and thresholded. The
    results are written next to the input as ``<base>.bin.png`` and
    ``<base>.nrm.png``.

    Returns the path of the written binary image, or None when the image is
    empty or rejected by ``check_page``.
    """

    def debug_show(img, clear=False, **imshow_kwargs):
        # Display an intermediate image only when debugging is enabled.
        if args.debug > 0:
            if clear:
                clf()
            imshow(img, **imshow_kwargs)
            ginput(1, args.debug)

    fname, i = job
    print_info("# %s" % (fname))
    verbose = args.parallel < 2
    if verbose:
        print_info("=== %s %-3d" % (fname, i))

    page = ocrolib.read_image_gray(fname)
    dshow(page, "input")

    # Shift to a zero minimum, then scale to a maximum of one.
    image = page - amin(page)
    if amax(image) == amin(image):
        print_info("# image is empty: %s" % (fname))
        return
    image /= amax(image)

    if not args.nocheck:
        problem = check_page(amax(image) - image)
        if problem is not None:
            print_error(fname + " SKIPPED. " + problem +
                        " (use -n to disable this check)")
            return

    # Fraction of pixels that are already near black or near white.
    extreme = 0 if args.gray else (
        (sum(image < 0.05) + sum(image > 0.95)) * 1.0 / prod(image.shape))

    if extreme > 0.95:
        # Effectively binarized already; skip the flattening step.
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        if verbose:
            print_info("flattening")
        # Estimate the local white level on a zoomed copy of the page.
        background = interpolation.zoom(image, args.zoom)
        background = filters.percentile_filter(background,
                                               args.perc,
                                               size=(args.range, 2))
        background = filters.percentile_filter(background,
                                               args.perc,
                                               size=(2, args.range))
        background = interpolation.zoom(background, 1.0 / args.zoom)
        debug_show(background, clear=True, vmin=0, vmax=1)
        # Crop both arrays to their common extent before subtracting.
        rows, cols = minimum(array(image.shape), array(background.shape))
        flat = clip(image[:rows, :cols] - background[:rows, :cols] + 1, 0, 1)
        debug_show(flat, clear=True, vmin=0, vmax=1)

    if verbose:
        print_info("estimating thresholds")
    height, width = flat.shape
    margin0 = int(args.bignore * height)
    margin1 = int(args.bignore * width)
    est = flat[margin0:height - margin0, margin1:width - margin1]
    if args.escale > 0:
        # Keep only regions with significant local variance so that the
        # percentile-based low/high estimates are more reliable.
        e = args.escale
        sigma = e * 20.0
        mask = est - filters.gaussian_filter(est, sigma)
        mask = filters.gaussian_filter(mask**2, sigma)**0.5
        mask = (mask > 0.3 * amax(mask))
        mask = morphology.binary_dilation(mask,
                                          structure=ones((int(e * 50), 1)))
        mask = morphology.binary_dilation(mask,
                                          structure=ones((1, int(e * 50))))
        debug_show(mask)
        est = est[mask]
    samples = est.ravel()
    lo = stats.scoreatpercentile(samples, args.lo)
    hi = stats.scoreatpercentile(samples, args.hi)

    if verbose:
        print_info("rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    debug_show(flat, vmin=0, vmax=1)
    binarized = 1 * (flat > args.threshold)

    print_info("%s lo-hi (%.2f %.2f) %s" % (fname, lo, hi, comment))
    if verbose:
        print_info("writing")
    if args.debug > 0 or args.show:
        clf()
        gray()
        imshow(binarized)
        ginput(1, max(0.1, args.debug))

    base, _ = ocrolib.allsplitext(fname)
    bin_path = base + ".bin.png"
    ocrolib.write_image_binary(bin_path, binarized)
    ocrolib.write_image_gray(base + ".nrm.png", flat)
    return bin_path
예제 #3
0
def analyze_page_layout(binary, gray, rgb=None, outputdir=None, show=False):
    """Segment a binarized page into text lines in reading order.

    The page is inverted, its scale estimated, and a line segmentation
    computed. Lines are then sorted into reading order and the segmentation
    labels renumbered as ``0x010000 + (index + 1)``.

    Args:
        binary: Binarized page image; it is inverted (``1 - binary``) before
            segmentation.
        gray: Grayscale page image, used only when lines are written out.
        rgb: Optional color image on which bounding boxes are drawn when
            ``show`` is true. It is modified in place by ``cv2.rectangle``.
        outputdir: When given, the page segmentation is written to
            ``<outputdir>.pseg.png`` and each extracted line to
            ``<outputdir>/01XXXX.bin.png`` (plus ``.nrm.png`` for the gray
            version). The default ``None`` writes nothing.
        show: When true and ``rgb`` is given, display the line bounding
            boxes in an OpenCV window and wait for a key press.

    Returns:
        None. NOTE(review): the computed segmentation and lines are not
        returned; consider whether callers need them.
    """
    hscale = 1.0  # Non-standard scaling of horizontal parameters.
    vscale = 1.0  # Non-standard scaling of vertical parameters.
    threshold = 0.2  # baseline threshold.
    usegauss = True  # Use gaussian instead of uniform.
    maxseps = 0  # Maximum black column separators.
    sepwiden = 10  # Widen black separators (to account for warping).
    blackseps = True
    maxcolseps = 3  # Maximum # whitespace column separators.
    csminheight = 10  # Minimum column height (units=scale).
    noise = 8  # Noise threshold for removing small components from lines.
    gray_output = True  # Also output lines extracted from the gray page.
    pad = 3  # Padding for extracted lines.
    expand = 3  # Expand mask for grayscale extraction.

    binary = 1 - binary  # Invert.

    scale = psegutils.estimate_scale(binary)
    segmentation = compute_segmentation(binary,
                                        scale,
                                        blackseps,
                                        maxseps,
                                        maxcolseps,
                                        csminheight,
                                        sepwiden,
                                        usegauss,
                                        vscale,
                                        hscale,
                                        threshold,
                                        quiet=True)

    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([line.bounds for line in lines])
    lsort = psegutils.topsort(order)

    # Renumber the labels so that they conform to the specs.
    nlabels = np.amax(segmentation) + 1
    renumber = np.zeros(nlabels, 'i')
    for i, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (i + 1)
    segmentation = renumber[segmentation]  # Image.

    lines = [lines[i] for i in lsort]

    # Visualize bounding boxes.
    if show and rgb is not None:
        # REF [function] >> extract_masked() in ${OCROPY_HOME}/ocrolib/psegutils.py.
        for line in lines:
            rows, cols = line.bounds[0], line.bounds[1]
            y0, x0, y1, x1 = (int(v) for v in (rows.start, cols.start,
                                               rows.stop, cols.stop))
            cv2.rectangle(rgb, (x0, y0), (x1, y1), (0, 0, 255), 1,
                          cv2.LINE_AA)
        cv2.imshow('Image', rgb)
        cv2.waitKey(0)

    # Output everything.
    if outputdir is not None:
        if not os.path.exists(outputdir):
            os.mkdir(outputdir)

        ocrolib.write_page_segmentation("%s.pseg.png" % outputdir,
                                        segmentation)
        cleaned = ocrolib.remove_noise(binary, noise)
        for i, line in enumerate(lines):
            binline = psegutils.extract_masked(1 - cleaned,
                                               line,
                                               pad=pad,
                                               expand=expand)  # Image.
            ocrolib.write_image_binary(
                "%s/01%04x.bin.png" % (outputdir, i + 1), binline)
            if gray_output and gray is not None:
                grayline = psegutils.extract_masked(gray,
                                                    line,
                                                    pad=pad,
                                                    expand=expand)  # Image.
                ocrolib.write_image_gray(
                    "%s/01%04x.nrm.png" % (outputdir, i + 1), grayline)
예제 #4
0
def binarize(inFile, binFile, grayFile):
    """Binarize ``inFile`` and write the binary and normalized gray images.

    Tuning values (``zoom``, ``perc``, ``size``, ``bignore``, ``escale``,
    ``defLo``, ``defHi``, ``threshold``) are read from names defined outside
    this function.

    Args:
        inFile: Path of the input image, read with ``ocrolib.read_image_gray``.
        binFile: Path the thresholded image is written to.
        grayFile: Path the normalized grayscale image is written to.

    Returns:
        True when both images were written. False when the image is empty,
        the page check rejects it, or threshold estimation reports failure.
    """
    print("binarize: inFile=%s binFile=%s grayFile=%s" %
          (inFile, binFile, grayFile))
    raw = ocrolib.read_image_gray(inFile)

    # perform image normalization
    image = normalize_raw_image(raw)
    if image is None:
        # Report failure like every other early exit. The former
        # `assert False` made this return unreachable and vanished under -O.
        print("!!  # image is empty: %s" % (inFile))
        return False

    check = check_page(np.amax(image) - image)
    if check is not None:
        print(inFile + " SKIPPED " + check + "(use -n to disable this check)")
        return False

    # check whether the image is already effectively binarized
    # (`* 1.0` forces true division so the ratio is not truncated when the
    # module runs without `from __future__ import division`)
    extreme = (np.sum(image < 0.05) + np.sum(image > 0.95)) * 1.0 / np.prod(
        image.shape)
    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        # if not, we need to flatten it by estimating the local whitelevel
        print("flattening")
        flat = estimate_local_whitelevel(image, zoom, perc, size)

    print("comment=%r extreme=%s" % (comment, extreme))
    print("image=%s" % desc(image))
    print(" flat=%s" % desc(flat))

    # Skew estimation is disabled; the angle is reported as zero.
    angle = 0.0

    # estimate low and high thresholds
    print("estimating thresholds")
    lo, hi, ok = estimate_thresholds(flat, bignore, escale, defLo, defHi)
    if not ok:
        return False
    print("lo=%5.3f (%g)" % (lo, defLo))
    print("hi=%5.3f (%g)" % (hi, defHi))

    # rescale the image to get the gray scale image
    print("rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = np.clip(flat, 0, 1)
    binary = flat > threshold

    # output the normalized grayscale and the thresholded images
    print("%s lo-hi (%.2f %.2f) angle %4.1f %s" %
          (inFile, lo, hi, angle, comment))
    print("##1 flat=%s" % desc(flat))
    print("##2  bin=%s" % desc(binary))
    print("writing %s" % binFile)

    ocrolib.write_image_binary(binFile, binary)
    ocrolib.write_image_gray(grayFile, flat)

    return True
예제 #5
0
def process1(job):
    """Segment one binarized page into text lines and write them to disk.

    ``job`` is a ``(fname, i)`` pair, where ``i`` is the job index printed in
    the final summary line. The binary page is read from ``<base>.bin.png``,
    falling back to ``fname`` itself. Lines are found, put into reading
    order, and written to ``<base>/01XXXX.bin.png`` (plus ``.nrm.png`` when
    ``args.gray`` is set), with the page segmentation in ``<base>.pseg.png``.

    The module-level global ``base`` is set as a side effect.

    Returns:
        None. The function returns early, after printing a message, when the
        page cannot be read, fails the page check, lacks the grayscale image
        that ``args.gray`` requires, has an unusable scale, or has too many
        lines.
    """
    global base
    fname, i = job
    base, _ = ocrolib.allsplitext(fname)
    outputdir = base

    try:
        binary = ocrolib.read_image_binary(base + ".bin.png")
    except IOError:
        try:
            binary = ocrolib.read_image_binary(fname)
        except IOError:
            if ocrolib.trace:
                traceback.print_exc()
            print("cannot open either", base + ".bin.png", "or", fname)
            return

    checktype(binary, ABINARY2)

    if not args.nocheck:
        check = check_page(amax(binary) - binary)
        if check is not None:
            print(fname, "SKIPPED", check, "(use -n to disable this check)")
            return

    gray = None
    if args.gray:
        gray_path = base + ".nrm.png"
        if not os.path.exists(gray_path):
            # Previously `gray` stayed unbound here and the checktype call
            # below failed with UnboundLocalError; skip the page instead.
            print(fname, "SKIPPED", "grayscale image", gray_path,
                  "not found")
            return
        gray = ocrolib.read_image_gray(gray_path)
        checktype(gray, GRAYSCALE)

    binary = 1 - binary  # invert

    if args.scale == 0:
        scale = psegutils.estimate_scale(binary)
    else:
        scale = args.scale
    print("scale", scale)
    if isnan(scale) or scale > 1000.0:
        sys.stderr.write("%s: bad scale (%g); skipping\n" % (fname, scale))
        return
    if scale < args.minscale:
        sys.stderr.write("%s: scale (%g) less than --minscale; skipping\n" %
                         (fname, scale))
        return

    # find columns and text lines

    if not args.quiet:
        print("computing segmentation")
    segmentation = compute_segmentation(binary, scale)
    if amax(segmentation) > args.maxlines:
        print(fname, ": too many lines", amax(segmentation))
        return
    if not args.quiet:
        print("number of lines", amax(segmentation))

    # compute the reading order

    if not args.quiet:
        print("finding reading order")
    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([line.bounds for line in lines])
    lsort = psegutils.topsort(order)

    # renumber the labels so that they conform to the specs
    # (loop indices use their own names so the job index `i` survives for
    # the summary line at the end)

    nlabels = amax(segmentation) + 1
    renumber = zeros(nlabels, 'i')
    for rank, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (rank + 1)
    segmentation = renumber[segmentation]

    # finally, output everything

    if not args.quiet:
        print("writing lines")
    if not os.path.exists(outputdir):
        os.mkdir(outputdir)
    lines = [lines[idx] for idx in lsort]
    ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
    cleaned = ocrolib.remove_noise(binary, args.noise)
    for lineno, line in enumerate(lines):
        binline = psegutils.extract_masked(1 - cleaned,
                                           line,
                                           pad=args.pad,
                                           expand=args.expand)
        ocrolib.write_image_binary(
            "%s/01%04x.bin.png" % (outputdir, lineno + 1), binline)
        if args.gray:
            grayline = psegutils.extract_masked(gray,
                                                line,
                                                pad=args.pad,
                                                expand=args.expand)
            ocrolib.write_image_gray(
                "%s/01%04x.nrm.png" % (outputdir, lineno + 1), grayline)
    print("%6d" % i, fname, "%4.1f" % scale, len(lines))