Python read_image_gray Examples, ocrolib.read_image_gray Python Examples

Example #1

0

Show file

File: test_read_image.py Project: jingchaoluan/ocropy

    def test_read_image_gray(self):
        """Check that read_image_gray() is independent of how the image is supplied.

        The same picture is handed over once as a file name on disk
        (``img_disk``) and once as an in-memory PIL.Image (``img_mem``).
        read_image_gray() returns a numpy ndarray, so the two results are
        compared with numpy.array_equal().
        """
        gray_from_disk = ocrolib.read_image_gray(img_disk)
        gray_from_memory = ocrolib.read_image_gray(img_mem)
        same = numpy.array_equal(gray_from_disk, gray_from_memory)
        self.assertTrue(same)

Example #2

0

Show file

File: ocropus-rpred-skip-errors.py Project: karrireddyy/testing

def process1(arg):
    """Run line recognition on one image, skipping lines that cannot be used.

    Args:
        arg: a ``(trial, fname)`` pair; ``fname`` is the path of the
            text-line image, ``trial`` is passed through unchanged.

    Returns:
        ``None`` when the image is empty or has a single gray value, or
        ``(0, [], 0, trial, fname)`` when the line fails the sanity check
        or the network raises ``RecognitionError``.

    Reads the module-level ``args``, ``lnorm``, ``lstm`` and ``network``.
    """
    (trial, fname) = arg
    # NOTE(review): `base`, `raw_line` and `pred` are not used in the visible
    # part of this function; the listing appears to be cut short, so they are
    # kept for whatever follows.
    base, _ = ocrolib.allsplitext(fname)
    line = ocrolib.read_image_gray(fname)
    raw_line = line.copy()
    if prod(line.shape) == 0:
        return None
    if amax(line) == amin(line):
        return None

    if not args.nocheck:
        check = check_line(amax(line) - line)
        if check is not None:
            print_error(fname + " SKIPPED " + check + " (use -n to disable this check)")
            return (0, [], 0, trial, fname)

    if not args.nolineest:
        assert "dew.png" not in fname, "don't dewarp dewarped images"
        # Measure on the inverted image rescaled so its maximum is 1.
        temp = amax(line) - line
        temp = temp * 1.0 / amax(temp)
        lnorm.measure(temp)
        line = lnorm.normalize(line, cval=amax(line))
    else:
        assert "dew.png" in fname, "only apply to dewarped images"

    line = lstm.prepare_line(line, args.pad)
    try:
        pred = network.predictString(line)
    except RecognitionError:
        # The old "except X, err" form is a syntax error on Python 3 and the
        # bound exception was never used.
        # TODO: Handle this in the extraction processor
        print_info(fname + " Failed to predict line. Skipping.")
        return (0, [], 0, trial, fname)

Example #3

0

Show file

def process(arg):
    """Recognize one text-line image and write its result files.

    ``arg`` is a ``(trial, fname)`` pair. The recognized text is written to
    ``<base>.txt``; when the ``llocs`` / ``probabilities`` options are set,
    ``<base>.llocs`` / ``<base>.prob`` are written as well.

    Returns the list of files written, ``None`` for an empty or constant
    image, or ``(0, [], 0, trial, fname)`` when the line check rejects it.
    """
    written = []
    trial, fname = arg
    base, _ = ocrolib.allsplitext(fname)
    line = ocrolib.read_image_gray(fname)
    original = line.copy()

    # Nothing to recognize in an empty or single-valued image.
    if prod(line.shape) == 0:
        return None
    if amax(line) == amin(line):
        return None

    if not args['nocheck']:
        problem = check_line(amax(line) - line)
        if problem is not None:
            message = "%s SKIPPED %s (use -n to disable this check)" % (
                fname, problem)
            print_error(message)
            return (0, [], 0, trial, fname)

    # Line normalization is measured on the inverted, max-scaled image.
    inverted = amax(line) - line
    inverted = inverted * 1.0 / amax(inverted)
    lnorm.measure(inverted)
    line = lnorm.normalize(line, cval=amax(line))

    pad = args['pad']
    line = lstm.prepare_line(line, pad)
    pred = network.predictString(line)

    if args['llocs']:
        # Character positions, mapped back to the width of the input image.
        positions = lstm.translate_back(network.outputs, pos=1)
        scale = len(original.T) * 1.0 / (len(network.outputs) - 2 * args['pad'])
        llocs_path = base + ".llocs"
        with codecs.open(llocs_path, "w", "utf-8") as stream:
            for column, code in positions:
                char = network.l2s([code])
                column = (column - args['pad']) * scale
                stream.write("%s\t%.1f\n" % (char, column))
            written.append(llocs_path)

    if args['probabilities']:
        # Per-character probabilities.
        scored = lstm.translate_back(network.outputs, pos=2)
        prob_path = base + ".prob"
        with codecs.open(prob_path, "w", "utf-8") as stream:
            for code, prob in scored:
                char = network.l2s([code])
                stream.write("%s\t%s\n" % (char, prob))
            written.append(prob_path)

    if not args['nonormalize']:
        pred = ocrolib.normalize_text(pred)

    if not args['quiet']:
        print_info(fname + ":" + pred)

    text_path = base + ".txt"
    ocrolib.write_text(text_path, pred)
    written.append(text_path)
    return written

Example #4

0

Show file

def deskew(fpath, job):
    """Deskew and threshold one page image, writing the result next to it.

    Args:
        fpath: path of the input image; read with ocrolib.read_image_gray().
        job: job number, only used in the progress message.

    Returns:
        The path of the written image, ``<base>.ds.png``, where ``<base>``
        is ``fpath`` with its extensions removed by ocrolib.allsplitext().

    All tuning values (maxskew, skewsteps, bignore, escale, lo, hi,
    threshold, debug, parallel) are read from the module-level ``args``.
    """
    base,_ = ocrolib.allsplitext(fpath)
    basefile = ocrolib.allsplitext(os.path.basename(fpath))[0]

    if args.parallel<2: print_info("=== %s %-3d" % (fpath, job))
    raw = ocrolib.read_image_gray(fpath)

    # No copy is made here: when the skew branch below is skipped, the
    # in-place rescaling further down operates on the same object as `raw`.
    flat = raw
    # estimate skew angle and rotate
    if args.maxskew>0:
        if args.parallel<2: print_info("estimating skew angle")
        d0,d1 = flat.shape
        # border widths (a fraction `bignore` of each dimension) to ignore
        o0,o1 = int(args.bignore*d0),int(args.bignore*d1)
        # work on the inverted image, shifted so its minimum is 0
        flat = amax(flat)-flat
        flat -= amin(flat)
        est = flat[o0:d0-o0,o1:d1-o1]
        ma = args.maxskew
        # candidate angles: -maxskew..+maxskew, `skewsteps` steps per unit
        ms = int(2*args.maxskew*args.skewsteps)
        angle = estimate_skew_angle(est,linspace(-ma,ma,ms+1))
        flat = interpolation.rotate(flat,angle,mode='constant',reshape=0)
        # undo the inversion after rotating
        flat = amax(flat)-flat
    else:
        angle = 0

    # estimate low and high thresholds
    if args.parallel<2: print_info("estimating thresholds")
    d0,d1 = flat.shape
    o0,o1 = int(args.bignore*d0),int(args.bignore*d1)
    est = flat[o0:d0-o0,o1:d1-o1]
    if args.escale>0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        e = args.escale
        v = est-filters.gaussian_filter(est,e*20.0)
        v = filters.gaussian_filter(v**2,e*20.0)**0.5
        v = (v>0.3*amax(v))
        # grow the mask vertically, then horizontally
        v = morphology.binary_dilation(v,structure=ones((int(e*50),1)))
        v = morphology.binary_dilation(v,structure=ones((1,int(e*50))))
        if args.debug>0: imshow(v); ginput(1,args.debug)
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(),args.lo)
    hi = stats.scoreatpercentile(est.ravel(),args.hi)
    # rescale the image to get the gray scale image
    if args.parallel<2: print_info("rescaling")
    flat -= lo
    # NOTE(review): no guard for hi == lo here - confirm inputs cannot
    # produce equal percentiles.
    flat /= (hi-lo)
    flat = clip(flat,0,1)
    if args.debug>0: imshow(flat,vmin=0,vmax=1); ginput(1,args.debug)
    # 0/1 integer image (note: the local name shadows the builtin bin())
    bin = 1*(flat>args.threshold)

    # report the estimates; only the thresholded image is written out
    print_info("%s lo-hi (%.2f %.2f) angle %4.1f" % (basefile, lo, hi, angle))
    if args.parallel<2: print_info("writing")
    ocrolib.write_image_binary(base+".ds.png",bin)
    return base+".ds.png"

Example #5

0

Show file

File: lineest.py Project: beanyh/yeobaek

 def allchars():
     """Yield every item produced by extract_chars() over all files in ``fnames``.

     Each file is read as a gray image, inverted (``1 - image``) and
     segmented with lineseg.ccslineseg(); files whose segmentation raises
     are reported with a traceback and skipped. A progress line is printed
     every 20 files.
     """
     count = 0
     for fno, fname in enumerate(fnames):
         if fno % 20 == 0:
             # Single formatted string: prints the same text whether
             # print is a statement (Python 2) or a function (Python 3).
             print("%s %s %s" % (fno, fname, count))
         image = 1 - ocrolib.read_image_gray(fname)
         try:
             seg = lineseg.ccslineseg(image)
         except Exception:
             # A bare "except:" would also swallow KeyboardInterrupt and
             # SystemExit, making the loop impossible to stop cleanly.
             traceback.print_exc()
             continue
         seg = morph.renumber_by_xcenter(seg)
         for e in extract_chars(seg):
             count += 1
             yield e

Example #6

0

Show file

File: lineest.py Project: beanyh/yeobaek

def compute_geomaps(fnames,shapedict,old_model,use_gt=1,size=32,debug=0,old_order=1):
    """Given a shape dictionary and an existing line geometry
    estimator, compute updated geometric maps for each entry
    in the shape dictionary.

    Args:
        fnames: image file names of the text lines to process.
        shapedict: shape dictionary; ``shapedict.k`` gives the number of
            maps and ``shapedict.predict1(sub)`` the map index for a patch.
        old_model: estimator providing ``lineFit(image, order=...)``.
        use_gt: when true, lines whose ground truth has many capitals or
            digits relative to lowercase letters are skipped.
        size: side length of each (square) map.
        debug: 0 for no plots; otherwise every ``debug``-th file is plotted.
        old_order: ``order`` argument passed to ``old_model.lineFit``.

    Returns:
        ``(bls, xls)``: two arrays of shape ``(shapedict.k, size, size)``
        accumulated from the two curves returned by ``lineFit`` and then
        normalized per map.
    """
    if debug>0: gray(); ion()
    shape = (shapedict.k,size,size)
    bls = zeros(shape)
    xls = zeros(shape)
    count = 0
    for fno,fname in enumerate(fnames):
        if fno%20==0:
            # One formatted string prints identically under Python 2 and 3.
            print("%s %s %s" % (fno, fname, count))
        if use_gt:
            # don't use lines with many capital letters for training because
            # they result in bad models
            gt = ocrolib.read_text(ocrolib.fvariant(fname,"txt","gt"))
            nlower = len(re.sub(r'[^a-z]','',gt))
            if len(re.sub(r'[^A-Z]','',gt))>=0.3*nlower: continue
            if len(re.sub(r'[^0-9]','',gt))>=0.3*nlower: continue
        image = 1-ocrolib.read_image_gray(fname)
        if debug>0 and fno%debug==0: clf(); subplot(411); imshow(image)
        try:
            blp,xlp = old_model.lineFit(image,order=old_order)
        except Exception:
            # Not a bare "except:", so Ctrl-C / SystemExit still propagate.
            traceback.print_exc()
            continue
        # Rasterize both fitted polynomials, one pixel per column, with the
        # row clamped to the image.
        blimage = zeros(image.shape)
        h,w = image.shape
        for x in range(w): blimage[clip(int(polyval(blp,x)),0,h-1),x] = 1
        xlimage = zeros(image.shape)
        for x in range(w): xlimage[clip(int(polyval(xlp,x)),0,h-1),x] = 1
        if debug>0 and fno%debug==0:
            subplot(413); imshow(xlimage+0.3*image)
            subplot(414); imshow(blimage+0.3*image)
        try:
            seg = lineseg.ccslineseg(image)
        except Exception:
            # Best effort: lines that cannot be segmented are skipped.
            continue
        if debug>0 and fno%debug==0: subplot(412); morph.showlabels(seg)
        # All patches of one line must share a shape (separate name so the
        # array shape above is not clobbered).
        sub_shape = None
        for sub,transform,itransform_add in extract_chars(seg):
            if sub_shape is None: sub_shape = sub.shape
            assert sub.shape==sub_shape
            count += 1
            best = shapedict.predict1(sub)
            bls[best] += transform(blimage)
            xls[best] += transform(xlimage)
        if debug==1: ginput(1,100)
        elif debug>1: ginput(1,0.01)
    # Scale every map so its total equals its width; the 1e-6 floor avoids
    # dividing by zero for maps that never received a sample.
    for m in bls: m *= m.shape[1]*1.0/max(1e-6,sum(m))
    for m in xls: m *= m.shape[1]*1.0/max(1e-6,sum(m))
    return bls,xls

Example #7

0

Show file

 def allchars():
     """Yield every item produced by extract_chars() over all files in ``fnames``.

     Each file is read as a gray image, inverted (``1 - image``) and
     segmented with lineseg.ccslineseg(); files whose segmentation raises
     are reported with a traceback and skipped. A progress line is printed
     every 20 files.
     """
     count = 0
     for fno, fname in enumerate(fnames):
         if fno % 20 == 0:
             # Single formatted string: prints the same text whether
             # print is a statement (Python 2) or a function (Python 3).
             print("%s %s %s" % (fno, fname, count))
         image = 1 - ocrolib.read_image_gray(fname)
         try:
             seg = lineseg.ccslineseg(image)
         except Exception:
             # A bare "except:" would also swallow KeyboardInterrupt and
             # SystemExit, making the loop impossible to stop cleanly.
             traceback.print_exc()
             continue
         seg = morph.renumber_by_xcenter(seg)
         for e in extract_chars(seg):
             count += 1
             yield e

Example #8

0

Show file

def binarize(image_filepath):
    """Binarize the image at ``image_filepath`` with fixed parameters.

    The image is read as gray, normalized, flattened against an estimate of
    the local white level, deskewed, rescaled between estimated low/high
    levels and clipped to [0, 1].

    Returns:
        ``(binary, flat)``: the thresholded 0/1 image and the rescaled
        gray image it was derived from. Nothing is written to disk; callers
        that want files can pass the results to
        ocrolib.write_image_binary() / ocrolib.write_image_gray().
    """
    raw = ocrolib.read_image_gray(image_filepath)

    # Perform image normalization.
    image = normalize_raw_image(raw)

    threshold = 0.5  # Threshold, determines lightness.
    zoom = 0.5  # Zoom for page background estimation, smaller=faster.
    escale = 1.0  # Scale for estimating a mask over the text region.
    bignore = 0.1  # Ignore this much of the border for threshold estimation.
    perc = 80  # Percentage for filters.
    filter_range = 20  # Range for filters (renamed: do not shadow range()).
    maxskew = 2  # Skew angle estimation parameters (degrees).
    lo = 5  # Percentile for black estimation.
    hi = 90  # Percentile for white estimation.
    skewsteps = 8  # Steps for skew angle estimation (per degree).
    debug = 0  # Display intermediate results.

    # Flatten it by estimating the local whitelevel.
    flat = estimate_local_whitelevel(image, zoom, perc, filter_range, debug)

    # Estimate skew angle and rotate. The angle itself is not used here.
    flat, angle = estimate_skew(flat, bignore, maxskew, skewsteps, debug)

    # Estimate low and high thresholds; from here on lo/hi are the estimated
    # levels, no longer the percentiles.
    lo, hi = estimate_thresholds(flat, bignore, escale, lo, hi, debug)

    # Rescale the image to get the gray scale image.
    flat -= lo
    flat /= (hi - lo)
    flat = np.clip(flat, 0, 1)

    # 0/1 integer image (named `binary` so the builtin bin() stays usable).
    binary = 1 * (flat > threshold)

    return binary, flat

Example #9

0

Show file

def binarize(inFile, binFile, grayFile):
    """Binarize ``inFile`` and write the binary and gray results.

    Args:
        inFile: path of the input image.
        binFile: path the thresholded image is written to.
        grayFile: path the rescaled gray image is written to.

    Returns:
        ``True`` when both outputs were written, ``False`` when the page
        check rejects the image or threshold estimation reports failure.

    Raises:
        AssertionError: when normalization yields no image (unless asserts
            are disabled, in which case ``False`` is returned).

    Parameters such as ``zoom``, ``perc``, ``size``, ``bignore``,
    ``escale``, ``defLo``, ``defHi`` and ``threshold`` are read from
    module level.
    """
    print("binarize: inFile=%s binFile=%s grayFile=%s" %
          (inFile, binFile, grayFile))
    fname = inFile
    raw = ocrolib.read_image_gray(inFile)

    # perform image normalization
    image = normalize_raw_image(raw)
    if image is None:
        print("!!  # image is empty: %s" % (inFile))
        assert False
        return False

    check = check_page(np.amax(image) - image)
    if check is not None:
        print(inFile + " SKIPPED " + check + "(use -n to disable this check)")
        return False

    # check whether the image is already effectively binarized
    # "* 1.0" forces true division: with integer operands and Python 2
    # division semantics the ratio would floor to 0 and this test could
    # never succeed.
    extreme = (np.sum(image < 0.05) + np.sum(image > 0.95)) * 1.0 / np.prod(
        image.shape)
    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        # if not, we need to flatten it by estimating the local whitelevel
        print("flattening")
        flat = estimate_local_whitelevel(image, zoom, perc, size)

    print("comment=%r extreme=%s" % (comment, extreme))
    print("image=%s" % desc(image))
    print(" flat=%s" % desc(flat))

    # Skew estimation is disabled in this variant; the angle is reported
    # as 0.0 in the summary line below.
    angle = 0.0

    # estimate low and high thresholds
    print("estimating thresholds")
    lo, hi, ok = estimate_thresholds(flat, bignore, escale, defLo, defHi)
    if not ok:
        return False
    print("lo=%5.3f (%g)" % (lo, defLo))
    print("hi=%5.3f (%g)" % (hi, defHi))

    # rescale the image to get the gray scale image
    print("rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = np.clip(flat, 0, 1)
    # boolean image (named `binary` so the builtin bin() is not shadowed)
    binary = flat > threshold

    # output the normalized grayscale and the thresholded images
    print("%s lo-hi (%.2f %.2f) angle %4.1f %s" %
          (fname, lo, hi, angle, comment))
    print("##1 flat=%s" % desc(flat))
    print("##2  bin=%s" % desc(binary))
    print("writing %s" % binFile)

    ocrolib.write_image_binary(binFile, binary)
    ocrolib.write_image_gray(grayFile, flat)

    return True

Example #10

0

Show file

def process1(job):
    """Segment one binarized page into text lines and write them out.

    Args:
        job: a ``(fname, i)`` pair: the page image path and the job index
            that is echoed in the final summary line.

    Returns:
        Always ``None``. The function returns early (after printing a
        message) when the page cannot be opened, fails the page check, has
        an unusable scale, has too many lines, or when ``--gray`` is
        requested but ``<base>.nrm.png`` is missing.

    Side effects:
        Sets the module-level ``base``; creates the directory ``<base>``;
        writes ``<base>.pseg.png`` plus one ``01xxxx.bin.png`` (and, with
        ``args.gray``, one ``01xxxx.nrm.png``) per line into it.
    """
    fname, i = job
    global base
    base, _ = ocrolib.allsplitext(fname)
    outputdir = base

    # Prefer the pre-binarized "<base>.bin.png"; fall back to the file itself.
    try:
        binary = ocrolib.read_image_binary(base + ".bin.png")
    except IOError:
        try:
            binary = ocrolib.read_image_binary(fname)
        except IOError:
            if ocrolib.trace:
                traceback.print_exc()
            print("cannot open either", base + ".bin.png", "or", fname)
            return

    checktype(binary, ABINARY2)

    if not args.nocheck:
        check = check_page(amax(binary) - binary)
        if check is not None:
            print(fname, "SKIPPED", check, "(use -n to disable this check)")
            return

    gray = None
    if args.gray:
        gray_path = base + ".nrm.png"
        if not os.path.exists(gray_path):
            # Previously `gray` stayed unbound here and the checktype() call
            # below failed with a NameError; report the real problem instead.
            print(fname, "SKIPPED", "grayscale image", gray_path, "not found")
            return
        gray = ocrolib.read_image_gray(gray_path)
        checktype(gray, GRAYSCALE)

    binary = 1 - binary  # invert

    if args.scale == 0:
        scale = psegutils.estimate_scale(binary)
    else:
        scale = args.scale
    print("scale", scale)
    if isnan(scale) or scale > 1000.0:
        sys.stderr.write("%s: bad scale (%g); skipping\n" % (fname, scale))
        return
    if scale < args.minscale:
        sys.stderr.write("%s: scale (%g) less than --minscale; skipping\n" %
                         (fname, scale))
        return

    # find columns and text lines

    if not args.quiet:
        print("computing segmentation")
    segmentation = compute_segmentation(binary, scale)
    if amax(segmentation) > args.maxlines:
        print(fname, ": too many lines", amax(segmentation))
        return
    if not args.quiet:
        print("number of lines", amax(segmentation))

    # compute the reading order

    if not args.quiet:
        print("finding reading order")
    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)

    # renumber the labels so that they conform to the specs
    # (loop variables below deliberately avoid `i`, which holds the job
    # index printed at the end)

    nlabels = amax(segmentation) + 1
    renumber = zeros(nlabels, 'i')
    for rank, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (rank + 1)
    segmentation = renumber[segmentation]

    # finally, output everything

    if not args.quiet:
        print("writing lines")
    if not os.path.exists(outputdir):
        os.mkdir(outputdir)
    lines = [lines[k] for k in lsort]
    ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
    cleaned = ocrolib.remove_noise(binary, args.noise)
    for lineno, line in enumerate(lines):
        binline = psegutils.extract_masked(1 - cleaned,
                                           line,
                                           pad=args.pad,
                                           expand=args.expand)
        ocrolib.write_image_binary(
            "%s/01%04x.bin.png" % (outputdir, lineno + 1), binline)
        if args.gray:
            grayline = psegutils.extract_masked(gray,
                                                line,
                                                pad=args.pad,
                                                expand=args.expand)
            ocrolib.write_image_gray(
                "%s/01%04x.nrm.png" % (outputdir, lineno + 1), grayline)
    print("%6d" % i, fname, "%4.1f" % scale, len(lines))

Example #11

0

Show file

File: ocrd_anybaseocr_deskew.py Project: bjargal/OCR-D-LAYoutERkennung

    def process(self):
        """Estimate the skew of every input page and record it in its PAGE file.

        For each entry of ``self.input_files`` the page image is read as
        gray, a skew angle is estimated (when ``maxskew`` > 0), the angle is
        stored with ``set_orientation(angle)`` and the updated PAGE document
        is added to the workspace under ``self.output_file_grp``.

        The thresholded ``deskewed`` image is computed but not written out
        (the write call is commented out). All tuning values come from
        ``self.parameter``.
        """
        for (n, input_file) in enumerate(self.input_files):
            pcgts = page_from_file(self.workspace.download_file(input_file))
            page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID
            page = pcgts.get_Page()

            # why does it save the image ??
            page_image, page_xywh, _ = self.workspace.image_from_page(
                page, page_id)

            if self.parameter['parallel'] < 2:
                LOG.info("INPUT FILE %s ", input_file.pageId or input_file.ID)
            raw = ocrolib.read_image_gray(page_image.filename)

            # No copy: when the skew branch is skipped, the in-place
            # rescaling below operates on the same object as `raw`.
            flat = raw
            #flat = np.array(binImg)
            # estimate skew angle and rotate
            if self.parameter['maxskew'] > 0:
                if self.parameter['parallel'] < 2:
                    LOG.info("Estimating Skew Angle")
                d0, d1 = flat.shape
                # border widths (fraction `bignore` of each side) to ignore
                o0, o1 = int(self.parameter['bignore'] * d0), int(
                    self.parameter['bignore'] * d1)
                # work on the inverted image, shifted so its minimum is 0
                flat = amax(flat) - flat
                flat -= amin(flat)
                est = flat[o0:d0 - o0, o1:d1 - o1]
                ma = self.parameter['maxskew']
                # candidate angles: -maxskew..+maxskew, `skewsteps` per unit
                ms = int(2 * self.parameter['maxskew'] *
                         self.parameter['skewsteps'])
                angle = self.estimate_skew_angle(est,
                                                 linspace(-ma, ma, ms + 1))
                flat = interpolation.rotate(flat,
                                            angle,
                                            mode='constant',
                                            reshape=0)
                # undo the inversion after rotating
                flat = amax(flat) - flat
            else:
                angle = 0

            # self.write_angles_to_pageXML(base,angle)
            # estimate low and high thresholds
            if self.parameter['parallel'] < 2:
                LOG.info("Estimating Thresholds")
            d0, d1 = flat.shape
            o0, o1 = int(self.parameter['bignore'] * d0), int(
                self.parameter['bignore'] * d1)
            est = flat[o0:d0 - o0, o1:d1 - o1]
            if self.parameter['escale'] > 0:
                # by default, we use only regions that contain
                # significant variance; this makes the percentile
                # based low and high estimates more reliable
                e = self.parameter['escale']
                v = est - filters.gaussian_filter(est, e * 20.0)
                v = filters.gaussian_filter(v**2, e * 20.0)**0.5
                v = (v > 0.3 * amax(v))
                # grow the mask vertically, then horizontally
                v = morphology.binary_dilation(v,
                                               structure=ones(
                                                   (int(e * 50), 1)))
                v = morphology.binary_dilation(v,
                                               structure=ones(
                                                   (1, int(e * 50))))
                if self.parameter['debug'] > 0:
                    imshow(v)
                    ginput(1, self.parameter['debug'])
                est = est[v]
            lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
            hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])
            # rescale the image to get the gray scale image
            if self.parameter['parallel'] < 2:
                LOG.info("Rescaling")
            flat -= lo
            # NOTE(review): no guard for hi == lo - confirm this cannot occur.
            flat /= (hi - lo)
            flat = clip(flat, 0, 1)
            if self.parameter['debug'] > 0:
                imshow(flat, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])
            # thresholded 0/1 image; currently unused (see commented write)
            deskewed = 1 * (flat > self.parameter['threshold'])

            # report the estimates for this page
            LOG.info("%s lo-hi (%.2f %.2f) angle %4.1f" %
                     (pcgts.get_Page().imageFilename, lo, hi, angle))
            if self.parameter['parallel'] < 2:
                LOG.info("Writing")
            #ocrolib.write_image_binary(base+".ds.png", deskewed)

            #TODO: Need some clarification as the results effect the following pre-processing steps.
            #orientation = -angle
            #orientation = 180 - ((180 - orientation) % 360)
            # the raw estimated angle is stored, without sign flip or wrapping
            pcgts.get_Page().set_orientation(angle)
            #print(orientation, angle)

            # Derive the output ID from the input ID; if replacing the group
            # name changed nothing, fall back to a generated padded ID.
            file_id = input_file.ID.replace(self.input_file_grp,
                                            self.output_file_grp)
            if file_id == input_file.ID:
                file_id = concat_padded(self.output_file_grp, n)

            self.workspace.add_file(ID=file_id,
                                    file_grp=self.output_file_grp,
                                    pageId=input_file.pageId,
                                    mimetype=MIMETYPE_PAGE,
                                    local_filename=os.path.join(
                                        self.output_file_grp,
                                        file_id + '.xml'),
                                    content=to_xml(pcgts).encode('utf-8'))

Example #12

0

Show file

    def _process_segment(self, page, filename, page_id, file_id):
        """Binarize one image and attach the result to ``page``.

        Args:
            page: PAGE object that receives the new AlternativeImage.
            filename: path of the image to binarize.
            page_id: page identifier, used in log messages and when saving.
            file_id: identifier under which the binarized image is saved.

        Returns:
            ``None``. Returns early, without adding an image, when the input
            has a single gray value or fails the page check.

        All tuning values come from ``self.parameter``.
        """
        raw = ocrolib.read_image_gray(filename)
        self.dshow(raw, "input")

        # perform image normalization
        image = raw - amin(raw)
        if amax(image) == amin(image):
            LOG.info("# image is empty: %s" % (page_id))
            return
        image /= amax(image)

        if not self.parameter['nocheck']:
            check = self.check_page(amax(image) - image)
            if check is not None:
                # `input_file` does not exist in this method (the old code
                # raised NameError here); `page_id` identifies the page.
                LOG.error("%s SKIPPED. %s (use -n to disable this check)" %
                          (page_id, check))
                return

        # check whether the image is already effectively binarized
        if self.parameter['gray']:
            extreme = 0
        else:
            extreme = (np.sum(image < 0.05) +
                       np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)
        if extreme > 0.95:
            comment = "no-normalization"
            flat = image
        else:
            comment = ""
            # if not, we need to flatten it by estimating the local whitelevel
            LOG.info("Flattening")
            m = interpolation.zoom(image, self.parameter['zoom'])
            m = filters.percentile_filter(m,
                                          self.parameter['perc'],
                                          size=(self.parameter['range'], 2))
            m = filters.percentile_filter(m,
                                          self.parameter['perc'],
                                          size=(2, self.parameter['range']))
            m = interpolation.zoom(m, 1.0 / self.parameter['zoom'])
            if self.parameter['debug'] > 0:
                clf()
                imshow(m, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])
            # Zooming down and up again can change the size slightly, so
            # only the region common to both arrays is used.
            w, h = minimum(array(image.shape), array(m.shape))
            flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
            if self.parameter['debug'] > 0:
                clf()
                imshow(flat, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])

        # estimate low and high thresholds
        LOG.info("Estimating Thresholds")
        d0, d1 = flat.shape
        o0, o1 = int(self.parameter['bignore'] * d0), int(
            self.parameter['bignore'] * d1)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        if self.parameter['escale'] > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            e = self.parameter['escale']
            v = est - filters.gaussian_filter(est, e * 20.0)
            v = filters.gaussian_filter(v**2, e * 20.0)**0.5
            v = (v > 0.3 * amax(v))
            v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
            v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
            if self.parameter['debug'] > 0:
                imshow(v)
                ginput(1, self.parameter['debug'])
            est = est[v]
        lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
        hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])
        # rescale the image to get the gray scale image
        LOG.info("Rescaling")
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if self.parameter['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        binarized = 1 * (flat > self.parameter['threshold'])

        # report the estimates, optionally show the result
        LOG.info("%s lo-hi (%.2f %.2f) %s" % (page_id, lo, hi, comment))
        LOG.info("writing")
        if self.parameter['debug'] > 0 or self.parameter['show']:
            clf()
            gray()
            imshow(binarized)
            ginput(1, max(0.1, self.parameter['debug']))

        # Convert to an 8-bit 0/255 image and save it through the workspace.
        bin_array = array(255 * (binarized > ocrolib.midrange(binarized)), 'B')
        bin_image = ocrolib.array2pil(bin_array)

        file_path = self.workspace.save_image_file(bin_image,
                                                   file_id,
                                                   page_id=page_id,
                                                   file_grp=self.image_grp)
        page.add_AlternativeImage(
            AlternativeImageType(filename=file_path, comment="binarized"))

Example #13

0

Show file

File: test_lineextract.py Project: tgialoimtr/cnn_lstm_ctc_ocr

def extractLines2(imgpath):
    """Deskew the top half of an image and draw candidate sub-lines on it.

    The image at ``imgpath`` is read as gray and cropped to its upper half.
    A skew angle is estimated on a half-size copy of the central region and
    the crop is rotated by it and min-max normalized. The boxes returned by
    findBox() are then bucketed into four overlapping cell grids (offset by
    half a cell in x, y, or both), each cell collecting its boxes in a
    SubLineFinder.

    Returns:
        ``(img_grey, illu)``: the deskewed, normalized gray image and a
        2x enlarged colour illustration showing the boxes (random colours)
        and the sub-line found in each cell (one colour per grid).

    Integer positions use ``//`` so that indices stay integers on Python 3
    as well; results on Python 2 are unchanged.
    """
    img_grey = ocrolib.read_image_gray(imgpath)
    img_grey = img_grey[:img_grey.shape[0] // 2, :]

    (h, w) = img_grey.shape[:2]
    img00 = cv2.resize(img_grey[h // 4:3 * h // 4, w // 4:3 * w // 4],
                       None,
                       fx=0.5,
                       fy=0.5)
    angle = estimate_skew_angle(img00, linspace(-5, 5, 42))
    print('goc %s' % (angle,))
    rotM = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1)
    img_grey = cv2.warpAffine(img_grey, rotM, (w, h))

    h, w = img_grey.shape
    img_grey = cv2.normalize(img_grey.astype(float32), None, 0.0, 0.999,
                             cv2.NORM_MINMAX)

    objects, scale = findBox(img_grey)

    # Choose a cell size near (6 * scale) x (2.5 * scale) that tiles the
    # image evenly.
    imgwidth = img_grey.shape[1]
    imgheight = img_grey.shape[0]
    cellwidth = 6 * scale
    cellheight = 2.5 * scale
    N_x = int(round(imgwidth / cellwidth))
    cellwidth = int(round(imgwidth // N_x))
    N_y = int(round(imgheight / cellheight))
    cellheight = int(round(imgheight // N_y))
    # one {cellid: SubLineFinder} mapping per grid type 0..3
    cells_list = [{}, {}, {}, {}]

    def pixel2cell2id(pixel_x, pixel_y, CELLTYPE):
        """Map a pixel to (cell origin, cell id) in grid CELLTYPE.

        Grids 2 and 3 are shifted by half a cell in x, grids 1 and 3 by
        half a cell in y. Returns (None, None) when the shifted pixel is
        not strictly inside the positive quadrant.
        """
        dx = 0
        dy = 0
        if CELLTYPE == 3:
            pixel_x -= cellwidth // 2
            pixel_y -= cellheight // 2
            dx = cellwidth // 2
            dy = cellheight // 2
        if CELLTYPE == 2:
            pixel_x -= cellwidth // 2
            dx = cellwidth // 2
        if CELLTYPE == 1:
            pixel_y -= cellheight // 2
            dy = cellheight // 2
        if pixel_x <= 0 or pixel_y <= 0: return None, None
        cellcoord = (pixel_x // cellwidth, pixel_y // cellheight)
        cellid = cellcoord[0] + cellcoord[1] * N_x
        cellcoord = (cellcoord[0] * cellwidth + dx,
                     cellcoord[1] * cellheight + dy)
        return cellcoord, cellid

    def id2cell2pixel(cellid, x, y, CELLTYPE):
        """Map a cell id and an in-cell offset back to pixel coordinates.

        Not called in this function; kept for debugging.
        """
        cellcoord = (cellid % N_x, cellid // N_x)
        pixel_x = cellcoord[0] * cellwidth + x
        pixel_y = cellcoord[1] * cellheight + y
        if CELLTYPE == 3:
            pixel_x += cellwidth // 2
            pixel_y += cellheight // 2
        return cellcoord, pixel_x, pixel_y

    illu = cv2.cvtColor(img_grey.astype(np.float32), cv2.COLOR_GRAY2BGR)
    illu = cv2.resize(illu, None, fx=2.0, fy=2.0)
    illu = (illu * 255).astype(np.uint8)

    for o in objects:
        ### Add object as candidate character
        # anchor point: horizontal centre, bottom edge of the box
        pixel_x, pixel_y = (o[1].start + o[1].stop) // 2, o[0].stop
        for celltype in range(4):
            cellcoord, cellid = pixel2cell2id(pixel_x,
                                              pixel_y,
                                              CELLTYPE=celltype)
            if cellcoord is None or cellid is None: continue
            cellbound = slice(cellcoord[1], cellcoord[1] + cellheight,
                              None), slice(cellcoord[0],
                                           cellcoord[0] + cellwidth, None)
            if cellid not in cells_list[celltype]:
                cells_list[celltype][cellid] = SubLineFinder(
                    window_size=scale / 3, cellbound=cellbound, initChar=o)
            else:
                cells_list[celltype][cellid].addChar(o)
        # the illustration is 2x the image, hence the doubled coordinates
        cv2.rectangle(illu, (o[1].start * 2, o[0].start * 2),
                      (o[1].stop * 2, o[0].stop * 2),
                      (random.randint(0, 255), random.randint(
                          0, 255), random.randint(0, 255)), 1)

    # one drawing colour per grid type
    grid_colours = ((255, 0, 0), (0, 255, 0), (255, 255, 0), (0, 0, 255))
    for celltype in range(4):
        col = grid_colours[celltype]
        # .values() exists on both Python 2 and 3 (.iteritems() does not)
        for subline in cells_list[celltype].values():
            line = subline.subline()
            if line is not None:
                pos1 = (int(line[0][0]) * 2, int(line[0][1]) * 2)
                pos2 = (int(line[1][0]) * 2, int(line[1][1]) * 2)
                cv2.line(illu, pos1, pos2, col, 1)
    ### illustrate/debug first round

    return img_grey, illu

Example #14

0

Show file

File: ocropus.py Project: vitorio/ocropodium

 def process(self):
     """Load the configured "path" parameter as a gray image.

     Returns ``self.null_data()`` when the path (default: empty string)
     does not exist on disk; otherwise the result of
     ocrolib.read_image_gray() on the makesafe()-processed path.
     """
     if os.path.exists(self._params.get("path", "")):
         return ocrolib.read_image_gray(makesafe(self._params.get("path")))
     return self.null_data()

Example #15

0

Show file

    def process(self):
        """Deskew every input page, write ``<base>.ds.png`` and register it.

        For each entry of ``self.input_files`` the page image is read as
        gray, a skew angle is estimated (when ``maxskew`` > 0), the image is
        rescaled and thresholded, the result is written to ``<base>.ds.png``
        and the negated, wrapped angle is stored with ``set_orientation()``.
        A file entry is then added to the workspace under
        ``self.output_file_grp``.

        All tuning values come from ``self.parameter``.
        """
        for (n, input_file) in enumerate(self.input_files):
            pcgts = page_from_file(self.workspace.download_file(input_file))
            fname = pcgts.get_Page().imageFilename
            img = self.workspace.resolve_image_as_pil(fname)
            param = self.parameter
            base, _ = ocrolib.allsplitext(fname)
            #basefile = ocrolib.allsplitext(os.path.basename(fpath))[0]

            if param['parallel'] < 2:
                print_info("=== %s " % (fname))
            raw = ocrolib.read_image_gray(img.filename)

            # No copy: when the skew branch is skipped, the in-place
            # rescaling below operates on the same object as `raw`.
            flat = raw
            #flat = np.array(binImg)
            # estimate skew angle and rotate
            if param['maxskew'] > 0:
                if param['parallel'] < 2:
                    print_info("estimating skew angle")
                d0, d1 = flat.shape
                # border widths (fraction `bignore` of each side) to ignore
                o0, o1 = int(param['bignore'] * d0), int(param['bignore'] * d1)
                # work on the inverted image, shifted so its minimum is 0
                flat = amax(flat) - flat
                flat -= amin(flat)
                est = flat[o0:d0 - o0, o1:d1 - o1]
                ma = param['maxskew']
                # candidate angles: -maxskew..+maxskew, `skewsteps` per unit
                ms = int(2 * param['maxskew'] * param['skewsteps'])
                angle = self.estimate_skew_angle(est,
                                                 linspace(-ma, ma, ms + 1))
                flat = interpolation.rotate(flat,
                                            angle,
                                            mode='constant',
                                            reshape=0)
                # undo the inversion after rotating
                flat = amax(flat) - flat
            else:
                angle = 0

            # self.write_angles_to_pageXML(base,angle)
            # estimate low and high thresholds
            if param['parallel'] < 2:
                print_info("estimating thresholds")
            d0, d1 = flat.shape
            o0, o1 = int(param['bignore'] * d0), int(param['bignore'] * d1)
            est = flat[o0:d0 - o0, o1:d1 - o1]
            if param['escale'] > 0:
                # by default, we use only regions that contain
                # significant variance; this makes the percentile
                # based low and high estimates more reliable
                e = param['escale']
                v = est - filters.gaussian_filter(est, e * 20.0)
                v = filters.gaussian_filter(v**2, e * 20.0)**0.5
                v = (v > 0.3 * amax(v))
                # grow the mask vertically, then horizontally
                v = morphology.binary_dilation(v,
                                               structure=ones(
                                                   (int(e * 50), 1)))
                v = morphology.binary_dilation(v,
                                               structure=ones(
                                                   (1, int(e * 50))))
                if param['debug'] > 0:
                    imshow(v)
                    ginput(1, param['debug'])
                est = est[v]
            lo = stats.scoreatpercentile(est.ravel(), param['lo'])
            hi = stats.scoreatpercentile(est.ravel(), param['hi'])
            # rescale the image to get the gray scale image
            if param['parallel'] < 2:
                print_info("rescaling")
            flat -= lo
            # NOTE(review): no guard for hi == lo - confirm this cannot occur.
            flat /= (hi - lo)
            flat = clip(flat, 0, 1)
            if param['debug'] > 0:
                imshow(flat, vmin=0, vmax=1)
                ginput(1, param['debug'])
            deskewed = 1 * (flat > param['threshold'])

            # report the estimates; only the thresholded image is written
            print_info("%s lo-hi (%.2f %.2f) angle %4.1f" %
                       (pcgts.get_Page().imageFilename, lo, hi, angle))
            if param['parallel'] < 2:
                print_info("writing")
            ocrolib.write_image_binary(base + ".ds.png", deskewed)

            # store the negated angle, wrapped into the range (-180, 180]
            orientation = -angle
            orientation = 180 - (180 - orientation) % 360
            pcgts.get_Page().set_orientation(orientation)

            ID = concat_padded(self.output_file_grp, n)
            # NOTE(review): mimetype is "image/png" and url points at the
            # PNG, but `content` is the serialized PAGE XML - confirm which
            # of the two this entry is meant to describe.
            self.workspace.add_file(ID=ID,
                                    file_grp=self.output_file_grp,
                                    pageId=input_file.pageId,
                                    mimetype="image/png",
                                    url=base + ".ds.png",
                                    local_filename='%s/%s' %
                                    (self.output_file_grp, ID),
                                    content=to_xml(pcgts).encode('utf-8'))

Example #16

0

Show file

def compute_geomaps(fnames,
                    shapedict,
                    old_model,
                    use_gt=1,
                    size=32,
                    debug=0,
                    old_order=1):
    """Given a shape dictionary and an existing line geometry
    estimator, compute updated geometric maps for each entry
    in the shape dictionary.

    Args:
        fnames: iterable of text-line image file names.
        shapedict: shape dictionary; the code reads ``shapedict.k`` (number
            of entries) and calls ``shapedict.predict1(sub)`` to obtain the
            entry index for a character image.
        old_model: existing line geometry estimator; ``lineFit(image,
            order=old_order)`` must return two polynomial coefficient
            sequences (used here as baseline and x-line).
        use_gt: when true, the ground-truth text of each line is read and
            lines with many capitals or digits are skipped.
        size: side length of every accumulated map.
        debug: 0 disables plotting; otherwise every ``debug``-th line is
            plotted.  ``debug == 1`` waits for a click (up to 100 s), larger
            values only pause briefly.
        old_order: polynomial order handed to ``old_model.lineFit``.

    Returns:
        Tuple ``(bls, xls)`` of arrays with shape ``(shapedict.k, size,
        size)``: the accumulated baseline and x-line maps, each entry
        rescaled by ``width / total`` (total clamped to at least 1e-6).
    """
    if debug > 0:
        gray()
        ion()
    maps_shape = (shapedict.k, size, size)
    bls = zeros(maps_shape)
    xls = zeros(maps_shape)
    count = 0
    for fno, fname in enumerate(fnames):
        if fno % 20 == 0:
            # One pre-formatted argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print("%s %s %s" % (fno, fname, count))
        if use_gt:
            # don't use lines with many capital letters (or digits) for
            # training because they result in bad models
            gt = ocrolib.read_text(ocrolib.fvariant(fname, "txt", "gt"))
            n_lower = len(re.sub(r'[^a-z]', '', gt))
            if len(re.sub(r'[^A-Z]', '', gt)) >= 0.3 * n_lower:
                continue
            if len(re.sub(r'[^0-9]', '', gt)) >= 0.3 * n_lower:
                continue
        image = 1 - ocrolib.read_image_gray(fname)
        # Short-circuit keeps the modulo from running when debug == 0.
        show = debug > 0 and fno % debug == 0
        if show:
            clf()
            subplot(411)
            imshow(image)
        try:
            blp, xlp = old_model.lineFit(image, order=old_order)
        except Exception:
            # Best effort: report the failure and move on to the next line.
            # `Exception` (not a bare except) lets Ctrl-C / SystemExit
            # interrupt this potentially long loop.
            traceback.print_exc()
            continue
        h, w = image.shape
        # Rasterise both fitted polynomials: one pixel per column, with the
        # row clamped to the image.
        blimage = zeros(image.shape)
        for x in range(w):
            blimage[clip(int(polyval(blp, x)), 0, h - 1), x] = 1
        xlimage = zeros(image.shape)
        for x in range(w):
            xlimage[clip(int(polyval(xlp, x)), 0, h - 1), x] = 1
        if show:
            subplot(413)
            imshow(xlimage + 0.3 * image)
            subplot(414)
            imshow(blimage + 0.3 * image)
        try:
            seg = lineseg.ccslineseg(image)
        except Exception:
            # Lines that cannot be segmented are deliberately skipped
            # without a message.
            continue
        if show:
            subplot(412)
            morph.showlabels(seg)
        sub_shape = None
        for sub, transform, itransform_add in extract_chars(seg):
            # All character images extracted from one line must share a
            # single shape.
            if sub_shape is None:
                sub_shape = sub.shape
            assert sub.shape == sub_shape
            count += 1
            best = shapedict.predict1(sub)
            # `transform` is applied to the full-line rasters; presumably it
            # maps them into the character's size x size frame -- confirm
            # against extract_chars.
            bls[best] += transform(blimage)
            xls[best] += transform(xlimage)
        if debug == 1:
            ginput(1, 100)
        elif debug > 1:
            ginput(1, 0.01)
    # Rescale every map by width / total; the clamp avoids dividing by zero
    # for entries that never received a sample.
    for maps in (bls, xls):
        for i in range(len(maps)):
            maps[i] *= maps[i].shape[1] * 1.0 / max(1e-6, sum(maps[i]))
    return bls, xls

Example #17

0

Show file

    def process(self):
        """Binarize the page image of every input file and register the result.

        For each input file the PAGE document is loaded, its image is read
        as grayscale, normalized to the 0..1 range, optionally flattened by
        subtracting an estimated local white level, rescaled between the
        ``lo`` and ``hi`` percentiles and thresholded.  The binary image is
        written next to the source image as ``<base>.bin.png`` and a file
        entry is added to the workspace in the output file group.

        Pages that are empty (constant intensity) or that fail
        ``self.check_page`` are skipped; processing continues with the next
        input file.

        Parameters are read from ``self.parameter``: ``nocheck``, ``gray``,
        ``zoom``, ``perc``, ``range``, ``debug``, ``bignore``, ``escale``,
        ``lo``, ``hi``, ``threshold`` and ``show``.
        """
        for (n, input_file) in enumerate(self.input_files):
            pcgts = page_from_file(self.workspace.download_file(input_file))
            fname = pcgts.get_Page().imageFilename
            img = self.workspace.resolve_image_as_pil(fname)

            print_info("# %s" % (fname))
            raw = ocrolib.read_image_gray(img.filename)

            self.dshow(raw, "input")

            # perform image normalization
            image = raw - amin(raw)
            if amax(image) == amin(image):
                print_info("# image is empty: %s" % (fname))
                # Skip only this page; a `return` here would silently drop
                # every remaining input file as well.
                continue
            image /= amax(image)

            if not self.parameter['nocheck']:
                check = self.check_page(amax(image) - image)
                if check is not None:
                    print_error(fname + " SKIPPED. " + check +
                                " (use -n to disable this check)")
                    continue

            # check whether the image is already effectively binarized
            if self.parameter['gray']:
                extreme = 0
            else:
                # fraction of pixels that are nearly black or nearly white
                extreme = (np.sum(image < 0.05) +
                           np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)
            if extreme > 0.95:
                comment = "no-normalization"
                flat = image
            else:
                comment = ""
                # if not, we need to flatten it by estimating the local whitelevel
                print_info("flattening")
                m = interpolation.zoom(image, self.parameter['zoom'])
                m = filters.percentile_filter(m,
                                              self.parameter['perc'],
                                              size=(self.parameter['range'],
                                                    2))
                m = filters.percentile_filter(m,
                                              self.parameter['perc'],
                                              size=(2,
                                                    self.parameter['range']))
                m = interpolation.zoom(m, 1.0 / self.parameter['zoom'])
                if self.parameter['debug'] > 0:
                    clf()
                    imshow(m, vmin=0, vmax=1)
                    ginput(1, self.parameter['debug'])
                # zooming down and up again may change the size slightly,
                # so only the common region is used
                w, h = minimum(array(image.shape), array(m.shape))
                flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
                if self.parameter['debug'] > 0:
                    clf()
                    imshow(flat, vmin=0, vmax=1)
                    ginput(1, self.parameter['debug'])

            # estimate low and high thresholds
            print_info("estimating thresholds")
            d0, d1 = flat.shape
            # ignore a border of `bignore` (fraction of each dimension)
            o0, o1 = int(self.parameter['bignore'] * d0), int(
                self.parameter['bignore'] * d1)
            est = flat[o0:d0 - o0, o1:d1 - o1]
            if self.parameter['escale'] > 0:
                # by default, we use only regions that contain
                # significant variance; this makes the percentile
                # based low and high estimates more reliable
                e = self.parameter['escale']
                v = est - filters.gaussian_filter(est, e * 20.0)
                v = filters.gaussian_filter(v**2, e * 20.0)**0.5
                v = (v > 0.3 * amax(v))
                v = morphology.binary_dilation(v,
                                               structure=ones(
                                                   (int(e * 50), 1)))
                v = morphology.binary_dilation(v,
                                               structure=ones(
                                                   (1, int(e * 50))))
                if self.parameter['debug'] > 0:
                    imshow(v)
                    ginput(1, self.parameter['debug'])
                est = est[v]
            lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
            hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])
            # rescale the image to get the gray scale image
            print_info("rescaling")
            # NOTE(review): hi == lo is not guarded against here.
            flat -= lo
            flat /= (hi - lo)
            flat = clip(flat, 0, 1)
            if self.parameter['debug'] > 0:
                imshow(flat, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])
            binarized = 1 * (flat > self.parameter['threshold'])

            # output the thresholded image
            print_info("%s lo-hi (%.2f %.2f) %s" % (fname, lo, hi, comment))
            print_info("writing")
            if self.parameter['debug'] > 0 or self.parameter['show']:
                clf()
                gray()
                imshow(binarized)
                ginput(1, max(0.1, self.parameter['debug']))
            base, _ = ocrolib.allsplitext(img.filename)
            ocrolib.write_image_binary(base + ".bin.png", binarized)

            ID = concat_padded(self.output_file_grp, n)
            self.workspace.add_file(ID=ID,
                                    file_grp=self.output_file_grp,
                                    pageId=input_file.pageId,
                                    mimetype="image/png",
                                    url=base + ".bin.png",
                                    local_filename='%s/%s' %
                                    (self.output_file_grp, ID),
                                    content=to_xml(pcgts).encode('utf-8'))

Example #18

0

Show file

File: test_lineextract3.py Project: tgialoimtr/cnn_lstm_ctc_ocr

def extractLines2(imgpath):
    """Deskew an image, trace text sub-lines through it and draw them.

    The image at ``imgpath`` is read as grayscale, rotated by an estimated
    skew angle, min-max normalized and cropped to at most its first 800
    rows.  Boxes returned by ``findBox`` are linked into ``SubLine`` objects
    by the recursive helper ``move``; every collected line is then drawn on
    a BGR copy of the image.

    NOTE(review): this is Python 2 code (``print`` statements; ``/`` on ints
    is relied upon to produce integer indices).

    Returns:
        A BGR uint8 image (copy of the processed input) with the detected
        lines drawn on it.
    """
    img_grey = ocrolib.read_image_gray(imgpath)

    (h, w) = img_grey.shape[:2]
    # Estimate the skew on the central half of the image, downscaled by 0.5.
    img00 = cv2.resize(img_grey[h / 4:3 * h / 4, w / 4:3 * w / 4],
                       None,
                       fx=0.5,
                       fy=0.5)
    angle = estimate_skew_angle(img00, linspace(-5, 5, 42))
    # 'goc' is presumably Vietnamese for "angle" (debug output).
    print 'goc', angle
    # Rotate the full image about its centre by the estimated angle.
    rotM = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
    img_grey = cv2.warpAffine(img_grey, rotM, (w, h))

    # Min-max normalize to the range [0.0, 0.999].
    img_grey = cv2.normalize(img_grey.astype(float32), None, 0.0, 0.999,
                             cv2.NORM_MINMAX)

    objects, scale = findBox(img_grey)

    ######### convert
    # Crop to the window [yfrom, yto) x [xfrom, xto) -- full width, at most
    # the first 800 rows -- and re-express the boxes relative to that window.
    xfrom = 0
    xto = img_grey.shape[1]
    yfrom = 0
    yto = min(img_grey.shape[0], 800)
    img_grey = img_grey[yfrom:yto, xfrom:xto]
    objects2 = []
    for obj in objects:
        topy = obj[0].start
        boty = obj[0].stop
        x = (obj[1].start + obj[1].stop) / 2
        # Keep only boxes whose top, bottom and horizontal centre all lie
        # inside the window.
        if yfrom <= topy < yto and yfrom <= boty < yto and xfrom <= x < xto:
            object2 = (slice(obj[0].start - yfrom, obj[0].stop - yfrom, None),
                       slice(obj[1].start - xfrom, obj[1].stop - xfrom, None))
            objects2.append(object2)

    objects = objects2

    ######### end convert

    h, w = img_grey.shape
    img = (cv2.cvtColor(img_grey, cv2.COLOR_GRAY2BGR) * 255).astype(np.uint8)

    # Two (w + 1) x (h + 1) grids indexed as [x][y]: `nodes` stores the
    # (topy, boty, x) triple at each box's bottom point, `points` marks
    # whether a position is the top or the bottom of a box.
    nodes = [[None for j in range(h + 1)] for i in range(w + 1)]
    points = [[SubLine.ISEMPTY for j in range(h + 1)] for i in range(w + 1)]
    clearedList = set()  ## temporary solution

    # Process boxes from left to right (sorted by horizontal centre).
    objects = sorted(objects, key=lambda obj: (obj[1].start + obj[1].stop) / 2)
    for bound in objects:
        topy = bound[0].start
        boty = bound[0].stop
        x = (bound[1].start + bound[1].stop) / 2
        top = (x, topy)
        bottom = (x, boty)
        points[bottom[0]][bottom[1]] = SubLine.ISBOT
        points[top[0]][top[1]] = SubLine.ISTOP
        nodes[bottom[0]][bottom[1]] = (topy, boty, x)

    allines = []

    # NOTE(review): hard-coded absolute path to the model file.
    lemodel = LeModelChooseLine(
        '/home/loitg/Downloads/complex-bg/le_model.pkl')

    def move(subline, allnodes, allpoints, img):
        # Recursively follow a sub-line: every successor returned by
        # `subline.next` is recorded once (guarded by its `isnew` flag) and
        # followed in turn; a sub-line without successors is cleared.
        newsublines = subline.next(allnodes, allpoints, img)
        if len(newsublines) > 0:
            for new in newsublines:
                if new.isnew:
                    allines.append(new)
                    new.isnew = False
                print '______________++++++++++++++=' + new.id
                move(new, allnodes, allpoints, img)
        else:
            subline.clear(allnodes, clearedList)

#     illu = img.copy()
#     for bound in objects:
#         cv2.circle(illu,((bound[1].start + bound[1].stop)/2, bound[0].start),2, (255,0,0),-1)
#         cv2.circle(illu,((bound[1].start + bound[1].stop)/2, bound[0].stop), 2, (0,255,0),-1)
#         cv2.line(illu, ((bound[1].start + bound[1].stop)/2, bound[0].start), ((bound[1].start + bound[1].stop)/2, bound[0].stop), (0,0,255),1)
#     cv2.imshow('ii', illu)

    # Start a new sub-line at every box that has not been cleared yet.
    for bound in objects:  # sorted
        topy = bound[0].start
        boty = bound[0].stop
        x = (bound[1].start + bound[1].stop) / 2
        if (topy, boty, x) in clearedList:
            continue
        subline = SubLine(topy=topy, boty=boty, x=x, lemodel=lemodel)
        allines.append(subline)
        subline.isnew = False
        try:
            move(subline, nodes, points, img)
        except Exception as e:
            # NOTE(review): any failure while tracing is silently ignored.
            pass
    ### illustrate
    img2 = img.copy()
    for line in allines:
        try:
            col = str2col(line.id)
            line.draw(img2, col, 0.5, drawyhat=False)
        except Exception as e:
            # NOTE(review): drawing failures are silently ignored as well.
            pass


#     cv2.imshow('lines', img2)
#     cv2.waitKey(-1)

    return img2

Example #19

0

Show file

File: ocropus.py Project: mikesname/python-ocrlab

 def process(self):
     """Read the image at the configured ``path`` parameter as grayscale.

     Returns ``self.null_data()`` when the parameter is missing or the file
     does not exist on disk.
     """
     # TODO: Ensure we can also read a filehandle
     path = self._params.get("path", "")
     if os.path.exists(path):
         return ocrolib.read_image_gray(makesafe(self._params.get("path")))
     return self.null_data()

Example #20

0

Show file

def analyze_page_layout(binary, gray, rgb=None):
    """Segment a binarized page into text lines sorted in reading order.

    The binary image is inverted, its scale is estimated and a page
    segmentation is computed; the resulting lines are put into reading
    order and the segmentation labels are renumbered accordingly.

    Three optional stages (reading test images from disk, drawing bounding
    boxes on ``rgb``, writing the segmentation and line images) are present
    but switched off by local flags.

    Args:
        binary: binarized page image; inverted via ``1 - binary`` before use.
        gray: grayscale page image, used only by the disabled output stage.
        rgb: optional colour image, used only by the disabled visualisation.

    Returns:
        None -- with all optional stages disabled nothing is returned or
        written.
    """
    # Segmentation parameters.
    threshold = 0.2  # baseline threshold.
    hscale = 1.0  # Non-standard scaling of horizontal parameters.
    vscale = 1.0  # Non-standard scaling of vertical parameters.
    usegauss = True  # Use gaussian instead of uniform.
    blackseps = True
    maxseps = 0  # Maximum black column separators.
    sepwiden = 10  # Widen black separators (to account for warping).
    maxcolseps = 3  # Maximum # whitespace column separators.
    csminheight = 10  # Minimum column height (units=scale).

    # Line extraction parameters.
    noise = 8  # Noise threshold for removing small components from lines.
    pad = 3  # Padding for extracted lines.
    expand = 3  # Expand mask for grayscale extraction.
    gray_output = True  # Also output lines extracted from the grayscale page.

    # Switches for the optional stages (all disabled).
    load_test_images = False
    show_bounding_boxes = False
    write_output = False

    if load_test_images:
        bin_image_filepath = './ocropy_test.bin.png'
        gray_image_filepath = './ocropy_test.nrm.png'

        binary = ocrolib.read_image_binary(bin_image_filepath)
        gray = ocrolib.read_image_gray(gray_image_filepath)

    binary = 1 - binary  # Invert.

    scale = psegutils.estimate_scale(binary)
    segmentation = compute_segmentation(binary, scale, blackseps, maxseps,
                                        maxcolseps, csminheight, sepwiden,
                                        usegauss, vscale, hscale, threshold,
                                        quiet=True)

    lines = psegutils.compute_lines(segmentation, scale)
    bounds = [line.bounds for line in lines]
    lsort = psegutils.topsort(psegutils.reading_order(bounds))

    # Renumber the labels so that they conform to the specs.
    renumber = np.zeros(np.amax(segmentation) + 1, 'i')
    for rank, line_index in enumerate(lsort, 1):
        renumber[lines[line_index].label] = 0x010000 + rank
    segmentation = renumber[segmentation]  # Image.

    lines = [lines[line_index] for line_index in lsort]

    # Visualize bounding boxes.
    if show_bounding_boxes:
        if rgb is not None:
            # REF [function] >> extract_masked() in ${OCROPY_HOME}/ocrolib/psegutils.py.
            for line in lines:
                rows, cols = line.bounds[0], line.bounds[1]
                y0 = int(rows.start)
                x0 = int(cols.start)
                y1 = int(rows.stop)
                x1 = int(cols.stop)
                cv2.rectangle(rgb, (x0, y0), (x1, y1), (0, 0, 255), 1,
                              cv2.LINE_AA)
            cv2.imshow('Image', rgb)
            cv2.waitKey(0)

    # Output everything.
    if write_output:
        if not os.path.exists(outputdir):
            os.mkdir(outputdir)

        ocrolib.write_page_segmentation("%s.pseg.png" % outputdir,
                                        segmentation)
        cleaned = ocrolib.remove_noise(binary, noise)
        for number, line in enumerate(lines, 1):
            binline = psegutils.extract_masked(1 - cleaned,
                                               line,
                                               pad=pad,
                                               expand=expand)  # Image.
            ocrolib.write_image_binary(
                "%s/01%04x.bin.png" % (outputdir, number), binline)
            if gray_output:
                grayline = psegutils.extract_masked(gray,
                                                    line,
                                                    pad=pad,
                                                    expand=expand)  # Image.
                ocrolib.write_image_gray(
                    "%s/01%04x.nrm.png" % (outputdir, number), grayline)

Example #21

0

Show file

File: test_lineextract.py Project: tgialoimtr/cnn_lstm_ctc_ocr

def extractLines(imgpath, param):
    """Binarize a page and collect candidate characters into grid cells.

    The image is read as grayscale, deskewed, min-max normalized and
    binarized with ``sauvola``.  Connected components that pass a set of
    size/aspect filters are registered in four overlapping cell grids (one
    ``SubLineFinder`` per occupied cell); the sub-line found in each cell is
    drawn on a BGR copy of the image.

    NOTE(review): this is Python 2 code (``print`` statement,
    ``dict.iteritems``; ``/`` on ints is relied upon for integer indices).

    Args:
        imgpath: path of the image to read.
        param: object providing ``w`` and ``k`` for ``sauvola``.

    Returns:
        Tuple ``(binary, illustration)``: the uint8 binary image (after
        splitting wide components) and the BGR image with sub-lines drawn
        and accepted boxes highlighted.
    """
    img_grey = ocrolib.read_image_gray(imgpath)
    (h, w) = img_grey.shape[:2]
    # Estimate the skew on the central half of the image, downscaled by 0.5.
    img00 = cv2.resize(img_grey[h / 4:3 * h / 4, w / 4:3 * w / 4],
                       None,
                       fx=0.5,
                       fy=0.5)
    angle = estimate_skew_angle(img00, linspace(-5, 5, 42))
    # 'goc' is presumably Vietnamese for "angle" (debug output).
    print 'goc', angle

    rotM = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
    img_grey = cv2.warpAffine(img_grey, rotM, (w, h))

    h, w = img_grey.shape
    # Min-max normalize to the range [0.0, 0.999].
    img_grey = cv2.normalize(img_grey.astype(float32), None, 0.0, 0.999,
                             cv2.NORM_MINMAX)
    binary = sauvola(img_grey,
                     w=param.w,
                     k=param.k,
                     scaledown=0.2,
                     reverse=True)  ### PARAM
    # NOTE(review): `args` is not a parameter of this function; presumably a
    # module-level namespace -- confirm it is defined where this is used.
    binary = morph.r_closing(binary.astype(bool), (args.connect, 1))
    # Central half of the binary image, used only for the scale estimate.
    binaryary = binary[h / 4:3 * h / 4, w / 4:3 * w / 4]
    binary = binary.astype(np.uint8)
    labels, n = morph.label(binaryary)
    objects = morph.find_objects(labels)

    # Scale estimate: visit components by increasing area, write sqrt(area)
    # into regions not yet covered, then take the median of the values
    # strictly between 3 and 100.
    bysize = sorted(objects, key=sl.area)
    scalemap = zeros(binaryary.shape)
    for o in bysize:
        if amax(scalemap[o]) > 0: continue
        scalemap[o] = sl.area(o)**0.5
    scale = median(scalemap[(scalemap > 3) & (scalemap < 100)])
    objects = psegutils.binary_objects(binary)
    boxmap = zeros(binary.shape, dtype=np.uint8)

    # Cell grid: nominal cell size is 6 x 2.5 scale units, then adjusted so
    # that a whole number of cells spans the image.
    imgwidth = binary.shape[1]
    imgheight = binary.shape[0]
    cellwidth = 6 * scale
    cellheight = 2.5 * scale
    N_x = int(round(imgwidth / cellwidth))
    cellwidth = int(round(imgwidth / N_x))
    N_y = int(round(imgheight / cellheight))
    cellheight = int(round(imgheight / N_y))
    # One {cellid: SubLineFinder} dict per cell type 0..3.
    cells_list = [{}, {}, {}, {}]

    def pixel2cell2id(pixel_x, pixel_y, CELLTYPE):
        # Map a pixel to its cell in the grid of the given type.  Type 0 is
        # the unshifted grid; type 1 is shifted by half a cell vertically,
        # type 2 by half a cell horizontally, type 3 in both directions.
        # Returns (cell origin in pixels, cell id), or (None, None) when the
        # shifted pixel is not strictly positive in both coordinates.
        dx = 0
        dy = 0
        if CELLTYPE == 3:
            pixel_x -= cellwidth / 2
            pixel_y -= cellheight / 2
            dx = cellwidth / 2
            dy = cellheight / 2
        if CELLTYPE == 2:
            pixel_x -= cellwidth / 2
            dx = cellwidth / 2
        if CELLTYPE == 1:
            pixel_y -= cellheight / 2
            dy = cellheight / 2
        if pixel_x <= 0 or pixel_y <= 0: return None, None
        cellcoord = (pixel_x / cellwidth, pixel_y / cellheight)
        cellid = cellcoord[0] + cellcoord[1] * N_x
        cellcoord = (cellcoord[0] * cellwidth + dx,
                     cellcoord[1] * cellheight + dy)
        return cellcoord, cellid

    def id2cell2pixel(cellid, x, y, CELLTYPE):
        # Inverse mapping: cell id plus in-cell offset -> pixel position.
        # Only cell type 3 gets the half-cell shift here.  Not called
        # anywhere within this function.
        cellcoord = (cellid % N_x, cellid / N_x)
        pixel_x = cellcoord[0] * cellwidth + x
        pixel_y = cellcoord[1] * cellheight + y
        if CELLTYPE == 3:
            pixel_x += cellwidth / 2
            pixel_y += cellheight / 2
        return cellcoord, pixel_x, pixel_y

    # From here on img_grey is a BGR uint8 image used for illustration.
    img_grey = (cv2.cvtColor(img_grey, cv2.COLOR_GRAY2BGR) * 255).astype(
        np.uint8)

    # Split wide components: for aspect ratios strictly between 1 and 6,
    # blank out a one-pixel column every `recommended_width` pixels.
    for o in objects:
        h = sl.dim0(o)
        w = sl.dim1(o)
        ratio = float(w) / h
        ### Dirty cheat
        if ratio > 1 and ratio < 6:
            recommended_width = max(int(0.6 * (o[0].stop - o[0].start)),
                                    int(scale * 0.6), 5)
            for pos in range(o[1].start + recommended_width, o[1].stop,
                             recommended_width):
                binary[o[0].start:o[0].stop, pos:pos + 1] = np.uint8(0)
    # Recompute the components after the split.
    objects = psegutils.binary_objects(binary)

    for o in objects:
        h = sl.dim0(o)
        w = sl.dim1(o)
        a = h * w
        #         black = float(sum(binary[o]))/a
        #         if sl.area(o)**.5<threshold[0]*scale: continue
        #         if sl.area(o)**.5>threshold[1]*scale: continue
        # Reject components that are too tall, too wide for their height,
        # too small, or too elongated in either direction.
        if h > 5 * scale: continue
        #         if h < 0.4*scale: continue
        if w > 4 * scale and (h > 2 * scale or h < 0.5 * scale): continue
        if a < 0.25 * scale * scale: continue
        if float(h) / w > 10: continue
        ratio = float(w) / h
        if ratio > 10: continue

        ### Add object as candidate character
        # The reference point is the bottom-centre of the bounding box.
        pixel_x, pixel_y = (o[1].start + o[1].stop) / 2, o[0].stop
        for celltype in range(4):
            cellcoord, cellid = pixel2cell2id(pixel_x,
                                              pixel_y,
                                              CELLTYPE=celltype)
            if cellcoord is None or cellid is None: continue
            cellbound = slice(cellcoord[1], cellcoord[1] + cellheight,
                              None), slice(cellcoord[0],
                                           cellcoord[0] + cellwidth, None)
            # The first character in a cell creates its SubLineFinder; later
            # ones are added to it.
            if cellid not in cells_list[celltype]:
                cells_list[celltype][cellid] = SubLineFinder(
                    window_size=max(3, scale / 6),
                    cellbound=cellbound,
                    initChar=o)
            else:
                cells_list[celltype][cellid].addChar(o)

        # Mark the accepted box in boxmap, shrunk by 3 px at the far edges
        # (or fixed at 5 px for boxes of extent 8 or less).
        y0 = o[0].start
        y1 = o[0].stop - 3 if o[0].stop - o[0].start > 8 else o[0].start + 5
        x0 = o[1].start
        x1 = o[1].stop - 3 if o[1].stop - o[1].start > 8 else o[1].start + 5
        boxmap[y0:y1, x0:x1] = 1

    # Draw the sub-line found in each cell, one colour per cell type.
    for celltype in range(4):
        if celltype == 0: col = (255, 0, 0)
        if celltype == 1: col = (0, 255, 0)
        if celltype == 2: col = (255, 255, 0)
        if celltype == 3: col = (0, 0, 255)
        for cellid, subline in cells_list[celltype].iteritems():
            #             cv2.rectangle(img_grey, (subline.cellbound[1].start+celltype, subline.cellbound[0].start+celltype), (subline.cellbound[1].stop+celltype, subline.cellbound[0].stop+celltype), col,1)
            line = subline.subline()
            if line is not None:
                pos1 = (int(line[0][0]), int(line[0][1]))
                pos2 = (int(line[1][0]), int(line[1][1]))
                #                 print cellid, pos1, pos2
                cv2.line(img_grey, pos1, pos2, col, 1)
    ### illustrate/debug first round

    # Overlay the accepted boxes by adding (0, 50, 50) to their pixels.
    return binary, cv2.add(img_grey, (boxmap[:, :, np.newaxis] *
                                      np.array([0, 50, 50])).astype(np.uint8))

Example #22

0

Show file

File: ocropus.py Project: ankitrajshree/python-ocrlab

 def process(self):
     """Load the grayscale image named by the ``path`` parameter.

     Falls back to ``self.null_data()`` if no such file exists (including
     when the parameter is not set).
     """
     # TODO: Ensure we can also read a filehandle
     image_path = self._params.get("path", "")
     if os.path.exists(image_path):
         return ocrolib.read_image_gray(makesafe(self._params.get("path")))
     return self.null_data()

Example #23

0

Show file

def process(job):
    """Normalize, deskew and binarize one page image.

    The image is read as grayscale, stretched to the 0..1 range, flattened
    by subtracting an estimated local white level, optionally rotated by an
    estimated skew angle, rescaled between the ``lo`` and ``hi`` percentiles
    and thresholded.  The result is written as ``<base>.bin.png``.

    Settings are read from the module-level ``args`` mapping: ``parallel``,
    ``nocheck``, ``zoom``, ``perc``, ``range``, ``maxskew``, ``skewsteps``,
    ``bignore``, ``escale``, ``lo``, ``hi`` and ``threshold``.

    Args:
        job: tuple ``(fname, i)`` -- the image file name and its index
            (the index is only used in a progress message).

    Returns:
        The path of the written binary image, or ``None`` when the image is
        empty (constant intensity) or rejected by ``check_page``.
    """
    fname, i = job
    print_info("# %s" % (fname))
    if args['parallel'] < 2: print_info("=== %s %-3d" % (fname, i))
    raw = ocrolib.read_image_gray(fname)

    # perform image normalization
    image = raw - amin(raw)
    if amax(image) == amin(image):
        print_info("# image is empty: %s" % (fname))
        return
    image /= amax(image)

    if not args['nocheck']:
        check = check_page(amax(image) - image)
        if check is not None:
            # Separators added so the parts of the message do not run
            # together.
            print_error(fname + " SKIPPED. " + check +
                        " (use -n to disable this check)")
            return

    # flatten the image by estimating the local whitelevel
    comment = ""
    if args['parallel'] < 2: print_info("flattening")
    m = interpolation.zoom(image, args['zoom'])
    m = filters.percentile_filter(m, args['perc'], size=(args['range'], 2))
    m = filters.percentile_filter(m, args['perc'], size=(2, args['range']))
    m = interpolation.zoom(m, 1.0 / args['zoom'])
    # zooming down and up again may change the size slightly, so only the
    # common region is used
    w, h = minimum(array(image.shape), array(m.shape))
    flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)

    # estimate skew angle and rotate
    if args['maxskew'] > 0:
        if args['parallel'] < 2: print_info("estimating skew angle")
        d0, d1 = flat.shape
        o0, o1 = int(args['bignore'] * d0), int(args['bignore'] * d1)
        # The image is inverted for skew estimation and rotation, then
        # inverted back afterwards.
        flat = amax(flat) - flat
        flat -= amin(flat)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        ma = args['maxskew']
        ms = int(2 * args['maxskew'] * args['skewsteps'])
        angle = estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        flat = amax(flat) - flat
    else:
        angle = 0

    # estimate low and high thresholds
    if args['parallel'] < 2: print_info("estimating thresholds")
    d0, d1 = flat.shape
    # ignore a border of `bignore` (fraction of each dimension)
    o0, o1 = int(args['bignore'] * d0), int(args['bignore'] * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    if args['escale'] > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        e = args['escale']
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), args['lo'])
    hi = stats.scoreatpercentile(est.ravel(), args['hi'])
    # rescale the image to get the gray scale image
    if args['parallel'] < 2: print_info("rescaling")
    # NOTE(review): hi == lo is not guarded against here.
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    # Named `binarized` rather than `bin` to avoid shadowing the builtin.
    binarized = 1 * (flat > args['threshold'])

    # output the thresholded image
    print_info("%s lo-hi (%.2f %.2f) angle %4.1f %s" %
               (fname, lo, hi, angle, comment))
    if args['parallel'] < 2: print_info("writing")
    base, _ = ocrolib.allsplitext(fname)
    outputfile_bin = base + ".bin.png"
    ocrolib.write_image_binary(outputfile_bin, binarized)
    return outputfile_bin

Example #24

0

Show file

File: rpred.py Project: muxuezi/expressfee

def process1(arg):
    (trial,fname) = arg
    base,_ = ocrolib.allsplitext(fname)
    line = ocrolib.read_image_gray(fname)
    if prod(line.shape)==0: return None
    if amax(line)==amin(line): return None

    if not args.nolineest:
        assert "dew.png" not in fname,"don't dewarp dewarped images"
        temp = amax(line)-line
        temp = temp*1.0/amax(temp)
        lnorm.measure(temp)
        line = lnorm.normalize(line,cval=amax(line))
    else:
        assert "dew.png" in fname,"only apply to dewarped images"

    line = lstm.prepare_line(line,args.pad)
    pred = network.predictString(line)

    if not args.nonormalize:
        pred = ocrolib.normalize_text(pred)

    if args.estrate:
        try:
            gt = ocrolib.read_text(base+".gt.txt")
        except:
            return (0,[],0,trial,fname)
        pred0 = ocrolib.project_text(pred,args.compare)
        gt0 = ocrolib.project_text(gt,args.compare)
        if args.estconf>0:
            err,conf = edist.xlevenshtein(pred0,gt0,context=args.context)
        else:
            err = edist.xlevenshtein(pred0,gt0)
            conf = []
        if not args.quiet:
            print "%3d %3d"%(err,len(gt)),fname,":",pred
            sys.stdout.flush()
        return (err,conf,len(gt0),trial,fname)

    if not args.quiet:
        print pred
        # print fname,":",pred
    # ocrolib.write_text(base+".txt",pred)

    if args.show>0 or args.save is not None:
        ion()
        matplotlib.rc('xtick',labelsize=7)
        matplotlib.rc('ytick',labelsize=7)
        matplotlib.rcParams.update({"font.size":7})
        if os.path.exists(base+".gt.txt"):
            transcript = ocrolib.read_text(base+".gt.txt")
            transcript = ocrolib.normalize_text(transcript)
        else:
            transcript = pred
        pred2 = network.trainString(line,transcript,update=0)
        figure("result",figsize=(1400//75,800//75),dpi=75)
        clf()
        subplot(311)
        imshow(line.T,cmap=cm.gray)
        title(transcript)
        subplot(312)
        gca().set_xticks([])
        imshow(network.outputs.T[1:],vmin=0,cmap=cm.hot)
        title(pred[:80])
        subplot(313)
        plot(network.outputs[:,0],color='yellow',linewidth=3,alpha=0.5)
        plot(network.outputs[:,1],color='green',linewidth=3,alpha=0.5)
        plot(amax(network.outputs[:,2:],axis=1),color='blue',linewidth=3,alpha=0.5)
        plot(network.aligned[:,0],color='orange',linestyle='dashed',alpha=0.7)
        plot(network.aligned[:,1],color='green',linestyle='dashed',alpha=0.5)
        plot(amax(network.aligned[:,2:],axis=1),color='blue',linestyle='dashed',alpha=0.5)
        if args.save is not None:
            draw()
            savename = args.save
            if "%" in savename: savename = savename%trial
            print "saving",savename
            savefig(savename,bbox_inches=0)
        if trial==len(inputs)-1:
            ginput(1,99999999)
        else:
            ginput(1,args.show)
    return None

Example #25

0

Show file

File: lineest.py Project: beanyh/yeobaek

     km = lem.shapedict
     ion(); gray()
     ocrolib.showgrid(km.centers().reshape(*lem.xls.shape)+lem.xls*2)
     ginput(1,1000)
     ocrolib.showgrid(km.centers().reshape(*lem.bls.shape)+lem.bls*2)
     ginput(1,1000)
     sys.exit(0)
 elif args.subcommand=="showline":
     with open(args.line_estimator) as stream:
         lem = cPickle.load(stream)
     print "loaded",lem
     for fname in args.images:
         try:
             print "***",fname
             clf()
             image = 1-ocrolib.read_image_gray(fname)
             limit = min(image.shape[1],args.xlimit)
             blp,xlp = lem.lineFit(image,order=args.order)
             print "baseline",blp
             print "xline",xlp
             title("fname")
             subplot(311); imshow((lem.blimage-lem.xlimage)[:,:limit])
             title("fname")
             subplot(312); imshow((lem.blimage-lem.xlimage+image)[:,:limit])
             gray()
             subplot(313); imshow(image[:,:limit])
             xlim(0,limit); ylim(len(image),0)
             xs = range(image.shape[1])[:limit]
             plot(xs,polyval(blp,xs))
             plot(xs,polyval(xlp,xs))
             ginput(1,1000)

Example #26

0

Show file

for trial in range(start, args.ntrain):
    network.last_trial = trial + 1

    # Display is due on every args.display-th trial (never if display <= 0).
    do_display = args.display > 0 and trial % args.display == 0

    # In movie mode a display trial uses the fixed movie sample and clears
    # the update flag; every other trial draws a random input line.
    if args.movie and do_display:
        do_update = 0
        fname = args.moviesample
    else:
        do_update = 1
        fname = pyrandom.sample(inputs, 1)[0]

    # Load the line image together with its ground-truth transcript;
    # unreadable samples are reported and skipped.
    base, _ = ocrolib.allsplitext(fname)
    try:
        line = ocrolib.read_image_gray(fname)
        transcript = ocrolib.read_text(base + ".gt.txt")
    except IOError as e:
        print("ERROR", e)
        continue

    if args.nolineest:
        assert "dew.png" in fname, "input must already be dewarped"
    else:
        assert "dew.png" not in fname, "don't dewarp already dewarped lines"
        network.lnorm.measure(np.amax(line) - line)
        line = network.lnorm.normalize(line, cval=np.amax(line))

    # Skip lines that are tiny or have no contrast at all.
    is_degenerate = line.size < 10 or np.amax(line) == np.amin(line)
    if is_degenerate:
        print("EMPTY-INPUT")
        continue

Example #27

0

Show file

     km = lem.shapedict
     ion()
     gray()
     ocrolib.showgrid(km.centers().reshape(*lem.xls.shape) + lem.xls * 2)
     ginput(1, 1000)
     ocrolib.showgrid(km.centers().reshape(*lem.bls.shape) + lem.bls * 2)
     ginput(1, 1000)
     sys.exit(0)
 elif args.subcommand == "showline":
     lem = common.load_object(args.line_estimator)
     print "loaded", lem
     for fname in args.images:
         try:
             print "***", fname
             clf()
             image = 1 - ocrolib.read_image_gray(fname)
             limit = min(image.shape[1], args.xlimit)
             blp, xlp = lem.lineFit(image, order=args.order)
             print "baseline", blp
             print "xline", xlp
             title("fname")
             subplot(311)
             imshow((lem.blimage - lem.xlimage)[:, :limit])
             title("fname")
             subplot(312)
             imshow((lem.blimage - lem.xlimage + image)[:, :limit])
             gray()
             subplot(313)
             imshow(image[:, :limit])
             xlim(0, limit)
             ylim(len(image), 0)

Example #28

0

Show file

File: ocropus.py Project: Ocropodium/ocropodium-ui

 def process(self):
     """Read the image named by the "path" parameter as a grayscale image.

     Returns ``self.null_data()`` when no path is configured or the
     configured path does not exist on disk.
     """
     path = self._params.get("path", "")
     if os.path.exists(path):
         return ocrolib.read_image_gray(makesafe(path))
     return self.null_data()

Example #29

0

Show file

        img_grey = img
    stree = sauvolatree(img_grey)
    scalemin, scalemax = extendRange(min(stree.scales), max(stree.scales), 3,
                                     3.5)
    traverseEditState(stree[(-1, -1)], scalemin, scalemax)
    objects = flattenByKeepState(stree)
    return objects, np.mean(stree.scales)


if __name__ == "__main__":
    #     imgpath = '/home/loitg/Downloads/complex-bg/special_line/'
    imgpath = '/home/loitg/Downloads/complex-bg/tmp/'
    for filename in os.listdir(imgpath):
        if filename[-3:].upper() == 'JPG':
            print filename
            img_grey = ocrolib.read_image_gray(imgpath + filename)
            stree = sauvolatree(img_grey)
            if len(stree.scales) == 0: continue
            scalemin, scalemax = extendRange(min(stree.scales),
                                             max(stree.scales), 3, 3.5)
            traverseEditState(stree[(-1, -1)], scalemin, scalemax)
            objects = flattenByKeepState(stree)
            illu = cv2.cvtColor(stree.bins[1] * 255, cv2.COLOR_GRAY2BGR)
            illu = cv2.resize(illu, None, fx=6.0, fy=6.0)
            for bound in objects:
                cv2.circle(illu, (bound[1].start * 3 + bound[1].stop * 3,
                                  bound[0].start * 6), 3, (0, 0, 255), -1)
                cv2.circle(illu, (bound[1].start * 3 + bound[1].stop * 3,
                                  bound[0].stop * 6), 3, (0, 255, 0), -1)
                cv2.line(illu, (bound[1].start * 3 + bound[1].stop * 3,
                                bound[0].start * 6),

Example #30

0

Show file

    def _process_segment(self, page, filename, page_id, file_id):
        """Deskew the image in ``filename`` and record the result on ``page``.

        Steps, all configured through ``self.parameter``:

        1. read the image as grayscale;
        2. if ``maxskew`` > 0, estimate the skew angle on the image with its
           ``bignore`` border removed and rotate the image by that angle;
        3. estimate low/high gray levels (``lo``/``hi`` percentiles), rescale
           to [0, 1] and threshold at ``threshold``;
        4. store the angle as the page orientation, save an image through the
           workspace and attach it to ``page`` as an AlternativeImage with
           the comment "deskewed".

        NOTE(review): step 4 saves ``bin_image``, which is never assigned in
        this method, while the thresholded array ``deskewed`` is computed and
        then left unused. Unless ``bin_image`` is a module-level name, the
        save call raises NameError -- confirm what should be saved.
        """
        if self.parameter['parallel'] < 2:
            LOG.info("INPUT FILE %s ", filename)
        raw = ocrolib.read_image_gray(filename)

        # `flat` aliases `raw`; the in-place operations below therefore
        # modify the same array unless the skew branch rebinds `flat` first.
        flat = raw
        #flat = np.array(binImg)
        # estimate skew angle and rotate
        if self.parameter['maxskew'] > 0:
            if self.parameter['parallel'] < 2:
                LOG.info("Estimating Skew Angle")
            d0, d1 = flat.shape
            # o0/o1: border widths (a `bignore` fraction of each dimension)
            # excluded from the estimate
            o0, o1 = int(self.parameter['bignore'] * d0), int(
                self.parameter['bignore'] * d1)
            # invert the image and shift its minimum to 0 before estimating
            flat = amax(flat) - flat
            flat -= amin(flat)
            est = flat[o0:d0 - o0, o1:d1 - o1]
            # candidate angles span [-maxskew, maxskew] in ms + 1 steps
            ma = self.parameter['maxskew']
            ms = int(2 * self.parameter['maxskew'] *
                     self.parameter['skewsteps'])
            angle = self.estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
            flat = interpolation.rotate(flat,
                                        angle,
                                        mode='constant',
                                        reshape=0)
            # undo the inversion applied before the rotation
            flat = amax(flat) - flat
        else:
            angle = 0

        # self.write_angles_to_pageXML(base,angle)
        # estimate low and high thresholds
        if self.parameter['parallel'] < 2:
            LOG.info("Estimating Thresholds")
        d0, d1 = flat.shape
        o0, o1 = int(self.parameter['bignore'] * d0), int(
            self.parameter['bignore'] * d1)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        if self.parameter['escale'] > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            e = self.parameter['escale']
            # v: local standard deviation (Gaussian-smoothed), thresholded at
            # 30% of its maximum and dilated along each axis in turn
            v = est - filters.gaussian_filter(est, e * 20.0)
            v = filters.gaussian_filter(v**2, e * 20.0)**0.5
            v = (v > 0.3 * amax(v))
            v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
            v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
            if self.parameter['debug'] > 0:
                imshow(v)
                ginput(1, self.parameter['debug'])
            # boolean indexing flattens `est` to the selected pixels
            est = est[v]
        lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
        hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])
        # rescale the image to get the gray scale image
        if self.parameter['parallel'] < 2:
            LOG.info("Rescaling")
        # NOTE(review): no guard for hi == lo, which would divide by zero
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if self.parameter['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        # 0/1 integer array: 1 where the rescaled value exceeds the threshold
        deskewed = 1 * (flat > self.parameter['threshold'])

        # output the normalized grayscale and the thresholded images
        LOG.info("%s lo-hi (%.2f %.2f) angle %4.1f" %
                 (filename, lo, hi, angle))
        if self.parameter['parallel'] < 2:
            LOG.info("Writing")
        #ocrolib.write_image_binary(base+".ds.png", deskewed)

        #TODO: Needs clarification, as the result affects the following pre-processing steps.
        #orientation = -angle
        #orientation = 180 - ((180 - orientation) % 360)
        page.set_orientation(angle)

        # NOTE(review): `bin_image` is not defined anywhere in this method
        # (see the docstring); `deskewed` above is the array that was computed.
        file_path = self.workspace.save_image_file(bin_image,
                                                   file_id,
                                                   page_id=page_id,
                                                   file_grp=self.image_grp)
        page.add_AlternativeImage(
            AlternativeImageType(filename=file_path, comment="deskewed"))

Example #31

0

Show file

def process1(job):
    """Binarize one page image and write its ``.bin.png``/``.nrm.png`` files.

    ``job`` is a ``(fname, i)`` pair: the path of the input image and its
    index, which is only used for logging.  All settings come from the
    module-level ``args``.

    Returns the path of the written binary image (``<base>.bin.png``), or
    ``None`` when the image is empty or rejected by ``check_page``.
    """
    fname, i = job
    print_info("# %s" % (fname))
    verbose = args.parallel < 2
    if verbose:
        print_info("=== %s %-3d" % (fname, i))

    def debug_show(picture, clear=False, **imshow_kwargs):
        # Show an intermediate result and wait, but only in debug mode.
        if args.debug > 0:
            if clear:
                clf()
            imshow(picture, **imshow_kwargs)
            ginput(1, args.debug)

    raw = ocrolib.read_image_gray(fname)
    dshow(raw, "input")

    # Normalize: shift the minimum to 0, then scale the maximum to 1.
    image = raw - amin(raw)
    if amax(image) == amin(image):
        print_info("# image is empty: %s" % (fname))
        return
    image /= amax(image)

    # Optional sanity check, run on the inverted image.
    if not args.nocheck:
        problem = check_page(amax(image) - image)
        if problem is not None:
            print_error(fname + " SKIPPED. " + problem +
                        " (use -n to disable this check)")
            return

    # Fraction of pixels that are already nearly black or nearly white;
    # forced to 0 when grayscale processing is requested.
    if args.gray:
        extreme = 0
    else:
        saturated = sum(image < 0.05) + sum(image > 0.95)
        extreme = saturated * 1.0 / prod(image.shape)

    if extreme > 0.95:
        # Effectively binary already: skip the flattening step.
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        # Estimate the local white level on a zoomed copy with two
        # percentile filters, then subtract it from the image.
        if verbose:
            print_info("flattening")
        whitelevel = interpolation.zoom(image, args.zoom)
        whitelevel = filters.percentile_filter(
            whitelevel, args.perc, size=(args.range, 2))
        whitelevel = filters.percentile_filter(
            whitelevel, args.perc, size=(2, args.range))
        whitelevel = interpolation.zoom(whitelevel, 1.0 / args.zoom)
        debug_show(whitelevel, clear=True, vmin=0, vmax=1)
        # Zooming down and up again may change the shape slightly, so both
        # arrays are cropped to their common extent.
        rows, cols = minimum(array(image.shape), array(whitelevel.shape))
        flat = clip(image[:rows, :cols] - whitelevel[:rows, :cols] + 1, 0, 1)
        debug_show(flat, clear=True, vmin=0, vmax=1)

    # Estimate the low and high gray levels, ignoring a border that is a
    # `bignore` fraction of each dimension.
    if verbose:
        print_info("estimating thresholds")
    height, width = flat.shape
    margin_y = int(args.bignore * height)
    margin_x = int(args.bignore * width)
    sample = flat[margin_y:height - margin_y, margin_x:width - margin_x]
    if args.escale > 0:
        # Restrict the estimate to regions with significant local variance,
        # which makes the percentile-based levels more reliable.
        scale = args.escale
        mask = sample - filters.gaussian_filter(sample, scale * 20.0)
        mask = filters.gaussian_filter(mask**2, scale * 20.0)**0.5
        mask = (mask > 0.3 * amax(mask))
        mask = morphology.binary_dilation(
            mask, structure=ones((int(scale * 50), 1)))
        mask = morphology.binary_dilation(
            mask, structure=ones((1, int(scale * 50))))
        debug_show(mask)
        sample = sample[mask]
    lo = stats.scoreatpercentile(sample.ravel(), args.lo)
    hi = stats.scoreatpercentile(sample.ravel(), args.hi)

    # Rescale in place so that lo maps to 0 and hi to 1, then threshold.
    if verbose:
        print_info("rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    debug_show(flat, vmin=0, vmax=1)
    binary = 1 * (flat > args.threshold)

    # Report, optionally display, and write both output images.
    print_info("%s lo-hi (%.2f %.2f) %s" % (fname, lo, hi, comment))
    if verbose:
        print_info("writing")
    if args.debug > 0 or args.show:
        clf()
        gray()
        imshow(binary)
        ginput(1, max(0.1, args.debug))
    base, _ = ocrolib.allsplitext(fname)
    bin_path = base + ".bin.png"
    ocrolib.write_image_binary(bin_path, binary)
    ocrolib.write_image_gray(base + ".nrm.png", flat)
    return bin_path

Example #32

0

Show file

File: test_lineextract4.py Project: tgialoimtr/cnn_lstm_ctc_ocr

def extractLines2(imgpath):
    """Extract text-line images from the page image at ``imgpath``.

    Outline:

    1. load a classifier from a hard-coded pickle path;
    2. read the image as grayscale, estimate the skew on a half-size copy of
       its central region, rotate the image and min-max normalize it to
       [0, 0.999];
    3. obtain candidate boxes from ``findBox`` and mark their top/bottom
       points in ``pointsmap``;
    4. grow ``SubLine`` objects from the boxes (nested ``move``);
    5. merge sub-lines in two passes: first every "forced" combination,
       then repeatedly the lowest-scoring remaining candidate;
    6. show the result in an OpenCV window (blocks until a key is pressed)
       and cut each surviving line out of the image.

    Returns ``(None, None, retlines)`` where ``retlines`` is the list of
    extracted line images.

    NOTE(review): this is Python 2 code (``print`` statements). The ``/``
    divisions on ints used for slicing and pixel coordinates rely on
    Python 2 integer division.
    """
    # NOTE(review): hard-coded, machine-specific model path.
    clf = joblib.load('/home/loitg/Downloads/complex-bg/le_model_3.pkl')
    tt = time()
    img_grey = ocrolib.read_image_gray(imgpath)

    (h, w) = img_grey.shape[:2]
    # skew is estimated on the central half of the image, downscaled by 0.5
    img00 = cv2.resize(img_grey[h / 4:3 * h / 4, w / 4:3 * w / 4],
                       None,
                       fx=0.5,
                       fy=0.5)
    angle = estimate_skew_angle(img00, linspace(-5, 5, 42))
    print 'goc', angle
    # rotate the full image about its center by the estimated angle
    rotM = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
    img_grey = cv2.warpAffine(img_grey, rotM, (w, h))

    img_grey = cv2.normalize(img_grey.astype(float32), None, 0.0, 0.999,
                             cv2.NORM_MINMAX)

    # `scale` is not used further in this function
    objects, scale = findBox(img_grey)

    ######### convert
    #     xfrom=50; xto=img_grey.shape[1];
    #     yfrom=700; yto=1500#min(img_grey.shape[0], 800);
    #     img_grey = img_grey[yfrom:yto, xfrom:xto]
    #     objects2 = []
    #     for obj in objects:
    #         topy = obj[0].start
    #         boty = obj[0].stop
    #         x = (obj[1].start + obj[1].stop)/2
    #         if yfrom <= topy < yto and yfrom <= boty < yto and xfrom <= x < xto:
    #             object2 = (slice(obj[0].start - yfrom, obj[0].stop - yfrom, None), slice(obj[1].start - xfrom, obj[1].stop - xfrom, None))
    #             objects2.append(object2)
    #
    #     objects = objects2

    ######### end convert

    h, w = img_grey.shape
    # 8-bit BGR copy of the normalized image, used for drawing and extraction
    img = (cv2.cvtColor(img_grey, cv2.COLOR_GRAY2BGR) * 255).astype(np.uint8)

    cleared_maps = np.zeros((2, h, w), dtype=bool)
    pointsmap = np.zeros((h, w), dtype=np.uint8)

    # each object is a (row-slice, column-slice) pair; process them ordered
    # by horizontal center
    objects = sorted(objects, key=lambda obj: (obj[1].start + obj[1].stop) / 2)
    for bound in objects:
        topy = bound[0].start
        boty = bound[0].stop
        x = (bound[1].start + bound[1].stop) / 2
        # ignore boxes whose marker points would fall outside the image
        if topy >= h or boty >= h or x >= w: continue
        pointsmap[boty, x] = SubLine.ISBOT
        pointsmap[topy, x] = SubLine.ISTOP

    allines = []

    # illustration image: one dot per box top and one per box bottom
    illu = img.copy()
    for bound in objects:
        cv2.circle(illu,
                   ((bound[1].start + bound[1].stop) / 2, bound[0].start), 2,
                   (255, 0, 0), -1)
        cv2.circle(illu, ((bound[1].start + bound[1].stop) / 2, bound[0].stop),
                   2, (0, 255, 0), -1)

#         cv2.line(illu, ((bound[1].start + bound[1].stop)/2, bound[0].start), ((bound[1].start + bound[1].stop)/2, bound[0].stop), (0,0,255),1)

    def move(subline):
        """Recursively follow ``subline.next2()``; finalize and collect a
        sub-line once it yields no successors but has ``nextCount`` > 0."""
        newsublines = subline.next2()
        if len(newsublines) > 0:
            for new in newsublines:
                move(new)
        elif subline.nextCount > 0:
            subline._updateCurve()
            subline.clear(cleared_maps)
            subline._updateCombineInfo()
            allines.append(subline)


#             if self.nextCount > 1:
#                 temp1 = cv2.addWeighted(cv2.cvtColor((cleared_maps[0]*120).astype(uint8),cv2.COLOR_GRAY2BGR), 0.5, illu, 0.5,0)
#                 temp2 = cv2.addWeighted(cv2.cvtColor((cleared_maps[1]*120).astype(uint8),cv2.COLOR_GRAY2BGR), 0.5, illu, 0.5,0)
#                 cv2.imshow('bb', cv2.addWeighted(temp1,0.5,temp2,0.5,0))
#                 cv2.waitKey(-1)

    # seed a SubLine from every box that is at least 8 pixels tall and not
    # already flagged in both cleared maps
    for bound in objects:  # sorted
        topy = bound[0].start
        boty = bound[0].stop
        x = (bound[1].start + bound[1].stop) / 2
        if boty - topy < 8: continue
        try:
            if cleared_maps[0][topy, x] and cleared_maps[1][boty, x]:
                continue
        except Exception as e:
            # any failure of the lookup (e.g. an out-of-range index) silently
            # skips this box
            continue
        subline = SubLine(topy=topy,
                          boty=boty,
                          x=x,
                          clf=clf,
                          img=img,
                          pointsmap=pointsmap)
        move(subline)

    # Pass 1: merge every later line that scoreLineAfter() reports as a
    # forced combination. `i` only advances once nothing more is merged
    # into the current line.
    allines.sort(key=lambda x: x.bounds[1].stop)
    i = 0
    while i < len(allines):
        result = allines[i]
        if result.available:
            #             print '-------------------------'
            #             img2 = illu.copy()
            #             result.draw(img2, (125,125,125), 0.5, drawyhat=False)
            #             cv2.line(img2, (0, int(result.rightray.m)), (result.imgwidth, \
            #                 int(result.rightray.b*result.imgwidth + result.rightray.m)), (255,0,0),1)
            #             cv2.line(img2, (0, int(result.leftray.m)), (result.imgwidth, \
            #                 int(result.leftray.b*result.imgwidth + result.leftray.m)), (0,255,0),1)
            #             cv2.imshow('cb', img2)
            #             cv2.waitKey(-1)

            # `linemap` is filled here but never read in this pass
            linemap = []
            forceCombines = []
            for j in range(i, len(allines)):
                if j == i: continue
                candidate = allines[j]
                if not candidate.available: continue
                forcedCombined, score = result.scoreLineAfter(candidate)
                #                 cv2.waitKey(-1) ####################
                if forcedCombined:
                    forceCombines.append(candidate)
                elif score >= 0:
                    linemap.append((score, candidate))
            result.combineLinesAfter(forceCombines)

            #             for fcb in forceCombines:
            #                 col = str2col(fcb.id)
            #                 fcb.draw(img2, col, 0.5, drawyhat=False)
            #                 cv2.line(img2, (0, int(fcb.rightray.m)), (fcb.imgwidth, \
            #                     int(fcb.rightray.b*fcb.imgwidth + fcb.rightray.m)), (255,0,0),1)
            #                 cv2.line(img2, (0, int(fcb.leftray.m)), (fcb.imgwidth, \
            #                     int(fcb.leftray.b*fcb.imgwidth + fcb.leftray.m)), (0,255,0),1)
            #             cv2.imshow('cb', img2)
            #             cv2.waitKey(-1)

            if len(forceCombines) == 0: i += 1
            continue
        else:
            i += 1
            continue

    #########

    # Pass 2: for each available line, repeatedly merge the non-forced
    # candidate with the smallest non-negative score until none remains.
    i = 0
    while i < len(allines):
        result = allines[i]
        if result.available:
            #             print '------------------------- FAR'
            #             img2 = illu.copy()
            #             result.draw(img2, (125,125,125), 0.5, drawyhat=False)
            #             cv2.line(img2, (0, int(result.rightray.m)), (result.imgwidth, \
            #                 int(result.rightray.b*result.imgwidth + result.rightray.m)), (255,0,0),1)
            #             cv2.line(img2, (0, int(result.leftray.m)), (result.imgwidth, \
            #                 int(result.leftray.b*result.imgwidth + result.leftray.m)), (0,255,0),1)
            #             cv2.imshow('cb', img2)
            #             cv2.waitKey(-1)
            linemap = []
            for j in range(i, len(allines)):
                if j == i: continue
                candidate = allines[j]
                if not candidate.available: continue
                forcedCombined, score = result.scoreLineAfter(candidate)
                #                 cv2.waitKey(-1)
                if (not forcedCombined) and score >= 0:
                    linemap.append((score, candidate))
            if len(linemap) > 0:
                # min() over (score, candidate) tuples; note that `j` is
                # reused here to hold the winning score, not an index
                j, candidate = min(linemap)
                #                 candidate.draw(img2, str2col(candidate.id), 0.5, drawyhat=False)
                #                 cv2.imshow('cb-far', img2)
                #                 cv2.waitKey(-1)
                result.combineLinesAfter([candidate])
                continue
            else:
                i += 1
                continue
        else:
            i += 1
            continue

    # keep only the lines still flagged as available after both passes
    allines = [line for line in allines if line.available]

    print 'DONE LINE, now ILLUSTRATE **************, TOTAL LINE COUNT ' + str(
        len(allines))
    print 'TOTAL TIME  ' + str(time() - tt)
    img2 = illu.copy()
    for line in allines:
        try:
            line._updateCurve()
            col = str2col(line.id)
            line.draw(img2, col, 0.5, drawyhat=False, drawline=True)
        except Exception as e:
            # drawing is best effort; failures are silently ignored
            pass
    # NOTE(review): opens a GUI window and blocks until a key is pressed
    cv2.imshow('lines-ext', img2)
    cv2.waitKey(-1)

    # cut every remaining line out of the BGR image
    retlines = []
    for line in allines:
        line._updateCurve()
        line.expandPoints()
        imgline = line.extract(img, expands=SubLine.EP_FINAL_EXPAND)
        #         cv2.imshow('line', imgline)
        #         cv2.waitKey(-1)
        retlines.append(imgline)

    return None, None, retlines