def extract(image):
    """Segment a binarized page image into text lines and write them out.

    The image at ``image`` is read as a binary image, inverted, and passed
    through scale estimation and ``compute_segmentation``. The detected
    lines are put into reading order, the segmentation labels are
    renumbered accordingly, and the page segmentation plus one binary
    image per line are written below ``outputdir``.

    This is a best-effort operation: any ``Exception`` is reported (the
    word ``error`` on stdout, followed by the traceback) and swallowed.
    The function always returns ``None``.
    """
    import traceback

    try:
        binary = ocrolib.read_image_binary(image)
        binary = 1 - binary
        scale = psegutils.estimate_scale(binary)
        segmentation = compute_segmentation(binary, scale)

        # compute the reading order
        lines = psegutils.compute_lines(segmentation, scale)
        order = psegutils.reading_order([l.bounds for l in lines])
        lsort = psegutils.topsort(order)

        # renumber the labels so that they conform to the specs; the label
        # count has to come from the segmentation array itself
        nlabels = amax(segmentation) + 1
        renumber = zeros(nlabels, 'i')
        for i, v in enumerate(lsort):
            renumber[lines[v].label] = 0x010000 + (i + 1)
        segmentation = renumber[segmentation]

        # NOTE(review): this is a URL, not a filesystem directory, and the
        # writers below are handed paths built from it -- confirm the
        # intended output location.
        outputdir = "http://127.0.0.1:5000/uploads/"
        lines = [lines[i] for i in lsort]
        ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
        cleaned = ocrolib.remove_noise(binary, args.noise)
        for i, l in enumerate(lines):
            binline = psegutils.extract_masked(1 - cleaned, l,
                                               pad=args.pad,
                                               expand=args.expand)
            ocrolib.write_image_binary(
                "%s/01%04x.bin.png" % (outputdir, i + 1), binline)
    except Exception:
        # Keep the best-effort contract, but let KeyboardInterrupt and
        # SystemExit propagate and show what actually went wrong.
        print('error')
        traceback.print_exc()
def textimageseg(self, imf):
    """Split a binarized page image into a text part and a non-text part.

    Args:
        imf: path of the binarized input image.

    Returns:
        ``[text_path, nontext_path]`` -- the two files written, named
        ``<base>.ts.png`` and ``<base>.nts.png`` where ``<base>`` comes from
        ``ocrolib.allsplitext(imf)``.
    """
    page = ocrolib.read_image_binary(imf)
    page = 1 - page / page.max()
    height, width = page.shape

    # Build the mask and seed images, then fill the mask from the seed.
    mask_img, seed_img = self.pixMorphSequence_mask_seed_fill_holes(page)
    filled = self.pixSeedfillBinary(mask_img, seed_img)

    # Dilate the filled image with a 3x3 structuring element and expand it
    # back to the size of the input page.
    structure = ones((3, 3))
    filled = ndimage.binary_dilation(filled, structure)
    filled = self.expansion(filled, (height, width))

    # Non-text keeps the pixels under the filled mask, text keeps the rest.
    nontext_img = array((1 - page * filled), dtype=int)
    nontext_img[0, 0] = 0  # only for visualisation purpose
    text_img = array((1 - page * (1 - filled)), dtype=int)
    text_img[0, 0] = 0  # only for visualisation purpose

    stem, _ = ocrolib.allsplitext(imf)
    text_path = stem + ".ts.png"
    nontext_path = stem + ".nts.png"
    ocrolib.write_image_binary(text_path, text_img)
    ocrolib.write_image_binary(nontext_path, nontext_img)
    return [text_path, nontext_path]
def deskew(fpath, job):
    """Deskew a page image, threshold it and write ``<base>.ds.png``.

    The gray image is rotated by an estimated skew angle when
    ``args.maxskew > 0``, rescaled between percentile-based low/high levels
    and thresholded at ``args.threshold``.

    Args:
        fpath: path of the input image.
        job: job number; only used in the progress message.

    Returns:
        The path of the written image (``base + ".ds.png"``).
    """
    base, _ = ocrolib.allsplitext(fpath)
    basefile = ocrolib.allsplitext(os.path.basename(fpath))[0]
    verbose = args.parallel < 2
    if verbose:
        print_info("=== %s %-3d" % (fpath, job))
    flat = ocrolib.read_image_gray(fpath)

    # estimate skew angle and rotate
    angle = 0
    if args.maxskew > 0:
        if verbose:
            print_info("estimating skew angle")
        rows, cols = flat.shape
        pad_r, pad_c = int(args.bignore * rows), int(args.bignore * cols)
        # the skew estimate works on the inverted image
        flat = amax(flat) - flat
        flat -= amin(flat)
        center = flat[pad_r:rows - pad_r, pad_c:cols - pad_c]
        max_angle = args.maxskew
        steps = int(2 * args.maxskew * args.skewsteps)
        candidates = linspace(-max_angle, max_angle, steps + 1)
        angle = estimate_skew_angle(center, candidates)
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        flat = amax(flat) - flat

    # estimate low and high thresholds
    if verbose:
        print_info("estimating thresholds")
    rows, cols = flat.shape
    pad_r, pad_c = int(args.bignore * rows), int(args.bignore * cols)
    est = flat[pad_r:rows - pad_r, pad_c:cols - pad_c]
    if args.escale > 0:
        # by default, we use only regions that contain significant
        # variance; this makes the percentile based low and high
        # estimates more reliable
        e = args.escale
        mask = est - filters.gaussian_filter(est, e * 20.0)
        mask = filters.gaussian_filter(mask**2, e * 20.0)**0.5
        mask = (mask > 0.3 * amax(mask))
        mask = morphology.binary_dilation(
            mask, structure=ones((int(e * 50), 1)))
        mask = morphology.binary_dilation(
            mask, structure=ones((1, int(e * 50))))
        if args.debug > 0:
            imshow(mask)
            ginput(1, args.debug)
        est = est[mask]
    samples = est.ravel()
    lo = stats.scoreatpercentile(samples, args.lo)
    hi = stats.scoreatpercentile(est.ravel(), args.hi)

    # rescale the image to get the gray scale image
    if verbose:
        print_info("rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if args.debug > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, args.debug)
    thresholded = 1 * (flat > args.threshold)

    # report and write the thresholded image
    print_info("%s lo-hi (%.2f %.2f) angle %4.1f" % (basefile, lo, hi, angle))
    if verbose:
        print_info("writing")
    out_path = base + ".ds.png"
    ocrolib.write_image_binary(out_path, thresholded)
    return out_path
def binarize(image_filepath):
    """Binarize the gray image at ``image_filepath`` with fixed parameters.

    The image is normalized, flattened against its local white level,
    deskewed, rescaled between estimated low/high levels and thresholded.

    Returns:
        ``(bin, flat)`` -- the thresholded image (``1 * (flat > threshold)``)
        and the normalized gray image clipped to ``[0, 1]``.
    """
    # Fixed processing parameters.
    threshold = 0.5      # Threshold, determines lightness.
    zoom = 0.5           # Zoom for page background estimation, smaller=faster.
    escale = 1.0         # Scale for estimating a mask over the text region.
    bignore = 0.1        # Ignore this much of the border for threshold estimation.
    perc = 80            # Percentage for filters.
    filter_range = 20    # Range for filters.
    maxskew = 2          # Skew angle estimation parameters (degrees).
    lo_percentile = 5    # Percentile for black estimation.
    hi_percentile = 90   # Percentile for white estimation.
    skewsteps = 8        # Steps for skew angle estimation (per degree).
    debug = 0            # Display intermediate results.
    write_debug_images = False  # Dump the results to ./ocropy_test.*.png.

    # Perform image normalization.
    image = normalize_raw_image(ocrolib.read_image_gray(image_filepath))

    # Flatten it by estimating the local whitelevel.
    flat = estimate_local_whitelevel(image, zoom, perc, filter_range, debug)

    # Estimate skew angle and rotate.
    flat, _angle = estimate_skew(flat, bignore, maxskew, skewsteps, debug)

    # Estimate low and high thresholds.
    lo, hi = estimate_thresholds(flat, bignore, escale,
                                 lo_percentile, hi_percentile, debug)

    # Rescale the image to get the gray scale image.
    flat -= lo
    flat /= (hi - lo)
    flat = np.clip(flat, 0, 1)
    binary = 1 * (flat > threshold)

    if write_debug_images:
        # Output the normalized grayscale and the thresholded images.
        ocrolib.write_image_binary('./ocropy_test.bin.png', binary)
        ocrolib.write_image_gray('./ocropy_test.nrm.png', flat)

    return binary, flat
def process1(job):
    """Binarize one page image and write ``.bin.png`` / ``.nrm.png`` files.

    Args:
        job: ``(fname, i)`` -- input image path and job number.

    Returns:
        ``base + ".bin.png"`` on success, ``None`` when the image is empty
        or rejected by ``check_page``.
    """
    fname, i = job
    print_info("# %s" % (fname))
    verbose = args.parallel < 2
    if verbose:
        print_info("=== %s %-3d" % (fname, i))
    raw = ocrolib.read_image_gray(fname)
    dshow(raw, "input")

    # perform image normalization
    image = raw - amin(raw)
    if amax(image) == amin(image):
        print_info("# image is empty: %s" % (fname))
        return
    image /= amax(image)

    if not args.nocheck:
        problem = check_page(amax(image) - image)
        if problem is not None:
            print_error(fname + " SKIPPED. " + problem +
                        " (use -n to disable this check)")
            return

    # check whether the image is already effectively binarized
    if args.gray:
        extreme = 0
    else:
        n_dark = sum(image < 0.05)
        n_light = sum(image > 0.95)
        extreme = (n_dark + n_light) * 1.0 / prod(image.shape)

    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        # if not, we need to flatten it by estimating the local whitelevel
        if verbose:
            print_info("flattening")
        white = interpolation.zoom(image, args.zoom)
        white = filters.percentile_filter(white, args.perc,
                                          size=(args.range, 2))
        white = filters.percentile_filter(white, args.perc,
                                          size=(2, args.range))
        white = interpolation.zoom(white, 1.0 / args.zoom)
        if args.debug > 0:
            clf()
            imshow(white, vmin=0, vmax=1)
            ginput(1, args.debug)
        # zooming down and up again may change the size slightly
        w, h = minimum(array(image.shape), array(white.shape))
        flat = clip(image[:w, :h] - white[:w, :h] + 1, 0, 1)
        if args.debug > 0:
            clf()
            imshow(flat, vmin=0, vmax=1)
            ginput(1, args.debug)

    # estimate low and high thresholds
    if verbose:
        print_info("estimating thresholds")
    rows, cols = flat.shape
    pad_r, pad_c = int(args.bignore * rows), int(args.bignore * cols)
    est = flat[pad_r:rows - pad_r, pad_c:cols - pad_c]
    if args.escale > 0:
        # by default, we use only regions that contain significant
        # variance; this makes the percentile based low and high
        # estimates more reliable
        e = args.escale
        mask = est - filters.gaussian_filter(est, e * 20.0)
        mask = filters.gaussian_filter(mask**2, e * 20.0)**0.5
        mask = (mask > 0.3 * amax(mask))
        mask = morphology.binary_dilation(
            mask, structure=ones((int(e * 50), 1)))
        mask = morphology.binary_dilation(
            mask, structure=ones((1, int(e * 50))))
        if args.debug > 0:
            imshow(mask)
            ginput(1, args.debug)
        est = est[mask]
    lo = stats.scoreatpercentile(est.ravel(), args.lo)
    hi = stats.scoreatpercentile(est.ravel(), args.hi)

    # rescale the image to get the gray scale image
    if verbose:
        print_info("rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if args.debug > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, args.debug)
    binarized = 1 * (flat > args.threshold)

    # output the normalized grayscale and the thresholded images
    print_info("%s lo-hi (%.2f %.2f) %s" % (fname, lo, hi, comment))
    if verbose:
        print_info("writing")
    if args.debug > 0 or args.show:
        clf()
        gray()
        imshow(binarized)
        ginput(1, max(0.1, args.debug))
    base, _ = ocrolib.allsplitext(fname)
    bin_path = base + ".bin.png"
    ocrolib.write_image_binary(bin_path, binarized)
    ocrolib.write_image_gray(base + ".nrm.png", flat)
    return bin_path
def process(job):
    """Flatten, deskew and binarize one page image; write ``<base>.bin.png``.

    Settings are read from the ``args`` mapping.

    Args:
        job: ``(fname, i)`` -- input image path and job number.

    Returns:
        The path of the written binary image, or ``None`` when the image is
        empty or rejected by ``check_page``.
    """
    fname, i = job
    print_info("# %s" % (fname))
    verbose = args['parallel'] < 2
    if verbose:
        print_info("=== %s %-3d" % (fname, i))
    raw = ocrolib.read_image_gray(fname)

    # perform image normalization
    image = raw - amin(raw)
    if amax(image) == amin(image):
        print_info("# image is empty: %s" % (fname))
        return
    image /= amax(image)

    if not args['nocheck']:
        problem = check_page(amax(image) - image)
        if problem is not None:
            print_error(fname + "SKIPPED" + problem +
                        "(use -n to disable this check)")
            return

    # flatten the image by estimating the local whitelevel
    comment = ""
    if verbose:
        print_info("flattening")
    white = interpolation.zoom(image, args['zoom'])
    white = filters.percentile_filter(white, args['perc'],
                                      size=(args['range'], 2))
    white = filters.percentile_filter(white, args['perc'],
                                      size=(2, args['range']))
    white = interpolation.zoom(white, 1.0 / args['zoom'])
    w, h = minimum(array(image.shape), array(white.shape))
    flat = clip(image[:w, :h] - white[:w, :h] + 1, 0, 1)

    # estimate skew angle and rotate
    angle = 0
    if args['maxskew'] > 0:
        if verbose:
            print_info("estimating skew angle")
        rows, cols = flat.shape
        pad_r = int(args['bignore'] * rows)
        pad_c = int(args['bignore'] * cols)
        flat = amax(flat) - flat
        flat -= amin(flat)
        center = flat[pad_r:rows - pad_r, pad_c:cols - pad_c]
        max_angle = args['maxskew']
        steps = int(2 * args['maxskew'] * args['skewsteps'])
        angle = estimate_skew_angle(
            center, linspace(-max_angle, max_angle, steps + 1))
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        flat = amax(flat) - flat

    # estimate low and high thresholds
    if verbose:
        print_info("estimating thresholds")
    rows, cols = flat.shape
    pad_r = int(args['bignore'] * rows)
    pad_c = int(args['bignore'] * cols)
    est = flat[pad_r:rows - pad_r, pad_c:cols - pad_c]
    if args['escale'] > 0:
        # by default, we use only regions that contain significant
        # variance; this makes the percentile based low and high
        # estimates more reliable
        e = args['escale']
        mask = est - filters.gaussian_filter(est, e * 20.0)
        mask = filters.gaussian_filter(mask**2, e * 20.0)**0.5
        mask = (mask > 0.3 * amax(mask))
        mask = morphology.binary_dilation(
            mask, structure=ones((int(e * 50), 1)))
        mask = morphology.binary_dilation(
            mask, structure=ones((1, int(e * 50))))
        est = est[mask]
    lo = stats.scoreatpercentile(est.ravel(), args['lo'])
    hi = stats.scoreatpercentile(est.ravel(), args['hi'])

    # rescale the image to get the gray scale image
    if verbose:
        print_info("rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    binarized = 1 * (flat > args['threshold'])

    # report and write the thresholded image
    print_info("%s lo-hi (%.2f %.2f) angle %4.1f %s" %
               (fname, lo, hi, angle, comment))
    if verbose:
        print_info("writing")
    base, _ = ocrolib.allsplitext(fname)
    outputfile_bin = base + ".bin.png"
    ocrolib.write_image_binary(outputfile_bin, binarized)
    return outputfile_bin
def analyze_page_layout(binary, gray, rgb=None):
    """Compute the text-line segmentation of a page in reading order.

    ``binary`` is inverted, its scale is estimated, and the segmentation
    labels are renumbered to follow the reading order. Visualisation and
    file output are present but disabled (``if False``), so nothing is
    returned or written as the function stands.

    Args:
        binary: binarized page image.
        gray: grayscale page image; only used by the disabled output block.
        rgb: optional colour image; only used by the disabled
            visualisation block.
    """
    hscale = 1.0  # Non-standard scaling of horizontal parameters.
    vscale = 1.0  # Non-standard scaling of vertical parameters.
    threshold = 0.2  # baseline threshold.
    usegauss = True  # Use gaussian instead of uniform.
    maxseps = 0  # Maximum black column separators.
    sepwiden = 10  # Widen black separators (to account for warping).
    blackseps = True
    maxcolseps = 3  # Maximum # whitespace column separators.
    csminheight = 10  # Minimum column height (units=scale).

    noise = 8  # Noise threshold for removing small components from lines.
    gray_output = True  # Output grayscale lines as well.
    pad = 3  # Padding for extracted lines.
    expand = 3  # Expand mask for grayscale extraction.

    # Disabled: load the inputs from fixed test files instead.
    if False:
        bin_image_filepath = './ocropy_test.bin.png'
        gray_image_filepath = './ocropy_test.nrm.png'
        binary = ocrolib.read_image_binary(bin_image_filepath)
        gray = ocrolib.read_image_gray(gray_image_filepath)

    inverted = 1 - binary  # Invert.
    binary = inverted

    scale = psegutils.estimate_scale(binary)
    segmentation = compute_segmentation(
        binary, scale, blackseps, maxseps, maxcolseps, csminheight,
        sepwiden, usegauss, vscale, hscale, threshold, quiet=True)

    lines = psegutils.compute_lines(segmentation, scale)
    bounds = [line.bounds for line in lines]
    lsort = psegutils.topsort(psegutils.reading_order(bounds))

    # Renumber the labels so that they conform to the specs.
    renumber = np.zeros(np.amax(segmentation) + 1, 'i')
    for rank, line_index in enumerate(lsort, start=1):
        renumber[lines[line_index].label] = 0x010000 + rank
    segmentation = renumber[segmentation]  # Image.

    lines = [lines[line_index] for line_index in lsort]

    # Disabled: visualize bounding boxes.
    if False:
        if rgb is not None:
            # REF [function] >> extract_masked() in
            # ${OCROPY_HOME}/ocrolib/psegutils.py.
            for l in lines:
                y0, x0, y1, x1 = [
                    int(x) for x in [
                        l.bounds[0].start, l.bounds[1].start,
                        l.bounds[0].stop, l.bounds[1].stop
                    ]
                ]
                cv2.rectangle(rgb, (x0, y0), (x1, y1), (0, 0, 255), 1,
                              cv2.LINE_AA)
            cv2.imshow('Image', rgb)
            cv2.waitKey(0)

    # Disabled: output everything.
    # NOTE(review): `outputdir` is not defined in this function; it has to
    # be supplied before this block can be enabled.
    if False:
        if not os.path.exists(outputdir):
            os.mkdir(outputdir)
        ocrolib.write_page_segmentation("%s.pseg.png" % outputdir,
                                        segmentation)
        cleaned = ocrolib.remove_noise(binary, noise)
        for i, l in enumerate(lines):
            binline = psegutils.extract_masked(1 - cleaned, l, pad=pad,
                                               expand=expand)  # Image.
            ocrolib.write_image_binary(
                "%s/01%04x.bin.png" % (outputdir, i + 1), binline)
            if gray_output:
                grayline = psegutils.extract_masked(gray, l, pad=pad,
                                                    expand=expand)  # Image.
                ocrolib.write_image_gray(
                    "%s/01%04x.nrm.png" % (outputdir, i + 1), grayline)
def binarize(inFile, binFile, grayFile):
    """Binarize ``inFile``; write the binary and normalized gray images.

    The processing parameters (``zoom``, ``perc``, ``size``, ``bignore``,
    ``escale``, ``defLo``, ``defHi``, ``threshold``) are read from module
    level. Skew estimation is disabled, so the reported angle is always 0.

    Args:
        inFile: path of the input image.
        binFile: path the thresholded image is written to.
        grayFile: path the normalized grayscale image is written to.

    Returns:
        ``True`` when both output files were written. ``False`` when the
        image is empty, is rejected by ``check_page``, or when threshold
        estimation reports failure; nothing is written in those cases.
    """
    print("binarize: inFile=%s binFile=%s grayFile=%s" %
          (inFile, binFile, grayFile))
    fname = inFile
    raw = ocrolib.read_image_gray(inFile)

    # perform image normalization
    image = normalize_raw_image(raw)
    if image is None:
        # Report the failure through the return value, like every other
        # rejection below, so the caller's "couldn't binarize" path runs.
        print("!! # image is empty: %s" % (inFile))
        return False

    check = check_page(np.amax(image) - image)
    if check is not None:
        print(inFile + " SKIPPED " + check + "(use -n to disable this check)")
        return False

    # check whether the image is already effectively binarized
    extreme = (np.sum(image < 0.05) + np.sum(image > 0.95)) / np.prod(
        image.shape)
    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        # if not, we need to flatten it by estimating the local whitelevel
        print("flattening")
        flat = estimate_local_whitelevel(image, zoom, perc, size)
    print("comment=%r extreme=%s" % (comment, extreme))
    print("image=%s" % desc(image))
    print(" flat=%s" % desc(flat))

    # skew estimation is disabled; the angle is only reported
    angle = 0.0

    # estimate low and high thresholds
    print("estimating thresholds")
    lo, hi, ok = estimate_thresholds(flat, bignore, escale, defLo, defHi)
    if not ok:
        return False
    print("lo=%5.3f (%g)" % (lo, defLo))
    print("hi=%5.3f (%g)" % (hi, defHi))

    # rescale the image to get the gray scale image
    print("rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = np.clip(flat, 0, 1)
    binarized = flat > threshold

    # output the normalized grayscale and the thresholded images
    print("%s lo-hi (%.2f %.2f) angle %4.1f %s" %
          (fname, lo, hi, angle, comment))
    print("##1 flat=%s" % desc(flat))
    print("##2 bin=%s" % desc(binarized))
    print("writing %s" % binFile)
    ocrolib.write_image_binary(binFile, binarized)
    ocrolib.write_image_gray(grayFile, flat)
    return True
def processPngFile(outRoot, origFile, fileNum):
    """Binarize and segment one PNG page, then draw the line boxes on it.

    The page is copied into its own work directory below ``outRoot``,
    binarized, segmented into text lines (written as individual images),
    and finally saved twice with a rectangle drawn around every region
    found in the page segmentation.

    Args:
        outRoot: root directory for the per-page work directories.
        origFile: path of the page image to process.
        fileNum: number used in the work directory name.

    Returns:
        ``True`` on success, ``False`` when binarization fails, the page
        check rejects the image, the scale is bad, or there are too many
        lines.
    """
    # --- set up the work directory and all derived paths -----------------
    baseName = os.path.basename(origFile)
    baseBase, _ = os.path.splitext(baseName)
    outDir = os.path.join(outRoot, "%s.%03d" % (baseBase, fileNum))
    inFile = os.path.join(outDir, baseName)
    os.makedirs(outDir, exist_ok=True)
    shutil.copy(origFile, inFile)

    inBase, _ = ocrolib.allsplitext(inFile)
    print("** inBase=%s" % inBase)

    fname = inFile
    outputdir = inBase
    binFile = inBase + ".bin.png"
    outFile = inBase + ".out.png"
    outRoot2, outDir2 = os.path.split(outRoot)
    outFile2 = os.path.join(outRoot2, "%s.out" % outDir2, baseName)
    print("outFile2=%s" % outFile2)
    grayFile = inBase + ".nrm.png"
    psegFile = inBase + ".pseg.png"
    print(" inFile=%s" % inFile)
    print(" binFile=%s" % binFile)
    print("grayFile=%s" % grayFile)
    print(" outFile=%s" % outFile)
    assert inFile and binFile
    assert outFile != inFile
    assert outFile != binFile

    # --- binarize ---------------------------------------------------------
    binarized_ok = binarize(inFile, binFile, grayFile)
    if not binarized_ok:
        binExists = os.path.exists(binFile)
        print("Couldn't binarize inFile=%s binFile=%s exists=%s" %
              (inFile, binFile, binExists))
        return False

    binary = ocrolib.read_image_binary(binFile)
    print("$$ %s=%s" % (binFile, desc(binary)))
    _height, _width = binary.shape
    checktype(binary, ABINARY2)

    problem = check_page(np.amax(binary) - binary)
    if problem is not None:
        print("%s SKIPPED %s (use -n to disable this check)" %
              (inFile, problem))
        return False

    binary = 1 - binary  # invert

    scale = psegutils.estimate_scale(binary)
    print("scale %f" % scale)
    if np.isnan(scale) or scale > 1000.0:
        print("%s: bad scale (%g); skipping\n" % (fname, scale))
        return False

    # --- find columns and text lines -------------------------------------
    print("computing segmentation")
    segmentation = compute_segmentation(binary, scale)
    if np.amax(segmentation) > maxlines:
        print("%s: too many lines %g" % (fname, np.amax(segmentation)))
        return False
    print("segmentation=%s" % desc(segmentation))
    print("number of lines %g" % np.amax(segmentation))

    # --- compute the reading order ---------------------------------------
    print("finding reading order")
    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([line.bounds for line in lines])
    lsort = psegutils.topsort(order)
    print("$$ lsort = %d = %s...%s" % (len(lsort), lsort[:10], lsort[-10:]))

    # renumber the labels so that they conform to the specs
    renumber = np.zeros(np.amax(segmentation) + 1, 'i')
    for idx, line_index in enumerate(lsort):
        renumber[lines[line_index].label] = 0x010000 + (idx + 1)
    segmentation = renumber[segmentation]

    # --- write the segmentation and the individual lines -----------------
    print("writing lines")
    if not os.path.exists(outputdir):
        os.mkdir(outputdir)
    lines = [lines[line_index] for line_index in lsort]
    ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
    cleaned = ocrolib.remove_noise(binary, noise)
    for idx, line in enumerate(lines):
        binline = psegutils.extract_masked(1 - cleaned, line,
                                           pad=pad, expand=expand)
        ocrolib.write_image_binary(
            "%s/01%04x.bin.png" % (outputdir, idx + 1), binline)
    print("%6d %s %4.1f %d" % (idx, fname, scale, len(lines)))

    # to proceed, we need a pseg file and a subdirectory containing text
    # lines
    assert os.path.exists(psegFile), "%s: no such file" % psegFile
    assert os.path.isdir(inBase), "%s: no such directory" % inBase

    # --- draw the regions of the page segmentation onto the page ---------
    pseg = ocrolib.read_page_segmentation(psegFile)
    print("$$ %s=%s" % (psegFile, desc(pseg)))

    regions = ocrolib.RegionExtractor()
    print("$$ regions=%s" % regions)
    regions.setPageLines(pseg)

    im = Image.open(inFile)
    print("~~%s %s" % (inFile, im.size))
    print("$$ regions=%s=%s" % (regions, sorted(regions.__dict__)))
    print("$$ regions.length=%s" % regions.length())

    n = regions.length()
    for region in range(1, n):
        _region_id = regions.id(region)
        y0, x0, y1, x1 = regions.bbox(region)
        box = (x0, y0, x1, y1)
        draw = ImageDraw.Draw(im)
        draw.rectangle(box, outline=(255, 0, 0), width=3)
        draw.rectangle(box, outline=(0, 0, 255), width=0)
        del draw

    # --- write output files ----------------------------------------------
    print("outFile=%s" % outFile)
    im.save(outFile, "PNG")
    print("outFile2=%s" % outFile2)
    outDir2 = os.path.dirname(outFile2)
    os.makedirs(outDir2, exist_ok=True)
    im.save(outFile2, "PNG")
    assert os.path.exists(outFile2)
    return True
def process(self):
    """Binarize the page image of every input file and register the result.

    For each input file the page image is normalized, optionally flattened
    against its local white level, rescaled between percentile-based
    low/high levels and thresholded. The binary image is written next to
    the source image as ``<base>.bin.png``, recorded on the page as an
    alternative image, and the updated PAGE XML is added to the workspace.

    Pages that are empty or rejected by ``check_page`` are logged and
    skipped; processing continues with the next input file.
    """
    param = self.parameter
    for (n, input_file) in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID
        page = pcgts.get_Page()
        file_label = input_file.pageId or input_file.ID
        LOG.info("INPUT FILE %s", file_label)
        page_image, page_xywh, _ = self.workspace.image_from_page(
            page, page_id)
        LOG.debug("page image type: %s", type(page_image))

        raw = ocrolib.read_image_gray(page_image.filename)
        self.dshow(raw, "input")

        # perform image normalization
        image = raw - amin(raw)
        if amax(image) == amin(image):
            LOG.info("# image is empty: %s" % (file_label))
            # skip only this page, not the remaining input files
            continue
        image /= amax(image)

        if not param['nocheck']:
            check = self.check_page(amax(image) - image)
            if check is not None:
                # Format explicitly: `a or b + "..."` would log only `a`
                # whenever `a` is set, because `+` binds tighter than `or`.
                LOG.error("%s SKIPPED. %s (use -n to disable this check)",
                          file_label, check)
                continue

        # check whether the image is already effectively binarized
        if param['gray']:
            extreme = 0
        else:
            extreme = (np.sum(image < 0.05) +
                       np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)

        if extreme > 0.95:
            comment = "no-normalization"
            flat = image
        else:
            comment = ""
            # if not, we need to flatten it by estimating the local
            # whitelevel
            LOG.info("Flattening")
            m = interpolation.zoom(image, param['zoom'])
            m = filters.percentile_filter(m, param['perc'],
                                          size=(param['range'], 2))
            m = filters.percentile_filter(m, param['perc'],
                                          size=(2, param['range']))
            m = interpolation.zoom(m, 1.0 / param['zoom'])
            if param['debug'] > 0:
                clf()
                imshow(m, vmin=0, vmax=1)
                ginput(1, param['debug'])
            w, h = minimum(array(image.shape), array(m.shape))
            flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
            if param['debug'] > 0:
                clf()
                imshow(flat, vmin=0, vmax=1)
                ginput(1, param['debug'])

        # estimate low and high thresholds
        LOG.info("Estimating Thresholds")
        d0, d1 = flat.shape
        o0, o1 = int(param['bignore'] * d0), int(param['bignore'] * d1)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        if param['escale'] > 0:
            # by default, we use only regions that contain significant
            # variance; this makes the percentile based low and high
            # estimates more reliable
            e = param['escale']
            v = est - filters.gaussian_filter(est, e * 20.0)
            v = filters.gaussian_filter(v**2, e * 20.0)**0.5
            v = (v > 0.3 * amax(v))
            v = morphology.binary_dilation(
                v, structure=ones((int(e * 50), 1)))
            v = morphology.binary_dilation(
                v, structure=ones((1, int(e * 50))))
            if param['debug'] > 0:
                imshow(v)
                ginput(1, param['debug'])
            est = est[v]
        lo = stats.scoreatpercentile(est.ravel(), param['lo'])
        hi = stats.scoreatpercentile(est.ravel(), param['hi'])

        # rescale the image to get the gray scale image
        LOG.info("Rescaling")
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if param['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, param['debug'])
        binarized = 1 * (flat > param['threshold'])

        # output the thresholded image
        LOG.info("%s lo-hi (%.2f %.2f) %s" % (file_label, lo, hi, comment))
        LOG.info("writing")
        if param['debug'] > 0 or param['show']:
            clf()
            gray()
            imshow(binarized)
            ginput(1, max(0.1, param['debug']))
        base, _ = ocrolib.allsplitext(page_image.filename)
        bin_path = base + ".bin.png"
        ocrolib.write_image_binary(bin_path, binarized)

        file_id = input_file.ID.replace(self.input_file_grp,
                                        self.output_file_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.output_file_grp, n)

        # NOTE(review): the PAGE attribute is presumably named `comments`;
        # confirm that AlternativeImageType accepts `comment=`.
        page.add_AlternativeImage(
            AlternativeImageType(filename=bin_path, comment="binarized"))
        self.workspace.add_file(
            ID=file_id,
            file_grp=self.output_file_grp,
            pageId=input_file.pageId,
            mimetype="image/png",
            url=bin_path,
            local_filename=os.path.join(self.output_file_grp,
                                        file_id + '.xml'),
            content=to_xml(pcgts).encode('utf-8'))
def process(self):
    """Deskew the page image of every input file and register the result.

    For each input file the page image is rotated by an estimated skew
    angle (when ``maxskew > 0``), rescaled, thresholded and written as
    ``<base>.ds.png``. The page orientation is set from the angle and the
    updated PAGE XML is added to the workspace.
    """
    for (n, input_file) in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        fname = pcgts.get_Page().imageFilename
        img = self.workspace.resolve_image_as_pil(fname)
        param = self.parameter
        base, _ = ocrolib.allsplitext(fname)

        if param['parallel'] < 2:
            print_info("=== %s " % (fname))
        flat = ocrolib.read_image_gray(img.filename)

        # estimate skew angle and rotate
        angle = 0
        if param['maxskew'] > 0:
            if param['parallel'] < 2:
                print_info("estimating skew angle")
            rows, cols = flat.shape
            pad_r = int(param['bignore'] * rows)
            pad_c = int(param['bignore'] * cols)
            flat = amax(flat) - flat
            flat -= amin(flat)
            center = flat[pad_r:rows - pad_r, pad_c:cols - pad_c]
            max_angle = param['maxskew']
            steps = int(2 * param['maxskew'] * param['skewsteps'])
            angle = self.estimate_skew_angle(
                center, linspace(-max_angle, max_angle, steps + 1))
            flat = interpolation.rotate(flat, angle, mode='constant',
                                        reshape=0)
            flat = amax(flat) - flat

        # estimate low and high thresholds
        if param['parallel'] < 2:
            print_info("estimating thresholds")
        rows, cols = flat.shape
        pad_r = int(param['bignore'] * rows)
        pad_c = int(param['bignore'] * cols)
        est = flat[pad_r:rows - pad_r, pad_c:cols - pad_c]
        if param['escale'] > 0:
            # by default, we use only regions that contain significant
            # variance; this makes the percentile based low and high
            # estimates more reliable
            e = param['escale']
            mask = est - filters.gaussian_filter(est, e * 20.0)
            mask = filters.gaussian_filter(mask**2, e * 20.0)**0.5
            mask = (mask > 0.3 * amax(mask))
            mask = morphology.binary_dilation(
                mask, structure=ones((int(e * 50), 1)))
            mask = morphology.binary_dilation(
                mask, structure=ones((1, int(e * 50))))
            if param['debug'] > 0:
                imshow(mask)
                ginput(1, param['debug'])
            est = est[mask]
        lo = stats.scoreatpercentile(est.ravel(), param['lo'])
        hi = stats.scoreatpercentile(est.ravel(), param['hi'])

        # rescale the image to get the gray scale image
        if param['parallel'] < 2:
            print_info("rescaling")
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if param['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, param['debug'])
        deskewed = 1 * (flat > param['threshold'])

        # report and write the thresholded image
        print_info("%s lo-hi (%.2f %.2f) angle %4.1f" %
                   (pcgts.get_Page().imageFilename, lo, hi, angle))
        if param['parallel'] < 2:
            print_info("writing")
        out_path = base + ".ds.png"
        ocrolib.write_image_binary(out_path, deskewed)

        # store the negated angle, mapped by the expression below
        orientation = -angle
        orientation = 180 - (180 - orientation) % 360
        pcgts.get_Page().set_orientation(orientation)

        ID = concat_padded(self.output_file_grp, n)
        self.workspace.add_file(
            ID=ID,
            file_grp=self.output_file_grp,
            pageId=input_file.pageId,
            mimetype="image/png",
            url=out_path,
            local_filename='%s/%s' % (self.output_file_grp, ID),
            content=to_xml(pcgts).encode('utf-8'))
def process(job):
    """Segment one binarized page into text lines and write them out.

    Settings are read from the ``args`` mapping. The page segmentation is
    written as ``<base>.pseg.png`` and every line as
    ``<base>/<name>_01xxxx.bin.png``.

    Args:
        job: ``(imagepath, i)`` -- binary image path and job number.

    Returns:
        The output directory on success, ``None`` when the page is skipped
        (unreadable, rejected by ``check_page``, bad scale, too many lines).
    """
    global base
    imagepath, i = job
    base, _ = ocrolib.allsplitext(imagepath)
    outputdir = base
    imagename_base = os.path.basename(os.path.normpath(base))
    quiet = args['quiet']

    try:
        binary = ocrolib.read_image_binary(imagepath)
    except IOError:
        if ocrolib.trace:
            traceback.print_exc()
        print_error("cannot open either %s.bin.png or %s" % (base, imagepath))
        return

    checktype(binary, ABINARY2)

    if not args['nocheck']:
        problem = check_page(amax(binary) - binary)
        if problem is not None:
            print_error("%s SKIPPED %s (use -n to disable this check)" %
                        (imagepath, problem))
            return

    binary = 1 - binary  # invert

    scale = args['scale']
    if args['scale'] == 0:
        scale = psegutils.estimate_scale(binary)
    print_info("scale %f" % (scale))
    if isnan(scale) or scale > 1000.0:
        print_error("%s: bad scale (%g); skipping\n" % (imagepath, scale))
        return
    if scale < args['minscale']:
        print_error("%s: scale (%g) less than --minscale; skipping\n" %
                    (imagepath, scale))
        return

    # find columns and text lines
    if not quiet:
        print_info("computing segmentation")
    segmentation = compute_segmentation(binary, scale)
    if amax(segmentation) > args['maxlines']:
        print_error("%s: too many lines %g" % (imagepath, amax(segmentation)))
        return
    if not quiet:
        print_info("number of lines %g" % amax(segmentation))

    # compute the reading order
    if not quiet:
        print_info("finding reading order")
    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([line.bounds for line in lines])
    lsort = psegutils.topsort(order)

    # renumber the labels so that they conform to the specs
    renumber = zeros(amax(segmentation) + 1, 'i')
    for rank, line_index in enumerate(lsort, start=1):
        renumber[lines[line_index].label] = 0x010000 + rank
    segmentation = renumber[segmentation]

    # finally, output everything
    if not quiet:
        print_info("writing lines")
    if not os.path.exists(outputdir):
        os.mkdir(outputdir)
    lines = [lines[line_index] for line_index in lsort]
    ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
    cleaned = ocrolib.remove_noise(binary, args['noise'])
    # `i` is deliberately reused: the summary below reports the last index
    for i, line in enumerate(lines):
        binline = psegutils.extract_masked(1 - cleaned, line,
                                           pad=args['pad'],
                                           expand=args['expand'])
        ocrolib.write_image_binary(
            "%s/%s_01%04x.bin.png" % (outputdir, imagename_base, i + 1),
            binline)
    print_info("%6d %s %4.1f %d" % (i, imagepath, scale, len(lines)))
    return outputdir
def process1(job):
    """Segment one binarized page into text lines and write them out.

    The binary image is read from ``<base>.bin.png`` or, failing that, from
    ``fname`` itself. The page segmentation is written as
    ``<base>.pseg.png`` and every line as ``<base>/01xxxx.bin.png``; with
    ``args.gray`` the matching ``.nrm.png`` grayscale lines are written too.

    Args:
        job: ``(fname, i)`` -- image path and job number.

    Returns:
        ``None``. A page is skipped with a message when it cannot be read,
        is rejected by ``check_page``, lacks the grayscale image required
        by ``args.gray``, has a bad scale, or has too many lines.
    """
    fname, i = job
    global base
    base, _ = ocrolib.allsplitext(fname)
    outputdir = base

    try:
        binary = ocrolib.read_image_binary(base + ".bin.png")
    except IOError:
        try:
            binary = ocrolib.read_image_binary(fname)
        except IOError:
            if ocrolib.trace:
                traceback.print_exc()
            print("cannot open either", base + ".bin.png", "or", fname)
            return

    checktype(binary, ABINARY2)

    if not args.nocheck:
        check = check_page(amax(binary) - binary)
        if check is not None:
            print(fname, "SKIPPED", check, "(use -n to disable this check)")
            return

    if args.gray:
        if os.path.exists(base + ".nrm.png"):
            gray = ocrolib.read_image_gray(base + ".nrm.png")
            checktype(gray, GRAYSCALE)
        else:
            # Without this branch `gray` is never bound to image data and
            # the grayscale line extraction below fails much later.
            sys.stderr.write(
                "%s: grayscale version %s.nrm.png not found; skipping\n" %
                (fname, base))
            return

    binary = 1 - binary  # invert

    if args.scale == 0:
        scale = psegutils.estimate_scale(binary)
    else:
        scale = args.scale
    print("scale", scale)
    if isnan(scale) or scale > 1000.0:
        sys.stderr.write("%s: bad scale (%g); skipping\n" % (fname, scale))
        return
    if scale < args.minscale:
        sys.stderr.write("%s: scale (%g) less than --minscale; skipping\n" %
                         (fname, scale))
        return

    # find columns and text lines
    if not args.quiet:
        print("computing segmentation")
    segmentation = compute_segmentation(binary, scale)
    if amax(segmentation) > args.maxlines:
        print(fname, ": too many lines", amax(segmentation))
        return
    if not args.quiet:
        print("number of lines", amax(segmentation))

    # compute the reading order
    if not args.quiet:
        print("finding reading order")
    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)

    # renumber the labels so that they conform to the specs
    nlabels = amax(segmentation) + 1
    renumber = zeros(nlabels, 'i')
    for i, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (i + 1)
    segmentation = renumber[segmentation]

    # finally, output everything
    if not args.quiet:
        print("writing lines")
    if not os.path.exists(outputdir):
        os.mkdir(outputdir)
    lines = [lines[i] for i in lsort]
    ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
    cleaned = ocrolib.remove_noise(binary, args.noise)
    for i, l in enumerate(lines):
        binline = psegutils.extract_masked(1 - cleaned, l,
                                           pad=args.pad, expand=args.expand)
        ocrolib.write_image_binary(
            "%s/01%04x.bin.png" % (outputdir, i + 1), binline)
        if args.gray:
            grayline = psegutils.extract_masked(gray, l,
                                                pad=args.pad,
                                                expand=args.expand)
            ocrolib.write_image_gray(
                "%s/01%04x.nrm.png" % (outputdir, i + 1), grayline)
    print("%6d" % i, fname, "%4.1f" % scale, len(lines))
def _process_segment(self, page_image, page, region_xywh, page_id,
                     input_file, n):
    """Segment a page image into text lines and add them to the PAGE tree.

    The lines are found on the inverted binary image, sorted into reading
    order, saved as individual images in ``self.image_grp`` and attached as
    ``TextLine`` elements to the last text region of ``page``. When the
    page has no text region, one covering the whole image is created.

    Args:
        page_image: PIL image of the page.
        page: PAGE ``Page`` object that receives the text lines.
        region_xywh: mapping whose ``'features'`` entry is extended with
            ``",textline"`` (modified in place).
        page_id: page identifier; used for saved images and in warnings.
        input_file: workspace file the page came from.
        n: index of the input file; used to build a fallback file ID.

    Returns:
        ``None``. The page is skipped with a warning when the scale is bad
        or there are too many lines.
    """
    binary = ocrolib.pil2array(page_image)
    binary = np.array(1 - binary / np.amax(binary), 'B')

    regions = page.get_TextRegion()
    if not regions:
        # array axis 0 runs over rows (y), axis 1 over columns (x)
        min_y, max_y = (0, binary.shape[0])
        min_x, max_x = (0, binary.shape[1])
        textregion = TextRegionType(
            Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" %
                              (min_x, min_y, max_x, min_y,
                               max_x, max_y, min_x, max_y)))
        page.add_TextRegion(textregion)
    else:
        textregion = regions[-1]

    # NOTE(review): looks like a debugging leftover that writes into the
    # current working directory -- confirm whether it is still wanted.
    ocrolib.write_image_binary("test.bin.png", binary)

    if self.parameter['scale'] == 0:
        scale = psegutils.estimate_scale(binary)
    else:
        scale = self.parameter['scale']
    if (np.isnan(scale) or scale > 1000.0
            or scale < self.parameter['minscale']):
        LOG.warning("%s: bad scale (%g); skipping\n" % (page_id, scale))
        return

    segmentation = self.compute_segmentation(binary, scale)
    if np.amax(segmentation) > self.parameter['maxlines']:
        LOG.warning("%s: too many lines %i", page_id, np.amax(segmentation))
        return

    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)

    # renumber the labels so that they conform to the specs
    nlabels = np.amax(segmentation) + 1
    renumber = np.zeros(nlabels, 'i')
    for i, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (i + 1)
    segmentation = renumber[segmentation]

    lines = [lines[i] for i in lsort]

    # NOTE(review): `cleaned` is computed but the lines below are cut from
    # `binary` -- confirm which image was intended.
    cleaned = ocrolib.remove_noise(binary, self.parameter['noise'])
    region_xywh['features'] += ",textline"

    # the file ID is the same for every line of this page
    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)

    for i, l in enumerate(lines):
        line_pixels = binary[l.bounds[0], l.bounds[1]]
        ocrolib.write_image_binary("test.bin.png", line_pixels)

        # bounds[0] is the row (y) slice, bounds[1] the column (x) slice
        min_y, max_y = (l.bounds[0].start, l.bounds[0].stop)
        min_x, max_x = (l.bounds[1].start, l.bounds[1].stop)

        img = np.array(255 * (line_pixels > ocrolib.midrange(line_pixels)),
                       'B')
        img = ocrolib.array2pil(img)

        file_path = self.workspace.save_image_file(
            img, file_id + "_" + str(i),
            page_id=page_id,
            file_grp=self.image_grp)
        ai = AlternativeImageType(filename=file_path,
                                  comments=region_xywh['features'])
        line = TextLineType(
            Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" %
                              (min_x, min_y, max_x, min_y,
                               max_x, max_y, min_x, max_y)))
        line.add_AlternativeImage(ai)
        textregion.add_TextLine(line)