def test_read_image_gray(self):
    """
    Check that read_image_gray() gives the same result for an image passed
    as a file name (from disk) and as an image object (PIL.Image in memory).

    read_image_gray() returns a numpy 'ndarray', so the two results are
    compared with numpy's 'array_equal'.
    """
    from_disk = ocrolib.read_image_gray(img_disk)
    from_memory = ocrolib.read_image_gray(img_mem)
    self.assertTrue(numpy.array_equal(from_disk, from_memory))
def process1(arg):
    """Prepare one text-line image and run the recognizer on it.

    ``arg`` is a ``(trial, fname)`` pair.

    Returns ``None`` when the image is empty or has a single gray value,
    and ``(0, [], 0, trial, fname)`` when the line is rejected by
    ``check_line`` or the network raises ``RecognitionError``.  After a
    successful prediction the visible code returns nothing.

    NOTE(review): ``base``, ``raw_line`` and ``pred`` are assigned but not
    used in the part of the function visible here; they are kept in case the
    definition continues elsewhere.
    """
    (trial, fname) = arg
    base, _ = ocrolib.allsplitext(fname)
    line = ocrolib.read_image_gray(fname)
    raw_line = line.copy()
    if prod(line.shape) == 0:
        return None
    if amax(line) == amin(line):
        return None
    if not args.nocheck:
        check = check_line(amax(line) - line)
        if check is not None:
            print_error(fname + " SKIPPED " + check +
                        " (use -n to disable this check)")
            return (0, [], 0, trial, fname)
    if not args.nolineest:
        assert "dew.png" not in fname, "don't dewarp dewarped images"
        # Measure on the inverted image scaled so its maximum is 1.
        temp = amax(line) - line
        temp = temp * 1.0 / amax(temp)
        lnorm.measure(temp)
        line = lnorm.normalize(line, cval=amax(line))
    else:
        assert "dew.png" in fname, "only apply to dewarped images"
    line = lstm.prepare_line(line, args.pad)
    try:
        pred = network.predictString(line)
    except RecognitionError:
        # The exception object was never used; dropping the Python-2-only
        # "except X, err" form keeps this valid on Python 2 and Python 3.
        # TODO: Handle this in the extraction processor
        print_info(fname + " Failed to predict line. Skipping.")
        return (0, [], 0, trial, fname)
def process(arg):
    """Recognize one text-line image and write the requested output files.

    ``arg`` is a ``(trial, fname)`` pair.  Depending on ``args`` the function
    writes ``<base>.llocs`` (character positions), ``<base>.prob``
    (character probabilities) and always ``<base>.txt`` (recognized text).

    Returns the list of written paths, ``None`` for an empty or uniform
    image, or ``(0, [], 0, trial, fname)`` when ``check_line`` rejects it.
    """
    trial, fname = arg
    base, _ = ocrolib.allsplitext(fname)
    line = ocrolib.read_image_gray(fname)
    original = line.copy()

    # Nothing to recognize in an empty or single-valued image.
    if prod(line.shape) == 0 or amax(line) == amin(line):
        return None

    if not args['nocheck']:
        problem = check_line(amax(line) - line)
        if problem is not None:
            print_error("%s SKIPPED %s (use -n to disable this check)" %
                        (fname, problem))
            return (0, [], 0, trial, fname)

    # Measure on the inverted image scaled so its maximum is 1.
    inverted = amax(line) - line
    inverted = inverted * 1.0 / amax(inverted)
    lnorm.measure(inverted)
    line = lnorm.normalize(line, cval=amax(line))
    line = lstm.prepare_line(line, args['pad'])
    pred = network.predictString(line)

    written = []

    if args['llocs']:
        # output recognized LSTM locations of characters
        located = lstm.translate_back(network.outputs, pos=1)
        scale = len(original.T) * 1.0 / (
            len(network.outputs) - 2 * args['pad'])
        llocs_path = base + ".llocs"
        with codecs.open(llocs_path, "w", "utf-8") as stream:
            for position, label in located:
                text = network.l2s([label])
                column = (position - args['pad']) * scale
                stream.write("%s\t%.1f\n" % (text, column))
        written.append(llocs_path)

    if args['probabilities']:
        # output character probabilities
        scored = lstm.translate_back(network.outputs, pos=2)
        prob_path = base + ".prob"
        with codecs.open(prob_path, "w", "utf-8") as stream:
            for label, probability in scored:
                text = network.l2s([label])
                stream.write("%s\t%s\n" % (text, probability))
        written.append(prob_path)

    if not args['nonormalize']:
        pred = ocrolib.normalize_text(pred)
    if not args['quiet']:
        print_info(fname + ":" + pred)

    text_path = base + ".txt"
    ocrolib.write_text(text_path, pred)
    written.append(text_path)
    return written
def deskew(fpath, job):
    """Rotate a page image by its estimated skew angle and threshold it.

    Reads ``fpath`` as a gray image.  When ``args.maxskew > 0`` a skew angle
    is estimated on the inverted image (ignoring an ``args.bignore`` border
    fraction) and the image is rotated by it; otherwise the angle is 0.  The
    gray levels are then rescaled between the ``args.lo`` and ``args.hi``
    percentiles, clipped to [0, 1] and thresholded at ``args.threshold``.

    ``job`` only appears in the progress message.  Returns the path of the
    written file, ``base + ".ds.png"``.
    """
    base, _ = ocrolib.allsplitext(fpath)
    basefile = ocrolib.allsplitext(os.path.basename(fpath))[0]
    if args.parallel < 2:
        print_info("=== %s %-3d" % (fpath, job))
    flat = ocrolib.read_image_gray(fpath)

    # estimate skew angle and rotate
    angle = 0
    if args.maxskew > 0:
        if args.parallel < 2:
            print_info("estimating skew angle")
        rows, cols = flat.shape
        skip_r, skip_c = int(args.bignore * rows), int(args.bignore * cols)
        flat = amax(flat) - flat
        flat -= amin(flat)
        interior = flat[skip_r:rows - skip_r, skip_c:cols - skip_c]
        limit = args.maxskew
        steps = int(2 * args.maxskew * args.skewsteps)
        angle = estimate_skew_angle(interior,
                                    linspace(-limit, limit, steps + 1))
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        flat = amax(flat) - flat

    # estimate low and high thresholds
    if args.parallel < 2:
        print_info("estimating thresholds")
    rows, cols = flat.shape
    skip_r, skip_c = int(args.bignore * rows), int(args.bignore * cols)
    sample = flat[skip_r:rows - skip_r, skip_c:cols - skip_c]
    if args.escale > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        sigma = args.escale * 20.0
        span = int(args.escale * 50)
        mask = sample - filters.gaussian_filter(sample, sigma)
        mask = filters.gaussian_filter(mask**2, sigma)**0.5
        mask = (mask > 0.3 * amax(mask))
        mask = morphology.binary_dilation(mask, structure=ones((span, 1)))
        mask = morphology.binary_dilation(mask, structure=ones((1, span)))
        if args.debug > 0:
            imshow(mask)
            ginput(1, args.debug)
        sample = sample[mask]
    lo = stats.scoreatpercentile(sample.ravel(), args.lo)
    hi = stats.scoreatpercentile(sample.ravel(), args.hi)

    # rescale the image to get the gray scale image
    if args.parallel < 2:
        print_info("rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if args.debug > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, args.debug)
    thresholded = 1 * (flat > args.threshold)

    # output the thresholded image
    print_info("%s lo-hi (%.2f %.2f) angle %4.1f" % (basefile, lo, hi, angle))
    if args.parallel < 2:
        print_info("writing")
    ocrolib.write_image_binary(base + ".ds.png", thresholded)
    return base + ".ds.png"
def allchars():
    """Yield every item produced by ``extract_chars`` over all ``fnames``.

    Each file is read as a gray image, inverted (``1 - image``), segmented
    with ``lineseg.ccslineseg`` and renumbered by x-center before the
    characters are extracted.  A file whose segmentation fails is reported
    with a traceback and skipped.  Progress is printed every 20 files.
    """
    count = 0
    for fno, fname in enumerate(fnames):
        if fno % 20 == 0:
            # One pre-formatted argument prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%s %s %s" % (fno, fname, count))
        image = 1 - ocrolib.read_image_gray(fname)
        try:
            seg = lineseg.ccslineseg(image)
        except Exception:
            # Best effort: skip this file.  ``Exception`` (not a bare
            # ``except``) lets KeyboardInterrupt/SystemExit propagate.
            traceback.print_exc()
            continue
        seg = morph.renumber_by_xcenter(seg)
        for e in extract_chars(seg):
            count += 1
            yield e
def compute_geomaps(fnames,shapedict,old_model,use_gt=1,size=32,debug=0,old_order=1):
    """Given a shape dictionary and an existing line geometry estimator,
    compute updated geometric maps for each entry in the shape dictionary.

    For every line image the baseline and x-line polynomials fitted by
    ``old_model.lineFit`` are rasterized (one pixel per column), and for
    every extracted character the transformed rasters are accumulated into
    the map of the shape class chosen by ``shapedict.predict1``.

    With ``use_gt`` set, lines whose ground truth has at least 0.3 times as
    many capitals (or digits) as lowercase letters are skipped.  ``debug``
    greater than 0 plots every ``debug``-th file.

    Returns ``(bls, xls)``, two arrays of shape ``(shapedict.k, size, size)``;
    each plane is scaled so that its sum equals its width (planes with an
    empty sum are left effectively unchanged).
    """
    if debug>0:
        gray()
        ion()
    bls = zeros((shapedict.k,size,size))
    xls = zeros((shapedict.k,size,size))
    count = 0
    for fno,fname in enumerate(fnames):
        if fno%20==0:
            # One pre-formatted argument prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%s %s %s" % (fno,fname,count))
        if use_gt:
            # don't use lines with many capital letters for training because
            # they result in bad models
            gt = ocrolib.read_text(ocrolib.fvariant(fname,"txt","gt"))
            nlower = len(re.sub(r'[^a-z]','',gt))
            if len(re.sub(r'[^A-Z]','',gt))>=0.3*nlower: continue
            if len(re.sub(r'[^0-9]','',gt))>=0.3*nlower: continue
        image = 1-ocrolib.read_image_gray(fname)
        show = debug>0 and fno%debug==0
        if show:
            clf()
            subplot(411)
            imshow(image)
        try:
            blp,xlp = old_model.lineFit(image,order=old_order)
        except Exception:
            # Skip lines the old model cannot fit; ``Exception`` rather than
            # a bare ``except`` so Ctrl-C still interrupts the run.
            traceback.print_exc()
            continue
        h,w = image.shape
        blimage = zeros(image.shape)
        xlimage = zeros(image.shape)
        for x in range(w):
            blimage[clip(int(polyval(blp,x)),0,h-1),x] = 1
        for x in range(w):
            xlimage[clip(int(polyval(xlp,x)),0,h-1),x] = 1
        if show:
            subplot(413)
            imshow(xlimage+0.3*image)
            subplot(414)
            imshow(blimage+0.3*image)
        try:
            seg = lineseg.ccslineseg(image)
        except Exception:
            # Segmentation failures are skipped silently, as before.
            continue
        if show:
            subplot(412)
            morph.showlabels(seg)
        # All characters of one line must share one patch shape.
        char_shape = None
        for sub,transform,itransform_add in extract_chars(seg):
            if char_shape is None: char_shape = sub.shape
            assert sub.shape==char_shape
            count += 1
            best = shapedict.predict1(sub)
            bls[best] += transform(blimage)
            xls[best] += transform(xlimage)
        if debug==1: ginput(1,100)
        elif debug>1: ginput(1,0.01)
    # Iterating an ndarray yields views, so the in-place scaling below
    # updates ``bls`` / ``xls`` themselves.
    for plane in bls:
        plane *= plane.shape[1]*1.0/max(1e-6,sum(plane))
    for plane in xls:
        plane *= plane.shape[1]*1.0/max(1e-6,sum(plane))
    return bls,xls
def allchars():
    """Generate the characters extracted from every file in ``fnames``.

    Each file is read as a gray image and inverted, segmented with
    ``lineseg.ccslineseg``, renumbered with ``morph.renumber_by_xcenter``
    and passed to ``extract_chars``; every item it produces is yielded.
    Files whose segmentation raises are reported and skipped.  A progress
    line (file number, name, characters so far) is printed every 20 files.
    """
    count = 0
    for fno, fname in enumerate(fnames):
        if fno % 20 == 0:
            # Formatting first keeps the output identical whether ``print``
            # is the Python 2 statement or the Python 3 function.
            print("%s %s %s" % (fno, fname, count))
        image = 1 - ocrolib.read_image_gray(fname)
        try:
            seg = lineseg.ccslineseg(image)
        except Exception:
            # A bare ``except`` would also trap KeyboardInterrupt and
            # SystemExit; only ordinary errors should skip the file.
            traceback.print_exc()
            continue
        seg = morph.renumber_by_xcenter(seg)
        for e in extract_chars(seg):
            count += 1
            yield e
def binarize(image_filepath):
    """Binarize the image at ``image_filepath`` with fixed parameters.

    The image is normalized, flattened by its estimated local white level,
    deskewed, rescaled between the estimated low/high gray levels, clipped
    to [0, 1] and thresholded.

    Returns ``(binary, flat)``: the thresholded image ``1 * (flat > 0.5)``
    and the rescaled gray image.
    """
    raw = ocrolib.read_image_gray(image_filepath)

    # Perform image normalization.
    image = normalize_raw_image(raw)

    threshold = 0.5     # Threshold, determines lightness.
    zoom = 0.5          # Zoom for page background estimation, smaller=faster.
    escale = 1.0        # Scale for estimating a mask over the text region.
    bignore = 0.1       # Ignore this much of the border for threshold estimation.
    perc = 80           # Percentage for filters.
    filter_range = 20   # Range for filters (named so it does not shadow ``range``).
    maxskew = 2         # Skew angle estimation parameters (degrees).
    lo_percentile = 5   # Percentile for black estimation.
    hi_percentile = 90  # Percentile for white estimation.
    skewsteps = 8       # Steps for skew angle estimation (per degree).
    debug = 0           # Display intermediate results.

    # Flatten it by estimating the local whitelevel.
    flat = estimate_local_whitelevel(image, zoom, perc, filter_range, debug)

    # Estimate skew angle and rotate; the angle itself is not needed here.
    flat, _ = estimate_skew(flat, bignore, maxskew, skewsteps, debug)

    # Estimate low and high thresholds.
    lo, hi = estimate_thresholds(flat, bignore, escale, lo_percentile,
                                 hi_percentile, debug)

    # Rescale the image to get the gray scale image.
    flat -= lo
    flat /= (hi - lo)
    flat = np.clip(flat, 0, 1)
    binary = 1 * (flat > threshold)
    return binary, flat
def binarize(inFile, binFile, grayFile):
    """Binarize ``inFile`` and write the binary and gray results.

    The thresholded image is written to ``binFile`` and the rescaled gray
    image to ``grayFile``.  No skew estimation is done; the reported angle
    is always 0.0.

    Returns ``True`` on success and ``False`` when the image is empty, is
    rejected by ``check_page``, or the thresholds cannot be estimated.
    """
    print("binarize: inFile=%s binFile=%s grayFile=%s" % (inFile, binFile, grayFile))
    raw = ocrolib.read_image_gray(inFile)

    # perform image normalization
    image = normalize_raw_image(raw)
    if image is None:
        # Report failure like every other error path; the former
        # ``assert False`` made this ``return`` unreachable.
        print("!! # image is empty: %s" % (inFile))
        return False

    check = check_page(np.amax(image) - image)
    if check is not None:
        print(inFile + " SKIPPED " + check + "(use -n to disable this check)")
        return False

    # check whether the image is already effectively binarized
    # (``* 1.0`` forces a true division of the two integer counts)
    extreme = (np.sum(image < 0.05) + np.sum(image > 0.95)) * 1.0 / np.prod(
        image.shape)
    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        # if not, we need to flatten it by estimating the local whitelevel
        print("flattening")
        flat = estimate_local_whitelevel(image, zoom, perc, size)

    print("comment=%r extreme=%s" % (comment, extreme))
    print("image=%s" % desc(image))
    print(" flat=%s" % desc(flat))

    # skew estimation is disabled here
    angle = 0.0

    # estimate low and high thresholds
    print("estimating thresholds")
    lo, hi, ok = estimate_thresholds(flat, bignore, escale, defLo, defHi)
    if not ok:
        return False
    print("lo=%5.3f (%g)" % (lo, defLo))
    print("hi=%5.3f (%g)" % (hi, defHi))

    # rescale the image to get the gray scale image
    print("rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = np.clip(flat, 0, 1)
    binary = flat > threshold

    # output the normalized grayscale and the thresholded images
    print("%s lo-hi (%.2f %.2f) angle %4.1f %s" % (inFile, lo, hi, angle, comment))
    print("##1 flat=%s" % desc(flat))
    print("##2 bin=%s" % desc(binary))
    print("writing %s" % binFile)
    ocrolib.write_image_binary(binFile, binary)
    ocrolib.write_image_gray(grayFile, flat)
    return True
def process1(job):
    """Segment one binarized page into text lines and write them out.

    ``job`` is a ``(fname, i)`` pair; ``i`` is only used in the final
    summary line.  The binary page is read from ``<base>.bin.png`` or, if
    that cannot be opened, from ``fname``.  The page segmentation goes to
    ``<base>.pseg.png`` and each line to ``<base>/01XXXX.bin.png`` (plus
    ``.nrm.png`` when ``args.gray`` is set).

    Sets the module-level ``base``.  Returns ``None`` in all cases; pages
    that cannot be processed are reported and skipped.
    """
    fname, i = job
    global base
    base, _ = ocrolib.allsplitext(fname)
    outputdir = base

    try:
        binary = ocrolib.read_image_binary(base + ".bin.png")
    except IOError:
        try:
            binary = ocrolib.read_image_binary(fname)
        except IOError:
            if ocrolib.trace:
                traceback.print_exc()
            print("cannot open either", base + ".bin.png", "or", fname)
            return

    checktype(binary, ABINARY2)

    if not args.nocheck:
        check = check_page(amax(binary) - binary)
        if check is not None:
            print(fname, "SKIPPED", check, "(use -n to disable this check)")
            return

    if args.gray:
        if os.path.exists(base + ".nrm.png"):
            gray = ocrolib.read_image_gray(base + ".nrm.png")
            checktype(gray, GRAYSCALE)
        else:
            # Without this branch ``gray`` stayed unbound and the line
            # extraction below failed with a NameError.
            sys.stderr.write("%s: grayscale version %s.nrm.png not found; skipping\n" %
                             (fname, base))
            return

    binary = 1 - binary  # invert

    if args.scale == 0:
        scale = psegutils.estimate_scale(binary)
    else:
        scale = args.scale
    print("scale", scale)
    if isnan(scale) or scale > 1000.0:
        sys.stderr.write("%s: bad scale (%g); skipping\n" % (fname, scale))
        return
    if scale < args.minscale:
        sys.stderr.write("%s: scale (%g) less than --minscale; skipping\n" %
                         (fname, scale))
        return

    # find columns and text lines
    if not args.quiet:
        print("computing segmentation")
    segmentation = compute_segmentation(binary, scale)
    if amax(segmentation) > args.maxlines:
        print(fname, ": too many lines", amax(segmentation))
        return
    if not args.quiet:
        print("number of lines", amax(segmentation))

    # compute the reading order
    if not args.quiet:
        print("finding reading order")
    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)

    # renumber the labels so that they conform to the specs
    # (loop variables are named apart from the job index ``i`` so the
    # summary line at the end reports the job, not the last line index)
    nlabels = amax(segmentation) + 1
    renumber = zeros(nlabels, 'i')
    for rank, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (rank + 1)
    segmentation = renumber[segmentation]

    # finally, output everything
    if not args.quiet:
        print("writing lines")
    if not os.path.exists(outputdir):
        os.mkdir(outputdir)
    lines = [lines[v] for v in lsort]
    ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
    cleaned = ocrolib.remove_noise(binary, args.noise)
    for lineno, l in enumerate(lines):
        binline = psegutils.extract_masked(1 - cleaned, l, pad=args.pad,
                                           expand=args.expand)
        ocrolib.write_image_binary("%s/01%04x.bin.png" % (outputdir, lineno + 1),
                                   binline)
        if args.gray:
            grayline = psegutils.extract_masked(gray, l, pad=args.pad,
                                                expand=args.expand)
            ocrolib.write_image_gray("%s/01%04x.nrm.png" % (outputdir, lineno + 1),
                                     grayline)
    print("%6d" % i, fname, "%4.1f" % scale, len(lines))
def process(self):
    """Estimate the skew angle of every input page and record it in PAGE.

    For each input file the page image is read as gray, a skew angle is
    estimated when ``maxskew > 0`` (otherwise 0), and the angle is stored
    with ``set_orientation`` on the page.  The updated PAGE document is
    added to the output file group.  The thresholded image is computed but
    not written anywhere.
    """
    for n, input_file in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID
        page = pcgts.get_Page()
        # why does it save the image ??
        page_image, _, _ = self.workspace.image_from_page(page, page_id)

        opts = self.parameter
        verbose = opts['parallel'] < 2
        if verbose:
            LOG.info("INPUT FILE %s ", input_file.pageId or input_file.ID)
        flat = ocrolib.read_image_gray(page_image.filename)

        # estimate skew angle and rotate
        angle = 0
        if opts['maxskew'] > 0:
            if verbose:
                LOG.info("Estimating Skew Angle")
            rows, cols = flat.shape
            skip_r = int(opts['bignore'] * rows)
            skip_c = int(opts['bignore'] * cols)
            flat = amax(flat) - flat
            flat -= amin(flat)
            interior = flat[skip_r:rows - skip_r, skip_c:cols - skip_c]
            limit = opts['maxskew']
            steps = int(2 * opts['maxskew'] * opts['skewsteps'])
            angle = self.estimate_skew_angle(
                interior, linspace(-limit, limit, steps + 1))
            flat = interpolation.rotate(flat, angle, mode='constant',
                                        reshape=0)
            flat = amax(flat) - flat

        # estimate low and high thresholds
        if verbose:
            LOG.info("Estimating Thresholds")
        rows, cols = flat.shape
        skip_r = int(opts['bignore'] * rows)
        skip_c = int(opts['bignore'] * cols)
        sample = flat[skip_r:rows - skip_r, skip_c:cols - skip_c]
        if opts['escale'] > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            e = opts['escale']
            mask = sample - filters.gaussian_filter(sample, e * 20.0)
            mask = filters.gaussian_filter(mask**2, e * 20.0)**0.5
            mask = (mask > 0.3 * amax(mask))
            mask = morphology.binary_dilation(
                mask, structure=ones((int(e * 50), 1)))
            mask = morphology.binary_dilation(
                mask, structure=ones((1, int(e * 50))))
            if opts['debug'] > 0:
                imshow(mask)
                ginput(1, opts['debug'])
            sample = sample[mask]
        lo = stats.scoreatpercentile(sample.ravel(), opts['lo'])
        hi = stats.scoreatpercentile(sample.ravel(), opts['hi'])

        # rescale the image to get the gray scale image
        if verbose:
            LOG.info("Rescaling")
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if opts['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, opts['debug'])
        deskewed = 1 * (flat > opts['threshold'])

        LOG.info("%s lo-hi (%.2f %.2f) angle %4.1f" %
                 (pcgts.get_Page().imageFilename, lo, hi, angle))
        if verbose:
            LOG.info("Writing")

        # TODO: Need some clarification as the results effect the
        # following pre-processing steps.
        pcgts.get_Page().set_orientation(angle)

        file_id = input_file.ID.replace(self.input_file_grp,
                                        self.output_file_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.output_file_grp, n)
        xml_path = os.path.join(self.output_file_grp, file_id + '.xml')
        self.workspace.add_file(ID=file_id,
                                file_grp=self.output_file_grp,
                                pageId=input_file.pageId,
                                mimetype=MIMETYPE_PAGE,
                                local_filename=xml_path,
                                content=to_xml(pcgts).encode('utf-8'))
def _process_segment(self, page, filename, page_id, file_id):
    """Binarize the image in ``filename`` and attach it to ``page``.

    The image is normalized to [0, 1], optionally checked with
    ``self.check_page``, flattened by a percentile-filter estimate of the
    local white level (unless it already looks binarized), rescaled between
    the ``lo`` and ``hi`` percentiles and thresholded.  The result is saved
    through the workspace under ``file_id`` in ``self.image_grp`` and added
    to ``page`` as an AlternativeImage with comment "binarized".

    Returns ``None``; an empty image or a failed page check is logged and
    the segment is skipped.
    """
    raw = ocrolib.read_image_gray(filename)
    self.dshow(raw, "input")

    # perform image normalization
    image = raw - amin(raw)
    if amax(image) == amin(image):
        LOG.info("# image is empty: %s" % (page_id))
        return
    image /= amax(image)

    if not self.parameter['nocheck']:
        check = self.check_page(amax(image) - image)
        if check is not None:
            # This method has no ``input_file``; the old message referenced
            # it (NameError) and mis-grouped ``or`` with ``+``.
            LOG.error("%s SKIPPED. %s (use -n to disable this check)",
                      page_id, check)
            return

    # check whether the image is already effectively binarized
    if self.parameter['gray']:
        extreme = 0
    else:
        extreme = (np.sum(image < 0.05) +
                   np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)
    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        # if not, we need to flatten it by estimating the local whitelevel
        LOG.info("Flattening")
        m = interpolation.zoom(image, self.parameter['zoom'])
        m = filters.percentile_filter(m, self.parameter['perc'],
                                      size=(self.parameter['range'], 2))
        m = filters.percentile_filter(m, self.parameter['perc'],
                                      size=(2, self.parameter['range']))
        m = interpolation.zoom(m, 1.0 / self.parameter['zoom'])
        if self.parameter['debug'] > 0:
            clf()
            imshow(m, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        # Zooming down and up again may change the size slightly, so both
        # arrays are cropped to their common extent.
        w, h = minimum(array(image.shape), array(m.shape))
        flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
        if self.parameter['debug'] > 0:
            clf()
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])

    # estimate low and high thresholds
    LOG.info("Estimating Thresholds")
    d0, d1 = flat.shape
    o0, o1 = int(self.parameter['bignore'] * d0), int(
        self.parameter['bignore'] * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    if self.parameter['escale'] > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        e = self.parameter['escale']
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        if self.parameter['debug'] > 0:
            imshow(v)
            ginput(1, self.parameter['debug'])
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
    hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])

    # rescale the image to get the gray scale image
    LOG.info("Rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if self.parameter['debug'] > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, self.parameter['debug'])
    binarized = 1 * (flat > self.parameter['threshold'])

    # output the thresholded image
    LOG.info("%s lo-hi (%.2f %.2f) %s" % (page_id, lo, hi, comment))
    LOG.info("writing")
    if self.parameter['debug'] > 0 or self.parameter['show']:
        clf()
        gray()
        imshow(binarized)
        ginput(1, max(0.1, self.parameter['debug']))

    bin_array = array(255 * (binarized > ocrolib.midrange(binarized)), 'B')
    bin_image = ocrolib.array2pil(bin_array)
    file_path = self.workspace.save_image_file(bin_image,
                                               file_id,
                                               page_id=page_id,
                                               file_grp=self.image_grp)
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path, comment="binarized"))
def extractLines2(imgpath):
    """Find candidate text sub-lines in the top half of an image.

    The image is read as gray, cropped to its top half, deskewed, and
    normalized to [0.0, 0.999].  The objects returned by ``findBox`` are
    binned into four grids of cells (``CELLTYPE`` 0-3, each shifted by half
    a cell in x and/or y), one ``SubLineFinder`` per occupied cell, and the
    resulting sub-lines are drawn on a 2x enlarged color copy.

    Returns ``(img_grey, illu)``: the processed gray image and the
    illustration image.

    NOTE(review): this is Python 2 code (print statement, ``iteritems``)
    and the ``/`` operators on integers below rely on Python 2 floor
    division; the results are used as slice bounds and cell indices.
    NOTE(review): the nesting was reconstructed from whitespace-collapsed
    source — confirm against the original file.
    """
    img_grey = ocrolib.read_image_gray(imgpath)
    # keep only the top half of the image
    img_grey = img_grey[:img_grey.shape[0] / 2, :]
    (h, w) = img_grey.shape[:2]
    # estimate the skew on the half-size central region
    img00 = cv2.resize(img_grey[h / 4:3 * h / 4, w / 4:3 * w / 4],
                       None,
                       fx=0.5,
                       fy=0.5)
    angle = estimate_skew_angle(img00, linspace(-5, 5, 42))
    print 'goc', angle
    rotM = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
    img_grey = cv2.warpAffine(img_grey, rotM, (w, h))
    h, w = img_grey.shape
    img_grey = cv2.normalize(img_grey.astype(float32), None, 0.0, 0.999,
                             cv2.NORM_MINMAX)
    objects, scale = findBox(img_grey)
    imgwidth = img_grey.shape[1]
    imgheight = img_grey.shape[0]
    # Start from a cell of 6 x 2.5 scale units, then adjust the cell size
    # so that a whole number of cells spans the image.
    cellwidth = 6 * scale
    cellheight = 2.5 * scale
    N_x = int(round(imgwidth / cellwidth))
    cellwidth = int(round(imgwidth / N_x))
    N_y = int(round(imgheight / cellheight))
    cellheight = int(round(imgheight / N_y))
    # one {cellid: SubLineFinder} mapping per cell type
    cells_list = [{}, {}, {}, {}]

    def pixel2cell2id(pixel_x, pixel_y, CELLTYPE):
        # Map a pixel to (cell origin, cell id) in the grid of the given
        # type; returns (None, None) when the shifted pixel is not
        # strictly positive in both coordinates.
        dx = 0
        dy = 0
        if CELLTYPE == 3:
            pixel_x -= cellwidth / 2
            pixel_y -= cellheight / 2
            dx = cellwidth / 2
            dy = cellheight / 2
        if CELLTYPE == 2:
            pixel_x -= cellwidth / 2
            dx = cellwidth / 2
        if CELLTYPE == 1:
            pixel_y -= cellheight / 2
            dy = cellheight / 2
        if pixel_x <= 0 or pixel_y <= 0:
            return None, None
        cellcoord = (pixel_x / cellwidth, pixel_y / cellheight)
        cellid = cellcoord[0] + cellcoord[1] * N_x
        cellcoord = (cellcoord[0] * cellwidth + dx,
                     cellcoord[1] * cellheight + dy)
        return cellcoord, cellid

    def id2cell2pixel(cellid, x, y, CELLTYPE):
        # Inverse direction: cell id plus in-cell offset to pixel position.
        # Only CELLTYPE 3 is shifted here.  Not called within this function.
        cellcoord = (cellid % N_x, cellid / N_x)
        pixel_x = cellcoord[0] * cellwidth + x
        pixel_y = cellcoord[1] * cellheight + y
        if CELLTYPE == 3:
            pixel_x += cellwidth / 2
            pixel_y += cellheight / 2
        return cellcoord, pixel_x, pixel_y

    # 2x enlarged 8-bit color copy used for drawing
    illu = cv2.cvtColor(img_grey.astype(np.float32), cv2.COLOR_GRAY2BGR)
    illu = cv2.resize(illu, None, fx=2.0, fy=2.0)
    illu = (illu * 255).astype(np.uint8)
    for o in objects:
        ### Add object as candidate character
        # reference point: horizontal center, bottom edge of the object
        pixel_x, pixel_y = (o[1].start + o[1].stop) / 2, o[0].stop
        for celltype in range(4):
            cellcoord, cellid = pixel2cell2id(pixel_x,
                                              pixel_y,
                                              CELLTYPE=celltype)
            if cellcoord is None or cellid is None:
                continue
            cellbound = slice(cellcoord[1], cellcoord[1] + cellheight,
                              None), slice(cellcoord[0],
                                           cellcoord[0] + cellwidth, None)
            if cellid not in cells_list[celltype]:
                cells_list[celltype][cellid] = SubLineFinder(
                    window_size=scale / 3, cellbound=cellbound, initChar=o)
            else:
                cells_list[celltype][cellid].addChar(o)
        # outline the object in a random color (coordinates doubled to
        # match the enlarged illustration)
        cv2.rectangle(illu, (o[1].start * 2, o[0].start * 2),
                      (o[1].stop * 2, o[0].stop * 2),
                      (random.randint(0, 255), random.randint(
                          0, 255), random.randint(0, 255)), 1)
    for celltype in range(4):
        # one drawing color per cell type
        if celltype == 0:
            col = (255, 0, 0)
        if celltype == 1:
            col = (0, 255, 0)
        if celltype == 2:
            col = (255, 255, 0)
        if celltype == 3:
            col = (0, 0, 255)
        for cellid, subline in cells_list[celltype].iteritems():
            line = subline.subline()
            if line is not None:
                pos1 = (int(line[0][0]) * 2, int(line[0][1]) * 2)
                pos2 = (int(line[1][0]) * 2, int(line[1][1]) * 2)
                cv2.line(illu, pos1, pos2, col, 1)
    ### illustrate/debug first round
    return img_grey, illu
def process(self):
    """Read the gray image named by the ``path`` parameter.

    Returns ``self.null_data()`` when the path (default ``""``) does not
    exist; otherwise the image read from the ``makesafe``-processed path.
    """
    if os.path.exists(self._params.get("path", "")):
        safe_path = makesafe(self._params.get("path"))
        return ocrolib.read_image_gray(safe_path)
    return self.null_data()
def process(self):
    """Deskew every input page, write the result and record the angle.

    For each input file the page image is read as gray, a skew angle is
    estimated and applied when ``maxskew > 0``, the image is rescaled
    between the ``lo``/``hi`` percentiles and thresholded, and the result
    is written to ``<base>.ds.png``.  The negated angle, wrapped into
    (-180, 180], is stored as the page orientation and the file is added
    to the output file group.
    """
    for (n, input_file) in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        fname = pcgts.get_Page().imageFilename
        img = self.workspace.resolve_image_as_pil(fname)
        param = self.parameter
        base, _ = ocrolib.allsplitext(fname)

        if param['parallel'] < 2:
            print_info("=== %s " % (fname))
        raw = ocrolib.read_image_gray(img.filename)
        # ``flat`` aliases ``raw``; when no skew is estimated the in-place
        # rescaling below therefore modifies ``raw`` as well.
        flat = raw

        # estimate skew angle and rotate
        if param['maxskew'] > 0:
            if param['parallel'] < 2:
                print_info("estimating skew angle")
            d0, d1 = flat.shape
            # border (fraction ``bignore`` of each dimension) left out of
            # the estimate
            o0, o1 = int(param['bignore'] * d0), int(param['bignore'] * d1)
            # work on the inverted image, shifted so its minimum is 0
            flat = amax(flat) - flat
            flat -= amin(flat)
            est = flat[o0:d0 - o0, o1:d1 - o1]
            ma = param['maxskew']
            ms = int(2 * param['maxskew'] * param['skewsteps'])
            angle = self.estimate_skew_angle(est,
                                             linspace(-ma, ma, ms + 1))
            flat = interpolation.rotate(flat,
                                        angle,
                                        mode='constant',
                                        reshape=0)
            # invert back
            flat = amax(flat) - flat
        else:
            angle = 0

        # estimate low and high thresholds
        if param['parallel'] < 2:
            print_info("estimating thresholds")
        d0, d1 = flat.shape
        o0, o1 = int(param['bignore'] * d0), int(param['bignore'] * d1)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        if param['escale'] > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            e = param['escale']
            v = est - filters.gaussian_filter(est, e * 20.0)
            v = filters.gaussian_filter(v**2, e * 20.0)**0.5
            v = (v > 0.3 * amax(v))
            v = morphology.binary_dilation(v, structure=ones(
                (int(e * 50), 1)))
            v = morphology.binary_dilation(v, structure=ones(
                (1, int(e * 50))))
            if param['debug'] > 0:
                imshow(v)
                ginput(1, param['debug'])
            # keep only the pixels selected by the mask
            est = est[v]
        lo = stats.scoreatpercentile(est.ravel(), param['lo'])
        hi = stats.scoreatpercentile(est.ravel(), param['hi'])

        # rescale the image to get the gray scale image
        if param['parallel'] < 2:
            print_info("rescaling")
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if param['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, param['debug'])
        deskewed = 1 * (flat > param['threshold'])

        # output the thresholded image
        print_info("%s lo-hi (%.2f %.2f) angle %4.1f" %
                   (pcgts.get_Page().imageFilename, lo, hi, angle))
        if param['parallel'] < 2:
            print_info("writing")
        ocrolib.write_image_binary(base + ".ds.png", deskewed)

        # store the negated angle, wrapped into the range (-180, 180]
        orientation = -angle
        orientation = 180 - (180 - orientation) % 360
        pcgts.get_Page().set_orientation(orientation)

        # NOTE(review): the mimetype is "image/png" while ``content`` is
        # the serialized PAGE XML — confirm this is intended.
        ID = concat_padded(self.output_file_grp, n)
        self.workspace.add_file(ID=ID,
                                file_grp=self.output_file_grp,
                                pageId=input_file.pageId,
                                mimetype="image/png",
                                url=base + ".ds.png",
                                local_filename='%s/%s' %
                                (self.output_file_grp, ID),
                                content=to_xml(pcgts).encode('utf-8'))
def compute_geomaps(fnames,
                    shapedict,
                    old_model,
                    use_gt=1,
                    size=32,
                    debug=0,
                    old_order=1):
    """Given a shape dictionary and an existing line geometry estimator,
    compute updated geometric maps for each entry in the shape dictionary.

    Baseline and x-line polynomials from ``old_model.lineFit`` are drawn
    one pixel per column; for each character from ``extract_chars`` the
    transformed drawings are added to the maps of the class returned by
    ``shapedict.predict1``.

    If ``use_gt`` is set, a line is skipped when its ground truth contains
    at least 0.3 times as many capitals, or digits, as lowercase letters.
    A positive ``debug`` plots every ``debug``-th file.

    Returns ``(bls, xls)``, each of shape ``(shapedict.k, size, size)``,
    with every plane scaled so its sum equals its width.
    """
    if debug > 0:
        gray()
        ion()
    map_shape = (shapedict.k, size, size)
    bls = zeros(map_shape)
    xls = zeros(map_shape)
    count = 0
    for fno, fname in enumerate(fnames):
        if fno % 20 == 0:
            # Formatted first so the output is the same under the Python 2
            # print statement and the Python 3 print function.
            print("%s %s %s" % (fno, fname, count))
        if use_gt:
            # don't use lines with many capital letters for training because
            # they result in bad models
            gt = ocrolib.read_text(ocrolib.fvariant(fname, "txt", "gt"))
            lowercase = len(re.sub(r'[^a-z]', '', gt))
            if len(re.sub(r'[^A-Z]', '', gt)) >= 0.3 * lowercase:
                continue
            if len(re.sub(r'[^0-9]', '', gt)) >= 0.3 * lowercase:
                continue
        image = 1 - ocrolib.read_image_gray(fname)
        plotting = debug > 0 and fno % debug == 0
        if plotting:
            clf()
            subplot(411)
            imshow(image)
        try:
            blp, xlp = old_model.lineFit(image, order=old_order)
        except Exception:
            # ``Exception`` instead of a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            traceback.print_exc()
            continue
        h, w = image.shape
        blimage = zeros(image.shape)
        xlimage = zeros(image.shape)
        for x in range(w):
            blimage[clip(int(polyval(blp, x)), 0, h - 1), x] = 1
        for x in range(w):
            xlimage[clip(int(polyval(xlp, x)), 0, h - 1), x] = 1
        if plotting:
            subplot(413)
            imshow(xlimage + 0.3 * image)
            subplot(414)
            imshow(blimage + 0.3 * image)
        try:
            seg = lineseg.ccslineseg(image)
        except Exception:
            # Lines that cannot be segmented are skipped without a report,
            # as before.
            continue
        if plotting:
            subplot(412)
            morph.showlabels(seg)
        # Every character patch of a line must have the same shape.
        patch_shape = None
        for sub, transform, itransform_add in extract_chars(seg):
            if patch_shape is None:
                patch_shape = sub.shape
            assert sub.shape == patch_shape
            count += 1
            best = shapedict.predict1(sub)
            bls[best] += transform(blimage)
            xls[best] += transform(xlimage)
        if debug == 1:
            ginput(1, 100)
        elif debug > 1:
            ginput(1, 0.01)
    # Rows of an ndarray are views, so scaling them in place updates the
    # maps themselves; the 1e-6 floor avoids dividing by an empty sum.
    for plane in bls:
        plane *= plane.shape[1] * 1.0 / max(1e-6, sum(plane))
    for plane in xls:
        plane *= plane.shape[1] * 1.0 / max(1e-6, sum(plane))
    return bls, xls
def process(self):
    """Binarize the image of every input page.

    Each page image is normalized to [0, 1], optionally checked with
    ``self.check_page``, flattened by a percentile-filter estimate of the
    local white level (unless it already looks binarized), rescaled between
    the ``lo`` and ``hi`` percentiles and thresholded.  The result is
    written to ``<base>.bin.png`` and added to the output file group.

    A page that is empty or fails the check is reported and skipped; the
    remaining input files are still processed.
    """
    for (n, input_file) in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        fname = pcgts.get_Page().imageFilename
        img = self.workspace.resolve_image_as_pil(fname)

        print_info("# %s" % (fname))
        raw = ocrolib.read_image_gray(img.filename)
        self.dshow(raw, "input")

        # perform image normalization
        image = raw - amin(raw)
        if amax(image) == amin(image):
            print_info("# image is empty: %s" % (fname))
            # ``continue``, not ``return``: one bad page must not abort
            # the rest of the batch.
            continue
        image /= amax(image)

        if not self.parameter['nocheck']:
            check = self.check_page(amax(image) - image)
            if check is not None:
                print_error(fname + " SKIPPED. " + check +
                            " (use -n to disable this check)")
                continue

        # check whether the image is already effectively binarized
        if self.parameter['gray']:
            extreme = 0
        else:
            extreme = (np.sum(image < 0.05) +
                       np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)
        if extreme > 0.95:
            comment = "no-normalization"
            flat = image
        else:
            comment = ""
            # if not, we need to flatten it by estimating the local whitelevel
            print_info("flattening")
            m = interpolation.zoom(image, self.parameter['zoom'])
            m = filters.percentile_filter(m,
                                          self.parameter['perc'],
                                          size=(self.parameter['range'], 2))
            m = filters.percentile_filter(m,
                                          self.parameter['perc'],
                                          size=(2, self.parameter['range']))
            m = interpolation.zoom(m, 1.0 / self.parameter['zoom'])
            if self.parameter['debug'] > 0:
                clf()
                imshow(m, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])
            # Zooming down and up again may change the size slightly, so
            # both arrays are cropped to their common extent.
            w, h = minimum(array(image.shape), array(m.shape))
            flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
            if self.parameter['debug'] > 0:
                clf()
                imshow(flat, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])

        # estimate low and high thresholds
        print_info("estimating thresholds")
        d0, d1 = flat.shape
        o0, o1 = int(self.parameter['bignore'] * d0), int(
            self.parameter['bignore'] * d1)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        if self.parameter['escale'] > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            e = self.parameter['escale']
            v = est - filters.gaussian_filter(est, e * 20.0)
            v = filters.gaussian_filter(v**2, e * 20.0)**0.5
            v = (v > 0.3 * amax(v))
            v = morphology.binary_dilation(v, structure=ones(
                (int(e * 50), 1)))
            v = morphology.binary_dilation(v, structure=ones(
                (1, int(e * 50))))
            if self.parameter['debug'] > 0:
                imshow(v)
                ginput(1, self.parameter['debug'])
            est = est[v]
        lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
        hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])

        # rescale the image to get the gray scale image
        print_info("rescaling")
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if self.parameter['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        binarized = 1 * (flat > self.parameter['threshold'])

        # output the thresholded image
        print_info("%s lo-hi (%.2f %.2f) %s" % (fname, lo, hi, comment))
        print_info("writing")
        if self.parameter['debug'] > 0 or self.parameter['show']:
            clf()
            gray()
            imshow(binarized)
            ginput(1, max(0.1, self.parameter['debug']))
        base, _ = ocrolib.allsplitext(img.filename)
        ocrolib.write_image_binary(base + ".bin.png", binarized)

        ID = concat_padded(self.output_file_grp, n)
        self.workspace.add_file(ID=ID,
                                file_grp=self.output_file_grp,
                                pageId=input_file.pageId,
                                mimetype="image/png",
                                url=base + ".bin.png",
                                local_filename='%s/%s' %
                                (self.output_file_grp, ID),
                                content=to_xml(pcgts).encode('utf-8'))
def extractLines2(imgpath):
    """Trace text lines on an image and return a colour illustration of them.

    The image at ``imgpath`` is read as grayscale, rotated by an estimated
    skew angle, normalized, and passed to ``findBox`` to obtain bounding
    boxes.  Only the top 800 rows (at most) are kept.  Starting from each
    box, ``SubLine`` objects are grown recursively; every sub-line found
    is finally drawn onto a BGR copy of the image.

    Written for Python 2: ``/`` on integers is relied upon to yield
    integer indices, and ``print`` is used as a statement.

    Returns:
        The BGR image (uint8) with the detected sub-lines drawn on it.
    """
    img_grey = ocrolib.read_image_gray(imgpath)
    (h, w) = img_grey.shape[:2]
    # skew is estimated on the central region, downscaled by a factor of two
    img00 = cv2.resize(img_grey[h / 4:3 * h / 4, w / 4:3 * w / 4],
                       None,
                       fx=0.5,
                       fy=0.5)
    angle = estimate_skew_angle(img00, linspace(-5, 5, 42))
    print 'goc', angle
    rotM = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
    img_grey = cv2.warpAffine(img_grey, rotM, (w, h))
    img_grey = cv2.normalize(img_grey.astype(float32), None, 0.0, 0.999,
                             cv2.NORM_MINMAX)
    objects, scale = findBox(img_grey)
    ######### convert
    # Crop to the window [yfrom, yto) x [xfrom, xto) and shift the boxes that
    # lie inside it into the cropped coordinate system.
    xfrom = 0
    xto = img_grey.shape[1]
    yfrom = 0
    yto = min(img_grey.shape[0], 800)
    img_grey = img_grey[yfrom:yto, xfrom:xto]
    objects2 = []
    for obj in objects:
        topy = obj[0].start
        boty = obj[0].stop
        x = (obj[1].start + obj[1].stop) / 2
        if yfrom <= topy < yto and yfrom <= boty < yto and xfrom <= x < xto:
            object2 = (slice(obj[0].start - yfrom, obj[0].stop - yfrom, None),
                       slice(obj[1].start - xfrom, obj[1].stop - xfrom, None))
            objects2.append(object2)
    objects = objects2
    ######### end convert
    h, w = img_grey.shape
    img = (cv2.cvtColor(img_grey, cv2.COLOR_GRAY2BGR) * 255).astype(np.uint8)
    # Both grids are indexed [x][y]; the extra row/column leaves room for
    # slice stops equal to the image size.
    nodes = [[None for j in range(h + 1)] for i in range(w + 1)]
    points = [[SubLine.ISEMPTY for j in range(h + 1)] for i in range(w + 1)]
    clearedList = set()
    ## temporary solution
    # boxes are visited by increasing horizontal centre
    objects = sorted(objects, key=lambda obj: (obj[1].start + obj[1].stop) / 2)
    for bound in objects:
        topy = bound[0].start
        boty = bound[0].stop
        x = (bound[1].start + bound[1].stop) / 2
        top = (x, topy)
        bottom = (x, boty)
        points[bottom[0]][bottom[1]] = SubLine.ISBOT
        points[top[0]][top[1]] = SubLine.ISTOP
        # the node describing a box is stored at its bottom point
        nodes[bottom[0]][bottom[1]] = (topy, boty, x)
    allines = []
    lemodel = LeModelChooseLine(
        '/home/loitg/Downloads/complex-bg/le_model.pkl')

    def move(subline, allnodes, allpoints, img):
        # Grow `subline`; newly created continuations are recorded in
        # `allines`, and a sub-line without continuation is cleared.
        # NOTE(review): the recursive call is assumed to apply to every
        # returned sub-line, not only to those flagged `isnew` -- confirm.
        newsublines = subline.next(allnodes, allpoints, img)
        if len(newsublines) > 0:
            for new in newsublines:
                if new.isnew:
                    allines.append(new)
                    new.isnew = False
                    print '______________++++++++++++++=' + new.id
                move(new, allnodes, allpoints, img)
        else:
            subline.clear(allnodes, clearedList)

    # illu = img.copy()
    # for bound in objects:
    #     cv2.circle(illu,((bound[1].start + bound[1].stop)/2, bound[0].start),2, (255,0,0),-1)
    #     cv2.circle(illu,((bound[1].start + bound[1].stop)/2, bound[0].stop), 2, (0,255,0),-1)
    #     cv2.line(illu, ((bound[1].start + bound[1].stop)/2, bound[0].start), ((bound[1].start + bound[1].stop)/2, bound[0].stop), (0,0,255),1)
    # cv2.imshow('ii', illu)
    for bound in objects:  # sorted
        topy = bound[0].start
        boty = bound[0].stop
        x = (bound[1].start + bound[1].stop) / 2
        # boxes already consumed by an earlier sub-line do not seed a new one
        if (topy, boty, x) in clearedList:
            continue
        subline = SubLine(topy=topy, boty=boty, x=x, lemodel=lemodel)
        allines.append(subline)
        subline.isnew = False
        try:
            move(subline, nodes, points, img)
        except Exception as e:
            # NOTE(review): every error raised while growing a sub-line is
            # silently discarded here -- confirm this is intended.
            pass
    ### illustrate
    img2 = img.copy()
    for line in allines:
        try:
            col = str2col(line.id)
            line.draw(img2, col, 0.5, drawyhat=False)
        except Exception as e:
            # drawing failures are ignored; that line is simply not shown
            pass
    # cv2.imshow('lines', img2)
    # cv2.waitKey(-1)
    return img2
def process(self):
    """Load the image named by the ``path`` parameter as grayscale.

    Returns:
        The result of ``ocrolib.read_image_gray`` for the configured path,
        or ``self.null_data()`` when no file exists at that path (this
        includes the case where no path is configured).
    """
    # TODO: Ensure we can also read a filehandle
    path_exists = os.path.exists(self._params.get("path", ""))
    if path_exists:
        return ocrolib.read_image_gray(makesafe(self._params.get("path")))
    return self.null_data()
def analyze_page_layout(binary, gray, rgb=None):
    """Segment a binarized page into text lines in reading order.

    ``binary`` is inverted, a scale is estimated from it, the page is
    segmented with ``compute_segmentation`` and the resulting lines are
    sorted into reading order; the segmentation labels are renumbered to
    ``0x010000 + rank``.  The visualisation and file-output sections are
    disabled (``if False``), so ``gray`` and ``rgb`` are currently unused.

    NOTE(review): nothing is returned in the code visible here, so the
    computed ``segmentation`` and ``lines`` are discarded -- confirm
    whether a return statement is missing.
    """
    # Layout parameters.
    hscale = 1.0  # Non-standard scaling of horizontal parameters.
    vscale = 1.0  # Non-standard scaling of vertical parameters.
    threshold = 0.2  # baseline threshold.
    usegauss = True  # Use gaussian instead of uniform.
    maxseps = 0  # Maximum black column separators.
    sepwiden = 10  # Widen black separators (to account for warping).
    blackseps = True
    maxcolseps = 3  # Maximum # whitespace column separators.
    csminheight = 10  # Minimum column height (units=scale).

    # Line-extraction parameters (used only by the disabled output section).
    noise = 8  # Noise threshold for removing small components from lines.
    gray_output = True  # Output grayscale lines as well, which are extracted from the grayscale version of the pages.
    pad = 3  # Padding for extracted lines.
    expand = 3  # Expand mask for grayscale extraction.

    if False:
        bin_image_filepath = './ocropy_test.bin.png'
        gray_image_filepath = './ocropy_test.nrm.png'
        binary = ocrolib.read_image_binary(bin_image_filepath)
        gray = ocrolib.read_image_gray(gray_image_filepath)

    binary = 1 - binary  # Invert.

    scale = psegutils.estimate_scale(binary)
    segmentation = compute_segmentation(binary,
                                        scale,
                                        blackseps,
                                        maxseps,
                                        maxcolseps,
                                        csminheight,
                                        sepwiden,
                                        usegauss,
                                        vscale,
                                        hscale,
                                        threshold,
                                        quiet=True)

    lines = psegutils.compute_lines(segmentation, scale)
    line_bounds = [entry.bounds for entry in lines]
    order = psegutils.reading_order(line_bounds)
    lsort = psegutils.topsort(order)

    # Renumber the labels so that they conform to the specs.
    nlabels = np.amax(segmentation) + 1
    renumber = np.zeros(nlabels, 'i')
    for rank, index in enumerate(lsort):
        renumber[lines[index].label] = 0x010000 + (rank + 1)
    segmentation = renumber[segmentation]  # Image.

    lines = [lines[index] for index in lsort]

    # Visualize bounding boxes.
    if False:
        if rgb is not None:
            # REF [function] >> extract_masked() in ${OCROPY_HOME}/ocrolib/psegutils.py.
            for entry in lines:
                y0, x0, y1, x1 = [
                    int(x) for x in [
                        entry.bounds[0].start, entry.bounds[1].start,
                        entry.bounds[0].stop, entry.bounds[1].stop
                    ]
                ]
                cv2.rectangle(rgb, (x0, y0), (x1, y1), (0, 0, 255), 1,
                              cv2.LINE_AA)
            cv2.imshow('Image', rgb)
            cv2.waitKey(0)

    # Output everything.
    if False:
        if not os.path.exists(outputdir):
            os.mkdir(outputdir)

        ocrolib.write_page_segmentation("%s.pseg.png" % outputdir,
                                        segmentation)
        cleaned = ocrolib.remove_noise(binary, noise)
        for number, entry in enumerate(lines):
            binline = psegutils.extract_masked(1 - cleaned,
                                               entry,
                                               pad=pad,
                                               expand=expand)  # Image.
            ocrolib.write_image_binary(
                "%s/01%04x.bin.png" % (outputdir, number + 1), binline)
            if gray_output:
                grayline = psegutils.extract_masked(gray,
                                                    entry,
                                                    pad=pad,
                                                    expand=expand)  # Image.
                ocrolib.write_image_gray(
                    "%s/01%04x.nrm.png" % (outputdir, number + 1), grayline)
def extractLines(imgpath, param):
    """Find candidate characters on a page and fit short line segments.

    The image is deskewed, normalized and binarized with ``sauvola``
    (using ``param.w`` and ``param.k``).  A character scale is estimated
    from the connected components of the central region.  The page is
    then covered by four overlapping grids of cells; every connected
    component that passes the size filters is handed to the
    ``SubLineFinder`` of the cell it falls into, once per grid.

    Written for Python 2: ``/`` on integers is relied upon to yield
    integer indices, and ``dict.iteritems`` / the ``print`` statement are
    used.  Besides its parameters the function reads the module-level
    ``args.connect``.

    Returns:
        A pair ``(binary, illustration)``: the uint8 binary image after the
        wide components have been cut, and a BGR debug image showing the
        fitted segments and the candidate boxes.
    """
    img_grey = ocrolib.read_image_gray(imgpath)
    (h, w) = img_grey.shape[:2]
    # skew is estimated on the central region, downscaled by a factor of two
    img00 = cv2.resize(img_grey[h / 4:3 * h / 4, w / 4:3 * w / 4],
                       None,
                       fx=0.5,
                       fy=0.5)
    angle = estimate_skew_angle(img00, linspace(-5, 5, 42))
    print 'goc', angle
    rotM = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
    img_grey = cv2.warpAffine(img_grey, rotM, (w, h))
    h, w = img_grey.shape
    img_grey = cv2.normalize(img_grey.astype(float32), None, 0.0, 0.999,
                             cv2.NORM_MINMAX)
    binary = sauvola(img_grey,
                     w=param.w,
                     k=param.k,
                     scaledown=0.2,
                     reverse=True)
    ### PARAM
    binary = morph.r_closing(binary.astype(bool), (args.connect, 1))
    # central region of the binary image, used only for the scale estimate
    binaryary = binary[h / 4:3 * h / 4, w / 4:3 * w / 4]
    binary = binary.astype(np.uint8)
    labels, n = morph.label(binaryary)
    objects = morph.find_objects(labels)
    # Visit components in the order given by sl.area and stamp the square
    # root of that value into `scalemap`, skipping any component whose
    # region already carries a value.
    bysize = sorted(objects, key=sl.area)
    scalemap = zeros(binaryary.shape)
    for o in bysize:
        if amax(scalemap[o]) > 0:
            continue
        scalemap[o] = sl.area(o)**0.5
    # the scale is the median of the stamped values between 3 and 100
    scale = median(scalemap[(scalemap > 3) & (scalemap < 100)])
    objects = psegutils.binary_objects(binary)
    boxmap = zeros(binary.shape, dtype=np.uint8)
    imgwidth = binary.shape[1]
    imgheight = binary.shape[0]
    # Start from cells of 6 x 2.5 scale units, then round so that a whole
    # number of cells spans the image in each direction.
    cellwidth = 6 * scale
    cellheight = 2.5 * scale
    N_x = int(round(imgwidth / cellwidth))
    cellwidth = int(round(imgwidth / N_x))
    N_y = int(round(imgheight / cellheight))
    cellheight = int(round(imgheight / N_y))
    # one {cellid: SubLineFinder} mapping per grid type
    cells_list = [{}, {}, {}, {}]

    def pixel2cell2id(pixel_x, pixel_y, CELLTYPE):
        # Map a pixel to the cell containing it in grid CELLTYPE.  Grid 0 is
        # unshifted, grid 1 is shifted by half a cell vertically, grid 2 by
        # half a cell horizontally and grid 3 in both directions.  Returns
        # (top-left pixel of the cell as (x, y), cell id), or (None, None)
        # when the shifted pixel is not strictly positive.
        dx = 0
        dy = 0
        if CELLTYPE == 3:
            pixel_x -= cellwidth / 2
            pixel_y -= cellheight / 2
            dx = cellwidth / 2
            dy = cellheight / 2
        if CELLTYPE == 2:
            pixel_x -= cellwidth / 2
            dx = cellwidth / 2
        if CELLTYPE == 1:
            pixel_y -= cellheight / 2
            dy = cellheight / 2
        if pixel_x <= 0 or pixel_y <= 0:
            return None, None
        cellcoord = (pixel_x / cellwidth, pixel_y / cellheight)
        cellid = cellcoord[0] + cellcoord[1] * N_x
        cellcoord = (cellcoord[0] * cellwidth + dx,
                     cellcoord[1] * cellheight + dy)
        return cellcoord, cellid

    def id2cell2pixel(cellid, x, y, CELLTYPE):
        # Inverse mapping: cell id plus an offset inside the cell -> pixel.
        # Only the shift of grid 3 is handled, and the helper is not called
        # anywhere in this function.
        cellcoord = (cellid % N_x, cellid / N_x)
        pixel_x = cellcoord[0] * cellwidth + x
        pixel_y = cellcoord[1] * cellheight + y
        if CELLTYPE == 3:
            pixel_x += cellwidth / 2
            pixel_y += cellheight / 2
        return cellcoord, pixel_x, pixel_y

    # from here on img_grey is the BGR uint8 canvas for the illustration
    img_grey = (cv2.cvtColor(img_grey, cv2.COLOR_GRAY2BGR) * 255).astype(
        np.uint8)
    for o in objects:
        # h and w are reused here for the component's own height and width
        h = sl.dim0(o)
        w = sl.dim1(o)
        ratio = float(w) / h
        ### Dirty cheat
        # components wider than tall (ratio between 1 and 6) are cut by
        # blanking one-pixel columns every `recommended_width` pixels
        if ratio > 1 and ratio < 6:
            recommended_width = max(int(0.6 * (o[0].stop - o[0].start)),
                                    int(scale * 0.6), 5)
            for pos in range(o[1].start + recommended_width, o[1].stop,
                             recommended_width):
                binary[o[0].start:o[0].stop, pos:pos + 1] = np.uint8(0)
    # recompute the components after cutting
    objects = psegutils.binary_objects(binary)
    for o in objects:
        h = sl.dim0(o)
        w = sl.dim1(o)
        a = h * w
        # black = float(sum(binary[o]))/a
        # if sl.area(o)**.5<threshold[0]*scale: continue
        # if sl.area(o)**.5>threshold[1]*scale: continue
        # reject components that are too tall, wide but oddly high/flat, too
        # small, or too elongated in either direction
        if h > 5 * scale:
            continue
        # if h < 0.4*scale: continue
        if w > 4 * scale and (h > 2 * scale or h < 0.5 * scale):
            continue
        if a < 0.25 * scale * scale:
            continue
        if float(h) / w > 10:
            continue
        ratio = float(w) / h
        if ratio > 10:
            continue
        ### Add object as candidate character
        # the component is represented by its bottom-centre point
        pixel_x, pixel_y = (o[1].start + o[1].stop) / 2, o[0].stop
        for celltype in range(4):
            cellcoord, cellid = pixel2cell2id(pixel_x,
                                              pixel_y,
                                              CELLTYPE=celltype)
            if cellcoord is None or cellid is None:
                continue
            cellbound = slice(cellcoord[1], cellcoord[1] + cellheight,
                              None), slice(cellcoord[0],
                                           cellcoord[0] + cellwidth, None)
            if cellid not in cells_list[celltype]:
                cells_list[celltype][cellid] = SubLineFinder(
                    window_size=max(3, scale / 6),
                    cellbound=cellbound,
                    initChar=o)
            else:
                cells_list[celltype][cellid].addChar(o)
        # mark the box in `boxmap`, trimmed by 3 px when the side is longer
        # than 8 px and otherwise limited to 5 px
        y0 = o[0].start
        y1 = o[0].stop - 3 if o[0].stop - o[0].start > 8 else o[0].start + 5
        x0 = o[1].start
        x1 = o[1].stop - 3 if o[1].stop - o[1].start > 8 else o[1].start + 5
        boxmap[y0:y1, x0:x1] = 1
    for celltype in range(4):
        # one drawing colour per grid type
        if celltype == 0:
            col = (255, 0, 0)
        if celltype == 1:
            col = (0, 255, 0)
        if celltype == 2:
            col = (255, 255, 0)
        if celltype == 3:
            col = (0, 0, 255)
        for cellid, subline in cells_list[celltype].iteritems():
            # cv2.rectangle(img_grey, (subline.cellbound[1].start+celltype, subline.cellbound[0].start+celltype), (subline.cellbound[1].stop+celltype, subline.cellbound[0].stop+celltype), col,1)
            line = subline.subline()
            if line is not None:
                pos1 = (int(line[0][0]), int(line[0][1]))
                pos2 = (int(line[1][0]), int(line[1][1]))
                # print cellid, pos1, pos2
                cv2.line(img_grey, pos1, pos2, col, 1)
    ### illustrate/debug first round
    # candidate boxes are overlaid by adding (0, 50, 50) to the canvas
    return binary, cv2.add(
        img_grey,
        (boxmap[:, :, np.newaxis] * np.array([0, 50, 50])).astype(np.uint8))
def process(job):
    """Binarize one page image and write it as ``<base>.bin.png``.

    The image is normalized so that its values span 0..1, flattened by
    subtracting an estimate of the local white level, optionally deskewed
    (when ``args['maxskew'] > 0``), rescaled between two percentile-based
    gray levels and thresholded.  Settings are read from the module-level
    ``args`` mapping; progress messages are printed only when
    ``args['parallel'] < 2``.

    Args:
        job: a ``(fname, i)`` pair -- the image path and the index shown in
            the progress output.

    Returns:
        The path of the written binary image, or ``None`` when the image is
        empty or rejected by ``check_page``.
    """
    fname, i = job
    verbose = args['parallel'] < 2
    print_info("# %s" % (fname))
    if verbose:
        print_info("=== %s %-3d" % (fname, i))
    raw = ocrolib.read_image_gray(fname)

    # perform image normalization: minimum becomes 0, maximum becomes 1
    image = raw - amin(raw)
    if amax(image) == amin(image):
        print_info("# image is empty: %s" % (fname))
        return
    image /= amax(image)

    if not args['nocheck']:
        # the check is run on the inverted image
        check = check_page(amax(image) - image)
        if check is not None:
            # separators added so that file name, reason and hint stay legible
            print_error(fname + " SKIPPED. " + check +
                        " (use -n to disable this check)")
            return

    # flatten the image by estimating the local whitelevel
    comment = ""
    if verbose:
        print_info("flattening")
    m = interpolation.zoom(image, args['zoom'])
    m = filters.percentile_filter(m, args['perc'], size=(args['range'], 2))
    m = filters.percentile_filter(m, args['perc'], size=(2, args['range']))
    m = interpolation.zoom(m, 1.0 / args['zoom'])
    # zooming down and up again may change the shape slightly, so both
    # arrays are cropped to their common extent
    w, h = minimum(array(image.shape), array(m.shape))
    flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)

    # estimate skew angle and rotate
    if args['maxskew'] > 0:
        if verbose:
            print_info("estimating skew angle")
        d0, d1 = flat.shape
        o0, o1 = int(args['bignore'] * d0), int(args['bignore'] * d1)
        # work on the inverted image so that the fill value 0 introduced by
        # the rotation becomes the maximum after inverting back
        flat = amax(flat) - flat
        flat -= amin(flat)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        ma = args['maxskew']
        ms = int(2 * args['maxskew'] * args['skewsteps'])
        angle = estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        flat = amax(flat) - flat
    else:
        angle = 0

    # estimate low and high thresholds, ignoring a border of relative width
    # `bignore` on every side
    if verbose:
        print_info("estimating thresholds")
    d0, d1 = flat.shape
    o0, o1 = int(args['bignore'] * d0), int(args['bignore'] * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    if args['escale'] > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        e = args['escale']
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), args['lo'])
    hi = stats.scoreatpercentile(est.ravel(), args['hi'])

    # rescale the image to get the gray scale image
    if verbose:
        print_info("rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    binarized = 1 * (flat > args['threshold'])

    # output the thresholded image (writing the normalized grayscale image
    # is disabled in this variant)
    print_info("%s lo-hi (%.2f %.2f) angle %4.1f %s" %
               (fname, lo, hi, angle, comment))
    if verbose:
        print_info("writing")
    base, _ = ocrolib.allsplitext(fname)
    outputfile_bin = base + ".bin.png"
    ocrolib.write_image_binary(outputfile_bin, binarized)
    return outputfile_bin
def process1(arg): (trial,fname) = arg base,_ = ocrolib.allsplitext(fname) line = ocrolib.read_image_gray(fname) if prod(line.shape)==0: return None if amax(line)==amin(line): return None if not args.nolineest: assert "dew.png" not in fname,"don't dewarp dewarped images" temp = amax(line)-line temp = temp*1.0/amax(temp) lnorm.measure(temp) line = lnorm.normalize(line,cval=amax(line)) else: assert "dew.png" in fname,"only apply to dewarped images" line = lstm.prepare_line(line,args.pad) pred = network.predictString(line) if not args.nonormalize: pred = ocrolib.normalize_text(pred) if args.estrate: try: gt = ocrolib.read_text(base+".gt.txt") except: return (0,[],0,trial,fname) pred0 = ocrolib.project_text(pred,args.compare) gt0 = ocrolib.project_text(gt,args.compare) if args.estconf>0: err,conf = edist.xlevenshtein(pred0,gt0,context=args.context) else: err = edist.xlevenshtein(pred0,gt0) conf = [] if not args.quiet: print "%3d %3d"%(err,len(gt)),fname,":",pred sys.stdout.flush() return (err,conf,len(gt0),trial,fname) if not args.quiet: print pred # print fname,":",pred # ocrolib.write_text(base+".txt",pred) if args.show>0 or args.save is not None: ion() matplotlib.rc('xtick',labelsize=7) matplotlib.rc('ytick',labelsize=7) matplotlib.rcParams.update({"font.size":7}) if os.path.exists(base+".gt.txt"): transcript = ocrolib.read_text(base+".gt.txt") transcript = ocrolib.normalize_text(transcript) else: transcript = pred pred2 = network.trainString(line,transcript,update=0) figure("result",figsize=(1400//75,800//75),dpi=75) clf() subplot(311) imshow(line.T,cmap=cm.gray) title(transcript) subplot(312) gca().set_xticks([]) imshow(network.outputs.T[1:],vmin=0,cmap=cm.hot) title(pred[:80]) subplot(313) plot(network.outputs[:,0],color='yellow',linewidth=3,alpha=0.5) plot(network.outputs[:,1],color='green',linewidth=3,alpha=0.5) plot(amax(network.outputs[:,2:],axis=1),color='blue',linewidth=3,alpha=0.5) plot(network.aligned[:,0],color='orange',linestyle='dashed',alpha=0.7) 
plot(network.aligned[:,1],color='green',linestyle='dashed',alpha=0.5) plot(amax(network.aligned[:,2:],axis=1),color='blue',linestyle='dashed',alpha=0.5) if args.save is not None: draw() savename = args.save if "%" in savename: savename = savename%trial print "saving",savename savefig(savename,bbox_inches=0) if trial==len(inputs)-1: ginput(1,99999999) else: ginput(1,args.show) return None
km = lem.shapedict ion(); gray() ocrolib.showgrid(km.centers().reshape(*lem.xls.shape)+lem.xls*2) ginput(1,1000) ocrolib.showgrid(km.centers().reshape(*lem.bls.shape)+lem.bls*2) ginput(1,1000) sys.exit(0) elif args.subcommand=="showline": with open(args.line_estimator) as stream: lem = cPickle.load(stream) print "loaded",lem for fname in args.images: try: print "***",fname clf() image = 1-ocrolib.read_image_gray(fname) limit = min(image.shape[1],args.xlimit) blp,xlp = lem.lineFit(image,order=args.order) print "baseline",blp print "xline",xlp title("fname") subplot(311); imshow((lem.blimage-lem.xlimage)[:,:limit]) title("fname") subplot(312); imshow((lem.blimage-lem.xlimage+image)[:,:limit]) gray() subplot(313); imshow(image[:,:limit]) xlim(0,limit); ylim(len(image),0) xs = range(image.shape[1])[:limit] plot(xs,polyval(blp,xs)) plot(xs,polyval(xlp,xs)) ginput(1,1000)
# Training loop excerpt: each trial picks one line image, loads it together
# with its transcript and prepares it for the network.
for trial in range(start,args.ntrain):
    network.last_trial = trial+1

    # display every `args.display` trials (never when it is 0)
    do_display = (args.display>0 and trial%args.display==0)
    do_update = 1

    if args.movie and do_display:
        # in movie mode the fixed sample is used on display trials and
        # do_update is switched off
        fname = args.moviesample
        do_update = 0
    else:
        # otherwise pick one input at random
        fname = pyrandom.sample(inputs,1)[0]

    base,_ = ocrolib.allsplitext(fname)
    try:
        line = ocrolib.read_image_gray(fname)
        transcript = ocrolib.read_text(base+".gt.txt")
    except IOError as e:
        # unreadable image or transcript: report and move on to the next trial
        print("ERROR", e)
        continue

    if not args.nolineest:
        assert "dew.png" not in fname,"don't dewarp already dewarped lines"
        # the normalizer is measured on the inverted image
        network.lnorm.measure(np.amax(line)-line)
        line = network.lnorm.normalize(line,cval=np.amax(line))
    else:
        assert "dew.png" in fname,"input must already be dewarped"

    # skip lines with fewer than 10 pixels or without any contrast
    if line.size<10 or np.amax(line)==np.amin(line):
        print("EMPTY-INPUT")
        continue
km = lem.shapedict ion() gray() ocrolib.showgrid(km.centers().reshape(*lem.xls.shape) + lem.xls * 2) ginput(1, 1000) ocrolib.showgrid(km.centers().reshape(*lem.bls.shape) + lem.bls * 2) ginput(1, 1000) sys.exit(0) elif args.subcommand == "showline": lem = common.load_object(args.line_estimator) print "loaded", lem for fname in args.images: try: print "***", fname clf() image = 1 - ocrolib.read_image_gray(fname) limit = min(image.shape[1], args.xlimit) blp, xlp = lem.lineFit(image, order=args.order) print "baseline", blp print "xline", xlp title("fname") subplot(311) imshow((lem.blimage - lem.xlimage)[:, :limit]) title("fname") subplot(312) imshow((lem.blimage - lem.xlimage + image)[:, :limit]) gray() subplot(313) imshow(image[:, :limit]) xlim(0, limit) ylim(len(image), 0)
img_grey = img stree = sauvolatree(img_grey) scalemin, scalemax = extendRange(min(stree.scales), max(stree.scales), 3, 3.5) traverseEditState(stree[(-1, -1)], scalemin, scalemax) objects = flattenByKeepState(stree) return objects, np.mean(stree.scales) if __name__ == "__main__": # imgpath = '/home/loitg/Downloads/complex-bg/special_line/' imgpath = '/home/loitg/Downloads/complex-bg/tmp/' for filename in os.listdir(imgpath): if filename[-3:].upper() == 'JPG': print filename img_grey = ocrolib.read_image_gray(imgpath + filename) stree = sauvolatree(img_grey) if len(stree.scales) == 0: continue scalemin, scalemax = extendRange(min(stree.scales), max(stree.scales), 3, 3.5) traverseEditState(stree[(-1, -1)], scalemin, scalemax) objects = flattenByKeepState(stree) illu = cv2.cvtColor(stree.bins[1] * 255, cv2.COLOR_GRAY2BGR) illu = cv2.resize(illu, None, fx=6.0, fy=6.0) for bound in objects: cv2.circle(illu, (bound[1].start * 3 + bound[1].stop * 3, bound[0].start * 6), 3, (0, 0, 255), -1) cv2.circle(illu, (bound[1].start * 3 + bound[1].stop * 3, bound[0].stop * 6), 3, (0, 255, 0), -1) cv2.line(illu, (bound[1].start * 3 + bound[1].stop * 3, bound[0].start * 6),
def _process_segment(self, page, filename, page_id, file_id):
    """Deskew one page image and attach the result to ``page``.

    The image is read as grayscale; when ``maxskew`` is positive a skew
    angle is estimated and the image is rotated by it.  The result is
    rescaled between two percentile-based gray levels and thresholded.
    The angle is stored with ``page.set_orientation`` and the thresholded
    image is saved through the workspace and registered on ``page`` as an
    alternative image with the comment ``"deskewed"``.

    Args:
        page: page object; must provide ``set_orientation`` and
            ``add_AlternativeImage``.
        filename: path of the image to read.
        page_id: page identifier passed on to ``save_image_file``.
        file_id: file identifier passed on to ``save_image_file``.
    """
    params = self.parameter
    verbose = params['parallel'] < 2
    if verbose:
        LOG.info("INPUT FILE %s ", filename)
    raw = ocrolib.read_image_gray(filename)
    flat = raw

    # estimate skew angle and rotate
    if params['maxskew'] > 0:
        if verbose:
            LOG.info("Estimating Skew Angle")
        d0, d1 = flat.shape
        o0, o1 = int(params['bignore'] * d0), int(params['bignore'] * d1)
        # work on the inverted image so that the fill value 0 introduced by
        # the rotation becomes the maximum after inverting back
        flat = amax(flat) - flat
        flat -= amin(flat)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        ma = params['maxskew']
        ms = int(2 * params['maxskew'] * params['skewsteps'])
        angle = self.estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        flat = amax(flat) - flat
    else:
        angle = 0

    # estimate low and high thresholds, ignoring a border of relative width
    # `bignore` on every side
    if verbose:
        LOG.info("Estimating Thresholds")
    d0, d1 = flat.shape
    o0, o1 = int(params['bignore'] * d0), int(params['bignore'] * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    if params['escale'] > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        e = params['escale']
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        if params['debug'] > 0:
            imshow(v)
            ginput(1, params['debug'])
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), params['lo'])
    hi = stats.scoreatpercentile(est.ravel(), params['hi'])

    # rescale the image to get the gray scale image
    if verbose:
        LOG.info("Rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if params['debug'] > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, params['debug'])
    deskewed = 1 * (flat > params['threshold'])

    LOG.info("%s lo-hi (%.2f %.2f) angle %4.1f" % (filename, lo, hi, angle))
    if verbose:
        LOG.info("Writing")

    # TODO: Need some clarification as the results effect the following
    # pre-processing steps (sign and range of the stored orientation).
    page.set_orientation(angle)

    # Turn the 0/1 array into an 8-bit image (0 or 255) so it can be saved.
    bin_image = ocrolib.array2pil((255 * deskewed).astype('B'))
    file_path = self.workspace.save_image_file(bin_image,
                                               file_id,
                                               page_id=page_id,
                                               file_grp=self.image_grp)
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path, comment="deskewed"))
def process1(job):
    """Binarize one page image; write ``.bin.png`` and ``.nrm.png`` files.

    Settings are taken from the module-level ``args``; progress messages
    are printed only when ``args.parallel < 2``.

    Args:
        job: a ``(fname, i)`` pair -- the image path and the index shown in
            the progress output.

    Returns:
        The path of the binary image, or ``None`` when the image is empty
        or rejected by ``check_page``.
    """
    fname, i = job
    print_info("# %s" % (fname))
    verbose = args.parallel < 2
    if verbose:
        print_info("=== %s %-3d" % (fname, i))
    raw = ocrolib.read_image_gray(fname)
    dshow(raw, "input")

    # Normalization: shift the minimum to 0, then scale the maximum to 1.
    image = raw - amin(raw)
    if amax(image) == amin(image):
        print_info("# image is empty: %s" % (fname))
        return
    image /= amax(image)

    if not args.nocheck:
        problem = check_page(amax(image) - image)
        if problem is not None:
            print_error(fname + " SKIPPED. " + problem +
                        " (use -n to disable this check)")
            return

    # Fraction of nearly black or nearly white pixels; forced to 0 in gray
    # mode so that the flattening step always runs.
    extreme = 0 if args.gray else (
        sum(image < 0.05) + sum(image > 0.95)) * 1.0 / prod(image.shape)

    if extreme > 0.95:
        # already effectively binarized: skip the flattening
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        # Estimate the local white level on a zoomed copy and subtract it.
        if verbose:
            print_info("flattening")
        whitelevel = interpolation.zoom(image, args.zoom)
        for window in ((args.range, 2), (2, args.range)):
            whitelevel = filters.percentile_filter(whitelevel,
                                                   args.perc,
                                                   size=window)
        whitelevel = interpolation.zoom(whitelevel, 1.0 / args.zoom)
        if args.debug > 0:
            clf()
            imshow(whitelevel, vmin=0, vmax=1)
            ginput(1, args.debug)
        rows, cols = minimum(array(image.shape), array(whitelevel.shape))
        flat = clip(image[:rows, :cols] - whitelevel[:rows, :cols] + 1, 0, 1)
        if args.debug > 0:
            clf()
            imshow(flat, vmin=0, vmax=1)
            ginput(1, args.debug)

    # Percentile-based thresholds, taken inside a border of relative width
    # `bignore`.
    if verbose:
        print_info("estimating thresholds")
    height, width = flat.shape
    margin_y = int(args.bignore * height)
    margin_x = int(args.bignore * width)
    est = flat[margin_y:height - margin_y, margin_x:width - margin_x]
    if args.escale > 0:
        # Keep only regions with significant variance; this makes the
        # percentile estimates more reliable.
        sigma = args.escale * 20.0
        span = int(args.escale * 50)
        deviation = est - filters.gaussian_filter(est, sigma)
        deviation = filters.gaussian_filter(deviation**2, sigma)**0.5
        mask = (deviation > 0.3 * amax(deviation))
        for shape in ((span, 1), (1, span)):
            mask = morphology.binary_dilation(mask, structure=ones(shape))
        if args.debug > 0:
            imshow(mask)
            ginput(1, args.debug)
        est = est[mask]
    lo = stats.scoreatpercentile(est.ravel(), args.lo)
    hi = stats.scoreatpercentile(est.ravel(), args.hi)

    # Rescale to obtain the normalized grayscale image, then threshold it.
    if verbose:
        print_info("rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if args.debug > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, args.debug)
    binarized = 1 * (flat > args.threshold)

    print_info("%s lo-hi (%.2f %.2f) %s" % (fname, lo, hi, comment))
    if verbose:
        print_info("writing")
    if args.debug > 0 or args.show:
        clf()
        gray()
        imshow(binarized)
        ginput(1, max(0.1, args.debug))

    base, _ = ocrolib.allsplitext(fname)
    bin_path = base + ".bin.png"
    ocrolib.write_image_binary(bin_path, binarized)
    ocrolib.write_image_gray(base + ".nrm.png", flat)
    return bin_path
def extractLines2(imgpath):
    """Detect text lines on an image and return their extracted images.

    The image is deskewed and normalized, ``findBox`` supplies bounding
    boxes, and ``SubLine`` objects are grown recursively from those boxes.
    The resulting sub-lines are merged in two passes (first the
    combinations flagged as forced by ``scoreLineAfter``, then the best
    scoring remaining candidate).  The surviving lines are displayed in an
    OpenCV window, which blocks until a key is pressed, and finally
    extracted.

    Written for Python 2: ``/`` on integers is relied upon to yield
    integer indices, and ``print`` is used as a statement.

    Returns:
        ``(None, None, retlines)`` where ``retlines`` is the list of values
        returned by ``line.extract`` for every remaining line.
    """
    # inside this function `clf` is the loaded model
    clf = joblib.load('/home/loitg/Downloads/complex-bg/le_model_3.pkl')
    tt = time()
    img_grey = ocrolib.read_image_gray(imgpath)
    (h, w) = img_grey.shape[:2]
    # skew is estimated on the central region, downscaled by a factor of two
    img00 = cv2.resize(img_grey[h / 4:3 * h / 4, w / 4:3 * w / 4],
                       None,
                       fx=0.5,
                       fy=0.5)
    angle = estimate_skew_angle(img00, linspace(-5, 5, 42))
    print 'goc', angle
    rotM = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
    img_grey = cv2.warpAffine(img_grey, rotM, (w, h))
    img_grey = cv2.normalize(img_grey.astype(float32), None, 0.0, 0.999,
                             cv2.NORM_MINMAX)
    objects, scale = findBox(img_grey)
    ######### convert
    # xfrom=50; xto=img_grey.shape[1];
    # yfrom=700; yto=1500#min(img_grey.shape[0], 800);
    # img_grey = img_grey[yfrom:yto, xfrom:xto]
    # objects2 = []
    # for obj in objects:
    #     topy = obj[0].start
    #     boty = obj[0].stop
    #     x = (obj[1].start + obj[1].stop)/2
    #     if yfrom <= topy < yto and yfrom <= boty < yto and xfrom <= x < xto:
    #         object2 = (slice(obj[0].start - yfrom, obj[0].stop - yfrom, None), slice(obj[1].start - xfrom, obj[1].stop - xfrom, None))
    #         objects2.append(object2)
    #
    # objects = objects2
    ######### end convert
    h, w = img_grey.shape
    img = (cv2.cvtColor(img_grey, cv2.COLOR_GRAY2BGR) * 255).astype(np.uint8)
    # two boolean maps of image size, handed to SubLine.clear(); index 0 is
    # queried at box tops and index 1 at box bottoms further down
    cleared_maps = np.zeros((2, h, w), dtype=bool)
    # per-pixel marker of box top / bottom points, indexed [y, x]
    pointsmap = np.zeros((h, w), dtype=np.uint8)
    # boxes are visited by increasing horizontal centre
    objects = sorted(objects, key=lambda obj: (obj[1].start + obj[1].stop) / 2)
    for bound in objects:
        topy = bound[0].start
        boty = bound[0].stop
        x = (bound[1].start + bound[1].stop) / 2
        # points that would fall outside the map are not recorded
        if topy >= h or boty >= h or x >= w:
            continue
        pointsmap[boty, x] = SubLine.ISBOT
        pointsmap[topy, x] = SubLine.ISTOP
    allines = []
    # illustration canvas: a dot at the top and bottom centre of every box
    illu = img.copy()
    for bound in objects:
        cv2.circle(illu,
                   ((bound[1].start + bound[1].stop) / 2, bound[0].start), 2,
                   (255, 0, 0), -1)
        cv2.circle(illu,
                   ((bound[1].start + bound[1].stop) / 2, bound[0].stop), 2,
                   (0, 255, 0), -1)
        # cv2.line(illu, ((bound[1].start + bound[1].stop)/2, bound[0].start), ((bound[1].start + bound[1].stop)/2, bound[0].stop), (0,0,255),1)

    def move(subline):
        # Follow every continuation of `subline`.  A sub-line with no
        # continuation is finalized and collected, but only if it advanced
        # at least once (nextCount > 0).
        newsublines = subline.next2()
        if len(newsublines) > 0:
            for new in newsublines:
                move(new)
        elif subline.nextCount > 0:
            subline._updateCurve()
            subline.clear(cleared_maps)
            subline._updateCombineInfo()
            allines.append(subline)
            # if self.nextCount > 1:
            #     temp1 = cv2.addWeighted(cv2.cvtColor((cleared_maps[0]*120).astype(uint8),cv2.COLOR_GRAY2BGR), 0.5, illu, 0.5,0)
            #     temp2 = cv2.addWeighted(cv2.cvtColor((cleared_maps[1]*120).astype(uint8),cv2.COLOR_GRAY2BGR), 0.5, illu, 0.5,0)
            #     cv2.imshow('bb', cv2.addWeighted(temp1,0.5,temp2,0.5,0))
            #     cv2.waitKey(-1)

    for bound in objects:  # sorted
        topy = bound[0].start
        boty = bound[0].stop
        x = (bound[1].start + bound[1].stop) / 2
        # boxes lower than 8 pixels do not seed a sub-line
        if boty - topy < 8:
            continue
        try:
            # skip boxes whose top and bottom points are both marked cleared
            if cleared_maps[0][topy, x] and cleared_maps[1][boty, x]:
                continue
        except Exception as e:
            # the lookup failed (e.g. index outside the maps): skip the box
            continue
        subline = SubLine(topy=topy,
                          boty=boty,
                          x=x,
                          clf=clf,
                          img=img,
                          pointsmap=pointsmap)
        move(subline)

    # First merge pass: combine each available line with all later candidates
    # that scoreLineAfter flags as forced.  After a successful merge the same
    # index is examined again; `i` advances only when nothing was merged.
    allines.sort(key=lambda x: x.bounds[1].stop)
    i = 0
    while i < len(allines):
        result = allines[i]
        if result.available:
            # print '-------------------------'
            # img2 = illu.copy()
            # result.draw(img2, (125,125,125), 0.5, drawyhat=False)
            # cv2.line(img2, (0, int(result.rightray.m)), (result.imgwidth, \
            #     int(result.rightray.b*result.imgwidth + result.rightray.m)), (255,0,0),1)
            # cv2.line(img2, (0, int(result.leftray.m)), (result.imgwidth, \
            #     int(result.leftray.b*result.imgwidth + result.leftray.m)), (0,255,0),1)
            # cv2.imshow('cb', img2)
            # cv2.waitKey(-1)
            linemap = []
            forceCombines = []
            for j in range(i, len(allines)):
                if j == i:
                    continue
                candidate = allines[j]
                if not candidate.available:
                    continue
                forcedCombined, score = result.scoreLineAfter(candidate)
                # cv2.waitKey(-1)
                ####################
                if forcedCombined:
                    forceCombines.append(candidate)
                elif score >= 0:
                    # collected but not used in this pass
                    linemap.append((score, candidate))
            result.combineLinesAfter(forceCombines)
            # for fcb in forceCombines:
            #     col = str2col(fcb.id)
            #     fcb.draw(img2, col, 0.5, drawyhat=False)
            #     cv2.line(img2, (0, int(fcb.rightray.m)), (fcb.imgwidth, \
            #         int(fcb.rightray.b*fcb.imgwidth + fcb.rightray.m)), (255,0,0),1)
            #     cv2.line(img2, (0, int(fcb.leftray.m)), (fcb.imgwidth, \
            #         int(fcb.leftray.b*fcb.imgwidth + fcb.leftray.m)), (0,255,0),1)
            # cv2.imshow('cb', img2)
            # cv2.waitKey(-1)
            if len(forceCombines) == 0:
                i += 1
                continue
        else:
            i += 1
            continue
    #########
    # Second merge pass: for each available line, merge the non-forced
    # candidate with the smallest score and retry the same index until no
    # candidate with a non-negative score remains.
    i = 0
    while i < len(allines):
        result = allines[i]
        if result.available:
            # print '------------------------- FAR'
            # img2 = illu.copy()
            # result.draw(img2, (125,125,125), 0.5, drawyhat=False)
            # cv2.line(img2, (0, int(result.rightray.m)), (result.imgwidth, \
            #     int(result.rightray.b*result.imgwidth + result.rightray.m)), (255,0,0),1)
            # cv2.line(img2, (0, int(result.leftray.m)), (result.imgwidth, \
            #     int(result.leftray.b*result.imgwidth + result.leftray.m)), (0,255,0),1)
            # cv2.imshow('cb', img2)
            # cv2.waitKey(-1)
            linemap = []
            for j in range(i, len(allines)):
                if j == i:
                    continue
                candidate = allines[j]
                if not candidate.available:
                    continue
                forcedCombined, score = result.scoreLineAfter(candidate)
                # cv2.waitKey(-1)
                if (not forcedCombined) and score >= 0:
                    linemap.append((score, candidate))
            if len(linemap) > 0:
                # min() over (score, candidate) tuples; `j` receives the score
                j, candidate = min(linemap)
                # candidate.draw(img2, str2col(candidate.id), 0.5, drawyhat=False)
                # cv2.imshow('cb-far', img2)
                # cv2.waitKey(-1)
                result.combineLinesAfter([candidate])
                continue
            else:
                i += 1
                continue
        else:
            i += 1
            continue
    # keep only the lines that are still flagged available
    allines = [line for line in allines if line.available]
    print 'DONE LINE, now ILLUSTRATE **************, TOTAL LINE COUNT ' + str(
        len(allines))
    print 'TOTAL TIME ' + str(time() - tt)
    img2 = illu.copy()
    for line in allines:
        try:
            line._updateCurve()
            col = str2col(line.id)
            line.draw(img2, col, 0.5, drawyhat=False, drawline=True)
        except Exception as e:
            # drawing failures are ignored; that line is simply not shown
            pass
    # blocks until a key is pressed in the OpenCV window
    cv2.imshow('lines-ext', img2)
    cv2.waitKey(-1)
    retlines = []
    for line in allines:
        line._updateCurve()
        line.expandPoints()
        imgline = line.extract(img, expands=SubLine.EP_FINAL_EXPAND)
        # cv2.imshow('line', imgline)
        # cv2.waitKey(-1)
        retlines.append(imgline)
    return None, None, retlines