def deskew(fpath, job):
    """Deskew, rescale and binarize one page image.

    The image is read as grayscale.  When ``args.maxskew`` is positive a
    skew angle is estimated on the inverted image (ignoring a border of
    ``args.bignore`` times the image size on every side) and the image is
    rotated by that angle; otherwise the angle is 0.  The result is rescaled
    between the ``args.lo`` and ``args.hi`` percentiles, clipped to [0, 1],
    thresholded at ``args.threshold`` and written to ``<base>.ds.png``.

    Args:
        fpath: path of the input image.
        job: job number, used only in the progress message.

    Returns:
        Path of the written binary image (``<base>.ds.png``).
    """
    verbose = args.parallel < 2
    base, _ = ocrolib.allsplitext(fpath)
    basefile = ocrolib.allsplitext(os.path.basename(fpath))[0]

    if verbose:
        print_info("=== %s %-3d" % (fpath, job))
    flat = ocrolib.read_image_gray(fpath)

    # estimate skew angle and rotate
    if args.maxskew > 0:
        if verbose:
            print_info("estimating skew angle")
        rows, cols = flat.shape
        margin_r = int(args.bignore * rows)
        margin_c = int(args.bignore * cols)
        # work on the inverted image, shifted so that its minimum is 0
        flat = amax(flat) - flat
        flat -= amin(flat)
        inner = flat[margin_r:rows - margin_r, margin_c:cols - margin_c]
        max_angle = args.maxskew
        steps = int(2 * args.maxskew * args.skewsteps)
        angle = estimate_skew_angle(
            inner, linspace(-max_angle, max_angle, steps + 1))
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        # undo the inversion
        flat = amax(flat) - flat
    else:
        angle = 0

    # estimate low and high thresholds
    if verbose:
        print_info("estimating thresholds")
    rows, cols = flat.shape
    margin_r = int(args.bignore * rows)
    margin_c = int(args.bignore * cols)
    sample = flat[margin_r:rows - margin_r, margin_c:cols - margin_c]
    if args.escale > 0:
        # by default, we use only regions that contain significant
        # variance; this makes the percentile based low and high
        # estimates more reliable
        scale = args.escale
        busy = sample - filters.gaussian_filter(sample, scale * 20.0)
        busy = filters.gaussian_filter(busy ** 2, scale * 20.0) ** 0.5
        busy = (busy > 0.3 * amax(busy))
        busy = morphology.binary_dilation(
            busy, structure=ones((int(scale * 50), 1)))
        busy = morphology.binary_dilation(
            busy, structure=ones((1, int(scale * 50))))
        if args.debug > 0:
            imshow(busy)
            ginput(1, args.debug)
        sample = sample[busy]
    lo = stats.scoreatpercentile(sample.ravel(), args.lo)
    hi = stats.scoreatpercentile(sample.ravel(), args.hi)

    # rescale the image to get the gray scale image
    if verbose:
        print_info("rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if args.debug > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, args.debug)
    binarized = 1 * (flat > args.threshold)

    # output the thresholded image
    print_info("%s lo-hi (%.2f %.2f) angle %4.1f" % (basefile, lo, hi, angle))
    if verbose:
        print_info("writing")
    out_path = base + ".ds.png"
    ocrolib.write_image_binary(out_path, binarized)
    return out_path
def write_to_xml(fpath):
    """Add a file group listing the given images to the METS file and save it.

    The METS document at ``args.mets`` is parsed, a new ``mets:fileGrp``
    with ``USE=args.Output`` is filled with one ``mets:file`` entry per
    path, and the group is appended to the first ``mets:fileSec`` element.
    The result is written into ``args.work``: under the input METS' base
    name when ``args.OutputMets`` is empty, otherwise under
    ``args.OutputMets`` (with ``.xml`` appended if it is missing).

    Args:
        fpath: iterable of image file paths to register.
    """
    xmldoc = minidom.parse(args.mets)
    file_grp = xmldoc.createElement('mets:fileGrp')
    file_grp.setAttribute('USE', args.Output)

    for f in fpath:
        basefile = ocrolib.allsplitext(os.path.basename(f))[0]

        file_node = xmldoc.createElement('mets:file')
        file_node.setAttribute('ID', 'BIN_' + basefile)
        file_node.setAttribute('GROUPID', 'P_' + basefile)
        file_node.setAttribute('MIMETYPE', "image/png")

        locat_node = xmldoc.createElement('mets:FLocat')
        locat_node.setAttribute('LOCTYPE', "URL")
        locat_node.setAttribute('xlink:href', f)

        file_node.appendChild(locat_node)
        file_grp.appendChild(file_node)

    xmldoc.getElementsByTagName('mets:fileSec')[0].appendChild(file_grp)

    if not args.OutputMets:
        mets_name = os.path.basename(args.mets)
    elif args.OutputMets.endswith(".xml"):
        mets_name = args.OutputMets
    else:
        mets_name = args.OutputMets + '.xml'

    # The context manager guarantees the file is flushed and closed; the
    # previous code left the handle open.
    with open(os.path.join(args.work, mets_name), "w") as mets_file:
        mets_file.write(xmldoc.toxml())
def select_borderLine(arg, base):
    """Derive a crop box from detected border lines and save the crop.

    ``detect_lines`` is run on ``arg`` and ``BorderLine`` is called once per
    page side (top, left, bottom, right).  Every horizontal line in the
    module-level ``lineDetectH`` is then intersected with every vertical
    line in ``lineDetectV`` (presumably filled by ``BorderLine`` -- confirm),
    and each intersection tightens the crop box, which starts at the full
    image size.

    Args:
        arg: path of the image to analyse.
        base: base path handed to ``save_pf`` together with the box.

    Returns:
        ``[Xstart, Ystart, Xend, Yend]``, the box passed to ``save_pf``.
    """
    basefile = ocrolib.allsplitext(os.path.basename(arg))[0]
    img, imgHeight, imgWidth, Hlines, Vlines = detect_lines(arg)

    # one BorderLine call per side: (limit, candidate lines, index, flag)
    side_specs = (
        (imgHeight * 0.25, Hlines, 1, "top"),
        (imgWidth * 0.4, Vlines, 0, "left"),
        (imgHeight * 0.75, Hlines, 1, "bottom"),
        (imgWidth * 0.6, Vlines, 0, "right"),
    )
    for limit, candidates, axis, side in side_specs:
        BorderLine(limit, candidates, index=axis, flag=side)

    Xstart, Ystart = 0, 0
    Xend, Yend = imgWidth, imgHeight
    for hline in lineDetectH:
        for vline in lineDetectV:
            px, py = get_intersect((hline[0], hline[1]), (hline[2], hline[3]),
                                   (vline[0], vline[1]), (vline[2], vline[3]))

            # An intersection in the left 40% / right 40% / top 25% /
            # bottom 25% of the page moves the matching edge inwards;
            # otherwise a fixed margin is used as the candidate.
            left = int(px) + 10 if px < imgWidth * 0.4 else 10
            right = int(px) - 10 if px > imgWidth * 0.6 else int(imgWidth) - 10
            top = int(py) + 10 if py < imgHeight * 0.25 else 10
            bottom = (int(py) - 15 if py > imgHeight * 0.75
                      else int(imgHeight) - 15)

            Xstart = max(Xstart, left)
            Xend = min(Xend, right)
            Ystart = max(Ystart, top)
            Yend = min(Yend, bottom)

    # never hand a negative end coordinate to save_pf
    if Xend < 0:
        Xend = 10
    if Yend < 0:
        Yend = 15

    box = [Xstart, Ystart, Xend, Yend]
    save_pf(base, box)
    return [Xstart, Ystart, Xend, Yend]
def run(self, fname, i):
    """Crop one page image and return the path of the cropped file.

    The ruler is removed first, then text areas are detected.  Several text
    areas are merged by ``crop_area``; a single area covering more than half
    of the page is padded (20 px horizontally, 40 px vertically, clamped to
    the page) and saved with ``save_pf``; in every other case
    ``select_borderLine`` is called instead.

    Args:
        fname: input image path (converted with ``str``).
        i: zero-based file index, printed as ``i + 1``.

    Returns:
        ``'<base>.pf.png'`` where ``base`` is ``fname`` without extensions.
    """
    fname = str(fname)
    print("Process file: ", fname, i + 1)
    base, _ = ocrolib.allsplitext(fname)
    binImg = ocrolib.read_image_binary(fname)

    fpath = self.remove_rular(fname, base)
    textarea, rgb, height, width = self.detect_textarea(fpath)
    # NOTE(review): this replaces the stored value by a pixel count, so a
    # later call scales the already-scaled value again -- confirm intended.
    self.param['colSeparator'] = int(width * self.param['colSeparator'])

    area_count = len(textarea)
    use_border_lines = False
    if area_count > 1:
        textarea = self.crop_area(textarea, binImg, rgb, base)
        use_border_lines = len(textarea) == 0
    elif area_count == 1 and (
            abs(textarea[0][2] - textarea[0][0])
            * abs(textarea[0][3] - textarea[0][1]) > height * width * 0.5):
        left, top, right, bottom = textarea[0]
        # pad the box, clamping it to the page
        left = 0 if left <= 20 else left - 20
        right = width if right >= width - 20 else right + 20
        top = 0 if top <= 40 else top - 40
        bottom = height if bottom >= height - 40 else bottom + 40
        self.save_pf(base, [left, top, right, bottom])
    else:
        use_border_lines = True

    if use_border_lines:
        # fresh, empty horizontal / vertical line lists for this page
        self.select_borderLine(fpath, base, [], [])
    return '%s.pf.png' % base
def process1(arg):
    """Normalize one text-line image and run the recognizer on it.

    Args:
        arg: ``(trial, fname)`` pair; ``fname`` is the line image path and
            ``trial`` is passed through unchanged in the failure result.

    Returns:
        ``None`` when the image is empty or has a single gray value, and
        also after a successful prediction (the prediction itself is not
        used by this function).  ``(0, [], 0, trial, fname)`` when the line
        fails ``check_line`` or the network raises ``RecognitionError``.
    """
    (trial, fname) = arg
    base, _ = ocrolib.allsplitext(fname)
    line = ocrolib.read_image_gray(fname)
    if prod(line.shape) == 0:
        return None
    if amax(line) == amin(line):
        return None

    if not args.nocheck:
        check = check_line(amax(line) - line)
        if check is not None:
            print_error(fname + " SKIPPED " + check +
                        " (use -n to disable this check)")
            return (0, [], 0, trial, fname)

    if not args.nolineest:
        assert "dew.png" not in fname, "don't dewarp dewarped images"
        # measure on the inverted image scaled to a maximum of 1
        temp = amax(line) - line
        temp = temp * 1.0 / amax(temp)
        lnorm.measure(temp)
        line = lnorm.normalize(line, cval=amax(line))
    else:
        assert "dew.png" in fname, "only apply to dewarped images"

    line = lstm.prepare_line(line, args.pad)
    try:
        # `except X as e` / `except X:` is valid on Python 2.6+ and 3,
        # unlike the former `except X, e:` spelling.
        pred = network.predictString(line)
    except RecognitionError:
        # TODO: Handle this in the extraction processor
        print_info(fname + " Failed to predict line. Skipping.")
        return (0, [], 0, trial, fname)
def textimageseg(self, imf):
    """Split a binarized page into a text image and a non-text image.

    A seed-fill mask is built from the inverted input, dilated with a 3x3
    structuring element and expanded to the input size.  Pixels under the
    mask go to the non-text image, all others to the text image.

    Args:
        imf: path of the binarized input image.

    Returns:
        ``[<base>.ts.png, <base>.nts.png]``: the text and non-text image
        paths, in that order.
    """
    page = ocrolib.read_image_binary(imf)
    page = 1 - page / page.max()
    shape = page.shape[0], page.shape[1]

    # Generate mask and seed images, then fill the seed into the mask
    mask_img, seed_img = self.pixMorphSequence_mask_seed_fill_holes(page)
    filled = self.pixSeedfillBinary(mask_img, seed_img)

    # Dilate the fill and expand it to the size of the page
    filled = ndimage.binary_dilation(filled, ones((3, 3)))
    filled = self.expansion(filled, shape)

    # Non-text part: page pixels under the mask
    image_part = array((1 - page * filled), dtype=int)
    image_part[0, 0] = 0  # only for visualisation purpose
    # Text part: page pixels outside the mask
    text_part = array((1 - page * (1 - filled)), dtype=int)
    text_part[0, 0] = 0  # only for visualisation purpose

    base, _ = ocrolib.allsplitext(imf)
    text_path = base + ".ts.png"
    image_path = base + ".nts.png"
    ocrolib.write_image_binary(text_path, text_part)
    ocrolib.write_image_binary(image_path, image_part)
    return [text_path, image_path]
def process(self):
    """Dewarp every input page with NVIDIA's pix2pixHD.

    Exits with status 1 when CUDA is unavailable or when the ``pix2pixHD``
    parameter does not point to a directory.  For each input file the page
    is cropped to its Border coordinates, saved into a temporary
    ``OCR-D-IMG/test_A`` directory, and pix2pixHD's ``test.py`` is run on
    it.  If a synthesized image was produced it is copied next to the
    original image as ``<name>.dw.jpg``; the temporary directories are
    removed afterwards.
    """
    # Local import: keeps the new dependency inside this method.
    import subprocess

    path = Path(self.parameter['pix2pixHD']).absolute()
    print(path)
    if not torch.cuda.is_available():
        print("Your system has no CUDA installed. No GPU detected.")
        sys.exit(1)

    if not path.is_dir():
        print("NVIDIA's pix2pixHD was not found at '%s'.\n"
              "Make sure the `pix2pixHD` parameter points to the local path "
              "to the cloned pix2pixHD repository.\n"
              "\n"
              "pix2pixHD can be downloaded from "
              "https://github.com/NVIDIA/pix2pixHD\n" % path)
        sys.exit(1)

    for input_file in self.input_files:
        local_input_file = self.workspace.download_file(input_file)
        pcgts = parse(local_input_file.url, silence=True)
        image_coords = pcgts.get_Page().get_Border().get_Coords(
        ).points.split()
        fname = pcgts.get_Page().imageFilename

        # Page coordinates: first and third point of the border polygon
        min_x, min_y = image_coords[0].split(",")
        max_x, max_y = image_coords[2].split(",")
        crop_region = int(min_x), int(min_y), int(max_x), int(max_y)

        img_tmp_dir = "OCR-D-IMG/test_A"
        img_dir = os.path.dirname(str(fname))
        Path(img_tmp_dir).mkdir(parents=True, exist_ok=True)

        cropped_img = self.crop_image(fname, crop_region)
        base, _ = ocrolib.allsplitext(fname)
        stem = base.split("/")[-1]
        filename = stem + ".png"
        cropped_img.save(img_tmp_dir + "/" + filename)

        # Pass the arguments as a list instead of a shell string so that
        # paths containing spaces or shell metacharacters are handled
        # safely.  `--resize_or_crop` is given twice, as before; the
        # second occurrence carries the configured value.
        command = [
            "python", str(path) + "/test.py",
            "--dataroot", os.path.dirname(img_tmp_dir),
            "--checkpoints_dir", "./",
            "--name", "models",
            "--results_dir", img_dir,
            "--label_nc", "0",
            "--no_instance",
            "--no_flip",
            "--resize_or_crop", "none",
            "--n_blocks_global", "10",
            "--n_local_enhancers", "2",
            "--gpu_ids", "%s" % self.parameter['gpu_id'],
            "--loadSize", "%d" % self.parameter['resizeHeight'],
            "--fineSize", "%d" % self.parameter['resizeWidth'],
            "--resize_or_crop", "%s" % self.parameter['imgresize'],
        ]
        # As with os.system before, a failing run is not treated as an
        # error here; a missing result is simply not copied below.
        subprocess.run(command)

        name_root = filename.split(".")[0]
        synthesized_image = name_root + "_synthesized_image.jpg"
        pix2pix_img_dir = img_dir + "/models/test_latest/images/"
        dewarped_image = Path(pix2pix_img_dir + synthesized_image)
        if dewarped_image.is_file():
            shutil.copy(dewarped_image, img_dir + "/" + name_root + ".dw.jpg")

        # clean up the temporary input and pix2pixHD result directories
        if Path(img_tmp_dir).is_dir():
            shutil.rmtree(img_tmp_dir)
        if Path(img_dir + "/models").is_dir():
            shutil.rmtree(img_dir + "/models")
def process(arg):
    """Recognize one text-line image and write the requested output files.

    Args:
        arg: ``(trial, fname)`` pair; ``fname`` is the line image path.

    Returns:
        ``None`` when the image is empty or has a single gray value;
        ``(0, [], 0, trial, fname)`` when the line fails ``check_line``;
        otherwise the list of written files: ``<base>.llocs`` and
        ``<base>.prob`` when the matching option is set, and always
        ``<base>.txt`` last.
    """
    trial, fname = arg
    base, _ = ocrolib.allsplitext(fname)
    line = ocrolib.read_image_gray(fname)
    raw_line = line.copy()
    if prod(line.shape) == 0 or amax(line) == amin(line):
        return None

    if not args['nocheck']:
        check = check_line(amax(line) - line)
        if check is not None:
            print_error("%s SKIPPED %s (use -n to disable this check)" %
                        (fname, check))
            return (0, [], 0, trial, fname)

    # measure on the inverted image scaled to a maximum of 1, then normalize
    inverted = amax(line) - line
    inverted = inverted * 1.0 / amax(inverted)
    lnorm.measure(inverted)
    line = lnorm.normalize(line, cval=amax(line))

    pad = args['pad']
    line = lstm.prepare_line(line, pad)
    pred = network.predictString(line)

    written = []

    if args['llocs']:
        # output recognized LSTM locations of characters, mapped back from
        # network output columns to columns of the raw line image
        located = lstm.translate_back(network.outputs, pos=1)
        scale = len(raw_line.T) * 1.0 / (len(network.outputs) - 2 * pad)
        llocs_path = base + ".llocs"
        with codecs.open(llocs_path, "w", "utf-8") as locs:
            for position, label in located:
                char = network.l2s([label])
                column = (position - pad) * scale
                locs.write("%s\t%.1f\n" % (char, column))
        written.append(llocs_path)

    if args['probabilities']:
        # output character probabilities
        scored = lstm.translate_back(network.outputs, pos=2)
        prob_path = base + ".prob"
        with codecs.open(prob_path, "w", "utf-8") as probs:
            for label, probability in scored:
                probs.write("%s\t%s\n" % (network.l2s([label]), probability))
        written.append(prob_path)

    if not args['nonormalize']:
        pred = ocrolib.normalize_text(pred)

    if not args['quiet']:
        print_info(fname + ":" + pred)

    text_path = base + ".txt"
    ocrolib.write_text(text_path, pred)
    written.append(text_path)
    return written
def remove_rular(arg, base):
    """Paint over the most likely ruler in an image and save the result.

    Contour bounding boxes whose area lies strictly between
    ``args.minRularArea`` and ``args.maxRularArea`` times the image area
    are candidates.  Boxes strictly inside a larger candidate are dropped.
    A remaining box is kept when it is narrower than ``args.rularWidth``
    times the image width, lies below ``args.positionBelow``, left of
    ``args.positionLeft`` or right of ``args.positionRight`` (all relative
    to the image size), and has an aspect ratio (either way round) between
    ``args.rularRatioMin`` and ``args.rularRatioMax``.  Of those, the box
    with the most zero-valued entries in its image region is filled with
    white.

    Args:
        arg: path of the input image.
        base: output path without extension.

    Returns:
        ``base + '.pf.png'``, the path the (possibly unchanged) image was
        written to.
    """
    img = cv2.imread(arg)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # findContours returns (image, contours, hierarchy) in OpenCV 3.x but
    # (contours, hierarchy) in 2.x and 4.x; the contours are always the
    # second-to-last item, so index from the end instead of unpacking.
    contours = cv2.findContours(gray, cv2.RETR_TREE,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]

    height, width = gray.shape
    imgArea = height * width
    max_area = imgArea * args.maxRularArea
    min_area = imgArea * args.minRularArea

    # Bounding boxes (x, y, w, h), largest first, restricted to the
    # configured area range
    rects = sorted((cv2.boundingRect(cnt) for cnt in contours),
                   key=lambda r: r[2] * r[3], reverse=True)
    rects = [r for r in rects if max_area > r[2] * r[3] > min_area]

    # Child rectangles (strictly inside a larger one) are usually not the
    # ruler; the ruler is normally on its own at one side of the page.
    children = set()
    for i, (x1, y1, w1, h1) in enumerate(rects):
        for inner in rects[i + 1:]:
            (x2, y2, w2, h2) = inner
            if (x1 < x2) and (y1 < y2) and (x1 + w1 > x2 + w2) \
                    and (y1 + h1 > y2 + h2):
                children.add(inner)
    rects = [r for r in rects if r not in children]

    predictRular = []
    for (x, y, w, h) in rects:
        narrow = w < width * args.rularWidth
        at_edge = ((y > height * args.positionBelow)
                   or ((x + w) < width * args.positionLeft)
                   or (x > width * args.positionRight))
        if not (narrow and at_edge):
            continue
        ratio_wh = round(float(w) / float(h), 2)
        ratio_hw = round(float(h) / float(w), 2)
        if (args.rularRatioMin < ratio_wh < args.rularRatioMax) \
                or (args.rularRatioMin < ratio_hw < args.rularRatioMax):
            blackPixel = np.count_nonzero(img[y:y + h, x:x + w] == 0)
            predictRular.append((x, y, w, h, blackPixel))

    # Finally use the number of black pixels to avoid a false ruler: only
    # the candidate with the most black pixels is painted over.
    if predictRular:
        x, y, w, h, _ = max(predictRular, key=lambda c: c[4])
        cv2.rectangle(img, (x - 15, y - 15), (x + w + 20, y + h + 20),
                      (255, 255, 255), cv2.FILLED)

    save_file_path = base + '.pf.png'
    cv2.imwrite(save_file_path, img)
    return save_file_path
def process(self):
    """Compute a Border for every input page and add the updated PAGE file.

    For each input file the page image is loaded, the ruler is removed and
    text areas are detected.  Several areas are merged by ``crop_area``; if
    that leaves none, or if the page has no single area covering more than
    half of it, the border comes from ``select_borderLine``; otherwise the
    first text area is used.  The border is stored on the page and the
    serialized PAGE document is added to the output file group.
    """
    # `colSeparator` is configured as a fraction of the page width.  Keep
    # that fraction here so each page is scaled from it; previously the
    # stored value was overwritten with a pixel count and then multiplied
    # by the width again for every following page.
    col_separator_fraction = self.parameter['colSeparator']

    for (n, input_file) in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        fname = pcgts.get_Page().imageFilename
        img = self.workspace.resolve_image_as_pil(fname)
        print("Process file: ", fname)

        img_array = ocrolib.pil2array(img)
        img_array_bin = np.array(
            img_array > ocrolib.midrange(img_array), 'i')

        lineDetectH = []
        lineDetectV = []
        img_array_rr = self.remove_rular(img_array)

        textarea, img_array_rr_ta, height, width = self.detect_textarea(
            img_array_rr)
        self.parameter['colSeparator'] = int(width * col_separator_fraction)

        if len(textarea) > 1:
            textarea = self.crop_area(textarea, img_array_bin,
                                      img_array_rr_ta)
            if len(textarea) == 0:
                min_x, min_y, max_x, max_y = self.select_borderLine(
                    img_array_rr, lineDetectH, lineDetectV)
            else:
                min_x, min_y, max_x, max_y = textarea[0]
        elif len(textarea) == 1 and (
                height * width * 0.5 <
                (abs(textarea[0][2] - textarea[0][0]) *
                 abs(textarea[0][3] - textarea[0][1]))):
            # NOTE(review): the earlier code also computed a padded box
            # (20 px / 40 px) here but never used it; the unpadded box is
            # kept so the output is unchanged -- confirm whether padding
            # was intended.
            min_x, min_y, max_x, max_y = textarea[0]
        else:
            min_x, min_y, max_x, max_y = self.select_borderLine(
                img_array_rr, lineDetectH, lineDetectV)

        # four corners, starting top-left and going clockwise
        brd = BorderType(Coords=CoordsType(
            "%i,%i %i,%i %i,%i %i,%i" %
            (min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y)))
        pcgts.get_Page().set_Border(brd)

        # Use input_file's basename for the new file -
        # this way the files retain the same basenames:
        file_id = input_file.ID.replace(self.input_file_grp,
                                        self.output_file_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.output_file_grp, n)
        self.workspace.add_file(ID=file_id,
                                file_grp=self.output_file_grp,
                                pageId=input_file.pageId,
                                mimetype=MIMETYPE_PAGE,
                                local_filename=os.path.join(
                                    self.output_file_grp, file_id + '.xml'),
                                content=to_xml(pcgts).encode('utf-8'))
def process(job):
    """Flatten, deskew and binarize one page image.

    The image is normalized to [0, 1], flattened by subtracting an
    estimate of the local white level, optionally rotated by an estimated
    skew angle (``args['maxskew'] > 0``), rescaled between the
    ``args['lo']`` and ``args['hi']`` percentiles and thresholded at
    ``args['threshold']``.

    Args:
        job: ``(fname, i)`` pair: image path and job number (the number is
            only used in a progress message).

    Returns:
        Path of the written ``<base>.bin.png``, or ``None`` when the image
        has a single gray value or fails ``check_page``.
    """
    fname, i = job
    verbose = args['parallel'] < 2
    print_info("# %s" % (fname))
    if verbose:
        print_info("=== %s %-3d" % (fname, i))
    raw = ocrolib.read_image_gray(fname)

    # perform image normalization
    image = raw - amin(raw)
    if amax(image) == amin(image):
        print_info("# image is empty: %s" % (fname))
        return
    image /= amax(image)

    if not args['nocheck']:
        check = check_page(amax(image) - image)
        if check is not None:
            # The parts were previously concatenated without separators,
            # which produced an unreadable message.
            print_error("%s SKIPPED %s (use -n to disable this check)" %
                        (fname, check))
            return

    # flatten the image by estimating the local whitelevel
    comment = ""
    if verbose:
        print_info("flattening")
    m = interpolation.zoom(image, args['zoom'])
    m = filters.percentile_filter(m, args['perc'], size=(args['range'], 2))
    m = filters.percentile_filter(m, args['perc'], size=(2, args['range']))
    m = interpolation.zoom(m, 1.0 / args['zoom'])
    # zooming down and up again may change the size slightly; use the
    # common part of both arrays
    w, h = minimum(array(image.shape), array(m.shape))
    flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)

    # estimate skew angle and rotate
    if args['maxskew'] > 0:
        if verbose:
            print_info("estimating skew angle")
        d0, d1 = flat.shape
        o0, o1 = int(args['bignore'] * d0), int(args['bignore'] * d1)
        flat = amax(flat) - flat
        flat -= amin(flat)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        ma = args['maxskew']
        ms = int(2 * args['maxskew'] * args['skewsteps'])
        angle = estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        flat = amax(flat) - flat
    else:
        angle = 0

    # estimate low and high thresholds
    if verbose:
        print_info("estimating thresholds")
    d0, d1 = flat.shape
    o0, o1 = int(args['bignore'] * d0), int(args['bignore'] * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    if args['escale'] > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        e = args['escale']
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), args['lo'])
    hi = stats.scoreatpercentile(est.ravel(), args['hi'])

    # rescale the image to get the gray scale image
    if verbose:
        print_info("rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    binarized = 1 * (flat > args['threshold'])

    # output the thresholded image
    print_info("%s lo-hi (%.2f %.2f) angle %4.1f %s" %
               (fname, lo, hi, angle, comment))
    if verbose:
        print_info("writing")
    base, _ = ocrolib.allsplitext(fname)
    outputfile_bin = base + ".bin.png"
    ocrolib.write_image_binary(outputfile_bin, binarized)
    return outputfile_bin
def processPngFile(outRoot, origFile, fileNum):
    """Segment one PNG page into text lines and save an annotated copy.

    The file is copied into ``<outRoot>/<name>.<fileNum>/``, binarized and
    segmented.  The page segmentation and one binary image per line are
    written next to the copy, and the line bounding boxes are drawn onto
    the copy, which is saved as ``<inBase>.out.png`` and again under
    ``<parent of outRoot>/<outRoot name>.out/``.

    Args:
        outRoot: root directory for the per-file working directories.
        origFile: path of the source PNG.
        fileNum: number used (zero-padded to three digits) in the working
            directory name.

    Returns:
        ``True`` on success; ``False`` when binarization fails, the page
        fails ``check_page``, the scale estimate is NaN or above 1000, or
        the segmentation has more than ``maxlines`` lines.
    """
    baseName = os.path.basename(origFile)
    baseBase, _ = os.path.splitext(baseName)
    outDir = os.path.join(outRoot, "%s.%03d" % (baseBase, fileNum))
    inFile = os.path.join(outDir, baseName)
    os.makedirs(outDir, exist_ok=True)
    shutil.copy(origFile, inFile)

    inBase, _ = ocrolib.allsplitext(inFile)
    print("** inBase=%s" % inBase)

    fname = inFile
    outputdir = inBase
    binFile = inBase + ".bin.png"
    outFile = inBase + ".out.png"
    outRoot2, outDir2 = os.path.split(outRoot)
    outFile2 = os.path.join(outRoot2, "%s.out" % outDir2, baseName)
    print("outFile2=%s" % outFile2)
    grayFile = inBase + ".nrm.png"
    psegFile = inBase + ".pseg.png"
    print(" inFile=%s" % inFile)
    print(" binFile=%s" % binFile)
    print("grayFile=%s" % grayFile)
    print(" outFile=%s" % outFile)
    assert inFile and binFile
    assert outFile != inFile
    assert outFile != binFile

    if not binarize(inFile, binFile, grayFile):
        binExists = os.path.exists(binFile)
        print("Couldn't binarize inFile=%s binFile=%s exists=%s" %
              (inFile, binFile, binExists))
        return False

    binary = ocrolib.read_image_binary(binFile)
    print("$$ %s=%s" % (binFile, desc(binary)))
    height, width = binary.shape
    checktype(binary, ABINARY2)

    check = check_page(np.amax(binary) - binary)
    if check is not None:
        print("%s SKIPPED %s (use -n to disable this check)" %
              (inFile, check))
        return False

    binary = 1 - binary  # invert

    scale = psegutils.estimate_scale(binary)
    print("scale %f" % scale)
    if np.isnan(scale) or scale > 1000.0:
        print("%s: bad scale (%g); skipping\n" % (fname, scale))
        return False

    # find columns and text lines
    print("computing segmentation")
    segmentation = compute_segmentation(binary, scale)
    if np.amax(segmentation) > maxlines:
        print("%s: too many lines %g" % (fname, np.amax(segmentation)))
        return False

    print("segmentation=%s" % desc(segmentation))
    print("number of lines %g" % np.amax(segmentation))

    # compute the reading order
    print("finding reading order")
    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)
    print("$$ lsort = %d = %s...%s" % (len(lsort), lsort[:10], lsort[-10:]))

    # renumber the labels so that they conform to the specs
    nlabels = np.amax(segmentation) + 1
    renumber = np.zeros(nlabels, 'i')
    for i, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (i + 1)
    segmentation = renumber[segmentation]

    # finally, output everything
    print("writing lines")
    os.makedirs(outputdir, exist_ok=True)
    lines = [lines[i] for i in lsort]
    ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
    cleaned = ocrolib.remove_noise(binary, noise)
    for i, l in enumerate(lines):
        binline = psegutils.extract_masked(1 - cleaned, l, pad=pad,
                                           expand=expand)
        ocrolib.write_image_binary("%s/01%04x.bin.png" % (outputdir, i + 1),
                                   binline)
    # Report the index of the last line written.  Using len(lines) - 1
    # gives the same number as the loop variable did, but does not raise
    # NameError when the page has no lines at all.
    print("%6d %s %4.1f %d" % (len(lines) - 1, fname, scale, len(lines)))

    # to proceed, we need a pseg file and a subdirectory containing text
    # lines
    assert os.path.exists(psegFile), "%s: no such file" % psegFile
    assert os.path.isdir(inBase), "%s: no such directory" % inBase

    # iterate through the text lines in reading order, based on the page
    # segmentation file
    pseg = ocrolib.read_page_segmentation(psegFile)
    print("$$ %s=%s" % (psegFile, desc(pseg)))

    regions = ocrolib.RegionExtractor()
    print("$$ regions=%s" % regions)
    regions.setPageLines(pseg)

    im = Image.open(inFile)
    print("~~%s %s" % (inFile, im.size))
    print("$$ regions=%s=%s" % (regions, sorted(regions.__dict__)))
    print("$$ regions.length=%s" % regions.length())

    # draw every region's bounding box onto the page copy; index 0 is
    # skipped, as in the page segmentation reader loop it was taken from
    draw = ImageDraw.Draw(im)
    for region in range(1, regions.length()):
        y0, x0, y1, x1 = regions.bbox(region)
        draw.rectangle((x0, y0, x1, y1), outline=(255, 0, 0), width=3)
        draw.rectangle((x0, y0, x1, y1), outline=(0, 0, 255), width=0)
    del draw

    # write output files
    print("outFile=%s" % outFile)
    im.save(outFile, "PNG")
    print("outFile2=%s" % outFile2)
    os.makedirs(os.path.dirname(outFile2), exist_ok=True)
    im.save(outFile2, "PNG")
    assert os.path.exists(outFile2)
    return True
def textline(self, arg):
    """Extract the text lines of a page into ``<base>/lines``.

    A single block covering the whole page is written to
    ``<base>/sorted_cuts.dat``.  For each block read back from that file,
    ``<base>.ts.png`` is centred on a white canvas of the page size, the
    external ``anyBaseOCR-gpageseg.py`` script is run on it, the line
    boxes are written to ``<base>/sorted_lines.dat`` and the extracted
    line images are converted into ``<base>/lines``.

    Args:
        arg: path of the binarized page image.

    Returns:
        List of the line image paths that were produced.
    """
    image = ocrolib.read_image_binary(arg)
    height, width = image.shape
    H = height
    W = width
    base, _ = ocrolib.allsplitext(arg)

    lines_dir = "%s/lines" % base
    if not os.path.exists(lines_dir):
        # also creates <base> itself, like `mkdir -p` did
        os.makedirs(lines_dir)

    with open('%s/sorted_cuts.dat' % base, 'w') as cuts:
        cuts.write("0 0 " + str(int(width)) + " " + str(int(height)) +
                   " 0 0 0 0\n")

    blockarray = []
    if os.path.exists(base + "/sorted_cuts.dat"):
        with open(base + "/sorted_cuts.dat", "r") as blocks:
            for i, block in enumerate(blocks):
                words = block.split()
                blockarray.append((int(words[0]), -int(words[1]),
                                   int(words[2]), int(words[3]), i))
    else:
        blockarray.append((0, 0, width, height, 0))

    i = 0
    j = 0
    lines = []
    for block in blockarray:
        (x0, y0, x1, y1, i) = block
        y0 = -y0
        # NOTE(review): the shell commands below interpolate `base`
        # unquoted, so paths with spaces or shell metacharacters will
        # break them.
        os.system("convert %s.ts.png %s/temp.png" % (base, base))

        # centre the text image on a white canvas of the page size
        img = Image.open("%s.ts.png" % base, 'r')
        img_w, img_h = img.size
        background = Image.new('RGBA', (W, H), (255, 255, 255, 255))
        bg_w, bg_h = background.size
        offX = (bg_w - img_w) // 2
        offY = (bg_h - img_h) // 2
        background.paste(img, (offX, offY))
        background.save("%s/temp.png" % base)

        command = "python " + self.param['libpath'] + \
            "/cli/anyBaseOCR-gpageseg.py %s/temp.png -n --minscale %f --maxlines %f --scale %f --hscale %f --vscale %f --threshold %f --noise %d --maxseps %d --sepwiden %d --maxcolseps %d --csminaspect %f --csminheight %f -p %d -e %d -Q %d" % (
                base, self.param['minscale'], self.param['maxlines'],
                self.param['scale'], self.param['hscale'],
                self.param['vscale'], self.param['threshold'],
                self.param['noise'], self.param['maxseps'],
                self.param['sepwiden'], self.param['maxcolseps'],
                self.param['csminaspect'], self.param['csminheight'],
                self.param['pad'], self.param['expand'],
                self.param['parallel'])
        if (self.param['blackseps']):
            command = command + " -b"
        if (self.param['usegauss']):
            command = command + " --usegauss"
        os.system(command)

        pseg = ocrolib.read_page_segmentation("%s/temp.pseg.png" % base)
        regions = ocrolib.RegionExtractor()
        regions.setPageLines(pseg)

        # One box per line, shifted back by the canvas offset and with the
        # y axis flipped relative to the text image height.  The file is
        # now closed (and flushed) as soon as the boxes are written.
        with open('%s/sorted_lines.dat' % base, 'w') as boxes:
            for h in range(1, regions.length()):
                y0, x0, y1, x1 = regions.bbox(h)
                boxes.write(
                    str(int(x0 - offX)) + " " +
                    str(int(img_h - (y1 - offY))) + " " +
                    str(int(x1 - offX)) + " " +
                    str(int(img_h - (y0 - offY))) + " 0 0 0 0\n")

        for infile in sorted(glob.glob("%s/temp/*" % base)):
            line_path = "%s/lines/01%02x%02x.bin.png" % (base, i + 1, j + 1)
            os.system("convert %s %s" % (infile, line_path))
            lines.append(line_path)
            j += 1
        os.system("rm -r %s/temp/" % base)
        os.system("rm %s/temp.png %s/temp.pseg.png" % (base, base))
        i += 1
    return lines
# mandatory parameter check
if not args.mets or not args.Input or not args.Output or not args.work:
    parser.print_help()
    print("Example: python ocrd-anyBaseOCR-cropping.py -m (mets input file path) -I (input-file-grp name) -O (output-file-grp name) -w (Working directory)")
    sys.exit(0)

# create the working directory if it does not exist yet
if args.work:
    if not os.path.exists(args.work):
        os.mkdir(args.work)

files = parseXML(args.mets)
fname=[]
# args.colSeparator is given as a fraction of the page width.  Remember the
# fraction so every page is scaled from it; previously the value was
# overwritten with a pixel count and multiplied by the width again for each
# following file.
colSeparatorFraction = args.colSeparator
for i, f in enumerate(files):
    # single pre-formatted string: prints the same text under Python 2 and 3
    print("Process file:  %s %d" % (str(f), i+1))
    base,_ = ocrolib.allsplitext(str(f))
    binImg = ocrolib.read_image_binary(str(f))

    lineDetectH=[]; lineDetectV=[]
    fpath = remove_rular(str(f), base)
    textarea, rgb, height, width = detect_textarea(fpath)
    args.colSeparator = int(width * colSeparatorFraction)

    if len(textarea)>1:
        textarea = crop_area(textarea, binImg, rgb, base)
        if len(textarea)==0:
            select_borderLine(fpath, base)
    elif len(textarea)==1 and (height*width*0.5 < (abs(textarea[0][2]-textarea[0][0]) * abs(textarea[0][3]-textarea[0][1]))):
        # a single text area covering more than half of the page: pad it
        # horizontally by 20 px, clamped to the page
        # NOTE(review): x1/x2 are not used further in the visible code
        x1,y1,x2,y2 = textarea[0]
        x1 = x1-20 if x1>20 else 0
        x2 = x2+20 if x2<width-20 else width
def process(self):
    """Binarize the page image of every input file.

    Each page image is normalized to [0, 1].  Unless it already looks
    binarized (more than 95% of the pixels below 0.05 or above 0.95, a test
    that is skipped when the ``gray`` parameter is set) it is flattened by
    subtracting an estimate of the local white level.  It is then rescaled
    between the ``lo`` and ``hi`` percentiles and thresholded at
    ``threshold``.  The result is written to ``<base>.bin.png``, registered
    on the page as an AlternativeImage and added to the output file group.

    Pages that are empty (single gray value) or fail ``check_page`` are
    logged and skipped; processing continues with the next input file.
    """
    for (n, input_file) in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID
        page = pcgts.get_Page()
        page_name = input_file.pageId or input_file.ID
        LOG.info("INPUT FILE %s", page_name)
        page_image, page_xywh, _ = self.workspace.image_from_page(
            page, page_id)

        print("----------", type(page_image))

        raw = ocrolib.read_image_gray(page_image.filename)
        self.dshow(raw, "input")

        # perform image normalization
        image = raw - amin(raw)
        if amax(image) == amin(image):
            LOG.info("# image is empty: %s" % (page_name))
            # skip only this page; a `return` here used to abort all
            # remaining input files as well
            continue
        image /= amax(image)

        if not self.parameter['nocheck']:
            check = self.check_page(amax(image) - image)
            if check is not None:
                # The name is resolved first; before, `or` bound more
                # loosely than `+`, so the reason was dropped from the
                # message whenever pageId was set.
                LOG.error(page_name + " SKIPPED. " + check +
                          " (use -n to disable this check)")
                continue

        # check whether the image is already effectively binarized
        if self.parameter['gray']:
            extreme = 0
        else:
            extreme = (np.sum(image < 0.05) +
                       np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)
        if extreme > 0.95:
            comment = "no-normalization"
            flat = image
        else:
            comment = ""
            # if not, we need to flatten it by estimating the local
            # whitelevel
            LOG.info("Flattening")
            m = interpolation.zoom(image, self.parameter['zoom'])
            m = filters.percentile_filter(m,
                                          self.parameter['perc'],
                                          size=(self.parameter['range'], 2))
            m = filters.percentile_filter(m,
                                          self.parameter['perc'],
                                          size=(2, self.parameter['range']))
            m = interpolation.zoom(m, 1.0 / self.parameter['zoom'])
            if self.parameter['debug'] > 0:
                clf()
                imshow(m, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])
            # zooming down and up again may change the size slightly; use
            # the common part of both arrays
            w, h = minimum(array(image.shape), array(m.shape))
            flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
            if self.parameter['debug'] > 0:
                clf()
                imshow(flat, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])

        # estimate low and high thresholds
        LOG.info("Estimating Thresholds")
        d0, d1 = flat.shape
        o0, o1 = int(self.parameter['bignore'] * d0), int(
            self.parameter['bignore'] * d1)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        if self.parameter['escale'] > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            e = self.parameter['escale']
            v = est - filters.gaussian_filter(est, e * 20.0)
            v = filters.gaussian_filter(v**2, e * 20.0)**0.5
            v = (v > 0.3 * amax(v))
            v = morphology.binary_dilation(v,
                                           structure=ones(
                                               (int(e * 50), 1)))
            v = morphology.binary_dilation(v,
                                           structure=ones(
                                               (1, int(e * 50))))
            if self.parameter['debug'] > 0:
                imshow(v)
                ginput(1, self.parameter['debug'])
            est = est[v]
        lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
        hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])

        # rescale the image to get the gray scale image
        LOG.info("Rescaling")
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if self.parameter['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        binarized = 1 * (flat > self.parameter['threshold'])

        # output the thresholded image
        LOG.info("%s lo-hi (%.2f %.2f) %s" % (page_name, lo, hi, comment))
        LOG.info("writing")
        if self.parameter['debug'] > 0 or self.parameter['show']:
            clf()
            gray()
            imshow(binarized)
            ginput(1, max(0.1, self.parameter['debug']))
        base, _ = ocrolib.allsplitext(page_image.filename)
        bin_path = base + ".bin.png"
        ocrolib.write_image_binary(bin_path, binarized)

        file_id = input_file.ID.replace(self.input_file_grp,
                                        self.output_file_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.output_file_grp, n)

        page.add_AlternativeImage(
            AlternativeImageType(filename=bin_path, comment="binarized"))

        self.workspace.add_file(ID=file_id,
                                file_grp=self.output_file_grp,
                                pageId=input_file.pageId,
                                mimetype="image/png",
                                url=bin_path,
                                local_filename=os.path.join(
                                    self.output_file_grp, file_id + '.xml'),
                                content=to_xml(pcgts).encode('utf-8'))
def process1(job):
    """Segment one binarized page into text lines and write them to disk.

    ``job`` is a ``(fname, i)`` pair. With ``base`` being ``fname`` stripped
    by ``ocrolib.allsplitext``, the page is read from ``base + ".bin.png"``,
    falling back to ``fname`` itself. The page is then checked (unless
    ``args.nocheck``), inverted, segmented and put into reading order.

    Output: the renumbered segmentation is written to ``base + ".pseg.png"``
    and every line is written into the directory ``base`` as
    ``01XXXX.bin.png`` (plus ``01XXXX.nrm.png`` when ``args.gray`` is set).

    Always returns ``None``. Pages that cannot be read, fail a check, or lack
    the grayscale companion requested by ``args.gray`` are reported and
    skipped instead of raising.
    """
    fname, i = job
    # ``base`` is deliberately published as a module-level name.
    global base
    base, _ = ocrolib.allsplitext(fname)
    outputdir = base

    # Prefer the binarized companion image; fall back to the file itself.
    try:
        binary = ocrolib.read_image_binary(base + ".bin.png")
    except IOError:
        try:
            binary = ocrolib.read_image_binary(fname)
        except IOError:
            if ocrolib.trace:
                traceback.print_exc()
            print("cannot open either", base + ".bin.png", "or", fname)
            return

    checktype(binary, ABINARY2)

    if not args.nocheck:
        check = check_page(amax(binary) - binary)
        if check is not None:
            print(fname, "SKIPPED", check, "(use -n to disable this check)")
            return

    if args.gray:
        # Without this guard ``gray`` would stay unbound and the function
        # would die with a NameError while writing the first line, after
        # already having produced partial output.
        if not os.path.exists(base + ".nrm.png"):
            sys.stderr.write(
                "%s: grayscale image %s.nrm.png not found; skipping\n" %
                (fname, base))
            return
        gray = ocrolib.read_image_gray(base + ".nrm.png")
        checktype(gray, GRAYSCALE)

    binary = 1 - binary  # invert

    if args.scale == 0:
        scale = psegutils.estimate_scale(binary)
    else:
        scale = args.scale
    print("scale", scale)
    if isnan(scale) or scale > 1000.0:
        sys.stderr.write("%s: bad scale (%g); skipping\n" % (fname, scale))
        return
    if scale < args.minscale:
        sys.stderr.write("%s: scale (%g) less than --minscale; skipping\n" %
                         (fname, scale))
        return

    # find columns and text lines
    if not args.quiet:
        print("computing segmentation")
    segmentation = compute_segmentation(binary, scale)
    if amax(segmentation) > args.maxlines:
        print(fname, ": too many lines", amax(segmentation))
        return
    if not args.quiet:
        print("number of lines", amax(segmentation))

    # compute the reading order
    if not args.quiet:
        print("finding reading order")
    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)

    # renumber the labels so that they conform to the specs
    nlabels = amax(segmentation) + 1
    renumber = zeros(nlabels, 'i')
    for i, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (i + 1)
    segmentation = renumber[segmentation]

    # finally, output everything
    if not args.quiet:
        print("writing lines")
    if not os.path.exists(outputdir):
        os.mkdir(outputdir)
    lines = [lines[k] for k in lsort]
    ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
    cleaned = ocrolib.remove_noise(binary, args.noise)
    for i, l in enumerate(lines):
        binline = psegutils.extract_masked(1 - cleaned, l, pad=args.pad,
                                           expand=args.expand)
        ocrolib.write_image_binary("%s/01%04x.bin.png" % (outputdir, i + 1),
                                   binline)
        if args.gray:
            grayline = psegutils.extract_masked(gray, l, pad=args.pad,
                                                expand=args.expand)
            ocrolib.write_image_gray("%s/01%04x.nrm.png" % (outputdir, i + 1),
                                     grayline)
    # NOTE(review): ``i`` was reused as a loop index above, so whenever lines
    # were written this reports the last line index rather than the job
    # number -- confirm whether that is intended.
    print("%6d" % i, fname, "%4.1f" % scale, len(lines))
def process(job):
    """Cut one binarized page image into individual text-line images.

    ``job`` is an ``(imagepath, index)`` pair. The page is read from
    ``imagepath``, checked (unless ``args['nocheck']``), inverted, segmented
    and sorted into reading order. The renumbered segmentation goes to
    ``<base>.pseg.png`` and each line to
    ``<base>/<name>_01XXXX.bin.png``, where ``base`` is ``imagepath`` with
    its extensions removed by ``ocrolib.allsplitext`` and ``name`` is the
    last path component of ``base``.

    Returns the output directory (``base``), or ``None`` when the page is
    skipped.
    """
    global base  # published at module level
    imagepath, idx = job
    base, _ = ocrolib.allsplitext(imagepath)
    out_dir = base
    stem = os.path.basename(os.path.normpath(base))

    try:
        page = ocrolib.read_image_binary(imagepath)
    except IOError:
        if ocrolib.trace:
            traceback.print_exc()
        print_error("cannot open either %s.bin.png or %s" % (base, imagepath))
        return
    checktype(page, ABINARY2)

    if not args['nocheck']:
        problem = check_page(amax(page) - page)
        if problem is not None:
            print_error("%s SKIPPED %s (use -n to disable this check)" %
                        (imagepath, problem))
            return

    page = 1 - page  # invert

    # A configured scale of 0 means "estimate it from the page".
    scale = (psegutils.estimate_scale(page)
             if args['scale'] == 0 else args['scale'])
    print_info("scale %f" % (scale))
    if isnan(scale) or scale > 1000.0:
        print_error("%s: bad scale (%g); skipping\n" % (imagepath, scale))
        return
    if scale < args['minscale']:
        print_error("%s: scale (%g) less than --minscale; skipping\n" %
                    (imagepath, scale))
        return

    # --- columns and text lines ---
    if not args['quiet']:
        print_info("computing segmentation")
    seg = compute_segmentation(page, scale)
    line_count = amax(seg)
    if line_count > args['maxlines']:
        print_error("%s: too many lines %g" % (imagepath, line_count))
        return
    if not args['quiet']:
        print_info("number of lines %g" % line_count)

    # --- reading order ---
    if not args['quiet']:
        print_info("finding reading order")
    lines = psegutils.compute_lines(seg, scale)
    ordering = psegutils.topsort(
        psegutils.reading_order([entry.bounds for entry in lines]))

    # --- relabel so the labels conform to the specs ---
    relabel = zeros(line_count + 1, 'i')
    for rank, position in enumerate(ordering, 1):
        relabel[lines[position].label] = 0x010000 + rank
    seg = relabel[seg]

    # --- write everything out ---
    if not args['quiet']:
        print_info("writing lines")
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    lines = [lines[position] for position in ordering]
    ocrolib.write_page_segmentation("%s.pseg.png" % out_dir, seg)
    cleaned = ocrolib.remove_noise(page, args['noise'])
    # ``idx`` is reused on purpose: the summary below reports whatever value
    # it holds after this loop, as the code always has.
    for idx, line in enumerate(lines):
        snippet = psegutils.extract_masked(1 - cleaned, line,
                                           pad=args['pad'],
                                           expand=args['expand'])
        ocrolib.write_image_binary(
            "%s/%s_01%04x.bin.png" % (out_dir, stem, idx + 1), snippet)
    print_info("%6d %s %4.1f %d" % (idx, imagepath, scale, len(lines)))
    return out_dir
start = args.start if args.start>=0 else network.last_trial for trial in range(start,args.ntrain): network.last_trial = trial+1 do_display = (args.display>0 and trial%args.display==0) do_update = 1 if args.movie and do_display: fname = args.moviesample do_update = 0 else: fname = pyrandom.sample(inputs,1)[0] base,_ = ocrolib.allsplitext(fname) try: line = ocrolib.read_image_gray(fname) transcript = ocrolib.read_text(base+".gt.txt") except IOError as e: print("ERROR", e) continue if not args.nolineest: assert "dew.png" not in fname,"don't dewarp already dewarped lines" network.lnorm.measure(np.amax(line)-line) line = network.lnorm.normalize(line,cval=np.amax(line)) else: assert "dew.png" in fname,"input must already be dewarped" if line.size<10 or np.amax(line)==np.amin(line):
def process(self):
    """Deskew and binarize the page image of every input file.

    For each input file the page description is loaded, its image is read
    as grayscale and, when the ``maxskew`` parameter is positive, rotated by
    the estimated skew angle. The result is rescaled between the ``lo`` and
    ``hi`` percentiles, thresholded and written to ``<base>.ds.png`` (with
    ``base`` being the image file name stripped by ``ocrolib.allsplitext``).
    Finally the page orientation is set from the angle and the result is
    registered in the output file group of the workspace.
    """
    for n, input_file in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        fname = pcgts.get_Page().imageFilename
        img = self.workspace.resolve_image_as_pil(fname)
        param = self.parameter
        base, _ = ocrolib.allsplitext(fname)

        verbose = param['parallel'] < 2
        if verbose:
            print_info("=== %s " % (fname))
        flat = ocrolib.read_image_gray(img.filename)

        # --- estimate skew angle and rotate ---
        if param['maxskew'] > 0:
            if verbose:
                print_info("estimating skew angle")
            rows, cols = flat.shape
            margin_r = int(param['bignore'] * rows)
            margin_c = int(param['bignore'] * cols)
            # work on the inverted image, shifted so its minimum is zero
            flat = amax(flat) - flat
            flat -= amin(flat)
            core = flat[margin_r:rows - margin_r, margin_c:cols - margin_c]
            max_angle = param['maxskew']
            steps = int(2 * max_angle * param['skewsteps'])
            angle = self.estimate_skew_angle(
                core, linspace(-max_angle, max_angle, steps + 1))
            flat = interpolation.rotate(flat, angle, mode='constant',
                                        reshape=0)
            flat = amax(flat) - flat  # invert back
        else:
            angle = 0

        # --- estimate low and high thresholds ---
        if verbose:
            print_info("estimating thresholds")
        rows, cols = flat.shape
        margin_r = int(param['bignore'] * rows)
        margin_c = int(param['bignore'] * cols)
        sample = flat[margin_r:rows - margin_r, margin_c:cols - margin_c]
        if param['escale'] > 0:
            # Only regions with significant variance are used; this makes
            # the percentile based low and high estimates more reliable.
            e = param['escale']
            mask = sample - filters.gaussian_filter(sample, e * 20.0)
            mask = filters.gaussian_filter(mask**2, e * 20.0)**0.5
            mask = (mask > 0.3 * amax(mask))
            mask = morphology.binary_dilation(
                mask, structure=ones((int(e * 50), 1)))
            mask = morphology.binary_dilation(
                mask, structure=ones((1, int(e * 50))))
            if param['debug'] > 0:
                imshow(mask)
                ginput(1, param['debug'])
            sample = sample[mask]
        lo = stats.scoreatpercentile(sample.ravel(), param['lo'])
        hi = stats.scoreatpercentile(sample.ravel(), param['hi'])

        # --- rescale the image to get the gray scale image ---
        if verbose:
            print_info("rescaling")
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if param['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, param['debug'])
        deskewed = 1 * (flat > param['threshold'])

        # --- report and write the thresholded image ---
        print_info("%s lo-hi (%.2f %.2f) angle %4.1f" %
                   (pcgts.get_Page().imageFilename, lo, hi, angle))
        if verbose:
            print_info("writing")
        ocrolib.write_image_binary(base + ".ds.png", deskewed)

        # The orientation is the negated angle, wrapped into (-180, 180].
        orientation = 180 - (180 + angle) % 360
        pcgts.get_Page().set_orientation(orientation)

        file_id = concat_padded(self.output_file_grp, n)
        self.workspace.add_file(
            ID=file_id,
            file_grp=self.output_file_grp,
            pageId=input_file.pageId,
            mimetype="image/png",
            url=base + ".ds.png",
            local_filename='%s/%s' % (self.output_file_grp, file_id),
            content=to_xml(pcgts).encode('utf-8'))
def process1(job):
    """Normalize and binarize a single page image.

    ``job`` is a ``(fname, i)`` pair. The image is read as grayscale,
    stretched to start at zero and peak at one, optionally flattened by
    subtracting an estimate of the local white level, rescaled between the
    ``args.lo`` / ``args.hi`` percentiles and thresholded at
    ``args.threshold``.

    Writes ``<base>.bin.png`` (thresholded) and ``<base>.nrm.png``
    (normalized gray), where ``base`` is ``fname`` stripped by
    ``ocrolib.allsplitext``. Returns the path of the ``.bin.png`` file, or
    ``None`` when the image is empty or rejected by the page check.
    """
    fname, i = job
    print_info("# %s" % (fname))
    verbose = args.parallel < 2
    if verbose:
        print_info("=== %s %-3d" % (fname, i))
    raw = ocrolib.read_image_gray(fname)
    dshow(raw, "input")

    # --- image normalization ---
    image = raw - amin(raw)
    if amax(image) == amin(image):
        print_info("# image is empty: %s" % (fname))
        return
    image /= amax(image)

    if not args.nocheck:
        problem = check_page(amax(image) - image)
        if problem is not None:
            print_error(fname + " SKIPPED. " + problem +
                        " (use -n to disable this check)")
            return

    # --- is the image already effectively binarized? ---
    # ``extreme`` is the fraction of pixels that are nearly black or white.
    extreme = 0 if args.gray else (
        (sum(image < 0.05) + sum(image > 0.95)) * 1.0 / prod(image.shape))

    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        # Flatten the page by estimating the local whitelevel.
        if verbose:
            print_info("flattening")
        whitelevel = interpolation.zoom(image, args.zoom)
        whitelevel = filters.percentile_filter(whitelevel, args.perc,
                                               size=(args.range, 2))
        whitelevel = filters.percentile_filter(whitelevel, args.perc,
                                               size=(2, args.range))
        whitelevel = interpolation.zoom(whitelevel, 1.0 / args.zoom)
        if args.debug > 0:
            clf()
            imshow(whitelevel, vmin=0, vmax=1)
            ginput(1, args.debug)
        # Crop both arrays to the shape they have in common.
        w, h = minimum(array(image.shape), array(whitelevel.shape))
        flat = clip(image[:w, :h] - whitelevel[:w, :h] + 1, 0, 1)
        if args.debug > 0:
            clf()
            imshow(flat, vmin=0, vmax=1)
            ginput(1, args.debug)

    # --- estimate low and high thresholds ---
    if verbose:
        print_info("estimating thresholds")
    rows, cols = flat.shape
    margin_r = int(args.bignore * rows)
    margin_c = int(args.bignore * cols)
    sample = flat[margin_r:rows - margin_r, margin_c:cols - margin_c]
    if args.escale > 0:
        # Only regions with significant variance are used; this makes the
        # percentile based low and high estimates more reliable.
        e = args.escale
        mask = sample - filters.gaussian_filter(sample, e * 20.0)
        mask = filters.gaussian_filter(mask**2, e * 20.0)**0.5
        mask = (mask > 0.3 * amax(mask))
        mask = morphology.binary_dilation(mask,
                                          structure=ones((int(e * 50), 1)))
        mask = morphology.binary_dilation(mask,
                                          structure=ones((1, int(e * 50))))
        if args.debug > 0:
            imshow(mask)
            ginput(1, args.debug)
        sample = sample[mask]
    lo = stats.scoreatpercentile(sample.ravel(), args.lo)
    hi = stats.scoreatpercentile(sample.ravel(), args.hi)

    # --- rescale the image to get the gray scale image ---
    if verbose:
        print_info("rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if args.debug > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, args.debug)
    binarized = 1 * (flat > args.threshold)

    # --- output the normalized grayscale and the thresholded images ---
    print_info("%s lo-hi (%.2f %.2f) %s" % (fname, lo, hi, comment))
    if verbose:
        print_info("writing")
    if args.debug > 0 or args.show:
        clf()
        gray()
        imshow(binarized)
        ginput(1, max(0.1, args.debug))
    base, _ = ocrolib.allsplitext(fname)
    bin_path = base + ".bin.png"
    ocrolib.write_image_binary(bin_path, binarized)
    ocrolib.write_image_gray(base + ".nrm.png", flat)
    return bin_path
def process1(arg): (trial,fname) = arg base,_ = ocrolib.allsplitext(fname) line = ocrolib.read_image_gray(fname) if prod(line.shape)==0: return None if amax(line)==amin(line): return None if not args.nolineest: assert "dew.png" not in fname,"don't dewarp dewarped images" temp = amax(line)-line temp = temp*1.0/amax(temp) lnorm.measure(temp) line = lnorm.normalize(line,cval=amax(line)) else: assert "dew.png" in fname,"only apply to dewarped images" line = lstm.prepare_line(line,args.pad) pred = network.predictString(line) if not args.nonormalize: pred = ocrolib.normalize_text(pred) if args.estrate: try: gt = ocrolib.read_text(base+".gt.txt") except: return (0,[],0,trial,fname) pred0 = ocrolib.project_text(pred,args.compare) gt0 = ocrolib.project_text(gt,args.compare) if args.estconf>0: err,conf = edist.xlevenshtein(pred0,gt0,context=args.context) else: err = edist.xlevenshtein(pred0,gt0) conf = [] if not args.quiet: print "%3d %3d"%(err,len(gt)),fname,":",pred sys.stdout.flush() return (err,conf,len(gt0),trial,fname) if not args.quiet: print pred # print fname,":",pred # ocrolib.write_text(base+".txt",pred) if args.show>0 or args.save is not None: ion() matplotlib.rc('xtick',labelsize=7) matplotlib.rc('ytick',labelsize=7) matplotlib.rcParams.update({"font.size":7}) if os.path.exists(base+".gt.txt"): transcript = ocrolib.read_text(base+".gt.txt") transcript = ocrolib.normalize_text(transcript) else: transcript = pred pred2 = network.trainString(line,transcript,update=0) figure("result",figsize=(1400//75,800//75),dpi=75) clf() subplot(311) imshow(line.T,cmap=cm.gray) title(transcript) subplot(312) gca().set_xticks([]) imshow(network.outputs.T[1:],vmin=0,cmap=cm.hot) title(pred[:80]) subplot(313) plot(network.outputs[:,0],color='yellow',linewidth=3,alpha=0.5) plot(network.outputs[:,1],color='green',linewidth=3,alpha=0.5) plot(amax(network.outputs[:,2:],axis=1),color='blue',linewidth=3,alpha=0.5) plot(network.aligned[:,0],color='orange',linestyle='dashed',alpha=0.7) 
plot(network.aligned[:,1],color='green',linestyle='dashed',alpha=0.5) plot(amax(network.aligned[:,2:],axis=1),color='blue',linestyle='dashed',alpha=0.5) if args.save is not None: draw() savename = args.save if "%" in savename: savename = savename%trial print "saving",savename savefig(savename,bbox_inches=0) if trial==len(inputs)-1: ginput(1,99999999) else: ginput(1,args.show) return None