def _rasterize_polynomial(coeffs, shape):
    """Return a zero image of `shape` with one pixel set per column.

    For every column x the row is int(polyval(coeffs, x)), clamped to the
    valid row range so a curve leaving the image sticks to its edge.
    """
    h, w = shape
    trace = zeros(shape)
    for x in range(w):
        trace[clip(int(polyval(coeffs, x)), 0, h - 1), x] = 1
    return trace


def compute_geomaps(fnames, shapedict, old_model, use_gt=1, size=32, debug=0, old_order=1):
    """Given a shape dictionary and an existing line geometry estimator,
    compute updated geometric maps for each entry in the shape dictionary.

    Parameters:
        fnames: iterable of line image file names; each is read with
            ocrolib.read_image_gray and inverted (1 - image).
        shapedict: object providing `k` (number of entries) and
            `predict1(sub)`, whose result is used as an index into the maps.
        old_model: existing estimator; `lineFit(image, order=old_order)` must
            return two polynomial coefficient sequences (blp, xlp) that are
            evaluated with polyval.
        use_gt: when true, the ground-truth text (file variant "txt"/"gt") is
            read and the line is skipped if its count of capitals or of
            digits is at least 0.3 times its count of lowercase letters.
        size: side length of each accumulated map.
        debug: 0 disables plotting; a positive value plots every `debug`-th
            file and calls ginput after each processed file.
        old_order: forwarded to `old_model.lineFit` as `order`.

    Returns:
        (bls, xls): two arrays of shape (shapedict.k, size, size). Each map
        is rescaled so that its total equals `size`; maps that received no
        contribution stay all zero.

    NOTE(review): the "bl"/"xl" names presumably stand for baseline and
    x-line; `transform` (from extract_chars) is assumed to return an array
    that can be added to a (size, size) map -- confirm against extract_chars.
    """
    if debug > 0:
        gray()
        ion()
    bls = zeros((shapedict.k, size, size))
    xls = zeros((shapedict.k, size, size))
    count = 0
    for fno, fname in enumerate(fnames):
        if fno % 20 == 0:
            # Single pre-formatted string: identical output whether `print`
            # is the Python 2 statement or the print function.
            print("%s %s %s" % (fno, fname, count))
        if use_gt:
            # don't use lines with many capital letters (or digits) for
            # training because they result in bad models
            gt = ocrolib.read_text(ocrolib.fvariant(fname, "txt", "gt"))
            nlower = len(re.sub(r'[^a-z]', '', gt))
            if len(re.sub(r'[^A-Z]', '', gt)) >= 0.3 * nlower:
                continue
            if len(re.sub(r'[^0-9]', '', gt)) >= 0.3 * nlower:
                continue
        image = 1 - ocrolib.read_image_gray(fname)
        show = debug > 0 and fno % debug == 0
        if show:
            clf()
            subplot(411)
            imshow(image)
        try:
            blp, xlp = old_model.lineFit(image, order=old_order)
        except Exception:
            # Best effort: report and skip this line, but let
            # KeyboardInterrupt/SystemExit stop the run.
            traceback.print_exc()
            continue
        blimage = _rasterize_polynomial(blp, image.shape)
        xlimage = _rasterize_polynomial(xlp, image.shape)
        if show:
            subplot(413)
            imshow(xlimage + 0.3 * image)
            subplot(414)
            imshow(blimage + 0.3 * image)
        try:
            seg = lineseg.ccslineseg(image)
        except Exception:
            # Previously skipped silently; report the failure as allchars does.
            traceback.print_exc()
            continue
        if show:
            subplot(412)
            morph.showlabels(seg)
        sub_shape = None
        for sub, transform, itransform_add in extract_chars(seg):
            # All character patches of one line must share the same shape.
            if sub_shape is None:
                sub_shape = sub.shape
            assert sub.shape == sub_shape
            count += 1
            best = shapedict.predict1(sub)
            bls[best] += transform(blimage)
            xls[best] += transform(xlimage)
        if debug == 1:
            ginput(1, 100)
        elif debug > 1:
            ginput(1, 0.01)
    # Rescale every map so its total equals its width; the 1e-6 floor keeps
    # untouched (all-zero) maps from dividing by zero.  `.sum()` is used so
    # the result does not depend on which `sum` is in scope.
    for maps in (bls, xls):
        for i in range(len(maps)):
            maps[i] *= maps[i].shape[1] * 1.0 / max(1e-6, maps[i].sum())
    return bls, xls
def allchars():
    """Generate every item produced by extract_chars for all files in `fnames`.

    `fnames` is not a parameter: it is looked up in the enclosing scope.
    Each file is read with ocrolib.read_image_gray, inverted, segmented with
    lineseg.ccslineseg and relabelled with morph.renumber_by_xcenter before
    extract_chars is applied. Files whose segmentation fails are reported
    with a traceback and skipped. A progress line (file index, file name,
    number of items yielded so far) is printed every 20 files.
    """
    count = 0
    for fno, fname in enumerate(fnames):
        if fno % 20 == 0:
            # Single pre-formatted string: identical output whether `print`
            # is the Python 2 statement or the print function.
            print("%s %s %s" % (fno, fname, count))
        image = 1 - ocrolib.read_image_gray(fname)
        try:
            seg = lineseg.ccslineseg(image)
        except Exception:
            # Best effort: report and skip this file, but let
            # KeyboardInterrupt/SystemExit stop the iteration.
            traceback.print_exc()
            continue
        seg = morph.renumber_by_xcenter(seg)
        for e in extract_chars(seg):
            count += 1
            yield e
def allchars():
    """Generate every item produced by extract_chars for all files in `fnames`.

    `fnames` is not a parameter: it is looked up in the enclosing scope.
    Each file is read with ocrolib.read_image_gray, inverted, segmented with
    lineseg.ccslineseg and relabelled with morph.renumber_by_xcenter before
    extract_chars is applied. Files whose segmentation fails are reported
    with a traceback and skipped. A progress line (file index, file name,
    number of items yielded so far) is printed every 20 files.
    """
    count = 0
    for fno, fname in enumerate(fnames):
        if fno % 20 == 0:
            # Single pre-formatted string: identical output whether `print`
            # is the Python 2 statement or the print function.
            print("%s %s %s" % (fno, fname, count))
        image = 1 - ocrolib.read_image_gray(fname)
        try:
            seg = lineseg.ccslineseg(image)
        except Exception:
            # Best effort: report and skip this file, but let
            # KeyboardInterrupt/SystemExit stop the iteration.
            traceback.print_exc()
            continue
        seg = morph.renumber_by_xcenter(seg)
        for e in extract_chars(seg):
            count += 1
            yield e
def blxlimages(image, shapedict, bls, xls, size=32):
    """Build full-size bl/xl images for a line from per-shape geometric maps.

    The image is binarised at ocrolib.midrange(image), segmented with
    lineseg.ccslineseg and relabelled with morph.renumber_by_xcenter. For
    every character returned by extract_chars, the entry predicted by
    `shapedict.predict1` selects one map from `bls` and one from `xls`; each
    is reshaped to (size, size) and handed to that character's
    `itransform_add` together with the corresponding output image.

    Parameters:
        image: 2-D line image; pixels above the midrange count as foreground.
        shapedict: object providing `predict1(sub)`, whose result indexes
            `bls` and `xls`.
        bls, xls: per-entry maps, each holding size*size values.
        size: side length the maps are reshaped to (default 32, the value
            that used to be hard-coded).

    Returns:
        (blimage, xlimage): two arrays with the shape of `image`.

    Raises:
        RecognitionError: if no pixel is above the threshold ("empty line").

    NOTE(review): `itransform_add` presumably adds the map back into the
    output image at the character's position -- confirm against extract_chars.
    """
    image = (image > ocrolib.midrange(image))
    if amax(image) == 0:
        raise RecognitionError("empty line")
    seg = lineseg.ccslineseg(image)
    # debugging aid:
    # ion(); subplot(311); imshow(image); subplot(312); morph.showlabels(seg); ginput(1,0.1); raw_input()
    seg = morph.renumber_by_xcenter(seg)
    blimage = zeros(image.shape)
    xlimage = zeros(image.shape)
    for sub, _, itransform_add in extract_chars(seg):
        best = shapedict.predict1(sub)
        itransform_add(blimage, bls[best].reshape(size, size))
        itransform_add(xlimage, xls[best].reshape(size, size))
    return blimage, xlimage
def blxlimages(image, shapedict, bls, xls, size=32):
    """Build full-size bl/xl images for a line from per-shape geometric maps.

    The image is binarised at ocrolib.midrange(image), segmented with
    lineseg.ccslineseg and relabelled with morph.renumber_by_xcenter. For
    every character returned by extract_chars, the entry predicted by
    `shapedict.predict1` selects one map from `bls` and one from `xls`; each
    is reshaped to (size, size) and handed to that character's
    `itransform_add` together with the corresponding output image.

    Parameters:
        image: 2-D line image; pixels above the midrange count as foreground.
        shapedict: object providing `predict1(sub)`, whose result indexes
            `bls` and `xls`.
        bls, xls: per-entry maps, each holding size*size values.
        size: side length the maps are reshaped to (default 32, the value
            that used to be hard-coded).

    Returns:
        (blimage, xlimage): two arrays with the shape of `image`.

    Raises:
        RecognitionError: if no pixel is above the threshold ("empty line").

    NOTE(review): `itransform_add` presumably adds the map back into the
    output image at the character's position -- confirm against extract_chars.
    """
    image = (image > ocrolib.midrange(image))
    if amax(image) == 0:
        raise RecognitionError("empty line")
    seg = lineseg.ccslineseg(image)
    # debugging aid:
    # ion(); subplot(311); imshow(image); subplot(312); morph.showlabels(seg); ginput(1,0.1); raw_input()
    seg = morph.renumber_by_xcenter(seg)
    blimage = zeros(image.shape)
    xlimage = zeros(image.shape)
    for sub, _, itransform_add in extract_chars(seg):
        best = shapedict.predict1(sub)
        itransform_add(blimage, bls[best].reshape(size, size))
        itransform_add(xlimage, xls[best].reshape(size, size))
    return blimage, xlimage
def _rasterize_polynomial(coeffs, shape):
    """Return a zero image of `shape` with one pixel set per column.

    For every column x the row is int(polyval(coeffs, x)), clamped to the
    valid row range so a curve leaving the image sticks to its edge.
    """
    h, w = shape
    trace = zeros(shape)
    for x in range(w):
        trace[clip(int(polyval(coeffs, x)), 0, h - 1), x] = 1
    return trace


def compute_geomaps(fnames, shapedict, old_model, use_gt=1, size=32, debug=0, old_order=1):
    """Given a shape dictionary and an existing line geometry estimator,
    compute updated geometric maps for each entry in the shape dictionary.

    Parameters:
        fnames: iterable of line image file names; each is read with
            ocrolib.read_image_gray and inverted (1 - image).
        shapedict: object providing `k` (number of entries) and
            `predict1(sub)`, whose result is used as an index into the maps.
        old_model: existing estimator; `lineFit(image, order=old_order)` must
            return two polynomial coefficient sequences (blp, xlp) that are
            evaluated with polyval.
        use_gt: when true, the ground-truth text (file variant "txt"/"gt") is
            read and the line is skipped if its count of capitals or of
            digits is at least 0.3 times its count of lowercase letters.
        size: side length of each accumulated map.
        debug: 0 disables plotting; a positive value plots every `debug`-th
            file and calls ginput after each processed file.
        old_order: forwarded to `old_model.lineFit` as `order`.

    Returns:
        (bls, xls): two arrays of shape (shapedict.k, size, size). Each map
        is rescaled so that its total equals `size`; maps that received no
        contribution stay all zero.

    NOTE(review): the "bl"/"xl" names presumably stand for baseline and
    x-line; `transform` (from extract_chars) is assumed to return an array
    that can be added to a (size, size) map -- confirm against extract_chars.
    """
    if debug > 0:
        gray()
        ion()
    bls = zeros((shapedict.k, size, size))
    xls = zeros((shapedict.k, size, size))
    count = 0
    for fno, fname in enumerate(fnames):
        if fno % 20 == 0:
            # Single pre-formatted string: identical output whether `print`
            # is the Python 2 statement or the print function.
            print("%s %s %s" % (fno, fname, count))
        if use_gt:
            # don't use lines with many capital letters (or digits) for
            # training because they result in bad models
            gt = ocrolib.read_text(ocrolib.fvariant(fname, "txt", "gt"))
            nlower = len(re.sub(r'[^a-z]', '', gt))
            if len(re.sub(r'[^A-Z]', '', gt)) >= 0.3 * nlower:
                continue
            if len(re.sub(r'[^0-9]', '', gt)) >= 0.3 * nlower:
                continue
        image = 1 - ocrolib.read_image_gray(fname)
        show = debug > 0 and fno % debug == 0
        if show:
            clf()
            subplot(411)
            imshow(image)
        try:
            blp, xlp = old_model.lineFit(image, order=old_order)
        except Exception:
            # Best effort: report and skip this line, but let
            # KeyboardInterrupt/SystemExit stop the run.
            traceback.print_exc()
            continue
        blimage = _rasterize_polynomial(blp, image.shape)
        xlimage = _rasterize_polynomial(xlp, image.shape)
        if show:
            subplot(413)
            imshow(xlimage + 0.3 * image)
            subplot(414)
            imshow(blimage + 0.3 * image)
        try:
            seg = lineseg.ccslineseg(image)
        except Exception:
            # Previously skipped silently; report the failure as allchars does.
            traceback.print_exc()
            continue
        if show:
            subplot(412)
            morph.showlabels(seg)
        sub_shape = None
        for sub, transform, itransform_add in extract_chars(seg):
            # All character patches of one line must share the same shape.
            if sub_shape is None:
                sub_shape = sub.shape
            assert sub.shape == sub_shape
            count += 1
            best = shapedict.predict1(sub)
            bls[best] += transform(blimage)
            xls[best] += transform(xlimage)
        if debug == 1:
            ginput(1, 100)
        elif debug > 1:
            ginput(1, 0.01)
    # Rescale every map so its total equals its width; the 1e-6 floor keeps
    # untouched (all-zero) maps from dividing by zero.  `.sum()` is used so
    # the result does not depend on which `sum` is in scope.
    for maps in (bls, xls):
        for i in range(len(maps)):
            maps[i] *= maps[i].shape[1] * 1.0 / max(1e-6, maps[i].sum())
    return bls, xls