def checkBallotFlipped(I, Iref, verbose=False):
    """ Checks whether ballot image I was scanned upside-down relative to
    the reference image Iref, by aligning both I and its flipped version
    against Iref and comparing the alignment errors.
    Output:
        (bool isflipped, np.array I_corrected, float err)
    """
    rszFac = sh.resizeOrNot(I.shape, sh.FLIP_CHECK_HEIGHT)
    Iref1 = sh.fastResize(Iref, rszFac)
    I1 = sh.fastResize(I, rszFac)
    IR = sh.fastFlip(I1)
    (H, Io, err) = imagesAlign(I1, Iref1, trfm_type='translation')
    (HR, IoR, errR) = imagesAlign(IR, Iref1, trfm_type='translation')
    if verbose:
        print 'flip margin: ', err, errR
    if err > errR:
        return (True, sh.fastFlip(I), errR)
    else:
        return (False, I, err)
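# Usage sketch for checkBallotFlipped (a hedged example: the ballot paths
# and the use of scipy.misc.imread are illustrative assumptions, not part
# of this module):
#
#   from scipy import misc
#   I = misc.imread('scanned_ballot.png', flatten=True).astype('float32') / 255.
#   Iref = misc.imread('blank_ballot.png', flatten=True).astype('float32') / 255.
#   (isflipped, Ifix, err) = checkBallotFlipped(I, Iref, verbose=True)
#   # Ifix is sh.fastFlip(I) if the flipped orientation aligned better
#   # against the reference; otherwise it is I unchanged.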
def imagesAlign(I, Iref, fillval=np.nan, trfm_type='similarity',
                vCells=1, hCells=1, rszFac=1, verbose=False,
                minArea=np.power(2, 11), applyWarp=True):
    """ Aligns I to IREF.
    Input:
        np.array I: Image you want to align. I must be larger than IREF.
        np.array Iref: Image you want to align against.
        int fillval: Value used to fill pixels that the warp leaves
            undefined (passed through to sh.imtransform).
        str trfm_type: What image transformation to solve for. They are
            (in order of complexity): 'translation', 'rigid', 'similarity',
            'affine', and 'projective'. A nice page that describes these is:
                http://homepages.inf.ed.ac.uk/rbf/HIPR2/affine.htm
        int vCells, hCells: Params to allow aligning subcells of the image,
            followed by stitching. Appears to rarely be used.
        float rszFac: Amount by which to scale the image - for performance,
            you want to scale down (i.e. 0.75).
        bool applyWarp: Causes imagesAlign to apply the found transformation
            to the input image I and return it. Without applyWarp, the
            function only returns the transformation matrix. This is used
            when cropped images are passed to the function: the warp should
            not yet be applied to the cropped image but rather to the
            original image, which is now the responsibility of the caller.
    Output:
        (H, Ireg, err). H is the transformation matrix that was found to
        best align I to Iref. Ireg is the result of aligning I to Iref.
        err is the alignment error. (With applyWarp=False, only (H, err)
        is returned.)
    """
    if len(I.shape) == 3:
        I1 = sh.rgb2gray(I)
    else:
        I1 = I
    if len(Iref.shape) == 3:
        Iref1 = sh.rgb2gray(Iref)
    else:
        Iref1 = Iref

    WARN_USER, ORIG_DTYPE = False, None
    if I1.dtype != 'float32':
        WARN_USER, ORIG_DTYPE = True, I1.dtype
        I1 = I1.astype('float32')
    if Iref1.dtype != 'float32':
        WARN_USER, ORIG_DTYPE = True, Iref1.dtype
        Iref1 = Iref1.astype('float32')
    if WARN_USER:
        print ("(Info) imagesAlign was called with input image dtype={0}. "
               "imagesAlign expects dtype='float32', with intensity values "
               "in [0.0, 1.0]. The dtype conversion was done automatically, "
               "but this slows down the computation a little. Consider "
               "working in 'float32' in the first place for a little "
               "speed boost.".format(ORIG_DTYPE))

    t1 = time.clock()

    # check if more than one vertical and horizontal cell
    if (vCells > 1) and (hCells > 1):
        # 1. align the full image once
        I2 = imagesAlign(I1, Iref1, trfm_type=trfm_type, minArea=minArea)[1]
        Iout = np.copy(Iref1)
        pFac = .25
        vStep = math.ceil(I1.shape[0] / vCells)
        vPad = pFac * vStep
        hStep = math.ceil(I1.shape[1] / hCells)
        hPad = pFac * hStep
        for i in range(vCells):
            for j in range(hCells):
                # 2. chop + pad each cell, then align
                # 3. stitch back together
                i1 = max(i * vStep, 0)
                i2 = min((i + 1) * vStep, I1.shape[0] - 1)
                j1 = max(j * hStep, 0)
                j2 = min((j + 1) * hStep, I1.shape[1] - 1)
                i1p = max(i1 - vPad, 0)
                i2p = min(i2 + vPad, I1.shape[0] - 1)
                j1p = max(j1 - hPad, 0)
                j2p = min(j2 + hPad, I1.shape[1] - 1)

                Ic = I2[i1p:i2p, j1p:j2p]
                Irefc = Iref1[i1p:i2p, j1p:j2p]
                (H, err) = imagesAlign1(Ic, Irefc, trfm_type=trfm_type,
                                        verbose=verbose, minArea=minArea)
                IcT = sh.imtransform(Ic, H)
                Iout[i1:i2, j1:j2] = IcT[i1 - i1p:(i1 - i1p) + (i2 - i1),
                                         j1 - j1p:(j1 - j1p) + (j2 - j1)]
        return (np.eye(3), Iout, -1)

    if rszFac == 1:
        t0 = time.clock()
        (H, err) = imagesAlign1(I1, Iref1, trfm_type=trfm_type,
                                verbose=verbose, minArea=minArea)
        if verbose:
            print 'alignment time:', time.clock() - t0, '(s)'
    else:
        I1 = sh.fastResize(I1, rszFac)
        Iref1 = sh.fastResize(Iref1, rszFac)
        S = np.eye(3, dtype=np.float32)
        S[0, 0] = 1 / rszFac
        S[1, 1] = 1 / rszFac
        H0 = np.eye(3, dtype=np.float32)
        H0 = np.dot(np.dot(np.linalg.inv(S), H0), S)
        t0 = time.clock()
        (H, err) = imagesAlign1(I1, Iref1, H0=H0, trfm_type=trfm_type,
                                verbose=verbose, minArea=minArea)
        if verbose:
            print 'alignment time:', time.clock() - t0, '(s)'
        H = np.dot(S, np.dot(H, np.linalg.inv(S)))

    # print "overall time: ", time.clock() - t1
    if applyWarp:
        return (H, sh.imtransform(I, H, fillval=fillval), err)
    else:
        return (H, err)
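# Usage sketch for imagesAlign (a hedged example; Icrop, Irefcrop, and
# Iorig are hypothetical arrays). With applyWarp=False only (H, err) is
# returned, and applying H to the uncropped original is the caller's job:
#
#   (H, err) = imagesAlign(Icrop, Irefcrop, trfm_type='rigid',
#                          rszFac=0.75, applyWarp=False)
#   Ireg = sh.imtransform(Iorig, H, fillval=np.nan)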
def dist2patches(patchTuples, scale, debug=False):
    """
    Input:
        list patchTuples: EITHER (!) of the form:
            ((imgpatch_i, attrpatch_i, str attrval_i, isflip_i), ...)
          or:
            ((imgpatch_i, [attrpatch_i, ...], str attrval_i, int page_i, isflip_i), ...)
          I'm not entirely sure when it's a 4-tuple or a 5-tuple...but beware.
        float scale: Current scale factor.
    Output:
        (scores, locs, exemplar_idxs)
    """
    # patchTuples ((K img super regions), (K template patches))
    # for each pair, compute avg distance at scale `scale`
    scores = np.zeros(len(patchTuples))
    locs = []
    # Keeps track of which exemplar patch was the best for a given voted ballot
    exemplar_idxs = []
    for idx in range(len(patchTuples)):
        # pt is either a 4-tuple:
        #   (imgpatch_i, attrpatch_i, attrval_i, isflip_i)
        # or a 5-tuple:
        #   (imgpatch_i, [attrpatch_i, ...], attrval_i, page_i, isflip_i)
        pt = patchTuples[idx]
        imgpatch = pt[0]
        attrpatches = pt[1]
        attrval = pt[2]
        flag = False
        # A fix for a very bizarre OpenCV bug follows (see
        # pixel_reg/opencv_bug_repo.py): opencv appears to not like pure
        # 1.0 and 0.0 values, so quantize intensities to 1/255 steps.
        I = np.round(sh.fastResize(imgpatch, scale) * 255.) / 255.
        bestscore = None
        bestloc = None
        best_idx_ex = None  # Index of the best exemplar
        # Get the best score over all possible exemplars (this is to
        # account for background variation).
        for idx_ex, attrpatch in enumerate(attrpatches):
            patch = np.round(sh.fastResize(attrpatch, scale) * 255.) / 255.
            try:
                res = evalPatchSimilarity2(I, patch, debug=flag)
            except Exception as e:
                traceback.print_exc()
                print "CRASHED AT IDX:", idx
                print "    Scale was: {0}".format(scale)
                print "    I.shape: {0} patch.shape: {1}".format(
                    I.shape, patch.shape)
                print "    imgpatch: {0} attrpatch: {1}".format(
                    imgpatch.shape, attrpatch.shape)
                pdb.set_trace()
                raise e
            # We want to maximize the similarity score, normalized by
            # patch area.
            score = res[0] / (patch.shape[0] * patch.shape[1])
            if bestscore is None or score > bestscore:
                bestscore = score
                best_idx_ex = idx_ex
                bestloc = (res[1][0] / scale, res[1][1] / scale)
        scores[idx] = bestscore
        locs.append(bestloc)
        exemplar_idxs.append(best_idx_ex)
    return (scores, locs, exemplar_idxs)
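# Example patchTuples layout for dist2patches (a sketch of the 5-tuple form
# from the docstring; every concrete value below is hypothetical):
#
#   patchTuples = [
#       (imgpatch,                        # region cropped from a voted ballot
#        [attrpatch_ex0, attrpatch_ex1],  # exemplar patches for one attr value
#        'lang_en',                       # the attribute value
#        0,                               # page number
#        False),                          # isflip
#   ]
#   (scores, locs, exemplar_idxs) = dist2patches(patchTuples, scale=0.5)
#   best = np.argmax(scores)  # scores are similarities: higher is better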
def imagesAlign1(I, Iref, H0=np.eye(3, dtype=np.float32),
                 trfm_type='similarity', verbose=False,
                 minArea=np.power(2, 11)):
    """
    Input:
        nparray I: Assumes that I is larger than IREF.
        nparray Iref:
        nparray H0: Initial transformation matrix.
        str trfm_type: Transformation type (see imagesAlign).
        int minArea: The minimum area that IREF is allowed to be - if
            IREF.width * IREF.height is greater than this, then imagesAlign1
            will shrink both I and IREF by 50% until the area < MINAREA.
            Smaller values of MINAREA allow higher tolerance for wider
            translations, yet can lead to less-predictable results.
            Suggestion: For coarse global alignment, try smaller values of
            MINAREA. For finer local alignment, use larger MINAREA.
    """
    lbda = 1e-6
    wh = Iref.shape
    eps = 1e-3
    sig = 2

    # recursive check: align at a coarser (50%) scale first, and use that
    # result as the initial estimate H at this scale
    if np.prod(wh) < minArea:
        H = H0
    else:
        I1 = sh.fastResize(I, .5)
        Iref1 = sh.fastResize(Iref, .5)
        S = np.eye(3)
        S[0, 0] = 2
        S[1, 1] = 2
        H0 = np.dot(np.dot(np.linalg.inv(S), H0), S)
        (H, errx) = imagesAlign1(I1, Iref1, H0=H0, trfm_type=trfm_type,
                                 verbose=verbose, minArea=minArea)
        H = np.dot(S, np.dot(H, np.linalg.inv(S)))

    # smooth images
    Iref = gaussian_filter(Iref, sig)
    I = gaussian_filter(I, sig)

    # pad image with NaNs: duplicate the first/last two rows and columns,
    # then overwrite that border with NaN
    ws = np.concatenate(([0], [0], range(wh[0]), [wh[0] - 1], [wh[0] - 1]))
    hs = np.concatenate(([0], [0], range(wh[1]), [wh[1] - 1], [wh[1] - 1]))
    try:
        Iref = Iref[np.ix_(ws, hs)]
        I = I[np.ix_(ws, hs)]
    except Exception as e:
        traceback.print_exc()
        print '...Iref.shape:', Iref.shape
        print '...I.shape:', I.shape
        t = time.time()  # timestamp so the debug images get unique names
        misc.imsave("_Iref_{0}.png".format(str(t)), Iref)
        misc.imsave("_I_{0}.png".format(str(t)), I)
        raise e
    hs = np.array([0, 1, wh[1] + 2, wh[1] + 3])
    ws = np.array([0, 1, wh[0] + 2, wh[0] + 3])
    Iref[ws, :] = np.nan
    I[ws, :] = np.nan
    Iref[:, hs] = np.nan
    I[:, hs] = np.nan

    # per-parameter step weights, rescaled by image size
    wts = np.array([1, 1, 1.0204, .03125, 1.0313, .0204, .000555, .000555])
    s = math.sqrt(Iref.size) / 128.0
    wts[2] = math.pow(wts[2], 1 / s)
    wts[3] = wts[3] / s
    wts[4] = math.pow(wts[4], 1 / s)
    wts[5] = wts[5] / s
    wts[6] = wts[6] / (s * s)
    wts[7] = wts[7] / (s * s)

    # select which warp parameters to solve for
    if trfm_type == 'translation':
        keep = [0, 1]
    elif trfm_type == 'rigid':
        keep = [0, 1, 5]
    elif trfm_type == 'similarity':
        keep = [0, 1, 2, 5]
    elif trfm_type == 'affine':
        keep = [0, 1, 2, 3, 4, 5]
    elif trfm_type == 'projective':
        keep = [0, 1, 2, 3, 4, 5, 6, 7]

    # compute transformations
    HH = ds2H(-1 * np.ones(8), wts)
    Hs = HH[1][keep, :]

    # apply transformations
    Ts = np.zeros([Hs.shape[0], Iref.shape[0], Iref.shape[1]])
    Ms = np.ones([Iref.shape[0], Iref.shape[1]], dtype=np.float32)
    for i in range(Hs.shape[0]):
        Ts[i, :, :] = sh.imtransform(Iref, Hs[i, :, :])
        Ms = Ms * (np.float32(~np.isnan(Ts[i, :, :])))
    Ds = Ts - np.tile(Iref, [Hs.shape[0], 1, 1])
    D = Ds.reshape(Ds.shape[0], np.prod(Iref.shape))
    Lbda = lbda * np.prod(Iref.shape) * np.eye(Ds.shape[0])

    err = np.Inf
    # ds holds the full 8-parameter update; only the entries in `keep`
    # are ever solved for
    ds = np.zeros(8)
    # D with NaNs replaced by zeros, for the mask-by-multiplication solve
    D_zerod = np.nan_to_num(Ds.reshape(Ds.shape[0], np.prod(Iref.shape)))

    for i in xrange(100):
        # warp image with current estimate
        Ip = sh.imtransform(I, H)
        M = Ms * np.float32(~np.isnan(Ip) & ~np.isnan(Iref))
        Mf = M.reshape(I.size, 1)
        dI = Ip - Iref
        dIf = dI.reshape(np.prod(I.shape), 1)

        # guard against bad things
        if np.sum(Mf) < 2:
            H = np.eye(3)
            err = np.Inf
            break
        # bail out if more than 2/3 of the originally valid pixels
        # turned to NaN under the current warp
        origValidPixels = np.sum(1 - (np.isnan(I) + 0))
        newValidPixels = np.sum(1 - (np.isnan(Ip + I) + 0))
        if newValidPixels < (origValidPixels / 3.):
            return (np.eye(3), np.inf)

        # === CODE PRIOR TO REFACTOR ===
        '''
        idx=np.nonzero(np.squeeze(Mf))
        D_valid=D[:,idx]
        D0=np.squeeze(D_valid); dI1=dIf[idx]
        _A = np.dot(D0, D0.T)
        _B = np.linalg.inv(_A + Lbda)
        _C = np.dot(D0, dI1)
        ds1 = np.dot(_B, _C)
        '''
        # NEW: apply mask via multiply rather than index
        Mf_stacked = np.tile(Mf.T, (D.shape[0], 1))
        D0_masked = np.multiply(D_zerod, Mf_stacked)
        dI1 = np.nan_to_num(np.multiply(dIf, Mf))
        _A_masked = np.dot(D0_masked, D0_masked.T)
        _B_masked = np.linalg.inv(_A_masked + Lbda)
        _C_masked = np.dot(D0_masked, dI1)
        ds1 = np.dot(_B_masked, _C_masked)

        ds[keep] = np.squeeze(ds1)
        HH = ds2H(ds, wts)
        H = np.dot(H, HH[0])
        H = H / H[2, 2]
        err0 = err
        err = np.mean(np.abs(dI1))
        delta = err0 - err
        if verbose:
            print I.shape, " i=", i, " err=", err, " del=", delta
        if delta < eps:
            break

    return (H, err)
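# Note on the masked solve in imagesAlign1: each iteration takes one
# ridge-regularized Gauss-Newton (Lucas-Kanade style) step
#
#     ds = (D D^T + Lbda)^(-1) D dI
#
# restricted to valid pixels, where each row of D is the difference image
# induced by perturbing one warp parameter. Zeroing masked pixels by
# multiplication is equivalent to dropping them by indexing, since a zeroed
# column contributes nothing to D0 * D0^T or D0 * dI1. A minimal standalone
# numpy check of that equivalence (an illustrative sketch, not part of this
# module; assumes numpy imported as np):
#
#   D = np.random.rand(4, 10)
#   m = (np.random.rand(10, 1) > 0.3).astype(np.float32)
#   idx = np.nonzero(m.squeeze())[0]
#   Dm = D * np.tile(m.T, (4, 1))
#   assert np.allclose(np.dot(D[:, idx], D[:, idx].T), np.dot(Dm, Dm.T))
#
# One subtlety: dI1 in the refactored loop keeps zeros at masked pixels, so
# err = mean(|dI1|) averages over all pixels, whereas the pre-refactor code
# averaged over valid pixels only.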