def __init__(self, ms, level, data, sigma_lm, params_flow_int):
    """
    Prepare the per-level buffers used with landmark data.

    Arguments:
        ms: object exposing L_cpa_space; the entry at index `level` is used.
        level: index into ms.L_cpa_space.
        data: mapping with the keys 'src', 'dst' and 'transformed'.
              Each value must be a CpuGpuArray.
        sigma_lm: stored unchanged in self.sigma_lm.
        params_flow_int: stored unchanged in self.params_flow_int.

    Raises:
        ObsoleteError: one of the three arrays is not a CpuGpuArray.
        ValueError: src.shape[1] differs from the space's dim_domain,
                    or there are fewer than two points.
    """
    src = data['src']
    dst = data['dst']
    transformed = data['transformed']
    # Checked in the order src, dst, transformed.
    for arr in (src, dst, transformed):
        if not isinstance(arr, CpuGpuArray):
            raise ObsoleteError

    self.nCalls = 0
    self.nCallbacks = 0
    self.sigma_lm = sigma_lm

    space = ms.L_cpa_space[level]
    self.cpa_space = space
    if src.shape[1] != space.dim_domain:
        raise ValueError(src.shape, space.dim_domain)

    self.mu = space.get_zeros_theta()
    self.src, self.dst, self.transformed = src, dst, transformed

    n_pts = len(src)
    self.nPts = n_pts

    # One error row and one log-likelihood entry per point.
    self.err = CpuGpuArray.zeros_like(src)
    self.ll = CpuGpuArray.zeros(n_pts, dtype=src.dtype)

    if n_pts <= 1:
        raise ValueError
    # The "by_der" buffers hold one entry fewer than the number of points.
    n_diffs = n_pts - 1
    self.err_by_der = CpuGpuArray.zeros((n_diffs, src.shape[1]),
                                        dtype=src.dtype)
    self.ll_by_der = CpuGpuArray.zeros(n_diffs, dtype=src.dtype)

    self.params_flow_int = params_flow_int
    self._pat = PAT(pa_space=space, Avees=space.get_zeros_PA())
def get_init_seg(dimy, dimx, nPixels_in_square_side, use_hex):
    """
    Build the initial, image-independent label map.

    Arguments:
        dimy: number of image rows.
        dimx: number of image columns.
        nPixels_in_square_side: side length M of a square cell; in the
            honeycomb case the hexagon side is derived from M**2.
        use_hex: if true, use a honeycomb layout (loaded from the
            precomputed file when it exists, otherwise computed on the GPU
            and saved); if false, use a grid of M x M squares.

    Returns:
        The label map. In the square case this is an int32 array of shape
        (dimy, dimx) whose labels are numbered row-major over the squares.
    """
    M = nPixels_in_square_side

    if use_hex:
        s = create_string(dimx, dimy, nPixels_in_square_side)
        fname = os.path.join(dirname_precomputed_hex_inits, s)
        try:
            FilesDirs.raise_if_file_does_not_exist(fname)
        except FileDoesNotExistError:
            pass
        else:
            print("Loading", fname)
            return np.load(fname)

        msg = """
        I could not find a precomputed (image-independent)
        honeycomb initilization for this image size and this values of n.
        So I will compute it from scratch and we will save the result in
        {}
        Next time you will run the code for an image of size
        nRows={}, nCols={}, with n = {},
        it will be faster.
        """.format(fname, dimy, dimx, nPixels_in_square_side)
        print(msg)

        seg = CpuGpuArray.zeros((dimy, dimx), dtype=np.int32)
        # length of each side
        a = np.sqrt(M**2 / (1.5 * np.sqrt(3)))
        H = a
        W = np.sqrt(3) * H
        # XX and YY need to be float
        YY, XX = np.mgrid[0:float(dimy):1.5 * H, 0:float(dimx):W]
        # Shift every other row of centers by half a cell width.
        XX[::2] += float(W) / 2
        centers = np.vstack([XX.ravel(), YY.ravel()]).T.copy()
        centers = CpuGpuArray(centers)

        honeycomb(seg.gpu, centers.gpu, seg.size)
        seg.gpu2cpu()
        np.save(fname, seg.cpu)
        return seg.cpu

    # Square grid. np.float was removed in NumPy 1.24; np.float64 is the
    # same dtype the old alias resolved to.
    yy, xx = np.mgrid[:dimy, :dimx]
    col_of_square = np.floor(xx.astype(np.float64) / M)
    row_of_square = np.floor(yy.astype(np.float64) / M)
    nTimesInX = col_of_square.max() + 1
    seg_cpu = row_of_square * nTimesInX + col_of_square
    return seg_cpu.astype(np.int32)
def __init__(self, ms, level, data, sigma_lm, params_flow_int):
    """
    Store the landmark arrays and allocate the error / log-likelihood
    buffers for one level.

    Arguments:
        ms: object exposing L_cpa_space (indexed by level).
        level: which entry of ms.L_cpa_space to use.
        data: mapping holding 'src', 'dst' and 'transformed', each of
              which must be a CpuGpuArray.
        sigma_lm: kept in self.sigma_lm.
        params_flow_int: kept in self.params_flow_int.

    Raises:
        ObsoleteError: if any of the three arrays is not a CpuGpuArray.
        ValueError: if src.shape[1] != cpa_space.dim_domain, or if
                    len(src) <= 1.
    """
    src = data['src']
    dst = data['dst']
    transformed = data['transformed']
    inputs_ok = all(isinstance(a, CpuGpuArray)
                    for a in (src, dst, transformed))
    if not inputs_ok:
        raise ObsoleteError

    # Counters start at zero.
    self.nCalls = 0
    self.nCallbacks = 0
    self.sigma_lm = sigma_lm

    self.cpa_space = ms.L_cpa_space[level]
    cpa_space = self.cpa_space
    dim = src.shape[1]
    if dim != cpa_space.dim_domain:
        raise ValueError(src.shape, cpa_space.dim_domain)

    self.mu = cpa_space.get_zeros_theta()
    self.src = src
    self.dst = dst
    self.transformed = transformed

    self.nPts = len(src)
    self.err = CpuGpuArray.zeros_like(src)
    self.ll = CpuGpuArray.zeros(self.nPts, dtype=src.dtype)

    if self.nPts <= 1:
        raise ValueError
    # The derivative-based buffers have nPts-1 entries.
    self.err_by_der = CpuGpuArray.zeros((self.nPts - 1, src.shape[1]),
                                        dtype=src.dtype)
    self.ll_by_der = CpuGpuArray.zeros(self.nPts - 1, dtype=src.dtype)

    self.params_flow_int = params_flow_int
    self._pat = PAT(pa_space=cpa_space,
                    Avees=cpa_space.get_zeros_PA())
def get_init_seg(dimy,dimx,nPixels_in_square_side,use_hex):
    """
    Build the initial, image-independent label map.

    Arguments:
        dimy: number of image rows.
        dimx: number of image columns.
        nPixels_in_square_side: side length M of a square cell; in the
            honeycomb case the hexagon side is derived from M**2.
        use_hex: if true, use a honeycomb layout (loaded from the
            precomputed file when it exists, otherwise computed on the GPU
            and saved); if false, use a grid of M x M squares.

    Returns:
        The label map. In the square case this is an int32 array of shape
        (dimy, dimx) whose labels are numbered row-major over the squares.
    """
    M = nPixels_in_square_side

    if use_hex:
        s = create_string(dimx,dimy,nPixels_in_square_side)
        fname = os.path.join(dirname_precomputed_hex_inits,s)
        try:
            FilesDirs.raise_if_file_does_not_exist(fname)
        except FileDoesNotExistError:
            pass
        else:
            # A single pre-formatted argument prints identically under
            # the print statement and the print function.
            print("Loading {}".format(fname))
            return np.load(fname)

        msg = """
        I could not find a precomputed (image-independent)
        honeycomb initilization for this image size and this values of n.
        So I will compute it from scratch and we will save the result in
        {}
        Next time you will run the code for an image of size
        nRows={}, nCols={}, with n = {},
        it will be faster.
        """.format(fname,dimy,dimx,nPixels_in_square_side)
        print(msg)

        seg = CpuGpuArray.zeros((dimy,dimx),dtype=np.int32)
        # length of each side
        a = np.sqrt(M ** 2 / (1.5 * np.sqrt(3)))
        H = a
        W = np.sqrt(3)*H
        # XX and YY need to be float
        YY,XX = np.mgrid[0:float(dimy):1.5*H,0:float(dimx):W]
        # Shift every other row of centers by half a cell width.
        XX[::2] += float(W)/2
        centers = np.vstack([XX.ravel(),YY.ravel()]).T.copy()
        centers = CpuGpuArray(centers)

        honeycomb(seg.gpu,centers.gpu,seg.size)
        seg.gpu2cpu()
        np.save(fname,seg.cpu)
        return seg.cpu

    # Square grid. np.float is gone from NumPy >= 1.24; np.float64 is the
    # dtype it used to resolve to and exists in every NumPy version.
    yy,xx = np.mgrid[:dimy,:dimx]
    xx = xx.astype(np.float64)
    yy = yy.astype(np.float64)
    nTimesInX = np.floor(xx / M).max() + 1
    seg_cpu = np.floor(yy / M) * nTimesInX + np.floor(xx / M)
    return seg_cpu.astype(np.int32)
def get_cartoon(self):
    """
    Return the "cartoon" image: each pixel replaced by its superpixel's mean.

    An int32 buffer with the image's first two dimensions and 3 channels is
    allocated, filled by the module-level get_cartoon routine, copied back
    from the GPU and returned as a CPU array.
    """
    shape = self.img.cpu.shape
    n_channels = self.nChannels
    cartoon = CpuGpuArray.zeros((shape[0], shape[1], 3), dtype=np.int32)
    # Inside the method body this name resolves to the module-level
    # function, not to this method.
    get_cartoon(
        seg_gpu=self.seg.gpu,
        mu_i_gpu=self.superpixels.params.mu_i.gpu,
        img_gpu=cartoon.gpu,
        nChannels=n_channels,
    )
    cartoon.gpu2cpu()
    return cartoon.cpu
def get_cartoon(self):
    """
    Replace pixels with the means of their superpixels.

    Returns the CPU side of a freshly allocated int32 array whose first two
    dimensions match self.img and whose last dimension is 3.
    """
    height = self.img.cpu.shape[0]
    width = self.img.cpu.shape[1]
    nChannels = self.nChannels

    out = CpuGpuArray.zeros((height, width, 3), dtype=np.int32)
    kernel_args = dict(seg_gpu=self.seg.gpu,
                       mu_i_gpu=self.superpixels.params.mu_i.gpu,
                       img_gpu=out.gpu,
                       nChannels=nChannels)
    # The module-level get_cartoon writes into out.gpu.
    get_cartoon(**kernel_args)

    out.gpu2cpu()
    return out.cpu
def __init__(self, ms, level, data, sigma_lm, params_flow_int):
    """
    Cost is level-dependent.

    TODO: GPU in the LL part.

    Arguments:
        ms: object exposing L_cpa_space (indexed by level).
        level: which entry of ms.L_cpa_space to use.
        data: mapping holding 'src', 'dst' and 'transformed', each of
              which must be a CpuGpuArray.
        sigma_lm: kept in self.sigma_lm.
        params_flow_int: kept in self.params_flow_int.

    Raises:
        ObsoleteError: if any of the three arrays is not a CpuGpuArray.
        ValueError: if src.shape[1] != cpa_space.dim_domain.
    """
    src, dst, transformed = data['src'], data['dst'], data['transformed']
    for candidate in (src, dst, transformed):
        if isinstance(candidate, CpuGpuArray):
            continue
        raise ObsoleteError

    self.nCalls = 0
    self.nCallbacks = 0
    self.sigma_lm = sigma_lm

    space = ms.L_cpa_space[level]
    self.cpa_space = space
    if src.shape[1] != space.dim_domain:
        raise ValueError(src.shape, space.dim_domain)

    self.mu = space.get_zeros_theta()

    self.src = src
    self.dst = dst
    self.transformed = transformed

    n_pts = len(src)
    self.nPts = n_pts
    # Per-point error (same shape as src) and per-point log-likelihood.
    self.err = CpuGpuArray.zeros_like(src)
    self.ll = CpuGpuArray.zeros(n_pts, dtype=src.dtype)

    self.params_flow_int = params_flow_int
    self._pat = PAT(pa_space=space, Avees=space.get_zeros_PA())
# Fragment: evaluates cell indices, the velocity field and the forward
# transformation at `pts`.
# NOTE(review): cpa_space, pts, pts_transformed and params_flow_int are
# defined outside this fragment; their types cannot be confirmed from here.
mu = cpa_space.get_zeros_theta()
np.random.seed(0)
# theta *= 4
# cpa_space.theta2Avees(theta=theta)
cpa_space.update_pat()
# params_flow_int.nTimeSteps *= 10
# One int32 slot per point, filled in by calc_cell_idx.
cell_idx = CpuGpuArray.zeros(len(pts),dtype=np.int32)
cpa_space.calc_cell_idx(pts,cell_idx)
# ipshell('st')
# 1/0
# 1/0
# Output buffer for the velocities; same shape as pts.
v = CpuGpuArray.zeros_like(pts)
cpa_space.calc_v(pts=pts,out=v)
print params_flow_int
print '#pts=',len(pts)
# NOTE(review): time.clock was removed in Python 3.8; this fragment is
# Python 2 code (print statements).
tic=time.clock()
# params_flow_int is unpacked as keyword arguments, so it must be a mapping.
cpa_space.calc_T_fwd(pts=pts,out=pts_transformed, **params_flow_int)
def __init__(self, nSuperpixels, s_std, i_std, prior_count, nChannels):
    """
    Allocate the superpixel parameters and the GPU scratch buffers.

    The means are zero at this point and are set later, in the first M step.
    The space/color covariances (and their inverses) get their initial
    values from self.initialize_params(), which is called here.
    An Inverse-Wishart prior is used on the space covariance.

    Arguments:
        nSuperpixels: the number of superpixels to generate
        s_std: should be fixed as nPixels_on_side
        i_std: controls the relative importance of RGB versus location.
               The smaller it is, the bigger the RGB effect is / the more
               irregular the superpixels are.
        prior_count: determines the weight of the Inverse-Wishart prior of
                     the space covariance (ex: 1, 5, 10)
        nChannels: the number of channels of the input image
                   (gray: 1, LAB/RGB: 3)

    Raises:
        NotImplementedError: if nChannels is neither 1 nor 3.
    """
    if nChannels not in (1, 3):
        raise NotImplementedError(nChannels)

    n_sp = nSuperpixels
    color_dim = nChannels
    space_dim = 2

    self.dim_i = color_dim
    self.dim_s = space_dim
    self.nSuperpixels = n_sp
    self.s_std = s_std
    self.i_std = i_std
    self.prior_count = prior_count

    # Means.
    mean_space = CpuGpuArray.zeros((n_sp, space_dim))
    mean_color = CpuGpuArray.zeros((n_sp, color_dim))

    # Covariances (Sigma) and their same-shaped companions (J).
    cov_space = CpuGpuArray.zeros(shape=(n_sp, space_dim, space_dim))
    inv_space = CpuGpuArray.zeros_like(cov_space)
    cov_color = CpuGpuArray.zeros((n_sp, color_dim, color_dim))
    inv_color = CpuGpuArray.zeros_like(cov_color)

    # One scalar per superpixel.
    logdet_color = CpuGpuArray.zeros((n_sp, 1))
    logdet_space = CpuGpuArray.zeros((n_sp, 1))

    # start with unnormalized counts (uniform)
    counts = CpuGpuArray(np.ones(n_sp, dtype=np.int32))

    self.params = Bunch()
    # NOTE(review): prior_sigma_s_sum is bound to the very same array
    # object as Sigma_s -- confirm that this aliasing is intended.
    named_params = (
        ('mu_i', mean_color),
        ('mu_s', mean_space),
        ('Sigma_i', cov_color),
        ('Sigma_s', cov_space),
        ('prior_sigma_s_sum', cov_space),
        ('J_i', inv_color),
        ('J_s', inv_space),
        ('logdet_Sigma_i', logdet_color),
        ('logdet_Sigma_s', logdet_space),
        ('counts', counts),
    )
    for attr_name, array in named_params:
        setattr(self.params, attr_name, array)

    # set those parameters related to covariance
    self.initialize_params()

    # intermediate arrays needed for the Gaussian parameter calculation on GPU
    helper = Bunch()
    self.gpu_helper = helper
    helper.mu_i_helper = gpuarray.zeros((n_sp, color_dim), dtype=np.int64)
    helper.mu_s_helper = gpuarray.zeros((n_sp, space_dim), dtype=np.int64)
    # Copied after initialize_params(), so the copy sees whatever that
    # call left in prior_sigma_s_sum.
    helper.prior_sigma_s = self.params.prior_sigma_s_sum.gpu.copy()
    helper.sigma_s_helper = gpuarray.zeros((n_sp, 3), dtype=np.int64)
    helper.log_count_helper = gpuarray.zeros(n_sp, dtype=np.double)
    helper.non_NaN_count = gpuarray.zeros(n_sp, dtype=np.int32)
    helper.NaN_count = gpuarray.zeros(n_sp, dtype=np.int32)
def __init__(self,ms,level,data,
             sigma_signal,
             params_flow_int,
             interp_type_for_ll):
    """
    Prepare the per-level buffers used with signal data.

    Arguments:
        ms: object exposing L_cpa_space (indexed by level).
        level: which entry of ms.L_cpa_space to use.
        data: mapping holding 'src', 'transformed' and 'signal'.
              'src' and 'transformed' must be CpuGpuArray objects, and so
              must signal.src, signal.dst and signal.transformed.
        sigma_signal: kept in self.sigma_signal.
        params_flow_int: kept in self.params_flow_int.
        interp_type_for_ll: must be one of self.supported_interp_types.

    Side effects:
        Sets signal.err (on the object passed in via data['signal']) to a
        zero array shaped like signal.src.

    Raises:
        ValueError: unsupported interp_type_for_ll; src.shape[1] differs
                    from cpa_space.dim_domain; the three signal arrays do
                    not share one shape; fewer than two points.
        TypeError: one of the arrays is not a CpuGpuArray.
        NotImplementedError: signal.src is not 2-dimensional.
    """
    if interp_type_for_ll not in self.supported_interp_types:
        msg = """
        interp_type_for_ll must be in
        ['gpu_linear',
         cv2.INTER_LINEAR,
         cv2.INTER_CUBIC,
         cv2.INTER_LANCZOS4]
        """
        raise ValueError(msg,interp_type_for_ll)
    self.interp_type_for_ll=interp_type_for_ll

    src=data['src']
    transformed=data['transformed']
    signal=data['signal']

    # Two separate passes: the point arrays are validated before any
    # attribute of `signal` is touched.
    for name,obj in (('src',src),('transformed',transformed)):
        if not isinstance(obj,CpuGpuArray):
            raise TypeError("data['{}'] must be a CpuGpuArray, got {}".format(
                            name,type(obj)))
    for name,obj in (('src',signal.src),
                     ('dst',signal.dst),
                     ('transformed',signal.transformed)):
        if not isinstance(obj,CpuGpuArray):
            raise TypeError("signal.{} must be a CpuGpuArray, got {}".format(
                            name,type(obj)))

    self.nCalls = 0
    self.nCallbacks = 0
    self.sigma_signal=sigma_signal

    cpa_space=ms.L_cpa_space[level]
    self.cpa_space = cpa_space
    if src.shape[1] != cpa_space.dim_domain:
        raise ValueError(src.shape,cpa_space.dim_domain)

    # The attribute used to be spelled `self.my`, which looks like a typo
    # for `mu`. Both names are set (same object) so that code relying on
    # either spelling keeps working.
    self.mu = cpa_space.get_zeros_theta()
    self.my = self.mu

    self.src = src
    self.transformed = transformed
    self.signal = signal

    # Only 2-dimensional signal arrays are supported.
    if signal.src.ndim != 2:
        raise NotImplementedError(signal.src.shape)
    self.dim_signal = 2

    if signal.src.shape != signal.dst.shape:
        raise ValueError(signal.src.shape,signal.dst.shape)
    if signal.src.shape != signal.transformed.shape:
        raise ValueError(signal.src.shape,signal.transformed.shape)

    nPts = len(src)
    self.nPts = nPts
    self.signal.err = CpuGpuArray.zeros_like(self.signal.src)
    self.ll = CpuGpuArray.zeros(nPts,dtype=src.dtype)

    if nPts <= 1:
        raise ValueError(nPts)

    self.params_flow_int=params_flow_int
    self._pat = PAT(pa_space=cpa_space,
                    Avees=cpa_space.get_zeros_PA())
def example(img=None,tess='I',eval_cell_idx=True,eval_v=True,show_downsampled_pts=True,
            valid_outside=True,base=[1,1],
            scale_spatial=.1,
            scale_value=100,
            permute_cell_idx_for_display=True,
            nLevels=3,
            vol_preserve=False,
            zero_v_across_bdry=[0,0],
            use_lims_when_plotting=True):
    """
    2D demo: sample a transformation at every level, apply it to a dense
    grid of points and to an image, and plot the results.

    Arguments:
        img: passed to Img(); if None, the standard test image is used.
        tess, base, nLevels, scale_spatial, scale_value, vol_preserve,
        zero_v_across_bdry, valid_outside: forwarded to TransformWrapper
            (scale_spatial controls the prior's smoothness, scale_value
            the prior's variance).
        eval_cell_idx: compute (and plot) the cell index of every point.
        eval_v: compute (and plot) the velocity field.
        show_downsampled_pts: plot a downsampled version of the source,
            forward-mapped and inverse-mapped points.
        permute_cell_idx_for_display: forwarded to tw.calc_cell_idx.
        use_lims_when_plotting: forwarded to tw.config_plt as use_lims.

    Returns:
        The TransformWrapper that was created.
    """
    show_downsampled_pts = bool(show_downsampled_pts)
    eval_cell_idx = bool(eval_cell_idx)
    # Bug fix: this used to be bool(eval_cell_idx), so the eval_v argument
    # was silently ignored.
    eval_v = bool(eval_v)
    valid_outside = bool(valid_outside)
    permute_cell_idx_for_display = bool(permute_cell_idx_for_display)
    vol_preserve = bool(vol_preserve)

    if img is None:
        img = Img(get_std_test_img())
    else:
        img=Img(img)
        img = img[:,:,::-1] # bgr2rgb

    tw = TransformWrapper(nRows=img.shape[0],
                          nCols=img.shape[1],
                          nLevels=nLevels,
                          base=base,
                          scale_spatial=scale_spatial, # controls the prior's smoothness
                          scale_value=scale_value, # controls the prior's variance
                          tess=tess,
                          vol_preserve=vol_preserve,
                          zero_v_across_bdry=zero_v_across_bdry,
                          valid_outside=valid_outside)
    print(tw)

    # You probably want to do that: padding image border with zeros
    border_width=1
    img[:border_width]=0
    img[-border_width:]=0
    img[:,:border_width]=0
    img[:,-border_width:]=0

    # The tw.calc_T_fwd (or tw.calc_T_inv) is always done in gpu.
    # After using it to compute new pts,
    # you may want to use remap (to warp an image accordingly).
    # If you will use tw.remap_fwd (or tw.remap_inv), which is done in gpu,
    # then the image type can be either float32 or float64.
    # But if you plan to use tw.remap_fwd_opencv (or tw.remap_inv_opencv),
    # which is done in cpu (hence slightly slower) but supports better
    # interpolation methods, then the image type must be np.float32.

    # img_original = CpuGpuArray(img.copy().astype(np.float32))
    img_original = CpuGpuArray(img.copy().astype(np.float64))

    img_wrapped_fwd= CpuGpuArray.zeros_like(img_original)
    img_wrapped_bwd= CpuGpuArray.zeros_like(img_original)

    seed=0
    np.random.seed(seed)

    ms_Avees=tw.get_zeros_PA_all_levels()
    ms_theta=tw.get_zeros_theta_all_levels()

    for level in range(tw.ms.nLevels):
        if level==0:
            tw.sample_gaussian(level,ms_Avees[level],ms_theta[level],mu=None)# zero mean
        else:
            tw.sample_from_the_ms_prior_coarse2fine_one_level(ms_Avees,ms_theta,
                                                              level_fine=level)

    print('\nimg shape: {}\n'.format(img_original.shape))

    # You don't have to use these. You can use any 2d array
    # that has two columns (regardless of the number of rows).
    pts_src = tw.pts_src_dense

    # Create buffers for the output
    pts_fwd = CpuGpuArray.zeros_like(pts_src)
    pts_inv = CpuGpuArray.zeros_like(pts_src)

    # time.clock was removed in Python 3.8. On interpreters that still
    # have no perf_counter (Python 2) this resolves to time.clock, exactly
    # as before.
    clock = getattr(time,'perf_counter',None) or time.clock

    for level in range(tw.ms.nLevels):
        #######################################################################
        # instead of the sampling done above,
        # you may want to use one of the following.
        # 1)
        # tw.sample_gaussian(level,ms_Avees[level],ms_theta[level],mu=None)# zero mean
        # 2)
        # tw.sample_gaussian(level,ms_Avees[level],ms_theta[level],mu=some_user_specified_mu)
        # The following should be used only for level>0 :
        # 3)
        # tw.sample_normal_in_one_level_using_the_coarser_as_mean(Avees_coarse=ms_Avees[level-1],
        #                                                         Avees_fine=ms_Avees[level],
        #                                                         theta_fine=ms_theta[level],
        #                                                         level_fine=level)
        #
        #######################################################################

        # You can also change the values this way:
        # cpa_space = tw.ms.L_cpa_space[level]
        # theta = cpa_space.get_zeros_theta()
        # theta[:] = some values
        # Avees = cpa_space.get_zeros_PA()
        # cpa_space.theta2Avees(theta,Avees)
        # cpa_space.update_pat(Avees)

        # This step is important and must be done
        # before trying to "use" the new values of
        # the (vectorized) A's.
        tw.update_pat_from_Avees(ms_Avees[level],level)

        if eval_v:
            # Evaluating the velocity field.
            # You don't have to do it unless you want to visualize v.
            # (when evaluating the transformation, v will be internally
            # evaluated anyway -- but its result won't be stored)
            tw.calc_v(level=level)

        # optional, if you want to time it
        timer_gpu_T_fwd = GpuTimer()

        # Simply taking a host-side timestamp before and after the call
        # won't work. In fact, most likely you will get that toc-tic is
        # zero. You need to use the GpuTimer object. When you do that,
        # one side effect is that suddenly the toc-tic from above will
        # give you a more realistic result.
        tic = clock()
        timer_gpu_T_fwd.tic()
        tw.calc_T_fwd(pts_src,pts_fwd,level=level)
        timer_gpu_T_fwd.toc()
        toc = clock()

        print('Time, in sec, for computing T_fwd:')
        print(timer_gpu_T_fwd.secs)
        print(toc-tic) # likely to be 0, unless you also used the GpuTimer.

        # You can also time the inv of course. Results will be similar.
        tw.calc_T_inv(pts_src,pts_inv,level=level)

        if eval_cell_idx:
            # cell_idx is computed here just for display.
            cell_idx = CpuGpuArray.zeros(len(pts_src),dtype=np.int32)
            tw.calc_cell_idx(pts_src,cell_idx,level,
                             permute_for_disp=permute_cell_idx_for_display)

        # You may also want to time the remap.
        # However, the remap is usually very fast (e.g., about 2 millisec).
        tw.remap_fwd(pts_inv=pts_inv,img=img_original,img_wrapped_fwd=img_wrapped_fwd)

        # If the img type is np.float32, you may also use
        # tw.remap_fwd_opencv instead of tw.remap_fwd. The differences
        # between the two methods are explained above.
        tw.remap_inv(pts_fwd=pts_fwd,img=img_original,img_wrapped_inv=img_wrapped_bwd)

        # For display purposes, do gpu2cpu transfer
        print("For display purposes, do gpu2cpu transfer")
        if eval_cell_idx:
            cell_idx.gpu2cpu()
        if eval_v:
            tw.v_dense.gpu2cpu()
        pts_fwd.gpu2cpu()
        pts_inv.gpu2cpu()
        img_wrapped_fwd.gpu2cpu()
        img_wrapped_bwd.gpu2cpu()

        figsize = (12,12)
        plt.figure(figsize=figsize)

        if eval_v:
            plt.subplot(332)
            tw.imshow_vx()
            plt.title('vx')
            plt.subplot(333)
            tw.imshow_vy()
            plt.title('vy')

        if eval_cell_idx:
            plt.subplot(331)
            cell_idx_disp = cell_idx.cpu.reshape(img.shape[0],-1)
            plt.imshow(cell_idx_disp)
            plt.title('tess (type {})'.format(tess))

        if show_downsampled_pts:
            ds=20 # keep every ds-th point along each image axis
            pts_src_grid = pts_src.cpu.reshape(tw.nRows,-1,2)
            pts_src_ds=pts_src_grid[::ds,::ds].reshape(-1,2)
            pts_fwd_grid = pts_fwd.cpu.reshape(tw.nRows,-1,2)
            pts_fwd_ds=pts_fwd_grid[::ds,::ds].reshape(-1,2)
            pts_inv_grid = pts_inv.cpu.reshape(tw.nRows,-1,2)
            pts_inv_ds=pts_inv_grid[::ds,::ds].reshape(-1,2)

            use_lims=use_lims_when_plotting
            plt.subplot(334)
            plt.plot(pts_src_ds[:,0],pts_src_ds[:,1],'r.')
            plt.title('pts ds')
            tw.config_plt()
            plt.subplot(335)
            plt.plot(pts_fwd_ds[:,0],pts_fwd_ds[:,1],'g.')
            plt.title('fwd(pts)')
            tw.config_plt(axis_on_or_off='on',use_lims=use_lims)
            plt.subplot(336)
            plt.plot(pts_inv_ds[:,0],pts_inv_ds[:,1],'b.')
            plt.title('inv(pts)')
            tw.config_plt(axis_on_or_off='on',use_lims=use_lims)

        plt.subplot(337)
        plt.imshow(img_original.cpu.astype(np.uint8))
        plt.title('img')
        # plt.axis('off')
        plt.subplot(338)
        plt.imshow(img_wrapped_fwd.cpu.astype(np.uint8))
        # plt.axis('off')
        plt.title('fwd(img)')
        plt.subplot(339)
        plt.imshow(img_wrapped_bwd.cpu.astype(np.uint8))
        # plt.axis('off')
        plt.title('inv(img)')

    return tw
# Fragment: self-test comparing the GPU per-sample log-likelihood with a
# closed-form expression.
# The unconditional raise below makes everything after it unreachable.
msg=""" The code below is for landmarks, not signals"""
raise NotImplementedError(msg)

# 4x4 integer grid over [-2, 2) in both axes, stacked as (x, y) rows.
yy,xx = np.mgrid[-2:2:1,-2:2:1]
x = np.vstack([xx.ravel(),yy.ravel()]).T
del xx,yy
# NOTE(review): np.float was removed in NumPy 1.24 -- use np.float64 if
# this code is ever revived.
x = CpuGpuArray(x.copy().astype(np.float))
print x

# Random targets with the same shape as x.
y = np.random.standard_normal(x.shape)
y = CpuGpuArray(y)

err = CpuGpuArray.zeros_like(y)
nPts = len(err)
ll = CpuGpuArray.zeros(nPts)
calc_signal_err_per_sample(x.gpu,y.gpu,err.gpu)

sigma=1.0
calc_ll_per_sample(ll.gpu,err.gpu,sigma)

err.gpu2cpu()
ll.gpu2cpu()

# Expected value: -0.5 * squared error norm / sigma**2, per sample.
print np.allclose( ll.cpu, -0.5*(err.cpu[:,0]**2+err.cpu[:,1]**2)/(sigma**2))
# np.random.seed(0) theta = np.random.multivariate_normal(mean=mu,cov=cpa_covs.cpa_cov) theta *=100 ## # theta.fill(0) # theta[8]=1 # theta[10]=1 cpa_space.theta2Avees(theta=theta) cpa_space.update_pat() # 1/0 # params_flow_int.nTimeSteps *= 10 cell_idx = CpuGpuArray.zeros(len(pts),dtype=np.int32) cpa_space.calc_cell_idx(pts,cell_idx) cell_idx.gpu2cpu() print cell_idx img=cell_idx.cpu.reshape(cpa_space.x_dense_grid.shape[1:]) # img = pts.cpu[:,0].reshape(cpa_space.x_dense_grid.shape[1:]) plt.figure(1) of.plt.set_figure_size_and_location(0,0,800,800) plt.clf() plt.subplot(131) plt.imshow(img[:,:,0,0],interpolation="None");plt.colorbar() v = CpuGpuArray.zeros_like(pts)
def __init__(self, ms, level, data, sigma_signal, params_flow_int,
             interp_type_for_ll):
    """
    Prepare the per-level buffers used with signal data.

    Arguments:
        ms: object exposing L_cpa_space (indexed by level).
        level: which entry of ms.L_cpa_space to use.
        data: mapping holding 'src', 'transformed' and 'signal'.
              'src' and 'transformed' must be CpuGpuArray objects, and so
              must signal.src, signal.dst and signal.transformed.
        sigma_signal: kept in self.sigma_signal.
        params_flow_int: kept in self.params_flow_int.
        interp_type_for_ll: must be one of self.supported_interp_types.

    Side effects:
        Sets signal.err (on the object passed in via data['signal']) to a
        zero array shaped like signal.src.

    Raises:
        ValueError: unsupported interp_type_for_ll; src.shape[1] differs
                    from cpa_space.dim_domain; the three signal arrays do
                    not share one shape; fewer than two points.
        TypeError: one of the arrays is not a CpuGpuArray.
        NotImplementedError: signal.src is not 2-dimensional.
    """
    if interp_type_for_ll not in self.supported_interp_types:
        msg = """
        interp_type_for_ll must be in
        ['gpu_linear',
         cv2.INTER_LINEAR,
         cv2.INTER_CUBIC,
         cv2.INTER_LANCZOS4]
        """
        raise ValueError(msg, interp_type_for_ll)
    self.interp_type_for_ll = interp_type_for_ll

    src = data['src']
    transformed = data['transformed']
    signal = data['signal']

    # Two separate passes: the point arrays are validated before any
    # attribute of `signal` is touched.
    for name, obj in (('src', src), ('transformed', transformed)):
        if not isinstance(obj, CpuGpuArray):
            raise TypeError(
                "data['{}'] must be a CpuGpuArray, got {}".format(
                    name, type(obj)))
    for name, obj in (('src', signal.src),
                      ('dst', signal.dst),
                      ('transformed', signal.transformed)):
        if not isinstance(obj, CpuGpuArray):
            raise TypeError(
                "signal.{} must be a CpuGpuArray, got {}".format(
                    name, type(obj)))

    self.nCalls = 0
    self.nCallbacks = 0
    self.sigma_signal = sigma_signal

    cpa_space = ms.L_cpa_space[level]
    self.cpa_space = cpa_space
    if src.shape[1] != cpa_space.dim_domain:
        raise ValueError(src.shape, cpa_space.dim_domain)

    # The attribute used to be spelled `self.my`, which looks like a typo
    # for `mu`. Both names are set (same object) so that code relying on
    # either spelling keeps working.
    self.mu = cpa_space.get_zeros_theta()
    self.my = self.mu

    self.src = src
    self.transformed = transformed
    self.signal = signal

    # Only 2-dimensional signal arrays are supported.
    if signal.src.ndim != 2:
        raise NotImplementedError(signal.src.shape)
    self.dim_signal = 2

    if signal.src.shape != signal.dst.shape:
        raise ValueError(signal.src.shape, signal.dst.shape)
    if signal.src.shape != signal.transformed.shape:
        raise ValueError(signal.src.shape, signal.transformed.shape)

    nPts = len(src)
    self.nPts = nPts
    self.signal.err = CpuGpuArray.zeros_like(self.signal.src)
    self.ll = CpuGpuArray.zeros(nPts, dtype=src.dtype)

    if nPts <= 1:
        raise ValueError(nPts)

    self.params_flow_int = params_flow_int
    self._pat = PAT(pa_space=cpa_space, Avees=cpa_space.get_zeros_PA())
def __init__(self,XMINS,XMAXS,nCs,
             zero_v_across_bdry,
             vol_preserve,
             warp_around=None,
             conformal=False,
             zero_vals=None,
             cpa_calcs=None,
             tess='II',
             valid_outside=None,
             only_local=False,
             cont_constraints_are_separable=False):
    """
    Validate the configuration, allocate the per-cell buffers and work out
    where the subspace file lives.

    Arguments:
        XMINS, XMAXS: lower / upper bounds of the domain; every XMINS
            entry must be strictly smaller than the matching XMAXS entry.
        nCs: iterable of length dim_domain.
        zero_v_across_bdry: iterable of length dim_domain.
        vol_preserve: stored, and used in the subspace string.
        warp_around: iterable of length dim_domain; must be passed.
        conformal: must be false.
        zero_vals: iterable; must be passed.
        cpa_calcs: stored in self._calcs; must be passed.
        tess: 'I' or 'II'. 'I' is accepted only when dim_domain is 2 or 3.
        valid_outside: checked for consistency with zero_v_across_bdry
            when tess is 'I'.
        only_local: supported only with tess 'I'; selects 'local.pkl'
            instead of 'subspace.pkl'.
        cont_constraints_are_separable: used in the subspace string.

    Raises:
        ValueError / NotImplementedError: for the unsupported or
            inconsistent combinations checked below.

    Side effects:
        Creates self.directory on disk if needed.
    """
    if conformal:
        raise ValueError("This was a bad idea")
    if not self.has_GPU:
        raise ValueError("Where is my gpu?")
    # Unreachable: conformal was already rejected above.
    if conformal:
        raise ValueError
    if tess not in ['I','II']:
        raise ValueError(tess)
    if tess == 'I' and self.dim_domain == 1:
        raise ValueError
    if tess == 'I' and self.dim_domain not in (2,3):
        raise NotImplementedError
    if only_local and tess != 'I':
        raise NotImplementedError
    if zero_vals is None:
        raise ValueError
    if cpa_calcs is None:
        raise ValueError("You must pass this argument")
    self._calcs = cpa_calcs

    if len(nCs) != self.dim_domain:
        raise ValueError('len(nCs) = {0} =/= {1} = dim_domain'.format(len(nCs),self.dim_domain))

    if warp_around is None:
        # warp_around = [False] * self.dim_domain
        raise ValueError("You must pass this argument")

    try:# Test if iterable
        zero_vals.__iter__
    except AttributeError:
        raise ValueError(zero_vals)
    try: # Test if iterable
        nCs.__iter__
    except AttributeError:
        raise ValueError(nCs)
    # The two handlers below re-raise whatever was caught, so a
    # non-iterable argument surfaces as the original AttributeError.
    try: # Test if iterable
        zero_v_across_bdry.__iter__
    except:
        raise
    try: # Test if iterable
        warp_around.__iter__
    except:
        raise

    if len(warp_around) != self.dim_domain:
        raise ValueError(len(warp_around) , self.dim_domain)
    if len(zero_v_across_bdry) != self.dim_domain:
        raise ValueError(len(zero_v_across_bdry) , self.dim_domain)

    # Consistency rules for tess 'I'.
    if tess=='I':
        if self.dim_domain==2:
            if any(zero_v_across_bdry) and valid_outside:
                raise ValueError("dim_domain==2","tess='I'",
                                 "zero_v_across_bdry={}".format(zero_v_across_bdry),
                                 "valid_outside={}".format(valid_outside),
                                 "These choices are inconsistent with each other")
            if not all(zero_v_across_bdry) and not valid_outside:
                raise ValueError("dim_domain>1","tess='I'",
                                 "zero_v_across_bdry={}".format(zero_v_across_bdry),
                                 "valid_outside={}".format(valid_outside),
                                 "These choices are inconsistent with each other")
        elif self.dim_domain==3:
            if valid_outside:
                raise NotImplementedError
            elif not all(zero_v_across_bdry):
                raise ValueError("dim_domain==3","tess='I'",
                                 "zero_v_across_bdry={}".format(zero_v_across_bdry),
                                 "These choices are inconsistent with each other")
        else:
            raise NotImplementedError

    # NOTE(review): my_dtype is a bare name here but self.my_dtype further
    # down -- confirm that both refer to the same dtype.
    self.XMINS = np.asarray(XMINS,dtype=my_dtype)
    self.XMAXS = np.asarray(XMAXS,dtype=my_dtype)
    if (self.XMINS>=self.XMAXS).any():
        raise ValueError(XMINS,XMAXS)

    self.warp_around = warp_around

    self.tess=tess
    # NOTE(review): `reduce` is a builtin only on Python 2; on Python 3 it
    # must come from functools.
    # For scalar entries, reduce(np.dot, nCs) is the product of nCs.
    if tess == 'II':
        nC = reduce(np.dot,nCs) # of cells
    elif tess == 'I':
        if self.dim_domain == 2:
            nC = reduce(np.dot,nCs) * 4
        elif self.dim_domain == 3:
            nC = reduce(np.dot,nCs) * 5
        else:
            raise NotImplementedError
    else:
        raise ValueError(tess)

    self.nCs = np.asarray(nCs)
    self.nC=nC

    if self.dim_domain !=1:
        if self.dim_domain in (2,3):
            self.expm_eff = ExpmEff(nC)
        else:
            self.expm_eff = ExpmEff(nC,use_parallel=1)

    # NOTE(review): the source's indentation was lost, so it is unclear
    # whether the next two statements belong inside the
    # `if self.dim_domain != 1` block above -- confirm.
    nHomoCoo=self.nHomoCoo
    self._signed_sqAs_times_dt= np.empty((nC,nHomoCoo,nHomoCoo),
                                         dtype=self.my_dtype)
    # In each matrix, fill last row with zeros
    # self._signed_sqAs_times_dt[:,-1].fill(0)

    # self._sqAs_vectorized = np.zeros((nC,nHomoCoo*nHomoCoo),
    #                                  dtype=self.my_dtype)
    # self._Tlocals_vectorized = np.empty((nC,nHomoCoo*nHomoCoo),dtype=self.my_dtype)

    # Per-cell buffers: one row of length lengthAvee per cell.
    self._As_vectorized = CpuGpuArray.zeros((nC,self.lengthAvee),dtype=self.my_dtype)
    self._signed_As_vectorized = CpuGpuArray.zeros((nC,self.lengthAvee),dtype=self.my_dtype)
    self._signed_As_times_dt_vectorized = CpuGpuArray.zeros((nC,self.lengthAvee),dtype=self.my_dtype)
    self._Tlocals_vectorized = CpuGpuArray.zeros((nC,self.lengthAvee),dtype=self.my_dtype)

    if self.has_GPU:
        self.sharedmemory = decide_sharedmemory(self.dim_domain,
                                                self.dim_range,
                                                self.nC)
        self._gpu_calcs = GpuCalcs(nC,my_dtype,
                                   dim_domain=self.dim_domain,
                                   dim_range=self.dim_range,
                                   tess=self.tess,
                                   sharedmemory=self.sharedmemory)
    else:
        # Unreachable: a missing GPU was already rejected at the top.
        raise NotImplementedError

    self.only_local=only_local
    self.zero_v_across_bdry=zero_v_across_bdry
    self.vol_preserve=vol_preserve

    self.subspace_string=self.create_subspace_string(self.XMINS,
                                                     self.XMAXS,
                                                     nCs,
                                                     zero_v_across_bdry,
                                                     vol_preserve,
                                                     warp_around,
                                                     conformal,
                                                     zero_vals,
                                                     valid_outside=valid_outside,
                                                     cont_constraints_are_separable=cont_constraints_are_separable)

    # One directory per (dimension, subspace configuration).
    self.directory = os.path.join(dirnames.cpa,'{0}d'.format(self.dim_domain),
                                  self.subspace_string)
    FilesDirs.mkdirs_if_needed(self.directory)

    if self.only_local:
        self.filename_subspace = os.path.join(self.directory,'local.pkl')
    else:
        self.filename_subspace = os.path.join(self.directory,'subspace.pkl')
def example(tess='I',
            base=[2, 2, 2],
            nLevels=1,
            zero_v_across_bdry=[True] * 3,
            vol_preserve=False,
            nRows=100,
            nCols=100,
            nSlices=100,
            use_mayavi=False,
            eval_v=False,
            eval_cell_idx=False):
    """
    3D demo: sample a transformation at every level, apply it to a dense
    grid of points and to a synthetic volume, and plot one slice.

    Arguments:
        tess, base, nLevels, zero_v_across_bdry, vol_preserve, nRows,
        nCols, nSlices: forwarded to TransformWrapper (with
            valid_outside=False and only_local=False).
        use_mayavi: also plot the source and forward-mapped points of one
            slice in 3D (needs of.my_mayavi).
        eval_v: compute the velocity field.
        eval_cell_idx: compute the cell index of every point.

    Returns:
        The TransformWrapper that was created.
    """
    tw = TransformWrapper(nRows=nRows,
                          nCols=nCols,
                          nSlices=nSlices,
                          nLevels=nLevels,
                          base=base,
                          zero_v_across_bdry=zero_v_across_bdry,
                          tess=tess,
                          valid_outside=False,
                          only_local=False,
                          vol_preserve=vol_preserve)
    print_iterable(tw.ms.L_cpa_space)
    print(tw)

    # create some fake 3D image.
    img = np.zeros((nCols, nRows, nSlices), dtype=np.float64)
    # img[:]=np.random.random_integers(0,255,img.shape)
    # Fill the image with the x coordinates as fake values
    img[:] = tw.pts_src_dense.cpu[:, 0].reshape(img.shape)

    img0 = CpuGpuArray(img.copy().astype(np.float64))
    img_wrapped_fwd = CpuGpuArray.zeros_like(img0)
    img_wrapped_inv = CpuGpuArray.zeros_like(img0)

    seed = 0
    np.random.seed(seed)

    ms_Avees = tw.get_zeros_PA_all_levels()
    ms_theta = tw.get_zeros_theta_all_levels()

    if tess == 'II':
        for level in range(tw.ms.nLevels):
            cpa_space = tw.ms.L_cpa_space[level]
            Avees = ms_Avees[level]
            if level == 0:
                tw.sample_gaussian(level, ms_Avees[level], ms_theta[level],
                                   mu=None)  # zero mean
                # ms_theta[level].fill(0)
                # ms_theta[level][-4]=10
                cpa_space.theta2Avees(theta=ms_theta[level], Avees=Avees)
            else:
                tw.sample_from_the_ms_prior_coarse2fine_one_level(
                    ms_Avees, ms_theta, level_fine=level)
    else:
        # For tess='I' in 3D, I have yet to implement the coarse-to-fine sampling.
        for level in range(tw.ms.nLevels):
            cpa_space = tw.ms.L_cpa_space[level]
            velTess = cpa_space.zeros_velTess()
            ms_Avees[level].fill(0)
            Avees = ms_Avees[level]
            tw.sample_gaussian_velTess(level, Avees, velTess, mu=None)

    print('img shape: {}'.format(img0.shape))

    # You don't have to use these. You can use any 2d array
    # that has 3 columns (regardless of the number of rows).
    pts_src = tw.pts_src_dense
    pts_src = CpuGpuArray(pts_src.cpu[::1].copy())

    # Create a buffer for the output
    pts_fwd = CpuGpuArray.zeros_like(pts_src)
    pts_inv = CpuGpuArray.zeros_like(pts_src)

    # time.clock was removed in Python 3.8. On interpreters that still
    # have no perf_counter (Python 2) this resolves to time.clock, exactly
    # as before.
    clock = getattr(time, 'perf_counter', None) or time.clock

    # Slice shown in the plots. It used to be hard-coded to 17, which
    # raised IndexError for volumes with 17 slices or fewer.
    i = min(17, nSlices - 1)

    for level in range(tw.ms.nLevels):
        tw.update_pat_from_Avees(ms_Avees[level], level)

        if eval_v:
            # Evaluating the velocity field.
            # You don't have to do it unless you want to visualize v.
            # (when evaluating the transformation, v will be internally
            # evaluated anyway -- but its result won't be stored)
            tw.calc_v(level=level)

        print('level {}'.format(level))
        print('')
        print('number of points: {}'.format(len(pts_src)))
        print('number of cells: {}'.format(tw.ms.L_cpa_space[level].nC))

        # optional, if you want to time it
        timer_gpu_T_fwd = GpuTimer()

        # Simply taking a host-side timestamp before and after the call
        # won't work. In fact, most likely you will get that toc-tic is
        # zero. You need to use the GpuTimer object. When you do that,
        # one side effect is that suddenly the toc-tic from above will
        # give you a more realistic result.
        tic = clock()
        timer_gpu_T_fwd.tic()
        tw.calc_T_fwd(pts_src, pts_fwd, level=level)
        timer_gpu_T_fwd.toc()
        toc = clock()

        print('Time, in sec, for computing T_fwd:')
        print(timer_gpu_T_fwd.secs)
        print(toc - tic)  # likely to be 0, unless you also used the GpuTimer.

        # You can also time the inv of course. Results will be similar.
        tw.calc_T_inv(pts_src, pts_inv, level=level)

        if eval_cell_idx:
            # cell_idx is computed here just for display.
            cell_idx = CpuGpuArray.zeros(len(pts_src), dtype=np.int32)
            tw.calc_cell_idx(pts_src, cell_idx, level)

        tw.remap_fwd(pts_inv, img0, img_wrapped_fwd)
        tw.remap_inv(pts_fwd, img0, img_wrapped_inv)

        # For display purposes, do gpu2cpu transfer
        print("For display purposes, do gpu2cpu transfer")
        if eval_cell_idx:
            cell_idx.gpu2cpu()
        if eval_v:
            tw.v_dense.gpu2cpu()
        pts_fwd.gpu2cpu()
        pts_inv.gpu2cpu()
        img_wrapped_fwd.gpu2cpu()
        img_wrapped_inv.gpu2cpu()

        if use_mayavi:
            ds = 1  # downsampling factor
            pts_src_grid = pts_src.cpu.reshape(tw.nRows, tw.nCols, -1, 3)
            pts_src_ds = pts_src_grid[::ds, ::ds, i].reshape(-1, 3)
            pts_fwd_grid = pts_fwd.cpu.reshape(tw.nRows, tw.nCols, -1, 3)
            pts_fwd_ds = pts_fwd_grid[::ds, ::ds, i].reshape(-1, 3)
            # The inverse-mapped points are not plotted here, so the unused
            # pts_inv_grid / pts_inv_ds intermediates were dropped.

            # `from ... import *` inside a function is a SyntaxError on
            # Python 3, so the three names used are imported explicitly.
            from of.my_mayavi import (mayavi_mlab_close_all,
                                      mayavi_mlab_figure_bgwhite,
                                      mayavi_mlab_plot3d)
            mayavi_mlab_close_all()
            mayavi_mlab_figure_bgwhite('src')
            x, y, z = pts_src_ds.T
            mayavi_mlab_plot3d(x, y, z)
            mayavi_mlab_figure_bgwhite('fwd')
            x, y, z = pts_fwd_ds.T
            mayavi_mlab_plot3d(x, y, z)

        figsize = (12, 12)
        plt.figure(figsize=figsize)
        plt.subplot(131)
        plt.imshow(img0.cpu[:, :, i].astype(np.uint8),
                   interpolation="Nearest")
        plt.title('slice from img')
        plt.subplot(132)
        plt.imshow(img_wrapped_fwd.cpu[:, :, i].astype(np.uint8),
                   interpolation="Nearest")
        plt.axis('off')
        plt.title('slice from fwd(img)')
        plt.subplot(133)
        plt.imshow(img_wrapped_inv.cpu[:, :, i].astype(np.uint8),
                   interpolation="Nearest")
        plt.axis('off')
        plt.title('slice from inv(img)')

        if 0:  # debug
            cpa_space = tw.ms.L_cpa_space[level]
            if eval_v:
                vx = tw.v_dense.cpu[:, 0].reshape(
                    cpa_space.x_dense_grid_img.shape[1:])
                vy = tw.v_dense.cpu[:, 1].reshape(
                    cpa_space.x_dense_grid_img.shape[1:])
                vz = tw.v_dense.cpu[:, 2].reshape(
                    cpa_space.x_dense_grid_img.shape[1:])
                plt.figure()
                plt.imshow(vz[:, :, i], interpolation="Nearest")
                plt.colorbar()
                plt.title('vz in some slice')

    return tw
def example(tess='I', base=[2, 2, 2], nLevels=1,
            zero_v_across_bdry=[True] * 3,
            vol_preserve=False,
            nRows=100, nCols=100, nSlices=100,
            use_mayavi=False,
            eval_v=False,
            eval_cell_idx=False):
    """
    Demo: sample a 3D transformation and warp a synthetic volume with it.

    Builds a TransformWrapper, fills a fake volume with the x coordinates of
    the dense source points, samples the per-level parameters, and then, for
    every level, computes T_fwd and T_inv on the dense points, remaps the
    volume in both directions and shows one slice of the original and of the
    two warped volumes with matplotlib.

    Arguments:
        tess, base, nLevels, zero_v_across_bdry, vol_preserve,
        nRows, nCols, nSlices:
            forwarded as-is to TransformWrapper (base and zero_v_across_bdry
            are only read here, never modified). tess also picks the
            sampling path: 'II' uses sample_gaussian at level 0 and the
            coarse-to-fine prior above it; any other value uses
            sample_gaussian_velTess at every level.
        use_mayavi: if true, also plot one slice of the source and forward
            points with the helpers from of.my_mayavi.
        eval_v: if true, evaluate (and copy back to the cpu) the dense
            velocity field at each level.
        eval_cell_idx: if true, compute (and copy back to the cpu) the cell
            index of every source point at each level.

    Returns:
        The TransformWrapper instance that was created.

    NOTE(review): this function reached review with its indentation
    flattened. The gpu2cpu transfers and the plotting are placed inside the
    per-level loop (one figure per level) -- confirm against the original
    layout.
    """
    tw = TransformWrapper(nRows=nRows, nCols=nCols, nSlices=nSlices,
                          nLevels=nLevels, base=base,
                          zero_v_across_bdry=zero_v_across_bdry,
                          tess=tess,
                          valid_outside=False,
                          only_local=False,
                          vol_preserve=vol_preserve)
    print_iterable(tw.ms.L_cpa_space)
    print(tw)

    # Create some fake 3D image.
    # NOTE(review): the axis order here is (nCols, nRows, nSlices) while the
    # Mayavi reshape below uses (nRows, nCols, ...); with the square defaults
    # the two agree -- confirm for nRows != nCols.
    img = np.zeros((nCols, nRows, nSlices), dtype=np.float64)
    # Fill the image with the x coordinates as fake values.
    img[:] = tw.pts_src_dense.cpu[:, 0].reshape(img.shape)

    img0 = CpuGpuArray(img.copy().astype(np.float64))
    img_wrapped_fwd = CpuGpuArray.zeros_like(img0)
    img_wrapped_inv = CpuGpuArray.zeros_like(img0)

    seed = 0
    np.random.seed(seed)

    ms_Avees = tw.get_zeros_PA_all_levels()
    ms_theta = tw.get_zeros_theta_all_levels()

    if tess == 'II':
        for level in range(tw.ms.nLevels):
            cpa_space = tw.ms.L_cpa_space[level]
            Avees = ms_Avees[level]
            if level == 0:
                tw.sample_gaussian(level, ms_Avees[level], ms_theta[level],
                                   mu=None)  # zero mean
                # To try hand-picked values instead of a random sample:
                # ms_theta[level].fill(0)
                # ms_theta[level][-4]=10
                cpa_space.theta2Avees(theta=ms_theta[level], Avees=Avees)
            else:
                tw.sample_from_the_ms_prior_coarse2fine_one_level(
                    ms_Avees, ms_theta, level_fine=level)
    else:
        # For tess='I' in 3D, I have yet to implement
        # the coarse-to-fine sampling.
        for level in range(tw.ms.nLevels):
            cpa_space = tw.ms.L_cpa_space[level]
            velTess = cpa_space.zeros_velTess()
            ms_Avees[level].fill(0)
            Avees = ms_Avees[level]
            tw.sample_gaussian_velTess(level, Avees, velTess, mu=None)

    print('img shape: {}'.format(img0.shape))

    # You don't have to use these. You can use any 2d array
    # that has 3 columns (regardless of the number of rows).
    pts_src = tw.pts_src_dense
    pts_src = CpuGpuArray(pts_src.cpu[::1].copy())

    # Create buffers for the output.
    pts_fwd = CpuGpuArray.zeros_like(pts_src)
    pts_inv = CpuGpuArray.zeros_like(pts_src)

    # time.clock was removed in Python 3.8; use it when it exists (so the
    # Python 2 behavior is unchanged) and fall back to perf_counter otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.perf_counter

    # Slice shown in the figures. It used to be a bare 17, which raised an
    # IndexError for volumes with fewer than 18 slices.
    i = min(17, nSlices - 1)

    for level in range(tw.ms.nLevels):
        tw.update_pat_from_Avees(ms_Avees[level], level)

        if eval_v:
            # Evaluating the velocity field.
            # You don't have to do it unless you want to visualize v.
            # (When evaluating the transformation, v will be internally
            # evaluated anyway -- but its result won't be stored.)
            tw.calc_v(level=level)

        print('level {}'.format(level))
        print('')
        print('number of points: {}'.format(len(pts_src)))
        print('number of cells: {}'.format(tw.ms.L_cpa_space[level].nC))

        # Optional, if you want to time it.
        timer_gpu_T_fwd = GpuTimer()
        # Simply calling
        #   tic = cpu_clock()
        # and then
        #   toc = cpu_clock()
        # won't work.
        # In fact, most likely you will get that toc-tic is zero.
        # You need to use the GpuTimer object. When you do that,
        # one side effect is that suddenly the toc-tic from above will
        # give you a more realistic result.
        tic = cpu_clock()
        timer_gpu_T_fwd.tic()
        tw.calc_T_fwd(pts_src, pts_fwd, level=level)
        timer_gpu_T_fwd.toc()
        toc = cpu_clock()

        print('Time, in sec, for computing T_fwd:')
        print(timer_gpu_T_fwd.secs)
        print(toc - tic)  # likely to be 0, unless you also used the GpuTimer.

        # You can also time the inv of course. Results will be similar.
        tw.calc_T_inv(pts_src, pts_inv, level=level)

        if eval_cell_idx:
            # cell_idx is computed here just for display.
            cell_idx = CpuGpuArray.zeros(len(pts_src), dtype=np.int32)
            tw.calc_cell_idx(pts_src, cell_idx, level)

        tw.remap_fwd(pts_inv, img0, img_wrapped_fwd)
        tw.remap_inv(pts_fwd, img0, img_wrapped_inv)

        # For display purposes, do gpu2cpu transfer
        print("For display purposes, do gpu2cpu transfer")
        if eval_cell_idx:
            cell_idx.gpu2cpu()
        if eval_v:
            tw.v_dense.gpu2cpu()
        pts_fwd.gpu2cpu()
        pts_inv.gpu2cpu()
        img_wrapped_fwd.gpu2cpu()
        img_wrapped_inv.gpu2cpu()

        if use_mayavi:
            # A wildcard import is not allowed inside a function on
            # Python 3, so the three helpers used below are named explicitly.
            from of.my_mayavi import (mayavi_mlab_close_all,
                                      mayavi_mlab_figure_bgwhite,
                                      mayavi_mlab_plot3d)
            ds = 1  # downsampling factor
            pts_src_grid = pts_src.cpu.reshape(tw.nRows, tw.nCols, -1, 3)
            pts_src_ds = pts_src_grid[::ds, ::ds, i].reshape(-1, 3)
            pts_fwd_grid = pts_fwd.cpu.reshape(tw.nRows, tw.nCols, -1, 3)
            pts_fwd_ds = pts_fwd_grid[::ds, ::ds, i].reshape(-1, 3)

            mayavi_mlab_close_all()
            mayavi_mlab_figure_bgwhite('src')
            x, y, z = pts_src_ds.T
            mayavi_mlab_plot3d(x, y, z)
            mayavi_mlab_figure_bgwhite('fwd')
            x, y, z = pts_fwd_ds.T
            mayavi_mlab_plot3d(x, y, z)

        figsize = (12, 12)
        plt.figure(figsize=figsize)
        plt.subplot(131)
        plt.imshow(img0.cpu[:, :, i].astype(np.uint8),
                   interpolation="Nearest")
        plt.title('slice from img')
        plt.subplot(132)
        plt.imshow(img_wrapped_fwd.cpu[:, :, i].astype(np.uint8),
                   interpolation="Nearest")
        plt.axis('off')
        plt.title('slice from fwd(img)')
        plt.subplot(133)
        plt.imshow(img_wrapped_inv.cpu[:, :, i].astype(np.uint8),
                   interpolation="Nearest")
        plt.axis('off')
        plt.title('slice from inv(img)')

        if 0:  # debug (deliberately disabled)
            cpa_space = tw.ms.L_cpa_space[level]
            if eval_v:
                grid_shape = cpa_space.x_dense_grid_img.shape[1:]
                vx = tw.v_dense.cpu[:, 0].reshape(grid_shape)
                vy = tw.v_dense.cpu[:, 1].reshape(grid_shape)
                vz = tw.v_dense.cpu[:, 2].reshape(grid_shape)
                plt.figure()
                plt.imshow(vz[:, :, 17], interpolation="Nearest")
                plt.colorbar()
                plt.title('vz in some slice')

    return tw
# Importing autoinit is what initializes the CUDA context (import kept for
# its side effect).
from pycuda import autoinit
from of.gpu import CpuGpuArray
import numpy as np

msg = """ The code below is for landmarks, not signals"""
# The self-check below is deliberately disabled: everything after this raise
# is unreachable and is kept only as a reference.
raise NotImplementedError(msg)

# A small 2D grid of points, one (x, y) pair per row.
yy, xx = np.mgrid[-2:2:1, -2:2:1]
x = np.vstack([xx.ravel(), yy.ravel()]).T
del xx, yy
# np.float was only an alias of the builtin float and has been removed from
# NumPy; the builtin gives the same dtype.
x = CpuGpuArray(x.copy().astype(float))
print(x)

y = np.random.standard_normal(x.shape)
y = CpuGpuArray(y)

err = CpuGpuArray.zeros_like(y)
nPts = len(err)
ll = CpuGpuArray.zeros(nPts)

calc_signal_err_per_sample(x.gpu, y.gpu, err.gpu)

sigma = 1.0
calc_ll_per_sample(ll.gpu, err.gpu, sigma)

err.gpu2cpu()
ll.gpu2cpu()

# Compare the gpu result against the closed form computed on the cpu.
print(np.allclose(
    ll.cpu,
    -0.5 * (err.cpu[:, 0]**2 + err.cpu[:, 1]**2) / (sigma**2)))
def __init__(self, nSuperpixels, s_std, i_std, prior_count, nChannels):
    """
    Allocate the per-superpixel Gaussian parameters and the gpu scratch
    buffers used when re-estimating them.

    Every array is created zero-filled (the counts start at one). The
    covariance-related entries are then filled in by
    self.initialize_params(); the means stay at zero until they are set
    later, in the first M step. An Inverse-Wishart prior is used on the
    space covariance.

    Arguments:
        nSuperpixels: the number of superpixels to generate
        s_std: should be fixed as nPixels_on_side
        i_std: controls the relative importance between RGB and location.
               The smaller it is, the bigger the RGB effect is / the more
               irregular the superpixels are.
        prior_count: determines the weight of the Inverse-Wishart prior of
               the space covariance (ex: 1, 5, 10)
        nChannels: the number of channels of the input image
               (gray: 1, LAB/RGB: 3)

    Raises:
        NotImplementedError: if nChannels is neither 1 nor 3.
    """
    supported_channel_counts = (1, 3)
    if nChannels not in supported_channel_counts:
        raise NotImplementedError(nChannels)

    n_sp = nSuperpixels
    color_dim = nChannels
    space_dim = 2  # location is 2D

    self.dim_i = color_dim
    self.dim_s = space_dim
    self.nSuperpixels = n_sp
    self.s_std = s_std
    self.i_std = i_std
    self.prior_count = prior_count

    # Means.
    space_mean = CpuGpuArray.zeros((n_sp, space_dim))
    color_mean = CpuGpuArray.zeros((n_sp, color_dim))

    # One matrix per superpixel for each Sigma_* / J_* pair.
    space_sigma = CpuGpuArray.zeros(shape=(n_sp, space_dim, space_dim))
    space_j = CpuGpuArray.zeros_like(space_sigma)
    color_sigma = CpuGpuArray.zeros((n_sp, color_dim, color_dim))
    color_j = CpuGpuArray.zeros_like(color_sigma)

    # One scalar per superpixel.
    color_logdet = CpuGpuArray.zeros((n_sp, 1))
    space_logdet = CpuGpuArray.zeros((n_sp, 1))

    # Start with unnormalized counts (uniform).
    pixel_counts = CpuGpuArray(np.ones(n_sp, dtype=np.int32))

    self.params = params = Bunch()
    params.mu_i = color_mean
    params.mu_s = space_mean
    params.Sigma_i = color_sigma
    params.Sigma_s = space_sigma
    # Same object as Sigma_s (not a copy), exactly as before.
    params.prior_sigma_s_sum = space_sigma
    params.J_i = color_j
    params.J_s = space_j
    params.logdet_Sigma_i = color_logdet
    params.logdet_Sigma_s = space_logdet
    params.counts = pixel_counts

    # Set those parameters related to covariance.
    self.initialize_params()

    # Intermediate arrays needed for the Gaussian parameter calculation
    # on the gpu.
    self.gpu_helper = helper = Bunch()
    helper.mu_i_helper = gpuarray.zeros((n_sp, color_dim), dtype=np.int32)
    helper.mu_s_helper = gpuarray.zeros((n_sp, space_dim), dtype=np.int32)
    # Taken after initialize_params(), so it is a gpu copy of whatever
    # prior_sigma_s_sum holds at this point.
    helper.prior_sigma_s = self.params.prior_sigma_s_sum.gpu.copy()
    helper.sigma_s_helper = gpuarray.zeros((n_sp, 3), dtype=np.int64)
    helper.log_count_helper = gpuarray.zeros((n_sp, 1), dtype=np.double)
def example(img=None, tess='I', eval_cell_idx=True, eval_v=True,
            show_downsampled_pts=True,
            valid_outside=True, base=[1, 1],
            scale_spatial=.1,
            scale_value=100,
            permute_cell_idx_for_display=True,
            nLevels=3,
            vol_preserve=False,
            zero_v_across_bdry=[0, 0],
            use_lims_when_plotting=True):
    """
    Demo: sample a 2D transformation from the multiscale prior and warp an
    image with it.

    Builds a TransformWrapper sized to the image, samples the parameters
    level by level (a zero-mean Gaussian at level 0, coarse-to-fine above
    it), and then, for every level, computes T_fwd and T_inv on the dense
    points, remaps the image in both directions and draws a 3x3 figure
    (tessellation, vx, vy / source, forward and inverse points / image and
    its two warps).

    Arguments:
        img: passed to Img(); if None, get_std_test_img() is used instead.
        tess, base, nLevels, vol_preserve, zero_v_across_bdry, valid_outside:
            forwarded to TransformWrapper (base and zero_v_across_bdry are
            only read here, never modified).
        scale_spatial: controls the prior's smoothness.
        scale_value: controls the prior's variance.
        eval_cell_idx: if true, compute and display the cell index of every
            source point.
        eval_v: if true, evaluate and display the velocity field.
        show_downsampled_pts: if true, plot a downsampled version of the
            source / forward / inverse points.
        permute_cell_idx_for_display: forwarded to tw.calc_cell_idx as
            permute_for_disp.
        use_lims_when_plotting: forwarded to tw.config_plt as use_lims for
            the fwd/inv point plots.

    Returns:
        The TransformWrapper instance that was created.

    NOTE(review): this function reached review with its indentation
    flattened. Two placements could not be recovered with certainty and
    should be confirmed against the original layout: (1) the bgr2rgb flip
    is applied only to a user-supplied img; (2) the gpu2cpu transfers and
    the plotting are inside the per-level loop (one figure per level).
    """
    show_downsampled_pts = bool(show_downsampled_pts)
    eval_cell_idx = bool(eval_cell_idx)
    # This used to read bool(eval_cell_idx), which silently overrode the
    # caller's eval_v with the value of eval_cell_idx.
    eval_v = bool(eval_v)
    valid_outside = bool(valid_outside)
    permute_cell_idx_for_display = bool(permute_cell_idx_for_display)
    vol_preserve = bool(vol_preserve)

    if img is None:
        img = Img(get_std_test_img())
    else:
        img = Img(img)
        img = img[:, :, ::-1]  # bgr2rgb

    tw = TransformWrapper(
        nRows=img.shape[0],
        nCols=img.shape[1],
        nLevels=nLevels,
        base=base,
        scale_spatial=scale_spatial,  # controls the prior's smoothness
        scale_value=scale_value,  # controls the prior's variance
        tess=tess,
        vol_preserve=vol_preserve,
        zero_v_across_bdry=zero_v_across_bdry,
        valid_outside=valid_outside)
    print(tw)

    # You probably want to do that: padding image border with zeros
    border_width = 1
    img[:border_width] = 0
    img[-border_width:] = 0
    img[:, :border_width] = 0
    img[:, -border_width:] = 0

    # The tw.calc_T_fwd (or tw.calc_T_inv) is always done in gpu.
    # After using it to compute new pts,
    # you may want to use remap (to warp an image accordingly).
    # If you will use tw.remap_fwd (or tw.remap_inv), which is done in gpu,
    # then the image type can be either float32 or float64.
    # But if you plan to use tw.remap_fwd_opencv (or tw.remap_inv_opencv),
    # which is done in cpu (hence slightly slower) but supports better
    # interpolation methods, then the image type must be np.float32.

    # img_original = CpuGpuArray(img.copy().astype(np.float32))
    img_original = CpuGpuArray(img.copy().astype(np.float64))

    img_wrapped_fwd = CpuGpuArray.zeros_like(img_original)
    img_wrapped_bwd = CpuGpuArray.zeros_like(img_original)

    seed = 0
    np.random.seed(seed)

    ms_Avees = tw.get_zeros_PA_all_levels()
    ms_theta = tw.get_zeros_theta_all_levels()

    for level in range(tw.ms.nLevels):
        if level == 0:
            tw.sample_gaussian(level, ms_Avees[level], ms_theta[level],
                               mu=None)  # zero mean
        else:
            tw.sample_from_the_ms_prior_coarse2fine_one_level(
                ms_Avees, ms_theta, level_fine=level)

    print('\nimg shape: {}\n'.format(img_original.shape))

    # You don't have to use these. You can use any 2d array
    # that has two columns (regardless of the number of rows).
    pts_src = tw.pts_src_dense

    # Create buffers for the output
    pts_fwd = CpuGpuArray.zeros_like(pts_src)
    pts_inv = CpuGpuArray.zeros_like(pts_src)

    # time.clock was removed in Python 3.8; use it when it exists (so the
    # Python 2 behavior is unchanged) and fall back to perf_counter otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.perf_counter

    for level in range(tw.ms.nLevels):
        #######################################################################
        # Instead of the coarse-to-fine sampling above,
        # you may want to use one of the following.
        # 1)
        # tw.sample_gaussian(level,ms_Avees[level],ms_theta[level],mu=None)# zero mean
        # 2)
        # tw.sample_gaussian(level,ms_Avees[level],ms_theta[level],mu=some_user_specified_mu)
        # The following should be used only for level>0 :
        # 3)
        # tw.sample_normal_in_one_level_using_the_coarser_as_mean(Avees_coarse=ms_Avees[level-1],
        #                                                         Avees_fine=ms_Avees[level],
        #                                                         theta_fine=ms_theta[level],
        #                                                         level_fine=level)
        #
        #######################################################################

        # You can also change the values this way:
        # cpa_space = tw.ms.L_cpa_space[level]
        # theta = cpa_space.get_zeros_theta()
        # theta[:] = some values
        # Avees = cpa_space.get_zeros_PA()
        # cpa_space.theta2Avees(theta,Avees)
        # cpa_space.update_pat(Avees)

        # This step is important and must be done
        # before you try to "use" the new values of
        # the (vectorized) A's.
        tw.update_pat_from_Avees(ms_Avees[level], level)

        if eval_v:
            # Evaluating the velocity field.
            # You don't have to do it unless you want to visualize v.
            # (When evaluating the transformation, v will be internally
            # evaluated anyway -- but its result won't be stored.)
            tw.calc_v(level=level)

        # Optional, if you want to time it.
        timer_gpu_T_fwd = GpuTimer()
        # Simply calling
        #   tic = cpu_clock()
        # and then
        #   toc = cpu_clock()
        # won't work.
        # In fact, most likely you will get that toc-tic is zero.
        # You need to use the GpuTimer object. When you do that,
        # one side effect is that suddenly the toc-tic from above will
        # give you a more realistic result.
        tic = cpu_clock()
        timer_gpu_T_fwd.tic()
        tw.calc_T_fwd(pts_src, pts_fwd, level=level)
        timer_gpu_T_fwd.toc()
        toc = cpu_clock()

        print('Time, in sec, for computing T_fwd:')
        print(timer_gpu_T_fwd.secs)
        print(toc - tic)  # likely to be 0, unless you also used the GpuTimer.

        # You can also time the inv of course. Results will be similar.
        tw.calc_T_inv(pts_src, pts_inv, level=level)

        if eval_cell_idx:
            # cell_idx is computed here just for display.
            cell_idx = CpuGpuArray.zeros(len(pts_src), dtype=np.int32)
            tw.calc_cell_idx(pts_src, cell_idx, level,
                             permute_for_disp=permute_cell_idx_for_display)

        # You may also want to time the remap.
        # However, the remap is usually very fast (e.g., about 2 millisec).
        # timer_gpu_remap_fwd = GpuTimer()
        # tic = cpu_clock()
        # timer_gpu_remap_fwd.tic()
        tw.remap_fwd(pts_inv=pts_inv, img=img_original,
                     img_wrapped_fwd=img_wrapped_fwd)
        # timer_gpu_remap_fwd.toc()
        # toc = cpu_clock()

        # If the img type is np.float32, you may also use
        # tw.remap_fwd_opencv instead of tw.remap_fwd. The differences
        # between the two methods are explained above.
        tw.remap_inv(pts_fwd=pts_fwd, img=img_original,
                     img_wrapped_inv=img_wrapped_bwd)

        # For display purposes, do gpu2cpu transfer
        print("For display purposes, do gpu2cpu transfer")
        if eval_cell_idx:
            cell_idx.gpu2cpu()
        if eval_v:
            tw.v_dense.gpu2cpu()
        pts_fwd.gpu2cpu()
        pts_inv.gpu2cpu()
        img_wrapped_fwd.gpu2cpu()
        img_wrapped_bwd.gpu2cpu()

        figsize = (12, 12)
        plt.figure(figsize=figsize)

        if eval_v:
            plt.subplot(332)
            tw.imshow_vx()
            plt.title('vx')
            plt.subplot(333)
            tw.imshow_vy()
            plt.title('vy')

        if eval_cell_idx:
            plt.subplot(331)
            cell_idx_disp = cell_idx.cpu.reshape(img.shape[0], -1)
            plt.imshow(cell_idx_disp)
            plt.title('tess (type {})'.format(tess))

        if show_downsampled_pts:
            # The point plots stay under this guard: the *_ds arrays they
            # draw exist only when the flag is set.
            ds = 20
            pts_src_grid = pts_src.cpu.reshape(tw.nRows, -1, 2)
            pts_src_ds = pts_src_grid[::ds, ::ds].reshape(-1, 2)
            pts_fwd_grid = pts_fwd.cpu.reshape(tw.nRows, -1, 2)
            pts_fwd_ds = pts_fwd_grid[::ds, ::ds].reshape(-1, 2)
            pts_inv_grid = pts_inv.cpu.reshape(tw.nRows, -1, 2)
            pts_inv_ds = pts_inv_grid[::ds, ::ds].reshape(-1, 2)

            use_lims = use_lims_when_plotting
            plt.subplot(334)
            plt.plot(pts_src_ds[:, 0], pts_src_ds[:, 1], 'r.')
            plt.title('pts ds')
            tw.config_plt()
            plt.subplot(335)
            plt.plot(pts_fwd_ds[:, 0], pts_fwd_ds[:, 1], 'g.')
            plt.title('fwd(pts)')
            tw.config_plt(axis_on_or_off='on', use_lims=use_lims)
            plt.subplot(336)
            plt.plot(pts_inv_ds[:, 0], pts_inv_ds[:, 1], 'b.')
            plt.title('inv(pts)')
            tw.config_plt(axis_on_or_off='on', use_lims=use_lims)

        plt.subplot(337)
        plt.imshow(img_original.cpu.astype(np.uint8))
        plt.title('img')
        # plt.axis('off')
        plt.subplot(338)
        plt.imshow(img_wrapped_fwd.cpu.astype(np.uint8))
        # plt.axis('off')
        plt.title('fwd(img)')
        plt.subplot(339)
        plt.imshow(img_wrapped_bwd.cpu.astype(np.uint8))
        # plt.axis('off')
        plt.title('inv(img)')

    return tw