# Normalize PSF kernels to sum up to one fs = gputools.normalize(fs) del X # ------------------------------------------------------------------------ # Latent image estimation # ------------------------------------------------------------------------ # Create OlaGPU instance with estimated PSF F = olaGPU.OlaGPU(fs, sx, 'valid', winaux=winaux) # Latent image estimation by performing one gradient descent step # multiplicative update is used which preserves positivity factor_gpu = F.cnvtp( mask_gpu * y_gpu) / (F.cnvtp(mask_gpu * F.cnv(x_gpu)) + tol) gputools.cliplower_GPU(factor_gpu, tol) x_gpu = x_gpu * factor_gpu x_max = x_gpu.get()[sf[0]:-sf[0], sf[1]:-sf[1]].max() gputools.clipupper_GPU(x_gpu, x_max) del factor_gpu del F # ------------------------------------------------------------------------ # For backup intermediate results # ------------------------------------------------------------------------ if backup: # Write intermediate results to disk incl. input imagetools.imwrite(y_gpu.get(), yname(i)) # Crop image to input size
# Normalize PSF kernels to sum up to one fs = gputools.normalize(fs) del X # ------------------------------------------------------------------------ # Latent image estimation # ------------------------------------------------------------------------ # Create OlaGPU instance with estimated PSF F = olaGPU.OlaGPU(fs,sx,'valid',winaux=winaux) # Latent image estimation by performing one gradient descent step # multiplicative update is used which preserves positivity factor_gpu = F.cnvtp(mask_gpu*y_gpu)/(F.cnvtp(mask_gpu*F.cnv(x_gpu))+tol) gputools.cliplower_GPU(factor_gpu, tol) x_gpu = x_gpu * factor_gpu x_max = x_gpu.get()[sf[0]:-sf[0],sf[1]:-sf[1]].max() gputools.clipupper_GPU(x_gpu, x_max) del factor_gpu del F # ------------------------------------------------------------------------ # For backup intermediate results # ------------------------------------------------------------------------ if backup: # Write intermediate results to disk incl. input imagetools.imwrite(y_gpu.get(), yname(i)) # Crop image to input size
def __init__(self, objective, x_init, options):
    """
    Run the projected Barzilai-Borwein iteration starting from x_init.

    The whole optimisation happens inside the constructor; afterwards the
    outcome is available as attributes of the instance.

    Input:
    objective    object offering compute_grad, compute_obj and compute_both
    x_init       starting point, handed to self.initialisation
    options      settings object; reads verbose, maxiter, unconstrained,
                 compute_both and compute_obj

    Attributes set:
    result       final iterate (same object as self.x)
    time         difference of time.clock() between start and end
    status       'Failure' while running, 'Success' once the loop is left

    Raises IOError if the norm of the initial vector is below 1e-12.
    """
    self.objective = objective
    self.options = options
    # NOTE(review): time.clock() no longer exists on Python >= 3.8
    self.time_start = time.clock()
    self.iter = 0
    self.status = 'Failure'

    # ------------------------------------------
    # Initialisation
    # ------------------------------------------
    self.initialisation(x_init)

    # ------------------------------------------
    # Sanity checks
    # ------------------------------------------
    x_norm = np.sqrt(cua.dot(self.x, self.x).get())
    if x_norm < 1e-12:
        raise IOError('Initial vector close to zero. Cannot proceed')

    # ------------------------------------------
    # Prime the pump
    # ------------------------------------------
    if options.verbose:
        print('Running Projected Barzilai Borwein:\n')

    # ------------------------------------------
    # Main iterative loop
    # ------------------------------------------
    for _ in range(options.maxiter):
        self.iter += 1
        self.show_status()

        # Differences of iterates and gradients since the last step
        delta_x = self.x - self.oldx
        delta_g = self.g - self.oldg
        if not options.unconstrained:
            clip2bound(delta_x, self.x, self.g)
            clip2bound(delta_g, self.x, self.g)
        self.dx = delta_x
        self.dg = delta_g

        # Check termination criteria
        self.check_termination()
        if self.term_reason:
            break

        # Remember current x & gradient before they are replaced
        self.oldx = self.x
        self.oldg = self.g

        # The step length formula alternates between even and odd
        # iterations; 0.00001 keeps the denominator away from zero
        if np.mod(self.iter, 2) == 0:
            numerator = cua.sum(delta_x * delta_x)
            denominator = 0.00001 + cua.sum(delta_x * delta_g)
        else:
            numerator = cua.sum(delta_x * delta_g)
            denominator = 0.00001 + cua.sum(delta_g * delta_g)
        step = (numerator / denominator).get()

        self.x = self.x - self.g * step
        if not options.unconstrained:
            # projection
            gputools.cliplower_GPU(self.x, 0)

        # Refresh gradient (and objective value where requested)
        if options.compute_both:
            self.oldobj = self.obj
            self.obj, self.g = objective.compute_both(self.x)
        elif options.compute_obj:
            self.g = objective.compute_grad(self.x)
            self.oldobj = self.obj
            self.obj = objective.compute_obj(self.x)
        else:
            self.g = objective.compute_grad(self.x)

    # ------------------------------------------
    # Final statistics and wrap up
    # ------------------------------------------
    self.time = time.clock() - self.time_start
    self.status = 'Success'

    if self.options.verbose:
        print(self.status)
        print(self.term_reason)
        print('Done\n')

    self.result = self.x
def process(opts):
    """
    Run online multi-frame blind deconvolution on a sequence of FITS frames.

    For every frame the PSF kernels are estimated for the current latent
    image, then the latent image is refined with one multiplicative update.
    Results are written as images into a results directory whose name
    encodes the parameter settings.

    Input:
    opts    settings object; the attributes read are doShow, backup, N, N0,
            f_alpha, f_beta, optiter, tol and overwrite

    Raises ValueError if opts.N0 < 1.  Terminates the interpreter with exit
    status 1 if the results directory already exists (and overwrite is not
    set), cannot be removed, or cannot be created.
    """
    # ========================================================================
    # Specify some parameter settings
    # ------------------------------------------------------------------------
    # Specify data path and file identifier
    DATAPATH = '/DATA/LSST/FITS'
    RESPATH = '../../../DATA/results'
    BASE_N = 141
    FILENAME = lambda i: '%s/v88827%03d-fz.R22.S11.fits' % (DATAPATH, (BASE_N + i))
    ID = 'LSST'

    # General
    doshow = opts.doShow      # put 1 to show intermediate results
    backup = opts.backup      # put 1 to write intermediate results to disk
    N = opts.N                # how many frames to process
    N0 = opts.N0              # number of averaged frames for initialisation
    if N0 < 1:
        raise ValueError('N0 must be at least 1 to initialise the latent image')

    # OlaGPU parameters
    sf = np.array([40, 40])   # estimated size of PSF
    csf = (3, 3)              # number of kernels across x and y direction
    overlap = 0.5             # overlap of neighboring patches in percent

    # Regularization parameters for kernel estimation
    f_alpha = opts.f_alpha    # promotes smoothness
    f_beta = opts.f_beta      # Tikhonov regularization
    optiter = opts.optiter    # number of iterations for minimization
    tol = opts.tol            # used below as lower clip / division guard

    # ========================================================================
    # Create helper functions for file handling
    #
    # HACK for chunking into available GPU mem:
    # loads one 1kx1k block out of the fits image
    xOffset = 2000
    yOffset = 0
    chunkSize = 1000
    yload = lambda i: 1. * fitsTools.readFITS(
        FILENAME(i), use_mask=True, norm=True)[
            yOffset:yOffset + chunkSize, xOffset:xOffset + chunkSize]

    # ------------------------------------------------------------------------
    # Some more code for backuping the results
    # ------------------------------------------------------------------------
    EXPPATH = '%s/%s_sf%dx%d_csf%dx%d_maxiter%d_alpha%.2f_beta%.2f' % \
        (RESPATH, ID, sf[0], sf[1], csf[0], csf[1], optiter, f_alpha, f_beta)

    xname = lambda i: '%s/x_%04d.png' % (EXPPATH, i)
    yname = lambda i: '%s/y_%04d.png' % (EXPPATH, i)
    fname = lambda i: '%s/f_%04d.png' % (EXPPATH, i)

    if os.path.exists(EXPPATH) and opts.overwrite:
        try:
            rmtree(EXPPATH)
        except OSError:
            print("[ERROR] removing old results dir: %s" % EXPPATH)
            raise SystemExit(1)
    elif os.path.exists(EXPPATH):
        print("[ERROR] results directory already exists, please remove or use '-o' to overwrite")
        raise SystemExit(1)

    # Create results path if not existing
    try:
        os.makedirs(EXPPATH)
    except OSError:
        print("[ERROR] creating results dir: %s" % EXPPATH)
        raise SystemExit(1)

    print('Results are saved to: \n %s \n' % EXPPATH)

    # ------------------------------------------------------------------------
    # For displaying intermediate results create target figure
    # ------------------------------------------------------------------------
    if doshow:
        print("showing intermediate results is currently disabled..")

    # ------------------------------------------------------------------------
    # Code for initialising the online multi-frame deconvolution
    # ------------------------------------------------------------------------
    # Initialisation of latent image by averaging the first N0 frames.
    # Frames are numbered from 1, so the range has to include N0 itself for
    # the division by N0 to yield a mean.
    y0 = 0.
    for i in np.arange(1, N0 + 1):
        y0 += yload(i)
    y0 /= N0
    y_gpu = cua.to_gpu(y0)

    # Pad image since we perform deconvolution with valid boundary conditions
    x_gpu = gputools.impad_gpu(y_gpu, sf - 1)

    # Create windows for OlaGPU
    sx = y0.shape + sf - 1
    sf2 = sf // 2             # integer half PSF size, used as slice index
    winaux = imagetools.win2winaux(sx, csf, overlap)

    # ------------------------------------------------------------------------
    # Loop over all frames and do online blind deconvolution
    # ------------------------------------------------------------------------
    import time as t
    # time.clock() no longer exists on Python >= 3.8; fall back to it only
    # where perf_counter is unavailable
    clock = getattr(t, 'perf_counter', None) or t.clock
    ti = clock()
    t1 = stopwatch.timer()
    t2 = stopwatch.timer()
    t3 = stopwatch.timer()
    t4 = stopwatch.timer()
    t4.start()
    for i in np.arange(1, N + 1):
        print('Processing frame %d/%d \r' % (i, N))

        # Load next observed image
        t3.start()
        y = yload(i)
        print("TIMER load: %s" % t3.elapsed())

        # Compute mask for determining saturated regions
        mask_gpu = 1. * cua.to_gpu(y < 1.)
        y_gpu = cua.to_gpu(y)

        # --------------------------------------------------------------------
        # PSF estimation
        # --------------------------------------------------------------------
        # Create OlaGPU instance with current estimate of latent image
        t2.start()
        X = olaGPU.OlaGPU(x_gpu, sf, 'valid', winaux=winaux)
        print("TIMER GPU:  %s" % t2.elapsed())

        t1.start()
        # PSF estimation for given estimate of latent image and current
        # observation
        f = X.deconv(y_gpu, mode='lbfgsb', alpha=f_alpha, beta=f_beta,
                     maxfun=optiter, verbose=10)
        print("TIMER Optimization:  %s" % t1.elapsed())
        fs = f[0]

        # Normalize PSF kernels to sum up to one
        fs = gputools.normalize(fs)

        # --------------------------------------------------------------------
        # Latent image estimation
        # --------------------------------------------------------------------
        # Create OlaGPU instance with estimated PSF
        t2.start()
        F = olaGPU.OlaGPU(fs, sx, 'valid', winaux=winaux)

        # Latent image estimation by performing one gradient descent step
        # multiplicative update is used which preserves positivity
        factor_gpu = F.cnvtp(mask_gpu * y_gpu) / \
            (F.cnvtp(mask_gpu * F.cnv(x_gpu)) + tol)
        gputools.cliplower_GPU(factor_gpu, tol)
        x_gpu = x_gpu * factor_gpu
        # Clip to the maximum found away from the image border
        x_max = x_gpu.get()[sf[0]:-sf[0], sf[1]:-sf[1]].max()
        gputools.clipupper_GPU(x_gpu, x_max)
        print("TIMER GPU:  %s" % t2.elapsed())

        # --------------------------------------------------------------------
        # For backup intermediate results
        # --------------------------------------------------------------------
        if backup or i == N:
            # Write intermediate results to disk incl. input
            y_img = y_gpu.get() * 1e5
            fitsTools.asinhScale(y_img, 450, -50, minCut=0.0, maxCut=40000,
                                 fname=yname(i))

            # Crop image to input size
            xi = (x_gpu.get()[sf2[0]:-sf2[0], sf2[1]:-sf2[1]] / x_max) * 1e5
            fitsTools.fitsStats(xi)
            fitsTools.asinhScale(xi, 450, -50, minCut=0.0, maxCut=40000,
                                 fname=xname(i))

            # Concatenate PSF kernels for ease of visualisation
            psf_grid = imagetools.gridF(fs, csf)
            psf_grid = psf_grid * 1e5
            fitsTools.asinhScale(psf_grid, 450, -50, minCut=0.0,
                                 maxCut=40000, fname=fname(i))

        # Interactive display of intermediate results is disabled (see the
        # message printed for doshow above).

    tf = clock()
    print('Time elapsed for total image sequence %.3f' % (tf - ti))
    # ------------------------------------------------------------------------
    print("TOTAL: %.3f" % (t4.elapsed()))
    print("OptimizeCPUtime %.3f %.3f" % (t1.getTotal(), 100 * (t1.getTotal() / t4.getTotal())))
    print("GPUtime %.3f %.3f" % (t2.getTotal(), 100 * (t2.getTotal() / t4.getTotal())))
    print("LoadTime %.3f %.3f" % (t3.getTotal(), 100 * (t3.getTotal() / t4.getTotal())))
def deconv(self, y, z0=None, mode='lbfgsb', maxfun=100, alpha=0., beta=0.01,
           verbose=10, m=5, edgetapering=1, factor=3, gamma=1e-4):
    """
    deconv implements various deconvolution methods. It expects a
    blurry image and outputs an estimated blur kernel or a sharp latent
    image. Currently, the following algorithms are implemented:

    'lbfgsb'   uses the lbfgsb optimization code to minimize the following
               constrained regularized problem:

               |y-Zu|^2 + alpha * |grad(u)|^2 + beta * |u|^2  s.t. u>0

               The alpha term promotes smoothness of the solution, while
               the beta term is an ordinary Tikhonov regularization

    'direct'   as above but solves the problem directly, i.e. via
               division in Fourier space instead of an iterative
               minimization scheme at the cost of the positivity
               constraint.

    'xdirect'  as 'direct' but without corrective term which reduces
               artifacts stemming from the windowing

    'gdirect'  solves the following problem

               |grad(y)-grad(Zu)|^2 + alpha * |grad(u)|^2 + beta * |u|^2

               This is particularly useful for kernel estimation in the
               case of blurred natural images featuring many edges. The
               advantage vs. 'direct' is the suppression of noise in the
               estimated PSF kernels.

    'sparse'   follows 'Fast Image Deconvolution using Hyper-Laplacian
               Priors' by Dilip Krishnan and Rob Fergus, NIPS 2009.
               It minimizes the following problem

               |y-Zu|^2 + gamma * |grad(u)|^(2/3)

               via half-quadratic splitting. See paper for details.
               In this mode the beta argument is ignored: beta is used
               as the internal splitting weight and starts at 1.

    ----------------------------------------------------------------------
    Usage:

    Call:   Z = OlaGPU(z, sw, mode, winaux)
            u = Z.deconv(y)

    Input:  y             blurry image (GPU array or numpy array)
            z0            optional start value for 'lbfgsb'; if None the
                          result of self.cnvtp(y) is used
            maxfun, m     passed on to fmin_l_bfgs_b ('lbfgsb' only)
            verbose       passed on as iprint to fmin_l_bfgs_b
            edgetapering  1 enables edge tapering of the input
            factor        border width factor for cropping in 'direct'

    Output: 'lbfgsb' returns the tuple (u, f, d) where u is the minimizer
            reshaped to image/PSF size and f, d are the second and third
            return values of fmin_l_bfgs_b; all other modes return u as
            GPU array.

    Raises: IOError if y does not have the size of a blurred image,
            NotImplementedError for an unknown deconv or boundary mode,
            Exception if 'gdirect' is used for image estimation.
    """
    if not all(np.array(y.shape) == self.sy):
        raise IOError('Sizes incompatible. Expected blurred image!')

    # Potential data transfer to GPU; GPU input is copied so that the
    # in-place operations below leave the caller's array untouched
    if isinstance(y, cua.GPUArray):
        y_gpu = 1. * y
    else:
        y_gpu = cua.to_gpu(y.astype(np.float32))

    # --------------------------------------------------------------------
    if mode == 'lbfgsb':

        from scipy.optimize import fmin_l_bfgs_b

        self.res_gpu = cua.empty_like(y_gpu)

        if self.__id__ == 'X':
            sz = (int(np.prod(self.winaux.csf)),
                  int(self.sz[0]), int(self.sz[1]))
        elif self.__id__ == 'F':
            sz = self.sz

        lf = int(np.prod(sz))

        # Identity test: "== None" would be evaluated elementwise for
        # array input and cannot be used as condition
        if z0 is None:
            z0 = self.cnvtp(y_gpu).get()
        z0 = z0.flatten()

        lb = 0.        # lower bound
        ub = np.inf    # upper bound
        zhat = fmin_l_bfgs_b(func=self.cnvinv_objfun, x0=z0,
                             fprime=self.cnvinv_gradfun,
                             args=[sz, y_gpu, alpha, beta],
                             factr=10., pgtol=10e-15,
                             maxfun=maxfun, bounds=[(lb, ub)] * lf,
                             m=m, iprint=verbose)

        return np.reshape(zhat[0], sz), zhat[1], zhat[2]

    # --------------------------------------------------------------------
    elif mode == 'gdirect':

        # Use this method only for estimating the PSF
        if self.__id__ != 'X':
            raise Exception('Use direct mode for image estimation!')

        win = self.winaux

        # Compute Laplacian
        if alpha > 0.:
            gx_gpu = gputools.pad_cpu2gpu(
                np.array([[-1, 1], [-1, 1], [-1, 1]]),
                self.sfft_gpu, dtype='complex')
            gy_gpu = gputools.pad_cpu2gpu(
                np.array([[-1, -1, -1], [1, 1, 1]]),
                self.sfft_gpu, dtype='complex')
            self.plan.execute(gx_gpu)
            self.plan.execute(gy_gpu)
            L_gpu = gx_gpu * gx_gpu.conj() + gy_gpu * gy_gpu.conj()
        else:
            L_gpu = cua.zeros(self.fft_gpu.shape, np.complex64)

        if edgetapering == 1:
            gputools.edgetaper_gpu(y_gpu, 2 * self.sf, 'barthann')

        # Transfer to GPU
        if isinstance(self.x, cua.GPUArray):
            x_gpu = self.x
        else:
            x_gpu = cua.to_gpu(self.x)

        # Compute gradient images
        xx_gpu, xy_gpu = gputools.gradient_gpu(x_gpu)
        yx_gpu, yy_gpu = gputools.gradient_gpu(y_gpu)

        # Chop and pad business
        if self.mode == 'valid':
            offset = self.sf - 1
        elif self.mode == 'same':
            offset = np.floor(self.sf / 2)
        else:
            raise NotImplementedError('Not a valid mode!')

        yx_gpu = gputools.chop_pad_GPU(yx_gpu, win.csf, win.sw, win.nhop,
                                       self.sfft_gpu, offset, 'complex')
        yy_gpu = gputools.chop_pad_GPU(yy_gpu, win.csf, win.sw, win.nhop,
                                       self.sfft_gpu, offset, 'complex')
        xx_gpu = gputools.chop_pad_GPU(xx_gpu, win.csf, win.sw, win.nhop,
                                       self.sfft_gpu, dtype='complex')
        xy_gpu = gputools.chop_pad_GPU(xy_gpu, win.csf, win.sw, win.nhop,
                                       self.sfft_gpu, dtype='complex')

        # Here each patch should be windowed to reduce ringing artifacts,
        # however since we are working in the gradient domain, the effect
        # is negligible

        # Compute Fourier transform
        self.fft(yx_gpu, self.fft_gpu.shape[0])
        self.fft(yy_gpu, self.fft_gpu.shape[0])
        self.fft(xx_gpu, self.fft_gpu.shape[0])
        self.fft(xy_gpu, self.fft_gpu.shape[0])

        # Do division in Fourier space
        z_gpu = gputools.comp_ola_gdeconv(xx_gpu, xy_gpu, yx_gpu, yy_gpu,
                                          L_gpu, alpha, beta)

        # Computing the inverse FFT (conjugate, forward FFT, conjugate,
        # divide by patch size)
        z_gpu = z_gpu.conj()
        self.fft(z_gpu, self.fft_gpu.shape[0])
        z_gpu = z_gpu.conj() / np.prod(z_gpu.shape[-2::])

        # Crop out the kernels
        return gputools.crop_stack_GPU(z_gpu, self.sf)

    # --------------------------------------------------------------------
    elif mode == 'direct':

        const_gpu = cua.empty_like(y_gpu)
        const_gpu.fill(1.)

        # First deconvolution without corrective term
        y_gpu = self.deconv(y_gpu, mode='xdirect', alpha=alpha,
                            beta=beta, edgetapering=edgetapering)
        gputools.cliplower_GPU(y_gpu, 0)

        # Now same for constant image to get rid of window artifacts
        if edgetapering == 1:
            gputools.edgetaper_gpu(const_gpu, 2 * self.sf, 'barthann')

        const_gpu = self.deconv(const_gpu, mode='xdirect', alpha=alpha,
                                beta=beta, edgetapering=edgetapering)
        gputools.edgetaper_gpu(const_gpu, 2 * self.sf, 'barthann')
        gputools.clip_GPU(const_gpu, 0.01, 10.)

        # Division of deconvolved latent and constant image to get rid
        # of artifacts stemming from windowing
        y_gpu = y_gpu / const_gpu
        sz = y_gpu.shape

        # Do cropping and padding since edges are corrupted by division
        y_gpu = gputools.crop_gpu2cpu(
            y_gpu, sz - factor * self.sf - 1,
            offset=np.floor((factor * self.sf - 1) / 2.))
        y_gpu = gputools.impad_gpu(y_gpu,
                                   tuple(np.array(sz) - y_gpu.shape))

        return y_gpu

    # --------------------------------------------------------------------
    elif mode == 'xdirect':

        win = self.winaux

        # Compute Laplacian
        if alpha > 0.:
            gx_gpu = gputools.pad_cpu2gpu(
                np.array([[-1, 1]]), self.sfft_gpu, dtype='complex')
            gy_gpu = gputools.pad_cpu2gpu(
                np.array([[-1], [1]]), self.sfft_gpu, dtype='complex')
            self.plan.execute(gx_gpu)
            self.plan.execute(gy_gpu)
            L_gpu = gx_gpu * gx_gpu.conj() + gy_gpu * gy_gpu.conj()
        else:
            L_gpu = cua.zeros(self.fft_gpu.shape, np.complex64)

        # Edgetapering of blurry input image
        if edgetapering == 1:
            gputools.edgetaper_gpu(y_gpu, 3 * self.sf, 'barthann')

        if self.mode == 'valid':
            offset = self.sf - 1
        elif self.mode == 'same':
            offset = np.floor(self.sf / 2)
        else:
            raise NotImplementedError('Not a valid mode!')

        # Chop and pad business. This has to work on y_gpu: passing the
        # raw argument y would drop the edge tapering applied above and,
        # for numpy input, the transfer to the GPU.
        y_gpu = gputools.chop_pad_GPU(y_gpu, win.csf, win.sw, win.nhop,
                                      self.sfft_gpu, offset, 'complex')
        ws_gpu = gputools.pad_stack_GPU(win.ws_gpu, self.sfft_gpu,
                                        dtype='complex')

        # Windowing
        y_gpu = ws_gpu * y_gpu

        # Compute FFT
        self.fft(y_gpu, self.fft_gpu.shape[0])

        # Do division in Fourier space
        z_gpu = gputools.comp_ola_deconv(self.fft_gpu, y_gpu, L_gpu,
                                         alpha, beta)

        # Computing the inverse FFT
        z_gpu = z_gpu.conj()
        self.fft(z_gpu, self.fft_gpu.shape[0])
        z_gpu = z_gpu.conj() / np.prod(z_gpu.shape[-2::])

        # Crop the solution to correct output size
        if self.__id__ == 'X':
            return gputools.crop_stack_GPU(z_gpu, self.sf)
        elif self.__id__ == 'F':
            zs_gpu = gputools.crop_stack_GPU(z_gpu, win.sw)
            zc_gpu = gputools.ola_GPU_test(zs_gpu, win.csf, win.sw,
                                           win.nhop)
            return gputools.crop_gpu2cpu(zc_gpu, self.sx)

    # --------------------------------------------------------------------
    elif mode == 'sparse':

        win = self.winaux

        # Compute Laplacian
        gx_gpu = gputools.pad_cpu2gpu(
            np.sqrt(2.) / 2. * np.array([[-1, 1]]),
            self.sfft_gpu, dtype='complex')
        gy_gpu = gputools.pad_cpu2gpu(
            np.sqrt(2.) / 2. * np.array([[-1], [1]]),
            self.sfft_gpu, dtype='complex')
        self.plan.execute(gx_gpu)
        self.plan.execute(gy_gpu)
        L_gpu = gx_gpu * gx_gpu.conj() + gy_gpu * gy_gpu.conj()

        const_gpu = cua.empty_like(y_gpu)
        const_gpu.fill(1.)

        # Edgetapering
        if edgetapering == 1:
            gputools.edgetaper_gpu(y_gpu, 2 * self.sf, 'barthann')
            gputools.edgetaper_gpu(const_gpu, 2 * self.sf, 'barthann')

        # Parameter settings (the beta argument is overridden here)
        beta = 1.
        beta_rate = 2. * np.sqrt(2.)
        beta_max = 2.**8

        # Initialisation of x with copy of y
        x_gpu = 1 * y_gpu

        if self.mode == 'valid':
            offset = self.sf - 1
        elif self.mode == 'same':
            offset = np.floor(self.sf / 2)
        else:
            raise NotImplementedError('Not a valid mode!')

        # Chop and pad business
        y_gpu = gputools.chop_pad_GPU(y_gpu, win.csf, win.sw, win.nhop,
                                      self.sfft_gpu, offset, 'complex')
        const_gpu = gputools.chop_pad_GPU(const_gpu, win.csf, win.sw,
                                          win.nhop, self.sfft_gpu, offset,
                                          'complex')
        ws_gpu = gputools.pad_stack_GPU(win.ws_gpu, self.sfft_gpu, offset,
                                        dtype='complex')

        # Windowing
        y_gpu = y_gpu * ws_gpu

        # Constant image for corrective weighting term
        const_gpu = const_gpu * ws_gpu
        del ws_gpu
        self.fft(const_gpu, self.fft_gpu.shape[0])
        const_gpu = gputools.comp_ola_deconv(self.fft_gpu, const_gpu,
                                             L_gpu, alpha, gamma)
        const_gpu = const_gpu.conj()
        self.fft(const_gpu, self.fft_gpu.shape[0])
        const_gpu = const_gpu.conj() / np.prod(const_gpu.shape[-2::])
        const_gpu = gputools.crop_stack_GPU(const_gpu, win.sw)
        const_gpu = const_gpu * win.ws_gpu
        const_gpu = gputools.ola_GPU_test(const_gpu, win.csf, win.sw,
                                          win.nhop)
        const_gpu = gputools.crop_gpu2cpu(const_gpu, self.sx)

        gputools.cliplower_GPU(const_gpu, 0.01)
        const_gpu = 0.01 / const_gpu

        # Precompute F'y
        self.fft(y_gpu, self.fft_gpu.shape[0])
        y_gpu = y_gpu * self.fft_gpu.conj()

        while beta < beta_max:
            # Compute gradient images of x
            xx_gpu, xy_gpu = gputools.gradient_gpu(x_gpu)
            del x_gpu

            # w sub-problem for alpha 2/3
            gputools.modify_sparse23_gpu(xx_gpu, beta)
            gputools.modify_sparse23_gpu(xy_gpu, beta)

            # Chop and pad to size of FFT
            xx_gpu = gputools.chop_pad_GPU(xx_gpu, win.csf, win.sw,
                                           win.nhop, self.sfft_gpu,
                                           dtype='complex')
            xy_gpu = gputools.chop_pad_GPU(xy_gpu, win.csf, win.sw,
                                           win.nhop, self.sfft_gpu,
                                           dtype='complex')

            # Compute Fourier transform
            self.fft(xx_gpu, self.fft_gpu.shape[0])
            self.fft(xy_gpu, self.fft_gpu.shape[0])

            # Do division in Fourier space
            x_gpu = gputools.comp_ola_sdeconv(gx_gpu, gy_gpu, xx_gpu,
                                              xy_gpu, y_gpu, self.fft_gpu,
                                              L_gpu, alpha, beta, gamma)
            del xx_gpu, xy_gpu

            # Computing the inverse FFT
            x_gpu = x_gpu.conj()
            self.fft(x_gpu, self.fft_gpu.shape[0])
            x_gpu = x_gpu.conj()
            x_gpu /= np.prod(x_gpu.shape[-2::])

            # Ola and cropping
            x_gpu = gputools.crop_stack_GPU(x_gpu, win.sw)
            x_gpu = x_gpu * win.ws_gpu
            x_gpu = gputools.ola_GPU_test(x_gpu, win.csf, win.sw, win.nhop)
            x_gpu = gputools.crop_gpu2cpu(x_gpu, self.sx)

            # Enforce positivity
            x_gpu = x_gpu * const_gpu
            gputools.cliplower_GPU(x_gpu, 0.)

            beta *= beta_rate

        return x_gpu

    else:
        raise NotImplementedError('Not a valid deconv mode!')
def deconv(self, y, z0=None, mode='lbfgsb', maxfun=100, alpha=0., beta=0.01,
           verbose=10, m=5, edgetapering=1, factor=3, gamma=1e-4):
    """
    deconv implements various deconvolution methods. It expects a
    blurry image and outputs an estimated blur kernel or a sharp latent
    image. Currently, the following algorithms are implemented:

    'lbfgsb'   uses the lbfgsb optimization code to minimize the following
               constrained regularized problem:

               |y-Zu|^2 + alpha * |grad(u)|^2 + beta * |u|^2  s.t. u>0

               The alpha term promotes smoothness of the solution, while
               the beta term is an ordinary Tikhonov regularization

    'direct'   as above but solves the problem directly, i.e. via
               division in Fourier space instead of an iterative
               minimization scheme at the cost of the positivity
               constraint.

    'xdirect'  as 'direct' but without corrective term which reduces
               artifacts stemming from the windowing

    'gdirect'  solves the following problem

               |grad(y)-grad(Zu)|^2 + alpha * |grad(u)|^2 + beta * |u|^2

               This is particularly useful for kernel estimation in the
               case of blurred natural images featuring many edges. The
               advantage vs. 'direct' is the suppression of noise in the
               estimated PSF kernels.

    'sparse'   follows 'Fast Image Deconvolution using Hyper-Laplacian
               Priors' by Dilip Krishnan and Rob Fergus, NIPS 2009.
               It minimizes the following problem

               |y-Zu|^2 + gamma * |grad(u)|^(2/3)

               via half-quadratic splitting. See paper for details.
               In this mode the beta argument is ignored: beta is used
               as the internal splitting weight and starts at 1.

    ----------------------------------------------------------------------
    Usage:

    Call:   Z = OlaGPU(z, sw, mode, winaux)
            u = Z.deconv(y)

    Input:  y             blurry image (GPU array or numpy array)
            z0            optional start value for 'lbfgsb'; if None the
                          result of self.cnvtp(y) is used
            maxfun, m     passed on to fmin_l_bfgs_b ('lbfgsb' only)
            verbose       passed on as iprint to fmin_l_bfgs_b
            edgetapering  1 enables edge tapering of the input
            factor        border width factor for cropping in 'direct'

    Output: 'lbfgsb' returns the tuple (u, f, d) where u is the minimizer
            reshaped to image/PSF size and f, d are the second and third
            return values of fmin_l_bfgs_b; all other modes return u as
            GPU array.

    Raises: IOError if y does not have the size of a blurred image,
            NotImplementedError for an unknown deconv or boundary mode,
            Exception if 'gdirect' is used for image estimation.
    """
    if not all(np.array(y.shape) == self.sy):
        raise IOError('Sizes incompatible. Expected blurred image!')

    # Potential data transfer to GPU; GPU input is copied so that the
    # in-place operations below leave the caller's array untouched
    if isinstance(y, cua.GPUArray):
        y_gpu = 1. * y
    else:
        y_gpu = cua.to_gpu(y.astype(np.float32))

    # --------------------------------------------------------------------
    if mode == 'lbfgsb':

        from scipy.optimize import fmin_l_bfgs_b

        self.res_gpu = cua.empty_like(y_gpu)

        if self.__id__ == 'X':
            sz = (int(np.prod(self.winaux.csf)),
                  int(self.sz[0]), int(self.sz[1]))
        elif self.__id__ == 'F':
            sz = self.sz

        lf = int(np.prod(sz))

        # Identity test: "== None" would be evaluated elementwise for
        # array input and cannot be used as condition
        if z0 is None:
            z0 = self.cnvtp(y_gpu).get()
        z0 = z0.flatten()

        lb = 0.        # lower bound
        ub = np.inf    # upper bound
        zhat = fmin_l_bfgs_b(func=self.cnvinv_objfun, x0=z0,
                             fprime=self.cnvinv_gradfun,
                             args=[sz, y_gpu, alpha, beta],
                             factr=10., pgtol=10e-15,
                             maxfun=maxfun, bounds=[(lb, ub)] * lf,
                             m=m, iprint=verbose)

        return np.reshape(zhat[0], sz), zhat[1], zhat[2]

    # --------------------------------------------------------------------
    elif mode == 'gdirect':

        # Use this method only for estimating the PSF
        if self.__id__ != 'X':
            raise Exception('Use direct mode for image estimation!')

        win = self.winaux

        # Compute Laplacian
        if alpha > 0.:
            gx_gpu = gputools.pad_cpu2gpu(
                np.array([[-1, 1], [-1, 1], [-1, 1]]),
                self.sfft_gpu, dtype='complex')
            gy_gpu = gputools.pad_cpu2gpu(
                np.array([[-1, -1, -1], [1, 1, 1]]),
                self.sfft_gpu, dtype='complex')
            self.plan.execute(gx_gpu)
            self.plan.execute(gy_gpu)
            L_gpu = gx_gpu * gx_gpu.conj() + gy_gpu * gy_gpu.conj()
        else:
            L_gpu = cua.zeros(self.fft_gpu.shape, np.complex64)

        if edgetapering == 1:
            gputools.edgetaper_gpu(y_gpu, 2 * self.sf, 'barthann')

        # Transfer to GPU
        if isinstance(self.x, cua.GPUArray):
            x_gpu = self.x
        else:
            x_gpu = cua.to_gpu(self.x)

        # Compute gradient images
        xx_gpu, xy_gpu = gputools.gradient_gpu(x_gpu)
        yx_gpu, yy_gpu = gputools.gradient_gpu(y_gpu)

        # Chop and pad business
        if self.mode == 'valid':
            offset = self.sf - 1
        elif self.mode == 'same':
            offset = np.floor(self.sf / 2)
        else:
            raise NotImplementedError('Not a valid mode!')

        yx_gpu = gputools.chop_pad_GPU(yx_gpu, win.csf, win.sw, win.nhop,
                                       self.sfft_gpu, offset, 'complex')
        yy_gpu = gputools.chop_pad_GPU(yy_gpu, win.csf, win.sw, win.nhop,
                                       self.sfft_gpu, offset, 'complex')
        xx_gpu = gputools.chop_pad_GPU(xx_gpu, win.csf, win.sw, win.nhop,
                                       self.sfft_gpu, dtype='complex')
        xy_gpu = gputools.chop_pad_GPU(xy_gpu, win.csf, win.sw, win.nhop,
                                       self.sfft_gpu, dtype='complex')

        # Here each patch should be windowed to reduce ringing artifacts,
        # however since we are working in the gradient domain, the effect
        # is negligible

        # Compute Fourier transform
        self.fft(yx_gpu, self.fft_gpu.shape[0])
        self.fft(yy_gpu, self.fft_gpu.shape[0])
        self.fft(xx_gpu, self.fft_gpu.shape[0])
        self.fft(xy_gpu, self.fft_gpu.shape[0])

        # Do division in Fourier space
        z_gpu = gputools.comp_ola_gdeconv(xx_gpu, xy_gpu, yx_gpu, yy_gpu,
                                          L_gpu, alpha, beta)

        # Computing the inverse FFT (conjugate, forward FFT, conjugate,
        # divide by patch size)
        z_gpu = z_gpu.conj()
        self.fft(z_gpu, self.fft_gpu.shape[0])
        z_gpu = z_gpu.conj() / np.prod(z_gpu.shape[-2::])

        # Crop out the kernels
        return gputools.crop_stack_GPU(z_gpu, self.sf)

    # --------------------------------------------------------------------
    elif mode == 'direct':

        const_gpu = cua.empty_like(y_gpu)
        const_gpu.fill(1.)

        # First deconvolution without corrective term
        y_gpu = self.deconv(y_gpu, mode='xdirect', alpha=alpha,
                            beta=beta, edgetapering=edgetapering)
        gputools.cliplower_GPU(y_gpu, 0)

        # Now same for constant image to get rid of window artifacts
        if edgetapering == 1:
            gputools.edgetaper_gpu(const_gpu, 2 * self.sf, 'barthann')

        const_gpu = self.deconv(const_gpu, mode='xdirect', alpha=alpha,
                                beta=beta, edgetapering=edgetapering)
        gputools.edgetaper_gpu(const_gpu, 2 * self.sf, 'barthann')
        gputools.clip_GPU(const_gpu, 0.01, 10.)

        # Division of deconvolved latent and constant image to get rid
        # of artifacts stemming from windowing
        y_gpu = y_gpu / const_gpu
        sz = y_gpu.shape

        # Do cropping and padding since edges are corrupted by division
        y_gpu = gputools.crop_gpu2cpu(
            y_gpu, sz - factor * self.sf - 1,
            offset=np.floor((factor * self.sf - 1) / 2.))
        y_gpu = gputools.impad_gpu(y_gpu,
                                   tuple(np.array(sz) - y_gpu.shape))

        return y_gpu

    # --------------------------------------------------------------------
    elif mode == 'xdirect':

        win = self.winaux

        # Compute Laplacian
        if alpha > 0.:
            gx_gpu = gputools.pad_cpu2gpu(
                np.array([[-1, 1]]), self.sfft_gpu, dtype='complex')
            gy_gpu = gputools.pad_cpu2gpu(
                np.array([[-1], [1]]), self.sfft_gpu, dtype='complex')
            self.plan.execute(gx_gpu)
            self.plan.execute(gy_gpu)
            L_gpu = gx_gpu * gx_gpu.conj() + gy_gpu * gy_gpu.conj()
        else:
            L_gpu = cua.zeros(self.fft_gpu.shape, np.complex64)

        # Edgetapering of blurry input image
        if edgetapering == 1:
            gputools.edgetaper_gpu(y_gpu, 3 * self.sf, 'barthann')

        if self.mode == 'valid':
            offset = self.sf - 1
        elif self.mode == 'same':
            offset = np.floor(self.sf / 2)
        else:
            raise NotImplementedError('Not a valid mode!')

        # Chop and pad business. This has to work on y_gpu: passing the
        # raw argument y would drop the edge tapering applied above and,
        # for numpy input, the transfer to the GPU.
        y_gpu = gputools.chop_pad_GPU(y_gpu, win.csf, win.sw, win.nhop,
                                      self.sfft_gpu, offset, 'complex')
        ws_gpu = gputools.pad_stack_GPU(win.ws_gpu, self.sfft_gpu,
                                        dtype='complex')

        # Windowing
        y_gpu = ws_gpu * y_gpu

        # Compute FFT
        self.fft(y_gpu, self.fft_gpu.shape[0])

        # Do division in Fourier space
        z_gpu = gputools.comp_ola_deconv(self.fft_gpu, y_gpu, L_gpu,
                                         alpha, beta)

        # Computing the inverse FFT
        z_gpu = z_gpu.conj()
        self.fft(z_gpu, self.fft_gpu.shape[0])
        z_gpu = z_gpu.conj() / np.prod(z_gpu.shape[-2::])

        # Crop the solution to correct output size
        if self.__id__ == 'X':
            return gputools.crop_stack_GPU(z_gpu, self.sf)
        elif self.__id__ == 'F':
            zs_gpu = gputools.crop_stack_GPU(z_gpu, win.sw)
            zc_gpu = gputools.ola_GPU_test(zs_gpu, win.csf, win.sw,
                                           win.nhop)
            return gputools.crop_gpu2cpu(zc_gpu, self.sx)

    # --------------------------------------------------------------------
    elif mode == 'sparse':

        win = self.winaux

        # Compute Laplacian
        gx_gpu = gputools.pad_cpu2gpu(
            np.sqrt(2.) / 2. * np.array([[-1, 1]]),
            self.sfft_gpu, dtype='complex')
        gy_gpu = gputools.pad_cpu2gpu(
            np.sqrt(2.) / 2. * np.array([[-1], [1]]),
            self.sfft_gpu, dtype='complex')
        self.plan.execute(gx_gpu)
        self.plan.execute(gy_gpu)
        L_gpu = gx_gpu * gx_gpu.conj() + gy_gpu * gy_gpu.conj()

        const_gpu = cua.empty_like(y_gpu)
        const_gpu.fill(1.)

        # Edgetapering
        if edgetapering == 1:
            gputools.edgetaper_gpu(y_gpu, 2 * self.sf, 'barthann')
            gputools.edgetaper_gpu(const_gpu, 2 * self.sf, 'barthann')

        # Parameter settings (the beta argument is overridden here)
        beta = 1.
        beta_rate = 2. * np.sqrt(2.)
        beta_max = 2.**8

        # Initialisation of x with copy of y
        x_gpu = 1 * y_gpu

        if self.mode == 'valid':
            offset = self.sf - 1
        elif self.mode == 'same':
            offset = np.floor(self.sf / 2)
        else:
            raise NotImplementedError('Not a valid mode!')

        # Chop and pad business
        y_gpu = gputools.chop_pad_GPU(y_gpu, win.csf, win.sw, win.nhop,
                                      self.sfft_gpu, offset, 'complex')
        const_gpu = gputools.chop_pad_GPU(const_gpu, win.csf, win.sw,
                                          win.nhop, self.sfft_gpu, offset,
                                          'complex')
        ws_gpu = gputools.pad_stack_GPU(win.ws_gpu, self.sfft_gpu, offset,
                                        dtype='complex')

        # Windowing
        y_gpu = y_gpu * ws_gpu

        # Constant image for corrective weighting term
        const_gpu = const_gpu * ws_gpu
        del ws_gpu
        self.fft(const_gpu, self.fft_gpu.shape[0])
        const_gpu = gputools.comp_ola_deconv(self.fft_gpu, const_gpu,
                                             L_gpu, alpha, gamma)
        const_gpu = const_gpu.conj()
        self.fft(const_gpu, self.fft_gpu.shape[0])
        const_gpu = const_gpu.conj() / np.prod(const_gpu.shape[-2::])
        const_gpu = gputools.crop_stack_GPU(const_gpu, win.sw)
        const_gpu = const_gpu * win.ws_gpu
        const_gpu = gputools.ola_GPU_test(const_gpu, win.csf, win.sw,
                                          win.nhop)
        const_gpu = gputools.crop_gpu2cpu(const_gpu, self.sx)

        gputools.cliplower_GPU(const_gpu, 0.01)
        const_gpu = 0.01 / const_gpu

        # Precompute F'y
        self.fft(y_gpu, self.fft_gpu.shape[0])
        y_gpu = y_gpu * self.fft_gpu.conj()

        while beta < beta_max:
            # Compute gradient images of x
            xx_gpu, xy_gpu = gputools.gradient_gpu(x_gpu)
            del x_gpu

            # w sub-problem for alpha 2/3
            gputools.modify_sparse23_gpu(xx_gpu, beta)
            gputools.modify_sparse23_gpu(xy_gpu, beta)

            # Chop and pad to size of FFT
            xx_gpu = gputools.chop_pad_GPU(xx_gpu, win.csf, win.sw,
                                           win.nhop, self.sfft_gpu,
                                           dtype='complex')
            xy_gpu = gputools.chop_pad_GPU(xy_gpu, win.csf, win.sw,
                                           win.nhop, self.sfft_gpu,
                                           dtype='complex')

            # Compute Fourier transform
            self.fft(xx_gpu, self.fft_gpu.shape[0])
            self.fft(xy_gpu, self.fft_gpu.shape[0])

            # Do division in Fourier space
            x_gpu = gputools.comp_ola_sdeconv(gx_gpu, gy_gpu, xx_gpu,
                                              xy_gpu, y_gpu, self.fft_gpu,
                                              L_gpu, alpha, beta, gamma)
            del xx_gpu, xy_gpu

            # Computing the inverse FFT
            x_gpu = x_gpu.conj()
            self.fft(x_gpu, self.fft_gpu.shape[0])
            x_gpu = x_gpu.conj()
            x_gpu /= np.prod(x_gpu.shape[-2::])

            # Ola and cropping
            x_gpu = gputools.crop_stack_GPU(x_gpu, win.sw)
            x_gpu = x_gpu * win.ws_gpu
            x_gpu = gputools.ola_GPU_test(x_gpu, win.csf, win.sw, win.nhop)
            x_gpu = gputools.crop_gpu2cpu(x_gpu, self.sx)

            # Enforce positivity
            x_gpu = x_gpu * const_gpu
            gputools.cliplower_GPU(x_gpu, 0.)

            beta *= beta_rate

        return x_gpu

    else:
        raise NotImplementedError('Not a valid deconv mode!')
def __init__(self, objective, x_init, options): self.objective = objective self.options = options self.time_start = time.clock() self.iter = 0 self.status = 'Failure' # ------------------------------------------ # Initialisation # ----------------------------------------- self.initialisation(x_init) # ------------------------------------------ # Sanity checks # ----------------------------------------- if np.sqrt(cua.dot(self.x, self.x).get()) < 1e-12: raise IOError('Initial vector close to zero. Cannot proceed') # ------------------------------------------ # Prime the pump # ----------------------------------------- if options.verbose: print 'Running Projected Barzilai Borwein:\n' # ------------------------------------------ # Main iterative loop # ----------------------------------------- for i in range(options.maxiter): self.iter += 1 self.show_status() dx = self.x - self.oldx dg = self.g - self.oldg if not options.unconstrained: clip2bound(dx, self.x, self.g) clip2bound(dg, self.x, self.g) self.dx = dx self.dg = dg # Check termination criteria self.check_termination() if self.term_reason: break # store x & gradient self.oldx = self.x self.oldg = self.g # update x & gradient if (np.mod(self.iter, 2) == 0): step = (cua.sum(dx * dx) / (0.00001 + cua.sum(dx * dg))).get() else: step = (cua.sum(dx * dg) / (0.00001 + cua.sum(dg * dg))).get() self.x = self.x - self.g * step if not options.unconstrained: gputools.cliplower_GPU(self.x, 0) # projection if options.compute_both: self.oldobj = self.obj self.obj, self.g = objective.compute_both(self.x) elif options.compute_obj: self.g = objective.compute_grad(self.x) self.oldobj = self.obj self.obj = objective.compute_obj(self.x) else: self.g = objective.compute_grad(self.x) # ------------------------------------------ # Final statistics and wrap up # ----------------------------------------- self.time = time.clock() - self.time_start self.status = 'Success' if self.options.verbose: print self.status print self.term_reason 
print 'Done\n' self.result = self.x
def process(opts): # ============================================================================ # Specify some parameter settings # ---------------------------------------------------------------------------- # Specify data path and file identifier DATAPATH = '/DATA/LSST/FITS' RESPATH = '../../../DATA/results' BASE_N = 141 FILENAME = lambda i: '%s/v88827%03d-fz.R22.S11.fits' % (DATAPATH, (BASE_N + i)) ID = 'LSST' # ---------------------------------------------------------------------------- # Specify parameter settings # General doshow = opts.doShow # put 1 to show intermediate results backup = opts.backup # put 1 to write intermediate results to disk N = opts.N # how many frames to process N0 = opts.N0 # number of averaged frames for initialisation # OlaGPU parameters sf = np.array([40, 40]) # estimated size of PSF csf = (3, 3) # number of kernels across x and y direction overlap = 0.5 # overlap of neighboring patches in percent # Regularization parameters for kernel estimation f_alpha = opts.f_alpha # promotes smoothness f_beta = opts.f_beta # Thikhonov regularization optiter = opts.optiter # number of iterations for minimization tol = opts.tol # tolerance for when to stop minimization # ============================================================================ # Create helper functions for file handling # # # HACK for chunking into available GPU mem # # # # - loads one 1kx1k block out of the fits image xOffset = 2000 yOffset = 0 chunkSize = 1000 yload = lambda i: 1. 
* fitsTools.readFITS( FILENAME(i), use_mask=True, norm=True)[yOffset:yOffset + chunkSize, xOffset:xOffset + chunkSize] # ---------------------------------------------------------------------------- # Some more code for backuping the results # ---------------------------------------------------------------------------- # For backup purposes EXPPATH = '%s/%s_sf%dx%d_csf%dx%d_maxiter%d_alpha%.2f_beta%.2f' % \ (RESPATH,ID,sf[0],sf[1],csf[0],csf[1],optiter,f_alpha,f_beta) xname = lambda i: '%s/x_%04d.png' % (EXPPATH, i) yname = lambda i: '%s/y_%04d.png' % (EXPPATH, i) fname = lambda i: '%s/f_%04d.png' % (EXPPATH, i) if os.path.exists(EXPPATH) and opts.overwrite: try: rmtree(EXPPATH) except: print "[ERROR] removing old results dir:", EXPPATH exit() elif os.path.exists(EXPPATH): print "[ERROR] results directory already exists, please remove or use '-o' to overwrite" exit() # Create results path if not existing try: os.makedirs(EXPPATH) except: print "[ERROR] creating results dir:", EXPPATH exit() print 'Results are saved to: \n %s \n' % EXPPATH # ---------------------------------------------------------------------------- # For displaying intermediate results create target figure # ---------------------------------------------------------------------------- # Create figure for displaying intermediate results if doshow: print "showing intermediate results is currently disabled.." #pl.figure(1) #pl.draw() # ---------------------------------------------------------------------------- # Code for initialising the online multi-frame deconvolution # ---------------------------------------------------------------------------- # Initialisation of latent image by averaging the first 20 frames y0 = 0. 
for i in np.arange(1, N0): y0 += yload(i) y0 /= N0 y_gpu = cua.to_gpu(y0) # Pad image since we perform deconvolution with valid boundary conditions x_gpu = gputools.impad_gpu(y_gpu, sf - 1) # Create windows for OlaGPU sx = y0.shape + sf - 1 sf2 = np.floor(sf / 2) winaux = imagetools.win2winaux(sx, csf, overlap) # ---------------------------------------------------------------------------- # Loop over all frames and do online blind deconvolution # ---------------------------------------------------------------------------- import time as t ti = t.clock() t1 = stopwatch.timer() t2 = stopwatch.timer() t3 = stopwatch.timer() t4 = stopwatch.timer() t4.start() for i in np.arange(1, N + 1): print 'Processing frame %d/%d \r' % (i, N) # Load next observed image t3.start() y = yload(i) print "TIMER load:", t3.elapsed() # Compute mask for determining saturated regions mask_gpu = 1. * cua.to_gpu(y < 1.) y_gpu = cua.to_gpu(y) # ------------------------------------------------------------------------ # PSF estimation # ------------------------------------------------------------------------ # Create OlaGPU instance with current estimate of latent image t2.start() X = olaGPU.OlaGPU(x_gpu, sf, 'valid', winaux=winaux) print "TIMER GPU: ", t2.elapsed() t1.start() # PSF estimation for given estimate of latent image and current observation f = X.deconv(y_gpu, mode='lbfgsb', alpha=f_alpha, beta=f_beta, maxfun=optiter, verbose=10) print "TIMER Optimization: ", t1.elapsed() fs = f[0] # Normalize PSF kernels to sum up to one fs = gputools.normalize(fs) # ------------------------------------------------------------------------ # Latent image estimation # ------------------------------------------------------------------------ # Create OlaGPU instance with estimated PSF t2.start() F = olaGPU.OlaGPU(fs, sx, 'valid', winaux=winaux) # Latent image estimation by performing one gradient descent step # multiplicative update is used which preserves positivity factor_gpu = F.cnvtp( mask_gpu * 
y_gpu) / (F.cnvtp(mask_gpu * F.cnv(x_gpu)) + tol) gputools.cliplower_GPU(factor_gpu, tol) x_gpu = x_gpu * factor_gpu x_max = x_gpu.get()[sf[0]:-sf[0], sf[1]:-sf[1]].max() gputools.clipupper_GPU(x_gpu, x_max) print "TIMER GPU: ", t2.elapsed() # ------------------------------------------------------------------------ # For backup intermediate results # ------------------------------------------------------------------------ if backup or i == N: # Write intermediate results to disk incl. input y_img = y_gpu.get() * 1e5 fitsTools.asinhScale(y_img, 450, -50, minCut=0.0, maxCut=40000, fname=yname(i)) # Crop image to input size xi = (x_gpu.get()[sf2[0]:-sf2[0], sf2[1]:-sf2[1]] / x_max) * 1e5 fitsTools.fitsStats(xi) fitsTools.asinhScale(xi, 450, -50, minCut=0.0, maxCut=40000, fname=xname(i)) # Concatenate PSF kernels for ease of visualisation f = imagetools.gridF(fs, csf) f = f * 1e5 fitsTools.asinhScale(f, 450, -50, minCut=0.0, maxCut=40000, fname=fname(i)) # ------------------------------------------------------------------------ # For displaying intermediate results # ------------------------------------------------------------------------ ''' if np.mod(i,1) == 0 and doshow: pl.figure(1) pl.subplot(121) # what is SY? 
pl.imshow(imagetools.crop(x_gpu.get(),sy,np.ceil(sf/2)),'gray') pl.title('x after %d observations' % i) pl.subplot(122) pl.imshow(y_gpu.get(),'gray') pl.title('y(%d)' % i) pl.draw() pl.figure(2) pl.title('PSF(%d)' % i) imagetools.cellplot(fs, winaux.csf) tf = t.clock() print('Time elapsed after %d frames %.3f' % (i,(tf-ti))) ''' tf = t.clock() print('Time elapsed for total image sequence %.3f' % (tf - ti)) # ---------------------------------------------------------------------------- print "TOTAL: %.3f" % (t4.elapsed()) print "OptimizeCPUtime %.3f %.3f" % (t1.getTotal(), 100 * (t1.getTotal() / t4.getTotal())) print "GPUtime %.3f %.3f" % (t2.getTotal(), 100 * (t2.getTotal() / t4.getTotal())) print "LoadTime %.3f %.3f" % (t3.getTotal(), 100 * (t3.getTotal() / t4.getTotal()))