def test_fft_1d_roundtrip_double_2(self):
    """Round-trip a 1-D complex128 ramp through fft_inplace/ifft_inplace.

    The test expects the forward + inverse pair to scale the data by the
    number of elements, so the result is divided by that count before it
    is compared with the untouched copy of the input.
    """
    from pyculib.fft import fft_inplace, ifft_inplace

    size = 32
    signal = np.arange(size).astype(np.complex128)
    reference = signal.copy()

    # Both transforms overwrite ``signal``.
    fft_inplace(signal)
    ifft_inplace(signal)

    recovered = signal / size
    self.assertTrue(np.allclose(recovered, reference, atol=1e-6))
def test_fft_2d_roundtrip_single_2(self):
    """Round-trip a 2 x 32 complex64 ramp through fft_inplace/ifft_inplace.

    The test expects the forward + inverse pair to scale the data by the
    total number of elements, so the result is divided by that count
    before it is compared with the untouched copy of the input.
    """
    from pyculib.fft import fft_inplace, ifft_inplace

    rows, cols = 2, 32
    total = cols * rows
    signal = np.arange(total).astype(np.complex64).reshape(rows, cols)
    reference = signal.copy()

    # Both transforms overwrite ``signal``.
    fft_inplace(signal)
    ifft_inplace(signal)

    recovered = signal / total
    self.assertTrue(np.allclose(recovered, reference, atol=1e-6))
def test_fft_3d_roundtrip_double(self):
    """Round-trip a 2 x 2 x 8 complex128 ramp through fft_inplace/ifft_inplace.

    The test expects the forward + inverse pair to scale the data by the
    total number of elements, so the result is divided by that count
    before it is compared with the untouched copy of the input.
    """
    from pyculib.fft import fft_inplace, ifft_inplace

    shape = (2, 2, 8)  # (N3, N2, N1)
    total = shape[0] * shape[1] * shape[2]
    signal = np.arange(total).astype(np.complex128).reshape(*shape)
    reference = signal.copy()

    # Both transforms overwrite ``signal``.
    fft_inplace(signal)
    ifft_inplace(signal)

    recovered = signal / total
    self.assertTrue(np.allclose(recovered, reference, atol=1e-6))
def _get(self):
    """Inverse-transform ``self._data`` in place and return that same object.

    Warning: because the devicearray itself is returned, external code
    gains the ability to change it, and a subsequent ``self.get()`` may
    then no longer give the correct answer. Avoid calling ``self.get()``
    after ``self._get()``.
    """
    stream = self.stream
    data = self._data

    # Inverse FFT, then the ``_division`` kernel, both queued on the same
    # stream (presumably the kernel applies the FFT normalisation --
    # confirm against its definition).
    fft.ifft_inplace(data, stream=stream)
    division_kernel = self._division[self.gridim, self.threadim, stream]
    division_kernel(data)

    # Wait for the queued work before handing the array out.
    stream.synchronize()
    return data
def xytpropagator(E, x, y, t, dz, crys, key, ref=False):
    """Propagate the field ``E`` by ``dz`` using a GPU FFT over (x, y, t).

    ``E`` is uploaded to the device, forward-transformed in place, combined
    with the propagation term ``P = 1j * dz * kphase`` by the ``multiple3``
    kernel, inverse-transformed in place and copied back to the host.

    Parameters
    ----------
    E : array
        Field sampled on the (x, y, t) grid.
    x, y, t : 1-D arrays
        Sample coordinates. Only the first spacing (``a[1] - a[0]``) of each
        is used, so uniform sampling is assumed.
    dz : scalar
        Propagation step multiplied into the phase.
    crys, key :
        Forwarded unchanged to ``kpara`` to obtain the phase parameters.
    ref : bool, optional
        When true, ``dww * para['dw']`` is subtracted from the phase.

    Returns
    -------
    array
        Host copy of the result divided by the number of elements of ``E``.
    """
    kx = 2 * np.pi * np.fft.fftfreq(x.size, d=(x[1] - x[0])).astype(x.dtype)  # k in x
    ky = 2 * np.pi * np.fft.fftfreq(y.size, d=(y[1] - y[0])).astype(y.dtype)  # k in y
    dw = 2 * np.pi * np.fft.fftfreq(t.size, d=(t[1] - t[0])).astype(t.dtype)  # dw in t
    kxx, kyy, dww = np.meshgrid(kx, ky, dw, indexing='ij')

    para = kpara(crys, key)
    # Evaluating phase() is the most time-consuming step.
    kphase = phase(para, kx=kxx, ky=kyy, dw=dww)
    if ref:
        kphase = kphase - dww * para['dw']
    P = 1j * dz * kphase

    # Launch configuration. Integer floor division replaces the former
    # ``(sizes / TPB).astype(np.int)``: ``np.int`` no longer exists in
    # current NumPy, and the resulting grid is identical.
    # NOTE(review): axis sizes that are not multiples of the block shape
    # leave a remainder outside the grid -- confirm inputs are multiples.
    threadim = (8, 8, 4)
    gridim = tuple(
        int(size) // threads
        for size, threads in zip((x.size, y.size, t.size), threadim)
    )

    stream = cuda.stream()
    dE = cuda.to_device(E, stream=stream)
    dP = cuda.to_device(P, stream=stream)

    fft.fft_inplace(dE, stream=stream)
    multiple3[gridim, threadim, stream](dE, dP)
    fft.ifft_inplace(dE, stream=stream)

    # The copy is queued asynchronously on ``stream``; wait for it to finish
    # before the host touches the data.
    host_result = dE.copy_to_host(stream=stream)
    stream.synchronize()

    return host_result / np.prod(E.shape, dtype=kphase.dtype)
def xypropagator(E, x, y, dz, crys, key):
    """Propagate the field ``E`` by ``dz`` using a GPU FFT over (x, y).

    ``E`` is uploaded to the device, forward-transformed in place, combined
    with the propagation term ``P = 1j * dz * kphase`` by the ``multiple2``
    kernel, inverse-transformed in place and copied back to the host.

    Parameters
    ----------
    E : array
        Field sampled on the (x, y) grid.
    x, y : 1-D arrays
        Sample coordinates. Only the first spacing (``a[1] - a[0]``) of each
        is used, so uniform sampling is assumed.
    dz : scalar
        Propagation step multiplied into the phase.
    crys, key :
        Forwarded unchanged to ``kpara`` to obtain the phase parameters.

    Returns
    -------
    array
        Host copy of the result divided by the number of elements of ``E``.
    """
    kx = 2 * np.pi * np.fft.fftfreq(x.size, d=(x[1] - x[0])).astype(x.dtype)  # k in x
    ky = 2 * np.pi * np.fft.fftfreq(y.size, d=(y[1] - y[0])).astype(y.dtype)  # k in y
    kxx, kyy = np.meshgrid(kx, ky, indexing='ij')  # xx, yy in k space

    para = kpara(crys, key)
    kphase = phase(para, kx=kxx, ky=kyy)
    P = 1j * dz * kphase

    # Launch configuration. Integer floor division replaces the former
    # ``(sizes / TPB).astype(np.int)``: ``np.int`` no longer exists in
    # current NumPy, and the resulting grid is identical.
    # NOTE(review): axis sizes that are not multiples of the block shape
    # leave a remainder outside the grid -- confirm inputs are multiples.
    threadim = (16, 16)
    gridim = tuple(
        int(size) // threads
        for size, threads in zip((x.size, y.size), threadim)
    )

    stream = cuda.stream()
    dE = cuda.to_device(E, stream=stream)
    dP = cuda.to_device(P, stream=stream)

    fft.fft_inplace(dE, stream=stream)
    multiple2[gridim, threadim, stream](dE, dP)
    fft.ifft_inplace(dE, stream=stream)

    # The copy is queued asynchronously on ``stream``; wait for it to finish
    # before the host touches the data.
    host_result = dE.copy_to_host(stream=stream)
    stream.synchronize()

    return host_result / np.prod(E.shape, dtype=kphase.dtype)
def compute_estimate(self):
    """
    Calculates a single RL fusion estimate. There is no reason to call this
    function -- it is used internally by the class during fusion process.

    The image is processed block by block: each block of the current
    estimate is multiplied by the PSF in Fourier space, the image block is
    divided by that result, and the quotient is multiplied by the adjoint
    PSF in Fourier space. The real part of the outcome is written into
    ``self.estimate_new``.

    Returns the value produced by ``ops_ext.update_estimate_poisson``, which
    also updates ``self.estimate`` in place.
    """
    print('Beginning the computation of the %i. estimate' %
          (self.iteration_count + 1))

    self.estimate_new[:] = numpy.zeros(self.image_size, dtype=numpy.float32)

    # Iterate over blocks
    stream1 = cuda.stream()
    stream2 = cuda.stream()

    iterables = (range(0, m, n)
                 for m, n in zip(self.image_size, self.block_size))
    pad = self.options.block_pad
    # Region of a padded block that holds the un-padded data.
    block_idx = tuple(slice(pad, pad + block) for block in self.block_size)

    if self.imdims == 2:
        for pos in itertools.product(*iterables):
            # Destination region in the full-size estimate. This is built
            # from the loop position ``pos``; the previous code referenced
            # an undefined name here and failed with NameError.
            estimate_idx = tuple(slice(j, j + k)
                                 for j, k in zip(pos, self.block_size))
            index = numpy.array(pos, dtype=int)

            if pad > 0:
                h_estimate_block = self.get_padded_block(
                    self.estimate, index.copy()).astype(numpy.complex64)
            else:
                h_estimate_block = self.estimate[estimate_idx].astype(
                    numpy.complex64)

            d_estimate_block = cuda.to_device(h_estimate_block,
                                              stream=stream1)
            d_psf = cuda.to_device(self.psf_fft, stream=stream2)

            # Execute: cache = convolve(PSF, estimate), non-normalized
            fft_inplace(d_estimate_block, stream=stream1)
            stream2.synchronize()

            self.vmult(d_estimate_block, d_psf, out=d_estimate_block)
            ifft_inplace(d_estimate_block)

            h_estimate_block_new = d_estimate_block.copy_to_host()

            # Execute: cache = data/cache
            h_image_block = self.get_padded_block(
                self.image, index.copy()).astype(numpy.float32)
            ops_ext.inverse_division_inplace(h_estimate_block_new,
                                             h_image_block)

            d_estimate_block = cuda.to_device(h_estimate_block_new,
                                              stream=stream1)
            d_adj_psf = cuda.to_device(self.adj_psf_fft, stream=stream2)

            fft_inplace(d_estimate_block, stream=stream1)
            stream2.synchronize()
            self.vmult(d_estimate_block, d_adj_psf, out=d_estimate_block)
            ifft_inplace(d_estimate_block)
            h_estimate_block_new = d_estimate_block.copy_to_host().real

            self.estimate_new[estimate_idx] = h_estimate_block_new[block_idx]

    # TV Regularization (doesn't seem to do anything miraculous).
    if self.options.rltv_lambda > 0 and self.iteration_count > 0:
        dv_est = ops_ext.div_unit_grad(self.estimate, self.image_spacing)
        with numpy.errstate(divide="ignore"):
            self.estimate_new /= (1.0 - self.options.rltv_lambda * dv_est)
            # Zero the infinities produced by division by zero, then clean
            # up any remaining non-finite values.
            self.estimate_new[self.estimate_new == numpy.inf] = 0.0
            self.estimate_new[:] = numpy.nan_to_num(self.estimate_new)

    # Update estimate inplace. Get convergence statistics.
    return ops_ext.update_estimate_poisson(self.estimate,
                                           self.estimate_new,
                                           self.options.convergence_epsilon)
# Capture / filter / display loop: runs until 'q' is pressed.
stime = time.time()
while True:
    # Seconds elapsed since the loop started.
    t = time.time() - stime

    # Capture frame-by-frame
    frame = vs.read()

    # scale, gray, display original
    frame = cv2.cvtColor(cv2.resize(frame, (w, w)), cv2.COLOR_BGR2GRAY)

    # Upload the frame scaled by 1/255 and transform it in place.
    normalized = np.array(frame / 255, dtype=ctype)
    d_fft.copy_to_device(normalized)
    fft_inplace(d_fft)

    # Kernels are launched on a (w, w) grid with one thread per block.
    launch = ((w, w), 1)
    gpu_mul[launch](d_fft, d_mask)
    gpu_shift[launch](d_fft, d_shfft, 60, 100, w)

    ifft_inplace(d_shfft)
    d_shfft.to_host()

    cv2.imshow('orig_frame', frame)
    # cv2.imshow('fft', np.log(np.abs(np.fft.fftshift(shfft)))/10)
    cv2.imshow('result', np.real(shfft) / (w * w))

    # Clear the host buffer and push the zeros to the device for the next
    # frame.
    shfft[:, :] = 0
    d_shfft.copy_to_device(shfft)

    key = cv2.waitKey(1) & 0xFF
    if key == ord('h'):
        gpu_mul[launch](d_mask, d_lens)
    elif key == ord('q'):
        # kill on q
        break
def compute_estimate(self):
    """
    Calculates a single RL fusion estimate. There is no reason to call this
    function -- it is used internally by the class during fusion process.

    For every view the image is processed block by block, and each block's
    result is either multiplied into or added to ``self.estimate_new``,
    depending on whether ``"multiplicative"`` occurs in
    ``self.options.fusion_method``.

    Returns the value produced by ``ops_ext.update_estimate_poisson``, which
    also updates ``self.estimate`` in place.
    """
    print('Beginning the computation of the %i. estimate' %
          (self.iteration_count + 1))

    options = self.options
    multiplicative = "multiplicative" in options.fusion_method

    # Start from the neutral element of the accumulation that follows.
    initial = numpy.ones if multiplicative else numpy.zeros
    self.estimate_new[:] = initial(self.image_size, dtype=numpy.float32)

    stream1 = cuda.stream()
    stream2 = cuda.stream()

    # Iterate over views
    for view_number, view in enumerate(self.views):
        psf_fft = self.psfs_fft[view_number]
        adj_psf_fft = self.adj_psfs_fft[view_number]

        self.data.set_active_image(view, options.channel, options.scale,
                                   "registered")
        weighting = self.weights[view_number]

        pad = options.block_pad
        # Region of a padded block that holds the un-padded data.
        block_idx = tuple(slice(pad, pad + extent)
                          for extent in self.block_size)
        block_origins = itertools.product(
            *(range(0, full, step)
              for full, step in zip(self.image_size, self.block_size)))

        for pos in block_origins:
            estimate_idx = tuple(slice(start, start + extent)
                                 for start, extent in zip(pos, self.block_size))
            index = numpy.array(pos, dtype=int)

            if options.block_pad > 0:
                h_estimate_block = self.get_padded_block(index.copy())
            else:
                h_estimate_block = self.estimate[estimate_idx]
            h_estimate_block = h_estimate_block.astype(numpy.complex64)

            d_estimate_block = cuda.to_device(h_estimate_block,
                                              stream=stream1)
            d_psf = cuda.to_device(psf_fft, stream=stream2)

            # Execute: cache = convolve(PSF, estimate), non-normalized
            fft_inplace(d_estimate_block, stream=stream1)
            stream2.synchronize()
            self.vmult(d_estimate_block, d_psf, out=d_estimate_block)
            ifft_inplace(d_estimate_block)
            h_estimate_block_new = d_estimate_block.copy_to_host()

            # Execute: cache = data/cache
            h_image_block = self.data.get_registered_block(
                self.block_size, options.block_pad,
                index.copy()).astype(numpy.float32)
            h_estimate_block_new *= weighting
            ops_ext.inverse_division_inplace(h_estimate_block_new,
                                             h_image_block)

            d_estimate_block = cuda.to_device(h_estimate_block_new,
                                              stream=stream1)
            d_adj_psf = cuda.to_device(adj_psf_fft, stream=stream2)

            fft_inplace(d_estimate_block, stream=stream1)
            stream2.synchronize()
            self.vmult(d_estimate_block, d_adj_psf, out=d_estimate_block)
            ifft_inplace(d_estimate_block)
            h_estimate_block_new = d_estimate_block.copy_to_host().real

            # Update the contribution from a single view to the new
            # estimate; padded blocks are cropped first.
            if options.block_pad == 0:
                contribution = h_estimate_block_new
            else:
                contribution = h_estimate_block_new[block_idx]

            if multiplicative:
                self.estimate_new[estimate_idx] *= contribution
            else:
                self.estimate_new[estimate_idx] += contribution

    # Normalisation by the number of projections is disabled; the former
    # code is kept here for reference:
    # if "summative" in self.options.fusion_method:
    #     self.estimate_new *= (1.0 / self.n_views)
    # else:
    #     self.estimate_new[self.estimate_new < 0] = 0
    #     self.estimate_new[:] = ops_array.nroot(self.estimate_new,
    #                                            self.n_views)

    # TV Regularization (doesn't seem to do anything miraculous).
    if options.rltv_lambda > 0 and self.iteration_count > 0:
        dv_est = ops_ext.div_unit_grad(self.estimate, self.voxel_size)
        with numpy.errstate(divide="ignore"):
            self.estimate_new /= (1.0 - options.rltv_lambda * dv_est)
            self.estimate_new[self.estimate_new == numpy.inf] = 0.0
            self.estimate_new[:] = numpy.nan_to_num(self.estimate_new)

    # Update estimate inplace. Get convergence statistics.
    return ops_ext.update_estimate_poisson(
        self.estimate, self.estimate_new, options.convergence_epsilon)