def run(self, x, cpu_core):
    """
    Create a NUFFT_hsa instance and execute it on a fixed CPU core.
    """
    pid = os.getpid()
    print('pid=', pid)
    # Control the CPU affinity; otherwise the scheduler may migrate the
    # process from one core to another.
    os.system("taskset -p -c %d %d" % (cpu_core, pid))
    # create the NUFFT object
    NUFFT = NUFFT_hsa(self.API, self.device_number, 0)
    # plan the NUFFT
    NUFFT.plan(self.om, self.Nd, self.Kd, self.Jd)
    # send the image to the device
    gx = NUFFT.to_device(x)
    # carry out 100 forward transforms
    for pp in range(0, 100):
        gy = NUFFT.forward(gx)
    # return the result as a numpy array
    return gy.get()
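# A minimal sketch, assuming Linux: os.sched_setaffinity() could replace the
# external "taskset" call above to pin the current process to one core.
# This helper is an illustration, not part of the original class.
import os

def pin_to_core(cpu_core):
    # Restrict the current process to a single CPU core.
    os.sched_setaffinity(os.getpid(), {cpu_core})
    print('now pinned to cores:', os.sched_getaffinity(os.getpid()))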
def test_cuda():
    import numpy
    import matplotlib.pyplot

    # load the example image
    import pkg_resources
    # Define the source of data
    DATA_PATH = pkg_resources.resource_filename('pynufft', 'src/data/')
    # PHANTOM_FILE = pkg_resources.resource_filename('pynufft', 'data/phantom_256_256.txt')
    import scipy.misc
    image = scipy.misc.ascent()
    image = scipy.misc.imresize(image, (256, 256))
    image = image.astype(numpy.float64) / numpy.max(image[...])

    Nd = (256, 256)  # image space size
    Kd = (512, 512)  # k-space size
    Jd = (6, 6)      # interpolation size

    # load k-space points as an M x 2 array
    om = numpy.load(DATA_PATH + 'om2D.npz')['arr_0']
    # Show the shape of om
    print('the shape of om = ', om.shape)

    # initiate the NUFFT_cpu object
    nfft = NUFFT_cpu()  # CPU NUFFT class
    # plan the nfft object
    nfft.plan(om, Nd, Kd, Jd)

    # initiate the NUFFT_hsa object
    NufftObj = NUFFT_hsa('cuda', 0, 0)
    # plan the NufftObj (similar to NUFFT_cpu)
    NufftObj.plan(om, Nd, Kd, Jd)

    import time
    t0 = time.time()
    for pp in range(0, 10):
        y = nfft.forward(image)
    t_cpu = (time.time() - t0) / 10.0

    # Move the image to the GPU.
    # gx is a GPU array, dtype = complex64
    gx = NufftObj.to_device(image)
    t0 = time.time()
    for pp in range(0, 100):
        gy = NufftObj.forward(gx)
    t_cu = (time.time() - t0) / 100

    print('t_cpu = ', t_cpu)
    print('t_cuda = ', t_cu)
    print('gy close? = ',
          numpy.allclose(y, gy.get(), atol=numpy.linalg.norm(y) * 1e-3))
    print("acceleration=", t_cpu / t_cu)

    maxiter = 100
    t0 = time.time()
    x_cpu_cg = nfft.solve(y, 'cg', maxiter=maxiter)
    # x2 = nfft.solve(y2, 'L1TVLAD', maxiter=maxiter, rho=2)
    t1 = time.time() - t0

    # gy = NufftObj.thr.copy_array(NufftObj.thr.to_device(y2))
    t0 = time.time()
    x_cuda_cg = NufftObj.solve(gy, 'cg', maxiter=maxiter)
    # x = NufftObj.solve(gy, 'L1TVLAD', maxiter=maxiter, rho=2)
    t2 = time.time() - t0
    print(t1, t2)
    print('acceleration of cg=', t1 / t2)

    t0 = time.time()
    x_cpu_TV = nfft.solve(y, 'L1TVOLS', maxiter=maxiter, rho=2)
    t1 = time.time() - t0

    t0 = time.time()
    x_cuda_TV = NufftObj.solve(gy, 'L1TVOLS', maxiter=maxiter, rho=2)
    t2 = time.time() - t0
    print(t1, t2)
    print('acceleration of TV=', t1 / t2)

    matplotlib.pyplot.subplot(2, 2, 1)
    matplotlib.pyplot.imshow(x_cpu_cg.real, cmap=matplotlib.cm.gray)
    matplotlib.pyplot.title('CG_cpu')
    matplotlib.pyplot.subplot(2, 2, 2)
    matplotlib.pyplot.imshow(x_cuda_cg.get().real, cmap=matplotlib.cm.gray)
    matplotlib.pyplot.title('CG_cuda')
    matplotlib.pyplot.subplot(2, 2, 3)
    matplotlib.pyplot.imshow(x_cpu_TV.real, cmap=matplotlib.cm.gray)
    matplotlib.pyplot.title('TV_cpu')
    matplotlib.pyplot.subplot(2, 2, 4)
    matplotlib.pyplot.imshow(x_cuda_TV.get().real, cmap=matplotlib.cm.gray)
    matplotlib.pyplot.title('TV_cuda')
    matplotlib.pyplot.show()

    NufftObj.release()
    del NufftObj
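# A small helper sketch (an assumption, not part of the original test) for
# reporting the relative L2 error between a CPU reference and a device result,
# complementing the allclose checks printed above.
import numpy

def relative_error(reference, candidate):
    # ||candidate - reference|| / ||reference||
    return numpy.linalg.norm(candidate - reference) / numpy.linalg.norm(reference)

# Example (assuming y and gy from a run of test_cuda()):
# print('relative error =', relative_error(y, gy.get()))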
    list of the samples between [-0.5, 0.5[.
    """
    dim1, dim2, dim3 = np.where(mask == 1)
    dim1 = dim1.astype("float") / mask.shape[0] - 0.5
    dim2 = dim2.astype("float") / mask.shape[1] - 0.5
    dim3 = dim3.astype("float") / mask.shape[2] - 0.5
    return np.c_[dim1, dim2, dim3]


# samples = convert_mask_to_locations_3D(np.ones(Il.shape[0:3]))
# samples = 2 * np.pi * samples
samples = np.random.randn(2085640, 3) * 3.1415

# Create a NUFFT object
nufftObj = NUFFT_hsa(API='ocl',
                     platform_number=1,
                     device_number=0,
                     verbosity=0)

nufftObj.plan(om=samples,
              Nd=(128, 128, 128),
              Kd=tuple([256, 256, 256]),
              Jd=tuple([5, 5, 5]),
              batch=4,
              ft_axes=(0, 1, 2),
              radix=1)

# Cast the dtype to complex64
dtype = np.complex64

# Computing the forward pass of the NUFFT
# JML: not needed in the new version
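# A minimal sketch of the scaling hinted at by the commented-out lines above:
# pynufft expects k-space coordinates in [-pi, pi[, so locations normalized to
# [-0.5, 0.5[ are multiplied by 2*pi before planning. The random locations
# below are placeholders for the output of convert_mask_to_locations_3D.
import numpy as np

locations = np.random.uniform(-0.5, 0.5, size=(1000, 3))  # in [-0.5, 0.5[
om_scaled = 2 * np.pi * locations                          # in [-pi, pi[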
om = np.load(DATA_PATH + 'om2D.npz')['arr_0']
assert (om.shape[1] == dim)

Kd = (Kd1, ) * dim
Jd = (Jd1, ) * dim

print('setting image dimension Nd...', Nd)
print('setting spectrum dimension Kd...', Kd)
print('setting interpolation size Jd...', Jd)
print('Fourier transform...')

time_pre = time.perf_counter()

# Preprocessing the NUFFT
if gpu:
    time_1 = time.perf_counter()
    NufftObj = NUFFT_hsa()
    time_2 = time.perf_counter()
    # mem_usage = memory_usage((NufftObj.plan, (om, Nd, Kd, Jd)))
    # print(mem_usage)
    NufftObj.plan(om, Nd, Kd, Jd)
    time_3 = time.perf_counter()
    # NufftObj.offload('cuda')  # for GPU computation
    NufftObj.offload('ocl')  # for multi-CPU computation
    time_4 = time.perf_counter()
    dtype = np.complex64
    time_5 = time.perf_counter()
    print("send image to device")
    NufftObj.x_Nd = NufftObj.thr.to_device(image.astype(dtype))
    print("copy image to gx")
import numpy
import scipy.misc
import matplotlib
from pynufft import NUFFT_hsa

Nd = (256, 256)
Kd = (512, 512)
Jd = (6, 6)

om = numpy.random.randn(65536, 2)
x = scipy.misc.imresize(scipy.misc.ascent(), Nd)

# split the k-space trajectory into two halves
om1 = om[om[:, 0] > 0, :]
om2 = om[om[:, 0] <= 0, :]

# one NUFFT_hsa instance per half, on two different backends
NufftObj1 = NUFFT_hsa('ocl')
NufftObj1.plan(om1, Nd, Kd, Jd)

NufftObj2 = NUFFT_hsa('cuda')
NufftObj2.plan(om2, Nd, Kd, Jd)

y1 = NufftObj1.forward(x)
y2 = NufftObj2.forward(x)
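# A minimal sketch (an assumption, not in the original snippet) of how the two
# partial results could be stitched back into one k-space vector that follows
# the ordering of the original om array. It assumes forward() returns device
# arrays exposing .get(), as in the other examples; drop .get() otherwise.
import numpy

y = numpy.empty(om.shape[0], dtype=numpy.complex64)
y[om[:, 0] > 0] = y1.get()   # samples handled by the OpenCL object
y[om[:, 0] <= 0] = y2.get()  # samples handled by the CUDA object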
def test_mCoil(sense_number):
    image = scipy.misc.ascent()
    Nd = (64, 64, 64)  # time grid, tuple
    # image = scipy.misc.imresize(image, Nd) * (1.0 + 0.0j)
    image = numpy.random.randn(64, 64, 64) * (1.0 + 0.0j)
    Kd = (128, 128, 128)  # frequency grid, tuple
    Jd = (6, 6, 6)  # interpolator
    # om = numpy.load(DATA_PATH + 'om3D.npz')['arr_0']
    # om = numpy.random.randn(10000, 3) * 2
    # om = numpy.load('/home/sram/Cambridge_2012/DATA_MATLAB/Ciuciu/Trajectories_and_data_sparkling_radial/radial/')['arr_0']
    # om = scipy.io.loadmat('/home/sram/Cambridge_2012/DATA_MATLAB/Ciuciu/Trajectories_and_data_sparkling_radial/sparkling/samples_sparkling_x8_64x3072.mat')['samples_sparkling']
    # om = scipy.io.loadmat('/home/sram/Cambridge_2012/DATA_MATLAB/Ciuciu/Trajectories_and_data_sparkling_radial/radial/samples_radial_x8_64x3072.mat')['samples_radial']
    # om = om / numpy.max(om.real.ravel()) * numpy.pi
    om = numpy.random.randn(int((128**3) / 32), 3) * 1.5
    print('om.shape, ', om.shape)
    # sense_number = 16
    # sense = numpy.ones(Nd + (sense_number,), dtype=numpy.complex64)
    m = om.shape[0]
    print(om.shape)

    from pynufft import NUFFT_cpu, NUFFT_hsa, NUFFT_hsa_legacy
    # from pynufft import NUFFT_memsave

    NufftObj_cpu = NUFFT_cpu()
    api = 'ocl'
    proc = 0
    NufftObj_radix1 = NUFFT_hsa(api, proc, 0)
    NufftObj_radix2 = NUFFT_hsa(api, proc, 0)
    NufftObj_radix3 = NUFFT_hsa(api, proc, 0)

    import time
    # t0 = time.time()
    NufftObj_cpu.plan(om, Nd, Kd, Jd, batch=sense_number)
    # t1 = time.time()
    # t12 = time.time()
    # t2 = time.time()
    # tc = time.time()
    # proc = 0  # GPU
    # proc = 1  # gpu
    # NufftObj_radix1.offload(API='ocl', platform_number=proc, device_number=0)
    # t22 = time.time()
    # NufftObj_radix2.offload(API='ocl', platform_number=proc, device_number=0)
    # NufftObj_radix2.offload(API='cuda', platform_number=0, device_number=0)
    # t3 = time.time()
    # NufftObj_radix3.offload(API='ocl', platform_number=proc, device_number=0)
    # tp = time.time()
    # if proc is 0:
    #     print('CPU')
    # else:
    #     print('GPU')
    # print('Number of samples = ', om.shape[0])
    # print('planning time of CPU = ', t1 - t0)
    # print('planning time of HSA = ', t12 - t1)
    # print('planning time of MEM = ', t2 - t12)
    # print('planning time of mCoil = ', tc - t2)
    # print('loading time of HSA = ', t22 - tc)
    # print('loading time of MEM = ', t3 - t22)
    # print('loading time of mCoil = ', tp - t3)

    maxiter = 1
    tcpu_forward, tcpu_adjoint, ycpu, xcpu = benchmark(NufftObj_cpu, image,
                                                       maxiter)
    print('CPU', int(m), tcpu_forward, tcpu_adjoint)

    maxiter = 20
    NufftObj_radix1.plan(om, Nd, Kd, Jd, batch=sense_number, radix=1)
    gx_hsa = NufftObj_radix1.thr.to_device(image.astype(numpy.complex64))
    # gx_hsa = NufftObj_radix1.s2x(gx_hsa0)
    thsa_forward, thsa_adjoint, yradix1, xradix1 = benchmark(
        NufftObj_radix1, gx_hsa, maxiter)
    print('radix-1', int(m), thsa_forward, thsa_adjoint)
    # numpy.linalg.norm(yradix1.get() - ycpu) / numpy.linalg.norm(ycpu)
    # for ss in range(0, sense_number):
    erry = numpy.linalg.norm(yradix1.get() - ycpu) / numpy.linalg.norm(ycpu)
    errx = numpy.linalg.norm(xradix1.get() - xcpu) / numpy.linalg.norm(xcpu)
    if erry > 1e-6 or errx > 1e-6:
        print("degraded accuracy:", sense_number, erry, errx)
    else:
        print("Pass test for coil: ", sense_number, erry, errx)
    NufftObj_radix1.release()

    NufftObj_radix2.plan(om, Nd, Kd, Jd, batch=sense_number, radix=2)
    gx_memsave = NufftObj_radix2.thr.to_device(image.astype(numpy.complex64))
    # gx_memsave = NufftObj_radix2.s2x(gx_memsave0)
    tmem_forward, tmem_adjoint, yradix2, xradix2 = benchmark(
        NufftObj_radix2, gx_memsave, maxiter)  # , sense_number)
    print('radix-2', int(m), tmem_forward, tmem_adjoint)
    # for ss in range(0, sense_number):
    erry = numpy.linalg.norm(yradix2.get() - ycpu) / numpy.linalg.norm(ycpu)
    errx = numpy.linalg.norm(xradix2.get() - xcpu) / numpy.linalg.norm(xcpu)
    if erry > 1e-6 or errx > 1e-6:
        print("degraded accuracy:", sense_number, erry, errx)
    else:
        print("Pass test for coil: ", sense_number, erry, errx)
    NufftObj_radix2.release()

    NufftObj_radix3.plan(om, Nd, Kd, Jd, batch=sense_number, radix=3)
    gx_mCoil = NufftObj_radix3.thr.to_device(image.astype(numpy.complex64))
    # gx_mCoil = NufftObj_radix3.s2x(gx_mCoil0)
    tmCoil_forward, tmCoil_adjoint, yradix3, xradix3 = benchmark(
        NufftObj_radix3, gx_mCoil, maxiter)
    print('radix-3', int(m), tmCoil_forward, tmCoil_adjoint)
    # for ss in range(0, sense_number):
    erry = numpy.linalg.norm(yradix3.get() - ycpu) / numpy.linalg.norm(ycpu)
    errx = numpy.linalg.norm(xradix3.get() - xcpu) / numpy.linalg.norm(xcpu)
    if erry > 1e-6 or errx > 1e-6:
        print("degraded accuracy:", sense_number, erry, errx)
    else:
        print("Pass test for coil: ", sense_number, erry, errx)
    # print("Pass test for coil: ", ss)
    NufftObj_radix3.release()

    del NufftObj_radix2, NufftObj_radix1, NufftObj_radix3, NufftObj_cpu
    return (tcpu_forward, tcpu_adjoint, thsa_forward, thsa_adjoint,
            tmem_forward, tmem_adjoint, tmCoil_forward, tmCoil_adjoint)
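# A hypothetical sketch of the benchmark() helper used above (its real
# definition is not shown in this excerpt): it times repeated forward and
# adjoint transforms and returns the averaged timings plus the last results.
import time

def benchmark(nufft_obj, x, maxiter):
    # Average the forward transform time over maxiter runs.
    t0 = time.time()
    for _ in range(maxiter):
        y = nufft_obj.forward(x)
    t_forward = (time.time() - t0) / maxiter

    # Average the adjoint transform time over maxiter runs.
    t0 = time.time()
    for _ in range(maxiter):
        x_adj = nufft_obj.adjoint(y)
    t_adjoint = (time.time() - t0) / maxiter

    return t_forward, t_adjoint, y, x_adj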
class NUFFT(Singleton):
    """ GPU implementation of the N-D non-uniform Fast Fourier Transform class.

    Attributes
    ----------
    samples: np.ndarray
        the mask samples in the Fourier domain.
    shape: tuple of int
        shape of the image (necessarily a square/cubic matrix).
    nufftObj: The pynufft object
        depending on the required computational platform
    platform: string, 'opencl' or 'cuda'
        string indicating which hardware platform will be used to
        compute the NUFFT
    Kd: int or tuple
        int or tuple indicating the size of the frequency grid,
        for regridding. If int, will be evaluated
        to (Kd,)*nb_dim of the image
    Jd: int or tuple
        Size of the interpolator kernel. If int, will be evaluated
        to (Jd,)*dims image
    n_coils: int default 1
        Number of coils used to acquire the signal in case of
        multiarray receiver coils acquisition. If n_coils > 1,
        please organize data as n_coils X data_per_coil
    """
    numOfInstances = 0

    def __init__(self, samples, shape, platform='cuda', Kd=None, Jd=None,
                 n_coils=1, verbosity=0):
        """ Initialize the 'NUFFT' class.

        Parameters
        ----------
        samples: np.ndarray
            the mask samples in the Fourier domain.
        shape: tuple of int
            shape of the image (necessarily a square/cubic matrix).
        platform: string, 'cpu', 'opencl' or 'cuda'
            string indicating which hardware platform will be used to
            compute the NUFFT
        Kd: int or tuple
            int or tuple indicating the size of the frequency grid,
            for regridding. If int, will be evaluated
            to (Kd,)*nb_dim of the image
        Jd: int or tuple
            Size of the interpolator kernel. If int, will be evaluated
            to (Jd,)*dims image
        n_coils: int
            Number of coils used to acquire the signal in case of
            multiarray receiver coils acquisition
        """
        if (n_coils < 1) or (type(n_coils) is not int):
            raise ValueError('The number of coils should be an integer >= 1')
        if not pynufft_available:
            raise ValueError('PyNUFFT Package is not installed, please '
                             'consider using `gpuNUFFT` or install the '
                             'PyNUFFT package')
        self.nb_coils = n_coils
        self.shape = shape
        self.platform = platform
        self.samples = samples * (2 * np.pi)  # pynufft uses samples in
        # [-pi, pi[ instead of [-0.5, 0.5[
        self.dim = samples.shape[1]  # number of dimensions of the image

        if type(Kd) == int:
            self.Kd = (Kd, ) * self.dim
        elif type(Kd) == tuple:
            self.Kd = Kd
        elif Kd is None:
            # Preferential option
            self.Kd = tuple([2 * ix for ix in shape])

        if type(Jd) == int:
            self.Jd = (Jd, ) * self.dim
        elif type(Jd) == tuple:
            self.Jd = Jd
        elif Jd is None:
            # Preferential option
            self.Jd = (5, ) * self.dim

        for (i, s) in enumerate(shape):
            assert (self.shape[i] <= self.Kd[i]), \
                'size of the frequency grid must be greater than or equal ' \
                'to the image size'

        if verbosity > 0:
            print('Creating the NUFFT object...')
        if self.platform == 'opencl':
            warn('Attempting to use the OpenCL platform. Make sure to '
                 'have all the dependencies installed')
            Singleton.__init__(self)
            if self.getNumInstances() > 1:
                warn('You have created more than one NUFFT object. '
                     'This could cause memory leaks')
            self.nufftObj = NUFFT_hsa(API='ocl',
                                      platform_number=None,
                                      device_number=None,
                                      verbosity=verbosity)
            self.nufftObj.plan(om=self.samples,
                               Nd=self.shape,
                               Kd=self.Kd,
                               Jd=self.Jd,
                               batch=1,  # TODO self.nb_coils,
                               ft_axes=tuple(range(samples.shape[1])),
                               radix=None)
        elif self.platform == 'cuda':
            warn('Attempting to use the CUDA platform. Make sure to '
                 'have all the dependencies installed and '
                 'to create only one instance of NUFFT GPU')
            Singleton.__init__(self)
            if self.getNumInstances() > 1:
                warn('You have created more than one NUFFT object. '
                     'This could cause memory leaks')
            self.nufftObj = NUFFT_hsa(API='cuda',
                                      platform_number=None,
                                      device_number=None,
                                      verbosity=verbosity)
            self.nufftObj.plan(om=self.samples,
                               Nd=self.shape,
                               Kd=self.Kd,
                               Jd=self.Jd,
                               batch=1,  # TODO self.nb_coils,
                               ft_axes=tuple(range(samples.shape[1])),
                               radix=None)
        else:
            raise ValueError('Wrong type of platform. Platform must be '
                             "'opencl' or 'cuda'")

    def __del__(self):
        # This is an important destructor to ensure that the device memory
        # is freed.
        # TODO: this is still not freeing the memory right on device.
        # Mostly an issue with the reikna library.
        # Refer: https://github.com/fjarri/reikna/issues/53
        if self.platform == 'opencl' or self.platform == 'cuda':
            self.nufftObj.release()

    def op(self, img):
        """ This method calculates the masked non-cartesian Fourier transform
        of a 3-D image.

        Parameters
        ----------
        img: np.ndarray
            input 3D array with the same shape as shape.

        Returns
        -------
        x: np.ndarray
            masked Fourier transform of the input image.
        """
        if self.nb_coils == 1:
            dtype = np.complex64
            # Send data to the mCPU/GPU platform
            self.nufftObj.x_Nd = self.nufftObj.thr.to_device(
                img.astype(dtype))
            gx = self.nufftObj.thr.copy_array(self.nufftObj.x_Nd)
            # Forward operator of the NUFFT
            gy = self.nufftObj.forward(gx)
            y = np.squeeze(gy.get())
        else:
            dtype = np.complex64
            # Send data to the mCPU/GPU platform
            y = []
            for ch in range(self.nb_coils):
                self.nufftObj.x_Nd = self.nufftObj.thr.to_device(
                    np.copy(img[ch]).astype(dtype))
                gx = self.nufftObj.thr.copy_array(self.nufftObj.x_Nd)
                # Forward operator of the NUFFT
                gy = self.nufftObj.forward(gx)
                y.append(np.squeeze(gy.get()))
            y = np.asarray(y)
        return y * 1.0 / np.sqrt(np.prod(self.Kd))

    def adj_op(self, x):
        """ This method calculates the inverse masked non-uniform Fourier
        transform of a 1-D coefficients array.

        Parameters
        ----------
        x: np.ndarray
            masked non-uniform Fourier transform 1D data.

        Returns
        -------
        img: np.ndarray
            inverse 3D discrete Fourier transform of the input coefficients.
        """
        if self.nb_coils == 1:
            dtype = np.complex64
            cuda_array = self.nufftObj.thr.to_device(x.astype(dtype))
            gx = self.nufftObj.adjoint(cuda_array)
            img = np.squeeze(gx.get())
        else:
            dtype = np.complex64
            img = []
            for ch in range(self.nb_coils):
                cuda_array = self.nufftObj.thr.to_device(
                    np.copy(x[ch]).astype(dtype))
                gx = self.nufftObj.adjoint(cuda_array)
                img.append(gx.get())
            img = np.asarray(np.squeeze(img))
        return img * np.sqrt(np.prod(self.Kd))
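# A minimal usage sketch of the wrapper class above. The random sample array
# and image are placeholders; samples are assumed to be normalized to
# [-0.5, 0.5[ as the constructor expects, and the 'cuda' backend is an
# assumption (it requires pynufft and a working GPU).
import numpy as np

samples = np.random.uniform(-0.5, 0.5, size=(4096, 3)).astype(np.float32)
image = np.random.randn(64, 64, 64).astype(np.complex64)

nufft = NUFFT(samples=samples, shape=(64, 64, 64), platform='cuda')
kspace = nufft.op(image)       # forward: image -> non-Cartesian k-space
recon = nufft.adj_op(kspace)   # adjoint: k-space -> image-domain estimate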
Kd = (256, 256, 256)  # frequency grid, tuple
Jd = (6, 6, 6)  # interpolator
# om = numpy.load(DATA_PATH + 'om3D.npz')['arr_0']
# om = numpy.random.randn(10000, 3) * 2
# for m in (1e+5, 2e+5, 3e+5, 4e+5, 5e+5, 6e+5, 7e+5, 8e+5, 9e+5, 1e+6, 2e+6,
#           3e+6, 4e+6, 5e+6, 6e+6, 7e+6, 8e+6, 9e+6, 10e+6, 11e+6, 12e+6,
#           13e+6, 14e+6, 15e+6, 16e+6, 17e+6, 18e+6, 19e+6, 20e+6, 30e+6,
#           40e+6, 50e+6, 60e+6, 70e+6, 80e+6, 90e+6, 100e+6):
for m in (1e+4, ):
    om = numpy.random.randn(int(m), 3) * 2
    # om = numpy.load('/home/sram/UCL/DATA/G/3D_Angio/greg_3D.npz')['arr_0'][0:int(m), :]
    print(om.shape)

    from pynufft import NUFFT_cpu, NUFFT_hsa  # , NUFFT_memsave
    # from pynufft import NUFFT_memsave

    NufftObj_cpu = NUFFT_cpu()
    # NufftObj_hsa = NUFFT_hsa()
    NufftObj_hsa = NUFFT_hsa('cuda', 0, 0)

    import time
    t0 = time.time()
    NufftObj_cpu.plan(om, Nd, Kd, Jd)
    t1 = time.time()
    # NufftObj_hsa.plan(om, Nd, Kd, Jd)
    t12 = time.time()
    RADIX = 1
    NufftObj_hsa.plan(om, Nd, Kd, Jd, radix=RADIX)
    t2 = time.time()
    # proc = 0  # GPU
    # proc = 1  # gpu
    # NufftObj_hsa.offload(API='ocl', platform_number=proc, device_number=0)
    t22 = time.time()
    # NufftObj_memsave.offload(API='ocl', platform_number=proc, device_number=0)
# image = numpy.abs(image)
# print(special_license)
# pyplot.imshow(numpy.abs(image[:, :, 64]), label='original signal', cmap=gray)
# pyplot.show()

Nd = (128, 128, 128)  # time grid, tuple
Kd = (256, 256, 256)  # frequency grid, tuple
Jd = (6, 6, 6)  # interpolator
mid_slice = int(Nd[2] / 2)

# om = numpy.load(DATA_PATH + 'om3D.npz')['arr_0']
numpy.random.seed(0)
om = numpy.random.randn(int(5e+5), 3)
print(om.shape)

from pynufft import NUFFT_cpu, NUFFT_hsa, NUFFT_hsa_legacy

NufftObj = NUFFT_hsa(API='ocl', platform_number=1, device_number=0)
NufftObj.plan(om, Nd, Kd, Jd)
# NufftObj.offload(API='cuda', platform_number=0, device_number=0)

gx = NufftObj.thr.to_device(image.astype(numpy.complex64))
gy = NufftObj.forward(gx)

import time
t0 = time.time()
restore_x2 = GBPDNA_old(NufftObj, gy, maxiter=5)
t1 = time.time()
restore_x = NufftObj.solve(gy, 'cg', maxiter=50)
t2 = time.time()
print("GBPDNA time = ", t1 - t0)
print("CG time = ", t2 - t1)
def test_opencl_multicoils():
    import numpy
    import matplotlib.pyplot

    # load the example image
    import pkg_resources
    # Define the source of data
    DATA_PATH = pkg_resources.resource_filename('pynufft', 'src/data/')
    # PHANTOM_FILE = pkg_resources.resource_filename('pynufft', 'data/phantom_256_256.txt')
    import scipy.misc
    image = scipy.misc.ascent()[::2, ::2]
    image = image.astype(numpy.float64) / numpy.max(image[...])

    Nd = (256, 256)  # image space size
    Kd = (512, 512)  # k-space size
    Jd = (6, 6)      # interpolation size

    # load k-space points as an M x 2 array
    om = numpy.load(DATA_PATH + 'om2D.npz')['arr_0']
    # Show the shape of om
    print('the shape of om = ', om.shape)

    batch = 8

    # initiate the NUFFT_cpu object
    nfft = NUFFT_cpu()  # CPU NUFFT class
    # plan the nfft object
    nfft.plan(om, Nd, Kd, Jd, batch=batch)

    # initiate the NUFFT_hsa object
    try:
        NufftObj = NUFFT_hsa('cuda', 0, 0)
    except:
        try:
            NufftObj = NUFFT_hsa('ocl', 1, 0)
        except:
            NufftObj = NUFFT_hsa('ocl', 0, 0)

    # plan the NufftObj (similar to NUFFT_cpu)
    NufftObj.plan(om, Nd, Kd, Jd, batch=batch, radix=2)

    coil_sense = numpy.ones(Nd + (batch, ), dtype=numpy.complex64)
    for cc in range(0, batch, 2):
        coil_sense[int(256 / batch) * cc:int(256 / batch) * (cc + 1), :,
                   cc].real *= 0.1
        coil_sense[:, int(256 / batch) * cc:int(256 / batch) * (cc + 1),
                   cc].imag *= -0.1

    NufftObj.set_sense(coil_sense)
    nfft.set_sense(coil_sense)

    y = nfft.forward_one2many(image)

    import time
    t0 = time.time()
    for pp in range(0, 2):
        xx = nfft.adjoint_many2one(y)
    t_cpu = (time.time() - t0) / 2

    # Move the image to the GPU.
    # gx is a GPU array, dtype = complex64
    gx = NufftObj.to_device(image)
    gy = NufftObj.forward_one2many(gx)
    t0 = time.time()
    for pp in range(0, 10):
        gxx = NufftObj.adjoint_many2one(gy)
    t_cu = (time.time() - t0) / 10

    print(y.shape, gy.get().shape)
    print('t_cpu = ', t_cpu)
    print('t_cuda = ', t_cu)
    print('gy close? = ',
          numpy.allclose(y, gy.get(), atol=numpy.linalg.norm(y) * 1e-6))
    print('gy error = ',
          numpy.linalg.norm(y - gy.get()) / numpy.linalg.norm(y))
    print('gxx close? = ',
          numpy.allclose(xx, gxx.get(), atol=numpy.linalg.norm(xx) * 1e-6))
    print('gxx error = ',
          numpy.linalg.norm(xx - gxx.get()) / numpy.linalg.norm(xx))

    # for bb in range(0, batch):
    matplotlib.pyplot.subplot(1, 2, 1)
    matplotlib.pyplot.imshow(xx[...].real, cmap=matplotlib.cm.gray)
    matplotlib.pyplot.title('Adjoint_cpu_coil')
    matplotlib.pyplot.subplot(1, 2, 2)
    matplotlib.pyplot.imshow(gxx.get()[...].real, cmap=matplotlib.cm.gray)
    matplotlib.pyplot.title('Adjoint_hsa_coil')
    # matplotlib.pyplot.subplot(2, 2, 3)
    # matplotlib.pyplot.imshow(x_cpu_TV.real, cmap=matplotlib.cm.gray)
    # matplotlib.pyplot.title('TV_cpu')
    # x_cuda_TV = NufftObj.solve(gy, 'L1TVOLS', maxiter=maxiter, rho=2)
    # matplotlib.pyplot.subplot(2, 2, 4)
    # matplotlib.pyplot.imshow(x_cuda_TV.get().real, cmap=matplotlib.cm.gray)
    # matplotlib.pyplot.title('TV_cuda')
    matplotlib.pyplot.show(block=False)
    matplotlib.pyplot.pause(1)
    matplotlib.pyplot.close()

    print("acceleration=", t_cpu / t_cu)

    maxiter = 100
    t0 = time.time()
    x_cpu_cg = nfft.solve(y, 'cg', maxiter=maxiter)
    # x2 = nfft.solve(y2, 'L1TVLAD', maxiter=maxiter, rho=2)
    t1 = time.time() - t0

    # gy = NufftObj.thr.copy_array(NufftObj.thr.to_device(y2))
    t0 = time.time()
    x_cuda_cg = NufftObj.solve(gy, 'cg', maxiter=maxiter)
    # x = NufftObj.solve(gy, 'L1TVLAD', maxiter=maxiter, rho=2)
    print('shape of cg = ', x_cuda_cg.get().shape, x_cpu_cg.shape)
    t2 = time.time() - t0
    print(t1, t2)
    print('acceleration of cg=', t1 / t2)

    t0 = time.time()
    # x_cpu_TV = nfft.solve(y, 'L1TVOLS', maxiter=maxiter, rho=2)
    t1 = time.time() - t0
    t0 = time.time()
    # x_cuda_TV = NufftObj.solve(gy, 'L1TVOLS', maxiter=maxiter, rho=2)
    t2 = time.time() - t0
    print(t1, t2)
    # print('acceleration of TV=', t1 / t2)

    # try:
    for bb in range(0, batch):
        matplotlib.pyplot.subplot(2, batch, 1 + bb)
        matplotlib.pyplot.imshow(x_cpu_cg[..., bb].real,
                                 cmap=matplotlib.cm.gray)
        matplotlib.pyplot.title('CG_cpu_coil_' + str(bb))
        matplotlib.pyplot.subplot(2, batch, 1 + batch + bb)
        matplotlib.pyplot.imshow(x_cuda_cg.get()[..., bb].real,
                                 cmap=matplotlib.cm.gray)
        matplotlib.pyplot.title('CG_hsa_coil_' + str(bb))
    # matplotlib.pyplot.subplot(2, 2, 3)
    # matplotlib.pyplot.imshow(x_cpu_TV.real, cmap=matplotlib.cm.gray)
    # matplotlib.pyplot.title('TV_cpu')
    # matplotlib.pyplot.subplot(2, 2, 4)
    # matplotlib.pyplot.imshow(x_cuda_TV.get().real, cmap=matplotlib.cm.gray)
    # matplotlib.pyplot.title('TV_cuda')
    matplotlib.pyplot.show()
    # except:
    #     print('no matplotlib')

    NufftObj.release()
    del NufftObj
def test_init():
    # cm = matplotlib.cm.gray
    # load the example image
    import pkg_resources
    DATA_PATH = pkg_resources.resource_filename('pynufft', 'src/data/')
    # PHANTOM_FILE = pkg_resources.resource_filename('pynufft', 'data/phantom_256_256.txt')
    import numpy
    # import matplotlib.pyplot
    import scipy.misc

    image = scipy.misc.ascent()[::2, ::2]
    image = image.astype(numpy.float64) / numpy.max(image[...])

    Nd = (256, 256)  # image space size
    Kd = (512, 512)  # k-space size
    Jd = (6, 6)      # interpolation size

    # load k-space points
    om = numpy.load(DATA_PATH + 'om2D.npz')['arr_0']

    nfft = NUFFT_cpu()  # CPU
    nfft.plan(om, Nd, Kd, Jd)

    try:
        NufftObj = NUFFT_hsa('cuda', 0, 0)
    except:
        NufftObj = NUFFT_hsa('ocl', 0, 0)
    # NufftObj2 = NUFFT_hsa('cuda', 0, 0)
    NufftObj.debug = 1
    NufftObj.plan(om, Nd, Kd, Jd, radix=2)
    # NufftObj2.plan(om, Nd, Kd, Jd)
    # NufftObj.offload(API='cuda', platform_number=0, device_number=0)
    # NufftObj2.offload(API='cuda', platform_number=0, device_number=0)
    # NufftObj2.offload('cuda')
    # print('api=', NufftObj.thr.api_name())
    # NufftObj.offload(API='ocl', platform_number=0, device_number=0)

    y = nfft.k2y(nfft.xx2k(nfft.x2xx(image)))

    dtype = numpy.complex64
    NufftObj.x_Nd = NufftObj.thr.to_device(image.astype(dtype))
    gx = NufftObj.thr.copy_array(NufftObj.x_Nd)
    print('x close? = ',
          numpy.allclose(image, gx.get(), atol=1e-4))

    gxx = NufftObj.x2xx(gx)
    print('xx close? = ',
          numpy.allclose(nfft.x2xx(image), gxx.get(), atol=1e-4))

    gk = NufftObj.xx2k(gxx)
    k = nfft.xx2k(nfft.x2xx(image))
    print('k close? = ',
          numpy.allclose(nfft.xx2k(nfft.x2xx(image)), gk.get(),
                         atol=1e-3 * numpy.linalg.norm(k)))

    gy = NufftObj.k2y(gk)
    k2 = NufftObj.y2k(gy)
    print('y close? = ',
          numpy.allclose(y, gy.get(), atol=1e-3 * numpy.linalg.norm(y)),
          numpy.linalg.norm((y - gy.get()) / numpy.linalg.norm(y)))

    y2 = y
    print('k2 close? = ',
          numpy.allclose(nfft.y2k(y2), k2.get(),
                         atol=1e-3 * numpy.linalg.norm(nfft.y2k(y2))),
          numpy.linalg.norm((nfft.y2k(y2) - k2.get()) /
                            numpy.linalg.norm(nfft.y2k(y2))))

    gxx2 = NufftObj.k2xx(k2)
    # print('xx close? = ', numpy.allclose(nfft.k2xx(nfft.y2k(y2)),
    #       NufftObj.xx_Nd.get(queue=NufftObj.queue, async=False), atol=0.1))
    gx2 = NufftObj.xx2x(gxx2)
    print('x close? = ',
          numpy.allclose(nfft.adjoint(y2), gx2.get(),
                         atol=1e-3 * numpy.linalg.norm(nfft.adjoint(y2))))
    image3 = gx2.get()

    import time
    t0 = time.time()
    # k = nfft.xx2k(nfft.x2xx(image))
    for pp in range(0, 50):
        # y = nfft.k2y(nfft.xx2k(nfft.x2xx(image)))
        y = nfft.forward(image)
        # y = nfft.k2y(k)
        # k = nfft.y2k(y)
        # x = nfft.adjoint(y)
        # y2 = NufftObj.y.get(NufftObj.queue, async=False)
    t_cpu = (time.time() - t0) / 50.0
    print(t_cpu)

    # del nfft
    gy2 = NufftObj.forward(gx)
    # gk = NufftObj.xx2k(NufftObj.x2xx(gx))
    t0 = time.time()
    for pp in range(0, 20):
        gy2 = NufftObj.forward(gx)
        # gy2 = NufftObj.k2y(gk)
        # gx2 = NufftObj.adjoint(gy2)
        # gk2 = NufftObj.y2k(gy2)
        # del gy2
        # c = gx2.get()
        # gy = NufftObj.forward(gx)
    NufftObj.thr.synchronize()
    t_cl = (time.time() - t0) / 20
    print(t_cl)

    print('gy close? = ',
          numpy.allclose(y, gy.get(), atol=numpy.linalg.norm(y) * 1e-3))
    print("acceleration=", t_cpu / t_cl)

    maxiter = 100
    t0 = time.time()
    # x2 = nfft.solve(y2, 'cg', maxiter=maxiter)
    x2 = nfft.solve(y2, 'L1TVOLS', maxiter=maxiter, rho=2)
    t1 = time.time() - t0

    # gy = NufftObj.thr.copy_array(NufftObj.thr.to_device(y2))
    t0 = time.time()
    # x = NufftObj.solve(gy, 'cg', maxiter=maxiter)
    x = NufftObj.solve(gy, 'L1TVOLS', maxiter=maxiter, rho=2)
    t2 = time.time() - t0
    print(t1, t2)
    print('acceleration=', t1 / t2)
    # k = x.get()
    # x = nfft.k2xx(k) / nfft.st['sn']
    # return

    try:
        import matplotlib.pyplot
        matplotlib.pyplot.subplot(1, 2, 1)
        matplotlib.pyplot.imshow(x.get().real, cmap=matplotlib.cm.gray,
                                 vmin=0, vmax=1)
        matplotlib.pyplot.title("HSA reconstruction")
        matplotlib.pyplot.subplot(1, 2, 2)
        matplotlib.pyplot.imshow(x2.real, cmap=matplotlib.cm.gray)
        matplotlib.pyplot.title("CPU reconstruction")
        matplotlib.pyplot.show(block=False)
        matplotlib.pyplot.pause(3)
        matplotlib.pyplot.close()
        # del NufftObj.thr
        # del NufftObj
    except:
        print("no graphics")
Kd = (512, 512)  # frequency grid, tuple
Jd = (6, 6)  # interpolator
# om = numpy.load(DATA_PATH + 'om3D.npz')['arr_0']
# om = numpy.random.randn(10000, 3) * 2
# om = numpy.load('/home/sram/Cambridge_2012/DATA_MATLAB/Ciuciu/Trajectories_and_data_sparkling_radial/radial/')['arr_0']
om = scipy.io.loadmat(
    '/home/sram/Cambridge_2012/DATA_MATLAB/Ciuciu/Trajectories_and_data_sparkling_radial/sparkling/samples_sparkling_x8_64x3072.mat'
)['samples_sparkling']
# om = scipy.io.loadmat('/home/sram/Cambridge_2012/DATA_MATLAB/Ciuciu/Trajectories_and_data_sparkling_radial/radial/samples_radial_x8_64x3072.mat')['samples_radial']
om = om / numpy.max(om.real.ravel()) * numpy.pi
print(om.shape)

from pynufft import NUFFT_cpu, NUFFT_hsa, NUFFT_hsa_legacy
# from pynufft import NUFFT_memsave

NufftObj_cpu = NUFFT_cpu()
NufftObj_hsa = NUFFT_hsa()
NufftObj_memsave = NUFFT_hsa()

import time
t0 = time.time()
NufftObj_cpu.plan(om, Nd, Kd, Jd)
t1 = time.time()
NufftObj_hsa.plan(om, Nd, Kd, Jd)
NufftObj_memsave.plan(om, Nd, Kd, Jd)
t2 = time.time()
# proc = 0  # cpu
proc = 1  # gpu
# NufftObj_hsa.offload(API='ocl', platform_number=proc, device_number=0)
# NufftObj_memsave.offload(API='ocl', platform_number=proc, device_number=0)