def simple_device(verbose=False):
    display_func = _util.display_func(verbose)
    print_func = _util.print_func(verbose)

    print_func(af.device_info())
    print_func(af.get_device_count())
    print_func(af.is_dbl_supported())
    af.sync()

    curr_dev = af.get_device()  # remember the active device so it can be restored
    print_func(curr_dev)

    for k in range(af.get_device_count()):
        af.set_device(k)
        dev = af.get_device()
        assert k == dev

        print_func(af.is_dbl_supported(k))

        af.device_gc()

        mem_info_old = af.device_mem_info()

        a = af.randu(100, 100)
        af.sync(dev)
        mem_info = af.device_mem_info()
        assert mem_info['alloc']['buffers'] == 1 + mem_info_old['alloc']['buffers']
        assert mem_info['lock']['buffers'] == 1 + mem_info_old['lock']['buffers']

    af.set_device(curr_dev)  # restore the original device, not the last one visited
def simple_device(verbose=False):
    display_func = _util.display_func(verbose)
    print_func = _util.print_func(verbose)

    print_func(af.device_info())
    print_func(af.get_device_count())
    print_func(af.is_dbl_supported())
    af.sync()

    curr_dev = af.get_device()
    print_func(curr_dev)

    for k in range(af.get_device_count()):
        af.set_device(k)
        dev = af.get_device()
        assert k == dev

        print_func(af.is_dbl_supported(k))

        af.device_gc()

        mem_info_old = af.device_mem_info()

        a = af.randu(100, 100)
        af.sync(dev)
        mem_info = af.device_mem_info()
        assert mem_info['alloc']['buffers'] == 1 + mem_info_old['alloc']['buffers']
        assert mem_info['lock']['buffers'] == 1 + mem_info_old['lock']['buffers']

    af.set_device(curr_dev)

    a = af.randu(10, 10)
    display_func(a)
    dev_ptr = af.get_device_ptr(a)
    print_func(dev_ptr)
    b = af.Array(src=dev_ptr, dims=a.dims(), dtype=a.dtype(), is_device=True)
    display_func(b)

    c = af.randu(10, 10)
    af.lock_array(c)
    af.unlock_array(c)

    a = af.constant(1, 3, 3)
    b = af.constant(2, 3, 3)
    af.eval(a)
    af.eval(b)
    print_func(a)
    print_func(b)

    c = a + b
    d = a - b
    af.eval(c, d)
    print_func(c)
    print_func(d)

    print_func(af.set_manual_eval_flag(True))
    assert af.get_manual_eval_flag() == True
    print_func(af.set_manual_eval_flag(False))
    assert af.get_manual_eval_flag() == False
    display_func(af.is_locked_array(a))
def gpuGridrec(tomo, angles, center, input_params):
    """
    Gridrec reconstruction using GPU-based gridding.

    Inputs:
    tomo : 3D numpy sinogram array with dimensions same as tomopy
    angles : Array of angles in radians
    center : Floating point center of rotation
    input_params : A dictionary with the keys
        'gpu_device' : Device id of the gpu (for a 4 GPU cluster: 0-3)
        'oversamp_factor' : A factor by which to pad the image/data for FFT
        'fbp_filter_param' : A number between 0-1 for setting the filter cut-off for FBP
    """
    print('Starting GPU NUFFT recon')
    # Set the device number for gpu based code
    af.set_device(input_params['gpu_device'])
    # Change tomopy format
    new_tomo = np.transpose(tomo, (1, 2, 0))  # slice, columns, angles
    im_size = new_tomo.shape[1]
    num_slice = new_tomo.shape[0]
    num_angles = new_tomo.shape[2]
    pad_size = np.int16(im_size * input_params['oversamp_factor'])
    # nufft_scaling = (np.pi / pad_size) ** 2

    # Initialize structures for NUFFT
    sino = {}
    geom = {}
    sino['Ns'] = pad_size  # Sinogram size after padding
    sino['Ns_orig'] = im_size  # size of original sinogram
    sino['center'] = center + (sino['Ns'] // 2 - sino['Ns_orig'] // 2)  # for padded sinogram
    sino['angles'] = angles
    sino['filter'] = input_params['fbp_filter_param']  # Parameter to control strength of FBP filter, normalized to [0, 1]

    # Initialize NUFFT parameters
    nufft_params = init_nufft_params(sino, geom)
    rec_nufft = afnp.zeros((num_slice // 2, sino['Ns_orig'], sino['Ns_orig']),
                           dtype=afnp.complex64)
    Ax = afnp.zeros((sino['Ns'], num_angles), dtype=afnp.complex64)
    pad_idx = slice(sino['Ns'] // 2 - sino['Ns_orig'] // 2,
                    sino['Ns'] // 2 + sino['Ns_orig'] // 2)
    rec_nufft_final = np.zeros((num_slice, sino['Ns_orig'], sino['Ns_orig']),
                               dtype=np.float32)

    # Move all data to GPU; even/odd slices are packed into the real and
    # imaginary parts so two slices are reconstructed per complex transform
    slice_1 = slice(0, num_slice, 2)
    slice_2 = slice(1, num_slice, 2)
    gdata = afnp.array(new_tomo[slice_1] + 1j * new_tomo[slice_2],
                       dtype=afnp.complex64)
    x_recon = afnp.zeros((sino['Ns'], sino['Ns']), dtype=afnp.complex64)

    # loop over all slices
    for i in range(num_slice // 2):
        Ax[pad_idx, :] = gdata[i]
        # filtered back-projection
        rec_nufft[i] = (back_project(Ax, nufft_params))[pad_idx, pad_idx]

    # Move to CPU
    # Rescale result to match tomopy
    rec_nufft = np.array(rec_nufft, dtype=np.complex64)  # * nufft_scaling
    rec_nufft_final[slice_1] = np.array(rec_nufft.real, dtype=np.float32)
    rec_nufft_final[slice_2] = np.array(rec_nufft.imag, dtype=np.float32)
    return rec_nufft_final
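A minimal, hypothetical driver for gpuGridrec is sketched below. It assumes tomopy is available for generating a synthetic sinogram and that this module's NUFFT dependencies (init_nufft_params, back_project) are importable; the center value and parameter settings are illustrative only.

import numpy as np
import tomopy

sim = tomopy.shepp3d(size=128).astype(np.float32)  # synthetic phantom
angles = tomopy.angles(180)                        # projection angles in radians
tomo = tomopy.project(sim, angles)                 # sinogram stack, tomopy layout

input_params = {
    'gpu_device': 0,          # id of the GPU to run on
    'oversamp_factor': 1.25,  # padding factor for the FFT grid
    'fbp_filter_param': 0.5,  # FBP filter cut-off in [0, 1]
}
rec = gpuGridrec(tomo, angles, tomo.shape[2] / 2.0, input_params)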
def main():
    parser = argparse.ArgumentParser(
        description='af vs sklearn knn comparison')
    parser.add_argument('-b', '--backend',
                        choices=['default', 'cpu', 'cuda', 'opencl'],
                        default='default', action='store',
                        help='ArrayFire backend to be used')
    parser.add_argument('-v', '--device', type=int, default=0, action='store',
                        help='ArrayFire backend device to be used')
    parser.add_argument('-d', '--dataset',
                        choices=['iris', 'mnist', 'notmnist'],
                        default='iris', action='store',
                        help='Dataset to be used')
    parser.add_argument('-t', '--type',
                        choices=['simple', 'predict', 'benchmark'],
                        default='simple', action='store',
                        help='Demo type')
    args = parser.parse_args()

    af.set_backend(args.backend)
    af.set_device(args.device)
    af.info()

    dataset = None
    if args.dataset == 'iris':
        dataset = read_and_preprocess_iris_data()
    elif args.dataset == 'mnist':
        dataset = read_and_preprocess_mnist_data()
    elif args.dataset == 'notmnist':
        dataset = read_and_preprocess_notmnist_data()
    else:
        parser.print_help()
        return -1

    print('------------')

    if args.type == 'simple':
        demo_simple(arrayfire_knn_demo, sklearn_knn_demo, dataset)
    elif args.type == 'predict':
        demo_pred(arrayfire_knn_demo, sklearn_knn_demo, dataset)
    elif args.type == 'benchmark':
        demo_bench(arrayfire_knn_demo, sklearn_knn_demo, dataset)
    else:
        parser.print_help()
        return -1
def _get_compute_device_internal(id: int) -> ComputeDevice:
    af.set_device(id)
    device_info = af.device_info()

    name = device_info['device']
    backend = device_info['backend']
    toolkit_version = device_info['toolkit']
    compute_version = device_info['compute']

    return ComputeDevice(id, name, backend, toolkit_version, compute_version)
def set_compute_device(compute_device: tp.Union[int, ComputeDevice]) -> None:
    if isinstance(compute_device, int):
        compute_device = \
            ComputeDeviceManager.get_compute_device(compute_device)
    elif not isinstance(compute_device, ComputeDevice):
        raise TypeError(f"The argument compute_device must be of "
                        f"type ComputeDevice or of type int. The argument "
                        f"provided is of type {type(compute_device)}")

    af.set_device(compute_device.id)
def get_compute_devices(cls) -> tp.Sequence[ComputeDevice]:
    if ComputeDeviceManager._compute_devices is None:
        saved_device_id = cls.get_current_compute_device_id()
        n = af.get_device_count()

        ComputeDeviceManager._compute_devices = []
        for id in range(n):
            ComputeDeviceManager._compute_devices.append(
                ComputeDeviceManager._get_compute_device_internal(id))

        af.set_device(saved_device_id)

    return ComputeDeviceManager._compute_devices
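A short, hypothetical usage of the manager above; it assumes ComputeDeviceManager exposes get_compute_devices as shown, and that set_compute_device from the earlier snippet is in scope.

# Enumerate every ArrayFire device visible to the process, then pick one.
for device in ComputeDeviceManager.get_compute_devices():
    print(device.id, device.name, device.backend)

set_compute_device(0)  # select by integer id; a ComputeDevice instance also works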
def main():
    argc = len(sys.argv)
    device = int(sys.argv[1]) if argc > 1 else 0
    console = sys.argv[2][0] == '-' if argc > 2 else False
    perc = int(sys.argv[3]) if argc > 3 else 60

    try:
        af.set_device(device)
        af.info()
        logit_demo(console, perc)
    except Exception as e:
        print('Error: ', str(e))
def gpuGridrec(tomo, angles, center, input_params):
    print('Starting GPU NUFFT recon')
    # Set the device number for gpu based code
    af.set_device(input_params['gpu_device'])
    # Change tomopy format
    new_tomo = np.transpose(tomo, (1, 2, 0))  # slice, columns, angles
    im_size = new_tomo.shape[1]
    num_slice = new_tomo.shape[0]
    num_angles = new_tomo.shape[2]
    pad_size = np.int16(im_size * input_params['oversamp_factor'])
    nufft_scaling = (np.pi / pad_size) ** 2

    # Initialize structures for NUFFT
    sino = {}
    geom = {}
    sino['Ns'] = pad_size  # Sinogram size after padding
    sino['Ns_orig'] = im_size  # size of original sinogram
    sino['center'] = center + (sino['Ns'] // 2 - sino['Ns_orig'] // 2)  # for padded sinogram
    sino['angles'] = angles
    sino['filter'] = input_params['fbp_filter_param']  # Parameter to control strength of FBP filter, normalized to [0, 1]

    # Initialize NUFFT parameters
    nufft_params = init_nufft_params(sino, geom)
    rec_nufft = afnp.zeros((num_slice // 2, sino['Ns_orig'], sino['Ns_orig']),
                           dtype=afnp.complex64)
    Ax = afnp.zeros((sino['Ns'], num_angles), dtype=afnp.complex64)
    pad_idx = slice(sino['Ns'] // 2 - sino['Ns_orig'] // 2,
                    sino['Ns'] // 2 + sino['Ns_orig'] // 2)
    rec_nufft_final = np.zeros((num_slice, sino['Ns_orig'], sino['Ns_orig']),
                               dtype=np.float32)

    # Move all data to GPU
    slice_1 = slice(0, num_slice, 2)
    slice_2 = slice(1, num_slice, 2)
    gdata = afnp.array(new_tomo[slice_1] + 1j * new_tomo[slice_2],
                       dtype=afnp.complex64)
    x_recon = afnp.zeros((sino['Ns'], sino['Ns']), dtype=afnp.complex64)

    # loop over all slices
    for i in range(num_slice // 2):
        Ax[pad_idx, :] = gdata[i]
        # filtered back-projection
        rec_nufft[i] = (back_project(Ax, nufft_params))[pad_idx, pad_idx]

    # Move to CPU
    # Rescale result to match tomopy
    rec_nufft = np.array(rec_nufft, dtype=np.complex64) * nufft_scaling
    rec_nufft_final[slice_1] = np.array(rec_nufft.real, dtype=np.float32)
    rec_nufft_final[slice_2] = np.array(rec_nufft.imag, dtype=np.float32)
    return rec_nufft_final
def simple_device(verbose=False):
    display_func = _util.display_func(verbose)
    print_func = _util.print_func(verbose)

    print_func(af.device_info())
    print_func(af.get_device_count())
    print_func(af.is_dbl_supported())
    af.sync()

    curr_dev = af.get_device()
    print_func(curr_dev)

    for k in range(af.get_device_count()):
        af.set_device(k)
        dev = af.get_device()
        assert k == dev

        print_func(af.is_dbl_supported(k))

        af.device_gc()

        mem_info_old = af.device_mem_info()

        a = af.randu(100, 100)
        af.sync(dev)
        mem_info = af.device_mem_info()
        assert mem_info['alloc']['buffers'] == 1 + mem_info_old['alloc']['buffers']
        assert mem_info['lock']['buffers'] == 1 + mem_info_old['lock']['buffers']

    af.set_device(curr_dev)

    a = af.randu(10, 10)
    display_func(a)
    dev_ptr = af.get_device_ptr(a)
    print_func(dev_ptr)
    b = af.Array(src=dev_ptr, dims=a.dims(), dtype=a.dtype(), is_device=True)
    display_func(b)

    af.lock_device_ptr(b)
    af.unlock_device_ptr(b)
def __init__(self, physical_system, performance_test_flag = False):
    """
    Constructor for the nonlinear_solver object. It takes the physical
    system object as an argument and uses it in initialization and
    evolution of the system in consideration.

    Additionally, a performance test flag is also passed which, when true,
    stores the time consumed by each of the major solver routines. This
    proves particularly useful in analyzing performance bottlenecks and
    obtaining benchmarks.

    Parameters:
    -----------

    physical_system: object
        The defined physical system object which holds all the simulation
        information such as the initial conditions, and the domain info is
        passed as an argument in defining an instance of the
        nonlinear_solver. This system is then evolved, and monitored using
        the various methods under the nonlinear_solver class.

    performance_test_flag: bool
        When set to true, the time elapsed in each of the solver routines
        is measured. These performance stats can be obtained at the end of
        the run using the command print_performance_timings, which
        summarizes the results in a table.
    """
    self.physical_system = physical_system

    # Holding Domain Info:
    self.q1_start, self.q1_end = physical_system.q1_start,\
                                 physical_system.q1_end
    self.q2_start, self.q2_end = physical_system.q2_start,\
                                 physical_system.q2_end
    self.p1_start, self.p1_end = physical_system.p1_start,\
                                 physical_system.p1_end
    self.p2_start, self.p2_end = physical_system.p2_start,\
                                 physical_system.p2_end
    self.p3_start, self.p3_end = physical_system.p3_start,\
                                 physical_system.p3_end

    # Holding Domain Resolution:
    self.N_q1, self.dq1 = physical_system.N_q1, physical_system.dq1
    self.N_q2, self.dq2 = physical_system.N_q2, physical_system.dq2
    self.N_p1, self.dp1 = physical_system.N_p1, physical_system.dp1
    self.N_p2, self.dp2 = physical_system.N_p2, physical_system.dp2
    self.N_p3, self.dp3 = physical_system.N_p3, physical_system.dp3

    # Getting number of ghost zones, and the boundary
    # conditions that are utilized:
    N_g = self.N_ghost = physical_system.N_ghost
    self.boundary_conditions = physical_system.boundary_conditions

    # MPI Communicator:
    self._comm = self.physical_system.mpi_communicator

    if(self.physical_system.params.num_devices > 1):
        rank = self._comm.rank
        if(self.physical_system.params.manual_device_allocation == True):
            af.set_device(self.physical_system.params.device_allocation[rank])
        else:
            af.set_device(rank % self.physical_system.params.num_devices)

    # Getting number of species:
    N_s = self.N_species = self.physical_system.N_species

    # TODO: Remove mass and charge from lib
    if(type(physical_system.params.mass) == list):
        # Having a temporary copy of the lists to copy to af.Array:
        list_mass   = physical_system.params.mass.copy()
        list_charge = physical_system.params.charge.copy()

        # Initializing af.Arrays for mass and charge:
        # Having the mass and charge along axis 1:
        self.physical_system.params.mass   = af.constant(0, 1, N_s, dtype = af.Dtype.f64)
        self.physical_system.params.charge = af.constant(0, 1, N_s, dtype = af.Dtype.f64)

        for i in range(N_s):
            self.physical_system.params.mass[0, i]   = list_mass[i]
            self.physical_system.params.charge[0, i] = list_charge[i]

    self.physical_system.params.rank = self._comm.rank

    PETSc.Sys.Print('\nBackend Details for Nonlinear Solver:')

    # Printing the backend details for each rank/device/node:
    PETSc.Sys.syncPrint(indent('Rank ' + str(self._comm.rank) + ' of ' + str(self._comm.size - 1)))
    PETSc.Sys.syncPrint(indent('On Node: ' + socket.gethostname()))
    PETSc.Sys.syncPrint(indent('Device Details:'))
    PETSc.Sys.syncPrint(indent(af.info_str(), 2))
    PETSc.Sys.syncPrint(indent('Device Bandwidth = ' + str(bandwidth_test(100)) + ' GB / sec'))
    PETSc.Sys.syncPrint()
    PETSc.Sys.syncFlush()

    self.performance_test_flag = performance_test_flag

    # Initializing variables which are used to time the components of the solver:
    if(performance_test_flag == True):
        self.time_ts = 0

        self.time_interp2  = 0
        self.time_sourcets = 0

        self.time_fvm_solver  = 0
        self.time_reconstruct = 0
        self.time_riemann     = 0

        self.time_fieldstep = 0
        self.time_interp3   = 0

        self.time_apply_bcs_f   = 0
        self.time_communicate_f = 0

    petsc_bc_in_q1 = 'ghosted'; self.N_g1 = self.N_ghost
    petsc_bc_in_q2 = 'ghosted'; self.N_g2 = self.N_ghost

    # Only for periodic boundary conditions or shearing-box boundary conditions
    # do the boundary conditions passed to the DA need to be changed. PETSc
    # automatically handles the application of periodic boundary conditions when
    # running in parallel. For shearing box boundary conditions, an interpolation
    # operation needs to be applied on top of the periodic boundary conditions.
    # In all other cases, ghosted boundaries are used.
    if(   self.boundary_conditions.in_q1_left == 'periodic'
       or self.boundary_conditions.in_q1_left == 'shearing-box'
      ):
        petsc_bc_in_q1 = 'periodic'

    if(   self.boundary_conditions.in_q2_bottom == 'periodic'
       or self.boundary_conditions.in_q2_bottom == 'shearing-box'
      ):
        petsc_bc_in_q2 = 'periodic'

    if(self.boundary_conditions.in_q1_left == 'none'):
        petsc_bc_in_q1 = 'none'; self.N_g1 = 0

    if(self.boundary_conditions.in_q2_bottom == 'none'):
        petsc_bc_in_q2 = 'none'; self.N_g2 = 0

    if(self.boundary_conditions.in_q1_left == 'periodic'):
        try:
            assert(self.boundary_conditions.in_q1_right == 'periodic')
        except:
            raise Exception('Periodic boundary conditions need to be applied to \
                             both the boundaries of a particular axis'
                           )

    if(self.boundary_conditions.in_q1_left == 'shearing-box'):
        try:
            assert(self.boundary_conditions.in_q1_right == 'shearing-box')
        except:
            raise Exception('Shearing box boundary conditions need to be applied to \
                             both the boundaries of a particular axis'
                           )

    if(self.boundary_conditions.in_q2_bottom == 'periodic'):
        try:
            assert(self.boundary_conditions.in_q2_top == 'periodic')
        except:
            raise Exception('Periodic boundary conditions need to be applied to \
                             both the boundaries of a particular axis'
                           )

    if(self.boundary_conditions.in_q2_bottom == 'shearing-box'):
        try:
            assert(self.boundary_conditions.in_q2_top == 'shearing-box')
        except:
            raise Exception('Shearing box boundary conditions need to be applied to \
                             both the boundaries of a particular axis'
                           )

    if(self.boundary_conditions.in_q1_left == 'none'):
        try:
            assert(self.boundary_conditions.in_q1_right == 'none')
        except:
            raise Exception('NONE boundary conditions need to be applied to \
                             both the boundaries of a particular axis'
                           )

    if(self.boundary_conditions.in_q2_bottom == 'none'):
        try:
            assert(self.boundary_conditions.in_q2_top == 'none')
        except:
            raise Exception('NONE boundary conditions need to be applied to \
                             both the boundaries of a particular axis'
                           )

    self._nproc_in_q1 = PETSc.DECIDE
    self._nproc_in_q2 = PETSc.DECIDE

    # Break up the domain into manually defined portions
    self._ownership_ranges = None
    if self.physical_system.params.enable_manual_domain_decomposition:
        ownership_q1 = [self.N_q1 * item for item in self.physical_system.params.q1_partition]
        ownership_q2 = [self.N_q2 * item for item in self.physical_system.params.q2_partition]
        self._ownership_ranges = (ownership_q1, ownership_q2)
    # TODO : Implement error handling and give clean messages

    # Since shearing boundary conditions require interpolations which are non-local:
    if(self.boundary_conditions.in_q2_bottom == 'shearing-box'):
        self._nproc_in_q1 = 1

    if(self.boundary_conditions.in_q1_left == 'shearing-box'):
        self._nproc_in_q2 = 1

    # DMDA is a data structure to handle a distributed structured
    # grid and its related core algorithms. It stores metadata of
    # how the grid is partitioned when run in parallel which is
    # utilized by the various methods of the solver.
    self._da_f = PETSc.DMDA().create([self.N_q1, self.N_q2],
                                     dof = (  self.N_species
                                            * self.N_p1
                                            * self.N_p2
                                            * self.N_p3
                                           ),
                                     stencil_width = N_g,
                                     boundary_type = (petsc_bc_in_q1,
                                                      petsc_bc_in_q2
                                                     ),
                                     proc_sizes = (self._nproc_in_q1,
                                                   self._nproc_in_q2
                                                  ),
                                     ownership_ranges = self._ownership_ranges,
                                     stencil_type = 1,
                                     comm = self._comm
                                    )
    lx, ly = self._da_f.getOwnershipRanges()

    # This DA is used by the FileIO routine dump_moments():
    # Finding the number of definitions for the moments:
    attributes = [a for a in dir(self.physical_system.moments) if not a.startswith('_')]

    # Removing utility functions:
    if('integral_over_v' in attributes):
        attributes.remove('integral_over_v')

    self._da_dump_moments = PETSc.DMDA().create([self.N_q1, self.N_q2],
                                                dof = self.N_species * (len(attributes) - 2),  # don't count integral_over_p and params in moments.py
                                                proc_sizes = (self._nproc_in_q1,
                                                              self._nproc_in_q2
                                                             ),
                                                ownership_ranges = self._ownership_ranges,
                                                comm = self._comm
                                               )

    # For dumping aux arrays:
    self.dump_aux_arrays_initial_call = 1

    # Creation of the local and global vectors from the DA:
    # This is for the distribution function
    self._glob_f  = self._da_f.createGlobalVec()
    self._local_f = self._da_f.createLocalVec()

    # The following vector is used to dump the data to file:
    self._glob_moments = self._da_dump_moments.createGlobalVec()

    # Getting the arrays for the above vectors:
    self._glob_f_array       = self._glob_f.getArray()
    self._local_f_array      = self._local_f.getArray()
    self._glob_moments_array = self._glob_moments.getArray()

    # Setting names for the objects which will then be
    # used as the key identifiers for the HDF5 files:
    PETSc.Object.setName(self._glob_f, 'distribution_function')
    PETSc.Object.setName(self._glob_moments, 'moments')

    # Indexing vars used throughout
    self.i_q1_start = self.N_g1; self.i_q1_end = -self.N_g1
    self.i_q2_start = self.N_g2; self.i_q2_end = -self.N_g2
    if (self.N_g1 == 0):
        self.i_q1_end = 1
    if (self.N_g2 == 0):
        self.i_q2_end = 1

    # Get start (corner) indices of the local zone wrt global ordering and its size
    ((_i_q1_start, _i_q2_start), (_N_q1_local, _N_q2_local)) = self._da_f.getCorners()

    # Coordinates of the local zone in a global coord system
    self.q1_start_local = self.q1_start + _i_q1_start * self.dq1
    self.q2_start_local = self.q2_start + _i_q2_start * self.dq2

    # TODO : Fix this. Passing into params for use in coords.py
    self.physical_system.params.q1_start_local_left   = self.q1_start_local
    self.physical_system.params.q2_start_local_bottom = self.q2_start_local

    print("nonlinear.py: rank = ", self._comm.rank,
          "(q1_start_local, q2_start_local) = (",
          self.q1_start_local, self.q2_start_local, ")"
         )
    print("nonlinear.py: rank = ", self._comm.rank,
          "(N_q1_local, N_q2_local) = (", _N_q1_local, _N_q2_local, ")"
         )
    print("nonlinear.py: rank = ", self._comm.rank,
          "ownership_ranges : lx = ", lx, "ly = ", ly
         )

    self.N_q1_local = _N_q1_local
    self.N_q2_local = _N_q2_local

    self.N_q1_local_with_Ng = _N_q1_local + 2 * self.N_g1
    self.N_q2_local_with_Ng = _N_q2_local + 2 * self.N_g2

    # Obtaining the array values of spatial coordinates:
    q_left_bot, q_center_bot, q_left_center, q_center = \
        calculate_q(self.q1_start_local,
                    self.q2_start_local,
                    self.N_q1_local, self.N_q2_local,
                    self.N_g1, self.N_g2,
                    self.dq1, self.dq2
                   )

    self.q1_left_bot    = q_left_bot[0]
    self.q2_left_bot    = q_left_bot[1]
    self.q1_center_bot  = q_center_bot[0]
    self.q2_center_bot  = q_center_bot[1]
    self.q1_left_center = q_left_center[0]
    self.q2_left_center = q_left_center[1]
    self.q1_center      = q_center[0]
    self.q2_center      = q_center[1]

    self.p1_center, self.p2_center, self.p3_center = \
        calculate_p_center(self.p1_start, self.p2_start, self.p3_start,
                           self.N_p1, self.N_p2, self.N_p3,
                           self.dp1, self.dp2, self.dp3,
                          )

    self.p1_left, self.p2_bottom, self.p3_back = \
        calculate_p_corner(self.p1_start, self.p2_start, self.p3_start,
                           self.N_p1, self.N_p2, self.N_p3,
                           self.dp1, self.dp2, self.dp3,
                          )

    # Need to convert the lists dp1, dp2, dp3 to af.Arrays for vector
    # computations to work
    self.dp1 = af.moddims(af.to_array(np.array(self.dp1)), 1, self.N_species)
    self.dp2 = af.moddims(af.to_array(np.array(self.dp2)), 1, self.N_species)
    self.dp3 = af.moddims(af.to_array(np.array(self.dp3)), 1, self.N_species)

    # Need to do the same for the p1_start/end lists.
    self.p1_start = af.moddims(af.to_array(self.p1_start), 1, self.N_species)
    self.p2_start = af.moddims(af.to_array(self.p2_start), 1, self.N_species)
    self.p3_start = af.moddims(af.to_array(self.p3_start), 1, self.N_species)

    self.p1_end = af.moddims(af.to_array(self.p1_end), 1, self.N_species)
    self.p2_end = af.moddims(af.to_array(self.p2_end), 1, self.N_species)
    self.p3_end = af.moddims(af.to_array(self.p3_end), 1, self.N_species)

    self.p2_left = self.p2_center
    self.p3_left = self.p3_center

    self.p1_bottom = self.p1_center
    self.p3_bottom = self.p3_center

    self.p1_back = self.p1_center
    self.p2_back = self.p2_center

    # Initialize according to initial condition provided by user:
    self._initialize(physical_system.params)

    # Initializing a variable to track time-elapsed:
    self.time_elapsed = 0

    # Assigning the function objects to methods of the solver:
    self._A_q = physical_system.A_q
    self._C_q = physical_system.C_q
    self._A_p = physical_system.A_p
    self._C_p = physical_system.C_p

    # Source/Sink term:
    self._source = physical_system.source
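A hedged sketch of how this constructor is typically driven follows; the physical_system construction is hypothetical (its exact signature belongs to the surrounding library) and only illustrates that the solver consumes a fully assembled system object.

# Hypothetical setup: domain, boundary_conditions, params, initialize,
# advection_terms, source and moments stand in for the library's own modules.
system = physical_system(domain, boundary_conditions, params,
                         initialize, advection_terms, source, moments)
solver = nonlinear_solver(system, performance_test_flag=True)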
parser.add_argument("--N", help="Amount of measurements taken. Default: 10",default=10,type=int) parser.add_argument("--sigma", help="Sigma of gaussian filter. Default: 1.0",default=1.0,type=float) parser.add_argument("--size", help="Size of squared image. Default: 1024",default=1024,type=int) parser.add_argument("--raw", help="Produce raw output for plot data.",action="store_true") parser.add_argument("--device", help="Select GPU device number. Default: 0",default=0,type=int) args = parser.parse_args() if not args.raw: print "--- Parameters ---" print "\tSize:",args.size print "\tN:",args.N print "\tSigma:",args.sigma print "\tDevice:",args.device af.set_device(args.device) # create input image img = np.random.random((args.size,args.size)) # create arrayfire gaussiankernel start = time.clock() afsmk = af_gaussian2D(args.sigma) end = time.clock() af_kernel = end-start # storage for times af_cpy_hd = np.zeros(args.N) af_convolve = np.zeros(args.N) af_cpy_dh = np.zeros(args.N) vigra_t = np.zeros(args.N)
import arrayfire as af
import afnumpy as afnp
import numpy as np
import sys
from gnufft import tvd_update, add_hessian
import tomopy
import pyqtgraph as pg
import time

af.set_device(2)

nslice = 150
im_size = 2560
# obj = np.ones((nslice, im_size, im_size), dtype=np.float32)
# obj = tomopy.shepp3d((nslice, im_size, im_size), dtype=np.float32)
obj = np.random.rand(nslice, im_size, im_size).astype(np.float32)
x = obj[::2]
y = obj[1::2]
print(x.shape)
vol = x + 1j * y

t = time.time()
vol = afnp.array(vol.astype(np.complex64))  # 255*
fcn = afnp.zeros((nslice // 2, im_size, im_size), dtype=np.complex64)
tvd_update(1.2, 1, vol, fcn)
elapsed = time.time() - t
print('Time taken for gradient %f' % (elapsed))

output = np.zeros((nslice, im_size, im_size), dtype=np.float32)
output[::2] = np.array(fcn).real
output[1::2] = np.array(fcn).imag
print(output.max())
print(output.min())
def __init__(self, physical_system, performance_test_flag=False):
    """
    Constructor for the nonlinear_solver object. It takes the physical
    system object as an argument and uses it in initialization and
    evolution of the system in consideration.

    Additionally, a performance test flag is also passed which, when true,
    stores the time consumed by each of the major solver routines. This
    proves particularly useful in analyzing performance bottlenecks and
    obtaining benchmarks.

    Parameters:
    -----------
    physical_system: The defined physical system object which holds
                     all the simulation information such as the initial
                     conditions, and the domain info is passed as an
                     argument in defining an instance of the
                     nonlinear_solver. This system is then evolved, and
                     monitored using the various methods under the
                     nonlinear_solver class.
    """
    self.physical_system = physical_system

    # Holding Domain Info:
    self.q1_start, self.q1_end = physical_system.q1_start,\
                                 physical_system.q1_end
    self.q2_start, self.q2_end = physical_system.q2_start,\
                                 physical_system.q2_end
    self.p1_start, self.p1_end = physical_system.p1_start,\
                                 physical_system.p1_end
    self.p2_start, self.p2_end = physical_system.p2_start,\
                                 physical_system.p2_end
    self.p3_start, self.p3_end = physical_system.p3_start,\
                                 physical_system.p3_end

    # Holding Domain Resolution:
    self.N_q1, self.dq1 = physical_system.N_q1, physical_system.dq1
    self.N_q2, self.dq2 = physical_system.N_q2, physical_system.dq2
    self.N_p1, self.dp1 = physical_system.N_p1, physical_system.dp1
    self.N_p2, self.dp2 = physical_system.N_p2, physical_system.dp2
    self.N_p3, self.dp3 = physical_system.N_p3, physical_system.dp3

    # Getting number of ghost zones, and the boundary
    # conditions that are utilized:
    N_g = self.N_ghost = physical_system.N_ghost
    self.boundary_conditions = physical_system.boundary_conditions

    # Declaring the communicator:
    self._comm = PETSc.COMM_WORLD.tompi4py()

    if (self.physical_system.params.num_devices > 1):
        af.set_device(self._comm.rank % self.physical_system.params.num_devices)

    PETSc.Sys.Print('\nBackend Details for Nonlinear Solver:')

    # Printing the backend details for each rank/device/node:
    PETSc.Sys.syncPrint(indent('Rank ' + str(self._comm.rank) + ' of '
                               + str(self._comm.size - 1)))
    PETSc.Sys.syncPrint(indent('On Node: ' + socket.gethostname()))
    PETSc.Sys.syncPrint(indent('Device Details:'))
    PETSc.Sys.syncPrint(indent(af.info_str(), 2))
    PETSc.Sys.syncPrint(indent('Device Bandwidth = ' + str(bandwidth_test(100))
                               + ' GB / sec'))
    PETSc.Sys.syncPrint()
    PETSc.Sys.syncFlush()

    self.performance_test_flag = performance_test_flag

    if (performance_test_flag == True):
        self.time_ts = 0

        self.time_interp2 = 0
        self.time_sourcets = 0

        self.time_fvm_solver = 0
        self.time_reconstruct = 0
        self.time_riemann = 0

        self.time_fieldstep = 0
        self.time_fieldsolver = 0
        self.time_interp3 = 0

        self.time_apply_bcs_f = 0
        self.time_apply_bcs_fields = 0

        self.time_communicate_f = 0
        self.time_communicate_fields = 0

    petsc_bc_in_q1 = 'ghosted'
    petsc_bc_in_q2 = 'ghosted'

    # Only for periodic boundary conditions do the boundary
    # conditions passed to the DA need to be changed. PETSc
    # automatically handles the application of periodic
    # boundary conditions when running in parallel. In all other
    # cases, ghosted boundaries are used.
    if (self.boundary_conditions.in_q1_left == 'periodic'):
        petsc_bc_in_q1 = 'periodic'

    if (self.boundary_conditions.in_q2_bottom == 'periodic'):
        petsc_bc_in_q2 = 'periodic'

    # DMDA is a data structure to handle a distributed structured
    # grid and its related core algorithms. It stores metadata of
    # how the grid is partitioned when run in parallel which is
    # utilized by the various methods of the solver.
    self._da_f = PETSc.DMDA().create([self.N_q1, self.N_q2],
                                     dof=(self.N_p1 * self.N_p2 * self.N_p3),
                                     stencil_width=self.N_ghost,
                                     boundary_type=(petsc_bc_in_q1,
                                                    petsc_bc_in_q2),
                                     proc_sizes=(PETSc.DECIDE, PETSc.DECIDE),
                                     stencil_type=1,
                                     comm=self._comm)

    # This DA object is used in the communication routines for the
    # EM field quantities. A DOF of 6 is taken so that the communications,
    # and application of B.C's may be carried out in a single call among
    # all the field quantities(E1, E2, E3, B1, B2, B3)
    self._da_fields = PETSc.DMDA().create([self.N_q1, self.N_q2],
                                          dof=6,
                                          stencil_width=self.N_ghost,
                                          boundary_type=(petsc_bc_in_q1,
                                                         petsc_bc_in_q2),
                                          proc_sizes=(PETSc.DECIDE, PETSc.DECIDE),
                                          stencil_type=1,
                                          comm=self._comm)

    # Additionally, a DMDA object also needs to be created for
    # the KSP/SNES solver with a DOF of 1. This is used to solve for
    # the electrostatic case:
    self._da_ksp = PETSc.DMDA().create([self.N_q1, self.N_q2],
                                       stencil_width=self.N_ghost,
                                       boundary_type=(petsc_bc_in_q1,
                                                      petsc_bc_in_q2),
                                       proc_sizes=(PETSc.DECIDE, PETSc.DECIDE),
                                       stencil_type=1,
                                       comm=self._comm)

    # This DA is used by the FileIO routine dump_moments():
    self._da_dump_moments = PETSc.DMDA().create([self.N_q1, self.N_q2],
                                                dof=len(self.physical_system.moment_exponents),
                                                proc_sizes=(PETSc.DECIDE, PETSc.DECIDE),
                                                comm=self._comm)

    # Creation of the local and global vectors from the DA:
    # This is for the distribution function
    self._glob_f = self._da_f.createGlobalVec()
    self._local_f = self._da_f.createLocalVec()

    # The following global and local vectors are used in
    # the communication routines for EM fields
    self._glob_fields = self._da_fields.createGlobalVec()
    self._local_fields = self._da_fields.createLocalVec()

    # The following vector is used to dump the data to file:
    self._glob_moments = self._da_dump_moments.createGlobalVec()

    # Getting the arrays for the above vectors:
    self._glob_f_array = self._glob_f.getArray()
    self._local_f_array = self._local_f.getArray()

    self._glob_fields_array = self._glob_fields.getArray()
    self._local_fields_array = self._local_fields.getArray()

    self._glob_moments_array = self._glob_moments.getArray()

    # Setting names for the objects which will then be
    # used as the key identifiers for the HDF5 files:
    PETSc.Object.setName(self._glob_f, 'distribution_function')
    PETSc.Object.setName(self._glob_moments, 'moments')

    # Obtaining the array values of the canonical variables:
    self.q1_center, self.q2_center = self._calculate_q_center()
    self.p1, self.p2, self.p3 = self._calculate_p_center()

    # Initialize according to initial condition provided by user:
    self._initialize(physical_system.params)

    # Obtaining start coordinates for the local zone
    # Additionally, we also obtain the size of the local zone
    ((i_q1_start, i_q2_start), (N_q1_local, N_q2_local)) = self._da_f.getCorners()

    (i_q1_end, i_q2_end) = (i_q1_start + N_q1_local - 1,
                            i_q2_start + N_q2_local - 1)

    # Applying dirichlet boundary conditions:
    if (self.physical_system.boundary_conditions.in_q1_left == 'dirichlet'):
        # If local zone includes the left physical boundary:
        if (i_q1_start == 0):
            self.f[:, :N_g] = self.boundary_conditions.\
                f_left(self.f, self.q1_center, self.q2_center,
                       self.p1, self.p2, self.p3,
                       self.physical_system.params
                      )[:, :N_g]

    if (self.physical_system.boundary_conditions.in_q1_right == 'dirichlet'):
        # If local zone includes the right physical boundary:
        if (i_q1_end == self.N_q1 - 1):
            self.f[:, -N_g:] = self.boundary_conditions.\
                f_right(self.f, self.q1_center, self.q2_center,
                        self.p1, self.p2, self.p3,
                        self.physical_system.params
                       )[:, -N_g:]

    if (self.physical_system.boundary_conditions.in_q2_bottom == 'dirichlet'):
        # If local zone includes the bottom physical boundary:
        if (i_q2_start == 0):
            self.f[:, :, :N_g] = self.boundary_conditions.\
                f_bot(self.f, self.q1_center, self.q2_center,
                      self.p1, self.p2, self.p3,
                      self.physical_system.params
                     )[:, :, :N_g]

    if (self.physical_system.boundary_conditions.in_q2_top == 'dirichlet'):
        # If local zone includes the top physical boundary:
        if (i_q2_end == self.N_q2 - 1):
            self.f[:, :, -N_g:] = self.boundary_conditions.\
                f_top(self.f, self.q1_center, self.q2_center,
                      self.p1, self.p2, self.p3,
                      self.physical_system.params
                     )[:, :, -N_g:]

    # Assigning the value to the PETSc Vecs(for dump at t = 0):
    (af.flat(self.f)).to_ndarray(self._local_f_array)
    (af.flat(self.f[:, N_g:-N_g, N_g:-N_g])).to_ndarray(self._glob_f_array)

    # Assigning the advection terms along q1 and q2
    self._A_q1 = physical_system.A_q(self.q1_center, self.q2_center,
                                     self.p1, self.p2, self.p3,
                                     physical_system.params)[0]
    self._A_q2 = physical_system.A_q(self.q1_center, self.q2_center,
                                     self.p1, self.p2, self.p3,
                                     physical_system.params)[1]

    # Assigning the conservative advection terms along q1 and q2
    self._C_q1 = physical_system.C_q(self.q1_center, self.q2_center,
                                     self.p1, self.p2, self.p3,
                                     physical_system.params)[0]
    self._C_q2 = physical_system.C_q(self.q1_center, self.q2_center,
                                     self.p1, self.p2, self.p3,
                                     physical_system.params)[1]

    # Assigning the function objects to methods of the solver:
    self._A_p = physical_system.A_p

    # Source/Sink term:
    self._source = physical_system.source

    # Initializing a variable to track time-elapsed:
    # This becomes necessary when applying shearing wall
    # boundary conditions(WIP):
    self.time_elapsed = 0
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy as np
import arrayfire as af

backend = 'opencl'
device = 0

af.set_backend(backend)
af.set_device(device)

from dg_maxwell import lagrange
from dg_maxwell import isoparam
from dg_maxwell import utils
from dg_maxwell import msh_parser
from dg_maxwell import wave_equation
from dg_maxwell import wave_equation_2d

# The domain of the function.
x_nodes = af.np_to_af_array(np.array([-1., 1.]))

# The number of LGL points into which an element is split.
N_LGL = 6

# Number of elements the domain is to be divided into.
N_Elements = 9

# The scheme to be used for integration. Values are either
# 'gauss_quadrature' or 'lobatto_quadrature'
def __init__(self, physical_system, performance_test_flag=False):
    """
    Constructor for the nonlinear_solver object. It takes the physical
    system object as an argument and uses it in initialization and
    evolution of the system in consideration.

    Additionally, a performance test flag is also passed which, when true,
    stores the time consumed by each of the major solver routines. This
    proves particularly useful in analyzing performance bottlenecks and
    obtaining benchmarks.

    Parameters:
    -----------
    physical_system: The defined physical system object which holds
                     all the simulation information such as the initial
                     conditions, and the domain info is passed as an
                     argument in defining an instance of the
                     nonlinear_solver. This system is then evolved, and
                     monitored using the various methods under the
                     nonlinear_solver class.
    """
    self.physical_system = physical_system

    # Holding Domain Info:
    self.q1_start, self.q1_end = physical_system.q1_start,\
                                 physical_system.q1_end
    self.q2_start, self.q2_end = physical_system.q2_start,\
                                 physical_system.q2_end
    self.p1_start, self.p1_end = physical_system.p1_start,\
                                 physical_system.p1_end
    self.p2_start, self.p2_end = physical_system.p2_start,\
                                 physical_system.p2_end
    self.p3_start, self.p3_end = physical_system.p3_start,\
                                 physical_system.p3_end

    # Holding Domain Resolution:
    self.N_q1, self.dq1 = physical_system.N_q1, physical_system.dq1
    self.N_q2, self.dq2 = physical_system.N_q2, physical_system.dq2
    self.N_p1, self.dp1 = physical_system.N_p1, physical_system.dp1
    self.N_p2, self.dp2 = physical_system.N_p2, physical_system.dp2
    self.N_p3, self.dp3 = physical_system.N_p3, physical_system.dp3

    # Getting number of ghost zones, and the boundary
    # conditions that are utilized:
    N_g_q = self.N_ghost_q = physical_system.N_ghost_q
    N_g_p = self.N_ghost_p = physical_system.N_ghost_p

    self.boundary_conditions = physical_system.boundary_conditions

    # Declaring the communicator:
    self._comm = PETSc.COMM_WORLD.tompi4py()

    if (self.physical_system.params.num_devices > 1):
        af.set_device(self._comm.rank % self.physical_system.params.num_devices)

    # Getting number of species:
    self.N_species = len(physical_system.params.mass)

    # Having the mass and charge along axis 1:
    self.physical_system.params.mass = \
        af.cast(af.moddims(af.to_array(physical_system.params.mass),
                           1, self.N_species
                          ),
                af.Dtype.f64
               )

    self.physical_system.params.charge = \
        af.cast(af.moddims(af.to_array(physical_system.params.charge),
                           1, self.N_species
                          ),
                af.Dtype.f64
               )

    PETSc.Sys.Print('\nBackend Details for Nonlinear Solver:')

    # Printing the backend details for each rank/device/node:
    PETSc.Sys.syncPrint(indent('Rank ' + str(self._comm.rank) + ' of '
                               + str(self._comm.size - 1)))
    PETSc.Sys.syncPrint(indent('On Node: ' + socket.gethostname()))
    PETSc.Sys.syncPrint(indent('Device Details:'))
    PETSc.Sys.syncPrint(indent(af.info_str(), 2))
    PETSc.Sys.syncPrint(indent('Device Bandwidth = ' + str(bandwidth_test(100))
                               + ' GB / sec'))
    PETSc.Sys.syncPrint()
    PETSc.Sys.syncFlush()

    self.performance_test_flag = performance_test_flag

    # Initializing variables which are used to time the components of the solver:
    if (performance_test_flag == True):
        self.time_ts = 0

        self.time_interp2 = 0
        self.time_sourcets = 0

        self.time_fvm_solver = 0
        self.time_reconstruct = 0
        self.time_riemann = 0

        self.time_fieldstep = 0
        self.time_interp3 = 0

        self.time_apply_bcs_f = 0
        self.time_communicate_f = 0

    petsc_bc_in_q1 = 'ghosted'
    petsc_bc_in_q2 = 'ghosted'

    # Only for periodic boundary conditions or shearing-box boundary conditions
    # do the boundary conditions passed to the DA need to be changed. PETSc
    # automatically handles the application of periodic boundary conditions when
    # running in parallel. For shearing box boundary conditions, an interpolation
    # operation needs to be applied on top of the periodic boundary conditions.
    # In all other cases, ghosted boundaries are used.
    if (   self.boundary_conditions.in_q1_left == 'periodic'
        or self.boundary_conditions.in_q1_left == 'shearing-box'):
        petsc_bc_in_q1 = 'periodic'

    if (   self.boundary_conditions.in_q2_bottom == 'periodic'
        or self.boundary_conditions.in_q2_bottom == 'shearing-box'):
        petsc_bc_in_q2 = 'periodic'

    if (self.boundary_conditions.in_q1_left == 'periodic'):
        try:
            assert (self.boundary_conditions.in_q1_right == 'periodic')
        except:
            raise Exception('Periodic boundary conditions need to be applied to \
                             both the boundaries of a particular axis')

    if (self.boundary_conditions.in_q1_left == 'shearing-box'):
        try:
            assert (self.boundary_conditions.in_q1_right == 'shearing-box')
        except:
            raise Exception('Shearing box boundary conditions need to be applied to \
                             both the boundaries of a particular axis')

    if (self.boundary_conditions.in_q2_bottom == 'periodic'):
        try:
            assert (self.boundary_conditions.in_q2_top == 'periodic')
        except:
            raise Exception('Periodic boundary conditions need to be applied to \
                             both the boundaries of a particular axis')

    if (self.boundary_conditions.in_q2_bottom == 'shearing-box'):
        try:
            assert (self.boundary_conditions.in_q2_top == 'shearing-box')
        except:
            raise Exception('Shearing box boundary conditions need to be applied to \
                             both the boundaries of a particular axis')

    nproc_in_q1 = PETSc.DECIDE
    nproc_in_q2 = PETSc.DECIDE

    # Since shearing boundary conditions require interpolations which are non-local:
    if (self.boundary_conditions.in_q2_bottom == 'shearing-box'):
        nproc_in_q1 = 1

    if (self.boundary_conditions.in_q1_left == 'shearing-box'):
        nproc_in_q2 = 1

    # DMDA is a data structure to handle a distributed structured
    # grid and its related core algorithms. It stores metadata of
    # how the grid is partitioned when run in parallel which is
    # utilized by the various methods of the solver.
    self._da_f = PETSc.DMDA().create([self.N_q1, self.N_q2],
                                     dof=(  self.N_species
                                          * (self.N_p1 + 2 * N_g_p)
                                          * (self.N_p2 + 2 * N_g_p)
                                          * (self.N_p3 + 2 * N_g_p)),
                                     stencil_width=N_g_q,
                                     boundary_type=(petsc_bc_in_q1,
                                                    petsc_bc_in_q2),
                                     proc_sizes=(nproc_in_q1, nproc_in_q2),
                                     stencil_type=1,
                                     comm=self._comm)

    # This DA is used by the FileIO routine dump_distribution_function():
    self._da_dump_f = PETSc.DMDA().create([self.N_q1, self.N_q2],
                                          dof=(  self.N_species
                                               * self.N_p1
                                               * self.N_p2
                                               * self.N_p3),
                                          stencil_width=N_g_q,
                                          boundary_type=(petsc_bc_in_q1,
                                                         petsc_bc_in_q2),
                                          proc_sizes=(nproc_in_q1, nproc_in_q2),
                                          stencil_type=1,
                                          comm=self._comm)

    # This DA is used by the FileIO routine dump_moments():
    # Finding the number of definitions for the moments:
    attributes = [a for a in dir(self.physical_system.moments)
                  if not a.startswith('_')]

    # Removing utility functions:
    if ('integral_over_v' in attributes):
        attributes.remove('integral_over_v')

    self._da_dump_moments = PETSc.DMDA().create([self.N_q1, self.N_q2],
                                                dof=self.N_species * len(attributes),
                                                proc_sizes=(nproc_in_q1, nproc_in_q2),
                                                comm=self._comm)

    # Creation of the local and global vectors from the DA:
    # This is for the distribution function
    self._glob_f = self._da_f.createGlobalVec()
    self._local_f = self._da_f.createLocalVec()

    # The following vector is used to dump the data to file:
    self._glob_dump_f = self._da_dump_f.createGlobalVec()
    self._glob_moments = self._da_dump_moments.createGlobalVec()

    # Getting the arrays for the above vectors:
    self._glob_f_array = self._glob_f.getArray()
    self._local_f_array = self._local_f.getArray()
    self._glob_moments_array = self._glob_moments.getArray()
    self._glob_dump_f_array = self._glob_dump_f.getArray()

    # Setting names for the objects which will then be
    # used as the key identifiers for the HDF5 files:
    PETSc.Object.setName(self._glob_dump_f, 'distribution_function')
    PETSc.Object.setName(self._glob_moments, 'moments')

    # Obtaining the array values of the canonical variables:
    self.q1_center, self.q2_center = self._calculate_q_center()
    self.p1_center, self.p2_center, self.p3_center = self._calculate_p_center()

    # Initialize according to initial condition provided by user:
    self._initialize(physical_system.params)

    # Obtaining start coordinates for the local zone
    # Additionally, we also obtain the size of the local zone
    ((i_q1_start, i_q2_start), (N_q1_local, N_q2_local)) = self._da_f.getCorners()

    (i_q1_end, i_q2_end) = (i_q1_start + N_q1_local - 1,
                            i_q2_start + N_q2_local - 1)

    # Applying dirichlet boundary conditions:
    if (self.physical_system.boundary_conditions.in_q1_left == 'dirichlet'):
        # If local zone includes the left physical boundary:
        if (i_q1_start == 0):
            self.f[:, :N_g_q] = self.boundary_conditions.\
                f_left(self.f, self.q1_center, self.q2_center,
                       self.p1_center, self.p2_center, self.p3_center,
                       self.physical_system.params
                      )[:, :N_g_q]

    if (self.physical_system.boundary_conditions.in_q1_right == 'dirichlet'):
        # If local zone includes the right physical boundary:
        if (i_q1_end == self.N_q1 - 1):
            self.f[:, -N_g_q:] = self.boundary_conditions.\
                f_right(self.f, self.q1_center, self.q2_center,
                        self.p1_center, self.p2_center, self.p3_center,
                        self.physical_system.params
                       )[:, -N_g_q:]

    if (self.physical_system.boundary_conditions.in_q2_bottom == 'dirichlet'):
        # If local zone includes the bottom physical boundary:
        if (i_q2_start == 0):
            self.f[:, :, :N_g_q] = self.boundary_conditions.\
                f_bot(self.f, self.q1_center, self.q2_center,
                      self.p1_center, self.p2_center, self.p3_center,
                      self.physical_system.params
                     )[:, :, :N_g_q]

    if (self.physical_system.boundary_conditions.in_q2_top == 'dirichlet'):
        # If local zone includes the top physical boundary:
        if (i_q2_end == self.N_q2 - 1):
            self.f[:, :, -N_g_q:] = self.boundary_conditions.\
                f_top(self.f, self.q1_center, self.q2_center,
                      self.p1_center, self.p2_center, self.p3_center,
                      self.physical_system.params
                     )[:, :, -N_g_q:]

    # Assigning the value to the PETSc Vecs(for dump at t = 0):
    (af.flat(self.f)).to_ndarray(self._local_f_array)
    (af.flat(self.f[:, :, N_g_q:-N_g_q, N_g_q:-N_g_q])).to_ndarray(self._glob_f_array)

    # Assigning the function objects to methods of the solver:
    self._A_q = physical_system.A_q
    self._C_q = physical_system.C_q
    self._A_p = physical_system.A_p
    self._C_p = physical_system.C_p

    # Source/Sink term:
    self._source = physical_system.source

    # Initializing a variable to track time-elapsed:
    self.time_elapsed = 0
def gpuMBIR(tomo, angles, center, input_params):
    """
    MBIR reconstruction using GPU-based gridding operators.

    Inputs:
    tomo : 3D numpy sinogram array with dimensions same as tomopy
    angles : Array of angles in radians
    center : Floating point center of rotation
    input_params : A dictionary with the keys
        'gpu_device' : Device id of the gpu (for a 4 GPU cluster: 0-3)
        'oversamp_factor' : A factor by which to pad the image/data for FFT
        'num_iter' : Max number of MBIR iterations
        'smoothness' : Regularization constant
        'p' : MRF shape parameter
    """
    print('Starting GPU MBIR recon')
    # Set the device number for gpu based code
    af.set_device(input_params['gpu_device'])
    # Change tomopy format
    new_tomo = np.transpose(tomo, (1, 2, 0))  # slice, columns, angles
    im_size = new_tomo.shape[1]
    num_slice = new_tomo.shape[0]
    num_angles = new_tomo.shape[2]
    pad_size = np.int16(im_size * input_params['oversamp_factor'])
    # nufft_scaling = (np.pi / pad_size) ** 2
    num_iter = input_params['num_iter']
    mrf_sigma = input_params['smoothness']
    mrf_p = input_params['p']
    print('MRF params p=%f sigma=%f' % (mrf_p, mrf_sigma))

    # Initialize structures for NUFFT
    sino = {}
    geom = {}
    sino['Ns'] = pad_size  # Sinogram size after padding
    sino['Ns_orig'] = im_size  # size of original sinogram
    sino['center'] = center + (sino['Ns'] // 2 - sino['Ns_orig'] // 2)  # for padded sinogram
    sino['angles'] = angles

    # Initialize NUFFT parameters
    print('Initialize NUFFT params')
    nufft_params = init_nufft_params(sino, geom)
    temp_y = afnp.zeros((sino['Ns'], num_angles), dtype=afnp.complex64)
    temp_x = afnp.zeros((sino['Ns'], sino['Ns']), dtype=afnp.complex64)
    x_recon = afnp.zeros((num_slice // 2, sino['Ns_orig'], sino['Ns_orig']),
                         dtype=afnp.complex64)
    pad_idx = slice(sino['Ns'] // 2 - sino['Ns_orig'] // 2,
                    sino['Ns'] // 2 + sino['Ns_orig'] // 2)

    # allocate output array
    rec_mbir_final = np.zeros((num_slice, sino['Ns_orig'], sino['Ns_orig']),
                              dtype=np.float32)

    # Move all data to GPU
    print('Moving data to GPU')
    slice_1 = slice(0, num_slice, 2)
    slice_2 = slice(1, num_slice, 2)
    gdata = afnp.array(new_tomo[slice_1] + 1j * new_tomo[slice_2],
                       dtype=afnp.complex64)
    # temp array to store the derivative of cost func
    gradient = afnp.zeros((num_slice // 2, sino['Ns_orig'], sino['Ns_orig']),
                          dtype=afnp.complex64)
    # Nesterov method variables
    z_recon = afnp.zeros((num_slice // 2, sino['Ns_orig'], sino['Ns_orig']),
                         dtype=afnp.complex64)
    t_nes = 1

    # Compute Lipschitz constant of the gradient
    print('Computing Lipschitz of gradient')
    x_ones = afnp.ones((1, sino['Ns_orig'], sino['Ns_orig']), dtype=afnp.complex64)
    temp_x[pad_idx, pad_idx] = x_ones[0]
    temp_proj = forward_project(temp_x, nufft_params)
    temp_backproj = (back_project(temp_proj, nufft_params))[pad_idx, pad_idx]
    print('Adding Hessian of regularizer')
    temp_backproj2 = afnp.zeros((1, sino['Ns_orig'], sino['Ns_orig']),
                                dtype=afnp.complex64)
    temp_backproj2[0] = temp_backproj
    add_hessian(mrf_sigma, x_ones, temp_backproj2)
    L = np.max([temp_backproj2.real.max(), temp_backproj2.imag.max()])
    print('Lipschitz constant = %f' % (L))
    del x_ones, temp_proj, temp_backproj, temp_backproj2

    # loop over iterations, updating all slices in each pass
    for iter_num in range(num_iter):
        print('Iteration %d of %d' % (iter_num, num_iter))
        # Derivative of the data fitting term
        for i in range(num_slice // 2):
            temp_x[pad_idx, pad_idx] = x_recon[i]
            Ax = forward_project(temp_x, nufft_params)
            temp_y[pad_idx] = gdata[i]
            gradient[i] = (back_project((Ax - temp_y), nufft_params))[pad_idx, pad_idx]  # nufft_scaling
        # Derivative of regularization term
        tvd_update(mrf_p, mrf_sigma, x_recon, gradient)
        # x_recon -= gradient / L
        x_recon, z_recon, t_nes = nesterovOGM2update(x_recon, z_recon, t_nes,
                                                     gradient, L)

    # Move to CPU
    # Rescale result to match tomopy
    rec_mbir = np.array(x_recon, dtype=np.complex64)
    rec_mbir_final[slice_1] = np.array(rec_mbir.real, dtype=np.float32)
    rec_mbir_final[slice_2] = np.array(rec_mbir.imag, dtype=np.float32)
    return rec_mbir_final
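An illustrative call of gpuMBIR follows; the parameter values are hypothetical and only show the shape of the input_params dictionary described in the docstring.

input_params = {
    'gpu_device': 0,
    'oversamp_factor': 1.25,
    'num_iter': 50,      # max MBIR iterations
    'smoothness': 0.1,   # regularization constant
    'p': 1.2,            # MRF shape parameter
}
rec = gpuMBIR(tomo, angles, tomo.shape[2] / 2.0, input_params)  # tomo, angles as in tomopy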
def gpuSIRT(tomo, angles, center, input_params):
    print('Starting GPU SIRT recon')
    # Set the device number for gpu based code
    af.set_device(input_params['gpu_device'])
    # Change tomopy format
    new_tomo = np.transpose(tomo, (1, 2, 0))  # slice, columns, angles
    im_size = new_tomo.shape[1]
    num_slice = new_tomo.shape[0]
    num_angles = new_tomo.shape[2]
    pad_size = np.int16(im_size * input_params['oversamp_factor'])
    nufft_scaling = (np.pi / pad_size) ** 2
    num_iter = input_params['num_iter']

    # Initialize structures for NUFFT
    sino = {}
    geom = {}
    sino['Ns'] = pad_size  # Sinogram size after padding
    sino['Ns_orig'] = im_size  # size of original sinogram
    sino['center'] = center + (sino['Ns'] // 2 - sino['Ns_orig'] // 2)  # for padded sinogram
    sino['angles'] = angles

    # Initialize NUFFT parameters
    nufft_params = init_nufft_params(sino, geom)
    temp_y = afnp.zeros((sino['Ns'], num_angles), dtype=afnp.complex64)
    temp_x = afnp.zeros((sino['Ns'], sino['Ns']), dtype=afnp.complex64)
    x_recon = afnp.zeros((num_slice // 2, sino['Ns_orig'], sino['Ns_orig']),
                         dtype=afnp.complex64)
    pad_idx = slice(sino['Ns'] // 2 - sino['Ns_orig'] // 2,
                    sino['Ns'] // 2 + sino['Ns_orig'] // 2)

    # allocate output array
    rec_sirt_final = np.zeros((num_slice, sino['Ns_orig'], sino['Ns_orig']),
                              dtype=np.float32)

    # Pre-compute diagonal scaling matrices; one the same size as the image
    # and the other the same size as the data.
    # Initialize an image of all ones:
    x_ones = afnp.ones((sino['Ns_orig'], sino['Ns_orig']), dtype=afnp.complex64)
    temp_x[pad_idx, pad_idx] = x_ones
    temp_proj = forward_project(temp_x, nufft_params) * (sino['Ns'] * afnp.pi / 2)
    R = 1 / afnp.abs(temp_proj)
    R[afnp.isnan(R)] = 0
    R[afnp.isinf(R)] = 0
    R = afnp.array(R, dtype=afnp.complex64)

    # Initialize a sinogram of all ones:
    y_ones = afnp.ones((sino['Ns_orig'], num_angles), dtype=afnp.complex64)
    temp_y[pad_idx] = y_ones
    temp_backproj = back_project(temp_y, nufft_params) * nufft_scaling / 2
    C = 1 / (afnp.abs(temp_backproj))
    C[afnp.isnan(C)] = 0
    C[afnp.isinf(C)] = 0
    C = afnp.array(C, dtype=afnp.complex64)

    # Move all data to GPU
    slice_1 = slice(0, num_slice, 2)
    slice_2 = slice(1, num_slice, 2)
    gdata = afnp.array(new_tomo[slice_1] + 1j * new_tomo[slice_2],
                       dtype=afnp.complex64)

    # loop over all slices
    for i in range(num_slice // 2):
        for iter_num in range(num_iter):
            # filtered back-projection
            temp_x[pad_idx, pad_idx] = x_recon[i]
            Ax = (np.pi / 2) * sino['Ns'] * forward_project(temp_x, nufft_params)
            temp_y[pad_idx] = gdata[i]
            x_recon[i] = x_recon[i] + (C * back_project(R * (temp_y - Ax), nufft_params)
                                       * nufft_scaling / 2)[pad_idx, pad_idx]

    # Move to CPU
    # Rescale result to match tomopy
    rec_sirt = np.array(x_recon, dtype=np.complex64)
    rec_sirt_final[slice_1] = np.array(rec_sirt.real, dtype=np.float32)
    rec_sirt_final[slice_2] = np.array(rec_sirt.imag, dtype=np.float32)
    return rec_sirt_final
    out.arr = ctypes.c_void_p(af_array_ptr)
    print("Converting from ", hex(af_array_ptr))
    # print("New array has device pointer ", hex(get_gpu_pointer(out)))
    return out


def get_use_count(arr):
    uses = ctypes.c_int(0)
    af.safe_call(af.backend.get().af_get_data_ref_count(af.c_pointer(uses),
                                                        arr.arr))
    return uses


# use gpu backend
af.set_backend('cuda')
af.set_device(0)  # select the gpu to use
af.info()

van = np.array([1, 2, 3, 5])
van = np.vander(van)
two = np.ones((4, 4)) * 2
print("Define stuff", flush=True)

afvan = af.interop.from_ndarray(van)
afvan = afvan.as_type(af.Dtype.f32)
afthr = af.interop.from_ndarray(two)
afthr = afthr.as_type(af.Dtype.f32)

af.device.print_mem_info("before loops")
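A small hedged check built on the get_use_count helper defined above; the exact count printed depends on ArrayFire's internal reference handling.

arr = af.randu(3, 3)
print("use count:", get_use_count(arr).value)  # .value unwraps the ctypes.c_int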
from dg_maxwell import params
from dg_maxwell import lagrange
from dg_maxwell import wave_equation

import arrayfire as af
af.set_backend('cpu')
af.set_device(0)


def L1_norm(u):
    '''
    A function to calculate the L1 norm of error using the polynomial
    obtained using Lagrange interpolation.

    Parameters
    ----------
    u : arrayfire.Array [N_LGL N_Elements 1 1]
        Difference between analytical and numerical u at the mapped LGL points.

    Returns
    -------
    L1_norm : float64
              The L1 norm of error.
    '''
    interpolated_coeffs = af.reorder(lagrange.lagrange_interpolation_u(u),
                                     2, 1, 0)

    L1_norm = af.sum(lagrange.integrate(interpolated_coeffs))

    return L1_norm
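A hedged usage sketch of L1_norm; u_analytical and u_numerical are hypothetical arrays shaped [N_LGL N_Elements 1 1] as the docstring requires.

u_diff = af.abs(u_analytical - u_numerical)  # |error| at the mapped LGL points
print(L1_norm(u_diff))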
# The complete license agreement can be obtained at:
# http://arrayfire.com/licenses/BSD-3-Clause
########################################################

import arrayfire as af
import sys
import os

if __name__ == "__main__":
    if (len(sys.argv) == 1):
        raise RuntimeError("Expected the image as the first argument")

    if not os.path.isfile(sys.argv[1]):
        raise RuntimeError("File %s not found" % sys.argv[1])

    if (len(sys.argv) > 2):
        af.set_device(int(sys.argv[2]))
    af.info()

    hist_win = af.Window(512, 512, "3D Plot example using ArrayFire")
    img_win = af.Window(480, 640, "Input Image")

    img = (af.load_image(sys.argv[1])).as_type(af.Dtype.u8)
    hist = af.histogram(img, 256, 0, 255)

    while (not hist_win.close()) and (not img_win.close()):
        hist_win.hist(hist, 0, 255)
        img_win.image(img)
    return af.mean(payoff) * math.exp(-r * t)


def monte_carlo_simulate(N, use_barrier, num_iter=10):
    steps = 180
    stock_price = 100.0
    maturity = 0.5
    volatility = 0.3
    rate = 0.01
    strike = 100
    barrier = 115.0

    start = time()
    for i in range(num_iter):
        monte_carlo_options(N, stock_price, maturity, volatility, rate,
                            strike, steps, use_barrier, barrier)
    return (time() - start) / num_iter


if __name__ == "__main__":
    if (len(sys.argv) > 1):
        af.set_device(int(sys.argv[1]))
    af.info()

    monte_carlo_simulate(1000, use_barrier=False)
    monte_carlo_simulate(1000, use_barrier=True)
    af.sync()

    for n in range(10000, 100001, 10000):
        print("Time for %7d paths - vanilla method: %4.3f ms, barrier method: %4.3f ms\n" %
              (n,
               1000 * monte_carlo_simulate(n, False, 100),
               1000 * monte_carlo_simulate(n, True, 100)))
#######################################################
# Copyright (c) 2015, ArrayFire
# All rights reserved.
#
# This file is distributed under 3-clause BSD license.
# The complete license agreement can be obtained at:
# http://arrayfire.com/licenses/BSD-3-Clause
########################################################

import arrayfire as af

af.info()

print(af.device_info())
print(af.get_device_count())
print(af.is_dbl_supported())
af.sync()

print('starting the loop')
for k in range(af.get_device_count()):
    af.set_device(k)
    dev = af.get_device()
    assert k == dev

    print(af.is_dbl_supported(k))

    a = af.randu(100, 100)
    af.sync(dev)
    mem_info = af.device_mem_info()
    assert mem_info['alloc']['buffers'] == 1
    assert mem_info['lock']['buffers'] == 1
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy as np
from scipy import special as sp
import arrayfire as af

from dg_maxwell import utils
from dg_maxwell import params

af.set_backend(params.backend)
af.set_device(params.device)


def LGL_points(N):
    '''
    Calculates :math:`N` Legendre-Gauss-Lobatto (LGL) points.
    LGL points are the roots of the polynomial

    :math:`(1 - \\xi^2) P'_{N - 1}(\\xi) = 0`

    where :math:`P_{n}(\\xi)` are the Legendre polynomials.
    This function finds the roots of the above polynomial.

    Parameters
    ----------
    N : int
        Number of LGL nodes required

    Returns
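The docstring's definition can be checked against a plain numpy sketch (illustrative, not this module's implementation): the N LGL points are -1, +1, and the roots of P'_{N-1}(xi).

import numpy as np

def lgl_points_numpy(N):
    coeffs = np.zeros(N)
    coeffs[-1] = 1.0                      # P_{N-1} in the Legendre basis
    P = np.polynomial.legendre.Legendre(coeffs)
    interior = P.deriv().roots()          # roots of P'_{N-1}
    return np.concatenate(([-1.0], interior, [1.0]))

print(lgl_points_numpy(6))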