def reallocate_and_copy_old( species, use_cuda, old_Ntot, new_Ntot ):
    """
    Copy the particle quantities of `species` from arrays of size
    `old_Ntot` into arrays of size `new_Ntot`. Set these arrays as
    attributes of `species`.
    (The first `old_Ntot` elements of the new arrays are copied from the
    old arrays; the last elements are left empty and expected to be
    filled later.)

    When `use_cuda` is True, this function also reallocates
    the sorting buffers for GPU, with a size `new_Ntot`.

    Parameters
    ----------
    species: an fbpic Particles object
    use_cuda: bool
        If True, the new arrays are device arrays, and copying is done on GPU.
        If False, the arrays are on CPU, and copying is done on CPU.
    old_Ntot, new_Ntot: int
        Size of the old and new arrays (with old_Ntot < new_Ntot)
    """
    # Check if the data is on the GPU
    data_on_gpu = (type(species.w) is not np.ndarray)

    # On GPU, use one thread per particle
    if data_on_gpu:
        ptcl_grid_1d, ptcl_block_1d = cuda_tpb_bpg_1d( old_Ntot )

    # Iterate over particle attributes and copy the old particles
    for attr in ['x', 'y', 'z', 'ux', 'uy', 'uz', 'w', 'inv_gamma',
                 'Ex', 'Ey', 'Ez', 'Bx', 'By', 'Bz']:
        old_array = getattr(species, attr)
        new_array = allocate_empty( new_Ntot, data_on_gpu, dtype=np.float64 )
        if data_on_gpu:
            copy_particle_data_cuda[ ptcl_grid_1d, ptcl_block_1d ](
                old_Ntot, old_array, new_array )
        else:
            copy_particle_data_numba( old_Ntot, old_array, new_array )
        setattr( species, attr, new_array )

    # Copy the tracking id, if needed
    if species.tracker is not None:
        old_array = species.tracker.id
        # (Allocate according to where the data currently is, for
        # consistency with the copy kernels used below)
        new_array = allocate_empty( new_Ntot, data_on_gpu, dtype=np.uint64 )
        if data_on_gpu:
            copy_particle_data_cuda[ ptcl_grid_1d, ptcl_block_1d ](
                old_Ntot, old_array, new_array )
        else:
            copy_particle_data_numba( old_Ntot, old_array, new_array )
        species.tracker.id = new_array

    # Allocate the auxiliary arrays for GPU
    if use_cuda:
        species.cell_idx = cuda.device_array((new_Ntot,), dtype=np.int32)
        species.sorted_idx = cuda.device_array((new_Ntot,), dtype=np.uint32)
        species.sorting_buffer = cuda.device_array((new_Ntot,), dtype=np.float64)
        if species.n_integer_quantities > 0:
            species.int_sorting_buffer = \
                cuda.device_array( (new_Ntot,), dtype=np.uint64 )

    # Modify the total number of particles
    species.Ntot = new_Ntot
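# The CPU copy helper called above is not shown in this listing. Below is a
# minimal sketch of what `copy_particle_data_numba` could look like, inferred
# from its call sites (an assumption, not necessarily the actual fbpic
# implementation).

import numba

@numba.njit(parallel=True)
def copy_particle_data_numba(old_Ntot, old_array, new_array):
    """Copy the first `old_Ntot` elements of `old_array` into `new_array`.
    (Sketch: the real implementation may differ.)"""
    for i in numba.prange(old_Ntot):
        new_array[i] = old_array[i]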
def __init__(self, Nz, Nr, m, rmax, use_cuda=False):
    """
    Initializes the dht and fft attributes, which contain auxiliary
    matrices that allow the fields to be transformed quickly

    Parameters
    ----------
    Nz, Nr : int
        Number of points along z and r respectively

    m : int
        Index of the mode (needed for the Hankel transform)

    rmax : float
        The size of the simulation box along r.
    """
    # Check whether to use the GPU
    self.use_cuda = use_cuda
    if (self.use_cuda is True) and (cuda_installed is False):
        self.use_cuda = False
    if self.use_cuda:
        # Initialize the dimension of the grid and blocks
        self.dim_grid, self.dim_block = cuda_tpb_bpg_2d(Nz, Nr, 1, 32)

    # Initialize the DHT (local implementation, see hankel.py)
    self.dht0 = DHT(m, m, Nr, Nz, rmax, use_cuda=self.use_cuda)
    self.dhtp = DHT(m + 1, m, Nr, Nz, rmax, use_cuda=self.use_cuda)
    self.dhtm = DHT(m - 1, m, Nr, Nz, rmax, use_cuda=self.use_cuda)

    # Initialize the FFT
    self.fft = FFT(Nr, Nz, use_cuda=self.use_cuda)

    # Initialize the spectral buffers
    if self.use_cuda:
        self.spect_buffer_r = cuda.device_array((Nz, Nr), dtype=np.complex128)
        self.spect_buffer_t = cuda.device_array((Nz, Nr), dtype=np.complex128)
    else:
        self.spect_buffer_r = np.zeros((Nz, Nr), dtype=np.complex128)
        self.spect_buffer_t = np.zeros((Nz, Nr), dtype=np.complex128)

    # Different names for the same object (for economy of memory)
    self.spect_buffer_p = self.spect_buffer_r
    self.spect_buffer_m = self.spect_buffer_t
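# Hedged usage sketch: constructing the transform object for the azimuthal
# mode m=0 on a 200 x 50 (Nz x Nr) grid. The class name `SpectralTransformer`
# is an assumption (the enclosing class is not shown in this listing):
#
#     trans = SpectralTransformer(Nz=200, Nr=50, m=0, rmax=50.e-6)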
def allocate_empty(N, use_cuda, dtype):
    """
    Allocate and return an empty array, of size `N` and type `dtype`,
    either on GPU or CPU, depending on whether `use_cuda` is True or False
    """
    if use_cuda:
        return cuda.device_array((N,), dtype=dtype)
    else:
        return np.empty(N, dtype=dtype)
def allocate_empty(shape, use_cuda, dtype):
    """
    Allocate and return an empty array, of shape `shape` and type `dtype`,
    either on GPU or CPU, depending on whether `use_cuda` is True or False
    """
    if type(shape) is not tuple:
        # Convert single scalar to tuple
        shape = (shape,)
    if use_cuda:
        return cuda.device_array(shape, dtype=dtype)
    else:
        return np.empty(shape, dtype=dtype)
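# Hedged usage sketch: the generalized `allocate_empty` above accepts both a
# scalar size and a tuple shape:
#
#     buf_1d = allocate_empty(100, use_cuda=False, dtype=np.float64)
#     # buf_1d.shape == (100,)
#     buf_2d = allocate_empty((8, 100), use_cuda=False, dtype=np.float64)
#     # buf_2d.shape == (8, 100)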
def __init__(self, t_lab, zmin_lab, zmax_lab, write_dir, i, fld, Nr_output):
    """
    Initialize a LabSnapshot

    Parameters
    ----------
    t_lab: float (seconds)
        Time of this snapshot *in the lab frame*

    zmin_lab, zmax_lab: floats
        Longitudinal limits of this snapshot

    write_dir: string
        Absolute path to the directory where the data for
        this snapshot is to be written

    i: int
        Number of the file where this snapshot is to be written

    fld: a Fields object
        This is passed only in order to determine how to initialize
        the slice_array buffer (either on the CPU or GPU)

    Nr_output: int
        Number of cells in the r direction, in the final output
        (This typically excludes the radial damping cells)
    """
    # Deduce the name of the file to which this snapshot writes
    self.filename = os.path.join( write_dir, 'hdf5/data%08d.h5' %i)
    self.iteration = i

    # Time and boundaries in the lab frame (constant quantities)
    self.zmin_lab = zmin_lab
    self.zmax_lab = zmax_lab
    self.t_lab = t_lab

    # Positions where the fields are to be registered
    # (Change at every iteration)
    self.current_z_lab = 0
    self.current_z_boost = 0

    # Buffered field slice and corresponding array index in z
    self.buffered_slices = []
    self.buffer_z_indices = []

    # Allocate a buffer for only one slice (avoids having to
    # reallocate arrays when running on the GPU)
    data_shape = (10, 2*fld.Nm-1, Nr_output)
    if fld.use_cuda is False:
        self.slice_array = np.empty( data_shape )
    else:
        self.slice_array = cuda.device_array( data_shape )
def add_buffers_gpu( species, float_recv_left, float_recv_right,
                     uint_recv_left, uint_recv_right):
    """
    Add the particles stored in recv_left and recv_right
    to the existing particles in species.

    Parameters
    ----------
    species: a Particles object
        Contains the particles that stayed on the present processor

    float_recv_left, float_recv_right, uint_recv_left, uint_recv_right:
        arrays of shape (n_float,Nptcl) and (n_int,Nptcl) where Nptcl
        is the number of particles that are received from the left proc
        and right proc respectively, and where n_float and n_int
        are the number of float and integer quantities respectively
        These arrays are always on the CPU (since they were used for MPI)
    """
    # Get the new number of particles
    old_Ntot = species.Ntot
    n_left = float_recv_left.shape[1]
    n_right = float_recv_right.shape[1]
    new_Ntot = old_Ntot + n_left + n_right

    # Get the threads per block and the blocks per grid
    n_left_grid, n_left_block = cuda_tpb_bpg_1d( n_left )
    n_right_grid, n_right_block = cuda_tpb_bpg_1d( n_right )
    n_old_grid, n_old_block = cuda_tpb_bpg_1d( old_Ntot )

    # Iterate over particle attributes
    # Build list of float attributes to copy
    attr_list = [ (species,'x'), (species,'y'), (species,'z'),
                  (species,'ux'), (species,'uy'), (species,'uz'),
                  (species,'inv_gamma'), (species,'w') ]
    if species.ionizer is not None:
        attr_list += [ (species.ionizer, 'w_times_level') ]
    # Loop through the float quantities
    for i_attr in range( len(attr_list) ):
        # Copy the proper buffers to the GPU
        left_buffer = cuda.to_device( float_recv_left[i_attr] )
        right_buffer = cuda.to_device( float_recv_right[i_attr] )
        # Initialize the new particle array
        particle_array = cuda.device_array( (new_Ntot,), dtype=np.float64)
        # Merge the arrays on the GPU
        stay_buffer = getattr( attr_list[i_attr][0], attr_list[i_attr][1])
        if n_left != 0:
            copy_particles[n_left_grid, n_left_block](
                n_left, left_buffer, 0, particle_array, 0 )
        if old_Ntot != 0:
            copy_particles[n_old_grid, n_old_block](
                old_Ntot, stay_buffer, 0, particle_array, n_left )
        if n_right != 0:
            copy_particles[n_right_grid, n_right_block](
                n_right, right_buffer, 0, particle_array, n_left+old_Ntot )
        # Assign the merged array to the particle data attribute
        setattr(attr_list[i_attr][0], attr_list[i_attr][1], particle_array)

    # Build list of integer quantities to copy
    attr_list = []
    if species.tracker is not None:
        attr_list.append( (species.tracker,'id') )
    if species.ionizer is not None:
        attr_list.append( (species.ionizer,'ionization_level') )
    # Loop through the integer quantities
    for i_attr in range( len(attr_list) ):
        # Copy the proper buffers to the GPU
        left_buffer = cuda.to_device( uint_recv_left[i_attr] )
        right_buffer = cuda.to_device( uint_recv_right[i_attr] )
        # Initialize the new particle array
        particle_array = cuda.device_array( (new_Ntot,), dtype=np.uint64)
        # Merge the arrays on the GPU
        stay_buffer = getattr( attr_list[i_attr][0], attr_list[i_attr][1])
        if n_left != 0:
            copy_particles[n_left_grid, n_left_block](
                n_left, left_buffer, 0, particle_array, 0 )
        if old_Ntot != 0:
            copy_particles[n_old_grid, n_old_block](
                old_Ntot, stay_buffer, 0, particle_array, n_left )
        if n_right != 0:
            copy_particles[n_right_grid, n_right_block](
                n_right, right_buffer, 0, particle_array, n_left+old_Ntot )
        # Assign the merged array to the particle data attribute
        setattr(attr_list[i_attr][0], attr_list[i_attr][1], particle_array)

    # Adapt the total number of particles
    species.Ntot = new_Ntot
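# The `copy_particles` kernel used above is defined elsewhere in the library.
# Below is a minimal sketch of what it could look like, inferred from its
# call sites: N elements are copied from `source`, starting at `source_start`,
# into `dest`, starting at `dest_start`. This is an assumption, not
# necessarily the actual fbpic implementation.

from numba import cuda

@cuda.jit
def copy_particles(N, source, source_start, dest, dest_start):
    """Copy N elements from `source` into `dest`, with the given offsets.
    (Sketch: the real implementation may differ.)"""
    i = cuda.grid(1)
    if i < N:
        dest[dest_start + i] = source[source_start + i]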
def add_buffers_to_particles( species, float_recv_left, float_recv_right,
                              uint_recv_left, uint_recv_right):
    """
    Add the particles stored in recv_left and recv_right
    to the existing particles in species.

    Resize the auxiliary arrays of the particles Ex, Ey, Ez, Bx, By, Bz,
    as well as cell_idx, sorted_idx and sorting_buffer

    Parameters
    ----------
    species: a Particles object
        Contains the particles that stayed on the present processor

    float_recv_left, float_recv_right, uint_recv_left, uint_recv_right:
        arrays of shape (n_float,Nptcl) and (n_int,Nptcl) where Nptcl
        is the number of particles that are received from the left proc
        and right proc respectively, and where n_float and n_int
        are the number of float and integer quantities respectively
        These arrays are always on the CPU (since they were used for MPI)
    """
    # Copy the buffers to an enlarged array
    if species.use_cuda:
        add_buffers_gpu( species, float_recv_left, float_recv_right,
                         uint_recv_left, uint_recv_right )
    else:
        add_buffers_cpu( species, float_recv_left, float_recv_right,
                         uint_recv_left, uint_recv_right )

    # Reallocate the particles auxiliary arrays. This needs to be done,
    # as the total number of particles in this domain has changed.
    if species.use_cuda:
        shape = (species.Ntot,)
        # Reallocate empty field-on-particle arrays on the GPU
        species.Ex = cuda.device_array( shape, dtype=np.float64 )
        species.Ey = cuda.device_array( shape, dtype=np.float64 )
        species.Ez = cuda.device_array( shape, dtype=np.float64 )
        species.Bx = cuda.device_array( shape, dtype=np.float64 )
        species.By = cuda.device_array( shape, dtype=np.float64 )
        species.Bz = cuda.device_array( shape, dtype=np.float64 )
        # Reallocate empty auxiliary sorting arrays on the GPU
        species.cell_idx = cuda.device_array( shape, dtype=np.int32 )
        species.sorted_idx = cuda.device_array( shape, dtype=np.intp )
        species.sorting_buffer = cuda.device_array( shape, dtype=np.float64 )
        if species.n_integer_quantities > 0:
            species.int_sorting_buffer = \
                cuda.device_array( shape, dtype=np.uint64 )
    else:
        # Reallocate empty field-on-particle arrays on the CPU
        species.Ex = np.empty(species.Ntot, dtype=np.float64)
        species.Ey = np.empty(species.Ntot, dtype=np.float64)
        species.Ez = np.empty(species.Ntot, dtype=np.float64)
        species.Bx = np.empty(species.Ntot, dtype=np.float64)
        species.By = np.empty(species.Ntot, dtype=np.float64)
        species.Bz = np.empty(species.Ntot, dtype=np.float64)

    # The particles are unsorted after adding new particles.
    species.sorted = False
def remove_particles_gpu(species, fld, n_guard, left_proc, right_proc):
    """
    Remove the particles that are outside of the physical domain (i.e.
    in the guard cells). Store them in sending buffers, which are returned.

    Parameters
    ----------
    species: a Particles object
        Contains the data of this species

    fld: a Fields object
        Contains information about the dimension of the grid,
        and the prefix sum (when using the GPU)

    n_guard: int
        Number of guard cells

    left_proc, right_proc: int or None
        Indicate whether there is a left or right processor or if the
        boundary is open (None).

    Returns
    -------
    float_send_left, float_send_right, uint_send_left, uint_send_right:
        arrays of shape (n_float,Nptcl) and (n_int,Nptcl) where Nptcl
        is the number of particles that are sent to the left proc
        and right proc respectively, and where n_float and n_int
        are the number of float and integer quantities respectively
    """
    # Check if particles are sorted
    # (The particles are usually expected to be sorted from the previous
    # iteration at this point - except at the first iteration of `step`.)
    if not species.sorted:
        species.sort_particles(fld=fld)
        species.sorted = True

    # Get the particle indices between which to remove the particles
    # (Take into account the fact that the moving window may have
    # shifted the grid since the particles were last sorted: prefix_sum_shift)
    prefix_sum = species.prefix_sum
    Nz = fld.Nz
    Nr = fld.Nr
    # Find the z index of the first cell for which particles are kept
    iz_min = max( n_guard + species.prefix_sum_shift, 0 )
    # Find the z index of the first cell for which particles are removed again
    iz_max = min( Nz - n_guard + species.prefix_sum_shift + 1, Nz )
    # Find the corresponding indices in the particle array
    # Reminder: prefix_sum[i] is the cumulative sum of the number of particles
    # in cells 0 to i (where cell i is included)
    if iz_min*(Nr+1) - 1 >= 0:
        i_min = prefix_sum.getitem( iz_min*(Nr+1) - 1 )
    else:
        i_min = 0
    i_max = prefix_sum.getitem( iz_max*(Nr+1) - 1 )
    # Total number of particles in each particle group
    N_send_l = i_min
    new_Ntot = i_max - i_min
    N_send_r = species.Ntot - i_max

    # Allocate the sending buffers on the CPU
    n_float = species.n_float_quantities
    n_int = species.n_integer_quantities
    if left_proc is not None:
        float_send_left = np.empty((n_float, N_send_l), dtype=np.float64)
        uint_send_left = np.empty((n_int, N_send_l), dtype=np.uint64)
    else:
        float_send_left = np.empty((n_float, 0), dtype=np.float64)
        uint_send_left = np.empty((n_int, 0), dtype=np.uint64)
    if right_proc is not None:
        float_send_right = np.empty((n_float, N_send_r), dtype=np.float64)
        uint_send_right = np.empty((n_int, N_send_r), dtype=np.uint64)
    else:
        float_send_right = np.empty((n_float, 0), dtype=np.float64)
        uint_send_right = np.empty((n_int, 0), dtype=np.uint64)

    # Get the threads per block and the blocks per grid
    dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d( species.Ntot )

    # Float quantities:
    # Build list of float attributes to copy
    attr_list = [ (species,'x'), (species,'y'), (species,'z'),
                  (species,'ux'), (species,'uy'), (species,'uz'),
                  (species,'inv_gamma'), (species,'w') ]
    if species.ionizer is not None:
        attr_list.append( (species.ionizer,'w_times_level') )
    # Loop through the float attributes
    for i_attr in range(n_float):
        # Initialize 3 buffer arrays on the GPU (need to be initialized
        # inside the loop, as `copy_to_host` invalidates these arrays)
        left_buffer = cuda.device_array((N_send_l,), dtype=np.float64)
        right_buffer = cuda.device_array((N_send_r,), dtype=np.float64)
        stay_buffer = cuda.device_array((new_Ntot,), dtype=np.float64)
        # Check that the buffers are still on GPU
        # (safeguard against automatic memory management)
        assert type(left_buffer) != np.ndarray
        assert type(right_buffer) != np.ndarray
        assert type(stay_buffer) != np.ndarray
        # Split the particle array into the 3 buffers on the GPU
        particle_array = getattr( attr_list[i_attr][0], attr_list[i_attr][1] )
        split_particles_to_buffers[dim_grid_1d, dim_block_1d]( particle_array,
            left_buffer, stay_buffer, right_buffer, i_min, i_max)
        # Assign the stay_buffer to the initial particle data array
        # and fill the sending buffers (if needed for MPI)
        setattr( attr_list[i_attr][0], attr_list[i_attr][1], stay_buffer)
        if left_proc is not None:
            left_buffer.copy_to_host( float_send_left[i_attr] )
        if right_proc is not None:
            right_buffer.copy_to_host( float_send_right[i_attr] )

    # Integer quantities:
    if n_int > 0:
        attr_list = []
        if species.tracker is not None:
            attr_list.append( (species.tracker,'id') )
        if species.ionizer is not None:
            attr_list.append( (species.ionizer,'ionization_level') )
        for i_attr in range(n_int):
            # Initialize 3 buffer arrays on the GPU (need to be initialized
            # inside the loop, as `copy_to_host` invalidates these arrays)
            left_buffer = cuda.device_array((N_send_l,), dtype=np.uint64)
            right_buffer = cuda.device_array((N_send_r,), dtype=np.uint64)
            stay_buffer = cuda.device_array((new_Ntot,), dtype=np.uint64)
            # Split the particle array into the 3 buffers on the GPU
            particle_array = getattr( attr_list[i_attr][0],
                                      attr_list[i_attr][1] )
            split_particles_to_buffers[dim_grid_1d, dim_block_1d](
                particle_array, left_buffer, stay_buffer, right_buffer,
                i_min, i_max)
            # Assign the stay_buffer to the initial particle data array
            # and fill the sending buffers (if needed for MPI)
            setattr( attr_list[i_attr][0], attr_list[i_attr][1], stay_buffer)
            if left_proc is not None:
                left_buffer.copy_to_host( uint_send_left[i_attr] )
            if right_proc is not None:
                right_buffer.copy_to_host( uint_send_right[i_attr] )

    # Register the new total number of particles
    species.Ntot = new_Ntot

    # Return the sending buffers
    return float_send_left, float_send_right, uint_send_left, uint_send_right
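# The `split_particles_to_buffers` kernel used above is defined elsewhere in
# the library. Below is a minimal sketch of what it could look like, inferred
# from its call sites: particles with index below `i_min` go to the left
# buffer, those between `i_min` and `i_max` to the stay buffer, and the rest
# to the right buffer. This is an assumption, not necessarily the actual
# fbpic implementation.

from numba import cuda

@cuda.jit
def split_particles_to_buffers(particle_array, left_buffer,
                               stay_buffer, right_buffer, i_min, i_max):
    """Split a sorted `particle_array` into three contiguous buffers.
    (Sketch: the real implementation may differ.)"""
    i = cuda.grid(1)
    if i < particle_array.shape[0]:
        if i < i_min:
            left_buffer[i] = particle_array[i]
        elif i < i_max:
            stay_buffer[i - i_min] = particle_array[i]
        else:
            right_buffer[i - i_max] = particle_array[i]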
def load_species(species, name, ts, iteration, comm):
    """
    Read the species data from the checkpoint `ts`
    and load it into the Species object `species`

    Parameters
    ----------
    species: a Species object
        The object into which data is loaded

    name: string
        The name of the corresponding species in the checkpoint

    ts: an OpenPMDTimeSeries object
        Points to the data in the checkpoint

    iteration: integer
        The iteration at which to load the checkpoint

    comm: an fbpic.BoundaryCommunicator object
        Contains information about the number of procs
    """
    # Get the particles' positions (convert to meters)
    x, y, z = ts.get_particle(['x', 'y', 'z'], iteration=iteration,
                              species=name)
    species.x, species.y, species.z = 1.e-6 * x, 1.e-6 * y, 1.e-6 * z
    # Get the particles' momenta
    species.ux, species.uy, species.uz = ts.get_particle(
        ['ux', 'uy', 'uz'], iteration=iteration, species=name)
    # Get the particles' weights
    species.w, = ts.get_particle(['w'], iteration=iteration, species=name)
    # Get the inverse gamma
    species.inv_gamma = 1. / np.sqrt(
        1 + species.ux**2 + species.uy**2 + species.uz**2)
    # Register the number of particles (the arrays loaded from the
    # checkpoint may have a different size than the preexisting ones)
    Ntot = len(species.w)
    species.Ntot = Ntot

    # Check if the particles were tracked
    if "id" in ts.avail_record_components[name]:
        pid, = ts.get_particle(['id'], iteration=iteration, species=name)
        species.track(comm)
        species.tracker.overwrite_ids(pid, comm)

    # If the species is ionizable, set the proper arrays
    if species.ionizer is not None:
        # Reallocate the ionization_level, and reset it with the right value
        species.ionizer.ionization_level = np.empty(Ntot, dtype=np.uint64)
        q, = ts.get_particle(['charge'], iteration=iteration, species=name)
        species.ionizer.ionization_level[:] = np.uint64(np.round(q / e))
        # Set the auxiliary array
        species.ionizer.w_times_level = \
            species.w * species.ionizer.ionization_level

    # Reset the injection positions (for continuous injection)
    if species.continuous_injection:
        species.injector.reset_injection_positions()

    # As a safeguard, check that the loaded data is in float64
    for attr in ['x', 'y', 'z', 'ux', 'uy', 'uz', 'w', 'inv_gamma']:
        assert getattr(species, attr).dtype == np.float64

    # Field arrays
    species.Ez = np.zeros(Ntot)
    species.Ex = np.zeros(Ntot)
    species.Ey = np.zeros(Ntot)
    species.Bz = np.zeros(Ntot)
    species.Bx = np.zeros(Ntot)
    species.By = np.zeros(Ntot)

    # Sorting arrays
    if species.use_cuda:
        # cell_idx and sorted_idx always stay on GPU
        species.cell_idx = cuda.device_array(Ntot, dtype=np.int32)
        species.sorted_idx = cuda.device_array(Ntot, dtype=np.intp)
        # sorting buffers are initialized on CPU
        # (because they are swapped with other particle arrays during sorting)
        species.sorting_buffer = np.empty(Ntot, dtype=np.float64)
        if hasattr(species, 'int_sorting_buffer'):
            species.int_sorting_buffer = np.empty(Ntot, dtype=np.uint64)
        species.sorted = False
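# Hedged usage sketch (assumes the openPMD-viewer package and an existing
# checkpoint directory; the path, species name, and `elec`/`comm` objects
# are illustrative, and the import may be `openpmd_viewer` in recent
# versions of the package):
#
#     from opmd_viewer import OpenPMDTimeSeries
#     ts = OpenPMDTimeSeries('./checkpoints/hdf5')
#     load_species(elec, 'electrons', ts, iteration=1000, comm=comm)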
def __init__(self, Nr, Nz, use_cuda=False, nthreads=None):
    """
    Initialize an FFT object

    Parameters
    ----------
    Nr: int
       Number of grid points along the r axis (axis -1)

    Nz: int
       Number of grid points along the z axis (axis 0)

    use_cuda: bool, optional
       Whether to perform the Fourier transform on the GPU

    nthreads : int, optional
        Number of threads for the FFTW transform.
        If None, the default number of threads of numba is used
        (environment variable NUMBA_NUM_THREADS)
    """
    # Check whether to use cuda
    self.use_cuda = use_cuda
    if (self.use_cuda is True) and (cuda_installed is False):
        self.use_cuda = False
        print('** Cuda not available for Fourier transform.')
        print('** Performing the Fourier transform on the CPU.')

    # Check whether to use MKL
    self.use_mkl = mkl_installed

    # Initialize the object for calculation on the GPU
    if self.use_cuda:
        # Initialize the dimension of the grid and blocks
        self.dim_grid, self.dim_block = cuda_tpb_bpg_2d(Nz, Nr)

        # Initialize 1d buffers for cufft
        self.buffer1d_in = cuda.device_array((Nz * Nr,), dtype=np.complex128)
        self.buffer1d_out = cuda.device_array((Nz * Nr,), dtype=np.complex128)
        # Initialize the cuda libraries objects
        self.fft = cufft.FFTPlan(shape=(Nz,), itype=np.complex128,
                                 otype=np.complex128, batch=Nr)
        self.blas = cublas.Blas()
        # For normalization of the iFFT
        self.inv_Nz = 1. / Nz

    # Initialize the object for calculation on the CPU
    else:
        # For MKL FFT
        if self.use_mkl:
            # Initialize the MKL plan with a dummy array
            spect_buffer = np.zeros((Nz, Nr), dtype=np.complex128)
            self.mklfft = MKLFFT(spect_buffer)
        # For FFTW
        else:
            # Determine the number of threads
            if nthreads is None:
                # Get the default number of threads for numba
                nthreads = numba.config.NUMBA_NUM_THREADS
            # Initialize the FFTW plans with dummy arrays
            interp_buffer = np.zeros((Nz, Nr), dtype=np.complex128)
            spect_buffer = np.zeros((Nz, Nr), dtype=np.complex128)
            self.fft = pyfftw.FFTW(interp_buffer, spect_buffer,
                axes=(0,), direction='FFTW_FORWARD', threads=nthreads)
            self.ifft = pyfftw.FFTW(spect_buffer, interp_buffer,
                axes=(0,), direction='FFTW_BACKWARD', threads=nthreads)
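# Hedged usage sketch: a CPU transform plan for a 200 x 50 (Nz x Nr) grid,
# using 4 FFTW threads (the thread count is only relevant when the FFTW
# backend, rather than MKL or CUDA, is active):
#
#     fft_obj = FFT(Nr=50, Nz=200, use_cuda=False, nthreads=4)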
def __init__(self, q, m, n, Npz, zmin, zmax, Npr, rmin, rmax, Nptheta, dt,
             ux_m=0., uy_m=0., uz_m=0., ux_th=0., uy_th=0., uz_th=0.,
             dens_func=None, continuous_injection=True, grid_shape=None,
             particle_shape='linear', use_cuda=False, dz_particles=None):
    """
    Initialize a uniform set of particles

    Parameters
    ----------
    q : float (in Coulombs)
        Charge of the particle species

    m : float (in kg)
        Mass of the particle species

    n : float (in particles per m^3)
        Peak density of particles

    Npz : int
        Number of macroparticles along the z axis

    zmin, zmax : floats (in meters)
        z positions between which the particles are initialized

    Npr : int
        Number of macroparticles along the r axis

    rmin, rmax : floats (in meters)
        r positions between which the particles are initialized

    Nptheta : int
        Number of macroparticles along theta

    dt : float (in seconds)
        The timestep for the particle pusher

    ux_m, uy_m, uz_m: floats (dimensionless), optional
        Normalized mean momenta of the injected particles in each direction

    ux_th, uy_th, uz_th: floats (dimensionless), optional
        Normalized thermal momenta in each direction

    dens_func : callable, optional
        A function of the form:
            def dens_func( z, r ) ...
        where z and r are 1d arrays, and which returns a 1d array
        containing the density *relative to n* (i.e. a number between
        0 and 1) at the given positions

    continuous_injection : bool, optional
        Whether to continuously inject the particles,
        in the case of a moving window

    grid_shape: tuple, optional
        Needed when running on the GPU
        The shape of the local grid (including guard cells), i.e.
        a tuple of the form (Nz, Nr). This is needed in order
        to initialize the sorting of the particles per cell.

    particle_shape: str, optional
        Set the particle shape for the charge/current deposition.
        Possible values are 'linear' and 'cubic' for first and third
        order particle shape factors.

    use_cuda : bool, optional
        Whether to use the GPU or not.

    dz_particles: float (in meter), optional
        The spacing between particles in `z` (for continuous injection)
        In most cases, the spacing between particles can be inferred
        from the arguments `zmin`, `zmax` and `Npz`. However, when
        there are no particles in the initial box (`Npz = 0`),
        `dz_particles` needs to be explicitly passed.
""" # Define whether or not to use the GPU self.use_cuda = use_cuda if (self.use_cuda == True) and (cuda_installed == False): warnings.warn('Cuda not available for the particles.\n' 'Performing the particle operations on the CPU.') self.use_cuda = False # Generate evenly-spaced particles Ntot, x, y, z, ux, uy, uz, inv_gamma, w = generate_evenly_spaced( Npz, zmin, zmax, Npr, rmin, rmax, Nptheta, n, dens_func, ux_m, uy_m, uz_m, ux_th, uy_th, uz_th) # Register the properties of the particles # (Necessary for the pusher, and when adding more particles later, ) self.Ntot = Ntot self.q = q self.m = m self.dt = dt # Register the particle arrarys self.x = x self.y = y self.z = z self.ux = ux self.uy = uy self.uz = uz self.inv_gamma = inv_gamma self.w = w # Initialize the fields array (at the positions of the particles) self.Ez = np.zeros(Ntot) self.Ex = np.zeros(Ntot) self.Ey = np.zeros(Ntot) self.Bz = np.zeros(Ntot) self.Bx = np.zeros(Ntot) self.By = np.zeros(Ntot) # The particle injector stores information that is useful in order # continuously inject particles in the simulation, with moving window self.continuous_injection = continuous_injection if continuous_injection: self.injector = ContinuousInjector(Npz, zmin, zmax, dz_particles, Npr, rmin, rmax, Nptheta, n, dens_func, ux_m, uy_m, uz_m, ux_th, uy_th, uz_th) else: self.injector = None # By default, there is no particle tracking (see method track) self.tracker = None # By default, the species experiences no elementary processes # (see method make_ionizable and activate_compton) self.ionizer = None self.compton_scatterer = None # Total number of quantities (necessary in MPI communications) self.n_integer_quantities = 0 self.n_float_quantities = 8 # x, y, z, ux, uy, uz, inv_gamma, w # Register particle shape self.particle_shape = particle_shape # Register boolean that records whether field array should # be rearranged whenever sorting particles # (gets modified during the main PIC loop, on GPU) self.keep_fields_sorted = False # Allocate arrays and register variables when using CUDA if self.use_cuda: if grid_shape is None: raise ValueError( "A `grid_shape` is needed when running " "on the GPU.\nPlease provide it when initializing particles." ) # Register grid shape self.grid_shape = grid_shape # Allocate arrays for the particles sorting when using CUDA # Most required arrays always stay on GPU Nz, Nr = grid_shape self.cell_idx = cuda.device_array(Ntot, dtype=np.int32) self.sorted_idx = cuda.device_array(Ntot, dtype=np.intp) self.prefix_sum = cuda.device_array(Nz * (Nr + 1), dtype=np.int32) # sorting buffers are initialized on CPU like other particle arrays # (because they are swapped with these arrays during sorting) self.sorting_buffer = np.empty(Ntot, dtype=np.float64) # Register integer thta records shift in the indices, # induced by the moving window self.prefix_sum_shift = 0 # Register boolean that records if the particles are sorted or not self.sorted = False # Define optimal number of CUDA threads per block for deposition # and gathering kernels (determined empirically) if particle_shape == "cubic": self.deposit_tpb = 32 self.gather_tpb = 256 else: self.deposit_tpb = 16 if cuda_gpu_model == "V100" else 8 self.gather_tpb = 128
def extract_slice_from_gpu(pref_sum_curr, N_area, species):
    """
    Extract the particles which have an index between pref_sum_curr and
    pref_sum_curr + N_area, and return them in dictionaries.

    Parameters
    ----------
    pref_sum_curr: int
        The starting index needed for the extraction process

    N_area: int
        The number of particles to extract.

    species: an fbpic Species object
        The species from which to extract data

    Returns
    -------
    particle_data : A dictionary of 1D float arrays (that are on the CPU)
        A dictionary that contains the particle data of
        the simulation (with normalized weights), including optional
        integer arrays (e.g. "id", "charge")
    """
    # Call the kernel that extracts the particles from the GPU
    dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d(N_area)
    # - General particle quantities
    part_data = cuda.device_array((8, N_area), dtype=np.float64)
    extract_particles_from_gpu[dim_grid_1d, dim_block_1d](
        pref_sum_curr, species.x, species.y, species.z,
        species.ux, species.uy, species.uz,
        species.w, species.inv_gamma, part_data)
    # - Optional particle arrays
    if species.tracker is not None:
        selected_particle_id = cuda.device_array((N_area,), dtype=np.uint64)
        extract_array_from_gpu[dim_grid_1d, dim_block_1d](
            pref_sum_curr, species.tracker.id, selected_particle_id)
    if species.ionizer is not None:
        selected_particle_charge = cuda.device_array((N_area,),
                                                     dtype=np.uint64)
        extract_array_from_gpu[dim_grid_1d, dim_block_1d](
            pref_sum_curr, species.ionizer.ionization_level,
            selected_particle_charge)
        selected_particle_weight = cuda.device_array((N_area,),
                                                     dtype=np.float64)
        extract_array_from_gpu[dim_grid_1d, dim_block_1d](
            pref_sum_curr, species.ionizer.w_times_level,
            selected_particle_weight)

    # Copy the GPU arrays to the host
    part_data = part_data.copy_to_host()
    particle_data = {
        'x': part_data[0], 'y': part_data[1], 'z': part_data[2],
        'ux': part_data[3], 'uy': part_data[4], 'uz': part_data[5],
        'w': part_data[6], 'inv_gamma': part_data[7]
    }
    if species.tracker is not None:
        particle_data['id'] = selected_particle_id.copy_to_host()
    if species.ionizer is not None:
        particle_data['charge'] = selected_particle_charge.copy_to_host()
        # Replace the particle weight
        particle_data['w'] = selected_particle_weight.copy_to_host()

    # Return the data as a dictionary
    return particle_data