Code example #1
    def deposit_virtual_particles_gpu(self, q, fieldtype, grid):
        # Position of the particles
        x = self.d_baseline_x + q * self.excursion_x
        y = self.d_baseline_y + q * self.excursion_y

        if fieldtype == 'rho':
            # ---------------------------------------
            # Deposit the charge density mode by mode
            # ---------------------------------------
            for m in range(len(grid)):

                dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d(self.Ntot)
                deposit_rho_gpu_unsorted[dim_grid_1d, dim_block_1d](
                    x, y, self.d_baseline_z, self.w, q, grid[m].invdz,
                    grid[m].zmin, grid[m].Nz, grid[m].invdr, grid[m].rmin,
                    grid[m].Nr, grid[m].rho, m, grid[m].d_ruyten_linear_coef)

        elif fieldtype == 'J':
            # Particle velocities
            vx = q * self.vx
            vy = q * self.vy
            # ---------------------------------------
            # Deposit the current density mode by mode
            # ---------------------------------------
            for m in range(len(grid)):

                dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d(self.Ntot)
                deposit_J_gpu_unsorted[dim_grid_1d, dim_block_1d](
                    x, y, self.d_baseline_z, self.w, q, vx, vy, self.d_vz,
                    grid[m].invdz, grid[m].zmin, grid[m].Nz, grid[m].invdr,
                    grid[m].rmin, grid[m].Nr, grid[m].Jr, grid[m].Jt,
                    grid[m].Jz, m, grid[m].d_ruyten_linear_coef)
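
Every example on this page obtains its CUDA launch configuration from `cuda_tpb_bpg_1d`. As a rough sketch of what such a helper does (assuming the usual ceiling-division pattern; the actual default threads-per-block in fbpic may differ), it returns the blocks-per-grid and threads-per-block for a 1D launch over `Ntot` elements:

def cuda_tpb_bpg_1d(N, TPB=256):
    # Hypothetical re-implementation: choose enough blocks so that
    # bpg * TPB >= N, i.e. every element gets its own thread
    bpg = (N + TPB - 1) // TPB
    # Returned in the order expected by kernel[dim_grid, dim_block](...)
    return bpg, TPB

With this convention, a launch like deposit_rho_gpu_unsorted[dim_grid_1d, dim_block_1d](...) runs one thread per macroparticle.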
Code example #2
def reallocate_and_copy_old( species, use_cuda, old_Ntot, new_Ntot ):
    """
    Copy the particle quantities of `species` from arrays of size `old_Ntot`
    into arrays of size `new_Ntot`. Set these arrays as attributes of `species`.

    (The first `old_Ntot` elements of the new arrays are copied from the old
    arrays; the last elements are left empty and expected to be filled later.)

    When `use_cuda` is True, this function also reallocates
    the sorting buffers for GPU, with a size `new_Ntot`.

    Parameters
    ----------
    species: an fbpic Particles object
    use_cuda: bool
        If True, the new arrays are device arrays, and copying is done on GPU.
        If False, the arrays are on CPU, and copying is done on CPU.
    old_Ntot, new_Ntot: int
        Size of the old and new arrays (with old_Ntot < new_Ntot)
    """
    # Check if the data is on the GPU
    data_on_gpu = (type(species.w) is not np.ndarray)

    # On GPU, use one thread per particle
    if data_on_gpu:
        ptcl_grid_1d, ptcl_block_1d = cuda_tpb_bpg_1d( old_Ntot )

    # Iterate over particle attributes and copy the old particles
    for attr in ['x', 'y', 'z', 'ux', 'uy', 'uz', 'w', 'inv_gamma',
                    'Ex', 'Ey', 'Ez', 'Bx', 'By', 'Bz']:
        old_array = getattr(species, attr)
        new_array = allocate_empty( new_Ntot, data_on_gpu, dtype=np.float64 )
        if data_on_gpu:
            copy_particle_data_cuda[ ptcl_grid_1d, ptcl_block_1d ](
                old_Ntot, old_array, new_array )
        else:
            copy_particle_data_numba( old_Ntot, old_array, new_array )
        setattr( species, attr, new_array )
    # Copy the tracking id, if needed
    if species.tracker is not None:
        old_array = species.tracker.id
        new_array = allocate_empty( new_Ntot, use_cuda, dtype=np.uint64 )
        if data_on_gpu:
            copy_particle_data_cuda[ ptcl_grid_1d, ptcl_block_1d ](
                old_Ntot, old_array, new_array )
        else:
            copy_particle_data_numba( old_Ntot, old_array, new_array )
        species.tracker.id = new_array

    # Allocate the auxiliary arrays for GPU
    if use_cuda:
        species.cell_idx = cuda.device_array((new_Ntot,), dtype=np.int32)
        species.sorted_idx = cuda.device_array((new_Ntot,), dtype=np.uint32)
        species.sorting_buffer = cuda.device_array((new_Ntot,), dtype=np.float64)
        if species.n_integer_quantities > 0:
            species.int_sorting_buffer = \
                cuda.device_array( (new_Ntot,), dtype=np.uint64 )

    # Modify the total number of particles
    species.Ntot = new_Ntot
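
For context, a minimal sketch of what a copy kernel such as `copy_particle_data_cuda` could look like, assuming one thread per element of the old array (an illustration, not the fbpic implementation):

from numba import cuda

@cuda.jit
def copy_particle_data_cuda(old_Ntot, old_array, new_array):
    # Each thread copies one element of the old array into the (larger)
    # new array; the remaining elements are left untouched
    i = cuda.grid(1)
    if i < old_Ntot:
        new_array[i] = old_array[i]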
Code example #3
File: antenna_injection.py  Project: fhabib/fbpic
    def copy_rho_buffer(self, iz_min, grid):
        """
        Add the small-size array rho_buffer into the full-size array rho

        Parameters
        ----------
        iz_min: int
            The z index in the full-size array that corresponds to index 0
            in the small-size array (i.e. position at which to add the
            small-size array into the full-size one)

        grid: a list of InterpolationGrid objects
            Contains the full-size array rho
        """
        Nm = len(grid)
        if type(grid[0].rho) is np.ndarray:
            # The large-size array rho is on the CPU
            for m in range(Nm):
                grid[m].rho[iz_min:iz_min + 2] += self.rho_buffer[m]
        else:
            # The large-size array rho is on the GPU
            # Copy the small-size buffer to the GPU
            cuda.to_device(self.rho_buffer, to=self.d_rho_buffer)
            # On the GPU: add the small-size buffers to the large-size array
            dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d(grid[0].Nr, TPB=64)
            for m in range(Nm):
                add_rho_to_gpu_array[dim_grid_1d,
                                     dim_block_1d](iz_min, self.d_rho_buffer,
                                                   grid[m].rho, m)
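
A minimal sketch of an addition kernel like `add_rho_to_gpu_array`, assuming the buffer is laid out as (mode, 2, Nr), which is inferred from the CPU branch above; one thread per radial index adds the two z slices (hypothetical, not the fbpic kernel):

from numba import cuda

@cuda.jit
def add_rho_to_gpu_array(iz_min, rho_buffer, rho, m):
    # One thread per radial index; each thread adds the two z slices of
    # mode m into the full-size array at iz_min and iz_min + 1
    ir = cuda.grid(1)
    if ir < rho.shape[1]:
        rho[iz_min, ir] += rho_buffer[m, 0, ir]
        rho[iz_min + 1, ir] += rho_buffer[m, 1, ir]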
Code example #4
File: particles.py  Project: omriseemann/fbpic
    def push_x( self, dt, x_push=1., y_push=1., z_push=1. ) :
        """
        Advance the particles' positions over `dt` using the current
        momenta (ux, uy, uz).

        Parameters:
        -----------
        dt: float, seconds
            The timestep that should be used for the push
            (This can typically be half of the simulation timestep)

        x_push, y_push, z_push: float, dimensionless
            Multiplying coefficient for the momenta in x, y and z
            e.g. if x_push=1., the particles are pushed forward in x
                 if x_push=-1., the particles are pushed backward in x
        """
        # GPU (CUDA) version
        if self.use_cuda:
            # Get the threads per block and the blocks per grid
            dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d( self.Ntot )
            # Call the CUDA Kernel for push in x
            push_x_gpu[dim_grid_1d, dim_block_1d](
                self.x, self.y, self.z,
                self.ux, self.uy, self.uz,
                self.inv_gamma, dt, x_push, y_push, z_push )
            # The particle array is unsorted after the push in x
            self.sorted = False
        # CPU version
        else:
            push_x_numba( self.x, self.y, self.z,
                self.ux, self.uy, self.uz,
                self.inv_gamma, self.Ntot,
                dt, x_push, y_push, z_push )
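
As a sketch of the underlying kernel, assuming fbpic's convention that ux, uy, uz are momenta normalized to m*c (so the displacement over dt is c*dt*u/gamma), `push_x_gpu` with the signature used in this example could be written as:

from numba import cuda
from scipy.constants import c

@cuda.jit
def push_x_gpu(x, y, z, ux, uy, uz, inv_gamma, dt, x_push, y_push, z_push):
    # One thread per particle: advance the position with the current momenta
    i = cuda.grid(1)
    if i < x.shape[0]:
        cdt = c * dt
        x[i] += cdt * x_push * ux[i] * inv_gamma[i]
        y[i] += cdt * y_push * uy[i] * inv_gamma[i]
        z[i] += cdt * z_push * uz[i] * inv_gamma[i]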
Code example #5
File: particles.py  Project: xyuan/fbpic
    def sort_particles(self, fld):
        """
        Sort the particles by performing the following steps:
        1. Get field cell index
        2. Sort field cell index
        3. Parallel prefix sum
        4. Rearrange particle arrays

        Parameter
        ----------
        fld : a Field object
             Contains the list of InterpolationGrid objects with
             the field values as well as the prefix sum.
        """
        # Shortcut for interpolation grids
        grid = fld.interp
        # Get the threads per block and the blocks per grid
        dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d(self.Ntot)
        dim_grid_2d_flat, dim_block_2d_flat = \
                cuda_tpb_bpg_1d( self.prefix_sum.shape[0] )

        # ------------------------
        # Sorting of the particles
        # ------------------------
        # Get the cell index of each particle
        # (defined by iz_lower and ir_lower)
        get_cell_idx_per_particle[dim_grid_1d,
                                  dim_block_1d](self.cell_idx, self.sorted_idx,
                                                self.x, self.y, self.z,
                                                grid[0].invdz, grid[0].zmin,
                                                grid[0].Nz, grid[0].invdr,
                                                grid[0].rmin, grid[0].Nr)
        # Sort the cell index array and modify the sorted_idx array
        # accordingly. The value of the sorted_idx array corresponds
        # to the index of the sorted particle in the other particle
        # arrays.
        sort_particles_per_cell(self.cell_idx, self.sorted_idx)
        # Reset the old prefix sum
        self.prefix_sum_shift = 0
        prefill_prefix_sum[dim_grid_2d_flat,
                           dim_block_2d_flat](self.cell_idx, self.prefix_sum,
                                              self.Ntot)
        # Perform the inclusive parallel prefix sum
        incl_prefix_sum[dim_grid_1d, dim_block_1d](self.cell_idx,
                                                   self.prefix_sum)
        # Rearrange the particle arrays
        self.rearrange_particle_arrays()
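
A plausible sketch of `get_cell_idx_per_particle`: each thread computes the lower cell indices in z and r, flattens them (the `(Nr + 1)` stride is consistent with the prefix-sum indexing used in `remove_particles_gpu` further down this page), and initializes `sorted_idx` to the identity permutation. The exact clipping convention is an assumption:

import math
from numba import cuda

@cuda.jit
def get_cell_idx_per_particle(cell_idx, sorted_idx, x, y, z,
                              invdz, zmin, Nz, invdr, rmin, Nr):
    i = cuda.grid(1)
    if i < cell_idx.shape[0]:
        # Lower cell index in z and r for this particle
        r = math.sqrt(x[i]**2 + y[i]**2)
        iz = int(math.floor((z[i] - zmin) * invdz))
        ir = int(math.floor((r - rmin) * invdr))
        # Clip to the valid range (assumed convention)
        if iz < 0:
            iz = 0
        elif iz > Nz - 1:
            iz = Nz - 1
        if ir < 0:
            ir = 0
        elif ir > Nr:
            ir = Nr
        # Flattened cell index, and identity permutation for sorted_idx
        cell_idx[i] = iz * (Nr + 1) + ir
        sorted_idx[i] = i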
Code example #6
    def apply_expression(self, ptcl, t):
        """
        Apply the external field function to the particles

        This function is called at each timestep, after field gathering
        in the step function.

        Parameters
        ----------
        ptcl: a list of Particles objects
            The particles on which the external fields will be applied

        t: float (seconds)
            The time in the simulation
        """
        for species in ptcl:

            # If any species was specified at initialization,
            # apply the field only on this species
            if (self.species is None) or (species is self.species):

                # Only apply the field if there are macroparticles
                # in this species
                if species.Ntot <= 0:
                    continue

                # Loop over the different fields involved
                for (fieldtype, amplitude) in self.fieldtypes_and_amplitudes:

                    field = getattr(species, fieldtype)

                    if type(field) is np.ndarray:
                        # Call the CPU function
                        self.cpu_func(field,
                                      species.x,
                                      species.y,
                                      species.z,
                                      t,
                                      amplitude,
                                      self.length_scale,
                                      out=field)
                    else:
                        # Get the threads per block and the blocks per grid
                        dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d(
                            species.Ntot)
                        # Call the GPU kernel
                        self.gpu_func[dim_grid_1d,
                                      dim_block_1d](field, species.x,
                                                    species.y, species.z, t,
                                                    amplitude,
                                                    self.length_scale)
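
For illustration only: the GPU function applied here is presumably generated from the user-supplied field expression, but a hand-written kernel with the same call signature might look like the following (the name and the sine profile are made up for this sketch):

import math
from numba import cuda

@cuda.jit
def example_external_field_gpu(field, x, y, z, t, amplitude, length_scale):
    # One thread per macroparticle: add the external contribution in place
    i = cuda.grid(1)
    if i < field.shape[0]:
        field[i] += amplitude * math.sin(z[i] / length_scale)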
Code example #7
File: particles.py  Project: lauridsj/fbpic
    def rearrange_particle_arrays(self):
        """
        Rearranges the particle data arrays to match with the sorted
        cell index array. The sorted index array is used to resort the
        arrays. A particle buffer is used to temporarily store
        the rearranged data.
        """
        # Get the threads per block and the blocks per grid
        dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d(self.Ntot)
        # Iterate over (float) particle attributes
        attr_list = [ (self,'x'), (self,'y'), (self,'z'), \
                        (self,'ux'), (self,'uy'), (self,'uz'), \
                        (self, 'w'), (self,'inv_gamma') ]
        if self.keep_fields_sorted:
            attr_list += [ (self, 'Ex'), (self, 'Ey'), (self, 'Ez'), \
                            (self, 'Bx'), (self, 'By'), (self, 'Bz') ]
        if self.ionizer is not None:
            attr_list += [(self.ionizer, 'w_times_level')]
        for attr in attr_list:
            # Get particle GPU array
            particle_array = getattr(attr[0], attr[1])
            # Write particle data to particle buffer array while rearranging
            write_sorting_buffer[dim_grid_1d,
                                 dim_block_1d](self.sorted_idx, particle_array,
                                               self.sorting_buffer)
            # Assign the particle buffer to
            # the initial particle data array
            setattr(attr[0], attr[1], self.sorting_buffer)
            # Assign the old particle data array to the particle buffer
            self.sorting_buffer = particle_array
        # Iterate over (integer) particle attributes
        attr_list = []
        if self.tracker is not None:
            attr_list += [(self.tracker, 'id')]
        if self.ionizer is not None:
            attr_list += [(self.ionizer, 'ionization_level')]
        for attr in attr_list:
            # Get particle GPU array
            particle_array = getattr(attr[0], attr[1])
            # Write particle data to particle buffer array while rearranging
            write_sorting_buffer[dim_grid_1d,
                                 dim_block_1d](self.sorted_idx, particle_array,
                                               self.int_sorting_buffer)
            # Assign the particle buffer to
            # the initial particle data array
            setattr(attr[0], attr[1], self.int_sorting_buffer)
            # Assign the old particle data array to the particle buffer
            self.int_sorting_buffer = particle_array
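
A minimal sketch of `write_sorting_buffer`, assuming (as the docstring of `sort_particles` above suggests) that `sorted_idx[i]` holds the pre-sort index of the particle that should end up at position i:

from numba import cuda

@cuda.jit
def write_sorting_buffer(sorted_idx, particle_array, buffer_array):
    # One thread per particle: gather the particle data into sorted order
    i = cuda.grid(1)
    if i < particle_array.shape[0]:
        buffer_array[i] = particle_array[sorted_idx[i]]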
Code example #8
File: tracking.py  Project: skuschel/fbpic
    def generate_new_ids_gpu( self, i_start, i_end ):
        """
        Generate new unique ids, and use them to fill the array `id` in place
        from index `i_start` (included) to index `i_end` (excluded)

        Parameters
        ----------
        i_start, i_end: int
            The indices between which new ids should be generated
        """
        N = i_end - i_start
        grid_1d, block_1d = cuda_tpb_bpg_1d( N )
        # Modify the array self.id in-place,
        # between the indices i_start and i_end
        generate_ids_gpu[ grid_1d, block_1d ]( self.id, i_start, i_end,
                                    self.next_attributed_id, self.id_step )
        # Update the value of self.next_attributed_id
        self.next_attributed_id = self.next_attributed_id + N*self.id_step
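
A sketch of the corresponding kernel, consistent with the update of `next_attributed_id` above (hypothetical; the actual kernel may differ in detail):

from numba import cuda

@cuda.jit
def generate_ids_gpu(id_array, i_start, i_end, next_attributed_id, id_step):
    # One thread per new particle: ids are consecutive, spaced by id_step
    i = cuda.grid(1)
    if i < i_end - i_start:
        id_array[i_start + i] = next_attributed_id + i * id_step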
Code example #9
    def push_p( self ) :
        """
        Advance the particles' momenta over one timestep, using the Vay pusher
        Reference : Vay, Physics of Plasmas 15, 056701 (2008)

        This assumes that the momenta (ux, uy, uz) are initially one
        half-timestep *behind* the positions (x, y, z), and it brings
        them one half-timestep *ahead* of the positions.
        """
        # Skip push for neutral particles (e.g. photons)
        if self.q == 0:
            return

        # GPU (CUDA) version
        if self.use_cuda:
            # Get the threads per block and the blocks per grid
            dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d( self.Ntot )
            # Call the CUDA Kernel for the particle push
            if self.ionizer is None:
                push_p_gpu[dim_grid_1d, dim_block_1d](
                    self.ux, self.uy, self.uz, self.inv_gamma,
                    self.Ex, self.Ey, self.Ez,
                    self.Bx, self.By, self.Bz,
                    self.q, self.m, self.Ntot, self.dt )
            else:
                # Ionizable species can have a charge that depends on the
                # macroparticle, and hence require a different function
                push_p_ioniz_gpu[dim_grid_1d, dim_block_1d](
                    self.ux, self.uy, self.uz, self.inv_gamma,
                    self.Ex, self.Ey, self.Ez,
                    self.Bx, self.By, self.Bz,
                    self.m, self.Ntot, self.dt, self.ionizer.ionization_level )
        # CPU version
        else:
            if self.ionizer is None:
                push_p_numba(self.ux, self.uy, self.uz, self.inv_gamma,
                    self.Ex, self.Ey, self.Ez, self.Bx, self.By, self.Bz,
                    self.q, self.m, self.Ntot, self.dt )
            else:
                # Ionizable species can have a charge that depends on the
                # macroparticle, and hence require a different function
                push_p_ioniz_numba(self.ux, self.uy, self.uz, self.inv_gamma,
                    self.Ex, self.Ey, self.Ez, self.Bx, self.By, self.Bz,
                    self.m, self.Ntot, self.dt, self.ionizer.ionization_level )
Code example #10
def shift_particles_periodic_subdomain( species, zmin, zmax ):
    """
    Assuming the local subdomain is periodic:
    Shift the particle positions by an integer number of box length,
    so that particles outside the box are brought back inside the physical domain

    Parameters:
    -----------
    species: an fbpic.Species object
        Contains the particle data
    zmin, zmax: floats
        Positions of the edges of the periodic box
    """
    # Perform the shift on the GPU
    if species.use_cuda:
        dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d( species.Ntot )
        shift_particles_periodic_cuda[ dim_grid_1d, dim_block_1d ](
                                                    species.z, zmin, zmax )
    # Perform the shift on the CPU
    else:
        shift_particles_periodic_numba( species.z, zmin, zmax )
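
A minimal sketch of the periodic-shift kernel, one thread per particle (the actual fbpic kernel may use a single modulo operation instead of the loops):

from numba import cuda

@cuda.jit
def shift_particles_periodic_cuda(z, zmin, zmax):
    # One thread per particle: shift z by an integer number of box lengths
    # until it lies inside [zmin, zmax)
    i = cuda.grid(1)
    if i < z.shape[0]:
        box_length = zmax - zmin
        while z[i] >= zmax:
            z[i] -= box_length
        while z[i] < zmin:
            z[i] += box_length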
Code example #11
    def halfpush_x( self ) :
        """
        Advance the particles' positions over one half-timestep

        This assumes that the positions (x, y, z) are initially either
        one half-timestep *behind* the momenta (ux, uy, uz), or at the
        same timestep as the momenta.
        """
        # GPU (CUDA) version
        if self.use_cuda:
            # Get the threads per block and the blocks per grid
            dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d( self.Ntot )
            # Call the CUDA Kernel for halfpush in x
            push_x_gpu[dim_grid_1d, dim_block_1d](
                self.x, self.y, self.z,
                self.ux, self.uy, self.uz,
                self.inv_gamma, self.dt )
            # The particle array is unsorted after the push in x
            self.sorted = False
        # CPU version
        else:
            push_x_numba( self.x, self.y, self.z,
                self.ux, self.uy, self.uz,
                self.inv_gamma, self.Ntot, self.dt )
Code example #12
    def copy_J_buffer(self, iz_min, grid):
        """
        Add the small-size arrays Jr_buffer, Jt_buffer, Jz_buffer into
        the full-size arrays Jr, Jt, Jz

        Parameters
        ----------
        iz_min: int
            The z index in the full-size array that corresponds to index 0
            in the small-size array (i.e. position at which to add the
            small-size array into the full-size one)

        grid: a list of InterpolationGrid objects
            Contains the full-size arrays Jr, Jt, Jz
        """
        Nm = len(grid)
        if type(grid[0].Jr) is np.ndarray:
            # The large-size arrays for J are on the CPU
            for m in range(Nm):
                grid[m].Jr[iz_min:iz_min + 2] += self.Jr_buffer[m]
                grid[m].Jt[iz_min:iz_min + 2] += self.Jt_buffer[m]
                grid[m].Jz[iz_min:iz_min + 2] += self.Jz_buffer[m]
        else:
            # The large-size arrays for J are on the GPU
            # Copy the small-size buffers to the GPU
            self.d_Jr_buffer.set(self.Jr_buffer)
            self.d_Jt_buffer.set(self.Jt_buffer)
            self.d_Jz_buffer.set(self.Jz_buffer)
            # On the GPU: add the small-size buffers to the large-size array
            dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d(grid[0].Nr, TPB=64)
            for m in range(Nm):
                add_J_to_gpu_array[dim_grid_1d,
                                   dim_block_1d](iz_min, self.d_Jr_buffer,
                                                 self.d_Jt_buffer,
                                                 self.d_Jz_buffer, grid[m].Jr,
                                                 grid[m].Jt, grid[m].Jz, m)
Code example #13
File: ionizer.py  Project: zcl-maker/fbpic
    def handle_ionization(self, ion):
        """
        Handle ionization, either on CPU or GPU

        - For each ion macroparticle, decide whether it is going to
          be further ionized during this timestep, based on the ADK rate.
        - Add the electrons created from ionization to the `target_species`

        Parameters:
        -----------
        ion: an fbpic.Particles object
            The ionizable species, from which new electrons are created.
        """
        # Skip this function if there are no ions
        if ion.Ntot == 0:
            return

        # Process particles in batches (of typically 10 or 20 particles)
        N_batch = int(ion.Ntot / self.batch_size) + 1
        # Short-cuts
        use_cuda = self.use_cuda

        # Set the number of levels that should be distinguished
        if self.store_electrons_per_level:
            n_levels = self.level_max - self.level_start
        else:
            n_levels = 1

        # Create temporary arrays (on CPU or GPU, depending on `use_cuda`)
        ionized_from = allocate_empty(ion.Ntot, use_cuda, dtype=np.int16)
        n_ionized = allocate_empty((n_levels, N_batch),
                                   use_cuda,
                                   dtype=np.int64)
        # Draw random numbers
        if self.use_cuda:
            random_draw = cupy.random.rand(ion.Ntot, dtype=cupy.float32)
        else:
            random_draw = np.random.rand(ion.Ntot)

        # Determine the ions that are ionized, and count them in each batch
        # (one thread per batch on GPU; parallel loop over batches on CPU)
        if use_cuda:
            batch_grid_1d, batch_block_1d = cuda_tpb_bpg_1d(N_batch)
            ionize_ions_cuda[batch_grid_1d, batch_block_1d](
                N_batch, self.batch_size, ion.Ntot, self.level_start,
                self.level_max, n_levels, n_ionized, ionized_from,
                self.ionization_level, random_draw, self.adk_prefactor,
                self.adk_power, self.adk_exp_prefactor, ion.ux, ion.uy, ion.uz,
                ion.Ex, ion.Ey, ion.Ez, ion.Bx, ion.By, ion.Bz, ion.w,
                self.w_times_level)
        else:
            ionize_ions_numba(N_batch, self.batch_size, ion.Ntot,
                              self.level_start, self.level_max, n_levels,
                              n_ionized, ionized_from, self.ionization_level,
                              random_draw, self.adk_prefactor, self.adk_power,
                              self.adk_exp_prefactor, ion.ux, ion.uy, ion.uz,
                              ion.Ex, ion.Ey, ion.Ez, ion.Bx, ion.By, ion.Bz,
                              ion.w, self.w_times_level)

        # Count the total number of new electrons (operation always performed
        # on the CPU, as this is typically difficult on the GPU)
        if use_cuda:
            n_ionized = n_ionized.copy_to_host()
        cumulative_n_ionized = perform_cumsum_2d(n_ionized)
        # If no new particle was created, skip the rest of this function
        if np.all(cumulative_n_ionized[:, -1] == 0):
            return
        # Copy the cumulated number of electrons back on GPU
        # (Keep a copy on the CPU)
        if use_cuda:
            d_cumulative_n_ionized = cuda.to_device(cumulative_n_ionized)

        # Loop over the electron species associated to each level
        # (when store_electrons_per_level is False, there is a single species)
        # Reallocate electron species (on CPU or GPU depending on `use_cuda`),
        # to accommodate the electrons produced by ionization,
        # and copy the old electrons to the new arrays
        assert len(self.target_species) == n_levels
        for i_level, elec in enumerate(self.target_species):
            old_Ntot = elec.Ntot
            new_Ntot = old_Ntot + cumulative_n_ionized[i_level, -1]
            reallocate_and_copy_old(elec, use_cuda, old_Ntot, new_Ntot)
            # Create the new electrons from ionization (one thread per batch)
            if use_cuda:
                copy_ionized_electrons_cuda[batch_grid_1d, batch_block_1d](
                    N_batch, self.batch_size, old_Ntot, ion.Ntot,
                    d_cumulative_n_ionized, ionized_from, i_level,
                    self.store_electrons_per_level, elec.x, elec.y, elec.z,
                    elec.inv_gamma, elec.ux, elec.uy, elec.uz, elec.w, elec.Ex,
                    elec.Ey, elec.Ez, elec.Bx, elec.By, elec.Bz, ion.x, ion.y,
                    ion.z, ion.inv_gamma, ion.ux, ion.uy, ion.uz, ion.w,
                    ion.Ex, ion.Ey, ion.Ez, ion.Bx, ion.By, ion.Bz)
                # Mark the new electrons as unsorted
                elec.sorted = False
            else:
                copy_ionized_electrons_numba(
                    N_batch, self.batch_size, old_Ntot, ion.Ntot,
                    cumulative_n_ionized, ionized_from, i_level,
                    self.store_electrons_per_level, elec.x, elec.y, elec.z,
                    elec.inv_gamma, elec.ux, elec.uy, elec.uz, elec.w, elec.Ex,
                    elec.Ey, elec.Ez, elec.Bx, elec.By, elec.Bz, ion.x, ion.y,
                    ion.z, ion.inv_gamma, ion.ux, ion.uy, ion.uz, ion.w,
                    ion.Ex, ion.Ey, ion.Ez, ion.Bx, ion.By, ion.Bz)

            # If the electrons are tracked, generate new ids
            # (on CPU or GPU depending on `use_cuda`)
            generate_new_ids(elec, old_Ntot, new_Ntot)
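
The helper `perform_cumsum_2d` used above only needs to provide per-batch offsets and per-level totals; one plausible convention (assumed here: a leading zero column, so that `cumulative[:, -1]` is the total per level, consistent with the checks above) is:

import numpy as np

def perform_cumsum_2d(n_ionized):
    # Hypothetical convention: cumulative sum along the batch axis,
    # prepended with a zero column, so that the last column holds the
    # total number of new electrons per level
    n_levels, N_batch = n_ionized.shape
    cumulative = np.zeros((n_levels, N_batch + 1), dtype=np.int64)
    np.cumsum(n_ionized, axis=1, out=cumulative[:, 1:])
    return cumulative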
Code example #14
File: particles.py  Project: omriseemann/fbpic
    def deposit( self, fld, fieldtype ) :
        """
        Deposit the particles charge or current onto the grid

        This assumes that the particle positions (and momenta in the case of J)
        are currently at the same timestep as the field that is to be deposited

        Parameters
        ----------
        fld : a Field object
             Contains the list of InterpolationGrid objects with
             the field values as well as the prefix sum.

        fieldtype : string
             Indicates which field to deposit
             Either 'J' or 'rho'
        """
        # Skip deposition for neutral particles (e.g. photons)
        if self.q == 0:
            return

        # Shortcuts and safe-guards
        grid = fld.interp
        assert fieldtype in ['rho', 'J']
        assert self.particle_shape in ['linear', 'cubic']

        # When running on GPU: first sort the arrays of particles
        if self.use_cuda:
            # Sort the particles
            if not self.sorted:
                self.sort_particles(fld=fld)
                # The particles are now sorted and rearranged
                self.sorted = True

        # For ionizable atoms: set the effective weight to the weight
        # times the ionization level (on GPU, this needs to be done *after*
        # sorting, otherwise `weight` is not equal to the corresponding array)
        if self.ionizer is not None:
            weight = self.ionizer.w_times_level
        else:
            weight = self.w

        # GPU (CUDA) version
        if self.use_cuda:
            # Get the threads per block and the blocks per grid
            dim_grid_2d_flat, dim_block_2d_flat = \
                cuda_tpb_bpg_1d( self.prefix_sum.shape[0], TPB=64 )

            # Call the CUDA Kernel for the deposition of rho or J
            Nm = len( grid )
            # Rho
            if fieldtype == 'rho':
                if self.particle_shape == 'linear':
                    if Nm == 2:
                        deposit_rho_gpu_linear[
                            dim_grid_2d_flat, dim_block_2d_flat](
                            self.x, self.y, self.z, weight, self.q,
                            grid[0].invdz, grid[0].zmin, grid[0].Nz,
                            grid[0].invdr, grid[0].rmin, grid[0].Nr,
                            grid[0].rho, grid[1].rho,
                            self.cell_idx, self.prefix_sum)
                    else:
                        for m in range(Nm):
                            deposit_rho_gpu_linear_one_mode[
                                dim_grid_2d_flat, dim_block_2d_flat](
                                self.x, self.y, self.z, weight, self.q,
                                grid[m].invdz, grid[m].zmin, grid[m].Nz,
                                grid[m].invdr, grid[m].rmin, grid[m].Nr,
                                grid[m].rho, m,
                                self.cell_idx, self.prefix_sum)
                elif self.particle_shape == 'cubic':
                    if Nm == 2:
                        deposit_rho_gpu_cubic[
                            dim_grid_2d_flat, dim_block_2d_flat](
                            self.x, self.y, self.z, weight, self.q,
                            grid[0].invdz, grid[0].zmin, grid[0].Nz,
                            grid[0].invdr, grid[0].rmin, grid[0].Nr,
                            grid[0].rho, grid[1].rho,
                            self.cell_idx, self.prefix_sum)
                    else:
                        for m in range(Nm):
                            deposit_rho_gpu_cubic_one_mode[
                                dim_grid_2d_flat, dim_block_2d_flat](
                                self.x, self.y, self.z, weight, self.q,
                                grid[m].invdz, grid[m].zmin, grid[m].Nz,
                                grid[m].invdr, grid[m].rmin, grid[m].Nr,
                                grid[m].rho, m,
                                self.cell_idx, self.prefix_sum)
            # J
            elif fieldtype == 'J':
                # Deposit J in each of four directions
                if self.particle_shape == 'linear':
                    if Nm == 2:
                        deposit_J_gpu_linear[
                            dim_grid_2d_flat, dim_block_2d_flat](
                            self.x, self.y, self.z, weight, self.q,
                            self.ux, self.uy, self.uz, self.inv_gamma,
                            grid[0].invdz, grid[0].zmin, grid[0].Nz,
                            grid[0].invdr, grid[0].rmin, grid[0].Nr,
                            grid[0].Jr, grid[1].Jr,
                            grid[0].Jt, grid[1].Jt,
                            grid[0].Jz, grid[1].Jz,
                            self.cell_idx, self.prefix_sum)
                    else:
                        for m in range(Nm):
                            deposit_J_gpu_linear_one_mode[
                                dim_grid_2d_flat, dim_block_2d_flat](
                                self.x, self.y, self.z, weight, self.q,
                                self.ux, self.uy, self.uz, self.inv_gamma,
                                grid[m].invdz, grid[m].zmin, grid[m].Nz,
                                grid[m].invdr, grid[m].rmin, grid[m].Nr,
                                grid[m].Jr, grid[m].Jt, grid[m].Jz, m,
                                self.cell_idx, self.prefix_sum)
                elif self.particle_shape == 'cubic':
                    if Nm == 2:
                        deposit_J_gpu_cubic[
                            dim_grid_2d_flat, dim_block_2d_flat](
                            self.x, self.y, self.z, weight, self.q,
                            self.ux, self.uy, self.uz, self.inv_gamma,
                            grid[0].invdz, grid[0].zmin, grid[0].Nz,
                            grid[0].invdr, grid[0].rmin, grid[0].Nr,
                            grid[0].Jr, grid[1].Jr,
                            grid[0].Jt, grid[1].Jt,
                            grid[0].Jz, grid[1].Jz,
                            self.cell_idx, self.prefix_sum)
                    else:
                        for m in range(Nm):
                            deposit_J_gpu_cubic_one_mode[
                                dim_grid_2d_flat, dim_block_2d_flat](
                                self.x, self.y, self.z, weight, self.q,
                                self.ux, self.uy, self.uz, self.inv_gamma,
                                grid[m].invdz, grid[m].zmin, grid[m].Nz,
                                grid[m].invdr, grid[m].rmin, grid[m].Nr,
                                grid[m].Jr, grid[m].Jt, grid[m].Jz, m,
                                self.cell_idx, self.prefix_sum)

        # CPU version
        else:
            # Divide particles in chunks (each chunk is handled by a different
            # thread) and register the indices that bound each chunk
            ptcl_chunk_indices = get_chunk_indices(self.Ntot, nthreads)

            # Multithreading functions for the deposition of rho or J
            # for Mode 0 and 1 only.
            if fieldtype == 'rho':
                # Deposit rho using CPU threading
                if self.particle_shape == 'linear':
                    deposit_rho_numba_linear(
                        self.x, self.y, self.z, weight, self.q,
                        grid[0].invdz, grid[0].zmin, grid[0].Nz,
                        grid[0].invdr, grid[0].rmin, grid[0].Nr,
                        fld.rho_global, fld.Nm,
                        nthreads, ptcl_chunk_indices )
                elif self.particle_shape == 'cubic':
                    deposit_rho_numba_cubic(
                        self.x, self.y, self.z, weight, self.q,
                        grid[0].invdz, grid[0].zmin, grid[0].Nz,
                        grid[0].invdr, grid[0].rmin, grid[0].Nr,
                        fld.rho_global, fld.Nm,
                        nthreads, ptcl_chunk_indices )

            elif fieldtype == 'J':
                # Deposit J using CPU threading
                if self.particle_shape == 'linear':
                    deposit_J_numba_linear(
                        self.x, self.y, self.z, weight, self.q,
                        self.ux, self.uy, self.uz, self.inv_gamma,
                        grid[0].invdz, grid[0].zmin, grid[0].Nz,
                        grid[0].invdr, grid[0].rmin, grid[0].Nr,
                        fld.Jr_global, fld.Jt_global, fld.Jz_global, fld.Nm,
                        nthreads, ptcl_chunk_indices )
                elif self.particle_shape == 'cubic':
                    deposit_J_numba_cubic(
                        self.x, self.y, self.z, weight, self.q,
                        self.ux, self.uy, self.uz, self.inv_gamma,
                        grid[0].invdz, grid[0].zmin, grid[0].Nz,
                        grid[0].invdr, grid[0].rmin, grid[0].Nr,
                        fld.Jr_global, fld.Jt_global, fld.Jz_global, fld.Nm,
                        nthreads, ptcl_chunk_indices )
Code example #15
File: particles.py  Project: omriseemann/fbpic
    def gather( self, grid ) :
        """
        Gather the fields onto the macroparticles

        This assumes that the particle positions are currently at
        the same timestep as the field that is to be gathered.

        Parameter
        ----------
        grid : a list of InterpolationGrid objects
             (one InterpolationGrid object per azimuthal mode)
             Contains the field values on the interpolation grid
        """
        # Skip gathering for neutral particles (e.g. photons)
        if self.q == 0:
            return

        # Number of modes
        Nm = len(grid)

        # GPU (CUDA) version
        if self.use_cuda:
            # Get the threads per block and the blocks per grid
            dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d( self.Ntot, TPB=64 )
            # Call the CUDA Kernel for the gathering of E and B Fields
            if self.particle_shape == 'linear':
                if Nm == 2:
                    # Optimized version for 2 modes
                    gather_field_gpu_linear[dim_grid_1d, dim_block_1d](
                         self.x, self.y, self.z,
                         grid[0].invdz, grid[0].zmin, grid[0].Nz,
                         grid[0].invdr, grid[0].rmin, grid[0].Nr,
                         grid[0].Er, grid[0].Et, grid[0].Ez,
                         grid[1].Er, grid[1].Et, grid[1].Ez,
                         grid[0].Br, grid[0].Bt, grid[0].Bz,
                         grid[1].Br, grid[1].Bt, grid[1].Bz,
                         self.Ex, self.Ey, self.Ez,
                         self.Bx, self.By, self.Bz)
                else:
                    # Generic version for arbitrary number of modes
                    erase_eb_cuda[dim_grid_1d, dim_block_1d](
                                    self.Ex, self.Ey, self.Ez,
                                    self.Bx, self.By, self.Bz, self.Ntot )
                    for m in range(Nm):
                        gather_field_gpu_linear_one_mode[
                            dim_grid_1d, dim_block_1d](
                            self.x, self.y, self.z,
                            grid[m].invdz, grid[m].zmin, grid[m].Nz,
                            grid[m].invdr, grid[m].rmin, grid[m].Nr,
                            grid[m].Er, grid[m].Et, grid[m].Ez,
                            grid[m].Br, grid[m].Bt, grid[m].Bz, m,
                            self.Ex, self.Ey, self.Ez,
                            self.Bx, self.By, self.Bz)
            elif self.particle_shape == 'cubic':
                if Nm == 2:
                    # Optimized version for 2 modes
                    gather_field_gpu_cubic[dim_grid_1d, dim_block_1d](
                         self.x, self.y, self.z,
                         grid[0].invdz, grid[0].zmin, grid[0].Nz,
                         grid[0].invdr, grid[0].rmin, grid[0].Nr,
                         grid[0].Er, grid[0].Et, grid[0].Ez,
                         grid[1].Er, grid[1].Et, grid[1].Ez,
                         grid[0].Br, grid[0].Bt, grid[0].Bz,
                         grid[1].Br, grid[1].Bt, grid[1].Bz,
                         self.Ex, self.Ey, self.Ez,
                         self.Bx, self.By, self.Bz)
                else:
                    # Generic version for arbitrary number of modes
                    erase_eb_cuda[dim_grid_1d, dim_block_1d](
                                    self.Ex, self.Ey, self.Ez,
                                    self.Bx, self.By, self.Bz, self.Ntot )
                    for m in range(Nm):
                        gather_field_gpu_cubic_one_mode[
                            dim_grid_1d, dim_block_1d](
                            self.x, self.y, self.z,
                            grid[m].invdz, grid[m].zmin, grid[m].Nz,
                            grid[m].invdr, grid[m].rmin, grid[m].Nr,
                            grid[m].Er, grid[m].Et, grid[m].Ez,
                            grid[m].Br, grid[m].Bt, grid[m].Bz, m,
                            self.Ex, self.Ey, self.Ez,
                            self.Bx, self.By, self.Bz)
            else:
                raise ValueError("`particle_shape` should be either "
                                 "'linear' or 'cubic' but is `%s`"
                                 % self.particle_shape)
        # CPU version
        else:
            if self.particle_shape == 'linear':
                if Nm == 2:
                    # Optimized version for 2 modes
                    gather_field_numba_linear(
                        self.x, self.y, self.z,
                        grid[0].invdz, grid[0].zmin, grid[0].Nz,
                        grid[0].invdr, grid[0].rmin, grid[0].Nr,
                        grid[0].Er, grid[0].Et, grid[0].Ez,
                        grid[1].Er, grid[1].Et, grid[1].Ez,
                        grid[0].Br, grid[0].Bt, grid[0].Bz,
                        grid[1].Br, grid[1].Bt, grid[1].Bz,
                        self.Ex, self.Ey, self.Ez,
                        self.Bx, self.By, self.Bz)
                else:
                    # Generic version for arbitrary number of modes
                    erase_eb_numba( self.Ex, self.Ey, self.Ez,
                                    self.Bx, self.By, self.Bz, self.Ntot )
                    for m in range(Nm):
                        gather_field_numba_linear_one_mode(
                            self.x, self.y, self.z,
                            grid[m].invdz, grid[m].zmin, grid[m].Nz,
                            grid[m].invdr, grid[m].rmin, grid[m].Nr,
                            grid[m].Er, grid[m].Et, grid[m].Ez,
                            grid[m].Br, grid[m].Bt, grid[m].Bz, m,
                            self.Ex, self.Ey, self.Ez,
                            self.Bx, self.By, self.Bz
                        )
            elif self.particle_shape == 'cubic':
                # Divide particles into chunks (each chunk is handled by a
                # different thread) and return the indices that bound chunks
                ptcl_chunk_indices = get_chunk_indices(self.Ntot, nthreads)
                if Nm == 2:
                    # Optimized version for 2 modes
                    gather_field_numba_cubic(
                        self.x, self.y, self.z,
                        grid[0].invdz, grid[0].zmin, grid[0].Nz,
                        grid[0].invdr, grid[0].rmin, grid[0].Nr,
                        grid[0].Er, grid[0].Et, grid[0].Ez,
                        grid[1].Er, grid[1].Et, grid[1].Ez,
                        grid[0].Br, grid[0].Bt, grid[0].Bz,
                        grid[1].Br, grid[1].Bt, grid[1].Bz,
                        self.Ex, self.Ey, self.Ez,
                        self.Bx, self.By, self.Bz,
                        nthreads, ptcl_chunk_indices )
                else:
                    # Generic version for arbitrary number of modes
                    erase_eb_numba( self.Ex, self.Ey, self.Ez,
                                    self.Bx, self.By, self.Bz, self.Ntot )
                    for m in range(Nm):
                        gather_field_numba_cubic_one_mode(
                            self.x, self.y, self.z,
                            grid[m].invdz, grid[m].zmin, grid[m].Nz,
                            grid[m].invdr, grid[m].rmin, grid[m].Nr,
                            grid[m].Er, grid[m].Et, grid[m].Ez,
                            grid[m].Br, grid[m].Bt, grid[m].Bz, m,
                            self.Ex, self.Ey, self.Ez,
                            self.Bx, self.By, self.Bz,
                            nthreads, ptcl_chunk_indices )
            else:
                raise ValueError("`particle_shape` should be either "
                                 "'linear' or 'cubic' but is `%s`"
                                 % self.particle_shape)
Code example #16
File: particles.py  Project: omriseemann/fbpic
    def push_p( self, t ) :
        """
        Advance the particles' momenta over one timestep, using the Vay pusher
        Reference : Vay, Physics of Plasmas 15, 056701 (2008)

        This assumes that the momenta (ux, uy, uz) are initially one
        half-timestep *behind* the positions (x, y, z), and it brings
        them one half-timestep *ahead* of the positions.

        Parameters
        ----------
        t: float
            The current simulation time
            (Useful for particles that are ballistic before a given plane)
        """
        # Skip push for neutral particles (e.g. photons)
        if self.q == 0:
            return
        # For particles that are ballistic before a plane,
        # get the current position of the plane
        if isinstance( self.injector, BallisticBeforePlane ):
            z_plane = self.injector.get_current_plane_position( t )
            if self.ionizer is not None:
                raise NotImplementedError('Ballistic injection before a plane '
                    'is not implemented for ionizable particles.')
        else:
            z_plane = None

        # GPU (CUDA) version
        if self.use_cuda:
            # Get the threads per block and the blocks per grid
            dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d( self.Ntot )
            # Call the CUDA Kernel for the particle push
            if self.ionizer is not None:
                # Ionizable species can have a charge that depends on the
                # macroparticle, and hence require a different function
                push_p_ioniz_gpu[dim_grid_1d, dim_block_1d](
                    self.ux, self.uy, self.uz, self.inv_gamma,
                    self.Ex, self.Ey, self.Ez,
                    self.Bx, self.By, self.Bz,
                    self.m, self.Ntot, self.dt, self.ionizer.ionization_level )
            elif z_plane is not None:
                # Particles that are ballistic before a plane also
                # require a different pusher
                push_p_after_plane_gpu[dim_grid_1d, dim_block_1d](
                    self.z, z_plane,
                    self.ux, self.uy, self.uz, self.inv_gamma,
                    self.Ex, self.Ey, self.Ez,
                    self.Bx, self.By, self.Bz,
                    self.q, self.m, self.Ntot, self.dt )
            else:
                # Standard pusher
                push_p_gpu[dim_grid_1d, dim_block_1d](
                    self.ux, self.uy, self.uz, self.inv_gamma,
                    self.Ex, self.Ey, self.Ez,
                    self.Bx, self.By, self.Bz,
                    self.q, self.m, self.Ntot, self.dt )

        # CPU version
        else:
            if self.ionizer is not None:
                # Ionizable species can have a charge that depends on the
                # macroparticle, and hence require a different function
                push_p_ioniz_numba(self.ux, self.uy, self.uz, self.inv_gamma,
                    self.Ex, self.Ey, self.Ez, self.Bx, self.By, self.Bz,
                    self.m, self.Ntot, self.dt, self.ionizer.ionization_level )
            elif z_plane is not None:
                # Particles that are ballistic before a plane also
                # require a different pusher
                push_p_after_plane_numba(
                    self.z, z_plane,
                    self.ux, self.uy, self.uz, self.inv_gamma,
                    self.Ex, self.Ey, self.Ez,
                    self.Bx, self.By, self.Bz,
                    self.q, self.m, self.Ntot, self.dt )
            else:
                # Standard pusher
                push_p_numba(self.ux, self.uy, self.uz, self.inv_gamma,
                    self.Ex, self.Ey, self.Ez, self.Bx, self.By, self.Bz,
                    self.q, self.m, self.Ntot, self.dt )
Code example #17
def add_buffers_gpu( species, float_recv_left, float_recv_right,
                            uint_recv_left, uint_recv_right):
    """
    Add the particles stored in recv_left and recv_right
    to the existing particle in species.

    Parameters
    ----------
    species: a Particles object
        Contains the particles that stayed on the present processor

    float_recv_left, float_recv_right, uint_recv_left, uint_recv_right:
        arrays of shape (n_float, Nptcl) and (n_int, Nptcl), where Nptcl
        is the number of particles received from the left and right
        processors respectively, and where n_float and n_int are the
        numbers of float and integer quantities respectively.
        These arrays are always on the CPU (since they were used for MPI).
    """
    # Get the new number of particles
    old_Ntot = species.Ntot
    n_left = float_recv_left.shape[1]
    n_right = float_recv_right.shape[1]
    new_Ntot = old_Ntot + n_left + n_right

    # Get the threads per block and the blocks per grid
    n_left_grid, n_left_block = cuda_tpb_bpg_1d( n_left )
    n_right_grid, n_right_block = cuda_tpb_bpg_1d( n_right )
    n_old_grid, n_old_block = cuda_tpb_bpg_1d( old_Ntot )

    # Iterate over particle attributes
    # Build list of float attributes to copy
    attr_list = [ (species,'x'), (species,'y'), (species,'z'), \
                  (species,'ux'), (species,'uy'), (species,'uz'), \
                  (species,'inv_gamma'), (species,'w') ]
    if species.ionizer is not None:
        attr_list += [ (species.ionizer, 'w_times_level') ]
    # Loop through the float quantities
    for i_attr in range( len(attr_list) ):
        # Copy the proper buffers to the GPU
        left_buffer = cuda.to_device( float_recv_left[i_attr] )
        right_buffer = cuda.to_device( float_recv_right[i_attr] )
        # Initialize the new particle array
        particle_array = cuda.device_array( (new_Ntot,), dtype=np.float64)
        # Merge the arrays on the GPU
        stay_buffer = getattr( attr_list[i_attr][0], attr_list[i_attr][1])
        if n_left != 0:
            copy_particles[n_left_grid, n_left_block](
                n_left, left_buffer, 0, particle_array, 0 )
        if old_Ntot != 0:
            copy_particles[n_old_grid, n_old_block](
                old_Ntot, stay_buffer, 0, particle_array, n_left )
        if n_right != 0:
            copy_particles[n_right_grid, n_right_block](
                n_right, right_buffer, 0, particle_array, n_left+old_Ntot )
        # Assign the merged array as the new particle data array
        setattr(attr_list[i_attr][0], attr_list[i_attr][1], particle_array)

    # Build list of integer quantities to copy
    attr_list = []
    if species.tracker is not None:
        attr_list.append( (species.tracker,'id') )
    if species.ionizer is not None:
        attr_list.append( (species.ionizer,'ionization_level') )
    # Loop through the integer quantities
    for i_attr in range( len(attr_list) ):
        # Copy the proper buffers to the GPU
        left_buffer = cuda.to_device( uint_recv_left[i_attr] )
        right_buffer = cuda.to_device( uint_recv_right[i_attr] )
        # Initialize the new particle array
        particle_array = cuda.device_array( (new_Ntot,), dtype=np.uint64)
        # Merge the arrays on the GPU
        stay_buffer = getattr( attr_list[i_attr][0], attr_list[i_attr][1])
        if n_left != 0:
            copy_particles[n_left_grid, n_left_block](
                n_left, left_buffer, 0, particle_array, 0 )
        if old_Ntot != 0:
            copy_particles[n_old_grid, n_old_block](
                old_Ntot, stay_buffer, 0, particle_array, n_left )
        if n_right != 0:
            copy_particles[n_right_grid, n_right_block](
                n_right, right_buffer, 0, particle_array, n_left+old_Ntot )
        # Assign the merged array as the new particle data array
        setattr(attr_list[i_attr][0], attr_list[i_attr][1], particle_array)

    # Adapt the total number of particles
    species.Ntot = new_Ntot
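
A minimal sketch of the `copy_particles` kernel used above, assuming the signature (number of elements, source array, source offset, destination array, destination offset) implied by the calls:

from numba import cuda

@cuda.jit
def copy_particles(N, source, source_start, dest, dest_start):
    # One thread per copied element
    i = cuda.grid(1)
    if i < N:
        dest[dest_start + i] = source[source_start + i]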
Code example #18
def remove_particles_gpu(species, fld, n_guard, left_proc, right_proc):
    """
    Remove the particles that are outside of the physical domain (i.e.
    in the guard cells). Store them in sending buffers, which are returned.

    Parameters
    ----------
    species: a Particles object
        Contains the data of this species

    fld: a Fields object
        Contains information about the dimension of the grid,
        and the prefix sum (when using the GPU)

    n_guard: int
        Number of guard cells

    left_proc, right_proc: int or None
        Indicate whether there is a left or right processor or if the
        boundary is open (None).

    Returns
    -------
    float_send_left, float_send_right, uint_send_left, uint_send_right:
        arrays of shape (n_float,Nptcl) and (n_int,Nptcl) where Nptcl
        is the number of particles that are sent to the left
        proc and right proc respectively, and where n_float and n_int
        are the number of float and integer quantities respectively
    """
    # Check if particles are sorted
    # (The particles are usually expected to be sorted from the previous
    # iteration at this point - except at the first iteration of `step`.)
    if not species.sorted:
        species.sort_particles(fld = fld)
        species.sorted = True

    # Get the particle indices between which to remove the particles
    # (Take into account the fact that the moving window may have
    # shifted the grid since the particles were last sorted: prefix_sum_shift)
    prefix_sum = species.prefix_sum
    Nz = fld.Nz
    Nr = fld.Nr
    # Find the z index of the first cell for which particles are kept
    iz_min = max( n_guard + species.prefix_sum_shift, 0 )
    # Find the z index of the first cell for which particles are removed again
    iz_max = min( Nz - n_guard + species.prefix_sum_shift + 1, Nz )
    # Find the corresponding indices in the particle array
    # Reminder: prefix_sum[i] is the cumulative sum of the number of particles
    # in cells 0 to i (where cell i is included)
    if iz_min*(Nr+1) - 1 >= 0:
        i_min = prefix_sum.getitem( iz_min*(Nr+1) - 1 )
    else:
        i_min = 0
    i_max = prefix_sum.getitem( iz_max*(Nr+1) - 1 )

    # Total number of particles in each particle group
    N_send_l = i_min
    new_Ntot = i_max - i_min
    N_send_r = species.Ntot - i_max

    # Allocate the sending buffers on the CPU
    n_float = species.n_float_quantities
    n_int = species.n_integer_quantities
    if left_proc is not None:
        float_send_left = np.empty((n_float, N_send_l), dtype=np.float64)
        uint_send_left = np.empty((n_int, N_send_l), dtype=np.uint64)
    else:
        float_send_left = np.empty((n_float, 0), dtype=np.float64)
        uint_send_left = np.empty((n_int, 0), dtype=np.uint64)
    if right_proc is not None:
        float_send_right = np.empty((n_float, N_send_r), dtype=np.float64)
        uint_send_right = np.empty((n_int, N_send_r), dtype=np.uint64)
    else:
        float_send_right = np.empty((n_float, 0), dtype=np.float64)
        uint_send_right = np.empty((n_int, 0), dtype=np.uint64)

    # Get the threads per block and the blocks per grid
    dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d( species.Ntot )
    # Float quantities:
    # Build list of float attributes to copy
    attr_list = [ (species,'x'), (species,'y'), (species,'z'),
                    (species,'ux'), (species,'uy'), (species,'uz'),
                    (species,'inv_gamma'), (species,'w') ]
    if species.ionizer is not None:
        attr_list.append( (species.ionizer,'w_times_level') )
    # Loop through the float attributes
    for i_attr in range(n_float):
        # Initialize 3 buffer arrays on the GPU (need to be initialized
        # inside the loop, as `copy_to_host` invalidates these arrays)
        left_buffer = cuda.device_array((N_send_l,), dtype=np.float64)
        right_buffer = cuda.device_array((N_send_r,), dtype=np.float64)
        stay_buffer = cuda.device_array((new_Ntot,), dtype=np.float64)
        # Check that the buffers are still on GPU
        # (safeguard against automatic memory management)
        assert type(left_buffer) != np.ndarray
        assert type(right_buffer) != np.ndarray
        assert type(stay_buffer) != np.ndarray
        # Split the particle array into the 3 buffers on the GPU
        particle_array = getattr( attr_list[i_attr][0], attr_list[i_attr][1] )
        split_particles_to_buffers[dim_grid_1d, dim_block_1d]( particle_array,
                    left_buffer, stay_buffer, right_buffer, i_min, i_max)
        # Assign the stay_buffer to the initial particle data array
        # and fill the sending buffers (if needed for MPI)
        setattr( attr_list[i_attr][0], attr_list[i_attr][1], stay_buffer)
        if left_proc is not None:
            left_buffer.copy_to_host( float_send_left[i_attr] )
        if right_proc is not None:
            right_buffer.copy_to_host( float_send_right[i_attr] )

    # Integer quantities:
    if n_int > 0:
        attr_list = []
        if species.tracker is not None:
            attr_list.append( (species.tracker,'id') )
        if species.ionizer is not None:
            attr_list.append( (species.ionizer,'ionization_level') )
    for i_attr in range(n_int):
        # Initialize 3 buffer arrays on the GPU (need to be initialized
        # inside the loop, as `copy_to_host` invalidates these arrays)
        left_buffer = cuda.device_array((N_send_l,), dtype=np.uint64)
        right_buffer = cuda.device_array((N_send_r,), dtype=np.uint64)
        stay_buffer = cuda.device_array((new_Ntot,), dtype=np.uint64)
        # Split the particle array into the 3 buffers on the GPU
        particle_array = getattr( attr_list[i_attr][0], attr_list[i_attr][1] )
        split_particles_to_buffers[dim_grid_1d, dim_block_1d]( particle_array,
            left_buffer, stay_buffer, right_buffer, i_min, i_max)
        # Assign the stay_buffer to the initial particle data array
        # and fill the sending buffers (if needed for MPI)
        setattr( attr_list[i_attr][0], attr_list[i_attr][1], stay_buffer)
        if left_proc is not None:
            left_buffer.copy_to_host( uint_send_left[i_attr] )
        if right_proc is not None:
            right_buffer.copy_to_host( uint_send_right[i_attr] )

    # Change the new total number of particles
    species.Ntot = new_Ntot

    # Return the sending buffers
    return(float_send_left, float_send_right, uint_send_left, uint_send_right)
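
For reference, a plausible CPU-side sketch of the splitting performed by `split_particles_to_buffers` above (an illustration, not the actual GPU kernel): the particle arrays are assumed to be already sorted, so that particles leaving through the left boundary occupy indices below `i_min` and particles leaving through the right boundary occupy indices at or above `i_max`.

import numpy as np

def split_to_buffers_cpu(a, i_min, i_max):
    # CPU analogue of the split: the array is assumed sorted so that
    # outgoing particles sit at both ends.
    left_buffer = a[:i_min].copy()       # sent to the left neighbor
    stay_buffer = a[i_min:i_max].copy()  # kept in the local subdomain
    right_buffer = a[i_max:].copy()      # sent to the right neighbor
    return left_buffer, stay_buffer, right_buffer

# e.g. with 10 particles, i_min=2, i_max=8:
# left -> a[0:2], stay -> a[2:8], right -> a[8:10]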
Code example #19
    def handle_scattering(self, elec, t):
        """
        Handle Compton scattering, either on CPU or GPU

        - For each electron, decide whether it is going to produce a new
          photon, based on the integrated Klein-Nishina formula
        - Add the photons created from Compton scattering to `target_species`

        Parameters
        ----------
        elec: an fbpic.Particles object
            The electrons species, from which new photons will be created

        t: float
            The simulation time
        """
        # Process particles in batches (of typically 10 to 20 particles)
        N_batch = int(elec.Ntot / self.batch_size) + 1
        # Short-cut for use_cuda
        use_cuda = self.use_cuda

        # Create temporary arrays (on CPU or GPU, depending on `use_cuda`)
        nscatter_per_batch = allocate_empty(N_batch, use_cuda, dtype=np.int64)
        nscatter_per_elec = allocate_empty(elec.Ntot, use_cuda, dtype=np.int64)
        photon_n = allocate_empty(elec.Ntot, use_cuda, dtype=np.float64)
        # Prepare random numbers
        if self.use_cuda:
            seed = np.random.randint(256)
            random_states = create_xoroshiro128p_states(N_batch, seed)

        # For each electron, calculate the local density of photons
        # *in the frame of the simulation*
        if use_cuda:
            bpg, tpg = cuda_tpb_bpg_1d(elec.Ntot)
            get_photon_density_gaussian_cuda[bpg, tpg](
                photon_n, elec.Ntot, elec.x, elec.y, elec.z, c * t,
                self.photon_n_lab_peak, self.inv_laser_waist2,
                self.inv_laser_ctau2, self.laser_initial_z0, self.gamma_boost,
                self.beta_boost)
        else:
            get_photon_density_gaussian_numba(
                photon_n, elec.Ntot, elec.x, elec.y, elec.z, c * t,
                self.photon_n_lab_peak, self.inv_laser_waist2,
                self.inv_laser_ctau2, self.laser_initial_z0, self.gamma_boost,
                self.beta_boost)

        # Determine the electrons that scatter, and count them in each batch
        # (one thread per batch on GPU; parallel loop over batches on CPU)
        if use_cuda:
            batch_grid_1d, batch_block_1d = cuda_tpb_bpg_1d(N_batch)
            determine_scatterings_cuda[batch_grid_1d, batch_block_1d](
                N_batch, self.batch_size, elec.Ntot, nscatter_per_elec,
                nscatter_per_batch, random_states, elec.dt, elec.ux, elec.uy,
                elec.uz, elec.inv_gamma, self.ratio_w_electron_photon,
                photon_n, self.photon_p, self.photon_beta_x,
                self.photon_beta_y, self.photon_beta_z)
        else:
            determine_scatterings_numba(N_batch, self.batch_size, elec.Ntot,
                                        nscatter_per_elec, nscatter_per_batch,
                                        elec.dt, elec.ux, elec.uy, elec.uz,
                                        elec.inv_gamma,
                                        self.ratio_w_electron_photon, photon_n,
                                        self.photon_p, self.photon_beta_x,
                                        self.photon_beta_y, self.photon_beta_z)

        # Count the total number of new photons
        cumul_nscatter_per_batch = perform_cumsum(nscatter_per_batch, use_cuda)
        N_created = int(cumul_nscatter_per_batch[-1])
        # If no new particle was created, skip the rest of this function
        if N_created == 0:
            return

        # Reallocate the photon species (on CPU or GPU depending on `use_cuda`),
        # to accommodate the photons produced by Compton scattering,
        # and copy the old photons to the new arrays
        photons = self.target_species
        old_Ntot = photons.Ntot
        new_Ntot = old_Ntot + N_created
        reallocate_and_copy_old(photons, use_cuda, old_Ntot, new_Ntot)

        # Create the new photons from Compton scattering (with a random
        # scattering angle) and add recoil momentum to the electrons
        if use_cuda:
            scatter_photons_electrons_cuda[batch_grid_1d, batch_block_1d](
                N_batch, self.batch_size, old_Ntot, elec.Ntot,
                cumul_nscatter_per_batch, nscatter_per_elec, random_states,
                self.photon_p, self.photon_px, self.photon_py, self.photon_pz,
                photons.x, photons.y, photons.z, photons.inv_gamma, photons.ux,
                photons.uy, photons.uz, photons.w, elec.x, elec.y, elec.z,
                elec.inv_gamma, elec.ux, elec.uy, elec.uz, elec.w,
                self.inv_ratio_w_elec_photon)
            photons.sorted = False
        else:
            scatter_photons_electrons_numba(
                N_batch, self.batch_size, old_Ntot, elec.Ntot,
                cumul_nscatter_per_batch, nscatter_per_elec, self.photon_p,
                self.photon_px, self.photon_py, self.photon_pz, photons.x,
                photons.y, photons.z, photons.inv_gamma, photons.ux,
                photons.uy, photons.uz, photons.w, elec.x, elec.y, elec.z,
                elec.inv_gamma, elec.ux, elec.uy, elec.uz, elec.w,
                self.inv_ratio_w_elec_photon)

        # If the photons are tracked, generate new ids
        # (on CPU or GPU depending on `use_cuda`)
        generate_new_ids(photons, old_Ntot, new_Ntot)
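
The creation step above relies on per-batch counts followed by a prefix sum, which gives each batch a non-overlapping write offset into the enlarged photon arrays. A minimal NumPy sketch of that bookkeeping (assuming, for illustration only, that `perform_cumsum` returns a prefix sum with a leading zero, so that entry `i` is the write offset of batch `i` and the last entry is the total number of new particles):

import numpy as np

# Hypothetical per-batch counts of newly created photons
nscatter_per_batch = np.array([2, 0, 3, 1], dtype=np.int64)

# Prefix sum with a leading zero (illustrative stand-in for perform_cumsum)
cumul = np.concatenate(([0], np.cumsum(nscatter_per_batch)))   # [0, 2, 2, 5, 6]

N_created = int(cumul[-1])   # 6 new photons in total
# Batch i writes its new photons at indices
#   old_Ntot + cumul[i] ... old_Ntot + cumul[i+1] - 1
# so the batches fill the freshly allocated tail without overlapping.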
Code example #20
def extract_slice_from_gpu(pref_sum_curr, N_area, species):
    """
    Extract the particles which have an index between pref_sum_curr
    and pref_sum_curr + N_area, and return them in a dictionary.

    Parameters
    ----------
    pref_sum_curr: int
        The starting index needed for the extraction process
    N_area: int
        The number of particles to extract.
    species: an fbpic Species object
        The species from which to extract the data

    Returns
    -------
    particle_data : A dictionary of 1D float arrays (that are on the CPU)
        A dictionary that contains the particle data of
        the simulation (with normalized weights), including optional
        integer arrays (e.g. "id", "charge")
    """
    # Call kernel that extracts particles from GPU
    dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d(N_area)
    # - General particle quantities
    part_data = cupy.empty((8, N_area), dtype=np.float64)
    extract_particles_from_gpu[dim_grid_1d,
                               dim_block_1d](pref_sum_curr, species.x,
                                             species.y, species.z, species.ux,
                                             species.uy, species.uz, species.w,
                                             species.inv_gamma, part_data)
    # - Optional particle arrays
    if species.tracker is not None:
        selected_particle_id = cupy.empty((N_area, ), dtype=np.uint64)
        extract_array_from_gpu[dim_grid_1d,
                               dim_block_1d](pref_sum_curr, species.tracker.id,
                                             selected_particle_id)
    if species.ionizer is not None:
        selected_particle_charge = cupy.empty((N_area, ), dtype=np.uint64)
        extract_array_from_gpu[dim_grid_1d,
                               dim_block_1d](pref_sum_curr,
                                             species.ionizer.ionization_level,
                                             selected_particle_charge)
        selected_particle_weight = cupy.empty((N_area, ), dtype=np.float64)
        extract_array_from_gpu[dim_grid_1d,
                               dim_block_1d](pref_sum_curr,
                                             species.ionizer.w_times_level,
                                             selected_particle_weight)

    # Copy GPU arrays to the host
    part_data = part_data.get()
    particle_data = {
        'x': part_data[0],
        'y': part_data[1],
        'z': part_data[2],
        'ux': part_data[3],
        'uy': part_data[4],
        'uz': part_data[5],
        'w': part_data[6],
        'inv_gamma': part_data[7]
    }
    if species.tracker is not None:
        particle_data['id'] = selected_particle_id.get()
    if species.ionizer is not None:
        particle_data['charge'] = selected_particle_charge.get()
        # Replace particle weight
        particle_data['w'] = selected_particle_weight.get()

    # Return the data as dictionary
    return (particle_data)
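
A short usage sketch (hypothetical: `species` stands for an fbpic Particles object whose data lives on the GPU, and the starting index would normally come from the prefix sum of the particle sorting):

# Extract 1000 particles starting at sorted index pref_sum_curr, and get
# back plain NumPy arrays on the host.
particle_data = extract_slice_from_gpu(pref_sum_curr=0, N_area=1000,
                                        species=species)
print(particle_data['x'].shape)     # (1000,)
if 'id' in particle_data:
    print(particle_data['id'][:5])  # tracking ids, if the species is tracked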
Code example #21
File: ionizer.py  Project: fhabib/fbpic
    def handle_ionization(self, ion):
        """
        Handle ionization, either on CPU or GPU

        - For each ion macroparticle, decide whether it is going to
          be further ionized during this timestep, based on the ADK rate.
        - Add the electrons created from ionization to the `target_species`

        Parameters
        ----------
        ion: an fbpic.Particles object
            The ionizable species, from which new electrons are created.
        """
        # Process particles in batches (of typically 10 to 20 particles)
        N_batch = int(ion.Ntot / self.batch_size) + 1
        # Short-cut for use_cuda
        use_cuda = self.use_cuda

        # Create temporary arrays (on CPU or GPU, depending on `use_cuda`)
        is_ionized = allocate_empty(ion.Ntot, use_cuda, dtype=np.int16)
        n_ionized = allocate_empty(N_batch, use_cuda, dtype=np.int64)
        # Draw random numbers
        if self.use_cuda:
            random_draw = allocate_empty(ion.Ntot, use_cuda, dtype=np.float32)
            self.prng.uniform(random_draw)
        else:
            random_draw = np.random.rand(ion.Ntot)

        # Determine the ions that are ionized, and count them in each batch
        # (one thread per batch on GPU; parallel loop over batches on CPU)
        if use_cuda:
            batch_grid_1d, batch_block_1d = cuda_tpb_bpg_1d(N_batch)
            ionize_ions_cuda[batch_grid_1d, batch_block_1d](
                N_batch, self.batch_size, ion.Ntot, self.level_max, n_ionized,
                is_ionized, self.ionization_level, random_draw,
                self.adk_prefactor, self.adk_power, self.adk_exp_prefactor,
                ion.ux, ion.uy, ion.uz, ion.Ex, ion.Ey, ion.Ez, ion.Bx, ion.By,
                ion.Bz, ion.w, self.w_times_level)
        else:
            ionize_ions_numba(N_batch, self.batch_size, ion.Ntot,
                              self.level_max, n_ionized, is_ionized,
                              self.ionization_level, random_draw,
                              self.adk_prefactor, self.adk_power,
                              self.adk_exp_prefactor, ion.ux, ion.uy, ion.uz,
                              ion.Ex, ion.Ey, ion.Ez, ion.Bx, ion.By, ion.Bz,
                              ion.w, self.w_times_level)

        # Count the total number of new electrons (operation always performed
        # on the CPU, as this is typically difficult on the GPU)
        if use_cuda:
            n_ionized = n_ionized.copy_to_host()
        cumulative_n_ionized = perform_cumsum(n_ionized)
        # If no new particle was created, skip the rest of this function
        if cumulative_n_ionized[-1] == 0:
            return

        # Reallocate the electron species (on CPU or GPU depending on `use_cuda`),
        # to accommodate the electrons produced by ionization,
        # and copy the old electrons to the new arrays
        elec = self.target_species
        old_Ntot = elec.Ntot
        new_Ntot = old_Ntot + cumulative_n_ionized[-1]
        reallocate_and_copy_old(elec, use_cuda, old_Ntot, new_Ntot)

        # Create the new electrons from ionization (one thread per batch)
        if use_cuda:
            cumulative_n_ionized = cuda.to_device(cumulative_n_ionized)
            copy_ionized_electrons_cuda[batch_grid_1d, batch_block_1d](
                N_batch, self.batch_size, old_Ntot, ion.Ntot,
                cumulative_n_ionized, is_ionized, elec.x, elec.y, elec.z,
                elec.inv_gamma, elec.ux, elec.uy, elec.uz, elec.w, elec.Ex,
                elec.Ey, elec.Ez, elec.Bx, elec.By, elec.Bz, ion.x, ion.y,
                ion.z, ion.inv_gamma, ion.ux, ion.uy, ion.uz, ion.w, ion.Ex,
                ion.Ey, ion.Ez, ion.Bx, ion.By, ion.Bz)
            # Mark the new electrons as unsorted
            elec.sorted = False
        else:
            copy_ionized_electrons_numba(
                N_batch, self.batch_size, old_Ntot, ion.Ntot,
                cumulative_n_ionized, is_ionized, elec.x, elec.y, elec.z,
                elec.inv_gamma, elec.ux, elec.uy, elec.uz, elec.w, elec.Ex,
                elec.Ey, elec.Ez, elec.Bx, elec.By, elec.Bz, ion.x, ion.y,
                ion.z, ion.inv_gamma, ion.ux, ion.uy, ion.uz, ion.w, ion.Ex,
                ion.Ey, ion.Ez, ion.Bx, ion.By, ion.Bz)

        # If the electrons are tracked, generate new ids
        # (on CPU or GPU depending on `use_cuda`)
        generate_new_ids(elec, old_Ntot, new_Ntot)
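
The final call to `generate_new_ids` is not reproduced in this listing. As an illustration only (not fbpic's actual implementation), assigning fresh ids to the appended particles could look like the following CPU-side sketch, assuming the tracker keeps a hypothetical counter `next_attributed_id` and ignoring uniqueness across MPI ranks:

import numpy as np

def generate_new_ids_sketch(species, old_Ntot, new_Ntot):
    # Give consecutive, previously unused ids to the particles that were
    # just appended at indices [old_Ntot, new_Ntot).
    if species.tracker is None:
        return
    n_new = new_Ntot - old_Ntot
    first_id = species.tracker.next_attributed_id   # hypothetical counter
    species.tracker.id[old_Ntot:new_Ntot] = np.arange(
        first_id, first_id + n_new, dtype=np.uint64)
    species.tracker.next_attributed_id = first_id + n_new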
Code example #22
File: boosted_field_diag.py  Project: skuschel/fbpic
    def extract_slice(self, fld, comm, z_boost, zmin_boost, slice_array):
        """
        Fills `slice_array` with a slice of the fields at z_boost
        (the fields returned are still in the boosted frame ;
        for performance, the Lorentz transform of the fields values
        is performed only when flushing to disk)

        Parameters
        ----------
        fld: a Fields object
            The object from which to extract the fields

        comm: a BoundaryCommunicator object
            Contains information about the guard cells in particular

        z_boost: float (meters)
            Position of the slice in the boosted frame

        zmin_boost: float (meters)
            Position of the left end of physical part of the local subdomain
            (i.e. excludes guard cells)

        slice_array: either a numpy array or a cuda device array
            An array of reals that packs together the slices of the
            different fields (on the CPU or on the GPU, depending
            on `fld.use_cuda`).
            The first index of this array corresponds to the field type
            (10 different field types), and the correspondence
            between the field type and the integer index is given by
            field_to_index.
            The shape of this array is (10, 2*Nm-1, Nr_output)
        """
        # Find the index of the slice in the boosted frame
        # and the corresponding interpolation shape factor
        dz = fld.interp[0].dz
        # Find the interpolation data in the z direction
        z_staggered_gridunits = (z_boost - zmin_boost - 0.5 * dz) / dz
        iz = int(z_staggered_gridunits)
        Sz = iz + 1 - z_staggered_gridunits
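        # For example, with dz = 1.e-6 and z_boost - zmin_boost = 2.3e-6,
        # z_staggered_gridunits = 1.8, so iz = 1 and Sz = 0.2; the slice is
        # then presumably obtained by weighting cell iz with Sz and
        # cell iz+1 with (1 - Sz).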
        # Add the guard cells to the index iz
        if comm is not None:
            iz += comm.n_guard
            if comm.left_proc is None:
                iz += comm.nz_damp + comm.n_inject

        # Extract the slice directly on the CPU
        # Fill the pre-allocated CPU array slice_array
        if fld.use_cuda is False:
            # Extract a slice of the fields *in the boosted frame*
            # at z_boost, using interpolation, and store them in slice_array
            self.extract_slice_cpu(fld, iz, Sz, slice_array)

        # Extract the slice on the GPU
        # Fill the pre-allocated GPU array slice_array
        else:
            # Prepare kernel call
            dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d(self.Nr_output)

            # Extract the slices
            interp = fld.interp
            for m in range(fld.Nm):
                extract_slice_cuda[dim_grid_1d, dim_block_1d](
                    self.Nr_output, iz, Sz, slice_array, interp[m].Er,
                    interp[m].Et, interp[m].Ez, interp[m].Br, interp[m].Bt,
                    interp[m].Bz, interp[m].Jr, interp[m].Jt, interp[m].Jz,
                    interp[m].rho, m)