Ejemplo n.º 1
0
    def deposit_virtual_particles_cpu(self, q, fieldtype, grid, fld):
        """
        Deposit rho or J for the virtual particles, using the CPU kernels

        Only the linear-shape Numba kernels are used here.

        Parameters
        ----------
        q : scalar
            Factor applied to the transverse excursions and velocities,
            and forwarded as-is to the deposition kernel
            (presumably the charge of the virtual particles; TODO confirm)

        fieldtype : string
            Either 'rho' or 'J'. Any other value results in no deposition.

        grid : sequence of grid objects, indexed by azimuthal mode
            `grid[0]` provides the grid geometry (invdz, zmin, Nz,
            invdr, rmin, Nr); the Ruyten coefficients are read from
            `grid[0]` and from one further entry (see below)

        fld : object holding the deposition targets
            Provides `Nm` and the arrays `rho_global`, `Jr_global`,
            `Jt_global`, `Jz_global`
        """
        # Transverse positions: baseline shifted by the q-scaled excursion
        xp = self.baseline_x + q * self.excursion_x
        yp = self.baseline_y + q * self.excursion_y

        # Boundaries of the particle chunks; each chunk is processed
        # by a different thread
        chunk_bounds = get_chunk_indices(self.Ntot, nthreads)

        # Index of the grid that provides the Ruyten shape coefficients
        # for the higher modes. All modes above 0 share the same set, so
        # mode 1 is used when it exists. With a single mode, the mode-0
        # set is simply passed a second time, so that the argument types
        # seen by the Numba JIT remain the same.
        higher_mode = 1 if fld.Nm > 1 else 0

        if fieldtype == 'rho':
            # Charge density: all modes are handled in a single call
            g0 = grid[0]
            deposit_rho_numba_linear(
                xp, yp, self.baseline_z, self.w, q,
                g0.invdz, g0.zmin, g0.Nz, g0.invdr, g0.rmin, g0.Nr,
                fld.rho_global, fld.Nm, nthreads, chunk_bounds,
                g0.ruyten_linear_coef, grid[higher_mode].ruyten_linear_coef)
            return

        if fieldtype != 'J':
            # Unknown field type: nothing is deposited
            return

        # Normalized momenta built from the velocities. Gamma is taken
        # equal to 1 here and in the kernel: the deposition only involves
        # the products ux*inv_gamma, uy*inv_gamma, uz*inv_gamma (i.e.
        # vx/c, vy/c, vz/c), so only these products need to be correct.
        # Note that the factor q is applied to the transverse components
        # only.
        px = q * self.vx / c
        py = q * self.vy / c
        pz = self.vz / c

        # Current density: all modes are handled in a single call
        g0 = grid[0]
        deposit_J_numba_linear(
            xp, yp, self.baseline_z, self.w, q, px, py, pz, self.inv_gamma,
            g0.invdz, g0.zmin, g0.Nz, g0.invdr, g0.rmin, g0.Nr,
            fld.Jr_global, fld.Jt_global, fld.Jz_global,
            fld.Nm, nthreads, chunk_bounds,
            g0.ruyten_linear_coef, grid[higher_mode].ruyten_linear_coef)
Ejemplo n.º 2
0
    def deposit( self, fld, fieldtype ) :
        """
        Deposit the particles charge or current onto the grid

        This assumes that the particle positions (and momenta in the case of J)
        are currently at the same timestep as the field that is to be deposited

        Parameters
        ----------
        fld : a Field object
             Provides the list of InterpolationGrid objects (`fld.interp`,
             one per azimuthal mode) and, for the CPU version, the number
             of modes `fld.Nm` and the arrays `rho_global`, `Jr_global`,
             `Jt_global`, `Jz_global` into which the kernels deposit.

        fieldtype : string
             Indicates which field to deposit
             Either 'J' or 'rho'

        Raises
        ------
        ValueError
             If `fieldtype` is not 'rho' or 'J', or if
             `self.particle_shape` is not 'linear' or 'cubic'
        """
        # Skip deposition for neutral particles (e.g. photons)
        if self.q == 0:
            return

        # Shortcuts and safe-guards
        # (explicit exceptions are used instead of `assert`, so that the
        # checks are still performed when Python is run with the -O flag;
        # otherwise an unsupported value would silently deposit nothing)
        grid = fld.interp
        if fieldtype not in ('rho', 'J'):
            raise ValueError(
                "`fieldtype` should be either 'rho' or 'J' "
                "but is `%s`" % (fieldtype,))
        if self.particle_shape not in ('linear', 'cubic'):
            raise ValueError(
                "`particle_shape` should be either 'linear' or 'cubic' "
                "but is `%s`" % (self.particle_shape,))
        use_linear = (self.particle_shape == 'linear')
        deposit_rho = (fieldtype == 'rho')

        # When running on GPU: first sort the arrays of particles
        if self.use_cuda:
            # Sort the particles
            if not self.sorted:
                self.sort_particles(fld=fld)
                # The particles are now sorted and rearranged
                self.sorted = True

        # For ionizable atoms: set the effective weight to the weight
        # times the ionization level (on GPU, this needs to be done *after*
        # sorting, otherwise `weight` is not equal to the corresponding array)
        if self.ionizer is not None:
            weight = self.ionizer.w_times_level
        else:
            weight = self.w

        # Leading arguments shared by every deposition kernel
        # (read *after* the sorting above, for the same reason as `weight`):
        # positions, weight and charge; for J, also momenta and 1/gamma
        ptcl_args = ( self.x, self.y, self.z, weight, self.q )
        if not deposit_rho:
            ptcl_args += ( self.ux, self.uy, self.uz, self.inv_gamma )

        def grid_geometry( g ):
            # Geometry arguments of one interpolation grid,
            # in the order expected by the kernels
            return ( g.invdz, g.zmin, g.Nz, g.invdr, g.rmin, g.Nr )

        # GPU (CUDA) version
        if self.use_cuda:
            # Get the threads per block and the blocks per grid
            dim_grid_2d_flat, dim_block_2d_flat = \
                cuda_tpb_bpg_1d( self.prefix_sum.shape[0], TPB=64 )
            # Trailing arguments shared by every GPU kernel
            sort_args = ( self.cell_idx, self.prefix_sum )

            # Call the CUDA Kernel for the deposition of rho or J
            # Kernel arguments: particle arguments, grid geometry,
            # target arrays (+ mode index for the generic kernels),
            # and finally the sorting arrays
            Nm = len( grid )
            if Nm == 2:
                # Dedicated kernels that handle modes 0 and 1 in one call
                if deposit_rho:
                    kernel = deposit_rho_gpu_linear if use_linear \
                        else deposit_rho_gpu_cubic
                    field_args = ( grid[0].rho, grid[1].rho )
                else:
                    kernel = deposit_J_gpu_linear if use_linear \
                        else deposit_J_gpu_cubic
                    # The three components of J, for each of the two modes
                    field_args = ( grid[0].Jr, grid[1].Jr,
                                   grid[0].Jt, grid[1].Jt,
                                   grid[0].Jz, grid[1].Jz )
                kernel[ dim_grid_2d_flat, dim_block_2d_flat ](
                    *( ptcl_args + grid_geometry( grid[0] )
                       + field_args + sort_args ) )
            else:
                # Generic kernels: one call per azimuthal mode
                if deposit_rho:
                    kernel = deposit_rho_gpu_linear_one_mode if use_linear \
                        else deposit_rho_gpu_cubic_one_mode
                else:
                    kernel = deposit_J_gpu_linear_one_mode if use_linear \
                        else deposit_J_gpu_cubic_one_mode
                for m in range(Nm):
                    g = grid[m]
                    if deposit_rho:
                        field_args = ( g.rho, m )
                    else:
                        field_args = ( g.Jr, g.Jt, g.Jz, m )
                    kernel[ dim_grid_2d_flat, dim_block_2d_flat ](
                        *( ptcl_args + grid_geometry( g )
                           + field_args + sort_args ) )

        # CPU version
        else:
            # Divide particles in chunks (each chunk is handled by a different
            # thread) and register the indices that bound each chunks
            ptcl_chunk_indices = get_chunk_indices(self.Ntot, nthreads)

            # Multithreaded functions for the deposition of rho or J
            # (a single call, which receives the number of modes `fld.Nm`)
            if deposit_rho:
                kernel = deposit_rho_numba_linear if use_linear \
                    else deposit_rho_numba_cubic
                field_args = ( fld.rho_global, )
            else:
                kernel = deposit_J_numba_linear if use_linear \
                    else deposit_J_numba_cubic
                field_args = ( fld.Jr_global, fld.Jt_global, fld.Jz_global )
            # Kernel arguments: particle arguments, geometry of the
            # mode-0 grid, target arrays, number of modes, threading info
            kernel( *( ptcl_args + grid_geometry( grid[0] ) + field_args
                       + ( fld.Nm, nthreads, ptcl_chunk_indices ) ) )
Ejemplo n.º 3
0
    def gather( self, grid ) :
        """
        Gather the fields onto the macroparticles

        This assumes that the particle positions are currently at
        the same timestep as the field that is to be gathered.

        The results are written into the particle arrays
        `Ex`, `Ey`, `Ez`, `Bx`, `By`, `Bz`.

        Parameters
        ----------
        grid : a list of InterpolationGrid objects
             (one InterpolationGrid object per azimuthal mode)
             Contains the field values on the interpolation grid

        Raises
        ------
        ValueError
             If `self.particle_shape` is neither 'linear' nor 'cubic'
        """
        # Skip gathering for neutral particles (e.g. photons)
        if self.q == 0:
            return

        # Number of modes
        Nm = len(grid)

        # Check the particle shape once, for both the GPU and CPU versions
        # (the message is built from adjacent literals, so that it does not
        # contain the indentation of the source code)
        if self.particle_shape not in ('linear', 'cubic'):
            raise ValueError(
                "`particle_shape` should be either 'linear' or 'cubic' "
                "but is `%s`" % (self.particle_shape,))
        use_linear = (self.particle_shape == 'linear')

        # Arguments shared by all the gathering kernels: the particle
        # positions come first, the particle field arrays come last
        ptcl_positions = ( self.x, self.y, self.z )
        ptcl_fields = ( self.Ex, self.Ey, self.Ez,
                        self.Bx, self.By, self.Bz )

        def grid_geometry( g ):
            # Geometry arguments of one interpolation grid,
            # in the order expected by the kernels
            return ( g.invdz, g.zmin, g.Nz, g.invdr, g.rmin, g.Nr )

        def two_mode_fields():
            # Grid fields for the kernels that treat modes 0 and 1 together:
            # E of mode 0, E of mode 1, then B of mode 0, B of mode 1
            g0, g1 = grid[0], grid[1]
            return ( g0.Er, g0.Et, g0.Ez, g1.Er, g1.Et, g1.Ez,
                     g0.Br, g0.Bt, g0.Bz, g1.Br, g1.Bt, g1.Bz )

        def one_mode_fields( m ):
            # Grid fields of mode m, followed by the mode index
            g = grid[m]
            return ( g.Er, g.Et, g.Ez, g.Br, g.Bt, g.Bz, m )

        # GPU (CUDA) version
        if self.use_cuda:
            # Get the threads per block and the blocks per grid
            dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d( self.Ntot, TPB=64 )
            # Call the CUDA Kernel for the gathering of E and B Fields
            if Nm == 2:
                # Optimized version for 2 modes
                kernel = gather_field_gpu_linear if use_linear \
                    else gather_field_gpu_cubic
                kernel[ dim_grid_1d, dim_block_1d ](
                    *( ptcl_positions + grid_geometry( grid[0] )
                       + two_mode_fields() + ptcl_fields ) )
            else:
                # Generic version for arbitrary number of modes:
                # `erase_eb_cuda` is called on the particle field arrays
                # first, then one kernel is launched per mode (presumably
                # each one adds its contribution; TODO confirm)
                erase_eb_cuda[ dim_grid_1d, dim_block_1d ](
                    *( ptcl_fields + ( self.Ntot, ) ) )
                kernel = gather_field_gpu_linear_one_mode if use_linear \
                    else gather_field_gpu_cubic_one_mode
                for m in range(Nm):
                    kernel[ dim_grid_1d, dim_block_1d ](
                        *( ptcl_positions + grid_geometry( grid[m] )
                           + one_mode_fields( m ) + ptcl_fields ) )

        # CPU version
        else:
            if use_linear:
                # The linear kernels take no threading arguments
                thread_args = ()
            else:
                # Divide particles into chunks (each chunk is handled by a
                # different thread) and return the indices that bound chunks
                ptcl_chunk_indices = get_chunk_indices(self.Ntot, nthreads)
                thread_args = ( nthreads, ptcl_chunk_indices )

            if Nm == 2:
                # Optimized version for 2 modes
                kernel = gather_field_numba_linear if use_linear \
                    else gather_field_numba_cubic
                kernel( *( ptcl_positions + grid_geometry( grid[0] )
                           + two_mode_fields() + ptcl_fields
                           + thread_args ) )
            else:
                # Generic version for arbitrary number of modes
                # (same sequence as on GPU: erase, then one call per mode)
                erase_eb_numba( *( ptcl_fields + ( self.Ntot, ) ) )
                kernel = gather_field_numba_linear_one_mode if use_linear \
                    else gather_field_numba_cubic_one_mode
                for m in range(Nm):
                    kernel( *( ptcl_positions + grid_geometry( grid[m] )
                               + one_mode_fields( m ) + ptcl_fields
                               + thread_args ) )