Example #1
    def shift_interp_field_gpu(self, field_array, n_move):
        """
        Shift the field 'field_array' by n_move cells (backwards)
        on the GPU by applying a kernel that copies the shifted
        fields to a buffer array.

        Parameters
        ----------
        field_array: 2darray of complex numbers
            Contains the values of the fields to be shifted. This array
            is only read; the shifted copy is returned in a new buffer.

        n_move: int
            The number of cells by which the grid should be shifted

        Returns
        -------
        The new shifted field array
        """
        # Get a 2D CUDA grid of the size of the grid
        dim_grid_2d, dim_block_2d = cuda_tpb_bpg_2d(field_array.shape[0],
                                                    field_array.shape[1])
        # Initialize a field buffer to temporarily store the data
        field_buffer = cuda.device_array(
            (field_array.shape[0], field_array.shape[1]), dtype=np.complex128)
        # Shift the field array and copy it to the buffer
        shift_field_array_gpu[dim_grid_2d, dim_block_2d](field_array,
                                                         field_buffer, n_move)
        # Rebind the local name to the buffer, which now holds the
        # shifted fields (the caller must use the returned array)
        field_array = field_buffer
        # Return the new shifted field array
        return field_array
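Note that the shifted values end up in a newly allocated buffer, so the caller has to rebind its own reference to the returned array. A minimal usage sketch (the grid attribute names and the owning object obj are illustrative assumptions, not taken from the snippet above):

    # Hypothetical caller: rebind each field to the returned, shifted buffer
    grid.Er = obj.shift_interp_field_gpu(grid.Er, n_move)
    grid.Et = obj.shift_interp_field_gpu(grid.Et, n_move)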
Example #2
    def erase(self, fieldtype ) :
        """
        Sets the field `fieldtype` to zero on the interpolation grid

        Parameters
        ----------
        fieldtype : string
            A string which represents the kind of field to be erased
            (either 'E', 'B', 'J', or 'rho')
        """
        if self.use_cuda :
            # Obtain the cuda grid
            dim_grid, dim_block = cuda_tpb_bpg_2d( self.Nz, self.Nr )

            # Erase the arrays on the GPU
            if fieldtype == 'rho' :
                cuda_erase_scalar[dim_grid, dim_block](
                    self.interp[0].rho, self.interp[1].rho )
            elif fieldtype == 'J' :
                cuda_erase_vector[dim_grid, dim_block](
                    self.interp[0].Jr, self.interp[1].Jr,
                    self.interp[0].Jt, self.interp[1].Jt,
                    self.interp[0].Jz, self.interp[1].Jz )
            elif fieldtype == 'E' :
                cuda_erase_vector[dim_grid, dim_block](
                    self.interp[0].Er, self.interp[1].Er,
                    self.interp[0].Et, self.interp[1].Et,
                    self.interp[0].Ez, self.interp[1].Ez )
            elif fieldtype == 'B' :
                cuda_erase_vector[dim_grid, dim_block](
                    self.interp[0].Br, self.interp[1].Br,
                    self.interp[0].Bt, self.interp[1].Bt,
                    self.interp[0].Bz, self.interp[1].Bz )
            else :
                raise ValueError('Invalid string for fieldtype: %s'%fieldtype)
        else :
            # Erase the arrays on the CPU
            if fieldtype == 'rho' :
                for m in range(self.Nm) :
                    self.interp[m].rho[:,:] = 0.
            elif fieldtype == 'J' :
                for m in range(self.Nm) :
                    self.interp[m].Jr[:,:] = 0.
                    self.interp[m].Jt[:,:] = 0.
                    self.interp[m].Jz[:,:] = 0.
            elif fieldtype == 'E' :
                for m in range(self.Nm) :
                    self.interp[m].Er[:,:] = 0.
                    self.interp[m].Et[:,:] = 0.
                    self.interp[m].Ez[:,:] = 0.
            elif fieldtype == 'B' :
                for m in range(self.Nm) :
                    self.interp[m].Br[:,:] = 0.
                    self.interp[m].Bt[:,:] = 0.
                    self.interp[m].Bz[:,:] = 0.
            else :
                raise ValueError('Invalid string for fieldtype: %s'%fieldtype)
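The kernels cuda_erase_scalar and cuda_erase_vector are defined elsewhere. As a rough sketch of what the scalar version could look like with numba.cuda (an assumption, not the actual implementation), each thread zeroes one cell in both azimuthal modes:

    from numba import cuda

    @cuda.jit
    def cuda_erase_scalar(mode0, mode1):
        # One thread per (iz, ir) cell: zero that cell in both modes
        iz, ir = cuda.grid(2)
        if iz < mode0.shape[0] and ir < mode0.shape[1]:
            mode0[iz, ir] = 0.
            mode1[iz, ir] = 0.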
Example #3
    def filter(self, fieldtype) :
        """
        Filter the field `fieldtype`

        Parameters
        ----------
        fieldtype : string
            A string which represents the kind of field to be filtered
            (either 'E', 'B', 'J', 'rho_next' or 'rho_prev')
        """
        if self.use_cuda :
            # Obtain the cuda grid
            dim_grid, dim_block = cuda_tpb_bpg_2d( self.Nz, self.Nr)
            # Filter fields on the GPU
            if fieldtype == 'rho_prev' :
                cuda_filter_scalar[dim_grid, dim_block](
                    self.rho_prev, self.d_filter_array, self.Nz, self.Nr )
            elif fieldtype == 'rho_next' :
                cuda_filter_scalar[dim_grid, dim_block](
                    self.rho_next, self.d_filter_array, self.Nz, self.Nr )
            elif fieldtype == 'J' :
                cuda_filter_vector[dim_grid, dim_block]( self.Jp, self.Jm,
                        self.Jz, self.d_filter_array, self.Nz, self.Nr)
            elif fieldtype == 'E' :
                cuda_filter_vector[dim_grid, dim_block]( self.Ep, self.Em,
                        self.Ez, self.d_filter_array, self.Nz, self.Nr)
            elif fieldtype == 'B' :
                cuda_filter_vector[dim_grid, dim_block]( self.Bp, self.Bm,
                        self.Bz, self.d_filter_array, self.Nz, self.Nr)
            else :
                raise ValueError('Invalid string for fieldtype: %s'%fieldtype)
        else :
            # Filter fields on the CPU
            if fieldtype == 'rho_prev' :
                self.rho_prev = self.rho_prev * self.filter_array
            elif fieldtype == 'rho_next' :
                self.rho_next = self.rho_next * self.filter_array
            elif fieldtype == 'J' :
                self.Jp = self.Jp * self.filter_array
                self.Jm = self.Jm * self.filter_array
                self.Jz = self.Jz * self.filter_array
            elif fieldtype == 'E' :
                self.Ep = self.Ep * self.filter_array
                self.Em = self.Em * self.filter_array
                self.Ez = self.Ez * self.filter_array
            elif fieldtype == 'B' :
                self.Bp = self.Bp * self.filter_array
                self.Bm = self.Bm * self.filter_array
                self.Bz = self.Bz * self.filter_array
            else :
                raise ValueError('Invalid string for fieldtype: %s'%fieldtype)
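On the CPU, note that self.rho_prev = self.rho_prev * self.filter_array rebinds the attribute to a freshly allocated product; an in-place *= would avoid the temporary. On the GPU, the filtering kernel is presumably a pointwise multiplication by the device-resident filter array; a minimal numba.cuda sketch (an assumption about code not shown here):

    from numba import cuda

    @cuda.jit
    def cuda_filter_scalar(field, filter_array, Nz, Nr):
        # Pointwise multiplication by the spectral filter
        iz, ir = cuda.grid(2)
        if iz < Nz and ir < Nr:
            field[iz, ir] *= filter_array[iz, ir]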
Example #4
    def push_rho(self) :
        """
        Transfer the values of rho_next to rho_prev,
        and set rho_next to zero
        """
        if self.use_cuda :
            # Obtain the cuda grid
            dim_grid, dim_block = cuda_tpb_bpg_2d( self.Nz, self.Nr)
            # Push the fields on the GPU
            cuda_push_rho[dim_grid, dim_block](
                self.rho_prev, self.rho_next, self.Nz, self.Nr )
        else :
            # Push the fields on the CPU
            self.rho_prev[:,:] = self.rho_next[:,:]
            self.rho_next[:,:] = 0.
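The GPU kernel mirrors the two CPU assignments, one spectral cell per thread. A plausible numba.cuda sketch (an assumption; the real cuda_push_rho is defined elsewhere):

    from numba import cuda

    @cuda.jit
    def cuda_push_rho_sketch(rho_prev, rho_next, Nz, Nr):
        # Transfer rho_next into rho_prev, then reset rho_next to zero
        iz, ir = cuda.grid(2)
        if iz < Nz and ir < Nr:
            rho_prev[iz, ir] = rho_next[iz, ir]
            rho_next[iz, ir] = 0.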
Example #5
    def divide_by_volume( self, fieldtype ) :
        """
        Divide the field `fieldtype` in each cell by the cell volume,
        on the interpolation grid.

        This is typically done for rho and J, after the charge and
        current deposition.

        Parameters
        ----------
        fieldtype : string
            A string which represents the kind of field to be divided
            by the cell volume (either 'rho' or 'J')
        """
        if self.use_cuda :
            # Perform division on the GPU
            dim_grid, dim_block = cuda_tpb_bpg_2d( self.Nz, self.Nr )

            if fieldtype == 'rho' :
                cuda_divide_scalar_by_volume[dim_grid, dim_block](
                    self.interp[0].rho, self.interp[1].rho,
                    self.interp[0].d_invvol, self.interp[1].d_invvol )
            elif fieldtype == 'J' :
                cuda_divide_vector_by_volume[dim_grid, dim_block](
                    self.interp[0].Jr, self.interp[1].Jr,
                    self.interp[0].Jt, self.interp[1].Jt,
                    self.interp[0].Jz, self.interp[1].Jz,
                    self.interp[0].d_invvol, self.interp[1].d_invvol )
            else :
                raise ValueError('Invalid string for fieldtype: %s'%fieldtype)
        else :
            # Perform division on the CPU
            if fieldtype == 'rho' :
                for m in range(self.Nm) :
                    self.interp[m].rho = \
                    self.interp[m].rho * self.interp[m].invvol[np.newaxis,:]
            elif fieldtype == 'J' :
                for m in range(self.Nm) :
                    self.interp[m].Jr = \
                    self.interp[m].Jr * self.interp[m].invvol[np.newaxis,:]
                    self.interp[m].Jt = \
                    self.interp[m].Jt * self.interp[m].invvol[np.newaxis,:]
                    self.interp[m].Jz = \
                    self.interp[m].Jz * self.interp[m].invvol[np.newaxis,:]
            else :
                raise ValueError('Invalid string for fieldtype: %s'%fieldtype)
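The broadcast invvol[np.newaxis, :] in the CPU branch implies that invvol is a 1d array over r (in this cylindrical grid, cell volumes depend only on the radius). Under that same assumption, a minimal sketch of the scalar GPU kernel (not the actual implementation):

    from numba import cuda

    @cuda.jit
    def cuda_divide_scalar_by_volume(mode0, mode1, invvol0, invvol1):
        # invvol holds 1/volume as a function of r, so the same factor
        # applies to every z index of a given radial cell
        iz, ir = cuda.grid(2)
        if iz < mode0.shape[0] and ir < mode0.shape[1]:
            mode0[iz, ir] *= invvol0[ir]
            mode1[iz, ir] *= invvol1[ir]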
Example #6
    def correct_currents(self, dt, ps) :
        """
        Correct the currents so that they satisfy the
        charge conservation equation

        Parameters
        ----------
        dt : float
            Timestep of the simulation

        ps : PsatdCoeffs object
            psatd object corresponding to the same m mode
        """
        # Precalculate useful coefficient
        inv_dt = 1./dt

        if self.use_cuda :
            # Obtain the cuda grid
            dim_grid, dim_block = cuda_tpb_bpg_2d( self.Nz, self.Nr)
            # Correct the currents on the GPU
            if ps.V is None:
                # With standard PSATD algorithm
                cuda_correct_currents_standard[dim_grid, dim_block](
                    self.rho_prev, self.rho_next, self.Jp, self.Jm, self.Jz,
                    self.d_kz, self.d_kr, self.d_inv_k2,
                    inv_dt, self.Nz, self.Nr )
            else:
                # With Galilean/comoving algorithm
                cuda_correct_currents_comoving[dim_grid, dim_block](
                    self.rho_prev, self.rho_next, self.Jp, self.Jm, self.Jz,
                    self.d_kz, self.d_kr, self.d_inv_k2,
                    ps.d_j_corr_coef, ps.d_T_eb, ps.d_T_cc,
                    inv_dt, self.Nz, self.Nr)
        else :
            # Correct the currents on the CPU
            if ps.V is None:
                # With standard PSATD algorithm
                numba_correct_currents_standard(
                    self.rho_prev, self.rho_next, self.Jp, self.Jm, self.Jz,
                    self.kz, self.kr, self.inv_k2, inv_dt, self.Nz, self.Nr )
            else:
                # With Galilean/comoving algorithm
                numba_correct_currents_comoving(
                    self.rho_prev, self.rho_next, self.Jp, self.Jm, self.Jz,
                    self.kz, self.kr, self.inv_k2,
                    ps.j_corr_coef, ps.T_eb, ps.T_cc,
                    inv_dt, self.Nz, self.Nr)
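In spectral space, the correction removes the part of J that violates the discrete continuity equation (rho_next - rho_prev)/dt + i k.J = 0. A vectorized NumPy sketch of what the standard (non-comoving) correction plausibly computes; the actual numba/CUDA kernels are not shown here, so treat the formula as an assumption:

    import numpy as np

    def correct_currents_standard_sketch(rho_prev, rho_next, Jp, Jm, Jz,
                                         kz, kr, inv_k2, inv_dt):
        # Continuity-equation violation divided by k^2 (inv_k2 is assumed
        # to be 0 for the k = 0 mode, which avoids a division by zero)
        F = -inv_k2 * ((rho_next - rho_prev) * inv_dt
                       + 1.j * kz * Jz + kr * (Jp - Jm))
        # Subtract the spectral gradient of F from the current (in place)
        Jp += 0.5 * kr * F
        Jm += -0.5 * kr * F
        Jz += -1.j * kz * F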
Example #7
    def damp_guard_EB(self, interp):
        """
        Damp the fields E and B in the guard cells.

        Parameters
        ----------
        interp: list of InterpolationGrid objects (one per azimuthal mode)
            Objects that contain the fields to be damped.
        """
        # Damp the fields on the CPU or the GPU
        if interp[0].use_cuda:
            # Damp the fields on the GPU
            dim_grid, dim_block = cuda_tpb_bpg_2d(self.n_guard, interp[0].Nr)

            cuda_damp_EB[dim_grid, dim_block](
                interp[0].Er, interp[0].Et, interp[0].Ez,
                interp[0].Br, interp[0].Bt, interp[0].Bz,
                interp[1].Er, interp[1].Et, interp[1].Ez,
                interp[1].Br, interp[1].Bt, interp[1].Bz,
                self.d_left_damp, self.d_right_damp, self.n_guard)

        else:
            # Damp the fields on the CPU
            n_guard = self.n_guard
            for m in range(len(interp)):
                # Damp the fields in left guard cells
                interp[m].Er[:n_guard, :] *= self.left_damp[:, np.newaxis]
                interp[m].Et[:n_guard, :] *= self.left_damp[:, np.newaxis]
                interp[m].Ez[:n_guard, :] *= self.left_damp[:, np.newaxis]
                interp[m].Br[:n_guard, :] *= self.left_damp[:, np.newaxis]
                interp[m].Bt[:n_guard, :] *= self.left_damp[:, np.newaxis]
                interp[m].Bz[:n_guard, :] *= self.left_damp[:, np.newaxis]
                # Damp the fields in right guard cells
                interp[m].Er[-n_guard:, :] *= self.right_damp[::-1, np.newaxis]
                interp[m].Et[-n_guard:, :] *= self.right_damp[::-1, np.newaxis]
                interp[m].Ez[-n_guard:, :] *= self.right_damp[::-1, np.newaxis]
                interp[m].Br[-n_guard:, :] *= self.right_damp[::-1, np.newaxis]
                interp[m].Bt[-n_guard:, :] *= self.right_damp[::-1, np.newaxis]
                interp[m].Bz[-n_guard:, :] *= self.right_damp[::-1, np.newaxis]
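The damping arrays are built elsewhere. One common choice (purely illustrative; not necessarily what this code uses) is a sin^2 ramp that rises from 0 at the domain edge to 1 at the inner end of the guard region:

    import numpy as np

    n_guard = 64
    i = np.arange(n_guard)
    # 0 at the domain edge, smoothly approaching 1 toward the interior
    left_damp = np.sin(0.5 * np.pi * i / n_guard) ** 2

With the slicing used above, right_damp[::-1] mirrors the same profile onto the right guard cells.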
Example #8
    def __init__(self, Nr, Nz, use_cuda=False, nthreads=4):
        """
        Initialize an FFT object

        Parameters
        ----------
        Nr: int
           Number of grid points along the r axis (axis -1)

        Nz: int
           Number of grid points along the z axis (axis 0)

        use_cuda: bool, optional
           Whether to perform the Fourier transform on the GPU

        nthreads : int, optional
            Number of threads for the FFTW transform
        """
        # Check whether to use cuda
        self.use_cuda = use_cuda
        if (self.use_cuda is True) and (cuda_installed is False):
            self.use_cuda = False
            print('** Cuda not available for Fourier transform.')
            print('** Performing the Fourier transform on the CPU.')

        # Initialize the object for calculation on the GPU
        if self.use_cuda:
            # Initialize the dimension of the grid and blocks
            self.dim_grid, self.dim_block = cuda_tpb_bpg_2d(Nz, Nr)

            # Initialize 1d buffer for cufft
            self.buffer1d_in = cuda.device_array((Nz * Nr, ),
                                                 dtype=np.complex128)
            self.buffer1d_out = cuda.device_array((Nz * Nr, ),
                                                  dtype=np.complex128)
            # Initialize the cuda libraries object
            self.fft = cufft.FFTPlan(shape=(Nz, ),
                                     itype=np.complex128,
                                     otype=np.complex128,
                                     batch=Nr)
            self.blas = cublas.Blas()  # For normalization of the iFFT
            self.inv_Nz = 1. / Nz  # For normalization of the iFFT

            # Initialize the spectral buffers
            self.spect_buffer_r = cuda.device_array((Nz, Nr),
                                                    dtype=np.complex128)
            self.spect_buffer_t = cuda.device_array((Nz, Nr),
                                                    dtype=np.complex128)

        # Initialize the object for calculation on the CPU
        else:
            # First buffer and FFTW transform
            self.interp_buffer_r = \
                pyfftw.n_byte_align_empty( (Nz,Nr), 16, 'complex128' )
            self.spect_buffer_r = \
                pyfftw.n_byte_align_empty( (Nz,Nr), 16, 'complex128' )
            self.fft_r = pyfftw.FFTW(self.interp_buffer_r,
                                     self.spect_buffer_r,
                                     axes=(0, ),
                                     direction='FFTW_FORWARD',
                                     threads=nthreads)
            self.ifft_r = pyfftw.FFTW(self.spect_buffer_r,
                                      self.interp_buffer_r,
                                      axes=(0, ),
                                      direction='FFTW_BACKWARD',
                                      threads=nthreads)

            # Second buffer and FFTW transform
            self.interp_buffer_t = \
                pyfftw.n_byte_align_empty( (Nz,Nr), 16, 'complex128' )
            self.spect_buffer_t = \
                pyfftw.n_byte_align_empty( (Nz,Nr), 16, 'complex128' )
            self.fft_t = pyfftw.FFTW(self.interp_buffer_t,
                                     self.spect_buffer_t,
                                     axes=(0, ),
                                     direction='FFTW_FORWARD',
                                     threads=nthreads)
            self.ifft_t = pyfftw.FFTW(self.spect_buffer_t,
                                      self.interp_buffer_t,
                                      axes=(0, ),
                                      direction='FFTW_BACKWARD',
                                      threads=nthreads)
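A pyfftw.FFTW object executes its precomputed plan when called. A hypothetical CPU-side usage of this FFT class (the field values are made up; the attribute names come from the snippet above):

    import numpy as np

    fft_obj = FFT(Nr=32, Nz=64, use_cuda=False)
    # Fill the input buffer in place; do not rebind it, since the FFTW
    # plan is tied to this specific array
    fft_obj.interp_buffer_r[:, :] = np.random.rand(64, 32)
    fft_obj.fft_r()                      # forward transform along z (axis 0)
    spectral = fft_obj.spect_buffer_r    # result lands in the output buffer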
Example #9
    def deposit(self, fld, fieldtype):
        """
        Deposit the particles charge or current onto the grid

        This assumes that the particle positions (and momenta in the case of J)
        are currently at the same timestep as the field that is to be deposited

        Parameters
        ----------
        fld : a Field object
             Contains the list of InterpolationGrid objects with
             the field values as well as the prefix sum.

        fieldtype : string
             Indicates which field to deposit
             Either 'J' or 'rho'
        """
        # Shortcut for the list of InterpolationGrid objects
        grid = fld.interp

        if self.use_cuda:
            # Get the threads per block and the blocks per grid
            dim_grid_2d_flat, dim_block_2d_flat = cuda_tpb_bpg_1d(
                grid[0].Nz * grid[0].Nr)
            dim_grid_2d, dim_block_2d = cuda_tpb_bpg_2d(grid[0].Nz, grid[0].Nr)

            # Create the helper arrays for deposition
            if self.particle_shape == 'linear_non_atomic':
                d_F0, d_F1, d_F2, d_F3 = cuda_deposition_arrays(
                    grid[0].Nz, grid[0].Nr, fieldtype=fieldtype)

            # Sort the particles
            if self.sorted is False:
                self.sort_particles(fld=fld)
                # The particles are now sorted and rearranged
                self.sorted = True

            # Call the CUDA Kernel for the deposition of rho or J
            # for Mode 0 and 1 only.
            # Rho
            if fieldtype == 'rho':
                # Deposit rho in each of four directions
                if self.particle_shape == 'linear_non_atomic':
                    deposit_rho_gpu[dim_grid_2d_flat, dim_block_2d_flat](
                        self.x, self.y, self.z, self.w, grid[0].invdz,
                        grid[0].zmin, grid[0].Nz, grid[0].invdr, grid[0].rmin,
                        grid[0].Nr, d_F0, d_F1, d_F2, d_F3, self.cell_idx,
                        self.prefix_sum)
                    # Add the four directions together
                    add_rho[dim_grid_2d, dim_block_2d](
                        grid[0].rho, grid[1].rho, d_F0, d_F1, d_F2, d_F3)
                elif self.particle_shape == 'cubic':
                    deposit_rho_gpu_cubic[dim_grid_2d_flat, dim_block_2d_flat](
                        self.x, self.y, self.z, self.w, grid[0].invdz,
                        grid[0].zmin, grid[0].Nz, grid[0].invdr, grid[0].rmin,
                        grid[0].Nr, grid[0].rho, grid[1].rho, self.cell_idx,
                        self.prefix_sum)
                elif self.particle_shape == 'linear':
                    deposit_rho_gpu_linear[dim_grid_2d_flat, dim_block_2d_flat](
                        self.x, self.y, self.z, self.w, grid[0].invdz,
                        grid[0].zmin, grid[0].Nz, grid[0].invdr, grid[0].rmin,
                        grid[0].Nr, grid[0].rho, grid[1].rho, self.cell_idx,
                        self.prefix_sum)
                else:
                    raise ValueError(
                        "`particle_shape` should be either 'linear', "
                        "'linear_non_atomic' or 'cubic' but is `%s`"
                        % self.particle_shape)
            # J
            elif fieldtype == 'J':
                # Deposit J in each of four directions
                if self.particle_shape == 'linear_non_atomic':
                    deposit_J_gpu[dim_grid_2d_flat, dim_block_2d_flat](
                        self.x, self.y, self.z, self.w, self.ux, self.uy,
                        self.uz, self.inv_gamma, grid[0].invdz, grid[0].zmin,
                        grid[0].Nz, grid[0].invdr, grid[0].rmin, grid[0].Nr,
                        d_F0, d_F1, d_F2, d_F3, self.cell_idx, self.prefix_sum)
                    # Add the four directions together
                    add_J[dim_grid_2d, dim_block_2d](
                        grid[0].Jr, grid[1].Jr, grid[0].Jt, grid[1].Jt,
                        grid[0].Jz, grid[1].Jz, d_F0, d_F1, d_F2, d_F3)
                elif self.particle_shape == 'cubic':
                    deposit_J_gpu_cubic[dim_grid_2d_flat, dim_block_2d_flat](
                        self.x, self.y, self.z, self.w, self.ux, self.uy,
                        self.uz, self.inv_gamma, grid[0].invdz, grid[0].zmin,
                        grid[0].Nz, grid[0].invdr, grid[0].rmin, grid[0].Nr,
                        grid[0].Jr, grid[1].Jr, grid[0].Jt, grid[1].Jt,
                        grid[0].Jz, grid[1].Jz, self.cell_idx, self.prefix_sum)
                elif self.particle_shape == 'linear':
                    deposit_J_gpu_linear[dim_grid_2d_flat, dim_block_2d_flat](
                        self.x, self.y, self.z, self.w, self.ux, self.uy,
                        self.uz, self.inv_gamma, grid[0].invdz, grid[0].zmin,
                        grid[0].Nz, grid[0].invdr, grid[0].rmin, grid[0].Nr,
                        grid[0].Jr, grid[1].Jr, grid[0].Jt, grid[1].Jt,
                        grid[0].Jz, grid[1].Jz, self.cell_idx, self.prefix_sum)
                else:
                    raise ValueError(
                        "`particle_shape` should be either 'linear', "
                        "'linear_non_atomic' or 'cubic' but is `%s`"
                        % self.particle_shape)
            else:
                raise ValueError("`fieldtype` should be either 'J' or "
                                 "'rho', but is `%s`" % fieldtype)

        # CPU version
        else:
            # Preliminary arrays for the cylindrical conversion
            r = np.sqrt(self.x**2 + self.y**2)
            # Avoid division by 0.
            invr = 1. / np.where(r != 0., r, 1.)
            cos = np.where(r != 0., self.x * invr, 1.)
            sin = np.where(r != 0., self.y * invr, 0.)

            # Indices and weights
            if self.particle_shape == 'cubic':
                shape_order = 3
            else:
                shape_order = 1
            iz, Sz = weights(self.z, grid[0].invdz, grid[0].zmin, grid[0].Nz,
                             direction='z', shape_order=shape_order)
            ir, Sr = weights(r, grid[0].invdr, grid[0].rmin, grid[0].Nr,
                             direction='r', shape_order=shape_order)

            # Number of modes considered :
            # number of elements in the grid list
            Nm = len(grid)

            if fieldtype == 'rho':
                # ---------------------------------------
                # Deposit the charge density mode by mode
                # ---------------------------------------
                # Prepare auxiliary matrix
                exptheta = np.ones(self.Ntot, dtype='complex')
                # exptheta takes the value exp(im theta) throughout the loop
                for m in range(Nm):
                    # Increment exptheta (notice the + : forward transform)
                    if m == 1:
                        exptheta[:].real = cos
                        exptheta[:].imag = sin
                    elif m > 1:
                        exptheta[:] = exptheta * (cos + 1.j * sin)
                    # Deposit the fields
                    # (The sign -1 with which the guards are added is not
                    # trivial to derive but avoids artifacts on the axis)
                    deposit_field_numba(self.w * exptheta, grid[m].rho, iz, ir,
                                        Sz, Sr, -1.)

            elif fieldtype == 'J':
                # ----------------------------------------
                # Deposit the current density mode by mode
                # ----------------------------------------
                # Calculate the currents
                Jr = self.w * c * self.inv_gamma * (cos * self.ux +
                                                    sin * self.uy)
                Jt = self.w * c * self.inv_gamma * (cos * self.uy -
                                                    sin * self.ux)
                Jz = self.w * c * self.inv_gamma * self.uz
                # Prepare auxiliary matrix
                exptheta = np.ones(self.Ntot, dtype='complex')
                # exptheta takes the value exp(im theta) throughout the loop
                for m in range(Nm):
                    # Increment exptheta (notice the + : forward transform)
                    if m == 1:
                        exptheta[:].real = cos
                        exptheta[:].imag = sin
                    elif m > 1:
                        exptheta[:] = exptheta * (cos + 1.j * sin)
                    # Deposit the fields
                    # (The sign -1 with which the guards are added is not
                    # trivial to derive but avoids artifacts on the axis)
                    deposit_field_numba(Jr * exptheta, grid[m].Jr, iz, ir, Sz,
                                        Sr, -1.)
                    deposit_field_numba(Jt * exptheta, grid[m].Jt, iz, ir, Sz,
                                        Sr, -1.)
                    deposit_field_numba(Jz * exptheta, grid[m].Jz, iz, ir, Sz,
                                        Sr, -1.)

            else:
                raise ValueError(
                    "`fieldtype` should be either 'J' or 'rho', but is `%s`" %
                    fieldtype)
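The incremental update of exptheta in both loops builds exp(i m theta) one mode at a time from cos(theta) and sin(theta). A small self-contained check of that identity (illustrative only):

    import numpy as np

    theta = 2 * np.pi * np.random.rand(8)
    cos, sin = np.cos(theta), np.sin(theta)
    exptheta = np.ones(8, dtype='complex')
    for m in range(4):
        if m == 1:
            exptheta = cos + 1.j * sin
        elif m > 1:
            exptheta = exptheta * (cos + 1.j * sin)
        # After each update, exptheta equals exp(i m theta)
        assert np.allclose(exptheta, np.exp(1.j * m * theta))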
Example #10
    def __init__(self, p, Nr, Nz, rmax, method, use_cuda=False, **kw):
        """
        Calculate the r (position) and nu (frequency) grid
        on which the transform will operate.

        Also store auxiliary data needed for the transform.

        Parameters
        ----------
        p : int
            Order of the Hankel transform

        Nr, Nz : int
            Number of points in the r direction and z direction

        rmax : float
            Edge of the box in which the Hankel transform is taken
            (The function is assumed to be zero at that point.)

        method : string
            The method used to calculate the Hankel transform

        use_cuda : bool, optional
            Whether to use the GPU for the Hankel transform
            (Only available for the MDHT method)

        tpb : int, optional
            Number of threads per block, in the case where cuda is used

        kw : optional arguments to be passed in the case of the MDHT
        """

        # Check that the method is valid
        if method not in available_methods:
            raise ValueError('Invalid method string: %s' % method)
        self.method = method

        # Register whether to use the GPU.
        # If yes, initialize the corresponding cuda stream
        self.use_cuda = use_cuda
        if (self.use_cuda is True) and (cuda_installed is False):
            self.use_cuda = False
            print('** Cuda not available for Hankel transform.')
            print('** Performing the Hankel transform on the CPU.')
        if self.use_cuda:
            # Initialize a cuda stream (required by cublas)
            self.blas = cublas.Blas()
            # Initialize two buffer arrays on the GPU
            # The cuBlas API requires that these arrays be in Fortran order
            zero_array = np.zeros((Nz, Nr), dtype=np.complex128, order='F')
            self.d_in = cuda.to_device(zero_array)
            self.d_out = cuda.to_device(zero_array)
            # Initialize the threads per block and block per grid
            self.dim_grid, self.dim_block = cuda_tpb_bpg_2d(Nz, Nr)

        # Call the corresponding initialization routine
        if self.method == 'FHT':
            self.FHT_init(p, Nr, rmax)
        elif self.method == 'QDHT':
            self.QDHT_init(p, Nr, rmax)
        elif self.method == 'MDHT(m,m)':
            self.MDHT_init(p, Nr, rmax, m=p, **kw)
        elif self.method == 'MDHT(m-1,m)':
            self.MDHT_init(p, Nr, rmax, m=p + 1, **kw)
        elif self.method == 'MDHT(m+1,m)':
            self.MDHT_init(p, Nr, rmax, m=p - 1, **kw)
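Example #11 below shows how this class is instantiated in practice, with the MDHT-specific keyword arguments d and Fw forwarded through **kw:

    # Illustrative values for p (mode index), grid size, and box radius
    p, Nr, Nz, rmax = 0, 32, 64, 25.e-6
    dht0 = DHT(p, Nr, Nz, rmax, 'MDHT(m,m)', d=0.5, Fw='inverse',
               use_cuda=False)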
Example #11
    def __init__(self, Nz, Nr, m, rmax, use_cuda=False):
        """
        Initializes the dht and fft attributes, which contain the
        auxiliary matrices that allow the fields to be transformed quickly

        Parameters
        ----------
        Nz, Nr : int
            Number of points along z and r respectively

        m : int
            Index of the mode (needed for the Hankel transform)

        rmax : float
            The size of the simulation box along r.

        use_cuda : bool, optional
            Whether to perform the transforms on the GPU
        """
        # Check whether to use the GPU
        self.use_cuda = use_cuda
        if (self.use_cuda is True) and (cuda_installed is False):
            self.use_cuda = False
        if self.use_cuda:
            # Initialize the dimension of the grid and blocks
            self.dim_grid, self.dim_block = cuda_tpb_bpg_2d(Nz, Nr)

        # Initialize the DHT (local implementation, see hankel.py)
        self.dht0 = DHT(m, Nr, Nz, rmax, 'MDHT(m,m)',
                        d=0.5, Fw='inverse', use_cuda=self.use_cuda)
        self.dhtp = DHT(m + 1, Nr, Nz, rmax, 'MDHT(m+1,m)',
                        d=0.5, Fw='inverse', use_cuda=self.use_cuda)
        self.dhtm = DHT(m - 1, Nr, Nz, rmax, 'MDHT(m-1,m)',
                        d=0.5, Fw='inverse', use_cuda=self.use_cuda)

        # Initialize the FFT
        self.fft = FFT(Nr, Nz, use_cuda=self.use_cuda)

        # Extract the spectral buffers
        # - In the case where the GPU is used, these buffers are cuda
        #   device arrays.
        # - In the case where the CPU is used, these buffers are tied to
        #   the FFTW plan object (see the __init__ of the FFT object). Do
        #   *not* modify these buffers to make them point to another array.
        self.spect_buffer_r, self.spect_buffer_t = self.fft.get_buffers()

        # Different names for same object (for economy of memory)
        self.spect_buffer_p = self.spect_buffer_r
        self.spect_buffer_m = self.spect_buffer_t
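Because spect_buffer_p and spect_buffer_m are only new names for spect_buffer_r and spect_buffer_t, an in-place write through one name is visible through the other. A short illustration of this aliasing with plain NumPy arrays:

    import numpy as np

    buf_r = np.zeros((4, 4), dtype=np.complex128)
    buf_p = buf_r                     # alias: same memory, second name
    buf_p[0, 0] = 1. + 2.j
    assert buf_r[0, 0] == 1. + 2.j    # the write shows through both names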
Example #12
    def push_eb_with(self, ps, use_true_rho=False) :
        """
        Push the fields over one timestep, using the psatd coefficients.

        Parameters
        ----------
        ps : PsatdCoeffs object
            psatd object corresponding to the same m mode

        use_true_rho : bool, optional
            Whether to use the rho projected on the grid.
            If set to False, this will use div(E) and div(J)
            to evaluate rho and its time evolution.
            In the case use_true_rho==False, the rho projected
            on the grid is used only to correct the currents, and
            the simulation can be run without the neutralizing ions.
        """
        # Check that the psatd object passed as an argument is the right
        # one (i.e. that it corresponds to the right mode)
        assert self.m == ps.m

        if self.use_cuda :
            # Obtain the cuda grid
            dim_grid, dim_block = cuda_tpb_bpg_2d( self.Nz, self.Nr)
            # Push the fields on the GPU
            if ps.V is None:
                # With the standard PSATD algorithm
                cuda_push_eb_standard[dim_grid, dim_block](
                    self.Ep, self.Em, self.Ez, self.Bp, self.Bm, self.Bz,
                    self.Jp, self.Jm, self.Jz, self.rho_prev, self.rho_next,
                    ps.d_rho_prev_coef, ps.d_rho_next_coef, ps.d_j_coef,
                    ps.d_C, ps.d_S_w, self.d_kr, self.d_kz, ps.dt,
                    use_true_rho, self.Nz, self.Nr )
            else:
                # With the Galilean/comoving algorithm
                cuda_push_eb_comoving[dim_grid, dim_block](
                    self.Ep, self.Em, self.Ez, self.Bp, self.Bm, self.Bz,
                    self.Jp, self.Jm, self.Jz, self.rho_prev, self.rho_next,
                    ps.d_rho_prev_coef, ps.d_rho_next_coef, ps.d_j_coef,
                    ps.d_C, ps.d_S_w, ps.d_T_eb, ps.d_T_cc, ps.d_T_rho,
                    self.d_kr, self.d_kz, ps.dt, ps.V,
                    use_true_rho, self.Nz, self.Nr )
        else :
            # Push the fields on the CPU
            if ps.V is None:
                # With the standard PSATD algorithm
                numba_push_eb_standard(
                    self.Ep, self.Em, self.Ez, self.Bp, self.Bm, self.Bz,
                    self.Jp, self.Jm, self.Jz, self.rho_prev, self.rho_next,
                    ps.rho_prev_coef, ps.rho_next_coef, ps.j_coef,
                    ps.C, ps.S_w, self.kr, self.kz, ps.dt,
                    use_true_rho, self.Nz, self.Nr )
            else:
                # With the Galilean/comoving algorithm
                numba_push_eb_comoving(
                    self.Ep, self.Em, self.Ez, self.Bp, self.Bm, self.Bz,
                    self.Jp, self.Jm, self.Jz, self.rho_prev, self.rho_next,
                    ps.rho_prev_coef, ps.rho_next_coef, ps.j_coef,
                    ps.C, ps.S_w, ps.T_eb, ps.T_cc, ps.T_rho,
                    self.kr, self.kz, ps.dt, ps.V,
                    use_true_rho, self.Nz, self.Nr )
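A hypothetical per-mode driver loop around this method (the container attributes spect and psatd are assumptions for illustration, as is Nm, the number of azimuthal modes):

    # Advance the spectral fields of every azimuthal mode by one timestep
    for m in range(Nm):
        fld.spect[m].push_eb_with(fld.psatd[m], use_true_rho=False)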
Example #13
    def copy_EB_buffers(self,
                        interp,
                        before_sending=False,
                        after_receiving=False):
        """
        Either copy the inner part of the domain to the sending buffer
        for E & B, or copy the receiving buffer for E & B to the guard
        cells of the domain.

        Depending on whether the field data is initially on the CPU
        or on the GPU, this function will do the appropriate exchange
        with the device.

        Parameters
        ----------
        interp: a list of InterpolationGrid objects
            (one element per azimuthal mode)

        before_sending: bool
            Whether to copy the inner part of the domain to the sending buffer

        after_receiving: bool
            Whether to copy the receiving buffer to the guard cells
        """
        # Shortcut for the guard cells
        ng = self.n_guard
        copy_left = (self.left_proc is not None)
        copy_right = (self.right_proc is not None)

        # When using the GPU
        if interp[0].use_cuda:

            # Calculate the number of blocks and threads per block
            dim_grid_2d, dim_block_2d = cuda_tpb_bpg_2d(ng, interp[0].Nr)

            if before_sending:
                # Copy the inner regions of the domain to the GPU buffers
                copy_EB_to_gpu_buffers[dim_grid_2d, dim_block_2d](
                    self.d_EB_l, self.d_EB_r, interp[0].Er, interp[0].Et,
                    interp[0].Ez, interp[0].Br, interp[0].Bt, interp[0].Bz,
                    interp[1].Er, interp[1].Et, interp[1].Ez, interp[1].Br,
                    interp[1].Bt, interp[1].Bz, copy_left, copy_right, ng)
                # Copy the GPU buffers to the sending CPU buffers
                if copy_left:
                    self.d_EB_l.copy_to_host(self.EB_send_l)
                if copy_right:
                    self.d_EB_r.copy_to_host(self.EB_send_r)

            elif after_receiving:
                # Copy the CPU receiving buffers to the GPU buffers
                if copy_left:
                    self.d_EB_l.copy_to_device(self.EB_recv_l)
                if copy_right:
                    self.d_EB_r.copy_to_device(self.EB_recv_r)
                # Copy the GPU buffers to the guard cells of the domain
                copy_EB_from_gpu_buffers[dim_grid_2d, dim_block_2d](
                    self.d_EB_l, self.d_EB_r, interp[0].Er, interp[0].Et,
                    interp[0].Ez, interp[0].Br, interp[0].Bt, interp[0].Bz,
                    interp[1].Er, interp[1].Et, interp[1].Ez, interp[1].Br,
                    interp[1].Bt, interp[1].Bz, copy_left, copy_right, ng)

        # Without GPU
        else:
            for m in range(self.Nm):
                offset = 6 * m

                if before_sending:
                    # Copy the inner regions of the domain to the buffer
                    if copy_left:
                        self.EB_send_l[0 + offset, :, :] = interp[m].Er[ng:2 * ng, :]
                        self.EB_send_l[1 + offset, :, :] = interp[m].Et[ng:2 * ng, :]
                        self.EB_send_l[2 + offset, :, :] = interp[m].Ez[ng:2 * ng, :]
                        self.EB_send_l[3 + offset, :, :] = interp[m].Br[ng:2 * ng, :]
                        self.EB_send_l[4 + offset, :, :] = interp[m].Bt[ng:2 * ng, :]
                        self.EB_send_l[5 + offset, :, :] = interp[m].Bz[ng:2 * ng, :]
                    if copy_right:
                        self.EB_send_r[0 + offset, :, :] = interp[m].Er[-2 * ng:-ng, :]
                        self.EB_send_r[1 + offset, :, :] = interp[m].Et[-2 * ng:-ng, :]
                        self.EB_send_r[2 + offset, :, :] = interp[m].Ez[-2 * ng:-ng, :]
                        self.EB_send_r[3 + offset, :, :] = interp[m].Br[-2 * ng:-ng, :]
                        self.EB_send_r[4 + offset, :, :] = interp[m].Bt[-2 * ng:-ng, :]
                        self.EB_send_r[5 + offset, :, :] = interp[m].Bz[-2 * ng:-ng, :]

                elif after_receiving:
                    # Copy the buffer to the guard cells of the domain
                    if copy_left:
                        interp[m].Er[:ng, :] = self.EB_recv_l[0 + offset, :, :]
                        interp[m].Et[:ng, :] = self.EB_recv_l[1 + offset, :, :]
                        interp[m].Ez[:ng, :] = self.EB_recv_l[2 + offset, :, :]
                        interp[m].Br[:ng, :] = self.EB_recv_l[3 + offset, :, :]
                        interp[m].Bt[:ng, :] = self.EB_recv_l[4 + offset, :, :]
                        interp[m].Bz[:ng, :] = self.EB_recv_l[5 + offset, :, :]
                    if copy_right:
                        interp[m].Er[-ng:, :] = self.EB_recv_r[0 + offset, :, :]
                        interp[m].Et[-ng:, :] = self.EB_recv_r[1 + offset, :, :]
                        interp[m].Ez[-ng:, :] = self.EB_recv_r[2 + offset, :, :]
                        interp[m].Br[-ng:, :] = self.EB_recv_r[3 + offset, :, :]
                        interp[m].Bt[-ng:, :] = self.EB_recv_r[4 + offset, :, :]
                        interp[m].Bz[-ng:, :] = self.EB_recv_r[5 + offset, :, :]
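A hypothetical exchange sequence built around this method (the mpi4py communicator comm and the guards instance name are assumptions; Sendrecv is one way to do the blocking exchange):

    # Pack the guard-region data, exchange with the neighbors, unpack
    guards.copy_EB_buffers(interp, before_sending=True)
    if guards.left_proc is not None:
        comm.Sendrecv(guards.EB_send_l, dest=guards.left_proc,
                      recvbuf=guards.EB_recv_l, source=guards.left_proc)
    if guards.right_proc is not None:
        comm.Sendrecv(guards.EB_send_r, dest=guards.right_proc,
                      recvbuf=guards.EB_recv_r, source=guards.right_proc)
    guards.copy_EB_buffers(interp, after_receiving=True)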