def reallocate_and_copy_old( species, use_cuda, old_Ntot, new_Ntot ):
    """
    Copy the particle quantities of `species` from arrays of size
    `old_Ntot` into arrays of size `new_Ntot`. Set these arrays as
    attributes of `species`.
    (The first `old_Ntot` elements of the new arrays are copied from the
    old arrays; the last elements are left empty and expected to be
    filled later.)

    When `use_cuda` is True, this function also reallocates
    the sorting buffers for GPU, with a size `new_Ntot`.

    Parameters
    ----------
    species: an fbpic Particles object
    use_cuda: bool
        If True, the new arrays are device arrays, and copying is done on GPU.
        If False, the arrays are on CPU, and copying is done on CPU.
    old_Ntot, new_Ntot: int
        Size of the old and new arrays (with old_Ntot < new_Ntot)
    """
    # Check if the data is on the GPU
    data_on_gpu = (type(species.w) is not np.ndarray)

    # On GPU, use one thread per particle
    if data_on_gpu:
        ptcl_grid_1d, ptcl_block_1d = cuda_tpb_bpg_1d( old_Ntot )

    # Iterate over particle attributes and copy the old particles
    for attr in ['x', 'y', 'z', 'ux', 'uy', 'uz', 'w', 'inv_gamma',
                 'Ex', 'Ey', 'Ez', 'Bx', 'By', 'Bz']:
        old_array = getattr(species, attr)
        new_array = allocate_empty( new_Ntot, data_on_gpu, dtype=np.float64 )
        if data_on_gpu:
            copy_particle_data_cuda[ ptcl_grid_1d, ptcl_block_1d ](
                old_Ntot, old_array, new_array )
        else:
            copy_particle_data_numba( old_Ntot, old_array, new_array )
        setattr( species, attr, new_array )

    # Copy the tracking id, if needed
    if species.tracker is not None:
        old_array = species.tracker.id
        # (Allocate according to where the data currently is, for
        # consistency with the copy kernels used below)
        new_array = allocate_empty( new_Ntot, data_on_gpu, dtype=np.uint64 )
        if data_on_gpu:
            copy_particle_data_cuda[ ptcl_grid_1d, ptcl_block_1d ](
                old_Ntot, old_array, new_array )
        else:
            copy_particle_data_numba( old_Ntot, old_array, new_array )
        species.tracker.id = new_array

    # Allocate the auxiliary arrays for GPU
    if use_cuda:
        species.cell_idx = cuda.device_array((new_Ntot,), dtype=np.int32)
        species.sorted_idx = cuda.device_array((new_Ntot,), dtype=np.uint32)
        species.sorting_buffer = cuda.device_array((new_Ntot,), dtype=np.float64)
        if species.n_integer_quantities > 0:
            species.int_sorting_buffer = \
                cuda.device_array( (new_Ntot,), dtype=np.uint64 )

    # Modify the total number of particles
    species.Ntot = new_Ntot
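# The CPU copy helper called above is not shown in this listing. Below is a
# minimal sketch of what `copy_particle_data_numba` could look like, inferred
# from its call sites (an assumption, not necessarily the actual fbpic
# implementation).

import numba

@numba.njit(parallel=True)
def copy_particle_data_numba(old_Ntot, old_array, new_array):
    """Copy the first `old_Ntot` elements of `old_array` into `new_array`.
    (Sketch: the real implementation may differ.)"""
    for i in numba.prange(old_Ntot):
        new_array[i] = old_array[i]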
def __init__(self, Nz, Nr, m, rmax, use_cuda=False):
    """
    Initializes the dht and fft attributes, which contain auxiliary
    matrices that allow the fields to be transformed quickly

    Parameters
    ----------
    Nz, Nr : int
        Number of points along z and r respectively

    m : int
        Index of the mode (needed for the Hankel transform)

    rmax : float
        The size of the simulation box along r.
    """
    # Check whether to use the GPU
    self.use_cuda = use_cuda
    if (self.use_cuda is True) and (cuda_installed is False):
        self.use_cuda = False
    if self.use_cuda:
        # Initialize the dimension of the grid and blocks
        self.dim_grid, self.dim_block = cuda_tpb_bpg_2d(Nz, Nr, 1, 32)

    # Initialize the DHT (local implementation, see hankel.py)
    self.dht0 = DHT(m, m, Nr, Nz, rmax, use_cuda=self.use_cuda)
    self.dhtp = DHT(m + 1, m, Nr, Nz, rmax, use_cuda=self.use_cuda)
    self.dhtm = DHT(m - 1, m, Nr, Nz, rmax, use_cuda=self.use_cuda)

    # Initialize the FFT
    self.fft = FFT(Nr, Nz, use_cuda=self.use_cuda)

    # Initialize the spectral buffers
    if self.use_cuda:
        self.spect_buffer_r = cuda.device_array((Nz, Nr), dtype=np.complex128)
        self.spect_buffer_t = cuda.device_array((Nz, Nr), dtype=np.complex128)
    else:
        self.spect_buffer_r = np.zeros((Nz, Nr), dtype=np.complex128)
        self.spect_buffer_t = np.zeros((Nz, Nr), dtype=np.complex128)

    # Different names for the same object (for economy of memory)
    self.spect_buffer_p = self.spect_buffer_r
    self.spect_buffer_m = self.spect_buffer_t
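# Hedged usage sketch: constructing the transform object for the azimuthal
# mode m=0 on a 200 x 50 (Nz x Nr) grid. The class name `SpectralTransformer`
# is an assumption (the enclosing class is not shown in this listing):
#
#     trans = SpectralTransformer(Nz=200, Nr=50, m=0, rmax=50.e-6)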
def allocate_empty(N, use_cuda, dtype):
    """
    Allocate and return an empty array, of size `N` and type `dtype`,
    either on GPU or CPU, depending on whether `use_cuda` is True or False
    """
    if use_cuda:
        return cuda.device_array((N,), dtype=dtype)
    else:
        return np.empty(N, dtype=dtype)
def allocate_empty(shape, use_cuda, dtype):
    """
    Allocate and return an empty array, of shape `shape` and type `dtype`,
    either on GPU or CPU, depending on whether `use_cuda` is True or False
    """
    if type(shape) is not tuple:
        # Convert single scalar to tuple
        shape = (shape,)
    if use_cuda:
        return cuda.device_array(shape, dtype=dtype)
    else:
        return np.empty(shape, dtype=dtype)
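# Hedged usage sketch: the generalized `allocate_empty` above accepts both a
# scalar size and a tuple shape:
#
#     buf_1d = allocate_empty(100, use_cuda=False, dtype=np.float64)
#     # buf_1d.shape == (100,)
#     buf_2d = allocate_empty((8, 100), use_cuda=False, dtype=np.float64)
#     # buf_2d.shape == (8, 100)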
def __init__(self, t_lab, zmin_lab, zmax_lab, write_dir, i, fld, Nr_output):
    """
    Initialize a LabSnapshot

    Parameters
    ----------
    t_lab: float (seconds)
        Time of this snapshot *in the lab frame*

    zmin_lab, zmax_lab: floats
        Longitudinal limits of this snapshot

    write_dir: string
        Absolute path to the directory where the data for
        this snapshot is to be written

    i: int
        Number of the file where this snapshot is to be written

    fld: a Fields object
        This is passed only in order to determine how to initialize
        the slice_array buffer (either on the CPU or GPU)

    Nr_output: int
        Number of cells in the r direction, in the final output
        (This typically excludes the radial damping cells)
    """
    # Deduce the name of the file to which this snapshot writes
    self.filename = os.path.join( write_dir, 'hdf5/data%08d.h5' %i)
    self.iteration = i

    # Time and boundaries in the lab frame (constant quantities)
    self.zmin_lab = zmin_lab
    self.zmax_lab = zmax_lab
    self.t_lab = t_lab

    # Positions where the fields are to be registered
    # (Change at every iteration)
    self.current_z_lab = 0
    self.current_z_boost = 0

    # Buffered field slice and corresponding array index in z
    self.buffered_slices = []
    self.buffer_z_indices = []

    # Allocate a buffer for only one slice (avoids having to
    # reallocate arrays when running on the GPU)
    data_shape = (10, 2*fld.Nm-1, Nr_output)
    if fld.use_cuda is False:
        self.slice_array = np.empty( data_shape )
    else:
        self.slice_array = cuda.device_array( data_shape )
def add_buffers_gpu( species, float_recv_left, float_recv_right,
                     uint_recv_left, uint_recv_right):
    """
    Add the particles stored in recv_left and recv_right
    to the existing particles in species.

    Parameters
    ----------
    species: a Particles object
        Contains the particles that stayed on the present processor

    float_recv_left, float_recv_right, uint_recv_left, uint_recv_right:
        arrays of shape (n_float,Nptcl) and (n_int,Nptcl) where Nptcl
        is the number of particles that are received from the left proc
        and right proc respectively, and where n_float and n_int
        are the number of float and integer quantities respectively
        These arrays are always on the CPU (since they were used for MPI)
    """
    # Get the new number of particles
    old_Ntot = species.Ntot
    n_left = float_recv_left.shape[1]
    n_right = float_recv_right.shape[1]
    new_Ntot = old_Ntot + n_left + n_right

    # Get the threads per block and the blocks per grid
    n_left_grid, n_left_block = cuda_tpb_bpg_1d( n_left )
    n_right_grid, n_right_block = cuda_tpb_bpg_1d( n_right )
    n_old_grid, n_old_block = cuda_tpb_bpg_1d( old_Ntot )

    # Iterate over particle attributes
    # Build list of float attributes to copy
    attr_list = [ (species,'x'), (species,'y'), (species,'z'),
                  (species,'ux'), (species,'uy'), (species,'uz'),
                  (species,'inv_gamma'), (species,'w') ]
    if species.ionizer is not None:
        attr_list += [ (species.ionizer, 'w_times_level') ]
    # Loop through the float quantities
    for i_attr in range( len(attr_list) ):
        # Copy the proper buffers to the GPU
        left_buffer = cuda.to_device( float_recv_left[i_attr] )
        right_buffer = cuda.to_device( float_recv_right[i_attr] )
        # Initialize the new particle array
        particle_array = cuda.device_array( (new_Ntot,), dtype=np.float64)
        # Merge the arrays on the GPU
        stay_buffer = getattr( attr_list[i_attr][0], attr_list[i_attr][1])
        if n_left != 0:
            copy_particles[n_left_grid, n_left_block](
                n_left, left_buffer, 0, particle_array, 0 )
        if old_Ntot != 0:
            copy_particles[n_old_grid, n_old_block](
                old_Ntot, stay_buffer, 0, particle_array, n_left )
        if n_right != 0:
            copy_particles[n_right_grid, n_right_block](
                n_right, right_buffer, 0, particle_array, n_left+old_Ntot )
        # Assign the merged array to the particle data attribute
        setattr(attr_list[i_attr][0], attr_list[i_attr][1], particle_array)

    # Build list of integer quantities to copy
    attr_list = []
    if species.tracker is not None:
        attr_list.append( (species.tracker,'id') )
    if species.ionizer is not None:
        attr_list.append( (species.ionizer,'ionization_level') )
    # Loop through the integer quantities
    for i_attr in range( len(attr_list) ):
        # Copy the proper buffers to the GPU
        left_buffer = cuda.to_device( uint_recv_left[i_attr] )
        right_buffer = cuda.to_device( uint_recv_right[i_attr] )
        # Initialize the new particle array
        particle_array = cuda.device_array( (new_Ntot,), dtype=np.uint64)
        # Merge the arrays on the GPU
        stay_buffer = getattr( attr_list[i_attr][0], attr_list[i_attr][1])
        if n_left != 0:
            copy_particles[n_left_grid, n_left_block](
                n_left, left_buffer, 0, particle_array, 0 )
        if old_Ntot != 0:
            copy_particles[n_old_grid, n_old_block](
                old_Ntot, stay_buffer, 0, particle_array, n_left )
        if n_right != 0:
            copy_particles[n_right_grid, n_right_block](
                n_right, right_buffer, 0, particle_array, n_left+old_Ntot )
        # Assign the merged array to the particle data attribute
        setattr(attr_list[i_attr][0], attr_list[i_attr][1], particle_array)

    # Adapt the total number of particles
    species.Ntot = new_Ntot
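# The `copy_particles` kernel used above is defined elsewhere in the library.
# Below is a minimal sketch of what it could look like, inferred from its
# call sites: N elements are copied from `source`, starting at `source_start`,
# into `dest`, starting at `dest_start`. This is an assumption, not
# necessarily the actual fbpic implementation.

from numba import cuda

@cuda.jit
def copy_particles(N, source, source_start, dest, dest_start):
    """Copy N elements from `source` into `dest`, with the given offsets.
    (Sketch: the real implementation may differ.)"""
    i = cuda.grid(1)
    if i < N:
        dest[dest_start + i] = source[source_start + i]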
def add_buffers_to_particles( species, float_recv_left, float_recv_right,
                              uint_recv_left, uint_recv_right):
    """
    Add the particles stored in recv_left and recv_right
    to the existing particles in species.

    Resize the auxiliary arrays of the particles Ex, Ey, Ez, Bx, By, Bz,
    as well as cell_idx, sorted_idx and sorting_buffer

    Parameters
    ----------
    species: a Particles object
        Contains the particles that stayed on the present processor

    float_recv_left, float_recv_right, uint_recv_left, uint_recv_right:
        arrays of shape (n_float,Nptcl) and (n_int,Nptcl) where Nptcl
        is the number of particles that are received from the left proc
        and right proc respectively, and where n_float and n_int
        are the number of float and integer quantities respectively
        These arrays are always on the CPU (since they were used for MPI)
    """
    # Copy the buffers to an enlarged array
    if species.use_cuda:
        add_buffers_gpu( species, float_recv_left, float_recv_right,
                         uint_recv_left, uint_recv_right )
    else:
        add_buffers_cpu( species, float_recv_left, float_recv_right,
                         uint_recv_left, uint_recv_right )

    # Reallocate the particles auxiliary arrays. This needs to be done,
    # as the total number of particles in this domain has changed.
    if species.use_cuda:
        shape = (species.Ntot,)
        # Reallocate empty field-on-particle arrays on the GPU
        species.Ex = cuda.device_array( shape, dtype=np.float64 )
        species.Ey = cuda.device_array( shape, dtype=np.float64 )
        species.Ez = cuda.device_array( shape, dtype=np.float64 )
        species.Bx = cuda.device_array( shape, dtype=np.float64 )
        species.By = cuda.device_array( shape, dtype=np.float64 )
        species.Bz = cuda.device_array( shape, dtype=np.float64 )
        # Reallocate empty auxiliary sorting arrays on the GPU
        species.cell_idx = cuda.device_array( shape, dtype=np.int32 )
        species.sorted_idx = cuda.device_array( shape, dtype=np.intp )
        species.sorting_buffer = cuda.device_array( shape, dtype=np.float64 )
        if species.n_integer_quantities > 0:
            species.int_sorting_buffer = \
                cuda.device_array( shape, dtype=np.uint64 )
    else:
        # Reallocate empty field-on-particle arrays on the CPU
        species.Ex = np.empty(species.Ntot, dtype=np.float64)
        species.Ey = np.empty(species.Ntot, dtype=np.float64)
        species.Ez = np.empty(species.Ntot, dtype=np.float64)
        species.Bx = np.empty(species.Ntot, dtype=np.float64)
        species.By = np.empty(species.Ntot, dtype=np.float64)
        species.Bz = np.empty(species.Ntot, dtype=np.float64)

    # The particles are unsorted after adding new particles.
    species.sorted = False
def remove_particles_gpu(species, fld, n_guard, left_proc, right_proc):
    """
    Remove the particles that are outside of the physical domain (i.e.
    in the guard cells). Store them in sending buffers, which are returned.

    Parameters
    ----------
    species: a Particles object
        Contains the data of this species

    fld: a Fields object
        Contains information about the dimension of the grid,
        and the prefix sum (when using the GPU)

    n_guard: int
        Number of guard cells

    left_proc, right_proc: int or None
        Indicate whether there is a left or right processor or if the
        boundary is open (None).

    Returns
    -------
    float_send_left, float_send_right, uint_send_left, uint_send_right:
        arrays of shape (n_float,Nptcl) and (n_int,Nptcl) where Nptcl
        is the number of particles that are sent to the left proc
        and right proc respectively, and where n_float and n_int
        are the number of float and integer quantities respectively
    """
    # Check if particles are sorted
    # (The particles are usually expected to be sorted from the previous
    # iteration at this point - except at the first iteration of `step`.)
    if not species.sorted:
        species.sort_particles(fld=fld)
        species.sorted = True

    # Get the particle indices between which to remove the particles
    # (Take into account the fact that the moving window may have
    # shifted the grid since the particles were last sorted: prefix_sum_shift)
    prefix_sum = species.prefix_sum
    Nz = fld.Nz
    Nr = fld.Nr
    # Find the z index of the first cell for which particles are kept
    iz_min = max( n_guard + species.prefix_sum_shift, 0 )
    # Find the z index of the first cell for which particles are removed again
    iz_max = min( Nz - n_guard + species.prefix_sum_shift + 1, Nz )
    # Find the corresponding indices in the particle array
    # Reminder: prefix_sum[i] is the cumulative sum of the number of particles
    # in cells 0 to i (where cell i is included)
    if iz_min*(Nr+1) - 1 >= 0:
        i_min = prefix_sum.getitem( iz_min*(Nr+1) - 1 )
    else:
        i_min = 0
    i_max = prefix_sum.getitem( iz_max*(Nr+1) - 1 )
    # Total number of particles in each particle group
    N_send_l = i_min
    new_Ntot = i_max - i_min
    N_send_r = species.Ntot - i_max

    # Allocate the sending buffers on the CPU
    n_float = species.n_float_quantities
    n_int = species.n_integer_quantities
    if left_proc is not None:
        float_send_left = np.empty((n_float, N_send_l), dtype=np.float64)
        uint_send_left = np.empty((n_int, N_send_l), dtype=np.uint64)
    else:
        float_send_left = np.empty((n_float, 0), dtype=np.float64)
        uint_send_left = np.empty((n_int, 0), dtype=np.uint64)
    if right_proc is not None:
        float_send_right = np.empty((n_float, N_send_r), dtype=np.float64)
        uint_send_right = np.empty((n_int, N_send_r), dtype=np.uint64)
    else:
        float_send_right = np.empty((n_float, 0), dtype=np.float64)
        uint_send_right = np.empty((n_int, 0), dtype=np.uint64)

    # Get the threads per block and the blocks per grid
    dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d( species.Ntot )

    # Float quantities:
    # Build list of float attributes to copy
    attr_list = [ (species,'x'), (species,'y'), (species,'z'),
                  (species,'ux'), (species,'uy'), (species,'uz'),
                  (species,'inv_gamma'), (species,'w') ]
    if species.ionizer is not None:
        attr_list.append( (species.ionizer,'w_times_level') )
    # Loop through the float attributes
    for i_attr in range(n_float):
        # Initialize 3 buffer arrays on the GPU (need to be initialized
        # inside the loop, as `copy_to_host` invalidates these arrays)
        left_buffer = cuda.device_array((N_send_l,), dtype=np.float64)
        right_buffer = cuda.device_array((N_send_r,), dtype=np.float64)
        stay_buffer = cuda.device_array((new_Ntot,), dtype=np.float64)
        # Check that the buffers are still on GPU
        # (safeguard against automatic memory management)
        assert type(left_buffer) != np.ndarray
        assert type(right_buffer) != np.ndarray
        assert type(stay_buffer) != np.ndarray
        # Split the particle array into the 3 buffers on the GPU
        particle_array = getattr( attr_list[i_attr][0], attr_list[i_attr][1] )
        split_particles_to_buffers[dim_grid_1d, dim_block_1d]( particle_array,
            left_buffer, stay_buffer, right_buffer, i_min, i_max)
        # Assign the stay_buffer to the initial particle data array
        # and fill the sending buffers (if needed for MPI)
        setattr( attr_list[i_attr][0], attr_list[i_attr][1], stay_buffer)
        if left_proc is not None:
            left_buffer.copy_to_host( float_send_left[i_attr] )
        if right_proc is not None:
            right_buffer.copy_to_host( float_send_right[i_attr] )

    # Integer quantities:
    if n_int > 0:
        attr_list = []
        if species.tracker is not None:
            attr_list.append( (species.tracker,'id') )
        if species.ionizer is not None:
            attr_list.append( (species.ionizer,'ionization_level') )
        for i_attr in range(n_int):
            # Initialize 3 buffer arrays on the GPU (need to be initialized
            # inside the loop, as `copy_to_host` invalidates these arrays)
            left_buffer = cuda.device_array((N_send_l,), dtype=np.uint64)
            right_buffer = cuda.device_array((N_send_r,), dtype=np.uint64)
            stay_buffer = cuda.device_array((new_Ntot,), dtype=np.uint64)
            # Split the particle array into the 3 buffers on the GPU
            particle_array = getattr( attr_list[i_attr][0],
                                      attr_list[i_attr][1] )
            split_particles_to_buffers[dim_grid_1d, dim_block_1d](
                particle_array, left_buffer, stay_buffer, right_buffer,
                i_min, i_max)
            # Assign the stay_buffer to the initial particle data array
            # and fill the sending buffers (if needed for MPI)
            setattr( attr_list[i_attr][0], attr_list[i_attr][1], stay_buffer)
            if left_proc is not None:
                left_buffer.copy_to_host( uint_send_left[i_attr] )
            if right_proc is not None:
                right_buffer.copy_to_host( uint_send_right[i_attr] )

    # Register the new total number of particles
    species.Ntot = new_Ntot

    # Return the sending buffers
    return float_send_left, float_send_right, uint_send_left, uint_send_right
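# The `split_particles_to_buffers` kernel used above is defined elsewhere in
# the library. Below is a minimal sketch of what it could look like, inferred
# from its call sites: particles with index below `i_min` go to the left
# buffer, those between `i_min` and `i_max` to the stay buffer, and the rest
# to the right buffer. This is an assumption, not necessarily the actual
# fbpic implementation.

from numba import cuda

@cuda.jit
def split_particles_to_buffers(particle_array, left_buffer,
                               stay_buffer, right_buffer, i_min, i_max):
    """Split a sorted `particle_array` into three contiguous buffers.
    (Sketch: the real implementation may differ.)"""
    i = cuda.grid(1)
    if i < particle_array.shape[0]:
        if i < i_min:
            left_buffer[i] = particle_array[i]
        elif i < i_max:
            stay_buffer[i - i_min] = particle_array[i]
        else:
            right_buffer[i - i_max] = particle_array[i]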
def load_species(species, name, ts, iteration, comm):
    """
    Read the species data from the checkpoint `ts`
    and load it into the Species object `species`

    Parameters
    ----------
    species: a Species object
        The object into which data is loaded

    name: string
        The name of the corresponding species in the checkpoint

    ts: an OpenPMDTimeSeries object
        Points to the data in the checkpoint

    iteration: integer
        The iteration at which to load the checkpoint

    comm: an fbpic.BoundaryCommunicator object
        Contains information about the number of procs
    """
    # Get the particles' positions (convert to meters)
    x, y, z = ts.get_particle(['x', 'y', 'z'], iteration=iteration,
                              species=name)
    species.x, species.y, species.z = 1.e-6 * x, 1.e-6 * y, 1.e-6 * z
    # Get the particles' momenta
    species.ux, species.uy, species.uz = ts.get_particle(
        ['ux', 'uy', 'uz'], iteration=iteration, species=name)
    # Get the particles' weights
    species.w, = ts.get_particle(['w'], iteration=iteration, species=name)
    # Get the inverse gamma
    species.inv_gamma = 1. / np.sqrt(
        1 + species.ux**2 + species.uy**2 + species.uz**2)
    # Register the number of particles (the arrays loaded from the
    # checkpoint may have a different size than the preexisting ones)
    Ntot = len(species.w)
    species.Ntot = Ntot

    # Check if the particles were tracked
    if "id" in ts.avail_record_components[name]:
        pid, = ts.get_particle(['id'], iteration=iteration, species=name)
        species.track(comm)
        species.tracker.overwrite_ids(pid, comm)

    # If the species is ionizable, set the proper arrays
    if species.ionizer is not None:
        # Reallocate the ionization_level, and reset it with the right value
        species.ionizer.ionization_level = np.empty(Ntot, dtype=np.uint64)
        q, = ts.get_particle(['charge'], iteration=iteration, species=name)
        species.ionizer.ionization_level[:] = np.uint64(np.round(q / e))
        # Set the auxiliary array
        species.ionizer.w_times_level = \
            species.w * species.ionizer.ionization_level

    # Reset the injection positions (for continuous injection)
    if species.continuous_injection:
        species.injector.reset_injection_positions()

    # As a safeguard, check that the loaded data is in float64
    for attr in ['x', 'y', 'z', 'ux', 'uy', 'uz', 'w', 'inv_gamma']:
        assert getattr(species, attr).dtype == np.float64

    # Field arrays
    species.Ez = np.zeros(Ntot)
    species.Ex = np.zeros(Ntot)
    species.Ey = np.zeros(Ntot)
    species.Bz = np.zeros(Ntot)
    species.Bx = np.zeros(Ntot)
    species.By = np.zeros(Ntot)

    # Sorting arrays
    if species.use_cuda:
        # cell_idx and sorted_idx always stay on GPU
        species.cell_idx = cuda.device_array(Ntot, dtype=np.int32)
        species.sorted_idx = cuda.device_array(Ntot, dtype=np.intp)
        # sorting buffers are initialized on CPU
        # (because they are swapped with other particle arrays during sorting)
        species.sorting_buffer = np.empty(Ntot, dtype=np.float64)
        if hasattr(species, 'int_sorting_buffer'):
            species.int_sorting_buffer = np.empty(Ntot, dtype=np.uint64)
        species.sorted = False
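# Hedged usage sketch (assumes the openPMD-viewer package and an existing
# checkpoint directory; the path, species name, and `elec`/`comm` objects
# are illustrative, and the import may be `openpmd_viewer` in recent
# versions of the package):
#
#     from opmd_viewer import OpenPMDTimeSeries
#     ts = OpenPMDTimeSeries('./checkpoints/hdf5')
#     load_species(elec, 'electrons', ts, iteration=1000, comm=comm)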
def __init__(self, Nr, Nz, use_cuda=False, nthreads=None):
    """
    Initialize an FFT object

    Parameters
    ----------
    Nr: int
       Number of grid points along the r axis (axis -1)

    Nz: int
       Number of grid points along the z axis (axis 0)

    use_cuda: bool, optional
       Whether to perform the Fourier transform on the GPU

    nthreads : int, optional
        Number of threads for the FFTW transform.
        If None, the default number of threads of numba is used
        (environment variable NUMBA_NUM_THREADS)
    """
    # Check whether to use cuda
    self.use_cuda = use_cuda
    if (self.use_cuda is True) and (cuda_installed is False):
        self.use_cuda = False
        print('** Cuda not available for Fourier transform.')
        print('** Performing the Fourier transform on the CPU.')

    # Check whether to use MKL
    self.use_mkl = mkl_installed

    # Initialize the object for calculation on the GPU
    if self.use_cuda:
        # Initialize the dimension of the grid and blocks
        self.dim_grid, self.dim_block = cuda_tpb_bpg_2d(Nz, Nr)

        # Initialize 1d buffers for cufft
        self.buffer1d_in = cuda.device_array((Nz * Nr,), dtype=np.complex128)
        self.buffer1d_out = cuda.device_array((Nz * Nr,), dtype=np.complex128)
        # Initialize the cuda libraries objects
        self.fft = cufft.FFTPlan(shape=(Nz,), itype=np.complex128,
                                 otype=np.complex128, batch=Nr)
        self.blas = cublas.Blas()
        # For normalization of the iFFT
        self.inv_Nz = 1. / Nz

    # Initialize the object for calculation on the CPU
    else:
        # For MKL FFT
        if self.use_mkl:
            # Initialize the MKL plan with a dummy array
            spect_buffer = np.zeros((Nz, Nr), dtype=np.complex128)
            self.mklfft = MKLFFT(spect_buffer)
        # For FFTW
        else:
            # Determine the number of threads
            if nthreads is None:
                # Get the default number of threads for numba
                nthreads = numba.config.NUMBA_NUM_THREADS
            # Initialize the FFTW plans with dummy arrays
            interp_buffer = np.zeros((Nz, Nr), dtype=np.complex128)
            spect_buffer = np.zeros((Nz, Nr), dtype=np.complex128)
            self.fft = pyfftw.FFTW(interp_buffer, spect_buffer,
                axes=(0,), direction='FFTW_FORWARD', threads=nthreads)
            self.ifft = pyfftw.FFTW(spect_buffer, interp_buffer,
                axes=(0,), direction='FFTW_BACKWARD', threads=nthreads)
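# Hedged usage sketch: a CPU transform plan for a 200 x 50 (Nz x Nr) grid,
# using 4 FFTW threads (the thread count is only relevant when the FFTW
# backend, rather than MKL or CUDA, is active):
#
#     fft_obj = FFT(Nr=50, Nz=200, use_cuda=False, nthreads=4)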
def __init__(self, q, m, n, Npz, zmin, zmax, Npr, rmin, rmax, Nptheta, dt,
             ux_m=0., uy_m=0., uz_m=0., ux_th=0., uy_th=0., uz_th=0.,
             dens_func=None, continuous_injection=True, grid_shape=None,
             particle_shape='linear', use_cuda=False, dz_particles=None):
    """
    Initialize a uniform set of particles

    Parameters
    ----------
    q : float (in Coulombs)
        Charge of the particle species

    m : float (in kg)
        Mass of the particle species

    n : float (in particles per m^3)
        Peak density of particles

    Npz : int
        Number of macroparticles along the z axis

    zmin, zmax : floats (in meters)
        z positions between which the particles are initialized

    Npr : int
        Number of macroparticles along the r axis

    rmin, rmax : floats (in meters)
        r positions between which the particles are initialized

    Nptheta : int
        Number of macroparticles along theta

    dt : float (in seconds)
        The timestep for the particle pusher

    ux_m, uy_m, uz_m: floats (dimensionless), optional
        Normalized mean momenta of the injected particles in each direction

    ux_th, uy_th, uz_th: floats (dimensionless), optional
        Normalized thermal momenta in each direction

    dens_func : callable, optional
        A function of the form:
            def dens_func( z, r ) ...
        where z and r are 1d arrays, and which returns a 1d array
        containing the density *relative to n* (i.e. a number between
        0 and 1) at the given positions

    continuous_injection : bool, optional
        Whether to continuously inject the particles,
        in the case of a moving window

    grid_shape: tuple, optional
        Needed when running on the GPU
        The shape of the local grid (including guard cells), i.e.
        a tuple of the form (Nz, Nr). This is needed in order
        to initialize the sorting of the particles per cell.

    particle_shape: str, optional
        Set the particle shape for the charge/current deposition.
        Possible values are 'linear' and 'cubic' for first and third
        order particle shape factors.

    use_cuda : bool, optional
        Whether to use the GPU or not.

    dz_particles: float (in meter), optional
        The spacing between particles in `z` (for continuous injection)
        In most cases, the spacing between particles can be inferred
        from the arguments `zmin`, `zmax` and `Npz`. However, when
        there are no particles in the initial box (`Npz = 0`),
        `dz_particles` needs to be explicitly passed.
""" # Define whether or not to use the GPU self.use_cuda = use_cuda if (self.use_cuda == True) and (cuda_installed == False): warnings.warn('Cuda not available for the particles.\n' 'Performing the particle operations on the CPU.') self.use_cuda = False # Generate evenly-spaced particles Ntot, x, y, z, ux, uy, uz, inv_gamma, w = generate_evenly_spaced( Npz, zmin, zmax, Npr, rmin, rmax, Nptheta, n, dens_func, ux_m, uy_m, uz_m, ux_th, uy_th, uz_th) # Register the properties of the particles # (Necessary for the pusher, and when adding more particles later, ) self.Ntot = Ntot self.q = q self.m = m self.dt = dt # Register the particle arrarys self.x = x self.y = y self.z = z self.ux = ux self.uy = uy self.uz = uz self.inv_gamma = inv_gamma self.w = w # Initialize the fields array (at the positions of the particles) self.Ez = np.zeros(Ntot) self.Ex = np.zeros(Ntot) self.Ey = np.zeros(Ntot) self.Bz = np.zeros(Ntot) self.Bx = np.zeros(Ntot) self.By = np.zeros(Ntot) # The particle injector stores information that is useful in order # continuously inject particles in the simulation, with moving window self.continuous_injection = continuous_injection if continuous_injection: self.injector = ContinuousInjector(Npz, zmin, zmax, dz_particles, Npr, rmin, rmax, Nptheta, n, dens_func, ux_m, uy_m, uz_m, ux_th, uy_th, uz_th) else: self.injector = None # By default, there is no particle tracking (see method track) self.tracker = None # By default, the species experiences no elementary processes # (see method make_ionizable and activate_compton) self.ionizer = None self.compton_scatterer = None # Total number of quantities (necessary in MPI communications) self.n_integer_quantities = 0 self.n_float_quantities = 8 # x, y, z, ux, uy, uz, inv_gamma, w # Register particle shape self.particle_shape = particle_shape # Register boolean that records whether field array should # be rearranged whenever sorting particles # (gets modified during the main PIC loop, on GPU) self.keep_fields_sorted = False # Allocate arrays and register variables when using CUDA if self.use_cuda: if grid_shape is None: raise ValueError( "A `grid_shape` is needed when running " "on the GPU.\nPlease provide it when initializing particles." ) # Register grid shape self.grid_shape = grid_shape # Allocate arrays for the particles sorting when using CUDA # Most required arrays always stay on GPU Nz, Nr = grid_shape self.cell_idx = cuda.device_array(Ntot, dtype=np.int32) self.sorted_idx = cuda.device_array(Ntot, dtype=np.intp) self.prefix_sum = cuda.device_array(Nz * (Nr + 1), dtype=np.int32) # sorting buffers are initialized on CPU like other particle arrays # (because they are swapped with these arrays during sorting) self.sorting_buffer = np.empty(Ntot, dtype=np.float64) # Register integer thta records shift in the indices, # induced by the moving window self.prefix_sum_shift = 0 # Register boolean that records if the particles are sorted or not self.sorted = False # Define optimal number of CUDA threads per block for deposition # and gathering kernels (determined empirically) if particle_shape == "cubic": self.deposit_tpb = 32 self.gather_tpb = 256 else: self.deposit_tpb = 16 if cuda_gpu_model == "V100" else 8 self.gather_tpb = 128
def extract_slice_from_gpu(pref_sum_curr, N_area, species):
    """
    Extract the particles which have an index between pref_sum_curr and
    pref_sum_curr + N_area, and return them in dictionaries.

    Parameters
    ----------
    pref_sum_curr: int
        The starting index needed for the extraction process

    N_area: int
        The number of particles to extract.

    species: an fbpic Species object
        The species from which to extract data

    Returns
    -------
    particle_data : A dictionary of 1D float arrays (that are on the CPU)
        A dictionary that contains the particle data of
        the simulation (with normalized weights), including optional
        integer arrays (e.g. "id", "charge")
    """
    # Call the kernel that extracts the particles from the GPU
    dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d(N_area)
    # - General particle quantities
    part_data = cuda.device_array((8, N_area), dtype=np.float64)
    extract_particles_from_gpu[dim_grid_1d, dim_block_1d](
        pref_sum_curr, species.x, species.y, species.z,
        species.ux, species.uy, species.uz,
        species.w, species.inv_gamma, part_data)
    # - Optional particle arrays
    if species.tracker is not None:
        selected_particle_id = cuda.device_array((N_area,), dtype=np.uint64)
        extract_array_from_gpu[dim_grid_1d, dim_block_1d](
            pref_sum_curr, species.tracker.id, selected_particle_id)
    if species.ionizer is not None:
        selected_particle_charge = cuda.device_array((N_area,),
                                                     dtype=np.uint64)
        extract_array_from_gpu[dim_grid_1d, dim_block_1d](
            pref_sum_curr, species.ionizer.ionization_level,
            selected_particle_charge)
        selected_particle_weight = cuda.device_array((N_area,),
                                                     dtype=np.float64)
        extract_array_from_gpu[dim_grid_1d, dim_block_1d](
            pref_sum_curr, species.ionizer.w_times_level,
            selected_particle_weight)

    # Copy the GPU arrays to the host
    part_data = part_data.copy_to_host()
    particle_data = {
        'x': part_data[0], 'y': part_data[1], 'z': part_data[2],
        'ux': part_data[3], 'uy': part_data[4], 'uz': part_data[5],
        'w': part_data[6], 'inv_gamma': part_data[7]
    }
    if species.tracker is not None:
        particle_data['id'] = selected_particle_id.copy_to_host()
    if species.ionizer is not None:
        particle_data['charge'] = selected_particle_charge.copy_to_host()
        # Replace the particle weight
        particle_data['w'] = selected_particle_weight.copy_to_host()

    # Return the data as a dictionary
    return particle_data