def _update(self):
    """
    Rebuild the neighbour matrix by calling the compiled library.

    The per-particle neighbour count array, the row stride and the matrix
    storage are grown first if they are too small for the current particle
    count. The library return value is stored as the total number of
    neighbours; a negative return value trips an assertion.
    """
    pos = self._positions
    cells = self.cell_list

    # Sanity checks on the buffers that are handed to the C library.
    assert cells.cell_list is not None, "cell list is not initialised"
    assert cells.cell_list.dtype is ctypes.c_int, "bad datatype"
    assert pos.dtype is ctypes.c_double, "bad datatype"
    assert self._domain.cell_array.dtype is ctypes.c_int, "dtype"
    assert cells.cell_reverse_lookup.dtype is ctypes.c_int, "dtype"

    npart = self._n_func()

    # One neighbour count per particle.
    if self.ncount.ncomp < npart:
        self.ncount = host.Array(ncomp=npart, dtype=ctypes.c_int)

    # Row length: the fullest cell times the 27 cells of a 3x3x3 stencil.
    required_stride = cells.max_cell_contents_count * 27
    if self.stride.value < required_stride:
        self.stride.value = required_stride

    required_len = npart * self.stride.value
    if self.matrix.ncomp < required_len:
        self.matrix = host.Array(ncomp=required_len, dtype=ctypes.c_int)

    status = self._lib(
        ctypes.c_int(npart),
        pos.ctypes_data,
        cells.cell_list.ctypes_data,
        cells.offset,
        cells.cell_reverse_lookup.ctypes_data,
        self._domain.cell_array.ctypes_data,
        self.matrix.ctypes_data,
        self.ncount.ctypes_data,
        ctypes.c_int(self.stride.value),
        ctypes.c_double(self.cell_width**2.0),
    )
    assert status >= 0, "lib failed, return code: " + str(status)

    self.n_local = npart
    self.total_num_neighbours = status
def _init_escape_lib(self):
    """
    Build the lookup tables, escape buffers and library for escape detection.

    Creates the xor-code -> linear direction index table, the per-direction
    escape counters, the escape linked list and loads the 'EscapeGuard'
    library.
    """
    # Linear direction index -> xor code, one entry per direction (26).
    xor_codes = (
        1 ^ 2 ^ 4,
        2 ^ 1,
        32 ^ 2 ^ 1,
        4 ^ 1,
        1,
        32 ^ 1,
        4 ^ 1 ^ 16,
        1 ^ 16,
        32 ^ 16 ^ 1,
        2 ^ 4,
        2,
        32 ^ 2,
        4,
        32,
        4 ^ 16,
        16,
        32 ^ 16,
        8 ^ 2 ^ 4,
        2 ^ 8,
        32 ^ 2 ^ 8,
        4 ^ 8,
        8,
        32 ^ 8,
        4 ^ 8 ^ 16,
        8 ^ 16,
        32 ^ 16 ^ 8,
    )
    _lin_to_bin = np.array(xor_codes, dtype=ctypes.c_int)

    # Inverse map (xor code -> linear index); the largest code is 56, hence
    # 57 slots. Only the 26 slots named in the table are written here.
    self._bin_to_lin = data.ScalarArray(ncomp=57, dtype=ctypes.c_int)
    for lin_index, code in enumerate(_lin_to_bin):
        self._bin_to_lin[code] = lin_index

    # Number of escaping particles in each direction.
    self._escape_count = host.Array(np.zeros(26), dtype=ctypes.c_int)

    # Linked list storing the ids of escaping particles, laid out like the
    # cell list:
    # | [0-25 escape directions, index of first in direction]
    #   [26-end current id and index of next id, (id, next_index)] |
    empty_list = -1 * np.ones(26 + 2 * self.state.npart_local)
    self._escape_linked_list = host.Array(empty_list, dtype=ctypes.c_int)

    # Positions and the domain boundary must share a datatype.
    dtype = self.state.get_position_dat().dtype
    assert self.state.domain.boundary.dtype == dtype

    substitutions = {
        'SUB_REAL': self.state.get_position_dat().ctype,
        'SUB_INT': self._bin_to_lin.ctype
    }
    self._escape_guard_lib = ppmd.lib.build.lib_from_file_source(
        _LIB_SOURCES + 'EscapeGuard', 'EscapeGuard',
        substitutions)['EscapeGuard']
def _compress_particle_dats(self, num_slots_to_fill):
    """
    Compress the particle dats held in the state, removing empty rows.

    Does nothing beyond building the compressing library when the dats are
    already flagged as compressed. Otherwise the library is called with the
    stored empty slots and the state's local particle count is replaced by
    the count the library writes back.

    :arg num_slots_to_fill: number of slots passed to the library.
    """
    new_npart = host.Array([0], dtype=ctypes.c_int)

    if self._compressing_lib is None:
        self._build_compressing_lib()

    # Already gap free: nothing to do.
    if self.compressed is True:
        return

    self.compress_timer.start()

    self._compressing_lib(
        ctypes.c_int(num_slots_to_fill),
        ctypes.c_int(self.state.npart_local),
        self._move_empty_slots.ctypes_data,
        new_npart.ctypes_data,
        *[getattr(self.state, dat_name).ctypes_data
          for dat_name in self.state.particle_dats])

    self.state.npart_local = new_npart[0]
    self.compressed = True

    self.compress_timer.pause()
def get_shift(self):
    """
    Compute the position shifts for each of the 26 directions.

    :return: host.Array of 26 * 3 doubles, three components per entry of
        ``mpi.recv_modifiers``. A component is ``+extent`` when this process
        has coordinate 0 in a periodic dimension and the direction component
        is -1, ``-extent`` when it has the last coordinate in a periodic
        dimension and the direction component is +1, and 0 otherwise.
    """
    shifts = host.Array(ncomp=26 * 3, dtype=ctypes.c_double)

    dims = mpi.cartcomm_dims_xyz(self.comm)
    top = mpi.cartcomm_top_xyz(self.comm)
    periods = mpi.cartcomm_periods_xyz(self.comm)

    for dx in range(26):
        direction = mpi.recv_modifiers[dx]
        for axis in range(3):
            if top[axis] == 0 and periods[axis] == 1 \
                    and direction[axis] == -1:
                component = self.extent[axis]
            elif top[axis] == dims[axis] - 1 and periods[axis] == 1 \
                    and direction[axis] == 1:
                component = -1. * self.extent[axis]
            else:
                component = 0.0
            shifts[dx * 3 + axis] = component

    return shifts
def _cell_sort_setup(self):
    """
    Allocate the arrays used to build the cell list and mark setup as done.
    """
    n_cells = self._domain.cell_count
    max_particles = self._positions.max_npart

    # Cell list storage: one entry per particle and per cell, plus one.
    self._cell_list = host.Array(dtype=ct.c_int,
                                 ncomp=max_particles + n_cells + 1)

    # Occupancy of each cell, starting at zero.
    self._cell_contents_count = host.Array(np.zeros([n_cells]),
                                           dtype=ct.c_int)

    # Local particle id -> containing cell.
    self._cell_reverse_lookup = host.Array(dtype=ct.c_int,
                                           ncomp=max_particles)

    self._init = True
def _init_jstore(self, cell2part):
    """
    Ensure the per-thread neighbour index buffers are large enough.

    Each thread buffer must hold at least
    ``cell2part.max_cell_contents_count * 27`` ints. When the current
    buffers are too small they are replaced by buffers with 100 entries of
    headroom. The table of buffer addresses is rebuilt on every call so that
    ``self._jptrs`` always exists and matches ``self._jstore``.

    :arg cell2part: object exposing ``max_cell_contents_count``.
    :return: ctypes parameter pointing at the array of buffer addresses.
    """
    n = cell2part.max_cell_contents_count * 27

    if self._jstore[0].ncomp < n:
        self._jstore = [host.Array(ncomp=100 + n, dtype=ctypes.c_int)
                        for _ in range(runtime.NUM_THREADS)]

    # One raw address per thread buffer.
    self._jptrs = np.zeros(runtime.NUM_THREADS, ctypes.c_void_p)
    for tx in range(runtime.NUM_THREADS):
        self._jptrs[tx] = self._jstore[tx].ctypes_data.value

    # ``ndarray.ctypes.get_as_parameter()`` is a deprecated getter; the
    # ``_as_parameter_`` property returns the same value.
    return self._jptrs.ctypes._as_parameter_
def setup(self, n, positions, domain, cell_width):
    """
    Store the inputs, allocate the list storage and load 'NeighbourListv2'.

    :arg n: callable returning the current number of particles.
    :arg positions: particle positions.
    :arg domain: domain; its ``cell_array`` sizes the initial list.
    :arg cell_width: cell width; its square is stored as well.
    """
    assert self.cell_list.cell_list is not None, \
        "No cell to particle map setup"

    self.cell_width = cell_width
    self.cell_width_squared = host.Array(initial_value=cell_width**2,
                                         dtype=ct.c_double)
    self._domain = domain
    self._positions = positions
    self._n = n

    self.neighbour_starting_points = host.Array(ncomp=n() + 1,
                                                dtype=ct.c_long)

    # Initial list length estimate; both the particle count used and the
    # resulting length are floored at 10.
    npart = max(n(), 10)
    n_cells = domain.cell_array[0] * domain.cell_array[1] * \
        domain.cell_array[2]
    initial_len = max(math.ceil(15. * (npart**2) / n_cells), 10)

    self.max_len = host.Array(initial_value=initial_len, dtype=ct.c_long)
    self.list = host.Array(ncomp=initial_len, dtype=ct.c_int)

    self._return_code = host.Array(ncomp=1, dtype=ct.c_int)
    self._return_code.data[0] = -1

    substitutions = {
        'SUB_REAL': 'double',
        'SUB_INT': 'int',
        'SUB_LONG': 'long'
    }
    self._neighbour_lib = ppmd.lib.build.lib_from_file_source(
        _LIB_SOURCES + 'NeighbourListv2', 'NeighbourListv2',
        substitutions)['NeighbourListv2']
def __init__(self, n, positions, domain, cell_width, cell_list):
    """
    Store the inputs, allocate placeholder storage and load the library.

    :arg n: callable returning the current number of particles.
    :arg positions: particle positions.
    :arg domain: domain the particles live in.
    :arg cell_width: cell width.
    :arg cell_list: cell list used to build the neighbour matrix.
    """
    self._n_func = n
    self._positions = positions
    self._domain = domain
    self.cell_width = cell_width
    self.cell_list = cell_list

    self.version_id = 0
    self.domain_id = 0
    self.n_local = None
    self.timer_update = ppmd.opt.Timer(runtime.TIMER)

    # Single-element placeholders; _update grows them when needed.
    self.matrix = host.Array(ncomp=1, dtype=ctypes.c_int)
    self.ncount = host.Array(ncomp=1, dtype=ctypes.c_int)
    self.stride = ctypes.c_int(0)
    self.total_num_neighbours = 0
    self.max_size = 0

    lib_dir = os.path.join(os.path.dirname(__file__), 'lib')
    source_base = lib_dir + '/NeighbourMatrixSource'
    libs = build.lib_from_file_source(source_base, 'OMPNeighbourMatrix')
    self._lib = libs['OMPNeighbourMatrix']
    self._lib.restype = ctypes.c_longlong
def __init__(self, kernel=None, dat_dict=None, shell_cutoff=None):
    """
    Generate and build the loop library for ``kernel`` over ``dat_dict``.

    :arg kernel: kernel to execute; its ``name`` names the built library.
    :arg dat_dict: dats passed to the kernel.
    :arg shell_cutoff: stored as ``self.shell_cutoff``.
    """
    self._dat_dict = access.DatArgStore(self._get_allowed_types(), dat_dict)
    self._cc = build.TMPCC
    self._kernel = kernel
    self.shell_cutoff = shell_cutoff

    self.loop_timer = modules.code_timer.LoopTimer()
    self.wrapper_timer = opt.Timer(runtime.TIMER)
    self.list_timer = opt.Timer(runtime.TIMER)

    self._gather_space = host.ThreadSpace(100, ctypes.c_uint8)
    self._generate()
    self._offset_list = host.Array(ncomp=27, dtype=ctypes.c_int)

    self._lib = build.simple_lib_creator(self._generate_header_source(),
                                         self._components['LIB_SRC'],
                                         self._kernel.name,
                                         CC=self._cc)

    # Take the group from the first PositionDat among the arguments.
    self._group = None
    for item in self._dat_dict.items():
        candidate = item[1][0]
        if issubclass(type(candidate), data.PositionDat):
            self._group = candidate.group
            break

    # A missing group is tolerated; the cell list is then not created here.
    if self._group is not None:
        self._make_cell_list(self._group)

    self._kernel_execution_count = INT64(0)
    self._invocations = 0

    # One scratch index buffer per thread.
    self._jstore = [host.Array(ncomp=100, dtype=ctypes.c_int)
                    for _ in range(runtime.NUM_THREADS)]
def __init__(self, state_in=None):
    """
    Create the timers and empty library/buffer slots.

    :arg state_in: state this instance acts on, stored as ``self.state``.
    """
    self.state = state_in

    # Timers.
    self.timer_apply = ppmd.opt.Timer(runtime.TIMER, 0)
    self.timer_search = ppmd.opt.Timer(runtime.TIMER, 0)
    self.timer_move = ppmd.opt.Timer(runtime.TIMER, 0)

    # Libraries and escape buffers start unset.
    self._one_process_pbc_lib = None
    self._escape_guard_lib = None
    self._escape_count = None
    self._escape_linked_list = None

    self._flag = host.Array(ncomp=1, dtype=ctypes.c_int)
def setup(self, n, positions, domain, cell_width):
    """
    Store the inputs, allocate list storage and load 'NeighbourListNonN3'.

    The cell list is set up first if it has not been already.

    :arg n: callable returning the current number of particles.
    :arg positions: particle positions; its ``ctype`` is substituted into
        the library source.
    :arg domain: domain; its ``cell_array`` sizes the initial list.
    :arg cell_width: cell width; its square is stored as well.
    """
    # Set up the cell list if not done already (also handles domain decomp).
    if self.cell_list.cell_list is None:
        self.cell_list.setup(n, positions, domain, cell_width)

    self.cell_width = cell_width
    self.cell_width_squared = host.Array(initial_value=cell_width**2,
                                         dtype=ct.c_double)
    self._domain = domain
    self._positions = positions
    self._n = n

    self.neighbour_starting_points = host.Array(ncomp=n() + 1,
                                                dtype=ct.c_long)

    # Initial list length estimate.
    cells = domain.cell_array
    initial_len = math.ceil(27. * (n()**2) / (cells[0] * cells[1] * cells[2]))

    self.max_len = host.Array(initial_value=initial_len, dtype=ct.c_long)
    self.list = host.Array(ncomp=initial_len, dtype=ct.c_int)

    self._return_code = host.Array(ncomp=1, dtype=ct.c_int)
    self._return_code.data[0] = -1

    substitutions = {
        'SUB_REAL': self._positions.ctype,
        'SUB_INT': 'int',
        'SUB_LONG': 'long'
    }
    self._neighbour_lib = ppmd.lib.build.lib_from_file_source(
        _LIB_SOURCES + 'NeighbourListNonN3', 'NeighbourListNonN3',
        substitutions)['NeighbourListNonN3']

    self.domain_id = self._domain.version_id
def get_boundary_cells(self):
    """
    Return a host.Array containing the boundary cell indices of the domain.

    The array is rebuilt only when the cell array version is newer than the
    cached one. Cells are written face by face: the two z faces first, then
    the two y faces, then the two x faces, each later pair skipping the
    cells already covered.
    """
    if self._boundary_cell_version < self._cell_array.version:
        dims = self._cell_array
        nx, ny, nz = dims[0], dims[1], dims[2]

        # Shell of the (n-2)^3 block: that block minus the (n-4)^3 block.
        n_boundary = (nx - 2) * (ny - 2) * (nz - 2) - \
            (nx - 4) * (ny - 4) * (nz - 4)
        self._boundary_cells = host.Array(ncomp=n_boundary,
                                          dtype=ctypes.c_int)
        out = self._boundary_cells

        pos = 0

        # z faces: layers iz = 1 and iz = nz - 2.
        face_xy = (nx - 2) * (ny - 2)
        for ix in range(1, nx - 1):
            for iy in range(1, ny - 1):
                out[pos] = ix + nx * (iy + ny)
                out[pos + face_xy] = ix + nx * (iy + (nz - 2) * ny)
                pos += 1
        pos += face_xy

        # y faces: rows iy = 1 and iy = ny - 2, z faces excluded.
        face_xz = (nx - 2) * (nz - 4)
        for ix in range(1, nx - 1):
            for iz in range(2, nz - 2):
                out[pos] = ix + nx * (1 + iz * ny)
                out[pos + face_xz] = ix + nx * ((ny - 2) + iz * ny)
                pos += 1
        pos += face_xz

        # x faces: columns ix = 1 and ix = nx - 2, y and z faces excluded.
        face_yz = (ny - 4) * (nz - 4)
        for iy in range(2, ny - 2):
            for iz in range(2, nz - 2):
                out[pos] = 1 + nx * (iy + iz * ny)
                out[pos + face_yz] = nx - 2 + nx * (iy + iz * ny)
                pos += 1
        pos += face_yz

        self._boundary_cell_version = self._cell_array.version

    return self._boundary_cells
def __init__(self, *args, **kwargs):
    """
    Initialise the particle move/compress bookkeeping.

    :arg state: (keyword, required) the state whose particle dats are moved
        and compressed.
    :raises KeyError: if ``state`` is not passed as a keyword argument.
    """
    self.state = kwargs['state']

    # Buffers and libraries for moving particles; created on first use.
    self._move_dir_recv_totals = None
    self._move_dir_send_totals = None
    self._move_shift_array = host.NullDoubleArray
    self._move_send_buffer = None
    self._move_recv_buffer = None
    self._move_unpacking_lib = None
    self._move_packing_lib = None
    self._move_empty_slots = host.Array(ncomp=4, dtype=ctypes.c_int)
    self._move_used_free_slot_count = None
    self._total_ncomp = None

    # Timers and MPI status. Each is created once; the original built
    # ``move_timer`` and ``_status`` twice and discarded the first pair.
    self.move_timer = ppmd.opt.Timer(runtime.TIMER, 0)
    self.compress_timer = ppmd.opt.Timer(runtime.TIMER, 0)
    self._status = mpi.MPI.Status()

    # Compressing vars.
    self._compressing_lib = None

    # Bool to determine if the held :class:`~data.ParticleDat` members have
    # gaps in them.
    self.compressed = True

    self.uncompressed_n = False
def __init__(self, npart=0, ncomp=1, initial_value=None, name=None,
             dtype=ctypes.c_double):
    """
    Create the storage and bookkeeping for a particle dat.

    :arg npart: number of rows requested from the backing array.
    :arg ncomp: number of components per particle, must be positive.
    :arg initial_value: forwarded to the backing array constructor.
    :arg name: name of the instance.
    :arg dtype: element datatype.
    """
    assert ncomp > 0, "Negative number of components is not supported."

    # Version ids: internal first, then halo.
    self._vid_int = 0
    self._vid_halo = -1
    self.vid_halo_cell_list = -1
    self._halo_exchange_count = 0

    self.group = None

    # Timers.
    self.timer_comm = ppmd.opt.Timer()
    self.timer_pack = ppmd.opt.Timer()
    self.timer_transfer = ppmd.opt.Timer(runtime.TIMER, 0)
    self.timer_transfer_1 = ppmd.opt.Timer(runtime.TIMER, 0)
    self.timer_transfer_2 = ppmd.opt.Timer(runtime.TIMER, 0)
    self.timer_transfer_resize = ppmd.opt.Timer(runtime.TIMER, 0)

    #: The name of the ParticleDat instance.
    self.name = name

    self.idtype = dtype
    self._dat = host._make_array(initial_value=initial_value,
                                 dtype=dtype,
                                 nrow=npart,
                                 ncol=ncomp)
    self._ptr = None
    self._ptr_count = 0

    #: The maximum number of particles which can be stored within this
    #: particle dat.
    self.max_npart = self._dat.shape[0]

    #: The number of particles with properties stored in the particle dat.
    self.npart_local = self._dat.shape[0]

    #: The number of components stored for each particle.
    self.ncomp = self.ncol

    #: The starting index of the halo region of the particle dat.
    self.halo_start = self.npart_local

    self.npart_halo = 0

    #: The number of particles currently stored within the halo region of
    #: the particle dat.
    self.npart_local_halo = 0

    self._resize_callback = None
    self._version = 0
    self._exchange_lib = None
    self._tmp_halo_space = host.Array(ncomp=1, dtype=self.dtype)

    # Scratch space for norms/maxes etc.
    self._norm_tmp = ScalarArray(ncomp=1, dtype=self.dtype)
    self._linf_norm_lib = None

    # The default communicator is world.
    self.comm = mpi.MPI.COMM_WORLD

    self._particle_dat_modifier = ParticleDatModifier(
        self, type(self) == PositionDat)
def __init__(self, domain, cell_width, positions):
    """
    Set up the cell decomposition and build the CUDA sort/fill library.

    The interior of the domain is divided into whole cells no narrower than
    ``cell_width``; padding cells are then added on every side and the
    stored boundary is extended by the same amount.

    :arg domain: domain providing ``boundary`` and ``cell_edge_lengths``.
    :arg cell_width: minimum width of a cell, must be positive.
    :arg positions: particle positions, stored as ``self.positions``.
    """
    self.domain = domain
    boundary = domain.boundary

    assert cell_width > 0, "bad cell width"
    assert boundary[1] > boundary[0], "nonsensical boundary"
    assert boundary[3] > boundary[2], "nonsensical boundary"
    assert boundary[5] > boundary[4], "nonsensical boundary"

    self.positions = positions
    self.cell_array = host.Array(ncomp=3, dtype=ctypes.c_int)
    self.cell_sizes = host.Array(ncomp=3, dtype=ctypes.c_double)

    # Sizes considering only the interior of the domain.
    lengths = [float(boundary[2 * d + 1] - boundary[2 * d])
               for d in range(3)]
    cell_array = [int(length / cell_width) for length in lengths]
    cell_sizes = [length / count
                  for length, count in zip(lengths, cell_array)]
    self.cell_sizes[:] = cell_sizes[:]

    # Padding per side in cells, and the matching real-space width.
    pads = [int(math.ceil(self.domain.cell_edge_lengths[d] /
                          cell_sizes[d])) + 1
            for d in range(3)]
    real_pads = [pads[d] * cell_sizes[d] for d in range(3)]

    for d in range(3):
        self.cell_array[d] = cell_array[d] + 2 * pads[d]

    self.boundary = host.Array(ncomp=6, dtype=ctypes.c_double)
    for d in range(3):
        self.boundary[2 * d] = boundary[2 * d] - real_pads[d]
        self.boundary[2 * d + 1] = boundary[2 * d + 1] + real_pads[d]

    # NOTE(review): this counts interior cells only, not the padded
    # self.cell_array - confirm that is intended.
    self.cell_count = cell_array[0] * cell_array[1] * cell_array[2]

    self.particle_layers = cuda_base.Array(ncomp=1, dtype=ctypes.c_int)
    self.cell_reverse_lookup = cuda_base.Array(ncomp=1, dtype=ctypes.c_int)
    self.cell_contents_count = cuda_base.Array(ncomp=self.cell_count,
                                               dtype=ctypes.c_int)
    self.matrix = cuda_base.Matrix(nrow=self.cell_count,
                                   ncol=1,
                                   dtype=ctypes.c_int)
    self.num_layers = 0

    source_path = str(ppmd.cuda.cuda_config.LIB_DIR) + \
        '/cudaSubCellOccupancyMatrixSource.cu'
    with open(source_path, 'r') as fh:
        _code = fh.read()

    header_path = str(ppmd.cuda.cuda_config.LIB_DIR) + \
        '/cudaSubCellOccupancyMatrixSource.h'
    with open(header_path, 'r') as fh:
        _header = fh.read()

    lib = cuda_build.simple_lib_creator(_header, _code,
                                        'SubCellOccupancyMatrix')
    self._sort_lib = lib['LayerSort']
    self._fill_lib = lib['PopMatrix']

    self.version_id = 0
def __init__(self, domain_func, cell_to_particle_map):
    """
    Allocate the halo exchange bookkeeping arrays and run the setup step.

    :arg domain_func: stored as ``self._domain_func``.
    :arg cell_to_particle_map: cell to particle map whose cell contents
        counts are exchanged.
    """
    self._timer = ppmd.opt.Timer(runtime.TIMER, 0, start=True)

    self._domain_func = domain_func
    self._domain = None
    self._cell_to_particle_map = cell_to_particle_map

    self._ca_copy = [None, None, None]
    self._version = -1
    self._init = False

    # Cell groups, their start/end indices and the exchange metadata.
    self._boundary_cell_groups = host.Array(dtype=ctypes.c_int)
    self._boundary_groups_start_end_indices = host.Array(
        ncomp=7, dtype=ctypes.c_int)
    self._halo_cell_groups = host.Array(dtype=ctypes.c_int)
    self._halo_groups_start_end_indices = host.Array(ncomp=7,
                                                     dtype=ctypes.c_int)
    self._boundary_groups_contents_array = host.Array(dtype=ctypes.c_int)
    self._exchange_sizes = host.Array(ncomp=6, dtype=ctypes.c_int)

    self._send_ranks = host.Array(ncomp=6, dtype=ctypes.c_int)
    self._recv_ranks = host.Array(ncomp=6, dtype=ctypes.c_int)

    self._h_count = ctypes.c_int(0)
    self._t_count = ctypes.c_int(0)

    self._h_tmp = host.Array(ncomp=10, dtype=ctypes.c_int)
    self._b_tmp = host.Array(ncomp=10, dtype=ctypes.c_int)

    self.dir_counts = host.Array(ncomp=6, dtype=ctypes.c_int)

    self._halo_shifts = None

    # Ensure the first update happens.
    for arr in (self._boundary_cell_groups,
                self._boundary_groups_start_end_indices,
                self._halo_cell_groups,
                self._halo_groups_start_end_indices,
                self._boundary_groups_contents_array,
                self._exchange_sizes):
        arr.inc_version(-1)

    self._setup()

    self._exchange_sizes_lib = None
    self._cell_contents_count_tmp = None
def exchange_cell_counts(self):
    """
    Exchange the contents count of cells between processes.

    This is provided as a method in halo to avoid repeated exchanging of
    cell occupancy counts if multiple ParticleDat objects are being
    communicated.

    :return: tuple ``(h_count, t_count)`` as written by the exchange
        library: the number of halo particles and the amount of temporary
        space needed.
    """
    self._update_domain()

    if self._exchange_sizes_lib is None:
        _es_args = '''
        const int f_MPI_COMM,             // F90 comm from mpi4py
        const int * RESTRICT SEND_RANKS,  // send directions
        const int * RESTRICT RECV_RANKS,  // recv directions
        const int * RESTRICT h_ind,       // halo indices
        const int * RESTRICT b_ind,       // local b indices
        const int * RESTRICT h_arr,       // h cell indices
        const int * RESTRICT b_arr,       // b cell indices
        int * RESTRICT ccc,               // cell contents count
        int * RESTRICT h_count,           // number of halo particles
        int * RESTRICT t_count,           // amount of tmp space needed
        int * RESTRICT h_tmp,             // tmp space for recving
        int * RESTRICT b_tmp,             // tmp space for sending
        int * RESTRICT dir_counts         // expected recv counts
        '''

        _es_header = '''
        #include <generic.h>
        #include <mpi.h>
        #include <iostream>
        using namespace std;
        #define RESTRICT %(RESTRICT)s

        extern "C" void HALO_ES_LIB(%(ARGS)s);
        '''

        _es_code = '''

        void HALO_ES_LIB(%(ARGS)s){
            *h_count = 0;
            *t_count = 0;

            // get mpi comm and rank
            MPI_Comm MPI_COMM = MPI_Comm_f2c(f_MPI_COMM);
            int rank = -1; MPI_Comm_rank( MPI_COMM, &rank );
            MPI_Status MPI_STATUS;

            // [W E] [N S] [O I]
            for( int dir=0 ; dir<6 ; dir++ ){

                //cout << "dir " << dir << "-------" << endl;

                const int dir_s = b_ind[dir];             // start index
                const int dir_c = b_ind[dir+1] - dir_s;   // cell count

                const int dir_s_r = h_ind[dir];             // start index
                const int dir_c_r = h_ind[dir+1] - dir_s_r; // cell count

                int tmp_count = 0;
                for( int ix=0 ; ix<dir_c ; ix++ ){
                    b_tmp[ix] = ccc[b_arr[dir_s + ix]];    // copy into
                                                           // send buffer

                    tmp_count += ccc[b_arr[dir_s + ix]];
                }

                *t_count = MAX(*t_count, tmp_count);

                if(rank == RECV_RANKS[dir]){

                    for( int tx=0 ; tx < dir_c ; tx++ ){
                        h_tmp[tx] = b_tmp[tx];
                    }

                } else {
                    MPI_Sendrecv ((void *) b_tmp, dir_c, MPI_INT,
                                  SEND_RANKS[dir], rank,
                                  (void *) h_tmp, dir_c_r, MPI_INT,
                                  RECV_RANKS[dir], RECV_RANKS[dir],
                                  MPI_COMM, &MPI_STATUS);
                }

                tmp_count=0;
                for( int ix=0 ; ix<dir_c_r ; ix++ ){
                    ccc[h_arr[dir_s_r + ix]] = h_tmp[ix];
                    *h_count += h_tmp[ix];
                    tmp_count += h_tmp[ix];
                }
                dir_counts[dir] = tmp_count;
                *t_count = MAX(*t_count, tmp_count);
            }

            return;
        }
        '''

        _es_dict = {
            'ARGS': _es_args,
            'RESTRICT': build.MPI_CC.restrict_keyword
        }

        _es_header %= _es_dict
        _es_code %= _es_dict

        self._exchange_sizes_lib = build.simple_lib_creator(
            _es_header, _es_code, 'HALO_ES_LIB',
            CC=build.MPI_CC)['HALO_ES_LIB']

    # End of creation code -----------------------------------------------

    # Update internal arrays when the cell array has changed.
    if self._version < self._domain.cell_array.version:
        self._get_pairs()

    ccc = self._cell_to_particle_map.cell_contents_count

    # This branch allows the host side exchange code to be used for the gpu:
    # anything that is not a host.Array is staged through a host copy.
    if type(ccc) is host.Array:
        ccc_ptr = ccc.ctypes_data
    else:
        if self._cell_contents_count_tmp is None:
            self._cell_contents_count_tmp = host.Array(ncomp=ccc.ncomp,
                                                       dtype=ctypes.c_int)
        elif self._cell_contents_count_tmp.ncomp < ccc.ncomp:
            self._cell_contents_count_tmp.realloc(ccc.ncomp)

        # Make a local copy of the cell contents counts. Only the leading
        # ccc.ncomp entries are written, since the staging buffer may be
        # larger than ccc; this matches the copy back below.
        self._cell_contents_count_tmp[:ccc.ncomp:] = ccc[:]
        ccc_ptr = self._cell_contents_count_tmp.ctypes_data

    assert ccc_ptr is not None, "No valid Cell Contents Count pointer found."

    self._exchange_sizes_lib(
        ctypes.c_int(self._domain.comm.py2f()),
        self._send_ranks.ctypes_data,
        self._recv_ranks.ctypes_data,
        self._halo_groups_start_end_indices.ctypes_data,
        self._boundary_groups_start_end_indices.ctypes_data,
        self._halo_cell_groups.ctypes_data,
        self._boundary_cell_groups.ctypes_data,
        ccc_ptr,
        ctypes.byref(self._h_count),
        ctypes.byref(self._t_count),
        self._h_tmp.ctypes_data,
        self._b_tmp.ctypes_data,
        self.dir_counts.ctypes_data)

    # Copy the new sizes back to the original array (e.g. for gpu).
    if type(ccc) is not host.Array:
        ccc[:] = self._cell_contents_count_tmp[:ccc.ncomp:]

    return self._h_count.value, self._t_count.value
def _get_pairs(self):
    """
    Rebuild the boundary/halo cell pairings for the six exchange directions.

    Fills the send/recv ranks, the concatenated boundary and halo cell index
    arrays with their start/end offsets, and the per-direction shifts. The
    temporary send/recv buffers are grown to fit the largest direction.
    """
    self._update_domain()
    dom = self._domain

    pairs = (
        # As these are the first exchange the halos cannot contain anything
        # useful.
        create_halo_pairs_slice_halo(dom, Slice[1, 1:-1, 1:-1], (-1, 0, 0)),
        create_halo_pairs_slice_halo(dom, Slice[-2, 1:-1, 1:-1], (1, 0, 0)),
        # As with the above, no point exchanging anything extra in the z
        # direction.
        create_halo_pairs_slice_halo(dom, Slice[::, 1, 1:-1], (0, -1, 0)),
        create_halo_pairs_slice_halo(dom, Slice[::, -2, 1:-1], (0, 1, 0)),
        # Exchange all halo cells from x and y.
        create_halo_pairs_slice_halo(dom, Slice[::, ::, 1], (0, 0, -1)),
        create_halo_pairs_slice_halo(dom, Slice[::, ::, -2], (0, 0, 1)),
    )

    b_starts = np.zeros(1, dtype=ctypes.c_int)
    b_cells = np.zeros(0, dtype=ctypes.c_int)
    h_starts = np.zeros(1, dtype=ctypes.c_int)
    h_cells = np.zeros(0, dtype=ctypes.c_int)
    shifts = np.zeros(0, dtype=ctypes.c_double)

    # The temporary buffers never shrink below 10 entries.
    b_tmp_len = 10
    h_tmp_len = 10

    for hx, pair in enumerate(pairs):
        n_boundary = len(pair[0])
        n_halo = len(pair[1])

        b_tmp_len = max(b_tmp_len, n_boundary)
        h_tmp_len = max(h_tmp_len, n_halo)

        # Running boundary and halo start indices.
        b_starts = np.append(b_starts,
                             ctypes.c_int(n_boundary + b_starts[-1]))
        h_starts = np.append(h_starts, ctypes.c_int(n_halo + h_starts[-1]))

        # Actual cell indices.
        b_cells = np.append(b_cells, pair[0])
        h_cells = np.append(h_cells, pair[1])

        # Offset shifts for periodic boundary.
        shifts = np.append(shifts, pair[2])

        self._send_ranks[hx] = pair[3]
        self._recv_ranks[hx] = pair[4]

    if b_tmp_len > self._b_tmp.ncomp:
        self._b_tmp.realloc(b_tmp_len)

    if h_tmp_len > self._h_tmp.ncomp:
        self._h_tmp.realloc(h_tmp_len)

    # Indices into the arrays of cell indices.
    self._boundary_groups_start_end_indices = host.Array(
        b_starts, dtype=ctypes.c_int)
    self._halo_groups_start_end_indices = host.Array(h_starts,
                                                     dtype=ctypes.c_int)

    # Cell indices.
    self._boundary_cell_groups = host.Array(b_cells, dtype=ctypes.c_int)
    self._halo_cell_groups = host.Array(h_cells, dtype=ctypes.c_int)

    # Shifts for each direction.
    self._halo_shifts = host.Array(shifts, dtype=ctypes.c_double)

    self._version = self._domain.cell_array.version
def move_to_neighbour(self,
                      ids_directions_list=None,
                      dir_send_totals=None,
                      shifts=None):
    """
    Move particles using the linked list.

    Packs the leaving particles, exchanges counts and data with the
    neighbouring processes, unpacks the arriving particles into the freed
    slots and compresses the particle dats if more particles left than
    arrived.

    :arg host.Array ids_directions_list(int): Linked list of ids from
        directions.
    :arg host.Array dir_send_totals(int): 26 element array of number of
        particles traveling in each direction.
    :arg host.Array shifts(double): 78 element array (3 components for each
        of the 26 directions) of the shifts to apply when moving particles.
    :return: always ``True``.
    """
    self.move_timer.start()

    if self._move_packing_lib is None:
        self._move_packing_lib = _move_controller.build_pack_lib(
            self.state)

    _send_total = dir_send_totals.data.sum()

    # Make/resize send buffer.
    if self._move_send_buffer is None:
        self._move_send_buffer = host.Array(ncomp=self._total_ncomp *
                                            _send_total,
                                            dtype=ctypes.c_byte)
    elif self._move_send_buffer.ncomp < self._total_ncomp * _send_total:
        self._move_send_buffer.realloc(self._total_ncomp * _send_total)

    # Make recv sizes array.
    if self._move_dir_recv_totals is None:
        self._move_dir_recv_totals = host.Array(ncomp=26,
                                                dtype=ctypes.c_int)

    # Exchange number of particles about to be sent.
    self._move_dir_send_totals = dir_send_totals
    self._move_dir_recv_totals.zero()
    self._move_exchange_send_recv_sizes()

    # Resize recv buffer.
    _recv_total = self._move_dir_recv_totals.data.sum()

    # The library uses uint_8, so a byte must be exactly one char wide.
    assert ctypes.sizeof(ctypes.c_byte) == 1

    if self._move_recv_buffer is None:
        self._move_recv_buffer = host.Array(ncomp=self._total_ncomp *
                                            _recv_total,
                                            dtype=ctypes.c_byte)
    elif self._move_recv_buffer.ncomp < self._total_ncomp * _recv_total:
        self._move_recv_buffer.realloc(self._total_ncomp * _recv_total)

    # Make sure every dat can hold the arriving particles.
    for ix in self.state.particle_dats:
        _d = getattr(self.state, ix)
        if _recv_total + self.state.npart_local > _d.max_npart:
            _d.resize(_recv_total + self.state.npart_local)

    # Empty slots store.
    if _send_total > 0:
        self._resize_empty_slot_store(_send_total)

    # Pack particles to send.
    assert shifts.dtype == ctypes.c_double
    self._move_packing_lib(
        self._move_send_buffer.ctypes_data,
        shifts.ctypes_data,
        ids_directions_list.ctypes_data,
        self._move_empty_slots.ctypes_data,
        *[getattr(self.state, n).ctypes_data
          for n in self.state.particle_dats])

    # Sort empty slots.
    self._move_empty_slots.data[0:_send_total:].sort()

    # Exchange particle data.
    self._exchange_move_send_recv_buffers()

    # Create unpacking lib.
    if self._move_unpacking_lib is None:
        self._move_unpacking_lib = _move_controller.build_unpack_lib(
            self.state)

    # Unpack recv buffer.
    self._move_unpacking_lib(
        ctypes.c_int(_recv_total),
        ctypes.c_int(_send_total),
        ctypes.c_int(self.state.npart_local),
        self._move_empty_slots.ctypes_data,
        self._move_recv_buffer.ctypes_data,
        *[getattr(self.state, n).ctypes_data
          for n in self.state.particle_dats])

    # The original recomputed the 26 send/recv ranks here on every call and
    # never used them; that dead loop has been removed.

    if _recv_total < _send_total:
        # More particles left than arrived: gaps remain. Move the unused
        # empty slots to the front of the store for the compress step.
        self.compressed = False
        _tmp = self._move_empty_slots.data[_recv_total:_send_total:]
        self._move_empty_slots.data[0:_send_total - _recv_total:] = \
            np.array(_tmp, copy=True)
    else:
        self.state.npart_local = \
            self.state.npart_local + _recv_total - _send_total

    # Compress particle dats (returns early when already compressed).
    self._compress_particle_dats(_send_total - _recv_total)

    if _send_total > 0 or _recv_total > 0:
        self.state.invalidate_lists()

    self.move_timer.pause()

    return True
def __init__(self, domain, eps=10.**-6, real_cutoff=None, alpha=None, recip_cutoff=None, recip_nmax=None, shared_memory=False, shell_width=None, work_ratio=1.0, force_unit=1.0, energy_unit=1.0):
    """
    Derive the Ewald parameters for an orthogonal box and allocate storage.

    :arg domain: domain whose ``extent`` gives the box side lengths.
    :arg eps: tolerance used to derive the parameters that are not passed.
    :arg real_cutoff: real space cutoff; derived from ``alpha`` if None.
    :arg alpha: Ewald alpha; derived from ``real_cutoff`` if None.
    :arg recip_cutoff: reciprocal space cutoff; derived if None.
    :arg recip_nmax: three ints overriding the number of reciprocal vectors
        per direction.
    :arg shared_memory: one of False, 'omp' or 'mpi'; forwarded to the
        GlobalArray instances.
    :arg shell_width: stored as ``self.shell_width``.
    :arg work_ratio: not referenced in this constructor.
    :arg force_unit: substituted into the kernels as SUB_FORCE_UNIT.
    :arg energy_unit: substituted into the kernels as SUB_ENERGY_UNIT.

    NOTE(review): if both ``alpha`` and ``real_cutoff`` are None the
    division ``ss / real_cutoff`` raises TypeError before the asserts below
    are reached.
    """
    self.domain = domain
    self.eps = float(eps)

    assert shared_memory in (False, 'omp', 'mpi')

    # Starting value derived from the tolerance alone.
    ss = cmath.sqrt(scipy.special.lambertw(1. / eps)).real

    # Fill in whichever of alpha / real_cutoff was not passed.
    if alpha is not None and real_cutoff is not None and recip_cutoff is not None:
        pass
    elif alpha is not None and real_cutoff is not None:
        ss = real_cutoff * sqrt(alpha)
    elif alpha is None:
        alpha = (ss / real_cutoff)**2.
    else:
        real_cutoff = ss / sqrt(alpha)

    assert alpha is not None, "no alpha deduced/passed"
    assert real_cutoff is not None, "no real_cutoff deduced/passed"

    self.real_cutoff = float(real_cutoff)
    """Real space cutoff"""
    self.shell_width = shell_width
    """Real space padding width"""
    self.alpha = float(alpha)
    """alpha"""

    # these parts are specific to the orthongonal box
    extent = self.domain.extent
    lx = (extent[0], 0., 0.)
    ly = (0., extent[1], 0.)
    lz = (0., 0., extent[2])
    ivolume = 1. / np.dot(lx, np.cross(ly, lz))

    # Reciprocal lattice vectors.
    gx = np.cross(ly, lz) * ivolume * 2. * pi
    gy = np.cross(lz, lx) * ivolume * 2. * pi
    gz = np.cross(lx, ly) * ivolume * 2. * pi

    sqrtalpha = sqrt(alpha)

    # First estimate of the number of reciprocal vectors per direction.
    nmax_x = round(ss * extent[0] * sqrtalpha / pi)
    nmax_y = round(ss * extent[1] * sqrtalpha / pi)
    nmax_z = round(ss * extent[2] * sqrtalpha / pi)

    gxl = np.linalg.norm(gx)
    gyl = np.linalg.norm(gy)
    gzl = np.linalg.norm(gz)

    # Reciprocal cutoff: passed in, or the shortest reach of the estimates.
    if recip_cutoff is None:
        max_len = min(gxl * float(nmax_x), gyl * float(nmax_y), gzl * float(nmax_z))
    else:
        max_len = recip_cutoff

    # Final vector counts: passed in, or enough to cover the cutoff.
    if recip_nmax is None:
        nmax_x = int(ceil(max_len / gxl))
        nmax_y = int(ceil(max_len / gyl))
        nmax_z = int(ceil(max_len / gzl))
    else:
        nmax_x = recip_nmax[0]
        nmax_y = recip_nmax[1]
        nmax_z = recip_nmax[2]

    nmax_t = max(nmax_x, nmax_y, nmax_z)

    self.last_real_energy = None
    self.last_recip_energy = None
    self.last_self_energy = None

    self.kmax = (nmax_x, nmax_y, nmax_z)
    """Number of reciporcal vectors taken in each direction."""
    self.recip_cutoff = max_len
    """Reciprocal space cutoff."""
    self.recip_vectors = (gx, gy, gz)
    """Reciprocal lattice vectors"""
    self.ivolume = ivolume

    # Record the chosen parameters in the profiling store.
    opt.PROFILE[self.__class__.__name__ + ':recip_vectors'] = (self.recip_vectors)
    opt.PROFILE[self.__class__.__name__ + ':recip_cutoff'] = (self.recip_cutoff)
    opt.PROFILE[self.__class__.__name__ + ':recip_kmax'] = (self.kmax)
    opt.PROFILE[self.__class__.__name__ + ':alpha'] = (self.alpha)
    opt.PROFILE[self.__class__.__name__ + ':tol'] = (eps)
    opt.PROFILE[self.__class__.__name__ + ':real_cutoff'] = (self.real_cutoff)

    # define persistent vars
    self._vars = {}
    self._vars['alpha'] = ctypes.c_double(alpha)
    self._vars['max_recip'] = ctypes.c_double(max_len)
    self._vars['nmax_vec'] = host.Array((nmax_x, nmax_y, nmax_z), dtype=ctypes.c_int)
    self._vars['recip_vec'] = host.Array(
        np.zeros((3, 3), dtype=ctypes.c_double))
    self._vars['recip_vec'][0, :] = gx
    self._vars['recip_vec'][1, :] = gy
    self._vars['recip_vec'][2, :] = gz
    self._vars['ivolume'] = ivolume
    self._vars['coeff_space_kernel'] = data.ScalarArray(
        ncomp=((nmax_x + 1) * (nmax_y + 1) * (nmax_z + 1)),
        dtype=ctypes.c_double)
    # Reshaped view onto the coefficient data, indexed [z, y, x].
    self._vars['coeff_space'] = self._vars['coeff_space_kernel'].data.view().reshape(nmax_z + 1, nmax_y + 1, nmax_x + 1)

    # pass stride in tmp space vector
    self._vars['recip_axis_len'] = ctypes.c_int(nmax_t)

    # |axis | planes | quads
    reciplen = (nmax_t+1)*12 +\
               8*nmax_x*nmax_y + \
               8*nmax_y*nmax_z +\
               8*nmax_z*nmax_x +\
               16*nmax_x*nmax_y*nmax_z

    self._vars['recip_space_kernel'] = data.GlobalArray(
        size=reciplen, dtype=ctypes.c_double, shared_memory=shared_memory)
    self._vars['recip_space_energy'] = data.GlobalArray(
        size=1, dtype=ctypes.c_double, shared_memory=shared_memory)
    self._vars['real_space_energy'] = data.GlobalArray(
        size=1, dtype=ctypes.c_double, shared_memory=shared_memory)
    self._vars['self_interaction_energy'] = data.GlobalArray(
        size=1, dtype=ctypes.c_double, shared_memory=shared_memory)

    self.shared_memory = shared_memory

    # String constants substituted into the generated kernel sources.
    self._subvars = dict()
    self._subvars['SUB_GX'] = str(gx[0])
    self._subvars['SUB_GY'] = str(gy[1])
    self._subvars['SUB_GZ'] = str(gz[2])
    self._subvars['SUB_NKMAX'] = str(nmax_t)
    self._subvars['SUB_NK'] = str(nmax_x)
    self._subvars['SUB_NL'] = str(nmax_y)
    self._subvars['SUB_NM'] = str(nmax_z)
    self._subvars['SUB_NKAXIS'] = str(nmax_t)
    self._subvars['SUB_LEN_QUAD'] = str(nmax_x * nmax_y * nmax_z)
    self._subvars['SUB_MAX_RECIP'] = str(max_len)
    self._subvars['SUB_MAX_RECIP_SQ'] = str(max_len**2.)
    self._subvars['SUB_SQRT_ALPHA'] = str(sqrt(alpha))
    self._subvars['SUB_REAL_CUTOFF_SQ'] = str(real_cutoff**2.)
    self._subvars['SUB_REAL_CUTOFF'] = str(real_cutoff)
    self._subvars['SUB_M_SQRT_ALPHA_O_PI'] = str(-1.0 * sqrt(alpha / pi))
    self._subvars['SUB_M2_SQRT_ALPHAOPI'] = str(-2.0 * sqrt(alpha / pi))
    self._subvars['SUB_MALPHA'] = str(-1.0 * alpha)
    self._subvars['SUB_ENERGY_UNIT'] = str(energy_unit)
    self._subvars['SUB_ENERGY_UNITO2'] = str(energy_unit * 0.5)
    self._subvars['SUB_FORCE_UNIT'] = str(force_unit)

    self._real_space_pairloop = None
    self._init_libs()

    self._init_coeff_space()
    self._self_interaction_lib = None