Python Array 예제들, ppmd.host.Array Python 예제들

예제 #1

0

파일 보기

    def _update(self):
        """
        Rebuild the neighbour matrix by calling the compiled library.

        Grows ``self.ncount`` and ``self.matrix`` when they are too small for
        the current particle count and stride, calls ``self._lib`` and then
        stores the particle count in ``self.n_local`` and the library return
        value in ``self.total_num_neighbours``.

        :raises AssertionError: if the cell list is missing, a buffer has an
            unexpected dtype, or the library returns a negative value.
        """
        pos = self._positions
        clist = self.cell_list

        # Raw pointers are handed to the library below, so the element types
        # are checked first.
        assert clist.cell_list is not None, "cell list is not initialised"
        assert clist.cell_list.dtype is ctypes.c_int, "bad datatype"
        assert pos.dtype is ctypes.c_double, "bad datatype"
        assert self._domain.cell_array.dtype is ctypes.c_int, "dtype"
        assert clist.cell_reverse_lookup.dtype is ctypes.c_int, "dtype"

        count = self._n_func()

        if self.ncount.ncomp < count:
            self.ncount = host.Array(ncomp=count, dtype=ctypes.c_int)

        # The stride only ever grows.
        # NOTE(review): 27 is presumably the 3x3x3 block of cells around a
        # particle - confirm against the library source.
        self.stride.value = max(self.stride.value,
                                clist.max_cell_contents_count * 27)

        required = count * self.stride.value
        if self.matrix.ncomp < required:
            self.matrix = host.Array(ncomp=required, dtype=ctypes.c_int)

        lib_args = (
            ctypes.c_int(count),
            pos.ctypes_data,
            clist.cell_list.ctypes_data,
            clist.offset,
            clist.cell_reverse_lookup.ctypes_data,
            self._domain.cell_array.ctypes_data,
            self.matrix.ctypes_data,
            self.ncount.ctypes_data,
            ctypes.c_int(self.stride.value),
            ctypes.c_double(self.cell_width**2.0),
        )
        ret = self._lib(*lib_args)

        assert ret >= 0, "lib failed, return code: " + str(ret)
        self.n_local = count
        self.total_num_neighbours = ret

예제 #2

0

파일 보기

    def _init_escape_lib(self):
        """
        Create the direction lookup table, escape buffers and EscapeGuard lib.

        ``self._bin_to_lin`` maps an xor bit pattern to the linear index of
        one of the 26 directions. ``self._escape_count`` and
        ``self._escape_linked_list`` are allocated and the ``EscapeGuard``
        library is built and stored in ``self._escape_guard_lib``.
        """
        self._bin_to_lin = data.ScalarArray(ncomp=57, dtype=ctypes.c_int)

        # Linear direction index -> xor bit pattern, three rows per group.
        lin_to_bin = np.array((
            1 ^ 2 ^ 4, 2 ^ 1, 32 ^ 2 ^ 1,
            4 ^ 1, 1, 32 ^ 1,
            4 ^ 1 ^ 16, 1 ^ 16, 32 ^ 16 ^ 1,

            2 ^ 4, 2, 32 ^ 2,
            4, 32,
            4 ^ 16, 16, 32 ^ 16,

            8 ^ 2 ^ 4, 2 ^ 8, 32 ^ 2 ^ 8,
            4 ^ 8, 8, 32 ^ 8,
            4 ^ 8 ^ 16, 8 ^ 16, 32 ^ 16 ^ 8,
        ), dtype=ctypes.c_int)

        # Invert the table: xor bit pattern -> linear direction index.
        for lin_index, bin_code in enumerate(lin_to_bin):
            self._bin_to_lin[bin_code] = lin_index

        # Number of escaping particles in each direction
        self._escape_count = host.Array(np.zeros(26), dtype=ctypes.c_int)

        # Linked list of escaping particle ids, laid out like the cell list:
        # | [0-25 escape directions, index of first in direction] [26-end
        # current id and index of next id, (id, next_index) ]|
        # Every entry starts at -1.
        initial_list = -1 * np.ones(26 + 2 * self.state.npart_local)
        self._escape_linked_list = host.Array(initial_list,
                                              dtype=ctypes.c_int)

        # The domain boundary and the positions must share one dtype.
        position_dtype = self.state.get_position_dat().dtype
        assert self.state.domain.boundary.dtype == position_dtype

        substitutions = {
            'SUB_REAL': self.state.get_position_dat().ctype,
            'SUB_INT': self._bin_to_lin.ctype
        }
        built = ppmd.lib.build.lib_from_file_source(
            _LIB_SOURCES + 'EscapeGuard', 'EscapeGuard', substitutions)
        self._escape_guard_lib = built['EscapeGuard']

예제 #3

0

파일 보기

    def _compress_particle_dats(self, num_slots_to_fill):
        """
        Compress the particle dats held in the state. Compressing removes
        empty rows.

        The compressing library is built on first use. If the dats are
        already flagged as compressed nothing else is done. Otherwise the
        library is run over every dat named in ``self.state.particle_dats``,
        ``self.state.npart_local`` is set to the count written by the library
        and ``self.compressed`` is set to True.

        :arg int num_slots_to_fill: Passed to the compressing library as its
            first argument.
        """
        if self._compressing_lib is None:
            self._build_compressing_lib()

        if self.compressed is True:
            return

        # Allocated only once the library is known to run; the original
        # created this array even on the early-return path.
        new_count = host.Array([0], dtype=ctypes.c_int)

        dat_pointers = [
            getattr(self.state, dat_name).ctypes_data
            for dat_name in self.state.particle_dats
        ]

        self.compress_timer.start()
        self._compressing_lib(
            ctypes.c_int(num_slots_to_fill),
            ctypes.c_int(self.state.npart_local),
            self._move_empty_slots.ctypes_data,
            new_count.ctypes_data,
            *dat_pointers)

        # The library writes the new local particle count into new_count.
        self.state.npart_local = new_count[0]
        self.compressed = True
        self.compress_timer.pause()

예제 #4

0

파일 보기

    def get_shift(self):
        """
        Compute the shift applied per dimension for each of the 26 receive
        directions.

        A component is ``+extent`` when this process has topology coordinate
        0 in a periodic dimension and the direction component is -1,
        ``-extent`` when it has the last coordinate and the direction
        component is 1, and 0.0 otherwise.

        :return: ``host.Array`` of 26 * 3 doubles; entry ``dx * 3 + ix`` is
            the shift in dimension ``ix`` for direction ``dx``.
        """
        shifts = host.Array(ncomp=26 * 3, dtype=ctypes.c_double)

        dims = mpi.cartcomm_dims_xyz(self.comm)
        top = mpi.cartcomm_top_xyz(self.comm)
        periods = mpi.cartcomm_periods_xyz(self.comm)

        for dx in range(26):
            direction = mpi.recv_modifiers[dx]

            for ix in range(3):
                at_lower = top[ix] == 0
                if at_lower and periods[ix] == 1 and direction[ix] == -1:
                    shift = self.extent[ix]
                elif top[ix] == dims[ix] - 1 and periods[ix] == 1 and \
                        direction[ix] == 1:
                    shift = -1. * self.extent[ix]
                else:
                    shift = 0.0

                shifts[dx * 3 + ix] = shift

        return shifts

예제 #5

0

파일 보기

    def _cell_sort_setup(self):
        """
        Allocate the arrays used for cell list creation and set
        ``self._init`` to True.
        """
        max_npart = self._positions.max_npart
        cell_count = self._domain.cell_count

        # Initial cell list: one entry per particle slot, one per cell, plus
        # one extra.
        self._cell_list = host.Array(ncomp=max_npart + cell_count + 1,
                                     dtype=ct.c_int)

        # Number of particles in each cell, starting from zero.
        self._cell_contents_count = host.Array(np.zeros([cell_count]),
                                               dtype=ct.c_int)

        # Reverse lookup: local particle id -> containing cell.
        self._cell_reverse_lookup = host.Array(ncomp=max_npart,
                                               dtype=ct.c_int)

        self._init = True

예제 #6

0

파일 보기

    def _init_jstore(self, cell2part):
        """
        Ensure the per-thread ``_jstore`` arrays are large enough and return
        their addresses as a ctypes parameter.

        :arg cell2part: Object providing ``max_cell_contents_count``.
        :return: ``self._jptrs.ctypes.get_as_parameter()``, where
            ``self._jptrs`` holds one pointer value per thread.
        """
        required = cell2part.max_cell_contents_count * 27

        # Reallocate every thread's store, with 100 spare entries, when the
        # first one is too small.
        if self._jstore[0].ncomp < required:
            self._jstore = [
                host.Array(ncomp=100 + required, dtype=ctypes.c_int)
                for _ in range(runtime.NUM_THREADS)
            ]

        self._jptrs = pointers = np.zeros(runtime.NUM_THREADS,
                                          ctypes.c_void_p)
        for thread in range(runtime.NUM_THREADS):
            pointers[thread] = self._jstore[thread].ctypes_data.value

        return pointers.ctypes.get_as_parameter()

예제 #7

0

파일 보기

    def setup(self, n, positions, domain, cell_width):
        """
        Store the inputs, allocate the neighbour list arrays and build the
        ``NeighbourListv2`` library.

        :arg n: Callable returning the current number of particles.
        :arg positions: Stored as ``self._positions``.
        :arg domain: Stored as ``self._domain``; its ``cell_array`` sizes the
            initial list.
        :arg cell_width: Stored as ``self.cell_width``; its square is kept in
            ``self.cell_width_squared``.
        :raises AssertionError: if the cell list has not been set up.
        """
        assert self.cell_list.cell_list is not None, \
            "No cell to particle map setup"

        self.cell_width = cell_width
        self.cell_width_squared = host.Array(initial_value=cell_width**2,
                                             dtype=ct.c_double)
        self._domain = domain
        self._positions = positions
        self._n = n

        self.neighbour_starting_points = host.Array(ncomp=n() + 1,
                                                    dtype=ct.c_long)

        # The initial list length is estimated from at least 10 particles
        # and is itself never below 10.
        particle_count = max(n(), 10)
        cells = domain.cell_array
        initial_len = max(
            math.ceil(15. * (particle_count**2) /
                      (cells[0] * cells[1] * cells[2])),
            10)

        self.max_len = host.Array(initial_value=initial_len, dtype=ct.c_long)
        self.list = host.Array(ncomp=initial_len, dtype=ct.c_int)

        self._return_code = host.Array(ncomp=1, dtype=ct.c_int)
        self._return_code.data[0] = -1

        substitutions = {
            'SUB_REAL': 'double',
            'SUB_INT': 'int',
            'SUB_LONG': 'long'
        }
        built = ppmd.lib.build.lib_from_file_source(
            _LIB_SOURCES + 'NeighbourListv2', 'NeighbourListv2',
            substitutions)
        self._neighbour_lib = built['NeighbourListv2']

예제 #8

0

파일 보기

    def __init__(self, n, positions, domain, cell_width, cell_list):
        """
        Store the inputs, allocate minimal buffers and build the
        ``OMPNeighbourMatrix`` library.

        :arg n: Callable stored as ``self._n_func``.
        :arg positions: Stored as ``self._positions``.
        :arg domain: Stored as ``self._domain``.
        :arg cell_width: Stored as ``self.cell_width``.
        :arg cell_list: Stored as ``self.cell_list``.
        """
        # Inputs.
        self._n_func = n
        self._positions = positions
        self._domain = domain
        self.cell_width = cell_width
        self.cell_list = cell_list

        # Bookkeeping.
        self.version_id = 0
        self.domain_id = 0
        self.n_local = None
        self.total_num_neighbours = 0
        self.max_size = 0
        self.timer_update = ppmd.opt.Timer(runtime.TIMER)

        # One-element placeholder buffers and a zero stride.
        self.matrix = host.Array(ncomp=1, dtype=ctypes.c_int)
        self.ncount = host.Array(ncomp=1, dtype=ctypes.c_int)
        self.stride = ctypes.c_int(0)

        # The library source sits in the 'lib' directory beside this file.
        lib_dir = os.path.join(os.path.dirname(__file__), 'lib')
        built = build.lib_from_file_source(lib_dir + '/NeighbourMatrixSource',
                                           'OMPNeighbourMatrix')
        self._lib = built['OMPNeighbourMatrix']

        # The library's return value is read as a C long long.
        self._lib.restype = ctypes.c_longlong

예제 #9

0

파일 보기

    def __init__(self, kernel=None, dat_dict=None, shell_cutoff=None):
        """
        Generate and build the loop library for ``kernel`` and create the
        cell list for the first position dat found in ``dat_dict``.

        :arg kernel: Kernel object; ``kernel.name`` is read when the library
            is created, so the ``None`` default cannot be used as-is.
        :arg dat_dict: Passed to ``access.DatArgStore``.
        :arg shell_cutoff: Stored as ``self.shell_cutoff``.
        """
        self._dat_dict = access.DatArgStore(self._get_allowed_types(),
                                            dat_dict)

        self._cc = build.TMPCC
        self._kernel = kernel
        self.shell_cutoff = shell_cutoff

        # Timers
        self.loop_timer = modules.code_timer.LoopTimer()
        self.wrapper_timer = opt.Timer(runtime.TIMER)
        self.list_timer = opt.Timer(runtime.TIMER)

        self._gather_space = host.ThreadSpace(100, ctypes.c_uint8)

        # _generate() runs before self._components is read below.
        self._generate()

        self._offset_list = host.Array(ncomp=27, dtype=ctypes.c_int)

        header_source = self._generate_header_source()
        self._lib = build.simple_lib_creator(header_source,
                                             self._components['LIB_SRC'],
                                             self._kernel.name,
                                             CC=self._cc)

        # Use the group of the first PositionDat argument, if there is one.
        self._group = None
        for item in self._dat_dict.items():
            dat = item[1][0]
            if issubclass(type(dat), data.PositionDat):
                self._group = dat.group
                break

        # A missing group is allowed; the cell list is then not created here.
        if self._group is not None:
            self._make_cell_list(self._group)

        self._kernel_execution_count = INT64(0)
        self._invocations = 0

        # One 100-entry int array per thread.
        self._jstore = [
            host.Array(ncomp=100, dtype=ctypes.c_int)
            for _ in range(runtime.NUM_THREADS)
        ]

예제 #10

0

파일 보기

    def __init__(self, state_in=None):
        """
        Store the state and initialise timers and lazily-built members.

        :arg state_in: Stored as ``self.state``.
        """
        self.state = state_in

        # Three separate timers: apply, search, move.
        self.timer_apply, self.timer_search, self.timer_move = (
            ppmd.opt.Timer(runtime.TIMER, 0) for _ in range(3))

        # Libraries and escape buffers start unset.
        self._one_process_pbc_lib = None
        self._escape_guard_lib = None
        self._escape_count = None
        self._escape_linked_list = None

        # Single-int flag array.
        self._flag = host.Array(ncomp=1, dtype=ctypes.c_int)

예제 #11

0

파일 보기

    def setup(self, n, positions, domain, cell_width):
        """
        Store the inputs, allocate the neighbour list arrays and build the
        ``NeighbourListNonN3`` library.

        :arg n: Callable returning the current number of particles.
        :arg positions: Stored as ``self._positions``; its ``ctype`` is
            substituted for ``SUB_REAL`` in the library source.
        :arg domain: Stored as ``self._domain``; its ``cell_array`` sizes the
            initial list and its ``version_id`` is recorded.
        :arg cell_width: Stored as ``self.cell_width``; its square is kept in
            ``self.cell_width_squared``.
        """
        # Set up the cell list if that has not been done yet (the original
        # comment notes this also handles domain decomposition).
        cell_list = self.cell_list
        if cell_list.cell_list is None:
            cell_list.setup(n, positions, domain, cell_width)

        self.cell_width = cell_width
        self.cell_width_squared = host.Array(initial_value=cell_width**2,
                                             dtype=ct.c_double)
        self._domain = domain
        self._positions = positions
        self._n = n

        self.neighbour_starting_points = host.Array(ncomp=n() + 1,
                                                    dtype=ct.c_long)

        # Initial list length estimated from the particle and cell counts.
        cells = domain.cell_array
        initial_len = math.ceil(
            27. * (n()**2) / (cells[0] * cells[1] * cells[2]))

        self.max_len = host.Array(initial_value=initial_len, dtype=ct.c_long)
        self.list = host.Array(ncomp=initial_len, dtype=ct.c_int)

        self._return_code = host.Array(ncomp=1, dtype=ct.c_int)
        self._return_code.data[0] = -1

        substitutions = {
            'SUB_REAL': self._positions.ctype,
            'SUB_INT': 'int',
            'SUB_LONG': 'long'
        }
        built = ppmd.lib.build.lib_from_file_source(
            _LIB_SOURCES + 'NeighbourListNonN3', 'NeighbourListNonN3',
            substitutions)
        self._neighbour_lib = built['NeighbourListNonN3']

        self.domain_id = self._domain.version_id

예제 #12

0

파일 보기

    def get_boundary_cells(self):
        """
        Return a host.Array containing the boundary cell indices of the domain.

        The array is rebuilt only when ``self._cell_array.version`` is newer
        than the version recorded at the last rebuild; otherwise the cached
        array is returned. Cells are stored as linear indices of the form
        ``ix + ca[0] * (iy + iz * ca[1])``.

        :return: ``self._boundary_cells``.
        """

        if self._boundary_cell_version < self._cell_array.version:
            _ca = self._cell_array
            # Cells with every index in [1, dim - 2], minus those with every
            # index in [2, dim - 3]: the one-cell-thick shell between them.
            _count = (_ca[0] - 2) * (_ca[1] - 2) * (_ca[2] - 2) - (
                _ca[0] - 4) * (_ca[1] - 4) * (_ca[2] - 4)

            self._boundary_cells = host.Array(ncomp=_count, dtype=ctypes.c_int)
            # m is the write position for the first face of each pair; the
            # opposite face is written one face-size further on.
            m = 0

            # z faces: iz = 1 and iz = ca[2] - 2, over the full x/y range.
            for ix in range(1, _ca[0] - 1):
                for iy in range(1, _ca[1] - 1):

                    self._boundary_cells[m] = ix + _ca[0] * (iy + _ca[1])
                    self._boundary_cells[
                        m + (_ca[0] - 2) *
                        (_ca[1] - 2)] = ix + _ca[0] * (iy +
                                                       (_ca[2] - 2) * _ca[1])
                    m += 1
            # Skip over the entries already written for the second z face.
            m += (_ca[0] - 2) * (_ca[1] - 2)

            # y faces: iy = 1 and iy = ca[1] - 2. iz starts at 2 and stops at
            # ca[2] - 3, so cells already covered by the z faces are not
            # repeated.
            for ix in range(1, _ca[0] - 1):
                for iz in range(2, _ca[2] - 2):
                    self._boundary_cells[m] = ix + _ca[0] * (1 + iz * _ca[1])
                    self._boundary_cells[m + (_ca[0] - 2) *
                                         (_ca[2] - 4)] = ix + _ca[0] * (
                                             (_ca[1] - 2) + iz * _ca[1])
                    m += 1

            # Skip over the entries already written for the second y face.
            m += (_ca[0] - 2) * (_ca[2] - 4)

            # x faces: ix = 1 and ix = ca[0] - 2, with iy and iz both
            # restricted to the range not covered by the faces above.
            for iy in range(2, _ca[1] - 2):
                for iz in range(2, _ca[2] - 2):
                    self._boundary_cells[m] = 1 + _ca[0] * (iy + iz * _ca[1])
                    self._boundary_cells[m + (_ca[1] - 4) * (
                        _ca[2] - 4)] = _ca[0] - 2 + _ca[0] * (iy + iz * _ca[1])
                    m += 1

            # m is not read again after this increment.
            m += (_ca[1] - 4) * (_ca[2] - 4)

            # Record the version this array was built against.
            self._boundary_cell_version = self._cell_array.version

        return self._boundary_cells

예제 #13

0

파일 보기

    def __init__(self, *args, **kwargs):
        """
        Initialise the state used for moving and compressing particle data.

        Positional arguments are accepted but not used here.

        :arg state: (keyword, required) Stored as ``self.state``.
        :raises KeyError: if ``state`` is not passed as a keyword argument.
        """

        self.state = kwargs['state']

        self._move_dir_recv_totals = None
        self._move_dir_send_totals = None

        self._move_shift_array = host.NullDoubleArray

        self._move_send_buffer = None
        self._move_recv_buffer = None

        self._move_unpacking_lib = None
        self._move_packing_lib = None
        self._move_empty_slots = host.Array(ncomp=4, dtype=ctypes.c_int)
        self._move_used_free_slot_count = None

        self._total_ncomp = None

        # Timers. The original built move_timer and _status twice; each is
        # now created once.
        self.move_timer = ppmd.opt.Timer(runtime.TIMER, 0)
        self.compress_timer = ppmd.opt.Timer(runtime.TIMER, 0)

        self._status = mpi.MPI.Status()

        # compressing vars
        self._compressing_lib = None

        self.compressed = True
        """ Bool to determine if the held :class:`~data.ParticleDat` members have gaps in them. """

        self.uncompressed_n = False

예제 #14

0

파일 보기

    def __init__(self,
                 npart=0,
                 ncomp=1,
                 initial_value=None,
                 name=None,
                 dtype=ctypes.c_double):
        """
        Create the particle dat storage and its bookkeeping state.

        :arg int npart: Passed to ``host._make_array`` as the row count.
        :arg int ncomp: Passed to ``host._make_array`` as the column count;
            must be positive.
        :arg initial_value: Passed to ``host._make_array``.
        :arg str name: Stored as ``self.name``.
        :arg dtype: ctypes type passed to ``host._make_array`` and stored as
            ``self.idtype``.
        :raises AssertionError: if ``ncomp`` is not positive.
        """
        # The check rejects zero as well as negative values, so the message
        # says "positive" rather than the original "Negative ... not
        # supported".
        assert ncomp > 0, "Number of components must be positive."

        # version ids. Internal then halo.
        self._vid_int = 0
        self._vid_halo = -1
        self.vid_halo_cell_list = -1
        self._halo_exchange_count = 0

        self.group = None

        # Initialise timers
        self.timer_comm = ppmd.opt.Timer()
        self.timer_pack = ppmd.opt.Timer()
        self.timer_transfer = ppmd.opt.Timer(runtime.TIMER, 0)
        self.timer_transfer_1 = ppmd.opt.Timer(runtime.TIMER, 0)
        self.timer_transfer_2 = ppmd.opt.Timer(runtime.TIMER, 0)
        self.timer_transfer_resize = ppmd.opt.Timer(runtime.TIMER, 0)

        self.name = name
        """:return: The name of the ParticleDat instance."""

        self.idtype = dtype
        self._dat = host._make_array(initial_value=initial_value,
                                     dtype=dtype,
                                     nrow=npart,
                                     ncol=ncomp)

        self._ptr = None
        self._ptr_count = 0

        # Both counts start at the number of rows actually allocated.
        self.max_npart = self._dat.shape[0]
        """:return: The maximum number of particles which can be stored within this particle dat."""

        self.npart_local = self._dat.shape[0]
        """:return: The number of particles with properties stored in the particle dat."""

        self.ncomp = self.ncol
        """:return: The number of components stored for each particle."""

        self.halo_start = self.npart_local
        """:return: The starting index of the halo region of the particle dat. """
        self.npart_halo = 0
        self.npart_local_halo = 0
        """:return: The number of particles currently stored within the halo region of the particle dat."""

        self._resize_callback = None
        self._version = 0

        self._exchange_lib = None
        self._tmp_halo_space = host.Array(ncomp=1, dtype=self.dtype)

        # tmp space for norms/maxes etc
        self._norm_tmp = ScalarArray(ncomp=1, dtype=self.dtype)
        self._linf_norm_lib = None

        # default comm is world
        self.comm = mpi.MPI.COMM_WORLD

        # The second argument is True only when this is exactly a
        # PositionDat, not a subclass of it.
        self._particle_dat_modifier = ParticleDatModifier(
            self,
            type(self) == PositionDat)

예제 #15

0

파일 보기

    def __init__(self, domain, cell_width, positions):
        """
        Compute the padded sub-cell decomposition of ``domain``, allocate the
        device arrays and build the ``SubCellOccupancyMatrix`` library.

        :arg domain: Domain providing ``boundary`` (six values, read as
            lower/upper pairs per dimension) and ``cell_edge_lengths``.
        :arg cell_width: Requested cell width; must be positive.
        :arg positions: Stored as ``self.positions``.
        :raises AssertionError: if ``cell_width`` is not positive or an upper
            boundary is not greater than its lower boundary.
        """
        self.domain = domain
        boundary = domain.boundary

        assert cell_width > 0, "bad cell width"
        for lower, upper in ((0, 1), (2, 3), (4, 5)):
            assert boundary[upper] > boundary[lower], "nonsensical boundary"

        self.positions = positions

        self.cell_array = host.Array(ncomp=3, dtype=ctypes.c_int)
        self.cell_sizes = host.Array(ncomp=3, dtype=ctypes.c_double)

        # Interior only: whole cells per dimension, then the cell size that
        # makes that many cells span the interior.
        interior = [
            int(float(boundary[2 * d + 1] - boundary[2 * d]) / cell_width)
            for d in range(3)
        ]
        sizes = [
            float(boundary[2 * d + 1] - boundary[2 * d]) / interior[d]
            for d in range(3)
        ]
        self.cell_sizes[:] = list(sizes)

        # Padding cells on each side, and the same padding as a length.
        edge_lengths = self.domain.cell_edge_lengths
        pads = [
            int(math.ceil(edge_lengths[d] / sizes[d])) + 1 for d in range(3)
        ]
        pad_lengths = [pads[d] * sizes[d] for d in range(3)]

        for d in range(3):
            self.cell_array[d] = interior[d] + 2 * pads[d]

        # Padded boundary: lower edges move down, upper edges move up.
        self.boundary = host.Array(ncomp=6, dtype=ctypes.c_double)
        for d in range(3):
            self.boundary[2 * d] = boundary[2 * d] - pad_lengths[d]
            self.boundary[2 * d + 1] = boundary[2 * d + 1] + pad_lengths[d]

        # NOTE(review): this uses the interior counts, not the padded
        # self.cell_array - confirm that is intended.
        self.cell_count = interior[0] * interior[1] * interior[2]

        self.particle_layers = cuda_base.Array(ncomp=1, dtype=ctypes.c_int)
        self.cell_reverse_lookup = cuda_base.Array(ncomp=1, dtype=ctypes.c_int)
        self.cell_contents_count = cuda_base.Array(ncomp=self.cell_count,
                                                   dtype=ctypes.c_int)
        self.matrix = cuda_base.Matrix(nrow=self.cell_count,
                                       ncol=1,
                                       dtype=ctypes.c_int)
        self.num_layers = 0

        # Read the CUDA source and header, then build both entry points.
        source_path = str(ppmd.cuda.cuda_config.LIB_DIR) + \
            '/cudaSubCellOccupancyMatrixSource.cu'
        with open(source_path, 'r') as source_file:
            source_code = source_file.read()

        header_path = str(ppmd.cuda.cuda_config.LIB_DIR) + \
            '/cudaSubCellOccupancyMatrixSource.h'
        with open(header_path, 'r') as header_file:
            header_code = header_file.read()

        built = cuda_build.simple_lib_creator(header_code, source_code,
                                              'SubCellOccupancyMatrix')
        self._sort_lib = built['LayerSort']
        self._fill_lib = built['PopMatrix']

        self.version_id = 0

예제 #16

0

파일 보기

    def __init__(self, domain_func, cell_to_particle_map):
        """
        Allocate the halo exchange bookkeeping arrays and run ``_setup``.

        :arg domain_func: Stored as ``self._domain_func``.
        :arg cell_to_particle_map: Stored as ``self._cell_to_particle_map``.
        """
        self._timer = ppmd.opt.Timer(runtime.TIMER, 0, start=True)

        self._domain_func = domain_func
        self._domain = None
        self._cell_to_particle_map = cell_to_particle_map

        self._ca_copy = [None, None, None]
        self._version = -1
        self._init = False

        # Cell group arrays and the start/end indices into them.
        self._boundary_cell_groups = host.Array(dtype=ctypes.c_int)
        self._boundary_groups_start_end_indices = host.Array(
            ncomp=7, dtype=ctypes.c_int)
        self._halo_cell_groups = host.Array(dtype=ctypes.c_int)
        self._halo_groups_start_end_indices = host.Array(
            ncomp=7, dtype=ctypes.c_int)
        self._boundary_groups_contents_array = host.Array(dtype=ctypes.c_int)
        self._exchange_sizes = host.Array(ncomp=6, dtype=ctypes.c_int)

        # One send rank and one receive rank per exchange direction.
        self._send_ranks = host.Array(ncomp=6, dtype=ctypes.c_int)
        self._recv_ranks = host.Array(ncomp=6, dtype=ctypes.c_int)

        self._h_count = ctypes.c_int(0)
        self._t_count = ctypes.c_int(0)

        self._h_tmp = host.Array(ncomp=10, dtype=ctypes.c_int)
        self._b_tmp = host.Array(ncomp=10, dtype=ctypes.c_int)

        self.dir_counts = host.Array(ncomp=6, dtype=ctypes.c_int)

        self._halo_shifts = None

        # ensure first update
        versioned_arrays = (
            self._boundary_cell_groups,
            self._boundary_groups_start_end_indices,
            self._halo_cell_groups,
            self._halo_groups_start_end_indices,
            self._boundary_groups_contents_array,
            self._exchange_sizes,
        )
        for array in versioned_arrays:
            array.inc_version(-1)

        self._setup()

        # NOTE(review): these are reset after _setup() has run, so anything
        # _setup() stored in them is discarded - confirm the order is
        # intended.
        self._exchange_sizes_lib = None
        self._cell_contents_count_tmp = None

예제 #17

0

파일 보기

    def exchange_cell_counts(self):
        """
        Exchange the contents count of cells between processes. This is
        provided as a method in halo to avoid repeated exchanging of cell
        occupancy counts if multiple ParticleDat objects are being
        communicated.

        The exchange library is compiled on first call and cached in
        ``self._exchange_sizes_lib``. When the cell contents count is not a
        ``host.Array``, the exchange runs on a host copy and the result is
        copied back afterwards.

        :return: Tuple ``(h_count, t_count)`` as written by the library: the
            number of halo particles and the amount of temporary space
            needed.
        :raises AssertionError: if no cell contents count pointer is found.
        """
        self._update_domain()
        # Build the exchange library once; later calls reuse it.
        if self._exchange_sizes_lib is None:

            _es_args = '''
            const int f_MPI_COMM,             // F90 comm from mpi4py
            const int * RESTRICT SEND_RANKS,  // send directions
            const int * RESTRICT RECV_RANKS,  // recv directions
            const int * RESTRICT h_ind,       // halo indices
            const int * RESTRICT b_ind,       // local b indices
            const int * RESTRICT h_arr,       // h cell indices
            const int * RESTRICT b_arr,       // b cell indices
            int * RESTRICT ccc,               // cell contents count
            int * RESTRICT h_count,           // number of halo particles
            int * RESTRICT t_count,           // amount of tmp space needed
            int * RESTRICT h_tmp,             // tmp space for recving
            int * RESTRICT b_tmp,             // tmp space for sending
            int * RESTRICT dir_counts         // expected recv counts
            '''

            _es_header = '''
            #include <generic.h>
            #include <mpi.h>
            #include <iostream>
            using namespace std;
            #define RESTRICT %(RESTRICT)s

            extern "C" void HALO_ES_LIB(%(ARGS)s);
            '''

            _es_code = '''

            void HALO_ES_LIB(%(ARGS)s){
                *h_count = 0;
                *t_count = 0;

                // get mpi comm and rank
                MPI_Comm MPI_COMM = MPI_Comm_f2c(f_MPI_COMM);
                int rank = -1; MPI_Comm_rank( MPI_COMM, &rank );
                MPI_Status MPI_STATUS;

                // [W E] [N S] [O I]
                for( int dir=0 ; dir<6 ; dir++ ){

                    //cout << "dir " << dir << "-------" << endl;

                    const int dir_s = b_ind[dir];             // start index
                    const int dir_c = b_ind[dir+1] - dir_s;   // cell count

                    const int dir_s_r = h_ind[dir];             // start index
                    const int dir_c_r = h_ind[dir+1] - dir_s_r; // cell count

                    int tmp_count = 0;
                    for( int ix=0 ; ix<dir_c ; ix++ ){
                        b_tmp[ix] = ccc[b_arr[dir_s + ix]];    // copy into
                                                               // send buffer

                        tmp_count += ccc[b_arr[dir_s + ix]];
                    }

                    *t_count = MAX(*t_count, tmp_count);


                    if(rank == RECV_RANKS[dir]){

                        for( int tx=0 ; tx < dir_c ; tx++ ){
                            h_tmp[tx] = b_tmp[tx];
                        }

                    } else {
                    MPI_Sendrecv ((void *) b_tmp, dir_c, MPI_INT,
                                  SEND_RANKS[dir], rank,
                                  (void *) h_tmp, dir_c_r, MPI_INT,
                                  RECV_RANKS[dir], RECV_RANKS[dir],
                                  MPI_COMM, &MPI_STATUS);
                    }

                    tmp_count=0;
                    for( int ix=0 ; ix<dir_c_r ; ix++ ){
                        ccc[h_arr[dir_s_r + ix]] = h_tmp[ix];
                        *h_count += h_tmp[ix];
                        tmp_count += h_tmp[ix];
                    }
                    dir_counts[dir] = tmp_count;
                    *t_count = MAX(*t_count, tmp_count);

                }

                return;
            }
            '''

            # Values substituted into the %(ARGS)s and %(RESTRICT)s
            # placeholders of the header and source strings above.
            _es_dict = {
                'ARGS': _es_args,
                'RESTRICT': build.MPI_CC.restrict_keyword
            }

            _es_header %= _es_dict
            _es_code %= _es_dict

            self._exchange_sizes_lib = build.simple_lib_creator(
                _es_header, _es_code, 'HALO_ES_LIB',
                CC=build.MPI_CC)['HALO_ES_LIB']

        # End of creation code -----------------------------------------------

        # update internal arrays
        if self._version < self._domain.cell_array.version:
            self._get_pairs()

        ccc = self._cell_to_particle_map.cell_contents_count

        # This if allows the host side exchange code to be used for the gpu
        if type(ccc) is host.Array:
            ccc_ptr = ccc.ctypes_data

        else:
            # Keep a host-side scratch array at least as large as ccc.
            if self._cell_contents_count_tmp is None:
                self._cell_contents_count_tmp = host.Array(ncomp=ccc.ncomp,
                                                           dtype=ctypes.c_int)
            elif self._cell_contents_count_tmp.ncomp < ccc.ncomp:
                self._cell_contents_count_tmp.realloc(ccc.ncomp)

            #make a local copy of the cell contents counts
            self._cell_contents_count_tmp[:] = ccc[:]
            ccc_ptr = self._cell_contents_count_tmp.ctypes_data

        assert ccc_ptr is not None, "No valid Cell Contents Count pointer found."

        # Argument order must match the parameter list in _es_args.
        self._exchange_sizes_lib(
            ctypes.c_int(self._domain.comm.py2f()),
            self._send_ranks.ctypes_data, self._recv_ranks.ctypes_data,
            self._halo_groups_start_end_indices.ctypes_data,
            self._boundary_groups_start_end_indices.ctypes_data,
            self._halo_cell_groups.ctypes_data,
            self._boundary_cell_groups.ctypes_data, ccc_ptr,
            ctypes.byref(self._h_count), ctypes.byref(self._t_count),
            self._h_tmp.ctypes_data, self._b_tmp.ctypes_data,
            self.dir_counts.ctypes_data)

        # copy new sizes back to original array (eg for gpu)
        if type(ccc) is not host.Array:
            ccc[:] = self._cell_contents_count_tmp[:ccc.ncomp:]

        return self._h_count.value, self._t_count.value

예제 #18

0

파일 보기

    def _get_pairs(self):
        """
        Rebuild the boundary/halo cell groups, their start/end indices, the
        per-direction shifts and the send/receive ranks for the six exchange
        directions, then record the cell array version they were built for.
        """
        self._update_domain()

        # (cell slice, direction) for each exchange, in exchange order.
        exchange_specs = (
            # As these are the first exchange the halos cannot contain
            # anything useful
            (Slice[1, 1:-1, 1:-1], (-1, 0, 0)),
            (Slice[-2, 1:-1, 1:-1], (1, 0, 0)),

            # As with the above no point exchanging anything extra in z
            # direction
            (Slice[::, 1, 1:-1], (0, -1, 0)),
            (Slice[::, -2, 1:-1], (0, 1, 0)),

            # Exchange all halo cells from x and y
            (Slice[::, ::, 1], (0, 0, -1)),
            (Slice[::, ::, -2], (0, 0, 1)),
        )
        cell_pairs = tuple(
            create_halo_pairs_slice_halo(self._domain, cells, direction)
            for cells, direction in exchange_specs)

        # Start indices begin with a single 0; cell lists and shifts begin
        # empty.
        boundary_starts = np.zeros(1, dtype=ctypes.c_int)
        boundary_cells = np.zeros(0, dtype=ctypes.c_int)

        halo_starts = np.zeros(1, dtype=ctypes.c_int)
        halo_cells = np.zeros(0, dtype=ctypes.c_int)

        shifts = np.zeros(0, dtype=ctypes.c_double)

        # The temporary buffers are never smaller than 10 entries.
        halo_tmp_len = 10
        boundary_tmp_len = 10

        for hx, pair in enumerate(cell_pairs):
            n_boundary = len(pair[0])
            n_halo = len(pair[1])

            boundary_tmp_len = max(boundary_tmp_len, n_boundary)
            halo_tmp_len = max(halo_tmp_len, n_halo)

            # Boundary and Halo start index.
            boundary_starts = np.append(
                boundary_starts,
                ctypes.c_int(n_boundary + boundary_starts[-1]))
            halo_starts = np.append(
                halo_starts, ctypes.c_int(n_halo + halo_starts[-1]))

            # Actual cell indices
            boundary_cells = np.append(boundary_cells, pair[0])
            halo_cells = np.append(halo_cells, pair[1])

            # Offset shifts for periodic boundary
            shifts = np.append(shifts, pair[2])

            self._send_ranks[hx] = pair[3]
            self._recv_ranks[hx] = pair[4]

        # Grow the temporary buffers to fit the largest group.
        if boundary_tmp_len > self._b_tmp.ncomp:
            self._b_tmp.realloc(boundary_tmp_len)

        if halo_tmp_len > self._h_tmp.ncomp:
            self._h_tmp.realloc(halo_tmp_len)

        # indices in array of cell indices
        self._boundary_groups_start_end_indices = host.Array(
            boundary_starts, dtype=ctypes.c_int)
        self._halo_groups_start_end_indices = host.Array(
            halo_starts, dtype=ctypes.c_int)

        # cell indices
        self._boundary_cell_groups = host.Array(boundary_cells,
                                                dtype=ctypes.c_int)
        self._halo_cell_groups = host.Array(halo_cells, dtype=ctypes.c_int)

        # shifts for each direction.
        self._halo_shifts = host.Array(shifts, dtype=ctypes.c_double)

        self._version = self._domain.cell_array.version

예제 #19

0

파일 보기

    def move_to_neighbour(self,
                          ids_directions_list=None,
                          dir_send_totals=None,
                          shifts=None):
        """
        Pack departing particles, exchange them and unpack the arrivals.

        The particles referenced by the linked list are packed into a byte
        buffer, the send/recv sizes and then the buffers themselves are
        exchanged through the instance's exchange helpers, the received
        buffer is unpacked into the state's particle dats and the dats are
        compressed afterwards.

        Although every argument defaults to ``None``, all three are
        dereferenced unconditionally and therefore must be supplied.

        :arg host.Array ids_directions_list(int): Linked list of ids from
         directions.
        :arg host.Array dir_send_totals(int): 26 element array of the number
         of particles travelling in each direction.
        :arg host.Array shifts(double): Shifts to apply when moving particles
         for the 26 directions. NOTE(review): the previous documentation
         stated 73 elements; 26 directions x 3 components would be 78 --
         confirm against the packing library.
        :returns: Always ``True``.
        """

        self.move_timer.start()

        # The packing library is built lazily on first use.
        if self._move_packing_lib is None:
            self._move_packing_lib = _move_controller.build_pack_lib(
                self.state)

        n_send = dir_send_totals.data.sum()

        # Create the send buffer, or grow it if it is too small.
        send_bytes = self._total_ncomp * n_send
        if self._move_send_buffer is None:
            self._move_send_buffer = host.Array(ncomp=send_bytes,
                                                dtype=ctypes.c_byte)
        elif self._move_send_buffer.ncomp < send_bytes:
            self._move_send_buffer.realloc(send_bytes)

        # Per-direction receive counts, allocated once.
        if self._move_dir_recv_totals is None:
            self._move_dir_recv_totals = host.Array(ncomp=26,
                                                    dtype=ctypes.c_int)

        # Tell the neighbours how many particles are about to be sent and
        # learn how many will arrive.
        self._move_dir_send_totals = dir_send_totals
        self._move_dir_recv_totals.zero()
        self._move_exchange_send_recv_sizes()

        n_recv = self._move_dir_recv_totals.data.sum()

        # The library treats the buffers as uint_8, so a byte must be 1 wide.
        assert ctypes.sizeof(ctypes.c_byte) == 1

        # Create the recv buffer, or grow it if it is too small.
        recv_bytes = self._total_ncomp * n_recv
        if self._move_recv_buffer is None:
            self._move_recv_buffer = host.Array(ncomp=recv_bytes,
                                                dtype=ctypes.c_byte)
        elif self._move_recv_buffer.ncomp < recv_bytes:
            self._move_recv_buffer.realloc(recv_bytes)

        # Every particle dat must have room for the incoming particles.
        for dat_name in self.state.particle_dats:
            dat = getattr(self.state, dat_name)
            required = n_recv + self.state.npart_local
            if required > dat.max_npart:
                dat.resize(required)

        # Store for the slots vacated by the departing particles.
        if n_send > 0:
            self._resize_empty_slot_store(n_send)

        # Pack the particles to send.
        assert shifts.dtype == ctypes.c_double
        pack_args = [
            self._move_send_buffer.ctypes_data,
            shifts.ctypes_data,
            ids_directions_list.ctypes_data,
            self._move_empty_slots.ctypes_data,
        ]
        pack_args.extend(
            getattr(self.state, dat_name).ctypes_data
            for dat_name in self.state.particle_dats)
        self._move_packing_lib(*pack_args)

        # Sort the vacated slots in place.
        self._move_empty_slots.data[:n_send].sort()

        # Exchange the particle data itself.
        self._exchange_move_send_recv_buffers()

        # The unpacking library is also built lazily.
        if self._move_unpacking_lib is None:
            self._move_unpacking_lib = _move_controller.build_unpack_lib(
                self.state)

        # Unpack the received buffer.
        unpack_args = [
            ctypes.c_int(n_recv),
            ctypes.c_int(n_send),
            ctypes.c_int(self.state.npart_local),
            self._move_empty_slots.ctypes_data,
            self._move_recv_buffer.ctypes_data,
        ]
        unpack_args.extend(
            getattr(self.state, dat_name).ctypes_data
            for dat_name in self.state.particle_dats)
        self._move_unpacking_lib(*unpack_args)

        # NOTE(review): these rank tables are filled but never read again in
        # this method -- confirm whether the loop can be dropped.
        recv_ranks = np.zeros(26)
        send_ranks = np.zeros(26)
        for dir_index in range(26):
            direction = mpi.recv_modifiers[dir_index]
            send_ranks[dir_index] = mpi.cartcomm_shift(self._ccomm,
                                                       direction,
                                                       ignore_periods=True)
            opposite = (-1 * direction[0], -1 * direction[1],
                        -1 * direction[2])
            recv_ranks[dir_index] = mpi.cartcomm_shift(self._ccomm,
                                                       opposite,
                                                       ignore_periods=True)

        if n_recv < n_send:
            # More particles left than arrived: move the still-empty slots
            # to the front of the store.
            self.compressed = False
            leftover = self._move_empty_slots.data[n_recv:n_send]
            self._move_empty_slots.data[:n_send - n_recv] = np.array(
                leftover, copy=True)
        else:
            self.state.npart_local = (self.state.npart_local + n_recv -
                                      n_send)

        # Compress particle dats.
        self._compress_particle_dats(n_send - n_recv)

        if n_send > 0 or n_recv > 0:
            self.state.invalidate_lists()

        self.move_timer.pause()

        return True

예제 #20

0

파일 보기

    def __init__(self,
                 domain,
                 eps=10.**-6,
                 real_cutoff=None,
                 alpha=None,
                 recip_cutoff=None,
                 recip_nmax=None,
                 shared_memory=False,
                 shell_width=None,
                 work_ratio=1.0,
                 force_unit=1.0,
                 energy_unit=1.0):
        """
        Set up real and reciprocal space parameters for an orthogonal box.

        At least one of ``alpha`` and ``real_cutoff`` must be passed; when
        only one is given the other is deduced from it and ``eps``.

        :arg domain: Domain whose ``extent`` gives the three box lengths.
        :arg float eps: Tolerance used to deduce missing parameters.
        :arg float real_cutoff: Real space cutoff, deduced from ``alpha`` if
         omitted.
        :arg float alpha: Splitting parameter, deduced from ``real_cutoff``
         if omitted.
        :arg float recip_cutoff: Reciprocal space cutoff; deduced when
         ``None``.
        :arg recip_nmax: Three explicit reciprocal vector counts (x, y, z);
         deduced from the reciprocal cutoff when ``None``.
        :arg shared_memory: One of ``False``, ``'omp'`` or ``'mpi'``; passed
         on to the global arrays created here.
        :arg shell_width: Real space padding width, stored unchanged.
        :arg work_ratio: Not used in this constructor.
        :arg force_unit: Substituted into the kernel as ``SUB_FORCE_UNIT``.
        :arg energy_unit: Substituted into the kernel as ``SUB_ENERGY_UNIT``
         (and halved as ``SUB_ENERGY_UNITO2``).
        :raises ValueError: If both ``alpha`` and ``real_cutoff`` are
         ``None``.
        """

        self.domain = domain
        self.eps = float(eps)

        assert shared_memory in (False, 'omp', 'mpi')

        # Tolerance dependent factor relating alpha to the cutoffs.
        ss = cmath.sqrt(scipy.special.lambertw(1. / eps)).real

        if alpha is None and real_cutoff is None:
            # Neither value can be deduced from the other. Fail here with a
            # clear message rather than with a TypeError from dividing by
            # None in the deduction below.
            raise ValueError(
                "no alpha or real_cutoff passed, at least one is required")

        if alpha is not None and real_cutoff is not None and recip_cutoff is not None:
            # Everything was supplied explicitly, nothing to deduce.
            pass

        elif alpha is not None and real_cutoff is not None:
            ss = real_cutoff * sqrt(alpha)

        elif alpha is None:
            alpha = (ss / real_cutoff)**2.
        else:
            real_cutoff = ss / sqrt(alpha)

        assert alpha is not None, "no alpha deduced/passed"
        assert real_cutoff is not None, "no real_cutoff deduced/passed"

        self.real_cutoff = float(real_cutoff)
        """Real space cutoff"""
        self.shell_width = shell_width
        """Real space padding width"""
        self.alpha = float(alpha)
        """alpha"""

        # these parts are specific to the orthogonal box
        extent = self.domain.extent
        lx = (extent[0], 0., 0.)
        ly = (0., extent[1], 0.)
        lz = (0., 0., extent[2])
        ivolume = 1. / np.dot(lx, np.cross(ly, lz))

        # Reciprocal lattice vectors.
        gx = np.cross(ly, lz) * ivolume * 2. * pi
        gy = np.cross(lz, lx) * ivolume * 2. * pi
        gz = np.cross(lx, ly) * ivolume * 2. * pi

        sqrtalpha = sqrt(alpha)

        # Initial estimate of the number of reciprocal vectors per direction.
        nmax_x = round(ss * extent[0] * sqrtalpha / pi)
        nmax_y = round(ss * extent[1] * sqrtalpha / pi)
        nmax_z = round(ss * extent[2] * sqrtalpha / pi)

        gxl = np.linalg.norm(gx)
        gyl = np.linalg.norm(gy)
        gzl = np.linalg.norm(gz)
        if recip_cutoff is None:
            max_len = min(gxl * float(nmax_x), gyl * float(nmax_y),
                          gzl * float(nmax_z))
        else:
            max_len = recip_cutoff

        # Final vector counts: explicit values win over the deduced ones.
        if recip_nmax is None:
            nmax_x = int(ceil(max_len / gxl))
            nmax_y = int(ceil(max_len / gyl))
            nmax_z = int(ceil(max_len / gzl))
        else:
            nmax_x = recip_nmax[0]
            nmax_y = recip_nmax[1]
            nmax_z = recip_nmax[2]

        nmax_t = max(nmax_x, nmax_y, nmax_z)

        self.last_real_energy = None
        self.last_recip_energy = None
        self.last_self_energy = None

        self.kmax = (nmax_x, nmax_y, nmax_z)
        """Number of reciprocal vectors taken in each direction."""

        self.recip_cutoff = max_len
        """Reciprocal space cutoff."""
        self.recip_vectors = (gx, gy, gz)
        """Reciprocal lattice vectors"""
        self.ivolume = ivolume

        # Record the chosen parameters in the profiling dictionary.
        opt.PROFILE[self.__class__.__name__ +
                    ':recip_vectors'] = (self.recip_vectors)
        opt.PROFILE[self.__class__.__name__ +
                    ':recip_cutoff'] = (self.recip_cutoff)
        opt.PROFILE[self.__class__.__name__ + ':recip_kmax'] = (self.kmax)
        opt.PROFILE[self.__class__.__name__ + ':alpha'] = (self.alpha)
        opt.PROFILE[self.__class__.__name__ + ':tol'] = (eps)
        opt.PROFILE[self.__class__.__name__ +
                    ':real_cutoff'] = (self.real_cutoff)

        # define persistent vars
        self._vars = {}
        self._vars['alpha'] = ctypes.c_double(alpha)
        self._vars['max_recip'] = ctypes.c_double(max_len)
        self._vars['nmax_vec'] = host.Array((nmax_x, nmax_y, nmax_z),
                                            dtype=ctypes.c_int)
        self._vars['recip_vec'] = host.Array(
            np.zeros((3, 3), dtype=ctypes.c_double))
        self._vars['recip_vec'][0, :] = gx
        self._vars['recip_vec'][1, :] = gy
        self._vars['recip_vec'][2, :] = gz
        self._vars['ivolume'] = ivolume
        self._vars['coeff_space_kernel'] = data.ScalarArray(
            ncomp=((nmax_x + 1) * (nmax_y + 1) * (nmax_z + 1)),
            dtype=ctypes.c_double)
        # 3D view (z, y, x) onto the flat coefficient storage above.
        self._vars['coeff_space'] = self._vars['coeff_space_kernel'].data.view(
        ).reshape(nmax_z + 1, nmax_y + 1, nmax_x + 1)

        # pass stride in tmp space vector
        self._vars['recip_axis_len'] = ctypes.c_int(nmax_t)

        # |axis | planes | quads
        reciplen = (nmax_t+1)*12 +\
                   8*nmax_x*nmax_y + \
                   8*nmax_y*nmax_z +\
                   8*nmax_z*nmax_x +\
                   16*nmax_x*nmax_y*nmax_z

        self._vars['recip_space_kernel'] = data.GlobalArray(
            size=reciplen, dtype=ctypes.c_double, shared_memory=shared_memory)

        self._vars['recip_space_energy'] = data.GlobalArray(
            size=1, dtype=ctypes.c_double, shared_memory=shared_memory)
        self._vars['real_space_energy'] = data.GlobalArray(
            size=1, dtype=ctypes.c_double, shared_memory=shared_memory)
        self._vars['self_interaction_energy'] = data.GlobalArray(
            size=1, dtype=ctypes.c_double, shared_memory=shared_memory)

        self.shared_memory = shared_memory

        # String substitutions keyed by SUB_* placeholder names.
        self._subvars = dict()
        self._subvars['SUB_GX'] = str(gx[0])
        self._subvars['SUB_GY'] = str(gy[1])
        self._subvars['SUB_GZ'] = str(gz[2])
        self._subvars['SUB_NKMAX'] = str(nmax_t)
        self._subvars['SUB_NK'] = str(nmax_x)
        self._subvars['SUB_NL'] = str(nmax_y)
        self._subvars['SUB_NM'] = str(nmax_z)
        self._subvars['SUB_NKAXIS'] = str(nmax_t)
        self._subvars['SUB_LEN_QUAD'] = str(nmax_x * nmax_y * nmax_z)
        self._subvars['SUB_MAX_RECIP'] = str(max_len)
        self._subvars['SUB_MAX_RECIP_SQ'] = str(max_len**2.)
        self._subvars['SUB_SQRT_ALPHA'] = str(sqrt(alpha))
        self._subvars['SUB_REAL_CUTOFF_SQ'] = str(real_cutoff**2.)
        self._subvars['SUB_REAL_CUTOFF'] = str(real_cutoff)
        self._subvars['SUB_M_SQRT_ALPHA_O_PI'] = str(-1.0 * sqrt(alpha / pi))
        self._subvars['SUB_M2_SQRT_ALPHAOPI'] = str(-2.0 * sqrt(alpha / pi))
        self._subvars['SUB_MALPHA'] = str(-1.0 * alpha)
        self._subvars['SUB_ENERGY_UNIT'] = str(energy_unit)
        self._subvars['SUB_ENERGY_UNITO2'] = str(energy_unit * 0.5)
        self._subvars['SUB_FORCE_UNIT'] = str(force_unit)

        self._real_space_pairloop = None
        self._init_libs()

        self._init_coeff_space()
        self._self_interaction_lib = None