def __init__(self, gpuf, core, direction):
        """Create the persistent MPI requests used to exchange boundary fields.

        The neighbour ranks wrap around: in the '+' direction the last rank
        talks to rank 0, in the '-' direction rank 0 talks to the last rank.

        gpuf: Fields instance the getters/setters are bound to.
        core: stored unchanged as ``self.core``.
        direction: '+', '-' or '+-'; selects which side(s) are prepared.
        """
        common.check_type('gpuf', gpuf, Fields)
        common.check_value('direction', direction, ('+', '-', '+-'))

        if '+' in direction:
            # hy/hz are read from, and ey/ez written to, (-1, 0, 0)-(-1, -1, -1)
            h_getter = GetFields(gpuf, ['hy', 'hz'], (-1, 0, 0), (-1, -1, -1))
            self.gf_h = h_getter
            self.sf_e = SetFields(gpuf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

            # the last rank wraps around to rank 0
            if rank == size-1:
                neighbor = 0
            else:
                neighbor = rank+1
            self.req_send_h = comm.Send_init(h_getter.host_array, neighbor, tag=0)
            self.tmp_recv_e = np.zeros(h_getter.host_array.shape, gpuf.dtype)
            self.req_recv_e = comm.Recv_init(self.tmp_recv_e, neighbor, tag=1)

        if '-' in direction:
            # ey/ez are read from, and hy/hz written to, (0, 0, 0)-(0, -1, -1)
            e_getter = GetFields(gpuf, ['ey', 'ez'], (0, 0, 0), (0, -1, -1))
            self.gf_e = e_getter
            self.sf_h = SetFields(gpuf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

            # rank 0 wraps around to the last rank
            if rank == 0:
                neighbor = size-1
            else:
                neighbor = rank-1
            self.req_send_e = comm.Send_init(e_getter.host_array, neighbor, tag=1)
            self.tmp_recv_h = np.zeros(e_getter.host_array.shape, gpuf.dtype)
            self.req_recv_h = comm.Recv_init(self.tmp_recv_h, neighbor, tag=0)

        # global variables
        self.core = core
        self.direction = direction
Example #2
0
    def append_buffer_fields(self, cpuf):
        """Register a buffer ``cpu.Fields`` instance.

        The instance is stored in ``self.cpuf_dict`` under its ``mpi_type``
        (one of 'x+', 'x-', 'y+', 'y-', 'z+', 'z-') and appended to
        ``self.updatef_list``.
        """
        valid_mpi_types = ('x+', 'x-', 'y+', 'y-', 'z+', 'z-')
        common.check_type('cpuf', cpuf, cpu.Fields)
        common.check_value('cpuf.mpi_type', cpuf.mpi_type, valid_mpi_types)

        key = cpuf.mpi_type
        self.cpuf_dict[key] = cpuf
        self.updatef_list.append(cpuf)
    def __init__(self, nodef, direction):
        """Prepare the getters, setters and receive buffers for a node exchange.

        nodef: node.Fields instance; its first/last entries of ``mainf_list``
            are used for the '-' / '+' side respectively.
        direction: '+', '-' or '+-'.

        The instance registers itself with ``nodef.append_instance`` using
        ``priority_type = 'mpi'``.
        """
        common.check_type('nodef', nodef, node.Fields)
        common.check_value('direction', direction, ('+', '-', '+-'))

        # device modules, resolved by name through getattr(self, device_type)
        self.gpu = gpu
        self.cpu = cpu

        if '+' in direction:
            last_mainf = nodef.mainf_list[-1]
            device = getattr(self, last_mainf.device_type)

            h_getter = device.GetFields(last_mainf, ['hy', 'hz'], (-1, 0, 0), (-1, -1, -1))
            self.getf_h = h_getter
            self.setf_e = device.SetFields(last_mainf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

            self.tmp_recv_e = np.zeros(h_getter.host_array.shape, nodef.dtype)

        if '-' in direction:
            first_mainf = nodef.mainf_list[0]
            device = getattr(self, first_mainf.device_type)

            e_getter = device.GetFields(first_mainf, ['ey', 'ez'], (0, 0, 0), (0, -1, -1))
            self.getf_e = e_getter
            self.setf_h = device.SetFields(first_mainf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

            self.tmp_recv_h = np.zeros(e_getter.host_array.shape, nodef.dtype)

        # global variables
        self.direction = direction

        # neighbour ranks wrap around at both ends
        if rank == size-1:
            self.target_p = 0
        else:
            self.target_p = rank+1

        if rank == 0:
            self.target_m = size-1
        else:
            self.target_m = rank-1

        # append to the update list
        self.priority_type = 'mpi'
        nodef.append_instance(self)
    def __init__(self, gpuf, core, direction, tmax):
        """Create persistent MPI requests with two alternating receive buffers.

        gpuf: Fields instance the getters/setters are bound to.
        core: stored unchanged as ``self.core``.
        direction: '+', '-' or '+-'; '+' talks to ``rank+1``, '-' to ``rank-1``.
        tmax: stored as ``self.tmax``; ``self.tstep`` starts at 1.
        """
        common.check_type('gpuf', gpuf, Fields)
        common.check_value('direction', direction, ('+', '-', '+-'))

        if '+' in direction:
            h_getter = GetFields(gpuf, ['hy', 'hz'], (-1, 0, 0), (-1, -1, -1))
            self.gf_h = h_getter
            self.sf_e = SetFields(gpuf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

            self.req_send_h = comm.Send_init(h_getter.host_array, rank+1, tag=0)

            # two receive buffers, each with its own persistent request
            e_buffers = [np.zeros(h_getter.host_array.shape, gpuf.dtype) for _ in range(2)]
            self.tmp_recv_e_list = e_buffers
            self.req_recv_e_list = [comm.Recv_init(buf, rank+1, tag=1) for buf in e_buffers]
            self.switch_e = 0

        if '-' in direction:
            e_getter = GetFields(gpuf, ['ey', 'ez'], (0, 0, 0), (0, -1, -1))
            self.gf_e = e_getter
            self.sf_h = SetFields(gpuf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

            self.req_send_e = comm.Send_init(e_getter.host_array, rank-1, tag=1)

            # two receive buffers, each with its own persistent request
            h_buffers = [np.zeros(e_getter.host_array.shape, gpuf.dtype) for _ in range(2)]
            self.tmp_recv_h_list = h_buffers
            self.req_recv_h_list = [comm.Recv_init(buf, rank-1, tag=0) for buf in h_buffers]
            self.switch_h = 0

        # global variables
        self.gpuf = gpuf
        self.core = core
        self.direction = direction

        self.tmax = tmax
        self.tstep = 1
Example #5
0
    def get(self, str_f):
        """Return the instance attribute stored under the name ``str_f``.

        The six field names are always accepted; the 'ce*' / 'ch*' names are
        accepted only when ``self.ce_on`` / ``self.ch_on`` is true.
        """
        allowed = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
        if self.ce_on:
            allowed.extend(['cex', 'cey', 'cez'])
        if self.ch_on:
            allowed.extend(['chx', 'chy', 'chz'])
        common.check_value('str_f', str_f, allowed)

        # read straight from the instance dictionary
        return vars(self)[str_f]
    def set_fields_spatial_value(self, value):
        """Distribute ``value`` over the setters in ``self.setf_list``.

        ``value`` is split into one piece per field name; for each setter the
        matching region of every piece is concatenated, stripped of length-1
        axes (unless the result has shape ``(1,)``) and handed to
        ``set_fields``.
        """
        common.check_value('value.dtype', value.dtype, self.dtype)
        common.check_value('value.shape', value.shape, [self.shape])
        pieces = np.split(value, len(self.str_fs))

        for setter, region in zip(self.setf_list, self.slices_list):
            chunk = np.concatenate([piece[region] for piece in pieces])
            if chunk.shape != (1,):
                # drop every axis of length one
                squeezed_shape = [dim for dim in chunk.shape if dim != 1]
                chunk = chunk.reshape(squeezed_shape)
            setter.set_fields(chunk)
    def set_fields_spatial_value(self, value):
        """Write (or add) ``value`` into the selected region of each field.

        ``value`` is split into one piece per name in ``self.str_fs``; each
        piece overwrites the region ``self.slice_xyz`` of the matching field
        when ``self.is_overwrite`` is true and is added to it otherwise.
        """
        common.check_value('value.dtype', value.dtype, self.mainf.dtype)
        common.check_value('value.shape', value.shape, [self.shape])
        pieces = np.split(value, len(self.str_fs))

        for field_name, piece in zip(self.str_fs, pieces):
            field = self.mainf.get(field_name)
            if self.is_overwrite:
                field[self.slice_xyz] = piece[:]
            else:
                field[self.slice_xyz] += piece[:]
Example #8
0
    def __init__(self, node_fields, axis):
        """Set up the periodic boundary condition along ``axis`` inside a node.

        node_fields: NodeFields instance providing ``mainf_list`` and
            ``cpuf_dict``.
        axis: 'x', 'y' or 'z'.

        Raises ValueError when ``cpuf_dict`` holds a buffer instance for any
        axis ('x+', 'x-', ..., 'z-').

        When ``axis`` is 'x' and the first and last main fields are different
        objects, getters/setters between them are created and ``update_e`` /
        ``update_h`` are bound to the ``*_actual`` methods.  In every other
        case a per-field ``Pbc`` instance is created and ``update_e`` /
        ``update_h`` become no-ops.
        """

        common.check_type('node_fields', node_fields, NodeFields)
        common.check_value('axis', axis, ['x', 'y', 'z'])

        # local variables
        self.gpu = gpu
        self.cpu = cpu
        mainf_list = node_fields.mainf_list
        cpuf_dict = node_fields.cpuf_dict

        set_cpuf = set(cpuf_dict.keys())
        for ax in ['x', 'y', 'z']:
            if not set_cpuf.isdisjoint([ax+'+', ax+'-']):
                # call form of raise: valid in both Python 2 and Python 3
                raise ValueError('There are %s-axis buffer instances. The pbc_internal operation along %s-axis is not allowed.' % (ax, ax))

        # create pbc instances
        f0 = mainf_list[0]
        f1 = mainf_list[-1]
        is_split_x = (axis == 'x' and f0 is not f1)

        if is_split_x:
            # NOTE(review): f1 is accessed through cpu.* and f0 through gpu.*
            # regardless of their device_type -- confirm this is intended.
            self.setf_e = cpu.SetFields(f1, ['ey', 'ez'], \
                    (f1.nx-1, 0, 0), (f1.nx-1, f1.ny-2, f1.nz-2), True)
            self.getf_e = gpu.GetFields(f0, ['ey', 'ez'], \
                    (0, 0, 0), (0, f0.ny-2, f0.nz-2) )

            self.setf_h = gpu.SetFields(f0, ['hy', 'hz'], \
                    (0, 1, 1), (0, f0.ny-1, f0.nz-1), True )
            self.getf_h = cpu.GetFields(f1, ['hy', 'hz'], \
                    (f1.nx-1, 1, 1), (f1.nx-1, f1.ny-1, f1.nz-1) )

            self.update_e = self.update_e_actual
            self.update_h = self.update_h_actual

        else:
            if axis == 'x':
                # single main field: delegate to the device-level Pbc
                getattr(self, f0.device_type).Pbc(f0, axis)

            elif axis in ['y', 'z']:
                for f in mainf_list:
                    getattr(self, f.device_type).Pbc(f, axis)

            # nothing to do at node level
            self.update_e = lambda : None
            self.update_h = lambda : None
    def set_fields_spatial_value(self, value):
        """Upload ``value`` and run the copy kernel once per target buffer.

        ``value`` is copied host-to-device into ``self.source_buf``; then
        ``self.kernel_copy`` is launched for each entry of
        ``self.target_bufs`` with that entry's index as ``shift_idx``.
        """
        mainf = self.mainf
        common.check_value('value.dtype', value.dtype, mainf.dtype)
        common.check_value('value.shape', value.shape, [self.shape])
        nx, ny, nz_pitch = mainf.ns_pitch

        cuda.memcpy_htod(self.source_buf, value)

        for shift_idx, target_buf in enumerate(self.target_bufs):
            self.kernel_copy(
                    nx, ny, nz_pitch, np.int32(shift_idx),
                    target_buf, self.source_buf,
                    grid=mainf.gs, block=mainf.bs)
Example #10
0
    def __init__(self, nx, ny, nz, precision_float='single', segment_nbytes=16):
        """Allocate the field and coefficient arrays, padded along z.

        nx, ny, nz: grid size (ints).
        precision_float: 'single' (float32) or 'double' (float64).
        segment_nbytes: alignment in bytes; nz is padded up to a multiple of
            ``segment_nbytes // itemsize`` elements.

        Six zero-filled field arrays (ex..hz) and six coefficient arrays
        (cex..chz, filled with 0.5) of shape ``[nx, ny, nz + pad]`` are
        created.
        """
        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_type('segment_nbytes', segment_nbytes, int)
        common.check_value('precision_float', precision_float, ('single', 'double'))

        # local variables
        dtype = {'single':np.float32, 'double':np.float64}[precision_float]

        # padding for the nz which is multi of segment size
        # np.dtype(...).itemsize replaces np.nbytes (removed in NumPy 2.0);
        # floor division keeps align_size an int under Python 2 and 3 alike.
        align_size = segment_nbytes // np.dtype(dtype).itemsize
        # number of elements needed to reach the next multiple of align_size,
        # computed in exact integer arithmetic
        pad = (-nz) % align_size
        slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
        nz_pitch = nz + pad

        ns = [nx, ny, nz]
        ns_pitch = [nx, ny, nz_pitch]
        ns_pad = [nx, ny, pad]

        # allocations
        ehs = [np.zeros(ns_pitch, dtype) for i in range(6)]
        ces = [np.ones(ns_pitch, dtype)*0.5 for i in range(3)]
        chs = [np.ones(ns_pitch, dtype)*0.5 for i in range(3)]

        # global variables
        self.dx = 1.
        self.dt = 0.5
        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns
        self.ns_pitch = ns_pitch
        self.ns_pad = ns_pad

        self.align_size = align_size
        self.pad = pad
        self.slice_z = slice_z

        self.precision_float = precision_float
        self.dtype = dtype

        self.ehs = ehs
        self.ex, self.ey, self.ez = ehs[:3]
        self.hx, self.hy, self.hz = ehs[3:]
        self.ces = ces
        self.cex, self.cey, self.cez = ces
        self.chs = chs
        self.chx, self.chy, self.chz = chs

        # update list
        self.instance_list = []
        self.append_instance = lambda instance: \
                common.append_instance(self.instance_list, instance)
Example #11
0
    def __init__(self, fields, axis):
        """Build the OpenCL copy programs for a periodic boundary along ``axis``.

        fields: Fields instance providing ``ns``, ``dtype_str_list`` and
            ``context``.
        axis: 'x', 'y' or 'z'.

        Two programs are built from the 'copy_self.cl' template, one for the
        e fields and one for the h fields, each with its own pair of index
        expressions.  The instance registers itself with
        ``fields.append_instance`` using ``priority_type = 'pbc'``.
        """

        common.check_type('fields', fields, Fields)
        common.check_value('axis', axis, ['x', 'y', 'z'])

        # local variables
        nx, ny, nz = fields.ns
        dtype_str_list = fields.dtype_str_list

        # program
        replace = lambda lst, idx, val: lst[:idx] + [val] + lst[idx+1:]

        base0 = {'e': [0, 0, 0], 'h': [1, 1, 1]}
        base1 = {'e': [nx-2, ny-2, nz-2], 'h': [nx-1, ny-1, nz-1]}
        axis_id = {'x':0, 'y':1, 'z':2}[axis]
        nn = fields.ns[axis_id]
        # positions along the axis, in the order they fill IDX0 and IDX1
        boundary_indices = {'e': [0, nn-1], 'h': [nn-1, 0]}

        value_dict = {'e': [], 'h': []}
        for eh in ['e', 'h']:
            for idx in boundary_indices[eh]:
                pt0 = replace(base0[eh], axis_id, idx)
                pt1 = replace(base1[eh], axis_id, idx)
                nmax, xid, yid, zid = \
                        common_gpu.macro_replace_list(pt0, pt1)

                value_dict[eh].append( \
                        '%s*ny*nz + %s*nz + %s' % (xid, yid, zid) )

        # NOTE(review): nmax is the value from the last loop iteration and is
        # used for both programs -- presumably identical for all four regions.
        macros = ['NMAX', 'IDX0', 'IDX1', 'DTYPE', 'PRAGMA_fp64']
        values_e = [nmax] + value_dict['e'] + dtype_str_list
        values_h = [nmax] + value_dict['h'] + dtype_str_list

        # read the template once and close the file deterministically
        with open(common_gpu.src_path + 'copy_self.cl') as template_file:
            template = template_file.read()

        ksrc_e = common.replace_template_code(template, macros, values_e)
        ksrc_h = common.replace_template_code(template, macros, values_h)
        program_e = cl.Program(fields.context, ksrc_e).build()
        program_h = cl.Program(fields.context, ksrc_h).build()

        # global variables
        self.mainf = fields
        self.program_e = program_e
        self.program_h = program_h

        self.strfs_e = {'x':['ey','ez'], 'y':['ex','ez'], 'z':['ex','ey']}[axis]
        self.strfs_h = {'x':['hy','hz'], 'y':['hx','hz'], 'z':['hx','hy']}[axis]

        # append to the update list
        self.priority_type = 'pbc'
        self.mainf.append_instance(self)
Example #12
0
    def __init__(self, direction, ny, nz, coeff_use, precision_float):
        """Initialise a buffer field whose first dimension is 3.

        direction: one of 'x+', 'x-', 'y+', 'y-', 'z+', 'z-'.
        ny, nz, coeff_use, precision_float: forwarded to the parent
            constructor together with ``use_cpu_core=1``.

        The sign of ``direction`` decides which of e/h is handled as a whole
        (``['']``) and which is split into ``['pre', 'post']`` parts.
        """

        super(BufferFields, self).__init__(3, ny, nz, coeff_use, precision_float, use_cpu_core=1)
        common.check_value('direction', direction, ('x+', 'x-', 'y+', 'y-', 'z+', 'z-'))

        # global variables
        self.direction = direction

        # (e parts, h parts) for each sign
        parts_by_sign = {
                '+': ([''], ['pre', 'post']),
                '-': (['pre', 'post'], [''])}
        sign = direction[-1]
        self.part_e_list, self.part_h_list = parts_by_sign[sign]
Example #13
0
    def set_fields_spatial_value(self, value, wait_for=[]):
        """Queue the upload of ``value`` and one copy kernel per target buffer.

        value: array checked against ``self.mainf.dtype`` and ``self.shape``.
        wait_for: passed through to ``self.mainf.enqueue`` for the upload
            (the default list is never modified here).
        """
        mainf = self.mainf
        common.check_value('value.dtype', value.dtype, mainf.dtype)
        common.check_value('value.shape', value.shape, [self.shape])
        nx, ny, nz_pitch = mainf.ns_pitch

        upload_args = [mainf.queue, self.source_buf, value]
        mainf.enqueue(cl.enqueue_copy, upload_args, wait_for)

        for shift_idx, target_buf in enumerate(self.target_bufs):
            kernel_args = [
                    mainf.queue, (self.gs,), (mainf.ls,),
                    nx, ny, nz_pitch, np.int32(shift_idx),
                    target_buf, self.source_buf]
            mainf.enqueue(self.program.copy, kernel_args)
Example #14
0
    def set_fields_spatial_value(self, value, wait_for=[]):
        """Upload ``value`` and launch the ``subdomain`` kernel per target buffer.

        value: array whose dtype must equal ``self.mainf.dtype``.
        wait_for: events passed to the non-blocking ``cl.enqueue_copy``
            (the default list is never modified here).
        """

        fields = self.mainf
        queue = fields.queue
        global_size = (fields.gs,)
        local_size = (fields.ls,)
        nx, ny, nz_pitch = fields.ns_pitch
        common.check_value('value.dtype', value.dtype, fields.dtype)

        cl.enqueue_copy(
                queue, self.source_buf, value,
                is_blocking=False, wait_for=wait_for)

        for shift_idx, target_buf in enumerate(self.target_bufs):
            self.program.subdomain(
                    queue, global_size, local_size,
                    nx, ny, nz_pitch, np.int32(shift_idx),
                    target_buf, self.source_buf)
Example #15
0
    def __init__(self, fields, axis):
        """Prepare the slice tables for a periodic boundary along ``axis``.

        fields: Fields instance providing ``ns``, ``mpi_type``,
            ``split_slices_dict`` and ``append_instance``.
        axis: 'x', 'y' or 'z'.

        Raises ValueError when ``axis`` is 'x' and ``fields.mpi_type`` is one
        of the buffer types ('x+', 'x-', 'y+', 'y-', 'z+', 'z-').

        The instance registers itself with ``fields.append_instance`` using
        ``priority_type = 'pbc'``.
        """

        common.check_type('fields', fields, Fields)
        common.check_value('axis', axis, ['x', 'y', 'z'])

        mtype = fields.mpi_type
        if axis == 'x' and mtype in ['x+', 'x-', 'y+', 'y-', 'z+', 'z-']:
            # call form of raise: valid in both Python 2 and Python 3
            raise ValueError('The fields.mpi_type is \'%s\'. The buffer instance is only permit the pbc operation along y and z axes' % mtype)

        # local variables
        nx, ny, nz = fields.ns
        axis_id = {'x':0, 'y':1, 'z':2}[axis]

        # slice indices
        replace = lambda lst, idx, val: lst[:idx] + [val] + lst[idx+1:]

        # the z slices use explicit nz bounds rather than negative indices
        # NOTE(review): presumably because the arrays are padded along z -- confirm
        slices_e = [slice(None, -1), slice(None, -1), slice(None, nz-1)]
        slices_h = [slice(1, None), slice(1, None), slice(1, nz)]

        slices_e_src = replace(slices_e, axis_id, slice(None, 1))
        slices_h_dest = replace(slices_h, axis_id, slice(None, 1))
        if axis == 'z':
            slices_e_dest = replace(slices_e, axis_id, slice(nz-1, nz))
            slices_h_src = replace(slices_h, axis_id, slice(nz-1, nz))
        else:
            slices_e_dest = replace(slices_e, axis_id, slice(-1, None))
            slices_h_src = replace(slices_h, axis_id, slice(-1, None))

        # global variables
        self.mainf = fields
        self.slices_dict = { \
                'e_src': fields.split_slices_dict('e', slices_e_src), \
                'e_dest': fields.split_slices_dict('e', slices_e_dest), \
                'h_src': fields.split_slices_dict('h', slices_h_src), \
                'h_dest': fields.split_slices_dict('h', slices_h_dest) }

        self.strfs = {\
                'x': {'e': ['ey','ez'], 'h': ['hy','hz']}, \
                'y': {'e': ['ex','ez'], 'h': ['hx','hz']}, \
                'z': {'e': ['ex','ey'], 'h': ['hx','hy']} }[axis]

        # append to the update list
        self.priority_type = 'pbc'
        self.mainf.append_instance(self)
Example #16
0
    def __init__(self, fields, target_rank, tmax):
        """Create the getter, setter and persistent MPI requests for a buffer.

        fields: Fields instance whose ``mpi_type`` is one of
            'x+', 'x-', 'y+', 'y-', 'z+', 'z-'.
        target_rank: rank used for both the send and the receive request.
        tmax: stored as ``self.tmax``; ``self.tstep`` starts at 1.

        A '+' type sends hy/hz with tag 1 and receives ey/ez with tag 2;
        a '-' type sends ey/ez with tag 2 and receives hy/hz with tag 1.
        The instance registers itself with ``fields.append_instance`` using
        ``priority_type = 'mpi'``.
        """
        common.check_type('fields', fields, Fields)

        # local variables
        nx, ny, nz = fields.ns
        dtype = fields.dtype
        mpi_type = fields.mpi_type

        common.check_value('mpi_type', mpi_type, \
                ['x+', 'x-', 'y+', 'y-', 'z+', 'z-'])

        # per-direction parameters (names, corner points, tags)
        if '+' in mpi_type:     # split h
            get_names, get_pt0, get_pt1 = ['hy', 'hz'], (1, 1, 1), (1, ny-1, nz-1)
            set_names, set_pt0, set_pt1 = ['ey', 'ez'], (nx-1, 0, 0), (nx-1, ny-2, nz-2)
            send_tag, recv_tag = 1, 2

        elif '-' in mpi_type:   # split e
            get_names, get_pt0, get_pt1 = ['ey', 'ez'], (nx-2, 0, 0), (nx-2, ny-2, nz-2)
            set_names, set_pt0, set_pt1 = ['hy', 'hz'], (0, 1, 1), (0, ny-1, nz-1)
            send_tag, recv_tag = 2, 1

        # create instances (getf, setf and mpi requests)
        getf = GetFields(fields, get_names, get_pt0, get_pt1)
        setf = SetFields(fields, set_names, set_pt0, set_pt1, True)

        req_send = comm.Send_init(getf.host_array, target_rank, tag=send_tag)
        tmp_recv = np.zeros(getf.host_array.shape, dtype)
        req_recv = comm.Recv_init(tmp_recv, target_rank, tag=recv_tag)

        # global variables and functions
        self.mainf = fields
        self.getf = getf
        self.setf = setf
        self.tmp_recv = tmp_recv
        self.req_send = req_send
        self.req_recv = req_recv

        self.tmax = tmax
        self.tstep = 1

        # append to the update list
        self.priority_type = 'mpi'
        self.mainf.append_instance(self)
    def __init__(self, gpuf, direction, tmax):
        """Set up CPU buffer fields and MPI requests around a GPU field.

        gpuf: gpu.Fields instance.
        direction: '+', '-' or '+-'; '+' talks to ``rank+1``, '-' to ``rank-1``.
        tmax: stored as ``self.tmax``; ``self.tstep`` starts at 1.

        For each selected side a ``cpu.Fields`` buffer is created, together
        with getter/setter pairs between the GPU field and the buffer and
        persistent MPI requests that use two alternating receive arrays.
        """
        common.check_type('gpuf', gpuf, gpu.Fields)
        common.check_value('direction', direction, ('+', '-', '+-'))

        # one task queue shared by both CPU buffer fields
        qtask = cpu.QueueTask()

        if '+' in direction:
            # CPU buffer; the literal 3 is presumably its x size -- TODO confirm
            self.cpuf_p = cpuf_p = cpu.Fields(qtask, 3, gpuf.ny, gpuf.nz, gpuf.coeff_use, gpuf.precision_float, use_cpu_core=1)

            # h: GPU region at x index -2  ->  buffer region at x index 0
            self.gf_p_h = gpu.GetFields(gpuf, ['hy', 'hz'], (-2, 0, 0), (-2, -1, -1))
            self.sf_p_h = cpu.SetFields(cpuf_p, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

            # e: buffer region at x index 1  ->  GPU region at x index -1
            self.gf_p_e = cpu.GetFields(cpuf_p, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
            self.sf_p_e = gpu.SetFields(gpuf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

            # MPI side: h is read from buffer x index 1, e is written to x index -1
            self.gf_h = gf_h = cpu.GetFields(cpuf_p, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
            self.sf_e = cpu.SetFields(cpuf_p, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

            # persistent requests; two receive arrays, each with its own request
            self.req_send_h = comm.Send_init(gf_h.host_array, rank+1, tag=0)
            self.tmp_recv_e_list = [np.zeros(gf_h.host_array.shape, gpuf.dtype) for i in range(2)]
            self.req_recv_e_list = [comm.Recv_init(tmp_recv_e, rank+1, tag=1) for tmp_recv_e in self.tmp_recv_e_list]
            self.switch_e = 0

        if '-' in direction:
            # CPU buffer; the literal 3 is presumably its x size -- TODO confirm
            self.cpuf_m = cpuf_m = cpu.Fields(qtask, 3, gpuf.ny, gpuf.nz, gpuf.coeff_use, gpuf.precision_float, use_cpu_core=1)
            # e: GPU region at x index 1  ->  buffer region at x index -1
            self.gf_m_e = gpu.GetFields(gpuf, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
            self.sf_m_e = cpu.SetFields(cpuf_m, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

            # h: buffer region at x index 1  ->  GPU region at x index 0
            self.gf_m_h = cpu.GetFields(cpuf_m, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
            self.sf_m_h = gpu.SetFields(gpuf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

            # MPI side: e is read from buffer x index 1, h is written to x index 0
            self.gf_e = gf_e = cpu.GetFields(cpuf_m, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
            self.sf_h = cpu.SetFields(cpuf_m, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

            # persistent requests; two receive arrays, each with its own request
            self.req_send_e = comm.Send_init(gf_e.host_array, rank-1, tag=1)
            self.tmp_recv_h_list = [np.zeros(gf_e.host_array.shape, gpuf.dtype) for i in range(2)]
            self.req_recv_h_list = [comm.Recv_init(tmp_recv_h, rank-1, tag=0) for tmp_recv_h in self.tmp_recv_h_list]
            self.switch_h = 0

        # global variables
        self.direction = direction
        self.qtask = qtask

        self.tmax = tmax
        self.tstep = 1
    def __init__(self, gpuf, direction):
        """Create the boundary getters/setters and receive arrays for ``gpuf``.

        gpuf: gpu.Fields instance.
        direction: '+', '-' or '+-'; selects which side(s) are prepared.
        """
        common.check_type('gpuf', gpuf, gpu.Fields)
        common.check_value('direction', direction, ('+', '-', '+-'))

        if '+' in direction:
            h_getter = gpu.GetFields(gpuf, ['hy', 'hz'], (-1, 0, 0), (-1, -1, -1))
            self.getf_h = h_getter
            self.setf_e = gpu.SetFields(gpuf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

            # receive array shaped like the outgoing h data
            self.tmp_recv_e = np.zeros(h_getter.host_array.shape, gpuf.dtype)

        if '-' in direction:
            e_getter = gpu.GetFields(gpuf, ['ey', 'ez'], (0, 0, 0), (0, -1, -1))
            self.getf_e = e_getter
            self.setf_h = gpu.SetFields(gpuf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

            # receive array shaped like the outgoing e data
            self.tmp_recv_h = np.zeros(e_getter.host_array.shape, gpuf.dtype)

        # global variables
        self.direction = direction
Example #19
0
    def set_fields_spatial_value(self, value, wait_for=[]):
        """Upload ``value`` and launch the ``subdomain`` kernel per target buffer.

        value: array checked against ``self.mainf.dtype`` and ``self.shape``.
        wait_for: events passed to the non-blocking ``cl.enqueue_copy``
            (the default list is never modified here).
        """
        fields = self.mainf
        common.check_value("value.dtype", value.dtype, fields.dtype)
        common.check_value("value.shape", value.shape, [self.shape])
        nx, ny, nz_pitch = fields.ns_pitch

        cl.enqueue_copy(
            fields.queue, self.source_buf, value,
            is_blocking=False, wait_for=wait_for,
        )

        for shift_idx, target_buf in enumerate(self.target_bufs):
            launch_config = (fields.queue, (fields.gs,), (fields.ls,))
            kernel_args = (nx, ny, nz_pitch, np.int32(shift_idx), target_buf, self.source_buf)
            self.program.subdomain(*(launch_config + kernel_args))
Example #20
0
    def __init__(self, direction, target_rank, ny, nz, coeff_use, precision_float):
        """Initialise a buffer field bound to an MPI target rank.

        direction: one of 'x+', 'x-', 'y+', 'y-', 'z+', 'z-'.
        target_rank: int, stored as ``self.target_rank``.
        ny, nz, coeff_use, precision_float: forwarded to the parent
            constructor together with a first dimension of 3 and
            ``use_cpu_core=0``.

        The sign of ``direction`` decides which of e/h is split into
        ``['pre', 'post']`` parts; ``is_split_dict`` records the same choice
        as booleans.
        """

        super(BufferFields, self).__init__(3, ny, nz, coeff_use, precision_float, use_cpu_core=0)
        common.check_value('direction', direction, ('x+', 'x-', 'y+', 'y-', 'z+', 'z-'))
        common.check_type('target_rank', target_rank, int)

        # global variables
        self.direction = direction
        self.target_rank = target_rank

        # (e parts, h parts, split flags) for each sign
        layout_by_sign = {
                '+': ([''], ['pre', 'post'], {'e': False, 'h': True}),
                '-': (['pre', 'post'], [''], {'e': True, 'h': False})}
        sign = direction[-1]
        self.part_e_list, self.part_h_list, self.is_split_dict = layout_by_sign[sign]
Example #21
0
    def __init__(self, fields, str_f, pt0, pt1, is_array=False, is_overwrite=True):
        """Validate the arguments and select the setter implementation.

        fields: Fields instance the values are written to.
        str_f: field name or list/tuple of names among ex, ey, ez, hx, hy, hz.
        pt0, pt1: integer corner points; each coordinate must lie in
            ``range(n)`` of the matching entry of ``fields.ns``.
        is_array: when true ``self.func`` is ``set_fields_spatial_value``,
            otherwise ``set_fields_single_value``.
        is_overwrite: stored as ``self.is_overwrite``.
        """

        common.check_type("fields", fields, Fields)
        common.check_type("str_f", str_f, (str, list, tuple), str)
        common.check_type("pt0", pt0, (list, tuple), int)
        common.check_type("pt1", pt1, (list, tuple), int)
        common.check_type("is_array", is_array, bool)
        common.check_type("is_overwrite", is_overwrite, bool)

        # local variables
        str_fs = common.convert_to_tuple(str_f)

        field_names = ["ex", "ey", "ez", "hx", "hy", "hz"]
        for name in str_fs:
            common.check_value("str_f", name, field_names)

        for axis, size_along_axis, start, stop in zip("xyz", fields.ns, pt0, pt1):
            common.check_value("pt0 %s" % axis, start, range(size_along_axis))
            common.check_value("pt1 %s" % axis, stop, range(size_along_axis))

        # global variables and functions
        self.mainf = fields
        self.str_fs = str_fs
        self.slice_xyz = common.slice_index_two_points(pt0, pt1)
        self.shape = common.shape_two_points(pt0, pt1, len(str_fs))
        self.is_overwrite = is_overwrite

        self.func = self.set_fields_spatial_value if is_array else self.set_fields_single_value
Example #22
0
    def __init__(self, fields, str_f, pt0, pt1):
        """Validate the arguments and allocate the host-side result array.

        fields: Fields instance the values are read from.
        str_f: field name or list/tuple of names among ex, ey, ez, hx, hy, hz.
        pt0, pt1: integer corner points; each coordinate must lie in
            ``range(n)`` of the matching entry of ``fields.ns``.

        ``host_array`` is zero-filled with the shape given by
        ``common.shape_two_points``; ``split_host_array_dict`` maps every
        field name to its ``np.split`` piece of that array.
        """

        common.check_type("fields", fields, Fields)
        common.check_type("str_f", str_f, (str, list, tuple), str)
        for label, point in (("pt0", pt0), ("pt1", pt1)):
            common.check_type(label, point, (list, tuple), int)

        # local variables
        str_fs = common.convert_to_tuple(str_f)

        field_names = ["ex", "ey", "ez", "hx", "hy", "hz"]
        for name in str_fs:
            common.check_value("str_f", name, field_names)

        for axis, size_along_axis, start, stop in zip("xyz", fields.ns, pt0, pt1):
            common.check_value("pt0 %s" % axis, start, range(size_along_axis))
            common.check_value("pt1 %s" % axis, stop, range(size_along_axis))

        # allocation
        n_fields = len(str_fs)
        host_array = np.zeros(common.shape_two_points(pt0, pt1, n_fields), dtype=fields.dtype)
        pieces = np.split(host_array, n_fields)

        # global variables
        self.mainf = fields
        self.str_fs = str_fs
        self.slice_xyz = common.slice_index_two_points(pt0, pt1)

        self.host_array = host_array
        self.split_host_array_dict = dict(zip(str_fs, pieces))
Example #23
0
    def __init__(self, fields, str_f, pt0, pt1, is_array=False, is_overwrite=True):
        """Validate the arguments and select the setter implementation.

        fields: Fields or BufferFields instance the values are written to.
        str_f: field name or list/tuple of names among ex, ey, ez, hx, hy, hz.
        pt0, pt1: integer corner points; each coordinate must lie in
            ``range(n)`` of the matching entry of ``fields.ns``.
        is_array: when true ``self.func`` is ``set_fields_spatial_value``,
            otherwise ``set_fields_single_value``.
        is_overwrite: stored as ``self.is_overwrite``.
        """

        common.check_type('fields', fields, (Fields, BufferFields))
        common.check_type('str_f', str_f, (str, list, tuple), str)
        common.check_type('pt0', pt0, (list, tuple), int)
        common.check_type('pt1', pt1, (list, tuple), int)
        common.check_type('is_array', is_array, bool)
        common.check_type('is_overwrite', is_overwrite, bool)

        # local variables
        str_fs = common.convert_to_tuple(str_f)

        field_names = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
        for name in str_fs:
            common.check_value('str_f', name, field_names)

        for axis, size_along_axis, start, stop in zip('xyz', fields.ns, pt0, pt1):
            common.check_value('pt0 %s' % axis, start, range(size_along_axis))
            common.check_value('pt1 %s' % axis, stop, range(size_along_axis))

        # global variables and functions
        self.mainf = fields
        self.str_fs = str_fs
        self.slice_xyz = common.slices_two_points(pt0, pt1)
        self.shape = common.shape_two_points(pt0, pt1, len(str_fs))
        self.is_overwrite = is_overwrite

        self.func = self.set_fields_spatial_value if is_array else self.set_fields_single_value
Example #24
0
    def set_incident_direct(self, str_f, pt0, pt1, tfunc, spatial_value=1., is_overwrite=False):
        """Create a ``node.IncidentDirect`` source for the region pt0..pt1.

        str_f: one of 'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
        pt0, pt1: corner points (ints or floats), converted with
            ``common.convert_indices`` against ``self.ns``.
        tfunc: plain Python function, passed through to ``IncidentDirect``.
        spatial_value: ndarray or number, passed through.
        is_overwrite: bool, passed through.

        Without MPI the source is created directly on ``self.nodef``.  With
        MPI the requested region is intersected with this node's region and
        a source is created only when they overlap, using coordinates
        relative to ``self.node_pt0``.
        """
        common.check_value('str_f', str_f, ('ex', 'ey', 'ez', 'hx', 'hy', 'hz'))
        common.check_type('pt0', pt0, (list, tuple), (int, float))
        common.check_type('pt1', pt1, (list, tuple), (int, float))
        common.check_type('tfunc', tfunc, types.FunctionType)
        # float/int are the same objects as types.FloatType/types.IntType on
        # Python 2 and, unlike those aliases, also exist on Python 3
        common.check_type('spatial_value', spatial_value, \
                (np.ndarray, np.number, float, int) )
        common.check_type('is_overwrite', is_overwrite, bool)

        pt0 = list( common.convert_indices(self.ns, pt0) )
        pt1 = list( common.convert_indices(self.ns, pt1) )

        if is_mpi:
            node_pt0 = list(self.node_pt0)
            node_pt1 = list(self.node_pt1)
            buffer_dict = self.nodef.buffer_dict

            for i, axis in enumerate(['x', 'y', 'z']):
                # widen the node region by one cell on every side that has a buffer
                if '%s+' % axis in buffer_dict:
                    node_pt1[i] += 1
                if '%s-' % axis in buffer_dict:
                    node_pt0[i] -= 1

                # a region touching the global boundary is extended by one
                # cell on the first/last node along this axis
                if coord[i] == 0 and pt0[i] == 0:
                    pt0[i] -= 1
                if coord[i] == self.mpi_shape[i]-1 and pt1[i] == self.ns[i]-1:
                    pt1[i] += 1

            overlap = common.overlap_two_regions(node_pt0, node_pt1, pt0, pt1)
            if overlap is not None:
                # local coordinates are relative to the unwidened node origin
                sx0, sy0, sz0 = self.node_pt0
                ox0, oy0, oz0 = overlap[0]
                ox1, oy1, oz1 = overlap[1]

                local_pt0 = (ox0-sx0, oy0-sy0, oz0-sz0)
                local_pt1 = (ox1-sx0, oy1-sy0, oz1-sz0)

                node.IncidentDirect(self.nodef, str_f, local_pt0, local_pt1, tfunc, spatial_value, is_overwrite)

        else:
            node.IncidentDirect(self.nodef, str_f, pt0, pt1, tfunc, spatial_value, is_overwrite)
Example #25
0
    def __init__(self, fields, str_f, pt0, pt1):
        """Validate the arguments and allocate the host-side result array.

        fields: Fields or BufferFields instance the values are read from.
        str_f: field name or list/tuple of names among ex, ey, ez, hx, hy, hz.
        pt0, pt1: integer corner points; each coordinate must lie in
            ``range(n)`` of the matching entry of ``fields.ns``.

        ``host_array`` is zero-filled with the shape given by
        ``common.shape_two_points``; ``split_host_array_dict`` maps every
        field name to its ``np.split`` piece of that array.
        """

        common.check_type('fields', fields, (Fields, BufferFields))
        common.check_type('str_f', str_f, (str, list, tuple), str)
        for label, point in (('pt0', pt0), ('pt1', pt1)):
            common.check_type(label, point, (list, tuple), int)

        # local variables
        str_fs = common.convert_to_tuple(str_f)

        field_names = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
        for name in str_fs:
            common.check_value('str_f', name, field_names)

        for axis, size_along_axis, start, stop in zip('xyz', fields.ns, pt0, pt1):
            common.check_value('pt0 %s' % axis, start, range(size_along_axis))
            common.check_value('pt1 %s' % axis, stop, range(size_along_axis))

        # allocation
        n_fields = len(str_fs)
        host_array = np.zeros(common.shape_two_points(pt0, pt1, n_fields), dtype=fields.dtype)
        pieces = np.split(host_array, n_fields)

        # global variables
        self.mainf = fields
        self.str_fs = str_fs
        self.slice_xyz = common.slices_two_points(pt0, pt1)

        self.host_array = host_array
        self.split_host_array_dict = dict(zip(str_fs, pieces))
Example #26
0
    def split_slices_dict(self, eh, slices):
        """Split ``slices`` into 'pre', 'mid' and 'post' parts along the first axis.

        eh: 'e' or 'h'; selects the first-axis ranges used for the parts.
        slices: list/tuple of slice objects.

        Returns a dict with the keys '' (the unchanged ``slices``), 'pre',
        'mid' and 'post'; the last three are the results of
        ``common.intersection_two_slices`` with the matching first-axis range.
        """
        common.check_value('eh', eh, ('e', 'h'))
        common.check_type('slices', slices, (list, tuple), slice)

        def clip_first_axis(first_axis_slice):
            # intersect with a region restricted only along the first axis
            region = (first_axis_slice, slice(None, None), slice(None, None))
            return common.intersection_two_slices(self.ns, slices, region)

        if eh == 'e':
            pre, mid, post = slice(-2, None), slice(1, -2), slice(None, 1)
        elif eh == 'h':
            pre, mid, post = slice(None, 2), slice(2, -1), slice(-1, None)

        slices_dict = {'': slices}
        slices_dict['pre'] = clip_first_axis(pre)
        slices_dict['mid'] = clip_first_axis(mid)
        slices_dict['post'] = clip_first_axis(post)

        return slices_dict
    def __init__(self, fields, str_f, pt0, pt1):
        """Build the OpenCL copy program and buffers used to read fields back.

        fields: Fields instance providing ``ns``, ``dtype``,
            ``dtype_str_list``, ``context`` and ``get_buf``.
        str_f: field name or list/tuple of names among ex, ey, ez, hx, hy, hz.
        pt0, pt1: integer corner points; each coordinate must lie in
            ``range(n)`` of the matching entry of ``fields.ns``.

        The program is generated from the 'copy.cl' template.  ``host_array``
        is zero-filled and ``target_buf`` is a device buffer initialised from
        it; ``split_host_array_dict`` maps every field name to its
        ``np.split`` piece of ``host_array``.
        """

        common.check_type('fields', fields, Fields)
        common.check_type('str_f', str_f, (str, list, tuple), str)
        common.check_type('pt0', pt0, (list, tuple), int)
        common.check_type('pt1', pt1, (list, tuple), int)

        # local variables
        str_fs = common.convert_to_tuple(str_f)
        dtype_str_list = fields.dtype_str_list

        strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
        for strf in str_fs:
            common.check_value('str_f', strf, strf_list)

        for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
            common.check_value('pt0 %s' % axis, p0, range(n))
            common.check_value('pt1 %s' % axis, p1, range(n))

        # program
        macros = ['NMAX', 'XID', 'YID', 'ZID', \
                'ARGS', \
                'TARGET', 'SOURCE', 'OVERWRITE', \
                'DTYPE', 'PRAGMA_fp64']

        values = common_gpu.macro_replace_list(pt0, pt1) + \
                ['__global DTYPE *source', \
                'target[sub_idx]', 'source[idx]', '='] + \
                dtype_str_list

        # close the template file deterministically instead of leaking the handle
        with open(common_gpu.src_path + 'copy.cl') as template_file:
            template = template_file.read()

        ksrc = common.replace_template_code(template, macros, values)
        program = cl.Program(fields.context, ksrc).build()

        # allocation
        # the loop variable is named apart from the str_f parameter so the
        # comprehension cannot rebind it under Python 2
        source_bufs = [fields.get_buf(strf) for strf in str_fs]
        shape = common.shape_two_points(pt0, pt1, len(str_fs))

        host_array = np.zeros(shape, dtype=fields.dtype)
        split_host_array = np.split(host_array, len(str_fs))
        split_host_array_dict = dict(zip(str_fs, split_host_array))

        target_buf = cl.Buffer( \
                fields.context, \
                cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, \
                hostbuf=host_array)

        # global variables
        self.mainf = fields
        self.program = program
        self.source_bufs = source_bufs
        self.target_buf = target_buf
        self.host_array = host_array
        self.split_host_array_dict = split_host_array_dict
Example #28
0
    def split_points_dict(self, eh, pt0, pt1):
        """
        Split the region (pt0, pt1) into 'pre', 'mid' and 'post' x-slabs.

        eh       : 'e' or 'h'; selects which x ranges form the three slabs.
        pt0, pt1 : (x, y, z) integer points delimiting the region.

        Returns a dict with the keys '', 'pre', 'mid' and 'post'.  The ''
        entry is (pt0, pt1) itself; the others are whatever
        common.intersection_two_regions() returns for the region and the
        corresponding slab of the self.ns grid.
        """
        common.check_value('eh', eh, ('e', 'h'))
        common.check_type('pt0', pt0, (list, tuple), int)
        common.check_type('pt1', pt1, (list, tuple), int)

        nx, ny, nz = self.ns
        y_last, z_last = ny - 1, nz - 1

        def clip_to_slab(x_first, x_last):
            # the slab spans the full y and z extent of the grid
            return common.intersection_two_regions(
                pt0, pt1, (x_first, 0, 0), (x_last, y_last, z_last))

        # (key, first x index, last x index) in evaluation order
        if eh == 'e':
            slabs = (('pre', nx - 2, nx - 1),
                     ('mid', 1, nx - 3),
                     ('post', 0, 0))
        elif eh == 'h':
            slabs = (('pre', 0, 1),
                     ('mid', 2, nx - 2),
                     ('post', nx - 1, nx - 1))

        points_dict = {'': (pt0, pt1)}
        for key, x_first, x_last in slabs:
            points_dict[key] = clip_to_slab(x_first, x_last)

        return points_dict
Example #29
0
    def __init__(self, fields, str_f, pt0, pt1):
        """
        Build the CUDA copy kernel and the staging buffers for a region.

        fields   : Fields instance; its ns, dtype, dtype_str_list and
                   get_buf() are used here.
        str_f    : one field name or a list/tuple of names, each among
                   'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
        pt0, pt1 : (x, y, z) points given as int or float; they are passed
                   through common.convert_indices(fields.ns, ...) and the
                   converted components are checked against range(n).
        """

        common.check_type('fields', fields, Fields)
        common.check_type('str_f', str_f, (str, list, tuple), str)
        common.check_type('pt0', pt0, (list, tuple), (int, float))
        common.check_type('pt1', pt1, (list, tuple), (int, float))

        pt0 = list( common.convert_indices(fields.ns, pt0) )
        pt1 = list( common.convert_indices(fields.ns, pt1) )

        # local variables
        str_fs = common.convert_to_tuple(str_f)
        dtype_str_list = fields.dtype_str_list

        strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
        for strf in str_fs:
            common.check_value('str_f', strf, strf_list)

        for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
            common.check_value('pt0 %s' % axis, p0, range(n))
            common.check_value('pt1 %s' % axis, p1, range(n))

        # program
        macros = ['NMAX', 'XID', 'YID', 'ZID', \
                'ARGS', \
                'TARGET', 'SOURCE', 'OVERWRITE', \
                'DTYPE']

        values = common_gpu.macro_replace_list(pt0, pt1) + \
                ['DTYPE *source', \
                'target[sub_idx]', 'source[idx]', '='] + \
                dtype_str_list

        # read the kernel template through a context manager so that the
        # file handle is closed deterministically
        with open(common_gpu.src_path + 'copy.cu') as template_file:
            template = template_file.read()
        ksrc = common.replace_template_code(template, macros, values)
        program = SourceModule(ksrc)
        kernel_copy = program.get_function('copy')

        # allocation
        # (the loop variable is deliberately not called str_f: in Python 2 a
        # list-comprehension variable leaks and would rebind the parameter)
        source_bufs = [fields.get_buf(name) for name in str_fs]
        shape = common.shape_two_points(pt0, pt1, len(str_fs))

        # one zero-filled host array holds all requested fields; it is
        # split into len(str_fs) equal parts, one per field name
        host_array = np.zeros(shape, fields.dtype)
        split_host_array = np.split(host_array, len(str_fs))
        split_host_array_dict = dict( zip(str_fs, split_host_array) )
        target_buf = cuda.to_device(host_array)

        # global variables
        self.mainf = fields
        self.kernel_copy = kernel_copy
        self.source_bufs = source_bufs
        self.target_buf = target_buf
        self.host_array = host_array
        self.split_host_array_dict = split_host_array_dict
    def __init__(self, gpuf, direction):
        """
        Create the get/set helpers and receive arrays for boundary exchange.

        gpuf      : gpu.Fields instance whose x-boundary planes are exchanged.
        direction : '+', '-' or '+-'; selects which side(s) get helpers.
        """
        common.check_type('gpuf', gpuf, gpu.Fields)
        common.check_value('direction', direction, ('+', '-', '+-'))

        if '+' in direction:
            # x index -1: read hy/hz out, write ey/ez in
            h_reader = gpu.GetFields(gpuf, ['hy', 'hz'],
                                     (-1, 0, 0), (-1, -1, -1))
            self.getf_h = h_reader
            self.setf_e = gpu.SetFields(gpuf, ['ey', 'ez'],
                                        (-1, 0, 0), (-1, -1, -1), True)

            # receive array shaped like the outgoing h host array
            self.tmp_recv_e = np.zeros(h_reader.host_array.shape, gpuf.dtype)

        if '-' in direction:
            # x index 0: read ey/ez out, write hy/hz in
            e_reader = gpu.GetFields(gpuf, ['ey', 'ez'],
                                     (0, 0, 0), (0, -1, -1))
            self.getf_e = e_reader
            self.setf_h = gpu.SetFields(gpuf, ['hy', 'hz'],
                                        (0, 0, 0), (0, -1, -1), True)

            # receive array shaped like the outgoing e host array
            self.tmp_recv_h = np.zeros(e_reader.host_array.shape, gpuf.dtype)

        # global variables
        self.direction = direction
    def __init__(self, gpuf, direction):
        """
        Set up CPU buffer fields and the get/set helpers linking them to gpuf.

        gpuf      : gpu.Fields instance.
        direction : '+', '-' or '+-'; selects which side(s) get a buffer.

        For each selected side a cpu.Fields buffer is created on a shared
        cpu.QueueTask, together with helpers that copy between gpuf and the
        buffer and helpers that read from / write to the buffer itself.
        """
        common.check_type('gpuf', gpuf, gpu.Fields)
        common.check_value('direction', direction, ('+', '-', '+-'))

        qtask = cpu.QueueTask()

        def make_buffer_fields():
            # presumably a buffer 3 cells thick along x -- TODO confirm
            # against the cpu.Fields signature
            return cpu.Fields(qtask, 3, gpuf.ny, gpuf.nz, gpuf.coeff_use,
                              gpuf.precision_float, use_cpu_core=0)

        if '+' in direction:
            buf_p = make_buffer_fields()
            self.cpuf_p = buf_p

            # gpuf h at x index -2  ->  buffer h at x index 0
            self.gf_p_h = gpu.GetFields(gpuf, ['hy', 'hz'],
                                        (-2, 0, 0), (-2, -1, -1))
            self.sf_p_h = cpu.SetFields(buf_p, ['hy', 'hz'],
                                        (0, 0, 0), (0, -1, -1), True)

            # buffer e at x index 1  ->  gpuf e at x index -1
            self.gf_p_e = cpu.GetFields(buf_p, ['ey', 'ez'],
                                        (1, 0, 0), (1, -1, -1))
            self.sf_p_e = gpu.SetFields(gpuf, ['ey', 'ez'],
                                        (-1, 0, 0), (-1, -1, -1), True)

            # helpers acting on the buffer only
            self.gf_h = cpu.GetFields(buf_p, ['hy', 'hz'],
                                      (1, 0, 0), (1, -1, -1))
            self.sf_e = cpu.SetFields(buf_p, ['ey', 'ez'],
                                      (-1, 0, 0), (-1, -1, -1), True)

            self.tmp_recv_e = np.zeros(self.gf_h.host_array.shape, gpuf.dtype)

        if '-' in direction:
            buf_m = make_buffer_fields()
            self.cpuf_m = buf_m

            # gpuf e at x index 1  ->  buffer e at x index -1
            self.gf_m_e = gpu.GetFields(gpuf, ['ey', 'ez'],
                                        (1, 0, 0), (1, -1, -1))
            self.sf_m_e = cpu.SetFields(buf_m, ['ey', 'ez'],
                                        (-1, 0, 0), (-1, -1, -1), True)

            # buffer h at x index 1  ->  gpuf h at x index 0
            self.gf_m_h = cpu.GetFields(buf_m, ['hy', 'hz'],
                                        (1, 0, 0), (1, -1, -1))
            self.sf_m_h = gpu.SetFields(gpuf, ['hy', 'hz'],
                                        (0, 0, 0), (0, -1, -1), True)

            # helpers acting on the buffer only
            self.gf_e = cpu.GetFields(buf_m, ['ey', 'ez'],
                                      (1, 0, 0), (1, -1, -1))
            self.sf_h = cpu.SetFields(buf_m, ['hy', 'hz'],
                                      (0, 0, 0), (0, -1, -1), True)

            self.tmp_recv_h = np.zeros(self.gf_e.host_array.shape, gpuf.dtype)

        # global variables
        self.direction = direction
        self.qtask = qtask
Example #32
0
    def __init__(self, gpuf, direction):
        """
        Set up CPU buffer fields and the get/set helpers linking them to gpuf.

        gpuf      : gpu.Fields instance.
        direction : '+', '-' or '+-'; selects which side(s) get a buffer.

        For each selected side a cpu.Fields buffer is created on a shared
        cpu.QueueTask, together with helpers that copy between gpuf and the
        buffer and helpers that read from / write to the buffer itself.
        """
        common.check_type('gpuf', gpuf, gpu.Fields)
        common.check_value('direction', direction, ('+', '-', '+-'))

        qtask = cpu.QueueTask()

        def make_buffer_fields():
            # presumably a buffer 3 cells thick along x -- TODO confirm
            # against the cpu.Fields signature
            return cpu.Fields(qtask, 3, gpuf.ny, gpuf.nz, gpuf.coeff_use,
                              gpuf.precision_float, use_cpu_core=0)

        if '+' in direction:
            buf_p = make_buffer_fields()
            self.cpuf_p = buf_p

            # gpuf h at x index -2  ->  buffer h at x index 0
            self.gf_p_h = gpu.GetFields(gpuf, ['hy', 'hz'],
                                        (-2, 0, 0), (-2, -1, -1))
            self.sf_p_h = cpu.SetFields(buf_p, ['hy', 'hz'],
                                        (0, 0, 0), (0, -1, -1), True)

            # buffer e at x index 1  ->  gpuf e at x index -1
            self.gf_p_e = cpu.GetFields(buf_p, ['ey', 'ez'],
                                        (1, 0, 0), (1, -1, -1))
            self.sf_p_e = gpu.SetFields(gpuf, ['ey', 'ez'],
                                        (-1, 0, 0), (-1, -1, -1), True)

            # helpers acting on the buffer only
            self.gf_h = cpu.GetFields(buf_p, ['hy', 'hz'],
                                      (1, 0, 0), (1, -1, -1))
            self.sf_e = cpu.SetFields(buf_p, ['ey', 'ez'],
                                      (-1, 0, 0), (-1, -1, -1), True)

            self.tmp_recv_e = np.zeros(self.gf_h.host_array.shape, gpuf.dtype)

        if '-' in direction:
            buf_m = make_buffer_fields()
            self.cpuf_m = buf_m

            # gpuf e at x index 1  ->  buffer e at x index -1
            self.gf_m_e = gpu.GetFields(gpuf, ['ey', 'ez'],
                                        (1, 0, 0), (1, -1, -1))
            self.sf_m_e = cpu.SetFields(buf_m, ['ey', 'ez'],
                                        (-1, 0, 0), (-1, -1, -1), True)

            # buffer h at x index 1  ->  gpuf h at x index 0
            self.gf_m_h = cpu.GetFields(buf_m, ['hy', 'hz'],
                                        (1, 0, 0), (1, -1, -1))
            self.sf_m_h = gpu.SetFields(gpuf, ['hy', 'hz'],
                                        (0, 0, 0), (0, -1, -1), True)

            # helpers acting on the buffer only
            self.gf_e = cpu.GetFields(buf_m, ['ey', 'ez'],
                                      (1, 0, 0), (1, -1, -1))
            self.sf_h = cpu.SetFields(buf_m, ['hy', 'hz'],
                                      (0, 0, 0), (0, -1, -1), True)

            self.tmp_recv_h = np.zeros(self.gf_e.host_array.shape, gpuf.dtype)

        # global variables
        self.direction = direction
        self.qtask = qtask
Example #33
0
    def __init__(self, fields, str_f, pt0, pt1, tfunc, spatial_value=1., is_overwrite=False):
        """
        Register a time-dependent source on one field component.

        fields        : Fields instance the source is attached to.
        str_f         : one of 'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
        pt0, pt1      : (x, y, z) integer points; each component is checked
                        against range(n) of the matching axis of fields.ns.
        tfunc         : plain Python function (types.FunctionType).
        spatial_value : ndarray (shape and dtype must match the region and
                        fields.dtype) or a number, which is cast with
                        fields.dtype.
        is_overwrite  : bool forwarded to SetFields.
        """

        common.check_type('fields', fields, Fields)
        common.check_value('str_f', str_f, ('ex', 'ey', 'ez', 'hx', 'hy', 'hz'))
        common.check_type('pt0', pt0, (list, tuple), int)
        common.check_type('pt1', pt1, (list, tuple), int)
        common.check_type('tfunc', tfunc, types.FunctionType)
        common.check_type('spatial_value', spatial_value, \
                (np.ndarray, np.number, types.FloatType, types.IntType) )
        common.check_type('is_overwrite', is_overwrite, bool)

        # local variables
        dtype = fields.dtype
        is_array = isinstance(spatial_value, np.ndarray)

        for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
            allowed = range(n)
            common.check_value('pt0 %s' % axis, p0, allowed)
            common.check_value('pt1 %s' % axis, p1, allowed)

        if not is_array:
            # scalar amplitude: cast once to the field dtype
            spatial_value = dtype(spatial_value)
        else:
            region_shape = common.shape_two_points(pt0, pt1)
            assert region_shape == spatial_value.shape, \
                    'shape mismatch : %s, %s' % (region_shape, spatial_value.shape)
            assert dtype == spatial_value.dtype, \
                    'dtype mismatch : %s, %s' % (dtype, spatial_value.dtype)

        # global variables
        self.mainf = fields
        self.dtype = dtype
        self.tfunc = tfunc
        self.setf = SetFields(fields, str_f, pt0, pt1, is_array, is_overwrite)

        self.spatial_value = spatial_value
        self.tstep = 1

        # global functions: only the updater matching the field kind does
        # real work, the other one is a no-op
        kind = str_f[0]
        if kind == 'e':
            self.update_e, self.update_h = self.update, (lambda : None)
        elif kind == 'h':
            self.update_e, self.update_h = (lambda : None), self.update

        # append to the update list
        self.priority_type = 'incident'
        fields.append_instance(self)
Example #34
0
    def __init__(self,
                 fields,
                 str_f,
                 pt0,
                 pt1,
                 tfunc,
                 spatial_value=1.,
                 is_overwrite=False):
        """
        Register a time-dependent source on one field component.

        fields        : Fields instance the source is attached to.
        str_f         : one of 'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
        pt0, pt1      : (x, y, z) points given as int or float; converted
                        with common.convert_indices(fields.ns, ...) and then
                        checked against range(n) per axis.
        tfunc         : plain Python function (types.FunctionType).
        spatial_value : ndarray (shape and dtype must match the region and
                        fields.dtype) or a number, which is cast with
                        fields.dtype.
        is_overwrite  : bool stored on the instance.
        """
        common.check_type('fields', fields, Fields)
        common.check_value('str_f', str_f,
                           ('ex', 'ey', 'ez', 'hx', 'hy', 'hz'))
        common.check_type('pt0', pt0, (list, tuple), (int, float))
        common.check_type('pt1', pt1, (list, tuple), (int, float))
        common.check_type('tfunc', tfunc, types.FunctionType)
        common.check_type('spatial_value', spatial_value, \
                (np.ndarray, np.number, types.FloatType, types.IntType) )
        common.check_type('is_overwrite', is_overwrite, bool)

        # local variables
        pt0 = common.convert_indices(fields.ns, pt0)
        pt1 = common.convert_indices(fields.ns, pt1)
        dtype = fields.dtype

        for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
            allowed = range(n)
            common.check_value('pt0 %s' % axis, p0, allowed)
            common.check_value('pt1 %s' % axis, p1, allowed)

        if isinstance(spatial_value, np.ndarray):
            region_shape = common.shape_two_points(pt0, pt1)
            assert region_shape == spatial_value.shape, \
                    'shape mismatch : %s, %s' % (region_shape, spatial_value.shape)
            assert dtype == spatial_value.dtype, \
                    'dtype mismatch : %s, %s' % (dtype, spatial_value.dtype)
        else:
            # scalar amplitude: cast once to the field dtype
            spatial_value = dtype(spatial_value)

        # global variables
        self.mainf = fields
        self.str_f = str_f
        self.slices = common.slices_two_points(pt0, pt1)
        self.tfunc = tfunc
        self.spatial_value = spatial_value
        self.is_overwrite = is_overwrite

        # first letter of the field name: 'e' or 'h'
        self.e_or_h = str_f[0]
        self.tstep = 1

        # append to the update list
        self.priority_type = 'incident'
        fields.append_instance(self)
    def __init__(self,
                 fields,
                 str_f,
                 pt0,
                 pt1,
                 is_array=False,
                 is_overwrite=True):
        """
        Prepare a setter for a region of one or more field components.

        fields       : Fields instance to write into.
        str_f        : one field name or a list/tuple of names, each among
                       'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
        pt0, pt1     : (x, y, z) integer points; each component is checked
                       against range(n) of the matching axis of fields.ns.
        is_array     : selects self.set_fields_spatial_value when True,
                       self.set_fields_single_value otherwise.
        is_overwrite : bool stored on the instance.
        """

        common.check_type('fields', fields, Fields)
        common.check_type('str_f', str_f, (str, list, tuple), str)
        common.check_type('pt0', pt0, (list, tuple), int)
        common.check_type('pt1', pt1, (list, tuple), int)
        common.check_type('is_array', is_array, bool)
        common.check_type('is_overwrite', is_overwrite, bool)

        # local variables
        field_names = common.convert_to_tuple(str_f)

        for name in field_names:
            common.check_value('str_f', name,
                               ['ex', 'ey', 'ez', 'hx', 'hy', 'hz'])

        for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
            allowed = range(n)
            common.check_value('pt0 %s' % axis, p0, allowed)
            common.check_value('pt1 %s' % axis, p1, allowed)

        # global variables and functions
        self.mainf = fields
        self.str_fs = field_names
        self.slice_xyz = common.slice_index_two_points(pt0, pt1)
        self.shape = common.shape_two_points(pt0, pt1, len(field_names))
        self.is_overwrite = is_overwrite

        # pick the setter implementation once, at construction time
        self.func = self.set_fields_spatial_value if is_array \
                else self.set_fields_single_value
    def __init__(self, fields, str_f, pt0, pt1):
        """
        Prepare a getter and its host array for a region of field components.

        fields   : Fields instance to read from.
        str_f    : one field name or a list/tuple of names, each among
                   'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
        pt0, pt1 : (x, y, z) points given as int or float; converted with
                   common.convert_indices(fields.ns, ...) and then checked
                   against range(n) per axis.
        """

        common.check_type('fields', fields, Fields)
        common.check_type('str_f', str_f, (str, list, tuple), str)
        common.check_type('pt0', pt0, (list, tuple), (int, float))
        common.check_type('pt1', pt1, (list, tuple), (int, float))

        pt0 = list( common.convert_indices(fields.ns, pt0) )
        pt1 = list( common.convert_indices(fields.ns, pt1) )

        # local variables
        field_names = common.convert_to_tuple(str_f)
        n_fields = len(field_names)

        for name in field_names:
            common.check_value('str_f', name,
                               ['ex', 'ey', 'ez', 'hx', 'hy', 'hz'])

        for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
            allowed = range(n)
            common.check_value('pt0 %s' % axis, p0, allowed)
            common.check_value('pt1 %s' % axis, p1, allowed)

        # allocation: one zero-filled array for all fields, split into
        # n_fields equal parts keyed by field name
        host_array = np.zeros(
            common.shape_two_points(pt0, pt1, n_fields), dtype=fields.dtype)
        per_field = np.split(host_array, n_fields)

        # global variables
        self.mainf = fields
        self.str_fs = field_names
        self.slice_xyz = common.slices_two_points(pt0, pt1)

        self.host_array = host_array
        self.split_host_array_dict = dict(zip(field_names, per_field))
Example #37
0
    def __init__(self, context, device, \
            nx, ny, nz, \
            coeff_use='e', \
            precision_float='single', \
            local_work_size=256, \
            global_work_size=0):
        """
        Allocate the OpenCL field buffers and build the core program.

        context, device  : pyopencl Context and Device to run on.
        nx, ny, nz       : integer grid sizes.
        coeff_use        : '', 'e', 'h' or 'eh'; selects which coefficient
                           buffers (ce*, ch*) are allocated and passed to
                           the kernels.
        precision_float  : 'single' or 'double'.
        local_work_size  : stored as self.ls and substituted for the 'DX'
                           macro of the kernel template.
        global_work_size : stored as self.gs; 0 means "ask
                           common_gpu.get_optimal_gs(device)".
        """

        common.check_type('context', context, cl.Context)
        common.check_type('device', device, cl.Device)
        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_type('global_work_size', global_work_size, int)
        common.check_type('local_work_size', local_work_size, int)

        common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
        common.check_value('precision_float', precision_float,
                           ('single', 'double'))

        self.context = context
        self.device = device
        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ls = local_work_size
        self.gs = global_work_size
        self.coeff_use = coeff_use
        self.dtype = {
            'single': np.float32,
            'double': np.float64
        }[precision_float]
        self.dtype_str = {
            'single': 'float',
            'double': 'double'
        }[precision_float]
        # [DTYPE macro value, PRAGMA_fp64 macro value]; the PAD value is
        # appended below once the padding is known
        self.dtype_str_list = { \
                'single':['float', ''], \
                'double':['double', '#pragma OPENCL EXTENSION cl_khr_fp64 : enable'] }[precision_float]

        self.device_type = 'gpu'

        # padding for the nz which is multiple of 16 (float32) or 8 (float64)
        self.align_size = a_size = {
            'single': 16,
            'double': 8
        }[precision_float]  # 64 Bytes
        self.pad = pad = int(np.ceil(float(nz) / a_size) * a_size) - nz
        self.slz = slice(None, None) if pad == 0 else slice(None, -pad)
        self.nz_pitch = nz_pitch = nz + pad

        self.dtype_str_list.append('' if pad == 0 else '-%s' % pad)

        # ns, queue, global_size
        self.ns = [np.int32(nx), np.int32(ny), np.int32(nz)]
        self.ns_pitch = [np.int32(nx), np.int32(ny), np.int32(nz_pitch)]
        self.ns_pad = [np.int32(nx), np.int32(ny), np.int32(pad)]
        self.queue = cl.CommandQueue(self.context, self.device)
        if self.gs == 0:
            self.gs = common_gpu.get_optimal_gs(self.device)

        # on/off the coefficient arrays
        self.ce_on = 'e' in self.coeff_use
        self.ch_on = 'h' in self.coeff_use

        # allocations
        f = np.zeros(self.ns_pitch, dtype=self.dtype)
        # every buffer has the padded field size; the coefficient buffers
        # share shape and dtype with f, so f.nbytes is their size too and no
        # separate full-size coefficient array has to be allocated
        nbytes = f.nbytes
        mf = cl.mem_flags

        self.eh_bufs = [cl.Buffer(self.context, mf.READ_WRITE, nbytes) \
                for _ in range(6)]
        for eh_buf in self.eh_bufs:
            # zero-initialise the field buffers
            cl.enqueue_copy(self.queue, eh_buf, f)
        self.ex_buf, self.ey_buf, self.ez_buf = self.eh_bufs[:3]
        self.hx_buf, self.hy_buf, self.hz_buf = self.eh_bufs[3:]

        # NOTE(review): the coefficient buffers are allocated but nothing is
        # copied into them here -- presumably they are filled elsewhere
        # before the kernels read them; confirm against the callers.
        if self.ce_on:
            self.ce_bufs = [cl.Buffer(self.context, mf.READ_ONLY, nbytes) \
                    for _ in range(3)]
            self.cex_buf, self.cey_buf, self.cez_buf = self.ce_bufs

        if self.ch_on:
            self.ch_bufs = [cl.Buffer(self.context, mf.READ_ONLY, nbytes) \
                    for _ in range(3)]
            self.chx_buf, self.chy_buf, self.chz_buf = self.ch_bufs

        del f

        # program
        macros = ['ARGS_CE', 'CEX', 'CEY', 'CEZ', \
                'ARGS_CH', 'CHX', 'CHY', 'CHZ', \
                'DX', 'DTYPE', 'PRAGMA_fp64', 'PAD']

        # defaults: no coefficient arguments, constant 0.5 coefficients
        values = ['', '0.5', '0.5', '0.5', \
                '', '0.5', '0.5', '0.5', \
                str(self.ls)] + self.dtype_str_list

        self.e_args = self.ns_pitch + self.eh_bufs
        self.h_args = self.ns_pitch + self.eh_bufs

        if self.ce_on:
            values[:4] = [ \
                    ', __global DTYPE *cex, __global DTYPE *cey, __global DTYPE *cez', \
                    'cex[idx]', 'cey[idx]', 'cez[idx]']
            self.e_args += self.ce_bufs

        if self.ch_on:
            values[4:8] = [ \
                    ', __global DTYPE *chx, __global DTYPE *chy, __global DTYPE *chz', \
                    'chx[idx]', 'chy[idx]', 'chz[idx]']
            self.h_args += self.ch_bufs

        # read the kernel template through a context manager so that the
        # file handle is closed deterministically
        with open(common_gpu.src_path + 'core.cl') as template_file:
            template = template_file.read()
        ksrc = common.replace_template_code(template, macros, values)
        self.program = cl.Program(self.context, ksrc).build()
Example #38
0
    def get(self, str_f):
        """
        Return the instance attribute named str_f.

        str_f must be a field name ('ex' ... 'hz') or a coefficient name
        ('cex' ... 'chz'); it is validated with common.check_value() and
        then looked up directly in the instance dictionary.
        """
        # 'e', 'h', 'ce', 'ch' crossed with the three axes, in that order
        allowed_names = [prefix + axis
                         for prefix in ('e', 'h', 'ce', 'ch')
                         for axis in ('x', 'y', 'z')]
        common.check_value('str_f', str_f, allowed_names)

        instance_dict = self.__dict__
        return instance_dict[str_f]
Example #39
0
    def __init__(self, nx, ny, nz, \
            coeff_use='', \
            precision_float='single', \
            use_cpu_core=0):
        """
        Allocate the CPU field (and optional coefficient) arrays.

        nx, ny, nz      : integer grid sizes.
        coeff_use       : '', 'e', 'h' or 'eh'; selects which coefficient
                          arrays (ce*, ch*) are allocated, filled with 0.5.
        precision_float : 'single' (np.float32) or 'double' (np.float64).
        use_cpu_core    : int, stored on the instance.
        """

        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
        common.check_value('precision_float', precision_float, \
                ('single', 'double'))
        common.check_type('use_cpu_core', use_cpu_core, int)

        # local variables
        dtype = {'single': np.float32, 'double': np.float64}[precision_float]

        # padding for the nz which is multiple of 4 (float32) or 2 (float64)
        segment_nbytes = 16
        # floor division keeps align_size an int under both classic and
        # true division ('/' would yield a float with true division)
        align_size = segment_nbytes // np.nbytes[dtype]
        pad = int(np.ceil(float(nz) / align_size) * align_size) - nz
        slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
        nz_pitch = nz + pad

        ns = [nx, ny, nz]
        ns_pitch = [nx, ny, nz_pitch]
        ns_pad = [nx, ny, pad]

        # on/off the coefficient arrays
        ce_on = 'e' in coeff_use
        ch_on = 'h' in coeff_use

        # allocations (all arrays use the padded shape)
        ehs = [np.zeros(ns_pitch, dtype) for _ in range(6)]

        if ce_on:
            ces = [np.ones(ns_pitch, dtype) * 0.5 for _ in range(3)]

        if ch_on:
            chs = [np.ones(ns_pitch, dtype) * 0.5 for _ in range(3)]

        # global variables and functions
        self.device_type = 'cpu'
        self.qtask = QueueTask()
        self.enqueue = self.qtask.enqueue
        self.enqueue_barrier = self.qtask.enqueue_barrier

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns
        self.ns_pitch = ns_pitch
        self.ns_pad = ns_pad

        self.align_size = align_size
        self.pad = pad
        self.slice_z = slice_z

        self.precision_float = precision_float
        self.use_cpu_core = use_cpu_core
        self.dtype = dtype

        self.coeff_use = coeff_use
        self.ce_on = ce_on
        self.ch_on = ch_on

        self.ehs = ehs
        self.ex, self.ey, self.ez = ehs[:3]
        self.hx, self.hy, self.hz = ehs[3:]
        if ce_on:
            self.ces = ces
            self.cex, self.cey, self.cez = ces
        if ch_on:
            self.chs = chs
            self.chx, self.chy, self.chz = chs

        # update list
        self.instance_list = []
        self.append_instance = lambda instance: \
                common.append_instance(self.instance_list, instance)
Example #40
0
    def __init__(self,
                 fields,
                 pt0,
                 pt1,
                 ep_inf,
                 drude_freq,
                 gamma,
                 mask_arrays=(1, 1, 1)):
        """
        Set up a Drude-type material region on the given fields.

        fields      : Fields instance; its ns, dtype, dt and get_ces() are
                      used, and the instance registers itself on it.
        pt0, pt1    : (x, y, z) points given as int or float; converted with
                      common.convert_indices(fields.ns, ...) and then
                      checked against range(n) per axis.
        ep_inf, drude_freq, gamma : int or float material parameters used in
                      the coefficient formulas below.
        mask_arrays : three entries, each an int or an ndarray whose shape
                      must equal common.shape_two_points(pt0, pt1); every
                      coefficient is multiplied by the matching mask.

        Raises AssertionError when an ndarray mask has the wrong shape.
        """
        common.check_type('fields', fields, Fields)
        common.check_type('pt0', pt0, (list, tuple), (int, float))
        common.check_type('pt1', pt1, (list, tuple), (int, float))
        common.check_type('ep_inf', ep_inf, (int, float))
        common.check_type('drude_freq', drude_freq, (int, float))
        common.check_type('gamma', gamma, (int, float))
        common.check_type('mask_arrays', mask_arrays, (list, tuple),
                          (np.ndarray, types.IntType))

        # local variables
        pt0 = common.convert_indices(fields.ns, pt0)
        pt1 = common.convert_indices(fields.ns, pt1)
        dtype = fields.dtype

        for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
            common.check_value('pt0 %s' % axis, p0, range(n))
            common.check_value('pt1 %s' % axis, p1, range(n))

        # The expected mask shape is bound BEFORE the check so that the
        # assertion message can be formatted; previously the message used a
        # name assigned only later, turning a mismatch into an
        # UnboundLocalError instead of the intended AssertionError.
        mask_shape = common.shape_two_points(pt0, pt1)
        for mask_array in mask_arrays:
            if isinstance(mask_array, np.ndarray):
                assert mask_shape == mask_array.shape, \
                       'shape mismatch : %s, %s' % (mask_shape, mask_array.shape)

        # allocations
        shape = common.shape_two_points(pt0, pt1, is_dummy=True)
        psis = [np.zeros(shape, dtype) for _ in range(3)]

        # update coefficients
        dt = fields.dt
        aa = (2 - gamma * dt) / (2 + gamma * dt)
        bb = drude_freq**2 * dt / (2 + gamma * dt)
        denom = 2 * ep_inf + bb * dt  # denominator shared by ca, cb and cc
        ca = 2 * dt / denom
        cb = -(aa + 3) * bb * dt / denom
        cc = -(aa + 1) * dt / denom
        cas = [ca * mask for mask in mask_arrays]
        cbs = [cb * mask for mask in mask_arrays]
        ccs = [cc * mask for mask in mask_arrays]

        # modify ce arrays
        slices = common.slices_two_points(pt0, pt1)
        for ce_array, ca_masked in zip(fields.get_ces(), cas):
            ce_array[slices] = ca_masked

        # global variables
        self.mainf = fields
        self.psis = psis
        self.cbs = cbs
        self.ccs = ccs
        self.pcs = aa, (aa + 1) * bb
        self.slices = slices

        # append to the update list
        self.priority_type = 'material'
        fields.append_instance(self)
    def __init__(self,
                 node_fields,
                 str_f,
                 pt0,
                 pt1,
                 tfunc,
                 spatial_value=1.,
                 is_overwrite=False):
        """
        Distribute one incident source over the fields of a node.

        node_fields   : NodeFields instance; its dtype, ns, mainf_list and
                        accum_nx_list are used.
        str_f         : one of 'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
        pt0, pt1      : (x, y, z) integer points in node coordinates; each
                        component is checked against range(n).
        tfunc         : plain Python function (types.FunctionType).
        spatial_value : ndarray (shape and dtype must match the region and
                        the node dtype) or a number, cast with the node
                        dtype.
        is_overwrite  : bool forwarded to every DirectIncident.

        For every entry of mainf_list whose x range overlaps the region, a
        DirectIncident of the matching device module (gpu or cpu) is created
        with local coordinates and collected in self.incident_list.
        """

        common.check_type('node_fields', node_fields, NodeFields)
        common.check_value('str_f', str_f,
                           ('ex', 'ey', 'ez', 'hx', 'hy', 'hz'))
        common.check_type('pt0', pt0, (list, tuple), int)
        common.check_type('pt1', pt1, (list, tuple), int)
        common.check_type('tfunc', tfunc, types.FunctionType)
        common.check_type('spatial_value', spatial_value, \
                (np.ndarray, np.number, types.FloatType, types.IntType) )
        common.check_type('is_overwrite', is_overwrite, bool)

        # local variables
        nodef = node_fields
        dtype = nodef.dtype
        is_array = isinstance(spatial_value, np.ndarray)
        mainf_list = nodef.mainf_list
        anx = nodef.accum_nx_list

        for axis, n, p0, p1 in zip(['x', 'y', 'z'], nodef.ns, pt0, pt1):
            common.check_value('pt0 %s' % axis, p0, range(n))
            common.check_value('pt1 %s' % axis, p1, range(n))

        if is_array:
            shape = common.shape_two_points(pt0, pt1)
            assert shape == spatial_value.shape, \
                    'shape mismatch : %s, %s' % (shape, spatial_value.shape)
            assert dtype == spatial_value.dtype, \
                    'dtype mismatch : %s, %s' % (dtype, spatial_value.dtype)
        else:
            spatial_value = dtype(spatial_value)

        # allocation
        dummied_shape = common.shape_two_points(pt0, pt1, is_dummy=True)

        incident_list = []
        # device modules, looked up by mainf.device_type via getattr below
        self.gpu, self.cpu = gpu, cpu
        for i, mainf in enumerate(mainf_list):
            nx0 = anx[i]
            nx1 = anx[i + 1]
            overlap = common.intersection_two_lines((nx0, nx1),
                                                    (pt0[0], pt1[0]))

            if overlap is not None:
                x0, y0, z0 = pt0
                x1, y1, z1 = pt1

                # overlap expressed relative to the source region origin
                shift_pt0 = (overlap[0] - x0, y0 - y0, z0 - z0)
                shift_pt1 = (overlap[1] - x0, y1 - y0, z1 - z0)
                if is_array:
                    # A multi-dimensional index must be a tuple: indexing an
                    # ndarray with a *list* of slices is deprecated in NumPy
                    # and rejected by recent releases.
                    shift_slices = tuple(
                        slice(p0, p1 + 1)
                        for p0, p1 in zip(shift_pt0, shift_pt1))
                    reshaped_value = spatial_value.reshape(dummied_shape)
                    dummied_array = reshaped_value[shift_slices]
                    overlap_shape = common.shape_two_points(
                        shift_pt0, shift_pt1)
                    split_value = dummied_array.reshape(overlap_shape).copy()
                else:
                    split_value = spatial_value

                # overlap expressed in the coordinates of this mainf
                local_pt0 = (overlap[0] - nx0, y0, z0)
                local_pt1 = (overlap[1] - nx0, y1, z1)
                incident_list.append( \
                        getattr(self, mainf.device_type). \
                        DirectIncident(mainf, str_f, local_pt0, local_pt1, \
                        tfunc, split_value, is_overwrite) )

        # global variables
        self.incident_list = incident_list
    def __init__(self,
                 node_fields,
                 str_f,
                 pt0,
                 pt1,
                 tfunc,
                 spatial_value=1.,
                 is_overwrite=False):
        """
        Distribute one incident source over node fields and MPI buffers.

        node_fields   : Fields instance of the node; its dtype, ns,
                        mainf_list, buffer_dict, accum_nx_list and
                        updatef_list are used.
        str_f         : one of 'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
        pt0, pt1      : (x, y, z) integer points in node coordinates.  The
                        allowed range per axis is range(n), widened by one
                        index on a side for which buffer_dict holds a buffer
                        (key axis + '+' or axis + '-').
        tfunc         : plain Python function (types.FunctionType).
        spatial_value : ndarray (shape and dtype must match the region and
                        the node dtype) or a number, cast with the node
                        dtype.
        is_overwrite  : bool stored on the instance.

        self.create_instance() is called for every main field and every
        buffer whose region overlaps (pt0, pt1).
        """

        common.check_type('node_fields', node_fields, Fields)
        common.check_value('str_f', str_f,
                           ('ex', 'ey', 'ez', 'hx', 'hy', 'hz'))
        common.check_type('pt0', pt0, (list, tuple), int)
        common.check_type('pt1', pt1, (list, tuple), int)
        common.check_type('tfunc', tfunc, types.FunctionType)
        common.check_type('spatial_value', spatial_value, \
                (np.ndarray, np.number, types.FloatType, types.IntType) )
        common.check_type('is_overwrite', is_overwrite, bool)

        # local variables
        nodef = node_fields
        dtype = nodef.dtype
        is_array = isinstance(spatial_value, np.ndarray)
        mainf_list = nodef.mainf_list
        buffer_dict = nodef.buffer_dict
        anx = nodef.accum_nx_list
        nx, ny, nz = nodef.ns

        for axis, n, p0, p1 in zip(['x', 'y', 'z'], nodef.ns, pt0, pt1):
            start, end = 0, n
            # 'in' replaces dict.has_key(), which no longer exists in
            # Python 3 and is equivalent in Python 2
            if (axis + '+') in buffer_dict:
                end = n + 1
            if (axis + '-') in buffer_dict:
                start = -1
            common.check_value('pt0 %s' % axis, p0, range(start, end))
            common.check_value('pt1 %s' % axis, p1, range(start, end))

        if is_array:
            shape = common.shape_two_points(pt0, pt1)
            assert shape == spatial_value.shape, \
                    'shape mismatch : %s, %s' % (shape, spatial_value.shape)
            assert dtype == spatial_value.dtype, \
                    'dtype mismatch : %s, %s' % (dtype, spatial_value.dtype)
        else:
            spatial_value = dtype(spatial_value)

        # global variables
        self.str_f = str_f
        self.pt0 = pt0
        self.pt1 = pt1
        self.tfunc = tfunc
        self.spatial_value = spatial_value
        self.is_overwrite = is_overwrite

        self.is_array = is_array
        self.cpu = cpu
        # the gpu module is imported only when a gpu field is present
        if 'gpu' in [f.device_type for f in nodef.updatef_list]:
            from kemp.fdtd3d import gpu
            self.gpu = gpu

        # create IncidentDirect instance
        for i, mainf in enumerate(mainf_list):
            fields_pt0 = (anx[i], 0, 0)
            fields_pt1 = (anx[i + 1] - 1, ny - 1, nz - 1)
            overlap = common.overlap_two_regions(fields_pt0, fields_pt1, pt0,
                                                 pt1)

            if overlap is not None:
                self.create_instance(mainf, fields_pt0, fields_pt1, overlap[0],
                                     overlap[1])

        # for buffer
        for direction, buffer_fields in buffer_dict.items():
            # region covered by the buffer on this side, in node coordinates
            fields_pt0 = { \
                    'x+': (anx[-1]-1, 0, 0), \
                    'y+': (0, ny-2, 0), \
                    'z+': (0, 0, nz-2), \
                    'x-': (-1, 0, 0), \
                    'y-': (0, -1, 0), \
                    'z-': (0, 0, -1) }[direction]

            fields_pt1 = { \
                    'x+': (anx[-1]+1, ny-1, nz-1), \
                    'y+': (nx-1, ny, nz-1), \
                    'z+': (nx-1, ny-1, nz), \
                    'x-': (1, ny-1, nz-1), \
                    'y-': (nx-1, 1, nz-1), \
                    'z-': (nx-1, ny-1, 1) }[direction]

            overlap = common.overlap_two_regions(fields_pt0, fields_pt1, pt0,
                                                 pt1)
            if overlap is not None:
                self.create_instance(buffer_fields, fields_pt0, fields_pt1,
                                     overlap[0], overlap[1])
    def __init__(self,
                 fields,
                 str_f,
                 pt0,
                 pt1,
                 is_array=False,
                 is_overwrite=True):
        """Create one device-level SetFields per main field touching pt0..pt1.

        fields       -- node-level Fields instance; its mainf_list,
                        accum_nx_list, ns and dtype attributes are read
        str_f        -- field name, or sequence of names, out of
                        'ex', 'ey', 'ez', 'hx', 'hy', 'hz'
        pt0, pt1     -- corner points (x, y, z) given in node coordinates
        is_array     -- True binds self.set_fields to
                        set_fields_spatial_value, False to
                        set_fields_single_value
        is_overwrite -- passed through to every device-level SetFields
        """

        common.check_type('fields', fields, Fields)
        common.check_type('str_f', str_f, (str, list, tuple), str)
        common.check_type('pt0', pt0, (list, tuple), int)
        common.check_type('pt1', pt1, (list, tuple), int)
        common.check_type('is_array', is_array, bool)
        common.check_type('is_overwrite', is_overwrite, bool)

        # local aliases
        nodef = fields
        str_fs = common.convert_to_tuple(str_f)
        mainf_list = nodef.mainf_list
        anx = nodef.accum_nx_list

        valid_names = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
        for name in str_fs:
            common.check_value('str_f', name, valid_names)

        for axis, n, lo, hi in zip('xyz', nodef.ns, pt0, pt1):
            common.check_value('pt0 %s' % axis, lo, range(n))
            common.check_value('pt1 %s' % axis, hi, range(n))

        # axes whose dummied extent is 1 get no slice of their own
        dummied_shape = common.shape_two_points(pt0, pt1, is_dummy=True)

        # device modules, looked up below by mainf.device_type
        self.gpu, self.cpu = gpu, cpu

        setf_list = []
        slices_list = []
        for idx, mainf in enumerate(mainf_list):
            x_begin = anx[idx]
            x_end = anx[idx + 1]
            overlap = common.intersection_two_lines((x_begin, x_end),
                                                    (pt0[0], pt1[0]))
            if overlap is None:
                # this main field does not intersect the requested x-range
                continue

            x0, y0, z0 = pt0
            x1, y1, z1 = pt1

            # slices are expressed relative to pt0
            rel_pt0 = (overlap[0] - x0, 0, 0)
            rel_pt1 = (overlap[1] - x0, y1 - y0, z1 - z0)
            slices = [
                slice(lo, hi + 1)
                for dim, (lo, hi) in enumerate(zip(rel_pt0, rel_pt1))
                if dummied_shape[dim] != 1
            ]
            slices_list.append(slices or [slice(0, 1)])

            # points handed to the device are relative to its own x-origin
            device_module = getattr(self, mainf.device_type)
            device_pt0 = (overlap[0] - x_begin, y0, z0)
            device_pt1 = (overlap[1] - x_begin, y1, z1)
            setf_list.append(
                device_module.SetFields(mainf, str_fs, device_pt0,
                                        device_pt1, is_array, is_overwrite))

        # global variables and functions
        self.str_fs = str_fs
        self.dtype = nodef.dtype
        self.shape = common.shape_two_points(pt0, pt1, len(str_fs))
        self.setf_list = setf_list
        self.slices_list = slices_list

        self.set_fields = (self.set_fields_spatial_value
                           if is_array else self.set_fields_single_value)
Example #44
0
    def __init__(self, context, device, queue_task, \
            nx, ny, nz, \
            coeff_use='', \
            precision_float='single', \
            local_work_size=256):
        """Allocate the E/H (and optional coefficient) OpenCL buffers.

        context, device -- pyopencl Context and Device the buffers and the
                           command queue are created on
        queue_task      -- QueueTask whose enqueue / enqueue_barrier methods
                           are re-exported on this instance
        nx, ny, nz      -- grid size; the z extent is padded so that one
                           z-row occupies a multiple of 64 bytes
        coeff_use       -- '', 'e', 'h' or 'eh'; selects which coefficient
                           buffers are allocated
        precision_float -- 'single' or 'double'; falls back to 'single'
                           (with a printed warning) when the device reports
                           neither cl_khr_fp64 nor cl_amd_fp64
        local_work_size -- work-group size stored as self.ls
        """

        common.check_type('context', context, cl.Context)
        common.check_type('device', device, cl.Device)
        common.check_type('queue_task', queue_task, QueueTask)
        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
        common.check_value('precision_float', precision_float, ('single', 'double'))
        common.check_type('local_work_size', local_work_size, int)

        # local variables
        queue = cl.CommandQueue(context, device)
        pragma_fp64 = ''
        if precision_float == 'double':
            extensions = device.get_info(cl.device_info.EXTENSIONS)
            if 'cl_khr_fp64' in extensions:
                pragma_fp64 = '#pragma OPENCL EXTENSION cl_khr_fp64 : enable'
            elif 'cl_amd_fp64' in extensions:
                pragma_fp64 = '#pragma OPENCL EXTENSION cl_amd_fp64 : enable'
            else:
                precision_float = 'single'
                # The '%' must be applied to the string, not to the value
                # returned by print() (which is None on Python 3).
                print('Warning: The %s GPU device is not support the double-precision.' %
                      device.get_info(cl.device_info.NAME))
                print('The precision is changed to \'single\'.')

        dtype = {'single':np.float32, 'double':np.float64}[precision_float]
        dtype_str_list = { \
                'single':['float', ''], \
                'double':['double', pragma_fp64] }[precision_float]

        # padding for the nz which is multiple of 16 (float32) or 8 (float64)
        segment_nbytes = 64
        # integer division keeps align_size an int on Python 2 and 3
        align_size = segment_nbytes // np.dtype(dtype).itemsize
        pad = int(np.ceil(float(nz) / align_size) * align_size) - nz
        slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
        nz_pitch = nz + pad

        ns = [np.int32(nx), np.int32(ny), np.int32(nz)]
        ns_pitch = [np.int32(nx), np.int32(ny), np.int32(nz_pitch)]
        ns_pad = [np.int32(nx), np.int32(ny), np.int32(pad)]

        # on/off the coefficient arrays
        ce_on = 'e' in coeff_use
        ch_on = 'h' in coeff_use

        # allocations: the host arrays only provide the initial contents
        # (zeros for the fields, 0.5 for the coefficients)
        f = np.zeros(ns_pitch, dtype)
        cf = np.ones_like(f) * 0.5

        mflags = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR
        eh_bufs = [cl.Buffer(context, mflags, hostbuf=f) for _ in range(6)]

        c_mflags = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
        if ce_on:
            ce_bufs = [cl.Buffer(context, c_mflags, hostbuf=cf) for _ in range(3)]

        if ch_on:
            ch_bufs = [cl.Buffer(context, c_mflags, hostbuf=cf) for _ in range(3)]

        del f, cf

        # global variables
        self.device_type = 'gpu'
        self.context = context
        self.device = device
        self.queue = queue
        self.qtask = queue_task
        self.enqueue = queue_task.enqueue
        self.enqueue_barrier = queue_task.enqueue_barrier

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns
        self.ns_pitch = ns_pitch
        self.ns_pad = ns_pad

        self.align_size = align_size
        self.pad = pad
        self.slice_z = slice_z

        self.precision_float = precision_float
        self.dtype = dtype
        self.dtype_str_list = dtype_str_list

        self.coeff_use = coeff_use
        self.ce_on = ce_on
        self.ch_on = ch_on

        self.eh_bufs = eh_bufs
        self.ex_buf, self.ey_buf, self.ez_buf = eh_bufs[:3]
        self.hx_buf, self.hy_buf, self.hz_buf = eh_bufs[3:]
        if ce_on:
            self.ce_bufs = ce_bufs
            self.cex_buf, self.cey_buf, self.cez_buf = ce_bufs
        if ch_on:
            self.ch_bufs = ch_bufs
            self.chx_buf, self.chy_buf, self.chz_buf = ch_bufs

        # global work size: number of padded cells rounded up to a multiple
        # of the local work size
        self.ls = ls = local_work_size
        nmax = nx * ny * nz_pitch
        remainder = nmax % ls
        self.gs = nmax if remainder == 0 else nmax - remainder + ls


        # create update list
        self.instance_list = []
        self.append_instance = lambda instance: \
            common.append_instance(self.instance_list, instance)
Example #45
0
    def __init__(self,
                 geometry_h5_path,
                 max_tstep,
                 mpi_shape,
                 pbc_axes='',
                 target_device='all',
                 precision_float='single',
                 **kargs):
        """Create the node fields, the update instances and the MPI buffers.

        geometry_h5_path -- path of the geometry HDF5 file; its 'coeff_use',
                            'nx', 'ny' and 'nz' attributes must be readable
        max_tstep        -- stored as self.max_tstep and passed to the MPI
                            exchange instances
        mpi_shape        -- number of MPI processes along (x, y, z); a value
                            equal to (1, 1, 1) disables MPI
        pbc_axes         -- string of axis letters; node.Pbc is created for
                            the listed axes whose mpi_shape entry is 1
        target_device    -- 'all', or a string containing 'cpu' and/or 'gpu'
        precision_float  -- 'single' or 'double'
        kargs            -- 'device_nx_list', 'ny_list' and 'nz_list' are
                            read; the lists themselves are not modified

        The process is terminated through sys.exit() when the MPI size does
        not match mpi_shape, when no usable device module can be imported,
        when the HDF5 file cannot be read, or when device_nx_list does not
        match the number of devices.
        """

        common.check_type('geometry_h5_path', geometry_h5_path, str)
        common.check_type('max_tstep', max_tstep, int)
        common.check_type('mpi_shape', mpi_shape, (list, tuple), int)
        common.check_type('pbc_axes', pbc_axes, str)
        common.check_type('target_device', target_device, str)
        common.check_value('precision_float', precision_float,
                           ['single', 'double'])

        # import modules
        global is_mpi, is_gpu

        # Compare as a tuple: a list such as [1, 1, 1] passes the type check
        # above but would never compare equal to the tuple (1, 1, 1).
        is_mpi = tuple(mpi_shape) != (1, 1, 1)

        if is_mpi:
            global network, common_mpi, comm, size, rank, coord
            from mpi4py import MPI
            from kemp.fdtd3d import network
            from kemp.fdtd3d.util import common_mpi
            comm = MPI.COMM_WORLD
            size = comm.Get_size()
            rank = comm.Get_rank()
            coord = common_mpi.my_coord(rank, mpi_shape)

        is_master = not (is_mpi and rank != 0)
        is_cpu = target_device == 'all' or 'cpu' in target_device
        is_gpu = target_device == 'all' or 'gpu' in target_device

        if is_mpi:
            # product of the mpi_shape entries (no reliance on the
            # Python-2-only builtin reduce)
            nproc = 1
            for n in mpi_shape:
                nproc *= n

            if nproc != size:
                if is_master:
                    print("The MPI size %d is not matched the mpi_shape %s" %
                          (size, mpi_shape))
                sys.exit()

        if is_gpu:
            try:
                global cl, gpu, common_gpu
                import pyopencl as cl
                from kemp.fdtd3d import gpu
                from kemp.fdtd3d.util import common_gpu
            except Exception:
                # Any failure while importing the GPU stack falls back to
                # the CPU; SystemExit/KeyboardInterrupt are not swallowed.
                if is_master:
                    print("The 'pyopencl' module is not found.")

                if is_cpu:
                    if is_master:
                        print("The CPU is only used.")
                    target_device = 'cpu'
                    is_gpu = False
                else:
                    sys.exit()

        # read from the h5 file (closed again as soon as the attributes
        # have been fetched)
        try:
            with h5py.File(geometry_h5_path, 'r') as h5f:
                coeff_use = h5f.attrs['coeff_use']
                # nx/ny/nz are not used below, but a file lacking them is
                # still rejected
                for size_key in ('nx', 'ny', 'nz'):
                    h5f.attrs[size_key]
        except Exception:
            if is_master:
                print(repr(sys.exc_info()))
                print("To load the geometry HDF5 file '%s' is failed." %
                      geometry_h5_path)
            sys.exit()

        # local variables
        device_nx_list = kargs.get('device_nx_list')
        ny_list = kargs.get('ny_list')
        nz_list = kargs.get('nz_list')

        # Set the number of device and the device_n_list
        ndev = 1 if is_cpu else 0
        if is_gpu:
            try:
                gpu_devices = common_gpu.gpu_device_list(print_info=False)
                context = cl.Context(gpu_devices)
                ndev += len(gpu_devices)
            except Exception:
                if is_master:
                    print(repr(sys.exc_info()))
                    print(
                        "To get the GPU devices is failed. The CPU is only used."
                    )
                target_device = 'cpu'
                is_gpu = False

        # dnx_list is always a private copy because entries are popped from
        # it below; the caller's device_nx_list must stay intact.
        if is_mpi:
            mi, mj, mk = coord
            dnx_list = list(device_nx_list[mi * ndev:(mi + 1) * ndev])
            dny = ny_list[mj]
            dnz = nz_list[mk]
        else:
            dnx_list = list(device_nx_list)
            dny = ny_list[0]
            dnz = nz_list[0]

        total_ndev = mpi_shape[0] * ndev
        if len(device_nx_list) != total_ndev:
            if is_master:
                print(
                    "The device_nx_list %s is not matched with the number of total devices %d."
                    % (device_nx_list, total_ndev))
            sys.exit()

        # create the mainf_list and the buffer_dict
        buffer_dict = {}
        if is_mpi:
            # create BufferFields instances
            snx = sum(dnx_list) - ndev + 1
            sny, snz = dny, dnz

            mpi_target_dict = common_mpi.mpi_target_dict(
                rank, mpi_shape, pbc_axes)
            for direction, target_rank in mpi_target_dict.items():
                if target_rank is not None:
                    # the buffer spans the two axes orthogonal to direction
                    n0, n1 = {
                        'x': (sny, snz),
                        'y': (snx, snz),
                        'z': (snx, sny)
                    }[direction[0]]
                    bufferf = cpu.BufferFields(direction, target_rank, n0, n1,
                                               coeff_use, precision_float)
                    buffer_dict[direction] = bufferf

        mainf_list = []
        if is_cpu:
            # the CPU takes the first x-extent; the rest go to the GPUs
            mainf_list.append(
                cpu.Fields(dnx_list.pop(0),
                           dny,
                           dnz,
                           coeff_use,
                           precision_float,
                           use_cpu_core=1))

        if is_gpu:
            mainf_list += [
                gpu.Fields(context, gpu_device, dnx, dny, dnz, coeff_use,
                           precision_float)
                for gpu_device, dnx in zip(gpu_devices, dnx_list)
            ]

        # create node.Fields instance
        nodef = node.Fields(mainf_list, buffer_dict)

        # create nodePbc instance
        node_pbc_axes = ''.join([
            axis for i, axis in enumerate(['x', 'y', 'z'])
            if mpi_shape[i] == 1 and axis in pbc_axes
        ])
        if node_pbc_axes:
            node.Pbc(nodef, node_pbc_axes)

        # create update instances
        node.Core(nodef)
        # buffer_dict is empty without MPI, so 'network' is only touched
        # when it has actually been imported above
        for bufferf in nodef.buffer_dict.values():
            network.ExchangeMpiSplitNonBlock(bufferf, max_tstep)

        # global variables
        self.max_tstep = max_tstep
        self.mpi_shape = mpi_shape
        self.nodef = nodef
        self.is_master = is_master

        # accum_sub_ns_dict, node_pts
        if is_mpi:
            asn_dict = common_mpi.accum_sub_ns_dict(mpi_shape, ndev,
                                                    device_nx_list, ny_list,
                                                    nz_list)
            axes = ['x', 'y', 'z']
            self.asn_dict = asn_dict
            self.node_pt0 = [asn_dict[ax][m] for ax, m in zip(axes, coord)]
            self.node_pt1 = [
                asn_dict[ax][m + 1] - 1 for ax, m in zip(axes, coord)
            ]
            self.ns = (asn_dict['x'][-1], asn_dict['y'][-1],
                       asn_dict['z'][-1])
        else:
            self.ns = nodef.ns

        # for savefields
        self.savef_tag_list = []
        self.savef_list = []
Example #46
0
    def __init__(self, fields, str_f, pt0, pt1, is_array=False, is_overwrite=True):
        """Build the OpenCL copy program that writes into the region pt0..pt1.

        fields       -- GPU Fields instance; its ns, dtype, dtype_str_list,
                        context, ls attributes and get_buf() are used
        str_f        -- field name, or sequence of names, out of
                        'ex', 'ey', 'ez', 'hx', 'hy', 'hz'
        pt0, pt1     -- corner points; converted with common.convert_indices
                        against fields.ns before they are validated
        is_array     -- True: the kernel source is a device buffer
                        (self.source_buf) and self.set_fields is
                        set_fields_spatial_value; False: the source is a
                        single DTYPE value and self.set_fields is
                        set_fields_single_value
        is_overwrite -- True substitutes '=' for the OVERWRITE macro,
                        False substitutes '+='
        """

        common.check_type('fields', fields, Fields)
        common.check_type('str_f', str_f, (str, list, tuple), str)
        common.check_type('pt0', pt0, (list, tuple), (int, float))
        common.check_type('pt1', pt1, (list, tuple), (int, float))
        common.check_type('is_array', is_array, bool)
        common.check_type('is_overwrite', is_overwrite, bool)

        pt0 = list( common.convert_indices(fields.ns, pt0) )
        pt1 = list( common.convert_indices(fields.ns, pt1) )

        # local variables
        str_fs = common.convert_to_tuple(str_f)
        dtype_str_list = fields.dtype_str_list
        overwrite_str = {True: '=', False: '+='}[is_overwrite]

        strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
        for strf in str_fs:
            common.check_value('str_f', strf, strf_list)

        for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
            common.check_value('pt0 %s' % axis, p0, range(n))
            common.check_value('pt1 %s' % axis, p1, range(n))

        # program
        macros = ['NMAX', 'XID', 'YID', 'ZID', \
                  'ARGS', \
                  'TARGET', 'SOURCE', 'OVERWRITE', \
                  'DTYPE', 'PRAGMA_fp64']

        nmax_str, xid_str, yid_str, zid_str = common_gpu.macro_replace_list(pt0, pt1)
        # only the kernel argument and the source expression depend on
        # whether an array or a single value is copied
        if is_array:
            args_str, source_str = '__global DTYPE *source', 'source[sub_idx]'
        else:
            args_str, source_str = 'DTYPE source', 'source'
        values = [nmax_str, xid_str, yid_str, zid_str, \
                  args_str, \
                  'target[idx]', source_str, overwrite_str] + \
            dtype_str_list

        # read the template through a context manager so the file handle
        # is closed deterministically
        with open(common_gpu.src_path + 'copy.cl') as template_file:
            template = template_file.read()
        ksrc = common.replace_template_code(template, macros, values)
        program = cl.Program(fields.context, ksrc).build()

        # allocation (loop variable named strf so the str_f parameter is
        # not clobbered on Python 2)
        target_bufs = [fields.get_buf(strf) for strf in str_fs]
        shape = common.shape_two_points(pt0, pt1, len(str_fs))

        if is_array:
            tmp_array = np.zeros(shape, dtype=fields.dtype)
            source_buf = cl.Buffer( \
                fields.context, \
                cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, \
                hostbuf=tmp_array)

        # global variables and functions
        self.mainf = fields
        self.program = program
        self.target_bufs = target_bufs
        self.shape = shape

        # global work size: NMAX rounded up to a multiple of fields.ls
        nmax = int(nmax_str)
        remainder = nmax % fields.ls
        self.gs = nmax if remainder == 0 else nmax - remainder + fields.ls

        if is_array:
            self.source_buf = source_buf
            self.set_fields = self.set_fields_spatial_value
        else:
            self.set_fields = self.set_fields_single_value
Example #47
0
    def __init__(self, queue_task, \
                 nx, ny, nz, \
                 precision_float='single', \
                 use_cpu_core=0):
        """Allocate the six E/H host arrays and the C-template macro tables.

        queue_task      -- QueueTask whose enqueue / enqueue_barrier methods
                           are re-exported on this instance
        nx, ny, nz      -- grid size; each field array has shape (nx, ny, nz)
        precision_float -- 'single' (np.float32) or 'double' (np.float64)
        use_cpu_core    -- 0: both OpenMP macros are replaced by '';
                           1: the 'OMP ' macro is replaced by '// ';
                           otherwise 'SET_NUM_THREADS' is replaced by an
                           omp_set_num_threads(<use_cpu_core>) call
        """

        common.check_type('queue_task', queue_task, QueueTask)
        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_value('precision_float', precision_float,
                           ('single', 'double'))
        common.check_type('use_cpu_core', use_cpu_core, int)

        # local variables
        ns = [nx, ny, nz]
        dtype = {'single': np.float32, 'double': np.float64}[precision_float]

        # allocations
        ehs = [np.zeros(ns, dtype) for _ in range(6)]

        # common macros for C templates
        dtype_macros = ['DTYPE']
        # keys must match the validated precision_float values exactly
        # (a stray leading space in 'double' made that lookup fail)
        dtype_values = {
            'single': ['float'],
            'double': ['double']
        }[precision_float]

        omp_macros = ['OMP ', 'SET_NUM_THREADS']
        if use_cpu_core == 0:
            omp_values = ['', '']
        elif use_cpu_core == 1:
            omp_values = ['// ', '']
        else:
            omp_values = ['', 'omp_set_num_threads(%d);' % use_cpu_core]

        # global variables and functions
        self.device_type = 'cpu'
        self.qtask = queue_task
        self.enqueue = queue_task.enqueue
        self.enqueue_barrier = queue_task.enqueue_barrier

        self.dx = 1.
        self.dt = 0.5
        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns

        self.dtype = dtype
        self.dtype_omp_macros = dtype_macros + omp_macros
        self.dtype_omp_values = dtype_values + omp_values

        self.ehs = ehs
        self.ex, self.ey, self.ez = ehs[:3]
        self.hx, self.hy, self.hz = ehs[3:]

        self.ce_on, self.ch_on = False, False
        self.rd_on = False

        # update list
        self.instance_list = []
        self.append_instance = lambda instance: \
                common.append_instance(self.instance_list, instance)
    def __init__(self,
                 fields,
                 str_f,
                 pt0,
                 pt1,
                 is_array=False,
                 is_overwrite=True):
        """Build the CUDA 'copy' kernel that writes into the region pt0..pt1.

        fields       -- GPU Fields instance; its ns, dtype, dtype_str_list
                        attributes and get_buf() are used
        str_f        -- field name, or sequence of names, out of
                        'ex', 'ey', 'ez', 'hx', 'hy', 'hz'
        pt0, pt1     -- corner points; converted with common.convert_indices
                        against fields.ns before they are validated
        is_array     -- True: the kernel source is a device array
                        (self.source_buf) and self.set_fields is
                        set_fields_spatial_value; False: the source is a
                        single DTYPE value and self.set_fields is
                        set_fields_single_value
        is_overwrite -- True substitutes '=' for the OVERWRITE macro,
                        False substitutes '+='
        """

        common.check_type('fields', fields, Fields)
        common.check_type('str_f', str_f, (str, list, tuple), str)
        common.check_type('pt0', pt0, (list, tuple), (int, float))
        common.check_type('pt1', pt1, (list, tuple), (int, float))
        common.check_type('is_array', is_array, bool)
        common.check_type('is_overwrite', is_overwrite, bool)

        pt0 = list(common.convert_indices(fields.ns, pt0))
        pt1 = list(common.convert_indices(fields.ns, pt1))

        # local variables
        str_fs = common.convert_to_tuple(str_f)
        dtype_str_list = fields.dtype_str_list
        overwrite_str = {True: '=', False: '+='}[is_overwrite]

        strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
        for strf in str_fs:
            common.check_value('str_f', strf, strf_list)

        for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
            common.check_value('pt0 %s' % axis, p0, range(n))
            common.check_value('pt1 %s' % axis, p1, range(n))

        # program
        # NOTE(review): nine macro names are listed here; this presumably
        # expects fields.dtype_str_list to hold a single entry -- confirm
        # against the CUDA Fields class.
        macros = ['NMAX', 'XID', 'YID', 'ZID', \
                'ARGS', \
                'TARGET', 'SOURCE', 'OVERWRITE', \
                'DTYPE']

        # only the kernel argument and the source expression depend on
        # whether an array or a single value is copied
        if is_array:
            args_str, source_str = 'DTYPE *source', 'source[sub_idx]'
        else:
            args_str, source_str = 'DTYPE source', 'source'
        values = common_gpu.macro_replace_list(pt0, pt1) + \
                [args_str, \
                'target[idx]', source_str, overwrite_str] + \
                dtype_str_list

        # read the template through a context manager so the file handle
        # is closed deterministically
        with open(common_gpu.src_path + 'copy.cu') as template_file:
            template = template_file.read()
        ksrc = common.replace_template_code(template, macros, values)
        program = SourceModule(ksrc)
        kernel_copy = program.get_function('copy')

        # allocation (loop variable named strf so the str_f parameter is
        # not clobbered on Python 2)
        target_bufs = [fields.get_buf(strf) for strf in str_fs]
        shape = common.shape_two_points(pt0, pt1, len(str_fs))

        if is_array:
            tmp_array = np.zeros(shape, fields.dtype)
            source_buf = cuda.to_device(tmp_array)

        # global variables and functions
        self.mainf = fields
        self.kernel_copy = kernel_copy
        self.target_bufs = target_bufs
        self.shape = shape

        if is_array:
            self.source_buf = source_buf
            self.set_fields = self.set_fields_spatial_value
        else:
            self.set_fields = self.set_fields_single_value
Example #49
0
    def __init__(self, context, device, \
                 nx, ny, nz, \
                 precision_float='single', \
                 local_work_size=256):
        """Allocate six zero-initialised E/H OpenCL buffers on one device.

        context, device -- pyopencl Context and Device the buffers and the
                           command queue are created on
        nx, ny, nz      -- grid size; each buffer holds nx * ny * nz values
        precision_float -- 'single' or 'double'; falls back to 'single'
                           (with a printed warning) when the device reports
                           neither cl_khr_fp64 nor cl_amd_fp64
        local_work_size -- work-group size stored as self.ls
        """

        common.check_type('context', context, cl.Context)
        common.check_type('device', device, cl.Device)
        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_value('precision_float', precision_float,
                           ('single', 'double'))
        common.check_type('local_work_size', local_work_size, int)

        # local variables
        ns = [np.int32(nx), np.int32(ny), np.int32(nz)]

        queue = cl.CommandQueue(context, device)
        pragma_fp64 = ''
        if precision_float == 'double':
            extensions = device.get_info(cl.device_info.EXTENSIONS)
            if 'cl_khr_fp64' in extensions:
                pragma_fp64 = '#pragma OPENCL EXTENSION cl_khr_fp64 : enable'
            elif 'cl_amd_fp64' in extensions:
                pragma_fp64 = '#pragma OPENCL EXTENSION cl_amd_fp64 : enable'
            else:
                precision_float = 'single'
                # The '%' must be applied to the string, not to the value
                # returned by print() (which is None on Python 3).
                print('Warning: The %s GPU device is not support the double-precision.' %
                      device.get_info(cl.device_info.NAME))
                print('The precision is changed to \'single\'.')

        dtype = {'single': np.float32, 'double': np.float64}[precision_float]
        dtype_str_list = { \
                'single':['float', ''], \
                'double':['double', pragma_fp64] }[precision_float]

        # allocations: every buffer is filled from the same zero host array
        f = np.zeros(ns, dtype)
        eh_bufs = [
            cl.Buffer(context, cl.mem_flags.READ_WRITE, f.nbytes)
            for _ in range(6)
        ]
        for eh_buf in eh_bufs:
            cl.enqueue_copy(queue, eh_buf, f)

        # global variables
        self.device_type = 'gpu'
        self.context = context
        self.device = device
        self.queue = queue

        self.dx = 1.
        self.dt = 0.5
        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns

        self.precision_float = precision_float
        self.dtype = dtype
        self.dtype_str_list = dtype_str_list

        self.eh_bufs = eh_bufs
        self.ex_buf, self.ey_buf, self.ez_buf = eh_bufs[:3]
        self.hx_buf, self.hy_buf, self.hz_buf = eh_bufs[3:]

        self.ce_on, self.ch_on = False, False
        self.rd_on = False

        self.ls = local_work_size

        # create update list
        self.instance_list = []
        self.append_instance = lambda instance: \
            common.append_instance(self.instance_list, instance)
    def __init__(self, fields, str_f, pt0, pt1):
        """Create one device-level GetFields per main field touching pt0..pt1.

        fields   -- node-level Fields instance; its mainf_list,
                    accum_nx_list, ns and dtype attributes are read
        str_f    -- field name, or sequence of names, out of
                    'ex', 'ey', 'ez', 'hx', 'hy', 'hz'
        pt0, pt1 -- corner points (x, y, z) given in node coordinates

        A zero-filled host_array is allocated here and additionally exposed
        split per field name through split_host_array_dict.
        """

        common.check_type('fields', fields, Fields)
        common.check_type('str_f', str_f, (str, list, tuple), str)
        common.check_type('pt0', pt0, (list, tuple), int)
        common.check_type('pt1', pt1, (list, tuple), int)

        # local aliases
        nodef = fields
        str_fs = common.convert_to_tuple(str_f)
        mainf_list = nodef.mainf_list
        anx = nodef.accum_nx_list

        valid_names = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
        for name in str_fs:
            common.check_value('str_f', name, valid_names)

        for axis, n, lo, hi in zip('xyz', nodef.ns, pt0, pt1):
            common.check_value('pt0 %s' % axis, lo, range(n))
            common.check_value('pt1 %s' % axis, hi, range(n))

        # allocation
        shape = common.shape_two_points(pt0, pt1, len(str_fs))
        # axes whose dummied extent is 1 get no slice of their own
        dummied_shape = common.shape_two_points(pt0, pt1, is_dummy=True)
        host_array = np.zeros(shape, dtype=nodef.dtype)

        # one equally sized piece of host_array per requested field name
        split_host_array_dict = dict(
            zip(str_fs, np.split(host_array, len(str_fs))))

        # device modules, looked up below by mainf.device_type
        self.gpu, self.cpu = gpu, cpu

        getf_list = []
        slices_list = []
        last_idx = len(mainf_list) - 1
        for idx, mainf in enumerate(mainf_list):
            x_begin = anx[idx]
            # every main field but the last gives up its final x-index
            if idx < last_idx:
                x_end = anx[idx + 1] - 1
            else:
                x_end = anx[idx + 1]
            overlap = common.intersection_two_lines((x_begin, x_end),
                                                    (pt0[0], pt1[0]))
            if overlap is None:
                # this main field does not intersect the requested x-range
                continue

            x0, y0, z0 = pt0
            x1, y1, z1 = pt1

            # slices are expressed relative to pt0
            rel_pt0 = (overlap[0] - x0, 0, 0)
            rel_pt1 = (overlap[1] - x0, y1 - y0, z1 - z0)
            slices = [
                slice(lo, hi + 1)
                for dim, (lo, hi) in enumerate(zip(rel_pt0, rel_pt1))
                if dummied_shape[dim] != 1
            ]
            slices_list.append(slices or [slice(0, 1)])

            # points handed to the device are relative to its own x-origin
            device_module = getattr(self, mainf.device_type)
            device_pt0 = (overlap[0] - x_begin, y0, z0)
            device_pt1 = (overlap[1] - x_begin, y1, z1)
            getf_list.append(
                device_module.GetFields(mainf, str_fs, device_pt0,
                                        device_pt1))

        # global variables
        self.str_fs = str_fs
        self.host_array = host_array
        self.split_host_array_dict = split_host_array_dict
        self.getf_list = getf_list
        self.slices_list = slices_list
Example #51
0
    def __init__(self,
                 fields,
                 str_f,
                 pt0,
                 pt1,
                 tfunc,
                 spatial_value=1.,
                 is_overwrite=False):
        """Register an incident source on the region pt0..pt1 of one field.

        fields        -- Fields or BufferFields instance; the new object is
                         appended to its update list
        str_f         -- one of 'ex', 'ey', 'ez', 'hx', 'hy', 'hz'
        pt0, pt1      -- corner points (x, y, z), each within range(n) of
                         the matching fields.ns entry
        tfunc         -- plain Python function, stored as self.tfunc
        spatial_value -- scalar (converted to fields.dtype) or ndarray whose
                         shape and dtype must match the region and the fields
        is_overwrite  -- passed through to the SetFields instances

        Raises AssertionError when an ndarray spatial_value has the wrong
        shape or dtype.
        """

        common.check_type('fields', fields, (Fields, BufferFields))
        common.check_value('str_f', str_f,
                           ('ex', 'ey', 'ez', 'hx', 'hy', 'hz'))
        common.check_type('pt0', pt0, (list, tuple), int)
        common.check_type('pt1', pt1, (list, tuple), int)
        common.check_type('tfunc', tfunc, types.FunctionType)
        # float/int are the very objects types.FloatType/types.IntType name
        # on Python 2, and they also exist on Python 3
        common.check_type('spatial_value', spatial_value, \
                (np.ndarray, np.number, float, int) )
        common.check_type('is_overwrite', is_overwrite, bool)

        # local variables
        e_or_h = str_f[0]
        dtype = fields.dtype
        is_buffer = isinstance(fields, BufferFields)
        is_array = isinstance(spatial_value, np.ndarray)

        for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
            common.check_value('pt0 %s' % axis, p0, range(n))
            common.check_value('pt1 %s' % axis, p1, range(n))

        if is_array:
            shape = common.shape_two_points(pt0, pt1)
            assert shape == spatial_value.shape, \
                    'shape mismatch : %s, %s' % (shape, spatial_value.shape)
            assert dtype == spatial_value.dtype, \
                    'dtype mismatch : %s, %s' % (dtype, spatial_value.dtype)
        else:
            spatial_value = dtype(spatial_value)

        # create the SetFields instances
        setf_dict = {}
        svalue_dict = {}

        if is_buffer:
            # one entry per buffer part; parts the region does not reach
            # get a None setter and no spatial value
            for part in ['', 'pre', 'post']:
                sl0 = common.slices_two_points(pt0, pt1)
                sl1 = common_buffer.slice_dict[e_or_h][part]
                overlap = common.overlap_two_slices(fields.ns, sl0, sl1)

                if overlap is None:
                    setf_dict[part] = None
                else:
                    opt0, opt1 = common.two_points_slices(fields.ns, overlap)
                    setf_dict[part] = SetFields(fields, str_f, opt0, opt1,
                                                is_array, is_overwrite)
                    svalue_dict[part] = self.overlap_svalue(
                        pt0, pt1, opt0, opt1, spatial_value, is_array)

        else:
            setf_dict[''] = SetFields(fields, str_f, pt0, pt1, is_array,
                                      is_overwrite)
            svalue_dict[''] = spatial_value

        # global variables
        self.mainf = fields
        self.tfunc = tfunc
        self.setf_dict = setf_dict
        self.svalue_dict = svalue_dict

        self.e_or_h = e_or_h
        self.tstep = 1

        # append to the update list
        self.priority_type = 'incident'
        fields.append_instance(self)
    def __init__(self, gpuf, direction, tmax, ny, nz, coeff_use,
                 precision_float):
        """Set up the field getters/setters and the persistent MPI requests.

        gpuf            -- gpu.Fields instance this buffer exchanges with
        direction       -- 'x+' or 'x-'; picks the exchanged planes, the
                           neighbour rank (rank + 1 or rank - 1), the
                           message tags and the update_e / update_h methods
        tmax            -- stored as self.tmax
        ny, nz          -- forwarded to the parent constructor, which is
                           called with an x extent of 3
        coeff_use       -- forwarded to the parent constructor
        precision_float -- forwarded to the parent constructor
        """

        super(BufferFields, self).__init__(3,
                                           ny,
                                           nz,
                                           coeff_use,
                                           precision_float,
                                           use_cpu_core=0)
        common.check_type('gpuf', gpuf, gpu.Fields)
        common.check_value('direction', direction, ('x+', 'x-'))

        # For each direction three getter/setter pairs are built, in this
        # order: pair 0 reads the GPU and writes this buffer, pair 1 reads
        # this buffer and writes the GPU, pair 2 reads and writes this
        # buffer (its getter's host_array is what gets sent).
        if direction == 'x+':
            getter0 = gpu.GetFields(gpuf, ['hy', 'hz'], (-2, 0, 0),
                                    (-2, -1, -1))
            setter0 = cpu.SetFields(self, ['hy', 'hz'], (0, 0, 0),
                                    (0, -1, -1), True)

            getter1 = cpu.GetFields(self, ['ey', 'ez'], (1, 0, 0),
                                    (1, -1, -1))
            setter1 = gpu.SetFields(gpuf, ['ey', 'ez'], (-1, 0, 0),
                                    (-1, -1, -1), True)

            getter2 = cpu.GetFields(self, ['hy', 'hz'], (1, 0, 0),
                                    (1, -1, -1))
            setter2 = cpu.SetFields(self, ['ey', 'ez'], (-1, 0, 0),
                                    (-1, -1, -1), True)

            neighbor = rank + 1
            send_tag, recv_tag = 0, 1

        elif direction == 'x-':
            getter0 = gpu.GetFields(gpuf, ['ey', 'ez'], (2, 0, 0),
                                    (2, -1, -1))
            setter0 = cpu.SetFields(self, ['ey', 'ez'], (-1, 0, 0),
                                    (-1, -1, -1), True)

            getter1 = cpu.GetFields(self, ['hy', 'hz'], (1, 0, 0),
                                    (1, -1, -1))
            setter1 = gpu.SetFields(gpuf, ['hy', 'hz'], (0, 0, 0),
                                    (0, -1, -1), True)

            getter2 = cpu.GetFields(self, ['ey', 'ez'], (1, 0, 0),
                                    (1, -1, -1))
            setter2 = cpu.SetFields(self, ['hy', 'hz'], (0, 0, 0),
                                    (0, -1, -1), True)

            neighbor = rank - 1
            send_tag, recv_tag = 1, 0

        # one send request and two receive requests, each receive request
        # bound to its own scratch array
        send_request = comm.Send_init(getter2.host_array, neighbor,
                                      tag=send_tag)
        recv_arrays = []
        for _ in range(2):
            recv_arrays.append(np.zeros(getter2.host_array.shape, gpuf.dtype))
        recv_requests = []
        for recv_array in recv_arrays:
            recv_requests.append(
                comm.Recv_init(recv_array, neighbor, tag=recv_tag))

        # global variables
        self.direction = direction
        self.gf0, self.sf0 = getter0, setter0
        self.gf1, self.sf1 = getter1, setter1
        self.gf2, self.sf2 = getter2, setter2
        self.req_send = send_request
        self.req_recv_list = recv_requests
        self.tmp_recv_list = recv_arrays

        self.switch = 0
        self.tmax = tmax
        self.tstep = 1

        # global functions
        if direction == 'x+':
            self.update_e, self.update_h = self.update_e_xp, self.update_h_xp
        elif direction == 'x-':
            self.update_e, self.update_h = self.update_e_xm, self.update_h_xm
Example #53
0
    def __init__(self, context, device, \
            nx, ny, nz, \
            coeff_use='', \
            precision_float='single', \
            local_work_size=256, \
            global_work_size=0):
        """Allocate the six E/H field buffers on an OpenCL device.

        The z extent is padded so that ``nz + pad`` is a multiple of
        16 elements (single) or 8 elements (double); every field buffer is
        created with the padded shape and zero-filled.  Coefficient buffers
        are created only for the components named in ``coeff_use``; they are
        allocated here but NOT initialised by this method.

        Args:
            context: ``cl.Context`` used for all buffer allocations.
            device: ``cl.Device`` the command queue is bound to.
            nx, ny, nz: grid size (ints).
            coeff_use: one of ``''``, ``'e'``, ``'h'``, ``'eh'``.
            precision_float: ``'single'`` or ``'double'``.  Falls back to
                ``'single'`` (with a printed warning) when the device
                advertises neither ``cl_khr_fp64`` nor ``cl_amd_fp64``.
            local_work_size: stored as ``self.ls``.
            global_work_size: stored as ``self.gs``; ``0`` means "ask
                ``common_gpu.get_optimal_gs(device)``".
        """

        common.check_type('context', context, cl.Context)
        common.check_type('device', device, cl.Device)
        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_type('global_work_size', global_work_size, int)
        common.check_type('local_work_size', local_work_size, int)
        common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
        common.check_value('precision_float', precision_float, \
                ('single', 'double'))

        # local variables
        queue = cl.CommandQueue(context, device)
        pragma_fp64 = ''
        if precision_float == 'double':
            extensions = device.get_info(cl.device_info.EXTENSIONS)
            if 'cl_khr_fp64' in extensions:
                pragma_fp64 = '#pragma OPENCL EXTENSION cl_khr_fp64 : enable'
            elif 'cl_amd_fp64' in extensions:
                pragma_fp64 = '#pragma OPENCL EXTENSION cl_amd_fp64 : enable'
            else:
                precision_float = 'single'
                # The format operator must be applied to the string, not to
                # the value returned by print() (which is None on Python 3).
                print('Warning: The %s GPU device is not support the double-precision.' % \
                        device.get_info(cl.device_info.NAME))
                print('The precision is changed to \'single\'.')

        dtype = {'single': np.float32, 'double': np.float64}[precision_float]
        dtype_str_list = { \
                'single':['float', ''], \
                'double':['double', pragma_fp64] }[precision_float]

        # padding for the nz which is multiple of 16 (float32) or 8 (float64)
        align_size = {'single': 16, 'double': 8}[precision_float]  # 64 Bytes
        pad = int(np.ceil(float(nz) / align_size) * align_size) - nz
        slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
        nz_pitch = nz + pad

        ns = [np.int32(nx), np.int32(ny), np.int32(nz)]
        ns_pitch = [np.int32(nx), np.int32(ny), np.int32(nz_pitch)]
        ns_pad = [np.int32(nx), np.int32(ny), np.int32(pad)]

        # on/off the coefficient arrays
        ce_on = 'e' in coeff_use
        ch_on = 'h' in coeff_use

        # allocations
        f = np.zeros(ns_pitch, dtype)
        cf = np.ones_like(f) * 0.5  # only its byte size is used below

        mflags = cl.mem_flags.READ_WRITE
        eh_bufs = [cl.Buffer(context, mflags, f.nbytes) for _ in range(6)]
        for eh_buf in eh_bufs:
            cl.enqueue_copy(queue, eh_buf, f)

        if ce_on:
            mflags = cl.mem_flags.READ_ONLY
            ce_bufs = [cl.Buffer(context, mflags, cf.nbytes) for _ in range(3)]

        if ch_on:
            mflags = cl.mem_flags.READ_ONLY
            ch_bufs = [cl.Buffer(context, mflags, cf.nbytes) for _ in range(3)]

        del f, cf

        # global variables
        self.device_type = 'gpu'
        self.context = context
        self.device = device
        self.queue = queue

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns
        self.ns_pitch = ns_pitch
        self.ns_pad = ns_pad

        self.align_size = align_size
        self.pad = pad
        self.slice_z = slice_z

        self.precision_float = precision_float
        self.dtype = dtype
        self.dtype_str_list = dtype_str_list

        self.coeff_use = coeff_use
        self.ce_on = ce_on
        self.ch_on = ch_on

        self.eh_bufs = eh_bufs
        self.ex_buf, self.ey_buf, self.ez_buf = eh_bufs[:3]
        self.hx_buf, self.hy_buf, self.hz_buf = eh_bufs[3:]
        if ce_on:
            self.ce_bufs = ce_bufs
            self.cex_buf, self.cey_buf, self.cez_buf = ce_bufs
        if ch_on:
            self.ch_bufs = ch_bufs
            self.chx_buf, self.chy_buf, self.chz_buf = ch_bufs

        self.ls = local_work_size
        self.gs = global_work_size
        if self.gs == 0:
            self.gs = common_gpu.get_optimal_gs(device)

        # create update list
        self.instance_list = []
        self.append_instance = lambda instance: \
            common.append_instance(self.instance_list, instance)
Example #54
0
    def __init__(self, nx, ny, nz, \
            coeff_use='e', \
            precision_float='single', \
            use_cpu_core=0):
        """Allocate host field arrays and build the C update kernels.

        The z extent is padded so that ``nz + pad`` is a multiple of
        4 elements (single) or 2 elements (double).  Six zero-filled field
        arrays are created with the padded shape, plus three coefficient
        arrays filled with 0.5 for each of 'e' / 'h' present in
        ``coeff_use``.  The template ``core.c`` is specialised by macro
        substitution, compiled through ``common_cpu.build_clib`` and the
        ctypes signatures of ``update_e`` / ``update_h`` are declared.

        Args:
            nx, ny, nz: grid size (ints).
            coeff_use: one of ``''``, ``'e'``, ``'h'``, ``'eh'``.
            precision_float: ``'single'`` or ``'double'``.
            use_cpu_core: when non-zero, an ``omp_set_num_threads(N);`` call
                is substituted into the kernel source.
        """

        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
        common.check_value('precision_float', precision_float,
                           ('single', 'double'))

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.coeff_use = coeff_use
        self.dtype = {
            'single': np.float32,
            'double': np.float64
        }[precision_float]
        self.dtype_str_list = { \
                'single':['float', 'xmmintrin.h', 'ps', '__m128', '4', '0, 1, 1, 1'], \
                'double':['double', 'emmintrin.h', 'pd', '__m128d', '2', '0, 1'] }[precision_float]

        self.device_type = 'cpu'

        # padding for the nz which is multiple of 4 (float32) or 2 (float64)
        a_size = {'single': 4, 'double': 2}[precision_float]  # 16 Bytes
        self.pad = pad = int(np.ceil(float(nz) / a_size) * a_size) - nz
        self.slz = slice(None, None) if pad == 0 else slice(None, -pad)
        self.nz_pitch = nz_pitch = nz + pad

        mask_arr = np.ones(a_size, 'i')
        mask_arr[-(pad + 1):] = 0
        # Format through plain ints: str() of a list of NumPy scalars is not
        # guaranteed to look like "1, 1, 0, 0" (NumPy >= 2 prints
        # "np.int32(1)"), which would corrupt the generated C source.
        self.dtype_str_list.append(', '.join(str(int(m)) for m in mask_arr))

        # ns, qtask, enqueue
        self.ns = [nx, ny, nz]
        self.ns_pitch = [nx, ny, nz_pitch]
        self.qtask = QueueTask()
        self.enqueue = self.qtask.enqueue
        self.enqueue_barrier = self.qtask.enqueue_barrier

        # on/off the coefficient arrays
        self.ce_on = 'e' in self.coeff_use
        self.ch_on = 'h' in self.coeff_use

        # allocations
        self.ehs = [
            np.zeros(self.ns_pitch, dtype=self.dtype) for _ in range(6)
        ]
        self.ex, self.ey, self.ez, self.hx, self.hy, self.hz = self.ehs

        if self.ce_on:
            self.ces = [
                np.ones(self.ns_pitch, dtype=self.dtype) * 0.5
                for _ in range(3)
            ]
            self.cex, self.cey, self.cez = self.ces

        if self.ch_on:
            self.chs = [
                np.ones(self.ns_pitch, dtype=self.dtype) * 0.5
                for _ in range(3)
            ]
            self.chx, self.chy, self.chz = self.chs

        # program
        macros = [ \
                'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ', \
                'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ', \
                'OMP_SET_NUM_THREADS', \
                'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE', 'MASK_H', 'MASK_E']

        values = [ \
                '', 'ce=SET1(0.5)', '', '', '', '', \
                '', 'ch=SET1(0.5)', '', '', '', '', \
                ''] + self.dtype_str_list

        if use_cpu_core != 0:
            values[12] = 'omp_set_num_threads(%d);' % use_cpu_core

        if self.ce_on:
            values[:6] = [ \
                    ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce', \
                    'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
        if self.ch_on:
            values[6:12] = [ \
                    ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch', \
                    'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

        # read the template inside a context manager so the handle is closed
        with open(common_cpu.src_path + 'core.c') as src_file:
            template = src_file.read()
        ksrc = common.replace_template_code(template, macros, values)
        self.program = common_cpu.build_clib(ksrc)

        carg = np.ctypeslib.ndpointer(dtype=self.dtype, ndim=3, \
                shape=(nx, ny, nz_pitch), flags='C_CONTIGUOUS, ALIGNED')

        # Each kernel gets its OWN argtypes list.  Sharing one list and
        # extending it in place would leak the ce pointers into update_h's
        # signature (and vice versa) when both coefficient sets are enabled.
        e_argtypes = [c_int, c_int, c_int, c_int, c_int] + [carg] * 6
        h_argtypes = list(e_argtypes)

        self.e_args = self.ns_pitch + [0, nx * ny * nz_pitch] + self.ehs
        self.h_args = self.ns_pitch + [0, nx * ny * nz_pitch] + self.ehs

        if self.ce_on:
            e_argtypes += [carg] * 3
            self.e_args += self.ces

        if self.ch_on:
            h_argtypes += [carg] * 3
            self.h_args += self.chs

        self.program.update_e.argtypes = e_argtypes
        self.program.update_e.restype = None
        self.program.update_h.argtypes = h_argtypes
        self.program.update_h.restype = None
Example #55
0
    def __init__(self, fields, str_f, pt0, pt1, tfunc, spatial_value=1., is_overwrite=False):
        """Register an incident source on one field component.

        The region ``pt0``..``pt1`` is split with
        ``fields.split_points_dict``; for every part that has points a
        ``SetFields`` instance is created and its ``set_fields`` method is
        stored in ``self.func_dict``.  Parts without points get a no-op
        callable.  When ``spatial_value`` is an array, the matching
        sub-array for every part is stored in ``self.spatial_array_dict``.

        Args:
            fields: the ``Fields`` instance to attach to.
            str_f: one of 'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
            pt0, pt1: corner points (list/tuple of ints); each coordinate
                must lie in ``range(n)`` for the matching axis of
                ``fields.ns``.
            tfunc: plain Python function, stored as ``self.tfunc``.
            spatial_value: scalar, or ndarray whose shape and dtype must
                match the region and ``fields.dtype``.
            is_overwrite: forwarded to ``SetFields``.

        Raises:
            AssertionError: array ``spatial_value`` with wrong shape/dtype.
        """

        common.check_type('fields', fields, Fields)
        common.check_value('str_f', str_f, ('ex', 'ey', 'ez', 'hx', 'hy', 'hz'))
        common.check_type('pt0', pt0, (list, tuple), int)
        common.check_type('pt1', pt1, (list, tuple), int)
        common.check_type('tfunc', tfunc, types.FunctionType)
        # float / int are the same objects as types.FloatType / types.IntType
        # on Python 2, and the only spelling that exists on Python 3.
        common.check_type('spatial_value', spatial_value, \
                (np.ndarray, np.number, float, int) )
        common.check_type('is_overwrite', is_overwrite, bool)

        # local variables
        e_or_h = str_f[0]
        dtype = fields.dtype
        is_array = isinstance(spatial_value, np.ndarray)

        for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
            common.check_value('pt0 %s' % axis, p0, range(n))
            common.check_value('pt1 %s' % axis, p1, range(n))

        if is_array:
            shape = common.shape_two_points(pt0, pt1)
            assert shape == spatial_value.shape, \
                    'shape mismatch : %s, %s' % (shape, spatial_value.shape)
            assert dtype == spatial_value.dtype, \
                    'dtype mismatch : %s, %s' % (dtype, spatial_value.dtype)
        else:
            spatial_value = dtype(spatial_value)

        # create the SetFields instance
        func_dict = {}

        pts_dict = fields.split_points_dict(e_or_h, pt0, pt1)
        for part, pts in pts_dict.items():
            if pts is None:
                func_dict[part] = lambda a='': None
            else:
                func_dict[part] = SetFields(fields, str_f, \
                    pts[0], pts[1], is_array, is_overwrite).set_fields

        if is_array:
            spatial_array_dict = {}

            # slices of the full requested region; identical for every part
            slices0 = [slice(p0, p1+1) for p0, p1 in zip(pt0, pt1)]

            for part, pts in pts_dict.items():
                if pts is None:
                    spatial_array_dict[part] = 0

                else:
                    slices1 = [slice(p0, p1+1) for p0, p1 in zip(pts[0], pts[1])]
                    overlap_slices = common.intersection_two_slices(fields.ns, slices0, slices1)

                    # shift the overlap so it indexes spatial_value, whose
                    # origin is pt0
                    shift_slices = tuple(
                        slice(sl.start - p0, sl.stop - p0)
                        for sl, p0 in zip(overlap_slices, pt0))

                    dummied_shape = common.shape_two_points(pt0, pt1, is_dummy=True)
                    reshaped_value = spatial_value.reshape(dummied_shape)
                    # multidimensional indexing needs a tuple; a list of
                    # slices is rejected by current NumPy
                    dummied_array = reshaped_value[shift_slices]

                    overlap_shape = common.shape_two_points(pts[0], pts[1])
                    spatial_array_dict[part] = dummied_array.reshape(overlap_shape)

        # global variables and functions
        self.mainf = fields
        self.dtype = dtype
        self.tfunc = tfunc
        self.func_dict = func_dict
        self.e_or_h = e_or_h
        self.tstep = 1

        if is_array:
            self.spatial_array_dict = spatial_array_dict
            self.update = self.update_spatial_value
        else:
            self.spatial_value = spatial_value
            self.update = self.update_single_value

        # append to the update list
        self.priority_type = 'incident'
        fields.append_instance(self)
Example #56
0
    def __init__(self, gpuf, direction, tmax):
        """Prepare CPU buffer fields and persistent MPI requests for a GPU field.

        For every side named in ``direction`` a ``cpu.Fields`` buffer is
        created (first size argument 3, ``use_cpu_core=1``) on a shared
        ``cpu.QueueTask``, together with the get/set helpers that move data
        between ``gpuf`` and the buffer, one persistent send request and two
        persistent receive requests with their landing arrays.

        Args:
            gpuf: ``gpu.Fields`` instance on this rank.
            direction: ``'+'``, ``'-'`` or ``'+-'``.  ``'+'`` talks to
                ``rank + 1``, ``'-'`` talks to ``rank - 1``.
            tmax: stored unchanged as ``self.tmax``.
        """
        common.check_type('gpuf', gpuf, gpu.Fields)
        common.check_value('direction', direction, ('+', '-', '+-'))

        qtask = cpu.QueueTask()

        if '+' in direction:
            buf_p = cpu.Fields(qtask, 3, gpuf.ny, gpuf.nz, gpuf.coeff_use,
                               gpuf.precision_float, use_cpu_core=1)
            self.cpuf_p = buf_p

            # gpu h -> buffer h
            self.gf_p_h = gpu.GetFields(
                gpuf, ['hy', 'hz'], (-2, 0, 0), (-2, -1, -1))
            self.sf_p_h = cpu.SetFields(
                buf_p, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

            # buffer e -> gpu e
            self.gf_p_e = cpu.GetFields(
                buf_p, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
            self.sf_p_e = gpu.SetFields(
                gpuf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

            # buffer h is the MPI payload, buffer e is written back
            outgoing_h = cpu.GetFields(
                buf_p, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
            self.gf_h = outgoing_h
            self.sf_e = cpu.SetFields(
                buf_p, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

            upper = rank + 1
            self.req_send_h = comm.Send_init(outgoing_h.host_array, upper, tag=0)
            self.tmp_recv_e_list = [
                np.zeros(outgoing_h.host_array.shape, gpuf.dtype)
                for _ in range(2)
            ]
            self.req_recv_e_list = [
                comm.Recv_init(slot, upper, tag=1)
                for slot in self.tmp_recv_e_list
            ]
            self.switch_e = 0

        if '-' in direction:
            buf_m = cpu.Fields(qtask, 3, gpuf.ny, gpuf.nz, gpuf.coeff_use,
                               gpuf.precision_float, use_cpu_core=1)
            self.cpuf_m = buf_m

            # gpu e -> buffer e
            self.gf_m_e = gpu.GetFields(
                gpuf, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
            self.sf_m_e = cpu.SetFields(
                buf_m, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

            # buffer h -> gpu h
            self.gf_m_h = cpu.GetFields(
                buf_m, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
            self.sf_m_h = gpu.SetFields(
                gpuf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

            # buffer e is the MPI payload, buffer h is written back
            outgoing_e = cpu.GetFields(
                buf_m, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
            self.gf_e = outgoing_e
            self.sf_h = cpu.SetFields(
                buf_m, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

            lower = rank - 1
            self.req_send_e = comm.Send_init(outgoing_e.host_array, lower, tag=1)
            self.tmp_recv_h_list = [
                np.zeros(outgoing_e.host_array.shape, gpuf.dtype)
                for _ in range(2)
            ]
            self.req_recv_h_list = [
                comm.Recv_init(slot, lower, tag=0)
                for slot in self.tmp_recv_h_list
            ]
            self.switch_h = 0

        # bookkeeping
        self.direction = direction
        self.qtask = qtask

        self.tmax = tmax
        self.tstep = 1
Example #57
0
    def __init__(self,
                 fields,
                 pt0,
                 pt1,
                 ep_inf,
                 drude_freq,
                 gamma,
                 mask_arrays=(1, 1, 1)):
        """Attach a Drude-type material update to a region of ``fields``.

        Allocates three zero-filled psi buffers and three mask buffers on
        the device, rewrites the ce coefficients inside the region for the
        masked cells, builds the ``drude.cl`` program and registers the
        instance on ``fields`` with priority ``'material'``.

        Args:
            fields: the ``Fields`` instance (provides context, queue, dtype,
                ``dt``, ``ls``, ``ns``, ``eh_bufs`` and ``get_ces()``).
            pt0, pt1: corner points of the region; converted with
                ``common.convert_indices`` and then range-checked.
            ep_inf, drude_freq, gamma: material parameters (int or float)
                entering the update coefficients.
            mask_arrays: three per-component masks, each either an int or an
                ndarray with the region's shape.  Non-zero / True selects
                the cells whose ce value is replaced.

        Raises:
            AssertionError: an ndarray mask does not have the region shape.
        """
        common.check_type('fields', fields, Fields)
        common.check_type('pt0', pt0, (list, tuple), (int, float))
        common.check_type('pt1', pt1, (list, tuple), (int, float))
        common.check_type('ep_inf', ep_inf, (int, float))
        common.check_type('drude_freq', drude_freq, (int, float))
        common.check_type('gamma', gamma, (int, float))
        common.check_type('mask_arrays', mask_arrays, (list, tuple),
                          (np.ndarray, int))

        # local variables
        pt0 = common.convert_indices(fields.ns, pt0)
        pt1 = common.convert_indices(fields.ns, pt1)
        context = fields.context
        queue = fields.queue
        dtype = fields.dtype
        shape = common.shape_two_points(pt0, pt1, is_dummy=True)

        for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
            common.check_value('pt0 %s' % axis, p0, range(n))
            common.check_value('pt1 %s' % axis, p1, range(n))

        # report the shape that is actually compared
        region_shape = common.shape_two_points(pt0, pt1)
        for mask_array in mask_arrays:
            if isinstance(mask_array, np.ndarray):
                assert region_shape == mask_array.shape, \
                       'shape mismatch : %s, %s' % (region_shape, mask_array.shape)

        # update coefficients ('denom' avoids shadowing the MPI 'comm' name)
        dt = fields.dt
        aa = (2 - gamma * dt) / (2 + gamma * dt)
        bb = drude_freq**2 * dt / (2 + gamma * dt)
        denom = 2 * ep_inf + bb * dt
        ca = 2 * dt / denom
        cb = -(aa + 3) * bb * dt / denom
        cc = -(aa + 1) * dt / denom

        # allocations (psi buffers are created and zero-filled exactly once)
        f = np.zeros(shape, dtype)
        psi_bufs = [
            cl.Buffer(context, cl.mem_flags.READ_WRITE, f.nbytes)
            for _ in range(3)
        ]
        for psi_buf in psi_bufs:
            cl.enqueue_copy(queue, psi_buf, f)

        cf = np.ones(shape, dtype)
        mask_bufs = [
            cl.Buffer(context, cl.mem_flags.READ_ONLY, cf.nbytes)
            for _ in range(3)
        ]
        for mask_buf, mask in zip(mask_bufs, mask_arrays):
            cl.enqueue_copy(queue, mask_buf, cf * mask)

        # modify ce arrays: each component uses ITS OWN mask.  The logical
        # complement is taken with np.logical_not so that int masks (the
        # default 1) behave like booleans; bitwise inversion of the int 1
        # would give -2.
        slices = common.slices_two_points(pt0, pt1)
        for ce, mask in zip(fields.get_ces(), mask_arrays):
            ce[slices] = ca * mask + ce[slices] * np.logical_not(mask)

        # program
        nmax_str, xid_str, yid_str, zid_str = common_gpu.macro_replace_list(
            pt0, pt1)
        macros = ['NMAX', 'XID', 'YID', 'ZID', 'DX', 'DTYPE', 'PRAGMA_fp64']
        values = [nmax_str, xid_str, yid_str, zid_str,
                  str(fields.ls)] + fields.dtype_str_list

        # read the template inside a context manager so the handle is closed
        with open(common_gpu.src_path + 'drude.cl') as src_file:
            template = src_file.read()
        ksrc = common.replace_template_code(template, macros, values)
        program = cl.Program(fields.context, ksrc).build()

        # arguments
        pca = aa
        pcb = (aa + 1) * bb
        args = fields.ns + [dtype(cb), dtype(cc), dtype(pca), dtype(pcb)] \
            + fields.eh_bufs[:3] + psi_bufs + mask_bufs

        # global variables
        self.mainf = fields
        self.program = program
        self.args = args

        # global size: nmax rounded up to a multiple of the local size
        nmax = int(nmax_str)
        remainder = nmax % fields.ls
        self.gs = nmax if remainder == 0 else nmax - remainder + fields.ls

        # append to the update list
        self.priority_type = 'material'
        fields.append_instance(self)