コード例 #1
0
ファイル: pbc.py プロジェクト: wbkifun/fdtd_accelerate
    def __init__(self, fields, axis):
        """
        Build the OpenCL copy programs used for one periodic-boundary axis.

        Two programs are generated from the 'copy_self.cl' template, one for
        the e fields and one for the h fields.  The instance then registers
        itself on `fields` through append_instance() with priority type 'pbc'.

        fields: Fields instance (supplies ns, dtype_str_list and context)
        axis: 'x', 'y' or 'z'
        """

        common.check_type('fields', fields, Fields)
        common.check_value('axis', axis, ['x', 'y', 'z'])

        # local variables
        nx, ny, nz = fields.ns
        dtype_str_list = fields.dtype_str_list
        axis_id = {'x':0, 'y':1, 'z':2}[axis]
        nn = fields.ns[axis_id]

        def replace(lst, idx, val):
            # copy of lst with the element at idx replaced by val
            return lst[:idx] + [val] + lst[idx+1:]

        # Two points spanning the region for each field type; the component
        # along `axis` is overwritten with the plane index below.
        base0 = {'e': [0, 0, 0], 'h': [1, 1, 1]}
        base1 = {'e': [nx-2, ny-2, nz-2], 'h': [nx-1, ny-1, nz-1]}
        # plane indices along `axis`, in the order they fill IDX0 and IDX1
        plane_ids = {'e': [0, nn-1], 'h': [nn-1, 0]}

        # program
        value_dict = {'e': [], 'h': []}
        for eh in ['e', 'h']:
            for idx in plane_ids[eh]:
                pt0 = replace(base0[eh], axis_id, idx)
                pt1 = replace(base1[eh], axis_id, idx)
                nmax, xid, yid, zid = \
                        common_gpu.macro_replace_list(pt0, pt1)

                value_dict[eh].append( \
                        '%s*ny*nz + %s*nz + %s' % (xid, yid, zid) )

        # As in the original code, the NMAX value of the last loop iteration
        # is used for both programs.
        macros = ['NMAX', 'IDX0', 'IDX1', 'DTYPE', 'PRAGMA_fp64']
        values_e = [nmax] + value_dict['e'] + dtype_str_list
        values_h = [nmax] + value_dict['h'] + dtype_str_list

        # Read the template once and close the file deterministically.
        with open(common_gpu.src_path + 'copy_self.cl') as template_file:
            template = template_file.read()

        ksrc_e = common.replace_template_code(template, macros, values_e)
        ksrc_h = common.replace_template_code(template, macros, values_h)
        program_e = cl.Program(fields.context, ksrc_e).build()
        program_h = cl.Program(fields.context, ksrc_h).build()

        # global variables
        self.mainf = fields
        self.program_e = program_e
        self.program_h = program_h

        # names of the field components listed for this axis
        self.strfs_e = {'x':['ey','ez'], 'y':['ex','ez'], 'z':['ex','ey']}[axis]
        self.strfs_h = {'x':['hy','hz'], 'y':['hx','hz'], 'z':['hx','hy']}[axis]

        # append to the update list
        self.priority_type = 'pbc'
        self.mainf.append_instance(self)
コード例 #2
0
    def __init__(self, fields, str_f, pt0, pt1):
        """
        Prepare a CUDA 'copy' kernel and buffers for a sub-region of fields.

        The kernel is generated from the 'copy.cu' template with
        TARGET='target[sub_idx]' and SOURCE='source[idx]'.  A zero-filled host
        array covering the region for every requested field is allocated,
        together with a device copy of it.

        fields: Fields instance
        str_f: field name or list/tuple of names among
               'ex', 'ey', 'ez', 'hx', 'hy', 'hz'
        pt0, pt1: two points (list/tuple of int or float) defining the region;
                  they are passed through common.convert_indices() and must
                  then lie inside range(n) on every axis
        """

        common.check_type('fields', fields, Fields)
        common.check_type('str_f', str_f, (str, list, tuple), str)
        common.check_type('pt0', pt0, (list, tuple), (int, float))
        common.check_type('pt1', pt1, (list, tuple), (int, float))

        pt0 = list( common.convert_indices(fields.ns, pt0) )
        pt1 = list( common.convert_indices(fields.ns, pt1) )

        # local variables
        str_fs = common.convert_to_tuple(str_f)
        dtype_str_list = fields.dtype_str_list

        strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
        for strf in str_fs:
            common.check_value('str_f', strf, strf_list)

        for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
            common.check_value('pt0 %s' % axis, p0, range(n))
            common.check_value('pt1 %s' % axis, p1, range(n))

        # program
        macros = ['NMAX', 'XID', 'YID', 'ZID', \
                'ARGS', \
                'TARGET', 'SOURCE', 'OVERWRITE', \
                'DTYPE']

        values = common_gpu.macro_replace_list(pt0, pt1) + \
                ['DTYPE *source', \
                'target[sub_idx]', 'source[idx]', '='] + \
                dtype_str_list

        # Close the template file deterministically instead of leaking it.
        with open(common_gpu.src_path + 'copy.cu') as template_file:
            template = template_file.read()

        ksrc = common.replace_template_code(template, macros, values)
        program = SourceModule(ksrc)
        kernel_copy = program.get_function('copy')

        # allocation
        # A distinct loop name is used so the `str_f` argument is not
        # rebound (list comprehensions leak their variable in Python 2).
        source_bufs = [fields.get_buf(strf) for strf in str_fs]
        shape = common.shape_two_points(pt0, pt1, len(str_fs))

        host_array = np.zeros(shape, fields.dtype)
        # one equally sized slab of host_array per requested field
        split_host_array = np.split(host_array, len(str_fs))
        split_host_array_dict = dict( zip(str_fs, split_host_array) )
        target_buf = cuda.to_device(host_array)

        # global variables
        self.mainf = fields
        self.kernel_copy = kernel_copy
        self.source_bufs = source_bufs
        self.target_buf = target_buf
        self.host_array = host_array
        self.split_host_array_dict = split_host_array_dict
コード例 #3
0
ファイル: pbc.py プロジェクト: wbkifun/fdtd_accelerate
	def __init__(s, fields, axis):
		"""
		Build the OpenCL copy programs for one periodic-boundary axis.

		One program is generated from the 'copy.cl' template for the e fields
		and one for the h fields, substituting NMAX, IDX1 and IDX2.

		fields: object providing the OpenCL `context`
		axis: 'x', 'y' or 'z'
		"""
		s.emf = fields

		# names of the field components listed for this axis
		s.e_strfs = {'x':['ey','ez'], 'y':['ex','ez'], 'z':['ex','ey']}[axis]
		s.h_strfs = {'x':['hy','hz'], 'y':['hx','hz'], 'z':['hx','hy']}[axis]

		# NOTE(review): for 'y' and 'z' the expressions add the full `gid`
		# instead of a remainder (gid%nz, gid%ny), which looks inconsistent
		# with an x*ny*nz + y*nz + z layout - confirm against copy.cl.
		macros = ['NMAX', 'IDX1', 'IDX2']
		e_vals = {
				'x': ['ny*nz', '(nx-1)*ny*nz + gid', 'gid'],
				'y': ['nx*nz', '(gid/nz)*ny*nz + (ny-1)*nz + gid', '(gid/nz)*ny*nz + gid'],
				'z': ['nx*ny', '(gid/ny)*ny*nz + gid*nz + (nz-1)', '(gid/ny)*ny*nz + gid*nz'] }[axis]
		# same expressions as e_vals with IDX1 and IDX2 exchanged
		h_vals = {
				'x': ['ny*nz', 'gid', '(nx-1)*ny*nz + gid'],
				'y': ['nx*nz', '(gid/nz)*ny*nz + gid', '(gid/nz)*ny*nz + (ny-1)*nz + gid'],
				'z': ['nx*ny', '(gid/ny)*ny*nz + gid*nz', '(gid/ny)*ny*nz + gid*nz + (nz-1)'] }[axis]

		# Read the template once and close the file deterministically.
		with open(common_gpu.src_path + '/copy.cl') as template_file:
			template = template_file.read()

		e_ksrc = common.replace_template_code(template, macros, e_vals)
		h_ksrc = common.replace_template_code(template, macros, h_vals)
		s.program_e = cl.Program(s.emf.context, e_ksrc).build()
		s.program_h = cl.Program(s.emf.context, h_ksrc).build()
コード例 #4
0
ファイル: pbc.py プロジェクト: wbkifun/fdtd_accelerate
    def __init__(self, fields, axes):
        """
        Build the OpenCL periodic-boundary programs for the given axes.

        For every axis in `axes` and for both 'e' and 'h', a program is built
        from the 'copy_self.cl' template; IDX0 comes from the 'get' points and
        IDX1 from the 'set' points of common_exchange.  A global work size per
        axis (NMAX rounded up to a multiple of fields.ls) is stored in
        gs_dict.  The instance registers itself on `fields` with priority type
        'pbc'.

        fields: Fields instance
        axes: string that must contain at least one of 'x', 'y', 'z'
        """

        common.check_type('fields', fields, Fields)
        common.check_type('axes', axes, str)

        assert len( set(axes).intersection(set('xyz')) ) > 0, 'axes option is wrong: %s is given' % repr(axes)

        # local variables
        nx, ny, nz = fields.ns
        dtype_str_list = fields.dtype_str_list

        # loop-invariant lookups, computed once
        pt0_dict = common_exchange.pt0_dict(nx, ny, nz)
        pt1_dict = common_exchange.pt1_dict(nx, ny, nz)

        # Read the template once (it was re-opened for every program and
        # never closed before).
        with open(common_gpu.src_path + 'copy_self.cl') as template_file:
            template = template_file.read()

        # program
        macros = ['NMAX', 'IDX0', 'IDX1', 'DTYPE', 'PRAGMA_fp64']

        program_dict = {}
        gs_dict = {}
        for axis in axes:
            program_dict[axis] = {}

            for e_or_h in ['e', 'h']:
                pt0 = pt0_dict[axis][e_or_h]['get']
                pt1 = pt1_dict[axis][e_or_h]['get']
                # only the index strings of the 'get' region are needed
                _, xid_str, yid_str, zid_str = common_gpu.macro_replace_list(pt0, pt1)
                idx0_str = '%s*ny*nz + %s*nz + %s' % (xid_str, yid_str, zid_str)

                pt0 = pt0_dict[axis][e_or_h]['set']
                pt1 = pt1_dict[axis][e_or_h]['set']
                nmax_str, xid_str, yid_str, zid_str = common_gpu.macro_replace_list(pt0, pt1)
                idx1_str = '%s*ny*nz + %s*nz + %s' % (xid_str, yid_str, zid_str)

                values = [nmax_str, idx0_str, idx1_str] + dtype_str_list

                ksrc = common.replace_template_code(template, macros, values)
                program = cl.Program(fields.context, ksrc).build()
                program_dict[axis][e_or_h] = program

            # Round NMAX (taken from the last, 'h' 'set', region as before)
            # up to a multiple of the local work size.
            nmax = int(nmax_str)
            remainder = nmax % fields.ls
            gs_dict[axis] = nmax if remainder == 0 else nmax - remainder + fields.ls

        # global variables
        self.mainf = fields
        self.axes = axes
        self.program_dict = program_dict
        self.gs_dict = gs_dict

        # append to the update list
        self.priority_type = 'pbc'
        self.mainf.append_instance(self)
コード例 #5
0
    def __init__(self, fields, str_f, pt0, pt1):
        """
        Prepare an OpenCL copy program and buffers for a sub-region of fields.

        The program is built from the "copy.cl" template with
        TARGET="target[sub_idx]" and SOURCE="source[idx]".  A zero-filled host
        array covering the region for every requested field is allocated,
        together with a device buffer initialised from it.

        fields: Fields instance
        str_f: field name or list/tuple of names among
               "ex", "ey", "ez", "hx", "hy", "hz"
        pt0, pt1: two points (list/tuple of int) defining the region; each
                  component must lie inside range(n) of its axis
        """

        common.check_type("fields", fields, Fields)
        common.check_type("str_f", str_f, (str, list, tuple), str)
        common.check_type("pt0", pt0, (list, tuple), int)
        common.check_type("pt1", pt1, (list, tuple), int)

        # local variables
        str_fs = common.convert_to_tuple(str_f)
        dtype_str_list = fields.dtype_str_list

        strf_list = ["ex", "ey", "ez", "hx", "hy", "hz"]
        for strf in str_fs:
            common.check_value("str_f", strf, strf_list)

        for axis, n, p0, p1 in zip(["x", "y", "z"], fields.ns, pt0, pt1):
            common.check_value("pt0 %s" % axis, p0, range(n))
            common.check_value("pt1 %s" % axis, p1, range(n))

        # program
        macros = ["NMAX", "XID", "YID", "ZID", "ARGS", "TARGET", "SOURCE", "OVERWRITE", "DTYPE", "PRAGMA_fp64"]

        values = (
            common_gpu.macro_replace_list(pt0, pt1)
            + ["__global DTYPE *source", "target[sub_idx]", "source[idx]", "="]
            + dtype_str_list
        )

        # Close the template file deterministically instead of leaking it.
        with open(common_gpu.src_path + "copy.cl") as template_file:
            template = template_file.read()

        ksrc = common.replace_template_code(template, macros, values)
        program = cl.Program(fields.context, ksrc).build()

        # allocation
        # A distinct loop name is used so the `str_f` argument is not
        # rebound (list comprehensions leak their variable in Python 2).
        source_bufs = [fields.get_buf(strf) for strf in str_fs]
        shape = common.shape_two_points(pt0, pt1, len(str_fs))

        host_array = np.zeros(shape, dtype=fields.dtype)
        # one equally sized slab of host_array per requested field
        split_host_array = np.split(host_array, len(str_fs))
        split_host_array_dict = dict(zip(str_fs, split_host_array))

        target_buf = cl.Buffer(fields.context, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=host_array)

        # global variables
        self.mainf = fields
        self.program = program
        self.source_bufs = source_bufs
        self.target_buf = target_buf
        self.host_array = host_array
        self.split_host_array_dict = split_host_array_dict
コード例 #6
0
    def __init__(self, fields, str_f, pt0, pt1, source_is_array=False):
        """
        Prepare an OpenCL program for writing values into a field sub-region.

        The program is built from the 'subdomain.cl' template.  When
        `source_is_array` is true the kernel takes a '__global DTYPE *source'
        argument, a zero-initialised source buffer is allocated and
        `set_fields` is bound to set_fields_spatial_value; otherwise the
        kernel takes a single 'DTYPE source' value and `set_fields` is bound
        to set_fields_single_value.

        fields: Fields instance
        str_f: field name or list/tuple of names
        pt0, pt1: two points (list/tuple of int) defining the region
        source_is_array: bool selecting array or single-value source
        """

        common.check_type('fields', fields, Fields)
        common.check_type('str_f', str_f, (str, list, tuple), str)
        common.check_type('pt0', pt0, (list, tuple), int)
        common.check_type('pt1', pt1, (list, tuple), int)
        common.check_type('source_is_array', source_is_array, bool)

        self.mainf = mainf = fields
        str_fs = common.convert_to_tuple(str_f)

        # program
        macros = ['NMAX', 'XID', 'YID', 'ZID', \
                'ARGS', \
                'TARGET', 'SOURCE', \
                'DTYPE', 'PRAGMA_fp64']

        # Only the ARGS/TARGET/SOURCE substitutions depend on the source kind.
        if source_is_array:
            source_values = ['__global DTYPE *source', \
                    'target[idx]', 'source[sub_idx]']
        else:
            source_values = ['DTYPE source', \
                    'target[idx]', 'source']

        values = macro_replace_list(pt0, pt1) + \
                source_values + mainf.dtype_str_list[:2]

        # Close the template file deterministically instead of leaking it.
        with open(common_gpu.src_path + 'subdomain.cl') as template_file:
            template = template_file.read()

        ksrc = common.replace_template_code(template, macros, values)
        self.program = cl.Program(mainf.context, ksrc).build()

        # allocation
        # A distinct loop name is used so the `str_f` argument is not
        # rebound (list comprehensions leak their variable in Python 2).
        self.target_bufs = [mainf.get_buf(strf) for strf in str_fs]
        shape = list( common.shape_two_points(pt0, pt1) )
        # the regions of all requested fields are stacked along the first axis
        shape[0] *= len(str_fs)

        if source_is_array:
            tmp_array = np.zeros(shape, dtype=mainf.dtype)
            self.source_buf = cl.Buffer( \
                    mainf.context, \
                    cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, \
                    hostbuf=tmp_array)
            self.set_fields = self.set_fields_spatial_value
        else:
            self.set_fields = self.set_fields_single_value
コード例 #7
0
ファイル: pbc.py プロジェクト: wbkifun/fdtd_accelerate
    def __init__(self, fields, axes):
        """
        Build the OpenCL periodic-boundary programs for the given axes.

        For every axis in `axes` and for both "e" and "h", a program is built
        from the "copy_self.cl" template; IDX0 comes from the "get" points and
        IDX1 from the "set" points of common_exchange.  The instance registers
        itself on `fields` with priority type "pbc".

        fields: Fields instance
        axes: string that must contain at least one of "x", "y", "z"
        """

        common.check_type("fields", fields, Fields)
        common.check_type("axes", axes, str)

        assert len(set(axes).intersection(set("xyz"))) > 0, "axes option is wrong: %s is given" % repr(axes)

        # local variables
        nx, ny, nz = fields.ns
        dtype_str_list = fields.dtype_str_list

        # loop-invariant lookups, computed once
        pt0_dict = common_exchange.pt0_dict(nx, ny, nz)
        pt1_dict = common_exchange.pt1_dict(nx, ny, nz)

        # Read the template once (it was re-opened for every program and
        # never closed before).
        with open(common_gpu.src_path + "copy_self.cl") as template_file:
            template = template_file.read()

        # program
        macros = ["NMAX", "IDX0", "IDX1", "DTYPE", "PRAGMA_fp64"]

        program_dict = {}
        for axis in axes:
            program_dict[axis] = {}

            for e_or_h in ["e", "h"]:
                pt0 = pt0_dict[axis][e_or_h]["get"]
                pt1 = pt1_dict[axis][e_or_h]["get"]
                # only the index strings of the "get" region are needed
                _, xid, yid, zid = common_gpu.macro_replace_list(pt0, pt1)
                idx0_str = "%s*ny*nz + %s*nz + %s" % (xid, yid, zid)

                pt0 = pt0_dict[axis][e_or_h]["set"]
                pt1 = pt1_dict[axis][e_or_h]["set"]
                # NMAX is taken from the "set" region, as before
                nmax, xid, yid, zid = common_gpu.macro_replace_list(pt0, pt1)
                idx1_str = "%s*ny*nz + %s*nz + %s" % (xid, yid, zid)

                values = [nmax, idx0_str, idx1_str] + dtype_str_list

                ksrc = common.replace_template_code(template, macros, values)
                program = cl.Program(fields.context, ksrc).build()
                program_dict[axis][e_or_h] = program

        # global variables
        self.mainf = fields
        self.axes = axes
        self.program_dict = program_dict

        # append to the update list
        self.priority_type = "pbc"
        self.mainf.append_instance(self)
コード例 #8
0
ファイル: core_split.py プロジェクト: wbkifun/fdtd_accelerate
    def __init__(self, fields):
        """
        Build the CPU core update library and its split argument lists.

        The C source is generated from the 'core.c' template, compiled with
        common_cpu.build_clib(), and the ctypes signatures of update_e and
        update_h are declared.  Argument lists for the '', 'pre' and 'post'
        updates are stored in e_args_dict and h_args_dict; they differ only
        in the (idx0, nmax) pair.  The instance registers itself on `fields`
        with priority type 'core'.

        fields: Fields instance (CPU)
        """

        common.check_type('fields', fields, Fields)

        # local variables
        precision_float = fields.precision_float
        use_cpu_core = fields.use_cpu_core
        dtype = fields.dtype

        nx, ny, nz_pitch = ns_pitch = fields.ns_pitch
        align_size = fields.align_size
        pad = fields.pad

        ce_on = fields.ce_on
        ch_on = fields.ch_on

        # pad_str_list: comma separated 0/1 masks of length align_size
        def mask_to_str(mask):
            return str(list(mask)).strip('[]')

        mask_h = np.ones(align_size, 'i')
        mask_h[0] = 0

        mask_exy = np.ones(align_size, 'i')
        mask_exy[-(pad+1):] = 0

        mask = np.ones(align_size, 'i')
        if pad != 0:
            # [-0:] would select the whole array, hence the guard
            mask[-pad:] = 0

        pad_str_list = [mask_to_str(m) for m in (mask_h, mask_exy, mask)]

        # program
        dtype_str_list = { \
                'single':['float', 'xmmintrin.h', 'ps', '__m128', '4'], \
                'double':['double', 'emmintrin.h', 'pd', '__m128d', '2'] }[precision_float]

        macros = [ \
                'OMP_HEADER', 'OMP_FOR_E', 'OMP_FOR_H', \
                'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ', \
                'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ', \
                'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE', \
                'MASK_H', 'MASK_EXY', 'MASK']

        # defaults: no OpenMP, constant 0.5 coefficients
        values = ['', '', '', \
                '', 'ce=SET1(0.5)', '', '', '', '', \
                '', 'ch=SET1(0.5)', '', '', '', '', \
                ] + dtype_str_list + pad_str_list

        if use_cpu_core != 1:
            values[0] = '#include <omp.h>'

            # 0 leaves the number of threads to the OpenMP runtime
            omp_str = '' if use_cpu_core == 0 else 'omp_set_num_threads(%d);\n\t' % use_cpu_core
            values[1] = omp_str + '#pragma omp parallel for private(idx, i, j, k, hx0, hy0, hz0, h1, h2, e PRIVATE_CE)'
            values[2] = omp_str + '#pragma omp parallel for private(idx, i, j, k, ex0, ey0, ez0, e1, e2, h PRIVATE_CH)'

        if ce_on:
            values[3:9] = [ \
                    ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce', \
                    'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
        if ch_on:
            values[9:15] = [ \
                    ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch', \
                    'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

        # Close the template file deterministically instead of leaking it.
        with open(common_cpu.src_path + 'core.c') as template_file:
            template = template_file.read()

        ksrc = common.replace_template_code(template, macros, values)
        program = common_cpu.build_clib(ksrc)

        # arguments
        carg = np.ctypeslib.ndpointer(dtype, ndim=3, \
                shape=tuple(ns_pitch), flags='C_CONTIGUOUS, ALIGNED')

        # update_e and update_h each get their OWN argtypes list.  Sharing
        # one list and extending it with `+=` mutated both declarations, so
        # with ce_on and ch_on both set update_h declared three pointer
        # arguments more than it is called with.
        e_argtypes = [c_int] * 5 + [carg] * 6
        h_argtypes = [c_int] * 5 + [carg] * 6

        nyz_pitch = ny * nz_pitch
        e_args = ns_pitch + [0, nx*nyz_pitch] + fields.ehs
        h_args = ns_pitch + [0, nx*nyz_pitch] + fields.ehs
        if ce_on:
            e_argtypes += [carg] * 3
            e_args += fields.ces
        if ch_on:
            h_argtypes += [carg] * 3
            h_args += fields.chs

        program.update_e.argtypes = e_argtypes
        program.update_e.restype = None
        program.update_h.argtypes = h_argtypes
        program.update_h.restype = None

        def set_args(args, idx0, nmax):
            # same argument list with the (idx0, nmax) pair replaced
            return args[:3] + [idx0, nmax] + args[5:]

        e_args_dict = { \
                '': e_args, \
                'pre': set_args(e_args, nyz_pitch, 3*nyz_pitch), \
                'post': set_args(e_args, 0, nyz_pitch) }

        h_args_dict = { \
                '': h_args, \
                'pre': set_args(h_args, 0, 2*nyz_pitch), \
                'post': set_args(h_args, 2*nyz_pitch, 3*nyz_pitch) }

        # global variables
        self.mainf = fields
        self.program = program
        self.e_args_dict = e_args_dict
        self.h_args_dict = h_args_dict

        # append to the update list
        self.priority_type = 'core'
        fields.append_instance(self)
コード例 #9
0
    def __init__(self, fields, str_f, pt0, pt1, is_array=False, is_overwrite=True):
        """
        Prepare a CUDA 'copy' kernel for writing values into a field region.

        The kernel is generated from the 'copy.cu' template.  When `is_array`
        is true the kernel takes a 'DTYPE *source' argument, a zero-filled
        source buffer is allocated on the device and `set_fields` is bound to
        set_fields_spatial_value; otherwise the kernel takes a single
        'DTYPE source' value and `set_fields` is bound to
        set_fields_single_value.

        fields: Fields instance
        str_f: field name or list/tuple of names among
               'ex', 'ey', 'ez', 'hx', 'hy', 'hz'
        pt0, pt1: two points (list/tuple of int or float) defining the region;
                  they are passed through common.convert_indices() and must
                  then lie inside range(n) on every axis
        is_array: bool selecting array or single-value source
        is_overwrite: True substitutes '=' for OVERWRITE, False '+='
        """

        common.check_type('fields', fields, Fields)
        common.check_type('str_f', str_f, (str, list, tuple), str)
        common.check_type('pt0', pt0, (list, tuple), (int, float))
        common.check_type('pt1', pt1, (list, tuple), (int, float))
        common.check_type('is_array', is_array, bool)
        common.check_type('is_overwrite', is_overwrite, bool)

        pt0 = list( common.convert_indices(fields.ns, pt0) )
        pt1 = list( common.convert_indices(fields.ns, pt1) )

        # local variables
        str_fs = common.convert_to_tuple(str_f)
        dtype_str_list = fields.dtype_str_list
        overwrite_str = {True: '=', False: '+='}[is_overwrite]

        strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
        for strf in str_fs:
            common.check_value('str_f', strf, strf_list)

        for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
            common.check_value('pt0 %s' % axis, p0, range(n))
            common.check_value('pt1 %s' % axis, p1, range(n))

        # program
        macros = ['NMAX', 'XID', 'YID', 'ZID', \
                'ARGS', \
                'TARGET', 'SOURCE', 'OVERWRITE', \
                'DTYPE']

        # Only the ARGS and SOURCE substitutions depend on the source kind.
        if is_array:
            args_str, source_str = 'DTYPE *source', 'source[sub_idx]'
        else:
            args_str, source_str = 'DTYPE source', 'source'

        values = common_gpu.macro_replace_list(pt0, pt1) + \
                [args_str, \
                'target[idx]', source_str, overwrite_str] + \
                dtype_str_list

        # Close the template file deterministically instead of leaking it.
        with open(common_gpu.src_path + 'copy.cu') as template_file:
            template = template_file.read()

        ksrc = common.replace_template_code(template, macros, values)
        program = SourceModule(ksrc)
        kernel_copy = program.get_function('copy')

        # allocation
        # A distinct loop name is used so the `str_f` argument is not
        # rebound (list comprehensions leak their variable in Python 2).
        target_bufs = [fields.get_buf(strf) for strf in str_fs]
        shape = common.shape_two_points(pt0, pt1, len(str_fs))

        # global variables and functions
        self.mainf = fields
        self.kernel_copy = kernel_copy
        self.target_bufs = target_bufs
        self.shape = shape

        if is_array:
            tmp_array = np.zeros(shape, fields.dtype)
            self.source_buf = cuda.to_device(tmp_array)
            self.set_fields = self.set_fields_spatial_value
        else:
            self.set_fields = self.set_fields_single_value
コード例 #10
0
ファイル: fields.py プロジェクト: wbkifun/fdtd_accelerate
    def __init__(
        self,
        context,
        device,
        nx,
        ny,
        nz,
        coeff_use="e",
        precision_float="single",
        local_work_size=256,
        global_work_size=0,
    ):
        """
        Allocate the OpenCL field buffers and build the core update program.

        context: pyopencl Context
        device: pyopencl Device
        nx, ny, nz: grid sizes (int)
        coeff_use: "", "e", "h" or "eh"; selects which coefficient buffers
                   (ce and/or ch) are allocated and passed to the kernels
        precision_float: "single" or "double"
        local_work_size: stored as self.ls and substituted for the DX macro
        global_work_size: stored as self.gs; 0 means
                          common_gpu.get_optimal_gs(device) is used
        """

        common.check_type("context", context, cl.Context)
        common.check_type("device", device, cl.Device)
        common.check_type("nx", nx, int)
        common.check_type("ny", ny, int)
        common.check_type("nz", nz, int)
        common.check_type("global_work_size", global_work_size, int)
        common.check_type("local_work_size", local_work_size, int)

        common.check_value("coeff_use", coeff_use, ("", "e", "h", "eh"))
        common.check_value("precision_float", precision_float, ("single", "double"))

        self.context = context
        self.device = device
        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ls = local_work_size
        self.gs = global_work_size
        self.coeff_use = coeff_use
        self.dtype = {"single": np.float32, "double": np.float64}[precision_float]
        self.dtype_str = {"single": "float", "double": "double"}[precision_float]
        # substitutions for the DTYPE and PRAGMA_fp64 macros (PAD is appended below)
        self.dtype_str_list = {
            "single": ["float", ""],
            "double": ["double", "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"],
        }[precision_float]

        self.device_type = "gpu"

        # padding for the nz which is multiple of 16 (float32) or 8 (float64)
        self.align_size = a_size = {"single": 16, "double": 8}[precision_float]  # 64 Bytes
        self.pad = pad = int(np.ceil(float(nz) / a_size) * a_size) - nz
        # z-slice that excludes the padding (slice(None, -0) would be empty)
        self.slz = slice(None, None) if pad == 0 else slice(None, -pad)
        self.nz_pitch = nz_pitch = nz + pad

        self.dtype_str_list.append("" if pad == 0 else "-%s" % pad)

        # ns, queue, global_size
        self.ns = [np.int32(nx), np.int32(ny), np.int32(nz)]
        self.ns_pitch = [np.int32(nx), np.int32(ny), np.int32(nz_pitch)]
        self.ns_pad = [np.int32(nx), np.int32(ny), np.int32(pad)]
        self.queue = cl.CommandQueue(self.context, self.device)
        if self.gs == 0:
            self.gs = common_gpu.get_optimal_gs(self.device)

        # on/off the coefficient arrays
        self.ce_on = "e" in self.coeff_use
        self.ch_on = "h" in self.coeff_use

        # allocations
        f = np.zeros(self.ns_pitch, dtype=self.dtype)
        # Every buffer has the size of one padded field array.  The original
        # built an extra full-size array of 0.5 values only to read the same
        # nbytes from it.
        nbytes = f.nbytes
        mf = cl.mem_flags

        self.eh_bufs = [cl.Buffer(self.context, mf.READ_WRITE, nbytes) for i in range(6)]
        for eh_buf in self.eh_bufs:
            # zero-initialise the field buffers
            cl.enqueue_copy(self.queue, eh_buf, f)
        self.ex_buf, self.ey_buf, self.ez_buf = self.eh_bufs[:3]
        self.hx_buf, self.hy_buf, self.hz_buf = self.eh_bufs[3:]

        # NOTE(review): the coefficient buffers are allocated but no data is
        # copied into them here - confirm they are filled before first use.
        if self.ce_on:
            self.ce_bufs = [cl.Buffer(self.context, mf.READ_ONLY, nbytes) for i in range(3)]
            self.cex_buf, self.cey_buf, self.cez_buf = self.ce_bufs

        if self.ch_on:
            self.ch_bufs = [cl.Buffer(self.context, mf.READ_ONLY, nbytes) for i in range(3)]
            self.chx_buf, self.chy_buf, self.chz_buf = self.ch_bufs

        del f

        # program
        macros = ["ARGS_CE", "CEX", "CEY", "CEZ", "ARGS_CH", "CHX", "CHY", "CHZ", "DX", "DTYPE", "PRAGMA_fp64", "PAD"]

        # defaults: no coefficient arguments, constant 0.5 coefficients
        values = ["", "0.5", "0.5", "0.5", "", "0.5", "0.5", "0.5", str(self.ls)] + self.dtype_str_list

        self.e_args = self.ns_pitch + self.eh_bufs
        self.h_args = self.ns_pitch + self.eh_bufs

        if self.ce_on:
            values[:4] = [
                ", __global DTYPE *cex, __global DTYPE *cey, __global DTYPE *cez",
                "cex[idx]",
                "cey[idx]",
                "cez[idx]",
            ]
            self.e_args += self.ce_bufs

        if self.ch_on:
            values[4:8] = [
                ", __global DTYPE *chx, __global DTYPE *chy, __global DTYPE *chz",
                "chx[idx]",
                "chy[idx]",
                "chz[idx]",
            ]
            self.h_args += self.ch_bufs

        # Close the template file deterministically instead of leaking it.
        with open(common_gpu.src_path + "core.cl") as template_file:
            template = template_file.read()

        ksrc = common.replace_template_code(template, macros, values)
        self.program = cl.Program(self.context, ksrc).build()
コード例 #11
0
ファイル: fields.py プロジェクト: wbkifun/fdtd_accelerate
    def __init__(self, nx, ny, nz, \
            coeff_use='e', \
            precision_float='single', \
            use_cpu_core=0):
        """
        Allocate the CPU field arrays and build the core update library.

        nx, ny, nz: grid sizes (int)
        coeff_use: '', 'e', 'h' or 'eh'; selects which coefficient arrays
                   (ces and/or chs) are allocated and passed to the C code
        precision_float: 'single' or 'double'
        use_cpu_core: when non-zero, 'omp_set_num_threads(<value>);' is
                      substituted for the OMP_SET_NUM_THREADS macro
        """

        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
        common.check_value('precision_float', precision_float, ('single', 'double'))

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.coeff_use=coeff_use
        self.dtype = {'single':np.float32, 'double':np.float64}[precision_float]
        # substitutions for DTYPE, MM_HEADER, PSD, TYPE128, INCRE and MASK_H
        # (MASK_E is appended below)
        self.dtype_str_list = { \
                'single':['float', 'xmmintrin.h', 'ps', '__m128', '4', '0, 1, 1, 1'], \
                'double':['double', 'emmintrin.h', 'pd', '__m128d', '2', '0, 1'] }[precision_float]

        self.device_type = 'cpu'

        # padding for the nz which is multiple of 4 (float32) or 2 (float64)
        a_size = {'single':4, 'double':2}[precision_float]   # 16 Bytes
        self.pad = pad = int(np.ceil(float(nz) / a_size) * a_size) - nz
        # z-slice that excludes the padding (slice(None, -0) would be empty)
        self.slz = slice(None, None) if pad == 0 else slice(None, -pad)
        self.nz_pitch = nz_pitch = nz + pad

        # mask with the last (pad+1) entries cleared
        mask_arr = np.ones(a_size, 'i')
        mask_arr[-(pad+1):] = 0
        self.dtype_str_list.append( str(list(mask_arr)).strip('[]') )

        # ns, qtask, enqueue
        self.ns = [nx, ny, nz]
        self.ns_pitch = [nx, ny, nz_pitch]
        self.qtask = QueueTask()
        self.enqueue = self.qtask.enqueue
        self.enqueue_barrier = self.qtask.enqueue_barrier

        # on/off the coefficient arrays
        self.ce_on = 'e' in self.coeff_use
        self.ch_on = 'h' in self.coeff_use

        # allocations
        self.ehs = [np.zeros(self.ns_pitch, dtype=self.dtype) for i in range(6)]
        self.ex, self.ey, self.ez, self.hx, self.hy, self.hz = self.ehs

        if self.ce_on:
            self.ces = [np.ones(self.ns_pitch, dtype=self.dtype)*0.5 for i in range(3)]
            self.cex, self.cey, self.cez = self.ces

        if self.ch_on:
            self.chs = [np.ones(self.ns_pitch, dtype=self.dtype)*0.5 for i in range(3)]
            self.chx, self.chy, self.chz = self.chs

        # program
        macros = [ \
                'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ', \
                'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ', \
                'OMP_SET_NUM_THREADS', \
                'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE', 'MASK_H', 'MASK_E']

        # defaults: constant 0.5 coefficients, no explicit thread count
        values = [ \
                '', 'ce=SET1(0.5)', '', '', '', '', \
                '', 'ch=SET1(0.5)', '', '', '', '', \
                ''] + self.dtype_str_list

        if use_cpu_core != 0:
            values[12] = 'omp_set_num_threads(%d);' % use_cpu_core

        if self.ce_on:
            values[:6] = [ \
                    ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce', \
                    'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
        if self.ch_on:
            values[6:12] = [ \
                    ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch', \
                    'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

        # Close the template file deterministically instead of leaking it.
        with open(common_cpu.src_path + 'core.c') as template_file:
            template = template_file.read()

        ksrc = common.replace_template_code(template, macros, values)
        self.program = common_cpu.build_clib(ksrc)

        carg = np.ctypeslib.ndpointer(dtype=self.dtype, ndim=3, \
                shape=(nx, ny, nz_pitch), flags='C_CONTIGUOUS, ALIGNED')

        # update_e and update_h each get their OWN argtypes list.  Sharing
        # one list and extending it with `+=` mutated both declarations, so
        # with coeff_use='eh' update_h declared three pointer arguments more
        # than it is called with.
        e_argtypes = [c_int] * 5 + [carg] * 6
        h_argtypes = [c_int] * 5 + [carg] * 6

        self.e_args = self.ns_pitch + [0, nx*ny*nz_pitch] + self.ehs
        self.h_args = self.ns_pitch + [0, nx*ny*nz_pitch] + self.ehs

        if self.ce_on:
            e_argtypes += [carg] * 3
            self.e_args += self.ces

        if self.ch_on:
            h_argtypes += [carg] * 3
            self.h_args += self.chs

        self.program.update_e.argtypes = e_argtypes
        self.program.update_e.restype = None
        self.program.update_h.argtypes = h_argtypes
        self.program.update_h.restype = None
コード例 #12
0
    def __init__(self, fields, str_f, pt0, pt1, is_array=False, is_overwrite=True):
        """
        Prepare an OpenCL copy program for writing values into a field region.

        The program is built from the 'copy.cl' template.  When `is_array` is
        true the kernel takes a '__global DTYPE *source' argument, a
        zero-initialised source buffer is allocated and `set_fields` is bound
        to set_fields_spatial_value; otherwise the kernel takes a single
        'DTYPE source' value and `set_fields` is bound to
        set_fields_single_value.  The global work size self.gs is NMAX
        rounded up to a multiple of fields.ls.

        fields: Fields instance
        str_f: field name or list/tuple of names among
               'ex', 'ey', 'ez', 'hx', 'hy', 'hz'
        pt0, pt1: two points (list/tuple of int or float) defining the region;
                  they are passed through common.convert_indices() and must
                  then lie inside range(n) on every axis
        is_array: bool selecting array or single-value source
        is_overwrite: True substitutes '=' for OVERWRITE, False '+='
        """

        common.check_type('fields', fields, Fields)
        common.check_type('str_f', str_f, (str, list, tuple), str)
        common.check_type('pt0', pt0, (list, tuple), (int, float))
        common.check_type('pt1', pt1, (list, tuple), (int, float))
        common.check_type('is_array', is_array, bool)
        common.check_type('is_overwrite', is_overwrite, bool)

        pt0 = list( common.convert_indices(fields.ns, pt0) )
        pt1 = list( common.convert_indices(fields.ns, pt1) )

        # local variables
        str_fs = common.convert_to_tuple(str_f)
        dtype_str_list = fields.dtype_str_list
        overwrite_str = {True: '=', False: '+='}[is_overwrite]

        strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
        for strf in str_fs:
            common.check_value('str_f', strf, strf_list)

        for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
            common.check_value('pt0 %s' % axis, p0, range(n))
            common.check_value('pt1 %s' % axis, p1, range(n))

        # program
        macros = ['NMAX', 'XID', 'YID', 'ZID', \
                'ARGS', \
                'TARGET', 'SOURCE', 'OVERWRITE', \
                'DTYPE', 'PRAGMA_fp64']

        nmax_str, xid_str, yid_str, zid_str = common_gpu.macro_replace_list(pt0, pt1)

        # Only the ARGS and SOURCE substitutions depend on the source kind.
        if is_array:
            args_str, source_str = '__global DTYPE *source', 'source[sub_idx]'
        else:
            args_str, source_str = 'DTYPE source', 'source'

        values = [nmax_str, xid_str, yid_str, zid_str, \
                args_str, \
                'target[idx]', source_str, overwrite_str] + \
                dtype_str_list

        # Close the template file deterministically instead of leaking it.
        with open(common_gpu.src_path + 'copy.cl') as template_file:
            template = template_file.read()

        ksrc = common.replace_template_code(template, macros, values)
        program = cl.Program(fields.context, ksrc).build()

        # allocation
        # A distinct loop name is used so the `str_f` argument is not
        # rebound (list comprehensions leak their variable in Python 2).
        target_bufs = [fields.get_buf(strf) for strf in str_fs]
        shape = common.shape_two_points(pt0, pt1, len(str_fs))

        # global variables and functions
        self.mainf = fields
        self.program = program
        self.target_bufs = target_bufs
        self.shape = shape

        # round NMAX up to a multiple of the local work size
        nmax = int(nmax_str)
        remainder = nmax % fields.ls
        self.gs = nmax if remainder == 0 else nmax - remainder + fields.ls

        if is_array:
            tmp_array = np.zeros(shape, dtype=fields.dtype)
            self.source_buf = cl.Buffer( \
                    fields.context, \
                    cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, \
                    hostbuf=tmp_array)
            self.set_fields = self.set_fields_spatial_value
        else:
            self.set_fields = self.set_fields_single_value
コード例 #13
0
    def __init__(self, fields):
        """Compile the 'core_split.cl' program and tabulate its launch data.

        The kernel template is specialised by macro substitution, built for
        the OpenCL context of `fields`, and the kernel argument lists and
        global work sizes are stored per index range under the keys
        '', 'pre', 'mid' and 'post'.

        Args:
            fields: Fields instance providing the OpenCL context, the
                padded grid size (`ns_pitch`), the field/coefficient
                buffers, the local work size (`ls`) and the precision
                settings.
        """

        common.check_type('fields', fields, Fields)

        # local variables
        context = fields.context

        ns_pitch = fields.ns_pitch
        pad = fields.pad

        precision_float = fields.precision_float
        dtype_str_list = fields.dtype_str_list

        ce_on = fields.ce_on
        ch_on = fields.ch_on

        eh_bufs = fields.eh_bufs
        # coefficient buffers are only read when the matching flag is set
        if ce_on:
            ce_bufs = fields.ce_bufs
        if ch_on:
            ch_bufs = fields.ch_bufs

        ls = fields.ls

        # program
        str_pad = '' if pad == 0 else '-%s' % pad
        coeff_constant = {'single': '0.5f', 'double': '0.5'}[precision_float]

        macros = ['ARGS_CE', 'CEX', 'CEY', 'CEZ', \
                'ARGS_CH', 'CHX', 'CHY', 'CHZ', \
                'DX', 'PAD', 'DTYPE', 'PRAGMA_fp64']

        # defaults: no coefficient arguments, constant coefficient literal
        values = ['', coeff_constant, coeff_constant, coeff_constant, \
                '', coeff_constant, coeff_constant, coeff_constant, \
                str(ls), str_pad] + dtype_str_list

        if ce_on:
            values[:4] = [ \
                    ', __global DTYPE *cex, __global DTYPE *cey, __global DTYPE *cez', \
                    'cex[idx]', 'cey[idx]', 'cez[idx]']

        if ch_on:
            values[4:8] = [ \
                    ', __global DTYPE *chx, __global DTYPE *chy, __global DTYPE *chz', \
                    'chx[idx]', 'chy[idx]', 'chz[idx]']

        # read the template through a context manager so the file handle
        # is closed deterministically instead of being leaked
        with open(common_gpu.src_path + 'core_split.cl') as template_file:
            template = template_file.read()
        ksrc = common.replace_template_code(template, macros, values)
        program = cl.Program(context, ksrc).build()

        # arguments
        e_args = ns_pitch + eh_bufs
        h_args = ns_pitch + eh_bufs
        if ce_on:
            e_args += ce_bufs
        if ch_on:
            h_args += ch_bufs

        nx, ny, nz_pitch = ns_pitch
        nyzp = ny * nz_pitch

        def span(start, stop):
            """Return the leading [start, stop] kernel arguments as int32."""
            return [np.int32(start), np.int32(stop)]

        e_args_dict = { \
                '': span(0, nx*nyzp) + e_args, \
                'pre': span(nyzp, 2*nyzp) + e_args, \
                'mid': span(2*nyzp, nx*nyzp) + e_args, \
                'post': span(0, nyzp) + e_args}

        h_args_dict = { \
                '': span(0, nx*nyzp) + h_args, \
                'pre': span((nx-2)*nyzp, (nx-1)*nyzp) + h_args, \
                'mid': span(0, (nx-2)*nyzp) + h_args, \
                'post': span((nx-1)*nyzp, nx*nyzp) + h_args}

        def round_up(n):
            """Round n up to the next multiple of the local work size."""
            remainder = n % ls
            return int(n) if remainder == 0 else int(n - remainder + ls)

        gs_dict = { \
                '': round_up(nx*nyzp), \
                'pre': round_up(nyzp), \
                'mid': round_up((nx-2)*nyzp), \
                'post': round_up(nyzp)}

        # global variables and functions
        self.mainf = fields
        self.program = program
        self.e_args_dict = e_args_dict
        self.h_args_dict = h_args_dict
        self.gs_dict = gs_dict
コード例 #14
0
ファイル: core.py プロジェクト: wbkifun/fdtd_accelerate
    def __init__(self, fields):
        """Compile the 'core.cl' program and register this core updater.

        The kernel template is specialised by macro substitution, built for
        the OpenCL context of `fields`, and the kernel argument lists for
        the E and H updates are stored on the instance. The instance is
        then appended to the update list of `fields` with priority type
        'core'.

        Args:
            fields: Fields instance providing the OpenCL context, the
                padded grid size (`ns_pitch`), the field/coefficient
                buffers, the local work size (`ls`) and the precision
                settings.
        """

        common.check_type('fields', fields, Fields)

        # local variables
        context = fields.context

        ns_pitch = fields.ns_pitch
        pad = fields.pad

        precision_float = fields.precision_float
        dtype_str_list = fields.dtype_str_list

        ce_on = fields.ce_on
        ch_on = fields.ch_on

        eh_bufs = fields.eh_bufs
        # coefficient buffers are only read when the matching flag is set
        if ce_on:
            ce_bufs = fields.ce_bufs
        if ch_on:
            ch_bufs = fields.ch_bufs

        ls = fields.ls

        # program
        str_pad = '' if pad == 0 else '-%s' % pad
        coeff_constant = {'single': '0.5f', 'double': '0.5'}[precision_float]

        macros = ['ARGS_CE', 'CEX', 'CEY', 'CEZ', \
                'ARGS_CH', 'CHX', 'CHY', 'CHZ', \
                'DX', 'PAD', 'DTYPE', 'PRAGMA_fp64']

        # defaults: no coefficient arguments, constant coefficient literal
        values = ['', coeff_constant, coeff_constant, coeff_constant, \
                '', coeff_constant, coeff_constant, coeff_constant, \
                str(ls), str_pad] + dtype_str_list

        if ce_on:
            values[:4] = [ \
                    ', __global DTYPE *cex, __global DTYPE *cey, __global DTYPE *cez', \
                    'cex[idx]', 'cey[idx]', 'cez[idx]']

        if ch_on:
            values[4:8] = [ \
                    ', __global DTYPE *chx, __global DTYPE *chy, __global DTYPE *chz', \
                    'chx[idx]', 'chy[idx]', 'chz[idx]']

        # read the template through a context manager so the file handle
        # is closed deterministically instead of being leaked
        with open(common_gpu.src_path + 'core.cl') as template_file:
            template = template_file.read()
        ksrc = common.replace_template_code(template, macros, values)
        program = cl.Program(context, ksrc).build()

        # arguments
        e_args = ns_pitch + eh_bufs
        h_args = ns_pitch + eh_bufs
        if ce_on:
            e_args += ce_bufs
        if ch_on:
            h_args += ch_bufs

        # global variables and functions
        self.mainf = fields
        self.e_args = e_args
        self.h_args = h_args
        self.program = program

        # append to the update list
        self.priority_type = 'core'
        self.mainf.append_instance(self)
コード例 #15
0
    def __init__(self, fields, str_f, pt0, pt1, is_array=False, is_overwrite=True):
        """Prepare a kernel that writes values into a region of field buffers.

        The 'copy.cl' template is specialised for the region spanned by
        `pt0` and `pt1` and built for the OpenCL context of `fields`.
        `self.set_fields` is bound to the spatial-array or single-value
        setter according to `is_array`.

        Args:
            fields: Fields instance owning the target buffers.
            str_f: field name or sequence of field names, each one of
                'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
            pt0: three integer indices, each within the grid size along
                its axis.
            pt1: three integer indices, each within the grid size along
                its axis.
            is_array: when True the kernel reads `source[sub_idx]` from a
                device buffer allocated here; otherwise the kernel takes a
                single scalar `source`.
            is_overwrite: when True the kernel assigns with '=';
                otherwise it accumulates with '+='.
        """

        common.check_type("fields", fields, Fields)
        common.check_type("str_f", str_f, (str, list, tuple), str)
        common.check_type("pt0", pt0, (list, tuple), int)
        common.check_type("pt1", pt1, (list, tuple), int)
        common.check_type("is_array", is_array, bool)
        common.check_type("is_overwrite", is_overwrite, bool)

        # local variables
        str_fs = common.convert_to_tuple(str_f)
        dtype_str_list = fields.dtype_str_list
        overwrite_str = {True: "=", False: "+="}[is_overwrite]

        # the list of valid names is loop-invariant, so build it once
        strf_list = ["ex", "ey", "ez", "hx", "hy", "hz"]
        for strf in str_fs:
            common.check_value("str_f", strf, strf_list)

        for axis, n, p0, p1 in zip(["x", "y", "z"], fields.ns, pt0, pt1):
            common.check_value("pt0 %s" % axis, p0, range(n))
            common.check_value("pt1 %s" % axis, p1, range(n))

        # program
        macros = ["NMAX", "XID", "YID", "ZID", "ARGS", "TARGET", "SOURCE", "OVERWRITE", "DTYPE", "PRAGMA_fp64"]

        # only the ARGS/TARGET/SOURCE substitutions depend on is_array
        if is_array:
            source_values = ["__global DTYPE *source", "target[idx]", "source[sub_idx]"]
        else:
            source_values = ["DTYPE source", "target[idx]", "source"]

        values = (
            common_gpu.macro_replace_list(pt0, pt1)
            + source_values
            + [overwrite_str]
            + dtype_str_list
        )

        # read the template through a context manager so the file handle
        # is closed deterministically instead of being leaked
        with open(common_gpu.src_path + "copy.cl") as template_file:
            template = template_file.read()
        ksrc = common.replace_template_code(template, macros, values)
        program = cl.Program(fields.context, ksrc).build()

        # allocation
        # a distinct loop name avoids rebinding the `str_f` parameter
        target_bufs = [fields.get_buf(strf) for strf in str_fs]
        shape = common.shape_two_points(pt0, pt1, len(str_fs))

        if is_array:
            # zero-initialised device buffer with the shape of the region
            tmp_array = np.zeros(shape, dtype=fields.dtype)
            source_buf = cl.Buffer(
                fields.context, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=tmp_array
            )

        # global variables and functions
        self.mainf = fields
        self.program = program
        self.target_bufs = target_bufs
        self.shape = shape

        if is_array:
            self.source_buf = source_buf
            self.set_fields = self.set_fields_spatial_value
        else:
            self.set_fields = self.set_fields_single_value
コード例 #16
0
ファイル: core.py プロジェクト: wbkifun/fdtd_accelerate
    def __init__(self, fields):
        """Compile the 'core.cu' module and register this core updater.

        The kernel template is specialised by macro substitution and
        compiled with SourceModule. The 'update_e' and 'update_h' kernels
        are prepared with the types of their argument lists, and the
        instance is appended to the update list of `fields` with priority
        type 'core'.

        Args:
            fields: Fields instance providing the padded grid size
                (`ns_pitch`), the field/coefficient buffers, the block
                size (`bs`) and the precision settings.
        """

        common.check_type('fields', fields, Fields)

        # local variables
        ns_pitch = fields.ns_pitch
        pad = fields.pad

        precision_float = fields.precision_float
        dtype_str_list = fields.dtype_str_list

        ce_on = fields.ce_on
        ch_on = fields.ch_on

        eh_bufs = fields.eh_bufs
        ce_bufs = fields.ce_bufs
        ch_bufs = fields.ch_bufs

        bs = fields.bs

        # program
        str_pad = '' if pad == 0 else '-%s' % pad
        coeff_constant = {'single': '0.5f', 'double': '0.5'}[precision_float]

        macros = ['ARGS_CE', 'CEX', 'CEY', 'CEZ', \
                'ARGS_CH', 'CHX', 'CHY', 'CHZ', \
                'DX', 'PAD', 'DTYPE']

        # defaults: no coefficient arguments, constant coefficient literal
        values = ['', coeff_constant, coeff_constant, coeff_constant, \
                '', coeff_constant, coeff_constant, coeff_constant, \
                str(bs[0]), str_pad] + dtype_str_list

        if ce_on:
            values[:4] = [ \
                    ', DTYPE *cex, DTYPE *cey, DTYPE *cez', \
                    'cex[idx]', 'cey[idx]', 'cez[idx]']

        if ch_on:
            values[4:8] = [ \
                    ', DTYPE *chx, DTYPE *chy, DTYPE *chz', \
                    'chx[idx]', 'chy[idx]', 'chz[idx]']

        # read the template through a context manager so the file handle
        # is closed deterministically instead of being leaked
        with open(common_gpu.src_path + 'core.cu') as template_file:
            template = template_file.read()
        ksrc = common.replace_template_code(template, macros, values)
        program = SourceModule(ksrc)
        kernel_update_e = program.get_function('update_e')
        kernel_update_h = program.get_function('update_h')

        # arguments
        args = ns_pitch + eh_bufs
        e_args = args + ce_bufs if ce_on else args
        h_args = args + ch_bufs if ch_on else args

        kernel_update_e.prepare([type(arg) for arg in e_args])
        kernel_update_h.prepare([type(arg) for arg in h_args])

        # global variables and functions
        self.mainf = fields
        self.e_args = e_args
        self.h_args = h_args
        self.kernel_update_e = kernel_update_e
        self.kernel_update_h = kernel_update_h

        # append to the update list
        self.priority_type = 'core'
        self.mainf.append_instance(self)
コード例 #17
0
ファイル: core.py プロジェクト: xj361685640/fdtd_accelerate
    def __init__(self, fields):
        """Build the C core-update library and register this core updater.

        The 'core.c' template is specialised by macro substitution, built
        with `common_cpu.build_clib`, and the ctypes signatures of
        `update_e` and `update_h` are declared. Argument lists for the
        whole index range ('') and for the 'pre', 'mid' and 'post'
        sub-ranges are stored on the instance, which is then appended to
        the update list of `fields` with priority type 'core'.

        Args:
            fields: Fields instance providing the padded grid size
                (`ns_pitch`), alignment and padding, the field and
                coefficient arrays, the precision and the number of CPU
                cores to use.
        """

        common.check_type('fields', fields, Fields)

        # local variables
        precision_float = fields.precision_float
        use_cpu_core = fields.use_cpu_core
        dtype = fields.dtype

        nx, ny, nz_pitch = ns_pitch = fields.ns_pitch
        align_size = fields.align_size
        pad = fields.pad

        ce_on = fields.ce_on
        ch_on = fields.ch_on

        ehs = fields.ehs
        # coefficient arrays are only read when the matching flag is set
        if ce_on:
            ces = fields.ces
        if ch_on:
            chs = fields.chs

        # program
        dtype_str_list = { \
                'single':['float', 'xmmintrin.h', 'ps', '__m128', '4'], \
                'double':['double', 'emmintrin.h', 'pd', '__m128d', '2'] }[precision_float]

        def mask_to_str(mask):
            """Format a mask array as comma-separated integer literals."""
            # tolist() yields plain Python ints, so the text does not
            # depend on how the installed NumPy prints its scalar types
            return ', '.join(str(value) for value in mask.tolist())

        mask0 = np.ones(align_size, 'i')

        mask_h = mask0.copy()
        mask_h[0] = 0

        mask_exy = mask0.copy()
        mask_exy[-(pad+1):] = 0

        mask = mask0.copy()
        if pad != 0:
            mask[-pad:] = 0

        # order matches the macros MASK_H, MASK_EXY, MASK
        pad_str_list = [mask_to_str(m) for m in (mask_h, mask_exy, mask)]

        macros = [ \
                'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ', \
                'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ', \
                'OMP_SET_NUM_THREADS', \
                'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE', \
                'MASK_H', 'MASK_EXY', 'MASK']

        # defaults: no coefficient arguments, coefficient set to 0.5
        values = [ \
                '', 'ce=SET1(0.5)', '', '', '', '', \
                '', 'ch=SET1(0.5)', '', '', '', '', \
                ''] + dtype_str_list + pad_str_list

        if use_cpu_core != 0:
            values[12] = 'omp_set_num_threads(%d);' % use_cpu_core

        if ce_on:
            values[:6] = [ \
                    ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce', \
                    'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
        if ch_on:
            values[6:12] = [ \
                    ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch', \
                    'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

        # read the template through a context manager so the file handle
        # is closed deterministically instead of being leaked
        with open(common_cpu.src_path + 'core.c') as template_file:
            template = template_file.read()
        ksrc = common.replace_template_code(template, macros, values)
        program = common_cpu.build_clib(ksrc)

        carg = np.ctypeslib.ndpointer(dtype, ndim=3, \
                shape=tuple(ns_pitch), flags='C_CONTIGUOUS, ALIGNED')

        # Each function gets its OWN argtypes list. Sharing one list and
        # extending it with `+=` mutates the list seen by both functions,
        # so enabling ce and ch together would register too many argument
        # types for update_h.
        base_argtypes = [c_int] * 5 + [carg] * 6
        e_argtypes = list(base_argtypes)
        h_argtypes = list(base_argtypes)

        # arguments
        nyz_pitch = ny * nz_pitch
        e_args = ns_pitch + [0, nx*nyz_pitch] + ehs
        h_args = ns_pitch + [0, nx*nyz_pitch] + ehs
        if ce_on:
            e_argtypes += [carg] * 3
            e_args += ces
        if ch_on:
            h_argtypes += [carg] * 3
            h_args += chs

        program.update_e.argtypes = e_argtypes
        program.update_e.restype = None
        program.update_h.argtypes = h_argtypes
        program.update_h.restype = None

        # positions 3 and 4 of each argument list hold the index range
        pre_e_args = e_args[:]
        pre_e_args[3:5] = [(nx-2)*nyz_pitch, nx*nyz_pitch]
        mid_e_args = e_args[:]
        mid_e_args[3:5] = [nyz_pitch, (nx-2)*nyz_pitch]
        post_e_args = e_args[:]
        post_e_args[3:5] = [0, nyz_pitch]

        pre_h_args = h_args[:]
        pre_h_args[3:5] = [0, 2*nyz_pitch]
        mid_h_args = h_args[:]
        mid_h_args[3:5] = [2*nyz_pitch, (nx-1)*nyz_pitch]
        post_h_args = h_args[:]
        post_h_args[3:5] = [(nx-1)*nyz_pitch, nx*nyz_pitch]

        # global variables
        self.mainf = fields
        self.e_args = e_args
        self.h_args = h_args
        self.program = program

        self.e_args_dict = {'':e_args, \
                'pre':pre_e_args, 'mid':mid_e_args, 'post':post_e_args}
        self.h_args_dict = {'':h_args, \
                'pre':pre_h_args, 'mid':mid_h_args, 'post':post_h_args}

        # append to the update list
        self.priority_type = 'core'
        fields.append_instance(self)