Example #1
    def __init__(self, mainf_list, buffer_dict={}):
        """
        """

        common.check_type('buffer_dict', buffer_dict, dict)

        # local variables
        nx_list = [f.nx for f in mainf_list]
        nx = int(sum(nx_list) - len(nx_list) + 1)
        ny, nz = [int(n) for n in mainf_list[0].ns[1:]]
        accum_nx_list = np.add.accumulate([0] + [f.nx - 1 for f in mainf_list])
        #accum_nx_list[-1] += 1
        accum_nx_list = [int(anx) for anx in accum_nx_list]

        # global variables
        self.mainf_list = mainf_list
        self.buffer_dict = buffer_dict
        self.updatef_list = mainf_list[:] + list(buffer_dict.values())

        self.dtype = mainf_list[0].dtype

        self.nx = nx
        self.nx_list = nx_list
        self.accum_nx_list = accum_nx_list
        self.ns = (nx, ny, nz)

        # update list
        self.instance_list = []
        self.append_instance = lambda instance: \
                common.append_instance(self.instance_list, instance)
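
The constructor above stitches per-device subdomains together along x, with each neighboring pair of Fields sharing one overlapping cell plane. A minimal sketch of the index bookkeeping with made-up subdomain sizes (mirroring Example #1, where the final += 1 on the last entry is left commented out):

    import numpy as np

    # hypothetical per-device x-sizes; neighbors overlap by one cell plane
    nx_list = [64, 64, 32]
    nx = int(sum(nx_list) - len(nx_list) + 1)                 # 158 cells total
    accum_nx_list = np.add.accumulate([0] + [n - 1 for n in nx_list])
    accum_nx_list = [int(anx) for anx in accum_nx_list]       # [0, 63, 126, 157]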
Example #2
    def __init__(self,
                 nx,
                 ny,
                 nz,
                 precision_float='single',
                 segment_nbytes=16):
        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_type('segment_nbytes', segment_nbytes, int)
        common.check_value('precision_float', precision_float,
                           ('single', 'double'))

        # local variables
        dtype = {'single': np.float32, 'double': np.float64}[precision_float]

        # pad nz up to a multiple of the segment size
        align_size = segment_nbytes // np.nbytes[dtype]
        pad = int(np.ceil(float(nz) / align_size) * align_size) - nz
        slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
        nz_pitch = nz + pad

        ns = [nx, ny, nz]
        ns_pitch = [nx, ny, nz_pitch]
        ns_pad = [nx, ny, pad]

        # allocations
        ehs = [np.zeros(ns_pitch, dtype) for i in range(6)]
        ces = [np.ones(ns_pitch, dtype) * 0.5 for i in range(3)]
        chs = [np.ones(ns_pitch, dtype) * 0.5 for i in range(3)]

        # global variables
        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns
        self.ns_pitch = ns_pitch
        self.ns_pad = ns_pad

        self.align_size = align_size
        self.pad = pad
        self.slice_z = slice_z

        self.precision_float = precision_float
        self.dtype = dtype

        self.ehs = ehs
        self.ex, self.ey, self.ez = ehs[:3]
        self.hx, self.hy, self.hz = ehs[3:]
        self.ces = ces
        self.cex, self.cey, self.cez = ces
        self.chs = chs
        self.chx, self.chy, self.chz = chs

        # update list
        self.instance_list = []
        self.append_instance = lambda instance: \
                common.append_instance(self.instance_list, instance)
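
The padding above rounds each z-line up to a whole number of 16-byte segments so rows stay aligned. A small worked sketch of the arithmetic, assuming single precision and two sample values of nz:

    import numpy as np

    segment_nbytes = 16
    dtype = np.float32                                  # 4 bytes per element
    align_size = segment_nbytes // np.nbytes[dtype]     # 16 // 4 = 4 elements
    for nz in (100, 101):
        pad = int(np.ceil(float(nz) / align_size) * align_size) - nz
        slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
        print(nz, pad, slice_z)   # 100 -> pad 0; 101 -> pad 3, slice(None, -3)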
Example #3
    def __init__(self, nx, ny, nz, precision_float='single', segment_nbytes=16):
        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_type('segment_nbytes', segment_nbytes, int)
        common.check_value('precision_float', precision_float, ('single', 'double'))

        # local variables
        dtype = {'single':np.float32, 'double':np.float64}[precision_float]

        # pad nz up to a multiple of the segment size
        align_size = segment_nbytes // np.nbytes[dtype]
        pad = int(np.ceil(float(nz) / align_size) * align_size) - nz
        slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
        nz_pitch = nz + pad

        ns = [nx, ny, nz]
        ns_pitch = [nx, ny, nz_pitch]
        ns_pad = [nx, ny, pad]

        # allocations
        ehs = [np.zeros(ns_pitch, dtype) for i in range(6)]
        ces = [np.ones(ns_pitch, dtype)*0.5 for i in range(3)]
        chs = [np.ones(ns_pitch, dtype)*0.5 for i in range(3)]

        # global variables
        self.dx = 1.
        self.dt = 0.5
        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns
        self.ns_pitch = ns_pitch
        self.ns_pad = ns_pad

        self.align_size = align_size
        self.pad = pad
        self.slice_z = slice_z

        self.precision_float = precision_float
        self.dtype = dtype

        self.ehs = ehs
        self.ex, self.ey, self.ez = ehs[:3]
        self.hx, self.hy, self.hz = ehs[3:]
        self.ces = ces
        self.cex, self.cey, self.cez = ces
        self.chs = chs
        self.chx, self.chy, self.chz = chs

        # update list
        self.instance_list = []
        self.append_instance = lambda instance: \
                common.append_instance(self.instance_list, instance)
Example #4
    def __init__(self, mainf_list, buffer_dict={}):
        """
        """

        try:
            from kemp.fdtd3d import gpu
            common.check_type('mainf_list', mainf_list, (list, tuple),
                              (gpu.Fields, cpu.Fields))
        except ImportError:
            common.check_type('mainf_list', mainf_list, (list, tuple),
                              cpu.Fields)
        common.check_type('buffer_dict', buffer_dict, dict)

        # local variables
        device_type_list = [f.device_type for f in mainf_list]
        if 'cpu' in device_type_list:
            cpuf = mainf_list[device_type_list.index('cpu')]
        else:
            cpuf = None

        nx_list = [f.nx for f in mainf_list]
        nx = int(sum(nx_list) - len(nx_list) + 1)
        ny, nz = [int(n) for n in mainf_list[0].ns[1:]]
        accum_nx_list = np.add.accumulate([0] + [f.nx - 1 for f in mainf_list])
        accum_nx_list[-1] += 1
        accum_nx_list = [int(anx) for anx in accum_nx_list]

        # global variables
        self.mainf_list = mainf_list
        self.buffer_dict = buffer_dict
        self.updatef_list = list(buffer_dict.values()) + mainf_list[:]
        self.cpuf = cpuf

        self.dtype = mainf_list[0].dtype

        self.nx = nx
        self.nx_list = nx_list
        self.accum_nx_list = accum_nx_list
        self.ns = (nx, ny, nz)

        # update list
        self.instance_list = []
        self.append_instance = lambda instance: \
                common.append_instance(self.instance_list, instance)

        # append the ExchangeNode instance
        if len(self.updatef_list) > 1:
            from exchange import ExchangeNode
            ExchangeNode(self)
Example #5
    def __init__(self, mainf_list, buffer_dict={}):
        """
        """

        try:
            from kemp.fdtd3d import gpu
            common.check_type('mainf_list', mainf_list, (list, tuple), (gpu.Fields, cpu.Fields))
        except ImportError:
            common.check_type('mainf_list', mainf_list, (list, tuple), cpu.Fields)
        common.check_type('buffer_dict', buffer_dict, dict)

        # local variables
        device_type_list = [f.device_type for f in mainf_list]
        if 'cpu' in device_type_list:
            cpuf = mainf_list[ device_type_list.index('cpu') ]
        else:
            cpuf = None

        nx_list = [f.nx for f in mainf_list]
        nx = int( sum(nx_list) - len(nx_list) + 1 )
        ny, nz = [int(n) for n in mainf_list[0].ns[1:]]
        accum_nx_list = np.add.accumulate([0] + [f.nx-1 for f in mainf_list])
        accum_nx_list[-1] += 1
        accum_nx_list = [int(anx) for anx in accum_nx_list]

        # global variables
        self.mainf_list = mainf_list
        self.buffer_dict = buffer_dict
        self.updatef_list = list(buffer_dict.values()) + mainf_list[:]
        self.cpuf = cpuf

        self.dtype = mainf_list[0].dtype

        self.nx = nx
        self.nx_list = nx_list
        self.accum_nx_list = accum_nx_list
        self.ns = (nx, ny, nz)

        # update list
        self.instance_list = []
        self.append_instance = lambda instance: \
                common.append_instance(self.instance_list, instance)

        # append the ExchangeNode instance
        if len(self.updatef_list) > 1:
            from exchange import ExchangeNode
            ExchangeNode(self)
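
A hypothetical usage sketch for the node constructor of Examples #4 and #5, assuming the class is exposed as node.Fields and that cpu.Fields has the signature from Example #7 (both names are assumptions, not confirmed by these snippets):

    from kemp.fdtd3d import cpu, node

    # two CPU subdomains of 100 cells each along x, overlapping by one plane
    mainf_list = [cpu.Fields(100, 120, 128) for i in range(2)]
    nf = node.Fields(mainf_list)
    print(nf.ns)               # (199, 120, 128)
    print(nf.accum_nx_list)    # [0, 99, 199]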
Example #6
    def __init__(self, nx, ny, nz, precision_float='single'):
        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_value('precision_float', precision_float, ('single', 'double'))

        # local variables
        dtype = {'single':np.float32, 'double':np.float64}[precision_float]
        ns = [nx, ny, nz]

        # allocations
        ehs = [np.zeros(ns, dtype) for i in range(6)]

        # global variables
        self.dx = 1.
        self.dt = 0.5
        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns

        self.precision_float = precision_float
        self.dtype = dtype

        self.ehs = ehs
        self.ex, self.ey, self.ez = ehs[:3]
        self.hx, self.hy, self.hz = ehs[3:]
        
        self.ce_on, self.ch_on, self.rd_on = False, False, False
        self.ces = self.cex, self.cey, self.cez = 0.5, 0.5, 0.5
        self.chs = self.chx, self.chy, self.chz = 0.5, 0.5, 0.5
        self.erds = self.erdx, self.erdy, self.erdz = 1., 1., 1.
        self.hrds = self.hrdx, self.hrdy, self.hrdz = 1., 1., 1.

        # update list
        self.instance_list = []
        self.append_instance = lambda instance: \
                common.append_instance(self.instance_list, instance)
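
Example #6 keeps scalar coefficients (0.5) and unit r-d factors rather than per-cell arrays. For orientation, a plain-NumPy sketch of the kind of Yee E-field update such coefficients feed into; the index convention here is chosen for illustration and is not the project's actual kernel:

    import numpy as np

    nx, ny, nz = 32, 32, 32
    ex, ey, ez, hx, hy, hz = [np.zeros((nx, ny, nz), np.float32) for i in range(6)]
    cex = cey = cez = 0.5

    # E += c * (curl H), backward differences on a unit grid
    ex[:, 1:, 1:] += cex * ((hz[:, 1:, 1:] - hz[:, :-1, 1:]) -
                            (hy[:, 1:, 1:] - hy[:, 1:, :-1]))
    ey[1:, :, 1:] += cey * ((hx[1:, :, 1:] - hx[1:, :, :-1]) -
                            (hz[1:, :, 1:] - hz[:-1, :, 1:]))
    ez[1:, 1:, :] += cez * ((hy[1:, 1:, :] - hy[:-1, 1:, :]) -
                            (hx[1:, 1:, :] - hx[1:, :-1, :]))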
Example #7
    def __init__(self, nx, ny, nz, \
            coeff_use='', \
            precision_float='single', \
            use_cpu_core=0):
        """
        """

        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
        common.check_value('precision_float', precision_float, \
                ('single', 'double'))
        common.check_type('use_cpu_core', use_cpu_core, int)

        # local variables
        dtype = {'single': np.float32, 'double': np.float64}[precision_float]

        # pad nz up to a multiple of 4 (float32) or 2 (float64)
        segment_nbytes = 16
        align_size = segment_nbytes // np.nbytes[dtype]
        pad = int(np.ceil(float(nz) / align_size) * align_size) - nz
        slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
        nz_pitch = nz + pad

        ns = [nx, ny, nz]
        ns_pitch = [nx, ny, nz_pitch]
        ns_pad = [nx, ny, pad]

        # on/off the coefficient arrays
        ce_on = 'e' in coeff_use
        ch_on = 'h' in coeff_use

        # allocations
        ehs = [np.zeros(ns_pitch, dtype) for i in range(6)]

        if ce_on:
            ces = [np.ones(ns_pitch, dtype) * 0.5 for i in range(3)]

        if ch_on:
            chs = [np.ones(ns_pitch, dtype) * 0.5 for i in range(3)]

        # global variables and functions
        self.device_type = 'cpu'
        self.qtask = QueueTask()
        self.enqueue = self.qtask.enqueue
        self.enqueue_barrier = self.qtask.enqueue_barrier

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns
        self.ns_pitch = ns_pitch
        self.ns_pad = ns_pad

        self.align_size = align_size
        self.pad = pad
        self.slice_z = slice_z

        self.precision_float = precision_float
        self.use_cpu_core = use_cpu_core
        self.dtype = dtype

        self.coeff_use = coeff_use
        self.ce_on = ce_on
        self.ch_on = ch_on

        self.ehs = ehs
        self.ex, self.ey, self.ez = ehs[:3]
        self.hx, self.hy, self.hz = ehs[3:]
        if ce_on:
            self.ces = ces
            self.cex, self.cey, self.cez = ces
        if ch_on:
            self.chs = chs
            self.chx, self.chy, self.chz = chs

        # update list
        self.instance_list = []
        self.append_instance = lambda instance: \
                common.append_instance(self.instance_list, instance)
Example #8
    def __init__(self, queue_task, nx, ny, nz, \
            coeff_use='', \
            precision_float='single', \
            use_cpu_core=0):
        """
        """

        common.check_type('queue_task', queue_task, QueueTask)
        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
        common.check_value('precision_float', precision_float, \
                ('single', 'double'))
        common.check_type('use_cpu_core', use_cpu_core, int)

        # local variables
        dtype = {'single':np.float32, 'double':np.float64}[precision_float]

        # pad nz up to a multiple of 4 (float32) or 2 (float64)
        segment_nbytes = 16
        align_size = segment_nbytes // np.nbytes[dtype]
        pad = int(np.ceil(float(nz) / align_size) * align_size) - nz
        slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
        nz_pitch = nz + pad

        ns = [nx, ny, nz]
        ns_pitch = [nx, ny, nz_pitch]
        ns_pad = [nx, ny, pad]

        # on/off the coefficient arrays
        ce_on = 'e' in coeff_use
        ch_on = 'h' in coeff_use

        # allocations
        ehs = [np.zeros(ns_pitch, dtype) for i in range(6)]

        if ce_on:
            ces = [np.ones(ns_pitch, dtype)*0.5 for i in range(3)]

        if ch_on:
            chs = [np.ones(ns_pitch, dtype)*0.5 for i in range(3)]

        # global variables and functions
        self.device_type = 'cpu'
        self.qtask = queue_task
        self.enqueue = queue_task.enqueue
        self.enqueue_barrier = queue_task.enqueue_barrier

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns
        self.ns_pitch = ns_pitch
        self.ns_pad = ns_pad

        self.align_size = align_size
        self.pad = pad
        self.slice_z = slice_z

        self.precision_float = precision_float
        self.use_cpu_core = use_cpu_core
        self.dtype = dtype

        self.coeff_use = coeff_use
        self.ce_on = ce_on
        self.ch_on = ch_on

        self.ehs = ehs
        self.ex, self.ey, self.ez = ehs[:3]
        self.hx, self.hy, self.hz = ehs[3:]
        if ce_on:
            self.ces = ces
            self.cex, self.cey, self.cez = ces
        if ch_on:
            self.chs = chs
            self.chx, self.chy, self.chz = chs

        # update list
        self.instance_list = []
        self.append_instance = lambda instance: \
                common.append_instance(self.instance_list, instance)
Example #9
    def __init__(self, context, device, \
            nx, ny, nz, \
            coeff_use='', \
            precision_float='single', \
            local_work_size=256, \
            global_work_size=0):
        """
        """

        common.check_type('context', context, cl.Context)
        common.check_type('device', device, cl.Device)
        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_type('global_work_size', global_work_size, int)
        common.check_type('local_work_size', local_work_size, int)
        common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
        common.check_value('precision_float', precision_float, \
                ('single', 'double'))

        # local variables
        queue = cl.CommandQueue(context, device)
        pragma_fp64 = ''
        if precision_float == 'double':
            extensions = device.get_info(cl.device_info.EXTENSIONS)
            if 'cl_khr_fp64' in extensions:
                pragma_fp64 = '#pragma OPENCL EXTENSION cl_khr_fp64 : enable'
            elif 'cl_amd_fp64' in extensions:
                pragma_fp64 = '#pragma OPENCL EXTENSION cl_amd_fp64 : enable'
            else:
                precision_float = 'single'
                print('Warning: The %s GPU device does not support double precision.' % \
                        device.get_info(cl.device_info.NAME))
                print("The precision is changed to 'single'.")

        dtype = {'single': np.float32, 'double': np.float64}[precision_float]
        dtype_str_list = { \
                'single':['float', ''], \
                'double':['double', pragma_fp64] }[precision_float]

        # pad nz up to a multiple of 16 (float32) or 8 (float64)
        align_size = {'single': 16, 'double': 8}[precision_float]  # 64 Bytes
        pad = int(np.ceil(float(nz) / align_size) * align_size) - nz
        slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
        nz_pitch = nz + pad

        ns = [np.int32(nx), np.int32(ny), np.int32(nz)]
        ns_pitch = [np.int32(nx), np.int32(ny), np.int32(nz_pitch)]
        ns_pad = [np.int32(nx), np.int32(ny), np.int32(pad)]

        # on/off the coefficient arrays
        ce_on = 'e' in coeff_use
        ch_on = 'h' in coeff_use

        # allocations
        f = np.zeros(ns_pitch, dtype)
        cf = np.ones_like(f) * 0.5

        mflags = cl.mem_flags.READ_WRITE
        eh_bufs = [cl.Buffer(context, mflags, f.nbytes) for i in range(6)]
        for eh_buf in eh_bufs:
            cl.enqueue_copy(queue, eh_buf, f)

        if ce_on:
            mflags = cl.mem_flags.READ_ONLY
            ce_bufs = [cl.Buffer(context, mflags, cf.nbytes) for i in range(3)]

        if ch_on:
            mflags = cl.mem_flags.READ_ONLY
            ch_bufs = [cl.Buffer(context, mflags, cf.nbytes) for i in range(3)]

        del f, cf

        # global variables
        self.device_type = 'gpu'
        self.context = context
        self.device = device
        self.queue = queue

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns
        self.ns_pitch = ns_pitch
        self.ns_pad = ns_pad

        self.align_size = align_size
        self.pad = pad
        self.slice_z = slice_z

        self.precision_float = precision_float
        self.dtype = dtype
        self.dtype_str_list = dtype_str_list

        self.coeff_use = coeff_use
        self.ce_on = ce_on
        self.ch_on = ch_on

        self.eh_bufs = eh_bufs
        self.ex_buf, self.ey_buf, self.ez_buf = eh_bufs[:3]
        self.hx_buf, self.hy_buf, self.hz_buf = eh_bufs[3:]
        if ce_on:
            self.ce_bufs = ce_bufs
            self.cex_buf, self.cey_buf, self.cez_buf = ce_bufs
        if ch_on:
            self.ch_bufs = ch_bufs
            self.chx_buf, self.chy_buf, self.chz_buf = ch_bufs

        self.ls = local_work_size
        self.gs = global_work_size
        if self.gs == 0:
            self.gs = common_gpu.get_optimal_gs(device)

        # create update list
        self.instance_list = []
        self.append_instance = lambda instance: \
            common.append_instance(self.instance_list, instance)
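
A standalone PyOpenCL sketch of the allocation pattern above: uninitialized device buffers are created, then zeroed with a host-to-device copy (create_some_context is used only so the sketch runs outside the class):

    import numpy as np
    import pyopencl as cl

    context = cl.create_some_context()
    device = context.devices[0]
    queue = cl.CommandQueue(context, device)

    f = np.zeros((16, 16, 32), np.float32)
    eh_bufs = [cl.Buffer(context, cl.mem_flags.READ_WRITE, f.nbytes)
               for i in range(6)]
    for eh_buf in eh_bufs:
        cl.enqueue_copy(queue, eh_buf, f)    # initialize each buffer to zero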
Example #10
    def __init__(self, device_id, \
            nx, ny, nz, \
            coeff_use='', \
            precision_float='single', \
            block_size=256, \
            grid_size=0):
        """
        """

        common.check_type('device_id', device_id, int)
        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
        common.check_value('precision_float', precision_float, ('single', 'double'))
        common.check_type('block_size', block_size, int)
        common.check_type('grid_size', grid_size, int)

        # local variables
        dtype = {'single':np.float32, 'double':np.float64}[precision_float]
        dtype_str_list = { \
                'single':['float'], \
                'double':['double'] }[precision_float]

        # pad nz up to a multiple of 16 (float32) or 8 (float64)
        segment_nbytes = 64
        align_size = segment_nbytes // np.nbytes[dtype]
        pad = int(np.ceil(float(nz) / align_size) * align_size) - nz
        slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
        nz_pitch = nz + pad

        ns = [np.int32(nx), np.int32(ny), np.int32(nz)]
        ns_pitch = [np.int32(nx), np.int32(ny), np.int32(nz_pitch)]
        ns_pad = [np.int32(nx), np.int32(ny), np.int32(pad)]

        # on/off the coefficient arrays
        ce_on = 'e' in coeff_use
        ch_on = 'h' in coeff_use

        # CUDA device and context
        cuda.init()
        device = cuda.Device(device_id)
        context = device.make_context()
        stream = cuda.Stream()

        # allocations
        f = np.zeros(ns_pitch, dtype)
        cf = np.ones_like(f) * 0.5

        eh_bufs = [cuda.to_device(f) for i in range(6)]
        ce_bufs = [cuda.to_device(cf) for i in range(3)] if ce_on else None
        ch_bufs = [cuda.to_device(cf) for i in range(3)] if ch_on else None
        del f, cf

        # global variables
        self.device_type = 'gpu'
        self.device = device
        self.context = context
        self.stream = stream

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns
        self.ns_pitch = ns_pitch
        self.ns_pad = ns_pad

        self.align_size = align_size
        self.pad = pad
        self.slice_z = slice_z

        self.precision_float = precision_float
        self.dtype = dtype
        self.dtype_str_list = dtype_str_list 

        self.coeff_use = coeff_use
        self.ce_on = ce_on
        self.ch_on = ch_on

        self.eh_bufs = eh_bufs
        self.ex_buf, self.ey_buf, self.ez_buf = eh_bufs[:3]
        self.hx_buf, self.hy_buf, self.hz_buf = eh_bufs[3:]
        self.ce_bufs = ce_bufs
        self.ch_bufs = ch_bufs
        if ce_on: self.cex_buf, self.cey_buf, self.cez_buf = ce_bufs
        if ch_on: self.chx_buf, self.chy_buf, self.chz_buf = ch_bufs

        self.bs = (block_size, 1, 1)
        self.gs = (grid_size, 1) if grid_size != 0 else (common_gpu.get_optimal_gs(device, block_size), 1)

        # create update list
        self.instance_list = []
        self.append_instance = lambda instance: \
            common.append_instance(self.instance_list, instance)
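
A standalone PyCUDA sketch of the same allocation pattern, where device arrays are initialized directly from host arrays; the context made with make_context() must be released, which the class presumably handles elsewhere:

    import numpy as np
    import pycuda.driver as cuda

    cuda.init()
    device = cuda.Device(0)
    context = device.make_context()
    try:
        f = np.zeros((16, 16, 32), np.float32)
        eh_bufs = [cuda.to_device(f) for i in range(6)]   # six zeroed device arrays
        back = cuda.from_device(eh_bufs[0], f.shape, f.dtype)
        print(back.max())                                 # 0.0
    finally:
        context.pop()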
Example #11
    def __init__(self, nx, ny, nz, \
            coeff_use='', \
            precision_float='single', \
            use_cpu_core=0, \
            mpi_type=''):
        """
        """

        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
        common.check_value('precision_float', precision_float, \
                ('single', 'double'))
        common.check_type('use_cpu_core', use_cpu_core, int)
        common.check_value('mpi_type', mpi_type, \
                ('', 'x+', 'x-', 'y+', 'y-', 'z+', 'z-'))

        # local variables
        dtype = {'single':np.float32, 'double':np.float64}[precision_float]

        # pad nz up to a multiple of 4 (float32) or 2 (float64)
        align_size = {'single':4, 'double':2}[precision_float]   # 16 Bytes
        pad = int(np.ceil(float(nz) / align_size) * align_size) - nz
        slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
        nz_pitch = nz + pad

        ns = [nx, ny, nz]
        ns_pitch = [nx, ny, nz_pitch]
        ns_pad = [nx, ny, pad]

        # on/off the coefficient arrays
        ce_on = 'e' in coeff_use
        ch_on = 'h' in coeff_use

        # allocations
        ehs = [np.zeros(ns_pitch, dtype) for i in range(6)]

        if ce_on:
            ces = [np.ones(ns_pitch, dtype)*0.5 for i in range(3)]

        if ch_on:
            chs = [np.ones(ns_pitch, dtype)*0.5 for i in range(3)]

        # global variables and functions
        self.device_type = 'cpu'
        self.qtask = QueueTask()
        self.enqueue = self.qtask.enqueue
        self.enqueue_barrier = self.qtask.enqueue_barrier

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns
        self.ns_pitch = ns_pitch
        self.ns_pad = ns_pad

        self.align_size = align_size
        self.pad = pad
        self.slice_z = slice_z

        self.precision_float = precision_float
        self.use_cpu_core = use_cpu_core
        self.dtype = dtype

        self.coeff_use = coeff_use
        self.ce_on = ce_on
        self.ch_on = ch_on

        self.ehs = ehs
        self.ex, self.ey, self.ez = ehs[:3]
        self.hx, self.hy, self.hz = ehs[3:]
        if ce_on:
            self.ces = ces
            self.cex, self.cey, self.cez = ces
        if ch_on:
            self.chs = chs
            self.chx, self.chy, self.chz = chs

        self.mpi_type = mpi_type

        # update list
        self.instance_list = []
        self.append_instance = lambda instance: \
                common.append_instance(self.instance_list, instance)

        split = {'+': 'h', '-': 'e', '': ''}[mpi_type[1:]]
        if split == '':
            self.update_e = self.update_e_whole
            self.update_h = self.update_h_whole

        elif split == 'e':
            self.update_e = self.update_e_split
            self.update_h = self.update_h_whole

        elif split == 'h':
            self.update_e = self.update_e_whole
            self.update_h = self.update_h_split
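
How the mpi_type suffix selects the split update functions at the end of Example #11, restated as a standalone table: '+' boundaries split the H update, '-' boundaries split the E update, and '' (no MPI neighbor) updates both fields whole.

    for mpi_type in ('', 'x+', 'x-', 'y+', 'y-', 'z+', 'z-'):
        split = {'+': 'h', '-': 'e', '': ''}[mpi_type[1:]]
        print(repr(mpi_type), '->', split or 'whole')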
Example #12
    def __init__(self, context, device, \
            nx, ny, nz, \
            coeff_use='', \
            precision_float='single', \
            local_work_size=256, \
            global_work_size=0):
        """
        """

        common.check_type('context', context, cl.Context)
        common.check_type('device', device, cl.Device)
        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_type('global_work_size', global_work_size, int)
        common.check_type('local_work_size', local_work_size, int)
        common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
        common.check_value('precision_float', precision_float, \
                ('single', 'double'))

        # local variables
        queue = cl.CommandQueue(context, device)
        pragma_fp64 = ''
        if precision_float == 'double':
            extensions = device.get_info(cl.device_info.EXTENSIONS)
            if 'cl_khr_fp64' in extensions:
                pragma_fp64 = '#pragma OPENCL EXTENSION cl_khr_fp64 : enable'
            elif 'cl_amd_fp64' in extensions:
                pragma_fp64 = '#pragma OPENCL EXTENSION cl_amd_fp64 : enable'
            else:
                precision_float = 'single'
                print('Warning: The %s GPU device does not support double precision.' % \
                        device.get_info(cl.device_info.NAME))
                print("The precision is changed to 'single'.")

        dtype = {'single':np.float32, 'double':np.float64}[precision_float]
        dtype_str_list = { \
                'single':['float', ''], \
                'double':['double', pragma_fp64] }[precision_float]

        # pad nz up to a multiple of 16 (float32) or 8 (float64)
        align_size = {'single':16, 'double':8}[precision_float]  # 64 Bytes
        pad = int(np.ceil(float(nz) / align_size) * align_size) - nz
        slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
        nz_pitch = nz + pad

        ns = [np.int32(nx), np.int32(ny), np.int32(nz)]
        ns_pitch = [np.int32(nx), np.int32(ny), np.int32(nz_pitch)]
        ns_pad = [np.int32(nx), np.int32(ny), np.int32(pad)]

        # on/off the coefficient arrays
        ce_on = 'e' in coeff_use
        ch_on = 'h' in coeff_use

        # allocations
        f = np.zeros(ns_pitch, dtype)
        cf = np.ones_like(f) * 0.5

        mflags = cl.mem_flags.READ_WRITE
        eh_bufs = [cl.Buffer(context, mflags, f.nbytes) for i in range(6)]
        for eh_buf in eh_bufs:
            cl.enqueue_copy(queue, eh_buf, f) 

        if ce_on:
            mflags = cl.mem_flags.READ_ONLY
            ce_bufs = [cl.Buffer(context, mflags, cf.nbytes) for i in range(3)]

        if ch_on:
            mflags = cl.mem_flags.READ_ONLY
            ch_bufs = [cl.Buffer(context, mflags, cf.nbytes) for i in range(3)]

        del f, cf

        # global variables
        self.device_type = 'gpu'
        self.context = context
        self.device = device
        self.queue = queue

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns
        self.ns_pitch = ns_pitch
        self.ns_pad = ns_pad

        self.align_size = align_size
        self.pad = pad
        self.slice_z = slice_z

        self.precision_float = precision_float
        self.dtype = dtype
        self.dtype_str_list = dtype_str_list 

        self.coeff_use = coeff_use
        self.ce_on = ce_on
        self.ch_on = ch_on

        self.eh_bufs = eh_bufs
        self.ex_buf, self.ey_buf, self.ez_buf = eh_bufs[:3]
        self.hx_buf, self.hy_buf, self.hz_buf = eh_bufs[3:]
        if ce_on:
            self.ce_bufs = ce_bufs
            self.cex_buf, self.cey_buf, self.cez_buf = ce_bufs
        if ch_on:
            self.ch_bufs = ch_bufs
            self.chx_buf, self.chy_buf, self.chz_buf = ch_bufs

        self.ls = local_work_size
        self.gs = global_work_size
        if self.gs == 0:
            self.gs = common_gpu.get_optimal_gs(device)

        # create update list
        self.instance_list = []
        self.append_instance = lambda instance: \
            common.append_instance(self.instance_list, instance)
Example #13
    def __init__(self, queue_task, \
                 nx, ny, nz, \
                 precision_float='single', \
                 use_cpu_core=0):
        """
        """

        common.check_type('queue_task', queue_task, QueueTask)
        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_value('precision_float', precision_float,
                           ('single', 'double'))
        common.check_type('use_cpu_core', use_cpu_core, int)

        # local variables
        ns = [nx, ny, nz]
        dtype = {'single': np.float32, 'double': np.float64}[precision_float]

        # allocations
        ehs = [np.zeros(ns, dtype) for i in range(6)]

        # common macros for C templates
        dtype_macros = ['DTYPE']
        dtype_values = {
            'single': ['float'],
            'double': ['double']
        }[precision_float]

        omp_macros = ['OMP ', 'SET_NUM_THREADS']
        if use_cpu_core == 0:
            omp_values = ['', '']
        elif use_cpu_core == 1:
            omp_values = ['// ', '']
        else:
            omp_values = ['', 'omp_set_num_threads(%d);' % use_cpu_core]

        # global variables and functions
        self.device_type = 'cpu'
        self.qtask = queue_task
        self.enqueue = queue_task.enqueue
        self.enqueue_barrier = queue_task.enqueue_barrier

        self.dx = 1.
        self.dt = 0.5
        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns

        self.dtype = dtype
        self.dtype_omp_macros = dtype_macros + omp_macros
        self.dtype_omp_values = dtype_values + omp_values

        self.ehs = ehs
        self.ex, self.ey, self.ez = ehs[:3]
        self.hx, self.hy, self.hz = ehs[3:]

        self.ce_on, self.ch_on = False, False
        self.rd_on = False

        # update list
        self.instance_list = []
        self.append_instance = lambda instance: \
                common.append_instance(self.instance_list, instance)
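
The OMP macro values in Example #13 follow a three-way rule on use_cpu_core; a minimal restatement as a standalone helper (the function name is ours, for illustration):

    def omp_macro_values(use_cpu_core):
        if use_cpu_core == 0:        # OpenMP with the default thread count
            return ['', '']
        elif use_cpu_core == 1:      # comment out the '#pragma omp' lines
            return ['// ', '']
        else:                        # pin the thread count explicitly
            return ['', 'omp_set_num_threads(%d);' % use_cpu_core]

    print(omp_macro_values(4))   # ['', 'omp_set_num_threads(4);']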
Example #14
    def __init__(self, context, device, queue_task, \
            nx, ny, nz, \
            coeff_use='', \
            precision_float='single', \
            local_work_size=256):
        """
        """

        common.check_type('context', context, cl.Context)
        common.check_type('device', device, cl.Device)
        common.check_type('queue_task', queue_task, QueueTask)
        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
        common.check_value('precision_float', precision_float, ('single', 'double'))
        common.check_type('local_work_size', local_work_size, int)

        # local variables
        queue = cl.CommandQueue(context, device)
        pragma_fp64 = ''
        if precision_float == 'double':
            extensions = device.get_info(cl.device_info.EXTENSIONS)
            if 'cl_khr_fp64' in extensions:
                pragma_fp64 = '#pragma OPENCL EXTENSION cl_khr_fp64 : enable'
            elif 'cl_amd_fp64' in extensions:
                pragma_fp64 = '#pragma OPENCL EXTENSION cl_amd_fp64 : enable'
            else:
                precision_float = 'single'
                print('Warning: The %s GPU device does not support double precision.' % \
                        device.get_info(cl.device_info.NAME))
                print("The precision is changed to 'single'.")

        dtype = {'single':np.float32, 'double':np.float64}[precision_float]
        dtype_str_list = { \
                'single':['float', ''], \
                'double':['double', pragma_fp64] }[precision_float]

        # pad nz up to a multiple of 16 (float32) or 8 (float64)
        segment_nbytes = 64
        align_size = segment_nbytes // np.nbytes[dtype]
        pad = int(np.ceil(float(nz) / align_size) * align_size) - nz
        slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
        nz_pitch = nz + pad

        ns = [np.int32(nx), np.int32(ny), np.int32(nz)]
        ns_pitch = [np.int32(nx), np.int32(ny), np.int32(nz_pitch)]
        ns_pad = [np.int32(nx), np.int32(ny), np.int32(pad)]

        # on/off the coefficient arrays
        ce_on = 'e' in coeff_use
        ch_on = 'h' in coeff_use

        # allocations
        f = np.zeros(ns_pitch, dtype)
        cf = np.ones_like(f) * 0.5

        mflags = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR
        eh_bufs = [cl.Buffer(context, mflags, hostbuf=f) for i in range(6)]

        c_mflags = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
        if ce_on:
            ce_bufs = [cl.Buffer(context, c_mflags, hostbuf=cf) for i in range(3)]

        if ch_on:
            ch_bufs = [cl.Buffer(context, c_mflags, hostbuf=cf) for i in range(3)]

        del f, cf

        # global variables
        self.device_type = 'gpu'
        self.context = context
        self.device = device
        self.queue = queue
        self.qtask = queue_task
        self.enqueue = queue_task.enqueue
        self.enqueue_barrier = queue_task.enqueue_barrier

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns
        self.ns_pitch = ns_pitch
        self.ns_pad = ns_pad

        self.align_size = align_size
        self.pad = pad
        self.slice_z = slice_z

        self.precision_float = precision_float
        self.dtype = dtype
        self.dtype_str_list = dtype_str_list 

        self.coeff_use = coeff_use
        self.ce_on = ce_on
        self.ch_on = ch_on

        self.eh_bufs = eh_bufs
        self.ex_buf, self.ey_buf, self.ez_buf = eh_bufs[:3]
        self.hx_buf, self.hy_buf, self.hz_buf = eh_bufs[3:]
        if ce_on:
            self.ce_bufs = ce_bufs
            self.cex_buf, self.cey_buf, self.cez_buf = ce_bufs
        if ch_on:
            self.ch_bufs = ch_bufs
            self.chx_buf, self.chy_buf, self.chz_buf = ch_bufs

        self.ls = ls = local_work_size
        # round the global work size up to a multiple of the local work size
        nmax = nx * ny * nz_pitch
        remainder = nmax % ls
        self.gs = nmax if remainder == 0 else nmax - remainder + ls

        # create update list
        self.instance_list = []
        self.append_instance = lambda instance: \
            common.append_instance(self.instance_list, instance)
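
Unlike Examples #9 and #12, Example #14 derives the global work size by rounding the padded cell count up to the next multiple of the local work size. A quick check of the arithmetic:

    ls = 256
    for nmax in (2560, 2561):
        remainder = nmax % ls
        gs = nmax if remainder == 0 else nmax - remainder + ls
        print(nmax, '->', gs)    # 2560 -> 2560, 2561 -> 2816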
Example #15
    def __init__(self, context, device, \
                 nx, ny, nz, \
                 precision_float='single', \
                 local_work_size=256):
        """
        """

        common.check_type('context', context, cl.Context)
        common.check_type('device', device, cl.Device)
        common.check_type('nx', nx, int)
        common.check_type('ny', ny, int)
        common.check_type('nz', nz, int)
        common.check_value('precision_float', precision_float,
                           ('single', 'double'))
        common.check_type('local_work_size', local_work_size, int)

        # local variables
        ns = [np.int32(nx), np.int32(ny), np.int32(nz)]

        queue = cl.CommandQueue(context, device)
        pragma_fp64 = ''
        if precision_float == 'double':
            extensions = device.get_info(cl.device_info.EXTENSIONS)
            if 'cl_khr_fp64' in extensions:
                pragma_fp64 = '#pragma OPENCL EXTENSION cl_khr_fp64 : enable'
            elif 'cl_amd_fp64' in extensions:
                pragma_fp64 = '#pragma OPENCL EXTENSION cl_amd_fp64 : enable'
            else:
                precision_float = 'single'
                print('Warning: The %s GPU device does not support double precision.' % \
                        device.get_info(cl.device_info.NAME))
                print("The precision is changed to 'single'.")

        dtype = {'single': np.float32, 'double': np.float64}[precision_float]
        dtype_str_list = { \
                'single':['float', ''], \
                'double':['double', pragma_fp64] }[precision_float]

        # allocations
        f = np.zeros(ns, dtype)
        eh_bufs = [
            cl.Buffer(context, cl.mem_flags.READ_WRITE, f.nbytes)
            for i in range(6)
        ]
        for eh_buf in eh_bufs:
            cl.enqueue_copy(queue, eh_buf, f)

        # global variables
        self.device_type = 'gpu'
        self.context = context
        self.device = device
        self.queue = queue

        self.dx = 1.
        self.dt = 0.5
        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.ns = ns

        self.precision_float = precision_float
        self.dtype = dtype
        self.dtype_str_list = dtype_str_list

        self.eh_bufs = eh_bufs
        self.ex_buf, self.ey_buf, self.ez_buf = eh_bufs[:3]
        self.hx_buf, self.hy_buf, self.hz_buf = eh_bufs[3:]

        self.ce_on, self.ch_on = False, False
        self.rd_on = False

        self.ls = local_work_size

        # create update list
        self.instance_list = []
        self.append_instance = lambda instance: \
            common.append_instance(self.instance_list, instance)
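
The double-precision fallback in Examples #9, #12, #14, and #15 hinges on the device's extension string; a standalone PyOpenCL probe of the same check:

    import pyopencl as cl

    context = cl.create_some_context()
    device = context.devices[0]
    extensions = device.get_info(cl.device_info.EXTENSIONS)
    print('cl_khr_fp64:', 'cl_khr_fp64' in extensions)
    print('cl_amd_fp64:', 'cl_amd_fp64' in extensions)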