Python gpu_device_list 예제들, kemp.fdtd3d.util.common_gpu.gpu_device_list Python 예제들

예제 #1

0

파일 보기

파일: test_get_set_fields.py 프로젝트: wbkifun/fdtd_accelerate

    def runTest(self):
        """Check that GetFields returns exactly the data stored in the fields.

        ``self.args`` is unpacked as ``(nx, ny, nz, str_f, pt0, pt1)``.
        The test fails if, for any requested field, the norm of the
        difference between the host reference slice and the fetched data
        is not zero.
        """
        nx, ny, nz, str_f, pt0, pt1 = self.args

        region = common.slice_index_two_points(pt0, pt1)
        field_names = common.convert_to_tuple(str_f)

        # Device-side objects, built on the first device in the list.
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        fields = Fields(context, gpu_devices[0], nx, ny, nz, '', 'single')
        getf = GetFields(fields, str_f, pt0, pt1)

        # Random host arrays, keyed by component name for later lookup,
        # and handed to the fields object.
        ehs = common_update.generate_random_ehs(nx, ny, nz, fields.dtype)
        reference = dict(zip(['ex', 'ey', 'ez', 'hx', 'hy', 'hz'], ehs))
        fields.set_eh_bufs(*ehs)

        # Block until the fetch event has completed before comparing.
        getf.get_event().wait()

        for name in field_names:
            expected = reference[name][region]
            fetched = getf.get_fields(name)
            norm = np.linalg.norm(expected - fetched)
            self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))

예제 #2

0

파일 보기

파일: get_set_fields.py 프로젝트: wbkifun/fdtd_accelerate

    def runTest(self):
        """Check that GetFields returns the data previously copied to the GPU.

        ``self.args`` is unpacked as ``(nx, ny, nz, str_f, pt0, pt1)``.
        The test fails if any fetched element differs from the host copy.
        """
        nx, ny, nz, str_f, pt0, pt1 = self.args

        slidx = common.slice_index_two_points(pt0, pt1)
        str_fs = common.convert_to_tuple(str_f)

        # instance
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        device = gpu_devices[0]
        fields = Fields(context, device, nx, ny, nz, '')
        getf = GetFields(fields, str_f, pt0, pt1)

        # host allocations: one random array per requested field, copied
        # into the buffer returned by fields.get_buf()
        eh_dict = {}
        for sf in str_fs:
            eh_dict[sf] = np.random.rand(*fields.ns).astype(fields.dtype)
            cl.enqueue_copy(fields.queue, fields.get_buf(sf), eh_dict[sf])

        # verify
        getf.get_event().wait()

        for sf in str_fs:
            original = eh_dict[sf][slidx]
            copy = getf.get_fields(sf)
            # Compare the values fetched once; the original fetched them a
            # second time inside the assertion and left these locals unused.
            self.assertEqual(np.abs(original - copy).max(), 0, self.args)

예제 #3

0

파일 보기

파일: test_get_set_fields.py 프로젝트: xj361685640/fdtd_accelerate

    def runTest(self):
        """Check that GetFields returns exactly the data stored in the fields.

        ``self.args`` is unpacked as ``(nx, ny, nz, str_f, pt0, pt1)``.
        For every requested field the fetched data must equal the matching
        slice of the host reference array (difference norm of zero).
        """
        nx, ny, nz, str_f, pt0, pt1 = self.args

        region = common.slices_two_points(pt0, pt1)
        field_names = common.convert_to_tuple(str_f)

        # Device-side objects, built on the first device in the list.
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        fields = Fields(context, gpu_devices[0], nx, ny, nz, '', 'single')
        getf = GetFields(fields, str_f, pt0, pt1)

        # Random host arrays, keyed by component name for later lookup,
        # and handed to the fields object.
        ehs = common_random.generate_ehs(nx, ny, nz, fields.dtype)
        reference = dict(zip(['ex', 'ey', 'ez', 'hx', 'hy', 'hz'], ehs))
        fields.set_eh_bufs(*ehs)

        # Block until the fetch event has completed before comparing.
        getf.get_event().wait()

        for name in field_names:
            expected = reference[name][region]
            fetched = getf.get_fields(name)
            norm = np.linalg.norm(expected - fetched)
            self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))

예제 #4

0

파일 보기

    def runTest(self):
        """Check that GetFields returns the data previously copied to the GPU.

        ``self.args`` is unpacked as ``(nx, ny, nz, str_f, pt0, pt1)``.
        The test fails if any fetched element differs from the host copy.
        """
        nx, ny, nz, str_f, pt0, pt1 = self.args

        slidx = common.slice_index_two_points(pt0, pt1)
        str_fs = common.convert_to_tuple(str_f)

        # instance
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        device = gpu_devices[0]
        fields = Fields(context, device, nx, ny, nz, '')
        getf = GetFields(fields, str_f, pt0, pt1)

        # host allocations: one random array per requested field, copied
        # into the buffer returned by fields.get_buf()
        eh_dict = {}
        for sf in str_fs:
            eh_dict[sf] = np.random.rand(*fields.ns).astype(fields.dtype)
            cl.enqueue_copy(fields.queue, fields.get_buf(sf), eh_dict[sf])

        # verify
        getf.get_event().wait()

        for sf in str_fs:
            original = eh_dict[sf][slidx]
            copy = getf.get_fields(sf)
            # Compare the values fetched once; the original fetched them a
            # second time inside the assertion and left these locals unused.
            self.assertEqual(np.abs(original - copy).max(), 0, self.args)

예제 #5

0

파일 보기

    def runTest(self):
        """Compare the GPU field update with the naive reference update.

        ``self.args`` is unpacked as ``(ufunc, nx, ny, nz, coeff_use,
        precision_float, tmax)``.  ``ufunc`` selects the update under test
        ('e' or 'h'); any other value runs no update and performs no checks.
        After ``tmax`` update steps the selected components read back from
        the GPU must equal the reference fields exactly.
        """
        ufunc, nx, ny, nz, coeff_use, precision_float, tmax = self.args

        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        device = gpu_devices[0]
        fields = gpu.Fields(context, device, nx, ny, nz, coeff_use,
                            precision_float)
        gpu.Core(fields)

        fields_ref = naive.Fields(nx, ny, nz, precision_float)
        naive.Core(fields_ref)

        # allocations
        dtype = fields.dtype
        strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']

        # Both implementations start from the same random fields.
        ehs = common_random.generate_ehs(nx, ny, nz, dtype, ufunc)
        fields.set_eh_bufs(*ehs)
        fields_ref.set_ehs(*ehs)

        ces, chs = common_random.generate_cs(nx, ny, nz, dtype, coeff_use)
        if 'e' in coeff_use:
            fields.set_ce_bufs(*ces)
            fields_ref.set_ces(*ces)
        if 'h' in coeff_use:
            fields.set_ch_bufs(*chs)
            fields_ref.set_chs(*chs)

        # Host scratch array that receives each device buffer in turn.
        tmpf = np.zeros(fields.ns, dtype=dtype)

        # Select the update pair and the (name, array) pairs to verify.
        # list(zip(...)) keeps the slice valid on Python 3, where zip()
        # returns an iterator that cannot be sliced.
        name_pairs = list(zip(strf_list, ehs))
        if ufunc == 'e':
            update, update_ref = fields.update_e, fields_ref.update_e
            checked = name_pairs[:3]
        elif ufunc == 'h':
            update, update_ref = fields.update_h, fields_ref.update_h
            checked = name_pairs[3:]
        else:
            return

        # update
        for _ in range(tmax):
            update()
            update_ref()

        # verify
        for strf, _ in checked:
            cl.enqueue_copy(fields.queue, tmpf, fields.get_buf(strf))
            diff = fields_ref.get(strf) - tmpf
            norm = np.linalg.norm(diff)
            max_diff = np.abs(diff).max()
            self.assertEqual(
                norm, 0,
                '%s, %s, %g, %g' % (self.args, strf, norm, max_diff))

예제 #6

0

파일 보기

파일: test_pbc.py 프로젝트: wbkifun/fdtd_accelerate

    def runTest(self):
        """Check that fields on the two opposite faces along ``axis`` match.

        ``self.args`` is unpacked as ``(axis, nx, ny, nz, precision_float)``.
        A ``Pbc`` object is attached to the fields, one e-update and one
        h-update are run, and the field components fetched at index 0 along
        ``axis`` are compared with those fetched at the last index along
        ``axis``; the norm of the difference must be zero.
        """
        axis, nx, ny, nz, precision_float = self.args

        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        device = gpu_devices[0]
        fields = Fields(context, device, nx, ny, nz, '', precision_float)
        # Kept bound for the duration of the test; presumably it registers
        # itself with `fields` on construction -- TODO confirm.
        pbc = Pbc(fields, axis)

        # allocations
        ehs = common_update.generate_random_ehs(nx, ny, nz, fields.dtype)
        fields.set_eh_bufs(*ehs)

        # update
        fields.update_e()
        fields.update_h()

        # verify: getf0 reads the face at index 0, getf1 the face at the
        # last index along `axis`.
        getf0, getf1 = {}, {}
        strfs_e = {'x': ['ey', 'ez'], 'y': ['ex', 'ez'], 'z': ['ex', 'ey']}[axis]
        strfs_h = {'x': ['hy', 'hz'], 'y': ['hx', 'hz'], 'z': ['hx', 'hy']}[axis]

        pt0 = (0, 0, 0)
        pt1 = {'x': (0, ny-2, nz-2),
               'y': (nx-2, 0, nz-2),
               'z': (nx-2, ny-2, 0)}[axis]
        getf0['e'] = GetFields(fields, strfs_e, pt0, pt1)

        pt0 = {'x': (nx-1, 0, 0),
               'y': (0, ny-1, 0),
               'z': (0, 0, nz-1)}[axis]
        pt1 = {'x': (nx-1, ny-2, nz-2),
               'y': (nx-2, ny-1, nz-2),
               'z': (nx-2, ny-2, nz-1)}[axis]
        getf1['e'] = GetFields(fields, strfs_e, pt0, pt1)

        pt0 = {'x': (0, 1, 1),
               'y': (1, 0, 1),
               'z': (1, 1, 0)}[axis]
        pt1 = {'x': (0, ny-1, nz-1),
               'y': (nx-1, 0, nz-1),
               'z': (nx-1, ny-1, 0)}[axis]
        getf0['h'] = GetFields(fields, strfs_h, pt0, pt1)

        pt0 = {'x': (nx-1, 1, 1),
               'y': (1, ny-1, 1),
               'z': (1, 1, nz-1)}[axis]
        pt1 = (nx-1, ny-1, nz-1)
        getf1['h'] = GetFields(fields, strfs_h, pt0, pt1)

        # Wait for every fetch.  Iterating each dict separately avoids
        # `values() + values()`, which fails on Python 3 dict views.
        for getf_group in (getf0, getf1):
            for getf in getf_group.values():
                getf.get_event().wait()

        for eh in ['e', 'h']:
            norm = np.linalg.norm(
                getf0[eh].get_fields() - getf1[eh].get_fields())
            self.assertEqual(norm, 0, '%g, %s, %s' % (norm, self.args, eh))

예제 #7

0

파일 보기

파일: test_get_set_fields.py 프로젝트: xj361685640/fdtd_accelerate

    def runTest(self):
        """Check that SetFields writes the expected values into node fields.

        ``self.args`` is unpacked as ``(nx, ny, nz, str_f, pt0, pt1,
        is_array)``.  A random source (array or scalar, per ``is_array``) is
        written through ``SetFields``; the whole domain is then read back
        through ``GetFields`` and must equal a host array that is zero
        everywhere except the written region.
        """
        nx, ny, nz, str_f, pt0, pt1, is_array = self.args

        slices = common.slices_two_points(pt0, pt1)
        str_fs = common.convert_to_tuple(str_f)

        # instance: one gpu.Fields per device plus a trailing cpu.Fields
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)

        mainf_list = [gpu.Fields(context, device, nx, ny, nz)
                      for device in gpu_devices]
        mainf_list.append(cpu.Fields(nx, ny, nz))
        nodef = Fields(mainf_list)
        dtype = nodef.dtype

        getf = GetFields(nodef, str_f, (0, 0, 0),
                         (nodef.nx - 1, ny - 1, nz - 1))
        setf = SetFields(nodef, str_f, pt0, pt1, is_array)

        # generate random source
        if is_array:
            shape = common.shape_two_points(pt0, pt1, len(str_fs))
            value = np.random.rand(*shape).astype(nodef.dtype)
            # One equal chunk of `value` per requested field name.
            split_value = np.split(value, len(str_fs))
            split_value_dict = dict(zip(str_fs, split_value))
        else:
            value = np.random.ranf()

        # host allocations
        global_ehs = [np.zeros(nodef.ns, dtype) for i in range(6)]
        eh_dict = dict(zip(['ex', 'ey', 'ez', 'hx', 'hy', 'hz'], global_ehs))

        # expected result on the host
        for sf in str_fs:
            if is_array:
                eh_dict[sf][slices] = split_value_dict[sf]
            else:
                eh_dict[sf][slices] = value

        setf.set_fields(value)
        # Fetch from the first GPU fields object and wait on its event
        # before waiting on the node-level fetch.
        gpu_getf = gpu.GetFields(mainf_list[0], str_fs, (0, 0, 0),
                                 (nx - 1, ny - 1, nz - 1))
        gpu_getf.get_event().wait()
        getf.wait()

        # verify
        for sf in str_fs:
            original = eh_dict[sf]
            copy = getf.get_fields(sf)
            norm = np.linalg.norm(original - copy)
            self.assertEqual(norm, 0, '%s, %g, %s' % (self.args, norm, sf))

예제 #8

0

파일 보기

파일: test_core.py 프로젝트: wbkifun/fdtd_accelerate

    def runTest(self):
        """Compare the GPU field update with the naive reference update.

        ``self.args`` is unpacked as ``(ufunc, nx, ny, nz, coeff_use,
        precision_float, tmax)``.  ``ufunc`` selects the update under test
        ('e' or 'h'); any other value runs no update and performs no checks.
        After ``tmax`` update steps the selected components read back from
        the GPU must equal the reference fields exactly.
        """
        ufunc, nx, ny, nz, coeff_use, precision_float, tmax = self.args

        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        device = gpu_devices[0]
        qtask = cpu.QueueTask()
        fields = gpu.Fields(context, device, qtask, nx, ny, nz, coeff_use,
                            precision_float)
        gpu.Core(fields)

        fields_ref = naive.Fields(nx, ny, nz, precision_float,
                                  segment_nbytes=64)
        naive.Core(fields_ref)

        # allocations
        dtype = fields.dtype
        strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']

        # Both implementations start from the same random fields.
        ehs = common_random.generate_ehs(nx, ny, nz, dtype, ufunc)
        fields.set_eh_bufs(*ehs)
        fields_ref.set_ehs(*ehs)

        ces, chs = common_random.generate_cs(nx, ny, nz, dtype, coeff_use)
        if 'e' in coeff_use:
            fields.set_ce_bufs(*ces)
            fields_ref.set_ces(*ces)
        if 'h' in coeff_use:
            fields.set_ch_bufs(*chs)
            fields_ref.set_chs(*chs)

        # Host scratch array that receives each device buffer in turn.
        tmpf = np.zeros(fields.ns_pitch, dtype=dtype)

        # list(zip(...)) keeps the slices below valid on Python 3, where
        # zip() returns an iterator that cannot be sliced.
        name_pairs = list(zip(strf_list, ehs))

        # update
        if ufunc == 'e':
            for _ in range(tmax):
                fields.update_e()
                fields_ref.update_e()
            qtask.enqueue_barrier()
            checked = name_pairs[:3]

        elif ufunc == 'h':
            for _ in range(tmax):
                fields.update_h()
                fields_ref.update_h()
            # NOTE(review): unlike the 'e' branch, no qtask.enqueue_barrier()
            # is issued here -- confirm whether that is intentional.
            checked = name_pairs[3:]

        else:
            return

        # verify
        for strf, _ in checked:
            cl.enqueue_copy(fields.queue, tmpf, fields.get_buf(strf))
            diff = fields_ref.get(strf) - tmpf
            norm = np.linalg.norm(diff)
            max_diff = np.abs(diff).max()
            self.assertEqual(
                norm, 0,
                '%s, %s, %g, %g' % (self.args, strf, norm, max_diff))

예제 #9

0

파일 보기

파일: test_get_set_fields.py 프로젝트: wbkifun/fdtd_accelerate

    def runTest(self):
        """Check that SetFields writes the expected values into node fields.

        ``self.args`` is unpacked as ``(nx, ny, nz, str_f, pt0, pt1,
        is_array)``.  A random source (array or scalar, per ``is_array``) is
        written through ``SetFields``; the whole domain is then read back
        through ``GetFields`` and must equal a host array that is zero
        everywhere except the written region.
        """
        nx, ny, nz, str_f, pt0, pt1, is_array = self.args

        slices = common.slices_two_points(pt0, pt1)
        str_fs = common.convert_to_tuple(str_f)

        # instance: one gpu.Fields per device plus a trailing cpu.Fields
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)

        mainf_list = [gpu.Fields(context, device, nx, ny, nz)
                      for device in gpu_devices]
        mainf_list.append(cpu.Fields(nx, ny, nz))
        nodef = Fields(mainf_list)
        dtype = nodef.dtype

        getf = GetFields(nodef, str_f, (0, 0, 0), (nodef.nx-1, ny-1, nz-1))
        setf = SetFields(nodef, str_f, pt0, pt1, is_array)

        # generate random source
        if is_array:
            shape = common.shape_two_points(pt0, pt1, len(str_fs))
            value = np.random.rand(*shape).astype(nodef.dtype)
            # One equal chunk of `value` per requested field name.
            split_value = np.split(value, len(str_fs))
            split_value_dict = dict(zip(str_fs, split_value))
        else:
            value = np.random.ranf()

        # host allocations
        global_ehs = [np.zeros(nodef.ns, dtype) for i in range(6)]
        eh_dict = dict(zip(['ex', 'ey', 'ez', 'hx', 'hy', 'hz'], global_ehs))

        # expected result on the host
        for sf in str_fs:
            if is_array:
                eh_dict[sf][slices] = split_value_dict[sf]
            else:
                eh_dict[sf][slices] = value

        setf.set_fields(value)
        # Fetch from the first GPU fields object and wait on its event
        # before waiting on the node-level fetch.
        gpu_getf = gpu.GetFields(mainf_list[0], str_fs, (0, 0, 0),
                                 (nx-1, ny-1, nz-1))
        gpu_getf.get_event().wait()
        getf.wait()

        # verify
        for sf in str_fs:
            original = eh_dict[sf]
            copy = getf.get_fields(sf)
            norm = np.linalg.norm(original - copy)
            self.assertEqual(norm, 0, '%s, %g, %s' % (self.args, norm, sf))

예제 #10

0

파일 보기

    def runTest(self):
        """Compare the GPU field update with the host reference update.

        ``self.args`` is unpacked as ``(ufunc, nx, ny, nz, coeff_use,
        precision_float, tmax)``.  ``ufunc`` selects the update under test
        ('e' or 'h'); any other value runs no update and performs no checks.
        After ``tmax`` steps all six components read back from the GPU must
        equal the host arrays updated by ``common_update``.
        """
        ufunc, nx, ny, nz, coeff_use, precision_float, tmax = self.args

        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        device = gpu_devices[0]
        fields = Fields(context, device, nx, ny, nz, coeff_use,
                        precision_float)

        # allocations
        dtype = fields.dtype
        strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']

        ehs = common_update.generate_random_ehs(ufunc, nx, ny, nz, dtype)
        # Each host array is extended along axis 2 with zeros of shape
        # fields.ns_pad before being copied into the device buffer.
        pad_arr = np.zeros(fields.ns_pad, dtype=dtype)
        for strf, eh in zip(strf_list, ehs):
            eh_pitch = np.append(eh, pad_arr, 2).copy('C')
            cl.enqueue_copy(fields.queue, fields.get_buf(strf), eh_pitch)

        ces, chs = common_update.generate_random_cs(coeff_use, nx, ny, nz,
                                                    dtype)
        if 'e' in coeff_use:
            fields.set_ce_bufs(*ces)
        if 'h' in coeff_use:
            fields.set_ch_bufs(*chs)

        # Host scratch array that receives each device buffer in turn.
        tmpf = np.zeros(fields.ns_pitch, dtype=dtype)

        # update (range works on both Python 2 and Python 3)
        if ufunc == 'e':
            for _ in range(tmax):
                fields.update_e()
                common_update.update_e(ehs, ces)
        elif ufunc == 'h':
            for _ in range(tmax):
                fields.update_h()
                common_update.update_h(ehs, chs)
        else:
            return

        # verify: compare only the fields.slz part of axis 2 of the copy
        for strf, eh in zip(strf_list, ehs):
            cl.enqueue_copy(fields.queue, tmpf, fields.get_buf(strf))
            diff = eh - tmpf[:, :, fields.slz]
            norm = np.linalg.norm(diff)
            max_diff = np.abs(diff).max()
            self.assertEqual(
                norm, 0,
                '%s, %s, %g, %g' % (self.args, strf, norm, max_diff))

예제 #11

0

파일 보기

파일: fields.py 프로젝트: wbkifun/fdtd_accelerate

    def runTest(self):
        """Compare the GPU field update with the host reference update.

        ``self.args`` is unpacked as ``(ufunc, nx, ny, nz, coeff_use,
        precision_float, tmax)``.  ``ufunc`` selects the update under test
        ("e" or "h"); any other value runs no update and performs no checks.
        After ``tmax`` steps all six components read back from the GPU must
        equal the host arrays updated by ``common_update``.
        """
        ufunc, nx, ny, nz, coeff_use, precision_float, tmax = self.args

        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        device = gpu_devices[0]
        fields = Fields(context, device, nx, ny, nz, coeff_use, precision_float)

        # allocations
        dtype = fields.dtype
        strf_list = ["ex", "ey", "ez", "hx", "hy", "hz"]

        ehs = common_update.generate_random_ehs(ufunc, nx, ny, nz, dtype)
        # Each host array is extended along axis 2 with zeros of shape
        # fields.ns_pad before being copied into the device buffer.
        pad_arr = np.zeros(fields.ns_pad, dtype=dtype)
        for strf, eh in zip(strf_list, ehs):
            eh_pitch = np.append(eh, pad_arr, 2).copy("C")
            cl.enqueue_copy(fields.queue, fields.get_buf(strf), eh_pitch)

        ces, chs = common_update.generate_random_cs(coeff_use, nx, ny, nz, dtype)
        if "e" in coeff_use:
            fields.set_ce_bufs(*ces)
        if "h" in coeff_use:
            fields.set_ch_bufs(*chs)

        # Host scratch array that receives each device buffer in turn.
        tmpf = np.zeros(fields.ns_pitch, dtype=dtype)

        # update (range works on both Python 2 and Python 3)
        if ufunc == "e":
            for _ in range(tmax):
                fields.update_e()
                common_update.update_e(ehs, ces)
        elif ufunc == "h":
            for _ in range(tmax):
                fields.update_h()
                common_update.update_h(ehs, chs)
        else:
            return

        # verify: compare only the fields.slz part of axis 2 of the copy
        for strf, eh in zip(strf_list, ehs):
            cl.enqueue_copy(fields.queue, tmpf, fields.get_buf(strf))
            diff = eh - tmpf[:, :, fields.slz]
            norm = np.linalg.norm(diff)
            max_diff = np.abs(diff).max()
            self.assertEqual(norm, 0, "%s, %s, %g, %g" % (self.args, strf, norm, max_diff))

예제 #12

0

파일 보기

    def runTest(self):
        """Check that SetFields writes the expected values to the GPU buffers.

        ``self.args`` holds ``(nx, ny, nz, str_f, pt0, pt1)`` with an
        optional seventh element ``src_is_array`` (treated as ``False`` when
        absent).  A random source is written through ``SetFields`` and every
        requested field buffer is read back and compared with a host array
        that is zero everywhere except the written region.

        Raises:
            ValueError: if ``self.args`` has neither 6 nor 7 elements.
        """
        if len(self.args) == 6:
            nx, ny, nz, str_f, pt0, pt1 = self.args
            src_is_array = False
        elif len(self.args) == 7:
            nx, ny, nz, str_f, pt0, pt1, src_is_array = self.args
        else:
            # Fail with a clear message instead of an unbound-name error
            # further down.
            raise ValueError(
                'expected 6 or 7 test arguments, got %d' % len(self.args))

        slidx = common.slice_index_two_points(pt0, pt1)
        str_fs = common.convert_to_tuple(str_f)

        # instance
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        device = gpu_devices[0]
        fields = Fields(context, device, nx, ny, nz, '')
        setf = SetFields(fields, str_f, pt0, pt1, src_is_array)

        # generate random source
        if src_is_array:
            # The source stacks one region per field along axis 0.
            shape = list(common.shape_two_points(pt0, pt1))
            shape[0] *= len(str_fs)
            value = np.random.rand(*shape).astype(fields.dtype)
            split_value = np.split(value, len(str_fs))
            split_value_dict = dict(zip(str_fs, split_value))
        else:
            value = np.random.ranf()

        # host allocations
        eh_dict = {}
        for sf in str_fs:
            eh_dict[sf] = np.zeros(fields.ns, dtype=fields.dtype)
        gpu_eh = np.zeros(fields.ns, dtype=fields.dtype)

        # expected result on the host
        for sf in str_fs:
            if src_is_array:
                eh_dict[sf][slidx] = split_value_dict[sf]
            else:
                eh_dict[sf][slidx] = value

        setf.set_fields(value)

        # verify
        for sf in str_fs:
            cl.enqueue_copy(fields.queue, gpu_eh, fields.get_buf(sf))
            self.assertEqual(
                np.abs(eh_dict[sf] - gpu_eh).max(), 0, self.args)

예제 #13

0

파일 보기

파일: test_get_set_fields.py 프로젝트: wbkifun/fdtd_accelerate

    def runTest(self):
        """Check that node-level GetFields matches the assembled host data.

        ``self.args`` is unpacked as ``(nx, ny, nz, str_f, pt0, pt1)``.
        Random data is loaded into every entry of the fields list, a global
        host copy is assembled from the pieces, and the requested region
        fetched through ``GetFields`` must equal the same region of that
        global copy.
        """
        nx, ny, nz, str_f, pt0, pt1 = self.args

        region = common.slices_two_points(pt0, pt1)
        field_names = common.convert_to_tuple(str_f)

        # One gpu.Fields per device, followed by a single cpu.Fields.
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)

        mainf_list = [gpu.Fields(context, dev, nx, ny, nz)
                      for dev in gpu_devices]
        mainf_list.append(cpu.Fields(nx, ny, nz))
        nodef = Fields(mainf_list)
        dtype = nodef.dtype
        anx = nodef.accum_nx_list

        getf = GetFields(nodef, str_f, pt0, pt1)

        # Global host arrays, also reachable by component name.
        global_ehs = [np.zeros(nodef.ns, dtype) for _ in range(6)]
        reference = dict(
            zip(['ex', 'ey', 'ez', 'hx', 'hy', 'hz'], global_ehs))

        # GPU entries: the last plane along axis 0 of each local array is
        # left out when copying into the global array.
        for idx, gpu_f in enumerate(mainf_list[:-1]):
            sub_nx, sub_ny, sub_nz = gpu_f.ns
            local_ehs = common_random.generate_ehs(
                sub_nx, sub_ny, sub_nz, dtype)
            gpu_f.set_eh_bufs(*local_ehs)
            for local, glob in zip(local_ehs, global_ehs):
                glob[anx[idx]:anx[idx + 1], :, :] = local[:-1, :, :]

        # CPU entry (last in the list): copied in full.
        cpu_f = mainf_list[-1]
        sub_nx, sub_ny, sub_nz = cpu_f.ns
        local_ehs = common_random.generate_ehs(sub_nx, sub_ny, sub_nz, dtype)
        cpu_f.set_ehs(*local_ehs)
        for local, glob in zip(local_ehs, global_ehs):
            glob[anx[-2]:anx[-1] + 1, :, :] = local[:]

        # Wait for the fetch, then compare field by field.
        getf.wait()

        for name in field_names:
            expected = reference[name][region]
            fetched = getf.get_fields(name)
            norm = np.linalg.norm(expected - fetched)
            self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))

예제 #14

0

파일 보기

    def runTest(self):
        """Check that NodeGetFields matches the assembled host data.

        ``self.args`` is unpacked as ``(nx, ny, nz, str_f, pt0, pt1)``.
        Random data is loaded into every entry of the fields list, a global
        host copy is assembled from the pieces, and the requested region
        fetched through ``NodeGetFields`` must equal the same region of that
        global copy.
        """
        nx, ny, nz, str_f, pt0, pt1 = self.args

        region = common.slice_index_two_points(pt0, pt1)
        field_names = common.convert_to_tuple(str_f)

        # One gpu.Fields per device, followed by a single cpu.Fields.
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)

        mainf_list = [gpu.Fields(context, dev, nx, ny, nz)
                      for dev in gpu_devices]
        mainf_list.append(cpu.Fields(nx, ny, nz))
        nodef = NodeFields(mainf_list)
        dtype = nodef.dtype
        anx = nodef.accum_nx_list

        getf = NodeGetFields(nodef, str_f, pt0, pt1)

        # Global host arrays, also reachable by component name.
        global_ehs = [np.zeros(nodef.ns, dtype) for _ in range(6)]
        reference = dict(
            zip(['ex', 'ey', 'ez', 'hx', 'hy', 'hz'], global_ehs))

        # GPU entries: the last plane along axis 0 of each local array is
        # left out when copying into the global array.
        for idx, gpu_f in enumerate(mainf_list[:-1]):
            sub_nx, sub_ny, sub_nz = gpu_f.ns
            local_ehs = common_update.generate_random_ehs(
                sub_nx, sub_ny, sub_nz, dtype)
            gpu_f.set_eh_bufs(*local_ehs)
            for local, glob in zip(local_ehs, global_ehs):
                glob[anx[idx]:anx[idx + 1], :, :] = local[:-1, :, :]

        # CPU entry (last in the list): copied in full.
        cpu_f = mainf_list[-1]
        sub_nx, sub_ny, sub_nz = cpu_f.ns
        local_ehs = common_update.generate_random_ehs(
            sub_nx, sub_ny, sub_nz, dtype)
        cpu_f.set_ehs(*local_ehs)
        for local, glob in zip(local_ehs, global_ehs):
            glob[anx[-2]:anx[-1] + 1, :, :] = local[:]

        # Wait for the fetch, then compare field by field.
        getf.wait()

        for name in field_names:
            expected = reference[name][region]
            fetched = getf.get_fields(name)
            norm = np.linalg.norm(expected - fetched)
            self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))

예제 #15

0

파일 보기

파일: get_set_fields.py 프로젝트: wbkifun/fdtd_accelerate

    def runTest(self):
        """Check that SetFields writes the expected values to the GPU buffers.

        ``self.args`` holds ``(nx, ny, nz, str_f, pt0, pt1)`` with an
        optional seventh element ``src_is_array`` (treated as ``False`` when
        absent).  A random source is written through ``SetFields`` and every
        requested field buffer is read back and compared with a host array
        that is zero everywhere except the written region.

        Raises:
            ValueError: if ``self.args`` has neither 6 nor 7 elements.
        """
        if len(self.args) == 6:
            nx, ny, nz, str_f, pt0, pt1 = self.args
            src_is_array = False
        elif len(self.args) == 7:
            nx, ny, nz, str_f, pt0, pt1, src_is_array = self.args
        else:
            # Fail with a clear message instead of an unbound-name error
            # further down.
            raise ValueError(
                'expected 6 or 7 test arguments, got %d' % len(self.args))

        slidx = common.slice_index_two_points(pt0, pt1)
        str_fs = common.convert_to_tuple(str_f)

        # instance
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        device = gpu_devices[0]
        fields = Fields(context, device, nx, ny, nz, '')
        setf = SetFields(fields, str_f, pt0, pt1, src_is_array)

        # generate random source
        if src_is_array:
            # The source stacks one region per field along axis 0.
            shape = list(common.shape_two_points(pt0, pt1))
            shape[0] *= len(str_fs)
            value = np.random.rand(*shape).astype(fields.dtype)
            split_value = np.split(value, len(str_fs))
            split_value_dict = dict(zip(str_fs, split_value))
        else:
            value = np.random.ranf()

        # host allocations
        eh_dict = {}
        for sf in str_fs:
            eh_dict[sf] = np.zeros(fields.ns, dtype=fields.dtype)
        gpu_eh = np.zeros(fields.ns, dtype=fields.dtype)

        # expected result on the host
        for sf in str_fs:
            if src_is_array:
                eh_dict[sf][slidx] = split_value_dict[sf]
            else:
                eh_dict[sf][slidx] = value

        setf.set_fields(value)

        # verify
        for sf in str_fs:
            cl.enqueue_copy(fields.queue, gpu_eh, fields.get_buf(sf))
            self.assertEqual(np.abs(eh_dict[sf] - gpu_eh).max(), 0, self.args)

예제 #16

0

파일 보기

파일: test_get_set_fields.py 프로젝트: xj361685640/fdtd_accelerate

    def runTest(self):
        """Check that SetFields writes the expected values to the GPU buffers.

        ``self.args`` is unpacked as ``(nx, ny, nz, str_f, pt0, pt1,
        is_array)``.  A random source (array or scalar, per ``is_array``) is
        written through ``SetFields``; each requested buffer is read back
        and its ``fields.slice_z`` part along axis 2 must equal a host array
        that is zero everywhere except the written region.
        """
        nx, ny, nz, str_f, pt0, pt1, is_array = self.args

        region = common.slices_two_points(pt0, pt1)
        field_names = common.convert_to_tuple(str_f)

        # Device-side objects, built on the first device in the list.
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        first_device = gpu_devices[0]
        qtask = QueueTask()
        fields = Fields(context, first_device, qtask, nx, ny, nz, '',
                        'single')
        setf = SetFields(fields, str_f, pt0, pt1, is_array)

        # Random source: one equal chunk of the array per field name, or a
        # single scalar shared by all fields.
        if is_array:
            shape = common.shape_two_points(pt0, pt1, len(field_names))
            value = np.random.rand(*shape).astype(fields.dtype)
            chunks = np.split(value, len(field_names))
            per_field = dict(zip(field_names, chunks))
        else:
            value = np.random.ranf()

        # Host reference arrays and a scratch array for device read-back.
        host_arrays = [np.zeros(fields.ns, dtype=fields.dtype)
                       for _ in range(6)]
        expected = dict(
            zip(['ex', 'ey', 'ez', 'hx', 'hy', 'hz'], host_arrays))
        gpu_eh = np.zeros(fields.ns_pitch, dtype=fields.dtype)

        for name in field_names:
            expected[name][region] = per_field[name] if is_array else value

        setf.set_fields(value)
        setf.mainf.enqueue_barrier()

        for name in field_names:
            cl.enqueue_copy(fields.queue, gpu_eh, fields.get_buf(name))
            fetched = gpu_eh[:, :, fields.slice_z]
            norm = np.linalg.norm(expected[name] - fetched)
            self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))

예제 #17

0

파일 보기

파일: test_direct_incident.py 프로젝트: xj361685640/fdtd_accelerate

    def runTest(self):
        """Check the field written by NodeDirectIncident after one time step.

        ``self.args`` is unpacked as ``(nx, ny, nz, str_f, pt0, pt1,
        is_array)``.  After one e-update and one h-update, the region
        fetched through ``NodeGetFields`` must equal
        ``dtype(value) * dtype(tfunc(1))`` exactly.
        """
        nx, ny, nz, str_f, pt0, pt1, is_array = self.args

        slices = common.slice_index_two_points(pt0, pt1)

        # generate random source
        if is_array:
            shape = common.shape_two_points(pt0, pt1)
            value = np.random.rand(*shape).astype(np.float32)
        else:
            value = np.random.ranf()

        # instance: one gpu.Fields per device plus a trailing cpu.Fields
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)

        mainf_list = [gpu.Fields(context, device, nx, ny, nz)
                      for device in gpu_devices]
        mainf_list.append(cpu.Fields(nx, ny, nz))
        nodef = NodeFields(mainf_list)
        dtype = nodef.dtype

        tfunc = lambda tstep: np.sin(0.03 * tstep)
        # Kept bound for the duration of the test; presumably it registers
        # itself with `nodef` on construction -- TODO confirm.
        incident = NodeDirectIncident(nodef, str_f, pt0, pt1, tfunc, value)

        # allocations for verify
        eh = np.zeros(nodef.ns, dtype)
        getf = NodeGetFields(nodef, str_f, pt0, pt1)

        # expected value of the region, using the time function at step 1
        eh[slices] = dtype(value) * dtype(tfunc(1))

        nodef.update_e()
        nodef.update_h()
        getf.wait()

        # verify
        original = eh[slices]
        copy = getf.get_fields(str_f)
        norm = np.linalg.norm(original - copy)
        self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))

예제 #18

0

파일 보기

파일: test_direct_incident.py 프로젝트: wbkifun/fdtd_accelerate

    def runTest(self):
        """Check the field written by NodeDirectIncident after one time step.

        ``self.args`` is unpacked as ``(nx, ny, nz, str_f, pt0, pt1,
        is_array)``.  After one e-update and one h-update, the region
        fetched through ``NodeGetFields`` must equal
        ``dtype(value) * dtype(tfunc(1))`` exactly.
        """
        nx, ny, nz, str_f, pt0, pt1, is_array = self.args

        slices = common.slice_index_two_points(pt0, pt1)

        # generate random source
        if is_array:
            shape = common.shape_two_points(pt0, pt1)
            value = np.random.rand(*shape).astype(np.float32)
        else:
            value = np.random.ranf()

        # instance: one gpu.Fields per device plus a trailing cpu.Fields
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)

        mainf_list = [gpu.Fields(context, device, nx, ny, nz)
                      for device in gpu_devices]
        mainf_list.append(cpu.Fields(nx, ny, nz))
        nodef = NodeFields(mainf_list)
        dtype = nodef.dtype

        tfunc = lambda tstep: np.sin(0.03*tstep)
        # Kept bound for the duration of the test; presumably it registers
        # itself with `nodef` on construction -- TODO confirm.
        incident = NodeDirectIncident(nodef, str_f, pt0, pt1, tfunc, value)

        # allocations for verify
        eh = np.zeros(nodef.ns, dtype)
        getf = NodeGetFields(nodef, str_f, pt0, pt1)

        # expected value of the region, using the time function at step 1
        eh[slices] = dtype(value) * dtype(tfunc(1))

        nodef.update_e()
        nodef.update_h()
        getf.wait()

        # verify
        original = eh[slices]
        copy = getf.get_fields(str_f)
        norm = np.linalg.norm(original - copy)
        self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))

예제 #19

0

파일 보기

    def runTest(self):
        """Check the field written by IncidentDirect after one time step.

        ``self.args`` is unpacked as ``(nx, ny, nz, str_f, pt0, pt1,
        is_array)``.  After one e-update and one h-update, the region of
        the ``str_f`` buffer read back from the device must equal
        ``dtype(value) * dtype(tfunc(1))`` exactly.
        """
        nx, ny, nz, str_f, pt0, pt1, is_array = self.args
        region = common.slices_two_points(pt0, pt1)

        # Random source: a float32 array shaped like the region, or a scalar.
        if is_array:
            src_shape = common.shape_two_points(pt0, pt1)
            value = np.random.rand(*src_shape).astype(np.float32)
        else:
            value = np.random.ranf()

        # Device-side objects, built on the first device in the list.
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        first_device = gpu_devices[0]
        qtask = QueueTask()
        fields = Fields(context, first_device, qtask, nx, ny, nz, '',
                        'single')

        tfunc = lambda tstep: np.sin(0.03 * tstep)
        incident = IncidentDirect(fields, str_f, pt0, pt1, tfunc, value)

        # Host reference: zero except for the region, which holds the
        # source scaled by the time function evaluated at step 1.
        expected_eh = np.zeros(fields.ns_pitch, dtype=fields.dtype)
        expected_eh[region] = fields.dtype(value) * fields.dtype(tfunc(1))

        fields.update_e()
        fields.update_h()
        fields.enqueue_barrier()

        # Read the device buffer back into a host array of the same shape.
        device_buf = fields.get_buf(str_f)
        fetched_eh = np.zeros_like(expected_eh)
        cl.enqueue_copy(fields.queue, fetched_eh, device_buf)

        norm = np.linalg.norm(expected_eh[region] - fetched_eh[region])
        self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))

예제 #20

0

파일 보기

파일: test_incident_direct.py 프로젝트: wbkifun/fdtd_accelerate

    def runTest(self):
        """Check the field written by IncidentDirect after one time step.

        ``self.args`` is unpacked as ``(nx, ny, nz, str_f, pt0, pt1,
        is_array)``.  After one e-update and one h-update, the region of
        the ``str_f`` buffer read back from the device must equal
        ``dtype(value) * dtype(tfunc(1))`` exactly.
        """
        nx, ny, nz, str_f, pt0, pt1, is_array = self.args
        region = common.slices_two_points(pt0, pt1)

        # Random source: a float32 array shaped like the region, or a scalar.
        if is_array:
            src_shape = common.shape_two_points(pt0, pt1)
            value = np.random.rand(*src_shape).astype(np.float32)
        else:
            value = np.random.ranf()

        # Device-side objects, built on the first device in the list.
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        first_device = gpu_devices[0]
        qtask = QueueTask()
        fields = Fields(context, first_device, qtask, nx, ny, nz, '',
                        'single')

        tfunc = lambda tstep: np.sin(0.03*tstep)
        incident = IncidentDirect(fields, str_f, pt0, pt1, tfunc, value)

        # Host reference: zero except for the region, which holds the
        # source scaled by the time function evaluated at step 1.
        expected_eh = np.zeros(fields.ns_pitch, dtype=fields.dtype)
        expected_eh[region] = fields.dtype(value) * fields.dtype(tfunc(1))

        fields.update_e()
        fields.update_h()
        fields.enqueue_barrier()

        # Read the device buffer back into a host array of the same shape.
        device_buf = fields.get_buf(str_f)
        fetched_eh = np.zeros_like(expected_eh)
        cl.enqueue_copy(fields.queue, fetched_eh, device_buf)

        norm = np.linalg.norm(expected_eh[region] - fetched_eh[region])
        self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))

예제 #21

0

파일 보기

파일: test_exchange.py 프로젝트: wbkifun/fdtd_accelerate

    def runTest(self):
        """Verify that NodeExchange leaves matching values on shared boundaries.

        Builds one gpu.Fields per detected GPU device plus a trailing
        cpu.Fields, wraps them in a NodeFields and attaches CPU buffer fields
        for the 'x-', 'y+', 'y-', 'z+' and 'z-' directions.  The GPU fields
        and every entry of nodef.cpuf_dict are filled with random data, then
        exchange.update_e() and exchange.update_h() are each called once.
        Finally pairs of boundary slices are read back through GetFields and
        each pair must be identical (norm of the difference equal to 0).

        NOTE: ``getf0.values() + getf1.values()`` concatenates two lists, so
        this test depends on Python 2 dict semantics.
        """
        nx, ny, nz = self.args

        # instance
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)

        # one gpu.Fields per device, then a single cpu.Fields as the last entry
        mainf_list = [gpu.Fields(context, device, nx, ny, nz) \
                for device in gpu_devices]
        mainf_list.append( cpu.Fields(nx, ny, nz) )
        nodef = NodeFields(mainf_list)
        dtype = nodef.dtype

        # buffer instance
        nodef.append_buffer_fields(cpu.Fields(3, ny, nz, mpi_type='x-'))
        nodef.append_buffer_fields(cpu.Fields(3, nodef.nx, nz, mpi_type='y+'))
        nodef.append_buffer_fields(cpu.Fields(3, nodef.nx, nz, mpi_type='y-'))
        nodef.append_buffer_fields(cpu.Fields(3, nodef.nx, ny, mpi_type='z+'))
        nodef.append_buffer_fields(cpu.Fields(3, nodef.nx, ny, mpi_type='z-'))

        exchange = NodeExchange(nodef)
        
        # generate random source
        # (both loops rebind nx, ny, nz to the shape of the field being filled)
        for f in mainf_list[:-1]:
            nx, ny, nz = f.ns
            ehs = common_update.generate_random_ehs(nx, ny, nz, dtype)
            f.set_eh_bufs(*ehs)

        for f in nodef.cpuf_dict.values():
            nx, ny, nz = f.ns
            ehs = common_update.generate_random_ehs(nx, ny, nz, dtype)
            f.set_ehs(*ehs)

        # verify
        exchange.update_e()
        exchange.update_h()
        # getf0 / getf1 hold the two sides of each comparison, keyed 'e' / 'h';
        # the same two dicts are overwritten for every comparison below
        getf0, getf1 = {}, {}

        # mainf list
        # storing the modules on self lets getattr(self, f.device_type) select
        # the matching GetFields class
        # (assumes device_type is 'gpu' or 'cpu' -- TODO confirm)
        self.gpu, self.cpu = gpu, cpu
        # adjacent main fields: last x-plane of f0 against first x-plane of f1
        for f0, f1 in zip(mainf_list[:-1], mainf_list[1:]):
            getf0['e'] = getattr(self, f0.device_type).GetFields(f0, ['ey', 'ez'], \
                    (f0.nx-1, 0, 0), (f0.nx-1, f0.ny-2, f0.nz-2))
            getf1['e'] = getattr(self, f1.device_type).GetFields(f1, ['ey', 'ez'], \
                    (0, 0, 0), (0, f1.ny-2, f1.nz-2))

            getf0['h'] = getattr(self, f0.device_type).GetFields(f0, ['hy', 'hz'], \
                    (f0.nx-1, 1, 1), (f0.nx-1, f0.ny-1, f0.nz-1))
            getf1['h'] = getattr(self, f1.device_type).GetFields(f1, ['hy', 'hz'], \
                    (0, 1, 1), (0, f1.ny-1, f1.nz-1))

            # wait for all four reads before looking at the data
            for getf in getf0.values() + getf1.values():
                getf.get_event().wait()

            for eh in ['e', 'h']:
                norm = np.linalg.norm(getf0[eh].get_fields() - getf1[eh].get_fields())
                self.assertEqual(norm, 0, '%s, %g, %s, %s, %s' % \
                        (self.args, norm, 'mainf', \
                        getf0[eh].mainf.device_type, getf1[eh].mainf.device_type) )

        # buffer 'x-'
        # compared against mainf_list[0] through gpu.GetFields
        f0, f1 = nodef.cpuf_dict['x-'], mainf_list[0]
        getf0['e'] = cpu.GetFields(f0, ['ey', 'ez'], \
                (f0.nx-1, 0, 0), (f0.nx-1, f0.ny-2, f0.nz-2))
        getf1['e'] = gpu.GetFields(f1, ['ey', 'ez'], \
                (1, 0, 0), (1, f1.ny-2, f1.nz-2))

        getf0['h'] = cpu.GetFields(f0, ['hy', 'hz'], \
                (f0.nx-2, 1, 1), (f0.nx-2, f0.ny-1, f0.nz-1))
        getf1['h'] = gpu.GetFields(f1, ['hy', 'hz'], \
                (0, 1, 1), (0, f1.ny-1, f1.nz-1))

        for getf in getf0.values() + getf1.values():
            getf.get_event().wait()

        for eh in ['e', 'h']:
            norm = np.linalg.norm( \
                    getf0[eh].get_fields() - getf1[eh].get_fields())
            self.assertEqual(norm, 0, '%g, %s, %s' % (norm, 'x-', eh) )

        # buffer 'y+'
        # consecutive entries of accum_nx_list bound each main field's range
        # along the buffer's second axis
        anx_list = nodef.accum_nx_list

        f1 = nodef.cpuf_dict['y+']
        for f0, anx0, anx1 in zip(mainf_list, anx_list[:-1], anx_list[1:]):
            # NOTE(review): the main-field side reads 'ex', 'ez' while the
            # buffer side reads 'ey', 'ez' -- confirm this is the intended
            # axis mapping of the buffer
            getf0['e'] = getattr(self, f0.device_type).GetFields(f0, ['ex', 'ez'], \
                    (0, f0.ny-1, 0), (f0.nx-2, f0.ny-1, f0.nz-2))
            getf1['e'] = cpu.GetFields(f1, ['ey', 'ez'], \
                    (1, anx0, 0), (1, anx1-1, f1.nz-2))

            getf0['h'] = getattr(self, f0.device_type).GetFields(f0, ['hx', 'hz'], \
                    (1, f0.ny-2, 1), (f0.nx-1, f0.ny-2, f0.nz-1))
            getf1['h'] = cpu.GetFields(f1, ['hy', 'hz'], \
                    (0, anx0+1, 1), (0, anx1, f1.nz-1))

            for getf in getf0.values() + getf1.values():
                getf.get_event().wait()

            for eh in ['e', 'h']:
                norm = np.linalg.norm( \
                        getf0[eh].get_fields() - getf1[eh].get_fields())
                self.assertEqual(norm, 0, '%g, %s, %s' % (norm, 'y+', eh) )

        # buffer 'y-'
        # here the buffer is the f0 side and each main field is the f1 side
        f0 = nodef.cpuf_dict['y-']
        for f1, anx0, anx1 in zip(mainf_list, anx_list[:-1], anx_list[1:]):
            getf0['e'] = cpu.GetFields(f0, ['ey', 'ez'], \
                    (f0.nx-1, anx0, 0), (f0.nx-1, anx1-1, f0.nz-2))
            getf1['e'] = getattr(self, f1.device_type).GetFields(f1, ['ex', 'ez'], \
                    (0, 1, 0), (f1.nx-2, 1, f1.nz-2))

            getf0['h'] = cpu.GetFields(f0, ['hy', 'hz'], \
                    (f0.nx-2, anx0+1, 1), (f0.nx-2, anx1, f0.nz-1))
            getf1['h'] = getattr(self, f1.device_type).GetFields(f1, ['hx', 'hz'], \
                    (1, 0, 1), (f1.nx-1, 0, f1.nz-1))

            for getf in getf0.values() + getf1.values():
                getf.get_event().wait()

            for eh in ['e', 'h']:
                norm = np.linalg.norm( \
                        getf0[eh].get_fields() - getf1[eh].get_fields())
                self.assertEqual(norm, 0, '%g, %s, %s' % (norm, 'y-', eh) )

        # buffer 'z+'
        f1 = nodef.cpuf_dict['z+']
        for f0, anx0, anx1 in zip(mainf_list, anx_list[:-1], anx_list[1:]):
            getf0['e'] = getattr(self, f0.device_type).GetFields(f0, ['ex', 'ey'], \
                    (0, 0, f0.nz-1), (f0.nx-2, f0.ny-2, f0.nz-1))
            getf1['e'] = cpu.GetFields(f1, ['ey', 'ez'], \
                    (1, anx0, 0), (1, anx1-1, f1.nz-2))

            getf0['h'] = getattr(self, f0.device_type).GetFields(f0, ['hx', 'hy'], \
                    (1, 1, f0.nz-2), (f0.nx-1, f0.ny-1, f0.nz-2))
            getf1['h'] = cpu.GetFields(f1, ['hy', 'hz'], \
                    (0, anx0+1, 1), (0, anx1, f1.nz-1))

            for getf in getf0.values() + getf1.values():
                getf.get_event().wait()

            for eh in ['e', 'h']:
                norm = np.linalg.norm( \
                        getf0[eh].get_fields() - getf1[eh].get_fields())
                self.assertEqual(norm, 0, '%g, %s, %s' % (norm, 'z+', eh) )

        # buffer 'z-'
        f0 = nodef.cpuf_dict['z-']
        for f1, anx0, anx1 in zip(mainf_list, anx_list[:-1], anx_list[1:]):
            getf0['e'] = cpu.GetFields(f0, ['ey', 'ez'], \
                    (f0.nx-1, anx0, 0), (f0.nx-1, anx1-1, f0.nz-2))
            getf1['e'] = getattr(self, f1.device_type).GetFields(f1, ['ex', 'ey'], \
                    (0, 0, 1), (f1.nx-2, f1.ny-2, 1))

            getf0['h'] = cpu.GetFields(f0, ['hy', 'hz'], \
                    (f0.nx-2, anx0+1, 1), (f0.nx-2, anx1, f0.nz-1))
            getf1['h'] = getattr(self, f1.device_type).GetFields(f1, ['hx', 'hy'], \
                    (1, 1, 0), (f1.nx-1, f1.ny-1, 0))

            for getf in getf0.values() + getf1.values():
                getf.get_event().wait()

            for eh in ['e', 'h']:
                norm = np.linalg.norm( \
                        getf0[eh].get_fields() - getf1[eh].get_fields())
                self.assertEqual(norm, 0, '%g, %s, %s' % (norm, 'z-', eh) )

예제 #22

0

파일 보기

# Script excerpt: `comm` and `rank` are used below but are not defined in
# this excerpt.
size = comm.Get_size()

# selects the shorter run length below (tmax)
is_plot = False

# Candidate grid sizes; the trailing figure on each line works out to
# 36 bytes per cell, expressed in units of 2**20 bytes.
#nx, ny, nz = 240, 256, 256  # 540 MB
#nx, ny, nz = 544, 544, 544  # 5527 MB
#nx, ny, nz = 512, 512, 512  # 4608 MB
#nx, ny, nz = 480, 480, 480  # 3796 MB
nx, ny, nz = 800, 256, 256  # 1800 MB by the same ratio (the earlier "576 MB" note matches 256, 256, 256)
#nx, ny, nz = 128, 128, 128  # 72 MB

coeff_use = 'e'
precision_float = 'single'

# instances
gpu_devices = common_gpu.gpu_device_list(print_info=False)
context = cl.Context(gpu_devices)
device = gpu_devices[0]  # only the first listed device is used
qtask = cpu.QueueTask()
fields = Fields(context, device, qtask, nx, ny, nz, coeff_use, precision_float)
# the Core instance is not kept
# presumably it registers itself on `fields` -- TODO confirm
Core(fields)

tmax = 250 if is_plot else 1000
# first rank gets '+', last rank gets '-', every rank in between gets '+-'
if rank == 0: direction = '+'
elif rank == size - 1: direction = '-'
else: direction = '+-'

# alternative exchange implementations, all disabled in this excerpt
#exch = node.ExchangeMpiNonBlock(fields, direction)

#exch = node.ExchangeMpiBufferBlock(fields, direction)
#exch = node.ExchangeMpiBufferBlockSplit(fields, direction)

예제 #23

0

파일 보기

파일: test_exchange.py 프로젝트: xj361685640/fdtd_accelerate

    def runTest(self):
        """Check boundary agreement after one NodeExchange e/h update.

        A gpu.Fields is created for every detected GPU device and a
        cpu.Fields is appended last; all of them are wrapped in a NodeFields
        that also receives CPU buffer fields for 'x-', 'y+', 'y-', 'z+' and
        'z-'.  The GPU fields and every entry of nodef.cpuf_dict get random
        data, exchange.update_e() and exchange.update_h() run once, and each
        pair of boundary slices read through GetFields must then be identical
        (norm of the difference equal to 0).

        NOTE: ``getf0.values() + getf1.values()`` concatenates two lists, so
        this test depends on Python 2 dict semantics.
        """
        nx, ny, nz = self.args

        # instance
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)

        # GPU fields first (one per device), the CPU field is always last
        mainf_list = [gpu.Fields(context, device, nx, ny, nz) \
                for device in gpu_devices]
        mainf_list.append(cpu.Fields(nx, ny, nz))
        nodef = NodeFields(mainf_list)
        dtype = nodef.dtype

        # buffer instance
        nodef.append_buffer_fields(cpu.Fields(3, ny, nz, mpi_type='x-'))
        nodef.append_buffer_fields(cpu.Fields(3, nodef.nx, nz, mpi_type='y+'))
        nodef.append_buffer_fields(cpu.Fields(3, nodef.nx, nz, mpi_type='y-'))
        nodef.append_buffer_fields(cpu.Fields(3, nodef.nx, ny, mpi_type='z+'))
        nodef.append_buffer_fields(cpu.Fields(3, nodef.nx, ny, mpi_type='z-'))

        exchange = NodeExchange(nodef)

        # generate random source
        # (both loops rebind nx, ny, nz to the shape of the field being filled)
        for f in mainf_list[:-1]:
            nx, ny, nz = f.ns
            ehs = common_update.generate_random_ehs(nx, ny, nz, dtype)
            f.set_eh_bufs(*ehs)

        for f in nodef.cpuf_dict.values():
            nx, ny, nz = f.ns
            ehs = common_update.generate_random_ehs(nx, ny, nz, dtype)
            f.set_ehs(*ehs)

        # verify
        exchange.update_e()
        exchange.update_h()
        # two sides of every comparison, keyed 'e' / 'h' and reused throughout
        getf0, getf1 = {}, {}

        # mainf list
        # storing the modules on self lets getattr(self, f.device_type) select
        # the matching GetFields class
        # (assumes device_type is 'gpu' or 'cpu' -- TODO confirm)
        self.gpu, self.cpu = gpu, cpu
        # adjacent main fields: last x-plane of f0 against first x-plane of f1
        for f0, f1 in zip(mainf_list[:-1], mainf_list[1:]):
            getf0['e'] = getattr(self, f0.device_type).GetFields(f0, ['ey', 'ez'], \
                    (f0.nx-1, 0, 0), (f0.nx-1, f0.ny-2, f0.nz-2))
            getf1['e'] = getattr(self, f1.device_type).GetFields(f1, ['ey', 'ez'], \
                    (0, 0, 0), (0, f1.ny-2, f1.nz-2))

            getf0['h'] = getattr(self, f0.device_type).GetFields(f0, ['hy', 'hz'], \
                    (f0.nx-1, 1, 1), (f0.nx-1, f0.ny-1, f0.nz-1))
            getf1['h'] = getattr(self, f1.device_type).GetFields(f1, ['hy', 'hz'], \
                    (0, 1, 1), (0, f1.ny-1, f1.nz-1))

            # wait for all four reads before looking at the data
            for getf in getf0.values() + getf1.values():
                getf.get_event().wait()

            for eh in ['e', 'h']:
                norm = np.linalg.norm(getf0[eh].get_fields() -
                                      getf1[eh].get_fields())
                self.assertEqual(norm, 0, '%s, %g, %s, %s, %s' % \
                        (self.args, norm, 'mainf', \
                        getf0[eh].mainf.device_type, getf1[eh].mainf.device_type) )

        # buffer 'x-'
        # compared against mainf_list[0] through gpu.GetFields
        f0, f1 = nodef.cpuf_dict['x-'], mainf_list[0]
        getf0['e'] = cpu.GetFields(f0, ['ey', 'ez'], \
                (f0.nx-1, 0, 0), (f0.nx-1, f0.ny-2, f0.nz-2))
        getf1['e'] = gpu.GetFields(f1, ['ey', 'ez'], \
                (1, 0, 0), (1, f1.ny-2, f1.nz-2))

        getf0['h'] = cpu.GetFields(f0, ['hy', 'hz'], \
                (f0.nx-2, 1, 1), (f0.nx-2, f0.ny-1, f0.nz-1))
        getf1['h'] = gpu.GetFields(f1, ['hy', 'hz'], \
                (0, 1, 1), (0, f1.ny-1, f1.nz-1))

        for getf in getf0.values() + getf1.values():
            getf.get_event().wait()

        for eh in ['e', 'h']:
            norm = np.linalg.norm( \
                    getf0[eh].get_fields() - getf1[eh].get_fields())
            self.assertEqual(norm, 0, '%g, %s, %s' % (norm, 'x-', eh))

        # buffer 'y+'
        # consecutive entries of accum_nx_list bound each main field's range
        # along the buffer's second axis
        anx_list = nodef.accum_nx_list

        f1 = nodef.cpuf_dict['y+']
        for f0, anx0, anx1 in zip(mainf_list, anx_list[:-1], anx_list[1:]):
            # NOTE(review): the main-field side reads 'ex', 'ez' while the
            # buffer side reads 'ey', 'ez' -- confirm this is the intended
            # axis mapping of the buffer
            getf0['e'] = getattr(self, f0.device_type).GetFields(f0, ['ex', 'ez'], \
                    (0, f0.ny-1, 0), (f0.nx-2, f0.ny-1, f0.nz-2))
            getf1['e'] = cpu.GetFields(f1, ['ey', 'ez'], \
                    (1, anx0, 0), (1, anx1-1, f1.nz-2))

            getf0['h'] = getattr(self, f0.device_type).GetFields(f0, ['hx', 'hz'], \
                    (1, f0.ny-2, 1), (f0.nx-1, f0.ny-2, f0.nz-1))
            getf1['h'] = cpu.GetFields(f1, ['hy', 'hz'], \
                    (0, anx0+1, 1), (0, anx1, f1.nz-1))

            for getf in getf0.values() + getf1.values():
                getf.get_event().wait()

            for eh in ['e', 'h']:
                norm = np.linalg.norm( \
                        getf0[eh].get_fields() - getf1[eh].get_fields())
                self.assertEqual(norm, 0, '%g, %s, %s' % (norm, 'y+', eh))

        # buffer 'y-'
        # here the buffer is the f0 side and each main field is the f1 side
        f0 = nodef.cpuf_dict['y-']
        for f1, anx0, anx1 in zip(mainf_list, anx_list[:-1], anx_list[1:]):
            getf0['e'] = cpu.GetFields(f0, ['ey', 'ez'], \
                    (f0.nx-1, anx0, 0), (f0.nx-1, anx1-1, f0.nz-2))
            getf1['e'] = getattr(self, f1.device_type).GetFields(f1, ['ex', 'ez'], \
                    (0, 1, 0), (f1.nx-2, 1, f1.nz-2))

            getf0['h'] = cpu.GetFields(f0, ['hy', 'hz'], \
                    (f0.nx-2, anx0+1, 1), (f0.nx-2, anx1, f0.nz-1))
            getf1['h'] = getattr(self, f1.device_type).GetFields(f1, ['hx', 'hz'], \
                    (1, 0, 1), (f1.nx-1, 0, f1.nz-1))

            for getf in getf0.values() + getf1.values():
                getf.get_event().wait()

            for eh in ['e', 'h']:
                norm = np.linalg.norm( \
                        getf0[eh].get_fields() - getf1[eh].get_fields())
                self.assertEqual(norm, 0, '%g, %s, %s' % (norm, 'y-', eh))

        # buffer 'z+'
        f1 = nodef.cpuf_dict['z+']
        for f0, anx0, anx1 in zip(mainf_list, anx_list[:-1], anx_list[1:]):
            getf0['e'] = getattr(self, f0.device_type).GetFields(f0, ['ex', 'ey'], \
                    (0, 0, f0.nz-1), (f0.nx-2, f0.ny-2, f0.nz-1))
            getf1['e'] = cpu.GetFields(f1, ['ey', 'ez'], \
                    (1, anx0, 0), (1, anx1-1, f1.nz-2))

            getf0['h'] = getattr(self, f0.device_type).GetFields(f0, ['hx', 'hy'], \
                    (1, 1, f0.nz-2), (f0.nx-1, f0.ny-1, f0.nz-2))
            getf1['h'] = cpu.GetFields(f1, ['hy', 'hz'], \
                    (0, anx0+1, 1), (0, anx1, f1.nz-1))

            for getf in getf0.values() + getf1.values():
                getf.get_event().wait()

            for eh in ['e', 'h']:
                norm = np.linalg.norm( \
                        getf0[eh].get_fields() - getf1[eh].get_fields())
                self.assertEqual(norm, 0, '%g, %s, %s' % (norm, 'z+', eh))

        # buffer 'z-'
        f0 = nodef.cpuf_dict['z-']
        for f1, anx0, anx1 in zip(mainf_list, anx_list[:-1], anx_list[1:]):
            getf0['e'] = cpu.GetFields(f0, ['ey', 'ez'], \
                    (f0.nx-1, anx0, 0), (f0.nx-1, anx1-1, f0.nz-2))
            getf1['e'] = getattr(self, f1.device_type).GetFields(f1, ['ex', 'ey'], \
                    (0, 0, 1), (f1.nx-2, f1.ny-2, 1))

            getf0['h'] = cpu.GetFields(f0, ['hy', 'hz'], \
                    (f0.nx-2, anx0+1, 1), (f0.nx-2, anx1, f0.nz-1))
            getf1['h'] = getattr(self, f1.device_type).GetFields(f1, ['hx', 'hy'], \
                    (1, 1, 0), (f1.nx-1, f1.ny-1, 0))

            for getf in getf0.values() + getf1.values():
                getf.get_event().wait()

            for eh in ['e', 'h']:
                norm = np.linalg.norm( \
                        getf0[eh].get_fields() - getf1[eh].get_fields())
                self.assertEqual(norm, 0, '%g, %s, %s' % (norm, 'z-', eh))

예제 #24

0

파일 보기

파일: test_core.py 프로젝트: xj361685640/fdtd_accelerate

    def runTest(self):
        """Compare repeated device e- or h-updates with the host reference.

        ``self.args`` provides the update kind ``ufunc`` ('e' or 'h'), the
        grid size, the coefficient selection, the float precision and the
        number of time steps ``tmax``.  The same random fields (and
        coefficients, where selected by ``coeff_use``) are given to the device
        ``Fields`` and to the ``common_update`` reference routines.  After
        ``tmax`` updates each of the three updated components is copied back
        and must equal the host array exactly; when the device arrays are
        padded, the trailing z-layers must still be all zero.

        Any other ``ufunc`` value runs no update and checks nothing.
        """
        ufunc, nx, ny, nz, coeff_use, precision_float, tmax = self.args

        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        device = gpu_devices[0]
        fields = Fields(context, device, nx, ny, nz, coeff_use,
                        precision_float)
        core = Core(fields)

        # allocations
        dtype = fields.dtype
        strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']

        ehs = common_update.generate_random_ehs(nx, ny, nz, dtype, ufunc)
        fields.set_eh_bufs(*ehs)

        ces, chs = common_update.generate_random_cs(coeff_use, nx, ny, nz,
                                                    dtype)
        if 'e' in coeff_use:
            fields.set_ce_bufs(*ces)
        if 'h' in coeff_use:
            fields.set_ch_bufs(*chs)

        # host array with the padded device shape, reused for every read-back
        tmpf = np.zeros(fields.ns_pitch, dtype=dtype)

        # select the device update, the host reference update with its
        # coefficients, and which entries of strf_list/ehs get verified
        if ufunc == 'e':
            device_update = fields.update_e
            host_update, coeffs = common_update.update_e, ces
            checked = slice(0, 3)
        elif ufunc == 'h':
            device_update = fields.update_h
            host_update, coeffs = common_update.update_h, chs
            checked = slice(3, None)
        else:
            return

        # update
        for tstep in xrange(0, tmax):
            device_update()
            host_update(ehs, coeffs)

        # verify (the pairs are built only after the host arrays were updated)
        for strf, eh in zip(strf_list, ehs)[checked]:
            cl.enqueue_copy(fields.queue, tmpf, fields.get_buf(strf))
            diff = eh - tmpf[:, :, fields.slice_z]
            norm = np.linalg.norm(diff)
            max_diff = np.abs(diff).max()
            self.assertEqual(
                norm, 0,
                '%s, %s, %g, %g' % (self.args, strf, norm, max_diff))

            if fields.pad != 0:
                # ex and ey are checked over one extra trailing z-layer;
                # ez and all h components only over the padding itself
                if ufunc == 'e' and strf != 'ez':
                    tail = fields.pad + 1
                else:
                    tail = fields.pad
                norm2 = np.linalg.norm(tmpf[:, :, -tail:])
                self.assertEqual(
                    norm2, 0,
                    '%s, %s, %g, padding' % (self.args, strf, norm2))

예제 #25

0

파일 보기

    def runTest(self):
        """Check a periodic boundary along ``axis`` combined with NodeExchange.

        ``self.args`` is ``(axis, nx, ny, nz)``.  One gpu.Fields per detected
        GPU device plus a trailing cpu.Fields are wrapped in a NodeFields,
        with a NodePbc for ``axis`` and a NodeExchange on top.  After one e
        step and one h step (main fields, then pbc, then exchange) the two
        opposite boundary planes along ``axis`` must hold identical values:

        * 'x': first x-plane of the first main field against the last x-plane
          of the last main field;
        * 'y' / 'z': first against last plane, separately inside every main
          field.

        NOTE: ``getf0.values() + getf1.values()`` concatenates two lists, so
        this test depends on Python 2 dict semantics.
        """
        axis, nx, ny, nz = self.args
        # storing the modules on self lets getattr(self, f.device_type) select
        # the matching GetFields class
        # (assumes device_type is 'gpu' or 'cpu' -- TODO confirm)
        self.gpu, self.cpu = gpu, cpu

        # instance
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)

        # GPU fields first (one per device), the CPU field is always last
        mainf_list = [gpu.Fields(context, device, nx, ny, nz) \
                for device in gpu_devices]
        mainf_list.append(cpu.Fields(nx, ny, nz))
        nodef = NodeFields(mainf_list)
        dtype = nodef.dtype

        pbc = NodePbc(nodef, axis)
        exchange = NodeExchange(nodef)

        # generate random source
        # (both loops rebind nx, ny, nz to the shape of the field being filled)
        for f in mainf_list[:-1]:
            nx, ny, nz = f.ns
            ehs = common_update.generate_random_ehs(nx, ny, nz, dtype)
            f.set_eh_bufs(*ehs)

        for f in nodef.cpuf_dict.values():
            nx, ny, nz = f.ns
            ehs = common_update.generate_random_ehs(nx, ny, nz, dtype)
            f.set_ehs(*ehs)

        # verify
        # one e step: main fields first, then pbc, then exchange
        for mainf in mainf_list:
            mainf.update_e()
        pbc.update_e()
        exchange.update_e()

        # one h step in the same order
        for mainf in mainf_list:
            mainf.update_h()
        pbc.update_h()
        exchange.update_h()

        mainf_list[-1].enqueue_barrier()

        # two sides of each comparison, keyed 'e' / 'h'
        getf0, getf1 = {}, {}

        if axis == 'x':
            # the periodic pair spans the whole node: first plane of the first
            # field against last plane of the last field
            f0, f1 = mainf_list[0], mainf_list[-1]
            getf0['e'] = getattr(self, f0.device_type).GetFields(f0, ['ey', 'ez'], \
                    (0, 0, 0), (0, f0.ny-2, f0.nz-2))
            getf1['e'] = getattr(self, f1.device_type).GetFields(f1, ['ey', 'ez'], \
                    (f1.nx-1, 0, 0), (f1.nx-1, f1.ny-2, f1.nz-2))

            getf0['h'] = getattr(self, f0.device_type).GetFields(f0, ['hy', 'hz'], \
                    (0, 1, 1), (0, f0.ny-1, f0.nz-1))
            getf1['h'] = getattr(self, f1.device_type).GetFields(f1, ['hy', 'hz'], \
                    (f1.nx-1, 1, 1), (f1.nx-1, f1.ny-1, f1.nz-1))

            for getf in getf0.values() + getf1.values():
                getf.get_event().wait()

            for eh in ['e', 'h']:
                norm = np.linalg.norm(getf0[eh].get_fields() -
                                      getf1[eh].get_fields())
                self.assertEqual(norm, 0, '%g, %s, %s' % (norm, 'x', eh))

        elif axis == 'y':
            # both planes of the pair live in the same main field
            for f in mainf_list:
                getf0['e'] = getattr(self, f.device_type).GetFields(f, ['ex', 'ez'], \
                        (0, 0, 0), (f.nx-2, 0, f.nz-2))
                getf1['e'] = getattr(self, f.device_type).GetFields(f, ['ex', 'ez'], \
                        (0, f.ny-1, 0), (f.nx-2, f.ny-1, f.nz-2))

                getf0['h'] = getattr(self, f.device_type).GetFields(f, ['hx', 'hz'], \
                        (1, 0, 1), (f.nx-1, 0, f.nz-1))
                getf1['h'] = getattr(self, f.device_type).GetFields(f, ['hx', 'hz'], \
                        (1, f.ny-1, 1), (f.nx-1, f.ny-1, f.nz-1))

                for getf in getf0.values() + getf1.values():
                    getf.get_event().wait()

                for eh in ['e', 'h']:
                    norm = np.linalg.norm( \
                            getf0[eh].get_fields() - getf1[eh].get_fields())
                    self.assertEqual(
                        norm, 0,
                        '%g, %s, %s, %s' % (norm, 'y', eh, f.device_type))

        elif axis == 'z':
            # both planes of the pair live in the same main field
            for f in mainf_list:
                getf0['e'] = getattr(self, f.device_type).GetFields(f, ['ex', 'ey'], \
                        (0, 0, f.nz-1), (f.nx-2, f.ny-2, f.nz-1))
                getf1['e'] = getattr(self, f.device_type).GetFields(f, ['ex', 'ey'], \
                        (0, 0, 0), (f.nx-2, f.ny-2, 0))

                getf0['h'] = getattr(self, f.device_type).GetFields(f, ['hx', 'hy'], \
                        (1, 1, f.nz-1), (f.nx-1, f.ny-1, f.nz-1))
                getf1['h'] = getattr(self, f.device_type).GetFields(f, ['hx', 'hy'], \
                        (1, 1, 0), (f.nx-1, f.ny-1, 0))

                for getf in getf0.values() + getf1.values():
                    getf.get_event().wait()

                for eh in ['e', 'h']:
                    norm = np.linalg.norm( \
                            getf0[eh].get_fields() - getf1[eh].get_fields())
                    self.assertEqual(norm, 0, '%g, %s, %s' % (norm, 'z', eh))

예제 #26

0

파일 보기

파일: test_pbc_exchange.py 프로젝트: wbkifun/fdtd_accelerate

    def test_y_pbc_x_exchange(self):
        """Exercise a y-axis PBC together with the x-axis GPU/CPU exchange.

        One gpu.Fields and one cpu.Fields of the same 40 x 50 x 60 size are
        joined in a NodeFields.  After a single e step and a single h step
        (main fields, then pbc, then exchange) three pairs of boundary planes
        must match exactly: the GPU's last x-plane against the CPU's first
        x-plane, and the last against the first y-plane on the GPU and on
        the CPU.
        """
        # instance
        nx, ny, nz = 40, 50, 60
        #nx, ny, nz = 3, 4, 5
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)

        gpuf = gpu.Fields(context, gpu_devices[0], nx, ny, nz)
        cpuf = cpu.Fields(nx, ny, nz)
        mainf_list = [gpuf, cpuf]
        nodef = NodeFields(mainf_list)
        core = NodeCore(nodef)
        pbc = NodePbc(nodef, 'y')
        exchange = NodeExchange(nodef)

        # generate random source
        component_names = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']

        ehs_gpu = common_update.generate_random_ehs(nx, ny, nz, nodef.dtype)
        gpuf.set_eh_bufs(*ehs_gpu)
        ehs_gpu_dict = dict(zip(component_names, ehs_gpu))

        ehs_cpu = common_update.generate_random_ehs(nx, ny, nz, nodef.dtype)
        cpuf.set_ehs(*ehs_cpu)
        ehs_cpu_dict = dict(zip(component_names, ehs_cpu))

        # one e step, then one h step: main fields, pbc, exchange
        for step_name in ('update_e', 'update_h'):
            for mainf in mainf_list:
                getattr(mainf, step_name)()
            getattr(pbc, step_name)()
            getattr(exchange, step_name)()

        mainf_list[-1].enqueue_barrier()
        getf0, getf1 = {}, {}

        def wait_and_compare(label):
            # block on every pending read, then require both sides to agree
            for getter in getf0.values() + getf1.values():
                getter.get_event().wait()

            for kind in ['e', 'h']:
                side0 = getf0[kind].get_fields()
                side1 = getf1[kind].get_fields()
                norm = np.linalg.norm(side0 - side1)
                self.assertEqual(norm, 0, '%g, %s, %s' % (norm, label, kind))

        x_last, y_last, z_last = nx-1, ny-1, nz-1

        # x-axis exchange: GPU's last x-plane vs CPU's first x-plane
        getf0['e'] = gpu.GetFields(
            gpuf, ['ey', 'ez'], (x_last, 0, 0), (x_last, ny-2, nz-2))
        getf1['e'] = cpu.GetFields(
            cpuf, ['ey', 'ez'], (0, 0, 0), (0, ny-2, nz-2))
        getf0['h'] = gpu.GetFields(
            gpuf, ['hy', 'hz'], (x_last, 1, 1), (x_last, y_last, z_last))
        getf1['h'] = cpu.GetFields(
            cpuf, ['hy', 'hz'], (0, 1, 1), (0, y_last, z_last))
        wait_and_compare('x-axis exchange')

        # y-axis pbc gpu: last y-plane vs first y-plane of the GPU field
        getf0['e'] = gpu.GetFields(
            gpuf, ['ex', 'ez'], (0, y_last, 0), (nx-2, y_last, nz-2))
        getf1['e'] = gpu.GetFields(
            gpuf, ['ex', 'ez'], (0, 0, 0), (nx-2, 0, nz-2))
        getf0['h'] = gpu.GetFields(
            gpuf, ['hx', 'hz'], (1, y_last, 1), (x_last, y_last, z_last))
        getf1['h'] = gpu.GetFields(
            gpuf, ['hx', 'hz'], (1, 0, 1), (x_last, 0, z_last))
        wait_and_compare('y-axis pbc gpu')

        # y-axis pbc cpu: last y-plane vs first y-plane of the CPU field
        getf0['e'] = cpu.GetFields(
            cpuf, ['ex', 'ez'], (0, y_last, 0), (nx-2, y_last, nz-2))
        getf1['e'] = cpu.GetFields(
            cpuf, ['ex', 'ez'], (0, 0, 0), (nx-2, 0, nz-2))
        getf0['h'] = cpu.GetFields(
            cpuf, ['hx', 'hz'], (1, y_last, 1), (x_last, y_last, z_last))
        getf1['h'] = cpu.GetFields(
            cpuf, ['hx', 'hz'], (1, 0, 1), (x_last, 0, z_last))
        wait_and_compare('y-axis pbc cpu')

예제 #27

0

파일 보기

    def runTest(self):
        nx, ny, nz = self.args

        # instances
        buffer_dict = {}
        buffer_dict['x+'] = cpu.BufferFields('x+', ny, nz, '', 'single')
        buffer_dict['x-'] = cpu.BufferFields('x-', ny, nz, '', 'single')

        import pyopencl as cl
        from kemp.fdtd3d.util import common_gpu
        from kemp.fdtd3d import gpu
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        mainf_list = [ gpu.Fields(context, gpu_devices[0], nx, ny, nz) ]
        #mainf_list = [ cpu.Fields(nx, ny, nz) ]
        nodef = node.Fields(mainf_list, buffer_dict)

        # generate random source
        dtype = nodef.dtype
        ehs = common_random.generate_ehs(nx, ny, nz, dtype)
        buf_ehs_p = common_random.generate_ehs(3, ny, nz, dtype)
        buf_ehs_m = common_random.generate_ehs(3, ny, nz, dtype)
        nodef.mainf_list[0].set_eh_bufs(*ehs)
        #nodef.mainf_list[0].set_ehs(*ehs)
        nodef.buffer_dict['x+'].set_ehs(*buf_ehs_p)
        nodef.buffer_dict['x-'].set_ehs(*buf_ehs_m)
        node.Core(nodef)

        # allocations for verify
        getf_dict = {'x+': {}, 'x-': {}}
        getf_buf_dict = {'x+': {}, 'x-': {}}

        getf_dict['x+']['e'] = gpu.GetFields(nodef.mainf_list[0], ['ey', 'ez'], (nx-1, 0, 0), (nx-1, ny-1, nz-1))
        getf_dict['x+']['h'] = gpu.GetFields(nodef.mainf_list[0], ['hy', 'hz'], (nx-2, 0, 0), (nx-2, ny-1, nz-1))

        getf_buf_dict['x+']['e'] = cpu.GetFields(nodef.buffer_dict['x+'], ['ey', 'ez'], (1, 0, 0), (1, ny-1, nz-1))
        getf_buf_dict['x+']['h'] = cpu.GetFields(nodef.buffer_dict['x+'], ['hy', 'hz'], (0, 0, 0), (0, ny-1, nz-1))

        getf_dict['x-']['e'] = gpu.GetFields(nodef.mainf_list[0], ['ey', 'ez'], (1, 0, 0), (1, ny-1, nz-1))
        getf_dict['x-']['h'] = gpu.GetFields(nodef.mainf_list[0], ['hy', 'hz'], (0, 0, 0), (0, ny-1, nz-1))

        getf_buf_dict['x-']['e'] = cpu.GetFields(nodef.buffer_dict['x-'], ['ey', 'ez'], (2, 0, 0), (2, ny-1, nz-1))
        getf_buf_dict['x-']['h'] = cpu.GetFields(nodef.buffer_dict['x-'], ['hy', 'hz'], (1, 0, 0), (1, ny-1, nz-1))

        # verify
        nodef.update_e()
        nodef.update_h()
        print 'nodef, instance_list', nodef.instance_list
        print 'mainf_list[0], instance_list', nodef.mainf_list[0].instance_list

        for direction in ['x+', 'x-']:
            for e_or_h in ['e', 'h']:
                getf = getf_dict[direction][e_or_h]
                getf_buf = getf_buf_dict[direction][e_or_h]

                getf.get_event().wait()
                getf_buf.get_event().wait()

                original = getf.get_fields()
                copy = getf_buf.get_fields()
                norm = np.linalg.norm(original - copy)
                self.assertEqual(norm, 0, '%s, %g, %s, %s' % (self.args, norm, direction, e_or_h))

예제 #28

0

파일 보기

파일: test_core.py 프로젝트: wbkifun/fdtd_accelerate

    def runTest(self):
        """Run ``tmax`` e- or h-updates on the device and on the host and compare.

        ``self.args`` is ``(ufunc, nx, ny, nz, coeff_use, precision_float,
        tmax)``.  Identical random fields (and coefficients, for the kinds
        named in ``coeff_use``) are loaded into the device ``Fields`` and kept
        on the host for ``common_update``.  After the updates, each of the
        three components of the updated kind is read back and must match the
        host array exactly; if the device layout is padded along z, the
        trailing layers must have stayed zero.

        A ``ufunc`` other than "e" or "h" performs no update and no check.
        """
        ufunc, nx, ny, nz, coeff_use, precision_float, tmax = self.args

        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        device = gpu_devices[0]
        fields = Fields(context, device, nx, ny, nz, coeff_use, precision_float)
        core = Core(fields)

        # allocations
        dtype = fields.dtype
        strf_list = ["ex", "ey", "ez", "hx", "hy", "hz"]

        ehs = common_update.generate_random_ehs(nx, ny, nz, dtype, ufunc)
        fields.set_eh_bufs(*ehs)

        ces, chs = common_update.generate_random_cs(coeff_use, nx, ny, nz, dtype)
        if "e" in coeff_use:
            fields.set_ce_bufs(*ces)
        if "h" in coeff_use:
            fields.set_ch_bufs(*chs)

        # read-back target with the padded device shape
        tmpf = np.zeros(fields.ns_pitch, dtype=dtype)

        # per-kind settings: device step, host step, host coefficients and
        # the part of strf_list/ehs that the step modifies
        if ufunc == "e":
            step_device, step_host, coeffs = fields.update_e, common_update.update_e, ces
            updated = slice(None, 3)
        elif ufunc == "h":
            step_device, step_host, coeffs = fields.update_h, common_update.update_h, chs
            updated = slice(3, None)
        else:
            return

        # update
        for tstep in xrange(0, tmax):
            step_device()
            step_host(ehs, coeffs)

        # verify; pairing happens after the updates so it sees the final ehs
        for strf, eh in zip(strf_list, ehs)[updated]:
            cl.enqueue_copy(fields.queue, tmpf, fields.get_buf(strf))
            delta = eh - tmpf[:, :, fields.slice_z]
            norm = np.linalg.norm(delta)
            max_diff = np.abs(delta).max()
            self.assertEqual(norm, 0, "%s, %s, %g, %g" % (self.args, strf, norm, max_diff))

            if fields.pad != 0:
                # only ex and ey are checked over one layer beyond the
                # padding; ez and every h component use the padding alone
                extra = 1 if (ufunc == "e" and strf != "ez") else 0
                norm2 = np.linalg.norm(tmpf[:, :, -(fields.pad + extra) :])
                self.assertEqual(norm2, 0, "%s, %s, %g, padding" % (self.args, strf, norm2))

예제 #29

0

파일 보기

파일: main.py 프로젝트: xj361685640/fdtd_accelerate

    def __init__(self,
                 geometry_h5_path,
                 max_tstep,
                 mpi_shape,
                 pbc_axes='',
                 target_device='all',
                 precision_float='single',
                 **kargs):
        """Create the field, boundary and exchange objects of this node.

        Args:
            geometry_h5_path: Path of the geometry HDF5 file.  Its
                'coeff_use' attribute is forwarded to the field
                constructors; 'nx', 'ny' and 'nz' must be present.
            max_tstep: Number of time steps; stored on the instance and
                passed to the MPI exchange objects.
            mpi_shape: Number of MPI nodes along x, y and z.  (1, 1, 1),
                given as a tuple or a list, disables MPI.
            pbc_axes: String of axis letters with periodic boundaries.  Axes
                that are not split across MPI nodes get a ``node.Pbc``.
            target_device: 'all', or a string containing 'cpu' and/or 'gpu'.
            precision_float: 'single' or 'double'.
            **kargs: Must provide 'device_nx_list', 'ny_list' and 'nz_list'.
                The lists are not modified.

        Raises:
            TypeError: If one of the required keyword lists is missing.
            SystemExit: If the MPI size does not match ``mpi_shape``, the
                geometry file cannot be read, no requested device is usable,
                or ``device_nx_list`` does not match the device count.
        """

        common.check_type('geometry_h5_path', geometry_h5_path, str)
        common.check_type('max_tstep', max_tstep, int)
        common.check_type('mpi_shape', mpi_shape, (list, tuple), int)
        common.check_type('pbc_axes', pbc_axes, str)
        common.check_type('target_device', target_device, str)
        common.check_value('precision_float', precision_float,
                           ['single', 'double'])

        # import modules
        global is_mpi, is_gpu

        # Normalize before comparing: a list passes the type check above but
        # [1, 1, 1] == (1, 1, 1) is False, which used to switch MPI on.
        is_mpi = tuple(mpi_shape) != (1, 1, 1)

        if is_mpi:
            global network, common_mpi, comm, size, rank, coord
            from mpi4py import MPI
            from kemp.fdtd3d import network
            from kemp.fdtd3d.util import common_mpi
            comm = MPI.COMM_WORLD
            size = comm.Get_size()
            rank = comm.Get_rank()
            coord = common_mpi.my_coord(rank, mpi_shape)

        # Only the master (rank 0, or the single process) prints messages.
        is_master = not (is_mpi and rank != 0)
        is_cpu = target_device == 'all' or 'cpu' in target_device
        is_gpu = target_device == 'all' or 'gpu' in target_device

        if is_mpi:
            if reduce(lambda a, b: a * b, mpi_shape) != size:
                if is_master:
                    print("The MPI size %d is not matched the mpi_shape %s" %
                          (size, mpi_shape))
                sys.exit()

        if is_gpu:
            try:
                global cl, gpu, common_gpu
                import pyopencl as cl
                from kemp.fdtd3d import gpu
                from kemp.fdtd3d.util import common_gpu
            except Exception:
                # Exception (not a bare except) so that Ctrl-C and
                # sys.exit() are not mistaken for a missing module.
                if is_master:
                    print("The 'pyopencl' module is not found.")

                if is_cpu:
                    if is_master:
                        print("The CPU is only used.")
                    is_gpu = False
                else:
                    sys.exit()

        # read from the h5 file
        try:
            h5f = h5py.File(geometry_h5_path, 'r')
            try:
                coeff_use = h5f.attrs['coeff_use']
                # The grid sizes are not used below; they are read only so
                # that a geometry file without them is rejected here.
                for attr_name in ('nx', 'ny', 'nz'):
                    h5f.attrs[attr_name]
            finally:
                # Always release the file handle, even on a missing attribute.
                h5f.close()
        except Exception:
            if is_master:
                print(repr(sys.exc_info()))
                print("To load the geometry HDF5 file '%s' is failed." %
                      geometry_h5_path)
            sys.exit()

        # local variables
        device_nx_list = kargs.get('device_nx_list')
        ny_list = kargs.get('ny_list')
        nz_list = kargs.get('nz_list')

        # Set the number of device and the device_n_list
        ndev = 1 if is_cpu else 0
        if is_gpu:
            try:
                gpu_devices = common_gpu.gpu_device_list(print_info=False)
                context = cl.Context(gpu_devices)
                ndev += len(gpu_devices)
            except Exception:
                if is_master:
                    print(repr(sys.exc_info()))
                if not is_cpu:
                    # Same policy as the import failure above: without a
                    # CPU fallback there is no device left to run on.
                    if is_master:
                        print("To get the GPU devices is failed.")
                    sys.exit()
                if is_master:
                    print(
                        "To get the GPU devices is failed. The CPU is only used."
                    )
                is_gpu = False

        # Fail with a readable message instead of "'NoneType' object ...".
        for list_name, list_value in (('device_nx_list', device_nx_list),
                                      ('ny_list', ny_list),
                                      ('nz_list', nz_list)):
            if list_value is None:
                raise TypeError("The keyword argument '%s' is required." %
                                list_name)

        # dnx_list is always a private list: it is consumed with pop(0)
        # below and must neither alter the caller's sequence nor fail on a
        # tuple.
        if is_mpi:
            mi, mj, mk = coord
            dnx_list = list(device_nx_list[mi * ndev:(mi + 1) * ndev])
            dny = ny_list[mj]
            dnz = nz_list[mk]
        else:
            dnx_list = list(device_nx_list)
            dny = ny_list[0]
            dnz = nz_list[0]

        total_ndev = mpi_shape[0] * ndev
        if len(device_nx_list) != total_ndev:
            if is_master:
                print(
                    "The device_nx_list %s is not matched with the number of total devices %d."
                    % (device_nx_list, total_ndev))
            sys.exit()

        # create the mainf_list and the buffer_dict
        buffer_dict = {}
        if is_mpi:
            # create BufferFields instances
            # Size of the whole node along x, as computed from the devices.
            snx = sum(dnx_list) - ndev + 1
            sny, snz = dny, dnz

            mpi_target_dict = common_mpi.mpi_target_dict(
                rank, mpi_shape, pbc_axes)
            for direction, target_rank in mpi_target_dict.items():
                if target_rank is not None:
                    # The buffer spans the two axes normal to the direction.
                    n0, n1 = {
                        'x': (sny, snz),
                        'y': (snx, snz),
                        'z': (snx, sny)
                    }[direction[0]]
                    bufferf = cpu.BufferFields(direction, target_rank, n0, n1,
                                               coeff_use, precision_float)
                    buffer_dict[direction] = bufferf

        mainf_list = []
        if is_cpu:
            # The CPU takes the first x-size; the GPUs share the remainder.
            mainf_list.append(
                cpu.Fields(dnx_list.pop(0),
                           dny,
                           dnz,
                           coeff_use,
                           precision_float,
                           use_cpu_core=1))

        if is_gpu:
            mainf_list.extend([
                gpu.Fields(context, gpu_device, dnx, dny, dnz, coeff_use,
                           precision_float)
                for gpu_device, dnx in zip(gpu_devices, dnx_list)
            ])

        # create node.Fields instance
        nodef = node.Fields(mainf_list, buffer_dict)

        # create nodePbc instance
        node_pbc_axes = ''.join([
            axis for i, axis in enumerate(['x', 'y', 'z'])
            if mpi_shape[i] == 1 and axis in pbc_axes
        ])
        if node_pbc_axes != '':
            node.Pbc(nodef, node_pbc_axes)

        # create update instances
        node.Core(nodef)
        # buffer_dict is empty without MPI, so `network` is only touched
        # when it has been imported above.
        for bufferf in nodef.buffer_dict.values():
            network.ExchangeMpiSplitNonBlock(bufferf, max_tstep)

        # accum_sub_ns_dict, node_pts
        if is_mpi:
            asn_dict = common_mpi.accum_sub_ns_dict(mpi_shape, ndev,
                                                    device_nx_list, ny_list,
                                                    nz_list)
            axes = ['x', 'y', 'z']
            node_pt0 = [asn_dict[ax][m] for ax, m in zip(axes, coord)]
            node_pt1 = [asn_dict[ax][m + 1] - 1 for ax, m in zip(axes, coord)]

        # global variables
        self.max_tstep = max_tstep
        self.mpi_shape = mpi_shape
        self.ns = (asn_dict['x'][-1], asn_dict['y'][-1],
                   asn_dict['z'][-1]) if is_mpi else nodef.ns

        self.nodef = nodef
        self.is_master = is_master

        if is_mpi:
            self.asn_dict = asn_dict
            self.node_pt0 = node_pt0
            self.node_pt1 = node_pt1

        # for savefields
        self.savef_tag_list = []
        self.savef_list = []

예제 #30

0

파일 보기

파일: main.py 프로젝트: wbkifun/fdtd_accelerate

    def __init__(self, geometry_h5_path, max_tstep, mpi_shape, pbc_axes='', target_device='all', precision_float='single', **kargs):
        """Create the field, boundary and exchange objects of this node.

        Args:
            geometry_h5_path: Path of the geometry HDF5 file.  Its
                'coeff_use' attribute is forwarded to the field
                constructors; 'nx', 'ny' and 'nz' must be present.
            max_tstep: Number of time steps; stored on the instance and
                passed to the MPI exchange objects.
            mpi_shape: Number of MPI nodes along x, y and z.  (1, 1, 1),
                given as a tuple or a list, disables MPI.
            pbc_axes: String of axis letters with periodic boundaries.  Axes
                that are not split across MPI nodes get a ``node.Pbc``.
            target_device: 'all', or a string containing 'cpu' and/or 'gpu'.
            precision_float: 'single' or 'double'.
            **kargs: Must provide 'device_nx_list', 'ny_list' and 'nz_list'.
                The lists are not modified.

        Raises:
            TypeError: If one of the required keyword lists is missing.
            SystemExit: If the MPI size does not match ``mpi_shape``, the
                geometry file cannot be read, no requested device is usable,
                or ``device_nx_list`` does not match the device count.
        """

        common.check_type('geometry_h5_path', geometry_h5_path, str)
        common.check_type('max_tstep', max_tstep, int)
        common.check_type('mpi_shape', mpi_shape, (list, tuple), int)
        common.check_type('pbc_axes', pbc_axes, str)
        common.check_type('target_device', target_device, str)
        common.check_value('precision_float', precision_float, ['single', 'double'])

        # import modules
        global is_mpi, is_gpu

        # Normalize before comparing: a list passes the type check above but
        # [1, 1, 1] == (1, 1, 1) is False, which used to switch MPI on.
        is_mpi = tuple(mpi_shape) != (1, 1, 1)

        if is_mpi:
            global network, common_mpi, comm, size, rank, coord
            from mpi4py import MPI
            from kemp.fdtd3d import network
            from kemp.fdtd3d.util import common_mpi
            comm = MPI.COMM_WORLD
            size = comm.Get_size()
            rank = comm.Get_rank()
            coord = common_mpi.my_coord(rank, mpi_shape)

        # Only the master (rank 0, or the single process) prints messages.
        is_master = not (is_mpi and rank != 0)
        is_cpu = target_device == 'all' or 'cpu' in target_device
        is_gpu = target_device == 'all' or 'gpu' in target_device

        if is_mpi:
            if reduce(lambda a,b:a*b, mpi_shape) != size:
                if is_master:
                    print("The MPI size %d is not matched the mpi_shape %s" % (size, mpi_shape) )
                sys.exit()

        if is_gpu:
            try:
                global cl, gpu, common_gpu
                import pyopencl as cl
                from kemp.fdtd3d import gpu
                from kemp.fdtd3d.util import common_gpu
            except Exception:
                # Exception (not a bare except) so that Ctrl-C and
                # sys.exit() are not mistaken for a missing module.
                if is_master:
                    print("The 'pyopencl' module is not found.")

                if is_cpu:
                    if is_master:
                        print("The CPU is only used.")
                    is_gpu = False
                else:
                    sys.exit()

        # read from the h5 file
        try:
            h5f = h5py.File(geometry_h5_path, 'r')
            try:
                coeff_use = h5f.attrs['coeff_use']
                # The grid sizes are not used below; they are read only so
                # that a geometry file without them is rejected here.
                for attr_name in ('nx', 'ny', 'nz'):
                    h5f.attrs[attr_name]
            finally:
                # Always release the file handle, even on a missing attribute.
                h5f.close()
        except Exception:
            if is_master:
                print( repr(sys.exc_info()) )
                print("To load the geometry HDF5 file '%s' is failed." % geometry_h5_path)
            sys.exit()

        # local variables
        device_nx_list = kargs.get('device_nx_list')
        ny_list = kargs.get('ny_list')
        nz_list = kargs.get('nz_list')

        # Set the number of device and the device_n_list
        ndev = 1 if is_cpu else 0
        if is_gpu:
            try:
                gpu_devices = common_gpu.gpu_device_list(print_info=False)
                context = cl.Context(gpu_devices)
                ndev += len(gpu_devices)
            except Exception:
                if is_master:
                    print( repr(sys.exc_info()) )
                if not is_cpu:
                    # Same policy as the import failure above: without a
                    # CPU fallback there is no device left to run on.
                    if is_master:
                        print("To get the GPU devices is failed.")
                    sys.exit()
                if is_master:
                    print("To get the GPU devices is failed. The CPU is only used.")
                is_gpu = False

        # Fail with a readable message instead of "'NoneType' object ...".
        for list_name, list_value in (('device_nx_list', device_nx_list), ('ny_list', ny_list), ('nz_list', nz_list)):
            if list_value is None:
                raise TypeError("The keyword argument '%s' is required." % list_name)

        # dnx_list is always a private list: it is consumed with pop(0)
        # below and must neither alter the caller's sequence nor fail on a
        # tuple.
        if is_mpi:
            mi, mj, mk = coord
            dnx_list = list(device_nx_list[mi*ndev:(mi+1)*ndev])
            dny = ny_list[mj]
            dnz = nz_list[mk]
        else:
            dnx_list = list(device_nx_list)
            dny = ny_list[0]
            dnz = nz_list[0]

        total_ndev = mpi_shape[0] * ndev
        if len(device_nx_list) != total_ndev:
            if is_master:
                print("The device_nx_list %s is not matched with the number of total devices %d." % (device_nx_list, total_ndev) )
            sys.exit()

        # create the mainf_list and the buffer_dict
        buffer_dict = {}
        if is_mpi:
            # create BufferFields instances
            # Size of the whole node along x, as computed from the devices.
            snx = sum(dnx_list) - ndev + 1
            sny, snz = dny, dnz

            mpi_target_dict = common_mpi.mpi_target_dict(rank, mpi_shape, pbc_axes)
            for direction, target_rank in mpi_target_dict.items():
                if target_rank is not None:
                    # The buffer spans the two axes normal to the direction.
                    n0, n1 = {'x': (sny, snz), 'y': (snx, snz), 'z': (snx, sny)}[direction[0]]
                    bufferf = cpu.BufferFields(direction, target_rank, n0, n1, coeff_use, precision_float)
                    buffer_dict[direction] = bufferf

        mainf_list = []
        if is_cpu:
            # The CPU takes the first x-size; the GPUs share the remainder.
            mainf_list.append(cpu.Fields(dnx_list.pop(0), dny, dnz, coeff_use, precision_float, use_cpu_core=1))

        if is_gpu:
            mainf_list.extend([gpu.Fields(context, gpu_device, dnx, dny, dnz, coeff_use, precision_float) for gpu_device, dnx in zip(gpu_devices, dnx_list)])

        # create node.Fields instance
        nodef = node.Fields(mainf_list, buffer_dict)

        # create nodePbc instance
        node_pbc_axes = ''.join([axis for i, axis in enumerate(['x', 'y', 'z']) if mpi_shape[i] == 1 and axis in pbc_axes])
        if node_pbc_axes != '':
            node.Pbc(nodef, node_pbc_axes)

        # create update instances
        node.Core(nodef)
        # buffer_dict is empty without MPI, so `network` is only touched
        # when it has been imported above.
        for bufferf in nodef.buffer_dict.values():
            network.ExchangeMpiSplitNonBlock(bufferf, max_tstep)

        # accum_sub_ns_dict, node_pts
        if is_mpi:
            asn_dict = common_mpi.accum_sub_ns_dict(mpi_shape, ndev, device_nx_list, ny_list, nz_list)
            axes = ['x', 'y', 'z']
            node_pt0 = [asn_dict[ax][m] for ax, m in zip(axes, coord)]
            node_pt1 = [asn_dict[ax][m+1] - 1 for ax, m in zip(axes, coord)]

        # global variables
        self.max_tstep = max_tstep
        self.mpi_shape = mpi_shape
        self.ns = (asn_dict['x'][-1], asn_dict['y'][-1], asn_dict['z'][-1]) if is_mpi else nodef.ns

        self.nodef = nodef
        self.is_master = is_master

        if is_mpi:
            self.asn_dict = asn_dict
            self.node_pt0 = node_pt0
            self.node_pt1 = node_pt1

        # for savefields
        self.savef_tag_list = []
        self.savef_list = []

예제 #31

0

파일 보기

    def test_y_pbc_x_exchange(self):
        """Check the x-axis exchange and the y-axis PBC on a GPU + CPU node.

        One GPU field and one CPU field are joined in a ``NodeFields``.
        After a single E and H update (main fields, then PBC, then exchange)
        three pairs of boundary planes are read back and must be identical:
        the GPU/CPU interface along x, and the two y faces of each device.
        """
        # instance
        nx, ny, nz = 40, 50, 60
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)

        gpuf = gpu.Fields(context, gpu_devices[0], nx, ny, nz)
        cpuf = cpu.Fields(nx, ny, nz)
        mainf_list = [gpuf, cpuf]
        nodef = NodeFields(mainf_list)
        core = NodeCore(nodef)
        pbc = NodePbc(nodef, 'y')
        exchange = NodeExchange(nodef)

        # generate random source
        gpuf.set_eh_bufs(
            *common_update.generate_random_ehs(nx, ny, nz, nodef.dtype))
        cpuf.set_ehs(
            *common_update.generate_random_ehs(nx, ny, nz, nodef.dtype))

        # one full time step
        for mainf in mainf_list:
            mainf.update_e()
        pbc.update_e()
        exchange.update_e()

        for mainf in mainf_list:
            mainf.update_h()
        pbc.update_h()
        exchange.update_h()

        mainf_list[-1].enqueue_barrier()

        def verify(label, mod0, f0, mod1, f1, specs):
            # Each spec is (e/h key, field names, (pt0, pt1) on side 0,
            # (pt0, pt1) on side 1).  Getters are created in spec order,
            # side 0 before side 1.
            getf0, getf1 = {}, {}
            for eh, strfs, box0, box1 in specs:
                getf0[eh] = mod0.GetFields(f0, list(strfs), box0[0], box0[1])
                getf1[eh] = mod1.GetFields(f1, list(strfs), box1[0], box1[1])

            for getf in list(getf0.values()) + list(getf1.values()):
                getf.get_event().wait()

            for eh in ['e', 'h']:
                g0 = getf0[eh].get_fields()
                g1 = getf1[eh].get_fields()
                norm = np.linalg.norm(g0 - g1)
                self.assertEqual(norm, 0, '%g, %s, %s' % (norm, label, eh))

        # x-axis exchange: last x plane of the GPU vs first x plane of the CPU
        x_exchange = [
            ('e', ('ey', 'ez'),
             ((nx - 1, 0, 0), (nx - 1, ny - 2, nz - 2)),
             ((0, 0, 0), (0, ny - 2, nz - 2))),
            ('h', ('hy', 'hz'),
             ((nx - 1, 1, 1), (nx - 1, ny - 1, nz - 1)),
             ((0, 1, 1), (0, ny - 1, nz - 1))),
        ]
        verify('x-axis exchange', gpu, gpuf, cpu, cpuf, x_exchange)

        # y-axis pbc: last y plane vs first y plane of the same device
        y_pbc = [
            ('e', ('ex', 'ez'),
             ((0, ny - 1, 0), (nx - 2, ny - 1, nz - 2)),
             ((0, 0, 0), (nx - 2, 0, nz - 2))),
            ('h', ('hx', 'hz'),
             ((1, ny - 1, 1), (nx - 1, ny - 1, nz - 1)),
             ((1, 0, 1), (nx - 1, 0, nz - 1))),
        ]
        verify('y-axis pbc gpu', gpu, gpuf, gpu, gpuf, y_pbc)
        verify('y-axis pbc cpu', cpu, cpuf, cpu, cpuf, y_pbc)

예제 #32

0

파일 보기

파일: test_pbc.py 프로젝트: wbkifun/fdtd_accelerate

    def runTest(self):
        """Check the node-level periodic boundary along ``axis``.

        ``self.args`` is ``(axis, nx, ny, nz)``.  One field object per GPU
        plus one CPU field object are joined in a ``NodeFields``; after a
        single E and H update the two opposite boundary planes along
        ``axis`` must hold identical values.  For 'x' the planes belong to
        the first and the last field object, for 'y' and 'z' every field
        object is checked on its own.
        """
        axis, nx, ny, nz = self.args
        # Lets getattr(self, f.device_type) resolve to the matching module.
        self.gpu, self.cpu = gpu, cpu

        # instance
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)

        mainf_list = [gpu.Fields(context, device, nx, ny, nz) for device in gpu_devices]
        mainf_list.append(cpu.Fields(nx, ny, nz))
        nodef = NodeFields(mainf_list)
        dtype = nodef.dtype

        pbc = NodePbc(nodef, axis)
        exchange = NodeExchange(nodef)

        # generate random source
        for gpuf in mainf_list[:-1]:
            gx, gy, gz = gpuf.ns
            gpuf.set_eh_bufs(*common_update.generate_random_ehs(gx, gy, gz, dtype))

        for cpuf in nodef.cpuf_dict.values():
            cx, cy, cz = cpuf.ns
            cpuf.set_ehs(*common_update.generate_random_ehs(cx, cy, cz, dtype))

        # one full time step
        for mainf in mainf_list:
            mainf.update_e()
        pbc.update_e()
        exchange.update_e()

        for mainf in mainf_list:
            mainf.update_h()
        pbc.update_h()
        exchange.update_h()

        mainf_list[-1].enqueue_barrier()

        def make_getf(f, strfs, pt0, pt1):
            # GetFields of the module (gpu or cpu) that matches the field.
            return getattr(self, f.device_type).GetFields(f, strfs, pt0, pt1)

        def plane_norms(getf0, getf1):
            # Wait for all four getters, then yield (e/h, norm) lazily so
            # that each norm is asserted before the next one is computed.
            for getf in list(getf0.values()) + list(getf1.values()):
                getf.get_event().wait()
            for eh in ["e", "h"]:
                yield eh, np.linalg.norm(getf0[eh].get_fields() - getf1[eh].get_fields())

        if axis == "x":
            f0, f1 = mainf_list[0], mainf_list[-1]
            getf0, getf1 = {}, {}
            getf0["e"] = make_getf(f0, ["ey", "ez"], (0, 0, 0), (0, f0.ny - 2, f0.nz - 2))
            getf1["e"] = make_getf(f1, ["ey", "ez"], (f1.nx - 1, 0, 0), (f1.nx - 1, f1.ny - 2, f1.nz - 2))
            getf0["h"] = make_getf(f0, ["hy", "hz"], (0, 1, 1), (0, f0.ny - 1, f0.nz - 1))
            getf1["h"] = make_getf(f1, ["hy", "hz"], (f1.nx - 1, 1, 1), (f1.nx - 1, f1.ny - 1, f1.nz - 1))

            for eh, norm in plane_norms(getf0, getf1):
                self.assertEqual(norm, 0, "%g, %s, %s" % (norm, "x", eh))

        elif axis == "y":
            for f in mainf_list:
                getf0, getf1 = {}, {}
                getf0["e"] = make_getf(f, ["ex", "ez"], (0, 0, 0), (f.nx - 2, 0, f.nz - 2))
                getf1["e"] = make_getf(f, ["ex", "ez"], (0, f.ny - 1, 0), (f.nx - 2, f.ny - 1, f.nz - 2))
                getf0["h"] = make_getf(f, ["hx", "hz"], (1, 0, 1), (f.nx - 1, 0, f.nz - 1))
                getf1["h"] = make_getf(f, ["hx", "hz"], (1, f.ny - 1, 1), (f.nx - 1, f.ny - 1, f.nz - 1))

                for eh, norm in plane_norms(getf0, getf1):
                    self.assertEqual(norm, 0, "%g, %s, %s, %s" % (norm, "y", eh, f.device_type))

        elif axis == "z":
            for f in mainf_list:
                getf0, getf1 = {}, {}
                getf0["e"] = make_getf(f, ["ex", "ey"], (0, 0, f.nz - 1), (f.nx - 2, f.ny - 2, f.nz - 1))
                getf1["e"] = make_getf(f, ["ex", "ey"], (0, 0, 0), (f.nx - 2, f.ny - 2, 0))
                getf0["h"] = make_getf(f, ["hx", "hy"], (1, 1, f.nz - 1), (f.nx - 1, f.ny - 1, f.nz - 1))
                getf1["h"] = make_getf(f, ["hx", "hy"], (1, 1, 0), (f.nx - 1, f.ny - 1, 0))

                for eh, norm in plane_norms(getf0, getf1):
                    self.assertEqual(norm, 0, "%g, %s, %s" % (norm, "z", eh))

예제 #33

0

파일 보기

파일: test_exchange.py 프로젝트: xj361685640/fdtd_accelerate

    def runTest(self):
        """Two-rank MPI exchange experiment along the x axis.

        ``self.args`` is ``(nx, ny, nz)``.  Rank 0 owns an 'x+' buffer and
        rank 1 an 'x-' buffer, each attached to one GPU main field.  For
        ``tmax`` steps the core and MPI instances are driven by hand; after
        each half step rank 1 saves one plane of its buffer to a ``.npy``
        file and rank 0 polls for that file and compares it with its own
        plane.

        NOTE(review): the assertions are commented out, so a mismatch is
        only printed and never fails the test.  ``rank``, ``sleep`` and
        ``ExchangeMpi`` are not defined in this method and must come from
        the enclosing module.  Only ranks 0 and 1 are handled.
        """
        nx, ny, nz = self.args
        tmax = 10

        # instances
        # One buffer per rank, facing the other rank.
        buffer_dict = {}
        if rank == 0: buffer_dict['x+'] = cpu.BufferFields('x+', ny, nz, '', 'single')
        elif rank == 1: buffer_dict['x-'] = cpu.BufferFields('x-', ny, nz, '', 'single')

        import pyopencl as cl
        from kemp.fdtd3d.util import common_gpu
        from kemp.fdtd3d import gpu
        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        mainf_list = [ gpu.Fields(context, gpu_devices[0], nx, ny, nz) ]
        #mainf_list = [ cpu.Fields(nx, ny, nz, use_cpu_core=1) ]
        nodef = node.Fields(mainf_list, buffer_dict)

        # generate random source
        dtype = nodef.dtype
        ehs = common_random.generate_ehs(nx, ny, nz, dtype)
        # buf_ehs is only referenced by the commented-out set_ehs calls below.
        buf_ehs = common_random.generate_ehs(3, ny, nz, dtype)
        #nodef.cpuf.set_ehs(*ehs)
        nodef.mainf_list[0].set_eh_bufs(*ehs)
        # Partner rank of this process.
        other = {0: 1, 1: 0}[rank]
        if rank == 0: 
            #nodef.buffer_dict['x+'].set_ehs(*buf_ehs)
            ExchangeMpi(nodef.buffer_dict['x+'], other, tmax)
        elif rank == 1:
            #nodef.buffer_dict['x-'].set_ehs(*buf_ehs)
            ExchangeMpi(nodef.buffer_dict['x-'], other, tmax)
        node.Core(nodef)

        # allocations for verify
        # The two ranks read different x planes of their buffers.
        if rank == 0:
            getf_e = cpu.GetFields(nodef.buffer_dict['x+'], ['ey', 'ez'], (2, 0, 0), (2, ny-1, nz-1))
            getf_h = cpu.GetFields(nodef.buffer_dict['x+'], ['hy', 'hz'], (1, 0, 0), (1, ny-1, nz-1))
        elif rank == 1:
            getf_e = cpu.GetFields(nodef.buffer_dict['x-'], ['ey', 'ez'], (1, 0, 0), (1, ny-1, nz-1))
            getf_h = cpu.GetFields(nodef.buffer_dict['x-'], ['hy', 'hz'], (0, 0, 0), (0, ny-1, nz-1))

        # verify
        print 'nodef, instance_list', rank, nodef.instance_list
        print 'f0, instance_list', rank, nodef.mainf_list[0].instance_list
        # Pull the individual update objects out of the instance lists so
        # that they can be called one by one instead of via nodef.update_*.
        exch = nodef.instance_list[0]
        main_core = nodef.mainf_list[0].instance_list[0]
        if rank == 0: 
            #nodef.buffer_dict['x+'].instance_list.pop(0)
            print 'bufferf x+, instance_list', rank, nodef.buffer_dict['x+'].instance_list
            core, mpi = nodef.buffer_dict['x+'].instance_list
        elif rank == 1: 
            #nodef.buffer_dict['x-'].instance_list.pop(0)
            print 'bufferf x-, instance_list', rank, nodef.buffer_dict['x-'].instance_list
            core, mpi = nodef.buffer_dict['x-'].instance_list



        for tstep in xrange(1, tmax+1):
            #if rank == 0: print 'tstep', tstep

            #nodef.update_e()
            main_core.update_e()
            # E update: rank 0 runs a single pass, rank 1 a 'pre'/'post' pair.
            if rank == 0: 
                #print tstep, rank, 'core upE'
                core.update_e('')
                #print tstep, rank, 'mpi upE'
                mpi.update_e('')
            elif rank == 1: 
                #print tstep, rank, 'core upE pre'
                core.update_e('pre')
                #print tstep, rank, 'mpi upE pre'
                mpi.update_e('pre')
                #print tstep, rank, 'core upE post'
                core.update_e('post')
                #print tstep, rank, 'mpi upE post'
                mpi.update_e('post')
            exch.update_e()

            # verify the buffer
            #print tstep, rank, 'pre get'
            getf_h.get_event().wait()
            #print tstep, rank, 'after get'
            if rank == 1:
                #print tstep, rank, 'pre save'
                np.save('rank1_h_%d' % tstep, getf_h.get_fields())
                #print tstep, rank, 'after save'
            elif rank == 0:
                # Poll until rank 1 has written its file for this step.
                no_exist_npy = True
                while no_exist_npy:
                    try:
                        arr1 = np.load('rank1_h_%d.npy' % tstep)
                        no_exist_npy = False
                    except:
                        sleep(0.5)

                arr0 = getf_h.get_fields()
                #print tstep, 'h arr0\n', arr0
                #print tstep, 'h arr1\n', arr1
                norm = np.linalg.norm(arr0 - arr1)
                if norm != 0: print tstep, 'h norm', norm
                #if tstep > 1: self.assertEqual(norm, 0, '%s, %g, h' % (self.args, norm))


            #nodef.update_h()
            main_core.update_h()
            # H update: the roles are swapped, rank 0 runs the 'pre'/'post' pair.
            if rank == 0: 
                #print tstep, rank, 'core upH pre'
                core.update_h('pre')
                #print tstep, rank, 'mpi upH pre'
                mpi.update_h('pre')
                #print tstep, rank, 'core upH post'
                core.update_h('post')
                #print tstep, rank, 'mpi upH post'
                mpi.update_h('post')
            elif rank == 1: 
                #print tstep, rank, 'core upH'
                core.update_h('')
                #print tstep, rank, 'mpi upH'
                mpi.update_h('')
            exch.update_h()

            getf_e.get_event().wait()
            if rank == 1:
                np.save('rank1_e_%d' % tstep, getf_e.get_fields())
            elif rank == 0:
                # Poll until rank 1 has written its file for this step.
                no_exist_npy = True
                while no_exist_npy:
                    try:
                        arr1 = np.load('rank1_e_%d.npy' % tstep)
                        no_exist_npy = False
                    except:
                        sleep(0.5)

                arr0 = getf_e.get_fields()
                norm = np.linalg.norm(arr0 - arr1)
                if norm != 0: print tstep, 'e norm', norm
                #self.assertEqual(norm, 0, '%s, %g, e' % (self.args, norm))

        '''

예제 #34

0

파일 보기

파일: measure_mpi.py 프로젝트: wbkifun/fdtd_accelerate


# Plotting runs use a shorter simulation (see tmax below).
is_plot = False

# Grid size. The alternatives and their sizes are the author's notes.
#nx, ny, nz = 240, 256, 256  # 540 MB
#nx, ny, nz = 544, 544, 544  # 5527 MB
#nx, ny, nz = 512, 512, 512  # 4608 MB
#nx, ny, nz = 480, 480, 480  # 3796 MB
nx, ny, nz = 800, 256, 256  # 576 MB
#nx, ny, nz = 128, 128, 128  # 72 MB

coeff_use = 'e'
precision_float = 'single'

# instances
# One GPU field object on the first device, driven through a CPU queue task.
gpu_devices = common_gpu.gpu_device_list(print_info=False)
context = cl.Context(gpu_devices)
device = gpu_devices[0]
qtask = cpu.QueueTask()
fields = Fields(context, device, qtask, nx, ny, nz, coeff_use, precision_float)
Core(fields)

if is_plot:
    tmax = 250
else:
    tmax = 1000

# Exchange direction(s) by position in the rank chain: the first rank uses
# '+', the last rank '-', every rank in between both.
if rank == 0:
    direction = '+'
elif rank == size - 1:
    direction = '-'
else:
    direction = '+-'

# Exchange variants the author switched between:
#exch = node.ExchangeMpiNonBlock(fields, direction)

#exch = node.ExchangeMpiBufferBlock(fields, direction)
#exch = node.ExchangeMpiBufferBlockSplit(fields, direction)

예제 #35

0

파일 보기

파일: test_pbc.py 프로젝트: xj361685640/fdtd_accelerate

    def runTest(self):
        """Compare the two opposite boundary planes along ``axis`` after one update.

        ``self.args`` supplies ``(axis, nx, ny, nz, precision_float)``. A
        ``Pbc`` object is created on the fields for ``axis``, the E/H buffers
        are filled with random data, and one ``update_e``/``update_h`` pass is
        run. The two field components other than the ``axis`` one are then
        read on the index-0 plane and on the index-(n-1) plane, and the norm
        of their difference is asserted to be exactly zero for both E and H.
        """
        axis, nx, ny, nz, precision_float = self.args

        gpu_devices = common_gpu.gpu_device_list(print_info=False)
        context = cl.Context(gpu_devices)
        device = gpu_devices[0]
        fields = Fields(context, device, nx, ny, nz, '', precision_float)
        # NOTE(review): `pbc` is not referenced again below; presumably
        # constructing it attaches the boundary handling to `fields` - confirm.
        pbc = Pbc(fields, axis)

        # allocations
        ehs = common_update.generate_random_ehs(nx, ny, nz, fields.dtype)
        fields.set_eh_bufs(*ehs)

        # update
        fields.update_e()
        fields.update_h()

        # verify
        # getf0 reads the plane at index 0 along `axis`, getf1 the plane at
        # index n-1; both are keyed by 'e' / 'h'.
        getf0, getf1 = {}, {}

        # Only the components other than the `axis` one are compared.
        strfs_e = {
            'x': ['ey', 'ez'],
            'y': ['ex', 'ez'],
            'z': ['ex', 'ey'],
        }[axis]
        strfs_h = {
            'x': ['hy', 'hz'],
            'y': ['hx', 'hz'],
            'z': ['hx', 'hy'],
        }[axis]

        # E fields: in-plane corner points run from 0 to n-2.
        pt0 = (0, 0, 0)
        pt1 = {
            'x': (0, ny-2, nz-2),
            'y': (nx-2, 0, nz-2),
            'z': (nx-2, ny-2, 0),
        }[axis]
        getf0['e'] = GetFields(fields, strfs_e, pt0, pt1)

        pt0 = {
            'x': (nx-1, 0, 0),
            'y': (0, ny-1, 0),
            'z': (0, 0, nz-1),
        }[axis]
        pt1 = {
            'x': (nx-1, ny-2, nz-2),
            'y': (nx-2, ny-1, nz-2),
            'z': (nx-2, ny-2, nz-1),
        }[axis]
        getf1['e'] = GetFields(fields, strfs_e, pt0, pt1)

        # H fields: in-plane corner points run from 1 to n-1.
        pt0 = {
            'x': (0, 1, 1),
            'y': (1, 0, 1),
            'z': (1, 1, 0),
        }[axis]
        pt1 = {
            'x': (0, ny-1, nz-1),
            'y': (nx-1, 0, nz-1),
            'z': (nx-1, ny-1, 0),
        }[axis]
        getf0['h'] = GetFields(fields, strfs_h, pt0, pt1)

        pt0 = {
            'x': (nx-1, 1, 1),
            'y': (1, ny-1, 1),
            'z': (1, 1, nz-1),
        }[axis]
        pt1 = (nx - 1, ny - 1, nz - 1)
        getf1['h'] = GetFields(fields, strfs_h, pt0, pt1)

        # list() keeps the concatenation valid on Python 3, where
        # dict.values() returns a view object that does not support `+`.
        for getf in list(getf0.values()) + list(getf1.values()):
            getf.get_event().wait()

        for eh in ['e', 'h']:
            norm = np.linalg.norm(
                getf0[eh].get_fields() - getf1[eh].get_fields())
            self.assertEqual(norm, 0, '%g, %s, %s' % (norm, self.args, eh))