def runTest(self):
    """Check that GetFields hands back exactly what was uploaded.

    ``self.args`` is unpacked as ``(nx, ny, nz, str_f, pt0, pt1)``.  Random
    field arrays are pushed with ``fields.set_eh_bufs``; the region between
    ``pt0`` and ``pt1`` is then fetched through ``GetFields`` and compared,
    field by field, with the same slice of the host arrays.
    """
    nx, ny, nz, str_f, pt0, pt1 = self.args
    region = common.slices_two_points(pt0, pt1)
    names = common.convert_to_tuple(str_f)

    # instance: one context over all listed GPUs, first device is used
    devices = common_gpu.gpu_device_list(print_info=False)
    ctx = cl.Context(devices)
    fields = Fields(ctx, devices[0], nx, ny, nz, '', 'single')
    getf = GetFields(fields, str_f, pt0, pt1)

    # host allocations: reference arrays keyed by field name
    host_ehs = common_random.generate_ehs(nx, ny, nz, fields.dtype)
    reference = dict(zip(['ex', 'ey', 'ez', 'hx', 'hy', 'hz'], host_ehs))
    fields.set_eh_bufs(*host_ehs)

    # verify: block on the event returned by GetFields before reading
    getf.get_event().wait()
    for name in names:
        expected = reference[name][region]
        fetched = getf.get_fields(name)
        norm = np.linalg.norm(expected - fetched)
        self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))
def runTest(self):
    """Check that the CUDA GetFields returns exactly what was uploaded.

    ``self.args`` is unpacked as ``(nx, ny, nz, str_f, pt0, pt1)``.  Random
    field arrays are pushed with ``fields.set_eh_bufs`` and the region
    between ``pt0`` and ``pt1`` is read back through ``GetFields`` and
    compared with the same slice of the host arrays.

    ``fields.context_pop()`` is called in a ``finally`` clause so that it
    still runs when an assertion (or anything else) raises.
    """
    nx, ny, nz, str_f, pt0, pt1 = self.args
    slidx = common.slices_two_points(pt0, pt1)
    str_fs = common.convert_to_tuple(str_f)

    # instance
    fields = Fields(0, nx, ny, nz, '', 'single')
    try:
        getf = GetFields(fields, str_f, pt0, pt1)

        # host allocations
        ehs = common_random.generate_ehs(nx, ny, nz, fields.dtype)
        eh_dict = dict(zip(['ex', 'ey', 'ez', 'hx', 'hy', 'hz'], ehs))
        fields.set_eh_bufs(*ehs)

        # verify
        getf.wait()
        for name in str_fs:
            original = eh_dict[name][slidx]
            copy = getf.get_fields(name)
            norm = np.linalg.norm(original - copy)
            self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))
    finally:
        # Previously skipped whenever an assertion above failed.
        fields.context_pop()
def runTest(self):
    """Check the field written by IncidentDirect after one update step.

    ``self.args`` is unpacked as ``(nx, ny, nz, str_f, pt0, pt1, is_array)``.
    The source value is a random array shaped by ``pt0``/``pt1`` when
    ``is_array`` is true, otherwise a random scalar.  After one
    ``update_e``/``update_h`` pair the device buffer for ``str_f`` is copied
    back and, inside the source region, must equal ``value * tfunc(1)``.

    ``fields.context_pop()` is called in a ``finally`` clause so that it
    still runs when the assertion (or anything else) raises.
    """
    nx, ny, nz, str_f, pt0, pt1, is_array = self.args
    slice_xyz = common.slices_two_points(pt0, pt1)

    # generate random source
    if is_array:
        shape = common.shape_two_points(pt0, pt1)
        value = np.random.rand(*shape).astype(np.float32)
    else:
        value = np.random.ranf()

    # instance
    fields = Fields(0, nx, ny, nz, '', 'single')
    try:
        tfunc = lambda tstep: np.sin(0.03*tstep)
        incident = IncidentDirect(fields, str_f, pt0, pt1, tfunc, value)

        # host allocations
        eh = np.zeros(fields.ns_pitch, dtype=fields.dtype)

        # verify: both factors are cast to the field dtype before multiplying
        eh[slice_xyz] = fields.dtype(value) * fields.dtype(tfunc(1))
        fields.update_e()
        fields.update_h()

        copy_eh_buf = fields.get_buf(str_f)
        copy_eh = np.zeros_like(eh)
        cuda.memcpy_dtoh(copy_eh, copy_eh_buf)

        original = eh[slice_xyz]
        copy = copy_eh[slice_xyz]
        norm = np.linalg.norm(original - copy)
        self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))
    finally:
        # Previously skipped whenever the assertion above failed.
        fields.context_pop()
def runTest(self):
    """Round-trip random fields through the device and GetFields.

    ``self.args`` supplies ``(nx, ny, nz, str_f, pt0, pt1)``.  Every field
    named by ``str_f`` must come back from ``GetFields`` identical to the
    ``pt0``..``pt1`` slice of the host array that was uploaded.
    """
    nx, ny, nz, str_f, pt0, pt1 = self.args
    box = common.slice_index_two_points(pt0, pt1)
    wanted = common.convert_to_tuple(str_f)

    # instance
    gpus = common_gpu.gpu_device_list(print_info=False)
    fields = Fields(cl.Context(gpus), gpus[0], nx, ny, nz, '', 'single')
    getf = GetFields(fields, str_f, pt0, pt1)

    # host allocations
    arrays = common_update.generate_random_ehs(nx, ny, nz, fields.dtype)
    by_name = dict(zip(['ex', 'ey', 'ez', 'hx', 'hy', 'hz'], arrays))
    fields.set_eh_bufs(*arrays)

    # verify
    event = getf.get_event()
    event.wait()
    for field_name in wanted:
        difference = by_name[field_name][box] - getf.get_fields(field_name)
        norm = np.linalg.norm(difference)
        self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))
def runTest(self):
    """Check that SetFields writes the given value into the device fields.

    ``self.args`` supplies ``(nx, ny, nz, str_f, pt0, pt1, is_array)``.  A
    random array (``is_array`` true) or random scalar is written with
    ``setf.set_fields``; the same value is written into zeroed host arrays
    and every field named by ``str_f`` is compared over the whole grid.
    """
    nx, ny, nz, str_f, pt0, pt1, is_array = self.args
    region = common.slices_two_points(pt0, pt1)
    names = common.convert_to_tuple(str_f)

    # instance
    devices = common_gpu.gpu_device_list(print_info=False)
    ctx = cl.Context(devices)
    qtask = QueueTask()
    fields = Fields(ctx, devices[0], qtask, nx, ny, nz, '', 'single')
    setf = SetFields(fields, str_f, pt0, pt1, is_array)

    # generate random source
    if is_array:
        shape = common.shape_two_points(pt0, pt1, len(names))
        value = np.random.rand(*shape).astype(fields.dtype)
        # one equal chunk of the source array per field name
        chunks = dict(zip(names, np.split(value, len(names))))
    else:
        value = np.random.ranf()

    # host allocations
    host = [np.zeros(fields.ns, dtype=fields.dtype) for _ in range(6)]
    reference = dict(zip(['ex', 'ey', 'ez', 'hx', 'hy', 'hz'], host))
    readback = np.zeros(fields.ns_pitch, dtype=fields.dtype)

    # verify
    for name in names:
        reference[name][region] = chunks[name] if is_array else value

    setf.set_fields(value)
    setf.mainf.enqueue_barrier()

    for name in names:
        cl.enqueue_copy(fields.queue, readback, fields.get_buf(name))
        expected = reference[name]
        # fields.slice_z selects the z range compared against the host array
        fetched = readback[:, :, fields.slice_z]
        norm = np.linalg.norm(expected - fetched)
        self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))
def runTest(self):
    """Check that Pbc makes the two boundary planes along ``axis`` agree.

    ``self.args`` supplies ``(axis, nx, ny, nz, precision_float)``.  After
    one ``update_e``/``update_h`` pair on random fields, the two field
    components listed for ``axis`` are read on the index-0 plane and on the
    last plane along ``axis``; the E pair and the H pair must each be
    identical between the two planes.
    """
    axis, nx, ny, nz, precision_float = self.args

    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    device = gpu_devices[0]
    fields = Fields(context, device, nx, ny, nz, '', precision_float)
    pbc = Pbc(fields, axis)

    # allocations
    ehs = common_update.generate_random_ehs(nx, ny, nz, fields.dtype)
    fields.set_eh_bufs(*ehs)

    # update
    fields.update_e()
    fields.update_h()

    # verify
    getf0, getf1 = {}, {}
    strfs_e = {'x': ['ey', 'ez'], 'y': ['ex', 'ez'], 'z': ['ex', 'ey']}[axis]
    strfs_h = {'x': ['hy', 'hz'], 'y': ['hx', 'hz'], 'z': ['hx', 'hy']}[axis]

    # E components on the first plane
    pt0 = (0, 0, 0)
    pt1 = {'x': (0, ny-2, nz-2),
           'y': (nx-2, 0, nz-2),
           'z': (nx-2, ny-2, 0)}[axis]
    getf0['e'] = GetFields(fields, strfs_e, pt0, pt1)

    # E components on the last plane
    pt0 = {'x': (nx-1, 0, 0),
           'y': (0, ny-1, 0),
           'z': (0, 0, nz-1)}[axis]
    pt1 = {'x': (nx-1, ny-2, nz-2),
           'y': (nx-2, ny-1, nz-2),
           'z': (nx-2, ny-2, nz-1)}[axis]
    getf1['e'] = GetFields(fields, strfs_e, pt0, pt1)

    # H components on the first plane
    pt0 = {'x': (0, 1, 1),
           'y': (1, 0, 1),
           'z': (1, 1, 0)}[axis]
    pt1 = {'x': (0, ny-1, nz-1),
           'y': (nx-1, 0, nz-1),
           'z': (nx-1, ny-1, 0)}[axis]
    getf0['h'] = GetFields(fields, strfs_h, pt0, pt1)

    # H components on the last plane
    pt0 = {'x': (nx-1, 1, 1),
           'y': (1, ny-1, 1),
           'z': (1, 1, nz-1)}[axis]
    pt1 = (nx-1, ny-1, nz-1)
    getf1['h'] = GetFields(fields, strfs_h, pt0, pt1)

    # list() keeps the concatenation valid when values() returns a view
    for getf in list(getf0.values()) + list(getf1.values()):
        getf.get_event().wait()

    for eh in ['e', 'h']:
        norm = np.linalg.norm(
            getf0[eh].get_fields() - getf1[eh].get_fields())
        self.assertEqual(norm, 0, '%g, %s, %s' % (norm, self.args, eh))
def runTest(self):
    """Check that the CUDA SetFields writes the given value into the fields.

    ``self.args`` supplies ``(nx, ny, nz, str_f, pt0, pt1, is_array)``.  A
    random array (``is_array`` true) or random scalar is written with
    ``setf.set_fields``; the same value is written into zeroed host arrays
    and every field named by ``str_f`` is compared over the whole grid.

    ``fields.context_pop()`` is called in a ``finally`` clause so that it
    still runs when an assertion (or anything else) raises.
    """
    nx, ny, nz, str_f, pt0, pt1, is_array = self.args
    slidx = common.slices_two_points(pt0, pt1)
    str_fs = common.convert_to_tuple(str_f)

    # instance
    fields = Fields(0, nx, ny, nz, '', 'single')
    try:
        setf = SetFields(fields, str_f, pt0, pt1, is_array)

        # generate random source
        if is_array:
            shape = common.shape_two_points(pt0, pt1, len(str_fs))
            value = np.random.rand(*shape).astype(fields.dtype)
            split_value = np.split(value, len(str_fs))
            split_value_dict = dict(zip(str_fs, split_value))
        else:
            value = np.random.ranf()

        # host allocations
        ehs = [np.zeros(fields.ns, dtype=fields.dtype) for _ in range(6)]
        eh_dict = dict(zip(['ex', 'ey', 'ez', 'hx', 'hy', 'hz'], ehs))
        gpu_eh = np.zeros(fields.ns_pitch, dtype=fields.dtype)

        # verify
        for name in str_fs:
            if is_array:
                eh_dict[name][slidx] = split_value_dict[name]
            else:
                eh_dict[name][slidx] = value

        setf.set_fields(value)

        for name in str_fs:
            cuda.memcpy_dtoh(gpu_eh, fields.get_buf(name))
            original = eh_dict[name]
            copy = gpu_eh[:, :, fields.slice_z]
            norm = np.linalg.norm(original - copy)
            self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))
    finally:
        # Previously skipped whenever an assertion above failed.
        fields.context_pop()
def runTest(self):
    """Check the field written by IncidentDirect after one update step.

    ``self.args`` supplies ``(nx, ny, nz, str_f, pt0, pt1, is_array)``.  The
    source value is a random array or a random scalar depending on
    ``is_array``.  After one ``update_e``/``update_h`` pair, the device
    buffer for ``str_f`` must equal ``value * tfunc(1)`` in the source region.
    """
    nx, ny, nz, str_f, pt0, pt1, is_array = self.args
    region = common.slices_two_points(pt0, pt1)

    # generate random source
    if not is_array:
        value = np.random.ranf()
    else:
        src_shape = common.shape_two_points(pt0, pt1)
        value = np.random.rand(*src_shape).astype(np.float32)

    # instance
    devices = common_gpu.gpu_device_list(print_info=False)
    ctx = cl.Context(devices)
    qtask = QueueTask()
    fields = Fields(ctx, devices[0], qtask, nx, ny, nz, '', 'single')
    tfunc = lambda tstep: np.sin(0.03*tstep)
    incident = IncidentDirect(fields, str_f, pt0, pt1, tfunc, value)

    # host allocations
    expected_full = np.zeros(fields.ns_pitch, dtype=fields.dtype)

    # verify: both factors are cast to the field dtype before multiplying
    expected_full[region] = fields.dtype(value) * fields.dtype(tfunc(1))
    fields.update_e()
    fields.update_h()
    fields.enqueue_barrier()

    readback = np.zeros_like(expected_full)
    cl.enqueue_copy(fields.queue, readback, fields.get_buf(str_f))

    norm = np.linalg.norm(expected_full[region] - readback[region])
    self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))
def runTest(self):
    """Check the field written by DirectIncident after one update step.

    ``self.args`` supplies ``(nx, ny, nz, str_f, pt0, pt1, is_array)``.  The
    source value is a random array or a random scalar depending on
    ``is_array``.  After one ``update_e``/``update_h`` pair, the device
    buffer for ``str_f`` must equal ``value * tfunc(1)`` in the source region.
    """
    nx, ny, nz, str_f, pt0, pt1, is_array = self.args
    box = common.slice_index_two_points(pt0, pt1)

    # generate random source
    if is_array:
        value = np.random.rand(*common.shape_two_points(pt0, pt1))
        value = value.astype(np.float32)
    else:
        value = np.random.ranf()

    # instance
    gpus = common_gpu.gpu_device_list(print_info=False)
    fields = Fields(cl.Context(gpus), gpus[0], nx, ny, nz, '', 'single')
    tfunc = lambda tstep: np.sin(0.03 * tstep)
    incident = DirectIncident(fields, str_f, pt0, pt1, tfunc, value)

    # host allocations
    host_eh = np.zeros(fields.ns_pitch, dtype=fields.dtype)

    # verify: both factors are cast to the field dtype before multiplying
    host_eh[box] = fields.dtype(value) * fields.dtype(tfunc(1))
    fields.update_e()
    fields.update_h()

    device_eh = np.zeros_like(host_eh)
    cl.enqueue_copy(fields.queue, device_eh, fields.get_buf(str_f))

    expected = host_eh[box]
    actual = device_eh[box]
    norm = np.linalg.norm(expected - actual)
    self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))
# Grid-size presets.  The memory figures match nx*ny*nz * 9 * 4 bytes.
#nx, ny, nz = 240, 256, 256    # 540 MB
#nx, ny, nz = 544, 544, 544    # 5527 MB
#nx, ny, nz = 512, 512, 512    # 4608 MB
#nx, ny, nz = 480, 480, 480    # 3796 MB
#nx, ny, nz = 128, 128, 128    # 72 MB
nx, ny, nz = 240, 256, 256     # 540 MB
coeff_use = 'e'
precision_float = 'single'

# instances: one context over all listed GPUs, first device is used
gpu_devices = common_gpu.gpu_device_list(print_info=False)
context = cl.Context(gpu_devices)
device = gpu_devices[0]
fields = Fields(context, device, nx, ny, nz, coeff_use, precision_float)

# fewer time steps when plotting
tmax = 250 if is_plot else 1000

# exchange direction by MPI rank: first rank '+', last rank '-', others both
direction = '+' if rank == 0 else ('-' if rank == size - 1 else '+-')

# Alternative core/exchange pairings are kept commented out for reference.
core = CoreSplit2(fields)
#exch = network.ExchangeMpiBlock(fields, core, direction)
#exch = network.ExchangeMpiNonBlock(fields, core, direction)
exch = network.ExchangeMpiSplit2(fields, core, direction)
#core = CoreSplit3(fields)
#exch = network.ExchangeMpiSplit3(fields, core, direction, tmax)
# plot
import matplotlib.pyplot as plt
import matplotlib as mpl

mpl.rc('image', interpolation='nearest', origin='lower')
#plt.ion()
fig = plt.figure(figsize=(14, 8))

# gpu device: one context over all listed GPUs, first device is used
gpu_devices = common_gpu.gpu_device_list(print_info=False)
context = cl.Context(gpu_devices)
device = gpu_devices[0]

# z-axis: thin grid along z, two line sources crossing in the x-y plane
nx, ny, nz = 180, 160, 2
fields = Fields(context, device, nx, ny, nz)
Pbc(fields, 'xyz')
Core(fields)
IncidentDirect(fields, 'ey', (20, 0, 0), (20, ny - 1, nz - 1), tfunc)
IncidentDirect(fields, 'ex', (0, 20, 0), (nx - 1, 20, nz - 1), tfunc)

# advance the simulation for tmax steps
for tstep in xrange(1, tmax + 1):
    fields.update_e()
    fields.update_h()

# first panel: ey on the x-y plane at z index nz / 2
ax1 = fig.add_subplot(2, 3, 1)
getf = GetFields(fields, 'ey', (0, 0, nz / 2), (nx - 1, ny - 1, nz / 2))
getf.get_event().wait()
ax1.imshow(getf.get_fields().T, vmin=-1.1, vmax=1.1)
ax1.set_title('%s, ey[20,:,:]' % repr(fields.ns))
ax1.set_xlabel('x')
#nx, ny, nz = 240, 256, 256 # 540 MB #nx, ny, nz = 544, 544, 544 # 5527 MB #nx, ny, nz = 512, 512, 512 # 4608 MB #nx, ny, nz = 480, 480, 480 # 3796 MB nx, ny, nz = 800, 256, 256 # 576 MB #nx, ny, nz = 128, 128, 128 # 72 MB tmax, tgap = 1000, 10 # instances gpu_devices = common_gpu.gpu_device_list(print_info=False) context = cl.Context(gpu_devices) device = gpu_devices[0] fields = Fields(context, device, nx, ny, nz, coeff_use='e', precision_float='single') Core(fields) print 'ns_pitch', fields.ns_pitch print 'nbytes (MB)', nx * ny * nz * 9 * 4. / (1024**2) ''' Pbc(fields, 'xyz') tfunc = lambda tstep: 40 * np.sin(0.05 * tstep) IncidentDirect(fields, 'ez', (220, 20, 0), (220, 20, -1), tfunc) getf = GetFields(fields, 'ez', (0, 0, 0.5), (-1, -1, 0.5)) # plot
from kemp.fdtd3d import common_gpu
from kemp.fdtd3d.gpu import Fields, DirectSrc, GetFields, ExchangeFields

import numpy as np
import pyopencl as cl

nx, ny, nz = 240, 640, 640
tmax, tgap = 200, 10
divide_axes = 'x'

# One shared context; one Fields instance per GPU device.
gpu_devices = common_gpu.get_gpu_devices()
context = cl.Context(gpu_devices)
ngpu = len(gpu_devices)

fdtds = [Fields(context, device, nx, ny, nz, coeff_use='')
         for device in gpu_devices]

# ez on the plane z = nz / 2.  The first sub-domain is read from x = 0,
# every later one from x = 1.
outputs = [GetFields(fdtds[0], 'ez',
                     (0, 0, nz / 2), (nx - 1, ny - 1, nz / 2))]
for fdtd in fdtds[1:]:
    outputs.append(GetFields(fdtd, 'ez',
                             (1, 0, nz / 2), (nx - 1, ny - 1, nz / 2)))

# Sinusoidal ez line source along z, placed in the second sub-domain.
src = DirectSrc(fdtds[1], 'ez',
                (nx / 5 * 4, ny / 2, 0),
                (nx / 5 * 4, ny / 2, nz - 1),
                lambda tstep: np.sin(0.1 * tstep))
exch = ExchangeFields(fdtds, 'x')

# Plot
import matplotlib.pyplot as plt
plt.ion()
sys.path.append('/home/kifang')

from kemp.fdtd3d import common_gpu
from kemp.fdtd3d.gpu import Fields, DirectSrc, GetFields, PbcInt

import numpy as np
import pyopencl as cl

nx, ny, nz = 2, 640, 640
tmax, tgap = 1000, 10
gpu_id = 0

# One context over all GPUs; the simulation runs on device gpu_id.
gpu_devices = common_gpu.get_gpu_devices()
context = cl.Context(gpu_devices)
device = gpu_devices[gpu_id]

fdtd = Fields(context, device, nx, ny, nz, coeff_use='')

# Single-cell sinusoidal ex source (start and end points coincide).
src = DirectSrc(fdtd, 'ex',
                (1, ny / 5 * 4, nz / 5 * 3),
                (1, ny / 5 * 4, nz / 5 * 3),
                lambda tstep: np.sin(0.1 * tstep))
pbc = PbcInt(fdtd, 'x')

# ex over the whole y-z plane at x = 1
output = GetFields(fdtd, 'ex', (1, 0, 0), (1, ny - 1, nz - 1))

# Plot
import matplotlib.pyplot as plt
plt.ion()
imag = plt.imshow(output.get_fields().T,
                  cmap=plt.cm.hot,
                  origin='lower',
                  vmin=0,
                  vmax=0.05)
plt.colorbar()
import sys
import os

# Make the user's home directory importable before the kemp imports below.
sys.path.append(os.path.expanduser('~'))

from kemp.fdtd3d.gpu import Fields, Core, DirectIncident, GetFields, Pbc
from kemp.fdtd3d.util import common_exchange

nx, ny, nz = 200, 300, 16
tmax, tgap = 200, 10

# instances
from kemp.fdtd3d.util import common_gpu
import pyopencl as cl

gpu_devices = common_gpu.gpu_device_list(print_info=False)
context = cl.Context(gpu_devices)
fields = Fields(context, gpu_devices[0], nx, ny, nz)
#fields = Fields(nx, ny, nz)
core = Core(fields)

# One Pbc instance per axis; `pbc` ends up bound to the 'z' instance.
for axis in ('x', 'y', 'z'):
    pbc = Pbc(fields, axis)

print(fields.instance_list)

# Two sinusoidal plane sources; `incident` ends up bound to the 'ey' one.
tfunc = lambda tstep: np.sin(0.05 * tstep)
incident = DirectIncident(fields, 'ez', (20, 0, 0), (20, ny-1, nz-1), tfunc)
incident = DirectIncident(fields, 'ey', (0, 20, 0), (nx-1, 20, nz-1), tfunc)
#incident = DirectIncident(fields, 'ex', (0, 0, 20), (nx-1, ny-1, 20), tfunc)

# ez on the x-y plane at z index nz/2
getf = GetFields(fields, 'ez', (0, 0, nz/2), (nx-1, ny-1, nz/2))

# for verify pbc
import numpy as np import pyopencl as cl import sys, os sys.path.append(os.path.expanduser('~')) from kemp.fdtd3d.util import common_gpu from kemp.fdtd3d.gpu import Fields, Core, Pbc, IncidentDirect, GetFields nx, ny, nz = 160, 140, 32 tmax, tgap = 150, 10 # instances gpu_devices = common_gpu.gpu_device_list(print_info=False) context = cl.Context(gpu_devices) device = gpu_devices[0] fields = Fields(context, device, nx, ny, nz) Core(fields) Pbc(fields, 'xyz') ''' tfunc = lambda tstep: np.sin(0.05 * tstep) IncidentDirect(fields, 'ez', (20, 0, 0), (20, ny-1, nz-1), tfunc) #IncidentDirect(fields, 'ez', (0, 20, 0), (nx-1, 20, nz-1), tfunc) getf = GetFields(fields, 'ez', (0, 0, nz/2), (nx-1, ny-1, nz/2)) print fields.instance_list # plot import matplotlib.pyplot as plt plt.ion() fig = plt.figure(figsize=(12,8)) imag = plt.imshow(np.zeros((nx, ny), fields.dtype).T, interpolation='nearest', origin='lower', vmin=-1.1, vmax=1.1)
def runTest(self):
    """Compare the device E or H update with the host reference update.

    ``self.args`` supplies
    ``(ufunc, nx, ny, nz, coeff_use, precision_float, tmax)``.  For
    ``ufunc == "e"`` the E update is run ``tmax`` times on the device and on
    the host (``common_update.update_e``); for ``"h"`` the H update likewise.
    The three updated fields must then match exactly, and when the device
    arrays are padded (``fields.pad != 0``) the checked tail of the z axis
    must be zero.  Any other ``ufunc`` value performs no check.
    """
    ufunc, nx, ny, nz, coeff_use, precision_float, tmax = self.args

    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    device = gpu_devices[0]
    fields = Fields(context, device, nx, ny, nz, coeff_use, precision_float)
    core = Core(fields)

    # allocations
    dtype = fields.dtype
    strf_list = ["ex", "ey", "ez", "hx", "hy", "hz"]

    ehs = common_update.generate_random_ehs(nx, ny, nz, dtype, ufunc)
    fields.set_eh_bufs(*ehs)

    ces, chs = common_update.generate_random_cs(coeff_use, nx, ny, nz, dtype)
    if "e" in coeff_use:
        fields.set_ce_bufs(*ces)
    if "h" in coeff_use:
        fields.set_ch_bufs(*chs)

    tmpf = np.zeros(fields.ns_pitch, dtype=dtype)

    def verify(strf, eh, tail):
        # Copy one device field back and compare it with the host array.
        cl.enqueue_copy(fields.queue, tmpf, fields.get_buf(strf))
        diff = eh - tmpf[:, :, fields.slice_z]
        norm = np.linalg.norm(diff)
        max_diff = np.abs(diff).max()
        self.assertEqual(norm, 0, "%s, %s, %g, %g" % (self.args, strf, norm, max_diff))

        if fields.pad != 0:
            # The last `tail` z layers of the padded array must stay zero.
            norm2 = np.linalg.norm(tmpf[:, :, -tail:])
            self.assertEqual(norm2, 0, "%s, %s, %g, padding" % (self.args, strf, norm2))

    # update
    if ufunc == "e":
        for _ in range(tmax):
            fields.update_e()
            common_update.update_e(ehs, ces)

        # Pair names with arrays only after the host update has run.
        for strf, eh in list(zip(strf_list, ehs))[:3]:
            # ex/ey are checked over one z layer more than ez.
            tail = fields.pad if strf == "ez" else fields.pad + 1
            verify(strf, eh, tail)

    elif ufunc == "h":
        for _ in range(tmax):
            fields.update_h()
            common_update.update_h(ehs, chs)

        for strf, eh in list(zip(strf_list, ehs))[3:]:
            # All H components use the same tail; the original if/else on
            # "hz" had two identical branches.
            verify(strf, eh, fields.pad)
def runTest(self):
    """Compare the device E or H update with the host reference update.

    ``self.args`` supplies
    ``(ufunc, nx, ny, nz, coeff_use, precision_float, tmax)``.  Depending on
    ``ufunc`` ('e' or 'h') the matching update is run ``tmax`` times on the
    device and on the host.  The three updated fields must then match
    exactly, and when ``fields.pad != 0`` the checked tail of the z axis in
    the padded device array must be zero.  Any other ``ufunc`` value
    performs no check.
    """
    ufunc, nx, ny, nz, coeff_use, precision_float, tmax = self.args

    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    device = gpu_devices[0]
    fields = Fields(context, device, nx, ny, nz, coeff_use, precision_float)
    core = Core(fields)

    # allocations
    dtype = fields.dtype
    strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']

    ehs = common_update.generate_random_ehs(nx, ny, nz, dtype, ufunc)
    fields.set_eh_bufs(*ehs)

    ces, chs = common_update.generate_random_cs(coeff_use, nx, ny, nz, dtype)
    if 'e' in coeff_use:
        fields.set_ce_bufs(*ces)
    if 'h' in coeff_use:
        fields.set_ch_bufs(*chs)

    tmpf = np.zeros(fields.ns_pitch, dtype=dtype)

    # update
    if ufunc == 'e':
        for _ in range(tmax):
            fields.update_e()
            common_update.update_e(ehs, ces)

        # list() keeps the slice valid when zip() returns an iterator
        for strf, eh in list(zip(strf_list, ehs))[:3]:
            cl.enqueue_copy(fields.queue, tmpf, fields.get_buf(strf))
            diff = eh - tmpf[:, :, fields.slice_z]
            norm = np.linalg.norm(diff)
            max_diff = np.abs(diff).max()
            self.assertEqual(
                norm, 0,
                '%s, %s, %g, %g' % (self.args, strf, norm, max_diff))

            if fields.pad != 0:
                # ex/ey are checked over one z layer more than ez
                if strf == 'ez':
                    norm2 = np.linalg.norm(tmpf[:, :, -fields.pad:])
                else:
                    norm2 = np.linalg.norm(tmpf[:, :, -fields.pad - 1:])
                self.assertEqual(
                    norm2, 0,
                    '%s, %s, %g, padding' % (self.args, strf, norm2))

    elif ufunc == 'h':
        for _ in range(tmax):
            fields.update_h()
            common_update.update_h(ehs, chs)

        for strf, eh in list(zip(strf_list, ehs))[3:]:
            cl.enqueue_copy(fields.queue, tmpf, fields.get_buf(strf))
            diff = eh - tmpf[:, :, fields.slice_z]
            norm = np.linalg.norm(diff)
            max_diff = np.abs(diff).max()
            self.assertEqual(
                norm, 0,
                '%s, %s, %g, %g' % (self.args, strf, norm, max_diff))

            if fields.pad != 0:
                # Same tail for every H component; the original if/else on
                # 'hz' had two identical branches.
                norm2 = np.linalg.norm(tmpf[:, :, -fields.pad:])
                self.assertEqual(
                    norm2, 0,
                    '%s, %s, %g, padding' % (self.args, strf, norm2))
# Grid-size presets.  The memory figures match nx*ny*nz * 9 * 4 bytes.
#nx, ny, nz = 240, 256, 256    # 540 MB
#nx, ny, nz = 544, 544, 544    # 5527 MB
#nx, ny, nz = 512, 512, 512    # 4608 MB
#nx, ny, nz = 480, 480, 480    # 3796 MB
#nx, ny, nz = 128, 128, 128    # 72 MB
nx, ny, nz = 800, 256, 256     # 1800 MB
coeff_use = 'e'
precision_float = 'single'

# instances: one context over all listed GPUs, first device is used
gpu_devices = common_gpu.gpu_device_list(print_info=False)
context = cl.Context(gpu_devices)
device = gpu_devices[0]
qtask = cpu.QueueTask()
fields = Fields(context, device, qtask, nx, ny, nz, coeff_use, precision_float)
Core(fields)

# fewer time steps when plotting
tmax = 250 if is_plot else 1000

# exchange direction by MPI rank: first rank '+', last rank '-', others both
direction = '+' if rank == 0 else ('-' if rank == size - 1 else '+-')

# Alternative exchange implementations are kept commented out for reference.
#exch = node.ExchangeMpiNonBlock(fields, direction)
#exch = node.ExchangeMpiBufferBlock(fields, direction)
#exch = node.ExchangeMpiBufferBlockSplit(fields, direction)
exch = node.ExchangeMpiBufferNonBlockSplitEnqueue(fields, direction, tmax)

# Attach a CPU core to the exchange object's cpuf_p / cpuf_m field object
# for each direction in use.
if '+' in direction:
    cpu.Core(exch.cpuf_p)
if '-' in direction:
    cpu.Core(exch.cpuf_m)
sys.path.append( os.path.expanduser('~') ) from kemp.fdtd3d.util import common_gpu from kemp.fdtd3d.gpu import Fields, Core, Pbc, IncidentDirect, GetFields #nx, ny, nz = 240, 256, 256 # 540 MB #nx, ny, nz = 544, 544, 544 # 5527 MB #nx, ny, nz = 512, 512, 512 # 4608 MB #nx, ny, nz = 480, 480, 480 # 3796 MB #nx, ny, nz = 256, 256, 256 # 576 MB nx, ny, nz = 240, 256, 256 #nx, ny, nz = 128, 128, 128 # 72 MB tmax, tgap = 1000, 10 # instances fields = Fields(0, nx, ny, nz, coeff_use='e', precision_float='single') Core(fields) print 'ns_pitch', fields.ns_pitch print 'nbytes (MB)', nx*ny*nz * 9 * 4. / (1024**2) ''' Pbc(fields, 'xyz') tfunc = lambda tstep: np.sin(0.05 * tstep) IncidentDirect(fields, 'ez', (20, 0, 0), (20, ny-1, nz-1), tfunc) #IncidentDirect(fields, 'ez', (0, 20, 0), (nx-1, 20, nz-1), tfunc) getf = GetFields(fields, 'ez', (0, 0, nz/2), (nx-1, ny-1, nz/2)) print fields.instance_list
values = np.random.rand(*shape_dict[axis]).astype(s.fdtd.dtype) fset.set_fields(values) fget = GetFields(s.fdtd, str_fs, pt0, pt1) fget.get_event().wait() copy = fget.get_fields() assert np.linalg.norm(values - copy) == 0 if __name__ == '__main__': nx, ny, nz = 100, 110, 128 gpu_id = 0 gpu_devices = common_gpu.get_gpu_devices() context = cl.Context(gpu_devices) device = gpu_devices[gpu_id] fdtd = Fields(context, device, nx, ny, nz, coeff_use='') print('-' * 47 + '\nTest GetFields') testget = TestGetFields(fdtd, nx, ny, nz) testget.set_iteration(3) testget.test() testget.test_boundary() print('-' * 47 + '\nTest SetFields') testset = TestSetFields(fdtd, nx, ny, nz) testset.set_iteration(3) testset.test() testset.test_boundary()
def runTest(self):
    """Check the field written by IncidentDirect after one update step.

    ``self.args`` supplies ``(nx, ny, nz, str_f, pt0, pt1, is_array)``.  The
    source value is a random array shaped by ``pt0``/``pt1`` when
    ``is_array`` is true, otherwise a random scalar.  After one
    ``update_e``/``update_h`` pair the device buffer for ``str_f`` is copied
    back and, inside the source region, must equal ``value * tfunc(1)``.

    ``fields.context_pop()`` is called in a ``finally`` clause so that it
    still runs when the assertion (or anything else) raises.
    """
    nx, ny, nz, str_f, pt0, pt1, is_array = self.args
    slice_xyz = common.slices_two_points(pt0, pt1)

    # generate random source
    if is_array:
        shape = common.shape_two_points(pt0, pt1)
        value = np.random.rand(*shape).astype(np.float32)
    else:
        value = np.random.ranf()

    # instance
    fields = Fields(0, nx, ny, nz, '', 'single')
    try:
        tfunc = lambda tstep: np.sin(0.03 * tstep)
        incident = IncidentDirect(fields, str_f, pt0, pt1, tfunc, value)

        # host allocations
        eh = np.zeros(fields.ns_pitch, dtype=fields.dtype)

        # verify: both factors are cast to the field dtype before multiplying
        eh[slice_xyz] = fields.dtype(value) * fields.dtype(tfunc(1))
        fields.update_e()
        fields.update_h()

        copy_eh_buf = fields.get_buf(str_f)
        copy_eh = np.zeros_like(eh)
        cuda.memcpy_dtoh(copy_eh, copy_eh_buf)

        original = eh[slice_xyz]
        copy = copy_eh[slice_xyz]
        norm = np.linalg.norm(original - copy)
        self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))
    finally:
        # Previously skipped whenever the assertion above failed.
        fields.context_pop()
#nx, ny, nz = 240, 256, 256 # 540 MB #nx, ny, nz = 544, 544, 544 # 5527 MB #nx, ny, nz = 512, 512, 512 # 4608 MB #nx, ny, nz = 480, 480, 480 # 3796 MB nx, ny, nz = 240, 256, 256 # 576 MB #nx, ny, nz = 128, 128, 128 # 72 MB coeff_use = 'e' precision_float = 'single' # instances gpu_devices = common_gpu.gpu_device_list(print_info=False) context = cl.Context(gpu_devices) device = gpu_devices[0] fields = Fields(context, device, nx, ny, nz, coeff_use, precision_float) Core(fields) if rank == 0: direction = '+' elif rank == size - 1: direction = '-' else: direction = '+-' exch = ExchangeMpi(fields, direction) is_master = True if rank == 0 else False tmax = 150 if is_plot else 1000 if is_plot: Pbc(fields, 'yz') getf = GetFields(fields, 'ez', (0, 0, 0.5), (-1, -1, 0.5)) if rank < size - 1:
from kemp.fdtd3d import common_gpu
from kemp.fdtd3d.gpu import Fields, DirectSrc, GetFields, PbcInt

import numpy as np
import pyopencl as cl

# Grid two cells thick along x; the x axis is handled by PbcInt below.
nx, ny, nz = 2, 640, 640
tmax, tgap = 1000, 10
gpu_id = 0

gpu_devices = common_gpu.get_gpu_devices()
context = cl.Context(gpu_devices)
device = gpu_devices[gpu_id]
fdtd = Fields(context, device, nx, ny, nz, coeff_use='')

# Single-cell sinusoidal ex source (start and end points coincide).
src = DirectSrc(
    fdtd, 'ex',
    (1, ny/5*4, nz/5*3),
    (1, ny/5*4, nz/5*3),
    lambda tstep: np.sin(0.1 * tstep))
pbc = PbcInt(fdtd, 'x')

# ex over the whole y-z plane at x = 1
output = GetFields(fdtd, 'ex', (1, 0, 0), (1, ny-1, nz-1))

# Plot
import matplotlib.pyplot as plt
plt.ion()
imag = plt.imshow(
    output.get_fields().T,
    cmap=plt.cm.hot, origin='lower', vmin=0, vmax=0.05)
plt.colorbar()

# Main loop
from datetime import datetime
t0 = datetime.now()
def runTest(self):
    """Check that Pbc makes the two boundary planes along ``axis`` agree.

    ``self.args`` supplies ``(axis, nx, ny, nz, precision_float)``.  After
    one ``update_e``/``update_h`` pair on random fields, the two field
    components listed for ``axis`` are read on the index-0 plane and on the
    last plane along ``axis``; the E pair and the H pair must each be
    identical between the two planes.
    """
    axis, nx, ny, nz, precision_float = self.args

    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    device = gpu_devices[0]
    fields = Fields(context, device, nx, ny, nz, '', precision_float)
    pbc = Pbc(fields, axis)

    # allocations
    ehs = common_update.generate_random_ehs(nx, ny, nz, fields.dtype)
    fields.set_eh_bufs(*ehs)

    # update
    fields.update_e()
    fields.update_h()

    # verify
    getf0, getf1 = {}, {}
    strfs_e = {
        'x': ['ey', 'ez'],
        'y': ['ex', 'ez'],
        'z': ['ex', 'ey'],
    }[axis]
    strfs_h = {
        'x': ['hy', 'hz'],
        'y': ['hx', 'hz'],
        'z': ['hx', 'hy'],
    }[axis]

    # E components on the first plane
    pt0 = (0, 0, 0)
    pt1 = {
        'x': (0, ny-2, nz-2),
        'y': (nx-2, 0, nz-2),
        'z': (nx-2, ny-2, 0),
    }[axis]
    getf0['e'] = GetFields(fields, strfs_e, pt0, pt1)

    # E components on the last plane
    pt0 = {
        'x': (nx-1, 0, 0),
        'y': (0, ny-1, 0),
        'z': (0, 0, nz-1),
    }[axis]
    pt1 = {
        'x': (nx-1, ny-2, nz-2),
        'y': (nx-2, ny-1, nz-2),
        'z': (nx-2, ny-2, nz-1),
    }[axis]
    getf1['e'] = GetFields(fields, strfs_e, pt0, pt1)

    # H components on the first plane
    pt0 = {
        'x': (0, 1, 1),
        'y': (1, 0, 1),
        'z': (1, 1, 0),
    }[axis]
    pt1 = {
        'x': (0, ny-1, nz-1),
        'y': (nx-1, 0, nz-1),
        'z': (nx-1, ny-1, 0),
    }[axis]
    getf0['h'] = GetFields(fields, strfs_h, pt0, pt1)

    # H components on the last plane
    pt0 = {
        'x': (nx-1, 1, 1),
        'y': (1, ny-1, 1),
        'z': (1, 1, nz-1),
    }[axis]
    pt1 = (nx - 1, ny - 1, nz - 1)
    getf1['h'] = GetFields(fields, strfs_h, pt0, pt1)

    # list() keeps the concatenation valid when values() returns a view
    for getf in list(getf0.values()) + list(getf1.values()):
        getf.get_event().wait()

    for eh in ['e', 'h']:
        norm = np.linalg.norm(
            getf0[eh].get_fields() - getf1[eh].get_fields())
        self.assertEqual(norm, 0, '%g, %s, %s' % (norm, self.args, eh))