def build_vertex_mat(self, pairs, quad):
    """Run the ``interior_corners`` GPU kernel over ``pairs`` and assemble a matrix.

    Args:
        pairs: array whose leading dimension (``pairs.shape[0]``) is the
            number of pairs. A copy is uploaded to the GPU as ``int32``.
        quad: indexable holding at least two entries. ``quad[0]`` and
            ``quad[1]`` are handed to the kernel together with
            ``quad[0].shape[0]`` -- presumably quadrature points and
            weights; confirm against ``interior_corners.cl``.

    Returns:
        The value of ``make_pairs_mat(pairs, result, self.farfield.shape)``,
        where ``result`` is the host copy of the ``(n_pairs, 3, 3, 3)``
        kernel output buffer.
    """
    threads_per_block = 128
    template_args = {
        'block_size': threads_per_block,
        'float_type': gpu.np_to_c_type(self.float_type),
    }
    kernel_module = gpu.load_gpu(
        'interior_corners.cl', tmpl_args=template_args, no_caching=True
    )

    n_pairs = pairs.shape[0]
    result_gpu = gpu.zeros_gpu((n_pairs, 3, 3, 3), self.float_type)
    pairs_gpu = gpu.to_gpu(pairs.copy(), np.int32)

    # Enough blocks of `threads_per_block` threads to cover every pair.
    n_blocks = int(np.ceil(n_pairs / threads_per_block))

    # With no pairs the launch is skipped entirely and the buffer allocated
    # by gpu.zeros_gpu is passed on untouched.
    if n_pairs:
        kernel = kernel_module.interior_corners
        far = self.farfield
        kernel_args = (
            result_gpu,
            np.int32(quad[0].shape[0]),
            quad[0],
            quad[1],
            far.gpu_obs_pts,
            far.gpu_obs_ns,
            far.gpu_src_pts,
            far.gpu_src_tris,
            pairs_gpu,
            # NOTE(review): looks like a [start, end) range over the pairs
            # -- confirm against the kernel signature.
            np.int32(0),
            np.int32(n_pairs),
            far.gpu_params,
        )
        kernel(
            *kernel_args,
            grid=(n_blocks, 1, 1),
            block=(threads_per_block, 1, 1)
        )

    return make_pairs_mat(pairs, result_gpu.get(), self.farfield.shape)
def __init__(self, obs_pts, obs_ns, src_mesh, K_name, nq, params, float_type):
    """Upload the geometry to the GPU and load the farfield kernel for ``K_name``.

    Args:
        obs_pts: array of observation points; ``shape[0]`` is stored as
            ``n_obs`` and ``shape[1]`` as ``dim``.
        obs_ns: array uploaded next to ``obs_pts`` -- presumably the
            observation normals; confirm with callers.
        src_mesh: indexable where ``src_mesh[0]`` is uploaded with
            ``float_type`` and ``src_mesh[1]`` as ``int32``;
            ``src_mesh[1].shape[0]`` is stored as ``n_src``.
        K_name: key into ``kernels``; also the suffix of the GPU function
            name looked up on the loaded module.
        nq: argument forwarded to ``gauss2d_tri``.
        params: values converted with ``np.array`` and uploaded with
            ``float_type``.
        float_type: numpy float type used for every floating point buffer.
    """
    n_obs = obs_pts.shape[0]
    n_src = src_mesh[1].shape[0]

    self.shape = (n_obs * 3, n_src * 9)
    self.dim = obs_pts.shape[1]
    self.tensor_dim = kernels[K_name].tensor_dim
    self.n_obs = n_obs
    self.n_src = n_src

    # Input and output buffers are allocated here but not filled.
    self.gpu_in = gpu.empty_gpu(
        self.n_src * self.dim * self.tensor_dim, float_type
    )
    self.gpu_out = gpu.empty_gpu(self.n_obs * self.tensor_dim, float_type)

    self.q = gauss2d_tri(nq)

    self.gpu_obs_pts = gpu.to_gpu(obs_pts, float_type)
    self.gpu_obs_ns = gpu.to_gpu(obs_ns, float_type)
    self.gpu_src_pts = gpu.to_gpu(src_mesh[0], float_type)
    self.gpu_src_tris = gpu.to_gpu(src_mesh[1], np.int32)
    self.gpu_params = gpu.to_gpu(np.array(params), float_type)

    # Number of blocks of `block_size` needed to cover all n_obs entries.
    self.block_size = 128
    self.n_blocks = int(np.ceil(self.n_obs / self.block_size))

    template_args = dict(
        block_size=self.block_size,
        float_type=gpu.np_to_c_type(float_type),
        quad_pts=self.q[0],
        quad_wts=self.q[1],
    )
    self.module = gpu.load_gpu('matrix_free.cl', tmpl_args=template_args)

    kernel_name = "farfield_tris_to_pts" + K_name
    self.fnc = getattr(self.module, kernel_name)
def load_gpu_module(self):
    """Load ``fmm/ts_kernels.cl`` and store the module on ``self.gpu_module``.

    The template arguments come from ``self.cfg`` (``order``,
    ``float_type``, ``n_workers_per_block``), from ``self.K``, and from the
    two entries of ``gauss2d_tri(self.cfg['quad_order'])``.
    """
    cfg = self.cfg
    quad_rule = gauss2d_tri(cfg['quad_order'])

    template_args = {
        'order': cfg['order'],
        'gpu_float_type': gpu.np_to_c_type(cfg['float_type']),
        'quad_pts': quad_rule[0],
        'quad_wts': quad_rule[1],
        'n_workers_per_block': cfg['n_workers_per_block'],
        'K': self.K,
    }
    self.gpu_module = gpu.load_gpu(
        'fmm/ts_kernels.cl', tmpl_args=template_args
    )
def get_gpu_module(surf, quad, K, float_type, n_workers_per_block):
    """Load ``fmm/tri_gpu_kernels.cl`` with the given template arguments.

    Args:
        surf: indexable; ``surf[0]`` is passed as ``surf_pts`` and
            ``surf[1]`` as ``surf_tris``.
        quad: indexable; ``quad[0]`` is passed as ``quad_pts`` and
            ``quad[1]`` as ``quad_wts``.
        K: forwarded unchanged as the ``K`` template argument.
        float_type: numpy float type, converted with ``gpu.np_to_c_type``.
        n_workers_per_block: forwarded unchanged to the template.

    Returns:
        The module object returned by ``gpu.load_gpu``.
    """
    return gpu.load_gpu(
        'fmm/tri_gpu_kernels.cl',
        tmpl_args={
            'n_workers_per_block': n_workers_per_block,
            'gpu_float_type': gpu.np_to_c_type(float_type),
            'surf_pts': surf[0],
            'surf_tris': surf[1],
            'quad_pts': quad[0],
            'quad_wts': quad[1],
            'K': K,
        },
    )
def test_simple_module():
    """Check the ``add`` kernel of ``kernel.cl`` against ``in_arr + arg``.

    The kernel source is built twice: once through ``gpu.load_gpu`` (template
    looked up in this file's directory) and once through
    ``gpu.load_gpu_from_code`` (source text passed in directly). Each module's
    ``add`` function is run on the same random input and its output is
    compared with the expected sum.
    """
    n = 10
    in_arr = np.random.rand(n)
    arg = 1.0
    this_dir = os.path.dirname(os.path.realpath(__file__))

    module_from_file = gpu.load_gpu(
        'kernel.cl', tmpl_dir=this_dir, tmpl_args=dict(arg=arg)
    )

    # Read the source inside a ``with`` block so the file handle is closed
    # deterministically instead of being left for the garbage collector.
    with open(os.path.join(this_dir, 'kernel.cl')) as src_file:
        kernel_source = src_file.read()
    module_from_code = gpu.load_gpu_from_code(
        kernel_source, tmpl_args=dict(arg=arg)
    )

    for m in (module_from_file, module_from_code):
        fnc = m.add
        in_gpu = gpu.to_gpu(in_arr, np.float32)
        out_gpu = gpu.empty_gpu(n, np.float32)
        # One single-thread block per input entry.
        fnc(out_gpu, in_gpu, grid=(n, 1, 1), block=(1, 1, 1))
        output = out_gpu.get()

        correct = in_arr + arg
        np.testing.assert_almost_equal(correct, output)
def get_gpu_module(float_type):
    """Load ``farfield_direct.cl`` using the config built for ``float_type``.

    Args:
        float_type: forwarded to ``get_gpu_config`` to build the template
            arguments.

    Returns:
        The module object returned by ``gpu.load_gpu``.
    """
    template_args = get_gpu_config(float_type)
    return gpu.load_gpu('farfield_direct.cl', tmpl_args=template_args)
def get_gpu_module(kernel, float_type):
    """Load ``assemble.cl`` using the config built for ``kernel`` and ``float_type``.

    Args:
        kernel: forwarded as the first argument of ``get_gpu_config``.
        float_type: forwarded as the second argument of ``get_gpu_config``.

    Returns:
        The module object returned by ``gpu.load_gpu``.
    """
    template_args = get_gpu_config(kernel, float_type)
    return gpu.load_gpu('assemble.cl', tmpl_args=template_args)
def load_module():
    """Load ``kernels.cl`` from the directory containing this file.

    NOTE(review): ``arg`` is not defined inside this function; it is
    presumably supplied by an enclosing or module scope -- confirm.

    Returns:
        The module object returned by ``gpu.load_gpu``.
    """
    import os

    # Resolve symlinks first so the template is found next to the real file.
    here = os.path.dirname(os.path.realpath(__file__))
    return gpu.load_gpu(
        'kernels.cl',
        tmpl_dir=here,
        tmpl_args={'arg': arg},
    )