Exemple #1
0
    def build_vertex_mat(self, pairs, quad):
        """Run the ``interior_corners`` GPU kernel over ``pairs``.

        Args:
            pairs: array with one row per pair. Only ``pairs.shape[0]`` is
                inspected here; a copy is uploaded to the GPU as int32.
            quad: indexable whose items 0 and 1 are forwarded to the kernel
                unchanged, with ``quad[0].shape[0]`` passed as an int32
                count (presumably quadrature points and weights -- confirm
                against the caller).

        Returns:
            The value of ``make_pairs_mat`` applied to ``pairs``, the
            ``(n_pairs, 3, 3, 3)`` result buffer read back from the GPU, and
            ``self.farfield.shape``.
        """
        threads_per_block = 128
        template_args = dict(
            block_size=threads_per_block,
            float_type=gpu.np_to_c_type(self.float_type),
        )
        kernels = gpu.load_gpu(
            'interior_corners.cl',
            tmpl_args=template_args,
            no_caching=True,
        )

        n_pairs = pairs.shape[0]
        result_gpu = gpu.zeros_gpu((n_pairs, 3, 3, 3), self.float_type)
        pairs_gpu = gpu.to_gpu(pairs.copy(), np.int32)
        # Number of blocks needed so that blocks * threads_per_block >= n_pairs.
        n_blocks = int(np.ceil(n_pairs / threads_per_block))

        # With no pairs the kernel launch is skipped and the buffer from
        # gpu.zeros_gpu is handed to make_pairs_mat untouched.
        if n_pairs != 0:
            farfield = self.farfield
            kernel_args = (
                result_gpu,
                np.int32(quad[0].shape[0]),
                quad[0],
                quad[1],
                farfield.gpu_obs_pts,
                farfield.gpu_obs_ns,
                farfield.gpu_src_pts,
                farfield.gpu_src_tris,
                pairs_gpu,
                np.int32(0),
                np.int32(n_pairs),
                farfield.gpu_params,
            )
            kernels.interior_corners(
                *kernel_args,
                grid=(n_blocks, 1, 1),
                block=(threads_per_block, 1, 1)
            )

        return make_pairs_mat(pairs, result_gpu.get(), self.farfield.shape)
Exemple #2
0
    def __init__(self, obs_pts, obs_ns, src_mesh, K_name, nq, params,
                 float_type):
        """Upload the geometry to the GPU and load the farfield kernel.

        Args:
            obs_pts: 2D array; ``shape[0]`` is stored as ``n_obs`` and
                ``shape[1]`` as ``dim``.
            obs_ns: array uploaded alongside ``obs_pts`` (presumably the
                observation normals -- confirm).
            src_mesh: indexable; item 0 is uploaded as the source points and
                item 1 (int32) as the source triangles, whose ``shape[0]``
                is stored as ``n_src``.
            K_name: key into ``kernels``; also appended to
                ``"farfield_tris_to_pts"`` to pick the GPU function.
            nq: argument handed to ``gauss2d_tri``.
            params: values converted with ``np.array`` and uploaded.
            float_type: numpy float type used for every float GPU buffer and
                for the kernel template.
        """
        src_pts = src_mesh[0]
        src_tris = src_mesh[1]
        n_obs = obs_pts.shape[0]
        n_src = src_tris.shape[0]

        self.shape = (n_obs * 3, n_src * 9)
        self.dim = obs_pts.shape[1]
        self.tensor_dim = kernels[K_name].tensor_dim
        self.n_obs = n_obs
        self.n_src = n_src

        # Scratch buffers sized from the source and observation counts.
        self.gpu_in = gpu.empty_gpu(
            n_src * self.dim * self.tensor_dim, float_type
        )
        self.gpu_out = gpu.empty_gpu(n_obs * self.tensor_dim, float_type)

        self.q = gauss2d_tri(nq)

        self.gpu_obs_pts = gpu.to_gpu(obs_pts, float_type)
        self.gpu_obs_ns = gpu.to_gpu(obs_ns, float_type)
        self.gpu_src_pts = gpu.to_gpu(src_pts, float_type)
        self.gpu_src_tris = gpu.to_gpu(src_tris, np.int32)
        self.gpu_params = gpu.to_gpu(np.array(params), float_type)

        self.block_size = 128
        # Enough blocks of block_size to cover n_obs.
        self.n_blocks = int(np.ceil(self.n_obs / self.block_size))

        template_args = dict(
            block_size=self.block_size,
            float_type=gpu.np_to_c_type(float_type),
            quad_pts=self.q[0],
            quad_wts=self.q[1],
        )
        self.module = gpu.load_gpu('matrix_free.cl', tmpl_args=template_args)
        self.fnc = getattr(self.module, "farfield_tris_to_pts" + K_name)
Exemple #3
0
 def load_gpu_module(self):
     """Load ``fmm/ts_kernels.cl`` and store it as ``self.gpu_module``.

     The template arguments come from ``self.cfg`` (``order``,
     ``float_type``, ``n_workers_per_block``), from ``self.K``, and from the
     first two items of ``gauss2d_tri(self.cfg['quad_order'])``.
     """
     cfg = self.cfg
     quad_rule = gauss2d_tri(cfg['quad_order'])
     template_args = {
         'order': cfg['order'],
         'gpu_float_type': gpu.np_to_c_type(cfg['float_type']),
         'quad_pts': quad_rule[0],
         'quad_wts': quad_rule[1],
         'n_workers_per_block': cfg['n_workers_per_block'],
         'K': self.K,
     }
     self.gpu_module = gpu.load_gpu(
         'fmm/ts_kernels.cl', tmpl_args=template_args
     )
Exemple #4
0
def get_gpu_module(surf, quad, K, float_type, n_workers_per_block):
    """Load ``fmm/tri_gpu_kernels.cl`` templated on the given inputs.

    Args:
        surf: indexable; items 0 and 1 are passed to the template as
            ``surf_pts`` and ``surf_tris``.
        quad: indexable; items 0 and 1 are passed to the template as
            ``quad_pts`` and ``quad_wts``.
        K: passed to the template unchanged as ``K``.
        float_type: numpy float type, converted with ``gpu.np_to_c_type``.
        n_workers_per_block: passed to the template unchanged.

    Returns:
        The module returned by ``gpu.load_gpu``.
    """
    template_args = {
        'n_workers_per_block': n_workers_per_block,
        'gpu_float_type': gpu.np_to_c_type(float_type),
        'surf_pts': surf[0],
        'surf_tris': surf[1],
        'quad_pts': quad[0],
        'quad_wts': quad[1],
        'K': K,
    }
    return gpu.load_gpu('fmm/tri_gpu_kernels.cl', tmpl_args=template_args)
Exemple #5
0
def test_simple_module():
    """Check that the ``add`` kernel in ``kernel.cl`` adds ``arg`` to its input.

    The kernel is loaded twice -- by file name through ``gpu.load_gpu`` and
    from its source text through ``gpu.load_gpu_from_code`` -- and each
    resulting module is run on the same random input and compared with the
    value computed on the host.
    """
    n = 10
    in_arr = np.random.rand(n)
    arg = 1.0
    this_dir = os.path.dirname(os.path.realpath(__file__))

    # Read the kernel source inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open(os.path.join(this_dir, 'kernel.cl')) as kernel_file:
        kernel_code = kernel_file.read()

    modules = [
        gpu.load_gpu('kernel.cl', tmpl_dir=this_dir, tmpl_args=dict(arg=arg)),
        gpu.load_gpu_from_code(kernel_code, tmpl_args=dict(arg=arg)),
    ]
    for m in modules:
        in_gpu = gpu.to_gpu(in_arr, np.float32)
        out_gpu = gpu.empty_gpu(n, np.float32)
        # One block per element, one thread per block.
        m.add(out_gpu, in_gpu, grid=(n, 1, 1), block=(1, 1, 1))
        output = out_gpu.get()

        correct = in_arr + arg
        # assert_almost_equal takes (actual, desired); passing them in that
        # order keeps the labels in a failure message accurate.
        np.testing.assert_almost_equal(output, correct)
Exemple #6
0
def get_gpu_module(float_type):
    """Load ``farfield_direct.cl`` templated for ``float_type``.

    The template arguments are whatever ``get_gpu_config(float_type)``
    returns; the loaded module is returned.
    """
    template_args = get_gpu_config(float_type)
    return gpu.load_gpu('farfield_direct.cl', tmpl_args=template_args)
Exemple #7
0
def get_gpu_module(kernel, float_type):
    """Load ``assemble.cl`` templated for ``kernel`` and ``float_type``.

    The template arguments are whatever ``get_gpu_config(kernel, float_type)``
    returns; the loaded module is returned.
    """
    template_args = get_gpu_config(kernel, float_type)
    return gpu.load_gpu('assemble.cl', tmpl_args=template_args)
Exemple #8
0
def load_module():
    """Load ``kernels.cl`` from the directory containing this file.

    ``arg`` is not defined in this function; it is looked up in the
    enclosing scope and passed to the template as ``arg``.
    """
    import os
    here = os.path.dirname(os.path.realpath(__file__))
    template_args = {'arg': arg}
    return gpu.load_gpu('kernels.cl', tmpl_dir=here, tmpl_args=template_args)