def assemble(self, pts, obs_tris, src_tris):
        """Run the integrator over every observation/source triangle pair.

        Observation triangles are processed in slices of at most
        ``chunk`` rows; each slice gets its own device result buffer that
        is copied back into the host output before the next slice starts.

        Args:
            pts: point array, uploaded once with ``self.float_type``.
            obs_tris: observation triangles, sliced along axis 0.
            src_tris: source triangles, uploaded once as ``np.int32``.

        Returns:
            Host array of shape
            ``(obs_tris.shape[0], 3, 3, src_tris.shape[0], 3, 3)`` and
            dtype ``self.float_type``.
        """
        dev_pts = gpu.to_gpu(pts, self.float_type)
        dev_src_tris = gpu.to_gpu(src_tris, np.int32)

        n_obs = obs_tris.shape[0]
        n_src = src_tris.shape[0]
        host_result = np.empty((n_obs, 3, 3, n_src, 3, 3),
                               dtype=self.float_type)

        def integrate_slice(lo, hi):
            # Handle observation rows [lo, hi) in a single kernel launch.
            count = hi - lo
            dev_block = gpu.empty_gpu((count, 3, 3, n_src, 3, 3),
                                      self.float_type)
            dev_obs_tris = gpu.to_gpu(obs_tris[lo:hi], np.int32)
            self.integrator(
                dev_block,
                np.int32(self.q[0].shape[0]),
                self.gpu_qx,
                self.gpu_qw,
                dev_pts,
                np.int32(count),
                dev_obs_tris,
                np.int32(n_src),
                dev_src_tris,
                self.gpu_params,
                grid=(count, n_src, 1),
                block=(1, 1, 1),
            )
            host_result[lo:hi] = dev_block.get()

        chunk = 1024
        # gpu.intervals presumably yields (start, end) index pairs that
        # cover range(n_obs) -- confirm against the gpu module.
        for bounds in gpu.intervals(n_obs, chunk):
            integrate_slice(*bounds)

        return host_result
Exemple #2
0
    def __init__(self, obs_pts, obs_ns, src_mesh, K_name, nq, params,
                 float_type):
        """Upload geometry and compile the matrix-free farfield kernel.

        Args:
            obs_pts: observation points; axis 0 counts points, axis 1 is
                stored as ``self.dim``.
            obs_ns: observation normals, uploaded with ``float_type``.
            src_mesh: indexable pair; ``src_mesh[0]`` is uploaded as
                floating-point points and ``src_mesh[1]`` as ``np.int32``
                triangles.
            K_name: key into ``kernels``; also the suffix of the kernel
                function looked up on the compiled module.
            nq: order handed to ``gauss2d_tri``.
            params: kernel parameters, converted with ``np.array``.
            float_type: numpy floating-point type used for device data.
        """
        n_obs = obs_pts.shape[0]
        src_tris = src_mesh[1]
        n_src = src_tris.shape[0]

        self.shape = (n_obs * 3, n_src * 9)
        self.dim = obs_pts.shape[1]
        self.tensor_dim = kernels[K_name].tensor_dim
        self.n_obs = n_obs
        self.n_src = n_src

        # Device-side scratch vectors for the input and the output.
        self.gpu_in = gpu.empty_gpu(
            self.n_src * self.dim * self.tensor_dim, float_type)
        self.gpu_out = gpu.empty_gpu(
            self.n_obs * self.tensor_dim, float_type)

        self.q = gauss2d_tri(nq)

        self.gpu_obs_pts = gpu.to_gpu(obs_pts, float_type)
        self.gpu_obs_ns = gpu.to_gpu(obs_ns, float_type)
        self.gpu_src_pts = gpu.to_gpu(src_mesh[0], float_type)
        self.gpu_src_tris = gpu.to_gpu(src_tris, np.int32)
        self.gpu_params = gpu.to_gpu(np.array(params), float_type)

        # Enough blocks of block_size entries to cover every observation.
        self.block_size = 128
        self.n_blocks = int(np.ceil(self.n_obs / self.block_size))

        # The quadrature rule is passed to the template as arguments.
        template_args = {
            'block_size': self.block_size,
            'float_type': gpu.np_to_c_type(float_type),
            'quad_pts': self.q[0],
            'quad_wts': self.q[1],
        }
        self.module = gpu.load_gpu('matrix_free.cl',
                                   tmpl_args=template_args)
        self.fnc = getattr(self.module, "farfield_tris_to_pts" + K_name)
Exemple #3
0
 def __init__(self, kernel, params, float_type, nq_far, nq_near, pts, tris):
     """Load the GPU module and upload quadrature rules and mesh data.

     Args:
         kernel: kernel identifier forwarded to ``get_gpu_module``.
         params: kernel parameters, converted with ``np.array``.
         float_type: numpy floating-point type used for device data.
         nq_far: order passed (twice) to ``gauss4d_tri`` for the far rule.
         nq_near: order passed (twice) to ``gauss4d_tri`` for the near rule.
         pts: mesh points, uploaded with ``float_type``.
         tris: mesh triangles, uploaded as ``np.int32``.
     """
     # float_type must be stored first: quad_to_gpu reads self.float_type.
     self.float_type = float_type
     self.module = get_gpu_module(kernel, float_type)

     host_params = np.array(params)
     self.gpu_params = gpu.to_gpu(host_params, self.float_type)

     near_rule = gauss4d_tri(nq_near, nq_near)
     self.gpu_near_q = self.quad_to_gpu(near_rule)
     far_rule = gauss4d_tri(nq_far, nq_far)
     self.gpu_far_q = self.quad_to_gpu(far_rule)

     self.gpu_pts = gpu.to_gpu(pts, self.float_type)
     self.gpu_tris = gpu.to_gpu(tris, np.int32)
    def __init__(self, kernel, params, n_q, float_type):
        """Prepare the ``farfield_tris`` integrator and its device inputs.

        Args:
            kernel: kernel identifier forwarded to ``get_gpu_module``.
            params: kernel parameters, converted with ``np.array``.
            n_q: order passed (twice) to ``gauss4d_tri``.
            float_type: numpy floating-point type used for device data.
        """
        self.float_type = float_type

        compiled = get_gpu_module(kernel, float_type)
        self.integrator = getattr(compiled, "farfield_tris")

        # Keep the host-side rule: its point count is needed at call time.
        self.q = gauss4d_tri(n_q, n_q)
        quad_pts = self.q[0]
        quad_wts = self.q[1]

        self.gpu_qx = gpu.to_gpu(quad_pts, float_type)
        self.gpu_qw = gpu.to_gpu(quad_wts, float_type)
        self.gpu_params = gpu.to_gpu(np.array(params), float_type)
Exemple #5
0
    def pairs_quad(self, integrator, q, pairs_list):
        """Integrate a list of pairs on the GPU in fixed-size batches.

        Args:
            integrator: GPU kernel callable accepting ``grid``/``block``
                keyword arguments.
            q: quadrature data; ``q[0]`` and ``q[1]`` are passed straight
                to the kernel and ``q[0].shape[0]`` is sent as the count.
            pairs_list: array with one row per pair; a copy is uploaded
                as ``np.int32``.

        Returns:
            Host array of shape ``(pairs_list.shape[0], 3, 3, 3, 3)`` with
            dtype ``self.float_type``.
        """
        dev_pairs = gpu.to_gpu(pairs_list.copy(), np.int32)
        total = pairs_list.shape[0]

        entry_shape = (3, 3, 3, 3)
        result = np.empty((total,) + entry_shape, dtype=self.float_type)
        if total == 0:
            # Nothing to integrate: hand back the empty array unchanged.
            return result

        batch = 2 ** 17
        n_quad = np.int32(q[0].shape[0])

        def run_batch(lo, hi):
            # The kernel receives the full pair list plus the [lo, hi)
            # window, while the result buffer only spans this batch.
            count = hi - lo
            n_blocks = int(np.ceil(count / block_size))
            dev_out = gpu.empty_gpu((count,) + entry_shape, self.float_type)
            integrator(
                dev_out, n_quad, q[0], q[1],
                self.gpu_pts, self.gpu_tris,
                dev_pairs, np.int32(lo), np.int32(hi),
                self.gpu_params,
                grid=(n_blocks, 1, 1), block=(block_size, 1, 1)
            )
            result[lo:hi] = dev_out.get()

        for bounds in gpu.intervals(total, batch):
            run_batch(*bounds)
        return result
Exemple #6
0
    def build_vertex_mat(self, pairs, quad):
        """Evaluate the ``interior_corners`` kernel and assemble its matrix.

        Args:
            pairs: array with one row per pair; a copy is uploaded as
                ``np.int32``.
            quad: quadrature data; ``quad[0]`` and ``quad[1]`` are passed
                to the kernel and ``quad[0].shape[0]`` is sent as the count.

        Returns:
            Whatever ``make_pairs_mat`` builds from ``pairs``, the fetched
            ``(n_pairs, 3, 3, 3)`` result and ``self.farfield.shape``.
        """
        block_size = 128
        far = self.farfield

        # The module is loaded with no_caching=True on every call.
        module = gpu.load_gpu(
            'interior_corners.cl',
            tmpl_args={
                'block_size': block_size,
                'float_type': gpu.np_to_c_type(self.float_type),
            },
            no_caching=True)

        count = pairs.shape[0]
        # Zero-initialised so an empty pair list still yields a valid array.
        dev_out = gpu.zeros_gpu((count, 3, 3, 3), self.float_type)
        dev_pairs = gpu.to_gpu(pairs.copy(), np.int32)

        if count != 0:
            n_blocks = int(np.ceil(count / block_size))
            module.interior_corners(
                dev_out,
                np.int32(quad[0].shape[0]),
                quad[0],
                quad[1],
                far.gpu_obs_pts,
                far.gpu_obs_ns,
                far.gpu_src_pts,
                far.gpu_src_tris,
                dev_pairs,
                np.int32(0),
                np.int32(count),
                far.gpu_params,
                grid=(n_blocks, 1, 1),
                block=(block_size, 1, 1),
            )

        return make_pairs_mat(pairs, dev_out.get(), far.shape)
Exemple #7
0
def interior_pairs_quad(K_name, pairs_list, gpu_quad, gpu_obs_pts, gpu_obs_ns,
                        gpu_src_pts, gpu_src_tris, gpu_params, float_type,
                        finite_part):
    """Evaluate the ``interior_pairs`` kernel for a list of pairs.

    Args:
        K_name: kernel identifier forwarded to ``get_gpu_module``.
        pairs_list: array with one row per pair; a copy is uploaded as
            ``np.int32``.
        gpu_quad: quadrature data; ``gpu_quad[0]`` and ``gpu_quad[1]`` are
            passed to the kernel, ``gpu_quad[0].shape[0]`` as the count.
        gpu_obs_pts, gpu_obs_ns, gpu_src_pts, gpu_src_tris, gpu_params:
            forwarded to the kernel unchanged.
        float_type: numpy floating-point type of the result buffer.
        finite_part: truthy values send 1 to the kernel, falsy values 0.

    Returns:
        Host array of shape ``(pairs_list.shape[0], 3, 3, 3)``; it stays
        all zeros when there are no pairs, because the launch is skipped.
    """
    count = pairs_list.shape[0]
    dev_out = gpu.zeros_gpu((count, 3, 3, 3), float_type)
    dev_pairs = gpu.to_gpu(pairs_list.copy(), np.int32)
    compiled = get_gpu_module(K_name, float_type)
    n_blocks = int(np.ceil(count / block_size))

    if count == 0:
        return dev_out.get()

    finite_flag = np.int32(1 if finite_part else 0)
    compiled.interior_pairs(
        dev_out,
        np.int32(gpu_quad[0].shape[0]),
        gpu_quad[0],
        gpu_quad[1],
        gpu_obs_pts,
        gpu_obs_ns,
        gpu_src_pts,
        gpu_src_tris,
        dev_pairs,
        np.int32(0),
        np.int32(count),
        gpu_params,
        finite_flag,
        grid=(n_blocks, 1, 1),
        block=(block_size, 1, 1),
    )
    return dev_out.get()
Exemple #8
0
def test_async_get():
    """Upload random data and check an awaited ``gpu.get`` returns it."""
    host_vals = np.random.rand(10)
    dev_vals = gpu.to_gpu(host_vals, np.float32)

    async def fetch(worker):
        # taskloaf.run supplies the single argument handed on to gpu.get.
        fetched = await gpu.get(worker, dev_vals)
        return fetched

    round_tripped = taskloaf.run(fetch)
    np.testing.assert_almost_equal(host_vals, round_tripped)
Exemple #9
0
def farfield_pts_direct(K, obs_pts, obs_ns, src_pts, src_ns, vec, params,
                        float_type):
    """Apply the ``farfield_pts<K>`` kernel to ``vec`` on the GPU.

    Args:
        K: suffix appended to ``"farfield_pts"`` to pick the kernel.
        obs_pts: 2-D array of observation points (its shape is unpacked
            into two values).
        obs_ns: observation normals.
        src_pts: source points; axis 0 counts sources.
        src_ns: source normals.
        vec: input vector; ``vec.shape[0] / n_src`` gives the per-point
            component count used to size the result.
        params: kernel parameters, converted with ``np.array``.
        float_type: numpy floating-point type used for device data.

    Returns:
        Host array with ``n_obs * tensor_dim`` entries.
    """
    kernel = getattr(get_gpu_module(float_type), "farfield_pts" + K)

    # Unpacking two values doubles as a check that obs_pts is 2-D.
    n_obs, _ = obs_pts.shape
    n_src = src_pts.shape[0]
    components = int(vec.shape[0] / n_src)

    dev_out = gpu.empty_gpu(n_obs * components, float_type)

    # Uploaded in the positional order the kernel expects after the output.
    host_inputs = (obs_pts, obs_ns, src_pts, src_ns, vec, np.array(params))
    dev_inputs = [gpu.to_gpu(arr, float_type) for arr in host_inputs]

    launch_blocks = int(np.ceil(n_obs / block_size))
    kernel(dev_out,
           *dev_inputs,
           np.int32(n_obs),
           np.int32(n_src),
           grid=(launch_blocks, 1, 1),
           block=(block_size, 1, 1))
    return dev_out.get()
Exemple #10
0
async def gpu_run():
    """Launch the ``add`` kernel on a large random array and await the result.

    The fetched array is discarded; the coroutine only logs ``'run'`` once
    the download has completed and returns ``None``.
    """
    # NOTE: a disabled variant cached (fnc, arg, gpu_R) under the 'add' key
    # of the 'gpu_data' service instead of rebuilding them on every call.
    add_kernel = load_module().add

    host_in = np.random.rand(10000000)
    dev_in = gpu.to_gpu(host_in)
    dev_out = gpu.empty_gpu(dev_in.shape)

    # One block of size 1 per array element.
    n_items = dev_in.shape[0]
    add_kernel(dev_out, dev_in, grid=(n_items, 1, 1), block=(1, 1, 1))

    await gpu.get(dev_out)
    gpu.logger.debug('run')
Exemple #11
0
 def call_integrator(start_idx, end_idx):
     """Integrate observation rows ``[start_idx, end_idx)`` into ``out``.

     Relies on names from the enclosing scope: ``self``, ``obs_tris``,
     ``src_tris``, ``gpu_pts``, ``gpu_src_tris`` and the host array ``out``.
     """
     count = end_idx - start_idx
     n_src = src_tris.shape[0]

     dev_block = gpu.empty_gpu((count, 3, 3, n_src, 3, 3), self.float_type)
     dev_obs_tris = gpu.to_gpu(obs_tris[start_idx:end_idx], np.int32)

     self.integrator(
         dev_block,
         np.int32(self.q[0].shape[0]),
         self.gpu_qx,
         self.gpu_qw,
         gpu_pts,
         np.int32(count),
         dev_obs_tris,
         np.int32(n_src),
         gpu_src_tris,
         self.gpu_params,
         grid=(count, n_src, 1),
         block=(1, 1, 1),
     )
     # Copy this slice of the result back into the shared host array.
     out[start_idx:end_idx] = dev_block.get()
Exemple #12
0
def test_simple_module():
    """Check the ``add`` kernel when loaded from a file and from source text.

    ``kernel.cl`` (next to this test file) is loaded once through
    ``gpu.load_gpu`` and once through ``gpu.load_gpu_from_code``; both
    modules must produce ``in_arr + arg``.
    """
    n = 10
    in_arr = np.random.rand(n)
    arg = 1.0
    this_dir = os.path.dirname(os.path.realpath(__file__))

    from_file = gpu.load_gpu(
        'kernel.cl', tmpl_dir=this_dir, tmpl_args=dict(arg=arg))

    # Read the template inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage
    # collector.
    with open(os.path.join(this_dir, 'kernel.cl')) as kernel_file:
        kernel_code = kernel_file.read()
    from_code = gpu.load_gpu_from_code(kernel_code, tmpl_args=dict(arg=arg))

    for m in (from_file, from_code):
        fnc = m.add

        in_gpu = gpu.to_gpu(in_arr, np.float32)
        out_gpu = gpu.empty_gpu(n, np.float32)
        # One block of size 1 per array element.
        fnc(out_gpu, in_gpu, grid=(n, 1, 1), block=(1, 1, 1))
        output = out_gpu.get()

        correct = in_arr + arg
        np.testing.assert_almost_equal(correct, output)
Exemple #13
0
 def int_gpu(self, arr):
     """Upload ``arr`` to the GPU as ``np.int32`` and return the handle."""
     dev_arr = gpu.to_gpu(arr, np.int32)
     return dev_arr
Exemple #14
0
 def float_gpu(self, arr):
     """Upload ``arr`` to the GPU using ``self.cfg.float_type``."""
     dtype = self.cfg.float_type
     return gpu.to_gpu(arr, dtype)
Exemple #15
0
 def quad_to_gpu(self, q):
     """Upload every array in ``q`` with ``self.float_type``.

     Returns a list of the uploaded arrays, in the same order as ``q``.
     """
     uploaded = []
     for component in q:
         uploaded.append(gpu.to_gpu(component, self.float_type))
     return uploaded