# Example 1
    def gamma_exp_kernel(self, N, dtype):
        """Return a compiled CUDA kernel for the gamma-exponential covariance.

        The returned kernel has the C signature ``func(x, y, l, N)`` and
        writes ``exp(-|(y[a] - y[b]) / l[0]| ** l[1])`` into ``x[a*N + b]``
        for every linear thread index below ``N`` (here ``N`` is the total
        element count passed at launch time; the row length ``M`` is baked
        into the source at compile time).

        Parameters
        ----------
        N : int
            Row length of the square output matrix; substituted into the
            kernel source as the ``M`` stride constant.
        dtype : str
            C scalar type name substituted into the kernel source
            (e.g. ``'float'`` or ``'double'``).

        Returns
        -------
        pycuda.driver.Function
            The cached, compiled kernel.
        """
        # Recompile when nothing is cached, the dtype changed, or N changed.
        # The previous check ignored N; since N is hard-coded into the
        # kernel as M, a cached kernel compiled for a different N would
        # index the output with the wrong stride.  getattr keeps this
        # backward compatible with instances whose __init__ never set
        # _gamma_exp_kernel_N.
        if (self._gamma_exp_kernel is None
                or dtype != self._gamma_exp_kernel_dtype
                or N != getattr(self, '_gamma_exp_kernel_N', None)):
            func_mod_template = Template("""
            // Macro for converting subscripts to linear index:
            #define INDEX(a, b) a*${M}+b

            __global__ void func(${dtype} *x, ${dtype} *y, ${dtype} *l, unsigned int N) {
            // Obtain the linear index corresponding to the current thread:
            unsigned int idx = blockIdx.y*${max_threads_per_block}*${max_blocks_per_grid}+
                               blockIdx.x*${max_threads_per_block}+threadIdx.x;

            // Convert the linear index to subscripts:
            unsigned int a = idx/${M};
            unsigned int b = idx%${M};
            ${dtype} Pvalue = 0.0;
            ${dtype} darg = 0.0;
            ${dtype} abs_arg = 0.0;

            // Use the subscripts to access the array:
            if (idx < N) {
                darg = (y[a] - y[b]) / l[0];
                abs_arg = fabs(darg);
                Pvalue = exp(-1.0*pow(abs_arg, l[1]));
                x[INDEX(a,b)] = Pvalue;
                }
            }
            """)
            # Launch-geometry limits for this device; only the thread/block
            # maxima are substituted into the indexing arithmetic above.
            max_threads_per_block, max_block_dim, max_grid_dim = misc.get_dev_attrs(  # NOQA
                self.device)
            max_blocks_per_grid = max(max_grid_dim)
            func_mod = SourceModule(
                func_mod_template.substitute(
                    max_threads_per_block=max_threads_per_block,
                    max_blocks_per_grid=max_blocks_per_grid,
                    M=N,
                    dtype=dtype))

            self._gamma_exp_kernel = func_mod.get_function('func')
            self._gamma_exp_kernel_dtype = dtype
            self._gamma_exp_kernel_N = N

        return self._gamma_exp_kernel
# Example 2
    def pairwised(self, N, dtype):
        """Return a compiled CUDA kernel computing pairwise absolute distances.

        The returned kernel has the C signature ``func(x, y, N)`` and writes
        ``|y[a] - y[b]|`` into ``x[a*N + b]`` for every linear thread index
        below ``N`` (here ``N`` is the total element count passed at launch
        time; the row length ``M`` is baked into the source at compile time).

        Parameters
        ----------
        N : int
            Row length of the square output matrix; substituted into the
            kernel source as the ``M`` stride constant.
        dtype : str
            C scalar type name substituted into the kernel source
            (e.g. ``'float'`` or ``'double'``).

        Returns
        -------
        pycuda.driver.Function
            The cached, compiled kernel.
        """
        # Recompile when nothing is cached, the dtype changed, or N changed.
        # The previous check ignored N; since N is hard-coded into the
        # kernel as M, a cached kernel compiled for a different N would
        # index the output with the wrong stride.  getattr keeps this
        # backward compatible with instances whose __init__ never set
        # _pairwised_N.
        if (self._pairwised is None
                or dtype != self._pairwised_dtype
                or N != getattr(self, '_pairwised_N', None)):
            func_mod_template = Template("""
            // Macro for converting subscripts to linear index:
            #define INDEX(a, b) a*${M}+b

            __global__ void func(${dtype} *x, ${dtype} *y, unsigned int N) {
            // Obtain the linear index corresponding to the current thread:
            unsigned int idx = blockIdx.y*${max_threads_per_block}*${max_blocks_per_grid}+
                               blockIdx.x*${max_threads_per_block}+threadIdx.x;

            // Convert the linear index to subscripts:
            unsigned int a = idx/${M};
            unsigned int b = idx%${M};
            ${dtype} Pvalue = 0.0;

            // Use the subscripts to access the array:
            if (idx < N) {
                Pvalue = y[a] - y[b];
                if (Pvalue<0) {
                    Pvalue *= -1.0;
                }
                x[INDEX(a,b)] = Pvalue;
                }
            }
            """)
            # Launch-geometry limits for this device; only the thread/block
            # maxima are substituted into the indexing arithmetic above.
            max_threads_per_block, max_block_dim, max_grid_dim = misc.get_dev_attrs(  # NOQA
                self.device)
            max_blocks_per_grid = max(max_grid_dim)
            func_mod = SourceModule(
                func_mod_template.substitute(
                    max_threads_per_block=max_threads_per_block,
                    max_blocks_per_grid=max_blocks_per_grid,
                    M=N,
                    dtype=dtype))

            self._pairwised = func_mod.get_function('func')
            self._pairwised_dtype = dtype
            self._pairwised_N = N

        return self._pairwised
    // Convert the linear index to subscripts:
    unsigned int a = idx/(${B}*${C}*${D});
    unsigned int b = (idx%(${B}*${C}*${D}))/(${C}*${D});
    unsigned int c = ((idx%(${B}*${C}*${D}))%(${C}*${D}))/${D};
    unsigned int d = ((idx%(${B}*${C}*${D}))%(${C}*${D}))%${D};

    // Use the subscripts to access the array:
    if (idx < N) {
        if (c == 0)
           x[INDEX(a,b,c,d)] = 100;
    }
}
""")

max_threads_per_block, max_block_dim, max_grid_dim = misc.get_dev_attrs(
    pycuda.autoinit.device)
block_dim, grid_dim = misc.select_block_grid_sizes(pycuda.autoinit.device,
                                                   x.shape)
max_blocks_per_grid = max(max_grid_dim)

func_mod = \
         SourceModule(func_mod_template.substitute(max_threads_per_block=max_threads_per_block,
                                                   max_blocks_per_grid=max_blocks_per_grid,
                                                   A=A, B=B, C=C, D=D))
func = func_mod.get_function('func')
x_gpu = gpuarray.to_gpu(x)
func(x_gpu.gpudata, np.uint32(x_gpu.size), block=block_dim, grid=grid_dim)
x_np = x.copy()
x_np[:, :, 0, :] = 100

print 'Success status: ', np.allclose(x_np, x_gpu.get())
                       blockIdx.x*${max_threads_per_block}+threadIdx.x;

    // Convert the linear index to subscripts:
    unsigned int a = idx/(${B}*${C});
    unsigned int b = (idx%(${B}*${C}))/${C};
    unsigned int c = (idx%(${B}*${C}))%${C};

    // Use the subscripts to access the array:
    if (idx < N) {
        if (b == 0)
           x[INDEX(a,b,c)] = 100;
    }
}
""")

# Query device limits and derive the CUDA launch geometry from the array shape.
max_threads_per_block, max_block_dim, max_grid_dim = misc.get_dev_attrs(pycuda.autoinit.device)
block_dim, grid_dim = misc.select_block_grid_sizes(pycuda.autoinit.device, x.shape)
max_blocks_per_grid = max(max_grid_dim)

# Compile the CUDA template (its head lies outside this excerpt) with the
# concrete 3-D dimensions A..C substituted in as compile-time constants.
func_mod = \
    SourceModule(func_mod_template.substitute(max_threads_per_block=max_threads_per_block,
                                              max_blocks_per_grid=max_blocks_per_grid,
                                              A=A, B=B, C=C))
func = func_mod.get_function('func')
# Upload the host array and launch one thread per element.
x_gpu = gpuarray.to_gpu(x)
func(x_gpu.gpudata, np.uint32(x_gpu.size),
     block=block_dim,
     grid=grid_dim)
# CPU reference: the kernel fragment above writes 100 wherever subscript
# b == 0, i.e. the whole axis-1 zero slice of the 3-D array.
x_np = x.copy()
x_np[:, 0, :] = 100