def __init_bootstrap_kernel(self):
    """Build and prepare the kernels used to generate bootstrap samples.

    Stores the compiled kernels on ``self.bootstrap_fill`` and
    ``self.bootstrap_reshuffle``, and binds ``self.mark_table`` to the
    texture reference returned together with the reshuffle kernel.
    """
    index_ctype = dtype_to_ctype(self.dtype_indices)

    self.bootstrap_fill = mk_kernel(
        (index_ctype,),
        "bootstrap_fill",
        "bootstrap_fill.cu")

    reshuffle_kernel, mark_texref = mk_tex_kernel(
        (index_ctype, 128),
        "bootstrap_reshuffle",
        "tex_mark",
        "bootstrap_reshuffle.cu")
    self.bootstrap_reshuffle = reshuffle_kernel

    # Declare the argument formats once so the kernels can be launched
    # through their prepared-call interface.
    self.bootstrap_fill.prepare("PPii")
    reshuffle_kernel.prepare("PPPi")

    # The reshuffle kernel reads the mark table through the "tex_mark"
    # texture reference.
    self.mark_table.bind_to_texref_ext(mark_texref)
def __compile_kernels(self):
    """Compile the DFS, BFS and prediction kernels and store them on ``self``.

    Reads the dtype attributes (``dtype_indices``, ``dtype_labels``,
    ``dtype_counts``, ``dtype_samples``), the thread/block configuration
    constants, ``n_labels``, ``stride``, ``n_features``, ``max_features``
    and ``debug`` from ``self``.  For each compiled module it uploads the
    module-level constants, looks up every kernel function, prepares it
    with its argument format string, and binds ``self.mark_table`` to the
    module's ``tex_mark`` texture reference.  The two modules are kept on
    ``self.dfs_module`` and ``self.bfs_module``.
    """
    ctype_indices = dtype_to_ctype(self.dtype_indices)
    ctype_labels = dtype_to_ctype(self.dtype_labels)
    ctype_counts = dtype_to_ctype(self.dtype_counts)
    ctype_samples = dtype_to_ctype(self.dtype_samples)
    n_labels = self.n_labels
    n_threads = self.COMPUTE_THREADS_PER_BLOCK
    n_shf_threads = self.RESHUFFLE_THREADS_PER_BLOCK

    # ---- DFS module ----
    dfs_module = compile_module(
        "dfs_module.cu",
        (n_threads, n_shf_threads, n_labels, ctype_samples, ctype_labels,
         ctype_counts, ctype_indices, self.MAX_BLOCK_PER_FEATURE,
         self.debug))

    const_stride = dfs_module.get_global("stride")[0]
    # memcpy_htod reads the host value through the buffer interface; wrap
    # the scalar in a 0-d array so a proper buffer is always provided.
    driver.memcpy_htod(const_stride, np.asarray(np.uint32(self.stride)))

    self.find_min_kernel = dfs_module.get_function("find_min_imp")
    self.find_min_kernel.prepare("PPPi")

    self.fill_kernel = dfs_module.get_function("fill_table")
    self.fill_kernel.prepare("PiiP")

    self.scan_reshuffle_tex = dfs_module.get_function("scan_reshuffle")
    self.scan_reshuffle_tex.prepare("PPii")
    tex_ref = dfs_module.get_texref("tex_mark")
    self.mark_table.bind_to_texref_ext(tex_ref)

    self.comput_total_2d = dfs_module.get_function("compute_2d")
    self.comput_total_2d.prepare("PPPPPPPii")

    self.reduce_2d = dfs_module.get_function("reduce_2d")
    self.reduce_2d.prepare("PPPPPi")

    self.scan_total_2d = dfs_module.get_function("scan_gini_large")
    self.scan_total_2d.prepare("PPPPii")

    self.scan_reduce = dfs_module.get_function("scan_reduce")
    self.scan_reduce.prepare("Pi")

    # ---- BFS module ----
    bfs_module = compile_module(
        "bfs_module.cu",
        (self.BFS_THREADS, n_labels, ctype_samples, ctype_labels,
         ctype_counts, ctype_indices, self.debug))

    const_stride = bfs_module.get_global("stride")[0]
    const_n_features = bfs_module.get_global("n_features")[0]
    const_max_features = bfs_module.get_global("max_features")[0]
    # Same 0-d array wrapping as above for every uploaded constant.
    driver.memcpy_htod(const_stride, np.asarray(np.uint32(self.stride)))
    driver.memcpy_htod(const_n_features,
                       np.asarray(np.uint16(self.n_features)))
    driver.memcpy_htod(const_max_features,
                       np.asarray(np.uint16(self.max_features)))

    self.scan_total_bfs = bfs_module.get_function("scan_bfs")
    self.scan_total_bfs.prepare("PPPP")

    self.comput_bfs_2d = bfs_module.get_function("compute_2d")
    self.comput_bfs_2d.prepare("PPPPPPPPP")

    self.fill_bfs = bfs_module.get_function("fill_table")
    self.fill_bfs.prepare("PPPPP")

    self.reshuffle_bfs = bfs_module.get_function("scan_reshuffle")
    tex_ref = bfs_module.get_texref("tex_mark")
    self.mark_table.bind_to_texref_ext(tex_ref)
    self.reshuffle_bfs.prepare("PPP")

    self.reduce_bfs_2d = bfs_module.get_function("reduce")
    self.reduce_bfs_2d.prepare("PPPPPPi")

    self.get_thresholds = bfs_module.get_function("get_thresholds")
    self.get_thresholds.prepare("PPPPP")

    # ---- Prediction kernel ----
    self.predict_kernel = mk_kernel(
        params=(ctype_indices, ctype_samples, ctype_labels),
        func_name="predict",
        kernel_file="predict.cu",
        prepare_args="PPPPPPPii")

    self.bfs_module = bfs_module
    self.dfs_module = dfs_module
def __compile_kernels(self):
    """Compile every GPU module this object uses and keep the kernels.

    Two modules are built -- ``dfs_module.cu`` and ``bfs_module.cu`` --
    plus the standalone ``predict`` kernel.  Each module's constants are
    uploaded, each kernel function is fetched and prepared with its
    argument format string, and ``self.mark_table`` is bound to the
    module's ``tex_mark`` texture reference.  Prepared kernels and both
    modules end up as attributes on ``self``.
    """

    def prepared(module, kernel_name, arg_format):
        # Fetch a kernel from ``module`` and prepare it in one step.
        kernel = module.get_function(kernel_name)
        kernel.prepare(arg_format)
        return kernel

    idx_ctype = dtype_to_ctype(self.dtype_indices)
    label_ctype = dtype_to_ctype(self.dtype_labels)
    count_ctype = dtype_to_ctype(self.dtype_counts)
    sample_ctype = dtype_to_ctype(self.dtype_samples)
    label_count = self.n_labels

    # ------------------------------------------------------------ DFS
    dfs_params = (
        self.COMPUTE_THREADS_PER_BLOCK,
        self.RESHUFFLE_THREADS_PER_BLOCK,
        label_count,
        sample_ctype,
        label_ctype,
        count_ctype,
        idx_ctype,
        self.MAX_BLOCK_PER_FEATURE,
        self.debug,
    )
    dfs = compile_module("dfs_module.cu", dfs_params)

    dfs_stride_ptr = dfs.get_global("stride")[0]
    driver.memcpy_htod(dfs_stride_ptr, np.asarray(np.uint32(self.stride)))

    self.find_min_kernel = prepared(dfs, "find_min_imp", "PPPi")
    self.fill_kernel = prepared(dfs, "fill_table", "PiiP")
    self.scan_reshuffle_tex = prepared(dfs, "scan_reshuffle", "PPii")
    self.mark_table.bind_to_texref_ext(dfs.get_texref("tex_mark"))
    self.comput_total_2d = prepared(dfs, "compute_2d", "PPPPPPPii")
    self.reduce_2d = prepared(dfs, "reduce_2d", "PPPPPi")
    self.scan_total_2d = prepared(dfs, "scan_gini_large", "PPPPii")
    self.scan_reduce = prepared(dfs, "scan_reduce", "Pi")

    # ------------------------------------------------------------ BFS
    bfs_params = (
        self.BFS_THREADS,
        label_count,
        sample_ctype,
        label_ctype,
        count_ctype,
        idx_ctype,
        self.debug,
    )
    bfs = compile_module("bfs_module.cu", bfs_params)

    bfs_stride_ptr = bfs.get_global("stride")[0]
    n_features_ptr = bfs.get_global("n_features")[0]
    max_features_ptr = bfs.get_global("max_features")[0]
    uploads = (
        (bfs_stride_ptr, np.uint32, self.stride),
        (n_features_ptr, np.uint16, self.n_features),
        (max_features_ptr, np.uint16, self.max_features),
    )
    for device_ptr, scalar_type, host_value in uploads:
        # Convert just before copying so each constant is handled in turn.
        driver.memcpy_htod(device_ptr, np.asarray(scalar_type(host_value)))

    self.scan_total_bfs = prepared(bfs, "scan_bfs", "PPPP")
    self.comput_bfs_2d = prepared(bfs, "compute_2d", "PPPPPPPPP")
    self.fill_bfs = prepared(bfs, "fill_table", "PPPPP")

    # For this kernel the texture is bound before the kernel is prepared.
    self.reshuffle_bfs = bfs.get_function("scan_reshuffle")
    self.mark_table.bind_to_texref_ext(bfs.get_texref("tex_mark"))
    self.reshuffle_bfs.prepare("PPP")

    self.reduce_bfs_2d = prepared(bfs, "reduce", "PPPPPPi")
    self.get_thresholds = prepared(bfs, "get_thresholds", "PPPPP")

    # ------------------------------------------------------- Prediction
    self.predict_kernel = mk_kernel(
        params=(idx_ctype, sample_ctype, label_ctype),
        func_name="predict",
        kernel_file="predict.cu",
        prepare_args="PPPPPPPii",
    )

    self.bfs_module = bfs
    self.dfs_module = dfs