Esempio n. 1
0
 def __init_bootstrap_kernel(self):
   """Create and prepare the kernels used to generate bootstrap samples.

   Both kernels are built with the C type corresponding to
   ``self.dtype_indices``:

   * ``self.bootstrap_fill`` comes from ``bootstrap_fill.cu`` and is
     prepared with the argument format "PPii".
   * ``self.bootstrap_reshuffle`` comes from ``bootstrap_reshuffle.cu`` and
     is prepared with the argument format "PPPi".

   ``self.mark_table`` is then bound to the texture reference that
   ``mk_tex_kernel`` returns for the "tex_mark" texture.
   """
   index_ctype = dtype_to_ctype(self.dtype_indices)

   fill = mk_kernel((index_ctype,), "bootstrap_fill", "bootstrap_fill.cu")
   self.bootstrap_fill = fill

   # The second template argument is the constant 128 -- presumably a
   # threads-per-block value; confirm against bootstrap_reshuffle.cu.
   reshuffle, mark_texref = mk_tex_kernel(
       (index_ctype, 128),
       "bootstrap_reshuffle",
       "tex_mark",
       "bootstrap_reshuffle.cu")
   self.bootstrap_reshuffle = reshuffle

   # Fix the argument layout of each kernel before it is launched.
   for kernel, arg_format in ((fill, "PPii"), (reshuffle, "PPPi")):
     kernel.prepare(arg_format)

   self.mark_table.bind_to_texref_ext(mark_texref)
Esempio n. 2
0
  def __compile_kernels(self):
    """Compile the DFS and BFS CUDA modules and prepare every kernel.

    Steps, in order:

    1. Translate the index/label/count/sample dtypes stored on ``self``
       into C type names with ``dtype_to_ctype``.
    2. Compile ``dfs_module.cu``, copy ``self.stride`` into its ``stride``
       global, fetch and prepare its kernels, and bind ``self.mark_table``
       to its ``tex_mark`` texture reference.
    3. Compile ``bfs_module.cu``, copy ``self.stride``, ``self.n_features``
       and ``self.max_features`` into the globals of the same names, fetch
       and prepare its kernels, and bind ``self.mark_table`` to its
       ``tex_mark`` texture reference as well.
    4. Build the ``predict`` kernel from ``predict.cu`` through
       ``mk_kernel``.
    5. Keep both compiled modules on ``self`` as ``dfs_module`` and
       ``bfs_module``.

    The strings handed to ``prepare`` are argument-format strings
    (presumably PyCUDA's struct-style codes, e.g. "P" pointer, "i" int --
    confirm against the kernel signatures in the .cu files).
    """
    idx_t = dtype_to_ctype(self.dtype_indices)
    label_t = dtype_to_ctype(self.dtype_labels)
    count_t = dtype_to_ctype(self.dtype_counts)
    sample_t = dtype_to_ctype(self.dtype_samples)
    label_count = self.n_labels
    compute_threads = self.COMPUTE_THREADS_PER_BLOCK
    reshuffle_threads = self.RESHUFFLE_THREADS_PER_BLOCK

    def load(module, kernel_name, arg_format):
      # Fetch a kernel from a compiled module and fix its argument layout.
      kernel = module.get_function(kernel_name)
      kernel.prepare(arg_format)
      return kernel

    # ---- DFS module -----------------------------------------------------
    dfs_params = (
        compute_threads,
        reshuffle_threads,
        label_count,
        sample_t,
        label_t,
        count_t,
        idx_t,
        self.MAX_BLOCK_PER_FEATURE,
        self.debug,
    )
    dfs = compile_module("dfs_module.cu", dfs_params)

    # get_global(...)[0] is the first element of the returned value, used
    # here as the destination of the host-to-device copy.
    driver.memcpy_htod(dfs.get_global("stride")[0], np.uint32(self.stride))

    self.find_min_kernel = load(dfs, "find_min_imp", "PPPi")
    self.fill_kernel = load(dfs, "fill_table", "PiiP")
    self.scan_reshuffle_tex = load(dfs, "scan_reshuffle", "PPii")
    self.mark_table.bind_to_texref_ext(dfs.get_texref("tex_mark"))
    self.comput_total_2d = load(dfs, "compute_2d", "PPPPPPPii")
    self.reduce_2d = load(dfs, "reduce_2d", "PPPPPi")
    self.scan_total_2d = load(dfs, "scan_gini_large", "PPPPii")
    self.scan_reduce = load(dfs, "scan_reduce", "Pi")

    # ---- BFS module -----------------------------------------------------
    bfs_params = (
        self.BFS_THREADS,
        label_count,
        sample_t,
        label_t,
        count_t,
        idx_t,
        self.debug,
    )
    bfs = compile_module("bfs_module.cu", bfs_params)

    # stride is sent as a 32-bit unsigned value; the two feature counts are
    # sent as 16-bit unsigned values.
    driver.memcpy_htod(bfs.get_global("stride")[0], np.uint32(self.stride))
    driver.memcpy_htod(bfs.get_global("n_features")[0],
                       np.uint16(self.n_features))
    driver.memcpy_htod(bfs.get_global("max_features")[0],
                       np.uint16(self.max_features))

    self.scan_total_bfs = load(bfs, "scan_bfs", "PPPP")
    self.comput_bfs_2d = load(bfs, "compute_2d", "PPPPPPPPP")
    self.fill_bfs = load(bfs, "fill_table", "PPPPP")
    self.reshuffle_bfs = load(bfs, "scan_reshuffle", "PPP")
    # The same mark table also backs the BFS module's tex_mark texture.
    self.mark_table.bind_to_texref_ext(bfs.get_texref("tex_mark"))
    self.reduce_bfs_2d = load(bfs, "reduce", "PPPPPPi")
    self.get_thresholds = load(bfs, "get_thresholds", "PPPPP")

    # ---- Prediction kernel ----------------------------------------------
    self.predict_kernel = mk_kernel(
        params=(idx_t, sample_t, label_t),
        func_name="predict",
        kernel_file="predict.cu",
        prepare_args="PPPPPPPii")

    self.bfs_module = bfs
    self.dfs_module = dfs