def get_compute_kernels(self, runner, full_output, bulk):
    """Return a KernelPair of CollideAndPropagate kernels.

    Argument lists and the call signature come from
    _compute_kernels_arguments.  For the 'AB' access pattern a distinct
    secondary kernel (with swapped distribution buffers) is built;
    otherwise the primary kernel serves both halves of the pair.
    """
    signature, primary_args, secondary_args = \
        self._compute_kernels_arguments(runner, full_output, bulk)

    primary = runner.get_kernel(
        'CollideAndPropagate', primary_args, signature,
        needs_iteration=self.config.needs_iteration_num)

    # Non-AB access patterns reuse a single in-place kernel.
    if self.config.access_pattern != 'AB':
        return KernelPair([primary], [primary])

    secondary = runner.get_kernel(
        'CollideAndPropagate', secondary_args, signature,
        needs_iteration=self.config.needs_iteration_num)
    return KernelPair([primary], [secondary])
def get_compute_kernels(self, runner, full_output, bulk):
    """Build the CollideAndPropagate kernel pair for a single-fluid model.

    :param runner: subdomain runner providing GPU buffers and kernel lookup
    :param full_output: if True, sets bit 0 of the kernel options mask
    :param bulk: if True, sets bit 1 of the kernel options mask
    :rtype: KernelPair of CollideAndPropagate kernels
    """
    gpu_rho = runner.gpu_field(self.rho)
    gpu_v = runner.gpu_field(self.v)
    gpu_dist1a = runner.gpu_dist(0, 0)
    gpu_dist1b = runner.gpu_dist(0, 1)
    gpu_map = runner.gpu_geo_map()

    # args2 swaps the source/destination distributions for the odd step.
    args1 = [gpu_map, gpu_dist1a, gpu_dist1b, gpu_rho] + gpu_v
    args2 = [gpu_map, gpu_dist1b, gpu_dist1a, gpu_rho] + gpu_v

    # Pack boolean flags into a bitmask consumed by the kernel.
    options = 0
    if full_output:
        options |= 1
    if bulk:
        options |= 2
    args1.append(np.uint32(options))
    args2.append(np.uint32(options))

    # All-pointer signature with a trailing 'i' for the options word.
    signature = 'P' * (len(args1) - 1) + 'i'

    if runner.gpu_scratch_space is not None:
        args1.append(runner.gpu_scratch_space)
        args2.append(runner.gpu_scratch_space)
        signature += 'P'

    # Alpha field for the entropic LBM.
    if self.alpha_output:
        args1.append(runner.gpu_field(self.alpha))
        args2.append(runner.gpu_field(self.alpha))
        signature += 'P'

    cnp_primary = runner.get_kernel(
        'CollideAndPropagate', args1, signature,
        needs_iteration=self.config.needs_iteration_num)

    if self.config.access_pattern == 'AB':
        # Fix: the original re-tested access_pattern == 'AB' here via a
        # conditional expression, which was redundant inside this branch;
        # args2 is always the secondary argument list.
        cnp_secondary = runner.get_kernel(
            'CollideAndPropagate', args2, signature,
            needs_iteration=self.config.needs_iteration_num)
        return KernelPair([cnp_primary], [cnp_secondary])
    else:
        return KernelPair([cnp_primary], [cnp_primary])
def get_aux_kernels(self, runner):
    """Return ComputeEntropy kernels for both halves of a step.

    The first half reads the B distribution buffer, the second half the
    A buffer, mirroring the alternation of the compute kernels.
    """
    dist_a = runner.gpu_dist(0, 0)
    dist_b = runner.gpu_dist(0, 1)
    geo_map = runner.gpu_geo_map()
    entropy = runner.gpu_field(self.entropy)

    def _entropy_kernel(dist):
        # All three arguments are device pointers.
        return runner.get_kernel('ComputeEntropy',
                                 [geo_map, dist, entropy], 'PPP')

    return KernelPair([_entropy_kernel(dist_b)], [_entropy_kernel(dist_a)])
def get_compute_kernels(self, runner, full_output, bulk):
    """Assemble Shan-Chen macro-field and collide-and-propagate kernels.

    Returns a list pairing the ShanChenPrepareMacroFields kernel with the
    two per-component ShanChenCollideAndPropagate kernels, one entry for
    each half of the simulation step.
    """
    rho = runner.gpu_field(self.rho)
    phi = runner.gpu_field(self.phi)
    vel = runner.gpu_field(self.v)
    geo = runner.gpu_geo_map()
    d1a, d1b = runner.gpu_dist(0, 0), runner.gpu_dist(0, 1)
    d2a, d2b = runner.gpu_dist(1, 0), runner.gpu_dist(1, 1)

    # Pack boolean flags into a bitmask consumed by the kernels.
    opts = 0
    if full_output:
        opts |= 1
    if bulk:
        opts |= 2
    opts = np.uint32(opts)

    def _args(dist_x, dist_y):
        # Common argument layout shared by all kernels of this model.
        return [geo, dist_x, dist_y, rho, phi] + vel + [opts]

    # Primary (even half-step) argument lists.
    args1a = _args(d1a, d1b)
    args1b = _args(d2a, d2b)
    # Secondary (odd half-step): distributions swapped.
    args2a = _args(d1b, d1a)
    args2b = _args(d2b, d2a)
    macro_args1 = _args(d1a, d2a)
    macro_args2 = _args(d1b, d2b)

    all_args = (args1a, args1b, args2a, args2b, macro_args1, macro_args2)
    if self.config.node_addressing == 'indirect':
        nodes = runner.gpu_indirect_address()
        for lst in all_args:
            lst.insert(0, nodes)

    # Pointers for everything except the trailing options word.
    sig_a = 'P' * (len(args1a) - 1) + 'i'
    sig_b = 'P' * (len(args1b) - 1) + 'i'
    macro_sig = 'P' * (len(macro_args1) - 1) + 'i'

    scratch = runner.gpu_scratch_space
    if scratch is not None:
        macro_args1.append(scratch)
        macro_args2.append(scratch)
        macro_sig += 'P'
        for lst in (args1a, args2a, args1b, args2b):
            lst.append(scratch)
        sig_a += 'P'
        sig_b += 'P'

    iter_num = self.config.needs_iteration_num
    macro = runner.get_kernel('ShanChenPrepareMacroFields', macro_args1,
                              macro_sig, needs_iteration=iter_num)
    if self.config.access_pattern == 'AB':
        macro_secondary = runner.get_kernel(
            'ShanChenPrepareMacroFields', macro_args2, macro_sig,
            needs_iteration=iter_num)
        macro_pair = KernelPair(macro, macro_secondary)
    else:
        macro_pair = KernelPair(macro, macro)

    # TODO(michalj): These kernels can actually run in parallel.
    primary = [
        runner.get_kernel('ShanChenCollideAndPropagate0', args1a, sig_a,
                          needs_iteration=iter_num),
        runner.get_kernel('ShanChenCollideAndPropagate1', args1b, sig_b,
                          needs_iteration=iter_num),
    ]
    if self.config.access_pattern == 'AB':
        secondary = [
            runner.get_kernel('ShanChenCollideAndPropagate0', args2a,
                              sig_a, needs_iteration=iter_num),
            runner.get_kernel('ShanChenCollideAndPropagate1', args2b,
                              sig_b, needs_iteration=iter_num),
        ]
        sim_pair = KernelPair(primary, secondary)
    else:
        sim_pair = KernelPair(primary, primary)

    return list(zip(macro_pair, sim_pair))
def get_compute_kernels(self, runner, full_output, bulk):
    """Assemble free-energy macro-field and collide-and-propagate kernels.

    Returns a list pairing the FreeEnergyPrepareMacroFields kernel with
    the fluid and order-parameter collide-and-propagate kernels, one
    entry for each half of the simulation step.
    """
    rho = runner.gpu_field(self.rho)
    phi = runner.gpu_field(self.phi)
    lap = runner.gpu_field(self.phi_laplacian)
    vel = runner.gpu_field(self.v)
    geo = runner.gpu_geo_map()
    d1a, d1b = runner.gpu_dist(0, 0), runner.gpu_dist(0, 1)
    d2a, d2b = runner.gpu_dist(1, 0), runner.gpu_dist(1, 1)

    # Pack boolean flags into a bitmask consumed by the kernels.
    opts = 0
    if full_output:
        opts |= 1
    if bulk:
        opts |= 2
    opts = np.uint32(opts)

    # The order-parameter kernel also needs rho when the force term for
    # the second equation is zero.
    if (hasattr(self, '_force_term_for_eq') and
            self._force_term_for_eq.get(1) == 0):
        phi_args = [rho, phi]
    else:
        phi_args = [phi]

    # Primary (even half-step) argument lists.
    args1a = [geo, d1a, d1b, rho, phi] + vel + [lap, opts]
    args1b = [geo, d2a, d2b] + phi_args + vel + [lap, opts]
    # Secondary (odd half-step): distributions swapped.
    args2a = [geo, d1b, d1a, rho, phi] + vel + [lap, opts]
    args2b = [geo, d2b, d2a] + phi_args + vel + [lap, opts]
    macro_args1 = [geo, d1a, d2a, rho, phi, opts]
    macro_args2 = [geo, d1b, d2b, rho, phi, opts]

    all_args = (args1a, args1b, args2a, args2b, macro_args1, macro_args2)
    if self.config.node_addressing == 'indirect':
        nodes = runner.gpu_indirect_address()
        for lst in all_args:
            lst.insert(0, nodes)

    # Pointers for everything except the trailing options word.
    sig_a = 'P' * (len(args1a) - 1) + 'i'
    sig_b = 'P' * (len(args1b) - 1) + 'i'
    macro_sig = 'P' * (len(macro_args1) - 1) + 'i'

    scratch = runner.gpu_scratch_space
    if scratch is not None:
        macro_args1.append(scratch)
        macro_args2.append(scratch)
        macro_sig += 'P'
        for lst in (args1a, args2a, args1b, args2b):
            lst.append(scratch)
        sig_a += 'P'
        sig_b += 'P'

    iter_num = self.config.needs_iteration_num
    macro = runner.get_kernel('FreeEnergyPrepareMacroFields', macro_args1,
                              macro_sig, needs_iteration=iter_num)
    if self.config.access_pattern == 'AB':
        macro_secondary = runner.get_kernel(
            'FreeEnergyPrepareMacroFields', macro_args2, macro_sig,
            needs_iteration=iter_num)
        macro_pair = KernelPair(macro, macro_secondary)
    else:
        macro_pair = KernelPair(macro, macro)

    # Note: these two kernels need to be executed in order.
    primary = [
        runner.get_kernel('FreeEnergyCollideAndPropagateFluid', args1a,
                          sig_a, needs_iteration=iter_num),
        runner.get_kernel('FreeEnergyCollideAndPropagateOrderParam',
                          args1b, sig_b, needs_iteration=iter_num),
    ]
    if self.config.access_pattern == 'AB':
        secondary = [
            runner.get_kernel('FreeEnergyCollideAndPropagateFluid',
                              args2a, sig_a, needs_iteration=iter_num),
            runner.get_kernel('FreeEnergyCollideAndPropagateOrderParam',
                              args2b, sig_b, needs_iteration=iter_num),
        ]
        sim_pair = KernelPair(primary, secondary)
    else:
        sim_pair = KernelPair(primary, primary)

    return list(zip(macro_pair, sim_pair))