Example #1
0
    def get_compute_kernels(self, runner, full_output, bulk):
        """Return the CollideAndPropagate kernel pair for this simulation.

        :param runner: object providing get_kernel(); also forwarded to
            _compute_kernels_arguments to build the argument lists
        :param full_output: forwarded to _compute_kernels_arguments
        :param bulk: forwarded to _compute_kernels_arguments
        :returns: KernelPair of one-element kernel lists
        """
        signature, primary_args, secondary_args = \
            self._compute_kernels_arguments(runner, full_output, bulk)

        def build(args):
            # Compile one CollideAndPropagate kernel for the given buffers.
            return runner.get_kernel(
                'CollideAndPropagate', args, signature,
                needs_iteration=self.config.needs_iteration_num)

        primary = build(primary_args)
        if self.config.access_pattern != 'AB':
            # Non-AB access patterns reuse a single kernel for both steps.
            return KernelPair([primary], [primary])
        return KernelPair([primary], [build(secondary_args)])
Example #2
0
    def get_compute_kernels(self, runner, full_output, bulk):
        """Build the CollideAndPropagate kernel pair.

        :param runner: provides GPU fields/distributions and kernel compilation
        :param full_output: when True, sets bit 0 of the options word
        :param bulk: when True, sets bit 1 of the options word
        :returns: KernelPair of one-element kernel lists; for the 'AB' access
            pattern the secondary kernel swaps the two distribution buffers,
            otherwise the primary kernel is reused for both steps
        """
        gpu_rho = runner.gpu_field(self.rho)
        gpu_v = runner.gpu_field(self.v)
        gpu_dist1a = runner.gpu_dist(0, 0)
        gpu_dist1b = runner.gpu_dist(0, 1)
        gpu_map = runner.gpu_geo_map()

        # Secondary args differ only in the order of the two dist buffers.
        args1 = [gpu_map, gpu_dist1a, gpu_dist1b, gpu_rho] + gpu_v
        args2 = [gpu_map, gpu_dist1b, gpu_dist1a, gpu_rho] + gpu_v

        options = 0
        if full_output:
            options |= 1
        if bulk:
            options |= 2

        args1.append(np.uint32(options))
        args2.append(np.uint32(options))

        # One 'P' per pointer argument, 'i' for the trailing options word.
        signature = 'P' * (len(args1) - 1) + 'i'

        if runner.gpu_scratch_space is not None:
            args1.append(runner.gpu_scratch_space)
            args2.append(runner.gpu_scratch_space)
            signature += 'P'

        # Alpha field for the entropic LBM.
        if self.alpha_output:
            args1.append(runner.gpu_field(self.alpha))
            args2.append(runner.gpu_field(self.alpha))
            signature += 'P'

        cnp_primary = runner.get_kernel(
            'CollideAndPropagate',
            args1,
            signature,
            needs_iteration=self.config.needs_iteration_num)

        if self.config.access_pattern == 'AB':
            # Fix: the original re-tested access_pattern here
            # ("args2 if ... == 'AB' else args1"), a dead conditional inside
            # this branch -- the secondary kernel always uses args2.
            cnp_secondary = runner.get_kernel(
                'CollideAndPropagate',
                args2,
                signature,
                needs_iteration=self.config.needs_iteration_num)
            return KernelPair([cnp_primary], [cnp_secondary])
        else:
            return KernelPair([cnp_primary], [cnp_primary])
Example #3
0
    def get_aux_kernels(self, runner):
        """Return the ComputeEntropy kernel pair, one per distribution buffer.

        :param runner: provides GPU buffers and kernel compilation
        :returns: KernelPair of one-element kernel lists; the first entry
            reads dist buffer 1, the second reads dist buffer 0
        """
        dist_a = runner.gpu_dist(0, 0)
        dist_b = runner.gpu_dist(0, 1)
        gpu_map = runner.gpu_geo_map()
        gpu_entropy = runner.gpu_field(self.entropy)

        # Both kernels share the geometry map and entropy output field and
        # differ only in which distribution buffer they read.
        kernels = [
            runner.get_kernel('ComputeEntropy', [gpu_map, dist, gpu_entropy],
                              'PPP')
            for dist in (dist_b, dist_a)
        ]
        return KernelPair([kernels[0]], [kernels[1]])
Example #4
0
    def get_compute_kernels(self, runner, full_output, bulk):
        """Builds the Shan-Chen macro-field and collide-and-propagate kernels.

        Two distribution sets (fluid components) are used, each
        double-buffered; the primary/secondary argument lists differ only in
        the order of the two buffers of each set.

        :param runner: provides GPU buffers and kernel compilation
        :param full_output: when True, sets bit 0 of the options word
        :param bulk: when True, sets bit 1 of the options word
        :returns: list of (macro KernelPair, sim KernelPair) tuples
        """
        gpu_rho = runner.gpu_field(self.rho)
        gpu_phi = runner.gpu_field(self.phi)
        gpu_v = runner.gpu_field(self.v)
        gpu_map = runner.gpu_geo_map()

        # gpu_dist(i, j): distribution set i, buffer j of the double buffer.
        gpu_dist1a = runner.gpu_dist(0, 0)
        gpu_dist1b = runner.gpu_dist(0, 1)
        gpu_dist2a = runner.gpu_dist(1, 0)
        gpu_dist2b = runner.gpu_dist(1, 1)

        options = 0
        if full_output:
            options |= 1
        if bulk:
            options |= 2

        options = np.uint32(options)
        # Primary.
        args1a = ([gpu_map, gpu_dist1a, gpu_dist1b, gpu_rho, gpu_phi] + gpu_v +
                  [options])
        args1b = ([gpu_map, gpu_dist2a, gpu_dist2b, gpu_rho, gpu_phi] + gpu_v +
                  [options])
        # Secondary.
        args2a = ([gpu_map, gpu_dist1b, gpu_dist1a, gpu_rho, gpu_phi] + gpu_v +
                  [options])
        args2b = ([gpu_map, gpu_dist2b, gpu_dist2a, gpu_rho, gpu_phi] + gpu_v +
                  [options])

        # Macro-field kernels read the "a" (resp. "b") buffer of both sets.
        macro_args1 = ([gpu_map, gpu_dist1a, gpu_dist2a, gpu_rho, gpu_phi] +
                       gpu_v + [options])
        macro_args2 = ([gpu_map, gpu_dist1b, gpu_dist2b, gpu_rho, gpu_phi] +
                       gpu_v + [options])

        # Indirect addressing prepends the node address map to every kernel.
        if self.config.node_addressing == 'indirect':
            gpu_nodes = runner.gpu_indirect_address()
            args1a = [gpu_nodes] + args1a
            args1b = [gpu_nodes] + args1b
            args2a = [gpu_nodes] + args2a
            args2b = [gpu_nodes] + args2b
            macro_args1 = [gpu_nodes] + macro_args1
            macro_args2 = [gpu_nodes] + macro_args2

        # One 'P' per pointer argument, 'i' for the trailing options word.
        args_a_signature = 'P' * (len(args1a) - 1) + 'i'
        args_b_signature = 'P' * (len(args1b) - 1) + 'i'
        macro_signature = 'P' * (len(macro_args1) - 1) + 'i'

        # Optional scratch buffer is appended to every argument list, so the
        # signatures grow in lockstep.
        if runner.gpu_scratch_space is not None:
            macro_args1.append(runner.gpu_scratch_space)
            macro_args2.append(runner.gpu_scratch_space)
            macro_signature += 'P'

            args1a.append(runner.gpu_scratch_space)
            args2a.append(runner.gpu_scratch_space)
            args1b.append(runner.gpu_scratch_space)
            args2b.append(runner.gpu_scratch_space)
            args_a_signature += 'P'
            args_b_signature += 'P'

        macro = runner.get_kernel(
            'ShanChenPrepareMacroFields',
            macro_args1,
            macro_signature,
            needs_iteration=self.config.needs_iteration_num)

        if self.config.access_pattern == 'AB':
            # AB double-buffering needs a second kernel reading the other
            # buffers; other access patterns reuse the same kernel twice.
            macro_secondary = runner.get_kernel(
                'ShanChenPrepareMacroFields',
                macro_args2,
                macro_signature,
                needs_iteration=self.config.needs_iteration_num)
            macro_pair = KernelPair(macro, macro_secondary)
        else:
            macro_pair = KernelPair(macro, macro)

        # TODO(michalj): These kernels can actually run in parallel.
        primary = [
            runner.get_kernel('ShanChenCollideAndPropagate0',
                              args1a,
                              args_a_signature,
                              needs_iteration=self.config.needs_iteration_num),
            runner.get_kernel('ShanChenCollideAndPropagate1',
                              args1b,
                              args_b_signature,
                              needs_iteration=self.config.needs_iteration_num)
        ]

        if self.config.access_pattern == 'AB':
            secondary = [
                runner.get_kernel(
                    'ShanChenCollideAndPropagate0',
                    args2a,
                    args_a_signature,
                    needs_iteration=self.config.needs_iteration_num),
                runner.get_kernel(
                    'ShanChenCollideAndPropagate1',
                    args2b,
                    args_b_signature,
                    needs_iteration=self.config.needs_iteration_num)
            ]
            sim_pair = KernelPair(primary, secondary)
        else:
            sim_pair = KernelPair(primary, primary)

        # Pair up macro and sim kernels for each step of the cycle.
        return list(zip(macro_pair, sim_pair))
Example #5
0
    def get_compute_kernels(self, runner, full_output, bulk):
        """Builds the free-energy macro-field and collide-and-propagate kernels.

        Two distribution sets are used (fluid and order parameter), each
        double-buffered; the primary/secondary argument lists differ only in
        the order of the two buffers of each set.

        :param runner: provides GPU buffers and kernel compilation
        :param full_output: when True, sets bit 0 of the options word
        :param bulk: when True, sets bit 1 of the options word
        :returns: list of (macro KernelPair, sim KernelPair) tuples
        """
        gpu_rho = runner.gpu_field(self.rho)
        gpu_phi = runner.gpu_field(self.phi)
        gpu_lap = runner.gpu_field(self.phi_laplacian)
        gpu_v = runner.gpu_field(self.v)
        gpu_map = runner.gpu_geo_map()

        # gpu_dist(i, j): distribution set i, buffer j of the double buffer.
        gpu_dist1a = runner.gpu_dist(0, 0)
        gpu_dist1b = runner.gpu_dist(0, 1)
        gpu_dist2a = runner.gpu_dist(1, 0)
        gpu_dist2b = runner.gpu_dist(1, 1)

        options = 0
        if full_output:
            options |= 1
        if bulk:
            options |= 2

        # NOTE(review): when the force term for distribution set 1 is zero,
        # the order-parameter kernel also receives rho -- presumably it then
        # needs the density directly; confirm against the kernel source.
        if hasattr(
                self,
                '_force_term_for_eq') and self._force_term_for_eq.get(1) == 0:
            phi_args = [gpu_rho, gpu_phi]
        else:
            phi_args = [gpu_phi]

        options = np.uint32(options)
        # Primary.
        args1a = ([gpu_map, gpu_dist1a, gpu_dist1b, gpu_rho, gpu_phi] + gpu_v +
                  [gpu_lap, options])
        args1b = ([gpu_map, gpu_dist2a, gpu_dist2b] + phi_args + gpu_v +
                  [gpu_lap, options])
        # Secondary.
        args2a = ([gpu_map, gpu_dist1b, gpu_dist1a, gpu_rho, gpu_phi] + gpu_v +
                  [gpu_lap, options])
        args2b = ([gpu_map, gpu_dist2b, gpu_dist2a] + phi_args + gpu_v +
                  [gpu_lap, options])

        # Macro-field kernels read the "a" (resp. "b") buffer of both sets.
        macro_args1 = [
            gpu_map, gpu_dist1a, gpu_dist2a, gpu_rho, gpu_phi, options
        ]
        macro_args2 = [
            gpu_map, gpu_dist1b, gpu_dist2b, gpu_rho, gpu_phi, options
        ]

        # Indirect addressing prepends the node address map to every kernel.
        if self.config.node_addressing == 'indirect':
            gpu_nodes = runner.gpu_indirect_address()
            args1a = [gpu_nodes] + args1a
            args1b = [gpu_nodes] + args1b
            args2a = [gpu_nodes] + args2a
            args2b = [gpu_nodes] + args2b
            macro_args1 = [gpu_nodes] + macro_args1
            macro_args2 = [gpu_nodes] + macro_args2

        # One 'P' per pointer argument, 'i' for the trailing options word.
        args_a_signature = 'P' * (len(args1a) - 1) + 'i'
        args_b_signature = 'P' * (len(args1b) - 1) + 'i'
        macro_signature = 'P' * (len(macro_args1) - 1) + 'i'

        # Optional scratch buffer is appended to every argument list, so the
        # signatures grow in lockstep.
        if runner.gpu_scratch_space is not None:
            macro_args1.append(runner.gpu_scratch_space)
            macro_args2.append(runner.gpu_scratch_space)
            macro_signature += 'P'

            args1a.append(runner.gpu_scratch_space)
            args2a.append(runner.gpu_scratch_space)
            args1b.append(runner.gpu_scratch_space)
            args2b.append(runner.gpu_scratch_space)
            args_a_signature += 'P'
            args_b_signature += 'P'

        macro = runner.get_kernel(
            'FreeEnergyPrepareMacroFields',
            macro_args1,
            macro_signature,
            needs_iteration=self.config.needs_iteration_num)

        if self.config.access_pattern == 'AB':
            # AB double-buffering needs a second kernel reading the other
            # buffers; other access patterns reuse the same kernel twice.
            macro_secondary = runner.get_kernel(
                'FreeEnergyPrepareMacroFields',
                macro_args2,
                macro_signature,
                needs_iteration=self.config.needs_iteration_num)
            macro_pair = KernelPair(macro, macro_secondary)
        else:
            macro_pair = KernelPair(macro, macro)

        # Note: these two kernels need to be executed in order.
        primary = [
            runner.get_kernel('FreeEnergyCollideAndPropagateFluid',
                              args1a,
                              args_a_signature,
                              needs_iteration=self.config.needs_iteration_num),
            runner.get_kernel('FreeEnergyCollideAndPropagateOrderParam',
                              args1b,
                              args_b_signature,
                              needs_iteration=self.config.needs_iteration_num)
        ]

        if self.config.access_pattern == 'AB':
            secondary = [
                runner.get_kernel(
                    'FreeEnergyCollideAndPropagateFluid',
                    args2a,
                    args_a_signature,
                    needs_iteration=self.config.needs_iteration_num),
                runner.get_kernel(
                    'FreeEnergyCollideAndPropagateOrderParam',
                    args2b,
                    args_b_signature,
                    needs_iteration=self.config.needs_iteration_num)
            ]
            sim_pair = KernelPair(primary, secondary)
        else:
            sim_pair = KernelPair(primary, primary)

        # Pair up macro and sim kernels for each step of the cycle.
        return list(zip(macro_pair, sim_pair))