예제 #1
0
    def generate_function_signature(self, function_descriptor):
        """Build the C declaration of the function described by a
        :class:`FunctionDescriptor`

        Parameters are emitted in this order: one ``<name>_vec`` pointer per
        matrix parameter, then the value parameters (``<name>_pointer``
        pointers when ``self.mic_flag`` is set, plain values otherwise), then
        one ``struct <stype>`` pointer per struct parameter.

        :param function_descriptor: The :class:`FunctionDescriptor` to process
        :returns: :class:`cgen.FunctionDeclaration` for an ``int`` function
         named after the descriptor; when ``self.mic_flag`` is not set it is
         wrapped in ``cgen.Extern("C", ...)``
        """
        offload = self.mic_flag

        # Matrix arguments are always passed by pointer
        signature = [
            cgen.Pointer(cgen.POD(matrix['dtype'], matrix['name'] + "_vec"))
            for matrix in function_descriptor.matrix_params
        ]

        # Scalars travel by pointer in the mic_flag case, by value otherwise
        for scalar in function_descriptor.value_params:
            if offload:
                signature.append(
                    cgen.Pointer(
                        cgen.POD(scalar['dtype'],
                                 scalar['name'] + "_pointer")))
            else:
                signature.append(cgen.POD(scalar['dtype'], scalar['name']))

        signature.extend(
            cgen.Pointer(
                cgen.Value("struct %s" % (struct['stype']), struct['name']))
            for struct in function_descriptor.struct_params)

        if offload:
            # The attribute string is placed on its own line ahead of the
            # return type
            return_type = self._pymic_attribute + '\nint'
            return cgen.FunctionDeclaration(
                cgen.Value(return_type, function_descriptor.name), signature)

        declaration = cgen.FunctionDeclaration(
            cgen.Value('int', function_descriptor.name), signature)
        return cgen.Extern("C", declaration)
예제 #2
0
    def sympy_to_cgen(self, stencils):
        """Converts sympy stencils to cgen statements

        The returned block holds, in order:

        * one assignment per entry of ``self.factorized`` (each name is also
          registered through ``self.add_local_var``);
        * one statement per stencil, produced by
          ``self.convert_equality_to_cgen``. The first stencil assigning to
          each distinct left-hand side whose name contains ``"temp"`` is
          turned into a declaration-with-initialiser.

        :param stencils: A list of stencils to be converted
        :returns: :class:`cgen.Block` containing the converted kernel
        """
        factors = []
        for name, expr in self.factorized.items():
            self.add_local_var(name, self.dtype)
            sub = str(
                ccode(self.time_substitutions(expr).xreplace(self._mapper)))
            if self.dtype is np.float32:
                # Use the single-precision variants of the C math functions
                sub = sub.replace("pow", "powf").replace("fabs", "fabsf")
            factors.append(cgen.Assign(name, sub))

        # (stencil index, declaration) for the first assignment to each temp.
        # The index is recorded so the declaration is attached to the
        # statement that actually assigns that temp, wherever it sits in
        # ``stencils`` and even if the same temp is assigned more than once.
        declarations = []
        declared = set()
        for idx, eqn in enumerate(stencils):
            s_lhs = str(eqn.lhs)
            if "temp" in s_lhs and s_lhs not in declared:
                declared.add(s_lhs)
                expr_dtype = dse_dtype(eqn.rhs) or self.dtype
                declarations.append(
                    (idx,
                     cgen.Value(cgen.dtype_to_ctype(expr_dtype),
                                ccode(eqn.lhs))))

        stmts = [self.convert_equality_to_cgen(x) for x in stencils]

        # Replace "temp = rhs" with "<ctype> temp = rhs"
        for idx, dec in declarations:
            stmts[idx] = cgen.Assign(dec.inline(), stmts[idx].rvalue)

        return cgen.Block(factors + stmts)
예제 #3
0
    def generate_space_loops(self, loop_body):
        """Generate list<cgen.For> for a non cache blocking space loop

        The space dimensions are walked in reverse, so the last entry of
        ``self.space_dims`` becomes the innermost loop and the first entry the
        outermost one. Every level is passed through
        ``self.add_inner_most_dim_pragma``, with the flag set only for the
        innermost level.

        :param loop_body: Statement representing the loop body
        :returns: :list<cgen.For> a one-element list holding the loop nest
        """
        nest = loop_body

        for depth, spc_var in enumerate(reversed(list(self.space_dims))):
            dim_var = self._mapper[spc_var]
            limits = self._space_loop_limits[spc_var]
            counter = str(dim_var)

            initialiser = cgen.InlineInitializer(
                cgen.Value("int", dim_var), str(limits[0]))
            nest = cgen.For(initialiser,
                            counter + "<" + str(limits[1]),
                            counter + "++",
                            nest)

            # depth 0 is the first loop wrapped around the body, i.e. the
            # innermost dimension
            nest = self.add_inner_most_dim_pragma(depth == 0,
                                                  self.space_dims,
                                                  nest)

        return [nest]  # callers expect a list
예제 #4
0
    def generate_space_loops_blocking(self, loop_body):
        """Generate list<cgen.For> for a cache blocking space loop

        Three groups of loops are generated:

        1. Element loops over every space dimension (last dimension
           innermost). A dimension whose block size is not ``None`` iterates
           over ``[<dim>b, <dim>b + <dim>block)``; an unblocked dimension
           iterates over its full limits.
        2. Block loops over ``<dim>b`` for every blocked dimension, wrapped
           around the element loops. They advance by ``<dim>block`` and stop
           at ``upper - ((upper - lower) % <dim>block)``, i.e. they cover only
           whole blocks.
        3. One remainder loop nest per blocked dimension, each built around
           the original ``loop_body``. The per-dimension ranges are selected
           by the weights returned from ``self._decide_weights``.

        The generated code refers to ``<dim>block`` variables that are not
        declared here.

        :param loop_body: Statement representing the loop body
        :returns: :list<cgen.For> the blocked loop nest followed by the
         remainder nests, each remainder nest preceded by an
         ``omp for schedule(static)`` pragma when OpenMP is enabled
        """

        def dims_innermost_first():
            # zip() returns a one-shot iterator on Python 3, which reversed()
            # rejects with a TypeError; materialise the pairs first (this is
            # a no-op on Python 2, where zip() already returns a list).
            return reversed(list(zip(self.space_dims, self.block_sizes)))

        def whole_blocks_end(limits, block_size_name):
            # C expression for the end of the region covered by whole blocks
            return "%d - (%d %% %s)" % (limits[1], limits[1] - limits[0],
                                        block_size_name)

        inner_most_dim = True
        orig_loop_body = loop_body

        omp_for = [cgen.Pragma("omp for schedule(static)")
                   ] if self.compiler.openmp else []

        # 1. Element loops (inside a block for blocked dimensions)
        for spc_var, block_size in dims_innermost_first():
            orig_var = str(self._mapper[spc_var])
            block_var = orig_var + "b"
            loop_limits = self._space_loop_limits[spc_var]

            if block_size is not None:
                lower_limit_str = block_var
                upper_limit_str = "%s+%sblock" % (block_var, orig_var)
            else:
                lower_limit_str = str(loop_limits[0])
                upper_limit_str = str(loop_limits[1])

            loop_body = cgen.For(
                cgen.InlineInitializer(cgen.Value("int", orig_var),
                                       lower_limit_str),
                orig_var + "<" + upper_limit_str, orig_var + "++", loop_body)

            loop_body = self.add_inner_most_dim_pragma(inner_most_dim,
                                                       self.space_dims,
                                                       loop_body)
            inner_most_dim = False

        # 2. Block loops around the element loops
        remainder_counter = 0  # indicates how many remainder loops we need
        for spc_var, block_size in dims_innermost_first():
            # if block size set to None do not block this dimension
            if block_size is None:
                continue

            orig_var = str(self._mapper[spc_var])
            block_var = orig_var + "b"
            loop_limits = self._space_loop_limits[spc_var]
            block_size_str = orig_var + "block"

            loop_body = cgen.For(
                cgen.InlineInitializer(cgen.Value("int", block_var),
                                       str(loop_limits[0])),
                block_var + "<" + whole_blocks_end(loop_limits,
                                                   block_size_str),
                block_var + "+=" + block_size_str, loop_body)
            remainder_counter += 1

        # 3. Remainder loops
        full_remainder = []
        # weights for deciding remainder loop limit; they are advanced after
        # every blocked dimension and carried over from one remainder nest to
        # the next
        weights = self._decide_weights(self.block_sizes, remainder_counter)
        for _ in range(remainder_counter):
            remainder_loop = orig_loop_body
            inner_most_dim = True

            for spc_var, block_size in dims_innermost_first():
                orig_var = str(self._mapper[spc_var])
                loop_limits = self._space_loop_limits[
                    spc_var]  # Full loop limits
                lower_limit_str = str(loop_limits[0])
                upper_limit_str = str(loop_limits[1])

                if block_size is not None:
                    if weights[orig_var] < 0:
                        # already blocked loop limits
                        upper_limit_str = whole_blocks_end(
                            loop_limits, orig_var + "block")
                    elif weights[orig_var] == 0:
                        # remainder loop limits
                        lower_limit_str = whole_blocks_end(
                            loop_limits, orig_var + "block")
                    weights[orig_var] += 1

                remainder_loop = cgen.For(
                    cgen.InlineInitializer(cgen.Value("int", orig_var),
                                           lower_limit_str),
                    orig_var + "<" + upper_limit_str,
                    orig_var + "++", remainder_loop)

                remainder_loop = self.add_inner_most_dim_pragma(
                    inner_most_dim, self.space_dims, remainder_loop)
                inner_most_dim = False

            full_remainder += omp_for
            full_remainder.append(remainder_loop)

        return [loop_body] + full_remainder
예제 #5
0
    def generate_loops(self, loop_body):
        """Wrap ``loop_body`` in the space loops and then in the time loop

        Assuming that the variable order defined in init (#var_order) is the
        order in which the corresponding dimensions are laid out in memory,
        the last variable in that definition should be the fastest varying
        dimension in the arrays. The space loops are therefore generated with
        the last variable in #var_order (z in the 3D case) varying in the
        inner-most loop.

        The body of the time loop is made of: the time stepping statements
        (only when ``self.save`` is not ``True``) plus the pre-stencils, the
        space loops, and the post-stencils. When OpenMP is enabled the
        pre/post groups are marked ``omp single``, the space loops
        ``omp for schedule(static)`` and the time loop ``omp parallel``.

        :param loop_body: Statement representing the loop body
        :returns: :class:`cgen.Block` holding the time stepping variable
         declarations followed by the time loop
        """

        def omp(directive):
            # Pragmas are only emitted when the compiler has OpenMP enabled
            if self.compiler.openmp:
                return [cgen.Pragma(directive)]
            return []

        def to_cgen(stencils):
            # NOTE(review): the results of time_substitutions are discarded
            # and the conversion runs on the untouched stencils. Confirm
            # whether convert_equality_to_cgen already applies the
            # substitution itself before simplifying this.
            for stencil in stencils:
                self.time_substitutions(stencil)
            return [
                self.convert_equality_to_cgen(stencil) for stencil in stencils
            ]

        def with_profiling(statements, section):
            # Each statement gets its own profiling section "<name><index>"
            return list(
                flatten([
                    self.profiler.add_profiling([stmt], "%s%d" %
                                                (section.name, i))
                    for i, stmt in enumerate(statements)
                ]))

        # Space loops: an empty cgen.Block produces no space loops at all
        if isinstance(loop_body, cgen.Block) and len(loop_body.contents) == 0:
            space_loops = []
        elif self.cache_blocking is None:
            space_loops = self.generate_space_loops(loop_body)
        else:
            self._decide_block_sizes()
            space_loops = self.generate_space_loops_blocking(loop_body)

        omp_master = omp("omp master")
        omp_single = omp("omp single")
        omp_parallel = omp("omp parallel")
        omp_for = omp("omp for schedule(static)")

        limits = self.time_loop_limits
        t_var = str(self._mapper[self.time_dim])
        comparison = "<" if self._forward else ">"

        # To cycle between array elements when we are not saving time history
        time_stepping = [] if self.save is True else self.get_time_stepping()

        if len(space_loops) > 0:
            space_loops = [cgen.Block(omp_for + space_loops)]

        # Statements to be inserted into the time loop before / after the
        # spatial loop
        pre_stencils = to_cgen(self.time_loop_stencils_b)
        post_stencils = to_cgen(self.time_loop_stencils_a)

        if self.profile:
            pre_stencils = with_profiling(pre_stencils, PRE_STENCILS)
            post_stencils = with_profiling(post_stencils, POST_STENCILS)

        initial_block = time_stepping + pre_stencils
        if initial_block:
            initial_block = omp_single + [cgen.Block(initial_block)]

        end_block = post_stencils
        if end_block:
            end_block = omp_single + [cgen.Block(end_block)]

        if self.profile:
            space_loops = self.profiler.add_profiling(space_loops,
                                                      LOOP_BODY.name,
                                                      omp_flag=omp_master)

        time_loop = cgen.For(
            cgen.InlineInitializer(cgen.Value("int", t_var), str(limits[0])),
            t_var + comparison + str(limits[1]),
            t_var + "+=" + str(self._time_step),
            cgen.Block(initial_block + space_loops + end_block))

        # Code to declare the time stepping variables (outside the time loop)
        declarations = [
            cgen.Value("int", stepper.name) for stepper in self.time_steppers
        ]

        return cgen.Block(declarations + omp_parallel + [time_loop])