Example #1
0
    def map_subscript(self, expr, enclosing_prec, type_context):
        """Return the ISPC code string for the subscript *expr*.

        If *expr* refers to a ``TemporaryVariable`` and the kernel reports a
        non-empty second grid-size component (``lsize``), the access is
        emitted as ``name[programIndex + lsize*subscript]``.  All other
        subscripts are handed to the parent mapper unchanged.
        """
        from loopy.kernel.data import TemporaryVariable

        def delegate_to_parent():
            return super(ExprToISPCMapper, self).map_subscript(
                expr, enclosing_prec, type_context)

        array = self.find_array(expr)
        if not isinstance(array, TemporaryVariable):
            return delegate_to_parent()

        _, local_sizes = self.kernel.get_grid_size_upper_bounds_as_exprs()
        if not local_sizes:
            return delegate_to_parent()

        # Unpacking insists on exactly one entry in the local size tuple.
        local_size, = local_sizes

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate

        info = get_access_info(
            self.kernel.target,
            array,
            expr.index,
            lambda expr: evaluate(expr, self.codegen_state.var_subst_map),
            self.codegen_state.vectorization_info)

        # Exactly one flattened subscript is expected as well.
        flat_subscript, = info.subscripts

        access_code = "%s[programIndex + %s]" % (
            info.array_name,
            self.rec(local_size * flat_subscript, PREC_SUM, 'i'))
        wrapped = self.parenthesize_if_needed(
            access_code, enclosing_prec, PREC_CALL)

        if info.vector_index is None:
            return wrapped

        return self.kernel.target.add_vector_access(
            wrapped, info.vector_index)
Example #2
0
    def map_subscript(self, expr, type_context):
        """Return the ISPC access expression for the subscript *expr*.

        Only temporaries whose ``scope`` equals ``temp_var_scope.PRIVATE``
        get special treatment, and only when the kernel reports a non-empty
        second grid-size component (``lsize``): the access then becomes
        ``name[programIndex + lsize*subscript]``.  Everything else is
        delegated to the parent mapper.
        """
        from loopy.kernel.data import TemporaryVariable

        array = self.find_array(expr)
        is_private_temporary = (
            isinstance(array, TemporaryVariable)
            and array.scope == temp_var_scope.PRIVATE)

        # The grid size is only queried for private temporaries.
        local_sizes = ()
        if is_private_temporary:
            _, local_sizes = self.kernel.get_grid_size_upper_bounds_as_exprs()

        if not local_sizes:
            return super(ExprToISPCExprMapper,
                         self).map_subscript(expr, type_context)

        # generate access code for access to private-index temporaries

        # Unpacking insists on exactly one entry in the local size tuple.
        local_size, = local_sizes

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate

        info = get_access_info(
            self.kernel.target,
            array,
            expr.index,
            lambda expr: evaluate(expr, self.codegen_state.var_subst_map),
            self.codegen_state.vectorization_info)

        flat_subscript, = info.subscripts
        element = var(info.array_name)[
            var("programIndex") + self.rec(local_size * flat_subscript, 'i')]

        if info.vector_index is None:
            return element

        return self.kernel.target.add_vector_access(
            element, info.vector_index)
Example #3
0
    def map_subscript(self, expr, type_context):
        """Return the ISPC access expression for the subscript *expr*.

        Temporaries whose ``address_space`` equals ``AddressSpace.PRIVATE``
        are indexed as ``name[programIndex + lsize*subscript]`` when the
        kernel reports a non-empty second grid-size component (``lsize``).
        Every other subscript is delegated to the parent mapper.
        """
        from loopy.kernel.data import TemporaryVariable

        def use_parent_mapper():
            return super(ExprToISPCExprMapper, self).map_subscript(
                    expr, type_context)

        array = self.find_array(expr)

        if not isinstance(array, TemporaryVariable):
            return use_parent_mapper()
        if not (array.address_space == AddressSpace.PRIVATE):
            return use_parent_mapper()

        # generate access code for access to private-index temporaries

        _, local_sizes = self.kernel.get_grid_size_upper_bounds_as_exprs()
        if not local_sizes:
            return use_parent_mapper()

        # Unpacking insists on exactly one entry in the local size tuple.
        local_size, = local_sizes

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate

        info = get_access_info(
                self.kernel.target,
                array,
                expr.index,
                lambda expr: evaluate(expr, self.codegen_state.var_subst_map),
                self.codegen_state.vectorization_info)

        flat_subscript, = info.subscripts
        array_var = var(info.array_name)
        offset = var("programIndex") + self.rec(local_size*flat_subscript, 'i')
        element = array_var[offset]

        if info.vector_index is None:
            return element

        return self.kernel.target.add_vector_access(
                element, info.vector_index)
Example #4
0
    def map_subscript(self, expr, enclosing_prec, type_context):
        """Return the ISPC code string for the subscript *expr*.

        Subscripts into a ``TemporaryVariable`` are emitted as
        ``name[programIndex + lsize*subscript]`` whenever the kernel reports
        a non-empty second grid-size component (``lsize``).  Anything else
        falls back to the parent mapper's implementation.
        """
        from loopy.kernel.data import TemporaryVariable

        array = self.find_array(expr)

        # The grid size is only queried for temporaries.
        local_sizes = ()
        if isinstance(array, TemporaryVariable):
            _, local_sizes = self.kernel.get_grid_size_upper_bounds_as_exprs()

        if not local_sizes:
            return super(ExprToISPCMapper, self).map_subscript(
                    expr, enclosing_prec, type_context)

        # Unpacking insists on exactly one entry in the local size tuple.
        local_size, = local_sizes

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate

        info = get_access_info(
                self.kernel.target,
                array,
                expr.index,
                lambda expr: evaluate(expr, self.codegen_state.var_subst_map),
                self.codegen_state.vectorization_info)

        flat_subscript, = info.subscripts
        code = self.parenthesize_if_needed(
                "%s[programIndex + %s]" % (
                    info.array_name,
                    self.rec(local_size*flat_subscript, PREC_SUM, 'i')),
                enclosing_prec, PREC_CALL)

        if info.vector_index is None:
            return code

        return self.kernel.target.add_vector_access(code, info.vector_index)
Example #5
0
    def emit_call(self, expression_to_code_mapper, expression, target):
        """Evaluate an ``indexof``/``indexof_vec`` call on a subscript.

        The call must have exactly one parameter, and that parameter must be
        a ``Subscript``.  For ``indexof`` the first subscript computed by
        ``get_access_info`` is returned.  For ``indexof_vec`` that subscript
        is additionally scaled by the length of the vector axis and offset
        by the vector index when the array has a vector-tagged axis.

        Raises ``LoopyError`` for a wrong argument count, a non-subscript
        argument or an image array, and ``RuntimeError`` when ``self.name``
        is neither of the two supported names.
        """
        from pymbolic.primitives import Subscript

        params = expression.parameters
        if len(params) != 1:
            raise LoopyError("%s takes exactly one argument" % self.name)

        subscript_arg = params[0]
        if not isinstance(subscript_arg, Subscript):
            raise LoopyError("argument to %s must be a subscript" % self.name)

        ecm = expression_to_code_mapper
        array = ecm.find_array(subscript_arg)

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate
        info = get_access_info(
            ecm.kernel.target,
            array,
            subscript_arg.index,
            lambda expr: evaluate(expr, ecm.codegen_state.var_subst_map),
            ecm.codegen_state.vectorization_info)

        from loopy.kernel.data import ImageArg
        if isinstance(array, ImageArg):
            raise LoopyError("%s does not support images" % self.name)

        if self.name not in ("indexof", "indexof_vec"):
            raise RuntimeError("should not get here")

        if self.name == "indexof":
            return info.subscripts[0]

        # indexof_vec: look for the vector-tagged axis; the last one wins.
        from loopy.kernel.array import VectorArrayDimTag
        vector_axes = [
            axis for axis, tag in enumerate(array.dim_tags)
            if isinstance(tag, VectorArrayDimTag)]

        if not vector_axes:
            return info.subscripts[0]

        return (info.subscripts[0] * array.shape[vector_axes[-1]] +
                info.vector_index)
Example #6
0
    def map_call(self, expr, enclosing_prec, type_context):
        """Generate code for the function call *expr*.

        ``indexof`` and ``indexof_vec`` are resolved here directly from the
        access information of their single subscript argument.  Every other
        function is looked up through ``self.kernel.mangle_function``; the
        call is recorded in ``codegen_state.seen_functions`` and returned as
        a ``"name(arg, ...)"`` string.

        Raises ``LoopyError`` for malformed ``indexof`` calls and for
        functions without exactly one result dtype, and ``RuntimeError`` if
        the mangler does not know the function.
        """
        from pymbolic.primitives import Variable, Subscript
        from pymbolic.mapper.stringifier import PREC_NONE

        function = expr.function

        # {{{ implement indexof, indexof_vec

        if function.name in ["indexof", "indexof_vec"]:
            builtin_name = function.name

            if len(expr.parameters) != 1:
                raise LoopyError("%s takes exactly one argument" %
                                 builtin_name)
            subscript_arg, = expr.parameters
            if not isinstance(subscript_arg, Subscript):
                raise LoopyError("argument to %s must be a subscript" %
                                 builtin_name)

            array = self.find_array(subscript_arg)

            from loopy.kernel.array import get_access_info
            from pymbolic import evaluate
            info = get_access_info(
                self.kernel.target,
                array,
                subscript_arg.index,
                lambda expr: evaluate(expr, self.codegen_state.var_subst_map),
                self.codegen_state.vectorization_info)

            from loopy.kernel.data import ImageArg
            if isinstance(array, ImageArg):
                raise LoopyError("%s does not support images" %
                                 builtin_name)

            if builtin_name == "indexof":
                return info.subscripts[0]
            if builtin_name != "indexof_vec":
                raise RuntimeError("should not get here")

            # indexof_vec: the last vector-tagged axis (if any) is used.
            from loopy.kernel.array import VectorArrayDimTag
            vector_axes = [
                axis for axis, tag in enumerate(array.dim_tags)
                if isinstance(tag, VectorArrayDimTag)]

            if not vector_axes:
                return info.subscripts[0]

            return (info.subscripts[0] * array.shape[vector_axes[-1]] +
                    info.vector_index)

        # }}}

        func_id = function.name if isinstance(function, Variable) else function

        arg_dtypes = tuple(self.infer_type(arg) for arg in expr.parameters)

        mangled = self.kernel.mangle_function(
            func_id, arg_dtypes, ast_builder=self.codegen_state.ast_builder)

        if mangled is None:
            raise RuntimeError(
                "function '%s' unknown--"
                "maybe you need to register a function mangler?" % func_id)

        if len(mangled.result_dtypes) != 1:
            raise LoopyError(
                "functions with more or fewer than one return value "
                "may not be used in an expression")

        if mangled.arg_dtypes is None:
            # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to
            # propagate the type context here. But for many others, it does
            # not. Using the inferred type as a stopgap for now.
            arg_codes = []
            for arg, arg_dtype in zip(expr.parameters, arg_dtypes):
                arg_codes.append(
                    self.rec(arg,
                             PREC_NONE,
                             type_context=dtype_to_type_context(
                                 self.kernel.target, arg_dtype)))

            from warnings import warn
            warn(
                "Calling function '%s' with unknown C signature--"
                "return CallMangleInfo.arg_dtypes" % func_id, LoopyWarning)

        else:
            # The mangler supplied target dtypes: generate each argument
            # in the context of, and converted to, its target dtype.
            arg_codes = []
            for arg, _arg_dtype, tgt_dtype in zip(
                    expr.parameters, arg_dtypes, mangled.arg_dtypes):
                arg_codes.append(
                    self.rec(arg, PREC_NONE,
                             dtype_to_type_context(self.kernel.target,
                                                   tgt_dtype),
                             tgt_dtype))

        from loopy.codegen import SeenFunction
        seen = SeenFunction(func_id, mangled.target_name,
                            mangled.arg_dtypes or arg_dtypes)
        self.codegen_state.seen_functions.add(seen)

        return "%s(%s)" % (mangled.target_name, ", ".join(arg_codes))
Example #7
0
    def map_subscript(self, expr, enclosing_prec, type_context):
        """Return the C code string for the subscript *expr*.

        * If the aggregate is not a plain ``Variable``, a generic
          ``aggregate[index]`` string is produced.
        * ``ImageArg`` accesses become ``read_imagef(...)`` calls; only
          float32, vector and float64 dtypes are handled.
        * ``GlobalArg`` / ``TemporaryVariable`` accesses use the flattened
          subscript from ``get_access_info`` and are wrapped with
          ``add_vector_access`` when a vector index is present.

        Raises ``NotImplementedError`` for images of other dtypes and
        ``AssertionError`` for any other kind of array.
        """
        def base_impl(expr, enclosing_prec, type_context):
            return self.parenthesize_if_needed(
                "%s[%s]" % (self.rec(expr.aggregate, PREC_CALL, type_context),
                            self.rec(expr.index, PREC_NONE, 'i')),
                enclosing_prec, PREC_CALL)

        from pymbolic.primitives import Variable
        if not isinstance(expr.aggregate, Variable):
            return base_impl(expr, enclosing_prec, type_context)

        ary = self.find_array(expr)

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate

        from loopy.symbolic import simplify_using_aff
        index_tuple = tuple(
            simplify_using_aff(self.kernel, idx) for idx in expr.index_tuple)

        access_info = get_access_info(
            self.kernel.target, ary, index_tuple,
            lambda expr: evaluate(expr, self.codegen_state.var_subst_map),
            self.codegen_state.vectorization_info)

        from loopy.kernel.data import ImageArg, GlobalArg, TemporaryVariable

        if isinstance(ary, ImageArg):
            # The image coordinates are passed in reversed index order.
            # NOTE(review): expr.index is sliced directly here, which
            # presumes a tuple-valued index -- confirm for 1D images.
            base_access = ("read_imagef(%s, loopy_sampler, (float%d)(%s))" %
                           (ary.name, ary.dimensions, ", ".join(
                               self.rec(idx, PREC_NONE, 'i')
                               for idx in expr.index[::-1])))

            if ary.dtype.numpy_dtype == np.float32:
                return base_access + ".x"
            if self.kernel.target.is_vector_dtype(ary.dtype):
                return base_access
            elif ary.dtype.numpy_dtype == np.float64:
                return "as_double(%s.xy)" % base_access
            else:
                raise NotImplementedError(
                    "non-floating-point images not supported for now")

        elif isinstance(ary, (GlobalArg, TemporaryVariable)):
            if len(access_info.subscripts) == 0:
                if isinstance(ary, GlobalArg):
                    # unsubscripted global args are pointers
                    result = "*" + access_info.array_name

                else:
                    # unsubscripted temp vars are scalars
                    result = access_info.array_name

            else:
                subscript, = access_info.subscripts
                result = self.parenthesize_if_needed(
                    "%s[%s]" % (access_info.array_name,
                                self.rec(subscript, PREC_NONE, 'i')),
                    enclosing_prec, PREC_CALL)

            if access_info.vector_index is not None:
                return self.codegen_state.ast_builder.add_vector_access(
                    result, access_info.vector_index)
            else:
                return result

        else:
            # An explicit raise (rather than 'assert False') keeps this
            # check alive under 'python -O', where a bare assert would be
            # stripped and the method would silently return None.
            raise AssertionError(
                "unsupported array type in subscript: %s"
                % type(ary).__name__)
Example #8
0
    def emit_assignment(self, codegen_state, insn):
        """Generate the ISPC statement for the assignment instruction *insn*.

        Ordinary assignments are returned as a ``cgen.Assign``.  Instructions
        tagged ``"!streaming_store"`` are returned as a ``cgen.Statement``
        calling ``streaming_store(base + offset, value)``, where *offset* is
        the flattened subscript with the axis-0 local index term removed.

        :arg codegen_state: supplies the kernel, the expression-to-code
            mapper, the variable substitution map and the vectorization
            info used during generation.
        :arg insn: the assignment instruction.

        Raises ``NotImplementedError`` for atomic instructions and
        ``LoopyError`` when a streaming store targets an unsupported array
        kind, has no single subscript, does not have the axis-0 local index
        as a stand-alone term, or uses a short-vector data type.
        """
        kernel = codegen_state.kernel
        ecm = codegen_state.expression_to_code_mapper

        assignee_var_name, = insn.assignee_var_names()

        lhs_var = kernel.get_var_descriptor(assignee_var_name)
        lhs_dtype = lhs_var.dtype

        if insn.atomicity:
            raise NotImplementedError("atomic ops in ISPC")

        from loopy.expression import dtype_to_type_context
        from pymbolic.mapper.stringifier import PREC_NONE

        rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)
        rhs_code = ecm(insn.expression,
                       prec=PREC_NONE,
                       type_context=rhs_type_context,
                       needed_dtype=lhs_dtype)

        lhs = insn.assignee

        # {{{ handle streaming stores

        if "!streaming_store" in insn.tags:
            ary = ecm.find_array(lhs)

            from loopy.kernel.array import get_access_info
            from pymbolic import evaluate

            from loopy.symbolic import simplify_using_aff
            index_tuple = tuple(
                simplify_using_aff(kernel, idx) for idx in lhs.index_tuple)

            # The substitution map lives on the codegen_state argument;
            # this object is not guaranteed to carry a 'codegen_state'
            # attribute of its own.
            access_info = get_access_info(
                kernel.target, ary, index_tuple,
                lambda expr: evaluate(expr, codegen_state.var_subst_map),
                codegen_state.vectorization_info)

            from loopy.kernel.data import GlobalArg, TemporaryVariable

            if not isinstance(ary, (GlobalArg, TemporaryVariable)):
                raise LoopyError("array type not supported in ISPC: %s" %
                                 type(ary).__name__)

            if len(access_info.subscripts) != 1:
                raise LoopyError("streaming stores must have a subscript")
            subscript, = access_info.subscripts

            from pymbolic.primitives import Sum, flattened_sum, Variable
            if isinstance(subscript, Sum):
                terms = subscript.children
            else:
                # A subscript that is not a sum is its own single term.
                terms = (subscript, )

            new_terms = []

            from loopy.kernel.data import LocalIndexTag
            from loopy.symbolic import get_dependencies

            def is_local_axis_0(name):
                # True if *name* is an iname tagged as local index, axis 0.
                tag = kernel.iname_to_tag.get(name)
                return isinstance(tag, LocalIndexTag) and tag.axis == 0

            # The axis-0 local index must show up exactly once, as a bare
            # variable term, and in no other term. That term is dropped
            # from the offset passed to streaming_store().
            saw_l0 = False
            for term in terms:
                if isinstance(term, Variable) and is_local_axis_0(term.name):
                    if saw_l0:
                        raise LoopyError("streaming store must have stride 1 "
                                         "in local index, got: %s" % subscript)
                    saw_l0 = True
                    continue
                else:
                    for dep in get_dependencies(term):
                        if is_local_axis_0(dep):
                            raise LoopyError(
                                "streaming store must have stride 1 "
                                "in local index, got: %s" % subscript)

                    new_terms.append(term)

            if not saw_l0:
                raise LoopyError("streaming store must have stride 1 in "
                                 "local index, got: %s" % subscript)

            if access_info.vector_index is not None:
                raise LoopyError("streaming store may not use a short-vector "
                                 "data type")

            rhs_has_programindex = any(
                is_local_axis_0(dep)
                for dep in get_dependencies(insn.expression))

            if not rhs_has_programindex:
                # The right-hand side does not depend on the axis-0 local
                # index, so wrap it in broadcast() before storing.
                rhs_code = "broadcast(%s, 0)" % rhs_code

            from cgen import Statement
            return Statement(
                "streaming_store(%s + %s, %s)" %
                (access_info.array_name,
                 ecm(flattened_sum(new_terms), PREC_NONE, 'i'), rhs_code))

        # }}}

        from cgen import Assign
        return Assign(ecm(lhs, prec=PREC_NONE, type_context=None), rhs_code)
Example #9
0
    def map_subscript(self, expr, type_context):
        """Return the target expression for the subscript *expr*.

        * If the aggregate is not a plain ``Variable``, the aggregate and
          index are mapped recursively and re-subscripted.
        * ``ImageArg`` accesses become ``read_imagef(...)`` calls with a
          ``float2``/``float4`` coordinate vector padded with zeros; only
          float32, vector and float64 dtypes are handled.
        * ``ArrayArg`` / ``TemporaryVariable`` / ``ConstantArg`` accesses
          use the flattened subscript from ``get_access_info`` and are
          wrapped with ``add_vector_access`` when a vector index is present.

        Raises ``LoopyError`` for images with an unsupported number of
        target axes, ``NotImplementedError`` for images of other dtypes and
        ``AssertionError`` for any other kind of array.
        """
        def base_impl(expr, type_context):
            return self.rec(expr.aggregate, type_context)[self.rec(expr.index, 'i')]

        def make_var(name):
            # Carry the tag of a tagged aggregate over to the new variable.
            from loopy import TaggedVariable
            if isinstance(expr.aggregate, TaggedVariable):
                return TaggedVariable(name, expr.aggregate.tag)
            else:
                return var(name)

        from pymbolic.primitives import Variable
        if not isinstance(expr.aggregate, Variable):
            return base_impl(expr, type_context)

        ary = self.find_array(expr)

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate

        from loopy.symbolic import simplify_using_aff
        index_tuple = tuple(
                simplify_using_aff(self.kernel, idx) for idx in expr.index_tuple)

        access_info = get_access_info(self.kernel.target, ary, index_tuple,
                lambda expr: evaluate(expr, self.codegen_state.var_subst_map),
                self.codegen_state.vectorization_info)

        from loopy.kernel.data import (
                ImageArg, ArrayArg, TemporaryVariable, ConstantArg)

        if isinstance(ary, ImageArg):
            extra_axes = 0

            # Pick the coordinate vector type and the number of zero
            # coordinates needed to fill it.
            num_target_axes = ary.num_target_axes()
            if num_target_axes in [1, 2]:
                idx_vec_type = "float2"
                extra_axes = 2-num_target_axes
            elif num_target_axes == 3:
                idx_vec_type = "float4"
                extra_axes = 4-num_target_axes
            else:
                raise LoopyError("unsupported number (%d) of target axes in image"
                        % num_target_axes)

            # Coordinates are passed in reversed index order.
            idx_tuple = expr.index_tuple[::-1] + (0,) * extra_axes

            base_access = var("read_imagef")(
                    var(ary.name),
                    var("loopy_sampler"),
                    var("(%s)" % idx_vec_type)(*self.rec(idx_tuple, 'i')))

            if ary.dtype.numpy_dtype == np.float32:
                return base_access.attr("x")
            if self.kernel.target.is_vector_dtype(ary.dtype):
                return base_access
            elif ary.dtype.numpy_dtype == np.float64:
                return var("as_double")(base_access.attr("xy"))
            else:
                raise NotImplementedError(
                        "non-floating-point images not supported for now")

        elif isinstance(ary, (ArrayArg, TemporaryVariable, ConstantArg)):
            if len(access_info.subscripts) == 0:
                if (
                        (isinstance(ary, (ConstantArg, ArrayArg)) or
                         (isinstance(ary, TemporaryVariable) and ary.base_storage))):
                    # unsubscripted global args are pointers
                    result = make_var(access_info.array_name)[0]

                else:
                    # unsubscripted temp vars are scalars
                    # (unless they use base_storage)
                    result = make_var(access_info.array_name)

            else:
                subscript, = access_info.subscripts
                result = make_var(access_info.array_name)[simplify_using_aff(
                    self.kernel, self.rec(subscript, 'i'))]

            if access_info.vector_index is not None:
                return self.codegen_state.ast_builder.add_vector_access(
                    result, access_info.vector_index)
            else:
                return result

        else:
            # An explicit raise (rather than 'assert False') keeps this
            # check alive under 'python -O', where a bare assert would be
            # stripped and the method would silently return None.
            raise AssertionError(
                    "unsupported array type in subscript: %s"
                    % type(ary).__name__)
Example #10
0
    def map_call(self, expr, enclosing_prec, type_context):
        """Generate code for the function call *expr*.

        Calls to ``indexof`` / ``indexof_vec`` are answered from the access
        information of their single subscript argument.  All other calls go
        through ``self.kernel.mangle_function``: the arguments are generated
        recursively, the function is added to
        ``codegen_state.seen_functions`` and a ``"name(arg, ...)"`` string
        is returned.

        Raises ``LoopyError`` for malformed ``indexof`` calls and for
        functions without exactly one result dtype, and ``RuntimeError`` if
        the mangler does not know the function.
        """
        from pymbolic.primitives import Variable, Subscript
        from pymbolic.mapper.stringifier import PREC_NONE

        callee = expr.function

        # {{{ implement indexof, indexof_vec

        if callee.name in ["indexof", "indexof_vec"]:
            which = callee.name

            if len(expr.parameters) != 1:
                raise LoopyError("%s takes exactly one argument" % which)
            sub, = expr.parameters
            if not isinstance(sub, Subscript):
                raise LoopyError(
                        "argument to %s must be a subscript" % which)

            array = self.find_array(sub)

            from loopy.kernel.array import get_access_info
            from pymbolic import evaluate
            info = get_access_info(
                    self.kernel.target,
                    array,
                    sub.index,
                    lambda expr: evaluate(expr, self.codegen_state.var_subst_map),
                    self.codegen_state.vectorization_info)

            from loopy.kernel.data import ImageArg
            if isinstance(array, ImageArg):
                raise LoopyError("%s does not support images" % which)

            if which == "indexof":
                return info.subscripts[0]
            if which != "indexof_vec":
                raise RuntimeError("should not get here")

            # indexof_vec: the last vector-tagged axis (if any) is used.
            from loopy.kernel.array import VectorArrayDimTag
            vector_axes = [
                    axis for axis, tag in enumerate(array.dim_tags)
                    if isinstance(tag, VectorArrayDimTag)]

            if not vector_axes:
                return info.subscripts[0]

            return (
                info.subscripts[0]*array.shape[vector_axes[-1]]
                + info.vector_index)

        # }}}

        if isinstance(callee, Variable):
            callee = callee.name

        inferred_dtypes = tuple(
                self.infer_type(param) for param in expr.parameters)

        mangled = self.kernel.mangle_function(
                callee, inferred_dtypes,
                ast_builder=self.codegen_state.ast_builder)

        if mangled is None:
            raise RuntimeError("function '%s' unknown--"
                    "maybe you need to register a function mangler?"
                    % callee)

        if len(mangled.result_dtypes) != 1:
            raise LoopyError("functions with more or fewer than one return value "
                    "may not be used in an expression")

        param_codes = []

        if mangled.arg_dtypes is not None:
            # The mangler supplied target dtypes: generate each argument
            # in the context of, and converted to, its target dtype.
            for param, _inferred, tgt_dtype in zip(
                    expr.parameters, inferred_dtypes, mangled.arg_dtypes):
                param_codes.append(
                        self.rec(param, PREC_NONE,
                            dtype_to_type_context(self.kernel.target, tgt_dtype),
                            tgt_dtype))

        else:
            # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to
            # propagate the type context here. But for many others, it does
            # not. Using the inferred type as a stopgap for now.
            for param, inferred in zip(expr.parameters, inferred_dtypes):
                param_codes.append(
                        self.rec(param, PREC_NONE,
                            type_context=dtype_to_type_context(
                                self.kernel.target, inferred)))

            from warnings import warn
            warn("Calling function '%s' with unknown C signature--"
                    "return CallMangleInfo.arg_dtypes"
                    % callee, LoopyWarning)

        from loopy.codegen import SeenFunction
        self.codegen_state.seen_functions.add(
                SeenFunction(
                    callee,
                    mangled.target_name,
                    mangled.arg_dtypes or inferred_dtypes))

        joined_params = ", ".join(param_codes)
        return "%s(%s)" % (mangled.target_name, joined_params)
Example #11
0
    def map_subscript(self, expr, enclosing_prec, type_context):
        """Return the C code string for the subscript *expr*.

        * If the aggregate is not a plain ``Variable``, a generic
          ``aggregate[index]`` string is produced.
        * ``ImageArg`` accesses become ``read_imagef(...)`` calls; only
          float32, vector and float64 dtypes are handled.
        * ``GlobalArg`` / ``TemporaryVariable`` accesses use the flattened
          subscript from ``get_access_info`` and are wrapped with
          ``add_vector_access`` when a vector index is present.

        Raises ``NotImplementedError`` for images of other dtypes and
        ``AssertionError`` for any other kind of array.
        """
        def base_impl(expr, enclosing_prec, type_context):
            return self.parenthesize_if_needed(
                    "%s[%s]" % (
                        self.rec(expr.aggregate, PREC_CALL, type_context),
                        self.rec(expr.index, PREC_NONE, 'i')),
                    enclosing_prec, PREC_CALL)

        from pymbolic.primitives import Variable
        if not isinstance(expr.aggregate, Variable):
            return base_impl(expr, enclosing_prec, type_context)

        ary = self.find_array(expr)

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate

        from loopy.symbolic import simplify_using_aff
        index_tuple = tuple(
                simplify_using_aff(self.kernel, idx) for idx in expr.index_tuple)

        access_info = get_access_info(self.kernel.target, ary, index_tuple,
                lambda expr: evaluate(expr, self.codegen_state.var_subst_map),
                self.codegen_state.vectorization_info)

        from loopy.kernel.data import ImageArg, GlobalArg, TemporaryVariable

        if isinstance(ary, ImageArg):
            # The image coordinates are passed in reversed index order.
            # NOTE(review): expr.index is sliced directly here, which
            # presumes a tuple-valued index -- confirm for 1D images.
            base_access = ("read_imagef(%s, loopy_sampler, (float%d)(%s))"
                    % (ary.name, ary.dimensions,
                        ", ".join(self.rec(idx, PREC_NONE, 'i')
                            for idx in expr.index[::-1])))

            if ary.dtype.numpy_dtype == np.float32:
                return base_access+".x"
            if self.kernel.target.is_vector_dtype(ary.dtype):
                return base_access
            elif ary.dtype.numpy_dtype == np.float64:
                return "as_double(%s.xy)" % base_access
            else:
                raise NotImplementedError(
                        "non-floating-point images not supported for now")

        elif isinstance(ary, (GlobalArg, TemporaryVariable)):
            if len(access_info.subscripts) == 0:
                if isinstance(ary, GlobalArg):
                    # unsubscripted global args are pointers
                    result = "*" + access_info.array_name

                else:
                    # unsubscripted temp vars are scalars
                    result = access_info.array_name

            else:
                subscript, = access_info.subscripts
                result = self.parenthesize_if_needed(
                        "%s[%s]" % (
                            access_info.array_name,
                            self.rec(subscript, PREC_NONE, 'i')),
                        enclosing_prec, PREC_CALL)

            if access_info.vector_index is not None:
                return self.codegen_state.ast_builder.add_vector_access(
                    result, access_info.vector_index)
            else:
                return result

        else:
            # An explicit raise (rather than 'assert False') keeps this
            # check alive under 'python -O', where a bare assert would be
            # stripped and the method would silently return None.
            raise AssertionError(
                    "unsupported array type in subscript: %s"
                    % type(ary).__name__)
Example #12
0
    def map_call(self, expr, enclosing_prec, type_context):
        """Generate code for the function call *expr*.

        ``indexof`` / ``indexof_vec`` are answered from the access
        information of their single subscript argument.  For other
        functions, ``self.kernel.mangle_function`` may return a 2-tuple
        ``(result dtype, C name)`` or a 3-tuple that additionally carries
        target argument dtypes.  If it returns ``None``, the function's own
        name is used as the C name when the function is a ``Variable``.  The
        call is recorded in ``codegen_state.seen_functions`` and returned as
        a ``"name(arg, ...)"`` string.

        Raises ``LoopyError`` for malformed ``indexof`` calls and
        ``RuntimeError`` when the mangler result has an unexpected length
        or no C name could be determined.
        """
        from pymbolic.primitives import Variable, Subscript
        from pymbolic.mapper.stringifier import PREC_NONE

        function = expr.function

        # {{{ implement indexof, indexof_vec

        if function.name in ["indexof", "indexof_vec"]:
            builtin_name = function.name

            if len(expr.parameters) != 1:
                raise LoopyError("%s takes exactly one argument" % builtin_name)
            subscript_arg, = expr.parameters
            if not isinstance(subscript_arg, Subscript):
                raise LoopyError(
                    "argument to %s must be a subscript" % builtin_name)

            array = self.find_array(subscript_arg)

            from loopy.kernel.array import get_access_info
            from pymbolic import evaluate

            info = get_access_info(
                self.kernel.target,
                array,
                subscript_arg.index,
                lambda expr: evaluate(expr, self.codegen_state.var_subst_map),
                self.codegen_state.vectorization_info,
            )

            from loopy.kernel.data import ImageArg

            if isinstance(array, ImageArg):
                raise LoopyError("%s does not support images" % builtin_name)

            if builtin_name == "indexof":
                return info.subscripts[0]
            if builtin_name != "indexof_vec":
                raise RuntimeError("should not get here")

            # indexof_vec: the last vector-tagged axis (if any) is used.
            from loopy.kernel.array import VectorArrayDimTag

            vector_axes = [
                axis
                for axis, tag in enumerate(array.dim_tags)
                if isinstance(tag, VectorArrayDimTag)
            ]

            if not vector_axes:
                return info.subscripts[0]

            scaled = info.subscripts[0] * array.shape[vector_axes[-1]]
            return scaled + info.vector_index

        # }}}

        # A plain variable doubles as its own default C name.
        func_id = function
        target_name = None
        if isinstance(function, Variable):
            func_id = function.name
            target_name = func_id

        arg_dtypes = tuple(self.infer_type(arg) for arg in expr.parameters)

        arg_codes = None

        mangled = self.kernel.mangle_function(func_id, arg_dtypes)
        if mangled is not None:
            field_count = len(mangled)
            if field_count == 2:
                _, target_name = mangled
            elif field_count == 3:
                _, target_name, target_arg_dtypes = mangled

                # Generate each argument in the context of, and converted
                # to, the dtype requested by the mangler.
                arg_codes = []
                for arg, _arg_dtype, tgt_dtype in zip(
                        expr.parameters, arg_dtypes, target_arg_dtypes):
                    arg_codes.append(
                        self.rec(
                            arg,
                            PREC_NONE,
                            dtype_to_type_context(self.kernel.target, tgt_dtype),
                            tgt_dtype))
            else:
                raise RuntimeError(
                    "result of function mangler "
                    "for function '%s' not understood" % func_id)

        from loopy.codegen import SeenFunction

        # The function is recorded before the C name is validated below.
        seen = SeenFunction(func_id, target_name, arg_dtypes)
        self.codegen_state.seen_functions.add(seen)

        if arg_codes is None:
            # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to
            # propagate the type context here. But for many others, it does
            # not. Using the inferred type as a stopgap for now.
            arg_codes = []
            for arg, arg_dtype in zip(expr.parameters, arg_dtypes):
                arg_codes.append(
                    self.rec(
                        arg,
                        PREC_NONE,
                        type_context=dtype_to_type_context(
                            self.kernel.target, arg_dtype)))

        if target_name is None:
            raise RuntimeError(
                "unable to find C name for function identifier '%s'" % func_id)

        return "%s(%s)" % (target_name, ", ".join(arg_codes))
Example #13
0
    def emit_assignment(self, codegen_state, insn):
        """Generate the ISPC statement for the assignment instruction *insn*.

        The right-hand side is rendered by the expression-to-code mapper of
        *codegen_state*, using a type context derived from the assignee's
        dtype.  Instructions tagged ``!streaming_store`` are emitted as a
        ``streaming_store(base + offset, value)`` call; everything else
        becomes a plain assignment.

        :arg codegen_state: supplies ``kernel``,
            ``expression_to_code_mapper``, ``var_subst_map`` and
            ``vectorization_info``.
        :arg insn: the instruction to emit; must have exactly one assignee
            variable name.
        :returns: a :class:`cgen.Statement` for streaming stores, otherwise a
            :class:`cgen.Assign`.
        :raises NotImplementedError: if *insn* requests atomicity.
        :raises LoopyError: for a streaming store if the assignee is neither
            an ``ArrayArg`` nor a ``TemporaryVariable``, if the access does
            not yield exactly one subscript, if the local axis-0 index does
            not occur exactly once as a bare summand of that subscript, or if
            the access uses a vector index.
        """
        kernel = codegen_state.kernel
        ecm = codegen_state.expression_to_code_mapper

        assignee_var_name, = insn.assignee_var_names()

        lhs_var = kernel.get_var_descriptor(assignee_var_name)
        lhs_dtype = lhs_var.dtype

        if insn.atomicity:
            raise NotImplementedError("atomic ops in ISPC")

        from loopy.expression import dtype_to_type_context
        from pymbolic.mapper.stringifier import PREC_NONE

        rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)
        rhs_code = ecm(insn.expression, prec=PREC_NONE,
                    type_context=rhs_type_context,
                    needed_dtype=lhs_dtype)

        lhs = insn.assignee

        # {{{ handle streaming stores

        if "!streaming_store" in insn.tags:
            ary = ecm.find_array(lhs)

            from loopy.kernel.array import get_access_info
            from pymbolic import evaluate

            from loopy.symbolic import simplify_using_aff
            index_tuple = tuple(
                    simplify_using_aff(kernel, idx) for idx in lhs.index_tuple)

            access_info = get_access_info(kernel.target, ary, index_tuple,
                    lambda expr: evaluate(expr, codegen_state.var_subst_map),
                    codegen_state.vectorization_info)

            from loopy.kernel.data import ArrayArg, TemporaryVariable

            if not isinstance(ary, (ArrayArg, TemporaryVariable)):
                raise LoopyError("array type not supported in ISPC: %s"
                        % type(ary).__name__)

            if len(access_info.subscripts) != 1:
                raise LoopyError("streaming stores must have a subscript")
            subscript, = access_info.subscripts

            from pymbolic.primitives import Sum, flattened_sum, Variable
            if isinstance(subscript, Sum):
                terms = subscript.children
            else:
                # A subscript that is not a sum is its own (only) summand.
                terms = (subscript,)

            # Summands of the subscript other than the bare local axis-0
            # index; their sum is the offset passed to streaming_store.
            new_terms = []

            from loopy.kernel.data import LocalIndexTag, filter_iname_tags_by_type
            from loopy.symbolic import get_dependencies

            saw_l0 = False
            for term in terms:
                if (isinstance(term, Variable)
                            and kernel.iname_tags_of_type(term.name, LocalIndexTag)):
                    tag, = kernel.iname_tags_of_type(
                        term.name, LocalIndexTag, min_num=1, max_num=1)
                    if tag.axis == 0:
                        if saw_l0:
                            raise LoopyError(
                                "streaming store must have stride 1 in "
                                "local index, got: %s" % subscript)
                        saw_l0 = True
                        continue

                    # A local index on another axis still contributes to the
                    # address and must not be dropped from the offset.
                    new_terms.append(term)
                else:
                    # The local axis-0 index may only occur as a bare
                    # summand, never inside a compound term.
                    for dep in get_dependencies(term):
                        if filter_iname_tags_by_type(
                                kernel.iname_to_tags.get(dep, []), LocalIndexTag):
                            tag, = filter_iname_tags_by_type(
                                kernel.iname_to_tags.get(dep, []), LocalIndexTag, 1)
                            if tag.axis == 0:
                                raise LoopyError(
                                    "streaming store must have stride 1 in "
                                    "local index, got: %s" % subscript)

                    new_terms.append(term)

            if not saw_l0:
                raise LoopyError("streaming store must have stride 1 in "
                        "local index, got: %s" % subscript)

            if access_info.vector_index is not None:
                raise LoopyError("streaming store may not use a short-vector "
                        "data type")

            # The outer clause must bind 'dep' before the inner clause uses
            # it. Dependencies may name non-iname variables, hence the
            # defaulting lookup.
            rhs_has_programindex = any(
                isinstance(tag, LocalIndexTag) and tag.axis == 0
                for dep in get_dependencies(insn.expression)
                for tag in kernel.iname_to_tags.get(dep, ()))

            if not rhs_has_programindex:
                rhs_code = "broadcast(%s, 0)" % rhs_code

            from cgen import Statement
            return Statement(
                    "streaming_store(%s + %s, %s)"
                    % (
                        access_info.array_name,
                        ecm(flattened_sum(new_terms), PREC_NONE, 'i'),
                        rhs_code))

        # }}}

        from cgen import Assign
        return Assign(ecm(lhs, prec=PREC_NONE, type_context=None), rhs_code)
Example #14
0
    def map_subscript(self, expr, enclosing_prec, type_context):
        """Return the code string for the subscript expression *expr*.

        Subscripts whose aggregate is not a plain variable are rendered
        generically as ``aggregate[index]``.  Otherwise the variable is looked
        up among the kernel's arguments and temporaries and the access is
        rendered according to the kind of array found: a ``read_imagef`` call
        for image arguments, or a pointer/scalar/indexed access (with an
        optional vector member suffix) for global arguments and temporaries.

        :raises RuntimeError: if the variable is unknown to the kernel or is
            not an array.
        :raises NotImplementedError: for images whose dtype is neither
            float32, float64 nor a vector dtype.
        """
        from pymbolic.primitives import Variable

        aggregate = expr.aggregate
        if not isinstance(aggregate, Variable):
            # Generic fallback: recurse into aggregate and index.
            aggregate_code = self.rec(aggregate, PREC_CALL, type_context)
            index_code = self.rec(expr.index, PREC_NONE, 'i')
            return self.parenthesize_if_needed(
                "%s[%s]" % (aggregate_code, index_code),
                enclosing_prec, PREC_CALL)

        var_name = aggregate.name
        if var_name in self.kernel.arg_dict:
            ary = self.kernel.arg_dict[var_name]
        elif var_name in self.kernel.temporary_variables:
            ary = self.kernel.temporary_variables[var_name]
        else:
            raise RuntimeError(
                "nothing known about subscripted variable '%s'" % var_name)

        from loopy.kernel.array import ArrayBase
        if not isinstance(ary, ArrayBase):
            raise RuntimeError(
                "subscripted variable '%s' is not an array" % var_name)

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate

        def eval_with_substitutions(index_expr):
            return evaluate(index_expr, self.codegen_state.var_subst_map)

        access_info = get_access_info(
            self.kernel.target, ary, expr.index,
            eval_with_substitutions,
            self.codegen_state.vectorization_info)

        vec_member = get_opencl_vec_member(access_info.vector_index)

        from loopy.kernel.data import ImageArg, GlobalArg, TemporaryVariable

        if isinstance(ary, ImageArg):
            image_name = ary.name
            num_dims = ary.dimensions
            # Coordinates are passed in reversed index order.
            coordinates = ", ".join(
                self.rec(idx, PREC_NONE, 'i') for idx in expr.index[::-1])
            base_access = (
                "read_imagef(%s, loopy_sampler, (float%d)(%s))"
                % (image_name, num_dims, coordinates))

            if ary.dtype == np.float32:
                return base_access+".x"
            if self.kernel.target.is_vector_dtype(ary.dtype):
                return base_access
            if ary.dtype == np.float64:
                return "as_double(%s.xy)" % base_access

            raise NotImplementedError(
                "non-floating-point images not supported for now")

        if isinstance(ary, (GlobalArg, TemporaryVariable)):
            if len(access_info.subscripts) != 0:
                subscript, = access_info.subscripts
                indexed = self.parenthesize_if_needed(
                    "%s[%s]" % (
                        access_info.array_name,
                        self.rec(subscript, PREC_NONE, 'i')),
                    enclosing_prec, PREC_CALL)

                if vec_member:
                    indexed += "."+vec_member

                return indexed

            # No subscripts: global args are pointers, temporaries are
            # scalars.
            is_pointer = isinstance(ary, GlobalArg)
            if vec_member is None:
                if is_pointer:
                    return "*" + access_info.array_name
                return access_info.array_name

            member_access = "%s->%s" if is_pointer else "%s.%s"
            return member_access % (access_info.array_name, vec_member)

        assert False