Ejemplo n.º 1
0
    def map_call(self, expr, enclosing_prec, type_context):
        """Generate the target-code string for the function call *expr*.

        The callee is looked up via ``self.kernel.mangle_function`` using the
        inferred argument dtypes, the call is recorded in
        ``self.codegen_state.seen_functions``, and every argument is
        generated recursively with ``self.rec``.

        :returns: a string of the form ``name(arg, ...)``.
        :raises RuntimeError: if the mangler result has neither two nor
            three entries, or if no C name could be determined for the
            callee.
        """
        from pymbolic.primitives import Variable
        from pymbolic.mapper.stringifier import PREC_NONE

        func_id = expr.function

        # A plain variable serves as its own C name unless the mangler
        # supplies a different one below.
        target_name = None
        if isinstance(func_id, Variable):
            func_id = target_name = func_id.name

        arg_dtypes = tuple(self.infer_type(arg) for arg in expr.parameters)

        arg_codes = None

        mangled = self.kernel.mangle_function(func_id, arg_dtypes)
        if mangled is not None:
            n_entries = len(mangled)
            if n_entries == 2:
                _, target_name = mangled
            elif n_entries == 3:
                _, target_name, wanted_dtypes = mangled

                # The mangler supplied per-argument dtypes: hand each one to
                # the recursive call together with its matching type context.
                arg_codes = []
                for arg, wanted in zip(expr.parameters, wanted_dtypes):
                    arg_codes.append(
                        self.rec(
                            arg, PREC_NONE,
                            dtype_to_type_context(self.kernel.target, wanted),
                            wanted))
            else:
                raise RuntimeError(
                    "result of function mangler "
                    "for function '%s' not understood"
                    % func_id)

        from loopy.codegen import SeenFunction
        self.codegen_state.seen_functions.add(
            SeenFunction(func_id, target_name, arg_dtypes))

        if arg_codes is None:
            # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to
            # propagate the type context here. But for many others, it does
            # not. Using the inferred type as a stopgap for now.
            arg_codes = [
                self.rec(
                    arg, PREC_NONE,
                    type_context=dtype_to_type_context(
                        self.kernel.target, arg_dtype))
                for arg, arg_dtype in zip(expr.parameters, arg_dtypes)]

        if target_name is None:
            raise RuntimeError(
                "unable to find C name for function identifier '%s'"
                % func_id)

        return "%s(%s)" % (target_name, ", ".join(arg_codes))
Ejemplo n.º 2
0
    def emit_call_insn(self, insn, target, expression_to_code_mapper):
        """Build the target call expression for the call instruction *insn*.

        The emitted argument list is: the last assignee, then the call's
        parameters in order, then ``shape[1]`` of the descriptor of
        argument 0 as a ``numpy.int32``.

        :returns: a tuple ``(call_expression, False)``.
        """
        assert self.is_ready_for_codegen()
        assert isinstance(insn, loopy.CallInstruction)

        inputs = list(insn.expression.parameters)
        input_dtypes = [self.arg_id_to_dtype[pos] for pos in range(len(inputs))]

        # The output is generated with the dtype registered for argument 0.
        output = insn.assignees[-1]
        output_dtype = self.arg_id_to_dtype[0]

        descr = self.arg_id_to_descr[0]

        def to_code(par, par_dtype):
            return expression_to_code_mapper(
                par,
                PREC_NONE,
                dtype_to_type_context(target, par_dtype),
                par_dtype
            ).expr

        input_codes = [
            to_code(par, par_dtype)
            for par, par_dtype in zip(inputs, input_dtypes)
        ]
        output_code = to_code(output, output_dtype)

        # Output first, then inputs, then the trailing size argument.
        call_args = [output_code] + input_codes
        call_args.append(numpy.int32(descr.shape[1]))  # n
        return var(self.name_in_target)(*call_args), False
Ejemplo n.º 3
0
    def map_comparison(self, expr, type_context):
        """Rebuild the comparison *expr* with both operands mapped recursively.

        Both sides are mapped under one shared type context, derived from
        the inferred dtype of ``expr.left - expr.right``; the incoming
        *type_context* is not used.
        """
        target = self.kernel.target
        operand_dtype = self.infer_type(expr.left - expr.right)
        operand_context = dtype_to_type_context(target, operand_dtype)

        comparison_cls = type(expr)
        new_left = self.rec(expr.left, operand_context)
        new_right = self.rec(expr.right, operand_context)
        return comparison_cls(new_left, expr.operator, new_right)
Ejemplo n.º 4
0
    def emit_call_insn(self, insn, target, expression_to_code_mapper):
        """Build the target call expression for the call instruction *insn*.

        Arguments are ordered according to ``self.access``: for every entry
        that is ``READ`` the next call parameter is taken, otherwise the
        next assignee (e.g. ``a, c = f(b, d)`` becomes ``f(a, b, c, d)``).
        Parameters left over afterwards are appended at the end.

        :returns: a tuple ``(call_expression, False)``; the flag says the
            assignee is not returned by the call.
        """
        pending_reads = iter(insn.expression.parameters)
        pending_writes = iter(insn.assignees)

        call_args = []
        for mode in self.access:
            source = pending_reads if mode is READ else pending_writes
            call_args.append(next(source))

        # pass layer argument if needed
        call_args.extend(pending_reads)

        arg_dtypes = tuple(
            expression_to_code_mapper.infer_type(arg) for arg in call_args)

        from loopy.expression import dtype_to_type_context
        from pymbolic.mapper.stringifier import PREC_NONE
        from pymbolic import var

        arg_codes = []
        for arg, arg_dtype in zip(call_args, arg_dtypes):
            arg_context = dtype_to_type_context(target, arg_dtype)
            arg_codes.append(
                expression_to_code_mapper(
                    arg, PREC_NONE, arg_context, arg_dtype).expr)

        return var(self.name_in_target)(*arg_codes), False
Ejemplo n.º 5
0
    def emit_tuple_assignment(self, codegen_state, insn):
        """Emit one assignment per (assignee, parameter) pair of *insn*.

        Each parameter of ``insn.expression`` is generated with the dtype of
        the variable it is assigned to and stored into the matching
        assignee.

        :returns: the result of ``cgen.block_if_necessary`` applied to the
            list of generated ``cgen.Assign`` statements.
        """
        from cgen import Assign, block_if_necessary
        from loopy.expression import dtype_to_type_context

        ecm = codegen_state.expression_to_code_mapper
        kernel = codegen_state.kernel

        # Loop-invariant: query the assignee names once instead of once per
        # assignee.
        assignee_var_names = insn.assignee_var_names()

        assignments = []
        for i, (assignee, parameter) in enumerate(
                zip(insn.assignees, insn.expression.parameters)):
            lhs_code = ecm(assignee, prec=PREC_NONE, type_context=None)
            lhs_dtype = kernel.get_var_descriptor(assignee_var_names[i]).dtype

            # The right-hand side is generated in the context of, and
            # requested as, the dtype of the variable being written.
            rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)
            rhs_code = ecm(parameter,
                           prec=PREC_NONE,
                           type_context=rhs_type_context,
                           needed_dtype=lhs_dtype)

            assignments.append(Assign(lhs_code, rhs_code))

        return block_if_necessary(assignments)
Ejemplo n.º 6
0
    def emit_call_insn(self, insn, target, expression_to_code_mapper):
        """Build the target call expression for the call instruction *insn*.

        Arguments are ordered according to ``self.access``: for every entry
        that is ``READ`` the next call parameter is taken, otherwise the
        next assignee (e.g. ``a, c = f(b, d)`` becomes ``f(a, b, c, d)``).
        Parameters left over afterwards are appended at the end.

        :returns: a tuple ``(call_expression, False)``; the flag says the
            assignee is not returned by the call.
        """
        remaining_reads = iter(insn.expression.parameters)
        remaining_writes = iter(insn.assignees)

        ordered_args = []
        for access_mode in self.access:
            if access_mode is READ:
                picked = next(remaining_reads)
            else:
                picked = next(remaining_writes)
            ordered_args.append(picked)

        # pass layer argument if needed
        ordered_args.extend(remaining_reads)

        ordered_dtypes = tuple(
            expression_to_code_mapper.infer_type(arg) for arg in ordered_args)

        from loopy.expression import dtype_to_type_context
        from pymbolic.mapper.stringifier import PREC_NONE
        from pymbolic import var

        def generate(arg, arg_dtype):
            return expression_to_code_mapper(
                arg, PREC_NONE,
                dtype_to_type_context(target, arg_dtype),
                arg_dtype).expr

        generated = [
            generate(arg, arg_dtype)
            for arg, arg_dtype in zip(ordered_args, ordered_dtypes)
        ]

        return var(self.name_in_target)(*generated), False
Ejemplo n.º 7
0
def generate_array_literal(codegen_state, array, value):
    """Return a brace-enclosed initializer string for *array*.

    *value* is linearized with ``generate_linearized_array``; every entry is
    turned into code by the expression-to-code mapper using the type context
    and dtype of *array*, and the results are joined with ``", "``.
    """
    entries = generate_linearized_array(array, value)

    to_code = codegen_state.expression_to_code_mapper

    from pymbolic.mapper.stringifier import PREC_NONE
    from loopy.expression import dtype_to_type_context

    entry_context = dtype_to_type_context(
        codegen_state.kernel.target, array.dtype)

    entry_codes = [
        to_code(entry, PREC_NONE, entry_context, array.dtype)
        for entry in entries
    ]
    return "{ %s }" % ", ".join(entry_codes)
Ejemplo n.º 8
0
def generate_array_literal(codegen_state, array, value):
    """Return a brace-enclosed initializer string for *array*.

    *value* is linearized with ``generate_linearized_array``; every entry is
    turned into code by the expression-to-code mapper using the type context
    and dtype of *array*, and the results are joined with ``", "``.
    """
    linearized = generate_linearized_array(array, value)

    mapper = codegen_state.expression_to_code_mapper

    from pymbolic.mapper.stringifier import PREC_NONE
    from loopy.expression import dtype_to_type_context

    literal_context = dtype_to_type_context(
            codegen_state.kernel.target, array.dtype)

    pieces = []
    for item in linearized:
        pieces.append(mapper(item, PREC_NONE, literal_context, array.dtype))

    return "{ %s }" % ", ".join(pieces)
Ejemplo n.º 9
0
    def map_comparison(self, expr, enclosing_prec, type_context):
        """Generate the code string for the comparison *expr*.

        Both operands are generated at comparison precedence under one
        shared type context, derived from the inferred dtype of
        ``expr.left - expr.right``; the incoming *type_context* is not used.
        The result is parenthesized if *enclosing_prec* requires it.
        """
        from pymbolic.mapper.stringifier import PREC_COMPARISON

        operand_context = dtype_to_type_context(
            self.kernel.target, self.infer_type(expr.left - expr.right))

        left_code = self.rec(expr.left, PREC_COMPARISON, operand_context)
        operator = expr.operator
        right_code = self.rec(expr.right, PREC_COMPARISON, operand_context)

        comparison_code = "%s %s %s" % (left_code, operator, right_code)
        return self.parenthesize_if_needed(
            comparison_code, enclosing_prec, PREC_COMPARISON)
Ejemplo n.º 10
0
    def map_comparison(self, expr, enclosing_prec, type_context):
        """Generate the code string for the comparison *expr*.

        Both operands are generated at comparison precedence under one
        shared type context, derived from the inferred dtype of
        ``expr.left - expr.right``; the incoming *type_context* is not used.
        The result is parenthesized if *enclosing_prec* requires it.
        """
        from pymbolic.mapper.stringifier import PREC_COMPARISON

        difference_dtype = self.infer_type(expr.left - expr.right)
        shared_context = dtype_to_type_context(
                self.kernel.target, difference_dtype)

        def operand(side):
            return self.rec(side, PREC_COMPARISON, shared_context)

        pieces = (operand(expr.left), expr.operator, operand(expr.right))
        return self.parenthesize_if_needed(
                "%s %s %s" % pieces,
                enclosing_prec, PREC_COMPARISON)
Ejemplo n.º 11
0
def generate_array_literal(codegen_state, array, value):
    """Return a ``CExpression`` wrapping an ``ArrayLiteral`` for *array*.

    *value* is linearized with ``generate_linearized_array``; every entry is
    passed through the expression-to-code mapper's ``map_constant`` using the
    type context derived from ``array.dtype``.
    """
    entries = generate_linearized_array(array, value)

    mapper = codegen_state.expression_to_code_mapper

    from loopy.expression import dtype_to_type_context
    from loopy.symbolic import ArrayLiteral

    entry_context = dtype_to_type_context(
        codegen_state.kernel.target, array.dtype)

    c_mapper = codegen_state.ast_builder.get_c_expression_to_code_mapper()
    constants = tuple(
        mapper.map_constant(entry, entry_context) for entry in entries)

    return CExpression(c_mapper, ArrayLiteral(constants))
Ejemplo n.º 12
0
    def emit_assignment(self, codegen_state, insn):
        """Emit code for the single-assignee assignment instruction *insn*.

        Without an atomicity entry for the assignee a plain ``cgen.Assign``
        is produced. ``AtomicInit`` and ``AtomicUpdate`` entries are handed
        to the AST builder's ``emit_atomic_init`` / ``emit_atomic_update``
        after the left-hand-side dtype has been recorded in
        ``codegen_state.seen_atomic_dtypes``.

        :raises ValueError: if the atomicity entry is of any other type.
        """
        kernel = codegen_state.kernel
        ecm = codegen_state.expression_to_code_mapper

        assignee_var_name, = insn.assignee_var_names()

        lhs_var = kernel.get_var_descriptor(assignee_var_name)
        lhs_dtype = lhs_var.dtype

        # Pick the (at most one) atomicity entry that names the assignee.
        lhs_atomicity = None
        if insn.atomicity is not None:
            matching = [
                entry for entry in insn.atomicity
                if entry.var_name == assignee_var_name]
            assert len(matching) <= 1
            if matching:
                lhs_atomicity, = matching

        from loopy.kernel.data import AtomicInit, AtomicUpdate
        from loopy.expression import dtype_to_type_context

        lhs_code = ecm(insn.assignee, prec=PREC_NONE, type_context=None)
        rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)

        if lhs_atomicity is None:
            from cgen import Assign
            rhs_code = ecm(
                insn.expression, prec=PREC_NONE,
                type_context=rhs_type_context,
                needed_dtype=lhs_dtype)
            return Assign(lhs_code, rhs_code)

        if isinstance(lhs_atomicity, AtomicInit):
            codegen_state.seen_atomic_dtypes.add(lhs_dtype)
            emit_atomic = codegen_state.ast_builder.emit_atomic_init
        elif isinstance(lhs_atomicity, AtomicUpdate):
            codegen_state.seen_atomic_dtypes.add(lhs_dtype)
            emit_atomic = codegen_state.ast_builder.emit_atomic_update
        else:
            raise ValueError(
                "unexpected lhs atomicity type: %s"
                % type(lhs_atomicity).__name__)

        return emit_atomic(
            codegen_state, lhs_atomicity, lhs_var,
            insn.assignee, insn.expression,
            lhs_dtype, rhs_type_context)
Ejemplo n.º 13
0
    def emit_assignment(self, codegen_state, insn):
        """Emit code for the single-assignee assignment instruction *insn*.

        Without an atomicity entry for the assignee a plain ``cgen.Assign``
        is produced. ``AtomicInit`` and ``AtomicUpdate`` entries are handed
        to the AST builder's ``emit_atomic_init`` / ``emit_atomic_update``
        after the left-hand-side dtype has been recorded in
        ``codegen_state.seen_atomic_dtypes``.

        :raises ValueError: if the atomicity entry is of any other type.
        """
        kernel = codegen_state.kernel
        ecm = codegen_state.expression_to_code_mapper

        assignee_var_name, = insn.assignee_var_names()

        lhs_var = kernel.get_var_descriptor(assignee_var_name)
        lhs_dtype = lhs_var.dtype

        # Pick the (at most one) atomicity entry that names the assignee.
        lhs_atomicity = None
        if insn.atomicity is not None:
            candidates = [
                a for a in insn.atomicity if a.var_name == assignee_var_name]
            assert len(candidates) <= 1
            if candidates:
                lhs_atomicity, = candidates

        from loopy.kernel.data import AtomicInit, AtomicUpdate
        from loopy.expression import dtype_to_type_context

        lhs_code = ecm(insn.assignee, prec=PREC_NONE, type_context=None)
        rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)

        # Non-atomic case: an ordinary assignment statement.
        if lhs_atomicity is None:
            from cgen import Assign
            rhs_code = ecm(
                insn.expression, prec=PREC_NONE,
                type_context=rhs_type_context,
                needed_dtype=lhs_dtype)
            return Assign(lhs_code, rhs_code)

        # Arguments shared by both atomic emitters.
        atomic_args = (
            codegen_state, lhs_atomicity, lhs_var,
            insn.assignee, insn.expression,
            lhs_dtype, rhs_type_context)

        if isinstance(lhs_atomicity, AtomicInit):
            codegen_state.seen_atomic_dtypes.add(lhs_dtype)
            return codegen_state.ast_builder.emit_atomic_init(*atomic_args)

        if isinstance(lhs_atomicity, AtomicUpdate):
            codegen_state.seen_atomic_dtypes.add(lhs_dtype)
            return codegen_state.ast_builder.emit_atomic_update(*atomic_args)

        raise ValueError(
            "unexpected lhs atomicity type: %s"
            % type(lhs_atomicity).__name__)
Ejemplo n.º 14
0
def generate_array_literal(codegen_state, array, value):
    """Return a ``CExpression`` wrapping an ``ArrayLiteral`` for *array*.

    *value* is linearized with ``generate_linearized_array``; every entry is
    passed through the expression-to-code mapper's ``map_constant`` using the
    type context derived from ``array.dtype``.
    """
    linearized = generate_linearized_array(array, value)

    ecm = codegen_state.expression_to_code_mapper

    from loopy.expression import dtype_to_type_context
    from loopy.symbolic import ArrayLiteral

    literal_context = dtype_to_type_context(
            codegen_state.kernel.target, array.dtype)

    code_mapper = codegen_state.ast_builder.get_c_expression_to_code_mapper()

    mapped_entries = []
    for item in linearized:
        mapped_entries.append(ecm.map_constant(item, literal_context))

    return CExpression(code_mapper, ArrayLiteral(tuple(mapped_entries)))
Ejemplo n.º 15
0
    def emit_call_insn(self, insn, target, expression_to_code_mapper):
        """Build the target call to ``self.subkernel`` for *insn*.

        The call's parameters are taken in order; for every subkernel
        argument that is an output but not an input, the corresponding
        assignee is inserted at that argument's position.

        :returns: a tuple ``(call_expression, False)``.
        :raises NotImplementedError: if *target* is not a ``CFamilyTarget``.
        """
        from loopy.target.c import CFamilyTarget
        if not isinstance(target, CFamilyTarget):
            raise NotImplementedError()

        from loopy.kernel.instruction import CallInstruction

        assert self.is_ready_for_codegen()
        assert isinstance(insn, CallInstruction)

        outputs = insn.assignees

        call_args = list(insn.expression.parameters)
        call_dtypes = [
            self.arg_id_to_dtype[pos] for pos in range(len(call_args))
        ]

        # The association itself is not used below; the call is retained so
        # that whatever it does (or raises) still happens.
        _kw_to_pos, _pos_to_kw = get_kw_pos_association(self.subkernel)

        # insert the assignees at the required positions; outputs are
        # identified by the negative ids -1, -2, ...
        output_id = -1
        for position, arg in enumerate(self.subkernel.args):
            if not arg.is_output:
                continue

            if not arg.is_input:
                call_args.insert(position, outputs[-output_id - 1])
                call_dtypes.insert(position, self.arg_id_to_dtype[output_id])

            output_id -= 1

        # no type casting in array calls
        from loopy.expression import dtype_to_type_context
        from pymbolic.mapper.stringifier import PREC_NONE
        from pymbolic import var

        generated = []
        for arg, arg_dtype in zip(call_args, call_dtypes):
            arg_context = dtype_to_type_context(target, arg_dtype)
            generated.append(
                expression_to_code_mapper(
                    arg, PREC_NONE, arg_context, arg_dtype).expr)

        return var(self.subkernel.name)(*generated), False
Ejemplo n.º 16
0
    def emit_tuple_assignment(self, codegen_state, insn):
        """Emit one assignment per (assignee, parameter) pair of *insn*.

        Each parameter of ``insn.expression`` is generated with the dtype of
        the variable it is assigned to and stored into the matching
        assignee.

        :returns: the result of ``cgen.block_if_necessary`` applied to the
            list of generated ``cgen.Assign`` statements.
        """
        from cgen import Assign, block_if_necessary
        from loopy.expression import dtype_to_type_context

        ecm = codegen_state.expression_to_code_mapper
        kernel = codegen_state.kernel

        # Loop-invariant: query the assignee names once instead of once per
        # assignee.
        assignee_var_names = insn.assignee_var_names()

        assignments = []
        for i, (assignee, parameter) in enumerate(
                zip(insn.assignees, insn.expression.parameters)):
            lhs_code = ecm(assignee, prec=PREC_NONE, type_context=None)
            lhs_var = kernel.get_var_descriptor(assignee_var_names[i])
            lhs_dtype = lhs_var.dtype

            # The right-hand side is generated in the context of, and
            # requested as, the dtype of the variable being written.
            rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)
            rhs_code = ecm(parameter, prec=PREC_NONE,
                    type_context=rhs_type_context, needed_dtype=lhs_dtype)

            assignments.append(Assign(lhs_code, rhs_code))

        return block_if_necessary(assignments)
Ejemplo n.º 17
0
    def emit_call(self, expression_to_code_mapper, expression, target):
        """Build the target call expression for the call *expression*.

        Each parameter is mapped recursively with the dtype registered in
        ``self.arg_id_to_dtype`` for its position and the type context
        derived from that dtype.

        :returns: ``var(self.name_in_target)`` applied to the processed
            parameters.
        """
        assert self.is_ready_for_codegen()

        # must have single assignee
        assert len(expression.parameters) == len(self.arg_id_to_dtype) - 1

        n_inputs = len(self.arg_id_to_dtype) - 1
        wanted_dtypes = tuple(
            self.arg_id_to_dtype[arg_id] for arg_id in range(n_inputs))

        # Type inference still runs on every parameter, although only the
        # registered dtypes are used for code generation below.
        inferred_dtypes = tuple(
            expression_to_code_mapper.infer_type(par)
            for par in expression.parameters)

        from loopy.expression import dtype_to_type_context

        # processing the parameters with the required dtypes
        processed = []
        for par, _inferred, wanted in zip(
                expression.parameters, inferred_dtypes, wanted_dtypes):
            processed.append(
                expression_to_code_mapper.rec(
                    par, dtype_to_type_context(target, wanted), wanted))

        from pymbolic import var
        return var(self.name_in_target)(*tuple(processed))
Ejemplo n.º 18
0
def generate_assignment_instruction_code(codegen_state, insn):
    """Generate target code for the assignment instruction *insn*.

    Checks vectorizability when ``codegen_state.vectorization_info`` is set,
    emits either a plain assignment or an atomic update through
    ``codegen_state.ast_builder``, and optionally wraps the result in a
    ``cgen.Block`` together with a tracing ``printf`` statement.

    :raises Unvectorizable: when vectorizing and the instruction is atomic,
        has a scalar left-hand side with a vector right-hand side, or is a
        vector assignment with tracing enabled.
    :raises RuntimeError: if the assignee is not a ``Variable``,
        ``Subscript`` or ``LinearSubscript``.
    :raises NotImplementedError: for ``AtomicInit`` atomicity.
    :raises ValueError: for any other unexpected atomicity type.
    """
    kernel = codegen_state.kernel

    ecm = codegen_state.expression_to_code_mapper

    from loopy.expression import dtype_to_type_context, VectorizabilityChecker

    # {{{ vectorization handling

    if codegen_state.vectorization_info:
        if insn.atomicity:
            raise Unvectorizable("atomic operation")

        vinfo = codegen_state.vectorization_info
        vcheck = VectorizabilityChecker(kernel, vinfo.iname, vinfo.length)
        lhs_is_vector = vcheck(insn.assignee)
        rhs_is_vector = vcheck(insn.expression)

        if not lhs_is_vector and rhs_is_vector:
            raise Unvectorizable("LHS is scalar, RHS is vector, cannot assign")

        # NOTE: 'is_vector' is only bound inside this branch; the tracing
        # code below reads it only after re-checking vectorization_info.
        is_vector = lhs_is_vector

        del lhs_is_vector
        del rhs_is_vector

    # }}}

    from pymbolic.primitives import Variable, Subscript
    from loopy.symbolic import LinearSubscript

    # Extract the assigned variable's name and its index expressions from
    # the supported lvalue forms.
    lhs = insn.assignee
    if isinstance(lhs, Variable):
        assignee_var_name = lhs.name
        assignee_indices = ()

    elif isinstance(lhs, Subscript):
        assignee_var_name = lhs.aggregate.name
        assignee_indices = lhs.index_tuple

    elif isinstance(lhs, LinearSubscript):
        assignee_var_name = lhs.aggregate.name
        assignee_indices = (lhs.index, )

    else:
        raise RuntimeError("invalid lvalue '%s'" % lhs)

    lhs_var = kernel.get_var_descriptor(assignee_var_name)
    lhs_dtype = lhs_var.dtype

    # Pick the (at most one) atomicity entry that names the assignee.
    if insn.atomicity is not None:
        lhs_atomicity = [
            a for a in insn.atomicity if a.var_name == assignee_var_name
        ]
        assert len(lhs_atomicity) <= 1
        if lhs_atomicity:
            lhs_atomicity, = lhs_atomicity
        else:
            lhs_atomicity = None
    else:
        lhs_atomicity = None

    from loopy.kernel.data import AtomicInit, AtomicUpdate

    lhs_code = ecm(insn.assignee, prec=PREC_NONE, type_context=None)
    rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)
    if lhs_atomicity is None:
        # Plain assignment: the right-hand side is requested as the dtype
        # of the variable being written.
        result = codegen_state.ast_builder.emit_assignment(
            codegen_state, lhs_code,
            ecm(insn.expression,
                prec=PREC_NONE,
                type_context=rhs_type_context,
                needed_dtype=lhs_dtype))

    elif isinstance(lhs_atomicity, AtomicInit):
        raise NotImplementedError("atomic init")

    elif isinstance(lhs_atomicity, AtomicUpdate):
        codegen_state.seen_atomic_dtypes.add(lhs_dtype)
        result = codegen_state.ast_builder.generate_atomic_update(
            kernel, codegen_state, lhs_atomicity, lhs_var, insn.assignee,
            insn.expression, lhs_dtype, rhs_type_context)

    else:
        raise ValueError("unexpected lhs atomicity type: %s" %
                         type(lhs_atomicity).__name__)

    # {{{ tracing

    if kernel.options.trace_assignments or kernel.options.trace_assignment_values:
        if codegen_state.vectorization_info and is_vector:
            raise Unvectorizable("tracing does not support vectorization")

        from cgen import Statement as S  # noqa

        gs, ls = kernel.get_grid_size_upper_bounds()

        # Format: "<kernel>.<insn id>[gid0=%d, ...][lid0=%d, ...]: <variable>"
        printf_format = "%s.%s[%s][%s]: %s" % (kernel.name, insn.id, ", ".join(
            "gid%d=%%d" % i for i in range(len(gs))), ", ".join(
                "lid%d=%%d" % i for i in range(len(ls))), assignee_var_name)

        printf_args = (["gid(%d)" % i for i in range(len(gs))] +
                       ["lid(%d)" % i for i in range(len(ls))])

        if assignee_indices:
            printf_format += "[%s]" % ",".join(len(assignee_indices) * ["%d"])
            printf_args.extend(
                ecm(i, prec=PREC_NONE, type_context="i")
                for i in assignee_indices)

        if kernel.options.trace_assignment_values:
            # Choose the printf conversion from the numpy dtype kind; other
            # kinds get no value printed.
            if lhs_dtype.numpy_dtype.kind == "i":
                printf_format += " = %d"
                printf_args.append(lhs_code)
            elif lhs_dtype.numpy_dtype.kind == "f":
                printf_format += " = %g"
                printf_args.append(lhs_code)
            elif lhs_dtype.numpy_dtype.kind == "c":
                printf_format += " = %g + %gj"
                printf_args.extend(["(%s).x" % lhs_code, "(%s).y" % lhs_code])

        if printf_args:
            printf_args_str = ", " + ", ".join(printf_args)
        else:
            printf_args_str = ""

        printf_insn = S("printf(\"%s\\n\"%s)" %
                        (printf_format, printf_args_str))

        from cgen import Block
        if kernel.options.trace_assignment_values:
            # Values are read from the left-hand side, so the assignment
            # has to run before the printf.
            result = Block([result, printf_insn])
        else:
            # print first, execute later -> helps find segfaults
            result = Block([printf_insn, result])

    # }}}

    return result
Ejemplo n.º 19
0
    def emit_multiple_assignment(self, codegen_state, insn):
        """Emit code for a call instruction with any number of assignees.

        The callee is resolved through ``kernel.mangle_function``. The call's
        parameters are generated with the mangler-provided argument dtypes;
        every assignee after the first is appended to the argument list
        wrapped in ``&(...)``. The first assignee, if any, receives the
        call's (type-cast) return value.

        :returns: a ``cgen.ExpressionStatement`` if the mangled function has
            no result dtypes, otherwise a ``cgen.Assign``.
        :raises RuntimeError: if the function is unknown to the manglers.
        :raises LoopyError: if the dtype of an assignee after the first does
            not match the corresponding mangled result dtype.
        """
        ecm = codegen_state.expression_to_code_mapper

        from pymbolic.primitives import Variable
        from pymbolic.mapper.stringifier import PREC_NONE
        from pymbolic import var

        func_id = insn.expression.function
        parameters = insn.expression.parameters

        if isinstance(func_id, Variable):
            func_id = func_id.name

        assignee_var_descriptors = [
            codegen_state.kernel.get_var_descriptor(a)
            for a in insn.assignee_var_names()
        ]

        par_dtypes = tuple(ecm.infer_type(par) for par in parameters)

        mangle_result = codegen_state.kernel.mangle_function(
            func_id, par_dtypes)
        if mangle_result is None:
            raise RuntimeError(
                "function '%s' unknown--"
                "maybe you need to register a function mangler?" % func_id)

        assert mangle_result.arg_dtypes is not None

        from loopy.expression import dtype_to_type_context
        c_parameters = [
            ecm(par, PREC_NONE, dtype_to_type_context(self.target, tgt_dtype),
                tgt_dtype).expr
            for par, tgt_dtype in zip(parameters, mangle_result.arg_dtypes)
        ]

        from loopy.codegen import SeenFunction
        codegen_state.seen_functions.add(
            SeenFunction(func_id, mangle_result.target_name,
                         mangle_result.arg_dtypes))

        # The loop covers assignees[1:], so the first one it sees is the
        # second left-hand side; count from 2 to keep the message 1-based.
        for lhs_number, (a, tgt_dtype) in enumerate(
                zip(insn.assignees[1:], mangle_result.result_dtypes[1:]),
                start=2):
            if tgt_dtype != ecm.infer_type(a):
                raise LoopyError("type mismatch in %d'th (1-based) left-hand "
                                 "side of instruction '%s'"
                                 % (lhs_number, insn.id))
            c_parameters.append(
                # TODO Yuck: The "where-at function": &(...)
                var("&")(ecm(a, PREC_NONE,
                             dtype_to_type_context(self.target, tgt_dtype),
                             tgt_dtype).expr))

        result = var(mangle_result.target_name)(*c_parameters)

        # In case of no assignees, we are done
        if len(mangle_result.result_dtypes) == 0:
            from cgen import ExpressionStatement
            return ExpressionStatement(
                CExpression(self.get_c_expression_to_code_mapper(), result))

        # Cast the return value from the mangled result dtype to the dtype
        # of the first assignee's variable.
        result = ecm.wrap_in_typecast(mangle_result.result_dtypes[0],
                                      assignee_var_descriptors[0].dtype,
                                      result)

        lhs_code = ecm(insn.assignees[0], prec=PREC_NONE, type_context=None)

        from cgen import Assign
        return Assign(
            lhs_code,
            CExpression(self.get_c_expression_to_code_mapper(), result))
0
    def emit_assignment(self, codegen_state, insn):
        """Emit ISPC code for the single-assignee assignment *insn*.

        Instructions tagged ``!streaming_store`` are emitted as a
        ``streaming_store(base + offset, value)`` statement, where *offset*
        is the array subscript with the axis-0 local iname removed.
        Everything else becomes a plain ``cgen.Assign``.

        :raises NotImplementedError: if the instruction requests atomicity.
        :raises LoopyError: for a streaming store if the array type is
            unsupported, the access does not have exactly one subscript, the
            subscript does not contain the axis-0 local iname exactly once
            as a bare term, or a short-vector data type is involved.
        """
        kernel = codegen_state.kernel
        ecm = codegen_state.expression_to_code_mapper

        assignee_var_name, = insn.assignee_var_names()

        lhs_var = codegen_state.kernel.get_var_descriptor(assignee_var_name)
        lhs_dtype = lhs_var.dtype

        if insn.atomicity:
            raise NotImplementedError("atomic ops in ISPC")

        from loopy.expression import dtype_to_type_context
        from pymbolic.mapper.stringifier import PREC_NONE

        rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)
        rhs_code = ecm(insn.expression, prec=PREC_NONE,
                    type_context=rhs_type_context,
                    needed_dtype=lhs_dtype)

        lhs = insn.assignee

        # {{{ handle streaming stores

        if "!streaming_store" in insn.tags:
            ary = ecm.find_array(lhs)

            from loopy.kernel.array import get_access_info
            from pymbolic import evaluate

            from loopy.symbolic import simplify_using_aff
            index_tuple = tuple(
                    simplify_using_aff(kernel, idx) for idx in lhs.index_tuple)

            access_info = get_access_info(kernel.target, ary, index_tuple,
                    lambda expr: evaluate(expr, codegen_state.var_subst_map),
                    codegen_state.vectorization_info)

            from loopy.kernel.data import ArrayArg, TemporaryVariable

            if not isinstance(ary, (ArrayArg, TemporaryVariable)):
                # '__name__', not '__name': the latter would be name-mangled
                # inside a class body and raise AttributeError.
                raise LoopyError("array type not supported in ISPC: %s"
                        % type(ary).__name__)

            if len(access_info.subscripts) != 1:
                raise LoopyError("streaming stores must have a subscript")
            subscript, = access_info.subscripts

            from pymbolic.primitives import Sum, flattened_sum, Variable
            if isinstance(subscript, Sum):
                terms = subscript.children
            else:
                # A subscript that is not a sum is itself the only term.
                terms = (subscript,)

            new_terms = []

            from loopy.kernel.data import LocalIndexTag, filter_iname_tags_by_type
            from loopy.symbolic import get_dependencies

            saw_l0 = False
            for term in terms:
                if (isinstance(term, Variable)
                            and kernel.iname_tags_of_type(term.name, LocalIndexTag)):
                    tag, = kernel.iname_tags_of_type(
                        term.name, LocalIndexTag, min_num=1, max_num=1)
                    if tag.axis == 0:
                        if saw_l0:
                            raise LoopyError(
                                "streaming store must have stride 1 in "
                                "local index, got: %s" % subscript)
                        saw_l0 = True
                        # The axis-0 local iname is left out of the offset.
                        continue
                else:
                    for dep in get_dependencies(term):
                        if filter_iname_tags_by_type(
                                kernel.iname_to_tags.get(dep, []), LocalIndexTag):
                            tag, = filter_iname_tags_by_type(
                                kernel.iname_to_tags.get(dep, []), LocalIndexTag, 1)
                            if tag.axis == 0:
                                raise LoopyError(
                                    "streaming store must have stride 1 in "
                                    "local index, got: %s" % subscript)

                # Every term other than the bare axis-0 local iname stays in
                # the offset, including local inames on other axes.
                new_terms.append(term)

            if not saw_l0:
                raise LoopyError("streaming store must have stride 1 in "
                        "local index, got: %s" % subscript)

            if access_info.vector_index is not None:
                raise LoopyError("streaming store may not use a short-vector "
                        "data type")

            # The outer 'for' clause must bind 'dep' before the inner one
            # uses it.
            rhs_has_programindex = any(
                isinstance(tag, LocalIndexTag) and tag.axis == 0
                for dep in get_dependencies(insn.expression)
                for tag in kernel.iname_tags(dep))

            if not rhs_has_programindex:
                rhs_code = "broadcast(%s, 0)" % rhs_code

            from cgen import Statement
            return Statement(
                    "streaming_store(%s + %s, %s)"
                    % (
                        access_info.array_name,
                        ecm(flattened_sum(new_terms), PREC_NONE, 'i'),
                        rhs_code))

        # }}}

        from cgen import Assign
        return Assign(ecm(lhs, prec=PREC_NONE, type_context=None), rhs_code)
Ejemplo n.º 21
0
    def map_call(self, expr, enclosing_prec, type_context):
        """Return the target code for the function call *expr*.

        The pseudo-functions ``indexof`` and ``indexof_vec`` are expanded here
        into an index expression for their single subscript argument.  Every
        other callee is resolved through ``self.kernel.mangle_function`` and
        rendered as ``c_name(arg, ...)``.

        :arg expr: call node providing ``function`` and ``parameters``.
        :arg enclosing_prec: not used by this method.
        :arg type_context: not used by this method; each argument is generated
            in a type context derived from a dtype (see FIXME below).
        :returns: for ``indexof``/``indexof_vec`` a value computed from
            ``access_info.subscripts[0]``; otherwise a ``str`` of the form
            ``"c_name(arg0, arg1, ...)"``.
        :raises LoopyError: if ``indexof``/``indexof_vec`` is not given exactly
            one subscript argument, or the subscripted array is an ``ImageArg``.
        :raises RuntimeError: if the mangler result has an unexpected length
            or no C name could be determined for the callee.
        """
        from pymbolic.primitives import Variable, Subscript
        from pymbolic.mapper.stringifier import PREC_NONE

        identifier = expr.function

        # {{{ implement indexof, indexof_vec

        # The callee is not necessarily a Variable (see the isinstance check
        # further down), so it may not have a 'name' attribute at all.
        func_name = getattr(identifier, "name", None)

        if func_name in ("indexof", "indexof_vec"):
            if len(expr.parameters) != 1:
                raise LoopyError("%s takes exactly one argument" % func_name)
            arg, = expr.parameters
            if not isinstance(arg, Subscript):
                raise LoopyError("argument to %s must be a subscript" % func_name)

            ary = self.find_array(arg)

            from loopy.kernel.array import get_access_info
            from pymbolic import evaluate

            access_info = get_access_info(
                self.kernel.target,
                ary,
                arg.index,
                lambda expr: evaluate(expr, self.codegen_state.var_subst_map),
                self.codegen_state.vectorization_info,
            )

            from loopy.kernel.data import ImageArg

            if isinstance(ary, ImageArg):
                raise LoopyError("%s does not support images" % func_name)

            if func_name == "indexof":
                return access_info.subscripts[0]

            # Only "indexof_vec" remains at this point.
            from loopy.kernel.array import VectorArrayDimTag

            # If several axes carry a vector tag, the last one wins.
            ivec = None
            for iaxis, dim_tag in enumerate(ary.dim_tags):
                if isinstance(dim_tag, VectorArrayDimTag):
                    ivec = iaxis

            if ivec is None:
                return access_info.subscripts[0]

            return access_info.subscripts[0] * ary.shape[ivec] + access_info.vector_index

        # }}}

        c_name = None
        if isinstance(identifier, Variable):
            identifier = identifier.name
            # Default to the symbolic name; a mangler may override it below.
            c_name = identifier

        par_dtypes = tuple(self.infer_type(par) for par in expr.parameters)

        str_parameters = None

        mangle_result = self.kernel.mangle_function(identifier, par_dtypes)
        if mangle_result is not None:
            if len(mangle_result) == 2:
                # (result dtype, C name): the result dtype is not needed here.
                _, c_name = mangle_result
            elif len(mangle_result) == 3:
                # (result dtype, C name, argument dtypes expected by the target)
                _, c_name, arg_tgt_dtypes = mangle_result

                str_parameters = [
                    self.rec(par, PREC_NONE, dtype_to_type_context(self.kernel.target, tgt_dtype), tgt_dtype)
                    for par, tgt_dtype in zip(expr.parameters, arg_tgt_dtypes)
                ]
            else:
                raise RuntimeError("result of function mangler " "for function '%s' not understood" % identifier)

        from loopy.codegen import SeenFunction

        self.codegen_state.seen_functions.add(SeenFunction(identifier, c_name, par_dtypes))
        if str_parameters is None:
            # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to
            # propagate the type context here. But for many others, it does
            # not. Using the inferred type as a stopgap for now.
            str_parameters = [
                self.rec(par, PREC_NONE, type_context=dtype_to_type_context(self.kernel.target, par_dtype))
                for par, par_dtype in zip(expr.parameters, par_dtypes)
            ]

        if c_name is None:
            raise RuntimeError("unable to find C name for function identifier '%s'" % identifier)

        return "%s(%s)" % (c_name, ", ".join(str_parameters))
Ejemplo n.º 22
0
    def emit_multiple_assignment(self, codegen_state, insn):
        """Generate an assignment for a call instruction with several assignees.

        The first assignee receives the call's (type-cast) return value; each
        further assignee is appended to the argument list as ``&(...)``.

        :arg codegen_state: provides ``kernel``, ``expression_to_code_mapper``
            and ``seen_functions``.
        :arg insn: instruction whose ``expression`` is the call and whose
            ``assignees`` are the left-hand sides.
        :returns: a ``cgen.Assign`` of the call to the first assignee.
        :raises RuntimeError: if no function mangler recognizes the callee.
        :raises LoopyError: if the inferred type of a second or later assignee
            differs from the corresponding entry of
            ``mangle_result.result_dtypes``.
        """
        ecm = codegen_state.expression_to_code_mapper

        from pymbolic.primitives import Variable
        from pymbolic.mapper.stringifier import PREC_NONE

        func_id = insn.expression.function
        parameters = insn.expression.parameters

        if isinstance(func_id, Variable):
            func_id = func_id.name

        assignee_var_descriptors = [
                codegen_state.kernel.get_var_descriptor(a)
                for a in insn.assignee_var_names()]

        par_dtypes = tuple(ecm.infer_type(par) for par in parameters)

        mangle_result = codegen_state.kernel.mangle_function(func_id, par_dtypes)
        if mangle_result is None:
            raise RuntimeError("function '%s' unknown--"
                    "maybe you need to register a function mangler?"
                    % func_id)

        assert mangle_result.arg_dtypes is not None

        from loopy.expression import dtype_to_type_context
        str_parameters = [
                ecm(par, PREC_NONE,
                    dtype_to_type_context(self.target, tgt_dtype),
                    tgt_dtype)
                for par, tgt_dtype in zip(parameters, mangle_result.arg_dtypes)]

        from loopy.codegen import SeenFunction
        codegen_state.seen_functions.add(
                SeenFunction(func_id,
                    mangle_result.target_name,
                    mangle_result.arg_dtypes))

        # The loop starts at the *second* left-hand side, so its 1-based
        # position is 2; numbering from 1 would blame the wrong assignee.
        for lhs_pos, (a, tgt_dtype) in enumerate(
                zip(insn.assignees[1:], mangle_result.result_dtypes[1:]),
                start=2):
            if tgt_dtype != ecm.infer_type(a):
                raise LoopyError("type mismatch in %d'th (1-based) left-hand "
                        "side of instruction '%s'" % (lhs_pos, insn.id))
            # Additional results are returned through pointer arguments.
            str_parameters.append(
                    "&(%s)" % ecm(a, PREC_NONE,
                        dtype_to_type_context(self.target, tgt_dtype),
                        tgt_dtype))

        result = "%s(%s)" % (mangle_result.target_name, ", ".join(str_parameters))

        # Convert the function's return dtype to the first assignee's dtype.
        result = ecm.wrap_in_typecast(
                mangle_result.result_dtypes[0],
                assignee_var_descriptors[0].dtype,
                result)

        lhs_code = ecm(insn.assignees[0], prec=PREC_NONE, type_context=None)

        from cgen import Assign
        return Assign(
                lhs_code,
                result)
Ejemplo n.º 23
0
    def map_call(self, expr, enclosing_prec, type_context):
        """Return the target code for the function call *expr*.

        ``indexof`` and ``indexof_vec`` are expanded into an index expression
        for their single subscript argument.  Any other callee is looked up
        with ``self.kernel.mangle_function`` and rendered as
        ``target_name(arg, ...)``.

        :arg expr: call node providing ``function`` and ``parameters``.
        :arg enclosing_prec: not used by this method.
        :arg type_context: not used by this method.
        :returns: for ``indexof``/``indexof_vec`` a value computed from
            ``access_info.subscripts[0]``; otherwise the call as a ``str``.
        :raises LoopyError: if ``indexof``/``indexof_vec`` is not given exactly
            one subscript argument, if the subscripted array is an
            ``ImageArg``, or if the function does not have exactly one
            result dtype.
        :raises RuntimeError: if no function mangler recognizes the callee.
        """
        from pymbolic.primitives import Variable, Subscript
        from pymbolic.mapper.stringifier import PREC_NONE

        identifier = expr.function

        # {{{ implement indexof, indexof_vec

        # The callee is not necessarily a Variable (see the isinstance check
        # after this section), so it may not have a 'name' attribute.
        func_name = getattr(identifier, "name", None)

        if func_name in ("indexof", "indexof_vec"):
            if len(expr.parameters) != 1:
                raise LoopyError("%s takes exactly one argument" % func_name)
            arg, = expr.parameters
            if not isinstance(arg, Subscript):
                raise LoopyError(
                        "argument to %s must be a subscript" % func_name)

            ary = self.find_array(arg)

            from loopy.kernel.array import get_access_info
            from pymbolic import evaluate
            access_info = get_access_info(self.kernel.target, ary, arg.index,
                    lambda expr: evaluate(expr, self.codegen_state.var_subst_map),
                    self.codegen_state.vectorization_info)

            from loopy.kernel.data import ImageArg
            if isinstance(ary, ImageArg):
                raise LoopyError("%s does not support images" % func_name)

            if func_name == "indexof":
                return access_info.subscripts[0]

            # Only "indexof_vec" remains at this point.
            from loopy.kernel.array import VectorArrayDimTag

            # If several axes carry a vector tag, the last one wins.
            ivec = None
            for iaxis, dim_tag in enumerate(ary.dim_tags):
                if isinstance(dim_tag, VectorArrayDimTag):
                    ivec = iaxis

            if ivec is None:
                return access_info.subscripts[0]

            return (
                access_info.subscripts[0]*ary.shape[ivec]
                + access_info.vector_index)

        # }}}

        if isinstance(identifier, Variable):
            identifier = identifier.name

        par_dtypes = tuple(self.infer_type(par) for par in expr.parameters)

        mangle_result = self.kernel.mangle_function(
                identifier, par_dtypes,
                ast_builder=self.codegen_state.ast_builder)

        if mangle_result is None:
            raise RuntimeError("function '%s' unknown--"
                    "maybe you need to register a function mangler?"
                    % identifier)

        if len(mangle_result.result_dtypes) != 1:
            raise LoopyError("functions with more or fewer than one return value "
                    "may not be used in an expression")

        if mangle_result.arg_dtypes is not None:
            # The mangler declared the argument dtypes: generate each
            # argument with the declared dtype as the needed dtype.
            str_parameters = [
                    self.rec(par, PREC_NONE,
                        dtype_to_type_context(self.kernel.target, tgt_dtype),
                        tgt_dtype)
                    for par, tgt_dtype in zip(
                        expr.parameters, mangle_result.arg_dtypes)]

        else:
            # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to
            # propagate the type context here. But for many others, it does
            # not. Using the inferred type as a stopgap for now.
            str_parameters = [
                    self.rec(par, PREC_NONE,
                        type_context=dtype_to_type_context(
                            self.kernel.target, par_dtype))
                    for par, par_dtype in zip(expr.parameters, par_dtypes)]

            from warnings import warn
            warn("Calling function '%s' with unknown C signature--"
                    "return CallMangleInfo.arg_dtypes"
                    % identifier, LoopyWarning)

        from loopy.codegen import SeenFunction
        # Record the declared argument dtypes if known, else the inferred ones.
        self.codegen_state.seen_functions.add(
                SeenFunction(identifier,
                    mangle_result.target_name,
                    mangle_result.arg_dtypes or par_dtypes))

        return "%s(%s)" % (mangle_result.target_name, ", ".join(str_parameters))
Ejemplo n.º 24
0
def generate_assignment_instruction_code(codegen_state, insn):
    """Generate the AST for one assignment instruction.

    Plain assignments go through ``ast_builder.emit_assignment``; assignees
    with an ``AtomicUpdate`` atomicity go through
    ``ast_builder.generate_atomic_update``.  When one of the kernel's tracing
    options is set, the result is wrapped in a ``cgen.Block`` together with a
    ``printf`` statement.

    :arg codegen_state: provides the kernel, the expression-to-code mapper,
        the AST builder and the vectorization info.
    :arg insn: the instruction; ``assignee``, ``expression``, ``atomicity``
        and ``id`` are read.
    :returns: the generated AST node.
    :raises Unvectorizable: under vectorization, for atomic instructions, for
        a scalar LHS with a vector RHS, or when tracing a vector assignment.
    :raises RuntimeError: if the assignee is not a supported lvalue.
    :raises NotImplementedError: for ``AtomicInit`` atomicity.
    :raises ValueError: for any other atomicity type.
    """
    kernel = codegen_state.kernel
    ecm = codegen_state.expression_to_code_mapper

    from loopy.expression import dtype_to_type_context, VectorizabilityChecker

    # {{{ vectorization handling

    # Stays False when no vectorization is in progress.
    is_vector = False

    vec_info = codegen_state.vectorization_info
    if vec_info:
        if insn.atomicity:
            raise Unvectorizable("atomic operation")

        checker = VectorizabilityChecker(kernel, vec_info.iname, vec_info.length)
        is_vector = checker(insn.assignee)

        if checker(insn.expression) and not is_vector:
            raise Unvectorizable(
                    "LHS is scalar, RHS is vector, cannot assign")

    # }}}

    # {{{ take the lvalue apart

    from pymbolic.primitives import Variable, Subscript
    from loopy.symbolic import LinearSubscript

    lhs = insn.assignee
    if isinstance(lhs, Variable):
        assignee_var_name, assignee_indices = lhs.name, ()
    elif isinstance(lhs, Subscript):
        assignee_var_name, assignee_indices = lhs.aggregate.name, lhs.index_tuple
    elif isinstance(lhs, LinearSubscript):
        assignee_var_name, assignee_indices = lhs.aggregate.name, (lhs.index,)
    else:
        raise RuntimeError("invalid lvalue '%s'" % lhs)

    # }}}

    lhs_var = kernel.get_var_descriptor(assignee_var_name)
    lhs_dtype = lhs_var.dtype

    # {{{ find the atomicity (if any) that applies to the assignee

    lhs_atomicity = None
    if insn.atomicity is not None:
        matching = [
                atm for atm in insn.atomicity
                if atm.var_name == assignee_var_name]
        assert len(matching) <= 1
        if matching:
            lhs_atomicity, = matching

    # }}}

    from loopy.kernel.data import AtomicInit, AtomicUpdate

    lhs_code = ecm(insn.assignee, prec=PREC_NONE, type_context=None)
    rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)

    if lhs_atomicity is None:
        emit = codegen_state.ast_builder.emit_assignment
        rhs_code = ecm(
                insn.expression,
                prec=PREC_NONE,
                type_context=rhs_type_context,
                needed_dtype=lhs_dtype)
        result = emit(codegen_state, lhs_code, rhs_code)

    elif isinstance(lhs_atomicity, AtomicInit):
        raise NotImplementedError("atomic init")

    elif isinstance(lhs_atomicity, AtomicUpdate):
        codegen_state.seen_atomic_dtypes.add(lhs_dtype)
        result = codegen_state.ast_builder.generate_atomic_update(
                kernel, codegen_state, lhs_atomicity, lhs_var,
                insn.assignee, insn.expression,
                lhs_dtype, rhs_type_context)

    else:
        raise ValueError("unexpected lhs atomicity type: %s"
                % type(lhs_atomicity).__name__)

    # {{{ tracing

    if kernel.options.trace_assignments or kernel.options.trace_assignment_values:
        if is_vector:
            raise Unvectorizable("tracing does not support vectorization")

        from cgen import Block, Statement

        gs, ls = kernel.get_grid_size_upper_bounds()
        group_axes = range(len(gs))
        local_axes = range(len(ls))

        gid_fmt = ", ".join("gid%d=%%d" % axis for axis in group_axes)
        lid_fmt = ", ".join("lid%d=%%d" % axis for axis in local_axes)
        printf_format = "%s.%s[%s][%s]: %s" % (
                kernel.name, insn.id, gid_fmt, lid_fmt, assignee_var_name)

        printf_args = ["gid(%d)" % axis for axis in group_axes]
        printf_args += ["lid(%d)" % axis for axis in local_axes]

        if assignee_indices:
            printf_format += "[%s]" % ",".join(["%d"] * len(assignee_indices))
            printf_args += [
                    ecm(idx, prec=PREC_NONE, type_context="i")
                    for idx in assignee_indices]

        trace_values = kernel.options.trace_assignment_values

        if trace_values:
            # Other dtype kinds get no value printed.
            kind = lhs_dtype.numpy_dtype.kind
            if kind == "i":
                printf_format += " = %d"
                printf_args.append(lhs_code)
            elif kind == "f":
                printf_format += " = %g"
                printf_args.append(lhs_code)
            elif kind == "c":
                printf_format += " = %g + %gj"
                printf_args += ["(%s).x" % lhs_code, "(%s).y" % lhs_code]

        printf_args_str = ", " + ", ".join(printf_args) if printf_args else ""

        printf_insn = Statement(
                'printf("%s\\n"%s)' % (printf_format, printf_args_str))

        if trace_values:
            # The value can only be printed once it has been assigned.
            result = Block([result, printf_insn])
        else:
            # print first, execute later -> helps find segfaults
            result = Block([printf_insn, result])

    # }}}

    return result
Ejemplo n.º 25
0
def generate_expr_instruction_code(kernel, insn, codegen_state):
    """Generate a ``cgen.Assign`` for an expression instruction.

    When one of the kernel's tracing options is set, the assignment is
    wrapped in a ``cgen.Block`` together with a ``printf`` statement.

    :arg kernel: the kernel; its target, options, name, grid sizes and
        variable descriptors are read.
    :arg insn: the instruction; ``assignee``, ``expression``, ``id`` and
        ``assignees_and_indices()`` are used.
    :arg codegen_state: provides the expression-to-code mapper and the
        vectorization info.
    :returns: the generated AST node.
    :raises Unvectorizable: under vectorization, if assignee and expression
        disagree on being vectors, or if a vector assignment is traced.
    """
    ecm = codegen_state.expression_to_code_mapper

    from loopy.expression import dtype_to_type_context, VectorizabilityChecker

    # Stays False when no vectorization is in progress.
    is_vector = False

    vec_info = codegen_state.vectorization_info
    if vec_info:
        checker = VectorizabilityChecker(kernel, vec_info.iname, vec_info.length)
        assignee_is_vector = checker(insn.assignee)
        expression_is_vector = checker(insn.expression)

        if expression_is_vector != assignee_is_vector:
            raise Unvectorizable("LHS and RHS disagree on whether they are vectors")

        is_vector = expression_is_vector

    # Exactly one assignee is expected; the unpacking fails otherwise.
    (assignee_var_name, assignee_indices), = insn.assignees_and_indices()
    target_dtype = kernel.get_var_descriptor(assignee_var_name).dtype

    from cgen import Assign

    lhs_code = ecm(insn.assignee, prec=PREC_NONE, type_context=None)
    rhs_type_context = dtype_to_type_context(kernel.target, target_dtype)
    rhs_code = ecm(
        insn.expression,
        prec=PREC_NONE,
        type_context=rhs_type_context,
        needed_dtype=target_dtype,
    )
    result = Assign(lhs_code, rhs_code)

    if kernel.options.trace_assignments or kernel.options.trace_assignment_values:
        if is_vector:
            raise Unvectorizable("tracing does not support vectorization")

        from cgen import Block, Statement

        gs, ls = kernel.get_grid_sizes()
        group_axes = range(len(gs))
        local_axes = range(len(ls))

        gid_fmt = ", ".join("gid%d=%%d" % axis for axis in group_axes)
        lid_fmt = ", ".join("lid%d=%%d" % axis for axis in local_axes)
        printf_format = "%s.%s[%s][%s]: %s" % (
            kernel.name, insn.id, gid_fmt, lid_fmt, assignee_var_name)

        printf_args = ["gid(%d)" % axis for axis in group_axes]
        printf_args += ["lid(%d)" % axis for axis in local_axes]

        if assignee_indices:
            printf_format += "[%s]" % ",".join(["%d"] * len(assignee_indices))
            printf_args += [
                ecm(idx, prec=PREC_NONE, type_context="i")
                for idx in assignee_indices
            ]

        trace_values = kernel.options.trace_assignment_values

        if trace_values:
            # Other dtype kinds get no value printed.
            kind = target_dtype.kind
            if kind == "i":
                printf_format += " = %d"
                printf_args.append(lhs_code)
            elif kind == "f":
                printf_format += " = %g"
                printf_args.append(lhs_code)
            elif kind == "c":
                printf_format += " = %g + %gj"
                printf_args += ["(%s).x" % lhs_code, "(%s).y" % lhs_code]

        printf_args_str = ", " + ", ".join(printf_args) if printf_args else ""

        printf_insn = Statement(
            "printf(\"%s\\n\"%s)" % (printf_format, printf_args_str))

        if trace_values:
            # The value can only be printed once it has been assigned.
            result = Block([result, printf_insn])
        else:
            # print first, execute later -> helps find segfaults
            result = Block([printf_insn, result])

    return result
Ejemplo n.º 26
0
    def map_call(self, expr, enclosing_prec, type_context):
        """Return the target code for the function call *expr*.

        ``indexof`` and ``indexof_vec`` are expanded into an index expression
        for their single subscript argument.  Any other callee is looked up
        with ``self.kernel.mangle_function`` and rendered as
        ``target_name(arg, ...)``.

        :arg expr: call node providing ``function`` and ``parameters``.
        :arg enclosing_prec: not used by this method.
        :arg type_context: not used by this method.
        :returns: for ``indexof``/``indexof_vec`` a value computed from
            ``access_info.subscripts[0]``; otherwise the call as a ``str``.
        :raises LoopyError: if ``indexof``/``indexof_vec`` is not given exactly
            one subscript argument, if the subscripted array is an
            ``ImageArg``, or if the function does not have exactly one
            result dtype.
        :raises RuntimeError: if no function mangler recognizes the callee.
        """
        from pymbolic.primitives import Variable, Subscript
        from pymbolic.mapper.stringifier import PREC_NONE

        identifier = expr.function

        # {{{ implement indexof, indexof_vec

        # The callee is not necessarily a Variable (see the isinstance check
        # after this section), so it may not have a 'name' attribute.
        func_name = getattr(identifier, "name", None)

        if func_name in ("indexof", "indexof_vec"):
            if len(expr.parameters) != 1:
                raise LoopyError("%s takes exactly one argument" %
                                 func_name)
            arg, = expr.parameters
            if not isinstance(arg, Subscript):
                raise LoopyError("argument to %s must be a subscript" %
                                 func_name)

            ary = self.find_array(arg)

            from loopy.kernel.array import get_access_info
            from pymbolic import evaluate
            access_info = get_access_info(
                self.kernel.target, ary, arg.index,
                lambda expr: evaluate(expr, self.codegen_state.var_subst_map),
                self.codegen_state.vectorization_info)

            from loopy.kernel.data import ImageArg
            if isinstance(ary, ImageArg):
                raise LoopyError("%s does not support images" %
                                 func_name)

            if func_name == "indexof":
                return access_info.subscripts[0]

            # Only "indexof_vec" remains at this point.
            from loopy.kernel.array import VectorArrayDimTag

            # If several axes carry a vector tag, the last one wins.
            ivec = None
            for iaxis, dim_tag in enumerate(ary.dim_tags):
                if isinstance(dim_tag, VectorArrayDimTag):
                    ivec = iaxis

            if ivec is None:
                return access_info.subscripts[0]

            return (access_info.subscripts[0] * ary.shape[ivec] +
                    access_info.vector_index)

        # }}}

        if isinstance(identifier, Variable):
            identifier = identifier.name

        par_dtypes = tuple(self.infer_type(par) for par in expr.parameters)

        mangle_result = self.kernel.mangle_function(
            identifier, par_dtypes, ast_builder=self.codegen_state.ast_builder)

        if mangle_result is None:
            raise RuntimeError(
                "function '%s' unknown--"
                "maybe you need to register a function mangler?" % identifier)

        if len(mangle_result.result_dtypes) != 1:
            raise LoopyError(
                "functions with more or fewer than one return value "
                "may not be used in an expression")

        if mangle_result.arg_dtypes is not None:
            # The mangler declared the argument dtypes: generate each
            # argument with the declared dtype as the needed dtype.
            str_parameters = [
                self.rec(par, PREC_NONE,
                         dtype_to_type_context(self.kernel.target, tgt_dtype),
                         tgt_dtype)
                for par, tgt_dtype in zip(expr.parameters,
                                          mangle_result.arg_dtypes)
            ]

        else:
            # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to
            # propagate the type context here. But for many others, it does
            # not. Using the inferred type as a stopgap for now.
            str_parameters = [
                self.rec(par,
                         PREC_NONE,
                         type_context=dtype_to_type_context(
                             self.kernel.target, par_dtype))
                for par, par_dtype in zip(expr.parameters, par_dtypes)
            ]

            from warnings import warn
            warn(
                "Calling function '%s' with unknown C signature--"
                "return CallMangleInfo.arg_dtypes" % identifier, LoopyWarning)

        from loopy.codegen import SeenFunction
        # Record the declared argument dtypes if known, else the inferred ones.
        self.codegen_state.seen_functions.add(
            SeenFunction(identifier, mangle_result.target_name,
                         mangle_result.arg_dtypes or par_dtypes))

        return "%s(%s)" % (mangle_result.target_name,
                           ", ".join(str_parameters))
Ejemplo n.º 27
0
    def emit_call_insn(self, insn, target, expression_to_code_mapper):
        """
        Build the call expression for *insn* in the target's syntax.

        :arg insn: An instance of :class:`loopy.kernel.instruction.CallInstruction`.
        :arg target: An instance of :class:`loopy.target.TargetBase`.
        :arg expression_to_code_mapper: An instance of :class:`IdentityMapper`
            responsible for code mapping from :mod:`loopy` syntax to the
            **target syntax**.

        :returns: A tuple of the call to be generated and an instance of
            :class:`bool` whether the first assignee is a part of the LHS in
            the assignment instruction.

        :raises NotImplementedError: if *target* is not a
            :class:`loopy.target.c.CFamilyTarget`.
        :raises LoopyError: if the inferred type of a second or later assignee
            differs from the dtype recorded for it in ``self.arg_id_to_dtype``.

        .. note::

            The default implementation returns the first assignees and the
            references of the rest of the assignees are appended to the
            arguments of the call.

            *Example:* ``c, d = f(a, b)`` is returned as ``c = f(a, b, &d)``
        """
        from loopy.target.c import CFamilyTarget
        if not isinstance(target, CFamilyTarget):
            raise NotImplementedError()

        from loopy.kernel.instruction import CallInstruction
        from loopy.expression import dtype_to_type_context
        from pymbolic.mapper.stringifier import PREC_NONE
        from pymbolic import var

        assert isinstance(insn, CallInstruction)
        assert self.is_ready_for_codegen()

        ecm = expression_to_code_mapper
        parameters = insn.expression.parameters
        # The first assignee is the call's return value; only the remaining
        # ones are passed by reference.
        assignees = insn.assignees[1:]

        # Parameters are keyed 0, 1, ... in arg_id_to_dtype.
        arg_dtypes = tuple(self.arg_id_to_dtype[i]
                           for i in range(len(parameters)))

        # The i-th assignee after the first is keyed -i - 2 (-2, -3, ...).
        assignee_dtypes = tuple(self.arg_id_to_dtype[-i - 2]
                                for i in range(len(assignees)))

        tgt_parameters = [
            ecm(par, PREC_NONE, dtype_to_type_context(target, tgt_dtype),
                tgt_dtype).expr
            for par, tgt_dtype in zip(parameters, arg_dtypes)
        ]

        for a, tgt_dtype in zip(assignees, assignee_dtypes):
            inferred_dtype = ecm.infer_type(a)
            if tgt_dtype != inferred_dtype:
                # The two halves of this message used to run together
                # ("Expected: XGot: Y"); keep them visibly separated.
                raise LoopyError("Type Mismatch in function %s. Expected: %s, "
                                 "Got: %s" %
                                 (self.name, tgt_dtype, inferred_dtype))
            tgt_parameters.append(
                var("&")(ecm(a, PREC_NONE,
                             dtype_to_type_context(target, tgt_dtype),
                             tgt_dtype).expr))

        # assignee is returned whenever the size of assignees is non zero.
        first_assignee_is_returned = len(insn.assignees) > 0

        return var(
            self.name_in_target)(*tgt_parameters), first_assignee_is_returned
Ejemplo n.º 28
0
    def emit_multiple_assignment(self, codegen_state, insn):
        """Generate code for a call instruction with any number of assignees.

        The first assignee (if any) receives the call's type-cast return
        value; each further assignee is passed by address as an extra
        trailing argument.  Calls mangled to ``loopy_make_tuple`` are handed
        to ``self.emit_tuple_assignment`` instead.

        :arg codegen_state: provides ``kernel``, ``expression_to_code_mapper``
            and ``seen_functions``.
        :arg insn: instruction whose ``expression`` is the call and whose
            ``assignees`` are the left-hand sides.
        :returns: a ``cgen.ExpressionStatement`` if the function has no result
            dtypes, otherwise a ``cgen.Assign`` (or whatever
            ``emit_tuple_assignment`` returns for ``loopy_make_tuple``).
        :raises RuntimeError: if no function mangler recognizes the callee.
        :raises LoopyError: if the inferred type of a second or later assignee
            differs from the corresponding entry of
            ``mangle_result.result_dtypes``.
        """
        ecm = codegen_state.expression_to_code_mapper

        from pymbolic.primitives import Variable
        from pymbolic.mapper.stringifier import PREC_NONE

        func_id = insn.expression.function
        parameters = insn.expression.parameters

        if isinstance(func_id, Variable):
            func_id = func_id.name

        assignee_var_descriptors = [
                codegen_state.kernel.get_var_descriptor(a)
                for a in insn.assignee_var_names()]

        par_dtypes = tuple(ecm.infer_type(par) for par in parameters)

        mangle_result = codegen_state.kernel.mangle_function(func_id, par_dtypes)
        if mangle_result is None:
            raise RuntimeError("function '%s' unknown--"
                    "maybe you need to register a function mangler?"
                    % func_id)

        assert mangle_result.arg_dtypes is not None

        if mangle_result.target_name == "loopy_make_tuple":
            # This shortcut avoids actually having to emit a 'make_tuple'
            # function.
            return self.emit_tuple_assignment(codegen_state, insn)

        from loopy.expression import dtype_to_type_context
        c_parameters = [
                ecm(par, PREC_NONE,
                    dtype_to_type_context(self.target, tgt_dtype),
                    tgt_dtype).expr
                for par, tgt_dtype in zip(parameters, mangle_result.arg_dtypes)]

        from loopy.codegen import SeenFunction
        codegen_state.seen_functions.add(
                SeenFunction(func_id,
                    mangle_result.target_name,
                    mangle_result.arg_dtypes))

        from pymbolic import var

        # The loop starts at the *second* left-hand side, so its 1-based
        # position is 2; numbering from 1 would blame the wrong assignee.
        for lhs_pos, (a, tgt_dtype) in enumerate(
                zip(insn.assignees[1:], mangle_result.result_dtypes[1:]),
                start=2):
            if tgt_dtype != ecm.infer_type(a):
                raise LoopyError("type mismatch in %d'th (1-based) left-hand "
                        "side of instruction '%s'" % (lhs_pos, insn.id))
            c_parameters.append(
                        # TODO Yuck: The "where-at function": &(...)
                        var("&")(
                            ecm(a, PREC_NONE,
                                dtype_to_type_context(self.target, tgt_dtype),
                                tgt_dtype).expr))

        result = var(mangle_result.target_name)(*c_parameters)

        # In case of no assignees, we are done
        if len(mangle_result.result_dtypes) == 0:
            from cgen import ExpressionStatement
            return ExpressionStatement(
                    CExpression(self.get_c_expression_to_code_mapper(), result))

        # Convert the function's return dtype to the first assignee's dtype.
        result = ecm.wrap_in_typecast(
                mangle_result.result_dtypes[0],
                assignee_var_descriptors[0].dtype,
                result)

        lhs_code = ecm(insn.assignees[0], prec=PREC_NONE, type_context=None)

        from cgen import Assign
        return Assign(
                lhs_code,
                CExpression(self.get_c_expression_to_code_mapper(), result))
Ejemplo n.º 29
0
    def emit_assignment(self, codegen_state, insn):
        """Generate code for a (non-atomic) assignment instruction.

        Instructions tagged ``"!streaming_store"`` are emitted as a
        ``streaming_store(array + offset, rhs)`` statement; all others become
        a plain ``cgen.Assign``.

        :arg codegen_state: provides ``kernel``, ``expression_to_code_mapper``,
            ``var_subst_map`` and ``vectorization_info``.
        :arg insn: the instruction; ``assignee``, ``expression``, ``tags``,
            ``atomicity`` and ``assignee_var_names()`` are used.
        :returns: a ``cgen.Statement`` (streaming store) or ``cgen.Assign``.
        :raises NotImplementedError: if the instruction has an atomicity.
        :raises LoopyError: for a streaming store if the array is neither a
            ``GlobalArg`` nor a ``TemporaryVariable``, there is not exactly
            one subscript, the subscript does not contain the local axis-0
            iname exactly once as a bare summand, or a short-vector index is
            involved.
        """
        kernel = codegen_state.kernel
        ecm = codegen_state.expression_to_code_mapper

        assignee_var_name, = insn.assignee_var_names()

        lhs_var = kernel.get_var_descriptor(assignee_var_name)
        lhs_dtype = lhs_var.dtype

        if insn.atomicity:
            raise NotImplementedError("atomic ops in ISPC")

        from loopy.expression import dtype_to_type_context
        from pymbolic.mapper.stringifier import PREC_NONE

        rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)
        rhs_code = ecm(insn.expression,
                       prec=PREC_NONE,
                       type_context=rhs_type_context,
                       needed_dtype=lhs_dtype)

        lhs = insn.assignee

        # {{{ handle streaming stores

        if "!streaming_store" in insn.tags:
            ary = ecm.find_array(lhs)

            from loopy.kernel.array import get_access_info
            from pymbolic import evaluate

            from loopy.symbolic import simplify_using_aff
            index_tuple = tuple(
                simplify_using_aff(kernel, idx) for idx in lhs.index_tuple)

            # Use the codegen_state passed in, as everywhere else in this
            # method, instead of reaching for an attribute on self.
            access_info = get_access_info(
                kernel.target, ary, index_tuple,
                lambda expr: evaluate(expr, codegen_state.var_subst_map),
                codegen_state.vectorization_info)

            from loopy.kernel.data import GlobalArg, TemporaryVariable

            if not isinstance(ary, (GlobalArg, TemporaryVariable)):
                # '__name__', not '__name': the latter does not exist and
                # would mask this error with an AttributeError.
                raise LoopyError("array type not supported in ISPC: %s" %
                                 type(ary).__name__)

            if len(access_info.subscripts) != 1:
                raise LoopyError("streaming stores must have a subscript")
            subscript, = access_info.subscripts

            from pymbolic.primitives import Sum, flattened_sum, Variable
            if isinstance(subscript, Sum):
                terms = subscript.children
            else:
                # A non-sum subscript is itself the one and only summand.
                terms = (subscript, )

            from loopy.kernel.data import LocalIndexTag
            from loopy.symbolic import get_dependencies

            def is_l0_iname(name):
                # True if *name* is an iname tagged as local axis 0.
                tag = kernel.iname_to_tag.get(name)
                return isinstance(tag, LocalIndexTag) and tag.axis == 0

            # Summands of the subscript other than the bare local axis-0 iname.
            new_terms = []

            saw_l0 = False
            for term in terms:
                if isinstance(term, Variable) and is_l0_iname(term.name):
                    if saw_l0:
                        raise LoopyError("streaming store must have stride 1 "
                                         "in local index, got: %s" % subscript)
                    saw_l0 = True
                    continue

                # The local axis-0 iname may only occur as a bare summand,
                # never inside another term.
                for dep in get_dependencies(term):
                    if is_l0_iname(dep):
                        raise LoopyError(
                            "streaming store must have stride 1 "
                            "in local index, got: %s" % subscript)

                new_terms.append(term)

            if not saw_l0:
                raise LoopyError("streaming store must have stride 1 in "
                                 "local index, got: %s" % subscript)

            if access_info.vector_index is not None:
                raise LoopyError("streaming store may not use a short-vector "
                                 "data type")

            rhs_has_programindex = any(
                is_l0_iname(dep)
                for dep in get_dependencies(insn.expression))

            if not rhs_has_programindex:
                # The RHS does not depend on the local axis-0 iname.
                rhs_code = "broadcast(%s, 0)" % rhs_code

            from cgen import Statement
            return Statement(
                "streaming_store(%s + %s, %s)" %
                (access_info.array_name,
                 ecm(flattened_sum(new_terms), PREC_NONE, 'i'), rhs_code))

        # }}}

        from cgen import Assign
        return Assign(ecm(lhs, prec=PREC_NONE, type_context=None), rhs_code)