Beispiel #1
0
def add_store(name: str,
              expr: Array,
              result: ImplementedResult,
              state: CodeGenState,
              cgen_mapper: CodeGenMapper,
              output_to_temporary: bool = False) -> str:
    """Emit an assignment that writes *expr* into a newly declared variable.

    One iname per axis of *expr* is generated, the loopy expression of
    *result* is evaluated at those inames, and the resulting store
    instruction, its loop domain and the declaration of *name* are appended
    to the kernel held by *state*. Axis tags of *expr* are then applied to
    the matching inames.

    :param name: name of the variable that is written; it is declared here
    :param expr: the :class:`~pytato.Array` to store
    :param result: the :class:`ImplementedResult` that produces the value of
        *expr*
    :param state: code generation state; its kernel is replaced via
        ``state.update_kernel``
    :param cgen_mapper: the code generation mapper; it is handed to the
        shape/temporary helpers and supplies the tag types that are not
        propagated (``array_tag_t_to_not_propagate``,
        ``axis_tag_t_to_not_propagate``)
    :param output_to_temporary: if *True*, declare *name* as a temporary
        variable; if *False* (the default), declare it as a global output
        argument

    :returns: the id of the generated instruction
    """
    from loopy.kernel.instruction import make_assignment

    # One freshly generated iname per array axis.
    inames = tuple(
        state.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim))
    index_vars = tuple(prim.Variable(iname) for iname in inames)

    # Right-hand side of the store.
    expr_context = PersistentExpressionContext(state)
    rhs = result.to_loopy_expression(index_vars, expr_context)

    # Left-hand side: a zero-dimensional array is assigned unsubscripted.
    lhs = prim.Variable(name)
    if index_vars:
        lhs = lhs[index_vars]

    insn_id = state.insn_id_gen(f"{name}_store")
    store_insn = make_assignment((lhs, ),
                                 rhs,
                                 id=insn_id,
                                 within_inames=frozenset(inames),
                                 depends_on=expr_context.depends_on)

    # Loop domain spanning the scalarized shape; no reduction bounds here.
    shape = shape_to_scalar_expression(expr.shape, cgen_mapper, state)
    domain = domain_for_shape(inames, shape, {})

    # NOTE(review): the kernel is fetched only after the shape has been
    # lowered; that helper receives *state* and presumably may update the
    # kernel -- keep this ordering.
    kernel = state.kernel

    # Decide how *name* is declared; the rest of the kernel update is shared.
    if output_to_temporary:
        tvar = get_loopy_temporary(name, expr, cgen_mapper, state)
        new_temporaries = kernel.temporary_variables.copy()
        new_temporaries[name] = tvar
        declaration = {"temporary_variables": new_temporaries}
    else:
        output_arg = lp.GlobalArg(
            name,
            shape=shape,
            dtype=expr.dtype,
            order="C",
            is_input=False,
            is_output=True,
            tags=_filter_tags_not_of_type(
                expr, cgen_mapper.array_tag_t_to_not_propagate))
        declaration = {"args": kernel.args + [output_arg]}

    kernel = kernel.copy(domains=kernel.domains + [domain],
                         instructions=kernel.instructions + [store_insn],
                         **declaration)

    # {{{ axes tags -> iname tags

    for axis, iname in zip(expr.axes, inames):
        for tag in axis.tags:
            # Skip tags whose type the mapper excludes from propagation.
            if any(isinstance(tag, tag_t)
                   for tag_t in cgen_mapper.axis_tag_t_to_not_propagate):
                continue
            kernel = lp.tag_inames(kernel, {iname: tag})

    # }}}

    state.update_kernel(kernel)
    return insn_id
Beispiel #2
0
def add_store(name: str,
              expr: Array,
              result: ImplementedResult,
              state: CodeGenState,
              output_to_temporary: bool = False) -> str:
    """Emit an assignment that writes *expr* into a newly declared variable.

    One iname per axis of *expr* is generated, the loopy expression of
    *result* is evaluated at those inames (with its reduction variables
    renamed), and the resulting store instruction, its loop domain and the
    declaration of *name* are appended to the kernel held by *state*.

    :param name: name of the variable that is written; it is declared here
    :param expr: the :class:`~pytato.Array` to store
    :param result: the :class:`ImplementedResult` that produces the value of
        *expr*
    :param state: code generation state; its kernel is replaced via
        ``state.update_kernel``
    :param output_to_temporary: if *True*, declare *name* as a temporary
        variable; if *False* (the default), declare it as a global
        output-only argument

    :returns: the id of the generated instruction
    """
    from loopy.kernel.instruction import make_assignment

    def _reduction_iname(old_name):
        # Prefix with the output name and make the result unique.
        return state.var_name_gen(f"{name}{old_name}")

    # One freshly generated iname per array axis.
    inames = tuple(
        state.var_name_gen(f"{name}_dim{d}") for d in range(expr.ndim))
    index_vars = tuple(prim.Variable(iname) for iname in inames)

    # Right-hand side of the store, with reduction variables renamed to
    # names suitable as inames.
    expr_context = LoopyExpressionContext(state, num_indices=0)
    rhs = rename_reductions(
        result.to_loopy_expression(index_vars, expr_context),
        expr_context,
        _reduction_iname)

    # Left-hand side: a zero-dimensional array is assigned unsubscripted.
    lhs = prim.Variable(name)
    if index_vars:
        lhs = lhs[index_vars]

    insn_id = state.insn_id_gen(f"{name}_store")
    store_insn = make_assignment((lhs, ),
                                 rhs,
                                 id=insn_id,
                                 within_inames=frozenset(inames),
                                 depends_on=expr_context.depends_on)

    # Loop domain over the output axes plus the recorded reduction bounds.
    domain = domain_for_shape(inames, expr.shape,
                              expr_context.reduction_bounds)

    kernel = state.kernel

    # Decide how *name* is declared; the rest of the kernel update is shared.
    if output_to_temporary:
        tvar = get_loopy_temporary(name, expr)
        new_temporaries = kernel.temporary_variables.copy()
        new_temporaries[name] = tvar
        declaration = {"temporary_variables": new_temporaries}
    else:
        output_arg = lp.GlobalArg(name,
                                  shape=expr.shape,
                                  dtype=expr.dtype,
                                  order="C",
                                  is_output_only=True)
        declaration = {"args": kernel.args + [output_arg]}

    kernel = kernel.copy(domains=kernel.domains + [domain],
                         instructions=kernel.instructions + [store_insn],
                         **declaration)

    state.update_kernel(kernel)
    return insn_id
Beispiel #3
0
    def map_loopy_call(self, expr: LoopyCall, state: CodeGenState) -> None:
        """Generate the instruction that invokes the kernel of a
        :class:`LoopyCall`.

        The call's translation unit is merged into ``state.program``. The
        arguments of the entrypoint kernel are then visited in declaration
        order: every output array argument becomes an assignee backed by a
        new temporary, every input argument becomes a call parameter. A
        single call instruction, the new temporaries and the domains of all
        sub-array references are finally appended to the kernel in *state*.

        :param expr: the call to generate code for
        :param state: code generation state; updated in place through
            ``update_program``, ``update_kernel`` and ``state.results``
        """
        from loopy.kernel.instruction import make_assignment
        from loopy.symbolic import SubArrayRef

        # The kernel being called, looked up by entrypoint name.
        callee_kernel = expr.translation_unit[expr.entrypoint]

        # Make the callee available in the program being built.
        state.update_program(lp.merge([state.program, expr.translation_unit]))

        # Domains of the sweep inames created by _get_sub_array_ref below.
        domains = []

        def _get_sub_array_ref(array: Array,
                               name: str) -> "lp.symbolic.SubArrayRef":
            """Return a sub-array reference sweeping all axes of the variable
            *name*, and record the matching domain in ``domains``.
            """
            # One fresh sweep iname per axis of *array*.
            inames = tuple(
                state.var_name_gen(f"_{name}_dim{d}")
                for d in range(array.ndim))

            domains.append(
                domain_for_shape(
                    inames, shape_to_scalar_expression(array.shape, self,
                                                       state), {}))

            inames_as_vars = tuple(var(iname) for iname in inames)
            return SubArrayRef(inames_as_vars,
                               prim.Subscript(var(name), inames_as_vars))

        assignees = []
        params = []
        depends_on: Set[str] = set()
        # Temporaries to add to the kernel, keyed by variable name.
        new_tvs = {}
        # Generated up front: stored results of the outputs refer to this id.
        new_insn_id = state.insn_id_gen(f"call_{callee_kernel.name}")

        for arg in callee_kernel.args:
            # must traverse in the order of callee's args to generate the correct
            # assignees order
            if isinstance(arg, lp.ArrayArg):
                if arg.is_output:
                    # Output array: written by the call into a new temporary.
                    assignee_name = _generate_name_for_temp(
                        expr[arg.name], state)
                    assignees.append(
                        _get_sub_array_ref(expr[arg.name], assignee_name))

                    named_array = expr[arg.name]

                    # stored result for the assignee
                    result = StoredResult(assignee_name, named_array.ndim,
                                          frozenset([new_insn_id]))
                    # record the result for the corresponding loopy array
                    state.results[named_array] = result

                    new_tvs[assignee_name] = get_loopy_temporary(
                        assignee_name, named_array, self, state)
                else:
                    # Input array: must be available as a stored variable.
                    assert arg.is_input
                    pt_arg = expr.bindings[arg.name]
                    assert isinstance(pt_arg, Array)

                    pt_arg_rec = self.rec(pt_arg, state)

                    if isinstance(pt_arg_rec, StoredResult):
                        # found a stored result corresponding to the argument, use it
                        name = pt_arg_rec.name
                        params.append(_get_sub_array_ref(pt_arg, name))
                        depends_on.update(pt_arg_rec.depends_on)
                    else:
                        # did not find a stored result for the sub-expression, store
                        # it and then pass it to the call
                        name = _generate_name_for_temp(pt_arg, state)
                        store_insn_id = add_store(name,
                                                  pt_arg,
                                                  pt_arg_rec,
                                                  state,
                                                  output_to_temporary=True,
                                                  cgen_mapper=self)
                        # The call must run after the store has completed.
                        depends_on.add(store_insn_id)
                        # replace "arg" with the created stored variable
                        state.results[pt_arg] = StoredResult(
                            name, pt_arg.ndim, frozenset([store_insn_id]))
                        params.append(_get_sub_array_ref(pt_arg, name))
                        # NOTE(review): add_store(output_to_temporary=True)
                        # presumably already registered this temporary; the
                        # entry here is written again by tvs.update() below.
                        new_tvs[name] = get_loopy_temporary(
                            name, pt_arg, self, state)
            else:
                # Scalar input: passed to the call as an expression.
                assert isinstance(arg, lp.ValueArg) and arg.is_input
                pt_arg = expr.bindings[arg.name]
                prstnt_ctx = PersistentExpressionContext(state)

                if isinstance(pt_arg, Array):
                    # Only zero-dimensional arrays can be bound to a value
                    # argument; evaluate with an empty index tuple.
                    assert pt_arg.ndim == 0
                    pt_arg_rec = self.rec(pt_arg, state)
                    params.append(
                        pt_arg_rec.to_loopy_expression((), prstnt_ctx))
                    depends_on.update(pt_arg_rec.depends_on)
                else:
                    # Not an array: lower the bound expression directly, with
                    # an empty local context.
                    local_ctx = LocalExpressionContext(reduction_bounds={},
                                                       num_indices=0,
                                                       local_namespace={})
                    params.append(
                        self.exprgen_mapper(pt_arg, prstnt_ctx, local_ctx))

        # A single instruction: (assignees...) = entrypoint(params...).
        new_insn = make_assignment(tuple(assignees),
                                   var(expr.entrypoint)(*params),
                                   depends_on=frozenset(depends_on),
                                   id=new_insn_id)

        # update kernel
        # state.kernel is read here, after every add_store() call above has
        # already replaced it.
        kernel = state.kernel
        tvs = state.kernel.temporary_variables.copy()
        tvs.update(new_tvs)

        kernel = kernel.copy(instructions=kernel.instructions + [new_insn],
                             temporary_variables=tvs,
                             domains=kernel.domains + domains)

        state.update_kernel(kernel)