Ejemplo n.º 1
0
    def _get_stencil_last_ind(self, dim_size, end_length, gen_nodes, scope,
                              loc):
        last_ind = dim_size
        if end_length != 0:
            # set last index to size minus stencil size to avoid invalid
            # memory access
            index_const = ir.Var(scope, mk_unique_var("stencil_const_var"),
                                 loc)
            self.typemap[index_const.name] = types.intp
            if isinstance(end_length, numbers.Number):
                const_assign = ir.Assign(ir.Const(end_length, loc),
                                         index_const, loc)
            else:
                const_assign = ir.Assign(end_length, index_const, loc)

            gen_nodes.append(const_assign)
            last_ind = ir.Var(scope, mk_unique_var("last_ind"), loc)
            self.typemap[last_ind.name] = types.intp

            g_var = ir.Var(scope, mk_unique_var("compute_last_ind_var"), loc)
            check_func = numba.njit(_compute_last_ind)
            func_typ = types.functions.Dispatcher(check_func)
            self.typemap[g_var.name] = func_typ
            g_obj = ir.Global("_compute_last_ind", check_func, loc)
            g_assign = ir.Assign(g_obj, g_var, loc)
            gen_nodes.append(g_assign)
            index_call = ir.Expr.call(g_var, [dim_size, index_const], (), loc)
            self.calltypes[index_call] = func_typ.get_call_type(
                self.typingctx, [types.intp, types.intp], {})
            index_assign = ir.Assign(index_call, last_ind, loc)
            gen_nodes.append(index_assign)

        return last_ind
Ejemplo n.º 2
0
 def test_var(self):
     """Check that ir.Var equality follows the name, not scope or loc."""
     reference = ir.Var(None, 'foo', self.loc1)
     expected_equal = [
         ir.Var(None, 'foo', self.loc1),  # built identically
         ir.Var(None, 'foo', self.loc2),  # different location
         ir.Var(ir.Scope(None, ir.unknown_loc), 'foo', self.loc1),  # scoped
     ]
     renamed = ir.Var(None, 'bar', self.loc1)
     self.check(reference, same=expected_equal, different=[renamed])
Ejemplo n.º 3
0
            def handle_border(slice_fn_ty,
                              dim,
                              scope,
                              loc,
                              slice_func_var,
                              stmts,
                              border_inds,
                              border_tuple_items,
                              other_arg,
                              other_first):
                """Emit IR that stores ``zero_var`` into one border of ``out_arr``.

                Handles the border at the start or end of the index range of
                dimension ``dim``.  Appends to ``stmts``, in order: the
                assignment of the border index, the call to the slice
                function, the construction of the index tuple and the final
                ``SetItem``.  ``border_tuple_items[dim]`` is overwritten with
                the new slice variable.  ``other_first`` selects whether
                ``other_arg`` is the first or the second slice argument.
                """
                # ---- Generate call to slice func.
                slice_sig = self.typingctx.resolve_function_type(
                    slice_fn_ty, (types.intp, types.intp), {})

                border_ind = border_inds[dim]
                assert(isinstance(border_ind, (int, ir.Var)))
                ind_var = ir.Var(scope, mk_unique_var("$border_ind"), loc)
                self.typemap[ind_var.name] = types.intp
                # Integer indices become constants; variables are copied.
                ind_rhs = (ir.Const(border_ind, loc)
                           if isinstance(border_ind, int) else border_ind)
                stmts.append(ir.Assign(ind_rhs, ind_var, loc))

                if other_first:
                    slice_args = (other_arg, ind_var)
                else:
                    slice_args = (ind_var, other_arg)
                slice_call = ir.Expr.call(func=slice_func_var,
                                          args=slice_args,
                                          kws=(),
                                          loc=loc)
                self.calltypes[slice_call] = slice_sig

                # ---- Generate slice var
                slice_var = ir.Var(scope, mk_unique_var("$slice"), loc)
                self.typemap[slice_var.name] = types.slice2_type
                stmts.append(ir.Assign(slice_call, slice_var, loc))

                # ---- Build the index tuple with this dimension's slice
                border_tuple_items[dim] = slice_var
                index_tuple_var = ir.Var(
                    scope, mk_unique_var("$border_index_tuple_var"), loc)
                index_tuple_typ = types.containers.UniTuple(
                    types.slice2_type, ndims)
                self.typemap[index_tuple_var.name] = index_tuple_typ
                stmts.append(
                    ir.Assign(ir.Expr.build_tuple(border_tuple_items, loc),
                              index_tuple_var, loc))

                # ---- out_arr[index_tuple_var] = zero_var
                out_arr_typ = self.typemap[out_arr.name]
                store = ir.SetItem(out_arr, index_tuple_var, zero_var, loc)
                self.calltypes[store] = signature(types.none,
                                                  out_arr_typ,
                                                  index_tuple_typ,
                                                  out_arr_typ.dtype)
                stmts.append(store)
Ejemplo n.º 4
0
def replace_var_with_array_in_block(vars, block, typemap, calltypes):
    """Build a statement list where assignments to ``vars`` become stores.

    Every ``ir.Assign`` in ``block.body`` whose target name is in ``vars`` is
    replaced by two statements: a constant ``0`` index assignment and a
    ``SetItem`` writing the assigned value to ``target[0]``.  ``typemap`` and
    ``calltypes`` are updated for the new nodes.  Parfor nodes are kept, but
    their init block and loop body are processed via
    ``replace_var_with_array_internal``.  All other statements are copied
    through unchanged.

    Returns the new list of statements; ``block.body`` is not reassigned here.
    """
    rewritten = []
    for stmt in block.body:
        is_tracked_assign = (isinstance(stmt, ir.Assign)
                             and stmt.target.name in vars)
        if not is_tracked_assign:
            if isinstance(stmt, parfor.Parfor):
                # Descend into the parfor before keeping it as-is.
                replace_var_with_array_internal(vars, {0: stmt.init_block},
                                                typemap, calltypes)
                replace_var_with_array_internal(vars, stmt.loop_body, typemap,
                                                calltypes)
            rewritten.append(stmt)
            continue

        target = stmt.target
        loc = stmt.loc

        # index variable holding the constant 0
        zero_const = ir.Const(0, loc)
        zero_var = ir.Var(target.scope, mk_unique_var("$const_ind_0"), loc)
        typemap[zero_var.name] = types.uintp
        rewritten.append(ir.Assign(zero_const, zero_var, loc))

        # target[0] = value, typed as a store into a 1-D C array of the
        # target's current type
        elem_typ = typemap[target.name]
        store = ir.SetItem(target, zero_var, stmt.value, loc)
        calltypes[store] = signature(
            types.none,
            types.npytypes.Array(elem_typ, 1, "C"),
            types.intp,
            elem_typ,
        )
        rewritten.append(store)
    return rewritten
Ejemplo n.º 5
0
class CheckEquality(unittest.TestCase):
    """Base test case with shared IR fixtures and an equality helper."""

    # Variables with distinct names, all created without a scope and at the
    # unknown location.
    var_a = ir.Var(None, "a", ir.unknown_loc)
    var_b = ir.Var(None, "b", ir.unknown_loc)
    var_c = ir.Var(None, "c", ir.unknown_loc)
    var_d = ir.Var(None, "d", ir.unknown_loc)
    var_e = ir.Var(None, "e", ir.unknown_loc)
    # Three locations in the same mock file that differ only in line number.
    loc1 = ir.Loc("mock", 1, 0)
    loc2 = ir.Loc("mock", 2, 0)
    loc3 = ir.Loc("mock", 3, 0)

    def check(self, base, same=(), different=()):
        """Assert how ``base`` compares against two groups of objects.

        Parameters
        ----------
        base : object
            The object under test.
        same : iterable
            Objects for which ``base == obj`` must be true.
        different : iterable
            Objects for which ``base != obj`` must be true.

        The defaults are immutable tuples rather than lists so that no
        mutable object is shared between calls.  ``==`` and ``!=`` are
        spelled out explicitly because the operators themselves are what is
        being tested.
        """
        for s in same:
            self.assertTrue(base == s)
        for d in different:
            self.assertTrue(base != d)
Ejemplo n.º 6
0
class CheckEquality(unittest.TestCase):
    """Base test case with shared IR fixtures and an equality helper."""

    # Variables with distinct names, all created without a scope and at the
    # unknown location.
    var_a = ir.Var(None, 'a', ir.unknown_loc)
    var_b = ir.Var(None, 'b', ir.unknown_loc)
    var_c = ir.Var(None, 'c', ir.unknown_loc)
    var_d = ir.Var(None, 'd', ir.unknown_loc)
    var_e = ir.Var(None, 'e', ir.unknown_loc)
    # Three locations in the same mock file that differ only in line number.
    loc1 = ir.Loc('mock', 1, 0)
    loc2 = ir.Loc('mock', 2, 0)
    loc3 = ir.Loc('mock', 3, 0)

    def check(self, base, same=(), different=()):
        """Assert how ``base`` compares against two groups of objects.

        Parameters
        ----------
        base : object
            The object under test.
        same : iterable
            Objects for which ``base == obj`` must be true.
        different : iterable
            Objects for which ``base != obj`` must be true.

        The defaults are immutable tuples rather than lists so that no
        mutable object is shared between calls.  ``==`` and ``!=`` are
        spelled out explicitly because the operators themselves are what is
        being tested.
        """
        for s in same:
            self.assertTrue(base == s)
        for d in different:
            self.assertTrue(base != d)
Ejemplo n.º 7
0
    def test_inline_update_target_def(self):
        """After inlining a closure call, ``b`` must still have two definitions."""

        def test_impl(a):
            if a == 1:
                b = 2
            else:
                b = 3
            return b

        func_ir = compiler.run_frontend(test_impl)
        for block in list(func_ir.blocks.values()):
            for idx, stmt in enumerate(block.body):
                # match b = 2 and replace with lambda: 2
                is_b_equals_two = (
                    isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Var)
                    and guard(find_const, func_ir, stmt.value) == 2)
                if not is_b_equals_two:
                    continue

                # replace expr with a dummy call, keeping the definitions
                # table in sync with the statement
                target_defs = func_ir._definitions[stmt.target.name]
                target_defs.remove(stmt.value)
                callee = ir.Var(block.scope, "myvar", loc=stmt.loc)
                stmt.value = ir.Expr.call(callee, (), (), stmt.loc)
                target_defs.append(stmt.value)

                inline_closure_call(func_ir, {}, block, idx, lambda: 2)
                # stop scanning this block once the call has been inlined
                break

        self.assertEqual(len(func_ir._definitions['b']), 2)
Ejemplo n.º 8
0
def _dbgprint_after_each_array_assignments(lowerer, loop_body, typemap):
    for label, block in loop_body.items():
        new_block = block.copy()
        new_block.clear()
        loc = block.loc
        scope = block.scope
        for inst in block.body:
            new_block.append(inst)
            # Append print after assignment
            if isinstance(inst, ir.Assign):
                # Only apply to numbers
                if typemap[inst.target.name] not in types.number_domain:
                    continue

                # Make constant string
                strval = "{} =".format(inst.target.name)
                strconsttyp = types.StringLiteral(strval)

                lhs = ir.Var(scope, mk_unique_var("str_const"), loc)
                assign_lhs = ir.Assign(value=ir.Const(value=strval, loc=loc),
                                       target=lhs,
                                       loc=loc)
                typemap[lhs.name] = strconsttyp
                new_block.append(assign_lhs)

                # Make print node
                print_node = ir.Print(args=[lhs, inst.target],
                                      vararg=None,
                                      loc=loc)
                new_block.append(print_node)
                sig = numba.typing.signature(types.none, typemap[lhs.name],
                                             typemap[inst.target.name])
                lowerer.fndesc.calltypes[print_node] = sig
        loop_body[label] = new_block
Ejemplo n.º 9
0
    def replace_return_with_setitem(self, blocks, index_vars, out_name):
        """
        Replace each ``ir.Return`` in ``blocks`` with a ``SetItem`` that
        stores the "returned" kernel value into the result array named
        ``out_name``, indexed by the variables named in ``index_vars``.
        Block bodies are rewritten in place.
        Returns the block labels that contained return statements.
        """
        labels_with_return = []

        for label, block in blocks.items():
            scope, loc = block.scope, block.loc
            rewritten = []
            for stmt in block.body:
                if not isinstance(stmt, ir.Return):
                    rewritten.append(stmt)
                    continue

                labels_with_return.append(label)

                # If 1D array then avoid the tuple construction.
                if len(index_vars) == 1:
                    out_var = ir.Var(scope, out_name, loc)
                    idx_var = ir.Var(scope, index_vars[0], loc)
                    rewritten.append(
                        ir.SetItem(out_var, idx_var, stmt.value, loc))
                    continue

                # Convert the string names of the index variables into
                # ir.Var's and build a tuple from them.
                idx_vars = [ir.Var(scope, name, loc) for name in index_vars]
                tuple_var = ir.Var(
                    scope, ir_utils.mk_unique_var("stencil_index"), loc)
                rewritten.append(
                    ir.Assign(ir.Expr.build_tuple(idx_vars, loc), tuple_var,
                              loc))

                # Write the return statement's original value into the
                # array using the tuple index.
                out_var = ir.Var(scope, out_name, loc)
                rewritten.append(
                    ir.SetItem(out_var, tuple_var, stmt.value, loc))
            block.body = rewritten
        return labels_with_return
Ejemplo n.º 10
0
    def _gen_rebalances(self, rebalance_arrs, blocks):
        #
        for block in blocks.values():
            new_body = []
            for inst in block.body:
                # TODO: handle hiframes filter etc.
                if isinstance(inst, Parfor):
                    self._gen_rebalances(rebalance_arrs, {0: inst.init_block})
                    self._gen_rebalances(rebalance_arrs, inst.loop_body)
                if isinstance(
                        inst,
                        ir.Assign) and inst.target.name in rebalance_arrs:
                    out_arr = inst.target
                    self.func_ir._definitions[out_arr.name].remove(inst.value)
                    # hold inst results in tmp array
                    tmp_arr = ir.Var(out_arr.scope,
                                     mk_unique_var("rebalance_tmp"),
                                     out_arr.loc)
                    self.typemap[tmp_arr.name] = self.typemap[out_arr.name]
                    inst.target = tmp_arr
                    nodes = [inst]

                    def f(in_arr):  # pragma: no cover
                        out_a = sdc.distributed_api.rebalance_array(in_arr)

                    f_block = compile_to_numba_ir(
                        f, {
                            'sdc': sdc
                        }, self.typingctx, (self.typemap[tmp_arr.name], ),
                        self.typemap, self.calltypes).blocks.popitem()[1]
                    replace_arg_nodes(f_block, [tmp_arr])
                    nodes += f_block.body[:-3]  # remove none return
                    nodes[-1].target = out_arr
                    # update definitions
                    dumm_block = ir.Block(out_arr.scope, out_arr.loc)
                    dumm_block.body = nodes
                    build_definitions({0: dumm_block},
                                      self.func_ir._definitions)
                    new_body += nodes
                else:
                    new_body.append(inst)

            block.body = new_body
Ejemplo n.º 11
0
    def assign_inplace(self, rhs, typ, name) -> ir.Var:
        """Lower ``name = rhs`` without making the name unique.

        The variable is created with exactly ``name``.  Its type is recorded
        in the typemap only if no type is registered for that name yet.

        Parameters
        ----------
        rhs : object
            The value
        typ : types.Type
            type of the value
        name : str
            variable name to store to

        Returns
        -------
        res : ir.Var
        """
        loc = self._loc
        target = ir.Var(self._scope, name, loc)
        store = ir.Assign(rhs, target, loc)
        # setdefault keeps an existing type for a name that is already known
        self._typemap.setdefault(target.name, typ)
        self._lowerer.lower_inst(store)
        return target
Ejemplo n.º 12
0
    def assign(self, rhs, typ, name="pf_assign") -> ir.Var:
        """Lower an assignment of ``rhs`` to a freshly named variable.

        The variable name is derived from ``name`` via ``mk_unique_var`` and
        its type is recorded in the typemap before the assignment is lowered.

        Parameters
        ----------
        rhs : object
            The value
        typ : types.Type
            type of the value
        name : str
            base name for the new variable

        Returns
        -------
        res : ir.Var
        """
        loc = self._loc
        fresh_name = mk_unique_var(name)
        target = ir.Var(self._scope, fresh_name, loc)
        self._typemap[target.name] = typ
        self._lowerer.lower_inst(ir.Assign(rhs, target, loc))
        return target
Ejemplo n.º 13
0
    def mutate_with_body(self, func_ir, blocks, blk_start, blk_end,
                         body_blocks, dispatcher_factory, extra):
        """Wrap a with-region in set/restore calls to set_parallel_chunksize.

        ``extra["args"]`` must hold exactly one argument: the chunk size.
        Statements that call ``numba.set_parallel_chunksize(arg)`` and save
        its return value are inserted just before the last statement of the
        start block, whose first statement is dropped.  A call that passes
        the saved value back to ``set_parallel_chunksize`` is prepended to
        the end block.  ``func_ir._definitions`` is rebuilt afterwards.
        """
        ir_utils.dprint_func_ir(func_ir, "Before with changes", blocks=blocks)
        assert extra is not None
        args = extra["args"]
        assert len(args) == 1
        chunksize = args[0]

        start_block = blocks[blk_start]
        scope = start_block.scope
        loc = start_block.loc
        if isinstance(chunksize, ir.Arg):
            chunksize = ir.Var(scope, chunksize.name, loc)

        # global for Numba itself
        gvar = scope.redefine("$ngvar", loc)
        load_numba = ir.Assign(ir.Global('numba', numba, loc), gvar, loc)

        # getattr for set chunksize function in Numba
        spcattr = ir.Expr.getattr(gvar, 'set_parallel_chunksize', loc)
        spcvar = scope.redefine("$spc", loc)
        load_setter = ir.Assign(spcattr, spcvar, loc)

        # call set_parallel_chunksize and keep what it returns
        orig_pc_var = scope.redefine("$save_pc", loc)
        cs_var = scope.redefine("$cs_var", loc)
        load_chunksize = ir.Assign(chunksize, cs_var, loc)
        set_call = ir.Assign(ir.Expr.call(spcvar, [cs_var], (), loc),
                             orig_pc_var, loc)
        set_state = [load_numba, load_setter, load_chunksize, set_call]

        # call set_parallel_chunksize again with the saved value
        restore_call = ir.Assign(
            ir.Expr.call(spcvar, [orig_pc_var], (), loc), orig_pc_var, loc)
        restore_state = [restore_call]

        old_body = start_block.body
        start_block.body = old_body[1:-1] + set_state + [old_body[-1]]
        end_block = blocks[blk_end]
        end_block.body = restore_state + end_block.body
        func_ir._definitions = build_definitions(blocks)
        ir_utils.dprint_func_ir(func_ir, "After with changes", blocks=blocks)
Ejemplo n.º 14
0
def _lower_parfor_gufunc(lowerer, parfor):
    """Lowerer that handles LLVM code generation for parfor.
    This function lowers a parfor IR node to LLVM.
    The general approach is as follows:
    1) The code from the parfor's init block is lowered normally
       in the context of the current function.
    2) The body of the parfor is transformed into a gufunc function.
    3) Code is inserted into the main function that calls do_scheduling
       to divide the iteration space for each thread, allocates
       reduction arrays, calls the gufunc function, and then invokes
       the reduction function across the reduction arrays to produce
       the final reduction values.

    NOTE(review): step 3 is not visible in this body; after the gufunc is
    created, the only launch-related call made here is
    ``generate_kernel_launch_ops``. Confirm the description still applies.

    Parameters
    ----------
    lowerer
        The active lowerer; its ``context``, ``fndesc``, ``varmap`` and
        ``func_ir`` attributes are read, and ``fndesc.typemap`` is
        temporarily replaced by a shallow copy.
    parfor
        The parfor node to lower.
    """

    typingctx = lowerer.context.typing_context
    targetctx = lowerer.context
    # We copy the typemap here because for race condition variable we'll
    # update their type to array so they can be updated by the gufunc.
    orig_typemap = lowerer.fndesc.typemap
    # replace original typemap with copy and restore the original at the end.
    lowerer.fndesc.typemap = copy.copy(orig_typemap)
    if config.DEBUG_ARRAY_OPT:
        print("lowerer.fndesc", lowerer.fndesc, type(lowerer.fndesc))
    typemap = lowerer.fndesc.typemap
    varmap = lowerer.varmap

    if config.DEBUG_ARRAY_OPT:
        print("_lower_parfor_parallel")
        parfor.dump()

    loc = parfor.init_block.loc
    scope = parfor.init_block.scope

    # produce instructions for init_block
    if config.DEBUG_ARRAY_OPT:
        print("init_block = ", parfor.init_block, type(parfor.init_block))
    for instr in parfor.init_block.body:
        if config.DEBUG_ARRAY_OPT:
            print("lower init_block instr = ", instr)
        lowerer.lower_inst(instr)

    # Allocate storage for race variables that the lowerer has not seen yet.
    for racevar in parfor.races:
        if racevar not in varmap:
            rvtyp = typemap[racevar]
            rv = ir.Var(scope, racevar, loc)
            lowerer._alloca_var(rv.name, rvtyp)

    # Collect potential aliases; both dicts are filled in by the call below.
    alias_map = {}
    arg_aliases = {}
    numba.parfors.parfor.find_potential_aliases_parfor(parfor, parfor.params,
                                                       typemap,
                                                       lowerer.func_ir,
                                                       alias_map, arg_aliases)
    if config.DEBUG_ARRAY_OPT:
        print("alias_map", alias_map)
        print("arg_aliases", arg_aliases)

    # run get_parfor_outputs() and get_parfor_reductions() before
    # gufunc creation since Jumps are modified so CFG of loop_body
    # dict will become invalid
    assert parfor.params is not None

    parfor_output_arrays = numba.parfors.parfor.get_parfor_outputs(
        parfor, parfor.params)

    # compile parfor body as a separate function to be used with GUFuncWrapper
    # (work on a copy of the flags so the parfor's own flags stay untouched)
    flags = copy.copy(parfor.flags)
    flags.error_model = "numpy"

    # Can't get here unless flags.set('auto_parallel', ParallelOptions(True))
    index_var_typ = typemap[parfor.loop_nests[0].index_variable.name]

    # index variables should have the same type, check rest of indices
    for l in parfor.loop_nests[1:]:
        assert typemap[l.index_variable.name] == index_var_typ

    # Module-level flag that is on only while the gufunc body is created;
    # the ``finally`` below switches it off again even on failure.
    numba.parfors.parfor.sequential_parfor_lowering = True
    loop_ranges = [(l.start, l.stop, l.step) for l in parfor.loop_nests]

    try:
        (
            func,
            func_args,
            func_sig,
            func_arg_types,
            modified_arrays,
        ) = _create_gufunc_for_parfor_body(
            lowerer,
            parfor,
            typemap,
            typingctx,
            targetctx,
            flags,
            loop_ranges,
            {},
            bool(alias_map),
            index_var_typ,
            parfor.races,
        )
    finally:
        numba.parfors.parfor.sequential_parfor_lowering = False

    # get the shape signature
    # NOTE(review): this local is never read; ``parfor.get_shape_classes`` is
    # passed directly to ``_create_shape_signature`` below.
    get_shape_classes = parfor.get_shape_classes

    # Every gufunc argument that is not a parfor output counts as an input.
    num_inputs = len(func_args) - len(parfor_output_arrays)
    if config.DEBUG_ARRAY_OPT:
        print("func", func, type(func))
        print("func_args", func_args, type(func_args))
        print("func_sig", func_sig, type(func_sig))
        print("num_inputs = ", num_inputs)
        print("parfor_outputs = ", parfor_output_arrays)

    # call the func in parallel by wrapping it with ParallelGUFuncBuilder
    if config.DEBUG_ARRAY_OPT:
        print("loop_nests = ", parfor.loop_nests)
        print("loop_ranges = ", loop_ranges)

    gu_signature = _create_shape_signature(
        parfor.get_shape_classes,
        num_inputs,
        func_args,
        func_sig,
        parfor.races,
        typemap,
    )

    generate_kernel_launch_ops(
        lowerer,
        func,
        gu_signature,
        func_sig,
        func_args,
        num_inputs,
        func_arg_types,
        loop_ranges,
        modified_arrays,
    )

    if config.DEBUG_ARRAY_OPT:
        sys.stdout.flush()

    # Restore the original typemap of the function that was replaced
    # temporarily at the beginning of this function.
    # NOTE(review): this is skipped if anything above raises, leaving the
    # copied typemap installed on ``lowerer.fndesc`` — confirm that is fine.
    lowerer.fndesc.typemap = orig_typemap
Ejemplo n.º 15
0
    def _add_index_offsets(self, index_list, index_offsets, new_body, scope,
                           loc):
        """ Does the actual work of adding loop index variables to the
            relative index constants or variables.
        """
        assert len(index_list) == len(index_offsets)

        # shortcut if all values are integer
        if all([isinstance(v, int) for v in index_list + index_offsets]):
            # add offsets in all dimensions
            return list(map(add, index_list, index_offsets))

        out_nodes = []
        index_vars = []
        for i in range(len(index_list)):
            # new_index = old_index + offset
            old_index_var = index_list[i]
            if isinstance(old_index_var, int):
                old_index_var = ir.Var(scope, mk_unique_var("old_index_var"),
                                       loc)
                self.typemap[old_index_var.name] = types.intp
                const_assign = ir.Assign(ir.Const(index_list[i], loc),
                                         old_index_var, loc)
                out_nodes.append(const_assign)

            offset_var = index_offsets[i]
            if isinstance(offset_var, int):
                offset_var = ir.Var(scope, mk_unique_var("offset_var"), loc)
                self.typemap[offset_var.name] = types.intp
                const_assign = ir.Assign(ir.Const(index_offsets[i], loc),
                                         offset_var, loc)
                out_nodes.append(const_assign)

            if (isinstance(old_index_var, slice) or isinstance(
                    self.typemap[old_index_var.name], types.misc.SliceType)):
                # only one arg can be slice
                assert self.typemap[offset_var.name] == types.intp
                index_var = self._add_offset_to_slice(old_index_var,
                                                      offset_var, out_nodes,
                                                      scope, loc)
                index_vars.append(index_var)
                continue

            if (isinstance(offset_var, slice) or isinstance(
                    self.typemap[offset_var.name], types.misc.SliceType)):
                # only one arg can be slice
                assert self.typemap[old_index_var.name] == types.intp
                index_var = self._add_offset_to_slice(offset_var,
                                                      old_index_var, out_nodes,
                                                      scope, loc)
                index_vars.append(index_var)
                continue

            index_var = ir.Var(scope, mk_unique_var("offset_stencil_index"),
                               loc)
            self.typemap[index_var.name] = types.intp
            index_call = ir.Expr.binop(operator.add, old_index_var, offset_var,
                                       loc)
            self.calltypes[index_call] = self.typingctx.resolve_function_type(
                operator.add, (types.intp, types.intp), {})
            index_assign = ir.Assign(index_call, index_var, loc)
            out_nodes.append(index_assign)
            index_vars.append(index_var)

        new_body.extend(out_nodes)
        return index_vars
    def run(self):
        """Rewrite NumPy array method calls to ``numba_dppy.dpnp`` functions.

        For every ``lhs = getattr(arr, name)`` where ``name`` is in
        ``self.function_name_map`` and ``arr`` is typed as a NumPy array, the
        getattr is redirected to ``numba_dppy.dpnp``.  Two new assignments
        that load ``numba_dppy`` and its ``dpnp`` attribute are emitted in
        front of it, and ``lhs`` is retyped via ``get_dpnp_func_typ``.  Every
        later call made through such an ``lhs`` gets the saved array
        prepended as its first positional argument and its call signature
        recomputed.

        Returns
        -------
        bool
            True if at least one getattr was rewritten, False otherwise.
        """
        typingctx = self.state.typingctx

        # save array arg to call
        # call_varname -> array
        func_ir = self.state.func_ir
        blocks = func_ir.blocks
        saved_arr_arg = {}
        topo_order = find_topo_order(blocks)
        replaced = False

        for label in topo_order:
            block = blocks[label]
            new_body = []
            for stmt in block.body:
                if isinstance(stmt, ir.Assign) and isinstance(
                        stmt.value, ir.Expr):
                    lhs = stmt.target.name
                    rhs = stmt.value
                    # replace A.func with np.func, and save A in saved_arr_arg
                    if (rhs.op == "getattr"
                            and rhs.attr in self.function_name_map
                            and isinstance(self.typemap[rhs.value.name],
                                           types.npytypes.Array)):
                        arr = rhs.value
                        saved_arr_arg[lhs] = arr
                        scope = arr.scope
                        loc = arr.loc

                        # $load_global = global(numba_dppy)
                        g_dppy_var = ir.Var(scope,
                                            mk_unique_var("$load_global"), loc)
                        self.typemap[g_dppy_var.name] = types.misc.Module(
                            numba_dppy)
                        g_dppy = ir.Global("numba_dppy", numba_dppy, loc)
                        g_dppy_assign = ir.Assign(g_dppy, g_dppy_var, loc)

                        # $load_attr = getattr($load_global, "dpnp")
                        dpnp_var = ir.Var(scope, mk_unique_var("$load_attr"),
                                          loc)
                        self.typemap[dpnp_var.name] = types.misc.Module(
                            numba_dppy.dpnp)
                        getattr_dpnp = ir.Expr.getattr(g_dppy_var, "dpnp", loc)
                        dpnp_assign = ir.Assign(getattr_dpnp, dpnp_var, loc)

                        # Redirect the original getattr to the dpnp module;
                        # the two loads must precede it in the block.
                        rhs.value = dpnp_var
                        new_body.append(g_dppy_assign)
                        new_body.append(dpnp_assign)

                        # Each new variable is defined by the node that is
                        # actually assigned to it: the Global for the module
                        # variable, the getattr for the dpnp variable.
                        func_ir._definitions[g_dppy_var.name] = [g_dppy]
                        func_ir._definitions[dpnp_var.name] = [getattr_dpnp]

                        # update func var type
                        func = getattr(numba_dppy.dpnp, rhs.attr)
                        func_typ = get_dpnp_func_typ(func)

                        self.typemap.pop(lhs)
                        self.typemap[lhs] = func_typ
                        replaced = True

                    if rhs.op == "call" and rhs.func.name in saved_arr_arg:
                        # add array as first arg
                        arr = saved_arr_arg[rhs.func.name]
                        # update call type signature to include array arg
                        old_sig = self.calltypes.pop(rhs)
                        # argsort requires kws for typing so sig.args can't be
                        # used as a whole; the positional part of sig.args is
                        # reused since some types become Const in sig
                        argtyps = old_sig.args[:len(rhs.args)]
                        kwtyps = {
                            name: self.typemap[v.name]
                            for name, v in rhs.kws
                        }
                        self.calltypes[rhs] = self.typemap[
                            rhs.func.name].get_call_type(
                                typingctx,
                                [self.typemap[arr.name]] + list(argtyps),
                                kwtyps,
                            )
                        # list() keeps this working when args is a tuple
                        rhs.args = [arr] + list(rhs.args)

                new_body.append(stmt)
            block.body = new_body
        return replaced
    def run(self):
        """
        This function rewrites the name of NumPy functions that exist in self.function_name_map
        e.g np.sum(a) would produce the following:

        np.sum() --> numba_dppy.dpnp.sum()

        ---------------------------------------------------------------------------------------
        Numba IR Before Rewrite:
        ---------------------------------------------------------------------------------------

            $2load_global.0 = global(np: <module 'numpy' from 'numpy/__init__.py'>) ['$2load_global.0']
            $4load_method.1 = getattr(value=$2load_global.0, attr=sum) ['$2load_global.0', '$4load_method.1']
            $8call_method.3 = call $4load_method.1(a, func=$4load_method.1, args=[Var(a, test_rewrite.py:7)],
                                                   kws=(), vararg=None) ['$4load_method.1', '$8call_method.3', 'a']

        ---------------------------------------------------------------------------------------
        Numba IR After Rewrite:
        ---------------------------------------------------------------------------------------

            $dppy_replaced_var.0 = global(numba_dppy: <module 'numba_dppy' from 'numba_dppy/__init__.py'>) ['$dppy_replaced_var.0']
            $dpnp_var.1 = getattr(value=$dppy_replaced_var.0, attr=dpnp) ['$dpnp_var.1', '$dppy_replaced_var.0']
            $4load_method.1 = getattr(value=$dpnp_var.1, attr=sum) ['$4load_method.1', '$dpnp_var.1']
            $8call_method.3 = call $4load_method.1(a, func=$4load_method.1, args=[Var(a, test_rewrite.py:7)],
                                                   kws=(), vararg=None) ['$4load_method.1', '$8call_method.3', 'a']

        ---------------------------------------------------------------------------------------
        """
        func_ir = self.state.func_ir
        blocks = func_ir.blocks
        topo_order = find_topo_order(blocks)
        replaced = False

        for label in topo_order:
            block = blocks[label]
            saved_arr_arg = {}
            new_body = []
            for stmt in block.body:
                if isinstance(stmt, ir.Assign) and isinstance(
                        stmt.value, ir.Expr):
                    lhs = stmt.target.name
                    rhs = stmt.value
                    # replace np.FOO with name from self.function_name_map["FOO"]
                    # e.g. np.sum will be replaced with numba_dppy.dpnp.sum
                    if (rhs.op == "getattr"
                            and rhs.attr in self.function_name_map):
                        module_node = block.find_variable_assignment(
                            rhs.value.name).value
                        if (isinstance(module_node, ir.Global)
                                and module_node.value.__name__
                                in self.function_name_map[rhs.attr][0]) or (
                                    isinstance(module_node, ir.Expr)
                                    and module_node.attr
                                    in self.function_name_map[rhs.attr][0]):
                            rhs = stmt.value
                            rhs.attr = self.function_name_map[rhs.attr][1]

                            global_module = rhs.value
                            saved_arr_arg[lhs] = global_module

                            scope = global_module.scope
                            loc = global_module.loc

                            g_dppy_var = ir.Var(scope,
                                                mk_unique_var("$2load_global"),
                                                loc)
                            # We are trying to rename np.function_name/np.linalg.function_name with
                            # numba_dppy.dpnp.function_name.
                            # Hence, we need to have a global variable representing module numba_dppy.
                            # Next, we add attribute dpnp to global module numba_dppy to
                            # represent numba_dppy.dpnp.
                            g_dppy = ir.Global("numba_dppy", numba_dppy, loc)
                            g_dppy_assign = ir.Assign(g_dppy, g_dppy_var, loc)

                            dpnp_var = ir.Var(scope,
                                              mk_unique_var("$4load_attr"),
                                              loc)
                            getattr_dpnp = ir.Expr.getattr(
                                g_dppy_var, "dpnp", loc)
                            dpnp_assign = ir.Assign(getattr_dpnp, dpnp_var,
                                                    loc)

                            rhs.value = dpnp_var
                            new_body.append(g_dppy_assign)
                            new_body.append(dpnp_assign)
                            func_ir._definitions[dpnp_var.name] = [
                                getattr_dpnp
                            ]
                            func_ir._definitions[g_dppy_var.name] = [g_dppy]
                            replaced = True

                new_body.append(stmt)
            block.body = new_body
            return replaced
Ejemplo n.º 18
0
    def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_ir,
                           index_offsets, target, return_type, stencil_func,
                           arg_to_arr_dict):
        """ Converts a set of stencil kernel blocks to a parfor.

        The kernel blocks in ``stencil_ir`` are rewritten in place (copy
        propagation, dead code removal, relative-to-absolute index
        conversion, returns redirected to a single exit block) and wrapped
        in a ``Parfor`` node with one loop nest per input array dimension.

        Parameters
        ----------
        label
            Block label used to fetch the equivalence set from
            ``self.array_analysis``.
        in_args
            Input variables of the stencil call; ``in_args[0]`` supplies the
            dimensionality, scope and location used for generated nodes.
        out_arr
            Variable of a caller-supplied output array, or ``None`` to have
            an output array allocated with ``np.full`` in the init block.
        stencil_ir
            IR of the stencil kernel; its ``blocks`` become the parfor body.
        index_offsets
            Forwarded to ``_replace_stencil_accesses``.
        target
            Variable that is assigned the output array after the parfor.
        return_type
            Array type of the stencil result; only ``return_type.dtype`` is
            read here.
        stencil_func
            Stencil function object; its ``options`` dict is consulted for
            ``"cval"``.
        arg_to_arr_dict
            Forwarded to ``_replace_stencil_accesses``.

        Returns
        -------
        list
            IR nodes to splice into the caller: the loop-bound computations,
            the ``Parfor`` node, and the final ``target = out_arr``
            assignment, in that order.

        Raises
        ------
        ValueError
            If a ``cval`` option is given whose type is not accepted for the
            stencil return dtype.
        """
        gen_nodes = []
        stencil_blocks = stencil_ir.blocks

        if config.DEBUG_ARRAY_OPT >= 1:
            print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
                  return_type, stencil_func, stencil_blocks)
            ir_utils.dump_blocks(stencil_blocks)

        in_arr = in_args[0]
        # run copy propagate to replace in_args copies (e.g. a = A)
        in_arr_typ = self.typemap[in_arr.name]
        # NOTE(review): out_cps is never read in this method.
        in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
        name_var_table = ir_utils.get_name_var_table(stencil_blocks)

        ir_utils.apply_copy_propagate(stencil_blocks, in_cps, name_var_table,
                                      self.typemap, self.calltypes)
        if config.DEBUG_ARRAY_OPT >= 1:
            print("stencil_blocks after copy_propagate")
            ir_utils.dump_blocks(stencil_blocks)
        ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names,
                             stencil_ir, self.typemap)
        if config.DEBUG_ARRAY_OPT >= 1:
            print("stencil_blocks after removing dead code")
            ir_utils.dump_blocks(stencil_blocks)

        # create parfor vars
        # One intp-typed loop index variable per dimension of the first input.
        ndims = self.typemap[in_arr.name].ndim
        scope = in_arr.scope
        loc = in_arr.loc
        parfor_vars = []
        for i in range(ndims):
            parfor_var = ir.Var(scope, mk_unique_var("$parfor_index_var"), loc)
            self.typemap[parfor_var.name] = types.intp
            parfor_vars.append(parfor_var)

        # Rewrites relative accesses to use the loop indices and reports the
        # per-dimension lower/upper extents of the stencil.
        start_lengths, end_lengths = self._replace_stencil_accesses(
            stencil_ir, parfor_vars, in_args, index_offsets, stencil_func,
            arg_to_arr_dict)

        if config.DEBUG_ARRAY_OPT >= 1:
            print("stencil_blocks after replace stencil accesses")
            ir_utils.dump_blocks(stencil_blocks)

        # create parfor loop nests
        loopnests = []
        equiv_set = self.array_analysis.get_equiv_set(label)
        in_arr_dim_sizes = equiv_set.get_shape(in_arr)

        assert ndims == len(in_arr_dim_sizes)
        for i in range(ndims):
            # Any IR needed to compute the bounds is appended to gen_nodes so
            # that it precedes the parfor in the returned node list.
            last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i],
                                                  end_lengths[i], gen_nodes,
                                                  scope, loc)
            start_ind = self._get_stencil_start_ind(start_lengths[i],
                                                    gen_nodes, scope, loc)
            # start from stencil size to avoid invalid array access
            loopnests.append(
                numba.parfors.parfor.LoopNest(parfor_vars[i], start_ind,
                                              last_ind, 1))

        # We have to guarantee that the exit block has maximum label and that
        # there's only one exit block for the parfor body.
        # So, all return statements will change to jump to the parfor exit block.
        parfor_body_exit_label = max(stencil_blocks.keys()) + 1
        stencil_blocks[parfor_body_exit_label] = ir.Block(scope, loc)
        # Holds the value the kernel would have returned for one element.
        exit_value_var = ir.Var(scope, mk_unique_var("$parfor_exit_value"),
                                loc)
        self.typemap[exit_value_var.name] = return_type.dtype

        # create parfor index var
        # In 1D the loop variable itself indexes the output; otherwise a tuple
        # of the loop variables is built in the exit block (see
        # for_replacing_ret, which is appended there further down).
        for_replacing_ret = []
        if ndims == 1:
            parfor_ind_var = parfor_vars[0]
        else:
            parfor_ind_var = ir.Var(scope,
                                    mk_unique_var("$parfor_index_tuple_var"),
                                    loc)
            self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
                types.intp, ndims)
            tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
            tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
            for_replacing_ret.append(tuple_assign)

        if config.DEBUG_ARRAY_OPT >= 1:
            print("stencil_blocks after creating parfor index var")
            ir_utils.dump_blocks(stencil_blocks)

        # empty init block
        init_block = ir.Block(scope, loc)
        if out_arr is None:
            # No output array supplied: emit
            #   out = np.full(in_arr.shape, <cval or 0>, np.<dtype>)
            # into the init block.
            in_arr_typ = self.typemap[in_arr.name]

            shape_name = ir_utils.mk_unique_var("in_arr_shape")
            shape_var = ir.Var(scope, shape_name, loc)
            shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
            self.typemap[shape_name] = types.containers.UniTuple(
                types.intp, in_arr_typ.ndim)
            init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)])

            zero_name = ir_utils.mk_unique_var("zero_val")
            zero_var = ir.Var(scope, zero_name, loc)
            if "cval" in stencil_func.options:
                cval = stencil_func.options["cval"]
                # TODO: Loosen this restriction to adhere to casting rules.
                # NOTE(review): this branch demands an exact type match,
                # whereas the `out`-is-present branch below only requires
                # that the type be convertible.
                if return_type.dtype != typing.typeof.typeof(cval):
                    raise ValueError(
                        "cval type does not match stencil return type.")

                temp2 = return_type.dtype(cval)
            else:
                temp2 = return_type.dtype(0)
            full_const = ir.Const(temp2, loc)
            self.typemap[zero_name] = return_type.dtype
            init_block.body.extend([ir.Assign(full_const, zero_var, loc)])

            so_name = ir_utils.mk_unique_var("stencil_output")
            out_arr = ir.Var(scope, so_name, loc)
            # Output has the kernel's return dtype with the input's ndim and
            # layout.
            self.typemap[out_arr.name] = numba.core.types.npytypes.Array(
                return_type.dtype, in_arr_typ.ndim, in_arr_typ.layout)
            dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
            self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
            dtype_g_np = ir.Global('np', np, loc)
            dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
            init_block.body.append(dtype_g_np_assign)

            # np.<dtype name>, passed as the dtype argument of np.full.
            dtype_np_attr_call = ir.Expr.getattr(dtype_g_np_var,
                                                 return_type.dtype.name, loc)
            dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
            self.typemap[dtype_attr_var.name] = types.functions.NumberClass(
                return_type.dtype)
            dtype_attr_assign = ir.Assign(dtype_np_attr_call, dtype_attr_var,
                                          loc)
            init_block.body.append(dtype_attr_assign)

            stmts = ir_utils.gen_np_call("full", np.full, out_arr,
                                         [shape_var, zero_var, dtype_attr_var],
                                         self.typingctx, self.typemap,
                                         self.calltypes)
            # Record that the new output has the same shape as the input.
            equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
            init_block.body.extend(stmts)
        else:  # out is present
            if "cval" in stencil_func.options:  # do out[:] = cval
                cval = stencil_func.options["cval"]
                # TODO: Loosen this restriction to adhere to casting rules.
                cval_ty = typing.typeof.typeof(cval)
                if not self.typingctx.can_convert(cval_ty, return_type.dtype):
                    msg = "cval type does not match stencil return type."
                    raise ValueError(msg)

                # get slice ref
                slice_var = ir.Var(scope, mk_unique_var("$py_g_var"), loc)
                slice_fn_ty = self.typingctx.resolve_value_type(slice)
                self.typemap[slice_var.name] = slice_fn_ty
                slice_g = ir.Global('slice', slice, loc)
                slice_assigned = ir.Assign(slice_g, slice_var, loc)
                init_block.body.append(slice_assigned)

                # NOTE(review): the signature is resolved for two `none`
                # arguments while the call expression below carries no
                # arguments -- confirm this pairing is intended.
                sig = self.typingctx.resolve_function_type(
                    slice_fn_ty, (types.none, ) * 2, {})

                callexpr = ir.Expr.call(func=slice_var,
                                        args=(),
                                        kws=(),
                                        loc=loc)

                self.calltypes[callexpr] = sig
                slice_inst_var = ir.Var(scope, mk_unique_var("$slice_inst"),
                                        loc)
                self.typemap[slice_inst_var.name] = types.slice2_type
                slice_assign = ir.Assign(callexpr, slice_inst_var, loc)
                init_block.body.append(slice_assign)

                # get const val for cval
                cval_const_val = ir.Const(return_type.dtype(cval), loc)
                cval_const_var = ir.Var(scope, mk_unique_var("$cval_const"),
                                        loc)
                self.typemap[cval_const_var.name] = return_type.dtype
                cval_const_assign = ir.Assign(cval_const_val, cval_const_var,
                                              loc)
                init_block.body.append(cval_const_assign)

                # do setitem on `out` array
                setitemexpr = ir.StaticSetItem(out_arr, slice(None, None),
                                               slice_inst_var, cval_const_var,
                                               loc)
                init_block.body.append(setitemexpr)
                sig = signature(types.none, self.typemap[out_arr.name],
                                self.typemap[slice_inst_var.name],
                                self.typemap[out_arr.name].dtype)
                self.calltypes[setitemexpr] = sig

        # Redirect the kernel's returns to the exit block, with the returned
        # value carried in exit_value_var (per the exit-block comment above).
        self.replace_return_with_setitem(stencil_blocks, exit_value_var,
                                         parfor_body_exit_label)

        if config.DEBUG_ARRAY_OPT >= 1:
            print("stencil_blocks after replacing return")
            ir_utils.dump_blocks(stencil_blocks)

        # Exit block: (build index tuple if ndims > 1, then)
        # out_arr[parfor_ind_var] = exit_value_var
        setitem_call = ir.SetItem(out_arr, parfor_ind_var, exit_value_var, loc)
        self.calltypes[setitem_call] = signature(
            types.none, self.typemap[out_arr.name],
            self.typemap[parfor_ind_var.name],
            self.typemap[out_arr.name].dtype)
        stencil_blocks[parfor_body_exit_label].body.extend(for_replacing_ret)
        stencil_blocks[parfor_body_exit_label].body.append(setitem_call)

        # simplify CFG of parfor body (exit block could be simplified often)
        # add dummy return to enable CFG
        dummy_loc = ir.Loc("stencilparfor_dummy", -1)
        ret_const_var = ir.Var(scope, mk_unique_var("$cval_const"), dummy_loc)
        cval_const_assign = ir.Assign(ir.Const(0, loc=dummy_loc),
                                      ret_const_var, dummy_loc)
        stencil_blocks[parfor_body_exit_label].body.append(cval_const_assign)

        stencil_blocks[parfor_body_exit_label].body.append(
            ir.Return(ret_const_var, dummy_loc), )
        stencil_blocks = ir_utils.simplify_CFG(stencil_blocks)
        # Drop the last statement of the highest-labelled block; presumably
        # this is the dummy return added above -- relies on simplify_CFG
        # keeping the exit block as the maximum label.
        stencil_blocks[max(stencil_blocks.keys())].body.pop()

        if config.DEBUG_ARRAY_OPT >= 1:
            print("stencil_blocks after adding SetItem")
            ir_utils.dump_blocks(stencil_blocks)

        pattern = ('stencil', [start_lengths, end_lengths])
        parfor = numba.parfors.parfor.Parfor(loopnests, init_block,
                                             stencil_blocks, loc,
                                             parfor_ind_var, equiv_set,
                                             pattern, self.flags)
        gen_nodes.append(parfor)
        # Bind the stencil call's original target to the output array.
        gen_nodes.append(ir.Assign(out_arr, target, loc))
        return gen_nodes
Ejemplo n.º 19
0
    def add_indices_to_kernel(self, kernel, index_names, ndim, neighborhood,
                              standard_indexed, typemap, calltypes):
        """
        Transforms the stencil kernel as specified by the user into one
        that includes each dimension's index variable as part of the getitem
        calls.  So, in effect array[-1] becomes array[index0-1].

        Parameters
        ----------
        kernel
            Kernel IR; the body of every block in ``kernel.blocks`` is
            replaced in place.  ``kernel.arg_names`` identifies the arrays.
        index_names
            Names of the per-dimension absolute index variables.
        ndim
            Number of dimensions of the relatively indexed arrays.
        neighborhood
            Per-dimension ``[min, max]`` offsets, or ``None`` to infer them
            from the constant indices found in the kernel.
        standard_indexed
            Names of kernel arguments whose accesses are left untouched.
        typemap
            Variable-name to type mapping; consulted for index types and
            updated for generated ``slice_addition`` globals.
        calltypes
            Call-expression to signature mapping; updated for generated
            ``slice_addition`` calls.

        Returns
        -------
        tuple
            ``(neighborhood, relatively_indexed)`` where ``neighborhood`` is
            the supplied or inferred neighborhood and ``relatively_indexed``
            is the set of argument names accessed with relative indexing.

        Raises
        ------
        ValueError
            If the neighborhood length does not match ``ndim``, a kernel
            argument is assigned to, an index is not constant while the
            neighborhood must be inferred, no relative access exists, or an
            index does not match the array dimensionality.
        """
        const_dict = {}
        kernel_consts = []

        if config.DEBUG_ARRAY_OPT >= 1:
            print("add_indices_to_kernel", ndim, neighborhood)
            ir_utils.dump_blocks(kernel.blocks)

        need_to_calc_kernel = neighborhood is None
        if not need_to_calc_kernel and len(neighborhood) != ndim:
            raise ValueError("%d dimensional neighborhood specified for %d "
                             "dimensional input array" %
                             (len(neighborhood), ndim))

        tuple_table = ir_utils.get_tuple_table(kernel.blocks)

        relatively_indexed = set()

        for block in kernel.blocks.values():
            scope = block.scope
            loc = block.loc
            new_body = []
            for stmt in block.body:
                is_expr_assign = (isinstance(stmt, ir.Assign)
                                  and isinstance(stmt.value, ir.Expr))

                if (isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Const)):
                    if config.DEBUG_ARRAY_OPT >= 1:
                        print("remembering in const_dict", stmt.target.name,
                              stmt.value.value)
                    # Remember consts for use later.
                    const_dict[stmt.target.name] = stmt.value.value

                if ((is_expr_assign
                     and stmt.value.op in ['setitem', 'static_setitem']
                     and stmt.value.value.name in kernel.arg_names)
                        or (isinstance(stmt, ir.SetItem)
                            and stmt.target.name in kernel.arg_names)):
                    raise ValueError("Assignments to arrays passed to stencil "
                                     "kernels is not allowed.")

                if not (is_expr_assign
                        and stmt.value.op in ['getitem', 'static_getitem']
                        and stmt.value.value.name in kernel.arg_names
                        and stmt.value.value.name not in standard_indexed):
                    # Anything that is not a relative read is kept verbatim.
                    new_body.append(stmt)
                    continue

                # We found a getitem from the input array.
                array_var = stmt.value.value
                if stmt.value.op == 'getitem':
                    stmt_index_var = stmt.value.index
                else:
                    # allow static_getitem since rewrite passes are applied
                    stmt_index_var = stmt.value.index_var

                relatively_indexed.add(array_var.name)

                # Store the index used after looking up the variable in
                # the const dictionary.
                if need_to_calc_kernel:
                    assert hasattr(stmt_index_var, 'name')

                    if stmt_index_var.name in tuple_table:
                        kernel_consts.append(tuple_table[stmt_index_var.name])
                    elif stmt_index_var.name in const_dict:
                        kernel_consts.append(const_dict[stmt_index_var.name])
                    else:
                        raise ValueError(
                            "stencil kernel index is not "
                            "constant, 'neighborhood' option required")

                if ndim == 1:
                    # Single dimension always has index variable 'index0'.
                    # abs_index_var will hold the real index and is computed
                    # by adding the relative offset in the original index to
                    # the current absolute location in index0.
                    index_var = ir.Var(scope, index_names[0], loc)
                    abs_index_var = ir.Var(
                        scope, ir_utils.mk_unique_var("stencil_index"), loc)
                    self._add_relative_offset(
                        stmt_index_var, typemap[stmt_index_var.name],
                        index_var, abs_index_var, new_body, scope, loc,
                        typemap, calltypes)
                else:
                    abs_index_var = ir.Var(
                        scope, ir_utils.mk_unique_var("stencil_index"), loc)
                    ind_stencils = []

                    stmt_index_var_typ = typemap[stmt_index_var.name]
                    # Same idea as above but you have to extract
                    # individual elements out of the tuple indexing
                    # expression and add the corresponding index variable
                    # to them and then reconstitute as a tuple that can
                    # index the array.
                    for dim in range(ndim):
                        const_index_var = ir.Var(
                            scope, ir_utils.mk_unique_var("const_index"), loc)
                        new_body.append(
                            ir.Assign(ir.Const(dim, loc), const_index_var,
                                      loc))
                        index_var = ir.Var(scope, index_names[dim], loc)

                        dim_index_var = ir.Var(
                            scope,
                            ir_utils.mk_unique_var("ind_stencil_index"), loc)
                        ind_stencils.append(dim_index_var)
                        getitemvar = ir.Var(
                            scope, ir_utils.mk_unique_var("getitem"), loc)
                        getitemcall = ir.Expr.getitem(
                            stmt_index_var, const_index_var, loc)
                        new_body.append(
                            ir.Assign(getitemcall, getitemvar, loc))
                        # stmt_index_var_typ[dim] is the type of this
                        # particular part of the index tuple.
                        self._add_relative_offset(
                            getitemvar, stmt_index_var_typ[dim], index_var,
                            dim_index_var, new_body, scope, loc, typemap,
                            calltypes)

                    tuple_call = ir.Expr.build_tuple(ind_stencils, loc)
                    new_body.append(
                        ir.Assign(tuple_call, abs_index_var, loc))

                # Replace the relative read with a read at the absolute index.
                new_body.append(
                    ir.Assign(ir.Expr.getitem(array_var, abs_index_var, loc),
                              stmt.target, loc))
            block.body = new_body

        if need_to_calc_kernel:
            # Find the size of the kernel by finding the maximum absolute value
            # index used in the kernel specification.
            if not kernel_consts:
                raise ValueError("Stencil kernel with no accesses to "
                                 "relatively indexed arrays.")
            neighborhood = [[0, 0] for _ in range(ndim)]

            for index in kernel_consts:
                if isinstance(index, (tuple, list)):
                    # Validate the length first so that an over-long index
                    # reports a dimensionality error instead of running off
                    # the end of `neighborhood`.
                    if len(index) != ndim:
                        raise ValueError(
                            "Stencil index does not match array "
                            "dimensionality.")
                    for i, te in enumerate(index):
                        if isinstance(te, ir.Var) and te.name in const_dict:
                            te = const_dict[te.name]
                        if not isinstance(te, int):
                            raise ValueError(
                                "stencil kernel index is not constant, "
                                "'neighborhood' option required")
                        neighborhood[i][0] = min(neighborhood[i][0], te)
                        neighborhood[i][1] = max(neighborhood[i][1], te)
                elif isinstance(index, int):
                    if ndim != 1:
                        raise ValueError(
                            "Stencil index does not match array "
                            "dimensionality.")
                    neighborhood[0][0] = min(neighborhood[0][0], index)
                    neighborhood[0][1] = max(neighborhood[0][1], index)
                else:
                    raise ValueError(
                        "Non-tuple or non-integer used as stencil index.")

        return (neighborhood, relatively_indexed)

    def _add_relative_offset(self, rel_index, rel_index_typ, abs_index,
                             result_var, new_body, scope, loc, typemap,
                             calltypes):
        """Append IR to ``new_body`` assigning ``rel_index + abs_index`` to
        ``result_var``.

        If ``rel_index_typ`` is a slice type the addition is done through a
        call to the jitted ``slice_addition`` function (registering the
        needed global in ``typemap`` and the call in ``calltypes``);
        otherwise a plain ``operator.add`` binop is emitted.
        """
        if isinstance(rel_index_typ, types.misc.SliceType):
            sa_var = ir.Var(scope, ir_utils.mk_unique_var("slice_addition"),
                            loc)
            sa_func = numba.njit(slice_addition)
            sa_func_typ = types.functions.Dispatcher(sa_func)
            typemap[sa_var.name] = sa_func_typ
            g_sa = ir.Global("slice_addition", sa_func, loc)
            new_body.append(ir.Assign(g_sa, sa_var, loc))
            offset_expr = ir.Expr.call(sa_var, [rel_index, abs_index], (),
                                       loc)
            calltypes[offset_expr] = sa_func_typ.get_call_type(
                self._typingctx, [rel_index_typ, types.intp], {})
        else:
            offset_expr = ir.Expr.binop(operator.add, rel_index, abs_index,
                                        loc)
        new_body.append(ir.Assign(offset_expr, result_var, loc))
Ejemplo n.º 20
0
    def _replace_stencil_accesses(self, stencil_ir, parfor_vars, in_args,
                                  index_offsets, stencil_func,
                                  arg_to_arr_dict):
        """ Convert relative indexing in the stencil kernel to standard indexing
            by adding the loop index variables to the corresponding dimensions
            of the array index tuples.
        """
        stencil_blocks = stencil_ir.blocks
        in_arr = in_args[0]
        in_arg_names = [x.name for x in in_args]

        if "standard_indexing" in stencil_func.options:
            for x in stencil_func.options["standard_indexing"]:
                if x not in arg_to_arr_dict:
                    raise ValueError("Standard indexing requested for an array " \
                        "name not present in the stencil kernel definition.")
            standard_indexed = [
                arg_to_arr_dict[x]
                for x in stencil_func.options["standard_indexing"]
            ]
        else:
            standard_indexed = []

        if in_arr.name in standard_indexed:
            raise ValueError("The first argument to a stencil kernel must use " \
                "relative indexing, not standard indexing.")

        ndims = self.typemap[in_arr.name].ndim
        scope = in_arr.scope
        loc = in_arr.loc
        # replace access indices, find access lengths in each dimension
        need_to_calc_kernel = stencil_func.neighborhood is None

        # If we need to infer the kernel size then initialize the minimum and
        # maximum seen indices for each dimension to 0.  If we already have
        # the neighborhood calculated then just convert from neighborhood format
        # to the separate start and end lengths format used here.
        if need_to_calc_kernel:
            start_lengths = ndims * [0]
            end_lengths = ndims * [0]
        else:
            start_lengths = [x[0] for x in stencil_func.neighborhood]
            end_lengths = [x[1] for x in stencil_func.neighborhood]

        # Get all the tuples defined in the stencil blocks.
        tuple_table = ir_utils.get_tuple_table(stencil_blocks)

        found_relative_index = False

        # For all blocks in the stencil kernel...
        for label, block in stencil_blocks.items():
            new_body = []
            # For all statements in those blocks...
            for stmt in block.body:
                # Reject assignments to input arrays.
                if ((isinstance(stmt, ir.Assign)
                     and isinstance(stmt.value, ir.Expr)
                     and stmt.value.op in ['setitem', 'static_setitem']
                     and stmt.value.value.name in in_arg_names)
                        or ((isinstance(stmt, ir.SetItem)
                             or isinstance(stmt, ir.StaticSetItem))
                            and stmt.target.name in in_arg_names)):
                    raise ValueError(
                        "Assignments to arrays passed to stencil kernels is not allowed."
                    )
                # We found a getitem for some array.  If that array is an input
                # array and isn't in the list of standard indexed arrays then
                # update min and max seen indices if we are inferring the
                # kernel size and create a new tuple where the relative offsets
                # are added to loop index vars to get standard indexing.
                if (isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Expr)
                        and stmt.value.op in ['static_getitem', 'getitem']
                        and stmt.value.value.name in in_arg_names
                        and stmt.value.value.name not in standard_indexed):
                    index_list = stmt.value.index
                    # handle 1D case
                    if ndims == 1:
                        index_list = [index_list]
                    else:
                        if hasattr(index_list,
                                   'name') and index_list.name in tuple_table:
                            index_list = tuple_table[index_list.name]
                    # indices can be inferred as constant in simple expressions
                    # like -c where c is constant
                    # handled here since this is a common stencil index pattern
                    stencil_ir._definitions = ir_utils.build_definitions(
                        stencil_blocks)
                    index_list = [
                        _get_const_index_expr(stencil_ir, self.func_ir, v)
                        for v in index_list
                    ]
                    if index_offsets:
                        index_list = self._add_index_offsets(
                            index_list, list(index_offsets), new_body, scope,
                            loc)

                    # update min and max indices
                    if need_to_calc_kernel:
                        # all indices should be integer to be able to calculate
                        # neighborhood automatically
                        if (isinstance(index_list, ir.Var) or any(
                            [not isinstance(v, int) for v in index_list])):
                            raise ValueError(
                                "Variable stencil index only "
                                "possible with known neighborhood")
                        start_lengths = list(
                            map(min, start_lengths, index_list))
                        end_lengths = list(map(max, end_lengths, index_list))
                        found_relative_index = True

                    # update access indices
                    index_vars = self._add_index_offsets(
                        parfor_vars, list(index_list), new_body, scope, loc)

                    # new access index tuple
                    if ndims == 1:
                        ind_var = index_vars[0]
                    else:
                        ind_var = ir.Var(
                            scope, mk_unique_var("$parfor_index_ind_var"), loc)
                        self.typemap[ind_var.name] = types.containers.UniTuple(
                            types.intp, ndims)
                        tuple_call = ir.Expr.build_tuple(index_vars, loc)
                        tuple_assign = ir.Assign(tuple_call, ind_var, loc)
                        new_body.append(tuple_assign)

                    # getitem return type is scalar if all indices are integer
                    if all([
                            self.typemap[v.name] == types.intp
                            for v in index_vars
                    ]):
                        getitem_return_typ = self.typemap[
                            stmt.value.value.name].dtype
                    else:
                        # getitem returns an array
                        getitem_return_typ = self.typemap[
                            stmt.value.value.name]
                    # new getitem with the new index var
                    getitem_call = ir.Expr.getitem(stmt.value.value, ind_var,
                                                   loc)
                    self.calltypes[getitem_call] = signature(
                        getitem_return_typ,
                        self.typemap[stmt.value.value.name],
                        self.typemap[ind_var.name])
                    stmt.value = getitem_call

                new_body.append(stmt)
            block.body = new_body
        if need_to_calc_kernel and not found_relative_index:
            raise ValueError("Stencil kernel with no accesses to " \
                "relatively indexed arrays.")

        return start_lengths, end_lengths
Ejemplo n.º 21
0
    def gen_parquet_read(self, file_name, lhs):
        """Generate the IR nodes that read every column of a Parquet file.

        The column schema is taken from ``self.locals`` when it holds an entry
        for the variable that ``lhs`` maps to in ``self.reverse_copies``; that
        entry is removed from ``self.locals`` once consumed.  Otherwise
        ``file_name`` must resolve to a ``Const``/``Global``/``FreeVar``
        holding a ``str`` and the schema comes from ``parquet_file_schema``.
        A ``'<name>:convert'`` entry in ``self.locals`` (also consumed) can
        override the type of individual columns.

        Returns a tuple ``(col_names, col_arrs, out_nodes)``: the column
        names, one new ``ir.Var`` per column, and the generated IR nodes
        (``get_arrow_readers`` call, per-column read nodes,
        ``del_arrow_readers`` call).

        Raises ValueError when no schema was supplied and the file name is
        not a constant string.
        """
        scope, loc = file_name.scope, file_name.loc

        # lhs is temporary and will possibly be assigned to user variable
        assert lhs.name.startswith('$')

        table_types = None
        convert_types = {}
        if lhs.name in self.reverse_copies:
            user_name = self.reverse_copies[lhs.name]
            # user-specified schema
            if user_name in self.locals:
                table_types = self.locals.pop(user_name)
            # user-specified type conversion
            convert_key = user_name + ':convert'
            if convert_key in self.locals:
                convert_types = self.locals.pop(convert_key)

        if table_types is not None:
            col_names = list(table_types.keys())
            col_types = list(table_types.values())
        else:
            # no schema given: the file name has to be a known constant
            # string so the schema can be looked up now
            fname_def = guard(get_definition, self.func_ir, file_name)
            is_const_str = (
                isinstance(fname_def, (ir.Const, ir.Global, ir.FreeVar))
                and isinstance(fname_def.value, str))
            if not is_const_str:
                raise ValueError("Parquet schema not available")
            col_names, col_types = parquet_file_schema(fname_def.value)
            # remove Pandas index if exists
            # TODO: handle index properly when indices are supported
            _rm_pd_index(col_names, col_types)

        # get arrow readers once
        # NOTE: the local assignment below is intentional; its target is the
        # variable picked up as arrow_readers_var further down.
        def init_arrow_readers(fname):
            arrow_readers = get_arrow_readers(unicode_to_char_ptr(fname))

        init_globals = {
            'get_arrow_readers': _get_arrow_readers,
            'unicode_to_char_ptr': unicode_to_char_ptr,
        }
        init_ir = compile_to_numba_ir(init_arrow_readers, init_globals)
        _, f_block = init_ir.blocks.popitem()

        replace_arg_nodes(f_block, [file_name])
        # keep everything except the last three statements of the block
        # (presumably the implicit return sequence -- TODO confirm)
        out_nodes = list(f_block.body[:-3])
        arrow_readers_var = out_nodes[-1].target

        col_arrs = []
        for i, cname in enumerate(col_names):
            # get column type from schema, unless the user asked for a
            # conversion of this column
            schema_type = col_types[i]
            if cname in convert_types:
                c_type = convert_types[cname].dtype
            else:
                c_type = schema_type

            # create a variable for the column
            cvar = ir.Var(scope, mk_unique_var(cname), loc)
            col_arrs.append(cvar)

            out_nodes.extend(
                get_column_read_nodes(c_type, cvar, arrow_readers_var, i))

        # delete arrow readers
        def cleanup_arrow_readers(readers):
            s = del_arrow_readers(readers)

        cleanup_ir = compile_to_numba_ir(
            cleanup_arrow_readers, {'del_arrow_readers': _del_arrow_readers})
        _, f_block = cleanup_ir.blocks.popitem()
        replace_arg_nodes(f_block, [arrow_readers_var])
        out_nodes.extend(f_block.body[:-3])
        return col_names, col_arrs, out_nodes
Ejemplo n.º 22
0
def get_stencil_ir(sf, typingctx, args, scope, loc, input_dict, typemap,
                   calltypes):
    """Return typed IR for the stencil kernel of ``sf``.

    A copy of ``sf.kernel_ir`` (with deep-copied blocks) is run through the
    'before-inference' rewrites and type inference.  Its block labels are then
    offset by ``ir_utils.next_label()``, every variable in the inferred
    typemap is renamed to a fresh ``ir.Var`` in ``scope``, and each ``ir.Arg``
    assignment is replaced by the ``input_dict`` entry for that argument
    index.

    Side effects: ``typemap`` receives the types of the renamed variables,
    ``calltypes`` receives the kernel's call types, and
    ``ir_utils._max_label`` is set to the largest offset label.

    Returns ``(stencil_func_ir, return_type, arg_to_arr_dict)`` where
    ``return_type`` is ``sf.get_return_type(args)[0]`` and ``arg_to_arr_dict``
    maps each replaced ``ir.Arg`` name to the name of its replacement.

    Raises ValueError if "out" appears in the kernel's name/variable table.
    """
    from numba.core.cpu import CPUContext
    from numba.core.registry import cpu_target
    from numba.core.annotations import type_annotations
    from numba.core.typed_passes import type_inference_stage

    # get untyped IR; deep-copy the blocks so the IR stored in the
    # StencilFunc object is never modified
    stencil_func_ir = sf.kernel_ir.copy()
    blocks = copy.deepcopy(stencil_func_ir.blocks)
    stencil_func_ir.blocks = blocks

    if "out" in ir_utils.get_name_var_table(stencil_func_ir.blocks):
        raise ValueError(
            "Cannot use the reserved word 'out' in stencil kernels.")

    # get typed IR with a dummy pipeline (similar to test_parfors.py)
    targetctx = CPUContext(typingctx)
    with cpu_target.nested_context(typingctx, targetctx):
        pipeline = DummyPipeline(typingctx, targetctx, args, stencil_func_ir)
        state = pipeline.state

        rewrites.rewrite_registry.apply('before-inference', state)

        inferred = type_inference_stage(
            state.typingctx, state.func_ir, state.args, None)
        state.typemap, state.return_type, state.calltypes = inferred

        type_annotations.TypeAnnotation(
            func_ir=state.func_ir,
            typemap=state.typemap,
            calltypes=state.calltypes,
            lifted=(),
            lifted_from=None,
            args=state.args,
            return_type=state.return_type,
            html_output=config.HTML,
        )

    # make block labels unique
    blocks = ir_utils.add_offset_to_labels(blocks, ir_utils.next_label())
    # NOTE(review): min_label is not read anywhere below
    min_label = min(blocks.keys())
    max_label = max(blocks.keys())
    ir_utils._max_label = max_label

    debug = config.DEBUG_ARRAY_OPT >= 1
    if debug:
        print("Initial stencil_blocks")
        ir_utils.dump_blocks(blocks)

    # rename variables, recording each new variable's type in the typemap of
    # the overall function
    renames = {}
    for old_name, var_type in state.typemap.items():
        fresh_var = ir.Var(scope, mk_unique_var(old_name), loc)
        renames[old_name] = fresh_var
        typemap[fresh_var.name] = var_type
    ir_utils.replace_vars(blocks, renames)

    if debug:
        print("After replace_vars")
        ir_utils.dump_blocks(blocks)

    # add call types to overall function
    for call_expr, call_sig in state.calltypes.items():
        calltypes[call_expr] = call_sig

    # replace arg with arr
    arg_to_arr_dict = {}
    for block in blocks.values():
        for stmt in block.body:
            if not (isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Arg)):
                continue
            arg = stmt.value
            if debug:
                print("input_dict", input_dict, arg.index,
                      arg.name, arg.index in input_dict)
            replacement = input_dict[arg.index]
            arg_to_arr_dict[arg.name] = replacement.name
            stmt.value = replacement

    if debug:
        print("arg_to_arr_dict", arg_to_arr_dict)
        print("After replace arg with arr")
        ir_utils.dump_blocks(blocks)

    ir_utils.remove_dels(blocks)
    stencil_func_ir.blocks = blocks
    return stencil_func_ir, sf.get_return_type(args)[0], arg_to_arr_dict