Exemple #1
0
 def test_global(self):
     a = ir.Global('foo', 0, self.loc1)
     b = ir.Global('foo', 0, self.loc1)
     c = ir.Global('foo', 0, self.loc2)
     d = ir.Global('bar', 0, self.loc1)
     e = ir.Global('foo', 1, self.loc1)
     self.check(a, same=[b, c], different=[d, e])
Exemple #2
0
    def _get_stencil_last_ind(self, dim_size, end_length, gen_nodes, scope,
                              loc):
        last_ind = dim_size
        if end_length != 0:
            # set last index to size minus stencil size to avoid invalid
            # memory access
            index_const = ir.Var(scope, mk_unique_var("stencil_const_var"),
                                 loc)
            self.typemap[index_const.name] = types.intp
            if isinstance(end_length, numbers.Number):
                const_assign = ir.Assign(ir.Const(end_length, loc),
                                         index_const, loc)
            else:
                const_assign = ir.Assign(end_length, index_const, loc)

            gen_nodes.append(const_assign)
            last_ind = ir.Var(scope, mk_unique_var("last_ind"), loc)
            self.typemap[last_ind.name] = types.intp

            g_var = ir.Var(scope, mk_unique_var("compute_last_ind_var"), loc)
            check_func = numba.njit(_compute_last_ind)
            func_typ = types.functions.Dispatcher(check_func)
            self.typemap[g_var.name] = func_typ
            g_obj = ir.Global("_compute_last_ind", check_func, loc)
            g_assign = ir.Assign(g_obj, g_var, loc)
            gen_nodes.append(g_assign)
            index_call = ir.Expr.call(g_var, [dim_size, index_const], (), loc)
            self.calltypes[index_call] = func_typ.get_call_type(
                self.typingctx, [types.intp, types.intp], {})
            index_assign = ir.Assign(index_call, last_ind, loc)
            gen_nodes.append(index_assign)

        return last_ind
    def run_pass(self, state):
        func_ir = state.func_ir  # get the FunctionIR object

        for blk in func_ir.blocks.values():
            for stmt in blk.find_insts(ir.Assign):
                if (isinstance(stmt.value, ir.FreeVar)
                        and stmt.value.name in BufferMeta.class_names):
                    break
            else:
                continue
            break
        else:
            return False  # one does not changes the IR

        for blk in func_ir.blocks.values():
            loc = blk.loc
            scope = blk.scope
            for ret in blk.find_insts(ir.Return):

                name = "free_omnisci_buffer_fn"
                value = ir.Global(name, free_omnisci_buffer, loc)
                target = scope.make_temp(loc)
                stmt = ir.Assign(value, target, loc)
                blk.insert_before_terminator(stmt)

                fn_call = ir.Expr.call(func=target,
                                       args=[ret.value],
                                       kws=(),
                                       loc=loc)
                lhs = scope.make_temp(loc)
                var = ir.Assign(fn_call, lhs, blk.loc)
                blk.insert_before_terminator(var)
                break

        return True  # we changed the IR
    def op_COMPARE_OP(self, inst, lhs, rhs, res):
        op = dis.cmp_op[inst.arg]
        if op == 'in' or op == 'not in':
            lhs, rhs = rhs, lhs

        if op == 'not in':
            self._binop('in', lhs, rhs, res)
            tmp = self.get(res)
            out = ir.Expr.unary('not', value=tmp, loc=self.loc)
            self.store(out, res)
        elif op == 'exception match':
            gv_fn = ir.Global(
                "exception_match",
                eh.exception_match,
                loc=self.loc,
            )
            exc_match_name = '$exc_match'
            self.store(value=gv_fn, name=exc_match_name, redefine=True)
            lhs = self.get(lhs)
            rhs = self.get(rhs)
            exc = ir.Expr.call(
                self.get(exc_match_name),
                args=(lhs, rhs),
                kws=(),
                loc=self.loc,
            )
            self.store(exc, res)
        else:
            self._binop(op, lhs, rhs, res)
    def op_DELETE_SLICE_3(self, inst, base, start, stop, slicevar, indexvar):
        base = self.get(base)
        start = self.get(start)
        stop = self.get(stop)

        slicegv = ir.Global("slice", slice, loc=self.loc)
        self.store(value=slicegv, name=slicevar)

        index = ir.Expr.call(self.get(slicevar), (start, stop), (), loc=self.loc)
        self.store(value=index, name=indexvar)
        stmt = ir.DelItem(base, self.get(indexvar), loc=self.loc)
        self.current_block.append(stmt)
    def op_SLICE_3(self, inst, base, start, stop, res, slicevar, indexvar):
        base = self.get(base)
        start = self.get(start)
        stop = self.get(stop)

        slicegv = ir.Global("slice", slice, loc=self.loc)
        self.store(value=slicegv, name=slicevar)

        index = ir.Expr.call(self.get(slicevar), (start, stop), (), loc=self.loc)
        self.store(value=index, name=indexvar)

        expr = ir.Expr.getitem(base, self.get(indexvar), loc=self.loc)
        self.store(value=expr, name=res)
    def op_DELETE_SLICE_0(self, inst, base, slicevar, indexvar, nonevar):
        base = self.get(base)

        slicegv = ir.Global("slice", slice, loc=self.loc)
        self.store(value=slicegv, name=slicevar)

        nonegv = ir.Const(None, loc=self.loc)
        self.store(value=nonegv, name=nonevar)
        none = self.get(nonevar)

        index = ir.Expr.call(self.get(slicevar), (none, none), (), loc=self.loc)
        self.store(value=index, name=indexvar)

        stmt = ir.DelItem(base, self.get(indexvar), loc=self.loc)
        self.current_block.append(stmt)
Exemple #8
0
    def op_BUILD_SLICE(self, inst, start, stop, step, res, slicevar):
        start = self.get(start)
        stop = self.get(stop)

        slicegv = ir.Global("slice", slice, loc=self.loc)
        self.store(value=slicegv, name=slicevar)

        if step is None:
            sliceinst = ir.Expr.call(self.get(slicevar), (start, stop), (),
                                     loc=self.loc)
        else:
            step = self.get(step)
            sliceinst = ir.Expr.call(self.get(slicevar), (start, stop, step),
                (), loc=self.loc)
        self.store(value=sliceinst, name=res)
    def op_SLICE_0(self, inst, base, res, slicevar, indexvar, nonevar):
        base = self.get(base)

        slicegv = ir.Global("slice", slice, loc=self.loc)
        self.store(value=slicegv, name=slicevar)

        nonegv = ir.Const(None, loc=self.loc)
        self.store(value=nonegv, name=nonevar)
        none = self.get(nonevar)

        index = ir.Expr.call(self.get(slicevar), (none, none), (), loc=self.loc)
        self.store(value=index, name=indexvar)

        expr = ir.Expr.getitem(base, self.get(indexvar), loc=self.loc)
        self.store(value=expr, name=res)
    def op_STORE_SLICE_1(self, inst, base, start, nonevar, value, slicevar, indexvar):
        base = self.get(base)
        start = self.get(start)

        nonegv = ir.Const(None, loc=self.loc)
        self.store(value=nonegv, name=nonevar)
        none = self.get(nonevar)

        slicegv = ir.Global("slice", slice, loc=self.loc)
        self.store(value=slicegv, name=slicevar)

        index = ir.Expr.call(self.get(slicevar), (start, none), (), loc=self.loc)
        self.store(value=index, name=indexvar)

        stmt = ir.SetItem(base, self.get(indexvar), self.get(value), loc=self.loc)
        self.current_block.append(stmt)
Exemple #11
0
 def _inject_call(self, func, gv_name, res_name=None):
     """A helper function to inject a call to *func* which is a python
     function.
     Parameters
     ----------
     func : callable
         The function object to be called.
     gv_name : str
         The variable name to be used to store the function object.
     res_name : str; optional
         The variable name to be used to store the call result.
         If ``None``, a name is created automatically.
     """
     gv_fn = ir.Global(gv_name, func, loc=self.loc)
     self.store(value=gv_fn, name=gv_name, redefine=True)
     callres = ir.Expr.call(self.get(gv_name), (), (), loc=self.loc)
     res_name = res_name or '$callres_{}'.format(gv_name)
     self.store(value=callres, name=res_name, redefine=True)
Exemple #12
0
    def _op_JUMP_IF(self, inst, pred, iftrue):
        brs = {
            True: inst.get_jump_target(),
            False: inst.next,
        }
        truebr = brs[iftrue]
        falsebr = brs[not iftrue]

        name = "bool%s" % (inst.offset)
        gv_fn = ir.Global("bool", bool, loc=self.loc)
        self.store(value=gv_fn, name=name)

        callres = ir.Expr.call(self.get(name), (self.get(pred),), (),
                               loc=self.loc)

        pname = "$%spred" % (inst.offset)
        predicate = self.store(value=callres, name=pname)
        bra = ir.Branch(cond=predicate, truebr=truebr, falsebr=falsebr,
                        loc=self.loc)
        self.current_block.append(bra)
Exemple #13
0
    def mutate_with_body(self, func_ir, blocks, blk_start, blk_end,
                         body_blocks, dispatcher_factory, extra):
        ir_utils.dprint_func_ir(func_ir, "Before with changes", blocks=blocks)
        assert extra is not None
        args = extra["args"]
        assert len(args) == 1
        arg = args[0]
        scope = blocks[blk_start].scope
        loc = blocks[blk_start].loc
        if isinstance(arg, ir.Arg):
            arg = ir.Var(scope, arg.name, loc)

        set_state = []
        restore_state = []

        # global for Numba itself
        gvar = scope.redefine("$ngvar", loc)
        set_state.append(ir.Assign(ir.Global('numba', numba, loc), gvar, loc))
        # getattr for set chunksize function in Numba
        spcattr = ir.Expr.getattr(gvar, 'set_parallel_chunksize', loc)
        spcvar = scope.redefine("$spc", loc)
        set_state.append(ir.Assign(spcattr, spcvar, loc))
        # call set_parallel_chunksize
        orig_pc_var = scope.redefine("$save_pc", loc)
        cs_var = scope.redefine("$cs_var", loc)
        set_state.append(ir.Assign(arg, cs_var, loc))
        spc_call = ir.Expr.call(spcvar, [cs_var], (), loc)
        set_state.append(ir.Assign(spc_call, orig_pc_var, loc))

        restore_spc_call = ir.Expr.call(spcvar, [orig_pc_var], (), loc)
        restore_state.append(ir.Assign(restore_spc_call, orig_pc_var, loc))

        blocks[blk_start].body = (blocks[blk_start].body[1:-1] + 
                                  set_state + 
                                  [blocks[blk_start].body[-1]])
        blocks[blk_end].body = restore_state + blocks[blk_end].body
        func_ir._definitions = build_definitions(blocks)
        ir_utils.dprint_func_ir(func_ir, "After with changes", blocks=blocks)
    def bind_global_function(self, fobj, ftype, args, kws={}):
        """Binds a global function to a variable.

        Parameters
        ----------
        fobj : object
            The function to be bound.
        ftype : types.Type
        args : Sequence[types.Type]
        kws : Mapping[str, types.Type]

        Returns
        -------
        callable: _CallableNode
        """
        loc = self._loc
        varname = f"{fobj.__name__}_func"
        gvname = f"{fobj.__name__}"

        func_sig = self._typingctx.resolve_function_type(ftype, args, kws)
        func_var = self.assign(rhs=ir.Global(gvname, fobj, loc=loc),
                               typ=ftype,
                               name=varname)
        return _CallableNode(func=func_var, sig=func_sig)
Exemple #15
0
    def add_indices_to_kernel(self, kernel, index_names, ndim, neighborhood,
                              standard_indexed, typemap, calltypes):
        """
        Transforms the stencil kernel as specified by the user into one
        that includes each dimension's index variable as part of the getitem
        calls.  So, in effect array[-1] becomes array[index0-1].
        """
        const_dict = {}
        kernel_consts = []

        if config.DEBUG_ARRAY_OPT >= 1:
            print("add_indices_to_kernel", ndim, neighborhood)
            ir_utils.dump_blocks(kernel.blocks)

        if neighborhood is None:
            need_to_calc_kernel = True
        else:
            need_to_calc_kernel = False
            if len(neighborhood) != ndim:
                raise ValueError("%d dimensional neighborhood specified for %d " \
                    "dimensional input array" % (len(neighborhood), ndim))

        tuple_table = ir_utils.get_tuple_table(kernel.blocks)

        relatively_indexed = set()

        for block in kernel.blocks.values():
            scope = block.scope
            loc = block.loc
            new_body = []
            for stmt in block.body:
                if (isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Const)):
                    if config.DEBUG_ARRAY_OPT >= 1:
                        print("remembering in const_dict", stmt.target.name,
                              stmt.value.value)
                    # Remember consts for use later.
                    const_dict[stmt.target.name] = stmt.value.value
                if ((isinstance(stmt, ir.Assign)
                     and isinstance(stmt.value, ir.Expr)
                     and stmt.value.op in ['setitem', 'static_setitem']
                     and stmt.value.value.name in kernel.arg_names)
                        or (isinstance(stmt, ir.SetItem)
                            and stmt.target.name in kernel.arg_names)):
                    raise ValueError("Assignments to arrays passed to stencil " \
                        "kernels is not allowed.")
                if (isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Expr)
                        and stmt.value.op in ['getitem', 'static_getitem']
                        and stmt.value.value.name in kernel.arg_names
                        and stmt.value.value.name not in standard_indexed):
                    # We found a getitem from the input array.
                    if stmt.value.op == 'getitem':
                        stmt_index_var = stmt.value.index
                    else:
                        stmt_index_var = stmt.value.index_var
                        # allow static_getitem since rewrite passes are applied
                        #raise ValueError("Unexpected static_getitem in add_indices_to_kernel.")

                    relatively_indexed.add(stmt.value.value.name)

                    # Store the index used after looking up the variable in
                    # the const dictionary.
                    if need_to_calc_kernel:
                        assert hasattr(stmt_index_var, 'name')

                        if stmt_index_var.name in tuple_table:
                            kernel_consts += [tuple_table[stmt_index_var.name]]
                        elif stmt_index_var.name in const_dict:
                            kernel_consts += [const_dict[stmt_index_var.name]]
                        else:
                            raise ValueError(
                                "stencil kernel index is not "
                                "constant, 'neighborhood' option required")

                    if ndim == 1:
                        # Single dimension always has index variable 'index0'.
                        # tmpvar will hold the real index and is computed by
                        # adding the relative offset in stmt.value.index to
                        # the current absolute location in index0.
                        index_var = ir.Var(scope, index_names[0], loc)
                        tmpname = ir_utils.mk_unique_var("stencil_index")
                        tmpvar = ir.Var(scope, tmpname, loc)
                        stmt_index_var_typ = typemap[stmt_index_var.name]
                        # If the array is indexed with a slice then we
                        # have to add the index value with a call to
                        # slice_addition.
                        if isinstance(stmt_index_var_typ,
                                      types.misc.SliceType):
                            sa_var = ir.Var(
                                scope,
                                ir_utils.mk_unique_var("slice_addition"), loc)
                            sa_func = numba.njit(slice_addition)
                            sa_func_typ = types.functions.Dispatcher(sa_func)
                            typemap[sa_var.name] = sa_func_typ
                            g_sa = ir.Global("slice_addition", sa_func, loc)
                            new_body.append(ir.Assign(g_sa, sa_var, loc))
                            slice_addition_call = ir.Expr.call(
                                sa_var, [stmt_index_var, index_var], (), loc)
                            calltypes[
                                slice_addition_call] = sa_func_typ.get_call_type(
                                    self._typingctx,
                                    [stmt_index_var_typ, types.intp], {})
                            new_body.append(
                                ir.Assign(slice_addition_call, tmpvar, loc))
                            new_body.append(
                                ir.Assign(
                                    ir.Expr.getitem(stmt.value.value, tmpvar,
                                                    loc), stmt.target, loc))
                        else:
                            acc_call = ir.Expr.binop(operator.add,
                                                     stmt_index_var, index_var,
                                                     loc)
                            new_body.append(ir.Assign(acc_call, tmpvar, loc))
                            new_body.append(
                                ir.Assign(
                                    ir.Expr.getitem(stmt.value.value, tmpvar,
                                                    loc), stmt.target, loc))
                    else:
                        index_vars = []
                        sum_results = []
                        s_index_name = ir_utils.mk_unique_var("stencil_index")
                        s_index_var = ir.Var(scope, s_index_name, loc)
                        const_index_vars = []
                        ind_stencils = []

                        stmt_index_var_typ = typemap[stmt_index_var.name]
                        # Same idea as above but you have to extract
                        # individual elements out of the tuple indexing
                        # expression and add the corresponding index variable
                        # to them and then reconstitute as a tuple that can
                        # index the array.
                        for dim in range(ndim):
                            tmpname = ir_utils.mk_unique_var("const_index")
                            tmpvar = ir.Var(scope, tmpname, loc)
                            new_body.append(
                                ir.Assign(ir.Const(dim, loc), tmpvar, loc))
                            const_index_vars += [tmpvar]
                            index_var = ir.Var(scope, index_names[dim], loc)
                            index_vars += [index_var]

                            tmpname = ir_utils.mk_unique_var(
                                "ind_stencil_index")
                            tmpvar = ir.Var(scope, tmpname, loc)
                            ind_stencils += [tmpvar]
                            getitemname = ir_utils.mk_unique_var("getitem")
                            getitemvar = ir.Var(scope, getitemname, loc)
                            getitemcall = ir.Expr.getitem(
                                stmt_index_var, const_index_vars[dim], loc)
                            new_body.append(
                                ir.Assign(getitemcall, getitemvar, loc))
                            # Get the type of this particular part of the index tuple.
                            one_index_typ = stmt_index_var_typ[dim]
                            # If the array is indexed with a slice then we
                            # have to add the index value with a call to
                            # slice_addition.
                            if isinstance(one_index_typ, types.misc.SliceType):
                                sa_var = ir.Var(
                                    scope,
                                    ir_utils.mk_unique_var("slice_addition"),
                                    loc)
                                sa_func = numba.njit(slice_addition)
                                sa_func_typ = types.functions.Dispatcher(
                                    sa_func)
                                typemap[sa_var.name] = sa_func_typ
                                g_sa = ir.Global("slice_addition", sa_func,
                                                 loc)
                                new_body.append(ir.Assign(g_sa, sa_var, loc))
                                slice_addition_call = ir.Expr.call(
                                    sa_var, [getitemvar, index_vars[dim]], (),
                                    loc)
                                calltypes[
                                    slice_addition_call] = sa_func_typ.get_call_type(
                                        self._typingctx,
                                        [one_index_typ, types.intp], {})
                                new_body.append(
                                    ir.Assign(slice_addition_call, tmpvar,
                                              loc))
                            else:
                                acc_call = ir.Expr.binop(
                                    operator.add, getitemvar, index_vars[dim],
                                    loc)
                                new_body.append(
                                    ir.Assign(acc_call, tmpvar, loc))

                        tuple_call = ir.Expr.build_tuple(ind_stencils, loc)
                        new_body.append(ir.Assign(tuple_call, s_index_var,
                                                  loc))
                        new_body.append(
                            ir.Assign(
                                ir.Expr.getitem(stmt.value.value, s_index_var,
                                                loc), stmt.target, loc))
                else:
                    new_body.append(stmt)
            block.body = new_body

        if need_to_calc_kernel:
            # Find the size of the kernel by finding the maximum absolute value
            # index used in the kernel specification.
            neighborhood = [[0, 0] for _ in range(ndim)]
            if len(kernel_consts) == 0:
                raise ValueError("Stencil kernel with no accesses to "
                                 "relatively indexed arrays.")

            for index in kernel_consts:
                if isinstance(index, tuple) or isinstance(index, list):
                    for i in range(len(index)):
                        te = index[i]
                        if isinstance(te, ir.Var) and te.name in const_dict:
                            te = const_dict[te.name]
                        if isinstance(te, int):
                            neighborhood[i][0] = min(neighborhood[i][0], te)
                            neighborhood[i][1] = max(neighborhood[i][1], te)
                        else:
                            raise ValueError(
                                "stencil kernel index is not constant,"
                                "'neighborhood' option required")
                    index_len = len(index)
                elif isinstance(index, int):
                    neighborhood[0][0] = min(neighborhood[0][0], index)
                    neighborhood[0][1] = max(neighborhood[0][1], index)
                    index_len = 1
                else:
                    raise ValueError(
                        "Non-tuple or non-integer used as stencil index.")
                if index_len != ndim:
                    raise ValueError(
                        "Stencil index does not match array dimensionality.")

        return (neighborhood, relatively_indexed)
 def op_LOAD_GLOBAL(self, inst, res):
     name = self.code_names[inst.arg]
     value = self.get_global_value(name)
     gl = ir.Global(name, value, loc=self.loc)
     self.store(gl, res)
    def run(self):
        """
        This function rewrites the name of NumPy functions that exist in self.function_name_map
        e.g np.sum(a) would produce the following:

        np.sum() --> numba_dppy.dpnp.sum()

        ---------------------------------------------------------------------------------------
        Numba IR Before Rewrite:
        ---------------------------------------------------------------------------------------

            $2load_global.0 = global(np: <module 'numpy' from 'numpy/__init__.py'>) ['$2load_global.0']
            $4load_method.1 = getattr(value=$2load_global.0, attr=sum) ['$2load_global.0', '$4load_method.1']
            $8call_method.3 = call $4load_method.1(a, func=$4load_method.1, args=[Var(a, test_rewrite.py:7)],
                                                   kws=(), vararg=None) ['$4load_method.1', '$8call_method.3', 'a']

        ---------------------------------------------------------------------------------------
        Numba IR After Rewrite:
        ---------------------------------------------------------------------------------------

            $dppy_replaced_var.0 = global(numba_dppy: <module 'numba_dppy' from 'numba_dppy/__init__.py'>) ['$dppy_replaced_var.0']
            $dpnp_var.1 = getattr(value=$dppy_replaced_var.0, attr=dpnp) ['$dpnp_var.1', '$dppy_replaced_var.0']
            $4load_method.1 = getattr(value=$dpnp_var.1, attr=sum) ['$4load_method.1', '$dpnp_var.1']
            $8call_method.3 = call $4load_method.1(a, func=$4load_method.1, args=[Var(a, test_rewrite.py:7)],
                                                   kws=(), vararg=None) ['$4load_method.1', '$8call_method.3', 'a']

        ---------------------------------------------------------------------------------------
        """
        func_ir = self.state.func_ir
        blocks = func_ir.blocks
        topo_order = find_topo_order(blocks)
        replaced = False

        for label in topo_order:
            block = blocks[label]
            saved_arr_arg = {}
            new_body = []
            for stmt in block.body:
                if isinstance(stmt, ir.Assign) and isinstance(
                        stmt.value, ir.Expr):
                    lhs = stmt.target.name
                    rhs = stmt.value
                    # replace np.FOO with name from self.function_name_map["FOO"]
                    # e.g. np.sum will be replaced with numba_dppy.dpnp.sum
                    if (rhs.op == "getattr"
                            and rhs.attr in self.function_name_map):
                        module_node = block.find_variable_assignment(
                            rhs.value.name).value
                        if (isinstance(module_node, ir.Global)
                                and module_node.value.__name__
                                in self.function_name_map[rhs.attr][0]) or (
                                    isinstance(module_node, ir.Expr)
                                    and module_node.attr
                                    in self.function_name_map[rhs.attr][0]):
                            rhs = stmt.value
                            rhs.attr = self.function_name_map[rhs.attr][1]

                            global_module = rhs.value
                            saved_arr_arg[lhs] = global_module

                            scope = global_module.scope
                            loc = global_module.loc

                            g_dppy_var = ir.Var(scope,
                                                mk_unique_var("$2load_global"),
                                                loc)
                            # We are trying to rename np.function_name/np.linalg.function_name with
                            # numba_dppy.dpnp.function_name.
                            # Hence, we need to have a global variable representing module numba_dppy.
                            # Next, we add attribute dpnp to global module numba_dppy to
                            # represent numba_dppy.dpnp.
                            g_dppy = ir.Global("numba_dppy", numba_dppy, loc)
                            g_dppy_assign = ir.Assign(g_dppy, g_dppy_var, loc)

                            dpnp_var = ir.Var(scope,
                                              mk_unique_var("$4load_attr"),
                                              loc)
                            getattr_dpnp = ir.Expr.getattr(
                                g_dppy_var, "dpnp", loc)
                            dpnp_assign = ir.Assign(getattr_dpnp, dpnp_var,
                                                    loc)

                            rhs.value = dpnp_var
                            new_body.append(g_dppy_assign)
                            new_body.append(dpnp_assign)
                            func_ir._definitions[dpnp_var.name] = [
                                getattr_dpnp
                            ]
                            func_ir._definitions[g_dppy_var.name] = [g_dppy]
                            replaced = True

                new_body.append(stmt)
            block.body = new_body
            return replaced
    def run(self):
        typingctx = self.state.typingctx

        # save array arg to call
        # call_varname -> array
        func_ir = self.state.func_ir
        blocks = func_ir.blocks
        saved_arr_arg = {}
        topo_order = find_topo_order(blocks)
        replaced = False

        for label in topo_order:
            block = blocks[label]
            new_body = []
            for stmt in block.body:
                if isinstance(stmt, ir.Assign) and isinstance(
                        stmt.value, ir.Expr):
                    lhs = stmt.target.name
                    rhs = stmt.value
                    # replace A.func with np.func, and save A in saved_arr_arg
                    if (rhs.op == "getattr"
                            and rhs.attr in self.function_name_map
                            and isinstance(self.typemap[rhs.value.name],
                                           types.npytypes.Array)):
                        rhs = stmt.value
                        arr = rhs.value
                        saved_arr_arg[lhs] = arr
                        scope = arr.scope
                        loc = arr.loc

                        g_dppy_var = ir.Var(scope,
                                            mk_unique_var("$load_global"), loc)
                        self.typemap[g_dppy_var.name] = types.misc.Module(
                            numba_dppy)
                        g_dppy = ir.Global("numba_dppy", numba_dppy, loc)
                        g_dppy_assign = ir.Assign(g_dppy, g_dppy_var, loc)

                        dpnp_var = ir.Var(scope, mk_unique_var("$load_attr"),
                                          loc)
                        self.typemap[dpnp_var.name] = types.misc.Module(
                            numba_dppy.dpnp)
                        getattr_dpnp = ir.Expr.getattr(g_dppy_var, "dpnp", loc)
                        dpnp_assign = ir.Assign(getattr_dpnp, dpnp_var, loc)

                        rhs.value = dpnp_var
                        new_body.append(g_dppy_assign)
                        new_body.append(dpnp_assign)

                        func_ir._definitions[g_dppy_var.name] = [getattr_dpnp]
                        func_ir._definitions[dpnp_var.name] = [getattr_dpnp]

                        # update func var type
                        func = getattr(numba_dppy.dpnp, rhs.attr)
                        func_typ = get_dpnp_func_typ(func)

                        self.typemap.pop(lhs)
                        self.typemap[lhs] = func_typ
                        replaced = True

                    if rhs.op == "call" and rhs.func.name in saved_arr_arg:
                        # add array as first arg
                        arr = saved_arr_arg[rhs.func.name]
                        # update call type signature to include array arg
                        old_sig = self.calltypes.pop(rhs)
                        # argsort requires kws for typing so sig.args can't be used
                        # reusing sig.args since some types become Const in sig
                        argtyps = old_sig.args[:len(rhs.args)]
                        kwtyps = {
                            name: self.typemap[v.name]
                            for name, v in rhs.kws
                        }
                        self.calltypes[rhs] = self.typemap[
                            rhs.func.name].get_call_type(
                                typingctx,
                                [self.typemap[arr.name]] + list(argtyps),
                                kwtyps,
                            )
                        rhs.args = [arr] + rhs.args

                new_body.append(stmt)
            block.body = new_body
        return replaced
Exemple #19
0
    def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_ir,
                           index_offsets, target, return_type, stencil_func,
                           arg_to_arr_dict):
        """ Converts a set of stencil kernel blocks to a parfor.
        """
        gen_nodes = []
        stencil_blocks = stencil_ir.blocks

        if config.DEBUG_ARRAY_OPT >= 1:
            print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
                  return_type, stencil_func, stencil_blocks)
            ir_utils.dump_blocks(stencil_blocks)

        in_arr = in_args[0]
        # run copy propagate to replace in_args copies (e.g. a = A)
        in_arr_typ = self.typemap[in_arr.name]
        in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
        name_var_table = ir_utils.get_name_var_table(stencil_blocks)

        ir_utils.apply_copy_propagate(stencil_blocks, in_cps, name_var_table,
                                      self.typemap, self.calltypes)
        if config.DEBUG_ARRAY_OPT >= 1:
            print("stencil_blocks after copy_propagate")
            ir_utils.dump_blocks(stencil_blocks)
        ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names,
                             stencil_ir, self.typemap)
        if config.DEBUG_ARRAY_OPT >= 1:
            print("stencil_blocks after removing dead code")
            ir_utils.dump_blocks(stencil_blocks)

        # create parfor vars
        ndims = self.typemap[in_arr.name].ndim
        scope = in_arr.scope
        loc = in_arr.loc
        parfor_vars = []
        for i in range(ndims):
            parfor_var = ir.Var(scope, mk_unique_var("$parfor_index_var"), loc)
            self.typemap[parfor_var.name] = types.intp
            parfor_vars.append(parfor_var)

        start_lengths, end_lengths = self._replace_stencil_accesses(
            stencil_ir, parfor_vars, in_args, index_offsets, stencil_func,
            arg_to_arr_dict)

        if config.DEBUG_ARRAY_OPT >= 1:
            print("stencil_blocks after replace stencil accesses")
            ir_utils.dump_blocks(stencil_blocks)

        # create parfor loop nests
        loopnests = []
        equiv_set = self.array_analysis.get_equiv_set(label)
        in_arr_dim_sizes = equiv_set.get_shape(in_arr)

        assert ndims == len(in_arr_dim_sizes)
        for i in range(ndims):
            last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i],
                                                  end_lengths[i], gen_nodes,
                                                  scope, loc)
            start_ind = self._get_stencil_start_ind(start_lengths[i],
                                                    gen_nodes, scope, loc)
            # start from stencil size to avoid invalid array access
            loopnests.append(
                numba.parfors.parfor.LoopNest(parfor_vars[i], start_ind,
                                              last_ind, 1))

        # We have to guarantee that the exit block has maximum label and that
        # there's only one exit block for the parfor body.
        # So, all return statements will change to jump to the parfor exit block.
        parfor_body_exit_label = max(stencil_blocks.keys()) + 1
        stencil_blocks[parfor_body_exit_label] = ir.Block(scope, loc)
        exit_value_var = ir.Var(scope, mk_unique_var("$parfor_exit_value"),
                                loc)
        self.typemap[exit_value_var.name] = return_type.dtype

        # create parfor index var
        for_replacing_ret = []
        if ndims == 1:
            parfor_ind_var = parfor_vars[0]
        else:
            parfor_ind_var = ir.Var(scope,
                                    mk_unique_var("$parfor_index_tuple_var"),
                                    loc)
            self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
                types.intp, ndims)
            tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
            tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
            for_replacing_ret.append(tuple_assign)

        if config.DEBUG_ARRAY_OPT >= 1:
            print("stencil_blocks after creating parfor index var")
            ir_utils.dump_blocks(stencil_blocks)

        # empty init block
        init_block = ir.Block(scope, loc)
        if out_arr is None:
            in_arr_typ = self.typemap[in_arr.name]

            shape_name = ir_utils.mk_unique_var("in_arr_shape")
            shape_var = ir.Var(scope, shape_name, loc)
            shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
            self.typemap[shape_name] = types.containers.UniTuple(
                types.intp, in_arr_typ.ndim)
            init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)])

            zero_name = ir_utils.mk_unique_var("zero_val")
            zero_var = ir.Var(scope, zero_name, loc)
            if "cval" in stencil_func.options:
                cval = stencil_func.options["cval"]
                # TODO: Loosen this restriction to adhere to casting rules.
                if return_type.dtype != typing.typeof.typeof(cval):
                    raise ValueError(
                        "cval type does not match stencil return type.")

                temp2 = return_type.dtype(cval)
            else:
                temp2 = return_type.dtype(0)
            full_const = ir.Const(temp2, loc)
            self.typemap[zero_name] = return_type.dtype
            init_block.body.extend([ir.Assign(full_const, zero_var, loc)])

            so_name = ir_utils.mk_unique_var("stencil_output")
            out_arr = ir.Var(scope, so_name, loc)
            self.typemap[out_arr.name] = numba.core.types.npytypes.Array(
                return_type.dtype, in_arr_typ.ndim, in_arr_typ.layout)
            dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
            self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
            dtype_g_np = ir.Global('np', np, loc)
            dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
            init_block.body.append(dtype_g_np_assign)

            dtype_np_attr_call = ir.Expr.getattr(dtype_g_np_var,
                                                 return_type.dtype.name, loc)
            dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
            self.typemap[dtype_attr_var.name] = types.functions.NumberClass(
                return_type.dtype)
            dtype_attr_assign = ir.Assign(dtype_np_attr_call, dtype_attr_var,
                                          loc)
            init_block.body.append(dtype_attr_assign)

            stmts = ir_utils.gen_np_call("full", np.full, out_arr,
                                         [shape_var, zero_var, dtype_attr_var],
                                         self.typingctx, self.typemap,
                                         self.calltypes)
            equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
            init_block.body.extend(stmts)
        else:  # out is present
            if "cval" in stencil_func.options:  # do out[:] = cval
                cval = stencil_func.options["cval"]
                # TODO: Loosen this restriction to adhere to casting rules.
                cval_ty = typing.typeof.typeof(cval)
                if not self.typingctx.can_convert(cval_ty, return_type.dtype):
                    msg = "cval type does not match stencil return type."
                    raise ValueError(msg)

                # get slice ref
                slice_var = ir.Var(scope, mk_unique_var("$py_g_var"), loc)
                slice_fn_ty = self.typingctx.resolve_value_type(slice)
                self.typemap[slice_var.name] = slice_fn_ty
                slice_g = ir.Global('slice', slice, loc)
                slice_assigned = ir.Assign(slice_g, slice_var, loc)
                init_block.body.append(slice_assigned)

                sig = self.typingctx.resolve_function_type(
                    slice_fn_ty, (types.none, ) * 2, {})

                callexpr = ir.Expr.call(func=slice_var,
                                        args=(),
                                        kws=(),
                                        loc=loc)

                self.calltypes[callexpr] = sig
                slice_inst_var = ir.Var(scope, mk_unique_var("$slice_inst"),
                                        loc)
                self.typemap[slice_inst_var.name] = types.slice2_type
                slice_assign = ir.Assign(callexpr, slice_inst_var, loc)
                init_block.body.append(slice_assign)

                # get const val for cval
                cval_const_val = ir.Const(return_type.dtype(cval), loc)
                cval_const_var = ir.Var(scope, mk_unique_var("$cval_const"),
                                        loc)
                self.typemap[cval_const_var.name] = return_type.dtype
                cval_const_assign = ir.Assign(cval_const_val, cval_const_var,
                                              loc)
                init_block.body.append(cval_const_assign)

                # do setitem on `out` array
                setitemexpr = ir.StaticSetItem(out_arr, slice(None, None),
                                               slice_inst_var, cval_const_var,
                                               loc)
                init_block.body.append(setitemexpr)
                sig = signature(types.none, self.typemap[out_arr.name],
                                self.typemap[slice_inst_var.name],
                                self.typemap[out_arr.name].dtype)
                self.calltypes[setitemexpr] = sig

        self.replace_return_with_setitem(stencil_blocks, exit_value_var,
                                         parfor_body_exit_label)

        if config.DEBUG_ARRAY_OPT >= 1:
            print("stencil_blocks after replacing return")
            ir_utils.dump_blocks(stencil_blocks)

        setitem_call = ir.SetItem(out_arr, parfor_ind_var, exit_value_var, loc)
        self.calltypes[setitem_call] = signature(
            types.none, self.typemap[out_arr.name],
            self.typemap[parfor_ind_var.name],
            self.typemap[out_arr.name].dtype)
        stencil_blocks[parfor_body_exit_label].body.extend(for_replacing_ret)
        stencil_blocks[parfor_body_exit_label].body.append(setitem_call)

        # simplify CFG of parfor body (exit block could be simplified often)
        # add dummy return to enable CFG
        dummy_loc = ir.Loc("stencilparfor_dummy", -1)
        ret_const_var = ir.Var(scope, mk_unique_var("$cval_const"), dummy_loc)
        cval_const_assign = ir.Assign(ir.Const(0, loc=dummy_loc),
                                      ret_const_var, dummy_loc)
        stencil_blocks[parfor_body_exit_label].body.append(cval_const_assign)

        stencil_blocks[parfor_body_exit_label].body.append(
            ir.Return(ret_const_var, dummy_loc), )
        stencil_blocks = ir_utils.simplify_CFG(stencil_blocks)
        stencil_blocks[max(stencil_blocks.keys())].body.pop()

        if config.DEBUG_ARRAY_OPT >= 1:
            print("stencil_blocks after adding SetItem")
            ir_utils.dump_blocks(stencil_blocks)

        pattern = ('stencil', [start_lengths, end_lengths])
        parfor = numba.parfors.parfor.Parfor(loopnests, init_block,
                                             stencil_blocks, loc,
                                             parfor_ind_var, equiv_set,
                                             pattern, self.flags)
        gen_nodes.append(parfor)
        gen_nodes.append(ir.Assign(out_arr, target, loc))
        return gen_nodes
 def op_PRINT_ITEM(self, inst, item, printvar, res):
     item = self.get(item)
     printgv = ir.Global("print", print, loc=self.loc)
     self.store(value=printgv, name=printvar)
     call = ir.Expr.call(self.get(printvar), (item,), (), loc=self.loc)
     self.store(value=call, name=res)
 def op_PRINT_NEWLINE(self, inst, printvar, res):
     printgv = ir.Global("print", print, loc=self.loc)
     self.store(value=printgv, name=printvar)
     call = ir.Expr.call(self.get(printvar), (), (), loc=self.loc)
     self.store(value=call, name=res)