예제 #1
0
def _analyze_op_pair_first(self, scope, equiv_set, expr):
    # make dummy lhs since we don't have access to lhs
    typ = self.typemap[expr.value.name].first_type
    if not isinstance(typ, types.NamedTuple):
        return None
    lhs = ir.Var(scope, mk_unique_var("tuple_var"), expr.loc)
    self.typemap[lhs.name] = typ
    rhs = ir.Expr.pair_first(expr.value, expr.loc)
    lhs_assign = ir.Assign(rhs, lhs, expr.loc)
    #(shape, post) = self._gen_shape_call(equiv_set, lhs, typ.count, )
    var = lhs
    out = []
    size_vars = []
    ndims = typ.count
    for i in range(ndims):
        # get size: Asize0 = A_sh_attr[0]
        size_var = ir.Var(var.scope, mk_unique_var(
                            "{}_size{}".format(var.name, i)), var.loc)
        getitem = ir.Expr.static_getitem(lhs, i, None, var.loc)
        self.calltypes[getitem] = None
        out.append(ir.Assign(getitem, size_var, var.loc))
        self._define(equiv_set, size_var, types.intp, getitem)
        size_vars.append(size_var)
    shape = tuple(size_vars)
    return shape, [lhs_assign] + out
예제 #2
0
        def _mk_unique_var(self, prefix):
            """Make unique var name checking self.func_ir._definitions"""
            name = mk_unique_var(prefix)
            while name in self.func_ir._definitions:
                name = mk_unique_var(prefix)

            return name
예제 #3
0
파일: pio.py 프로젝트: zmyer/hpat
 def _handle_f_close_call(self, stmt, lhs_var, rhs):
     func_def = guard(get_definition, self.func_ir, rhs.func)
     assert func_def is not None
     # rare case where function variable is assigned to a new variable
     if isinstance(func_def, ir.Var):
         rhs.func = func_def
         return self._handle_f_close_call(stmt, lhs_var, rhs)
     if (isinstance(func_def, ir.Expr) and func_def.op == 'getattr'
             and func_def.value.name in self.h5_files
             and func_def.attr == 'close'):
         f_id = func_def.value
         scope = lhs_var.scope
         loc = lhs_var.loc
         # g_pio_var = Global(hpat.pio_api)
         g_pio_var = ir.Var(scope, mk_unique_var("$pio_g_var"), loc)
         g_pio = ir.Global('pio_api', hpat.pio_api, loc)
         g_pio_assign = ir.Assign(g_pio, g_pio_var, loc)
         # attr call: h5close_attr = getattr(g_pio_var, h5close)
         h5close_attr_call = ir.Expr.getattr(g_pio_var, "h5close", loc)
         attr_var = ir.Var(scope, mk_unique_var("$h5close_attr"), loc)
         attr_assign = ir.Assign(h5close_attr_call, attr_var, loc)
         # h5close(f_id)
         close_call = ir.Expr.call(attr_var, [f_id], (), loc)
         close_assign = ir.Assign(close_call, lhs_var, loc)
         return [g_pio_assign, attr_assign, close_assign]
     return None
예제 #4
0
 def _handle_merge(self, lhs, rhs):
     if guard(find_callname, self.func_ir, rhs) == ('merge', 'pandas'):
         if len(rhs.args) < 2:
             raise ValueError("left and right arguments required for merge")
         left_df = rhs.args[0]
         right_df = rhs.args[1]
         kws = dict(rhs.kws)
         if 'on' in kws:
             left_on = get_constant(self.func_ir, kws['on'], None)
             right_on = left_on
         else:  # pragma: no cover
             if 'left_on' not in kws or 'right_on' not in kws:
                 raise ValueError("merge 'on' or 'left_on'/'right_on'"
                                  "arguments required")
             left_on = get_constant(self.func_ir, kws['left_on'], None)
             right_on = get_constant(self.func_ir, kws['right_on'], None)
         if left_on is None or right_on is None:
             raise ValueError("merge key values should be constant strings")
         scope = lhs.scope
         loc = lhs.loc
         self.df_vars[lhs.name] = {}
         # add columns from left to output
         for col, _ in self.df_vars[left_df.name].items():
             self.df_vars[lhs.name][col] = ir.Var(scope, mk_unique_var(col),
                                                  loc)
         # add columns from right to output
         for col, _ in self.df_vars[right_df.name].items():
             self.df_vars[lhs.name][col] = ir.Var(scope, mk_unique_var(col),
                                                  loc)
         self._update_df_cols()
         return [
             hiframes_join.Join(lhs.name, left_df.name, right_df.name,
                                left_on, right_on, self.df_vars, lhs.loc)
         ]
     return None
예제 #5
0
 def convert_seq_to_atleast_3d(self, seq_arg_var):
     """convert array sequence to a sequence of arrays with at least 3d dims
     """
     arr_args = self._get_sequence_arrs(seq_arg_var.name)
     new_seq = []
     for arr in arr_args:
         curr_dims = self._get_ndims(arr.name)
         if curr_dims < 3:
             dummy_var = ir.Var(
                 arr.scope, mk_unique_var("__dummy_nd"), arr.loc)
             self.typemap[dummy_var.name] = self.typemap[arr.name].copy(
                 ndim=3)
             corrs = self.array_shape_classes[arr.name].copy()
             if curr_dims == 0:
                 corrs = [CONST_CLASS] * 3
             elif curr_dims == 1:  # Numpy adds both prepends and appends a dim
                 corrs = [CONST_CLASS] + corrs + [CONST_CLASS]
             elif curr_dims == 2:  # append a dim
                 corrs = corrs + [CONST_CLASS]
             self.array_shape_classes[dummy_var.name] = corrs
             new_seq.append(dummy_var)
         else:
             new_seq.append(arr)
     tup_name = mk_unique_var("__dummy_tup")
     self.tuple_table[tup_name] = new_seq
     return ir.Var(arr_args[0].scope, tup_name, arr_args[0].loc)
예제 #6
0
    def _get_stencil_last_ind(self, dim_size, end_length, gen_nodes, scope,
                                                                        loc):
        last_ind = dim_size
        if end_length != 0:
            # set last index to size minus stencil size to avoid invalid
            # memory access
            index_const = ir.Var(scope, mk_unique_var("stencil_const_var"),
                                                                        loc)
            self.typemap[index_const.name] = types.intp
            if isinstance(end_length, numbers.Number):
                const_assign = ir.Assign(ir.Const(end_length, loc),
                                                        index_const, loc)
            else:
                const_assign = ir.Assign(end_length, index_const, loc)

            gen_nodes.append(const_assign)
            last_ind = ir.Var(scope, mk_unique_var("last_ind"), loc)
            self.typemap[last_ind.name] = types.intp

            g_var = ir.Var(scope, mk_unique_var("compute_last_ind_var"), loc)
            check_func = numba.njit(_compute_last_ind)
            func_typ = types.functions.Dispatcher(check_func)
            self.typemap[g_var.name] = func_typ
            g_obj = ir.Global("_compute_last_ind", check_func, loc)
            g_assign = ir.Assign(g_obj, g_var, loc)
            gen_nodes.append(g_assign)
            index_call = ir.Expr.call(g_var, [dim_size, index_const], (), loc)
            self.calltypes[index_call] = func_typ.get_call_type(
                self.typingctx, [types.intp, types.intp], {})
            index_assign = ir.Assign(index_call, last_ind, loc)
            gen_nodes.append(index_assign)

        return last_ind
예제 #7
0
 def _gen_size_call(self, var, i):
     out = []
     ndims = self._get_ndims(var.name)
     # attr call: A_sh_attr = getattr(A, shape)
     shape_attr_call = ir.Expr.getattr(var, "shape", var.loc)
     attr_var = ir.Var(var.scope,
                       mk_unique_var(var.name + "_sh_attr" + str(i)),
                       var.loc)
     self.typemap[attr_var.name] = types.containers.UniTuple(
         types.intp, ndims)
     attr_assign = ir.Assign(shape_attr_call, attr_var, var.loc)
     out.append(attr_assign)
     # const var for dim: $constA0 = Const(0)
     const_node = ir.Const(i, var.loc)
     const_var = ir.Var(var.scope,
                        mk_unique_var("$const" + var.name + str(i)),
                        var.loc)
     self.typemap[const_var.name] = types.intp
     const_assign = ir.Assign(const_node, const_var, var.loc)
     out.append(const_assign)
     # get size: Asize0 = A_sh_attr[0]
     size_var = ir.Var(var.scope, mk_unique_var(var.name + "size" + str(i)),
                       var.loc)
     self.typemap[size_var.name] = types.intp
     getitem_node = ir.Expr.static_getitem(attr_var, i, const_var, var.loc)
     self.calltypes[getitem_node] = None
     getitem_assign = ir.Assign(getitem_node, size_var, var.loc)
     out.append(getitem_assign)
     return out
예제 #8
0
    def inline_array(array_var, expr, stmts, list_vars, dels):
        """Check to see if the given "array_var" is created from a list
        of constants, and try to inline the list definition as array
        initialization.

        Extra statements produced with be appended to "stmts".
        """
        callname = guard(find_callname, func_ir, expr)
        require(callname and callname[1] == 'numpy' and callname[0] == 'array')
        require(expr.args[0].name in list_vars)
        ret_type = calltypes[expr].return_type
        require(isinstance(ret_type, types.ArrayCompatible) and
                           ret_type.ndim == 1)
        loc = expr.loc
        list_var = expr.args[0]
        array_typ = typemap[array_var.name]
        debug_print("inline array_var = ", array_var, " list_var = ", list_var)
        dtype = array_typ.dtype
        seq, op = find_build_sequence(func_ir, list_var)
        size = len(seq)
        size_var = ir.Var(scope, mk_unique_var("size"), loc)
        size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
        size_typ = types.intp
        size_tuple_typ = types.UniTuple(size_typ, 1)

        typemap[size_var.name] = size_typ
        typemap[size_tuple_var.name] = size_tuple_typ

        stmts.append(_new_definition(func_ir, size_var,
                 ir.Const(size, loc=loc), loc))

        stmts.append(_new_definition(func_ir, size_tuple_var,
                 ir.Expr.build_tuple(items=[size_var], loc=loc), loc))

        empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
        fnty = get_np_ufunc_typ(np.empty)
        sig = context.resolve_function_type(fnty, (size_typ,), {})
        typemap[empty_func.name] = fnty #

        stmts.append(_new_definition(func_ir, empty_func,
                         ir.Global('empty', np.empty, loc=loc), loc))

        empty_call = ir.Expr.call(empty_func, [size_var], {}, loc=loc)
        calltypes[empty_call] = typing.signature(array_typ, size_typ)
        stmts.append(_new_definition(func_ir, array_var, empty_call, loc))

        for i in range(size):
            index_var = ir.Var(scope, mk_unique_var("index"), loc)
            index_typ = types.intp
            typemap[index_var.name] = index_typ
            stmts.append(_new_definition(func_ir, index_var,
                    ir.Const(i, loc), loc))
            setitem = ir.SetItem(array_var, index_var, seq[i], loc)
            calltypes[setitem] = typing.signature(types.none, array_typ,
                                                  index_typ, dtype)
            stmts.append(setitem)

        stmts.extend(dels)
        return True
예제 #9
0
def gen_empty_like(in_arr, out_arr):
    scope = in_arr.scope
    loc = in_arr.loc
    # g_np_var = Global(numpy)
    g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
    g_np = ir.Global('np', np, loc)
    g_np_assign = ir.Assign(g_np, g_np_var, loc)
    # attr call: empty_attr = getattr(g_np_var, empty_like)
    empty_attr_call = ir.Expr.getattr(g_np_var, "empty_like", loc)
    attr_var = ir.Var(scope, mk_unique_var("$empty_attr_attr"), loc)
    attr_assign = ir.Assign(empty_attr_call, attr_var, loc)
    # alloc call: out_arr = empty_attr(in_arr)
    alloc_call = ir.Expr.call(attr_var, [in_arr], (), loc)
    alloc_assign = ir.Assign(alloc_call, out_arr, loc)
    return [g_np_assign, attr_assign, alloc_assign]
예제 #10
0
파일: hiframes.py 프로젝트: zmyer/hpat
def gen_stencil_call(in_arr, out_arr, kernel_func, index_offsets, fir_globals,
                                                other_args=None, options=None):
    if other_args is None:
        other_args = []
    if options is None:
        options = {}
    if index_offsets != [0]:
        options['index_offsets'] = index_offsets
    scope = in_arr.scope
    loc = in_arr.loc
    stencil_nodes = []
    stencil_nodes += gen_empty_like(in_arr, out_arr)

    kernel_var = ir.Var(scope, mk_unique_var("kernel_var"), scope)
    if not isinstance(kernel_func, ir.Expr):
        kernel_func = ir.Expr.make_function("kernel", kernel_func.__code__,
                    kernel_func.__closure__, kernel_func.__defaults__, loc)
    stencil_nodes.append(ir.Assign(kernel_func, kernel_var, loc))

    def f(A, B, f):
        numba.stencil(f)(A, out=B)
    f_block = compile_to_numba_ir(f, {'numba': numba}).blocks.popitem()[1]
    replace_arg_nodes(f_block, [in_arr, out_arr, kernel_var])
    stencil_nodes += f_block.body[:-3]  # remove none return
    setup_call = stencil_nodes[-2].value
    stencil_call = stencil_nodes[-1].value
    setup_call.kws = list(options.items())
    stencil_call.args += other_args

    return stencil_nodes
예제 #11
0
파일: hiframes.py 프로젝트: zmyer/hpat
 def _gen_col_var(self, out_var, args, col_var):
     loc = out_var.loc
     scope = out_var.scope
     # calculate mean first
     mean_var = ir.Var(scope, mk_unique_var("mean_val"), loc)
     f_mean_blocks = self._gen_col_mean(mean_var, args, col_var)
     f_mean_blocks = add_offset_to_labels(f_mean_blocks, ir_utils._max_label+1)
     ir_utils._max_label = max(f_mean_blocks.keys())
     m_last_label = find_topo_order(f_mean_blocks)[-1]
     remove_none_return_from_block(f_mean_blocks[m_last_label])
     def f(A, s, m):
         count = 0
         for i in numba.parfor.prange(len(A)):
             val = A[i]
             if not np.isnan(val):
                 s += (val-m)**2
                 count += 1
         if count <= 1:
             s = np.nan
         else:
             s = s/(count-1)
     f_blocks = get_inner_ir(f)
     replace_var_names(f_blocks, {'A': col_var.name})
     replace_var_names(f_blocks, {'s': out_var.name})
     replace_var_names(f_blocks, {'m': mean_var.name})
     f_blocks[0].body.insert(0, ir.Assign(ir.Const(0.0, loc), out_var, loc))
     # attach first var block to last mean block
     f_mean_blocks[m_last_label].body.extend(f_blocks[0].body)
     f_blocks.pop(0)
     f_blocks = add_offset_to_labels(f_blocks, ir_utils._max_label+1)
     # add offset to jump of first f_block since it didn't go through call
     f_mean_blocks[m_last_label].body[-1].target += ir_utils._max_label+1
     ir_utils._max_label = max(f_blocks.keys())
     f_mean_blocks.update(f_blocks)
     return f_mean_blocks
예제 #12
0
    def gen_parquet_read(self, file_name, table_types):
        import pyarrow.parquet as pq
        scope = file_name.scope
        loc = file_name.loc

        if table_types is None:
            fname_def = guard(get_definition, self.func_ir, file_name)
            if not isinstance(fname_def, ir.Const) or not isinstance(
                    fname_def.value, str):
                raise ValueError("Parquet schema not available")
            file_name_str = fname_def.value
            col_names, col_types = parquet_file_schema(file_name_str)
        else:
            col_names = list(table_types.keys())
            col_types = list(table_types.values())

        out_nodes = []
        col_items = []
        for i, cname in enumerate(col_names):
            # get column type from schema
            c_type = col_types[i]
            # create a variable for column and assign type
            varname = mk_unique_var(cname)
            #self.locals[varname] = c_type
            cvar = ir.Var(scope, varname, loc)
            col_items.append((cname, cvar))

            out_nodes += get_column_read_nodes(c_type, cvar, file_name, i)

        return col_items, out_nodes
예제 #13
0
파일: parfor.py 프로젝트: harrism/numba
def replace_var_with_array_in_block(vars, block, typemap, calltypes):
    new_block = []
    for inst in block.body:
        if isinstance(inst, ir.Assign) and inst.target.name in vars:
            const_node = ir.Const(0, inst.loc)
            const_var = ir.Var(inst.target.scope,
                               mk_unique_var("$const_ind_0"), inst.loc)
            typemap[const_var.name] = types.uintp
            const_assign = ir.Assign(const_node, const_var, inst.loc)
            new_block.append(const_assign)

            setitem_node = ir.SetItem(inst.target, const_var, inst.value,
                                      inst.loc)
            calltypes[setitem_node] = signature(
                types.none,
                types.npytypes.Array(typemap[inst.target.name], 1, "C"),
                types.intp, typemap[inst.target.name])
            new_block.append(setitem_node)
            continue
        elif isinstance(inst, parfor.Parfor):
            replace_var_with_array_internal(vars, {0: inst.init_block},
                                            typemap, calltypes)
            replace_var_with_array_internal(vars, inst.loop_body, typemap,
                                            calltypes)

        new_block.append(inst)
    return new_block
예제 #14
0
파일: pio.py 프로젝트: Quantonomist/hpat
 def _gen_h5close(self, stmt, f_id):
     lhs_var = stmt.target
     scope = lhs_var.scope
     loc = lhs_var.loc
     # g_pio_var = Global(hpat.pio_api)
     g_pio_var = ir.Var(scope, mk_unique_var("$pio_g_var"), loc)
     g_pio = ir.Global('pio_api', hpat.pio_api, loc)
     g_pio_assign = ir.Assign(g_pio, g_pio_var, loc)
     # attr call: h5close_attr = getattr(g_pio_var, h5close)
     h5close_attr_call = ir.Expr.getattr(g_pio_var, "h5close", loc)
     attr_var = ir.Var(scope, mk_unique_var("$h5close_attr"), loc)
     attr_assign = ir.Assign(h5close_attr_call, attr_var, loc)
     # h5close(f_id)
     close_call = ir.Expr.call(attr_var, [f_id], (), loc)
     close_assign = ir.Assign(close_call, lhs_var, loc)
     return [g_pio_assign, attr_assign, close_assign]
예제 #15
0
파일: pio.py 프로젝트: zmyer/hpat
 def _get_slice_range(self, index_slice, out):
     scope = index_slice.scope
     loc = index_slice.loc
     # start = s.start
     start_var = ir.Var(scope, mk_unique_var("$pio_range_start"), loc)
     start_attr_call = ir.Expr.getattr(index_slice, "start", loc)
     start_assign = ir.Assign(start_attr_call, start_var, loc)
     # stop = s.stop
     stop_var = ir.Var(scope, mk_unique_var("$pio_range_stop"), loc)
     stop_attr_call = ir.Expr.getattr(index_slice, "stop", loc)
     stop_assign = ir.Assign(stop_attr_call, stop_var, loc)
     # size = stop-start
     size_var = ir.Var(scope, mk_unique_var("$pio_range_size"), loc)
     size_call = ir.Expr.binop('-', stop_var, start_var, loc)
     size_assign = ir.Assign(size_call, size_var, loc)
     out += [start_assign, stop_assign, size_assign]
     return [start_var], [size_var]
예제 #16
0
    def _gen_rolling_init(self, win_size, func, center):
        nodes = []
        right_length = 0
        scope = win_size.scope
        loc = win_size.loc
        right_length = ir.Var(scope, mk_unique_var('zero_var'), scope)
        nodes.append(ir.Assign(ir.Const(0, loc), right_length, win_size.loc))

        def f(w):
            return -w + 1

        f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
        replace_arg_nodes(f_block, [win_size])
        nodes.extend(f_block.body[:-2])  # remove none return
        left_length = nodes[-1].target

        if center:

            def f(w):
                return -(w // 2)

            f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
            replace_arg_nodes(f_block, [win_size])
            nodes.extend(f_block.body[:-2])  # remove none return
            left_length = nodes[-1].target

            def f(w):
                return (w // 2)

            f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
            replace_arg_nodes(f_block, [win_size])
            nodes.extend(f_block.body[:-2])  # remove none return
            right_length = nodes[-1].target

        def f(a, b):
            return ((a, b), )

        f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
        replace_arg_nodes(f_block, [left_length, right_length])
        nodes.extend(f_block.body[:-2])  # remove none return
        win_tuple = nodes[-1].target

        index_offsets = [right_length]

        if func == 'apply':
            index_offsets = [left_length]

        def f(a):
            return (a, )

        f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
        replace_arg_nodes(f_block, index_offsets)
        nodes.extend(f_block.body[:-2])  # remove none return
        index_offsets = nodes[-1].target

        return index_offsets, win_tuple, nodes
예제 #17
0
파일: pio.py 프로젝트: Quantonomist/hpat
 def _gen_h5create_group(self, stmt, f_id):
     lhs_var = stmt.target
     scope = lhs_var.scope
     loc = lhs_var.loc
     args = [f_id] + stmt.value.args
     # g_pio_var = Global(hpat.pio_api)
     g_pio_var = ir.Var(scope, mk_unique_var("$pio_g_var"), loc)
     g_pio = ir.Global('pio_api', hpat.pio_api, loc)
     g_pio_assign = ir.Assign(g_pio, g_pio_var, loc)
     # attr call: h5create_group_attr = getattr(g_pio_var, h5create_group)
     h5create_group_attr_call = ir.Expr.getattr(g_pio_var, "h5create_group",
                                                loc)
     attr_var = ir.Var(scope, mk_unique_var("$h5create_group_attr"), loc)
     attr_assign = ir.Assign(h5create_group_attr_call, attr_var, loc)
     # group_id = h5create_group(f_id)
     create_group_call = ir.Expr.call(attr_var, args, (), loc)
     create_group_assign = ir.Assign(create_group_call, lhs_var, loc)
     # add to files since group behavior is same as files for many calls
     self.h5_files[lhs_var.name] = "group"
     return [g_pio_assign, attr_assign, create_group_assign]
예제 #18
0
def gen_stencil_call(in_arr, out_arr, code_expr, index_offsets):
    scope = in_arr.scope
    loc = in_arr.loc
    alloc_nodes = gen_empty_like(in_arr, out_arr)
    # generate stencil call
    # g_numba_var = Global(numba)
    g_numba_var = ir.Var(scope, mk_unique_var("$g_numba_var"), loc)
    g_dist = ir.Global('numba', numba, loc)
    g_numba_assign = ir.Assign(g_dist, g_numba_var, loc)
    # attr call: stencil_attr = getattr(g_numba_var, stencil)
    stencil_attr_call = ir.Expr.getattr(g_numba_var, "stencil", loc)
    stencil_attr_var = ir.Var(scope, mk_unique_var("$stencil_attr"), loc)
    stencil_attr_assign = ir.Assign(stencil_attr_call, stencil_attr_var, loc)
    # stencil_out = numba.stencil()
    stencil_out = ir.Var(scope, mk_unique_var("$stencil_out"), loc)
    stencil_call = ir.Expr.call(stencil_attr_var, [in_arr, out_arr], (), loc)
    stencil_call.stencil_def = code_expr
    stencil_call.index_offsets = index_offsets
    stencil_assign = ir.Assign(stencil_call, stencil_out, loc)
    return alloc_nodes + [g_numba_assign, stencil_attr_assign, stencil_assign]
예제 #19
0
파일: pio.py 프로젝트: feitianyiren/hpat
 def _handle_h5_File_call(self, assign, lhs, rhs):
     """
     Handle h5py.File calls like:
       f = h5py.File(file_name, mode)
     """
     # parallel arg = False for this stage
     loc = lhs.loc
     scope = lhs.scope
     parallel_var = ir.Var(scope, mk_unique_var("$const_parallel"), loc)
     parallel_assign = ir.Assign(ir.Const(0, loc), parallel_var, loc)
     rhs.args.append(parallel_var)
     return [parallel_assign, assign]
예제 #20
0
파일: pio.py 프로젝트: zmyer/hpat
    def _gen_h5size(self, f_id, dset, ndims, scope, loc, out):
        # g_pio_var = Global(hpat.pio_api)
        g_pio_var = ir.Var(scope, mk_unique_var("$pio_g_var"), loc)
        g_pio = ir.Global('pio_api', hpat.pio_api, loc)
        g_pio_assign = ir.Assign(g_pio, g_pio_var, loc)
        # attr call: h5size_attr = getattr(g_pio_var, h5size)
        h5size_attr_call = ir.Expr.getattr(g_pio_var, "h5size", loc)
        attr_var = ir.Var(scope, mk_unique_var("$h5size_attr"), loc)
        attr_assign = ir.Assign(h5size_attr_call, attr_var, loc)
        out += [g_pio_assign, attr_assign]

        size_vars = []
        for i in range(ndims):
            dim_var = ir.Var(scope, mk_unique_var("$h5_dim_var"), loc)
            dim_assign = ir.Assign(ir.Const(np.int32(i), loc), dim_var, loc)
            out.append(dim_assign)
            size_var = ir.Var(scope, mk_unique_var("$h5_size_var"), loc)
            size_vars.append(size_var)
            size_call = ir.Expr.call(attr_var, [f_id, dset, dim_var], (), loc)
            size_assign = ir.Assign(size_call, size_var, loc)
            out.append(size_assign)
        return size_vars
예제 #21
0
파일: pio.py 프로젝트: zmyer/hpat
 def _gen_h5create_dset(self, stmt, f_id):
     lhs_var = stmt.target
     scope = lhs_var.scope
     loc = lhs_var.loc
     args = [f_id] + stmt.value.args
     # append the dtype arg (e.g. dtype='f8')
     assert stmt.value.kws and stmt.value.kws[0][0] == 'dtype'
     args.append(stmt.value.kws[0][1])
     # g_pio_var = Global(hpat.pio_api)
     g_pio_var = ir.Var(scope, mk_unique_var("$pio_g_var"), loc)
     g_pio = ir.Global('pio_api', hpat.pio_api, loc)
     g_pio_assign = ir.Assign(g_pio, g_pio_var, loc)
     # attr call: h5create_dset_attr = getattr(g_pio_var, h5create_dset)
     h5create_dset_attr_call = ir.Expr.getattr(g_pio_var, "h5create_dset",
                                               loc)
     attr_var = ir.Var(scope, mk_unique_var("$h5create_dset_attr"), loc)
     attr_assign = ir.Assign(h5create_dset_attr_call, attr_var, loc)
     # dset_id = h5create_dset(f_id)
     create_dset_call = ir.Expr.call(attr_var, args, (), loc)
     create_dset_assign = ir.Assign(create_dset_call, lhs_var, loc)
     self.h5_dsets[lhs_var.name] = (f_id, args[1])
     self.h5_dsets_sizes[lhs_var.name] = self.tuple_table[args[2].name]
     return [g_pio_assign, attr_assign, create_dset_assign]
예제 #22
0
파일: pio.py 프로젝트: zmyer/hpat
 def _handle_h5_File_call(self, assign, lhs, rhs):
     """
     Handle h5py.File calls like:
       f = h5py.File(file_name, mode)
     """
     if guard(find_callname, self.func_ir, rhs) == ('File', 'h5py'):
         self.h5_files[lhs.name] = rhs.args[0]
         # parallel arg = False for this stage
         loc = lhs.loc
         scope = lhs.scope
         parallel_var = ir.Var(scope, mk_unique_var("$const_parallel"), loc)
         parallel_assign = ir.Assign(ir.Const(0, loc), parallel_var, loc)
         rhs.args.append(parallel_var)
         return [parallel_assign, assign]
     return None
예제 #23
0
    def _gen_col_std(self, out_var, args, col_var):
        loc = out_var.loc
        scope = out_var.scope
        # calculate var() first
        var_var = ir.Var(scope, mk_unique_var("var_val"), loc)
        v_nodes = self._gen_col_var(var_var, args, col_var)

        def f(a):
            a**0.5

        s_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
        replace_arg_nodes(s_block, [var_var])
        s_nodes = s_block.body[:-3]
        assert len(s_nodes) == 3
        s_nodes[-1].target = out_var
        return v_nodes + s_nodes
예제 #24
0
    def gen_parquet_read(self, file_name):
        import pyarrow.parquet as pq
        fname_def = guard(get_definition, self.func_ir, file_name)
        if isinstance(fname_def, ir.Const):
            assert isinstance(fname_def.value, str)
            file_name_str = fname_def.value
            col_names, col_types = parquet_file_schema(file_name_str)
            scope = file_name.scope
            loc = file_name.loc
            out_nodes = []
            col_items = []
            for i, cname in enumerate(col_names):
                # get column type from schema
                c_type = col_types[i]
                # create a variable for column and assign type
                varname = mk_unique_var(cname)
                self.locals[varname] = c_type
                cvar = ir.Var(scope, varname, loc)
                col_items.append((cname, cvar))

                size_func_text = (
                    'def f():\n  col_size = get_column_size_parquet("{}", {})\n'
                    .format(file_name_str, i))
                size_func_text += '  column = np.empty(col_size, dtype=np.{})\n'.format(
                    c_type.dtype)
                size_func_text += '  status = read_parquet("{}", {}, column)\n'.format(
                    file_name_str, i)
                loc_vars = {}
                exec(size_func_text, {}, loc_vars)
                size_func = loc_vars['f']
                _, f_block = compile_to_numba_ir(
                    size_func, {
                        'get_column_size_parquet': get_column_size_parquet,
                        'read_parquet': read_parquet,
                        'np': np
                    }).blocks.popitem()

                out_nodes += f_block.body[:-3]
                for stmt in out_nodes:
                    if stmt.target.name.startswith("column"):
                        assign = ir.Assign(stmt.target, cvar, loc)
                        break

                out_nodes.append(assign)

            return col_items, out_nodes
        raise ValueError("Parquet schema not available")
예제 #25
0
def inline_calls_inner(func_ir, block, stmt, i, py_func):
    call_expr = stmt.value
    scope = block.scope
    callee_ir = numba.compiler.run_frontend(py_func)

    # relabel callee_ir by adding an offset
    max_label = max(func_ir.blocks.keys())
    callee_blocks = add_offset_to_labels(callee_ir.blocks, max_label + 1)
    callee_ir.blocks = callee_blocks
    min_label = min(callee_blocks.keys())
    max_label = max(callee_blocks.keys())

    #  init _max_label global in ir_utils before using next_label()
    ir_utils._max_label = max_label

    # rename all variables in callee blocks
    var_table = get_name_var_table(callee_ir.blocks)
    new_var_dict = {}
    for name, var in var_table.items():
        new_var = scope.define(mk_unique_var(var.name), loc=var.loc)
        new_var_dict[name] = new_var
    replace_vars(callee_ir.blocks, new_var_dict)

    # replace callee arguments
    args = list(call_expr.args)
    # TODO: replace defaults (add to args)
    _replace_args(callee_ir.blocks, args)

    # split caller blocks into two
    new_block = ir.Block(scope, block.loc)
    new_block.body = block.body[i + 1:]
    new_label = ir_utils.next_label()
    func_ir.blocks[new_label] = new_block
    block.body = block.body[:i]
    block.body.append(ir.Jump(min_label, stmt.loc))

    # replace Return with assignment to LHS
    _replace_returns(callee_ir.blocks, stmt.target, new_label)

    # insert all new blocks
    for label, bl in callee_ir.blocks.items():
        func_ir.blocks[label] = bl

    # run inline_calls recursively to transform other calls
    inline_calls(func_ir)
    return
예제 #26
0
 def fix_dependencies(expr, varlist):
     """Double check if all variables in varlist are defined before
     expr is used. Try to move constant definition when the check fails.
     Bails out by raising GuardException if it can't be moved.
     """
     debug_print = _make_debug_print("fix_dependencies")
     for label, block in blocks.items():
         scope = block.scope
         body = block.body
         defined = set()
         for i in range(len(body)):
             inst = body[i]
             if isinstance(inst, ir.Assign):
                 defined.add(inst.target.name)
                 if inst.value == expr:
                     new_varlist = []
                     for var in varlist:
                         # var must be defined before this inst, or live
                         # and not later defined.
                         if (var.name in defined or
                             (var.name in livemap[label]
                              and not (var.name in usedefs.defmap[label]))):
                             debug_print(var.name, " already defined")
                             new_varlist.append(var)
                         else:
                             debug_print(var.name, " not yet defined")
                             var_def = get_definition(func_ir, var.name)
                             if isinstance(var_def, ir.Const):
                                 loc = var.loc
                                 new_var = ir.Var(scope,
                                                  mk_unique_var("new_var"),
                                                  loc)
                                 new_const = ir.Const(var_def.value, loc)
                                 new_vardef = _new_definition(
                                     func_ir, new_var, new_const, loc)
                                 new_body = []
                                 new_body.extend(body[:i])
                                 new_body.append(new_vardef)
                                 new_body.extend(body[i:])
                                 block.body = new_body
                                 new_varlist.append(new_var)
                             else:
                                 raise GuardException
                     return new_varlist
     # when expr is not found in block
     raise GuardException
예제 #27
0
    def gen_parquet_read(self, file_name, lhs):
        import pyarrow.parquet as pq
        scope = file_name.scope
        loc = file_name.loc

        table_types = None
        # lhs is temporary and will possibly be assigned to user variable
        assert lhs.name.startswith('$')
        if lhs.name in self.reverse_copies and self.reverse_copies[lhs.name] in self.locals:
            table_types = self.locals[self.reverse_copies[lhs.name]]
            self.locals.pop(self.reverse_copies[lhs.name])

        convert_types = {}
        # user-specified type conversion
        if lhs.name in self.reverse_copies and (self.reverse_copies[lhs.name]+':convert') in self.locals:
            convert_types = self.locals[self.reverse_copies[lhs.name]+':convert']
            self.locals.pop(self.reverse_copies[lhs.name]+':convert')

        if table_types is None:
            fname_def = guard(get_definition, self.func_ir, file_name)
            if not isinstance(fname_def, ir.Const) or not isinstance(fname_def.value, str):
                raise ValueError("Parquet schema not available")
            file_name_str = fname_def.value
            col_names, col_types = parquet_file_schema(file_name_str)
        else:
            col_names = list(table_types.keys())
            col_types = list(table_types.values())

        out_nodes = []
        col_items = []
        for i, cname in enumerate(col_names):
            # get column type from schema
            c_type = col_types[i]
            if cname in convert_types:
                c_type = convert_types[cname]

            # create a variable for column and assign type
            varname = mk_unique_var(cname)
            #self.locals[varname] = c_type
            cvar = ir.Var(scope, varname, loc)
            col_items.append((cname, cvar))

            out_nodes += get_column_read_nodes(c_type, cvar, file_name, i)

        return col_items, out_nodes
예제 #28
0
 def fix_dependencies(expr, varlist):
     """Double check if all variables in varlist are defined before
     expr is used. Try to move constant definition when the check fails.
     Bails out by raising GuardException if it can't be moved.
     """
     debug_print = _make_debug_print("fix_dependencies")
     for label, block in blocks.items():
         scope = block.scope
         body = block.body
         defined = set()
         for i in range(len(body)):
             inst = body[i]
             if isinstance(inst, ir.Assign):
                 defined.add(inst.target.name)
                 if inst.value == expr:
                     new_varlist = []
                     for var in varlist:
                         # var must be defined before this inst, or live
                         # and not later defined.
                         if (var.name in defined or
                             (var.name in livemap[label] and
                              not (var.name in usedefs.defmap[label]))):
                             debug_print(var.name, " already defined")
                             new_varlist.append(var)
                         else:
                             debug_print(var.name, " not yet defined")
                             var_def = get_definition(func_ir, var.name)
                             if isinstance(var_def, ir.Const):
                                 loc = var.loc
                                 new_var = ir.Var(scope, mk_unique_var("new_var"), loc)
                                 new_const = ir.Const(var_def.value, loc)
                                 new_vardef = _new_definition(func_ir,
                                                 new_var, new_const, loc)
                                 new_body = []
                                 new_body.extend(body[:i])
                                 new_body.append(new_vardef)
                                 new_body.extend(body[i:])
                                 block.body = new_body
                                 new_varlist.append(new_var)
                             else:
                                 raise GuardException
                     return new_varlist
     # when expr is not found in block
     raise GuardException
예제 #29
0
    def replace_return_with_setitem(self, blocks, index_vars, out_name):
        """
        Find return statements in the IR and replace them with a SetItem
        call of the value "returned" by the kernel into the result array.
        Returns the block labels that contained return statements.
        """
        ret_blocks = []

        for label, block in blocks.items():
            scope = block.scope
            loc = block.loc
            new_body = []
            for stmt in block.body:
                if isinstance(stmt, ir.Return):
                    ret_blocks.append(label)
                    # If 1D array then avoid the tuple construction.
                    if len(index_vars) == 1:
                        rvar = ir.Var(scope, out_name, loc)
                        ivar = ir.Var(scope, index_vars[0], loc)
                        new_body.append(ir.SetItem(rvar, ivar, stmt.value,
                                                   loc))
                    else:
                        # Convert the string names of the index variables into
                        # ir.Var's.
                        var_index_vars = []
                        for one_var in index_vars:
                            index_var = ir.Var(scope, one_var, loc)
                            var_index_vars += [index_var]

                        s_index_name = ir_utils.mk_unique_var("stencil_index")
                        s_index_var = ir.Var(scope, s_index_name, loc)
                        # Build a tuple from the index ir.Var's.
                        tuple_call = ir.Expr.build_tuple(var_index_vars, loc)
                        new_body.append(ir.Assign(tuple_call, s_index_var,
                                                  loc))
                        rvar = ir.Var(scope, out_name, loc)
                        # Write the return statements original value into
                        # the array using the tuple index.
                        si = ir.SetItem(rvar, s_index_var, stmt.value, loc)
                        new_body.append(si)
                else:
                    new_body.append(stmt)
            block.body = new_body
        return ret_blocks
예제 #30
0
    def _gen_rebalances(self, rebalance_arrs, blocks):
        #
        for block in blocks.values():
            new_body = []
            for inst in block.body:
                # TODO: handle hiframes filter etc.
                if isinstance(inst, Parfor):
                    self._gen_rebalances(rebalance_arrs, {0: inst.init_block})
                    self._gen_rebalances(rebalance_arrs, inst.loop_body)
                if isinstance(
                        inst,
                        ir.Assign) and inst.target.name in rebalance_arrs:
                    out_arr = inst.target
                    self.func_ir._definitions[out_arr.name].remove(inst.value)
                    # hold inst results in tmp array
                    tmp_arr = ir.Var(out_arr.scope,
                                     mk_unique_var("rebalance_tmp"),
                                     out_arr.loc)
                    self.typemap[tmp_arr.name] = self.typemap[out_arr.name]
                    inst.target = tmp_arr
                    nodes = [inst]

                    def f(in_arr):  # pragma: no cover
                        out_a = hpat.distributed_api.rebalance_array(in_arr)

                    f_block = compile_to_numba_ir(
                        f, {
                            'hpat': hpat
                        }, self.typingctx, (self.typemap[tmp_arr.name], ),
                        self.typemap, self.calltypes).blocks.popitem()[1]
                    replace_arg_nodes(f_block, [tmp_arr])
                    nodes += f_block.body[:-3]  # remove none return
                    nodes[-1].target = out_arr
                    # update definitions
                    dumm_block = ir.Block(out_arr.scope, out_arr.loc)
                    dumm_block.body = nodes
                    build_definitions({0: dumm_block},
                                      self.func_ir._definitions)
                    new_body += nodes
                else:
                    new_body.append(inst)

            block.body = new_body
예제 #31
0
def make_var_name(prefix=None, name=None):
    """
    Creates variable name.
    If prefix is given it would be extended with postfix to create unique name.
    If name is given returning this name.
    Either prefix or name could be not None at the same time
    """

    assert prefix is None or name is None
    var_name = None
    if name is not None:
        var_name = name
    else:
        if prefix is None:
            prefix = '$_var_'

        var_name = mk_unique_var(prefix)

    return var_name
예제 #32
0
파일: stencil.py 프로젝트: cpcloud/numba
    def replace_return_with_setitem(self, blocks, index_vars, out_name):
        """
        Find return statements in the IR and replace them with a SetItem
        call of the value "returned" by the kernel into the result array.
        Returns the block labels that contained return statements.
        """
        ret_blocks = []

        for label, block in blocks.items():
            scope = block.scope
            loc = block.loc
            new_body = []
            for stmt in block.body:
                if isinstance(stmt, ir.Return):
                    ret_blocks.append(label)
                    # If 1D array then avoid the tuple construction.
                    if len(index_vars) == 1:
                        rvar = ir.Var(scope, out_name, loc)
                        ivar = ir.Var(scope, index_vars[0], loc)
                        new_body.append(ir.SetItem(rvar, ivar, stmt.value, loc))
                    else:
                        # Convert the string names of the index variables into
                        # ir.Var's.
                        var_index_vars = []
                        for one_var in index_vars:
                            index_var = ir.Var(scope, one_var, loc)
                            var_index_vars += [index_var]

                        s_index_name = ir_utils.mk_unique_var("stencil_index")
                        s_index_var  = ir.Var(scope, s_index_name, loc)
                        # Build a tuple from the index ir.Var's.
                        tuple_call = ir.Expr.build_tuple(var_index_vars, loc)
                        new_body.append(ir.Assign(tuple_call, s_index_var, loc))
                        rvar = ir.Var(scope, out_name, loc)
                        # Write the return statements original value into
                        # the array using the tuple index.
                        si = ir.SetItem(rvar, s_index_var, stmt.value, loc)
                        new_body.append(si)
                else:
                    new_body.append(stmt)
            block.body = new_body
        return ret_blocks
예제 #33
0
def inline_closure_call(func_ir, glbls, block, i, callee, typingctx=None,
                        arg_typs=None, typemap=None, calltypes=None,
                        work_list=None):
    """Inline the body of `callee` at its callsite (`i`-th instruction of `block`)

    `func_ir` is the func_ir object of the caller function and `glbls` is its
    global variable environment (func_ir.func_id.func.__globals__).
    `block` is the IR block of the callsite and `i` is the index of the
    callsite's node. `callee` is either the called function or a
    make_function node. `typingctx`, `typemap` and `calltypes` are typing
    data structures of the caller, available if we are in a typed pass.
    `arg_typs` includes the types of the arguments at the callsite.
    """
    scope = block.scope
    instr = block.body[i]
    call_expr = instr.value
    debug_print = _make_debug_print("inline_closure_call")
    debug_print("Found closure call: ", instr, " with callee = ", callee)
    # support both function object and make_function Expr
    callee_code = callee.code if hasattr(callee, 'code') else callee.__code__
    callee_defaults = callee.defaults if hasattr(callee, 'defaults') else callee.__defaults__
    callee_closure = callee.closure if hasattr(callee, 'closure') else callee.__closure__
    # first, get the IR of the callee
    callee_ir = get_ir_of_code(glbls, callee_code)
    callee_blocks = callee_ir.blocks

    # 1. relabel callee_ir by adding an offset
    max_label = max(func_ir.blocks.keys())
    callee_blocks = add_offset_to_labels(callee_blocks, max_label + 1)
    callee_blocks = simplify_CFG(callee_blocks)
    callee_ir.blocks = callee_blocks
    min_label = min(callee_blocks.keys())
    max_label = max(callee_blocks.keys())
    #    reset globals in ir_utils before we use it
    ir_utils._max_label = max_label
    debug_print("After relabel")
    _debug_dump(callee_ir)

    # 2. rename all local variables in callee_ir with new locals created in func_ir
    callee_scopes = _get_all_scopes(callee_blocks)
    debug_print("callee_scopes = ", callee_scopes)
    #    one function should only have one local scope
    assert(len(callee_scopes) == 1)
    callee_scope = callee_scopes[0]
    var_dict = {}
    for var in callee_scope.localvars._con.values():
        if not (var.name in callee_code.co_freevars):
            new_var = scope.define(mk_unique_var(var.name), loc=var.loc)
            var_dict[var.name] = new_var
    debug_print("var_dict = ", var_dict)
    replace_vars(callee_blocks, var_dict)
    debug_print("After local var rename")
    _debug_dump(callee_ir)

    # 3. replace formal parameters with actual arguments
    args = list(call_expr.args)
    if callee_defaults:
        debug_print("defaults = ", callee_defaults)
        if isinstance(callee_defaults, tuple): # Python 3.5
            args = args + list(callee_defaults)
        elif isinstance(callee_defaults, ir.Var) or isinstance(callee_defaults, str):
            defaults = func_ir.get_definition(callee_defaults)
            assert(isinstance(defaults, ir.Const))
            loc = defaults.loc
            args = args + [ir.Const(value=v, loc=loc)
                           for v in defaults.value]
        else:
            raise NotImplementedError(
                "Unsupported defaults to make_function: {}".format(defaults))
    debug_print("After arguments rename: ")
    _debug_dump(callee_ir)

    # 4. replace freevar with actual closure var
    if callee_closure:
        closure = func_ir.get_definition(callee_closure)
        debug_print("callee's closure = ", closure)
        if isinstance(closure, tuple):
            cellget = ctypes.pythonapi.PyCell_Get
            cellget.restype = ctypes.py_object
            cellget.argtypes = (ctypes.py_object,)
            items = tuple(cellget(x) for x in closure)
        else:
            assert(isinstance(closure, ir.Expr)
                   and closure.op == 'build_tuple')
            items = closure.items
        assert(len(callee_code.co_freevars) == len(items))
        _replace_freevars(callee_blocks, items)
        debug_print("After closure rename")
        _debug_dump(callee_ir)

    if typingctx:
        from numba import compiler
        f_typemap, f_return_type, f_calltypes = compiler.type_inference_stage(
                typingctx, callee_ir, arg_typs, None)
        canonicalize_array_math(callee_ir, f_typemap,
                                f_calltypes, typingctx)
        # remove argument entries like arg.a from typemap
        arg_names = [vname for vname in f_typemap if vname.startswith("arg.")]
        for a in arg_names:
            f_typemap.pop(a)
        typemap.update(f_typemap)
        calltypes.update(f_calltypes)

    _replace_args_with(callee_blocks, args)
    # 5. split caller blocks into two
    new_blocks = []
    new_block = ir.Block(scope, block.loc)
    new_block.body = block.body[i + 1:]
    new_label = next_label()
    func_ir.blocks[new_label] = new_block
    new_blocks.append((new_label, new_block))
    block.body = block.body[:i]
    block.body.append(ir.Jump(min_label, instr.loc))

    # 6. replace Return with assignment to LHS
    topo_order = find_topo_order(callee_blocks)
    _replace_returns(callee_blocks, instr.target, new_label)
    #    remove the old definition of instr.target too
    if (instr.target.name in func_ir._definitions):
        func_ir._definitions[instr.target.name] = []

    # 7. insert all new blocks, and add back definitions
    for label in topo_order:
        # block scope must point to parent's
        block = callee_blocks[label]
        block.scope = scope
        _add_definitions(func_ir, block)
        func_ir.blocks[label] = block
        new_blocks.append((label, block))
    debug_print("After merge in")
    _debug_dump(func_ir)

    if work_list != None:
        for block in new_blocks:
            work_list.append(block)
    return callee_blocks
예제 #34
0
파일: parfor.py 프로젝트: cpcloud/numba
def _create_gufunc_for_parfor_body(
        lowerer,
        parfor,
        typemap,
        typingctx,
        targetctx,
        flags,
        locals,
        has_aliases,
        index_var_typ):
    '''
    Takes a parfor and creates a gufunc function for its body.
    There are two parts to this function.
    1) Code to iterate across the iteration space as defined by the schedule.
    2) The parfor body that does the work for a single point in the iteration space.
    Part 1 is created as Python text for simplicity with a sentinel assignment to mark the point
    in the IR where the parfor body should be added.
    This Python text is 'exec'ed into existence and its IR retrieved with run_frontend.
    The IR is scanned for the sentinel assignment where that basic block is split and the IR
    for the parfor body inserted.
    '''

    # The parfor body and the main function body share ir.Var nodes.
    # We have to do some replacements of Var names in the parfor body to make them
    # legal parameter names.  If we don't copy then the Vars in the main function also
    # would incorrectly change their name.
    loop_body = copy.copy(parfor.loop_body)
    remove_dels(loop_body)

    parfor_dim = len(parfor.loop_nests)
    loop_indices = [l.index_variable.name for l in parfor.loop_nests]

    # Get all the parfor params.
    parfor_params = parfor.params
    # Get just the outputs of the parfor.
    parfor_outputs = numba.parfor.get_parfor_outputs(parfor, parfor_params)
    # Get all parfor reduction vars, and operators.
    parfor_redvars, parfor_reddict = numba.parfor.get_parfor_reductions(
        parfor, parfor_params, lowerer.fndesc.calltypes)
    # Compute just the parfor inputs as a set difference.
    parfor_inputs = sorted(
        list(
            set(parfor_params) -
            set(parfor_outputs) -
            set(parfor_redvars)))

    if config.DEBUG_ARRAY_OPT == 1:
        print("parfor_params = ", parfor_params, " ", type(parfor_params))
        print("parfor_outputs = ", parfor_outputs, " ", type(parfor_outputs))
        print("parfor_inputs = ", parfor_inputs, " ", type(parfor_inputs))
        print("parfor_redvars = ", parfor_redvars, " ", type(parfor_redvars))

    # Reduction variables are represented as arrays, so they go under
    # different names.
    parfor_redarrs = []
    for var in parfor_redvars:
        arr = var + "_arr"
        parfor_redarrs.append(arr)
        typemap[arr] = types.npytypes.Array(typemap[var], 1, "C")

    # Reorder all the params so that inputs go first then outputs.
    parfor_params = parfor_inputs + parfor_outputs + parfor_redarrs

    if config.DEBUG_ARRAY_OPT == 1:
        print("parfor_params = ", parfor_params, " ", type(parfor_params))
        print("loop_indices = ", loop_indices, " ", type(loop_indices))
        print("loop_body = ", loop_body, " ", type(loop_body))
        _print_body(loop_body)

    # Some Var are not legal parameter names so create a dict of potentially illegal
    # param name to guaranteed legal name.
    param_dict = legalize_names(parfor_params + parfor_redvars)
    if config.DEBUG_ARRAY_OPT == 1:
        print(
            "param_dict = ",
            sorted(
                param_dict.items()),
            " ",
            type(param_dict))

    # Some loop_indices are not legal parameter names so create a dict of potentially illegal
    # loop index to guaranteed legal name.
    ind_dict = legalize_names(loop_indices)
    # Compute a new list of legal loop index names.
    legal_loop_indices = [ind_dict[v] for v in loop_indices]
    if config.DEBUG_ARRAY_OPT == 1:
        print("ind_dict = ", sorted(ind_dict.items()), " ", type(ind_dict))
        print(
            "legal_loop_indices = ",
            legal_loop_indices,
            " ",
            type(legal_loop_indices))
        for pd in parfor_params:
            print("pd = ", pd)
            print("pd type = ", typemap[pd], " ", type(typemap[pd]))

    # Get the types of each parameter.
    param_types = [typemap[v] for v in parfor_params]
    # if config.DEBUG_ARRAY_OPT==1:
    #    param_types_dict = { v:typemap[v] for v in parfor_params }
    #    print("param_types_dict = ", param_types_dict, " ", type(param_types_dict))
    #    print("param_types = ", param_types, " ", type(param_types))

    # Replace illegal parameter names in the loop body with legal ones.
    replace_var_names(loop_body, param_dict)
    # remember the name before legalizing as the actual arguments
    parfor_args = parfor_params
    # Change parfor_params to be legal names.
    parfor_params = [param_dict[v] for v in parfor_params]
    parfor_params_orig = parfor_params

    parfor_params = []
    ascontig = False
    for pindex in range(len(parfor_params_orig)):
        if ascontig and pindex < len(parfor_inputs) and isinstance(param_types[pindex], types.npytypes.Array):
            parfor_params.append(parfor_params_orig[pindex]+"param")
        else:
            parfor_params.append(parfor_params_orig[pindex])

    # Change parfor body to replace illegal loop index vars with legal ones.
    replace_var_names(loop_body, ind_dict)
    loop_body_var_table = get_name_var_table(loop_body)
    sentinel_name = get_unused_var_name("__sentinel__", loop_body_var_table)

    if config.DEBUG_ARRAY_OPT == 1:
        print(
            "legal parfor_params = ",
            parfor_params,
            " ",
            type(parfor_params))

    # Determine the unique names of the scheduling and gufunc functions.
    # sched_func_name = "__numba_parfor_sched_%s" % (hex(hash(parfor)).replace("-", "_"))
    gufunc_name = "__numba_parfor_gufunc_%s" % (
        hex(hash(parfor)).replace("-", "_"))
    if config.DEBUG_ARRAY_OPT:
        # print("sched_func_name ", type(sched_func_name), " ", sched_func_name)
        print("gufunc_name ", type(gufunc_name), " ", gufunc_name)

    gufunc_txt = ""

    # Create the gufunc function.
    gufunc_txt += "def " + gufunc_name + \
        "(sched, " + (", ".join(parfor_params)) + "):\n"

    for pindex in range(len(parfor_inputs)):
        if ascontig and isinstance(param_types[pindex], types.npytypes.Array):
            gufunc_txt += ("    " + parfor_params_orig[pindex]
                + " = np.ascontiguousarray(" + parfor_params[pindex] + ")\n")

    # Add initialization of reduction variables
    for arr, var in zip(parfor_redarrs, parfor_redvars):
        gufunc_txt += "    " + param_dict[var] + \
            "=" + param_dict[arr] + "[0]\n"

    # For each dimension of the parfor, create a for loop in the generated gufunc function.
    # Iterate across the proper values extracted from the schedule.
    # The form of the schedule is start_dim0, start_dim1, ..., start_dimN, end_dim0,
    # end_dim1, ..., end_dimN
    for eachdim in range(parfor_dim):
        for indent in range(eachdim + 1):
            gufunc_txt += "    "
        sched_dim = eachdim
        gufunc_txt += ("for " +
                       legal_loop_indices[eachdim] +
                       " in range(sched[" +
                       str(sched_dim) +
                       "], sched[" +
                       str(sched_dim +
                           parfor_dim) +
                       "] + np.uint8(1)):\n")

    if config.DEBUG_ARRAY_OPT_RUNTIME:
        for indent in range(parfor_dim + 1):
            gufunc_txt += "    "
        gufunc_txt += "print("
        for eachdim in range(parfor_dim):
            gufunc_txt += "\"" + legal_loop_indices[eachdim] + "\"," + legal_loop_indices[eachdim] + ","
        gufunc_txt += ")\n"

    # Add the sentinel assignment so that we can find the loop body position
    # in the IR.
    for indent in range(parfor_dim + 1):
        gufunc_txt += "    "
    gufunc_txt += sentinel_name + " = 0\n"
    # Add assignments of reduction variables (for returning the value)
    for arr, var in zip(parfor_redarrs, parfor_redvars):
        gufunc_txt += "    " + param_dict[arr] + \
            "[0] = " + param_dict[var] + "\n"
    gufunc_txt += "    return None\n"

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_txt = ", type(gufunc_txt), "\n", gufunc_txt)
    # Force gufunc outline into existence.
    globls = {"np": np}
    locls = {}
    exec_(gufunc_txt, globls, locls)
    gufunc_func = locls[gufunc_name]

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_func = ", type(gufunc_func), "\n", gufunc_func)
    # Get the IR for the gufunc outline.
    gufunc_ir = compiler.run_frontend(gufunc_func)

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir dump ", type(gufunc_ir))
        gufunc_ir.dump()
        print("loop_body dump ", type(loop_body))
        _print_body(loop_body)

    # rename all variables in gufunc_ir afresh
    var_table = get_name_var_table(gufunc_ir.blocks)
    new_var_dict = {}
    reserved_names = [sentinel_name] + \
        list(param_dict.values()) + legal_loop_indices
    for name, var in var_table.items():
        if not (name in reserved_names):
            new_var_dict[name] = mk_unique_var(name)
    replace_var_names(gufunc_ir.blocks, new_var_dict)
    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir dump after renaming ")
        gufunc_ir.dump()

    gufunc_param_types = [
        numba.types.npytypes.Array(
            index_var_typ, 1, "C")] + param_types
    if config.DEBUG_ARRAY_OPT:
        print(
            "gufunc_param_types = ",
            type(gufunc_param_types),
            "\n",
            gufunc_param_types)

    gufunc_stub_last_label = max(gufunc_ir.blocks.keys()) + 1

    # Add gufunc stub last label to each parfor.loop_body label to prevent
    # label conflicts.
    loop_body = add_offset_to_labels(loop_body, gufunc_stub_last_label)
    # new label for splitting sentinel block
    new_label = max(loop_body.keys()) + 1

    # If enabled, add a print statement after every assignment.
    if config.DEBUG_ARRAY_OPT_RUNTIME:
        for label, block in loop_body.items():
            new_block = block.copy()
            new_block.clear()
            loc = block.loc
            scope = block.scope
            for inst in block.body:
                new_block.append(inst)
                # Append print after assignment
                if isinstance(inst, ir.Assign):
                    # Only apply to numbers
                    if typemap[inst.target.name] not in types.number_domain:
                        continue

                    # Make constant string
                    strval = "{} =".format(inst.target.name)
                    strconsttyp = types.Const(strval)

                    lhs = ir.Var(scope, mk_unique_var("str_const"), loc)
                    assign_lhs = ir.Assign(value=ir.Const(value=strval, loc=loc),
                                           target=lhs, loc=loc)
                    typemap[lhs.name] = strconsttyp
                    new_block.append(assign_lhs)

                    # Make print node
                    print_node = ir.Print(args=[lhs, inst.target], vararg=None, loc=loc)
                    new_block.append(print_node)
                    sig = numba.typing.signature(types.none,
                                           typemap[lhs.name],
                                           typemap[inst.target.name])
                    lowerer.fndesc.calltypes[print_node] = sig
            loop_body[label] = new_block

    if config.DEBUG_ARRAY_OPT:
        print("parfor loop body")
        _print_body(loop_body)

    wrapped_blocks = wrap_loop_body(loop_body)
    hoisted = hoist(parfor_params, loop_body, typemap, wrapped_blocks)
    start_block = gufunc_ir.blocks[min(gufunc_ir.blocks.keys())]
    start_block.body = start_block.body[:-1] + hoisted + [start_block.body[-1]]
    unwrap_loop_body(loop_body)

    if config.DEBUG_ARRAY_OPT:
        print("After hoisting")
        _print_body(loop_body)

    # Search all the block in the gufunc outline for the sentinel assignment.
    for label, block in gufunc_ir.blocks.items():
        for i, inst in enumerate(block.body):
            if isinstance(
                    inst,
                    ir.Assign) and inst.target.name == sentinel_name:
                # We found the sentinel assignment.
                loc = inst.loc
                scope = block.scope
                # split block across __sentinel__
                # A new block is allocated for the statements prior to the sentinel
                # but the new block maintains the current block label.
                prev_block = ir.Block(scope, loc)
                prev_block.body = block.body[:i]
                # The current block is used for statements after the sentinel.
                block.body = block.body[i + 1:]
                # But the current block gets a new label.
                body_first_label = min(loop_body.keys())

                # The previous block jumps to the minimum labelled block of the
                # parfor body.
                prev_block.append(ir.Jump(body_first_label, loc))
                # Add all the parfor loop body blocks to the gufunc function's
                # IR.
                for (l, b) in loop_body.items():
                    gufunc_ir.blocks[l] = b
                body_last_label = max(loop_body.keys())
                gufunc_ir.blocks[new_label] = block
                gufunc_ir.blocks[label] = prev_block
                # Add a jump from the last parfor body block to the block containing
                # statements after the sentinel.
                gufunc_ir.blocks[body_last_label].append(
                    ir.Jump(new_label, loc))
                break
        else:
            continue
        break

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir last dump before renaming")
        gufunc_ir.dump()

    gufunc_ir.blocks = rename_labels(gufunc_ir.blocks)
    remove_dels(gufunc_ir.blocks)

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir last dump")
        gufunc_ir.dump()
        print("flags", flags)
        print("typemap", typemap)

    old_alias = flags.noalias
    if not has_aliases:
        if config.DEBUG_ARRAY_OPT:
            print("No aliases found so adding noalias flag.")
        flags.noalias = True
    kernel_func = compiler.compile_ir(
        typingctx,
        targetctx,
        gufunc_ir,
        gufunc_param_types,
        types.none,
        flags,
        locals)

    flags.noalias = old_alias

    kernel_sig = signature(types.none, *gufunc_param_types)
    if config.DEBUG_ARRAY_OPT:
        print("kernel_sig = ", kernel_sig)

    return kernel_func, parfor_args, kernel_sig
예제 #35
0
파일: parfor.py 프로젝트: cpcloud/numba
def call_parallel_gufunc(lowerer, cres, gu_signature, outer_sig, expr_args,
                         loop_ranges, redvars, reddict, init_block, index_var_typ):
    '''
    Adds the call to the gufunc function from the main function.
    '''
    context = lowerer.context
    builder = lowerer.builder
    library = lowerer.library

    from .parallel import (ParallelGUFuncBuilder, build_gufunc_wrapper,
                           get_thread_count, _launch_threads, _init)

    if config.DEBUG_ARRAY_OPT:
        print("make_parallel_loop")
        print("args = ", expr_args)
        print("outer_sig = ", outer_sig.args, outer_sig.return_type,
              outer_sig.recvr, outer_sig.pysig)
        print("loop_ranges = ", loop_ranges)

    # Build the wrapper for GUFunc
    args, return_type = sigutils.normalize_signature(outer_sig)
    llvm_func = cres.library.get_function(cres.fndesc.llvm_func_name)
    sin, sout = gu_signature

    # These are necessary for build_gufunc_wrapper to find external symbols
    _launch_threads()
    _init()

    wrapper_ptr, env, wrapper_name = build_gufunc_wrapper(llvm_func, cres, sin,
                                                          sout, {})
    cres.library._ensure_finalized()

    if config.DEBUG_ARRAY_OPT:
        print("parallel function = ", wrapper_name, cres)

    # loadvars for loop_ranges
    def load_range(v):
        if isinstance(v, ir.Var):
            return lowerer.loadvar(v.name)
        else:
            return context.get_constant(types.uintp, v)

    num_dim = len(loop_ranges)
    for i in range(num_dim):
        start, stop, step = loop_ranges[i]
        start = load_range(start)
        stop = load_range(stop)
        assert(step == 1)  # We do not support loop steps other than 1
        step = load_range(step)
        loop_ranges[i] = (start, stop, step)

        if config.DEBUG_ARRAY_OPT:
            print("call_parallel_gufunc loop_ranges[{}] = ".format(i), start,
                  stop, step)
            cgutils.printf(builder, "loop range[{}]: %d %d (%d)\n".format(i),
                           start, stop, step)

    # Commonly used LLVM types and constants
    byte_t = lc.Type.int(8)
    byte_ptr_t = lc.Type.pointer(byte_t)
    byte_ptr_ptr_t = lc.Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    uintp_t = context.get_value_type(types.uintp)
    intp_ptr_t = lc.Type.pointer(intp_t)
    uintp_ptr_t = lc.Type.pointer(uintp_t)
    zero = context.get_constant(types.uintp, 0)
    one = context.get_constant(types.uintp, 1)
    one_type = one.type
    sizeof_intp = context.get_abi_sizeof(intp_t)

    # Prepare sched, first pop it out of expr_args, outer_sig, and gu_signature
    sched_name = expr_args.pop(0)
    sched_typ = outer_sig.args[0]
    sched_sig = sin.pop(0)

    if config.DEBUG_ARRAY_OPT:
        print("Parfor has potentially negative start", index_var_typ.signed)

    if index_var_typ.signed:
        sched_type = intp_t
        sched_ptr_type = intp_ptr_t
    else:
        sched_type = uintp_t
        sched_ptr_type = uintp_ptr_t

    # Call do_scheduling with appropriate arguments
    dim_starts = cgutils.alloca_once(
        builder, sched_type, size=context.get_constant(
            types.uintp, num_dim), name="dims")
    dim_stops = cgutils.alloca_once(
        builder, sched_type, size=context.get_constant(
            types.uintp, num_dim), name="dims")
    for i in range(num_dim):
        start, stop, step = loop_ranges[i]
        if start.type != one_type:
            start = builder.sext(start, one_type)
        if stop.type != one_type:
            stop = builder.sext(stop, one_type)
        if step.type != one_type:
            step = builder.sext(step, one_type)
        # substract 1 because do-scheduling takes inclusive ranges
        stop = builder.sub(stop, one)
        builder.store(
            start, builder.gep(
                dim_starts, [
                    context.get_constant(
                        types.uintp, i)]))
        builder.store(stop, builder.gep(dim_stops,
                                        [context.get_constant(types.uintp, i)]))

    sched_size = get_thread_count() * num_dim * 2
    sched = cgutils.alloca_once(
        builder, sched_type, size=context.get_constant(
            types.uintp, sched_size), name="sched")
    debug_flag = 1 if config.DEBUG_ARRAY_OPT else 0
    scheduling_fnty = lc.Type.function(
        intp_ptr_t, [uintp_t, sched_ptr_type, sched_ptr_type, uintp_t, sched_ptr_type, intp_t])
    if index_var_typ.signed:
        do_scheduling = builder.module.get_or_insert_function(scheduling_fnty,
                                                          name="do_scheduling_signed")
    else:
        do_scheduling = builder.module.get_or_insert_function(scheduling_fnty,
                                                          name="do_scheduling_unsigned")

    builder.call(
        do_scheduling, [
            context.get_constant(
                types.uintp, num_dim), dim_starts, dim_stops, context.get_constant(
                types.uintp, get_thread_count()), sched, context.get_constant(
                    types.intp, debug_flag)])

    # init reduction array allocation here.
    nredvars = len(redvars)
    ninouts = len(expr_args) - nredvars
    redarrs = []
    for i in range(nredvars):
        redvar_typ = lowerer.fndesc.typemap[redvars[i]]
        # we need to use the default initial value instead of existing value in
        # redvar if available
        init_val = reddict[redvars[i]][0]
        if init_val != None:
            val = context.get_constant(redvar_typ, init_val)
        else:
            val = lowerer.loadvar(redvars[i])
        typ = context.get_value_type(redvar_typ)
        size = get_thread_count()
        arr = cgutils.alloca_once(builder, typ,
                                  size=context.get_constant(types.uintp, size))
        redarrs.append(arr)
        for j in range(size):
            dst = builder.gep(arr, [context.get_constant(types.uintp, j)])
            builder.store(val, dst)

    if config.DEBUG_ARRAY_OPT:
        for i in range(get_thread_count()):
            cgutils.printf(builder, "sched[" + str(i) + "] = ")
            for j in range(num_dim * 2):
                cgutils.printf(
                    builder, "%d ", builder.load(
                        builder.gep(
                            sched, [
                                context.get_constant(
                                    types.intp, i * num_dim * 2 + j)])))
            cgutils.printf(builder, "\n")

    # Prepare arguments: args, shapes, steps, data
    all_args = [lowerer.loadvar(x) for x in expr_args[:ninouts]] + redarrs
    num_args = len(all_args)
    num_inps = len(sin) + 1
    args = cgutils.alloca_once(
        builder,
        byte_ptr_t,
        size=context.get_constant(
            types.intp,
            1 + num_args),
        name="pargs")
    array_strides = []
    # sched goes first
    builder.store(builder.bitcast(sched, byte_ptr_t), args)
    array_strides.append(context.get_constant(types.intp, sizeof_intp))
    # followed by other arguments
    for i in range(num_args):
        arg = all_args[i]
        aty = outer_sig.args[i + 1]  # skip first argument sched
        dst = builder.gep(args, [context.get_constant(types.intp, i + 1)])
        if i >= ninouts:  # reduction variables
            builder.store(builder.bitcast(arg, byte_ptr_t), dst)
        elif isinstance(aty, types.ArrayCompatible):
            ary = context.make_array(aty)(context, builder, arg)
            strides = cgutils.unpack_tuple(builder, ary.strides, aty.ndim)
            for j in range(len(strides)):
                array_strides.append(strides[j])
            builder.store(builder.bitcast(ary.data, byte_ptr_t), dst)
        else:
            if i < num_inps:
                # Scalar input, need to store the value in an array of size 1
                typ = context.get_data_type(
                    aty) if aty != types.boolean else lc.Type.int(1)
                ptr = cgutils.alloca_once(builder, typ)
                builder.store(arg, ptr)
            else:
                # Scalar output, must allocate
                typ = context.get_data_type(
                    aty) if aty != types.boolean else lc.Type.int(1)
                ptr = cgutils.alloca_once(builder, typ)
            builder.store(builder.bitcast(ptr, byte_ptr_t), dst)

    # Next, we prepare the individual dimension info recorded in gu_signature
    sig_dim_dict = {}
    occurances = []
    occurances = [sched_sig[0]]
    sig_dim_dict[sched_sig[0]] = context.get_constant(types.intp, 2 * num_dim)
    for var, arg, aty, gu_sig in zip(expr_args[:ninouts], all_args[:ninouts],
                                     outer_sig.args[1:], sin + sout):
        if config.DEBUG_ARRAY_OPT:
            print("var = ", var, " gu_sig = ", gu_sig)
        i = 0
        for dim_sym in gu_sig:
            if config.DEBUG_ARRAY_OPT:
                print("var = ", var, " type = ", aty)
            ary = context.make_array(aty)(context, builder, arg)
            shapes = cgutils.unpack_tuple(builder, ary.shape, aty.ndim)
            sig_dim_dict[dim_sym] = shapes[i]
            if not (dim_sym in occurances):
                if config.DEBUG_ARRAY_OPT:
                    print("dim_sym = ", dim_sym, ", i = ", i)
                    cgutils.printf(builder, dim_sym + " = %d\n", shapes[i])
                occurances.append(dim_sym)
            i = i + 1

    # Prepare shapes, which is a single number (outer loop size), followed by
    # the size of individual shape variables.
    nshapes = len(sig_dim_dict) + 1
    shapes = cgutils.alloca_once(builder, intp_t, size=nshapes, name="pshape")
    # For now, outer loop size is the same as number of threads
    builder.store(context.get_constant(types.intp, get_thread_count()), shapes)
    # Individual shape variables go next
    i = 1
    for dim_sym in occurances:
        if config.DEBUG_ARRAY_OPT:
            cgutils.printf(builder, dim_sym + " = %d\n", sig_dim_dict[dim_sym])
        builder.store(
            sig_dim_dict[dim_sym], builder.gep(
                shapes, [
                    context.get_constant(
                        types.intp, i)]))
        i = i + 1

    # Prepare steps for each argument. Note that all steps are counted in
    # bytes.
    num_steps = num_args + 1 + len(array_strides)
    steps = cgutils.alloca_once(
        builder, intp_t, size=context.get_constant(
            types.intp, num_steps), name="psteps")
    # First goes the step size for sched, which is 2 * num_dim
    builder.store(context.get_constant(types.intp, 2 * num_dim * sizeof_intp),
                  steps)
    # The steps for all others are 0. (TODO: except reduction results)
    for i in range(num_args):
        if i >= ninouts:  # steps for reduction vars are abi_sizeof(typ)
            j = i - ninouts
            typ = context.get_value_type(lowerer.fndesc.typemap[redvars[j]])
            sizeof = context.get_abi_sizeof(typ)
            stepsize = context.get_constant(types.intp, sizeof)
        else:
            # steps are strides
            stepsize = zero
        dst = builder.gep(steps, [context.get_constant(types.intp, 1 + i)])
        builder.store(stepsize, dst)
    for j in range(len(array_strides)):
        dst = builder.gep(
            steps, [
                context.get_constant(
                    types.intp, 1 + num_args + j)])
        builder.store(array_strides[j], dst)

    # prepare data
    data = builder.inttoptr(zero, byte_ptr_t)

    fnty = lc.Type.function(lc.Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                             intp_ptr_t, byte_ptr_t])
    fn = builder.module.get_or_insert_function(fnty, name=wrapper_name)
    if config.DEBUG_ARRAY_OPT:
        cgutils.printf(builder, "before calling kernel %p\n", fn)
    result = builder.call(fn, [args, shapes, steps, data])
    if config.DEBUG_ARRAY_OPT:
        cgutils.printf(builder, "after calling kernel %p\n", fn)

    scope = init_block.scope
    loc = init_block.loc
    calltypes = lowerer.fndesc.calltypes
    # Accumulate all reduction arrays back to a single value
    for i in range(get_thread_count()):
        for name, arr in zip(redvars, redarrs):
            tmpname = mk_unique_var(name)
            src = builder.gep(arr, [context.get_constant(types.intp, i)])
            val = builder.load(src)
            vty = lowerer.fndesc.typemap[name]
            lowerer.fndesc.typemap[tmpname] = vty
            lowerer.storevar(val, tmpname)
            tmpvar = ir.Var(scope, tmpname, loc)
            tmp_assign = ir.Assign(tmpvar, ir.Var(scope, name+"#init", loc), loc)
            if name+"#init" not in lowerer.fndesc.typemap:
                lowerer.fndesc.typemap[name+"#init"] = vty
            lowerer.lower_inst(tmp_assign)
            # generate code for combining reduction variable with thread output
            for inst in reddict[name][1]:
                lowerer.lower_inst(inst)

    # TODO: scalar output must be assigned back to corresponding output
    # variables
    return
예제 #36
0
파일: stencil.py 프로젝트: cpcloud/numba
    def add_indices_to_kernel(self, kernel, index_names, ndim,
                              neighborhood, standard_indexed):
        """
        Transforms the stencil kernel as specified by the user into one
        that includes each dimension's index variable as part of the getitem
        calls.  So, in effect array[-1] becomes array[index0-1].
        """
        const_dict = {}
        kernel_consts = []

        if config.DEBUG_ARRAY_OPT == 1:
            print("add_indices_to_kernel", ndim, neighborhood)
            ir_utils.dump_blocks(kernel.blocks)

        if neighborhood is None:
            need_to_calc_kernel = True
        else:
            need_to_calc_kernel = False
            if len(neighborhood) != ndim:
                raise ValueError("%d dimensional neighborhood specified for %d " \
                    "dimensional input array" % (len(neighborhood), ndim))

        tuple_table = ir_utils.get_tuple_table(kernel.blocks)

        relatively_indexed = set()

        for block in kernel.blocks.values():
            scope = block.scope
            loc = block.loc
            new_body = []
            for stmt in block.body:
                if (isinstance(stmt, ir.Assign) and
                    isinstance(stmt.value, ir.Const)):
                    if config.DEBUG_ARRAY_OPT == 1:
                        print("remembering in const_dict", stmt.target.name,
                              stmt.value.value)
                    # Remember consts for use later.
                    const_dict[stmt.target.name] = stmt.value.value
                if ((isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Expr)
                        and stmt.value.op in ['setitem', 'static_setitem']
                        and stmt.value.value.name in kernel.arg_names) or
                   (isinstance(stmt, ir.SetItem)
                        and stmt.target.name in kernel.arg_names)):
                    raise ValueError("Assignments to arrays passed to stencil " \
                        "kernels is not allowed.")
                if (isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Expr)
                        and stmt.value.op in ['getitem', 'static_getitem']
                        and stmt.value.value.name in kernel.arg_names
                        and stmt.value.value.name not in standard_indexed):
                    # We found a getitem from the input array.
                    if stmt.value.op == 'getitem':
                        stmt_index_var = stmt.value.index
                    else:
                        stmt_index_var = stmt.value.index_var
                        # allow static_getitem since rewrite passes are applied
                        #raise ValueError("Unexpected static_getitem in add_indices_to_kernel.")

                    relatively_indexed.add(stmt.value.value.name)

                    # Store the index used after looking up the variable in
                    # the const dictionary.
                    if need_to_calc_kernel:
                        assert hasattr(stmt_index_var, 'name')

                        if stmt_index_var.name in tuple_table:
                            kernel_consts += [tuple_table[stmt_index_var.name]]
                        elif stmt_index_var.name in const_dict:
                            kernel_consts += [const_dict[stmt_index_var.name]]
                        else:
                            raise ValueError("Non-constant specified for "
                                             "stencil kernel index.")

                    if ndim == 1:
                        # Single dimension always has index variable 'index0'.
                        # tmpvar will hold the real index and is computed by
                        # adding the relative offset in stmt.value.index to
                        # the current absolute location in index0.
                        index_var = ir.Var(scope, index_names[0], loc)
                        tmpname = ir_utils.mk_unique_var("stencil_index")
                        tmpvar  = ir.Var(scope, tmpname, loc)
                        acc_call = ir.Expr.binop('+', stmt_index_var,
                                                 index_var, loc)
                        new_body.append(ir.Assign(acc_call, tmpvar, loc))
                        new_body.append(ir.Assign(
                                       ir.Expr.getitem(stmt.value.value,tmpvar,loc),
                                       stmt.target,loc))
                    else:
                        index_vars = []
                        sum_results = []
                        s_index_name = ir_utils.mk_unique_var("stencil_index")
                        s_index_var  = ir.Var(scope, s_index_name, loc)
                        const_index_vars = []
                        ind_stencils = []

                        # Same idea as above but you have to extract
                        # individual elements out of the tuple indexing
                        # expression and add the corresponding index variable
                        # to them and then reconstitute as a tuple that can
                        # index the array.
                        for dim in range(ndim):
                            tmpname = ir_utils.mk_unique_var("const_index")
                            tmpvar  = ir.Var(scope, tmpname, loc)
                            new_body.append(ir.Assign(ir.Const(dim, loc),
                                                      tmpvar, loc))
                            const_index_vars += [tmpvar]
                            index_var = ir.Var(scope, index_names[dim], loc)
                            index_vars += [index_var]

                            tmpname = ir_utils.mk_unique_var("ind_stencil_index")
                            tmpvar  = ir.Var(scope, tmpname, loc)
                            ind_stencils += [tmpvar]
                            getitemname = ir_utils.mk_unique_var("getitem")
                            getitemvar  = ir.Var(scope, getitemname, loc)
                            getitemcall = ir.Expr.getitem(stmt_index_var,
                                                       const_index_vars[dim], loc)
                            new_body.append(ir.Assign(getitemcall, getitemvar, loc))
                            acc_call = ir.Expr.binop('+', getitemvar,
                                                     index_vars[dim], loc)
                            new_body.append(ir.Assign(acc_call, tmpvar, loc))

                        tuple_call = ir.Expr.build_tuple(ind_stencils, loc)
                        new_body.append(ir.Assign(tuple_call, s_index_var, loc))
                        new_body.append(ir.Assign(
                                  ir.Expr.getitem(stmt.value.value,s_index_var,loc),
                                  stmt.target,loc))
                else:
                    new_body.append(stmt)
            block.body = new_body

        if need_to_calc_kernel:
            # Find the size of the kernel by finding the maximum absolute value
            # index used in the kernel specification.
            neighborhood = [[0,0] for _ in range(ndim)]
            if len(kernel_consts) == 0:
                raise ValueError("Stencil kernel with no accesses to "
                                 "relatively indexed arrays.")

            for index in kernel_consts:
                if isinstance(index, tuple) or isinstance(index, list):
                    for i in range(len(index)):
                        te = index[i]
                        if isinstance(te, ir.Var) and te.name in const_dict:
                            te = const_dict[te.name]
                        if isinstance(te, int):
                            neighborhood[i][0] = min(neighborhood[i][0], te)
                            neighborhood[i][1] = max(neighborhood[i][1], te)
                        else:
                            raise ValueError(
                                "Non-constant used as stencil index.")
                    index_len = len(index)
                elif isinstance(index, int):
                    neighborhood[0][0] = min(neighborhood[0][0], index)
                    neighborhood[0][1] = max(neighborhood[0][1], index)
                    index_len = 1
                else:
                    raise ValueError(
                        "Non-tuple or non-integer used as stencil index.")
                if index_len != ndim:
                    raise ValueError(
                        "Stencil index does not match array dimensionality.")

        return (neighborhood, relatively_indexed)
예제 #37
0
파일: stencil.py 프로젝트: cpcloud/numba
    def _stencil_wrapper(self, result, sigret, return_type, typemap, calltypes, *args):
        # Overall approach:
        # 1) Construct a string containing a function definition for the stencil function
        #    that will execute the stencil kernel.  This function definition includes a
        #    unique stencil function name, the parameters to the stencil kernel, loop
        #    nests across the dimenions of the input array.  Those loop nests use the
        #    computed stencil kernel size so as not to try to compute elements where
        #    elements outside the bounds of the input array would be needed.
        # 2) The but of the loop nest in this new function is a special sentinel
        #    assignment.
        # 3) Get the IR of this new function.
        # 4) Split the block containing the sentinel assignment and remove the sentinel
        #    assignment.  Insert the stencil kernel IR into the stencil function IR
        #    after label and variable renaming of the stencil kernel IR to prevent
        #    conflicts with the stencil function IR.
        # 5) Compile the combined stencil function IR + stencil kernel IR into existence.

        # Copy the kernel so that our changes for this callsite
        # won't effect other callsites.
        (kernel_copy, copy_calltypes) = self.copy_ir_with_calltypes(
                                            self.kernel_ir, calltypes)
        # The stencil kernel body becomes the body of a loop, for which args aren't needed.
        ir_utils.remove_args(kernel_copy.blocks)
        first_arg = kernel_copy.arg_names[0]

        in_cps, out_cps = ir_utils.copy_propagate(kernel_copy.blocks, typemap)
        name_var_table = ir_utils.get_name_var_table(kernel_copy.blocks)
        ir_utils.apply_copy_propagate(
            kernel_copy.blocks,
            in_cps,
            name_var_table,
            typemap,
            copy_calltypes)

        if "out" in name_var_table:
            raise ValueError("Cannot use the reserved word 'out' in stencil kernels.")

        sentinel_name = ir_utils.get_unused_var_name("__sentinel__", name_var_table)
        if config.DEBUG_ARRAY_OPT == 1:
            print("name_var_table", name_var_table, sentinel_name)

        the_array = args[0]

        if config.DEBUG_ARRAY_OPT == 1:
            print("_stencil_wrapper", return_type, return_type.dtype,
                                      type(return_type.dtype), args)
            ir_utils.dump_blocks(kernel_copy.blocks)

        # We generate a Numba function to execute this stencil and here
        # create the unique name of this function.
        stencil_func_name = "__numba_stencil_%s_%s" % (
                                        hex(id(the_array)).replace("-", "_"),
                                        self.id)

        # We will put a loop nest in the generated function for each
        # dimension in the input array.  Here we create the name for
        # the index variable for each dimension.  index0, index1, ...
        index_vars = []
        for i in range(the_array.ndim):
            index_var_name = ir_utils.get_unused_var_name("index" + str(i),
                                                          name_var_table)
            index_vars += [index_var_name]

        # Create extra signature for out and neighborhood.
        out_name = ir_utils.get_unused_var_name("out", name_var_table)
        neighborhood_name = ir_utils.get_unused_var_name("neighborhood",
                                                         name_var_table)
        sig_extra = ""
        if result is not None:
            sig_extra += ", {}=None".format(out_name)
        if "neighborhood" in dict(self.kws):
            sig_extra += ", {}=None".format(neighborhood_name)

        # Get a list of the standard indexed array names.
        standard_indexed = self.options.get("standard_indexing", [])

        if first_arg in standard_indexed:
            raise ValueError("The first argument to a stencil kernel must "
                             "use relative indexing, not standard indexing.")

        if len(set(standard_indexed) - set(kernel_copy.arg_names)) != 0:
            raise ValueError("Standard indexing requested for an array name "
                             "not present in the stencil kernel definition.")

        # Add index variables to getitems in the IR to transition the accesses
        # in the kernel from relative to regular Python indexing.  Returns the
        # computed size of the stencil kernel and a list of the relatively indexed
        # arrays.
        kernel_size, relatively_indexed = self.add_indices_to_kernel(
                kernel_copy, index_vars, the_array.ndim,
                self.neighborhood, standard_indexed)
        if self.neighborhood is None:
            self.neighborhood = kernel_size

        if config.DEBUG_ARRAY_OPT == 1:
            print("After add_indices_to_kernel")
            ir_utils.dump_blocks(kernel_copy.blocks)

        # The return in the stencil kernel becomes a setitem for that
        # particular point in the iteration space.
        ret_blocks = self.replace_return_with_setitem(kernel_copy.blocks,
                                                      index_vars, out_name)

        if config.DEBUG_ARRAY_OPT == 1:
            print("After replace_return_with_setitem", ret_blocks)
            ir_utils.dump_blocks(kernel_copy.blocks)

        # Start to form the new function to execute the stencil kernel.
        func_text = "def {}({}{}):\n".format(stencil_func_name,
                        ",".join(kernel_copy.arg_names), sig_extra)

        # Get loop ranges for each dimension, which could be either int
        # or variable. In the latter case we'll use the extra neighborhood
        # argument to the function.
        ranges = []
        for i in range(the_array.ndim):
            if isinstance(kernel_size[i][0], int):
                lo = kernel_size[i][0]
                hi = kernel_size[i][1]
            else:
                lo = "{}[{}][0]".format(neighborhood_name, i)
                hi = "{}[{}][1]".format(neighborhood_name, i)
            ranges.append((lo, hi))

        # If there are more than one relatively indexed arrays, add a call to
        # a function that will raise an error if any of the relatively indexed
        # arrays are of different size than the first input array.
        if len(relatively_indexed) > 1:
            func_text += "    raise_if_incompatible_array_sizes(" + first_arg
            for other_array in relatively_indexed:
                if other_array != first_arg:
                    func_text += "," + other_array
            func_text += ")\n"

        # Get the shape of the first input array.
        shape_name = ir_utils.get_unused_var_name("full_shape", name_var_table)
        func_text += "    {} = {}.shape\n".format(shape_name, first_arg)


        # If we have to allocate the output array (the out argument was not used)
        # then us numpy.full if the user specified a cval stencil decorator option
        # or np.zeros if they didn't to allocate the array.
        if result is None:
            if "cval" in self.options:
                cval = self.options["cval"]
                if return_type.dtype != typing.typeof.typeof(cval):
                    raise ValueError(
                        "cval type does not match stencil return type.")

                out_init ="{} = np.full({}, {}, dtype=np.{})\n".format(
                            out_name, shape_name, cval, return_type.dtype)

            else:
                out_init ="{} = np.zeros({}, dtype=np.{})\n".format(
                            out_name, shape_name, return_type.dtype)
            func_text += "    " + out_init

        offset = 1
        # Add the loop nests to the new function.
        for i in range(the_array.ndim):
            for j in range(offset):
                func_text += "    "
            # ranges[i][0] is the minimum index used in the i'th dimension
            # but minimum's greater than 0 don't preclude any entry in the array.
            # So, take the minimum of 0 and the minimum index found in the kernel
            # and this will be a negative number (potentially -0).  Then, we do
            # unary - on that to get the positive offset in this dimension whose
            # use is precluded.
            # ranges[i][1] is the maximum of 0 and the observed maximum index
            # in this dimension because negative maximums would not cause us to
            # preclude any entry in the array from being used.
            func_text += ("for {} in range(-min(0,{}),"
                          "{}[{}]-max(0,{})):\n").format(
                            index_vars[i],
                            ranges[i][0],
                            shape_name,
                            i,
                            ranges[i][1])
            offset += 1

        for j in range(offset):
            func_text += "    "
        # Put a sentinel in the code so we can locate it in the IR.  We will
        # remove this sentinel assignment and replace it with the IR for the
        # stencil kernel body.
        func_text += "{} = 0\n".format(sentinel_name)
        func_text += "    return {}\n".format(out_name)

        if config.DEBUG_ARRAY_OPT == 1:
            print("new stencil func text")
            print(func_text)

        # Force the new stencil function into existence.
        exec_(func_text) in globals(), locals()
        stencil_func = eval(stencil_func_name)
        if sigret is not None:
            pysig = utils.pysignature(stencil_func)
            sigret.pysig = pysig
        # Get the IR for the newly created stencil function.
        stencil_ir = compiler.run_frontend(stencil_func)
        ir_utils.remove_dels(stencil_ir.blocks)

        # rename all variables in stencil_ir afresh
        var_table = ir_utils.get_name_var_table(stencil_ir.blocks)
        new_var_dict = {}
        reserved_names = ([sentinel_name, out_name, neighborhood_name,
                           shape_name] + kernel_copy.arg_names + index_vars)
        for name, var in var_table.items():
            if not name in reserved_names:
                new_var_dict[name] = ir_utils.mk_unique_var(name)
        ir_utils.replace_var_names(stencil_ir.blocks, new_var_dict)

        stencil_stub_last_label = max(stencil_ir.blocks.keys()) + 1

        # Shift lables in the kernel copy so they are guaranteed unique
        # and don't conflict with any labels in the stencil_ir.
        kernel_copy.blocks = ir_utils.add_offset_to_labels(
                                kernel_copy.blocks, stencil_stub_last_label)
        new_label = max(kernel_copy.blocks.keys()) + 1
        # Adjust ret_blocks to account for addition of the offset.
        ret_blocks = [x + stencil_stub_last_label for x in ret_blocks]

        if config.DEBUG_ARRAY_OPT == 1:
            print("ret_blocks w/ offsets", ret_blocks, stencil_stub_last_label)
            print("before replace sentinel stencil_ir")
            ir_utils.dump_blocks(stencil_ir.blocks)
            print("before replace sentinel kernel_copy")
            ir_utils.dump_blocks(kernel_copy.blocks)

        # Search all the block in the stencil outline for the sentinel.
        for label, block in stencil_ir.blocks.items():
            for i, inst in enumerate(block.body):
                if (isinstance( inst, ir.Assign) and
                    inst.target.name == sentinel_name):
                    # We found the sentinel assignment.
                    loc = inst.loc
                    scope = block.scope
                    # split block across __sentinel__
                    # A new block is allocated for the statements prior to the
                    # sentinel but the new block maintains the current block
                    # label.
                    prev_block = ir.Block(scope, loc)
                    prev_block.body = block.body[:i]
                    # The current block is used for statements after sentinel.
                    block.body = block.body[i + 1:]
                    # But the current block gets a new label.
                    body_first_label = min(kernel_copy.blocks.keys())

                    # The previous block jumps to the minimum labelled block of
                    # the parfor body.
                    prev_block.append(ir.Jump(body_first_label, loc))
                    # Add all the parfor loop body blocks to the gufunc
                    # function's IR.
                    for (l, b) in kernel_copy.blocks.items():
                        stencil_ir.blocks[l] = b

                    stencil_ir.blocks[new_label] = block
                    stencil_ir.blocks[label] = prev_block
                    # Add a jump from all the blocks that previously contained
                    # a return in the stencil kernel to the block
                    # containing statements after the sentinel.
                    for ret_block in ret_blocks:
                        stencil_ir.blocks[ret_block].append(
                            ir.Jump(new_label, loc))
                    break
            else:
                continue
            break

        stencil_ir.blocks = ir_utils.rename_labels(stencil_ir.blocks)
        ir_utils.remove_dels(stencil_ir.blocks)

        assert(isinstance(the_array, types.Type))
        array_types = args

        new_stencil_param_types = list(array_types)

        if config.DEBUG_ARRAY_OPT == 1:
            print("new_stencil_param_types", new_stencil_param_types)
            ir_utils.dump_blocks(stencil_ir.blocks)

        # Compile the combined stencil function with the replaced loop
        # body in it.
        new_func = compiler.compile_ir(
            self._typingctx,
            self._targetctx,
            stencil_ir,
            new_stencil_param_types,
            None,
            compiler.DEFAULT_FLAGS,
            {})
        return new_func
예제 #38
0
    def inline_array(array_var, expr, stmts, list_vars, dels):
        """Check to see if the given "array_var" is created from a list
        of constants, and try to inline the list definition as array
        initialization.

        Extra statements produced with be appended to "stmts".
        """
        callname = guard(find_callname, func_ir, expr)
        require(callname and callname[1] == 'numpy' and callname[0] == 'array')
        require(expr.args[0].name in list_vars)
        ret_type = calltypes[expr].return_type
        require(isinstance(ret_type, types.ArrayCompatible) and
                           ret_type.ndim == 1)
        loc = expr.loc
        list_var = expr.args[0]
        # Get the type of the array to be created.
        array_typ = typemap[array_var.name]
        debug_print("inline array_var = ", array_var, " list_var = ", list_var)
        # Get the element type of the array to be created.
        dtype = array_typ.dtype
        # Get the sequence of operations to provide values to the new array.
        seq, _ = find_build_sequence(func_ir, list_var)
        size = len(seq)
        # Create a tuple to pass to empty below to specify the new array size.
        size_var = ir.Var(scope, mk_unique_var("size"), loc)
        size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
        size_typ = types.intp
        size_tuple_typ = types.UniTuple(size_typ, 1)
        typemap[size_var.name] = size_typ
        typemap[size_tuple_var.name] = size_tuple_typ
        stmts.append(_new_definition(func_ir, size_var,
                 ir.Const(size, loc=loc), loc))
        stmts.append(_new_definition(func_ir, size_tuple_var,
                 ir.Expr.build_tuple(items=[size_var], loc=loc), loc))

        # The general approach is to create an empty array and then fill
        # the elements in one-by-one from their specificiation.

        # Get the numpy type to pass to empty.
        nptype = types.DType(dtype)

        # Create a variable to hold the numpy empty function.
        empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
        fnty = get_np_ufunc_typ(np.empty)
        sig = context.resolve_function_type(fnty, (size_typ,), {'dtype':nptype})

        typemap[empty_func.name] = fnty

        stmts.append(_new_definition(func_ir, empty_func,
                         ir.Global('empty', np.empty, loc=loc), loc))

        # We pass two arguments to empty, first the size tuple and second
        # the dtype of the new array.  Here, we created typ_var which is
        # the dtype argument of the new array.  typ_var in turn is created
        # by getattr of the dtype string on the numpy module.

        # Create var for numpy module.
        g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
        typemap[g_np_var.name] = types.misc.Module(np)
        g_np = ir.Global('np', np, loc)
        stmts.append(_new_definition(func_ir, g_np_var, g_np, loc))

        # Create var for result of numpy.<dtype>.
        typ_var = ir.Var(scope, mk_unique_var("$np_typ_var"), loc)
        typemap[typ_var.name] = nptype
        dtype_str = str(dtype)
        if dtype_str == 'bool':
            dtype_str = 'bool_'
        # Get dtype attribute of numpy module.
        np_typ_getattr = ir.Expr.getattr(g_np_var, dtype_str, loc)
        stmts.append(_new_definition(func_ir, typ_var, np_typ_getattr, loc))

        # Create the call to numpy.empty passing the size tuple and dtype var.
        empty_call = ir.Expr.call(empty_func, [size_var, typ_var], {}, loc=loc)
        calltypes[empty_call] = typing.signature(array_typ, size_typ, nptype)
        stmts.append(_new_definition(func_ir, array_var, empty_call, loc))

        # Fill in the new empty array one-by-one.
        for i in range(size):
            index_var = ir.Var(scope, mk_unique_var("index"), loc)
            index_typ = types.intp
            typemap[index_var.name] = index_typ
            stmts.append(_new_definition(func_ir, index_var,
                    ir.Const(i, loc), loc))
            setitem = ir.SetItem(array_var, index_var, seq[i], loc)
            calltypes[setitem] = typing.signature(types.none, array_typ,
                                                  index_typ, dtype)
            stmts.append(setitem)

        stmts.extend(dels)
        return True
예제 #39
0
def get_stencil_ir(sf, typingctx, args, scope, loc, input_dict, typemap,
                                                                    calltypes):
    """get typed IR from stencil bytecode
    """
    from numba.targets.cpu import CPUContext
    from numba.targets.registry import cpu_target
    from numba.annotations import type_annotations
    from numba.compiler import type_inference_stage

    # get untyped IR
    stencil_func_ir = sf.kernel_ir.copy()
    # copy the IR nodes to avoid changing IR in the StencilFunc object
    stencil_blocks = copy.deepcopy(stencil_func_ir.blocks)
    stencil_func_ir.blocks = stencil_blocks

    name_var_table = ir_utils.get_name_var_table(stencil_func_ir.blocks)
    if "out" in name_var_table:
        raise ValueError("Cannot use the reserved word 'out' in stencil kernels.")

    # get typed IR with a dummy pipeline (similar to test_parfors.py)
    targetctx = CPUContext(typingctx)
    with cpu_target.nested_context(typingctx, targetctx):
        tp = DummyPipeline(typingctx, targetctx, args, stencil_func_ir)

        numba.rewrites.rewrite_registry.apply(
            'before-inference', tp, tp.func_ir)

        tp.typemap, tp.return_type, tp.calltypes = type_inference_stage(
            tp.typingctx, tp.func_ir, tp.args, None)

        type_annotations.TypeAnnotation(
            func_ir=tp.func_ir,
            typemap=tp.typemap,
            calltypes=tp.calltypes,
            lifted=(),
            lifted_from=None,
            args=tp.args,
            return_type=tp.return_type,
            html_output=numba.config.HTML)

    # make block labels unique
    stencil_blocks = ir_utils.add_offset_to_labels(stencil_blocks,
                                                        ir_utils.next_label())
    min_label = min(stencil_blocks.keys())
    max_label = max(stencil_blocks.keys())
    ir_utils._max_label = max_label

    if config.DEBUG_ARRAY_OPT == 1:
        print("Initial stencil_blocks")
        ir_utils.dump_blocks(stencil_blocks)

    # rename variables,
    var_dict = {}
    for v, typ in tp.typemap.items():
        new_var = ir.Var(scope, mk_unique_var(v), loc)
        var_dict[v] = new_var
        typemap[new_var.name] = typ  # add new var type for overall function
    ir_utils.replace_vars(stencil_blocks, var_dict)

    if config.DEBUG_ARRAY_OPT == 1:
        print("After replace_vars")
        ir_utils.dump_blocks(stencil_blocks)

    # add call types to overall function
    for call, call_typ in tp.calltypes.items():
        calltypes[call] = call_typ

    arg_to_arr_dict = {}
    # replace arg with arr
    for block in stencil_blocks.values():
        for stmt in block.body:
            if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Arg):
                if config.DEBUG_ARRAY_OPT == 1:
                    print("input_dict", input_dict, stmt.value.index,
                               stmt.value.name, stmt.value.index in input_dict)
                arg_to_arr_dict[stmt.value.name] = input_dict[stmt.value.index].name
                stmt.value = input_dict[stmt.value.index]

    if config.DEBUG_ARRAY_OPT == 1:
        print("arg_to_arr_dict", arg_to_arr_dict)
        print("After replace arg with arr")
        ir_utils.dump_blocks(stencil_blocks)

    ir_utils.remove_dels(stencil_blocks)
    stencil_func_ir.blocks = stencil_blocks
    return stencil_func_ir, sf.get_return_type(args)[0], arg_to_arr_dict
예제 #40
0
    def _add_index_offsets(self, index_list, index_offsets, new_body,
                           scope, loc):
        """ Does the actual work of adding loop index variables to the
            relative index constants or variables.
        """
        assert len(index_list) == len(index_offsets)

        # shortcut if all values are integer
        if all([isinstance(v, int) for v in index_list+index_offsets]):
            # add offsets in all dimensions
            return list(map(add, index_list, index_offsets))

        out_nodes = []
        index_vars = []
        for i in range(len(index_list)):
            # new_index = old_index + offset
            old_index_var = index_list[i]
            if isinstance(old_index_var, int):
                old_index_var = ir.Var(scope,
                                mk_unique_var("old_index_var"), loc)
                self.typemap[old_index_var.name] = types.intp
                const_assign = ir.Assign(ir.Const(index_list[i], loc),
                                                    old_index_var, loc)
                out_nodes.append(const_assign)

            offset_var = index_offsets[i]
            if isinstance(offset_var, int):
                offset_var = ir.Var(scope,
                                mk_unique_var("offset_var"), loc)
                self.typemap[offset_var.name] = types.intp
                const_assign = ir.Assign(ir.Const(index_offsets[i], loc),
                                                offset_var, loc)
                out_nodes.append(const_assign)

            if (isinstance(old_index_var, slice)
                    or isinstance(self.typemap[old_index_var.name],
                                    types.misc.SliceType)):
                # only one arg can be slice
                assert self.typemap[offset_var.name] == types.intp
                index_var = self._add_offset_to_slice(old_index_var, offset_var,
                                                        out_nodes, scope, loc)
                index_vars.append(index_var)
                continue

            if (isinstance(offset_var, slice)
                    or isinstance(self.typemap[offset_var.name],
                                    types.misc.SliceType)):
                # only one arg can be slice
                assert self.typemap[old_index_var.name] == types.intp
                index_var = self._add_offset_to_slice(offset_var, old_index_var,
                                                        out_nodes, scope, loc)
                index_vars.append(index_var)
                continue

            index_var = ir.Var(scope,
                            mk_unique_var("offset_stencil_index"), loc)
            self.typemap[index_var.name] = types.intp
            index_call = ir.Expr.binop('+', old_index_var,
                                                offset_var, loc)
            self.calltypes[index_call] = ir_utils.find_op_typ('+',
                                        [types.intp, types.intp])
            index_assign = ir.Assign(index_call, index_var, loc)
            out_nodes.append(index_assign)
            index_vars.append(index_var)

        new_body.extend(out_nodes)
        return index_vars
예제 #41
0
    def _replace_stencil_accesses(self, stencil_blocks, parfor_vars, in_args,
                                  index_offsets, stencil_func, arg_to_arr_dict):
        """ Convert relative indexing in the stencil kernel to standard indexing
            by adding the loop index variables to the corresponding dimensions
            of the array index tuples.
        """
        in_arr = in_args[0]
        in_arg_names = [x.name for x in in_args]

        if "standard_indexing" in stencil_func.options:
            for x in stencil_func.options["standard_indexing"]:
                if x not in arg_to_arr_dict:
                    raise ValueError("Standard indexing requested for an array " \
                        "name not present in the stencil kernel definition.")
            standard_indexed = [arg_to_arr_dict[x] for x in
                                     stencil_func.options["standard_indexing"]]
        else:
            standard_indexed = []

        if in_arr.name in standard_indexed:
            raise ValueError("The first argument to a stencil kernel must use " \
                "relative indexing, not standard indexing.")

        ndims = self.typemap[in_arr.name].ndim
        scope = in_arr.scope
        loc = in_arr.loc
        # replace access indices, find access lengths in each dimension
        need_to_calc_kernel = stencil_func.neighborhood is None

        # If we need to infer the kernel size then initialize the minimum and
        # maximum seen indices for each dimension to 0.  If we already have
        # the neighborhood calculated then just convert from neighborhood format
        # to the separate start and end lengths format used here.
        if need_to_calc_kernel:
            start_lengths = ndims*[0]
            end_lengths = ndims*[0]
        else:
            start_lengths = [x[0] for x in stencil_func.neighborhood]
            end_lengths   = [x[1] for x in stencil_func.neighborhood]

        # Get all the tuples defined in the stencil blocks.
        tuple_table = ir_utils.get_tuple_table(stencil_blocks)

        found_relative_index = False

        # For all blocks in the stencil kernel...
        for label, block in stencil_blocks.items():
            new_body = []
            # For all statements in those blocks...
            for stmt in block.body:
                # Reject assignments to input arrays.
                if ((isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Expr)
                        and stmt.value.op in ['setitem', 'static_setitem']
                        and stmt.value.value.name in in_arg_names) or
                   ((isinstance(stmt, ir.SetItem) or
                     isinstance(stmt, ir.StaticSetItem))
                        and stmt.target.name in in_arg_names)):
                    raise ValueError("Assignments to arrays passed to stencil kernels is not allowed.")
                # We found a getitem for some array.  If that array is an input
                # array and isn't in the list of standard indexed arrays then
                # update min and max seen indices if we are inferring the
                # kernel size and create a new tuple where the relative offsets
                # are added to loop index vars to get standard indexing.
                if (isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Expr)
                        and stmt.value.op in ['static_getitem', 'getitem']
                        and stmt.value.value.name in in_arg_names
                        and stmt.value.value.name not in standard_indexed):
                    index_list = stmt.value.index
                    # handle 1D case
                    if ndims == 1:
                        index_list = [index_list]
                    else:
                        if hasattr(index_list, 'name') and index_list.name in tuple_table:
                            index_list = tuple_table[index_list.name]
                    if index_offsets:
                        index_list = self._add_index_offsets(index_list,
                                    list(index_offsets), new_body, scope, loc)

                    # update min and max indices
                    if need_to_calc_kernel:
                        # all indices should be integer to be able to calculate
                        # neighborhood automatically
                        if (isinstance(index_list, ir.Var) or
                            any([not isinstance(v, int) for v in index_list])):
                            raise ValueError("Variable stencil index only "
                                "possible with known neighborhood")
                        start_lengths = list(map(min, start_lengths,
                                                                    index_list))
                        end_lengths = list(map(max, end_lengths, index_list))
                        found_relative_index = True

                    # update access indices
                    index_vars = self._add_index_offsets(parfor_vars,
                                list(index_list), new_body, scope, loc)

                    # new access index tuple
                    if ndims == 1:
                        ind_var = index_vars[0]
                    else:
                        ind_var = ir.Var(scope, mk_unique_var(
                            "$parfor_index_ind_var"), loc)
                        self.typemap[ind_var.name] = types.containers.UniTuple(
                            types.intp, ndims)
                        tuple_call = ir.Expr.build_tuple(index_vars, loc)
                        tuple_assign = ir.Assign(tuple_call, ind_var, loc)
                        new_body.append(tuple_assign)

                    # getitem return type is scalar if all indices are integer
                    if all([self.typemap[v.name] == types.intp
                                                        for v in index_vars]):
                        getitem_return_typ = self.typemap[
                                                    stmt.value.value.name].dtype
                    else:
                        # getitem returns an array
                        getitem_return_typ = self.typemap[stmt.value.value.name]
                    # new getitem with the new index var
                    getitem_call = ir.Expr.getitem(stmt.value.value, ind_var,
                                                                            loc)
                    self.calltypes[getitem_call] = signature(
                        getitem_return_typ,
                        self.typemap[stmt.value.value.name],
                        self.typemap[ind_var.name])
                    stmt.value = getitem_call

                new_body.append(stmt)
            block.body = new_body
        if need_to_calc_kernel and not found_relative_index:
            raise ValueError("Stencil kernel with no accesses to " \
                "relatively indexed arrays.")

        return start_lengths, end_lengths
예제 #42
0
    def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_ir,
                           index_offsets, target, return_type, stencil_func,
                           arg_to_arr_dict):
        """ Converts a set of stencil kernel blocks to a parfor.
        """
        gen_nodes = []
        stencil_blocks = stencil_ir.blocks

        if config.DEBUG_ARRAY_OPT == 1:
            print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
                   return_type, stencil_func, stencil_blocks)
            ir_utils.dump_blocks(stencil_blocks)

        in_arr = in_args[0]
        # run copy propagate to replace in_args copies (e.g. a = A)
        in_arr_typ = self.typemap[in_arr.name]
        in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
        name_var_table = ir_utils.get_name_var_table(stencil_blocks)

        ir_utils.apply_copy_propagate(
            stencil_blocks,
            in_cps,
            name_var_table,
            self.typemap,
            self.calltypes)
        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after copy_propagate")
            ir_utils.dump_blocks(stencil_blocks)
        ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names, stencil_ir,
                             self.typemap)
        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after removing dead code")
            ir_utils.dump_blocks(stencil_blocks)

        # create parfor vars
        ndims = self.typemap[in_arr.name].ndim
        scope = in_arr.scope
        loc = in_arr.loc
        parfor_vars = []
        for i in range(ndims):
            parfor_var = ir.Var(scope, mk_unique_var(
                "$parfor_index_var"), loc)
            self.typemap[parfor_var.name] = types.intp
            parfor_vars.append(parfor_var)

        start_lengths, end_lengths = self._replace_stencil_accesses(
             stencil_blocks, parfor_vars, in_args, index_offsets, stencil_func,
             arg_to_arr_dict)

        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after replace stencil accesses")
            ir_utils.dump_blocks(stencil_blocks)

        # create parfor loop nests
        loopnests = []
        equiv_set = self.array_analysis.get_equiv_set(label)
        in_arr_dim_sizes = equiv_set.get_shape(in_arr)

        assert ndims == len(in_arr_dim_sizes)
        for i in range(ndims):
            last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i],
                                        end_lengths[i], gen_nodes, scope, loc)
            start_ind = self._get_stencil_start_ind(
                                        start_lengths[i], gen_nodes, scope, loc)
            # start from stencil size to avoid invalid array access
            loopnests.append(numba.parfor.LoopNest(parfor_vars[i],
                                start_ind, last_ind, 1))

        # We have to guarantee that the exit block has maximum label and that
        # there's only one exit block for the parfor body.
        # So, all return statements will change to jump to the parfor exit block.
        parfor_body_exit_label = max(stencil_blocks.keys()) + 1
        stencil_blocks[parfor_body_exit_label] = ir.Block(scope, loc)
        exit_value_var = ir.Var(scope, mk_unique_var("$parfor_exit_value"), loc)
        self.typemap[exit_value_var.name] = return_type.dtype

        # create parfor index var
        for_replacing_ret = []
        if ndims == 1:
            parfor_ind_var = parfor_vars[0]
        else:
            parfor_ind_var = ir.Var(scope, mk_unique_var(
                "$parfor_index_tuple_var"), loc)
            self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
                types.intp, ndims)
            tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
            tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
            for_replacing_ret.append(tuple_assign)

        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after creating parfor index var")
            ir_utils.dump_blocks(stencil_blocks)

        # empty init block
        init_block = ir.Block(scope, loc)
        if out_arr == None:
            in_arr_typ = self.typemap[in_arr.name]

            shape_name = ir_utils.mk_unique_var("in_arr_shape")
            shape_var = ir.Var(scope, shape_name, loc)
            shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
            self.typemap[shape_name] = types.containers.UniTuple(types.intp,
                                                               in_arr_typ.ndim)
            init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)])

            zero_name = ir_utils.mk_unique_var("zero_val")
            zero_var = ir.Var(scope, zero_name, loc)
            if "cval" in stencil_func.options:
                cval = stencil_func.options["cval"]
                # TODO: Loosen this restriction to adhere to casting rules.
                if return_type.dtype != typing.typeof.typeof(cval):
                    raise ValueError("cval type does not match stencil return type.")

                temp2 = return_type.dtype(cval)
            else:
                temp2 = return_type.dtype(0)
            full_const = ir.Const(temp2, loc)
            self.typemap[zero_name] = return_type.dtype
            init_block.body.extend([ir.Assign(full_const, zero_var, loc)])

            so_name = ir_utils.mk_unique_var("stencil_output")
            out_arr = ir.Var(scope, so_name, loc)
            self.typemap[out_arr.name] = numba.types.npytypes.Array(
                                                           return_type.dtype,
                                                           in_arr_typ.ndim,
                                                           in_arr_typ.layout)
            dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
            self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
            dtype_g_np = ir.Global('np', np, loc)
            dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
            init_block.body.append(dtype_g_np_assign)

            dtype_np_attr_call = ir.Expr.getattr(dtype_g_np_var, return_type.dtype.name, loc)
            dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
            self.typemap[dtype_attr_var.name] = types.functions.NumberClass(return_type.dtype)
            dtype_attr_assign = ir.Assign(dtype_np_attr_call, dtype_attr_var, loc)
            init_block.body.append(dtype_attr_assign)

            stmts = ir_utils.gen_np_call("full",
                                       np.full,
                                       out_arr,
                                       [shape_var, zero_var, dtype_attr_var],
                                       self.typingctx,
                                       self.typemap,
                                       self.calltypes)
            equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
            init_block.body.extend(stmts)

        self.replace_return_with_setitem(stencil_blocks, exit_value_var,
                                         parfor_body_exit_label)

        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after replacing return")
            ir_utils.dump_blocks(stencil_blocks)

        setitem_call = ir.SetItem(out_arr, parfor_ind_var, exit_value_var, loc)
        self.calltypes[setitem_call] = signature(
                                        types.none, self.typemap[out_arr.name],
                                        self.typemap[parfor_ind_var.name],
                                        self.typemap[out_arr.name].dtype
                                        )
        stencil_blocks[parfor_body_exit_label].body.extend(for_replacing_ret)
        stencil_blocks[parfor_body_exit_label].body.append(setitem_call)

        # simplify CFG of parfor body (exit block could be simplified often)
        # add dummy return to enable CFG
        stencil_blocks[parfor_body_exit_label].body.append(ir.Return(0,
                                            ir.Loc("stencilparfor_dummy", -1)))
        stencil_blocks = ir_utils.simplify_CFG(stencil_blocks)
        stencil_blocks[max(stencil_blocks.keys())].body.pop()

        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after adding SetItem")
            ir_utils.dump_blocks(stencil_blocks)

        pattern = ('stencil', [start_lengths, end_lengths])
        parfor = numba.parfor.Parfor(loopnests, init_block, stencil_blocks,
                                     loc, parfor_ind_var, equiv_set, pattern, self.flags)
        gen_nodes.append(parfor)
        gen_nodes.append(ir.Assign(out_arr, target, loc))
        return gen_nodes
예제 #43
0
def _inline_arraycall(func_ir, cfg, visited, loop, enable_prange=False):
    """Look for array(list) call in the exit block of a given loop, and turn list operations into
    array operations in the loop if the following conditions are met:
      1. The exit block contains an array call on the list;
      2. The list variable is no longer live after array call;
      3. The list is created in the loop entry block;
      4. The loop is created from an range iterator whose length is known prior to the loop;
      5. There is only one list_append operation on the list variable in the loop body;
      6. The block that contains list_append dominates the loop head, which ensures list
         length is the same as loop length;
    If any condition check fails, no modification will be made to the incoming IR.
    """
    debug_print = _make_debug_print("inline_arraycall")
    # There should only be one loop exit
    require(len(loop.exits) == 1)
    exit_block = next(iter(loop.exits))
    list_var, array_call_index, array_kws = _find_arraycall(func_ir, func_ir.blocks[exit_block])

    # check if dtype is present in array call
    dtype_def = None
    dtype_mod_def = None
    if 'dtype' in array_kws:
        require(isinstance(array_kws['dtype'], ir.Var))
        # We require that dtype argument to be a constant of getattr Expr, and we'll
        # remember its definition for later use.
        dtype_def = get_definition(func_ir, array_kws['dtype'])
        require(isinstance(dtype_def, ir.Expr) and dtype_def.op == 'getattr')
        dtype_mod_def = get_definition(func_ir, dtype_def.value)

    list_var_def = get_definition(func_ir, list_var)
    debug_print("list_var = ", list_var, " def = ", list_var_def)
    if isinstance(list_var_def, ir.Expr) and list_var_def.op == 'cast':
        list_var_def = get_definition(func_ir, list_var_def.value)
    # Check if the definition is a build_list
    require(isinstance(list_var_def, ir.Expr) and list_var_def.op ==  'build_list')

    # Look for list_append in "last" block in loop body, which should be a block that is
    # a post-dominator of the loop header.
    list_append_stmts = []
    for label in loop.body:
        # We have to consider blocks of this loop, but not sub-loops.
        # To achieve this, we require the set of "in_loops" of "label" to be visited loops.
        in_visited_loops = [l.header in visited for l in cfg.in_loops(label)]
        if not all(in_visited_loops):
            continue
        block = func_ir.blocks[label]
        debug_print("check loop body block ", label)
        for stmt in block.find_insts(ir.Assign):
            lhs = stmt.target
            expr = stmt.value
            if isinstance(expr, ir.Expr) and expr.op == 'call':
                func_def = get_definition(func_ir, expr.func)
                if isinstance(func_def, ir.Expr) and func_def.op == 'getattr' \
                  and func_def.attr == 'append':
                    list_def = get_definition(func_ir, func_def.value)
                    debug_print("list_def = ", list_def, list_def == list_var_def)
                    if list_def == list_var_def:
                        # found matching append call
                        list_append_stmts.append((label, block, stmt))

    # Require only one list_append, otherwise we won't know the indices
    require(len(list_append_stmts) == 1)
    append_block_label, append_block, append_stmt = list_append_stmts[0]

    # Check if append_block (besides loop entry) dominates loop header.
    # Since CFG doesn't give us this info without loop entry, we approximate
    # by checking if the predecessor set of the header block is the same
    # as loop_entries plus append_block, which is certainly more restrictive
    # than necessary, and can be relaxed if needed.
    preds = set(l for l, b in cfg.predecessors(loop.header))
    debug_print("preds = ", preds, (loop.entries | set([append_block_label])))
    require(preds == (loop.entries | set([append_block_label])))

    # Find iterator in loop header
    iter_vars = []
    iter_first_vars = []
    loop_header = func_ir.blocks[loop.header]
    for stmt in loop_header.find_insts(ir.Assign):
        expr = stmt.value
        if isinstance(expr, ir.Expr):
            if expr.op == 'iternext':
                iter_def = get_definition(func_ir, expr.value)
                debug_print("iter_def = ", iter_def)
                iter_vars.append(expr.value)
            elif expr.op == 'pair_first':
                iter_first_vars.append(stmt.target)

    # Require only one iterator in loop header
    require(len(iter_vars) == 1 and len(iter_first_vars) == 1)
    iter_var = iter_vars[0] # variable that holds the iterator object
    iter_first_var = iter_first_vars[0] # variable that holds the value out of iterator

    # Final requirement: only one loop entry, and we're going to modify it by:
    # 1. replacing the list definition with an array definition;
    # 2. adding a counter for the array iteration.
    require(len(loop.entries) == 1)
    loop_entry = func_ir.blocks[next(iter(loop.entries))]
    terminator = loop_entry.terminator
    scope = loop_entry.scope
    loc = loop_entry.loc
    stmts = []
    removed = []
    def is_removed(val, removed):
        if isinstance(val, ir.Var):
            for x in removed:
                if x.name == val.name:
                    return True
        return False
    # Skip list construction and skip terminator, add the rest to stmts
    for i in range(len(loop_entry.body) - 1):
        stmt = loop_entry.body[i]
        if isinstance(stmt, ir.Assign) and (stmt.value == list_def or is_removed(stmt.value, removed)):
            removed.append(stmt.target)
        else:
            stmts.append(stmt)
    debug_print("removed variables: ", removed)

    # Define an index_var to index the array.
    # If the range happens to be single step ranges like range(n), or range(m, n),
    # then the index_var correlates to iterator index; otherwise we'll have to
    # define a new counter.
    range_def = guard(_find_iter_range, func_ir, iter_var)
    index_var = ir.Var(scope, mk_unique_var("index"), loc)
    if range_def and range_def[0] == 0:
        # iterator starts with 0, index_var can just be iter_first_var
        index_var = iter_first_var
    else:
        # index_var = -1 # starting the index with -1 since it will incremented in loop header
        stmts.append(_new_definition(func_ir, index_var, ir.Const(value=-1, loc=loc), loc))

    # Insert statement to get the size of the loop iterator
    size_var = ir.Var(scope, mk_unique_var("size"), loc)
    if range_def:
        start, stop, range_func_def = range_def
        if start == 0:
            size_val = stop
        else:
            size_val = ir.Expr.binop(fn='-', lhs=stop, rhs=start, loc=loc)
        # we can parallelize this loop if enable_prange = True, by changing
        # range function from range, to prange.
        if enable_prange and isinstance(range_func_def, ir.Global):
            range_func_def.name = 'internal_prange'
            range_func_def.value = internal_prange

    else:
        len_func_var = ir.Var(scope, mk_unique_var("len_func"), loc)
        stmts.append(_new_definition(func_ir, len_func_var,
                     ir.Global('range_iter_len', range_iter_len, loc=loc), loc))
        size_val = ir.Expr.call(len_func_var, (iter_var,), (), loc=loc)

    stmts.append(_new_definition(func_ir, size_var, size_val, loc))

    size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
    stmts.append(_new_definition(func_ir, size_tuple_var,
                 ir.Expr.build_tuple(items=[size_var], loc=loc), loc))

    # Insert array allocation
    array_var = ir.Var(scope, mk_unique_var("array"), loc)
    empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
    if dtype_def and dtype_mod_def:
        # when dtype is present, we'll call emtpy with dtype
        dtype_mod_var = ir.Var(scope, mk_unique_var("dtype_mod"), loc)
        dtype_var = ir.Var(scope, mk_unique_var("dtype"), loc)
        stmts.append(_new_definition(func_ir, dtype_mod_var, dtype_mod_def, loc))
        stmts.append(_new_definition(func_ir, dtype_var,
                         ir.Expr.getattr(dtype_mod_var, dtype_def.attr, loc), loc))
        stmts.append(_new_definition(func_ir, empty_func,
                         ir.Global('empty', np.empty, loc=loc), loc))
        array_kws = [('dtype', dtype_var)]
    else:
        # otherwise we'll call unsafe_empty_inferred
        stmts.append(_new_definition(func_ir, empty_func,
                         ir.Global('unsafe_empty_inferred',
                             unsafe_empty_inferred, loc=loc), loc))
        array_kws = []
    # array_var = empty_func(size_tuple_var)
    stmts.append(_new_definition(func_ir, array_var,
                 ir.Expr.call(empty_func, (size_tuple_var,), list(array_kws), loc=loc), loc))

    # Add back removed just in case they are used by something else
    for var in removed:
        stmts.append(_new_definition(func_ir, var, array_var, loc))

    # Add back terminator
    stmts.append(terminator)
    # Modify loop_entry
    loop_entry.body = stmts

    if range_def:
        if range_def[0] != 0:
            # when range doesn't start from 0, index_var becomes loop index
            # (iter_first_var) minus an offset (range_def[0])
            terminator = loop_header.terminator
            assert(isinstance(terminator, ir.Branch))
            # find the block in the loop body that header jumps to
            block_id = terminator.truebr
            blk = func_ir.blocks[block_id]
            loc = blk.loc
            blk.body.insert(0, _new_definition(func_ir, index_var,
                ir.Expr.binop(fn='-', lhs=iter_first_var,
                                      rhs=range_def[0], loc=loc),
                loc))
    else:
        # Insert index_var increment to the end of loop header
        loc = loop_header.loc
        terminator = loop_header.terminator
        stmts = loop_header.body[0:-1]
        next_index_var = ir.Var(scope, mk_unique_var("next_index"), loc)
        one = ir.Var(scope, mk_unique_var("one"), loc)
        # one = 1
        stmts.append(_new_definition(func_ir, one,
                     ir.Const(value=1,loc=loc), loc))
        # next_index_var = index_var + 1
        stmts.append(_new_definition(func_ir, next_index_var,
                     ir.Expr.binop(fn='+', lhs=index_var, rhs=one, loc=loc), loc))
        # index_var = next_index_var
        stmts.append(_new_definition(func_ir, index_var, next_index_var, loc))
        stmts.append(terminator)
        loop_header.body = stmts

    # In append_block, change list_append into array assign
    for i in range(len(append_block.body)):
        if append_block.body[i] == append_stmt:
            debug_print("Replace append with SetItem")
            append_block.body[i] = ir.SetItem(target=array_var, index=index_var,
                                              value=append_stmt.value.args[0], loc=append_stmt.loc)

    # replace array call, by changing "a = array(b)" to "a = b"
    stmt = func_ir.blocks[exit_block].body[array_call_index]
    # stmt can be either array call or SetItem, we only replace array call
    if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr):
        stmt.value = array_var
        func_ir._definitions[stmt.target.name] = [stmt.value]

    return True