Example #1
0
def csv_distributed_run(csv_node, array_dists, typemap, calltypes, typingctx, targetctx, dist_pass):
    """Lower a CSV-read IR node into calls to a generated reader function.

    The read runs in parallel only when MPI transport is enabled and every
    output array is distributed (OneD or OneD_Var).  Returns the list of IR
    nodes replacing csv_node, and records per-array size/start/count
    variables on dist_pass for later distributed analysis.
    """
    parallel = True

    if sdc.config.config_transport_mpi:
        for v in csv_node.out_vars:
            # any replicated output forces a sequential read
            if (array_dists[v.name] != distributed.Distribution.OneD
                    and array_dists[v.name] != distributed.Distribution.OneD_Var):
                parallel = False
    else:
        parallel = False

    n_cols = len(csv_node.out_vars)
    # TODO: rebalance if output distributions are 1D instead of 1D_Var
    # get column variables
    arg_names = ", ".join("arr" + str(i) for i in range(n_cols))
    func_text = "def csv_impl(fname):\n"
    func_text += "    ({},) = _csv_reader_py(fname)\n".format(arg_names)
    # print(func_text)

    loc_vars = {}
    exec(func_text, {}, loc_vars)
    csv_impl = loc_vars['csv_impl']

    csv_reader_py = _gen_csv_reader_py(
        csv_node.df_colnames, csv_node.out_types, csv_node.usecols,
        csv_node.sep, typingctx, targetctx, parallel, csv_node.skiprows)

    f_block = compile_to_numba_ir(csv_impl,
                                  {'_csv_reader_py': csv_reader_py},
                                  typingctx, (string_type,),
                                  typemap, calltypes).blocks.popitem()[1]
    replace_arg_nodes(f_block, [csv_node.file_name])
    # drop the trailing implicit-return statements of the compiled block
    nodes = f_block.body[:-3]
    # retarget the tuple-unpack assignments onto the node's output variables
    for i in range(len(csv_node.out_vars)):
        nodes[-len(csv_node.out_vars) + i].target = csv_node.out_vars[i]

    # get global array sizes by calling allreduce on chunk lens
    # TODO: get global size from C
    for arr in csv_node.out_vars:
        def f(A):
            return sdc.distributed_api.dist_reduce(len(A), np.int32(_op))
        f_block = compile_to_numba_ir(
            f, {'sdc': sdc, 'np': np,
                '_op': sdc.distributed_api.Reduce_Type.Sum.value},
            typingctx, (typemap[arr.name],), typemap, calltypes).blocks.popitem()[1]
        replace_arg_nodes(f_block, [arr])
        nodes += f_block.body[:-2]  # last kept statement assigns the reduced size
        size_var = nodes[-1].target
        dist_pass._array_sizes[arr.name] = [size_var]
        # compute this rank's 1D chunk boundaries from the global size
        out, start_var, end_var = dist_pass._gen_1D_div(
            size_var, arr.scope, csv_node.loc, "$alloc", "get_node_portion",
            sdc.distributed_api.get_node_portion)
        dist_pass._array_starts[arr.name] = [start_var]
        dist_pass._array_counts[arr.name] = [end_var]
        nodes += out

    return nodes
Example #2
0
    def _gen_rolling_init(self, win_size, func, center):
        """Generate IR computing the (left, right) window boundary tuple and
        index offsets for a rolling-window stencil.

        Returns (index_offsets, win_tuple, nodes) where nodes is the list of
        IR statements that must execute before the stencil call.
        """
        nodes = []
        scope = win_size.scope
        loc = win_size.loc
        # right boundary defaults to the constant 0 (trailing window).
        # BUG FIX: ir.Var takes (scope, name, loc); the third argument was
        # previously `scope`.  Also removed the dead `right_length = 0`
        # initializer that was immediately overwritten.
        right_length = ir.Var(scope, mk_unique_var('zero_var'), loc)
        nodes.append(ir.Assign(ir.Const(0, loc), right_length, win_size.loc))

        def f(w):
            return -w + 1

        f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
        replace_arg_nodes(f_block, [win_size])
        nodes.extend(f_block.body[:-2])  # remove none return
        left_length = nodes[-1].target

        if center:
            # centered window: boundaries are -(w//2) and w//2

            def f(w):
                return -(w // 2)

            f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
            replace_arg_nodes(f_block, [win_size])
            nodes.extend(f_block.body[:-2])  # remove none return
            left_length = nodes[-1].target

            def f(w):
                return (w // 2)

            f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
            replace_arg_nodes(f_block, [win_size])
            nodes.extend(f_block.body[:-2])  # remove none return
            right_length = nodes[-1].target

        def f(a, b):
            return ((a, b), )

        f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
        replace_arg_nodes(f_block, [left_length, right_length])
        nodes.extend(f_block.body[:-2])  # remove none return
        win_tuple = nodes[-1].target

        index_offsets = [right_length]

        if func == 'apply':
            index_offsets = [left_length]

        def f(a):
            return (a, )

        f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
        replace_arg_nodes(f_block, index_offsets)
        nodes.extend(f_block.body[:-2])  # remove none return
        index_offsets = nodes[-1].target

        return index_offsets, win_tuple, nodes
Example #3
0
    def _run_pd_DatetimeIndex(self, assign, lhs, rhs):
        """transform pd.DatetimeIndex() call with string array argument

        The call is replaced by an inlined prange loop parsing each string
        into a datetime value; returns the compiled replacement blocks.
        """
        kws = dict(rhs.kws)
        if 'data' in kws:
            data = kws['data']
            if len(rhs.args) != 0:  # pragma: no cover
                raise ValueError(
                    "only data argument suppoted in pd.DatetimeIndex()")
        else:
            if len(rhs.args) != 1:  # pragma: no cover
                raise ValueError(
                    "data argument in pd.DatetimeIndex() expected")
            data = rhs.args[0]

        def f(str_arr):
            numba.parfor.init_prange()
            n = len(str_arr)
            S = numba.unsafe.ndarray.empty_inferred((n, ))
            for i in numba.parfor.internal_prange(n):
                S[i] = hpat.pd_timestamp_ext.parse_datetime_str(str_arr[i])
            # 'ret = S' exists only to produce a final assignment whose
            # target can be redirected to lhs below
            ret = S

        f_ir = compile_to_numba_ir(
            f, {
                'hpat': hpat,
                'numba': numba
            }, self.typingctx,
            (if_series_to_array_type(self.typemap[data.name]), ), self.typemap,
            self.calltypes)
        topo_order = find_topo_order(f_ir.blocks)
        # body[-4] is the 'ret = S' assignment (the last three statements
        # form the implicit None return sequence)
        f_ir.blocks[topo_order[-1]].body[-4].target = lhs
        replace_arg_nodes(f_ir.blocks[topo_order[0]], [data])
        return f_ir.blocks
Example #4
0
def _handle_np_fromfile(assign, lhs, rhs):
    """translate np.fromfile() to native

    Replaces the call with IR that queries the file size, allocates the
    output array, and fills it via the native file_read symbol.
    """
    # TODO: dtype in kws
    if len(rhs.args) != 2:  # pragma: no cover
        raise ValueError("np.fromfile(): file name and dtype expected")

    # FIXME: import here since hio has hdf5 which might not be available
    from .. import hio
    import llvmlite.binding as ll
    # register the native I/O symbols so lowered code can link against them
    ll.add_symbol('get_file_size', hio.get_file_size)
    ll.add_symbol('file_read', hio.file_read)
    ll.add_symbol('file_read_parallel', hio.file_read_parallel)
    _fname = rhs.args[0]
    _dtype = rhs.args[1]

    def fromfile_impl(fname, dtype):
        size = get_file_size(fname)
        dtype_size = get_dtype_size(dtype)
        A = np.empty(size // dtype_size, dtype=dtype)
        file_read(fname, A, size)
        # final assignment marks the result variable retargeted below
        read_arr = A

    f_block = compile_to_numba_ir(
        fromfile_impl, {
            'np': np,
            'get_file_size': get_file_size,
            'file_read': file_read,
            'get_dtype_size': get_dtype_size
        }).blocks.popitem()[1]
    replace_arg_nodes(f_block, [_fname, _dtype])
    nodes = f_block.body[:-3]  # remove none return
    nodes[-1].target = lhs
    return nodes
Example #5
0
def get_column_read_nodes(c_type, cvar, file_name, i):
    """Generate IR nodes reading parquet column `i` of `file_name` into cvar.

    String columns go through read_parquet_str; other columns are allocated
    with np.empty and filled by read_parquet.
    """
    loc = cvar.loc

    func_text = ('def f(fname):\n  col_size = get_column_size_parquet(fname, {})\n'.
            format(i))
    # generate strings differently
    if c_type == string_array_type:
        # pass size for easier allocation and distributed analysis
        func_text += '  column = read_parquet_str(fname, {}, col_size)\n'.format(
                                                            i)
    else:
        el_type = get_element_type(c_type.dtype)
        func_text += '  column = np.empty(col_size, dtype=np.{})\n'.format(
                                                                        el_type)
        func_text += '  status = read_parquet(fname, {}, column, np.int32({}))\n'.format(
                                        i, _type_to_pq_dtype_number[el_type])
    loc_vars = {}
    exec(func_text, {}, loc_vars)
    size_func = loc_vars['f']
    _, f_block = compile_to_numba_ir(size_func,
                {'get_column_size_parquet': get_column_size_parquet,
                'read_parquet': read_parquet,
                'read_parquet_str': read_parquet_str, 'np': np,
                'StringArray': StringArray}).blocks.popitem()

    replace_arg_nodes(f_block, [file_name])
    out_nodes = f_block.body[:-3]  # drop implicit None return
    # Find the assignment producing the column array.  Guard on ir.Assign
    # (the block may contain non-assign statements, matching the sibling
    # xenon reader) and fail loudly instead of raising NameError if the
    # column variable is ever missing.
    assign = None
    for stmt in reversed(out_nodes):
        if isinstance(stmt, ir.Assign) and stmt.target.name.startswith("column"):
            assign = ir.Assign(stmt.target, cvar, loc)
            break
    assert assign is not None, "column assignment not found in generated IR"

    out_nodes.append(assign)
    return out_nodes
Example #6
0
    def _handle_empty_like(self, assign, lhs, rhs):
        # B = empty_like(A) -> B = empty(len(A), dtype)
        in_arr = rhs.args[0]

        if self.typemap[in_arr.name].ndim == 1:
            # generate simpler len() for 1D case
            def f(_in_arr):  # pragma: no cover
                _alloc_size = len(_in_arr)
                _out_arr = np.empty(_alloc_size, _in_arr.dtype)
        else:
            # ndim > 1: allocate with the full shape tuple
            def f(_in_arr):  # pragma: no cover
                _alloc_size = _in_arr.shape
                _out_arr = np.empty(_alloc_size, _in_arr.dtype)

        f_block = compile_to_numba_ir(
            f, {
                'np': np
            }, self.typingctx,
            (if_series_to_array_type(self.typemap[in_arr.name]), ),
            self.typemap, self.calltypes).blocks.popitem()[1]
        replace_arg_nodes(f_block, [in_arr])
        nodes = f_block.body[:-3]  # remove none return
        nodes[-1].target = assign.target  # redirect _out_arr to B
        return nodes
Example #7
0
def gen_init_xenon(address, dset_name):
    """Generate IR nodes that connect to Xenon at `address` and open
    `dset_name`; returns (out_nodes, connect_var, dset_t_var).
    """
    # TODO: support non-constant address/dset_name
    func_text = ('def f():\n  connect_t = xe_connect(unicode_to_char_ptr("{}"))\n'.format(address))
    func_text += '  dset_t = xe_open(connect_t, unicode_to_char_ptr("{}"))\n'.format(dset_name)

    loc_vars = {}
    exec(func_text, {}, loc_vars)
    init_func = loc_vars['f']
    f_block = compile_to_numba_ir(init_func,
                                  {'xe_connect': xe_connect,
                                   'unicode_to_char_ptr': unicode_to_char_ptr,
                                   'xe_open': xe_open}).blocks.popitem()[1]

    connect_var = None
    dset_t_var = None

    out_nodes = f_block.body[:-3]  # drop implicit None return
    # locate the IR variables created for connect_t and dset_t above
    for stmt in reversed(out_nodes):
        if stmt.target.name.startswith("connect_t"):
            connect_var = stmt.target
        if stmt.target.name.startswith("dset_t"):
            dset_t_var = stmt.target

    assert connect_var is not None and dset_t_var is not None
    return out_nodes, connect_var, dset_t_var
Example #8
0
    def _handle_str_contains(self, lhs, rhs, assign, call_table):
        """Replace a str_contains_(no)regex() call with an inlined prange
        loop computing the boolean result array.

        Returns (f_blocks, [assign]) with the rewritten assignment, or None
        when rhs is not a recognized str-contains call.
        """
        fname = guard(find_callname, self.func_ir, rhs)
        if fname is None:
            return None

        if fname == ('str_contains_regex', 'hpat.hiframes_api'):
            comp_func = 'hpat.str_ext.contains_regex'
        elif fname == ('str_contains_noregex', 'hpat.hiframes_api'):
            comp_func = 'hpat.str_ext.contains_noregex'
        else:
            return None

        str_arr = rhs.args[0]
        pat = rhs.args[1]
        func_text = 'def f(str_arr, pat):\n'
        func_text += '  l = len(str_arr)\n'
        func_text += '  S = np.empty(l, dtype=np.bool_)\n'
        func_text += '  for i in numba.parfor.internal_prange(l):\n'
        func_text += '    S[i] = {}(str_arr[i], pat)\n'.format(comp_func)
        loc_vars = {}
        exec(func_text, {}, loc_vars)
        f = loc_vars['f']
        f_blocks = compile_to_numba_ir(
            f, {
                'numba': numba,
                'np': np,
                'hpat': hpat
            }, self.typingctx,
            (self.typemap[str_arr.name], self.typemap[pat.name]), self.typemap,
            self.calltypes).blocks
        replace_arg_nodes(f_blocks[min(f_blocks.keys())], [str_arr, pat])
        # replace call with result of parfor (S)
        # S is target of last statement in 1st block of f
        assign.value = f_blocks[min(f_blocks.keys())].body[-2].target
        return (f_blocks, [assign])
Example #9
0
    def _handle_fix_df_array(self, lhs, rhs, assign, call_table):
        # arr = fix_df_array(col) -> arr=col if col is array
        if (rhs.op == 'call' and rhs.func.name in call_table
                and call_table[rhs.func.name]
                == ['fix_df_array', 'hiframes_api', hpat]
                and isinstance(self.typemap[rhs.args[0].name],
                               (types.Array, StringArrayType))):
            # input is already an array: the call is a no-op, forward it
            assign.value = rhs.args[0]
            return [assign]
        # arr = fix_rolling_array(col) -> arr=col if col is float array
        if (rhs.op == 'call' and rhs.func.name in call_table
                and call_table[rhs.func.name]
                == ['fix_rolling_array', 'hiframes_api', hpat]):
            in_arr = rhs.args[0]
            if isinstance(self.typemap[in_arr.name].dtype, types.Float):
                assign.value = rhs.args[0]
                return [assign]
            else:

                # non-float input: emit an astype(np.float64) conversion
                def f(column):
                    a = column.astype(np.float64)

                f_block = compile_to_numba_ir(
                    f, {
                        'hpat': hpat,
                        'np': np
                    }, self.typingctx, (self.typemap[in_arr.name], ),
                    self.typemap, self.calltypes).blocks.popitem()[1]
                replace_arg_nodes(f_block, [in_arr])
                nodes = f_block.body[:-3]  # remove none return
                nodes[-1].target = assign.target
                return nodes
        return None
Example #10
0
def get_column_read_nodes(c_type, cvar, xe_connect_var, xe_dset_var, i, schema_arr_var):
    """Generate IR nodes reading Xenon column `i` into cvar.

    String columns go through read_xenon_str; other columns are allocated
    with np.empty and filled by read_xenon_col.
    """
    loc = cvar.loc

    func_text = 'def f(xe_connect_var, xe_dset_var, schema_arr):\n'
    # BUG FIX: this statement previously used '=' and overwrote the 'def'
    # header built above, making the generated source invalid; it must
    # append, and use the same 2-space indent as the lines below.
    func_text += '  col_size = get_column_size_xenon(xe_connect_var, xe_dset_var, {})\n'.format(i)
    # func_text += '  print(col_size)\n'
    # generate strings differently since upfront allocation is not possible
    if c_type == string_array_type:
        # pass size for easier allocation and distributed analysis
        func_text += '  column = read_xenon_str(xe_connect_var, xe_dset_var, {}, col_size, schema_arr)\n'.format(i)
    else:
        el_type = get_element_type(c_type.dtype)
        func_text += '  column = np.empty(col_size, dtype=np.{})\n'.format(el_type)
        func_text += '  status = read_xenon_col(xe_connect_var, xe_dset_var, {}, column, schema_arr)\n'.format(i)
    loc_vars = {}
    exec(func_text, {}, loc_vars)
    size_func = loc_vars['f']
    _, f_block = compile_to_numba_ir(size_func,
                                     {'get_column_size_xenon': get_column_size_xenon,
                                      'read_xenon_col': read_xenon_col,
                                      'read_xenon_str': read_xenon_str,
                                      'np': np,
                                      }).blocks.popitem()

    replace_arg_nodes(f_block, [xe_connect_var, xe_dset_var, schema_arr_var])
    out_nodes = f_block.body[:-3]  # drop implicit None return
    # find the assignment producing the column array; fail loudly instead of
    # raising NameError if it is ever missing
    assign = None
    for stmt in reversed(out_nodes):
        if isinstance(stmt, ir.Assign) and stmt.target.name.startswith("column"):
            assign = ir.Assign(stmt.target, cvar, loc)
            break
    assert assign is not None, "column assignment not found in generated IR"

    out_nodes.append(assign)
    return out_nodes
Example #11
0
    def _handle_empty_like(self, lhs, rhs, assign, call_table):
        # B = empty_like(A) -> B = empty(len(A), dtype)
        if (rhs.op == 'call' and rhs.func.name in call_table
                and call_table[rhs.func.name] == ['empty_like', np]):
            in_arr = rhs.args[0]

            if self.typemap[in_arr.name].ndim == 1:
                # generate simpler len() for 1D case
                def f(_in_arr):  # pragma: no cover
                    _alloc_size = len(_in_arr)
                    _out_arr = np.empty(_alloc_size, _in_arr.dtype)
            else:

                # ndim > 1: allocate with the full shape tuple
                def f(_in_arr):  # pragma: no cover
                    _alloc_size = _in_arr.shape
                    _out_arr = np.empty(_alloc_size, _in_arr.dtype)

            f_block = compile_to_numba_ir(f, {
                'np': np
            }, self.typingctx, (self.typemap[in_arr.name], ), self.typemap,
                                          self.calltypes).blocks.popitem()[1]
            replace_arg_nodes(f_block, [in_arr])
            nodes = f_block.body[:-3]  # remove none return
            nodes[-1].target = assign.target  # redirect _out_arr to B
            return nodes
        return None
Example #12
0
    def _handle_str_contains(self, lhs, rhs):
        """
        Handle string contains like:
          B = df.column.str.contains('oo*', regex=True)

        Returns the replacement IR nodes, or None when rhs does not match.
        """
        func_def = guard(get_definition, self.func_ir, rhs.func)
        assert func_def is not None
        # rare case where function variable is assigned to a new variable
        if isinstance(func_def, ir.Var):
            rhs.func = func_def
            return self._handle_str_contains(lhs, rhs)
        str_col = guard(self._get_str_contains_col, func_def)
        if str_col is None:
            return None
        kws = dict(rhs.kws)
        pat = rhs.args[0]
        regex = True  # default regex arg is True
        if 'regex' in kws:
            regex = get_constant(self.func_ir, kws['regex'], regex)
        if regex:

            def f(str_arr, pat):
                hpat.hiframes_api.str_contains_regex(str_arr, pat)
        else:

            def f(str_arr, pat):
                hpat.hiframes_api.str_contains_noregex(str_arr, pat)

        f_block = compile_to_numba_ir(f, {'hpat': hpat}).blocks.popitem()[1]
        replace_arg_nodes(f_block, [str_col, pat])
        nodes = f_block.body[:-3]  # remove none return
        nodes[-1].target = lhs  # direct the contains result into B
        return nodes
Example #13
0
 def _handle_df_col_filter(self, lhs_name, rhs, assign):
     """Rewrite a boolean-filtered assignment between dataframe columns
     into an empty_like allocation followed by the filter loop."""
     # find df['col2'] = df['col1'][arr]
     # since columns should have the same size, output is filled with NaNs
     # TODO: check for float, make sure col1 and col2 are in the same df
     if (rhs.op == 'getitem' and rhs.value.name in self.df_cols
             and lhs_name in self.df_cols
             and self.is_bool_arr(rhs.index.name)):
         lhs = assign.target
         in_arr = rhs.value
         index_var = rhs.index
         f_blocks = compile_to_numba_ir(
             _column_filter_impl_float, {
                 'numba': numba,
                 'np': np
             }, self.typingctx,
             (self.typemap[lhs.name], self.typemap[in_arr.name],
              self.typemap[index_var.name]), self.typemap,
             self.calltypes).blocks
         first_block = min(f_blocks.keys())
         replace_arg_nodes(f_blocks[first_block], [lhs, in_arr, index_var])
         # allocate the output column first, then run the filter body
         alloc_nodes = gen_np_call('empty_like', np.empty_like, lhs,
                                   [in_arr], self.typingctx, self.typemap,
                                   self.calltypes)
         f_blocks[
             first_block].body = alloc_nodes + f_blocks[first_block].body
         return f_blocks
Example #14
0
def gen_stencil_call(in_arr, out_arr, kernel_func, index_offsets, fir_globals,
                                                other_args=None, options=None):
    """Generate IR nodes implementing numba.stencil(kernel)(in_arr, out=out_arr).

    kernel_func may be a Python function or a pre-built ir.Expr function
    expression.  index_offsets (when not the default [0]) and options are
    forwarded as stencil keyword options; other_args are appended to the
    stencil call.
    """
    if other_args is None:
        other_args = []
    if options is None:
        options = {}
    if index_offsets != [0]:
        options['index_offsets'] = index_offsets
    scope = in_arr.scope
    loc = in_arr.loc
    stencil_nodes = []
    # allocate the output with the input's shape/dtype
    stencil_nodes += gen_empty_like(in_arr, out_arr)

    # BUG FIX: ir.Var takes (scope, name, loc); the third argument was
    # previously `scope`
    kernel_var = ir.Var(scope, mk_unique_var("kernel_var"), loc)
    if not isinstance(kernel_func, ir.Expr):
        kernel_func = ir.Expr.make_function("kernel", kernel_func.__code__,
                    kernel_func.__closure__, kernel_func.__defaults__, loc)
    stencil_nodes.append(ir.Assign(kernel_func, kernel_var, loc))

    def f(A, B, f):
        numba.stencil(f)(A, out=B)
    f_block = compile_to_numba_ir(f, {'numba': numba}).blocks.popitem()[1]
    replace_arg_nodes(f_block, [in_arr, out_arr, kernel_var])
    stencil_nodes += f_block.body[:-3]  # remove none return
    # last two kept statements: stencil construction, then its call
    setup_call = stencil_nodes[-2].value
    stencil_call = stencil_nodes[-1].value
    setup_call.kws = list(options.items())
    stencil_call.args += other_args

    return stencil_nodes
Example #15
0
    def _gen_column_shift_pct(self, out_var, args, col_var, func):
        """Generate IR for Series shift()/pct_change() on col_var into
        out_var using a stencil kernel, then NaN-fill the first
        `shift_const` entries which have no predecessor.
        """
        loc = col_var.loc
        if func == 'pct_change':
            shift_const = 1  # default periods
            if args:
                shift_const = get_constant(self.func_ir, args[0])
                assert shift_const is not NOT_CONSTANT
            # relative-change kernel: (a[0]-a[-n])/a[-n]
            func_text = 'def g(a):\n  return (a[0]-a[{}])/a[{}]\n'.format(
                -shift_const, -shift_const)
        else:
            assert func == 'shift'
            shift_const = get_constant(self.func_ir, args[0])
            assert shift_const is not NOT_CONSTANT
            func_text = 'def g(a):\n  return a[{}]\n'.format(-shift_const)

        loc_vars = {}
        exec(func_text, {}, loc_vars)
        kernel_func = loc_vars['g']

        index_offsets = [0]
        fir_globals = self.func_ir.func_id.func.__globals__
        stencil_nodes = gen_stencil_call(col_var, out_var, kernel_func,
                                         index_offsets, fir_globals)

        # leading entries have no shifted predecessor: set them to NaN
        border_text = 'def f(A):\n  A[0:{}] = np.nan\n'.format(shift_const)
        loc_vars = {}
        exec(border_text, {}, loc_vars)
        border_func = loc_vars['f']

        f_blocks = compile_to_numba_ir(border_func, {'np': np}).blocks
        block = f_blocks[min(f_blocks.keys())]
        replace_arg_nodes(block, [out_var])
        setitem_nodes = block.body[:-3]  # remove none return

        return stencil_nodes + setitem_nodes
Example #16
0
    def _gen_col_describe(self, out_var, args, col_var):
        """Generate IR computing a pandas-like describe() summary string
        for col_var into out_var."""
        def f(A):
            a_count = hpat.hiframes_api.count(A)
            a_min = np.min(A)
            a_max = np.max(A)
            a_mean = hpat.hiframes_api.mean(A)
            a_std = hpat.hiframes_api.var(A)**0.5
            q25 = hpat.hiframes_api.quantile(A, .25)
            q50 = hpat.hiframes_api.quantile(A, .5)
            q75 = hpat.hiframes_api.quantile(A, .75)
            # adjacent string literals concatenate implicitly across the
            # backslash line continuations
            s = "count    "+str(a_count)+"\n"\
                "mean     "+str(a_mean)+"\n"\
                "std      "+str(a_std)+"\n"\
                "min      "+str(a_min)+"\n"\
                "25%      "+str(q25)+"\n"\
                "50%      "+str(q50)+"\n"\
                "75%      "+str(q75)+"\n"\
                "max      "+str(a_max)+"\n"

        f_block = compile_to_numba_ir(f, {
            'hpat': hpat,
            'np': np
        }).blocks.popitem()[1]
        replace_arg_nodes(f_block, [col_var])
        nodes = f_block.body[:-3]  # remove none return
        nodes[-1].target = out_var  # redirect the summary string to out_var
        return nodes
Example #17
0
 def _add_offset_to_slice(self, slice_var, offset_var, out_nodes, scope,
                             loc):
     """Return an index variable for `slice_var` shifted by `offset_var`,
     appending the IR that computes it to out_nodes.

     slice_var may be a compile-time Python slice (constant bounds) or an
     IR variable of slice type.
     """
     if isinstance(slice_var, slice):
         f_text = """def f(offset):
             return slice({} + offset, {} + offset)
         """.format(slice_var.start, slice_var.stop)
         # FIX: this dict was previously named `loc`, shadowing the `loc`
         # parameter; renamed to loc_vars for consistency with the rest of
         # the file
         loc_vars = {}
         exec_(f_text, {}, loc_vars)
         f = loc_vars['f']
         args = [offset_var]
         arg_typs = (types.intp,)
     else:
         def f(old_slice, offset):
             return slice(old_slice.start + offset, old_slice.stop + offset)
         args = [slice_var, offset_var]
         slice_type = self.typemap[slice_var.name]
         arg_typs = (slice_type, types.intp,)
     _globals = self.func_ir.func_id.func.__globals__
     f_ir = compile_to_numba_ir(f, _globals, self.typingctx, arg_typs,
                                 self.typemap, self.calltypes)
     _, block = f_ir.blocks.popitem()
     replace_arg_nodes(block, args)
     # the value returned by f is the shifted slice variable
     new_index = block.body[-2].value.value
     out_nodes.extend(block.body[:-2])  # ignore return nodes
     return new_index
Example #18
0
def _copy_array_nodes(var, nodes, typingctx, typemap, calltypes):
    """Append IR statements that deep-copy array `var` to `nodes` and
    return the variable holding the copy."""
    def copy_impl(arr):
        return arr.copy()

    copy_block = compile_to_numba_ir(
        copy_impl, {}, typingctx, (typemap[var.name], ),
        typemap, calltypes).blocks.popitem()[1]
    replace_arg_nodes(copy_block, [var])
    # keep everything but the trailing return pair; the last kept statement
    # assigns the copied array
    nodes += copy_block.body[:-2]
    return nodes[-1].target
Example #19
0
    def _gen_col_var(self, out_var, args, col_var):
        """Generate IR computing hpat variance of `col_var` into `out_var`."""
        def var_impl(A):  # pragma: no cover
            s = hpat.hiframes_api.var(A)

        block = compile_to_numba_ir(
            var_impl, {'hpat': hpat}).blocks.popitem()[1]
        replace_arg_nodes(block, [col_var])
        out_nodes = block.body[:-3]  # drop the implicit None return
        out_nodes[-1].target = out_var  # redirect the result to out_var
        return out_nodes
Example #20
0
    def _gen_col_quantile(self, out_var, args, col_var):
        """Generate IR computing hpat quantile(col_var, args[0]) into out_var."""
        def quantile_impl(A, q):
            s = hpat.hiframes_api.quantile(A, q)

        block = compile_to_numba_ir(
            quantile_impl, {'hpat': hpat}).blocks.popitem()[1]
        replace_arg_nodes(block, [col_var, args[0]])
        out_nodes = block.body[:-3]  # drop the implicit None return
        out_nodes[-1].target = out_var  # redirect the result to out_var
        return out_nodes
Example #21
0
def gen_close_xenon(connect_var, dset_t_var):
    """Return IR nodes that call xe_close on the given connection/dataset."""
    def close_func(connect_var, dset_t_var):
        s = xe_close(connect_var, dset_t_var)

    block = compile_to_numba_ir(
        close_func, {'xe_close': xe_close}).blocks.popitem()[1]
    replace_arg_nodes(block, [connect_var, dset_t_var])
    # drop the implicit None return; no result variable is needed
    return block.body[:-3]
Example #22
0
 def _get_stencil_start_ind(self, start_length, gen_nodes, scope, loc):
     if isinstance(start_length, int):
         return abs(min(start_length, 0))
     def get_start_ind(s_length):
         return abs(min(s_length, 0))
     f_ir = compile_to_numba_ir(get_start_ind, {}, self.typingctx,
                              (types.intp,), self.typemap, self.calltypes)
     assert len(f_ir.blocks) == 1
     block = f_ir.blocks.popitem()[1]
     replace_arg_nodes(block, [start_length])
     gen_nodes += block.body[:-2]
     ret_var = block.body[-2].value.value
     return ret_var
Example #23
0
    def _fix_rolling_array(self, col_var, func):
        """
        for integers and bools, the output should be converted to float64

        Returns (new_col_var, nodes) with the converted column variable and
        the IR nodes that produce it.
        """
        # TODO: check all possible funcs
        def fix_impl(arr):
            df_arr = hpat.hiframes_api.fix_rolling_array(arr)

        fix_block = compile_to_numba_ir(
            fix_impl, {'hpat': hpat}).blocks.popitem()[1]
        replace_arg_nodes(fix_block, [col_var])
        conv_nodes = fix_block.body[:-3]  # drop the implicit None return
        return conv_nodes[-1].target, conv_nodes
Example #24
0
 def _fix_df_arrays(self, items_list):
     """Pass every (name, column) pair through hpat fix_df_array.

     Returns (nodes, new_list): the generated IR and the pairs with the
     converted column variables.
     """
     nodes = []
     new_list = []
     for item in items_list:
         col_varname = item[0]
         col_arr = item[1]
         def f(arr):
             df_arr = hpat.hiframes_api.fix_df_array(arr)
         f_block = compile_to_numba_ir(f, {'hpat': hpat}).blocks.popitem()[1]
         replace_arg_nodes(f_block, [col_arr])
         nodes += f_block.body[:-3]  # remove none return
         new_col_arr = nodes[-1].target
         new_list.append((col_varname, new_col_arr))
     return nodes, new_list
Example #25
0
    def _gen_col_std(self, out_var, args, col_var):
        """Generate IR computing std (sqrt of variance) of col_var into
        out_var."""
        loc = out_var.loc
        scope = out_var.scope
        # calculate var() first
        var_var = ir.Var(scope, mk_unique_var("var_val"), loc)
        v_nodes = self._gen_col_var(var_var, args, col_var)

        def f(a):
            a**0.5

        s_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
        replace_arg_nodes(s_block, [var_var])
        s_nodes = s_block.body[:-3]  # drop the implicit None return
        # sanity check on the compiled block's expected shape
        assert len(s_nodes) == 3
        s_nodes[-1].target = out_var
        return v_nodes + s_nodes
Example #26
0
    def gen_parquet_read(self, file_name):
        """Expand a parquet read of a constant file name into per-column
        read nodes.

        Returns (col_items, out_nodes) where col_items pairs column names
        with the IR variables holding their data.  Raises ValueError when
        the file name is not a compile-time constant (schema unavailable).
        """
        import pyarrow.parquet as pq
        fname_def = guard(get_definition, self.func_ir, file_name)
        if isinstance(fname_def, ir.Const):
            assert isinstance(fname_def.value, str)
            file_name_str = fname_def.value
            col_names, col_types = parquet_file_schema(file_name_str)
            scope = file_name.scope
            loc = file_name.loc
            out_nodes = []
            col_items = []
            for i, cname in enumerate(col_names):
                # get column type from schema
                c_type = col_types[i]
                # create a variable for column and assign type
                varname = mk_unique_var(cname)
                self.locals[varname] = c_type
                cvar = ir.Var(scope, varname, loc)
                col_items.append((cname, cvar))

                size_func_text = (
                    'def f():\n  col_size = get_column_size_parquet("{}", {})\n'
                    .format(file_name_str, i))
                size_func_text += '  column = np.empty(col_size, dtype=np.{})\n'.format(
                    c_type.dtype)
                size_func_text += '  status = read_parquet("{}", {}, column)\n'.format(
                    file_name_str, i)
                loc_vars = {}
                exec(size_func_text, {}, loc_vars)
                size_func = loc_vars['f']
                _, f_block = compile_to_numba_ir(
                    size_func, {
                        'get_column_size_parquet': get_column_size_parquet,
                        'read_parquet': read_parquet,
                        'np': np
                    }).blocks.popitem()

                out_nodes += f_block.body[:-3]  # drop implicit None return
                # BUG FIX: scan backwards so THIS column's "column"
                # assignment is found; out_nodes accumulates across loop
                # iterations, so a forward scan always matched the first
                # column's variable for every subsequent column.
                for stmt in reversed(out_nodes):
                    if isinstance(stmt, ir.Assign) and stmt.target.name.startswith("column"):
                        assign = ir.Assign(stmt.target, cvar, loc)
                        break

                out_nodes.append(assign)

            return col_items, out_nodes
        raise ValueError("Parquet schema not available")
Example #27
0
    def _handle_string_array_expr(self, lhs, rhs, assign):
        """Convert a comparison involving a string array into an explicit
        prange loop producing a boolean array.

        Returns (f_blocks, [assign]) when rhs is a supported binop on a
        string array, otherwise None.
        """
        # convert str_arr==str into parfor
        if (rhs.op == 'binop' and rhs.fn in ['==', '!=', '>=', '>', '<=', '<']
                and (is_str_arr_typ(self.typemap[rhs.lhs.name])
                     or is_str_arr_typ(self.typemap[rhs.rhs.name]))):
            arg1 = rhs.lhs
            arg2 = rhs.rhs
            arg1_access = 'A'
            arg2_access = 'B'
            len_call = 'len(A)'
            if is_str_arr_typ(self.typemap[arg1.name]):
                arg1_access = 'A[i]'
                # replace type now for correct typing of len, etc.
                self.typemap.pop(arg1.name)
                self.typemap[arg1.name] = string_array_type

            if is_str_arr_typ(self.typemap[arg2.name]):
                # BUG FIX: this previously assigned to arg1_access, which
                # clobbered the first operand and left the second operand
                # un-indexed in the generated comparison
                arg2_access = 'B[i]'
                len_call = 'len(B)'
                self.typemap.pop(arg2.name)
                self.typemap[arg2.name] = string_array_type

            func_text = 'def f(A, B):\n'
            func_text += '  l = {}\n'.format(len_call)
            func_text += '  S = np.empty(l, dtype=np.bool_)\n'
            func_text += '  for i in numba.parfor.internal_prange(l):\n'
            func_text += '    S[i] = {} {} {}\n'.format(
                arg1_access, rhs.fn, arg2_access)

            loc_vars = {}
            exec(func_text, {}, loc_vars)
            f = loc_vars['f']
            f_blocks = compile_to_numba_ir(
                f, {
                    'numba': numba,
                    'np': np
                }, self.typingctx,
                (if_series_to_array_type(self.typemap[arg1.name]),
                 if_series_to_array_type(self.typemap[arg2.name])),
                self.typemap, self.calltypes).blocks
            replace_arg_nodes(f_blocks[min(f_blocks.keys())], [arg1, arg2])
            # replace == expression with result of parfor (S)
            # S is target of last statement in 1st block of f
            assign.value = f_blocks[min(f_blocks.keys())].body[-2].target
            return (f_blocks, [assign])

        return None
Example #28
0
 def handle_possible_h5_read(self, assign, lhs, rhs):
     """If lhs/rhs describe a read from an HDF5 dataset, replace it with a
     call to h5_read_dummy; returns the new IR nodes, or None otherwise."""
     tp = self._get_h5_type(lhs, rhs)
     if tp is not None:
         dtype_str = str(tp.dtype)
         func_text = "def _h5_read_impl(dset, index):\n"
         # TODO: index arg?
         func_text += "  arr = hpat.io.pio_api.h5_read_dummy(dset, {}, '{}', index)\n".format(tp.ndim, dtype_str)
         loc_vars = {}
         exec(func_text, {}, loc_vars)
         _h5_read_impl = loc_vars['_h5_read_impl']
         f_block = compile_to_numba_ir(_h5_read_impl, {'hpat': hpat}).blocks.popitem()[1]
         # static getitem nodes keep the index in a separate attribute
         index_var = rhs.index if rhs.op == 'getitem' else rhs.index_var
         replace_arg_nodes(f_block, [rhs.value, index_var])
         nodes = f_block.body[:-3]  # remove none return
         nodes[-1].target = assign.target
         return nodes
     return None
Example #29
0
def get_column_read_nodes(c_type, cvar, arrow_readers_var, i):
    """Generate IR nodes that read parquet column *i* into variable *cvar*.

    Builds a small function as source text (so column index, dtype and the
    pq dtype number are baked in as literals), compiles it to Numba IR,
    wires *arrow_readers_var* in as its argument, and returns the resulting
    statement list with a final assignment copying the read column into
    *cvar*.

    Parameters
    ----------
    c_type : column type (string_type or a numeric/datetime array type)
    cvar : ir.Var receiving the column data
    arrow_readers_var : ir.Var holding the arrow reader handles
    i : int, column index

    Raises
    ------
    RuntimeError if the generated IR contains no assignment to a
    ``column*`` variable (would previously surface as NameError).
    """
    loc = cvar.loc

    func_text = 'def f(arrow_readers):\n'
    func_text += '  col_size = get_column_size_parquet(arrow_readers, {})\n'.format(
        i)
    # generate strings differently
    if c_type == string_type:
        # pass size for easier allocation and distributed analysis
        func_text += '  column = read_parquet_str(arrow_readers, {}, col_size)\n'.format(
            i)
    else:
        el_type = get_element_type(c_type)
        if el_type == repr(types.NPDatetime('ns')):
            # datetime: read as int64, then reinterpret
            func_text += '  column_tmp = np.empty(col_size, dtype=np.int64)\n'
            # TODO: fix alloc
            func_text += '  column = sdc.hiframes.api.ts_series_to_arr_typ(column_tmp)\n'
        else:
            func_text += '  column = np.empty(col_size, dtype=np.{})\n'.format(
                el_type)
        func_text += '  status = read_parquet(arrow_readers, {}, column, np.int32({}))\n'.format(
            i, _type_to_pq_dtype_number[el_type])

    loc_vars = {}
    exec(func_text, {'sdc': sdc, 'np': np}, loc_vars)
    size_func = loc_vars['f']
    _, f_block = compile_to_numba_ir(
        size_func, {
            'get_column_size_parquet': get_column_size_parquet,
            'read_parquet': read_parquet,
            'read_parquet_str': read_parquet_str,
            'np': np,
            'sdc': sdc,
            'StringArray': StringArray
        }).blocks.popitem()

    replace_arg_nodes(f_block, [arrow_readers_var])
    out_nodes = f_block.body[:-3]  # strip "return None" statements

    # Find the last assignment producing the column array.  Guard with
    # isinstance: non-Assign statements (e.g. ir.Del) have no .target, and
    # an unmatched search previously left `assign` unbound (NameError).
    assign = None
    for stmt in reversed(out_nodes):
        if isinstance(stmt, ir.Assign) and stmt.target.name.startswith("column"):
            assign = ir.Assign(stmt.target, cvar, loc)
            break
    if assign is None:
        raise RuntimeError(
            "get_column_read_nodes: no 'column' assignment found in generated IR")

    out_nodes.append(assign)
    return out_nodes
# Example #30
    def _gen_rebalances(self, rebalance_arrs, blocks):
        """Rewrite assignments to arrays named in *rebalance_arrs* so each
        result is passed through hpat.distributed_api.rebalance_array.

        Mutates the blocks' bodies, self.typemap, self.calltypes and
        self.func_ir._definitions in place.  Recurses into Parfor init
        blocks and loop bodies.

        Parameters
        ----------
        rebalance_arrs : collection of variable names to rebalance
        blocks : dict of label -> ir.Block to transform
        """
        for block in blocks.values():
            new_body = []
            for inst in block.body:
                # TODO: handle hiframes filter etc.
                if isinstance(inst, Parfor):
                    # recurse into the parfor's nested IR
                    self._gen_rebalances(rebalance_arrs, {0: inst.init_block})
                    self._gen_rebalances(rebalance_arrs, inst.loop_body)
                if isinstance(
                        inst,
                        ir.Assign) and inst.target.name in rebalance_arrs:
                    out_arr = inst.target
                    # drop the old definition; new ones are rebuilt below
                    self.func_ir._definitions[out_arr.name].remove(inst.value)
                    # hold inst results in tmp array
                    tmp_arr = ir.Var(out_arr.scope,
                                     mk_unique_var("rebalance_tmp"),
                                     out_arr.loc)
                    self.typemap[tmp_arr.name] = self.typemap[out_arr.name]
                    inst.target = tmp_arr
                    nodes = [inst]

                    def f(in_arr):  # pragma: no cover
                        out_a = hpat.distributed_api.rebalance_array(in_arr)

                    f_block = compile_to_numba_ir(
                        f, {
                            'hpat': hpat
                        }, self.typingctx, (self.typemap[tmp_arr.name], ),
                        self.typemap, self.calltypes).blocks.popitem()[1]
                    replace_arg_nodes(f_block, [tmp_arr])
                    nodes += f_block.body[:-3]  # remove none return
                    # last remaining statement assigns the rebalanced array;
                    # retarget it at the original output variable
                    nodes[-1].target = out_arr
                    # update definitions
                    dumm_block = ir.Block(out_arr.scope, out_arr.loc)
                    dumm_block.body = nodes
                    build_definitions({0: dumm_block},
                                      self.func_ir._definitions)
                    new_body += nodes
                else:
                    new_body.append(inst)

            block.body = new_body