Example 1
0
    def _gen_column_shift_pct(self, out_var, args, col_var, func):
        """Generate IR nodes implementing Series.shift()/Series.pct_change()
        as a stencil call over the column array.

        out_var: ir.Var receiving the result array
        args: call arguments; args[0], when present, is the shift period
        col_var: ir.Var holding the input column array
        func: either 'pct_change' or 'shift'

        Returns the list of IR nodes to splice into the caller.
        """
        loc = col_var.loc
        if func == 'pct_change':
            # default period is 1 when no argument is given
            shift_const = 1
            if args:
                shift_const = get_constant(self.func_ir, args[0])
                assert shift_const is not NOT_CONSTANT
            # (a[0] - a[-n]) / a[-n]: relative change vs. n elements back
            func_text = 'def g(a):\n  return (a[0]-a[{}])/a[{}]\n'.format(
                -shift_const, -shift_const)
        else:
            assert func == 'shift'
            # shift requires an explicit constant period argument
            shift_const = get_constant(self.func_ir, args[0])
            assert shift_const is not NOT_CONSTANT
            func_text = 'def g(a):\n  return a[{}]\n'.format(-shift_const)

        # build the stencil kernel dynamically: the period is baked into the
        # relative indexing expression, which must be a compile-time constant
        loc_vars = {}
        exec(func_text, {}, loc_vars)
        kernel_func = loc_vars['g']

        index_offsets = [0]
        fir_globals = self.func_ir.func_id.func.__globals__
        stencil_nodes = gen_stencil_call(col_var, out_var, kernel_func,
                                         index_offsets, fir_globals)

        # the first shift_const elements have no predecessor; fill with NaN.
        # NOTE(review): assumes shift_const > 0 — a negative period makes the
        # slice empty; confirm callers never pass negative shifts
        border_text = 'def f(A):\n  A[0:{}] = np.nan\n'.format(shift_const)
        loc_vars = {}
        exec(border_text, {}, loc_vars)
        border_func = loc_vars['f']

        f_blocks = compile_to_numba_ir(border_func, {'np': np}).blocks
        block = f_blocks[min(f_blocks.keys())]
        replace_arg_nodes(block, [out_var])
        setitem_nodes = block.body[:-3]  # remove none return

        return stencil_nodes + setitem_nodes
Example 2
0
 def _handle_rolling_setup(self, lhs, rhs):
     """
     Handle Series rolling calls like:
       r = df.column.rolling(3)

     Records (column var, window, center) in self.rolling_calls keyed by
     the result variable name. Returns [] so the setup call is removed,
     or None when rhs is not a recognized rolling() setup.
     """
     func_def = guard(get_definition, self.func_ir, rhs.func)
     assert func_def is not None
     # rare case where function variable is assigned to a new variable
     if isinstance(func_def, ir.Var):
         rhs.func = func_def
         return self._handle_rolling_setup(lhs, rhs)
     # df.column.rolling
     if (isinstance(func_def, ir.Expr) and func_def.op == 'getattr'
             and func_def.value.name in self.df_cols
             and func_def.attr == 'rolling'):
         center = False  # used unless overridden by the 'center' kwarg
         kws = dict(rhs.kws)
         # window may be passed positionally or as a keyword
         if rhs.args:
             window = rhs.args[0]
         elif 'window' in kws:
             window = kws['window']
         else:
             raise ValueError("window argument to rolling() required")
         # resolve to a constant when possible; otherwise keep the variable
         window = get_constant(self.func_ir, window, window)
         if 'center' in kws:
             center = get_constant(self.func_ir, kws['center'], center)
         self.rolling_calls[lhs.name] = [func_def.value, window, center]
         return []  # remove
     return None
Example 3
0
 def _handle_merge(self, lhs, rhs):
     """Replace a pandas.merge() call with a hiframes Join node.

     lhs: ir.Var receiving the merged dataframe
     rhs: the ir.Expr call to inspect

     Returns a one-element list with the Join node, or None when rhs is
     not a pandas.merge() call. Raises ValueError when required
     arguments are missing or join keys are not constant strings.
     """
     if guard(find_callname, self.func_ir, rhs) == ('merge', 'pandas'):
         if len(rhs.args) < 2:
             raise ValueError("left and right arguments required for merge")
         left_df = rhs.args[0]
         right_df = rhs.args[1]
         kws = dict(rhs.kws)
         if 'on' in kws:
             # a single 'on' key names the join column on both sides
             left_on = get_constant(self.func_ir, kws['on'], None)
             right_on = left_on
         else:  # pragma: no cover
             if 'left_on' not in kws or 'right_on' not in kws:
                 # BUGFIX: the two adjacent literals concatenated without a
                 # space, producing "...'right_on'arguments required"
                 raise ValueError("merge 'on' or 'left_on'/'right_on' "
                                  "arguments required")
             left_on = get_constant(self.func_ir, kws['left_on'], None)
             right_on = get_constant(self.func_ir, kws['right_on'], None)
         if left_on is None or right_on is None:
             raise ValueError("merge key values should be constant strings")
         scope = lhs.scope
         loc = lhs.loc
         # the output dataframe gets a fresh variable for every column of
         # both inputs (iterate keys directly; the values are unused)
         self.df_vars[lhs.name] = {}
         # add columns from left to output
         for col in self.df_vars[left_df.name]:
             self.df_vars[lhs.name][col] = ir.Var(scope, mk_unique_var(col),
                                                  loc)
         # add columns from right to output
         for col in self.df_vars[right_df.name]:
             self.df_vars[lhs.name][col] = ir.Var(scope, mk_unique_var(col),
                                                  loc)
         self._update_df_cols()
         return [
             hiframes_join.Join(lhs.name, left_df.name, right_df.name,
                                left_on, right_on, self.df_vars, lhs.loc)
         ]
     return None
Example 4
0
    def _handle_str_contains(self, lhs, rhs):
        """
        Handle string contains like:
          B = df.column.str.contains('oo*', regex=True)

        Returns replacement IR nodes computing the result into lhs, or
        None when rhs is not a recognized str.contains() call.
        """
        func_def = guard(get_definition, self.func_ir, rhs.func)
        assert func_def is not None
        # rare case where function variable is assigned to a new variable
        if isinstance(func_def, ir.Var):
            rhs.func = func_def
            return self._handle_str_contains(lhs, rhs)
        str_col = guard(self._get_str_contains_col, func_def)
        if str_col is None:
            return None
        kws = dict(rhs.kws)
        pat = rhs.args[0]
        regex = True  # default regex arg is True
        if 'regex' in kws:
            regex = get_constant(self.func_ir, kws['regex'], regex)
        # select the regex or plain-substring kernel to compile
        if regex:

            def f(str_arr, pat):
                hpat.hiframes_api.str_contains_regex(str_arr, pat)
        else:

            def f(str_arr, pat):
                hpat.hiframes_api.str_contains_noregex(str_arr, pat)

        f_block = compile_to_numba_ir(f, {'hpat': hpat}).blocks.popitem()[1]
        replace_arg_nodes(f_block, [str_col, pat])
        nodes = f_block.body[:-3]  # remove none return
        # retarget the final node (the kernel-call assignment) to write lhs
        nodes[-1].target = lhs
        return nodes
Example 5
0
File: pio.py Project: zmyer/hpat
    def _get_dset_type(self, lhs, file_var, dset_var):
        """get data set type from user-specified locals types or actual file

        lhs: name of the variable being assigned (looked up in local_vars)
        file_var / dset_var: ir.Vars expected to hold constant strings

        Returns a numba types.Array describing the dataset, or raises
        RuntimeError when the type cannot be determined.
        """
        # user-provided type annotations take precedence over file inspection
        if lhs in self.local_vars:
            return self.local_vars[lhs]
        if self.reverse_copies[lhs] in self.local_vars:
            return self.local_vars[self.reverse_copies[lhs]]

        # read type from file if file name and dset name are constant values
        # TODO: check for file availability
        file_name = get_constant(self.func_ir, file_var)
        dset_str = get_constant(self.func_ir, dset_var)
        if file_name is not NOT_CONSTANT and dset_str is not NOT_CONSTANT:
            # BUGFIX: close the HDF5 file handle (it was leaked before);
            # h5py.File supports the context-manager protocol
            with h5py.File(file_name, "r") as f:
                ndims = len(f[dset_str].shape)
                numba_dtype = numpy_support.from_dtype(f[dset_str].dtype)
            return types.Array(numba_dtype, ndims, 'C')

        raise RuntimeError("data set type not found")
Example 6
0
 def _process_df_build_map(self, items_list):
     df_cols = {}
     for item in items_list:
         col_var = item[0]
         if isinstance(col_var, str):
             col_name = col_var
         else:
             col_name = get_constant(self.func_ir, col_var)
             if col_name is NOT_CONSTANT:
                 raise ValueError("data frame column names should be constant")
         df_cols[col_name] = item[1]
     return df_cols
Example 7
0
def gen_xe_init_from_uri(func_ir, dset_name_var):
    """Resolve a constant Xenon URI and generate connection-init nodes.

    The variable must hold a constant string of the form
    "address/dataset". Returns a tuple
    (init_nodes, col_names, col_types, connect_var, dset_var).
    Raises ValueError for non-constant or malformed URIs.
    """
    uri = get_constant(func_ir, dset_name_var)
    if uri is NOT_CONSTANT:
        raise ValueError("Xenon dataset should be a constant string")

    # exactly one '/' separates the server address from the dataset name
    if uri.count("/") != 1:
        raise ValueError("invalid Xenon address {}".format(uri))
    address, dset_name = uri.split("/")

    # fetch the dataset schema from the Xenon extension and decode it
    from .. import hxe_ext
    raw_schema = hxe_ext.get_schema(address, dset_name)
    col_names, col_types = parse_xe_schema(raw_schema)

    out_nodes, xe_connect_var, xe_dset_var = gen_init_xenon(address, dset_name)
    return out_nodes, col_names, col_types, xe_connect_var, xe_dset_var
Example 8
0
    def _gen_fillna(self, out_var, args, col_var, kws):
        """Generate IR nodes implementing Series.fillna().

        out_var: ir.Var for the result array (replaced by col_var when
            inplace=True)
        args: positional call arguments; args[0] is the fill value var
        col_var: ir.Var holding the input column array
        kws: dict of keyword arguments; 'inplace' is recognized

        Returns the list of IR nodes (allocation + fill kernel call).
        """
        inplace = False
        if 'inplace' in kws:
            inplace = get_constant(self.func_ir, kws['inplace'])
            # CONSISTENCY: test the NOT_CONSTANT sentinel with 'is' like
            # the rest of the file (was '==', an identity-by-accident check)
            if inplace is NOT_CONSTANT:
                raise ValueError("inplace arg to fillna should be constant")

        if inplace:
            out_var = col_var  # output array is same as input array
            alloc_nodes = []
        else:
            # allocate an uninitialized output array shaped like the input
            alloc_nodes = gen_empty_like(col_var, out_var)

        val = args[0]

        def f(A, B, fill):
            hpat.hiframes_api.fillna(A, B, fill)

        f_block = compile_to_numba_ir(f, {'hpat': hpat}).blocks.popitem()[1]
        replace_arg_nodes(f_block, [out_var, col_var, val])
        nodes = f_block.body[:-3]  # remove none return
        return alloc_nodes + nodes
Example 9
0
def _handle_read(assign, lhs, rhs, func_ir):
    """Lower a read_xenon() call into IR nodes that read each column.

    read_xenon accepts either a single constant URI string
    ("address/dataset") or three arguments:
    (connection var, dataset var, constant schema string).

    Returns (col_items, out_nodes): col_items is a list of
    (column_name, column_var) pairs and out_nodes are the IR nodes to
    splice into the caller in place of the call.
    """
    if not hpat.config._has_xenon:
        raise ValueError("Xenon support not available")

    # TODO: init only once
    # register the Xenon extension's native entry points with llvmlite
    from .. import hxe_ext
    ll.add_symbol('get_column_size_xenon', hxe_ext.get_column_size_xenon)
    ll.add_symbol('c_read_xenon', hxe_ext.read_xenon_col)
    ll.add_symbol('c_read_xenon_parallel', hxe_ext.read_xenon_col_parallel)
    ll.add_symbol('c_read_xenon_col_str', hxe_ext.read_xenon_col_str)
    ll.add_symbol('c_read_xenon_col_str_parallel', hxe_ext.read_xenon_col_str_parallel)
    ll.add_symbol('c_xe_connect', hxe_ext.c_xe_connect)
    ll.add_symbol('c_xe_open', hxe_ext.c_xe_open)
    ll.add_symbol('c_xe_close', hxe_ext.c_xe_close)

    if len(rhs.args) not in [1, 3]:
        raise ValueError("read_xenon expects one or three argument but received {}".format(len(rhs.args)))

    if len(rhs.args) == 1:
        # URI form: open the connection/dataset and read schema from it
        out_nodes, col_names, col_types, xe_connect_var, xe_dset_var = gen_xe_init_from_uri(func_ir, rhs.args[0])
    else:
        assert len(rhs.args) == 3
        # pre-opened form: connection/dataset supplied, schema is a constant
        xe_connect_var = rhs.args[0]
        xe_dset_var = rhs.args[1]
        schema = get_constant(func_ir, rhs.args[2])
        if schema is NOT_CONSTANT:
            raise ValueError("Xenon schema should be a constant string")
        col_names, col_types = parse_xe_schema(schema)
        out_nodes = []

    # generate array of schema types
    # the type-enum array is exec'd from generated source so the values
    # become compile-time constants in the resulting IR
    xe_typs = [str(get_xe_typ_enum(c_type)) for c_type in col_types]
    xe_typs_str = "np.array([" + ",".join(xe_typs) + "])"
    func_text = 'def f():\n  schema_arr = {}\n'.format(xe_typs_str)
    loc_vars = {}
    exec(func_text, {}, loc_vars)
    schm_func = loc_vars['f']
    f_block = compile_to_numba_ir(schm_func, {'np': np, }).blocks.popitem()[1]
    out_nodes += f_block.body[:-3]
    # the last remaining node assigns the schema array; keep its target var
    schema_arr_var = out_nodes[-1].target

    scope = rhs.args[0].scope
    loc = rhs.args[0].loc

    col_items = []
    for i, cname in enumerate(col_names):
        # get column type from schema
        c_type = col_types[i]

        # create a variable for column and assign type
        varname = mk_unique_var(cname)
        cvar = ir.Var(scope, varname, loc)
        col_items.append((cname, cvar))

        out_nodes += get_column_read_nodes(c_type, cvar, xe_connect_var, xe_dset_var, i, schema_arr_var)

    # we need to close in the URI case since we opened the connection/dataset
    if len(rhs.args) == 1:
        out_nodes += gen_close_xenon(xe_connect_var, xe_dset_var)

    return col_items, out_nodes