Example #1
 def _resolve_combine_func(self, ary, args, kws):
     dtype1 = ary.dtype
     # getitem returns Timestamp for dt_index and series(dt64)
     if dtype1 == types.NPDatetime('ns'):
         dtype1 = pandas_timestamp_type
     dtype2 = args[0].dtype
     if dtype2 == types.NPDatetime('ns'):
         dtype2 = pandas_timestamp_type
     t = args[1].get_call_type(self.context, (
         dtype1,
         dtype2,
     ), {})
     return signature(SeriesType(t.return_type), *args)
Example #2
 def _resolve_combine_func(self, ary, args, kws):
     dtype1 = ary.dtype
     # getitem returns Timestamp for dt_index and series(dt64)
     if dtype1 == types.NPDatetime('ns'):
         dtype1 = pandas_timestamp_type
     dtype2 = args[0].dtype
     if dtype2 == types.NPDatetime('ns'):
         dtype2 = pandas_timestamp_type
     code = args[1].literal_value.code
     f_ir = numba.ir_utils.get_ir_of_code({'np': np}, code)
     f_typemap, f_return_type, f_calltypes = numba.compiler.type_inference_stage(
             self.context, f_ir, (dtype1,dtype2,), None)
     return signature(SeriesType(f_return_type), *args)
Example #3
    def test_jit_explicit_signature(self):
        def _check_explicit_signature(sig):
            f = jit(sig, nopython=True)(add_usecase)
            # Just a sanity check
            args = DT(1, 'ms'), TD(2, 'us')
            expected = add_usecase(*args)
            self.assertPreciseEqual(f(*args), expected)

        # Test passing the signature in object form
        sig = types.NPDatetime('us')(types.NPDatetime('ms'), types.NPTimedelta('us'))
        _check_explicit_signature(sig)
        # Same with the signature in string form
        sig = "NPDatetime('us')(NPDatetime('ms'), NPTimedelta('us'))"
        _check_explicit_signature(sig)
Example #4
def _gen_csv_reader_py_pyarrow_func_text_core(col_names,
                                              col_typs,
                                              usecols,
                                              sep,
                                              skiprows,
                                              signature=None):
    # TODO: support non-numpy types like strings
    date_inds = ", ".join(
        str(i) for i, t in enumerate(col_typs)
        if t.dtype == types.NPDatetime('ns'))
    typ_strs = ", ".join([
        "{}='{}'".format(to_varname(cname), _get_dtype_str(t))
        for cname, t in zip(col_names, col_typs)
    ])
    pd_dtype_strs = ", ".join([
        "'{}':{}".format(cname, _get_pd_dtype_str(t))
        for cname, t in zip(col_names, col_typs)
    ])

    if signature is None:
        signature = "fname"
    func_text = "def csv_reader_py({}):\n".format(signature)
    func_text += "  with objmode({}):\n".format(typ_strs)
    func_text += "    df = pandas_read_csv(fname, names={},\n".format(
        col_names)
    func_text += "       parse_dates=[{}],\n".format(date_inds)
    func_text += "       dtype={{{}}},\n".format(pd_dtype_strs)
    func_text += "       skiprows={},\n".format(skiprows)
    func_text += "       usecols={}, sep='{}')\n".format(usecols, sep)
    for cname in col_names:
        func_text += "    {} = df['{}'].values\n".format(
            to_varname(cname), cname)
        # func_text += "    print({})\n".format(cname)
    return func_text, 'csv_reader_py'
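The helper above only builds source text. A minimal sketch of the exec-based pattern that consumes such text (the same pattern Example #6 applies to its generated reader; the names below are placeholders):

def _demo_codegen():
    # build source text, exec it into a fresh namespace, pull the function out
    func_text = "def csv_reader_py(fname):\n"
    func_text += "    return fname.upper()\n"   # placeholder body
    loc_vars = {}
    exec(func_text, {}, loc_vars)
    return loc_vars['csv_reader_py']

print(_demo_codegen()("data.csv"))  # DATA.CSV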
Example #5
def lower_unbox_df_column(context, builder, sig, args):
    # FIXME: last arg should be types.DType?
    pyapi = context.get_python_api(builder)
    c = numba.pythonapi._UnboxContext(context, builder, pyapi)

    # TODO: refcounts?
    col_ind = sig.args[1].value
    col_name = sig.args[0].col_names[col_ind]
    series_obj = c.pyapi.object_getattr_string(args[0], col_name)
    arr_obj = c.pyapi.object_getattr_string(series_obj, "values")

    if isinstance(sig.args[2],
                  types.Const) and sig.args[2].value == 11:  # FIXME: str code
        native_val = unbox_str_series(string_array_type, arr_obj, c)
    else:
        if isinstance(
                sig.args[2],
                types.Const) and sig.args[2].value == 12:  # FIXME: dt64 code
            dtype = types.NPDatetime('ns')
        else:
            dtype = sig.args[2].dtype
        # TODO: error handling like Numba callwrappers.py
        native_val = unbox_array(types.Array(dtype=dtype, ndim=1, layout='C'),
                                 arr_obj, c)

    c.pyapi.decref(series_obj)
    c.pyapi.decref(arr_obj)
    return native_val.value
Example #6
def _gen_csv_reader_py_pandas(col_names, col_typs, usecols, sep, typingctx, targetctx, parallel, skiprows):
    # TODO: support non-numpy types like strings
    date_inds = ", ".join(str(i) for i, t in enumerate(col_typs) if t.dtype == types.NPDatetime('ns'))
    typ_strs = ", ".join(["{}='{}'".format(_sanitize_varname(cname), _get_dtype_str(t))
                          for cname, t in zip(col_names, col_typs)])
    pd_dtype_strs = ", ".join(["'{}':{}".format(cname, _get_pd_dtype_str(t)) for cname, t in zip(col_names, col_typs)])

    func_text = "def csv_reader_py(fname):\n"
    func_text += "  skiprows = {}\n".format(skiprows)
    func_text += "  f_reader = csv_file_chunk_reader(fname._data, {}, skiprows, -1)\n".format(parallel)
    func_text += "  with objmode({}):\n".format(typ_strs)
    func_text += "    df = pd.read_csv(f_reader, names={},\n".format(col_names)
    func_text += "       parse_dates=[{}],\n".format(date_inds)
    func_text += "       dtype={{{}}},\n".format(pd_dtype_strs)
    func_text += "       usecols={}, sep='{}')\n".format(usecols, sep)
    for cname in col_names:
        func_text += "    {} = df['{}'].values\n".format(_sanitize_varname(cname), cname)
        # func_text += "    print({})\n".format(cname)
    func_text += "  return ({},)\n".format(", ".join(_sanitize_varname(c) for c in col_names))

    # print(func_text)
    glbls = globals()  # TODO: fix globals after Numba's #3355 is resolved
    # {'objmode': objmode, 'csv_file_chunk_reader': csv_file_chunk_reader,
    # 'pd': pd, 'np': np}
    loc_vars = {}
    exec(func_text, glbls, loc_vars)
    csv_reader_py = loc_vars['csv_reader_py']

    # TODO: no_cpython_wrapper=True crashes for some reason
    jit_func = numba.njit(csv_reader_py)
    compiled_funcs.append(jit_func)
    return jit_func
Example #7
def _get_pd_dtype_str(t):
    dtype = t.dtype
    if isinstance(dtype, PDCategoricalDtype):
        return 'pd.api.types.CategoricalDtype({})'.format(dtype.categories)
    if dtype == types.NPDatetime('ns'):
        dtype = 'str'
    if t == string_array_type:
        return 'str'
    return 'np.{}'.format(dtype)
Example #8
 def generic(self, args, kws):
     if len(args) == 1:
         # Guard against unary -
         return
     dt, td = args
     if isinstance(dt, types.NPDatetime) and isinstance(td, types.NPTimedelta):
         unit = npdatetime.combine_datetime_timedelta_units(dt.unit, td.unit)
         if unit is not None:
             return signature(types.NPDatetime(unit), dt, td)
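A NumPy-only sketch of the unit combination this typer mirrors: adding datetime64 and timedelta64 picks the finer of the two units.

import numpy as np

dt = np.datetime64('2020-01-01', 's')   # second resolution
td = np.timedelta64(1500, 'ms')         # millisecond resolution
print((dt + td).dtype)                  # datetime64[ms]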
Example #9
 def generic(self, args, kws):
     assert not kws
     [ary, idx] = args
     if not isinstance(ary, SeriesType):
         return
     out = get_array_index_type(ary, idx)
     # check result to be dt64 since it might be sliced array
     # replace result with Timestamp
     if out is not None and out.result == types.NPDatetime('ns'):
         return signature(pandas_timestamp_type, ary, out.index)
Example #10
def _gen_csv_reader_py_pyarrow_func_text_core(col_names,
                                              col_typs,
                                              dtype_present,
                                              usecols,
                                              signature=None):
    # TODO: support non-numpy types like strings
    date_inds = ", ".join(
        str(i) for i, t in enumerate(col_typs)
        if t.dtype == types.NPDatetime('ns'))
    return_columns = usecols if usecols and isinstance(usecols[0],
                                                       str) else col_names
    nb_objmode_vars = ", ".join([
        "{}='{}'".format(to_varname(cname), _get_dtype_str(t))
        for cname, t in zip(return_columns, col_typs)
    ])
    pd_dtype_strs = ", ".join([
        "'{}':{}".format(cname, _get_pd_dtype_str(t))
        for cname, t in zip(return_columns, col_typs)
    ])

    if signature is None:
        signature = "filepath_or_buffer"
    func_text = "def csv_reader_py({}):\n".format(signature)
    func_text += "  with objmode({}):\n".format(nb_objmode_vars)
    func_text += "    df = pandas_read_csv(filepath_or_buffer,\n"

    # pyarrow reads an unnamed header as " ", while pandas reads it as "Unnamed: N";
    # during inference from the file, such names should be replaced with "Unnamed: N".
    # Passing names to pyarrow means one row is the header and should be skipped.
    if col_names and any(map(lambda x: x.startswith('Unnamed: '), col_names)):
        func_text += "        names={},\n".format(col_names)
        func_text += "        skiprows=(skiprows and skiprows + 1) or 1,\n"
    else:
        func_text += "        names=names,\n"
        func_text += "        skiprows=skiprows,\n"

    func_text += "        parse_dates=[{}],\n".format(date_inds)

    # Python objects (e.g. str, np.float) cannot be jitted and passed into objmode,
    # so they are hardcoded into the generated function
    # func_text += "        dtype={{{}}},\n".format(pd_dtype_strs) if dtype_present else \
    #              "        dtype=dtype,\n"
    # dtype is hardcoded because datetime should be read as string
    func_text += "        dtype={{{}}},\n".format(pd_dtype_strs)

    func_text += "        usecols=usecols,\n"
    func_text += "        sep=sep,\n"
    func_text += "        delimiter=delimiter,\n"
    func_text += "    )\n"
    for cname in return_columns:
        func_text += "    {} = df['{}'].values\n".format(
            to_varname(cname), cname)
        # func_text += "    print({})\n".format(cname)
    return func_text, 'csv_reader_py'
Example #11
    def generic(self, args, kws):
        pysig = numba.utils.pysignature(pd_dt_index_stub)
        try:
            bound = pysig.bind(*args, **kws)
        except TypeError:  # pragma: no cover
            msg = "Unsupported arguments for pd.DatetimeIndex()"
            raise ValueError(msg)

        sig = signature(SeriesType(types.NPDatetime('ns'), 1, 'C'),
                        bound.args).replace(pysig=pysig)
        return sig
Example #12
    def generic(self, args, kws):
        assert not kws
        [in_arr, in_idx] = args
        is_arr_series = False
        is_idx_series = False
        is_arr_dt_index = False

        if not isinstance(in_arr, SeriesType) and not isinstance(
                in_idx, SeriesType):
            return None

        if isinstance(in_arr, SeriesType):
            in_arr = series_to_array_type(in_arr)
            is_arr_series = True
            if in_arr.dtype == types.NPDatetime('ns'):
                is_arr_dt_index = True

        if isinstance(in_idx, SeriesType):
            in_idx = series_to_array_type(in_idx)
            is_idx_series = True

        # TODO: dt_index
        if in_arr == string_array_type:
            sig = GetItemStringArray.generic(self, (in_arr, in_idx), kws)
        else:
            out = get_array_index_type(in_arr, in_idx)
            sig = signature(out.result, in_arr, out.index)

        if sig is not None:
            arg1 = sig.args[0]
            arg2 = sig.args[1]
            if is_arr_series:
                sig.return_type = if_arr_to_series_type(sig.return_type)
                arg1 = if_arr_to_series_type(arg1)
            if is_idx_series:
                arg2 = if_arr_to_series_type(arg2)
            sig.args = (arg1, arg2)
            # dt_index and Series(dt64) should return Timestamp
            if is_arr_dt_index and sig.return_type == types.NPDatetime('ns'):
                sig.return_type = pandas_timestamp_type
        return sig
Example #13
def _get_numba_typ_from_pa_typ(pa_typ):
    import pyarrow as pa
    _typ_map = {
        # boolean
        pa.bool_(): types.bool_,
        # signed int types
        pa.int8(): types.int8,
        pa.int16(): types.int16,
        pa.int32(): types.int32,
        pa.int64(): types.int64,
        # unsigned int types
        pa.uint8(): types.uint8,
        pa.uint16(): types.uint16,
        pa.uint32(): types.uint32,
        pa.uint64(): types.uint64,
        # float types (TODO: float16?)
        pa.float32(): types.float32,
        pa.float64(): types.float64,
        # String
        pa.string(): string_type,
        # date
        pa.date32(): types.NPDatetime('ns'),
        pa.date64(): types.NPDatetime('ns'),
        # time (TODO: time32, time64, ...)
        pa.timestamp('ns'): types.NPDatetime('ns'),
        pa.timestamp('us'): types.NPDatetime('ns'),
        pa.timestamp('ms'): types.NPDatetime('ns'),
        pa.timestamp('s'): types.NPDatetime('ns'),
    }
    if pa_typ not in _typ_map:
        raise ValueError("Arrow data type {} not supported yet".format(pa_typ))
    return _typ_map[pa_typ]
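A hedged usage sketch, assuming pyarrow and Numba are importable: a trimmed-down map showing how schema fields resolve to Numba types, with every timestamp unit collapsing to NPDatetime('ns') as in the full table above.

import pyarrow as pa
from numba import types

_mini_map = {
    pa.int64(): types.int64,
    pa.timestamp('us'): types.NPDatetime('ns'),
    pa.timestamp('ns'): types.NPDatetime('ns'),
}
schema = pa.schema([('a', pa.int64()), ('ts', pa.timestamp('us'))])
print([_mini_map[field.type] for field in schema])  # [int64, datetime64[ns]]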
Example #14
class MaskedConstructor(ConcreteTemplate):
    key = api.Masked
    units = ["ns", "ms", "us", "s"]
    datetime_cases = {types.NPDatetime(u) for u in units}
    timedelta_cases = {types.NPTimedelta(u) for u in units}
    cases = [
        nb_signature(MaskedType(t), t, types.boolean)
        for t in (
            types.integer_domain
            | types.real_domain
            | datetime_cases
            | timedelta_cases
            | {types.boolean}
        )
    ]
Example #15
 def generic(self, args, kws):
     assert not kws
     assert len(args) == 3
     df_typ, col_ind_const, dtype_typ = args[0], args[1], args[2]
     if isinstance(dtype_typ, types.Literal):
         if dtype_typ.literal_value == 12:  # FIXME dtype for dt64
             out_typ = types.Array(types.NPDatetime('ns'), 1, 'C')
         elif dtype_typ.literal_value == 11:  # FIXME dtype for str
             out_typ = string_array_type
         else:
             raise ValueError("invalid input dataframe dtype {}".format(dtype_typ.literal_value))
     else:
         out_typ = types.Array(dtype_typ.dtype, 1, 'C')
     # FIXME: last arg should be types.DType?
     return signature(out_typ, *args)
Example #16
 def generic(self, args, kws):
     if len(args) == 1:
         # Guard against unary +
         return
     left, right = args
     if isinstance(right, types.NPTimedelta):
         dt = left
         td = right
     elif isinstance(left, types.NPTimedelta):
         dt = right
         td = left
     else:
         return
     if isinstance(dt, types.NPDatetime):
         unit = npdatetime.combine_datetime_timedelta_units(dt.unit, td.unit)
         if unit is not None:
             return signature(types.NPDatetime(unit), left, right)
Example #17
def get_column_read_nodes(c_type, cvar, arrow_readers_var, i):

    loc = cvar.loc

    func_text = 'def f(arrow_readers):\n'
    func_text += '  col_size = get_column_size_parquet(arrow_readers, {})\n'.format(
        i)
    # generate strings differently
    if c_type == string_type:
        # pass size for easier allocation and distributed analysis
        func_text += '  column = read_parquet_str(arrow_readers, {}, col_size)\n'.format(
            i)
    else:
        el_type = get_element_type(c_type)
        if el_type == repr(types.NPDatetime('ns')):
            func_text += '  column_tmp = np.empty(col_size, dtype=np.int64)\n'
            # TODO: fix alloc
            func_text += '  column = sdc.hiframes.api.ts_series_to_arr_typ(column_tmp)\n'
        else:
            func_text += '  column = np.empty(col_size, dtype=np.{})\n'.format(
                el_type)
        func_text += '  status = read_parquet(arrow_readers, {}, column, np.int32({}))\n'.format(
            i, _type_to_pq_dtype_number[el_type])

    loc_vars = {}
    exec(func_text, {'sdc': sdc, 'np': np}, loc_vars)
    size_func = loc_vars['f']
    _, f_block = compile_to_numba_ir(
        size_func, {
            'get_column_size_parquet': get_column_size_parquet,
            'read_parquet': read_parquet,
            'read_parquet_str': read_parquet_str,
            'np': np,
            'sdc': sdc,
            'StringArray': StringArray
        }).blocks.popitem()

    replace_arg_nodes(f_block, [arrow_readers_var])
    out_nodes = f_block.body[:-3]
    for stmt in reversed(out_nodes):
        if stmt.target.name.startswith("column"):
            assign = ir.Assign(stmt.target, cvar, loc)
            break

    out_nodes.append(assign)
    return out_nodes
Example #18
    def _resolve_map_func(self, ary, args, kws):
        dtype = ary.dtype
        # getitem returns Timestamp for dt_index and series(dt64)
        if dtype == types.NPDatetime('ns'):
            dtype = pandas_timestamp_type
        code = args[0].literal_value.code
        _globals = {'np': np}
        # XXX hack in hiframes_typed to make globals available
        if hasattr(args[0].literal_value, 'globals'):
            # TODO: use code.co_names to find globals actually used?
            _globals = args[0].literal_value.globals

        f_ir = numba.ir_utils.get_ir_of_code(_globals, code)
        f_typemap, f_return_type, f_calltypes = numba.typed_passes.type_inference_stage(
            self.context, f_ir, (dtype, ), None)

        return signature(SeriesType(f_return_type), *args)
Example #19
def _get_dtype_str(t):
    dtype = t.dtype
    if isinstance(dtype, PDCategoricalDtype):
        cat_arr = CategoricalArray(dtype)
        # HACK: add cat type to numba.types
        # FIXME: fix after Numba #3372 is resolved
        cat_arr_name = 'CategoricalArray' + str(ir_utils.next_label())
        setattr(types, cat_arr_name, cat_arr)
        return cat_arr_name

    if dtype == types.NPDatetime('ns'):
        dtype = 'NPDatetime("ns")'
    if t == string_array_type:
        # HACK: add string_array_type to numba.types
        # FIXME: fix after Numba #3372 is resolved
        types.string_array_type = string_array_type
        return 'string_array_type'
    return '{}[::1]'.format(dtype)
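A minimal Numba-only sketch of what these strings are meant to resolve to: type names looked up on numba.types, where '[::1]' denotes a 1D C-contiguous array of the dtype.

from numba import types

print(types.NPDatetime('ns')[::1])  # array(datetime64[ns], 1d, C)
print(types.float64[::1])           # array(float64, 1d, C)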
Example #20
def iternext_itertuples(context, builder, sig, args, result):
    iterty, = sig.args
    it, = args

    # TODO: support string arrays
    iterobj = context.make_helper(builder, iterty, value=it)
    # first array type is implicit int index
    # use len() to support string arrays
    len_sig = signature(types.intp, iterty.array_types[1])
    nitems = context.compile_internal(builder, lambda a: len(a), len_sig,
                                      [iterobj.array0])
    # ary = make_array(iterty.array_types[1])(context, builder, value=iterobj.array0)
    # nitems, = cgutils.unpack_tuple(builder, ary.shape, count=1)

    index = builder.load(iterobj.index)
    is_valid = builder.icmp(lc.ICMP_SLT, index, nitems)
    result.set_valid(is_valid)

    with builder.if_then(is_valid):
        values = [index]  # XXX implicit int index
        for i, arr_typ in enumerate(iterty.array_types[1:]):
            arr_ptr = getattr(iterobj, "array{}".format(i))

            if arr_typ == types.Array(types.NPDatetime('ns'), 1, 'C'):
                getitem_sig = signature(pandas_timestamp_type, arr_typ,
                                        types.intp)
                val = context.compile_internal(
                    builder, lambda a, i: hpat.pd_timestamp_ext.
                    convert_datetime64_to_timestamp(np.int64(a[i])),
                    getitem_sig, [arr_ptr, index])
            else:
                getitem_sig = signature(arr_typ.dtype, arr_typ, types.intp)
                val = context.compile_internal(builder, lambda a, i: a[i],
                                               getitem_sig, [arr_ptr, index])
            # arr = make_array(arr_typ)(context, builder, value=arr_ptr)
            # val = _getitem_array1d(context, builder, arr_typ, arr, index,
            #                      wraparound=False)
            values.append(val)

        value = context.make_tuple(builder, iterty.yield_type, values)
        result.yield_(value)
        nindex = cgutils.increment_index(builder, index)
        builder.store(nindex, iterobj.index)
Example #21
 def test_call_notation(self):
     # Function call signature
     i = types.int32
     d = types.double
     self.assertEqual(i(), typing.signature(i))
     self.assertEqual(i(d), typing.signature(i, d))
     self.assertEqual(i(d, d), typing.signature(i, d, d))
     # Value cast
     self.assertPreciseEqual(i(42.5), 42)
     self.assertPreciseEqual(d(-5), -5.0)
     ty = types.NPDatetime('Y')
     self.assertPreciseEqual(ty('1900'), np.datetime64('1900', 'Y'))
     self.assertPreciseEqual(ty('NaT'), np.datetime64('NaT', 'Y'))
     ty = types.NPTimedelta('s')
     self.assertPreciseEqual(ty(5), np.timedelta64(5, 's'))
     self.assertPreciseEqual(ty('NaT'), np.timedelta64('NaT', 's'))
     ty = types.NPTimedelta('')
     self.assertPreciseEqual(ty(5), np.timedelta64(5))
     self.assertPreciseEqual(ty('NaT'), np.timedelta64('NaT'))
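A standalone sketch (Numba and NumPy only) of the call notation exercised above: calling a Numba type with a plain value casts that value, while calling it with types builds a signature.

import numpy as np
from numba import types

ty = types.NPDatetime('Y')
print(ty('1900') == np.datetime64('1900', 'Y'))  # True: value cast
print(types.int32(42.5))                         # 42: value cast
print(types.int32(types.double))                 # a Signature, as the test asserts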
Example #22
 def test_call_notation(self):
     # Function call signature
     i = types.int32
     d = types.double
     self.assertEqual(i(), typing.signature(i))
     self.assertEqual(i(d), typing.signature(i, d))
     self.assertEqual(i(d, d), typing.signature(i, d, d))
     # Value cast
     self.assertPreciseEqual(i(42.5), 42)
     self.assertPreciseEqual(d(-5), -5.0)
     ty = types.NPDatetime('Y')
     self.assertPreciseEqual(ty('1900'), np.datetime64('1900', 'Y'))
     if numpy_version < (1,16):  # FIXME: workaround for known NumPy 1.16 issue
         self.assertPreciseEqual(ty('NaT'), np.datetime64('NaT', 'Y'))
     ty = types.NPTimedelta('s')
     self.assertPreciseEqual(ty(5), np.timedelta64(5, 's'))
     if numpy_version < (1,16):  # FIXME: workaround for known NumPy 1.16 issue
         self.assertPreciseEqual(ty('NaT'), np.timedelta64('NaT', 's'))
     ty = types.NPTimedelta('')
     self.assertPreciseEqual(ty(5), np.timedelta64(5))
     if numpy_version < (1,16):  # FIXME: workaround for known NumPy 1.16 issue
         self.assertPreciseEqual(ty('NaT'), np.timedelta64('NaT'))
Example #23
def is_dt64_series_typ(t):
    return isinstance(t, SeriesType) and t.dtype == types.NPDatetime('ns')
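The predicate relies on unit-aware equality of NPDatetime instances; a minimal Numba-only sketch:

from numba import types

print(types.NPDatetime('ns') == types.NPDatetime('ns'))  # True
print(types.NPDatetime('ns') == types.NPDatetime('us'))  # False: the unit is part of the type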
Example #24
 def test_atomic_types(self):
     for unit in ('M', 'ms'):
         ty = types.NPDatetime(unit)
         self.check_pickling(ty)
         ty = types.NPTimedelta(unit)
         self.check_pickling(ty)
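A hedged standalone sketch of what check_pickling verifies here: NPDatetime and NPTimedelta instances survive a pickle round trip and compare equal afterwards.

import pickle
from numba import types

ty = types.NPDatetime('ms')
assert pickle.loads(pickle.dumps(ty)) == ty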
Example #25
from numba.typing import signature
from numba.targets.imputils import impl_ret_new_ref, impl_ret_borrowed
import numpy as np
import hpat
from hpat.str_ext import string_type, unicode_to_char_ptr
from hpat.str_arr_ext import StringArray, StringArrayPayloadType, construct_string_array
from hpat.str_arr_ext import string_array_type
from hpat.utils import unliteral_all


# from parquet/types.h
# boolean, int32, int64, int96, float, double, byte
# XXX arrow converts int96 timestamp to int64
_type_to_pq_dtype_number = {'bool_': 0, 'int32': 1, 'int64': 2,
                            'int96': 3, 'float32': 4, 'float64': 5,
                            repr(types.NPDatetime('ns')): 3, 'int8': 6}


def read_parquet():
    return 0


def read_parquet_str():
    return 0


def read_parquet_str_parallel():
    return 0


def read_parquet_parallel():
    return 0  # assumed stub, mirroring the read_parquet_* helpers above
Example #26
@numba.njit
def lt_f(a, b):
    return a < b


@numba.njit
def gt_f(a, b):
    return a > b


series_replace_funcs = {
    'sum': _column_sum_impl_basic,
    'prod': _column_prod_impl_basic,
    'count': _column_count_impl,
    'mean': _column_mean_impl,
    'max': defaultdict(lambda: _column_max_impl, [(types.NPDatetime('ns'), _column_max_impl_no_isnan)]),
    'min': defaultdict(lambda: _column_min_impl, [(types.NPDatetime('ns'), _column_min_impl_no_isnan)]),
    'var': _column_var_impl,
    'std': _column_std_impl,
    'nunique': lambda A: hpat.hiframes.api.nunique(A),
    'unique': lambda A: hpat.hiframes.api.unique(A),
    'describe': _column_describe_impl,
    'fillna_alloc': _column_fillna_alloc_impl,
    'fillna_str_alloc': _series_fillna_str_alloc_impl,
    'dropna_float': _series_dropna_float_impl,
    'dropna_str_alloc': _series_dropna_str_alloc_impl,
    'shift': lambda A, shift: hpat.hiframes.api.init_series(hpat.hiframes.rolling.shift(A, shift, False)),
    'shift_default': lambda A: hpat.hiframes.api.init_series(hpat.hiframes.rolling.shift(A, 1, False)),
    'pct_change': lambda A, shift: hpat.hiframes.api.init_series(hpat.hiframes.rolling.pct_change(A, shift, False)),
    'pct_change_default': lambda A: hpat.hiframes.api.init_series(hpat.hiframes.rolling.pct_change(A, 1, False)),
    'str_contains_regex': _str_contains_regex_impl,
Example #27
    def test_ufunc_find_matching_loop(self):
        f = numpy_support.ufunc_find_matching_loop
        np_add = FakeUFunc(_add_types)
        np_mul = FakeUFunc(_mul_types)

        def check(ufunc, input_types, sigs, output_types=()):
            """
            Check that ufunc_find_matching_loop() finds one of the given
            *sigs* for *ufunc*, *input_types* and optional *output_types*.
            """
            loop = f(ufunc, input_types + output_types)
            self.assertTrue(loop)
            if isinstance(sigs, str):
                sigs = (sigs, )
            self.assertIn(loop.ufunc_sig, sigs)
            self.assertEqual(len(loop.numpy_inputs), len(loop.inputs))
            self.assertEqual(len(loop.numpy_outputs), len(loop.outputs))
            if not output_types:
                # Add explicit outputs and check the result is the same
                loop_explicit = f(ufunc, list(input_types) + loop.outputs)
                self.assertEqual(loop_explicit, loop)
            else:
                self.assertEqual(loop.outputs, list(output_types))
            # Round-tripping inputs and outputs
            loop_rt = f(ufunc, loop.inputs + loop.outputs)
            self.assertEqual(loop_rt, loop)
            return loop

        def check_exact(ufunc, input_types, sigs, output_types=()):
            loop = check(ufunc, input_types, sigs, output_types)
            self.assertEqual(loop.inputs, list(input_types))

        def check_no_match(ufunc, input_types):
            loop = f(ufunc, input_types)
            self.assertIs(loop, None)

        # Exact matching for number types
        check_exact(np_add, (types.bool_, types.bool_), '??->?')
        check_exact(np_add, (types.int8, types.int8), 'bb->b')
        check_exact(np_add, (types.uint8, types.uint8), 'BB->B')
        check_exact(np_add, (types.int64, types.int64), ('ll->l', 'qq->q'))
        check_exact(np_add, (types.uint64, types.uint64), ('LL->L', 'QQ->Q'))
        check_exact(np_add, (types.float32, types.float32), 'ff->f')
        check_exact(np_add, (types.float64, types.float64), 'dd->d')
        check_exact(np_add, (types.complex64, types.complex64), 'FF->F')
        check_exact(np_add, (types.complex128, types.complex128), 'DD->D')

        # Exact matching for datetime64 and timedelta64 types
        check_exact(np_add, (types.NPTimedelta('s'), types.NPTimedelta('s')),
                    'mm->m',
                    output_types=(types.NPTimedelta('s'), ))
        check_exact(np_add, (types.NPTimedelta('ms'), types.NPDatetime('s')),
                    'mM->M',
                    output_types=(types.NPDatetime('ms'), ))
        check_exact(np_add, (types.NPDatetime('s'), types.NPTimedelta('s')),
                    'Mm->M',
                    output_types=(types.NPDatetime('s'), ))

        check_exact(np_mul, (types.NPTimedelta('s'), types.int64),
                    'mq->m',
                    output_types=(types.NPTimedelta('s'), ))
        check_exact(np_mul, (types.float64, types.NPTimedelta('s')),
                    'dm->m',
                    output_types=(types.NPTimedelta('s'), ))

        # Mix and match number types, with casting
        check(np_add, (types.bool_, types.int8), 'bb->b')
        check(np_add, (types.uint8, types.bool_), 'BB->B')
        check(np_add, (types.int16, types.uint16), 'ii->i')
        check(np_add, (types.complex64, types.float64), 'DD->D')
        check(np_add, (types.float64, types.complex64), 'DD->D')
        # With some timedelta64 arguments as well
        check(np_mul, (types.NPTimedelta('s'), types.int32),
              'mq->m',
              output_types=(types.NPTimedelta('s'), ))
        check(np_mul, (types.NPTimedelta('s'), types.uint32),
              'mq->m',
              output_types=(types.NPTimedelta('s'), ))
        check(np_mul, (types.NPTimedelta('s'), types.float32),
              'md->m',
              output_types=(types.NPTimedelta('s'), ))
        check(np_mul, (types.float32, types.NPTimedelta('s')),
              'dm->m',
              output_types=(types.NPTimedelta('s'), ))

        # No match
        check_no_match(np_add, (types.NPDatetime('s'), types.NPDatetime('s')))
        # No implicit casting from int64 to timedelta64 (Numpy would allow
        # this).
        check_no_match(np_add, (types.NPTimedelta('s'), types.int64))
Example #28
    def generic(self, args, kws):
        assert not kws
        [in_arr, in_idx] = args
        is_arr_series = False
        is_idx_series = False
        is_arr_dt_index = False

        if not isinstance(in_arr, SeriesType) and not isinstance(
                in_idx, SeriesType):
            return None

        if isinstance(in_arr, SeriesType):
            in_arr = series_to_array_type(in_arr)
            is_arr_series = True
            if in_arr.dtype == types.NPDatetime('ns'):
                is_arr_dt_index = True

        if isinstance(in_idx, SeriesType):
            in_idx = series_to_array_type(in_idx)
            is_idx_series = True

        # TODO: dt_index
        if in_arr == string_array_type:
            # XXX fails due to an issue in overload
            # compile_internal version results in symbol not found!
            # sig = self.context.resolve_function_type(
            #     operator.getitem, (in_arr, in_idx), kws)
            # HACK to avoid issues for now
            if isinstance(in_idx, (types.Integer, types.IntegerLiteral)):
                sig = string_type(in_arr, in_idx)
            else:
                sig = GetItemStringArray.generic(self, (in_arr, in_idx), kws)
        elif in_arr == list_string_array_type:
            # TODO: split view
            # mimic array indexing for list
            if (isinstance(in_idx, types.Array) and in_idx.ndim == 1
                    and isinstance(in_idx.dtype,
                                   (types.Integer, types.Boolean))):
                sig = signature(in_arr, in_arr, in_idx)
            else:
                sig = numba.typing.collections.GetItemSequence.generic(
                    self, (in_arr, in_idx), kws)
        elif in_arr == string_array_split_view_type:
            sig = GetItemStringArraySplitView.generic(self, (in_arr, in_idx),
                                                      kws)
        else:
            out = get_array_index_type(in_arr, in_idx)
            sig = signature(out.result, in_arr, out.index)

        if sig is not None:
            arg1 = sig.args[0]
            arg2 = sig.args[1]
            if is_arr_series:
                sig.return_type = if_arr_to_series_type(sig.return_type)
                arg1 = if_arr_to_series_type(arg1)
            if is_idx_series:
                arg2 = if_arr_to_series_type(arg2)
            sig.args = (arg1, arg2)
            # dt_index and Series(dt64) should return Timestamp
            if is_arr_dt_index and sig.return_type == types.NPDatetime('ns'):
                sig.return_type = pandas_timestamp_type
        return sig
Example #29
    def test_ufunc_find_matching_loop(self):
        f = numpy_support.ufunc_find_matching_loop
        np_add = FakeUFunc(_add_types)
        np_mul = FakeUFunc(_mul_types)
        np_isnan = FakeUFunc(_isnan_types)
        np_sqrt = FakeUFunc(_sqrt_types)

        def check(ufunc, input_types, sigs, output_types=()):
            """
            Check that ufunc_find_matching_loop() finds one of the given
            *sigs* for *ufunc*, *input_types* and optional *output_types*.
            """
            loop = f(ufunc, input_types + output_types)
            self.assertTrue(loop)
            if isinstance(sigs, str):
                sigs = (sigs, )
            self.assertIn(
                loop.ufunc_sig, sigs,
                "inputs=%s and outputs=%s should have selected "
                "one of %s, got %s" %
                (input_types, output_types, sigs, loop.ufunc_sig))
            self.assertEqual(len(loop.numpy_inputs), len(loop.inputs))
            self.assertEqual(len(loop.numpy_outputs), len(loop.outputs))
            if not output_types:
                # Add explicit outputs and check the result is the same
                loop_explicit = f(ufunc, list(input_types) + loop.outputs)
                self.assertEqual(loop_explicit, loop)
            else:
                self.assertEqual(loop.outputs, list(output_types))
            # Round-tripping inputs and outputs
            loop_rt = f(ufunc, loop.inputs + loop.outputs)
            self.assertEqual(loop_rt, loop)
            return loop

        def check_exact(ufunc, input_types, sigs, output_types=()):
            """
            Like check(), but also ensure no casting of inputs occurred.
            """
            loop = check(ufunc, input_types, sigs, output_types)
            self.assertEqual(loop.inputs, list(input_types))

        def check_no_match(ufunc, input_types):
            loop = f(ufunc, input_types)
            self.assertIs(loop, None)

        # Exact matching for number types
        check_exact(np_add, (types.bool_, types.bool_), '??->?')
        check_exact(np_add, (types.int8, types.int8), 'bb->b')
        check_exact(np_add, (types.uint8, types.uint8), 'BB->B')
        check_exact(np_add, (types.int64, types.int64), ('ll->l', 'qq->q'))
        check_exact(np_add, (types.uint64, types.uint64), ('LL->L', 'QQ->Q'))
        check_exact(np_add, (types.float32, types.float32), 'ff->f')
        check_exact(np_add, (types.float64, types.float64), 'dd->d')
        check_exact(np_add, (types.complex64, types.complex64), 'FF->F')
        check_exact(np_add, (types.complex128, types.complex128), 'DD->D')

        # Exact matching for datetime64 and timedelta64 types
        check_exact(np_add, (types.NPTimedelta('s'), types.NPTimedelta('s')),
                    'mm->m',
                    output_types=(types.NPTimedelta('s'), ))
        check_exact(np_add, (types.NPTimedelta('ms'), types.NPDatetime('s')),
                    'mM->M',
                    output_types=(types.NPDatetime('ms'), ))
        check_exact(np_add, (types.NPDatetime('s'), types.NPTimedelta('s')),
                    'Mm->M',
                    output_types=(types.NPDatetime('s'), ))

        check_exact(np_mul, (types.NPTimedelta('s'), types.int64),
                    'mq->m',
                    output_types=(types.NPTimedelta('s'), ))
        check_exact(np_mul, (types.float64, types.NPTimedelta('s')),
                    'dm->m',
                    output_types=(types.NPTimedelta('s'), ))

        # Mix and match number types, with casting
        check(np_add, (types.bool_, types.int8), 'bb->b')
        check(np_add, (types.uint8, types.bool_), 'BB->B')
        check(np_add, (types.int16, types.uint16), 'ii->i')
        check(np_add, (types.complex64, types.float64), 'DD->D')
        check(np_add, (types.float64, types.complex64), 'DD->D')
        # Integers, when used together with floating-point numbers,
        # should cast to any real or complex (see #2006)
        int_types = [types.int32, types.uint32, types.int64, types.uint64]
        for intty in int_types:
            check(np_add, (types.float32, intty), 'ff->f')
            check(np_add, (types.float64, intty), 'dd->d')
            check(np_add, (types.complex64, intty), 'FF->F')
            check(np_add, (types.complex128, intty), 'DD->D')
        # However, when used alone, they should cast only to
        # floating-point types of sufficient precision
        # (typical use case: np.sqrt(2) should give an accurate enough value)
        for intty in int_types:
            check(np_sqrt, (intty, ), 'd->d')
            check(np_isnan, (intty, ), 'd->?')

        # With some timedelta64 arguments as well
        check(np_mul, (types.NPTimedelta('s'), types.int32),
              'mq->m',
              output_types=(types.NPTimedelta('s'), ))
        check(np_mul, (types.NPTimedelta('s'), types.uint32),
              'mq->m',
              output_types=(types.NPTimedelta('s'), ))
        check(np_mul, (types.NPTimedelta('s'), types.float32),
              'md->m',
              output_types=(types.NPTimedelta('s'), ))
        check(np_mul, (types.float32, types.NPTimedelta('s')),
              'dm->m',
              output_types=(types.NPTimedelta('s'), ))

        # No match
        check_no_match(np_add, (types.NPDatetime('s'), types.NPDatetime('s')))
        # No implicit casting from int64 to timedelta64 (Numpy would allow
        # this).
        check_no_match(np_add, (types.NPTimedelta('s'), types.int64))
Example #30
 def resolve_dt(self, ary):
     assert ary.dtype == types.NPDatetime('ns')
     return series_dt_methods_type