Ejemplo n.º 1
0
    def test_jit_explicit_signature(self):
        """Explicit datetime/timedelta signatures are accepted by @jit."""
        def check(sig):
            compiled = jit(sig, nopython=True)(add_usecase)
            # Sanity-check the compiled function against the pure-Python one
            sample_args = (DT(1, 'ms'), TD(2, 'us'))
            self.assertPreciseEqual(compiled(*sample_args),
                                    add_usecase(*sample_args))

        # Signature given as a types object
        check(types.NPDatetime('us')(types.NPDatetime('ms'),
                                     types.NPTimedelta('us')))
        # Signature given in string form
        check("NPDatetime('us')(NPDatetime('ms'), NPTimedelta('us'))")
Ejemplo n.º 2
0
def _get_pd_dtype_str(t):
    """Return the dtype string passed to pandas for a column typed *t*."""
    if isinstance(t, Categorical):
        return 'pd.{}'.format(t.pd_dtype)
    if t == string_array_type:
        return 'str'
    dtype = t.dtype
    if dtype == types.NPDatetime('ns'):
        # datetime columns are requested as strings and parsed separately
        dtype = 'str'
    return 'np.{}'.format(dtype)
Ejemplo n.º 3
0
 def generic(self, args, kws):
     """Type ``datetime64 <op> timedelta64`` as a unit-combined datetime64."""
     # A single argument means the unary ``-`` operator: not handled here.
     if len(args) == 1:
         return
     dt, td = args
     if not (isinstance(dt, types.NPDatetime)
             and isinstance(td, types.NPTimedelta)):
         return
     unit = npdatetime_helpers.combine_datetime_timedelta_units(dt.unit,
                                                                td.unit)
     # None means the units are incompatible; emit no signature then.
     if unit is not None:
         return signature(types.NPDatetime(unit), dt, td)
Ejemplo n.º 4
0
def _from_datetime_dtype(dtype):
    """Convert a numpy datetime64/timedelta64 dtype into the matching type."""
    match = re_datetimestr.match(dtype.str)
    if match is None:
        raise NotImplementedError(dtype)
    # group 1 is the typecode ('m' or 'M'), group 3 the optional unit
    code = match.group(1)
    unit = match.group(3) or ''
    if code == 'M':
        return types.NPDatetime(unit)
    if code == 'm':
        return types.NPTimedelta(unit)
    raise NotImplementedError(dtype)
Ejemplo n.º 5
0
 def make_datetime_specific(outputs, dt_unit, td_unit):
     """Replace unit-less NPDatetime outputs with the combined concrete unit."""
     result = []
     for out in outputs:
         if not (isinstance(out, types.NPDatetime) and out.unit == ""):
             result.append(out)
             continue
         unit = npdatetime_helpers.combine_datetime_timedelta_units(
             dt_unit, td_unit)
         if unit is None:
             # incompatible units: mirror numpy's rejection of the loop
             raise TypeError(f"ufunc '{ufunc_name}' is not supported between "
                             f"datetime64[{dt_unit}] "
                             f"and timedelta64[{td_unit}]")
         result.append(types.NPDatetime(unit))
     return result
Ejemplo n.º 6
0
def _gen_csv_reader_py_pyarrow_func_text_core(col_names, col_typs, dtype_present, usecols, signature=None):
    """Build the source text of a CSV-reader function that wraps
    pandas_read_csv() in an objmode block and unpacks each column into a
    local variable.

    Returns a (function source text, function name) tuple; the caller is
    expected to exec the text.  NOTE(review): *dtype_present* is currently
    unused — the dtype argument is always hardcoded (see comment below).
    """
    # TODO: support non-numpy types like strings
    # Positional indices of datetime64[ns] columns; passed to parse_dates.
    date_inds = ", ".join(str(i) for i, t in enumerate(col_typs) if t.dtype == types.NPDatetime('ns'))
    # usecols given as column names replaces the inferred column list
    return_columns = usecols if usecols and isinstance(usecols[0], str) else col_names
    # objmode output annotations: one typed variable per returned column
    nb_objmode_vars = ", ".join([
        '{}="{}"'.format(to_varname(cname), _get_dtype_str(t))
        for cname, t in zip(return_columns, col_typs)
    ])
    # per-column pandas dtype mapping, rendered as dict-literal entries
    pd_dtype_strs = ", ".join([
        "'{}': {}".format(cname, _get_pd_dtype_str(t))
        for cname, t in zip(return_columns, col_typs)
    ])

    if signature is None:
        signature = "filepath_or_buffer"
    func_text = "def csv_reader_py({}):\n".format(signature)
    func_text += "  with objmode({}):\n".format(nb_objmode_vars)
    func_text += "    df = pandas_read_csv(filepath_or_buffer,\n"

    # pyarrow reads unnamed header as " ", pandas reads it as "Unnamed: N"
    # during inference from file names should be replaced with "Unnamed: N"
    # passing names to pyarrow means that one row is header and should be skipped
    if col_names and any(map(lambda x: x.startswith('Unnamed: '), col_names)):
        func_text += "        names={},\n".format(col_names)
        func_text += "        skiprows=(skiprows and skiprows + 1) or 1,\n"
    else:
        func_text += "        names=names,\n"
        func_text += "        skiprows=skiprows,\n"

    func_text += "        parse_dates=[{}],\n".format(date_inds)

    # Python objects (e.g. str, np.float) could not be jitted and passed to objmode
    # so they are hardcoded to function
    # func_text += "        dtype={{{}}},\n".format(pd_dtype_strs) if dtype_present else \
    #              "        dtype=dtype,\n"
    # dtype is hardcoded because datetime should be read as string
    func_text += "        dtype={{{}}},\n".format(pd_dtype_strs)

    func_text += "        usecols=usecols,\n"
    func_text += "        sep=sep,\n"
    func_text += "        delimiter=delimiter,\n"
    func_text += "    )\n"
    # Unpack each column's values into a local so objmode can type them.
    for cname in return_columns:
        func_text += "    {} = df['{}'].values\n".format(to_varname(cname), cname)
        # func_text += "    print({})\n".format(cname)
    return func_text, 'csv_reader_py'
Ejemplo n.º 7
0
def _get_dtype_str(t):
    """Return the type string used in the objmode annotation for *t*."""
    if isinstance(t, Categorical):
        # return categorical representation
        # for some reason pandas and pyarrow read_csv() return CategoricalDtype with
        # ordered=False in case when dtype is with ordered=None
        return str(t).replace('ordered=None', 'ordered=False')

    if t == string_array_type:
        # HACK: add string_array_type to numba.types
        # FIXME: fix after Numba #3372 is resolved
        types.string_array_type = string_array_type
        return 'string_array_type'

    dtype = t.dtype
    if dtype == types.NPDatetime('ns'):
        dtype = 'NPDatetime("ns")'
    return '{}[::1]'.format(dtype)
Ejemplo n.º 8
0
 def generic(self, args, kws):
     """Type ``datetime64 + timedelta64`` (either operand order)."""
     # A single argument means the unary ``+`` operator: not handled here.
     if len(args) == 1:
         return
     left, right = args
     # Identify which operand is the timedelta; the other must be a datetime.
     if isinstance(right, types.NPTimedelta):
         dt, td = left, right
     elif isinstance(left, types.NPTimedelta):
         dt, td = right, left
     else:
         return
     if not isinstance(dt, types.NPDatetime):
         return
     unit = npdatetime_helpers.combine_datetime_timedelta_units(dt.unit,
                                                                td.unit)
     # None means the units are incompatible; emit no signature then.
     if unit is not None:
         return signature(types.NPDatetime(unit), left, right)
Ejemplo n.º 9
0
 def test_call_notation(self):
     """Calling a type object builds a signature or casts a value."""
     int32 = types.int32
     float64 = types.double
     # Function call signature
     self.assertEqual(int32(), typing.signature(int32))
     self.assertEqual(int32(float64), typing.signature(int32, float64))
     self.assertEqual(int32(float64, float64),
                      typing.signature(int32, float64, float64))
     # Value cast
     self.assertPreciseEqual(int32(42.5), 42)
     self.assertPreciseEqual(float64(-5), -5.0)
     dt_type = types.NPDatetime('Y')
     self.assertPreciseEqual(dt_type('1900'), np.datetime64('1900', 'Y'))
     self.assertPreciseEqual(dt_type('NaT'), np.datetime64('NaT', 'Y'))
     td_type = types.NPTimedelta('s')
     self.assertPreciseEqual(td_type(5), np.timedelta64(5, 's'))
     self.assertPreciseEqual(td_type('NaT'), np.timedelta64('NaT', 's'))
     # Unit-less timedelta
     td_type = types.NPTimedelta('')
     self.assertPreciseEqual(td_type(5), np.timedelta64(5))
     self.assertPreciseEqual(td_type('NaT'), np.timedelta64('NaT'))
Ejemplo n.º 10
0
 def test_call_notation(self):
     """Calling a type object builds a signature or casts a value."""
     int32 = types.int32
     float64 = types.double
     # Function call signature
     self.assertEqual(int32(), typing.signature(int32))
     self.assertEqual(int32(float64), typing.signature(int32, float64))
     self.assertEqual(int32(float64, float64),
                      typing.signature(int32, float64, float64))
     # Value cast
     self.assertPreciseEqual(int32(42.5), 42)
     self.assertPreciseEqual(float64(-5), -5.0)
     dt_type = types.NPDatetime("Y")
     self.assertPreciseEqual(dt_type("1900"), np.datetime64("1900", "Y"))
     self.assertPreciseEqual(dt_type("NaT"), np.datetime64("NaT", "Y"))
     td_type = types.NPTimedelta("s")
     self.assertPreciseEqual(td_type(5), np.timedelta64(5, "s"))
     self.assertPreciseEqual(td_type("NaT"), np.timedelta64("NaT", "s"))
     # Unit-less timedelta
     td_type = types.NPTimedelta("")
     self.assertPreciseEqual(td_type(5), np.timedelta64(5))
     self.assertPreciseEqual(td_type("NaT"), np.timedelta64("NaT"))
Ejemplo n.º 11
0
 def test_atomic_types(self):
     """Datetime/timedelta types should round-trip through pickling."""
     for unit in ('M', 'ms'):
         for ty in (types.NPDatetime(unit), types.NPTimedelta(unit)):
             self.check_pickling(ty)
    def test_ufunc_find_matching_loop(self):
        """Exercise ufunc_find_matching_loop() over exact matches, implicit
        casts, datetime/timedelta loops and non-matching inputs."""
        f = numpy_support.ufunc_find_matching_loop
        # Fake ufuncs carrying only the loop-signature lists under test
        np_add = FakeUFunc(_add_types)
        np_mul = FakeUFunc(_mul_types)
        np_isnan = FakeUFunc(_isnan_types)
        np_sqrt = FakeUFunc(_sqrt_types)

        def check(ufunc, input_types, sigs, output_types=()):
            """
            Check that ufunc_find_matching_loop() finds one of the given
            *sigs* for *ufunc*, *input_types* and optional *output_types*.
            """
            loop = f(ufunc, input_types + output_types)
            self.assertTrue(loop)
            if isinstance(sigs, str):
                sigs = (sigs,)
            self.assertIn(
                loop.ufunc_sig,
                sigs,
                "inputs=%s and outputs=%s should have selected one of %s, got %s"
                % (input_types, output_types, sigs, loop.ufunc_sig),
            )
            self.assertEqual(len(loop.numpy_inputs), len(loop.inputs))
            self.assertEqual(len(loop.numpy_outputs), len(loop.outputs))
            if not output_types:
                # Add explicit outputs and check the result is the same
                loop_explicit = f(ufunc, list(input_types) + loop.outputs)
                self.assertEqual(loop_explicit, loop)
            else:
                self.assertEqual(loop.outputs, list(output_types))
            # Round-tripping inputs and outputs
            loop_rt = f(ufunc, loop.inputs + loop.outputs)
            self.assertEqual(loop_rt, loop)
            return loop

        def check_exact(ufunc, input_types, sigs, output_types=()):
            """
            Like check(), but also ensure no casting of inputs occurred.
            """
            loop = check(ufunc, input_types, sigs, output_types)
            self.assertEqual(loop.inputs, list(input_types))

        def check_no_match(ufunc, input_types):
            # No loop should be selected at all for these inputs.
            loop = f(ufunc, input_types)
            self.assertIs(loop, None)

        # Exact matching for number types
        check_exact(np_add, (types.bool_, types.bool_), "??->?")
        check_exact(np_add, (types.int8, types.int8), "bb->b")
        check_exact(np_add, (types.uint8, types.uint8), "BB->B")
        # int64/uint64 map to either 'l'/'q' ('L'/'Q') depending on platform
        check_exact(np_add, (types.int64, types.int64), ("ll->l", "qq->q"))
        check_exact(np_add, (types.uint64, types.uint64), ("LL->L", "QQ->Q"))
        check_exact(np_add, (types.float32, types.float32), "ff->f")
        check_exact(np_add, (types.float64, types.float64), "dd->d")
        check_exact(np_add, (types.complex64, types.complex64), "FF->F")
        check_exact(np_add, (types.complex128, types.complex128), "DD->D")

        # Exact matching for datetime64 and timedelta64 types
        check_exact(
            np_add,
            (types.NPTimedelta("s"), types.NPTimedelta("s")),
            "mm->m",
            output_types=(types.NPTimedelta("s"),),
        )
        check_exact(
            np_add,
            (types.NPTimedelta("ms"), types.NPDatetime("s")),
            "mM->M",
            output_types=(types.NPDatetime("ms"),),
        )
        check_exact(
            np_add,
            (types.NPDatetime("s"), types.NPTimedelta("s")),
            "Mm->M",
            output_types=(types.NPDatetime("s"),),
        )

        check_exact(
            np_mul,
            (types.NPTimedelta("s"), types.int64),
            "mq->m",
            output_types=(types.NPTimedelta("s"),),
        )
        check_exact(
            np_mul,
            (types.float64, types.NPTimedelta("s")),
            "dm->m",
            output_types=(types.NPTimedelta("s"),),
        )

        # Mix and match number types, with casting
        check(np_add, (types.bool_, types.int8), "bb->b")
        check(np_add, (types.uint8, types.bool_), "BB->B")
        check(np_add, (types.int16, types.uint16), "ii->i")
        check(np_add, (types.complex64, types.float64), "DD->D")
        check(np_add, (types.float64, types.complex64), "DD->D")
        # Integers, when used together with floating-point numbers,
        # should cast to any real or complex (see #2006)
        int_types = [types.int32, types.uint32, types.int64, types.uint64]
        for intty in int_types:
            check(np_add, (types.float32, intty), "ff->f")
            check(np_add, (types.float64, intty), "dd->d")
            check(np_add, (types.complex64, intty), "FF->F")
            check(np_add, (types.complex128, intty), "DD->D")
        # However, when used alone, they should cast only to
        # floating-point types of sufficient precision
        # (typical use case: np.sqrt(2) should give an accurate enough value)
        for intty in int_types:
            check(np_sqrt, (intty,), "d->d")
            check(np_isnan, (intty,), "d->?")

        # With some timedelta64 arguments as well
        check(
            np_mul,
            (types.NPTimedelta("s"), types.int32),
            "mq->m",
            output_types=(types.NPTimedelta("s"),),
        )
        check(
            np_mul,
            (types.NPTimedelta("s"), types.uint32),
            "mq->m",
            output_types=(types.NPTimedelta("s"),),
        )
        check(
            np_mul,
            (types.NPTimedelta("s"), types.float32),
            "md->m",
            output_types=(types.NPTimedelta("s"),),
        )
        check(
            np_mul,
            (types.float32, types.NPTimedelta("s")),
            "dm->m",
            output_types=(types.NPTimedelta("s"),),
        )

        # No match
        check_no_match(np_add, (types.NPDatetime("s"), types.NPDatetime("s")))
        # No implicit casting from int64 to timedelta64 (Numpy would allow
        # this).
        check_no_match(np_add, (types.NPTimedelta("s"), types.int64))
Ejemplo n.º 13
0
def _gen_pandas_read_csv_func_text(col_names,
                                   col_typs,
                                   py_col_dtypes,
                                   usecols,
                                   signature=None):
    """Generate the source of an objmode-wrapped CSV-reader function.

    Parameters
    ----------
    col_names : sequence of str
        Names of all columns present in the file.
    col_typs : sequence
        Column types used to build the resulting DataFrame type.
    py_col_dtypes : dict
        Python-level column dtypes, captured as a global for the inner
        pandas_read_csv() call (not passed through the jitted signature).
    usecols : sequence or None
        Columns to read; when given as names they replace *col_names*
        as the set of returned columns.
    signature : str, optional
        Parameter list of the generated function; defaults to a single
        ``filepath_or_buffer`` parameter.

    Returns
    -------
    tuple
        (function source text, function name, globals dict for exec).
    """
    func_name = 'csv_reader_py'
    # usecols given as column names take precedence over inferred names.
    # NOTE: this was previously computed twice; the duplicate is removed.
    return_columns = usecols if usecols and isinstance(usecols[0],
                                                       str) else col_names

    column_loc, _, _ = get_structure_maps(col_typs, return_columns)
    df_type = DataFrameType(tuple(col_typs),
                            types.none,
                            tuple(col_names),
                            column_loc=column_loc)

    df_type_repr = repr(df_type)
    # for some reason pandas and pyarrow read_csv() return CategoricalDtype with
    # ordered=False in case when dtype is with ordered=None
    df_type_repr = df_type_repr.replace('ordered=None', 'ordered=False')

    # TODO: support non-numpy types like strings
    # Positional indices of datetime64[ns] columns, passed to parse_dates.
    date_inds = ", ".join(
        str(i) for i, t in enumerate(col_typs)
        if t.dtype == types.NPDatetime('ns'))

    if signature is None:
        signature = "filepath_or_buffer"

    # map generated func params into values used in inner call of pandas_read_csv
    # if no transformation is needed just use outer param name (since APIs match)
    # otherwise use value in the dictionary
    inner_call_params = {'parse_dates': f"[{date_inds}]"}
    used_read_csv_params = ('filepath_or_buffer', 'names', 'skiprows',
                            'parse_dates', 'dtype', 'usecols', 'sep',
                            'delimiter')

    # pyarrow reads unnamed header as " ", pandas reads it as "Unnamed: N"
    # during inference from file names should be replaced with "Unnamed: N"
    # passing names to pyarrow means that one row is header and should be skipped
    if col_names and any(map(lambda x: x.startswith('Unnamed: '), col_names)):
        inner_call_params['names'] = str(col_names)
        inner_call_params['skiprows'] = "(skiprows and skiprows + 1) or 1"

    # dtype parameter of compiled function is not used at all, instead a python dict
    # of columns dtypes is captured at compile time, because some dtypes (like datetime)
    # are converted and also to avoid penalty of creating dict in objmode
    inner_call_params['dtype'] = 'read_as_dtypes'

    params_str = '\n'.join([
        f"      {param}={inner_call_params.get(param, param)},"
        for param in used_read_csv_params
    ])
    func_text = '\n'.join([
        f"def {func_name}({signature}):",
        f"  with objmode(df=\"{df_type_repr}\"):",
        f"    df = pandas_read_csv(\n{params_str}", f"    )", f"  return df"
    ])

    global_vars = {
        'read_as_dtypes': py_col_dtypes,
        'objmode': objmode,
        'pandas_read_csv': pandas_read_csv,
    }

    return func_text, func_name, global_vars