Example #1
 def raise_exc(self, data, expected_types, name=''):
     """
     Raise an exception with a unified message.

     Parameters
     ----------
     data: :obj:`any`
         actual type of the data
     expected_types: :obj:`str`
         expected types, inserted directly into the exception message
     name: :obj:`str`
         name of the parameter
     """
     msg = self.msg_template.format(self.func_name, name, data, expected_types)
     raise TypingError(msg)
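For context, the method above belongs to a type-checking helper that later examples use as ty_checker = TypeChecker(...), ty_checker.check(...) and ty_checker.raise_exc(...). A minimal sketch of how such a helper could be assembled, with msg_template and check() inferred from those call sites (the real class lives in the SDC sources and may differ; the import path varies across Numba versions):

from numba.core.errors import TypingError  # older Numba: numba.errors


class TypeChecker:
    # Illustrative template only; the actual wording is defined in the SDC sources.
    msg_template = '{} The type of argument "{}" is {}; expected: {}'

    def __init__(self, func_name):
        self.func_name = func_name

    def raise_exc(self, data, expected_types, name=''):
        # same argument order as in Example #1 above
        msg = self.msg_template.format(self.func_name, name, data, expected_types)
        raise TypingError(msg)

    def check(self, data, accepted_type, name=''):
        # assumed behavior, inferred from ty_checker.check(self, SeriesType) in later examples
        if not isinstance(data, accepted_type):
            self.raise_exc(data, accepted_type.__name__, name=name)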
Example #2
def unicode_rjust(string, width, fillchar=' '):
    if not isinstance(width, types.Integer):
        raise TypingError('The width must be an Integer')
    if not (fillchar == ' ' or isinstance(fillchar,
                                          (types.Omitted, types.UnicodeType))):
        raise TypingError('The fillchar must be a UnicodeType')

    def rjust_impl(string, width, fillchar=' '):
        str_len = len(string)
        fillchar_len = len(fillchar)

        if fillchar_len != 1:
            raise ValueError(
                'The fill character must be exactly one character long')

        if width <= str_len:
            return string

        newstr = (fillchar * (width - str_len)) + string

        return newstr

    return rjust_impl
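For reference, a function like this is wired up as a Numba overload of str.rjust and is reached through ordinary method calls in nopython mode. A hedged usage sketch (the decorator is omitted from the snippet above, so the registration shown in the comment is an assumption; import paths may vary across Numba versions):

from numba import njit

# Assumed registration for the snippet above:
# from numba.extending import overload_method
# from numba.core import types
# @overload_method(types.UnicodeType, 'rjust')
# def unicode_rjust(string, width, fillchar=' '):
#     ...

@njit
def pad(s, width):
    # dispatches to rjust_impl from the example above
    return s.rjust(width, '*')

print(pad('abc', 6))  # expected: '***abc'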
Example #3
def unicode_center(string, width, fillchar=' '):
    if not isinstance(width, types.Integer):
        raise TypingError('The width must be an Integer')

    if isinstance(fillchar, types.UnicodeCharSeq):

        def center_impl(string, width, fillchar):
            return string.center(width, str(fillchar))

        return center_impl

    if not (fillchar == ' ' or isinstance(fillchar,
                                          (types.Omitted, types.UnicodeType))):
        raise TypingError('The fillchar must be a UnicodeType')

    def center_impl(string, width, fillchar=' '):
        str_len = len(string)
        fillchar_len = len(fillchar)

        if fillchar_len != 1:
            raise ValueError(
                'The fill character must be exactly one character long')

        if width <= str_len:
            return string

        allmargin = width - str_len
        lmargin = (allmargin // 2) + (allmargin & width & 1)
        rmargin = allmargin - lmargin

        l_string = fillchar * lmargin
        if lmargin == rmargin:
            return l_string + string + l_string
        else:
            return l_string + string + (fillchar * rmargin)

    return center_impl
Example #4
    def resolve_argsort(self, ary, args, kws):
        assert not args
        kwargs = dict(kws)
        kind = kwargs.pop('kind', types.StringLiteral('quicksort'))
        if kwargs:
            msg = "Unsupported keywords: {!r}"
            raise TypingError(msg.format([k for k in kwargs.keys()]))
        if ary.ndim == 1:

            def argsort_stub(kind='quicksort'):
                pass

            pysig = utils.pysignature(argsort_stub)
            sig = signature(types.Array(types.intp, 1, 'C'),
                            kind).replace(pysig=pysig)
            return sig
Example #5
def _sentry_safe_cast(fromty, toty):
    """Check and raise TypingError if *fromty* cannot be safely cast to *toty*
    """
    tyctxt = cpu_target.typing_context
    by = tyctxt.can_convert(fromty, toty)
    if by is None or by > Conversion.safe:
        if isinstance(fromty, types.Integer) and isinstance(toty, types.Integer):
            # Accept if both types are ints
            return
        if isinstance(fromty, types.Integer) and isinstance(toty, types.Float):
            # Accept if ints to floats
            return
        if isinstance(fromty, types.Float) and isinstance(toty, types.Float):
            # Accept if floats to floats
            return
        raise TypingError('cannot safely cast {} to {}'.format(fromty, toty))
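A few hedged example calls showing the intended behavior of this guard, assuming the module-level cpu_target and Conversion from the snippet are in place:

from numba.core import types
from numba.core.errors import TypingError  # older Numba: numba.errors

_sentry_safe_cast(types.int32, types.int64)    # safe widening: passes
_sentry_safe_cast(types.int64, types.float64)  # int -> float: explicitly accepted above
try:
    _sentry_safe_cast(types.float64, types.int32)  # lossy conversion: not accepted
except TypingError as exc:
    print(exc)  # "cannot safely cast float64 to int32"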
Example #6
def _as_meminfo(typingctx, dctobj):
    """Returns the MemInfoPointer of a dictionary.
    """
    if not isinstance(dctobj, types.DictType):
        raise TypingError('expected *dctobj* to be a DictType')

    def codegen(context, builder, sig, args):
        [td] = sig.args
        [d] = args
        # Incref
        context.nrt.incref(builder, td, d)
        ctor = cgutils.create_struct_proxy(td)
        dstruct = ctor(context, builder, value=d)
        # Returns the plain MemInfo
        return dstruct.meminfo

    sig = _meminfo_dictptr(dctobj)
    return sig, codegen
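Functions with this (typingctx, ...) signature that return a (signature, codegen) pair are Numba intrinsics. A hedged sketch of how the function above is typically declared and called from jitted code (the decorator is omitted in the snippet, so this wiring is an assumption and not part of any public API):

from numba import njit
from numba.extending import intrinsic

# Assumed declaration for the snippet above:
# @intrinsic
# def _as_meminfo(typingctx, dctobj):
#     ...

@njit
def dict_meminfo(d):
    # For a numba.typed.Dict argument, this returns the MemInfo that owns
    # the dictionary's payload (illustrative use of an internal helper).
    return _as_meminfo(d)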
Example #7
def _as_meminfo(typingctx, lstobj):
    """Returns the MemInfoPointer of a list.
    """
    if not isinstance(lstobj, types.ListType):
        raise TypingError('expected *lstobj* to be a ListType')

    def codegen(context, builder, sig, args):
        [tl] = sig.args
        [l] = args
        # Incref
        context.nrt.incref(builder, tl, l)
        ctor = cgutils.create_struct_proxy(tl)
        lstruct = ctor(context, builder, value=l)
        # Returns the plain MemInfo
        return lstruct.meminfo

    sig = _meminfo_listptr(lstobj)
    return sig, codegen
Example #8
def impl_getitem(l, index):
    if not isinstance(l, types.ListType):
        return

    indexty = INDEXTY
    itemty = l.item_type
    IS_NOT_NONE = not isinstance(l.item_type, types.NoneType)

    if index in index_types:
        if IS_NOT_NONE:

            def integer_non_none_impl(l, index):
                index = handle_index(l, index)
                castedindex = _cast(index, indexty)
                status, item = _list_getitem(l, castedindex)
                if status == ListStatus.LIST_OK:
                    return _nonoptional(item)
                else:
                    raise AssertionError("internal list error during getitem")

            return integer_non_none_impl
        else:

            def integer_none_impl(l, index):
                index = handle_index(l, index)
                return None

            return integer_none_impl

    elif isinstance(index, types.SliceType):

        def slice_impl(l, index):
            newl = new_list(itemty)
            for i in handle_slice(l, index):
                newl.append(l[i])
            return newl

        return slice_impl

    else:
        raise TypingError("list indices must be integers or slices")
Example #9
def impl_delitem(l, index):
    if not isinstance(l, types.ListType):
        return

    if index in types.signed_domain:
        def integer_impl(l, index):
            l.pop(index)

        return integer_impl

    elif isinstance(index, types.SliceType):
        def slice_impl(l, index):
            slice_range = handle_slice(l, index)
            _list_delete_slice(l,
                               slice_range.start,
                               slice_range.stop,
                               slice_range.step)
        return slice_impl

    else:
        raise TypingError("list indices must be signed integers or slices")
Example #10
def impl_insert(l, index, item):
    if not isinstance(l, types.ListType):
        return

    if index in index_types:

        def impl(l, index, item):
            # If the index is larger than the size of the list or if the list is
            # empty, just append.
            if index >= len(l) or len(l) == 0:
                l.append(item)
            # Else, do the insert dance
            else:
                # convert negative indices
                if index < 0:
                    # if the index is still negative after conversion, use 0
                    index = max(len(l) + index, 0)
                # grow the list by one, make room for item to insert
                l.append(l[0])
                # reverse iterate over the list and shift all elements
                i = len(l) - 1
                while (i > index):
                    l[i] = l[i - 1]
                    i -= 1
                # finally, insert the item
                l[index] = item

        if l.is_precise():
            # Handle the precise case.
            return impl
        else:
            # Handle the imprecise case
            l = l.refine(item)
            # Re-bind the item type to match the arguments.
            itemty = l.item_type
            # Create the signature that we wanted this impl to have.
            sig = typing.signature(types.void, l, INDEXTY, itemty)
            return sig, impl
    else:
        raise TypingError("list insert indices must be integers")
Example #11
def hpat_pandas_series_values(self):
    """
    Pandas Series attribute 'values' implementation.
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.values.html#pandas.Series.values

    Algorithm: result = series.values
    Where:
        series: pandas.series
        result: pandas.series as ndarray

    Test:  python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_values
    """

    _func_name = 'Attribute values.'

    if not isinstance(self, SeriesType):
        raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))

    def hpat_pandas_series_values_impl(self):
        return self._data

    return hpat_pandas_series_values_impl
Example #12
def unicode_zfill(string, width):
    if not isinstance(width, types.Integer):
        raise TypingError("<width> must be an Integer")

    def zfill_impl(string, width):

        str_len = len(string)

        if width <= str_len:
            return string

        first_char = string[0] if str_len else ''
        padding = '0' * (width - str_len)

        if first_char in ['+', '-']:
            newstr = first_char + padding + string[1:]
        else:
            newstr = padding + string

        return newstr

    return zfill_impl
Example #13
def to_fixed_tuple(typingctx, array, length):
    """Convert *array* into a tuple of *length*

    Returns ``UniTuple(array.dtype, length)``

    **Warning**
    - No bounds checking.
      If *length* is longer than *array.size*, the behavior is undefined.
    """
    if not isinstance(length, types.Const):
        raise RequireConstValue('*length* argument must be a constant')

    if array.ndim != 1:
        raise TypingError("Not supported on array.ndim={}".format(array.ndim))

    # Determine types
    tuple_size = int(length.value)
    tuple_type = types.UniTuple(dtype=array.dtype, count=tuple_size)
    sig = tuple_type(array, length)

    def codegen(context, builder, signature, args):
        def impl(array, length, empty_tuple):
            out = empty_tuple
            for i in range(length):
                out = tuple_setitem(out, i, array[i])
            return out

        inner_argtypes = [signature.args[0], types.intp, tuple_type]
        inner_sig = typing.signature(tuple_type, *inner_argtypes)
        ll_idx_type = context.get_value_type(types.intp)
        # Allocate an empty tuple
        empty_tuple = context.get_constant_undef(tuple_type)
        inner_args = [args[0], ll_idx_type(tuple_size), empty_tuple]

        res = context.compile_internal(builder, impl, inner_sig, inner_args)
        return res

    return sig, codegen
Example #14
def impl_pop(l, index=-1):
    if not isinstance(l, types.ListType):
        return

    indexty = INDEXTY

    # FIXME: this type check works, but it isn't clear why, or whether it is optimal
    if (isinstance(index, int)
            or index in types.signed_domain
            or isinstance(index, types.Omitted)):
        def impl(l, index=-1):
            if len(l) == 0:
                raise IndexError("pop from empty list")
            index = handle_index(l, index)
            castedindex = _cast(index, indexty)
            status, item = _list_pop(l, castedindex)
            if status == ListStatus.LIST_OK:
                return _nonoptional(item)
            else:
                raise AssertionError("internal list error during pop")
        return impl

    else:
        raise TypingError("argument for pop must be a signed integer")
Example #15
 def check_arg(arg, name):
     if not (arg is None or arg in index_types
             or isinstance(arg, (types.Omitted, types.NoneType))):
         raise TypingError(
             "{} argument for index must be an integer".format(name))
Example #16
def sdc_pandas_series_comp_binop(self, other, level=None, fill_value=None, axis=0):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.Series.comp_binop

    Limitations
    -----------
    Parameters ``level`` and ``axis`` are currently unsupported by Intel Scalable Dataframe Compiler

    Examples
    --------
    .. literalinclude:: ../../../examples/series/series_comp_binop.py
       :language: python
       :lines: 27-
       :caption:
       :name: ex_series_comp_binop

    .. command-output:: python ./series/series_comp_binop.py
       :cwd: ../../../examples

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas Series method :meth:`pandas.Series.comp_binop` implementation.

    .. only:: developer
        Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op8
    """

    _func_name = 'Method comp_binop().'

    ty_checker = TypeChecker(_func_name)
    ty_checker.check(self, SeriesType)

    if not (isinstance(level, types.Omitted) or level is None):
        ty_checker.raise_exc(level, 'None', 'level')

    if not isinstance(fill_value, (types.Omitted, types.Number, types.NoneType)) and fill_value is not None:
        ty_checker.raise_exc(fill_value, 'number', 'fill_value')

    if not (isinstance(axis, types.Omitted) or axis == 0):
        ty_checker.raise_exc(axis, 'int', 'axis')

    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    if not isinstance(self, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    fill_value_is_none = isinstance(fill_value, (types.NoneType, types.Omitted)) or fill_value is None
    if not operands_are_series:
        def _series_comp_binop_scalar_impl(self, other, level=None, fill_value=None, axis=0):
            if self_is_series == True:  # noqa
                if not (fill_value is None or numpy.isnan(fill_value)):
                    numpy_like.fillna(self._data, inplace=True, value=fill_value)
                return pandas.Series(self._data < other, index=self._index, name=self._name)
            else:
                if not (fill_value is None or numpy.isnan(fill_value)):
                    numpy_like.fillna(other._data, inplace=True, value=fill_value)
                return pandas.Series(self < other._data, index=other._index, name=other._name)

        return _series_comp_binop_scalar_impl

    else:

        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
            def _series_comp_binop_none_indexes_impl(self, other, level=None, fill_value=None, axis=0):
                if not (fill_value is None or numpy.isnan(fill_value)):
                    numpy_like.fillna(self._data, inplace=True, value=fill_value)
                    numpy_like.fillna(other._data, inplace=True, value=fill_value)
                left_size, right_size = len(self._data), len(other._data)
                if (left_size == right_size):
                    return pandas.Series(self._data < other._data)
                else:
                    raise ValueError("Can only compare identically-labeled Series objects")

            return _series_comp_binop_none_indexes_impl
        else:

            if none_or_numeric_indexes:
                ty_left_index_dtype = types.int64 if isinstance(self.index, types.NoneType) else self.index.dtype
                ty_right_index_dtype = types.int64 if isinstance(other.index, types.NoneType) else other.index.dtype
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [ty_left_index_dtype, ty_right_index_dtype], [])

            def _series_comp_binop_common_impl(self, other, level=None, fill_value=None, axis=0):
                if not (fill_value is None or numpy.isnan(fill_value)):
                    numpy_like.fillna(self._data, inplace=True, value=fill_value)
                    numpy_like.fillna(other._data, inplace=True, value=fill_value)
                left_index, right_index = self.index, other.index

                if sdc_check_indexes_equal(left_index, right_index):
                    if none_or_numeric_indexes == True:  # noqa
                        new_index = numpy_like.astype(left_index, numba_index_common_dtype)
                    else:
                        new_index = self._index
                    return pandas.Series(self._data < other._data,
                                         new_index)
                else:
                    raise ValueError("Can only compare identically-labeled Series objects")

            return _series_comp_binop_common_impl

    return None
Example #17
def impl_setitem(l, index, item):
    if not isinstance(l, types.ListType):
        return

    indexty = INDEXTY
    itemty = l.item_type

    if index in index_types:

        def impl_integer(l, index, item):
            index = handle_index(l, index)
            castedindex = _cast(index, indexty)
            casteditem = _cast(item, itemty)
            status = _list_setitem(l, castedindex, casteditem)
            if status == ListStatus.LIST_OK:
                return
            else:
                raise AssertionError("internal list error during settitem")

        return impl_integer

    elif isinstance(index, types.SliceType):
        if not isinstance(item, types.IterableType):
            raise TypingError("can only assign an iterable when using a slice "
                              "with assignment/setitem")

        def impl_slice(l, index, item):
            # special case "a[i:j] = a", need to copy first
            if l == item:
                item = item.copy()
            slice_range = handle_slice(l, index)
            # non-extended (simple) slices
            if slice_range.step == 1:
                # replace
                if len(item) == len(slice_range):
                    for i, j in zip(slice_range, item):
                        l[i] = j
                # replace and insert
                if len(item) > len(slice_range):
                    # do the replaces we can
                    for i, j in zip(slice_range, item[:len(slice_range)]):
                        l[i] = j
                    # insert the remaining ones
                    insert_range = range(
                        slice_range.stop,
                        slice_range.stop + len(item) - len(slice_range))
                    for i, k in zip(insert_range, item[len(slice_range):]):
                        # FIXME: This may be slow.  Each insert can incur a
                        # memory copy of one or more items.
                        l.insert(i, k)
                # replace and delete
                if len(item) < len(slice_range):
                    # do the replaces we can
                    replace_range = range(slice_range.start,
                                          slice_range.start + len(item))
                    for i, j in zip(replace_range, item):
                        l[i] = j
                    # delete remaining ones
                    del l[slice_range.start + len(item):slice_range.stop]
            # Extended slices
            else:
                if len(slice_range) != len(item):
                    raise ValueError(
                        "length mismatch for extended slice and sequence")
                # extended slice can only replace
                for i, j in zip(slice_range, item):
                    l[i] = j

        return impl_slice

    else:
        raise TypingError("list indices must be integers or slices")
Example #18
def sdc_pandas_series_operator_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.binop` implementation

    Note: Currently implemented for numeric Series only.
        Differs from Pandas in returning Series with fixed dtype :obj:`float64`

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op1*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op2*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_binop*

    Parameters
    ----------
    series: :obj:`pandas.Series`
        Input series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation
    """

    _func_name = 'Operator binop().'

    ty_checker = TypeChecker(_func_name)
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    # this overload is not for string series
    self_is_string_series = self_is_series and isinstance(self.dtype, types.UnicodeType)
    other_is_string_series = other_is_series and isinstance(other.dtype, types.UnicodeType)
    if self_is_string_series or other_is_string_series:
        return None

    if not isinstance(self, (SeriesType, types.Number)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    def sdc_pandas_series_operator_binop_impl(self, other):
        return self.binop(other)

    return sdc_pandas_series_operator_binop_impl
Example #19
def sdc_pandas_series_operator_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.binop` implementation

    Note: Currently implemented for numeric Series only.
        Differs from Pandas in returning Series with fixed dtype :obj:`float64`

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op1*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op2*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_binop*

    Parameters
    ----------
    series: :obj:`pandas.Series`
        Input series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation
    """

    _func_name = 'Operator binop().'

    ty_checker = TypeChecker('Operator binop().')
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(
        other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    # this overload is not for string series
    self_is_string_series = self_is_series and isinstance(
        self.dtype, types.UnicodeType)
    other_is_string_series = other_is_series and isinstance(
        other.dtype, types.UnicodeType)
    if self_is_string_series or other_is_string_series:
        return None

    if not isinstance(self, (SeriesType, types.Number)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType)
                                    or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType)
                                        or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(
            self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError(
                '{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(
                    _func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    # specializations for numeric series only
    if not operands_are_series:

        def _series_operator_binop_scalar_impl(self, other):
            if self_is_series == True:  # noqa
                result_data = numpy.empty(len(self._data), dtype=numpy.float64)
                result_data[:] = self._data + numpy.float64(other)
                return pandas.Series(result_data,
                                     index=self._index,
                                     name=self._name)
            else:
                result_data = numpy.empty(len(other._data),
                                          dtype=numpy.float64)
                result_data[:] = numpy.float64(self) + other._data
                return pandas.Series(result_data,
                                     index=other._index,
                                     name=other._name)

        return _series_operator_binop_scalar_impl

    else:  # both operands are numeric series

        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType)
                and isinstance(other.index, types.NoneType)):

            def _series_operator_binop_none_indexes_impl(self, other):

                if (len(self._data) == len(other._data)):
                    result_data = astype(self._data, numpy.float64)
                    result_data = result_data + other._data
                    return pandas.Series(result_data)
                else:
                    left_size, right_size = len(self._data), len(other._data)
                    min_data_size = min(left_size, right_size)
                    max_data_size = max(left_size, right_size)
                    result_data = numpy.empty(max_data_size,
                                              dtype=numpy.float64)
                    if (left_size == min_data_size):
                        result_data[:min_data_size] = self._data
                        result_data[min_data_size:] = numpy.nan
                        result_data = result_data + other._data
                    else:
                        result_data[:min_data_size] = other._data
                        result_data[min_data_size:] = numpy.nan
                        result_data = self._data + result_data

                    return pandas.Series(result_data)

            return _series_operator_binop_none_indexes_impl
        else:
            # for numeric indexes find common dtype to be used when creating joined index
            if none_or_numeric_indexes:
                ty_left_index_dtype = types.int64 if isinstance(
                    self.index, types.NoneType) else self.index.dtype
                ty_right_index_dtype = types.int64 if isinstance(
                    other.index, types.NoneType) else other.index.dtype
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [ty_left_index_dtype, ty_right_index_dtype], [])

            def _series_operator_binop_common_impl(self, other):
                left_index, right_index = self.index, other.index

                # check if indexes are equal and series don't have to be aligned
                if sdc_check_indexes_equal(left_index, right_index):
                    result_data = numpy.empty(len(self._data),
                                              dtype=numpy.float64)
                    result_data[:] = self._data + other._data

                    if none_or_numeric_indexes == True:  # noqa
                        result_index = astype(left_index,
                                              numba_index_common_dtype)
                    else:
                        result_index = self._index

                    return pandas.Series(result_data, index=result_index)

                # TODO: replace below with core join(how='outer', return_indexers=True) when implemented
                joined_index, left_indexer, right_indexer = sdc_join_series_indexes(
                    left_index, right_index)

                result_size = len(joined_index)
                left_values = numpy.empty(result_size, dtype=numpy.float64)
                right_values = numpy.empty(result_size, dtype=numpy.float64)
                for i in numba.prange(result_size):
                    left_pos, right_pos = left_indexer[i], right_indexer[i]
                    left_values[i] = self._data[
                        left_pos] if left_pos != -1 else numpy.nan
                    right_values[i] = other._data[
                        right_pos] if right_pos != -1 else numpy.nan

                result_data = left_values + right_values
                return pandas.Series(result_data, joined_index)

            return _series_operator_binop_common_impl

    return None
Example #20
def sdc_pandas_series_operator_comp_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.comp_binop` implementation

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op7*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_comp_binop*

    Parameters
    ----------
    series: :obj:`pandas.Series`
        Input series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation
    """

    _func_name = 'Operator comp_binop().'

    ty_checker = TypeChecker('Operator comp_binop().')
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(
        other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    if not isinstance(self, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType)
                                    or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType)
                                        or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(
            self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError(
                '{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(
                    _func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    if not operands_are_series:

        def _series_operator_comp_binop_scalar_impl(self, other):
            if self_is_series == True:  # noqa
                return pandas.Series(self._data < other,
                                     index=self._index,
                                     name=self._name)
            else:
                return pandas.Series(self < other._data,
                                     index=other._index,
                                     name=other._name)

        return _series_operator_comp_binop_scalar_impl

    else:

        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType)
                and isinstance(other.index, types.NoneType)):

            def _series_operator_comp_binop_none_indexes_impl(self, other):
                left_size, right_size = len(self._data), len(other._data)
                if (left_size == right_size):
                    return pandas.Series(self._data < other._data)
                else:
                    raise ValueError(
                        "Can only compare identically-labeled Series objects")

            return _series_operator_comp_binop_none_indexes_impl
        else:

            if none_or_numeric_indexes:
                ty_left_index_dtype = types.int64 if isinstance(
                    self.index, types.NoneType) else self.index.dtype
                ty_right_index_dtype = types.int64 if isinstance(
                    other.index, types.NoneType) else other.index.dtype
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [ty_left_index_dtype, ty_right_index_dtype], [])

            def _series_operator_comp_binop_common_impl(self, other):
                left_index, right_index = self.index, other.index

                if sdc_check_indexes_equal(left_index, right_index):
                    if none_or_numeric_indexes == True:  # noqa
                        new_index = astype(left_index,
                                           numba_index_common_dtype)
                    else:
                        new_index = self._index
                    return pandas.Series(self._data < other._data, new_index)
                else:
                    raise ValueError(
                        "Can only compare identically-labeled Series objects")

            return _series_operator_comp_binop_common_impl

    return None
Example #21
def sdc_pandas_read_csv(
    filepath_or_buffer,
    sep=',',
    delimiter=None,
    # Column and Index Locations and Names
    header="infer",
    names=None,
    index_col=None,
    usecols=None,
    squeeze=False,
    prefix=None,
    mangle_dupe_cols=True,
    # General Parsing Configuration
    dtype=None,
    engine=None,
    converters=None,
    true_values=None,
    false_values=None,
    skipinitialspace=False,
    skiprows=None,
    skipfooter=0,
    nrows=None,
    # NA and Missing Data Handling
    na_values=None,
    keep_default_na=True,
    na_filter=True,
    verbose=False,
    skip_blank_lines=True,
    # Datetime Handling
    parse_dates=False,
    infer_datetime_format=False,
    keep_date_col=False,
    date_parser=None,
    dayfirst=False,
    cache_dates=True,
    # Iteration
    iterator=False,
    chunksize=None,
    # Quoting, Compression, and File Format
    compression="infer",
    thousands=None,
    decimal=b".",
    lineterminator=None,
    quotechar='"',
    # quoting=csv.QUOTE_MINIMAL,  # not supported
    doublequote=True,
    escapechar=None,
    comment=None,
    encoding=None,
    dialect=None,
    # Error Handling
    error_bad_lines=True,
    warn_bad_lines=True,
    # Internal
    delim_whitespace=False,
    # low_memory=_c_parser_defaults["low_memory"],  # not supported
    memory_map=False,
    float_precision=None,
):
    signature = """
        filepath_or_buffer,
        sep=',',
        delimiter=None,
        # Column and Index Locations and Names
        header="infer",
        names=None,
        index_col=None,
        usecols=None,
        squeeze=False,
        prefix=None,
        mangle_dupe_cols=True,
        # General Parsing Configuration
        dtype=None,
        engine=None,
        converters=None,
        true_values=None,
        false_values=None,
        skipinitialspace=False,
        skiprows=None,
        skipfooter=0,
        nrows=None,
        # NA and Missing Data Handling
        na_values=None,
        keep_default_na=True,
        na_filter=True,
        verbose=False,
        skip_blank_lines=True,
        # Datetime Handling
        parse_dates=False,
        infer_datetime_format=False,
        keep_date_col=False,
        date_parser=None,
        dayfirst=False,
        cache_dates=True,
        # Iteration
        iterator=False,
        chunksize=None,
        # Quoting, Compression, and File Format
        compression="infer",
        thousands=None,
        decimal=b".",
        lineterminator=None,
        quotechar='"',
        # quoting=csv.QUOTE_MINIMAL,  # not supported
        doublequote=True,
        escapechar=None,
        comment=None,
        encoding=None,
        dialect=None,
        # Error Handling
        error_bad_lines=True,
        warn_bad_lines=True,
        # Internal
        delim_whitespace=False,
        # low_memory=_c_parser_defaults["low_memory"],  # not supported
        memory_map=False,
        float_precision=None,
    """

    # read_csv can infer the resulting DataFrame type from the file or from the params

    # for inference from file, these parameters should be literal or omitted
    infer_from_file = all([
        isinstance(filepath_or_buffer, types.Literal),
        isinstance(sep, (types.Literal, types.Omitted)) or sep == ',',
        isinstance(delimiter,
                   (types.Literal, types.Omitted)) or delimiter is None,
        isinstance(names, (types.Tuple, types.Omitted, type(None))),
        isinstance(usecols, (types.Tuple, types.Omitted, type(None))),
        isinstance(skiprows, (types.Literal, types.Omitted))
        or skiprows is None,
    ])

    # for inference from params, dtype and (names or usecols) should be present;
    # names, dtype and usecols should be literal tuples after the rewrite pass (see RewriteReadCsv)
    # header is not supported
    infer_from_params = all([
        isinstance(dtype, types.Tuple),
        any([
            isinstance(names, types.Tuple)
            and isinstance(usecols, types.Tuple),
            isinstance(names, types.Tuple)
            and isinstance(usecols, (types.Omitted, type(None))),
            isinstance(names, (types.Omitted, type(None)))
            and isinstance(usecols, types.Tuple),
        ]),
        isinstance(header, types.Omitted) or header == 'infer',
    ])

    # cannot create the function if the parameters do not provide enough info
    if not any([infer_from_file, infer_from_params]):
        msg = "Cannot infer resulting DataFrame from constant file or parameters."
        raise TypingError(msg)

    if infer_from_file:
        # parameters should be constants and are important only for inference from file

        if isinstance(filepath_or_buffer, types.Literal):
            filepath_or_buffer = filepath_or_buffer.literal_value

        if isinstance(sep, types.Literal):
            sep = sep.literal_value

        if isinstance(delimiter, types.Literal):
            delimiter = delimiter.literal_value

        # Alias sep -> delimiter.
        if delimiter is None:
            delimiter = sep

        if isinstance(skiprows, types.Literal):
            skiprows = skiprows.literal_value

    # names and usecols influence both inference from file and inference from params
    if isinstance(names, types.Tuple):
        assert all(isinstance(name, types.Literal) for name in names)
        names = [name.literal_value for name in names]

    if isinstance(usecols, types.Tuple):
        assert all(isinstance(col, types.Literal) for col in usecols)
        usecols = [col.literal_value for col in usecols]

    if infer_from_params:
        # dtype should be constant and is important only for inference from params
        if isinstance(dtype, types.Tuple):
            assert all(isinstance(key, types.Literal) for key in dtype[::2])
            keys = (k.literal_value for k in dtype[::2])

            values = dtype[1::2]
            values = [
                v.typing_key if isinstance(v, types.Function) else v
                for v in values
            ]
            values = [
                types.Array(numba.from_dtype(np.dtype(v.literal_value)), 1,
                            'C') if isinstance(v, types.Literal) else v
                for v in values
            ]
            values = [
                types.Array(types.int_, 1, 'C') if v == int else v
                for v in values
            ]
            values = [
                types.Array(types.float64, 1, 'C') if v == float else v
                for v in values
            ]
            values = [string_array_type if v == str else v for v in values]

            dtype = dict(zip(keys, values))

    # if both are available,
    # inference from params takes priority over inference from file
    if infer_from_params:
        col_names = names
        # all names should be in dtype
        return_columns = usecols if usecols else names
        col_typs = [dtype[n] for n in return_columns]

    elif infer_from_file:
        col_names, col_typs = infer_column_names_and_types_from_constant_filename(
            filepath_or_buffer, delimiter, names, usecols, skiprows)

    else:
        return None

    dtype_present = not isinstance(dtype, (types.Omitted, type(None)))

    # generate the function text with this signature, returning a DataFrame
    func_text, func_name = _gen_csv_reader_py_pyarrow_func_text_dataframe(
        col_names, col_typs, dtype_present, usecols, signature)

    # compile with Python
    csv_reader_py = _gen_csv_reader_py_pyarrow_py_func(func_text, func_name)

    return csv_reader_py
Example #22
def sdc_pandas_series_binop(self, other, level=None, fill_value=None, axis=0):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.Series.binop

    Limitations
    -----------
    Parameters ``level`` and ``axis`` are currently unsupported by Intel Scalable Dataframe Compiler

    Examples
    --------
    .. literalinclude:: ../../../examples/series/series_binop.py
       :language: python
       :lines: 27-
       :caption:
       :name: ex_series_binop

    .. command-output:: python ./series/series_binop.py
       :cwd: ../../../examples

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas Series method :meth:`pandas.Series.binop` implementation.

    .. only:: developer
        Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5
    """

    _func_name = 'Method binop().'

    ty_checker = TypeChecker(_func_name)
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    # this overload is not for string series
    self_is_string_series = self_is_series and isinstance(self.dtype, types.UnicodeType)
    other_is_string_series = other_is_series and isinstance(other.dtype, types.UnicodeType)
    if self_is_string_series or other_is_string_series:
        return None

    if not isinstance(self, (SeriesType, types.Number)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    if not isinstance(level, types.Omitted) and level is not None:
        ty_checker.raise_exc(level, 'None', 'level')

    if not isinstance(fill_value, (types.Omitted, types.Number, types.NoneType)) and fill_value is not None:
        ty_checker.raise_exc(fill_value, 'number', 'fill_value')

    if not isinstance(axis, types.Omitted) and axis != 0:
        ty_checker.raise_exc(axis, 'int', 'axis')
    fill_value_is_none = isinstance(fill_value, (types.NoneType, types.Omitted)) or fill_value is None
    # specializations for numeric series only
    if not operands_are_series:
        def _series_binop_scalar_impl(self, other, level=None, fill_value=None, axis=0):
            if self_is_series == True:  # noqa
                if not (fill_value is None or numpy.isnan(fill_value)):
                    numpy_like.fillna(self._data, inplace=True, value=fill_value)
                result_data = numpy.empty(len(self._data), dtype=numpy.float64)
                result_data[:] = self._data + numpy.float64(other)
                return pandas.Series(result_data, index=self._index, name=self._name)
            else:
                if not (fill_value is None or numpy.isnan(fill_value)):
                    numpy_like.fillna(other._data, inplace=True, value=fill_value)
                result_data = numpy.empty(len(other._data), dtype=numpy.float64)
                result_data[:] = numpy.float64(self) + other._data
                return pandas.Series(result_data, index=other._index, name=other._name)

        return _series_binop_scalar_impl

    else:   # both operands are numeric series
        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
            def _series_binop_none_indexes_impl(self, other, level=None, fill_value=None, axis=0):
                _fill_value = numpy.nan if fill_value_is_none == True else fill_value  # noqa
                if not (fill_value is None or numpy.isnan(fill_value)):
                    numpy_like.fillna(self._data, inplace=True, value=fill_value)
                    numpy_like.fillna(other._data, inplace=True, value=fill_value)

                if (len(self._data) == len(other._data)):
                    result_data = numpy_like.astype(self._data, numpy.float64)
                    result_data = result_data + other._data
                    return pandas.Series(result_data)
                else:
                    left_size, right_size = len(self._data), len(other._data)
                    min_data_size = min(left_size, right_size)
                    max_data_size = max(left_size, right_size)
                    result_data = numpy.empty(max_data_size, dtype=numpy.float64)
                    if (left_size == min_data_size):
                        result_data[:min_data_size] = self._data
                        for i in range(min_data_size, len(result_data)):
                            result_data[i] = _fill_value
                        result_data = result_data + other._data
                    else:
                        result_data[:min_data_size] = other._data
                        for i in range(min_data_size, len(result_data)):
                            result_data[i] = _fill_value
                        result_data = self._data + result_data

                    return pandas.Series(result_data)

            return _series_binop_none_indexes_impl
        else:
            # for numeric indexes find common dtype to be used when creating joined index
            if none_or_numeric_indexes:
                ty_left_index_dtype = types.int64 if isinstance(self.index, types.NoneType) else self.index.dtype
                ty_right_index_dtype = types.int64 if isinstance(other.index, types.NoneType) else other.index.dtype
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [ty_left_index_dtype, ty_right_index_dtype], [])

            def _series_binop_common_impl(self, other, level=None, fill_value=None, axis=0):
                left_index, right_index = self.index, other.index
                _fill_value = numpy.nan if fill_value_is_none == True else fill_value  # noqa
                if not (fill_value is None or numpy.isnan(fill_value)):
                    numpy_like.fillna(self._data, inplace=True, value=fill_value)
                    numpy_like.fillna(other._data, inplace=True, value=fill_value)
                # check if indexes are equal and series don't have to be aligned
                if sdc_check_indexes_equal(left_index, right_index):
                    result_data = numpy.empty(len(self._data), dtype=numpy.float64)
                    result_data[:] = self._data + other._data

                    if none_or_numeric_indexes == True:  # noqa
                        result_index = numpy_like.astype(left_index, numba_index_common_dtype)
                    else:
                        result_index = self._index

                    return pandas.Series(result_data, index=result_index)

                # TODO: replace below with core join(how='outer', return_indexers=True) when implemented
                joined_index, left_indexer, right_indexer = sdc_join_series_indexes(left_index, right_index)
                result_size = len(joined_index)
                left_values = numpy.empty(result_size, dtype=numpy.float64)
                right_values = numpy.empty(result_size, dtype=numpy.float64)
                for i in range(result_size):
                    left_pos, right_pos = left_indexer[i], right_indexer[i]
                    left_values[i] = self._data[left_pos] if left_pos != -1 else _fill_value
                    right_values[i] = other._data[right_pos] if right_pos != -1 else _fill_value
                result_data = left_values + right_values
                return pandas.Series(result_data, joined_index)

            return _series_binop_common_impl

    return None
Example #23
def _check_for_none_typed(lst, method):
    if isinstance(lst.dtype, NoneType):
        raise TypingError("method support for List[None] is limited, "
                          "not supported: '{}'.".format(method))
Example #24
def assert_item_type_consistent_with_heap_type(heap, item):
    if not heap.dtype == item:
        raise TypingError('heap type must be the same as item type')