Ejemplo n.º 1
0
def pd_multi_index_from_tuples_overload(cls, iterables):
    """Typing-time overload for ``MultiIndex.from_tuples``.

    Declines the overload (returns ``None``) unless ``cls.instance_type`` is
    ``MultiIndexType``. ``iterables`` must be typed as a list whose dtype is
    a tuple type, otherwise the type checker raises.

    Returns the implementation that builds the levels and codes of the
    resulting multi index from the list of tuples.
    """
    if cls.instance_type is not MultiIndexType:
        return None

    _func_name = 'Method from_tuples()'
    ty_checker = TypeChecker(_func_name)

    is_list = isinstance(iterables, (types.List, types.ListType))
    if not (is_list and isinstance(iterables.dtype, (types.Tuple, types.UniTuple))):
        ty_checker.raise_exc(iterables, 'list of tuples', 'iterables')

    def pd_multi_index_type_from_tuples_impl(cls, iterables):

        n_rows = len(iterables)
        if not n_rows:
            raise TypeError("Cannot infer number of levels from empty list")

        # the first tuple drives type inference: one dict and one codes array per level
        first_tuple = iterables[0]
        level_maps = sdc_tuple_map(_multi_index_alloc_level_dict, first_tuple)
        codes = sdc_tuple_map(
            lambda _, size: np.empty(size, dtype=types.int64),
            first_tuple,
            n_rows)

        for pos, tup in enumerate(iterables):
            _multi_index_from_tuples_helper(tup, level_maps, codes, pos)

        # the keys of each per-level dict become the values of that level
        levels = sdc_tuple_map(lambda x: list(x.keys()), level_maps)

        return pd.MultiIndex(
            levels=levels,
            codes=codes,
        )

    return pd_multi_index_type_from_tuples_impl
def sdc_pandas_dataframe_rolling_var(self, ddof=1):
    """Typing-time overload for ``DataFrame.rolling().var()``.

    Checks that ``self`` is a ``DataFrameRollingType`` and that ``ddof`` is
    typed as an integer (or omitted), then delegates generation of the
    implementation to ``gen_df_rolling_method_impl``.
    """
    checker = TypeChecker('Method rolling.var().')
    checker.check(self, DataFrameRollingType)

    ddof_accepted = (int, Integer, Omitted)
    if not isinstance(ddof, ddof_accepted):
        checker.raise_exc(ddof, 'int', 'ddof')

    default_kws = {'ddof': '1'}
    return gen_df_rolling_method_impl('var', self, kws=default_kws)
Ejemplo n.º 3
0
def hpat_pandas_series_rolling_var(self, ddof=1):
    """Typing-time overload for ``Series.rolling().var()``.

    Checks that ``self`` is a ``SeriesRollingType`` and that ``ddof`` is
    typed as an integer (or omitted), then returns
    ``sdc_pandas_series_rolling_var_impl``.
    """
    checker = TypeChecker('Method rolling.var().')
    checker.check(self, SeriesRollingType)

    ddof_accepted = (int, Integer, Omitted)
    if not isinstance(ddof, ddof_accepted):
        checker.raise_exc(ddof, 'int', 'ddof')

    return sdc_pandas_series_rolling_var_impl
Ejemplo n.º 4
0
def sdc_isnan_overload(self):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of numpy.isnan.

    Declines the overload (returns ``None``) for anything that is not an
    array type. Float arrays are checked element by element; integer and
    boolean arrays produce an all-False result. Any other dtype is reported
    through the type checker.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k isnan
    """

    if not isinstance(self, types.Array):
        return None

    ty_checker = TypeChecker("numpy-like 'isnan'")
    elem_type = self.dtype
    isnan = get_isnan(elem_type)

    if isinstance(elem_type, types.Float):
        def sdc_isnan_float_impl(self):
            size = len(self)
            mask = numpy.empty(shape=size, dtype=numpy.bool_)
            for idx in prange(size):
                mask[idx] = isnan(self[idx])

            return mask

        return sdc_isnan_float_impl

    if isinstance(elem_type, (types.Integer, types.Boolean, bool)):
        def sdc_isnan_int_impl(self):
            # no per-element check for these dtypes: the result is all False
            return numpy.zeros(shape=len(self), dtype=numpy.bool_)

        return sdc_isnan_int_impl

    ty_checker.raise_exc(elem_type, 'int or float', 'self.dtype')
Ejemplo n.º 5
0
def pd_multi_index_append_overload(self, other):
    """Typing-time overload for ``MultiIndex.append``.

    Declines the overload (returns ``None``) when ``self`` is not a
    ``MultiIndexType``. ``other`` must be a ``MultiIndexType`` too, and both
    must pass ``check_types_comparable``; otherwise a typing error is raised.
    """
    if not isinstance(self, MultiIndexType):
        return None

    _func_name = 'Method append().'
    ty_checker = TypeChecker(_func_name)

    other_is_multi_index = isinstance(other, MultiIndexType)
    if not other_is_multi_index:
        ty_checker.raise_exc(other, 'pandas MultiIndex', 'other')

    types_match = check_types_comparable(self, other)
    if not types_match:
        raise TypingError('{} Not allowed for non comparable indexes. \
        Given: self={}, other={}'.format(_func_name, self, other))

    def pd_multi_index_append_impl(self, other):

        # append level by level, then split the per-level results into levels and codes
        paired_levels = _multi_index_binop_helper(self, other)
        appended = sdc_tuple_map(
            lambda x: _multi_index_append_level(*x), paired_levels)

        levels, codes = sdc_tuple_unzip(appended)
        return pd.MultiIndex(levels=levels, codes=codes)

    return pd_multi_index_append_impl
def hpat_pandas_series_rolling_quantile(self, quantile, interpolation='linear'):
    """Typing-time overload for ``Series.rolling().quantile()``.

    ``self`` must be a ``SeriesRollingType``, ``quantile`` a number and
    ``interpolation`` a string type (or the default ``'linear'``).
    The returned implementation raises ``ValueError`` at run time when
    ``quantile`` is outside [0, 1] or ``interpolation`` is not ``'linear'``.
    """
    checker = TypeChecker('Method rolling.quantile().')
    checker.check(self, SeriesRollingType)

    if not isinstance(quantile, Number):
        checker.raise_exc(quantile, 'float', 'quantile')

    str_types = (Omitted, StringLiteral, UnicodeType)
    interpolation_ok = isinstance(interpolation, str_types) or interpolation == 'linear'
    if not interpolation_ok:
        checker.raise_exc(interpolation, 'str', 'interpolation')

    def hpat_pandas_rolling_series_quantile_impl(self, quantile, interpolation='linear'):
        if quantile < 0 or quantile > 1:
            raise ValueError('quantile value not in [0, 1]')
        if interpolation != 'linear':
            raise ValueError('interpolation value not "linear"')

        window = self._window
        min_periods = self._min_periods

        series = self._data
        values = series._data
        n_values = len(values)
        result = numpy.empty(n_values, dtype=float64)

        def calc_quantile(arr, quantile, minp):
            # only finite values count towards min_periods and the quantile
            finite_arr = arr[numpy.isfinite(arr)]
            if len(finite_arr) < minp:
                return numpy.nan
            return arr_quantile(finite_arr, quantile)

        # leading positions: the window is still growing from the start of the data
        head = min(window, n_values)
        for i in prange(head):
            result[i] = calc_quantile(values[:i + 1], quantile, min_periods)

        # remaining positions: full window ending at position i
        for i in prange(head, n_values):
            result[i] = calc_quantile(values[i + 1 - window:i + 1], quantile, min_periods)

        return pandas.Series(result, series._index, name=series._name)

    return hpat_pandas_rolling_series_quantile_impl
def sdc_pandas_dataframe_rolling_quantile(self,
                                          quantile,
                                          interpolation='linear'):
    """Typing-time overload for ``DataFrame.rolling().quantile()``.

    ``self`` must be a ``DataFrameRollingType``, ``quantile`` a number and
    ``interpolation`` a string type (or the default ``'linear'``).
    Generation of the implementation is delegated to
    ``gen_df_rolling_method_impl``.
    """
    checker = TypeChecker('Method rolling.quantile().')
    checker.check(self, DataFrameRollingType)

    if not isinstance(quantile, Number):
        checker.raise_exc(quantile, 'float', 'quantile')

    str_types = (Omitted, StringLiteral, UnicodeType)
    interpolation_ok = isinstance(interpolation, str_types) or interpolation == 'linear'
    if not interpolation_ok:
        checker.raise_exc(interpolation, 'str', 'interpolation')

    positional = ['quantile']
    default_kws = {'interpolation': '"linear"'}
    return gen_df_rolling_method_impl('quantile', self, args=positional, kws=default_kws)
Ejemplo n.º 8
0
def sdc_pandas_dataframe_rolling_apply(self, func, raw=None):
    """Typing-time overload for ``DataFrame.rolling().apply()``.

    ``self`` must be a ``DataFrameRollingType`` and ``raw`` a boolean type,
    a none type, omitted or ``None``. Generation of the implementation is
    delegated to ``gen_df_rolling_method_impl``.
    """
    checker = TypeChecker('Method rolling.apply().')
    checker.check(self, DataFrameRollingType)

    raw_accepted = (Omitted, NoneType, Boolean)
    if not (isinstance(raw, raw_accepted) or raw is None):
        checker.raise_exc(raw, 'bool', 'raw')

    positional = ['func']
    default_kws = {'raw': 'None'}
    return gen_df_rolling_method_impl('apply', self, args=positional, kws=default_kws)
Ejemplo n.º 9
0
def sdc_pandas_series_operator_comp_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.comp_binop` implementation

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op7*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_comp_binop*

    Parameters
    ----------
    self: :obj:`pandas.Series` or :obj:`scalar`
        Left operand; at least one of the operands has to be a series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation

    Raises
    ------
    TypingError
        If both operands are series with not-comparable indexes,
        or if the operands themselves are not comparable
    """

    # used by the TypingError messages below; without it the error path
    # failed with NameError instead of reporting the typing problem
    _func_name = 'Operator comp_binop().'

    ty_checker = TypeChecker(_func_name)
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
    if not (self_is_series or other_is_series):
        # neither operand is a series: this overload does not apply
        return None

    if not isinstance(self, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # indexes are accepted when their types are comparable, or when each one
        # is either absent (NoneType) or numeric
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    def sdc_pandas_series_operator_comp_binop_impl(self, other):
        return self.comp_binop(other)

    return sdc_pandas_series_operator_comp_binop_impl
Ejemplo n.º 10
0
def sdc_pandas_dataframe_rolling_corr(self, other=None, pairwise=None):
    """Typing-time overload for ``DataFrame.rolling().corr()``.

    ``self`` must be a ``DataFrameRollingType``; ``other`` may be a
    DataFrame, a Series or none; ``pairwise`` may be a boolean or none.
    The implementation generator is chosen by the type of ``other``:
    none, DataFrame, or anything else that passed the check.
    """
    checker = TypeChecker('Method rolling.corr().')
    checker.check(self, DataFrameRollingType)

    accepted_other = (Omitted, NoneType, DataFrameType, SeriesType)
    if not (isinstance(other, accepted_other) or other is None):
        checker.raise_exc(other, 'DataFrame, Series', 'other')

    accepted_pairwise = (bool, Boolean, Omitted, NoneType)
    if not (isinstance(pairwise, accepted_pairwise) or pairwise is None):
        checker.raise_exc(pairwise, 'bool', 'pairwise')

    default_kws = {'other': 'None', 'pairwise': 'None'}

    if isinstance(other, (Omitted, NoneType)) or other is None:
        return gen_df_rolling_method_other_none_impl('corr', self, kws=default_kws)

    if isinstance(other, DataFrameType):
        return gen_df_rolling_method_other_df_impl('corr', self, other, kws=default_kws)

    return gen_df_rolling_method_impl('corr', self, kws=default_kws)
Ejemplo n.º 11
0
def sdc_pandas_series_groupby_var(self, ddof=1, *args):
    """Typing-time overload for ``SeriesGroupBy.var()``.

    ``self`` must be a ``SeriesGroupByType`` and ``ddof`` an integer type
    (or omitted). The implementation is produced by
    ``sdc_pandas_series_groupby_apply_func`` for the ``'var'`` function.
    """
    method_name = 'GroupBy.var().'
    checker = TypeChecker(method_name)
    checker.check(self, SeriesGroupByType)

    ddof_accepted = (types.Omitted, int, types.Integer)
    if not isinstance(ddof, ddof_accepted):
        checker.raise_exc(ddof, 'int', 'ddof')

    return sdc_pandas_series_groupby_apply_func(
        self,
        'var',
        ['self', 'ddof', '*args'],
        {'ddof': 1},
        {'ddof': 'ddof'},
    )
Ejemplo n.º 12
0
def pd_range_index_copy_overload(self, name=None, deep=False, dtype=None):
    """Typing-time overload for ``RangeIndex.copy``.

    Declines the overload (returns ``None``) when ``self`` is not a
    ``RangeIndexType``. ``name`` must be a string or none, ``deep`` a
    boolean and ``dtype`` must pass ``_check_dtype_param_type``.
    The copy keeps the name of ``self`` when no ``name`` is given and
    ``self`` is a named index.
    """
    if not isinstance(self, RangeIndexType):
        return None

    _func_name = 'Method copy().'
    ty_checker = TypeChecker(_func_name)

    name_is_none = isinstance(name, (types.NoneType, types.Omitted)) or name is None
    if not (name_is_none or isinstance(name, types.UnicodeType)):
        ty_checker.raise_exc(name, 'string or none', 'name')

    deep_ok = isinstance(deep, (types.Omitted, types.Boolean)) or deep is False
    if not deep_ok:
        ty_checker.raise_exc(deep, 'boolean', 'deep')

    if not _check_dtype_param_type(dtype):
        ty_checker.raise_exc(dtype, 'int64 dtype', 'dtype')

    # decided at typing time and captured by the implementation below
    keep_name = name_is_none and self.is_named

    def pd_range_index_copy_impl(self, name=None, deep=False, dtype=None):

        _name = self._name if keep_name == True else name  # noqa
        return init_range_index(self._data, _name)

    return pd_range_index_copy_impl
Ejemplo n.º 13
0
def hpat_pandas_series_rolling_apply(self, func, raw=None):
    """Typing-time overload for ``Series.rolling().apply()``.

    ``self`` must be a ``SeriesRollingType`` and ``raw`` a boolean type,
    a none type, omitted or ``None``. The returned implementation applies
    ``func`` to every window after replacing infinite values with NaN.
    """
    checker = TypeChecker('Method rolling.apply().')
    checker.check(self, SeriesRollingType)

    raw_accepted = (Omitted, NoneType, Boolean)
    if not (isinstance(raw, raw_accepted) or raw is None):
        checker.raise_exc(raw, 'bool', 'raw')

    def hpat_pandas_rolling_series_apply_impl(self, func, raw=None):
        window = self._window
        min_periods = self._min_periods

        series = self._data
        values = series._data
        n_values = len(values)
        result = numpy.empty(n_values, dtype=float64)

        def calc_apply(arr, func, minp):
            # work on a copy so infinities can be masked without touching the input
            cleaned = arr.copy()
            cleaned[numpy.isinf(arr)] = numpy.nan
            if len(cleaned) < minp:
                return numpy.nan
            return arr_apply(cleaned, func)

        # leading positions: the window is still growing from the start of the data
        head = min(window, n_values)
        for i in prange(head):
            result[i] = calc_apply(values[:i + 1], func, min_periods)

        # remaining positions: full window ending at position i
        for i in prange(head, n_values):
            result[i] = calc_apply(values[i + 1 - window:i + 1], func, min_periods)

        return pandas.Series(result, series._index, name=series._name)

    return hpat_pandas_rolling_series_apply_impl
def hpat_pandas_series_rolling_median(self):
    """Typing-time overload for ``Series.rolling().median()``.

    Checks that ``self`` is a ``SeriesRollingType`` and returns
    ``hpat_pandas_rolling_series_median_impl``.
    """
    checker = TypeChecker('Method rolling.median().')
    checker.check(self, SeriesRollingType)
    return hpat_pandas_rolling_series_median_impl
Ejemplo n.º 15
0
def pd_range_index_overload(start=None,
                            stop=None,
                            step=None,
                            dtype=None,
                            copy=False,
                            name=None,
                            fastpath=None):
    """Typing-time overload for the ``pd.RangeIndex`` constructor.

    Parameters
    ----------
    start, stop, step
        Integer types, none types, omitted or ``None``.
    dtype
        Must pass ``_check_dtype_param_type``; the implementation rejects
        any non-int64 dtype at run time with ``TypeError``.
    copy, fastpath
        Unsupported: anything other than the defaults raises ``SDCLimitation``.
    name
        String type, none type, omitted or ``None``.

    Returns
    -------
    An implementation that builds the index through ``init_range_index``,
    or, when all of ``start``, ``stop`` and ``step`` are none, an
    implementation that raises ``TypeError`` when called.

    Raises
    ------
    SDCLimitation
        If ``copy`` or ``fastpath`` is given.
    """

    _func_name = 'pd.RangeIndex().'
    ty_checker = TypeChecker(_func_name)

    if not (isinstance(copy, types.Omitted) or copy is False):
        raise SDCLimitation(
            f"{_func_name} Unsupported parameter. Given 'copy': {copy}")

    # validate 'fastpath' against its own type: testing 'copy' here (as before)
    # let any explicitly passed 'fastpath' through whenever 'copy' was omitted
    if not (isinstance(fastpath, types.Omitted) or fastpath is None):
        raise SDCLimitation(
            f"{_func_name} Unsupported parameter. Given 'fastpath': {fastpath}"
        )

    # typing-time flags captured by the implementation below
    dtype_is_np_int64 = dtype is types.NumberClass(types.int64)
    dtype_is_unicode_str = isinstance(dtype,
                                      (types.UnicodeType, types.StringLiteral))
    if not _check_dtype_param_type(dtype):
        ty_checker.raise_exc(dtype, 'int64 dtype', 'dtype')

    # TODO: support ensure_python_int from pandas.core.dtype.common to handle integers as float params
    if not (isinstance(start, (types.NoneType, types.Omitted, types.Integer))
            or start is None):
        ty_checker.raise_exc(start, 'number or none', 'start')
    if not (isinstance(stop, (types.NoneType, types.Omitted, types.Integer))
            or stop is None):
        ty_checker.raise_exc(stop, 'number or none', 'stop')
    if not (isinstance(step, (types.NoneType, types.Omitted, types.Integer))
            or step is None):
        ty_checker.raise_exc(step, 'number or none', 'step')

    if not (isinstance(name,
                       (types.NoneType, types.Omitted, types.StringLiteral,
                        types.UnicodeType)) or name is None):
        ty_checker.raise_exc(name, 'string or none', 'name')

    # no range bounds at all: compile an implementation that only raises
    if ((isinstance(start, (types.NoneType, types.Omitted)) or start is None)
            and (isinstance(stop,
                            (types.NoneType, types.Omitted)) or stop is None)
            and (isinstance(step,
                            (types.NoneType, types.Omitted)) or step is None)):

        def pd_range_index_ctor_dummy_impl(start=None,
                                           stop=None,
                                           step=None,
                                           dtype=None,
                                           copy=False,
                                           name=None,
                                           fastpath=None):
            raise TypeError("RangeIndex(...) must be called with integers")

        return pd_range_index_ctor_dummy_impl

    def pd_range_index_ctor_impl(start=None,
                                 stop=None,
                                 step=None,
                                 dtype=None,
                                 copy=False,
                                 name=None,
                                 fastpath=None):

        if not (dtype is None or dtype_is_unicode_str and dtype == 'int64'
                or dtype_is_np_int64):
            raise TypeError("Invalid to pass a non-int64 dtype to RangeIndex")

        _start = types.int64(start) if start is not None else types.int64(0)

        # single-bound form: 'start' is used as the stop value and the range begins at 0
        if stop is None:
            _start, _stop = types.int64(0), types.int64(start)
        else:
            _stop = types.int64(stop)

        _step = types.int64(step) if step is not None else types.int64(1)
        if _step == 0:
            raise ValueError("Step must not be zero")

        return init_range_index(range(_start, _stop, _step), name)

    return pd_range_index_ctor_impl
Ejemplo n.º 16
0
def sdc_pandas_series_operator_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.binop` implementation

    Note: Currently implemented for numeric Series only.
        Differs from Pandas in returning Series with fixed dtype :obj:`float64`

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op1*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op2*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_binop*

    Parameters
    ----------
    self: :obj:`pandas.Series` or :obj:`scalar`
        Left operand; at least one of the operands has to be a series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation

    Raises
    ------
    TypingError
        If both operands are series with not-comparable indexes,
        or if the operands themselves are not comparable
    """

    _func_name = 'Operator binop().'

    ty_checker = TypeChecker('Operator binop().')
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(
        other, SeriesType)
    # neither operand is a series: this overload does not apply
    if not (self_is_series or other_is_series):
        return None

    # this overload is not for string series
    self_is_string_series = self_is_series and isinstance(
        self.dtype, types.UnicodeType)
    other_is_string_series = other_is_series and isinstance(
        other.dtype, types.UnicodeType)
    if self_is_string_series or other_is_string_series:
        return None

    if not isinstance(self, (SeriesType, types.Number)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # indexes are accepted when their types are comparable, or when each one
        # is either absent (NoneType) or numeric
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType)
                                    or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType)
                                        or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(
            self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError(
                '{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(
                    _func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    # specializations for numeric series only
    if not operands_are_series:

        # exactly one operand is a series, the other one is a scalar
        def _series_operator_binop_scalar_impl(self, other):
            # self_is_series is a flag captured from the typing scope above.
            # NOTE(review): the explicit '== True' (hence the noqa) looks intentional,
            # presumably to let the compiler drop the untaken branch - confirm before simplifying
            if self_is_series == True:  # noqa
                result_data = numpy.empty(len(self._data), dtype=numpy.float64)
                result_data[:] = self._data + numpy.float64(other)
                return pandas.Series(result_data,
                                     index=self._index,
                                     name=self._name)
            else:
                result_data = numpy.empty(len(other._data),
                                          dtype=numpy.float64)
                result_data[:] = numpy.float64(self) + other._data
                return pandas.Series(result_data,
                                     index=other._index,
                                     name=other._name)

        return _series_operator_binop_scalar_impl

    else:  # both operands are numeric series

        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType)
                and isinstance(other.index, types.NoneType)):

            def _series_operator_binop_none_indexes_impl(self, other):

                if (len(self._data) == len(other._data)):
                    # equal sizes: convert the left data to float64 and operate elementwise
                    result_data = astype(self._data, numpy.float64)
                    result_data = result_data + other._data
                    return pandas.Series(result_data)
                else:
                    # different sizes: the shorter operand is copied into a buffer of
                    # the longer size and its tail is filled with NaN
                    left_size, right_size = len(self._data), len(other._data)
                    min_data_size = min(left_size, right_size)
                    max_data_size = max(left_size, right_size)
                    result_data = numpy.empty(max_data_size,
                                              dtype=numpy.float64)
                    if (left_size == min_data_size):
                        result_data[:min_data_size] = self._data
                        result_data[min_data_size:] = numpy.nan
                        result_data = result_data + other._data
                    else:
                        result_data[:min_data_size] = other._data
                        result_data[min_data_size:] = numpy.nan
                        result_data = self._data + result_data

                    return pandas.Series(result_data)

            return _series_operator_binop_none_indexes_impl
        else:
            # for numeric indexes find common dtype to be used when creating joined index
            if none_or_numeric_indexes:
                # an absent index is treated as int64 when looking for the common dtype
                ty_left_index_dtype = types.int64 if isinstance(
                    self.index, types.NoneType) else self.index.dtype
                ty_right_index_dtype = types.int64 if isinstance(
                    other.index, types.NoneType) else other.index.dtype
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [ty_left_index_dtype, ty_right_index_dtype], [])

            def _series_operator_binop_common_impl(self, other):
                left_index, right_index = self.index, other.index

                # check if indexes are equal and series don't have to be aligned
                if sdc_check_indexes_equal(left_index, right_index):
                    result_data = numpy.empty(len(self._data),
                                              dtype=numpy.float64)
                    result_data[:] = self._data + other._data

                    # none_or_numeric_indexes is a flag captured from the typing scope;
                    # numba_index_common_dtype is only defined when it is set
                    if none_or_numeric_indexes == True:  # noqa
                        result_index = astype(left_index,
                                              numba_index_common_dtype)
                    else:
                        result_index = self._index

                    return pandas.Series(result_data, index=result_index)

                # TODO: replace below with core join(how='outer', return_indexers=True) when implemented
                joined_index, left_indexer, right_indexer = sdc_join_series_indexes(
                    left_index, right_index)

                result_size = len(joined_index)
                left_values = numpy.empty(result_size, dtype=numpy.float64)
                right_values = numpy.empty(result_size, dtype=numpy.float64)
                for i in numba.prange(result_size):
                    # an indexer value of -1 is replaced with NaN instead of a data lookup
                    left_pos, right_pos = left_indexer[i], right_indexer[i]
                    left_values[i] = self._data[
                        left_pos] if left_pos != -1 else numpy.nan
                    right_values[i] = other._data[
                        right_pos] if right_pos != -1 else numpy.nan

                result_data = left_values + right_values
                return pandas.Series(result_data, joined_index)

            return _series_operator_binop_common_impl

    # not reachable: every branch of the if/else above returns
    return None
Ejemplo n.º 17
0
def sdc_pandas_series_operator_comp_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.comp_binop` implementation

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op7*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_comp_binop*

    Parameters
    ----------
    self: :obj:`pandas.Series` or :obj:`scalar`
        Left operand; at least one of the operands has to be a series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation

    Raises
    ------
    TypingError
        If both operands are series with not-comparable indexes,
        or if the operands themselves are not comparable
    """

    _func_name = 'Operator comp_binop().'

    ty_checker = TypeChecker('Operator comp_binop().')
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(
        other, SeriesType)
    # neither operand is a series: this overload does not apply
    if not (self_is_series or other_is_series):
        return None

    if not isinstance(self, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # indexes are accepted when their types are comparable, or when each one
        # is either absent (NoneType) or numeric
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType)
                                    or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType)
                                        or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(
            self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError(
                '{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(
                    _func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    if not operands_are_series:

        # exactly one operand is a series, the other one is a scalar
        def _series_operator_comp_binop_scalar_impl(self, other):
            # self_is_series is a flag captured from the typing scope above.
            # NOTE(review): the explicit '== True' (hence the noqa) looks intentional,
            # presumably to let the compiler drop the untaken branch - confirm before simplifying
            if self_is_series == True:  # noqa
                return pandas.Series(self._data < other,
                                     index=self._index,
                                     name=self._name)
            else:
                return pandas.Series(self < other._data,
                                     index=other._index,
                                     name=other._name)

        return _series_operator_comp_binop_scalar_impl

    else:

        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType)
                and isinstance(other.index, types.NoneType)):

            def _series_operator_comp_binop_none_indexes_impl(self, other):
                # without indexes only series of equal size can be compared
                left_size, right_size = len(self._data), len(other._data)
                if (left_size == right_size):
                    return pandas.Series(self._data < other._data)
                else:
                    raise ValueError(
                        "Can only compare identically-labeled Series objects")

            return _series_operator_comp_binop_none_indexes_impl
        else:

            # for numeric indexes find common dtype to be used for the result index;
            # an absent index is treated as int64
            if none_or_numeric_indexes:
                ty_left_index_dtype = types.int64 if isinstance(
                    self.index, types.NoneType) else self.index.dtype
                ty_right_index_dtype = types.int64 if isinstance(
                    other.index, types.NoneType) else other.index.dtype
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [ty_left_index_dtype, ty_right_index_dtype], [])

            def _series_operator_comp_binop_common_impl(self, other):
                left_index, right_index = self.index, other.index

                # comparison is defined only for equal indexes, otherwise ValueError is raised
                if sdc_check_indexes_equal(left_index, right_index):
                    # none_or_numeric_indexes is a flag captured from the typing scope;
                    # numba_index_common_dtype is only defined when it is set
                    if none_or_numeric_indexes == True:  # noqa
                        new_index = astype(left_index,
                                           numba_index_common_dtype)
                    else:
                        new_index = self._index
                    return pandas.Series(self._data < other._data, new_index)
                else:
                    raise ValueError(
                        "Can only compare identically-labeled Series objects")

            return _series_operator_comp_binop_common_impl

    # not reachable: every branch of the if/else above returns
    return None
Ejemplo n.º 18
0
def sdc_pandas_series_comp_binop(self, other, level=None, fill_value=None, axis=0):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.Series.comp_binop

    Limitations
    -----------
    Parameters ``level`` and ``axis`` are currently unsupported by Intel Scalable Dataframe Compiler

    Examples
    --------
    .. literalinclude:: ../../../examples/series/series_comp_binop.py
       :language: python
       :lines: 27-
       :caption:
       :name: ex_series_comp_binop

    .. command-output:: python ./series/series_comp_binop.py
       :cwd: ../../../examples

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas Series method :meth:`pandas.Series.comp_binop` implementation.

    The typing checks accept only the default ``level`` and ``axis``, a scalar
    or none ``fill_value``, and a series or scalar ``other``; the returned
    wrapper forwards ``self``, ``other`` and ``fill_value`` to ``sdc_comp_binop``.

    .. only:: developer
        Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op8
    """

    _func_name = 'Method comp_binop().'

    ty_checker = TypeChecker(_func_name)
    ty_checker.check(self, SeriesType)

    level_ok = isinstance(level, types.Omitted) or level is None
    if not level_ok:
        ty_checker.raise_exc(level, 'None', 'level')

    fill_value_types = (types.Omitted, types.Number, types.UnicodeType, types.NoneType)
    fill_value_ok = isinstance(fill_value, fill_value_types) or fill_value is None
    if not fill_value_ok:
        ty_checker.raise_exc(fill_value, 'scalar', 'fill_value')

    axis_ok = isinstance(axis, types.Omitted) or axis == 0
    if not axis_ok:
        ty_checker.raise_exc(axis, 'int', 'axis')

    self_is_series = isinstance(self, SeriesType)
    other_is_series = isinstance(other, SeriesType)
    if not self_is_series and not other_is_series:
        return None

    if not isinstance(self, SeriesType):
        ty_checker.raise_exc(self, 'pandas.series', 'self')

    if not isinstance(other, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    # index types only matter when both operands are series
    if self_is_series and other_is_series:
        if not check_types_comparable(self.index, other.index):
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    if not check_types_comparable(self, other):
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    # specializations for both numeric and string series
    def series_comp_binop_wrapper(self, other, level=None, fill_value=None, axis=0):
        return sdc_comp_binop(self, other, fill_value)

    return series_comp_binop_wrapper
Ejemplo n.º 19
0
def hpat_pandas_series_map(self, arg, na_action=None):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.Series.map

    Limitations
    -----------
    - Series data types String is currently unsupported by Intel Scalable Dataframe Compiler.
    - ``arg`` as Series is currently unsupported by Intel Scalable Dataframe Compiler.
    - ``arg`` as function should return scalar. Other types \
        are currently unsupported by Intel Scalable Dataframe Compiler.
    - ``na_action`` is currently unsupported by Intel Scalable Dataframe Compiler.

    Examples
    --------
    .. literalinclude:: ../../../examples/series/series_map.py
       :language: python
       :lines: 36-
       :caption: `map()` accepts a function.
       :name: ex_series_map

    .. command-output:: python ./series/series_map.py
       :cwd: ../../../examples

    .. seealso::

        :ref:`Series.apply <pandas.Series.apply>`
            For applying more complex functions on a Series.
        :ref:`DataFrame.apply <pandas.DataFrame.apply>`
            Apply a function row-/column-wise.
        :ref:`DataFrame.applymap <pandas.DataFrame.applymap>`
            Apply a function elementwise on a whole DataFrame.

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Typing-time dispatcher: ``self`` is checked against ``SeriesType`` and an
    implementation is chosen from the type of ``arg``:

    - ``types.Callable``: the result dtype is the return type Numba resolves
      for calling ``arg`` with one element of ``self.dtype``;
    - ``types.DictType``: the result dtype is ``self.dtype`` and keys missing
      from the dictionary are mapped to ``numpy.nan``.

    For any other ``arg`` type no implementation is returned. ``na_action`` is
    accepted for signature compatibility but is not read by either
    implementation.

    .. only:: developer
        Test: python -m sdc.runtests sdc.tests.test_series -k map
    """

    TypeChecker("Method map().").check(self, SeriesType)

    if isinstance(arg, types.Callable):
        # Resolve what the callable returns when applied to a single element.
        result_dtype = arg.get_call_type(cpu_target.typing_context, [self.dtype], {}).return_type

        def _series_map_callable_impl(self, arg, na_action=None):
            values = self._data
            size = len(values)
            mapped = numpy.empty(size, dtype=result_dtype)

            for pos in prange(size):
                mapped[pos] = arg(values[pos])

            return pandas.Series(mapped, index=self._index, name=self._name)

        return _series_map_callable_impl

    if isinstance(arg, types.DictType):
        # Dictionary lookups keep the dtype of the source series.
        result_dtype = self.dtype

        def _series_map_dict_impl(self, arg, na_action=None):
            values = self._data
            size = len(values)
            mapped = numpy.empty(size, dtype=result_dtype)

            for pos in prange(size):
                # Keys absent from the mapping produce NaN.
                mapped[pos] = arg.get(values[pos], numpy.nan)

            return pandas.Series(mapped, index=self._index, name=self._name)

        return _series_map_dict_impl
Ejemplo n.º 20
0
def hpat_pandas_stringmethods_startswith(self, pat, na=None):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.startswith

    Limitations
    -----------
    Series elements are expected to be Unicode strings. Elements cannot be NaN.

    Examples
    --------
    .. literalinclude:: ../../../examples/series/str/series_str_startswith.py
       :language: python
       :lines: 27-
       :caption: Test if the start of each string element matches a string
       :name: ex_series_str_startswith

    .. command-output:: python ./series/str/series_str_startswith.py
       :cwd: ../../../examples

    .. todo::
        - Add support of matching the start of each string by a pattern
        - Add support of parameter ``na``

    .. seealso::
        `str.startswith <https://docs.python.org/3/library/stdtypes.html#str.startswith>`_
            Python standard library string method.
        :ref:`Series.str.endswith <pandas.Series.str.endswith>`
            Same as startswith, but tests the end of string.
        :ref:`Series.str.contains <pandas.Series.str.contains>`
            Tests if string element contains a pattern.

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.startswith()` implementation.

    Note: only Unicode elements are supported. Numpy.NaN is not supported as an element.

    .. only:: developer

    Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_startswith

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg
    pat: :obj:`str`
        Character sequence
    na: :obj:`bool`
        Object shown if element tested is not a string
        *unsupported*: the compiled implementation raises ``ValueError``
        for any value other than ``None``

    Returns
    -------
    :obj:`pandas.Series`
         boolean :obj:`pandas.Series` carrying the index and name of the
         series wrapped by ``self``
    """

    ty_checker = TypeChecker('Method startswith().')
    ty_checker.check(self, StringMethodsType)

    if not isinstance(pat, (StringLiteral, UnicodeType)):
        ty_checker.raise_exc(pat, 'str', 'pat')

    # ``na`` may be a typed bool/None/omitted value or the literal default None.
    if not (isinstance(na, (Boolean, NoneType, Omitted)) or na is None):
        ty_checker.raise_exc(na, 'bool', 'na')

    def hpat_pandas_stringmethods_startswith_impl(self, pat, na=None):
        if na is not None:
            raise ValueError('Method startswith(). The object na\n expected: None')

        series = self._data
        flags = numpy.empty(len(series), numba.types.boolean)
        for pos, text in enumerate(series._data):
            flags[pos] = text.startswith(pat)

        return pandas.Series(flags, series._index, name=series._name)

    return hpat_pandas_stringmethods_startswith_impl
def hpat_pandas_series_rolling_count(self):
    """
    Typing-time entry point for ``Series.rolling().count()``.

    Checks ``self`` against ``SeriesRollingType`` through ``TypeChecker`` and
    hands back the ready-made ``sdc_pandas_series_rolling_count_impl``.
    """

    TypeChecker('Method rolling.count().').check(self, SeriesRollingType)

    return sdc_pandas_series_rolling_count_impl
def sdc_pandas_dataframe_rolling_sum(self):
    """
    Typing-time entry point for ``DataFrame.rolling().sum()``.

    Checks ``self`` against ``DataFrameRollingType`` through ``TypeChecker``
    and returns whatever ``gen_df_rolling_method_impl('sum', self)`` builds.
    """

    TypeChecker('Method rolling.sum().').check(self, DataFrameRollingType)

    return gen_df_rolling_method_impl('sum', self)
Ejemplo n.º 23
0
def hpat_pandas_stringmethods_zfill(self, width):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.zfill

    Limitations
    -----------
    Series elements are expected to be Unicode strings. Elements cannot be NaN.

    Examples
    --------
    .. literalinclude:: ../../../examples/series/str/series_str_zfill.py
       :language: python
       :lines: 27-
       :caption: Pad strings in the Series by prepending '0' characters
       :name: ex_series_str_zfill

    .. command-output:: python ./series/str/series_str_zfill.py
       :cwd: ../../../examples

    .. todo:: Add support of 32-bit Unicode for `str.zfill()`

    .. seealso::
        :ref:`Series.str.rjust <pandas.Series.str.rjust>`
            Fills the left side of strings with an arbitrary character.
        :ref:`Series.str.ljust <pandas.Series.str.ljust>`
            Fills the right side of strings with an arbitrary character.
        :ref:`Series.str.pad <pandas.Series.str.pad>`
            Fills the specified sides of strings with an arbitrary character.
        :ref:`Series.str.center <pandas.Series.str.center>`
            Fills both sides of strings with an arbitrary character.

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.zfill()` implementation.

    Note: only Unicode elements are supported. Numpy.NaN is not supported as an element.

    .. only:: developer

    Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_zfill

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg
    width: :obj:`int`
        Minimum width of resulting string

    Returns
    -------
    :obj:`pandas.Series`
         :obj:`pandas.Series` of padded strings carrying the index and name
         of the series wrapped by ``self``
    """

    ty_checker = TypeChecker('Method zfill().')
    ty_checker.check(self, StringMethodsType)

    if not isinstance(width, Integer):
        ty_checker.raise_exc(width, 'int', 'width')

    def hpat_pandas_stringmethods_zfill_impl(self, width):
        series = self._data
        # Pre-sized list of strings, filled position by position.
        padded = [''] * len(series)
        for pos, text in enumerate(series._data):
            padded[pos] = text.zfill(width)

        return pandas.Series(padded, series._index, name=series._name)

    return hpat_pandas_stringmethods_zfill_impl
Ejemplo n.º 24
0
def sdc_pandas_series_binop(self, other, level=None, fill_value=None, axis=0):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.Series.binop

    Limitations
    -----------
    Parameters ``level`` and ``axis`` are currently unsupported by Intel Scalable Dataframe Compiler

    Examples
    --------
    .. literalinclude:: ../../../examples/series/series_binop.py
       :language: python
       :lines: 27-
       :caption:
       :name: ex_series_binop

    .. command-output:: python ./series/series_binop.py
       :cwd: ../../../examples

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas Series method :meth:`pandas.Series.binop` implementation.

    This function runs on argument *types* and returns one of three
    implementations, all of which produce ``float64`` result data:

    - series with scalar (either operand may be the series);
    - two series that both have a ``types.NoneType`` index;
    - two series in the general case, where indexes that are not equal are
      joined through ``sdc_join_series_indexes``.

    ``None`` is returned when neither operand is a ``SeriesType`` or when
    either operand is a series of ``types.UnicodeType``.

    NOTE(review): ``binop`` in the names and the ``+`` operator in the bodies
    look like placeholders for a concrete operation -- confirm against the
    code that consumes this function.

    .. only:: developer
        Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5
    """

    _func_name = 'Method binop().'
    ty_checker = TypeChecker(_func_name)
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(
        other, SeriesType)
    # Nothing to do here unless at least one operand is a series.
    if not (self_is_series or other_is_series):
        return None

    # this overload is not for string series
    self_is_string_series = self_is_series and isinstance(
        self.dtype, types.UnicodeType)
    other_is_string_series = other_is_series and isinstance(
        other.dtype, types.UnicodeType)
    if self_is_string_series or other_is_string_series:
        return None

    if not isinstance(self, (SeriesType, types.Number)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # Indexes are accepted when their types are comparable, or when each
        # side is either a None index or passes check_index_is_numeric.
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType)
                                    or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType)
                                        or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(
            self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError(
                '{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(
                    _func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    if not isinstance(level, types.Omitted) and level is not None:
        ty_checker.raise_exc(level, 'None', 'level')

    if not isinstance(fill_value, (types.Omitted, types.Number,
                                   types.NoneType)) and fill_value is not None:
        ty_checker.raise_exc(fill_value, 'number', 'fill_value')
    # Typing-time flag captured by the implementations below to choose between
    # NaN and the user-supplied fill value for positions missing on one side.
    fill_value_is_none = isinstance(
        fill_value, (types.NoneType, types.Omitted)) or fill_value is None

    if not isinstance(axis, types.Omitted) and axis != 0:
        ty_checker.raise_exc(axis, 'int', 'axis')
    # specializations for numeric series only
    if not operands_are_series:

        def _series_binop_scalar_impl(self,
                                      other,
                                      level=None,
                                      fill_value=None,
                                      axis=0):
            # self_is_series is a typing-time constant captured from the
            # enclosing scope; it selects which operand is the series.
            if self_is_series == True:  # noqa
                # NOTE(review): fillna is called with inplace=True on the
                # operand's own data -- presumably this modifies the caller's
                # series; confirm against numpy_like.fillna.
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                result_data = numpy.empty(len(self._data), dtype=numpy.float64)
                result_data[:] = self._data + numpy.float64(other)
                return pandas.Series(result_data,
                                     index=self._index,
                                     name=self._name)
            else:
                numpy_like.fillna(other._data, inplace=True, value=fill_value)
                result_data = numpy.empty(len(other._data),
                                          dtype=numpy.float64)
                result_data[:] = numpy.float64(self) + other._data
                return pandas.Series(result_data,
                                     index=other._index,
                                     name=other._name)

        return _series_binop_scalar_impl

    else:  # both operands are numeric series
        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType)
                and isinstance(other.index, types.NoneType)):

            def _series_binop_none_indexes_impl(self,
                                                other,
                                                level=None,
                                                fill_value=None,
                                                axis=0):
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                numpy_like.fillna(other._data, inplace=True, value=fill_value)

                if (len(self._data) == len(other._data)):
                    # Equal lengths: operate element by element in float64.
                    result_data = numpy_like.astype(self._data, numpy.float64)
                    result_data = result_data + other._data
                    return pandas.Series(result_data)
                else:
                    # Different lengths: copy the shorter operand into a
                    # buffer of the longer length, pad the tail with the fill
                    # value (NaN when none was given), then apply the
                    # operation keeping the original operand order.
                    left_size, right_size = len(self._data), len(other._data)
                    min_data_size = min(left_size, right_size)
                    max_data_size = max(left_size, right_size)
                    result_data = numpy.empty(max_data_size,
                                              dtype=numpy.float64)
                    _fill_value = numpy.nan if fill_value_is_none == True else fill_value  # noqa
                    if (left_size == min_data_size):
                        result_data[:min_data_size] = self._data
                        for i in range(min_data_size, len(result_data)):
                            result_data[i] = _fill_value
                        result_data = result_data + other._data
                    else:
                        result_data[:min_data_size] = other._data
                        for i in range(min_data_size, len(result_data)):
                            result_data[i] = _fill_value
                        result_data = self._data + result_data

                    return pandas.Series(result_data)

            return _series_binop_none_indexes_impl
        else:
            left_index_is_range = isinstance(self.index,
                                             (RangeIndexType, types.NoneType))
            right_index_is_range = isinstance(other.index,
                                              (RangeIndexType, types.NoneType))
            # The array_equal shortcut below is only attempted when both
            # indexes are range-like (RangeIndexType or None).
            check_index_equal = left_index_is_range and right_index_is_range
            # A None index is given the RangeIndexType dtype so that the dtype
            # comparison below never reads ``dtype`` from a None index type.
            self_index_dtype = RangeIndexType.dtype if isinstance(
                self.index, types.NoneType) else self.index.dtype
            other_index_dtype = RangeIndexType.dtype if isinstance(
                other.index, types.NoneType) else other.index.dtype
            index_dtypes_match = self_index_dtype == other_index_dtype
            if not index_dtypes_match:
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [self_index_dtype, other_index_dtype], [])
            else:
                numba_index_common_dtype = self_index_dtype

            def _series_binop_common_impl(self,
                                          other,
                                          level=None,
                                          fill_value=None,
                                          axis=0):
                left_index, right_index = self.index, other.index
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                numpy_like.fillna(other._data, inplace=True, value=fill_value)
                if check_index_equal == True:  # noqa
                    equal_indexes = numpy_like.array_equal(
                        left_index, right_index)
                else:
                    equal_indexes = False

                # Fast path: identical or equal indexes need no alignment.
                if (left_index is right_index or equal_indexes):
                    result_data = numpy.empty(len(self._data),
                                              dtype=numpy.float64)
                    result_data[:] = self._data + other._data
                    if index_dtypes_match == False:  # noqa
                        result_index = numpy_like.astype(
                            left_index, numba_index_common_dtype)
                    else:
                        result_index = left_index.values if left_index_is_range == True else left_index  # noqa

                    return pandas.Series(result_data, index=result_index)

                # TODO: replace below with core join(how='outer', return_indexers=True) when implemented
                joined_index, left_indexer, right_indexer = sdc_join_series_indexes(
                    left_index, right_index)
                result_size = len(joined_index)
                left_values = numpy.empty(result_size, dtype=numpy.float64)
                right_values = numpy.empty(result_size, dtype=numpy.float64)
                _fill_value = numpy.nan if fill_value_is_none == True else fill_value  # noqa
                # An indexer value of -1 is treated as "no value on this side"
                # and is replaced with the fill value.
                for i in range(result_size):
                    left_pos, right_pos = left_indexer[i], right_indexer[i]
                    left_values[i] = self._data[
                        left_pos] if left_pos != -1 else _fill_value
                    right_values[i] = other._data[
                        right_pos] if right_pos != -1 else _fill_value
                result_data = left_values + right_values
                return pandas.Series(result_data, joined_index)

            return _series_binop_common_impl

    # Not reached: every branch above returns.
    return None
Ejemplo n.º 25
0
def sdc_pandas_series_comp_binop(self,
                                 other,
                                 level=None,
                                 fill_value=None,
                                 axis=0):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.Series.comp_binop

    Limitations
    -----------
    Parameters ``level`` and ``axis`` are currently unsupported by Intel Scalable Dataframe Compiler

    Examples
    --------
    .. literalinclude:: ../../../examples/series/series_comp_binop.py
       :language: python
       :lines: 27-
       :caption:
       :name: ex_series_comp_binop

    .. command-output:: python ./series/series_comp_binop.py
       :cwd: ../../../examples

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas Series method :meth:`pandas.Series.comp_binop` implementation.

    This function runs on argument *types* and returns one of three
    implementations:

    - series compared with a scalar;
    - two series that both have a ``types.NoneType`` index;
    - two series in the general case.

    Both series-with-series implementations raise ``ValueError`` when the
    operands are not identically labelled (different lengths or unequal
    indexes). ``None`` is returned when neither operand is a ``SeriesType``.

    In the general case a ``types.NoneType`` index is given the
    ``RangeIndexType`` dtype when index dtypes are compared, the same way
    ``sdc_pandas_series_binop`` does it, so that a series with a default index
    can be compared with a series that has an explicit index.

    .. only:: developer
        Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op8
    """

    _func_name = 'Method comp_binop().'

    ty_checker = TypeChecker(_func_name)
    ty_checker.check(self, SeriesType)

    if not (isinstance(level, types.Omitted) or level is None):
        ty_checker.raise_exc(level, 'None', 'level')

    if not isinstance(fill_value, (types.Omitted, types.Number,
                                   types.NoneType)) and fill_value is not None:
        ty_checker.raise_exc(fill_value, 'number', 'fill_value')

    if not (isinstance(axis, types.Omitted) or axis == 0):
        ty_checker.raise_exc(axis, 'int', 'axis')

    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(
        other, SeriesType)
    # Nothing to do here unless at least one operand is a series.
    if not (self_is_series or other_is_series):
        return None

    if not isinstance(self, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # Indexes are accepted when their types are comparable, or when each
        # side is either a None index or passes check_index_is_numeric.
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType)
                                    or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType)
                                        or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(
            self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError(
                '{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(
                    _func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    if not operands_are_series:

        def _series_comp_binop_scalar_impl(self,
                                           other,
                                           level=None,
                                           fill_value=None,
                                           axis=0):
            # self_is_series is a typing-time constant captured from the
            # enclosing scope; it selects which operand is the series.
            if self_is_series == True:  # noqa
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                return pandas.Series(self._data < other,
                                     index=self._index,
                                     name=self._name)
            else:
                numpy_like.fillna(other._data, inplace=True, value=fill_value)
                return pandas.Series(self < other._data,
                                     index=other._index,
                                     name=other._name)

        return _series_comp_binop_scalar_impl

    else:

        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType)
                and isinstance(other.index, types.NoneType)):

            def _series_comp_binop_none_indexes_impl(self,
                                                     other,
                                                     level=None,
                                                     fill_value=None,
                                                     axis=0):
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                numpy_like.fillna(other._data, inplace=True, value=fill_value)
                left_size, right_size = len(self._data), len(other._data)
                if (left_size == right_size):
                    return pandas.Series(self._data < other._data)
                else:
                    raise ValueError(
                        "Can only compare identically-labeled Series objects")

            return _series_comp_binop_none_indexes_impl
        else:
            left_index_is_range = isinstance(self.index,
                                             (RangeIndexType, types.NoneType))
            # Exactly one side may still have a None index here. A None index
            # type carries no ``dtype``, so substitute the RangeIndexType dtype
            # instead of reading ``self.index.dtype`` unconditionally.
            self_index_dtype = RangeIndexType.dtype if isinstance(
                self.index, types.NoneType) else self.index.dtype
            other_index_dtype = RangeIndexType.dtype if isinstance(
                other.index, types.NoneType) else other.index.dtype
            index_dtypes_match = self_index_dtype == other_index_dtype
            if not index_dtypes_match:
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [self_index_dtype, other_index_dtype], [])
            else:
                numba_index_common_dtype = self_index_dtype

            def _series_comp_binop_common_impl(self,
                                               other,
                                               level=None,
                                               fill_value=None,
                                               axis=0):
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                numpy_like.fillna(other._data, inplace=True, value=fill_value)
                left_index, right_index = self.index, other.index

                # Comparison is defined only for identically labelled series.
                if (left_index is right_index
                        or numpy_like.array_equal(left_index, right_index)):
                    if index_dtypes_match == False:  # noqa
                        new_index = numpy_like.astype(
                            left_index, numba_index_common_dtype)
                    else:
                        new_index = left_index.values if left_index_is_range == True else left_index  # noqa
                    return pandas.Series(self._data < other._data, new_index)
                else:
                    raise ValueError(
                        "Can only compare identically-labeled Series objects")

            return _series_comp_binop_common_impl
Ejemplo n.º 26
0
    def arg_impl(self):
        """
        Intel Scalable Dataframe Compiler Developer Guide
        *************************************************
        Parallel replacement of numpy.argmin/numpy.argmax.

        Typing-time function: ``self`` is the type of the argument. ``None``
        is returned for anything that is not a ``types.Array`` so that this
        implementation simply does not apply. For arrays of an integer or
        float dtype the compiled implementation is returned; any other dtype
        is reported through ``ty_checker.raise_exc``.

        ``reduce_op`` comes from the enclosing scope and is expected to be the
        builtin ``min`` or ``max``; it selects both the reduction and the
        starting value (the opposite extreme of the dtype).

        The implementation reduces every parallel chunk to a
        (value, position) pair and then combines the pairs sequentially.
        Ties resolve to the smallest position, and a NaN element replaces the
        current result.

        .. only:: developer
        Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k argmin
        Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k argmax

        """

        ty_checker = TypeChecker("numpy-like 'argmin'/'argmax'")

        # Decline non-array arguments before anything reads ``self.dtype``.
        if not isinstance(self, types.Array):
            return None

        dtype = self.dtype
        if isinstance(dtype, types.Integer):
            initial_result = {
                min: max_dtype_int_val(dtype),
                max: min_dtype_int_val(dtype),
            }[reduce_op]
        elif isinstance(dtype, types.Float):
            initial_result = {
                min: max_dtype_float_val(dtype),
                max: min_dtype_float_val(dtype),
            }[reduce_op]
        else:
            # No starting value exists for other dtypes (complex included), so
            # report them here instead of returning an implementation whose
            # ``initial_result`` would be unbound.
            ty_checker.raise_exc(dtype, 'number', 'self.dtype')
            return None

        isnan = get_isnan(dtype)
        # Sentinel position, larger than any valid index.
        max_int64 = max_dtype_int_val(numpy_support.from_dtype(numpy.int64))

        def sdc_argmin_impl(self):
            chunks = parallel_chunks(len(self))
            arr_res = numpy.empty(shape=len(chunks), dtype=dtype)
            arr_pos = numpy.empty(shape=len(chunks), dtype=numpy.int64)
            # Stage 1: reduce every chunk independently.
            for i in prange(len(chunks)):
                chunk = chunks[i]
                res = initial_result
                pos = max_int64
                for j in range(chunk.start, chunk.stop):
                    if not isnan(self[j]):
                        # Skip elements that do not improve on the result.
                        if reduce_op(res, self[j]) != self[j]:
                            continue
                        if res == self[j]:
                            pos = min(pos, j)
                        else:
                            pos = j
                            res = self[j]
                    else:
                        # NaN element: keep the earliest NaN position.
                        if numpy.isnan(res):
                            pos = min(pos, j)
                        else:
                            pos = j
                        res = self[j]

                arr_res[i] = res
                arr_pos[i] = pos

            # Stage 2: combine per-chunk results with the same rules.
            general_res = initial_result
            general_pos = max_int64
            for i in range(len(chunks)):
                if not isnan(arr_res[i]):
                    if reduce_op(general_res, arr_res[i]) != arr_res[i]:
                        continue
                    if general_res == arr_res[i]:
                        general_pos = min(general_pos, arr_pos[i])
                    else:
                        general_pos = arr_pos[i]
                        general_res = arr_res[i]
                else:
                    if numpy.isnan(general_res):
                        general_pos = min(general_pos, arr_pos[i])
                    else:
                        general_pos = arr_pos[i]
                    general_res = arr_res[i]
            return general_pos

        return sdc_argmin_impl
def hpat_pandas_series_rolling_corr(self, other=None, pairwise=None):
    """
    Typing-time entry point for ``Series.rolling().corr()``.

    Validates the argument types and returns the implementation that computes
    the rolling correlation between the rolled series and ``other``. When
    ``other`` is omitted or None the series is correlated with itself.

    Parameters
    ----------
    self: SeriesRollingType
        rolling object being typed
    other: Series type, None or omitted
        second operand of the correlation
    pairwise: bool type, None or omitted
        type-checked only; the implementation does not read it

    Returns
    -------
    The implementation function. It returns a :obj:`pandas.Series` of
    ``float64`` built without an explicit index, as long as the longer of the
    two inputs.
    """

    ty_checker = TypeChecker('Method rolling.corr().')
    ty_checker.check(self, SeriesRollingType)

    accepted_other = (bool, Omitted, NoneType, SeriesType)
    if not isinstance(other, accepted_other) and other is not None:
        ty_checker.raise_exc(other, 'Series', 'other')

    accepted_pairwise = (bool, Boolean, Omitted, NoneType)
    if not isinstance(pairwise, accepted_pairwise) and pairwise is not None:
        ty_checker.raise_exc(pairwise, 'bool', 'pairwise')

    # Typing-time flag captured by the implementation: no second series given.
    nan_other = isinstance(other, (Omitted, NoneType)) or other is None

    def hpat_pandas_rolling_series_corr_impl(self, other=None, pairwise=None):
        win = self._window
        minp = self._min_periods

        main_series = self._data
        main_arr = main_series._data

        if nan_other == True:  # noqa
            other_arr = main_arr
        else:
            other_arr = other._data

        main_arr_length = len(main_arr)
        other_arr_length = len(other_arr)
        # Pairs of values exist only up to the shorter length; the output
        # covers the longer one.
        min_length = min(main_arr_length, other_arr_length)
        length = max(main_arr_length, other_arr_length)
        output_arr = numpy.empty(length, dtype=float64)

        chunks = parallel_chunks(length)
        for i in prange(len(chunks)):
            chunk = chunks[i]
            # Running state of the window for this chunk: the counter and the
            # accumulator tuple that put_corr/pop_corr update.
            nfinite = 0
            result = (0., 0., 0., 0., 0.)

            # Zero-sized window: every output comes from the empty state.
            if win == 0:
                for idx in range(chunk.start, chunk.stop):
                    output_arr[idx] = corr_result_or_nan(nfinite, minp, result)
                continue

            # Prelude: the win - 1 positions before the chunk, accumulated
            # without writing any output.
            prelude_start = max(0, chunk.start - win + 1)
            prelude_stop = min(chunk.start, min_length)

            # Interlude: chunk positions where values are only added.
            interlude_start = chunk.start
            interlude_stop = min(prelude_start + win, chunk.stop, min_length)

            # Postlude: chunk positions where one value is added and the one
            # ``win`` positions back is removed.
            postlude_start = min(prelude_start + win, chunk.stop)
            postlude_stop = min(chunk.stop, min_length)

            for idx in range(prelude_start, prelude_stop):
                x, y = main_arr[idx], other_arr[idx]
                nfinite, result = put_corr(x, y, nfinite, result)

            for idx in range(interlude_start, interlude_stop):
                x, y = main_arr[idx], other_arr[idx]
                nfinite, result = put_corr(x, y, nfinite, result)
                output_arr[idx] = corr_result_or_nan(nfinite, minp, result)

            for idx in range(postlude_start, postlude_stop):
                put_x, put_y = main_arr[idx], other_arr[idx]
                pop_x, pop_y = main_arr[idx - win], other_arr[idx - win]
                nfinite, result = put_corr(put_x, put_y, nfinite, result)
                nfinite, result = pop_corr(pop_x, pop_y, nfinite, result)
                output_arr[idx] = corr_result_or_nan(nfinite, minp, result)

            # Positions at or beyond the shorter input, before postlude_start:
            # the state is left as is and the current value is written.
            last_start = max(min_length, interlude_start)
            for idx in range(last_start, postlude_start):
                output_arr[idx] = corr_result_or_nan(nfinite, minp, result)

            # Positions at or beyond the shorter input, up to min_length + win:
            # the value ``win`` positions back is removed, nothing is added.
            last_start = max(min_length, postlude_start)
            last_stop = min(min_length + win, chunk.stop)
            for idx in range(last_start, last_stop):
                x, y = main_arr[idx - win], other_arr[idx - win]
                nfinite, result = pop_corr(x, y, nfinite, result)
                output_arr[idx] = corr_result_or_nan(nfinite, minp, result)

            # Remaining positions of the chunk are filled with NaN.
            for idx in range(last_stop, chunk.stop):
                output_arr[idx] = numpy.nan

        return pandas.Series(output_arr)

    return hpat_pandas_rolling_series_corr_impl
Ejemplo n.º 28
0
def sdc_pandas_series_operator_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.binop` implementation

    Note: Currently implemented for numeric Series only.
        Differs from Pandas in returning Series with fixed dtype :obj:`float64`

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op1*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op2*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_binop*

    Parameters
    ----------
    self: :obj:`pandas.Series` or :obj:`scalar`
        Left operand of the binary operation
    other: :obj:`pandas.Series` or :obj:`scalar`
        Right operand of the binary operation

    At least one of the operands has to be a Series, otherwise this overload
    does not apply (``None`` is returned). String series are not handled here
    either.

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation
    """

    _func_name = 'Method comp_binop().'
    ty_checker = TypeChecker(_func_name)

    lhs_is_series = isinstance(self, SeriesType)
    rhs_is_series = isinstance(other, SeriesType)
    if not lhs_is_series and not rhs_is_series:
        return None

    # string series are served by a different overload
    if lhs_is_series and isinstance(self.dtype, types.UnicodeType):
        return None
    if rhs_is_series and isinstance(other.dtype, types.UnicodeType):
        return None

    supported_operand_types = (SeriesType, types.Number)
    if not isinstance(self, supported_operand_types):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')
    if not isinstance(other, supported_operand_types):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    if lhs_is_series and rhs_is_series:
        # both indexes must be either absent or numeric ...
        plain_indexes = (isinstance(self.index, types.NoneType)
                         or check_index_is_numeric(self))
        if plain_indexes:
            plain_indexes = (isinstance(other.index, types.NoneType)
                             or check_index_is_numeric(other))

        # ... unless their types are directly comparable
        if not (check_types_comparable(self.index, other.index)
                or plain_indexes):
            index_error_template = (
                '{} Not implemented for series with not-comparable indexes. '
                '            Given: self.index={}, other.index={}'
            )
            raise TypingError(
                index_error_template.format(_func_name, self.index, other.index))

    if not check_types_comparable(self, other):
        data_error_template = (
            '{} Not supported for not-comparable operands. '
            '        Given: self={}, other={}'
        )
        raise TypingError(data_error_template.format(_func_name, self, other))

    def series_operator_binop_wrapper(self, other):
        return sdc_binop(self, other)

    return series_operator_binop_wrapper
Ejemplo n.º 29
0
def sdc_astype_overload(self, dtype):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of numpy.astype.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k astype

    Parameters
    ----------
    self: numba array, string array or range index type
        Container whose elements are converted. For any other type this
        overload does not apply and ``None`` is returned.
    dtype: number class, ``str`` function or string literal
        Target type. A non-literal value is re-requested as a literal via
        ``literally``; a literal of an unsupported kind is rejected through
        the type checker.

    Returns
    -------
    function or None
        Implementation to be compiled, or ``None`` when no implementation
        matches the given combination of argument types.
    """

    ty_checker = TypeChecker("numpy-like 'astype'")
    if not isinstance(self, (types.Array, StringArrayType, RangeIndexType)):
        return None

    accepted_dtype_types = (types.functions.NumberClass, types.Function,
                            types.StringLiteral)
    if not isinstance(dtype, accepted_dtype_types):
        # A dtype that is already a literal cannot be changed by `literally`
        # (it passes literals through unchanged), so asking again would just
        # hand the dtype value back as the result. Report the bad argument.
        if isinstance(dtype, types.Literal):
            ty_checker.raise_exc(dtype, 'string or type', 'dtype')

        # Non-literal dtype (e.g. a string variable): ask for re-typing with
        # the literal value so that the checks below can inspect it.
        def impl(self, dtype):
            return literally(dtype)

        return impl

    if ((isinstance(dtype, types.Function) and dtype.typing_key == str)
            or (isinstance(dtype, types.StringLiteral)
                and dtype.literal_value == 'str')):

        def sdc_astype_number_to_string_impl(self, dtype):
            num_bytes = 0
            arr_len = len(self)

            # Get total bytes for new array
            for i in prange(arr_len):
                item = self[i]
                num_bytes += get_utf8_size(str(item))

            data = pre_alloc_string_array(arr_len, num_bytes)

            for i in range(arr_len):
                item = self[i]
                data[i] = str(item)  # TODO: check NA

            return data

        return sdc_astype_number_to_string_impl

    if (isinstance(self, (types.Array, RangeIndexType)) and isinstance(
            dtype, (types.StringLiteral, types.functions.NumberClass))):

        def sdc_astype_number_impl(self, dtype):
            arr = numpy.empty(len(self), dtype=numpy.dtype(dtype))
            for i in numba.prange(len(self)):
                arr[i] = self[i]

            return arr

        return sdc_astype_number_impl

    # no implementation for this combination of argument types
    return None
Ejemplo n.º 30
0
def hpat_pandas_stringmethods_rjust(self, width, fillchar=' '):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.rjust

    Limitations
    -----------
    Series elements are expected to be Unicode strings. Elements cannot be NaN.

    Examples
    --------
    .. literalinclude:: ../../../examples/series/str/series_str_rjust.py
       :language: python
       :lines: 27-
       :caption: Filling left side of strings in the Series with an additional character
       :name: ex_series_str_rjust

    .. command-output:: python ./series/str/series_str_rjust.py
       :cwd: ../../../examples

    .. todo:: Add support of 32-bit Unicode for `str.rjust()`

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.rjust()` implementation.

    Note: only Unicode string elements are supported. Numpy.NaN elements are not supported.

    .. only:: developer

    Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_rjust

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        String accessor of the series being processed
    width: :obj:`int`
        Minimum width of each resulting string
    fillchar: :obj:`str`
        Character used for padding, a single whitespace by default

    Returns
    -------
    :obj:`pandas.Series`
         Series of the padded strings that keeps the index and the name of the input series
    """

    ty_checker = TypeChecker('Method rjust().')
    ty_checker.check(self, StringMethodsType)

    if not isinstance(width, Integer):
        ty_checker.raise_exc(width, 'int', 'width')

    # fillchar is either one of the string-like types or the untouched default
    fillchar_types = (Omitted, StringLiteral, UnicodeType)
    if not (isinstance(fillchar, fillchar_types) or fillchar == ' '):
        ty_checker.raise_exc(fillchar, 'str', 'fillchar')

    def hpat_pandas_stringmethods_rjust_impl(self, width, fillchar=' '):
        series = self._data
        padded = [''] * len(series)
        for pos, value in enumerate(series._data):
            padded[pos] = value.rjust(width, fillchar)

        return pandas.Series(padded, series._index, name=series._name)

    return hpat_pandas_stringmethods_rjust_impl