def hpat_pandas_series_rolling_count(self):
    """
    Pandas Series method :meth:`pandas.Series.rolling.count()` implementation.

    Parameters
    ----------
    self: :class:`SeriesRollingType`
        input arg

    Returns
    -------
    :obj:`pandas.Series`
         float64 Series holding, for every position, the value of ``arr_nonnan_count``
         computed over the window ending at that position. Index and name are taken
         from the input Series.
    """

    checker = TypeChecker('Method rolling.count().')
    checker.check(self, SeriesRollingType)

    def hpat_pandas_rolling_series_count_impl(self):
        series = self._data
        values = series._data
        window = self._window
        size = len(values)
        counts = numpy.empty(size, dtype=float64)

        # positions before `head` only see a partial window starting at element 0
        head = min(window, size)
        for pos in prange(head):
            counts[pos] = arr_nonnan_count(values[:pos + 1])

        # the remaining positions see a window of exactly `window` elements
        for pos in prange(head, size):
            counts[pos] = arr_nonnan_count(values[pos + 1 - window:pos + 1])

        return pandas.Series(counts, series._index, name=series._name)

    return hpat_pandas_rolling_series_count_impl
예제 #2
0
def hpat_pandas_stringmethods_isspace(self):
    """
    Pandas Series method :meth:`pandas.core.strings.StringMethods.isspace()` implementation.

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg

    Returns
    -------
    :obj:`pandas.Series`
         boolean Series with the result of ``str.isspace()`` for every element;
         index and name are taken from the wrapped Series
    """

    checker = TypeChecker('Method isspace().')
    checker.check(self, StringMethodsType)

    def hpat_pandas_stringmethods_isspace_impl(self):
        series = self._data
        flags = numpy.empty(len(series), numba.types.boolean)
        for pos, text in enumerate(series._data):
            flags[pos] = text.isspace()

        return pandas.Series(flags, series._index, name=series._name)

    return hpat_pandas_stringmethods_isspace_impl
예제 #3
0
def count_overload(df, axis=0, level=None, numeric_only=False):
    """
    Pandas DataFrame method :meth:`pandas.DataFrame.count` implementation.
    .. only:: developer

    Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_count*

    Parameters
    -----------
    df: :class:`pandas.DataFrame`
      input arg
    axis:
      *unsupported* (must be omitted or equal to 0)
    level:
      *unsupported* (must be omitted or None)
    numeric_only:
      *unsupported* (must be omitted or False)

    Returns
    -------
    :obj:`pandas.Series` or `pandas.DataFrame`
      for each column/row the number of non-NA/null entries. If level is specified returns a DataFrame.
    """

    method_name = 'count'

    checker = TypeChecker(f'Method {method_name}().')
    checker.check(df, DataFrameType)

    # each optional argument is accepted only when omitted or left at its default value
    axis_is_default = isinstance(axis, types.Omitted) or axis == 0
    if not axis_is_default:
        checker.raise_exc(axis, 'unsupported', 'axis')

    level_is_default = isinstance(level, types.Omitted) or level is None
    if not level_is_default:
        checker.raise_exc(level, 'unsupported', 'level')

    numeric_only_is_default = isinstance(numeric_only, types.Omitted) or numeric_only is False
    if not numeric_only_is_default:
        checker.raise_exc(numeric_only, 'unsupported', 'numeric_only')

    # both dictionaries are forwarded unchanged to the column-reduction helper
    return sdc_pandas_dataframe_reduce_columns(
        df,
        method_name,
        {'axis': 0, 'level': None, 'numeric_only': False},
        {'level': 'level'},
    )
def hpat_pandas_series_rolling_quantile(self,
                                        quantile,
                                        interpolation='linear'):
    """
    Pandas Series method :meth:`pandas.Series.rolling.quantile()` implementation.

    Parameters
    ----------
    self: :class:`SeriesRollingType`
        input arg
    quantile: :obj:`float`
        Quantile to compute; the returned implementation rejects values outside [0, 1]
    interpolation: :obj:`str`
        Only ``'linear'`` is accepted by the returned implementation

    Returns
    -------
    :obj:`pandas.Series`
         float64 Series with index and name of the input Series. A position is NaN
         when its window has fewer finite values than the rolling ``min_periods``.
    """

    checker = TypeChecker('Method rolling.quantile().')
    checker.check(self, SeriesRollingType)

    if not isinstance(quantile, Number):
        checker.raise_exc(quantile, 'float', 'quantile')

    accepted_interpolation = (Omitted, StringLiteral, UnicodeType)
    if not isinstance(interpolation, accepted_interpolation) and interpolation != 'linear':
        checker.raise_exc(interpolation, 'str', 'interpolation')

    def hpat_pandas_rolling_series_quantile_impl(self,
                                                 quantile,
                                                 interpolation='linear'):
        if quantile < 0 or quantile > 1:
            raise ValueError('quantile value not in [0, 1]')
        if interpolation != 'linear':
            raise ValueError('interpolation value not "linear"')

        series = self._data
        values = series._data
        window = self._window
        min_periods = self._min_periods
        size = len(values)
        result = numpy.empty(size, dtype=float64)

        def window_quantile(chunk, q, required):
            # only finite values take part in the computation
            finite = chunk[numpy.isfinite(chunk)]
            if len(finite) >= required:
                return arr_quantile(finite, q)
            else:
                return numpy.nan

        # positions before `head` only see a partial window starting at element 0
        head = min(window, size)
        for pos in prange(head):
            result[pos] = window_quantile(values[:pos + 1], quantile, min_periods)

        # the remaining positions see a window of exactly `window` elements
        for pos in prange(head, size):
            result[pos] = window_quantile(values[pos + 1 - window:pos + 1],
                                          quantile, min_periods)

        return pandas.Series(result, series._index, name=series._name)

    return hpat_pandas_rolling_series_quantile_impl
def hpat_pandas_series_rolling_corr(self, other=None, pairwise=None):
    """
    Pandas Series method :meth:`pandas.Series.rolling.corr()` implementation.

    Parameters
    ----------
    self: :class:`SeriesRollingType`
        input arg
    other: :obj:`pandas.Series`
        Second operand; when omitted or None the data of `self` is used instead
    pairwise: :obj:`bool`
        Only type-checked here; not used by the returned implementation

    Returns
    -------
    :obj:`pandas.Series`
         float64 Series built without index and name, as long as the longer of the two inputs
    """

    checker = TypeChecker('Method rolling.corr().')
    checker.check(self, SeriesRollingType)

    # TODO: check `other` is Series after a circular import of SeriesType fixed
    # accepted_other = (bool, Omitted, NoneType, SeriesType)
    # if not isinstance(other, accepted_other) and other is not None:
    #     checker.raise_exc(other, 'Series', 'other')

    pairwise_types = (bool, Boolean, Omitted, NoneType)
    if not isinstance(pairwise, pairwise_types) and pairwise is not None:
        checker.raise_exc(pairwise, 'bool', 'pairwise')

    # resolved once at typing time and captured by the implementation below
    other_is_missing = isinstance(other, (Omitted, NoneType)) or other is None

    def hpat_pandas_rolling_series_corr_impl(self, other=None, pairwise=None):
        window = self._window
        min_periods = self._min_periods

        left = self._data._data

        if other_is_missing == True:  # noqa
            right = left
        else:
            right = other._data

        size = max(len(left), len(right))
        result = numpy.empty(size, dtype=float64)

        def window_corr(lhs, rhs, required):
            # align both chunks by size and keep positions finite in both of them
            common = min(len(lhs), len(rhs))
            usable = numpy.isfinite(lhs[:common]) & numpy.isfinite(rhs[:common])

            lhs_usable = lhs[usable]
            if len(lhs_usable) >= required:
                return arr_corr(lhs_usable, rhs[usable])
            else:
                return numpy.nan

        # positions before `head` only see a partial window starting at element 0
        head = min(window, size)
        for pos in prange(head):
            result[pos] = window_corr(left[:pos + 1], right[:pos + 1], min_periods)

        # the remaining positions see a window of exactly `window` elements
        for pos in prange(head, size):
            begin = pos + 1 - window
            result[pos] = window_corr(left[begin:pos + 1], right[begin:pos + 1],
                                      min_periods)

        return pandas.Series(result)

    return hpat_pandas_rolling_series_corr_impl
def hpat_pandas_series_rolling_var(self, ddof=1):
    """
    Pandas Series method :meth:`pandas.Series.rolling.var()` implementation.

    Validates the argument types and returns ``hpat_pandas_rolling_series_var_impl``.

    Parameters
    ----------
    self: :class:`SeriesRollingType`
        input arg
    ddof: :obj:`int`
        Must be an integer (or omitted)
    """

    checker = TypeChecker('Method rolling.var().')
    checker.check(self, SeriesRollingType)

    accepted_ddof = (int, Integer, Omitted)
    if not isinstance(ddof, accepted_ddof):
        checker.raise_exc(ddof, 'int', 'ddof')

    return hpat_pandas_rolling_series_var_impl
def hpat_pandas_series_rolling_apply(self, func, raw=None):
    """
    Pandas Series method :meth:`pandas.Series.rolling.apply()` implementation.

    Parameters
    ----------
    self: :class:`SeriesRollingType`
        input arg
    func:
        Function handed to ``arr_apply`` together with the data of each window
    raw: :obj:`bool`
        Only type-checked here; not used by the returned implementation

    Returns
    -------
    :obj:`pandas.Series`
         float64 Series with index and name of the input Series
    """

    checker = TypeChecker('Method rolling.apply().')
    checker.check(self, SeriesRollingType)

    accepted_raw = (Omitted, NoneType, Boolean)
    if not isinstance(raw, accepted_raw) and raw is not None:
        checker.raise_exc(raw, 'bool', 'raw')

    def hpat_pandas_rolling_series_apply_impl(self, func, raw=None):
        series = self._data
        values = series._data
        window = self._window
        min_periods = self._min_periods
        size = len(values)
        result = numpy.empty(size, dtype=float64)

        def window_apply(chunk, fn, required):
            # infinite values are replaced with NaN in a private copy of the window
            cleaned = chunk.copy()
            cleaned[numpy.isinf(chunk)] = numpy.nan
            # the check uses the window length, so NaN elements are counted as well
            if len(cleaned) >= required:
                return arr_apply(cleaned, fn)
            else:
                return numpy.nan

        # positions before `head` only see a partial window starting at element 0
        head = min(window, size)
        for pos in prange(head):
            result[pos] = window_apply(values[:pos + 1], func, min_periods)

        # the remaining positions see a window of exactly `window` elements
        for pos in prange(head, size):
            result[pos] = window_apply(values[pos + 1 - window:pos + 1], func,
                                       min_periods)

        return pandas.Series(result, series._index, name=series._name)

    return hpat_pandas_rolling_series_apply_impl
예제 #8
0
def hpat_pandas_stringmethods_rjust(self, width, fillchar=' '):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.rjust

    Limitations
    -----------
    Series elements are expected to be Unicode strings. Elements cannot be NaN.

    Examples
    --------
    .. literalinclude:: ../../../examples/series/str/series_str_rjust.py
       :language: python
       :lines: 27-
       :caption: Filling left side of strings in the Series with an additional character
       :name: ex_series_str_rjust

    .. command-output:: python ./series/str/series_str_rjust.py
       :cwd: ../../../examples

    .. todo:: Add support of 32-bit Unicode for `str.rjust()`

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.rjust()` implementation.

    Note: only Unicode string elements are supported; numpy.NaN elements are not supported.

    .. only:: developer

    Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_rjust

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg
    width: :obj:`int`
        Minimum width of resulting string
    fillchar: :obj:`str`
        Additional character for filling, default is whitespace

    Returns
    -------
    :obj:`pandas.Series`
         Series of strings produced by ``str.rjust(width, fillchar)`` for every element;
         index and name are taken from the wrapped Series
    """

    checker = TypeChecker('Method rjust().')
    checker.check(self, StringMethodsType)

    if not isinstance(width, Integer):
        checker.raise_exc(width, 'int', 'width')

    fillchar_types = (Omitted, StringLiteral, UnicodeType)
    if not isinstance(fillchar, fillchar_types) and fillchar != ' ':
        checker.raise_exc(fillchar, 'str', 'fillchar')

    def hpat_pandas_stringmethods_rjust_impl(self, width, fillchar=' '):
        series = self._data
        # pre-sized list of strings that is filled position by position
        padded = [''] * len(series)
        for pos, text in enumerate(series._data):
            padded[pos] = text.rjust(width, fillchar)

        return pandas.Series(padded, series._index, name=series._name)

    return hpat_pandas_stringmethods_rjust_impl
예제 #9
0
def hpat_pandas_stringmethods_len(self):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.len

    Limitations
    -----------
    Series elements are expected to be Unicode strings. Elements cannot be NaN.

    Examples
    --------
    .. literalinclude:: ../../../examples/series/str/series_str_len.py
       :language: python
       :lines: 27-
       :caption: Compute the length of each element in the Series
       :name: ex_series_str_len

    .. command-output:: python ./series/str/series_str_len.py
       :cwd: ../../../examples

    .. seealso::
        `str.len`
            Python built-in function returning the length of an object.
        :ref:`Series.size <pandas.Series.size>`
            Returns the length of the Series.

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.len()` implementation.

    Note: only Unicode string elements are supported; numpy.NaN elements are not supported.

    .. only:: developer

    Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_str_len1

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg

    Returns
    -------
    :obj:`pandas.Series`
         int64 Series with the length of every element; index and name are taken
         from the wrapped Series
    """

    checker = TypeChecker('Method len().')
    checker.check(self, StringMethodsType)

    def hpat_pandas_stringmethods_len_impl(self):
        series = self._data
        lengths = numpy.empty(len(series), numba.types.int64)
        for pos, text in enumerate(series._data):
            lengths[pos] = len(text)

        return pandas.Series(lengths, series._index, name=series._name)

    return hpat_pandas_stringmethods_len_impl
예제 #10
0
def hpat_pandas_stringmethods_isupper(self):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.isupper

    Limitations
    -----------
    Series elements are expected to be Unicode strings. Elements cannot be NaN.

    Examples
    --------
    .. literalinclude:: ../../../examples/series/str/series_str_isupper.py
       :language: python
       :lines: 27-
       :caption: Check whether all characters in each string are uppercase
       :name: ex_series_str_isupper

    .. command-output:: python ./series/str/series_str_isupper.py
       :cwd: ../../../examples

    .. seealso::
        :ref:`Series.str.isalpha <pandas.Series.str.isalpha>`
            Check whether all characters are alphabetic.
        :ref:`Series.str.isnumeric <pandas.Series.str.isnumeric>`
            Check whether all characters are numeric.
        :ref:`Series.str.isalnum <pandas.Series.str.isalnum>`
            Check whether all characters are alphanumeric.
        :ref:`Series.str.isdigit <pandas.Series.str.isdigit>`
            Check whether all characters are digits.
        :ref:`Series.str.isdecimal <pandas.Series.str.isdecimal>`
            Check whether all characters are decimal.
        :ref:`Series.str.isspace <pandas.Series.str.isspace>`
            Check whether all characters are whitespace.
        :ref:`Series.str.islower <pandas.Series.str.islower>`
            Check whether all characters are lowercase.
        :ref:`Series.str.istitle <pandas.Series.str.istitle>`
            Check whether all characters are titlecase.

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.isupper()` implementation.

    Note: only Unicode string elements are supported; numpy.NaN elements are not supported.

    .. only:: developer

    Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_str2str

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg

    Returns
    -------
    :obj:`pandas.Series`
         boolean Series with the result of ``str.isupper()`` for every element;
         index and name are taken from the wrapped Series
    """

    checker = TypeChecker('Method isupper().')
    checker.check(self, StringMethodsType)

    def hpat_pandas_stringmethods_isupper_impl(self):
        series = self._data
        flags = numpy.empty(len(series), numba.types.boolean)
        for pos, text in enumerate(series._data):
            flags[pos] = text.isupper()

        return pandas.Series(flags, series._index, name=series._name)

    return hpat_pandas_stringmethods_isupper_impl
예제 #11
0
def hpat_pandas_stringmethods_find(self, sub, start=0, end=None):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.find

    Limitations
    -----------
    Series elements are expected to be Unicode strings. Elements cannot be NaN.

    Examples
    --------
    .. literalinclude:: ../../../examples/series/str/series_str_find.py
       :language: python
       :lines: 27-
       :caption: Return lowest indexes in each strings in the Series
       :name: ex_series_str_find

    .. command-output:: python ./series/str/series_str_find.py
       :cwd: ../../../examples

    .. todo:: Add support of parameters ``start`` and ``end``

    .. seealso::
        :ref:`Series.str.rfind <pandas.Series.str.rfind>`
            Return highest indexes in each strings.

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.find()` implementation.

    Note: only Unicode string elements are supported; numpy.NaN elements are not supported.

    .. only:: developer

    Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_find

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg
    sub: :obj:`str`
        Substring being searched
    start: :obj:`int`
        Left edge index
        *unsupported* (the implementation raises ValueError unless it equals 0)
    end: :obj:`int`
        Right edge index
        *unsupported* (the implementation raises ValueError unless it is None)

    Returns
    -------
    :obj:`pandas.Series`
         int64 Series with the result of ``str.find(sub)`` for every element;
         index and name are taken from the wrapped Series
    """

    checker = TypeChecker('Method find().')
    checker.check(self, StringMethodsType)

    if not isinstance(sub, (StringLiteral, UnicodeType)):
        checker.raise_exc(sub, 'str', 'sub')

    edge_types = (Integer, NoneType, Omitted)
    if not isinstance(start, edge_types) and start != 0:
        checker.raise_exc(start, 'None, int', 'start')

    if not isinstance(end, edge_types) and end is not None:
        checker.raise_exc(end, 'None, int', 'end')

    def hpat_pandas_stringmethods_find_impl(self, sub, start=0, end=None):
        # only the default search bounds are supported
        if start != 0:
            raise ValueError('Method find(). The object start\n expected: 0')
        if end is not None:
            raise ValueError('Method find(). The object end\n expected: None')

        series = self._data
        positions = numpy.empty(len(series), numba.types.int64)
        for pos, text in enumerate(series._data):
            positions[pos] = text.find(sub)

        return pandas.Series(positions, series._index, name=series._name)

    return hpat_pandas_stringmethods_find_impl
예제 #12
0
def sdc_pandas_series_operator_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.binop` implementation

    Note: Currently implemented for numeric Series only.
        Differs from Pandas in returning Series with fixed dtype :obj:`float64`

    NOTE(review): the name ``binop`` and the ``+`` applied in the implementations below look like
    placeholders of a per-operator code template - confirm before editing either of them.

    This function selects one of three implementations depending on the type of ``other`` and on the
    index types of both operands; it returns ``None`` when ``self`` is not a ``SeriesType``.

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op1*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op2*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_binop*

    Parameters
    ----------
    self: :obj:`pandas.Series`
        Input series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation

    Raises
    ------
    TypingError
        If ``other`` is a Series whose data or index types are not comparable with those of ``self``
    """

    _func_name = 'Operator binop().'

    ty_checker = TypeChecker('Operator binop().')
    # not a Series on the left-hand side: nothing to provide for these argument types
    if not isinstance(self, SeriesType):
        return None

    if not isinstance(other, (SeriesType, types.Number)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    # the three flags below exist only when `other` is a Series;
    # every later use of them is guarded by the same isinstance check
    if isinstance(other, SeriesType):
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType)
                                    or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType)
                                        or check_index_is_numeric(other)))
        series_data_comparable = check_types_comparable(self.data, other.data)
        series_indexes_comparable = check_types_comparable(
            self.index, other.index) or none_or_numeric_indexes

    if isinstance(other, SeriesType) and not series_data_comparable:
        raise TypingError(
            '{} Not supported for series with not-comparable data. \
        Given: self.data={}, other.data={}'.format(_func_name, self.data,
                                                   other.data))

    if isinstance(other, SeriesType) and not series_indexes_comparable:
        raise TypingError(
            '{} Not implemented for series with not-comparable indexes. \
        Given: self.index={}, other.index={}'.format(_func_name, self.index,
                                                     other.index))

    # specializations for numeric series - TODO: support arithmetic operation on StringArrays
    if (isinstance(other, types.Number)):

        # Series <op> scalar: both sides are converted to float64, index and name of `self` are kept
        def _series_operator_binop_scalar_impl(self, other):
            result_data = self._data.astype(
                numpy.float64) + numpy.float64(other)
            return pandas.Series(result_data,
                                 index=self._index,
                                 name=self._name)

        return _series_operator_binop_scalar_impl

    elif (isinstance(other, SeriesType)):

        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType)
                and isinstance(other.index, types.NoneType)):

            def _series_operator_binop_none_indexes_impl(self, other):

                # equal sizes: plain element-wise operation on float64 copies
                if (len(self._data) == len(other._data)):
                    result_data = self._data.astype(numpy.float64)
                    result_data = result_data + other._data.astype(
                        numpy.float64)
                    return pandas.Series(result_data)
                else:
                    # different sizes: the shorter operand is copied into a buffer of the longer size
                    # and its tail is filled with NaN before the operation is applied
                    left_size, right_size = len(self._data), len(other._data)
                    min_data_size = min(left_size, right_size)
                    max_data_size = max(left_size, right_size)
                    result_data = numpy.empty(max_data_size,
                                              dtype=numpy.float64)
                    if (left_size == min_data_size):
                        # `self` is the shorter operand, the buffer takes the left-hand position
                        result_data[:min_data_size] = self._data
                        result_data[min_data_size:] = numpy.nan
                        result_data = result_data + other._data.astype(
                            numpy.float64)
                    else:
                        # `other` is the shorter operand, the buffer takes the right-hand position
                        result_data[:min_data_size] = other._data
                        result_data[min_data_size:] = numpy.nan
                        result_data = self._data.astype(
                            numpy.float64) + result_data

                    return pandas.Series(result_data, self._index)

            return _series_operator_binop_none_indexes_impl
        else:
            # for numeric indexes find common dtype to be used when creating joined index
            if none_or_numeric_indexes:
                # a missing (None) index is treated as int64 when looking for the common dtype
                ty_left_index_dtype = types.int64 if isinstance(
                    self.index, types.NoneType) else self.index.dtype
                ty_right_index_dtype = types.int64 if isinstance(
                    other.index, types.NoneType) else other.index.dtype
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [ty_left_index_dtype, ty_right_index_dtype], [])

            def _series_operator_binop_common_impl(self, other):
                left_index, right_index = self.index, other.index

                # check if indexes are equal and series don't have to be aligned
                if sdc_check_indexes_equal(left_index, right_index):
                    result_data = self._data.astype(numpy.float64)
                    result_data = result_data + other._data.astype(
                        numpy.float64)

                    # `none_or_numeric_indexes` is computed by the enclosing function, outside this
                    # implementation; `numba_index_common_dtype` is defined only when it is true
                    if none_or_numeric_indexes == True:  # noqa
                        result_index = left_index.astype(
                            numba_index_common_dtype)
                    else:
                        result_index = self._index

                    return pandas.Series(result_data, index=result_index)

                # TODO: replace below with core join(how='outer', return_indexers=True) when implemented
                joined_index, left_indexer, right_indexer = sdc_join_series_indexes(
                    left_index, right_index)

                # gather the values of each operand in joined-index order;
                # positions whose indexer is -1 are overwritten with NaN afterwards
                joined_index_range = numpy.arange(len(joined_index))
                left_values = numpy.asarray(
                    [self._data[left_indexer[i]] for i in joined_index_range],
                    numpy.float64)
                left_values[left_indexer == -1] = numpy.nan

                right_values = numpy.asarray([
                    other._data[right_indexer[i]] for i in joined_index_range
                ], numpy.float64)
                right_values[right_indexer == -1] = numpy.nan

                result_data = left_values + right_values
                return pandas.Series(result_data, joined_index)

            return _series_operator_binop_common_impl

    return None
예제 #13
0
def check_type(name, df, axis=None, skipna=None, level=None, numeric_only=None, ddof=1, min_count=0):
    """
    Validate the argument types of the DataFrame method called `name`.

    `df` is checked against ``DataFrameType``. `skipna` must be omitted, None or boolean.
    Every other argument is unsupported: it has to be omitted or left at its default value,
    otherwise ``TypeChecker.raise_exc`` is called for it.
    """
    checker = TypeChecker(f'Method {name}().')
    checker.check(df, DataFrameType)

    axis_ok = isinstance(axis, types.Omitted) or axis is None
    if not axis_ok:
        checker.raise_exc(axis, 'unsupported', 'axis')

    skipna_types = (types.Omitted, types.NoneType, types.Boolean)
    skipna_ok = isinstance(skipna, skipna_types) or skipna is None
    if not skipna_ok:
        checker.raise_exc(skipna, 'bool', 'skipna')

    level_ok = isinstance(level, types.Omitted) or level is None
    if not level_ok:
        checker.raise_exc(level, 'unsupported', 'level')

    numeric_only_ok = isinstance(numeric_only, types.Omitted) or numeric_only is None
    if not numeric_only_ok:
        checker.raise_exc(numeric_only, 'unsupported', 'numeric_only')

    ddof_ok = isinstance(ddof, types.Omitted) or ddof == 1
    if not ddof_ok:
        checker.raise_exc(ddof, 'unsupported', 'ddof')

    min_count_ok = isinstance(min_count, types.Omitted) or min_count == 0
    if not min_count_ok:
        checker.raise_exc(min_count, 'unsupported', 'min_count')
예제 #14
0
def sdc_pandas_dataframe_append(df, other, ignore_index=True, verify_integrity=False, sort=None):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.DataFrame.append
    Examples
    --------
    .. literalinclude:: ../../../examples/dataframe_append.py
       :language: python
       :lines: 27-
       :caption: Appending rows of other to the end of caller, returning a new object.
       Columns in other that are not in the caller are added as new columns.
       :name: ex_dataframe_append

    .. command-output:: python ./dataframe_append.py
        :cwd: ../../../examples

    .. note::
        Parameter ignore_index, verify_integrity, sort are currently unsupported
        by Intel Scalable Dataframe Compiler
        Currently only pandas.DataFrame is supported as "other" parameter

    .. seealso::
        `pandas.concat <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.concat.html>`_
            General function to concatenate DataFrame or Series objects.
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas DataFrame method :meth:`pandas.DataFrame.append` implementation.
    .. only:: developer
    Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_append*
    Parameters
    -----------
    df: :obj:`pandas.DataFrame`
        input arg
    other: :obj:`pandas.DataFrame` object or :obj:`pandas.Series` or :obj:`dict`
        The data to append
    ignore_index: :obj:`bool`
        *unsupported*
    verify_integrity: :obj:`bool`
        *unsupported*
    sort: :obj:`bool`
        *unsupported*
    Returns
    -------
    :obj: `pandas.DataFrame`
        return DataFrame with appended rows to the end
    """

    _func_name = 'append'

    ty_checker = TypeChecker(f'Method {_func_name}().')
    ty_checker.check(df, DataFrameType)
    # TODO: support other array-like types
    ty_checker.check(other, DataFrameType)

    bool_types = (bool, types.Boolean, types.Omitted)

    # TODO: support index in series from df-columns
    # The default (True) already satisfies the isinstance test. The former extra condition
    # `and not ignore_index` made this check unreachable for every truthy non-boolean value,
    # including any Numba type object, so wrongly typed arguments were silently accepted.
    if not isinstance(ignore_index, bool_types):
        ty_checker.raise_exc(ignore_index, 'boolean', 'ignore_index')

    if not isinstance(verify_integrity, bool_types) and verify_integrity:
        ty_checker.raise_exc(verify_integrity, 'boolean', 'verify_integrity')

    # types.NoneType is accepted as well, so that an explicitly passed `sort=None`
    # is treated the same way as the omitted default
    if not isinstance(sort, bool_types + (types.NoneType,)) and sort is not None:
        ty_checker.raise_exc(sort, 'boolean, None', 'sort')

    args = (('ignore_index', True), ('verify_integrity', False), ('sort', None))

    def sdc_pandas_dataframe_append_impl(df, other, _func_name, args):
        loc_vars = {}
        func_def, global_vars = sdc_pandas_dataframe_append_codegen(df, other, _func_name, args)

        # the executed text is produced by sdc_pandas_dataframe_append_codegen above;
        # the generated function is then looked up by name in the local namespace
        exec(func_def, global_vars, loc_vars)
        _append_impl = loc_vars['sdc_pandas_dataframe_append_impl']
        return _append_impl

    return sdc_pandas_dataframe_append_impl(df, other, _func_name, args)
def hpat_pandas_series_rolling_sum(self):
    """
    Pandas Series method :meth:`pandas.Series.rolling.sum()` implementation.

    Checks that `self` is a ``SeriesRollingType`` and returns
    ``hpat_pandas_rolling_series_sum_impl``.
    """

    checker = TypeChecker('Method rolling.sum().')
    checker.check(self, SeriesRollingType)

    return hpat_pandas_rolling_series_sum_impl
예제 #16
0
def hpat_pandas_stringmethods_startswith(self, pat, na=None):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.startswith

    Limitations
    -----------
    Series elements are expected to be Unicode strings. Elements cannot be NaN.

    Examples
    --------
    .. literalinclude:: ../../../examples/series/str/series_str_startswith.py
       :language: python
       :lines: 27-
       :caption: Test if the start of each string element matches a string
       :name: ex_series_str_startswith

    .. command-output:: python ./series/str/series_str_startswith.py
       :cwd: ../../../examples

    .. todo::
        - Add support of matching the start of each string by a pattern
        - Add support of parameter ``na``

    .. seealso::
        `str.startswith <https://docs.python.org/3/library/stdtypes.html#str.startswith>`_
            Python standard library string method.
        :ref:`Series.str.endswith <pandas.Series.str.endswith>`
            Same as startswith, but tests the end of string.
        :ref:`Series.str.contains <pandas.Series.str.contains>`
            Tests if string element contains a pattern.

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.startswith()` implementation.

    Note: only Unicode string elements are supported; numpy.NaN elements are not supported.

    .. only:: developer

    Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_startswith

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg
    pat: :obj:`str`
        Character sequence
    na: :obj:`bool`
        Object shown if element tested is not a string
        *unsupported* (the implementation raises ValueError unless it is None)

    Returns
    -------
    :obj:`pandas.Series`
         boolean Series with the result of ``str.startswith(pat)`` for every element;
         index and name are taken from the wrapped Series
    """

    checker = TypeChecker('Method startswith().')
    checker.check(self, StringMethodsType)

    if not isinstance(pat, (StringLiteral, UnicodeType)):
        checker.raise_exc(pat, 'str', 'pat')

    na_types = (Boolean, NoneType, Omitted)
    if not isinstance(na, na_types) and na is not None:
        checker.raise_exc(na, 'bool', 'na')

    def hpat_pandas_stringmethods_startswith_impl(self, pat, na=None):
        # only the default value of `na` is supported
        if na is not None:
            raise ValueError('Method startswith(). The object na\n expected: None')

        series = self._data
        flags = numpy.empty(len(series), numba.types.boolean)
        for pos, text in enumerate(series._data):
            flags[pos] = text.startswith(pat)

        return pandas.Series(flags, series._index, name=series._name)

    return hpat_pandas_stringmethods_startswith_impl
예제 #17
0
def hpat_pandas_stringmethods_zfill(self, width):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.zfill

    Limitations
    -----------
    Series elements are expected to be Unicode strings. Elements cannot be NaN.

    Examples
    --------
    .. literalinclude:: ../../../examples/series/str/series_str_zfill.py
       :language: python
       :lines: 27-
       :caption: Pad strings in the Series by prepending '0' characters
       :name: ex_series_str_zfill

    .. command-output:: python ./series/str/series_str_zfill.py
       :cwd: ../../../examples

    .. todo:: Add support of 32-bit Unicode for `str.zfill()`

    .. seealso::
        :ref:`Series.str.rjust <pandas.Series.str.rjust>`
            Fills the left side of strings with an arbitrary character.
        :ref:`Series.str.ljust <pandas.Series.str.ljust>`
            Fills the right side of strings with an arbitrary character.
        :ref:`Series.str.pad <pandas.Series.str.pad>`
            Fills the specified sides of strings with an arbitrary character.
        :ref:`Series.str.center <pandas.Series.str.center>`
            Fills both sides of strings with an arbitrary character.

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.zfill()` implementation.

    The outer function only validates the argument types and hands back the
    implementation; the per-element padding is done by ``str.zfill`` inside it.

    Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements.

    .. only:: developer

    Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_zfill

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg
    width: :obj:`int`
        Minimum width of resulting string

    Returns
    -------
    :obj:`pandas.Series`
         returns :obj:`pandas.Series` object
    """

    ty_checker = TypeChecker('Method zfill().')
    ty_checker.check(self, StringMethodsType)

    if not isinstance(width, Integer):
        ty_checker.raise_exc(width, 'int', 'width')

    def hpat_pandas_stringmethods_zfill_impl(self, width):
        # `self._data` is the Series the string accessor wraps; its own
        # `_data` holds the string elements.
        series = self._data
        padded = [''] * len(series)
        for pos, text in enumerate(series._data):
            padded[pos] = text.zfill(width)

        # Index and name of the source Series are carried over unchanged.
        return pandas.Series(padded, series._index, name=series._name)

    return hpat_pandas_stringmethods_zfill_impl
예제 #18
0
def sdc_pandas_dataframe_drop(df, labels=None, axis=0, index=None, columns=None, level=None, inplace=False,
                              errors='raise'):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.DataFrame.drop

    Limitations
    -----------
    Parameter columns is expected to be a Literal value with one column name or Tuple with columns names.

    Examples
    --------
    .. literalinclude:: ../../../examples/dataframe/dataframe_drop.py
        :language: python
        :lines: 37-
        :caption: Drop specified columns from DataFrame
        Remove columns by specifying directly index or column names.
        :name: ex_dataframe_drop

    .. command-output:: python ./dataframe_drop.py
        :cwd: ../../../examples

    .. note::
        Parameters axis, index, level, inplace, errors are currently unsupported
        by Intel Scalable Dataframe Compiler
        Currently multi-indexing is not supported.

    .. seealso::
        :ref:`DataFrame.loc <pandas.DataFrame.loc>`
            Label-location based indexer for selection by label.
        :ref:`DataFrame.dropna <pandas.DataFrame.dropna>`
            Return DataFrame with labels on given axis omitted where (all or any) data are missing.
        :ref:`DataFrame.drop_duplicates <pandas.DataFrame.drop_duplicates>`
            Return DataFrame with duplicate rows removed, optionally only considering certain columns.
        :ref:`Series.drop <pandas.Series.drop>`
            Return Series with specified index labels removed.

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas DataFrame method :meth:`pandas.DataFrame.drop` implementation.

    The argument types are validated first; the names of the columns to drop
    are then read from the literal type(s) of ``columns`` and passed to
    ``sdc_pandas_dataframe_drop_codegen``, whose generated source is executed
    to obtain the implementation that is returned.

    .. only:: developer

    Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_drop*

    Parameters
    ----------
    df: :obj:`pandas.DataFrame`
        input arg
    labels: single label or list-like
        Column labels to drop
        *unsupported*
    axis: :obj:`int` default 0
        *unsupported*
    index: single label or list-like
        *unsupported*
    columns: single label or list-like
    level: :obj:`int` or :obj:`str`
        For MultiIndex, level from which the labels will be removed.
        *unsupported*
    inplace: :obj:`bool` default False
        *unsupported*
    errors: :obj:`str` default 'raise'
        If 'ignore', suppress error and only existing labels are dropped.
        *unsupported*

    Returns
    -------
    :obj:`pandas.DataFrame`
        DataFrame without the removed index or column labels.

    Raises
    ------
    KeyError
        If any of the labels is not found in the selected axis.
    ValueError
        If ``columns`` passes the type check but is neither a string literal
        nor a tuple type.
    """

    method = 'drop'

    ty_checker = TypeChecker(f'Method {method}().')
    ty_checker.check(df, DataFrameType)

    # --- type validation, one check per argument, in signature order ---
    if not (isinstance(labels, types.Omitted) or labels is None):
        ty_checker.raise_exc(labels, 'None', 'labels')

    if not isinstance(axis, (int, types.Omitted)):
        ty_checker.raise_exc(axis, 'int', 'axis')

    if not (isinstance(index, types.Omitted) or index is None):
        ty_checker.raise_exc(index, 'None', 'index')

    columns_accepted = (types.Omitted, types.Tuple, types.Literal)
    if not isinstance(columns, columns_accepted):
        ty_checker.raise_exc(columns, 'str, tuple of str', 'columns')

    if not (isinstance(level, (types.Omitted, types.Literal)) or level is None):
        ty_checker.raise_exc(level, 'None', 'level')

    if not isinstance(inplace, (bool, types.Omitted)) and inplace:
        ty_checker.raise_exc(inplace, 'bool', 'inplace')

    if not isinstance(errors, (str, types.Omitted, types.Literal)):
        ty_checker.raise_exc(errors, 'str', 'errors')

    # Default values rendered into the generated signature.  The value for
    # `errors` carries its own quotes because it is pasted into source text.
    defaults = dict(labels=None, axis=0, index=None, columns=None, level=None, inplace=False,
                    errors='"raise"')

    # --- build the "name=default" parameter list of the generated function ---
    signature_parts = ['df']
    for param_name, default in defaults.items():
        if param_name in signature_parts:
            continue
        rendered = default.literal_value if isinstance(default, types.Literal) else default
        signature_parts.append(f'{param_name}={rendered}')

    # --- resolve the names of the columns to drop from the literal type(s) ---
    if not isinstance(columns, (types.StringLiteral, types.Tuple)):
        raise ValueError('Only drop by one column or tuple of columns is currently supported in df.drop()')

    column_types = (columns,) if isinstance(columns, types.StringLiteral) else columns
    drop_cols = tuple(column_type.literal_value for column_type in column_types)

    # --- generate the implementation source and materialize it ---
    # The executed text comes from sdc_pandas_dataframe_drop_codegen.
    func_def, global_vars = sdc_pandas_dataframe_drop_codegen(method, signature_parts, df, drop_cols)
    generated_scope = {}
    exec(func_def, global_vars, generated_scope)

    return generated_scope['sdc_pandas_dataframe_drop_impl']
예제 #19
0
def sdc_pandas_series_operator_comp_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.comp_binop` implementation

    Selects, from the types of both operands, one of three implementations:
    Series vs. scalar number, Series vs. Series when both have no index
    (``types.NoneType``), and the general Series vs. Series case. Returns
    ``None`` when ``self`` is not a Series type.

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op7*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_comp_binop*

    Parameters
    ----------
    self: :obj:`pandas.Series`
        Input series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation

    Raises
    ------
    TypingError
        If ``other`` is a Series whose data or index types are not comparable
        with those of ``self``.
    ValueError
        Raised by the returned implementation when two Series differ in length
        (both without index) or when their indexes are not equal.
    """

    # NOTE(review): the `comp_binop` name and the `<` operator used below look
    # like placeholders that a code generator substitutes per comparison
    # operator - confirm before editing those tokens.
    _func_name = 'Operator comp_binop().'

    ty_checker = TypeChecker('Operator comp_binop().')
    # Not a Series on the left-hand side: decline instead of raising.
    if not isinstance(self, SeriesType):
        return None

    if not isinstance(other, (SeriesType, types.Number)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    # The three flags below exist only when `other` is a Series; every later
    # use is guarded by the same isinstance test.
    if isinstance(other, SeriesType):
        # True when each operand either has no index or a numeric one.
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType)
                                    or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType)
                                        or check_index_is_numeric(other)))
        series_data_comparable = check_types_comparable(self.data, other.data)
        series_indexes_comparable = check_types_comparable(
            self.index, other.index) or none_or_numeric_indexes

    if isinstance(other, SeriesType) and not series_data_comparable:
        raise TypingError(
            '{} Not supported for series with not-comparable data. \
        Given: self.data={}, other.data={}'.format(_func_name, self.data,
                                                   other.data))

    if isinstance(other, SeriesType) and not series_indexes_comparable:
        raise TypingError(
            '{} Not implemented for series with not-comparable indexes. \
        Given: self.index={}, other.index={}'.format(_func_name, self.index,
                                                     other.index))

    # specializations for numeric series
    if (isinstance(other, types.Number)):

        # Series vs. scalar: element-wise comparison, index and name kept.
        def _series_operator_comp_binop_scalar_impl(self, other):
            return pandas.Series(self._data < other,
                                 index=self._index,
                                 name=self._name)

        return _series_operator_comp_binop_scalar_impl

    elif (isinstance(other, SeriesType)):

        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType)
                and isinstance(other.index, types.NoneType)):

            # Neither operand has an index: only the lengths have to match.
            def _series_operator_comp_binop_none_indexes_impl(self, other):
                left_size, right_size = len(self._data), len(other._data)
                if (left_size == right_size):
                    return pandas.Series(self._data < other._data)
                else:
                    raise ValueError(
                        "Can only compare identically-labeled Series objects")

            return _series_operator_comp_binop_none_indexes_impl
        else:

            # `numba_index_common_dtype` is defined only on this path; the
            # implementation below reads it only inside the branch guarded by
            # the same flag. A missing index is treated as int64 here.
            if none_or_numeric_indexes:
                ty_left_index_dtype = types.int64 if isinstance(
                    self.index, types.NoneType) else self.index.dtype
                ty_right_index_dtype = types.int64 if isinstance(
                    other.index, types.NoneType) else other.index.dtype
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [ty_left_index_dtype, ty_right_index_dtype], [])

            # General case: indexes must be equal, otherwise ValueError.
            def _series_operator_comp_binop_common_impl(self, other):
                left_index, right_index = self.index, other.index

                if sdc_check_indexes_equal(left_index, right_index):
                    # NOTE(review): the explicit `== True` on a closure
                    # constant is presumably deliberate (compile-time branch
                    # selection) - confirm before simplifying.
                    if none_or_numeric_indexes == True:  # noqa
                        new_index = left_index.astype(numba_index_common_dtype)
                    else:
                        new_index = self._index
                    return pandas.Series(self._data < other._data, new_index)
                else:
                    raise ValueError(
                        "Can only compare identically-labeled Series objects")

            return _series_operator_comp_binop_common_impl

    return None
예제 #20
0
def pct_change_overload(df, periods=1, fill_method='pad', limit=None, freq=None):
    """
    Pandas DataFrame method :meth:`pandas.DataFrame.pct_change` implementation.

    Validates the argument types and delegates the construction of the
    implementation to ``sdc_pandas_dataframe_apply_columns``.

    .. only:: developer

      Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_pct_change*

    Parameters
    ----------
    df: :class:`pandas.DataFrame`
      input arg
    periods: :obj:`int`, default 1
        Periods to shift for forming percent change.
    fill_method: :obj:`str`, default 'pad'
        How to handle NAs before computing percent changes.
    limit:
      *unsupported*
    freq:
      *unsupported*

    Returns
    -------
    :obj:`pandas.Series` or `pandas.DataFrame`
      Percentage change between the current and a prior element.
    """

    method_name = 'pct_change'

    ty_checker = TypeChecker(f'Method {method_name}().')
    ty_checker.check(df, DataFrameType)

    if not isinstance(periods, (types.Integer, types.Omitted)):
        ty_checker.raise_exc(periods, 'int64', 'periods')

    fill_method_accepted = (str, types.UnicodeType, types.StringLiteral, types.NoneType, types.Omitted)
    if not isinstance(fill_method, fill_method_accepted):
        ty_checker.raise_exc(fill_method, 'string', 'fill_method')

    # `limit` and `freq` are unsupported: only an omitted or None-typed
    # argument is accepted.  Checked in signature order.
    none_only = (types.Omitted, types.NoneType)
    for arg_type, arg_name in ((limit, 'limit'), (freq, 'freq')):
        if not isinstance(arg_type, none_only):
            ty_checker.raise_exc(arg_type, 'None', arg_name)

    # Defaults as they are handed to the column-wise helper ('"pad"' keeps its
    # own quotes), plus a map that forwards every parameter under its own name.
    defaults = dict(periods=1, fill_method='"pad"', limit=None, freq=None)
    forwarded = {param: param for param in defaults}

    return sdc_pandas_dataframe_apply_columns(df, method_name, defaults, forwarded)
예제 #21
0
    def sdc_pandas_rolling(self,
                           window,
                           min_periods=None,
                           center=False,
                           win_type=None,
                           on=None,
                           axis=0,
                           closed=None):
        """
        Validate the argument types of ``rolling()`` and return its implementation.

        ``ty`` (the expected type of ``self``) and ``initializer`` (called by
        the returned implementation) are not defined in this function; they
        come from the enclosing scope.

        Parameters
        ----------
        self:
            Object ``rolling()`` is called on; checked against ``ty``.
        window:
            Must be an integer type.
        min_periods:
            Omitted, None or an integer type.
        center:
            Omitted or a boolean type; the implementation accepts only False.
        win_type, on, closed:
            Omitted, None or a string type; the implementation accepts only None.
        axis:
            Omitted, an integer or a string type; the implementation accepts only 0.

        Returns
        -------
        function
            ``sdc_pandas_rolling_impl``, which validates the argument values
            and returns the result of ``initializer(...)``.

        Raises
        ------
        ValueError
            Raised by the returned implementation when ``window`` is negative,
            when the effective ``min_periods`` is negative or greater than
            ``window``, or when an unsupported argument differs from its default.
        """
        ty_checker = TypeChecker('Method rolling().')
        ty_checker.check(self, ty)

        if not isinstance(window, types.Integer):
            ty_checker.raise_exc(window, 'int', 'window')

        # Each optional argument is accepted either as one of the listed types
        # or as the plain Python default value itself.
        minp_accepted = (types.Omitted, types.NoneType, types.Integer)
        if not isinstance(min_periods,
                          minp_accepted) and min_periods is not None:
            ty_checker.raise_exc(min_periods, 'None, int', 'min_periods')

        center_accepted = (types.Omitted, types.Boolean)
        if not isinstance(center, center_accepted) and center is not False:
            ty_checker.raise_exc(center, 'bool', 'center')

        # Shared by the three string-or-None arguments: win_type, on, closed.
        str_types = (types.Omitted, types.NoneType, types.StringLiteral,
                     types.UnicodeType)
        if not isinstance(win_type, str_types) and win_type is not None:
            ty_checker.raise_exc(win_type, 'str', 'win_type')

        if not isinstance(on, str_types) and on is not None:
            ty_checker.raise_exc(on, 'str', 'on')

        axis_accepted = (types.Omitted, types.Integer, types.StringLiteral,
                         types.UnicodeType)
        if not isinstance(axis, axis_accepted) and axis != 0:
            ty_checker.raise_exc(axis, 'int, str', 'axis')

        if not isinstance(closed, str_types) and closed is not None:
            ty_checker.raise_exc(closed, 'str', 'closed')

        # True when min_periods was not given (or given as None); captured by
        # the implementation below as a constant.
        nan_minp = isinstance(
            min_periods,
            (types.Omitted, types.NoneType)) or min_periods is None

        def sdc_pandas_rolling_impl(self,
                                    window,
                                    min_periods=None,
                                    center=False,
                                    win_type=None,
                                    on=None,
                                    axis=0,
                                    closed=None):
            if window < 0:
                raise ValueError('window must be non-negative')

            # With no min_periods supplied, the window size is used instead.
            # NOTE(review): the explicit `== True` / `!= False` comparisons
            # (with `# noqa`) are presumably deliberate for the compiler -
            # confirm before simplifying them.
            if nan_minp == True:  # noqa
                minp = window
            else:
                minp = min_periods

            if minp < 0:
                raise ValueError('min_periods must be >= 0')
            if minp > window:
                raise ValueError('min_periods must be <= window')

            # The remaining arguments are accepted only at their default value.
            if center != False:  # noqa
                raise ValueError(
                    'Method rolling(). The object center\n expected: False')

            if win_type is not None:
                raise ValueError(
                    'Method rolling(). The object win_type\n expected: None')

            if on is not None:
                raise ValueError(
                    'Method rolling(). The object on\n expected: None')

            if axis != 0:
                raise ValueError(
                    'Method rolling(). The object axis\n expected: 0')

            if closed is not None:
                raise ValueError(
                    'Method rolling(). The object closed\n expected: None')

            # The resolved `minp` is passed on, not the raw min_periods.
            return initializer(self, window, minp, center, win_type, on, axis,
                               closed)

        return sdc_pandas_rolling_impl
def sdc_pandas_dataframe_rolling_min(self):
    """
    Return the implementation of ``rolling.min()`` for a DataFrame rolling object.

    ``self`` is checked against ``DataFrameRollingType``; the implementation
    itself is produced by ``gen_df_rolling_method_impl`` for the method name
    ``'min'``.
    """
    method_name = 'min'

    TypeChecker(f'Method rolling.{method_name}().').check(self, DataFrameRollingType)

    return gen_df_rolling_method_impl(method_name, self)