def sdc_pandas_series_operator_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.binop` implementation

    Note: Currently implemented for numeric Series only.
        Differs from Pandas in returning Series with fixed dtype :obj:`float64`

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op1*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op2*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_binop*

    Parameters
    ----------
    series: :obj:`pandas.Series`
        Input series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation
    """
    _func_name = 'Operator binop().'
    # FIX: pass _func_name instead of a second copy of the literal, for
    # consistency with sdc_pandas_series_binop and the other overloads below
    ty_checker = TypeChecker(_func_name)

    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    # this overload is not for string series
    self_is_string_series = self_is_series and isinstance(self.dtype, types.UnicodeType)
    other_is_string_series = other_is_series and isinstance(other.dtype, types.UnicodeType)
    if self_is_string_series or other_is_string_series:
        return None

    if not isinstance(self, (SeriesType, types.Number)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # indexes are alignable if both are None/numeric or their types are comparable
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    # specializations for numeric series only
    if not operands_are_series:
        # one operand is a scalar: broadcast it against the series data
        def _series_operator_binop_scalar_impl(self, other):
            if self_is_series == True:  # noqa
                result_data = numpy.empty(len(self._data), dtype=numpy.float64)
                result_data[:] = self._data + numpy.float64(other)
                return pandas.Series(result_data, index=self._index, name=self._name)
            else:
                result_data = numpy.empty(len(other._data), dtype=numpy.float64)
                result_data[:] = numpy.float64(self) + other._data
                return pandas.Series(result_data, index=other._index, name=other._name)

        return _series_operator_binop_scalar_impl

    else:   # both operands are numeric series
        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
            def _series_operator_binop_none_indexes_impl(self, other):
                if (len(self._data) == len(other._data)):
                    result_data = astype(self._data, numpy.float64)
                    result_data = result_data + other._data
                    return pandas.Series(result_data)
                else:
                    # different lengths: pad the shorter operand with NaN before adding
                    left_size, right_size = len(self._data), len(other._data)
                    min_data_size = min(left_size, right_size)
                    max_data_size = max(left_size, right_size)
                    result_data = numpy.empty(max_data_size, dtype=numpy.float64)
                    if (left_size == min_data_size):
                        result_data[:min_data_size] = self._data
                        result_data[min_data_size:] = numpy.nan
                        result_data = result_data + other._data
                    else:
                        result_data[:min_data_size] = other._data
                        result_data[min_data_size:] = numpy.nan
                        result_data = self._data + result_data

                    return pandas.Series(result_data)

            return _series_operator_binop_none_indexes_impl
        else:
            # for numeric indexes find common dtype to be used when creating joined index
            if none_or_numeric_indexes:
                ty_left_index_dtype = types.int64 if isinstance(self.index, types.NoneType) else self.index.dtype
                ty_right_index_dtype = types.int64 if isinstance(other.index, types.NoneType) else other.index.dtype
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [ty_left_index_dtype, ty_right_index_dtype], [])

            def _series_operator_binop_common_impl(self, other):
                left_index, right_index = self.index, other.index

                # check if indexes are equal and series don't have to be aligned
                if sdc_check_indexes_equal(left_index, right_index):
                    result_data = numpy.empty(len(self._data), dtype=numpy.float64)
                    result_data[:] = self._data + other._data

                    if none_or_numeric_indexes == True:  # noqa
                        result_index = astype(left_index, numba_index_common_dtype)
                    else:
                        result_index = self._index

                    return pandas.Series(result_data, index=result_index)

                # TODO: replace below with core join(how='outer', return_indexers=True) when implemented
                joined_index, left_indexer, right_indexer = sdc_join_series_indexes(left_index, right_index)
                result_size = len(joined_index)
                left_values = numpy.empty(result_size, dtype=numpy.float64)
                right_values = numpy.empty(result_size, dtype=numpy.float64)
                for i in numba.prange(result_size):
                    left_pos, right_pos = left_indexer[i], right_indexer[i]
                    # -1 in an indexer means the label is absent on that side
                    left_values[i] = self._data[left_pos] if left_pos != -1 else numpy.nan
                    right_values[i] = other._data[right_pos] if right_pos != -1 else numpy.nan

                result_data = left_values + right_values
                return pandas.Series(result_data, joined_index)

            return _series_operator_binop_common_impl

    return None
def sdc_pandas_series_binop(self, other, level=None, fill_value=None, axis=0):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.Series.binop

    Limitations
    -----------
    Parameters ``level`` and ``axis`` are currently unsupported by Intel Scalable Dataframe Compiler

    Examples
    --------
    .. literalinclude:: ../../../examples/series/series_binop.py
       :language: python
       :lines: 27-
       :caption:
       :name: ex_series_binop

    .. command-output:: python ./series/series_binop.py
       :cwd: ../../../examples

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas Series method :meth:`pandas.Series.binop` implementation.

    .. only:: developer
        Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5
    """
    _func_name = 'Method binop().'
    ty_checker = TypeChecker(_func_name)
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    # this overload is not for string series
    self_is_string_series = self_is_series and isinstance(self.dtype, types.UnicodeType)
    other_is_string_series = other_is_series and isinstance(other.dtype, types.UnicodeType)
    if self_is_string_series or other_is_string_series:
        return None

    if not isinstance(self, (SeriesType, types.Number)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # indexes are alignable if both are None/numeric, or their types are comparable
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    if not isinstance(level, types.Omitted) and level is not None:
        ty_checker.raise_exc(level, 'None', 'level')

    if not isinstance(fill_value, (types.Omitted, types.Number, types.NoneType)) and fill_value is not None:
        ty_checker.raise_exc(fill_value, 'number', 'fill_value')
    # compile-time flag: was fill_value omitted / None? (decides NaN vs user value below)
    fill_value_is_none = isinstance(fill_value, (types.NoneType, types.Omitted)) or fill_value is None

    if not isinstance(axis, types.Omitted) and axis != 0:
        ty_checker.raise_exc(axis, 'int', 'axis')

    # specializations for numeric series only
    if not operands_are_series:
        # one operand is a scalar: fill NaNs in the series data, then broadcast
        def _series_binop_scalar_impl(self, other, level=None, fill_value=None, axis=0):
            if self_is_series == True:  # noqa
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                result_data = numpy.empty(len(self._data), dtype=numpy.float64)
                result_data[:] = self._data + numpy.float64(other)
                return pandas.Series(result_data, index=self._index, name=self._name)
            else:
                numpy_like.fillna(other._data, inplace=True, value=fill_value)
                result_data = numpy.empty(len(other._data), dtype=numpy.float64)
                result_data[:] = numpy.float64(self) + other._data
                return pandas.Series(result_data, index=other._index, name=other._name)

        return _series_binop_scalar_impl

    else:   # both operands are numeric series
        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
            def _series_binop_none_indexes_impl(self, other, level=None, fill_value=None, axis=0):
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                numpy_like.fillna(other._data, inplace=True, value=fill_value)
                if (len(self._data) == len(other._data)):
                    result_data = numpy_like.astype(self._data, numpy.float64)
                    result_data = result_data + other._data
                    return pandas.Series(result_data)
                else:
                    # different lengths: pad the shorter operand with _fill_value before adding
                    left_size, right_size = len(self._data), len(other._data)
                    min_data_size = min(left_size, right_size)
                    max_data_size = max(left_size, right_size)
                    result_data = numpy.empty(max_data_size, dtype=numpy.float64)
                    _fill_value = numpy.nan if fill_value_is_none == True else fill_value  # noqa
                    if (left_size == min_data_size):
                        result_data[:min_data_size] = self._data
                        for i in range(min_data_size, len(result_data)):
                            result_data[i] = _fill_value
                        result_data = result_data + other._data
                    else:
                        result_data[:min_data_size] = other._data
                        for i in range(min_data_size, len(result_data)):
                            result_data[i] = _fill_value
                        result_data = self._data + result_data

                    return pandas.Series(result_data)

            return _series_binop_none_indexes_impl
        else:
            # runtime index-equality check only makes sense for range-like indexes
            left_index_is_range = isinstance(self.index, (RangeIndexType, types.NoneType))
            right_index_is_range = isinstance(other.index, (RangeIndexType, types.NoneType))
            check_index_equal = left_index_is_range and right_index_is_range
            # guard .dtype access: a None (default) index is treated as a RangeIndex
            self_index_dtype = RangeIndexType.dtype if isinstance(self.index, types.NoneType) else self.index.dtype
            other_index_dtype = RangeIndexType.dtype if isinstance(other.index, types.NoneType) else other.index.dtype
            index_dtypes_match = self_index_dtype == other_index_dtype
            if not index_dtypes_match:
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [self_index_dtype, other_index_dtype], [])
            else:
                numba_index_common_dtype = self_index_dtype

            def _series_binop_common_impl(self, other, level=None, fill_value=None, axis=0):
                left_index, right_index = self.index, other.index
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                numpy_like.fillna(other._data, inplace=True, value=fill_value)
                if check_index_equal == True:  # noqa
                    equal_indexes = numpy_like.array_equal(left_index, right_index)
                else:
                    equal_indexes = False

                # indexes equal: no alignment needed, add element-wise
                if (left_index is right_index or equal_indexes):
                    result_data = numpy.empty(len(self._data), dtype=numpy.float64)
                    result_data[:] = self._data + other._data

                    if index_dtypes_match == False:  # noqa
                        result_index = numpy_like.astype(left_index, numba_index_common_dtype)
                    else:
                        result_index = left_index.values if left_index_is_range == True else left_index  # noqa

                    return pandas.Series(result_data, index=result_index)

                # TODO: replace below with core join(how='outer', return_indexers=True) when implemented
                joined_index, left_indexer, right_indexer = sdc_join_series_indexes(left_index, right_index)
                result_size = len(joined_index)
                left_values = numpy.empty(result_size, dtype=numpy.float64)
                right_values = numpy.empty(result_size, dtype=numpy.float64)
                _fill_value = numpy.nan if fill_value_is_none == True else fill_value  # noqa
                for i in range(result_size):
                    left_pos, right_pos = left_indexer[i], right_indexer[i]
                    # -1 in an indexer means the label is absent on that side
                    left_values[i] = self._data[left_pos] if left_pos != -1 else _fill_value
                    right_values[i] = other._data[right_pos] if right_pos != -1 else _fill_value

                result_data = left_values + right_values
                return pandas.Series(result_data, joined_index)

            return _series_binop_common_impl

    return None
def sdc_pandas_series_operator_comp_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.comp_binop` implementation

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op7*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_comp_binop*

    Parameters
    ----------
    series: :obj:`pandas.Series`
        Input series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation
    """
    _func_name = 'Operator comp_binop().'
    # FIX: pass _func_name instead of a second copy of the literal, for
    # consistency with sdc_pandas_series_comp_binop and the other overloads
    ty_checker = TypeChecker(_func_name)

    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    # comparisons additionally allow string scalars (unlike the arithmetic overload)
    if not isinstance(self, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # indexes are alignable if both are None/numeric or their types are comparable
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    if not operands_are_series:
        # one operand is a scalar: compare it against the series data element-wise
        def _series_operator_comp_binop_scalar_impl(self, other):
            if self_is_series == True:  # noqa
                return pandas.Series(self._data < other, index=self._index, name=self._name)
            else:
                return pandas.Series(self < other._data, index=other._index, name=other._name)

        return _series_operator_comp_binop_scalar_impl

    else:
        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
            def _series_operator_comp_binop_none_indexes_impl(self, other):
                left_size, right_size = len(self._data), len(other._data)
                if (left_size == right_size):
                    return pandas.Series(self._data < other._data)
                else:
                    raise ValueError("Can only compare identically-labeled Series objects")

            return _series_operator_comp_binop_none_indexes_impl
        else:
            # for numeric indexes find common dtype to be used when creating result index
            if none_or_numeric_indexes:
                ty_left_index_dtype = types.int64 if isinstance(self.index, types.NoneType) else self.index.dtype
                ty_right_index_dtype = types.int64 if isinstance(other.index, types.NoneType) else other.index.dtype
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [ty_left_index_dtype, ty_right_index_dtype], [])

            def _series_operator_comp_binop_common_impl(self, other):
                left_index, right_index = self.index, other.index

                if sdc_check_indexes_equal(left_index, right_index):
                    if none_or_numeric_indexes == True:  # noqa
                        new_index = astype(left_index, numba_index_common_dtype)
                    else:
                        new_index = self._index
                    return pandas.Series(self._data < other._data, new_index)
                else:
                    # comparison of differently-labeled series is not supported (as in pandas)
                    raise ValueError("Can only compare identically-labeled Series objects")

            return _series_operator_comp_binop_common_impl

    return None
def hpat_arrays_append_overload(A, B):
    """Function for appending underlying arrays (A and B) or list/tuple of arrays B to an array A"""

    # index-type operands must be unboxed to plain arrays via .values inside the impls
    use_A_array = isinstance(A, (RangeIndexType, Int64IndexType))
    use_B_array = isinstance(B, (RangeIndexType, Int64IndexType))
    if isinstance(A, (types.Array, RangeIndexType, Int64IndexType)):
        if isinstance(B, (types.Array, RangeIndexType, Int64IndexType)):
            # single array/index appended to array/index
            def _append_single_numeric_impl(A, B):
                _A = A.values if use_A_array == True else A  # noqa
                _B = B.values if use_B_array == True else B  # noqa
                return numpy.concatenate((_A, _B,))

            return _append_single_numeric_impl
        elif (isinstance(B, (types.UniTuple, types.List))
                and isinstance(B.dtype, (types.Array, RangeIndexType, Int64IndexType))):
            B_dtype_is_index = isinstance(B.dtype, (RangeIndexType, Int64IndexType))
            numba_common_dtype = find_common_dtype_from_numpy_dtypes([A.dtype, B.dtype.dtype], [])

            # TODO: refactor to use numpy.concatenate when Numba supports building a tuple at runtime
            def _append_list_numeric_impl(A, B):
                # preallocate the result at the total size, then copy chunks in order
                total_length = len(A) + numpy.array([len(arr) for arr in B]).sum()
                new_data = numpy.empty(total_length, numba_common_dtype)

                stop = len(A)
                _A = numpy.array(A) if use_A_array == True else A  # noqa
                new_data[:stop] = _A
                for arr in B:
                    _arr = arr.values if B_dtype_is_index == True else arr  # noqa
                    start = stop
                    stop = start + len(_arr)
                    new_data[start:stop] = _arr
                return new_data

            return _append_list_numeric_impl

    elif A == string_array_type:
        if B == string_array_type:
            def _append_single_string_array_impl(A, B):
                # pre-allocate by total element count and total character count
                total_size = len(A) + len(B)
                total_chars = num_total_chars(A) + num_total_chars(B)
                new_data = sdc.str_arr_ext.pre_alloc_string_array(total_size, total_chars)

                pos = 0
                pos += append_string_array_to(new_data, pos, A)
                pos += append_string_array_to(new_data, pos, B)
                return new_data

            return _append_single_string_array_impl
        elif (isinstance(B, (types.UniTuple, types.List)) and B.dtype == string_array_type):
            def _append_list_string_array_impl(A, B):
                array_list = [A] + list(B)
                total_size = numpy.array([len(arr) for arr in array_list]).sum()
                total_chars = numpy.array([num_total_chars(arr) for arr in array_list]).sum()

                new_data = sdc.str_arr_ext.pre_alloc_string_array(total_size, total_chars)

                pos = 0
                pos += append_string_array_to(new_data, pos, A)
                for arr in B:
                    pos += append_string_array_to(new_data, pos, arr)

                return new_data

            return _append_list_string_array_impl
def sdc_pandas_series_comp_binop(self, other, level=None, fill_value=None, axis=0):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.Series.comp_binop

    Limitations
    -----------
    Parameters ``level`` and ``axis`` are currently unsupported by Intel Scalable Dataframe Compiler

    Examples
    --------
    .. literalinclude:: ../../../examples/series/series_comp_binop.py
       :language: python
       :lines: 27-
       :caption:
       :name: ex_series_comp_binop

    .. command-output:: python ./series/series_comp_binop.py
       :cwd: ../../../examples

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas Series method :meth:`pandas.Series.comp_binop` implementation.

    .. only:: developer
        Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op8
    """
    _func_name = 'Method comp_binop().'
    ty_checker = TypeChecker(_func_name)
    ty_checker.check(self, SeriesType)

    if not (isinstance(level, types.Omitted) or level is None):
        ty_checker.raise_exc(level, 'None', 'level')

    if not isinstance(fill_value, (types.Omitted, types.Number, types.NoneType)) and fill_value is not None:
        ty_checker.raise_exc(fill_value, 'number', 'fill_value')

    if not (isinstance(axis, types.Omitted) or axis == 0):
        ty_checker.raise_exc(axis, 'int', 'axis')

    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    # comparisons additionally allow string scalars (unlike the arithmetic method)
    if not isinstance(self, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # indexes are alignable if both are None/numeric or their types are comparable
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    # compile-time flag: was fill_value omitted / None?
    fill_value_is_none = isinstance(fill_value, (types.NoneType, types.Omitted)) or fill_value is None
    if not operands_are_series:
        # one operand is a scalar: fill NaNs in the series data, then compare element-wise
        def _series_comp_binop_scalar_impl(self, other, level=None, fill_value=None, axis=0):
            if self_is_series == True:  # noqa
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                return pandas.Series(self._data < other, index=self._index, name=self._name)
            else:
                numpy_like.fillna(other._data, inplace=True, value=fill_value)
                return pandas.Series(self < other._data, index=other._index, name=other._name)

        return _series_comp_binop_scalar_impl

    else:
        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
            def _series_comp_binop_none_indexes_impl(self, other, level=None, fill_value=None, axis=0):
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                numpy_like.fillna(other._data, inplace=True, value=fill_value)
                left_size, right_size = len(self._data), len(other._data)
                if (left_size == right_size):
                    return pandas.Series(self._data < other._data)
                else:
                    raise ValueError("Can only compare identically-labeled Series objects")

            return _series_comp_binop_none_indexes_impl
        else:
            left_index_is_range = isinstance(self.index, (RangeIndexType, types.NoneType))
            # FIX: guard .dtype access - types.NoneType has no .dtype attribute, so
            # a default (None) index would fail at typing time; substitute the default
            # RangeIndex dtype instead, mirroring sdc_pandas_series_binop
            self_index_dtype = RangeIndexType.dtype if isinstance(self.index, types.NoneType) else self.index.dtype
            other_index_dtype = RangeIndexType.dtype if isinstance(other.index, types.NoneType) else other.index.dtype
            index_dtypes_match = self_index_dtype == other_index_dtype
            if not index_dtypes_match:
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [self_index_dtype, other_index_dtype], [])
            else:
                numba_index_common_dtype = self_index_dtype

            def _series_comp_binop_common_impl(self, other, level=None, fill_value=None, axis=0):
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                numpy_like.fillna(other._data, inplace=True, value=fill_value)
                left_index, right_index = self.index, other.index
                if (left_index is right_index or numpy_like.array_equal(left_index, right_index)):
                    if index_dtypes_match == False:  # noqa
                        new_index = numpy_like.astype(left_index, numba_index_common_dtype)
                    else:
                        new_index = left_index.values if left_index_is_range == True else left_index  # noqa
                    return pandas.Series(self._data < other._data, new_index)
                else:
                    # comparison of differently-labeled series is not supported (as in pandas)
                    raise ValueError("Can only compare identically-labeled Series objects")

            return _series_comp_binop_common_impl

    return None
def sdc_join_series_indexes_overload(left, right):
    """Function for joining arrays left and right in a way similar to pandas.join 'outer' algorithm"""

    # check that both operands are of types used for representing Pandas indexes
    if not (isinstance(left, sdc_pandas_index_types) and isinstance(right, sdc_pandas_index_types)
            and not isinstance(left, types.NoneType)
            and not isinstance(right, types.NoneType)):
        return None

    # index-typed operands need unboxing to plain arrays via .values
    convert_left = isinstance(left, (RangeIndexType, Int64IndexType))
    convert_right = isinstance(right, (RangeIndexType, Int64IndexType))

    def _convert_to_arrays_impl(left, right):
        _left = left.values if convert_left == True else left  # noqa
        _right = right.values if convert_right == True else right  # noqa
        return sdc_join_series_indexes(_left, _right)

    if isinstance(left, RangeIndexType) and isinstance(right, RangeIndexType):
        # fast path: equal range indexes join to themselves with identity indexers
        def sdc_join_range_indexes_impl(left, right):
            if (left is right or numpy_like.array_equal(left, right)):
                joined = left.values
                lidx = numpy.arange(len(joined))
                ridx = lidx
                return joined, lidx, ridx
            else:
                return sdc_join_series_indexes(left.values, right.values)

        return sdc_join_range_indexes_impl

    elif (isinstance(left, (RangeIndexType, Int64IndexType, types.Array))
          and isinstance(right, (RangeIndexType, Int64IndexType, types.Array))
          and not (isinstance(left, types.Array) and isinstance(right, types.Array))):
        # mixed index/array operands: unbox both to arrays and recurse
        return _convert_to_arrays_impl

    # TODO: remove code duplication below and merge numeric and StringArray impls into one
    # needs equivalents of numpy.arsort and _hpat_ensure_array_capacity for StringArrays
    elif isinstance(left, types.Array) and isinstance(right, types.Array):
        numba_common_dtype = find_common_dtype_from_numpy_dtypes([left.dtype, right.dtype], [])
        if isinstance(numba_common_dtype, types.Number):

            def sdc_join_series_indexes_impl(left, right):

                # allocate result arrays
                lsize = len(left)
                rsize = len(right)
                # 1.1x is an estimate only; _hpat_ensure_array_capacity grows on demand
                est_total_size = int(1.1 * (lsize + rsize))

                lidx = numpy.empty(est_total_size, numpy.int64)
                ridx = numpy.empty(est_total_size, numpy.int64)
                joined = numpy.empty(est_total_size, numba_common_dtype)

                # collect NaN positions so NaNs can be forced to sort last on each side
                left_nan = []
                right_nan = []
                for i in range(lsize):
                    if numpy.isnan(left[i]):
                        left_nan.append(i)
                for i in range(rsize):
                    if numpy.isnan(right[i]):
                        right_nan.append(i)

                # sort arrays saving the old positions
                sorted_left = numpy_like.argsort(left, kind='mergesort')
                sorted_right = numpy_like.argsort(right, kind='mergesort')
                # put the position of the nans in an increasing sequence
                sorted_left[lsize - len(left_nan):] = left_nan
                sorted_right[rsize - len(right_nan):] = right_nan

                # merge the two sorted sides; -1 in lidx/ridx marks "absent on that side"
                i, j, k = 0, 0, 0
                while (i < lsize and j < rsize):
                    joined = _hpat_ensure_array_capacity(k + 1, joined)
                    lidx = _hpat_ensure_array_capacity(k + 1, lidx)
                    ridx = _hpat_ensure_array_capacity(k + 1, ridx)

                    left_index = left[sorted_left[i]]
                    right_index = right[sorted_right[j]]

                    if (left_index < right_index) or numpy.isnan(right_index):
                        joined[k] = left_index
                        lidx[k] = sorted_left[i]
                        ridx[k] = -1
                        i += 1
                        k += 1
                    elif (left_index > right_index) or numpy.isnan(left_index):
                        joined[k] = right_index
                        lidx[k] = -1
                        ridx[k] = sorted_right[j]
                        j += 1
                        k += 1
                    else:
                        # find ends of sequences of equal index values in left and right
                        ni, nj = i, j
                        while (ni < lsize and left[sorted_left[ni]] == left_index):
                            ni += 1
                        while (nj < rsize and right[sorted_right[nj]] == right_index):
                            nj += 1

                        # join the blocks found into results (cross-product of duplicates)
                        for s in numpy.arange(i, ni, 1):
                            block_size = nj - j
                            to_joined = numpy.repeat(left_index, block_size)
                            to_lidx = numpy.repeat(sorted_left[s], block_size)
                            to_ridx = numpy.array([sorted_right[k] for k in numpy.arange(j, nj, 1)], numpy.int64)

                            joined = _hpat_ensure_array_capacity(k + block_size, joined)
                            lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                            ridx = _hpat_ensure_array_capacity(k + block_size, ridx)

                            joined[k:k + block_size] = to_joined
                            lidx[k:k + block_size] = to_lidx
                            ridx[k:k + block_size] = to_ridx

                            k += block_size
                        i = ni
                        j = nj

                # fill the end of joined with remaining part of left or right
                if i < lsize:
                    block_size = lsize - i
                    joined = _hpat_ensure_array_capacity(k + block_size, joined)
                    lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                    ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
                    ridx[k:k + block_size] = numpy.repeat(-1, block_size)
                    while i < lsize:
                        joined[k] = left[sorted_left[i]]
                        lidx[k] = sorted_left[i]
                        i += 1
                        k += 1

                elif j < rsize:
                    block_size = rsize - j
                    joined = _hpat_ensure_array_capacity(k + block_size, joined)
                    lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                    ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
                    lidx[k:k + block_size] = numpy.repeat(-1, block_size)
                    while j < rsize:
                        joined[k] = right[sorted_right[j]]
                        ridx[k] = sorted_right[j]
                        j += 1
                        k += 1

                # k is the true result length; capacity may be larger
                return joined[:k], lidx[:k], ridx[:k]

            return sdc_join_series_indexes_impl
        else:
            return None

    elif (left == string_array_type and right == string_array_type):
        def sdc_join_series_indexes_impl(left, right):

            # allocate result arrays
            lsize = len(left)
            rsize = len(right)
            est_total_size = int(1.1 * (lsize + rsize))

            lidx = numpy.empty(est_total_size, numpy.int64)
            ridx = numpy.empty(est_total_size, numpy.int64)

            # use Series.sort_values since argsort for StringArrays not implemented
            original_left_series = pandas.Series(left)
            original_right_series = pandas.Series(right)

            # sort arrays saving the old positions
            left_series = original_left_series.sort_values(kind='mergesort')
            right_series = original_right_series.sort_values(kind='mergesort')
            sorted_left = left_series._index
            sorted_right = right_series._index

            i, j, k = 0, 0, 0
            while (i < lsize and j < rsize):
                lidx = _hpat_ensure_array_capacity(k + 1, lidx)
                ridx = _hpat_ensure_array_capacity(k + 1, ridx)

                left_index = left[sorted_left[i]]
                right_index = right[sorted_right[j]]

                if (left_index < right_index):
                    lidx[k] = sorted_left[i]
                    ridx[k] = -1
                    i += 1
                    k += 1
                elif (left_index > right_index):
                    lidx[k] = -1
                    ridx[k] = sorted_right[j]
                    j += 1
                    k += 1
                else:
                    # find ends of sequences of equal index values in left and right
                    ni, nj = i, j
                    while (ni < lsize and left[sorted_left[ni]] == left_index):
                        ni += 1
                    while (nj < rsize and right[sorted_right[nj]] == right_index):
                        nj += 1

                    # join the blocks found into results (cross-product of duplicates)
                    for s in numpy.arange(i, ni, 1):
                        block_size = nj - j
                        to_lidx = numpy.repeat(sorted_left[s], block_size)
                        to_ridx = numpy.array([sorted_right[k] for k in numpy.arange(j, nj, 1)], numpy.int64)

                        lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                        ridx = _hpat_ensure_array_capacity(k + block_size, ridx)

                        lidx[k:k + block_size] = to_lidx
                        ridx[k:k + block_size] = to_ridx

                        k += block_size
                    i = ni
                    j = nj

            # fill the end of joined with remaining part of left or right
            if i < lsize:
                block_size = lsize - i
                lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
                ridx[k:k + block_size] = numpy.repeat(-1, block_size)
                while i < lsize:
                    lidx[k] = sorted_left[i]
                    i += 1
                    k += 1

            elif j < rsize:
                block_size = rsize - j
                lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
                lidx[k:k + block_size] = numpy.repeat(-1, block_size)
                while j < rsize:
                    ridx[k] = sorted_right[j]
                    j += 1
                    k += 1

            # count total number of characters and allocate joined array
            total_joined_size = k
            num_chars_in_joined = 0
            for i in numpy.arange(total_joined_size):
                if lidx[i] != -1:
                    num_chars_in_joined += len(left[lidx[i]])
                elif ridx[i] != -1:
                    num_chars_in_joined += len(right[ridx[i]])

            joined = pre_alloc_string_array(total_joined_size, num_chars_in_joined)

            # iterate over joined and fill it with indexes using lidx and ridx indexers
            for i in numpy.arange(total_joined_size):
                if lidx[i] != -1:
                    joined[i] = left[lidx[i]]
                    if (str_arr_is_na(left, lidx[i])):
                        str_arr_set_na(joined, i)
                elif ridx[i] != -1:
                    joined[i] = right[ridx[i]]
                    if (str_arr_is_na(right, ridx[i])):
                        str_arr_set_na(joined, i)
                else:
                    str_arr_set_na(joined, i)

            # NOTE(review): unlike the numeric impl above, lidx/ridx are returned at full
            # capacity rather than sliced to k - verify callers only read the first k entries
            return joined, lidx, ridx

        return sdc_join_series_indexes_impl

    return None
def hpat_arrays_append_overload(A, B):
    """Function for appending underlying arrays (A and B) or list/tuple of arrays B to an array A"""
    # NOTE(review): this appears to be a second definition of hpat_arrays_append_overload in
    # the same file (an earlier revision without RangeIndexType/Int64IndexType support) - the
    # later definition shadows this one at import time; confirm whether it should be removed

    if isinstance(A, types.Array):
        if isinstance(B, types.Array):
            # single array appended to array
            def _append_single_numeric_impl(A, B):
                return numpy.concatenate((A, B,))

            return _append_single_numeric_impl
        elif isinstance(B, (types.UniTuple, types.List)):
            # TODO: this heavily relies on B being a homogeneous tuple/list - find a better way
            # to resolve common dtype of heterogeneous sequence of arrays
            numba_common_dtype = find_common_dtype_from_numpy_dtypes([A.dtype, B.dtype.dtype], [])

            # TODO: refactor to use numpy.concatenate when Numba supports building a tuple at runtime
            def _append_list_numeric_impl(A, B):
                # preallocate the result at the total size, then copy chunks in order
                total_length = len(A) + numpy.array([len(arr) for arr in B]).sum()
                new_data = numpy.empty(total_length, numba_common_dtype)

                stop = len(A)
                new_data[:stop] = A
                for arr in B:
                    start = stop
                    stop = start + len(arr)
                    new_data[start:stop] = arr
                return new_data

            return _append_list_numeric_impl

    elif A == string_array_type:
        if B == string_array_type:
            def _append_single_string_array_impl(A, B):
                # pre-allocate by total element count and total character count
                total_size = len(A) + len(B)
                total_chars = num_total_chars(A) + num_total_chars(B)
                new_data = sdc.str_arr_ext.pre_alloc_string_array(total_size, total_chars)

                pos = 0
                pos += append_string_array_to(new_data, pos, A)
                pos += append_string_array_to(new_data, pos, B)
                return new_data

            return _append_single_string_array_impl
        elif (isinstance(B, (types.UniTuple, types.List)) and B.dtype == string_array_type):
            def _append_list_string_array_impl(A, B):
                array_list = [A] + list(B)
                total_size = numpy.array([len(arr) for arr in array_list]).sum()
                total_chars = numpy.array([num_total_chars(arr) for arr in array_list]).sum()

                new_data = sdc.str_arr_ext.pre_alloc_string_array(total_size, total_chars)

                pos = 0
                pos += append_string_array_to(new_data, pos, A)
                for arr in B:
                    pos += append_string_array_to(new_data, pos, arr)

                return new_data

            return _append_list_string_array_impl
def sdc_join_series_indexes_overload(left, right):
    """Function for joining arrays left and right in a way similar to pandas.join 'outer' algorithm

    Returns a jit-compilable implementation producing a tuple (joined, lidx, ridx):
    joined holds the merged index values, while lidx/ridx map every joined position
    to a position in left/right respectively, with -1 marking a missing value on
    that side. Returns None for unsupported argument types.
    """
    # TODO: eliminate code duplication by merging implementations for numeric and StringArray
    # requires equivalents of numpy.arsort and _hpat_ensure_array_capacity for StringArrays
    if (isinstance(left, types.Array) and isinstance(right, types.Array)):
        numba_common_dtype = find_common_dtype_from_numpy_dtypes([left.dtype, right.dtype], [])
        if isinstance(numba_common_dtype, types.Number):
            def sdc_join_series_indexes_impl(left, right):
                # allocate result arrays
                # estimated capacity; _hpat_ensure_array_capacity grows these on demand
                lsize = len(left)
                rsize = len(right)
                est_total_size = int(1.1 * (lsize + rsize))
                lidx = numpy.empty(est_total_size, numpy.int64)
                ridx = numpy.empty(est_total_size, numpy.int64)
                joined = numpy.empty(est_total_size, numba_common_dtype)

                # sort arrays saving the old positions
                # (stable mergesort so equal values keep their original relative order)
                sorted_left = numpy.argsort(left, kind='mergesort')
                sorted_right = numpy.argsort(right, kind='mergesort')

                # classic two-pointer merge: i/j walk the sorted views, k writes results
                i, j, k = 0, 0, 0
                while (i < lsize and j < rsize):
                    joined = _hpat_ensure_array_capacity(k + 1, joined)
                    lidx = _hpat_ensure_array_capacity(k + 1, lidx)
                    ridx = _hpat_ensure_array_capacity(k + 1, ridx)

                    left_index = left[sorted_left[i]]
                    right_index = right[sorted_right[j]]

                    if (left_index < right_index):
                        # value present only in left
                        joined[k] = left_index
                        lidx[k] = sorted_left[i]
                        ridx[k] = -1
                        i += 1
                        k += 1
                    elif (left_index > right_index):
                        # value present only in right
                        joined[k] = right_index
                        lidx[k] = -1
                        ridx[k] = sorted_right[j]
                        j += 1
                        k += 1
                    else:
                        # find ends of sequences of equal index values in left and right
                        ni, nj = i, j
                        while (ni < lsize and left[sorted_left[ni]] == left_index):
                            ni += 1
                        while (nj < rsize and right[sorted_right[nj]] == right_index):
                            nj += 1

                        # join the blocks found into results
                        # (cartesian product of the equal runs, as pandas outer join does)
                        for s in numpy.arange(i, ni, 1):
                            block_size = nj - j
                            to_joined = numpy.repeat(left_index, block_size)
                            to_lidx = numpy.repeat(sorted_left[s], block_size)
                            # NOTE(review): the comprehension variable k shadows the outer
                            # counter but is scoped to the comprehension and does not leak
                            to_ridx = numpy.array([sorted_right[k] for k in numpy.arange(j, nj, 1)], numpy.int64)

                            joined = _hpat_ensure_array_capacity(k + block_size, joined)
                            lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                            ridx = _hpat_ensure_array_capacity(k + block_size, ridx)

                            joined[k:k + block_size] = to_joined
                            lidx[k:k + block_size] = to_lidx
                            ridx[k:k + block_size] = to_ridx
                            k += block_size
                        i = ni
                        j = nj

                # fill the end of joined with remaining part of left or right
                if i < lsize:
                    block_size = lsize - i
                    joined = _hpat_ensure_array_capacity(k + block_size, joined)
                    lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                    ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
                    ridx[k:k + block_size] = numpy.repeat(-1, block_size)
                    while i < lsize:
                        joined[k] = left[sorted_left[i]]
                        lidx[k] = sorted_left[i]
                        i += 1
                        k += 1
                elif j < rsize:
                    block_size = rsize - j
                    joined = _hpat_ensure_array_capacity(k + block_size, joined)
                    lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                    ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
                    lidx[k:k + block_size] = numpy.repeat(-1, block_size)
                    while j < rsize:
                        joined[k] = right[sorted_right[j]]
                        ridx[k] = sorted_right[j]
                        j += 1
                        k += 1

                # trim to the number of entries actually written
                return joined[:k], lidx[:k], ridx[:k]

            return sdc_join_series_indexes_impl
        else:
            # TODO: support joining indexes with common dtype=object - requires Numba
            # support of such numpy arrays in nopython mode, for now just return None
            return None

    elif (left == string_array_type and right == string_array_type):
        def sdc_join_series_indexes_impl(left, right):
            # allocate result arrays
            lsize = len(left)
            rsize = len(right)
            est_total_size = int(1.1 * (lsize + rsize))
            lidx = numpy.empty(est_total_size, numpy.int64)
            ridx = numpy.empty(est_total_size, numpy.int64)

            # use Series.sort_values since argsort for StringArrays not implemented
            original_left_series = pandas.Series(left)
            original_right_series = pandas.Series(right)

            # sort arrays saving the old positions
            left_series = original_left_series.sort_values(kind='mergesort')
            right_series = original_right_series.sort_values(kind='mergesort')
            # after sort_values the series index holds the original positions
            sorted_left = left_series._index
            sorted_right = right_series._index

            # two-pointer merge over the sorted views; only lidx/ridx are built here,
            # the joined string array itself is allocated and filled afterwards
            i, j, k = 0, 0, 0
            while (i < lsize and j < rsize):
                lidx = _hpat_ensure_array_capacity(k + 1, lidx)
                ridx = _hpat_ensure_array_capacity(k + 1, ridx)

                left_index = left[sorted_left[i]]
                right_index = right[sorted_right[j]]

                if (left_index < right_index):
                    lidx[k] = sorted_left[i]
                    ridx[k] = -1
                    i += 1
                    k += 1
                elif (left_index > right_index):
                    lidx[k] = -1
                    ridx[k] = sorted_right[j]
                    j += 1
                    k += 1
                else:
                    # find ends of sequences of equal index values in left and right
                    ni, nj = i, j
                    while (ni < lsize and left[sorted_left[ni]] == left_index):
                        ni += 1
                    while (nj < rsize and right[sorted_right[nj]] == right_index):
                        nj += 1

                    # join the blocks found into results
                    for s in numpy.arange(i, ni, 1):
                        block_size = nj - j
                        to_lidx = numpy.repeat(sorted_left[s], block_size)
                        to_ridx = numpy.array([sorted_right[k] for k in numpy.arange(j, nj, 1)], numpy.int64)

                        lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                        ridx = _hpat_ensure_array_capacity(k + block_size, ridx)

                        lidx[k:k + block_size] = to_lidx
                        ridx[k:k + block_size] = to_ridx
                        k += block_size
                    i = ni
                    j = nj

            # fill the end of joined with remaining part of left or right
            if i < lsize:
                block_size = lsize - i
                lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
                ridx[k:k + block_size] = numpy.repeat(-1, block_size)
                while i < lsize:
                    lidx[k] = sorted_left[i]
                    i += 1
                    k += 1
            elif j < rsize:
                block_size = rsize - j
                lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
                lidx[k:k + block_size] = numpy.repeat(-1, block_size)
                while j < rsize:
                    ridx[k] = sorted_right[j]
                    j += 1
                    k += 1

            # count total number of characters and allocate joined array
            total_joined_size = k
            num_chars_in_joined = 0
            for i in numpy.arange(total_joined_size):
                if lidx[i] != -1:
                    num_chars_in_joined += len(left[lidx[i]])
                elif ridx[i] != -1:
                    num_chars_in_joined += len(right[ridx[i]])

            joined = pre_alloc_string_array(total_joined_size, num_chars_in_joined)

            # iterate over joined and fill it with indexes using lidx and ridx indexers
            # (NA flags are propagated explicitly via str_arr_is_na/str_arr_set_na)
            for i in numpy.arange(total_joined_size):
                if lidx[i] != -1:
                    joined[i] = left[lidx[i]]
                    if (str_arr_is_na(left, lidx[i])):
                        str_arr_set_na(joined, i)
                elif ridx[i] != -1:
                    joined[i] = right[ridx[i]]
                    if (str_arr_is_na(right, ridx[i])):
                        str_arr_set_na(joined, i)
                else:
                    str_arr_set_na(joined, i)

            return joined, lidx, ridx

        return sdc_join_series_indexes_impl

    return None
def hpat_arrays_append_overload(A, B):
    """Function for appending underlying arrays (A and B) or list/tuple of arrays B to an array A

    Dispatches on the numba types of A and B and returns a jit-compilable
    implementation; returns None (implicitly) for unsupported type combinations.
    """
    if not isinstance(A, sdc_pandas_df_column_types):
        return None

    # this function should work with arrays, not indexes, but until all indexes support
    # common API (e.g. append is not supported for types.Array indexes) it is simpler to support
    # indexes here rather than branch depending on index types on call site
    # TO-DO: clean-up when Float64Index and StringArrayIndex are supported
    # if not (isinstance(B, sdc_pandas_df_column_types) or isinstance(B.dtype, sdc_pandas_df_column_types)):
    #     return None

    # accepted B types: plain numpy arrays plus the supported index types
    # (the two tuples are currently identical; kept separate for clarity of intent)
    valid_num_single_B_dtype = (types.Array, ) + sdc_pandas_index_types
    valid_num_seq_B_dtypes = (types.Array, ) + sdc_pandas_index_types

    if isinstance(A, types.Array):
        if isinstance(B, valid_num_single_B_dtype):
            # index objects need .values to expose their underlying numpy array
            convert_B = not isinstance(B, types.Array)

            def _append_single_numeric_impl(A, B):
                # '== False' comparison kept deliberately - presumably needed for Numba
                # to prune the dead branch at compile time; verify before changing
                _B = B if convert_B == False else B.values  # noqa
                return numpy.concatenate((A, _B,))

            return _append_single_numeric_impl

        elif (isinstance(B, (types.UniTuple, types.List)) and isinstance(B.dtype, valid_num_seq_B_dtypes)):
            # TODO: this relies on B being homogeneous; common dtype resolved at compile time
            numba_common_dtype = find_common_dtype_from_numpy_dtypes([A.dtype, B.dtype.dtype], [])
            convert_B = not isinstance(B.dtype, types.Array)

            # TODO: refactor to use numpy.concatenate when Numba supports building a tuple at runtime
            def _append_list_numeric_impl(A, B):
                total_length = len(A) + numpy.array([len(arr) for arr in B]).sum()
                new_data = numpy.empty(total_length, numba_common_dtype)

                # copy A first, then append each array from B at the running offset
                stop = len(A)
                new_data[:stop] = A
                for arr in B:
                    start = stop
                    stop = start + len(arr)
                    if convert_B == False:  # noqa
                        new_data[start:stop] = arr
                    else:
                        new_data[start:stop] = arr.values
                return new_data

            return _append_list_numeric_impl

    elif A == string_array_type:
        if B == string_array_type:
            def _append_single_string_array_impl(A, B):
                # string arrays are pre-allocated from both element and char counts
                total_size = len(A) + len(B)
                total_chars = num_total_chars(A) + num_total_chars(B)
                new_data = sdc.str_arr_ext.pre_alloc_string_array(total_size, total_chars)

                pos = 0
                pos += append_string_array_to(new_data, pos, A)
                pos += append_string_array_to(new_data, pos, B)
                return new_data

            return _append_single_string_array_impl

        elif (isinstance(B, (types.UniTuple, types.List)) and B.dtype == string_array_type):
            def _append_list_string_array_impl(A, B):
                array_list = [A] + list(B)
                total_size = numpy.array([len(arr) for arr in array_list]).sum()
                total_chars = numpy.array([num_total_chars(arr) for arr in array_list]).sum()
                new_data = sdc.str_arr_ext.pre_alloc_string_array(total_size, total_chars)

                pos = 0
                pos += append_string_array_to(new_data, pos, A)
                for arr in B:
                    pos += append_string_array_to(new_data, pos, arr)
                return new_data

            return _append_list_string_array_impl
def _sdc_internal_join_ovld(left, right):
    """Outer-join two index arrays, returning (joined, lidx, ridx).

    Like sdc_join_series_indexes_overload, but the numeric branch additionally
    handles NaN index values by forcing them to the end of the sort order.
    Returns None for unsupported argument types.
    """
    if isinstance(left, types.Array) and isinstance(right, types.Array):
        numba_common_dtype = find_common_dtype_from_numpy_dtypes([left.dtype, right.dtype], [])

        if isinstance(numba_common_dtype, types.Number):
            def sdc_join_series_indexes_impl(left, right):
                # allocate result arrays
                # estimated capacity; grown on demand by _hpat_ensure_array_capacity
                lsize = len(left)
                rsize = len(right)
                est_total_size = int(1.1 * (lsize + rsize))
                lidx = numpy.empty(est_total_size, numpy.int64)
                ridx = numpy.empty(est_total_size, numpy.int64)
                joined = numpy.empty(est_total_size, numba_common_dtype)

                # collect original positions of NaN values on each side
                left_nan = []
                right_nan = []
                for i in range(lsize):
                    if numpy.isnan(left[i]):
                        left_nan.append(i)
                for i in range(rsize):
                    if numpy.isnan(right[i]):
                        right_nan.append(i)

                # sort arrays saving the old positions
                sorted_left = numpy_like.argsort(left, kind='mergesort')
                sorted_right = numpy_like.argsort(right, kind='mergesort')
                # put the position of the nans in an increasing sequence
                # (argsort places NaNs last; overwrite that tail so NaN positions
                # appear in ascending original order)
                sorted_left[lsize-len(left_nan):] = left_nan
                sorted_right[rsize-len(right_nan):] = right_nan

                # two-pointer merge: i/j walk the sorted views, k writes results
                i, j, k = 0, 0, 0
                while (i < lsize and j < rsize):
                    joined = _hpat_ensure_array_capacity(k + 1, joined)
                    lidx = _hpat_ensure_array_capacity(k + 1, lidx)
                    ridx = _hpat_ensure_array_capacity(k + 1, ridx)

                    left_index = left[sorted_left[i]]
                    right_index = right[sorted_right[j]]

                    # NaN comparisons are always False, so the isnan checks make the
                    # non-NaN side win and NaN entries never pair with anything
                    if (left_index < right_index) or numpy.isnan(right_index):
                        joined[k] = left_index
                        lidx[k] = sorted_left[i]
                        ridx[k] = -1
                        i += 1
                        k += 1
                    elif (left_index > right_index) or numpy.isnan(left_index):
                        joined[k] = right_index
                        lidx[k] = -1
                        ridx[k] = sorted_right[j]
                        j += 1
                        k += 1
                    else:
                        # find ends of sequences of equal index values in left and right
                        ni, nj = i, j
                        while (ni < lsize and left[sorted_left[ni]] == left_index):
                            ni += 1
                        while (nj < rsize and right[sorted_right[nj]] == right_index):
                            nj += 1

                        # join the blocks found into results
                        # (cartesian product of the equal runs, as pandas outer join does)
                        for s in numpy.arange(i, ni, 1):
                            block_size = nj - j
                            to_joined = numpy.repeat(left_index, block_size)
                            to_lidx = numpy.repeat(sorted_left[s], block_size)
                            # NOTE(review): the comprehension variable k shadows the outer
                            # counter but is scoped to the comprehension and does not leak
                            to_ridx = numpy.array([sorted_right[k] for k in numpy.arange(j, nj, 1)], numpy.int64)

                            joined = _hpat_ensure_array_capacity(k + block_size, joined)
                            lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                            ridx = _hpat_ensure_array_capacity(k + block_size, ridx)

                            joined[k:k + block_size] = to_joined
                            lidx[k:k + block_size] = to_lidx
                            ridx[k:k + block_size] = to_ridx
                            k += block_size
                        i = ni
                        j = nj

                # fill the end of joined with remaining part of left or right
                if i < lsize:
                    block_size = lsize - i
                    joined = _hpat_ensure_array_capacity(k + block_size, joined)
                    lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                    ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
                    ridx[k: k + block_size] = numpy.repeat(-1, block_size)
                    while i < lsize:
                        joined[k] = left[sorted_left[i]]
                        lidx[k] = sorted_left[i]
                        i += 1
                        k += 1
                elif j < rsize:
                    block_size = rsize - j
                    joined = _hpat_ensure_array_capacity(k + block_size, joined)
                    lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                    ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
                    lidx[k: k + block_size] = numpy.repeat(-1, block_size)
                    while j < rsize:
                        joined[k] = right[sorted_right[j]]
                        ridx[k] = sorted_right[j]
                        j += 1
                        k += 1

                # trim to the number of entries actually written
                return joined[:k], lidx[:k], ridx[:k]

            return sdc_join_series_indexes_impl
        else:
            # non-numeric common dtype not supported
            return None

    elif (left == string_array_type and right == string_array_type):
        def sdc_join_series_indexes_impl(left, right):
            # allocate result arrays
            lsize = len(left)
            rsize = len(right)
            est_total_size = int(1.1 * (lsize + rsize))
            lidx = numpy.empty(est_total_size, numpy.int64)
            ridx = numpy.empty(est_total_size, numpy.int64)

            # use Series.sort_values since argsort for StringArrays not implemented
            original_left_series = pandas.Series(left)
            original_right_series = pandas.Series(right)

            # sort arrays saving the old positions
            left_series = original_left_series.sort_values(kind='mergesort')
            right_series = original_right_series.sort_values(kind='mergesort')
            # after sort_values the series index holds the original positions
            sorted_left = left_series._index
            sorted_right = right_series._index

            # two-pointer merge; only lidx/ridx are built here, the joined string
            # array itself is allocated and filled afterwards
            i, j, k = 0, 0, 0
            while (i < lsize and j < rsize):
                lidx = _hpat_ensure_array_capacity(k + 1, lidx)
                ridx = _hpat_ensure_array_capacity(k + 1, ridx)

                left_index = left[sorted_left[i]]
                right_index = right[sorted_right[j]]

                if (left_index < right_index):
                    lidx[k] = sorted_left[i]
                    ridx[k] = -1
                    i += 1
                    k += 1
                elif (left_index > right_index):
                    lidx[k] = -1
                    ridx[k] = sorted_right[j]
                    j += 1
                    k += 1
                else:
                    # find ends of sequences of equal index values in left and right
                    ni, nj = i, j
                    while (ni < lsize and left[sorted_left[ni]] == left_index):
                        ni += 1
                    while (nj < rsize and right[sorted_right[nj]] == right_index):
                        nj += 1

                    # join the blocks found into results
                    for s in numpy.arange(i, ni, 1):
                        block_size = nj - j
                        to_lidx = numpy.repeat(sorted_left[s], block_size)
                        to_ridx = numpy.array([sorted_right[k] for k in numpy.arange(j, nj, 1)], numpy.int64)

                        lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                        ridx = _hpat_ensure_array_capacity(k + block_size, ridx)

                        lidx[k:k + block_size] = to_lidx
                        ridx[k:k + block_size] = to_ridx
                        k += block_size
                    i = ni
                    j = nj

            # fill the end of joined with remaining part of left or right
            if i < lsize:
                block_size = lsize - i
                lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
                ridx[k: k + block_size] = numpy.repeat(-1, block_size)
                while i < lsize:
                    lidx[k] = sorted_left[i]
                    i += 1
                    k += 1
            elif j < rsize:
                block_size = rsize - j
                lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
                lidx[k: k + block_size] = numpy.repeat(-1, block_size)
                while j < rsize:
                    ridx[k] = sorted_right[j]
                    j += 1
                    k += 1

            # count total number of characters and allocate joined array
            total_joined_size = k
            num_chars_in_joined = 0
            for i in numpy.arange(total_joined_size):
                if lidx[i] != -1:
                    num_chars_in_joined += len(left[lidx[i]])
                elif ridx[i] != -1:
                    num_chars_in_joined += len(right[ridx[i]])

            joined = pre_alloc_string_array(total_joined_size, num_chars_in_joined)

            # iterate over joined and fill it with indexes using lidx and ridx indexers
            # (NA flags are propagated explicitly via str_arr_is_na/str_arr_set_na)
            for i in numpy.arange(total_joined_size):
                if lidx[i] != -1:
                    joined[i] = left[lidx[i]]
                    if (str_arr_is_na(left, lidx[i])):
                        str_arr_set_na(joined, i)
                elif ridx[i] != -1:
                    joined[i] = right[ridx[i]]
                    if (str_arr_is_na(right, ridx[i])):
                        str_arr_set_na(joined, i)
                else:
                    str_arr_set_na(joined, i)

            return joined, lidx, ridx

        return sdc_join_series_indexes_impl

    return None