def pd_multi_index_from_tuples_overload(cls, iterables):
    """Typing-time overload for building a MultiIndex from a list of tuples.

    Declines (returns ``None``) unless ``cls.instance_type`` is
    ``MultiIndexType``. ``iterables`` must be typed as a list whose dtype is
    a tuple type; any other type is handed to ``ty_checker.raise_exc``.
    The returned implementation raises ``TypeError`` for an empty list.
    """
    if cls.instance_type is not MultiIndexType:
        return None

    _func_name = 'Method from_tuples()'
    ty_checker = TypeChecker(_func_name)

    list_types = (types.List, types.ListType)
    tuple_types = (types.Tuple, types.UniTuple)
    if not (isinstance(iterables, list_types)
            and isinstance(iterables.dtype, tuple_types)):
        ty_checker.raise_exc(iterables, 'list of tuples', 'iterables')

    def pd_multi_index_type_from_tuples_impl(cls, iterables):
        n_items = len(iterables)
        if not n_items:
            raise TypeError("Cannot infer number of levels from empty list")

        # the first tuple is used to infer the per-level types; one level
        # dict and one int64 codes array (sized to the input) per level
        first_item = iterables[0]
        level_dicts = sdc_tuple_map(_multi_index_alloc_level_dict, first_item)
        codes = sdc_tuple_map(
            lambda _, size: np.empty(size, dtype=types.int64),
            first_item,
            n_items)

        for pos, item in enumerate(iterables):
            _multi_index_from_tuples_helper(item, level_dicts, codes, pos)

        # the keys of every level dict become that level's values
        levels = sdc_tuple_map(lambda x: list(x.keys()), level_dicts)
        return pd.MultiIndex(levels=levels, codes=codes)

    return pd_multi_index_type_from_tuples_impl
def sdc_pandas_dataframe_rolling_var(self, ddof=1):
    """Typing-time overload of ``rolling.var()`` for a DataFrame rolling object.

    Checks that ``self`` is a ``DataFrameRollingType`` and that ``ddof`` is an
    ``int``, ``Integer`` or ``Omitted``, then returns the implementation built
    by ``gen_df_rolling_method_impl`` for the ``'var'`` method.
    """
    checker = TypeChecker('Method rolling.var().')
    checker.check(self, DataFrameRollingType)

    accepted_ddof = (int, Integer, Omitted)
    if not isinstance(ddof, accepted_ddof):
        checker.raise_exc(ddof, 'int', 'ddof')

    default_kws = {'ddof': '1'}
    return gen_df_rolling_method_impl('var', self, kws=default_kws)
def hpat_pandas_series_rolling_var(self, ddof=1):
    """Typing-time overload of ``rolling.var()`` for a Series rolling object.

    Checks that ``self`` is a ``SeriesRollingType`` and that ``ddof`` is an
    ``int``, ``Integer`` or ``Omitted``, then returns
    ``sdc_pandas_series_rolling_var_impl``.
    """
    checker = TypeChecker('Method rolling.var().')
    checker.check(self, SeriesRollingType)

    accepted_ddof = (int, Integer, Omitted)
    if not isinstance(ddof, accepted_ddof):
        checker.raise_exc(ddof, 'int', 'ddof')

    return sdc_pandas_series_rolling_var_impl
def sdc_isnan_overload(self):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of numpy.isnan.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k isnan
    """

    # only array arguments are handled by this overload
    if not isinstance(self, types.Array):
        return None

    ty_checker = TypeChecker("numpy-like 'isnan'")
    dtype = self.dtype
    isnan = get_isnan(dtype)

    never_nan_dtypes = (types.Integer, types.Boolean, bool)
    if isinstance(dtype, never_nan_dtypes):
        # integer/boolean data: the result is an all-False mask
        def sdc_isnan_int_impl(self):
            size = len(self)
            return numpy.zeros(shape=size, dtype=numpy.bool_)

        return sdc_isnan_int_impl

    if isinstance(dtype, types.Float):
        def sdc_isnan_float_impl(self):
            size = len(self)
            mask = numpy.empty(shape=size, dtype=numpy.bool_)
            for pos in prange(size):
                mask[pos] = isnan(self[pos])

            return mask

        return sdc_isnan_float_impl

    ty_checker.raise_exc(dtype, 'int or float', 'self.dtype')
def pd_multi_index_append_overload(self, other):
    """Typing-time overload of ``append()`` for two MultiIndex objects.

    Declines (returns ``None``) when ``self`` is not a ``MultiIndexType``.
    ``other`` must be a ``MultiIndexType`` as well, and the two types must
    pass ``check_types_comparable``; otherwise ``TypingError`` is raised.
    """
    if not isinstance(self, MultiIndexType):
        return None

    _func_name = 'Method append().'
    ty_checker = TypeChecker(_func_name)

    other_is_multi_index = isinstance(other, MultiIndexType)
    if not other_is_multi_index:
        ty_checker.raise_exc(other, 'pandas MultiIndex', 'other')

    types_comparable = check_types_comparable(self, other)
    if not types_comparable:
        raise TypingError('{} Not allowed for non comparable indexes. \
        Given: self={}, other={}'.format(_func_name, self, other))

    def pd_multi_index_append_impl(self, other):
        # pair up per-level data of both indexes, append level by level,
        # then split the per-level results back into levels and codes
        paired_data = _multi_index_binop_helper(self, other)
        per_level_result = sdc_tuple_map(
            lambda x: _multi_index_append_level(*x),
            paired_data)

        new_levels, new_codes = sdc_tuple_unzip(per_level_result)
        return pd.MultiIndex(levels=new_levels, codes=new_codes)

    return pd_multi_index_append_impl
def hpat_pandas_series_rolling_quantile(self, quantile, interpolation='linear'):
    """Typing-time overload of ``rolling.quantile()`` for a Series rolling object.

    ``quantile`` must be a ``Number``; ``interpolation`` must be a string type,
    ``Omitted``, or the literal ``'linear'``. The returned implementation
    raises ``ValueError`` when ``quantile`` is outside ``[0, 1]`` or when
    ``interpolation`` is not ``'linear'``.
    """
    checker = TypeChecker('Method rolling.quantile().')
    checker.check(self, SeriesRollingType)

    if not isinstance(quantile, Number):
        checker.raise_exc(quantile, 'float', 'quantile')

    accepted_interpolation = (Omitted, StringLiteral, UnicodeType)
    if not isinstance(interpolation, accepted_interpolation) and interpolation != 'linear':
        checker.raise_exc(interpolation, 'str', 'interpolation')

    def hpat_pandas_rolling_series_quantile_impl(self, quantile, interpolation='linear'):
        if quantile < 0 or quantile > 1:
            raise ValueError('quantile value not in [0, 1]')
        if interpolation != 'linear':
            raise ValueError('interpolation value not "linear"')

        window = self._window
        min_periods = self._min_periods
        series = self._data
        values = series._data
        size = len(values)
        result = numpy.empty(size, dtype=float64)

        def window_quantile(arr, quantile, minp):
            # only finite values take part; too few of them gives NaN
            finite = arr[numpy.isfinite(arr)]
            if len(finite) < minp:
                return numpy.nan
            return arr_quantile(finite, quantile)

        # the first `head` windows are still growing from the array start
        head = min(window, size)
        for i in prange(head):
            result[i] = window_quantile(values[:i + 1], quantile, min_periods)

        # the remaining windows have the full width
        for i in prange(head, size):
            result[i] = window_quantile(values[i + 1 - window:i + 1], quantile, min_periods)

        return pandas.Series(result, series._index, name=series._name)

    return hpat_pandas_rolling_series_quantile_impl
def sdc_pandas_dataframe_rolling_quantile(self, quantile, interpolation='linear'):
    """Typing-time overload of ``rolling.quantile()`` for a DataFrame rolling object.

    ``quantile`` must be a ``Number``; ``interpolation`` must be a string type,
    ``Omitted``, or the literal ``'linear'``. Returns the implementation built
    by ``gen_df_rolling_method_impl`` for the ``'quantile'`` method.
    """
    checker = TypeChecker('Method rolling.quantile().')
    checker.check(self, DataFrameRollingType)

    if not isinstance(quantile, Number):
        checker.raise_exc(quantile, 'float', 'quantile')

    accepted_interpolation = (Omitted, StringLiteral, UnicodeType)
    if not isinstance(interpolation, accepted_interpolation) and interpolation != 'linear':
        checker.raise_exc(interpolation, 'str', 'interpolation')

    method_args = ['quantile']
    method_kws = {'interpolation': '"linear"'}
    return gen_df_rolling_method_impl('quantile', self, args=method_args, kws=method_kws)
def sdc_pandas_dataframe_rolling_apply(self, func, raw=None):
    """Typing-time overload of ``rolling.apply()`` for a DataFrame rolling object.

    ``raw`` must be ``None`` or typed as ``Omitted``, ``NoneType`` or
    ``Boolean``. Returns the implementation built by
    ``gen_df_rolling_method_impl`` for the ``'apply'`` method.
    """
    checker = TypeChecker('Method rolling.apply().')
    checker.check(self, DataFrameRollingType)

    accepted_raw = (Omitted, NoneType, Boolean)
    if not (raw is None or isinstance(raw, accepted_raw)):
        checker.raise_exc(raw, 'bool', 'raw')

    method_args = ['func']
    method_kws = {'raw': 'None'}
    return gen_df_rolling_method_impl('apply', self, args=method_args, kws=method_kws)
def sdc_pandas_series_operator_comp_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.comp_binop` implementation

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op7*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_comp_binop*

    Parameters
    ----------
    series: :obj:`pandas.Series`
        Input series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation
    """

    # _func_name is formatted into both TypingError messages below; it has to
    # be defined here, otherwise the error path fails with a NameError
    _func_name = 'Operator comp_binop().'
    ty_checker = TypeChecker(_func_name)

    # decline the overload unless at least one operand is a Series
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    if not isinstance(self, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # indexes are accepted when both are default (None) or numeric,
        # or when their types are comparable
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    def sdc_pandas_series_operator_comp_binop_impl(self, other):
        # the operator simply forwards to the corresponding Series method
        return self.comp_binop(other)

    return sdc_pandas_series_operator_comp_binop_impl
def sdc_pandas_dataframe_rolling_corr(self, other=None, pairwise=None):
    """Typing-time overload of ``rolling.corr()`` for a DataFrame rolling object.

    ``other`` may be ``None``/omitted, a DataFrame or a Series; ``pairwise``
    may be ``None``/omitted or boolean. One of three implementation
    generators is selected depending on the type of ``other``.
    """
    checker = TypeChecker('Method rolling.corr().')
    checker.check(self, DataFrameRollingType)

    other_types = (Omitted, NoneType, DataFrameType, SeriesType)
    if not (other is None or isinstance(other, other_types)):
        checker.raise_exc(other, 'DataFrame, Series', 'other')

    pairwise_types = (bool, Boolean, Omitted, NoneType)
    if not (pairwise is None or isinstance(pairwise, pairwise_types)):
        checker.raise_exc(pairwise, 'bool', 'pairwise')

    default_kws = {'other': 'None', 'pairwise': 'None'}

    # no second operand
    other_is_none = other is None or isinstance(other, (Omitted, NoneType))
    if other_is_none:
        return gen_df_rolling_method_other_none_impl('corr', self, kws=default_kws)

    # second operand is a DataFrame
    if isinstance(other, DataFrameType):
        return gen_df_rolling_method_other_df_impl('corr', self, other, kws=default_kws)

    return gen_df_rolling_method_impl('corr', self, kws=default_kws)
def sdc_pandas_series_groupby_var(self, ddof=1, *args):
    """Typing-time overload of ``GroupBy.var()`` for a Series group-by object.

    Checks that ``self`` is a ``SeriesGroupByType`` and that ``ddof`` is
    ``Omitted``, ``int`` or ``Integer``, then delegates to
    ``sdc_pandas_series_groupby_apply_func`` with the ``'var'`` function name.
    """
    method_name = 'GroupBy.var().'
    checker = TypeChecker(method_name)
    checker.check(self, SeriesGroupByType)

    accepted_ddof = (types.Omitted, int, types.Integer)
    if not isinstance(ddof, accepted_ddof):
        checker.raise_exc(ddof, 'int', 'ddof')

    return sdc_pandas_series_groupby_apply_func(
        self,
        'var',
        ['self', 'ddof', '*args'],
        {'ddof': 1},
        {'ddof': 'ddof'},
    )
def pd_range_index_copy_overload(self, name=None, deep=False, dtype=None):
    """Typing-time overload of ``copy()`` for a RangeIndex.

    Declines (returns ``None``) when ``self`` is not a ``RangeIndexType``.
    ``name`` must be a string or none, ``deep`` a boolean, and ``dtype`` must
    pass ``_check_dtype_param_type``. The implementation keeps the index's own
    name when no ``name`` is given and ``self.is_named`` is set.
    """
    if not isinstance(self, RangeIndexType):
        return None

    _func_name = 'Method copy().'
    ty_checker = TypeChecker(_func_name)

    name_type_ok = isinstance(name, (types.NoneType, types.Omitted, types.UnicodeType)) or name is None
    if not name_type_ok:
        ty_checker.raise_exc(name, 'string or none', 'name')

    deep_type_ok = isinstance(deep, (types.Omitted, types.Boolean)) or deep is False
    if not deep_type_ok:
        ty_checker.raise_exc(deep, 'boolean', 'deep')

    if not _check_dtype_param_type(dtype):
        ty_checker.raise_exc(dtype, 'int64 dtype', 'dtype')

    name_is_none = isinstance(name, (types.NoneType, types.Omitted)) or name is None
    keep_name = name_is_none and self.is_named

    def pd_range_index_copy_impl(self, name=None, deep=False, dtype=None):
        # keep_name is a typing-time value captured from the enclosing scope
        index_name = self._name if keep_name == True else name  # noqa
        return init_range_index(self._data, index_name)

    return pd_range_index_copy_impl
def hpat_pandas_series_rolling_apply(self, func, raw=None):
    """Typing-time overload of ``rolling.apply()`` for a Series rolling object.

    ``raw`` must be ``None`` or typed as ``Omitted``, ``NoneType`` or
    ``Boolean``. The returned implementation applies ``func`` to every window
    through ``arr_apply`` and returns a new Series that reuses the input
    index and name.
    """
    checker = TypeChecker('Method rolling.apply().')
    checker.check(self, SeriesRollingType)

    accepted_raw = (Omitted, NoneType, Boolean)
    if not (raw is None or isinstance(raw, accepted_raw)):
        checker.raise_exc(raw, 'bool', 'raw')

    def hpat_pandas_rolling_series_apply_impl(self, func, raw=None):
        window = self._window
        min_periods = self._min_periods
        series = self._data
        values = series._data
        size = len(values)
        result = numpy.empty(size, dtype=float64)

        def apply_to_window(arr, func, minp):
            # work on a copy in which infinite values are replaced by NaN
            prepared = arr.copy()
            prepared[numpy.isinf(arr)] = numpy.nan
            if len(prepared) < minp:
                return numpy.nan
            return arr_apply(prepared, func)

        # the first `head` windows are still growing from the array start
        head = min(window, size)
        for i in prange(head):
            result[i] = apply_to_window(values[:i + 1], func, min_periods)

        # the remaining windows have the full width
        for i in prange(head, size):
            result[i] = apply_to_window(values[i + 1 - window:i + 1], func, min_periods)

        return pandas.Series(result, series._index, name=series._name)

    return hpat_pandas_rolling_series_apply_impl
def hpat_pandas_series_rolling_median(self):
    """Typing-time overload of ``rolling.median()`` for a Series rolling object.

    Checks that ``self`` is a ``SeriesRollingType`` and returns
    ``hpat_pandas_rolling_series_median_impl``.
    """
    checker = TypeChecker('Method rolling.median().')
    checker.check(self, SeriesRollingType)

    return hpat_pandas_rolling_series_median_impl
def pd_range_index_overload(start=None, stop=None, step=None, dtype=None, copy=False, name=None, fastpath=None):
    """Typing-time overload of the ``pd.RangeIndex`` constructor.

    Parameters
    ----------
    start, stop, step
        Integer types, none or omitted. When all three are none/omitted the
        returned implementation raises ``TypeError``.
    dtype
        Must pass ``_check_dtype_param_type``; the implementation additionally
        raises ``TypeError`` for anything other than ``None``, the string
        ``'int64'`` or the int64 number class.
    copy, fastpath
        Unsupported: anything other than the default raises ``SDCLimitation``.
    name
        String type, none or omitted.

    Returns
    -------
    The implementation function to be compiled for the given argument types.
    """
    _func_name = 'pd.RangeIndex().'
    ty_checker = TypeChecker(_func_name)

    if not (isinstance(copy, types.Omitted) or copy is False):
        raise SDCLimitation(f"{_func_name} Unsupported parameter. Given 'copy': {copy}")

    # The guard has to look at `fastpath` itself (it used to test `copy`, so
    # any fastpath value was silently accepted whenever copy was omitted).
    # An explicitly passed None (NoneType) is the default value and stays
    # accepted.
    if not (isinstance(fastpath, (types.Omitted, types.NoneType)) or fastpath is None):
        raise SDCLimitation(f"{_func_name} Unsupported parameter. Given 'fastpath': {fastpath}")

    # typing-time flags captured by the implementation below
    dtype_is_np_int64 = dtype is types.NumberClass(types.int64)
    dtype_is_unicode_str = isinstance(dtype, (types.UnicodeType, types.StringLiteral))
    if not _check_dtype_param_type(dtype):
        ty_checker.raise_exc(dtype, 'int64 dtype', 'dtype')

    # TODO: support ensure_python_int from pandas.core.dtype.common to handle integers as float params
    if not (isinstance(start, (types.NoneType, types.Omitted, types.Integer)) or start is None):
        ty_checker.raise_exc(start, 'number or none', 'start')
    if not (isinstance(stop, (types.NoneType, types.Omitted, types.Integer)) or stop is None):
        ty_checker.raise_exc(stop, 'number or none', 'stop')
    if not (isinstance(step, (types.NoneType, types.Omitted, types.Integer)) or step is None):
        ty_checker.raise_exc(step, 'number or none', 'step')

    if not (isinstance(name, (types.NoneType, types.Omitted, types.StringLiteral, types.UnicodeType))
            or name is None):
        ty_checker.raise_exc(name, 'string or none', 'name')

    # no bounds given at all: compile an implementation that only raises
    if ((isinstance(start, (types.NoneType, types.Omitted)) or start is None)
            and (isinstance(stop, (types.NoneType, types.Omitted)) or stop is None)
            and (isinstance(step, (types.NoneType, types.Omitted)) or step is None)):
        def pd_range_index_ctor_dummy_impl(
                start=None, stop=None, step=None, dtype=None, copy=False, name=None, fastpath=None):
            raise TypeError("RangeIndex(...) must be called with integers")

        return pd_range_index_ctor_dummy_impl

    def pd_range_index_ctor_impl(start=None, stop=None, step=None, dtype=None, copy=False, name=None, fastpath=None):

        if not (dtype is None
                or dtype_is_unicode_str and dtype == 'int64'
                or dtype_is_np_int64):
            raise TypeError("Invalid to pass a non-int64 dtype to RangeIndex")

        _start = types.int64(start) if start is not None else types.int64(0)

        # a single positional bound is the stop value: the range starts at 0
        if stop is None:
            _start, _stop = types.int64(0), types.int64(start)
        else:
            _stop = types.int64(stop)

        _step = types.int64(step) if step is not None else types.int64(1)
        if _step == 0:
            raise ValueError("Step must not be zero")

        return init_range_index(range(_start, _stop, _step), name)

    return pd_range_index_ctor_impl
def sdc_pandas_series_operator_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.binop` implementation

    Note: Currently implemented for numeric Series only.
        Differs from Pandas in returning Series with fixed dtype :obj:`float64`

    NOTE(review): the ``binop`` name and the hard-coded ``+`` below look like
    placeholders of a template that is specialized per operator elsewhere;
    confirm before editing them.

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op1*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op2*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_binop*

    Parameters
    ----------
    series: :obj:`pandas.Series`
        Input series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation
    """

    _func_name = 'Operator binop().'
    ty_checker = TypeChecker('Operator binop().')
    # decline the overload unless at least one operand is a Series
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(
        other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    # this overload is not for string series
    self_is_string_series = self_is_series and isinstance(
        self.dtype, types.UnicodeType)
    other_is_string_series = other_is_series and isinstance(
        other.dtype, types.UnicodeType)
    if self_is_string_series or other_is_string_series:
        return None

    if not isinstance(self, (SeriesType, types.Number)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # indexes are accepted when both are default (None) or numeric,
        # or when their types are comparable
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType)
                                    or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType)
                                        or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(
            self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError(
                '{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(
                    _func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    # specializations for numeric series only
    if not operands_are_series:
        # exactly one operand is a Series, the other one is a scalar
        def _series_operator_binop_scalar_impl(self, other):
            # self_is_series is a typing-time value captured from the
            # enclosing scope, it selects which operand carries index and name
            if self_is_series == True:  # noqa
                result_data = numpy.empty(len(self._data), dtype=numpy.float64)
                result_data[:] = self._data + numpy.float64(other)
                return pandas.Series(result_data, index=self._index, name=self._name)
            else:
                result_data = numpy.empty(len(other._data), dtype=numpy.float64)
                result_data[:] = numpy.float64(self) + other._data
                return pandas.Series(result_data, index=other._index, name=other._name)

        return _series_operator_binop_scalar_impl

    else:   # both operands are numeric series
        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
            def _series_operator_binop_none_indexes_impl(self, other):

                if (len(self._data) == len(other._data)):
                    result_data = astype(self._data, numpy.float64)
                    result_data = result_data + other._data
                    return pandas.Series(result_data)
                else:
                    # lengths differ: the shorter operand is copied into a
                    # float64 buffer of the longer length and padded with NaN
                    left_size, right_size = len(self._data), len(other._data)
                    min_data_size = min(left_size, right_size)
                    max_data_size = max(left_size, right_size)
                    result_data = numpy.empty(max_data_size, dtype=numpy.float64)
                    if (left_size == min_data_size):
                        result_data[:min_data_size] = self._data
                        result_data[min_data_size:] = numpy.nan
                        result_data = result_data + other._data
                    else:
                        result_data[:min_data_size] = other._data
                        result_data[min_data_size:] = numpy.nan
                        result_data = self._data + result_data

                    return pandas.Series(result_data)

            return _series_operator_binop_none_indexes_impl
        else:
            # for numeric indexes find common dtype to be used when creating joined index
            if none_or_numeric_indexes:
                # a default (None) index is treated as int64 here
                ty_left_index_dtype = types.int64 if isinstance(
                    self.index, types.NoneType) else self.index.dtype
                ty_right_index_dtype = types.int64 if isinstance(
                    other.index, types.NoneType) else other.index.dtype
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [ty_left_index_dtype, ty_right_index_dtype], [])

            def _series_operator_binop_common_impl(self, other):
                left_index, right_index = self.index, other.index

                # check if indexes are equal and series don't have to be aligned
                if sdc_check_indexes_equal(left_index, right_index):
                    result_data = numpy.empty(len(self._data), dtype=numpy.float64)
                    result_data[:] = self._data + other._data

                    # numba_index_common_dtype is only defined above when
                    # none_or_numeric_indexes is true, so it must be used
                    # in this branch only
                    if none_or_numeric_indexes == True:  # noqa
                        result_index = astype(left_index, numba_index_common_dtype)
                    else:
                        result_index = self._index

                    return pandas.Series(result_data, index=result_index)

                # TODO: replace below with core join(how='outer', return_indexers=True) when implemented
                joined_index, left_indexer, right_indexer = sdc_join_series_indexes(
                    left_index, right_index)
                result_size = len(joined_index)
                left_values = numpy.empty(result_size, dtype=numpy.float64)
                right_values = numpy.empty(result_size, dtype=numpy.float64)
                for i in numba.prange(result_size):
                    # an indexer value of -1 is replaced by NaN for that side
                    left_pos, right_pos = left_indexer[i], right_indexer[i]
                    left_values[i] = self._data[
                        left_pos] if left_pos != -1 else numpy.nan
                    right_values[i] = other._data[
                        right_pos] if right_pos != -1 else numpy.nan

                result_data = left_values + right_values
                return pandas.Series(result_data, joined_index)

            return _series_operator_binop_common_impl

    # not reached: every branch above returns
    return None
def sdc_pandas_series_operator_comp_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.comp_binop` implementation

    NOTE(review): the ``comp_binop`` name and the hard-coded ``<`` below look
    like placeholders of a template that is specialized per comparison
    operator elsewhere; confirm before editing them.

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op7*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_comp_binop*

    Parameters
    ----------
    series: :obj:`pandas.Series`
        Input series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation
    """

    _func_name = 'Operator comp_binop().'
    ty_checker = TypeChecker('Operator comp_binop().')
    # decline the overload unless at least one operand is a Series
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(
        other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    if not isinstance(self, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # indexes are accepted when both are default (None) or numeric,
        # or when their types are comparable
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType)
                                    or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType)
                                        or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(
            self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError(
                '{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(
                    _func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    if not operands_are_series:
        # exactly one operand is a Series, the other one is a scalar
        def _series_operator_comp_binop_scalar_impl(self, other):
            # self_is_series is a typing-time value captured from the
            # enclosing scope, it selects which operand carries index and name
            if self_is_series == True:  # noqa
                return pandas.Series(self._data < other, index=self._index, name=self._name)
            else:
                return pandas.Series(self < other._data, index=other._index, name=other._name)

        return _series_operator_comp_binop_scalar_impl

    else:

        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
            def _series_operator_comp_binop_none_indexes_impl(self, other):
                # with default indexes only equally sized series are compared
                left_size, right_size = len(self._data), len(other._data)
                if (left_size == right_size):
                    return pandas.Series(self._data < other._data)
                else:
                    raise ValueError(
                        "Can only compare identically-labeled Series objects")

            return _series_operator_comp_binop_none_indexes_impl
        else:

            if none_or_numeric_indexes:
                # a default (None) index is treated as int64 here
                ty_left_index_dtype = types.int64 if isinstance(
                    self.index, types.NoneType) else self.index.dtype
                ty_right_index_dtype = types.int64 if isinstance(
                    other.index, types.NoneType) else other.index.dtype
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [ty_left_index_dtype, ty_right_index_dtype], [])

            def _series_operator_comp_binop_common_impl(self, other):
                left_index, right_index = self.index, other.index

                if sdc_check_indexes_equal(left_index, right_index):
                    # numba_index_common_dtype is only defined above when
                    # none_or_numeric_indexes is true, so it must be used
                    # in this branch only
                    if none_or_numeric_indexes == True:  # noqa
                        new_index = astype(left_index, numba_index_common_dtype)
                    else:
                        new_index = self._index
                    return pandas.Series(self._data < other._data,
                                         new_index)
                else:
                    raise ValueError(
                        "Can only compare identically-labeled Series objects")

            return _series_operator_comp_binop_common_impl

    # not reached: every branch above returns or raises
    return None
def sdc_pandas_series_comp_binop(self, other, level=None, fill_value=None, axis=0):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.Series.comp_binop

    Limitations
    -----------
    Parameters ``level`` and ``axis`` are currently unsupported by Intel Scalable Dataframe Compiler

    Examples
    --------
    .. literalinclude:: ../../../examples/series/series_comp_binop.py
       :language: python
       :lines: 27-
       :caption:
       :name: ex_series_comp_binop

    .. command-output:: python ./series/series_comp_binop.py
       :cwd: ../../../examples

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas Series method :meth:`pandas.Series.comp_binop` implementation.

    .. only:: developer
        Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op8
    """

    _func_name = 'Method comp_binop().'

    ty_checker = TypeChecker(_func_name)
    ty_checker.check(self, SeriesType)

    # level: only the default is supported
    if not isinstance(level, types.Omitted) and level is not None:
        ty_checker.raise_exc(level, 'None', 'level')

    # fill_value: scalar, none or omitted
    fill_value_types = (types.Omitted, types.Number, types.UnicodeType, types.NoneType)
    if not isinstance(fill_value, fill_value_types) and fill_value is not None:
        ty_checker.raise_exc(fill_value, 'scalar', 'fill_value')

    # axis: only the default is supported
    if not isinstance(axis, types.Omitted) and not (axis == 0):
        ty_checker.raise_exc(axis, 'int', 'axis')

    self_is_series = isinstance(self, SeriesType)
    other_is_series = isinstance(other, SeriesType)
    if not self_is_series and not other_is_series:
        return None

    if not isinstance(self, SeriesType):
        ty_checker.raise_exc(self, 'pandas.series', 'self')

    other_types = (SeriesType, types.Number, types.UnicodeType)
    if not isinstance(other, other_types):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        if not check_types_comparable(self.index, other.index):
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    if not check_types_comparable(self, other):
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    # specializations for both numeric and string series
    def series_comp_binop_wrapper(self, other, level=None, fill_value=None, axis=0):
        return sdc_comp_binop(self, other, fill_value)

    return series_comp_binop_wrapper
def hpat_pandas_series_map(self, arg, na_action=None):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.Series.map

    Limitations
    -----------
    - Series data types String is currently unsupported by Intel Scalable Dataframe Compiler.
    - ``arg`` as Series is currently unsupported by Intel Scalable Dataframe Compiler.
    - ``arg`` as function should return scalar. Other types \
        are currently unsupported by Intel Scalable Dataframe Compiler.
    - ``na_action`` is currently unsupported by Intel Scalable Dataframe Compiler.

    Examples
    --------
    .. literalinclude:: ../../../examples/series/series_map.py
       :language: python
       :lines: 36-
       :caption: `map()` accepts a function.
       :name: ex_series_map

    .. command-output:: python ./series/series_map.py
       :cwd: ../../../examples

    .. seealso::

        :ref:`Series.apply <pandas.Series.apply>`
            For applying more complex functions on a Series.
        :ref:`DataFrame.apply <pandas.DataFrame.apply>`
            Apply a function row-/column-wise.
        :ref:`DataFrame.applymap <pandas.DataFrame.applymap>`
            Apply a function elementwise on a whole DataFrame.

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    .. only:: developer
        Test: python -m sdc.runtests sdc.tests.test_series -k map
    """

    checker = TypeChecker("Method map().")
    checker.check(self, SeriesType)

    if isinstance(arg, types.Callable):
        # the result dtype is the return type of `arg` called with one
        # element of the series
        call_signature = arg.get_call_type(cpu_target.typing_context, [self.dtype], {})
        output_type = call_signature.return_type

        def _series_map_callable_impl(self, arg, na_action=None):
            values = self._data
            size = len(values)

            mapped = numpy.empty(size, dtype=output_type)
            for i in prange(size):
                mapped[i] = arg(values[i])

            return pandas.Series(mapped, index=self._index, name=self._name)

        return _series_map_callable_impl

    if isinstance(arg, types.DictType):
        # the result keeps the dtype of the series
        output_type = self.dtype

        def _series_map_dict_impl(self, arg, na_action=None):
            values = self._data
            size = len(values)

            mapped = numpy.empty(size, dtype=output_type)
            for i in prange(size):
                # keys missing from the dict map to NaN
                mapped[i] = arg.get(values[i], numpy.nan)

            return pandas.Series(mapped, index=self._index, name=self._name)

        return _series_map_dict_impl
def hpat_pandas_stringmethods_startswith(self, pat, na=None):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.startswith

    Limitations
    -----------
    Series elements are expected to be Unicode strings. Elements cannot be NaN.

    Examples
    --------
    .. literalinclude:: ../../../examples/series/str/series_str_startswith.py
       :language: python
       :lines: 27-
       :caption: Test if the start of each string element matches a string
       :name: ex_series_str_startswith

    .. command-output:: python ./series/str/series_str_startswith.py
       :cwd: ../../../examples

    .. todo::
        - Add support of matching the start of each string by a pattern
        - Add support of parameter ``na``

    .. seealso::
        `str.startswith <https://docs.python.org/3/library/stdtypes.html#str.startswith>`_
            Python standard library string method.
        :ref:`Series.str.endswith <pandas.Series.str.endswith>`
            Same as startswith, but tests the end of string.
        :ref:`Series.str.contains <pandas.Series.str.contains>`
            Tests if string element contains a pattern.

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.startswith()` implementation.

    Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements.

    .. only:: developer

    Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_startswith

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg
    pat: :obj:`str`
        Character sequence
    na: :obj:`bool`
        Object shown if element tested is not a string
        *unsupported*

    Returns
    -------
    :obj:`pandas.Series`
         returns :obj:`pandas.Series` object
    """

    checker = TypeChecker('Method startswith().')
    checker.check(self, StringMethodsType)

    pat_types = (StringLiteral, UnicodeType)
    if not isinstance(pat, pat_types):
        checker.raise_exc(pat, 'str', 'pat')

    na_types = (Boolean, NoneType, Omitted)
    if not (na is None or isinstance(na, na_types)):
        checker.raise_exc(na, 'bool', 'na')

    def hpat_pandas_stringmethods_startswith_impl(self, pat, na=None):
        # na passes the typing check as bool, but only None is accepted here
        if na is not None:
            msg = 'Method startswith(). The object na\n expected: None'
            raise ValueError(msg)

        series = self._data
        item_count = len(series)
        flags = numpy.empty(item_count, numba.types.boolean)
        for pos, text in enumerate(series._data):
            flags[pos] = text.startswith(pat)

        return pandas.Series(flags, series._index, name=series._name)

    return hpat_pandas_stringmethods_startswith_impl
def hpat_pandas_series_rolling_count(self):
    """Typing-time overload of ``rolling.count()`` for a Series rolling object.

    Checks that ``self`` is a ``SeriesRollingType`` and returns
    ``sdc_pandas_series_rolling_count_impl``.
    """
    checker = TypeChecker('Method rolling.count().')
    checker.check(self, SeriesRollingType)

    return sdc_pandas_series_rolling_count_impl
def sdc_pandas_dataframe_rolling_sum(self):
    """Typing-time overload of ``rolling.sum()`` for a DataFrame rolling object.

    Checks that ``self`` is a ``DataFrameRollingType`` and returns the
    implementation built by ``gen_df_rolling_method_impl`` for ``'sum'``.
    """
    checker = TypeChecker('Method rolling.sum().')
    checker.check(self, DataFrameRollingType)

    return gen_df_rolling_method_impl('sum', self)
def hpat_pandas_stringmethods_zfill(self, width):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.zfill

    Limitations
    -----------
    Series elements are expected to be Unicode strings. Elements cannot be NaN.

    Examples
    --------
    .. literalinclude:: ../../../examples/series/str/series_str_zfill.py
       :language: python
       :lines: 27-
       :caption: Pad strings in the Series by prepending '0' characters
       :name: ex_series_str_zfill

    .. command-output:: python ./series/str/series_str_zfill.py
       :cwd: ../../../examples

    .. todo:: Add support of 32-bit Unicode for `str.zfill()`

    .. seealso::
        :ref:`Series.str.rjust <pandas.Series.str.rjust>`
            Fills the left side of strings with an arbitrary character.
        :ref:`Series.str.ljust <pandas.Series.str.ljust>`
            Fills the right side of strings with an arbitrary character.
        :ref:`Series.str.pad <pandas.Series.str.pad>`
            Fills the specified sides of strings with an arbitrary character.
        :ref:`Series.str.center <pandas.Series.str.center>`
            Fills both sides of strings with an arbitrary character.

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.zfill()` implementation.

    Note: Unicode type of list elements are supported only. Numpy.NaN is not supported as elements.

    .. only:: developer

    Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_zfill

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg
    width: :obj:`int`
        Minimum width of resulting string

    Returns
    -------
    :obj:`pandas.Series`
         returns :obj:`pandas.Series` object
    """

    checker = TypeChecker('Method zfill().')
    checker.check(self, StringMethodsType)

    if not isinstance(width, Integer):
        checker.raise_exc(width, 'int', 'width')

    def hpat_pandas_stringmethods_zfill_impl(self, width):
        series = self._data
        item_count = len(series)
        # pre-sized list of strings, filled by position below
        padded = [''] * item_count
        for pos, text in enumerate(series._data):
            padded[pos] = text.zfill(width)

        return pandas.Series(padded, series._index, name=series._name)

    return hpat_pandas_stringmethods_zfill_impl
def sdc_pandas_series_binop(self, other, level=None, fill_value=None, axis=0):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.Series.binop

    Limitations
    -----------
    Parameters ``level`` and ``axis`` are currently unsupported by Intel Scalable Dataframe Compiler

    Examples
    --------
    .. literalinclude:: ../../../examples/series/series_binop.py
       :language: python
       :lines: 27-
       :caption:
       :name: ex_series_binop

    .. command-output:: python ./series/series_binop.py
       :cwd: ../../../examples

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas Series method :meth:`pandas.Series.binop` implementation.

    This function inspects the argument *types* and returns one of three
    implementations (series-with-scalar, two series with default indexes,
    two series with arbitrary indexes), or ``None`` when neither operand is
    a Series or when either operand is a string Series. All implementations
    produce ``float64`` result data.

    .. only:: developer
        Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5

    Parameters
    ----------
    self: :obj:`pandas.Series` or :obj:`scalar`
        Left operand; at least one of ``self`` and ``other`` must be a Series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Right operand
    level: :obj:`None`
        Unsupported, must be omitted or None
    fill_value: :obj:`number` or :obj:`None`
        Value passed to ``numpy_like.fillna`` for both operands and used for
        positions that exist in only one of the operands
    axis: :obj:`int`
        Unsupported, must be omitted or 0

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation

    Raises
    ------
    TypingError
        If operand, index, ``level``, ``fill_value`` or ``axis`` types are not supported
    """

    _func_name = 'Method binop().'
    ty_checker = TypeChecker(_func_name)
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
    # declining (returning None) rather than raising when no Series is involved
    if not (self_is_series or other_is_series):
        return None

    # this overload is not for string series
    self_is_string_series = self_is_series and isinstance(self.dtype, types.UnicodeType)
    other_is_string_series = other_is_series and isinstance(other.dtype, types.UnicodeType)
    if self_is_string_series or other_is_string_series:
        return None

    if not isinstance(self, (SeriesType, types.Number)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # indexes are acceptable if each one is either the default (None) index or numeric,
        # or if check_types_comparable() accepts the pair
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    if not isinstance(level, types.Omitted) and level is not None:
        ty_checker.raise_exc(level, 'None', 'level')

    if not isinstance(fill_value, (types.Omitted, types.Number, types.NoneType)) and fill_value is not None:
        ty_checker.raise_exc(fill_value, 'number', 'fill_value')
    # typing-time flag captured by the implementations below to choose NaN as the padding value
    fill_value_is_none = isinstance(fill_value, (types.NoneType, types.Omitted)) or fill_value is None

    if not isinstance(axis, types.Omitted) and axis != 0:
        ty_checker.raise_exc(axis, 'int', 'axis')
    # specializations for numeric series only
    if not operands_are_series:
        def _series_binop_scalar_impl(self, other, level=None, fill_value=None, axis=0):
            # NOTE(review): the `== True  # noqa` comparisons on captured flags look deliberate
            # (presumably to let the compiler prune the untaken branch) - confirm before simplifying
            if self_is_series == True:  # noqa
                # fillna is called with inplace=True, i.e. on the operand's own data array
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                result_data = numpy.empty(len(self._data), dtype=numpy.float64)
                result_data[:] = self._data + numpy.float64(other)
                return pandas.Series(result_data, index=self._index, name=self._name)
            else:
                numpy_like.fillna(other._data, inplace=True, value=fill_value)
                result_data = numpy.empty(len(other._data), dtype=numpy.float64)
                result_data[:] = numpy.float64(self) + other._data
                return pandas.Series(result_data, index=other._index, name=other._name)

        return _series_binop_scalar_impl

    else:   # both operands are numeric series
        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
            def _series_binop_none_indexes_impl(self, other, level=None, fill_value=None, axis=0):
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                numpy_like.fillna(other._data, inplace=True, value=fill_value)

                if (len(self._data) == len(other._data)):
                    result_data = numpy_like.astype(self._data, numpy.float64)
                    result_data = result_data + other._data
                    return pandas.Series(result_data)
                else:
                    # lengths differ: copy the shorter operand into a buffer of the longer length,
                    # pad its tail with _fill_value, then combine with the longer operand
                    left_size, right_size = len(self._data), len(other._data)
                    min_data_size = min(left_size, right_size)
                    max_data_size = max(left_size, right_size)
                    result_data = numpy.empty(max_data_size, dtype=numpy.float64)
                    _fill_value = numpy.nan if fill_value_is_none == True else fill_value  # noqa
                    if (left_size == min_data_size):
                        result_data[:min_data_size] = self._data
                        for i in range(min_data_size, len(result_data)):
                            result_data[i] = _fill_value
                        result_data = result_data + other._data
                    else:
                        result_data[:min_data_size] = other._data
                        for i in range(min_data_size, len(result_data)):
                            result_data[i] = _fill_value
                        # operand order is kept (self on the left) for non-commutative operations
                        result_data = self._data + result_data

                    return pandas.Series(result_data)

            return _series_binop_none_indexes_impl
        else:
            left_index_is_range = isinstance(self.index, (RangeIndexType, types.NoneType))
            right_index_is_range = isinstance(other.index, (RangeIndexType, types.NoneType))
            # indexes are compared for equality at run time only when both are range-like
            check_index_equal = left_index_is_range and right_index_is_range
            # a default (None) index is given the dtype of RangeIndexType
            self_index_dtype = RangeIndexType.dtype if isinstance(self.index, types.NoneType) else self.index.dtype
            other_index_dtype = RangeIndexType.dtype if isinstance(other.index, types.NoneType) else other.index.dtype
            index_dtypes_match = self_index_dtype == other_index_dtype
            if not index_dtypes_match:
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [self_index_dtype, other_index_dtype], [])
            else:
                numba_index_common_dtype = self_index_dtype

            def _series_binop_common_impl(self, other, level=None, fill_value=None, axis=0):
                left_index, right_index = self.index, other.index
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                numpy_like.fillna(other._data, inplace=True, value=fill_value)
                if check_index_equal == True:  # noqa
                    equal_indexes = numpy_like.array_equal(left_index, right_index)
                else:
                    equal_indexes = False

                # fast path: same index object or equal indexes, so data can be combined positionally
                if (left_index is right_index or equal_indexes):
                    result_data = numpy.empty(len(self._data), dtype=numpy.float64)
                    result_data[:] = self._data + other._data
                    if index_dtypes_match == False:  # noqa
                        result_index = numpy_like.astype(left_index, numba_index_common_dtype)
                    else:
                        result_index = left_index.values if left_index_is_range == True else left_index  # noqa

                    return pandas.Series(result_data, index=result_index)

                # TODO: replace below with core join(how='outer', return_indexers=True) when implemented
                joined_index, left_indexer, right_indexer = sdc_join_series_indexes(left_index, right_index)
                result_size = len(joined_index)
                left_values = numpy.empty(result_size, dtype=numpy.float64)
                right_values = numpy.empty(result_size, dtype=numpy.float64)
                _fill_value = numpy.nan if fill_value_is_none == True else fill_value  # noqa
                for i in range(result_size):
                    left_pos, right_pos = left_indexer[i], right_indexer[i]
                    # an indexer value of -1 is treated as "no element on this side" and gets _fill_value
                    left_values[i] = self._data[left_pos] if left_pos != -1 else _fill_value
                    right_values[i] = other._data[right_pos] if right_pos != -1 else _fill_value
                result_data = left_values + right_values
                return pandas.Series(result_data, joined_index)

            return _series_binop_common_impl

    return None
def sdc_pandas_series_comp_binop(self, other, level=None, fill_value=None, axis=0):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.Series.comp_binop

    Limitations
    -----------
    Parameters ``level`` and ``axis`` are currently unsupported by Intel Scalable Dataframe Compiler

    Examples
    --------
    .. literalinclude:: ../../../examples/series/series_comp_binop.py
       :language: python
       :lines: 27-
       :caption:
       :name: ex_series_comp_binop

    .. command-output:: python ./series/series_comp_binop.py
       :cwd: ../../../examples

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas Series method :meth:`pandas.Series.comp_binop` implementation.

    This function inspects the argument *types* and returns one of three
    implementations (series-with-scalar, two series with default indexes,
    two series with arbitrary indexes). Two series are compared only when
    their sizes (default indexes) or their indexes are equal, otherwise the
    implementation raises ``ValueError`` at run time.

    .. only:: developer
        Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op8

    Parameters
    ----------
    self: :obj:`pandas.Series`
        Left operand
    other: :obj:`pandas.Series` or :obj:`scalar`
        Right operand
    level: :obj:`None`
        Unsupported, must be omitted or None
    fill_value: :obj:`number` or :obj:`None`
        Value passed to ``numpy_like.fillna`` for the series operands
    axis: :obj:`int`
        Unsupported, must be omitted or 0

    Returns
    -------
    :obj:`pandas.Series`
        The result of the comparison

    Raises
    ------
    TypingError
        If operand, index, ``level``, ``fill_value`` or ``axis`` types are not supported
    ValueError
        At run time, if two series operands are not identically labeled
    """

    _func_name = 'Method comp_binop().'

    ty_checker = TypeChecker(_func_name)
    ty_checker.check(self, SeriesType)

    if not (isinstance(level, types.Omitted) or level is None):
        ty_checker.raise_exc(level, 'None', 'level')

    if not isinstance(fill_value, (types.Omitted, types.Number, types.NoneType)) and fill_value is not None:
        ty_checker.raise_exc(fill_value, 'number', 'fill_value')

    if not (isinstance(axis, types.Omitted) or axis == 0):
        ty_checker.raise_exc(axis, 'int', 'axis')

    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    if not isinstance(self, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # indexes are acceptable if each one is either the default (None) index or numeric,
        # or if check_types_comparable() accepts the pair
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    if not operands_are_series:
        def _series_comp_binop_scalar_impl(self, other, level=None, fill_value=None, axis=0):
            # NOTE(review): the `== True  # noqa` comparisons on captured flags look deliberate
            # (presumably to let the compiler prune the untaken branch) - confirm before simplifying
            if self_is_series == True:  # noqa
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                return pandas.Series(self._data < other, index=self._index, name=self._name)
            else:
                numpy_like.fillna(other._data, inplace=True, value=fill_value)
                return pandas.Series(self < other._data, index=other._index, name=other._name)

        return _series_comp_binop_scalar_impl

    else:

        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
            def _series_comp_binop_none_indexes_impl(self, other, level=None, fill_value=None, axis=0):
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                numpy_like.fillna(other._data, inplace=True, value=fill_value)
                left_size, right_size = len(self._data), len(other._data)
                if (left_size == right_size):
                    return pandas.Series(self._data < other._data)
                else:
                    raise ValueError("Can only compare identically-labeled Series objects")

            return _series_comp_binop_none_indexes_impl
        else:
            left_index_is_range = isinstance(self.index, (RangeIndexType, types.NoneType))
            # Only one of the operands may have the default (None) index here, and that index
            # type carries no `dtype`; fall back to the RangeIndexType dtype for it, the same
            # way sdc_pandas_series_binop does, instead of reading `.dtype` unconditionally.
            self_index_dtype = RangeIndexType.dtype if isinstance(self.index, types.NoneType) else self.index.dtype
            other_index_dtype = RangeIndexType.dtype if isinstance(other.index, types.NoneType) else other.index.dtype
            index_dtypes_match = self_index_dtype == other_index_dtype
            if not index_dtypes_match:
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [self_index_dtype, other_index_dtype], [])
            else:
                numba_index_common_dtype = self_index_dtype

            def _series_comp_binop_common_impl(self, other, level=None, fill_value=None, axis=0):
                numpy_like.fillna(self._data, inplace=True, value=fill_value)
                numpy_like.fillna(other._data, inplace=True, value=fill_value)
                left_index, right_index = self.index, other.index

                if (left_index is right_index or numpy_like.array_equal(left_index, right_index)):
                    if index_dtypes_match == False:  # noqa
                        new_index = numpy_like.astype(left_index, numba_index_common_dtype)
                    else:
                        new_index = left_index.values if left_index_is_range == True else left_index  # noqa
                    return pandas.Series(self._data < other._data, new_index)
                else:
                    raise ValueError("Can only compare identically-labeled Series objects")

            return _series_comp_binop_common_impl

    return None
def arg_impl(self):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of numpy.argmin/numpy.argmax.

    Typing-time function: returns the implementation for numeric arrays,
    returns ``None`` for arguments that are not arrays and raises a typing
    error (via ``TypeChecker.raise_exc``) for arrays of non-numeric dtype.
    The reduction (``min`` or ``max``) is selected by the free name
    ``reduce_op``, which is not defined in this function.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k argmin
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k argmax

    """

    # Decline non-array arguments before touching `self.dtype`: a type without
    # that attribute must yield "no implementation", not an AttributeError.
    if not isinstance(self, types.Array):
        return None

    ty_checker = TypeChecker("numpy-like 'argmin'/'argmax'")
    dtype = self.dtype
    isnan = get_isnan(dtype)
    # sentinel position, larger than any real index, so min(pos, j) always picks a real one
    max_int64 = max_dtype_int_val(numpy_support.from_dtype(numpy.int64))
    # the running result starts from the value that any element beats under reduce_op
    if isinstance(dtype, types.Integer):
        initial_result = {
            min: max_dtype_int_val(dtype),
            max: min_dtype_int_val(dtype),
        }[reduce_op]

    if isinstance(dtype, types.Float):
        initial_result = {
            min: max_dtype_float_val(dtype),
            max: min_dtype_float_val(dtype),
        }[reduce_op]

    # NOTE(review): a types.Number dtype that is neither Integer nor Float (e.g. complex)
    # reaches the implementation below without `initial_result` being assigned - confirm
    # whether such dtypes should be routed to the raise_exc at the end instead.
    if isinstance(dtype, types.Number):
        def sdc_argmin_impl(self):
            chunks = parallel_chunks(len(self))
            arr_res = numpy.empty(shape=len(chunks), dtype=dtype)
            arr_pos = numpy.empty(shape=len(chunks), dtype=numpy.int64)
            # stage 1: every chunk finds its own best value and the position of that value
            for i in prange(len(chunks)):
                chunk = chunks[i]
                res = initial_result
                pos = max_int64
                for j in range(chunk.start, chunk.stop):
                    if not isnan(self[j]):
                        # skip elements that do not win (or tie) against the current result
                        if reduce_op(res, self[j]) != self[j]:
                            continue
                        if res == self[j]:
                            pos = min(pos, j)
                        else:
                            pos = j
                            res = self[j]
                    else:
                        # a NaN element always replaces the result; among NaNs the
                        # smallest position is kept
                        if numpy.isnan(res):
                            pos = min(pos, j)
                        else:
                            pos = j
                        res = self[j]

                arr_res[i] = res
                arr_pos[i] = pos

            # stage 2: the same selection applied sequentially to the per-chunk results
            general_res = initial_result
            general_pos = max_int64
            for i in range(len(chunks)):
                if not isnan(arr_res[i]):
                    if reduce_op(general_res, arr_res[i]) != arr_res[i]:
                        continue
                    if general_res == arr_res[i]:
                        general_pos = min(general_pos, arr_pos[i])
                    else:
                        general_pos = arr_pos[i]
                        general_res = arr_res[i]
                else:
                    if numpy.isnan(general_res):
                        general_pos = min(general_pos, arr_pos[i])
                    else:
                        general_pos = arr_pos[i]
                    general_res = arr_res[i]

            return general_pos

        return sdc_argmin_impl

    ty_checker.raise_exc(dtype, 'number', 'self.dtype')
def hpat_pandas_series_rolling_corr(self, other=None, pairwise=None):
    """
    Typing-time function for ``Series.rolling().corr()``.

    Validates the argument types and returns the implementation. The
    implementation walks the data in parallel chunks and keeps a running
    ``(nfinite, result)`` state updated through ``put_corr``/``pop_corr``;
    each output element is produced by ``corr_result_or_nan``.

    Parameters
    ----------
    self: SeriesRollingType
        Rolling object
    other: :obj:`pandas.Series`, optional
        Second series; when omitted or None the rolling series itself is used
    pairwise: :obj:`bool`, optional
        Only type-checked here, not used by the implementation

    Returns
    -------
    :obj:`pandas.Series`
        Series of float64 values whose length is that of the longer input
    """

    ty_checker = TypeChecker('Method rolling.corr().')
    ty_checker.check(self, SeriesRollingType)

    accepted_other = (bool, Omitted, NoneType, SeriesType)
    if not isinstance(other, accepted_other) and other is not None:
        ty_checker.raise_exc(other, 'Series', 'other')

    accepted_pairwise = (bool, Boolean, Omitted, NoneType)
    if not isinstance(pairwise, accepted_pairwise) and pairwise is not None:
        ty_checker.raise_exc(pairwise, 'bool', 'pairwise')

    # typing-time flag: no second series was given
    nan_other = isinstance(other, (Omitted, NoneType)) or other is None

    def hpat_pandas_rolling_series_corr_impl(self, other=None, pairwise=None):
        win = self._window
        minp = self._min_periods

        main_series = self._data
        main_arr = main_series._data

        if nan_other == True:  # noqa
            other_arr = main_arr
        else:
            other_arr = other._data

        main_arr_length = len(main_arr)
        other_arr_length = len(other_arr)
        # pairs (x, y) exist only for positions below min_length
        min_length = min(main_arr_length, other_arr_length)
        length = max(main_arr_length, other_arr_length)
        output_arr = numpy.empty(length, dtype=float64)

        chunks = parallel_chunks(length)
        for i in prange(len(chunks)):
            chunk = chunks[i]
            # running state of this chunk; presumably a count of finite pairs and
            # accumulated sums - see put_corr/pop_corr for the exact meaning
            nfinite = 0
            result = (0., 0., 0., 0., 0.)

            if win == 0:
                # empty window: every position gets the result of the initial state
                for idx in range(chunk.start, chunk.stop):
                    output_arr[idx] = corr_result_or_nan(nfinite, minp, result)
                continue

            # prelude: positions before the chunk that belong to its first window
            prelude_start = max(0, chunk.start - win + 1)
            prelude_stop = min(chunk.start, min_length)

            # interlude: positions of the chunk where the window is still filling up
            interlude_start = chunk.start
            interlude_stop = min(prelude_start + win, chunk.stop, min_length)

            # postlude: positions of the chunk where the window slides
            postlude_start = min(prelude_start + win, chunk.stop)
            postlude_stop = min(chunk.stop, min_length)

            # accumulate the prelude without writing any output
            for idx in range(prelude_start, prelude_stop):
                x, y = main_arr[idx], other_arr[idx]
                nfinite, result = put_corr(x, y, nfinite, result)

            # add one pair per position and emit a result
            for idx in range(interlude_start, interlude_stop):
                x, y = main_arr[idx], other_arr[idx]
                nfinite, result = put_corr(x, y, nfinite, result)
                output_arr[idx] = corr_result_or_nan(nfinite, minp, result)

            # add the entering pair, drop the pair leaving the window, emit a result
            for idx in range(postlude_start, postlude_stop):
                put_x, put_y = main_arr[idx], other_arr[idx]
                pop_x, pop_y = main_arr[idx - win], other_arr[idx - win]
                nfinite, result = put_corr(put_x, put_y, nfinite, result)
                nfinite, result = pop_corr(pop_x, pop_y, nfinite, result)
                output_arr[idx] = corr_result_or_nan(nfinite, minp, result)

            # positions past the shorter input while the window is still filling:
            # nothing to add, emit the result of the current state
            last_start = max(min_length, interlude_start)
            for idx in range(last_start, postlude_start):
                output_arr[idx] = corr_result_or_nan(nfinite, minp, result)

            # positions past the shorter input with a sliding window:
            # only drop the pair leaving the window
            last_start = max(min_length, postlude_start)
            last_stop = min(min_length + win, chunk.stop)
            for idx in range(last_start, last_stop):
                x, y = main_arr[idx - win], other_arr[idx - win]
                nfinite, result = pop_corr(x, y, nfinite, result)
                output_arr[idx] = corr_result_or_nan(nfinite, minp, result)

            # remaining positions of the chunk are set to NaN
            for idx in range(last_stop, chunk.stop):
                output_arr[idx] = numpy.nan

        return pandas.Series(output_arr)

    return hpat_pandas_rolling_series_corr_impl
def sdc_pandas_series_operator_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.binop` implementation

    Note: Currently implemented for numeric Series only.
        Differs from Pandas in returning Series with fixed dtype :obj:`float64`

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op1*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op2*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_binop*

    Parameters
    ----------
    self: :obj:`pandas.Series` or :obj:`scalar`
        First operand; at least one of ``self`` and ``other`` must be a Series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation

    Raises
    ------
    TypingError
        If operand or index types are not supported
    """

    # Name reported in typing errors; it has to identify this operator overload,
    # not the comparison method.
    _func_name = 'Operator binop().'
    ty_checker = TypeChecker(_func_name)
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
    # declining (returning None) rather than raising when no Series is involved
    if not (self_is_series or other_is_series):
        return None

    # this overload is not for string series
    self_is_string_series = self_is_series and isinstance(self.dtype, types.UnicodeType)
    other_is_string_series = other_is_series and isinstance(other.dtype, types.UnicodeType)
    if self_is_string_series or other_is_string_series:
        return None

    if not isinstance(self, (SeriesType, types.Number)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # indexes are acceptable if each one is either the default (None) index or numeric,
        # or if check_types_comparable() accepts the pair
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    # the operator simply forwards to the method-style implementation
    def series_operator_binop_wrapper(self, other):
        return sdc_binop(self, other)

    return series_operator_binop_wrapper
def sdc_astype_overload(self, dtype):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of numpy.astype.

    Typing-time function: returns ``None`` when ``self`` is not an array,
    string array or range index; requests a literal ``dtype`` when it is
    not yet a number class, function or string literal; otherwise returns
    the conversion implementation or raises a typing error (via
    ``TypeChecker.raise_exc``) for combinations that are not handled.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k astype

    """

    ty_checker = TypeChecker("numpy-like 'astype'")
    if not isinstance(self, (types.Array, StringArrayType, RangeIndexType)):
        return None

    accepted_dtype_types = (types.functions.NumberClass, types.Function, types.StringLiteral)
    if not isinstance(dtype, accepted_dtype_types):
        # ask for the call to be re-typed with `dtype` as a literal value
        def impl(self, dtype):
            return literally(dtype)
        return impl

    # From here on `dtype` is one of accepted_dtype_types, so a second check
    # of the same condition would be unreachable.
    converts_to_str = (
        (isinstance(dtype, types.Function) and dtype.typing_key == str) or
        (isinstance(dtype, types.StringLiteral) and dtype.literal_value == 'str')
    )
    if converts_to_str:
        def sdc_astype_number_to_string_impl(self, dtype):
            num_bytes = 0
            arr_len = len(self)

            # Get total bytes for new array
            for i in prange(arr_len):
                item = self[i]
                num_bytes += get_utf8_size(str(item))

            data = pre_alloc_string_array(arr_len, num_bytes)

            for i in range(arr_len):
                item = self[i]
                data[i] = str(item)  # TODO: check NA

            return data

        return sdc_astype_number_to_string_impl

    dtype_is_number_like = isinstance(dtype, (types.StringLiteral, types.functions.NumberClass))
    if isinstance(self, (types.Array, RangeIndexType)) and dtype_is_number_like:
        def sdc_astype_number_impl(self, dtype):
            arr = numpy.empty(len(self), dtype=numpy.dtype(dtype))
            for i in numba.prange(len(self)):
                arr[i] = self[i]

            return arr

        return sdc_astype_number_impl

    # Nothing above matched: report which argument is unsupported instead of
    # silently falling off the end of the function.
    if not dtype_is_number_like:
        # a function other than `str` was passed as dtype
        ty_checker.raise_exc(dtype, 'string or type', 'dtype')

    # dtype is fine, so `self` is a type that has no conversion to it here
    ty_checker.raise_exc(self, 'array or range index', 'self')
def hpat_pandas_stringmethods_rjust(self, width, fillchar=' '):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.rjust

    Limitations
    -----------
    Series elements are expected to be Unicode strings. Elements cannot be NaN.

    Examples
    --------
    .. literalinclude:: ../../../examples/series/str/series_str_rjust.py
       :language: python
       :lines: 27-
       :caption: Filling left side of strings in the Series with an additional character
       :name: ex_series_str_rjust

    .. command-output:: python ./series/str/series_str_rjust.py
       :cwd: ../../../examples

    .. todo:: Add support of 32-bit Unicode for `str.rjust()`

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.rjust()` implementation.

    Note: Only Unicode list elements are supported. Numpy.NaN is not supported as an element.

    .. only:: developer

    Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_rjust

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg
    width: :obj:`int`
        Minimum width of resulting string
    fillchar: :obj:`str`
        Additional character for filling, default is whitespace

    Returns
    -------
    :obj:`pandas.Series`
         returns :obj:`pandas.Series` object
    """

    checker = TypeChecker('Method rjust().')
    checker.check(self, StringMethodsType)

    width_is_integer = isinstance(width, Integer)
    if not width_is_integer:
        checker.raise_exc(width, 'int', 'width')

    # fillchar is accepted either as one of these types or as the default value itself
    fillchar_types = (Omitted, StringLiteral, UnicodeType)
    if not (isinstance(fillchar, fillchar_types) or fillchar == ' '):
        checker.raise_exc(fillchar, 'str', 'fillchar')

    def hpat_pandas_stringmethods_rjust_impl(self, width, fillchar=' '):
        # `self._data` is the Series wrapped by the string accessor
        series = self._data
        justified = [''] * len(series)
        for pos, value in enumerate(series._data):
            justified[pos] = value.rjust(width, fillchar)

        return pandas.Series(justified, series._index, name=series._name)

    return hpat_pandas_stringmethods_rjust_impl