def sdc_pandas_dataframe_rolling_corr(self, other=None, pairwise=None):
    """Typing overload dispatcher for DataFrame rolling.corr()."""
    checker = TypeChecker('Method rolling.corr().')
    checker.check(self, DataFrameRollingType)

    # 'other' may be omitted/None, a DataFrame or a Series
    if other is not None and not isinstance(other, (Omitted, NoneType, DataFrameType, SeriesType)):
        checker.raise_exc(other, 'DataFrame, Series', 'other')

    # 'pairwise' may be omitted/None or a boolean
    if pairwise is not None and not isinstance(pairwise, (bool, Boolean, Omitted, NoneType)):
        checker.raise_exc(pairwise, 'bool', 'pairwise')

    method_kws = {'other': 'None', 'pairwise': 'None'}
    other_is_none = other is None or isinstance(other, (Omitted, NoneType))

    # dispatch to the generator matching the kind of 'other'
    if other_is_none:
        return gen_df_rolling_method_other_none_impl('corr', self, kws=method_kws)
    elif isinstance(other, DataFrameType):
        return gen_df_rolling_method_other_df_impl('corr', self, other, kws=method_kws)
    else:
        return gen_df_rolling_method_impl('corr', self, kws=method_kws)
def pd_range_index_getitem_overload(self, idx):
    """Typing overload for RangeIndex getitem with integer or slice indexers."""
    if not isinstance(self, RangeIndexType):
        return None

    _func_name = 'Operator getitem().'
    ty_checker = TypeChecker(_func_name)

    # TO-DO: extend getitem to support other indexers (Arrays, Lists, etc)
    # for Arrays and Lists it requires Int64Index class as return value
    if not isinstance(idx, (types.Integer, types.SliceType)):
        ty_checker.raise_exc(idx, 'integer', 'idx')

    if isinstance(idx, types.Integer):
        def pd_range_index_getitem_impl(self, idx):
            range_len = len(self._data)
            # FIX: cast to int64 added for consistency with sibling index getitem
            # overloads - FIXME_Numba#5801: Numba type unification rules make this float
            idx = types.int64((range_len + idx) if idx < 0 else idx)
            if (idx < 0 or idx >= range_len):
                raise IndexError("RangeIndex.getitem: index is out of bounds")
            # element value computed directly from the range parameters
            return self.start + self.step * idx

        return pd_range_index_getitem_impl

    if isinstance(idx, types.SliceType):
        def pd_range_index_getitem_impl(self, idx):
            # normalize slice bounds against the range length
            fix_start, fix_stop, fix_step = idx.indices(len(self._data))
            return pd.RangeIndex(
                self.start + self.step * fix_start,
                self.start + self.step * fix_stop,
                self.step * fix_step,
                name=self._name
            )

        return pd_range_index_getitem_impl
def pd_int64_index_getitem_overload(self, idx):
    """Typing overload for Int64Index getitem.

    A scalar integer indexer returns the element; slice/array/list
    indexers return a new Int64Index built from the selected data.
    """
    if not isinstance(self, Int64IndexType):
        return None

    _func_name = 'Operator getitem().'
    ty_checker = TypeChecker(_func_name)

    # valid indexers: integer scalar, slice, or integer/boolean array/list
    if not (isinstance(idx, (types.Integer, types.SliceType))
            or isinstance(idx, (types.Array, types.List))
            and isinstance(idx.dtype, (types.Integer, types.Boolean))):
        ty_checker.raise_exc(idx, 'integer, slice, integer array or list', 'idx')

    if isinstance(idx, types.Integer):
        def pd_int64_index_getitem_impl(self, idx):
            index_len = len(self._data)
            # negative indexes are normalized to positive before bounds check
            # FIXME_Numba#5801: Numba type unification rules make this float
            idx = types.int64((index_len + idx) if idx < 0 else idx)
            if (idx < 0 or idx >= index_len):
                raise IndexError("Int64Index.getitem: index is out of bounds")
            return self._data[idx]

        return pd_int64_index_getitem_impl
    else:
        # slice/array/list indexers: defer to the underlying array's getitem
        def pd_int64_index_getitem_impl(self, idx):
            index_data = self._data[idx]
            return pd.Int64Index(index_data, name=self._name)

        return pd_int64_index_getitem_impl
def pd_int64_index_take_overload(self, indexes):
    """Typing overload for Int64Index.take(): select elements by positions."""
    if not isinstance(self, Int64IndexType):
        return None

    _func_name = 'Method take().'
    ty_checker = TypeChecker(_func_name)

    valid_indexes_types = (types.Array, types.List, types.ListType) + sdc_pandas_index_types
    if not (isinstance(indexes, valid_indexes_types)
            and isinstance(indexes.dtype, (types.Integer, types.ListType))):
        ty_checker.raise_exc(indexes, 'array/list of integers or integer index', 'indexes')

    # separate handling when indexes is nested lists produces with parallel impls
    if isinstance(indexes.dtype, types.ListType):
        def pd_int64_index_take_chunked_impl(self, indexes):
            new_index_data = numpy_like.take(self.values, indexes)
            return pd.Int64Index(new_index_data, name=self._name)

        return pd_int64_index_take_chunked_impl

    # compile-time flag: pandas-index targets (other than arrays) are
    # converted to plain arrays via .values inside the impl
    convert_target = isinstance(
        indexes, sdc_pandas_index_types) and not isinstance(indexes, types.Array)

    def pd_int64_index_take_impl(self, indexes):
        _indexes = indexes.values if convert_target == True else indexes  # noqa
        new_index_data = numpy_like.take(self._data, _indexes)
        return pd.Int64Index(new_index_data, name=self._name)

    return pd_int64_index_take_impl
def concurrent_dict_type_fromkeys_ovld(cls, keys, value):
    """Typing overload for ConcurrentDict.fromkeys(keys, value).

    Builds a dict mapping every element of ``keys`` to ``value``; key and
    value types of the result are inferred from the arguments.
    """
    if cls.instance_type is not ConcurrentDictType:
        return

    # FIX: dropped needless f-string prefixes (no placeholders, flake8 F541)
    _func_name = 'Method ConcurrentDict::fromkeys()'
    ty_checker = TypeChecker(_func_name)

    valid_keys_types = (types.Sequence, types.Array, StringArrayType)
    if not isinstance(keys, valid_keys_types):
        ty_checker.raise_exc(keys, 'array or sequence', 'keys')

    dict_key_type, dict_value_type = keys.dtype, value

    if isinstance(keys, (types.Array, StringArrayType)):
        # arrays support random access, so keys can be inserted in parallel
        def concurrent_dict_fromkeys_impl(cls, keys, value):
            res = ConcurrentDict.empty(dict_key_type, dict_value_type)
            for i in numba.prange(len(keys)):
                res[keys[i]] = value
            return res
    else:
        # generic for all other iterables
        def concurrent_dict_fromkeys_impl(cls, keys, value):
            res = ConcurrentDict.empty(dict_key_type, dict_value_type)
            for k in keys:
                res[k] = value
            return res

    return concurrent_dict_fromkeys_impl
def concurrent_dict_empty(cls, key_type, value_type):
    """Typing overload for ConcurrentDict.empty(key_type, value_type).

    Validates that the requested key/value types are supported and returns
    an implementation that creates an empty native hashmap.
    """
    if cls.instance_type is not ConcurrentDictType:
        return

    _func_name = 'Method ConcurrentDictTypeRef::empty().'
    ty_checker = TypeChecker(_func_name)

    supported_key_types = (types.NumberClass, types.TypeRef)
    supported_value_types = (types.NumberClass, types.TypeRef)

    if not isinstance(key_type, supported_key_types):
        ty_checker.raise_exc(key_type, 'Numba type of dict keys (e.g. types.int32)', 'key_type')
    if not isinstance(value_type, supported_value_types):
        ty_checker.raise_exc(value_type, 'Numba type of dict values (e.g. types.int32)', 'value_type')

    # BUG FIX: a misplaced closing parenthesis nested the value_type condition
    # inside the classinfo argument of isinstance(), where it was swallowed by
    # '(types.UnicodeType, types.Hashable) or ...' (the tuple is always truthy),
    # so unsupported value dtypes were never rejected. The three rejection
    # conditions are now separated and combined explicitly.
    bad_numeric_key = (isinstance(key_type, types.NumberClass)
                       and key_type.dtype not in supported_numeric_key_types)
    bad_typeref_key = (isinstance(key_type, types.TypeRef)
                       and not isinstance(key_type.instance_type,
                                          (types.UnicodeType, types.Hashable)))
    bad_numeric_value = (isinstance(value_type, types.NumberClass)
                         and value_type.dtype not in supported_numeric_value_types)

    if bad_numeric_key or bad_typeref_key or bad_numeric_value:
        error_msg = '{} SDC ConcurrentDict({}, {}) is not supported. '
        raise TypingError(error_msg.format(_func_name, key_type, value_type))

    def concurrent_dict_empty_impl(cls, key_type, value_type):
        return hashmap_create(key_type, value_type)

    return concurrent_dict_empty_impl
def concurrent_dict_pop_ovld(self, key, default=None):
    """Typing overload for ConcurrentDict.pop(key, default=None)."""
    if not isinstance(self, ConcurrentDictType):
        return None

    _func_name = f'Method {self}::pop()'
    ty_checker = TypeChecker(_func_name)

    # default value is expected to be of the same (or safely casted) type as dict's value_type
    no_default = isinstance(default, (types.NoneType, types.Omitted)) or default is None
    default_is_optional = isinstance(default, types.Optional)
    if not (no_default
            or check_types_comparable(default, self.value_type)
            or default_is_optional and check_types_comparable(default.type, self.value_type)):
        ty_checker.raise_exc(default, f'{self.value_type} or convertible or None', 'default')

    dict_key_type, dict_value_type = self.key_type, self.value_type
    # compile-time flag: cast the key only when its type differs from the dict's key type
    cast_key = key is not dict_key_type

    def concurrent_dict_pop_impl(self, key, default=None):
        # '== False' comparisons on compile-time constants prune dead branches in Numba
        _key = key if cast_key == False else _cast(key, dict_key_type)  # noqa
        found, res = hashmap_pop(self, _key)
        if not found:
            # key absent: return the supplied default (casted to value type), or None
            if no_default == False:  # noqa
                return _cast(default, dict_value_type)
            else:
                return None
        return res

    return concurrent_dict_pop_impl
def pd_multi_index_from_product_overload(cls, iterables, sortorder=None, names=None):
    """Typing overload for pd.MultiIndex.from_product(iterables).

    Only the default sortorder and names arguments are supported.
    """
    if cls.instance_type is not MultiIndexType:
        return

    _func_name = f'Method from_product()'
    valid_levels_data_types = sdc_pandas_index_types + sdc_pandas_df_column_types + (
        types.List, types.ListType)
    ty_checker = TypeChecker(_func_name)

    # iterables: homogeneous list/tuple of valid level data, or a
    # heterogeneous tuple whose elements are all valid level data
    if not (isinstance(iterables, (types.List, types.ListType, types.UniTuple))
            and isinstance(iterables.dtype, valid_levels_data_types)
            or isinstance(iterables, types.Tuple)
            and all(map(lambda x: isinstance(x, valid_levels_data_types), iterables))):
        ty_checker.raise_exc(iterables, 'list or tuple of tuples ', 'iterables')

    if not (isinstance(sortorder, (types.Omitted, types.NoneType)) or sortorder is None):
        raise TypingError(
            '{} Unsupported parameters. Given sortorder: {}'.format(
                _func_name, sortorder))

    if not (isinstance(names, (types.Omitted, types.NoneType)) or names is None):
        raise TypingError('{} Unsupported parameters. Given names: {}'.format(
            _func_name, names))

    def pd_multi_index_from_product_impl(cls, iterables, sortorder=None, names=None):
        # TO-DO: support indexes.unique() method and use it here
        levels_factorized = sdc_tuple_map(factorize_level, iterables)
        levels_names = sdc_tuple_map(sdc_indexes_get_name, iterables)
        index_levels = sdc_tuple_map(lambda x: fix_df_index(list(x[0])), levels_factorized)

        # running cumulative product of level sizes; next_codes_info appends
        # to it and the last element ends up being the result index size
        temp_cumprod_sizes = [1, ]
        codes_info = sdc_tuple_map(next_codes_info, levels_factorized, temp_cumprod_sizes)

        res_index_size = temp_cumprod_sizes[-1]
        index_codes = sdc_tuple_map(next_codes_array, codes_info, res_index_size)

        res = sdc_pandas_multi_index_ctor(index_levels, index_codes, name=levels_names)
        return res

    return pd_multi_index_from_product_impl
def pd_multi_index_equals_overload(self, other):
    """Typing overload for MultiIndex.equals(other)."""
    if not isinstance(self, MultiIndexType):
        return None

    _func_name = 'Method equals().'
    ty_checker = TypeChecker(_func_name)

    # only MultiIndexes with the same element dtype are comparable
    if not (isinstance(other, MultiIndexType) and self.dtype is other.dtype):
        ty_checker.raise_exc(other, 'pandas MultiIndex', 'other')

    def pd_multi_index_equals_impl(self, other):
        if self.nlevels != other.nlevels:
            return False

        # pair up the corresponding level data of both indexes and
        # compare them levelwise
        self_and_other_data = _multi_index_binop_helper(self, other)
        tup_levels_cmp_res = sdc_tuple_map(
            lambda x: cat_array_equal(*x),
            self_and_other_data,
        )

        # np.all is not supported for Tuples and below compiles a bit faster
        # than 'np.all(np.array(list(tup_levels_cmp_res)))'
        for cmp_res in tup_levels_cmp_res:
            if not cmp_res:
                return False
        return True

    return pd_multi_index_equals_impl
def pd_int64_index_append_overload(self, other):
    """Typing overload for Int64Index.append(other)."""
    if not isinstance(self, Int64IndexType):
        return None

    _func_name = 'Method append().'
    ty_checker = TypeChecker(_func_name)

    if not isinstance(other, sdc_pandas_index_types):
        ty_checker.raise_exc(other, 'pandas index', 'other')

    if not check_types_comparable(self, other):
        raise TypingError('{} Not allowed for non comparable indexes. \
            Given: self={}, other={}'.format(_func_name, self, other))

    # compile-time flags: non-array 'other' is converted via .values, and the
    # result can only be an Int64Index when the common dtype is int64
    convert_other = not isinstance(other, types.Array)
    _, res_index_dtype = find_index_common_dtype(self, other)
    return_as_array_index = res_index_dtype is not types.int64

    def pd_int64_index_append_impl(self, other):
        _other = other.values if convert_other == True else other  # noqa
        new_index_data = hpat_arrays_append(self._data, _other)

        # this is only needed while some indexes are represented with arrays
        # TO-DO: support pd.Index() overload with dtype arg to create indexes
        if return_as_array_index == False:  # noqa
            return pd.Int64Index(new_index_data)
        else:
            return new_index_data

    return pd_int64_index_append_impl
def sdc_pandas_series_operator_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.binop` implementation

    Note: Currently implemented for numeric Series only.
        Differs from Pandas in returning Series with fixed dtype :obj:`float64`

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op1*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op2*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_binop*

    Parameters
    ----------
    series: :obj:`pandas.Series`
        Input series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation
    """
    # NOTE(review): message says 'comp_binop' while this overload is for
    # 'binop' - looks like template substitution or a copy-paste slip; confirm
    # against the code generator before changing the string.
    _func_name = 'Method comp_binop().'
    ty_checker = TypeChecker(_func_name)

    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
    # overload applies only when at least one operand is a Series
    if not (self_is_series or other_is_series):
        return None

    # this overload is not for string series
    self_is_string_series = self_is_series and isinstance(self.dtype, types.UnicodeType)
    other_is_string_series = other_is_series and isinstance(other.dtype, types.UnicodeType)
    if self_is_string_series or other_is_string_series:
        return None

    if not isinstance(self, (SeriesType, types.Number)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # two series can only be combined when their indexes are comparable
        series_indexes_comparable = check_types_comparable(self.index, other.index)
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
                Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
            Given: self={}, other={}'.format(_func_name, self, other))

    def series_operator_binop_wrapper(self, other):
        return sdc_binop(self, other)

    return series_operator_binop_wrapper
def sdc_astype_overload(self, dtype):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of numpy.astype.

    .. only:: developer
        Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k astype
    """
    valid_self_types = (types.Array,) + sdc_pandas_index_types
    if not (isinstance(self, valid_self_types) and not isinstance(self, types.NoneType)):
        return None

    accepted_dtype_types = (types.functions.NumberClass, types.Function, types.StringLiteral)
    if not isinstance(dtype, accepted_dtype_types):
        # ask Numba to re-resolve the overload with dtype as a literal value
        # (e.g. a string constant), so the branches below can inspect it
        def impl(self, dtype):
            return literally(dtype)
        return impl

    # FIX: a second, identical 'not isinstance(dtype, accepted_dtype_types)'
    # check (raising via TypeChecker) was removed here as unreachable dead
    # code - the branch above already returns for every such dtype, which
    # also made the TypeChecker instance unused.

    # conversion to string: compute total byte size first, then fill
    if (
        (isinstance(dtype, types.Function) and dtype.typing_key == str)
        or (isinstance(dtype, types.StringLiteral) and dtype.literal_value == 'str')
    ):
        def sdc_astype_number_to_string_impl(self, dtype):
            num_bytes = 0
            arr_len = len(self)

            # Get total bytes for new array
            for i in prange(arr_len):
                item = self[i]
                num_bytes += get_utf8_size(str(item))

            data = pre_alloc_string_array(arr_len, num_bytes)

            for i in range(arr_len):
                item = self[i]
                data[i] = str(item)  # TODO: check NA

            return data

        return sdc_astype_number_to_string_impl

    # conversion between numeric dtypes: parallel element-wise copy
    if (isinstance(self, (types.Array, RangeIndexType, Int64IndexType))
            and isinstance(dtype, (types.StringLiteral, types.functions.NumberClass))):
        def sdc_astype_number_impl(self, dtype):
            arr = numpy.empty(len(self), dtype=numpy.dtype(dtype))
            for i in numba.prange(len(self)):
                arr[i] = self[i]

            return arr

        return sdc_astype_number_impl
def hpat_pandas_stringmethods_strip(self, to_strip=None):
    """Typing overload for Series.str.strip(to_strip=None)."""
    checker = TypeChecker('Method strip().')
    checker.check(self, StringMethodsType)

    # to_strip must be a string, or omitted/None
    valid_to_strip_types = (NoneType, StringLiteral, UnicodeType, Omitted)
    if to_strip is not None and not isinstance(to_strip, valid_to_strip_types):
        checker.raise_exc(to_strip, 'str', 'to_strip')

    return sdc_pandas_series_str_strip_impl
def sdc_pandas_series_operator_comp_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.comp_binop` implementation

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op7*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_comp_binop*

    Parameters
    ----------
    series: :obj:`pandas.Series`
        Input series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation
    """
    _func_name = 'Operator comp_binop().'
    ty_checker = TypeChecker(_func_name)

    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(
        other, SeriesType)
    # overload applies only when at least one operand is a Series
    if not (self_is_series or other_is_series):
        return None

    if not isinstance(self, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # indexes also count as comparable when both are none/numeric
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(
            self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError(
                '{} Not implemented for series with not-comparable indexes. \
                Given: self.index={}, other.index={}'.format(
                    _func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
            Given: self={}, other={}'.format(_func_name, self, other))

    def sdc_pandas_series_operator_comp_binop_impl(self, other):
        return self.comp_binop(other)

    return sdc_pandas_series_operator_comp_binop_impl
def hpat_pandas_series_rolling_corr(self, other=None, pairwise=None):
    """Typing overload for Series rolling.corr(other=None, pairwise=None)."""
    ty_checker = TypeChecker('Method rolling.corr().')
    ty_checker.check(self, SeriesRollingType)

    # TODO: check `other` is Series after a circular import of SeriesType fixed
    # accepted_other = (bool, Omitted, NoneType, SeriesType)
    # if not isinstance(other, accepted_other) and other is not None:
    #     ty_checker.raise_exc(other, 'Series', 'other')

    accepted_pairwise = (bool, Boolean, Omitted, NoneType)
    if not isinstance(pairwise, accepted_pairwise) and pairwise is not None:
        ty_checker.raise_exc(pairwise, 'bool', 'pairwise')

    # compile-time flag: when other is omitted, correlate the series with itself
    nan_other = isinstance(other, (Omitted, NoneType)) or other is None

    def hpat_pandas_rolling_series_corr_impl(self, other=None, pairwise=None):
        win = self._window
        minp = self._min_periods

        main_series = self._data
        main_arr = main_series._data
        main_arr_length = len(main_arr)

        # '== True' on a compile-time constant prunes the dead branch in Numba
        if nan_other == True:  # noqa
            other_arr = main_arr
        else:
            other_arr = other._data

        other_arr_length = len(other_arr)
        length = max(main_arr_length, other_arr_length)
        output_arr = numpy.empty(length, dtype=float64)

        def calc_corr(main, other, minp):
            # align arrays `main` and `other` by size and finiteness
            min_length = min(len(main), len(other))
            main_valid_indices = numpy.isfinite(main[:min_length])
            other_valid_indices = numpy.isfinite(other[:min_length])
            valid = main_valid_indices & other_valid_indices

            if len(main[valid]) < minp:
                return numpy.nan
            else:
                return arr_corr(main[valid], other[valid])

        # growing windows at the start of the arrays
        for i in prange(min(win, length)):
            main_arr_range = main_arr[:i + 1]
            other_arr_range = other_arr[:i + 1]
            output_arr[i] = calc_corr(main_arr_range, other_arr_range, minp)

        # full-size sliding windows for the remaining positions
        for i in prange(win, length):
            main_arr_range = main_arr[i + 1 - win:i + 1]
            other_arr_range = other_arr[i + 1 - win:i + 1]
            output_arr[i] = calc_corr(main_arr_range, other_arr_range, minp)

        return pandas.Series(output_arr)

    return hpat_pandas_rolling_series_corr_impl
def sdc_pandas_dataframe_rolling_var(self, ddof=1):
    """Typing overload for DataFrame rolling.var(ddof=1)."""
    checker = TypeChecker('Method rolling.var().')
    checker.check(self, DataFrameRollingType)

    # ddof (delta degrees of freedom) must be an integer or omitted
    valid_ddof_types = (int, Integer, Omitted)
    if not isinstance(ddof, valid_ddof_types):
        checker.raise_exc(ddof, 'int', 'ddof')

    return gen_df_rolling_method_impl('var', self, kws={'ddof': '1'})
def hpat_pandas_series_rolling_var(self, ddof=1):
    """Typing overload for Series rolling.var(ddof=1)."""
    checker = TypeChecker('Method rolling.var().')
    checker.check(self, SeriesRollingType)

    # ddof (delta degrees of freedom) must be an integer or omitted
    valid_ddof_types = (int, Integer, Omitted)
    if not isinstance(ddof, valid_ddof_types):
        checker.raise_exc(ddof, 'int', 'ddof')

    return sdc_pandas_series_rolling_var_impl
def hpat_pandas_stringmethods_center(self, width, fillchar=' '):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.center

    Examples
    --------
    .. literalinclude:: ../../../examples/series/str/series_str_center.py
       :language: python
       :lines: 27-
       :caption: Filling left and right side of strings in the Series with an additional character
       :name: ex_series_str_center

    .. command-output:: python ./series/str/series_str_center.py
       :cwd: ../../../examples

    .. seealso::
        :ref:`Series.str.rjust <pandas.Series.str.rjust>`
            Fills the left side of strings with an arbitrary character.
        :ref:`Series.str.ljust <pandas.Series.str.ljust>`
            Fills the right side of strings with an arbitrary character.

    .. todo:: Add support of 32-bit Unicode for `str.center()`

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas Series method :meth:`pandas.core.strings.StringMethods.center()` implementation.

    .. only:: developer
        Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_center
    """
    ty_checker = TypeChecker('Method center().')
    ty_checker.check(self, StringMethodsType)

    if not isinstance(width, Integer):
        ty_checker.raise_exc(width, 'int', 'width')

    accepted_types = (Omitted, StringLiteral, UnicodeType)
    if not isinstance(fillchar, accepted_types) and fillchar != ' ':
        ty_checker.raise_exc(fillchar, 'str', 'fillchar')

    def hpat_pandas_stringmethods_center_impl(self, width, fillchar=' '):
        # remember NA positions to restore them after centering
        mask = get_nan_mask(self._data._data)
        item_count = len(self._data)
        res_list = [''] * item_count
        # each item is centered independently, so the loop can run in parallel
        for idx in numba.prange(item_count):
            res_list[idx] = self._data._data[idx].center(width, fillchar)

        str_arr = create_str_arr_from_list(res_list)
        result = str_arr_set_na_by_mask(str_arr, mask)

        return pandas.Series(result, self._data._index, name=self._data._name)

    return hpat_pandas_stringmethods_center_impl
def pd_multi_index_getitem_overload(self, idx):
    """Typing overload for MultiIndex getitem: scalar, slice, boolean-mask
    and integer-array indexers."""
    if not isinstance(self, MultiIndexType):
        return None

    _func_name = 'Operator getitem().'
    ty_checker = TypeChecker(_func_name)

    # NOTE(review): types.List indexers pass this check but none of the
    # branches below handles them (only types.Array), so such calls fall
    # through and return None - confirm whether List support was intended.
    if not (isinstance(idx, (types.Integer, types.SliceType))
            or isinstance(idx, (types.Array, types.List))
            and isinstance(idx.dtype, (types.Integer, types.Boolean))):
        ty_checker.raise_exc(idx, 'integer, slice, integer array or list', 'idx')

    if isinstance(idx, types.Integer):
        def pd_multi_index_getitem_idx_scalar_impl(self, idx):
            index_len = len(self)
            # negative indexes are normalized to positive before bounds check
            # FIXME_Numba#5801: Numba type unification rules make this float
            idx = types.int64((index_len + idx) if idx < 0 else idx)
            if (idx < 0 or idx >= index_len):
                raise IndexError("MultiIndex.getitem: index is out of bounds")

            return _multi_index_getitem_impl(self, idx)

        return pd_multi_index_getitem_idx_scalar_impl

    elif isinstance(idx, types.SliceType):
        def pd_multi_index_getitem_idx_slice_impl(self, idx):
            # slice each level's codes; the levels themselves are reused
            new_levels = self._levels
            new_codes = sdc_tuple_map(
                lambda arr_codes, taken_idxs: arr_codes[taken_idxs],
                self._codes,
                idx)
            return pd.MultiIndex(new_levels, new_codes)

        return pd_multi_index_getitem_idx_slice_impl

    elif isinstance(idx, types.Array) and isinstance(idx.dtype, types.Boolean):
        def pd_multi_index_getitem_idx_bool_array_impl(self, idx):
            # apply the boolean mask to each level's codes
            new_levels = self._levels
            new_codes = sdc_tuple_map(
                lambda arr_codes, taken_idxs: numpy_like.getitem_by_mask(
                    arr_codes, taken_idxs),
                self._codes,
                idx)
            return pd.MultiIndex(new_levels, new_codes)

        return pd_multi_index_getitem_idx_bool_array_impl

    elif isinstance(idx, types.Array) and isinstance(idx.dtype, types.Integer):
        # integer-array indexing is equivalent to take()
        def pd_multi_index_getitem_as_take_impl(self, idx):
            return self.take(idx)

        return pd_multi_index_getitem_as_take_impl
def pd_range_index_overload(start=None, stop=None, step=None, dtype=None, copy=False, name=None, fastpath=None):
    """Typing overload for the pd.RangeIndex() constructor.

    Supports integer start/stop/step and an optional string name; copy and
    fastpath must be left at their defaults.
    """
    _func_name = 'pd.RangeIndex().'
    ty_checker = TypeChecker(_func_name)

    if not (isinstance(copy, types.Omitted) or copy is False):
        raise SDCLimitation(f"{_func_name} Unsupported parameter. Given 'copy': {copy}")

    # BUG FIX: this check tested 'copy' (copy-paste error) instead of 'fastpath',
    # so an explicitly passed fastpath argument was never rejected
    if not (isinstance(fastpath, types.Omitted) or fastpath is None):
        raise SDCLimitation(f"{_func_name} Unsupported parameter. Given 'fastpath': {fastpath}")

    dtype_is_np_int64 = dtype is types.NumberClass(types.int64)
    if not _check_dtype_param_type(dtype):
        ty_checker.raise_exc(dtype, 'int64 dtype', 'dtype')

    # TODO: support ensure_python_int from pandas.core.dtype.common to handle integers as float params
    if not (isinstance(start, (types.NoneType, types.Omitted, types.Integer)) or start is None):
        ty_checker.raise_exc(start, 'number or none', 'start')
    if not (isinstance(stop, (types.NoneType, types.Omitted, types.Integer)) or stop is None):
        ty_checker.raise_exc(stop, 'number or none', 'stop')
    if not (isinstance(step, (types.NoneType, types.Omitted, types.Integer)) or step is None):
        ty_checker.raise_exc(step, 'number or none', 'step')

    if not (isinstance(name, (types.NoneType, types.Omitted, types.StringLiteral, types.UnicodeType))
            or name is None):
        ty_checker.raise_exc(name, 'string or none', 'name')

    # all of start/stop/step omitted: match pandas by raising at runtime
    if ((isinstance(start, (types.NoneType, types.Omitted)) or start is None)
            and (isinstance(stop, (types.NoneType, types.Omitted)) or stop is None)
            and (isinstance(step, (types.NoneType, types.Omitted)) or step is None)):
        def pd_range_index_ctor_dummy_impl(
                start=None, stop=None, step=None, dtype=None, copy=False, name=None, fastpath=None):
            raise TypeError("RangeIndex(...) must be called with integers")

        return pd_range_index_ctor_dummy_impl

    def pd_range_index_ctor_impl(start=None, stop=None, step=None, dtype=None, copy=False, name=None, fastpath=None):
        if not (dtype is None or dtype == 'int64' or dtype_is_np_int64):
            raise TypeError("Invalid to pass a non-int64 dtype to RangeIndex")

        _start = types.int64(start) if start is not None else types.int64(0)

        # a single positional argument means stop, like the builtin range
        if stop is None:
            _start, _stop = types.int64(0), types.int64(start)
        else:
            _stop = types.int64(stop)

        _step = types.int64(step) if step is not None else types.int64(1)
        if _step == 0:
            raise ValueError("Step must not be zero")

        return init_range_index(range(_start, _stop, _step), name)

    return pd_range_index_ctor_impl
def sdc_pandas_dataframe_rolling_apply(self, func, raw=None):
    """Typing overload for DataFrame rolling.apply(func, raw=None)."""
    checker = TypeChecker('Method rolling.apply().')
    checker.check(self, DataFrameRollingType)

    # raw must be a boolean, or omitted/None
    if raw is not None and not isinstance(raw, (Omitted, NoneType, Boolean)):
        checker.raise_exc(raw, 'bool', 'raw')

    return gen_df_rolling_method_impl('apply', self, args=['func'], kws={'raw': 'None'})
def hpat_pandas_series_rolling_quantile(self, quantile, interpolation='linear'):
    """Typing overload for Series rolling.quantile(quantile, interpolation='linear')."""
    ty_checker = TypeChecker('Method rolling.quantile().')
    ty_checker.check(self, SeriesRollingType)

    if not isinstance(quantile, Number):
        ty_checker.raise_exc(quantile, 'float', 'quantile')

    str_types = (Omitted, StringLiteral, UnicodeType)
    if not isinstance(interpolation, str_types) and interpolation != 'linear':
        ty_checker.raise_exc(interpolation, 'str', 'interpolation')

    def hpat_pandas_rolling_series_quantile_impl(self, quantile, interpolation='linear'):
        # runtime validation of argument values (types were checked above)
        if quantile < 0 or quantile > 1:
            raise ValueError('quantile value not in [0, 1]')
        if interpolation != 'linear':
            raise ValueError('interpolation value not "linear"')

        win = self._window
        minp = self._min_periods

        input_series = self._data
        input_arr = input_series._data
        length = len(input_arr)
        output_arr = numpy.empty(length, dtype=float64)

        def calc_quantile(arr, quantile, minp):
            # NaN/inf values are dropped from the window before the calculation
            finite_arr = arr[numpy.isfinite(arr)]
            if len(finite_arr) < minp:
                return numpy.nan
            else:
                return arr_quantile(finite_arr, quantile)

        # growing windows at the start of the array
        boundary = min(win, length)
        for i in prange(boundary):
            arr_range = input_arr[:i + 1]
            output_arr[i] = calc_quantile(arr_range, quantile, minp)

        # full-size sliding windows for the remaining positions
        for i in prange(boundary, length):
            arr_range = input_arr[i + 1 - win:i + 1]
            output_arr[i] = calc_quantile(arr_range, quantile, minp)

        return pandas.Series(output_arr, input_series._index, name=input_series._name)

    return hpat_pandas_rolling_series_quantile_impl
def pd_range_index_getitem_overload(self, idx):
    """Typing overload for RangeIndex getitem: scalar, slice, boolean-mask
    and integer array/list indexers."""
    if not isinstance(self, RangeIndexType):
        return None

    _func_name = 'Operator getitem().'
    ty_checker = TypeChecker(_func_name)

    if not (isinstance(idx, (types.Integer, types.SliceType))
            or isinstance(idx, (types.Array, types.List))
            and isinstance(idx.dtype, (types.Integer, types.Boolean))):
        ty_checker.raise_exc(idx, 'integer, slice, integer array or list', 'idx')

    if isinstance(idx, types.Integer):
        def pd_range_index_getitem_impl(self, idx):
            range_len = len(self._data)
            # negative indexes are normalized to positive before bounds check
            # FIXME_Numba#5801: Numba type unification rules make this float
            idx = types.int64((range_len + idx) if idx < 0 else idx)
            if (idx < 0 or idx >= range_len):
                raise IndexError("RangeIndex.getitem: index is out of bounds")
            # element value computed directly from the range parameters
            return self.start + self.step * idx

        return pd_range_index_getitem_impl

    if isinstance(idx, types.SliceType):
        def pd_range_index_getitem_impl(self, idx):
            # normalize slice bounds against the range length
            fix_start, fix_stop, fix_step = idx.indices(len(self._data))
            return pd.RangeIndex(self.start + self.step * fix_start,
                                 self.start + self.step * fix_stop,
                                 self.step * fix_step,
                                 name=self._name)

        return pd_range_index_getitem_impl

    if isinstance(idx, (types.Array, types.List)):
        if isinstance(idx.dtype, types.Integer):
            # positional take: result is materialized as an Int64Index
            def pd_range_index_getitem_impl(self, idx):
                res_as_arr = _sdc_take(self, idx)
                return pd.Int64Index(res_as_arr, name=self._name)

            return pd_range_index_getitem_impl
        elif isinstance(idx.dtype, types.Boolean):
            def pd_range_index_getitem_impl(self, idx):
                return getitem_by_mask(self, idx)

            return pd_range_index_getitem_impl
def sdc_pandas_dataframe_rolling_quantile(self, quantile, interpolation='linear'):
    """Typing overload for DataFrame rolling.quantile(quantile, interpolation='linear')."""
    checker = TypeChecker('Method rolling.quantile().')
    checker.check(self, DataFrameRollingType)

    if not isinstance(quantile, Number):
        checker.raise_exc(quantile, 'float', 'quantile')

    # interpolation must be a string, or left at its default
    valid_interp_types = (Omitted, StringLiteral, UnicodeType)
    if interpolation != 'linear' and not isinstance(interpolation, valid_interp_types):
        checker.raise_exc(interpolation, 'str', 'interpolation')

    return gen_df_rolling_method_impl('quantile', self,
                                      args=['quantile'],
                                      kws={'interpolation': '"linear"'})
def pd_range_index_getitem_overload(self, idx):
    """Typing overload for RangeIndex getitem: scalar, slice, boolean-mask
    and integer array/list indexers."""
    if not isinstance(self, RangeIndexType):
        return None

    _func_name = 'Operator getitem().'
    ty_checker = TypeChecker(_func_name)

    if not (isinstance(idx, (types.Integer, types.SliceType))
            or isinstance(idx, (types.Array, types.List))
            and isinstance(idx.dtype, (types.Integer, types.Boolean))):
        ty_checker.raise_exc(idx, 'integer, slice, integer array or list', 'idx')

    if isinstance(idx, types.Integer):
        def pd_range_index_getitem_impl(self, idx):
            range_len = len(self._data)
            # FIX: cast to int64 added for consistency with sibling index getitem
            # overloads - FIXME_Numba#5801: Numba type unification rules make this float
            idx = types.int64((range_len + idx) if idx < 0 else idx)
            if (idx < 0 or idx >= range_len):
                raise IndexError("RangeIndex.getitem: index is out of bounds")
            return self.start + self.step * idx

        return pd_range_index_getitem_impl

    if isinstance(idx, types.SliceType):
        def pd_range_index_getitem_impl(self, idx):
            # normalize slice bounds against the range length
            fix_start, fix_stop, fix_step = idx.indices(len(self._data))
            return pd.RangeIndex(self.start + self.step * fix_start,
                                 self.start + self.step * fix_stop,
                                 self.step * fix_step,
                                 name=self._name)

        return pd_range_index_getitem_impl

    # returns np.array which is used to represent pandas Int64Index now
    if isinstance(idx, (types.Array, types.List)):
        if isinstance(idx.dtype, types.Integer):
            def pd_range_index_getitem_impl(self, idx):
                return _sdc_take(self, idx)

            return pd_range_index_getitem_impl
        elif isinstance(idx.dtype, types.Boolean):
            def pd_range_index_getitem_impl(self, idx):
                return getitem_by_mask(self, idx)

            return pd_range_index_getitem_impl
def pd_int64_index_join_overload(self, other, how, level=None, return_indexers=False, sort=False):
    """Typing overload for Int64Index.join(); only how='outer' is supported."""
    if not isinstance(self, Int64IndexType):
        return None

    _func_name = 'Method join().'
    ty_checker = TypeChecker(_func_name)

    if not isinstance(other, sdc_pandas_index_types):
        ty_checker.raise_exc(other, 'pandas index', 'other')

    # 'how' must be a compile-time string literal so the branch can be resolved
    if not isinstance(how, types.StringLiteral):
        ty_checker.raise_exc(how, 'string', 'how')
    if not how.literal_value == 'outer':
        raise SDCLimitation(
            f"{_func_name} Only supporting 'outer' now. Given 'how': {how.literal_value}"
        )

    if not (isinstance(level, (types.Omitted, types.NoneType)) or level is None):
        ty_checker.raise_exc(level, 'None', 'level')

    if not (isinstance(return_indexers, (types.Omitted, types.BooleanLiteral))
            or return_indexers is False):
        ty_checker.raise_exc(return_indexers, 'boolean', 'return_indexers')

    if not (isinstance(sort, (types.Omitted, types.Boolean)) or sort is False):
        ty_checker.raise_exc(sort, 'boolean', 'sort')

    # NOTE(review): the check above also admits types.Omitted and plain False
    # for return_indexers, neither of which has .literal_value - presumably
    # only the BooleanLiteral case reaches here; confirm against callers.
    _return_indexers = return_indexers.literal_value

    def pd_int64_index_join_impl(self, other, how, level=None, return_indexers=False, sort=False):
        # '== True' on a compile-time constant prunes the dead branch in Numba
        if _return_indexers == True:  # noqa
            return sdc_indexes_join_outer(self, other)
        else:
            # discard the indexer arrays, return the joined index only
            joined_index, = sdc_indexes_join_outer(self, other)
            return joined_index

    return pd_int64_index_join_impl
def concurrent_dict_update_ovld(self, other):
    """Numba overload of ConcurrentDict.update(other).

    Both operands must be ConcurrentDict instances of the same type;
    delegates the actual merge to hashmap_update.
    """
    # FIX: the original guard evaluated the tuple `(self, ConcurrentDictType)`,
    # which is always truthy, so self's type was never actually checked.
    if not (isinstance(self, ConcurrentDictType) and isinstance(other, ConcurrentDictType)):
        return None

    _func_name = f'Method {self}::update()'
    ty_checker = TypeChecker(_func_name)

    # update() is only supported between identically-typed dicts
    if self is not other:
        ty_checker.raise_exc(other, f'{self}', 'other')

    def concurrent_dict_update_impl(self, other):
        return hashmap_update(self, other)

    return concurrent_dict_update_impl
def pd_int64_index_overload(data, dtype=None, copy=False, name=None):
    """Numba overload of the pd.Int64Index() constructor.

    Accepts an integer array/list or another integer-based index
    (PositionalIndexType, RangeIndexType, Int64IndexType) as `data`.
    Typing-time checks validate dtype/copy/name; the returned impl converts
    `data` to an int64 array and wraps it with init_int64_index.
    """
    _func_name = 'pd.Int64Index().'
    ty_checker = TypeChecker(_func_name)

    # index types whose underlying values can back an Int64Index
    convertible_indexes = (PositionalIndexType, RangeIndexType, Int64IndexType)
    if not (isinstance(data, (types.Array, types.List)) and isinstance(data.dtype, types.Integer)
            or isinstance(data, convertible_indexes)):
        ty_checker.raise_exc(data, 'array/list of integers or integer index', 'data')

    # precompute dtype classification as compile-time constants (freevars of the impl):
    # dtype may come in as a NumberClass (e.g. np.int64), a numba integer type,
    # or a string like 'int64'
    dtype_is_number_class = isinstance(dtype, types.NumberClass)
    dtype_is_numpy_signed_int = (check_signed_integer(dtype)
                                 or dtype_is_number_class and check_signed_integer(dtype.dtype))
    dtype_is_unicode_str = isinstance(dtype, (types.UnicodeType, types.StringLiteral))
    if not _check_dtype_param_type(dtype):
        ty_checker.raise_exc(dtype, 'int64 dtype', 'dtype')

    if not (isinstance(copy, (types.NoneType, types.Omitted, types.Boolean)) or copy is False):
        ty_checker.raise_exc(copy, 'bool', 'copy')

    if not (isinstance(name, (types.NoneType, types.Omitted, types.StringLiteral, types.UnicodeType)) or name is None):
        ty_checker.raise_exc(name, 'string or none', 'name')

    # more compile-time constants that select the branch taken inside the impl
    is_data_array = isinstance(data, types.Array)
    is_data_index = isinstance(data, convertible_indexes)
    data_dtype_is_int64 = data.dtype is types.int64

    def pd_int64_index_ctor_impl(data, dtype=None, copy=False, name=None):
        # runtime re-validation of dtype: strings are accepted only for the
        # signed-integer spellings
        if not (dtype is None
                or dtype_is_numpy_signed_int
                or dtype_is_unicode_str and dtype in ('int8', 'int16', 'int32', 'int64')):
            raise ValueError("Incorrect `dtype` passed: expected signed integer")

        # `== True` comparisons are kept deliberately so Numba folds these
        # branches on the literal freevars computed above
        if is_data_array == True:  # noqa
            _data = data
        elif is_data_index == True:  # noqa
            _data = data.values
        else:
            # using fix_df_index to get array since it handles index=None
            _data = fix_df_index(data)._data

        if data_dtype_is_int64 == False:  # noqa
            # astype always produces a fresh array, so no extra copy needed
            _data = numpy_like.astype(_data, dtype=types.int64)
        else:
            if copy:
                _data = np.copy(_data)

        return init_int64_index(_data, name)

    return pd_int64_index_ctor_impl
def _sdc_pandas_series_align_overload(series, other, size='max', finiteness=False):
    """Numba overload of the internal helper that aligns two Series.

    Produces two Series of equal length built from the inputs' data arrays:
    with size='max' the result is padded with NaN up to the longer input,
    with size='min' it is truncated to the shorter one. When finiteness is
    True, positions where either input is non-finite are set to NaN in both
    outputs.
    """
    ty_checker = TypeChecker('Function sdc.common_functions._sdc_pandas_series_align().')
    ty_checker.check(series, SeriesType)
    ty_checker.check(other, SeriesType)

    str_types = (str, types.StringLiteral, types.UnicodeType, types.Omitted)
    if not isinstance(size, str_types):
        ty_checker.raise_exc(size, 'str', 'size')

    if not isinstance(finiteness, (bool, types.Boolean, types.Omitted)):
        ty_checker.raise_exc(finiteness, 'bool', 'finiteness')

    def _sdc_pandas_series_align_impl(series, other, size='max', finiteness=False):
        # `size` is validated at runtime since it may not be a literal
        if size != 'max' and size != 'min':
            raise ValueError("Function sdc.common_functions._sdc_pandas_series_align(). "
                             "The object size\n expected: 'max' or 'min'")

        arr, other_arr = series._data, other._data
        arr_len, other_arr_len = len(arr), len(other_arr)
        min_length = min(arr_len, other_arr_len)
        length = max(arr_len, other_arr_len) if size == 'max' else min_length

        # pre-fill with NaN so tail positions beyond min_length stay NaN
        aligned_arr = numpy.repeat([numpy.nan], length)
        aligned_other_arr = numpy.repeat([numpy.nan], length)

        # parallel copy of the overlapping prefix; with finiteness=True,
        # positions where either value is non-finite are left/overwritten as NaN
        for i in numba.prange(min_length):
            if not finiteness or (numpy.isfinite(arr[i]) and numpy.isfinite(other_arr[i])):
                aligned_arr[i] = arr[i]
                aligned_other_arr[i] = other_arr[i]
            else:
                aligned_arr[i] = aligned_other_arr[i] = numpy.nan

        aligned = pandas.Series(aligned_arr, name=series._name)
        aligned_other = pandas.Series(aligned_other_arr, name=other._name)

        return aligned, aligned_other

    return _sdc_pandas_series_align_impl
def sdc_pandas_series_groupby_var(self, ddof=1, *args):
    """Numba overload entry for Series GroupBy.var().

    Validates that self is a SeriesGroupByType and ddof is an integer, then
    delegates implementation generation to sdc_pandas_series_groupby_apply_func.
    """
    checker = TypeChecker('GroupBy.var().')
    checker.check(self, SeriesGroupByType)

    valid_ddof_types = (types.Omitted, int, types.Integer)
    if not isinstance(ddof, valid_ddof_types):
        checker.raise_exc(ddof, 'int', 'ddof')

    # applied function name, method argument spec, defaults and the params
    # actually used by the generated implementation
    return sdc_pandas_series_groupby_apply_func(
        self,
        'var',
        ['self', 'ddof', '*args'],
        {'ddof': 1},
        {'ddof': 'ddof'},
    )