def raise_exc(self, data, expected_types, name=''):
    """Raise a TypingError carrying the checker's unified message.

    Parameters
    ----------
    data: :obj:`any`
        real type of the data
    expected_types: :obj:`str`
        expected types inserting directly to the exception
    name: :obj:`str`
        name of the parameter
    """
    # Format the template with the captured function name and the offending
    # argument details, then abort typing with the resulting message.
    raise TypingError(
        self.msg_template.format(self.func_name, name, data, expected_types))
def unicode_rjust(string, width, fillchar=' '):
    """Typing-time overload of ``str.rjust`` for unicode strings.

    Validates the argument types during compilation and returns the runtime
    implementation that right-justifies *string* in a field of *width*
    characters padded with *fillchar*.
    """
    if not isinstance(width, types.Integer):
        raise TypingError('The width must be an Integer')
    if not (fillchar == ' '
            or isinstance(fillchar, (types.Omitted, types.UnicodeType))):
        raise TypingError('The fillchar must be a UnicodeType')

    def rjust_impl(string, width, fillchar=' '):
        if len(fillchar) != 1:
            raise ValueError(
                'The fill character must be exactly one character long')
        # No padding needed when the string already fills the field.
        pad = width - len(string)
        if pad <= 0:
            return string
        return (fillchar * pad) + string

    return rjust_impl
def unicode_center(string, width, fillchar=' '):
    """Typing-time overload of ``str.center`` for unicode strings."""
    if not isinstance(width, types.Integer):
        raise TypingError('The width must be an Integer')

    if isinstance(fillchar, types.UnicodeCharSeq):
        def center_impl(string, width, fillchar):
            # Convert the char-seq fill character to unicode and delegate.
            return string.center(width, str(fillchar))
        return center_impl

    if not (fillchar == ' '
            or isinstance(fillchar, (types.Omitted, types.UnicodeType))):
        raise TypingError('The fillchar must be a UnicodeType')

    def center_impl(string, width, fillchar=' '):
        if len(fillchar) != 1:
            raise ValueError(
                'The fill character must be exactly one character long')
        total = width - len(string)
        if total <= 0:
            return string
        # `total & width & 1` gives the left margin the extra character
        # exactly when both the padding amount and the requested width are
        # odd — the same parity rule CPython uses for str.center.
        left = (total // 2) + (total & width & 1)
        right = total - left
        left_fill = fillchar * left
        if left == right:
            return left_fill + string + left_fill
        return left_fill + string + (fillchar * right)

    return center_impl
def resolve_argsort(self, ary, args, kws):
    """Resolve the typing signature of ``ndarray.argsort``.

    Accepts only the ``kind`` keyword; any other keyword raises a
    TypingError. Only 1-D arrays are supported — other ranks fall through
    (return None, no match).
    """
    assert not args
    remaining = dict(kws)
    # Default sort kind mirrors NumPy's 'quicksort'.
    kind = remaining.pop('kind', types.StringLiteral('quicksort'))
    if remaining:
        raise TypingError(
            "Unsupported keywords: {!r}".format([k for k in remaining.keys()]))
    if ary.ndim != 1:
        return None

    def argsort_stub(kind='quicksort'):
        pass

    # Attach the stub's Python signature so keyword calls resolve correctly.
    pysig = utils.pysignature(argsort_stub)
    return signature(types.Array(types.intp, 1, 'C'),
                     kind).replace(pysig=pysig)
def _sentry_safe_cast(fromty, toty):
    """Check and raise TypingError if *fromty* cannot be safely cast to *toty*

    Numeric conversions int -> int, int -> float and float -> float are
    always accepted, even when the typing context classifies them as
    unsafe (e.g. a narrowing conversion).

    Raises
    ------
    TypingError
        If no conversion exists, or the conversion is unsafe and not one
        of the accepted numeric cases above.
    """
    tyctxt = cpu_target.typing_context
    by = tyctxt.can_convert(fromty, toty)
    if by is None or by > Conversion.safe:
        # Accept int -> int and int -> float in a single check; the three
        # original branches collapse to these two equivalent conditions.
        if (isinstance(fromty, types.Integer)
                and isinstance(toty, (types.Integer, types.Float))):
            return
        # Accept float -> float.
        if isinstance(fromty, types.Float) and isinstance(toty, types.Float):
            return
        raise TypingError('cannot safely cast {} to {}'.format(fromty, toty))
def _as_meminfo(typingctx, dctobj):
    """Returns the MemInfoPointer of a dictionary.
    """
    if not isinstance(dctobj, types.DictType):
        raise TypingError('expected *dctobj* to be a DictType')

    def codegen(context, builder, sig, args):
        [ty_dict] = sig.args
        [val] = args
        # Keep the dict alive while its MemInfo is exposed to the caller.
        context.nrt.incref(builder, ty_dict, val)
        proxy_cls = cgutils.create_struct_proxy(ty_dict)
        dstruct = proxy_cls(context, builder, value=val)
        # Hand back only the plain MemInfo pointer.
        return dstruct.meminfo

    return _meminfo_dictptr(dctobj), codegen
def _as_meminfo(typingctx, lstobj):
    """Returns the MemInfoPointer of a list.
    """
    if not isinstance(lstobj, types.ListType):
        raise TypingError('expected *lstobj* to be a ListType')

    def codegen(context, builder, sig, args):
        [ty_list] = sig.args
        [val] = args
        # Keep the list alive while its MemInfo is exposed to the caller.
        context.nrt.incref(builder, ty_list, val)
        proxy_cls = cgutils.create_struct_proxy(ty_list)
        lstruct = proxy_cls(context, builder, value=val)
        # Hand back only the plain MemInfo pointer.
        return lstruct.meminfo

    return _meminfo_listptr(lstobj), codegen
def impl_getitem(l, index):
    """Overload of ``l[index]`` for typed lists.

    Dispatches at typing time on the index type: an integer index yields a
    single item, a slice yields a new typed list. Returns None (no match)
    when *l* is not a typed list.
    """
    if not isinstance(l, types.ListType):
        return

    indexty = INDEXTY
    itemty = l.item_type

    # Lists whose item type is NoneType need no payload lookup at all —
    # getitem can simply return None after validating the index.
    IS_NOT_NONE = not isinstance(l.item_type, types.NoneType)

    if index in index_types:
        if IS_NOT_NONE:
            def integer_non_none_impl(l, index):
                # Normalize (e.g. negative) indices, then cast to the
                # native index type before the low-level lookup.
                index = handle_index(l, index)
                castedindex = _cast(index, indexty)
                status, item = _list_getitem(l, castedindex)
                if status == ListStatus.LIST_OK:
                    # Strip the Optional wrapper from the stored item.
                    return _nonoptional(item)
                else:
                    raise AssertionError("internal list error during getitem")
            return integer_non_none_impl
        else:
            def integer_none_impl(l, index):
                # Still validate the index even though the result is None.
                index = handle_index(l, index)
                return None
            return integer_none_impl
    elif isinstance(index, types.SliceType):
        def slice_impl(l, index):
            # Build a fresh list of the same item type from the slice.
            newl = new_list(itemty)
            for i in handle_slice(l, index):
                newl.append(l[i])
            return newl
        return slice_impl
    else:
        raise TypingError("list indices must be integers or slices")
def impl_delitem(l, index):
    """Overload of ``del l[index]`` for typed lists.

    Signed integers delete a single item; slices delete a range; anything
    else is a typing error. Returns None (no match) for non-typed-lists.
    """
    if not isinstance(l, types.ListType):
        return

    if index in types.signed_domain:
        def integer_impl(l, index):
            # pop removes the element; the returned value is discarded.
            l.pop(index)
        return integer_impl

    if isinstance(index, types.SliceType):
        def slice_impl(l, index):
            rng = handle_slice(l, index)
            _list_delete_slice(l, rng.start, rng.stop, rng.step)
        return slice_impl

    raise TypingError("list indices must be signed integers or slices")
def impl_insert(l, index, item):
    """Overload of ``list.insert`` for typed lists.

    Insert is implemented as append-then-shift: the list is grown by one,
    elements above the insertion point are shifted right, then the new
    item is written in place. For imprecise (untyped-yet) lists the list
    type is first refined with the item type and an explicit signature is
    returned alongside the implementation.
    """
    if not isinstance(l, types.ListType):
        return
    if index in index_types:
        def impl(l, index, item):
            # If the index is larger than the size of the list or if the list is
            # empty, just append.
            if index >= len(l) or len(l) == 0:
                l.append(item)
            # Else, do the insert dance
            else:
                # convert negative indices
                if index < 0:
                    # if the index is still negative after conversion, use 0
                    index = max(len(l) + index, 0)
                # grow the list by one, make room for item to insert
                # (the appended value is a placeholder, overwritten below)
                l.append(l[0])
                # reverse iterate over the list and shift all elements
                i = len(l) - 1
                while (i > index):
                    l[i] = l[i - 1]
                    i -= 1
                # finally, insert the item
                l[index] = item

        if l.is_precise():
            # Handle the precise case.
            return impl
        else:
            # Handle the imprecise case: refine the list type with the
            # item's type so later operations see a concrete item type.
            l = l.refine(item)
            # Re-bind the item type to match the arguments.
            itemty = l.item_type
            # Create the signature that we wanted this impl to have.
            sig = typing.signature(types.void, l, INDEXTY, itemty)
            return sig, impl
    else:
        raise TypingError("list insert indices must be integers")
def hpat_pandas_series_iloc(self):
    """
    Pandas Series attribute 'values' implementation.
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.values.html#pandas.Series.values

    Algorithm: result = series.values
    Where:
        series: pandas.series
        result: pandas.series as ndarray

    Test:  python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_values
    """
    # NOTE(review): the function is named *_iloc, but the docstring, the
    # error-message prefix and the implementation all describe
    # Series.values (it returns the underlying data array). Looks like a
    # copy/paste naming slip — confirm against the registration site.
    _func_name = 'Attribute values.'

    if not isinstance(self, SeriesType):
        raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))

    def hpat_pandas_series_values_impl(self):
        # Return the raw underlying ndarray of the series.
        return self._data

    return hpat_pandas_series_values_impl
def unicode_zfill(string, width):
    """Typing-time overload of ``str.zfill`` for unicode strings."""
    if not isinstance(width, types.Integer):
        raise TypingError("<width> must be an Integer")

    def zfill_impl(string, width):
        length = len(string)
        if width <= length:
            return string
        lead = string[0] if length else ''
        zeros = '0' * (width - length)
        # A leading sign character stays in front of the zero padding.
        if lead == '+' or lead == '-':
            return lead + zeros + string[1:]
        return zeros + string

    return zfill_impl
def to_fixed_tuple(typingctx, array, length):
    """Convert *array* into a tuple of *length*

    Returns ``UniTuple(array.dtype, length)``

    ** Warning **
    - No boundchecking.
      If *length* is longer than *array.size*, the behavior is undefined.
    """
    # *length* must be a compile-time constant so the tuple arity is fixed.
    if not isinstance(length, types.Const):
        raise RequireConstValue('*length* argument must be a constant')

    if array.ndim != 1:
        raise TypingError("Not supported on array.ndim={}".format(array.ndim))

    # Determine types
    tuple_size = int(length.value)
    tuple_type = types.UniTuple(dtype=array.dtype, count=tuple_size)
    sig = tuple_type(array, length)

    def codegen(context, builder, signature, args):
        def impl(array, length, empty_tuple):
            # Fill the undef tuple element by element; the loop bound is a
            # constant, so this unrolls at compile time.
            out = empty_tuple
            for i in range(length):
                out = tuple_setitem(out, i, array[i])
            return out

        inner_argtypes = [signature.args[0], types.intp, tuple_type]
        inner_sig = typing.signature(tuple_type, *inner_argtypes)
        ll_idx_type = context.get_value_type(types.intp)
        # Allocate an empty tuple
        empty_tuple = context.get_constant_undef(tuple_type)
        inner_args = [args[0], ll_idx_type(tuple_size), empty_tuple]
        res = context.compile_internal(builder, impl, inner_sig, inner_args)
        return res

    return sig, codegen
def impl_pop(l, index=-1):
    """Overload of ``list.pop`` for typed lists."""
    if not isinstance(l, types.ListType):
        return

    indexty = INDEXTY

    # FIXME: this type check works, but it isn't clear why and if it optimal
    acceptable_index = (isinstance(index, (int, types.Omitted))
                        or index in types.signed_domain)
    if not acceptable_index:
        raise TypingError("argument for pop must be a signed integer")

    def impl(l, index=-1):
        if len(l) == 0:
            raise IndexError("pop from empty list")
        normalized = handle_index(l, index)
        casted = _cast(normalized, indexty)
        status, item = _list_pop(l, casted)
        if status != ListStatus.LIST_OK:
            raise AssertionError("internal list error during pop")
        # Strip the Optional wrapper from the popped item.
        return _nonoptional(item)

    return impl
def check_arg(arg, name):
    """Raise TypingError unless *arg* is an integer type, omitted or None."""
    acceptable = (arg is None
                  or arg in index_types
                  or isinstance(arg, (types.Omitted, types.NoneType)))
    if not acceptable:
        raise TypingError(
            "{} argument for index must be an integer".format(name))
def sdc_pandas_series_comp_binop(self, other, level=None, fill_value=None, axis=0):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.Series.comp_binop

    Limitations
    -----------
    Parameters ``level`` and ``axis`` are currently unsupported by Intel Scalable Dataframe Compiler

    Examples
    --------
    .. literalinclude:: ../../../examples/series/series_comp_binop.py
       :language: python
       :lines: 27-
       :caption:
       :name: ex_series_comp_binop

    .. command-output:: python ./series/series_comp_binop.py
       :cwd: ../../../examples

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas Series method :meth:`pandas.Series.comp_binop` implementation.

    .. only:: developer
        Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op8
    """
    _func_name = 'Method comp_binop().'
    ty_checker = TypeChecker(_func_name)
    ty_checker.check(self, SeriesType)

    # Unsupported parameters must be omitted or left at their defaults.
    if not (isinstance(level, types.Omitted) or level is None):
        ty_checker.raise_exc(level, 'None', 'level')
    if not isinstance(fill_value, (types.Omitted, types.Number, types.NoneType)) and fill_value is not None:
        ty_checker.raise_exc(fill_value, 'number', 'fill_value')
    if not (isinstance(axis, types.Omitted) or axis == 0):
        ty_checker.raise_exc(axis, 'int', 'axis')

    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    if not isinstance(self, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')
    if not isinstance(other, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # Indexes are alignable when they are comparable, or when both are
        # default (None) or numeric.
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
Given: self={}, other={}'.format(_func_name, self, other))

    # Typing-time flag captured by the closures below.
    fill_value_is_none = isinstance(fill_value, (types.NoneType, types.Omitted)) or fill_value is None

    if not operands_are_series:
        # One operand is a scalar: compare elementwise against the series data.
        def _series_comp_binop_scalar_impl(self, other, level=None, fill_value=None, axis=0):
            if self_is_series == True:  # noqa
                if not (fill_value is None or numpy.isnan(fill_value)):
                    numpy_like.fillna(self._data, inplace=True, value=fill_value)
                return pandas.Series(self._data < other, index=self._index, name=self._name)
            else:
                if not (fill_value is None or numpy.isnan(fill_value)):
                    numpy_like.fillna(other._data, inplace=True, value=fill_value)
                return pandas.Series(self < other._data, index=other._index, name=other._name)

        return _series_comp_binop_scalar_impl

    else:
        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
            def _series_comp_binop_none_indexes_impl(self, other, level=None, fill_value=None, axis=0):
                if not (fill_value is None or numpy.isnan(fill_value)):
                    numpy_like.fillna(self._data, inplace=True, value=fill_value)
                    numpy_like.fillna(other._data, inplace=True, value=fill_value)
                left_size, right_size = len(self._data), len(other._data)
                if (left_size == right_size):
                    return pandas.Series(self._data < other._data)
                else:
                    raise ValueError("Can only compare identically-labeled Series objects")

            return _series_comp_binop_none_indexes_impl
        else:
            # For numeric indexes find the common dtype used when building
            # the result index.
            if none_or_numeric_indexes:
                ty_left_index_dtype = types.int64 if isinstance(self.index, types.NoneType) else self.index.dtype
                ty_right_index_dtype = types.int64 if isinstance(other.index, types.NoneType) else other.index.dtype
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [ty_left_index_dtype, ty_right_index_dtype], [])

            def _series_comp_binop_common_impl(self, other, level=None, fill_value=None, axis=0):
                if not (fill_value is None or numpy.isnan(fill_value)):
                    numpy_like.fillna(self._data, inplace=True, value=fill_value)
                    numpy_like.fillna(other._data, inplace=True, value=fill_value)
                left_index, right_index = self.index, other.index
                if sdc_check_indexes_equal(left_index, right_index):
                    if none_or_numeric_indexes == True:  # noqa
                        new_index = numpy_like.astype(left_index, numba_index_common_dtype)
                    else:
                        new_index = self._index
                    return pandas.Series(self._data < other._data, new_index)
                else:
                    raise ValueError("Can only compare identically-labeled Series objects")

            return _series_comp_binop_common_impl

    return None
def impl_setitem(l, index, item):
    """Overload of ``l[index] = item`` for typed lists.

    Integer indexes assign a single item; slice indexes accept any
    iterable and handle replace, replace+insert, replace+delete for simple
    slices, and exact-length replace for extended slices.

    Raises
    ------
    TypingError
        If *index* is neither an integer nor a slice, or a slice is
        assigned from a non-iterable.
    """
    if not isinstance(l, types.ListType):
        return

    indexty = INDEXTY
    itemty = l.item_type

    if index in index_types:
        def impl_integer(l, index, item):
            index = handle_index(l, index)
            castedindex = _cast(index, indexty)
            casteditem = _cast(item, itemty)
            status = _list_setitem(l, castedindex, casteditem)
            if status == ListStatus.LIST_OK:
                return
            else:
                # Fixed typo: original message said "settitem".
                raise AssertionError("internal list error during setitem")
        return impl_integer

    elif isinstance(index, types.SliceType):
        if not isinstance(item, types.IterableType):
            raise TypingError("can only assign an iterable when using a slice "
                              "with assignment/setitem")

        def impl_slice(l, index, item):
            # special case "a[i:j] = a", need to copy first
            if l == item:
                item = item.copy()
            slice_range = handle_slice(l, index)
            # non-extended (simple) slices
            if slice_range.step == 1:
                # replace
                if len(item) == len(slice_range):
                    for i, j in zip(slice_range, item):
                        l[i] = j
                # replace and insert
                if len(item) > len(slice_range):
                    # do the replaces we can
                    for i, j in zip(slice_range, item[:len(slice_range)]):
                        l[i] = j
                    # insert the remaining ones
                    insert_range = range(
                        slice_range.stop,
                        slice_range.stop + len(item) - len(slice_range))
                    for i, k in zip(insert_range, item[len(slice_range):]):
                        # FIXME: This may be slow. Each insert can incur a
                        # memory copy of one or more items.
                        l.insert(i, k)
                # replace and delete
                if len(item) < len(slice_range):
                    # do the replaces we can
                    replace_range = range(slice_range.start,
                                          slice_range.start + len(item))
                    for i, j in zip(replace_range, item):
                        l[i] = j
                    # delete remaining ones
                    del l[slice_range.start + len(item):slice_range.stop]
            # Extended slices
            else:
                if len(slice_range) != len(item):
                    raise ValueError(
                        "length mismatch for extended slice and sequence")
                # extended slice can only replace
                for i, j in zip(slice_range, item):
                    l[i] = j
        return impl_slice

    else:
        raise TypingError("list indices must be integers or slices")
def sdc_pandas_series_operator_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.binop` implementation

    Note: Currently implemented for numeric Series only.
        Differs from Pandas in returning Series with fixed dtype :obj:`float64`

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op1*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op2*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_binop*

    Parameters
    ----------
    series: :obj:`pandas.Series`
        Input series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation
    """
    # Fix: _func_name was referenced in the TypingError messages below but
    # never defined, causing a NameError at typing time instead of the
    # intended diagnostics.
    _func_name = 'Operator binop().'
    ty_checker = TypeChecker(_func_name)

    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    # this overload is not for string series
    self_is_string_series = self_is_series and isinstance(self.dtype, types.UnicodeType)
    other_is_string_series = other_is_series and isinstance(other.dtype, types.UnicodeType)
    if self_is_string_series or other_is_string_series:
        return None

    if not isinstance(self, (SeriesType, types.Number)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')
    if not isinstance(other, (SeriesType, types.Number)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # Indexes are alignable when they are comparable, or when both are
        # default (None) or numeric.
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
Given: self={}, other={}'.format(_func_name, self, other))

    def sdc_pandas_series_operator_binop_impl(self, other):
        # Delegate to the Series.binop method implementation.
        return self.binop(other)

    return sdc_pandas_series_operator_binop_impl
def sdc_pandas_series_operator_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.binop` implementation

    Note: Currently implemented for numeric Series only.
        Differs from Pandas in returning Series with fixed dtype :obj:`float64`

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op1*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op2*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_binop*

    Parameters
    ----------
    series: :obj:`pandas.Series`
        Input series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation
    """
    _func_name = 'Operator binop().'
    ty_checker = TypeChecker('Operator binop().')
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(
        other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    # this overload is not for string series
    self_is_string_series = self_is_series and isinstance(
        self.dtype, types.UnicodeType)
    other_is_string_series = other_is_series and isinstance(
        other.dtype, types.UnicodeType)
    if self_is_string_series or other_is_string_series:
        return None

    if not isinstance(self, (SeriesType, types.Number)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')
    if not isinstance(other, (SeriesType, types.Number)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # Indexes are alignable when they are comparable, or when both are
        # default (None) or numeric.
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(
            self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError(
                '{} Not implemented for series with not-comparable indexes. \
Given: self.index={}, other.index={}'.format(
                    _func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
Given: self={}, other={}'.format(_func_name, self, other))

    # specializations for numeric series only
    if not operands_are_series:
        # One operand is a scalar: broadcast it against the series data.
        def _series_operator_binop_scalar_impl(self, other):
            if self_is_series == True:  # noqa
                result_data = numpy.empty(len(self._data), dtype=numpy.float64)
                result_data[:] = self._data + numpy.float64(other)
                return pandas.Series(result_data, index=self._index, name=self._name)
            else:
                result_data = numpy.empty(len(other._data), dtype=numpy.float64)
                result_data[:] = numpy.float64(self) + other._data
                return pandas.Series(result_data, index=other._index, name=other._name)

        return _series_operator_binop_scalar_impl

    else:  # both operands are numeric series
        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
            def _series_operator_binop_none_indexes_impl(self, other):
                if (len(self._data) == len(other._data)):
                    result_data = astype(self._data, numpy.float64)
                    result_data = result_data + other._data
                    return pandas.Series(result_data)
                else:
                    # Sizes differ: pad the shorter operand with NaN before adding.
                    left_size, right_size = len(self._data), len(other._data)
                    min_data_size = min(left_size, right_size)
                    max_data_size = max(left_size, right_size)
                    result_data = numpy.empty(max_data_size, dtype=numpy.float64)
                    if (left_size == min_data_size):
                        result_data[:min_data_size] = self._data
                        result_data[min_data_size:] = numpy.nan
                        result_data = result_data + other._data
                    else:
                        result_data[:min_data_size] = other._data
                        result_data[min_data_size:] = numpy.nan
                        result_data = self._data + result_data
                    return pandas.Series(result_data)

            return _series_operator_binop_none_indexes_impl
        else:
            # for numeric indexes find common dtype to be used when creating joined index
            if none_or_numeric_indexes:
                ty_left_index_dtype = types.int64 if isinstance(
                    self.index, types.NoneType) else self.index.dtype
                ty_right_index_dtype = types.int64 if isinstance(
                    other.index, types.NoneType) else other.index.dtype
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [ty_left_index_dtype, ty_right_index_dtype], [])

            def _series_operator_binop_common_impl(self, other):
                left_index, right_index = self.index, other.index

                # check if indexes are equal and series don't have to be aligned
                if sdc_check_indexes_equal(left_index, right_index):
                    result_data = numpy.empty(len(self._data), dtype=numpy.float64)
                    result_data[:] = self._data + other._data
                    if none_or_numeric_indexes == True:  # noqa
                        result_index = astype(left_index, numba_index_common_dtype)
                    else:
                        result_index = self._index
                    return pandas.Series(result_data, index=result_index)

                # TODO: replace below with core join(how='outer', return_indexers=True) when implemented
                joined_index, left_indexer, right_indexer = sdc_join_series_indexes(
                    left_index, right_index)
                result_size = len(joined_index)
                left_values = numpy.empty(result_size, dtype=numpy.float64)
                right_values = numpy.empty(result_size, dtype=numpy.float64)
                # Gather values by indexer; -1 marks a missing label -> NaN.
                for i in numba.prange(result_size):
                    left_pos, right_pos = left_indexer[i], right_indexer[i]
                    left_values[i] = self._data[
                        left_pos] if left_pos != -1 else numpy.nan
                    right_values[i] = other._data[
                        right_pos] if right_pos != -1 else numpy.nan
                result_data = left_values + right_values
                return pandas.Series(result_data, joined_index)

            return _series_operator_binop_common_impl

    return None
def sdc_pandas_series_operator_comp_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.comp_binop` implementation

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op7*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_comp_binop*

    Parameters
    ----------
    series: :obj:`pandas.Series`
        Input series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation
    """
    _func_name = 'Operator comp_binop().'
    ty_checker = TypeChecker('Operator comp_binop().')
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(
        other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    if not isinstance(self, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')
    if not isinstance(other, (SeriesType, types.Number, types.UnicodeType)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # Indexes are alignable when they are comparable, or when both are
        # default (None) or numeric.
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(
            self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError(
                '{} Not implemented for series with not-comparable indexes. \
Given: self.index={}, other.index={}'.format(
                    _func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
Given: self={}, other={}'.format(_func_name, self, other))

    if not operands_are_series:
        # One operand is a scalar: compare it elementwise to the series data.
        def _series_operator_comp_binop_scalar_impl(self, other):
            if self_is_series == True:  # noqa
                return pandas.Series(self._data < other, index=self._index, name=self._name)
            else:
                return pandas.Series(self < other._data, index=other._index, name=other._name)

        return _series_operator_comp_binop_scalar_impl

    else:
        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
            def _series_operator_comp_binop_none_indexes_impl(self, other):
                left_size, right_size = len(self._data), len(other._data)
                if (left_size == right_size):
                    return pandas.Series(self._data < other._data)
                else:
                    raise ValueError(
                        "Can only compare identically-labeled Series objects")

            return _series_operator_comp_binop_none_indexes_impl
        else:
            # For numeric indexes find the common dtype used for the result index.
            if none_or_numeric_indexes:
                ty_left_index_dtype = types.int64 if isinstance(
                    self.index, types.NoneType) else self.index.dtype
                ty_right_index_dtype = types.int64 if isinstance(
                    other.index, types.NoneType) else other.index.dtype
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [ty_left_index_dtype, ty_right_index_dtype], [])

            def _series_operator_comp_binop_common_impl(self, other):
                left_index, right_index = self.index, other.index
                if sdc_check_indexes_equal(left_index, right_index):
                    if none_or_numeric_indexes == True:  # noqa
                        new_index = astype(left_index, numba_index_common_dtype)
                    else:
                        new_index = self._index
                    return pandas.Series(self._data < other._data, new_index)
                else:
                    raise ValueError(
                        "Can only compare identically-labeled Series objects")

            return _series_operator_comp_binop_common_impl

    return None
def sdc_pandas_read_csv(
    filepath_or_buffer,
    sep=',',
    delimiter=None,
    # Column and Index Locations and Names
    header="infer",
    names=None,
    index_col=None,
    usecols=None,
    squeeze=False,
    prefix=None,
    mangle_dupe_cols=True,
    # General Parsing Configuration
    dtype=None,
    engine=None,
    converters=None,
    true_values=None,
    false_values=None,
    skipinitialspace=False,
    skiprows=None,
    skipfooter=0,
    nrows=None,
    # NA and Missing Data Handling
    na_values=None,
    keep_default_na=True,
    na_filter=True,
    verbose=False,
    skip_blank_lines=True,
    # Datetime Handling
    parse_dates=False,
    infer_datetime_format=False,
    keep_date_col=False,
    date_parser=None,
    dayfirst=False,
    cache_dates=True,
    # Iteration
    iterator=False,
    chunksize=None,
    # Quoting, Compression, and File Format
    compression="infer",
    thousands=None,
    decimal=b".",
    lineterminator=None,
    quotechar='"',
    # quoting=csv.QUOTE_MINIMAL,  # not supported
    doublequote=True,
    escapechar=None,
    comment=None,
    encoding=None,
    dialect=None,
    # Error Handling
    error_bad_lines=True,
    warn_bad_lines=True,
    # Internal
    delim_whitespace=False,
    # low_memory=_c_parser_defaults["low_memory"],  # not supported
    memory_map=False,
    float_precision=None,
):
    """Typing-time overload of ``pandas.read_csv``.

    Infers the resulting DataFrame type either from the (constant) file
    itself or from the ``names``/``usecols``/``dtype`` parameters, then
    generates and compiles a pyarrow-backed reader function.
    """
    # The full pandas.read_csv parameter list, embedded verbatim in the
    # generated reader's source text.
    signature = """
        filepath_or_buffer,
        sep=',',
        delimiter=None,
        # Column and Index Locations and Names
        header="infer",
        names=None,
        index_col=None,
        usecols=None,
        squeeze=False,
        prefix=None,
        mangle_dupe_cols=True,
        # General Parsing Configuration
        dtype=None,
        engine=None,
        converters=None,
        true_values=None,
        false_values=None,
        skipinitialspace=False,
        skiprows=None,
        skipfooter=0,
        nrows=None,
        # NA and Missing Data Handling
        na_values=None,
        keep_default_na=True,
        na_filter=True,
        verbose=False,
        skip_blank_lines=True,
        # Datetime Handling
        parse_dates=False,
        infer_datetime_format=False,
        keep_date_col=False,
        date_parser=None,
        dayfirst=False,
        cache_dates=True,
        # Iteration
        iterator=False,
        chunksize=None,
        # Quoting, Compression, and File Format
        compression="infer",
        thousands=None,
        decimal=b".",
        lineterminator=None,
        quotechar='"',
        # quoting=csv.QUOTE_MINIMAL,  # not supported
        doublequote=True,
        escapechar=None,
        comment=None,
        encoding=None,
        dialect=None,
        # Error Handling
        error_bad_lines=True,
        warn_bad_lines=True,
        # Internal
        delim_whitespace=False,
        # low_memory=_c_parser_defaults["low_memory"],  # not supported
        memory_map=False,
        float_precision=None,
    """

    # read_csv can infer result DataFrame type from file or from params

    # for inferring from file this parameters should be literal or omitted
    infer_from_file = all([
        isinstance(filepath_or_buffer, types.Literal),
        isinstance(sep, (types.Literal, types.Omitted)) or sep == ',',
        isinstance(delimiter, (types.Literal, types.Omitted)) or delimiter is None,
        isinstance(names, (types.Tuple, types.Omitted, type(None))),
        isinstance(usecols, (types.Tuple, types.Omitted, type(None))),
        isinstance(skiprows, (types.Literal, types.Omitted)) or skiprows is None,
    ])

    # for inference from params dtype and (names or usecols) shoud present
    # names, dtype and usecols should be literal tuples after rewrite pass (see. RewriteReadCsv)
    # header not supported
    infer_from_params = all([
        isinstance(dtype, types.Tuple),
        any([
            isinstance(names, types.Tuple) and isinstance(usecols, types.Tuple),
            isinstance(names, types.Tuple) and isinstance(usecols, (types.Omitted, type(None))),
            isinstance(names, (types.Omitted, type(None))) and isinstance(usecols, types.Tuple),
        ]),
        isinstance(header, types.Omitted) or header == 'infer',
    ])

    # cannot create function if parameters provide not enough info
    if not any([infer_from_file, infer_from_params]):
        msg = "Cannot infer resulting DataFrame from constant file or parameters."
        raise TypingError(msg)

    if infer_from_file:
        # parameters should be constants and are important only for inference from file
        if isinstance(filepath_or_buffer, types.Literal):
            filepath_or_buffer = filepath_or_buffer.literal_value
        if isinstance(sep, types.Literal):
            sep = sep.literal_value
        if isinstance(delimiter, types.Literal):
            delimiter = delimiter.literal_value

        # Alias sep -> delimiter.
        if delimiter is None:
            delimiter = sep

        if isinstance(skiprows, types.Literal):
            skiprows = skiprows.literal_value

    # names and usecols influence on both inferencing from file and from params
    if isinstance(names, types.Tuple):
        assert all(isinstance(name, types.Literal) for name in names)
        names = [name.literal_value for name in names]

    if isinstance(usecols, types.Tuple):
        assert all(isinstance(col, types.Literal) for col in usecols)
        usecols = [col.literal_value for col in usecols]

    if infer_from_params:
        # dtype should be constants and is important only for inference from params
        if isinstance(dtype, types.Tuple):
            # dtype arrives as a flat tuple of alternating (key, value) pairs.
            assert all(isinstance(key, types.Literal) for key in dtype[::2])
            keys = (k.literal_value for k in dtype[::2])
            values = dtype[1::2]
            # Normalize every accepted dtype spelling to a numba array type.
            values = [
                v.typing_key if isinstance(v, types.Function) else v
                for v in values
            ]
            values = [
                types.Array(numba.from_dtype(np.dtype(v.literal_value)), 1, 'C')
                if isinstance(v, types.Literal) else v for v in values
            ]
            values = [
                types.Array(types.int_, 1, 'C') if v == int else v
                for v in values
            ]
            values = [
                types.Array(types.float64, 1, 'C') if v == float else v
                for v in values
            ]
            values = [string_array_type if v == str else v for v in values]
            dtype = dict(zip(keys, values))

    # in case of both are available
    # inferencing from params has priority over inferencing from file
    if infer_from_params:
        col_names = names  # all names should be in dtype
        return_columns = usecols if usecols else names
        col_typs = [dtype[n] for n in return_columns]
    elif infer_from_file:
        col_names, col_typs = infer_column_names_and_types_from_constant_filename(
            filepath_or_buffer, delimiter, names, usecols, skiprows)
    else:
        return None

    dtype_present = not isinstance(dtype, (types.Omitted, type(None)))

    # generate function text with signature and returning DataFrame
    func_text, func_name = _gen_csv_reader_py_pyarrow_func_text_dataframe(
        col_names, col_typs, dtype_present, usecols, signature)

    # compile with Python
    csv_reader_py = _gen_csv_reader_py_pyarrow_py_func(func_text, func_name)

    return csv_reader_py
def sdc_pandas_series_binop(self, other, level=None, fill_value=None, axis=0):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.Series.binop

    Limitations
    -----------
    Parameters ``level`` and ``axis`` are currently unsupported by Intel Scalable Dataframe Compiler

    Examples
    --------
    .. literalinclude:: ../../../examples/series/series_binop.py
       :language: python
       :lines: 27-
       :caption:
       :name: ex_series_binop

    .. command-output:: python ./series/series_binop.py
       :cwd: ../../../examples

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas Series method :meth:`pandas.Series.binop` implementation.

    .. only:: developer
        Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5
    """

    # FIX: _func_name was referenced by the TypingError messages below but never
    # defined, so reaching either error path raised NameError instead of TypingError.
    _func_name = 'Method binop().'

    ty_checker = TypeChecker('Method binop().')
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    # this overload is not for string series
    self_is_string_series = self_is_series and isinstance(self.dtype, types.UnicodeType)
    other_is_string_series = other_is_series and isinstance(other.dtype, types.UnicodeType)
    if self_is_string_series or other_is_string_series:
        return None

    if not isinstance(self, (SeriesType, types.Number)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        # indexes are comparable if both are numeric/None or have comparable types
        none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
                                   and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other)))
        series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    if not isinstance(level, types.Omitted) and level is not None:
        ty_checker.raise_exc(level, 'None', 'level')

    if not isinstance(fill_value, (types.Omitted, types.Number, types.NoneType)) and fill_value is not None:
        ty_checker.raise_exc(fill_value, 'number', 'fill_value')

    if not isinstance(axis, types.Omitted) and axis != 0:
        ty_checker.raise_exc(axis, 'int', 'axis')

    # captured at typing time and used as a compile-time constant inside the impls
    fill_value_is_none = isinstance(fill_value, (types.NoneType, types.Omitted)) or fill_value is None

    # specializations for numeric series only
    if not operands_are_series:
        # one operand is a scalar: broadcast it against the series data
        def _series_binop_scalar_impl(self, other, level=None, fill_value=None, axis=0):
            if self_is_series == True:  # noqa
                if not (fill_value is None or numpy.isnan(fill_value)):
                    numpy_like.fillna(self._data, inplace=True, value=fill_value)
                result_data = numpy.empty(len(self._data), dtype=numpy.float64)
                result_data[:] = self._data + numpy.float64(other)
                return pandas.Series(result_data, index=self._index, name=self._name)
            else:
                if not (fill_value is None or numpy.isnan(fill_value)):
                    numpy_like.fillna(other._data, inplace=True, value=fill_value)
                result_data = numpy.empty(len(other._data), dtype=numpy.float64)
                result_data[:] = numpy.float64(self) + other._data
                return pandas.Series(result_data, index=other._index, name=other._name)

        return _series_binop_scalar_impl

    else:   # both operands are numeric series

        # optimization for series with default indexes, that can be aligned differently
        if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)):
            def _series_binop_none_indexes_impl(self, other, level=None, fill_value=None, axis=0):
                _fill_value = numpy.nan if fill_value_is_none == True else fill_value  # noqa
                if not (fill_value is None or numpy.isnan(fill_value)):
                    numpy_like.fillna(self._data, inplace=True, value=fill_value)
                    numpy_like.fillna(other._data, inplace=True, value=fill_value)

                if (len(self._data) == len(other._data)):
                    result_data = numpy_like.astype(self._data, numpy.float64)
                    result_data = result_data + other._data
                    return pandas.Series(result_data)
                else:
                    # different lengths: pad the shorter operand with _fill_value
                    left_size, right_size = len(self._data), len(other._data)
                    min_data_size = min(left_size, right_size)
                    max_data_size = max(left_size, right_size)
                    result_data = numpy.empty(max_data_size, dtype=numpy.float64)
                    if (left_size == min_data_size):
                        result_data[:min_data_size] = self._data
                        for i in range(min_data_size, len(result_data)):
                            result_data[i] = _fill_value
                        result_data = result_data + other._data
                    else:
                        result_data[:min_data_size] = other._data
                        for i in range(min_data_size, len(result_data)):
                            result_data[i] = _fill_value
                        result_data = self._data + result_data

                    return pandas.Series(result_data)

            return _series_binop_none_indexes_impl
        else:
            # for numeric indexes find common dtype to be used when creating joined index
            if none_or_numeric_indexes:
                ty_left_index_dtype = types.int64 if isinstance(self.index, types.NoneType) else self.index.dtype
                ty_right_index_dtype = types.int64 if isinstance(other.index, types.NoneType) else other.index.dtype
                numba_index_common_dtype = find_common_dtype_from_numpy_dtypes(
                    [ty_left_index_dtype, ty_right_index_dtype], [])

            def _series_binop_common_impl(self, other, level=None, fill_value=None, axis=0):
                left_index, right_index = self.index, other.index
                _fill_value = numpy.nan if fill_value_is_none == True else fill_value  # noqa
                if not (fill_value is None or numpy.isnan(fill_value)):
                    numpy_like.fillna(self._data, inplace=True, value=fill_value)
                    numpy_like.fillna(other._data, inplace=True, value=fill_value)

                # check if indexes are equal and series don't have to be aligned
                if sdc_check_indexes_equal(left_index, right_index):
                    result_data = numpy.empty(len(self._data), dtype=numpy.float64)
                    result_data[:] = self._data + other._data

                    if none_or_numeric_indexes == True:  # noqa
                        result_index = numpy_like.astype(left_index, numba_index_common_dtype)
                    else:
                        result_index = self._index

                    return pandas.Series(result_data, index=result_index)

                # TODO: replace below with core join(how='outer', return_indexers=True) when implemented
                joined_index, left_indexer, right_indexer = sdc_join_series_indexes(left_index, right_index)

                result_size = len(joined_index)
                left_values = numpy.empty(result_size, dtype=numpy.float64)
                right_values = numpy.empty(result_size, dtype=numpy.float64)
                for i in range(result_size):
                    left_pos, right_pos = left_indexer[i], right_indexer[i]
                    # -1 in the indexer means "no match in this operand" -> use fill value
                    left_values[i] = self._data[left_pos] if left_pos != -1 else _fill_value
                    right_values[i] = other._data[right_pos] if right_pos != -1 else _fill_value

                result_data = left_values + right_values
                return pandas.Series(result_data, joined_index)

            return _series_binop_common_impl

    return None
def _check_for_none_typed(lst, method):
    """Reject a list operation that is unsupported on ``List[None]``.

    Raises TypingError naming *method* when the list's dtype is NoneType;
    otherwise does nothing.
    """
    if not isinstance(lst.dtype, NoneType):
        return
    msg = "method support for List[None] is limited, not supported: '{}'."
    raise TypingError(msg.format(method))
def assert_item_type_consistent_with_heap_type(heap, item):
    """Verify that *item*'s type equals the heap's element type.

    Returns None on match; raises TypingError on mismatch.
    """
    types_match = heap.dtype == item
    if types_match:
        return
    raise TypingError('heap type must be the same as item type')