def __sub__(self, other):
    """
    Subtract ``other`` from this datetime-like array.

    The operand is first unboxed from a zero-dim ndarray, then dispatched
    on its type (scalars) or dtype (array-likes), in that order.

    Returns ``NotImplemented`` for Series/DataFrame operands, for
    extension-array dtypes that matched no earlier branch, and for anything
    unrecognised.  Raises ``TypeError`` for Index operands that matched no
    earlier branch and for float-dtype operands.  A timedelta64 ndarray
    result is wrapped in ``TimedeltaArrayMixin``.
    """
    other = lib.item_from_zerodim(other)

    if isinstance(other, (ABCSeries, ABCDataFrame)):
        return NotImplemented

    # ---- scalar operands ----
    if other is NaT:
        diff = self._sub_nat()
    elif isinstance(other, (Tick, timedelta, np.timedelta64)):
        diff = self._add_delta(-other)
    elif isinstance(other, DateOffset):
        # a DateOffset that is specifically _not_ a Tick
        diff = self._add_offset(-other)
    elif isinstance(other, (datetime, np.datetime64)):
        diff = self._sub_datetimelike_scalar(other)
    elif lib.is_integer(other):
        # Must stay below the np.timedelta64 check, since is_integer
        # returns True for those as well.
        maybe_integer_op_deprecated(self)
        diff = self._time_shift(-other)
    elif isinstance(other, Period):
        diff = self._sub_period(other)

    # ---- array-like operands ----
    elif is_timedelta64_dtype(other):
        # TimedeltaIndex, ndarray[timedelta64]
        diff = self._add_delta(-other)
    elif is_offsetlike(other):
        # Array/Index of DateOffset objects
        diff = self._addsub_offset_array(other, operator.sub)
    elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
        # DatetimeIndex, ndarray[datetime64]
        diff = self._sub_datetime_arraylike(other)
    elif is_period_dtype(other):
        # PeriodIndex
        diff = self._sub_period_array(other)
    elif is_integer_dtype(other):
        maybe_integer_op_deprecated(self)
        diff = self._addsub_int_array(other, operator.sub)
    elif isinstance(other, ABCIndexClass):
        raise TypeError("cannot subtract {cls} and {typ}"
                        .format(cls=type(self).__name__,
                                typ=type(other).__name__))
    elif is_float_dtype(other):
        # Explicitly reject invalid dtypes
        raise TypeError("cannot subtract {dtype}-dtype from {cls}"
                        .format(dtype=other.dtype,
                                cls=type(self).__name__))
    elif is_extension_array_dtype(other):
        # Categorical op will raise; defer explicitly
        return NotImplemented
    else:  # pragma: no cover
        return NotImplemented

    if not (is_timedelta64_dtype(diff) and isinstance(diff, np.ndarray)):
        return diff

    from pandas.core.arrays import TimedeltaArrayMixin
    # TODO: infer freq?
    return TimedeltaArrayMixin(diff)
def __add__(self, other):
    """
    Add ``other`` to this datetime-like index.

    ``other`` is unboxed from a zero-dim ndarray and then dispatched on its
    type/dtype.  Returns ``NotImplemented`` for Series and for operands that
    match no branch; raises ``TypeError`` when a TimedeltaIndex is added to
    an Index lacking ``_add_delta`` and ``NullFrequencyError`` for
    integer-dtype operands when ``self.freq`` is None.
    """
    from pandas.core.index import Index
    from pandas.core.indexes.timedeltas import TimedeltaIndex
    from pandas.tseries.offsets import DateOffset

    other = lib.item_from_zerodim(other)

    if isinstance(other, ABCSeries):
        return NotImplemented

    # timedelta64-dtyped operands, DateOffsets and timedeltas share a path
    if is_timedelta64_dtype(other) or isinstance(other,
                                                 (DateOffset, timedelta)):
        return self._add_delta(other)

    if is_offsetlike(other):
        # Array/Index of DateOffset objects
        return self._add_offset_array(other)

    if isinstance(self, TimedeltaIndex) and isinstance(other, Index):
        if not hasattr(other, '_add_delta'):
            raise TypeError("cannot add TimedeltaIndex and {typ}"
                            .format(typ=type(other)))
        return other._add_delta(self)

    if is_integer(other):
        return self.shift(other)

    if isinstance(other, (datetime, np.datetime64, Index)):
        return self._add_datelike(other)

    if is_integer_dtype(other) and self.freq is None:
        # GH#19123
        raise NullFrequencyError("Cannot shift with no freq")

    return NotImplemented  # pragma: no cover
def _evaluate_compare(self, other, op):
    """
    Apply the comparison ``op`` to the i8 values of ``self`` and ``other``.

    Called by the comparison methods when comparing datetimelike with
    datetimelike.  If ``other`` is not already an instance of
    ``type(self)`` it is coerced to one (scalars and ndarray scalars are
    wrapped in a one-element list first).  Positions where either operand
    is NaN are overwritten with ``False`` for a boolean result, otherwise
    with ``iNaT``.
    """
    own_type = type(self)

    if not isinstance(other, own_type):
        # coerce to a similar object
        if not is_list_like(other):
            # plain scalar
            other = [other]
        elif lib.is_scalar(lib.item_from_zerodim(other)):
            # ndarray scalar
            other = [other.item()]
        other = own_type(other)

    compared = op(self.asi8, other.asi8)

    # technically we could support bool dtyped Index;
    # for now just return the indexing array directly
    either_nan = (self._isnan) | (other._isnan)

    fill = False if is_bool_dtype(compared) else iNaT
    compared[either_nan] = fill
    return compared
def cmp_method(self, other):
    """
    Compare the underlying data with ``other`` using the enclosing ``op``.

    Series and Index operands return ``NotImplemented`` so pandas can unbox
    and dispatch back.  Masked positions (from ``self`` and, for an
    IntegerArray operand, from ``other``) are set to True only when the
    operation is ``ne``; otherwise they are set to False.
    """
    # value written at masked positions: only "not equal" is True there
    masked_value = op.__name__ == 'ne'
    other_mask = None

    if isinstance(other, (ABCSeries, ABCIndexClass)):
        # Rely on pandas to unbox and dispatch to us.
        return NotImplemented

    if isinstance(other, IntegerArray):
        other, other_mask = other._data, other._mask
    elif is_list_like(other):
        other = np.asarray(other)
        if other.ndim > 0 and len(self) != len(other):
            raise ValueError('Lengths must match to compare')

    other = lib.item_from_zerodim(other)

    # numpy warns on invalid elementwise comparisons (and will raise in
    # the future); silence that here
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", "elementwise", FutureWarning)
        with np.errstate(all='ignore'):
            compared = op(self._data, other)

    # missing values propagate
    combined = self._mask if other_mask is None else self._mask | other_mask

    compared[combined] = masked_value
    return compared
def test_isscalar_numpy_zerodim_arrays(self):
    """Zero-dim ndarrays are not scalars, but their unboxed items are."""
    zerodim_arrays = (
        np.array(1),
        np.array('foobar'),
        np.array(np.datetime64('2014-01-01')),
        np.array(np.timedelta64(1, 'h')),
        np.array(np.datetime64('NaT')),
    )
    for arr in zerodim_arrays:
        assert not is_scalar(arr)
        unboxed = lib.item_from_zerodim(arr)
        assert is_scalar(unboxed)
def __mul__(self, other):
    """
    Multiply this array by ``other`` and re-wrap in ``type(self)``.

    DataFrame/Series/Index operands return ``NotImplemented``.  For a
    scalar, ``freq`` is scaled too when it is set and the scalar is not NA.
    Array-likes of a different length raise ``ValueError`` unless they are
    timedelta64-dtyped (those are left for numpy to reject with TypeError).
    """
    other = lib.item_from_zerodim(other)

    if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
        return NotImplemented

    if is_scalar(other):
        # numpy accepts float and int, raises TypeError for others
        product = self._data * other
        scale_freq = self.freq is not None and not isna(other)
        new_freq = self.freq * other if scale_freq else None
        return type(self)(product, freq=new_freq)

    if not hasattr(other, "dtype"):
        # list, tuple
        other = np.array(other)

    if len(other) != len(self) and not is_timedelta64_dtype(other):
        # timedelta64 is excluded so that it raises TypeError below
        # instead of ValueError here
        raise ValueError("Cannot multiply with unequal lengths")

    if is_object_dtype(other):
        # succeeds only if every element of other is an int or float
        # scalar, so the result ends up timedelta64[ns]-dtyped
        pairwise = [self[i] * other[i] for i in range(len(self))]
        return type(self)(np.array(pairwise))

    # numpy accepts float or int dtype, raises TypeError for others
    return type(self)(self._data * other)
def wrapper(self, other):
    """
    Compare this array's ``asi8`` values with ``other`` via the ``opname``
    method taken from the enclosing scope.

    DataFrame/Series/Index operands return ``NotImplemented``; list-likes
    of a different length raise ``ValueError``.  Positions that are NaN in
    ``self`` (or in an ``other`` of type ``cls``) are set to ``nat_result``.
    """
    compare = getattr(self.asi8, opname)
    other = lib.item_from_zerodim(other)

    if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
        return NotImplemented

    if is_list_like(other) and len(other) != len(self):
        raise ValueError("Lengths must match")

    if isinstance(other, Period):
        self._check_compatible_with(other)
        outcome = compare(other.ordinal)
    elif isinstance(other, cls):
        self._check_compatible_with(other)
        outcome = compare(other.asi8)

        either_nan = self._isnan | other._isnan
        if either_nan.any():
            outcome[either_nan] = nat_result
        # both masks are already applied, so skip the shared tail
        return outcome
    elif other is NaT:
        outcome = np.empty(len(self.asi8), dtype=bool)
        outcome.fill(nat_result)
    else:
        other = Period(other, freq=self.freq)
        outcome = compare(other.ordinal)

    if self._hasnans:
        outcome[self._isnan] = nat_result

    return outcome
def __sub__(self, other):
    """
    Subtract ``other`` from this datetime-like index.

    Dispatches on the type (scalars) and then the dtype (array-likes) of
    ``other``.  Returns ``NotImplemented`` for Series/DataFrame and for
    unrecognised operands.  Raises ``TypeError`` for Index operands that
    matched no earlier branch and for float dtypes, and
    ``NullFrequencyError`` for integer dtypes when ``self.freq`` is None.
    A non-Index result is wrapped in ``Index`` and named via
    ``ops.get_op_result_name``.
    """
    from pandas import Index

    other = lib.item_from_zerodim(other)

    if isinstance(other, (ABCSeries, ABCDataFrame)):
        return NotImplemented

    # ---- scalar operands ----
    if other is NaT:
        out = self._sub_nat()
    elif isinstance(other, (Tick, timedelta, np.timedelta64)):
        out = self._add_delta(-other)
    elif isinstance(other, DateOffset):
        # a DateOffset that is specifically _not_ a Tick
        out = self._add_offset(-other)
    elif isinstance(other, (datetime, np.datetime64)):
        out = self._sub_datelike(other)
    elif is_integer(other):
        # Must stay below the np.timedelta64 check, since is_integer
        # returns True for those as well.
        out = self.shift(-other)
    elif isinstance(other, Period):
        out = self._sub_period(other)

    # ---- array-like operands ----
    elif is_timedelta64_dtype(other):
        # TimedeltaIndex, ndarray[timedelta64]
        out = self._add_delta(-other)
    elif is_offsetlike(other):
        # Array/Index of DateOffset objects
        out = self._addsub_offset_array(other, operator.sub)
    elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
        # DatetimeIndex, ndarray[datetime64]
        out = self._sub_datelike(other)
    elif isinstance(other, Index):
        raise TypeError("cannot subtract {cls} and {typ}"
                        .format(cls=type(self).__name__,
                                typ=type(other).__name__))
    elif is_integer_dtype(other) and self.freq is None:
        # GH#19123
        raise NullFrequencyError("Cannot shift with no freq")
    elif is_float_dtype(other):
        # Explicitly reject invalid dtypes
        raise TypeError("cannot subtract {dtype}-dtype from {cls}"
                        .format(dtype=other.dtype,
                                cls=type(self).__name__))
    else:  # pragma: no cover
        return NotImplemented

    if out is NotImplemented:
        return NotImplemented
    if not isinstance(out, Index):
        # Index.__new__ will choose the appropriate subclass for dtype
        out = Index(out)
    out.name = ops.get_op_result_name(self, other)
    return out
def __rmod__(self, other):
    """
    Compute ``other % self`` as ``other - (other // self) * self``.

    Series/DataFrame/Index operands return ``NotImplemented``; timedelta-like
    scalars are converted to ``Timedelta`` first.
    """
    # Note: This is a naive implementation, can likely be optimized
    if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
        return NotImplemented

    other = lib.item_from_zerodim(other)
    if isinstance(other, (timedelta, np.timedelta64, Tick)):
        other = Timedelta(other)

    quotient = other // self
    return other - quotient * self
def __add__(self, other):
    """
    Add ``other`` to this datetime-like array.

    The operand is unboxed from a zero-dim ndarray, then dispatched on its
    type (scalars) or dtype (array-likes), in that order.

    Returns ``NotImplemented`` for Series/DataFrame, period dtypes,
    extension-array dtypes and unrecognised operands; raises ``TypeError``
    for float dtypes.  A timedelta64 ndarray result is wrapped in
    ``TimedeltaArrayMixin``.
    """
    other = lib.item_from_zerodim(other)

    if isinstance(other, (ABCSeries, ABCDataFrame)):
        return NotImplemented

    # ---- scalar operands ----
    if other is NaT:
        total = self._add_nat()
    elif isinstance(other, (Tick, timedelta, np.timedelta64)):
        total = self._add_delta(other)
    elif isinstance(other, DateOffset):
        # a DateOffset that is specifically _not_ a Tick
        total = self._add_offset(other)
    elif isinstance(other, (datetime, np.datetime64)):
        total = self._add_datetimelike_scalar(other)
    elif lib.is_integer(other):
        # Must stay below the np.timedelta64 check, since is_integer
        # returns True for those as well.
        maybe_integer_op_deprecated(self)
        total = self._time_shift(other)

    # ---- array-like operands ----
    elif is_timedelta64_dtype(other):
        # TimedeltaIndex, ndarray[timedelta64]
        total = self._add_delta(other)
    elif is_offsetlike(other):
        # Array/Index of DateOffset objects
        total = self._addsub_offset_array(other, operator.add)
    elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
        # DatetimeIndex, ndarray[datetime64]; returned without re-wrapping
        return self._add_datetime_arraylike(other)
    elif is_integer_dtype(other):
        maybe_integer_op_deprecated(self)
        total = self._addsub_int_array(other, operator.add)
    elif is_float_dtype(other):
        # Explicitly reject invalid dtypes
        raise TypeError("cannot add {dtype}-dtype to {cls}"
                        .format(dtype=other.dtype,
                                cls=type(self).__name__))
    elif is_period_dtype(other):
        # if self is a TimedeltaArray and other is a PeriodArray with
        # a timedelta-like (i.e. Tick) freq, this operation is valid.
        # Defer to the PeriodArray implementation.
        # In remaining cases, this will end up raising TypeError.
        return NotImplemented
    elif is_extension_array_dtype(other):
        # Categorical op will raise; defer explicitly
        return NotImplemented
    else:  # pragma: no cover
        return NotImplemented

    if not (is_timedelta64_dtype(total) and isinstance(total, np.ndarray)):
        return total

    from pandas.core.arrays import TimedeltaArrayMixin
    # TODO: infer freq?
    return TimedeltaArrayMixin(total)
def __add__(self, other):
    """
    Add ``other`` to this datetime-like index.

    Dispatches on the type (scalars) and then the dtype (array-likes) of
    ``other``.  Returns ``NotImplemented`` for Series/DataFrame, categorical
    dtypes and unrecognised operands; raises ``TypeError`` for float and
    period dtypes.  A non-Index result is wrapped in ``Index`` and named via
    ``ops.get_op_result_name``.
    """
    other = lib.item_from_zerodim(other)

    if isinstance(other, (ABCSeries, ABCDataFrame)):
        return NotImplemented

    # ---- scalar operands ----
    if other is NaT:
        out = self._add_nat()
    elif isinstance(other, (Tick, timedelta, np.timedelta64)):
        out = self._add_delta(other)
    elif isinstance(other, DateOffset):
        # a DateOffset that is specifically _not_ a Tick
        out = self._add_offset(other)
    elif isinstance(other, (datetime, np.datetime64)):
        out = self._add_datelike(other)
    elif is_integer(other):
        # Must stay below the np.timedelta64 check, since is_integer
        # returns True for those as well.
        out = self.shift(other)

    # ---- array-like operands ----
    elif is_timedelta64_dtype(other):
        # TimedeltaIndex, ndarray[timedelta64]
        out = self._add_delta(other)
    elif is_offsetlike(other):
        # Array/Index of DateOffset objects
        out = self._addsub_offset_array(other, operator.add)
    elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
        # DatetimeIndex, ndarray[datetime64]; returned without re-wrapping
        return self._add_datelike(other)
    elif is_integer_dtype(other):
        out = self._addsub_int_array(other, operator.add)
    elif is_float_dtype(other) or is_period_dtype(other):
        # Explicitly reject invalid dtypes
        raise TypeError("cannot add {dtype}-dtype to {cls}"
                        .format(dtype=other.dtype,
                                cls=type(self).__name__))
    elif is_categorical_dtype(other):
        # Categorical op will raise; defer explicitly
        return NotImplemented
    else:  # pragma: no cover
        return NotImplemented

    if out is NotImplemented:
        return NotImplemented
    if not isinstance(out, Index):
        # Index.__new__ will choose the appropriate subclass for dtype
        out = Index(out)
    out.name = ops.get_op_result_name(self, other)
    return out
def __truediv__(self, other):
    """
    Divide this timedelta array by ``other``.

    timedelta / X is well-defined for timedelta-like or numeric X.
    Series/DataFrame/Index operands return ``NotImplemented``.  Division by
    a timedelta-like scalar or timedelta64 array returns the raw quotient
    of ``self._data``; division by timedelta NaT returns an all-NaN float64
    array.  Other scalars and non-object array-likes are re-wrapped in
    ``type(self)``.  Array-likes of unequal length raise ``ValueError``.
    """
    other = lib.item_from_zerodim(other)

    if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
        return NotImplemented

    if isinstance(other, (timedelta, np.timedelta64, Tick)):
        other = Timedelta(other)
        if other is not NaT:
            # dispatch to the Timedelta implementation
            return self._data / other

        # specifically timedelta64-NaT
        all_nan = np.empty(self.shape, dtype=np.float64)
        all_nan.fill(np.nan)
        return all_nan

    if lib.is_scalar(other):
        # assume it is numeric
        quotient = self._data / other
        # Tick division is not implemented, so operate on Timedelta
        new_freq = None if self.freq is None else self.freq.delta / other
        return type(self)(quotient, freq=new_freq)

    if not hasattr(other, "dtype"):
        # e.g. list, tuple
        other = np.array(other)

    if len(other) != len(self):
        raise ValueError("Cannot divide vectors with unequal lengths")

    if is_timedelta64_dtype(other):
        # let numpy handle it
        return self._data / other

    if is_object_dtype(other):
        # Note: no type inference is done on the result, so either an
        # object array or a numeric-dtyped one (if numpy does inference)
        # will be returned. GH#23829
        pairwise = [self[i] / other[i] for i in range(len(self))]
        return np.array(pairwise)

    return type(self)(self._data / other)
def __rfloordiv__(self, other):
    """
    Compute ``other // self`` for timedelta-like ``other``.

    Returns
    -------
    NotImplemented
        For Series/DataFrame/Index operands.
    np.ndarray
        An all-NaN float64 array when ``other`` is timedelta NaT; the i8
        floor-quotient for timedelta64 array-likes (float64 with NaN at
        positions where either operand is missing); an elementwise result
        for object-dtype array-likes.  For other timedelta-like scalars the
        result of ``Timedelta.__floordiv__`` is returned unchanged.

    Raises
    ------
    TypeError
        If ``other`` is a non-timedelta scalar or an array-like whose dtype
        is neither timedelta64 nor object.
    ValueError
        If an array-like ``other`` has a different length than ``self``.
    """
    if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
        return NotImplemented

    other = lib.item_from_zerodim(other)
    if is_scalar(other):
        if isinstance(other, (timedelta, np.timedelta64, Tick)):
            other = Timedelta(other)
            if other is NaT:
                # treat this specifically as timedelta-NaT
                result = np.empty(self.shape, dtype=np.float64)
                result.fill(np.nan)
                return result

            # dispatch to Timedelta implementation
            return other.__floordiv__(self._data)

        raise TypeError("Cannot divide {typ} by {cls}"
                        .format(typ=type(other).__name__,
                                cls=type(self).__name__))

    if not hasattr(other, "dtype"):
        # list, tuple
        other = np.array(other)

    if len(other) != len(self):
        raise ValueError("Cannot divide with unequal lengths")

    elif is_timedelta64_dtype(other):
        other = type(self)(other)

        # numpy timedelta64 does not natively support floordiv, so operate
        # on the i8 values
        result = other.asi8 // self.asi8
        mask = self._isnan | other._isnan
        if mask.any():
            # An integer array cannot hold NaN (assigning it raises
            # ValueError), so upcast to float before masking.
            result = result.astype(np.float64)
            result[mask] = np.nan
        return result

    elif is_object_dtype(other):
        result = [other[n] // self[n] for n in range(len(self))]
        return np.array(result)

    else:
        dtype = getattr(other, "dtype", type(other).__name__)
        raise TypeError("Cannot divide {typ} by {cls}"
                        .format(typ=dtype, cls=type(self).__name__))
def __sub__(self, other):
    """
    Subtract ``other`` from this datetime-like index.

    Returns ``NotImplemented`` for Series and unrecognised operands.
    Raises ``TypeError`` when a TimedeltaIndex is combined with a
    non-timedelta Index, or for any Index that matched no earlier branch,
    and ``NullFrequencyError`` for integer dtypes when ``self.freq`` is
    None.  Any result other than ``NotImplemented`` gets its name from
    ``ops.get_op_result_name``.
    """
    from pandas.core.index import Index
    from pandas.core.indexes.datetimes import DatetimeIndex
    from pandas.core.indexes.timedeltas import TimedeltaIndex
    from pandas.tseries.offsets import DateOffset

    other = lib.item_from_zerodim(other)

    if isinstance(other, ABCSeries):
        return NotImplemented

    if is_timedelta64_dtype(other) or isinstance(other,
                                                 (DateOffset, timedelta)):
        out = self._add_delta(-other)
    elif is_offsetlike(other):
        # Array/Index of DateOffset objects
        out = self._sub_offset_array(other)
    elif isinstance(self, TimedeltaIndex) and isinstance(other, Index):
        if not isinstance(other, TimedeltaIndex):
            raise TypeError("cannot subtract TimedeltaIndex and {typ}"
                            .format(typ=type(other).__name__))
        out = self._add_delta(-other)
    elif isinstance(other, DatetimeIndex):
        out = self._sub_datelike(other)
    elif is_integer(other):
        # Must stay below the timedelta64_dtype check, or np.timedelta64
        # objects would be caught here incorrectly
        out = self.shift(-other)
    elif isinstance(other, (datetime, np.datetime64)):
        out = self._sub_datelike(other)
    elif isinstance(other, Period):
        out = self._sub_period(other)
    elif isinstance(other, Index):
        raise TypeError("cannot subtract {typ1} and {typ2}"
                        .format(typ1=type(self).__name__,
                                typ2=type(other).__name__))
    elif is_integer_dtype(other) and self.freq is None:
        # GH#19123
        raise NullFrequencyError("Cannot shift with no freq")
    else:  # pragma: no cover
        return NotImplemented

    if out is NotImplemented:
        return out
    out.name = ops.get_op_result_name(self, other)
    return out
def __rtruediv__(self, other):
    """
    Compute ``other / self``; defined only for timedelta-like ``other``.

    Series/DataFrame/Index operands return ``NotImplemented``.  Timedelta
    NaT yields an all-NaN float64 array.  Non-timedelta scalars and
    array-likes that are neither timedelta64- nor object-dtyped raise
    ``TypeError``; array-likes of unequal length raise ``ValueError``.
    """
    other = lib.item_from_zerodim(other)

    if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
        return NotImplemented

    if isinstance(other, (timedelta, np.timedelta64, Tick)):
        other = Timedelta(other)
        if other is not NaT:
            # dispatch to the Timedelta implementation
            return other / self._data

        # specifically timedelta64-NaT
        all_nan = np.empty(self.shape, dtype=np.float64)
        all_nan.fill(np.nan)
        return all_nan

    if lib.is_scalar(other):
        raise TypeError("Cannot divide {typ} by {cls}"
                        .format(typ=type(other).__name__,
                                cls=type(self).__name__))

    if not hasattr(other, "dtype"):
        # e.g. list, tuple
        other = np.array(other)

    if len(other) != len(self):
        raise ValueError("Cannot divide vectors with unequal lengths")

    if is_timedelta64_dtype(other):
        # let numpy handle it
        return other / self._data

    if is_object_dtype(other):
        # Note: unlike in __truediv__, type inference on the result is
        # not _needed_ here. If it does not raise, a numeric array is
        # returned. GH#23829
        pairwise = [other[i] / self[i] for i in range(len(self))]
        return np.array(pairwise)

    raise TypeError("Cannot divide {dtype} data by {cls}"
                    .format(dtype=other.dtype,
                            cls=type(self).__name__))
def _wrap_result(name, data, sparse_index, fill_value, dtype=None):
    """
    Wrap an op result in a SparseArray with the correct dtype.

    Parameters
    ----------
    name : str
        Operation name; a dunder form such as ``__eq__`` is reduced to
        ``eq`` before being inspected.
    data, sparse_index
        Passed straight through to ``SparseArray``.
    fill_value : scalar or zero-dim ndarray
        Unboxed with ``lib.item_from_zerodim``; coerced to a Python ``bool``
        when the resulting dtype is boolean.
    dtype : optional
        Result dtype; forced to ``bool`` for comparison operations.

    Returns
    -------
    SparseArray
    """
    if name.startswith('__'):
        # e.g. __eq__ --> eq
        name = name[2:-2]

    if name in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'):
        # Use the builtin rather than the ``np.bool`` alias, which was
        # deprecated in NumPy 1.20 and removed in 1.24.
        dtype = bool

    fill_value = lib.item_from_zerodim(fill_value)

    if is_bool_dtype(dtype):
        # fill_value may be np.bool_
        fill_value = bool(fill_value)

    return SparseArray(data,
                       sparse_index=sparse_index,
                       fill_value=fill_value,
                       dtype=dtype)
def __add__(self, other):
    """
    Add ``other`` to this datetime-like index.

    Dispatches on the type (scalars) and then the dtype (array-likes) of
    ``other``.  Returns ``NotImplemented`` for Series and unrecognised
    operands; raises ``NullFrequencyError`` for integer dtypes when
    ``self.freq`` is None.  A non-Index result is wrapped in ``Index`` and
    named via ``ops.get_op_result_name``.
    """
    from pandas import DateOffset

    other = lib.item_from_zerodim(other)

    if isinstance(other, ABCSeries):
        return NotImplemented

    # ---- scalar operands ----
    if other is NaT:
        out = self._add_nat()
    elif isinstance(other, (DateOffset, timedelta, np.timedelta64)):
        out = self._add_delta(other)
    elif isinstance(other, (datetime, np.datetime64)):
        out = self._add_datelike(other)
    elif is_integer(other):
        # Must stay below the np.timedelta64 check, since is_integer
        # returns True for those as well.
        out = self.shift(other)

    # ---- array-like operands ----
    elif is_timedelta64_dtype(other):
        # TimedeltaIndex, ndarray[timedelta64]
        out = self._add_delta(other)
    elif is_offsetlike(other):
        # Array/Index of DateOffset objects
        out = self._addsub_offset_array(other, operator.add)
    elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
        # DatetimeIndex, ndarray[datetime64]; returned without re-wrapping
        return self._add_datelike(other)
    elif is_integer_dtype(other) and self.freq is None:
        # GH#19123
        raise NullFrequencyError("Cannot shift with no freq")
    else:  # pragma: no cover
        return NotImplemented

    if out is NotImplemented:
        return NotImplemented
    if not isinstance(out, Index):
        # Index.__new__ will choose the appropriate subclass for dtype
        out = Index(out)
    out.name = ops.get_op_result_name(self, other)
    return out
def wrapper(self, other):
    """
    Compare this array's i8 view with ``other`` using the enclosing ``op``.

    DataFrame/Series/Index operands return ``NotImplemented``.  Operands
    that cannot be interpreted as timedelta(s) are handed to
    ``ops.invalid_comparison``; list-likes of a different length raise
    ``ValueError``.  Positions that are NA in either operand are set to
    ``nat_result``.
    """
    other = lib.item_from_zerodim(other)

    if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
        return NotImplemented

    if _is_convertible_to_td(other) or other is NaT:
        # scalar timedelta-like
        try:
            other = Timedelta(other)
        except ValueError:
            # failed to parse as timedelta
            return ops.invalid_comparison(self, other, op)

        outcome = op(self.view('i8'), other.value)
        if isna(other):
            outcome.fill(nat_result)

    elif not is_list_like(other):
        return ops.invalid_comparison(self, other, op)

    elif len(other) != len(self):
        raise ValueError("Lengths must match")

    else:
        # list-like: coerce to the same array type and compare i8 values
        try:
            other = type(self)._from_sequence(other)._data
        except (ValueError, TypeError):
            return ops.invalid_comparison(self, other, op)

        outcome = op(self.view('i8'), other.view('i8'))
        outcome = com.values_from_object(outcome)

        other_na = np.array(isna(other))
        if other_na.any():
            outcome[other_na] = nat_result

    if self._hasnans:
        outcome[self._isnan] = nat_result

    return outcome
def __sub__(self, other):
    """
    Subtract ``other`` from this datetime-like index.

    Returns ``NotImplemented`` for Series and unrecognised operands.
    Raises ``TypeError`` when a TimedeltaIndex is combined with a
    non-timedelta Index, or for any Index that matched no earlier branch.
    """
    from pandas.core.index import Index
    from pandas.core.indexes.datetimes import DatetimeIndex
    from pandas.core.indexes.timedeltas import TimedeltaIndex
    from pandas.tseries.offsets import DateOffset

    other = lib.item_from_zerodim(other)

    if isinstance(other, ABCSeries):
        return NotImplemented

    # timedelta64-dtyped operands, DateOffsets and timedeltas share a path
    if is_timedelta64_dtype(other) or isinstance(other,
                                                 (DateOffset, timedelta)):
        return self._add_delta(-other)

    if is_offsetlike(other):
        # Array/Index of DateOffset objects
        return self._sub_offset_array(other)

    if isinstance(self, TimedeltaIndex) and isinstance(other, Index):
        if not isinstance(other, TimedeltaIndex):
            raise TypeError("cannot subtract TimedeltaIndex and {typ}"
                            .format(typ=type(other).__name__))
        return self._add_delta(-other)

    if isinstance(other, DatetimeIndex):
        return self._sub_datelike(other)

    if is_integer(other):
        return self.shift(-other)

    if isinstance(other, (datetime, np.datetime64)):
        return self._sub_datelike(other)

    if isinstance(other, Period):
        return self._sub_period(other)

    if isinstance(other, Index):
        raise TypeError("cannot subtract {typ1} and {typ2}"
                        .format(typ1=type(self).__name__,
                                typ2=type(other).__name__))

    return NotImplemented  # pragma: no cover
def dispatch_to_series(left, right, func, axis=None):
    """
    Evaluate the frame operation func(left, right) by evaluating
    column-by-column, dispatching to the Series implementation.

    Parameters
    ----------
    left : DataFrame
    right : scalar or DataFrame
    func : arithmetic or comparison operator
    axis : {None, 0, 1, "index", "columns"}

    Returns
    -------
    DataFrame
    """
    # The array-level op that is applied to each column/block's values.
    array_op = get_array_op(func)

    right = lib.item_from_zerodim(right)
    frame_cls = type(left)

    if lib.is_scalar(right) or np.ndim(right) == 0:
        new_mgr = left._mgr.apply(array_op, right=right)
        return frame_cls(new_mgr)

    if isinstance(right, ABCDataFrame):
        assert left.index.equals(right.index)
        assert left.columns.equals(right.columns)
        # TODO: The previous assertion `assert right._indexed_same(left)`
        #  fails in cases with empty columns reached via
        #  _frame_arith_method_with_reindex
        new_mgr = left._mgr.operate_blockwise(right._mgr, array_op)
        return frame_cls(new_mgr)

    if isinstance(right, ABCSeries) and axis == "columns":
        # Only reached via _combine_series_frame, where the operation is
        # specifically meant to go row-by-row
        assert right.index.equals(left.columns)

        if right.dtype == "timedelta64[ns]":
            # make sure NaT values are treated as the correct dtype.
            # Not done unconditionally, as it may be lossy or expensive
            # for EA dtypes.
            right = np.asarray(right)
        else:
            right = right._values

        arrays = [
            array_op(column, value)
            for column, value in zip(left._iter_column_arrays(), right)
        ]

    elif isinstance(right, ABCSeries):
        assert right.index.equals(left.index)  # Handle other cases later
        right = right._values

        arrays = [
            array_op(column, right) for column in left._iter_column_arrays()
        ]

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    return frame_cls._from_arrays(
        arrays, left.columns, left.index, verify_integrity=False
    )
def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike:
    """
    Evaluate a logical operation `|`, `&`, or `^`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.and_, operator.or_, operator.xor}
        Or one of the reversed variants from roperator.

    Returns
    -------
    ndarray or ExtensionArray
    """

    def fill_int(x):
        # int-vs-int results are returned untouched
        return x

    def fill_bool(x, left=None):
        # if `left` is specifically not-boolean, we do not cast to bool
        if x.dtype.kind in ["c", "f", "O"]:
            # dtypes that can hold NA
            na_mask = isna(x)
            if na_mask.any():
                x = x.astype(object)
                x[na_mask] = False

        if left is None or is_bool_dtype(left.dtype):
            x = x.astype(bool)
        return x

    is_self_int_dtype = is_integer_dtype(left.dtype)

    right = lib.item_from_zerodim(right)
    if is_list_like(right) and not hasattr(right, "dtype"):
        # e.g. list, tuple
        right = construct_1d_object_array_from_listlike(right)

    # NB: We assume extract_array has already been called on left and right
    lvalues = maybe_upcast_datetimelike_array(left)
    rvalues = right

    if should_extension_dispatch(lvalues, rvalues):
        # Call the method on lvalues
        return op(lvalues, rvalues)

    if isinstance(rvalues, np.ndarray):
        is_other_int_dtype = is_integer_dtype(rvalues.dtype)
        if not is_other_int_dtype:
            rvalues = fill_bool(rvalues, lvalues)
    else:
        # i.e. scalar
        is_other_int_dtype = lib.is_integer(rvalues)

    # For int vs int `^`, `|`, `&` are bitwise operators and return
    #   integer dtypes.  Otherwise these are boolean ops
    if is_self_int_dtype and is_other_int_dtype:
        filler = fill_int
    else:
        filler = fill_bool

    res_values = na_logical_op(lvalues, rvalues, op)
    return filler(res_values)  # type: ignore
def sanitize_array(
    data,
    index: Optional[Index],
    dtype: Optional[DtypeObj] = None,
    copy: bool = False,
    raise_cast_failure: bool = True,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None, default None
        Only consulted for its length when ``data`` is not list-like;
        must not be None in that case.
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    raise_cast_failure : bool, default True
        Forwarded to ``_try_cast``.

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    TypeError
        If ``data`` is a non-empty ``set`` or ``frozenset`` (unordered).
    ValueError
        If ``data`` is not list-like and ``index`` is None.

    Notes
    -----
    raise_cast_failure=False is only intended to be used when called from
    the DataFrame constructor, as the dtype keyword there may be interpreted
    as only applying to a subset of columns, see GH#24435.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        # unbox a zero-dim array to its scalar, remembering its dtype
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(
                data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                # always raise on failure here so we can fall back below
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        # extension arrays skip the ndim/str-dtype sanitizing below
        return subarr

    elif isinstance(data,
                    (list, tuple, abc.Set, abc.ValuesView)) and len(data) > 0:
        # TODO: deque, array.array
        if isinstance(data, (set, frozenset)):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError(f"'{type(data).__name__}' type is unordered")
        data = list(data)

        if dtype is not None:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            subarr = maybe_convert_platform(data)
            # error: Incompatible types in assignment (expression has type
            # "Union[ExtensionArray, ndarray, List[Any]]", variable has type
            # "ExtensionArray")
            subarr = maybe_cast_to_datetime(subarr, dtype)  # type: ignore[assignment]

    elif isinstance(data, range):
        # GH#16804
        arr = np.arange(data.start, data.stop, data.step, dtype="int64")
        subarr = _try_cast(arr, dtype, copy, raise_cast_failure)
    elif not is_list_like(data):
        # scalar: broadcast to the length of the index
        if index is None:
            raise ValueError(
                "index must be specified when data is not list-like")
        subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype)
    else:
        # realize e.g. generators
        # TODO: non-standard array-likes we can convert to ndarray more efficiently?
        data = list(data)
        subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    subarr = _sanitize_ndim(subarr, data, dtype, index)

    if not (is_extension_array_dtype(subarr.dtype)
            or is_extension_array_dtype(dtype)):
        # error: Argument 1 to "_sanitize_str_dtypes" has incompatible type
        # "ExtensionArray"; expected "ndarray"
        subarr = _sanitize_str_dtypes(
            subarr, data, dtype, copy  # type: ignore[arg-type]
        )

        is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype(
            dtype)
        if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype:
            # object result without an explicit object/str dtype request:
            # re-box interval/period values via ``array``
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)
                subarr = extract_array(subarr, extract_numpy=True)

    return subarr
def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    raise_cast_failure: bool = True,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None, default None
        Only consulted for its length when ``data`` is not list-like;
        must not be None in that case.
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    raise_cast_failure : bool, default True
        Forwarded to ``_try_cast``.
    allow_2d : bool, default False
        If False, raise if we have a 2D Arraylike.

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    TypeError
        If ``data`` is a ``set`` or ``frozenset`` (unordered).
    ValueError
        If ``data`` is not list-like and ``index`` is None.

    Notes
    -----
    raise_cast_failure=False is only intended to be used when called from
    the DataFrame constructor, as the dtype keyword there may be interpreted
    as only applying to a subset of columns, see GH#24435.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        # unbox a zero-dim array to its scalar, remembering its dtype
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)
    elif isinstance(data, range):
        # GH#16804
        data = range_to_ndarray(data)
        # ``copy`` is switched off for the array just built from the range
        copy = False

    if not is_list_like(data):
        # scalar: broadcast to the length of the index and return directly
        if index is None:
            raise ValueError("index must be specified when data is not list-like")
        data = construct_1d_arraylike_from_scalar(data, len(index), dtype)
        return data

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                # always raise on failure here so we can fall back below
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        # extension arrays skip the ndim/str-dtype sanitizing below
        return subarr

    else:
        if isinstance(data, (set, frozenset)):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError(f"'{type(data).__name__}' type is unordered")

        # materialize e.g. generators, convert e.g. tuples, abc.ValueView
        # TODO: non-standard array-likes we can convert to ndarray more efficiently?
        data = list(data)

        if dtype is not None or len(data) == 0:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            # TODO: copy?
            subarr = maybe_convert_platform(data)
            if subarr.dtype == object:
                # Argument 1 to "maybe_infer_to_datetimelike" has incompatible
                # type "Union[ExtensionArray, ndarray]"; expected "ndarray"
                subarr = maybe_infer_to_datetimelike(subarr)  # type: ignore[arg-type]

    subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)

    if not (
        isinstance(subarr.dtype, ExtensionDtype) or isinstance(dtype, ExtensionDtype)
    ):
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

        is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype(dtype)
        if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype:
            # object result without an explicit object/str dtype request:
            # re-box interval/period values via ``array``
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)
                subarr = extract_array(subarr, extract_numpy=True)

    return subarr
def wrapper(self, other, axis=None):
    """
    Compare a Series with ``other`` and return a boolean Series.

    ``op`` and ``na_op`` are taken from the enclosing scope (not visible
    in this block).

    Parameters
    ----------
    other : Series, ndarray, Index, Categorical, list or scalar
    axis : optional
        Only validated via ``self._get_axis_number``; otherwise unused.

    Raises
    ------
    ValueError
        For differently-labeled Series or array-likes of unequal length.
    TypeError
        For a Categorical compared against a non-categorical Series, or
        when the comparison collapses to a scalar.
    """
    # Validate the axis parameter
    if axis is not None:
        self._get_axis_number(axis)

    if isinstance(other, ABCSeries):
        name = com._maybe_match_name(self, other)
        if not self._indexed_same(other):
            raise ValueError('Can only compare identically-labeled '
                             'Series objects')
        compared = na_op(self.values, other.values)
        return self._constructor(compared, index=self.index, name=name)

    if isinstance(other, ABCDataFrame):  # pragma: no cover
        return NotImplemented

    if isinstance(other, (np.ndarray, pd.Index)):
        # do not check length of zerodim array as it will broadcast
        broadcasts = is_scalar(lib.item_from_zerodim(other))
        if not broadcasts and len(self) != len(other):
            raise ValueError('Lengths must match to compare')

        if isinstance(other, ABCPeriodIndex):
            # temp workaround until fixing GH 13637
            # tested in test_nat_comparisons
            # (pandas.tests.series.test_operators.TestSeriesOperators)
            lhs = self.values
            rhs = other.astype(object).values
            return self._constructor(na_op(lhs, rhs), index=self.index)

        compared = na_op(self.values, np.asarray(other))
        out = self._constructor(compared, index=self.index)
        return out.__finalize__(self)

    if isinstance(other, pd.Categorical) and not is_categorical_dtype(self):
        template = ("Cannot compare a Categorical for op {op} with Series "
                    "of dtype {typ}.\nIf you want to compare values, use "
                    "'series <op> np.asarray(other)'.")
        raise TypeError(template.format(op=op, typ=self.dtype))

    if is_categorical_dtype(self):
        # cats are a special case as get_values() would return an ndarray,
        # which would then not take categories ordering into account
        # we can go directly to op, as the na_op would just test again and
        # dispatch to it.
        with np.errstate(all='ignore'):
            res = op(self.values, other)
    else:
        values = self.get_values()
        if isinstance(other, (list, np.ndarray)):
            other = np.asarray(other)

        with np.errstate(all='ignore'):
            res = na_op(values, other)
        if is_scalar(res):
            raise TypeError('Could not compare {typ} type with Series'
                            .format(typ=type(other)))

        # always return a full value series here
        res = com._values_from_object(res)

    return pd.Series(res, index=self.index, name=self.name, dtype='bool')
def to_timedelta(arg, unit=None, errors="raise"):
    """
    Convert argument to timedelta.

    Timedeltas are absolute differences in times, expressed in different
    units (e.g. days, hours, minutes, seconds). This function converts an
    argument from a recognized timedelta format / value into a Timedelta
    type.

    Parameters
    ----------
    arg : str, timedelta, list-like or Series
        The data to be converted to timedelta. ``None`` is returned
        unchanged.
    unit : str, optional
        Denotes the unit of the arg for numeric `arg`. Defaults to ``"ns"``.

        Possible values:

        * 'W'
        * 'D' / 'days' / 'day'
        * 'hours' / 'hour' / 'hr' / 'h'
        * 'm' / 'minute' / 'min' / 'minutes' / 'T'
        * 'S' / 'seconds' / 'sec' / 'second'
        * 'ms' / 'milliseconds' / 'millisecond' / 'milli' / 'millis' / 'L'
        * 'us' / 'microseconds' / 'microsecond' / 'micro' / 'micros' / 'U'
        * 'ns' / 'nanoseconds' / 'nano' / 'nanos' / 'nanosecond' / 'N'

        Units 'M', 'Y' and 'y' do not represent unambiguous timedelta
        values and raise ``ValueError``.

        .. versionchanged:: 1.1.0

           Must not be specified when `arg` contains strings and
           ``errors="raise"``.

    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception.
        - If 'coerce', then invalid parsing will be set as NaT.
        - If 'ignore', then invalid parsing will return the input.

    Returns
    -------
    timedelta64 or numpy.array of timedelta64
        Output type returned if parsing succeeded.

    Raises
    ------
    ValueError
        If `errors` is not one of the accepted values, if `unit` is 'M',
        'Y' or 'y', or if `unit` is given together with a string `arg`.
    TypeError
        If `arg` has more than one dimension.

    See Also
    --------
    DataFrame.astype : Cast argument to a specified dtype.
    to_datetime : Convert argument to datetime.
    convert_dtypes : Convert dtypes.

    Notes
    -----
    If the precision is higher than nanoseconds, the precision of the
    duration is truncated to nanoseconds for string inputs.

    Examples
    --------
    Parsing a single string to a Timedelta:

    >>> pd.to_timedelta('1 days 06:05:01.00003')
    Timedelta('1 days 06:05:01.000030')
    >>> pd.to_timedelta('15.5us')
    Timedelta('0 days 00:00:00.000015500')

    Parsing a list or array of strings:

    >>> pd.to_timedelta(['1 days 06:05:01.00003', '15.5us', 'nan'])
    TimedeltaIndex(['1 days 06:05:01.000030', '0 days 00:00:00.000015500', NaT],
                   dtype='timedelta64[ns]', freq=None)

    Converting numbers by specifying the `unit` keyword argument:

    >>> pd.to_timedelta(np.arange(5), unit='s')
    TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01', '0 days 00:00:02',
                    '0 days 00:00:03', '0 days 00:00:04'],
                   dtype='timedelta64[ns]', freq=None)
    >>> pd.to_timedelta(np.arange(5), unit='d')
    TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
                   dtype='timedelta64[ns]', freq=None)
    """
    if unit is not None:
        unit = parse_timedelta_unit(unit)

    valid_errors = ("ignore", "raise", "coerce")
    if errors not in valid_errors:
        raise ValueError("errors must be one of 'ignore', 'raise', or 'coerce'.")

    if unit in {"Y", "y", "M"}:
        raise ValueError(
            "Units 'M', 'Y', and 'y' are no longer supported, as they do not "
            "represent unambiguous timedelta values durations."
        )

    if arg is None:
        return arg

    if isinstance(arg, ABCSeries):
        # convert the underlying values, then re-wrap with the same labels
        converted = _convert_listlike(arg._values, unit=unit, errors=errors)
        return arg._constructor(converted, index=arg.index, name=arg.name)

    if isinstance(arg, ABCIndex):
        return _convert_listlike(arg, unit=unit, errors=errors, name=arg.name)

    if isinstance(arg, np.ndarray) and arg.ndim == 0:
        # extract array scalar and process below
        arg = lib.item_from_zerodim(arg)
    elif is_list_like(arg) and getattr(arg, "ndim", 1) == 1:
        return _convert_listlike(arg, unit=unit, errors=errors)
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, timedelta, list, tuple, 1-d array, or Series"
        )

    if isinstance(arg, str) and unit is not None:
        raise ValueError("unit must not be specified if the input is/contains a str")

    # ...so it must be a scalar value. Return scalar.
    return _coerce_scalar_to_timedelta_type(arg, unit=unit, errors=errors)
def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike:
    """
    Evaluate a comparison operation `==`, `!=`, `>=`, `>`, `<=`, or `<`.

    Note: the caller is responsible for ensuring that numpy warnings are
    suppressed (with np.errstate(all="ignore")) if needed.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le}

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``right`` is an array (or list) whose length differs from ``left``.
    """
    # NB: We assume extract_array has already been called on left and right
    lhs = ensure_wrapped_if_datetimelike(left)
    rhs = lib.item_from_zerodim(ensure_wrapped_if_datetimelike(right))

    if isinstance(rhs, list):
        # We don't catch tuple here bc we may be comparing e.g. MultiIndex
        #  to a tuple that represents a single entry, see test_compare_tuple_strs
        rhs = np.asarray(rhs)

    if isinstance(rhs, (np.ndarray, ABCExtensionArray)):
        # TODO: make this treatment consistent across ops and classes.
        #  We are not catching all listlikes here (e.g. frozenset, tuple)
        #  The ambiguous case is object-dtype.  See GH#27803
        if len(lhs) != len(rhs):
            raise ValueError(
                "Lengths must match to compare", lhs.shape, rhs.shape
            )

    if should_extension_dispatch(lhs, rhs):
        # Call the method on lhs
        return op(lhs, rhs)

    datetimelike_scalar = (
        isinstance(rhs, (Timedelta, BaseOffset, Timestamp)) or right is NaT
    )
    if datetimelike_scalar and not is_object_dtype(lhs.dtype):
        # Call the method on lhs
        return op(lhs, rhs)

    if is_scalar(rhs) and isna(rhs):  # TODO: but not pd.NA?
        # numpy does not like comparisons vs None: only ``!=`` is all-True
        fill = np.ones if op is operator.ne else np.zeros
        return fill(lhs.shape, dtype=bool)

    if is_numeric_v_string_like(lhs, rhs):
        # GH#36377 going through the numexpr path would incorrectly raise
        return invalid_comparison(lhs, rhs, op)

    if is_object_dtype(lhs.dtype) or isinstance(rhs, str):
        return comp_method_OBJECT_ARRAY(op, lhs, rhs)

    return _na_arithmetic_op(lhs, rhs, op, is_cmp=True)
def wrapper(self, other, axis=None):
    """
    Compare a Series with ``other`` and return a boolean Series.

    ``op`` and ``na_op`` are taken from the enclosing scope (not visible
    in this block).

    Parameters
    ----------
    other : Series, ndarray, Index, Categorical, list or scalar
    axis : optional
        Only validated via ``self._get_axis_number``; otherwise unused.

    Raises
    ------
    ValueError
        For differently-labeled Series or array-likes of unequal length.
    TypeError
        For a Categorical compared against a non-categorical Series, or
        when the comparison collapses to a scalar.
    """
    # Validate the axis parameter
    if axis is not None:
        self._get_axis_number(axis)

    if isinstance(other, ABCDataFrame):  # pragma: no cover
        # Defer to DataFrame implementation; fail early
        return NotImplemented

    if isinstance(other, ABCSeries):
        name = com._maybe_match_name(self, other)
        if not self._indexed_same(other):
            raise ValueError('Can only compare identically-labeled '
                             'Series objects')
        compared = na_op(self.values, other.values)
        return self._constructor(compared, index=self.index, name=name)

    if isinstance(other, (np.ndarray, pd.Index)):
        # do not check length of zerodim array as it will broadcast
        broadcasts = is_scalar(lib.item_from_zerodim(other))
        if not broadcasts and len(self) != len(other):
            raise ValueError('Lengths must match to compare')

        compared = na_op(self.values, np.asarray(other))
        out = self._constructor(compared, index=self.index)
        return out.__finalize__(self)

    if isinstance(other, pd.Categorical) and not is_categorical_dtype(self):
        raise TypeError(
            "Cannot compare a Categorical for op {op} with "
            "Series of dtype {typ}.\nIf you want to compare "
            "values, use 'series <op> np.asarray(other)'.".format(
                op=op, typ=self.dtype))

    if is_scalar(other) and isna(other):
        # numpy does not like comparisons vs None: only ``!=`` is all-True
        fill = np.ones if op is operator.ne else np.zeros
        compared = fill(len(self), dtype=bool)
        return self._constructor(compared, index=self.index,
                                 name=self.name, dtype='bool')

    if is_categorical_dtype(self):
        # cats are a special case as get_values() would return an ndarray,
        # which would then not take categories ordering into account
        # we can go directly to op, as the na_op would just test again and
        # dispatch to it.
        with np.errstate(all='ignore'):
            res = op(self.values, other)
    else:
        values = self.get_values()
        if isinstance(other, (list, np.ndarray)):
            other = np.asarray(other)

        with np.errstate(all='ignore'):
            res = na_op(values, other)
        if is_scalar(res):
            raise TypeError(
                'Could not compare {typ} type with Series'.format(
                    typ=type(other)))

        # always return a full value series here
        res = com._values_from_object(res)

    return pd.Series(res, index=self.index, name=self.name, dtype='bool')
def comparison_op(
    left: Union[np.ndarray, ABCExtensionArray], right: Any, op
) -> Union[np.ndarray, ABCExtensionArray]:
    """
    Evaluate a comparison operation `==`, `!=`, `>=`, `>`, `<=`, or `<`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le}

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``right`` is array-like with a length different from ``left``.
    TypeError
        If the comparison method returns a scalar instead of an array.
    """
    # NB: We assume extract_array has already been called on left and right
    lhs = left
    rhs = lib.item_from_zerodim(right)

    if isinstance(rhs, list):
        # TODO: same for tuples?
        rhs = np.asarray(rhs)

    if isinstance(rhs, (np.ndarray, ABCExtensionArray, ABCIndexClass)):
        # TODO: make this treatment consistent across ops and classes.
        #  We are not catching all listlikes here (e.g. frozenset, tuple)
        #  The ambiguous case is object-dtype.  See GH#27803
        if len(lhs) != len(rhs):
            raise ValueError("Lengths must match to compare")

    if should_extension_dispatch(lhs, rhs):
        return dispatch_to_extension_op(op, lhs, rhs)

    if is_scalar(rhs) and isna(rhs):
        # numpy does not like comparisons vs None: only ``!=`` is all-True
        fill = np.ones if op is operator.ne else np.zeros
        return fill(len(lhs), dtype=bool)

    if is_object_dtype(lhs.dtype):
        return comp_method_OBJECT_ARRAY(op, lhs, rhs)

    # Fall back to the array's own comparison dunder method.
    dunder = "__{op}__".format(op=op.__name__)
    compare = getattr(lhs, dunder)
    with np.errstate(all="ignore"):
        outcome = compare(rhs)

    if outcome is NotImplemented:
        outcome = invalid_comparison(lhs, rhs, op)
    if is_scalar(outcome):
        raise TypeError(
            "Could not compare {typ} type with Series".format(typ=type(rhs))
        )
    return outcome
def dispatch_to_series(left, right, func, str_rep=None, axis=None):
    """
    Evaluate the frame operation func(left, right) by evaluating
    column-by-column, dispatching to the Series implementation.

    Parameters
    ----------
    left : DataFrame
    right : scalar, Series, or DataFrame
    func : arithmetic or comparison operator
    str_rep : str or None, default None
        Forwarded to ``expressions.evaluate``.
    axis : {None, 0, 1, "index", "columns"}
        Only the value ``"columns"`` changes how a Series ``right`` is
        aligned (element ``i`` against column ``i``).

    Returns
    -------
    DataFrame

    Raises
    ------
    NotImplementedError
        If ``right`` is none of the supported kinds.
    """
    # Note: we use iloc to access columns for compat with cases
    #       with non-unique columns.
    import pandas.core.computation.expressions as expressions

    right = lib.item_from_zerodim(right)

    # Each supported kind of ``right`` only differs in how the operand for
    # column position ``i`` is picked out of it.
    if lib.is_scalar(right) or np.ndim(right) == 0:

        def operand(b, i):
            return b

    elif isinstance(right, ABCDataFrame):
        assert right._indexed_same(left)

        def operand(b, i):
            return b.iloc[:, i]

    elif isinstance(right, ABCSeries) and axis == "columns":
        # We only get here if called via _combine_frame_series,
        # in which case we specifically want to operate row-by-row
        assert right.index.equals(left.columns)

        if right.dtype == "timedelta64[ns]":
            # ensure we treat NaT values as the correct dtype
            # Note: we do not do this unconditionally as it may be lossy or
            #  expensive for EA dtypes.
            right = np.asarray(right)

            def operand(b, i):
                return b[i]

        else:

            def operand(b, i):
                return b.iloc[i]

    elif isinstance(right, ABCSeries):
        assert right.index.equals(left.index)  # Handle other cases later

        def operand(b, i):
            return b

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    def column_op(a, b):
        out = {}
        for i in range(len(a.columns)):
            out[i] = func(a.iloc[:, i], operand(b, i))
        return out

    return expressions.evaluate(column_op, str_rep, left, right)
def dispatch_to_series(left, right, func, axis: Optional[int] = None):
    """
    Evaluate the frame operation func(left, right) by evaluating
    column-by-column, dispatching to the Series implementation.

    Parameters
    ----------
    left : DataFrame
    right : scalar, Series, or DataFrame
    func : arithmetic or comparison operator
    axis : {None, 0, 1}
        With a Series ``right``, ``axis == 1`` pairs each element of
        ``right`` with one column of ``left``.

    Returns
    -------
    DataFrame

    Raises
    ------
    NotImplementedError
        If ``right`` is list-like but neither a DataFrame nor a Series.
    """
    # Get the appropriate array-op to apply to each column/block's values.
    array_op = get_array_op(func)
    frame_cls = type(left)

    right = lib.item_from_zerodim(right)

    if not is_list_like(right):
        # i.e. scalar, faster than checking np.ndim(right) == 0
        return frame_cls(left._mgr.apply(array_op, right=right))

    if isinstance(right, ABCDataFrame):
        assert left.index.equals(right.index)
        assert left.columns.equals(right.columns)
        # TODO: The previous assertion `assert right._indexed_same(left)`
        #  fails in cases with empty columns reached via
        #  _frame_arith_method_with_reindex
        return frame_cls(left._mgr.operate_blockwise(right._mgr, array_op))

    if not isinstance(right, ABCSeries):
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    if axis == 1:
        # axis=1 means we want to operate row-by-row
        assert right.index.equals(left.columns)

        right = right._values
        # maybe_align_as_frame ensures we do not have an ndarray here
        assert not isinstance(right, np.ndarray)

        arrays = [
            array_op(column, element)
            for column, element in zip(left._iter_column_arrays(), right)
        ]
    else:
        assert right.index.equals(left.index)  # Handle other cases later
        right = right._values

        arrays = [array_op(column, right) for column in left._iter_column_arrays()]

    return frame_cls._from_arrays(
        arrays, left.columns, left.index, verify_integrity=False
    )
def __sub__(self, other):
    """
    Subtract ``other`` from this datetime-like index.

    The operand is dispatched on its type (scalars first, then
    array-likes); the order of the checks matters. Any non-Index result
    is wrapped in an ``Index`` and named via ``ops.get_op_result_name``.

    Returns
    -------
    Index or NotImplemented
        ``NotImplemented`` for Series/DataFrame, categorical and
        otherwise unrecognized operands.

    Raises
    ------
    TypeError
        If ``other`` is an unsupported Index or has a float dtype.
    """
    from pandas import Index

    other = lib.item_from_zerodim(other)
    if isinstance(other, (ABCSeries, ABCDataFrame)):
        return NotImplemented

    # scalar others
    elif other is NaT:
        res = self._sub_nat()
    elif isinstance(other, (Tick, timedelta, np.timedelta64)):
        res = self._add_delta(-other)
    elif isinstance(other, DateOffset):
        # specifically _not_ a Tick
        res = self._add_offset(-other)
    elif isinstance(other, (datetime, np.datetime64)):
        res = self._sub_datelike(other)
    elif is_integer(other):
        # This check must come after the check for np.timedelta64
        # as is_integer returns True for these
        res = self.shift(-other)
    elif isinstance(other, Period):
        res = self._sub_period(other)

    # array-like others
    elif is_timedelta64_dtype(other):
        # TimedeltaIndex, ndarray[timedelta64]
        res = self._add_delta(-other)
    elif is_offsetlike(other):
        # Array/Index of DateOffset objects
        res = self._addsub_offset_array(other, operator.sub)
    elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
        # DatetimeIndex, ndarray[datetime64]
        res = self._sub_datelike(other)
    elif is_period_dtype(other):
        # PeriodIndex
        res = self._sub_period_array(other)
    elif is_integer_dtype(other):
        res = self._addsub_int_array(other, operator.sub)
    elif isinstance(other, Index):
        raise TypeError("cannot subtract {cls} and {typ}".format(
            cls=type(self).__name__, typ=type(other).__name__))
    elif is_float_dtype(other):
        # Explicitly catch invalid dtypes
        raise TypeError(
            "cannot subtract {dtype}-dtype from {cls}".format(
                dtype=other.dtype, cls=type(self).__name__))
    elif is_categorical_dtype(other):
        # Categorical op will raise; defer explicitly
        return NotImplemented
    else:  # pragma: no cover
        return NotImplemented

    if res is NotImplemented:
        return NotImplemented

    if not isinstance(res, Index):
        # Index.__new__ will choose appropriate subclass for dtype
        res = Index(res)
    res.name = ops.get_op_result_name(self, other)
    return res
def wrapper(self, other, axis=None):
    """
    Compare a Series with ``other`` and return a boolean Series.

    ``op`` and ``na_op`` come from the enclosing scope (not visible in
    this block).

    Parameters
    ----------
    other : Series, ndarray, Index, Categorical, list or scalar
    axis : optional
        Only validated via ``self._get_axis_number``; otherwise unused.

    Raises
    ------
    ValueError
        For differently-labeled Series or array-likes of unequal length.
    TypeError
        For a Categorical compared against a non-categorical Series, or
        when the comparison collapses to a scalar.
    """
    # Validate the axis parameter
    if axis is not None:
        self._get_axis_number(axis)

    if isinstance(other, ABCDataFrame):  # pragma: no cover
        # Defer to DataFrame implementation; fail early
        return NotImplemented

    if isinstance(other, ABCSeries):
        result_name = com._maybe_match_name(self, other)
        if not self._indexed_same(other):
            raise ValueError('Can only compare identically-labeled '
                             'Series objects')
        flags = na_op(self.values, other.values)
        return self._constructor(flags, index=self.index, name=result_name)

    if isinstance(other, (np.ndarray, pd.Index)):
        # do not check length of zerodim array as it will broadcast
        unboxed = lib.item_from_zerodim(other)
        if not is_scalar(unboxed) and len(self) != len(other):
            raise ValueError('Lengths must match to compare')

        flags = na_op(self.values, np.asarray(other))
        return self._constructor(flags,
                                 index=self.index).__finalize__(self)

    if isinstance(other, pd.Categorical) and not is_categorical_dtype(self):
        raise TypeError("Cannot compare a Categorical for op {op} with "
                        "Series of dtype {typ}.\nIf you want to compare "
                        "values, use 'series <op> np.asarray(other)'."
                        .format(op=op, typ=self.dtype))

    if is_scalar(other) and isna(other):
        # numpy does not like comparisons vs None
        if op is operator.ne:
            flags = np.ones(len(self), dtype=bool)
        else:
            flags = np.zeros(len(self), dtype=bool)
        return self._constructor(flags, index=self.index,
                                 name=self.name, dtype='bool')

    if is_categorical_dtype(self):
        # cats are a special case as get_values() would return an ndarray,
        # which would then not take categories ordering into account
        # we can go directly to op, as the na_op would just test again and
        # dispatch to it.
        with np.errstate(all='ignore'):
            res = op(self.values, other)
    else:
        lhs = self.get_values()
        if isinstance(other, (list, np.ndarray)):
            other = np.asarray(other)

        with np.errstate(all='ignore'):
            res = na_op(lhs, other)
        if is_scalar(res):
            raise TypeError('Could not compare {typ} type with Series'
                            .format(typ=type(other)))

        # always return a full value series here
        res = com._values_from_object(res)

    return pd.Series(res, index=self.index, name=self.name, dtype='bool')
def quantile_with_mask(
    values: np.ndarray,
    mask: np.ndarray,
    fill_value,
    qs,
    interpolation: str,
    axis: int,
) -> np.ndarray:
    """
    Compute the quantiles of the given values for each quantile in `qs`.

    Parameters
    ----------
    values : np.ndarray
        For ExtensionArray, this is _values_for_factorize()[0]
    mask : np.ndarray[bool]
        mask = isna(values)
        For ExtensionArray, this is computed before calling _value_for_factorize
    fill_value : Scalar
        The value used to fill NA entries (and the whole result when
        ``values`` is empty along ``axis``).
        For ExtensionArray, this is _values_for_factorize()[1]
    qs : a scalar or list of the quantiles to be computed
    interpolation : str
        Type of interpolation
    axis : int
        Axis along which to compute quantiles.

    Returns
    -------
    np.ndarray
        When ``qs`` is a scalar the trailing length-1 quantile axis is
        dropped again before returning.

    Notes
    -----
    Assumes values is already 2D.  For ExtensionArray this means np.atleast_2d
    has been called on _values_for_factorize()[0]
    """
    is_empty = values.shape[axis] == 0
    orig_scalar = not is_list_like(qs)
    if orig_scalar:
        # make list-like, unpack later
        qs = [qs]

    if is_empty:
        # create the array of na_values
        # 2d len(values) * len(qs)
        flat = np.array([fill_value] * len(qs))
        result = np.repeat(flat, len(values)).reshape(len(values), len(qs))
    else:
        result = nanpercentile(
            values,
            np.array(qs) * 100,
            axis=axis,
            na_value=fill_value,
            mask=mask,
            ndim=values.ndim,
            interpolation=interpolation,
        )

        # asarray needed for Sparse, see GH#24600.
        # np.asarray only copies when it has to; np.array(..., copy=False)
        # raises on NumPy >= 2 whenever a copy cannot be avoided.
        result = np.asarray(result)
        result = result.T

    if orig_scalar:
        assert result.shape[-1] == 1, result.shape
        result = result[..., 0]
        result = lib.item_from_zerodim(result)

    return result
def __floordiv__(self, other):
    """
    Floor-divide this timedelta array by ``other``.

    Parameters
    ----------
    other : scalar or array-like
        Timedelta-like or numeric scalar, or a list/tuple/array of
        timedelta64, object, integer or float dtype.

    Returns
    -------
    np.ndarray, same type as self, or NotImplemented
        Division by timedelta-likes gives a plain ndarray (float64 NaN
        where either side is NaT); division by numbers gives an instance
        of ``type(self)``. Series, DataFrame and Index operands return
        ``NotImplemented``.

    Raises
    ------
    ValueError
        If an array-like ``other`` has a different length than ``self``.
    TypeError
        If ``other`` has an unsupported dtype.
    """
    if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
        return NotImplemented

    other = lib.item_from_zerodim(other)
    if is_scalar(other):
        if isinstance(other, (timedelta, np.timedelta64, Tick)):
            other = Timedelta(other)
            if other is NaT:
                # treat this specifically as timedelta-NaT
                result = np.empty(self.shape, dtype=np.float64)
                result.fill(np.nan)
                return result

            # dispatch to Timedelta implementation
            result = other.__rfloordiv__(self._data)
            return result

        # at this point we should only have numeric scalars; anything
        #  else will raise
        result = self.asi8 // other
        result[self._isnan] = iNaT
        freq = None
        if self.freq is not None:
            # Note: freq gets division, not floor-division
            freq = self.freq / other
        return type(self)(result.view('m8[ns]'), freq=freq)

    if not hasattr(other, "dtype"):
        # list, tuple
        other = np.array(other)
    if len(other) != len(self):
        raise ValueError("Cannot divide with unequal lengths")

    elif is_timedelta64_dtype(other):
        other = type(self)(other)

        # numpy timedelta64 does not natively support floordiv, so operate
        #  on the i8 values
        result = self.asi8 // other.asi8
        mask = self._isnan | other._isnan
        if mask.any():
            # NaN cannot be stored in an integer array (assignment raises
            # ValueError), so cast to float64 first; this matches the
            # float64 NaN result of the scalar-NaT branch above.
            result = result.astype(np.float64)
            result[mask] = np.nan
        return result

    elif is_object_dtype(other):
        result = [self[n] // other[n] for n in range(len(self))]
        result = np.array(result)
        if lib.infer_dtype(result) == 'timedelta':
            result, _ = sequence_to_td64ns(result)
            return type(self)(result)
        return result

    elif is_integer_dtype(other) or is_float_dtype(other):
        result = self._data // other
        return type(self)(result)

    else:
        dtype = getattr(other, "dtype", type(other).__name__)
        raise TypeError("Cannot divide {typ} by {cls}".format(
            typ=dtype, cls=type(self).__name__))
def wrapper(self, other):
    """
    Compare a Series with ``other`` and return a new Series of results.

    ``op`` and ``na_op`` come from the enclosing scope (not visible in
    this block).

    Returns
    -------
    Series or NotImplemented
        ``NotImplemented`` when ``other`` is a DataFrame. The result is
        named via ``get_op_result_name`` and is passed through
        ``__finalize__`` only when ``other`` ends up as an ndarray
        (including a converted list) or an Index.

    Raises
    ------
    ValueError
        For differently-labeled Series or array-likes of unequal length.
    TypeError
        If the comparison collapses to a scalar.
    """
    res_name = get_op_result_name(self, other)
    other = lib.item_from_zerodim(other)

    if isinstance(other, list):
        # TODO: same for tuples?
        other = np.asarray(other)

    if isinstance(other, ABCDataFrame):  # pragma: no cover
        # Defer to DataFrame implementation; fail early
        return NotImplemented

    if isinstance(other, ABCSeries) and not self._indexed_same(other):
        raise ValueError("Can only compare identically-labeled Series objects")
    elif isinstance(
        other, (np.ndarray, ABCExtensionArray, ABCIndexClass, ABCSeries)
    ):
        # TODO: make this treatment consistent across ops and classes.
        #  We are not catching all listlikes here (e.g. frozenset, tuple)
        #  The ambiguous case is object-dtype.  See GH#27803
        if len(self) != len(other):
            raise ValueError("Lengths must match to compare")

    lhs = extract_array(self, extract_numpy=True)
    rhs = extract_array(other, extract_numpy=True)

    if should_extension_dispatch(lhs, rhs):
        compared = dispatch_to_extension_op(op, lhs, rhs)
    elif is_scalar(rhs) and isna(rhs):
        # numpy does not like comparisons vs None: only ``!=`` is all-True
        fill = np.ones if op is operator.ne else np.zeros
        compared = fill(len(lhs), dtype=bool)
    else:
        with np.errstate(all="ignore"):
            compared = na_op(lhs, rhs)
        if is_scalar(compared):
            raise TypeError(
                "Could not compare {typ} type with Series".format(typ=type(rhs))
            )

    out = self._constructor(compared, index=self.index)

    # TODO: shouldn't we be applying finalize whenever
    #  not isinstance(other, ABCSeries)?
    if isinstance(other, (np.ndarray, ABCIndexClass)):
        out = out.__finalize__(self)

    # Set the result's name after __finalize__ is called because it
    #  would set it back to self.name
    out.name = res_name
    return out
def dispatch_to_series(left, right, func, axis=None):
    """
    Evaluate the frame operation func(left, right) by evaluating
    column-by-column, dispatching to the Series implementation.

    Parameters
    ----------
    left : DataFrame
    right : scalar, Series, or DataFrame
    func : arithmetic or comparison operator
    axis : {None, 0, 1, "index", "columns"}
        Only the value ``"columns"`` changes how a Series ``right`` is
        aligned (element ``i`` against column ``i``).

    Returns
    -------
    DataFrame
        For scalar and DataFrame operands. For Series operands the value
        returned by ``expressions.evaluate`` is passed back unchanged.

    Raises
    ------
    NotImplementedError
        If ``right`` is none of the supported kinds.
    """
    # Note: we use iloc to access columns for compat with cases
    #       with non-unique columns.
    import pandas.core.computation.expressions as expressions

    right = lib.item_from_zerodim(right)

    if lib.is_scalar(right) or np.ndim(right) == 0:
        # Get the appropriate array-op to apply to each block's values.
        block_op = get_array_op(func)
        return type(left)(left._mgr.apply(block_op, right=right))

    if isinstance(right, ABCDataFrame):
        assert left.index.equals(right.index)
        assert left.columns.equals(right.columns)
        # TODO: The previous assertion `assert right._indexed_same(left)`
        #  fails in cases with empty columns reached via
        #  _frame_arith_method_with_reindex
        block_op = get_array_op(func)
        return type(left)(left._mgr.operate_blockwise(right._mgr, block_op))

    if not isinstance(right, ABCSeries):
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    if axis == "columns":
        # We only get here if called via _combine_series_frame,
        # in which case we specifically want to operate row-by-row
        assert right.index.equals(left.columns)

        if right.dtype == "timedelta64[ns]":
            # ensure we treat NaT values as the correct dtype
            # Note: we do not do this unconditionally as it may be lossy or
            #  expensive for EA dtypes.
            right = np.asarray(right)

            def column_op(a, b):
                out = {}
                for i in range(len(a.columns)):
                    out[i] = func(a.iloc[:, i], b[i])
                return out

        else:

            def column_op(a, b):
                out = {}
                for i in range(len(a.columns)):
                    out[i] = func(a.iloc[:, i], b.iloc[i])
                return out

    else:
        assert right.index.equals(left.index)  # Handle other cases later

        def column_op(a, b):
            out = {}
            for i in range(len(a.columns)):
                out[i] = func(a.iloc[:, i], b)
            return out

    return expressions.evaluate(column_op, left, right)
def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    raise_cast_failure: bool = True,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copying and
    coercing to ``dtype`` when requested.

    Parameters
    ----------
    data : Any
    index : Index or None
        Only consulted for its length when ``data`` is not list-like.
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    raise_cast_failure : bool, default True
        Forwarded to ``_try_cast``; when False, a failed float-to-integer
        cast warns and then casts anyway (GH#40110).
    allow_2d : bool, default False
        If False, raise if we have a 2D Arraylike.

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``data`` is not list-like and ``index`` is None.
    TypeError
        If ``data`` is a set or frozenset (unordered).

    Notes
    -----
    ``raise_cast_failure=False`` is only intended to be passed when called
    from the DataFrame constructor, as the dtype keyword there may be
    interpreted as only applying to a subset of columns, see GH#24435.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    if isinstance(dtype, PandasDtype):
        # Avoid ending up with a PandasArray
        dtype = dtype.numpy_dtype

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        # zero-dim array: keep its dtype (unless overridden) and unbox it
        dtype = data.dtype if dtype is None else dtype
        data = lib.item_from_zerodim(data)
    elif isinstance(data, range):
        # GH#16804
        data = range_to_ndarray(data)
        copy = False

    if not is_list_like(data):
        # scalar input is broadcast to the length of the index
        if index is None:
            raise ValueError("index must be specified when data is not list-like")
        return construct_1d_arraylike_from_scalar(data, len(index), dtype)

    # GH#846
    if isinstance(data, np.ndarray):
        if isinstance(data, np.matrix):
            data = data.A

        float_to_int = (
            dtype is not None
            and is_float_dtype(data.dtype)
            and is_integer_dtype(dtype)
        )
        if not float_to_int:
            # we will try to copy by-definition here
            result = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            # possibility of nan -> garbage
            try:
                result = _try_cast(data, dtype, copy, True)
            except IntCastingNaNError:
                warnings.warn(
                    "In a future version, passing float-dtype values containing NaN "
                    "and an integer dtype will raise IntCastingNaNError "
                    "(subclass of ValueError) instead of silently ignoring the "
                    "passed dtype. To retain the old behavior, call Series(arr) or "
                    "DataFrame(arr) without passing a dtype.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                result = np.array(data, copy=copy)
            except ValueError:
                if not raise_cast_failure:
                    # i.e. called via DataFrame constructor
                    warnings.warn(
                        "In a future version, passing float-dtype values and an "
                        "integer dtype to DataFrame will retain floating dtype "
                        "if they cannot be cast losslessly (matching Series behavior). "
                        "To retain the old behavior, use DataFrame(data).astype(dtype)",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )
                    # GH#40110 until the deprecation is enforced, we _dont_
                    #  ignore the dtype for DataFrame, and _do_ cast even though
                    #  it is lossy.
                    dtype = cast(np.dtype, dtype)
                    return np.array(data, dtype=dtype, copy=copy)
                result = np.array(data, copy=copy)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        if dtype is not None:
            result = data.astype(dtype, copy=copy)
        elif copy:
            result = data.copy()
        else:
            result = data

    else:
        if isinstance(data, (set, frozenset)):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError(f"'{type(data).__name__}' type is unordered")

        # materialize e.g. generators, convert e.g. tuples, abc.ValueView
        if hasattr(data, "__array__"):
            # e.g. dask array GH#38645
            data = np.asarray(data)
        else:
            data = list(data)

        if dtype is not None or len(data) == 0:
            result = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            result = maybe_convert_platform(data)
            if result.dtype == object:
                result = cast(np.ndarray, result)
                result = maybe_infer_to_datetimelike(result)

    result = _sanitize_ndim(result, data, dtype, index, allow_2d=allow_2d)

    if isinstance(result, np.ndarray):
        # at this point we should have dtype be None or result.dtype == dtype
        dtype = cast(np.dtype, dtype)
        result = _sanitize_str_dtypes(result, data, dtype, copy)

    return result
def comparison_op(
    left: ArrayLike,
    right: Any,
    op,
    str_rep: Optional[str] = None,
) -> ArrayLike:
    """
    Evaluate a comparison operation `==`, `!=`, `>=`, `>`, `<=`, or `<`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le}
    str_rep : str or None, default None
        Forwarded to ``na_arithmetic_op``.

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``right`` is an array (or list) whose length differs from
        ``left`` and the two cannot be broadcast.
    """
    # NB: We assume extract_array has already been called on left and right
    lhs = left
    rhs = lib.item_from_zerodim(right)

    if isinstance(rhs, list):
        # TODO: same for tuples?
        rhs = np.asarray(rhs)

    if isinstance(rhs, (np.ndarray, ABCExtensionArray)):
        # TODO: make this treatment consistent across ops and classes.
        #  We are not catching all listlikes here (e.g. frozenset, tuple)
        #  The ambiguous case is object-dtype.  See GH#27803
        if len(lhs) != len(rhs):
            if _can_broadcast(lhs, rhs):
                return _broadcast_comparison_op(lhs, rhs, op)
            raise ValueError(
                "Lengths must match to compare", lhs.shape, rhs.shape
            )

    if should_extension_dispatch(lhs, rhs):
        return dispatch_to_extension_op(op, lhs, rhs)

    if is_scalar(rhs) and isna(rhs):
        # numpy does not like comparisons vs None: only ``!=`` is all-True
        fill = np.ones if op is operator.ne else np.zeros
        return fill(lhs.shape, dtype=bool)

    if is_object_dtype(lhs.dtype):
        return comp_method_OBJECT_ARRAY(op, lhs, rhs)

    with np.errstate(all="ignore"):
        return na_arithmetic_op(lhs, rhs, op, str_rep, is_cmp=True)
def dispatch_to_series(left, right, func, str_rep=None, axis=None):
    """
    Evaluate ``func(left, right)`` one column of ``left`` at a time.

    Parameters
    ----------
    left : DataFrame
    right : scalar, DataFrame or Series
        A DataFrame must be indexed the same as ``left``; a Series must
        match ``left.columns`` when ``axis == "columns"`` and ``left.index``
        otherwise (both enforced with ``assert``).
    func : arithmetic or comparison operator
    str_rep : str or None, default None
        Passed through to ``expressions.evaluate``.
    axis : {None, 0, 1, "index", "columns"}
        Only the value ``"columns"`` changes behaviour here (row-wise
        pairing of a Series with the columns of ``left``).

    Returns
    -------
    DataFrame

    Raises
    ------
    NotImplementedError
        If ``right`` is none of the supported kinds.
    """
    # Columns are addressed by position (iloc) so that frames with
    # non-unique column labels work.
    import pandas.core.computation.expressions as expressions

    right = lib.item_from_zerodim(right)

    # Pick how the right-hand operand for column ``i`` is obtained; the
    # actual column loop is shared below.
    if lib.is_scalar(right) or np.ndim(right) == 0:

        def operand_for(b, i):
            return b

    elif isinstance(right, ABCDataFrame):
        assert right._indexed_same(left)

        def operand_for(b, i):
            return b.iloc[:, i]

    elif isinstance(right, ABCSeries) and axis == "columns":
        # Reached via left._combine_match_columns (per the original note):
        # each column is paired with the i-th element of the Series.
        assert right.index.equals(left.columns)

        def operand_for(b, i):
            return b.iloc[i]

    elif isinstance(right, ABCSeries):
        assert right.index.equals(left.index)  # Handle other cases later

        def operand_for(b, i):
            return b

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    def column_op(a, b):
        return {
            i: func(a.iloc[:, i], operand_for(b, i))
            for i in range(len(a.columns))
        }

    per_column = expressions.evaluate(column_op, str_rep, left, right)

    out = left._constructor(per_column, index=left.index, copy=False)
    # Columns are pinned afterwards rather than passed to the constructor,
    # again for the non-unique columns case.
    out.columns = left.columns
    return out
def __sub__(self, other):
    """
    Subtract ``other`` from this datetime-like array.

    The operand is classified first as a scalar, then as an array-like, and
    the work is handed to the matching ``_sub_*`` / ``_add_*`` helper.
    The order of the checks is significant and must be preserved.

    Returns
    -------
    The helper's result; a raw timedelta64 ``np.ndarray`` result is wrapped
    in ``TimedeltaArrayMixin``.  ``NotImplemented`` is returned for Series,
    DataFrame, extension-array dtypes and anything unrecognised.

    Raises
    ------
    TypeError
        For an Index that matched no earlier branch, or a float-dtype operand.
    """
    other = lib.item_from_zerodim(other)
    if isinstance(other, (ABCSeries, ABCDataFrame)):
        return NotImplemented

    # ---- scalar operands ----
    if other is NaT:
        diff = self._sub_nat()
    elif isinstance(other, (Tick, timedelta, np.timedelta64)):
        diff = self._add_delta(-other)
    elif isinstance(other, DateOffset):
        # specifically _not_ a Tick (Ticks were consumed just above)
        diff = self._add_offset(-other)
    elif isinstance(other, (datetime, np.datetime64)):
        diff = self._sub_datetimelike_scalar(other)
    elif lib.is_integer(other):
        # Must stay after the np.timedelta64 check, because is_integer
        # returns True for those as well.
        if not is_period_dtype(self):
            maybe_integer_op_deprecated(self)
        diff = self._time_shift(-other)
    elif isinstance(other, Period):
        diff = self._sub_period(other)

    # ---- array-like operands ----
    elif is_timedelta64_dtype(other):
        # TimedeltaIndex, ndarray[timedelta64]
        diff = self._add_delta(-other)
    elif is_offsetlike(other):
        # Array/Index of DateOffset objects
        diff = self._addsub_offset_array(other, operator.sub)
    elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
        # DatetimeIndex, ndarray[datetime64]
        diff = self._sub_datetime_arraylike(other)
    elif is_period_dtype(other):
        # PeriodIndex
        diff = self._sub_period_array(other)
    elif is_integer_dtype(other):
        if not is_period_dtype(self):
            maybe_integer_op_deprecated(self)
        diff = self._addsub_int_array(other, operator.sub)
    elif isinstance(other, ABCIndexClass):
        index_msg = "cannot subtract {cls} and {typ}"
        raise TypeError(
            index_msg.format(
                cls=type(self).__name__, typ=type(other).__name__
            )
        )
    elif is_float_dtype(other):
        # Explicitly catch invalid dtypes
        float_msg = "cannot subtract {dtype}-dtype from {cls}"
        raise TypeError(
            float_msg.format(dtype=other.dtype, cls=type(self).__name__)
        )
    elif is_extension_array_dtype(other):
        # Categorical op will raise; defer explicitly
        return NotImplemented
    else:  # pragma: no cover
        return NotImplemented

    if is_timedelta64_dtype(diff) and isinstance(diff, np.ndarray):
        from pandas.core.arrays import TimedeltaArrayMixin

        # TODO: infer freq?
        return TimedeltaArrayMixin(diff)
    return diff
def wrapper(self, other, axis=None):
    """
    Compare this Series with ``other`` element-wise.

    ``op`` and ``na_op`` are free variables that are not defined in this
    block (presumably supplied by an enclosing factory -- confirm).

    Parameters
    ----------
    other : object
        Right-hand operand.  Zero-dim arrays are unboxed and lists are
        converted with ``np.asarray``.
    axis : optional
        Only validated through ``self._get_axis_number``; otherwise unused.

    Returns
    -------
    The object built by ``self._constructor`` from the comparison values,
    renamed to the name computed by ``get_op_result_name``; or
    ``NotImplemented`` when ``other`` is a DataFrame.

    Raises
    ------
    ValueError
        If ``other`` is a Series that is not indexed the same as ``self``,
        or a list-like (other than set/frozenset) of a different length.
    TypeError
        If the fallback ``na_op`` path produces a scalar.
    """
    # Validate the axis parameter
    if axis is not None:
        self._get_axis_number(axis)

    # The result name is computed from the operand as originally passed,
    # before any unboxing/conversion below.
    res_name = get_op_result_name(self, other)
    other = lib.item_from_zerodim(other)

    # TODO: shouldn't we be applying finalize whenever
    #  not isinstance(other, ABCSeries)?
    # NOTE: the lambda looks up `other` when it is *called* (at the very
    # end), so a list converted to ndarray just below counts as an ndarray.
    finalizer = (lambda x: x.__finalize__(self)
                 if isinstance(other, (np.ndarray, ABCIndexClass)) else x)

    if isinstance(other, list):
        # TODO: same for tuples?
        other = np.asarray(other)

    if isinstance(other, ABCDataFrame):  # pragma: no cover
        # Defer to DataFrame implementation; fail early
        return NotImplemented

    if isinstance(other, ABCSeries) and not self._indexed_same(other):
        raise ValueError(
            "Can only compare identically-labeled Series objects")

    elif (is_list_like(other) and len(other) != len(self)
          and not isinstance(other, (set, frozenset))):
        raise ValueError("Lengths must match")

    elif isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries)):
        # TODO: make this treatment consistent across ops and classes.
        #  We are not catching all listlikes here (e.g. frozenset, tuple)
        #  The ambiguous case is object-dtype.  See GH#27803
        # NOTE(review): a list-like length mismatch appears to be caught by
        # the preceding branch already -- confirm whether this is reachable.
        if len(self) != len(other):
            raise ValueError("Lengths must match to compare")

    # Dispatch on the dtype of `self`; branch order matters (categorical and
    # datetime-likes are handled before the generic extension-array case).
    if is_categorical_dtype(self):
        # Dispatch to Categorical implementation; CategoricalIndex
        # behavior is non-canonical GH#19513
        res_values = dispatch_to_extension_op(op, self, other)

    elif is_datetime64_dtype(self) or is_datetime64tz_dtype(self):
        # Dispatch to DatetimeIndex to ensure identical
        # Series/Index behavior
        from pandas.core.arrays import DatetimeArray
        res_values = dispatch_to_extension_op(op, DatetimeArray(self), other)

    elif is_timedelta64_dtype(self):
        from pandas.core.arrays import TimedeltaArray
        res_values = dispatch_to_extension_op(op, TimedeltaArray(self), other)

    elif is_extension_array_dtype(self) or (is_extension_array_dtype(other)
                                            and not is_scalar(other)):
        # Note: the `not is_scalar(other)` condition rules out
        # e.g. other == "category"
        res_values = dispatch_to_extension_op(op, self, other)

    elif is_scalar(other) and isna(other):
        # numpy does not like comparisons vs None
        # Only `!=` is all-True against a missing scalar; every other
        # comparison is all-False.
        if op is operator.ne:
            res_values = np.ones(len(self), dtype=bool)
        else:
            res_values = np.zeros(len(self), dtype=bool)

    else:
        lvalues = extract_array(self, extract_numpy=True)
        rvalues = extract_array(other, extract_numpy=True)

        with np.errstate(all="ignore"):
            res_values = na_op(lvalues, rvalues)
        # A scalar here means the comparison did not happen element-wise.
        if is_scalar(res_values):
            raise TypeError(
                "Could not compare {typ} type with Series".format(
                    typ=type(other)))

    result = self._constructor(res_values, index=self.index)
    # rename is needed in case res_name is None and result.name
    # is not.
    return finalizer(result).rename(res_name)
def __floordiv__(self, other):
    """
    Floor-divide this timedelta array by ``other``.

    Parameters
    ----------
    other : scalar or array-like
        * timedelta-like scalar: dispatched to ``Timedelta.__rfloordiv__``;
          a ``NaT`` scalar yields an all-NaN float64 ndarray.
        * other scalar (expected to be numeric): the i8 values are
          floor-divided and re-wrapped in ``type(self)``; ``freq`` (if any)
          is true-divided by ``other``.
        * timedelta64 array-like: element-wise floor division of the i8
          values, returned as a plain ndarray.
        * object-dtype array-like: element-by-element ``//``.
        * integer or float array-like: ``self._data // other`` re-wrapped
          in ``type(self)``.

    Returns
    -------
    ndarray or ``type(self)``, or ``NotImplemented`` for Series, DataFrame
    and Index operands.

    Raises
    ------
    ValueError
        If an array-like ``other`` has a different length than ``self``.
    TypeError
        If the dtype of ``other`` is not supported.
    """
    if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
        return NotImplemented

    other = lib.item_from_zerodim(other)
    if is_scalar(other):
        if isinstance(other, (timedelta, np.timedelta64, Tick)):
            other = Timedelta(other)
            if other is NaT:
                # treat this specifically as timedelta-NaT
                result = np.empty(self.shape, dtype=np.float64)
                result.fill(np.nan)
                return result

            # dispatch to Timedelta implementation
            result = other.__rfloordiv__(self._data)
            return result

        # at this point we should only have numeric scalars; anything
        #  else will raise
        result = self.asi8 // other
        result[self._isnan] = iNaT
        freq = None
        if self.freq is not None:
            # Note: freq gets division, not floor-division
            freq = self.freq / other
        return type(self)(result.view('m8[ns]'), freq=freq)

    if not hasattr(other, "dtype"):
        # list, tuple
        other = np.array(other)
    if len(other) != len(self):
        raise ValueError("Cannot divide with unequal lengths")

    elif is_timedelta64_dtype(other):
        other = type(self)(other)

        # numpy timedelta64 does not natively support floordiv, so operate
        #  on the i8 values
        result = self.asi8 // other.asi8
        mask = self._isnan | other._isnan
        if mask.any():
            # NaN is only representable in a floating dtype; assigning it
            # into an int64 array raises "cannot convert float NaN to
            # integer", so upcast before masking.
            result = result.astype(np.float64)
            result[mask] = np.nan
        return result

    elif is_object_dtype(other):
        result = [self[n] // other[n] for n in range(len(self))]
        result = np.array(result)
        if lib.infer_dtype(result, skipna=False) == 'timedelta':
            result, _ = sequence_to_td64ns(result)
            return type(self)(result)
        return result

    elif is_integer_dtype(other) or is_float_dtype(other):
        result = self._data // other
        return type(self)(result)

    else:
        dtype = getattr(other, "dtype", type(other).__name__)
        raise TypeError("Cannot divide {typ} by {cls}"
                        .format(typ=dtype, cls=type(self).__name__))
def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike:
    """
    Evaluate a comparison operation `==`, `!=`, `>=`, `>`, `<=`, or `<`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt,
          operator.le}

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``right`` is an ndarray/ExtensionArray whose length differs from
        ``left``.
    """
    # NB: callers are expected to have run extract_array on both operands.
    lhs = ensure_wrapped_if_datetimelike(left)
    rhs = lib.item_from_zerodim(right)
    if isinstance(rhs, list):
        # TODO: same for tuples?
        rhs = np.asarray(rhs)

    # TODO: make this treatment consistent across ops and classes.
    #  Not every list-like is caught here (e.g. frozenset, tuple).
    #  The ambiguous case is object-dtype.  See GH#27803
    if isinstance(rhs, (np.ndarray, ABCExtensionArray)) and len(lhs) != len(
        rhs
    ):
        raise ValueError(
            "Lengths must match to compare", lhs.shape, rhs.shape
        )

    if should_extension_dispatch(lhs, rhs):
        # Call the method on lhs
        return op(lhs, rhs)

    if is_scalar(rhs) and isna(rhs):
        # numpy does not like comparisons vs None: only `!=` yields all-True,
        # every other operator yields all-False.
        make_mask = np.ones if op is operator.ne else np.zeros
        return make_mask(lhs.shape, dtype=bool)

    if is_numeric_v_string_like(lhs, rhs):
        # GH#36377 going through the numexpr path would incorrectly raise
        return invalid_comparison(lhs, rhs, op)

    if is_object_dtype(lhs.dtype):
        return comp_method_OBJECT_ARRAY(op, lhs, rhs)

    with warnings.catch_warnings():
        # suppress warnings from numpy about element-wise comparison
        warnings.simplefilter("ignore", DeprecationWarning)
        with np.errstate(all="ignore"):
            return _na_arithmetic_op(lhs, rhs, op, is_cmp=True)
def sanitize_array(
    data,
    index: Optional[Index],
    dtype: Optional[DtypeObj] = None,
    copy: bool = False,
    raise_cast_failure: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : object
        Masked arrays, ndarrays, ExtensionArrays, list/tuple/set-like
        containers, ``range`` objects, scalars and other list-likes are each
        handled by a dedicated branch below.
    index : Index or None
        Its length is used when ``data`` is not list-like (scalar
        broadcasting); it is also passed to ``_sanitize_ndim``.
    dtype : dtype, optional
        Target dtype.  For a zero-dim ndarray with no ``dtype`` given, the
        array's own dtype is used.
    copy : bool, default False
        Forwarded to the casting/copying helpers.
    raise_cast_failure : bool, default False
        Forwarded to ``_try_cast`` (except on the float -> integer ndarray
        path, which always passes ``True`` and handles the error itself).

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    TypeError
        If ``data`` is a ``set``.
    ValueError
        If ``data`` is not list-like and ``index`` is None.
    """

    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    # Unbox zero-dim arrays to a scalar, remembering their dtype if the
    # caller did not request one.
    if isinstance(data, np.ndarray) and data.ndim == 0:
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(
                data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            # Force _try_cast to raise on failure; on ValueError the
            # requested integer dtype is dropped and the data kept as-is.
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        # Early return: extension arrays skip the ndim/str/inference
        # post-processing at the bottom of this function.
        return subarr

    elif isinstance(data, (list, tuple, abc.Set,
                           abc.ValuesView)) and len(data) > 0:
        # TODO: deque, array.array
        if isinstance(data, set):
            # Raise only for unordered sets, e.g., not for dict_keys
            # NOTE(review): a frozenset is not an instance of `set`, so it
            # is not rejected here -- confirm whether that is intended.
            raise TypeError("Set type is unordered")
        data = list(data)

        if dtype is not None:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        arr = np.arange(data.start, data.stop, data.step, dtype="int64")
        subarr = _try_cast(arr, dtype, copy, raise_cast_failure)
    elif not is_list_like(data):
        # Scalar: broadcast to the length of the index.
        if index is None:
            raise ValueError(
                "index must be specified when data is not list-like")
        subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype)
    else:
        # Remaining list-likes, including the empty containers excluded by
        # the `len(data) > 0` check above.
        subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    subarr = _sanitize_ndim(subarr, data, dtype, index)

    if not (is_extension_array_dtype(subarr.dtype)
            or is_extension_array_dtype(dtype)):
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

        is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype(
            dtype)
        # Object-dtype results that were not explicitly requested as
        # object/str are re-inferred; interval/period values are rebuilt
        # through `array(...)`.
        if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype:
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)

    return subarr
def wrapper(self, other, axis=None):
    """
    Compare this Series with ``other`` element-wise.

    ``op`` and ``na_op`` are free variables that are not defined in this
    block (presumably supplied by an enclosing factory -- confirm).

    Parameters
    ----------
    other : object
        Right-hand operand.
    axis : optional
        Only validated through ``self._get_axis_number``; otherwise unused.

    Returns
    -------
    A Series-like result, or ``NotImplemented`` when ``other`` is a
    DataFrame.

    Raises
    ------
    ValueError
        If ``other`` is a Series that is not indexed the same as ``self``,
        or an ndarray/Index of a different length.
    TypeError
        If ``other`` is a Categorical but ``self`` is not categorical, or if
        the comparison produces a scalar.
    """
    # Validate the axis parameter
    if axis is not None:
        self._get_axis_number(axis)

    if isinstance(other, ABCSeries):
        name = com._maybe_match_name(self, other)
        if not self._indexed_same(other):
            raise ValueError(
                'Can only compare identically-labeled Series objects')
        return self._constructor(
            na_op(self.values, other.values), index=self.index, name=name)

    if isinstance(other, ABCDataFrame):  # pragma: no cover
        return NotImplemented

    if isinstance(other, (np.ndarray, pd.Index)):
        # A zero-dim array broadcasts, so its length is not checked.
        unboxed_is_scalar = is_scalar(lib.item_from_zerodim(other))
        if not unboxed_is_scalar and len(self) != len(other):
            raise ValueError('Lengths must match to compare')

        if isinstance(other, ABCPeriodIndex):
            # temp workaround until fixing GH 13637
            # tested in test_nat_comparisons
            # (pandas.tests.series.test_operators.TestSeriesOperators)
            return self._constructor(
                na_op(self.values, other.astype(object).values),
                index=self.index)

        return self._constructor(
            na_op(self.values, np.asarray(other)),
            index=self.index).__finalize__(self)

    if isinstance(other, pd.Categorical) and not is_categorical_dtype(self):
        template = ("Cannot compare a Categorical for op {op} with Series "
                    "of dtype {typ}.\nIf you want to compare values, use "
                    "'series <op> np.asarray(other)'.")
        raise TypeError(template.format(op=op, typ=self.dtype))

    if is_categorical_dtype(self):
        # Categoricals are special: get_values() would hand back an ndarray
        # and lose the category ordering, so go straight to op (na_op would
        # only test again and dispatch to it).
        with np.errstate(all='ignore'):
            outcome = op(self.values, other)
    else:
        lhs_values = self.get_values()
        if isinstance(other, (list, np.ndarray)):
            other = np.asarray(other)

        with np.errstate(all='ignore'):
            outcome = na_op(lhs_values, other)
        if is_scalar(outcome):
            raise TypeError(
                'Could not compare {typ} type with Series'.format(
                    typ=type(other)))

        # always return a full value series here
        outcome = com._values_from_object(outcome)

    return pd.Series(outcome, index=self.index, name=self.name,
                     dtype='bool')