def inherit_from_data(name: str, delegate, cache: bool = False):
    """
    Make an alias for a method of the underlying ExtensionArray.

    The returned object is meant to be assigned as a class attribute on a
    class whose instances hold the wrapped array in ``self._data``; every
    wrapper produced here forwards to ``self._data``.

    Parameters
    ----------
    name : str
        Name of an attribute the class should inherit from its EA parent.
    delegate : class
        Class on which ``name`` is looked up.
    cache : bool, default False
        Whether to convert wrapped properties into cache_readonly.
        Only consulted when the attribute is a property.

    Returns
    -------
    attribute, method, property, or cache_readonly
    """
    attr = getattr(delegate, name)

    if isinstance(attr, property):
        if cache:
            # Delegate to ``self._data`` like every other branch.  Wrapping
            # ``attr.fget`` directly would evaluate the array's getter with
            # the wrapping object as ``self``, which only works if that
            # object happens to expose the same internals as the array.
            def cached(self):
                return getattr(self._data, name)

            cached.__name__ = name
            cached.__doc__ = attr.__doc__
            method = cache_readonly(cached)

        else:

            def fget(self):
                return getattr(self._data, name)

            def fset(self, value):
                setattr(self._data, name, value)

            fget.__name__ = name
            fget.__doc__ = attr.__doc__

            method = property(fget, fset)

    elif not callable(attr):
        # just a normal attribute, no wrapping
        method = attr

    else:

        def method(self, *args, **kwargs):
            # ``attr`` was looked up on the class, so the array instance is
            # passed explicitly as its first argument.
            result = attr(self._data, *args, **kwargs)
            return result

        method.__name__ = name
        method.__doc__ = attr.__doc__

    return method
class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):
    """
    common ops mixin to support a unified interface datetimelike Index
    """

    # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
    # properties there.  They can be made into cache_readonly for Index
    # subclasses bc they are immutable
    inferred_freq = cache_readonly(DatetimeLikeArrayMixin.inferred_freq.fget)
    _isnan = cache_readonly(DatetimeLikeArrayMixin._isnan.fget)
    hasnans = cache_readonly(DatetimeLikeArrayMixin.hasnans.fget)
    _resolution = cache_readonly(DatetimeLikeArrayMixin._resolution.fget)
    resolution = cache_readonly(DatetimeLikeArrayMixin.resolution.fget)

    def equals(self, other):
        """
        Determines if two Index objects contain the same elements.

        Non-Index objects are never equal.  An Index of a different class
        is first coerced to ``type(self)``; if that coercion raises, the
        result is False.
        """
        if self.is_(other):
            return True

        if not isinstance(other, ABCIndexClass):
            return False
        elif not isinstance(other, type(self)):
            try:
                other = type(self)(other)
            except Exception:
                # best-effort coercion: anything that cannot be converted
                # is treated as "not equal"
                return False

        if not is_dtype_equal(self.dtype, other.dtype):
            # have different timezone
            return False

        elif is_period_dtype(self):
            if not is_period_dtype(other):
                return False
            if self.freq != other.freq:
                return False

        return np.array_equal(self.asi8, other.asi8)

    @staticmethod
    def _join_i8_wrapper(joinf, dtype, with_indexers=True):
        """
        create the join wrapper methods

        The returned wrapper views ndarray/Index/Series inputs as ``'i8'``
        before calling ``joinf``.  When ``with_indexers`` is True the
        result of ``joinf`` is unpacked as a 3-tuple and its first element
        is viewed as ``dtype``; otherwise the result is returned untouched.
        """

        @staticmethod
        def wrapper(left, right):
            if isinstance(left, (np.ndarray, ABCIndex, ABCSeries)):
                left = left.view('i8')
            if isinstance(right, (np.ndarray, ABCIndex, ABCSeries)):
                right = right.view('i8')
            results = joinf(left, right)
            if with_indexers:
                join_index, left_indexer, right_indexer = results
                join_index = join_index.view(dtype)
                return join_index, left_indexer, right_indexer
            return results

        return wrapper

    # NOTE: no docstring here on purpose; Appender supplies it.
    @Appender(DatetimeLikeArrayMixin._evaluate_compare.__doc__)
    def _evaluate_compare(self, other, op):
        result = DatetimeLikeArrayMixin._evaluate_compare(self, other, op)
        if is_bool_dtype(result):
            return result
        try:
            # non-boolean results are wrapped in an Index when possible
            return Index(result)
        except TypeError:
            return result

    def _ensure_localized(self, arg, ambiguous='raise', from_utc=False):
        """
        ensure that we are re-localized

        This is for compat as we can then call this on all datetimelike
        indexes generally (ignored for Period/Timedelta)

        Parameters
        ----------
        arg : DatetimeIndex / i8 ndarray
        ambiguous : str, bool, or bool-ndarray, default 'raise'
        from_utc : bool, default False
            If True, localize the i8 ndarray to UTC first before converting to
            the appropriate tz. If False, localize directly to the tz.

        Returns
        -------
        localized DTI
        """
        # reconvert to local tz
        if getattr(self, 'tz', None) is not None:
            if not isinstance(arg, ABCIndexClass):
                arg = self._simple_new(arg)
            if from_utc:
                arg = arg.tz_localize('UTC').tz_convert(self.tz)
            else:
                arg = arg.tz_localize(self.tz, ambiguous=ambiguous)
        return arg

    def _box_values_as_index(self):
        """
        return object Index which contains boxed values
        """
        from pandas.core.index import Index
        return Index(self._box_values(self.asi8), name=self.name, dtype=object)

    def _format_with_header(self, header, **kwargs):
        """
        Return ``header`` followed by the natively-formatted values.
        """
        return header + list(self._format_native_types(**kwargs))

    # NOTE: no docstring here on purpose; Appender supplies it.
    @Appender(_index_shared_docs['__contains__'] % _index_doc_kwargs)
    def __contains__(self, key):
        try:
            res = self.get_loc(key)
            # Coerce to bool: the last operand may be ``len(res)`` (an
            # int), which would leak out through the public ``contains``
            # alias that is called as a plain method.
            return bool(is_scalar(res) or isinstance(res, slice) or
                        (is_list_like(res) and len(res)))
        except (KeyError, TypeError, ValueError):
            return False

    contains = __contains__

    # Try to run function on index first, and then on elements of index
    # Especially important for group-by functionality
    def map(self, f):
        """
        Apply ``f`` to the index as a whole, falling back to elementwise.

        If calling ``f(self)`` raises, or does not produce an Index (an
        ndarray result is wrapped in an Index first), the mapping is
        retried through ``self.astype(object).map(f)``.
        """
        try:
            result = f(self)

            # Try to use this result if we can
            if isinstance(result, np.ndarray):
                result = Index(result)

            if not isinstance(result, Index):
                raise TypeError('The map function must return an Index object')
            return result
        except Exception:
            # includes the TypeError raised just above
            return self.astype(object).map(f)

    def sort_values(self, return_indexer=False, ascending=True):
        """
        Return sorted copy of Index

        Parameters
        ----------
        return_indexer : bool, default False
            If True, also return the indexer that sorts the index.
        ascending : bool, default True

        Returns
        -------
        sorted index, or tuple of (sorted index, indexer) when
        ``return_indexer`` is True
        """
        if return_indexer:
            _as = self.argsort()
            if not ascending:
                _as = _as[::-1]
            sorted_index = self.take(_as)
            return sorted_index, _as
        else:
            sorted_values = np.sort(self._ndarray_values)
            attribs = self._get_attributes_dict()
            freq = attribs['freq']

            if freq is not None and not is_period_dtype(self):
                # flip the sign of freq so that it matches the direction
                # of the sorted result
                if freq.n > 0 and not ascending:
                    freq = freq * -1
                elif freq.n < 0 and ascending:
                    freq = freq * -1
            attribs['freq'] = freq

            if not ascending:
                sorted_values = sorted_values[::-1]

            return self._simple_new(sorted_values, **attribs)

    # NOTE: no docstring here on purpose; Appender supplies it.
    @Appender(_index_shared_docs['take'] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True,
             fill_value=None, **kwargs):
        nv.validate_take(tuple(), kwargs)
        indices = ensure_int64(indices)

        # indices equivalent to a slice are served by plain slicing
        maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
        if isinstance(maybe_slice, slice):
            return self[maybe_slice]

        taken = self._assert_take_fillable(self.asi8, indices,
                                           allow_fill=allow_fill,
                                           fill_value=fill_value,
                                           na_value=iNaT)

        # keep freq in PeriodArray/Index, reset otherwise
        freq = self.freq if is_period_dtype(self) else None
        return self._shallow_copy(taken, freq=freq)

    _can_hold_na = True

    _na_value = NaT
    """The expected NA value to use with this index."""

    @property
    def asobject(self):
        """Return object Index which contains boxed values.

        .. deprecated:: 0.23.0
            Use ``astype(object)`` instead.

        *this is an internal non-public method*
        """
        warnings.warn("'asobject' is deprecated. Use 'astype(object)'"
                      " instead", FutureWarning, stacklevel=2)
        return self.astype(object)

    def _convert_tolerance(self, tolerance, target):
        """
        Convert ``tolerance`` via ``to_timedelta`` into an ndarray.

        Raises
        ------
        ValueError
            If ``tolerance`` has more than one element and its size does
            not match ``target.size``.
        """
        tolerance = np.asarray(to_timedelta(tolerance, box=False))
        if target.size != tolerance.size and tolerance.size > 1:
            raise ValueError('list-like tolerance size must match '
                             'target index size')
        return tolerance

    def tolist(self):
        """
        return a list of the underlying data
        """
        return list(self.astype(object))

    def min(self, axis=None, *args, **kwargs):
        """
        Return the minimum value of the Index or minimum along
        an axis.

        NA entries are skipped; if computing the minimum raises
        ValueError, the NA value of this index is returned.

        See Also
        --------
        numpy.ndarray.min
        """
        nv.validate_min(args, kwargs)

        try:
            i8 = self.asi8

            # quick check
            if len(i8) and self.is_monotonic:
                if i8[0] != iNaT:
                    return self._box_func(i8[0])

            if self.hasnans:
                min_stamp = self[~self._isnan].asi8.min()
            else:
                min_stamp = i8.min()
            return self._box_func(min_stamp)
        except ValueError:
            return self._na_value

    def argmin(self, axis=None, *args, **kwargs):
        """
        Returns the indices of the minimum values along an axis.

        See `numpy.ndarray.argmin` for more information on the
        `axis` parameter.

        Returns -1 when every entry is NA.

        See Also
        --------
        numpy.ndarray.argmin
        """
        nv.validate_argmin(args, kwargs)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all():
                return -1
            i8 = i8.copy()
            # push NA slots to the largest int64 so they never win
            i8[mask] = np.iinfo('int64').max
        return i8.argmin()

    def max(self, axis=None, *args, **kwargs):
        """
        Return the maximum value of the Index or maximum along
        an axis.

        NA entries are skipped; if computing the maximum raises
        ValueError, the NA value of this index is returned.

        See Also
        --------
        numpy.ndarray.max
        """
        nv.validate_max(args, kwargs)

        try:
            i8 = self.asi8

            # quick check
            if len(i8) and self.is_monotonic:
                if i8[-1] != iNaT:
                    return self._box_func(i8[-1])

            if self.hasnans:
                max_stamp = self[~self._isnan].asi8.max()
            else:
                max_stamp = i8.max()
            return self._box_func(max_stamp)
        except ValueError:
            return self._na_value

    def argmax(self, axis=None, *args, **kwargs):
        """
        Returns the indices of the maximum values along an axis.

        See `numpy.ndarray.argmax` for more information on the
        `axis` parameter.

        Returns -1 when every entry is NA.

        See Also
        --------
        numpy.ndarray.argmax
        """
        nv.validate_argmax(args, kwargs)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all():
                return -1
            i8 = i8.copy()
            # Push NA slots to the smallest int64 so they never win.
            # Filling with 0 would beat every valid negative i8 value
            # and make argmax point at an NA entry.
            i8[mask] = np.iinfo('int64').min
        return i8.argmax()

    @property
    def _formatter_func(self):
        """
        Abstract; always raises AbstractMethodError here.
        """
        raise com.AbstractMethodError(self)

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value)
        """
        attrs = super(DatetimeIndexOpsMixin, self)._format_attrs()
        for attrib in self._attributes:
            if attrib == 'freq':
                freq = self.freqstr
                if freq is not None:
                    # quote the frequency string for display
                    freq = "'%s'" % freq
                attrs.append(('freq', freq))
        return attrs

    def _convert_scalar_indexer(self, key, kind=None):
        """
        we don't allow integer or float indexing on datetime-like when using
        loc

        Parameters
        ----------
        key : label of the slice bound
        kind : {'ix', 'loc', 'getitem', 'iloc'} or None
        """

        assert kind in ['ix', 'loc', 'getitem', 'iloc', None]

        # we don't allow integer/float indexing for loc
        # we don't allow float indexing for ix/getitem
        if is_scalar(key):
            is_int = is_integer(key)
            is_flt = is_float(key)
            if kind in ['loc'] and (is_int or is_flt):
                self._invalid_indexer('index', key)
            elif kind in ['ix', 'getitem'] and is_flt:
                self._invalid_indexer('index', key)

        return (super(DatetimeIndexOpsMixin, self)
                ._convert_scalar_indexer(key, kind=kind))

    @classmethod
    def _add_datetimelike_methods(cls):
        """
        add in the datetimelike methods (as we may have to override the
        superclass)

        Installs ``__add__``, ``__radd__``, ``__sub__`` and ``__rsub__``
        on ``cls``.
        """

        def __add__(self, other):
            # dispatch to ExtensionArray implementation
            result = super(cls, self).__add__(other)
            return wrap_arithmetic_op(self, other, result)

        cls.__add__ = __add__

        def __radd__(self, other):
            # alias for __add__
            return self.__add__(other)
        cls.__radd__ = __radd__

        def __sub__(self, other):
            # dispatch to ExtensionArray implementation
            result = super(cls, self).__sub__(other)
            return wrap_arithmetic_op(self, other, result)

        cls.__sub__ = __sub__

        def __rsub__(self, other):
            result = super(cls, self).__rsub__(other)
            return wrap_arithmetic_op(self, other, result)

        cls.__rsub__ = __rsub__

    def isin(self, values):
        """
        Compute boolean array of whether each index value is found in the
        passed set of values

        Parameters
        ----------
        values : set or sequence of values

        Returns
        -------
        is_contained : ndarray (boolean dtype)
        """
        if not isinstance(values, type(self)):
            try:
                values = type(self)(values)
            except ValueError:
                # values cannot be coerced to this index type; compare
                # as objects instead
                return self.astype(object).isin(values)

        return algorithms.isin(self.asi8, values.asi8)

    def repeat(self, repeats, *args, **kwargs):
        """
        Analogous to ndarray.repeat
        """
        nv.validate_repeat(args, kwargs)
        # freq is only carried over for period dtype
        if is_period_dtype(self):
            freq = self.freq
        else:
            freq = None
        return self._shallow_copy(self.asi8.repeat(repeats),
                                  freq=freq)

    # NOTE: no docstring here on purpose; Appender supplies it.
    @Appender(_index_shared_docs['where'] % _index_doc_kwargs)
    def where(self, cond, other=None):
        other = _ensure_datetimelike_to_i8(other, to_utc=True)
        values = _ensure_datetimelike_to_i8(self, to_utc=True)
        result = np.where(cond, values, other).astype('i8')

        result = self._ensure_localized(result, from_utc=True)
        return self._shallow_copy(result,
                                  **self._get_attributes_dict())

    def _summary(self, name=None):
        """
        Return a summarized representation

        Parameters
        ----------
        name : str
            name to use in the summary representation

        Returns
        -------
        String with a summarized representation of the index
        """
        formatter = self._formatter_func
        if len(self) > 0:
            index_summary = ', %s to %s' % (formatter(self[0]),
                                            formatter(self[-1]))
        else:
            index_summary = ''

        if name is None:
            name = type(self).__name__
        result = '%s: %s entries%s' % (printing.pprint_thing(name),
                                       len(self), index_summary)
        if self.freq:
            result += '\nFreq: %s' % self.freqstr

        # display as values, not quoted
        result = result.replace("'", "")
        return result

    def _concat_same_dtype(self, to_concat, name):
        """
        Concatenate to_concat which has the same class
        """
        attribs = self._get_attributes_dict()
        attribs['name'] = name

        if not is_period_dtype(self):
            # reset freq
            attribs['freq'] = None

        if getattr(self, 'tz', None) is not None:
            return _concat._concat_datetimetz(to_concat, name)
        else:
            new_data = np.concatenate([c.asi8 for c in to_concat])
        return self._simple_new(new_data, **attribs)

    def astype(self, dtype, copy=True):
        """
        Cast to ``dtype``.

        object, string and integer dtypes are handled here; casting
        between differing datetime/timedelta dtypes, or to float, raises
        TypeError.  Everything else is deferred to the parent class.
        """
        if is_object_dtype(dtype):
            return self._box_values_as_index()
        elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
            return Index(self.format(), name=self.name, dtype=object)
        elif is_integer_dtype(dtype):
            return Index(self.values.astype('i8', copy=copy), name=self.name,
                         dtype='i8')
        elif (is_datetime_or_timedelta_dtype(dtype) and
              not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
            # disallow conversion between datetime/timedelta,
            # and conversions for any datetimelike to float
            msg = 'Cannot cast {name} to dtype {dtype}'
            raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
        return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)
class DatetimeIndexOpsMixin(ExtensionOpsMixin):
    """
    Common ops mixin to support a unified interface datetimelike Index.
    """

    _data: ExtensionArray

    # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
    # properties there.  They can be made into cache_readonly for Index
    # subclasses bc they are immutable
    inferred_freq = cache_readonly(
        DatetimeLikeArrayMixin.inferred_freq.fget  # type: ignore
    )
    _isnan = cache_readonly(DatetimeLikeArrayMixin._isnan.fget)  # type: ignore
    hasnans = cache_readonly(
        DatetimeLikeArrayMixin._hasnans.fget)  # type: ignore
    _hasnans = hasnans  # for index / array -agnostic code
    _resolution = cache_readonly(
        DatetimeLikeArrayMixin._resolution.fget  # type: ignore
    )
    resolution = cache_readonly(
        DatetimeLikeArrayMixin.resolution.fget)  # type: ignore

    _maybe_mask_results = ea_passthrough(
        DatetimeLikeArrayMixin._maybe_mask_results)
    __iter__ = ea_passthrough(DatetimeLikeArrayMixin.__iter__)
    mean = ea_passthrough(DatetimeLikeArrayMixin.mean)

    @property
    def freq(self):
        """
        Return the frequency object if it is set, otherwise None.
        """
        return self._data.freq

    @property
    def freqstr(self):
        """
        Return the frequency object as a string if it is set, otherwise None.
        """
        return self._data.freqstr

    def unique(self, level=None):
        """
        Return the unique values of the underlying array as a new index.

        ``level``, when given, is only validated.
        """
        if level is not None:
            self._validate_index_level(level)

        result = self._data.unique()

        # Note: if `self` is already unique, then self.unique() should share
        #  a `freq` with self.  If not already unique, then self.freq must be
        #  None, so again sharing freq is correct.
        return self._shallow_copy(result._data)

    @classmethod
    def _create_comparison_method(cls, op):
        """
        Create a comparison method that dispatches to ``cls.values``.
        """

        def wrapper(self, other):
            if isinstance(other, ABCSeries):
                # the arrays defer to Series for comparison ops but the indexes
                #  don't, so we have to unwrap here.
                other = other._values

            result = op(self._data, maybe_unwrap_index(other))
            return result

        wrapper.__doc__ = op.__doc__
        wrapper.__name__ = f"__{op.__name__}__"
        return wrapper

    @property
    def _ndarray_values(self) -> np.ndarray:
        """
        Forward to ``self._data._ndarray_values``.
        """
        return self._data._ndarray_values

    # ------------------------------------------------------------------------
    # Abstract data attributes

    @property
    def values(self):
        # Note: PeriodArray overrides this to return an ndarray of objects.
        return self._data._data

    # NOTE: no docstring here on purpose; Appender supplies it.
    @property  # type: ignore # https://github.com/python/mypy/issues/1362
    @Appender(DatetimeLikeArrayMixin.asi8.__doc__)
    def asi8(self):
        return self._data.asi8

    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc.

        Boolean and scalar results are returned as-is; anything else is
        wrapped in an Index built with this index's attributes.
        """
        result = lib.item_from_zerodim(result)
        if is_bool_dtype(result) or lib.is_scalar(result):
            return result

        attrs = self._get_attributes_dict()
        if not is_period_dtype(self) and attrs["freq"]:
            # no need to infer if freq is None
            attrs["freq"] = "infer"
        return Index(result, **attrs)

    # ------------------------------------------------------------------------

    def equals(self, other):
        """
        Determines if two Index objects contain the same elements.

        Non-Index objects are never equal.  An Index of a different class
        is first coerced to ``type(self)``; if that coercion raises
        ValueError, TypeError or OverflowError, the result is False.
        """
        if self.is_(other):
            return True

        if not isinstance(other, ABCIndexClass):
            return False
        elif not isinstance(other, type(self)):
            try:
                other = type(self)(other)
            except (ValueError, TypeError, OverflowError):
                # e.g.
                #  ValueError -> cannot parse str entry, or OutOfBoundsDatetime
                #  TypeError  -> trying to convert IntervalIndex to DatetimeIndex
                #  OverflowError -> Index([very_large_timedeltas])
                return False

        if not is_dtype_equal(self.dtype, other.dtype):
            # have different timezone
            return False

        elif is_period_dtype(self):
            if not is_period_dtype(other):
                return False
            if self.freq != other.freq:
                return False

        return np.array_equal(self.asi8, other.asi8)

    @staticmethod
    def _join_i8_wrapper(joinf, dtype, with_indexers=True):
        """
        Create the join wrapper methods.

        The returned wrapper views ndarray/Index/Series/datetimelike-array
        inputs as ``"i8"`` before calling ``joinf``.  When
        ``with_indexers`` is True the result of ``joinf`` is unpacked as a
        3-tuple and its first element is viewed as ``dtype``; otherwise
        the result is returned untouched.
        """
        from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin

        @staticmethod
        def wrapper(left, right):
            if isinstance(
                left, (np.ndarray, ABCIndex, ABCSeries, DatetimeLikeArrayMixin)
            ):
                left = left.view("i8")
            if isinstance(
                right, (np.ndarray, ABCIndex, ABCSeries, DatetimeLikeArrayMixin)
            ):
                right = right.view("i8")
            results = joinf(left, right)
            if with_indexers:
                join_index, left_indexer, right_indexer = results
                join_index = join_index.view(dtype)
                return join_index, left_indexer, right_indexer
            return results

        return wrapper

    def _ensure_localized(
        self, arg, ambiguous="raise", nonexistent="raise", from_utc=False
    ):
        # See DatetimeLikeArrayMixin._ensure_localized.__doc__
        if getattr(self, "tz", None):
            # ensure_localized is only relevant for tz-aware DTI
            result = self._data._ensure_localized(
                arg, ambiguous=ambiguous, nonexistent=nonexistent,
                from_utc=from_utc
            )
            return type(self)._simple_new(result, name=self.name)
        return arg

    def _box_values(self, values):
        """
        Forward to ``self._data._box_values``.
        """
        return self._data._box_values(values)

    # NOTE: no docstring here on purpose; Appender supplies it.
    @Appender(_index_shared_docs["contains"] % _index_doc_kwargs)
    def __contains__(self, key):
        try:
            res = self.get_loc(key)
            # Coerce to bool: the last operand may be ``len(res)`` (an
            # int), which would leak out when the method is called
            # directly rather than through ``in``.
            return bool(
                is_scalar(res)
                or isinstance(res, slice)
                or (is_list_like(res) and len(res))
            )
        except (KeyError, TypeError, ValueError):
            return False

    # Try to run function on index first, and then on elements of index
    # Especially important for group-by functionality
    def map(self, mapper, na_action=None):
        """
        Apply ``mapper`` to the index as a whole, falling back to
        elementwise.

        If calling ``mapper(self)`` raises, or does not produce an Index
        (an ndarray result is wrapped in an Index first), the mapping is
        retried through ``self.astype(object).map(mapper)``.

        ``na_action`` is accepted but not used by this implementation.
        """
        try:
            result = mapper(self)

            # Try to use this result if we can
            if isinstance(result, np.ndarray):
                result = Index(result)

            if not isinstance(result, Index):
                raise TypeError("The map function must return an Index object")
            return result
        except Exception:
            # includes the TypeError raised just above
            return self.astype(object).map(mapper)

    def sort_values(self, return_indexer=False, ascending=True):
        """
        Return sorted copy of Index.

        Parameters
        ----------
        return_indexer : bool, default False
            If True, also return the indexer that sorts the index.
        ascending : bool, default True

        Returns
        -------
        sorted index, or tuple of (sorted index, indexer) when
        ``return_indexer`` is True
        """
        if return_indexer:
            _as = self.argsort()
            if not ascending:
                _as = _as[::-1]
            sorted_index = self.take(_as)
            return sorted_index, _as
        else:
            # NB: using asi8 instead of _ndarray_values matters in numpy 1.18
            #  because the treatment of NaT has been changed to put NaT last
            #  instead of first.
            sorted_values = np.sort(self.asi8)
            attribs = self._get_attributes_dict()
            freq = attribs["freq"]

            if freq is not None and not is_period_dtype(self):
                # flip the sign of freq so that it matches the direction
                # of the sorted result
                if freq.n > 0 and not ascending:
                    freq = freq * -1
                elif freq.n < 0 and ascending:
                    freq = freq * -1
            attribs["freq"] = freq

            if not ascending:
                sorted_values = sorted_values[::-1]

            return self._simple_new(sorted_values, **attribs)

    # NOTE: no docstring here on purpose; Appender supplies it.
    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True, fill_value=None,
             **kwargs):
        nv.validate_take(tuple(), kwargs)
        indices = ensure_int64(indices)

        # indices equivalent to a slice are served by plain slicing
        maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
        if isinstance(maybe_slice, slice):
            return self[maybe_slice]

        taken = self._assert_take_fillable(
            self.asi8,
            indices,
            allow_fill=allow_fill,
            fill_value=fill_value,
            na_value=iNaT,
        )

        # keep freq in PeriodArray/Index, reset otherwise
        freq = self.freq if is_period_dtype(self) else None
        return self._shallow_copy(taken, freq=freq)

    _can_hold_na = True

    _na_value = NaT
    """The expected NA value to use with this index."""

    def _convert_tolerance(self, tolerance, target):
        """
        Convert ``tolerance`` via ``to_timedelta`` into an ndarray.

        Raises
        ------
        ValueError
            If ``tolerance`` has more than one element and its size does
            not match ``target.size``.
        """
        tolerance = np.asarray(to_timedelta(tolerance).to_numpy())

        if target.size != tolerance.size and tolerance.size > 1:
            raise ValueError(
                "list-like tolerance size must match target index size")
        return tolerance

    def tolist(self) -> List:
        """
        Return a list of the underlying data.
        """
        return list(self.astype(object))

    def min(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the minimum value of the Index or minimum along
        an axis.

        Returns the NA value of this index when the index is empty, or
        when it contains NA entries and ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.min
        Series.min : Return the minimum value in a Series.
        """
        nv.validate_min(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8
        try:
            # quick check
            if len(i8) and self.is_monotonic:
                if i8[0] != iNaT:
                    return self._box_func(i8[0])

            if self.hasnans:
                if skipna:
                    min_stamp = self[~self._isnan].asi8.min()
                else:
                    return self._na_value
            else:
                min_stamp = i8.min()
            return self._box_func(min_stamp)
        except ValueError:
            return self._na_value

    def argmin(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the minimum values along an axis.

        See `numpy.ndarray.argmin` for more information on the
        `axis` parameter.

        Returns -1 when every entry is NA, or when there are NA entries
        and ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.argmin
        """
        nv.validate_argmin(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            # push NA slots to the largest int64 so they never win
            i8[mask] = np.iinfo("int64").max
        return i8.argmin()

    def max(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the maximum value of the Index or maximum along
        an axis.

        Returns the NA value of this index when the index is empty, or
        when it contains NA entries and ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.max
        Series.max : Return the maximum value in a Series.
        """
        nv.validate_max(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8
        try:
            # quick check
            if len(i8) and self.is_monotonic:
                if i8[-1] != iNaT:
                    return self._box_func(i8[-1])

            if self.hasnans:
                if skipna:
                    max_stamp = self[~self._isnan].asi8.max()
                else:
                    return self._na_value
            else:
                max_stamp = i8.max()
            return self._box_func(max_stamp)
        except ValueError:
            return self._na_value

    def argmax(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the maximum values along an axis.

        See `numpy.ndarray.argmax` for more information on the
        `axis` parameter.

        Returns -1 when every entry is NA, or when there are NA entries
        and ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.argmax
        """
        nv.validate_argmax(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            # Push NA slots to the smallest int64 so they never win.
            # Filling with 0 would beat every valid negative i8 value
            # and make argmax point at an NA entry.
            i8[mask] = np.iinfo("int64").min
        return i8.argmax()

    # --------------------------------------------------------------------
    # Rendering Methods

    def _format_with_header(self, header, na_rep="NaT", **kwargs):
        """
        Return ``header`` followed by the natively-formatted values.
        """
        return header + list(self._format_native_types(na_rep, **kwargs))

    @property
    def _formatter_func(self):
        """
        Abstract; always raises AbstractMethodError here.
        """
        raise AbstractMethodError(self)

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value).
        """
        attrs = super()._format_attrs()
        for attrib in self._attributes:
            if attrib == "freq":
                freq = self.freqstr
                if freq is not None:
                    freq = repr(freq)
                attrs.append(("freq", freq))
        return attrs

    # --------------------------------------------------------------------

    def _convert_scalar_indexer(self, key, kind=None):
        """
        We don't allow integer or float indexing on datetime-like when using
        loc.

        Parameters
        ----------
        key : label of the slice bound
        kind : {'ix', 'loc', 'getitem', 'iloc'} or None
        """

        assert kind in ["ix", "loc", "getitem", "iloc", None]

        # we don't allow integer/float indexing for loc
        # we don't allow float indexing for ix/getitem
        if is_scalar(key):
            is_int = is_integer(key)
            is_flt = is_float(key)
            if kind in ["loc"] and (is_int or is_flt):
                self._invalid_indexer("index", key)
            elif kind in ["ix", "getitem"] and is_flt:
                self._invalid_indexer("index", key)

        return super()._convert_scalar_indexer(key, kind=kind)

    @classmethod
    def _add_datetimelike_methods(cls):
        """
        Add in the datetimelike methods (as we may have to override the
        superclass).

        Installs ``__add__``, ``__radd__``, ``__sub__`` and ``__rsub__``
        on ``cls``.
        """

        def __add__(self, other):
            # dispatch to ExtensionArray implementation
            result = self._data.__add__(maybe_unwrap_index(other))
            return wrap_arithmetic_op(self, other, result)

        cls.__add__ = __add__

        def __radd__(self, other):
            # alias for __add__
            return self.__add__(other)

        cls.__radd__ = __radd__

        def __sub__(self, other):
            # dispatch to ExtensionArray implementation
            result = self._data.__sub__(maybe_unwrap_index(other))
            return wrap_arithmetic_op(self, other, result)

        cls.__sub__ = __sub__

        def __rsub__(self, other):
            result = self._data.__rsub__(maybe_unwrap_index(other))
            return wrap_arithmetic_op(self, other, result)

        cls.__rsub__ = __rsub__

    __pow__ = _make_wrapped_arith_op("__pow__")
    __rpow__ = _make_wrapped_arith_op("__rpow__")
    __mul__ = _make_wrapped_arith_op("__mul__")
    __rmul__ = _make_wrapped_arith_op("__rmul__")
    __floordiv__ = _make_wrapped_arith_op("__floordiv__")
    __rfloordiv__ = _make_wrapped_arith_op("__rfloordiv__")
    __mod__ = _make_wrapped_arith_op("__mod__")
    __rmod__ = _make_wrapped_arith_op("__rmod__")
    __divmod__ = _make_wrapped_arith_op("__divmod__")
    __rdivmod__ = _make_wrapped_arith_op("__rdivmod__")
    __truediv__ = _make_wrapped_arith_op("__truediv__")
    __rtruediv__ = _make_wrapped_arith_op("__rtruediv__")

    def isin(self, values, level=None):
        """
        Compute boolean array of whether each index value is found in the
        passed set of values.

        Parameters
        ----------
        values : set or sequence of values
        level : optional
            Only validated when given.

        Returns
        -------
        is_contained : ndarray (boolean dtype)
        """
        if level is not None:
            self._validate_index_level(level)

        if not isinstance(values, type(self)):
            try:
                values = type(self)(values)
            except ValueError:
                # values cannot be coerced to this index type; compare
                # as objects instead
                return self.astype(object).isin(values)

        return algorithms.isin(self.asi8, values.asi8)

    def intersection(self, other, sort=False):
        """
        Form the intersection of this index with ``other``.

        Falls back to ``Index.intersection`` unless both operands are of
        this type, share a non-None anchored freq and are monotonic; in
        that case the result is sliced directly out of one operand.
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)

        if self.equals(other):
            return self._get_reconciled_name_object(other)

        if len(self) == 0:
            return self.copy()
        if len(other) == 0:
            return other.copy()

        if not isinstance(other, type(self)):
            result = Index.intersection(self, other, sort=sort)
            if isinstance(result, type(self)):
                if result.freq is None:
                    # TODO: find a less code-smelly way to set this
                    result._data._freq = to_offset(result.inferred_freq)
            return result

        elif (
            other.freq is None
            or self.freq is None
            or other.freq != self.freq
            or not other.freq.isAnchored()
            or (not self.is_monotonic or not other.is_monotonic)
        ):
            result = Index.intersection(self, other, sort=sort)

            # Invalidate the freq of `result`, which may not be correct at
            # this point, depending on the values.

            # TODO: find a less code-smelly way to set this
            result._data._freq = None
            if hasattr(self, "tz"):
                result = self._shallow_copy(
                    result._values, name=result.name, tz=result.tz, freq=None
                )
            else:
                result = self._shallow_copy(
                    result._values, name=result.name, freq=None)
            if result.freq is None:
                # TODO: find a less code-smelly way to set this
                result._data._freq = to_offset(result.inferred_freq)
            return result

        # to make our life easier, "sort" the two ranges
        if self[0] <= other[0]:
            left, right = self, other
        else:
            left, right = other, self

        # after sorting, the intersection always starts with the right index
        # and ends with the index of which the last elements is smallest
        end = min(left[-1], right[-1])
        start = right[0]

        if end < start:
            return type(self)(data=[])
        else:
            lslice = slice(*left.slice_locs(start, end))
            left_chunk = left.values[lslice]
            return self._shallow_copy(left_chunk)

    # NOTE: no docstring here on purpose; Appender supplies it.
    @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
    def repeat(self, repeats, axis=None):
        nv.validate_repeat(tuple(), dict(axis=axis))
        # freq is only carried over for period dtype
        freq = self.freq if is_period_dtype(self) else None
        return self._shallow_copy(self.asi8.repeat(repeats), freq=freq)

    # NOTE: no docstring here on purpose; Appender supplies it.
    @Appender(_index_shared_docs["where"] % _index_doc_kwargs)
    def where(self, cond, other=None):
        other = _ensure_datetimelike_to_i8(other, to_utc=True)
        values = _ensure_datetimelike_to_i8(self, to_utc=True)
        result = np.where(cond, values, other).astype("i8")

        result = self._ensure_localized(result, from_utc=True)
        return self._shallow_copy(result)

    def _summary(self, name=None):
        """
        Return a summarized representation.

        Parameters
        ----------
        name : str
            Name to use in the summary representation.

        Returns
        -------
        str
            Summarized representation of the index.
        """
        formatter = self._formatter_func
        if len(self) > 0:
            index_summary = f", {formatter(self[0])} to {formatter(self[-1])}"
        else:
            index_summary = ""

        if name is None:
            name = type(self).__name__
        result = f"{name}: {len(self)} entries{index_summary}"
        if self.freq:
            result += f"\nFreq: {self.freqstr}"

        # display as values, not quoted
        result = result.replace("'", "")
        return result

    def _concat_same_dtype(self, to_concat, name):
        """
        Concatenate to_concat which has the same class.

        Raises
        ------
        ValueError
            If the elements of ``to_concat`` do not all share one dtype
            (compared by string representation).
        """
        attribs = self._get_attributes_dict()
        attribs["name"] = name
        # do not pass tz to set because tzlocal cannot be hashed
        if len({str(x.dtype) for x in to_concat}) != 1:
            raise ValueError("to_concat must have the same tz")

        new_data = type(self._values)._concat_same_type(to_concat).asi8

        # GH 3232: If the concat result is evenly spaced, we can retain the
        # original frequency
        is_diff_evenly_spaced = len(unique_deltas(new_data)) == 1
        if not is_period_dtype(self) and not is_diff_evenly_spaced:
            # reset freq
            attribs["freq"] = None

        return self._simple_new(new_data, **attribs)

    # NOTE: no docstring here on purpose; Appender supplies it.
    @Appender(_index_shared_docs["astype"])
    def astype(self, dtype, copy=True):
        if is_dtype_equal(self.dtype, dtype) and copy is False:
            # Ensure that self.astype(self.dtype) is self
            return self

        new_values = self._data.astype(dtype, copy=copy)

        # pass copy=False because any copying will be done in the
        #  _data.astype call above
        return Index(new_values, dtype=new_values.dtype, name=self.name,
                     copy=False)

    def shift(self, periods=1, freq=None):
        """
        Shift index by desired number of time frequency increments.

        This method is for shifting the values of datetime-like indexes
        by a specified time increment a given number of times.

        Parameters
        ----------
        periods : int, default 1
            Number of periods (or increments) to shift by,
            can be positive or negative.

            .. versionchanged:: 0.24.0

        freq : pandas.DateOffset, pandas.Timedelta or string, optional
            Frequency increment to shift by.
            If None, the index is shifted by its own `freq` attribute.
            Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.

        Returns
        -------
        pandas.DatetimeIndex
            Shifted index.

        See Also
        --------
        Index.shift : Shift values of Index.
        PeriodIndex.shift : Shift values of PeriodIndex.
        """
        result = self._data._time_shift(periods, freq=freq)
        return type(self)(result, name=self.name)
class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):
    """
    common ops mixin to support a unified interface datetimelike Index

    Methods here work on the int64 ("i8") view of the index (``self.asi8``)
    and use ``iNaT`` as the missing-value marker in that view.
    """

    # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
    # properties there.  They can be made into cache_readonly for Index
    # subclasses bc they are immutable
    inferred_freq = cache_readonly(DatetimeLikeArrayMixin.inferred_freq.fget)
    _isnan = cache_readonly(DatetimeLikeArrayMixin._isnan.fget)
    hasnans = cache_readonly(DatetimeLikeArrayMixin.hasnans.fget)
    _resolution = cache_readonly(DatetimeLikeArrayMixin._resolution.fget)
    resolution = cache_readonly(DatetimeLikeArrayMixin.resolution.fget)

    def equals(self, other):
        """
        Determines if two Index objects contain the same elements.

        Parameters
        ----------
        other : object
            Candidate to compare with. If it is an Index of a different
            class, a conversion to ``type(self)`` is attempted first.

        Returns
        -------
        bool
            False when ``other`` is not an Index, cannot be converted, has a
            different dtype, or (for PeriodIndex) a different freq;
            otherwise the result of comparing the i8 values.
        """
        if self.is_(other):
            return True

        if not isinstance(other, ABCIndexClass):
            return False
        elif not isinstance(other, type(self)):
            try:
                other = type(self)(other)
            except Exception:
                # Deliberately broad: any failed conversion means "not equal".
                return False

        if not is_dtype_equal(self.dtype, other.dtype):
            # have different timezone
            return False

        # ToDo: Remove this when PeriodDtype is added
        elif isinstance(self, ABCPeriodIndex):
            if not isinstance(other, ABCPeriodIndex):
                return False
            if self.freq != other.freq:
                return False

        return np.array_equal(self.asi8, other.asi8)

    @staticmethod
    def _join_i8_wrapper(joinf, dtype, with_indexers=True):
        """
        create the join wrapper methods

        Parameters
        ----------
        joinf : callable
            Join function called as ``joinf(left, right)`` on i8 views.
        dtype : object
            Passed to ``.view`` to convert the joined result back from i8.
        with_indexers : bool, default True
            If True, ``joinf`` is expected to return a 3-tuple
            ``(join_index, left_indexer, right_indexer)`` and only the first
            element is viewed back as ``dtype``. If False, the result of
            ``joinf`` is returned untouched.

        Returns
        -------
        staticmethod
            Wrapper taking ``(left, right)``.
        """

        @staticmethod
        def wrapper(left, right):
            # Operate on the raw int64 view of ndarray/Index/Series inputs.
            if isinstance(left, (np.ndarray, ABCIndex, ABCSeries)):
                left = left.view('i8')
            if isinstance(right, (np.ndarray, ABCIndex, ABCSeries)):
                right = right.view('i8')
            results = joinf(left, right)
            if with_indexers:
                join_index, left_indexer, right_indexer = results
                join_index = join_index.view(dtype)
                return join_index, left_indexer, right_indexer
            return results

        return wrapper

    @Appender(DatetimeLikeArrayMixin._evaluate_compare.__doc__)
    def _evaluate_compare(self, other, op):
        # NOTE: no docstring here on purpose -- the decorator supplies it.
        result = DatetimeLikeArrayMixin._evaluate_compare(self, other, op)
        if is_bool_dtype(result):
            return result
        try:
            return Index(result)
        except TypeError:
            # Could not be wrapped in an Index; hand back the raw result.
            return result

    def _ensure_localized(self, result):
        """
        ensure that we are re-localized

        This is for compat as we can then call this on all datetimelike
        indexes generally (ignored for Period/Timedelta)

        Parameters
        ----------
        result : DatetimeIndex / i8 ndarray

        Returns
        -------
        localized DTI
        """

        # reconvert to local tz
        if getattr(self, 'tz', None) is not None:
            if not isinstance(result, ABCIndexClass):
                result = self._simple_new(result)
            result = result.tz_localize(self.tz)
        return result

    def _box_values_as_index(self):
        """
        return object Index which contains boxed values
        """
        from pandas.core.index import Index
        return Index(self._box_values(self.asi8), name=self.name, dtype=object)

    def _format_with_header(self, header, **kwargs):
        """
        Return ``header`` followed by the natively formatted values.

        ``kwargs`` are forwarded to ``self._format_native_types``.
        """
        return header + list(self._format_native_types(**kwargs))

    @Appender(_index_shared_docs['__contains__'] % _index_doc_kwargs)
    def __contains__(self, key):
        # NOTE: no docstring here on purpose -- the decorator supplies it.
        try:
            res = self.get_loc(key)
            # Coerce to a real bool: ``len(res)`` would otherwise leak out
            # as an int through the public ``contains`` alias below.
            return bool(is_scalar(res) or isinstance(res, slice) or
                        (is_list_like(res) and len(res)))
        except (KeyError, TypeError, ValueError):
            return False

    contains = __contains__

    # Try to run function on index first, and then on elements of index
    # Especially important for group-by functionality
    def map(self, f):
        """
        Apply ``f`` to the whole index, falling back to element-wise mapping.

        ``f(self)`` is tried first; an ndarray result is wrapped in an Index.
        If the call raises, or the result is not an Index, ``f`` is instead
        mapped over ``self.astype(object)``.
        """
        try:
            result = f(self)

            # Try to use this result if we can
            if isinstance(result, np.ndarray):
                result = Index(result)

            if not isinstance(result, Index):
                # Caught by the ``except`` below, which triggers the
                # element-wise fallback.
                raise TypeError('The map function must return an Index object')
            return result
        except Exception:
            return self.astype(object).map(f)

    def sort_values(self, return_indexer=False, ascending=True):
        """
        Return sorted copy of Index

        Parameters
        ----------
        return_indexer : bool, default False
            If True, also return the indexer that sorts the index.
        ascending : bool, default True
            Sort ascending; if False the order is reversed.

        Returns
        -------
        sorted_index, or the tuple ``(sorted_index, indexer)`` when
        ``return_indexer`` is True.
        """
        if return_indexer:
            _as = self.argsort()
            if not ascending:
                _as = _as[::-1]
            sorted_index = self.take(_as)
            return sorted_index, _as
        else:
            sorted_values = np.sort(self._ndarray_values)
            attribs = self._get_attributes_dict()
            freq = attribs['freq']

            if freq is not None and not isinstance(self, ABCPeriodIndex):
                # Flip the sign of freq so that it matches the direction of
                # the sorted result.
                if freq.n > 0 and not ascending:
                    freq = freq * -1
                elif freq.n < 0 and ascending:
                    freq = freq * -1
            attribs['freq'] = freq

            if not ascending:
                sorted_values = sorted_values[::-1]

            return self._simple_new(sorted_values, **attribs)

    @Appender(_index_shared_docs['take'] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True,
             fill_value=None, **kwargs):
        # NOTE: no docstring here on purpose -- the decorator supplies it.
        nv.validate_take(tuple(), kwargs)
        indices = ensure_int64(indices)

        # Indices that are expressible as a slice go through __getitem__.
        maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
        if isinstance(maybe_slice, slice):
            return self[maybe_slice]

        taken = self._assert_take_fillable(self.asi8, indices,
                                           allow_fill=allow_fill,
                                           fill_value=fill_value,
                                           na_value=iNaT)

        # keep freq in PeriodIndex, reset otherwise
        freq = self.freq if isinstance(self, ABCPeriodIndex) else None
        return self._shallow_copy(taken, freq=freq)

    _can_hold_na = True

    _na_value = NaT
    """The expected NA value to use with this index."""

    @property
    def asobject(self):
        """Return object Index which contains boxed values.

        .. deprecated:: 0.23.0
            Use ``astype(object)`` instead.

        *this is an internal non-public method*
        """
        warnings.warn("'asobject' is deprecated. Use 'astype(object)'"
                      " instead", FutureWarning, stacklevel=2)
        return self.astype(object)

    def _convert_tolerance(self, tolerance, target):
        """
        Convert ``tolerance`` to an ndarray of timedeltas.

        Raises
        ------
        ValueError
            If the converted tolerance has more than one element and its
            size differs from ``target.size``.
        """
        tolerance = np.asarray(to_timedelta(tolerance, box=False))
        if target.size != tolerance.size and tolerance.size > 1:
            raise ValueError('list-like tolerance size must match '
                             'target index size')
        return tolerance

    def tolist(self):
        """
        return a list of the underlying data
        """
        return list(self.astype(object))

    def min(self, axis=None, *args, **kwargs):
        """
        Return the minimum value of the Index or minimum along
        an axis.

        Returns
        -------
        The boxed minimum, or ``self._na_value`` if computing it raises
        ValueError.

        See also
        --------
        numpy.ndarray.min
        """
        nv.validate_min(args, kwargs)

        try:
            i8 = self.asi8

            # quick check
            if len(i8) and self.is_monotonic:
                if i8[0] != iNaT:
                    return self._box_func(i8[0])

            if self.hasnans:
                # Exclude missing entries before reducing.
                min_stamp = self[~self._isnan].asi8.min()
            else:
                min_stamp = i8.min()
            return self._box_func(min_stamp)
        except ValueError:
            return self._na_value

    def argmin(self, axis=None, *args, **kwargs):
        """
        Returns the indices of the minimum values along an axis.
        See `numpy.ndarray.argmin` for more information on the
        `axis` parameter.

        Returns -1 if every value is missing.

        See also
        --------
        numpy.ndarray.argmin
        """
        nv.validate_argmin(args, kwargs)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all():
                return -1
            i8 = i8.copy()
            # Push missing slots to the largest int64 so they never win.
            i8[mask] = np.iinfo('int64').max
        return i8.argmin()

    def max(self, axis=None, *args, **kwargs):
        """
        Return the maximum value of the Index or maximum along
        an axis.

        Returns
        -------
        The boxed maximum, or ``self._na_value`` if computing it raises
        ValueError.

        See also
        --------
        numpy.ndarray.max
        """
        nv.validate_max(args, kwargs)

        try:
            i8 = self.asi8

            # quick check
            if len(i8) and self.is_monotonic:
                if i8[-1] != iNaT:
                    return self._box_func(i8[-1])

            if self.hasnans:
                # Exclude missing entries before reducing.
                max_stamp = self[~self._isnan].asi8.max()
            else:
                max_stamp = i8.max()
            return self._box_func(max_stamp)
        except ValueError:
            return self._na_value

    def argmax(self, axis=None, *args, **kwargs):
        """
        Returns the indices of the maximum values along an axis.
        See `numpy.ndarray.argmax` for more information on the
        `axis` parameter.

        Returns -1 if every value is missing.

        See also
        --------
        numpy.ndarray.argmax
        """
        nv.validate_argmax(args, kwargs)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all():
                return -1
            i8 = i8.copy()
            # Push missing slots to the smallest int64 so they never win.
            # Filling with 0 (as before) made a missing slot the "maximum"
            # whenever all valid i8 values were <= 0, e.g. pre-epoch
            # datetimes or negative timedeltas.
            i8[mask] = np.iinfo('int64').min
        return i8.argmax()

    @property
    def _formatter_func(self):
        """Abstract: always raises AbstractMethodError here."""
        raise com.AbstractMethodError(self)

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value)
        """
        attrs = super(DatetimeIndexOpsMixin, self)._format_attrs()
        for attrib in self._attributes:
            if attrib == 'freq':
                freq = self.freqstr
                if freq is not None:
                    # Quote the frequency string for display.
                    freq = "'%s'" % freq
                attrs.append(('freq', freq))
        return attrs

    def _convert_scalar_indexer(self, key, kind=None):
        """
        we don't allow integer or float indexing on datetime-like when using
        loc

        Parameters
        ----------
        key : label of the slice bound
        kind : {'ix', 'loc', 'getitem', 'iloc'} or None
        """

        assert kind in ['ix', 'loc', 'getitem', 'iloc', None]

        # we don't allow integer/float indexing for loc
        # we don't allow float indexing for ix/getitem
        if is_scalar(key):
            is_int = is_integer(key)
            is_flt = is_float(key)
            if kind in ['loc'] and (is_int or is_flt):
                self._invalid_indexer('index', key)
            elif kind in ['ix', 'getitem'] and is_flt:
                self._invalid_indexer('index', key)

        return (super(DatetimeIndexOpsMixin, self)
                ._convert_scalar_indexer(key, kind=kind))

    @classmethod
    def _add_datetimelike_methods(cls):
        """
        add in the datetimelike methods (as we may have to override the
        superclass)

        Defines ``__add__``, ``__radd__``, ``__sub__``, ``__rsub__``,
        ``__iadd__`` and ``__isub__`` and assigns them onto ``cls``.
        """

        def __add__(self, other):
            other = lib.item_from_zerodim(other)
            if isinstance(other, (ABCSeries, ABCDataFrame)):
                return NotImplemented

            # scalar others
            elif other is NaT:
                result = self._add_nat()
            elif isinstance(other, (Tick, timedelta, np.timedelta64)):
                result = self._add_delta(other)
            elif isinstance(other, DateOffset):
                # specifically _not_ a Tick
                result = self._add_offset(other)
            elif isinstance(other, (datetime, np.datetime64)):
                result = self._add_datelike(other)
            elif is_integer(other):
                # This check must come after the check for np.timedelta64
                # as is_integer returns True for these
                result = self.shift(other)

            # array-like others
            elif is_timedelta64_dtype(other):
                # TimedeltaIndex, ndarray[timedelta64]
                result = self._add_delta(other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                result = self._addsub_offset_array(other, operator.add)
            elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
                # DatetimeIndex, ndarray[datetime64]
                return self._add_datelike(other)
            elif is_integer_dtype(other):
                result = self._addsub_int_array(other, operator.add)
            elif is_float_dtype(other) or is_period_dtype(other):
                # Explicitly catch invalid dtypes
                raise TypeError("cannot add {dtype}-dtype to {cls}"
                                .format(dtype=other.dtype,
                                        cls=type(self).__name__))
            elif is_categorical_dtype(other):
                # Categorical op will raise; defer explicitly
                return NotImplemented
            else:  # pragma: no cover
                return NotImplemented

            if result is NotImplemented:
                return NotImplemented
            elif not isinstance(result, Index):
                # Index.__new__ will choose appropriate subclass for dtype
                result = Index(result)
            res_name = ops.get_op_result_name(self, other)
            result.name = res_name
            return result

        cls.__add__ = __add__

        def __radd__(self, other):
            # alias for __add__
            return self.__add__(other)
        cls.__radd__ = __radd__

        def __sub__(self, other):
            from pandas import Index

            other = lib.item_from_zerodim(other)
            if isinstance(other, (ABCSeries, ABCDataFrame)):
                return NotImplemented

            # scalar others
            elif other is NaT:
                result = self._sub_nat()
            elif isinstance(other, (Tick, timedelta, np.timedelta64)):
                result = self._add_delta(-other)
            elif isinstance(other, DateOffset):
                # specifically _not_ a Tick
                result = self._add_offset(-other)
            elif isinstance(other, (datetime, np.datetime64)):
                result = self._sub_datelike(other)
            elif is_integer(other):
                # This check must come after the check for np.timedelta64
                # as is_integer returns True for these
                result = self.shift(-other)
            elif isinstance(other, Period):
                result = self._sub_period(other)

            # array-like others
            elif is_timedelta64_dtype(other):
                # TimedeltaIndex, ndarray[timedelta64]
                result = self._add_delta(-other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                result = self._addsub_offset_array(other, operator.sub)
            elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
                # DatetimeIndex, ndarray[datetime64]
                result = self._sub_datelike(other)
            elif is_period_dtype(other):
                # PeriodIndex
                result = self._sub_period_array(other)
            elif is_integer_dtype(other):
                result = self._addsub_int_array(other, operator.sub)
            elif isinstance(other, Index):
                raise TypeError("cannot subtract {cls} and {typ}"
                                .format(cls=type(self).__name__,
                                        typ=type(other).__name__))
            elif is_float_dtype(other):
                # Explicitly catch invalid dtypes
                raise TypeError("cannot subtract {dtype}-dtype from {cls}"
                                .format(dtype=other.dtype,
                                        cls=type(self).__name__))
            elif is_categorical_dtype(other):
                # Categorical op will raise; defer explicitly
                return NotImplemented
            else:  # pragma: no cover
                return NotImplemented

            if result is NotImplemented:
                return NotImplemented
            elif not isinstance(result, Index):
                # Index.__new__ will choose appropriate subclass for dtype
                result = Index(result)
            res_name = ops.get_op_result_name(self, other)
            result.name = res_name
            return result

        cls.__sub__ = __sub__

        def __rsub__(self, other):
            if is_datetime64_dtype(other) and is_timedelta64_dtype(self):
                # ndarray[datetime64] cannot be subtracted from self, so
                # we need to wrap in DatetimeIndex and flip the operation
                from pandas import DatetimeIndex
                return DatetimeIndex(other) - self
            elif (is_datetime64_any_dtype(self) and hasattr(other, 'dtype') and
                  not is_datetime64_any_dtype(other)):
                # GH#19959 datetime - datetime is well-defined as timedelta,
                # but any other type - datetime is not well-defined.
                raise TypeError("cannot subtract {cls} from {typ}"
                                .format(cls=type(self).__name__,
                                        typ=type(other).__name__))
            return -(self - other)
        cls.__rsub__ = __rsub__

        def __iadd__(self, other):
            # alias for __add__
            return self.__add__(other)
        cls.__iadd__ = __iadd__

        def __isub__(self, other):
            # alias for __sub__
            return self.__sub__(other)
        cls.__isub__ = __isub__

    def isin(self, values):
        """
        Compute boolean array of whether each index value is found in the
        passed set of values

        Parameters
        ----------
        values : set or sequence of values

        Returns
        -------
        is_contained : ndarray (boolean dtype)
        """
        if not isinstance(values, type(self)):
            try:
                values = type(self)(values)
            except ValueError:
                # Not convertible to this index type: compare as objects.
                return self.astype(object).isin(values)

        return algorithms.isin(self.asi8, values.asi8)

    def shift(self, n, freq=None):
        """
        Specialized shift which produces a DatetimeIndex

        Parameters
        ----------
        n : int
            Periods to shift by
        freq : DateOffset or timedelta-like, optional

        Returns
        -------
        shifted : DatetimeIndex

        Raises
        ------
        NullFrequencyError
            If ``n`` is non-zero, no differing ``freq`` was passed, and
            ``self.freq`` is None.
        """
        if freq is not None and freq != self.freq:
            if isinstance(freq, compat.string_types):
                freq = frequencies.to_offset(freq)
            offset = n * freq
            result = self + offset

            if hasattr(self, 'tz'):
                result._tz = self.tz

            return result

        if n == 0:
            # immutable so OK
            return self

        if self.freq is None:
            raise NullFrequencyError("Cannot shift with no freq")

        # Shift both endpoints and regenerate the range between them.
        start = self[0] + n * self.freq
        end = self[-1] + n * self.freq
        attribs = self._get_attributes_dict()
        return self._generate_range(start=start, end=end, periods=None,
                                    **attribs)

    def repeat(self, repeats, *args, **kwargs):
        """
        Analogous to ndarray.repeat
        """
        nv.validate_repeat(args, kwargs)
        # freq is only kept for PeriodIndex; it is reset otherwise.
        if isinstance(self, ABCPeriodIndex):
            freq = self.freq
        else:
            freq = None
        return self._shallow_copy(self.asi8.repeat(repeats),
                                  freq=freq)

    @Appender(_index_shared_docs['where'] % _index_doc_kwargs)
    def where(self, cond, other=None):
        # NOTE: no docstring here on purpose -- the decorator supplies it.
        other = _ensure_datetimelike_to_i8(other)
        values = _ensure_datetimelike_to_i8(self)
        result = np.where(cond, values, other).astype('i8')

        result = self._ensure_localized(result)
        return self._shallow_copy(result,
                                  **self._get_attributes_dict())

    def _summary(self, name=None):
        """
        Return a summarized representation

        Parameters
        ----------
        name : str
            name to use in the summary representation

        Returns
        -------
        String with a summarized representation of the index
        """
        formatter = self._formatter_func
        if len(self) > 0:
            index_summary = ', %s to %s' % (formatter(self[0]),
                                            formatter(self[-1]))
        else:
            index_summary = ''

        if name is None:
            name = type(self).__name__
        result = '%s: %s entries%s' % (printing.pprint_thing(name),
                                       len(self), index_summary)
        if self.freq:
            result += '\nFreq: %s' % self.freqstr

        # display as values, not quoted
        result = result.replace("'", "")
        return result

    def _concat_same_dtype(self, to_concat, name):
        """
        Concatenate to_concat which has the same class

        Parameters
        ----------
        to_concat : sequence
            Objects to concatenate; each must expose ``asi8`` on the
            timezone-naive path.
        name : object
            Name for the result.
        """
        attribs = self._get_attributes_dict()
        attribs['name'] = name

        if not isinstance(self, ABCPeriodIndex):
            # reset freq
            attribs['freq'] = None

        if getattr(self, 'tz', None) is not None:
            # tz-aware indexes use the dedicated datetimetz concat helper.
            return _concat._concat_datetimetz(to_concat, name)
        else:
            new_data = np.concatenate([c.asi8 for c in to_concat])
        return self._simple_new(new_data, **attribs)

    def astype(self, dtype, copy=True):
        """
        Cast the index to ``dtype``.

        Object, string and integer targets are handled here. Casting to
        float, or between differing datetime/timedelta dtypes, raises
        TypeError. Everything else is deferred to the parent class.
        """
        if is_object_dtype(dtype):
            return self._box_values_as_index()
        elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
            return Index(self.format(), name=self.name, dtype=object)
        elif is_integer_dtype(dtype):
            return Index(self.values.astype('i8', copy=copy), name=self.name,
                         dtype='i8')
        elif (is_datetime_or_timedelta_dtype(dtype) and
              not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
            # disallow conversion between datetime/timedelta,
            # and conversions for any datetimelike to float
            msg = 'Cannot cast {name} to dtype {dtype}'
            raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
        return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)
class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):
    """
    Shared operations giving datetime-like Index classes one interface.
    """

    _is_numeric_dtype = False
    _can_hold_strings = False
    _data: DatetimeArray | TimedeltaArray | PeriodArray
    freq: BaseOffset | None
    freqstr: str | None
    _resolution_obj: Resolution
    _bool_ops: list[str] = []
    _field_ops: list[str] = []

    # error: "Callable[[Any], Any]" has no attribute "fget"
    hasnans = cache_readonly(
        DatetimeLikeArrayMixin._hasnans.fget  # type: ignore[attr-defined]
    )

    @property
    def _is_all_dates(self) -> bool:
        # Always True for this mixin.
        return True

    # ------------------------------------------------------------------------
    # Abstract data attributes

    @property
    def values(self) -> np.ndarray:
        # Note: PeriodArray overrides this to return an ndarray of objects.
        backing = self._data
        return backing._ndarray

    def __array_wrap__(self, result, context=None):
        """
        Hook invoked after a ufunc (and similar functions) has run.

        The parent result is used, except that a DatetimeTimedeltaMixin
        result has its freq re-inferred when this index carries a freq.
        """
        wrapped = super().__array_wrap__(result, context=context)
        if not isinstance(wrapped, DatetimeTimedeltaMixin):
            return wrapped
        if self.freq is None:
            return wrapped
        return wrapped._with_freq("infer")

    # ------------------------------------------------------------------------

    def equals(self, other: Any) -> bool:
        """
        Report whether ``other`` is an Index holding the same elements.
        """
        if self.is_(other):
            return True
        if not isinstance(other, Index):
            return False
        if other.dtype.kind in ["f", "i", "u", "c"]:
            return False

        if not isinstance(other, type(self)):
            matches = self._data._infer_matches
            if other.dtype == object:
                castable = other.inferred_type in matches
            elif is_categorical_dtype(other.dtype):
                other = cast("CategoricalIndex", other)
                castable = other.categories.inferred_type in matches
            else:
                castable = False

            if castable:
                try:
                    other = type(self)(other)
                except (ValueError, TypeError, OverflowError):
                    # e.g.
                    #  ValueError -> cannot parse str entry, or OutOfBoundsDatetime
                    #  TypeError  -> trying to convert IntervalIndex to DatetimeIndex
                    #  OverflowError -> Index([very_large_timedeltas])
                    return False

        if is_dtype_equal(self.dtype, other.dtype):
            return np.array_equal(self.asi8, other.asi8)
        # have different timezone
        return False

    @Appender(Index.__contains__.__doc__)
    def __contains__(self, key: Any) -> bool:
        # Called for its exception only: an unhashable key raises here,
        # outside the try block below.
        hash(key)
        try:
            self.get_loc(key)
        except (KeyError, TypeError, ValueError):
            found = False
        else:
            found = True
        return found

    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
        nv.validate_take((), kwargs)
        indices = np.asarray(indices, dtype=np.intp)

        as_slice = lib.maybe_indices_to_slice(indices, len(self))

        taken = NDArrayBackedExtensionIndex.take(
            self, indices, axis, allow_fill, fill_value, **kwargs
        )
        if not isinstance(as_slice, slice):
            return taken
        # Slice-like indices: reuse the freq that __getitem__ would assign.
        taken._data._freq = self._data._get_getitem_freq(as_slice)
        return taken

    _can_hold_na = True

    _na_value: NaTType = NaT
    """The expected NA value to use with this index."""

    def _convert_tolerance(self, tolerance, target):
        # Normalise to an ndarray of timedeltas, then let the parent finish.
        as_timedelta = to_timedelta(tolerance)
        tolerance = np.asarray(as_timedelta.to_numpy())
        return super()._convert_tolerance(tolerance, target)

    def tolist(self) -> list:
        """
        Return the values as a list, taken from the object-dtype cast.
        """
        boxed = self.astype(object)
        return list(boxed)

    # --------------------------------------------------------------------
    # Rendering Methods

    def format(
        self,
        name: bool = False,
        formatter: Callable | None = None,
        na_rep: str = "NaT",
        date_format: str | None = None,
    ) -> list[str]:
        """
        Build the list of strings that represents this Index.

        When ``name`` is truthy the list starts with the (pretty-printed)
        index name, or an empty string if the name is None.
        """
        header = []
        if name:
            if self.name is not None:
                label = ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
            else:
                label = ""
            header.append(label)

        if formatter is None:
            return self._format_with_header(
                header, na_rep=na_rep, date_format=date_format
            )
        return header + list(self.map(formatter))

    def _format_with_header(
        self, header: list[str], na_rep: str = "NaT", date_format: str | None = None
    ) -> list[str]:
        native = self._format_native_types(na_rep=na_rep, date_format=date_format)
        return header + list(native)

    @property
    def _formatter_func(self):
        backing = self._data
        return backing._formatter()

    def _format_attrs(self):
        """
        Return the (attr, formatted_value) pairs used in the repr.
        """
        attrs = super()._format_attrs()
        for attr_name in self._attributes:
            if attr_name != "freq":
                continue
            freq_repr = self.freqstr
            if freq_repr is not None:
                freq_repr = repr(freq_repr)
            # Argument 1 to "append" of "list" has incompatible type
            # "Tuple[str, Optional[str]]"; expected "Tuple[str, Union[str, int]]"
            attrs.append(("freq", freq_repr))  # type: ignore[arg-type]
        return attrs

    def _summary(self, name=None) -> str:
        """
        Build a short, human-readable summary of the index.

        Parameters
        ----------
        name : str
            Label to show in the summary; defaults to the class name.

        Returns
        -------
        str
            The summary text, with single quotes stripped.
        """
        formatter = self._formatter_func
        index_summary = ""
        if len(self) > 0:
            index_summary = f", {formatter(self[0])} to {formatter(self[-1])}"

        if name is None:
            name = type(self).__name__
        result = f"{name}: {len(self)} entries{index_summary}"
        if self.freq:
            result += f"\nFreq: {self.freqstr}"

        # display as values, not quoted
        return result.replace("'", "")

    # --------------------------------------------------------------------
    # Indexing Methods

    def _can_partial_date_slice(self, reso: Resolution) -> bool:
        # Not implemented at this level.
        raise NotImplementedError

    def _parsed_string_to_bounds(self, reso: Resolution, parsed):
        # Not implemented at this level.
        raise NotImplementedError

    def _parse_with_reso(self, label: str):
        # overridden by TimedeltaIndex
        parsed, reso_str = parsing.parse_time_string(label, self.freq)
        return parsed, Resolution.from_attrname(reso_str)

    def _get_string_slice(self, key: str):
        parsed, reso = self._parse_with_reso(key)
        try:
            located = self._partial_date_slice(reso, parsed)
        except KeyError as err:
            # Re-raise with the original string key instead.
            raise KeyError(key) from err
        else:
            return located

    @final
    def _partial_date_slice(
        self,
        reso: Resolution,
        parsed: datetime,
    ):
        """
        Locate the entries falling inside the bounds of a parsed string.

        Parameters
        ----------
        reso : Resolution
        parsed : datetime

        Returns
        -------
        slice or ndarray[intp]
        """
        if not self._can_partial_date_slice(reso):
            raise ValueError

        lower, upper = self._parsed_string_to_bounds(reso, parsed)
        vals = self._data._ndarray
        unbox = self._data._unbox

        if not self.is_monotonic_increasing:
            lhs_mask = vals >= unbox(lower)
            rhs_mask = vals <= unbox(upper)

            # try to find the dates
            return (lhs_mask & rhs_mask).nonzero()[0]

        if len(self):
            below = lower < self[0] and upper < self[0]
            if below or (lower > self[-1] and upper > self[-1]):
                # we are out of range
                raise KeyError

        # TODO: does this depend on being monotonic _increasing_?

        # a monotonic (sorted) series can be sliced
        left = vals.searchsorted(unbox(lower), side="left")
        right = vals.searchsorted(unbox(upper), side="right")
        return slice(left, right)

    # --------------------------------------------------------------------
    # Arithmetic Methods

    def shift(self: _T, periods: int = 1, freq=None) -> _T:
        """
        Shift index by desired number of time frequency increments.

        This method is for shifting the values of datetime-like indexes
        by a specified time increment a given number of times.

        Parameters
        ----------
        periods : int, default 1
            Number of periods (or increments) to shift by,
            can be positive or negative.
        freq : pandas.DateOffset, pandas.Timedelta or string, optional
            Frequency increment to shift by.
            If None, the index is shifted by its own `freq` attribute.
            Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.

        Returns
        -------
        pandas.DatetimeIndex
            Shifted index.

        See Also
        --------
        Index.shift : Shift values of Index.
        PeriodIndex.shift : Shift values of PeriodIndex.
        """
        # Set this index's freq on a view of the array, not the array itself.
        data_view = self._data.view()
        data_view._freq = self.freq
        shifted = data_view._time_shift(periods, freq=freq)
        return type(self)(shifted, name=self.name)

    # --------------------------------------------------------------------
    # List-like Methods

    def _get_delete_freq(self, loc: int | slice | Sequence[int]):
        """
        Work out the `freq` that self.delete(loc) should carry.
        """
        if is_period_dtype(self.dtype):
            return self.freq
        if self.freq is None:
            return None

        if is_integer(loc):
            # Only removing the first or last element keeps the freq.
            if loc in (0, -len(self), -1, len(self) - 1):
                return self.freq
            return None

        if is_list_like(loc):
            # error: Incompatible types in assignment (expression has
            # type "Union[slice, ndarray]", variable has type
            # "Union[int, slice, Sequence[int]]")
            loc = lib.maybe_indices_to_slice(  # type: ignore[assignment]
                np.asarray(loc, dtype=np.intp), len(self)
            )
        if isinstance(loc, slice) and loc.step in (1, None):
            if loc.start in (0, None) or loc.stop in (len(self), None):
                return self.freq
        return None

    def _get_insert_freq(self, loc: int, item):
        """
        Work out the `freq` that self.insert(loc, item) should carry.
        """
        validated = self._data._validate_scalar(item)
        boxed = self._data._box_func(validated)

        if is_period_dtype(self.dtype):
            return self.freq
        if self.freq is None:
            return None

        if not self.size:
            # Adding a single item to an empty index may preserve freq
            if self.freq.is_on_offset(boxed):
                return self.freq
            return None

        # freq can be preserved on edge cases
        if boxed is NaT:
            return None
        if (loc == 0 or loc == -len(self)) and boxed + self.freq == self[0]:
            return self.freq
        if (loc == len(self)) and boxed - self.freq == self[-1]:
            return self.freq
        return None

    @doc(NDArrayBackedExtensionIndex.delete)
    def delete(self: _T, loc) -> _T:
        trimmed = super().delete(loc)
        trimmed._data._freq = self._get_delete_freq(loc)
        return trimmed

    @doc(NDArrayBackedExtensionIndex.insert)
    def insert(self, loc: int, item):
        grown = super().insert(loc, item)
        if not isinstance(grown, type(self)):
            # parent class method cast to another type; leave it untouched
            return grown
        grown._data._freq = self._get_insert_freq(loc, item)
        return grown

    # --------------------------------------------------------------------
    # Join/Set Methods

    def _get_join_freq(self, other):
        """
        Work out the freq to attach to the result of a join operation.
        """
        if is_period_dtype(self.dtype):
            return self.freq

        self = cast(DatetimeTimedeltaMixin, self)
        if self._can_fast_union(other):
            return self.freq
        return None

    def _wrap_joined_index(self, joined, other):
        assert other.dtype == self.dtype, (other.dtype, self.dtype)
        wrapped = super()._wrap_joined_index(joined, other)
        wrapped._data._freq = self._get_join_freq(other)
        return wrapped

    def _get_join_target(self) -> np.ndarray:
        backing = self._data._ndarray
        return backing.view("i8")

    def _from_join_target(self, result: np.ndarray):
        # view e.g. i8 back to M8[ns]
        native_dtype = self._data._ndarray.dtype
        result = result.view(native_dtype)
        return self._data._from_backing_data(result)

    # --------------------------------------------------------------------

    @doc(Index._maybe_cast_listlike_indexer)
    def _maybe_cast_listlike_indexer(self, keyarr):
        try:
            res = self._data._validate_listlike(keyarr, allow_object=True)
        except (ValueError, TypeError):
            if isinstance(keyarr, ExtensionArray):
                # e.g. we don't want to cast DTA to ndarray[object]
                res = keyarr
            else:
                res = com.asarray_tuplesafe(keyarr)
                # TODO: com.asarray_tuplesafe shouldn't cast e.g. DatetimeArray
        return Index(res, dtype=res.dtype)
class DatetimeIndexOpsMixin(ExtensionOpsMixin):
    """
    Common ops mixin to support a unified interface datetimelike Index.

    Most operations are forwarded to the backing array stored in
    ``self._data``; this class wraps the results back into Index objects.
    """
    _data = None  # type: DatetimeLikeArrayMixin

    # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
    # properties there.  They can be made into cache_readonly for Index
    # subclasses bc they are immutable
    inferred_freq = cache_readonly(DatetimeLikeArrayMixin.inferred_freq.fget)
    _isnan = cache_readonly(DatetimeLikeArrayMixin._isnan.fget)
    hasnans = cache_readonly(DatetimeLikeArrayMixin._hasnans.fget)
    _hasnans = hasnans  # for index / array -agnostic code
    _resolution = cache_readonly(DatetimeLikeArrayMixin._resolution.fget)
    resolution = cache_readonly(DatetimeLikeArrayMixin.resolution.fget)

    # NOTE: `_box_values` is re-defined as a regular method further down in
    # this class; that later definition is the one that ends up bound.
    _box_values = ea_passthrough(DatetimeLikeArrayMixin._box_values)
    _maybe_mask_results = ea_passthrough(
        DatetimeLikeArrayMixin._maybe_mask_results)
    __iter__ = ea_passthrough(DatetimeLikeArrayMixin.__iter__)

    @property
    def freq(self):
        """
        Return the frequency object if it is set, otherwise None.
        """
        return self._data.freq

    @freq.setter
    def freq(self, value):
        # validation is handled by _data setter
        self._data.freq = value

    @property
    def freqstr(self):
        """
        Return the frequency object as a string if it is set, otherwise None.
        """
        return self._data.freqstr

    def unique(self, level=None):
        """
        Return an index built from the unique values of the backing array.

        Parameters
        ----------
        level : optional
            Only validated (via ``_validate_index_level``) when not None.
        """
        if level is not None:
            self._validate_index_level(level)

        result = self._data.unique()

        # Note: if `self` is already unique, then self.unique() should share
        #  a `freq` with self.  If not already unique, then self.freq must be
        #  None, so again sharing freq is correct.
        return self._shallow_copy(result._data)

    @classmethod
    def _create_comparison_method(cls, op):
        """
        Create a comparison method that dispatches to ``cls.values``.
        """
        def wrapper(self, other):
            if isinstance(other, ABCSeries):
                # the arrays defer to Series for comparison ops but the indexes
                #  don't, so we have to unwrap here.
                other = other._values

            result = op(self._data, maybe_unwrap_index(other))
            return result

        wrapper.__doc__ = op.__doc__
        wrapper.__name__ = '__{}__'.format(op.__name__)
        return wrapper

    @property
    def _ndarray_values(self):
        # Forwarded unchanged from the backing array.
        return self._data._ndarray_values

    # ------------------------------------------------------------------------
    # Abstract data attributes

    @property
    def values(self) -> np.ndarray:
        # Note: PeriodArray overrides this to return an ndarray of objects.
        return self._data._data

    @property
    @Appender(DatetimeLikeArrayMixin.asi8.__doc__)
    def asi8(self):
        return self._data.asi8

    # ------------------------------------------------------------------------

    def equals(self, other):
        """
        Determines if two Index objects contain the same elements.
        """
        if self.is_(other):
            return True

        if not isinstance(other, ABCIndexClass):
            return False
        elif not isinstance(other, type(self)):
            # Try to coerce `other` to our own type; anything that cannot
            # be coerced is by definition not equal.
            try:
                other = type(self)(other)
            except Exception:
                return False

        if not is_dtype_equal(self.dtype, other.dtype):
            # have different timezone
            return False

        elif is_period_dtype(self):
            if not is_period_dtype(other):
                return False
            if self.freq != other.freq:
                return False

        return np.array_equal(self.asi8, other.asi8)

    @staticmethod
    def _join_i8_wrapper(joinf, dtype, with_indexers=True):
        """
        Create the join wrapper methods.

        The returned (static) wrapper views both operands as ``'i8'`` before
        calling ``joinf`` and, when ``with_indexers`` is True, views the
        joined values back as ``dtype``.
        """
        from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin

        @staticmethod
        def wrapper(left, right):
            if isinstance(left, (np.ndarray, ABCIndex, ABCSeries,
                                 DatetimeLikeArrayMixin)):
                left = left.view('i8')
            if isinstance(right, (np.ndarray, ABCIndex, ABCSeries,
                                  DatetimeLikeArrayMixin)):
                right = right.view('i8')
            results = joinf(left, right)
            if with_indexers:
                join_index, left_indexer, right_indexer = results
                join_index = join_index.view(dtype)
                return join_index, left_indexer, right_indexer
            return results

        return wrapper

    def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise',
                          from_utc=False):
        # See DatetimeLikeArrayMixin._ensure_localized.__doc__
        if getattr(self, 'tz', None):
            # ensure_localized is only relevant for tz-aware DTI
            result = self._data._ensure_localized(arg,
                                                  ambiguous=ambiguous,
                                                  nonexistent=nonexistent,
                                                  from_utc=from_utc)
            return type(self)._simple_new(result, name=self.name)
        return arg

    def _box_values(self, values):
        # Overrides the class-level ``ea_passthrough`` assignment above.
        return self._data._box_values(values)

    @Appender(_index_shared_docs['contains'] % _index_doc_kwargs)
    def __contains__(self, key):
        # No docstring here on purpose: Appender supplies it.
        try:
            res = self.get_loc(key)
            # Always hand back a real bool; the bare expression could
            # evaluate to ``len(res)`` (an int) for list-like results.
            return bool(is_scalar(res) or isinstance(res, slice) or
                        (is_list_like(res) and len(res)))
        except (KeyError, TypeError, ValueError):
            return False

    contains = __contains__

    # Try to run function on index first, and then on elements of index
    # Especially important for group-by functionality
    def map(self, f):
        """
        Apply ``f`` to the index as a whole, falling back to element-wise.

        If ``f(self)`` raises, or does not produce an Index (an ndarray
        result is wrapped in an Index first), the mapping is retried on
        ``self.astype(object)``.
        """
        try:
            result = f(self)

            # Try to use this result if we can
            if isinstance(result, np.ndarray):
                result = Index(result)

            if not isinstance(result, Index):
                raise TypeError('The map function must return an Index object')
            return result
        except Exception:
            # Deliberate best-effort: includes the TypeError raised above.
            return self.astype(object).map(f)

    def sort_values(self, return_indexer=False, ascending=True):
        """
        Return sorted copy of Index.

        Parameters
        ----------
        return_indexer : bool, default False
            If True, also return the indexer that sorts the index.
        ascending : bool, default True
            Sort ascending; otherwise the sorted result is reversed.
        """
        if return_indexer:
            _as = self.argsort()
            if not ascending:
                _as = _as[::-1]
            sorted_index = self.take(_as)
            return sorted_index, _as
        else:
            sorted_values = np.sort(self._ndarray_values)
            attribs = self._get_attributes_dict()
            freq = attribs['freq']

            # Flip the sign of a non-period freq so that it matches the
            # direction of the sorted result.
            if freq is not None and not is_period_dtype(self):
                if freq.n > 0 and not ascending:
                    freq = freq * -1
                elif freq.n < 0 and ascending:
                    freq = freq * -1
            attribs['freq'] = freq

            if not ascending:
                sorted_values = sorted_values[::-1]

            return self._simple_new(sorted_values, **attribs)

    @Appender(_index_shared_docs['take'] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True,
             fill_value=None, **kwargs):
        nv.validate_take(tuple(), kwargs)
        indices = ensure_int64(indices)

        # Indices that form a slice are served through __getitem__.
        maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
        if isinstance(maybe_slice, slice):
            return self[maybe_slice]

        taken = self._assert_take_fillable(self.asi8, indices,
                                           allow_fill=allow_fill,
                                           fill_value=fill_value,
                                           na_value=iNaT)

        # keep freq in PeriodArray/Index, reset otherwise
        freq = self.freq if is_period_dtype(self) else None
        return self._shallow_copy(taken, freq=freq)

    _can_hold_na = True

    _na_value = NaT
    """The expected NA value to use with this index."""

    @property
    def asobject(self):
        """
        Return object Index which contains boxed values.

        .. deprecated:: 0.23.0
            Use ``astype(object)`` instead.

        *this is an internal non-public method*
        """
        warnings.warn("'asobject' is deprecated. Use 'astype(object)'"
                      " instead", FutureWarning, stacklevel=2)
        return self.astype(object)

    def _convert_tolerance(self, tolerance, target):
        """
        Convert ``tolerance`` to an ndarray via ``to_timedelta``.

        Raises
        ------
        ValueError
            If ``tolerance`` has more than one element and its size differs
            from ``target.size``.
        """
        tolerance = np.asarray(to_timedelta(tolerance).to_numpy())

        if target.size != tolerance.size and tolerance.size > 1:
            raise ValueError('list-like tolerance size must match '
                             'target index size')
        return tolerance

    def tolist(self):
        """
        Return a list of the underlying data.
        """
        return list(self.astype(object))

    def min(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the minimum value of the Index or minimum along
        an axis.

        Returns ``self._na_value`` for an empty index, when NaT is present
        and ``skipna`` is False, or when the reduction raises ValueError.

        See Also
        --------
        numpy.ndarray.min
        Series.min : Return the minimum value in a Series.
        """
        nv.validate_min(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8
        try:
            # quick check
            if len(i8) and self.is_monotonic:
                if i8[0] != iNaT:
                    return self._box_func(i8[0])

            if self.hasnans:
                if skipna:
                    min_stamp = self[~self._isnan].asi8.min()
                else:
                    return self._na_value
            else:
                min_stamp = i8.min()
            return self._box_func(min_stamp)
        except ValueError:
            return self._na_value

    def argmin(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the minimum values along an axis.

        See `numpy.ndarray.argmin` for more information on the
        `axis` parameter.

        Returns -1 when NaT is present and either every entry is NaT or
        ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.argmin
        """
        nv.validate_argmin(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            # Push NaT slots to the largest int64 so they never win argmin.
            i8[mask] = np.iinfo('int64').max
        return i8.argmin()

    def max(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the maximum value of the Index or maximum along
        an axis.

        Returns ``self._na_value`` for an empty index, when NaT is present
        and ``skipna`` is False, or when the reduction raises ValueError.

        See Also
        --------
        numpy.ndarray.max
        Series.max : Return the maximum value in a Series.
        """
        nv.validate_max(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8
        try:
            # quick check
            if len(i8) and self.is_monotonic:
                if i8[-1] != iNaT:
                    return self._box_func(i8[-1])

            if self.hasnans:
                if skipna:
                    max_stamp = self[~self._isnan].asi8.max()
                else:
                    return self._na_value
            else:
                max_stamp = i8.max()
            return self._box_func(max_stamp)
        except ValueError:
            return self._na_value

    def argmax(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the maximum values along an axis.

        See `numpy.ndarray.argmax` for more information on the
        `axis` parameter.

        Returns -1 when NaT is present and either every entry is NaT or
        ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.argmax
        """
        nv.validate_argmax(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            # Push NaT slots to the smallest int64 so they never win argmax.
            # The previous fill value of 0 compared greater than every
            # negative i8 value (i.e. anything before the epoch), so the
            # position of a NaT could be returned as the maximum.
            i8[mask] = np.iinfo('int64').min
        return i8.argmax()

    # --------------------------------------------------------------------
    # Rendering Methods

    def _format_with_header(self, header, **kwargs):
        """
        Return ``header`` followed by the natively formatted values.
        """
        return header + list(self._format_native_types(**kwargs))

    @property
    def _formatter_func(self):
        # Must be provided by subclasses.
        raise AbstractMethodError(self)

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value).
        """
        attrs = super()._format_attrs()
        for attrib in self._attributes:
            if attrib == 'freq':
                freq = self.freqstr
                if freq is not None:
                    freq = "'%s'" % freq
                attrs.append(('freq', freq))
        return attrs

    # --------------------------------------------------------------------

    def _convert_scalar_indexer(self, key, kind=None):
        """
        We don't allow integer or float indexing on datetime-like when using
        loc.

        Parameters
        ----------
        key : label of the slice bound
        kind : {'ix', 'loc', 'getitem', 'iloc'} or None
        """

        assert kind in ['ix', 'loc', 'getitem', 'iloc', None]

        # we don't allow integer/float indexing for loc
        # we don't allow float indexing for ix/getitem
        if is_scalar(key):
            is_int = is_integer(key)
            is_flt = is_float(key)
            if kind in ['loc'] and (is_int or is_flt):
                self._invalid_indexer('index', key)
            elif kind in ['ix', 'getitem'] and is_flt:
                self._invalid_indexer('index', key)

        return super()._convert_scalar_indexer(key, kind=kind)

    @classmethod
    def _add_datetimelike_methods(cls):
        """
        Add in the datetimelike methods (as we may have to override the
        superclass).
        """

        def __add__(self, other):
            # dispatch to ExtensionArray implementation
            result = self._data.__add__(maybe_unwrap_index(other))
            return wrap_arithmetic_op(self, other, result)

        cls.__add__ = __add__

        def __radd__(self, other):
            # alias for __add__
            return self.__add__(other)
        cls.__radd__ = __radd__

        def __sub__(self, other):
            # dispatch to ExtensionArray implementation
            result = self._data.__sub__(maybe_unwrap_index(other))
            return wrap_arithmetic_op(self, other, result)

        cls.__sub__ = __sub__

        def __rsub__(self, other):
            result = self._data.__rsub__(maybe_unwrap_index(other))
            return wrap_arithmetic_op(self, other, result)

        cls.__rsub__ = __rsub__

    def isin(self, values):
        """
        Compute boolean array of whether each index value is found in the
        passed set of values.

        Parameters
        ----------
        values : set or sequence of values

        Returns
        -------
        is_contained : ndarray (boolean dtype)
        """
        if not isinstance(values, type(self)):
            try:
                values = type(self)(values)
            except ValueError:
                # Not coercible to our own type: compare as objects.
                return self.astype(object).isin(values)

        return algorithms.isin(self.asi8, values.asi8)

    @Appender(_index_shared_docs['repeat'] % _index_doc_kwargs)
    def repeat(self, repeats, axis=None):
        nv.validate_repeat(tuple(), dict(axis=axis))
        # freq is only carried over for period dtype
        freq = self.freq if is_period_dtype(self) else None
        return self._shallow_copy(self.asi8.repeat(repeats), freq=freq)

    @Appender(_index_shared_docs['where'] % _index_doc_kwargs)
    def where(self, cond, other=None):
        # Select on the i8 form of both operands, then re-localize.
        other = _ensure_datetimelike_to_i8(other, to_utc=True)
        values = _ensure_datetimelike_to_i8(self, to_utc=True)
        result = np.where(cond, values, other).astype('i8')

        result = self._ensure_localized(result, from_utc=True)
        return self._shallow_copy(result)

    def _summary(self, name=None):
        """
        Return a summarized representation.

        Parameters
        ----------
        name : str
            name to use in the summary representation

        Returns
        -------
        String with a summarized representation of the index
        """
        formatter = self._formatter_func
        if len(self) > 0:
            index_summary = ', %s to %s' % (formatter(self[0]),
                                            formatter(self[-1]))
        else:
            index_summary = ''

        if name is None:
            name = type(self).__name__
        result = '%s: %s entries%s' % (printing.pprint_thing(name),
                                       len(self), index_summary)
        if self.freq:
            result += '\nFreq: %s' % self.freqstr

        # display as values, not quoted
        result = result.replace("'", "")
        return result

    def _concat_same_dtype(self, to_concat, name):
        """
        Concatenate to_concat which has the same class.

        Raises
        ------
        ValueError
            If the entries of ``to_concat`` do not all share one dtype
            (compared by ``str(dtype)``).
        """
        attribs = self._get_attributes_dict()
        attribs['name'] = name
        # do not pass tz to set because tzlocal cannot be hashed
        if len({str(x.dtype) for x in to_concat}) != 1:
            raise ValueError('to_concat must have the same tz')

        new_data = type(self._values)._concat_same_type(to_concat).asi8

        # GH 3232: If the concat result is evenly spaced, we can retain the
        # original frequency
        is_diff_evenly_spaced = len(unique_deltas(new_data)) == 1
        if not is_period_dtype(self) and not is_diff_evenly_spaced:
            # reset freq
            attribs['freq'] = None

        return self._simple_new(new_data, **attribs)

    @Appender(_index_shared_docs['astype'])
    def astype(self, dtype, copy=True):
        if is_dtype_equal(self.dtype, dtype) and copy is False:
            # Ensure that self.astype(self.dtype) is self
            return self

        new_values = self._data.astype(dtype, copy=copy)

        # pass copy=False because any copying will be done in the
        #  _data.astype call above
        return Index(new_values,
                     dtype=new_values.dtype, name=self.name, copy=False)

    @deprecate_kwarg(old_arg_name='n', new_arg_name='periods')
    def shift(self, periods, freq=None):
        """
        Shift index by desired number of time frequency increments.

        This method is for shifting the values of datetime-like indexes
        by a specified time increment a given number of times.

        Parameters
        ----------
        periods : int
            Number of periods (or increments) to shift by,
            can be positive or negative.

            .. versionchanged:: 0.24.0

        freq : pandas.DateOffset, pandas.Timedelta or string, optional
            Frequency increment to shift by.
            If None, the index is shifted by its own `freq` attribute.
            Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.

        Returns
        -------
        pandas.DatetimeIndex
            Shifted index.

        See Also
        --------
        Index.shift : Shift values of Index.
        PeriodIndex.shift : Shift values of PeriodIndex.
        """
        result = self._data._time_shift(periods, freq=freq)
        return type(self)(result, name=self.name)
class DatetimeIndexOpsMixin(ExtensionIndex):
    """
    Common ops mixin to support a unified interface datetimelike Index.

    Most operations are forwarded to the backing array stored in
    ``self._data``; this class wraps the results back into Index objects.
    """

    _data: Union[DatetimeArray, TimedeltaArray, PeriodArray]
    freq: Optional[DateOffset]
    freqstr: Optional[str]
    _resolution: int
    _bool_ops: List[str] = []
    _field_ops: List[str] = []

    hasnans = cache_readonly(DatetimeLikeArrayMixin._hasnans.fget)  # type: ignore
    _hasnans = hasnans  # for index / array -agnostic code

    @property
    def is_all_dates(self) -> bool:
        # Always True for this mixin.
        return True

    # ------------------------------------------------------------------------
    # Abstract data attributes

    @property
    def values(self):
        # Note: PeriodArray overrides this to return an ndarray of objects.
        return self._data._data

    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc.

        Boolean or scalar results are returned as-is; anything else is
        wrapped in an Index carrying this index's attributes.
        """
        result = lib.item_from_zerodim(result)
        if is_bool_dtype(result) or lib.is_scalar(result):
            return result

        attrs = self._get_attributes_dict()
        if not is_period_dtype(self) and attrs["freq"]:
            # no need to infer if freq is None
            attrs["freq"] = "infer"
        return Index(result, **attrs)

    # ------------------------------------------------------------------------

    def equals(self, other) -> bool:
        """
        Determines if two Index objects contain the same elements.
        """
        if self.is_(other):
            return True

        if not isinstance(other, ABCIndexClass):
            return False
        elif not isinstance(other, type(self)):
            try:
                other = type(self)(other)
            except (ValueError, TypeError, OverflowError):
                # e.g.
                #  ValueError -> cannot parse str entry, or OutOfBoundsDatetime
                #  TypeError  -> trying to convert IntervalIndex to DatetimeIndex
                #  OverflowError -> Index([very_large_timedeltas])
                return False

        if not is_dtype_equal(self.dtype, other.dtype):
            # have different timezone
            return False

        return np.array_equal(self.asi8, other.asi8)

    @Appender(Index.__contains__.__doc__)
    def __contains__(self, key: Any) -> bool:
        # Unhashable keys raise TypeError here, before any lookup.
        hash(key)
        try:
            res = self.get_loc(key)
        except (KeyError, TypeError, ValueError):
            return False
        return bool(
            is_scalar(res) or isinstance(res, slice) or (is_list_like(res) and len(res))
        )

    def sort_values(self, return_indexer=False, ascending=True):
        """
        Return sorted copy of Index.

        Parameters
        ----------
        return_indexer : bool, default False
            If True, also return the indexer that sorts the index.
        ascending : bool, default True
            Sort ascending; otherwise the sorted result is reversed.
        """
        if return_indexer:
            _as = self.argsort()
            if not ascending:
                _as = _as[::-1]
            sorted_index = self.take(_as)
            return sorted_index, _as
        else:
            # NB: using asi8 instead of _ndarray_values matters in numpy 1.18
            #  because the treatment of NaT has been changed to put NaT last
            #  instead of first.
            sorted_values = np.sort(self.asi8)

            # Flip the sign of a non-period freq so that it matches the
            # direction of the sorted result.
            freq = self.freq
            if freq is not None and not is_period_dtype(self):
                if freq.n > 0 and not ascending:
                    freq = freq * -1
                elif freq.n < 0 and ascending:
                    freq = freq * -1

            if not ascending:
                sorted_values = sorted_values[::-1]

            arr = type(self._data)._simple_new(
                sorted_values, dtype=self.dtype, freq=freq
            )
            return type(self)._simple_new(arr, name=self.name)

    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
        nv.validate_take(tuple(), kwargs)
        indices = ensure_int64(indices)

        # Indices that form a slice are served through __getitem__.
        maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
        if isinstance(maybe_slice, slice):
            return self[maybe_slice]

        return ExtensionIndex.take(
            self, indices, axis, allow_fill, fill_value, **kwargs
        )

    @Appender(_shared_docs["searchsorted"])
    def searchsorted(self, value, side="left", sorter=None):
        if isinstance(value, str):
            raise TypeError(
                "searchsorted requires compatible dtype or scalar, "
                f"not {type(value).__name__}"
            )
        if isinstance(value, Index):
            # unwrap to the backing array before delegating
            value = value._data

        return self._data.searchsorted(value, side=side, sorter=sorter)

    _can_hold_na = True

    _na_value = NaT
    """The expected NA value to use with this index."""

    def _convert_tolerance(self, tolerance, target):
        """
        Convert ``tolerance`` to an ndarray via ``to_timedelta``.

        Raises
        ------
        ValueError
            If ``tolerance`` has more than one element and its size differs
            from ``target.size``.
        """
        tolerance = np.asarray(to_timedelta(tolerance).to_numpy())

        if target.size != tolerance.size and tolerance.size > 1:
            raise ValueError("list-like tolerance size must match target index size")
        return tolerance

    def tolist(self) -> List:
        """
        Return a list of the underlying data.
        """
        return list(self.astype(object))

    def min(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the minimum value of the Index or minimum along
        an axis.

        Returns ``self._na_value`` for an empty index, when NaT is present
        and ``skipna`` is False, or when the reduction raises ValueError.

        See Also
        --------
        numpy.ndarray.min
        Series.min : Return the minimum value in a Series.
        """
        nv.validate_min(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8
        try:
            # quick check
            if len(i8) and self.is_monotonic:
                if i8[0] != iNaT:
                    return self._box_func(i8[0])

            if self.hasnans:
                if skipna:
                    min_stamp = self[~self._isnan].asi8.min()
                else:
                    return self._na_value
            else:
                min_stamp = i8.min()
            return self._box_func(min_stamp)
        except ValueError:
            return self._na_value

    def argmin(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the minimum values along an axis.

        See `numpy.ndarray.argmin` for more information on the
        `axis` parameter.

        Returns -1 when NaT is present and either every entry is NaT or
        ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.argmin
        """
        nv.validate_argmin(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            # Push NaT slots to the largest int64 so they never win argmin.
            i8[mask] = np.iinfo("int64").max
        return i8.argmin()

    def max(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the maximum value of the Index or maximum along
        an axis.

        Returns ``self._na_value`` for an empty index, when NaT is present
        and ``skipna`` is False, or when the reduction raises ValueError.

        See Also
        --------
        numpy.ndarray.max
        Series.max : Return the maximum value in a Series.
        """
        nv.validate_max(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8
        try:
            # quick check
            if len(i8) and self.is_monotonic:
                if i8[-1] != iNaT:
                    return self._box_func(i8[-1])

            if self.hasnans:
                if skipna:
                    max_stamp = self[~self._isnan].asi8.max()
                else:
                    return self._na_value
            else:
                max_stamp = i8.max()
            return self._box_func(max_stamp)
        except ValueError:
            return self._na_value

    def argmax(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the maximum values along an axis.

        See `numpy.ndarray.argmax` for more information on the
        `axis` parameter.

        Returns -1 when NaT is present and either every entry is NaT or
        ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.argmax
        """
        nv.validate_argmax(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            # Push NaT slots to the smallest int64 so they never win argmax.
            # The previous fill value of 0 compared greater than every
            # negative i8 value (i.e. anything before the epoch), so the
            # position of a NaT could be returned as the maximum.
            i8[mask] = np.iinfo("int64").min
        return i8.argmax()

    # --------------------------------------------------------------------
    # Rendering Methods

    def _format_with_header(self, header, na_rep="NaT", **kwargs):
        """
        Return ``header`` followed by the natively formatted values.
        """
        return header + list(self._format_native_types(na_rep, **kwargs))

    @property
    def _formatter_func(self):
        # Must be provided by subclasses.
        raise AbstractMethodError(self)

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value).
        """
        attrs = super()._format_attrs()
        for attrib in self._attributes:
            if attrib == "freq":
                freq = self.freqstr
                if freq is not None:
                    freq = repr(freq)
                attrs.append(("freq", freq))
        return attrs

    # --------------------------------------------------------------------
    # Indexing Methods

    def _convert_scalar_indexer(self, key, kind: str):
        """
        We don't allow integer or float indexing on datetime-like when using
        loc.

        Parameters
        ----------
        key : label of the slice bound
        kind : {'loc', 'getitem'}

        Raises
        ------
        TypeError
            If ``key`` is not a scalar.
        KeyError
            For integer/float keys with ``kind="loc"`` and float keys with
            ``kind="getitem"``.
        """
        assert kind in ["loc", "getitem"]

        if not is_scalar(key):
            raise TypeError(key)

        # we don't allow integer/float indexing for loc
        # we don't allow float indexing for getitem
        is_int = is_integer(key)
        is_flt = is_float(key)
        if kind == "loc" and (is_int or is_flt):
            raise KeyError(key)
        elif kind == "getitem" and is_flt:
            raise KeyError(key)

        return super()._convert_scalar_indexer(key, kind=kind)

    def _validate_partial_date_slice(self, reso: str):
        # Hook for subclasses; not implemented on the mixin.
        raise NotImplementedError

    def _parsed_string_to_bounds(self, reso: str, parsed: datetime):
        # Hook for subclasses; not implemented on the mixin.
        raise NotImplementedError

    def _partial_date_slice(
        self, reso: str, parsed: datetime, use_lhs: bool = True, use_rhs: bool = True
    ):
        """
        Locate the entries falling within the bounds of a parsed date string.

        Parameters
        ----------
        reso : str
        parsed : datetime
        use_lhs : bool, default True
        use_rhs : bool, default True

        Returns
        -------
        slice or ndarray[intp]

        Raises
        ------
        KeyError
            If the index is monotonic, non-empty, and both bounds fall
            before the first or after the last entry.
        """
        self._validate_partial_date_slice(reso)

        t1, t2 = self._parsed_string_to_bounds(reso, parsed)
        i8vals = self.asi8
        unbox = self._data._unbox_scalar

        if self.is_monotonic:

            if len(self) and (
                (use_lhs and t1 < self[0] and t2 < self[0])
                or ((use_rhs and t1 > self[-1] and t2 > self[-1]))
            ):
                # we are out of range
                raise KeyError

            # TODO: does this depend on being monotonic _increasing_?

            # a monotonic (sorted) series can be sliced
            # Use asi8.searchsorted to avoid re-validating Periods/Timestamps
            left = i8vals.searchsorted(unbox(t1), side="left") if use_lhs else None
            right = i8vals.searchsorted(unbox(t2), side="right") if use_rhs else None
            return slice(left, right)

        else:
            lhs_mask = (i8vals >= unbox(t1)) if use_lhs else True
            rhs_mask = (i8vals <= unbox(t2)) if use_rhs else True

            # try to find the dates
            return (lhs_mask & rhs_mask).nonzero()[0]

    # --------------------------------------------------------------------

    __add__ = make_wrapped_arith_op("__add__")
    __radd__ = make_wrapped_arith_op("__radd__")
    __sub__ = make_wrapped_arith_op("__sub__")
    __rsub__ = make_wrapped_arith_op("__rsub__")
    __pow__ = make_wrapped_arith_op("__pow__")
    __rpow__ = make_wrapped_arith_op("__rpow__")
    __mul__ = make_wrapped_arith_op("__mul__")
    __rmul__ = make_wrapped_arith_op("__rmul__")
    __floordiv__ = make_wrapped_arith_op("__floordiv__")
    __rfloordiv__ = make_wrapped_arith_op("__rfloordiv__")
    __mod__ = make_wrapped_arith_op("__mod__")
    __rmod__ = make_wrapped_arith_op("__rmod__")
    __divmod__ = make_wrapped_arith_op("__divmod__")
    __rdivmod__ = make_wrapped_arith_op("__rdivmod__")
    __truediv__ = make_wrapped_arith_op("__truediv__")
    __rtruediv__ = make_wrapped_arith_op("__rtruediv__")

    def isin(self, values, level=None):
        """
        Compute boolean array of whether each index value is found in the
        passed set of values.

        Parameters
        ----------
        values : set or sequence of values
        level : optional
            Only validated (via ``_validate_index_level``) when not None.

        Returns
        -------
        is_contained : ndarray (boolean dtype)
        """
        if level is not None:
            self._validate_index_level(level)

        if not isinstance(values, type(self)):
            try:
                values = type(self)(values)
            except ValueError:
                # Not coercible to our own type: compare as objects.
                return self.astype(object).isin(values)

        return algorithms.isin(self.asi8, values.asi8)

    @Appender(Index.where.__doc__)
    def where(self, cond, other=None):
        values = self.view("i8")

        if is_scalar(other) and isna(other):
            other = NaT.value

        else:
            # Do type inference if necessary up front
            # e.g. we passed PeriodIndex.values and got an ndarray of Periods
            other = Index(other)

            if is_categorical_dtype(other):
                # e.g. we have a Categorical holding self.dtype
                if needs_i8_conversion(other.categories):
                    other = other._internal_get_values()

            if not is_dtype_equal(self.dtype, other.dtype):
                raise TypeError(f"Where requires matching dtype, not {other.dtype}")

            other = other.view("i8")

        result = np.where(cond, values, other).astype("i8")
        arr = type(self._data)._simple_new(result, dtype=self.dtype)
        return type(self)._simple_new(arr, name=self.name)

    def _summary(self, name=None) -> str:
        """
        Return a summarized representation.

        Parameters
        ----------
        name : str
            Name to use in the summary representation.

        Returns
        -------
        str
            Summarized representation of the index.
        """
        formatter = self._formatter_func
        if len(self) > 0:
            index_summary = f", {formatter(self[0])} to {formatter(self[-1])}"
        else:
            index_summary = ""

        if name is None:
            name = type(self).__name__
        result = f"{name}: {len(self)} entries{index_summary}"
        if self.freq:
            result += f"\nFreq: {self.freqstr}"

        # display as values, not quoted
        result = result.replace("'", "")
        return result

    def shift(self, periods=1, freq=None):
        """
        Shift index by desired number of time frequency increments.

        This method is for shifting the values of datetime-like indexes
        by a specified time increment a given number of times.

        Parameters
        ----------
        periods : int, default 1
            Number of periods (or increments) to shift by,
            can be positive or negative.

            .. versionchanged:: 0.24.0

        freq : pandas.DateOffset, pandas.Timedelta or string, optional
            Frequency increment to shift by.
            If None, the index is shifted by its own `freq` attribute.
            Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.

        Returns
        -------
        pandas.DatetimeIndex
            Shifted index.

        See Also
        --------
        Index.shift : Shift values of Index.
        PeriodIndex.shift : Shift values of PeriodIndex.
        """
        result = self._data._time_shift(periods, freq=freq)
        return type(self)(result, name=self.name)

    # --------------------------------------------------------------------
    # List-like Methods

    def delete(self, loc):
        """
        Return a new index with the entries at ``loc`` removed.

        ``freq`` is kept for period dtype, when a single end position is
        removed, or when ``loc`` is (or reduces to) a step-1 slice touching
        either end of the index; otherwise it is reset to None.
        """
        new_i8s = np.delete(self.asi8, loc)

        freq = None
        if is_period_dtype(self):
            freq = self.freq
        elif is_integer(loc):
            if loc in (0, -len(self), -1, len(self) - 1):
                freq = self.freq
        else:
            if is_list_like(loc):
                loc = lib.maybe_indices_to_slice(ensure_int64(np.array(loc)), len(self))
            if isinstance(loc, slice) and loc.step in (1, None):
                if loc.start in (0, None) or loc.stop in (len(self), None):
                    freq = self.freq

        arr = type(self._data)._simple_new(new_i8s, dtype=self.dtype, freq=freq)
        return type(self)._simple_new(arr, name=self.name)
class DatetimeIndexOpsMixin(ExtensionIndex):
    """
    Common ops mixin to support a unified interface datetimelike Index.

    The index delegates storage to ``self._data`` (a DatetimeArray,
    TimedeltaArray or PeriodArray) and works on its int64 representation
    (``self.asi8``) for reductions, comparisons and slicing.
    """

    _data: Union[DatetimeArray, TimedeltaArray, PeriodArray]
    freq: Optional[BaseOffset]
    freqstr: Optional[str]
    _resolution_obj: Resolution
    _bool_ops: List[str] = []
    _field_ops: List[str] = []

    # error: "Callable[[Any], Any]" has no attribute "fget"
    hasnans = cache_readonly(
        DatetimeLikeArrayMixin._hasnans.fget  # type: ignore[attr-defined]
    )
    _hasnans = hasnans  # for index / array -agnostic code

    @property
    def _is_all_dates(self) -> bool:
        """Always True for this mixin."""
        return True

    # ------------------------------------------------------------------------
    # Abstract data attributes

    @property
    def values(self) -> np.ndarray:
        """Return the ndarray stored on the underlying array (``_data._data``)."""
        # Note: PeriodArray overrides this to return an ndarray of objects.
        return self._data._data

    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc and other functions.

        Boolean or scalar results are returned as-is; anything else is
        re-wrapped in an Index carrying this index's attributes.
        """
        result = lib.item_from_zerodim(result)
        if is_bool_dtype(result) or lib.is_scalar(result):
            return result

        attrs = self._get_attributes_dict()
        if not is_period_dtype(self.dtype) and attrs["freq"]:
            # no need to infer if freq is None
            attrs["freq"] = "infer"
        return Index(result, **attrs)

    # ------------------------------------------------------------------------

    def equals(self, other: object) -> bool:
        """
        Determines if two Index objects contain the same elements.

        Parameters
        ----------
        other : object
            Candidate to compare with. Non-Index objects and Index objects
            with float/int/uint/complex dtype are never equal.

        Returns
        -------
        bool
        """
        if self.is_(other):
            return True

        if not isinstance(other, Index):
            return False
        elif other.dtype.kind in ["f", "i", "u", "c"]:
            return False
        elif not isinstance(other, type(self)):
            try:
                other = type(self)(other)
            except (ValueError, TypeError, OverflowError):
                # e.g.
                #  ValueError -> cannot parse str entry, or OutOfBoundsDatetime
                #  TypeError  -> trying to convert IntervalIndex to DatetimeIndex
                #  OverflowError -> Index([very_large_timedeltas])
                return False

        if not is_dtype_equal(self.dtype, other.dtype):
            # have different timezone
            return False

        return np.array_equal(self.asi8, other.asi8)

    @Appender(Index.__contains__.__doc__)
    def __contains__(self, key: Any) -> bool:
        # hash() raises TypeError for unhashable keys before any lookup is tried
        hash(key)
        try:
            res = self.get_loc(key)
        except (KeyError, TypeError, ValueError):
            return False
        # a scalar or slice location always counts as a hit; a list-like
        # location only counts when it is non-empty
        return bool(
            is_scalar(res) or isinstance(res, slice) or (is_list_like(res) and len(res))
        )

    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
        nv.validate_take(tuple(), kwargs)
        indices = ensure_int64(indices)

        # when the indices are expressible as a slice, go through __getitem__
        maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
        if isinstance(maybe_slice, slice):
            return self[maybe_slice]

        return ExtensionIndex.take(
            self, indices, axis, allow_fill, fill_value, **kwargs
        )

    @doc(IndexOpsMixin.searchsorted, klass="Datetime-like Index")
    def searchsorted(self, value, side="left", sorter=None):
        return self._data.searchsorted(value, side=side, sorter=sorter)

    _can_hold_na = True

    _na_value = NaT
    """The expected NA value to use with this index."""

    def _convert_tolerance(self, tolerance, target):
        """
        Convert ``tolerance`` to an ndarray via ``to_timedelta``.

        Raises
        ------
        ValueError
            If ``tolerance`` has more than one element and its size differs
            from ``target.size``.
        """
        tolerance = np.asarray(to_timedelta(tolerance).to_numpy())

        if target.size != tolerance.size and tolerance.size > 1:
            raise ValueError("list-like tolerance size must match target index size")
        return tolerance

    def tolist(self) -> List:
        """
        Return a list of the underlying data.
        """
        return list(self.astype(object))

    def min(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the minimum value of the Index or minimum along
        an axis.

        Returns ``self._na_value`` for an empty index, or when NaNs are
        present and ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.min
        Series.min : Return the minimum value in a Series.
        """
        nv.validate_min(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8
        try:
            # quick check
            if len(i8) and self.is_monotonic:
                if i8[0] != iNaT:
                    return self._data._box_func(i8[0])

            if self.hasnans:
                if skipna:
                    min_stamp = self[~self._isnan].asi8.min()
                else:
                    return self._na_value
            else:
                min_stamp = i8.min()
            return self._data._box_func(min_stamp)
        except ValueError:
            # ndarray.min() on an empty selection (e.g. all-NaT index)
            return self._na_value

    def argmin(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the minimum values along an axis.

        See `numpy.ndarray.argmin` for more information on the
        `axis` parameter.

        Returns -1 when every entry is NaN, or when NaNs are present and
        ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.argmin
        """
        nv.validate_argmin(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            # push NaN slots to the largest int64 so they can never be the minimum
            i8[mask] = np.iinfo("int64").max
        return i8.argmin()

    def max(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the maximum value of the Index or maximum along
        an axis.

        Returns ``self._na_value`` for an empty index, or when NaNs are
        present and ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.max
        Series.max : Return the maximum value in a Series.
        """
        nv.validate_max(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8
        try:
            # quick check
            if len(i8) and self.is_monotonic:
                if i8[-1] != iNaT:
                    return self._data._box_func(i8[-1])

            if self.hasnans:
                if skipna:
                    max_stamp = self[~self._isnan].asi8.max()
                else:
                    return self._na_value
            else:
                max_stamp = i8.max()
            return self._data._box_func(max_stamp)
        except ValueError:
            # ndarray.max() on an empty selection (e.g. all-NaT index)
            return self._na_value

    def argmax(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the maximum values along an axis.

        See `numpy.ndarray.argmax` for more information on the
        `axis` parameter.

        Returns -1 when every entry is NaN, or when NaNs are present and
        ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.argmax
        """
        nv.validate_argmax(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            # Push NaN slots to the smallest int64 so they can never be the
            # maximum.  Filling with 0 (as was done previously) made the NaN
            # position win whenever every valid value was negative, e.g. an
            # index holding only pre-1970 timestamps or negative timedeltas.
            i8[mask] = np.iinfo("int64").min
        return i8.argmax()

    # --------------------------------------------------------------------
    # Rendering Methods

    def format(
        self,
        name: bool = False,
        formatter: Optional[Callable] = None,
        na_rep: str = "NaT",
        date_format: Optional[str] = None,
    ) -> List[str]:
        """
        Render a string representation of the Index.

        Parameters
        ----------
        name : bool, default False
            Whether to prepend the (pretty-printed) index name as a header.
        formatter : callable, optional
            If given, it is mapped over the index and ``na_rep`` /
            ``date_format`` are not used.
        na_rep : str, default "NaT"
        date_format : str, optional

        Returns
        -------
        list of str
        """
        header = []
        if name:
            header.append(
                ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
                if self.name is not None
                else ""
            )

        if formatter is not None:
            return header + list(self.map(formatter))

        return self._format_with_header(header, na_rep=na_rep, date_format=date_format)

    def _format_with_header(
        self, header: List[str], na_rep: str = "NaT", date_format: Optional[str] = None
    ) -> List[str]:
        """Return ``header`` followed by the natively formatted values."""
        return header + list(
            self._format_native_types(na_rep=na_rep, date_format=date_format)
        )

    @property
    def _formatter_func(self):
        """Per-element formatter; must be provided by subclasses."""
        raise AbstractMethodError(self)

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value).
        """
        attrs = super()._format_attrs()
        for attrib in self._attributes:
            if attrib == "freq":
                freq = self.freqstr
                if freq is not None:
                    freq = repr(freq)
                attrs.append(("freq", freq))
        return attrs

    # --------------------------------------------------------------------
    # Indexing Methods

    def _validate_partial_date_slice(self, reso: Resolution):
        """Hook called first by ``_partial_date_slice``; subclasses implement."""
        raise NotImplementedError

    def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
        """Return the (lower, upper) bounds for ``parsed``; subclasses implement."""
        raise NotImplementedError

    def _partial_date_slice(
        self,
        reso: Resolution,
        parsed: datetime,
        use_lhs: bool = True,
        use_rhs: bool = True,
    ):
        """
        Parameters
        ----------
        reso : Resolution
        parsed : datetime
        use_lhs : bool, default True
        use_rhs : bool, default True

        Returns
        -------
        slice or ndarray[intp]

        Raises
        ------
        KeyError
            If the index is monotonic, non-empty, and both bounds fall
            entirely before the first or after the last element.
        """
        self._validate_partial_date_slice(reso)

        t1, t2 = self._parsed_string_to_bounds(reso, parsed)
        i8vals = self.asi8
        unbox = self._data._unbox_scalar

        if self.is_monotonic:

            if len(self) and (
                (use_lhs and t1 < self[0] and t2 < self[0])
                or (use_rhs and t1 > self[-1] and t2 > self[-1])
            ):
                # we are out of range
                raise KeyError

            # TODO: does this depend on being monotonic _increasing_?

            # a monotonic (sorted) series can be sliced
            # Use asi8.searchsorted to avoid re-validating Periods/Timestamps
            left = i8vals.searchsorted(unbox(t1), side="left") if use_lhs else None
            right = i8vals.searchsorted(unbox(t2), side="right") if use_rhs else None
            return slice(left, right)

        else:
            lhs_mask = (i8vals >= unbox(t1)) if use_lhs else True
            rhs_mask = (i8vals <= unbox(t2)) if use_rhs else True

            # try to find the dates
            return (lhs_mask & rhs_mask).nonzero()[0]

    # --------------------------------------------------------------------
    # Arithmetic Methods

    __add__ = make_wrapped_arith_op("__add__")
    __sub__ = make_wrapped_arith_op("__sub__")
    __radd__ = make_wrapped_arith_op("__radd__")
    __rsub__ = make_wrapped_arith_op("__rsub__")
    __pow__ = make_wrapped_arith_op("__pow__")
    __rpow__ = make_wrapped_arith_op("__rpow__")
    __mul__ = make_wrapped_arith_op("__mul__")
    __rmul__ = make_wrapped_arith_op("__rmul__")
    __floordiv__ = make_wrapped_arith_op("__floordiv__")
    __rfloordiv__ = make_wrapped_arith_op("__rfloordiv__")
    __mod__ = make_wrapped_arith_op("__mod__")
    __rmod__ = make_wrapped_arith_op("__rmod__")
    __divmod__ = make_wrapped_arith_op("__divmod__")
    __rdivmod__ = make_wrapped_arith_op("__rdivmod__")
    __truediv__ = make_wrapped_arith_op("__truediv__")
    __rtruediv__ = make_wrapped_arith_op("__rtruediv__")

    def isin(self, values, level=None):
        """
        Compute boolean array of whether each index value is found in the
        passed set of values.

        Parameters
        ----------
        values : set or sequence of values
        level : optional
            If given, it is only validated via ``_validate_index_level``.

        Returns
        -------
        is_contained : ndarray (boolean dtype)
        """
        if level is not None:
            self._validate_index_level(level)

        if not isinstance(values, type(self)):
            try:
                values = type(self)(values)
            except ValueError:
                # values cannot be converted to this index type: compare as objects
                return self.astype(object).isin(values)

        return algorithms.isin(self.asi8, values.asi8)

    @Appender(Index.where.__doc__)
    def where(self, cond, other=None):
        values = self.view("i8")

        try:
            other = self._data._validate_where_value(other)
        except (TypeError, ValueError) as err:
            # Includes tzawareness mismatch and IncompatibleFrequencyError
            oth = getattr(other, "dtype", other)
            raise TypeError(f"Where requires matching dtype, not {oth}") from err

        result = np.where(cond, values, other).astype("i8")
        arr = self._data._from_backing_data(result)
        return type(self)._simple_new(arr, name=self.name)

    def putmask(self, mask, value):
        """
        Return a new index with ``value`` written where ``mask`` is set.

        If ``value`` fails validation against the underlying array, the
        operation is carried out on an object-dtype copy instead.
        """
        try:
            value = self._data._validate_where_value(value)
        except (TypeError, ValueError):
            return self.astype(object).putmask(mask, value)

        # work on a copy so the existing backing data is not modified
        result = self._data._ndarray.copy()
        np.putmask(result, mask, value)
        arr = self._data._from_backing_data(result)
        return type(self)._simple_new(arr, name=self.name)

    def _summary(self, name=None) -> str:
        """
        Return a summarized representation.

        Parameters
        ----------
        name : str
            Name to use in the summary representation.

        Returns
        -------
        str
            Summarized representation of the index.
        """
        formatter = self._formatter_func
        if len(self) > 0:
            index_summary = f", {formatter(self[0])} to {formatter(self[-1])}"
        else:
            index_summary = ""

        if name is None:
            name = type(self).__name__
        result = f"{name}: {len(self)} entries{index_summary}"
        if self.freq:
            result += f"\nFreq: {self.freqstr}"

        # display as values, not quoted
        result = result.replace("'", "")
        return result

    def shift(self, periods=1, freq=None):
        """
        Shift index by desired number of time frequency increments.

        This method is for shifting the values of datetime-like indexes
        by a specified time increment a given number of times.

        Parameters
        ----------
        periods : int, default 1
            Number of periods (or increments) to shift by,
            can be positive or negative.

            .. versionchanged:: 0.24.0

        freq : pandas.DateOffset, pandas.Timedelta or string, optional
            Frequency increment to shift by.
            If None, the index is shifted by its own `freq` attribute.
            Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.

        Returns
        -------
        pandas.DatetimeIndex
            Shifted index.

        See Also
        --------
        Index.shift : Shift values of Index.
        PeriodIndex.shift : Shift values of PeriodIndex.
        """
        # shift a view carrying this index's freq rather than self._data itself
        arr = self._data.view()
        arr._freq = self.freq
        result = arr._time_shift(periods, freq=freq)
        return type(self)(result, name=self.name)

    # --------------------------------------------------------------------
    # List-like Methods

    def delete(self, loc):
        """
        Return a new index with the entries at ``loc`` removed.

        ``freq`` is kept for period dtype, and otherwise only when the
        removal is at either end of the index (a single edge position or a
        unit-step slice touching the start or the end).
        """
        new_i8s = np.delete(self.asi8, loc)

        freq = None
        if is_period_dtype(self.dtype):
            freq = self.freq
        elif is_integer(loc):
            if loc in (0, -len(self), -1, len(self) - 1):
                freq = self.freq
        else:
            if is_list_like(loc):
                loc = lib.maybe_indices_to_slice(ensure_int64(np.array(loc)), len(self))
            if isinstance(loc, slice) and loc.step in (1, None):
                if loc.start in (0, None) or loc.stop in (len(self), None):
                    freq = self.freq

        arr = type(self._data)._simple_new(new_i8s, dtype=self.dtype, freq=freq)
        return type(self)._simple_new(arr, name=self.name)

    def insert(self, loc: int, item):
        """
        Make new Index inserting new item at location

        Parameters
        ----------
        loc : int
        item : object
            if not either a Python datetime or a numpy integer-like, returned
            Index dtype will be object rather than datetime.

        Returns
        -------
        new_index : Index
        """
        item = self._data._validate_insert_value(item)

        freq = None
        if is_period_dtype(self.dtype):
            freq = self.freq
        elif self.freq is not None:
            # freq can be preserved on edge cases
            if self.size:
                if item is NaT:
                    pass
                elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]:
                    freq = self.freq
                elif (loc == len(self)) and item - self.freq == self[-1]:
                    freq = self.freq
            else:
                # Adding a single item to an empty index may preserve freq
                if self.freq.is_on_offset(item):
                    freq = self.freq

        arr = self._data
        item = arr._unbox_scalar(item)
        item = arr._rebox_native(item)

        new_values = np.concatenate([arr._ndarray[:loc], [item], arr._ndarray[loc:]])
        new_arr = self._data._from_backing_data(new_values)
        new_arr._freq = freq

        return type(self)._simple_new(new_arr, name=self.name)

    # --------------------------------------------------------------------
    # Join/Set Methods

    def _can_union_without_object_cast(self, other) -> bool:
        """True when ``other`` has the same dtype as this index."""
        return is_dtype_equal(self.dtype, other.dtype)

    def _wrap_joined_index(self, joined: np.ndarray, other):
        """
        Wrap the ndarray produced by a join back into this index type.

        ``freq`` is kept for period dtype; otherwise it is kept only when
        ``self._can_fast_union(other)`` is true.
        """
        assert other.dtype == self.dtype, (other.dtype, self.dtype)
        name = get_op_result_name(self, other)

        if is_period_dtype(self.dtype):
            freq = self.freq
        else:
            self = cast(DatetimeTimedeltaMixin, self)
            freq = self.freq if self._can_fast_union(other) else None
        new_data = self._data._from_backing_data(joined)
        new_data._freq = freq

        return type(self)._simple_new(new_data, name=name)

    @doc(Index._convert_arr_indexer)
    def _convert_arr_indexer(self, keyarr):
        try:
            return self._data._validate_listlike(
                keyarr, "convert_arr_indexer", allow_object=True
            )
        except (ValueError, TypeError):
            # not valid for the underlying array: fall back to a plain array
            return com.asarray_tuplesafe(keyarr)
def inherit_from_data(name: str, delegate, cache: bool = False, wrap: bool = False):
    """
    Make an alias for a method of the underlying ExtensionArray.

    Parameters
    ----------
    name : str
        Name of an attribute the class should inherit from its EA parent.
    delegate : class
    cache : bool, default False
        Whether to convert wrapped properties into cache_readonly
    wrap : bool, default False
        Whether to wrap the inherited result in an Index. Applies to
        properties (cached or not) and to methods; plain non-callable
        attributes are passed through untouched.

    Returns
    -------
    attribute, method, property, or cache_readonly

    Raises
    ------
    ValueError
        Raised by a generated *method* when it is called with an ``inplace``
        keyword argument.
    """
    attr = getattr(delegate, name)

    def _maybe_wrap(self, result):
        # Single place for the wrapping rules so that the property, cached
        # property and method paths cannot drift apart.
        if not wrap:
            return result
        if isinstance(result, type(self._data)):
            # same array type as our backing data -> same Index type
            return type(self)._simple_new(result, name=self.name)
        elif isinstance(result, ABCDataFrame):
            return result.set_index(self)
        return Index(result, name=self.name)

    if isinstance(attr, property) or type(attr).__name__ == "getset_descriptor":
        # getset_descriptor i.e. property defined in cython class
        if cache:

            def cached(self):
                # previously this path returned the raw value and silently
                # ignored ``wrap=True``
                return _maybe_wrap(self, getattr(self._data, name))

            cached.__name__ = name
            cached.__doc__ = attr.__doc__
            method = cache_readonly(cached)

        else:

            def fget(self):
                return _maybe_wrap(self, getattr(self._data, name))

            def fset(self, value):
                setattr(self._data, name, value)

            fget.__name__ = name
            fget.__doc__ = attr.__doc__

            method = property(fget, fset)

    elif not callable(attr):
        # just a normal attribute, no wrapping
        method = attr

    else:

        def method(self, *args, **kwargs):
            if "inplace" in kwargs:
                raise ValueError(f"cannot use inplace with {type(self).__name__}")
            result = attr(self._data, *args, **kwargs)
            return _maybe_wrap(self, result)

        method.__name__ = name
        method.__doc__ = attr.__doc__
    return method
class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):
    """
    Common ops mixin to support a unified interface datetimelike Index.
    """

    _is_numeric_dtype = False
    _can_hold_strings = False
    _data: DatetimeArray | TimedeltaArray | PeriodArray
    freq: BaseOffset | None
    freqstr: str | None
    _resolution_obj: Resolution
    _bool_ops: list[str] = []
    _field_ops: list[str] = []

    # error: "Callable[[Any], Any]" has no attribute "fget"
    hasnans = cache_readonly(
        DatetimeLikeArrayMixin._hasnans.fget  # type: ignore[attr-defined]
    )

    @property
    def _is_all_dates(self) -> bool:
        return True

    # ------------------------------------------------------------------------

    def equals(self, other: Any) -> bool:
        """
        Determines if two Index objects contain the same elements.
        """
        if self.is_(other):
            return True
        if not isinstance(other, Index):
            return False
        if other.dtype.kind in ("f", "i", "u", "c"):
            # numeric indexes never compare equal to a datetimelike one
            return False

        if not isinstance(other, type(self)):
            inferable = self._data._infer_matches

            # only attempt a conversion when the other index looks like it
            # holds values of our kind
            if other.dtype == object:
                convertible = other.inferred_type in inferable
            elif is_categorical_dtype(other.dtype):
                other = cast("CategoricalIndex", other)
                convertible = other.categories.inferred_type in inferable
            else:
                convertible = False

            if convertible:
                try:
                    other = type(self)(other)
                except (ValueError, TypeError, OverflowError):
                    # e.g.
                    #  ValueError -> cannot parse str entry, or OutOfBoundsDatetime
                    #  TypeError  -> trying to convert IntervalIndex to DatetimeIndex
                    #  OverflowError -> Index([very_large_timedeltas])
                    return False

        if not is_dtype_equal(self.dtype, other.dtype):
            # have different timezone
            return False
        return np.array_equal(self.asi8, other.asi8)

    @Appender(Index.__contains__.__doc__)
    def __contains__(self, key: Any) -> bool:
        # unhashable keys raise TypeError here, before any lookup
        hash(key)
        try:
            self.get_loc(key)
        except (KeyError, TypeError, ValueError):
            found = False
        else:
            found = True
        return found

    _can_hold_na = True

    _na_value: NaTType = NaT
    """The expected NA value to use with this index."""

    def _convert_tolerance(self, tolerance, target):
        # normalise to an ndarray first, then defer to the parent class
        as_array = np.asarray(to_timedelta(tolerance).to_numpy())
        return super()._convert_tolerance(as_array, target)

    # --------------------------------------------------------------------
    # Rendering Methods

    def format(
        self,
        name: bool = False,
        formatter: Callable | None = None,
        na_rep: str = "NaT",
        date_format: str | None = None,
    ) -> list[str]:
        """
        Render a string representation of the Index.
        """
        header = []
        if name:
            if self.name is not None:
                label = ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
            else:
                label = ""
            header.append(label)

        if formatter is None:
            return self._format_with_header(
                header, na_rep=na_rep, date_format=date_format
            )
        # an explicit formatter bypasses na_rep / date_format
        return header + list(self.map(formatter))

    def _format_with_header(
        self, header: list[str], na_rep: str = "NaT", date_format: str | None = None
    ) -> list[str]:
        # matches base class except for whitespace padding and date_format
        rendered = self._format_native_types(na_rep=na_rep, date_format=date_format)
        return header + list(rendered)

    @property
    def _formatter_func(self):
        return self._data._formatter()

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value).
        """
        attrs = super()._format_attrs()
        for attrib in self._attributes:
            # iterating over _attributes prevents us from doing this for PeriodIndex
            if attrib != "freq":
                continue
            shown = self.freqstr
            if shown is not None:
                shown = repr(shown)  # e.g. D -> 'D'
            attrs.append(("freq", shown))
        return attrs

    @Appender(Index._summary.__doc__)
    def _summary(self, name=None) -> str:
        summary = super()._summary(name=name)
        if not self.freq:
            return summary
        return summary + f"\nFreq: {self.freqstr}"

    # --------------------------------------------------------------------
    # Indexing Methods

    def _can_partial_date_slice(self, reso: Resolution) -> bool:
        raise NotImplementedError

    def _parsed_string_to_bounds(self, reso: Resolution, parsed):
        raise NotImplementedError

    def _parse_with_reso(self, label: str):
        # overridden by TimedeltaIndex
        parsed, reso_str = parsing.parse_time_string(label, self.freq)
        return parsed, Resolution.from_attrname(reso_str)

    def _get_string_slice(self, key: str):
        parsed, reso = self._parse_with_reso(key)
        try:
            return self._partial_date_slice(reso, parsed)
        except KeyError as err:
            # re-raise carrying the key the caller actually passed
            raise KeyError(key) from err

    @final
    def _partial_date_slice(
        self,
        reso: Resolution,
        parsed: datetime,
    ):
        """
        Parameters
        ----------
        reso : Resolution
        parsed : datetime

        Returns
        -------
        slice or ndarray[intp]
        """
        if not self._can_partial_date_slice(reso):
            raise ValueError

        lower, upper = self._parsed_string_to_bounds(reso, parsed)
        backing = self._data._ndarray
        to_native = self._data._unbox

        if not self.is_monotonic_increasing:
            # try to find the dates
            at_or_after = backing >= to_native(lower)
            at_or_before = backing <= to_native(upper)
            return (at_or_after & at_or_before).nonzero()[0]

        if len(self):
            entirely_before = lower < self[0] and upper < self[0]
            if entirely_before or (lower > self[-1] and upper > self[-1]):
                # we are out of range
                raise KeyError

        # TODO: does this depend on being monotonic _increasing_?

        # a monotonic (sorted) series can be sliced
        start = backing.searchsorted(to_native(lower), side="left")
        stop = backing.searchsorted(to_native(upper), side="right")
        return slice(start, stop)

    def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
        """
        If label is a string, cast it to scalar type according to resolution.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}
        kind : {'loc', 'getitem'} or None

        Returns
        -------
        label : object

        Notes
        -----
        Value of `side` parameter should be validated in caller.
        """
        assert kind in ["loc", "getitem", None, lib.no_default]
        self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound")

        if not isinstance(label, str):
            # non-string labels pass through only if the array recognises them
            if not isinstance(label, self._data._recognized_scalars):
                raise self._invalid_indexer("slice", label)
            return label

        try:
            parsed, reso = self._parse_with_reso(label)
        except ValueError as err:
            # DTI -> parsing.DateParseError
            # TDI -> 'unit abbreviation w/o a number'
            # PI -> string cannot be parsed as datetime-like
            raise self._invalid_indexer("slice", label) from err

        lower, upper = self._parsed_string_to_bounds(reso, parsed)
        if side == "left":
            return lower
        return upper

    # --------------------------------------------------------------------
    # Arithmetic Methods

    def shift(self: _T, periods: int = 1, freq=None) -> _T:
        """
        Shift index by desired number of time frequency increments.

        This method is for shifting the values of datetime-like indexes
        by a specified time increment a given number of times.

        Parameters
        ----------
        periods : int, default 1
            Number of periods (or increments) to shift by,
            can be positive or negative.
        freq : pandas.DateOffset, pandas.Timedelta or string, optional
            Frequency increment to shift by.
            If None, the index is shifted by its own `freq` attribute.
            Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.

        Returns
        -------
        pandas.DatetimeIndex
            Shifted index.

        See Also
        --------
        Index.shift : Shift values of Index.
        PeriodIndex.shift : Shift values of PeriodIndex.
        """
        # operate on a view so that attaching freq does not touch self._data
        view = self._data.view()
        view._freq = self.freq
        shifted = view._time_shift(periods, freq=freq)
        return type(self)._simple_new(shifted, name=self.name)

    # --------------------------------------------------------------------

    @doc(Index._maybe_cast_listlike_indexer)
    def _maybe_cast_listlike_indexer(self, keyarr):
        try:
            res = self._data._validate_listlike(keyarr, allow_object=True)
        except (ValueError, TypeError):
            if isinstance(keyarr, ExtensionArray):
                # e.g. we don't want to cast DTA to ndarray[object]
                res = keyarr
            else:
                # TODO: com.asarray_tuplesafe shouldn't cast e.g. DatetimeArray
                res = com.asarray_tuplesafe(keyarr)
        return Index(res, dtype=res.dtype)
class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):
    """
    Common ops mixin to support a unified interface datetimelike Index.

    The index delegates storage to ``self._data`` (a DatetimeArray,
    TimedeltaArray or PeriodArray) and works on its int64 representation
    (``self.asi8``) or backing ndarray for reductions and slicing.
    """

    _can_hold_strings = False

    _data: Union[DatetimeArray, TimedeltaArray, PeriodArray]
    freq: Optional[BaseOffset]
    freqstr: Optional[str]
    _resolution_obj: Resolution
    _bool_ops: List[str] = []
    _field_ops: List[str] = []

    # error: "Callable[[Any], Any]" has no attribute "fget"
    hasnans = cache_readonly(
        DatetimeLikeArrayMixin._hasnans.fget  # type: ignore[attr-defined]
    )
    _hasnans = hasnans  # for index / array -agnostic code

    @property
    def _is_all_dates(self) -> bool:
        """Always True for this mixin."""
        return True

    # ------------------------------------------------------------------------
    # Abstract data attributes

    @property
    def values(self) -> np.ndarray:
        """Return the backing ndarray of the underlying array."""
        # Note: PeriodArray overrides this to return an ndarray of objects.
        return self._data._ndarray

    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc and other functions.

        Boolean or scalar results are returned as-is; anything else is
        re-wrapped in this index type carrying this index's attributes.
        """
        result = lib.item_from_zerodim(result)
        if is_bool_dtype(result) or lib.is_scalar(result):
            return result

        attrs = self._get_attributes_dict()
        if not is_period_dtype(self.dtype) and attrs["freq"]:
            # no need to infer if freq is None
            attrs["freq"] = "infer"
        return type(self)(result, **attrs)

    # ------------------------------------------------------------------------

    def equals(self, other: Any) -> bool:
        """
        Determines if two Index objects contain the same elements.

        Conversion of ``other`` to this index type is only attempted for
        object-dtype or categorical indexes whose inferred type is listed in
        ``self._data._infer_matches``.
        """
        if self.is_(other):
            return True

        if not isinstance(other, Index):
            return False
        elif other.dtype.kind in ["f", "i", "u", "c"]:
            return False
        elif not isinstance(other, type(self)):
            should_try = False
            inferable = self._data._infer_matches
            if other.dtype == object:
                should_try = other.inferred_type in inferable
            elif is_categorical_dtype(other.dtype):
                other = cast("CategoricalIndex", other)
                should_try = other.categories.inferred_type in inferable

            if should_try:
                try:
                    other = type(self)(other)
                except (ValueError, TypeError, OverflowError):
                    # e.g.
                    #  ValueError -> cannot parse str entry, or OutOfBoundsDatetime
                    #  TypeError  -> trying to convert IntervalIndex to DatetimeIndex
                    #  OverflowError -> Index([very_large_timedeltas])
                    return False

        if not is_dtype_equal(self.dtype, other.dtype):
            # have different timezone
            return False

        return np.array_equal(self.asi8, other.asi8)

    @Appender(Index.__contains__.__doc__)
    def __contains__(self, key: Any) -> bool:
        # hash() raises TypeError for unhashable keys before any lookup is tried
        hash(key)
        try:
            self.get_loc(key)
        except (KeyError, TypeError, ValueError):
            return False
        return True

    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
        nv.validate_take((), kwargs)
        indices = np.asarray(indices, dtype=np.intp)

        maybe_slice = lib.maybe_indices_to_slice(indices, len(self))

        result = NDArrayBackedExtensionIndex.take(
            self, indices, axis, allow_fill, fill_value, **kwargs
        )
        if isinstance(maybe_slice, slice):
            # indices equivalent to a slice: attach the freq that slicing
            # would have produced
            freq = self._data._get_getitem_freq(maybe_slice)
            result._data._freq = freq
        return result

    _can_hold_na = True

    _na_value = NaT
    """The expected NA value to use with this index."""

    def _convert_tolerance(self, tolerance, target):
        """Convert ``tolerance`` via ``to_timedelta`` and defer to the parent."""
        tolerance = np.asarray(to_timedelta(tolerance).to_numpy())
        return super()._convert_tolerance(tolerance, target)

    def tolist(self) -> List:
        """
        Return a list of the underlying data.
        """
        return list(self.astype(object))

    def min(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the minimum value of the Index or minimum along
        an axis.

        Returns ``self._na_value`` for an empty or all-NaN index, or when
        NaNs are present and ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.min
        Series.min : Return the minimum value in a Series.
        """
        nv.validate_min(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8

        if len(i8) and self.is_monotonic_increasing:
            # quick check
            if i8[0] != iNaT:
                return self._data._box_func(i8[0])

        if self.hasnans:
            if not skipna:
                return self._na_value
            i8 = i8[~self._isnan]

        if not len(i8):
            return self._na_value

        min_stamp = i8.min()
        return self._data._box_func(min_stamp)

    def argmin(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the minimum values along an axis.

        See `numpy.ndarray.argmin` for more information on the
        `axis` parameter.

        Returns -1 when every entry is NaN, or when NaNs are present and
        ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.argmin
        """
        nv.validate_argmin(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            # push NaN slots to the largest int64 so they can never be the minimum
            i8[mask] = np.iinfo("int64").max
        return i8.argmin()

    def max(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the maximum value of the Index or maximum along
        an axis.

        Returns ``self._na_value`` for an empty or all-NaN index, or when
        NaNs are present and ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.max
        Series.max : Return the maximum value in a Series.
        """
        nv.validate_max(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8

        # same monotonicity property that min() consults
        if len(i8) and self.is_monotonic_increasing:
            # quick check
            if i8[-1] != iNaT:
                return self._data._box_func(i8[-1])

        if self.hasnans:
            if not skipna:
                return self._na_value
            i8 = i8[~self._isnan]

        if not len(i8):
            return self._na_value

        max_stamp = i8.max()
        return self._data._box_func(max_stamp)

    def argmax(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the maximum values along an axis.

        See `numpy.ndarray.argmax` for more information on the
        `axis` parameter.

        Returns -1 when every entry is NaN, or when NaNs are present and
        ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.argmax
        """
        nv.validate_argmax(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            # Push NaN slots to the smallest int64 so they can never be the
            # maximum.  Filling with 0 (as was done previously) made the NaN
            # position win whenever every valid value was negative, e.g. an
            # index holding only pre-1970 timestamps or negative timedeltas.
            i8[mask] = np.iinfo("int64").min
        return i8.argmax()

    # --------------------------------------------------------------------
    # Rendering Methods

    def format(
        self,
        name: bool = False,
        formatter: Optional[Callable] = None,
        na_rep: str = "NaT",
        date_format: Optional[str] = None,
    ) -> List[str]:
        """
        Render a string representation of the Index.

        Parameters
        ----------
        name : bool, default False
            Whether to prepend the (pretty-printed) index name as a header.
        formatter : callable, optional
            If given, it is mapped over the index and ``na_rep`` /
            ``date_format`` are not used.
        na_rep : str, default "NaT"
        date_format : str, optional

        Returns
        -------
        list of str
        """
        header = []
        if name:
            header.append(
                ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
                if self.name is not None
                else ""
            )

        if formatter is not None:
            return header + list(self.map(formatter))

        return self._format_with_header(header, na_rep=na_rep, date_format=date_format)

    def _format_with_header(
        self, header: List[str], na_rep: str = "NaT", date_format: Optional[str] = None
    ) -> List[str]:
        """Return ``header`` followed by the natively formatted values."""
        return header + list(
            self._format_native_types(na_rep=na_rep, date_format=date_format)
        )

    @property
    def _formatter_func(self):
        """Formatter obtained from the underlying array's ``_formatter()``."""
        return self._data._formatter()

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value).
        """
        attrs = super()._format_attrs()
        for attrib in self._attributes:
            if attrib == "freq":
                freq = self.freqstr
                if freq is not None:
                    freq = repr(freq)
                attrs.append(("freq", freq))
        return attrs

    def _summary(self, name=None) -> str:
        """
        Return a summarized representation.

        Parameters
        ----------
        name : str
            Name to use in the summary representation.

        Returns
        -------
        str
            Summarized representation of the index.
        """
        formatter = self._formatter_func
        if len(self) > 0:
            index_summary = f", {formatter(self[0])} to {formatter(self[-1])}"
        else:
            index_summary = ""

        if name is None:
            name = type(self).__name__
        result = f"{name}: {len(self)} entries{index_summary}"
        if self.freq:
            result += f"\nFreq: {self.freqstr}"

        # display as values, not quoted
        result = result.replace("'", "")
        return result

    # --------------------------------------------------------------------
    # Indexing Methods

    def _validate_partial_date_slice(self, reso: Resolution):
        """Hook called first by ``_partial_date_slice``; subclasses implement."""
        raise NotImplementedError

    def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
        """Return the (lower, upper) bounds for ``parsed``; subclasses implement."""
        raise NotImplementedError

    def _partial_date_slice(
        self,
        reso: Resolution,
        parsed: datetime,
    ):
        """
        Parameters
        ----------
        reso : Resolution
        parsed : datetime

        Returns
        -------
        slice or ndarray[intp]

        Raises
        ------
        KeyError
            If the index is monotonic increasing, non-empty, and both bounds
            fall entirely before the first or after the last element.
        """
        self._validate_partial_date_slice(reso)

        t1, t2 = self._parsed_string_to_bounds(reso, parsed)
        vals = self._data._ndarray
        unbox = self._data._unbox

        if self.is_monotonic_increasing:

            if len(self) and (
                (t1 < self[0] and t2 < self[0]) or (t1 > self[-1] and t2 > self[-1])
            ):
                # we are out of range
                raise KeyError

            # TODO: does this depend on being monotonic _increasing_?

            # a monotonic (sorted) series can be sliced
            left = vals.searchsorted(unbox(t1), side="left")
            right = vals.searchsorted(unbox(t2), side="right")
            return slice(left, right)

        else:
            lhs_mask = vals >= unbox(t1)
            rhs_mask = vals <= unbox(t2)

            # try to find the dates
            return (lhs_mask & rhs_mask).nonzero()[0]

    # --------------------------------------------------------------------
    # Arithmetic Methods

    __add__ = make_wrapped_arith_op("__add__")
    __sub__ = make_wrapped_arith_op("__sub__")
    __radd__ = make_wrapped_arith_op("__radd__")
    __rsub__ = make_wrapped_arith_op("__rsub__")
    __pow__ = make_wrapped_arith_op("__pow__")
    __rpow__ = make_wrapped_arith_op("__rpow__")
    __mul__ = make_wrapped_arith_op("__mul__")
    __rmul__ = make_wrapped_arith_op("__rmul__")
    __floordiv__ = make_wrapped_arith_op("__floordiv__")
    __rfloordiv__ = make_wrapped_arith_op("__rfloordiv__")
    __mod__ = make_wrapped_arith_op("__mod__")
    __rmod__ = make_wrapped_arith_op("__rmod__")
    __divmod__ = make_wrapped_arith_op("__divmod__")
    __rdivmod__ = make_wrapped_arith_op("__rdivmod__")
    __truediv__ = make_wrapped_arith_op("__truediv__")
    __rtruediv__ = make_wrapped_arith_op("__rtruediv__")

    def shift(self: _T, periods: int = 1, freq=None) -> _T:
        """
        Shift index by desired number of time frequency increments.

        This method is for shifting the values of datetime-like indexes
        by a specified time increment a given number of times.

        Parameters
        ----------
        periods : int, default 1
            Number of periods (or increments) to shift by,
            can be positive or negative.

            .. versionchanged:: 0.24.0

        freq : pandas.DateOffset, pandas.Timedelta or string, optional
            Frequency increment to shift by.
            If None, the index is shifted by its own `freq` attribute.
            Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.

        Returns
        -------
        pandas.DatetimeIndex
            Shifted index.

        See Also
        --------
        Index.shift : Shift values of Index.
        PeriodIndex.shift : Shift values of PeriodIndex.
        """
        # shift a view carrying this index's freq rather than self._data itself
        arr = self._data.view()
        arr._freq = self.freq
        result = arr._time_shift(periods, freq=freq)
        return type(self)(result, name=self.name)

    # --------------------------------------------------------------------
    # List-like Methods

    def _get_delete_freq(self, loc: int):
        """
        Find the `freq` for self.delete(loc).

        ``freq`` is kept for period dtype, and otherwise only when the
        removal is at either end of the index (a single edge position or a
        unit-step slice touching the start or the end).
        """
        freq = None
        if is_period_dtype(self.dtype):
            freq = self.freq
        elif self.freq is not None:
            if is_integer(loc):
                if loc in (0, -len(self), -1, len(self) - 1):
                    freq = self.freq
            else:
                if is_list_like(loc):
                    loc = lib.maybe_indices_to_slice(
                        np.asarray(loc, dtype=np.intp), len(self)
                    )
                if isinstance(loc, slice) and loc.step in (1, None):
                    if loc.start in (0, None) or loc.stop in (len(self), None):
                        freq = self.freq
        return freq

    def _get_insert_freq(self, loc: int, item):
        """
        Find the `freq` for self.insert(loc, item).
        """
        value = self._data._validate_scalar(item)
        item = self._data._box_func(value)

        freq = None
        if is_period_dtype(self.dtype):
            freq = self.freq
        elif self.freq is not None:
            # freq can be preserved on edge cases
            if self.size:
                if item is NaT:
                    pass
                elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]:
                    freq = self.freq
                elif (loc == len(self)) and item - self.freq == self[-1]:
                    freq = self.freq
            else:
                # Adding a single item to an empty index may preserve freq
                if self.freq.is_on_offset(item):
                    freq = self.freq
        return freq

    @doc(NDArrayBackedExtensionIndex.delete)
    def delete(self: _T, loc) -> _T:
        result = super().delete(loc)
        # attach the freq computed for this removal to the parent's result
        result._data._freq = self._get_delete_freq(loc)
        return result

    @doc(NDArrayBackedExtensionIndex.insert)
    def insert(self, loc: int, item):
        result = super().insert(loc, item)
        if isinstance(result, type(self)):
            # i.e. parent class method did not cast
            result._data._freq = self._get_insert_freq(loc, item)
        return result

    # --------------------------------------------------------------------
    # Join/Set Methods

    _inner_indexer = _join_i8_wrapper(libjoin.inner_join_indexer)
    _outer_indexer = _join_i8_wrapper(libjoin.outer_join_indexer)
    _left_indexer = _join_i8_wrapper(libjoin.left_join_indexer)
    _left_indexer_unique = _join_i8_wrapper(
        libjoin.left_join_indexer_unique, with_indexers=False
    )

    def _get_join_freq(self, other):
        """
        Get the freq to attach to the result of a join operation.

        ``freq`` is kept for period dtype; otherwise it is kept only when
        ``self._can_fast_union(other)`` is true.
        """
        if is_period_dtype(self.dtype):
            freq = self.freq
        else:
            self = cast(DatetimeTimedeltaMixin, self)
            freq = self.freq if self._can_fast_union(other) else None
        return freq

    def _wrap_joined_index(self, joined: np.ndarray, other):
        """
        Wrap the ndarray produced by a join back into this index type.

        ``joined`` must be int64 or already of this index's dtype; it is
        viewed as the backing ndarray's dtype before wrapping.
        """
        assert other.dtype == self.dtype, (other.dtype, self.dtype)
        assert joined.dtype == "i8" or joined.dtype == self.dtype, joined.dtype
        joined = joined.view(self._data._ndarray.dtype)
        result = super()._wrap_joined_index(joined, other)
        result._data._freq = self._get_join_freq(other)
        return result

    @doc(Index._convert_arr_indexer)
    def _convert_arr_indexer(self, keyarr):
        try:
            return self._data._validate_listlike(keyarr, allow_object=True)
        except (ValueError, TypeError):
            # not valid for the underlying array: fall back to a plain array
            return com.asarray_tuplesafe(keyarr)
class DatetimeIndexOpsMixin(ExtensionIndex, ExtensionOpsMixin):
    """
    Common ops mixin to support a unified interface datetimelike Index.

    Provides equality, membership, sorting, take, min/max reductions,
    rendering helpers, arithmetic operators, ``isin``/``where``, concatenation,
    ``shift`` and ``delete``. Most operations work on the ``asi8`` integer
    representation of the index.
    """

    _data: ExtensionArray
    freq: Optional[DateOffset]
    freqstr: Optional[str]
    _resolution: int
    _bool_ops: List[str] = []
    _field_ops: List[str] = []

    # Re-wrap the array-level ``_hasnans`` property getter as a cached value.
    hasnans = cache_readonly(DatetimeLikeArrayMixin._hasnans.fget)  # type: ignore
    _hasnans = hasnans  # for index / array -agnostic code

    @property
    def is_all_dates(self) -> bool:
        """Always True for this mixin."""
        return True

    @classmethod
    def _create_comparison_method(cls, op):
        """
        Create a comparison method that dispatches to ``cls.values``.

        The method is built by ``make_wrapped_comparison_op`` from the dunder
        name derived from ``op.__name__``.
        """
        return make_wrapped_comparison_op(f"__{op.__name__}__")

    # ------------------------------------------------------------------------
    # Abstract data attributes

    @property
    def values(self):
        # Note: PeriodArray overrides this to return an ndarray of objects.
        return self._data._data

    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc.

        Zero-dimensional results are unboxed first. Boolean-dtype or scalar
        results are returned as they are; anything else is wrapped in an
        ``Index`` built with this index's attributes. ``context`` is unused.
        """
        result = lib.item_from_zerodim(result)
        if is_bool_dtype(result) or lib.is_scalar(result):
            return result

        attrs = self._get_attributes_dict()
        if not is_period_dtype(self) and attrs["freq"]:
            # no need to infer if freq is None
            attrs["freq"] = "infer"
        return Index(result, **attrs)

    # ------------------------------------------------------------------------

    def equals(self, other) -> bool:
        """
        Determines if two Index objects contain the same elements.

        Returns True immediately when ``self.is_(other)``. Non-index objects
        are never equal. An index of another class is first converted to
        ``type(self)``; a failed conversion or a dtype mismatch means not
        equal. Otherwise the ``asi8`` values are compared.
        """
        if self.is_(other):
            return True

        if not isinstance(other, ABCIndexClass):
            return False
        elif not isinstance(other, type(self)):
            try:
                other = type(self)(other)
            except (ValueError, TypeError, OverflowError):
                # e.g.
                #  ValueError -> cannot parse str entry, or OutOfBoundsDatetime
                #  TypeError  -> trying to convert IntervalIndex to DatetimeIndex
                #  OverflowError -> Index([very_large_timedeltas])
                return False

        if not is_dtype_equal(self.dtype, other.dtype):
            # have different timezone
            return False

        return np.array_equal(self.asi8, other.asi8)

    # Membership is defined through get_loc: a scalar, a slice, or a non-empty
    # list-like location counts as "contained"; KeyError/TypeError/ValueError
    # from get_loc mean "not contained".
    @Appender(_index_shared_docs["contains"] % _index_doc_kwargs)
    def __contains__(self, key):
        try:
            res = self.get_loc(key)
            return (
                is_scalar(res)
                or isinstance(res, slice)
                or (is_list_like(res) and len(res))
            )
        except (KeyError, TypeError, ValueError):
            return False

    # Try to run function on index first, and then on elements of index
    # Especially important for group-by functionality
    def map(self, mapper, na_action=None):
        """
        Apply ``mapper`` to the index.

        ``mapper`` is first called on the whole index. An ndarray result is
        wrapped in an ``Index``. If that call raises, or does not yield an
        ``Index``, the mapping is retried on the object-dtype version of the
        index. ``na_action`` is accepted but not used by this implementation.
        """
        try:
            result = mapper(self)

            # Try to use this result if we can
            if isinstance(result, np.ndarray):
                result = Index(result)

            if not isinstance(result, Index):
                # caught by the handler below, triggering the fallback
                raise TypeError("The map function must return an Index object")
            return result
        except Exception:
            return self.astype(object).map(mapper)

    def sort_values(self, return_indexer=False, ascending=True):
        """
        Return sorted copy of Index.

        Parameters
        ----------
        return_indexer : bool, default False
            If True, also return the indexer that sorts the index.
        ascending : bool, default True
            If False, the result is in reversed order.

        Returns
        -------
        sorted index, or tuple of (sorted index, indexer) when
        ``return_indexer`` is True
        """
        if return_indexer:
            _as = self.argsort()
            if not ascending:
                _as = _as[::-1]
            sorted_index = self.take(_as)
            return sorted_index, _as
        else:
            # NB: using asi8 instead of _ndarray_values matters in numpy 1.18
            #  because the treatment of NaT has been changed to put NaT last
            #  instead of first.
            sorted_values = np.sort(self.asi8)
            attribs = self._get_attributes_dict()
            freq = attribs["freq"]

            if freq is not None and not is_period_dtype(self):
                # flip the sign of freq when its direction disagrees with
                # the requested sort order
                if freq.n > 0 and not ascending:
                    freq = freq * -1
                elif freq.n < 0 and ascending:
                    freq = freq * -1
            attribs["freq"] = freq

            if not ascending:
                sorted_values = sorted_values[::-1]

            return self._simple_new(sorted_values, **attribs)

    # When lib.maybe_indices_to_slice can express the indices as a slice the
    # result comes from plain slicing; otherwise ExtensionIndex.take is used.
    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
        nv.validate_take(tuple(), kwargs)
        indices = ensure_int64(indices)

        maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
        if isinstance(maybe_slice, slice):
            return self[maybe_slice]

        return ExtensionIndex.take(
            self, indices, axis, allow_fill, fill_value, **kwargs
        )

    _can_hold_na = True

    _na_value = NaT
    """The expected NA value to use with this index."""

    def _convert_tolerance(self, tolerance, target):
        """
        Convert ``tolerance`` to an ndarray via ``to_timedelta``.

        Raises
        ------
        ValueError
            If ``tolerance`` has more than one element and its size differs
            from the size of ``target``.
        """
        tolerance = np.asarray(to_timedelta(tolerance).to_numpy())

        if target.size != tolerance.size and tolerance.size > 1:
            raise ValueError("list-like tolerance size must match target index size")
        return tolerance

    def tolist(self) -> List:
        """
        Return a list of the underlying data.

        The list is built from the object-dtype version of the index.
        """
        return list(self.astype(object))

    def min(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the minimum value of the Index or minimum along
        an axis.

        Returns ``self._na_value`` for an empty index, when NaT entries are
        present and ``skipna`` is False, or when the reduction raises
        ValueError.

        See Also
        --------
        numpy.ndarray.min
        Series.min : Return the minimum value in a Series.
        """
        nv.validate_min(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8
        try:
            # quick check
            if len(i8) and self.is_monotonic:
                if i8[0] != iNaT:
                    return self._box_func(i8[0])

            if self.hasnans:
                if skipna:
                    # reduce over the non-NaT entries only
                    min_stamp = self[~self._isnan].asi8.min()
                else:
                    return self._na_value
            else:
                min_stamp = i8.min()
            return self._box_func(min_stamp)
        except ValueError:
            return self._na_value

    def argmin(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the minimum values along an axis.

        See `numpy.ndarray.argmin` for more information on the
        `axis` parameter.

        Returns -1 when NaT entries are present and either all entries are
        NaT or ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.argmin
        """
        nv.validate_argmin(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            # largest int64 in the NaT slots so they are never the minimum
            i8[mask] = np.iinfo("int64").max
        return i8.argmin()

    def max(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the maximum value of the Index or maximum along
        an axis.

        Returns ``self._na_value`` for an empty index, when NaT entries are
        present and ``skipna`` is False, or when the reduction raises
        ValueError.

        See Also
        --------
        numpy.ndarray.max
        Series.max : Return the maximum value in a Series.
        """
        nv.validate_max(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8
        try:
            # quick check
            if len(i8) and self.is_monotonic:
                if i8[-1] != iNaT:
                    return self._box_func(i8[-1])

            if self.hasnans:
                if skipna:
                    # reduce over the non-NaT entries only
                    max_stamp = self[~self._isnan].asi8.max()
                else:
                    return self._na_value
            else:
                max_stamp = i8.max()
            return self._box_func(max_stamp)
        except ValueError:
            return self._na_value

    def argmax(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the maximum values along an axis.

        See `numpy.ndarray.argmax` for more information on the
        `axis` parameter.

        Returns -1 when NaT entries are present and either all entries are
        NaT or ``skipna`` is False.

        See Also
        --------
        numpy.ndarray.argmax
        """
        nv.validate_argmax(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            # Smallest int64 in the NaT slots so they can never be picked as
            # the maximum, even when every valid i8 value is negative (a fill
            # value of 0 would win in that case). Mirrors argmin, which fills
            # with the largest int64.
            i8[mask] = np.iinfo("int64").min
        return i8.argmax()

    # --------------------------------------------------------------------
    # Rendering Methods

    def _format_with_header(self, header, na_rep="NaT", **kwargs):
        """Return ``header`` followed by the natively formatted values."""
        return header + list(self._format_native_types(na_rep, **kwargs))

    @property
    def _formatter_func(self):
        # must be provided by subclasses
        raise AbstractMethodError(self)

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value).

        Extends the parent's list with a ``("freq", ...)`` entry when "freq"
        is listed in ``self._attributes``; a non-None ``freqstr`` is shown
        through ``repr``.
        """
        attrs = super()._format_attrs()
        for attrib in self._attributes:
            if attrib == "freq":
                freq = self.freqstr
                if freq is not None:
                    freq = repr(freq)
                attrs.append(("freq", freq))
        return attrs

    # --------------------------------------------------------------------

    def _convert_scalar_indexer(self, key, kind=None):
        """
        We don't allow integer or float indexing on datetime-like when using
        loc.

        Parameters
        ----------
        key : label of the slice bound
        kind : {'ix', 'loc', 'getitem', 'iloc'} or None
        """
        assert kind in ["ix", "loc", "getitem", "iloc", None]

        # we don't allow integer/float indexing for loc
        # we don't allow float indexing for ix/getitem
        if is_scalar(key):
            is_int = is_integer(key)
            is_flt = is_float(key)
            if kind in ["loc"] and (is_int or is_flt):
                self._invalid_indexer("index", key)
            elif kind in ["ix", "getitem"] and is_flt:
                self._invalid_indexer("index", key)

        return super()._convert_scalar_indexer(key, kind=kind)

    # Each operator is produced by make_wrapped_arith_op from its dunder name.
    __add__ = make_wrapped_arith_op("__add__")
    __radd__ = make_wrapped_arith_op("__radd__")
    __sub__ = make_wrapped_arith_op("__sub__")
    __rsub__ = make_wrapped_arith_op("__rsub__")
    __pow__ = make_wrapped_arith_op("__pow__")
    __rpow__ = make_wrapped_arith_op("__rpow__")
    __mul__ = make_wrapped_arith_op("__mul__")
    __rmul__ = make_wrapped_arith_op("__rmul__")
    __floordiv__ = make_wrapped_arith_op("__floordiv__")
    __rfloordiv__ = make_wrapped_arith_op("__rfloordiv__")
    __mod__ = make_wrapped_arith_op("__mod__")
    __rmod__ = make_wrapped_arith_op("__rmod__")
    __divmod__ = make_wrapped_arith_op("__divmod__")
    __rdivmod__ = make_wrapped_arith_op("__rdivmod__")
    __truediv__ = make_wrapped_arith_op("__truediv__")
    __rtruediv__ = make_wrapped_arith_op("__rtruediv__")

    def isin(self, values, level=None):
        """
        Compute boolean array of whether each index value is found in the
        passed set of values.

        Parameters
        ----------
        values : set or sequence of values
        level : optional
            Only validated through ``_validate_index_level`` when given.

        Returns
        -------
        is_contained : ndarray (boolean dtype)
        """
        if level is not None:
            self._validate_index_level(level)

        if not isinstance(values, type(self)):
            try:
                values = type(self)(values)
            except ValueError:
                # not convertible: compare as objects instead
                return self.astype(object).isin(values)

        return algorithms.isin(self.asi8, values.asi8)

    # Works on the "i8" view of the index. A scalar NA `other` becomes
    # NaT.value; anything else is wrapped in an Index, must match this index's
    # dtype (TypeError otherwise) and is then viewed as "i8" as well.
    @Appender(_index_shared_docs["where"] % _index_doc_kwargs)
    def where(self, cond, other=None):
        values = self.view("i8")

        if is_scalar(other) and isna(other):
            other = NaT.value

        else:
            # Do type inference if necessary up front
            # e.g. we passed PeriodIndex.values and got an ndarray of Periods
            other = Index(other)

            if is_categorical_dtype(other):
                # e.g. we have a Categorical holding self.dtype
                if needs_i8_conversion(other.categories):
                    other = other._internal_get_values()

            if not is_dtype_equal(self.dtype, other.dtype):
                raise TypeError(f"Where requires matching dtype, not {other.dtype}")

            other = other.view("i8")

        result = np.where(cond, values, other).astype("i8")
        return self._shallow_copy(result)

    def _summary(self, name=None):
        """
        Return a summarized representation.

        Parameters
        ----------
        name : str
            Name to use in the summary representation.

        Returns
        -------
        str
            Summarized representation of the index.
        """
        formatter = self._formatter_func
        if len(self) > 0:
            index_summary = f", {formatter(self[0])} to {formatter(self[-1])}"
        else:
            index_summary = ""

        if name is None:
            name = type(self).__name__
        result = f"{name}: {len(self)} entries{index_summary}"
        if self.freq:
            result += f"\nFreq: {self.freqstr}"

        # display as values, not quoted
        result = result.replace("'", "")
        return result

    def _concat_same_dtype(self, to_concat, name):
        """
        Concatenate to_concat which has the same class.

        Raises
        ------
        ValueError
            If the elements of ``to_concat`` do not all share one dtype
            (compared via ``str(x.dtype)``).
        """
        attribs = self._get_attributes_dict()
        attribs["name"] = name
        # do not pass tz to set because tzlocal cannot be hashed
        if len({str(x.dtype) for x in to_concat}) != 1:
            raise ValueError("to_concat must have the same tz")

        new_data = type(self._values)._concat_same_type(to_concat).asi8

        # GH 3232: If the concat result is evenly spaced, we can retain the
        # original frequency
        is_diff_evenly_spaced = len(unique_deltas(new_data)) == 1
        if not is_period_dtype(self) and not is_diff_evenly_spaced:
            # reset freq
            attribs["freq"] = None

        return self._simple_new(new_data, **attribs)

    def shift(self, periods=1, freq=None):
        """
        Shift index by desired number of time frequency increments.

        This method is for shifting the values of datetime-like indexes
        by a specified time increment a given number of times.

        Parameters
        ----------
        periods : int, default 1
            Number of periods (or increments) to shift by,
            can be positive or negative.

            .. versionchanged:: 0.24.0

        freq : pandas.DateOffset, pandas.Timedelta or string, optional
            Frequency increment to shift by.
            If None, the index is shifted by its own `freq` attribute.
            Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.

        Returns
        -------
        pandas.DatetimeIndex
            Shifted index.

        See Also
        --------
        Index.shift : Shift values of Index.
        PeriodIndex.shift : Shift values of PeriodIndex.
        """
        result = self._data._time_shift(periods, freq=freq)
        # rebuild an index of the same class, carrying over the name
        return type(self)(result, name=self.name)

    # --------------------------------------------------------------------
    # List-like Methods

    def delete(self, loc):
        """
        Return a new index with the entries at ``loc`` removed.

        ``freq`` is kept for period dtype, when ``loc`` is the first or last
        integer position, or when ``loc`` is (or can be converted to) a
        step-1 slice touching either end; otherwise the result has no freq.
        """
        new_i8s = np.delete(self.asi8, loc)

        freq = None
        if is_period_dtype(self):
            freq = self.freq
        elif is_integer(loc):
            # removing the first or the last element
            if loc in (0, -len(self), -1, len(self) - 1):
                freq = self.freq
        else:
            if is_list_like(loc):
                # may turn a list of positions into an equivalent slice
                loc = lib.maybe_indices_to_slice(ensure_int64(np.array(loc)), len(self))
            if isinstance(loc, slice) and loc.step in (1, None):
                # slice anchored at the start or at the end
                if loc.start in (0, None) or loc.stop in (len(self), None):
                    freq = self.freq

        return self._shallow_copy(new_i8s, freq=freq)
class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):
    """
    Common ops mixin to support a unified interface datetimelike Index.

    Provides equality, membership, sorting, take, min/max reductions,
    rendering helpers, ``isin``/``repeat``/``where``, concatenation and
    ``astype``. Several operations dispatch to ``self._eadata``.
    """

    # override DatetimeLikeArrayMixin method
    copy = Index.copy

    # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
    # properties there.  They can be made into cache_readonly for Index
    # subclasses bc they are immutable
    inferred_freq = cache_readonly(DatetimeLikeArrayMixin.inferred_freq.fget)
    _isnan = cache_readonly(DatetimeLikeArrayMixin._isnan.fget)
    hasnans = cache_readonly(DatetimeLikeArrayMixin._hasnans.fget)
    _hasnans = hasnans  # for index / array -agnostic code
    _resolution = cache_readonly(DatetimeLikeArrayMixin._resolution.fget)
    resolution = cache_readonly(DatetimeLikeArrayMixin.resolution.fget)

    def unique(self, level=None):
        """
        Return the unique values as a shallow copy of this index.

        ``level`` is only validated through ``_validate_index_level`` when
        given. The unique values come from ``self._eadata.unique()``.
        """
        if level is not None:
            self._validate_index_level(level)

        result = self._eadata.unique()

        # Note: if `self` is already unique, then self.unique() should share
        #  a `freq` with self.  If not already unique, then self.freq must be
        #  None, so again sharing freq is correct.
        return self._shallow_copy(result._data)

    @classmethod
    def _create_comparison_method(cls, op):
        """
        Create a comparison method that dispatches to ``cls.values``.

        The returned wrapper applies ``op`` to ``self._eadata`` and the
        unwrapped ``other``; it takes its docstring from ``op`` and is named
        after ``op`` with dunder decoration.
        """
        def wrapper(self, other):
            result = op(self._eadata, maybe_unwrap_index(other))
            return result

        wrapper.__doc__ = op.__doc__
        wrapper.__name__ = '__{}__'.format(op.__name__)
        return wrapper

    # A few methods that are shared
    _maybe_mask_results = DatetimeLikeArrayMixin._maybe_mask_results

    # ------------------------------------------------------------------------

    def equals(self, other):
        """
        Determines if two Index objects contain the same elements.

        Returns True immediately when ``self.is_(other)``. Non-index objects
        are never equal. An index of another class is first converted to
        ``type(self)``; any exception during conversion means not equal.
        Dtypes must match, and for period dtype the freqs must match too.
        Otherwise the ``asi8`` values are compared.
        """
        if self.is_(other):
            return True

        if not isinstance(other, ABCIndexClass):
            return False
        elif not isinstance(other, type(self)):
            try:
                other = type(self)(other)
            except Exception:
                return False

        if not is_dtype_equal(self.dtype, other.dtype):
            # have different timezone
            return False

        elif is_period_dtype(self):
            if not is_period_dtype(other):
                return False
            if self.freq != other.freq:
                return False

        return np.array_equal(self.asi8, other.asi8)

    @staticmethod
    def _join_i8_wrapper(joinf, dtype, with_indexers=True):
        """
        Create the join wrapper methods.

        The returned staticmethod views ndarray/Index/Series arguments as
        'i8' before calling ``joinf``. With ``with_indexers`` the result is
        unpacked into (join_index, left_indexer, right_indexer) and the join
        index is viewed as ``dtype``; otherwise the raw result is returned.
        """

        @staticmethod
        def wrapper(left, right):
            if isinstance(left, (np.ndarray, ABCIndex, ABCSeries)):
                left = left.view('i8')
            if isinstance(right, (np.ndarray, ABCIndex, ABCSeries)):
                right = right.view('i8')
            results = joinf(left, right)
            if with_indexers:
                join_index, left_indexer, right_indexer = results
                join_index = join_index.view(dtype)
                return join_index, left_indexer, right_indexer
            return results

        return wrapper

    # Dispatches to the underlying array; boolean results are returned as-is,
    # other results are wrapped in an Index when that does not raise TypeError.
    @Appender(DatetimeLikeArrayMixin._evaluate_compare.__doc__)
    def _evaluate_compare(self, other, op):
        result = self._eadata._evaluate_compare(other, op)
        if is_bool_dtype(result):
            return result
        try:
            return Index(result)
        except TypeError:
            return result

    def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise',
                          from_utc=False):
        # See DatetimeLikeArrayMixin._ensure_localized.__doc__
        if getattr(self, 'tz', None):
            # ensure_localized is only relevant for tz-aware DTI
            from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray
            dtarr = DatetimeArray(self)
            result = dtarr._ensure_localized(arg,
                                             ambiguous=ambiguous,
                                             nonexistent=nonexistent,
                                             from_utc=from_utc)
            return type(self)(result, name=self.name)
        # no truthy `tz` attribute: nothing to do
        return arg

    def _box_values_as_index(self):
        """
        Return object Index which contains boxed values.
        """
        from pandas.core.index import Index
        return Index(self._box_values(self.asi8), name=self.name, dtype=object)

    # Membership is defined through get_loc: a scalar, a slice, or a non-empty
    # list-like location counts as "contained"; KeyError/TypeError/ValueError
    # from get_loc mean "not contained".
    @Appender(_index_shared_docs['contains'] % _index_doc_kwargs)
    def __contains__(self, key):
        try:
            res = self.get_loc(key)
            return (is_scalar(res) or isinstance(res, slice) or
                    (is_list_like(res) and len(res)))
        except (KeyError, TypeError, ValueError):
            return False

    contains = __contains__

    # Try to run function on index first, and then on elements of index
    # Especially important for group-by functionality
    def map(self, f):
        """
        Apply ``f`` to the index.

        ``f`` is first called on the whole index. An ndarray result is
        wrapped in an ``Index``. If that call raises, or does not yield an
        ``Index``, the mapping is retried on the object-dtype version of the
        index.
        """
        try:
            result = f(self)

            # Try to use this result if we can
            if isinstance(result, np.ndarray):
                result = Index(result)

            if not isinstance(result, Index):
                # caught by the handler below, triggering the fallback
                raise TypeError('The map function must return an Index object')
            return result
        except Exception:
            return self.astype(object).map(f)

    def sort_values(self, return_indexer=False, ascending=True):
        """
        Return sorted copy of Index.

        Parameters
        ----------
        return_indexer : bool, default False
            If True, also return the indexer that sorts the index.
        ascending : bool, default True
            If False, the result is in reversed order.

        Returns
        -------
        sorted index, or tuple of (sorted index, indexer) when
        ``return_indexer`` is True
        """
        if return_indexer:
            _as = self.argsort()
            if not ascending:
                _as = _as[::-1]
            sorted_index = self.take(_as)
            return sorted_index, _as
        else:
            sorted_values = np.sort(self._ndarray_values)
            attribs = self._get_attributes_dict()
            freq = attribs['freq']

            if freq is not None and not is_period_dtype(self):
                # flip the sign of freq when its direction disagrees with
                # the requested sort order
                if freq.n > 0 and not ascending:
                    freq = freq * -1
                elif freq.n < 0 and ascending:
                    freq = freq * -1
            attribs['freq'] = freq

            if not ascending:
                sorted_values = sorted_values[::-1]

            sorted_values = self._maybe_box_as_values(sorted_values,
                                                      **attribs)

            return self._simple_new(sorted_values, **attribs)

    # When lib.maybe_indices_to_slice can express the indices as a slice the
    # result comes from plain slicing; otherwise the i8 values are taken with
    # iNaT as the NA value.
    @Appender(_index_shared_docs['take'] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True,
             fill_value=None, **kwargs):
        nv.validate_take(tuple(), kwargs)
        indices = ensure_int64(indices)

        maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
        if isinstance(maybe_slice, slice):
            return self[maybe_slice]

        taken = self._assert_take_fillable(self.asi8, indices,
                                           allow_fill=allow_fill,
                                           fill_value=fill_value,
                                           na_value=iNaT)

        # keep freq in PeriodArray/Index, reset otherwise
        freq = self.freq if is_period_dtype(self) else None
        return self._shallow_copy(taken, freq=freq)

    _can_hold_na = True

    _na_value = NaT
    """The expected NA value to use with this index."""

    @property
    def asobject(self):
        """
        Return object Index which contains boxed values.

        .. deprecated:: 0.23.0
            Use ``astype(object)`` instead.

        *this is an internal non-public method*
        """
        warnings.warn("'asobject' is deprecated. Use 'astype(object)'"
                      " instead", FutureWarning, stacklevel=2)
        return self.astype(object)

    def _convert_tolerance(self, tolerance, target):
        """
        Convert ``tolerance`` to an ndarray via ``to_timedelta``.

        Raises
        ------
        ValueError
            If ``tolerance`` has more than one element and its size differs
            from the size of ``target``.
        """
        tolerance = np.asarray(to_timedelta(tolerance, box=False))
        if target.size != tolerance.size and tolerance.size > 1:
            raise ValueError('list-like tolerance size must match '
                             'target index size')
        return tolerance

    def tolist(self):
        """
        Return a list of the underlying data.

        The list is built from the object-dtype version of the index.
        """
        return list(self.astype(object))

    def min(self, axis=None, *args, **kwargs):
        """
        Return the minimum value of the Index or minimum along
        an axis.

        NaT entries are excluded from the reduction. ``self._na_value`` is
        returned when the reduction raises ValueError.

        See Also
        --------
        numpy.ndarray.min
        """
        nv.validate_min(args, kwargs)
        nv.validate_minmax_axis(axis)

        try:
            i8 = self.asi8

            # quick check
            if len(i8) and self.is_monotonic:
                if i8[0] != iNaT:
                    return self._box_func(i8[0])

            if self.hasnans:
                # reduce over the non-NaT entries only
                min_stamp = self[~self._isnan].asi8.min()
            else:
                min_stamp = i8.min()
            return self._box_func(min_stamp)
        except ValueError:
            return self._na_value

    def argmin(self, axis=None, *args, **kwargs):
        """
        Returns the indices of the minimum values along an axis.

        See `numpy.ndarray.argmin` for more information on the
        `axis` parameter.

        Returns -1 when every entry is NaT.

        See Also
        --------
        numpy.ndarray.argmin
        """
        nv.validate_argmin(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all():
                return -1
            i8 = i8.copy()
            # largest int64 in the NaT slots so they are never the minimum
            i8[mask] = np.iinfo('int64').max
        return i8.argmin()

    def max(self, axis=None, *args, **kwargs):
        """
        Return the maximum value of the Index or maximum along
        an axis.

        NaT entries are excluded from the reduction. ``self._na_value`` is
        returned when the reduction raises ValueError.

        See Also
        --------
        numpy.ndarray.max
        """
        nv.validate_max(args, kwargs)
        nv.validate_minmax_axis(axis)

        try:
            i8 = self.asi8

            # quick check
            if len(i8) and self.is_monotonic:
                if i8[-1] != iNaT:
                    return self._box_func(i8[-1])

            if self.hasnans:
                # reduce over the non-NaT entries only
                max_stamp = self[~self._isnan].asi8.max()
            else:
                max_stamp = i8.max()
            return self._box_func(max_stamp)
        except ValueError:
            return self._na_value

    def argmax(self, axis=None, *args, **kwargs):
        """
        Returns the indices of the maximum values along an axis.

        See `numpy.ndarray.argmax` for more information on the
        `axis` parameter.

        Returns -1 when every entry is NaT.

        See Also
        --------
        numpy.ndarray.argmax
        """
        nv.validate_argmax(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all():
                return -1
            i8 = i8.copy()
            # Smallest int64 in the NaT slots so they can never be picked as
            # the maximum, even when every valid i8 value is negative (a fill
            # value of 0 would win in that case). Mirrors argmin, which fills
            # with the largest int64.
            i8[mask] = np.iinfo('int64').min
        return i8.argmax()

    # --------------------------------------------------------------------
    # Rendering Methods

    def _format_with_header(self, header, **kwargs):
        """Return ``header`` followed by the natively formatted values."""
        return header + list(self._format_native_types(**kwargs))

    @property
    def _formatter_func(self):
        # must be provided by subclasses
        raise AbstractMethodError(self)

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value).

        Extends the parent's list with a ``("freq", ...)`` entry when "freq"
        is listed in ``self._attributes``; a non-None ``freqstr`` is shown
        wrapped in single quotes.
        """
        attrs = super(DatetimeIndexOpsMixin, self)._format_attrs()
        for attrib in self._attributes:
            if attrib == 'freq':
                freq = self.freqstr
                if freq is not None:
                    freq = "'%s'" % freq
                attrs.append(('freq', freq))
        return attrs

    # --------------------------------------------------------------------

    def _convert_scalar_indexer(self, key, kind=None):
        """
        We don't allow integer or float indexing on datetime-like when using
        loc.

        Parameters
        ----------
        key : label of the slice bound
        kind : {'ix', 'loc', 'getitem', 'iloc'} or None
        """

        assert kind in ['ix', 'loc', 'getitem', 'iloc', None]

        # we don't allow integer/float indexing for loc
        # we don't allow float indexing for ix/getitem
        if is_scalar(key):
            is_int = is_integer(key)
            is_flt = is_float(key)
            if kind in ['loc'] and (is_int or is_flt):
                self._invalid_indexer('index', key)
            elif kind in ['ix', 'getitem'] and is_flt:
                self._invalid_indexer('index', key)

        return (super(DatetimeIndexOpsMixin, self)
                ._convert_scalar_indexer(key, kind=kind))

    @classmethod
    def _add_datetimelike_methods(cls):
        """
        Add in the datetimelike methods (as we may have to override the
        superclass).

        Assigns ``__add__``, ``__radd__``, ``__sub__`` and ``__rsub__`` on
        ``cls``.
        """

        def __add__(self, other):
            # dispatch to ExtensionArray implementation
            result = self._eadata.__add__(maybe_unwrap_index(other))
            return wrap_arithmetic_op(self, other, result)

        cls.__add__ = __add__

        def __radd__(self, other):
            # alias for __add__
            return self.__add__(other)
        cls.__radd__ = __radd__

        def __sub__(self, other):
            # dispatch to ExtensionArray implementation
            result = self._eadata.__sub__(maybe_unwrap_index(other))
            return wrap_arithmetic_op(self, other, result)

        cls.__sub__ = __sub__

        def __rsub__(self, other):
            # dispatch to ExtensionArray implementation
            result = self._eadata.__rsub__(maybe_unwrap_index(other))
            return wrap_arithmetic_op(self, other, result)

        cls.__rsub__ = __rsub__

    def isin(self, values):
        """
        Compute boolean array of whether each index value is found in the
        passed set of values.

        Parameters
        ----------
        values : set or sequence of values

        Returns
        -------
        is_contained : ndarray (boolean dtype)
        """
        if not isinstance(values, type(self)):
            try:
                values = type(self)(values)
            except ValueError:
                # not convertible: compare as objects instead
                return self.astype(object).isin(values)

        return algorithms.isin(self.asi8, values.asi8)

    # Repeats the i8 values; freq is kept only for period dtype.
    @Appender(_index_shared_docs['repeat'] % _index_doc_kwargs)
    def repeat(self, repeats, axis=None):
        nv.validate_repeat(tuple(), dict(axis=axis))
        freq = self.freq if is_period_dtype(self) else None
        return self._shallow_copy(self.asi8.repeat(repeats), freq=freq)

    # Both operands are converted with _ensure_datetimelike_to_i8(to_utc=True),
    # combined with np.where, and the result is passed through
    # _ensure_localized(from_utc=True) before being wrapped.
    @Appender(_index_shared_docs['where'] % _index_doc_kwargs)
    def where(self, cond, other=None):
        other = _ensure_datetimelike_to_i8(other, to_utc=True)
        values = _ensure_datetimelike_to_i8(self, to_utc=True)
        result = np.where(cond, values, other).astype('i8')

        result = self._ensure_localized(result, from_utc=True)
        return self._shallow_copy(result)

    def _summary(self, name=None):
        """
        Return a summarized representation.

        Parameters
        ----------
        name : str
            name to use in the summary representation

        Returns
        -------
        String with a summarized representation of the index
        """
        formatter = self._formatter_func
        if len(self) > 0:
            index_summary = ', %s to %s' % (formatter(self[0]),
                                            formatter(self[-1]))
        else:
            index_summary = ''

        if name is None:
            name = type(self).__name__
        result = '%s: %s entries%s' % (printing.pprint_thing(name),
                                       len(self), index_summary)
        if self.freq:
            result += '\nFreq: %s' % self.freqstr

        # display as values, not quoted
        result = result.replace("'", "")
        return result

    def _concat_same_dtype(self, to_concat, name):
        """
        Concatenate to_concat which has the same class.

        Raises
        ------
        ValueError
            If the elements of ``to_concat`` do not all share one dtype
            (compared via ``str(x.dtype)``).
        """
        attribs = self._get_attributes_dict()
        attribs['name'] = name
        # do not pass tz to set because tzlocal cannot be hashed
        if len({str(x.dtype) for x in to_concat}) != 1:
            raise ValueError('to_concat must have the same tz')

        if not is_period_dtype(self):
            # reset freq
            attribs['freq'] = None
            # TODO(DatetimeArray)
            # - remove the .asi8 here
            # - remove the _maybe_box_as_values
            # - combine with the `else` block
            new_data = self._concat_same_type(to_concat).asi8
        else:
            new_data = type(self._values)._concat_same_type(to_concat)

        return self._simple_new(new_data, **attribs)

    def _maybe_box_as_values(self, values, **attribs):
        # TODO(DatetimeArray): remove
        # This is a temporary shim while PeriodArray is an ExtensionArray,
        # but others are not. When everyone is an ExtensionArray, this can
        # be removed. Currently used in
        # - sort_values
        return values

    def astype(self, dtype, copy=True):
        """
        Cast the index to ``dtype``.

        Object dtype yields the boxed values, string (non-categorical) dtype
        yields the formatted values as an object Index, and integer dtype
        yields the 'i8' values. Any other dtype is handled by the parent
        implementation.

        Raises
        ------
        TypeError
            For a datetime/timedelta dtype different from this index's dtype,
            or for any float dtype.
        """
        if is_object_dtype(dtype):
            return self._box_values_as_index()
        elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
            return Index(self.format(), name=self.name, dtype=object)
        elif is_integer_dtype(dtype):
            # TODO(DatetimeArray): use self._values here.
            # Can't use ._values currently, because that returns a
            # DatetimeIndex, which throws us in an infinite loop.
            return Index(self.values.astype('i8', copy=copy), name=self.name,
                         dtype='i8')
        elif (is_datetime_or_timedelta_dtype(dtype) and
              not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
            # disallow conversion between datetime/timedelta,
            # and conversions for any datetimelike to float
            msg = 'Cannot cast {name} to dtype {dtype}'
            raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
        return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)

    # Dispatches to the underlying array and rebuilds an index of the same
    # class, carrying over the name.
    @Appender(DatetimeLikeArrayMixin._time_shift.__doc__)
    def _time_shift(self, periods, freq=None):
        result = self._eadata._time_shift(periods, freq=freq)
        return type(self)(result, name=self.name)
class DatetimeIndex(DatetimeTimedeltaMixin, DatetimeDelegateMixin):
    """
    Immutable ndarray of datetime64 data, represented internally as int64, and
    which can be boxed to Timestamp objects that are subclasses of datetime and
    carry metadata such as frequency information.

    Parameters
    ----------
    data : array-like (1-dimensional), optional
        Optional datetime-like data to construct index with.
    copy : bool
        Make a copy of input ndarray.
    freq : str or pandas offset object, optional
        One of pandas date offset strings or corresponding objects. The string
        'infer' can be passed in order to set the frequency of the index as the
        inferred frequency upon creation.
    tz : pytz.timezone or dateutil.tz.tzfile
    ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
        When clocks moved backward due to DST, ambiguous times may arise.
        For example in Central European Time (UTC+01), when going from 03:00
        DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC
        and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter
        dictates how ambiguous times should be handled.

        - 'infer' will attempt to infer fall dst-transition hours based on
          order
        - bool-ndarray where True signifies a DST time, False signifies a
          non-DST time (note that this flag is only applicable for ambiguous
          times)
        - 'NaT' will return NaT where there are ambiguous times
        - 'raise' will raise an AmbiguousTimeError if there are ambiguous times.
    name : object
        Name to be stored in the index.
    dayfirst : bool, default False
        If True, parse dates in `data` with the day first order.
    yearfirst : bool, default False
        If True parse dates in `data` with the year first order.

    Attributes
    ----------
    year
    month
    day
    hour
    minute
    second
    microsecond
    nanosecond
    date
    time
    timetz
    dayofyear
    weekofyear
    week
    dayofweek
    weekday
    quarter
    tz
    freq
    freqstr
    is_month_start
    is_month_end
    is_quarter_start
    is_quarter_end
    is_year_start
    is_year_end
    is_leap_year
    inferred_freq

    Methods
    -------
    normalize
    strftime
    snap
    tz_convert
    tz_localize
    round
    floor
    ceil
    to_period
    to_perioddelta
    to_pydatetime
    to_series
    to_frame
    month_name
    day_name
    mean

    See Also
    --------
    Index : The base pandas Index type.
    TimedeltaIndex : Index of timedelta64 data.
    PeriodIndex : Index of Period data.
    to_datetime : Convert argument to datetime.
    date_range : Create a fixed-frequency DatetimeIndex.

    Notes
    -----
    To learn more about the frequency strings, please see `this link
    <http://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
    """

    _typ = "datetimeindex"

    _engine_type = libindex.DatetimeEngine
    _supports_partial_string_indexing = True

    _tz = None
    _freq = None
    _comparables = ["name", "freqstr", "tz"]
    _attributes = ["name", "tz", "freq"]

    _is_numeric_dtype = False
    _infer_as_myclass = True

    # Use faster implementation given we know we have DatetimeArrays
    __iter__ = DatetimeArray.__iter__
    # some things like freq inference make use of these attributes.
    _bool_ops = DatetimeArray._bool_ops
    _object_ops = DatetimeArray._object_ops
    _field_ops = DatetimeArray._field_ops
    _datetimelike_ops = DatetimeArray._datetimelike_ops
    _datetimelike_methods = DatetimeArray._datetimelike_methods

    # --------------------------------------------------------------------
    # Constructors

    def __new__(
        cls,
        data=None,
        freq=None,
        tz=None,
        normalize=False,
        closed=None,
        ambiguous="raise",
        dayfirst=False,
        yearfirst=False,
        dtype=None,
        copy=False,
        name=None,
    ):
        # NOTE: ``normalize`` and ``closed`` are part of the signature but are
        # not referenced anywhere in this constructor.

        if is_scalar(data):
            raise TypeError(
                f"{cls.__name__}() must be called with a "
                f"collection of some kind, {repr(data)} was passed"
            )

        # - Cases checked above all return/raise before reaching here - #

        name = maybe_extract_name(name, data, cls)

        # All parsing/validation is delegated to DatetimeArray; the index is
        # then a thin wrapper around the resulting array.
        dtarr = DatetimeArray._from_sequence(
            data,
            dtype=dtype,
            copy=copy,
            tz=tz,
            freq=freq,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            ambiguous=ambiguous,
        )

        subarr = cls._simple_new(dtarr, name=name, freq=dtarr.freq, tz=dtarr.tz)
        return subarr

    @classmethod
    def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None):
        """
        We require that we have a dtype compat for the values
        if we are passed a non-dtype compat, then coerce using the constructor

        Parameters
        ----------
        values : DatetimeArray, DatetimeIndex or ndarray
            A DatetimeArray is first re-wrapped with `freq` and the resolved
            dtype, after which its own ``tz``/``freq`` replace the arguments.
        name : object, optional
        freq : optional
        tz : optional
        dtype : optional
            Only consulted when `values` is a DatetimeArray.

        Returns
        -------
        Instance of `cls` backed by a DatetimeArray.
        """
        if isinstance(values, DatetimeArray):
            if tz:
                tz = validate_tz_from_dtype(dtype, tz)
                dtype = DatetimeTZDtype(tz=tz)
            elif dtype is None:
                dtype = _NS_DTYPE

            values = DatetimeArray(values, freq=freq, dtype=dtype)
            tz = values.tz
            freq = values.freq
            values = values._data

        # DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes
        if isinstance(values, DatetimeIndex):
            values = values._data

        dtype = tz_to_dtype(tz)
        dtarr = DatetimeArray._simple_new(values, freq=freq, dtype=dtype)
        assert isinstance(dtarr, DatetimeArray)

        # Bypass __new__ so that no parsing/validation is repeated.
        result = object.__new__(cls)
        result._data = dtarr
        result.name = name
        # For groupby perf. See note in indexes/base about _index_data
        result._index_data = dtarr._data
        result._reset_identity()
        return result

    # --------------------------------------------------------------------

    def __array__(self, dtype=None):
        """
        Return the data as an ndarray.

        When no `dtype` is requested and the index is timezone-aware, a
        FutureWarning is emitted and ``"M8[ns]"`` is used as the dtype.
        """
        if (
            dtype is None
            and isinstance(self._data, DatetimeArray)
            and getattr(self.dtype, "tz", None)
        ):
            msg = (
                "Converting timezone-aware DatetimeArray to timezone-naive "
                "ndarray with 'datetime64[ns]' dtype. In the future, this "
                "will return an ndarray with 'object' dtype where each "
                "element is a 'pandas.Timestamp' with the correct 'tz'.\n\t"
                "To accept the future behavior, pass 'dtype=object'.\n\t"
                "To keep the old behavior, pass 'dtype=\"datetime64[ns]\"'."
            )
            warnings.warn(msg, FutureWarning, stacklevel=3)
            dtype = "M8[ns]"
        return np.asarray(self._data, dtype=dtype)

    @property
    def dtype(self):
        """The dtype of the backing array."""
        return self._data.dtype

    @property
    def tz(self):
        """The timezone of the backing array (read-only)."""
        # GH 18595
        return self._data.tz

    @tz.setter
    def tz(self, value):
        # GH 3746: Prevent localizing or converting the index by setting tz
        raise AttributeError(
            "Cannot directly set timezone. Use tz_localize() "
            "or tz_convert() as appropriate"
        )

    # alias of ``tz`` (shares the raising setter above)
    tzinfo = tz

    @cache_readonly
    def _is_dates_only(self) -> bool:
        """
        Return a boolean if we are only dates (and don't have a timezone)

        Returns
        -------
        bool
        """
        from pandas.io.formats.format import _is_dates_only

        return _is_dates_only(self.values) and self.tz is None

    def __reduce__(self):
        """Pickle support: rebuild through ``_new_DatetimeIndex``."""

        # we use a special reduce here because we need
        # to simply set the .tz (and not reinterpret it)

        d = dict(data=self._data)
        d.update(self._get_attributes_dict())
        return _new_DatetimeIndex, (type(self), d), None

    def __setstate__(self, state):
        """
        Necessary for making this object picklable.

        Accepts either a dict (delegated to the parent class) or one of the
        legacy tuple layouts; anything else raises ``Exception``.
        """
        if isinstance(state, dict):
            super().__setstate__(state)

        elif isinstance(state, tuple):

            # < 0.15 compat
            if len(state) == 2:
                # (ndarray state, (name, freq, tz))
                nd_state, own_state = state
                data = np.empty(nd_state[1], dtype=nd_state[2])
                np.ndarray.__setstate__(data, nd_state)

                freq = own_state[1]
                tz = timezones.tz_standardize(own_state[2])
                dtype = tz_to_dtype(tz)
                dtarr = DatetimeArray._simple_new(data, freq=freq, dtype=dtype)

                self.name = own_state[0]

            else:  # pragma: no cover
                data = np.empty(state)
                np.ndarray.__setstate__(data, state)
                dtarr = DatetimeArray(data)

            self._data = dtarr
            self._reset_identity()

        else:
            raise Exception("invalid pickle state")

    _unpickle_compat = __setstate__

    def _convert_for_op(self, value):
        """
        Convert value to be insertable to ndarray.

        Raises
        ------
        ValueError
            If `value` does not share the index's timezone.
        """
        if self._has_same_tz(value):
            return _to_M8(value)
        raise ValueError("Passed item and index have different timezone")

    # --------------------------------------------------------------------
    # Rendering Methods

    def _mpl_repr(self):
        # how to represent ourselves to matplotlib
        return libts.ints_to_pydatetime(self.asi8, self.tz)

    def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs):
        """Format the i8 values as strings; extra ``kwargs`` are ignored."""
        from pandas.io.formats.format import _get_format_datetime64_from_values

        fmt = _get_format_datetime64_from_values(self, date_format)

        return libts.format_array_from_datetime(
            self.asi8, tz=self.tz, format=fmt, na_rep=na_rep
        )

    @property
    def _formatter_func(self):
        """Callable that renders one element as a single-quoted string."""
        from pandas.io.formats.format import _get_format_datetime64

        formatter = _get_format_datetime64(is_dates_only=self._is_dates_only)
        return lambda x: f"'{formatter(x, tz=self.tz)}'"

    # --------------------------------------------------------------------
    # Set Operation Methods

    def _union(self, other, sort):
        """
        Union of `self` and `other`, taking the fast path when possible.

        Trivial cases (either side empty, or both equal) go straight to the
        parent ``_union``. Otherwise `other` is coerced to a DatetimeIndex
        when possible, both sides go through ``_maybe_utc_convert`` and
        ``_fast_union`` is used if ``_can_fast_union`` allows it.
        """
        # NOTE: this guard used to be followed by a second, identical check
        # that could never fire (and re-ran the O(n) ``equals``); it has been
        # removed.
        if not len(other) or self.equals(other) or not len(self):
            return super()._union(other, sort=sort)

        if not isinstance(other, DatetimeIndex):
            try:
                other = DatetimeIndex(other)
            except TypeError:
                # best effort: fall through with ``other`` unchanged
                pass

        this, other = self._maybe_utc_convert(other)

        if this._can_fast_union(other):
            return this._fast_union(other, sort=sort)
        else:
            result = Index._union(this, other, sort=sort)
            if isinstance(result, DatetimeIndex):
                # TODO: we shouldn't be setting attributes like this;
                #  in all the tests this equality already holds
                result._data._dtype = this.dtype
                if result.freq is None and (
                    this.freq is not None or other.freq is not None
                ):
                    result._set_freq("infer")
            return result

    def union_many(self, others):
        """
        A bit of a hack to accelerate unioning a collection of indexes.

        Parameters
        ----------
        others : iterable of index-like

        Returns
        -------
        The accumulated union; once an intermediate result is no longer a
        DatetimeIndex the remaining unions go through ``Index.union``.
        """
        this = self

        for other in others:
            if not isinstance(this, DatetimeIndex):
                this = Index.union(this, other)
                continue

            if not isinstance(other, DatetimeIndex):
                try:
                    other = DatetimeIndex(other)
                except TypeError:
                    pass

            this, other = this._maybe_utc_convert(other)

            if this._can_fast_union(other):
                this = this._fast_union(other)
            else:
                dtype = this.dtype
                this = Index.union(this, other)
                if isinstance(this, DatetimeIndex):
                    # TODO: we shouldn't be setting attributes like this;
                    #  in all the tests this equality already holds
                    this._data._dtype = dtype
        return this

    def _fast_union(self, other, sort=None):
        """
        Union by concatenating the non-overlapping chunk of one index onto
        the other, located with ``searchsorted``.
        """
        if len(other) == 0:
            return self.view(type(self))

        if len(self) == 0:
            return other.view(type(self))

        # Both DTIs are monotonic. Check if they are already
        # in the "correct" order
        if self[0] <= other[0]:
            left, right = self, other
        # DTIs are not in the "correct" order and we don't want
        # to sort but want to remove overlaps
        elif sort is False:
            left, right = self, other
            left_start = left[0]
            loc = right.searchsorted(left_start, side="left")
            right_chunk = right.values[:loc]
            dates = concat_compat((left.values, right_chunk))
            return self._shallow_copy(dates)
        # DTIs are not in the "correct" order and we want
        # to sort
        else:
            left, right = other, self

        left_end = left[-1]
        right_end = right[-1]

        # TODO: consider re-implementing freq._should_cache for fastpath

        # concatenate dates
        if left_end < right_end:
            loc = right.searchsorted(left_end, side="right")
            right_chunk = right.values[loc:]
            dates = concat_compat((left.values, right_chunk))
            return self._shallow_copy(dates)
        else:
            # ``right`` ends no later than ``left``: nothing to append
            return left

    def _wrap_setop_result(self, other, result):
        """Wrap a set-op result, resetting freq and keeping our tz."""
        name = get_op_result_name(self, other)
        return self._shallow_copy(result, name=name, freq=None, tz=self.tz)

    # --------------------------------------------------------------------

    def _get_time_micros(self):
        """Time-of-day values from ``fields.get_time_micros``."""
        values = self.asi8
        if self.tz is not None and not timezones.is_utc(self.tz):
            # use wall-clock values for non-UTC timezones
            values = self._data._local_timestamps()
        return fields.get_time_micros(values)

    def to_series(self, keep_tz=lib._no_default, index=None, name=None):
        """
        Create a Series with both index and values equal to the index keys
        useful with map for returning an indexer based on an index.

        Parameters
        ----------
        keep_tz : optional, defaults True
            Return the data keeping the timezone.

            If keep_tz is True:

              If the timezone is not set, the resulting
              Series will have a datetime64[ns] dtype.

              Otherwise the Series will have an datetime64[ns, tz] dtype; the
              tz will be preserved.

            If keep_tz is False:

              Series will have a datetime64[ns] dtype. TZ aware
              objects will have the tz removed.

            .. versionchanged:: 1.0.0
                The default value is now True.  In a future version,
                this keyword will be removed entirely.  Stop passing the
                argument to obtain the future behavior and silence the warning.

        index : Index, optional
            Index of resulting Series. If None, defaults to original index.
        name : str, optional
            Name of resulting Series. If None, defaults to name of original
            index.

        Returns
        -------
        Series
        """
        from pandas import Series

        if index is None:
            index = self._shallow_copy()
        if name is None:
            name = self.name

        # Passing ``keep_tz`` at all (True or False) is deprecated.
        if keep_tz is not lib._no_default:
            if keep_tz:
                warnings.warn(
                    "The 'keep_tz' keyword in DatetimeIndex.to_series "
                    "is deprecated and will be removed in a future version.  "
                    "You can stop passing 'keep_tz' to silence this warning.",
                    FutureWarning,
                    stacklevel=2,
                )
            else:
                warnings.warn(
                    "Specifying 'keep_tz=False' is deprecated and this "
                    "option will be removed in a future release. If "
                    "you want to remove the timezone information, you "
                    "can do 'idx.tz_convert(None)' before calling "
                    "'to_series'.",
                    FutureWarning,
                    stacklevel=2,
                )
        else:
            keep_tz = True

        if keep_tz and self.tz is not None:
            # preserve the tz & copy
            values = self.copy(deep=True)
        else:
            values = self.values.copy()

        return Series(values, index=index, name=name)

    def snap(self, freq="S"):
        """
        Snap time stamps to nearest occurring frequency.

        Parameters
        ----------
        freq : str or offset, default "S"
            Converted with ``to_offset``.

        Returns
        -------
        DatetimeIndex
        """
        # Superdumb, punting on any optimizing
        freq = to_offset(freq)

        snapped = np.empty(len(self), dtype=_NS_DTYPE)

        for i, v in enumerate(self):
            s = v
            if not freq.is_on_offset(s):
                t0 = freq.rollback(s)
                t1 = freq.rollforward(s)
                # ties go to the later offset (strict ``<``)
                if abs(s - t0) < abs(t1 - s):
                    s = t0
                else:
                    s = t1
            snapped[i] = s

        # we know it conforms; skip check
        return DatetimeIndex._simple_new(snapped, name=self.name, tz=self.tz, freq=freq)

    def _parsed_string_to_bounds(self, reso, parsed):
        """
        Calculate datetime bounds for parsed time string and its resolution.

        Parameters
        ----------
        reso : Resolution
            Resolution provided by parsed string.
        parsed : datetime
            Datetime from parsed string.

        Returns
        -------
        lower, upper: pd.Timestamp

        Raises
        ------
        KeyError
            If `reso` is not one of the supported resolutions.
        ValueError
            If `parsed` carries a UTC offset but the index is tz-naive.
        """
        valid_resos = {
            "year",
            "month",
            "quarter",
            "day",
            "hour",
            "minute",
            "second",
            "microsecond",
        }
        if reso not in valid_resos:
            raise KeyError
        if reso == "year":
            start = Timestamp(parsed.year, 1, 1)
            end = Timestamp(parsed.year, 12, 31, 23, 59, 59, 999999)
        elif reso == "month":
            d = ccalendar.get_days_in_month(parsed.year, parsed.month)
            start = Timestamp(parsed.year, parsed.month, 1)
            end = Timestamp(parsed.year, parsed.month, d, 23, 59, 59, 999999)
        elif reso == "quarter":
            qe = (((parsed.month - 1) + 2) % 12) + 1  # two months ahead
            d = ccalendar.get_days_in_month(parsed.year, qe)  # at end of month
            start = Timestamp(parsed.year, parsed.month, 1)
            end = Timestamp(parsed.year, qe, d, 23, 59, 59, 999999)
        elif reso == "day":
            start = Timestamp(parsed.year, parsed.month, parsed.day)
            end = start + timedelta(days=1) - Nano(1)
        elif reso == "hour":
            start = Timestamp(parsed.year, parsed.month, parsed.day, parsed.hour)
            end = start + timedelta(hours=1) - Nano(1)
        elif reso == "minute":
            start = Timestamp(
                parsed.year, parsed.month, parsed.day, parsed.hour, parsed.minute
            )
            end = start + timedelta(minutes=1) - Nano(1)
        elif reso == "second":
            start = Timestamp(
                parsed.year,
                parsed.month,
                parsed.day,
                parsed.hour,
                parsed.minute,
                parsed.second,
            )
            end = start + timedelta(seconds=1) - Nano(1)
        elif reso == "microsecond":
            start = Timestamp(
                parsed.year,
                parsed.month,
                parsed.day,
                parsed.hour,
                parsed.minute,
                parsed.second,
                parsed.microsecond,
            )
            end = start + timedelta(microseconds=1) - Nano(1)
        # GH 24076
        # If an incoming date string contained a UTC offset, need to localize
        # the parsed date to this offset first before aligning with the index's
        # timezone
        if parsed.tzinfo is not None:
            if self.tz is None:
                raise ValueError(
                    "The index must be timezone aware when indexing "
                    "with a date string with a UTC offset"
                )
            start = start.tz_localize(parsed.tzinfo).tz_convert(self.tz)
            end = end.tz_localize(parsed.tzinfo).tz_convert(self.tz)
        elif self.tz is not None:
            start = start.tz_localize(self.tz)
            end = end.tz_localize(self.tz)
        return start, end

    def _partial_date_slice(
        self, reso: str, parsed, use_lhs: bool = True, use_rhs: bool = True
    ):
        """
        Locate the entries falling inside the bounds of a parsed date string.

        Parameters
        ----------
        reso : str
        parsed
            Parsed datetime, forwarded to ``_parsed_string_to_bounds``.
        use_lhs : bool, default True
        use_rhs : bool, default True

        Returns
        -------
        slice for a monotonic index, otherwise an array of positions.

        Raises
        ------
        KeyError
            For disallowed resolutions or, on a monotonic index, when the
            bounds fall entirely outside the stored values.
        """
        is_monotonic = self.is_monotonic
        if (
            is_monotonic
            and reso in ["day", "hour", "minute", "second"]
            and self._resolution >= Resolution.get_reso(reso)
        ):
            # These resolution/monotonicity validations came from GH3931,
            # GH3452 and GH2369.

            # See also GH14826
            raise KeyError

        if reso == "microsecond":
            # _partial_date_slice doesn't allow microsecond resolution, but
            # _parsed_string_to_bounds allows it.
            raise KeyError

        t1, t2 = self._parsed_string_to_bounds(reso, parsed)
        stamps = self.asi8

        if is_monotonic:

            # we are out of range
            if len(stamps) and (
                (use_lhs and t1.value < stamps[0] and t2.value < stamps[0])
                or ((use_rhs and t1.value > stamps[-1] and t2.value > stamps[-1]))
            ):
                raise KeyError

            # a monotonic (sorted) series can be sliced
            left = stamps.searchsorted(t1.value, side="left") if use_lhs else None
            right = stamps.searchsorted(t2.value, side="right") if use_rhs else None

            return slice(left, right)

        lhs_mask = (stamps >= t1.value) if use_lhs else True
        rhs_mask = (stamps <= t2.value) if use_rhs else True

        # try to find the dates
        return (lhs_mask & rhs_mask).nonzero()[0]

    def _maybe_promote(self, other):
        """Convert `other` to a DatetimeIndex when it is inferred as dates."""
        if other.inferred_type == "date":
            other = DatetimeIndex(other)
        return self, other

    def get_value(self, series, key):
        """
        Fast lookup of value from 1-dimensional ndarray. Only use this if you
        know what you're doing

        Raises
        ------
        KeyError
            If `key` cannot be resolved by any of the lookup strategies.
        """

        if isinstance(key, datetime):

            # needed to localize naive datetimes
            if self.tz is not None:
                if key.tzinfo is not None:
                    key = Timestamp(key).tz_convert(self.tz)
                else:
                    key = Timestamp(key).tz_localize(self.tz)

            return self.get_value_maybe_box(series, key)

        if isinstance(key, time):
            locs = self.indexer_at_time(key)
            return series.take(locs)

        try:
            return com.maybe_box(self, Index.get_value(self, series, key), series, key)
        except KeyError:
            # fall back to partial-string slicing, then to a boxed lookup
            try:
                loc = self._get_string_slice(key)
                return series[loc]
            except (TypeError, ValueError, KeyError):
                pass

            try:
                return self.get_value_maybe_box(series, key)
            except (TypeError, ValueError, KeyError):
                raise KeyError(key)

    def get_value_maybe_box(self, series, key):
        """Look `key` up through the engine after coercing it to Timestamp."""
        # needed to localize naive datetimes
        if self.tz is not None:
            key = Timestamp(key)
            if key.tzinfo is not None:
                key = key.tz_convert(self.tz)
            else:
                key = key.tz_localize(self.tz)
        elif not isinstance(key, Timestamp):
            key = Timestamp(key)
        values = self._engine.get_value(com.values_from_object(series), key, tz=self.tz)
        return com.maybe_box(self, values, series, key)

    def get_loc(self, key, method=None, tolerance=None):
        """
        Get integer location for requested label

        Returns
        -------
        loc : int

        Raises
        ------
        KeyError
            If `key` cannot be located.
        TypeError
            If `key` is a ``timedelta``.
        NotImplementedError
            If `key` is a ``datetime.time`` and `method` is given.
        """

        if tolerance is not None:
            # try converting tolerance now, so errors don't get swallowed by
            # the try/except clauses below
            tolerance = self._convert_tolerance(tolerance, np.asarray(key))

        if isinstance(key, datetime):
            # needed to localize naive datetimes
            if key.tzinfo is None:
                key = Timestamp(key, tz=self.tz)
            else:
                key = Timestamp(key).tz_convert(self.tz)
            return Index.get_loc(self, key, method, tolerance)

        elif isinstance(key, timedelta):
            # GH#20464
            raise TypeError(
                f"Cannot index {type(self).__name__} with {type(key).__name__}"
            )

        if isinstance(key, time):
            if method is not None:
                raise NotImplementedError(
                    "cannot yet lookup inexact labels when key is a time object"
                )
            return self.indexer_at_time(key)

        try:
            return Index.get_loc(self, key, method, tolerance)
        except (KeyError, ValueError, TypeError):
            try:
                return self._get_string_slice(key)
            except (TypeError, KeyError, ValueError, OverflowError):
                pass

            try:
                stamp = Timestamp(key)
                if stamp.tzinfo is not None and self.tz is not None:
                    stamp = stamp.tz_convert(self.tz)
                else:
                    stamp = stamp.tz_localize(self.tz)
                return Index.get_loc(self, stamp, method, tolerance)
            except KeyError:
                raise KeyError(key)
            except ValueError as e:
                # list-like tolerance size must match target index size
                if "list-like" in str(e):
                    raise e
                raise KeyError(key)

    def _maybe_cast_slice_bound(self, label, side, kind):
        """
        If label is a string, cast it to datetime according to resolution.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}
        kind : {'ix', 'loc', 'getitem'}

        Returns
        -------
        label : object

        Notes
        -----
        Value of `side` parameter should be validated in caller.
        """
        assert kind in ["ix", "loc", "getitem", None]

        if is_float(label) or isinstance(label, time) or is_integer(label):
            self._invalid_indexer("slice", label)

        if isinstance(label, str):
            freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None))
            _, parsed, reso = parsing.parse_time_string(label, freq)
            lower, upper = self._parsed_string_to_bounds(reso, parsed)
            # lower, upper form the half-open interval:
            #   [parsed, parsed + 1 freq)
            # because label may be passed to searchsorted
            # the bounds need swapped if index is reverse sorted and has a
            # length > 1 (is_monotonic_decreasing gives True for empty
            # and length 1 index)
            if self._is_strictly_monotonic_decreasing and len(self) > 1:
                return upper if side == "left" else lower
            return lower if side == "left" else upper
        else:
            return label

    def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True):
        """Parse `key` as a time string and locate it via partial slicing."""
        freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None))
        _, parsed, reso = parsing.parse_time_string(key, freq)
        loc = self._partial_date_slice(reso, parsed, use_lhs=use_lhs, use_rhs=use_rhs)
        return loc

    def slice_indexer(self, start=None, end=None, step=None, kind=None):
        """
        Return indexer for specified label slice.
        Index.slice_indexer, customized to handle time slicing.

        In addition to functionality provided by Index.slice_indexer, does the
        following:

        - if both `start` and `end` are instances of `datetime.time`, it
          invokes `indexer_between_time`
        - if `start` and `end` are both either string or None perform
          value-based selection in non-monotonic cases.

        Raises
        ------
        ValueError
            If a step other than 1 is combined with time slices.
        KeyError
            If only one of `start`/`end` is a ``datetime.time``, or the
            slice cannot be resolved.
        """
        # For historical reasons DatetimeIndex supports slices between two
        # instances of datetime.time as if it were applying a slice mask to
        # an array of (self.hour, self.minute, self.seconds, self.microsecond).
        if isinstance(start, time) and isinstance(end, time):
            if step is not None and step != 1:
                raise ValueError("Must have step size of 1 with time slices")
            return self.indexer_between_time(start, end)

        if isinstance(start, time) or isinstance(end, time):
            raise KeyError("Cannot mix time and non-time slice keys")

        try:
            return Index.slice_indexer(self, start, end, step, kind=kind)
        except KeyError:
            # For historical reasons DatetimeIndex by default supports
            # value-based partial (aka string) slices on non-monotonic arrays,
            # let's try that.
            if (start is None or isinstance(start, str)) and (
                end is None or isinstance(end, str)
            ):
                mask = True
                if start is not None:
                    start_casted = self._maybe_cast_slice_bound(start, "left", kind)
                    mask = start_casted <= self

                if end is not None:
                    end_casted = self._maybe_cast_slice_bound(end, "right", kind)
                    mask = (self <= end_casted) & mask

                indexer = mask.nonzero()[0][::step]
                if len(indexer) == len(self):
                    return slice(None)
                else:
                    return indexer
            else:
                raise

    # --------------------------------------------------------------------
    # Wrapping DatetimeArray

    _timezone = cache_readonly(DatetimeArray._timezone.fget)  # type: ignore
    is_normalized = cache_readonly(DatetimeArray.is_normalized.fget)  # type: ignore
    _resolution = cache_readonly(DatetimeArray._resolution.fget)  # type: ignore

    def __getitem__(self, key):
        """
        Index into the backing array; scalars and >1-dim results are
        returned as-is, everything else is re-wrapped in our own type.
        """
        result = self._data.__getitem__(key)
        if is_scalar(result):
            return result
        elif result.ndim > 1:
            # To support MPL which performs slicing with 2 dim
            # even though it only has 1 dim by definition
            assert isinstance(result, np.ndarray), result
            return result
        return type(self)(result, name=self.name)

    @property
    def _box_func(self):
        """Callable boxing a raw value into a Timestamp with our tz."""
        return lambda x: Timestamp(x, tz=self.tz)

    # --------------------------------------------------------------------

    @Substitution(klass="DatetimeIndex")
    @Appender(_shared_docs["searchsorted"])
    def searchsorted(self, value, side="left", sorter=None):
        if isinstance(value, (np.ndarray, Index)):
            value = np.array(value, dtype=_NS_DTYPE, copy=False)
        else:
            value = _to_M8(value, tz=self.tz)

        # Forward ``sorter`` as well; it was previously accepted but silently
        # dropped, so callers passing an argsort got results for the
        # unsorted data.
        return self.values.searchsorted(value, side=side, sorter=sorter)

    def is_type_compatible(self, typ) -> bool:
        """Whether `typ` equals our inferred type or ``"datetime"``."""
        return typ == self.inferred_type or typ == "datetime"

    @property
    def inferred_type(self) -> str:
        # b/c datetime is represented as microseconds since the epoch, make
        # sure we can't have ambiguous indexing
        return "datetime64"

    def insert(self, loc, item):
        """
        Make new Index inserting new item at location

        Parameters
        ----------
        loc : int
        item : object
            if not either a Python datetime or a numpy integer-like, returned
            Index dtype will be object rather than datetime.

        Returns
        -------
        new_index : Index

        Raises
        ------
        ValueError
            If `item` is datetime-like with a different timezone.
        TypeError
            If `item` is neither datetime-like nor a string.
        """
        if is_scalar(item) and isna(item):
            # GH 18295
            item = self._na_value

        freq = None

        if isinstance(item, (datetime, np.datetime64)):
            self._assert_can_do_op(item)
            if not self._has_same_tz(item) and not isna(item):
                raise ValueError("Passed item and index have different timezone")
            # check freq can be preserved on edge cases
            if self.size and self.freq is not None:
                if item is NaT:
                    pass
                elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]:
                    freq = self.freq
                elif (loc == len(self)) and item - self.freq == self[-1]:
                    freq = self.freq
            item = _to_M8(item, tz=self.tz)

        try:
            new_dates = np.concatenate(
                (self[:loc].asi8, [item.view(np.int64)], self[loc:].asi8)
            )
            return self._shallow_copy(new_dates, freq=freq)
        except (AttributeError, TypeError):

            # fall back to object index
            if isinstance(item, str):
                return self.astype(object).insert(loc, item)
            raise TypeError("cannot insert DatetimeIndex with incompatible label")

    def delete(self, loc):
        """
        Make a new DatetimeIndex with passed location(s) deleted.

        Parameters
        ----------
        loc: int, slice or array of ints
            Indicate which sub-arrays to remove.

        Returns
        -------
        new_index : DatetimeIndex
        """
        new_dates = np.delete(self.asi8, loc)

        # freq is only kept when the deletion trims one end of the index
        freq = None
        if is_integer(loc):
            if loc in (0, -len(self), -1, len(self) - 1):
                freq = self.freq
        else:
            if is_list_like(loc):
                loc = lib.maybe_indices_to_slice(ensure_int64(np.array(loc)), len(self))
            if isinstance(loc, slice) and loc.step in (1, None):
                if loc.start in (0, None) or loc.stop in (len(self), None):
                    freq = self.freq

        return self._shallow_copy(new_dates, freq=freq)

    def indexer_at_time(self, time, asof=False):
        """
        Return index locations of index values at particular time of day
        (e.g. 9:30AM).

        Parameters
        ----------
        time : datetime.time or str
            datetime.time or string in appropriate format ("%H:%M", "%H%M",
            "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
            "%I%M%S%p").

        Returns
        -------
        values_at_time : array of integers

        Raises
        ------
        NotImplementedError
            If `asof` is truthy.
        ValueError
            If `time` is timezone-aware but the index is not.

        See Also
        --------
        indexer_between_time, DataFrame.at_time
        """
        if asof:
            raise NotImplementedError("'asof' argument is not supported")

        if isinstance(time, str):
            from dateutil.parser import parse

            time = parse(time).time()

        if time.tzinfo:
            if self.tz is None:
                raise ValueError("Index must be timezone aware.")
            time_micros = self.tz_convert(time.tzinfo)._get_time_micros()
        else:
            time_micros = self._get_time_micros()
        micros = _time_to_micros(time)
        return (micros == time_micros).nonzero()[0]

    def indexer_between_time(
        self, start_time, end_time, include_start=True, include_end=True
    ):
        """
        Return index locations of values between particular times of day
        (e.g., 9:00-9:30AM).

        Parameters
        ----------
        start_time, end_time : datetime.time, str
            datetime.time or string in appropriate format ("%H:%M", "%H%M",
            "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
            "%I%M%S%p").
        include_start : bool, default True
        include_end : bool, default True

        Returns
        -------
        values_between_time : array of integers

        See Also
        --------
        indexer_at_time, DataFrame.between_time
        """
        start_time = tools.to_time(start_time)
        end_time = tools.to_time(end_time)
        time_micros = self._get_time_micros()
        start_micros = _time_to_micros(start_time)
        end_micros = _time_to_micros(end_time)

        if include_start and include_end:
            lop = rop = operator.le
        elif include_start:
            lop = operator.le
            rop = operator.lt
        elif include_end:
            lop = operator.lt
            rop = operator.le
        else:
            lop = rop = operator.lt

        # start > end means the window wraps around midnight, so an entry
        # matches when it satisfies either bound rather than both
        if start_time <= end_time:
            join_op = operator.and_
        else:
            join_op = operator.or_

        mask = join_op(lop(start_micros, time_micros), rop(time_micros, end_micros))

        return mask.nonzero()[0]
class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):
    """
    Common ops mixin to support a unified interface for datetimelike Index
    classes.

    The methods here operate on the ``asi8`` integer view of the index and
    treat ``iNaT`` as the missing-value marker.
    """

    # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
    # properties there.  They can be made into cache_readonly for Index
    # subclasses bc they are immutable
    inferred_freq = cache_readonly(DatetimeLikeArrayMixin.inferred_freq.fget)
    _isnan = cache_readonly(DatetimeLikeArrayMixin._isnan.fget)
    hasnans = cache_readonly(DatetimeLikeArrayMixin.hasnans.fget)

    def equals(self, other):
        """
        Determines if two Index objects contain the same elements.

        Parameters
        ----------
        other : object
            Non-Index objects never compare equal. An Index of a different
            class is first converted to ``type(self)``; if that conversion
            raises, the result is False.

        Returns
        -------
        bool
        """
        if self.is_(other):
            return True

        if not isinstance(other, ABCIndexClass):
            return False
        elif not isinstance(other, type(self)):
            try:
                other = type(self)(other)
            except Exception:
                # deliberate best-effort: anything that cannot be coerced
                # is simply "not equal"
                return False

        if not is_dtype_equal(self.dtype, other.dtype):
            # have different timezone
            return False

        # ToDo: Remove this when PeriodDtype is added
        elif isinstance(self, ABCPeriodIndex):
            if not isinstance(other, ABCPeriodIndex):
                return False
            if self.freq != other.freq:
                return False

        return np.array_equal(self.asi8, other.asi8)

    @staticmethod
    def _join_i8_wrapper(joinf, dtype, with_indexers=True):
        """
        Create the join wrapper methods.

        Parameters
        ----------
        joinf : callable
            Join function called with the i8 views of both operands.
        dtype : dtype
            dtype the joined values are viewed as when ``with_indexers``
            is True.
        with_indexers : bool, default True
            If True, ``joinf`` is expected to return a 3-tuple
            ``(join_index, left_indexer, right_indexer)``; otherwise its
            result is returned untouched.

        Returns
        -------
        staticmethod
        """

        @staticmethod
        def wrapper(left, right):
            if isinstance(left, (np.ndarray, ABCIndex, ABCSeries)):
                left = left.view('i8')
            if isinstance(right, (np.ndarray, ABCIndex, ABCSeries)):
                right = right.view('i8')
            results = joinf(left, right)
            if with_indexers:
                join_index, left_indexer, right_indexer = results
                join_index = join_index.view(dtype)
                return join_index, left_indexer, right_indexer
            return results

        return wrapper

    def _evaluate_compare(self, other, op):
        """
        Apply the comparison ``op`` to the i8 values of self and other.

        Comparing on integers avoids the NaT comparison warnings issued by
        numpy >= 1.11.

        Parameters
        ----------
        other : scalar or list-like
            Coerced to ``type(self)`` when it is not already one.
        op : callable
            Binary operator applied to the two i8 arrays.

        Returns
        -------
        ndarray or Index
            For a boolean result, the ndarray with every position where
            either operand is NaT set to False. Otherwise those positions
            are set to iNaT and the result is wrapped in an Index, falling
            back to the raw ndarray if Index construction raises TypeError.
        """

        # coerce to a similar object
        if not isinstance(other, type(self)):
            if not is_list_like(other):
                # scalar
                other = [other]
            elif is_scalar(lib.item_from_zerodim(other)):
                # ndarray scalar
                other = [other.item()]
            other = type(self)(other)

        # compare
        result = op(self.asi8, other.asi8)

        # technically we could support bool dtyped Index
        # for now just return the indexing array directly
        mask = (self._isnan) | (other._isnan)
        if is_bool_dtype(result):
            result[mask] = False
            return result

        result[mask] = iNaT
        try:
            return Index(result)
        except TypeError:
            return result

    def _ensure_localized(self, result):
        """
        ensure that we are re-localized

        This is for compat as we can then call this on all datetimelike
        indexes generally (ignored for Period/Timedelta)

        Parameters
        ----------
        result : DatetimeIndex / i8 ndarray

        Returns
        -------
        localized DTI
        """

        # reconvert to local tz
        if getattr(self, 'tz', None) is not None:
            if not isinstance(result, ABCIndexClass):
                result = self._simple_new(result)
            result = result.tz_localize(self.tz)
        return result

    def _box_values(self, values):
        """
        apply box func to passed values
        """
        return lib.map_infer(values, self._box_func)

    def _box_values_as_index(self):
        """
        return object Index which contains boxed values
        """
        from pandas.core.index import Index
        return Index(self._box_values(self.asi8), name=self.name, dtype=object)

    def _format_with_header(self, header, **kwargs):
        """
        Return ``header`` followed by the natively formatted values.
        """
        return header + list(self._format_native_types(**kwargs))

    # No docstring here: Appender supplies the shared one.
    @Appender(_index_shared_docs['__contains__'] % _index_doc_kwargs)
    def __contains__(self, key):
        try:
            res = self.get_loc(key)
            # bool(...) so that a direct call (e.g. via the ``contains``
            # alias) returns a real boolean instead of leaking ``len(res)``
            return bool(is_scalar(res) or isinstance(res, slice) or
                        (is_list_like(res) and len(res)))
        except (KeyError, TypeError, ValueError):
            return False

    contains = __contains__

    def __getitem__(self, key):
        """
        This getitem defers to the underlying array, which by-definition can
        only handle list-likes, slices, and integer scalars

        Raises
        ------
        IndexError
            If ``key`` is a scalar that is not an integer.
        """

        is_int = is_integer(key)
        if is_scalar(key) and not is_int:
            raise IndexError("only integers, slices (`:`), ellipsis (`...`), "
                             "numpy.newaxis (`None`) and integer or boolean "
                             "arrays are valid indices")

        getitem = self._data.__getitem__
        if is_int:
            val = getitem(key)
            return self._box_func(val)
        else:
            if com.is_bool_indexer(key):
                key = np.asarray(key)
                if key.all():
                    # an all-True mask selects everything
                    key = slice(0, None, None)
                else:
                    key = lib.maybe_booleans_to_slice(key.view(np.uint8))

            attribs = self._get_attributes_dict()

            is_period = isinstance(self, ABCPeriodIndex)
            if is_period:
                freq = self.freq
            else:
                # only slicing can keep a freq; a stepped slice scales it
                freq = None
                if isinstance(key, slice):
                    if self.freq is not None and key.step is not None:
                        freq = key.step * self.freq
                    else:
                        freq = self.freq

            attribs['freq'] = freq

            result = getitem(key)
            if result.ndim > 1:
                # To support MPL which performs slicing with 2 dim
                # even though it only has 1 dim by definition
                if is_period:
                    return self._simple_new(result, **attribs)
                return result

            return self._simple_new(result, **attribs)

    def _nat_new(self, box=True):
        """
        Return Index or ndarray filled with NaT which has the same
        length as the caller.

        Parameters
        ----------
        box : boolean, default True
            - If True returns a Index as the same as caller.
            - If False returns ndarray of np.int64.
        """
        result = np.zeros(len(self), dtype=np.int64)
        result.fill(iNaT)
        if not box:
            return result

        attribs = self._get_attributes_dict()
        if not is_period_dtype(self):
            attribs['freq'] = None
        return self._simple_new(result, **attribs)

    # Try to run function on index first, and then on elements of index
    # Especially important for group-by functionality
    def map(self, f):
        """
        Apply ``f`` to the whole index, falling back to element-wise
        mapping over the object-dtype index if that fails for any reason
        (including ``f`` returning something that is not an Index/ndarray).
        """
        try:
            result = f(self)

            # Try to use this result if we can
            if isinstance(result, np.ndarray):
                result = Index(result)

            if not isinstance(result, Index):
                raise TypeError('The map function must return an Index object')
            return result
        except Exception:
            # deliberate best-effort fallback, see docstring
            return self.astype(object).map(f)

    def sort_values(self, return_indexer=False, ascending=True):
        """
        Return sorted copy of Index

        Parameters
        ----------
        return_indexer : bool, default False
            If True, also return the positions that sort the index.
        ascending : bool, default True

        Returns
        -------
        sorted_index or (sorted_index, indexer)
        """
        if return_indexer:
            _as = self.argsort()
            if not ascending:
                _as = _as[::-1]
            sorted_index = self.take(_as)
            return sorted_index, _as
        else:
            sorted_values = np.sort(self._ndarray_values)
            attribs = self._get_attributes_dict()
            freq = attribs['freq']

            if freq is not None and not isinstance(self, ABCPeriodIndex):
                # flip the sign of the freq when the requested order runs
                # against it
                if freq.n > 0 and not ascending:
                    freq = freq * -1
                elif freq.n < 0 and ascending:
                    freq = freq * -1
            attribs['freq'] = freq

            if not ascending:
                sorted_values = sorted_values[::-1]

            return self._simple_new(sorted_values, **attribs)

    # No docstring here: Appender supplies the shared one.
    @Appender(_index_shared_docs['take'] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True,
             fill_value=None, **kwargs):
        nv.validate_take(tuple(), kwargs)
        indices = _ensure_int64(indices)

        # slice-like indices are routed through __getitem__
        maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
        if isinstance(maybe_slice, slice):
            return self[maybe_slice]

        taken = self._assert_take_fillable(self.asi8, indices,
                                           allow_fill=allow_fill,
                                           fill_value=fill_value,
                                           na_value=iNaT)

        # keep freq in PeriodIndex, reset otherwise
        freq = self.freq if isinstance(self, ABCPeriodIndex) else None
        return self._shallow_copy(taken, freq=freq)

    _can_hold_na = True

    _na_value = NaT
    """The expected NA value to use with this index."""

    @property
    def asobject(self):
        """Return object Index which contains boxed values.

        .. deprecated:: 0.23.0
            Use ``astype(object)`` instead.

        *this is an internal non-public method*
        """
        warnings.warn("'asobject' is deprecated. Use 'astype(object)'"
                      " instead", FutureWarning, stacklevel=2)
        return self.astype(object)

    def _convert_tolerance(self, tolerance, target):
        """
        Convert ``tolerance`` to an ndarray of timedeltas.

        Raises
        ------
        ValueError
            If a list-like tolerance (size > 1) does not match the size
            of ``target``.
        """
        tolerance = np.asarray(to_timedelta(tolerance, box=False))
        if target.size != tolerance.size and tolerance.size > 1:
            raise ValueError('list-like tolerance size must match '
                             'target index size')
        return tolerance

    def tolist(self):
        """
        return a list of the underlying data
        """
        return list(self.astype(object))

    def min(self, axis=None, *args, **kwargs):
        """
        Return the minimum value of the Index or minimum along
        an axis.

        NaT entries are ignored; ``self._na_value`` is returned when the
        underlying reduction raises ValueError.

        See also
        --------
        numpy.ndarray.min
        """
        nv.validate_min(args, kwargs)

        try:
            i8 = self.asi8

            # quick check
            if len(i8) and self.is_monotonic:
                if i8[0] != iNaT:
                    return self._box_func(i8[0])

            if self.hasnans:
                min_stamp = self[~self._isnan].asi8.min()
            else:
                min_stamp = i8.min()
            return self._box_func(min_stamp)
        except ValueError:
            return self._na_value

    def argmin(self, axis=None, *args, **kwargs):
        """
        Returns the indices of the minimum values along an axis.

        See `numpy.ndarray.argmin` for more information on the
        `axis` parameter.

        NaT entries are never selected; -1 is returned when every entry
        is NaT.

        See also
        --------
        numpy.ndarray.argmin
        """
        nv.validate_argmin(args, kwargs)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all():
                return -1
            i8 = i8.copy()
            # push NaT slots to the top so they cannot be the minimum
            i8[mask] = np.iinfo('int64').max
        return i8.argmin()

    def max(self, axis=None, *args, **kwargs):
        """
        Return the maximum value of the Index or maximum along
        an axis.

        NaT entries are ignored; ``self._na_value`` is returned when the
        underlying reduction raises ValueError.

        See also
        --------
        numpy.ndarray.max
        """
        nv.validate_max(args, kwargs)

        try:
            i8 = self.asi8

            # quick check
            if len(i8) and self.is_monotonic:
                if i8[-1] != iNaT:
                    return self._box_func(i8[-1])

            if self.hasnans:
                max_stamp = self[~self._isnan].asi8.max()
            else:
                max_stamp = i8.max()
            return self._box_func(max_stamp)
        except ValueError:
            return self._na_value

    def argmax(self, axis=None, *args, **kwargs):
        """
        Returns the indices of the maximum values along an axis.

        See `numpy.ndarray.argmax` for more information on the
        `axis` parameter.

        NaT entries are never selected; -1 is returned when every entry
        is NaT.

        See also
        --------
        numpy.ndarray.argmax
        """
        nv.validate_argmax(args, kwargs)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all():
                return -1
            i8 = i8.copy()
            # push NaT slots to the bottom so they cannot be the maximum.
            # Filling with 0 would wrongly pick a NaT slot whenever every
            # valid value is negative (pre-epoch dates, negative timedeltas).
            i8[mask] = np.iinfo('int64').min
        return i8.argmax()

    @property
    def _formatter_func(self):
        raise com.AbstractMethodError(self)

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value)
        """
        attrs = super(DatetimeIndexOpsMixin, self)._format_attrs()
        for attrib in self._attributes:
            if attrib == 'freq':
                freq = self.freqstr
                if freq is not None:
                    freq = "'%s'" % freq
                attrs.append(('freq', freq))
        return attrs

    @cache_readonly
    def _resolution(self):
        return frequencies.Resolution.get_reso_from_freq(self.freqstr)

    @cache_readonly
    def resolution(self):
        """
        Returns day, hour, minute, second, millisecond or microsecond
        """
        return frequencies.Resolution.get_str(self._resolution)

    def _convert_scalar_indexer(self, key, kind=None):
        """
        we don't allow integer or float indexing on datetime-like when using
        loc

        Parameters
        ----------
        key : label of the slice bound
        kind : {'ix', 'loc', 'getitem', 'iloc'} or None
        """

        assert kind in ['ix', 'loc', 'getitem', 'iloc', None]

        # we don't allow integer/float indexing for loc
        # we don't allow float indexing for ix/getitem
        if is_scalar(key):
            is_int = is_integer(key)
            is_flt = is_float(key)
            if kind in ['loc'] and (is_int or is_flt):
                self._invalid_indexer('index', key)
            elif kind in ['ix', 'getitem'] and is_flt:
                self._invalid_indexer('index', key)

        return (super(DatetimeIndexOpsMixin, self)
                ._convert_scalar_indexer(key, kind=kind))

    def _add_nat(self):
        """Add pd.NaT to self"""
        if is_period_dtype(self):
            raise TypeError('Cannot add {cls} and {typ}'
                            .format(cls=type(self).__name__,
                                    typ=type(NaT).__name__))

        # GH#19124 pd.NaT is treated like a timedelta for both timedelta
        # and datetime dtypes
        return self._nat_new(box=True)

    def _sub_nat(self):
        """Subtract pd.NaT from self"""
        # GH#19124 Timedelta - datetime is not in general well-defined.
        # We make an exception for pd.NaT, which in this case quacks
        # like a timedelta.
        # For datetime64 dtypes by convention we treat NaT as a datetime, so
        # this subtraction returns a timedelta64 dtype.
        # For period dtype, timedelta64 is a close-enough return dtype.
        result = self._nat_new(box=False)
        return result.view('timedelta64[ns]')

    def _sub_period(self, other):
        # default: not supported; returning NotImplemented lets __sub__
        # propagate it to the caller
        return NotImplemented

    def _sub_period_array(self, other):
        """
        Subtract one PeriodIndex from another.  This is only valid if they
        have the same frequency.

        Parameters
        ----------
        other : PeriodIndex

        Returns
        -------
        result : np.ndarray[object]
            Array of DateOffset objects; nulls represented by NaT

        Raises
        ------
        TypeError
            If self is not period-dtype.
        ValueError
            If the two indexes differ in length.
        IncompatibleFrequency
            If the two indexes differ in freq.
        """
        if not is_period_dtype(self):
            # worded like the equivalent error raised in __sub__
            raise TypeError("cannot subtract {dtype}-dtype from {cls}"
                            .format(dtype=other.dtype,
                                    cls=type(self).__name__))

        if not len(self) == len(other):
            raise ValueError("cannot subtract indices of unequal length")
        if self.freq != other.freq:
            msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
            raise IncompatibleFrequency(msg)

        new_values = checked_add_with_arr(self.asi8, -other.asi8,
                                          arr_mask=self._isnan,
                                          b_mask=other._isnan)

        new_values = np.array([self.freq * x for x in new_values])
        if self.hasnans or other.hasnans:
            mask = (self._isnan) | (other._isnan)
            new_values[mask] = NaT
        return new_values

    def _add_offset(self, offset):
        raise com.AbstractMethodError(self)

    def _addsub_offset_array(self, other, op):
        """
        Add or subtract array-like of DateOffset objects

        Parameters
        ----------
        other : Index, np.ndarray
            object-dtype containing pd.DateOffset objects
        op : {operator.add, operator.sub}

        Returns
        -------
        result : same class as self
        """
        assert op in [operator.add, operator.sub]
        if len(other) == 1:
            # a single offset can be applied directly
            return op(self, other[0])

        warnings.warn("Adding/subtracting array of DateOffsets to "
                      "{cls} not vectorized"
                      .format(cls=type(self).__name__), PerformanceWarning)

        res_values = op(self.astype('O').values, np.array(other))
        kwargs = {}
        if not is_period_dtype(self):
            kwargs['freq'] = 'infer'
        return self._constructor(res_values, **kwargs)

    def _addsub_int_array(self, other, op):
        """
        Add or subtract array-like of integers equivalent to applying
        `shift` pointwise.

        Parameters
        ----------
        other : Index, np.ndarray
            integer-dtype
        op : {operator.add, operator.sub}

        Returns
        -------
        result : same class as self

        Raises
        ------
        NullFrequencyError
            If self is not period-dtype and has no freq.
        """
        assert op in [operator.add, operator.sub]
        if is_period_dtype(self):
            # easy case for PeriodIndex
            if op is operator.sub:
                other = -other
            res_values = checked_add_with_arr(self.asi8, other,
                                              arr_mask=self._isnan)
            res_values = res_values.view('i8')
            res_values[self._isnan] = iNaT
            return self._from_ordinals(res_values, freq=self.freq)

        elif self.freq is None:
            # GH#19123
            raise NullFrequencyError("Cannot shift with no freq")

        elif isinstance(self.freq, Tick):
            # easy case where we can convert to timedelta64 operation
            td = Timedelta(self.freq)
            return op(self, td * other)

        # We should only get here with DatetimeIndex; dispatch
        # to _addsub_offset_array
        assert not is_timedelta64_dtype(self)
        return op(self, np.array(other) * self.freq)

    @classmethod
    def _add_datetimelike_methods(cls):
        """
        add in the datetimelike methods (as we may have to override the
        superclass)
        """

        def __add__(self, other):
            other = lib.item_from_zerodim(other)
            if isinstance(other, (ABCSeries, ABCDataFrame)):
                return NotImplemented

            # scalar others
            elif other is NaT:
                result = self._add_nat()
            elif isinstance(other, (Tick, timedelta, np.timedelta64)):
                result = self._add_delta(other)
            elif isinstance(other, DateOffset):
                # specifically _not_ a Tick
                result = self._add_offset(other)
            elif isinstance(other, (datetime, np.datetime64)):
                result = self._add_datelike(other)
            elif is_integer(other):
                # This check must come after the check for np.timedelta64
                # as is_integer returns True for these
                result = self.shift(other)

            # array-like others
            elif is_timedelta64_dtype(other):
                # TimedeltaIndex, ndarray[timedelta64]
                result = self._add_delta(other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                result = self._addsub_offset_array(other, operator.add)
            elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
                # DatetimeIndex, ndarray[datetime64]
                return self._add_datelike(other)
            elif is_integer_dtype(other):
                result = self._addsub_int_array(other, operator.add)
            elif is_float_dtype(other) or is_period_dtype(other):
                # Explicitly catch invalid dtypes
                raise TypeError("cannot add {dtype}-dtype to {cls}"
                                .format(dtype=other.dtype,
                                        cls=type(self).__name__))
            elif is_categorical_dtype(other):
                # Categorical op will raise; defer explicitly
                return NotImplemented
            else:  # pragma: no cover
                return NotImplemented

            if result is NotImplemented:
                return NotImplemented
            elif not isinstance(result, Index):
                # Index.__new__ will choose appropriate subclass for dtype
                result = Index(result)
            res_name = ops.get_op_result_name(self, other)
            result.name = res_name
            return result

        cls.__add__ = __add__

        def __radd__(self, other):
            # alias for __add__
            return self.__add__(other)
        cls.__radd__ = __radd__

        def __sub__(self, other):
            from pandas import Index

            other = lib.item_from_zerodim(other)
            if isinstance(other, (ABCSeries, ABCDataFrame)):
                return NotImplemented

            # scalar others
            elif other is NaT:
                result = self._sub_nat()
            elif isinstance(other, (Tick, timedelta, np.timedelta64)):
                result = self._add_delta(-other)
            elif isinstance(other, DateOffset):
                # specifically _not_ a Tick
                result = self._add_offset(-other)
            elif isinstance(other, (datetime, np.datetime64)):
                result = self._sub_datelike(other)
            elif is_integer(other):
                # This check must come after the check for np.timedelta64
                # as is_integer returns True for these
                result = self.shift(-other)
            elif isinstance(other, Period):
                result = self._sub_period(other)

            # array-like others
            elif is_timedelta64_dtype(other):
                # TimedeltaIndex, ndarray[timedelta64]
                result = self._add_delta(-other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                result = self._addsub_offset_array(other, operator.sub)
            elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
                # DatetimeIndex, ndarray[datetime64]
                result = self._sub_datelike(other)
            elif is_period_dtype(other):
                # PeriodIndex
                result = self._sub_period_array(other)
            elif is_integer_dtype(other):
                result = self._addsub_int_array(other, operator.sub)
            elif isinstance(other, Index):
                raise TypeError("cannot subtract {cls} and {typ}"
                                .format(cls=type(self).__name__,
                                        typ=type(other).__name__))
            elif is_float_dtype(other):
                # Explicitly catch invalid dtypes
                raise TypeError("cannot subtract {dtype}-dtype from {cls}"
                                .format(dtype=other.dtype,
                                        cls=type(self).__name__))
            elif is_categorical_dtype(other):
                # Categorical op will raise; defer explicitly
                return NotImplemented
            else:  # pragma: no cover
                return NotImplemented

            if result is NotImplemented:
                return NotImplemented
            elif not isinstance(result, Index):
                # Index.__new__ will choose appropriate subclass for dtype
                result = Index(result)
            res_name = ops.get_op_result_name(self, other)
            result.name = res_name
            return result

        cls.__sub__ = __sub__

        def __rsub__(self, other):
            if is_datetime64_dtype(other) and is_timedelta64_dtype(self):
                # ndarray[datetime64] cannot be subtracted from self, so
                # we need to wrap in DatetimeIndex and flip the operation
                from pandas import DatetimeIndex
                return DatetimeIndex(other) - self
            elif (is_datetime64_any_dtype(self) and hasattr(other, 'dtype') and
                  not is_datetime64_any_dtype(other)):
                # GH#19959 datetime - datetime is well-defined as timedelta,
                # but any other type - datetime is not well-defined.
                raise TypeError("cannot subtract {cls} from {typ}"
                                .format(cls=type(self).__name__,
                                        typ=type(other).__name__))
            return -(self - other)
        cls.__rsub__ = __rsub__

        def __iadd__(self, other):
            # alias for __add__
            return self.__add__(other)
        cls.__iadd__ = __iadd__

        def __isub__(self, other):
            # alias for __sub__
            return self.__sub__(other)
        cls.__isub__ = __isub__

    def isin(self, values):
        """
        Compute boolean array of whether each index value is found in the
        passed set of values

        Parameters
        ----------
        values : set or sequence of values

        Returns
        -------
        is_contained : ndarray (boolean dtype)
        """
        if not isinstance(values, type(self)):
            try:
                values = type(self)(values)
            except ValueError:
                # values cannot be coerced to this index type; compare as
                # objects instead
                return self.astype(object).isin(values)

        return algorithms.isin(self.asi8, values.asi8)

    def shift(self, n, freq=None):
        """
        Specialized shift which produces a DatetimeIndex

        Parameters
        ----------
        n : int
            Periods to shift by
        freq : DateOffset or timedelta-like, optional

        Returns
        -------
        shifted : DatetimeIndex

        Raises
        ------
        NullFrequencyError
            If ``n`` is non-zero, no differing ``freq`` is passed and the
            index has no freq of its own.
        """
        if freq is not None and freq != self.freq:
            if isinstance(freq, compat.string_types):
                freq = frequencies.to_offset(freq)
            offset = n * freq
            result = self + offset

            if hasattr(self, 'tz'):
                result._tz = self.tz

            return result

        if n == 0:
            # immutable so OK
            return self

        if self.freq is None:
            raise NullFrequencyError("Cannot shift with no freq")

        if len(self) == 0:
            # nothing to shift; self[0] / self[-1] below would raise
            # IndexError on an empty index. Immutable so OK to return self.
            return self

        start = self[0] + n * self.freq
        end = self[-1] + n * self.freq
        attribs = self._get_attributes_dict()
        attribs['start'] = start
        attribs['end'] = end
        return type(self)(**attribs)

    def repeat(self, repeats, *args, **kwargs):
        """
        Analogous to ndarray.repeat
        """
        nv.validate_repeat(args, kwargs)
        # only PeriodIndex keeps its freq
        if isinstance(self, ABCPeriodIndex):
            freq = self.freq
        else:
            freq = None
        return self._shallow_copy(self.asi8.repeat(repeats),
                                  freq=freq)

    # No docstring here: Appender supplies the shared one.
    @Appender(_index_shared_docs['where'] % _index_doc_kwargs)
    def where(self, cond, other=None):
        other = _ensure_datetimelike_to_i8(other)
        values = _ensure_datetimelike_to_i8(self)
        result = np.where(cond, values, other).astype('i8')

        result = self._ensure_localized(result)
        return self._shallow_copy(result,
                                  **self._get_attributes_dict())

    def _summary(self, name=None):
        """
        Return a summarized representation

        Parameters
        ----------
        name : str
            name to use in the summary representation

        Returns
        -------
        String with a summarized representation of the index
        """
        formatter = self._formatter_func
        if len(self) > 0:
            index_summary = ', %s to %s' % (formatter(self[0]),
                                            formatter(self[-1]))
        else:
            index_summary = ''

        if name is None:
            name = type(self).__name__
        result = '%s: %s entries%s' % (printing.pprint_thing(name),
                                       len(self), index_summary)
        if self.freq:
            result += '\nFreq: %s' % self.freqstr

        # display as values, not quoted
        result = result.replace("'", "")
        return result

    def _concat_same_dtype(self, to_concat, name):
        """
        Concatenate to_concat which has the same class
        """
        attribs = self._get_attributes_dict()
        attribs['name'] = name

        if not isinstance(self, ABCPeriodIndex):
            # reset freq
            attribs['freq'] = None

        if getattr(self, 'tz', None) is not None:
            return _concat._concat_datetimetz(to_concat, name)
        else:
            new_data = np.concatenate([c.asi8 for c in to_concat])
        return self._simple_new(new_data, **attribs)

    def astype(self, dtype, copy=True):
        """
        Cast the index to ``dtype``.

        Object, string and integer dtypes are handled here; anything else
        not rejected below is delegated to the parent implementation.

        Raises
        ------
        TypeError
            For float dtypes, and for datetime/timedelta dtypes that differ
            from the dtype of self.
        """
        if is_object_dtype(dtype):
            return self._box_values_as_index()
        elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
            return Index(self.format(), name=self.name, dtype=object)
        elif is_integer_dtype(dtype):
            return Index(self.values.astype('i8', copy=copy), name=self.name,
                         dtype='i8')
        elif (is_datetime_or_timedelta_dtype(dtype) and
              not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
            # disallow conversion between datetime/timedelta,
            # and conversions for any datetimelike to float
            msg = 'Cannot cast {name} to dtype {dtype}'
            raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
        return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)