Code Example #1
File: datetimelike.py  Project: pyff-ny/pandas
def _make_wrapped_arith_op_with_freq(opname: str):
    """
    Dispatch the operation to the underlying ExtensionArray, and infer
    the appropriate frequency for the result.
    """
    meth = make_wrapped_arith_op(opname)

    def wrapped(self, other):
        result = meth(self, other)
        if result is NotImplemented:
            return NotImplemented

        new_freq = self._get_addsub_freq(other)
        result._freq = new_freq
        return result

    wrapped.__name__ = opname
    return wrapped
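
A minimal usage sketch of what this wrapper enables (hedged: the wrapper
itself is internal, but its observable effect is freq propagation on Index
arithmetic):

import pandas as pd

idx = pd.date_range("2020-01-01", periods=3, freq="D")
shifted = idx + pd.Timedelta(days=1)  # dispatches to the wrapped __add__
print(shifted.freq)                   # freq is propagated: <Day>
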
Code Example #2
class DatetimeIndexOpsMixin(ExtensionIndex):
    """
    Common ops mixin to support a unified interface for datetimelike Index.
    """

    _data: Union[DatetimeArray, TimedeltaArray, PeriodArray]
    freq: Optional[DateOffset]
    freqstr: Optional[str]
    _resolution: int
    _bool_ops: List[str] = []
    _field_ops: List[str] = []

    hasnans = cache_readonly(DatetimeLikeArrayMixin._hasnans.fget)  # type: ignore
    _hasnans = hasnans  # for index/array-agnostic code

    @property
    def is_all_dates(self) -> bool:
        return True

    # ------------------------------------------------------------------------
    # Abstract data attributes

    @property
    def values(self):
        # Note: PeriodArray overrides this to return an ndarray of objects.
        return self._data._data

    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc.
        """
        result = lib.item_from_zerodim(result)
        if is_bool_dtype(result) or lib.is_scalar(result):
            return result

        attrs = self._get_attributes_dict()
        if not is_period_dtype(self) and attrs["freq"]:
            # no need to infer if freq is None
            attrs["freq"] = "infer"
        return Index(result, **attrs)

    # ------------------------------------------------------------------------

    def equals(self, other) -> bool:
        """
        Determines if two Index objects contain the same elements.
        """
        if self.is_(other):
            return True

        if not isinstance(other, ABCIndexClass):
            return False
        elif not isinstance(other, type(self)):
            try:
                other = type(self)(other)
            except (ValueError, TypeError, OverflowError):
                # e.g.
                #  ValueError -> cannot parse str entry, or OutOfBoundsDatetime
                #  TypeError  -> trying to convert IntervalIndex to DatetimeIndex
                #  OverflowError -> Index([very_large_timedeltas])
                return False

        if not is_dtype_equal(self.dtype, other.dtype):
            # have different timezone
            return False

        return np.array_equal(self.asi8, other.asi8)

    @Appender(Index.__contains__.__doc__)
    def __contains__(self, key: Any) -> bool:
        hash(key)
        try:
            res = self.get_loc(key)
        except (KeyError, TypeError, ValueError):
            return False
        return bool(
            is_scalar(res) or isinstance(res, slice) or (is_list_like(res) and len(res))
        )

    def sort_values(self, return_indexer=False, ascending=True):
        """
        Return sorted copy of Index.
        """
        if return_indexer:
            _as = self.argsort()
            if not ascending:
                _as = _as[::-1]
            sorted_index = self.take(_as)
            return sorted_index, _as
        else:
            # NB: using asi8 instead of _ndarray_values matters in numpy 1.18
            #  because the treatment of NaT has been changed to put NaT last
            #  instead of first.
            sorted_values = np.sort(self.asi8)

            freq = self.freq
            if freq is not None and not is_period_dtype(self):
                if freq.n > 0 and not ascending:
                    freq = freq * -1
                elif freq.n < 0 and ascending:
                    freq = freq * -1

            if not ascending:
                sorted_values = sorted_values[::-1]

            arr = type(self._data)._simple_new(
                sorted_values, dtype=self.dtype, freq=freq
            )
            return type(self)._simple_new(arr, name=self.name)
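
    # Editor's note: a hedged usage sketch of the freq handling above. For a
    # non-period index with freq="D", a descending sort flips the freq sign
    # so the result stays internally consistent:
    #   idx = pd.date_range("2020-01-01", periods=3, freq="D")
    #   idx.sort_values(ascending=False).freq  # expected: <-1 * Day>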

    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
        nv.validate_take(tuple(), kwargs)
        indices = ensure_int64(indices)

        maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
        if isinstance(maybe_slice, slice):
            return self[maybe_slice]

        return ExtensionIndex.take(
            self, indices, axis, allow_fill, fill_value, **kwargs
        )

    @Appender(_shared_docs["searchsorted"])
    def searchsorted(self, value, side="left", sorter=None):
        if isinstance(value, str):
            raise TypeError(
                "searchsorted requires compatible dtype or scalar, "
                f"not {type(value).__name__}"
            )
        if isinstance(value, Index):
            value = value._data

        return self._data.searchsorted(value, side=side, sorter=sorter)

    _can_hold_na = True

    _na_value = NaT
    """The expected NA value to use with this index."""

    def _convert_tolerance(self, tolerance, target):
        tolerance = np.asarray(to_timedelta(tolerance).to_numpy())

        if target.size != tolerance.size and tolerance.size > 1:
            raise ValueError("list-like tolerance size must match target index size")
        return tolerance

    def tolist(self) -> List:
        """
        Return a list of the underlying data.
        """
        return list(self.astype(object))

    def min(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the minimum value of the Index or minimum along
        an axis.

        See Also
        --------
        numpy.ndarray.min
        Series.min : Return the minimum value in a Series.
        """
        nv.validate_min(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8
        try:
            # quick check
            if len(i8) and self.is_monotonic:
                if i8[0] != iNaT:
                    return self._box_func(i8[0])

            if self.hasnans:
                if skipna:
                    min_stamp = self[~self._isnan].asi8.min()
                else:
                    return self._na_value
            else:
                min_stamp = i8.min()
            return self._box_func(min_stamp)
        except ValueError:
            return self._na_value

    def argmin(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the minimum values along an axis.

        See `numpy.ndarray.argmin` for more information on the
        `axis` parameter.

        See Also
        --------
        numpy.ndarray.argmin
        """
        nv.validate_argmin(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            i8[mask] = np.iinfo("int64").max
        return i8.argmin()

    def max(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the maximum value of the Index or maximum along
        an axis.

        See Also
        --------
        numpy.ndarray.max
        Series.max : Return the maximum value in a Series.
        """
        nv.validate_max(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8
        try:
            # quick check
            if len(i8) and self.is_monotonic:
                if i8[-1] != iNaT:
                    return self._box_func(i8[-1])

            if self.hasnans:
                if skipna:
                    max_stamp = self[~self._isnan].asi8.max()
                else:
                    return self._na_value
            else:
                max_stamp = i8.max()
            return self._box_func(max_stamp)
        except ValueError:
            return self._na_value

    def argmax(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the maximum values along an axis.

        See `numpy.ndarray.argmax` for more information on the
        `axis` parameter.

        See Also
        --------
        numpy.ndarray.argmax
        """
        nv.validate_argmax(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            i8[mask] = 0
        return i8.argmax()
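
    # Editor's note (hedged sketch): per the mask handling above, an all-NaT
    # index, or NaT present with skipna=False, yields the sentinel -1 rather
    # than raising:
    #   pd.DatetimeIndex([pd.NaT, pd.NaT]).argmin()  # -1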

    # --------------------------------------------------------------------
    # Rendering Methods

    def _format_with_header(self, header, na_rep="NaT", **kwargs):
        return header + list(self._format_native_types(na_rep, **kwargs))

    @property
    def _formatter_func(self):
        raise AbstractMethodError(self)

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr, formatted_value).
        """
        attrs = super()._format_attrs()
        for attrib in self._attributes:
            if attrib == "freq":
                freq = self.freqstr
                if freq is not None:
                    freq = repr(freq)
                attrs.append(("freq", freq))
        return attrs

    # --------------------------------------------------------------------
    # Indexing Methods

    def _convert_scalar_indexer(self, key, kind: str):
        """
        We don't allow integer or float indexing on datetime-like when using
        loc.

        Parameters
        ----------
        key : label of the slice bound
        kind : {'loc', 'getitem'}
        """
        assert kind in ["loc", "getitem"]

        if not is_scalar(key):
            raise TypeError(key)

        # we don't allow integer/float indexing for loc
        # we don't allow float indexing for getitem
        is_int = is_integer(key)
        is_flt = is_float(key)
        if kind == "loc" and (is_int or is_flt):
            raise KeyError(key)
        elif kind == "getitem" and is_flt:
            raise KeyError(key)

        return super()._convert_scalar_indexer(key, kind=kind)

    def _validate_partial_date_slice(self, reso: str):
        raise NotImplementedError

    def _parsed_string_to_bounds(self, reso: str, parsed: datetime):
        raise NotImplementedError

    def _partial_date_slice(
        self, reso: str, parsed: datetime, use_lhs: bool = True, use_rhs: bool = True
    ):
        """
        Parameters
        ----------
        reso : str
        parsed : datetime
        use_lhs : bool, default True
        use_rhs : bool, default True

        Returns
        -------
        slice or ndarray[intp]
        """
        self._validate_partial_date_slice(reso)

        t1, t2 = self._parsed_string_to_bounds(reso, parsed)
        i8vals = self.asi8
        unbox = self._data._unbox_scalar

        if self.is_monotonic:

            if len(self) and (
                (use_lhs and t1 < self[0] and t2 < self[0])
                or (use_rhs and t1 > self[-1] and t2 > self[-1])
            ):
                # we are out of range
                raise KeyError

            # TODO: does this depend on being monotonic _increasing_?

            # a monotonic (sorted) series can be sliced
            # Use asi8.searchsorted to avoid re-validating Periods/Timestamps
            left = i8vals.searchsorted(unbox(t1), side="left") if use_lhs else None
            right = i8vals.searchsorted(unbox(t2), side="right") if use_rhs else None
            return slice(left, right)

        else:
            lhs_mask = (i8vals >= unbox(t1)) if use_lhs else True
            rhs_mask = (i8vals <= unbox(t2)) if use_rhs else True

            # try to find the dates
            return (lhs_mask & rhs_mask).nonzero()[0]

    # --------------------------------------------------------------------

    __add__ = make_wrapped_arith_op("__add__")
    __radd__ = make_wrapped_arith_op("__radd__")
    __sub__ = make_wrapped_arith_op("__sub__")
    __rsub__ = make_wrapped_arith_op("__rsub__")
    __pow__ = make_wrapped_arith_op("__pow__")
    __rpow__ = make_wrapped_arith_op("__rpow__")
    __mul__ = make_wrapped_arith_op("__mul__")
    __rmul__ = make_wrapped_arith_op("__rmul__")
    __floordiv__ = make_wrapped_arith_op("__floordiv__")
    __rfloordiv__ = make_wrapped_arith_op("__rfloordiv__")
    __mod__ = make_wrapped_arith_op("__mod__")
    __rmod__ = make_wrapped_arith_op("__rmod__")
    __divmod__ = make_wrapped_arith_op("__divmod__")
    __rdivmod__ = make_wrapped_arith_op("__rdivmod__")
    __truediv__ = make_wrapped_arith_op("__truediv__")
    __rtruediv__ = make_wrapped_arith_op("__rtruediv__")

    def isin(self, values, level=None):
        """
        Compute boolean array of whether each index value is found in the
        passed set of values.

        Parameters
        ----------
        values : set or sequence of values

        Returns
        -------
        is_contained : ndarray (boolean dtype)
        """
        if level is not None:
            self._validate_index_level(level)

        if not isinstance(values, type(self)):
            try:
                values = type(self)(values)
            except ValueError:
                return self.astype(object).isin(values)

        return algorithms.isin(self.asi8, values.asi8)

    @Appender(Index.where.__doc__)
    def where(self, cond, other=None):
        values = self.view("i8")

        if is_scalar(other) and isna(other):
            other = NaT.value

        else:
            # Do type inference if necessary up front
            # e.g. we passed PeriodIndex.values and got an ndarray of Periods
            other = Index(other)

            if is_categorical_dtype(other):
                # e.g. we have a Categorical holding self.dtype
                if needs_i8_conversion(other.categories):
                    other = other._internal_get_values()

            if not is_dtype_equal(self.dtype, other.dtype):
                raise TypeError(f"Where requires matching dtype, not {other.dtype}")

            other = other.view("i8")

        result = np.where(cond, values, other).astype("i8")
        arr = type(self._data)._simple_new(result, dtype=self.dtype)
        return type(self)._simple_new(arr, name=self.name)
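
    # Editor's note (hedged sketch): a scalar NA fills with NaT, while a
    # non-matching dtype raises the TypeError above:
    #   idx = pd.date_range("2020-01-01", periods=3, tz="UTC")
    #   idx.where([True, False, True])  # NaT in the False slot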

    def _summary(self, name=None) -> str:
        """
        Return a summarized representation.

        Parameters
        ----------
        name : str
            Name to use in the summary representation.

        Returns
        -------
        str
            Summarized representation of the index.
        """
        formatter = self._formatter_func
        if len(self) > 0:
            index_summary = f", {formatter(self[0])} to {formatter(self[-1])}"
        else:
            index_summary = ""

        if name is None:
            name = type(self).__name__
        result = f"{name}: {len(self)} entries{index_summary}"
        if self.freq:
            result += f"\nFreq: {self.freqstr}"

        # display as values, not quoted
        result = result.replace("'", "")
        return result

    def shift(self, periods=1, freq=None):
        """
        Shift index by desired number of time frequency increments.

        This method is for shifting the values of datetime-like indexes
        by a specified time increment a given number of times.

        Parameters
        ----------
        periods : int, default 1
            Number of periods (or increments) to shift by,
            can be positive or negative.

            .. versionchanged:: 0.24.0

        freq : pandas.DateOffset, pandas.Timedelta or string, optional
            Frequency increment to shift by.
            If None, the index is shifted by its own `freq` attribute.
            Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.

        Returns
        -------
        pandas.DatetimeIndex
            Shifted index.

        See Also
        --------
        Index.shift : Shift values of Index.
        PeriodIndex.shift : Shift values of PeriodIndex.
        """
        result = self._data._time_shift(periods, freq=freq)
        return type(self)(result, name=self.name)

    # --------------------------------------------------------------------
    # List-like Methods

    def delete(self, loc):
        new_i8s = np.delete(self.asi8, loc)

        freq = None
        if is_period_dtype(self):
            freq = self.freq
        elif is_integer(loc):
            if loc in (0, -len(self), -1, len(self) - 1):
                freq = self.freq
        else:
            if is_list_like(loc):
                loc = lib.maybe_indices_to_slice(ensure_int64(np.array(loc)), len(self))
            if isinstance(loc, slice) and loc.step in (1, None):
                if loc.start in (0, None) or loc.stop in (len(self), None):
                    freq = self.freq

        arr = type(self._data)._simple_new(new_i8s, dtype=self.dtype, freq=freq)
        return type(self)._simple_new(arr, name=self.name)
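
As a hedged usage sketch of the delete/freq logic above (assuming a plain
DatetimeIndex; exact behavior may vary across pandas versions):

import pandas as pd

idx = pd.date_range("2020-01-01", periods=5, freq="D")
print(idx.delete(0).freq)  # deleting an endpoint keeps freq: <Day>
print(idx.delete(2).freq)  # deleting an interior element drops it: None
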
Code Example #3
File: datetimelike.py  Project: soon14/pandas
class DatetimeIndexOpsMixin(ExtensionIndex):
    """
    Common ops mixin to support a unified interface for datetimelike Index.
    """

    _data: Union[DatetimeArray, TimedeltaArray, PeriodArray]
    freq: Optional[BaseOffset]
    freqstr: Optional[str]
    _resolution_obj: Resolution
    _bool_ops: List[str] = []
    _field_ops: List[str] = []

    # error: "Callable[[Any], Any]" has no attribute "fget"
    hasnans = cache_readonly(
        DatetimeLikeArrayMixin._hasnans.fget  # type: ignore[attr-defined]
    )
    _hasnans = hasnans  # for index/array-agnostic code

    @property
    def _is_all_dates(self) -> bool:
        return True

    # ------------------------------------------------------------------------
    # Abstract data attributes

    @property
    def values(self) -> np.ndarray:
        # Note: PeriodArray overrides this to return an ndarray of objects.
        return self._data._data

    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc and other functions.
        """
        result = lib.item_from_zerodim(result)
        if is_bool_dtype(result) or lib.is_scalar(result):
            return result

        attrs = self._get_attributes_dict()
        if not is_period_dtype(self.dtype) and attrs["freq"]:
            # no need to infer if freq is None
            attrs["freq"] = "infer"
        return Index(result, **attrs)

    # ------------------------------------------------------------------------

    def equals(self, other: object) -> bool:
        """
        Determines if two Index objects contain the same elements.
        """
        if self.is_(other):
            return True

        if not isinstance(other, Index):
            return False
        elif other.dtype.kind in ["f", "i", "u", "c"]:
            return False
        elif not isinstance(other, type(self)):
            try:
                other = type(self)(other)
            except (ValueError, TypeError, OverflowError):
                # e.g.
                #  ValueError -> cannot parse str entry, or OutOfBoundsDatetime
                #  TypeError  -> trying to convert IntervalIndex to DatetimeIndex
                #  OverflowError -> Index([very_large_timedeltas])
                return False

        if not is_dtype_equal(self.dtype, other.dtype):
            # have different timezone
            return False

        return np.array_equal(self.asi8, other.asi8)

    @Appender(Index.__contains__.__doc__)
    def __contains__(self, key: Any) -> bool:
        hash(key)
        try:
            res = self.get_loc(key)
        except (KeyError, TypeError, ValueError):
            return False
        return bool(
            is_scalar(res) or isinstance(res, slice) or (is_list_like(res) and len(res))
        )

    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
        nv.validate_take(tuple(), kwargs)
        indices = ensure_int64(indices)

        maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
        if isinstance(maybe_slice, slice):
            return self[maybe_slice]

        return ExtensionIndex.take(
            self, indices, axis, allow_fill, fill_value, **kwargs
        )

    @doc(IndexOpsMixin.searchsorted, klass="Datetime-like Index")
    def searchsorted(self, value, side="left", sorter=None):
        return self._data.searchsorted(value, side=side, sorter=sorter)

    _can_hold_na = True

    _na_value = NaT
    """The expected NA value to use with this index."""

    def _convert_tolerance(self, tolerance, target):
        tolerance = np.asarray(to_timedelta(tolerance).to_numpy())

        if target.size != tolerance.size and tolerance.size > 1:
            raise ValueError("list-like tolerance size must match target index size")
        return tolerance

    def tolist(self) -> List:
        """
        Return a list of the underlying data.
        """
        return list(self.astype(object))

    def min(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the minimum value of the Index or minimum along
        an axis.

        See Also
        --------
        numpy.ndarray.min
        Series.min : Return the minimum value in a Series.
        """
        nv.validate_min(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8
        try:
            # quick check
            if len(i8) and self.is_monotonic:
                if i8[0] != iNaT:
                    return self._data._box_func(i8[0])

            if self.hasnans:
                if skipna:
                    min_stamp = self[~self._isnan].asi8.min()
                else:
                    return self._na_value
            else:
                min_stamp = i8.min()
            return self._data._box_func(min_stamp)
        except ValueError:
            return self._na_value

    def argmin(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the minimum values along an axis.

        See `numpy.ndarray.argmin` for more information on the
        `axis` parameter.

        See Also
        --------
        numpy.ndarray.argmin
        """
        nv.validate_argmin(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            i8[mask] = np.iinfo("int64").max
        return i8.argmin()

    def max(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the maximum value of the Index or maximum along
        an axis.

        See Also
        --------
        numpy.ndarray.max
        Series.max : Return the maximum value in a Series.
        """
        nv.validate_max(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8
        try:
            # quick check
            if len(i8) and self.is_monotonic:
                if i8[-1] != iNaT:
                    return self._data._box_func(i8[-1])

            if self.hasnans:
                if skipna:
                    max_stamp = self[~self._isnan].asi8.max()
                else:
                    return self._na_value
            else:
                max_stamp = i8.max()
            return self._data._box_func(max_stamp)
        except ValueError:
            return self._na_value

    def argmax(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the maximum values along an axis.

        See `numpy.ndarray.argmax` for more information on the
        `axis` parameter.

        See Also
        --------
        numpy.ndarray.argmax
        """
        nv.validate_argmax(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            i8[mask] = 0
        return i8.argmax()

    # --------------------------------------------------------------------
    # Rendering Methods

    def format(
        self,
        name: bool = False,
        formatter: Optional[Callable] = None,
        na_rep: str = "NaT",
        date_format: Optional[str] = None,
    ) -> List[str]:
        """
        Render a string representation of the Index.
        """
        header = []
        if name:
            header.append(
                ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
                if self.name is not None
                else ""
            )

        if formatter is not None:
            return header + list(self.map(formatter))

        return self._format_with_header(header, na_rep=na_rep, date_format=date_format)

    def _format_with_header(
        self, header: List[str], na_rep: str = "NaT", date_format: Optional[str] = None
    ) -> List[str]:
        return header + list(
            self._format_native_types(na_rep=na_rep, date_format=date_format)
        )

    @property
    def _formatter_func(self):
        raise AbstractMethodError(self)

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr, formatted_value).
        """
        attrs = super()._format_attrs()
        for attrib in self._attributes:
            if attrib == "freq":
                freq = self.freqstr
                if freq is not None:
                    freq = repr(freq)
                attrs.append(("freq", freq))
        return attrs

    # --------------------------------------------------------------------
    # Indexing Methods

    def _validate_partial_date_slice(self, reso: Resolution):
        raise NotImplementedError

    def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
        raise NotImplementedError

    def _partial_date_slice(
        self,
        reso: Resolution,
        parsed: datetime,
        use_lhs: bool = True,
        use_rhs: bool = True,
    ):
        """
        Parameters
        ----------
        reso : Resolution
        parsed : datetime
        use_lhs : bool, default True
        use_rhs : bool, default True

        Returns
        -------
        slice or ndarray[intp]
        """
        self._validate_partial_date_slice(reso)

        t1, t2 = self._parsed_string_to_bounds(reso, parsed)
        i8vals = self.asi8
        unbox = self._data._unbox_scalar

        if self.is_monotonic:

            if len(self) and (
                (use_lhs and t1 < self[0] and t2 < self[0])
                or (use_rhs and t1 > self[-1] and t2 > self[-1])
            ):
                # we are out of range
                raise KeyError

            # TODO: does this depend on being monotonic _increasing_?

            # a monotonic (sorted) series can be sliced
            # Use asi8.searchsorted to avoid re-validating Periods/Timestamps
            left = i8vals.searchsorted(unbox(t1), side="left") if use_lhs else None
            right = i8vals.searchsorted(unbox(t2), side="right") if use_rhs else None
            return slice(left, right)

        else:
            lhs_mask = (i8vals >= unbox(t1)) if use_lhs else True
            rhs_mask = (i8vals <= unbox(t2)) if use_rhs else True

            # try to find the dates
            return (lhs_mask & rhs_mask).nonzero()[0]

    # --------------------------------------------------------------------
    # Arithmetic Methods

    __add__ = make_wrapped_arith_op("__add__")
    __sub__ = make_wrapped_arith_op("__sub__")
    __radd__ = make_wrapped_arith_op("__radd__")
    __rsub__ = make_wrapped_arith_op("__rsub__")
    __pow__ = make_wrapped_arith_op("__pow__")
    __rpow__ = make_wrapped_arith_op("__rpow__")
    __mul__ = make_wrapped_arith_op("__mul__")
    __rmul__ = make_wrapped_arith_op("__rmul__")
    __floordiv__ = make_wrapped_arith_op("__floordiv__")
    __rfloordiv__ = make_wrapped_arith_op("__rfloordiv__")
    __mod__ = make_wrapped_arith_op("__mod__")
    __rmod__ = make_wrapped_arith_op("__rmod__")
    __divmod__ = make_wrapped_arith_op("__divmod__")
    __rdivmod__ = make_wrapped_arith_op("__rdivmod__")
    __truediv__ = make_wrapped_arith_op("__truediv__")
    __rtruediv__ = make_wrapped_arith_op("__rtruediv__")

    def isin(self, values, level=None):
        """
        Compute boolean array of whether each index value is found in the
        passed set of values.

        Parameters
        ----------
        values : set or sequence of values

        Returns
        -------
        is_contained : ndarray (boolean dtype)
        """
        if level is not None:
            self._validate_index_level(level)

        if not isinstance(values, type(self)):
            try:
                values = type(self)(values)
            except ValueError:
                return self.astype(object).isin(values)

        return algorithms.isin(self.asi8, values.asi8)

    @Appender(Index.where.__doc__)
    def where(self, cond, other=None):
        values = self.view("i8")

        try:
            other = self._data._validate_where_value(other)
        except (TypeError, ValueError) as err:
            # Includes tzawareness mismatch and IncompatibleFrequencyError
            oth = getattr(other, "dtype", other)
            raise TypeError(f"Where requires matching dtype, not {oth}") from err

        result = np.where(cond, values, other).astype("i8")
        arr = self._data._from_backing_data(result)
        return type(self)._simple_new(arr, name=self.name)

    def putmask(self, mask, value):
        try:
            value = self._data._validate_where_value(value)
        except (TypeError, ValueError):
            return self.astype(object).putmask(mask, value)

        result = self._data._ndarray.copy()
        np.putmask(result, mask, value)
        arr = self._data._from_backing_data(result)
        return type(self)._simple_new(arr, name=self.name)
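
    # Editor's note (hedged sketch): per the except clause above, an
    # incompatible value falls back to object dtype instead of raising:
    #   idx = pd.date_range("2020-01-01", periods=3, freq="D")
    #   idx.putmask([True, False, False], "foo")  # -> object-dtype Index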

    def _summary(self, name=None) -> str:
        """
        Return a summarized representation.

        Parameters
        ----------
        name : str
            Name to use in the summary representation.

        Returns
        -------
        str
            Summarized representation of the index.
        """
        formatter = self._formatter_func
        if len(self) > 0:
            index_summary = f", {formatter(self[0])} to {formatter(self[-1])}"
        else:
            index_summary = ""

        if name is None:
            name = type(self).__name__
        result = f"{name}: {len(self)} entries{index_summary}"
        if self.freq:
            result += f"\nFreq: {self.freqstr}"

        # display as values, not quoted
        result = result.replace("'", "")
        return result

    def shift(self, periods=1, freq=None):
        """
        Shift index by desired number of time frequency increments.

        This method is for shifting the values of datetime-like indexes
        by a specified time increment a given number of times.

        Parameters
        ----------
        periods : int, default 1
            Number of periods (or increments) to shift by,
            can be positive or negative.

            .. versionchanged:: 0.24.0

        freq : pandas.DateOffset, pandas.Timedelta or string, optional
            Frequency increment to shift by.
            If None, the index is shifted by its own `freq` attribute.
            Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.

        Returns
        -------
        pandas.DatetimeIndex
            Shifted index.

        See Also
        --------
        Index.shift : Shift values of Index.
        PeriodIndex.shift : Shift values of PeriodIndex.
        """
        arr = self._data.view()
        arr._freq = self.freq
        result = arr._time_shift(periods, freq=freq)
        return type(self)(result, name=self.name)

    # --------------------------------------------------------------------
    # List-like Methods

    def delete(self, loc):
        new_i8s = np.delete(self.asi8, loc)

        freq = None
        if is_period_dtype(self.dtype):
            freq = self.freq
        elif is_integer(loc):
            if loc in (0, -len(self), -1, len(self) - 1):
                freq = self.freq
        else:
            if is_list_like(loc):
                loc = lib.maybe_indices_to_slice(ensure_int64(np.array(loc)), len(self))
            if isinstance(loc, slice) and loc.step in (1, None):
                if loc.start in (0, None) or loc.stop in (len(self), None):
                    freq = self.freq

        arr = type(self._data)._simple_new(new_i8s, dtype=self.dtype, freq=freq)
        return type(self)._simple_new(arr, name=self.name)

    def insert(self, loc: int, item):
        """
        Make a new Index inserting the new item at the given location.

        Parameters
        ----------
        loc : int
        item : object
            If not either a Python datetime or a numpy integer-like, the
            returned Index dtype will be object rather than datetime.

        Returns
        -------
        new_index : Index
        """
        item = self._data._validate_insert_value(item)

        freq = None
        if is_period_dtype(self.dtype):
            freq = self.freq
        elif self.freq is not None:
            # freq can be preserved on edge cases
            if self.size:
                if item is NaT:
                    pass
                elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]:
                    freq = self.freq
                elif (loc == len(self)) and item - self.freq == self[-1]:
                    freq = self.freq
            else:
                # Adding a single item to an empty index may preserve freq
                if self.freq.is_on_offset(item):
                    freq = self.freq

        arr = self._data
        item = arr._unbox_scalar(item)
        item = arr._rebox_native(item)

        new_values = np.concatenate([arr._ndarray[:loc], [item], arr._ndarray[loc:]])
        new_arr = self._data._from_backing_data(new_values)
        new_arr._freq = freq

        return type(self)._simple_new(new_arr, name=self.name)
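
    # Editor's note: a hedged sketch of the freq-preservation rules above.
    #   idx = pd.date_range("2020-01-02", periods=3, freq="D")
    #   idx.insert(0, pd.Timestamp("2020-01-01")).freq  # adjacent: <Day>
    #   idx.insert(1, pd.Timestamp("2021-06-15")).freq  # interior: None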

    # --------------------------------------------------------------------
    # Join/Set Methods

    def _can_union_without_object_cast(self, other) -> bool:
        return is_dtype_equal(self.dtype, other.dtype)

    def _wrap_joined_index(self, joined: np.ndarray, other):
        assert other.dtype == self.dtype, (other.dtype, self.dtype)
        name = get_op_result_name(self, other)

        if is_period_dtype(self.dtype):
            freq = self.freq
        else:
            self = cast(DatetimeTimedeltaMixin, self)
            freq = self.freq if self._can_fast_union(other) else None

        new_data = self._data._from_backing_data(joined)
        new_data._freq = freq

        return type(self)._simple_new(new_data, name=name)

    @doc(Index._convert_arr_indexer)
    def _convert_arr_indexer(self, keyarr):
        try:
            return self._data._validate_listlike(
                keyarr, "convert_arr_indexer", allow_object=True
            )
        except (ValueError, TypeError):
            return com.asarray_tuplesafe(keyarr)
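
A hedged sketch of the join freq logic above (_can_fast_union is internal;
its observable effect shows up in operations such as union):

import pandas as pd

a = pd.date_range("2020-01-01", periods=3, freq="D")
b = pd.date_range("2020-01-03", periods=3, freq="D")
print(a.union(b).freq)  # overlapping daily ranges fast-union, keeping <Day>
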
Code Example #4
File: datetimelike.py  Project: yeung-r/pandas
class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):
    """
    Common ops mixin to support a unified interface for datetimelike Index.
    """

    _can_hold_strings = False
    _data: Union[DatetimeArray, TimedeltaArray, PeriodArray]
    freq: Optional[BaseOffset]
    freqstr: Optional[str]
    _resolution_obj: Resolution
    _bool_ops: List[str] = []
    _field_ops: List[str] = []

    # error: "Callable[[Any], Any]" has no attribute "fget"
    hasnans = cache_readonly(
        DatetimeLikeArrayMixin._hasnans.fget  # type: ignore[attr-defined]
    )
    _hasnans = hasnans  # for index/array-agnostic code

    @property
    def _is_all_dates(self) -> bool:
        return True

    # ------------------------------------------------------------------------
    # Abstract data attributes

    @property
    def values(self) -> np.ndarray:
        # Note: PeriodArray overrides this to return an ndarray of objects.
        return self._data._ndarray

    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc and other functions.
        """
        result = lib.item_from_zerodim(result)
        if is_bool_dtype(result) or lib.is_scalar(result):
            return result

        attrs = self._get_attributes_dict()
        if not is_period_dtype(self.dtype) and attrs["freq"]:
            # no need to infer if freq is None
            attrs["freq"] = "infer"
        return type(self)(result, **attrs)

    # ------------------------------------------------------------------------

    def equals(self, other: Any) -> bool:
        """
        Determines if two Index objects contain the same elements.
        """
        if self.is_(other):
            return True

        if not isinstance(other, Index):
            return False
        elif other.dtype.kind in ["f", "i", "u", "c"]:
            return False
        elif not isinstance(other, type(self)):
            should_try = False
            inferable = self._data._infer_matches
            if other.dtype == object:
                should_try = other.inferred_type in inferable
            elif is_categorical_dtype(other.dtype):
                other = cast("CategoricalIndex", other)
                should_try = other.categories.inferred_type in inferable

            if should_try:
                try:
                    other = type(self)(other)
                except (ValueError, TypeError, OverflowError):
                    # e.g.
                    #  ValueError -> cannot parse str entry, or OutOfBoundsDatetime
                    #  TypeError  -> trying to convert IntervalIndex to DatetimeIndex
                    #  OverflowError -> Index([very_large_timedeltas])
                    return False

        if not is_dtype_equal(self.dtype, other.dtype):
            # have different timezone
            return False

        return np.array_equal(self.asi8, other.asi8)

    @Appender(Index.__contains__.__doc__)
    def __contains__(self, key: Any) -> bool:
        hash(key)
        try:
            self.get_loc(key)
        except (KeyError, TypeError, ValueError):
            return False
        return True

    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(self,
             indices,
             axis=0,
             allow_fill=True,
             fill_value=None,
             **kwargs):
        nv.validate_take((), kwargs)
        indices = np.asarray(indices, dtype=np.intp)

        maybe_slice = lib.maybe_indices_to_slice(indices, len(self))

        result = NDArrayBackedExtensionIndex.take(self, indices, axis,
                                                  allow_fill, fill_value,
                                                  **kwargs)
        if isinstance(maybe_slice, slice):
            freq = self._data._get_getitem_freq(maybe_slice)
            result._data._freq = freq
        return result
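
    # Editor's note (hedged sketch): when the requested indices reduce to a
    # contiguous step-1 slice, freq can be retained:
    #   idx = pd.date_range("2020-01-01", periods=5, freq="D")
    #   idx.take([1, 2, 3]).freq  # expected: <Day>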

    _can_hold_na = True

    _na_value = NaT
    """The expected NA value to use with this index."""

    def _convert_tolerance(self, tolerance, target):
        tolerance = np.asarray(to_timedelta(tolerance).to_numpy())
        return super()._convert_tolerance(tolerance, target)

    def tolist(self) -> List:
        """
        Return a list of the underlying data.
        """
        return list(self.astype(object))

    def min(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the minimum value of the Index or minimum along
        an axis.

        See Also
        --------
        numpy.ndarray.min
        Series.min : Return the minimum value in a Series.
        """
        nv.validate_min(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8

        if len(i8) and self.is_monotonic_increasing:
            # quick check
            if i8[0] != iNaT:
                return self._data._box_func(i8[0])

        if self.hasnans:
            if not skipna:
                return self._na_value
            i8 = i8[~self._isnan]

        if not len(i8):
            return self._na_value

        min_stamp = i8.min()
        return self._data._box_func(min_stamp)
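
    # Editor's note (hedged sketch): NaT is skipped by default.
    #   idx = pd.DatetimeIndex([pd.NaT, "2020-01-02", "2020-01-01"])
    #   idx.min()              # Timestamp('2020-01-01 00:00:00')
    #   idx.min(skipna=False)  # NaT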

    def argmin(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the minimum values along an axis.

        See `numpy.ndarray.argmin` for more information on the
        `axis` parameter.

        See Also
        --------
        numpy.ndarray.argmin
        """
        nv.validate_argmin(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            i8[mask] = np.iinfo("int64").max
        return i8.argmin()

    def max(self, axis=None, skipna=True, *args, **kwargs):
        """
        Return the maximum value of the Index or maximum along
        an axis.

        See Also
        --------
        numpy.ndarray.max
        Series.max : Return the maximum value in a Series.
        """
        nv.validate_max(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        i8 = self.asi8

        if len(i8) and self.is_monotonic:
            # quick check
            if i8[-1] != iNaT:
                return self._data._box_func(i8[-1])

        if self.hasnans:
            if not skipna:
                return self._na_value
            i8 = i8[~self._isnan]

        if not len(i8):
            return self._na_value

        max_stamp = i8.max()
        return self._data._box_func(max_stamp)

    def argmax(self, axis=None, skipna=True, *args, **kwargs):
        """
        Returns the indices of the maximum values along an axis.

        See `numpy.ndarray.argmax` for more information on the
        `axis` parameter.

        See Also
        --------
        numpy.ndarray.argmax
        """
        nv.validate_argmax(args, kwargs)
        nv.validate_minmax_axis(axis)

        i8 = self.asi8
        if self.hasnans:
            mask = self._isnan
            if mask.all() or not skipna:
                return -1
            i8 = i8.copy()
            i8[mask] = 0
        return i8.argmax()

    # --------------------------------------------------------------------
    # Rendering Methods

    def format(
        self,
        name: bool = False,
        formatter: Optional[Callable] = None,
        na_rep: str = "NaT",
        date_format: Optional[str] = None,
    ) -> List[str]:
        """
        Render a string representation of the Index.
        """
        header = []
        if name:
            header.append(
                ibase.pprint_thing(self.name, escape_chars=(
                    "\t", "\r", "\n")) if self.name is not None else "")

        if formatter is not None:
            return header + list(self.map(formatter))

        return self._format_with_header(header,
                                        na_rep=na_rep,
                                        date_format=date_format)

    def _format_with_header(self,
                            header: List[str],
                            na_rep: str = "NaT",
                            date_format: Optional[str] = None) -> List[str]:
        return header + list(
            self._format_native_types(na_rep=na_rep, date_format=date_format))

    @property
    def _formatter_func(self):
        return self._data._formatter()

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr, formatted_value).
        """
        attrs = super()._format_attrs()
        for attrib in self._attributes:
            if attrib == "freq":
                freq = self.freqstr
                if freq is not None:
                    freq = repr(freq)
                attrs.append(("freq", freq))
        return attrs

    def _summary(self, name=None) -> str:
        """
        Return a summarized representation.

        Parameters
        ----------
        name : str
            Name to use in the summary representation.

        Returns
        -------
        str
            Summarized representation of the index.
        """
        formatter = self._formatter_func
        if len(self) > 0:
            index_summary = f", {formatter(self[0])} to {formatter(self[-1])}"
        else:
            index_summary = ""

        if name is None:
            name = type(self).__name__
        result = f"{name}: {len(self)} entries{index_summary}"
        if self.freq:
            result += f"\nFreq: {self.freqstr}"

        # display as values, not quoted
        result = result.replace("'", "")
        return result

    # --------------------------------------------------------------------
    # Indexing Methods

    def _validate_partial_date_slice(self, reso: Resolution):
        raise NotImplementedError

    def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
        raise NotImplementedError

    def _partial_date_slice(
        self,
        reso: Resolution,
        parsed: datetime,
    ):
        """
        Parameters
        ----------
        reso : Resolution
        parsed : datetime

        Returns
        -------
        slice or ndarray[intp]
        """
        self._validate_partial_date_slice(reso)

        t1, t2 = self._parsed_string_to_bounds(reso, parsed)
        vals = self._data._ndarray
        unbox = self._data._unbox

        if self.is_monotonic_increasing:

            if len(self) and ((t1 < self[0] and t2 < self[0]) or
                              (t1 > self[-1] and t2 > self[-1])):
                # we are out of range
                raise KeyError

            # TODO: does this depend on being monotonic _increasing_?

            # a monotonic (sorted) series can be sliced
            left = vals.searchsorted(unbox(t1), side="left")
            right = vals.searchsorted(unbox(t2), side="right")
            return slice(left, right)

        else:
            lhs_mask = vals >= unbox(t1)
            rhs_mask = vals <= unbox(t2)

            # try to find the dates
            return (lhs_mask & rhs_mask).nonzero()[0]

    # --------------------------------------------------------------------
    # Arithmetic Methods

    __add__ = make_wrapped_arith_op("__add__")
    __sub__ = make_wrapped_arith_op("__sub__")
    __radd__ = make_wrapped_arith_op("__radd__")
    __rsub__ = make_wrapped_arith_op("__rsub__")
    __pow__ = make_wrapped_arith_op("__pow__")
    __rpow__ = make_wrapped_arith_op("__rpow__")
    __mul__ = make_wrapped_arith_op("__mul__")
    __rmul__ = make_wrapped_arith_op("__rmul__")
    __floordiv__ = make_wrapped_arith_op("__floordiv__")
    __rfloordiv__ = make_wrapped_arith_op("__rfloordiv__")
    __mod__ = make_wrapped_arith_op("__mod__")
    __rmod__ = make_wrapped_arith_op("__rmod__")
    __divmod__ = make_wrapped_arith_op("__divmod__")
    __rdivmod__ = make_wrapped_arith_op("__rdivmod__")
    __truediv__ = make_wrapped_arith_op("__truediv__")
    __rtruediv__ = make_wrapped_arith_op("__rtruediv__")

    def shift(self: _T, periods: int = 1, freq=None) -> _T:
        """
        Shift index by desired number of time frequency increments.

        This method is for shifting the values of datetime-like indexes
        by a specified time increment a given number of times.

        Parameters
        ----------
        periods : int, default 1
            Number of periods (or increments) to shift by,
            can be positive or negative.

            .. versionchanged:: 0.24.0

        freq : pandas.DateOffset, pandas.Timedelta or string, optional
            Frequency increment to shift by.
            If None, the index is shifted by its own `freq` attribute.
            Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.

        Returns
        -------
        pandas.DatetimeIndex
            Shifted index.

        See Also
        --------
        Index.shift : Shift values of Index.
        PeriodIndex.shift : Shift values of PeriodIndex.
        """
        arr = self._data.view()
        arr._freq = self.freq
        result = arr._time_shift(periods, freq=freq)
        return type(self)(result, name=self.name)

    # --------------------------------------------------------------------
    # List-like Methods

    def _get_delete_freq(self, loc: int):
        """
        Find the `freq` for self.delete(loc).
        """
        freq = None
        if is_period_dtype(self.dtype):
            freq = self.freq
        elif self.freq is not None:
            if is_integer(loc):
                if loc in (0, -len(self), -1, len(self) - 1):
                    freq = self.freq
            else:
                if is_list_like(loc):
                    loc = lib.maybe_indices_to_slice(
                        np.asarray(loc, dtype=np.intp), len(self))
                if isinstance(loc, slice) and loc.step in (1, None):
                    if loc.start in (0, None) or loc.stop in (len(self), None):
                        freq = self.freq
        return freq

    def _get_insert_freq(self, loc: int, item):
        """
        Find the `freq` for self.insert(loc, item).
        """
        value = self._data._validate_scalar(item)
        item = self._data._box_func(value)

        freq = None
        if is_period_dtype(self.dtype):
            freq = self.freq
        elif self.freq is not None:
            # freq can be preserved on edge cases
            if self.size:
                if item is NaT:
                    pass
                elif (loc == 0
                      or loc == -len(self)) and item + self.freq == self[0]:
                    freq = self.freq
                elif (loc == len(self)) and item - self.freq == self[-1]:
                    freq = self.freq
            else:
                # Adding a single item to an empty index may preserve freq
                if self.freq.is_on_offset(item):
                    freq = self.freq
        return freq

    @doc(NDArrayBackedExtensionIndex.delete)
    def delete(self: _T, loc) -> _T:
        result = super().delete(loc)
        result._data._freq = self._get_delete_freq(loc)
        return result

    @doc(NDArrayBackedExtensionIndex.insert)
    def insert(self, loc: int, item):
        result = super().insert(loc, item)
        if isinstance(result, type(self)):
            # i.e. parent class method did not cast
            result._data._freq = self._get_insert_freq(loc, item)
        return result

    # --------------------------------------------------------------------
    # Join/Set Methods

    _inner_indexer = _join_i8_wrapper(libjoin.inner_join_indexer)
    _outer_indexer = _join_i8_wrapper(libjoin.outer_join_indexer)
    _left_indexer = _join_i8_wrapper(libjoin.left_join_indexer)
    _left_indexer_unique = _join_i8_wrapper(libjoin.left_join_indexer_unique,
                                            with_indexers=False)

    def _get_join_freq(self, other):
        """
        Get the freq to attach to the result of a join operation.
        """
        if is_period_dtype(self.dtype):
            freq = self.freq
        else:
            self = cast(DatetimeTimedeltaMixin, self)
            freq = self.freq if self._can_fast_union(other) else None
        return freq

    def _wrap_joined_index(self, joined: np.ndarray, other):
        assert other.dtype == self.dtype, (other.dtype, self.dtype)
        assert joined.dtype == "i8" or joined.dtype == self.dtype, joined.dtype
        joined = joined.view(self._data._ndarray.dtype)
        result = super()._wrap_joined_index(joined, other)
        result._data._freq = self._get_join_freq(other)
        return result

    @doc(Index._convert_arr_indexer)
    def _convert_arr_indexer(self, keyarr):
        try:
            return self._data._validate_listlike(keyarr, allow_object=True)
        except (ValueError, TypeError):
            return com.asarray_tuplesafe(keyarr)
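
Finally, a hedged usage sketch of shift as defined above (with freq=None the
index is shifted by its own freq attribute):

import pandas as pd

idx = pd.date_range("2020-01-01", periods=3, freq="D")
print(idx.shift(2))            # each timestamp advances by 2 * freq (2 days)
print(idx.shift(3, freq="H"))  # shift by an explicit offset: 3 hours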