def __new__( cls, data, inclusive=None, closed: None | lib.NoDefault = lib.no_default, dtype: Dtype | None = None, copy: bool = False, name: Hashable = None, verify_integrity: bool = True, ) -> IntervalIndex: inclusive, closed = _warning_interval(inclusive, closed) name = maybe_extract_name(name, data, cls) with rewrite_exception("IntervalArray", cls.__name__): array = IntervalArray( data, inclusive=inclusive, copy=copy, dtype=dtype, verify_integrity=verify_integrity, ) return cls._simple_new(array, name)
def __init__( self, subtype, inclusive: str | None = None, closed: None | lib.NoDefault = lib.no_default, ) -> None: # attributes need to be set first before calling # super init (as that calls serialize) inclusive, closed = _warning_interval(inclusive, closed) assert inclusive in VALID_CLOSED self._closed = inclusive if not isinstance(subtype, pyarrow.DataType): subtype = pyarrow.type_for_alias(str(subtype)) self._subtype = subtype storage_type = pyarrow.struct([("left", subtype), ("right", subtype)]) pyarrow.ExtensionType.__init__(self, storage_type, "pandas.interval")
def from_tuples( cls, data, inclusive=None, closed: None | lib.NoDefault = lib.no_default, name: Hashable = None, copy: bool = False, dtype: Dtype | None = None, ) -> IntervalIndex: inclusive, closed = _warning_interval(inclusive, closed) if inclusive is None: inclusive = "both" with rewrite_exception("IntervalArray", cls.__name__): arr = IntervalArray.from_tuples( data, inclusive=inclusive, copy=copy, dtype=dtype ) return cls._simple_new(arr, name=name)
def interval_range( start=None, end=None, periods=None, freq=None, name: Hashable = None, closed: IntervalClosedType | lib.NoDefault = lib.no_default, inclusive: IntervalClosedType | None = None, ) -> IntervalIndex: """ Return a fixed frequency IntervalIndex. Parameters ---------- start : numeric or datetime-like, default None Left bound for generating intervals. end : numeric or datetime-like, default None Right bound for generating intervals. periods : int, default None Number of periods to generate. freq : numeric, str, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 for numeric and 'D' for datetime-like. name : str, default None Name of the resulting IntervalIndex. closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. .. deprecated:: 1.5.0 Argument `closed` has been deprecated to standardize boundary inputs. Use `inclusive` instead, to set each bound as closed or open. inclusive : {"both", "neither", "left", "right"}, default "both" Include boundaries; Whether to set each bound as closed or open. .. versionadded:: 1.5.0 Returns ------- IntervalIndex See Also -------- IntervalIndex : An Index of intervals that are all closed on the same side. Notes ----- Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, exactly three must be specified. If ``freq`` is omitted, the resulting ``IntervalIndex`` will have ``periods`` linearly spaced elements between ``start`` and ``end``, inclusively. To learn more about datetime-like frequency strings, please see `this link <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. Examples -------- Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5, inclusive="right") IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], dtype='interval[int64, right]') Additionally, datetime-like input is also supported. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... end=pd.Timestamp('2017-01-04'), inclusive="right") IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04]], dtype='interval[datetime64[ns], right]') The ``freq`` parameter specifies the frequency between the left and right. endpoints of the individual intervals within the ``IntervalIndex``. For numeric ``start`` and ``end``, the frequency must also be numeric. >>> pd.interval_range(start=0, periods=4, freq=1.5, inclusive="right") IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], dtype='interval[float64, right]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... periods=3, freq='MS', inclusive="right") IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], (2017-03-01, 2017-04-01]], dtype='interval[datetime64[ns], right]') Specify ``start``, ``end``, and ``periods``; the frequency is generated automatically (linearly spaced). >>> pd.interval_range(start=0, end=6, periods=4, inclusive="right") IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], dtype='interval[float64, right]') The ``inclusive`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. >>> pd.interval_range(end=5, periods=4, inclusive='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], dtype='interval[int64, both]') """ inclusive, closed = _warning_interval(inclusive, closed) if inclusive is None: inclusive = "both" start = maybe_box_datetimelike(start) end = maybe_box_datetimelike(end) endpoint = start if start is not None else end if freq is None and com.any_none(periods, start, end): freq = 1 if is_number(endpoint) else "D" if com.count_not_none(start, end, periods, freq) != 3: raise ValueError( "Of the four parameters: start, end, periods, and " "freq, exactly three must be specified" ) if not _is_valid_endpoint(start): raise ValueError(f"start must be numeric or datetime-like, got {start}") elif not _is_valid_endpoint(end): raise ValueError(f"end must be numeric or datetime-like, got {end}") if is_float(periods): periods = int(periods) elif not is_integer(periods) and periods is not None: raise TypeError(f"periods must be a number, got {periods}") if freq is not None and not is_number(freq): try: freq = to_offset(freq) except ValueError as err: raise ValueError( f"freq must be numeric or convertible to DateOffset, got {freq}" ) from err # verify type compatibility if not all( [ _is_type_compatible(start, end), _is_type_compatible(start, freq), _is_type_compatible(end, freq), ] ): raise TypeError("start, end, freq need to be type compatible") # +1 to convert interval count to breaks count (n breaks = n-1 intervals) if periods is not None: periods += 1 breaks: np.ndarray | TimedeltaIndex | DatetimeIndex if is_number(endpoint): # force consistency between start/end/freq (lower end if freq skips it) if com.all_not_none(start, end, freq): end -= (end - start) % freq # compute the period/start/end if unspecified (at most one) if periods is None: periods = int((end - start) // freq) + 1 elif start is None: start = end - (periods - 1) * freq elif end is None: end = start + (periods - 1) * freq breaks = np.linspace(start, end, periods) if all(is_integer(x) for x in com.not_none(start, end, freq)): # np.linspace always produces float output breaks = maybe_downcast_numeric(breaks, np.dtype("int64")) else: # delegate to the appropriate range function if isinstance(endpoint, Timestamp): breaks = date_range(start=start, end=end, periods=periods, freq=freq) else: breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq) return IntervalIndex.from_breaks(breaks, name=name, inclusive=inclusive)
def __new__( cls, subtype=None, inclusive: str_type | None = None, closed: None | lib.NoDefault = lib.no_default, ): from pandas.core.dtypes.common import ( is_string_dtype, pandas_dtype, ) inclusive, closed = _warning_interval(inclusive, closed) if inclusive is not None and inclusive not in { "right", "left", "both", "neither", }: raise ValueError( "inclusive must be one of 'right', 'left', 'both', 'neither'") if isinstance(subtype, IntervalDtype): if inclusive is not None and inclusive != subtype.inclusive: raise ValueError( "dtype.inclusive and 'inclusive' do not match. " "Try IntervalDtype(dtype.subtype, inclusive) instead.") return subtype elif subtype is None: # we are called as an empty constructor # generally for pickle compat u = object.__new__(cls) u._subtype = None u._closed = inclusive return u elif isinstance(subtype, str) and subtype.lower() == "interval": subtype = None else: if isinstance(subtype, str): m = cls._match.search(subtype) if m is not None: gd = m.groupdict() subtype = gd["subtype"] if gd.get("inclusive", None) is not None: if inclusive is not None: if inclusive != gd["inclusive"]: raise ValueError( "'inclusive' keyword does not match value " "specified in dtype string") inclusive = gd["inclusive"] try: subtype = pandas_dtype(subtype) except TypeError as err: raise TypeError("could not construct IntervalDtype") from err if CategoricalDtype.is_dtype(subtype) or is_string_dtype(subtype): # GH 19016 msg = ("category, object, and string subtypes are not supported " "for IntervalDtype") raise TypeError(msg) key = str(subtype) + str(inclusive) try: return cls._cache_dtypes[key] except KeyError: u = object.__new__(cls) u._subtype = subtype u._closed = inclusive cls._cache_dtypes[key] = u return u