def __init__( self, objs: Union[Iterable["NDFrame"], Mapping[Label, "NDFrame"]], axis=0, join: str = "outer", keys=None, levels=None, names=None, ignore_index: bool = False, verify_integrity: bool = False, copy: bool = True, sort=False, ): if isinstance(objs, (ABCSeries, ABCDataFrame, str)): raise TypeError( "first argument must be an iterable of pandas " f'objects, you passed an object of type "{type(objs).__name__}"' ) if join == "outer": self.intersect = False elif join == "inner": self.intersect = True else: # pragma: no cover raise ValueError( "Only can inner (intersect) or outer (union) join the other axis" ) if isinstance(objs, abc.Mapping): if keys is None: keys = list(objs.keys()) objs = [objs[k] for k in keys] else: objs = list(objs) if len(objs) == 0: raise ValueError("No objects to concatenate") if keys is None: objs = list(com.not_none(*objs)) else: # #1649 clean_keys = [] clean_objs = [] for k, v in zip(keys, objs): if v is None: continue clean_keys.append(k) clean_objs.append(v) objs = clean_objs name = getattr(keys, "name", None) keys = Index(clean_keys, name=name) if len(objs) == 0: raise ValueError("All objects passed were None") # figure out what our result ndim is going to be ndims = set() for obj in objs: if not isinstance(obj, (ABCSeries, ABCDataFrame)): msg = (f"cannot concatenate object of type '{type(obj)}'; " "only Series and DataFrame objs are valid") raise TypeError(msg) ndims.add(obj.ndim) # get the sample # want the highest ndim that we have, and must be non-empty # unless all objs are empty sample: Optional["NDFrame"] = None if len(ndims) > 1: max_ndim = max(ndims) for obj in objs: if obj.ndim == max_ndim and np.sum(obj.shape): sample = obj break else: # filter out the empties if we have not multi-index possibilities # note to keep empty Series as it affect to result columns / name non_empties = [ obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, ABCSeries) ] if len(non_empties) and (keys is None and names is None and levels is None and not self.intersect): objs = non_empties sample = objs[0] if sample is None: sample = objs[0] self.objs = objs # Standardize axis parameter to int if isinstance(sample, ABCSeries): axis = sample._constructor_expanddim._get_axis_number(axis) else: axis = sample._get_axis_number(axis) # Need to flip BlockManager axis in the DataFrame special case self._is_frame = isinstance(sample, ABCDataFrame) if self._is_frame: axis = sample._get_block_manager_axis(axis) self._is_series = isinstance(sample, ABCSeries) if not 0 <= axis <= sample.ndim: raise AssertionError( f"axis must be between 0 and {sample.ndim}, input was {axis}") # if we have mixed ndims, then convert to highest ndim # creating column numbers as needed if len(ndims) > 1: current_column = 0 max_ndim = sample.ndim self.objs, objs = [], self.objs for obj in objs: ndim = obj.ndim if ndim == max_ndim: pass elif ndim != max_ndim - 1: raise ValueError("cannot concatenate unaligned mixed " "dimensional NDFrame objects") else: name = getattr(obj, "name", None) if ignore_index or name is None: name = current_column current_column += 1 # doing a row-wise concatenation so need everything # to line up if self._is_frame and axis == 1: name = 0 # mypy needs to know sample is not an NDFrame sample = cast("FrameOrSeriesUnion", sample) obj = sample._constructor({name: obj}) self.objs.append(obj) # note: this is the BlockManager axis (since DataFrame is transposed) self.bm_axis = axis self.axis = 1 - self.bm_axis if self._is_frame else 0 self.keys = keys self.names = names or getattr(keys, "names", None) self.levels = levels self.sort = sort self.ignore_index = ignore_index self.verify_integrity = verify_integrity self.copy = copy self.new_axes = self._get_new_axes()
def interval_range(start=None, end=None, periods=None, freq=None, name=None, closed="right"): """ Return a fixed frequency IntervalIndex. Parameters ---------- start : numeric or datetime-like, default None Left bound for generating intervals. end : numeric or datetime-like, default None Right bound for generating intervals. periods : int, default None Number of periods to generate. freq : numeric, str, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 for numeric and 'D' for datetime-like. name : str, default None Name of the resulting IntervalIndex. closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. Returns ------- IntervalIndex See Also -------- IntervalIndex : An Index of intervals that are all closed on the same side. Notes ----- Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, exactly three must be specified. If ``freq`` is omitted, the resulting ``IntervalIndex`` will have ``periods`` linearly spaced elements between ``start`` and ``end``, inclusively. To learn more about datetime-like frequency strings, please see `this link <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. Examples -------- Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5) IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], closed='right', dtype='interval[int64]') Additionally, datetime-like input is also supported. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... end=pd.Timestamp('2017-01-04')) IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04]], closed='right', dtype='interval[datetime64[ns]]') The ``freq`` parameter specifies the frequency between the left and right. endpoints of the individual intervals within the ``IntervalIndex``. For numeric ``start`` and ``end``, the frequency must also be numeric. >>> pd.interval_range(start=0, periods=4, freq=1.5) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], closed='right', dtype='interval[float64]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... periods=3, freq='MS') IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], (2017-03-01, 2017-04-01]], closed='right', dtype='interval[datetime64[ns]]') Specify ``start``, ``end``, and ``periods``; the frequency is generated automatically (linearly spaced). >>> pd.interval_range(start=0, end=6, periods=4) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], closed='right', dtype='interval[float64]') The ``closed`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. >>> pd.interval_range(end=5, periods=4, closed='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], closed='both', dtype='interval[int64]') """ start = com.maybe_box_datetimelike(start) end = com.maybe_box_datetimelike(end) endpoint = start if start is not None else end if freq is None and com.any_none(periods, start, end): freq = 1 if is_number(endpoint) else "D" if com.count_not_none(start, end, periods, freq) != 3: raise ValueError("Of the four parameters: start, end, periods, and " "freq, exactly three must be specified") if not _is_valid_endpoint(start): raise ValueError( f"start must be numeric or datetime-like, got {start}") elif not _is_valid_endpoint(end): raise ValueError(f"end must be numeric or datetime-like, got {end}") if is_float(periods): periods = int(periods) elif not is_integer(periods) and periods is not None: raise TypeError(f"periods must be a number, got {periods}") if freq is not None and not is_number(freq): try: freq = to_offset(freq) except ValueError: raise ValueError( f"freq must be numeric or convertible to DateOffset, got {freq}" ) # verify type compatibility if not all([ _is_type_compatible(start, end), _is_type_compatible(start, freq), _is_type_compatible(end, freq), ]): raise TypeError("start, end, freq need to be type compatible") # +1 to convert interval count to breaks count (n breaks = n-1 intervals) if periods is not None: periods += 1 if is_number(endpoint): # force consistency between start/end/freq (lower end if freq skips it) if com.all_not_none(start, end, freq): end -= (end - start) % freq # compute the period/start/end if unspecified (at most one) if periods is None: periods = int((end - start) // freq) + 1 elif start is None: start = end - (periods - 1) * freq elif end is None: end = start + (periods - 1) * freq breaks = np.linspace(start, end, periods) if all(is_integer(x) for x in com.not_none(start, end, freq)): # np.linspace always produces float output breaks = maybe_downcast_to_dtype(breaks, "int64") else: # delegate to the appropriate range function if isinstance(endpoint, Timestamp): range_func = date_range else: range_func = timedelta_range breaks = range_func(start=start, end=end, periods=periods, freq=freq) return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def interval_range( start=None, end=None, periods=None, freq=None, name: Hashable = None, closed: lib.NoDefault = lib.no_default, inclusive: IntervalClosedType | None = None, ) -> IntervalIndex: """ Return a fixed frequency IntervalIndex. Parameters ---------- start : numeric or datetime-like, default None Left bound for generating intervals. end : numeric or datetime-like, default None Right bound for generating intervals. periods : int, default None Number of periods to generate. freq : numeric, str, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 for numeric and 'D' for datetime-like. name : str, default None Name of the resulting IntervalIndex. closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. .. deprecated:: 1.5.0 Argument `closed` has been deprecated to standardize boundary inputs. Use `inclusive` instead, to set each bound as closed or open. inclusive : {"both", "neither", "left", "right"}, default "both" Include boundaries; Whether to set each bound as closed or open. .. versionadded:: 1.5.0 Returns ------- IntervalIndex See Also -------- IntervalIndex : An Index of intervals that are all closed on the same side. Notes ----- Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, exactly three must be specified. If ``freq`` is omitted, the resulting ``IntervalIndex`` will have ``periods`` linearly spaced elements between ``start`` and ``end``, inclusively. To learn more about datetime-like frequency strings, please see `this link <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. Examples -------- Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5, inclusive="right") IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], dtype='interval[int64, right]') Additionally, datetime-like input is also supported. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... end=pd.Timestamp('2017-01-04'), inclusive="right") IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04]], dtype='interval[datetime64[ns], right]') The ``freq`` parameter specifies the frequency between the left and right. endpoints of the individual intervals within the ``IntervalIndex``. For numeric ``start`` and ``end``, the frequency must also be numeric. >>> pd.interval_range(start=0, periods=4, freq=1.5, inclusive="right") IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], dtype='interval[float64, right]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... periods=3, freq='MS', inclusive="right") IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], (2017-03-01, 2017-04-01]], dtype='interval[datetime64[ns], right]') Specify ``start``, ``end``, and ``periods``; the frequency is generated automatically (linearly spaced). >>> pd.interval_range(start=0, end=6, periods=4, inclusive="right") IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], dtype='interval[float64, right]') The ``inclusive`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. >>> pd.interval_range(end=5, periods=4, inclusive='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], dtype='interval[int64, both]') """ if inclusive is not None and not isinstance(closed, lib.NoDefault): raise ValueError( "Deprecated argument `closed` cannot be passed " "if argument `inclusive` is not None" ) elif not isinstance(closed, lib.NoDefault): warnings.warn( "Argument `closed` is deprecated in favor of `inclusive`.", FutureWarning, stacklevel=2, ) if closed is None: inclusive = "both" elif closed in ("both", "neither", "left", "right"): inclusive = closed else: raise ValueError( "Argument `closed` has to be either" "'both', 'neither', 'left' or 'right'" ) elif inclusive is None: inclusive = "both" start = maybe_box_datetimelike(start) end = maybe_box_datetimelike(end) endpoint = start if start is not None else end if freq is None and com.any_none(periods, start, end): freq = 1 if is_number(endpoint) else "D" if com.count_not_none(start, end, periods, freq) != 3: raise ValueError( "Of the four parameters: start, end, periods, and " "freq, exactly three must be specified" ) if not _is_valid_endpoint(start): raise ValueError(f"start must be numeric or datetime-like, got {start}") elif not _is_valid_endpoint(end): raise ValueError(f"end must be numeric or datetime-like, got {end}") if is_float(periods): periods = int(periods) elif not is_integer(periods) and periods is not None: raise TypeError(f"periods must be a number, got {periods}") if freq is not None and not is_number(freq): try: freq = to_offset(freq) except ValueError as err: raise ValueError( f"freq must be numeric or convertible to DateOffset, got {freq}" ) from err # verify type compatibility if not all( [ _is_type_compatible(start, end), _is_type_compatible(start, freq), _is_type_compatible(end, freq), ] ): raise TypeError("start, end, freq need to be type compatible") # +1 to convert interval count to breaks count (n breaks = n-1 intervals) if periods is not None: periods += 1 breaks: np.ndarray | TimedeltaIndex | DatetimeIndex if is_number(endpoint): # force consistency between start/end/freq (lower end if freq skips it) if com.all_not_none(start, end, freq): end -= (end - start) % freq # compute the period/start/end if unspecified (at most one) if periods is None: periods = int((end - start) // freq) + 1 elif start is None: start = end - (periods - 1) * freq elif end is None: end = start + (periods - 1) * freq breaks = np.linspace(start, end, periods) if all(is_integer(x) for x in com.not_none(start, end, freq)): # np.linspace always produces float output # error: Incompatible types in assignment (expression has type # "Union[ExtensionArray, ndarray]", variable has type "ndarray") breaks = maybe_downcast_numeric( # type: ignore[assignment] breaks, np.dtype("int64") ) else: # delegate to the appropriate range function if isinstance(endpoint, Timestamp): breaks = date_range(start=start, end=end, periods=periods, freq=freq) else: breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq) return IntervalIndex.from_breaks(breaks, name=name, closed=inclusive)