Beispiel #1
0
    def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs):
        # **kwargs are for compat with TimedeltaIndex, which includes `name`
        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError('Of the four parameters: start, end, periods, '
                             'and freq, exactly three must be specified')

        if start is not None:
            start = Timedelta(start)

        if end is not None:
            end = Timedelta(end)

        if start is None and end is None:
            if closed is not None:
                raise ValueError("Closed has to be None if not both of start"
                                 "and end are defined")

        left_closed, right_closed = dtl.validate_endpoints(closed)

        if freq is not None:
            index = _generate_regular_range(start, end, periods, freq)
            index = cls._simple_new(index, freq=freq, **kwargs)
        else:
            index = np.linspace(start.value, end.value, periods).astype('i8')
            # TODO: shouldn't we pass `name` here?  (via **kwargs)
            index = cls._simple_new(index, freq=freq)

        if not left_closed:
            index = index[1:]
        if not right_closed:
            index = index[:-1]

        return index
Beispiel #2
0
    def _generate_range(cls, start, end, periods, freq, closed=None):

        periods = dtl.validate_periods(periods)
        if freq is None and any(x is None for x in [periods, start, end]):
            raise ValueError('Must provide freq argument if no data is '
                             'supplied')

        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError('Of the four parameters: start, end, periods, '
                             'and freq, exactly three must be specified')

        if start is not None:
            start = Timedelta(start)

        if end is not None:
            end = Timedelta(end)

        if start is None and end is None:
            if closed is not None:
                raise ValueError("Closed has to be None if not both of start"
                                 "and end are defined")

        left_closed, right_closed = dtl.validate_endpoints(closed)

        if freq is not None:
            index = _generate_regular_range(start, end, periods, freq)
        else:
            index = np.linspace(start.value, end.value, periods).astype('i8')

        if not left_closed:
            index = index[1:]
        if not right_closed:
            index = index[:-1]

        return cls._simple_new(index, freq=freq)
Beispiel #3
0
def period_range(start=None, end=None, periods=None, freq='D', name=None):
    """
    Return a fixed frequency PeriodIndex, with day (calendar) as the default
    frequency

    Parameters
    ----------
    start : string or period-like, default None
        Left bound for generating periods
    end : string or period-like, default None
        Right bound for generating periods
    periods : integer, default None
        Number of periods to generate
    freq : string or DateOffset, default 'D'
        Frequency alias
    name : string, default None
        Name of the resulting PeriodIndex

    Notes
    -----
    Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
    must be specified.

    To learn more about the frequency strings, please see `this link
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.

    Returns
    -------
    prng : PeriodIndex

    Examples
    --------

    >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
    PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05',
                 '2017-06', '2017-06', '2017-07', '2017-08', '2017-09',
                 '2017-10', '2017-11', '2017-12', '2018-01'],
                dtype='period[M]', freq='M')

    If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
    endpoints for a ``PeriodIndex`` with frequency matching that of the
    ``period_range`` constructor.

    >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),
    ...                 end=pd.Period('2017Q2', freq='Q'), freq='M')
    PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],
                dtype='period[M]', freq='M')
    """
    if com.count_not_none(start, end, periods) != 2:
        raise ValueError('Of the three parameters: start, end, and periods, '
                         'exactly two must be specified')

    return PeriodIndex(start=start, end=end, periods=periods,
                       freq=freq, name=name)
Beispiel #4
0
    def _generate_range(cls, start, end, periods, freq, fields):
        if freq is not None:
            freq = Period._maybe_convert_freq(freq)

        field_count = len(fields)
        if com.count_not_none(start, end) > 0:
            if field_count > 0:
                raise ValueError('Can either instantiate from fields '
                                 'or endpoints, but not both')
            subarr, freq = _get_ordinal_range(start, end, periods, freq)
        elif field_count > 0:
            subarr, freq = _range_from_fields(freq=freq, **fields)
        else:
            raise ValueError('Not enough parameters to construct '
                             'Period range')

        return subarr, freq
Beispiel #5
0
def _get_ordinal_range(start, end, periods, freq, mult=1):
    if com.count_not_none(start, end, periods) != 2:
        raise ValueError('Of the three parameters: start, end, and periods, '
                         'exactly two must be specified')

    if freq is not None:
        _, mult = libfrequencies.get_freq_code(freq)

    if start is not None:
        start = Period(start, freq)
    if end is not None:
        end = Period(end, freq)

    is_start_per = isinstance(start, Period)
    is_end_per = isinstance(end, Period)

    if is_start_per and is_end_per and start.freq != end.freq:
        raise ValueError('start and end must have same freq')
    if (start is NaT or end is NaT):
        raise ValueError('start and end must not be NaT')

    if freq is None:
        if is_start_per:
            freq = start.freq
        elif is_end_per:
            freq = end.freq
        else:  # pragma: no cover
            raise ValueError('Could not infer freq from start/end')

    if periods is not None:
        periods = periods * mult
        if start is None:
            data = np.arange(end.ordinal - periods + mult,
                             end.ordinal + 1, mult,
                             dtype=np.int64)
        else:
            data = np.arange(start.ordinal, start.ordinal + periods, mult,
                             dtype=np.int64)
    else:
        data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64)

    return data, freq
Beispiel #6
0
def _get_ordinal_range(start, end, periods, freq, mult=1):
    if com.count_not_none(start, end, periods) != 2:
        raise ValueError('Of the three parameters: start, end, and periods, '
                         'exactly two must be specified')

    if freq is not None:
        _, mult = frequencies.get_freq_code(freq)

    if start is not None:
        start = Period(start, freq)
    if end is not None:
        end = Period(end, freq)

    is_start_per = isinstance(start, Period)
    is_end_per = isinstance(end, Period)

    if is_start_per and is_end_per and start.freq != end.freq:
        raise ValueError('start and end must have same freq')
    if (start is NaT or end is NaT):
        raise ValueError('start and end must not be NaT')

    if freq is None:
        if is_start_per:
            freq = start.freq
        elif is_end_per:
            freq = end.freq
        else:  # pragma: no cover
            raise ValueError('Could not infer freq from start/end')

    if periods is not None:
        periods = periods * mult
        if start is None:
            data = np.arange(end.ordinal - periods + mult,
                             end.ordinal + 1, mult,
                             dtype=np.int64)
        else:
            data = np.arange(start.ordinal, start.ordinal + periods, mult,
                             dtype=np.int64)
    else:
        data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64)

    return data, freq
Beispiel #7
0
    def _generate_range(cls, start, end, periods, freq, closed=None):

        periods = dtl.validate_periods(periods)
        if freq is None and any(x is None for x in [periods, start, end]):
            raise ValueError("Must provide freq argument if no data is supplied")

        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError(
                "Of the four parameters: start, end, periods, "
                "and freq, exactly three must be specified"
            )

        if start is not None:
            start = Timedelta(start)

        if end is not None:
            end = Timedelta(end)

        if start is None and end is None:
            if closed is not None:
                raise ValueError(
                    "Closed has to be None if not both of startand end are defined"
                )

        left_closed, right_closed = dtl.validate_endpoints(closed)

        if freq is not None:
            index = _generate_regular_range(start, end, periods, freq)
        else:
            index = np.linspace(start.value, end.value, periods).astype("i8")
            if len(index) >= 2:
                # Infer a frequency
                td = Timedelta(index[1] - index[0])
                freq = to_offset(td)

        if not left_closed:
            index = index[1:]
        if not right_closed:
            index = index[:-1]

        return cls._simple_new(index, freq=freq)
 def __init__(self,
              tickers: {str, list} = None,
              observation_calendar: pd.DatetimeIndex = None,
              eligibility_df: pd.DataFrame = None):
     # if eligibility_df is not specified, create one from the tickers and observation calendar if they are both
     # available, else set it to None
     if eligibility_df is None:
         if com.count_not_none(tickers, observation_calendar) == 2:
             # reformat to list and capital letters
             if not isinstance(tickers, (list, tuple)):
                 tickers = [tickers.upper()]
             else:
                 tickers = [ticker.upper() for ticker in tickers]
             self.eligibility_df = pd.DataFrame(index=observation_calendar,
                                                columns=tickers,
                                                data=1)
         else:
             # set to None since either one of tickers or observation_calendar was not specified
             self._eligibility_df = None
     else:
         self.eligibility_df = eligibility_df
Beispiel #9
0
    def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs):
        # **kwargs are for compat with TimedeltaIndex, which includes `name`

        periods = dtl.validate_periods(periods)
        if freq is None and any(x is None for x in [periods, start, end]):
            raise ValueError('Must provide freq argument if no data is '
                             'supplied')

        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError('Of the four parameters: start, end, periods, '
                             'and freq, exactly three must be specified')

        if start is not None:
            start = Timedelta(start)

        if end is not None:
            end = Timedelta(end)

        if start is None and end is None:
            if closed is not None:
                raise ValueError("Closed has to be None if not both of start"
                                 "and end are defined")

        left_closed, right_closed = dtl.validate_endpoints(closed)

        if freq is not None:
            index = _generate_regular_range(start, end, periods, freq)
            index = cls._simple_new(index, freq=freq, **kwargs)
        else:
            index = np.linspace(start.value, end.value, periods).astype('i8')
            # TODO: shouldn't we pass `name` here?  (via **kwargs)
            index = cls._simple_new(index, freq=freq)

        if not left_closed:
            index = index[1:]
        if not right_closed:
            index = index[:-1]

        return index
 def _calculate_weight(self):
     # calculate the number of non-NaN columns per row
     if self.net_zero_exposure:
         if com.count_not_none(self.max_instrument_weight,
                               self.min_instrument_weight) > 0:
             logger.warning(
                 'net zero exposure is not guaranteed since constraints are specified (max/min weights)'
             )
         positive_columns = self.signal_df[self.signal_df > 0.0].count(
             axis=1)
         negative_columns = self.signal_df[self.signal_df < 0.0].count(
             axis=1)
         weight_df = self.signal_df.copy().replace(np.nan, 0)
         weight_df[weight_df > 0.0] = weight_df[weight_df > 0.0].divide(
             positive_columns, axis=0)
         weight_df[weight_df < 0.0] = weight_df[weight_df < 0.0].divide(
             negative_columns, axis=0)
     else:
         numeric_columns = self.signal_df.count(axis=1)
         weight_df = self.signal_df.divide(numeric_columns,
                                           axis=0).replace(np.nan, 0)
     return weight_df
Beispiel #11
0
def interval_range(
    start=None, end=None, periods=None, freq=None, name=None, closed="right"
):
    """
    Return a fixed frequency IntervalIndex.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals.
    end : numeric or datetime-like, default None
        Right bound for generating intervals.
    periods : int, default None
        Number of periods to generate.
    freq : numeric, str, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like.  Default is 1
        for numeric and 'D' for datetime-like.
    name : str, default None
        Name of the resulting IntervalIndex.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    IntervalIndex

    See Also
    --------
    IntervalIndex : An Index of intervals that are all closed on the same side.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]],
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the frequency between the left and right.
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]],
                  closed='right', dtype='interval[datetime64[ns]]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
              closed='right',
              dtype='interval[float64]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
                  closed='both', dtype='interval[int64]')
    """
    start = com.maybe_box_datetimelike(start)
    end = com.maybe_box_datetimelike(end)
    endpoint = start if start is not None else end

    if freq is None and com.any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else "D"

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError(
            "Of the four parameters: start, end, periods, and "
            "freq, exactly three must be specified"
        )

    if not _is_valid_endpoint(start):
        raise ValueError(f"start must be numeric or datetime-like, got {start}")
    elif not _is_valid_endpoint(end):
        raise ValueError(f"end must be numeric or datetime-like, got {end}")

    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        raise TypeError(f"periods must be a number, got {periods}")

    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError as err:
            raise ValueError(
                f"freq must be numeric or convertible to DateOffset, got {freq}"
            ) from err

    # verify type compatibility
    if not all(
        [
            _is_type_compatible(start, end),
            _is_type_compatible(start, freq),
            _is_type_compatible(end, freq),
        ]
    ):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com.all_not_none(start, end, freq):
            end -= (end - start) % freq

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com.not_none(start, end, freq)):
            # np.linspace always produces float output
            breaks = maybe_downcast_to_dtype(breaks, "int64")
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            range_func = date_range
        else:
            range_func = timedelta_range

        breaks = range_func(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
Beispiel #12
0
 def __init__(
     self,
     obj: NDFrame,
     com: float | None = None,
     span: float | None = None,
     halflife: float | TimedeltaConvertibleTypes | None = None,
     alpha: float | None = None,
     min_periods: int | None = 0,
     adjust: bool = True,
     ignore_na: bool = False,
     axis: Axis = 0,
     times: str | np.ndarray | NDFrame | None = None,
     method: str = "single",
     *,
     selection=None,
 ) -> None:
     super().__init__(
         obj=obj,
         min_periods=1 if min_periods is None else max(int(min_periods), 1),
         on=None,
         center=False,
         closed=None,
         method=method,
         axis=axis,
         selection=selection,
     )
     self.com = com
     self.span = span
     self.halflife = halflife
     self.alpha = alpha
     self.adjust = adjust
     self.ignore_na = ignore_na
     self.times = times
     if self.times is not None:
         if not self.adjust:
             raise NotImplementedError(
                 "times is not supported with adjust=False.")
         if isinstance(self.times, str):
             warnings.warn(
                 ("Specifying times as a string column label is deprecated "
                  "and will be removed in a future version. Pass the column "
                  "into times instead."),
                 FutureWarning,
                 stacklevel=find_stack_level(),
             )
             # self.times cannot be str anymore
             self.times = cast("Series", self._selected_obj[self.times])
         if not is_datetime64_ns_dtype(self.times):
             raise ValueError("times must be datetime64[ns] dtype.")
         if len(self.times) != len(obj):
             raise ValueError(
                 "times must be the same length as the object.")
         if not isinstance(self.halflife,
                           (str, datetime.timedelta, np.timedelta64)):
             raise ValueError(
                 "halflife must be a timedelta convertible object")
         if isna(self.times).any():
             raise ValueError("Cannot convert NaT values to integer")
         self._deltas = _calculate_deltas(self.times, self.halflife)
         # Halflife is no longer applicable when calculating COM
         # But allow COM to still be calculated if the user passes other decay args
         if common.count_not_none(self.com, self.span, self.alpha) > 0:
             self._com = get_center_of_mass(self.com, self.span, None,
                                            self.alpha)
         else:
             self._com = 1.0
     else:
         if self.halflife is not None and isinstance(
                 self.halflife, (str, datetime.timedelta, np.timedelta64)):
             raise ValueError(
                 "halflife can only be a timedelta convertible argument if "
                 "times is not None.")
         # Without times, points are equally spaced
         self._deltas = np.ones(max(self.obj.shape[self.axis] - 1, 0),
                                dtype=np.float64)
         self._com = get_center_of_mass(
             # error: Argument 3 to "get_center_of_mass" has incompatible type
             # "Union[float, Any, None, timedelta64, signedinteger[_64Bit]]";
             # expected "Optional[float]"
             self.com,
             self.span,
             self.halflife,  # type: ignore[arg-type]
             self.alpha,
         )
Beispiel #13
0
 def __init__(
     self,
     obj,
     com: Optional[float] = None,
     span: Optional[float] = None,
     halflife: Optional[Union[float, TimedeltaConvertibleTypes]] = None,
     alpha: Optional[float] = None,
     min_periods: int = 0,
     adjust: bool = True,
     ignore_na: bool = False,
     axis: int = 0,
     times: Optional[Union[str, np.ndarray, FrameOrSeries]] = None,
 ):
     super().__init__(
         obj=obj,
         min_periods=max(int(min_periods), 1),
         on=None,
         center=False,
         closed=None,
         method="single",
         axis=axis,
     )
     self.com = com
     self.span = span
     self.halflife = halflife
     self.alpha = alpha
     self.adjust = adjust
     self.ignore_na = ignore_na
     self.times = times
     if self.times is not None:
         if not self.adjust:
             raise NotImplementedError(
                 "times is not supported with adjust=False.")
         if isinstance(self.times, str):
             self.times = self._selected_obj[self.times]
         if not is_datetime64_ns_dtype(self.times):
             raise ValueError("times must be datetime64[ns] dtype.")
         if len(self.times) != len(obj):
             raise ValueError(
                 "times must be the same length as the object.")
         if not isinstance(self.halflife, (str, datetime.timedelta)):
             raise ValueError(
                 "halflife must be a string or datetime.timedelta object")
         if isna(self.times).any():
             raise ValueError("Cannot convert NaT values to integer")
         _times = np.asarray(self.times.view(np.int64), dtype=np.float64)
         _halflife = float(Timedelta(self.halflife).value)
         self._deltas = np.diff(_times) / _halflife
         # Halflife is no longer applicable when calculating COM
         # But allow COM to still be calculated if the user passes other decay args
         if common.count_not_none(self.com, self.span, self.alpha) > 0:
             self._com = get_center_of_mass(self.com, self.span, None,
                                            self.alpha)
         else:
             self._com = 1.0
     else:
         if self.halflife is not None and isinstance(
                 self.halflife, (str, datetime.timedelta)):
             raise ValueError(
                 "halflife can only be a timedelta convertible argument if "
                 "times is not None.")
         # Without times, points are equally spaced
         self._deltas = np.ones(max(len(self.obj) - 1, 0), dtype=np.float64)
         self._com = get_center_of_mass(self.com, self.span, self.halflife,
                                        self.alpha)
Beispiel #14
0
def interval_range(
    start=None,
    end=None,
    periods=None,
    freq=None,
    name: Hashable = None,
    closed: lib.NoDefault = lib.no_default,
    inclusive: IntervalClosedType | None = None,
) -> IntervalIndex:
    """
    Return a fixed frequency IntervalIndex.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals.
    end : numeric or datetime-like, default None
        Right bound for generating intervals.
    periods : int, default None
        Number of periods to generate.
    freq : numeric, str, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like.  Default is 1
        for numeric and 'D' for datetime-like.
    name : str, default None
        Name of the resulting IntervalIndex.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

        .. deprecated:: 1.5.0
           Argument `closed` has been deprecated to standardize boundary inputs.
           Use `inclusive` instead, to set each bound as closed or open.
    inclusive : {"both", "neither", "left", "right"}, default "both"
        Include boundaries; Whether to set each bound as closed or open.

        .. versionadded:: 1.5.0

    Returns
    -------
    IntervalIndex

    See Also
    --------
    IntervalIndex : An Index of intervals that are all closed on the same side.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5, inclusive="right")
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  dtype='interval[int64, right]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'), inclusive="right")
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]],
                  dtype='interval[datetime64[ns], right]')

    The ``freq`` parameter specifies the frequency between the left and right.
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5, inclusive="right")
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  dtype='interval[float64, right]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS', inclusive="right")
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]],
                  dtype='interval[datetime64[ns], right]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4, inclusive="right")
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
              dtype='interval[float64, right]')

    The ``inclusive`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, inclusive='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
                  dtype='interval[int64, both]')
    """
    if inclusive is not None and not isinstance(closed, lib.NoDefault):
        raise ValueError(
            "Deprecated argument `closed` cannot be passed "
            "if argument `inclusive` is not None"
        )
    elif not isinstance(closed, lib.NoDefault):
        warnings.warn(
            "Argument `closed` is deprecated in favor of `inclusive`.",
            FutureWarning,
            stacklevel=2,
        )
        if closed is None:
            inclusive = "both"
        elif closed in ("both", "neither", "left", "right"):
            inclusive = closed
        else:
            raise ValueError(
                "Argument `closed` has to be either"
                "'both', 'neither', 'left' or 'right'"
            )
    elif inclusive is None:
        inclusive = "both"

    start = maybe_box_datetimelike(start)
    end = maybe_box_datetimelike(end)
    endpoint = start if start is not None else end

    if freq is None and com.any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else "D"

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError(
            "Of the four parameters: start, end, periods, and "
            "freq, exactly three must be specified"
        )

    if not _is_valid_endpoint(start):
        raise ValueError(f"start must be numeric or datetime-like, got {start}")
    elif not _is_valid_endpoint(end):
        raise ValueError(f"end must be numeric or datetime-like, got {end}")

    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        raise TypeError(f"periods must be a number, got {periods}")

    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError as err:
            raise ValueError(
                f"freq must be numeric or convertible to DateOffset, got {freq}"
            ) from err

    # verify type compatibility
    if not all(
        [
            _is_type_compatible(start, end),
            _is_type_compatible(start, freq),
            _is_type_compatible(end, freq),
        ]
    ):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    breaks: np.ndarray | TimedeltaIndex | DatetimeIndex

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com.all_not_none(start, end, freq):
            end -= (end - start) % freq

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com.not_none(start, end, freq)):
            # np.linspace always produces float output

            # error: Incompatible types in assignment (expression has type
            # "Union[ExtensionArray, ndarray]", variable has type "ndarray")
            breaks = maybe_downcast_numeric(  # type: ignore[assignment]
                breaks, np.dtype("int64")
            )
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            breaks = date_range(start=start, end=end, periods=periods, freq=freq)
        else:
            breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=inclusive)
Beispiel #15
0
def period_range(
    start=None, end=None, periods=None, freq=None, name=None
) -> PeriodIndex:
    """
    Return a fixed frequency PeriodIndex.

    The day (calendar) is the default frequency.

    Parameters
    ----------
    start : str or period-like, default None
        Left bound for generating periods.
    end : str or period-like, default None
        Right bound for generating periods.
    periods : int, default None
        Number of periods to generate.
    freq : str or DateOffset, optional
        Frequency alias. By default the freq is taken from `start` or `end`
        if those are Period objects. Otherwise, the default is ``"D"`` for
        daily frequency.
    name : str, default None
        Name of the resulting PeriodIndex.

    Returns
    -------
    PeriodIndex

    Notes
    -----
    Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
    must be specified.

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
    PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
             '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
             '2018-01'],
            dtype='period[M]', freq='M')

    If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
    endpoints for a ``PeriodIndex`` with frequency matching that of the
    ``period_range`` constructor.

    >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),
    ...                 end=pd.Period('2017Q2', freq='Q'), freq='M')
    PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],
                dtype='period[M]', freq='M')
    """
    if com.count_not_none(start, end, periods) != 2:
        raise ValueError(
            "Of the three parameters: start, end, and periods, "
            "exactly two must be specified"
        )
    if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)):
        freq = "D"

    data, freq = PeriodArray._generate_range(start, end, periods, freq, fields={})
    data = PeriodArray(data, freq=freq)
    return PeriodIndex(data, name=name)
    def _calculate_weight(self):
        """
        Return a DataFrame with the same index and columns as the signal DataFrame containing the optimized weights.
        A list of price return DataFrames is used to calculate a list of covariance matrices and mean return vectors
        when applicable. If specified and when applicable, the script will use an analytical solution of the optimization
        problem.
        :return:
        """
        # retrieve a list of DataFrames containing price returns
        return_df_list = self._get_price_return_df_list()

        # using the price returns calculate the inputs to be used in the optimizer
        # inputs should all be numpy arrays
        cov_matrix_list = self._get_covariance_matrix_list(return_df_list)
        if self.calculate_mean_returns:
            mean_return_list = self._get_mean_return_array_list(return_df_list)
        else:
            mean_return_list = self.signal_df.shape[0] * [None]

        # only use the theoretical optimizer when there are no constraints and if an analytical solution is available
        use_theoretical_optimizer = (com.count_not_none(self.min_instrument_weight, self.max_instrument_weight) == 0) \
                                    and self.min_total_weight == self.max_total_weight and self.has_analytical_solution

        # list of list of eligible tickers (i.e. having a defined signal) for each signal observation date
        eligible_tickers_list = self._get_eligible_tickers_list()

        # loop through each observation date and use the optimizer to calculate the weights
        prev_eligible_tickers = eligible_tickers_list[0]
        optimized_weight_list = []  # the solved weight arrays are stored here
        self._counter = 0  # used to display progress
        self._total = len(return_df_list)  # used to display progress
        for i in range(self.signal_df.shape[0]):
            if use_theoretical_optimizer:
                # no initial guess is needed when using a theoretical/analytical solution
                # however since _theoretical_optimizer passes on the
                optimized_weight = self._theoretical_optimizer(
                    cov_matrix=cov_matrix_list[i],
                    mean_returns=mean_return_list[i])
            else:
                # an initial guess is needed when using an optimizer with constraints
                # this initial guess is the previous solved weights except at the start or when the instruments changes
                new_eligible_tickers = eligible_tickers_list[i]
                if i == 0 or prev_eligible_tickers != new_eligible_tickers:
                    initial_guess = None  # equal weights by default inside the optimization function
                else:
                    initial_guess = optimized_weight_list[
                        i - 1]  # previous solved weights
                optimized_weight = self._optimizer(
                    cov_matrix=cov_matrix_list[i],
                    mean_returns=mean_return_list[i],
                    initial_guess=initial_guess)
                prev_eligible_tickers = new_eligible_tickers

                # display progress
                self._counter += 1
                logger.info('progress: {}%...'.format(
                    round(100 * self._counter / self._total, 2)))
            optimized_weight_list.append(optimized_weight)

        # reformat result as a DataFrame
        optimized_weight_df = pd.DataFrame(np.zeros(
            (self.signal_df.shape[0], self.signal_df.shape[1])),
                                           index=self.signal_df.index,
                                           columns=self.signal_df.columns)
        obs_calendar = self.signal_df.index
        for i in range(self.signal_df.shape[0]):
            obs_date = obs_calendar[i]
            eligible_tickers = eligible_tickers_list[i]
            optimized_weight_df.loc[
                obs_date, eligible_tickers] = optimized_weight_list[i]
        return optimized_weight_df
Beispiel #17
0
def period_range(start=None, end=None, periods=None, freq=None, name=None):
    """
    Return a fixed frequency PeriodIndex, with day (calendar) as the default
    frequency

    Parameters
    ----------
    start : string or period-like, default None
        Left bound for generating periods
    end : string or period-like, default None
        Right bound for generating periods
    periods : integer, default None
        Number of periods to generate
    freq : string or DateOffset, optional
        Frequency alias. By default the freq is taken from `start` or `end`
        if those are Period objects. Otherwise, the default is ``"D"`` for
        daily frequency.

    name : string, default None
        Name of the resulting PeriodIndex

    Returns
    -------
    prng : PeriodIndex

    Notes
    -----
    Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
    must be specified.

    To learn more about the frequency strings, please see `this link
    <http://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------

    >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
    PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05',
                 '2017-06', '2017-06', '2017-07', '2017-08', '2017-09',
                 '2017-10', '2017-11', '2017-12', '2018-01'],
                dtype='period[M]', freq='M')

    If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
    endpoints for a ``PeriodIndex`` with frequency matching that of the
    ``period_range`` constructor.

    >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),
    ...                 end=pd.Period('2017Q2', freq='Q'), freq='M')
    PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],
                dtype='period[M]', freq='M')
    """
    if com.count_not_none(start, end, periods) != 2:
        raise ValueError('Of the three parameters: start, end, and periods, '
                         'exactly two must be specified')
    if freq is None and (not isinstance(start, Period)
                         and not isinstance(end, Period)):
        freq = 'D'

    data, freq = PeriodArray._generate_range(start, end, periods, freq,
                                             fields={})
    data = PeriodArray(data, freq=freq)
    return PeriodIndex(data, name=name)
Beispiel #18
0
 def __init__(
     self,
     obj: FrameOrSeries,
     com: float | None = None,
     span: float | None = None,
     halflife: float | TimedeltaConvertibleTypes | None = None,
     alpha: float | None = None,
     min_periods: int | None = 0,
     adjust: bool = True,
     ignore_na: bool = False,
     axis: Axis = 0,
     times: str | np.ndarray | FrameOrSeries | None = None,
     method: str = "single",
     *,
     selection=None,
 ):
     super().__init__(
         obj=obj,
         min_periods=1 if min_periods is None else max(int(min_periods), 1),
         on=None,
         center=False,
         closed=None,
         method=method,
         axis=axis,
         selection=selection,
     )
     self.com = com
     self.span = span
     self.halflife = halflife
     self.alpha = alpha
     self.adjust = adjust
     self.ignore_na = ignore_na
     self.times = times
     if self.times is not None:
         if not self.adjust:
             raise NotImplementedError(
                 "times is not supported with adjust=False.")
         if isinstance(self.times, str):
             self.times = self._selected_obj[self.times]
         if not is_datetime64_ns_dtype(self.times):
             raise ValueError("times must be datetime64[ns] dtype.")
         # error: Argument 1 to "len" has incompatible type "Union[str, ndarray,
         # FrameOrSeries, None]"; expected "Sized"
         if len(self.times) != len(obj):  # type: ignore[arg-type]
             raise ValueError(
                 "times must be the same length as the object.")
         if not isinstance(self.halflife, (str, datetime.timedelta)):
             raise ValueError(
                 "halflife must be a string or datetime.timedelta object")
         if isna(self.times).any():
             raise ValueError("Cannot convert NaT values to integer")
         self._deltas = _calculate_deltas(self.times, self.halflife)
         # Halflife is no longer applicable when calculating COM
         # But allow COM to still be calculated if the user passes other decay args
         if common.count_not_none(self.com, self.span, self.alpha) > 0:
             self._com = get_center_of_mass(self.com, self.span, None,
                                            self.alpha)
         else:
             self._com = 1.0
     else:
         if self.halflife is not None and isinstance(
                 self.halflife, (str, datetime.timedelta)):
             raise ValueError(
                 "halflife can only be a timedelta convertible argument if "
                 "times is not None.")
         # Without times, points are equally spaced
         self._deltas = np.ones(max(len(self.obj) - 1, 0), dtype=np.float64)
         self._com = get_center_of_mass(
             # error: Argument 3 to "get_center_of_mass" has incompatible type
             # "Union[float, Any, None, timedelta64, signedinteger[_64Bit]]";
             # expected "Optional[float]"
             self.com,
             self.span,
             self.halflife,  # type: ignore[arg-type]
             self.alpha,
         )
Beispiel #19
0
    def _generate_range(cls, start, end, periods, freq, tz=None,
                        normalize=False, ambiguous='raise', closed=None):
        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError('Of the four parameters: start, end, periods, '
                             'and freq, exactly three must be specified')
        freq = to_offset(freq)

        if start is not None:
            start = Timestamp(start)

        if end is not None:
            end = Timestamp(end)

        if start is None and end is None:
            if closed is not None:
                raise ValueError("Closed has to be None if not both of start"
                                 "and end are defined")

        left_closed, right_closed = dtl.validate_endpoints(closed)

        start, end, _normalized = _maybe_normalize_endpoints(start, end,
                                                             normalize)

        tz, inferred_tz = _infer_tz_from_endpoints(start, end, tz)

        if hasattr(freq, 'delta') and freq != Day():
            # sub-Day Tick
            if inferred_tz is None and tz is not None:
                # naive dates
                if start is not None and start.tz is None:
                    start = start.tz_localize(tz, ambiguous=False)

                if end is not None and end.tz is None:
                    end = end.tz_localize(tz, ambiguous=False)

            if start and end:
                if start.tz is None and end.tz is not None:
                    start = start.tz_localize(end.tz, ambiguous=False)

                if end.tz is None and start.tz is not None:
                    end = end.tz_localize(start.tz, ambiguous=False)

            if cls._use_cached_range(freq, _normalized, start, end):
                index = cls._cached_range(start, end, periods=periods,
                                          freq=freq)
            else:
                index = _generate_regular_range(cls, start, end, periods, freq)

        else:

            if tz is not None:
                # naive dates
                if start is not None and start.tz is not None:
                    start = start.replace(tzinfo=None)

                if end is not None and end.tz is not None:
                    end = end.replace(tzinfo=None)

            if start and end:
                if start.tz is None and end.tz is not None:
                    end = end.replace(tzinfo=None)

                if end.tz is None and start.tz is not None:
                    start = start.replace(tzinfo=None)

            if freq is not None:
                if cls._use_cached_range(freq, _normalized, start, end):
                    index = cls._cached_range(start, end, periods=periods,
                                              freq=freq)
                else:
                    index = _generate_regular_range(cls, start, end,
                                                    periods, freq)

                if tz is not None and getattr(index, 'tz', None) is None:
                    arr = conversion.tz_localize_to_utc(
                        ensure_int64(index.values),
                        tz, ambiguous=ambiguous)

                    index = cls(arr)

                    # index is localized datetime64 array -> have to convert
                    # start/end as well to compare
                    if start is not None:
                        start = start.tz_localize(tz).asm8
                    if end is not None:
                        end = end.tz_localize(tz).asm8
            else:
                # Create a linearly spaced date_range in local time
                start = start.tz_localize(tz)
                end = end.tz_localize(tz)
                arr = np.linspace(start.value, end.value, periods)
                index = cls._simple_new(arr.astype('M8[ns]'), freq=None, tz=tz)

        if not left_closed and len(index) and index[0] == start:
            index = index[1:]
        if not right_closed and len(index) and index[-1] == end:
            index = index[:-1]

        return cls._simple_new(index.values, freq=freq, tz=tz)
Beispiel #20
0
    def _generate_range(cls,
                        start,
                        end,
                        periods,
                        freq,
                        tz=None,
                        normalize=False,
                        ambiguous='raise',
                        closed=None):
        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError('Of the four parameters: start, end, periods, '
                             'and freq, exactly three must be specified')
        freq = to_offset(freq)

        if start is not None:
            start = Timestamp(start)

        if end is not None:
            end = Timestamp(end)

        if start is None and end is None:
            if closed is not None:
                raise ValueError("Closed has to be None if not both of start"
                                 "and end are defined")

        left_closed, right_closed = dtl.validate_endpoints(closed)

        start, end, _normalized = _maybe_normalize_endpoints(
            start, end, normalize)

        tz, inferred_tz = _infer_tz_from_endpoints(start, end, tz)

        if hasattr(freq, 'delta') and freq != Day():
            # sub-Day Tick
            if inferred_tz is None and tz is not None:
                # naive dates
                if start is not None and start.tz is None:
                    start = start.tz_localize(tz, ambiguous=False)

                if end is not None and end.tz is None:
                    end = end.tz_localize(tz, ambiguous=False)

            if start and end:
                if start.tz is None and end.tz is not None:
                    start = start.tz_localize(end.tz, ambiguous=False)

                if end.tz is None and start.tz is not None:
                    end = end.tz_localize(start.tz, ambiguous=False)

            if cls._use_cached_range(freq, _normalized, start, end):
                index = cls._cached_range(start,
                                          end,
                                          periods=periods,
                                          freq=freq)
            else:
                index = _generate_regular_range(cls, start, end, periods, freq)

        else:

            if tz is not None:
                # naive dates
                if start is not None and start.tz is not None:
                    start = start.replace(tzinfo=None)

                if end is not None and end.tz is not None:
                    end = end.replace(tzinfo=None)

            if start and end:
                if start.tz is None and end.tz is not None:
                    end = end.replace(tzinfo=None)

                if end.tz is None and start.tz is not None:
                    start = start.replace(tzinfo=None)

            if freq is not None:
                if cls._use_cached_range(freq, _normalized, start, end):
                    index = cls._cached_range(start,
                                              end,
                                              periods=periods,
                                              freq=freq)
                else:
                    index = _generate_regular_range(cls, start, end, periods,
                                                    freq)

                if tz is not None and getattr(index, 'tz', None) is None:
                    arr = conversion.tz_localize_to_utc(ensure_int64(
                        index.values),
                                                        tz,
                                                        ambiguous=ambiguous)

                    index = cls(arr)

                    # index is localized datetime64 array -> have to convert
                    # start/end as well to compare
                    if start is not None:
                        start = start.tz_localize(tz).asm8
                    if end is not None:
                        end = end.tz_localize(tz).asm8
            else:
                # Create a linearly spaced date_range in local time
                start = start.tz_localize(tz)
                end = end.tz_localize(tz)
                arr = np.linspace(start.value, end.value, periods)
                index = cls._simple_new(arr.astype('M8[ns]'), freq=None, tz=tz)

        if not left_closed and len(index) and index[0] == start:
            index = index[1:]
        if not right_closed and len(index) and index[-1] == end:
            index = index[:-1]

        return cls._simple_new(index.values, freq=freq, tz=tz)
Beispiel #21
0
def interval_range(start=None, end=None, periods=None, freq=None,
                   name=None, closed='right'):
    """
    Return a fixed frequency IntervalIndex

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals
    end : numeric or datetime-like, default None
        Right bound for generating intervals
    periods : integer, default None
        Number of periods to generate
    freq : numeric, string, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like.  Default is 1
        for numeric and 'D' for datetime-like.
    name : string, default None
        Name of the resulting IntervalIndex
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    rng : IntervalIndex

    See Also
    --------
    IntervalIndex : An Index of intervals that are all closed on the same side.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]],
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the frequency between the left and right.
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]],
                  closed='right', dtype='interval[datetime64[ns]]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
              closed='right',
              dtype='interval[float64]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
                  closed='both', dtype='interval[int64]')
    """
    start = com.maybe_box_datetimelike(start)
    end = com.maybe_box_datetimelike(end)
    endpoint = start if start is not None else end

    if freq is None and com._any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else 'D'

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError('Of the four parameters: start, end, periods, and '
                         'freq, exactly three must be specified')

    if not _is_valid_endpoint(start):
        msg = 'start must be numeric or datetime-like, got {start}'
        raise ValueError(msg.format(start=start))
    elif not _is_valid_endpoint(end):
        msg = 'end must be numeric or datetime-like, got {end}'
        raise ValueError(msg.format(end=end))

    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        msg = 'periods must be a number, got {periods}'
        raise TypeError(msg.format(periods=periods))

    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError:
            raise ValueError('freq must be numeric or convertible to '
                             'DateOffset, got {freq}'.format(freq=freq))

    # verify type compatibility
    if not all([_is_type_compatible(start, end),
                _is_type_compatible(start, freq),
                _is_type_compatible(end, freq)]):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com._all_not_none(start, end, freq):
            end -= (end - start) % freq

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com._not_none(start, end, freq)):
            # np.linspace always produces float output
            breaks = maybe_downcast_to_dtype(breaks, 'int64')
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            range_func = date_range
        else:
            range_func = timedelta_range

        breaks = range_func(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
Beispiel #22
0
    def _generate_range(cls, start, end, periods, freq, tz=None,
                        normalize=False, ambiguous='raise', closed=None):
        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError('Of the four parameters: start, end, periods, '
                             'and freq, exactly three must be specified')
        freq = to_offset(freq)

        if start is not None:
            start = Timestamp(start)

        if end is not None:
            end = Timestamp(end)

        if start is None and end is None:
            if closed is not None:
                raise ValueError("Closed has to be None if not both of start"
                                 "and end are defined")

        left_closed, right_closed = dtl.validate_endpoints(closed)

        start, end, _normalized = _maybe_normalize_endpoints(start, end,
                                                             normalize)

        tz, _ = _infer_tz_from_endpoints(start, end, tz)

        if tz is not None:
            # Localize the start and end arguments
            start = _maybe_localize_point(
                start, getattr(start, 'tz', None), start, freq, tz
            )
            end = _maybe_localize_point(
                end, getattr(end, 'tz', None), end, freq, tz
            )
        if start and end:
            # Make sure start and end have the same tz
            start = _maybe_localize_point(
                start, start.tz, end.tz, freq, tz
            )
            end = _maybe_localize_point(
                end, end.tz, start.tz, freq, tz
            )
        if freq is not None:
            if cls._use_cached_range(freq, _normalized, start, end):
                # Currently always False; never hit
                # Should be reimplemented as apart of GH 17914
                index = cls._cached_range(start, end, periods=periods,
                                          freq=freq)
            else:
                index = _generate_regular_range(cls, start, end, periods, freq)

                if tz is not None and getattr(index, 'tz', None) is None:
                    arr = conversion.tz_localize_to_utc(
                        ensure_int64(index.values),
                        tz, ambiguous=ambiguous)

                    index = cls(arr)

                    # index is localized datetime64 array -> have to convert
                    # start/end as well to compare
                    if start is not None:
                        start = start.tz_localize(tz).asm8
                    if end is not None:
                        end = end.tz_localize(tz).asm8
        else:
            # Create a linearly spaced date_range in local time
            arr = np.linspace(start.value, end.value, periods)
            index = cls._simple_new(
                arr.astype('M8[ns]', copy=False), freq=None, tz=tz
            )

        if not left_closed and len(index) and index[0] == start:
            index = index[1:]
        if not right_closed and len(index) and index[-1] == end:
            index = index[:-1]

        return cls._simple_new(index.values, freq=freq, tz=tz)
Beispiel #23
0
    def _generate_range(cls, start, end, periods, freq, tz=None,
                        normalize=False, ambiguous='raise', closed=None):
        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError('Of the four parameters: start, end, periods, '
                             'and freq, exactly three must be specified')
        freq = to_offset(freq)

        if start is not None:
            start = Timestamp(start)

        if end is not None:
            end = Timestamp(end)

        if start is None and end is None:
            if closed is not None:
                raise ValueError("Closed has to be None if not both of start"
                                 "and end are defined")

        left_closed, right_closed = dtl.validate_endpoints(closed)

        start, end, _normalized = _maybe_normalize_endpoints(start, end,
                                                             normalize)

        tz, _ = _infer_tz_from_endpoints(start, end, tz)

        if tz is not None:
            # Localize the start and end arguments
            start = _maybe_localize_point(
                start, getattr(start, 'tz', None), start, freq, tz
            )
            end = _maybe_localize_point(
                end, getattr(end, 'tz', None), end, freq, tz
            )
        if start and end:
            # Make sure start and end have the same tz
            start = _maybe_localize_point(
                start, start.tz, end.tz, freq, tz
            )
            end = _maybe_localize_point(
                end, end.tz, start.tz, freq, tz
            )
        if freq is not None:
            if cls._use_cached_range(freq, _normalized, start, end):
                # Currently always False; never hit
                # Should be reimplemented as apart of GH 17914
                index = cls._cached_range(start, end, periods=periods,
                                          freq=freq)
            else:
                index = _generate_regular_range(cls, start, end, periods, freq)

                if tz is not None and getattr(index, 'tz', None) is None:
                    arr = conversion.tz_localize_to_utc(
                        ensure_int64(index.values),
                        tz, ambiguous=ambiguous)

                    index = cls(arr)

                    # index is localized datetime64 array -> have to convert
                    # start/end as well to compare
                    if start is not None:
                        start = start.tz_localize(tz).asm8
                    if end is not None:
                        end = end.tz_localize(tz).asm8
        else:
            # Create a linearly spaced date_range in local time
            arr = np.linspace(start.value, end.value, periods)
            index = cls._simple_new(
                arr.astype('M8[ns]', copy=False), freq=None, tz=tz
            )

        if not left_closed and len(index) and index[0] == start:
            index = index[1:]
        if not right_closed and len(index) and index[-1] == end:
            index = index[:-1]

        return cls._simple_new(index.values, freq=freq, tz=tz)