def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs): # **kwargs are for compat with TimedeltaIndex, which includes `name` if com.count_not_none(start, end, periods, freq) != 3: raise ValueError('Of the four parameters: start, end, periods, ' 'and freq, exactly three must be specified') if start is not None: start = Timedelta(start) if end is not None: end = Timedelta(end) if start is None and end is None: if closed is not None: raise ValueError("Closed has to be None if not both of start" "and end are defined") left_closed, right_closed = dtl.validate_endpoints(closed) if freq is not None: index = _generate_regular_range(start, end, periods, freq) index = cls._simple_new(index, freq=freq, **kwargs) else: index = np.linspace(start.value, end.value, periods).astype('i8') # TODO: shouldn't we pass `name` here? (via **kwargs) index = cls._simple_new(index, freq=freq) if not left_closed: index = index[1:] if not right_closed: index = index[:-1] return index
def _generate_range(cls, start, end, periods, freq, closed=None): periods = dtl.validate_periods(periods) if freq is None and any(x is None for x in [periods, start, end]): raise ValueError('Must provide freq argument if no data is ' 'supplied') if com.count_not_none(start, end, periods, freq) != 3: raise ValueError('Of the four parameters: start, end, periods, ' 'and freq, exactly three must be specified') if start is not None: start = Timedelta(start) if end is not None: end = Timedelta(end) if start is None and end is None: if closed is not None: raise ValueError("Closed has to be None if not both of start" "and end are defined") left_closed, right_closed = dtl.validate_endpoints(closed) if freq is not None: index = _generate_regular_range(start, end, periods, freq) else: index = np.linspace(start.value, end.value, periods).astype('i8') if not left_closed: index = index[1:] if not right_closed: index = index[:-1] return cls._simple_new(index, freq=freq)
def period_range(start=None, end=None, periods=None, freq='D', name=None): """ Return a fixed frequency PeriodIndex, with day (calendar) as the default frequency Parameters ---------- start : string or period-like, default None Left bound for generating periods end : string or period-like, default None Right bound for generating periods periods : integer, default None Number of periods to generate freq : string or DateOffset, default 'D' Frequency alias name : string, default None Name of the resulting PeriodIndex Notes ----- Of the three parameters: ``start``, ``end``, and ``periods``, exactly two must be specified. To learn more about the frequency strings, please see `this link <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__. Returns ------- prng : PeriodIndex Examples -------- >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M') PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', '2017-06', '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', '2018-01'], dtype='period[M]', freq='M') If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor endpoints for a ``PeriodIndex`` with frequency matching that of the ``period_range`` constructor. >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'), ... end=pd.Period('2017Q2', freq='Q'), freq='M') PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'], dtype='period[M]', freq='M') """ if com.count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') return PeriodIndex(start=start, end=end, periods=periods, freq=freq, name=name)
def _generate_range(cls, start, end, periods, freq, fields): if freq is not None: freq = Period._maybe_convert_freq(freq) field_count = len(fields) if com.count_not_none(start, end) > 0: if field_count > 0: raise ValueError('Can either instantiate from fields ' 'or endpoints, but not both') subarr, freq = _get_ordinal_range(start, end, periods, freq) elif field_count > 0: subarr, freq = _range_from_fields(freq=freq, **fields) else: raise ValueError('Not enough parameters to construct ' 'Period range') return subarr, freq
def _get_ordinal_range(start, end, periods, freq, mult=1): if com.count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') if freq is not None: _, mult = libfrequencies.get_freq_code(freq) if start is not None: start = Period(start, freq) if end is not None: end = Period(end, freq) is_start_per = isinstance(start, Period) is_end_per = isinstance(end, Period) if is_start_per and is_end_per and start.freq != end.freq: raise ValueError('start and end must have same freq') if (start is NaT or end is NaT): raise ValueError('start and end must not be NaT') if freq is None: if is_start_per: freq = start.freq elif is_end_per: freq = end.freq else: # pragma: no cover raise ValueError('Could not infer freq from start/end') if periods is not None: periods = periods * mult if start is None: data = np.arange(end.ordinal - periods + mult, end.ordinal + 1, mult, dtype=np.int64) else: data = np.arange(start.ordinal, start.ordinal + periods, mult, dtype=np.int64) else: data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64) return data, freq
def _get_ordinal_range(start, end, periods, freq, mult=1): if com.count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') if freq is not None: _, mult = frequencies.get_freq_code(freq) if start is not None: start = Period(start, freq) if end is not None: end = Period(end, freq) is_start_per = isinstance(start, Period) is_end_per = isinstance(end, Period) if is_start_per and is_end_per and start.freq != end.freq: raise ValueError('start and end must have same freq') if (start is NaT or end is NaT): raise ValueError('start and end must not be NaT') if freq is None: if is_start_per: freq = start.freq elif is_end_per: freq = end.freq else: # pragma: no cover raise ValueError('Could not infer freq from start/end') if periods is not None: periods = periods * mult if start is None: data = np.arange(end.ordinal - periods + mult, end.ordinal + 1, mult, dtype=np.int64) else: data = np.arange(start.ordinal, start.ordinal + periods, mult, dtype=np.int64) else: data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64) return data, freq
def _generate_range(cls, start, end, periods, freq, closed=None): periods = dtl.validate_periods(periods) if freq is None and any(x is None for x in [periods, start, end]): raise ValueError("Must provide freq argument if no data is supplied") if com.count_not_none(start, end, periods, freq) != 3: raise ValueError( "Of the four parameters: start, end, periods, " "and freq, exactly three must be specified" ) if start is not None: start = Timedelta(start) if end is not None: end = Timedelta(end) if start is None and end is None: if closed is not None: raise ValueError( "Closed has to be None if not both of startand end are defined" ) left_closed, right_closed = dtl.validate_endpoints(closed) if freq is not None: index = _generate_regular_range(start, end, periods, freq) else: index = np.linspace(start.value, end.value, periods).astype("i8") if len(index) >= 2: # Infer a frequency td = Timedelta(index[1] - index[0]) freq = to_offset(td) if not left_closed: index = index[1:] if not right_closed: index = index[:-1] return cls._simple_new(index, freq=freq)
def __init__(self, tickers: {str, list} = None, observation_calendar: pd.DatetimeIndex = None, eligibility_df: pd.DataFrame = None): # if eligibility_df is not specified, create one from the tickers and observation calendar if they are both # available, else set it to None if eligibility_df is None: if com.count_not_none(tickers, observation_calendar) == 2: # reformat to list and capital letters if not isinstance(tickers, (list, tuple)): tickers = [tickers.upper()] else: tickers = [ticker.upper() for ticker in tickers] self.eligibility_df = pd.DataFrame(index=observation_calendar, columns=tickers, data=1) else: # set to None since either one of tickers or observation_calendar was not specified self._eligibility_df = None else: self.eligibility_df = eligibility_df
def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs): # **kwargs are for compat with TimedeltaIndex, which includes `name` periods = dtl.validate_periods(periods) if freq is None and any(x is None for x in [periods, start, end]): raise ValueError('Must provide freq argument if no data is ' 'supplied') if com.count_not_none(start, end, periods, freq) != 3: raise ValueError('Of the four parameters: start, end, periods, ' 'and freq, exactly three must be specified') if start is not None: start = Timedelta(start) if end is not None: end = Timedelta(end) if start is None and end is None: if closed is not None: raise ValueError("Closed has to be None if not both of start" "and end are defined") left_closed, right_closed = dtl.validate_endpoints(closed) if freq is not None: index = _generate_regular_range(start, end, periods, freq) index = cls._simple_new(index, freq=freq, **kwargs) else: index = np.linspace(start.value, end.value, periods).astype('i8') # TODO: shouldn't we pass `name` here? (via **kwargs) index = cls._simple_new(index, freq=freq) if not left_closed: index = index[1:] if not right_closed: index = index[:-1] return index
def _calculate_weight(self): # calculate the number of non-NaN columns per row if self.net_zero_exposure: if com.count_not_none(self.max_instrument_weight, self.min_instrument_weight) > 0: logger.warning( 'net zero exposure is not guaranteed since constraints are specified (max/min weights)' ) positive_columns = self.signal_df[self.signal_df > 0.0].count( axis=1) negative_columns = self.signal_df[self.signal_df < 0.0].count( axis=1) weight_df = self.signal_df.copy().replace(np.nan, 0) weight_df[weight_df > 0.0] = weight_df[weight_df > 0.0].divide( positive_columns, axis=0) weight_df[weight_df < 0.0] = weight_df[weight_df < 0.0].divide( negative_columns, axis=0) else: numeric_columns = self.signal_df.count(axis=1) weight_df = self.signal_df.divide(numeric_columns, axis=0).replace(np.nan, 0) return weight_df
def interval_range( start=None, end=None, periods=None, freq=None, name=None, closed="right" ): """ Return a fixed frequency IntervalIndex. Parameters ---------- start : numeric or datetime-like, default None Left bound for generating intervals. end : numeric or datetime-like, default None Right bound for generating intervals. periods : int, default None Number of periods to generate. freq : numeric, str, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 for numeric and 'D' for datetime-like. name : str, default None Name of the resulting IntervalIndex. closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. Returns ------- IntervalIndex See Also -------- IntervalIndex : An Index of intervals that are all closed on the same side. Notes ----- Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, exactly three must be specified. If ``freq`` is omitted, the resulting ``IntervalIndex`` will have ``periods`` linearly spaced elements between ``start`` and ``end``, inclusively. To learn more about datetime-like frequency strings, please see `this link <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. Examples -------- Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5) IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], closed='right', dtype='interval[int64]') Additionally, datetime-like input is also supported. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... end=pd.Timestamp('2017-01-04')) IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04]], closed='right', dtype='interval[datetime64[ns]]') The ``freq`` parameter specifies the frequency between the left and right. endpoints of the individual intervals within the ``IntervalIndex``. For numeric ``start`` and ``end``, the frequency must also be numeric. >>> pd.interval_range(start=0, periods=4, freq=1.5) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], closed='right', dtype='interval[float64]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... periods=3, freq='MS') IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], (2017-03-01, 2017-04-01]], closed='right', dtype='interval[datetime64[ns]]') Specify ``start``, ``end``, and ``periods``; the frequency is generated automatically (linearly spaced). >>> pd.interval_range(start=0, end=6, periods=4) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], closed='right', dtype='interval[float64]') The ``closed`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. >>> pd.interval_range(end=5, periods=4, closed='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], closed='both', dtype='interval[int64]') """ start = com.maybe_box_datetimelike(start) end = com.maybe_box_datetimelike(end) endpoint = start if start is not None else end if freq is None and com.any_none(periods, start, end): freq = 1 if is_number(endpoint) else "D" if com.count_not_none(start, end, periods, freq) != 3: raise ValueError( "Of the four parameters: start, end, periods, and " "freq, exactly three must be specified" ) if not _is_valid_endpoint(start): raise ValueError(f"start must be numeric or datetime-like, got {start}") elif not _is_valid_endpoint(end): raise ValueError(f"end must be numeric or datetime-like, got {end}") if is_float(periods): periods = int(periods) elif not is_integer(periods) and periods is not None: raise TypeError(f"periods must be a number, got {periods}") if freq is not None and not is_number(freq): try: freq = to_offset(freq) except ValueError as err: raise ValueError( f"freq must be numeric or convertible to DateOffset, got {freq}" ) from err # verify type compatibility if not all( [ _is_type_compatible(start, end), _is_type_compatible(start, freq), _is_type_compatible(end, freq), ] ): raise TypeError("start, end, freq need to be type compatible") # +1 to convert interval count to breaks count (n breaks = n-1 intervals) if periods is not None: periods += 1 if is_number(endpoint): # force consistency between start/end/freq (lower end if freq skips it) if com.all_not_none(start, end, freq): end -= (end - start) % freq # compute the period/start/end if unspecified (at most one) if periods is None: periods = int((end - start) // freq) + 1 elif start is None: start = end - (periods - 1) * freq elif end is None: end = start + (periods - 1) * freq breaks = np.linspace(start, end, periods) if all(is_integer(x) for x in com.not_none(start, end, freq)): # np.linspace always produces float output breaks = maybe_downcast_to_dtype(breaks, "int64") else: # delegate to the appropriate range function if isinstance(endpoint, Timestamp): range_func = date_range else: range_func = timedelta_range breaks = range_func(start=start, end=end, periods=periods, freq=freq) return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def __init__( self, obj: NDFrame, com: float | None = None, span: float | None = None, halflife: float | TimedeltaConvertibleTypes | None = None, alpha: float | None = None, min_periods: int | None = 0, adjust: bool = True, ignore_na: bool = False, axis: Axis = 0, times: str | np.ndarray | NDFrame | None = None, method: str = "single", *, selection=None, ) -> None: super().__init__( obj=obj, min_periods=1 if min_periods is None else max(int(min_periods), 1), on=None, center=False, closed=None, method=method, axis=axis, selection=selection, ) self.com = com self.span = span self.halflife = halflife self.alpha = alpha self.adjust = adjust self.ignore_na = ignore_na self.times = times if self.times is not None: if not self.adjust: raise NotImplementedError( "times is not supported with adjust=False.") if isinstance(self.times, str): warnings.warn( ("Specifying times as a string column label is deprecated " "and will be removed in a future version. Pass the column " "into times instead."), FutureWarning, stacklevel=find_stack_level(), ) # self.times cannot be str anymore self.times = cast("Series", self._selected_obj[self.times]) if not is_datetime64_ns_dtype(self.times): raise ValueError("times must be datetime64[ns] dtype.") if len(self.times) != len(obj): raise ValueError( "times must be the same length as the object.") if not isinstance(self.halflife, (str, datetime.timedelta, np.timedelta64)): raise ValueError( "halflife must be a timedelta convertible object") if isna(self.times).any(): raise ValueError("Cannot convert NaT values to integer") self._deltas = _calculate_deltas(self.times, self.halflife) # Halflife is no longer applicable when calculating COM # But allow COM to still be calculated if the user passes other decay args if common.count_not_none(self.com, self.span, self.alpha) > 0: self._com = get_center_of_mass(self.com, self.span, None, self.alpha) else: self._com = 1.0 else: if self.halflife is not None and isinstance( self.halflife, (str, datetime.timedelta, np.timedelta64)): raise ValueError( "halflife can only be a timedelta convertible argument if " "times is not None.") # Without times, points are equally spaced self._deltas = np.ones(max(self.obj.shape[self.axis] - 1, 0), dtype=np.float64) self._com = get_center_of_mass( # error: Argument 3 to "get_center_of_mass" has incompatible type # "Union[float, Any, None, timedelta64, signedinteger[_64Bit]]"; # expected "Optional[float]" self.com, self.span, self.halflife, # type: ignore[arg-type] self.alpha, )
def __init__( self, obj, com: Optional[float] = None, span: Optional[float] = None, halflife: Optional[Union[float, TimedeltaConvertibleTypes]] = None, alpha: Optional[float] = None, min_periods: int = 0, adjust: bool = True, ignore_na: bool = False, axis: int = 0, times: Optional[Union[str, np.ndarray, FrameOrSeries]] = None, ): super().__init__( obj=obj, min_periods=max(int(min_periods), 1), on=None, center=False, closed=None, method="single", axis=axis, ) self.com = com self.span = span self.halflife = halflife self.alpha = alpha self.adjust = adjust self.ignore_na = ignore_na self.times = times if self.times is not None: if not self.adjust: raise NotImplementedError( "times is not supported with adjust=False.") if isinstance(self.times, str): self.times = self._selected_obj[self.times] if not is_datetime64_ns_dtype(self.times): raise ValueError("times must be datetime64[ns] dtype.") if len(self.times) != len(obj): raise ValueError( "times must be the same length as the object.") if not isinstance(self.halflife, (str, datetime.timedelta)): raise ValueError( "halflife must be a string or datetime.timedelta object") if isna(self.times).any(): raise ValueError("Cannot convert NaT values to integer") _times = np.asarray(self.times.view(np.int64), dtype=np.float64) _halflife = float(Timedelta(self.halflife).value) self._deltas = np.diff(_times) / _halflife # Halflife is no longer applicable when calculating COM # But allow COM to still be calculated if the user passes other decay args if common.count_not_none(self.com, self.span, self.alpha) > 0: self._com = get_center_of_mass(self.com, self.span, None, self.alpha) else: self._com = 1.0 else: if self.halflife is not None and isinstance( self.halflife, (str, datetime.timedelta)): raise ValueError( "halflife can only be a timedelta convertible argument if " "times is not None.") # Without times, points are equally spaced self._deltas = np.ones(max(len(self.obj) - 1, 0), dtype=np.float64) self._com = get_center_of_mass(self.com, self.span, self.halflife, self.alpha)
def interval_range( start=None, end=None, periods=None, freq=None, name: Hashable = None, closed: lib.NoDefault = lib.no_default, inclusive: IntervalClosedType | None = None, ) -> IntervalIndex: """ Return a fixed frequency IntervalIndex. Parameters ---------- start : numeric or datetime-like, default None Left bound for generating intervals. end : numeric or datetime-like, default None Right bound for generating intervals. periods : int, default None Number of periods to generate. freq : numeric, str, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 for numeric and 'D' for datetime-like. name : str, default None Name of the resulting IntervalIndex. closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. .. deprecated:: 1.5.0 Argument `closed` has been deprecated to standardize boundary inputs. Use `inclusive` instead, to set each bound as closed or open. inclusive : {"both", "neither", "left", "right"}, default "both" Include boundaries; Whether to set each bound as closed or open. .. versionadded:: 1.5.0 Returns ------- IntervalIndex See Also -------- IntervalIndex : An Index of intervals that are all closed on the same side. Notes ----- Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, exactly three must be specified. If ``freq`` is omitted, the resulting ``IntervalIndex`` will have ``periods`` linearly spaced elements between ``start`` and ``end``, inclusively. To learn more about datetime-like frequency strings, please see `this link <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. Examples -------- Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5, inclusive="right") IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], dtype='interval[int64, right]') Additionally, datetime-like input is also supported. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... end=pd.Timestamp('2017-01-04'), inclusive="right") IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04]], dtype='interval[datetime64[ns], right]') The ``freq`` parameter specifies the frequency between the left and right. endpoints of the individual intervals within the ``IntervalIndex``. For numeric ``start`` and ``end``, the frequency must also be numeric. >>> pd.interval_range(start=0, periods=4, freq=1.5, inclusive="right") IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], dtype='interval[float64, right]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... periods=3, freq='MS', inclusive="right") IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], (2017-03-01, 2017-04-01]], dtype='interval[datetime64[ns], right]') Specify ``start``, ``end``, and ``periods``; the frequency is generated automatically (linearly spaced). >>> pd.interval_range(start=0, end=6, periods=4, inclusive="right") IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], dtype='interval[float64, right]') The ``inclusive`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. >>> pd.interval_range(end=5, periods=4, inclusive='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], dtype='interval[int64, both]') """ if inclusive is not None and not isinstance(closed, lib.NoDefault): raise ValueError( "Deprecated argument `closed` cannot be passed " "if argument `inclusive` is not None" ) elif not isinstance(closed, lib.NoDefault): warnings.warn( "Argument `closed` is deprecated in favor of `inclusive`.", FutureWarning, stacklevel=2, ) if closed is None: inclusive = "both" elif closed in ("both", "neither", "left", "right"): inclusive = closed else: raise ValueError( "Argument `closed` has to be either" "'both', 'neither', 'left' or 'right'" ) elif inclusive is None: inclusive = "both" start = maybe_box_datetimelike(start) end = maybe_box_datetimelike(end) endpoint = start if start is not None else end if freq is None and com.any_none(periods, start, end): freq = 1 if is_number(endpoint) else "D" if com.count_not_none(start, end, periods, freq) != 3: raise ValueError( "Of the four parameters: start, end, periods, and " "freq, exactly three must be specified" ) if not _is_valid_endpoint(start): raise ValueError(f"start must be numeric or datetime-like, got {start}") elif not _is_valid_endpoint(end): raise ValueError(f"end must be numeric or datetime-like, got {end}") if is_float(periods): periods = int(periods) elif not is_integer(periods) and periods is not None: raise TypeError(f"periods must be a number, got {periods}") if freq is not None and not is_number(freq): try: freq = to_offset(freq) except ValueError as err: raise ValueError( f"freq must be numeric or convertible to DateOffset, got {freq}" ) from err # verify type compatibility if not all( [ _is_type_compatible(start, end), _is_type_compatible(start, freq), _is_type_compatible(end, freq), ] ): raise TypeError("start, end, freq need to be type compatible") # +1 to convert interval count to breaks count (n breaks = n-1 intervals) if periods is not None: periods += 1 breaks: np.ndarray | TimedeltaIndex | DatetimeIndex if is_number(endpoint): # force consistency between start/end/freq (lower end if freq skips it) if com.all_not_none(start, end, freq): end -= (end - start) % freq # compute the period/start/end if unspecified (at most one) if periods is None: periods = int((end - start) // freq) + 1 elif start is None: start = end - (periods - 1) * freq elif end is None: end = start + (periods - 1) * freq breaks = np.linspace(start, end, periods) if all(is_integer(x) for x in com.not_none(start, end, freq)): # np.linspace always produces float output # error: Incompatible types in assignment (expression has type # "Union[ExtensionArray, ndarray]", variable has type "ndarray") breaks = maybe_downcast_numeric( # type: ignore[assignment] breaks, np.dtype("int64") ) else: # delegate to the appropriate range function if isinstance(endpoint, Timestamp): breaks = date_range(start=start, end=end, periods=periods, freq=freq) else: breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq) return IntervalIndex.from_breaks(breaks, name=name, closed=inclusive)
def period_range( start=None, end=None, periods=None, freq=None, name=None ) -> PeriodIndex: """ Return a fixed frequency PeriodIndex. The day (calendar) is the default frequency. Parameters ---------- start : str or period-like, default None Left bound for generating periods. end : str or period-like, default None Right bound for generating periods. periods : int, default None Number of periods to generate. freq : str or DateOffset, optional Frequency alias. By default the freq is taken from `start` or `end` if those are Period objects. Otherwise, the default is ``"D"`` for daily frequency. name : str, default None Name of the resulting PeriodIndex. Returns ------- PeriodIndex Notes ----- Of the three parameters: ``start``, ``end``, and ``periods``, exactly two must be specified. To learn more about the frequency strings, please see `this link <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. Examples -------- >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M') PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', '2018-01'], dtype='period[M]', freq='M') If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor endpoints for a ``PeriodIndex`` with frequency matching that of the ``period_range`` constructor. >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'), ... end=pd.Period('2017Q2', freq='Q'), freq='M') PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'], dtype='period[M]', freq='M') """ if com.count_not_none(start, end, periods) != 2: raise ValueError( "Of the three parameters: start, end, and periods, " "exactly two must be specified" ) if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)): freq = "D" data, freq = PeriodArray._generate_range(start, end, periods, freq, fields={}) data = PeriodArray(data, freq=freq) return PeriodIndex(data, name=name)
def _calculate_weight(self): """ Return a DataFrame with the same index and columns as the signal DataFrame containing the optimized weights. A list of price return DataFrames is used to calculate a list of covariance matrices and mean return vectors when applicable. If specified and when applicable, the script will use an analytical solution of the optimization problem. :return: """ # retrieve a list of DataFrames containing price returns return_df_list = self._get_price_return_df_list() # using the price returns calculate the inputs to be used in the optimizer # inputs should all be numpy arrays cov_matrix_list = self._get_covariance_matrix_list(return_df_list) if self.calculate_mean_returns: mean_return_list = self._get_mean_return_array_list(return_df_list) else: mean_return_list = self.signal_df.shape[0] * [None] # only use the theoretical optimizer when there are no constraints and if an analytical solution is available use_theoretical_optimizer = (com.count_not_none(self.min_instrument_weight, self.max_instrument_weight) == 0) \ and self.min_total_weight == self.max_total_weight and self.has_analytical_solution # list of list of eligible tickers (i.e. having a defined signal) for each signal observation date eligible_tickers_list = self._get_eligible_tickers_list() # loop through each observation date and use the optimizer to calculate the weights prev_eligible_tickers = eligible_tickers_list[0] optimized_weight_list = [] # the solved weight arrays are stored here self._counter = 0 # used to display progress self._total = len(return_df_list) # used to display progress for i in range(self.signal_df.shape[0]): if use_theoretical_optimizer: # no initial guess is needed when using a theoretical/analytical solution # however since _theoretical_optimizer passes on the optimized_weight = self._theoretical_optimizer( cov_matrix=cov_matrix_list[i], mean_returns=mean_return_list[i]) else: # an initial guess is needed when using an optimizer with constraints # this initial guess is the previous solved weights except at the start or when the instruments changes new_eligible_tickers = eligible_tickers_list[i] if i == 0 or prev_eligible_tickers != new_eligible_tickers: initial_guess = None # equal weights by default inside the optimization function else: initial_guess = optimized_weight_list[ i - 1] # previous solved weights optimized_weight = self._optimizer( cov_matrix=cov_matrix_list[i], mean_returns=mean_return_list[i], initial_guess=initial_guess) prev_eligible_tickers = new_eligible_tickers # display progress self._counter += 1 logger.info('progress: {}%...'.format( round(100 * self._counter / self._total, 2))) optimized_weight_list.append(optimized_weight) # reformat result as a DataFrame optimized_weight_df = pd.DataFrame(np.zeros( (self.signal_df.shape[0], self.signal_df.shape[1])), index=self.signal_df.index, columns=self.signal_df.columns) obs_calendar = self.signal_df.index for i in range(self.signal_df.shape[0]): obs_date = obs_calendar[i] eligible_tickers = eligible_tickers_list[i] optimized_weight_df.loc[ obs_date, eligible_tickers] = optimized_weight_list[i] return optimized_weight_df
def period_range(start=None, end=None, periods=None, freq=None, name=None): """ Return a fixed frequency PeriodIndex, with day (calendar) as the default frequency Parameters ---------- start : string or period-like, default None Left bound for generating periods end : string or period-like, default None Right bound for generating periods periods : integer, default None Number of periods to generate freq : string or DateOffset, optional Frequency alias. By default the freq is taken from `start` or `end` if those are Period objects. Otherwise, the default is ``"D"`` for daily frequency. name : string, default None Name of the resulting PeriodIndex Returns ------- prng : PeriodIndex Notes ----- Of the three parameters: ``start``, ``end``, and ``periods``, exactly two must be specified. To learn more about the frequency strings, please see `this link <http://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. Examples -------- >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M') PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', '2017-06', '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', '2018-01'], dtype='period[M]', freq='M') If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor endpoints for a ``PeriodIndex`` with frequency matching that of the ``period_range`` constructor. >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'), ... end=pd.Period('2017Q2', freq='Q'), freq='M') PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'], dtype='period[M]', freq='M') """ if com.count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)): freq = 'D' data, freq = PeriodArray._generate_range(start, end, periods, freq, fields={}) data = PeriodArray(data, freq=freq) return PeriodIndex(data, name=name)
def __init__( self, obj: FrameOrSeries, com: float | None = None, span: float | None = None, halflife: float | TimedeltaConvertibleTypes | None = None, alpha: float | None = None, min_periods: int | None = 0, adjust: bool = True, ignore_na: bool = False, axis: Axis = 0, times: str | np.ndarray | FrameOrSeries | None = None, method: str = "single", *, selection=None, ): super().__init__( obj=obj, min_periods=1 if min_periods is None else max(int(min_periods), 1), on=None, center=False, closed=None, method=method, axis=axis, selection=selection, ) self.com = com self.span = span self.halflife = halflife self.alpha = alpha self.adjust = adjust self.ignore_na = ignore_na self.times = times if self.times is not None: if not self.adjust: raise NotImplementedError( "times is not supported with adjust=False.") if isinstance(self.times, str): self.times = self._selected_obj[self.times] if not is_datetime64_ns_dtype(self.times): raise ValueError("times must be datetime64[ns] dtype.") # error: Argument 1 to "len" has incompatible type "Union[str, ndarray, # FrameOrSeries, None]"; expected "Sized" if len(self.times) != len(obj): # type: ignore[arg-type] raise ValueError( "times must be the same length as the object.") if not isinstance(self.halflife, (str, datetime.timedelta)): raise ValueError( "halflife must be a string or datetime.timedelta object") if isna(self.times).any(): raise ValueError("Cannot convert NaT values to integer") self._deltas = _calculate_deltas(self.times, self.halflife) # Halflife is no longer applicable when calculating COM # But allow COM to still be calculated if the user passes other decay args if common.count_not_none(self.com, self.span, self.alpha) > 0: self._com = get_center_of_mass(self.com, self.span, None, self.alpha) else: self._com = 1.0 else: if self.halflife is not None and isinstance( self.halflife, (str, datetime.timedelta)): raise ValueError( "halflife can only be a timedelta convertible argument if " "times is not None.") # Without times, points are equally spaced self._deltas = np.ones(max(len(self.obj) - 1, 0), dtype=np.float64) self._com = get_center_of_mass( # error: Argument 3 to "get_center_of_mass" has incompatible type # "Union[float, Any, None, timedelta64, signedinteger[_64Bit]]"; # expected "Optional[float]" self.com, self.span, self.halflife, # type: ignore[arg-type] self.alpha, )
def _generate_range(cls, start, end, periods, freq, tz=None, normalize=False, ambiguous='raise', closed=None): if com.count_not_none(start, end, periods, freq) != 3: raise ValueError('Of the four parameters: start, end, periods, ' 'and freq, exactly three must be specified') freq = to_offset(freq) if start is not None: start = Timestamp(start) if end is not None: end = Timestamp(end) if start is None and end is None: if closed is not None: raise ValueError("Closed has to be None if not both of start" "and end are defined") left_closed, right_closed = dtl.validate_endpoints(closed) start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize) tz, inferred_tz = _infer_tz_from_endpoints(start, end, tz) if hasattr(freq, 'delta') and freq != Day(): # sub-Day Tick if inferred_tz is None and tz is not None: # naive dates if start is not None and start.tz is None: start = start.tz_localize(tz, ambiguous=False) if end is not None and end.tz is None: end = end.tz_localize(tz, ambiguous=False) if start and end: if start.tz is None and end.tz is not None: start = start.tz_localize(end.tz, ambiguous=False) if end.tz is None and start.tz is not None: end = end.tz_localize(start.tz, ambiguous=False) if cls._use_cached_range(freq, _normalized, start, end): index = cls._cached_range(start, end, periods=periods, freq=freq) else: index = _generate_regular_range(cls, start, end, periods, freq) else: if tz is not None: # naive dates if start is not None and start.tz is not None: start = start.replace(tzinfo=None) if end is not None and end.tz is not None: end = end.replace(tzinfo=None) if start and end: if start.tz is None and end.tz is not None: end = end.replace(tzinfo=None) if end.tz is None and start.tz is not None: start = start.replace(tzinfo=None) if freq is not None: if cls._use_cached_range(freq, _normalized, start, end): index = cls._cached_range(start, end, periods=periods, freq=freq) else: index = _generate_regular_range(cls, start, end, periods, freq) if tz is not None and getattr(index, 'tz', None) is None: arr = conversion.tz_localize_to_utc( ensure_int64(index.values), tz, ambiguous=ambiguous) index = cls(arr) # index is localized datetime64 array -> have to convert # start/end as well to compare if start is not None: start = start.tz_localize(tz).asm8 if end is not None: end = end.tz_localize(tz).asm8 else: # Create a linearly spaced date_range in local time start = start.tz_localize(tz) end = end.tz_localize(tz) arr = np.linspace(start.value, end.value, periods) index = cls._simple_new(arr.astype('M8[ns]'), freq=None, tz=tz) if not left_closed and len(index) and index[0] == start: index = index[1:] if not right_closed and len(index) and index[-1] == end: index = index[:-1] return cls._simple_new(index.values, freq=freq, tz=tz)
def _generate_range(cls, start, end, periods, freq, tz=None, normalize=False, ambiguous='raise', closed=None): if com.count_not_none(start, end, periods, freq) != 3: raise ValueError('Of the four parameters: start, end, periods, ' 'and freq, exactly three must be specified') freq = to_offset(freq) if start is not None: start = Timestamp(start) if end is not None: end = Timestamp(end) if start is None and end is None: if closed is not None: raise ValueError("Closed has to be None if not both of start" "and end are defined") left_closed, right_closed = dtl.validate_endpoints(closed) start, end, _normalized = _maybe_normalize_endpoints( start, end, normalize) tz, inferred_tz = _infer_tz_from_endpoints(start, end, tz) if hasattr(freq, 'delta') and freq != Day(): # sub-Day Tick if inferred_tz is None and tz is not None: # naive dates if start is not None and start.tz is None: start = start.tz_localize(tz, ambiguous=False) if end is not None and end.tz is None: end = end.tz_localize(tz, ambiguous=False) if start and end: if start.tz is None and end.tz is not None: start = start.tz_localize(end.tz, ambiguous=False) if end.tz is None and start.tz is not None: end = end.tz_localize(start.tz, ambiguous=False) if cls._use_cached_range(freq, _normalized, start, end): index = cls._cached_range(start, end, periods=periods, freq=freq) else: index = _generate_regular_range(cls, start, end, periods, freq) else: if tz is not None: # naive dates if start is not None and start.tz is not None: start = start.replace(tzinfo=None) if end is not None and end.tz is not None: end = end.replace(tzinfo=None) if start and end: if start.tz is None and end.tz is not None: end = end.replace(tzinfo=None) if end.tz is None and start.tz is not None: start = start.replace(tzinfo=None) if freq is not None: if cls._use_cached_range(freq, _normalized, start, end): index = cls._cached_range(start, end, periods=periods, freq=freq) else: index = _generate_regular_range(cls, start, end, periods, freq) if tz is not None and getattr(index, 'tz', None) is None: arr = conversion.tz_localize_to_utc(ensure_int64( index.values), tz, ambiguous=ambiguous) index = cls(arr) # index is localized datetime64 array -> have to convert # start/end as well to compare if start is not None: start = start.tz_localize(tz).asm8 if end is not None: end = end.tz_localize(tz).asm8 else: # Create a linearly spaced date_range in local time start = start.tz_localize(tz) end = end.tz_localize(tz) arr = np.linspace(start.value, end.value, periods) index = cls._simple_new(arr.astype('M8[ns]'), freq=None, tz=tz) if not left_closed and len(index) and index[0] == start: index = index[1:] if not right_closed and len(index) and index[-1] == end: index = index[:-1] return cls._simple_new(index.values, freq=freq, tz=tz)
def interval_range(start=None, end=None, periods=None, freq=None, name=None, closed='right'): """ Return a fixed frequency IntervalIndex Parameters ---------- start : numeric or datetime-like, default None Left bound for generating intervals end : numeric or datetime-like, default None Right bound for generating intervals periods : integer, default None Number of periods to generate freq : numeric, string, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 for numeric and 'D' for datetime-like. name : string, default None Name of the resulting IntervalIndex closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. Returns ------- rng : IntervalIndex See Also -------- IntervalIndex : An Index of intervals that are all closed on the same side. Notes ----- Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, exactly three must be specified. If ``freq`` is omitted, the resulting ``IntervalIndex`` will have ``periods`` linearly spaced elements between ``start`` and ``end``, inclusively. To learn more about datetime-like frequency strings, please see `this link <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__. Examples -------- Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5) IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], closed='right', dtype='interval[int64]') Additionally, datetime-like input is also supported. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... end=pd.Timestamp('2017-01-04')) IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04]], closed='right', dtype='interval[datetime64[ns]]') The ``freq`` parameter specifies the frequency between the left and right. endpoints of the individual intervals within the ``IntervalIndex``. For numeric ``start`` and ``end``, the frequency must also be numeric. >>> pd.interval_range(start=0, periods=4, freq=1.5) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], closed='right', dtype='interval[float64]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... periods=3, freq='MS') IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], (2017-03-01, 2017-04-01]], closed='right', dtype='interval[datetime64[ns]]') Specify ``start``, ``end``, and ``periods``; the frequency is generated automatically (linearly spaced). >>> pd.interval_range(start=0, end=6, periods=4) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], closed='right', dtype='interval[float64]') The ``closed`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. >>> pd.interval_range(end=5, periods=4, closed='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], closed='both', dtype='interval[int64]') """ start = com.maybe_box_datetimelike(start) end = com.maybe_box_datetimelike(end) endpoint = start if start is not None else end if freq is None and com._any_none(periods, start, end): freq = 1 if is_number(endpoint) else 'D' if com.count_not_none(start, end, periods, freq) != 3: raise ValueError('Of the four parameters: start, end, periods, and ' 'freq, exactly three must be specified') if not _is_valid_endpoint(start): msg = 'start must be numeric or datetime-like, got {start}' raise ValueError(msg.format(start=start)) elif not _is_valid_endpoint(end): msg = 'end must be numeric or datetime-like, got {end}' raise ValueError(msg.format(end=end)) if is_float(periods): periods = int(periods) elif not is_integer(periods) and periods is not None: msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) if freq is not None and not is_number(freq): try: freq = to_offset(freq) except ValueError: raise ValueError('freq must be numeric or convertible to ' 'DateOffset, got {freq}'.format(freq=freq)) # verify type compatibility if not all([_is_type_compatible(start, end), _is_type_compatible(start, freq), _is_type_compatible(end, freq)]): raise TypeError("start, end, freq need to be type compatible") # +1 to convert interval count to breaks count (n breaks = n-1 intervals) if periods is not None: periods += 1 if is_number(endpoint): # force consistency between start/end/freq (lower end if freq skips it) if com._all_not_none(start, end, freq): end -= (end - start) % freq # compute the period/start/end if unspecified (at most one) if periods is None: periods = int((end - start) // freq) + 1 elif start is None: start = end - (periods - 1) * freq elif end is None: end = start + (periods - 1) * freq breaks = np.linspace(start, end, periods) if all(is_integer(x) for x in com._not_none(start, end, freq)): # np.linspace always produces float output breaks = maybe_downcast_to_dtype(breaks, 'int64') else: # delegate to the appropriate range function if isinstance(endpoint, Timestamp): range_func = date_range else: range_func = timedelta_range breaks = range_func(start=start, end=end, periods=periods, freq=freq) return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def _generate_range(cls, start, end, periods, freq, tz=None, normalize=False, ambiguous='raise', closed=None): if com.count_not_none(start, end, periods, freq) != 3: raise ValueError('Of the four parameters: start, end, periods, ' 'and freq, exactly three must be specified') freq = to_offset(freq) if start is not None: start = Timestamp(start) if end is not None: end = Timestamp(end) if start is None and end is None: if closed is not None: raise ValueError("Closed has to be None if not both of start" "and end are defined") left_closed, right_closed = dtl.validate_endpoints(closed) start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize) tz, _ = _infer_tz_from_endpoints(start, end, tz) if tz is not None: # Localize the start and end arguments start = _maybe_localize_point( start, getattr(start, 'tz', None), start, freq, tz ) end = _maybe_localize_point( end, getattr(end, 'tz', None), end, freq, tz ) if start and end: # Make sure start and end have the same tz start = _maybe_localize_point( start, start.tz, end.tz, freq, tz ) end = _maybe_localize_point( end, end.tz, start.tz, freq, tz ) if freq is not None: if cls._use_cached_range(freq, _normalized, start, end): # Currently always False; never hit # Should be reimplemented as apart of GH 17914 index = cls._cached_range(start, end, periods=periods, freq=freq) else: index = _generate_regular_range(cls, start, end, periods, freq) if tz is not None and getattr(index, 'tz', None) is None: arr = conversion.tz_localize_to_utc( ensure_int64(index.values), tz, ambiguous=ambiguous) index = cls(arr) # index is localized datetime64 array -> have to convert # start/end as well to compare if start is not None: start = start.tz_localize(tz).asm8 if end is not None: end = end.tz_localize(tz).asm8 else: # Create a linearly spaced date_range in local time arr = np.linspace(start.value, end.value, periods) index = cls._simple_new( arr.astype('M8[ns]', copy=False), freq=None, tz=tz ) if not left_closed and len(index) and index[0] == start: index = index[1:] if not right_closed and len(index) and index[-1] == end: index = index[:-1] return cls._simple_new(index.values, freq=freq, tz=tz)
def _generate_range(cls, start, end, periods, freq, tz=None, normalize=False, ambiguous='raise', closed=None): if com.count_not_none(start, end, periods, freq) != 3: raise ValueError('Of the four parameters: start, end, periods, ' 'and freq, exactly three must be specified') freq = to_offset(freq) if start is not None: start = Timestamp(start) if end is not None: end = Timestamp(end) if start is None and end is None: if closed is not None: raise ValueError("Closed has to be None if not both of start" "and end are defined") left_closed, right_closed = dtl.validate_endpoints(closed) start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize) tz, _ = _infer_tz_from_endpoints(start, end, tz) if tz is not None: # Localize the start and end arguments start = _maybe_localize_point( start, getattr(start, 'tz', None), start, freq, tz ) end = _maybe_localize_point( end, getattr(end, 'tz', None), end, freq, tz ) if start and end: # Make sure start and end have the same tz start = _maybe_localize_point( start, start.tz, end.tz, freq, tz ) end = _maybe_localize_point( end, end.tz, start.tz, freq, tz ) if freq is not None: if cls._use_cached_range(freq, _normalized, start, end): # Currently always False; never hit # Should be reimplemented as apart of GH 17914 index = cls._cached_range(start, end, periods=periods, freq=freq) else: index = _generate_regular_range(cls, start, end, periods, freq) if tz is not None and getattr(index, 'tz', None) is None: arr = conversion.tz_localize_to_utc( ensure_int64(index.values), tz, ambiguous=ambiguous) index = cls(arr) # index is localized datetime64 array -> have to convert # start/end as well to compare if start is not None: start = start.tz_localize(tz).asm8 if end is not None: end = end.tz_localize(tz).asm8 else: # Create a linearly spaced date_range in local time arr = np.linspace(start.value, end.value, periods) index = cls._simple_new( arr.astype('M8[ns]', copy=False), freq=None, tz=tz ) if not left_closed and len(index) and index[0] == start: index = index[1:] if not right_closed and len(index) and index[-1] == end: index = index[:-1] return cls._simple_new(index.values, freq=freq, tz=tz)