class LabelArray(ndarray):
    """
    An ndarray subclass for working with arrays of strings.

    Factorizes the input array into integers, but overloads equality on strings
    to check against the factor label.

    Parameters
    ----------
    values : array-like
        Array of values that can be passed to np.asarray with dtype=object.
    missing_value : str
        Scalar value to treat as 'missing' for operations on ``self``.
    categories : list[str], optional
        List of values to use as categories.  If not supplied, categories will
        be inferred as the unique set of entries in ``values``.
    sort : bool, optional
        Whether to sort categories.  If sort is False and categories is
        supplied, they are left in the order provided.  If sort is False and
        categories is None, categories will be constructed in a random order.

    Attributes
    ----------
    categories : ndarray[str]
        An array containing the unique labels of self.
    reverse_categories : dict[str -> int]
        Reverse lookup table for ``categories``. Stores the index in
        ``categories`` at which each unique entry is found.
    missing_value : str or None
        A sentinel missing value with NaN semantics for comparisons.

    Notes
    -----
    Consumers should be cautious when passing instances of LabelArray to numpy
    functions.  We attempt to disallow as many meaningless operations as
    possible, but since a LabelArray is just an ndarray of ints with some
    additional metadata, many numpy functions (for example, trigonometric) will
    happily accept a LabelArray and treat its values as though they were
    integers.

    In a future change, we may be able to disallow more numerical operations by
    creating a wrapper dtype which doesn't register an implementation for most
    numpy ufuncs. Until that change is made, consumers of LabelArray should
    assume that it is undefined behavior to pass a LabelArray to any numpy
    ufunc that operates on semantically-numerical data.

    See Also
    --------
    http://docs.scipy.org/doc/numpy-1.10.0/user/basics.subclassing.html
    """
    SUPPORTED_SCALAR_TYPES = (bytes, unicode, type(None))

    @preprocess(
        values=coerce(list, partial(np.asarray, dtype=object)),
        categories=coerce(np.ndarray, list),
    )
    @expect_types(
        values=np.ndarray,
        missing_value=SUPPORTED_SCALAR_TYPES,
        categories=optional(list),
    )
    @expect_kinds(values=("O", "S", "U"))
    def __new__(cls, values, missing_value, categories=None, sort=True):
        # Numpy's fixed-width string types aren't very efficient. Working with
        # object arrays is faster than bytes or unicode arrays in almost all
        # cases.
        if not is_object(values):
            values = values.astype(object)

        if categories is None:
            codes, categories, reverse_categories = factorize_strings(
                values.ravel(),
                missing_value=missing_value,
                sort=sort,
            )
        else:
            codes, categories, reverse_categories = (
                factorize_strings_known_categories(
                    values.ravel(),
                    categories=categories,
                    missing_value=missing_value,
                    sort=sort,
                )
            )
        # Categories are exposed through a read-only property; freeze the
        # underlying buffer as well.
        categories.setflags(write=False)

        return cls._from_codes_and_metadata(
            codes=codes.reshape(values.shape),
            categories=categories,
            reverse_categories=reverse_categories,
            missing_value=missing_value,
        )

    @classmethod
    def _from_codes_and_metadata(cls,
                                 codes,
                                 categories,
                                 reverse_categories,
                                 missing_value):
        """
        View codes as a LabelArray and set LabelArray metadata on the result.
        """
        ret = codes.view(type=cls, dtype=np.void)
        ret._categories = categories
        ret._reverse_categories = reverse_categories
        ret._missing_value = missing_value
        return ret

    @property
    def categories(self):
        # This is a property because it should be immutable.
        return self._categories

    @property
    def reverse_categories(self):
        # This is a property because it should be immutable.
        return self._reverse_categories

    @property
    def missing_value(self):
        # This is a property because it should be immutable.
        return self._missing_value

    @property
    def missing_value_code(self):
        """
        The integer code under which ``missing_value`` is stored.
        """
        return self.reverse_categories[self.missing_value]

    def has_label(self, value):
        """
        Return whether ``value`` is one of the keys of ``reverse_categories``.
        """
        return value in self.reverse_categories

    def __array_finalize__(self, obj):
        """
        Called by Numpy after array construction.

        There are three cases where this can happen:

        1. Someone tries to directly construct a new array by doing::

            >>> ndarray.__new__(LabelArray, ...)  # doctest: +SKIP

           In this case, obj will be None.  We treat this as an error case and
           fail.

        2. Someone (most likely our own __new__) does::

           >>> other_array.view(type=LabelArray)  # doctest: +SKIP

           In this case, ``self`` will be the new LabelArray instance, and
           ``obj`` will be the array on which ``view`` is being called.

           The caller of ``obj.view`` is responsible for setting category
           metadata on ``self`` after we exit.

        3. Someone creates a new LabelArray by slicing an existing one.

           In this case, ``obj`` will be the original LabelArray.  We're
           responsible for copying over the parent array's category metadata.
        """
        if obj is None:
            raise TypeError(
                "Direct construction of LabelArrays is not supported."
            )

        # See docstring for an explanation of when these will or will not be
        # set.
        self._categories = getattr(obj, 'categories', None)
        self._reverse_categories = getattr(obj, 'reverse_categories', None)
        self._missing_value = getattr(obj, 'missing_value', None)

    def as_int_array(self):
        """
        Convert self into a regular ndarray of ints.

        This is an O(1) operation. It does not copy the underlying data.
        """
        return self.view(
            type=ndarray,
            dtype=int_dtype_with_size_in_bytes(self.itemsize),
        )

    def as_string_array(self):
        """
        Convert self back into an array of strings.

        This is an O(N) operation.
        """
        return self.categories[self.as_int_array()]

    def as_categorical(self, name=None):
        """
        Coerce self into a pandas categorical.

        This is only defined on 1D arrays, since that's all pandas supports.

        Raises
        ------
        ValueError
            If ``self`` has more than one dimension.
        """
        if len(self.shape) > 1:
            raise ValueError("Can't convert a 2D array to a categorical.")

        with ignore_pandas_nan_categorical_warning():
            return pd.Categorical.from_codes(
                self.as_int_array(),
                # We need to make a copy because pandas >= 0.17 fails if this
                # buffer isn't writeable.
                self.categories.copy(),
                ordered=False,
                name=name,
            )

    def as_categorical_frame(self, index, columns, name=None):
        """
        Coerce self into a pandas DataFrame of Categoricals.

        Raises
        ------
        ValueError
            If ``self`` is not 2D, or if ``(len(index), len(columns))`` does
            not equal ``self.shape``.
        """
        if len(self.shape) != 2:
            raise ValueError(
                "Can't convert a non-2D LabelArray into a DataFrame."
            )

        expected_shape = (len(index), len(columns))
        if expected_shape != self.shape:
            raise ValueError(
                "Can't construct a DataFrame with provided indices:\n\n"
                "LabelArray shape is {actual}, but index and columns imply "
                "that shape should be {expected}.".format(
                    actual=self.shape,
                    expected=expected_shape,
                )
            )

        return pd.Series(
            index=pd.MultiIndex.from_product([index, columns]),
            data=self.ravel().as_categorical(name=name),
        ).unstack()

    def __setitem__(self, indexer, value):
        """
        Assign ``value`` at ``indexer``.

        ``value`` may be another LabelArray with identical categories, or a
        scalar label that is already one of our categories.

        Raises
        ------
        CategoryMismatch
            If ``value`` is a LabelArray whose categories differ from ours.
        ValueError
            If ``value`` is a supported scalar that is not in our categories.
        NotImplementedError
            If ``value`` is of any other type.
        """
        self_categories = self.categories

        if isinstance(value, LabelArray):
            value_categories = value.categories
            if compare_arrays(self_categories, value_categories):
                return super(LabelArray, self).__setitem__(indexer, value)
            else:
                raise CategoryMismatch(self_categories, value_categories)

        elif isinstance(value, self.SUPPORTED_SCALAR_TYPES):
            # -1 is never a valid code, so it doubles as "not found".
            value_code = self.reverse_categories.get(value, -1)
            if value_code < 0:
                raise ValueError("%r is not in LabelArray categories." % value)
            self.as_int_array()[indexer] = value_code
        else:
            raise NotImplementedError(
                "Setting into a LabelArray with a value of "
                "type {type} is not yet supported.".format(
                    type=type(value).__name__,
                ),
            )

    def __setslice__(self, i, j, sequence):
        """
        This method was deprecated in Python 2.0. It predates slice objects,
        but Python 2.7.11 still uses it if you implement it, which ndarray
        does.  In newer Pythons, __setitem__ is always called, but we need to
        manually forward in py2.
        """
        self.__setitem__(slice(i, j), sequence)

    def __getitem__(self, indexer):
        """
        Index into ``self``, returning a LabelArray for array results and the
        corresponding category entry for scalar results.
        """
        result = super(LabelArray, self).__getitem__(indexer)
        if result.ndim:
            # Result is still a LabelArray, so we can just return it.
            return result

        # Result is a scalar value, which will be an instance of np.void.
        # Map it back to one of our category entries.
        index = result.view(int_dtype_with_size_in_bytes(self.itemsize))
        return self.categories[index]

    def is_missing(self):
        """
        Like isnan, but checks for locations where we store missing values.
        """
        return self.as_int_array() == self.missing_value_code

    def not_missing(self):
        """
        Like ~isnan, but checks for locations where we store missing values.
        """
        return self.as_int_array() != self.missing_value_code

    def _equality_check(op):
        """
        Shared code for __eq__ and __ne__, parameterized on the actual
        comparison operator to use.
        """
        def method(self, other):

            if isinstance(other, LabelArray):
                self_mv = self.missing_value
                other_mv = other.missing_value
                if self_mv != other_mv:
                    raise MissingValueMismatch(self_mv, other_mv)

                self_categories = self.categories
                other_categories = other.categories
                if not compare_arrays(self_categories, other_categories):
                    raise CategoryMismatch(self_categories, other_categories)

                # Identical categories means codes are directly comparable.
                # Locations that are missing on either side are masked out.
                return (
                    op(self.as_int_array(), other.as_int_array())
                    & self.not_missing()
                    & other.not_missing()
                )

            elif isinstance(other, ndarray):
                # Compare to ndarrays as though we were an array of strings.
                # This is fairly expensive, and should generally be avoided.
                return op(self.as_string_array(), other) & self.not_missing()

            elif isinstance(other, self.SUPPORTED_SCALAR_TYPES):
                # Labels we don't know about map to -1, which matches no code.
                i = self._reverse_categories.get(other, -1)
                return op(self.as_int_array(), i) & self.not_missing()

            # Any other operand type: apply ``op`` to the superclass proxy.
            return op(super(LabelArray, self), other)
        return method

    __eq__ = _equality_check(eq)
    __ne__ = _equality_check(ne)
    del _equality_check

    def view(self, dtype=_NotPassed, type=_NotPassed):
        """
        View ``self`` as another array type.

        Raises
        ------
        TypeError
            If ``dtype`` is passed, differs from ``self.dtype``, and no
            ``type`` is passed.
        """
        if type is _NotPassed and dtype not in (_NotPassed, self.dtype):
            raise TypeError("Can't view LabelArray as another dtype.")

        # The text signature on ndarray.view makes it look like the default
        # values for dtype and type are `None`, but passing None explicitly has
        # different semantics than not passing an arg at all, so we reconstruct
        # the kwargs dict here to simulate the args not being passed at all.
        kwargs = {}
        if dtype is not _NotPassed:
            kwargs['dtype'] = dtype
        if type is not _NotPassed:
            kwargs['type'] = type
        return super(LabelArray, self).view(**kwargs)

    # In general, we support resizing, slicing, and reshaping methods, but not
    # numeric methods.
    SUPPORTED_NDARRAY_METHODS = frozenset([
        'base',
        'compress',
        'copy',
        'data',
        'diagonal',
        'dtype',
        'flat',
        'flatten',
        'item',
        'itemset',
        'itemsize',
        'nbytes',
        'ndim',
        'ravel',
        'repeat',
        'reshape',
        'resize',
        'setflags',
        'shape',
        'size',
        'squeeze',
        'strides',
        'swapaxes',
        'take',
        'trace',
        'transpose',
        'view'
    ])
    PUBLIC_NDARRAY_METHODS = frozenset(
        [s for s in dir(ndarray) if not s.startswith('_')]
    )

    # Generate failing wrappers for all unsupported methods.
    locals().update(
        {
            method: _make_unsupported_method(method)
            for method in PUBLIC_NDARRAY_METHODS - SUPPORTED_NDARRAY_METHODS
        }
    )

    def __repr__(self):
        """
        Render like the repr of the equivalent string array, with the leading
        ``array(`` replaced by ``LabelArray(`` and the trailing dtype dropped.
        """
        repr_lines = repr(self.as_string_array()).splitlines()
        repr_lines[0] = repr_lines[0].replace('array(', 'LabelArray(', 1)
        # Drop everything after the last comma (the ``dtype=...`` suffix) and
        # re-close the paren.
        repr_lines[-1] = repr_lines[-1].rsplit(',', 1)[0] + ')'
        # The extra spaces here account for the difference in length between
        # 'array(' and 'LabelArray(' (5 characters), so that continuation
        # lines stay aligned under the first row.
        return '\n     '.join(repr_lines)

    def empty_like(self, shape):
        """
        Make an empty LabelArray with the same categories as ``self``, filled
        with ``self.missing_value``.
        """
        return type(self)._from_codes_and_metadata(
            codes=np.full(
                shape,
                self.missing_value_code,
                dtype=int_dtype_with_size_in_bytes(self.itemsize),
            ),
            categories=self.categories,
            reverse_categories=self.reverse_categories,
            missing_value=self.missing_value,
        )

    def map_predicate(self, f):
        """
        Map a function from str -> bool element-wise over ``self``.

        ``f`` will be applied exactly once to each non-missing unique value in
        ``self``. Missing values will always return False.
        """
        # Functions passed to this are of type str -> bool.  Don't ever call
        # them on None, which is the only non-str value we ever store in
        # categories.
        if self.missing_value is None:
            f_to_use = lambda x: False if x is None else f(x)
        else:
            f_to_use = f

        # Call f on each unique value in our categories.
        results = np.vectorize(f_to_use, otypes=[bool_dtype])(self.categories)

        # missing_value should produce False no matter what
        results[self.missing_value_code] = False

        # unpack the results from each unique value into their corresponding
        # locations in our indices.
        return results[self.as_int_array()]

    def startswith(self, prefix):
        """
        Element-wise startswith.

        Parameters
        ----------
        prefix : str

        Returns
        -------
        matches : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self started with ``prefix``.
        """
        return self.map_predicate(lambda elem: elem.startswith(prefix))

    def endswith(self, suffix):
        """
        Elementwise endswith.

        Parameters
        ----------
        suffix : str

        Returns
        -------
        matches : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self ended with ``suffix``.
        """
        return self.map_predicate(lambda elem: elem.endswith(suffix))

    def has_substring(self, substring):
        """
        Elementwise contains.

        Parameters
        ----------
        substring : str

        Returns
        -------
        matches : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self contained ``substring``.
        """
        return self.map_predicate(lambda elem: substring in elem)

    @preprocess(pattern=coerce(from_=(bytes, unicode), to=re.compile))
    def matches(self, pattern):
        """
        Elementwise regex match.

        Parameters
        ----------
        pattern : str or compiled regex

        Returns
        -------
        matches : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self was matched by ``pattern``.
        """
        return self.map_predicate(compose(bool, pattern.match))

    # These types all implement an O(N) __contains__, so pre-emptively
    # coerce to `set`.
    @preprocess(container=coerce((list, tuple, np.ndarray), set))
    def element_of(self, container):
        """
        Check if each element of self is an element of ``container``.

        Parameters
        ----------
        container : object
            An object implementing a __contains__ to call on each element of
            ``self``.

        Returns
        -------
        is_contained : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self was an element of ``container``.
        """
        return self.map_predicate(container.__contains__)
class TradingCalendar(with_metaclass(ABCMeta)):
    """
    A TradingCalendar represents the timing information of a single market
    exchange.

    The timing information is made up of two parts: sessions, and opens/closes.

    A session represents a contiguous set of minutes, and has a label that is
    midnight UTC. It is important to note that a session label should not be
    considered a specific point in time, and that midnight UTC is just being
    used for convenience.

    For each session, we store the open and close time in UTC time.
    """
    def __init__(self, start=start_default, end=end_default):
        # Midnight in UTC for each trading day.
        _all_days = date_range(start, end, freq=self.day, tz='UTC')

        # `DatetimeIndex`s of standard opens/closes for each day.
        self._opens = days_at_time(_all_days, self.open_time, self.tz,
                                   self.open_offset)
        self._closes = days_at_time(_all_days, self.close_time, self.tz,
                                    self.close_offset)

        # `DatetimeIndex`s of nonstandard opens/closes
        _special_opens = self._calculate_special_opens(start, end)
        _special_closes = self._calculate_special_closes(start, end)

        # Overwrite the special opens and closes on top of the standard ones.
        _overwrite_special_dates(_all_days, self._opens, _special_opens)
        _overwrite_special_dates(_all_days, self._closes, _special_closes)

        # In pandas 0.16.1 _opens and _closes will lose their timezone
        # information. This looks like it has been resolved in 0.17.1.
        # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz  # noqa
        self.schedule = DataFrame(
            index=_all_days,
            columns=['market_open', 'market_close'],
            data={
                'market_open': self._opens,
                'market_close': self._closes,
            },
            dtype='datetime64[ns]',
        )

        # Integer (nanosecond) copies of the schedule, used by the
        # searchsorted/divider helpers below.
        self.market_opens_nanos = (
            self.schedule.market_open.values.astype(np.int64)
        )
        self.market_closes_nanos = (
            self.schedule.market_close.values.astype(np.int64)
        )
        self._trading_minutes_nanos = self.all_minutes.values.astype(np.int64)

        self.first_trading_session = _all_days[0]
        self.last_trading_session = _all_days[-1]

        # Session labels of every session that has a special close.
        self._early_closes = pd.DatetimeIndex(
            _special_closes.map(self.minute_to_session_label)
        )

    @lazyval
    def day(self):
        return CustomBusinessDay(
            holidays=self.adhoc_holidays,
            calendar=self.regular_holidays,
        )

    @abstractproperty
    def name(self):
        raise NotImplementedError()

    @abstractproperty
    def tz(self):
        raise NotImplementedError()

    @abstractproperty
    def open_time(self):
        raise NotImplementedError()

    @abstractproperty
    def close_time(self):
        raise NotImplementedError()

    @property
    def open_offset(self):
        return 0

    @property
    def close_offset(self):
        return 0

    @property
    def regular_holidays(self):
        """
        Returns
        -------
        pd.AbstractHolidayCalendar: a calendar containing the regular holidays
        for this calendar
        """
        return None

    @property
    def adhoc_holidays(self):
        return []

    @property
    def special_opens(self):
        """
        A list of special open times and corresponding HolidayCalendars.

        Returns
        -------
        list: List of (time, AbstractHolidayCalendar) tuples
        """
        return []

    @property
    def special_opens_adhoc(self):
        """
        Returns
        -------
        list: List of (time, DatetimeIndex) tuples that represent special
         opens that cannot be codified into rules.
        """
        return []

    @property
    def special_closes(self):
        """
        A list of special close times and corresponding HolidayCalendars.

        Returns
        -------
        list: List of (time, AbstractHolidayCalendar) tuples
        """
        return []

    @property
    def special_closes_adhoc(self):
        """
        Returns
        -------
        list: List of (time, DatetimeIndex) tuples that represent special
         closes that cannot be codified into rules.
        """
        return []

    # -----

    @property
    def opens(self):
        """
        The ``market_open`` column of ``self.schedule``.
        """
        return self.schedule.market_open

    @property
    def closes(self):
        """
        The ``market_close`` column of ``self.schedule``.
        """
        return self.schedule.market_close

    @property
    def early_closes(self):
        """
        Session labels computed in ``__init__`` from the special closes.
        """
        return self._early_closes

    def is_session(self, dt):
        """
        Given a dt, returns whether it's a valid session label.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt that is being tested.

        Returns
        -------
        bool
            Whether the given dt is a valid session label.
        """
        return dt in self.schedule.index

    def is_open_on_minute(self, dt):
        """
        Given a dt, return whether this exchange is open at the given dt.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to check if this exchange is open.

        Returns
        -------
        bool
            Whether the exchange is open on this dt.
        """
        return is_open(self.market_opens_nanos, self.market_closes_nanos,
                       dt.value)

    def next_open(self, dt):
        """
        Given a dt, returns the next open.

        If the given dt happens to be a session open, the next session's open
        will be returned.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next open.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the next open.
        """
        idx = next_divider_idx(self.market_opens_nanos, dt.value)
        return pd.Timestamp(self.market_opens_nanos[idx], tz='UTC')

    def next_close(self, dt):
        """
        Given a dt, returns the next close.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next close.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the next close.
        """
        idx = next_divider_idx(self.market_closes_nanos, dt.value)
        return pd.Timestamp(self.market_closes_nanos[idx], tz='UTC')

    def previous_open(self, dt):
        """
        Given a dt, returns the previous open.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous open.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the previous open.
        """
        idx = previous_divider_idx(self.market_opens_nanos, dt.value)
        return pd.Timestamp(self.market_opens_nanos[idx], tz='UTC')

    def previous_close(self, dt):
        """
        Given a dt, returns the previous close.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous close.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the previous close.
        """
        idx = previous_divider_idx(self.market_closes_nanos, dt.value)
        return pd.Timestamp(self.market_closes_nanos[idx], tz='UTC')

    def next_minute(self, dt):
        """
        Given a dt, return the next exchange minute.  If the given dt is not
        an exchange minute, returns the next exchange open.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next exchange minute.

        Returns
        -------
        pd.Timestamp
            The next exchange minute.
        """
        idx = next_divider_idx(self._trading_minutes_nanos, dt.value)
        return self.all_minutes[idx]

    def previous_minute(self, dt):
        """
        Given a dt, return the previous exchange minute.

        Raises KeyError if the given timestamp is not an exchange minute.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous exchange minute.

        Returns
        -------
        pd.Timestamp
            The previous exchange minute.
        """
        idx = previous_divider_idx(self._trading_minutes_nanos, dt.value)
        return self.all_minutes[idx]

    def next_session_label(self, session_label):
        """
        Given a session label, returns the label of the next session.

        Parameters
        ----------
        session_label: pd.Timestamp
            A session whose next session is desired.

        Returns
        -------
        pd.Timestamp
            The next session label (midnight UTC).

        Notes
        -----
        Raises ValueError if the given session is the last session in this
        calendar.
        """
        idx = self.schedule.index.get_loc(session_label)
        try:
            return self.schedule.index[idx + 1]
        except IndexError:
            # Only translate the error when it really is "ran off the end";
            # anything else is unexpected and is re-raised untouched.
            if idx == len(self.schedule.index) - 1:
                raise ValueError("There is no next session as this is the end"
                                 " of the exchange calendar.")
            else:
                raise

    def previous_session_label(self, session_label):
        """
        Given a session label, returns the label of the previous session.

        Parameters
        ----------
        session_label: pd.Timestamp
            A session whose previous session is desired.

        Returns
        -------
        pd.Timestamp
            The previous session label (midnight UTC).

        Notes
        -----
        Raises ValueError if the given session is the first session in this
        calendar.
        """
        idx = self.schedule.index.get_loc(session_label)
        # Checked explicitly: index[-1] would silently wrap to the last
        # session instead of failing.
        if idx == 0:
            raise ValueError("There is no previous session as this is the"
                             " beginning of the exchange calendar.")

        return self.schedule.index[idx - 1]

    def minutes_for_session(self, session_label):
        """
        Given a session label, return the minutes for that session.

        Parameters
        ----------
        session_label: pd.Timestamp (midnight UTC)
            A session label whose session's minutes are desired.

        Returns
        -------
        pd.DateTimeIndex
            All the minutes for the given session.
        """
        data = self.schedule.loc[session_label]
        return self.all_minutes[
            self.all_minutes.slice_indexer(
                data.market_open,
                data.market_close,
            )
        ]

    def minutes_window(self, start_dt, count):
        """
        Return a window of calendar minutes anchored at ``start_dt``.

        If ``start_dt`` is not found in ``all_minutes``, the close of the
        previous session is used as the anchor instead.

        Parameters
        ----------
        start_dt: pd.Timestamp
            The minute at which the window is anchored.
        count: int
            Defines the length and the direction of the window. A
            non-negative count yields ``count`` minutes starting at the
            anchor; a negative count yields the minutes ending at (and
            including) the anchor.

        Returns
        -------
        pd.DatetimeIndex
            The desired minutes.
        """
        try:
            start_idx = self.all_minutes.get_loc(start_dt)
        except KeyError:
            # if this is not a market minute, go to the previous session's
            # close
            previous_session = self.minute_to_session_label(
                start_dt, direction="previous"
            )

            previous_close = self.open_and_close_for_session(
                previous_session
            )[1]

            start_idx = self.all_minutes.get_loc(previous_close)

        end_idx = start_idx + count

        if start_idx > end_idx:
            return self.all_minutes[(end_idx + 1):(start_idx + 1)]
        else:
            return self.all_minutes[start_idx:end_idx]

    def sessions_in_range(self, start_session_label, end_session_label):
        """
        Given start and end session labels, return all the sessions in that
        range, inclusive.

        Parameters
        ----------
        start_session_label: pd.Timestamp (midnight UTC)
            The label representing the first session of the desired range.

        end_session_label: pd.Timestamp (midnight UTC)
            The label representing the last session of the desired range.

        Returns
        -------
        pd.DatetimeIndex
            The desired sessions.
        """
        return self.all_sessions[
            self.all_sessions.slice_indexer(
                start_session_label,
                end_session_label,
            )
        ]

    def sessions_window(self, session_label, count):
        """
        Given a session label and a window size, returns a list of sessions
        of size `count` + 1, that either starts with the given session
        (if `count` is positive) or ends with the given session (if `count` is
        negative).

        Parameters
        ----------
        session_label: pd.Timestamp
            The label of the initial session.

        count: int
            Defines the length and the direction of the window.

        Returns
        -------
        pd.DatetimeIndex
            The desired sessions.
        """
        start_idx = self.schedule.index.get_loc(session_label)
        end_idx = start_idx + count

        return self.all_sessions[
            min(start_idx, end_idx):max(start_idx, end_idx) + 1
        ]

    def session_distance(self, start_session_label, end_session_label):
        """
        Given a start and end session label, returns the distance between
        them.  For example, for three consecutive sessions Mon., Tues., and
        Wed, `session_distance(Mon, Wed)` would return 2.

        Parameters
        ----------
        start_session_label: pd.Timestamp
            The label of the start session.
        end_session_label: pd.Timestamp
            The label of the ending session.

        Returns
        -------
        int
            The distance between the two sessions.
        """
        start_idx = self.all_sessions.searchsorted(
            self.minute_to_session_label(start_session_label)
        )

        end_idx = self.all_sessions.searchsorted(
            self.minute_to_session_label(end_session_label)
        )

        return abs(end_idx - start_idx)

    def minutes_in_range(self, start_minute, end_minute):
        """
        Given start and end minutes, return all the calendar minutes
        in that range, inclusive.

        Given minutes don't need to be calendar minutes.

        Parameters
        ----------
        start_minute: pd.Timestamp
            The minute representing the start of the desired range.

        end_minute: pd.Timestamp
            The minute representing the end of the desired range.

        Returns
        -------
        pd.DatetimeIndex
            The minutes in the desired range.
        """
        minutes_nanos = self._trading_minutes_nanos

        start_idx = searchsorted(minutes_nanos, start_minute.value)
        end_idx = searchsorted(minutes_nanos, end_minute.value)

        # ``end_idx`` equals ``len(minutes_nanos)`` when ``end_minute`` is
        # later than every calendar minute (as with numpy's searchsorted);
        # guard the lookup so that case returns the tail of the calendar
        # instead of raising IndexError.
        if (end_idx < len(minutes_nanos) and
                end_minute.value == minutes_nanos[end_idx]):
            # if the end minute is a market minute, increase by 1
            end_idx += 1

        return self.all_minutes[start_idx:end_idx]

    def minutes_for_sessions_in_range(self,
                                      start_session_label,
                                      end_session_label):
        """
        Returns all the minutes for all the sessions from the given start
        session label to the given end session label, inclusive.

        Parameters
        ----------
        start_session_label: pd.Timestamp
            The label of the first session in the range.

        end_session_label: pd.Timestamp
            The label of the last session in the range.

        Returns
        -------
        pd.DatetimeIndex
            The minutes in the desired range.
        """
        first_minute, _ = self.open_and_close_for_session(start_session_label)
        _, last_minute = self.open_and_close_for_session(end_session_label)

        return self.minutes_in_range(first_minute, last_minute)

    def open_and_close_for_session(self, session_label):
        """
        Returns a tuple of timestamps of the open and close of the session
        represented by the given label.

        Parameters
        ----------
        session_label: pd.Timestamp
            The session whose open and close are desired.

        Returns
        -------
        (Timestamp, Timestamp)
            The open and close for the given session.
        """
        o_and_c = self.schedule.loc[session_label]

        # `market_open` and `market_close` should be timezone aware, but pandas
        # 0.16.1 does not appear to support this:
        # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz  # noqa
        return (o_and_c['market_open'].tz_localize('UTC'),
                o_and_c['market_close'].tz_localize('UTC'))

    @property
    def all_sessions(self):
        return self.schedule.index

    @property
    def first_session(self):
        return self.all_sessions[0]

    @property
    def last_session(self):
        return self.all_sessions[-1]

    @property
    @remember_last
    def all_minutes(self):
        """
        Returns a DatetimeIndex representing all the minutes in this calendar.
        """
        opens_in_ns = self._opens.values.astype('datetime64[ns]')
        closes_in_ns = self._closes.values.astype('datetime64[ns]')

        deltas = closes_in_ns - opens_in_ns

        # + 1 because we want 390 minutes per standard day, not 389
        daily_sizes = (deltas / NANOS_IN_MINUTE) + 1
        num_minutes = np.sum(daily_sizes).astype(np.int64)

        # One allocation for the entire thing. This assumes that each day
        # represents a contiguous block of minutes.
        all_minutes = np.empty(num_minutes, dtype='datetime64[ns]')

        idx = 0
        for day_idx, size in enumerate(daily_sizes):
            # lots of small allocations, but it's fast enough for now.

            # size is a np.timedelta64, so we need to int it
            size_int = int(size)
            all_minutes[idx:(idx + size_int)] = np.arange(
                opens_in_ns[day_idx],
                closes_in_ns[day_idx] + NANOS_IN_MINUTE,
                NANOS_IN_MINUTE
            )

            idx += size_int

        return DatetimeIndex(all_minutes).tz_localize("UTC")

    @preprocess(dt=coerce(pd.Timestamp, attrgetter('value')))
    def minute_to_session_label(self, dt, direction="next"):
        """
        Given a minute, get the label of its containing session.

        Parameters
        ----------
        dt : pd.Timestamp or nanosecond offset
            The dt for which to get the containing session.

        direction: str
            "next" (default) means that if the given dt is not part of a
            session, return the label of the next session.

            "previous" means that if the given dt is not part of a session,
            return the label of the previous session.

            "none" means that a ValueError will be raised if the given
            dt is not part of a session.

        Returns
        -------
        pd.Timestamp (midnight UTC)
            The label of the containing session.

        Raises
        ------
        ValueError
            If ``direction`` is "none" and the exchange is not open at ``dt``,
            or if ``direction`` is not one of "next", "previous" or "none".
        """
        # The first close at or after ``dt`` belongs to the session that
        # either contains ``dt`` or is the next one after it.
        idx = searchsorted(self.market_closes_nanos, dt)
        current_or_next_session = self.schedule.index[idx]

        if direction == "previous":
            if not is_open(self.market_opens_nanos, self.market_closes_nanos,
                           dt):
                # if the exchange is closed, use the previous session
                return self.schedule.index[idx - 1]
        elif direction == "none":
            if not is_open(self.market_opens_nanos, self.market_closes_nanos,
                           dt):
                # if the exchange is closed, blow up
                raise ValueError("The given dt is not an exchange minute!")
        elif direction != "next":
            # invalid direction
            raise ValueError("Invalid direction parameter: "
                             "{0}".format(direction))

        return current_or_next_session

    def minute_index_to_session_labels(self, index):
        """
        Given a sorted DatetimeIndex of market minutes, return a
        DatetimeIndex of the corresponding session labels.

        Parameters
        ----------
        index: pd.DatetimeIndex or pd.Series
            The ordered list of market minutes we want session labels for.

        Returns
        -------
        pd.DatetimeIndex (UTC)
            The list of session labels corresponding to the given minutes.
        """
        def minute_to_session_label_nanos(dt_nanos):
            return self.minute_to_session_label(dt_nanos).value

        return DatetimeIndex(
            minutes_to_session_labels(
                index.values.astype(np.int64),
                minute_to_session_label_nanos,
                self.market_closes_nanos,
            ).astype('datetime64[ns]'),
            tz='UTC'
        )

    def _special_dates(self, calendars, ad_hoc_dates, start_date, end_date):
        """
        Union an iterable of pairs of the form (time, calendar)
        and an iterable of pairs of the form (time, [dates])

        (This is shared logic for computing special opens and special closes.)
        """
        _dates = DatetimeIndex([], tz='UTC').union_many(
            [
                holidays_at_time(calendar, start_date, end_date, time_,
                                 self.tz)
                for time_, calendar in calendars
            ] + [
                days_at_time(datetimes, time_, self.tz)
                for time_, datetimes in ad_hoc_dates
            ]
        )
        # Clip to the requested range, inclusive on both ends.
        return _dates[(_dates >= start_date) & (_dates <= end_date)]

    def _calculate_special_opens(self, start, end):
        return self._special_dates(
            self.special_opens,
            self.special_opens_adhoc,
            start,
            end,
        )

    def _calculate_special_closes(self, start, end):
        return self._special_dates(
            self.special_closes,
            self.special_closes_adhoc,
            start,
            end,
        )
class LabelArray(ndarray):
    """
    An ndarray subclass for working with arrays of strings.

    Factorizes the input array into integers, but overloads equality on strings
    to check against the factor label.

    Parameters
    ----------
    values : array-like
        Array of values that can be passed to np.asarray with dtype=object.
    missing_value : bytes, unicode, or None
        Scalar value to treat as 'missing' for operations on ``self``.
    categories : list[str], optional
        List of values to use as categories.  If not supplied, categories will
        be inferred as the unique set of entries in ``values``.
    sort : bool, optional
        Whether to sort categories.  If sort is False and categories is
        supplied, they are left in the order provided.  If sort is False and
        categories is None, categories will be constructed in a random order.

    Attributes
    ----------
    categories : ndarray[str]
        An array containing the unique labels of self.
    reverse_categories : dict[str -> int]
        Reverse lookup table for ``categories``. Stores the index in
        ``categories`` at which each unique entry is found.
    missing_value : str or None
        A sentinel missing value with NaN semantics for comparisons.

    Notes
    -----
    Consumers should be cautious when passing instances of LabelArray to numpy
    functions.  We attempt to disallow as many meaningless operations as
    possible, but since a LabelArray is just an ndarray of ints with some
    additional metadata, many numpy functions (for example, trigonometric)
    will happily accept a LabelArray and treat its values as though they were
    integers.

    In a future change, we may be able to disallow more numerical operations
    by creating a wrapper dtype which doesn't register an implementation for
    most numpy ufuncs. Until that change is made, consumers of LabelArray
    should assume that it is undefined behavior to pass a LabelArray to any
    numpy ufunc that operates on semantically-numerical data.

    See Also
    --------
    https://docs.scipy.org/doc/numpy-1.11.0/user/basics.subclassing.html
    """
    SUPPORTED_SCALAR_TYPES = (bytes, unicode, type(None))
    SUPPORTED_NON_NONE_SCALAR_TYPES = (bytes, unicode)

    @preprocess(
        values=coerce(list, partial(np.asarray, dtype=object)),
        # Coerce ``list`` to ``list`` to make a copy. Code internally may call
        # ``categories.insert(0, missing_value)`` which will mutate this list
        # in place.
        categories=coerce((list, np.ndarray, set), list),
    )
    @expect_types(
        values=np.ndarray,
        missing_value=SUPPORTED_SCALAR_TYPES,
        categories=optional(list),
    )
    @expect_kinds(values=("O", "S", "U"))
    def __new__(cls, values, missing_value, categories=None, sort=True):
        # Numpy's fixed-width string types aren't very efficient. Working with
        # object arrays is faster than bytes or unicode arrays in almost all
        # cases.
        if not is_object(values):
            values = values.astype(object)

        # Flatten (and later re-inflate) in the array's own memory order.
        if values.flags.f_contiguous:
            ravel_order = "F"
        else:
            ravel_order = "C"

        if categories is None:
            codes, categories, reverse_categories = factorize_strings(
                values.ravel(ravel_order),
                missing_value=missing_value,
                sort=sort,
            )
        else:
            (
                codes,
                categories,
                reverse_categories,
            ) = factorize_strings_known_categories(
                values.ravel(ravel_order),
                categories=categories,
                missing_value=missing_value,
                sort=sort,
            )
        # ``categories`` is exposed through a read-only property, so freeze
        # the underlying buffer as well.
        categories.setflags(write=False)

        return cls.from_codes_and_metadata(
            codes=codes.reshape(values.shape, order=ravel_order),
            categories=categories,
            reverse_categories=reverse_categories,
            missing_value=missing_value,
        )

    @classmethod
    def from_codes_and_metadata(cls,
                                codes,
                                categories,
                                reverse_categories,
                                missing_value):
        """
        Rehydrate a LabelArray from the codes and metadata.

        Parameters
        ----------
        codes : np.ndarray[integral]
            The codes for the label array.
        categories : np.ndarray[object]
            The unique string categories.
        reverse_categories : dict[str, int]
            The mapping from category to its code-index.
        missing_value : any
            The value used to represent missing data.

        Returns
        -------
        la : LabelArray
            A view of ``codes`` (no copy) carrying the given metadata.
        """
        ret = codes.view(type=cls, dtype=np.void)
        ret._categories = categories
        ret._reverse_categories = reverse_categories
        ret._missing_value = missing_value
        return ret

    @classmethod
    def from_categorical(cls, categorical, missing_value=None):
        """
        Create a LabelArray from a pandas categorical.

        Parameters
        ----------
        categorical : pd.Categorical
            The categorical object to convert.
        missing_value : bytes, unicode, or None, optional
            The missing value to use for this LabelArray.

        Returns
        -------
        la : LabelArray
            The LabelArray representation of this categorical.
        """
        # NOTE(review): ``categorical`` and ``categorical.categories`` are
        # forwarded unchanged; confirm that they satisfy the type checks
        # declared on ``__new__``.
        return cls(
            categorical,
            missing_value,
            categorical.categories,
        )

    @property
    def categories(self):
        # This is a property because it should be immutable.
        return self._categories

    @property
    def reverse_categories(self):
        # This is a property because it should be immutable.
        return self._reverse_categories

    @property
    def missing_value(self):
        # This is a property because it should be immutable.
        return self._missing_value

    @property
    def missing_value_code(self):
        """The integer code under which ``missing_value`` is stored."""
        return self.reverse_categories[self.missing_value]

    def has_label(self, value):
        """Return True if ``value`` is one of this array's categories."""
        return value in self.reverse_categories

    def __array_finalize__(self, obj):
        """
        Called by Numpy after array construction.

        There are three cases where this can happen:

        1. Someone tries to directly construct a new array by doing::

            >>> ndarray.__new__(LabelArray, ...)  # doctest: +SKIP

           In this case, obj will be None.  We treat this as an error case and
           fail.

        2. Someone (most likely our own __new__) does::

           >>> other_array.view(type=LabelArray)  # doctest: +SKIP

           In this case, ``self`` will be the new LabelArray instance, and
           ``obj`` will be the array on which ``view`` is being called.

           The caller of ``obj.view`` is responsible for setting category
           metadata on ``self`` after we exit.

        3. Someone creates a new LabelArray by slicing an existing one.

           In this case, ``obj`` will be the original LabelArray.  We're
           responsible for copying over the parent array's category metadata.
        """
        if obj is None:
            raise TypeError(
                "Direct construction of LabelArrays is not supported.")

        # See docstring for an explanation of when these will or will not be
        # set.
        self._categories = getattr(obj, "categories", None)
        self._reverse_categories = getattr(obj, "reverse_categories", None)
        self._missing_value = getattr(obj, "missing_value", None)

    def as_int_array(self):
        """
        Convert self into a regular ndarray of ints.

        This is an O(1) operation. It does not copy the underlying data.
        """
        return self.view(
            type=ndarray,
            dtype=unsigned_int_dtype_with_size_in_bytes(self.itemsize),
        )

    def as_string_array(self):
        """
        Convert self back into an array of strings.

        This is an O(N) operation.
        """
        return self.categories[self.as_int_array()]

    def as_categorical(self):
        """
        Coerce self into a pandas categorical.

        This is only defined on 1D arrays, since that's all pandas supports.

        Raises
        ------
        ValueError
            If ``self`` has more than one dimension.
        """
        if len(self.shape) > 1:
            raise ValueError("Can't convert a 2D array to a categorical.")

        with ignore_pandas_nan_categorical_warning():
            return pd.Categorical.from_codes(
                self.as_int_array(),
                # We need to make a copy because pandas >= 0.17 fails if this
                # buffer isn't writeable.
                self.categories.copy(),
                ordered=False,
            )

    def as_categorical_frame(self, index, columns, name=None):
        """
        Coerce self into a pandas DataFrame of Categoricals.

        Parameters
        ----------
        index, columns : sequence
            Row and column labels; their lengths must match ``self.shape``.
        name : optional
            Name given to the intermediate Series before unstacking.

        Raises
        ------
        ValueError
            If ``self`` is not 2D, or if the labels imply a different shape.
        """
        if len(self.shape) != 2:
            raise ValueError(
                "Can't convert a non-2D LabelArray into a DataFrame.")

        expected_shape = (len(index), len(columns))
        if expected_shape != self.shape:
            raise ValueError(
                "Can't construct a DataFrame with provided indices:\n\n"
                "LabelArray shape is {actual}, but index and columns imply "
                "that shape should be {expected}.".format(
                    actual=self.shape,
                    expected=expected_shape,
                ))

        return pd.Series(
            index=pd.MultiIndex.from_product([index, columns]),
            data=self.ravel().as_categorical(),
            name=name,
        ).unstack()

    def __setitem__(self, indexer, value):
        """
        Assign a scalar label or another LabelArray into ``self``.

        Raises
        ------
        ValueError
            If a scalar ``value`` is not one of our categories.
        CategoryMismatch
            If ``value`` is a LabelArray whose categories are neither equal
            to ours nor (with a matching missing value) a subset of ours.
        NotImplementedError
            If ``value`` is of any other type.
        """
        self_categories = self.categories

        if isinstance(value, self.SUPPORTED_SCALAR_TYPES):
            self.set_scalar(indexer, value)
        elif isinstance(value, LabelArray):
            value_categories = value.categories
            if compare_arrays(self_categories, value_categories):
                # Identical categories: the codes are directly compatible.
                return super(LabelArray, self).__setitem__(indexer, value)
            elif self.missing_value == value.missing_value and set(
                    value.categories) <= set(self.categories):
                # The other array's labels are a subset of ours: re-encode
                # its strings against our categories before assigning.
                rhs = LabelArray.from_codes_and_metadata(
                    *factorize_strings_known_categories(
                        value.as_string_array().ravel(),
                        list(self.categories),
                        self.missing_value,
                        False,
                    ),
                    missing_value=self.missing_value
                ).reshape(value.shape)
                super(LabelArray, self).__setitem__(indexer, rhs)
            else:
                raise CategoryMismatch(self_categories, value_categories)
        else:
            raise NotImplementedError(
                "Setting into a LabelArray with a value of "
                "type {type} is not yet supported.".format(
                    type=type(value).__name__,
                ),
            )

    def set_scalar(self, indexer, value):
        """
        Set scalar value into the array.

        Parameters
        ----------
        indexer : any
            The indexer to set the value at.
        value : str
            The value to assign at the given locations.

        Raises
        ------
        ValueError
            Raised when ``value`` is not a valid element of this label array.
        """
        try:
            value_code = self.reverse_categories[value]
        except KeyError:
            # Wrap in a 1-tuple so that tuple values format correctly.
            raise ValueError(
                "%r is not in LabelArray categories." % (value,))

        self.as_int_array()[indexer] = value_code

    def __getitem__(self, indexer):
        result = super(LabelArray, self).__getitem__(indexer)
        if result.ndim:
            # Result is still a LabelArray, so we can just return it.
            return result

        # Result is a scalar value, which will be an instance of np.void.
        # Map it back to one of our category entries.
        index = result.view(
            unsigned_int_dtype_with_size_in_bytes(self.itemsize),
        )
        return self.categories[index]

    def is_missing(self):
        """
        Like isnan, but checks for locations where we store missing values.
        """
        return self.as_int_array() == self.missing_value_code

    def not_missing(self):
        """
        Like ~isnan, but checks for locations where we store missing values.
        """
        return self.as_int_array() != self.missing_value_code

    def _equality_check(op):
        """
        Shared code for __eq__ and __ne__, parameterized on the actual
        comparison operator to use.
        """
        def method(self, other):

            if isinstance(other, LabelArray):
                self_mv = self.missing_value
                other_mv = other.missing_value
                if self_mv != other_mv:
                    raise MissingValueMismatch(self_mv, other_mv)

                self_categories = self.categories
                other_categories = other.categories
                if not compare_arrays(self_categories, other_categories):
                    raise CategoryMismatch(self_categories, other_categories)

                # Missing entries never compare True, on either side.
                return (
                    op(self.as_int_array(), other.as_int_array())
                    & self.not_missing()
                    & other.not_missing()
                )

            elif isinstance(other, ndarray):
                # Compare to ndarrays as though we were an array of strings.
                # This is fairly expensive, and should generally be avoided.
                return op(self.as_string_array(), other) & self.not_missing()

            elif isinstance(other, self.SUPPORTED_SCALAR_TYPES):
                # -1 is used as the code for labels we don't contain.
                i = self._reverse_categories.get(other, -1)
                return op(self.as_int_array(), i) & self.not_missing()

            return op(super(LabelArray, self), other)
        return method

    __eq__ = _equality_check(eq)
    __ne__ = _equality_check(ne)
    del _equality_check

    def view(self, dtype=_NotPassed, type=_NotPassed):
        """
        View ``self`` as another array type.

        Raises
        ------
        TypeError
            If only a ``dtype`` is given and it differs from ``self.dtype``.
        """
        if type is _NotPassed and dtype not in (_NotPassed, self.dtype):
            raise TypeError("Can't view LabelArray as another dtype.")

        # The text signature on ndarray.view makes it look like the default
        # values for dtype and type are `None`, but passing None explicitly has
        # different semantics than not passing an arg at all, so we reconstruct
        # the kwargs dict here to simulate the args not being passed at all.
        kwargs = {}
        if dtype is not _NotPassed:
            kwargs["dtype"] = dtype
        if type is not _NotPassed:
            kwargs["type"] = type
        return super(LabelArray, self).view(**kwargs)

    def astype(self,
               dtype,
               order="K",
               casting="unsafe",
               subok=True,
               copy=True):
        """
        Convert ``self`` to ``dtype``.

        Only our own dtype, ``object`` and byte-string dtypes are supported.
        ``dtype`` may be anything accepted by ``np.dtype``.

        Raises
        ------
        TypeError
            If ``dtype`` is not a supported (or valid) dtype.
        """
        # Normalize dtype-likes (e.g. "S3", ``object``) so that the ``kind``
        # check below works for them too.
        dtype = np.dtype(dtype)

        if dtype == self.dtype:
            if not subok:
                array = self.view(type=np.ndarray)
            else:
                array = self

            if copy:
                return array.copy()
            return array

        if dtype == object_dtype:
            return self.as_string_array()

        if dtype.kind == "S":
            return self.as_string_array().astype(
                dtype,
                order=order,
                casting=casting,
                subok=subok,
                copy=copy,
            )

        raise TypeError(
            "%s can only be converted into object, string, or void,"
            " got: %r" % (
                type(self).__name__,
                dtype,
            ),
        )

    # In general, we support resizing, slicing, and reshaping methods, but not
    # numeric methods.
    SUPPORTED_NDARRAY_METHODS = frozenset([
        "astype",
        "base",
        "compress",
        "copy",
        "data",
        "diagonal",
        "dtype",
        "flat",
        "flatten",
        "item",
        "itemset",
        "itemsize",
        "nbytes",
        "ndim",
        "ravel",
        "repeat",
        "reshape",
        "resize",
        "setflags",
        "shape",
        "size",
        "squeeze",
        "strides",
        "swapaxes",
        "take",
        "trace",
        "transpose",
        "view",
    ])
    PUBLIC_NDARRAY_METHODS = frozenset(
        [s for s in dir(ndarray) if not s.startswith("_")])

    # Generate failing wrappers for all unsupported methods.
    locals().update({
        method: _make_unsupported_method(method)
        for method in PUBLIC_NDARRAY_METHODS - SUPPORTED_NDARRAY_METHODS
    })

    def __repr__(self):
        repr_lines = repr(self.as_string_array()).splitlines()
        repr_lines[0] = repr_lines[0].replace("array(", "LabelArray(", 1)
        # Drop the trailing dtype annotation from the last line.
        repr_lines[-1] = repr_lines[-1].rsplit(",", 1)[0] + ")"
        # The extra spaces here account for the difference in length between
        # 'array(' and 'LabelArray('.
        padding = " " * (len("LabelArray(") - len("array("))
        return ("\n" + padding).join(repr_lines)

    def empty_like(self, shape):
        """
        Make an empty LabelArray with the same categories as ``self``, filled
        with ``self.missing_value``.
        """
        return type(self).from_codes_and_metadata(
            codes=np.full(
                shape,
                self.missing_value_code,
                dtype=unsigned_int_dtype_with_size_in_bytes(self.itemsize),
            ),
            categories=self.categories,
            reverse_categories=self.reverse_categories,
            missing_value=self.missing_value,
        )

    def map_predicate(self, f):
        """
        Map a function from str -> bool element-wise over ``self``.

        ``f`` will be applied exactly once to each non-missing unique value in
        ``self``. Missing values will always return False.
        """
        # Functions passed to this are of type str -> bool.  Don't ever call
        # them on None, which is the only non-str value we ever store in
        # categories.
        if self.missing_value is None:
            def f_to_use(x):
                return False if x is None else f(x)
        else:
            f_to_use = f

        # Call f on each unique value in our categories.
        results = np.vectorize(f_to_use, otypes=[bool_dtype])(self.categories)

        # missing_value should produce False no matter what
        results[self.missing_value_code] = False

        # unpack the results from each unique value into their corresponding
        # locations in our indices.
        return results[self.as_int_array()]

    def map(self, f):
        """
        Map a function from str -> str element-wise over ``self``.

        ``f`` will be applied exactly once to each non-missing unique value in
        ``self``. Missing values will always map to ``self.missing_value``.

        Raises
        ------
        TypeError
            If ``f`` returns something other than a string (or None, when
            None is our missing value).
        """
        # f() should only return None if None is our missing value.
        if self.missing_value is None:
            allowed_outtypes = self.SUPPORTED_SCALAR_TYPES
        else:
            allowed_outtypes = self.SUPPORTED_NON_NONE_SCALAR_TYPES

        def f_to_use(x,
                     missing_value=self.missing_value,
                     otypes=allowed_outtypes):

            # Don't call f on the missing value; those locations don't exist
            # semantically. We return _sortable_sentinel rather than None
            # because the np.unique call below sorts the categories array,
            # which raises an error on Python 3 because None and str aren't
            # comparable.
            if x == missing_value:
                return _sortable_sentinel

            ret = f(x)

            if not isinstance(ret, otypes):
                raise TypeError(
                    "LabelArray.map expected function {f} to return a string"
                    " or None, but got {type} instead.\n"
                    "Value was {value}.".format(
                        # Not every callable has a __name__ (e.g. partials).
                        f=getattr(f, "__name__", repr(f)),
                        type=type(ret).__name__,
                        value=ret,
                    ))

            if ret == missing_value:
                return _sortable_sentinel

            return ret

        new_categories_with_duplicates = (
            np.vectorize(f_to_use, otypes=[object])(self.categories)
        )

        # If f() maps multiple inputs to the same output, then we can end up
        # with the same code duplicated multiple times. Compress the categories
        # by running them through np.unique, and then use the reverse lookup
        # table to compress codes as well.
        new_categories, bloated_inverse_index = np.unique(
            new_categories_with_duplicates,
            return_inverse=True,
        )

        if new_categories[0] is _sortable_sentinel:
            # f_to_use returns _sortable_sentinel for locations that should be
            # missing values in our output. Since np.unique returns the uniques
            # in sorted order, and since _sortable_sentinel sorts before any
            # string, we only need to check the first array entry.
            new_categories[0] = self.missing_value

        # `reverse_index` will always be a 64 bit integer even if we can hold a
        # smaller array.
        reverse_index = bloated_inverse_index.astype(
            smallest_uint_that_can_hold(len(new_categories)))
        new_codes = np.take(reverse_index, self.as_int_array())

        return self.from_codes_and_metadata(
            new_codes,
            new_categories,
            dict(zip(new_categories, range(len(new_categories)))),
            missing_value=self.missing_value,
        )

    def startswith(self, prefix):
        """
        Element-wise startswith.

        Parameters
        ----------
        prefix : str

        Returns
        -------
        matches : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self started with ``prefix``.
        """
        return self.map_predicate(lambda elem: elem.startswith(prefix))

    def endswith(self, suffix):
        """
        Elementwise endswith.

        Parameters
        ----------
        suffix : str

        Returns
        -------
        matches : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self ended with ``suffix``.
        """
        return self.map_predicate(lambda elem: elem.endswith(suffix))

    def has_substring(self, substring):
        """
        Elementwise contains.

        Parameters
        ----------
        substring : str

        Returns
        -------
        matches : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self contained ``substring``.
        """
        return self.map_predicate(lambda elem: substring in elem)

    @preprocess(pattern=coerce(from_=(bytes, unicode), to=re.compile))
    def matches(self, pattern):
        """
        Elementwise regex match.

        Parameters
        ----------
        pattern : str or compiled regex

        Returns
        -------
        matches : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self was matched by ``pattern``.
        """
        return self.map_predicate(compose(bool, pattern.match))

    # These types all implement an O(N) __contains__, so pre-emptively
    # coerce to `set`.
    @preprocess(container=coerce((list, tuple, np.ndarray), set))
    def element_of(self, container):
        """
        Check if each element of self is an element of ``container``.

        Parameters
        ----------
        container : object
            An object implementing a __contains__ to call on each element of
            ``self``.

        Returns
        -------
        is_contained : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self was an element of ``container``.
        """
        return self.map_predicate(container.__contains__)
class TradingCalendar(with_metaclass(ABCMeta)): """ TradingCalendar代表单个市场交易所的时间信息。 时间信息由两部分组成:会话和开盘/收盘。 会话表示一组连续的分钟,外加一个UTC午夜标签。 需要注意的是,会话标签不应被视为特定的时 间点,使用UTC午夜时间纯粹是出于便利的考虑。 对于每个会话,我们存储UTC时间的开盘和收盘时间。 """ use_lunch_break = False # 标记对象是否使用午休时间 def __init__(self, start=start_default, end=end_default): # 每个交易日UTC的午夜 # self.use_lunch_break = lunch_break # 标记对象是否使用午休时间 # In pandas 0.18.1, pandas calls into its own code here in a way that # fires a warning. The calling code in pandas tries to suppress the # warning, but does so incorrectly, causing it to bubble out here. # Actually catch and suppress the warning here: with warnings.catch_warnings(): warnings.simplefilter('ignore') _all_days = date_range(start, end, freq=self.day, tz='UTC') # 每天标准的开盘和收盘`DatetimeIndex` self._opens = days_at_time(_all_days, self.open_time, self.tz, self.open_offset) self._closes = days_at_time( _all_days, self.close_time, self.tz, self.close_offset ) # 每天非标准的开盘和收盘`DatetimeIndex` _special_opens = self._calculate_special_opens(start, end) _special_closes = self._calculate_special_closes(start, end) # 在标准集的基础上,重写特殊开盘与收盘 _overwrite_special_dates(_all_days, self._opens, _special_opens) _overwrite_special_dates(_all_days, self._closes, _special_closes) # In pandas 0.16.1 _opens and _closes will lose their timezone # information. This looks like it has been resolved in 0.17.1. 
# http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz # noqa self.schedule = DataFrame( index=_all_days, columns=['market_open', 'market_close'], data={ 'market_open': self._opens, 'market_close': self._closes, }, dtype='datetime64[ns]', ) # 简单缓存以避免在“下一个”模式下重新计算相同的分钟 - >会话。 # 对当前zipline代码路径的分析显示,连续调用函数`minute_to_session_label`通常使用相 # 同的输入。 self._minute_to_session_label_cache = LRU(1) # 转化为纳秒整数 self.market_opens_nanos = self.schedule.market_open.values.\ astype(np.int64) self.market_closes_nanos = self.schedule.market_close.values.\ astype(np.int64) # 所有交易分钟(纳秒整数) self._trading_minutes_nanos = self.all_minutes.values.\ astype(np.int64) self.first_trading_session = _all_days[0] self.last_trading_session = _all_days[-1] self._early_closes = pd.DatetimeIndex( _special_closes.map(self.minute_to_session_label) ) @lazyval def day(self): return CustomBusinessDay( holidays=self.adhoc_holidays, # 特别假期 calendar=self.regular_holidays, # 常规假期 ) @abstractproperty def name(self): raise NotImplementedError() @abstractproperty def tz(self): raise NotImplementedError() @abstractproperty def open_time(self): raise NotImplementedError() @abstractproperty def close_time(self): raise NotImplementedError() @property def lunch_break_start_time(self): """ 如果使用午休间隔,必须重写该属性。使用实际午休开始时间 使用该默认值,即表示所有日期内的分钟都有效。 """ return time(23, 99) @property def lunch_break_end_time(self): """ 如果使用午休间隔,必须重写该属性。使用实际午休结束时间 使用该默认值,即表示所有日期内的分钟都有效。 """ return time(0, 0) @property def open_offset(self): return 0 @property def close_offset(self): return 0 @lazyval def _minutes_per_session(self): diff = self.schedule.market_close - self.schedule.market_open diff = diff.astype('timedelta64[m]') return diff + 1 def minutes_count_for_sessions_in_range(self, start_session, end_session): """ Parameters ---------- start_session: pd.Timestamp The first session. end_session: pd.Timestamp The last session. Returns ------- int: The total number of minutes for the contiguous chunk of sessions. 
between start_session and end_session, inclusive. """ return int(self._minutes_per_session[start_session:end_session].sum()) @property def regular_holidays(self): """ Returns ------- pd.AbstractHolidayCalendar: a calendar containing the regular holidays for this calendar """ return None @property def adhoc_holidays(self): return [] @property def special_opens(self): """ A list of special open times and corresponding HolidayCalendars. Returns ------- list: List of (time, AbstractHolidayCalendar) tuples """ return [] @property def special_opens_adhoc(self): """ Returns ------- list: List of (time, DatetimeIndex) tuples that represent special closes that cannot be codified into rules. """ return [] @property def special_closes(self): """ A list of special close times and corresponding HolidayCalendars. Returns ------- list: List of (time, AbstractHolidayCalendar) tuples """ return [] @property def special_closes_adhoc(self): """ Returns ------- list: List of (time, DatetimeIndex) tuples that represent special closes that cannot be codified into rules. """ return [] # ----- @property def opens(self): return self.schedule.market_open @property def closes(self): return self.schedule.market_close @property def early_closes(self): return self._early_closes def is_session(self, dt): """ 给定一个dt,返回它是否是有效的会话标签(请注意会话标签是午夜时分)。 Parameters ---------- dt: pd.Timestamp 将要测试的dt Notes ----- 1. 如dt为日期,且在交易日历内,返回真; 2. 如dt带时间,只有午夜时分才为真; 3. 如带时区,为UTC或者None,返回真; Returns ------- bool 给定的dt是否是有效的会话标签 """ return dt in self.schedule.index def is_open_on_minute(self, dt): """ 给定一个dt,返回此时交易所是否已经开盘 Parameters ---------- dt: pd.Timestamp 用于检查交易所是否已经开盘的dt。 Returns ------- bool 在此时点(dt)交易是否开盘 """ return is_open(self.market_opens_nanos, self.market_closes_nanos, dt.value) def next_open(self, dt): """ 给定一个dt,返回下一个开盘时点 即使给定的dt恰好是会话开盘时点,也会返回下一个会话的开盘点。 Parameters ---------- dt: pd.Timestamp The dt for which to get the next open. Returns ------- pd.Timestamp The UTC timestamp of the next open. 
""" idx = next_divider_idx(self.market_opens_nanos, dt.value) return pd.Timestamp(self.market_opens_nanos[idx], tz='UTC') def next_close(self, dt): """ 给定一个dt,返回下一个收盘时点 Parameters ---------- dt: pd.Timestamp The dt for which to get the next close. Returns ------- pd.Timestamp The UTC timestamp of the next close. """ idx = next_divider_idx(self.market_closes_nanos, dt.value) return pd.Timestamp(self.market_closes_nanos[idx], tz='UTC') def previous_open(self, dt): """ 给定一个dt,返回上一个开盘时点 Parameters ---------- dt: pd.Timestamp The dt for which to get the previous open. Returns ------- pd.Timestamp The UTC imestamp of the previous open. """ idx = previous_divider_idx(self.market_opens_nanos, dt.value) return pd.Timestamp(self.market_opens_nanos[idx], tz='UTC') def previous_close(self, dt): """ 给定一个dt,返回上一个收盘时点 Parameters ---------- dt: pd.Timestamp The dt for which to get the previous close. Returns ------- pd.Timestamp The UTC timestamp of the previous close. """ idx = previous_divider_idx(self.market_closes_nanos, dt.value) return pd.Timestamp(self.market_closes_nanos[idx], tz='UTC') def next_minute(self, dt): """ 给定一个dt,返回下一个交易所在分钟。 如果给定的dt不是交易时点,则返回下一个开盘时点。 如dt在午休时段,则会返回下一个交易分钟,即下午开盘时间。 Parameters ---------- dt: pd.Timestamp The dt for which to get the next exchange minute. Returns ------- pd.Timestamp The next exchange minute. """ idx = next_divider_idx(self._trading_minutes_nanos, dt.value) return self.all_minutes[idx] def previous_minute(self, dt): """ 给点dt,返回上一个交易分钟 如为非交易分钟,触发KeyError异常(原文) 其实,只有当dt处于初始化对象时的开始及结束日期外,才会触发ValueError Parameters ---------- dt: pd.Timestamp The dt for which to get the previous exchange minute. Returns ------- pd.Timestamp The previous exchange minute. 
""" idx = previous_divider_idx(self._trading_minutes_nanos, dt.value) return self.all_minutes[idx] def next_session_label(self, session_label): """ 给定一个会话标签,返回下一个会话的标签。 Notes ----- session_label要么为日期,要么normalize,且须为交易日期 非交易日,或在start与end外,均会触发KeyError异常 Parameters ---------- session_label: pd.Timestamp A session whose next session is desired. Returns ------- pd.Timestamp The next session label (midnight UTC). Notes ----- Raises ValueError if the given session is the last session in this calendar. """ idx = self.schedule.index.get_loc(session_label) try: return self.schedule.index[idx + 1] except IndexError: if idx == len(self.schedule.index) - 1: raise ValueError("There is no next session as this is the end" " of the exchange calendar.") else: raise def previous_session_label(self, session_label): """ 给定一个会话标签,返回上一个会话的标签。 Parameters ---------- session_label: pd.Timestamp A session whose previous session is desired. Returns ------- pd.Timestamp The previous session label (midnight UTC). Notes ----- Raises ValueError if the given session is the first session in this calendar. """ idx = self.schedule.index.get_loc(session_label) if idx == 0: raise ValueError("There is no previous session as this is the" " beginning of the exchange calendar.") return self.schedule.index[idx - 1] def minutes_for_session(self, session_label): """ 给定会话标签,返回该会话的所有分钟。 Parameters ---------- session_label: pd.Timestamp (midnight UTC) A session label whose session's minutes are desired. Returns ------- pd.DateTimeIndex All the minutes for the given session. """ return self.minutes_in_range( start_minute=self.schedule.at[session_label, 'market_open'], end_minute=self.schedule.at[session_label, 'market_close'], ) def execution_minutes_for_session(self, session_label): """ 给定会话标签,返回该会话的执行分钟。 Parameters ---------- session_label: pd.Timestamp (midnight UTC) A session label whose session's minutes are desired. Returns ------- pd.DateTimeIndex All the execution minutes for the given session. 
""" return self.minutes_in_range( start_minute=self.execution_time_from_open( self.schedule.at[session_label, 'market_open'], ), end_minute=self.execution_time_from_close( self.schedule.at[session_label, 'market_close'], ), ) def execution_minutes_for_sessions_in_range(self, start, stop): """期间所有执行分钟""" minutes = self.execution_minutes_for_session return pd.DatetimeIndex( np.concatenate([ minutes(session) for session in self.sessions_in_range(start, stop) ]), tz='UTC', ) def minutes_window(self, start_dt, count): start_dt_nanos = start_dt.value all_minutes_nanos = self._trading_minutes_nanos start_idx = all_minutes_nanos.searchsorted(start_dt_nanos) # searchsorted finds the index of the minute **on or after** start_dt. # If the latter, push back to the prior minute. if all_minutes_nanos[start_idx] != start_dt_nanos: start_idx -= 1 if start_idx < 0 or start_idx >= len(all_minutes_nanos): raise KeyError("Can't start minute window at {}".format(start_dt)) end_idx = start_idx + count if start_idx > end_idx: return self.all_minutes[(end_idx + 1):(start_idx + 1)] else: return self.all_minutes[start_idx:end_idx] def sessions_in_range(self, start_session_label, end_session_label): """ 给定开始和结束会话标签,返回该范围内的所有会话(包含)。 注 -- 输入时使用normalize Parameters ---------- start_session_label: pd.Timestamp (midnight UTC) The label representing the first session of the desired range. end_session_label: pd.Timestamp (midnight UTC) The label representing the last session of the desired range. Returns ------- pd.DatetimeIndex The desired sessions. """ return self.all_sessions[ self.all_sessions.slice_indexer( start_session_label, end_session_label ) ] def sessions_window(self, session_label, count): """ Given a session label and a window size, returns a list of sessions of size `count` + 1, that either starts with the given session (if `count` is positive) or ends with the given session (if `count` is negative). Parameters ---------- session_label: pd.Timestamp The label of the initial session. 
count: int Defines the length and the direction of the window. Returns ------- pd.DatetimeIndex The desired sessions. """ start_idx = self.schedule.index.get_loc(session_label) end_idx = start_idx + count return self.all_sessions[ min(start_idx, end_idx):max(start_idx, end_idx) + 1 ] def session_distance(self, start_session_label, end_session_label): """ Given a start and end session label, returns the distance between them. For example, for three consecutive sessions Mon., Tues., and Wed, ``session_distance(Mon, Wed)`` returns 3. If ``start_session`` is after ``end_session``, the value will be negated. Parameters ---------- start_session_label: pd.Timestamp The label of the start session. end_session_label: pd.Timestamp The label of the ending session inclusive. Returns ------- int The distance between the two sessions. """ negate = end_session_label < start_session_label if negate: start_session_label, end_session_label = ( end_session_label, start_session_label, ) start_idx = self.all_sessions.searchsorted(start_session_label) end_idx = self.all_sessions.searchsorted( end_session_label, side='right', ) out = end_idx - start_idx if negate: out = -out return out def minutes_in_range(self, start_minute, end_minute): """ 给定开始和结束分钟,返回该范围内的所有日历分钟数,包括开始与结束。 给定分钟并不需要是日历分钟 Parameters ---------- start_minute: pd.Timestamp The minute representing the start of the desired range. end_minute: pd.Timestamp The minute representing the end of the desired range. Returns ------- pd.DatetimeIndex The minutes in the desired range. 
""" start_idx = searchsorted(self._trading_minutes_nanos, start_minute.value) end_idx = searchsorted(self._trading_minutes_nanos, end_minute.value) if end_minute.value == self._trading_minutes_nanos[end_idx]: # if the end minute is a market minute, increase by 1 end_idx += 1 return self.all_minutes[start_idx:end_idx] def minutes_for_sessions_in_range(self, start_session_label, end_session_label): """ Returns all the minutes for all the sessions from the given start session label to the given end session label, inclusive. Parameters ---------- start_session_label: pd.Timestamp The label of the first session in the range. end_session_label: pd.Timestamp The label of the last session in the range. Returns ------- pd.DatetimeIndex The minutes in the desired range. """ first_minute, _ = self.open_and_close_for_session(start_session_label) _, last_minute = self.open_and_close_for_session(end_session_label) return self.minutes_in_range(first_minute, last_minute) def open_and_close_for_session(self, session_label): """ Returns a tuple of timestamps of the open and close of the session represented by the given label. Parameters ---------- session_label: pd.Timestamp The session whose open and close are desired. Returns ------- (Timestamp, Timestamp) The open and close for the given session. 
""" sched = self.schedule # `market_open` and `market_close` should be timezone aware, but pandas # 0.16.1 does not appear to support this: # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz # noqa return ( sched.at[session_label, 'market_open'].tz_localize('UTC'), sched.at[session_label, 'market_close'].tz_localize('UTC'), ) def session_open(self, session_label): return self.schedule.at[ session_label, 'market_open' ].tz_localize('UTC') def session_close(self, session_label): return self.schedule.at[ session_label, 'market_close' ].tz_localize('UTC') def session_opens_in_range(self, start_session_label, end_session_label): return self.schedule.loc[ start_session_label:end_session_label, 'market_open', ].dt.tz_localize('UTC') def session_closes_in_range(self, start_session_label, end_session_label): return self.schedule.loc[ start_session_label:end_session_label, 'market_close', ].dt.tz_localize('UTC') @property def all_sessions(self): return self.schedule.index @property def first_session(self): return self.all_sessions[0] @property def last_session(self): return self.all_sessions[-1] def execution_time_from_open(self, open_dates): return open_dates def execution_time_from_close(self, close_dates): return close_dates @lazyval def all_minutes(self): """ 返回表示此日历中所有分钟的`DatetimeIndex`。 """ opens_in_ns = self._opens.values.astype( 'datetime64[ns]', ).view('int64') closes_in_ns = self._closes.values.astype( 'datetime64[ns]', ).view('int64') # compute_all_minutes假设每天仅包含连续分钟块 dts = DatetimeIndex( compute_all_minutes(opens_in_ns, closes_in_ns), tz='utc', ) # 如果有午休,则排除午休时段 if self.use_lunch_break: # 需要使用utc时间 utc_start = days_at_time( [dts[0].date()], self.lunch_break_start_time, self.tz).time[0] utc_end = days_at_time( [dts[0].date()], self.lunch_break_end_time, self.tz).time[0] locs = dts.indexer_between_time( utc_start, utc_end, include_start=True, include_end=True) return dts.delete(locs) else: return dts @preprocess(dt=coerce(pd.Timestamp, 
attrgetter('value'))) def minute_to_session_label(self, dt, direction="next"): """ 给定dt,获取其所在会话的标签 Parameters ---------- dt : pd.Timestamp or nanosecond offset 所含会话的dt direction: str “next”(默认)意味着如果给定的dt不是会话的一部分,则返回下一个会话的标签。 "previous"表示如果给定的dt不是会话的一部分,则返回前一个会话的标签。 "none"表示如果给定的dt不是会话的一部分,则会引发KeyError。 Returns ------- pd.Timestamp (midnight UTC) 所在会话的标签。 """ if direction == "next": try: return self._minute_to_session_label_cache[dt] except KeyError: pass idx = searchsorted(self.market_closes_nanos, dt) current_or_next_session = self.schedule.index[idx] self._minute_to_session_label_cache[dt] = current_or_next_session if direction == "next": return current_or_next_session elif direction == "previous": if not is_open(self.market_opens_nanos, self.market_closes_nanos, dt): # if the exchange is closed, use the previous session return self.schedule.index[idx - 1] elif direction == "none": if not is_open(self.market_opens_nanos, self.market_closes_nanos, dt): # if the exchange is closed, blow up raise ValueError("The given dt is not an exchange minute!") else: # invalid direction raise ValueError("Invalid direction parameter: " "{0}".format(direction)) return current_or_next_session def minute_index_to_session_labels(self, index): """ 给定市场分钟的排序DatetimeIndex,返回相应会话标签的DatetimeIndex。 Parameters ---------- index: pd.DatetimeIndex or pd.Series The ordered list of market minutes we want session labels for. Returns ------- pd.DatetimeIndex (UTC) The list of session labels corresponding to the given minutes. 
""" return pd.Index(map(self.minute_to_session_label, index)) def _special_dates(self, calendars, ad_hoc_dates, start_date, end_date): """ 联合一对以(时间,日历)格式的迭代和一对以(时间,[日期])格式的迭代 (这是计算特殊开盘和特殊收盘的共享逻辑。) """ _dates = DatetimeIndex([], tz='UTC').union_many( [ holidays_at_time(calendar, start_date, end_date, time_, self.tz) for time_, calendar in calendars ] + [ days_at_time(datetimes, time_, self.tz) for time_, datetimes in ad_hoc_dates ] ) return _dates[(_dates >= start_date) & (_dates <= end_date)] def _calculate_special_opens(self, start, end): return self._special_dates( self.special_opens, self.special_opens_adhoc, start, end, ) def _calculate_special_closes(self, start, end): return self._special_dates( self.special_closes, self.special_closes_adhoc, start, end, )
class TradingCalendar(with_metaclass(ABCMeta)):
    """
    A TradingCalendar represents the timing information of a single market
    exchange.

    The timing information is made up of two parts: sessions, and opens/closes.

    A session represents a contiguous set of minutes, and has a label that is
    midnight UTC. It is important to note that a session label should not be
    considered a specific point in time, and that midnight UTC is just being
    used for convenience.

    For each session, we store the open and close time in UTC time.
    """
    def __init__(self, start=start_default, end=end_default):
        # Midnight in UTC for each trading day.

        # In pandas 0.18.1, pandas calls into its own code here in a way that
        # fires a warning. The calling code in pandas tries to suppress the
        # warning, but does so incorrectly, causing it to bubble out here.
        # Actually catch and suppress the warning here:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            _all_days = date_range(start, end, freq=self.day, tz='UTC')

        # `DatetimeIndex`s of standard opens/closes for each day.
        self._opens = days_at_time(_all_days, self.open_time, self.tz,
                                   self.open_offset)
        self._closes = days_at_time(
            _all_days, self.close_time, self.tz, self.close_offset
        )

        # `DatetimeIndex`s of nonstandard opens/closes
        _special_opens = self._calculate_special_opens(start, end)
        _special_closes = self._calculate_special_closes(start, end)

        # Overwrite the special opens and closes on top of the standard ones.
        _overwrite_special_dates(_all_days, self._opens, _special_opens)
        _overwrite_special_dates(_all_days, self._closes, _special_closes)

        # In pandas 0.16.1 _opens and _closes will lose their timezone
        # information. This looks like it has been resolved in 0.17.1.
        # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz  # noqa
        self.schedule = DataFrame(
            index=_all_days,
            columns=['market_open', 'market_close'],
            data={
                'market_open': self._opens,
                'market_close': self._closes,
            },
            dtype='datetime64[ns]',
        )

        # Simple cache to avoid recalculating the same minute -> session in
        # "next" mode. Analysis of current zipline code paths show that
        # `minute_to_session_label` is often called consecutively with the same
        # inputs.
        self._minute_to_session_label_cache = LRU(1)

        self.market_opens_nanos = self.schedule.market_open.values.\
            astype(np.int64)

        self.market_closes_nanos = self.schedule.market_close.values.\
            astype(np.int64)

        self._trading_minutes_nanos = self.all_minutes.values.\
            astype(np.int64)

        self.first_trading_session = _all_days[0]
        self.last_trading_session = _all_days[-1]

        # Session labels of every special close.
        self._early_closes = pd.DatetimeIndex(
            _special_closes.map(self.minute_to_session_label)
        )

    @lazyval
    def day(self):
        """
        CustomBusinessDay offset built from ``adhoc_holidays`` and
        ``regular_holidays``.
        """
        return CustomBusinessDay(
            holidays=self.adhoc_holidays,
            calendar=self.regular_holidays,
        )

    @abstractproperty
    def name(self):
        raise NotImplementedError()

    @abstractproperty
    def tz(self):
        raise NotImplementedError()

    @abstractproperty
    def open_time(self):
        raise NotImplementedError()

    @abstractproperty
    def close_time(self):
        raise NotImplementedError()

    @property
    def open_offset(self):
        """Offset passed to ``days_at_time`` for opens; 0 by default."""
        return 0

    @property
    def close_offset(self):
        """Offset passed to ``days_at_time`` for closes; 0 by default."""
        return 0

    @lazyval
    def _minutes_per_session(self):
        """
        Per-session value of (market_close - market_open) cast to
        'timedelta64[m]', plus one.
        """
        diff = self.schedule.market_close - self.schedule.market_open
        diff = diff.astype('timedelta64[m]')
        # presumably +1 because both the open and the close minute count
        return diff + 1

    def minutes_count_for_sessions_in_range(self, start_session, end_session):
        """
        Parameters
        ----------
        start_session: pd.Timestamp
            The first session.

        end_session: pd.Timestamp
            The last session.

        Returns
        -------
        int: The total number of minutes for the contiguous chunk of sessions
             between start_session and end_session, inclusive.
        """
        return int(self._minutes_per_session[start_session:end_session].sum())

    @property
    def regular_holidays(self):
        """
        Returns
        -------
        pd.AbstractHolidayCalendar: a calendar containing the regular holidays
        for this calendar
        """
        return None

    @property
    def adhoc_holidays(self):
        """Holidays passed as ``holidays=`` to CustomBusinessDay; empty here."""
        return []

    @property
    def special_opens(self):
        """
        A list of special open times and corresponding HolidayCalendars.

        Returns
        -------
        list: List of (time, AbstractHolidayCalendar) tuples
        """
        return []

    @property
    def special_opens_adhoc(self):
        """
        Returns
        -------
        list: List of (time, DatetimeIndex) tuples that represent special
         opens that cannot be codified into rules.
        """
        return []

    @property
    def special_closes(self):
        """
        A list of special close times and corresponding HolidayCalendars.

        Returns
        -------
        list: List of (time, AbstractHolidayCalendar) tuples
        """
        return []

    @property
    def special_closes_adhoc(self):
        """
        Returns
        -------
        list: List of (time, DatetimeIndex) tuples that represent special
         closes that cannot be codified into rules.
        """
        return []

    # -----

    @property
    def opens(self):
        """The 'market_open' column of the schedule."""
        return self.schedule.market_open

    @property
    def closes(self):
        """The 'market_close' column of the schedule."""
        return self.schedule.market_close

    @property
    def early_closes(self):
        """Session labels of the special closes, computed in ``__init__``."""
        return self._early_closes

    def is_session(self, dt):
        """
        Given a dt, returns whether it's a valid session label.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt that is being tested.

        Returns
        -------
        bool
            Whether the given dt is a valid session label.
        """
        return dt in self.schedule.index

    def is_open_on_minute(self, dt):
        """
        Given a dt, return whether this exchange is open at the given dt.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to check if this exchange is open.

        Returns
        -------
        bool
            Whether the exchange is open on this dt.
        """
        return is_open(self.market_opens_nanos, self.market_closes_nanos,
                       dt.value)

    def next_open(self, dt):
        """
        Given a dt, returns the next open.

        If the given dt happens to be a session open, the next session's open
        will be returned.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next open.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the next open.
        """
        idx = next_divider_idx(self.market_opens_nanos, dt.value)
        return pd.Timestamp(self.market_opens_nanos[idx], tz='UTC')

    def next_close(self, dt):
        """
        Given a dt, returns the next close.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next close.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the next close.
        """
        idx = next_divider_idx(self.market_closes_nanos, dt.value)
        return pd.Timestamp(self.market_closes_nanos[idx], tz='UTC')

    def previous_open(self, dt):
        """
        Given a dt, returns the previous open.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous open.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the previous open.
        """
        idx = previous_divider_idx(self.market_opens_nanos, dt.value)
        return pd.Timestamp(self.market_opens_nanos[idx], tz='UTC')

    def previous_close(self, dt):
        """
        Given a dt, returns the previous close.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous close.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the previous close.
        """
        idx = previous_divider_idx(self.market_closes_nanos, dt.value)
        return pd.Timestamp(self.market_closes_nanos[idx], tz='UTC')

    def next_minute(self, dt):
        """
        Given a dt, return the next exchange minute.  If the given dt is not
        an exchange minute, returns the next exchange open.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next exchange minute.

        Returns
        -------
        pd.Timestamp
            The next exchange minute.
        """
        idx = next_divider_idx(self._trading_minutes_nanos, dt.value)
        return self.all_minutes[idx]

    def previous_minute(self, dt):
        """
        Given a dt, return the previous exchange minute.

        NOTE(review): this docstring previously stated that KeyError is
        raised when the given timestamp is not an exchange minute.  That
        depends on ``previous_divider_idx``, which is defined elsewhere --
        confirm before relying on it.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous exchange minute.

        Returns
        -------
        pd.Timestamp
            The previous exchange minute.
        """
        idx = previous_divider_idx(self._trading_minutes_nanos, dt.value)
        return self.all_minutes[idx]

    def next_session_label(self, session_label):
        """
        Given a session label, returns the label of the next session.

        Parameters
        ----------
        session_label: pd.Timestamp
            A session whose next session is desired.

        Returns
        -------
        pd.Timestamp
            The next session label (midnight UTC).

        Notes
        -----
        Raises ValueError if the given session is the last session in this
        calendar.
        """
        idx = self.schedule.index.get_loc(session_label)
        try:
            return self.schedule.index[idx + 1]
        except IndexError:
            if idx == len(self.schedule.index) - 1:
                raise ValueError("There is no next session as this is the end"
                                 " of the exchange calendar.")
            else:
                raise

    def previous_session_label(self, session_label):
        """
        Given a session label, returns the label of the previous session.

        Parameters
        ----------
        session_label: pd.Timestamp
            A session whose previous session is desired.

        Returns
        -------
        pd.Timestamp
            The previous session label (midnight UTC).

        Notes
        -----
        Raises ValueError if the given session is the first session in this
        calendar.
        """
        idx = self.schedule.index.get_loc(session_label)
        if idx == 0:
            raise ValueError("There is no previous session as this is the"
                             " beginning of the exchange calendar.")

        return self.schedule.index[idx - 1]

    def minutes_for_session(self, session_label):
        """
        Given a session label, return the minutes for that session.

        Parameters
        ----------
        session_label: pd.Timestamp (midnight UTC)
            A session label whose session's minutes are desired.

        Returns
        -------
        pd.DateTimeIndex
            All the minutes for the given session.
        """
        return self.minutes_in_range(
            start_minute=self.schedule.at[session_label, 'market_open'],
            end_minute=self.schedule.at[session_label, 'market_close'],
        )

    def execution_minutes_for_session(self, session_label):
        """
        Given a session label, return the execution minutes for that session.

        Parameters
        ----------
        session_label: pd.Timestamp (midnight UTC)
            A session label whose session's minutes are desired.

        Returns
        -------
        pd.DateTimeIndex
            All the execution minutes for the given session.
        """
        return self.minutes_in_range(
            start_minute=self.execution_time_from_open(
                self.schedule.at[session_label, 'market_open'],
            ),
            end_minute=self.execution_time_from_close(
                self.schedule.at[session_label, 'market_close'],
            ),
        )

    def execution_minutes_for_sessions_in_range(self, start, stop):
        """
        Concatenate ``execution_minutes_for_session`` over every session
        returned by ``sessions_in_range(start, stop)`` into one UTC
        DatetimeIndex.
        """
        minutes = self.execution_minutes_for_session
        return pd.DatetimeIndex(
            np.concatenate([
                minutes(session)
                for session in self.sessions_in_range(start, stop)
            ]),
            tz='UTC',
        )

    def minutes_window(self, start_dt, count):
        """
        Return a window of calendar minutes anchored at ``start_dt``.

        The anchor is ``start_dt`` itself when it is a calendar minute,
        otherwise the calendar minute immediately before it.

        Parameters
        ----------
        start_dt: pd.Timestamp
            The dt at which the window is anchored.
        count: int
            When the anchor index is not greater than ``anchor + count``,
            the slice ``[anchor, anchor + count)`` is returned; otherwise
            (negative ``count``) the slice ``[anchor + count + 1, anchor + 1)``
            which ends with the anchor minute.

        Returns
        -------
        pd.DatetimeIndex
            The minutes in the window.

        Raises
        ------
        KeyError
            If ``start_dt`` is before the first or after the last calendar
            minute.
        """
        start_dt_nanos = start_dt.value
        all_minutes_nanos = self._trading_minutes_nanos
        n_minutes = len(all_minutes_nanos)
        start_idx = all_minutes_nanos.searchsorted(start_dt_nanos)

        # searchsorted finds the index of the minute **on or after** start_dt.
        # If the latter, push back to the prior minute.  The bound is checked
        # first because searchsorted returns n_minutes when start_dt is after
        # the last minute, and indexing there would raise IndexError before
        # the KeyError guard below could run.
        if (start_idx < n_minutes and
                all_minutes_nanos[start_idx] != start_dt_nanos):
            start_idx -= 1

        if start_idx < 0 or start_idx >= n_minutes:
            raise KeyError("Can't start minute window at {}".format(start_dt))

        end_idx = start_idx + count

        if start_idx > end_idx:
            return self.all_minutes[(end_idx + 1):(start_idx + 1)]
        else:
            return self.all_minutes[start_idx:end_idx]

    def sessions_in_range(self, start_session_label, end_session_label):
        """
        Given start and end session labels, return all the sessions in that
        range, inclusive.

        Parameters
        ----------
        start_session_label: pd.Timestamp (midnight UTC)
            The label representing the first session of the desired range.

        end_session_label: pd.Timestamp (midnight UTC)
            The label representing the last session of the desired range.

        Returns
        -------
        pd.DatetimeIndex
            The desired sessions.
        """
        return self.all_sessions[
            self.all_sessions.slice_indexer(
                start_session_label,
                end_session_label
            )
        ]

    def sessions_window(self, session_label, count):
        """
        Given a session label and a window size, returns a list of sessions
        of size `count` + 1, that either starts with the given session
        (if `count` is positive) or ends with the given session (if `count` is
        negative).

        Parameters
        ----------
        session_label: pd.Timestamp
            The label of the initial session.

        count: int
            Defines the length and the direction of the window.

        Returns
        -------
        pd.DatetimeIndex
            The desired sessions.
        """
        start_idx = self.schedule.index.get_loc(session_label)
        end_idx = start_idx + count

        return self.all_sessions[
            min(start_idx, end_idx):max(start_idx, end_idx) + 1
        ]

    def session_distance(self, start_session_label, end_session_label):
        """
        Given a start and end session label, returns the distance between
        them.  For example, for three consecutive sessions Mon., Tues., and
        Wed, ``session_distance(Mon, Wed)`` returns 3.  If ``start_session``
        is after ``end_session``, the value will be negated.

        Parameters
        ----------
        start_session_label: pd.Timestamp
            The label of the start session.
        end_session_label: pd.Timestamp
            The label of the ending session inclusive.

        Returns
        -------
        int
            The distance between the two sessions.
        """
        negate = end_session_label < start_session_label
        if negate:
            # Always measure from the earlier label to the later one; the
            # sign is restored below.
            start_session_label, end_session_label = (
                end_session_label,
                start_session_label,
            )
        start_idx = self.all_sessions.searchsorted(start_session_label)
        # side='right' makes the end label itself count towards the distance.
        end_idx = self.all_sessions.searchsorted(
            end_session_label,
            side='right',
        )

        out = end_idx - start_idx
        if negate:
            out = -out

        return out

    def minutes_in_range(self, start_minute, end_minute):
        """
        Given start and end minutes, return all the calendar minutes
        in that range, inclusive.

        Given minutes don't need to be calendar minutes.

        Parameters
        ----------
        start_minute: pd.Timestamp
            The minute representing the start of the desired range.

        end_minute: pd.Timestamp
            The minute representing the end of the desired range.

        Returns
        -------
        pd.DatetimeIndex
            The minutes in the desired range.
        """
        minutes_nanos = self._trading_minutes_nanos
        start_idx = searchsorted(minutes_nanos, start_minute.value)
        end_idx = searchsorted(minutes_nanos, end_minute.value)

        # searchsorted returns len(minutes_nanos) when end_minute lies after
        # the last calendar minute, so check the bound before indexing;
        # otherwise the equality test below would raise IndexError.
        if (end_idx < len(minutes_nanos) and
                end_minute.value == minutes_nanos[end_idx]):
            # if the end minute is a market minute, increase by 1
            end_idx += 1

        return self.all_minutes[start_idx:end_idx]

    def minutes_for_sessions_in_range(self,
                                      start_session_label,
                                      end_session_label):
        """
        Returns all the minutes for all the sessions from the given start
        session label to the given end session label, inclusive.

        Parameters
        ----------
        start_session_label: pd.Timestamp
            The label of the first session in the range.

        end_session_label: pd.Timestamp
            The label of the last session in the range.

        Returns
        -------
        pd.DatetimeIndex
            The minutes in the desired range.

        """
        first_minute, _ = self.open_and_close_for_session(start_session_label)
        _, last_minute = self.open_and_close_for_session(end_session_label)

        return self.minutes_in_range(first_minute, last_minute)

    def open_and_close_for_session(self, session_label):
        """
        Returns a tuple of timestamps of the open and close of the session
        represented by the given label.

        Parameters
        ----------
        session_label: pd.Timestamp
            The session whose open and close are desired.

        Returns
        -------
        (Timestamp, Timestamp)
            The open and close for the given session.
        """
        sched = self.schedule

        # `market_open` and `market_close` should be timezone aware, but pandas
        # 0.16.1 does not appear to support this:
        # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz  # noqa
        return (
            sched.at[session_label, 'market_open'].tz_localize('UTC'),
            sched.at[session_label, 'market_close'].tz_localize('UTC'),
        )

    def session_open(self, session_label):
        """
        Return the 'market_open' entry of the schedule for the given session
        label, localized to UTC.
        """
        return self.schedule.at[
            session_label,
            'market_open'
        ].tz_localize('UTC')

    def session_close(self, session_label):
        """
        Return the 'market_close' entry of the schedule for the given session
        label, localized to UTC.
        """
        return self.schedule.at[
            session_label,
            'market_close'
        ].tz_localize('UTC')

    def session_opens_in_range(self, start_session_label, end_session_label):
        """
        Return the 'market_open' column of the schedule, sliced by label from
        ``start_session_label`` to ``end_session_label`` and localized to UTC.
        """
        return self.schedule.loc[
            start_session_label:end_session_label,
            'market_open',
        ].dt.tz_localize('UTC')

    def session_closes_in_range(self, start_session_label, end_session_label):
        """
        Return the 'market_close' column of the schedule, sliced by label from
        ``start_session_label`` to ``end_session_label`` and localized to UTC.
        """
        return self.schedule.loc[
            start_session_label:end_session_label,
            'market_close',
        ].dt.tz_localize('UTC')

    @property
    def all_sessions(self):
        """The index of the schedule, i.e. every session label."""
        return self.schedule.index

    @property
    def first_session(self):
        """The first entry of ``all_sessions``."""
        return self.all_sessions[0]

    @property
    def last_session(self):
        """The last entry of ``all_sessions``."""
        return self.all_sessions[-1]

    def execution_time_from_open(self, open_dates):
        """Return ``open_dates`` unchanged."""
        return open_dates

    def execution_time_from_close(self, close_dates):
        """Return ``close_dates`` unchanged."""
        return close_dates

    @lazyval
    def all_minutes(self):
        """
        Returns a DatetimeIndex representing all the minutes in this calendar.
        """
        opens_in_ns = self._opens.values.astype(
            'datetime64[ns]',
        ).view('int64')

        closes_in_ns = self._closes.values.astype(
            'datetime64[ns]',
        ).view('int64')

        return DatetimeIndex(
            compute_all_minutes(opens_in_ns, closes_in_ns),
            tz='utc',
        )

    @preprocess(dt=coerce(pd.Timestamp, attrgetter('value')))
    def minute_to_session_label(self, dt, direction="next"):
        """
        Given a minute, get the label of its containing session.

        Parameters
        ----------
        dt : pd.Timestamp or nanosecond offset
            The dt for which to get the containing session.

        direction: str
            "next" (default) means that if the given dt is not part of a
            session, return the label of the next session.

            "previous" means that if the given dt is not part of a session,
            return the label of the previous session.

            "none" means that a ValueError will be raised if the given
            dt is not part of a session.

        Returns
        -------
        pd.Timestamp (midnight UTC)
            The label of the containing session.

        Raises
        ------
        ValueError
            If ``direction`` is not one of the values above, if
            ``direction`` is "none" and dt is not an exchange minute, or if
            ``direction`` is "previous" and dt falls before the first
            session of the calendar.
        IndexError
            If dt is after the last close in the calendar.
        """
        if direction == "next":
            try:
                return self._minute_to_session_label_cache[dt]
            except KeyError:
                pass

        idx = searchsorted(self.market_closes_nanos, dt)
        current_or_next_session = self.schedule.index[idx]
        # The cached value is always the "next"-mode answer, which is the
        # only mode that reads the cache.
        self._minute_to_session_label_cache[dt] = current_or_next_session

        if direction == "next":
            return current_or_next_session
        elif direction == "previous":
            if not is_open(self.market_opens_nanos, self.market_closes_nanos,
                           dt):
                if idx == 0:
                    # Without this guard, ``index[idx - 1]`` would be
                    # ``index[-1]`` and silently return the LAST session.
                    raise ValueError("There is no previous session as this is"
                                     " the beginning of the exchange"
                                     " calendar.")
                # if the exchange is closed, use the previous session
                return self.schedule.index[idx - 1]
        elif direction == "none":
            if not is_open(self.market_opens_nanos, self.market_closes_nanos,
                           dt):
                # if the exchange is closed, blow up
                raise ValueError("The given dt is not an exchange minute!")
        else:
            # invalid direction
            raise ValueError("Invalid direction parameter: "
                             "{0}".format(direction))

        return current_or_next_session

    def minute_index_to_session_labels(self, index):
        """
        Given a sorted DatetimeIndex of market minutes, return a
        DatetimeIndex of the corresponding session labels.

        Parameters
        ----------
        index: pd.DatetimeIndex or pd.Series
            The ordered list of market minutes we want session labels for.

        Returns
        -------
        pd.DatetimeIndex (UTC)
            The list of session labels corresponding to the given minutes.
        """
        # Build a concrete list rather than handing pd.Index a lazy `map`
        # object, which is an iterator on Python 3.
        return pd.Index([self.minute_to_session_label(dt) for dt in index])

    def _special_dates(self, calendars, ad_hoc_dates, start_date, end_date):
        """
        Union an iterable of pairs of the form (time, calendar)
        and an iterable of pairs of the form (time, [dates])

        (This is shared logic for computing special opens and special closes.)
        """
        _dates = DatetimeIndex([], tz='UTC').union_many(
            [
                holidays_at_time(calendar, start_date, end_date, time_,
                                 self.tz)
                for time_, calendar in calendars
            ] + [
                days_at_time(datetimes, time_, self.tz)
                for time_, datetimes in ad_hoc_dates
            ]
        )
        # Keep only the dates inside [start_date, end_date].
        return _dates[(_dates >= start_date) & (_dates <= end_date)]

    def _calculate_special_opens(self, start, end):
        """
        Apply ``_special_dates`` to ``special_opens`` and
        ``special_opens_adhoc`` over ``[start, end]``.
        """
        return self._special_dates(
            self.special_opens,
            self.special_opens_adhoc,
            start,
            end,
        )

    def _calculate_special_closes(self, start, end):
        """
        Apply ``_special_dates`` to ``special_closes`` and
        ``special_closes_adhoc`` over ``[start, end]``.
        """
        return self._special_dates(
            self.special_closes,
            self.special_closes_adhoc,
            start,
            end,
        )