Example #1
0
class LabelArray(ndarray):
    """
    An ndarray subclass for working with arrays of strings.

    Factorizes the input array into integers, but overloads equality on strings
    to check against the factor label.

    Parameters
    ----------
    values : array-like
        Array of values that can be passed to np.asarray with dtype=object.
    missing_value : str
        Scalar value to treat as 'missing' for operations on ``self``.
    categories : list[str], optional
        List of values to use as categories.  If not supplied, categories will
        be inferred as the unique set of entries in ``values``.
    sort : bool, optional
        Whether to sort categories.  If sort is False and categories is
        supplied, they are left in the order provided.  If sort is False and
        categories is None, categories will be constructed in a random order.

    Attributes
    ----------
    categories : ndarray[str]
        An array containing the unique labels of self.
    reverse_categories : dict[str -> int]
        Reverse lookup table for ``categories``. Stores the index in
        ``categories`` at which each unique entry is found.
    missing_value : str or None
        A sentinel missing value with NaN semantics for comparisons.

    Notes
    -----
    Consumers should be cautious when passing instances of LabelArray to numpy
    functions.  We attempt to disallow as many meaningless operations as
    possible, but since a LabelArray is just an ndarray of ints with some
    additional metadata, many numpy functions (for example, trigonometric) will
    happily accept a LabelArray and treat its values as though they were
    integers.

    In a future change, we may be able to disallow more numerical operations by
    creating a wrapper dtype which doesn't register an implementation for most
    numpy ufuncs. Until that change is made, consumers of LabelArray should
    assume that it is undefined behavior to pass a LabelArray to any numpy
    ufunc that operates on semantically-numerical data.

    See Also
    --------
    http://docs.scipy.org/doc/numpy-1.10.0/user/basics.subclassing.html
    """
    # Scalar types accepted for ``missing_value`` and for scalar assignment /
    # comparison against a LabelArray: byte strings, unicode strings and None.
    SUPPORTED_SCALAR_TYPES = (bytes, unicode, type(None))

    @preprocess(
        values=coerce(list, partial(np.asarray, dtype=object)),
        categories=coerce(np.ndarray, list),
    )
    @expect_types(
        values=np.ndarray,
        missing_value=SUPPORTED_SCALAR_TYPES,
        categories=optional(list),
    )
    @expect_kinds(values=("O", "S", "U"))
    def __new__(cls, values, missing_value, categories=None, sort=True):
        """
        Factorize ``values`` into integer codes and wrap the codes, reshaped
        to ``values.shape``, as a new instance of ``cls``.

        When ``categories`` is None they are inferred from ``values``;
        otherwise the supplied categories are used for the factorization.
        """
        # Fixed-width numpy string dtypes are slow to work with, so anything
        # that is not already an object array is converted to one up front.
        if not is_object(values):
            values = values.astype(object)

        flat_values = values.ravel()
        if categories is None:
            factorized = factorize_strings(
                flat_values,
                missing_value=missing_value,
                sort=sort,
            )
        else:
            factorized = factorize_strings_known_categories(
                flat_values,
                categories=categories,
                missing_value=missing_value,
                sort=sort,
            )
        codes, categories, reverse_categories = factorized

        # The categories array is shared metadata; make it read-only.
        categories.setflags(write=False)

        return cls._from_codes_and_metadata(
            codes=codes.reshape(values.shape),
            categories=categories,
            reverse_categories=reverse_categories,
            missing_value=missing_value,
        )

    @classmethod
    def _from_codes_and_metadata(cls, codes, categories, reverse_categories,
                                 missing_value):
        """
        Reinterpret ``codes`` as an instance of ``cls`` (with a void dtype)
        and attach the label metadata to the resulting view.
        """
        labeled = codes.view(type=cls, dtype=np.void)
        # The metadata is assigned after the view is taken: when viewing an
        # array that carries no label metadata, __array_finalize__ leaves
        # these attributes as None.
        labeled._missing_value = missing_value
        labeled._reverse_categories = reverse_categories
        labeled._categories = categories
        return labeled

    @property
    def categories(self):
        """
        The categories array stored on this instance as ``_categories``.
        """
        # This is a property because it should be immutable.
        return self._categories

    @property
    def reverse_categories(self):
        """
        The reverse lookup table stored on this instance as
        ``_reverse_categories``.
        """
        # This is a property because it should be immutable.
        return self._reverse_categories

    @property
    def missing_value(self):
        """
        The missing-value sentinel stored on this instance as
        ``_missing_value``.
        """
        # This is a property because it should be immutable.
        return self._missing_value

    @property
    def missing_value_code(self):
        """
        The entry of ``reverse_categories`` stored under ``missing_value``.
        """
        code_by_label = self.reverse_categories
        return code_by_label[self.missing_value]

    def has_label(self, value):
        """
        Return whether ``value`` is a key of ``self.reverse_categories``.
        """
        return value in self.reverse_categories

    def __array_finalize__(self, obj):
        """
        Hook invoked by Numpy once a new array object has been built.

        Three situations lead here:

        1. Direct construction, e.g.::

            >>> ndarray.__new__(LabelArray, ...)  # doctest: +SKIP

           ``obj`` is None in this case.  This is treated as an error.

        2. A view cast, most likely from our own __new__::

           >>> other_array.view(type=LabelArray)  # doctest: +SKIP

           ``self`` is the new LabelArray and ``obj`` is the array that
           ``view`` was called on.  Whoever called ``obj.view`` must set the
           category metadata on ``self`` once this hook returns.

        3. Slicing (or otherwise deriving from) an existing LabelArray.

           ``obj`` is the parent LabelArray, and its category metadata is
           copied onto ``self`` here.
        """
        if obj is None:
            raise TypeError(
                "Direct construction of LabelArrays is not supported.")

        # Copy whatever metadata the parent exposes; anything it lacks
        # (case 2 in the docstring) becomes None for the caller to fill in.
        for public_name in ('categories', 'reverse_categories',
                            'missing_value'):
            setattr(self, '_' + public_name, getattr(obj, public_name, None))

    def as_int_array(self):
        """
        View self as a plain ndarray of integer codes.

        The result is a view, not a copy, so this is an O(1) operation.
        """
        code_dtype = int_dtype_with_size_in_bytes(self.itemsize)
        return self.view(type=ndarray, dtype=code_dtype)

    def as_string_array(self):
        """
        Materialize the labels by indexing ``categories`` with the integer
        codes.

        This is an O(N) operation.
        """
        codes = self.as_int_array()
        return self.categories[codes]

    def as_categorical(self, name=None):
        """
        Build a pandas Categorical from the codes and categories of self.

        Arrays with more than one dimension are rejected with a ValueError,
        since pandas only supports 1D categoricals.
        """
        if self.ndim > 1:
            raise ValueError("Can't convert a 2D array to a categorical.")

        with ignore_pandas_nan_categorical_warning():
            codes = self.as_int_array()
            # pandas >= 0.17 fails if the categories buffer isn't writeable,
            # so it is handed a copy instead of our read-only array.
            writeable_categories = self.categories.copy()
            return pd.Categorical.from_codes(
                codes,
                writeable_categories,
                ordered=False,
                name=name,
            )

    def as_categorical_frame(self, index, columns, name=None):
        """
        Build a pandas DataFrame of Categoricals from a 2D LabelArray.

        Raises ValueError if self is not 2D, or if its shape is not
        ``(len(index), len(columns))``.
        """
        if self.ndim != 2:
            raise ValueError(
                "Can't convert a non-2D LabelArray into a DataFrame.")

        implied_shape = (len(index), len(columns))
        if self.shape != implied_shape:
            message = (
                "Can't construct a DataFrame with provided indices:\n\n"
                "LabelArray shape is {actual}, but index and columns imply "
                "that shape should be {expected}."
            ).format(actual=self.shape, expected=implied_shape)
            raise ValueError(message)

        # Flatten to 1D (the only shape as_categorical accepts), index the
        # result by the (index, column) product, then unstack.
        flat_index = pd.MultiIndex.from_product([index, columns])
        flat_categorical = self.ravel().as_categorical(name=name)
        return pd.Series(index=flat_index, data=flat_categorical).unstack()

    def __setitem__(self, indexer, value):
        """
        Assign ``value`` at ``indexer``.

        ``value`` may be another LabelArray whose categories compare equal to
        ours (otherwise CategoryMismatch is raised), or a supported scalar
        that is already one of our categories (otherwise ValueError is
        raised).  Any other type raises NotImplementedError.
        """
        if isinstance(value, LabelArray):
            own_categories = self.categories
            incoming_categories = value.categories
            if not compare_arrays(own_categories, incoming_categories):
                raise CategoryMismatch(own_categories, incoming_categories)
            # Identical categories mean the stored codes are interchangeable.
            return super(LabelArray, self).__setitem__(indexer, value)

        if not isinstance(value, self.SUPPORTED_SCALAR_TYPES):
            raise NotImplementedError(
                "Setting into a LabelArray with a value of "
                "type {type} is not yet supported.".format(
                    type=type(value).__name__))

        # Translate the scalar to its code; -1 marks an unknown label.
        code = self.reverse_categories.get(value, -1)
        if code < 0:
            raise ValueError("%r is not in LabelArray categories." % value)
        self.as_int_array()[indexer] = code

    def __setslice__(self, i, j, sequence):
        """
        This method was deprecated in Python 2.0. It predates slice objects,
        but Python 2.7.11 still uses it if you implement it, which ndarray
        does.  In newer Pythons, __setitem__ is always called, but we need to
        manually forward in py2.
        """
        # Forward explicitly to __setitem__ with an equivalent slice object.
        self.__setitem__(slice(i, j), sequence)

    def __getitem__(self, indexer):
        """
        Index into self.

        A result with at least one dimension is returned as-is (it is still
        a LabelArray); a zero-dimensional result is mapped back to the
        corresponding entry of ``categories``.
        """
        selected = super(LabelArray, self).__getitem__(indexer)
        if not selected.ndim:
            # Scalar result: an np.void instance.  Reinterpret its bytes as
            # an integer code and look the label up.
            code = selected.view(int_dtype_with_size_in_bytes(self.itemsize))
            return self.categories[code]

        return selected

    def is_missing(self):
        """
        Like isnan: flag the positions whose code is the missing-value code.
        """
        codes = self.as_int_array()
        missing_code = self.reverse_categories[self.missing_value]
        return codes == missing_code

    def not_missing(self):
        """
        Like ~isnan: flag the positions whose code is not the missing-value
        code.
        """
        codes = self.as_int_array()
        missing_code = self.reverse_categories[self.missing_value]
        return codes != missing_code

    def _equality_check(op):
        """
        Build the implementation shared by __eq__ and __ne__.

        ``op`` is the comparison operator the generated method applies.
        """
        def method(self, other):
            if isinstance(other, LabelArray):
                # Two LabelArrays are only comparable when they agree on
                # both the missing value and the categories.
                own_missing = self.missing_value
                their_missing = other.missing_value
                if own_missing != their_missing:
                    raise MissingValueMismatch(own_missing, their_missing)

                own_categories = self.categories
                their_categories = other.categories
                if not compare_arrays(own_categories, their_categories):
                    raise CategoryMismatch(own_categories, their_categories)

                codes_compared = op(self.as_int_array(), other.as_int_array())
                # Positions that are missing on either side are always False.
                return (codes_compared
                        & self.not_missing()
                        & other.not_missing())

            if isinstance(other, ndarray):
                # Against a plain ndarray we compare as an array of strings.
                # This is fairly expensive and should generally be avoided.
                labels_compared = op(self.as_string_array(), other)
                return labels_compared & self.not_missing()

            if isinstance(other, self.SUPPORTED_SCALAR_TYPES):
                # A scalar that is not one of our categories maps to -1.
                code = self._reverse_categories.get(other, -1)
                return op(self.as_int_array(), code) & self.not_missing()

            return op(super(LabelArray, self), other)

        return method

    __eq__ = _equality_check(eq)
    __ne__ = _equality_check(ne)
    # The factory is only needed while the class body executes.
    del _equality_check

    def view(self, dtype=_NotPassed, type=_NotPassed):
        """
        Like ndarray.view, except that changing only the dtype is forbidden:
        passing a ``dtype`` other than ``self.dtype`` without also passing
        ``type`` raises TypeError.
        """
        if type is _NotPassed and dtype not in (_NotPassed, self.dtype):
            raise TypeError("Can't view LabelArray as another dtype.")

        # ndarray.view's text signature suggests both defaults are None, but
        # an explicit None does not behave like an omitted argument.  Only
        # the arguments the caller actually supplied are forwarded.
        forwarded = {}
        for keyword, argument in (('dtype', dtype), ('type', type)):
            if argument is not _NotPassed:
                forwarded[keyword] = argument
        return super(LabelArray, self).view(**forwarded)

    # In general, we support resizing, slicing, and reshaping methods, but not
    # numeric methods.
    SUPPORTED_NDARRAY_METHODS = frozenset([
        'base', 'compress', 'copy', 'data', 'diagonal', 'dtype', 'flat',
        'flatten', 'item', 'itemset', 'itemsize', 'nbytes', 'ndim', 'ravel',
        'repeat', 'reshape', 'resize', 'setflags', 'shape', 'size', 'squeeze',
        'strides', 'swapaxes', 'take', 'trace', 'transpose', 'view'
    ])
    # Every attribute name on ndarray that does not start with an underscore.
    PUBLIC_NDARRAY_METHODS = frozenset(
        [s for s in dir(ndarray) if not s.startswith('_')])

    # Generate failing wrappers for all unsupported methods.
    # Updating ``locals()`` here, inside the class body, adds one class
    # attribute per public ndarray name that is not in the supported set.
    locals().update({
        method: _make_unsupported_method(method)
        for method in PUBLIC_NDARRAY_METHODS - SUPPORTED_NDARRAY_METHODS
    })

    def __repr__(self):
        """
        Render like the repr of the equivalent string array, with the
        leading ``array(`` replaced by ``LabelArray(``.
        """
        lines = repr(self.as_string_array()).splitlines()
        lines[0] = lines[0].replace('array(', 'LabelArray(', 1)
        # Drop everything after the final comma of the last line (presumably
        # the trailing dtype annotation) and re-close the parenthesis.
        lines[-1] = lines[-1].rsplit(',', 1)[0] + ')'
        # The extra spaces after the newline make up for 'LabelArray(' being
        # longer than 'array('.
        continuation = '\n     '
        return continuation.join(lines)

    def empty_like(self, shape):
        """
        Build a LabelArray of the given ``shape`` that shares the categories
        of ``self`` and holds ``self.missing_value`` in every position.
        """
        missing_code = self.reverse_categories[self.missing_value]
        code_dtype = int_dtype_with_size_in_bytes(self.itemsize)
        filled_codes = np.full(shape, missing_code, dtype=code_dtype)
        return type(self)._from_codes_and_metadata(
            codes=filled_codes,
            categories=self.categories,
            reverse_categories=self.reverse_categories,
            missing_value=self.missing_value,
        )

    def map_predicate(self, f):
        """
        Apply a str -> bool function element-wise over ``self``.

        ``f`` is evaluated on the entries of ``categories`` rather than on
        every element, and the per-category answers are then broadcast back
        through the codes.  Missing values always produce False.
        """
        # ``f`` expects strings.  None is the only non-str value that can
        # appear in categories, so shield ``f`` from it when it is present.
        if self.missing_value is None:
            def apply_to_label(label):
                return False if label is None else f(label)
        else:
            apply_to_label = f

        # One answer per category.
        vectorized = np.vectorize(apply_to_label, otypes=[bool_dtype])
        per_category = vectorized(self.categories)

        # Whatever ``f`` said about the missing value, the answer is False.
        per_category[self.reverse_categories[self.missing_value]] = False

        # Expand the per-category answers to the shape of self.
        return per_category[self.as_int_array()]

    def startswith(self, prefix):
        """
        Element-wise startswith.

        Parameters
        ----------
        prefix : str

        Returns
        -------
        matches : np.ndarray[bool]
            Array shaped like self, marking the elements that start with
            ``prefix``.
        """
        def starts_with_prefix(label):
            return label.startswith(prefix)

        return self.map_predicate(starts_with_prefix)

    def endswith(self, suffix):
        """
        Element-wise endswith.

        Parameters
        ----------
        suffix : str

        Returns
        -------
        matches : np.ndarray[bool]
            Array shaped like self, marking the elements that end with
            ``suffix``.
        """
        def ends_with_suffix(label):
            return label.endswith(suffix)

        return self.map_predicate(ends_with_suffix)

    def has_substring(self, substring):
        """
        Element-wise contains.

        Parameters
        ----------
        substring : str

        Returns
        -------
        matches : np.ndarray[bool]
            Array shaped like self, marking the elements that contain
            ``substring``.
        """
        def contains_substring(label):
            return substring in label

        return self.map_predicate(contains_substring)

    @preprocess(pattern=coerce(from_=(bytes, unicode), to=re.compile))
    def matches(self, pattern):
        """
        Element-wise regex match.

        Parameters
        ----------
        pattern : str or compiled regex
            String patterns are compiled with ``re.compile`` before use.

        Returns
        -------
        matches : np.ndarray[bool]
            Array shaped like self, marking the elements for which
            ``pattern.match`` found a match.
        """
        def is_match(label):
            return bool(pattern.match(label))

        return self.map_predicate(is_match)

    # These types all implement an O(N) __contains__, so pre-emptively
    # coerce to `set`.
    @preprocess(container=coerce((list, tuple, np.ndarray), set))
    def element_of(self, container):
        """
        Check if each element of self is an element of ``container``.

        Parameters
        ----------
        container : object
            An object implementing a __contains__ to call on each element of
            ``self``.

        Returns
        -------
        is_contained : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self was an element of ``container``.
        """
        # The bound __contains__ is used directly as the predicate.
        return self.map_predicate(container.__contains__)
Example #2
0
class TradingCalendar(with_metaclass(ABCMeta)):
    """
    A TradingCalendar represents the timing information of a single market
    exchange.

    The timing information is made up of two parts: sessions, and opens/closes.

    A session represents a contiguous set of minutes, and has a label that is
    midnight UTC. It is important to note that a session label should not be
    considered a specific point in time, and that midnight UTC is just being
    used for convenience.

    For each session, we store the open and close time in UTC time.
    """
    def __init__(self, start=start_default, end=end_default):
        """
        Build the session schedule and the derived lookup arrays for the
        sessions between ``start`` and ``end``.

        Parameters
        ----------
        start : optional
            Lower bound passed to ``date_range`` when generating sessions.
            Defaults to ``start_default``.
        end : optional
            Upper bound passed to ``date_range`` when generating sessions.
            Defaults to ``end_default``.
        """
        # Midnight in UTC for each trading day.
        _all_days = date_range(start, end, freq=self.day, tz='UTC')

        # `DatetimeIndex`s of standard opens/closes for each day.
        self._opens = days_at_time(_all_days, self.open_time, self.tz,
                                   self.open_offset)
        self._closes = days_at_time(_all_days, self.close_time, self.tz,
                                    self.close_offset)

        # `DatetimeIndex`s of nonstandard opens/closes
        _special_opens = self._calculate_special_opens(start, end)
        _special_closes = self._calculate_special_closes(start, end)

        # Overwrite the special opens and closes on top of the standard ones.
        _overwrite_special_dates(_all_days, self._opens, _special_opens)
        _overwrite_special_dates(_all_days, self._closes, _special_closes)

        # In pandas 0.16.1 _opens and _closes will lose their timezone
        # information. This looks like it has been resolved in 0.17.1.
        # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz  # noqa
        self.schedule = DataFrame(
            index=_all_days,
            columns=['market_open', 'market_close'],
            data={
                'market_open': self._opens,
                'market_close': self._closes,
            },
            dtype='datetime64[ns]',
        )

        # Integer copies of the open/close columns, used by the lookup
        # methods that compare against ``dt.value``.
        self.market_opens_nanos = self.schedule.market_open.values.\
            astype(np.int64)

        self.market_closes_nanos = self.schedule.market_close.values.\
            astype(np.int64)

        # NOTE(review): ``all_minutes`` is defined outside this view; it is
        # read only after ``self.schedule`` has been assigned -- presumably
        # it is derived from the schedule.  Confirm before reordering.
        self._trading_minutes_nanos = self.all_minutes.values.\
            astype(np.int64)

        self.first_trading_session = _all_days[0]
        self.last_trading_session = _all_days[-1]

        # Session labels of the special closes.
        self._early_closes = pd.DatetimeIndex(
            _special_closes.map(self.minute_to_session_label))

    @lazyval
    def day(self):
        """
        A CustomBusinessDay built from this calendar's ad-hoc and regular
        holidays.  ``__init__`` uses it as the ``freq`` of the session range.
        """
        adhoc = self.adhoc_holidays
        regular = self.regular_holidays
        return CustomBusinessDay(holidays=adhoc, calendar=regular)

    @abstractproperty
    def name(self):
        """
        Abstract property; concrete calendars must override it.
        """
        raise NotImplementedError()

    @abstractproperty
    def tz(self):
        """
        Abstract property; concrete calendars must override it.

        ``__init__`` passes this value to ``days_at_time`` when computing the
        standard opens and closes.
        """
        raise NotImplementedError()

    @abstractproperty
    def open_time(self):
        """
        Abstract property; concrete calendars must override it.

        ``__init__`` passes this value to ``days_at_time`` when computing the
        standard opens.
        """
        raise NotImplementedError()

    @abstractproperty
    def close_time(self):
        """
        Abstract property; concrete calendars must override it.

        ``__init__`` passes this value to ``days_at_time`` when computing the
        standard closes.
        """
        raise NotImplementedError()

    @property
    def open_offset(self):
        """
        Offset passed to ``days_at_time`` together with ``open_time`` in
        ``__init__``.  The base implementation returns 0.
        """
        return 0

    @property
    def close_offset(self):
        """
        Offset passed to ``days_at_time`` together with ``close_time`` in
        ``__init__``.  The base implementation returns 0.
        """
        return 0

    @property
    def regular_holidays(self):
        """
        Returns
        -------
        pd.AbstractHolidayCalendar: a calendar containing the regular holidays
        for this calendar

        The base implementation returns None.  The value is used as the
        ``calendar`` argument of the CustomBusinessDay built by ``day``.
        """
        return None

    @property
    def adhoc_holidays(self):
        """
        Holidays used as the ``holidays`` argument of the CustomBusinessDay
        built by ``day``.  The base implementation returns an empty list.
        """
        return []

    @property
    def special_opens(self):
        """
        A list of special open times and corresponding HolidayCalendars.

        The base implementation returns an empty list.

        Returns
        -------
        list: List of (time, AbstractHolidayCalendar) tuples
        """
        return []

    @property
    def special_opens_adhoc(self):
        """
        The base implementation returns an empty list.

        Returns
        -------
        list: List of (time, DatetimeIndex) tuples that represent special
         opens that cannot be codified into rules.
        """
        return []

    @property
    def special_closes(self):
        """
        A list of special close times and corresponding HolidayCalendars.

        The base implementation returns an empty list.

        Returns
        -------
        list: List of (time, AbstractHolidayCalendar) tuples
        """
        return []

    @property
    def special_closes_adhoc(self):
        """
        The base implementation returns an empty list.

        Returns
        -------
        list: List of (time, DatetimeIndex) tuples that represent special
         closes that cannot be codified into rules.
        """
        return []

    # -----

    @property
    def opens(self):
        """
        The ``market_open`` column of ``self.schedule``.

        Exposed as a property so that it is accessed the same way as the
        sibling ``closes`` and ``early_closes`` accessors; without the
        decorator, ``calendar.opens`` evaluates to a bound method instead of
        the column.
        """
        return self.schedule.market_open

    @property
    def closes(self):
        """
        The ``market_close`` column of ``self.schedule``.
        """
        return self.schedule.market_close

    @property
    def early_closes(self):
        """
        The DatetimeIndex built in ``__init__`` by mapping the special closes
        through ``minute_to_session_label``.
        """
        return self._early_closes

    def is_session(self, dt):
        """
        Tell whether ``dt`` is a valid session label, i.e. whether it is
        present in the index of the schedule.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt that is being tested.

        Returns
        -------
        bool
            Whether the given dt is a valid session label.
        """
        session_labels = self.schedule.index
        return dt in session_labels

    def is_open_on_minute(self, dt):
        """
        Tell whether this exchange is open at ``dt``.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to check if this exchange is open.

        Returns
        -------
        bool
            Whether the exchange is open on this dt.
        """
        opens_nanos = self.market_opens_nanos
        closes_nanos = self.market_closes_nanos
        return is_open(opens_nanos, closes_nanos, dt.value)

    def next_open(self, dt):
        """
        Return the next open after ``dt``.

        When ``dt`` is itself a session open, the open of the following
        session is returned.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next open.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the next open.
        """
        opens_nanos = self.market_opens_nanos
        position = next_divider_idx(opens_nanos, dt.value)
        return pd.Timestamp(opens_nanos[position], tz='UTC')

    def next_close(self, dt):
        """
        Return the next close after ``dt``.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next close.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the next close.
        """
        closes_nanos = self.market_closes_nanos
        position = next_divider_idx(closes_nanos, dt.value)
        return pd.Timestamp(closes_nanos[position], tz='UTC')

    def previous_open(self, dt):
        """
        Return the open that precedes ``dt``.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous open.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the previous open.
        """
        opens_nanos = self.market_opens_nanos
        position = previous_divider_idx(opens_nanos, dt.value)
        return pd.Timestamp(opens_nanos[position], tz='UTC')

    def previous_close(self, dt):
        """
        Return the close that precedes ``dt``.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous close.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the previous close.
        """
        closes_nanos = self.market_closes_nanos
        position = previous_divider_idx(closes_nanos, dt.value)
        return pd.Timestamp(closes_nanos[position], tz='UTC')

    def next_minute(self, dt):
        """
        Return the exchange minute that follows ``dt``.  When ``dt`` is not
        an exchange minute, the next exchange open is returned.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next exchange minute.

        Returns
        -------
        pd.Timestamp
            The next exchange minute.
        """
        minutes_nanos = self._trading_minutes_nanos
        position = next_divider_idx(minutes_nanos, dt.value)
        return self.all_minutes[position]

    def previous_minute(self, dt):
        """
        Return the exchange minute that precedes ``dt``.

        NOTE(review): earlier documentation of this method stated that a
        KeyError is raised when ``dt`` is not an exchange minute.  Whether
        that holds depends on ``previous_divider_idx``, which is defined
        elsewhere -- confirm.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous exchange minute.

        Returns
        -------
        pd.Timestamp
            The previous exchange minute.
        """
        minutes_nanos = self._trading_minutes_nanos
        position = previous_divider_idx(minutes_nanos, dt.value)
        return self.all_minutes[position]

    def next_session_label(self, session_label):
        """
        Return the label of the session that follows ``session_label``.

        Parameters
        ----------
        session_label: pd.Timestamp
            A session whose next session is desired.

        Returns
        -------
        pd.Timestamp
            The next session label (midnight UTC).

        Notes
        -----
        Raises ValueError if the given session is the last session in this
        calendar.
        """
        position = self.schedule.index.get_loc(session_label)
        session_labels = self.schedule.index
        try:
            following = session_labels[position + 1]
        except IndexError:
            # Running off the end is only translated into ValueError when
            # the given label really was the final session.
            if position == len(session_labels) - 1:
                raise ValueError("There is no next session as this is the end"
                                 " of the exchange calendar.")
            raise
        return following

    def previous_session_label(self, session_label):
        """
        Return the label of the session that precedes ``session_label``.

        Parameters
        ----------
        session_label: pd.Timestamp
            A session whose previous session is desired.

        Returns
        -------
        pd.Timestamp
            The previous session label (midnight UTC).

        Notes
        -----
        Raises ValueError if the given session is the first session in this
        calendar.
        """
        position = self.schedule.index.get_loc(session_label)
        if position == 0:
            # Nothing precedes the first session; indexing with -1 would
            # silently wrap around to the last one.
            raise ValueError("There is no previous session as this is the"
                             " beginning of the exchange calendar.")

        preceding = position - 1
        return self.schedule.index[preceding]

    def minutes_for_session(self, session_label):
        """
        Return the minutes of the session labelled ``session_label``.

        Parameters
        ----------
        session_label: pd.Timestamp (midnight UTC)
            A session label whose session's minutes are desired.

        Returns
        -------
        pd.DateTimeIndex
            All the minutes for the given session.
        """
        session_row = self.schedule.loc[session_label]
        # Select the minutes between the session's open and close.
        session_slice = self.all_minutes.slice_indexer(
            session_row.market_open,
            session_row.market_close,
        )
        return self.all_minutes[session_slice]

    def minutes_window(self, start_dt, count):
        """
        Return a slice of ``all_minutes`` anchored at ``start_dt``.

        The anchor is the position of ``start_dt`` in ``all_minutes``; when
        ``start_dt`` is not found there, the close of the session returned
        by ``minute_to_session_label(start_dt, direction="previous")`` is
        used instead.  A non-negative ``count`` selects the slice that starts
        at the anchor and spans ``count`` positions; a negative ``count``
        selects the slice of ``-count`` positions that ends with the anchor.
        """
        try:
            anchor = self.all_minutes.get_loc(start_dt)
        except KeyError:
            # Not a market minute: fall back to the previous session's close.
            prior_session = self.minute_to_session_label(
                start_dt, direction="previous")
            prior_close = self.open_and_close_for_session(prior_session)[1]
            anchor = self.all_minutes.get_loc(prior_close)

        far_end = anchor + count

        if anchor > far_end:
            # Backward-looking window; shift by one so the anchor is kept.
            return self.all_minutes[(far_end + 1):(anchor + 1)]

        return self.all_minutes[anchor:far_end]

    def sessions_in_range(self, start_session_label, end_session_label):
        """
        Return every session between the two given labels, inclusive.

        Parameters
        ----------
        start_session_label: pd.Timestamp (midnight UTC)
            The label representing the first session of the desired range.

        end_session_label: pd.Timestamp (midnight UTC)
            The label representing the last session of the desired range.

        Returns
        -------
        pd.DatetimeIndex
            The desired sessions.
        """
        range_slice = self.all_sessions.slice_indexer(
            start_session_label,
            end_session_label,
        )
        return self.all_sessions[range_slice]

    def sessions_window(self, session_label, count):
        """
        Return the window of ``abs(count) + 1`` sessions that starts with
        ``session_label`` (when ``count`` is positive) or ends with it (when
        ``count`` is negative).

        Parameters
        ----------
        session_label: pd.Timestamp
            The label of the initial session.

        count: int
            Defines the length and the direction of the window.

        Returns
        -------
        pd.DatetimeIndex
            The desired sessions.
        """
        anchor = self.schedule.index.get_loc(session_label)
        far_end = anchor + count

        # Order the two positions so the slice always runs forwards.
        if far_end < anchor:
            lower, upper = far_end, anchor
        else:
            lower, upper = anchor, far_end

        return self.all_sessions[lower:upper + 1]

    def session_distance(self, start_session_label, end_session_label):
        """
        Return the number of sessions separating the two given labels.  For
        example, for three consecutive sessions Mon., Tues., and Wed,
        `session_distance(Mon, Wed)` would return 2.

        Parameters
        ----------
        start_session_label: pd.Timestamp
            The label of the start session.

        end_session_label: pd.Timestamp
            The label of the ending session.

        Returns
        -------
        int
            The distance between the two sessions.
        """
        # Both inputs are passed through minute_to_session_label before
        # their positions in all_sessions are looked up.
        start_session = self.minute_to_session_label(start_session_label)
        start_position = self.all_sessions.searchsorted(start_session)

        end_session = self.minute_to_session_label(end_session_label)
        end_position = self.all_sessions.searchsorted(end_session)

        return abs(end_position - start_position)

    def minutes_in_range(self, start_minute, end_minute):
        """
        Given start and end minutes, return all the calendar minutes
        in that range, inclusive.

        Given minutes don't need to be calendar minutes.  An ``end_minute``
        that falls after the last calendar minute is handled: the result then
        simply runs to the end of the calendar instead of raising IndexError.

        Parameters
        ----------
        start_minute: pd.Timestamp
            The minute representing the start of the desired range.

        end_minute: pd.Timestamp
            The minute representing the end of the desired range.

        Returns
        -------
        pd.DatetimeIndex
            The minutes in the desired range.
        """
        minutes_nanos = self._trading_minutes_nanos

        start_idx = searchsorted(minutes_nanos, start_minute.value)
        end_idx = searchsorted(minutes_nanos, end_minute.value)

        # Assuming ``searchsorted`` follows numpy semantics, it returns
        # len(minutes_nanos) when end_minute is past the last calendar
        # minute; guard the lookup so that case cannot index out of bounds.
        if (end_idx < len(minutes_nanos) and
                end_minute.value == minutes_nanos[end_idx]):
            # if the end minute is a market minute, increase by 1 so that
            # the slice below includes it
            end_idx += 1

        return self.all_minutes[start_idx:end_idx]

    def minutes_for_sessions_in_range(self, start_session_label,
                                      end_session_label):
        """
        Return every minute from the open of the start session through the
        close of the end session, inclusive.

        Parameters
        ----------
        start_session_label: pd.Timestamp
            The label of the first session in the range.

        end_session_label: pd.Timestamp
            The label of the last session in the range.

        Returns
        -------
        pd.DatetimeIndex
            The minutes in the desired range.

        """
        # Only the open of the first session and the close of the last
        # session matter; the other half of each pair is discarded.
        range_open, _unused_close = self.open_and_close_for_session(
            start_session_label)
        _unused_open, range_close = self.open_and_close_for_session(
            end_session_label)

        return self.minutes_in_range(range_open, range_close)

    def open_and_close_for_session(self, session_label):
        """
        Look up the open and close of a session in ``self.schedule`` and
        return them as UTC-localized timestamps.

        Parameters
        ----------
        session_label: pd.Timestamp
            The session whose open and close are desired.

        Returns
        -------
        (Timestamp, Timestamp)
            The open and close for the given session.
        """
        session_row = self.schedule.loc[session_label]

        # `market_open` and `market_close` should be timezone aware, but pandas
        # 0.16.1 does not appear to support this:
        # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz  # noqa
        # so the stored values are localized to UTC on the way out.
        return tuple(
            session_row[column].tz_localize('UTC')
            for column in ('market_open', 'market_close')
        )

    @property
    def all_sessions(self):
        """
        The index of ``self.schedule``, i.e. the labels of every session.
        """
        schedule_index = self.schedule.index
        return schedule_index

    @property
    def first_session(self):
        """
        The first entry of ``all_sessions``.
        """
        sessions = self.all_sessions
        return sessions[0]

    @property
    def last_session(self):
        """
        The final entry of ``all_sessions``.
        """
        sessions = self.all_sessions
        return sessions[-1]

    @property
    @remember_last
    def all_minutes(self):
        """
        Returns a DatetimeIndex representing all the minutes in this calendar.

        The index is built from ``self._opens`` and ``self._closes``: for each
        open/close pair, every minute from the open through the close
        (inclusive) is emitted, and the result is localized to UTC.

        NOTE(review): ``remember_last`` is defined outside this block; it
        presumably caches the result -- confirm before relying on it.
        """
        opens_in_ns = \
            self._opens.values.astype('datetime64[ns]')

        closes_in_ns = \
            self._closes.values.astype('datetime64[ns]')

        # Per-day length of the session, as a time delta.
        deltas = closes_in_ns - opens_in_ns

        # + 1 because both the open and the close minute are included, e.g.
        # we want 390 minutes per standard day, not 389
        daily_sizes = (deltas / NANOS_IN_MINUTE) + 1
        num_minutes = np.sum(daily_sizes).astype(np.int64)

        # One allocation for the entire thing. This assumes that each day
        # represents a contiguous block of minutes.
        all_minutes = np.empty(num_minutes, dtype='datetime64[ns]')

        # ``idx`` is the write offset into ``all_minutes`` for the current day.
        idx = 0
        for day_idx, size in enumerate(daily_sizes):
            # lots of small allocations, but it's fast enough for now.

            # size is a np.timedelta64, so we need to int it
            size_int = int(size)
            # Fill this day's slot with one entry per minute; the stop value
            # is pushed one step past the close so the close itself is kept.
            all_minutes[idx:(idx + size_int)] = \
                np.arange(
                    opens_in_ns[day_idx],
                    closes_in_ns[day_idx] + NANOS_IN_MINUTE,
                    NANOS_IN_MINUTE
                )

            idx += size_int

        return DatetimeIndex(all_minutes).tz_localize("UTC")

    @preprocess(dt=coerce(pd.Timestamp, attrgetter('value')))
    def minute_to_session_label(self, dt, direction="next"):
        """
        Given a minute, get the label of its containing session.

        Parameters
        ----------
        dt : pd.Timestamp or nanosecond offset
            The dt for which to get the containing session.

        direction: str
            "next" (default) means that if the given dt is not part of a
            session, return the label of the next session.

            "previous" means that if the given dt is not part of a session,
            return the label of the previous session.

            "none" means that a ValueError will be raised if the given
            dt is not part of a session.

        Returns
        -------
        pd.Timestamp (midnight UTC)
            The label of the containing session.

        Raises
        ------
        ValueError
            If ``direction`` is not one of "next", "previous" or "none"; if
            ``direction`` is "none" and ``dt`` is not an exchange minute; or
            if ``direction`` is "previous" and no session precedes ``dt``.
        IndexError
            If the containing-or-next session is requested but ``dt`` falls
            after the last close in the schedule.
        """
        # Reject a bad ``direction`` before doing any lookup so that the
        # caller always sees the relevant error.
        if direction not in ("next", "previous", "none"):
            raise ValueError("Invalid direction parameter: "
                             "{0}".format(direction))

        # Position of ``dt`` among the session closes; this is the index of
        # the session that contains ``dt`` or, failing that, the next one.
        idx = searchsorted(self.market_closes_nanos, dt)

        if direction != "next" and not is_open(self.market_opens_nanos,
                                                self.market_closes_nanos,
                                                dt):
            if direction == "none":
                # if the exchange is closed, blow up
                raise ValueError("The given dt is not an exchange minute!")

            # direction == "previous": the exchange is closed, so use the
            # previous session.  Guard against ``idx - 1`` wrapping around to
            # the last session when ``dt`` precedes the whole schedule.
            if idx == 0:
                raise ValueError(
                    "The given dt is before the first session; "
                    "there is no previous session.")
            return self.schedule.index[idx - 1]

        # The label is only looked up here, so "previous" keeps working for a
        # ``dt`` that falls after the last close.
        return self.schedule.index[idx]

    def minute_index_to_session_labels(self, index):
        """
        Map a sorted collection of market minutes to the labels of the
        sessions that contain them.

        Parameters
        ----------
        index: pd.DatetimeIndex or pd.Series
            The ordered list of market minutes we want session labels for.

        Returns
        -------
        pd.DatetimeIndex (UTC)
            The list of session labels corresponding to the given minutes.
        """
        # The helper works on raw int64 nanosecond values, so the minutes go
        # in as integers and the labels come back as integers.
        minute_nanos = index.values.astype(np.int64)

        label_nanos = minutes_to_session_labels(
            minute_nanos,
            lambda dt_nanos: self.minute_to_session_label(dt_nanos).value,
            self.market_closes_nanos,
        )

        return DatetimeIndex(label_nanos.astype('datetime64[ns]'), tz='UTC')

    def _special_dates(self, calendars, ad_hoc_dates, start_date, end_date):
        """
        Union an iterable of pairs of the form (time, calendar)
        and an iterable of pairs of the form (time, [dates]), keeping only
        the results that fall within [start_date, end_date].

        (This is shared logic for computing special opens and special closes.)
        """
        # Datetimes derived from the (time, calendar) pairs.
        from_calendars = [
            holidays_at_time(calendar, start_date, end_date, time_, self.tz)
            for time_, calendar in calendars
        ]
        # Datetimes derived from the explicitly listed (time, [dates]) pairs.
        from_ad_hoc = [
            days_at_time(datetimes, time_, self.tz)
            for time_, datetimes in ad_hoc_dates
        ]

        merged = DatetimeIndex([], tz='UTC').union_many(
            from_calendars + from_ad_hoc)

        within_bounds = (merged >= start_date) & (merged <= end_date)
        return merged[within_bounds]

    def _calculate_special_opens(self, start, end):
        """
        Compute the special opens between ``start`` and ``end`` by feeding
        ``self.special_opens`` and ``self.special_opens_adhoc`` to
        ``_special_dates``.
        """
        rule_based, ad_hoc = self.special_opens, self.special_opens_adhoc
        return self._special_dates(rule_based, ad_hoc, start, end)

    def _calculate_special_closes(self, start, end):
        """
        Compute the special closes between ``start`` and ``end`` by feeding
        ``self.special_closes`` and ``self.special_closes_adhoc`` to
        ``_special_dates``.
        """
        rule_based, ad_hoc = self.special_closes, self.special_closes_adhoc
        return self._special_dates(rule_based, ad_hoc, start, end)
Example #3
0
class LabelArray(ndarray):
    """
    An ndarray subclass for working with arrays of strings.

    Factorizes the input array into integers, but overloads equality on strings
    to check against the factor label.

    Parameters
    ----------
    values : array-like
        Array of values that can be passed to np.asarray with dtype=object.
    missing_value : str
        Scalar value to treat as 'missing' for operations on ``self``.
    categories : list[str], optional
        List of values to use as categories.  If not supplied, categories will
        be inferred as the unique set of entries in ``values``.
    sort : bool, optional
        Whether to sort categories.  If sort is False and categories is
        supplied, they are left in the order provided.  If sort is False and
        categories is None, categories will be constructed in a random order.

    Attributes
    ----------
    categories : ndarray[str]
        An array containing the unique labels of self.
    reverse_categories : dict[str -> int]
        Reverse lookup table for ``categories``. Stores the index in
        ``categories`` at which each unique entry is found.
    missing_value : str or None
        A sentinel missing value with NaN semantics for comparisons.

    Notes
    -----
    Consumers should be cautious when passing instances of LabelArray to numpy
    functions.  We attempt to disallow as many meaningless operations as
    possible, but since a LabelArray is just an ndarray of ints with some
    additional metadata, many numpy functions (for example, trigonometric) will
    happily accept a LabelArray and treat its values as though they were
    integers.

    In a future change, we may be able to disallow more numerical operations by
    creating a wrapper dtype which doesn't register an implementation for most
    numpy ufuncs. Until that change is made, consumers of LabelArray should
    assume that it is undefined behavior to pass a LabelArray to any numpy
    ufunc that operates on semantically-numerical data.

    See Also
    --------
    https://docs.scipy.org/doc/numpy-1.11.0/user/basics.subclassing.html
    """

    # Scalar types accepted as labels / missing values, with and without None.
    SUPPORTED_SCALAR_TYPES = (bytes, unicode, type(None))
    SUPPORTED_NON_NONE_SCALAR_TYPES = (bytes, unicode)

    @preprocess(
        values=coerce(list, partial(np.asarray, dtype=object)),
        # Coerce ``list`` to ``list`` to make a copy. Code internally may call
        # ``categories.insert(0, missing_value)`` which will mutate this list
        # in place.
        categories=coerce((list, np.ndarray, set), list),
    )
    @expect_types(
        values=np.ndarray,
        missing_value=SUPPORTED_SCALAR_TYPES,
        categories=optional(list),
    )
    @expect_kinds(values=("O", "S", "U"))
    def __new__(cls, values, missing_value, categories=None, sort=True):
        """
        Factorize ``values`` into integer codes plus category metadata and
        wrap the codes in a new LabelArray.

        See the class docstring for a description of the parameters.
        """

        # Numpy's fixed-width string types aren't very efficient. Working with
        # object arrays is faster than bytes or unicode arrays in almost all
        # cases.
        if not is_object(values):
            values = values.astype(object)

        # Flatten using the same order that is used to reshape the codes
        # below, so that the codes end up in the shape of ``values``.
        if values.flags.f_contiguous:
            ravel_order = "F"
        else:
            ravel_order = "C"

        if categories is None:
            # No categories given: let the factorizer infer them.
            codes, categories, reverse_categories = factorize_strings(
                values.ravel(ravel_order),
                missing_value=missing_value,
                sort=sort,
            )
        else:
            (
                codes,
                categories,
                reverse_categories,
            ) = factorize_strings_known_categories(
                values.ravel(ravel_order),
                categories=categories,
                missing_value=missing_value,
                sort=sort,
            )
        # Mark the categories buffer read-only.
        categories.setflags(write=False)

        return cls.from_codes_and_metadata(
            codes=codes.reshape(values.shape, order=ravel_order),
            categories=categories,
            reverse_categories=reverse_categories,
            missing_value=missing_value,
        )

    @classmethod
    def from_codes_and_metadata(cls, codes, categories, reverse_categories,
                                missing_value):
        """
        Rehydrate a LabelArray from the codes and metadata.

        Parameters
        ----------
        codes : np.ndarray[integral]
            The codes for the label array.
        categories : np.ndarray[object]
            The unique string categories.
        reverse_categories : dict[str, int]
            The mapping from category to its code-index.
        missing_value : any
            The value used to represent missing data.

        Returns
        -------
        LabelArray
            A view of ``codes`` (no copy is requested here) carrying the
            supplied metadata.
        """
        # View the codes as opaque ``np.void`` items of this class, then
        # attach the metadata that ``__array_finalize__`` could not supply.
        ret = codes.view(type=cls, dtype=np.void)
        ret._categories = categories
        ret._reverse_categories = reverse_categories
        ret._missing_value = missing_value
        return ret

    @classmethod
    def from_categorical(cls, categorical, missing_value=None):
        """
        Create a LabelArray from a pandas categorical.

        Parameters
        ----------
        categorical : pd.Categorical
            The categorical object to convert.
        missing_value : bytes, unicode, or None, optional
            The missing value to use for this LabelArray.

        Returns
        -------
        la : LabelArray
            The LabelArray representation of this categorical.
        """
        return LabelArray(
            categorical,
            missing_value,
            categorical.categories,
        )

    @property
    def categories(self):
        # This is a property because it should be immutable.
        return self._categories

    @property
    def reverse_categories(self):
        # This is a property because it should be immutable.
        return self._reverse_categories

    @property
    def missing_value(self):
        # This is a property because it should be immutable.
        return self._missing_value

    @property
    def missing_value_code(self):
        # The integer code that ``reverse_categories`` assigns to the missing
        # value.
        return self.reverse_categories[self.missing_value]

    def has_label(self, value):
        """
        Return True if ``value`` is a key of ``reverse_categories``.
        """
        return value in self.reverse_categories

    def __array_finalize__(self, obj):
        """
        Called by Numpy after array construction.

        There are three cases where this can happen:

        1. Someone tries to directly construct a new array by doing::

            >>> ndarray.__new__(LabelArray, ...)  # doctest: +SKIP

           In this case, obj will be None.  We treat this as an error case and
           fail.

        2. Someone (most likely our own __new__) does::

           >>> other_array.view(type=LabelArray)  # doctest: +SKIP

           In this case, `self` will be the new LabelArray instance, and
           ``obj`` will be the array on which ``view`` is being called.

           The caller of ``obj.view`` is responsible for setting category
           metadata on ``self`` after we exit.

        3. Someone creates a new LabelArray by slicing an existing one.

           In this case, ``obj`` will be the original LabelArray.  We're
           responsible for copying over the parent array's category metadata.
        """
        if obj is None:
            raise TypeError(
                "Direct construction of LabelArrays is not supported.")

        # See docstring for an explanation of when these will or will not be
        # set.
        self._categories = getattr(obj, "categories", None)
        self._reverse_categories = getattr(obj, "reverse_categories", None)
        self._missing_value = getattr(obj, "missing_value", None)

    def as_int_array(self):
        """
        Convert self into a regular ndarray of ints.

        This is an O(1) operation. It does not copy the underlying data.
        """
        return self.view(
            type=ndarray,
            dtype=unsigned_int_dtype_with_size_in_bytes(self.itemsize),
        )

    def as_string_array(self):
        """
        Convert self back into an array of strings.

        This is an O(N) operation.
        """
        return self.categories[self.as_int_array()]

    def as_categorical(self):
        """
        Coerce self into a pandas categorical.

        This is only defined on 1D arrays, since that's all pandas supports.

        Raises
        ------
        ValueError
            If ``self`` has more than one dimension.
        """
        if len(self.shape) > 1:
            raise ValueError("Can't convert a 2D array to a categorical.")

        with ignore_pandas_nan_categorical_warning():
            return pd.Categorical.from_codes(
                self.as_int_array(),
                # We need to make a copy because pandas >= 0.17 fails if this
                # buffer isn't writeable.
                self.categories.copy(),
                ordered=False,
            )

    def as_categorical_frame(self, index, columns, name=None):
        """
        Coerce self into a pandas DataFrame of Categoricals.

        Parameters
        ----------
        index, columns
            Row and column labels; ``(len(index), len(columns))`` must equal
            ``self.shape``.
        name : optional
            Name passed to the intermediate ``pd.Series``.

        Raises
        ------
        ValueError
            If ``self`` is not 2D, or if its shape does not match the shape
            implied by ``index`` and ``columns``.
        """
        if len(self.shape) != 2:
            raise ValueError(
                "Can't convert a non-2D LabelArray into a DataFrame.")

        expected_shape = (len(index), len(columns))
        if expected_shape != self.shape:
            raise ValueError(
                "Can't construct a DataFrame with provided indices:\n\n"
                "LabelArray shape is {actual}, but index and columns imply "
                "that shape should be {expected}.".format(
                    actual=self.shape,
                    expected=expected_shape,
                ))

        # Build a flat categorical Series over the (index, columns) product
        # and unstack it back into two dimensions.
        return pd.Series(
            index=pd.MultiIndex.from_product([index, columns]),
            data=self.ravel().as_categorical(),
            name=name,
        ).unstack()

    def __setitem__(self, indexer, value):
        """
        Assign ``value`` at ``indexer``.

        ``value`` may be a supported scalar that is already one of our
        categories, or another LabelArray whose categories either equal ours
        or are a subset of ours (with the same missing value).

        Raises
        ------
        ValueError
            If a scalar ``value`` is not one of our categories.
        CategoryMismatch
            If a LabelArray ``value`` has incompatible categories.
        NotImplementedError
            If ``value`` is of any other type.
        """
        self_categories = self.categories

        if isinstance(value, self.SUPPORTED_SCALAR_TYPES):
            value_code = self.reverse_categories.get(value, None)
            if value_code is None:
                raise ValueError("%r is not in LabelArray categories." % value)
            # Write the integer code through the plain-int view of our data.
            self.as_int_array()[indexer] = value_code
        elif isinstance(value, LabelArray):
            value_categories = value.categories
            if compare_arrays(self_categories, value_categories):
                # Identical categories: the codes can be copied as they are.
                return super(LabelArray, self).__setitem__(indexer, value)
            elif self.missing_value == value.missing_value and set(
                    value.categories) <= set(self.categories):
                # Subset of our categories: re-factorize the incoming strings
                # against our categories before assigning.
                rhs = LabelArray.from_codes_and_metadata(
                    *factorize_strings_known_categories(
                        value.as_string_array().ravel(),
                        list(self.categories),
                        self.missing_value,
                        False,
                    ),
                    missing_value=self.missing_value,
                ).reshape(value.shape)
                super(LabelArray, self).__setitem__(indexer, rhs)
            else:
                raise CategoryMismatch(self_categories, value_categories)
        else:
            raise NotImplementedError(
                "Setting into a LabelArray with a value of "
                "type {type} is not yet supported.".format(
                    type=type(value).__name__, ), )

    def set_scalar(self, indexer, value):
        """
        Set scalar value into the array.

        Parameters
        ----------
        indexer : any
            The indexer to set the value at.
        value : str
            The value to assign at the given locations.

        Raises
        ------
        ValueError
            Raised when ``value`` is not a valid element of this label
            array.
        """
        try:
            value_code = self.reverse_categories[value]
        except KeyError:
            raise ValueError("%r is not in LabelArray categories." % value)

        self.as_int_array()[indexer] = value_code

    def __getitem__(self, indexer):
        """
        Index into the array, returning a LabelArray for array results and
        the corresponding category entry for scalar results.
        """
        result = super(LabelArray, self).__getitem__(indexer)
        if result.ndim:
            # Result is still a LabelArray, so we can just return it.
            return result

        # Result is a scalar value, which will be an instance of np.void.
        # Map it back to one of our category entries.
        index = result.view(
            unsigned_int_dtype_with_size_in_bytes(self.itemsize), )
        return self.categories[index]

    def is_missing(self):
        """
        Like isnan, but checks for locations where we store missing values.
        """
        return (
            self.as_int_array() == self.reverse_categories[self.missing_value])

    def not_missing(self):
        """
        Like ~isnan, but checks for locations where we store missing values.
        """
        return (self.as_int_array() !=
                self.reverse_categories[self.missing_value])

    def _equality_check(op):
        """
        Shared code for __eq__ and __ne__, parameterized on the actual
        comparison operator to use.

        In the LabelArray, ndarray and scalar branches of the generated
        method, the comparison result is masked with ``not_missing()``, so
        locations holding the missing value compare False under both
        operators.
        """
        def method(self, other):

            if isinstance(other, LabelArray):
                self_mv = self.missing_value
                other_mv = other.missing_value
                if self_mv != other_mv:
                    raise MissingValueMismatch(self_mv, other_mv)

                self_categories = self.categories
                other_categories = other.categories
                if not compare_arrays(self_categories, other_categories):
                    raise CategoryMismatch(self_categories, other_categories)

                # Same categories, so comparing codes is comparing labels.
                return (op(self.as_int_array(), other.as_int_array())
                        & self.not_missing()
                        & other.not_missing())

            elif isinstance(other, ndarray):
                # Compare to ndarrays as though we were an array of strings.
                # This is fairly expensive, and should generally be avoided.
                return op(self.as_string_array(), other) & self.not_missing()

            elif isinstance(other, self.SUPPORTED_SCALAR_TYPES):
                # A label we don't know gets code -1.
                i = self._reverse_categories.get(other, -1)
                return op(self.as_int_array(), i) & self.not_missing()

            # Any other type: defer to the parent class.
            return op(super(LabelArray, self), other)

        return method

    __eq__ = _equality_check(eq)
    __ne__ = _equality_check(ne)
    # The factory is only needed while the class body executes.
    del _equality_check

    def view(self, dtype=_NotPassed, type=_NotPassed):
        """
        Like ``ndarray.view``, but refuses to reinterpret the data as a
        different dtype unless ``type`` is also passed.

        Raises
        ------
        TypeError
            If ``dtype`` is passed, differs from ``self.dtype``, and ``type``
            is not passed.
        """
        if type is _NotPassed and dtype not in (_NotPassed, self.dtype):
            raise TypeError("Can't view LabelArray as another dtype.")

        # The text signature on ndarray.view makes it look like the default
        # values for dtype and type are `None`, but passing None explicitly has
        # different semantics than not passing an arg at all, so we reconstruct
        # the kwargs dict here to simulate the args not being passed at all.
        kwargs = {}
        if dtype is not _NotPassed:
            kwargs["dtype"] = dtype
        if type is not _NotPassed:
            kwargs["type"] = type
        return super(LabelArray, self).view(**kwargs)

    def astype(self,
               dtype,
               order="K",
               casting="unsafe",
               subok=True,
               copy=True):
        """
        Convert to ``dtype``.

        Supported targets are our own dtype (optionally copying and/or
        dropping the subclass), the object dtype (returns the string array),
        and byte-string dtypes of kind "S".

        Raises
        ------
        TypeError
            For any other ``dtype``.
        """
        if dtype == self.dtype:
            if not subok:
                array = self.view(type=np.ndarray)
            else:
                array = self

            if copy:
                return array.copy()
            return array

        if dtype == object_dtype:
            return self.as_string_array()

        if dtype.kind == "S":
            return self.as_string_array().astype(
                dtype,
                order=order,
                casting=casting,
                subok=subok,
                copy=copy,
            )

        raise TypeError(
            "%s can only be converted into object, string, or void,"
            " got: %r" % (
                type(self).__name__,
                dtype,
            ), )

    # In general, we support resizing, slicing, and reshaping methods, but not
    # numeric methods.
    SUPPORTED_NDARRAY_METHODS = frozenset([
        "astype",
        "base",
        "compress",
        "copy",
        "data",
        "diagonal",
        "dtype",
        "flat",
        "flatten",
        "item",
        "itemset",
        "itemsize",
        "nbytes",
        "ndim",
        "ravel",
        "repeat",
        "reshape",
        "resize",
        "setflags",
        "shape",
        "size",
        "squeeze",
        "strides",
        "swapaxes",
        "take",
        "trace",
        "transpose",
        "view",
    ])
    # Every public (non-underscore) attribute name of ndarray.
    PUBLIC_NDARRAY_METHODS = frozenset(
        [s for s in dir(ndarray) if not s.startswith("_")])

    # Generate failing wrappers for all unsupported methods.
    locals().update({
        method: _make_unsupported_method(method)
        for method in PUBLIC_NDARRAY_METHODS - SUPPORTED_NDARRAY_METHODS
    })

    def __repr__(self):
        """
        Build the repr from the repr of the string array, swapping the
        leading ``array(`` for ``LabelArray(`` and dropping whatever follows
        the last comma on the final line.
        """
        repr_lines = repr(self.as_string_array()).splitlines()
        repr_lines[0] = repr_lines[0].replace("array(", "LabelArray(", 1)
        repr_lines[-1] = repr_lines[-1].rsplit(",", 1)[0] + ")"
        # The extra spaces here account for the difference in length between
        # 'array(' and 'LabelArray('.
        return "\n     ".join(repr_lines)

    def empty_like(self, shape):
        """
        Make an empty LabelArray with the same categories as ``self``, filled
        with ``self.missing_value``.
        """
        return type(self).from_codes_and_metadata(
            codes=np.full(
                shape,
                self.reverse_categories[self.missing_value],
                dtype=unsigned_int_dtype_with_size_in_bytes(self.itemsize),
            ),
            categories=self.categories,
            reverse_categories=self.reverse_categories,
            missing_value=self.missing_value,
        )

    def map_predicate(self, f):
        """
        Map a function from str -> bool element-wise over ``self``.

        ``f`` will be applied exactly once to each non-missing unique value in
        ``self``. Missing values will always return False.
        """
        # Functions passed to this are of type str -> bool.  Don't ever call
        # them on None, which is the only non-str value we ever store in
        # categories.
        if self.missing_value is None:

            def f_to_use(x):
                return False if x is None else f(x)

        else:
            f_to_use = f

        # Call f on each unique value in our categories.
        results = np.vectorize(f_to_use, otypes=[bool_dtype])(self.categories)

        # missing_value should produce False no matter what
        results[self.reverse_categories[self.missing_value]] = False

        # unpack the results from each unique value into their corresponding
        # locations in our indices.
        return results[self.as_int_array()]

    def map(self, f):
        """
        Map a function from str -> str element-wise over ``self``.

        ``f`` will be applied exactly once to each non-missing unique value in
        ``self``. Missing values will always map to ``self.missing_value``.

        Raises
        ------
        TypeError
            If ``f`` returns a value that is not a supported scalar type
            (None is only accepted when it is our missing value).
        """
        # f() should only return None if None is our missing value.
        if self.missing_value is None:
            allowed_outtypes = self.SUPPORTED_SCALAR_TYPES
        else:
            allowed_outtypes = self.SUPPORTED_NON_NONE_SCALAR_TYPES

        def f_to_use(x,
                     missing_value=self.missing_value,
                     otypes=allowed_outtypes):

            # Don't call f on the missing value; those locations don't exist
            # semantically. We return _sortable_sentinel rather than None
            # because the np.unique call below sorts the categories array,
            # which raises an error on Python 3 because None and str aren't
            # comparable.
            if x == missing_value:
                return _sortable_sentinel

            ret = f(x)

            if not isinstance(ret, otypes):
                raise TypeError(
                    "LabelArray.map expected function {f} to return a string"
                    " or None, but got {type} instead.\n"
                    "Value was {value}.".format(
                        f=f.__name__,
                        type=type(ret).__name__,
                        value=ret,
                    ))

            # An output equal to the missing value is treated as missing too.
            if ret == missing_value:
                return _sortable_sentinel

            return ret

        new_categories_with_duplicates = np.vectorize(f_to_use,
                                                      otypes=[object])(
                                                          self.categories)

        # If f() maps multiple inputs to the same output, then we can end up
        # with the same code duplicated multiple times. Compress the categories
        # by running them through np.unique, and then use the reverse lookup
        # table to compress codes as well.
        new_categories, bloated_inverse_index = np.unique(
            new_categories_with_duplicates, return_inverse=True)

        if new_categories[0] is _sortable_sentinel:
            # f_to_use returns _sortable_sentinel for locations that should be
            # missing values in our output. Since np.unique returns the uniques
            # in sorted order, and since _sortable_sentinel sorts before any
            # string, we only need to check the first array entry.
            new_categories[0] = self.missing_value

        # `reverse_index` will always be a 64 bit integer even if we can hold a
        # smaller array.
        reverse_index = bloated_inverse_index.astype(
            smallest_uint_that_can_hold(len(new_categories)))
        # Translate each of our old codes into its new, compressed code.
        new_codes = np.take(reverse_index, self.as_int_array())

        return self.from_codes_and_metadata(
            new_codes,
            new_categories,
            dict(zip(new_categories, range(len(new_categories)))),
            missing_value=self.missing_value,
        )

    def startswith(self, prefix):
        """
        Element-wise startswith.

        Parameters
        ----------
        prefix : str

        Returns
        -------
        matches : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self started with ``prefix``.
        """
        return self.map_predicate(lambda elem: elem.startswith(prefix))

    def endswith(self, suffix):
        """
        Elementwise endswith.

        Parameters
        ----------
        suffix : str

        Returns
        -------
        matches : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self ended with ``suffix``
        """
        return self.map_predicate(lambda elem: elem.endswith(suffix))

    def has_substring(self, substring):
        """
        Elementwise contains.

        Parameters
        ----------
        substring : str

        Returns
        -------
        matches : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self contains ``substring``.
        """
        return self.map_predicate(lambda elem: substring in elem)

    @preprocess(pattern=coerce(from_=(bytes, unicode), to=re.compile))
    def matches(self, pattern):
        """
        Elementwise regex match.

        Parameters
        ----------
        pattern : str or compiled regex

        Returns
        -------
        matches : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self was matched by ``pattern``.
        """
        return self.map_predicate(compose(bool, pattern.match))

    # These types all implement an O(N) __contains__, so pre-emptively
    # coerce to `set`.
    @preprocess(container=coerce((list, tuple, np.ndarray), set))
    def element_of(self, container):
        """
        Check if each element of self is an element of ``container``.

        Parameters
        ----------
        container : object
            An object implementing a __contains__ to call on each element of
            ``self``.

        Returns
        -------
        is_contained : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self was an element of ``container``.
        """
        return self.map_predicate(container.__contains__)
Example #4
0
class TradingCalendar(with_metaclass(ABCMeta)):
    """
    A TradingCalendar represents the timing information of a single market
    exchange.

    The timing information is made up of two parts: sessions, and opens/closes.

    A session represents a contiguous set of minutes, and has a label that is
    midnight UTC. It is important to note that a session label should not be
    considered a specific point in time, and that midnight UTC is just being
    used for convenience.

    For each session, we store the open and close time in UTC time.
    """

    # Whether ``all_minutes`` should drop the lunch-break minutes.
    use_lunch_break = False

    def __init__(self, start=start_default, end=end_default):
        """
        Build the open/close schedule for every session between ``start``
        and ``end``.

        Parameters
        ----------
        start : optional
            First date handed to ``date_range`` when generating sessions.
        end : optional
            Last date handed to ``date_range`` when generating sessions.
        """
        # Midnight in UTC for each trading day.

        # In pandas 0.18.1, pandas calls into its own code here in a way that
        # fires a warning. The calling code in pandas tries to suppress the
        # warning, but does so incorrectly, causing it to bubble out here.
        # Actually catch and suppress the warning here:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            _all_days = date_range(start, end, freq=self.day, tz='UTC')

        # `DatetimeIndex`s of standard opens/closes for each day.
        self._opens = days_at_time(_all_days, self.open_time, self.tz,
                                   self.open_offset)
        self._closes = days_at_time(
            _all_days, self.close_time, self.tz, self.close_offset
        )

        # `DatetimeIndex`s of nonstandard opens/closes.
        _special_opens = self._calculate_special_opens(start, end)
        _special_closes = self._calculate_special_closes(start, end)

        # Overwrite the special opens and closes on top of the standard ones.
        _overwrite_special_dates(_all_days, self._opens, _special_opens)
        _overwrite_special_dates(_all_days, self._closes, _special_closes)

        # In pandas 0.16.1 _opens and _closes will lose their timezone
        # information. This looks like it has been resolved in 0.17.1.
        # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz  # noqa
        self.schedule = DataFrame(
            index=_all_days,
            columns=['market_open', 'market_close'],
            data={
                'market_open': self._opens,
                'market_close': self._closes,
            },
            dtype='datetime64[ns]',
        )

        # Simple cache to avoid recalculating the same minute -> session in
        # "next" mode. Analysis of current zipline code paths shows that
        # `minute_to_session_label` is often called consecutively with the
        # same inputs.
        self._minute_to_session_label_cache = LRU(1)
        # Opens and closes as integer nanoseconds.
        self.market_opens_nanos = self.schedule.market_open.values.\
            astype(np.int64)

        self.market_closes_nanos = self.schedule.market_close.values.\
            astype(np.int64)
        # Every trading minute as integer nanoseconds.
        self._trading_minutes_nanos = self.all_minutes.values.\
            astype(np.int64)

        self.first_trading_session = _all_days[0]
        self.last_trading_session = _all_days[-1]

        # Session labels of the sessions that have a special close.
        self._early_closes = pd.DatetimeIndex(
            _special_closes.map(self.minute_to_session_label)
        )

    @lazyval
    def day(self):
        """
        The ``CustomBusinessDay`` offset used as the ``freq`` when generating
        sessions in ``__init__``.
        """
        return CustomBusinessDay(
            holidays=self.adhoc_holidays,    # ad hoc holidays
            calendar=self.regular_holidays,  # regular holidays
        )

    @abstractproperty
    def name(self):
        raise NotImplementedError()

    @abstractproperty
    def tz(self):
        raise NotImplementedError()

    @abstractproperty
    def open_time(self):
        raise NotImplementedError()

    @abstractproperty
    def close_time(self):
        raise NotImplementedError()

    @property
    def lunch_break_start_time(self):
        """
        Start of the lunch break.

        Calendars that set ``use_lunch_break`` must override this with the
        actual lunch break start time.  The default is only a placeholder;
        it is read by ``all_minutes`` solely when ``use_lunch_break`` is
        true.
        """
        # datetime.time only accepts minutes in 0..59; the previous value
        # ``time(23, 99)`` raised ValueError whenever this property was read.
        return time(23, 59)

    @property
    def lunch_break_end_time(self):
        """
        End of the lunch break.

        Calendars that set ``use_lunch_break`` must override this with the
        actual lunch break end time.  The default is only a placeholder; it
        is read by ``all_minutes`` solely when ``use_lunch_break`` is true.
        """
        return time(0, 0)

    @property
    def open_offset(self):
        return 0

    @property
    def close_offset(self):
        return 0

    @lazyval
    def _minutes_per_session(self):
        """
        Series of per-session minute counts: close minus open in minutes,
        plus one so that the closing minute is included.
        """
        diff = self.schedule.market_close - self.schedule.market_open
        diff = diff.astype('timedelta64[m]')
        return diff + 1

    def minutes_count_for_sessions_in_range(self, start_session, end_session):
        """
        Parameters
        ----------
        start_session: pd.Timestamp
            The first session.

        end_session: pd.Timestamp
            The last session.

        Returns
        -------
        int: The total number of minutes for the contiguous chunk of sessions.
             between start_session and end_session, inclusive.
        """
        return int(self._minutes_per_session[start_session:end_session].sum())

    @property
    def regular_holidays(self):
        """
        Returns
        -------
        pd.AbstractHolidayCalendar: a calendar containing the regular holidays
        for this calendar
        """
        return None

    @property
    def adhoc_holidays(self):
        return []

    @property
    def special_opens(self):
        """
        A list of special open times and corresponding HolidayCalendars.

        Returns
        -------
        list: List of (time, AbstractHolidayCalendar) tuples
        """
        return []

    @property
    def special_opens_adhoc(self):
        """
        Returns
        -------
        list: List of (time, DatetimeIndex) tuples that represent special
         opens that cannot be codified into rules.
        """
        return []

    @property
    def special_closes(self):
        """
        A list of special close times and corresponding HolidayCalendars.

        Returns
        -------
        list: List of (time, AbstractHolidayCalendar) tuples
        """
        return []

    @property
    def special_closes_adhoc(self):
        """
        Returns
        -------
        list: List of (time, DatetimeIndex) tuples that represent special
         closes that cannot be codified into rules.
        """
        return []

    # -----

    @property
    def opens(self):
        return self.schedule.market_open

    @property
    def closes(self):
        return self.schedule.market_close

    @property
    def early_closes(self):
        return self._early_closes

    def is_session(self, dt):
        """
        Given a dt, returns whether it's a valid session label (note that
        session labels are midnight timestamps).

        Parameters
        ----------
        dt: pd.Timestamp
            The dt that is being tested.

        Notes
        -----
        The check is plain membership in ``self.schedule.index``.  According
        to the original author's notes (not verified here, and possibly
        pandas-version dependent):

        1. a bare date that is in the calendar is accepted;
        2. a dt carrying a time of day is accepted only at midnight;
        3. a dt whose timezone is UTC or None is accepted.

        Returns
        -------
        bool
            Whether the given dt is a valid session label.
        """
        return dt in self.schedule.index

    def is_open_on_minute(self, dt):
        """
        Given a dt, return whether this exchange is open at the given dt.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to check if this exchange is open.

        Returns
        -------
        bool
            Whether the exchange is open on this dt.
        """
        return is_open(self.market_opens_nanos, self.market_closes_nanos,
                       dt.value)

    def next_open(self, dt):
        """
        Given a dt, returns the next open.

        If the given dt happens to be a session open, the next session's open
        will be returned.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next open.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the next open.
        """
        idx = next_divider_idx(self.market_opens_nanos, dt.value)
        return pd.Timestamp(self.market_opens_nanos[idx], tz='UTC')

    def next_close(self, dt):
        """
        Given a dt, returns the next close.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next close.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the next close.
        """
        idx = next_divider_idx(self.market_closes_nanos, dt.value)
        return pd.Timestamp(self.market_closes_nanos[idx], tz='UTC')

    def previous_open(self, dt):
        """
        Given a dt, returns the previous open.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous open.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the previous open.
        """
        idx = previous_divider_idx(self.market_opens_nanos, dt.value)
        return pd.Timestamp(self.market_opens_nanos[idx], tz='UTC')

    def previous_close(self, dt):
        """
        Given a dt, returns the previous close.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous close.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the previous close.
        """
        idx = previous_divider_idx(self.market_closes_nanos, dt.value)
        return pd.Timestamp(self.market_closes_nanos[idx], tz='UTC')

    def next_minute(self, dt):
        """
        Given a dt, return the next exchange minute.  If the given dt is not
        an exchange minute, returns the next exchange open.  If dt falls in
        the lunch break, the next trading minute (the afternoon open) is
        returned.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next exchange minute.

        Returns
        -------
        pd.Timestamp
            The next exchange minute.
        """
        idx = next_divider_idx(self._trading_minutes_nanos, dt.value)
        return self.all_minutes[idx]

    def previous_minute(self, dt):
        """
        Given a dt, return the previous exchange minute.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous exchange minute.

        Returns
        -------
        pd.Timestamp
            The previous exchange minute.

        Notes
        -----
        The upstream docstring states that KeyError is raised when dt is not
        an exchange minute.  The author of this fork notes that, in practice,
        an error (ValueError) only occurs when dt lies outside the start/end
        range the calendar was built with.  NOTE(review): the raising code
        lives in ``previous_divider_idx``; confirm there.
        """

        idx = previous_divider_idx(self._trading_minutes_nanos, dt.value)
        return self.all_minutes[idx]

    def next_session_label(self, session_label):
        """
        Given a session label, returns the label of the next session.

        Parameters
        ----------
        session_label: pd.Timestamp
            A session whose next session is desired.  It must be a bare date
            or a normalized timestamp, and it must be a session of this
            calendar.

        Returns
        -------
        pd.Timestamp
            The next session label (midnight UTC).

        Notes
        -----
        The label is looked up with ``self.schedule.index.get_loc``, so a
        label that is not a session (including one outside the calendar's
        start/end range) raises KeyError.

        Raises ValueError if the given session is the last session in this
        calendar.
        """
        idx = self.schedule.index.get_loc(session_label)
        try:
            return self.schedule.index[idx + 1]
        except IndexError:
            if idx == len(self.schedule.index) - 1:
                raise ValueError("There is no next session as this is the end"
                                 " of the exchange calendar.")
            else:
                raise

    def previous_session_label(self, session_label):
        """
        Given a session label, returns the label of the previous session.

        Parameters
        ----------
        session_label: pd.Timestamp
            A session whose previous session is desired.

        Returns
        -------
        pd.Timestamp
            The previous session label (midnight UTC).

        Notes
        -----
        Raises ValueError if the given session is the first session in this
        calendar.
        """
        idx = self.schedule.index.get_loc(session_label)
        if idx == 0:
            raise ValueError("There is no previous session as this is the"
                             " beginning of the exchange calendar.")

        return self.schedule.index[idx - 1]

    def minutes_for_session(self, session_label):
        """
        Given a session label, return the minutes for that session.

        Parameters
        ----------
        session_label: pd.Timestamp (midnight UTC)
            A session label whose session's minutes are desired.

        Returns
        -------
        pd.DateTimeIndex
            All the minutes for the given session.
        """
        return self.minutes_in_range(
            start_minute=self.schedule.at[session_label, 'market_open'],
            end_minute=self.schedule.at[session_label, 'market_close'],
        )

    def execution_minutes_for_session(self, session_label):
        """
        Given a session label, return the execution minutes for that session.

        Parameters
        ----------
        session_label: pd.Timestamp (midnight UTC)
            A session label whose session's minutes are desired.

        Returns
        -------
        pd.DateTimeIndex
            All the execution minutes for the given session.
        """
        return self.minutes_in_range(
            start_minute=self.execution_time_from_open(
                self.schedule.at[session_label, 'market_open'],
            ),
            end_minute=self.execution_time_from_close(
                self.schedule.at[session_label, 'market_close'],
            ),
        )

    def execution_minutes_for_sessions_in_range(self, start, stop):
        """
        Return all execution minutes of the sessions between ``start`` and
        ``stop`` as a single UTC ``DatetimeIndex``.
        """
        minutes = self.execution_minutes_for_session
        return pd.DatetimeIndex(
            np.concatenate([
                minutes(session)
                for session in self.sessions_in_range(start, stop)
            ]),
            tz='UTC',
        )

    def minutes_window(self, start_dt, count):
        """
        Return a window of calendar minutes anchored at ``start_dt``.

        If ``start_dt`` is not itself a calendar minute, the window is
        anchored at the closest calendar minute before it.

        Parameters
        ----------
        start_dt: pd.Timestamp
            The anchor of the window.

        count: int
            When ``count`` is not negative, the slice of ``count`` minutes
            beginning at the anchor.  When negative, the slice of
            ``abs(count)`` minutes ending at (and including) the anchor.

        Returns
        -------
        pd.DatetimeIndex
            The minutes in the window.

        Raises
        ------
        KeyError
            If ``start_dt`` is before the first or after the last minute of
            the calendar.
        """
        start_dt_nanos = start_dt.value
        all_minutes_nanos = self._trading_minutes_nanos
        start_idx = all_minutes_nanos.searchsorted(start_dt_nanos)

        # searchsorted returns len(array) when start_dt is past the last
        # minute.  Reject that before indexing so that callers get the
        # intended KeyError instead of an IndexError.
        if start_idx >= len(all_minutes_nanos):
            raise KeyError("Can't start minute window at {}".format(start_dt))

        # searchsorted finds the index of the minute **on or after** start_dt.
        # If the latter, push back to the prior minute.
        if all_minutes_nanos[start_idx] != start_dt_nanos:
            start_idx -= 1

        if start_idx < 0:
            raise KeyError("Can't start minute window at {}".format(start_dt))

        end_idx = start_idx + count

        if start_idx > end_idx:
            return self.all_minutes[(end_idx + 1):(start_idx + 1)]
        else:
            return self.all_minutes[start_idx:end_idx]

    def sessions_in_range(self, start_session_label, end_session_label):
        """
        Given start and end session labels, return all the sessions in that
        range, inclusive.

        Notes
        -----
        Pass normalized (midnight) labels.

        Parameters
        ----------
        start_session_label: pd.Timestamp (midnight UTC)
            The label representing the first session of the desired range.

        end_session_label: pd.Timestamp (midnight UTC)
            The label representing the last session of the desired range.

        Returns
        -------
        pd.DatetimeIndex
            The desired sessions.
        """
        return self.all_sessions[
            self.all_sessions.slice_indexer(
                start_session_label,
                end_session_label
            )
        ]

    def sessions_window(self, session_label, count):
        """
        Given a session label and a window size, returns a list of sessions
        of size `count` + 1, that either starts with the given session
        (if `count` is positive) or ends with the given session (if `count` is
        negative).

        Parameters
        ----------
        session_label: pd.Timestamp
            The label of the initial session.

        count: int
            Defines the length and the direction of the window.

        Returns
        -------
        pd.DatetimeIndex
            The desired sessions.
        """
        start_idx = self.schedule.index.get_loc(session_label)
        end_idx = start_idx + count

        return self.all_sessions[
            min(start_idx, end_idx):max(start_idx, end_idx) + 1
        ]

    def session_distance(self, start_session_label, end_session_label):
        """
        Given a start and end session label, returns the distance between them.
        For example, for three consecutive sessions Mon., Tues., and Wed,
        ``session_distance(Mon, Wed)`` returns 3. If ``start_session`` is after
        ``end_session``, the value will be negated.

        Parameters
        ----------
        start_session_label: pd.Timestamp
            The label of the start session.
        end_session_label: pd.Timestamp
            The label of the ending session inclusive.

        Returns
        -------
        int
            The distance between the two sessions.
        """
        negate = end_session_label < start_session_label
        if negate:
            start_session_label, end_session_label = (
                end_session_label,
                start_session_label,
            )
        start_idx = self.all_sessions.searchsorted(start_session_label)
        end_idx = self.all_sessions.searchsorted(
            end_session_label,
            side='right',
        )

        out = end_idx - start_idx
        if negate:
            out = -out

        return out

    def minutes_in_range(self, start_minute, end_minute):
        """
        Given start and end minutes, return all the calendar minutes
        in that range, inclusive.

        Given minutes don't need to be calendar minutes.

        Parameters
        ----------
        start_minute: pd.Timestamp
            The minute representing the start of the desired range.

        end_minute: pd.Timestamp
            The minute representing the end of the desired range.

        Returns
        -------
        pd.DatetimeIndex
            The minutes in the desired range.
        """
        start_idx = searchsorted(self._trading_minutes_nanos,
                                 start_minute.value)

        end_idx = searchsorted(self._trading_minutes_nanos,
                               end_minute.value)

        # searchsorted returns len(array) when end_minute is after the last
        # calendar minute; only compare when end_idx is a valid position.
        if (end_idx < len(self._trading_minutes_nanos) and
                end_minute.value == self._trading_minutes_nanos[end_idx]):
            # if the end minute is a market minute, increase by 1
            end_idx += 1

        return self.all_minutes[start_idx:end_idx]

    def minutes_for_sessions_in_range(self,
                                      start_session_label,
                                      end_session_label):
        """
        Returns all the minutes for all the sessions from the given start
        session label to the given end session label, inclusive.

        Parameters
        ----------
        start_session_label: pd.Timestamp
            The label of the first session in the range.

        end_session_label: pd.Timestamp
            The label of the last session in the range.

        Returns
        -------
        pd.DatetimeIndex
            The minutes in the desired range.

        """
        first_minute, _ = self.open_and_close_for_session(start_session_label)
        _, last_minute = self.open_and_close_for_session(end_session_label)

        return self.minutes_in_range(first_minute, last_minute)

    def open_and_close_for_session(self, session_label):
        """
        Returns a tuple of timestamps of the open and close of the session
        represented by the given label.

        Parameters
        ----------
        session_label: pd.Timestamp
            The session whose open and close are desired.

        Returns
        -------
        (Timestamp, Timestamp)
            The open and close for the given session.
        """
        sched = self.schedule

        # `market_open` and `market_close` should be timezone aware, but pandas
        # 0.16.1 does not appear to support this:
        # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz  # noqa
        return (
            sched.at[session_label, 'market_open'].tz_localize('UTC'),
            sched.at[session_label, 'market_close'].tz_localize('UTC'),
        )

    def session_open(self, session_label):
        """Return the UTC-localized open of the given session."""
        return self.schedule.at[
            session_label,
            'market_open'
        ].tz_localize('UTC')

    def session_close(self, session_label):
        """Return the UTC-localized close of the given session."""
        return self.schedule.at[
            session_label,
            'market_close'
        ].tz_localize('UTC')

    def session_opens_in_range(self, start_session_label, end_session_label):
        """Return the UTC-localized opens of the sessions in the range."""
        return self.schedule.loc[
            start_session_label:end_session_label,
            'market_open',
        ].dt.tz_localize('UTC')

    def session_closes_in_range(self, start_session_label, end_session_label):
        """Return the UTC-localized closes of the sessions in the range."""
        return self.schedule.loc[
            start_session_label:end_session_label,
            'market_close',
        ].dt.tz_localize('UTC')

    @property
    def all_sessions(self):
        return self.schedule.index

    @property
    def first_session(self):
        return self.all_sessions[0]

    @property
    def last_session(self):
        return self.all_sessions[-1]

    def execution_time_from_open(self, open_dates):
        """Return ``open_dates`` unchanged; a hook subclasses may override."""
        return open_dates

    def execution_time_from_close(self, close_dates):
        """Return ``close_dates`` unchanged; a hook subclasses may override."""
        return close_dates

    @lazyval
    def all_minutes(self):
        """
        Returns a DatetimeIndex representing all the minutes in this calendar.

        When ``use_lunch_break`` is true, the minutes between
        ``lunch_break_start_time`` and ``lunch_break_end_time`` (both
        inclusive) are removed.
        """
        opens_in_ns = self._opens.values.astype(
            'datetime64[ns]',
        ).view('int64')

        closes_in_ns = self._closes.values.astype(
            'datetime64[ns]',
        ).view('int64')
        # compute_all_minutes assumes every day is one contiguous block of
        # minutes, so the lunch break has to be cut out afterwards.
        dts = DatetimeIndex(
            compute_all_minutes(opens_in_ns, closes_in_ns),
            tz='utc',
        )
        # Exclude the lunch break, if there is one.
        if self.use_lunch_break:
            # The break boundaries must be expressed as UTC times of day,
            # because ``dts`` is in UTC.
            utc_start = days_at_time(
                [dts[0].date()], self.lunch_break_start_time, self.tz).time[0]
            utc_end = days_at_time(
                [dts[0].date()], self.lunch_break_end_time, self.tz).time[0]
            locs = dts.indexer_between_time(
                utc_start, utc_end, include_start=True, include_end=True)
            return dts.delete(locs)
        else:
            return dts

    @preprocess(dt=coerce(pd.Timestamp, attrgetter('value')))
    def minute_to_session_label(self, dt, direction="next"):
        """
        Given a minute, get the label of its containing session.

        Parameters
        ----------
        dt : pd.Timestamp or nanosecond offset
            The dt for which to get the containing session.

        direction: str
            "next" (default) means that if the given dt is not part of a
            session, return the label of the next session.

            "previous" means that if the given dt is not part of a session,
            return the label of the previous session.

            "none" means that a ValueError will be raised if the given
            dt is not part of a session.

        Returns
        -------
        pd.Timestamp (midnight UTC)
            The label of the containing session.

        Raises
        ------
        ValueError
            If ``direction`` is "none" and dt is not an exchange minute, or
            if ``direction`` is not one of the three supported values.
        """
        if direction == "next":
            try:
                return self._minute_to_session_label_cache[dt]
            except KeyError:
                pass

        # The first close on or after dt identifies the current session (if
        # the exchange is open at dt) or otherwise the next one.
        idx = searchsorted(self.market_closes_nanos, dt)
        current_or_next_session = self.schedule.index[idx]
        self._minute_to_session_label_cache[dt] = current_or_next_session

        if direction == "next":
            return current_or_next_session
        elif direction == "previous":
            if not is_open(self.market_opens_nanos, self.market_closes_nanos,
                           dt):
                # if the exchange is closed, use the previous session
                return self.schedule.index[idx - 1]
        elif direction == "none":
            if not is_open(self.market_opens_nanos, self.market_closes_nanos,
                           dt):
                # if the exchange is closed, blow up
                raise ValueError("The given dt is not an exchange minute!")
        else:
            # invalid direction
            raise ValueError("Invalid direction parameter: "
                             "{0}".format(direction))

        return current_or_next_session

    def minute_index_to_session_labels(self, index):
        """
        Given a sorted DatetimeIndex of market minutes, return a
        DatetimeIndex of the corresponding session labels.

        Parameters
        ----------
        index: pd.DatetimeIndex or pd.Series
            The ordered list of market minutes we want session labels for.

        Returns
        -------
        pd.DatetimeIndex (UTC)
            The list of session labels corresponding to the given minutes.
        """
        return pd.Index(map(self.minute_to_session_label, index))

    def _special_dates(self, calendars, ad_hoc_dates, start_date, end_date):
        """
        Union an iterable of pairs of the form (time, calendar)
        and an iterable of pairs of the form (time, [dates])

        (This is shared logic for computing special opens and special closes.)
        """
        _dates = DatetimeIndex([], tz='UTC').union_many(
            [
                holidays_at_time(calendar, start_date, end_date, time_,
                                 self.tz)
                for time_, calendar in calendars
            ] + [
                days_at_time(datetimes, time_, self.tz)
                for time_, datetimes in ad_hoc_dates
            ]
        )
        # Keep only the dates inside the requested range.
        return _dates[(_dates >= start_date) & (_dates <= end_date)]

    def _calculate_special_opens(self, start, end):
        """Return the special opens between ``start`` and ``end``."""
        return self._special_dates(
            self.special_opens,
            self.special_opens_adhoc,
            start,
            end,
        )

    def _calculate_special_closes(self, start, end):
        """Return the special closes between ``start`` and ``end``."""
        return self._special_dates(
            self.special_closes,
            self.special_closes_adhoc,
            start,
            end,
        )
class TradingCalendar(with_metaclass(ABCMeta)):
    """
    An TradingCalendar represents the timing information of a single market
    exchange.

    The timing information is made up of two parts: sessions, and opens/closes.

    A session represents a contiguous set of minutes, and has a label that is
    midnight UTC. It is important to note that a session label should not be
    considered a specific point in time, and that midnight UTC is just being
    used for convenience.

    For each session, we store the open and close time in UTC time.
    """
    def __init__(self, start=start_default, end=end_default):
        """
        Build the open/close schedule for every session between ``start``
        and ``end``.

        Parameters
        ----------
        start : optional
            First date handed to ``date_range`` when generating sessions.
        end : optional
            Last date handed to ``date_range`` when generating sessions.
        """
        # Midnight in UTC for each trading day.

        # In pandas 0.18.1, pandas calls into its own code here in a way that
        # fires a warning. The calling code in pandas tries to suppress the
        # warning, but does so incorrectly, causing it to bubble out here.
        # Actually catch and suppress the warning here:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            _all_days = date_range(start, end, freq=self.day, tz='UTC')

        # `DatetimeIndex`s of standard opens/closes for each day.
        self._opens = days_at_time(_all_days, self.open_time, self.tz,
                                   self.open_offset)
        self._closes = days_at_time(
            _all_days, self.close_time, self.tz, self.close_offset
        )

        # `DatetimeIndex`s of nonstandard opens/closes
        _special_opens = self._calculate_special_opens(start, end)
        _special_closes = self._calculate_special_closes(start, end)

        # Overwrite the special opens and closes on top of the standard ones.
        _overwrite_special_dates(_all_days, self._opens, _special_opens)
        _overwrite_special_dates(_all_days, self._closes, _special_closes)

        # In pandas 0.16.1 _opens and _closes will lose their timezone
        # information. This looks like it has been resolved in 0.17.1.
        # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz  # noqa
        self.schedule = DataFrame(
            index=_all_days,
            columns=['market_open', 'market_close'],
            data={
                'market_open': self._opens,
                'market_close': self._closes,
            },
            dtype='datetime64[ns]',
        )

        # Simple cache to avoid recalculating the same minute -> session in
        # "next" mode. Analysis of current zipline code paths show that
        # `minute_to_session_label` is often called consecutively with the same
        # inputs.
        self._minute_to_session_label_cache = LRU(1)

        # Opens and closes as integer nanoseconds.
        self.market_opens_nanos = self.schedule.market_open.values.\
            astype(np.int64)

        self.market_closes_nanos = self.schedule.market_close.values.\
            astype(np.int64)

        # Every trading minute as integer nanoseconds.
        # NOTE(review): all_minutes is defined outside this view; presumably
        # it is derived from _opens/_closes, so it must be read only after
        # the special dates have been overwritten above -- confirm.
        self._trading_minutes_nanos = self.all_minutes.values.\
            astype(np.int64)

        self.first_trading_session = _all_days[0]
        self.last_trading_session = _all_days[-1]

        # Map every special close to a session label.
        self._early_closes = pd.DatetimeIndex(
            _special_closes.map(self.minute_to_session_label)
        )

    @lazyval
    def day(self):
        """
        The ``CustomBusinessDay`` offset used as the ``freq`` when generating
        sessions in ``__init__``.
        """
        session_offset = CustomBusinessDay(
            holidays=self.adhoc_holidays,
            calendar=self.regular_holidays,
        )
        return session_offset

    @abstractproperty
    def name(self):
        """
        Abstract property that concrete calendars must override; the base
        implementation only raises NotImplementedError.
        """
        raise NotImplementedError()

    @abstractproperty
    def tz(self):
        """
        Abstract property that concrete calendars must override.

        The value is passed to ``days_at_time`` together with ``open_time``
        and ``close_time`` in ``__init__``.  NOTE(review): presumably the
        exchange's local timezone -- confirm against a concrete calendar.
        """
        raise NotImplementedError()

    @abstractproperty
    def open_time(self):
        """
        Abstract property that concrete calendars must override.

        ``__init__`` passes it to ``days_at_time`` (with ``tz`` and
        ``open_offset``) to build the standard opens.
        """
        raise NotImplementedError()

    @abstractproperty
    def close_time(self):
        """
        Abstract property that concrete calendars must override.

        ``__init__`` passes it to ``days_at_time`` (with ``tz`` and
        ``close_offset``) to build the standard closes.
        """
        raise NotImplementedError()

    @property
    def open_offset(self):
        """
        Offset forwarded to ``days_at_time`` when ``__init__`` builds the
        standard opens; 0 unless overridden.

        NOTE(review): the unit of the offset is defined by ``days_at_time``,
        which is outside this view -- confirm there.
        """
        return 0

    @property
    def close_offset(self):
        """
        Offset forwarded to ``days_at_time`` when ``__init__`` builds the
        standard closes; 0 unless overridden.

        NOTE(review): the unit of the offset is defined by ``days_at_time``,
        which is outside this view -- confirm there.
        """
        return 0

    @lazyval
    def _minutes_per_session(self):
        """
        Series of per-session minute counts: close minus open in minutes,
        plus one so that the closing minute is included.
        """
        session_lengths = (
            self.schedule.market_close - self.schedule.market_open
        )
        whole_minutes = session_lengths.astype('timedelta64[m]')
        return whole_minutes + 1

    def minutes_count_for_sessions_in_range(self, start_session, end_session):
        """
        Count the minutes of a contiguous run of sessions.

        Parameters
        ----------
        start_session: pd.Timestamp
            The first session.

        end_session: pd.Timestamp
            The last session.

        Returns
        -------
        int: The total number of minutes for the contiguous chunk of sessions
             between start_session and end_session, inclusive.
        """
        per_session = self._minutes_per_session[start_session:end_session]
        total = per_session.sum()
        return int(total)

    @property
    def regular_holidays(self):
        """
        Regular holidays, passed as ``calendar`` to ``CustomBusinessDay`` in
        ``day``.  The base implementation returns None.

        Returns
        -------
        pd.AbstractHolidayCalendar: a calendar containing the regular holidays
        for this calendar
        """
        return None

    @property
    def adhoc_holidays(self):
        """
        Ad hoc holidays, passed as ``holidays`` to ``CustomBusinessDay`` in
        ``day``.  The base implementation returns an empty list.
        """
        return []

    @property
    def special_opens(self):
        """
        A list of special open times and corresponding HolidayCalendars.
        The base implementation returns an empty list.

        Returns
        -------
        list: List of (time, AbstractHolidayCalendar) tuples
        """
        return []

    @property
    def special_opens_adhoc(self):
        """
        Special opens given as explicit dates.  The base implementation
        returns an empty list.

        Returns
        -------
        list: List of (time, DatetimeIndex) tuples that represent special
         opens that cannot be codified into rules.
        """
        return []

    @property
    def special_closes(self):
        """
        A list of special close times and corresponding HolidayCalendars.
        The base implementation returns an empty list.

        Returns
        -------
        list: List of (time, AbstractHolidayCalendar) tuples
        """
        return []

    @property
    def special_closes_adhoc(self):
        """
        Special closes given as explicit dates.  The base implementation
        returns an empty list.

        Returns
        -------
        list: List of (time, DatetimeIndex) tuples that represent special
         closes that cannot be codified into rules.
        """
        return []

    # -----

    @property
    def opens(self):
        """The ``market_open`` column of the schedule."""
        schedule = self.schedule
        return schedule.market_open

    @property
    def closes(self):
        """The ``market_close`` column of the schedule."""
        schedule = self.schedule
        return schedule.market_close

    @property
    def early_closes(self):
        """
        Session labels computed in ``__init__`` by mapping every special
        close through ``minute_to_session_label``.
        """
        return self._early_closes

    def is_session(self, dt):
        """
        Tell whether ``dt`` is one of this calendar's session labels.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt that is being tested.

        Returns
        -------
        bool
            Whether the given dt is a valid session label.
        """
        session_labels = self.schedule.index
        return dt in session_labels

    def is_open_on_minute(self, dt):
        """
        Tell whether the exchange is open at ``dt``.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to check if this exchange is open.

        Returns
        -------
        bool
            Whether the exchange is open on this dt.
        """
        opens_nanos = self.market_opens_nanos
        closes_nanos = self.market_closes_nanos
        return is_open(opens_nanos, closes_nanos, dt.value)

    def next_open(self, dt):
        """
        Find the first open after ``dt``.

        If the given dt happens to be a session open, the next session's open
        will be returned.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next open.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the next open.
        """
        opens_nanos = self.market_opens_nanos
        position = next_divider_idx(opens_nanos, dt.value)
        return pd.Timestamp(opens_nanos[position], tz='UTC')

    def next_close(self, dt):
        """
        Find the next close relative to ``dt``.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next close.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the next close.
        """
        closes_nanos = self.market_closes_nanos
        position = next_divider_idx(closes_nanos, dt.value)
        return pd.Timestamp(closes_nanos[position], tz='UTC')

    def previous_open(self, dt):
        """
        Find the most recent market open before the given dt.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous open.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the previous open.
        """
        opens_nanos = self.market_opens_nanos
        position = previous_divider_idx(opens_nanos, dt.value)
        return pd.Timestamp(opens_nanos[position], tz='UTC')

    def previous_close(self, dt):
        """
        Find the most recent market close before the given dt.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous close.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the previous close.
        """
        closes_nanos = self.market_closes_nanos
        position = previous_divider_idx(closes_nanos, dt.value)
        return pd.Timestamp(closes_nanos[position], tz='UTC')

    def next_minute(self, dt):
        """
        Find the exchange minute that follows the given dt.  If the given dt
        is not an exchange minute, returns the next exchange open.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next exchange minute.

        Returns
        -------
        pd.Timestamp
            The next exchange minute, taken from ``self.all_minutes``.
        """
        position = next_divider_idx(self._trading_minutes_nanos, dt.value)
        every_minute = self.all_minutes
        return every_minute[position]

    def previous_minute(self, dt):
        """
        Find the exchange minute that precedes the given dt.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous exchange minute.

        Returns
        -------
        pd.Timestamp
            The previous exchange minute, taken from ``self.all_minutes``.

        Notes
        -----
        NOTE(review): earlier documentation stated that a KeyError is raised
        when ``dt`` is not an exchange minute.  Nothing in this method raises
        it directly; any such error would have to come from
        ``previous_divider_idx`` -- confirm against that helper.
        """
        position = previous_divider_idx(self._trading_minutes_nanos, dt.value)
        every_minute = self.all_minutes
        return every_minute[position]

    def next_session_label(self, session_label):
        """
        Look up the session that immediately follows the given one.

        Parameters
        ----------
        session_label: pd.Timestamp
            A session whose next session is desired.

        Returns
        -------
        pd.Timestamp
            The next session label (midnight UTC).

        Notes
        -----
        Raises ValueError if the given session is the last session in this
        calendar.  Any other IndexError from the lookup is re-raised
        unchanged.
        """
        position = self.schedule.index.get_loc(session_label)
        try:
            following = self.schedule.index[position + 1]
        except IndexError:
            # Only translate the error when we really ran off the end.
            if position != len(self.schedule.index) - 1:
                raise
            raise ValueError("There is no next session as this is the end"
                             " of the exchange calendar.")
        return following

    def previous_session_label(self, session_label):
        """
        Look up the session that immediately precedes the given one.

        Parameters
        ----------
        session_label: pd.Timestamp
            A session whose previous session is desired.

        Returns
        -------
        pd.Timestamp
            The previous session label (midnight UTC).

        Notes
        -----
        Raises ValueError if the given session is the first session in this
        calendar.
        """
        position = self.schedule.index.get_loc(session_label)
        if position != 0:
            return self.schedule.index[position - 1]

        # Position 0 has no predecessor; indexing with -1 would wrap around.
        raise ValueError("There is no previous session as this is the"
                         " beginning of the exchange calendar.")

    def minutes_for_session(self, session_label):
        """
        Return every minute of the session identified by ``session_label``.

        Parameters
        ----------
        session_label: pd.Timestamp (midnight UTC)
            A session label whose session's minutes are desired.

        Returns
        -------
        pd.DateTimeIndex
            The result of ``minutes_in_range`` from the session's scheduled
            ``market_open`` to its scheduled ``market_close``.
        """
        session_open = self.schedule.at[session_label, 'market_open']
        session_close = self.schedule.at[session_label, 'market_close']
        return self.minutes_in_range(
            start_minute=session_open,
            end_minute=session_close,
        )

    def execution_minutes_for_session(self, session_label):
        """
        Return the execution minutes of the session identified by
        ``session_label``.

        The session's scheduled open and close are first passed through
        ``execution_time_from_open`` and ``execution_time_from_close``; the
        adjusted bounds are then handed to ``minutes_in_range``.

        Parameters
        ----------
        session_label: pd.Timestamp (midnight UTC)
            A session label whose session's minutes are desired.

        Returns
        -------
        pd.DateTimeIndex
            All the execution minutes for the given session.
        """
        scheduled_open = self.schedule.at[session_label, 'market_open']
        first_execution = self.execution_time_from_open(scheduled_open)

        scheduled_close = self.schedule.at[session_label, 'market_close']
        last_execution = self.execution_time_from_close(scheduled_close)

        return self.minutes_in_range(
            start_minute=first_execution,
            end_minute=last_execution,
        )

    def execution_minutes_for_sessions_in_range(self, start, stop):
        """
        Return the execution minutes of every session between ``start`` and
        ``stop``, concatenated in session order.

        Parameters
        ----------
        start: pd.Timestamp
            Lower bound passed to ``sessions_in_range``.
        stop: pd.Timestamp
            Upper bound passed to ``sessions_in_range``.

        Returns
        -------
        pd.DatetimeIndex
            UTC index holding ``execution_minutes_for_session`` of each
            session in the range.  Empty when the range holds no sessions.
        """
        per_session_minutes = [
            self.execution_minutes_for_session(session)
            for session in self.sessions_in_range(start, stop)
        ]

        # np.concatenate refuses an empty list ("need at least one array"),
        # so an empty range is answered directly.
        if not per_session_minutes:
            return pd.DatetimeIndex([], tz='UTC')

        return pd.DatetimeIndex(
            np.concatenate(per_session_minutes),
            tz='UTC',
        )

    def minutes_window(self, start_dt, count):
        """
        Return a window of calendar minutes anchored at ``start_dt``.

        Parameters
        ----------
        start_dt: pd.Timestamp
            Anchor of the window.  If it is not itself a calendar minute, the
            closest calendar minute before it is used as the anchor.
        count: int
            Size and direction of the window.  A positive count yields up to
            ``count`` minutes starting at the anchor.  A negative count
            yields up to ``-count`` minutes ending at (and including) the
            anchor.  Zero yields an empty window.

        Returns
        -------
        pd.DatetimeIndex
            The requested slice of ``self.all_minutes``.  The window is
            truncated at either end of the calendar rather than wrapping.

        Raises
        ------
        KeyError
            If ``start_dt`` is earlier than the first calendar minute or
            later than the last one.
        """
        start_dt_nanos = start_dt.value
        all_minutes_nanos = self._trading_minutes_nanos
        start_idx = all_minutes_nanos.searchsorted(start_dt_nanos)

        # searchsorted returns len(array) when start_dt is past the last
        # minute; that must be rejected *before* indexing the array.
        if start_idx >= len(all_minutes_nanos):
            raise KeyError("Can't start minute window at {}".format(start_dt))

        # searchsorted finds the index of the minute **on or after** start_dt.
        # If the latter, push back to the prior minute.
        if all_minutes_nanos[start_idx] != start_dt_nanos:
            start_idx -= 1

        if start_idx < 0:
            raise KeyError("Can't start minute window at {}".format(start_dt))

        end_idx = start_idx + count

        if start_idx > end_idx:
            # Backward window.  Clamp at 0: a negative slice start would be
            # interpreted relative to the end of the index and wrap around.
            return self.all_minutes[max(end_idx + 1, 0):(start_idx + 1)]
        else:
            return self.all_minutes[start_idx:end_idx]

    def sessions_in_range(self, start_session_label, end_session_label):
        """
        Return all sessions between two session labels, both ends included.

        Parameters
        ----------
        start_session_label: pd.Timestamp (midnight UTC)
            The label representing the first session of the desired range.

        end_session_label: pd.Timestamp (midnight UTC)
            The label representing the last session of the desired range.

        Returns
        -------
        pd.DatetimeIndex
            The slice of ``self.all_sessions`` selected by ``slice_indexer``
            for the two labels.
        """
        sessions = self.all_sessions
        selection = sessions.slice_indexer(
            start_session_label,
            end_session_label,
        )
        return sessions[selection]

    def sessions_window(self, session_label, count):
        """
        Given a session label and a window size, returns the sessions of a
        window of size ``abs(count) + 1`` that either starts with the given
        session (if `count` is positive) or ends with the given session (if
        `count` is negative).

        Parameters
        ----------
        session_label: pd.Timestamp
            The label of the initial session.

        count: int
            Defines the length and the direction of the window.

        Returns
        -------
        pd.DatetimeIndex
            The desired sessions.  When the window would extend past either
            end of the calendar it is truncated there, so fewer than
            ``abs(count) + 1`` sessions may be returned.
        """
        start_idx = self.schedule.index.get_loc(session_label)
        end_idx = start_idx + count

        # Clamp the lower bound at 0: a negative slice start would be taken
        # relative to the end of the index and silently wrap around.
        first_idx = max(min(start_idx, end_idx), 0)
        last_idx = max(start_idx, end_idx)

        return self.all_sessions[first_idx:last_idx + 1]

    def session_distance(self, start_session_label, end_session_label):
        """
        Count the sessions spanned by two session labels, both ends included.
        For example, for three consecutive sessions Mon., Tues., and Wed,
        ``session_distance(Mon, Wed)`` returns 3. If ``start_session`` is after
        ``end_session``, the value will be negated.

        Parameters
        ----------
        start_session_label: pd.Timestamp
            The label of the start session.
        end_session_label: pd.Timestamp
            The label of the ending session inclusive.

        Returns
        -------
        int
            The distance between the two sessions.
        """
        reversed_order = end_session_label < start_session_label
        if reversed_order:
            earlier, later = end_session_label, start_session_label
        else:
            earlier, later = start_session_label, end_session_label

        sessions = self.all_sessions
        lower = sessions.searchsorted(earlier)
        # side='right' makes the later label itself part of the count.
        upper = sessions.searchsorted(later, side='right')

        span = upper - lower
        return -span if reversed_order else span

    def minutes_in_range(self, start_minute, end_minute):
        """
        Given start and end minutes, return all the calendar minutes
        in that range, inclusive.

        Given minutes don't need to be calendar minutes, and ``end_minute``
        may lie after the last calendar minute, in which case the result
        simply runs to the end of the calendar.

        Parameters
        ----------
        start_minute: pd.Timestamp
            The minute representing the start of the desired range.

        end_minute: pd.Timestamp
            The minute representing the end of the desired range.

        Returns
        -------
        pd.DatetimeIndex
            The minutes in the desired range.
        """
        # NOTE(review): ``searchsorted`` is assumed to behave like
        # numpy.searchsorted (left insertion point) -- confirm the import.
        minute_nanos = self._trading_minutes_nanos
        start_idx = searchsorted(minute_nanos, start_minute.value)
        end_idx = searchsorted(minute_nanos, end_minute.value)

        # The insertion point equals len(minute_nanos) when end_minute is
        # past the last calendar minute, so check the bound before indexing.
        if (end_idx < len(minute_nanos)
                and end_minute.value == minute_nanos[end_idx]):
            # if the end minute is a market minute, increase by 1
            end_idx += 1

        return self.all_minutes[start_idx:end_idx]

    def minutes_for_sessions_in_range(self,
                                      start_session_label,
                                      end_session_label):
        """
        Return every calendar minute from the open of the start session to
        the close of the end session, inclusive.

        Parameters
        ----------
        start_session_label: pd.Timestamp
            The label of the first session in the range.

        end_session_label: pd.Timestamp
            The label of the last session in the range.

        Returns
        -------
        pd.DatetimeIndex
            The minutes in the desired range.

        """
        # Only the open of the first session and the close of the last one
        # are needed; the other half of each pair is discarded.
        range_open, _unused_close = self.open_and_close_for_session(
            start_session_label,
        )
        _unused_open, range_close = self.open_and_close_for_session(
            end_session_label,
        )
        return self.minutes_in_range(range_open, range_close)

    def open_and_close_for_session(self, session_label):
        """
        Return the open and close of the session identified by the given
        label as a pair of UTC-localized timestamps.

        Parameters
        ----------
        session_label: pd.Timestamp
            The session whose open and close are desired.

        Returns
        -------
        (Timestamp, Timestamp)
            The open and close for the given session.
        """
        # `market_open` and `market_close` should be timezone aware, but pandas
        # 0.16.1 does not appear to support this:
        # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz  # noqa
        # so the values read from the schedule are localized here instead.
        schedule = self.schedule
        return tuple(
            schedule.at[session_label, column].tz_localize('UTC')
            for column in ('market_open', 'market_close')
        )

    def session_open(self, session_label):
        """
        Return the scheduled ``market_open`` of the given session, localized
        to UTC.
        """
        naive_open = self.schedule.at[session_label, 'market_open']
        return naive_open.tz_localize('UTC')

    def session_close(self, session_label):
        """
        Return the scheduled ``market_close`` of the given session, localized
        to UTC.
        """
        naive_close = self.schedule.at[session_label, 'market_close']
        return naive_close.tz_localize('UTC')

    def session_opens_in_range(self, start_session_label, end_session_label):
        """
        Return the ``market_open`` values of the schedule rows selected by
        the label slice ``start_session_label:end_session_label``, localized
        to UTC.
        """
        label_range = slice(start_session_label, end_session_label)
        naive_opens = self.schedule.loc[label_range, 'market_open']
        return naive_opens.dt.tz_localize('UTC')

    def session_closes_in_range(self, start_session_label, end_session_label):
        """
        Return the ``market_close`` values of the schedule rows selected by
        the label slice ``start_session_label:end_session_label``, localized
        to UTC.
        """
        label_range = slice(start_session_label, end_session_label)
        naive_closes = self.schedule.loc[label_range, 'market_close']
        return naive_closes.dt.tz_localize('UTC')

    @property
    def all_sessions(self):
        """
        Every session label of this calendar, i.e. the index of
        ``self.schedule``.
        """
        schedule = self.schedule
        return schedule.index

    @property
    def first_session(self):
        """
        The first entry of ``self.all_sessions``.
        """
        sessions = self.all_sessions
        return sessions[0]

    @property
    def last_session(self):
        """
        The final entry of ``self.all_sessions``.
        """
        sessions = self.all_sessions
        return sessions[-1]

    def execution_time_from_open(self, open_dates):
        """
        Map market open(s) to the first execution time(s).

        This implementation applies no adjustment and returns ``open_dates``
        unchanged.
        """
        execution_start = open_dates
        return execution_start

    def execution_time_from_close(self, close_dates):
        """
        Map market close(s) to the last execution time(s).

        This implementation applies no adjustment and returns ``close_dates``
        unchanged.
        """
        execution_end = close_dates
        return execution_end

    @lazyval
    def all_minutes(self):
        """
        Returns a DatetimeIndex representing all the minutes in this calendar.

        The stored opens and closes are converted to int64 nanosecond values
        and handed to ``compute_all_minutes``; its result is wrapped in a
        UTC DatetimeIndex.
        """
        # Same conversion for both series: datetime64[ns] viewed as int64.
        opens_in_ns, closes_in_ns = (
            series.values.astype('datetime64[ns]').view('int64')
            for series in (self._opens, self._closes)
        )

        minute_values = compute_all_minutes(opens_in_ns, closes_in_ns)
        return DatetimeIndex(minute_values, tz='utc')

    @preprocess(dt=coerce(pd.Timestamp, attrgetter('value')))
    def minute_to_session_label(self, dt, direction="next"):
        """
        Given a minute, get the label of its containing session.

        Parameters
        ----------
        dt : pd.Timestamp or nanosecond offset
            The dt for which to get the containing session.

        direction: str
            "next" (default) means that if the given dt is not part of a
            session, return the label of the next session.

            "previous" means that if the given dt is not part of a session,
            return the label of the previous session.

            "none" means that a ValueError will be raised if the given
            dt is not part of a session.

        Returns
        -------
        pd.Timestamp (midnight UTC)
            The label of the containing session.

        Raises
        ------
        ValueError
            If ``direction`` is not one of the values above; if
            ``direction`` is "none" and the exchange is closed at ``dt``; or
            if ``direction`` is "previous", the exchange is closed at ``dt``
            and there is no earlier session in the calendar.

        Notes
        -----
        NOTE(review): a ``dt`` later than the last market close presumably
        makes the session lookup raise IndexError -- confirm against
        ``searchsorted``.
        """
        # Only "next" lookups can be answered straight from the cache: the
        # cached value is always the current-or-next session.
        if direction == "next":
            try:
                return self._minute_to_session_label_cache[dt]
            except KeyError:
                pass

        idx = searchsorted(self.market_closes_nanos, dt)
        current_or_next_session = self.schedule.index[idx]
        self._minute_to_session_label_cache[dt] = current_or_next_session

        if direction == "next":
            return current_or_next_session
        elif direction == "previous":
            if not is_open(self.market_opens_nanos, self.market_closes_nanos,
                           dt):
                # if the exchange is closed, use the previous session
                if idx == 0:
                    # Indexing with -1 would silently wrap around to the
                    # *last* session of the calendar.
                    raise ValueError("There is no previous session as this"
                                     " is the beginning of the exchange"
                                     " calendar.")
                return self.schedule.index[idx - 1]
        elif direction == "none":
            if not is_open(self.market_opens_nanos, self.market_closes_nanos,
                           dt):
                # if the exchange is closed, blow up
                raise ValueError("The given dt is not an exchange minute!")
        else:
            # invalid direction
            raise ValueError("Invalid direction parameter: "
                             "{0}".format(direction))

        return current_or_next_session

    def minute_index_to_session_labels(self, index):
        """
        Map each market minute in ``index`` to its session label.

        Every element is passed to ``minute_to_session_label`` with the
        default direction, in order.

        Parameters
        ----------
        index: pd.DatetimeIndex or pd.Series
            The ordered list of market minutes we want session labels for.

        Returns
        -------
        pd.DatetimeIndex (UTC)
            The list of session labels corresponding to the given minutes.
            The result is built with ``pd.Index``, so its concrete type is
            whatever pandas infers from the labels.
        """
        to_label = self.minute_to_session_label
        labels = [to_label(minute) for minute in index]
        return pd.Index(labels)

    def _special_dates(self, calendars, ad_hoc_dates, start_date, end_date):
        """
        Union an iterable of pairs of the form (time, calendar)
        and an iterable of pairs of the form (time, [dates])

        (This is shared logic for computing special opens and special closes.)

        Each (time, calendar) pair is expanded with ``holidays_at_time`` and
        each (time, [dates]) pair with ``days_at_time``, both using
        ``self.tz``.  The union of all results is then restricted to the
        closed interval ``[start_date, end_date]``.
        """
        rule_based = [
            holidays_at_time(calendar, start_date, end_date, time_, self.tz)
            for time_, calendar in calendars
        ]
        ad_hoc = [
            days_at_time(datetimes, time_, self.tz)
            for time_, datetimes in ad_hoc_dates
        ]

        merged = DatetimeIndex([], tz='UTC').union_many(rule_based + ad_hoc)
        in_window = (merged >= start_date) & (merged <= end_date)
        return merged[in_window]

    def _calculate_special_opens(self, start, end):
        """
        Compute the special opens between ``start`` and ``end`` by combining
        ``self.special_opens`` and ``self.special_opens_adhoc`` through
        ``_special_dates``.
        """
        return self._special_dates(
            calendars=self.special_opens,
            ad_hoc_dates=self.special_opens_adhoc,
            start_date=start,
            end_date=end,
        )

    def _calculate_special_closes(self, start, end):
        """
        Compute the special closes between ``start`` and ``end`` by combining
        ``self.special_closes`` and ``self.special_closes_adhoc`` through
        ``_special_dates``.
        """
        return self._special_dates(
            calendars=self.special_closes,
            ad_hoc_dates=self.special_closes_adhoc,
            start_date=start,
            end_date=end,
        )