Example 1
class LabelArray(ndarray):
    """
    An ndarray subclass for working with arrays of strings.

    Factorizes the input array into integers, but overloads equality on strings
    to check against the factor label.

    Parameters
    ----------
    values : array-like
        Array of values that can be passed to np.asarray with dtype=object.
    missing_value : str
        Scalar value to treat as 'missing' for operations on ``self``.
    categories : list[str], optional
        List of values to use as categories.  If not supplied, categories will
        be inferred as the unique set of entries in ``values``.
    sort : bool, optional
        Whether to sort categories.  If sort is False and categories is
        supplied, they are left in the order provided.  If sort is False and
        categories is None, categories will be constructed in a random order.

    Attributes
    ----------
    categories : ndarray[str]
        An array containing the unique labels of self.
    reverse_categories : dict[str -> int]
        Reverse lookup table for ``categories``. Stores the index in
        ``categories`` at which each unique entry is found.
    missing_value : str or None
        A sentinel missing value with NaN semantics for comparisons.

    Notes
    -----
    Consumers should be cautious when passing instances of LabelArray to numpy
    functions.  We attempt to disallow as many meaningless operations as
    possible, but since a LabelArray is just an ndarray of ints with some
    additional metadata, many numpy functions (for example, trigonometric) will
    happily accept a LabelArray and treat its values as though they were
    integers.

    In a future change, we may be able to disallow more numerical operations by
    creating a wrapper dtype which doesn't register an implementation for most
    numpy ufuncs. Until that change is made, consumers of LabelArray should
    assume that it is undefined behavior to pass a LabelArray to any numpy
    ufunc that operates on semantically-numerical data.

    See Also
    --------
    http://docs.scipy.org/doc/numpy-1.10.0/user/basics.subclassing.html
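
    Examples
    --------
    A minimal usage sketch (the values here are hypothetical, and the
    doctests are skipped because LabelArray is not importable from a
    standard namespace):

    >>> arr = LabelArray(['a', 'b', 'a'], missing_value=None)  # doctest: +SKIP
    >>> arr == 'a'  # doctest: +SKIP
    array([ True, False,  True], dtype=bool)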
    """
    SUPPORTED_SCALAR_TYPES = (bytes, unicode, type(None))
    SUPPORTED_NON_NONE_SCALAR_TYPES = (bytes, unicode)

    @preprocess(
        values=coerce(list, partial(np.asarray, dtype=object)),
        categories=coerce(np.ndarray, list),
    )
    @expect_types(
        values=np.ndarray,
        missing_value=SUPPORTED_SCALAR_TYPES,
        categories=optional(list),
    )
    @expect_kinds(values=("O", "S", "U"))
    def __new__(cls, values, missing_value, categories=None, sort=True):

        # Numpy's fixed-width string types aren't very efficient. Working with
        # object arrays is faster than bytes or unicode arrays in almost all
        # cases.
        if not is_object(values):
            values = values.astype(object)

        if categories is None:
            codes, categories, reverse_categories = factorize_strings(
                values.ravel(),
                missing_value=missing_value,
                sort=sort,
            )
        else:
            codes, categories, reverse_categories = (
                factorize_strings_known_categories(
                    values.ravel(),
                    categories=categories,
                    missing_value=missing_value,
                    sort=sort,
                ))
        categories.setflags(write=False)

        return cls.from_codes_and_metadata(
            codes=codes.reshape(values.shape),
            categories=categories,
            reverse_categories=reverse_categories,
            missing_value=missing_value,
        )

    @classmethod
    def from_codes_and_metadata(cls, codes, categories, reverse_categories,
                                missing_value):
        """
        Rehydrate a LabelArray from the codes and metadata.

        Parameters
        ----------
        codes : np.ndarray[integral]
            The codes for the label array.
        categories : np.ndarray[object]
            The unique string categories.
        reverse_categories : dict[str, int]
            The mapping from category to its code-index.
        missing_value : any
            The value used to represent missing data.
        """
        ret = codes.view(type=cls, dtype=np.void)
        ret._categories = categories
        ret._reverse_categories = reverse_categories
        ret._missing_value = missing_value
        return ret

    @classmethod
    def from_categorical(cls, categorical, missing_value=None):
        """
        Create a LabelArray from a pandas categorical.

        Parameters
        ----------
        categorical : pd.Categorical
            The categorical object to convert.
        missing_value : bytes, unicode, or None, optional
            The missing value to use for this LabelArray.

        Returns
        -------
        la : LabelArray
            The LabelArray representation of this categorical.
        """
        return LabelArray(
            categorical,
            missing_value,
            categorical.categories,
        )

    @property
    def categories(self):
        # This is a property because it should be immutable.
        return self._categories

    @property
    def reverse_categories(self):
        # This is a property because it should be immutable.
        return self._reverse_categories

    @property
    def missing_value(self):
        # This is a property because it should be immutable.
        return self._missing_value

    @property
    def missing_value_code(self):
        return self.reverse_categories[self.missing_value]

    def has_label(self, value):
        return value in self.reverse_categories

    def __array_finalize__(self, obj):
        """
        Called by Numpy after array construction.

        There are three cases where this can happen:

        1. Someone tries to directly construct a new array by doing::

            >>> ndarray.__new__(LabelArray, ...)  # doctest: +SKIP

           In this case, obj will be None.  We treat this as an error case and
           fail.

        2. Someone (most likely our own __new__) does::

           >>> other_array.view(type=LabelArray)  # doctest: +SKIP

           In this case, ``self`` will be the new LabelArray instance, and
           ``obj`` will be the array on which ``view`` is being called.

           The caller of ``obj.view`` is responsible for setting category
           metadata on ``self`` after we exit.

        3. Someone creates a new LabelArray by slicing an existing one.

           In this case, ``obj`` will be the original LabelArray.  We're
           responsible for copying over the parent array's category metadata.
        """
        if obj is None:
            raise TypeError(
                "Direct construction of LabelArrays is not supported.")

        # See docstring for an explanation of when these will or will not be
        # set.
        self._categories = getattr(obj, 'categories', None)
        self._reverse_categories = getattr(obj, 'reverse_categories', None)
        self._missing_value = getattr(obj, 'missing_value', None)

    def as_int_array(self):
        """
        Convert self into a regular ndarray of ints.

        This is an O(1) operation. It does not copy the underlying data.
        """
        return self.view(
            type=ndarray,
            dtype=unsigned_int_dtype_with_size_in_bytes(self.itemsize),
        )

    def as_string_array(self):
        """
        Convert self back into an array of strings.

        This is an O(N) operation.
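
        Examples
        --------
        A sketch of the code/string round trip (hypothetical values):

        >>> arr = LabelArray(['a', 'b'], None)  # doctest: +SKIP
        >>> arr.as_string_array()  # doctest: +SKIP
        array(['a', 'b'], dtype=object)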
        """
        return self.categories[self.as_int_array()]

    def as_categorical(self, name=None):
        """
        Coerce self into a pandas categorical.

        This is only defined on 1D arrays, since that's all pandas supports.
        """
        if len(self.shape) > 1:
            raise ValueError("Can't convert a 2D array to a categorical.")

        with ignore_pandas_nan_categorical_warning():
            return pd.Categorical.from_codes(
                self.as_int_array(),
                # We need to make a copy because pandas >= 0.17 fails if this
                # buffer isn't writeable.
                self.categories.copy(),
                ordered=False,
                name=name,
            )

    def as_categorical_frame(self, index, columns, name=None):
        """
        Coerce self into a pandas DataFrame of Categoricals.
        """
        if len(self.shape) != 2:
            raise ValueError(
                "Can't convert a non-2D LabelArray into a DataFrame.")

        expected_shape = (len(index), len(columns))
        if expected_shape != self.shape:
            raise ValueError(
                "Can't construct a DataFrame with provided indices:\n\n"
                "LabelArray shape is {actual}, but index and columns imply "
                "that shape should be {expected}.".format(
                    actual=self.shape,
                    expected=expected_shape,
                ))

        return pd.Series(
            index=pd.MultiIndex.from_product([index, columns]),
            data=self.ravel().as_categorical(name=name),
        ).unstack()

    def __setitem__(self, indexer, value):
        self_categories = self.categories

        if isinstance(value, LabelArray):
            value_categories = value.categories
            if compare_arrays(self_categories, value_categories):
                return super(LabelArray, self).__setitem__(indexer, value)
            else:
                raise CategoryMismatch(self_categories, value_categories)

        elif isinstance(value, self.SUPPORTED_SCALAR_TYPES):
            value_code = self.reverse_categories.get(value, -1)
            if value_code < 0:
                raise ValueError("%r is not in LabelArray categories." % value)
            self.as_int_array()[indexer] = value_code
        else:
            raise NotImplementedError(
                "Setting into a LabelArray with a value of "
                "type {type} is not yet supported.".format(
                    type=type(value).__name__, ), )

    def __setslice__(self, i, j, sequence):
        """
        This method was deprecated in Python 2.0. It predates slice objects,
        but Python 2.7.11 still uses it if you implement it, which ndarray
        does.  In newer Pythons, __setitem__ is always called, but we need to
        manually forward in py2.
        """
        self.__setitem__(slice(i, j), sequence)

    def __getitem__(self, indexer):
        result = super(LabelArray, self).__getitem__(indexer)
        if result.ndim:
            # Result is still a LabelArray, so we can just return it.
            return result

        # Result is a scalar value, which will be an instance of np.void.
        # Map it back to one of our category entries.
        index = result.view(
            unsigned_int_dtype_with_size_in_bytes(self.itemsize))
        return self.categories[index]

    def is_missing(self):
        """
        Like isnan, but checks for locations where we store missing values.
        """
        return (
            self.as_int_array() == self.reverse_categories[self.missing_value])

    def not_missing(self):
        """
        Like ~isnan, but checks for locations where we store missing values.
        """
        return (self.as_int_array() !=
                self.reverse_categories[self.missing_value])

    def _equality_check(op):
        """
        Shared code for __eq__ and __ne__, parameterized on the actual
        comparison operator to use.
        """
        def method(self, other):

            if isinstance(other, LabelArray):
                self_mv = self.missing_value
                other_mv = other.missing_value
                if self_mv != other_mv:
                    raise MissingValueMismatch(self_mv, other_mv)

                self_categories = self.categories
                other_categories = other.categories
                if not compare_arrays(self_categories, other_categories):
                    raise CategoryMismatch(self_categories, other_categories)

                return (op(self.as_int_array(), other.as_int_array())
                        & self.not_missing()
                        & other.not_missing())

            elif isinstance(other, ndarray):
                # Compare to ndarrays as though we were an array of strings.
                # This is fairly expensive, and should generally be avoided.
                return op(self.as_string_array(), other) & self.not_missing()

            elif isinstance(other, self.SUPPORTED_SCALAR_TYPES):
                i = self._reverse_categories.get(other, -1)
                return op(self.as_int_array(), i) & self.not_missing()

            return op(super(LabelArray, self), other)

        return method

    __eq__ = _equality_check(eq)
    __ne__ = _equality_check(ne)
    del _equality_check

    def view(self, dtype=_NotPassed, type=_NotPassed):
        if type is _NotPassed and dtype not in (_NotPassed, self.dtype):
            raise TypeError("Can't view LabelArray as another dtype.")

        # The text signature on ndarray.view makes it look like the default
        # values for dtype and type are `None`, but passing None explicitly has
        # different semantics than not passing an arg at all, so we reconstruct
        # the kwargs dict here to simulate the args not being passed at all.
        kwargs = {}
        if dtype is not _NotPassed:
            kwargs['dtype'] = dtype
        if type is not _NotPassed:
            kwargs['type'] = type
        return super(LabelArray, self).view(**kwargs)

    # In general, we support resizing, slicing, and reshaping methods, but not
    # numeric methods.
    SUPPORTED_NDARRAY_METHODS = frozenset([
        'base', 'compress', 'copy', 'data', 'diagonal', 'dtype', 'flat',
        'flatten', 'item', 'itemset', 'itemsize', 'nbytes', 'ndim', 'ravel',
        'repeat', 'reshape', 'resize', 'setflags', 'shape', 'size', 'squeeze',
        'strides', 'swapaxes', 'take', 'trace', 'transpose', 'view'
    ])
    PUBLIC_NDARRAY_METHODS = frozenset(
        [s for s in dir(ndarray) if not s.startswith('_')])

    # Generate failing wrappers for all unsupported methods.
    locals().update({
        method: _make_unsupported_method(method)
        for method in PUBLIC_NDARRAY_METHODS - SUPPORTED_NDARRAY_METHODS
    })

    def __repr__(self):
        repr_lines = repr(self.as_string_array()).splitlines()
        repr_lines[0] = repr_lines[0].replace('array(', 'LabelArray(', 1)
        repr_lines[-1] = repr_lines[-1].rsplit(',', 1)[0] + ')'
        # The extra spaces here account for the difference in length between
        # 'array(' and 'LabelArray('.
        return '\n     '.join(repr_lines)

    def empty_like(self, shape):
        """
        Make an empty LabelArray with the same categories as ``self``, filled
        with ``self.missing_value``.
        """
        return type(self).from_codes_and_metadata(
            codes=np.full(
                shape,
                self.reverse_categories[self.missing_value],
                dtype=unsigned_int_dtype_with_size_in_bytes(self.itemsize),
            ),
            categories=self.categories,
            reverse_categories=self.reverse_categories,
            missing_value=self.missing_value,
        )

    def map_predicate(self, f):
        """
        Map a function from str -> bool element-wise over ``self``.

        ``f`` will be applied exactly once to each non-missing unique value in
        ``self``. Missing values will always return False.
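
        Examples
        --------
        A sketch (hypothetical values; the missing entry maps to False):

        >>> arr = LabelArray(['a', 'bb', None], None)  # doctest: +SKIP
        >>> arr.map_predicate(lambda s: len(s) > 1)  # doctest: +SKIP
        array([False,  True, False], dtype=bool)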
        """
        # Functions passed to this are of type str -> bool.  Don't ever call
        # them on None, which is the only non-str value we ever store in
        # categories.
        if self.missing_value is None:

            def f_to_use(x):
                return False if x is None else f(x)
        else:
            f_to_use = f

        # Call f on each unique value in our categories.
        results = np.vectorize(f_to_use, otypes=[bool_dtype])(self.categories)

        # missing_value should produce False no matter what
        results[self.reverse_categories[self.missing_value]] = False

        # unpack the results from each unique value into their corresponding
        # locations in our indices.
        return results[self.as_int_array()]

    def map(self, f):
        """
        Map a function from str -> str element-wise over ``self``.

        ``f`` will be applied exactly once to each non-missing unique value in
        ``self``. Missing values will always map to ``self.missing_value``.
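
        Examples
        --------
        A sketch (hypothetical values; ``f`` is called once per unique
        category, not once per array element):

        >>> arr = LabelArray(['ab', 'cd', 'ab'], None)  # doctest: +SKIP
        >>> arr.map(lambda s: s.upper())  # doctest: +SKIP
        LabelArray(['AB', 'CD', 'AB'], dtype=object)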
        """
        # f() should only return None if None is our missing value.
        if self.missing_value is None:
            allowed_outtypes = self.SUPPORTED_SCALAR_TYPES
        else:
            allowed_outtypes = self.SUPPORTED_NON_NONE_SCALAR_TYPES

        def f_to_use(x,
                     missing_value=self.missing_value,
                     otypes=allowed_outtypes):

            # Don't call f on the missing value; those locations don't exist
            # semantically. We return _sortable_sentinel rather than None
            # because the np.unique call below sorts the categories array,
            # which raises an error on Python 3 because None and str aren't
            # comparable.
            if x == missing_value:
                return _sortable_sentinel

            ret = f(x)

            if not isinstance(ret, otypes):
                raise TypeError(
                    "LabelArray.map expected function {f} to return a string"
                    " or None, but got {type} instead.\n"
                    "Value was {value}.".format(
                        f=f.__name__,
                        type=type(ret).__name__,
                        value=ret,
                    ))

            if ret == missing_value:
                return _sortable_sentinel

            return ret

        new_categories_with_duplicates = np.vectorize(
            f_to_use,
            otypes=[object],
        )(self.categories)

        # If f() maps multiple inputs to the same output, then we can end up
        # with the same code duplicated multiple times. Compress the categories
        # by running them through np.unique, and then use the reverse lookup
        # table to compress codes as well.
        new_categories, bloated_inverse_index = np.unique(
            new_categories_with_duplicates, return_inverse=True)

        if new_categories[0] is _sortable_sentinel:
            # f_to_use returns _sortable_sentinel for locations that should be
            # missing values in our output. Since np.unique returns the uniques
            # in sorted order, and since _sortable_sentinel sorts before any
            # string, we only need to check the first array entry.
            new_categories[0] = self.missing_value

        # np.unique's inverse index is always a 64-bit integer array, even
        # when a smaller dtype can hold all the codes, so downcast it.
        reverse_index = bloated_inverse_index.astype(
            smallest_uint_that_can_hold(len(new_categories)))
        new_codes = np.take(reverse_index, self.as_int_array())

        return self.from_codes_and_metadata(
            new_codes,
            new_categories,
            dict(zip(new_categories, range(len(new_categories)))),
            missing_value=self.missing_value,
        )

    def startswith(self, prefix):
        """
        Element-wise startswith.

        Parameters
        ----------
        prefix : str

        Returns
        -------
        matches : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self started with ``prefix``.
        """
        return self.map_predicate(lambda elem: elem.startswith(prefix))

    def endswith(self, suffix):
        """
        Elementwise endswith.

        Parameters
        ----------
        suffix : str

        Returns
        -------
        matches : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self ended with ``suffix``.
        """
        return self.map_predicate(lambda elem: elem.endswith(suffix))

    def has_substring(self, substring):
        """
        Elementwise contains.

        Parameters
        ----------
        substring : str

        Returns
        -------
        matches : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self contained ``substring``.
        """
        return self.map_predicate(lambda elem: substring in elem)

    @preprocess(pattern=coerce(from_=(bytes, unicode), to=re.compile))
    def matches(self, pattern):
        """
        Elementwise regex match.

        Parameters
        ----------
        pattern : str or compiled regex

        Returns
        -------
        matches : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self was matched by ``pattern``.
        """
        return self.map_predicate(compose(bool, pattern.match))

    # These types all implement an O(N) __contains__, so pre-emptively
    # coerce to `set`.
    @preprocess(container=coerce((list, tuple, np.ndarray), set))
    def element_of(self, container):
        """
        Check if each element of self is an element of ``container``.

        Parameters
        ----------
        container : object
            An object implementing a __contains__ to call on each element of
            ``self``.

        Returns
        -------
        is_contained : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self was an element of ``container``.
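
        Examples
        --------
        A sketch (hypothetical values; the decorator above pre-coerces lists,
        tuples, and ndarrays to ``set``):

        >>> arr = LabelArray(['a', 'b', 'c'], None)  # doctest: +SKIP
        >>> arr.element_of(['a', 'c'])  # doctest: +SKIP
        array([ True, False,  True], dtype=bool)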
        """
        return self.map_predicate(container.__contains__)
Example 2
class TradingCalendar(with_metaclass(ABCMeta)):
    """
    A TradingCalendar represents the timing information of a single market
    exchange.

    The timing information is made up of two parts: sessions, and opens/closes.

    A session represents a contiguous set of minutes, and has a label that is
    midnight UTC. It is important to note that a session label should not be
    considered a specific point in time, and that midnight UTC is just being
    used for convenience.

    For each session, we store the open and close time in UTC time.
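
    Examples
    --------
    A sketch against a hypothetical concrete subclass; ``get_calendar`` is
    an assumed registry helper, not part of this class:

    >>> cal = get_calendar('NYSE')  # doctest: +SKIP
    >>> session = cal.all_sessions[0]  # doctest: +SKIP
    >>> cal.open_and_close_for_session(session)  # doctest: +SKIP
    (Timestamp('...', tz='UTC'), Timestamp('...', tz='UTC'))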
    """
    def __init__(self, start=start_default, end=end_default):
        # Midnight in UTC for each trading day.

        # In pandas 0.18.1, pandas calls into its own code here in a way that
        # fires a warning. The calling code in pandas tries to suppress the
        # warning, but does so incorrectly, causing it to bubble out here.
        # Actually catch and suppress the warning here:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            _all_days = date_range(start, end, freq=self.day, tz='UTC')

        # `DatetimeIndex`s of standard opens/closes for each day.
        self._opens = days_at_time(_all_days, self.open_time, self.tz,
                                   self.open_offset)
        self._closes = days_at_time(_all_days, self.close_time, self.tz,
                                    self.close_offset)

        # `DatetimeIndex`s of nonstandard opens/closes
        _special_opens = self._calculate_special_opens(start, end)
        _special_closes = self._calculate_special_closes(start, end)

        # Overwrite the special opens and closes on top of the standard ones.
        _overwrite_special_dates(_all_days, self._opens, _special_opens)
        _overwrite_special_dates(_all_days, self._closes, _special_closes)

        # In pandas 0.16.1 _opens and _closes will lose their timezone
        # information. This looks like it has been resolved in 0.17.1.
        # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz  # noqa
        self.schedule = DataFrame(
            index=_all_days,
            columns=['market_open', 'market_close'],
            data={
                'market_open': self._opens,
                'market_close': self._closes,
            },
            dtype='datetime64[ns, UTC]',
        )

        # Simple cache to avoid recalculating the same minute -> session in
        # "next" mode. Analysis of current catalyst code paths show that
        # `minute_to_session_label` is often called consecutively with the same
        # inputs.
        self._minute_to_session_label_cache = LRU(1)

        self.market_opens_nanos = self.schedule.market_open.values.\
            astype(np.int64)

        self.market_closes_nanos = self.schedule.market_close.values.\
            astype(np.int64)

        self._trading_minutes_nanos = self.all_minutes.values.\
            astype(np.int64)

        self.first_trading_session = _all_days[0]
        self.last_trading_session = _all_days[-1]

        self._early_closes = pd.DatetimeIndex(
            _special_closes.map(self.minute_to_session_label))

    @lazyval
    def day(self):
        return CustomBusinessDay(
            holidays=self.adhoc_holidays,
            calendar=self.regular_holidays,
        )

    @abstractproperty
    def name(self):
        raise NotImplementedError()

    @abstractproperty
    def tz(self):
        raise NotImplementedError()

    @abstractproperty
    def open_time(self):
        raise NotImplementedError()

    @abstractproperty
    def close_time(self):
        raise NotImplementedError()

    @property
    def open_offset(self):
        return 0

    @property
    def close_offset(self):
        return 0

    @lazyval
    def _minutes_per_session(self):
        diff = self.schedule.market_close - self.schedule.market_open
        diff = diff.astype('timedelta64[m]')
        return diff + 1

    def minutes_count_for_sessions_in_range(self, start_session, end_session):
        """
        Parameters
        ----------
        start_session: pd.Timestamp
            The first session.

        end_session: pd.Timestamp
            The last session.

        Returns
        -------
        int: The total number of minutes for the contiguous chunk of sessions
             between start_session and end_session, inclusive.
        """
        return int(self._minutes_per_session[start_session:end_session].sum())

    @property
    def regular_holidays(self):
        """
        Returns
        -------
        pd.AbstractHolidayCalendar: a calendar containing the regular holidays
        for this calendar
        """
        return None

    @property
    def adhoc_holidays(self):
        return []

    @property
    def special_opens(self):
        """
        A list of special open times and corresponding HolidayCalendars.

        Returns
        -------
        list: List of (time, AbstractHolidayCalendar) tuples
        """
        return []

    @property
    def special_opens_adhoc(self):
        """
        Returns
        -------
        list: List of (time, DatetimeIndex) tuples that represent special
            opens that cannot be codified into rules.
        """
        return []

    @property
    def special_closes(self):
        """
        A list of special close times and corresponding HolidayCalendars.

        Returns
        -------
        list: List of (time, AbstractHolidayCalendar) tuples
        """
        return []

    @property
    def special_closes_adhoc(self):
        """
        Returns
        -------
        list: List of (time, DatetimeIndex) tuples that represent special
            closes that cannot be codified into rules.
        """
        return []

    # -----

    @property
    def opens(self):
        return self.schedule.market_open

    @property
    def closes(self):
        return self.schedule.market_close

    @property
    def early_closes(self):
        return self._early_closes

    def is_session(self, dt):
        """
        Given a dt, returns whether it's a valid session label.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt that is being tested.

        Returns
        -------
        bool
            Whether the given dt is a valid session label.
        """
        return dt in self.schedule.index

    def is_open_on_minute(self, dt):
        """
        Given a dt, return whether this exchange is open at the given dt.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to check if this exchange is open.

        Returns
        -------
        bool
            Whether the exchange is open on this dt.
        """
        return is_open(self.market_opens_nanos, self.market_closes_nanos,
                       dt.value)

    def next_open(self, dt):
        """
        Given a dt, returns the next open.

        If the given dt happens to be a session open, the next session's open
        will be returned.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next open.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the next open.
        """
        idx = next_divider_idx(self.market_opens_nanos, dt.value)
        return pd.Timestamp(self.market_opens_nanos[idx], tz='UTC')

    def next_close(self, dt):
        """
        Given a dt, returns the next close.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next close.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the next close.
        """
        idx = next_divider_idx(self.market_closes_nanos, dt.value)
        return pd.Timestamp(self.market_closes_nanos[idx], tz='UTC')

    def previous_open(self, dt):
        """
        Given a dt, returns the previous open.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous open.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the previous open.
        """
        idx = previous_divider_idx(self.market_opens_nanos, dt.value)
        return pd.Timestamp(self.market_opens_nanos[idx], tz='UTC')

    def previous_close(self, dt):
        """
        Given a dt, returns the previous close.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous close.

        Returns
        -------
        pd.Timestamp
            The UTC timestamp of the previous close.
        """
        idx = previous_divider_idx(self.market_closes_nanos, dt.value)
        return pd.Timestamp(self.market_closes_nanos[idx], tz='UTC')

    def next_minute(self, dt):
        """
        Given a dt, return the next exchange minute.  If the given dt is not
        an exchange minute, returns the next exchange open.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the next exchange minute.

        Returns
        -------
        pd.Timestamp
            The next exchange minute.
        """
        idx = next_divider_idx(self._trading_minutes_nanos, dt.value)
        return self.all_minutes[idx]

    def previous_minute(self, dt):
        """
        Given a dt, return the previous exchange minute.

        Raises KeyError if the given timestamp is not an exchange minute.

        Parameters
        ----------
        dt: pd.Timestamp
            The dt for which to get the previous exchange minute.

        Returns
        -------
        pd.Timestamp
            The previous exchange minute.
        """

        idx = previous_divider_idx(self._trading_minutes_nanos, dt.value)
        return self.all_minutes[idx]

    def next_session_label(self, session_label):
        """
        Given a session label, returns the label of the next session.

        Parameters
        ----------
        session_label: pd.Timestamp
            A session whose next session is desired.

        Returns
        -------
        pd.Timestamp
            The next session label (midnight UTC).

        Notes
        -----
        Raises ValueError if the given session is the last session in this
        calendar.
        """
        idx = self.schedule.index.get_loc(session_label)
        try:
            return self.schedule.index[idx + 1]
        except IndexError:
            if idx == len(self.schedule.index) - 1:
                raise ValueError("There is no next session as this is the end"
                                 " of the exchange calendar.")
            else:
                raise

    def previous_session_label(self, session_label):
        """
        Given a session label, returns the label of the previous session.

        Parameters
        ----------
        session_label: pd.Timestamp
            A session whose previous session is desired.

        Returns
        -------
        pd.Timestamp
            The previous session label (midnight UTC).

        Notes
        -----
        Raises ValueError if the given session is the first session in this
        calendar.
        """
        idx = self.schedule.index.get_loc(session_label)
        if idx == 0:
            raise ValueError("There is no previous session as this is the"
                             " beginning of the exchange calendar.")

        return self.schedule.index[idx - 1]

    def minutes_for_session(self, session_label):
        """
        Given a session label, return the minutes for that session.

        Parameters
        ----------
        session_label: pd.Timestamp (midnight UTC)
            A session label whose session's minutes are desired.

        Returns
        -------
        pd.DatetimeIndex
            All the minutes for the given session.
        """
        return self.minutes_in_range(
            start_minute=self.schedule.at[session_label, 'market_open'],
            end_minute=self.schedule.at[session_label, 'market_close'],
        )

    def minutes_window(self, start_dt, count):
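        """
        Return a DatetimeIndex of ``abs(count)`` trading minutes that starts
        at ``start_dt`` (if ``count`` is positive) or ends at ``start_dt``
        (if ``count`` is negative).
        """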
        start_dt_nanos = start_dt.value
        all_minutes_nanos = self._trading_minutes_nanos
        start_idx = all_minutes_nanos.searchsorted(start_dt_nanos)

        # searchsorted finds the index of the minute **on or after** start_dt,
        # which is out of bounds if start_dt is past the last trading minute.
        if start_idx >= len(all_minutes_nanos):
            raise KeyError("Can't start minute window at {}".format(start_dt))

        # If the minute found is strictly after start_dt, push back to the
        # prior minute.
        if all_minutes_nanos[start_idx] != start_dt_nanos:
            start_idx -= 1

        if start_idx < 0:
            raise KeyError("Can't start minute window at {}".format(start_dt))

        end_idx = start_idx + count

        if start_idx > end_idx:
            return self.all_minutes[(end_idx + 1):(start_idx + 1)]
        else:
            return self.all_minutes[start_idx:end_idx]

    def sessions_in_range(self, start_session_label, end_session_label):
        """
        Given start and end session labels, return all the sessions in that
        range, inclusive.

        Parameters
        ----------
        start_session_label: pd.Timestamp (midnight UTC)
            The label representing the first session of the desired range.

        end_session_label: pd.Timestamp (midnight UTC)
            The label representing the last session of the desired range.

        Returns
        -------
        pd.DatetimeIndex
            The desired sessions.
        """
        return self.all_sessions[self.all_sessions.slice_indexer(
            start_session_label, end_session_label)]

    def sessions_window(self, session_label, count):
        """
        Given a session label and a window size, returns a DatetimeIndex of
        abs(`count`) + 1 sessions that either starts with the given session
        (if `count` is positive) or ends with the given session (if `count`
        is negative).

        Parameters
        ----------
        session_label: pd.Timestamp
            The label of the initial session.

        count: int
            Defines the length and the direction of the window.

        Returns
        -------
        pd.DatetimeIndex
            The desired sessions.
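
        Examples
        --------
        A sketch (``cal`` is a hypothetical concrete calendar instance):

        >>> # The given session plus the two sessions after it.
        >>> cal.sessions_window(cal.all_sessions[0], 2)  # doctest: +SKIP
        >>> # The given session plus the two sessions before it.
        >>> cal.sessions_window(cal.all_sessions[5], -2)  # doctest: +SKIP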
        """
        start_idx = self.schedule.index.get_loc(session_label)
        end_idx = start_idx + count

        return self.all_sessions[
            min(start_idx, end_idx):max(start_idx, end_idx) + 1
        ]

    def session_distance(self, start_session_label, end_session_label):
        """
        Given a start and end session label, returns the distance between
        them.  For example, for three consecutive sessions Mon., Tues., and
        Wed, `session_distance(Mon, Wed)` would return 2.

        Parameters
        ----------
        start_session_label: pd.Timestamp
            The label of the start session.

        end_session_label: pd.Timestamp
            The label of the ending session.

        Returns
        -------
        int
            The distance between the two sessions.
        """
        start_idx = self.all_sessions.searchsorted(
            self.minute_to_session_label(start_session_label))

        end_idx = self.all_sessions.searchsorted(
            self.minute_to_session_label(end_session_label))

        return abs(end_idx - start_idx)

    def minutes_in_range(self, start_minute, end_minute):
        """
        Given start and end minutes, return all the calendar minutes
        in that range, inclusive.

        The given minutes need not themselves be calendar minutes.

        Parameters
        ----------
        start_minute: pd.Timestamp
            The minute representing the start of the desired range.

        end_minute: pd.Timestamp
            The minute representing the end of the desired range.

        Returns
        -------
        pd.DatetimeIndex
            The minutes in the desired range.
        """
        start_idx = searchsorted(self._trading_minutes_nanos,
                                 start_minute.value)

        end_idx = searchsorted(self._trading_minutes_nanos, end_minute.value)

        if end_idx < len(self._trading_minutes_nanos) \
                and end_minute.value == self._trading_minutes_nanos[end_idx]:
            # if the end minute is a market minute, increase by 1
            end_idx += 1

        return self.all_minutes[start_idx:end_idx]

    def minutes_for_sessions_in_range(self, start_session_label,
                                      end_session_label):
        """
        Returns all the minutes for all the sessions from the given start
        session label to the given end session label, inclusive.

        Parameters
        ----------
        start_session_label: pd.Timestamp
            The label of the first session in the range.

        end_session_label: pd.Timestamp
            The label of the last session in the range.

        Returns
        -------
        pd.DatetimeIndex
            The minutes in the desired range.

        """
        first_minute, _ = self.open_and_close_for_session(start_session_label)
        _, last_minute = self.open_and_close_for_session(end_session_label)

        return self.minutes_in_range(first_minute, last_minute)

    def open_and_close_for_session(self, session_label):
        """
        Returns a tuple of timestamps of the open and close of the session
        represented by the given label.

        Parameters
        ----------
        session_label: pd.Timestamp
            The session whose open and close are desired.

        Returns
        -------
        (Timestamp, Timestamp)
            The open and close for the given session.
        """
        sched = self.schedule

        # `market_open` and `market_close` should be timezone aware, but pandas
        # 0.16.1 does not appear to support this:
        # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz  # noqa
        return (
            sched.at[session_label, 'market_open'].tz_localize('UTC'),
            sched.at[session_label, 'market_close'].tz_localize('UTC'),
        )

    def session_open(self, session_label):
        return self.schedule.at[session_label,
                                'market_open'].tz_localize('UTC')

    def session_close(self, session_label):
        return self.schedule.at[session_label,
                                'market_close'].tz_localize('UTC')

    def session_opens_in_range(self, start_session_label, end_session_label):
        return self.schedule.loc[start_session_label:end_session_label,
                                 'market_open'].dt.tz_convert('UTC')

    def session_closes_in_range(self, start_session_label, end_session_label):
        return self.schedule.loc[start_session_label:end_session_label,
                                 'market_close'].dt.tz_convert('UTC')

    @property
    def all_sessions(self):
        return self.schedule.index

    @property
    def first_session(self):
        return self.all_sessions[0]

    @property
    def last_session(self):
        return self.all_sessions[-1]

    def execution_time_from_open(self, open_dates):
        return open_dates

    def execution_time_from_close(self, close_dates):
        return close_dates

    def _all_minutes_with_interval(self, interval):
        """
        Returns a DatetimeIndex representing all the minutes in this calendar.
        """
        opens_in_ns = self._opens.values.astype('datetime64[ns]')
        closes_in_ns = self._closes.values.astype('datetime64[ns]')

        deltas = closes_in_ns - opens_in_ns

        nanos_in_interval = interval * NANOS_IN_MINUTE

        # + 1 because we want 390 minutes per standard day, not 389
        daily_sizes = (deltas / nanos_in_interval) + 1
        num_minutes = np.sum(daily_sizes).astype(np.int64)

        # One allocation for the entire thing. This assumes that each day
        # represents a contiguous block of minutes.
        all_minutes = np.empty(num_minutes, dtype='datetime64[ns]')

        idx = 0
        for day_idx, size in enumerate(daily_sizes):
            # lots of small allocations, but it's fast enough for now.

            # size is a np.timedelta64, so we need to int it
            size_int = int(size)
            all_minutes[idx:(idx + size_int)] = \
                np.arange(
                    opens_in_ns[day_idx],
                    closes_in_ns[day_idx] + NANOS_IN_MINUTE,
                    nanos_in_interval
                )

            idx += size_int

        return DatetimeIndex(all_minutes).tz_localize("UTC")

    @lazyval
    def all_minutes(self):
        """
        Returns a DatetimeIndex representing all the minutes in this calendar.
        """
        return self._all_minutes_with_interval(1)

    @preprocess(dt=coerce(pd.Timestamp, attrgetter('value')))
    def minute_to_session_label(self, dt, direction="next"):
        """
        Given a minute, get the label of its containing session.

        Parameters
        ----------
        dt : pd.Timestamp or nanosecond offset
            The dt for which to get the containing session.

        direction: str
            "next" (default) means that if the given dt is not part of a
            session, return the label of the next session.

            "previous" means that if the given dt is not part of a session,
            return the label of the previous session.

            "none" means that a KeyError will be raised if the given
            dt is not part of a session.

        Returns
        -------
        pd.Timestamp (midnight UTC)
            The label of the containing session.
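
        Examples
        --------
        A sketch (``cal`` and ``sunday_noon`` are hypothetical; the exchange
        is closed at ``sunday_noon``):

        >>> # "next" (the default) resolves to the following session's label.
        >>> cal.minute_to_session_label(sunday_noon)  # doctest: +SKIP
        >>> # "previous" resolves to the preceding session's label.
        >>> cal.minute_to_session_label(sunday_noon, "previous")  # doctest: +SKIP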
        """
        if direction == "next":
            try:
                return self._minute_to_session_label_cache[dt]
            except KeyError:
                pass

        idx = searchsorted(self.market_closes_nanos, dt)
        current_or_next_session = self.schedule.index[idx]
        self._minute_to_session_label_cache[dt] = current_or_next_session

        if direction == "next":
            return current_or_next_session
        elif direction == "previous":
            if not is_open(self.market_opens_nanos, self.market_closes_nanos,
                           dt):
                # if the exchange is closed, use the previous session
                return self.schedule.index[idx - 1]
        elif direction == "none":
            if not is_open(self.market_opens_nanos, self.market_closes_nanos,
                           dt):
                # if the exchange is closed, blow up
                raise ValueError("The given dt is not an exchange minute!")
        else:
            # invalid direction
            raise ValueError("Invalid direction parameter: "
                             "{0}".format(direction))

        return current_or_next_session

    def minute_index_to_session_labels(self, index):
        """
        Given a sorted DatetimeIndex of market minutes, return a
        DatetimeIndex of the corresponding session labels.

        Parameters
        ----------
        index: pd.DatetimeIndex or pd.Series
            The ordered list of market minutes we want session labels for.

        Returns
        -------
        pd.DatetimeIndex (UTC)
            The list of session labels corresponding to the given minutes.
        """
        def minute_to_session_label_nanos(dt_nanos):
            return self.minute_to_session_label(dt_nanos).value

        return DatetimeIndex(
            minutes_to_session_labels(
                index.values.astype(np.int64),
                minute_to_session_label_nanos,
                self.market_closes_nanos,
            ).astype('datetime64[ns]'),
            tz='UTC',
        )

    def _special_dates(self, calendars, ad_hoc_dates, start_date, end_date):
        """
        Union an iterable of pairs of the form (time, calendar)
        and an iterable of pairs of the form (time, [dates])

        (This is shared logic for computing special opens and special closes.)
        """
        _dates = DatetimeIndex([], tz='UTC').union_many([
            holidays_at_time(calendar, start_date, end_date, time_, self.tz)
            for time_, calendar in calendars
        ] + [
            days_at_time(datetimes, time_, self.tz)
            for time_, datetimes in ad_hoc_dates
        ])
        return _dates[(_dates >= start_date) & (_dates <= end_date)]

    def _calculate_special_opens(self, start, end):
        return self._special_dates(
            self.special_opens,
            self.special_opens_adhoc,
            start,
            end,
        )

    def _calculate_special_closes(self, start, end):
        return self._special_dates(
            self.special_closes,
            self.special_closes_adhoc,
            start,
            end,
        )