예제 #1
0
    def test_cached_data(self):
        # GH 26565
        # RangeIndex._data builds an int64 array of the same length as the
        # index and stores it at self._cached_data. Each operation below is
        # followed by a check that _cached_data is still unset; only the final
        # explicit access to _data is expected to fill it.
        rng = RangeIndex(0, 100, 10)
        assert rng._cached_data is None

        # Operations called directly on the index.
        index_ops = (
            lambda: repr(rng),
            lambda: str(rng),
            lambda: rng.get_loc(20),
        )
        for run in index_ops:
            run()
            assert rng._cached_data is None

        frame = pd.DataFrame({'a': range(10)}, index=rng)

        def missing_label():
            # 51 is not one of the index labels, so the lookup must fail.
            with pytest.raises(KeyError):
                frame.loc[51]

        # Label- and position-based access through a DataFrame on the index.
        frame_ops = (
            lambda: frame.loc[50],
            missing_label,
            lambda: frame.loc[10:50],
            lambda: frame.iloc[5:10],
        )
        for run in frame_ops:
            run()
            assert rng._cached_data is None

        # Accessing _data is what finally populates the cached array.
        values = rng._data
        assert isinstance(values, np.ndarray)
        assert isinstance(rng._cached_data, np.ndarray)
예제 #2
0
    def test_cache(self):
        # GH 26565, GH26617, GH35432
        # This test checks whether _cache has been set.
        # Accessing RangeIndex._data creates an int64 array of the same length
        # as the RangeIndex and stores it in _cache under the "_data" key.
        # None of the operations exercised before that access may populate
        # _cache.
        idx = RangeIndex(0, 100, 10)

        assert idx._cache == {}

        repr(idx)
        assert idx._cache == {}

        str(idx)
        assert idx._cache == {}

        idx.get_loc(20)
        assert idx._cache == {}

        90 in idx  # True
        assert idx._cache == {}

        91 in idx  # False
        assert idx._cache == {}

        idx.all()
        assert idx._cache == {}

        idx.any()
        assert idx._cache == {}

        for _ in idx:
            pass
        assert idx._cache == {}

        df = pd.DataFrame({"a": range(10)}, index=idx)

        df.loc[50]
        assert idx._cache == {}

        with pytest.raises(KeyError, match="51"):
            df.loc[51]
        assert idx._cache == {}

        df.loc[10:50]
        assert idx._cache == {}

        df.iloc[5:10]
        assert idx._cache == {}

        # idx._cache should contain a _data entry after call to idx._data
        idx._data
        assert isinstance(idx._data, np.ndarray)
        assert idx._data is idx._data  # check cached value is reused
        # Check for the entry itself rather than a hard-coded cache size, so
        # the test does not depend on how many other attributes get cached
        # as a side effect of building _data.
        assert "_data" in idx._cache
        expected = np.arange(0, 100, 10, dtype="int64")
        tm.assert_numpy_array_equal(idx._cache["_data"], expected)
예제 #3
0
    def test_cached_data(self):
        # GH 26565, GH26617
        # RangeIndex._data builds an int64 array of the same length as the
        # index and stores it at self._cached_data. Each operation below is
        # followed by a check that _cached_data is still unset; only the final
        # explicit access to _data is expected to fill it.
        rng = RangeIndex(0, 100, 10)
        assert rng._cached_data is None

        def deprecated_contains(label):
            # The test expects ``contains`` to emit a FutureWarning.
            with tm.assert_produces_warning(FutureWarning):
                rng.contains(label)

        # Operations called directly on the index.
        index_ops = (
            lambda: repr(rng),
            lambda: str(rng),
            lambda: rng.get_loc(20),
            lambda: 90 in rng,
            lambda: 91 in rng,
            lambda: deprecated_contains(90),
            lambda: deprecated_contains(91),
            lambda: rng.all(),
            lambda: rng.any(),
        )
        for run in index_ops:
            run()
            assert rng._cached_data is None

        frame = pd.DataFrame({'a': range(10)}, index=rng)

        def missing_label():
            # 51 is not one of the index labels, so the lookup must fail.
            with pytest.raises(KeyError):
                frame.loc[51]

        # Label- and position-based access through a DataFrame on the index.
        frame_ops = (
            lambda: frame.loc[50],
            missing_label,
            lambda: frame.loc[10:50],
            lambda: frame.iloc[5:10],
        )
        for run in frame_ops:
            run()
            assert rng._cached_data is None

        # Accessing _data is what finally populates the cached array.
        values = rng._data
        assert isinstance(values, np.ndarray)
        assert isinstance(rng._cached_data, np.ndarray)
예제 #4
0
    def test_cached_data(self):
        # GH 26565, GH26617
        # RangeIndex._data builds an int64 array of the same length as the
        # index and stores it at self._cached_data. Each operation below is
        # followed by a check that _cached_data is still unset; only the final
        # explicit access to _data is expected to fill it.
        rng = RangeIndex(0, 100, 10)
        assert rng._cached_data is None

        # Operations called directly on the index.
        index_ops = (
            lambda: repr(rng),
            lambda: str(rng),
            lambda: rng.get_loc(20),
            lambda: 90 in rng,
            lambda: 91 in rng,
            lambda: rng.contains(90),
            lambda: rng.contains(91),
            lambda: rng.all(),
            lambda: rng.any(),
        )
        for run in index_ops:
            run()
            assert rng._cached_data is None

        frame = pd.DataFrame({'a': range(10)}, index=rng)

        def missing_label():
            # 51 is not one of the index labels, so the lookup must fail.
            with pytest.raises(KeyError):
                frame.loc[51]

        # Label- and position-based access through a DataFrame on the index.
        frame_ops = (
            lambda: frame.loc[50],
            missing_label,
            lambda: frame.loc[10:50],
            lambda: frame.iloc[5:10],
        )
        for run in frame_ops:
            run()
            assert rng._cached_data is None

        # Accessing _data is what finally populates the cached array.
        values = rng._data
        assert isinstance(values, np.ndarray)
        assert isinstance(rng._cached_data, np.ndarray)
예제 #5
0
    def test_cached_data(self):
        # GH 26565, GH26617
        # RangeIndex._data builds an int64 array of the same length as the
        # index and stores it at self._cached_data. Every step below pairs an
        # operation with the laziness check the test applies right after it:
        # either _cached_data is still None or _cache is still empty.
        rng = RangeIndex(0, 100, 10)

        def data_not_materialized():
            return rng._cached_data is None

        def cache_is_empty():
            return rng._cache == {}

        assert data_not_materialized()

        # Operations called directly on the index.
        index_steps = (
            (lambda: repr(rng), data_not_materialized),
            (lambda: str(rng), data_not_materialized),
            (lambda: rng.get_loc(20), data_not_materialized),
            (lambda: 90 in rng, data_not_materialized),
            (lambda: 91 in rng, data_not_materialized),
            (lambda: rng.all(), data_not_materialized),
            (lambda: rng.any(), data_not_materialized),
            (lambda: rng.format(), cache_is_empty),
        )
        for operation, still_lazy in index_steps:
            operation()
            assert still_lazy()

        frame = pd.DataFrame({"a": range(10)}, index=rng)

        def missing_label():
            # 51 is not one of the index labels, so the lookup must fail.
            with pytest.raises(KeyError, match="51"):
                frame.loc[51]

        # Rendering and indexing a DataFrame that uses the index.
        frame_steps = (
            (lambda: str(frame), cache_is_empty),
            (lambda: frame.loc[50], data_not_materialized),
            (missing_label, data_not_materialized),
            (lambda: frame.loc[10:50], data_not_materialized),
            (lambda: frame.iloc[5:10], data_not_materialized),
        )
        for operation, still_lazy in frame_steps:
            operation()
            assert still_lazy()

        # Accessing _data is what finally populates the cached array.
        values = rng._data
        assert isinstance(values, np.ndarray)
        assert isinstance(rng._cached_data, np.ndarray)
예제 #6
0
    def test_engineless_lookup(self):
        # GH 16685
        # Standard lookup on RangeIndex should not require the engine to be
        # created
        idx = RangeIndex(2, 10, 3)

        assert idx.get_loc(5) == 1
        tm.assert_numpy_array_equal(idx.get_indexer([2, 8]),
                                    ensure_platform_int(np.array([0, 2])))
        with pytest.raises(KeyError):
            idx.get_loc(3)

        assert '_engine' not in idx._cache

        # The engine is still required for lookup of a different dtype scalar:
        # (the call itself must raise, so no comparison of its result is made
        # inside the ``raises`` block -- such a comparison could never run)
        with pytest.raises(KeyError):
            idx.get_loc('a')

        assert '_engine' in idx._cache
예제 #7
0
    def test_engineless_lookup(self):
        # GH 16685
        # Looking labels up in a RangeIndex should work without the engine
        # ever being created
        rng = RangeIndex(2, 10, 3)

        position = rng.get_loc(5)
        assert position == 1

        found = rng.get_indexer([2, 8])
        wanted = ensure_platform_int(np.array([0, 2]))
        tm.assert_numpy_array_equal(found, wanted)

        # 3 is not a label of the index
        with pytest.raises(KeyError, match="3"):
            rng.get_loc(3)

        engine_key = "_engine"
        assert engine_key not in rng._cache

        # A scalar of a different type can be ruled out immediately, so the
        #  engine is still not expected in the cache afterwards
        with pytest.raises(KeyError, match="'a'"):
            rng.get_loc("a")

        assert engine_key not in rng._cache
예제 #8
0
class Range:
    """Timing benchmarks for ``RangeIndex`` min/max and ``get_loc``."""

    def setup(self):
        """Build one increasing and one decreasing index to time against."""
        bound = 10**7
        stride = 3
        self.idx_inc = RangeIndex(start=0, stop=bound, step=stride)
        self.idx_dec = RangeIndex(start=bound, stop=-1, step=-stride)

    def time_max(self):
        """Time ``max`` on the increasing index."""
        increasing = self.idx_inc
        increasing.max()

    def time_max_trivial(self):
        """Time ``max`` on the decreasing index."""
        decreasing = self.idx_dec
        decreasing.max()

    def time_min(self):
        """Time ``min`` on the decreasing index."""
        decreasing = self.idx_dec
        decreasing.min()

    def time_min_trivial(self):
        """Time ``min`` on the increasing index."""
        increasing = self.idx_inc
        increasing.min()

    def time_get_loc_inc(self):
        """Time a label lookup on the increasing index."""
        increasing = self.idx_inc
        increasing.get_loc(900000)

    def time_get_loc_dec(self):
        """Time a label lookup on the decreasing index."""
        decreasing = self.idx_dec
        decreasing.get_loc(100000)
예제 #9
0
class Range:
    """Timing benchmarks for ``RangeIndex`` min/max and ``get_loc``."""

    def setup(self):
        """Build one increasing and one decreasing index to time against."""
        upper, stride = 10**7, 3
        self.idx_inc = RangeIndex(start=0, stop=upper, step=stride)
        self.idx_dec = RangeIndex(start=upper, stop=-1, step=-stride)

    def time_max(self):
        """Time ``max`` on the increasing index."""
        index = self.idx_inc
        index.max()

    def time_max_trivial(self):
        """Time ``max`` on the decreasing index."""
        index = self.idx_dec
        index.max()

    def time_min(self):
        """Time ``min`` on the decreasing index."""
        index = self.idx_dec
        index.min()

    def time_min_trivial(self):
        """Time ``min`` on the increasing index."""
        index = self.idx_inc
        index.min()

    def time_get_loc_inc(self):
        """Time a label lookup on the increasing index."""
        index = self.idx_inc
        index.get_loc(900000)

    def time_get_loc_dec(self):
        """Time a label lookup on the decreasing index."""
        index = self.idx_dec
        index.get_loc(100000)
예제 #10
0
    def _get_index_loc(self, key, base_index=None):
        """
        Get the location of a specific key in an index

        Parameters
        ----------
        key : label
            The key for which to find the location if the underlying index is
            a DateIndex or a location if the underlying index is a RangeIndex
            or an Int64Index.
        base_index : pd.Index, optional
            Optionally the base index to search. If None, the model's index is
            searched.

        Returns
        -------
        loc : int
            The location of the key
        index : pd.Index
            The index including the key; this is a copy of the original index
            unless the index had to be expanded to accommodate `key`.
        index_was_expanded : bool
            Whether or not the index was expanded to accommodate `key`.

        Notes
        -----
        If `key` is past the end of of the given index, and the index is either
        an Int64Index or a date index, this function extends the index up to
        and including key, and then returns the location in the new index.

        """
        if base_index is None:
            base_index = self._index

        index = base_index
        date_index = isinstance(base_index, (PeriodIndex, DatetimeIndex))
        int_index = isinstance(base_index, Int64Index)
        range_index = isinstance(base_index, RangeIndex)
        index_class = type(base_index)
        nobs = len(index)

        # Special handling for RangeIndex
        if range_index and isinstance(key, (int, np.integer)):
            # Negative indices (that lie in the Index)
            if key < 0 and -key <= nobs:
                key = nobs + key
            # Out-of-sample (note that we include key itself in the new index)
            elif key > nobs - 1:
                # See gh5835. Remove the except after pandas 0.25 required.
                try:
                    base_index_start = base_index.start
                    base_index_step = base_index.step
                except AttributeError:
                    base_index_start = base_index._start
                    base_index_step = base_index._step
                stop = base_index_start + (key + 1) * base_index_step
                index = RangeIndex(start=base_index_start,
                                   stop=stop,
                                   step=base_index_step)

        # Special handling for Int64Index
        if (not range_index and int_index and not date_index
                and isinstance(key, (int, np.integer))):
            # Negative indices (that lie in the Index)
            if key < 0 and -key <= nobs:
                key = nobs + key
            # Out-of-sample (note that we include key itself in the new index)
            elif key > base_index[-1]:
                index = Int64Index(np.arange(base_index[0], int(key + 1)))

        # Special handling for date indexes
        if date_index:
            # Use index type to choose creation function
            if index_class is DatetimeIndex:
                index_fn = date_range
            else:
                index_fn = period_range
            # Integer key (i.e. already given a location)
            if isinstance(key, (int, np.integer)):
                # Negative indices (that lie in the Index)
                if key < 0 and -key < nobs:
                    key = index[nobs + key]
                # Out-of-sample (note that we include key itself in the new
                # index)
                elif key > len(base_index) - 1:
                    index = index_fn(start=base_index[0],
                                     periods=int(key + 1),
                                     freq=base_index.freq)
                    key = index[-1]
                else:
                    key = index[key]
            # Other key types (i.e. string date or some datetime-like object)
            else:
                # Covert the key to the appropriate date-like object
                if index_class is PeriodIndex:
                    date_key = Period(key, freq=base_index.freq)
                else:
                    date_key = Timestamp(key, freq=base_index.freq)

                # Out-of-sample
                if date_key > base_index[-1]:
                    # First create an index that may not always include `key`
                    index = index_fn(start=base_index[0],
                                     end=date_key,
                                     freq=base_index.freq)

                    # Now make sure we include `key`
                    if not index[-1] == date_key:
                        index = index_fn(start=base_index[0],
                                         periods=len(index) + 1,
                                         freq=base_index.freq)

                    # To avoid possible inconsistencies with `get_loc` below,
                    # set the key directly equal to the last index location
                    key = index[-1]

        # Get the location
        if date_index:
            # (note that get_loc will throw a KeyError if key is invalid)
            loc = index.get_loc(key)
        elif int_index or range_index:
            # For Int64Index and RangeIndex, key is assumed to be the location
            # and not an index value (this assumption is required to support
            # RangeIndex)
            try:
                index[key]
            # We want to raise a KeyError in this case, to keep the exception
            # consistent across index types.
            # - Attempting to index with an out-of-bound location (e.g.
            #   index[10] on an index of length 9) will raise an IndexError
            #   (as of Pandas 0.22)
            # - Attemtping to index with a type that cannot be cast to integer
            #   (e.g. a non-numeric string) will raise a ValueError if the
            #   index is RangeIndex (otherwise will raise an IndexError)
            #   (as of Pandas 0.22)
            except (IndexError, ValueError) as e:
                raise KeyError(str(e))
            loc = key
        else:
            loc = index.get_loc(key)

        # Check if we now have a modified index
        index_was_expanded = index is not base_index

        # Return the index through the end of the loc / slice
        if isinstance(loc, slice):
            end = loc.stop - 1
        else:
            end = loc

        return loc, index[:end + 1], index_was_expanded
예제 #11
0
    def _get_index_loc(self, key, base_index=None):
        """
        Get the location of a specific key in an index

        Parameters
        ----------
        key : label
            The key for which to find the location if the underlying index is
            a DateIndex or a location if the underlying index is a RangeIndex
            or an Int64Index.
        base_index : pd.Index, optional
            Optionally the base index to search. If None, the model's index is
            searched.

        Returns
        -------
        loc : int
            The location of the key
        index : pd.Index
            The index including the key; this is a copy of the original index
            unless the index had to be expanded to accomodate `key`.
        index_was_expanded : bool
            Whether or not the index was expanded to accomodate `key`.

        Notes
        -----
        If `key` is past the end of of the given index, and the index is either
        an Int64Index or a date index, this function extends the index up to
        and including key, and then returns the location in the new index.

        """
        if base_index is None:
            base_index = self._index

        index = base_index
        date_index = isinstance(base_index, (PeriodIndex, DatetimeIndex))
        int_index = isinstance(base_index, Int64Index)
        range_index = isinstance(base_index, RangeIndex)
        index_class = type(base_index)
        nobs = len(index)

        # Special handling for RangeIndex
        if range_index and isinstance(key, (int, long, np.integer)):
            # Negative indices (that lie in the Index)
            if key < 0 and -key <= nobs:
                key = nobs + key
            # Out-of-sample (note that we include key itself in the new index)
            elif key > nobs - 1:
                stop = base_index._start + (key + 1) * base_index._step
                index = RangeIndex(start=base_index._start,
                                   stop=stop,
                                   step=base_index._step)

        # Special handling for Int64Index
        if (not range_index and int_index and not date_index and
                isinstance(key, (int, long, np.integer))):
            # Negative indices (that lie in the Index)
            if key < 0 and -key <= nobs:
                key = nobs + key
            # Out-of-sample (note that we include key itself in the new index)
            elif key > base_index[-1]:
                index = Int64Index(np.arange(base_index[0], int(key + 1)))

        # Special handling for date indexes
        if date_index:
            # Integer key (i.e. already given a location)
            if isinstance(key, (int, long, np.integer)):
                # Negative indices (that lie in the Index)
                if key < 0 and -key < nobs:
                    key = index[nobs + key]
                # Out-of-sample (note that we include key itself in the new
                # index)
                elif key > len(base_index) - 1:
                    index = index_class(start=base_index[0],
                                        periods=int(key + 1),
                                        freq=base_index.freq)
                    key = index[-1]
                else:
                    key = index[key]
            # Other key types (i.e. string date or some datetime-like object)
            else:
                # Covert the key to the appropriate date-like object
                if index_class is PeriodIndex:
                    date_key = Period(key, freq=base_index.freq)
                else:
                    date_key = Timestamp(key)

                # Out-of-sample
                if date_key > base_index[-1]:
                    # First create an index that may not always include `key`
                    index = index_class(start=base_index[0], end=date_key,
                                        freq=base_index.freq)

                    # Now make sure we include `key`
                    if not index[-1] == date_key:
                        index = index_class(start=base_index[0],
                                            periods=len(index) + 1,
                                            freq=base_index.freq)

        # Get the location
        if date_index:
            # (note that get_loc will throw a KeyError if key is invalid)
            loc = index.get_loc(key)
        elif int_index or range_index:
            # For Int64Index and RangeIndex, key is assumed to be the location
            # and not an index value (this assumption is required to support
            # RangeIndex)
            try:
                index[key]
            # We want to raise a KeyError in this case, to keep the exception
            # consistent across index types.
            # - Attempting to index with an out-of-bound location (e.g.
            #   index[10] on an index of length 9) will raise an IndexError
            #   (as of Pandas 0.22)
            # - Attemtping to index with a type that cannot be cast to integer
            #   (e.g. a non-numeric string) will raise a ValueError if the
            #   index is RangeIndex (otherwise will raise an IndexError)
            #   (as of Pandas 0.22)
            except (IndexError, ValueError) as e:
                raise KeyError(str(e))
            loc = key
        else:
            loc = index.get_loc(key)

        # Check if we now have a modified index
        index_was_expanded = index is not base_index

        # Return the index through the end of the loc / slice
        if isinstance(loc, slice):
            end = loc.stop
        else:
            end = loc

        return loc, index[:end + 1], index_was_expanded
예제 #12
0
def _get_index_loc(self, key, base_index=None):

   if base_index is None:
        base_index = self._index

    index = base_index
    date_index = isinstance(base_index, (PeriodIndex, DatetimeIndex))
    int_index = isinstance(base_index, Int64Index)
    range_index = isinstance(base_index, RangeIndex)
    index_class = type(base_index)
    nobs = len(index)

    # Special handling for RangeIndex
    if range_index and isinstance(key, (int, long, np.integer)):
        # Negative indices (that lie in the Index)
        if key < 0 and -key <= nobs:
            key = nobs + key
        # Out-of-sample (note that we include key itself in the new index)
        elif key > nobs - 1:
            # See gh5835. Remove the except after pandas 0.25 required.
            try:
                base_index_start = base_index.start
                base_index_step = base_index.step
            except AttributeError:
                base_index_start = base_index._start
                base_index_step = base_index._step
            stop = base_index_start + (key + 1) * base_index_step
            index = RangeIndex(start=base_index_start,
                               stop=stop,
                               step=base_index_step)

    # Special handling for Int64Index
    if (not range_index and int_index and not date_index and
            isinstance(key, (int, long, np.integer))):
        # Negative indices (that lie in the Index)
        if key < 0 and -key <= nobs:
            key = nobs + key
        # Out-of-sample (note that we include key itself in the new index)
        elif key > base_index[-1]:
            index = Int64Index(np.arange(base_index[0], int(key + 1)))

    # Special handling for date indexes
    if date_index:
        # Use index type to choose creation function
        if index_class is DatetimeIndex:
            index_fn = date_range
        else:
            index_fn = period_range
        # Integer key (i.e. already given a location)
        if isinstance(key, (int, long, np.integer)):
            # Negative indices (that lie in the Index)
            if key < 0 and -key < nobs:
                key = index[nobs + key]
            # Out-of-sample (note that we include key itself in the new
            # index)
            elif key > len(base_index) - 1:
                index = index_fn(start=base_index[0],
                                 periods=int(key + 1),
                                 freq=base_index.freq)
                key = index[-1]
            else:
                key = index[key]
        # Other key types (i.e. string date or some datetime-like object)
        else:
            # Covert the key to the appropriate date-like object
            if index_class is PeriodIndex:
                date_key = Period(key, freq=base_index.freq)
            else:
                date_key = Timestamp(key)

            # Out-of-sample
            if date_key > base_index[-1]:
                # First create an index that may not always include `key`
                index = index_fn(start=base_index[0], end=date_key,
                                 freq=base_index.freq)

                # Now make sure we include `key`
                if not index[-1] == date_key:
                    index = index_fn(start=base_index[0],
                                     periods=len(index) + 1,
                                     freq=base_index.freq)

    # Get the location
    if date_index:
        # (note that get_loc will throw a KeyError if key is invalid)
        loc = index.get_loc(key)
    elif int_index or range_index:
        # For Int64Index and RangeIndex, key is assumed to be the location
        # and not an index value (this assumption is required to support
        # RangeIndex)
        try:
            index[key]
        # We want to raise a KeyError in this case, to keep the exception
        # consistent across index types.
        # - Attempting to index with an out-of-bound location (e.g.
        #   index[10] on an index of length 9) will raise an IndexError
        #   (as of Pandas 0.22)
        # - Attemtping to index with a type that cannot be cast to integer
        #   (e.g. a non-numeric string) will raise a ValueError if the
        #   index is RangeIndex (otherwise will raise an IndexError)
        #   (as of Pandas 0.22)
        except (IndexError, ValueError) as e:
            raise KeyError(str(e))
        loc = key
    else:
        loc = index.get_loc(key)

    # Check if we now have a modified index
    index_was_expanded = index is not base_index

    # Return the index through the end of the loc / slice
    if isinstance(loc, slice):
        end = loc.stop
    else:
        end = loc

    return loc, index[:end + 1], index_was_expanded