Example #1
0
def test_qcut_include_lowest():
    """Check the categories produced by ``qcut`` for 0..9 split in four.

    The first expected edge (-0.001) lies just below the smallest input
    value; the remaining edges are the quartile boundaries.
    """
    binned = qcut(np.arange(10), 4)

    edges = [-0.001, 2.25, 4.5, 6.75, 9]
    expected = IntervalIndex(
        [Interval(lo, hi) for lo, hi in zip(edges[:-1], edges[1:])]
    )
    tm.assert_index_equal(binned.categories, expected)
Example #2
0
    def test_where(self, closed, klass):
        idx = self.create_index(closed=closed)
        cond = [True] * len(idx)
        expected = idx
        result = expected.where(klass(cond))
        tm.assert_index_equal(result, expected)

        cond = [False] + [True] * len(idx[1:])
        expected = IntervalIndex([np.nan] + idx[1:].tolist())
        result = idx.where(klass(cond))
        tm.assert_index_equal(result, expected)
Example #3
0
    def test_putmask_td64(self):
        # GH#37968
        dti = date_range("2016-01-01", periods=9)
        tdi = dti - dti[0]
        idx = IntervalIndex.from_breaks(tdi)
        mask = np.zeros(idx.shape, dtype=bool)
        mask[0:3] = True

        result = idx.putmask(mask, idx[-1])
        expected = IntervalIndex([idx[-1]] * 3 + list(idx[3:]))
        tm.assert_index_equal(result, expected)
Example #4
0
def test_dtype_closed_mismatch():
    """GH#38394: ``closed`` given both in the dtype and as a keyword.

    When the keyword disagrees with ``dtype.closed`` both the
    ``IntervalIndex`` and ``IntervalArray`` constructors are expected to
    raise ``ValueError``.
    """
    left_closed = IntervalDtype(np.int64, "left")
    msg = "closed keyword does not match dtype.closed"

    for constructor in (IntervalIndex, IntervalArray):
        with pytest.raises(ValueError, match=msg):
            constructor([], dtype=left_closed, closed="neither")
Example #5
0
    def astype(self, dtype, copy=True):
        """Cast this index to ``dtype``.

        * interval dtype: the values are materialised with ``np.array`` and
          wrapped in an ``IntervalIndex``.
        * categorical dtype: ``dtype`` is first resolved against the current
          dtype; if nothing changes, ``self`` (or a copy when ``copy`` is
          true) is returned.
        * anything else, or a categorical dtype that differs: delegated to
          the parent class.
        """
        if is_interval_dtype(dtype):
            from pandas import IntervalIndex
            return IntervalIndex(np.array(self))

        if is_categorical_dtype(dtype):
            # GH 18630
            dtype = self.dtype.update_dtype(dtype)
            unchanged = dtype == self.dtype
            if unchanged:
                if copy:
                    return self.copy()
                return self

        return super(CategoricalIndex, self).astype(dtype=dtype, copy=copy)
Example #6
0
def test_qcut_duplicates_bin(kwargs, msg):
    # see gh-7751
    values = [0, 0, 0, 0, 1, 2, 3]

    if msg is not None:
        with pytest.raises(ValueError, match=msg):
            qcut(values, 3, **kwargs)
    else:
        result = qcut(values, 3, **kwargs)
        expected = IntervalIndex([Interval(-0.001, 1), Interval(1, 3)])
        tm.assert_index_equal(result.categories, expected)
Example #7
0
def test_datetime_bin(conv):
    """``cut`` datetime data with bin edges converted element-wise by ``conv``."""
    data = [np.datetime64("2012-12-13"), np.datetime64("2012-12-15")]
    bin_data = ["2012-12-12", "2012-12-14", "2012-12-16"]

    # expected categories are the two consecutive Timestamp intervals
    first, second, third = (Timestamp(edge) for edge in bin_data)
    intervals = IntervalIndex([Interval(first, second),
                               Interval(second, third)])
    expected = Series(intervals).astype(CDT(ordered=True))

    bins = list(map(conv, bin_data))
    result = Series(cut(data, bins=bins))
    tm.assert_series_equal(result, expected)
def test_single_quantile(data, start, end, length, labels):
    # see gh-15431
    ser = Series([data] * length)
    result = qcut(ser, 1, labels=labels)

    if labels is None:
        intervals = IntervalIndex([Interval(start, end)] * length, closed="right")
        expected = Series(intervals).astype(CDT(ordered=True))
    else:
        expected = Series([0] * length)

    tm.assert_series_equal(result, expected)
Example #9
0
    def test_ensure_copied_data(self, closed):
        """Exercise the ``copy`` flag of the ``IntervalIndex`` constructor."""
        index = self.create_index(closed=closed)

        def check_endpoints(result, relation):
            # compare both endpoint arrays of ``result`` against ``index``
            for side in ('left', 'right'):
                tm.assert_numpy_array_equal(getattr(index, side).values,
                                            getattr(result, side).values,
                                            check_same=relation)

        # not copying: built from an IntervalIndex with copy=False
        check_endpoints(IntervalIndex(index, copy=False), 'same')

        # by-definition make a copy: built from ``_ndarray_values``
        check_endpoints(IntervalIndex(index._ndarray_values, copy=False),
                        'copy')
Example #10
0
    def test_where(self, simple_index, listlike_box):
        klass = listlike_box

        idx = simple_index
        cond = [True] * len(idx)
        expected = idx
        result = expected.where(klass(cond))
        tm.assert_index_equal(result, expected)

        cond = [False] + [True] * len(idx[1:])
        expected = IntervalIndex([np.nan] + idx[1:].tolist())
        result = idx.where(klass(cond))
        tm.assert_index_equal(result, expected)
Example #11
0
def test_datetime_cut(data):
    """``cut`` time data into three bins (gh-14714).

    ``data`` is the time data, supplied in various collection types.
    """
    binned, _ = cut(data, 3, retbins=True)

    edges = [
        Timestamp("2012-12-31 23:57:07.200000"),
        Timestamp("2013-01-01 16:00:00"),
        Timestamp("2013-01-02 08:00:00"),
        Timestamp("2013-01-03 00:00:00"),
    ]
    intervals = IntervalIndex(
        [Interval(lo, hi) for lo, hi in zip(edges[:-1], edges[1:])]
    )
    expected = Series(intervals).astype(CDT(ordered=True))
    tm.assert_series_equal(Series(binned), expected)
Example #12
0
    def test_qcut_duplicates_bin(self):
        """``qcut`` with duplicated quantile edges (GH 7751)."""
        values = [0, 0, 0, 0, 1, 2, 3]

        # duplicates='drop' yields the two expected categories
        binned = qcut(values, 3, duplicates='drop')
        expected = IntervalIndex([Interval(-0.001, 1), Interval(1, 3)])
        tm.assert_index_equal(binned.categories, expected)

        # no keyword, explicit 'raise', and the invalid value 'foo' must all
        # raise ValueError
        for extra in ({}, {'duplicates': 'raise'}, {'duplicates': 'foo'}):
            with pytest.raises(ValueError):
                qcut(values, 3, **extra)
Example #13
0
    def test_single_quantile(self):
        # issue 15431
        expected = Series([0, 0])

        s = Series([9., 9.])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex([Interval(8.999, 9.0),
                                   Interval(8.999, 9.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)

        s = Series([-9., -9.])
        expected = Series([0, 0])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex([Interval(-9.001, -9.0),
                                   Interval(-9.001, -9.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)

        s = Series([0., 0.])
        expected = Series([0, 0])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex([Interval(-0.001, 0.0),
                                   Interval(-0.001, 0.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)

        s = Series([9])
        expected = Series([0])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex([Interval(8.999, 9.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)

        s = Series([-9])
        expected = Series([0])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex([Interval(-9.001, -9.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)

        s = Series([0])
        expected = Series([0])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex([Interval(-0.001, 0.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)
Example #14
0
    def test_getitem_interval_with_nans(self, frame_or_series, indexer_sl):
        # GH#41831

        index = IntervalIndex([np.nan, np.nan])
        key = index[:-1]

        obj = frame_or_series(range(2), index=index)
        if frame_or_series is DataFrame and indexer_sl is tm.setitem:
            obj = obj.T

        result = indexer_sl(obj)[key]
        expected = obj

        tm.assert_equal(result, expected)
    def test_difference(self, closed):
        """Check ``IntervalIndex.difference``, including empty results."""
        index = self.create_index(closed=closed)

        # removing the first element leaves the rest
        result = index.difference(index[:1])
        tm.assert_index_equal(result, index[1:])

        # GH 19101: empty result, same dtype
        result = index.difference(index)
        empty = IntervalIndex(np.array([], dtype='int64'), closed=closed)
        tm.assert_index_equal(result, empty)

        # GH 19101: empty result, different dtypes
        float_left = index.left.astype('float64')
        other = IntervalIndex.from_arrays(float_left, index.right,
                                          closed=closed)
        tm.assert_index_equal(index.difference(other), empty)
Example #16
0
    def test_union(self, closed):
        """Check ``IntervalIndex.union``, including empty operands."""
        index = self.create_index(closed=closed)
        other = IntervalIndex.from_breaks(range(5, 13), closed=closed)
        expected = IntervalIndex.from_breaks(range(13), closed=closed)

        # same result regardless of operand order
        for lhs, rhs in ((index, other), (other, index)):
            tm.assert_index_equal(lhs.union(rhs), expected)

        # union with itself or with a subset leaves the index unchanged
        for subset in (index, index[:1]):
            tm.assert_index_equal(index.union(subset), index)

        # GH 19101: empty result, same dtype
        empty_int = IntervalIndex(np.array([], dtype='int64'), closed=closed)
        tm.assert_index_equal(empty_int.union(empty_int), empty_int)

        # GH 19101: empty result, different dtypes
        empty_float = IntervalIndex(np.array([], dtype='float64'),
                                    closed=closed)
        tm.assert_index_equal(empty_int.union(empty_float), empty_int)
Example #17
0
    def test_insert(self, data):
        """Check ``insert`` on the interval index ``data``.

        Covers insertion of the first element at the start, end and middle,
        rejection of non-interval items and of intervals closed on a
        different side, and insertion of NA values (GH 18295).
        """
        item = data[0]
        idx_item = IntervalIndex([item])

        # start
        expected = idx_item.append(data)
        result = data.insert(0, item)
        tm.assert_index_equal(result, expected)

        # end
        expected = data.append(idx_item)
        result = data.insert(len(data), item)
        tm.assert_index_equal(result, expected)

        # mid
        expected = data[:3].append(idx_item).append(data[3:])
        result = data.insert(3, item)
        tm.assert_index_equal(result, expected)

        # invalid type
        msg = 'can only insert Interval objects and NA into an IntervalIndex'
        with pytest.raises(ValueError, match=msg):
            data.insert(1, 'foo')

        # invalid closed
        msg = 'inserted item must be closed on the same side as the index'
        # sorted so the sides are always tried in the same order
        other_sides = sorted(
            {'left', 'right', 'both', 'neither'} - {item.closed})
        for closed in other_sides:
            # build the interval outside the raises block so that only the
            # insert call itself can satisfy the expectation
            bad_item = Interval(item.left, item.right, closed=closed)
            with pytest.raises(ValueError, match=msg):
                data.insert(1, bad_item)

        # GH 18295 (test missing)
        na_idx = IntervalIndex([np.nan], closed=data.closed)
        for na in (np.nan, pd.NaT, None):
            expected = data[:1].append(na_idx).append(data[1:])
            result = data.insert(1, na)
            tm.assert_index_equal(result, expected)
Example #18
0
    def test_intersection(self, closed, sort):
        """Check ``IntervalIndex.intersection`` for the given ``sort`` mode."""
        index = monotonic_index(0, 11, closed=closed)
        other = monotonic_index(5, 13, closed=closed)
        expected = monotonic_index(5, 11, closed=closed)

        def check(result):
            # exact order is only asserted for sort=None; contents always
            if sort is None:
                tm.assert_index_equal(result, expected)
            assert tm.equalContents(result, expected)

        check(index[::-1].intersection(other, sort=sort))
        check(other[::-1].intersection(index, sort=sort))

        tm.assert_index_equal(index.intersection(index, sort=sort), index)

        # GH 26225: (index, other, expected intersection)
        gh26225_cases = [
            # nested intervals
            (IntervalIndex.from_tuples([(1, 2), (1, 3), (1, 4), (0, 2)]),
             IntervalIndex.from_tuples([(1, 2), (1, 3)]),
             IntervalIndex.from_tuples([(1, 2), (1, 3)])),
            # partially overlapping operands
            (IntervalIndex.from_tuples([(0, 3), (0, 2)]),
             IntervalIndex.from_tuples([(0, 2), (1, 3)]),
             IntervalIndex.from_tuples([(0, 2)])),
            # duplicate nan element
            (IntervalIndex([np.nan, np.nan]),
             IntervalIndex([np.nan]),
             IntervalIndex([np.nan])),
        ]
        for lhs, rhs, wanted in gh26225_cases:
            tm.assert_index_equal(lhs.intersection(rhs), wanted)
Example #19
0
 def test_datetimetz_qcut(self, bins):
     """``qcut`` on tz-aware datetimes gives tz-aware intervals (GH 19872)."""
     tz = 'US/Eastern'
     s = Series(date_range('20130101', periods=3, tz=tz))
     result = qcut(s, bins)

     edge_strings = (
         '2012-12-31 23:59:59.999999999',
         '2013-01-01 16:00:00',
         '2013-01-02 08:00:00',
         '2013-01-03 00:00:00',
     )
     edges = [Timestamp(text, tz=tz) for text in edge_strings]
     intervals = [Interval(lo, hi) for lo, hi in zip(edges, edges[1:])]
     expected = Series(IntervalIndex(intervals)).astype(CDT(ordered=True))
     tm.assert_series_equal(result, expected)
Example #20
0
    def test_repr_floats(self):
        """String form of a Series indexed by float intervals (GH 32553)."""
        lefts = Float64Index([329.973, 345.137], dtype="float64")
        rights = Float64Index([345.137, 360.191], dtype="float64")
        intervals = [
            Interval(left, right) for left, right in zip(lefts, rights)
        ]

        markers = Series(["foo", "bar"], index=IntervalIndex(intervals))

        expected = "(329.973, 345.137]    foo\n(345.137, 360.191]    bar\ndtype: object"
        assert str(markers) == expected
Example #21
0
def test_datetime_tz_qcut(bins):
    """``qcut`` on tz-aware datetimes gives tz-aware intervals (gh-19872)."""
    tz = "US/Eastern"
    ser = Series(date_range("20130101", periods=3, tz=tz))

    result = qcut(ser, bins)

    edge_strings = (
        "2012-12-31 23:59:59.999999999",
        "2013-01-01 16:00:00",
        "2013-01-02 08:00:00",
        "2013-01-03 00:00:00",
    )
    edges = [Timestamp(text, tz=tz) for text in edge_strings]
    intervals = [Interval(lo, hi) for lo, hi in zip(edges, edges[1:])]
    expected = Series(IntervalIndex(intervals)).astype(CDT(ordered=True))
    tm.assert_series_equal(result, expected)
Example #22
0
    def test_get_indexer_with_nans(self):
        """``get_loc`` with NA-like keys on an index holding NaNs (GH#41831).

        ``None``, ``np.nan`` and ``NA`` must be found and return a boolean
        mask of the NaN positions; datetime-like NaT values must raise
        ``KeyError``.
        """
        index = IntervalIndex([np.nan, Interval(1, 2), np.nan])
        nan_positions = np.array([True, False, True])

        for key in (None, np.nan, NA):
            assert key in index
            tm.assert_numpy_array_equal(index.get_loc(key), nan_positions)

        not_matching = (
            NaT,
            np.timedelta64("NaT", "ns"),
            np.datetime64("NaT", "ns"),
        )
        for key in not_matching:
            with pytest.raises(KeyError, match=str(key)):
                index.get_loc(key)
Example #23
0
 def test_datetimetz_cut(self, bins, box):
     """``cut`` on tz-aware datetimes gives tz-aware intervals (GH 19872).

     Non-integer ``bins`` are wrapped with ``box`` before being passed on.
     """
     tz = 'US/Eastern'
     s = Series(date_range('20130101', periods=3, tz=tz))
     bins = bins if isinstance(bins, int) else box(bins)
     result = cut(s, bins)

     edge_strings = (
         '2012-12-31 23:57:07.200000',
         '2013-01-01 16:00:00',
         '2013-01-02 08:00:00',
         '2013-01-03 00:00:00',
     )
     edges = [Timestamp(text, tz=tz) for text in edge_strings]
     intervals = [Interval(lo, hi) for lo, hi in zip(edges, edges[1:])]
     expected = Series(IntervalIndex(intervals)).astype(CDT(ordered=True))
     tm.assert_series_equal(result, expected)
Example #24
0
    def test_get_indexer_categorical_with_nans(self):
        # GH#41934 nans in both index and in target
        ii = IntervalIndex.from_breaks(range(5), inclusive="right")
        ii2 = ii.append(IntervalIndex([np.nan]))
        ci2 = CategoricalIndex(ii2)

        result = ii2.get_indexer(ci2)
        expected = np.arange(5, dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        # not-all-matches
        result = ii2[1:].get_indexer(ci2[::-1])
        expected = np.array([3, 2, 1, 0, -1], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        # non-unique target, non-unique nans
        result = ii2.get_indexer(ci2.append(ci2))
        expected = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)
Example #25
0
    def test_interval(self):
        idx = pd.interval_range(0, 10, periods=10)
        cat = Categorical(idx, categories=idx)
        expected_codes = np.arange(10, dtype="int8")
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)

        # infer categories
        cat = Categorical(idx)
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)

        # list values
        cat = Categorical(list(idx))
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)

        # list values, categories
        cat = Categorical(list(idx), categories=list(idx))
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)

        # shuffled
        values = idx.take([1, 2, 0])
        cat = Categorical(values, categories=idx)
        tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0],
                                                        dtype="int8"))
        tm.assert_index_equal(cat.categories, idx)

        # extra
        values = pd.interval_range(8, 11, periods=3)
        cat = Categorical(values, categories=idx)
        expected_codes = np.array([8, 9, -1], dtype="int8")
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)

        # overlapping
        idx = IntervalIndex([Interval(0, 2), Interval(0, 1)])
        cat = Categorical(idx, categories=idx)
        expected_codes = np.array([0, 1], dtype="int8")
        tm.assert_numpy_array_equal(cat.codes, expected_codes)
        tm.assert_index_equal(cat.categories, idx)
Example #26
0
    def test_difference(self, closed, sort):
        index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4],
                                          closed=closed)
        result = index.difference(index[:1], sort)
        expected = index[1:]
        if sort:
            expected = expected.sort_values()
        tm.assert_index_equal(result, expected)

        # GH 19101: empty result, same dtype
        result = index.difference(index, sort)
        expected = IntervalIndex(np.array([], dtype='int64'), closed=closed)
        tm.assert_index_equal(result, expected)

        # GH 19101: empty result, different dtypes
        other = IntervalIndex.from_arrays(index.left.astype('float64'),
                                          index.right,
                                          closed=closed)
        result = index.difference(other, sort)
        tm.assert_index_equal(result, expected)
Example #27
0
    def test_datetime_bin(self):
        """``cut`` datetime data with bin edges given in several forms.

        The edges are supplied as ``Timestamp`` objects, ``np.datetime64``
        values, ``datetime`` objects and the result of ``to_datetime``; every
        form must produce the same two ordered interval categories.
        """
        data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')]
        bin_data = ['2012-12-12', '2012-12-14', '2012-12-16']
        expected = (
            Series(IntervalIndex([
                Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])),
                Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2]))]))
            .astype(CDT(ordered=True)))

        # element-wise converters (each listed once)
        for conv in [Timestamp, np.datetime64]:
            bins = [conv(v) for v in bin_data]
            result = cut(data, bins=bins)
            tm.assert_series_equal(Series(result), expected)

        bin_pydatetime = [Timestamp(v).to_pydatetime() for v in bin_data]
        result = cut(data, bins=bin_pydatetime)
        tm.assert_series_equal(Series(result), expected)

        # edges converted in bulk by to_datetime; pass these edges rather
        # than re-using the datetime list from the previous case
        bins = to_datetime(bin_data)
        result = cut(data, bins=bins)
        tm.assert_series_equal(Series(result), expected)
Example #28
0
    def test_symmetric_difference(self, closed, sort):
        """Check ``IntervalIndex.symmetric_difference`` for ``sort`` modes."""
        index = monotonic_index(0, 11, closed=closed)

        def check(result, expected):
            # exact order is only asserted for sort=None; contents always
            if sort is None:
                tm.assert_index_equal(result, expected)
            assert tm.equalContents(result, expected)

        # dropping opposite ends leaves just the first and last interval
        result = index[1:].symmetric_difference(index[:-1], sort=sort)
        check(result, IntervalIndex([index[0], index[-1]]))

        # GH 19101: empty result, same dtype
        result = index.symmetric_difference(index, sort=sort)
        empty = empty_index(dtype="int64", closed=closed)
        check(result, empty)

        # GH 19101: empty result, different dtypes
        float_left = index.left.astype("float64")
        other = IntervalIndex.from_arrays(float_left, index.right,
                                          closed=closed)
        result = index.symmetric_difference(other, sort=sort)
        tm.assert_index_equal(result, empty)
Example #29
0
    def test_datetime_cut(self):
        """``cut`` three consecutive days into three bins (GH 14714).

        The same time data is supplied as a Series, a list, an ndarray and a
        DatetimeIndex; all must give the same ordered interval categories.
        """
        day_strings = ['2013-01-01', '2013-01-02', '2013-01-03']

        # time data as a Series: the result is compared directly
        data = to_datetime(Series(day_strings))
        result, _ = cut(data, 3, retbins=True)

        edges = [Timestamp(text) for text in (
            '2012-12-31 23:57:07.200000',
            '2013-01-01 16:00:00',
            '2013-01-02 08:00:00',
            '2013-01-03 00:00:00',
        )]
        intervals = IntervalIndex(
            [Interval(lo, hi) for lo, hi in zip(edges, edges[1:])])
        expected = Series(intervals).astype(CDT(ordered=True))

        tm.assert_series_equal(result, expected)

        # time data as list, ndarray and datetime index: the result is
        # wrapped in a Series before comparing
        stamps = [np.datetime64(day) for day in day_strings]
        containers = (
            stamps,
            np.array(stamps),
            DatetimeIndex(day_strings),
        )
        for data in containers:
            result, _ = cut(data, 3, retbins=True)
            tm.assert_series_equal(Series(result), expected)
Example #30
0
def _isna_ndarraylike(obj):
    values = getattr(obj, 'values', obj)
    dtype = values.dtype

    if is_extension_array_dtype(obj):
        if isinstance(obj, (ABCIndexClass, ABCSeries)):
            values = obj._values
        else:
            values = obj
        result = values.isna()
    elif is_interval_dtype(values):
        # TODO(IntervalArray): remove this if block
        from pandas import IntervalIndex
        result = IntervalIndex(obj).isna()
    elif is_string_dtype(dtype):
        # Working around NumPy ticket 1542
        shape = values.shape

        if is_string_like_dtype(dtype):
            # object array of strings
            result = np.zeros(values.shape, dtype=bool)
        else:
            # object array of non-strings
            result = np.empty(shape, dtype=bool)
            vec = libmissing.isnaobj(values.ravel())
            result[...] = vec.reshape(shape)

    elif needs_i8_conversion(obj):
        # this is the NaT pattern
        result = values.view('i8') == iNaT
    else:
        result = np.isnan(values)

    # box
    if isinstance(obj, ABCSeries):
        from pandas import Series
        result = Series(result, index=obj.index, name=obj.name, copy=False)

    return result