def test_constructors_nan(self, closed, data):
    """Check that every constructor agrees with ``IntervalIndex(data)``.

    ``closed`` and ``data`` are supplied by the caller (presumably pytest
    parametrization of NaN-only inputs -- not visible from here).
    """
    # GH 18421
    object_values = np.array(data, dtype=object)
    reference = IntervalIndex(data, closed=closed)

    # validate the expected index
    assert reference.closed == closed
    tm.assert_numpy_array_equal(reference.values, object_values)

    def _check(built):
        # Same index and same underlying object values as the reference.
        tm.assert_index_equal(built, reference)
        tm.assert_numpy_array_equal(built.values, object_values)

    _check(IntervalIndex.from_tuples(data, closed=closed))
    # One NaN is prepended so there are len(data) + 1 breaks.
    _check(IntervalIndex.from_breaks([np.nan] + data, closed=closed))
    _check(IntervalIndex.from_arrays(data, data, closed=closed))

    if closed == 'right':
        # Can't specify closed for IntervalIndex.from_intervals
        _check(IntervalIndex.from_intervals(data))
def test_constructors_empty(self, data, closed):
    """Check that every constructor yields the same empty index.

    The expected subtype is ``data.dtype`` when ``data`` has one, otherwise
    ``np.int64``.
    """
    # GH 18421
    subtype = getattr(data, 'dtype', np.int64)
    no_values = np.array([], dtype=object)
    reference = IntervalIndex(data, closed=closed)

    # validate the expected index
    assert reference.empty
    assert reference.closed == closed
    assert reference.dtype.subtype == subtype
    tm.assert_numpy_array_equal(reference.values, no_values)

    def _check(built):
        # Same index and same (empty) object values as the reference.
        tm.assert_index_equal(built, reference)
        tm.assert_numpy_array_equal(built.values, no_values)

    _check(IntervalIndex.from_tuples(data, closed=closed))
    _check(IntervalIndex.from_breaks(data, closed=closed))
    _check(IntervalIndex.from_arrays(data, data, closed=closed))

    if closed == 'right':
        # Can't specify closed for IntervalIndex.from_intervals
        _check(IntervalIndex.from_intervals(data))
def test_constructors_errors(self):
    """Invalid constructor inputs must raise TypeError or ValueError."""
    # --- TypeError cases -------------------------------------------------
    # scalar
    with pytest.raises(TypeError):
        IntervalIndex(5)

    # not an interval
    with pytest.raises(TypeError):
        IntervalIndex([0, 1])

    with pytest.raises(TypeError):
        IntervalIndex.from_intervals([0, 1])

    # --- ValueError cases ------------------------------------------------
    # invalid closed
    with pytest.raises(ValueError):
        IntervalIndex.from_arrays([0, 1], [1, 2], closed='invalid')

    # mismatched closed
    with pytest.raises(ValueError):
        IntervalIndex.from_intervals(
            [Interval(0, 1), Interval(1, 2, closed='left')]
        )

    with pytest.raises(ValueError):
        IntervalIndex.from_arrays([0, 10], [3, 5])

    with pytest.raises(ValueError):
        Index([Interval(0, 1), Interval(2, 3, closed='left')])

    # no point in nesting periods in an IntervalIndex
    with pytest.raises(ValueError):
        IntervalIndex.from_breaks(pd.period_range('2000-01-01', periods=3))
def test_constructor_errors(self):
    """``from_breaks`` must reject categorical breaks with a TypeError."""
    # GH 19016: categorical data
    categorical_breaks = Categorical(list('01234abcde'), ordered=True)
    expected_msg = (
        'category, object, and string subtypes are not supported '
        'for IntervalIndex'
    )
    with tm.assert_raises_regex(TypeError, expected_msg):
        IntervalIndex.from_breaks(categorical_breaks)
def _format_labels(bins, precision, right=True,
                   include_lowest=False, dtype=None):
    """Build the interval labels for ``bins`` according to ``dtype``.

    Each bin edge is passed through a dtype-specific formatter (``Timestamp``,
    ``Timedelta``, or rounding via ``_round_frac``) and the formatted edges
    are turned into an IntervalIndex closed on the right when ``right`` is
    true, otherwise on the left.

    When both ``right`` and ``include_lowest`` are true, the left edge of the
    first label is moved down by one step (1ns for datetime/timedelta dtypes,
    ``10 ** -precision`` otherwise).
    """
    closed = 'right' if right else 'left'

    if is_datetime64_dtype(dtype):
        formatter = Timestamp

        def adjust(edge):
            return edge - Timedelta('1ns')

    elif is_timedelta64_dtype(dtype):
        formatter = Timedelta

        def adjust(edge):
            return edge - Timedelta('1ns')

    else:
        precision = _infer_precision(precision, bins)

        def formatter(edge):
            return _round_frac(edge, precision)

        def adjust(edge):
            return edge - 10 ** (-precision)

    formatted_edges = [formatter(edge) for edge in bins]
    labels = IntervalIndex.from_breaks(formatted_edges, closed=closed)

    if not (right and include_lowest):
        return labels

    # we will adjust the left hand side by precision to
    # account that we are all right closed
    lowest = labels[0]
    widened_left = adjust(lowest.left)
    head = IntervalIndex.from_intervals(
        [Interval(widened_left, lowest.right, closed='right')]
    )
    return head.append(labels[1:])
def test_unique(self, closed):
    """``is_unique`` is False only when an interval is repeated."""
    # unique non-overlapping
    disjoint = IntervalIndex.from_tuples(
        [(0, 1), (2, 3), (4, 5)], closed=closed)
    assert disjoint.is_unique is True

    # unique overlapping - distinct endpoints
    overlapping = IntervalIndex.from_tuples(
        [(0, 1), (0.5, 1.5)], closed=closed)
    assert overlapping.is_unique is True

    # unique overlapping - shared endpoints
    shared_endpoints = pd.IntervalIndex.from_tuples(
        [(1, 2), (1, 3), (2, 3)], closed=closed)
    assert shared_endpoints.is_unique is True

    # unique nested
    nested = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
    assert nested.is_unique is True

    # duplicate
    repeated = IntervalIndex.from_tuples(
        [(0, 1), (0, 1), (2, 3)], closed=closed)
    assert repeated.is_unique is False

    # empty
    nothing = IntervalIndex([], closed=closed)
    assert nothing.is_unique is True
def test_astype(self):
    """Round-trip the index through category, object and interval dtypes."""
    cat_index = self.create_index()

    as_category = cat_index.astype('category')
    tm.assert_index_equal(as_category, cat_index, exact=True)

    as_object = cat_index.astype(object)
    tm.assert_index_equal(as_object, Index(np.array(cat_index)))

    # this IS equal, but not the same class
    assert as_object.equals(cat_index)
    assert isinstance(as_object, Index)
    assert not isinstance(as_object, CategoricalIndex)

    # interval
    intervals = IntervalIndex.from_arrays(
        left=[-0.001, 2.0], right=[2, 4], closed='right')
    codes = [0, 1, -1]
    cat_index = CategoricalIndex(
        Categorical.from_codes(codes, categories=intervals, ordered=True))

    as_interval = cat_index.astype('interval')
    expected = intervals.take([0, 1, -1])
    tm.assert_index_equal(as_interval, expected)

    rebuilt = IntervalIndex.from_intervals(as_interval.values)
    tm.assert_index_equal(rebuilt, expected)
def slice_locs_cases(self, breaks):
    """Assert ``slice_locs`` results for small hard-coded IntervalIndexes.

    NOTE(review): ``breaks`` is accepted but never read in this body.
    """
    # TODO: same tests for more index types
    right_closed = IntervalIndex.from_breaks([0, 1, 2], closed='right')

    assert right_closed.slice_locs() == (0, 2)
    assert right_closed.slice_locs(0, 1) == (0, 1)
    assert right_closed.slice_locs(1, 1) == (0, 1)
    assert right_closed.slice_locs(0, 2) == (0, 2)
    assert right_closed.slice_locs(0.5, 1.5) == (0, 2)
    assert right_closed.slice_locs(0, 0.5) == (0, 1)
    assert right_closed.slice_locs(start=1) == (0, 2)
    assert right_closed.slice_locs(start=1.2) == (1, 2)
    assert right_closed.slice_locs(end=1) == (0, 1)
    assert right_closed.slice_locs(end=1.1) == (0, 2)
    assert right_closed.slice_locs(end=1.0) == (0, 1)
    assert right_closed.slice_locs(-1, -1) == (0, 0)

    open_both_sides = IntervalIndex.from_breaks([0, 1, 2], closed='neither')

    assert open_both_sides.slice_locs(0, 1) == (0, 1)
    assert open_both_sides.slice_locs(0, 2) == (0, 2)
    assert open_both_sides.slice_locs(0.5, 1.5) == (0, 2)
    assert open_both_sides.slice_locs(1, 1) == (1, 1)
    assert open_both_sides.slice_locs(1, 2) == (1, 2)

    closed_both_sides = IntervalIndex.from_tuples(
        [(0, 1), (2, 3), (4, 5)], closed='both')

    assert closed_both_sides.slice_locs(1, 1) == (0, 1)
    assert closed_both_sides.slice_locs(1, 2) == (0, 2)
def slice_locs_cases(self, breaks):
    """Assert ``slice_locs`` results for small hard-coded IntervalIndexes.

    NOTE(review): ``breaks`` is accepted but never read in this body.
    """
    # TODO: same tests for more index types
    check = self.assertEqual

    right_closed = IntervalIndex.from_breaks([0, 1, 2], closed='right')

    check(right_closed.slice_locs(), (0, 2))
    check(right_closed.slice_locs(0, 1), (0, 1))
    check(right_closed.slice_locs(1, 1), (0, 1))
    check(right_closed.slice_locs(0, 2), (0, 2))
    check(right_closed.slice_locs(0.5, 1.5), (0, 2))
    check(right_closed.slice_locs(0, 0.5), (0, 1))
    check(right_closed.slice_locs(start=1), (0, 2))
    check(right_closed.slice_locs(start=1.2), (1, 2))
    check(right_closed.slice_locs(end=1), (0, 1))
    check(right_closed.slice_locs(end=1.1), (0, 2))
    check(right_closed.slice_locs(end=1.0), (0, 1))
    # NOTE(review): the result is star-unpacked, so this only checks that
    # the two returned positions are equal to each other, not their value.
    check(*right_closed.slice_locs(-1, -1))

    open_both_sides = IntervalIndex.from_breaks([0, 1, 2], closed='neither')

    check(open_both_sides.slice_locs(0, 1), (0, 1))
    check(open_both_sides.slice_locs(0, 2), (0, 2))
    check(open_both_sides.slice_locs(0.5, 1.5), (0, 2))
    check(open_both_sides.slice_locs(1, 1), (1, 1))
    check(open_both_sides.slice_locs(1, 2), (1, 2))

    closed_both_sides = IntervalIndex.from_breaks([0, 1, 2], closed='both')

    check(closed_both_sides.slice_locs(1, 1), (0, 2))
    check(closed_both_sides.slice_locs(1, 2), (0, 2))
def test_maybe_convert_i8(self, breaks):
    """``_maybe_convert_i8`` maps datetimelike inputs to their i8 values.

    ``breaks`` is supplied by the caller; the body relies on it exposing
    ``asi8`` and on its elements exposing ``value``.
    """
    # GH 20636
    index = IntervalIndex.from_breaks(breaks)

    # intervalindex
    converted = index._maybe_convert_i8(index)
    tm.assert_index_equal(converted, IntervalIndex.from_breaks(breaks.asi8))

    # interval
    single = Interval(breaks[0], breaks[1])
    converted = index._maybe_convert_i8(single)
    assert converted == Interval(breaks[0].value, breaks[1].value)

    # datetimelike index
    converted = index._maybe_convert_i8(breaks)
    tm.assert_index_equal(converted, Index(breaks.asi8))

    # datetimelike scalar
    converted = index._maybe_convert_i8(breaks[0])
    assert converted == breaks[0].value

    # list-like of datetimelike scalars
    converted = index._maybe_convert_i8(list(breaks))
    tm.assert_index_equal(converted, Index(breaks.asi8))
def test_constructors(self):
    """Each construction route must reproduce ``self.index``."""
    reference = self.index

    from_breaks = IntervalIndex.from_breaks(np.arange(3), closed='right')
    self.assertTrue(reference.equals(from_breaks))

    # Same breaks but closed on the other side must not compare equal.
    left_closed = IntervalIndex.from_breaks(np.arange(3), closed='left')
    self.assertFalse(reference.equals(left_closed))

    from_intervals = IntervalIndex.from_intervals(
        [Interval(0, 1), Interval(1, 2)])
    self.assertTrue(reference.equals(from_intervals))

    direct = IntervalIndex([Interval(0, 1), Interval(1, 2)])
    self.assertTrue(reference.equals(direct))

    from_arrays = IntervalIndex.from_arrays(
        np.arange(2), np.arange(2) + 1, closed='right')
    self.assertTrue(reference.equals(from_arrays))

    # Generic Index construction should hand back an IntervalIndex.
    inferred = Index([Interval(0, 1), Interval(1, 2)])
    assert isinstance(inferred, IntervalIndex)
    self.assertTrue(reference.equals(inferred))

    rewrapped = Index(reference)
    assert isinstance(rewrapped, IntervalIndex)
    self.assertTrue(reference.equals(rewrapped))
def test_constructors(self, data, closed, name):
    """All constructors agree with an index built via ``_simple_new``.

    ``data`` supplies the breaks: consecutive elements form the left/right
    endpoints. The second half re-feeds the expected index (or pieces of
    it) to the constructors to check they are idempotent.
    """
    left, right = data[:-1], data[1:]
    ivs = [
        Interval(lo, hi, closed=closed) for lo, hi in lzip(left, right)
    ]
    expected = IntervalIndex._simple_new(
        left=left, right=right, closed=closed, name=name)

    # validate expected
    assert expected.closed == closed
    assert expected.name == name
    assert expected.dtype.subtype == data.dtype
    tm.assert_index_equal(expected.left, data[:-1])
    tm.assert_index_equal(expected.right, data[1:])

    # validated constructors
    built = IntervalIndex(ivs, name=name)
    tm.assert_index_equal(built, expected)

    built = IntervalIndex.from_intervals(ivs, name=name)
    tm.assert_index_equal(built, expected)

    built = IntervalIndex.from_breaks(data, closed=closed, name=name)
    tm.assert_index_equal(built, expected)

    built = IntervalIndex.from_arrays(
        left, right, closed=closed, name=name)
    tm.assert_index_equal(built, expected)

    built = IntervalIndex.from_tuples(
        lzip(left, right), closed=closed, name=name)
    tm.assert_index_equal(built, expected)

    built = Index(ivs, name=name)
    assert isinstance(built, IntervalIndex)
    tm.assert_index_equal(built, expected)

    # idempotent
    tm.assert_index_equal(Index(expected), expected)
    tm.assert_index_equal(IntervalIndex(expected), expected)

    built = IntervalIndex.from_intervals(expected)
    tm.assert_index_equal(built, expected)

    built = IntervalIndex.from_intervals(
        expected.values, name=expected.name)
    tm.assert_index_equal(built, expected)

    exp_left, exp_right = expected.left, expected.right
    built = IntervalIndex.from_arrays(
        exp_left, exp_right, closed=expected.closed, name=expected.name)
    tm.assert_index_equal(built, expected)

    built = IntervalIndex.from_tuples(
        expected.to_tuples(), closed=expected.closed, name=expected.name)
    tm.assert_index_equal(built, expected)

    # Rebuild the breaks: every left endpoint plus the final right endpoint.
    rebuilt_breaks = expected.left.tolist() + [expected.right[-1]]
    built = IntervalIndex.from_breaks(
        rebuilt_breaks, closed=expected.closed, name=expected.name)
    tm.assert_index_equal(built, expected)
def test_intersection(self, closed):
    """Intersection with another index, and with itself."""
    base = self.create_index(closed=closed)
    other = IntervalIndex.from_breaks([1, 2, 3], closed=closed)

    common = base.intersection(other)
    assert IntervalIndex.from_breaks([1, 2], closed=closed).equals(common)

    # Intersecting an index with itself gives the index back.
    tm.assert_index_equal(base.intersection(base), base)
def setup(self, N): left = np.append(np.arange(N), np.array(0)) right = np.append(np.arange(1, N + 1), np.array(1)) self.intv = IntervalIndex.from_arrays(left, right) self.intv._engine self.left = IntervalIndex.from_breaks(np.arange(N)) self.right = IntervalIndex.from_breaks(np.arange(N - 3, 2 * N - 3))
def test_intersection(self):
    """Intersection of ``self.index`` with another index, and with itself."""
    base = self.index
    other = IntervalIndex.from_breaks([1, 2, 3])

    common = base.intersection(other)
    self.assertTrue(IntervalIndex.from_breaks([1, 2]).equals(common))

    # Intersecting an index with itself gives the index back.
    tm.assert_index_equal(self.index.intersection(self.index), self.index)
def test_subtype_integer(self, subtype_start, subtype_end):
    """``astype`` to an interval dtype casts both endpoints to the subtype."""
    source = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start))
    target_dtype = IntervalDtype(subtype_end)

    converted = source.astype(target_dtype)

    # Expected: the same intervals with each side cast individually.
    cast_left = source.left.astype(subtype_end)
    cast_right = source.right.astype(subtype_end)
    expected = IntervalIndex.from_arrays(
        cast_left, cast_right, closed=source.closed)
    tm.assert_index_equal(converted, expected)
def test_slice_locs_with_interval(self):
    """Check ``slice_locs`` when the bounds are ``Interval`` objects.

    Covers monotonically increasing and decreasing indexes, sorted
    duplicates, and two layouts of unsorted duplicates. For the first
    unsorted-duplicate layout most lookups are expected to raise KeyError.

    The KeyError checks use ``with pytest.raises(...)`` so the call runs
    inside the context manager. Passing the *result* of the call to
    ``pytest.raises`` would evaluate it first, so a raised KeyError would
    escape and a successful call would be treated as the callable.
    """
    iv02 = Interval(0, 2)
    iv24 = Interval(2, 4)

    # increasing monotonically
    index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)])

    assert index.slice_locs(start=iv02, end=iv24) == (0, 3)
    assert index.slice_locs(start=iv02) == (0, 3)
    assert index.slice_locs(end=iv24) == (0, 3)
    assert index.slice_locs(end=iv02) == (0, 1)
    assert index.slice_locs(start=iv24, end=iv02) == (2, 1)

    # decreasing monotonically
    index = IntervalIndex.from_tuples([(2, 4), (1, 3), (0, 2)])

    assert index.slice_locs(start=iv02, end=iv24) == (2, 1)
    assert index.slice_locs(start=iv02) == (2, 3)
    assert index.slice_locs(end=iv24) == (0, 1)
    assert index.slice_locs(end=iv02) == (0, 3)
    assert index.slice_locs(start=iv24, end=iv02) == (0, 3)

    # sorted duplicates
    index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)])

    assert index.slice_locs(start=iv02, end=iv24) == (0, 3)
    assert index.slice_locs(start=iv02) == (0, 3)
    assert index.slice_locs(end=iv24) == (0, 3)
    assert index.slice_locs(end=iv02) == (0, 2)
    assert index.slice_locs(start=iv24, end=iv02) == (2, 2)

    # unsorted duplicates
    index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])

    with pytest.raises(KeyError):
        index.slice_locs(start=iv02, end=iv24)

    with pytest.raises(KeyError):
        index.slice_locs(start=iv02)

    assert index.slice_locs(end=iv24) == (0, 2)

    with pytest.raises(KeyError):
        index.slice_locs(end=iv02)

    with pytest.raises(KeyError):
        index.slice_locs(start=iv24, end=iv02)

    # another unsorted duplicates
    index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4), (1, 3)])

    assert index.slice_locs(start=iv02, end=iv24) == (0, 3)
    assert index.slice_locs(start=iv02) == (0, 4)
    assert index.slice_locs(end=iv24) == (0, 3)
    assert index.slice_locs(end=iv02) == (0, 2)
    assert index.slice_locs(start=iv24, end=iv02) == (2, 2)
def test_get_reindexer_datetimelike(self, arrays):
    """``_get_reindexer`` locates sub-intervals at both ends of the index.

    The target holds two 12-hour intervals: one starting at the left edge
    of the first interval and one ending at the right edge of the last.
    """
    # GH 20636
    index = IntervalIndex.from_arrays(*arrays)

    head = (index[0].left, index[0].left + pd.Timedelta('12H'))
    tail = (index[-1].right - pd.Timedelta('12H'), index[-1].right)
    target = IntervalIndex.from_tuples([head, tail])

    located = index._get_reindexer(target)
    tm.assert_numpy_array_equal(located, np.array([0, 3], dtype='intp'))
def test_missing_values(self):
    """NaN entries are supported, but must be NaN on both sides."""
    via_index = pd.Index([np.nan, pd.Interval(0, 1), pd.Interval(1, 2)])
    via_arrays = pd.IntervalIndex.from_arrays(
        [np.nan, 0, 1], [np.nan, 1, 2])
    assert via_index.equals(via_arrays)

    # A NaN on the left paired with a non-NaN on the right is rejected.
    with pytest.raises(ValueError):
        IntervalIndex.from_arrays([np.nan, 0, 1], np.array([0, 1, 2]))

    null_mask = np.array([True, False, False])
    tm.assert_numpy_array_equal(isnull(via_index), null_mask)
def test_where(self, closed):
    """``where`` keeps entries where the mask is true and inserts NaN elsewhere."""
    base = self.create_index(closed=closed)
    # Masking with notna() is expected to leave the index unchanged.
    tm.assert_index_equal(base.where(base.notna()), base)

    single = IntervalIndex.from_breaks([1, 2], closed=closed)
    masked = single.where([True, False])
    expected = IntervalIndex.from_intervals(
        [Interval(1.0, 2.0, closed=closed), np.nan])
    tm.assert_index_equal(masked, expected)
def test_where(self):
    """``where`` keeps entries where the mask is true and inserts NaN elsewhere."""
    base = self.index
    # Masking with notnull() is expected to leave the index unchanged.
    unchanged = self.index.where(self.index.notnull())
    tm.assert_index_equal(unchanged, base)

    single = IntervalIndex.from_breaks([1, 2])
    masked = single.where([True, False])
    expected = IntervalIndex.from_intervals(
        [Interval(1.0, 2.0, closed='right'), np.nan])
    tm.assert_index_equal(masked, expected)
def test_labels(self):
    """``cut`` into 4 bins produces the expected interval categories."""
    values = np.tile(np.arange(0, 1.01, 0.1), 4)

    # Default (right-closed) bins.
    binned, _ = cut(values, 4, retbins=True)
    right_levels = IntervalIndex.from_breaks([-1e-3, 0.25, 0.5, 0.75, 1])
    tm.assert_index_equal(binned.categories, right_levels)

    # Left-closed bins.
    binned, _ = cut(values, 4, retbins=True, right=False)
    left_levels = IntervalIndex.from_breaks(
        [0, 0.25, 0.5, 0.75, 1 + 1e-3], closed='left')
    tm.assert_index_equal(binned.categories, left_levels)
def test_dropna(self):
    """``dropna`` removes the NaN entry however the index was built."""
    without_nan = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)])

    from_tuples = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan])
    tm.assert_index_equal(from_tuples.dropna(), without_nan)

    from_arrays = IntervalIndex.from_arrays(
        [0, 1, np.nan], [1, 2, np.nan])
    tm.assert_index_equal(from_arrays.dropna(), without_nan)
def test_constructors_other(self):
    """``from_intervals`` accepts all-NaN and empty input."""
    # all-nan
    all_nan = IntervalIndex.from_intervals([np.nan])
    tm.assert_numpy_array_equal(
        all_nan.values, np.array([np.nan], dtype=object))

    # empty
    empty = IntervalIndex.from_intervals([])
    tm.assert_numpy_array_equal(empty.values, np.array([], dtype=object))
def test_get_indexer_with_ints_and_floats_multiple_queries(
        self, query, expected):
    """``get_indexer`` returns the expected positions for ``query``.

    ``query`` and ``expected`` are supplied by the caller (presumably via
    parametrization -- not visible from here).
    """
    right_closed = IntervalIndex.from_tuples(
        [(0, 1), (1, 2), (3, 4)], closed='right')

    positions = right_closed.get_indexer(query)
    expected_positions = np.array(expected, dtype='intp')
    tm.assert_numpy_array_equal(positions, expected_positions)

    # NOTE(review): this index is built but never used afterwards; it looks
    # like the start of a further check that is not present in this body.
    overlapping = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)])
def test_basic_dtype(self):
    """``is_interval_dtype`` accepts interval inputs and rejects the rest."""
    # Interval dtype string and IntervalIndex instances.
    assert is_interval_dtype('interval[int64]')
    assert is_interval_dtype(IntervalIndex.from_tuples([(0, 1)]))
    assert is_interval_dtype(IntervalIndex.from_breaks(np.arange(4)))
    datetime_breaks = date_range('20130101', periods=3)
    assert is_interval_dtype(IntervalIndex.from_breaks(datetime_breaks))

    # Non-interval strings and numpy scalar types.
    for non_interval in ('U', 'S', 'foo',
                         np.object_, np.int64, np.float64):
        assert not is_interval_dtype(non_interval)
def test_union(self):
    """Union is symmetric and absorbs subsets of the index itself."""
    extra = IntervalIndex.from_arrays([2], [3])
    combined = IntervalIndex.from_arrays(range(3), range(1, 4))

    # Same result regardless of which operand calls union.
    self.assertTrue(combined.equals(self.index.union(extra)))
    self.assertTrue(combined.equals(extra.union(self.index)))

    # Union with itself, or with a slice of itself, changes nothing.
    tm.assert_index_equal(self.index.union(self.index), self.index)
    tm.assert_index_equal(self.index.union(self.index[:1]), self.index)
def test_union(self, closed):
    """Union is symmetric and absorbs subsets of the index itself."""
    base = self.create_index(closed=closed)
    extra = IntervalIndex.from_arrays([2], [3], closed=closed)
    combined = IntervalIndex.from_arrays(
        range(3), range(1, 4), closed=closed)

    # Same result regardless of which operand calls union.
    assert combined.equals(base.union(extra))
    assert combined.equals(extra.union(base))

    # Union with itself, or with a slice of itself, changes nothing.
    tm.assert_index_equal(base.union(base), base)
    tm.assert_index_equal(base.union(base[:1]), base)
def test_constructors_datetimelike(self, closed):
    """``from_breaks`` on datetime/timedelta breaks keeps the scalar type.

    Building from the index and from its raw ``.values`` must give the same
    result, and interval endpoints must have the type of the index's own
    elements.
    """
    # DTI / TDI
    datetimelike_breaks = (
        pd.date_range('20130101', periods=5),
        pd.timedelta_range('1 day', periods=5),
    )
    for breaks in datetimelike_breaks:
        from_index = IntervalIndex.from_breaks(breaks, closed=closed)
        from_values = IntervalIndex.from_breaks(breaks.values, closed=closed)
        tm.assert_index_equal(from_index, from_values)

        scalar_type = type(breaks[0])
        first_interval = from_index[0]
        assert isinstance(first_interval.left, scalar_type)
        assert isinstance(first_interval.right, scalar_type)
def test_missing_values(self, closed):
    """NaN entries are supported, but must be NaN on both sides."""
    via_index = Index([
        np.nan,
        Interval(0, 1, closed=closed),
        Interval(1, 2, closed=closed),
    ])
    via_arrays = IntervalIndex.from_arrays(
        [np.nan, 0, 1], [np.nan, 1, 2], closed=closed)
    assert via_index.equals(via_arrays)

    # A NaN on the left paired with a non-NaN on the right is rejected.
    with pytest.raises(ValueError):
        IntervalIndex.from_arrays(
            [np.nan, 0, 1], np.array([0, 1, 2]), closed=closed)

    null_mask = np.array([True, False, False])
    tm.assert_numpy_array_equal(isna(via_index), null_mask)
class TestIntervalIndex:
    """Tests for ``IntervalIndex`` properties, indexing, comparison and set-like ops.

    Most tests take a ``closed`` argument; NOTE(review): it is presumably a
    fixture defined outside this class - confirm in the test configuration.
    """

    # Two adjacent right-closed intervals shared by the comparison tests.
    index = IntervalIndex.from_arrays([0, 1], [1, 2])

    def create_index(self, closed="right"):
        """Return ten unit-width intervals built from the breaks 0..10."""
        return IntervalIndex.from_breaks(range(11), closed=closed)

    def create_index_with_nan(self, closed="right"):
        """Return the ten-interval index with position 1 replaced by NaN."""
        mask = [True, False] + [True] * 8
        return IntervalIndex.from_arrays(
            np.where(mask, np.arange(10), np.nan),
            np.where(mask, np.arange(1, 11), np.nan),
            closed=closed,
        )

    def test_properties(self, closed):
        """Check len/size/shape/left/right/mid/closed, with and without NaN."""
        index = self.create_index(closed=closed)
        assert len(index) == 10
        assert index.size == 10
        assert index.shape == (10,)

        tm.assert_index_equal(index.left, Index(np.arange(10)))
        tm.assert_index_equal(index.right, Index(np.arange(1, 11)))
        tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5)))

        assert index.closed == closed

        ivs = [
            Interval(left, right, closed)
            for left, right in zip(range(10), range(1, 11))
        ]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)

        # with nans
        index = self.create_index_with_nan(closed=closed)
        assert len(index) == 10
        assert index.size == 10
        assert index.shape == (10,)

        expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
        expected_right = expected_left + 1
        expected_mid = expected_left + 0.5
        tm.assert_index_equal(index.left, expected_left)
        tm.assert_index_equal(index.right, expected_right)
        tm.assert_index_equal(index.mid, expected_mid)

        assert index.closed == closed

        ivs = [
            Interval(left, right, closed) if notna(left) else np.nan
            for left, right in zip(expected_left, expected_right)
        ]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
            [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
            pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]),
            pd.to_timedelta(["1ns", "2ms", "3s", "4M", "5H", "6D"]),
        ],
    )
    def test_length(self, closed, breaks):
        """``index.length`` equals the per-interval lengths, NA included."""
        # GH 18789
        index = IntervalIndex.from_breaks(breaks, closed=closed)
        result = index.length
        expected = Index(iv.length for iv in index)
        tm.assert_index_equal(result, expected)

        # with NA
        index = index.insert(1, np.nan)
        result = index.length
        expected = Index(iv.length if notna(iv) else iv for iv in index)
        tm.assert_index_equal(result, expected)

    def test_with_nans(self, closed):
        """``hasnans``/``isna``/``notna`` for indexes without and with NaN."""
        index = self.create_index(closed=closed)
        assert index.hasnans is False

        result = index.isna()
        expected = np.zeros(len(index), dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

        result = index.notna()
        expected = np.ones(len(index), dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

        index = self.create_index_with_nan(closed=closed)
        assert index.hasnans is True

        result = index.isna()
        expected = np.array([False, True] + [False] * (len(index) - 2))
        tm.assert_numpy_array_equal(result, expected)

        result = index.notna()
        expected = np.array([True, False] + [True] * (len(index) - 2))
        tm.assert_numpy_array_equal(result, expected)

    def test_copy(self, closed):
        """``copy`` gives an equal index; a deep copy has a distinct ``left``."""
        expected = self.create_index(closed=closed)

        result = expected.copy()
        assert result.equals(expected)

        result = expected.copy(deep=True)
        assert result.equals(expected)
        assert result.left is not expected.left

    def test_ensure_copied_data(self, closed):
        """Exercise the ``copy`` flag of the ``IntervalIndex`` constructor."""
        # exercise the copy flag in the constructor

        # not copying
        index = self.create_index(closed=closed)
        result = IntervalIndex(index, copy=False)
        tm.assert_numpy_array_equal(
            index.left.values, result.left.values, check_same="same"
        )
        tm.assert_numpy_array_equal(
            index.right.values, result.right.values, check_same="same"
        )

        # by-definition make a copy
        result = IntervalIndex(np.array(index), copy=False)
        tm.assert_numpy_array_equal(
            index.left.values, result.left.values, check_same="copy"
        )
        tm.assert_numpy_array_equal(
            index.right.values, result.right.values, check_same="copy"
        )

    def test_delete(self, closed):
        """Deleting position 0 leaves the intervals built from breaks 1..10."""
        expected = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed)
        result = self.create_index(closed=closed).delete(0)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "data",
        [
            interval_range(0, periods=10, closed="neither"),
            interval_range(1.7, periods=8, freq=2.5, closed="both"),
            interval_range(Timestamp("20170101"), periods=12, closed="left"),
            interval_range(Timedelta("1 day"), periods=6, closed="right"),
        ],
    )
    def test_insert(self, data):
        """Insert valid intervals, invalid values and NA values into ``data``."""
        item = data[0]
        idx_item = IntervalIndex([item])

        # start
        expected = idx_item.append(data)
        result = data.insert(0, item)
        tm.assert_index_equal(result, expected)

        # end
        expected = data.append(idx_item)
        result = data.insert(len(data), item)
        tm.assert_index_equal(result, expected)

        # mid
        expected = data[:3].append(idx_item).append(data[3:])
        result = data.insert(3, item)
        tm.assert_index_equal(result, expected)

        # invalid type
        msg = "can only insert Interval objects and NA into an IntervalArray"
        with pytest.raises(ValueError, match=msg):
            data.insert(1, "foo")

        # invalid closed
        for closed in {"left", "right", "both", "neither"} - {item.closed}:
            msg = f"'value.closed' is '{closed}', expected '{item.closed}'."
            # Build the mismatching interval outside the ``raises`` block so
            # only the insert itself can satisfy the expectation.
            bad_item = Interval(item.left, item.right, closed=closed)
            with pytest.raises(ValueError, match=msg):
                data.insert(1, bad_item)

        # GH 18295 (test missing)
        na_idx = IntervalIndex([np.nan], closed=data.closed)
        for na in [np.nan, None, pd.NA]:
            expected = data[:1].append(na_idx).append(data[1:])
            result = data.insert(1, na)
            tm.assert_index_equal(result, expected)

        if data.left.dtype.kind not in ["m", "M"]:
            # trying to insert pd.NaT into a numeric-dtyped Index should cast/raise
            msg = "can only insert Interval objects and NA into an IntervalArray"
            with pytest.raises(ValueError, match=msg):
                data.insert(1, pd.NaT)
        else:
            # ``expected`` is still the NA-at-position-1 index from the loop.
            result = data.insert(1, pd.NaT)
            tm.assert_index_equal(result, expected)

    def test_is_unique_interval(self, closed):
        """
        Interval specific tests for is_unique in addition to base class tests
        """
        # unique overlapping - distinct endpoints
        idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
        assert idx.is_unique is True

        # unique overlapping - shared endpoints
        idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
        assert idx.is_unique is True

        # unique nested
        idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
        assert idx.is_unique is True

    def test_monotonic(self, closed):
        """Monotonicity flags for ordered, unordered, tied and empty indexes."""
        # increasing non-overlapping
        idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing non-overlapping
        idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # unordered non-overlapping
        idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # increasing overlapping
        idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing overlapping
        idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # unordered overlapping
        idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # increasing overlapping shared endpoints
        idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing overlapping shared endpoints
        idx = pd.IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # stationary
        idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is False

        # empty
        idx = IntervalIndex([], closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

    def test_get_item(self, closed):
        """Scalar and slice ``__getitem__`` on an index ending with NaN."""
        i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed)
        assert i[0] == Interval(0.0, 1.0, closed=closed)
        assert i[1] == Interval(1.0, 2.0, closed=closed)
        assert isna(i[2])

        result = i[0:1]
        expected = IntervalIndex.from_arrays((0.0,), (1.0,), closed=closed)
        tm.assert_index_equal(result, expected)

        result = i[0:2]
        expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0), closed=closed)
        tm.assert_index_equal(result, expected)

        result = i[1:3]
        expected = IntervalIndex.from_arrays(
            (1.0, np.nan), (2.0, np.nan), closed=closed
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_maybe_convert_i8(self, breaks):
        """``_maybe_convert_i8`` maps datetime-like keys to their i8 values."""
        # GH 20636
        index = IntervalIndex.from_breaks(breaks)

        # intervalindex
        result = index._maybe_convert_i8(index)
        expected = IntervalIndex.from_breaks(breaks.asi8)
        tm.assert_index_equal(result, expected)

        # interval
        interval = Interval(breaks[0], breaks[1])
        result = index._maybe_convert_i8(interval)
        expected = Interval(breaks[0].value, breaks[1].value)
        assert result == expected

        # datetimelike index
        result = index._maybe_convert_i8(breaks)
        expected = Index(breaks.asi8)
        tm.assert_index_equal(result, expected)

        # datetimelike scalar
        result = index._maybe_convert_i8(breaks[0])
        expected = breaks[0].value
        assert result == expected

        # list-like of datetimelike scalars
        result = index._maybe_convert_i8(list(breaks))
        expected = Index(breaks.asi8)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5)],
    )
    def test_maybe_convert_i8_nat(self, breaks):
        """``_maybe_convert_i8`` turns NaT entries into NaN in a float index."""
        # GH 20636
        index = IntervalIndex.from_breaks(breaks)

        to_convert = breaks._constructor([pd.NaT] * 3)
        expected = pd.Float64Index([np.nan] * 3)
        result = index._maybe_convert_i8(to_convert)
        tm.assert_index_equal(result, expected)

        to_convert = to_convert.insert(0, breaks[0])
        expected = expected.insert(0, float(breaks[0].value))
        result = index._maybe_convert_i8(to_convert)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [np.arange(5, dtype="int64"), np.arange(5, dtype="float64")],
        ids=lambda x: str(x.dtype),
    )
    @pytest.mark.parametrize(
        "make_key",
        [
            IntervalIndex.from_breaks,
            lambda breaks: Interval(breaks[0], breaks[1]),
            lambda breaks: breaks,
            lambda breaks: breaks[0],
            list,
        ],
        ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
    )
    def test_maybe_convert_i8_numeric(self, breaks, make_key):
        """Numeric keys are returned by ``_maybe_convert_i8`` unchanged."""
        # GH 20636
        index = IntervalIndex.from_breaks(breaks)
        key = make_key(breaks)

        # no conversion occurs for numeric
        result = index._maybe_convert_i8(key)
        assert result is key

    @pytest.mark.parametrize(
        "breaks1, breaks2",
        permutations(
            [
                date_range("20180101", periods=4),
                date_range("20180101", periods=4, tz="US/Eastern"),
                timedelta_range("0 days", periods=4),
            ],
            2,
        ),
        ids=lambda x: str(x.dtype),
    )
    @pytest.mark.parametrize(
        "make_key",
        [
            IntervalIndex.from_breaks,
            lambda breaks: Interval(breaks[0], breaks[1]),
            lambda breaks: breaks,
            lambda breaks: breaks[0],
            list,
        ],
        ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
    )
    def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key):
        """A key of a different datetime-like dtype raises ``ValueError``."""
        # GH 20636
        index = IntervalIndex.from_breaks(breaks1)
        key = make_key(breaks2)

        msg = (
            f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with "
            f"values of dtype {breaks2.dtype}"
        )
        msg = re.escape(msg)
        with pytest.raises(ValueError, match=msg):
            index._maybe_convert_i8(key)

    def test_contains_method(self):
        """``contains`` tests point membership per interval."""
        # can select values that are IN the range of a value
        i = IntervalIndex.from_arrays([0, 1], [1, 2])

        expected = np.array([False, False], dtype="bool")
        actual = i.contains(0)
        tm.assert_numpy_array_equal(actual, expected)
        actual = i.contains(3)
        tm.assert_numpy_array_equal(actual, expected)

        expected = np.array([True, False], dtype="bool")
        actual = i.contains(0.5)
        tm.assert_numpy_array_equal(actual, expected)
        actual = i.contains(1)
        tm.assert_numpy_array_equal(actual, expected)

        # __contains__ not implemented for "interval in interval", follow
        # that for the contains method for now
        with pytest.raises(
            NotImplementedError, match="contains not implemented for two"
        ):
            i.contains(Interval(0, 1))

    def test_contains_dunder(self):
        """``in`` matches only intervals identical to an element of the index."""
        index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right")

        # __contains__ requires perfect matches to intervals.
        assert 0 not in index
        assert 1 not in index
        assert 2 not in index

        assert Interval(0, 1, closed="right") in index
        assert Interval(0, 2, closed="right") not in index
        assert Interval(0, 0.5, closed="right") not in index
        assert Interval(3, 5, closed="right") not in index
        assert Interval(-1, 0, closed="left") not in index
        assert Interval(0, 1, closed="left") not in index
        assert Interval(0, 1, closed="both") not in index

    def test_dropna(self, closed):
        """``dropna`` removes the NaN entry for both construction routes."""
        expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed)

        ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed)
        result = ii.dropna()
        tm.assert_index_equal(result, expected)

        ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed)
        result = ii.dropna()
        tm.assert_index_equal(result, expected)

    def test_non_contiguous(self, closed):
        """``get_indexer`` returns -1 for a point in the gap between intervals."""
        index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
        target = [0.5, 1.5, 2.5]
        actual = index.get_indexer(target)
        expected = np.array([0, -1, 1], dtype="intp")
        tm.assert_numpy_array_equal(actual, expected)

        assert 1.5 not in index

    def test_isin(self, closed):
        """``isin`` against slices, lists, shifted and differently-closed indexes."""
        index = self.create_index(closed=closed)

        expected = np.array([True] + [False] * (len(index) - 1))
        result = index.isin(index[:1])
        tm.assert_numpy_array_equal(result, expected)

        result = index.isin([index[0]])
        tm.assert_numpy_array_equal(result, expected)

        other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
        expected = np.array([True] * (len(index) - 1) + [False])
        result = index.isin(other)
        tm.assert_numpy_array_equal(result, expected)

        result = index.isin(other.tolist())
        tm.assert_numpy_array_equal(result, expected)

        for other_closed in {"right", "left", "both", "neither"}:
            other = self.create_index(closed=other_closed)
            # Every element matches only when both sides share ``closed``.
            expected = np.repeat(closed == other_closed, len(index))
            result = index.isin(other)
            tm.assert_numpy_array_equal(result, expected)

            result = index.isin(other.tolist())
            tm.assert_numpy_array_equal(result, expected)

    def test_comparison(self):
        """Element-wise comparison operators on the class-level ``index``."""
        actual = Interval(0, 1) < self.index
        expected = np.array([False, True])
        tm.assert_numpy_array_equal(actual, expected)

        actual = Interval(0.5, 1.5) < self.index
        expected = np.array([False, True])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index > Interval(0.5, 1.5)
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index == self.index
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index <= self.index
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index >= self.index
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index < self.index
        expected = np.array([False, False])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index > self.index
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left")
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index == self.index.values
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index.values == self.index
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index <= self.index.values
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index != self.index.values
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index > self.index.values
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index.values > self.index
        tm.assert_numpy_array_equal(actual, np.array([False, False]))

        # invalid comparisons
        actual = self.index == 0
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index == self.index.left
        tm.assert_numpy_array_equal(actual, np.array([False, False]))

        msg = (
            "not supported between instances of 'int' and "
            "'pandas._libs.interval.Interval'"
        )
        with pytest.raises(TypeError, match=msg):
            self.index > 0
        with pytest.raises(TypeError, match=msg):
            self.index <= 0
        with pytest.raises(TypeError, match=msg):
            self.index > np.arange(2)

        msg = "Lengths must match to compare"
        with pytest.raises(ValueError, match=msg):
            self.index > np.arange(3)

    def test_missing_values(self, closed):
        """NaN entries: equality across constructors, mismatch error, ``isna``."""
        idx = Index(
            [np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed)]
        )
        idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed)
        assert idx.equals(idx2)

        msg = (
            "missing values must be missing in the same location both left "
            "and right sides"
        )
        with pytest.raises(ValueError, match=msg):
            IntervalIndex.from_arrays(
                [np.nan, 0, 1], np.array([0, 1, 2]), closed=closed
            )

        tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False]))

    def test_sort_values(self, closed):
        """``sort_values`` ascending/descending, and NaN placement."""
        index = self.create_index(closed=closed)

        result = index.sort_values()
        tm.assert_index_equal(result, index)

        result = index.sort_values(ascending=False)
        tm.assert_index_equal(result, index[::-1])

        # with nan
        index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)])

        result = index.sort_values()
        expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan])
        tm.assert_index_equal(result, expected)

        result = index.sort_values(ascending=False, na_position="first")
        expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_datetime(self, tz):
        """``mid``, ``in``, ``contains`` and ``get_indexer`` on daily intervals."""
        start = Timestamp("2000-01-01", tz=tz)
        dates = date_range(start=start, periods=10)
        index = IntervalIndex.from_breaks(dates)

        # test mid
        start = Timestamp("2000-01-01T12:00", tz=tz)
        expected = date_range(start=start, periods=9)
        tm.assert_index_equal(index.mid, expected)

        # __contains__ doesn't check individual points
        assert Timestamp("2000-01-01", tz=tz) not in index
        assert Timestamp("2000-01-01T12", tz=tz) not in index
        assert Timestamp("2000-01-02", tz=tz) not in index
        iv_true = Interval(
            Timestamp("2000-01-02", tz=tz), Timestamp("2000-01-03", tz=tz)
        )
        iv_false = Interval(
            Timestamp("1999-12-31", tz=tz), Timestamp("2000-01-01", tz=tz)
        )
        assert iv_true in index
        assert iv_false not in index

        # .contains does check individual points
        assert not index.contains(Timestamp("2000-01-01", tz=tz)).any()
        assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any()
        assert index.contains(Timestamp("2000-01-02", tz=tz)).any()

        # test get_indexer
        start = Timestamp("1999-12-31T12:00", tz=tz)
        target = date_range(start=start, periods=7, freq="12H")
        actual = index.get_indexer(target)
        expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp")
        tm.assert_numpy_array_equal(actual, expected)

        start = Timestamp("2000-01-08T18:00", tz=tz)
        target = date_range(start=start, periods=7, freq="6H")
        actual = index.get_indexer(target)
        expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp")
        tm.assert_numpy_array_equal(actual, expected)

    def test_append(self, closed):
        """``append`` of one or several indexes; mismatched ``closed`` raises."""
        index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed)
        index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed)

        result = index1.append(index2)
        expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed)
        tm.assert_index_equal(result, expected)

        result = index1.append([index1, index2])
        expected = IntervalIndex.from_arrays(
            [0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed
        )
        tm.assert_index_equal(result, expected)

        msg = "Intervals must all be closed on the same side"
        for other_closed in {"left", "right", "both", "neither"} - {closed}:
            index_other_closed = IntervalIndex.from_arrays(
                [0, 1], [1, 2], closed=other_closed
            )
            with pytest.raises(ValueError, match=msg):
                index1.append(index_other_closed)

    def test_is_non_overlapping_monotonic(self, closed):
        """``is_non_overlapping_monotonic`` for several interval layouts."""
        # Should be True in all cases
        tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is True

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is True

        # Should be False in all cases (overlapping)
        tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        # Should be False in all cases (non-monotonic)
        tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        # Should be False for closed='both', otherwise True (GH16560)
        # Both branches used to build the identical index; build it once.
        idx = IntervalIndex.from_breaks(range(4), closed=closed)
        assert idx.is_non_overlapping_monotonic is (closed != "both")

    @pytest.mark.parametrize(
        "start, shift, na_value",
        [
            (0, 1, np.nan),
            (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT),
            (Timedelta("0 days"), Timedelta("1 day"), pd.NaT),
        ],
    )
    def test_is_overlapping(self, start, shift, na_value, closed):
        """``is_overlapping`` for disjoint, overlapping and touching intervals."""
        # GH 23309
        # see test_interval_tree.py for extensive tests; interface tests here

        # non-overlapping
        tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is False

        # non-overlapping with NA
        tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is False

        # overlapping
        tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is True

        # overlapping with NA
        tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is True

        # common endpoints
        tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        result = index.is_overlapping
        expected = closed == "both"
        assert result is expected

        # common endpoints with NA
        tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        result = index.is_overlapping
        assert result is expected

    @pytest.mark.parametrize(
        "tuples",
        [
            list(zip(range(10), range(1, 11))),
            list(
                zip(
                    date_range("20170101", periods=10),
                    date_range("20170101", periods=10),
                )
            ),
            list(
                zip(
                    timedelta_range("0 days", periods=10),
                    timedelta_range("1 day", periods=10),
                )
            ),
        ],
    )
    def test_to_tuples(self, tuples):
        """``to_tuples`` round-trips the tuples the index was built from."""
        # GH 18756
        idx = IntervalIndex.from_tuples(tuples)
        result = idx.to_tuples()
        expected = Index(com.asarray_tuplesafe(tuples))
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "tuples",
        [
            list(zip(range(10), range(1, 11))) + [np.nan],
            list(
                zip(
                    date_range("20170101", periods=10),
                    date_range("20170101", periods=10),
                )
            )
            + [np.nan],
            list(
                zip(
                    timedelta_range("0 days", periods=10),
                    timedelta_range("1 day", periods=10),
                )
            )
            + [np.nan],
        ],
    )
    @pytest.mark.parametrize("na_tuple", [True, False])
    def test_to_tuples_na(self, tuples, na_tuple):
        """``to_tuples(na_tuple=...)`` controls how the trailing NA is rendered."""
        # GH 18756
        idx = IntervalIndex.from_tuples(tuples)
        result = idx.to_tuples(na_tuple=na_tuple)

        # check the non-NA portion
        expected_notna = Index(com.asarray_tuplesafe(tuples[:-1]))
        result_notna = result[:-1]
        tm.assert_index_equal(result_notna, expected_notna)

        # check the NA portion
        result_na = result[-1]
        if na_tuple:
            assert isinstance(result_na, tuple)
            assert len(result_na) == 2
            assert all(isna(x) for x in result_na)
        else:
            assert isna(result_na)

    def test_nbytes(self):
        """``nbytes`` counts both the left and the right int64 arrays."""
        # GH 19209
        left = np.arange(0, 4, dtype="i8")
        right = np.arange(1, 5, dtype="i8")

        result = IntervalIndex.from_arrays(left, right).nbytes
        expected = 64  # 4 * 8 * 2
        assert result == expected

    @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"])
    def test_set_closed(self, name, closed, new_closed):
        """``set_closed`` changes ``closed`` and keeps the rest of the index."""
        # GH 21670
        index = interval_range(0, 5, closed=closed, name=name)
        result = index.set_closed(new_closed)
        expected = interval_range(0, 5, closed=new_closed, name=name)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False])
    def test_set_closed_errors(self, bad_closed):
        """``set_closed`` rejects values that are not a valid ``closed`` option."""
        # GH 21670
        index = interval_range(0, 5)
        msg = f"invalid option for 'closed': {bad_closed}"
        with pytest.raises(ValueError, match=msg):
            index.set_closed(bad_closed)

    def test_is_all_dates(self):
        """An IntervalIndex of Timestamp intervals is not ``is_all_dates``."""
        # GH 23576
        year_2017 = pd.Interval(
            pd.Timestamp("2017-01-01 00:00:00"), pd.Timestamp("2018-01-01 00:00:00")
        )
        year_2017_index = pd.IntervalIndex([year_2017])
        assert not year_2017_index.is_all_dates

    @pytest.mark.parametrize("key", [[5], (2, 3)])
    def test_get_value_non_scalar_errors(self, key):
        """``get_value`` with a non-scalar key raises ``InvalidIndexError``."""
        # GH 31117
        idx = IntervalIndex.from_tuples([(1, 3), (2, 4), (3, 5), (7, 10), (3, 10)])
        s = pd.Series(range(len(idx)), index=idx)

        # ``str(key)`` is "[5]" or "(2, 3)"; unescaped, those are a regex
        # character class / group rather than the literal text.
        msg = re.escape(str(key))
        with pytest.raises(InvalidIndexError, match=msg):
            with tm.assert_produces_warning(FutureWarning):
                idx.get_value(s, key)

    @pytest.mark.parametrize("closed", ["left", "right", "both"])
    def test_pickle_round_trip_closed(self, closed):
        """Pickling round-trips the index, including its ``closed`` side."""
        # https://github.com/pandas-dev/pandas/issues/35658
        idx = IntervalIndex.from_tuples([(1, 2), (2, 3)], closed=closed)
        result = tm.round_trip_pickle(idx)
        tm.assert_index_equal(result, idx)
def test_dir():
    """``dir()`` on an IntervalIndex succeeds and does not list ``str``."""
    # GH#27571 dir(interval_index) should not raise
    listed_names = dir(IntervalIndex.from_arrays([0, 1], [1, 2]))
    assert "str" not in listed_names
msg = "bins must increase monotonically" data = [0.2, 1.4, 2.5, 6.2, 9.7, 2.1] with pytest.raises(ValueError, match=msg): cut(data, [0.1, 1.5, 1, 10]) @pytest.mark.parametrize( "x, bins, expected", [ ( date_range("2017-12-31", periods=3), [Timestamp.min, Timestamp("2018-01-01"), Timestamp.max], IntervalIndex.from_tuples( [ (Timestamp.min, Timestamp("2018-01-01")), (Timestamp("2018-01-01"), Timestamp.max), ] ), ), ( [-1, 0, 1], np.array( [np.iinfo(np.int64).min, 0, np.iinfo(np.int64).max], dtype="int64" ), IntervalIndex.from_tuples( [(np.iinfo(np.int64).min, 0), (0, np.iinfo(np.int64).max)] ), ), ( [ np.timedelta64(-1, "ns"),
def test_label_precision():
    """``cut`` into 4 bins with ``precision=2`` yields the expected categories."""
    values = np.arange(0, 0.73, 0.01)
    categories = cut(values, 4, precision=2).categories

    expected_breaks = [-0.00072, 0.18, 0.36, 0.54, 0.72]
    tm.assert_index_equal(categories, IntervalIndex.from_breaks(expected_breaks))
def setup_method(self, method):
    """Attach the interval indexes used by the tests to the instance.

    ``method`` is accepted but not used by this body.
    """
    self.index = IntervalIndex.from_arrays([0, 1], [1, 2])

    # Same two intervals with a missing entry in the middle.
    tuples_with_gap = [(0, 1), np.nan, (1, 2)]
    self.index_with_nan = IntervalIndex.from_tuples(tuples_with_gap)

    self.indices = {"intervalIndex": tm.makeIntervalIndex(10)}
def test_slice_locs_with_interval(self):
    """``slice_locs`` with Interval bounds on sorted and unsorted indexes."""
    low, high = Interval(0, 2), Interval(2, 4)

    # The same five queries are issued against every index, in this order.
    queries = (
        {"start": low, "end": high},
        {"start": low},
        {"end": high},
        {"end": low},
        {"start": high, "end": low},
    )

    def check(tuples, expected_locs):
        index = IntervalIndex.from_tuples(tuples)
        for kwargs, locs in zip(queries, expected_locs):
            assert index.slice_locs(**kwargs) == locs

    # increasing monotonically
    check([(0, 2), (1, 3), (2, 4)], [(0, 3), (0, 3), (0, 3), (0, 1), (2, 1)])

    # decreasing monotonically
    check([(2, 4), (1, 3), (0, 2)], [(2, 1), (2, 3), (0, 1), (0, 3), (0, 3)])

    # sorted duplicates
    check([(0, 2), (0, 2), (2, 4)], [(0, 3), (0, 3), (0, 3), (0, 2), (2, 2)])

    # unsorted duplicates
    index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])
    left_msg = re.escape(
        '"Cannot get left slice bound for non-unique label: '
        "Interval(0, 2, closed='right')\""
    )
    right_msg = re.escape(
        '"Cannot get right slice bound for non-unique label: '
        "Interval(0, 2, closed='right')\""
    )

    with pytest.raises(KeyError, match=left_msg):
        index.slice_locs(start=low, end=high)

    with pytest.raises(KeyError, match=left_msg):
        index.slice_locs(start=low)

    assert index.slice_locs(end=high) == (0, 2)

    with pytest.raises(KeyError, match=right_msg):
        index.slice_locs(end=low)

    with pytest.raises(KeyError, match=right_msg):
        index.slice_locs(start=high, end=low)

    # another unsorted duplicates
    check(
        [(0, 2), (0, 2), (2, 4), (1, 3)],
        [(0, 3), (0, 4), (0, 3), (0, 2), (2, 2)],
    )
def test_repr_missing(self, constructor, expected): # GH 25984 index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)]) obj = constructor(list("abc"), index=index) result = repr(obj) assert result == expected
def test_to_native_types(self, tuples, closed, expected_data):
    """``_format_native_types`` output matches ``expected_data`` as an array."""
    # GH 28210
    formatted = IntervalIndex.from_tuples(
        tuples, closed=closed
    )._format_native_types()
    tm.assert_numpy_array_equal(formatted, np.array(expected_data))
def test_value_counts_bins(index_or_series):
    """Check ``value_counts`` with ``bins``, with missing values and on empty input.

    ``index_or_series`` is the constructor used to box the string data; the
    numeric ``bins`` checks always run against a ``Series``.
    """
    box = index_or_series
    obj = box(["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"])

    # bins
    with pytest.raises(
        TypeError, match="bins argument only works with numeric data"
    ):
        obj.value_counts(bins=1)

    numeric = Series([1, 1, 2, 3])
    tm.assert_series_equal(
        numeric.value_counts(bins=1), Series({Interval(0.997, 3.0): 4})
    )
    tm.assert_series_equal(
        numeric.value_counts(bins=1, normalize=True),
        Series({Interval(0.997, 3.0): 1.0}),
    )

    if isinstance(numeric, Index):
        tm.assert_index_equal(numeric.unique(), Index([1, 2, 3]))
    else:
        tm.assert_numpy_array_equal(
            numeric.unique(), np.array([1, 2, 3], dtype=np.int64)
        )
    assert numeric.nunique() == 3

    # these return the same
    bin_index = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]).take(
        [0, 3, 1, 2]
    )
    for dropna in (True, False):
        tm.assert_series_equal(
            numeric.value_counts(bins=4, dropna=dropna),
            Series([2, 1, 1, 0], index=bin_index),
        )
    tm.assert_series_equal(
        numeric.value_counts(bins=4, normalize=True),
        Series([0.5, 0.25, 0.25, 0], index=bin_index),
    )

    # handle NA's properly
    obj = box(["a", "b", "b", "b", np.nan, np.nan, "d", "d", "a", "a", "b"])
    tm.assert_series_equal(
        obj.value_counts(), Series([4, 3, 2], index=["b", "a", "d"])
    )

    if isinstance(obj, Index):
        tm.assert_index_equal(obj.unique(), Index(["a", "b", np.nan, "d"]))
    else:
        tm.assert_numpy_array_equal(
            obj.unique(), np.array(["a", "b", np.nan, "d"], dtype=object)
        )
    assert obj.nunique() == 3

    obj = box({}) if box is dict else box({}, dtype=object)
    tm.assert_series_equal(
        obj.value_counts(), Series([], dtype=np.int64), check_index_type=False
    )
    # returned dtype differs depending on original
    if isinstance(obj, Index):
        tm.assert_index_equal(obj.unique(), Index([]), exact=False)
    else:
        tm.assert_numpy_array_equal(obj.unique(), np.array([]), check_dtype=False)

    assert obj.nunique() == 0
def setup_cache(self):
    """Build and return a Series of 1,000,000 integers on a from_breaks IntervalIndex."""
    size = 1000000
    # size + 1 break points produce exactly `size` intervals.
    breaks = np.arange(size + 1)
    return Series(np.arange(size), index=IntervalIndex.from_breaks(breaks))
def test_monotonic(self, closed): # increasing non-overlapping idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing non-overlapping idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # unordered non-overlapping idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # increasing overlapping idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing overlapping idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # unordered overlapping idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # increasing overlapping shared endpoints idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert 
idx._is_strictly_monotonic_decreasing is False # decreasing overlapping shared endpoints idx = pd.IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # stationary idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is False # empty idx = IntervalIndex([], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True
def test_delete(self, closed):
    """Deleting position 0 of ``self.create_index`` leaves the intervals over breaks 1..10."""
    remaining = self.create_index(closed=closed).delete(0)
    tm.assert_index_equal(
        remaining, IntervalIndex.from_breaks(np.arange(1, 11), closed=closed)
    )
def series_with_interval_index(self):
    """Return a Series of 0..4 indexed by right-closed intervals over breaks 0..5."""
    index = IntervalIndex.from_breaks(np.arange(6), "right")
    values = np.arange(5)
    return Series(values, index)
def test_get_indexer_length_one_interval(self, size, closed): # GH 17284 index = IntervalIndex.from_tuples([(0, 5)], closed=closed) result = index.get_indexer([Interval(0, 5, closed)] * size) expected = np.array([0] * size, dtype="intp") tm.assert_numpy_array_equal(result, expected)
def test_to_tuples(self, tuples): # GH 18756 idx = IntervalIndex.from_tuples(tuples) result = idx.to_tuples() expected = Index(com.asarray_tuplesafe(tuples)) tm.assert_index_equal(result, expected)
class TestGetIndexer:
    """Tests for ``IntervalIndex.get_indexer`` and ``get_indexer_non_unique``."""

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([Interval(2, 4, closed="right")], [1]),
            ([Interval(2, 4, closed="left")], [-1]),
            ([Interval(2, 4, closed="both")], [-1]),
            ([Interval(2, 4, closed="neither")], [-1]),
            ([Interval(1, 4, closed="right")], [-1]),
            ([Interval(0, 4, closed="right")], [-1]),
            ([Interval(0.5, 1.5, closed="right")], [-1]),
            ([Interval(2, 4, closed="right"), Interval(0, 1, closed="right")], [1, -1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="right")], [1, 1]),
            ([Interval(5, 7, closed="right"), Interval(2, 4, closed="right")], [2, 1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="left")], [1, -1]),
        ],
    )
    def test_get_indexer_with_interval(self, query, expected):
        """Interval queries are compared against the expected positions (-1 = no match)."""
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (5, 7)], closed="right")

        tm.assert_numpy_array_equal(
            index.get_indexer(query), np.array(expected, dtype="intp")
        )

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], [-1]),
            ([0], [-1]),
            ([0.5], [0]),
            ([1], [0]),
            ([1.5], [1]),
            ([2], [1]),
            ([2.5], [-1]),
            ([3], [-1]),
            ([3.5], [2]),
            ([4], [2]),
            ([4.5], [-1]),
            ([1, 2], [0, 1]),
            ([1, 2, 3], [0, 1, -1]),
            ([1, 2, 3, 4], [0, 1, -1, 2]),
            ([1, 2, 3, 4, 2], [0, 1, -1, 2, 1]),
        ],
    )
    def test_get_indexer_with_int_and_float(self, query, expected):
        """Scalar queries are compared against the expected positions (-1 = no match)."""
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed="right")

        tm.assert_numpy_array_equal(
            index.get_indexer(query), np.array(expected, dtype="intp")
        )

    @pytest.mark.parametrize("item", [[3], np.arange(0.5, 5, 0.5)])
    def test_get_indexer_length_one(self, item, closed):
        """Every element of ``item`` maps to position 0 of a length-one index."""
        # GH 17284
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        all_zero = np.array([0] * len(item), dtype="intp")
        tm.assert_numpy_array_equal(index.get_indexer(item), all_zero)

    @pytest.mark.parametrize("size", [1, 5])
    def test_get_indexer_length_one_interval(self, size, closed):
        """Repeated copies of the sole interval all map to position 0."""
        # GH 17284
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        queries = [Interval(0, 5, closed)] * size
        all_zero = np.array([0] * size, dtype="intp")
        tm.assert_numpy_array_equal(index.get_indexer(queries), all_zero)

    @pytest.mark.parametrize(
        "target",
        [
            IntervalIndex.from_tuples([(7, 8), (1, 2), (3, 4), (0, 1)]),
            IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4), np.nan]),
            IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed="both"),
            [-1, 0, 0.5, 1, 2, 2.5, np.nan],
            ["foo", "foo", "bar", "baz"],
        ],
    )
    def test_get_indexer_categorical(self, target, ordered):
        """A CategoricalIndex target yields the same indexer as the plain target."""
        # GH 30063: categorical and non-categorical results should be consistent
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])
        from_categorical = index.get_indexer(
            CategoricalIndex(target, ordered=ordered)
        )
        tm.assert_numpy_array_equal(from_categorical, index.get_indexer(target))

    @pytest.mark.parametrize(
        "tuples, closed",
        [
            ([(0, 2), (1, 3), (3, 4)], "neither"),
            ([(0, 5), (1, 4), (6, 7)], "left"),
            ([(0, 1), (0, 1), (1, 2)], "right"),
            ([(0, 1), (2, 3), (3, 4)], "both"),
        ],
    )
    def test_get_indexer_errors(self, tuples, closed):
        """``get_indexer`` raises InvalidIndexError for each parametrized index."""
        # IntervalIndex needs non-overlapping for uniqueness when querying
        index = IntervalIndex.from_tuples(tuples, closed=closed)

        with pytest.raises(
            InvalidIndexError,
            match=(
                "cannot handle overlapping indices; use "
                "IntervalIndex.get_indexer_non_unique"
            ),
        ):
            index.get_indexer([0, 2])

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], ([-1], [0])),
            ([0], ([0], [])),
            ([0.5], ([0], [])),
            ([1], ([0, 1], [])),
            ([1.5], ([0, 1], [])),
            ([2], ([0, 1, 2], [])),
            ([2.5], ([1, 2], [])),
            ([3], ([2], [])),
            ([3.5], ([2], [])),
            ([4], ([-1], [0])),
            ([4.5], ([-1], [0])),
            ([1, 2], ([0, 1, 0, 1, 2], [])),
            ([1, 2, 3], ([0, 1, 0, 1, 2, 2], [])),
            ([1, 2, 3, 4], ([0, 1, 0, 1, 2, 2, -1], [3])),
            ([1, 2, 3, 4, 2], ([0, 1, 0, 1, 2, 2, -1, 0, 1, 2], [3])),
        ],
    )
    def test_get_indexer_non_unique_with_int_and_float(self, query, expected):
        """Check both the indexer and the missing array of ``get_indexer_non_unique``."""
        index = IntervalIndex.from_tuples([(0, 2.5), (1, 3), (2, 4)], closed="left")
        want_indexer, want_missing = expected

        got_indexer, got_missing = index.get_indexer_non_unique(query)

        tm.assert_numpy_array_equal(got_indexer, np.array(want_indexer, dtype="intp"))
        tm.assert_numpy_array_equal(got_missing, np.array(want_missing, dtype="intp"))

        # TODO we may also want to test get_indexer for the case when
        # the intervals are duplicated, decreasing, non-monotonic, etc..

    def test_get_indexer_non_monotonic(self):
        """``get_indexer`` on a non-monotonic index, against another index and a slice of itself."""
        # GH 16410
        shuffled = IntervalIndex.from_tuples([(2, 3), (4, 5), (0, 1)])
        other = IntervalIndex.from_tuples([(0, 1), (2, 3), (6, 7), (8, 9)])

        tm.assert_numpy_array_equal(
            shuffled.get_indexer(other), np.array([2, 0, -1, -1], dtype=np.intp)
        )
        tm.assert_numpy_array_equal(
            shuffled.get_indexer(shuffled[1:]), np.array([1, 2], dtype=np.intp)
        )
def test_linspace_dst_transition(self, start, mid, end): # GH 20976: linspace behavior defined from start/end/periods # accounts for the hour gained/lost during DST transition result = interval_range(start=start, end=end, periods=2) expected = IntervalIndex.from_breaks([start, mid, end]) tm.assert_index_equal(result, expected)
def test_get_loc_decreasing(self, values): # GH 25860 index = IntervalIndex.from_arrays(values[1:], values[:-1]) result = index.get_loc(index[0]) expected = 0 assert result == expected
def create_series_categorical_intervals(left, right, closed="right"):
    """Return a Series wrapping a Categorical of intervals built from ``left``/``right``."""
    intervals = IntervalIndex.from_arrays(left, right, closed)
    categorical = Categorical(intervals)
    return Series(categorical)
def test_get_indexer_length_one(self, item, closed): # GH 17284 index = IntervalIndex.from_tuples([(0, 5)], closed=closed) result = index.get_indexer(item) expected = np.array([0] * len(item), dtype='intp') tm.assert_numpy_array_equal(result, expected)
def test_labels(right, breaks, closed): arr = np.tile(np.arange(0, 1.01, 0.1), 4) result, bins = cut(arr, 4, retbins=True, right=right) ex_levels = IntervalIndex.from_breaks(breaks, closed=closed) tm.assert_index_equal(result.categories, ex_levels)
def test_get_loc_length_one(self, item, closed): # GH 20921 index = IntervalIndex.from_tuples([(0, 5)], closed=closed) result = index.get_loc(item) assert result == 0
def test_searchsorted_invalid_argument(arg):
    """``searchsorted(arg)`` on an IntervalIndex raises TypeError with the '<' message."""
    values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
    pattern = "'<' not supported between instances of 'pandas._libs.interval.Interval' and "

    with pytest.raises(TypeError, match=pattern):
        values.searchsorted(arg)
def test_slice_locs_fails(self):
    """``slice_locs`` with scalar bounds on this unsorted index raises KeyError."""
    unsorted_tuples = [(1, 2), (0, 1), (2, 3)]
    index = IntervalIndex.from_tuples(unsorted_tuples)

    with pytest.raises(KeyError):
        index.slice_locs(1, 2)
def test_pickle_round_trip_closed(self, closed): # https://github.com/pandas-dev/pandas/issues/35658 idx = IntervalIndex.from_tuples([(1, 2), (2, 3)], closed=closed) result = tm.round_trip_pickle(idx) tm.assert_index_equal(result, idx)
def create_index_with_nan(self, closed='right'):
    """Return the ten intervals over breaks 0..10 with the second entry replaced by NaN."""
    # False marks the single position whose bounds become NaN.
    keep = [True, False] + [True] * 8
    left = np.where(keep, np.arange(10), np.nan)
    right = np.where(keep, np.arange(1, 11), np.nan)
    return IntervalIndex.from_arrays(left, right, closed=closed)
def makeIntervalIndex(k=10, name=None, **kwargs):
    """
    Make a length-k IntervalIndex from k + 1 evenly spaced breaks on [0, 100].

    Extra keyword arguments are forwarded to ``IntervalIndex.from_breaks``.
    """
    breaks = np.linspace(0, 100, num=(k + 1))
    return IntervalIndex.from_breaks(breaks, name=name, **kwargs)
def create_index(self, closed='right'):
    """Return the ten unit intervals whose break points are 0 through 10."""
    breaks = range(11)
    return IntervalIndex.from_breaks(breaks, closed=closed)
def test_construction(self): result = interval_range(0, 5, name='foo', closed='both') expected = IntervalIndex.from_breaks(np.arange(0, 5), name='foo', closed='both') tm.assert_index_equal(result, expected)
def test_contains_interval(self, item, expected): # GH 23705 ci = CategoricalIndex(IntervalIndex.from_breaks(range(3))) result = item in ci assert result is expected