def test_is_non_overlapping_monotonic(self): # Should be True in all cases tpls = [(0, 1), (2, 3), (4, 5), (6, 7)] for closed in ('left', 'right', 'neither', 'both'): idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is True idx = IntervalIndex.from_tuples(reversed(tpls), closed=closed) assert idx.is_non_overlapping_monotonic is True # Should be False in all cases (overlapping) tpls = [(0, 2), (1, 3), (4, 5), (6, 7)] for closed in ('left', 'right', 'neither', 'both'): idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(reversed(tpls), closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False in all cases (non-monotonic) tpls = [(0, 1), (2, 3), (6, 7), (4, 5)] for closed in ('left', 'right', 'neither', 'both'): idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(reversed(tpls), closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False for closed='both', overwise True (GH16560) idx = IntervalIndex.from_breaks(range(4), closed='both') assert idx.is_non_overlapping_monotonic is False for closed in ('left', 'right', 'neither'): idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is True
def test_constructors_errors_string(self, data):
    """Every constructor must raise TypeError for ``data`` (GH 19016)."""
    # GH 19016
    lower, upper = data[:-1], data[1:]
    pairs = lzip(lower, upper)
    # falls back to ``data`` itself when no intervals could be built
    intervals = [Interval(lo, hi) for lo, hi in pairs] or data

    msg = ('category, object, and string subtypes are not supported '
           'for IntervalIndex')

    builders = [
        lambda: IntervalIndex(intervals),
        lambda: Index(intervals),
        lambda: IntervalIndex.from_intervals(intervals),
        lambda: IntervalIndex.from_breaks(data),
        lambda: IntervalIndex.from_arrays(lower, upper),
        lambda: IntervalIndex.from_tuples(pairs),
    ]
    for build in builders:
        with tm.assert_raises_regex(TypeError, msg):
            build()
def slice_locs_cases(self, breaks):
    """Table-driven ``slice_locs`` checks for small IntervalIndex objects.

    ``breaks`` is accepted for the callers' benefit but is not used here.
    """
    # TODO: same tests for more index types
    right_closed = IntervalIndex.from_breaks([0, 1, 2], closed='right')
    self.assertEqual(right_closed.slice_locs(), (0, 2))

    positional = [
        ((0, 1), (0, 1)),
        ((1, 1), (0, 1)),
        ((0, 2), (0, 2)),
        ((0.5, 1.5), (0, 2)),
        ((0, 0.5), (0, 1)),
    ]
    for bounds, expected in positional:
        self.assertEqual(right_closed.slice_locs(*bounds), expected)

    keyword = [
        ({'start': 1}, (0, 2)),
        ({'start': 1.2}, (1, 2)),
        ({'end': 1}, (0, 1)),
        ({'end': 1.1}, (0, 2)),
        ({'end': 1.0}, (0, 1)),
    ]
    for kwargs, expected in keyword:
        self.assertEqual(right_closed.slice_locs(**kwargs), expected)

    # out-of-range on the left: only require start == stop
    start, stop = right_closed.slice_locs(-1, -1)
    self.assertEqual(start, stop)

    open_both = IntervalIndex.from_breaks([0, 1, 2], closed='neither')
    neither_cases = [
        ((0, 1), (0, 1)),
        ((0, 2), (0, 2)),
        ((0.5, 1.5), (0, 2)),
        ((1, 1), (1, 1)),
        ((1, 2), (1, 2)),
    ]
    for bounds, expected in neither_cases:
        self.assertEqual(open_both.slice_locs(*bounds), expected)

    closed_both = IntervalIndex.from_breaks([0, 1, 2], closed='both')
    for bounds in ((1, 1), (1, 2)):
        self.assertEqual(closed_both.slice_locs(*bounds), (0, 2))
def test_constructors_errors(self):
    """Invalid constructor inputs raise the expected exception type."""
    cases = [
        # scalar
        (TypeError, lambda: IntervalIndex(5)),
        # not an interval
        (TypeError, lambda: IntervalIndex([0, 1])),
        (TypeError, lambda: IntervalIndex.from_intervals([0, 1])),
        # invalid closed
        (ValueError,
         lambda: IntervalIndex.from_arrays([0, 1], [1, 2],
                                           closed='invalid')),
        # mismatched closed
        (ValueError,
         lambda: IntervalIndex.from_intervals(
             [Interval(0, 1), Interval(1, 2, closed='left')])),
        (ValueError, lambda: IntervalIndex.from_arrays([0, 10], [3, 5])),
        (ValueError,
         lambda: Index([Interval(0, 1), Interval(2, 3, closed='left')])),
        # no point in nesting periods in an IntervalIndex
        (ValueError,
         lambda: IntervalIndex.from_breaks(
             pd.period_range('2000-01-01', periods=3))),
    ]
    for exc_type, build in cases:
        with pytest.raises(exc_type):
            build()
def test_is_non_overlapping_monotonic(self, closed): # Should be True in all cases tpls = [(0, 1), (2, 3), (4, 5), (6, 7)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is True idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is True # Should be False in all cases (overlapping) tpls = [(0, 2), (1, 3), (4, 5), (6, 7)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False in all cases (non-monotonic) tpls = [(0, 1), (2, 3), (6, 7), (4, 5)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False for closed='both', otherwise True (GH16560) if closed == 'both': idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is False else: idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is True
def test_constructor_errors(self):
    """``from_breaks`` rejects categorical breaks with a TypeError."""
    # GH 19016: categorical data
    categorical_breaks = Categorical(list('01234abcde'), ordered=True)
    expected_msg = ('category, object, and string subtypes are not '
                    'supported for IntervalIndex')
    with tm.assert_raises_regex(TypeError, expected_msg):
        IntervalIndex.from_breaks(categorical_breaks)
def slice_locs_cases(self, breaks):
    """Table-driven ``slice_locs`` checks for small IntervalIndex objects.

    ``breaks`` is accepted for the callers' benefit but is not used here.
    """
    # TODO: same tests for more index types
    right_closed = IntervalIndex.from_breaks([0, 1, 2], closed='right')
    assert right_closed.slice_locs() == (0, 2)

    positional = [
        ((0, 1), (0, 1)),
        ((1, 1), (0, 1)),
        ((0, 2), (0, 2)),
        ((0.5, 1.5), (0, 2)),
        ((0, 0.5), (0, 1)),
    ]
    for bounds, expected in positional:
        assert right_closed.slice_locs(*bounds) == expected

    keyword = [
        ({'start': 1}, (0, 2)),
        ({'start': 1.2}, (1, 2)),
        ({'end': 1}, (0, 1)),
        ({'end': 1.1}, (0, 2)),
        ({'end': 1.0}, (0, 1)),
    ]
    for kwargs, expected in keyword:
        assert right_closed.slice_locs(**kwargs) == expected

    assert right_closed.slice_locs(-1, -1) == (0, 0)

    open_both = IntervalIndex.from_breaks([0, 1, 2], closed='neither')
    neither_cases = [
        ((0, 1), (0, 1)),
        ((0, 2), (0, 2)),
        ((0.5, 1.5), (0, 2)),
        ((1, 1), (1, 1)),
        ((1, 2), (1, 2)),
    ]
    for bounds, expected in neither_cases:
        assert open_both.slice_locs(*bounds) == expected

    closed_both = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)],
                                            closed='both')
    assert closed_both.slice_locs(1, 1) == (0, 1)
    assert closed_both.slice_locs(1, 2) == (0, 2)
def test_maybe_convert_i8(self, breaks):
    """``_maybe_convert_i8`` maps datetime-like keys to their i8 form."""
    # GH 20636
    index = IntervalIndex.from_breaks(breaks)

    # intervalindex
    converted = index._maybe_convert_i8(index)
    tm.assert_index_equal(converted,
                          IntervalIndex.from_breaks(breaks.asi8))

    # interval
    first, second = breaks[0], breaks[1]
    converted = index._maybe_convert_i8(Interval(first, second))
    assert converted == Interval(first.value, second.value)

    # datetimelike index
    converted = index._maybe_convert_i8(breaks)
    tm.assert_index_equal(converted, Index(breaks.asi8))

    # datetimelike scalar
    converted = index._maybe_convert_i8(breaks[0])
    assert converted == breaks[0].value

    # list-like of datetimelike scalars
    converted = index._maybe_convert_i8(list(breaks))
    tm.assert_index_equal(converted, Index(breaks.asi8))
def test_constructors(self):
    """All construction routes agree with the fixture ``self.index``."""
    target = self.index

    built = IntervalIndex.from_breaks(np.arange(3), closed='right')
    self.assertTrue(target.equals(built))

    # same breaks, different closed side -> must not compare equal
    left_closed = IntervalIndex.from_breaks(np.arange(3), closed='left')
    self.assertFalse(target.equals(left_closed))

    built = IntervalIndex.from_intervals([Interval(0, 1), Interval(1, 2)])
    self.assertTrue(target.equals(built))

    built = IntervalIndex([Interval(0, 1), Interval(1, 2)])
    self.assertTrue(target.equals(built))

    starts = np.arange(2)
    stops = np.arange(2) + 1
    built = IntervalIndex.from_arrays(starts, stops, closed='right')
    self.assertTrue(target.equals(built))

    # the generic Index constructor must infer IntervalIndex
    for source in ([Interval(0, 1), Interval(1, 2)], target):
        built = Index(source)
        assert isinstance(built, IntervalIndex)
        self.assertTrue(target.equals(built))
def test_constructors(self, data, closed, name):
    """Every constructor reproduces the ``_simple_new`` reference index."""
    left, right = data[:-1], data[1:]
    pairs = lzip(left, right)
    ivs = [Interval(lo, hi, closed=closed) for lo, hi in pairs]
    expected = IntervalIndex._simple_new(
        left=left, right=right, closed=closed, name=name)

    # validate expected
    assert expected.closed == closed
    assert expected.name == name
    assert expected.dtype.subtype == data.dtype
    tm.assert_index_equal(expected.left, data[:-1])
    tm.assert_index_equal(expected.right, data[1:])

    # validated constructors
    tm.assert_index_equal(IntervalIndex(ivs, name=name), expected)
    tm.assert_index_equal(
        IntervalIndex.from_intervals(ivs, name=name), expected)
    tm.assert_index_equal(
        IntervalIndex.from_breaks(data, closed=closed, name=name),
        expected)
    tm.assert_index_equal(
        IntervalIndex.from_arrays(left, right, closed=closed, name=name),
        expected)
    tm.assert_index_equal(
        IntervalIndex.from_tuples(
            lzip(left, right), closed=closed, name=name),
        expected)

    via_index = Index(ivs, name=name)
    assert isinstance(via_index, IntervalIndex)
    tm.assert_index_equal(via_index, expected)

    # idempotent
    tm.assert_index_equal(Index(expected), expected)
    tm.assert_index_equal(IntervalIndex(expected), expected)
    tm.assert_index_equal(IntervalIndex.from_intervals(expected), expected)
    tm.assert_index_equal(
        IntervalIndex.from_intervals(expected.values, name=expected.name),
        expected)

    round_trip = dict(closed=expected.closed, name=expected.name)

    left, right = expected.left, expected.right
    tm.assert_index_equal(
        IntervalIndex.from_arrays(left, right, **round_trip), expected)

    tm.assert_index_equal(
        IntervalIndex.from_tuples(expected.to_tuples(), **round_trip),
        expected)

    breaks = expected.left.tolist() + [expected.right[-1]]
    tm.assert_index_equal(
        IntervalIndex.from_breaks(breaks, **round_trip), expected)
def test_intersection(self):
    """Intersection with a shifted index and with itself."""
    shifted = IntervalIndex.from_breaks([1, 2, 3])
    common = IntervalIndex.from_breaks([1, 2])
    result = self.index.intersection(shifted)
    self.assertTrue(common.equals(result))

    # intersecting an index with itself is the identity
    self_result = self.index.intersection(self.index)
    tm.assert_index_equal(self_result, self.index)
def test_intersection(self, closed):
    """Intersection with a shifted index and with itself, per ``closed``."""
    base = self.create_index(closed=closed)

    shifted = IntervalIndex.from_breaks([1, 2, 3], closed=closed)
    common = IntervalIndex.from_breaks([1, 2], closed=closed)
    result = base.intersection(shifted)
    assert common.equals(result)

    # intersecting an index with itself is the identity
    tm.assert_index_equal(base.intersection(base), base)
def setup(self, N): left = np.append(np.arange(N), np.array(0)) right = np.append(np.arange(1, N + 1), np.array(1)) self.intv = IntervalIndex.from_arrays(left, right) self.intv._engine self.left = IntervalIndex.from_breaks(np.arange(N)) self.right = IntervalIndex.from_breaks(np.arange(N - 3, 2 * N - 3))
def test_labels(self): arr = np.tile(np.arange(0, 1.01, 0.1), 4) result, bins = cut(arr, 4, retbins=True) ex_levels = IntervalIndex.from_breaks([-1e-3, 0.25, 0.5, 0.75, 1]) tm.assert_index_equal(result.categories, ex_levels) result, bins = cut(arr, 4, retbins=True, right=False) ex_levels = IntervalIndex.from_breaks([0, 0.25, 0.5, 0.75, 1 + 1e-3], closed='left') tm.assert_index_equal(result.categories, ex_levels)
def test_basic_dtype(self): assert is_interval_dtype('interval[int64]') assert is_interval_dtype(IntervalIndex.from_tuples([(0, 1)])) assert is_interval_dtype(IntervalIndex.from_breaks(np.arange(4))) assert is_interval_dtype(IntervalIndex.from_breaks( date_range('20130101', periods=3))) assert not is_interval_dtype('U') assert not is_interval_dtype('S') assert not is_interval_dtype('foo') assert not is_interval_dtype(np.object_) assert not is_interval_dtype(np.int64) assert not is_interval_dtype(np.float64)
def test_constructors_datetimelike(self, closed): # DTI / TDI for idx in [pd.date_range('20130101', periods=5), pd.timedelta_range('1 day', periods=5)]: result = IntervalIndex.from_breaks(idx, closed=closed) expected = IntervalIndex.from_breaks(idx.values, closed=closed) tm.assert_index_equal(result, expected) expected_scalar_type = type(idx[0]) i = result[0] assert isinstance(i.left, expected_scalar_type) assert isinstance(i.right, expected_scalar_type)
def test_series_retbins(self): # GH 8589 s = Series(np.arange(4)) result, bins = cut(s, 2, retbins=True) expected = Series(IntervalIndex.from_breaks( [-0.003, 1.5, 3], closed='right').repeat(2)).astype( CDT(ordered=True)) tm.assert_series_equal(result, expected) result, bins = qcut(s, 2, retbins=True) expected = Series(IntervalIndex.from_breaks( [-0.001, 1.5, 3], closed='right').repeat(2)).astype( CDT(ordered=True)) tm.assert_series_equal(result, expected)
def test_constructors(self, closed, name):
    """Every constructor reproduces the ``_simple_new`` reference index."""
    left, right = Index([0, 1, 2, 3]), Index([1, 2, 3, 4])
    ivs = [Interval(lo, hi, closed=closed) for lo, hi in zip(left, right)]
    expected = IntervalIndex._simple_new(
        left=left, right=right, closed=closed, name=name)

    tm.assert_index_equal(IntervalIndex(ivs, name=name), expected)
    tm.assert_index_equal(
        IntervalIndex.from_intervals(ivs, name=name), expected)
    tm.assert_index_equal(
        IntervalIndex.from_breaks(np.arange(5), closed=closed, name=name),
        expected)
    tm.assert_index_equal(
        IntervalIndex.from_arrays(
            left.values, right.values, closed=closed, name=name),
        expected)
    tm.assert_index_equal(
        IntervalIndex.from_tuples(
            zip(left, right), closed=closed, name=name),
        expected)

    via_index = Index(ivs, name=name)
    assert isinstance(via_index, IntervalIndex)
    tm.assert_index_equal(via_index, expected)

    # idempotent
    tm.assert_index_equal(Index(expected), expected)
    tm.assert_index_equal(IntervalIndex(expected), expected)
    tm.assert_index_equal(
        IntervalIndex.from_intervals(expected.values, name=expected.name),
        expected)

    round_trip = dict(closed=expected.closed, name=expected.name)

    left, right = expected.left, expected.right
    tm.assert_index_equal(
        IntervalIndex.from_arrays(left, right, **round_trip), expected)

    tm.assert_index_equal(
        IntervalIndex.from_tuples(expected.to_tuples(), **round_trip),
        expected)

    breaks = expected.left.tolist() + [expected.right[-1]]
    tm.assert_index_equal(
        IntervalIndex.from_breaks(breaks, **round_trip), expected)
def _format_labels(bins, precision, right=True,
                   include_lowest=False, dtype=None):
    """Build the IntervalIndex labels for ``bins`` according to ``dtype``.

    Datetime and timedelta dtypes format each bin edge as a Timestamp or
    Timedelta; any other dtype rounds edges via ``_round_frac`` using the
    precision returned by ``_infer_precision``.  The index is closed on
    the right when ``right`` is true, otherwise on the left.  When both
    ``right`` and ``include_lowest`` are set, the first interval's left
    edge is moved down by one step (1ns, or ``10 ** -precision``).
    """
    if is_datetime64_dtype(dtype):
        formatter = Timestamp

        def adjust(x):
            return x - Timedelta('1ns')
    elif is_timedelta64_dtype(dtype):
        formatter = Timedelta

        def adjust(x):
            return x - Timedelta('1ns')
    else:
        precision = _infer_precision(precision, bins)

        def formatter(x):
            return _round_frac(x, precision)

        def adjust(x):
            return x - 10 ** (-precision)

    side = 'right' if right else 'left'
    labels = IntervalIndex.from_breaks([formatter(edge) for edge in bins],
                                       closed=side)

    if not (right and include_lowest):
        return labels

    # we will adjust the left hand side by precision to
    # account that we are all right closed
    lowered_left = adjust(labels[0].left)
    head = IntervalIndex.from_intervals(
        [Interval(lowered_left, labels[0].right, closed='right')])
    return head.append(labels[1:])
def test_constructors_nan(self, closed, data):
    """All-NaN input builds the same index through every constructor."""
    # GH 18421
    nan_values = np.array(data, dtype=object)
    reference = IntervalIndex(data, closed=closed)

    # validate the expected index
    assert reference.closed == closed
    tm.assert_numpy_array_equal(reference.values, nan_values)

    def check(result):
        tm.assert_index_equal(result, reference)
        tm.assert_numpy_array_equal(result.values, nan_values)

    check(IntervalIndex.from_tuples(data, closed=closed))
    check(IntervalIndex.from_breaks([np.nan] + data, closed=closed))
    check(IntervalIndex.from_arrays(data, data, closed=closed))

    if closed == 'right':
        # Can't specify closed for IntervalIndex.from_intervals
        check(IntervalIndex.from_intervals(data))
def test_constructors_empty(self, data, closed):
    """Empty input builds the same empty index through every constructor."""
    # GH 18421
    subtype = getattr(data, 'dtype', np.int64)
    no_values = np.array([], dtype=object)
    reference = IntervalIndex(data, closed=closed)

    # validate the expected index
    assert reference.empty
    assert reference.closed == closed
    assert reference.dtype.subtype == subtype
    tm.assert_numpy_array_equal(reference.values, no_values)

    def check(result):
        tm.assert_index_equal(result, reference)
        tm.assert_numpy_array_equal(result.values, no_values)

    check(IntervalIndex.from_tuples(data, closed=closed))
    check(IntervalIndex.from_breaks(data, closed=closed))
    check(IntervalIndex.from_arrays(data, data, closed=closed))

    if closed == 'right':
        # Can't specify closed for IntervalIndex.from_intervals
        check(IntervalIndex.from_intervals(data))
def test_isin(self, closed):
    """``isin`` against slices, shifted indexes and other closed sides."""
    index = self.create_index(closed=closed)
    size = len(index)

    # only the first element is present
    first_only = np.array([True] + [False] * (size - 1))
    tm.assert_numpy_array_equal(index.isin(index[:1]), first_only)
    tm.assert_numpy_array_equal(index.isin([index[0]]), first_only)

    # every element except the last is present
    other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
    all_but_last = np.array([True] * (size - 1) + [False])
    tm.assert_numpy_array_equal(index.isin(other), all_but_last)
    tm.assert_numpy_array_equal(index.isin(other.tolist()), all_but_last)

    # identical breaks match only when the closed side is identical too
    for other_closed in {'right', 'left', 'both', 'neither'}:
        other = self.create_index(closed=other_closed)
        same_side = np.repeat(closed == other_closed, len(index))
        tm.assert_numpy_array_equal(index.isin(other), same_side)
        tm.assert_numpy_array_equal(index.isin(other.tolist()), same_side)
def test_constructors_errors_tz(self, tz_left, tz_right):
    """Constructors reject left/right endpoints built with different tz.

    All inputs are built before entering ``pytest.raises`` so that only
    the constructor call itself can satisfy the expected ``ValueError``.
    """
    # GH 18537
    left = date_range('2017-01-01', periods=4, tz=tz_left)
    right = date_range('2017-01-02', periods=4, tz=tz_right)
    tuples = lzip(left, right)
    breaks = left.tolist() + [right[-1]]

    # don't need to check IntervalIndex(...) or from_intervals, since
    # mixed tz are disallowed at the Interval level
    with pytest.raises(ValueError):
        IntervalIndex.from_arrays(left, right)

    with pytest.raises(ValueError):
        IntervalIndex.from_tuples(tuples)

    with pytest.raises(ValueError):
        IntervalIndex.from_breaks(breaks)
def test_label_precision(self): arr = np.arange(0, 0.73, 0.01) result = cut(arr, 4, precision=2) ex_levels = IntervalIndex.from_breaks([-0.00072, 0.18, 0.36, 0.54, 0.72]) tm.assert_index_equal(result.categories, ex_levels)
def test_constructor_timestamp(self, closed, name, freq, periods, tz):
    """``interval_range`` over Timestamps matches ``from_breaks``."""
    start, end = Timestamp('20180101', tz=tz), Timestamp('20181231', tz=tz)
    breaks = date_range(start=start, end=end, freq=freq)
    expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

    specifications = [
        # defined from start/end/freq
        dict(start=start, end=end, freq=freq),
        # defined from start/periods/freq
        dict(start=start, periods=periods, freq=freq),
        # defined from end/periods/freq
        dict(end=end, periods=periods, freq=freq),
    ]
    for spec in specifications:
        kwargs = dict(spec, name=name, closed=closed)
        tm.assert_index_equal(interval_range(**kwargs), expected)

    # GH 20976: linspace behavior defined from start/end/periods
    if breaks.freq.isAnchored() or tz is not None:
        # matches expected only for non-anchored offsets and tz naive
        # (anchored/DST transitions cause unequal spacing in expected)
        return

    result = interval_range(start=start, end=end, periods=periods,
                            name=name, closed=closed)
    tm.assert_index_equal(result, expected)
def test_maybe_convert_i8_numeric(self, breaks, make_key):
    """Numeric keys pass through ``_maybe_convert_i8`` untouched."""
    # GH 20636
    key = make_key(breaks)
    index = IntervalIndex.from_breaks(breaks)

    # no conversion occurs for numeric
    converted = index._maybe_convert_i8(key)
    assert converted is key
def test_insert(self):
    """Inserting an interval works; invalid items raise ValueError."""
    target = IntervalIndex.from_breaks(range(4))
    result = self.index.insert(2, Interval(2, 3))
    assert target.equals(result)

    # a non-interval item is rejected
    with pytest.raises(ValueError):
        self.index.insert(0, 1)

    # an interval closed on a different side is rejected
    with pytest.raises(ValueError):
        self.index.insert(0, Interval(2, 3, closed='left'))
def test_series_ret_bins():
    """``cut`` on a Series with ``retbins=True`` (gh-8589)."""
    # see gh-8589
    ser = Series(np.arange(4))
    result, bins = cut(ser, 2, retbins=True)

    intervals = IntervalIndex.from_breaks([-0.003, 1.5, 3], closed="right")
    expected = Series(intervals.repeat(2)).astype(CDT(ordered=True))
    tm.assert_series_equal(result, expected)
def test_cut_return_intervals(self): s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) res = cut(s, 3) exp_bins = np.linspace(0, 8, num=4).round(3) exp_bins[0] -= 0.008 exp = Series(IntervalIndex.from_breaks(exp_bins, closed='right').take( [0, 0, 0, 1, 1, 1, 2, 2, 2])).astype(CDT(ordered=True)) tm.assert_series_equal(res, exp)
def test_constructors_errors(self):
    """Invalid constructor inputs raise with the expected error message.

    The messages are checked with ``match=`` (a regex searched against the
    exception text).  The ``message=`` keyword used previously only set
    the failure text shown when nothing is raised, so the messages were
    never verified.
    """
    # scalar
    msg = (r'IntervalIndex\(...\) must be called with a collection of '
           'some kind, 5 was passed')
    with pytest.raises(TypeError, match=msg):
        IntervalIndex(5)

    # not an interval (integer width and repr prefix are platform dependent)
    msg = (r"type <(class|type) 'numpy\.int(32|64)'> with value 0 "
           "is not an interval")
    with pytest.raises(TypeError, match=msg):
        IntervalIndex([0, 1])

    with pytest.raises(TypeError, match=msg):
        IntervalIndex.from_intervals([0, 1])

    # invalid closed
    msg = "invalid options for 'closed': invalid"
    with pytest.raises(ValueError, match=msg):
        IntervalIndex.from_arrays([0, 1], [1, 2], closed='invalid')

    # mismatched closed
    msg = 'intervals must all be closed on the same side'
    with pytest.raises(ValueError, match=msg):
        IntervalIndex.from_intervals([Interval(0, 1),
                                      Interval(1, 2, closed='left')])

    with pytest.raises(ValueError, match=msg):
        Index([Interval(0, 1), Interval(2, 3, closed='left')])

    # no point in nesting periods in an IntervalIndex
    msg = 'Period dtypes are not supported, use a PeriodIndex instead'
    with pytest.raises(ValueError, match=msg):
        IntervalIndex.from_breaks(
            pd.period_range('2000-01-01', periods=3))

    # left > right, including decreasing breaks/arrays
    msg = 'left side of interval must be <= right side'
    with pytest.raises(ValueError, match=msg):
        IntervalIndex.from_arrays([0, 10], [3, 5])

    with pytest.raises(ValueError, match=msg):
        IntervalIndex.from_breaks(range(10, -1, -1))

    with pytest.raises(ValueError, match=msg):
        IntervalIndex.from_arrays(range(10, -1, -1), range(9, -2, -1))
def test_value_counts_bins(self):
    """Exercise ``value_counts`` (with and without ``bins``) and ``unique``.

    The whole body runs once per class in ``klasses`` (Index and Series).
    """
    klasses = [Index, Series]
    for klass in klasses:
        s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
        s = klass(s_values)

        # bins
        # binning string data is expected to raise TypeError
        pytest.raises(TypeError,
                      lambda bins: s.value_counts(bins=bins), 1)

        # the numeric checks below always use a Series, independent of klass
        s1 = Series([1, 1, 2, 3])
        res1 = s1.value_counts(bins=1)
        exp1 = Series({Interval(0.997, 3.0): 4})
        tm.assert_series_equal(res1, exp1)
        res1n = s1.value_counts(bins=1, normalize=True)
        exp1n = Series({Interval(0.997, 3.0): 1.0})
        tm.assert_series_equal(res1n, exp1n)

        if isinstance(s1, Index):
            tm.assert_index_equal(s1.unique(), Index([1, 2, 3]))
        else:
            exp = np.array([1, 2, 3], dtype=np.int64)
            tm.assert_numpy_array_equal(s1.unique(), exp)

        assert s1.nunique() == 3

        # these return the same
        res4 = s1.value_counts(bins=4, dropna=True)
        intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0])
        exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2]))
        tm.assert_series_equal(res4, exp4)

        res4 = s1.value_counts(bins=4, dropna=False)
        intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0])
        exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2]))
        tm.assert_series_equal(res4, exp4)

        res4n = s1.value_counts(bins=4, normalize=True)
        exp4n = Series([0.5, 0.25, 0.25, 0],
                       index=intervals.take([0, 3, 1, 2]))
        tm.assert_series_equal(res4n, exp4n)

        # handle NA's properly
        s_values = [
            'a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a', 'b'
        ]
        s = klass(s_values)
        expected = Series([4, 3, 2], index=['b', 'a', 'd'])
        tm.assert_series_equal(s.value_counts(), expected)

        if isinstance(s, Index):
            exp = Index(['a', 'b', np.nan, 'd'])
            tm.assert_index_equal(s.unique(), exp)
        else:
            exp = np.array(['a', 'b', np.nan, 'd'], dtype=object)
            tm.assert_numpy_array_equal(s.unique(), exp)
        # nunique is expected to exclude NaN, unlike unique() above
        assert s.nunique() == 3

        # empty container
        s = klass({})
        expected = Series([], dtype=np.int64)
        tm.assert_series_equal(s.value_counts(), expected,
                               check_index_type=False)
        # returned dtype differs depending on original
        if isinstance(s, Index):
            tm.assert_index_equal(s.unique(), Index([]),
                                  exact=False)
        else:
            tm.assert_numpy_array_equal(s.unique(), np.array([]),
                                        check_dtype=False)

        assert s.nunique() == 0
def test_length_one(self): """breaks of length one produce an empty IntervalIndex""" breaks = [0] result = IntervalIndex.from_breaks(breaks) expected = IntervalIndex.from_breaks([]) tm.assert_index_equal(result, expected)
def test_repr_matches():
    """The index repr equals the array repr after renaming the class."""
    idx = IntervalIndex.from_breaks([1, 2, 3])
    index_repr = repr(idx)
    array_repr = repr(idx.values)
    renamed = index_repr.replace("Index", "Array")
    assert renamed == array_repr
def test_left_right_dont_share_data(self): # GH#36310 breaks = np.arange(5) result = IntervalIndex.from_breaks(breaks)._data assert result._left.base is None or result._left.base is not result._right.base
def test_label_precision():
    """``cut`` honours ``precision=2`` when labelling the bins."""
    values = np.arange(0, 0.73, 0.01)
    binned = cut(values, 4, precision=2)

    breaks = [-0.00072, 0.18, 0.36, 0.54, 0.72]
    expected_levels = IntervalIndex.from_breaks(breaks)
    tm.assert_index_equal(binned.categories, expected_levels)
def makeIntervalIndex(k=10, name=None, **kwargs):
    """Return a length-``k`` IntervalIndex with equal-width bins on [0, 100].

    Extra keyword arguments are forwarded to ``IntervalIndex.from_breaks``.
    """
    # k intervals need k + 1 break points
    breaks = np.linspace(0, 100, num=k + 1)
    return IntervalIndex.from_breaks(breaks, name=name, **kwargs)
def monotonic_index(start, end, dtype="int64", inclusive="right"):
    """Return an IntervalIndex whose breaks are ``arange(start, end)``."""
    breaks = np.arange(start, end, dtype=dtype)
    return IntervalIndex.from_breaks(breaks, inclusive=inclusive)
def test_interval_can_hold_element_emptylist(self, dtype, element):
    """A block of interval data can hold an empty list.

    ``element`` is accepted but not used by this check.
    """
    breaks = np.array([1, 3, 4], dtype=dtype)
    interval_index = IntervalIndex.from_breaks(breaks)
    block = new_block(interval_index._data, [1], ndim=2)

    assert block._can_hold_element([])
def create_index(self, *, inclusive="right"):
    """Return the ten-interval index built from breaks 0..10."""
    breaks = range(11)
    return IntervalIndex.from_breaks(breaks, inclusive=inclusive)
def test_delete(self):
    """Deleting position 0 leaves the index built from breaks [1, 2]."""
    remaining = IntervalIndex.from_breaks([1, 2])
    result = self.index.delete(0)
    self.assertTrue(remaining.equals(result))
def create_index(self):
    """Return the nine-interval index built from breaks 0..9."""
    breaks = np.arange(10)
    return IntervalIndex.from_breaks(breaks)
def test_set_operation_errors(self):
    """``union`` raises ValueError for incompatible operands."""
    # a non-interval index
    with pytest.raises(ValueError):
        self.index.union(self.index.left)

    # an interval index closed on a different side
    open_both = IntervalIndex.from_breaks([0, 1, 2], closed='neither')
    with pytest.raises(ValueError):
        self.index.union(open_both)
def test_construction(self):
    """``interval_range`` agrees with the equivalent ``from_breaks`` call."""
    shared = dict(name='foo', closed='both')
    result = interval_range(0, 5, **shared)
    expected = IntervalIndex.from_breaks(np.arange(0, 5), **shared)
    tm.assert_index_equal(result, expected)
def setup_cache(self):
    """Return a one-million-row Series indexed by unit-width intervals."""
    size = 1000000
    # size intervals need size + 1 break points
    index = IntervalIndex.from_breaks(np.arange(size + 1))
    return Series(np.arange(size), index=index)
def test_value_counts_bins(index_or_series):
    """Exercise ``value_counts`` (with and without ``bins``) and ``unique``.

    ``index_or_series`` supplies the container class under test.
    """
    klass = index_or_series
    s_values = ["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"]
    s = klass(s_values)

    # bins
    # binning string data is expected to raise TypeError
    msg = "bins argument only works with numeric data"
    with pytest.raises(TypeError, match=msg):
        s.value_counts(bins=1)

    # the numeric checks below always use a Series, independent of klass
    s1 = Series([1, 1, 2, 3])
    res1 = s1.value_counts(bins=1)
    exp1 = Series({Interval(0.997, 3.0): 4})
    tm.assert_series_equal(res1, exp1)
    res1n = s1.value_counts(bins=1, normalize=True)
    exp1n = Series({Interval(0.997, 3.0): 1.0})
    tm.assert_series_equal(res1n, exp1n)

    if isinstance(s1, Index):
        tm.assert_index_equal(s1.unique(), Index([1, 2, 3]))
    else:
        exp = np.array([1, 2, 3], dtype=np.int64)
        tm.assert_numpy_array_equal(s1.unique(), exp)

    assert s1.nunique() == 3

    # these return the same
    res4 = s1.value_counts(bins=4, dropna=True)
    intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0])
    exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 1, 3, 2]))
    tm.assert_series_equal(res4, exp4)

    res4 = s1.value_counts(bins=4, dropna=False)
    intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0])
    exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 1, 3, 2]))
    tm.assert_series_equal(res4, exp4)

    res4n = s1.value_counts(bins=4, normalize=True)
    exp4n = Series([0.5, 0.25, 0.25, 0], index=intervals.take([0, 1, 3, 2]))
    tm.assert_series_equal(res4n, exp4n)

    # handle NA's properly
    s_values = ["a", "b", "b", "b", np.nan, np.nan, "d", "d", "a", "a", "b"]
    s = klass(s_values)
    expected = Series([4, 3, 2], index=["b", "a", "d"])
    tm.assert_series_equal(s.value_counts(), expected)

    if isinstance(s, Index):
        exp = Index(["a", "b", np.nan, "d"])
        tm.assert_index_equal(s.unique(), exp)
    else:
        exp = np.array(["a", "b", np.nan, "d"], dtype=object)
        tm.assert_numpy_array_equal(s.unique(), exp)
    # nunique is expected to exclude NaN, unlike unique() above
    assert s.nunique() == 3

    # empty container; an explicit object dtype is passed unless klass is dict
    s = klass({}) if klass is dict else klass({}, dtype=object)
    expected = Series([], dtype=np.int64)
    tm.assert_series_equal(s.value_counts(), expected, check_index_type=False)
    # returned dtype differs depending on original
    if isinstance(s, Index):
        tm.assert_index_equal(s.unique(), Index([]), exact=False)
    else:
        tm.assert_numpy_array_equal(s.unique(), np.array([]), check_dtype=False)

    assert s.nunique() == 0
def test_constructors_errors(self):
    """Invalid constructor inputs raise with the expected error message."""

    def check(exc_type, pattern, *builders):
        # each builder must raise exc_type with text matching pattern
        for build in builders:
            with tm.assert_raises_regex(exc_type, pattern):
                build()

    # scalar
    check(TypeError,
          (r'IntervalIndex\(...\) must be called with a collection of '
           'some kind, 5 was passed'),
          lambda: IntervalIndex(5))

    # not an interval
    check(TypeError,
          ("type <(class|type) 'numpy.int64'> with value 0 "
           "is not an interval"),
          lambda: IntervalIndex([0, 1]),
          lambda: IntervalIndex.from_intervals([0, 1]))

    # invalid closed
    check(ValueError,
          "invalid options for 'closed': invalid",
          lambda: IntervalIndex.from_arrays([0, 1], [1, 2],
                                            closed='invalid'))

    # mismatched closed within intervals
    check(ValueError,
          'intervals must all be closed on the same side',
          lambda: IntervalIndex.from_intervals(
              [Interval(0, 1), Interval(1, 2, closed='left')]),
          lambda: IntervalIndex(
              [Interval(0, 1), Interval(2, 3, closed='left')]),
          lambda: Index([Interval(0, 1), Interval(2, 3, closed='left')]))

    # mismatched closed inferred from intervals vs constructor.
    check(ValueError,
          'conflicting values for closed',
          lambda: IntervalIndex(
              [Interval(0, 1, closed='both'),
               Interval(1, 2, closed='both')],
              closed='neither'))

    # no point in nesting periods in an IntervalIndex
    check(ValueError,
          'Period dtypes are not supported, use a PeriodIndex instead',
          lambda: IntervalIndex.from_breaks(
              pd.period_range('2000-01-01', periods=3)))

    # decreasing breaks/arrays
    check(ValueError,
          'left side of interval must be <= right side',
          lambda: IntervalIndex.from_breaks(range(10, -1, -1)),
          lambda: IntervalIndex.from_arrays(range(10, -1, -1),
                                            range(9, -2, -1)))

    # GH 19016: categorical data
    data = Categorical(list('01234abcde'), ordered=True)
    check(TypeError,
          ('category, object, and string subtypes are not supported '
           'for IntervalIndex'),
          lambda: IntervalIndex.from_breaks(data),
          lambda: IntervalIndex.from_arrays(data[:-1], data[1:]))
def test_constructor_interval_values_mismatched_dtype(self): dti = date_range("2016-01-01", periods=3) ii = IntervalIndex.from_breaks(dti) result = Index(ii, dtype="category") expected = CategoricalIndex(ii) tm.assert_index_equal(result, expected)
def series_with_interval_index(self):
    """Return ``Series([0..4])`` indexed by the unit intervals over 0..5."""
    index = IntervalIndex.from_breaks(np.arange(6))
    return Series(np.arange(5), index)
def test_comparison(self):
    """Check element-wise comparison operators between ``self.index`` and
    Intervals, other interval containers, and invalid operands.
    """
    index = self.index
    all_true = np.array([True, True])
    all_false = np.array([False, False])
    second_only = np.array([False, True])

    # scalar Interval on either side of the operator
    tm.assert_numpy_array_equal(Interval(0, 1) < index, second_only)
    tm.assert_numpy_array_equal(Interval(0.5, 1.5) < index, second_only)
    tm.assert_numpy_array_equal(index > Interval(0.5, 1.5), second_only)

    # the index compared with itself
    tm.assert_numpy_array_equal(index == index, all_true)
    tm.assert_numpy_array_equal(index <= index, all_true)
    tm.assert_numpy_array_equal(index >= index, all_true)
    tm.assert_numpy_array_equal(index < index, all_false)
    tm.assert_numpy_array_equal(index > index, all_false)

    # same breaks, different closed side
    left_closed = IntervalIndex.from_breaks([0, 1, 2], "left")
    tm.assert_numpy_array_equal(index == left_closed, all_false)

    # the index compared with its own values
    tm.assert_numpy_array_equal(index == index.values, all_true)
    tm.assert_numpy_array_equal(index.values == index, all_true)
    tm.assert_numpy_array_equal(index <= index.values, all_true)
    tm.assert_numpy_array_equal(index != index.values, all_false)
    tm.assert_numpy_array_equal(index > index.values, all_false)
    tm.assert_numpy_array_equal(index.values > index, all_false)

    # invalid comparisons
    tm.assert_numpy_array_equal(index == 0, all_false)
    tm.assert_numpy_array_equal(index == index.left, all_false)

    type_msg = "|".join(
        [
            "not supported between instances of 'int' and '.*.Interval'",
            r"Invalid comparison between dtype=interval\[int64\] and ",
        ]
    )
    with pytest.raises(TypeError, match=type_msg):
        index > 0
    with pytest.raises(TypeError, match=type_msg):
        index <= 0
    with pytest.raises(TypeError, match=type_msg):
        index > np.arange(2)

    length_msg = "Lengths must match to compare"
    with pytest.raises(ValueError, match=length_msg):
        index > np.arange(3)
def test_linspace_dst_transition(self, start, mid, end): # GH 20976: linspace behavior defined from start/end/periods # accounts for the hour gained/lost during DST transition result = interval_range(start=start, end=end, periods=2) expected = IntervalIndex.from_breaks([start, mid, end]) tm.assert_index_equal(result, expected)
def setup_method(self, method):
    """Store on ``self.s`` a Series of 0..4 indexed by intervals from breaks 0..5."""
    values = np.arange(5)
    interval_index = IntervalIndex.from_breaks(np.arange(6))
    self.s = Series(values, interval_index)
def create_index(self, closed="right"):
    """Build the IntervalIndex whose breaks are 0..10, closed on ``closed``."""
    breaks = range(11)
    return IntervalIndex.from_breaks(breaks, closed=closed)
def monotonic_index(start, end, dtype="int64", closed="right"):
    """Return an IntervalIndex whose breaks are ``np.arange(start, end)``.

    ``dtype`` is forwarded to ``np.arange`` and ``closed`` to
    ``IntervalIndex.from_breaks``.
    """
    breaks = np.arange(start, end, dtype=dtype)
    return IntervalIndex.from_breaks(breaks, closed=closed)
def test_delete(self, closed): expected = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed) result = self.create_index(closed=closed).delete(0) tm.assert_index_equal(result, expected)
class TestClassConstructors(ConstructorTests):
    """Constructor tests run against IntervalIndex and Index(dtype="interval")."""

    @pytest.fixture(
        params=[IntervalIndex, partial(Index, dtype="interval")],
        ids=["IntervalIndex", "Index"],
    )
    def constructor(self, request):
        """Yield each constructor under test in turn."""
        return request.param

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Translate ``breaks`` into the ``{"data": ...}`` kwargs understood by
        the IntervalIndex/Index constructors.

        Empty breaks are passed through untouched. Otherwise consecutive
        pairs become Interval objects, and the container type follows the
        input: a list stays a list, categorical input is rebuilt through
        ``breaks._constructor``, anything else becomes an object ndarray.
        """
        if len(breaks) == 0:
            return {"data": breaks}

        intervals = []
        for lo, hi in zip(breaks[:-1], breaks[1:]):
            # a missing left endpoint is kept as-is rather than wrapped
            intervals.append(Interval(lo, hi, closed) if notna(lo) else lo)

        if isinstance(breaks, list):
            data = intervals
        elif is_categorical_dtype(breaks):
            data = breaks._constructor(intervals)
        else:
            data = np.array(intervals, dtype=object)
        return {"data": data}

    def test_generic_errors(self, constructor):
        """
        Intentionally empty override of the base class test: errors are
        handled differently here and the checks are unnecessary because
        they are caught at the Interval level.
        """

    def test_constructor_string(self):
        # GH23013
        # When forming the interval from breaks,
        # the interval of strings is already forbidden.
        pass

    def test_constructor_errors(self, constructor):
        """Invalid inputs raise with the expected type and message."""
        # mismatched closed within intervals with no constructor override
        mixed = [Interval(0, 1, closed="right"), Interval(2, 3, closed="left")]
        mixed_msg = "intervals must all be closed on the same side"
        with pytest.raises(ValueError, match=mixed_msg):
            constructor(mixed)

        # scalar
        scalar_msg = (
            r"IntervalIndex\(...\) must be called with a collection of "
            "some kind, 5 was passed"
        )
        with pytest.raises(TypeError, match=scalar_msg):
            constructor(5)

        # not an interval; dtype depends on 32bit/windows builds
        not_interval_msg = (
            "type <class 'numpy.int(32|64)'> with value 0 is not an interval"
        )
        with pytest.raises(TypeError, match=not_interval_msg):
            constructor([0, 1])

    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
    @pytest.mark.parametrize(
        "data, closed",
        [
            ([], "both"),
            ([np.nan, np.nan], "neither"),
            (
                [Interval(0, 3, closed="neither"), Interval(2, 5, closed="neither")],
                "left",
            ),
            (
                [Interval(0, 3, closed="left"), Interval(2, 5, closed="right")],
                "neither",
            ),
            (IntervalIndex.from_breaks(range(5), closed="both"), "right"),
        ],
    )
    def test_override_inferred_closed(self, constructor, data, closed):
        """An explicit ``closed`` wins over the one carried by the data (GH 19370)."""
        if not isinstance(data, IntervalIndex):
            tuples = [(iv.left, iv.right) if notna(iv) else iv for iv in data]
        else:
            tuples = data.to_tuples()

        expected = IntervalIndex.from_tuples(tuples, closed=closed)
        result = constructor(data, closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "values_constructor", [list, np.array, IntervalIndex, IntervalArray]
    )
    def test_index_object_dtype(self, values_constructor):
        # Index(intervals, dtype=object) is an Index (not an IntervalIndex)
        values = values_constructor([Interval(0, 1), Interval(1, 2), Interval(2, 3)])

        result = Index(values, dtype=object)

        assert type(result) is Index
        tm.assert_numpy_array_equal(result.values, np.array(values))

    def test_index_mixed_closed(self):
        # GH27172
        mixed = [
            Interval(0, 1, closed="left"),
            Interval(1, 2, closed="right"),
            Interval(2, 3, closed="neither"),
            Interval(3, 4, closed="both"),
        ]
        expected = Index(mixed, dtype=object)
        result = Index(mixed)
        tm.assert_index_equal(result, expected)
class TestClassConstructors(Base):
    """Tests specific to the IntervalIndex/Index constructors"""

    @pytest.fixture(params=[IntervalIndex, partial(Index, dtype='interval')],
                    ids=['IntervalIndex', 'Index'])
    def constructor(self, request):
        """Parametrized fixture returning each constructor under test."""
        return request.param

    def get_kwargs_from_breaks(self, breaks, closed='right'):
        """
        converts intervals in breaks format to a dictionary of kwargs to
        specific to the format expected by the IntervalIndex/Index constructors

        Empty ``breaks`` are returned unchanged under the 'data' key.
        Otherwise each consecutive pair of breaks becomes an Interval, and
        the result is a list, a ``breaks._constructor(...)`` instance or an
        object ndarray, depending on the type of ``breaks``.
        """
        if len(breaks) == 0:
            return {'data': breaks}

        # a missing left endpoint is kept as-is instead of building an Interval
        ivs = [Interval(left, right, closed) if notna(left) else left
               for left, right in zip(breaks[:-1], breaks[1:])]

        if isinstance(breaks, list):
            return {'data': ivs}
        elif is_categorical_dtype(breaks):
            return {'data': breaks._constructor(ivs)}
        return {'data': np.array(ivs, dtype=object)}

    def test_generic_errors(self, constructor):
        """
        override the base class implementation since errors are handled
        differently; checks unnecessary since caught at the Interval level
        """
        pass

    def test_constructor_string(self):
        # GH23013
        # When forming the interval from breaks,
        # the interval of strings is already forbidden.
        pass

    def test_constructor_errors(self, constructor):
        """Invalid inputs raise with the expected exception type and message."""
        # mismatched closed within intervals with no constructor override
        ivs = [Interval(0, 1, closed='right'), Interval(2, 3, closed='left')]
        msg = 'intervals must all be closed on the same side'
        with pytest.raises(ValueError, match=msg):
            constructor(ivs)

        # scalar
        msg = (r'IntervalIndex\(...\) must be called with a collection of '
               'some kind, 5 was passed')
        with pytest.raises(TypeError, match=msg):
            constructor(5)

        # not an interval; the integer width in the message depends on the
        # platform default integer (32bit/windows builds report int32), so
        # accept either width instead of hard-coding int64
        msg = "type <class 'numpy.int(32|64)'> with value 0 is not an interval"
        with pytest.raises(TypeError, match=msg):
            constructor([0, 1])

    @pytest.mark.parametrize(
        'data, closed',
        [([], 'both'),
         ([np.nan, np.nan], 'neither'),
         ([Interval(0, 3, closed='neither'),
           Interval(2, 5, closed='neither')], 'left'),
         ([Interval(0, 3, closed='left'),
           Interval(2, 5, closed='right')], 'neither'),
         (IntervalIndex.from_breaks(range(5), closed='both'), 'right')])
    def test_override_inferred_closed(self, constructor, data, closed):
        """An explicit ``closed`` overrides the one inferred from ``data``."""
        # GH 19370
        if isinstance(data, IntervalIndex):
            tuples = data.to_tuples()
        else:
            tuples = [(iv.left, iv.right) if notna(iv) else iv for iv in data]
        expected = IntervalIndex.from_tuples(tuples, closed=closed)
        result = constructor(data, closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('values_constructor', [
        list, np.array, IntervalIndex, IntervalArray])
    def test_index_object_dtype(self, values_constructor):
        # Index(intervals, dtype=object) is an Index (not an IntervalIndex)
        intervals = [Interval(0, 1), Interval(1, 2), Interval(2, 3)]
        values = values_constructor(intervals)
        result = Index(values, dtype=object)

        assert type(result) is Index
        tm.assert_numpy_array_equal(result.values, np.array(values))
def obj(self):
    """Return a Series whose values are the intervals built from breaks 0..3."""
    intervals = IntervalIndex.from_breaks(range(4))
    return Series(intervals)
def test_labels(right, breaks, closed): arr = np.tile(np.arange(0, 1.01, 0.1), 4) result, bins = cut(arr, 4, retbins=True, right=right) ex_levels = IntervalIndex.from_breaks(breaks, closed=closed) tm.assert_index_equal(result.categories, ex_levels)
def test_contains_interval(self, item, expected): # GH 23705 ci = CategoricalIndex(IntervalIndex.from_breaks(range(3))) result = item in ci assert result is expected
def test_constructors(self, data, closed, name):
    """Every construction route must yield the same IntervalIndex.

    ``data`` supplies the breaks. The reference index is built with
    ``_simple_new`` and each public constructor is compared against it,
    first from the raw inputs and then from the reference itself.
    """
    left, right = data[:-1], data[1:]
    pairs = lzip(left, right)
    ivs = [Interval(lo, hi, closed=closed) for lo, hi in pairs]
    expected = IntervalIndex._simple_new(
        left=left, right=right, closed=closed, name=name)

    # validate expected
    assert expected.closed == closed
    assert expected.name == name
    assert expected.dtype.subtype == data.dtype
    tm.assert_index_equal(expected.left, data[:-1])
    tm.assert_index_equal(expected.right, data[1:])

    # validated constructors
    tm.assert_index_equal(IntervalIndex(ivs, name=name), expected)
    tm.assert_index_equal(
        IntervalIndex.from_intervals(ivs, name=name), expected)
    tm.assert_index_equal(
        IntervalIndex.from_breaks(data, closed=closed, name=name), expected)
    tm.assert_index_equal(
        IntervalIndex.from_arrays(left, right, closed=closed, name=name),
        expected)
    tm.assert_index_equal(
        IntervalIndex.from_tuples(lzip(left, right), closed=closed, name=name),
        expected)

    via_index = Index(ivs, name=name)
    assert isinstance(via_index, IntervalIndex)
    tm.assert_index_equal(via_index, expected)

    # idempotent
    tm.assert_index_equal(Index(expected), expected)
    tm.assert_index_equal(IntervalIndex(expected), expected)
    tm.assert_index_equal(IntervalIndex.from_intervals(expected), expected)
    tm.assert_index_equal(
        IntervalIndex.from_intervals(expected.values, name=expected.name),
        expected)

    # rebuild from the reference's own pieces
    rebuild_kwargs = {'closed': expected.closed, 'name': expected.name}
    left, right = expected.left, expected.right
    tm.assert_index_equal(
        IntervalIndex.from_arrays(left, right, **rebuild_kwargs), expected)
    tm.assert_index_equal(
        IntervalIndex.from_tuples(expected.to_tuples(), **rebuild_kwargs),
        expected)

    breaks = expected.left.tolist() + [expected.right[-1]]
    tm.assert_index_equal(
        IntervalIndex.from_breaks(breaks, **rebuild_kwargs), expected)