def test_qcut_index():
    """Check that ``qcut([0, 2], 2)`` equals an ordered Categorical of two intervals."""
    expected = Categorical(
        [Interval(-0.001, 1), Interval(1, 2)],
        ordered=True,
    )
    tm.assert_categorical_equal(qcut([0, 2], 2), expected)
def test_compare_length_mismatch_errors(self, op, other_constructor, length):
    """
    Comparing a four-element IntervalArray with ``length`` intervals wrapped
    by ``other_constructor`` is expected to raise ``ValueError``.
    """
    lhs = IntervalArray.from_arrays(range(4), range(1, 5))
    rhs = other_constructor([Interval(0, 1)] * length)
    expected_msg = "Lengths must match to compare"
    with pytest.raises(ValueError, match=expected_msg):
        op(lhs, rhs)
class TestComparison:
    """Tests for ``==`` / ``!=`` between interval containers and other values."""

    @pytest.fixture(params=[operator.eq, operator.ne])
    def op(self, request):
        """Yield each comparison operator exercised by the tests."""
        return request.param

    @pytest.fixture(
        params=[
            IntervalArray.from_arrays,
            IntervalIndex.from_arrays,
            create_categorical_intervals,
            create_series_intervals,
            create_series_categorical_intervals,
        ],
        ids=[
            "IntervalArray",
            "IntervalIndex",
            "Categorical[Interval]",
            "Series[Interval]",
            "Series[Categorical[Interval]]",
        ],
    )
    def interval_constructor(self, request):
        """
        Yield each interval-container constructor listed in ``params``.

        The tests call it with left and right endpoints to build the
        ``other`` operand of a comparison.
        """
        return request.param

    def elementwise_comparison(self, op, array, other):
        """
        Build the expected result by applying ``op`` pair by pair.

        A non-list-like ``other`` is repeated once per element of ``array``.
        The result is a NumPy array, or a Series sharing ``other``'s index
        when ``other`` is a Series.
        """
        if not is_list_like(other):
            other = [other] * len(array)
        pairwise = np.array([op(lhs, rhs) for lhs, rhs in zip(array, other)])
        if isinstance(other, Series):
            return Series(pairwise, index=other.index)
        return pairwise

    def test_compare_scalar_interval(self, op, array):
        candidates = (
            # matches first interval
            array[0],
            # matches on a single endpoint but not both
            Interval(array.left[0], array.right[1]),
        )
        for other in candidates:
            actual = op(array, other)
            wanted = self.elementwise_comparison(op, array, other)
            tm.assert_numpy_array_equal(actual, wanted)

    def test_compare_scalar_interval_mixed_closed(self, op, closed, other_closed):
        lhs = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed)
        scalar = Interval(0, 1, closed=other_closed)

        actual = op(lhs, scalar)
        wanted = self.elementwise_comparison(op, lhs, scalar)
        tm.assert_numpy_array_equal(actual, wanted)

    def test_compare_scalar_na(self, op, array, nulls_fixture, request):
        actual = op(array, nulls_fixture)
        wanted = self.elementwise_comparison(op, array, nulls_fixture)

        # The xfail marker is attached only after both values were computed,
        # so it covers the final assertion alone.
        known_broken = (
            nulls_fixture is pd.NA and array.dtype != pd.IntervalDtype("int64")
        )
        if known_broken:
            request.node.add_marker(
                pytest.mark.xfail(
                    reason="broken for non-integer IntervalArray; see GH 31882"
                )
            )

        tm.assert_numpy_array_equal(actual, wanted)

    @pytest.mark.parametrize(
        "other",
        [
            0,
            1.0,
            True,
            "foo",
            Timestamp("2017-01-01"),
            Timestamp("2017-01-01", tz="US/Eastern"),
            Timedelta("0 days"),
            Period("2017-01-01", "D"),
        ],
    )
    def test_compare_scalar_other(self, op, array, other):
        actual = op(array, other)
        wanted = self.elementwise_comparison(op, array, other)
        tm.assert_numpy_array_equal(actual, wanted)

    def test_compare_list_like_interval(self, op, array, interval_constructor):
        endpoint_sets = (
            # same endpoints
            (array.left, array.right),
            # different endpoints
            (array.left[::-1], array.right[::-1]),
            # all nan endpoints
            ([np.nan] * 4, [np.nan] * 4),
        )
        for left, right in endpoint_sets:
            other = interval_constructor(left, right)
            actual = op(array, other)
            wanted = self.elementwise_comparison(op, array, other)
            tm.assert_equal(actual, wanted)

    def test_compare_list_like_interval_mixed_closed(
        self, op, interval_constructor, closed, other_closed
    ):
        lhs = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed)
        rhs = interval_constructor(range(2), range(1, 3), closed=other_closed)

        actual = op(lhs, rhs)
        wanted = self.elementwise_comparison(op, lhs, rhs)
        tm.assert_equal(actual, wanted)

    @pytest.mark.parametrize(
        "other",
        [
            (
                Interval(0, 1),
                Interval(Timedelta("1 day"), Timedelta("2 days")),
                Interval(4, 5, "both"),
                Interval(10, 20, "neither"),
            ),
            (0, 1.5, Timestamp("20170103"), np.nan),
            (
                Timestamp("20170102", tz="US/Eastern"),
                Timedelta("2 days"),
                "baz",
                pd.NaT,
            ),
        ],
    )
    def test_compare_list_like_object(self, op, array, other):
        actual = op(array, other)
        wanted = self.elementwise_comparison(op, array, other)
        tm.assert_numpy_array_equal(actual, wanted)

    def test_compare_list_like_nan(self, op, array, nulls_fixture, request):
        nulls = [nulls_fixture] * 4
        actual = op(array, nulls)
        wanted = self.elementwise_comparison(op, array, nulls)

        if nulls_fixture is pd.NA and array.dtype.subtype != "i8":
            request.node.add_marker(
                pytest.mark.xfail(
                    reason="broken for non-integer IntervalArray; see GH 31882"
                )
            )

        tm.assert_numpy_array_equal(actual, wanted)

    @pytest.mark.parametrize(
        "other",
        [
            np.arange(4, dtype="int64"),
            np.arange(4, dtype="float64"),
            date_range("2017-01-01", periods=4),
            date_range("2017-01-01", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
            period_range("2017-01-01", periods=4, freq="D"),
            Categorical(list("abab")),
            Categorical(date_range("2017-01-01", periods=4)),
            pd.array(list("abcd")),
            pd.array(["foo", 3.14, None, object()]),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_compare_list_like_other(self, op, array, other):
        actual = op(array, other)
        wanted = self.elementwise_comparison(op, array, other)
        tm.assert_numpy_array_equal(actual, wanted)

    @pytest.mark.parametrize("length", [1, 3, 5])
    @pytest.mark.parametrize("other_constructor", [IntervalArray, list])
    def test_compare_length_mismatch_errors(self, op, other_constructor, length):
        lhs = IntervalArray.from_arrays(range(4), range(1, 5))
        rhs = other_constructor([Interval(0, 1)] * length)
        expected_msg = "Lengths must match to compare"
        with pytest.raises(ValueError, match=expected_msg):
            op(lhs, rhs)

    @pytest.mark.parametrize(
        "constructor, expected_type, assert_func",
        [
            (IntervalIndex, np.array, tm.assert_numpy_array_equal),
            (Series, Series, tm.assert_series_equal),
        ],
    )
    def test_index_series_compat(self, op, constructor, expected_type, assert_func):
        # IntervalIndex/Series that rely on IntervalArray for comparisons
        breaks = range(4)
        index = constructor(IntervalIndex.from_breaks(breaks))

        operands = (
            # scalar comparisons
            index[0],
            breaks[0],
            # list-like comparisons
            IntervalArray.from_breaks(breaks),
            [index[0], breaks[0], "foo"],
        )
        for other in operands:
            actual = op(index, other)
            wanted = expected_type(self.elementwise_comparison(op, index, other))
            assert_func(actual, wanted)

    @pytest.mark.parametrize("scalars", ["a", False, 1, 1.0, None])
    def test_comparison_operations(self, scalars):
        # GH #28981
        all_false = Series([False, False])
        ser = Series([Interval(0, 1), Interval(1, 2)], dtype="interval")
        tm.assert_series_equal(ser == scalars, all_false)
def test_equal(self): assert Interval(0, 1) == Interval(0, 1, closed='right') assert Interval(0, 1) != Interval(0, 1, closed='left') assert Interval(0, 1) != 0
def test_length(self, left, right, expected): # GH 18789 iv = Interval(left, right) result = iv.length assert result == expected
def test_value_counts_bins(self):
    """
    Run the ``value_counts`` checks once for ``Index`` and once for ``Series``.

    Each pass checks that ``bins`` is rejected for string data, checks binned
    counts on a small integer Series, checks counts with NaN present, and
    checks an empty container.
    """
    for klass in [Index, Series]:
        s = klass(['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a'])

        # bins
        self.assertRaises(TypeError, s.value_counts, bins=1)

        s1 = Series([1, 1, 2, 3])
        full_range = Interval(0.997, 3.0)

        tm.assert_series_equal(s1.value_counts(bins=1),
                               Series({full_range: 4}))
        tm.assert_series_equal(s1.value_counts(bins=1, normalize=True),
                               Series({full_range: 1.0}))

        if isinstance(s1, Index):
            tm.assert_index_equal(s1.unique(), Index([1, 2, 3]))
        else:
            tm.assert_numpy_array_equal(
                s1.unique(), np.array([1, 2, 3], dtype=np.int64))

        self.assertEqual(s1.nunique(), 3)

        # these return the same
        breaks = [0.997, 1.5, 2.0, 2.5, 3.0]
        for dropna in (True, False):
            intervals = IntervalIndex.from_breaks(breaks)
            exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2]))
            res4 = s1.value_counts(bins=4, dropna=dropna)
            tm.assert_series_equal(res4, exp4)

        exp4n = Series([0.5, 0.25, 0.25, 0],
                       index=intervals.take([0, 3, 1, 2]))
        tm.assert_series_equal(s1.value_counts(bins=4, normalize=True), exp4n)

        # handle NA's properly
        s = klass(['a', 'b', 'b', 'b', np.nan, np.nan,
                   'd', 'd', 'a', 'a', 'b'])
        tm.assert_series_equal(s.value_counts(),
                               Series([4, 3, 2], index=['b', 'a', 'd']))

        if isinstance(s, Index):
            tm.assert_index_equal(s.unique(), Index(['a', 'b', np.nan, 'd']))
        else:
            tm.assert_numpy_array_equal(
                s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype=object))
        self.assertEqual(s.nunique(), 3)

        s = klass({})
        tm.assert_series_equal(s.value_counts(),
                               Series([], dtype=np.int64),
                               check_index_type=False)

        # returned dtype differs depending on original
        if isinstance(s, Index):
            self.assert_index_equal(s.unique(), Index([]), exact=False)
        else:
            self.assert_numpy_array_equal(s.unique(), np.array([]),
                                          check_dtype=False)

        self.assertEqual(s.nunique(), 0)
def interval():
    """Return ``Interval(0, 1)`` built with the default ``closed`` setting."""
    lower, upper = 0, 1
    return Interval(lower, upper)
def test_loc_with_interval(self):
    """
    Look up Interval keys through ``self.s`` and ``self.s.loc``.

    Exact Interval keys return the stored value or rows; Interval keys that
    are absent or not an exact match raise ``KeyError`` naming the key.
    """
    ser = self.s

    def assert_key_missing(indexer, key, shown_as):
        # ``indexer`` is either the Series itself or its ``.loc`` accessor.
        with pytest.raises(KeyError, match=re.escape(shown_as)):
            indexer[key]

    # loc with single label / list of labels:
    #   - Intervals: only exact matches
    #   - scalars: those that contain it
    assert ser.loc[Interval(0, 1)] == 0
    assert ser[Interval(0, 1)] == 0

    expected = ser.iloc[3:5]
    tm.assert_series_equal(expected, ser.loc[[Interval(3, 4), Interval(4, 5)]])
    tm.assert_series_equal(expected, ser[[Interval(3, 4), Interval(4, 5)]])

    # missing or not exact
    left_repr = "Interval(3, 5, closed='left')"
    assert_key_missing(ser.loc, Interval(3, 5, closed="left"), left_repr)
    assert_key_missing(ser, Interval(3, 5, closed="left"), left_repr)

    right_repr = "Interval(3, 5, closed='right')"
    assert_key_missing(ser, Interval(3, 5), right_repr)
    assert_key_missing(ser.loc, Interval(3, 5), right_repr)
    assert_key_missing(ser, Interval(3, 5), right_repr)

    absent = [
        (Interval(-2, 0), "Interval(-2, 0, closed='right')"),
        (Interval(5, 6), "Interval(5, 6, closed='right')"),
    ]
    for key, shown_as in absent:
        assert_key_missing(ser.loc, key, shown_as)
        assert_key_missing(ser, key, shown_as)
def test_slice_interval_step(self):
    """Slicing ``self.s`` with an Interval as the step is expected to raise ``ValueError``."""
    # GH#31658 allows for integer step!=1, not Interval step
    series = self.s
    interval_step = slice(0, 4, Interval(0, 1))
    msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
    with pytest.raises(ValueError, match=msg):
        series[interval_step]
datetime(2000, 1, 3), datetime(2000, 1, 4), datetime(2000, 1, 4), datetime(2000, 1, 4), datetime(2000, 1, 5), ] return Series(np.random.randn(len(dates)), index=dates) # ---------------------------------------------------------------- # Scalars # ---------------------------------------------------------------- @pytest.fixture( params=[ (Interval(left=0, right=5), IntervalDtype("int64", "right")), (Interval(left=0.1, right=0.5), IntervalDtype("float64", "right")), (Period("2012-01", freq="M"), "period[M]"), (Period("2012-02-01", freq="D"), "period[D]"), ( Timestamp("2011-01-01", tz="US/Eastern"), DatetimeTZDtype(tz="US/Eastern"), ), (Timedelta(seconds=500), "timedelta64[ns]"), ] ) def ea_scalar_and_dtype(request): return request.param # ----------------------------------------------------------------
def test_value_counts_bins(self, index_or_series):
    """
    Check ``value_counts`` for the container class given by ``index_or_series``.

    Covers rejection of ``bins`` for string data, binned counts on a small
    integer Series, counts with NaN present, and an empty container.
    """
    klass = index_or_series
    s = klass(["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"])

    # bins
    with pytest.raises(
        TypeError, match="bins argument only works with numeric data"
    ):
        s.value_counts(bins=1)

    s1 = Series([1, 1, 2, 3])
    full_range = Interval(0.997, 3.0)

    tm.assert_series_equal(s1.value_counts(bins=1), Series({full_range: 4}))
    tm.assert_series_equal(
        s1.value_counts(bins=1, normalize=True), Series({full_range: 1.0})
    )

    if isinstance(s1, Index):
        tm.assert_index_equal(s1.unique(), Index([1, 2, 3]))
    else:
        tm.assert_numpy_array_equal(
            s1.unique(), np.array([1, 2, 3], dtype=np.int64)
        )

    assert s1.nunique() == 3

    # these return the same
    breaks = [0.997, 1.5, 2.0, 2.5, 3.0]
    for dropna in (True, False):
        intervals = IntervalIndex.from_breaks(breaks)
        exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2]))
        res4 = s1.value_counts(bins=4, dropna=dropna)
        tm.assert_series_equal(res4, exp4)

    exp4n = Series([0.5, 0.25, 0.25, 0], index=intervals.take([0, 3, 1, 2]))
    tm.assert_series_equal(s1.value_counts(bins=4, normalize=True), exp4n)

    # handle NA's properly
    s = klass(["a", "b", "b", "b", np.nan, np.nan, "d", "d", "a", "a", "b"])
    tm.assert_series_equal(
        s.value_counts(), Series([4, 3, 2], index=["b", "a", "d"])
    )

    if isinstance(s, Index):
        tm.assert_index_equal(s.unique(), Index(["a", "b", np.nan, "d"]))
    else:
        tm.assert_numpy_array_equal(
            s.unique(), np.array(["a", "b", np.nan, "d"], dtype=object)
        )
    assert s.nunique() == 3

    if klass is dict:
        s = klass({})
    else:
        s = klass({}, dtype=object)
    tm.assert_series_equal(
        s.value_counts(), Series([], dtype=np.int64), check_index_type=False
    )

    # returned dtype differs depending on original
    if isinstance(s, Index):
        tm.assert_index_equal(s.unique(), Index([]), exact=False)
    else:
        tm.assert_numpy_array_equal(s.unique(), np.array([]), check_dtype=False)

    assert s.nunique() == 0
def test_slice_locs_with_interval(self):
    """
    Check ``IntervalIndex.slice_locs`` when the bounds are Interval objects.

    Covers increasing, decreasing, sorted-duplicate and unsorted-duplicate
    indexes. For the first unsorted-duplicate index, every lookup except
    ``end=Interval(2, 4)`` is expected to raise ``KeyError``.
    """
    # increasing monotonically
    index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)])

    assert index.slice_locs(
        start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
    assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
    assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
    assert index.slice_locs(end=Interval(0, 2)) == (0, 1)
    assert index.slice_locs(
        start=Interval(2, 4), end=Interval(0, 2)) == (2, 1)

    # decreasing monotonically
    index = IntervalIndex.from_tuples([(2, 4), (1, 3), (0, 2)])

    assert index.slice_locs(
        start=Interval(0, 2), end=Interval(2, 4)) == (2, 1)
    assert index.slice_locs(start=Interval(0, 2)) == (2, 3)
    assert index.slice_locs(end=Interval(2, 4)) == (0, 1)
    assert index.slice_locs(end=Interval(0, 2)) == (0, 3)
    assert index.slice_locs(
        start=Interval(2, 4), end=Interval(0, 2)) == (0, 3)

    # sorted duplicates
    index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)])

    assert index.slice_locs(
        start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
    assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
    assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
    assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
    assert index.slice_locs(
        start=Interval(2, 4), end=Interval(0, 2)) == (2, 2)

    # unsorted duplicates
    index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])

    # The call must happen inside the ``with`` body. Writing
    # ``pytest.raises(KeyError, index.slice_locs(...))`` evaluates the call
    # first, so the KeyError would escape instead of being checked.
    with pytest.raises(KeyError):
        index.slice_locs(start=Interval(0, 2), end=Interval(2, 4))

    with pytest.raises(KeyError):
        index.slice_locs(start=Interval(0, 2))

    assert index.slice_locs(end=Interval(2, 4)) == (0, 2)

    with pytest.raises(KeyError):
        index.slice_locs(end=Interval(0, 2))

    with pytest.raises(KeyError):
        index.slice_locs(start=Interval(2, 4), end=Interval(0, 2))

    # another unsorted duplicates
    index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4), (1, 3)])

    assert index.slice_locs(
        start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
    assert index.slice_locs(start=Interval(0, 2)) == (0, 4)
    assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
    assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
    assert index.slice_locs(
        start=Interval(2, 4), end=Interval(0, 2)) == (2, 2)
class TestIntervalIndex(Base):
    """
    Label-lookup tests for ``IntervalIndex``: ``get_loc``, ``slice_locs``,
    ``get_indexer``, ``get_indexer_non_unique`` and containment checks.
    """

    @pytest.mark.parametrize("idx_side", ['right', 'left', 'both', 'neither'])
    @pytest.mark.parametrize("side", ['right', 'left', 'both', 'neither'])
    def test_get_loc_interval(self, idx_side, side):
        """``get_loc`` with an Interval key matches exact intervals only."""
        # The two decorators are stacked on purpose: every index closedness
        # is tried against every query closedness.
        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=idx_side)

        for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3],
                      [-1, 4]]:
            # if get_loc is supplied an interval, it should only search
            # for exact matches, not overlaps or covers, else KeyError.
            if idx_side == side:
                if bound == [0, 1]:
                    assert idx.get_loc(Interval(0, 1, closed=side)) == 0
                elif bound == [2, 3]:
                    assert idx.get_loc(Interval(2, 3, closed=side)) == 1
                else:
                    with pytest.raises(KeyError):
                        idx.get_loc(Interval(*bound, closed=side))
            else:
                with pytest.raises(KeyError):
                    idx.get_loc(Interval(*bound, closed=side))

    @pytest.mark.parametrize("idx_side", ['right', 'left', 'both', 'neither'])
    @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5])
    def test_get_loc_scalar(self, idx_side, scalar):
        """``get_loc`` with a scalar returns the position of the containing interval."""
        # correct = {side: {query: answer}}.
        # If query is not in the dict, that query should raise a KeyError
        correct = {
            'right': {0.5: 0, 1: 0, 2.5: 1, 3: 1},
            'left': {0: 0, 0.5: 0, 2: 1, 2.5: 1},
            'both': {0: 0, 0.5: 0, 1: 0, 2: 1, 2.5: 1, 3: 1},
            'neither': {0.5: 0, 2.5: 1},
        }

        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=idx_side)

        # if get_loc is supplied a scalar, it should return the index of
        # the interval which contains the scalar, or KeyError.
        if scalar in correct[idx_side]:
            assert idx.get_loc(scalar) == correct[idx_side][scalar]
        else:
            with pytest.raises(KeyError):
                idx.get_loc(scalar)

    def test_slice_locs_with_interval(self):
        """``slice_locs`` with Interval bounds on several index orderings."""
        # increasing monotonically
        index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)])

        assert index.slice_locs(
            start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 1)
        assert index.slice_locs(
            start=Interval(2, 4), end=Interval(0, 2)) == (2, 1)

        # decreasing monotonically
        index = IntervalIndex.from_tuples([(2, 4), (1, 3), (0, 2)])

        assert index.slice_locs(
            start=Interval(0, 2), end=Interval(2, 4)) == (2, 1)
        assert index.slice_locs(start=Interval(0, 2)) == (2, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 1)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(
            start=Interval(2, 4), end=Interval(0, 2)) == (0, 3)

        # sorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)])

        assert index.slice_locs(
            start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
        assert index.slice_locs(
            start=Interval(2, 4), end=Interval(0, 2)) == (2, 2)

        # unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])

        # The call must happen inside the ``with`` body. Writing
        # ``pytest.raises(KeyError, index.slice_locs(...))`` evaluates the
        # call first, so the KeyError would escape instead of being checked.
        with pytest.raises(KeyError):
            index.slice_locs(start=Interval(0, 2), end=Interval(2, 4))

        with pytest.raises(KeyError):
            index.slice_locs(start=Interval(0, 2))

        assert index.slice_locs(end=Interval(2, 4)) == (0, 2)

        with pytest.raises(KeyError):
            index.slice_locs(end=Interval(0, 2))

        with pytest.raises(KeyError):
            index.slice_locs(start=Interval(2, 4), end=Interval(0, 2))

        # another unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4), (1, 3)])

        assert index.slice_locs(
            start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 4)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
        assert index.slice_locs(
            start=Interval(2, 4), end=Interval(0, 2)) == (2, 2)

    def test_slice_locs_with_ints_and_floats_succeeds(self):
        """``slice_locs`` with scalar bounds on non-overlapping indexes."""
        # increasing non-overlapping
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])

        assert index.slice_locs(0, 1) == (0, 1)
        assert index.slice_locs(0, 2) == (0, 2)
        assert index.slice_locs(0, 3) == (0, 2)
        assert index.slice_locs(3, 1) == (2, 1)
        assert index.slice_locs(3, 4) == (2, 3)
        assert index.slice_locs(0, 4) == (0, 3)

        # decreasing non-overlapping
        index = IntervalIndex.from_tuples([(3, 4), (1, 2), (0, 1)])
        assert index.slice_locs(0, 1) == (3, 2)
        assert index.slice_locs(0, 2) == (3, 1)
        assert index.slice_locs(0, 3) == (3, 1)
        assert index.slice_locs(3, 1) == (1, 2)
        assert index.slice_locs(3, 4) == (1, 0)
        assert index.slice_locs(0, 4) == (3, 0)

    @pytest.mark.parametrize("query", [[0, 1], [0, 2], [0, 3],
                                       [3, 1], [3, 4], [0, 4]])
    def test_slice_locs_with_ints_and_floats_fails(self, query):
        """``slice_locs`` with scalar bounds raises on overlapping or duplicated indexes."""
        # ``query`` is a ``[start, end]`` pair. It is unpacked so that both
        # bounds reach ``slice_locs``; passing the list itself would hand the
        # whole list over as ``start``.
        start, end = query

        tuple_sets = [
            # increasing overlapping
            [(0, 2), (1, 3), (2, 4)],
            # decreasing overlapping
            [(2, 4), (1, 3), (0, 2)],
            # sorted duplicates
            [(0, 2), (0, 2), (2, 4)],
            # unsorted duplicates
            [(0, 2), (2, 4), (0, 2)],
            # another unsorted duplicates
            [(0, 2), (0, 2), (2, 4), (1, 3)],
        ]
        for tuples in tuple_sets:
            index = IntervalIndex.from_tuples(tuples)
            with pytest.raises(KeyError):
                index.slice_locs(start, end)

    # Each query is paired with its own expected value in one parametrize
    # call. Stacking two parametrize decorators would instead run every
    # query against every expected value.
    @pytest.mark.parametrize("query, expected_result", [
        (Interval(1, 3, closed='right'), 1),
        (Interval(1, 3, closed='left'), -1),
        (Interval(1, 3, closed='both'), -1),
        (Interval(1, 3, closed='neither'), -1),
        (Interval(1, 4, closed='right'), -1),
        (Interval(0, 4, closed='right'), -1),
        (Interval(1, 2, closed='right'), -1),
    ])
    def test_get_indexer_with_interval_single_queries(self, query,
                                                      expected_result):
        """``get_indexer`` with one Interval query matches exact intervals only."""
        index = IntervalIndex.from_tuples(
            [(0, 2.5), (1, 3), (2, 4)], closed='right')

        result = index.get_indexer([query])
        expect = np.array([expected_result], dtype='intp')
        tm.assert_numpy_array_equal(result, expect)

    @pytest.mark.parametrize("query, expected_result", [
        ([Interval(2, 4, closed='right'), Interval(1, 3, closed='right')],
         [2, 1]),
        ([Interval(1, 3, closed='right'), Interval(0, 2, closed='right')],
         [1, -1]),
        ([Interval(1, 3, closed='right'), Interval(1, 3, closed='left')],
         [1, -1]),
    ])
    def test_get_indexer_with_interval_multiple_queries(
            self, query, expected_result):
        """``get_indexer`` with several Interval queries at once."""
        index = IntervalIndex.from_tuples(
            [(0, 2.5), (1, 3), (2, 4)], closed='right')

        result = index.get_indexer(query)
        expect = np.array(expected_result, dtype='intp')
        tm.assert_numpy_array_equal(result, expect)

    @pytest.mark.parametrize("query, expected_result", [
        (-0.5, -1),
        (0, -1),
        (0.5, 0),
        (1, 0),
        (1.5, 1),
        (2, 1),
        (2.5, -1),
        (3, -1),
        (3.5, 2),
        (4, 2),
        (4.5, -1),
    ])
    def test_get_indexer_with_ints_and_floats_single_queries(
            self, query, expected_result):
        """``get_indexer`` with one scalar query on a non-overlapping index."""
        index = IntervalIndex.from_tuples(
            [(0, 1), (1, 2), (3, 4)], closed='right')

        result = index.get_indexer([query])
        expect = np.array([expected_result], dtype='intp')
        tm.assert_numpy_array_equal(result, expect)

    @pytest.mark.parametrize("query, expected_result", [
        ([1, 2], [0, 1]),
        ([1, 2, 3], [0, 1, -1]),
        ([1, 2, 3, 4], [0, 1, -1, 2]),
        ([1, 2, 3, 4, 2], [0, 1, -1, 2, 1]),
    ])
    def test_get_indexer_with_ints_and_floats_multiple_queries(
            self, query, expected_result):
        """``get_indexer`` with several scalar queries on a non-overlapping index."""
        index = IntervalIndex.from_tuples(
            [(0, 1), (1, 2), (3, 4)], closed='right')

        result = index.get_indexer(query)
        expect = np.array(expected_result, dtype='intp')
        tm.assert_numpy_array_equal(result, expect)

        # An overlapping index is built here but nothing is asserted on it
        # yet; see the TODO below.
        index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)])
        # TODO: @shoyer believes this should raise, master branch doesn't

    # NOTE(review): both non-unique tests below hand a tuple to
    # ``tm.assert_numpy_array_equal``; confirm that this is the intended
    # comparison for the pair returned by ``get_indexer_non_unique``.
    @pytest.mark.parametrize("query, expected_result", [
        (-0.5, (Int64Index([], dtype='int64'), np.array([0]))),
        (0, (Int64Index([0], dtype='int64'), np.array([]))),
        (0.5, (Int64Index([0], dtype='int64'), np.array([]))),
        (1, (Int64Index([0, 1], dtype='int64'), np.array([]))),
        (1.5, (Int64Index([0, 1], dtype='int64'), np.array([]))),
        (2, (Int64Index([0, 1, 2], dtype='int64'), np.array([]))),
        (2.5, (Int64Index([1, 2], dtype='int64'), np.array([]))),
        (3, (Int64Index([2], dtype='int64'), np.array([]))),
        (3.5, (Int64Index([2], dtype='int64'), np.array([]))),
        (4, (Int64Index([], dtype='int64'), np.array([0]))),
        (4.5, (Int64Index([], dtype='int64'), np.array([0]))),
    ])
    def test_get_indexer_non_unique_with_ints_and_floats_single_queries(
            self, query, expected_result):
        """``get_indexer_non_unique`` with one scalar query on an overlapping index."""
        index = IntervalIndex.from_tuples(
            [(0, 2.5), (1, 3), (2, 4)], closed='left')

        result = index.get_indexer_non_unique([query])
        tm.assert_numpy_array_equal(result, expected_result)

    @pytest.mark.parametrize("query, expected_result", [
        ([1, 2],
         (Int64Index([0, 1, 0, 1, 2], dtype='int64'), np.array([]))),
        ([1, 2, 3],
         (Int64Index([0, 1, 0, 1, 2, 2], dtype='int64'), np.array([]))),
        ([1, 2, 3, 4],
         (Int64Index([0, 1, 0, 1, 2, 2, -1], dtype='int64'),
          np.array([3]))),
        ([1, 2, 3, 4, 2],
         (Int64Index([0, 1, 0, 1, 2, 2, -1, 0, 1, 2], dtype='int64'),
          np.array([3]))),
    ])
    def test_get_indexer_non_unique_with_ints_and_floats_multiple_queries(
            self, query, expected_result):
        """``get_indexer_non_unique`` with several scalar queries on an overlapping index."""
        index = IntervalIndex.from_tuples(
            [(0, 2.5), (1, 3), (2, 4)], closed='left')

        result = index.get_indexer_non_unique(query)
        tm.assert_numpy_array_equal(result, expected_result)

    # TODO we may also want to test get_indexer for the case when
    # the intervals are duplicated, decreasing, non-monotonic, etc..

    def test_contains(self):
        """``in`` is true only for an Interval that exactly matches an element."""
        index = IntervalIndex.from_arrays([0, 1], [1, 2], closed='right')

        # __contains__ requires perfect matches to intervals.
        assert 0 not in index
        assert 1 not in index
        assert 2 not in index

        assert Interval(0, 1, closed='right') in index
        assert Interval(0, 2, closed='right') not in index
        assert Interval(0, 0.5, closed='right') not in index
        assert Interval(3, 5, closed='right') not in index
        assert Interval(-1, 0, closed='left') not in index
        assert Interval(0, 1, closed='left') not in index
        assert Interval(0, 1, closed='both') not in index

    def test_contains_method(self):
        """``index.contains`` with scalar keys and with Interval keys."""
        index = IntervalIndex.from_arrays([0, 1], [1, 2], closed='right')

        assert not index.contains(0)
        assert index.contains(0.1)
        assert index.contains(0.5)
        assert index.contains(1)

        # ``closed`` is an argument of ``Interval``, so it is passed there
        # and not to ``contains``.
        assert index.contains(Interval(0, 1, closed='right'))
        assert not index.contains(Interval(0, 1, closed='left'))
        assert not index.contains(Interval(0, 1, closed='both'))
        assert not index.contains(Interval(0, 2, closed='right'))

        assert not index.contains(Interval(0, 3, closed='right'))
        assert not index.contains(Interval(1, 3, closed='right'))

        assert not index.contains(20)
        assert not index.contains(-20)
dire_count = dire_count.reset_index() dire_sum = dire_df.groupby(['tbin', 'xbin', 'ybin'])[["winner"]].sum() dire_sum = dire_sum.reset_index() dire_mean = dire_df.groupby(['tbin', 'xbin', 'ybin'])[["winner"]].mean() dire_mean = dire_mean.reset_index() summary = DataFrame({ 'x': dire_count['xbin'], 'y': dire_count['ybin'], 't': dire_count['tbin'], 'mean': dire_mean['winner'], 'wins': dire_sum['winner'], 'total': dire_count['winner'] }) dire_mean.loc[dire_mean['tbin'] == Interval(0, 600)].plot.hexbin(x='xbin', y='ybin', C='winner', gridsize=16) dire_sum.loc[dire_sum['tbin'] == Interval(0, 600)].plot.hexbin(x='xbin', y='ybin', C='winner', gridsize=16) def plot_wards(query_data: DataFrame, weights: str, bins=16, ax_in=None): if ax_in is None: fig, ax_in = plt.subplots(figsize=(10, 10)) else: fig = plt.gcf()
class TestDataFrameSetItem:
    """
    Tests for ``DataFrame.__setitem__`` (``df[key] = value``), plus a few
    closely related ``.loc`` / ``.iloc`` assignment cases.
    """

    def test_setitem_str_subclass(self):
        """A ``str`` subclass instance can be used as a new column label."""
        # GH#37366
        class mystring(str):
            pass

        data = ["2020-10-22 01:21:00+00:00"]
        index = DatetimeIndex(data)
        df = DataFrame({"a": [1]}, index=index)
        df["b"] = 2
        df[mystring("c")] = 3
        expected = DataFrame({"a": [1], "b": [2], mystring("c"): [3]}, index=index)
        tm.assert_equal(df, expected)

    @pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"])
    def test_setitem_dtype(self, dtype, float_frame):
        """The dtype of an assigned ndarray is kept by the new column."""
        arr = np.random.randn(len(float_frame))

        # the column label is the dtype name itself
        float_frame[dtype] = np.array(arr, dtype=dtype)
        assert float_frame[dtype].dtype.name == dtype

    def test_setitem_list_not_dataframe(self, float_frame):
        """A 2D ndarray can be assigned to a list of two columns."""
        data = np.random.randn(len(float_frame), 2)
        float_frame[["A", "B"]] = data
        tm.assert_almost_equal(float_frame[["A", "B"]].values, data)

    def test_setitem_error_msmgs(self):
        """Error messages raised for values that cannot be aligned."""
        # GH 7432
        df = DataFrame(
            {"bar": [1, 2, 3], "baz": ["d", "e", "f"]},
            index=Index(["a", "b", "c"], name="foo"),
        )
        # the Series index repeats the label "a"
        ser = Series(
            ["g", "h", "i", "j"],
            index=Index(["a", "b", "c", "a"], name="foo"),
            name="fiz",
        )
        msg = "cannot reindex on an axis with duplicate labels"
        with pytest.raises(ValueError, match=msg):
            with tm.assert_produces_warning(FutureWarning, match="non-unique"):
                df["newcol"] = ser

        # GH 4107, more descriptive error message
        df = DataFrame(np.random.randint(0, 2, (4, 4)), columns=["a", "b", "c", "d"])

        msg = "incompatible index of inserted column with frame index"
        with pytest.raises(TypeError, match=msg):
            df["gr"] = df.groupby(["b", "c"]).count()

    def test_setitem_benchmark(self):
        """Inserting the same array under K integer labels builds an N x K frame."""
        # from the vb_suite/frame_methods/frame_insert_columns
        N = 10
        K = 5
        df = DataFrame(index=range(N))
        new_col = np.random.randn(N)
        for i in range(K):
            df[i] = new_col
        expected = DataFrame(np.repeat(new_col, K).reshape(N, K), index=range(N))
        tm.assert_frame_equal(df, expected)

    def test_setitem_different_dtype(self):
        """Adding or replacing columns with another dtype updates ``df.dtypes``."""
        df = DataFrame(
            np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"]
        )
        df.insert(0, "foo", df["a"])
        df.insert(2, "bar", df["c"])

        # diff dtype

        # new item
        df["x"] = df["a"].astype("float32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 5 + [np.dtype("float32")],
            index=["foo", "c", "bar", "b", "a", "x"],
        )
        tm.assert_series_equal(result, expected)

        # replacing current (in different block)
        df["a"] = df["a"].astype("float32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2,
            index=["foo", "c", "bar", "b", "a", "x"],
        )
        tm.assert_series_equal(result, expected)

        # a third dtype appended at the end
        df["y"] = df["a"].astype("int32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2 + [np.dtype("int32")],
            index=["foo", "c", "bar", "b", "a", "x", "y"],
        )
        tm.assert_series_equal(result, expected)

    def test_setitem_empty_columns(self):
        """A column set on a column-less frame can be overwritten with a list."""
        # GH 13522
        df = DataFrame(index=["A", "B", "C"])
        df["X"] = df.index
        df["X"] = ["x", "y", "z"]
        exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"])
        tm.assert_frame_equal(df, exp)

    def test_setitem_dt64_index_empty_columns(self):
        """A date_range assigned to a column-less frame gives an M8[ns] column."""
        rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s")
        df = DataFrame(index=np.arange(len(rng)))

        df["A"] = rng
        assert df["A"].dtype == np.dtype("M8[ns]")

    def test_setitem_timestamp_empty_columns(self):
        """A tz-aware Timestamp scalar is broadcast over a column-less frame."""
        # GH#19843
        df = DataFrame(index=range(3))
        df["now"] = Timestamp("20130101", tz="UTC")

        expected = DataFrame(
            [[Timestamp("20130101", tz="UTC")]] * 3, index=[0, 1, 2], columns=["now"]
        )
        tm.assert_frame_equal(df, expected)

    def test_setitem_wrong_length_categorical_dtype_raises(self):
        """A Categorical whose length differs from the index raises ValueError."""
        # GH#29523
        cat = Categorical.from_codes([0, 1, 1, 0, 1, 2], ["a", "b", "c"])
        df = DataFrame(range(10), columns=["bar"])

        # the message reports both lengths (6 values vs. 10 rows)
        msg = (
            rf"Length of values \({len(cat)}\) "
            rf"does not match length of index \({len(df)}\)"
        )
        with pytest.raises(ValueError, match=msg):
            df["foo"] = cat

    def test_setitem_with_sparse_value(self):
        """A SparseArray assigned as a column reads back as a sparse Series."""
        # GH#8131
        df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]})
        sp_array = SparseArray([0, 0, 1])
        df["new_column"] = sp_array

        expected = Series(sp_array, name="new_column")
        tm.assert_series_equal(df["new_column"], expected)

    def test_setitem_with_unaligned_sparse_value(self):
        """A sparse Series with a reversed index is aligned on assignment."""
        df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]})
        # index [2, 1, 0] is the reverse of the frame's default index
        sp_series = Series(SparseArray([0, 0, 1]), index=[2, 1, 0])

        df["new_column"] = sp_series
        expected = Series(SparseArray([1, 0, 0]), name="new_column")
        tm.assert_series_equal(df["new_column"], expected)

    def test_setitem_period_preserves_dtype(self):
        """A list of Periods set on an empty frame matches direct construction."""
        # GH: 26861
        data = [Period("2003-12", "D")]
        result = DataFrame([])
        result["a"] = data

        expected = DataFrame({"a": data})

        tm.assert_frame_equal(result, expected)

    def test_setitem_dict_preserves_dtypes(self):
        """Appending rows as dicts via ``.loc`` keeps the column dtypes."""
        # https://github.com/pandas-dev/pandas/issues/34573
        expected = DataFrame(
            {
                "a": Series([0, 1, 2], dtype="int64"),
                "b": Series([1, 2, 3], dtype=float),
                "c": Series([1, 2, 3], dtype=float),
            }
        )
        df = DataFrame(
            {
                "a": Series([], dtype="int64"),
                "b": Series([], dtype=float),
                "c": Series([], dtype=float),
            }
        )
        for idx, b in enumerate([1, 2, 3]):
            # ``df.shape[0]`` is the next unused integer row label
            df.loc[df.shape[0]] = {"a": int(idx), "b": float(b), "c": float(b)}
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "obj,dtype",
        [
            (Period("2020-01"), PeriodDtype("M")),
            (Interval(left=0, right=5), IntervalDtype("int64", "right")),
            (
                Timestamp("2011-01-01", tz="US/Eastern"),
                DatetimeTZDtype(tz="US/Eastern"),
            ),
        ],
    )
    def test_setitem_extension_types(self, obj, dtype):
        """A broadcast extension scalar produces a column of the matching dtype."""
        # GH: 34832
        expected = DataFrame({"idx": [1, 2, 3], "obj": Series([obj] * 3, dtype=dtype)})

        df = DataFrame({"idx": [1, 2, 3]})
        df["obj"] = obj

        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "ea_name",
        [
            dtype.name
            for dtype in ea_registry.dtypes
            # property would require instantiation
            if not isinstance(dtype.name, property)
        ]
        # mypy doesn't allow adding lists of different types
        # https://github.com/python/mypy/issues/5492
        + ["datetime64[ns, UTC]", "period[D]"],  # type: ignore[list-item]
    )
    def test_setitem_with_ea_name(self, ea_name):
        """A column label that equals an extension dtype name is a plain label."""
        # GH 38386
        result = DataFrame([0])
        result[ea_name] = [1]
        expected = DataFrame({0: [0], ea_name: [1]})
        tm.assert_frame_equal(result, expected)

    def test_setitem_dt64_ndarray_with_NaT_and_diff_time_units(self):
        """datetime64 arrays containing NaT, in ns and s units, can be assigned."""
        # GH#7492
        data_ns = np.array([1, "nat"], dtype="datetime64[ns]")
        result = Series(data_ns).to_frame()
        result["new"] = data_ns
        expected = DataFrame({0: [1, None], "new": [1, None]}, dtype="datetime64[ns]")
        tm.assert_frame_equal(result, expected)

        # OutOfBoundsDatetime error shouldn't occur
        data_s = np.array([1, "nat"], dtype="datetime64[s]")
        result["new"] = data_s
        # 1 second is expected to be stored as 1e9 nanoseconds
        expected = DataFrame({0: [1, None], "new": [1e9, None]}, dtype="datetime64[ns]")
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"])
    def test_frame_setitem_datetime64_col_other_units(self, unit):
        # Check that non-nano dt64 values get cast to dt64 on setitem
        #  into a not-yet-existing column
        n = 100

        dtype = np.dtype(f"M8[{unit}]")
        vals = np.arange(n, dtype=np.int64).view(dtype)
        ex_vals = vals.astype("datetime64[ns]")

        df = DataFrame({"ints": np.arange(n)}, index=np.arange(n))
        # the unit string doubles as the new column label
        df[unit] = vals

        assert df[unit].dtype == np.dtype("M8[ns]")
        assert (df[unit].values == ex_vals).all()

    @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"])
    def test_frame_setitem_existing_datetime64_col_other_units(self, unit):
        # Check that non-nano dt64 values get cast to dt64 on setitem
        #  into an already-existing dt64 column
        n = 100

        dtype = np.dtype(f"M8[{unit}]")
        vals = np.arange(n, dtype=np.int64).view(dtype)
        ex_vals = vals.astype("datetime64[ns]")

        df = DataFrame({"ints": np.arange(n)}, index=np.arange(n))
        df["dates"] = np.arange(n, dtype=np.int64).view("M8[ns]")

        # We overwrite existing dt64 column with new, non-nano dt64 vals
        df["dates"] = vals
        assert (df["dates"].values == ex_vals).all()

    def test_setitem_dt64tz(self, timezone_frame):
        """Setting columns from column "B" of the ``timezone_frame`` fixture."""
        df = timezone_frame
        idx = df["B"].rename("foo")

        # setitem
        df["C"] = idx
        tm.assert_series_equal(df["C"], Series(idx, name="C"))

        # overwrite an existing string column with the same values
        df["D"] = "foo"
        df["D"] = idx
        tm.assert_series_equal(df["D"], Series(idx, name="D"))
        del df["D"]

        # assert that A & C are not sharing the same base (e.g. they
        # are copies)
        # NOTE(review): the arrays inspected are at manager positions 1 and 2;
        # confirm these are the columns the comment above refers to.
        v1 = df._mgr.arrays[1]
        v2 = df._mgr.arrays[2]
        tm.assert_extension_array_equal(v1, v2)
        v1base = v1._data.base
        v2base = v2._data.base
        assert v1base is None or (id(v1base) != id(v2base))

        # with nan
        df2 = df.copy()
        df2.iloc[1, 1] = NaT
        df2.iloc[1, 2] = NaT
        result = df2["B"]
        tm.assert_series_equal(notna(result), Series([True, False, True], name="B"))
        tm.assert_series_equal(df2.dtypes, df.dtypes)

    def test_setitem_periodindex(self):
        """A PeriodIndex stored as a column round-trips through Index/set_index."""
        rng = period_range("1/1/2000", periods=5, name="index")
        df = DataFrame(np.random.randn(5, 3), index=rng)

        df["Index"] = rng
        rs = Index(df["Index"])
        # same values; the name comes from the column label, not from ``rng``
        tm.assert_index_equal(rs, rng, check_names=False)
        assert rs.name == "Index"
        assert rng.name == "index"

        rs = df.reset_index().set_index("index")
        assert isinstance(rs.index, PeriodIndex)
        tm.assert_index_equal(rs.index, rng)

    def test_setitem_complete_column_with_array(self):
        """A 2D ndarray assigned to two new columns keeps the array's dtype."""
        # GH#37954
        df = DataFrame({"a": ["one", "two", "three"], "b": [1, 2, 3]})
        arr = np.array([[1, 1], [3, 1], [5, 1]])
        df[["c", "d"]] = arr
        expected = DataFrame(
            {
                "a": ["one", "two", "three"],
                "b": [1, 2, 3],
                "c": [1, 3, 5],
                "d": [1, 1, 1],
            }
        )
        # cast the expected columns to whatever integer dtype ``arr`` has
        expected["c"] = expected["c"].astype(arr.dtype)
        expected["d"] = expected["d"].astype(arr.dtype)
        assert expected["c"].dtype == arr.dtype
        assert expected["d"].dtype == arr.dtype
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize("dtype", ["f8", "i8", "u8"])
    def test_setitem_bool_with_numeric_index(self, dtype):
        """Adding a ``False`` label to numeric columns gives object columns."""
        # GH#36319
        cols = Index([1, 2, 3], dtype=dtype)
        df = DataFrame(np.random.randn(3, 3), columns=cols)

        df[False] = ["a", "b", "c"]

        expected_cols = Index([1, 2, 3, False], dtype=object)
        if dtype == "f8":
            # float labels are expected to stay floats
            expected_cols = Index([1.0, 2.0, 3.0, False], dtype=object)

        tm.assert_index_equal(df.columns, expected_cols)

    @pytest.mark.parametrize("indexer", ["B", ["B"]])
    def test_setitem_frame_length_0_str_key(self, indexer):
        """Assigning a DataFrame into a column of a zero-row frame adds its rows."""
        # GH#38831
        df = DataFrame(columns=["A", "B"])
        other = DataFrame({"B": [1, 2]})
        df[indexer] = other
        expected = DataFrame({"A": [np.nan] * 2, "B": [1, 2]})
        # the untouched column "A" is expected to be object dtype
        expected["A"] = expected["A"].astype("object")
        tm.assert_frame_equal(df, expected)

    def test_setitem_frame_duplicate_columns(self, using_array_manager):
        """Setting by a label that occurs twice writes to both matching columns."""
        # GH#15695
        cols = ["A", "B", "C"] * 2
        df = DataFrame(index=range(3), columns=cols)
        # three assignment routes: scalar-row .loc, full-column .loc, setitem
        df.loc[0, "A"] = (0, 3)
        df.loc[:, "B"] = (1, 4)
        df["C"] = (2, 5)
        expected = DataFrame(
            [
                [0, 1, 2, 3, 4, 5],
                [np.nan, 1, 2, np.nan, 4, 5],
                [np.nan, 1, 2, np.nan, 4, 5],
            ],
            dtype="object",
        )

        if using_array_manager:
            # setitem replaces column so changes dtype

            expected.columns = cols
            expected["C"] = expected["C"].astype("int64")
            # TODO(ArrayManager) .loc still overwrites
            expected["B"] = expected["B"].astype("int64")
        else:
            # set these with unique columns to be extra-unambiguous
            expected[2] = expected[2].astype(np.int64)
            expected[5] = expected[5].astype(np.int64)
            expected.columns = cols

        tm.assert_frame_equal(df, expected)

    def test_setitem_frame_duplicate_columns_size_mismatch(self):
        """A one-label list key with a 3-tuple value raises ValueError."""
        # GH#39510
        cols = ["A", "B", "C"] * 2
        df = DataFrame(index=range(3), columns=cols)
        with pytest.raises(ValueError, match="Columns must be same length as key"):
            df[["A"]] = (0, 3, 5)

        df2 = df.iloc[:, :3]  # unique columns
        with pytest.raises(ValueError, match="Columns must be same length as key"):
            df2[["A"]] = (0, 3, 5)

    @pytest.mark.parametrize("cols", [["a", "b", "c"], ["a", "a", "a"]])
    def test_setitem_df_wrong_column_number(self, cols):
        """Assigning a two-column frame to the single key "a" raises ValueError."""
        # GH#38604
        df = DataFrame([[1, 2, 3]], columns=cols)
        rhs = DataFrame([[10, 11]], columns=["d", "e"])
        msg = "Columns must be same length as key"
        with pytest.raises(ValueError, match=msg):
            df["a"] = rhs

    def test_setitem_listlike_indexer_duplicate_columns(self):
        """A list key on a frame with a duplicated label sets all matches."""
        # GH#38604
        df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
        rhs = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
        df[["a", "b"]] = rhs
        expected = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
        tm.assert_frame_equal(df, expected)

        # "c" does not exist yet and is expected to be appended
        df[["c", "b"]] = rhs
        expected = DataFrame([[10, 11, 12, 10]], columns=["a", "b", "b", "c"])
        tm.assert_frame_equal(df, expected)

    def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self):
        """A two-column value for a key matching three columns raises ValueError."""
        # GH#39403
        df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
        rhs = DataFrame([[10, 11]], columns=["a", "b"])
        msg = "Columns must be same length as key"
        with pytest.raises(ValueError, match=msg):
            df[["a", "b"]] = rhs

    def test_setitem_intervals(self):
        """Resulting dtypes when assigning interval data in several forms."""
        df = DataFrame({"A": range(10)})
        ser = cut(df["A"], 5)
        assert isinstance(ser.cat.categories, IntervalIndex)

        # B & D end up as Categoricals
        # C & E are ndarray copies and F is an object-dtype Series; their
        # resulting dtypes are asserted further down
        df["B"] = ser
        df["C"] = np.array(ser)
        df["D"] = ser.values
        df["E"] = np.array(ser.values)
        df["F"] = ser.astype(object)

        assert is_categorical_dtype(df["B"].dtype)
        assert is_interval_dtype(df["B"].cat.categories)
        assert is_categorical_dtype(df["D"].dtype)
        assert is_interval_dtype(df["D"].cat.categories)

        # These go through the Series constructor and so get inferred back
        #  to IntervalDtype
        assert is_interval_dtype(df["C"])
        assert is_interval_dtype(df["E"])

        # But the Series constructor doesn't do inference on Series objects,
        #  so setting df["F"] doesn't get cast back to IntervalDtype
        assert is_object_dtype(df["F"])

        # they compare equal as Index
        # when converted to numpy objects
        c = lambda x: Index(np.array(x))
        tm.assert_index_equal(c(df.B), c(df.B))
        tm.assert_index_equal(c(df.B), c(df.C), check_names=False)
        tm.assert_index_equal(c(df.B), c(df.D), check_names=False)
        tm.assert_index_equal(c(df.C), c(df.D), check_names=False)

        # B & D are the same Series
        tm.assert_series_equal(df["B"], df["B"])
        tm.assert_series_equal(df["B"], df["D"], check_names=False)

        # C & E are the same Series
        tm.assert_series_equal(df["C"], df["C"])
        tm.assert_series_equal(df["C"], df["E"], check_names=False)

    def test_setitem_categorical(self):
        """Attribute-style assignment of a reordered categorical keeps the order."""
        # GH#35369
        df = DataFrame({"h": Series(list("mn")).astype("category")})
        df.h = df.h.cat.reorder_categories(["n", "m"])
        expected = DataFrame(
            {"h": Categorical(["m", "n"]).reorder_categories(["n", "m"])}
        )
        tm.assert_frame_equal(df, expected)

    def test_setitem_with_empty_listlike(self):
        """Assigning ``[]`` to a column of an empty frame keeps the named index."""
        # GH#17101
        index = Index([], name="idx")
        result = DataFrame(columns=["A"], index=index)
        result["A"] = []
        expected = DataFrame(columns=["A"], index=index)
        # only the index is compared here, not the whole frame
        tm.assert_index_equal(result.index, expected.index)

    @pytest.mark.parametrize(
        "cols, values, expected",
        [
            (["C", "D", "D", "a"], [1, 2, 3, 4], 4),  # with duplicates
            (["D", "C", "D", "a"], [1, 2, 3, 4], 4),  # mixed order
            (["C", "B", "B", "a"], [1, 2, 3, 4], 4),  # other duplicate cols
            (["C", "B", "a"], [1, 2, 3], 3),  # no duplicates
            (["B", "C", "a"], [3, 2, 1], 1),  # alphabetical order
            (["C", "a", "B"], [3, 2, 1], 2),  # in the middle
        ],
    )
    def test_setitem_same_column(self, cols, values, expected):
        """Assigning column "a" to itself leaves its value unchanged."""
        # GH#23239
        df = DataFrame([values], columns=cols)
        df["a"] = df["a"]
        result = df["a"].values[0]
        assert result == expected

    def test_setitem_multi_index(self):
        # GH#7655, test that assigning to a sub-frame of a frame
        # with multi-index columns aligns both rows and columns
        it = ["jim", "joe", "jolie"], ["first", "last"], ["left", "center", "right"]

        cols = MultiIndex.from_product(it)
        index = date_range("20141006", periods=20)
        vals = np.random.randint(1, 1000, (len(index), len(cols)))
        df = DataFrame(vals, columns=cols, index=index)

        # ``i`` is a copy of the row labels, ``j`` a copy of the innermost
        # column labels; both are shuffled in place below
        i, j = df.index.values.copy(), it[-1][:]

        np.random.shuffle(i)
        df["jim"] = df["jolie"].loc[i, ::-1]
        tm.assert_frame_equal(df["jim"], df["jolie"])

        np.random.shuffle(j)
        df[("joe", "first")] = df[("jolie", "last")].loc[i, j]
        tm.assert_frame_equal(df[("joe", "first")], df[("jolie", "last")])

        np.random.shuffle(j)
        df[("joe", "last")] = df[("jolie", "first")].loc[i, j]
        tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")])

    @pytest.mark.parametrize(
        "columns,box,expected",
        [
            (
                ["A", "B", "C", "D"],
                7,
                DataFrame(
                    [[7, 7, 7, 7], [7, 7, 7, 7], [7, 7, 7, 7]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["C", "D"],
                [7, 8],
                DataFrame(
                    [[1, 2, 7, 8], [3, 4, 7, 8], [5, 6, 7, 8]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["A", "B", "C"],
                np.array([7, 8, 9], dtype=np.int64),
                DataFrame([[7, 8, 9], [7, 8, 9], [7, 8, 9]], columns=["A", "B", "C"]),
            ),
            (
                ["B", "C", "D"],
                [[7, 8, 9], [10, 11, 12], [13, 14, 15]],
                DataFrame(
                    [[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["C", "A", "D"],
                np.array([[7, 8, 9], [10, 11, 12], [13, 14, 15]], dtype=np.int64),
                DataFrame(
                    [[8, 2, 7, 9], [11, 4, 10, 12], [14, 6, 13, 15]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["A", "C"],
                DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]),
                DataFrame(
                    [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"]
                ),
            ),
        ],
    )
    def test_setitem_list_missing_columns(self, columns, box, expected):
        """A list key may name columns that do not exist yet; they are added."""
        # GH#29334
        df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
        df[columns] = box
        tm.assert_frame_equal(df, expected)

    def test_setitem_list_of_tuples(self, float_frame):
        """A list of tuples is stored as one column holding the tuples."""
        tuples = list(zip(float_frame["A"], float_frame["B"]))
        float_frame["tuples"] = tuples

        result = float_frame["tuples"]
        expected = Series(tuples, index=float_frame.index, name="tuples")
        tm.assert_series_equal(result, expected)

    def test_setitem_iloc_generator(self):
        """A generator of positions is accepted as an ``.iloc`` row indexer."""
        # GH#39614
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        indexer = (x for x in [1, 2])
        df.iloc[indexer] = 1
        expected = DataFrame({"a": [1, 1, 1], "b": [4, 1, 1]})
        tm.assert_frame_equal(df, expected)

    def test_setitem_iloc_two_dimensional_generator(self):
        """A generator row indexer combined with a column position in ``.iloc``."""
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        indexer = (x for x in [1, 2])
        df.iloc[indexer, 1] = 1
        expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]})
        tm.assert_frame_equal(df, expected)

    def test_setitem_dtypes_bytes_type_to_object(self):
        """Empty bytes-dtype ("S64") columns are reported as object dtype."""
        # GH 20734
        index = Series(name="id", dtype="S24")
        df = DataFrame(index=index)
        df["a"] = Series(name="a", index=index, dtype=np.uint32)
        df["b"] = Series(name="b", index=index, dtype="S64")
        df["c"] = Series(name="c", index=index, dtype="S64")
        df["d"] = Series(name="d", index=index, dtype=np.uint8)
        result = df.dtypes
        expected = Series([np.uint32, object, object, np.uint8], index=list("abcd"))
        tm.assert_series_equal(result, expected)

    def test_boolean_mask_nullable_int64(self):
        """An all-False mask in ``.loc`` assignment leaves values and dtypes as-is."""
        # GH 28928
        result = DataFrame({"a": [3, 4], "b": [5, 6]}).astype(
            {"a": "int64", "b": "Int64"}
        )
        mask = Series(False, index=result.index)
        result.loc[mask, "a"] = result["a"]
        result.loc[mask, "b"] = result["b"]
        expected = DataFrame({"a": [3, 4], "b": [5, 6]}).astype(
            {"a": "int64", "b": "Int64"}
        )
        tm.assert_frame_equal(result, expected)

    # TODO(ArrayManager) set column with 2d column array, see #44788
    @td.skip_array_manager_not_yet_implemented
    def test_setitem_npmatrix_2d(self):
        """An (N, 1) ndarray and an (N, 1) ``np.matrix`` can each be set as a column."""
        # GH#42376
        # for use-case df["x"] = sparse.random(10, 10).mean(axis=1)
        expected = DataFrame(
            {"np-array": np.ones(10), "np-matrix": np.ones(10)}, index=np.arange(10)
        )

        a = np.ones((10, 1))
        df = DataFrame(index=np.arange(10))
        df["np-array"] = a

        # Instantiation of `np.matrix` gives PendingDeprecationWarning
        with tm.assert_produces_warning(PendingDeprecationWarning):
            df["np-matrix"] = np.matrix(a)

        tm.assert_frame_equal(df, expected)
def test_loc_with_overlap(self):
    """
    Label-based selection on a Series indexed by overlapping intervals.

    Covers scalar keys, Interval keys (which must match an index entry
    exactly), lists of either, and slices.
    """
    idx = IntervalIndex.from_tuples([(1, 5), (3, 7)])
    s = Series(range(len(idx)), index=idx)

    # scalar: 4 lies in both (1, 5] and (3, 7], so every form of the lookup
    # returns the whole Series
    expected = s
    result = s.loc[4]
    tm.assert_series_equal(expected, result)

    result = s[4]
    tm.assert_series_equal(expected, result)

    result = s.loc[[4]]
    tm.assert_series_equal(expected, result)

    result = s[[4]]
    tm.assert_series_equal(expected, result)

    # interval: an exact match returns the scalar value.
    # These comparisons must be asserted; a bare ``result == expected``
    # expression statement checks nothing.
    expected = 0
    result = s.loc[Interval(1, 5)]
    assert result == expected

    result = s[Interval(1, 5)]
    assert result == expected

    expected = s
    result = s.loc[[Interval(1, 5), Interval(3, 7)]]
    tm.assert_series_equal(expected, result)

    result = s[[Interval(1, 5), Interval(3, 7)]]
    tm.assert_series_equal(expected, result)

    # an interval that only overlaps the index entries is a missing key
    with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")):
        s.loc[Interval(3, 5)]

    with pytest.raises(KeyError, match=r"^\[Interval\(3, 5, closed='right'\)\]$"):
        s.loc[[Interval(3, 5)]]

    with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")):
        s[Interval(3, 5)]

    with pytest.raises(KeyError, match=r"^\[Interval\(3, 5, closed='right'\)\]$"):
        s[[Interval(3, 5)]]

    # slices with interval (only exact matches)
    expected = s
    result = s.loc[Interval(1, 5):Interval(3, 7)]
    tm.assert_series_equal(expected, result)

    result = s[Interval(1, 5):Interval(3, 7)]
    tm.assert_series_equal(expected, result)

    # Parenthesised so both halves of the message are unambiguously part of
    # the same string literal.
    msg = (
        "'can only get slices from an IntervalIndex if bounds are"
        " non-overlapping and all monotonic increasing or decreasing'"
    )
    with pytest.raises(KeyError, match=msg):
        s.loc[Interval(1, 6):Interval(3, 8)]

    with pytest.raises(KeyError, match=msg):
        s[Interval(1, 6):Interval(3, 8)]

    # slices with scalar raise for overlapping intervals
    # TODO KeyError is the appropriate error?
    with pytest.raises(KeyError, match=msg):
        s.loc[1:4]
def make_data():
    """
    Build 100 random ``Interval`` objects.

    Left endpoints are the running sum of uniform draws (so they never
    decrease); each right endpoint is its left endpoint plus another
    uniform draw.
    """
    N = 100
    starts = np.random.uniform(size=N).cumsum()
    stops = starts + np.random.uniform(size=N)
    return [Interval(lo, hi) for lo, hi in zip(starts, stops)]
class TestIntervalIndex:
    """
    Indexing tests for ``IntervalIndex``: ``get_loc``, ``slice_locs``,
    ``get_indexer``, ``get_indexer_non_unique`` and ``__contains__``.

    ``closed`` is a fixture supplied from outside this class.
    """

    @pytest.mark.parametrize("side", ["right", "left", "both", "neither"])
    def test_get_loc_interval(self, closed, side):
        """``get_loc`` with an Interval key only finds exact matches."""
        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3], [-1, 4]]:
            # if get_loc is supplied an interval, it should only search
            #   for exact matches, not overlaps or covers, else KeyError.
            msg = re.escape(f"Interval({bound[0]}, {bound[1]}, closed='{side}')")
            if closed == side:
                if bound == [0, 1]:
                    assert idx.get_loc(Interval(0, 1, closed=side)) == 0
                elif bound == [2, 3]:
                    assert idx.get_loc(Interval(2, 3, closed=side)) == 1
                else:
                    with pytest.raises(KeyError, match=msg):
                        idx.get_loc(Interval(*bound, closed=side))
            else:
                # a different ``closed`` never matches, even with equal bounds
                with pytest.raises(KeyError, match=msg):
                    idx.get_loc(Interval(*bound, closed=side))

    @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5])
    def test_get_loc_scalar(self, closed, scalar):
        """``get_loc`` with a scalar returns the position of the containing interval."""
        # correct = {side: {query: answer}}.
        # If query is not in the dict, that query should raise a KeyError
        correct = {
            "right": {0.5: 0, 1: 0, 2.5: 1, 3: 1},
            "left": {0: 0, 0.5: 0, 2: 1, 2.5: 1},
            "both": {0: 0, 0.5: 0, 1: 0, 2: 1, 2.5: 1, 3: 1},
            "neither": {0.5: 0, 2.5: 1},
        }

        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        # if get_loc is supplied a scalar, it should return the index of
        #   the interval which contains the scalar, or KeyError.
        if scalar in correct[closed]:
            assert idx.get_loc(scalar) == correct[closed][scalar]
        else:
            with pytest.raises(KeyError, match=str(scalar)):
                idx.get_loc(scalar)

    def test_slice_locs_with_interval(self):
        """``slice_locs`` with Interval bounds over several index orderings."""
        # increasing monotonically
        index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)])

        assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 1)
        assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 1)

        # decreasing monotonically
        index = IntervalIndex.from_tuples([(2, 4), (1, 3), (0, 2)])

        assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (2, 1)
        assert index.slice_locs(start=Interval(0, 2)) == (2, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 1)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (0, 3)

        # sorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)])

        assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
        assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 2)

        # unsorted duplicates: a duplicated, non-adjacent label cannot be
        # used as a slice bound
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])

        with pytest.raises(
            KeyError,
            match=re.escape(
                '"Cannot get left slice bound for non-unique label:'
                " Interval(0, 2, closed='right')\""),
        ):
            index.slice_locs(start=Interval(0, 2), end=Interval(2, 4))

        with pytest.raises(
            KeyError,
            match=re.escape(
                '"Cannot get left slice bound for non-unique label:'
                " Interval(0, 2, closed='right')\""),
        ):
            index.slice_locs(start=Interval(0, 2))

        assert index.slice_locs(end=Interval(2, 4)) == (0, 2)

        with pytest.raises(
            KeyError,
            match=re.escape(
                '"Cannot get right slice bound for non-unique label:'
                " Interval(0, 2, closed='right')\""),
        ):
            index.slice_locs(end=Interval(0, 2))

        with pytest.raises(
            KeyError,
            match=re.escape(
                '"Cannot get right slice bound for non-unique label:'
                " Interval(0, 2, closed='right')\""),
        ):
            index.slice_locs(start=Interval(2, 4), end=Interval(0, 2))

        # another unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4), (1, 3)])

        assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 4)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
        assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 2)

    def test_slice_locs_with_ints_and_floats_succeeds(self):
        """``slice_locs`` with scalar bounds on non-overlapping intervals."""
        # increasing non-overlapping
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])

        assert index.slice_locs(0, 1) == (0, 1)
        assert index.slice_locs(0, 2) == (0, 2)
        assert index.slice_locs(0, 3) == (0, 2)
        assert index.slice_locs(3, 1) == (2, 1)
        assert index.slice_locs(3, 4) == (2, 3)
        assert index.slice_locs(0, 4) == (0, 3)

        # decreasing non-overlapping
        index = IntervalIndex.from_tuples([(3, 4), (1, 2), (0, 1)])
        assert index.slice_locs(0, 1) == (3, 3)
        assert index.slice_locs(0, 2) == (3, 2)
        assert index.slice_locs(0, 3) == (3, 1)
        assert index.slice_locs(3, 1) == (1, 3)
        assert index.slice_locs(3, 4) == (1, 1)
        assert index.slice_locs(0, 4) == (3, 1)

    # The argname must be "query" to match the test function's parameter.
    @pytest.mark.parametrize("query", [[0, 1], [0, 2], [0, 3], [0, 4]])
    @pytest.mark.parametrize(
        "tuples",
        [
            [(0, 2), (1, 3), (2, 4)],
            [(2, 4), (1, 3), (0, 2)],
            [(0, 2), (0, 2), (2, 4)],
            [(0, 2), (2, 4), (0, 2)],
            [(0, 2), (0, 2), (2, 4), (1, 3)],
        ],
    )
    def test_slice_locs_with_ints_and_floats_errors(self, tuples, query):
        """Scalar ``slice_locs`` raises KeyError for every index listed above."""
        start, stop = query
        index = IntervalIndex.from_tuples(tuples)
        with pytest.raises(
            KeyError,
            match=(
                "'can only get slices from an IntervalIndex if bounds are"
                " non-overlapping and all monotonic increasing or decreasing'"
            ),
        ):
            index.slice_locs(start, stop)

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([Interval(2, 4, closed="right")], [1]),
            ([Interval(2, 4, closed="left")], [-1]),
            ([Interval(2, 4, closed="both")], [-1]),
            ([Interval(2, 4, closed="neither")], [-1]),
            ([Interval(1, 4, closed="right")], [-1]),
            ([Interval(0, 4, closed="right")], [-1]),
            ([Interval(0.5, 1.5, closed="right")], [-1]),
            ([Interval(2, 4, closed="right"), Interval(0, 1, closed="right")], [1, -1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="right")], [1, 1]),
            ([Interval(5, 7, closed="right"), Interval(2, 4, closed="right")], [2, 1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="left")], [1, -1]),
        ],
    )
    def test_get_indexer_with_interval(self, query, expected):
        """``get_indexer`` with Interval queries: exact match or -1."""
        tuples = [(0, 2), (2, 4), (5, 7)]
        index = IntervalIndex.from_tuples(tuples, closed="right")

        result = index.get_indexer(query)
        expected = np.array(expected, dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], [-1]),
            ([0], [-1]),
            ([0.5], [0]),
            ([1], [0]),
            ([1.5], [1]),
            ([2], [1]),
            ([2.5], [-1]),
            ([3], [-1]),
            ([3.5], [2]),
            ([4], [2]),
            ([4.5], [-1]),
            ([1, 2], [0, 1]),
            ([1, 2, 3], [0, 1, -1]),
            ([1, 2, 3, 4], [0, 1, -1, 2]),
            ([1, 2, 3, 4, 2], [0, 1, -1, 2, 1]),
        ],
    )
    def test_get_indexer_with_int_and_float(self, query, expected):
        """``get_indexer`` with scalar queries: containing interval or -1."""
        tuples = [(0, 1), (1, 2), (3, 4)]
        index = IntervalIndex.from_tuples(tuples, closed="right")

        result = index.get_indexer(query)
        expected = np.array(expected, dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "tuples, closed",
        [
            ([(0, 2), (1, 3), (3, 4)], "neither"),
            ([(0, 5), (1, 4), (6, 7)], "left"),
            ([(0, 1), (0, 1), (1, 2)], "right"),
            ([(0, 1), (2, 3), (3, 4)], "both"),
        ],
    )
    def test_get_indexer_errors(self, tuples, closed):
        """``get_indexer`` raises InvalidIndexError on overlapping intervals."""
        # IntervalIndex needs non-overlapping for uniqueness when querying
        index = IntervalIndex.from_tuples(tuples, closed=closed)

        msg = ("cannot handle overlapping indices; use "
               "IntervalIndex.get_indexer_non_unique")
        with pytest.raises(InvalidIndexError, match=msg):
            index.get_indexer([0, 2])

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], ([-1], [0])),
            ([0], ([0], [])),
            ([0.5], ([0], [])),
            ([1], ([0, 1], [])),
            ([1.5], ([0, 1], [])),
            ([2], ([0, 1, 2], [])),
            ([2.5], ([1, 2], [])),
            ([3], ([2], [])),
            ([3.5], ([2], [])),
            ([4], ([-1], [0])),
            ([4.5], ([-1], [0])),
            ([1, 2], ([0, 1, 0, 1, 2], [])),
            ([1, 2, 3], ([0, 1, 0, 1, 2, 2], [])),
            ([1, 2, 3, 4], ([0, 1, 0, 1, 2, 2, -1], [3])),
            ([1, 2, 3, 4, 2], ([0, 1, 0, 1, 2, 2, -1, 0, 1, 2], [3])),
        ],
    )
    def test_get_indexer_non_unique_with_int_and_float(self, query, expected):
        """``get_indexer_non_unique`` returns (indexer, missing) for overlaps."""
        tuples = [(0, 2.5), (1, 3), (2, 4)]
        index = IntervalIndex.from_tuples(tuples, closed="left")

        result_indexer, result_missing = index.get_indexer_non_unique(query)
        expected_indexer = np.array(expected[0], dtype="intp")
        expected_missing = np.array(expected[1], dtype="intp")

        tm.assert_numpy_array_equal(result_indexer, expected_indexer)
        tm.assert_numpy_array_equal(result_missing, expected_missing)

        # TODO we may also want to test get_indexer for the case when
        # the intervals are duplicated, decreasing, non-monotonic, etc..

    def test_contains_dunder(self):
        """``in`` on an IntervalIndex only matches identical intervals."""
        index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right")

        # __contains__ requires perfect matches to intervals.
        assert 0 not in index
        assert 1 not in index
        assert 2 not in index

        assert Interval(0, 1, closed="right") in index
        assert Interval(0, 2, closed="right") not in index
        assert Interval(0, 0.5, closed="right") not in index
        assert Interval(3, 5, closed="right") not in index
        assert Interval(-1, 0, closed="left") not in index
        assert Interval(0, 1, closed="left") not in index
        assert Interval(0, 1, closed="both") not in index
def test_length_errors(self, left, right):
    """
    ``Interval.length`` raises TypeError when the length of the interval
    built from ``left`` and ``right`` cannot be computed.
    """
    # GH 18789
    iv = Interval(left, right)
    msg = 'cannot compute length between .* and .*'
    # ``pytest.raises(..., match=...)`` is the form used by the other tests
    # in this file; like the helper it replaces, it regex-searches the message.
    with pytest.raises(TypeError, match=msg):
        iv.length
def test_slice_locs_with_interval(self):
    """Check ``IntervalIndex.slice_locs`` when the bounds are Interval objects.

    Five keyword combinations are probed, in a fixed order, against indexes
    that are increasing, decreasing, sorted-with-duplicates and unsorted with
    duplicates.
    """
    low = Interval(0, 2)
    high = Interval(2, 4)

    # keyword sets tried against every index, always in this order
    queries = [
        {"start": low, "end": high},
        {"start": low},
        {"end": high},
        {"end": low},
        {"start": high, "end": low},
    ]

    def check(tuples, expected_locs):
        # build the index once, then compare each query with its expected pair
        index = IntervalIndex.from_tuples(tuples)
        for kwargs, locs in zip(queries, expected_locs):
            assert index.slice_locs(**kwargs) == locs

    # increasing monotonically
    check([(0, 2), (1, 3), (2, 4)], [(0, 3), (0, 3), (0, 3), (0, 1), (2, 1)])

    # decreasing monotonically
    check([(2, 4), (1, 3), (0, 2)], [(2, 1), (2, 3), (0, 1), (0, 3), (0, 3)])

    # sorted duplicates
    check([(0, 2), (0, 2), (2, 4)], [(0, 3), (0, 3), (0, 3), (0, 2), (2, 2)])

    # unsorted duplicates
    index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])
    left_bound_msg = re.escape(
        '"Cannot get left slice bound for non-unique label:'
        " Interval(0, 2, closed='right')\""
    )
    right_bound_msg = re.escape(
        '"Cannot get right slice bound for non-unique label:'
        " Interval(0, 2, closed='right')\""
    )

    for kwargs in queries[:2]:
        with pytest.raises(KeyError, match=left_bound_msg):
            index.slice_locs(**kwargs)

    assert index.slice_locs(end=high) == (0, 2)

    for kwargs in queries[3:]:
        with pytest.raises(KeyError, match=right_bound_msg):
            index.slice_locs(**kwargs)

    # another unsorted duplicates
    check(
        [(0, 2), (0, 2), (2, 4), (1, 3)],
        [(0, 3), (0, 4), (0, 3), (0, 2), (2, 2)],
    )
class TestInterval(object):
    """Tests for the scalar ``Interval``: properties, containment, comparison,
    length and arithmetic.

    Tests that take ``interval`` rely on a fixture defined outside this block;
    the assertions in ``test_properties`` and ``test_repr`` expect it to be
    ``Interval(0, 1, closed='right')``.
    """

    def test_properties(self, interval):
        """Check ``closed``, ``left``, ``right`` and ``mid`` of the fixture."""
        assert interval.closed == 'right'
        assert interval.left == 0
        assert interval.right == 1
        assert interval.mid == 0.5

    def test_repr(self, interval):
        """Check ``repr`` and ``str`` for right-closed and left-closed intervals."""
        assert repr(interval) == "Interval(0, 1, closed='right')"
        assert str(interval) == "(0, 1]"

        interval_left = Interval(0, 1, closed='left')
        assert repr(interval_left) == "Interval(0, 1, closed='left')"
        assert str(interval_left) == "[0, 1)"

    def test_contains(self, interval):
        """Check point membership for each ``closed`` variant exercised here."""
        assert 0.5 in interval
        assert 1 in interval
        assert 0 not in interval

        msg = "__contains__ not defined for two intervals"
        with pytest.raises(TypeError, match=msg):
            interval in interval

        interval_both = Interval(0, 1, closed='both')
        assert 0 in interval_both
        assert 1 in interval_both

        interval_neither = Interval(0, 1, closed='neither')
        assert 0 not in interval_neither
        assert 0.5 in interval_neither
        assert 1 not in interval_neither

    def test_equal(self):
        """Check equality against matching, differently-closed and non-Interval values."""
        assert Interval(0, 1) == Interval(0, 1, closed='right')
        assert Interval(0, 1) != Interval(0, 1, closed='left')
        assert Interval(0, 1) != 0

    def test_comparison(self):
        """Check ordering between intervals and the error for a non-Interval operand."""
        with pytest.raises(TypeError, match='unorderable types'):
            Interval(0, 1) < 2

        assert Interval(0, 1) < Interval(1, 2)
        assert Interval(0, 1) < Interval(0, 2)
        assert Interval(0, 1) < Interval(0.5, 1.5)
        assert Interval(0, 1) <= Interval(0, 1)
        assert Interval(0, 1) > Interval(-1, 2)
        assert Interval(0, 1) >= Interval(0, 1)

    def test_hash(self, interval):
        """Check that hashing the fixture does not raise."""
        # should not raise
        hash(interval)

    @pytest.mark.parametrize(
        'left, right, expected',
        [
            (0, 5, 5),
            (-2, 5.5, 7.5),
            (10, 10, 0),
            (10, np.inf, np.inf),
            (-np.inf, -5, np.inf),
            (-np.inf, np.inf, np.inf),
            (Timedelta('0 days'), Timedelta('5 days'), Timedelta('5 days')),
            (Timedelta('10 days'), Timedelta('10 days'), Timedelta('0 days')),
            (Timedelta('1H10M'), Timedelta('5H5M'), Timedelta('3H55M')),
            (Timedelta('5S'), Timedelta('1H'), Timedelta('59M55S')),
        ],
    )
    def test_length(self, left, right, expected):
        """Check ``Interval.length`` for numeric and Timedelta endpoints."""
        # GH 18789
        iv = Interval(left, right)
        result = iv.length
        assert result == expected

    @pytest.mark.parametrize(
        'left, right, expected',
        [
            ('2017-01-01', '2017-01-06', '5 days'),
            ('2017-01-01', '2017-01-01 12:00:00', '12 hours'),
            ('2017-01-01 12:00', '2017-01-01 12:00:00', '0 days'),
            ('2017-01-01 12:01', '2017-01-05 17:31:00', '4 days 5 hours 30 min'),
        ],
    )
    @pytest.mark.parametrize('tz', (None, 'UTC', 'CET', 'US/Eastern'))
    def test_length_timestamp(self, tz, left, right, expected):
        """Check ``Interval.length`` for Timestamp endpoints across time zones."""
        # GH 18789
        iv = Interval(Timestamp(left, tz=tz), Timestamp(right, tz=tz))
        result = iv.length
        expected = Timedelta(expected)
        assert result == expected

    @pytest.mark.parametrize(
        'left, right',
        [
            ('a', 'z'),
            (('a', 'b'), ('c', 'd')),
            (list('AB'), list('ab')),
            (Interval(0, 1), Interval(1, 2)),
        ],
    )
    def test_length_errors(self, left, right):
        """Check that ``Interval.length`` raises ``TypeError`` for these endpoints."""
        # GH 18789
        iv = Interval(left, right)
        msg = 'cannot compute length between .* and .*'
        with pytest.raises(TypeError, match=msg):
            iv.length

    def test_math_add(self, interval):
        """Check ``+`` with a scalar (both operand orders, in-place) and invalid operands."""
        expected = Interval(1, 2)
        actual = interval + 1
        assert expected == actual

        expected = Interval(1, 2)
        actual = 1 + interval
        assert expected == actual

        actual = interval
        actual += 1
        assert expected == actual

        # raw string: the pattern contains regex escapes
        msg = r"unsupported operand type\(s\) for \+"
        with pytest.raises(TypeError, match=msg):
            interval + Interval(1, 2)

        with pytest.raises(TypeError, match=msg):
            interval + 'foo'

    def test_math_sub(self, interval):
        """Check ``-`` with a scalar (plain and in-place) and invalid operands."""
        expected = Interval(-1, 0)
        actual = interval - 1
        assert expected == actual

        actual = interval
        actual -= 1
        assert expected == actual

        # raw string: the pattern contains regex escapes
        msg = r"unsupported operand type\(s\) for -"
        with pytest.raises(TypeError, match=msg):
            interval - Interval(1, 2)

        with pytest.raises(TypeError, match=msg):
            interval - 'foo'

    def test_math_mult(self, interval):
        """Check ``*`` with a scalar (both operand orders, in-place) and invalid operands."""
        expected = Interval(0, 2)
        actual = interval * 2
        assert expected == actual

        expected = Interval(0, 2)
        actual = 2 * interval
        assert expected == actual

        actual = interval
        actual *= 2
        assert expected == actual

        # raw string: the pattern contains regex escapes
        msg = r"unsupported operand type\(s\) for \*"
        with pytest.raises(TypeError, match=msg):
            interval * Interval(1, 2)

        msg = "can't multiply sequence by non-int"
        with pytest.raises(TypeError, match=msg):
            interval * 'foo'

    def test_math_div(self, interval):
        """Check ``/`` with a scalar (plain and in-place) and invalid operands."""
        expected = Interval(0, 0.5)
        actual = interval / 2.0
        assert expected == actual

        actual = interval
        actual /= 2.0
        assert expected == actual

        # raw string: the pattern contains regex escapes
        msg = r"unsupported operand type\(s\) for /"
        with pytest.raises(TypeError, match=msg):
            interval / Interval(1, 2)

        with pytest.raises(TypeError, match=msg):
            interval / 'foo'

    def test_constructor_errors(self):
        """Check ``ValueError`` for an unknown ``closed`` and for left > right."""
        msg = "invalid option for 'closed': foo"
        with pytest.raises(ValueError, match=msg):
            Interval(0, 1, closed='foo')

        msg = 'left side of interval must be <= right side'
        with pytest.raises(ValueError, match=msg):
            Interval(1, 0)

    @pytest.mark.parametrize(
        'tz_left, tz_right',
        [(None, 'UTC'), ('UTC', None), ('UTC', 'US/Eastern')],
    )
    def test_constructor_errors_tz(self, tz_left, tz_right):
        """Check that Timestamp endpoints with mismatched time zones are rejected."""
        # GH 18538
        left = Timestamp('2017-01-01', tz=tz_left)
        right = Timestamp('2017-01-02', tz=tz_right)
        # TypeError is expected when one side has no tz, ValueError otherwise
        error = TypeError if _any_none(tz_left, tz_right) else ValueError
        with pytest.raises(error):
            Interval(left, right)
class TestInterval:
    """Tests for the scalar ``Interval`` object.

    ``interval`` and ``closed`` arguments are fixtures supplied from outside
    this block; the assertions in ``test_properties`` expect ``interval`` to
    be ``Interval(0, 1, closed='right')``.
    """

    def test_properties(self, interval):
        """The fixture exposes the expected closed side, endpoints and midpoint."""
        assert interval.closed == "right"
        assert interval.left == 0
        assert interval.right == 1
        assert interval.mid == 0.5

    def test_repr(self, interval):
        """``repr`` and ``str`` reflect the endpoints and the closed side."""
        assert repr(interval) == "Interval(0, 1, closed='right')"
        assert str(interval) == "(0, 1]"

        left_closed = Interval(0, 1, closed="left")
        assert repr(left_closed) == "Interval(0, 1, closed='left')"
        assert str(left_closed) == "[0, 1)"

    def test_contains(self, interval):
        """Point membership follows the closed side; interval-in-interval raises."""
        assert 0.5 in interval
        assert 1 in interval
        assert 0 not in interval

        with pytest.raises(
            TypeError, match="__contains__ not defined for two intervals"
        ):
            interval in interval

        closed_both = Interval(0, 1, closed="both")
        assert 0 in closed_both
        assert 1 in closed_both

        closed_neither = Interval(0, 1, closed="neither")
        assert 0 not in closed_neither
        assert 0.5 in closed_neither
        assert 1 not in closed_neither

    def test_equal(self):
        """Equality takes the closed side into account; non-Intervals compare unequal."""
        assert Interval(0, 1) == Interval(0, 1, closed="right")
        assert Interval(0, 1) != Interval(0, 1, closed="left")
        assert Interval(0, 1) != 0

    def test_comparison(self):
        """Ordering works between intervals and raises against a plain int."""
        with pytest.raises(TypeError, match="unorderable types"):
            Interval(0, 1) < 2

        assert Interval(0, 1) < Interval(1, 2)
        assert Interval(0, 1) < Interval(0, 2)
        assert Interval(0, 1) < Interval(0.5, 1.5)
        assert Interval(0, 1) <= Interval(0, 1)
        assert Interval(0, 1) > Interval(-1, 2)
        assert Interval(0, 1) >= Interval(0, 1)

    def test_hash(self, interval):
        """Hashing the fixture must not raise."""
        # should not raise
        hash(interval)

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            (0, 5, 5),
            (-2, 5.5, 7.5),
            (10, 10, 0),
            (10, np.inf, np.inf),
            (-np.inf, -5, np.inf),
            (-np.inf, np.inf, np.inf),
            (Timedelta("0 days"), Timedelta("5 days"), Timedelta("5 days")),
            (Timedelta("10 days"), Timedelta("10 days"), Timedelta("0 days")),
            (Timedelta("1H10M"), Timedelta("5H5M"), Timedelta("3H55M")),
            (Timedelta("5S"), Timedelta("1H"), Timedelta("59M55S")),
        ],
    )
    def test_length(self, left, right, expected):
        """``length`` matches the expected value for numeric and Timedelta endpoints."""
        # GH 18789
        assert Interval(left, right).length == expected

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            ("2017-01-01", "2017-01-06", "5 days"),
            ("2017-01-01", "2017-01-01 12:00:00", "12 hours"),
            ("2017-01-01 12:00", "2017-01-01 12:00:00", "0 days"),
            ("2017-01-01 12:01", "2017-01-05 17:31:00", "4 days 5 hours 30 min"),
        ],
    )
    @pytest.mark.parametrize("tz", (None, "UTC", "CET", "US/Eastern"))
    def test_length_timestamp(self, tz, left, right, expected):
        """``length`` of a Timestamp interval equals the expected Timedelta."""
        # GH 18789
        lower = Timestamp(left, tz=tz)
        upper = Timestamp(right, tz=tz)
        span = Interval(lower, upper).length
        assert span == Timedelta(expected)

    @pytest.mark.parametrize(
        "left, right",
        [
            (0, 1),
            (Timedelta("0 days"), Timedelta("1 day")),
            (Timestamp("2018-01-01"), Timestamp("2018-01-02")),
            (
                Timestamp("2018-01-01", tz="US/Eastern"),
                Timestamp("2018-01-02", tz="US/Eastern"),
            ),
        ],
    )
    def test_is_empty(self, left, right, closed):
        """``is_empty`` is False for distinct endpoints; for equal ones it depends on ``closed``."""
        # GH27219
        # non-empty always return False
        spanning = Interval(left, right, closed)
        assert spanning.is_empty is False

        # same endpoint is empty except when closed='both' (contains one point)
        degenerate = Interval(left, left, closed)
        assert degenerate.is_empty is (closed != "both")

    @pytest.mark.parametrize(
        "left, right",
        [
            ("a", "z"),
            (("a", "b"), ("c", "d")),
            (list("AB"), list("ab")),
            (Interval(0, 1), Interval(1, 2)),
            (Period("2018Q1", freq="Q"), Period("2018Q1", freq="Q")),
        ],
    )
    def test_construct_errors(self, left, right):
        """Constructing an Interval from these endpoints raises ``ValueError``."""
        # GH 23013
        with pytest.raises(
            ValueError,
            match="Only numeric, Timestamp and Timedelta endpoints are allowed",
        ):
            Interval(left, right)

    def test_math_add(self, closed):
        """Scalar addition shifts both endpoints; invalid operands raise ``TypeError``."""
        base = Interval(0, 1, closed=closed)
        shifted = Interval(1, 2, closed=closed)

        assert base + 1 == shifted
        assert 1 + base == shifted

        in_place = base
        in_place += 1
        assert in_place == shifted

        pattern = r"unsupported operand type\(s\) for \+"
        for bad_operand in (base, "foo"):
            with pytest.raises(TypeError, match=pattern):
                base + bad_operand

    def test_math_sub(self, closed):
        """Scalar subtraction shifts both endpoints; invalid operands raise ``TypeError``."""
        base = Interval(0, 1, closed=closed)
        shifted = Interval(-1, 0, closed=closed)

        assert base - 1 == shifted

        in_place = base
        in_place -= 1
        assert in_place == shifted

        pattern = r"unsupported operand type\(s\) for -"
        for bad_operand in (base, "foo"):
            with pytest.raises(TypeError, match=pattern):
                base - bad_operand

    def test_math_mult(self, closed):
        """Scalar multiplication scales both endpoints; invalid operands raise ``TypeError``."""
        base = Interval(0, 1, closed=closed)
        scaled = Interval(0, 2, closed=closed)

        assert base * 2 == scaled
        assert 2 * base == scaled

        in_place = base
        in_place *= 2
        assert in_place == scaled

        # the two invalid operands produce different messages
        failures = [
            (base, r"unsupported operand type\(s\) for \*"),
            ("foo", r"can\'t multiply sequence by non-int"),
        ]
        for bad_operand, pattern in failures:
            with pytest.raises(TypeError, match=pattern):
                base * bad_operand

    def test_math_div(self, closed):
        """Scalar true division scales both endpoints; invalid operands raise ``TypeError``."""
        base = Interval(0, 1, closed=closed)
        halved = Interval(0, 0.5, closed=closed)

        assert base / 2.0 == halved

        in_place = base
        in_place /= 2.0
        assert in_place == halved

        pattern = r"unsupported operand type\(s\) for /"
        for bad_operand in (base, "foo"):
            with pytest.raises(TypeError, match=pattern):
                base / bad_operand

    def test_math_floordiv(self, closed):
        """Scalar floor division applies to both endpoints; invalid operands raise ``TypeError``."""
        base = Interval(1, 2, closed=closed)
        floored = Interval(0, 1, closed=closed)

        assert base // 2 == floored

        in_place = base
        in_place //= 2
        assert in_place == floored

        pattern = r"unsupported operand type\(s\) for //"
        for bad_operand in (base, "foo"):
            with pytest.raises(TypeError, match=pattern):
                base // bad_operand

    def test_constructor_errors(self):
        """An unknown ``closed`` value or left > right raises ``ValueError``."""
        with pytest.raises(ValueError, match="invalid option for 'closed': foo"):
            Interval(0, 1, closed="foo")

        with pytest.raises(
            ValueError, match="left side of interval must be <= right side"
        ):
            Interval(1, 0)

    @pytest.mark.parametrize(
        "tz_left, tz_right", [(None, "UTC"), ("UTC", None), ("UTC", "US/Eastern")]
    )
    def test_constructor_errors_tz(self, tz_left, tz_right):
        """Timestamp endpoints with mismatched time zones are rejected."""
        # GH 18538
        lower = Timestamp("2017-01-01", tz=tz_left)
        upper = Timestamp("2017-01-02", tz=tz_right)

        # TypeError is expected when one side has no tz, ValueError otherwise
        if com.any_none(tz_left, tz_right):
            expected_error = TypeError
        else:
            expected_error = ValueError

        with pytest.raises(expected_error):
            Interval(lower, upper)
def test_comparison(self):
    """Check ordering between intervals and the error for a non-Interval operand."""
    # pytest.raises(match=...) is the form used by the other tests in this file
    with pytest.raises(TypeError, match='unorderable types'):
        Interval(0, 1) < 2

    assert Interval(0, 1) < Interval(1, 2)
    assert Interval(0, 1) < Interval(0, 2)
    assert Interval(0, 1) < Interval(0.5, 1.5)
    assert Interval(0, 1) <= Interval(0, 1)
    assert Interval(0, 1) > Interval(-1, 2)
    assert Interval(0, 1) >= Interval(0, 1)
def test_construct_errors(self, left, right):
    """Constructing an Interval from ``left`` and ``right`` must raise ``ValueError``."""
    # GH 23013
    with pytest.raises(
        ValueError,
        match="Only numeric, Timestamp and Timedelta endpoints are allowed",
    ):
        Interval(left, right)
def test_length_timestamp(self, tz, left, right, expected): # GH 18789 iv = Interval(Timestamp(left, tz=tz), Timestamp(right, tz=tz)) result = iv.length expected = Timedelta(expected) assert result == expected
def test_equal(self): assert Interval(0, 1) == Interval(0, 1, closed="right") assert Interval(0, 1) != Interval(0, 1, closed="left") assert Interval(0, 1) != 0
def test_comparison_operations(self, scalars): # GH #28981 expected = Series([False, False]) s = Series([Interval(0, 1), Interval(1, 2)], dtype="interval") result = s == scalars tm.assert_series_equal(result, expected)
def test_comparison(self):
    """Ordering works between intervals and raises against a plain int."""
    unit = Interval(0, 1)

    with pytest.raises(TypeError, match="unorderable types"):
        unit < 2

    assert unit < Interval(1, 2)
    assert unit < Interval(0, 2)
    assert unit < Interval(0.5, 1.5)
    assert unit <= Interval(0, 1)
    assert unit > Interval(-1, 2)
    assert unit >= Interval(0, 1)
def test_is_scalar_pandas_scalars(self): assert is_scalar(Timestamp('2014-01-01')) assert is_scalar(Timedelta(hours=1)) assert is_scalar(Period('2014-01-01')) assert is_scalar(Interval(left=0, right=1)) assert is_scalar(DateOffset(days=1))
class TestIntervalIndex: index = IntervalIndex.from_arrays([0, 1], [1, 2]) def create_index(self, closed="right"): return IntervalIndex.from_breaks(range(11), closed=closed) def create_index_with_nan(self, closed="right"): mask = [True, False] + [True] * 8 return IntervalIndex.from_arrays( np.where(mask, np.arange(10), np.nan), np.where(mask, np.arange(1, 11), np.nan), closed=closed, ) def test_properties(self, closed): index = self.create_index(closed=closed) assert len(index) == 10 assert index.size == 10 assert index.shape == (10, ) tm.assert_index_equal(index.left, Index(np.arange(10))) tm.assert_index_equal(index.right, Index(np.arange(1, 11))) tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5))) assert index.closed == closed ivs = [Interval(l, r, closed) for l, r in zip(range(10), range(1, 11))] expected = np.array(ivs, dtype=object) tm.assert_numpy_array_equal(np.asarray(index), expected) # with nans index = self.create_index_with_nan(closed=closed) assert len(index) == 10 assert index.size == 10 assert index.shape == (10, ) expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9]) expected_right = expected_left + 1 expected_mid = expected_left + 0.5 tm.assert_index_equal(index.left, expected_left) tm.assert_index_equal(index.right, expected_right) tm.assert_index_equal(index.mid, expected_mid) assert index.closed == closed ivs = [ Interval(l, r, closed) if notna(l) else np.nan for l, r in zip(expected_left, expected_right) ] expected = np.array(ivs, dtype=object) tm.assert_numpy_array_equal(np.asarray(index), expected) @pytest.mark.parametrize( "breaks", [ [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608], [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf], pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]), pd.to_timedelta(["1ns", "2ms", "3s", "4M", "5H", "6D"]), ], ) def test_length(self, closed, breaks): # GH 18789 index = IntervalIndex.from_breaks(breaks, closed=closed) result = index.length expected = Index(iv.length 
for iv in index) tm.assert_index_equal(result, expected) # with NA index = index.insert(1, np.nan) result = index.length expected = Index(iv.length if notna(iv) else iv for iv in index) tm.assert_index_equal(result, expected) def test_with_nans(self, closed): index = self.create_index(closed=closed) assert index.hasnans is False result = index.isna() expected = np.zeros(len(index), dtype=bool) tm.assert_numpy_array_equal(result, expected) result = index.notna() expected = np.ones(len(index), dtype=bool) tm.assert_numpy_array_equal(result, expected) index = self.create_index_with_nan(closed=closed) assert index.hasnans is True result = index.isna() expected = np.array([False, True] + [False] * (len(index) - 2)) tm.assert_numpy_array_equal(result, expected) result = index.notna() expected = np.array([True, False] + [True] * (len(index) - 2)) tm.assert_numpy_array_equal(result, expected) def test_copy(self, closed): expected = self.create_index(closed=closed) result = expected.copy() assert result.equals(expected) result = expected.copy(deep=True) assert result.equals(expected) assert result.left is not expected.left def test_ensure_copied_data(self, closed): # exercise the copy flag in the constructor # not copying index = self.create_index(closed=closed) result = IntervalIndex(index, copy=False) tm.assert_numpy_array_equal(index.left.values, result.left.values, check_same="same") tm.assert_numpy_array_equal(index.right.values, result.right.values, check_same="same") # by-definition make a copy result = IntervalIndex(index._ndarray_values, copy=False) tm.assert_numpy_array_equal(index.left.values, result.left.values, check_same="copy") tm.assert_numpy_array_equal(index.right.values, result.right.values, check_same="copy") def test_delete(self, closed): expected = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed) result = self.create_index(closed=closed).delete(0) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "data", [ interval_range(0, 
periods=10, closed="neither"), interval_range(1.7, periods=8, freq=2.5, closed="both"), interval_range(Timestamp("20170101"), periods=12, closed="left"), interval_range(Timedelta("1 day"), periods=6, closed="right"), ], ) def test_insert(self, data): item = data[0] idx_item = IntervalIndex([item]) # start expected = idx_item.append(data) result = data.insert(0, item) tm.assert_index_equal(result, expected) # end expected = data.append(idx_item) result = data.insert(len(data), item) tm.assert_index_equal(result, expected) # mid expected = data[:3].append(idx_item).append(data[3:]) result = data.insert(3, item) tm.assert_index_equal(result, expected) # invalid type msg = "can only insert Interval objects and NA into an IntervalIndex" with pytest.raises(ValueError, match=msg): data.insert(1, "foo") # invalid closed msg = "inserted item must be closed on the same side as the index" for closed in {"left", "right", "both", "neither"} - {item.closed}: with pytest.raises(ValueError, match=msg): bad_item = Interval(item.left, item.right, closed=closed) data.insert(1, bad_item) # GH 18295 (test missing) na_idx = IntervalIndex([np.nan], closed=data.closed) for na in (np.nan, pd.NaT, None): expected = data[:1].append(na_idx).append(data[1:]) result = data.insert(1, na) tm.assert_index_equal(result, expected) def test_is_unique_interval(self, closed): """ Interval specific tests for is_unique in addition to base class tests """ # unique overlapping - distinct endpoints idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed) assert idx.is_unique is True # unique overlapping - shared endpoints idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) assert idx.is_unique is True # unique nested idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed) assert idx.is_unique is True def test_monotonic(self, closed): # increasing non-overlapping idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed) assert idx.is_monotonic is 
True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing non-overlapping idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # unordered non-overlapping idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # increasing overlapping idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing overlapping idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # unordered overlapping idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # increasing overlapping shared endpoints idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing overlapping shared endpoints idx = pd.IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], 
closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # stationary idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is False # empty idx = IntervalIndex([], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True def test_get_item(self, closed): i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed) assert i[0] == Interval(0.0, 1.0, closed=closed) assert i[1] == Interval(1.0, 2.0, closed=closed) assert isna(i[2]) result = i[0:1] expected = IntervalIndex.from_arrays((0.0, ), (1.0, ), closed=closed) tm.assert_index_equal(result, expected) result = i[0:2] expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0), closed=closed) tm.assert_index_equal(result, expected) result = i[1:3] expected = IntervalIndex.from_arrays((1.0, np.nan), (2.0, np.nan), closed=closed) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "breaks", [ date_range("20180101", periods=4), date_range("20180101", periods=4, tz="US/Eastern"), timedelta_range("0 days", periods=4), ], ids=lambda x: str(x.dtype), ) def test_maybe_convert_i8(self, breaks): # GH 20636 index = IntervalIndex.from_breaks(breaks) # intervalindex result = index._maybe_convert_i8(index) expected = IntervalIndex.from_breaks(breaks.asi8) tm.assert_index_equal(result, expected) # interval interval = Interval(breaks[0], breaks[1]) result = index._maybe_convert_i8(interval) expected = Interval(breaks[0].value, breaks[1].value) assert result == expected # datetimelike index result = index._maybe_convert_i8(breaks) expected = 
Index(breaks.asi8) tm.assert_index_equal(result, expected) # datetimelike scalar result = index._maybe_convert_i8(breaks[0]) expected = breaks[0].value assert result == expected # list-like of datetimelike scalars result = index._maybe_convert_i8(list(breaks)) expected = Index(breaks.asi8) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "breaks", [ date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5) ], ) def test_maybe_convert_i8_nat(self, breaks): # GH 20636 index = IntervalIndex.from_breaks(breaks) to_convert = breaks._constructor([pd.NaT] * 3) expected = pd.Float64Index([np.nan] * 3) result = index._maybe_convert_i8(to_convert) tm.assert_index_equal(result, expected) to_convert = to_convert.insert(0, breaks[0]) expected = expected.insert(0, float(breaks[0].value)) result = index._maybe_convert_i8(to_convert) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "breaks", [np.arange(5, dtype="int64"), np.arange(5, dtype="float64")], ids=lambda x: str(x.dtype), ) @pytest.mark.parametrize( "make_key", [ IntervalIndex.from_breaks, lambda breaks: Interval(breaks[0], breaks[1]), lambda breaks: breaks, lambda breaks: breaks[0], list, ], ids=["IntervalIndex", "Interval", "Index", "scalar", "list"], ) def test_maybe_convert_i8_numeric(self, breaks, make_key): # GH 20636 index = IntervalIndex.from_breaks(breaks) key = make_key(breaks) # no conversion occurs for numeric result = index._maybe_convert_i8(key) assert result is key @pytest.mark.parametrize( "breaks1, breaks2", permutations( [ date_range("20180101", periods=4), date_range("20180101", periods=4, tz="US/Eastern"), timedelta_range("0 days", periods=4), ], 2, ), ids=lambda x: str(x.dtype), ) @pytest.mark.parametrize( "make_key", [ IntervalIndex.from_breaks, lambda breaks: Interval(breaks[0], breaks[1]), lambda breaks: breaks, lambda breaks: breaks[0], list, ], ids=["IntervalIndex", "Interval", "Index", "scalar", "list"], ) def test_maybe_convert_i8_errors(self, 
breaks1, breaks2, make_key): # GH 20636 index = IntervalIndex.from_breaks(breaks1) key = make_key(breaks2) msg = ("Cannot index an IntervalIndex of subtype {dtype1} with " "values of dtype {dtype2}") msg = re.escape(msg.format(dtype1=breaks1.dtype, dtype2=breaks2.dtype)) with pytest.raises(ValueError, match=msg): index._maybe_convert_i8(key) def test_contains_method(self): # can select values that are IN the range of a value i = IntervalIndex.from_arrays([0, 1], [1, 2]) expected = np.array([False, False], dtype="bool") actual = i.contains(0) tm.assert_numpy_array_equal(actual, expected) actual = i.contains(3) tm.assert_numpy_array_equal(actual, expected) expected = np.array([True, False], dtype="bool") actual = i.contains(0.5) tm.assert_numpy_array_equal(actual, expected) actual = i.contains(1) tm.assert_numpy_array_equal(actual, expected) # __contains__ not implemented for "interval in interval", follow # that for the contains method for now with pytest.raises(NotImplementedError, match="contains not implemented for two"): i.contains(Interval(0, 1)) def test_contains_dunder(self): index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right") # __contains__ requires perfect matches to intervals. 
assert 0 not in index assert 1 not in index assert 2 not in index assert Interval(0, 1, closed="right") in index assert Interval(0, 2, closed="right") not in index assert Interval(0, 0.5, closed="right") not in index assert Interval(3, 5, closed="right") not in index assert Interval(-1, 0, closed="left") not in index assert Interval(0, 1, closed="left") not in index assert Interval(0, 1, closed="both") not in index def test_dropna(self, closed): expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed) ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed) result = ii.dropna() tm.assert_index_equal(result, expected) ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed) result = ii.dropna() tm.assert_index_equal(result, expected) def test_non_contiguous(self, closed): index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed) target = [0.5, 1.5, 2.5] actual = index.get_indexer(target) expected = np.array([0, -1, 1], dtype="intp") tm.assert_numpy_array_equal(actual, expected) assert 1.5 not in index def test_isin(self, closed): index = self.create_index(closed=closed) expected = np.array([True] + [False] * (len(index) - 1)) result = index.isin(index[:1]) tm.assert_numpy_array_equal(result, expected) result = index.isin([index[0]]) tm.assert_numpy_array_equal(result, expected) other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed) expected = np.array([True] * (len(index) - 1) + [False]) result = index.isin(other) tm.assert_numpy_array_equal(result, expected) result = index.isin(other.tolist()) tm.assert_numpy_array_equal(result, expected) for other_closed in {"right", "left", "both", "neither"}: other = self.create_index(closed=other_closed) expected = np.repeat(closed == other_closed, len(index)) result = index.isin(other) tm.assert_numpy_array_equal(result, expected) result = index.isin(other.tolist()) tm.assert_numpy_array_equal(result, expected) def test_comparison(self): actual = 
Interval(0, 1) < self.index expected = np.array([False, True]) tm.assert_numpy_array_equal(actual, expected) actual = Interval(0.5, 1.5) < self.index expected = np.array([False, True]) tm.assert_numpy_array_equal(actual, expected) actual = self.index > Interval(0.5, 1.5) tm.assert_numpy_array_equal(actual, expected) actual = self.index == self.index expected = np.array([True, True]) tm.assert_numpy_array_equal(actual, expected) actual = self.index <= self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index >= self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index < self.index expected = np.array([False, False]) tm.assert_numpy_array_equal(actual, expected) actual = self.index > self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left") tm.assert_numpy_array_equal(actual, expected) actual = self.index == self.index.values tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index.values == self.index tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index <= self.index.values tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index != self.index.values tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index > self.index.values tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index.values > self.index tm.assert_numpy_array_equal(actual, np.array([False, False])) # invalid comparisons actual = self.index == 0 tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index == self.index.left tm.assert_numpy_array_equal(actual, np.array([False, False])) with pytest.raises(TypeError, match="unorderable types"): self.index > 0 with pytest.raises(TypeError, match="unorderable types"): self.index <= 0 msg = r"unorderable types: Interval\(\) > int\(\)" with pytest.raises(TypeError, match=msg): self.index > np.arange(2) msg = 
"Lengths must match to compare" with pytest.raises(ValueError, match=msg): self.index > np.arange(3) def test_missing_values(self, closed): idx = Index([ np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed) ]) idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed) assert idx.equals(idx2) msg = ("missing values must be missing in the same location both left" " and right sides") with pytest.raises(ValueError, match=msg): IntervalIndex.from_arrays([np.nan, 0, 1], np.array([0, 1, 2]), closed=closed) tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False])) def test_sort_values(self, closed): index = self.create_index(closed=closed) result = index.sort_values() tm.assert_index_equal(result, index) result = index.sort_values(ascending=False) tm.assert_index_equal(result, index[::-1]) # with nan index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)]) result = index.sort_values() expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan]) tm.assert_index_equal(result, expected) result = index.sort_values(ascending=False) expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)]) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("tz", [None, "US/Eastern"]) def test_datetime(self, tz): start = Timestamp("2000-01-01", tz=tz) dates = date_range(start=start, periods=10) index = IntervalIndex.from_breaks(dates) # test mid start = Timestamp("2000-01-01T12:00", tz=tz) expected = date_range(start=start, periods=9) tm.assert_index_equal(index.mid, expected) # __contains__ doesn't check individual points assert Timestamp("2000-01-01", tz=tz) not in index assert Timestamp("2000-01-01T12", tz=tz) not in index assert Timestamp("2000-01-02", tz=tz) not in index iv_true = Interval(Timestamp("2000-01-02", tz=tz), Timestamp("2000-01-03", tz=tz)) iv_false = Interval(Timestamp("1999-12-31", tz=tz), Timestamp("2000-01-01", tz=tz)) assert iv_true in index assert iv_false not in index # .contains does 
check individual points assert not index.contains(Timestamp("2000-01-01", tz=tz)).any() assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any() assert index.contains(Timestamp("2000-01-02", tz=tz)).any() # test get_indexer start = Timestamp("1999-12-31T12:00", tz=tz) target = date_range(start=start, periods=7, freq="12H") actual = index.get_indexer(target) expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp") tm.assert_numpy_array_equal(actual, expected) start = Timestamp("2000-01-08T18:00", tz=tz) target = date_range(start=start, periods=7, freq="6H") actual = index.get_indexer(target) expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp") tm.assert_numpy_array_equal(actual, expected) def test_append(self, closed): index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed) index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed) result = index1.append(index2) expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed) tm.assert_index_equal(result, expected) result = index1.append([index1, index2]) expected = IntervalIndex.from_arrays([0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed) tm.assert_index_equal(result, expected) msg = ("can only append two IntervalIndex objects that are closed " "on the same side") for other_closed in {"left", "right", "both", "neither"} - {closed}: index_other_closed = IntervalIndex.from_arrays([0, 1], [1, 2], closed=other_closed) with pytest.raises(ValueError, match=msg): index1.append(index_other_closed) def test_is_non_overlapping_monotonic(self, closed): # Should be True in all cases tpls = [(0, 1), (2, 3), (4, 5), (6, 7)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is True idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is True # Should be False in all cases (overlapping) tpls = [(0, 2), (1, 3), (4, 5), (6, 7)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert 
idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False in all cases (non-monotonic) tpls = [(0, 1), (2, 3), (6, 7), (4, 5)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False for closed='both', otherwise True (GH16560) if closed == "both": idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is False else: idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is True @pytest.mark.parametrize( "start, shift, na_value", [ (0, 1, np.nan), (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT), (Timedelta("0 days"), Timedelta("1 day"), pd.NaT), ], ) def test_is_overlapping(self, start, shift, na_value, closed): # GH 23309 # see test_interval_tree.py for extensive tests; interface tests here # non-overlapping tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is False # non-overlapping with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is False # overlapping tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is True # overlapping with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is True # common endpoints tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)] index = IntervalIndex.from_tuples(tuples, closed=closed) result = index.is_overlapping expected = 
closed == "both" assert result is expected # common endpoints with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) result = index.is_overlapping assert result is expected @pytest.mark.parametrize( "tuples", [ list(zip(range(10), range(1, 11))), list( zip( date_range("20170101", periods=10), date_range("20170101", periods=10), )), list( zip( timedelta_range("0 days", periods=10), timedelta_range("1 day", periods=10), )), ], ) def test_to_tuples(self, tuples): # GH 18756 idx = IntervalIndex.from_tuples(tuples) result = idx.to_tuples() expected = Index(com.asarray_tuplesafe(tuples)) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "tuples", [ list(zip(range(10), range(1, 11))) + [np.nan], list( zip( date_range("20170101", periods=10), date_range("20170101", periods=10), )) + [np.nan], list( zip( timedelta_range("0 days", periods=10), timedelta_range("1 day", periods=10), )) + [np.nan], ], ) @pytest.mark.parametrize("na_tuple", [True, False]) def test_to_tuples_na(self, tuples, na_tuple): # GH 18756 idx = IntervalIndex.from_tuples(tuples) result = idx.to_tuples(na_tuple=na_tuple) # check the non-NA portion expected_notna = Index(com.asarray_tuplesafe(tuples[:-1])) result_notna = result[:-1] tm.assert_index_equal(result_notna, expected_notna) # check the NA portion result_na = result[-1] if na_tuple: assert isinstance(result_na, tuple) assert len(result_na) == 2 assert all(isna(x) for x in result_na) else: assert isna(result_na) def test_nbytes(self): # GH 19209 left = np.arange(0, 4, dtype="i8") right = np.arange(1, 5, dtype="i8") result = IntervalIndex.from_arrays(left, right).nbytes expected = 64 # 4 * 8 * 2 assert result == expected @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) def test_set_closed(self, name, closed, new_closed): # GH 21670 index = interval_range(0, 5, closed=closed, name=name) result = index.set_closed(new_closed) expected = 
interval_range(0, 5, closed=new_closed, name=name) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False]) def test_set_closed_errors(self, bad_closed): # GH 21670 index = interval_range(0, 5) msg = "invalid option for 'closed': {closed}".format(closed=bad_closed) with pytest.raises(ValueError, match=msg): index.set_closed(bad_closed) def test_is_all_dates(self): # GH 23576 year_2017 = pd.Interval(pd.Timestamp("2017-01-01 00:00:00"), pd.Timestamp("2018-01-01 00:00:00")) year_2017_index = pd.IntervalIndex([year_2017]) assert not year_2017_index.is_all_dates