def test_loc_axis_arguments(self):
    """Check ``DataFrame.loc(axis=...)`` on a frame with MultiIndex axes.

    Rows are selected through ``axis=0`` / ``axis='index'``, columns through
    ``axis=1`` / ``axis='columns'``; any other axis must raise ``ValueError``.
    """
    row_levels = [_mklbl('A', 4), _mklbl('B', 2),
                  _mklbl('C', 4), _mklbl('D', 2)]
    index = MultiIndex.from_product(row_levels)
    columns = MultiIndex.from_tuples(
        [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
        names=['lvl0', 'lvl1'])
    n_rows, n_cols = len(index), len(columns)
    values = np.arange(n_rows * n_cols, dtype='int64')
    df = DataFrame(values.reshape((n_rows, n_cols)),
                   index=index, columns=columns)
    df = df.sort_index().sort_index(axis=1)

    # axis 0
    result = df.loc(axis=0)['A1':'A3', :, ['C1', 'C3']]
    row_keys = [(a, b, c, d) for a, b, c, d in df.index.values
                if a in ('A1', 'A2', 'A3') and c in ('C1', 'C3')]
    tm.assert_frame_equal(result, df.loc[row_keys])

    result = df.loc(axis='index')[:, :, ['C1', 'C3']]
    row_keys = [(a, b, c, d) for a, b, c, d in df.index.values
                if c in ('C1', 'C3')]
    tm.assert_frame_equal(result, df.loc[row_keys])

    # axis 1
    result = df.loc(axis=1)[:, 'foo']
    tm.assert_frame_equal(result, df.loc[:, (slice(None), 'foo')])

    result = df.loc(axis='columns')[:, 'foo']
    tm.assert_frame_equal(result, df.loc[:, (slice(None), 'foo')])

    # invalid axis
    for bad_axis in (-1, 2, 'foo'):
        with pytest.raises(ValueError):
            df.loc(axis=bad_axis)[:, :, ['C1', 'C3']]
def test_per_axis_per_level_doc_examples(self):
    """Run the per-axis / per-level slicing examples from the indexing docs."""
    # test index maker
    idx = pd.IndexSlice
    every = slice(None)

    # from indexing.rst / advanced
    row_levels = [_mklbl('A', 4), _mklbl('B', 2),
                  _mklbl('C', 4), _mklbl('D', 2)]
    index = MultiIndex.from_product(row_levels)
    columns = MultiIndex.from_tuples(
        [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
        names=['lvl0', 'lvl1'])
    n_rows, n_cols = len(index), len(columns)
    values = np.arange(n_rows * n_cols, dtype='int64')
    df = DataFrame(values.reshape((n_rows, n_cols)),
                   index=index, columns=columns)

    # tuple-of-slices spelling and the IndexSlice spelling must agree
    result = df.loc[(slice('A1', 'A3'), every, ['C1', 'C3']), :]
    row_keys = [(a, b, c, d) for a, b, c, d in df.index.values
                if a in ('A1', 'A2', 'A3') and c in ('C1', 'C3')]
    expected = df.loc[row_keys]
    tm.assert_frame_equal(result, expected)
    result = df.loc[idx['A1':'A3', :, ['C1', 'C3']], :]
    tm.assert_frame_equal(result, expected)

    result = df.loc[(every, every, ['C1', 'C3']), :]
    row_keys = [(a, b, c, d) for a, b, c, d in df.index.values
                if c in ('C1', 'C3')]
    expected = df.loc[row_keys]
    tm.assert_frame_equal(result, expected)
    result = df.loc[idx[:, :, ['C1', 'C3']], :]
    tm.assert_frame_equal(result, expected)

    # not sorted
    with pytest.raises(UnsortedIndexError):
        df.loc['A1', ('a', slice('foo'))]

    # GH 16734: not sorted, but no real slicing
    tm.assert_frame_equal(df.loc['A1', (every, 'foo')],
                          df.loc['A1'].iloc[:, [0, 2]])

    df = df.sort_index(axis=1)

    # slicing
    df.loc['A1', (every, 'foo')]
    df.loc[(every, every, ['C1', 'C3']), (every, 'foo')]

    # setitem
    df.loc(axis=0)[:, :, ['C1', 'C3']] = -10
def test_slice_with_zero_step_raises(self):
    """A slice with ``step == 0`` must raise ``ValueError`` on every indexer.

    Uses ``pytest.raises(..., match=...)`` instead of the deprecated
    ``unittest`` alias ``assertRaisesRegexp`` so the test does not depend on
    ``self`` being a ``unittest.TestCase``.
    """
    s = Series(np.arange(20), index=_mklbl('A', 20))
    msg = 'slice step cannot be zero'

    with pytest.raises(ValueError, match=msg):
        s[::0]
    with pytest.raises(ValueError, match=msg):
        s.loc[::0]

    # NOTE(review): ``.ix`` was removed in pandas 1.0; drop this branch when
    # the suite targets a pandas version without it.
    with catch_warnings(record=True):
        with pytest.raises(ValueError, match=msg):
            s.ix[::0]
def test_slice_with_zero_step_raises(self):
    """Zero-step slices raise ``ValueError`` via ``[]``, ``.loc`` and ``.ix``."""
    ser = Series(np.arange(20), index=_mklbl('A', 20))
    zero_step = slice(None, None, 0)  # same object that ``[::0]`` builds

    for indexer in (ser, ser.loc):
        with pytest.raises(ValueError, match='slice step cannot be zero'):
            indexer[zero_step]

    # accessing ``.ix`` happens inside the warnings guard, as before
    with catch_warnings(record=True):
        simplefilter("ignore")
        with pytest.raises(ValueError, match='slice step cannot be zero'):
            ser.ix[zero_step]
def test_slice_with_zero_step_raises(self):
    """A slice with ``step == 0`` must raise ``ValueError`` on every indexer.

    ``tm.assert_raises_regex`` is no longer provided by pandas' testing
    helpers, so the checks use ``pytest.raises(..., match=...)`` directly;
    this also removes the lambda indirection.
    """
    s = Series(np.arange(20), index=_mklbl('A', 20))
    msg = 'slice step cannot be zero'

    with pytest.raises(ValueError, match=msg):
        s[::0]
    with pytest.raises(ValueError, match=msg):
        s.loc[::0]

    # NOTE(review): ``.ix`` was removed in pandas 1.0; drop this branch when
    # the suite targets a pandas version without it.
    with catch_warnings(record=True):
        simplefilter("ignore")
        with pytest.raises(ValueError, match=msg):
            s.ix[::0]
def test_str_label_slicing_with_negative_step(self):
    """Negative-step label slices must select the same rows as ``iloc``.

    Runs against a string index, an integer index and a float index.
    """
    SLC = pd.IndexSlice

    def assert_slices_equivalent(ser, l_slc, i_slc):
        """Compare label slice ``l_slc`` with positional slice ``i_slc``."""
        expected = ser.iloc[i_slc]
        tm.assert_series_equal(ser.loc[l_slc], expected)

        # The original condition was ``not idx.is_integer`` (the bound
        # method, never called), which is always False, so this comparison
        # was silently skipped for every index type.
        if not pd.api.types.is_integer_dtype(ser.index.dtype):
            # For integer indices plain getitem is position-based, so it is
            # only comparable to ``.loc`` for non-integer indices.
            tm.assert_series_equal(ser[l_slc], expected)

    for labels in [_mklbl('A', 20), np.arange(20) + 100,
                   np.linspace(100, 150, 20)]:
        idx = Index(labels)
        s = Series(np.arange(20), index=idx)
        assert_slices_equivalent(s, SLC[idx[9]::-1], SLC[9::-1])
        assert_slices_equivalent(s, SLC[:idx[9]:-1], SLC[:8:-1])
        assert_slices_equivalent(s, SLC[idx[13]:idx[9]:-1], SLC[13:8:-1])
        assert_slices_equivalent(s, SLC[idx[9]:idx[13]:-1], SLC[:0])
def test_str_label_slicing_with_negative_step(self):
    """Negative-step label slices must select the same rows as ``iloc``.

    Runs against a string index, an integer index and a float index.
    """
    SLC = pd.IndexSlice

    def assert_slices_equivalent(ser, l_slc, i_slc):
        """Compare label slice ``l_slc`` with positional slice ``i_slc``."""
        expected = ser.iloc[i_slc]
        tm.assert_series_equal(ser.loc[l_slc], expected)

        # The original condition was ``not idx.is_integer`` (the bound
        # method, never called), which is always False, so this comparison
        # was silently skipped for every index type.
        if not pd.api.types.is_integer_dtype(ser.index.dtype):
            # For integer indices plain getitem is position-based, so it is
            # only comparable to ``.loc`` for non-integer indices.
            tm.assert_series_equal(ser[l_slc], expected)

    for labels in [
        _mklbl("A", 20),
        np.arange(20) + 100,
        np.linspace(100, 150, 20),
    ]:
        idx = Index(labels)
        s = Series(np.arange(20), index=idx)
        assert_slices_equivalent(s, SLC[idx[9]::-1], SLC[9::-1])
        assert_slices_equivalent(s, SLC[:idx[9]:-1], SLC[:8:-1])
        assert_slices_equivalent(s, SLC[idx[13]:idx[9]:-1], SLC[13:8:-1])
        assert_slices_equivalent(s, SLC[idx[9]:idx[13]:-1], SLC[:0])
class TestMisc:
    """Assorted indexing tests: float labels, setitem alignment, slicing."""

    def test_float_index_to_mixed(self):
        """Adding a string-labelled column next to float-labelled columns."""
        df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)})
        df["a"] = 10

        # the two float-labelled columns are taken from df itself, so only
        # the new "a" column is compared against literal values
        expected = DataFrame({0.0: df[0.0], 1.0: df[1.0], "a": [10] * 10})
        tm.assert_frame_equal(expected, df)

    def test_float_index_non_scalar_assignment(self):
        """Scalar assignment through ``.loc`` keyed by a slice of a float index."""
        df = DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0])
        # df.index[:2] holds the first two row labels
        df.loc[df.index[:2]] = 1
        expected = DataFrame({"a": [1, 1, 3], "b": [1, 1, 5]}, index=df.index)
        tm.assert_frame_equal(expected, df)

    def test_loc_setitem_fullindex_views(self):
        """Assigning ``df.loc[df.index]`` onto itself must leave df unchanged."""
        df = DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0])
        df2 = df.copy()
        df.loc[df.index] = df.loc[df.index]
        tm.assert_frame_equal(df, df2)

    def test_rhs_alignment(self):
        # GH8258, tests that both rows & columns are aligned to what is
        # assigned to. covers both uniform data-type & multi-type cases
        def run_tests(df, rhs, right_loc, right_iloc):
            # Assigns rhs into a fresh copy of df three ways (label lists via
            # .loc, position lists via .iloc, slices via .iloc) and compares
            # each outcome with the matching expected frame.
            # label, index, slice
            lbl_one, idx_one, slice_one = list("bcd"), [1, 2, 3], slice(1, 4)
            lbl_two, idx_two, slice_two = ["joe", "jolie"], [1, 2], slice(1, 3)

            left = df.copy()
            left.loc[lbl_one, lbl_two] = rhs
            tm.assert_frame_equal(left, right_loc)

            left = df.copy()
            left.iloc[idx_one, idx_two] = rhs
            tm.assert_frame_equal(left, right_iloc)

            left = df.copy()
            left.iloc[slice_one, slice_two] = rhs
            tm.assert_frame_equal(left, right_iloc)

        xs = np.arange(20).reshape(5, 4)
        cols = ["jim", "joe", "jolie", "joline"]
        df = DataFrame(xs, columns=cols, index=list("abcde"), dtype="int64")

        # right hand side; permute the indices and multiply by -2
        # (row positions 3, 2, 1 and column positions 2, 1, i.e. both reversed)
        rhs = -2 * df.iloc[3:0:-1, 2:0:-1]

        # expected `right` result; just multiply by -2
        right_iloc = df.copy()
        right_iloc["joe"] = [1, 14, 10, 6, 17]
        right_iloc["jolie"] = [2, 13, 9, 5, 18]
        right_iloc.iloc[1:4, 1:3] *= -2
        right_loc = df.copy()
        right_loc.iloc[1:4, 1:3] *= -2

        # run tests with uniform dtypes
        run_tests(df, rhs, right_loc, right_iloc)

        # make frames multi-type & re-run tests
        # (each of the four frames is modified in place by this loop)
        for frame in [df, rhs, right_loc, right_iloc]:
            frame["joe"] = frame["joe"].astype("float64")
            frame["jolie"] = frame["jolie"].map("@{}".format)
        # the positional expectation is then overwritten with literal values
        right_iloc["joe"] = [1.0, "@-28", "@-20", "@-12", 17.0]
        right_iloc["jolie"] = ["@2", -26.0, -18.0, -10.0, "@18"]
        run_tests(df, rhs, right_loc, right_iloc)

    @pytest.mark.parametrize(
        "idx", [_mklbl("A", 20), np.arange(20) + 100, np.linspace(100, 150, 20)]
    )
    def test_str_label_slicing_with_negative_step(self, idx):
        """Pair negative-step label slices with positional slices.

        The comparison itself is delegated to
        ``tm.assert_indexing_slices_equivalent`` (defined outside this view;
        presumably it checks that both slices select the same data).
        """
        SLC = pd.IndexSlice

        idx = Index(idx)
        ser = Series(np.arange(20), index=idx)
        tm.assert_indexing_slices_equivalent(ser, SLC[idx[9] :: -1], SLC[9::-1])
        tm.assert_indexing_slices_equivalent(ser, SLC[: idx[9] : -1], SLC[:8:-1])
        tm.assert_indexing_slices_equivalent(
            ser, SLC[idx[13] : idx[9] : -1], SLC[13:8:-1]
        )
        tm.assert_indexing_slices_equivalent(ser, SLC[idx[9] : idx[13] : -1], SLC[:0])

    def test_slice_with_zero_step_raises(self, index, indexer_sl, frame_or_series):
        """A zero-step slice must raise ``ValueError``.

        ``index``, ``indexer_sl`` and ``frame_or_series`` are supplied by the
        caller (presumably pytest fixtures defined outside this view).
        """
        obj = frame_or_series(np.arange(len(index)), index=index)
        with pytest.raises(ValueError, match="slice step cannot be zero"):
            indexer_sl(obj)[::0]

    def test_loc_setitem_indexing_assignment_dict_already_exists(self):
        """Assigning a dict to an existing row label via ``.loc``."""
        index = Index([-5, 0, 5], name="z")
        df = DataFrame({"x": [1, 2, 6], "y": [2, 2, 8]}, index=index)
        expected = df.copy()
        rhs = {"x": 9, "y": 99}
        df.loc[5] = rhs
        # the dict assignment must match assigning the values as a list
        expected.loc[5] = [9, 99]
        tm.assert_frame_equal(df, expected)

        # GH#38335 same thing, mixed dtypes
        df = DataFrame({"x": [1, 2, 6], "y": [2.0, 2.0, 8.0]}, index=index)
        df.loc[5] = rhs
        expected = DataFrame({"x": [1, 2, 9], "y": [2.0, 2.0, 99.0]}, index=index)
        tm.assert_frame_equal(df, expected)

    def test_iloc_getitem_indexing_dtypes_on_empty(self):
        # Check that .iloc returns correct dtypes GH9983
        df = DataFrame({"a": [1, 2, 3], "b": ["b", "b2", "b3"]})
        # empty row selection, all columns
        df2 = df.iloc[[], :]

        assert df2.loc[:, "a"].dtype == np.int64
        tm.assert_series_equal(df2.loc[:, "a"], df2.iloc[:, 0])

    @pytest.mark.parametrize("size", [5, 999999, 1000000])
    def test_loc_range_in_series_indexing(self, size):
        # range can cause an indexing error
        # GH 11652
        s = Series(index=range(size), dtype=np.float64)
        s.loc[range(1)] = 42
        tm.assert_series_equal(s.loc[range(1)], Series(42.0, index=[0]))

        s.loc[range(2)] = 43
        tm.assert_series_equal(s.loc[range(2)], Series(43.0, index=[0, 1]))

    def test_partial_boolean_frame_indexing(self):
        # GH 17170
        df = DataFrame(
            np.arange(9.0).reshape(3, 3), index=list("abc"), columns=list("ABC")
        )
        # the mask frame only has rows a-b and columns A-B
        index_df = DataFrame(1, index=list("ab"), columns=list("AB"))
        result = df[index_df.notnull()]
        # cells outside the mask's rows/columns are expected to be NaN
        expected = DataFrame(
            np.array([[0.0, 1.0, np.nan], [3.0, 4.0, np.nan], [np.nan] * 3]),
            index=list("abc"),
            columns=list("ABC"),
        )
        tm.assert_frame_equal(result, expected)

    def test_no_reference_cycle(self):
        """After touching each indexer, ``del df`` must free the frame."""
        df = DataFrame({"a": [0, 1], "b": [2, 3]})
        for name in ("loc", "iloc", "at", "iat"):
            getattr(df, name)
        wr = weakref.ref(df)
        del df
        # a dead weak reference returns None when called
        assert wr() is None

    def test_label_indexing_on_nan(self, nulls_fixture):
        # GH 32431
        # note: despite its name, ``df`` is a Series here
        df = Series([1, "{1,2}", 1, nulls_fixture])
        vc = df.value_counts(dropna=False)
        result1 = vc.loc[nulls_fixture]
        result2 = vc[nulls_fixture]

        expected = 1
        assert result1 == expected
        assert result2 == expected
def test_slice_with_zero_step_raises(self):
    """Zero-step slices raise ``ValueError`` through ``[]`` and ``.loc``."""
    ser = Series(np.arange(20), index=_mklbl("A", 20))
    zero_step = slice(None, None, 0)  # same object that ``[::0]`` builds

    for indexer in (ser, ser.loc):
        with pytest.raises(ValueError, match="slice step cannot be zero"):
            indexer[zero_step]
def test_slice_with_zero_step_raises(self, indexer_sl, frame_or_series):
    """Zero-step slices raise ``ValueError`` for the supplied indexer/object."""
    labels = _mklbl("A", 20)
    values = np.arange(20)
    obj = frame_or_series(values, index=labels)
    zero_step = slice(None, None, 0)  # same object that ``[::0]`` builds

    with pytest.raises(ValueError, match="slice step cannot be zero"):
        indexer_sl(obj)[zero_step]
def test_per_axis_per_level_getitem(self):
    """Exercise per-level ``.loc`` selection on MultiIndexed frames/series."""
    # GH6134
    # example test case
    every = slice(None)
    a_wanted = ("A1", "A2", "A3")
    level_labels = [_mklbl("A", 5), _mklbl("B", 7),
                    _mklbl("C", 4), _mklbl("D", 2)]
    ix = MultiIndex.from_product(level_labels)
    df = DataFrame(np.arange(len(ix.to_numpy())), index=ix)

    result = df.loc[(slice("A1", "A3"), every, ["C1", "C3"]), :]
    row_keys = [(a, b, c, d) for a, b, c, d in df.index.values
                if a in a_wanted and c in ("C1", "C3")]
    tm.assert_frame_equal(result, df.loc[row_keys])

    row_keys = [(a, b, c, d) for a, b, c, d in df.index.values
                if a in a_wanted and c in ("C1", "C2", "C3")]
    expected = df.loc[row_keys]
    result = df.loc[(slice("A1", "A3"), every, slice("C1", "C3")), :]
    tm.assert_frame_equal(result, expected)

    # test multi-index slicing with per axis and per index controls
    index = MultiIndex.from_tuples(
        [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"])
    columns = MultiIndex.from_tuples(
        [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
        names=["lvl0", "lvl1"],
    )
    raw = np.arange(16, dtype="int64").reshape(4, 4)
    df = DataFrame(raw, index=index, columns=columns)
    df = df.sort_index(axis=0).sort_index(axis=1)

    # identity
    tm.assert_frame_equal(df.loc[(every, every), :], df)
    tm.assert_frame_equal(df.loc[(every, every), (every, every)], df)
    tm.assert_frame_equal(df.loc[:, (every, every)], df)

    # index
    tm.assert_frame_equal(df.loc[(every, [1]), :], df.iloc[[0, 3]])
    tm.assert_frame_equal(df.loc[(every, 1), :], df.iloc[[0, 3]])

    # columns
    tm.assert_frame_equal(df.loc[:, (every, ["foo"])], df.iloc[:, [1, 3]])

    # both
    result = df.loc[(every, 1), (every, ["foo"])]
    tm.assert_frame_equal(result, df.iloc[[0, 3], [1, 3]])

    result = df.loc["A", "a"]
    expected = DataFrame(
        {"bar": [1, 5, 9], "foo": [0, 4, 8]},
        index=Index([1, 2, 3], name="two"),
        columns=Index(["bar", "foo"], name="lvl1"),
    )
    tm.assert_frame_equal(result, expected)

    tm.assert_frame_equal(df.loc[(every, [1, 2]), :], df.iloc[[0, 1, 3]])

    # multi-level series
    s = Series(np.arange(len(ix.to_numpy())), index=ix)
    result = s.loc["A1":"A3", :, ["C1", "C3"]]
    row_keys = [(a, b, c, d) for a, b, c, d in s.index.values
                if a in a_wanted and c in ("C1", "C3")]
    tm.assert_series_equal(result, s.loc[row_keys])

    # boolean indexers
    result = df.loc[(every, df.loc[:, ("a", "bar")] > 5), :]
    tm.assert_frame_equal(result, df.iloc[[2, 3]])

    # a boolean array whose length differs from the index must be rejected
    with pytest.raises(ValueError):
        df.loc[(every, np.array([True, False])), :]

    # ambiguous notation
    # this is interpreted as slicing on both axes (GH #16396)
    result = df.loc[every, [1]]
    tm.assert_frame_equal(result, df.iloc[:, []])

    result = df.loc[(every, [1]), :]
    tm.assert_frame_equal(result, df.iloc[[0, 3]])

    # not lexsorted
    assert df.index.lexsort_depth == 2
    df = df.sort_index(level=1, axis=0)
    assert df.index.lexsort_depth == 0

    msg = ("MultiIndex slicing requires the index to be "
           r"lexsorted: slicing on levels \[1\], lexsort depth 0")
    with pytest.raises(UnsortedIndexError, match=msg):
        df.loc[(every, slice("bar")), :]

    # GH 16734: not sorted, but no real slicing
    result = df.loc[(every, df.loc[:, ("a", "bar")] > 5), :]
    tm.assert_frame_equal(result, df.iloc[[1, 3], :])
def test_per_axis_per_level_getitem(self):
    """Exercise per-level ``.loc`` selection on MultiIndexed frames/series.

    The assertions use ``pytest.raises`` and plain ``assert`` rather than
    ``unittest`` methods on ``self`` and the removed ``tm.assertRaisesRegexp``
    helper, and the row count comes from ``len(ix)`` instead of the removed
    ``Index.get_values()``. The expectations themselves are unchanged.
    """
    # GH6134
    # example test case
    ix = MultiIndex.from_product(
        [_mklbl('A', 5), _mklbl('B', 7), _mklbl('C', 4), _mklbl('D', 2)])
    df = DataFrame(np.arange(len(ix)), index=ix)

    result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :]
    expected = df.loc[[
        (a, b, c, d) for a, b, c, d in df.index.values
        if a in ('A1', 'A2', 'A3') and c in ('C1', 'C3')
    ]]
    tm.assert_frame_equal(result, expected)

    expected = df.loc[[
        (a, b, c, d) for a, b, c, d in df.index.values
        if a in ('A1', 'A2', 'A3') and c in ('C1', 'C2', 'C3')
    ]]
    result = df.loc[(slice('A1', 'A3'), slice(None), slice('C1', 'C3')), :]
    tm.assert_frame_equal(result, expected)

    # test multi-index slicing with per axis and per index controls
    index = MultiIndex.from_tuples([('A', 1), ('A', 2),
                                    ('A', 3), ('B', 1)],
                                   names=['one', 'two'])
    columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
                                      ('b', 'foo'), ('b', 'bah')],
                                     names=['lvl0', 'lvl1'])

    df = DataFrame(np.arange(16, dtype='int64').reshape(4, 4),
                   index=index, columns=columns)
    df = df.sort_index(axis=0).sort_index(axis=1)

    # identity
    result = df.loc[(slice(None), slice(None)), :]
    tm.assert_frame_equal(result, df)
    result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))]
    tm.assert_frame_equal(result, df)
    result = df.loc[:, (slice(None), slice(None))]
    tm.assert_frame_equal(result, df)

    # index
    result = df.loc[(slice(None), [1]), :]
    expected = df.iloc[[0, 3]]
    tm.assert_frame_equal(result, expected)

    result = df.loc[(slice(None), 1), :]
    expected = df.iloc[[0, 3]]
    tm.assert_frame_equal(result, expected)

    # columns
    result = df.loc[:, (slice(None), ['foo'])]
    expected = df.iloc[:, [1, 3]]
    tm.assert_frame_equal(result, expected)

    # both
    result = df.loc[(slice(None), 1), (slice(None), ['foo'])]
    expected = df.iloc[[0, 3], [1, 3]]
    tm.assert_frame_equal(result, expected)

    result = df.loc['A', 'a']
    expected = DataFrame(dict(bar=[1, 5, 9], foo=[0, 4, 8]),
                         index=Index([1, 2, 3], name='two'),
                         columns=Index(['bar', 'foo'], name='lvl1'))
    tm.assert_frame_equal(result, expected)

    result = df.loc[(slice(None), [1, 2]), :]
    expected = df.iloc[[0, 1, 3]]
    tm.assert_frame_equal(result, expected)

    # multi-level series
    s = Series(np.arange(len(ix)), index=ix)
    result = s.loc['A1':'A3', :, ['C1', 'C3']]
    expected = s.loc[[
        (a, b, c, d) for a, b, c, d in s.index.values
        if a in ('A1', 'A2', 'A3') and c in ('C1', 'C3')
    ]]
    tm.assert_series_equal(result, expected)

    # boolean indexers
    result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
    expected = df.iloc[[2, 3]]
    tm.assert_frame_equal(result, expected)

    # a boolean array whose length differs from the index must be rejected
    with pytest.raises(ValueError):
        df.loc[(slice(None), np.array([True, False])), :]

    # ambiguous cases
    # these can be multiply interpreted (e.g. in this case
    # as df.loc[slice(None),[1]] as well
    with pytest.raises(KeyError):
        df.loc[slice(None), [1]]

    result = df.loc[(slice(None), [1]), :]
    expected = df.iloc[[0, 3]]
    tm.assert_frame_equal(result, expected)

    # not lexsorted
    assert df.index.lexsort_depth == 2
    df = df.sort_index(level=1, axis=0)
    assert df.index.lexsort_depth == 0

    # NOTE(review): later pandas releases (GH 16734) stopped raising for a
    # boolean indexer on an unsorted index -- confirm this expectation
    # against the pandas version the suite targets.
    msg = ('MultiIndex Slicing requires the index to be fully '
           r'lexsorted tuple len \(2\), lexsort depth \(0\)')
    with pytest.raises(UnsortedIndexError, match=msg):
        df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
def test_per_axis_per_level_getitem(self):
    """Exercise per-level ``.loc`` selection on MultiIndexed frames/series."""
    # GH6134
    # example test case
    every = slice(None)
    a_wanted = ('A1', 'A2', 'A3')
    level_labels = [_mklbl('A', 5), _mklbl('B', 7),
                    _mklbl('C', 4), _mklbl('D', 2)]
    ix = MultiIndex.from_product(level_labels)
    df = DataFrame(np.arange(len(ix.to_numpy())), index=ix)

    result = df.loc[(slice('A1', 'A3'), every, ['C1', 'C3']), :]
    row_keys = [(a, b, c, d) for a, b, c, d in df.index.values
                if a in a_wanted and c in ('C1', 'C3')]
    tm.assert_frame_equal(result, df.loc[row_keys])

    row_keys = [(a, b, c, d) for a, b, c, d in df.index.values
                if a in a_wanted and c in ('C1', 'C2', 'C3')]
    expected = df.loc[row_keys]
    result = df.loc[(slice('A1', 'A3'), every, slice('C1', 'C3')), :]
    tm.assert_frame_equal(result, expected)

    # test multi-index slicing with per axis and per index controls
    index = MultiIndex.from_tuples(
        [('A', 1), ('A', 2), ('A', 3), ('B', 1)], names=['one', 'two'])
    columns = MultiIndex.from_tuples(
        [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
        names=['lvl0', 'lvl1'])
    raw = np.arange(16, dtype='int64').reshape(4, 4)
    df = DataFrame(raw, index=index, columns=columns)
    df = df.sort_index(axis=0).sort_index(axis=1)

    # identity
    tm.assert_frame_equal(df.loc[(every, every), :], df)
    tm.assert_frame_equal(df.loc[(every, every), (every, every)], df)
    tm.assert_frame_equal(df.loc[:, (every, every)], df)

    # index
    tm.assert_frame_equal(df.loc[(every, [1]), :], df.iloc[[0, 3]])
    tm.assert_frame_equal(df.loc[(every, 1), :], df.iloc[[0, 3]])

    # columns
    tm.assert_frame_equal(df.loc[:, (every, ['foo'])], df.iloc[:, [1, 3]])

    # both
    result = df.loc[(every, 1), (every, ['foo'])]
    tm.assert_frame_equal(result, df.iloc[[0, 3], [1, 3]])

    result = df.loc['A', 'a']
    expected = DataFrame({'bar': [1, 5, 9], 'foo': [0, 4, 8]},
                         index=Index([1, 2, 3], name='two'),
                         columns=Index(['bar', 'foo'], name='lvl1'))
    tm.assert_frame_equal(result, expected)

    tm.assert_frame_equal(df.loc[(every, [1, 2]), :], df.iloc[[0, 1, 3]])

    # multi-level series
    s = Series(np.arange(len(ix.to_numpy())), index=ix)
    result = s.loc['A1':'A3', :, ['C1', 'C3']]
    row_keys = [(a, b, c, d) for a, b, c, d in s.index.values
                if a in a_wanted and c in ('C1', 'C3')]
    tm.assert_series_equal(result, s.loc[row_keys])

    # boolean indexers
    result = df.loc[(every, df.loc[:, ('a', 'bar')] > 5), :]
    tm.assert_frame_equal(result, df.iloc[[2, 3]])

    # a boolean array whose length differs from the index must be rejected
    with pytest.raises(ValueError):
        df.loc[(every, np.array([True, False])), :]

    # ambiguous cases
    # these can be multiply interpreted (e.g. in this case
    # as df.loc[slice(None),[1]] as well
    with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
        df.loc[every, [1]]

    result = df.loc[(every, [1]), :]
    tm.assert_frame_equal(result, df.iloc[[0, 3]])

    # not lexsorted
    assert df.index.lexsort_depth == 2
    df = df.sort_index(level=1, axis=0)
    assert df.index.lexsort_depth == 0

    msg = ('MultiIndex slicing requires the index to be '
           r'lexsorted: slicing on levels \[1\], lexsort depth 0')
    with pytest.raises(UnsortedIndexError, match=msg):
        df.loc[(every, slice('bar')), :]

    # GH 16734: not sorted, but no real slicing
    result = df.loc[(every, df.loc[:, ('a', 'bar')] > 5), :]
    tm.assert_frame_equal(result, df.iloc[[1, 3], :])
def test_per_axis_per_level_getitem(self):
    """Exercise per-level ``.loc`` selection on MultiIndexed frames/series.

    The row count comes from ``len(ix)`` rather than the removed
    ``Index.get_values()``, and every expected exception is checked with the
    context-manager form of ``pytest.raises`` instead of the removed
    ``tm.assert_raises_regex`` helper or throwaway closures.
    """
    # GH6134
    # example test case
    ix = MultiIndex.from_product(
        [_mklbl('A', 5), _mklbl('B', 7), _mklbl('C', 4), _mklbl('D', 2)])
    df = DataFrame(np.arange(len(ix)), index=ix)

    result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :]
    expected = df.loc[[
        (a, b, c, d) for a, b, c, d in df.index.values
        if a in ('A1', 'A2', 'A3') and c in ('C1', 'C3')
    ]]
    tm.assert_frame_equal(result, expected)

    expected = df.loc[[
        (a, b, c, d) for a, b, c, d in df.index.values
        if a in ('A1', 'A2', 'A3') and c in ('C1', 'C2', 'C3')
    ]]
    result = df.loc[(slice('A1', 'A3'), slice(None), slice('C1', 'C3')), :]
    tm.assert_frame_equal(result, expected)

    # test multi-index slicing with per axis and per index controls
    index = MultiIndex.from_tuples([('A', 1), ('A', 2),
                                    ('A', 3), ('B', 1)],
                                   names=['one', 'two'])
    columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
                                      ('b', 'foo'), ('b', 'bah')],
                                     names=['lvl0', 'lvl1'])

    df = DataFrame(np.arange(16, dtype='int64').reshape(4, 4),
                   index=index, columns=columns)
    df = df.sort_index(axis=0).sort_index(axis=1)

    # identity
    result = df.loc[(slice(None), slice(None)), :]
    tm.assert_frame_equal(result, df)
    result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))]
    tm.assert_frame_equal(result, df)
    result = df.loc[:, (slice(None), slice(None))]
    tm.assert_frame_equal(result, df)

    # index
    result = df.loc[(slice(None), [1]), :]
    expected = df.iloc[[0, 3]]
    tm.assert_frame_equal(result, expected)

    result = df.loc[(slice(None), 1), :]
    expected = df.iloc[[0, 3]]
    tm.assert_frame_equal(result, expected)

    # columns
    result = df.loc[:, (slice(None), ['foo'])]
    expected = df.iloc[:, [1, 3]]
    tm.assert_frame_equal(result, expected)

    # both
    result = df.loc[(slice(None), 1), (slice(None), ['foo'])]
    expected = df.iloc[[0, 3], [1, 3]]
    tm.assert_frame_equal(result, expected)

    result = df.loc['A', 'a']
    expected = DataFrame(dict(bar=[1, 5, 9], foo=[0, 4, 8]),
                         index=Index([1, 2, 3], name='two'),
                         columns=Index(['bar', 'foo'], name='lvl1'))
    tm.assert_frame_equal(result, expected)

    result = df.loc[(slice(None), [1, 2]), :]
    expected = df.iloc[[0, 1, 3]]
    tm.assert_frame_equal(result, expected)

    # multi-level series
    s = Series(np.arange(len(ix)), index=ix)
    result = s.loc['A1':'A3', :, ['C1', 'C3']]
    expected = s.loc[[
        (a, b, c, d) for a, b, c, d in s.index.values
        if a in ('A1', 'A2', 'A3') and c in ('C1', 'C3')
    ]]
    tm.assert_series_equal(result, expected)

    # boolean indexers
    result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
    expected = df.iloc[[2, 3]]
    tm.assert_frame_equal(result, expected)

    # a boolean array whose length differs from the index must be rejected
    with pytest.raises(ValueError):
        df.loc[(slice(None), np.array([True, False])), :]

    # ambiguous cases
    # these can be multiply interpreted (e.g. in this case
    # as df.loc[slice(None),[1]] as well
    with pytest.raises(KeyError):
        df.loc[slice(None), [1]]

    result = df.loc[(slice(None), [1]), :]
    expected = df.iloc[[0, 3]]
    tm.assert_frame_equal(result, expected)

    # not lexsorted
    assert df.index.lexsort_depth == 2
    df = df.sort_index(level=1, axis=0)
    assert df.index.lexsort_depth == 0

    msg = ('MultiIndex slicing requires the index to be '
           r'lexsorted: slicing on levels \[1\], lexsort depth 0')
    with pytest.raises(UnsortedIndexError, match=msg):
        df.loc[(slice(None), slice('bar')), :]

    # GH 16734: not sorted, but no real slicing
    result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
    tm.assert_frame_equal(result, df.iloc[[1, 3], :])
def test_loc_axis_arguments(self):
    """Check ``DataFrame.loc(axis=...)`` on a frame with MultiIndex axes.

    Rows are selected through ``axis=0`` / ``axis="index"``, columns through
    ``axis=1`` / ``axis="columns"``; any other axis must raise ``ValueError``
    with a "No axis named ..." message.
    """
    row_levels = [_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
    index = MultiIndex.from_product(row_levels)
    columns = MultiIndex.from_tuples(
        [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
        names=["lvl0", "lvl1"],
    )
    n_rows, n_cols = len(index), len(columns)
    values = np.arange(n_rows * n_cols, dtype="int64").reshape((n_rows, n_cols))
    df = DataFrame(values, index=index, columns=columns)
    df = df.sort_index().sort_index(axis=1)

    # axis 0
    result = df.loc(axis=0)["A1":"A3", :, ["C1", "C3"]]
    row_keys = [
        (a, b, c, d)
        for a, b, c, d in df.index.values
        if a in ("A1", "A2", "A3") and c in ("C1", "C3")
    ]
    tm.assert_frame_equal(result, df.loc[row_keys])

    result = df.loc(axis="index")[:, :, ["C1", "C3"]]
    row_keys = [
        (a, b, c, d) for a, b, c, d in df.index.values if c in ("C1", "C3")
    ]
    tm.assert_frame_equal(result, df.loc[row_keys])

    # axis 1
    foo_columns = (slice(None), "foo")
    tm.assert_frame_equal(df.loc(axis=1)[:, "foo"], df.loc[:, foo_columns])
    tm.assert_frame_equal(df.loc(axis="columns")[:, "foo"], df.loc[:, foo_columns])

    # invalid axis
    for i in (-1, 2, "foo"):
        msg = f"No axis named {i} for object type DataFrame"
        with pytest.raises(ValueError, match=msg):
            df.loc(axis=i)[:, :, ["C1", "C3"]]
def test_per_axis_per_level_doc_examples(self):
    """Run the per-axis / per-level slicing examples from the indexing docs."""
    # test index maker
    idx = pd.IndexSlice
    every = slice(None)

    # from indexing.rst / advanced
    row_levels = [_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
    index = MultiIndex.from_product(row_levels)
    columns = MultiIndex.from_tuples(
        [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
        names=["lvl0", "lvl1"],
    )
    n_rows, n_cols = len(index), len(columns)
    values = np.arange(n_rows * n_cols, dtype="int64").reshape((n_rows, n_cols))
    df = DataFrame(values, index=index, columns=columns)

    # tuple-of-slices spelling and the IndexSlice spelling must agree
    result = df.loc[(slice("A1", "A3"), every, ["C1", "C3"]), :]
    row_keys = [
        (a, b, c, d)
        for a, b, c, d in df.index.values
        if a in ("A1", "A2", "A3") and c in ("C1", "C3")
    ]
    expected = df.loc[row_keys]
    tm.assert_frame_equal(result, expected)
    result = df.loc[idx["A1":"A3", :, ["C1", "C3"]], :]
    tm.assert_frame_equal(result, expected)

    result = df.loc[(every, every, ["C1", "C3"]), :]
    row_keys = [
        (a, b, c, d) for a, b, c, d in df.index.values if c in ("C1", "C3")
    ]
    expected = df.loc[row_keys]
    tm.assert_frame_equal(result, expected)
    result = df.loc[idx[:, :, ["C1", "C3"]], :]
    tm.assert_frame_equal(result, expected)

    # not sorted
    msg = ("MultiIndex slicing requires the index to be lexsorted: "
           r"slicing on levels \[1\], lexsort depth 1")
    with pytest.raises(UnsortedIndexError, match=msg):
        df.loc["A1", ("a", slice("foo"))]

    # GH 16734: not sorted, but no real slicing
    tm.assert_frame_equal(
        df.loc["A1", (every, "foo")], df.loc["A1"].iloc[:, [0, 2]]
    )

    df = df.sort_index(axis=1)

    # slicing
    df.loc["A1", (every, "foo")]
    df.loc[(every, every, ["C1", "C3"]), (every, "foo")]

    # setitem
    df.loc(axis=0)[:, :, ["C1", "C3"]] = -10
def test_slice_with_zero_step_raises(self, indexer_sl):
    """Zero-step slices raise ``ValueError`` for the supplied indexer."""
    labels = _mklbl("A", 20)
    ser = Series(np.arange(20), index=labels)
    zero_step = slice(None, None, 0)  # same object that ``[::0]`` builds

    with pytest.raises(ValueError, match="slice step cannot be zero"):
        indexer_sl(ser)[zero_step]