Ejemplo n.º 1
0
    def test_loc_axis_arguments(self):
        """Exercise ``DataFrame.loc(axis=...)`` on a MultiIndexed frame.

        Row selection is checked through ``axis=0`` and ``axis='index'``,
        column selection through ``axis=1`` and ``axis='columns'``, and
        unsupported axis values are expected to raise ``ValueError``.
        """
        row_index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2),
                                             _mklbl('C', 4), _mklbl('D', 2)])
        col_index = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
                                            ('b', 'foo'), ('b', 'bah')],
                                           names=['lvl0', 'lvl1'])
        n_rows, n_cols = len(row_index), len(col_index)
        values = np.arange(n_rows * n_cols, dtype='int64').reshape(
            (n_rows, n_cols))
        df = DataFrame(values, index=row_index, columns=col_index)
        df = df.sort_index().sort_index(axis=1)

        # axis 0
        result = df.loc(axis=0)['A1':'A3', :, ['C1', 'C3']]
        wanted = [(a, b, c, d)
                  for a, b, c, d in df.index.values
                  if a in ('A1', 'A2', 'A3') and c in ('C1', 'C3')]
        tm.assert_frame_equal(result, df.loc[wanted])

        result = df.loc(axis='index')[:, :, ['C1', 'C3']]
        wanted = [(a, b, c, d)
                  for a, b, c, d in df.index.values
                  if c in ('C1', 'C3')]
        tm.assert_frame_equal(result, df.loc[wanted])

        # axis 1 -- both spellings must agree with the explicit tuple form
        for column_axis in (1, 'columns'):
            result = df.loc(axis=column_axis)[:, 'foo']
            expected = df.loc[:, (slice(None), 'foo')]
            tm.assert_frame_equal(result, expected)

        # invalid axis
        for bad_axis in (-1, 2, 'foo'):
            with pytest.raises(ValueError):
                df.loc(axis=bad_axis)[:, :, ['C1', 'C3']]
Ejemplo n.º 2
0
    def test_per_axis_per_level_doc_examples(self):
        """Run the per-axis / per-level slicing examples on a 4-level index.

        The tuple-of-slices spelling and the ``pd.IndexSlice`` spelling are
        both compared against a selection built from explicit row keys.
        Column slicing is attempted before and after sorting the columns.
        """
        # test index maker
        make_slice = pd.IndexSlice
        every = slice(None)

        # from indexing.rst / advanced
        index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2),
                                         _mklbl('C', 4), _mklbl('D', 2)])
        columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
                                          ('b', 'foo'), ('b', 'bah')],
                                         names=['lvl0', 'lvl1'])
        values = np.arange(len(index) * len(columns), dtype='int64')
        df = DataFrame(values.reshape((len(index), len(columns))),
                       index=index, columns=columns)

        result = df.loc[(slice('A1', 'A3'), every, ['C1', 'C3']), :]
        row_keys = [(a, b, c, d)
                    for a, b, c, d in df.index.values
                    if a in ('A1', 'A2', 'A3') and c in ('C1', 'C3')]
        expected = df.loc[row_keys]
        tm.assert_frame_equal(result, expected)
        result = df.loc[make_slice['A1':'A3', :, ['C1', 'C3']], :]
        tm.assert_frame_equal(result, expected)

        result = df.loc[(every, every, ['C1', 'C3']), :]
        row_keys = [(a, b, c, d)
                    for a, b, c, d in df.index.values
                    if c in ('C1', 'C3')]
        expected = df.loc[row_keys]
        tm.assert_frame_equal(result, expected)
        result = df.loc[make_slice[:, :, ['C1', 'C3']], :]
        tm.assert_frame_equal(result, expected)

        # not sorted
        with pytest.raises(UnsortedIndexError):
            df.loc['A1', ('a', slice('foo'))]

        # GH 16734: not sorted, but no real slicing
        result = df.loc['A1', (every, 'foo')]
        expected = df.loc['A1'].iloc[:, [0, 2]]
        tm.assert_frame_equal(result, expected)

        df = df.sort_index(axis=1)

        # slicing
        df.loc['A1', (every, 'foo')]
        df.loc[(every, every, ['C1', 'C3']), (every, 'foo')]

        # setitem
        df.loc(axis=0)[:, :, ['C1', 'C3']] = -10
Ejemplo n.º 3
0
 def test_slice_with_zero_step_raises(self):
     """A zero slice step raises ValueError via ``[]``, ``.loc`` and ``.ix``."""
     expected_msg = 'slice step cannot be zero'
     ser = Series(np.arange(20), index=_mklbl('A', 20))

     with self.assertRaisesRegexp(ValueError, expected_msg):
         ser[::0]
     with self.assertRaisesRegexp(ValueError, expected_msg):
         ser.loc[::0]
     # Warnings raised while using ``.ix`` are recorded rather than shown.
     with catch_warnings(record=True):
         with self.assertRaisesRegexp(ValueError, expected_msg):
             ser.ix[::0]
Ejemplo n.º 4
0
 def test_slice_with_zero_step_raises(self):
     """A zero slice step raises ValueError via ``[]``, ``.loc`` and ``.ix``."""
     expected_msg = 'slice step cannot be zero'
     ser = Series(np.arange(20), index=_mklbl('A', 20))

     with pytest.raises(ValueError, match=expected_msg):
         ser[::0]

     with pytest.raises(ValueError, match=expected_msg):
         ser.loc[::0]

     # Warnings raised while using ``.ix`` are silenced inside this context.
     with catch_warnings(record=True):
         simplefilter("ignore")
         with pytest.raises(ValueError, match=expected_msg):
             ser.ix[::0]
Ejemplo n.º 5
0
 def test_slice_with_zero_step_raises(self):
     """A zero slice step raises ValueError via ``[]``, ``.loc`` and ``.ix``."""
     expected_msg = 'slice step cannot be zero'
     ser = Series(np.arange(20), index=_mklbl('A', 20))

     with tm.assert_raises_regex(ValueError, expected_msg):
         ser[::0]
     with tm.assert_raises_regex(ValueError, expected_msg):
         ser.loc[::0]

     # Warnings raised while using ``.ix`` are silenced inside this context.
     with catch_warnings(record=True):
         simplefilter("ignore")
         with tm.assert_raises_regex(ValueError, expected_msg):
             ser.ix[::0]
Ejemplo n.º 6
0
    def test_str_label_slicing_with_negative_step(self):
        """Compare negative-step label slices with positional slices.

        For a string, an integer and a float index, ``.loc`` with a label
        slice must return the same Series as ``.iloc`` with the matching
        positional slice.  When the index is not an integer index, plain
        ``[]`` with the label slice is checked as well.
        """
        SLC = pd.IndexSlice

        def assert_slices_equivalent(l_slc, i_slc):
            # ``s`` and ``idx`` are read from the enclosing loop at call time.
            expected = s.iloc[i_slc]
            tm.assert_series_equal(s.loc[l_slc], expected)

            # ``is_integer`` is a method and has to be called: the bare
            # attribute is a bound method, which is always truthy, so the
            # check below would otherwise never run.
            if not idx.is_integer():
                # For integer indices, plain getitem is position-based.
                tm.assert_series_equal(s[l_slc], expected)

        for idx in [_mklbl('A', 20), np.arange(20) + 100,
                    np.linspace(100, 150, 20)]:
            idx = Index(idx)
            s = Series(np.arange(20), index=idx)
            assert_slices_equivalent(SLC[idx[9]::-1], SLC[9::-1])
            assert_slices_equivalent(SLC[:idx[9]:-1], SLC[:8:-1])
            assert_slices_equivalent(SLC[idx[13]:idx[9]:-1], SLC[13:8:-1])
            assert_slices_equivalent(SLC[idx[9]:idx[13]:-1], SLC[:0])
Ejemplo n.º 7
0
    def test_str_label_slicing_with_negative_step(self):
        """Compare negative-step label slices with positional slices.

        For a string, an integer and a float index, ``.loc`` with a label
        slice must return the same Series as ``.iloc`` with the matching
        positional slice.  When the index is not an integer index, plain
        ``[]`` with the label slice is checked as well.
        """
        SLC = pd.IndexSlice

        def assert_slices_equivalent(l_slc, i_slc):
            # ``s`` and ``idx`` are read from the enclosing loop at call time.
            expected = s.iloc[i_slc]
            tm.assert_series_equal(s.loc[l_slc], expected)

            # ``is_integer`` is a method and has to be called: the bare
            # attribute is a bound method, which is always truthy, so the
            # check below would otherwise never run.
            if not idx.is_integer():
                # For integer indices, plain getitem is position-based.
                tm.assert_series_equal(s[l_slc], expected)

        for idx in [
                _mklbl("A", 20),
                np.arange(20) + 100,
                np.linspace(100, 150, 20)
        ]:
            idx = Index(idx)
            s = Series(np.arange(20), index=idx)
            assert_slices_equivalent(SLC[idx[9]::-1], SLC[9::-1])
            assert_slices_equivalent(SLC[:idx[9]:-1], SLC[:8:-1])
            assert_slices_equivalent(SLC[idx[13]:idx[9]:-1], SLC[13:8:-1])
            assert_slices_equivalent(SLC[idx[9]:idx[13]:-1], SLC[:0])
Ejemplo n.º 8
0
class TestMisc:
    """Assorted indexing tests for Series and DataFrame."""

    def test_float_index_to_mixed(self):
        """A string-labelled column can be added next to float labels."""
        frame = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)})
        frame["a"] = 10

        columns = {0.0: frame[0.0], 1.0: frame[1.0], "a": [10] * 10}
        tm.assert_frame_equal(DataFrame(columns), frame)

    def test_float_index_non_scalar_assignment(self):
        """Assigning a scalar through ``.loc`` with a float Index key."""
        frame = DataFrame(
            {"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0]
        )
        first_two = frame.index[:2]
        frame.loc[first_two] = 1

        expected = DataFrame(
            {"a": [1, 1, 3], "b": [1, 1, 5]}, index=frame.index
        )
        tm.assert_frame_equal(expected, frame)

    def test_loc_setitem_fullindex_views(self):
        """Assigning a frame's own rows back through ``.loc`` is a no-op."""
        frame = DataFrame(
            {"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0]
        )
        snapshot = frame.copy()
        frame.loc[frame.index] = frame.loc[frame.index]
        tm.assert_frame_equal(frame, snapshot)

    def test_rhs_alignment(self):
        """Setting with a DataFrame right-hand side via ``.loc`` / ``.iloc``."""

        # GH8258, tests that both rows & columns are aligned to what is
        # assigned to. covers both uniform data-type & multi-type cases
        def run_tests(df, rhs, right_loc, right_iloc):
            # (accessor, row key, column key, expected frame)
            cases = [
                ("loc", list("bcd"), ["joe", "jolie"], right_loc),
                ("iloc", [1, 2, 3], [1, 2], right_iloc),
                ("iloc", slice(1, 4), slice(1, 3), right_iloc),
            ]
            for accessor, row_key, col_key, expected in cases:
                left = df.copy()
                getattr(left, accessor)[row_key, col_key] = rhs
                tm.assert_frame_equal(left, expected)

        cols = ["jim", "joe", "jolie", "joline"]
        xs = np.arange(20).reshape(5, 4)
        df = DataFrame(xs, columns=cols, index=list("abcde"), dtype="int64")

        # right hand side; permute the indices and multiply by -2
        rhs = -2 * df.iloc[3:0:-1, 2:0:-1]

        # expected `right` result; just multiply by -2
        right_iloc = df.copy()
        right_iloc["joe"] = [1, 14, 10, 6, 17]
        right_iloc["jolie"] = [2, 13, 9, 5, 18]
        right_iloc.iloc[1:4, 1:3] *= -2
        right_loc = df.copy()
        right_loc.iloc[1:4, 1:3] *= -2

        # run tests with uniform dtypes
        run_tests(df, rhs, right_loc, right_iloc)

        # make frames multi-type & re-run tests
        for obj in [df, rhs, right_loc, right_iloc]:
            obj["joe"] = obj["joe"].astype("float64")
            obj["jolie"] = obj["jolie"].map("@{}".format)
        right_iloc["joe"] = [1.0, "@-28", "@-20", "@-12", 17.0]
        right_iloc["jolie"] = ["@2", -26.0, -18.0, -10.0, "@18"]
        run_tests(df, rhs, right_loc, right_iloc)

    @pytest.mark.parametrize(
        "idx", [_mklbl("A", 20), np.arange(20) + 100, np.linspace(100, 150, 20)]
    )
    def test_str_label_slicing_with_negative_step(self, idx):
        """Negative-step label slices versus the matching positional slices."""
        SLC = pd.IndexSlice

        idx = Index(idx)
        ser = Series(np.arange(20), index=idx)

        # (label slice, positional slice) pairs
        slice_pairs = [
            (SLC[idx[9] :: -1], SLC[9::-1]),
            (SLC[: idx[9] : -1], SLC[:8:-1]),
            (SLC[idx[13] : idx[9] : -1], SLC[13:8:-1]),
            (SLC[idx[9] : idx[13] : -1], SLC[:0]),
        ]
        for label_slice, positional_slice in slice_pairs:
            tm.assert_indexing_slices_equivalent(
                ser, label_slice, positional_slice
            )

    def test_slice_with_zero_step_raises(self, index, indexer_sl, frame_or_series):
        """Slicing with a step of zero raises ``ValueError``."""
        expected_msg = "slice step cannot be zero"
        positions = np.arange(len(index))
        obj = frame_or_series(positions, index=index)

        with pytest.raises(ValueError, match=expected_msg):
            indexer_sl(obj)[::0]

    def test_loc_setitem_indexing_assignment_dict_already_exists(self):
        """Assigning a dict to an existing row label through ``.loc``."""
        index = Index([-5, 0, 5], name="z")
        new_row = {"x": 9, "y": 99}

        df = DataFrame({"x": [1, 2, 6], "y": [2, 2, 8]}, index=index)
        expected = df.copy()
        df.loc[5] = new_row
        expected.loc[5] = [9, 99]
        tm.assert_frame_equal(df, expected)

        # GH#38335 same thing, mixed dtypes
        df = DataFrame({"x": [1, 2, 6], "y": [2.0, 2.0, 8.0]}, index=index)
        df.loc[5] = new_row
        expected = DataFrame(
            {"x": [1, 2, 9], "y": [2.0, 2.0, 99.0]}, index=index
        )
        tm.assert_frame_equal(df, expected)

    def test_iloc_getitem_indexing_dtypes_on_empty(self):
        """An empty ``.iloc`` row selection keeps the column dtype."""
        # Check that .iloc returns correct dtypes GH9983
        df = DataFrame({"a": [1, 2, 3], "b": ["b", "b2", "b3"]})
        empty = df.iloc[[], :]

        by_label = empty.loc[:, "a"]
        assert by_label.dtype == np.int64
        tm.assert_series_equal(empty.loc[:, "a"], empty.iloc[:, 0])

    @pytest.mark.parametrize("size", [5, 999999, 1000000])
    def test_loc_range_in_series_indexing(self, size):
        """Set and get Series values through ``.loc`` with a ``range`` key."""
        # range can cause an indexing error
        # GH 11652
        ser = Series(index=range(size), dtype=np.float64)

        # (range stop, value written, expected read-back)
        cases = [
            (1, 42, Series(42.0, index=[0])),
            (2, 43, Series(43.0, index=[0, 1])),
        ]
        for stop, value, expected in cases:
            ser.loc[range(stop)] = value
            tm.assert_series_equal(ser.loc[range(stop)], expected)

    def test_partial_boolean_frame_indexing(self):
        """Boolean-frame indexing with a mask covering part of the frame."""
        # GH 17170
        labels_rows, labels_cols = list("abc"), list("ABC")
        df = DataFrame(
            np.arange(9.0).reshape(3, 3), index=labels_rows, columns=labels_cols
        )
        index_df = DataFrame(1, index=list("ab"), columns=list("AB"))

        result = df[index_df.notnull()]

        expected_values = np.array(
            [[0.0, 1.0, np.nan], [3.0, 4.0, np.nan], [np.nan] * 3]
        )
        expected = DataFrame(
            expected_values, index=list("abc"), columns=list("ABC")
        )
        tm.assert_frame_equal(result, expected)

    def test_no_reference_cycle(self):
        """Accessing the indexers must not keep the frame alive."""
        frame = DataFrame({"a": [0, 1], "b": [2, 3]})
        for accessor in ("loc", "iloc", "at", "iat"):
            getattr(frame, accessor)

        frame_ref = weakref.ref(frame)
        del frame
        # The weak reference is dead once the only strong reference is gone.
        assert frame_ref() is None

    def test_label_indexing_on_nan(self, nulls_fixture):
        """Look up a null label in ``value_counts(dropna=False)`` output."""
        # GH 32431
        ser = Series([1, "{1,2}", 1, nulls_fixture])
        counts = ser.value_counts(dropna=False)

        via_loc = counts.loc[nulls_fixture]
        via_getitem = counts[nulls_fixture]

        expected = 1
        assert via_loc == expected
        assert via_getitem == expected
Ejemplo n.º 9
0
 def test_slice_with_zero_step_raises(self):
     """A zero slice step raises ValueError via ``[]`` and ``.loc``."""
     expected_msg = "slice step cannot be zero"
     ser = Series(np.arange(20), index=_mklbl("A", 20))

     with pytest.raises(ValueError, match=expected_msg):
         ser[::0]

     with pytest.raises(ValueError, match=expected_msg):
         ser.loc[::0]
Ejemplo n.º 10
0
 def test_slice_with_zero_step_raises(self, indexer_sl, frame_or_series):
     """Slicing with a step of zero raises ``ValueError``."""
     expected_msg = "slice step cannot be zero"
     labels = _mklbl("A", 20)
     container = frame_or_series(np.arange(20), index=labels)

     with pytest.raises(ValueError, match=expected_msg):
         indexer_sl(container)[::0]
Ejemplo n.º 11
0
    def test_per_axis_per_level_getitem(self):
        """Per-axis, per-level ``.loc`` selection on MultiIndexed objects.

        Slice, list, scalar and boolean indexers are applied to the row and
        column MultiIndex of a DataFrame (and to a Series).  The test then
        checks the error raised when slicing an index that is no longer
        lexsorted.
        """
        every = slice(None)
        a_labels = ("A1", "A2", "A3")

        # GH6134
        # example test case
        ix = MultiIndex.from_product(
            [_mklbl("A", 5), _mklbl("B", 7), _mklbl("C", 4), _mklbl("D", 2)]
        )
        df = DataFrame(np.arange(len(ix.to_numpy())), index=ix)

        result = df.loc[(slice("A1", "A3"), every, ["C1", "C3"]), :]
        row_keys = [
            (a, b, c, d)
            for a, b, c, d in df.index.values
            if a in a_labels and c in ("C1", "C3")
        ]
        expected = df.loc[row_keys]
        tm.assert_frame_equal(result, expected)

        row_keys = [
            (a, b, c, d)
            for a, b, c, d in df.index.values
            if a in a_labels and c in ("C1", "C2", "C3")
        ]
        expected = df.loc[row_keys]
        result = df.loc[(slice("A1", "A3"), every, slice("C1", "C3")), :]
        tm.assert_frame_equal(result, expected)

        # test multi-index slicing with per axis and per index controls
        index = MultiIndex.from_tuples(
            [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
        )
        columns = MultiIndex.from_tuples(
            [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
            names=["lvl0", "lvl1"],
        )

        values = np.arange(16, dtype="int64").reshape(4, 4)
        df = DataFrame(values, index=index, columns=columns)
        df = df.sort_index(axis=0).sort_index(axis=1)

        # identity
        result = df.loc[(every, every), :]
        tm.assert_frame_equal(result, df)
        result = df.loc[(every, every), (every, every)]
        tm.assert_frame_equal(result, df)
        result = df.loc[:, (every, every)]
        tm.assert_frame_equal(result, df)

        # index
        result = df.loc[(every, [1]), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        result = df.loc[(every, 1), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        # columns
        result = df.loc[:, (every, ["foo"])]
        expected = df.iloc[:, [1, 3]]
        tm.assert_frame_equal(result, expected)

        # both
        result = df.loc[(every, 1), (every, ["foo"])]
        expected = df.iloc[[0, 3], [1, 3]]
        tm.assert_frame_equal(result, expected)

        result = df.loc["A", "a"]
        expected = DataFrame(
            {"bar": [1, 5, 9], "foo": [0, 4, 8]},
            index=Index([1, 2, 3], name="two"),
            columns=Index(["bar", "foo"], name="lvl1"),
        )
        tm.assert_frame_equal(result, expected)

        result = df.loc[(every, [1, 2]), :]
        expected = df.iloc[[0, 1, 3]]
        tm.assert_frame_equal(result, expected)

        # multi-level series
        s = Series(np.arange(len(ix.to_numpy())), index=ix)
        result = s.loc["A1":"A3", :, ["C1", "C3"]]
        row_keys = [
            (a, b, c, d)
            for a, b, c, d in s.index.values
            if a in a_labels and c in ("C1", "C3")
        ]
        expected = s.loc[row_keys]
        tm.assert_series_equal(result, expected)

        # boolean indexers
        mask = df.loc[:, ("a", "bar")] > 5
        result = df.loc[(every, mask), :]
        expected = df.iloc[[2, 3]]
        tm.assert_frame_equal(result, expected)

        with pytest.raises(ValueError):
            df.loc[(every, np.array([True, False])), :]

        # ambiguous notation
        # this is interpreted as slicing on both axes (GH #16396)
        result = df.loc[every, [1]]
        expected = df.iloc[:, []]
        tm.assert_frame_equal(result, expected)

        result = df.loc[(every, [1]), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        # not lexsorted
        assert df.index.lexsort_depth == 2
        df = df.sort_index(level=1, axis=0)
        assert df.index.lexsort_depth == 0

        msg = ("MultiIndex slicing requires the index to be "
               r"lexsorted: slicing on levels \[1\], lexsort depth 0")
        with pytest.raises(UnsortedIndexError, match=msg):
            df.loc[(every, slice("bar")), :]

        # GH 16734: not sorted, but no real slicing
        mask = df.loc[:, ("a", "bar")] > 5
        result = df.loc[(every, mask), :]
        tm.assert_frame_equal(result, df.iloc[[1, 3], :])
Ejemplo n.º 12
0
    def test_per_axis_per_level_getitem(self):
        """Per-axis, per-level ``.loc`` selection on MultiIndexed objects.

        Slice, list, scalar and boolean indexers are applied to the row and
        column MultiIndex of a DataFrame (and to a Series).  The test then
        checks the error raised when indexing an index that is no longer
        lexsorted.
        """
        every = slice(None)
        a_labels = ('A1', 'A2', 'A3')

        # GH6134
        # example test case
        ix = MultiIndex.from_product(
            [_mklbl('A', 5), _mklbl('B', 7), _mklbl('C', 4), _mklbl('D', 2)])
        df = DataFrame(np.arange(len(ix.get_values())), index=ix)

        result = df.loc[(slice('A1', 'A3'), every, ['C1', 'C3']), :]
        row_keys = [
            (a, b, c, d) for a, b, c, d in df.index.values
            if a in a_labels and c in ('C1', 'C3')
        ]
        expected = df.loc[row_keys]
        tm.assert_frame_equal(result, expected)

        row_keys = [
            (a, b, c, d) for a, b, c, d in df.index.values
            if a in a_labels and c in ('C1', 'C2', 'C3')
        ]
        expected = df.loc[row_keys]
        result = df.loc[(slice('A1', 'A3'), every, slice('C1', 'C3')), :]
        tm.assert_frame_equal(result, expected)

        # test multi-index slicing with per axis and per index controls
        index = MultiIndex.from_tuples(
            [('A', 1), ('A', 2), ('A', 3), ('B', 1)],
            names=['one', 'two'])
        columns = MultiIndex.from_tuples(
            [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
            names=['lvl0', 'lvl1'])

        values = np.arange(16, dtype='int64').reshape(4, 4)
        df = DataFrame(values, index=index, columns=columns)
        df = df.sort_index(axis=0).sort_index(axis=1)

        # identity
        result = df.loc[(every, every), :]
        tm.assert_frame_equal(result, df)
        result = df.loc[(every, every), (every, every)]
        tm.assert_frame_equal(result, df)
        result = df.loc[:, (every, every)]
        tm.assert_frame_equal(result, df)

        # index
        result = df.loc[(every, [1]), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        result = df.loc[(every, 1), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        # columns
        result = df.loc[:, (every, ['foo'])]
        expected = df.iloc[:, [1, 3]]
        tm.assert_frame_equal(result, expected)

        # both
        result = df.loc[(every, 1), (every, ['foo'])]
        expected = df.iloc[[0, 3], [1, 3]]
        tm.assert_frame_equal(result, expected)

        result = df.loc['A', 'a']
        expected = DataFrame({'bar': [1, 5, 9], 'foo': [0, 4, 8]},
                             index=Index([1, 2, 3], name='two'),
                             columns=Index(['bar', 'foo'], name='lvl1'))
        tm.assert_frame_equal(result, expected)

        result = df.loc[(every, [1, 2]), :]
        expected = df.iloc[[0, 1, 3]]
        tm.assert_frame_equal(result, expected)

        # multi-level series
        s = Series(np.arange(len(ix.get_values())), index=ix)
        result = s.loc['A1':'A3', :, ['C1', 'C3']]
        row_keys = [
            (a, b, c, d) for a, b, c, d in s.index.values
            if a in a_labels and c in ('C1', 'C3')
        ]
        expected = s.loc[row_keys]
        tm.assert_series_equal(result, expected)

        # boolean indexers
        mask = df.loc[:, ('a', 'bar')] > 5
        result = df.loc[(every, mask), :]
        expected = df.iloc[[2, 3]]
        tm.assert_frame_equal(result, expected)

        with self.assertRaises(ValueError):
            df.loc[(every, np.array([True, False])), :]

        # ambiguous cases
        # these can be multiply interpreted (e.g. in this case
        # as df.loc[slice(None),[1]] as well
        with self.assertRaises(KeyError):
            df.loc[every, [1]]

        result = df.loc[(every, [1]), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        # not lexsorted
        self.assertEqual(df.index.lexsort_depth, 2)
        df = df.sort_index(level=1, axis=0)
        self.assertEqual(df.index.lexsort_depth, 0)

        msg = ('MultiIndex Slicing requires the index to be fully '
               r'lexsorted tuple len \(2\), lexsort depth \(0\)')
        with tm.assertRaisesRegexp(UnsortedIndexError, msg):
            df.loc[(every, df.loc[:, ('a', 'bar')] > 5), :]
Ejemplo n.º 13
0
    def test_per_axis_per_level_getitem(self):
        """Per-axis, per-level ``.loc`` selection on MultiIndexed objects.

        Slice, list, scalar and boolean indexers are applied to the row and
        column MultiIndex of a DataFrame (and to a Series).  The test then
        checks the error raised when slicing an index that is no longer
        lexsorted.
        """
        every = slice(None)
        a_labels = ('A1', 'A2', 'A3')

        # GH6134
        # example test case
        ix = MultiIndex.from_product(
            [_mklbl('A', 5), _mklbl('B', 7), _mklbl('C', 4), _mklbl('D', 2)])
        df = DataFrame(np.arange(len(ix.to_numpy())), index=ix)

        result = df.loc[(slice('A1', 'A3'), every, ['C1', 'C3']), :]
        row_keys = [
            (a, b, c, d) for a, b, c, d in df.index.values
            if a in a_labels and c in ('C1', 'C3')
        ]
        expected = df.loc[row_keys]
        tm.assert_frame_equal(result, expected)

        row_keys = [
            (a, b, c, d) for a, b, c, d in df.index.values
            if a in a_labels and c in ('C1', 'C2', 'C3')
        ]
        expected = df.loc[row_keys]
        result = df.loc[(slice('A1', 'A3'), every, slice('C1', 'C3')), :]
        tm.assert_frame_equal(result, expected)

        # test multi-index slicing with per axis and per index controls
        index = MultiIndex.from_tuples(
            [('A', 1), ('A', 2), ('A', 3), ('B', 1)],
            names=['one', 'two'])
        columns = MultiIndex.from_tuples(
            [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
            names=['lvl0', 'lvl1'])

        values = np.arange(16, dtype='int64').reshape(4, 4)
        df = DataFrame(values, index=index, columns=columns)
        df = df.sort_index(axis=0).sort_index(axis=1)

        # identity
        result = df.loc[(every, every), :]
        tm.assert_frame_equal(result, df)
        result = df.loc[(every, every), (every, every)]
        tm.assert_frame_equal(result, df)
        result = df.loc[:, (every, every)]
        tm.assert_frame_equal(result, df)

        # index
        result = df.loc[(every, [1]), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        result = df.loc[(every, 1), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        # columns
        result = df.loc[:, (every, ['foo'])]
        expected = df.iloc[:, [1, 3]]
        tm.assert_frame_equal(result, expected)

        # both
        result = df.loc[(every, 1), (every, ['foo'])]
        expected = df.iloc[[0, 3], [1, 3]]
        tm.assert_frame_equal(result, expected)

        result = df.loc['A', 'a']
        expected = DataFrame({'bar': [1, 5, 9], 'foo': [0, 4, 8]},
                             index=Index([1, 2, 3], name='two'),
                             columns=Index(['bar', 'foo'], name='lvl1'))
        tm.assert_frame_equal(result, expected)

        result = df.loc[(every, [1, 2]), :]
        expected = df.iloc[[0, 1, 3]]
        tm.assert_frame_equal(result, expected)

        # multi-level series
        s = Series(np.arange(len(ix.to_numpy())), index=ix)
        result = s.loc['A1':'A3', :, ['C1', 'C3']]
        row_keys = [
            (a, b, c, d) for a, b, c, d in s.index.values
            if a in a_labels and c in ('C1', 'C3')
        ]
        expected = s.loc[row_keys]
        tm.assert_series_equal(result, expected)

        # boolean indexers
        mask = df.loc[:, ('a', 'bar')] > 5
        result = df.loc[(every, mask), :]
        expected = df.iloc[[2, 3]]
        tm.assert_frame_equal(result, expected)

        with pytest.raises(ValueError):
            df.loc[(every, np.array([True, False])), :]

        # ambiguous cases
        # these can be multiply interpreted (e.g. in this case
        # as df.loc[slice(None),[1]] as well
        key_error_msg = r"'\[1\] not in index'"
        with pytest.raises(KeyError, match=key_error_msg):
            df.loc[every, [1]]

        result = df.loc[(every, [1]), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        # not lexsorted
        assert df.index.lexsort_depth == 2
        df = df.sort_index(level=1, axis=0)
        assert df.index.lexsort_depth == 0

        msg = ('MultiIndex slicing requires the index to be '
               r'lexsorted: slicing on levels \[1\], lexsort depth 0')
        with pytest.raises(UnsortedIndexError, match=msg):
            df.loc[(every, slice('bar')), :]

        # GH 16734: not sorted, but no real slicing
        mask = df.loc[:, ('a', 'bar')] > 5
        result = df.loc[(every, mask), :]
        tm.assert_frame_equal(result, df.iloc[[1, 3], :])
Ejemplo n.º 14
0
    def test_per_axis_per_level_getitem(self):
        """Per-axis, per-level ``.loc`` selection on MultiIndexed objects.

        Slice, list, scalar and boolean indexers are applied to the row and
        column MultiIndex of a DataFrame (and to a Series).  The test then
        checks the error raised when slicing an index that is no longer
        lexsorted.
        """
        every = slice(None)
        a_labels = ('A1', 'A2', 'A3')

        # GH6134
        # example test case
        ix = MultiIndex.from_product(
            [_mklbl('A', 5), _mklbl('B', 7), _mklbl('C', 4), _mklbl('D', 2)])
        df = DataFrame(np.arange(len(ix.get_values())), index=ix)

        result = df.loc[(slice('A1', 'A3'), every, ['C1', 'C3']), :]
        row_keys = [(a, b, c, d)
                    for a, b, c, d in df.index.values
                    if a in a_labels and c in ('C1', 'C3')]
        expected = df.loc[row_keys]
        tm.assert_frame_equal(result, expected)

        row_keys = [(a, b, c, d)
                    for a, b, c, d in df.index.values
                    if a in a_labels and c in ('C1', 'C2', 'C3')]
        expected = df.loc[row_keys]
        result = df.loc[(slice('A1', 'A3'), every, slice('C1', 'C3')), :]
        tm.assert_frame_equal(result, expected)

        # test multi-index slicing with per axis and per index controls
        index = MultiIndex.from_tuples(
            [('A', 1), ('A', 2), ('A', 3), ('B', 1)],
            names=['one', 'two'])
        columns = MultiIndex.from_tuples(
            [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
            names=['lvl0', 'lvl1'])

        values = np.arange(16, dtype='int64').reshape(4, 4)
        df = DataFrame(values, index=index, columns=columns)
        df = df.sort_index(axis=0).sort_index(axis=1)

        # identity
        result = df.loc[(every, every), :]
        tm.assert_frame_equal(result, df)
        result = df.loc[(every, every), (every, every)]
        tm.assert_frame_equal(result, df)
        result = df.loc[:, (every, every)]
        tm.assert_frame_equal(result, df)

        # index
        result = df.loc[(every, [1]), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        result = df.loc[(every, 1), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        # columns
        result = df.loc[:, (every, ['foo'])]
        expected = df.iloc[:, [1, 3]]
        tm.assert_frame_equal(result, expected)

        # both
        result = df.loc[(every, 1), (every, ['foo'])]
        expected = df.iloc[[0, 3], [1, 3]]
        tm.assert_frame_equal(result, expected)

        result = df.loc['A', 'a']
        expected = DataFrame({'bar': [1, 5, 9], 'foo': [0, 4, 8]},
                             index=Index([1, 2, 3], name='two'),
                             columns=Index(['bar', 'foo'], name='lvl1'))
        tm.assert_frame_equal(result, expected)

        result = df.loc[(every, [1, 2]), :]
        expected = df.iloc[[0, 1, 3]]
        tm.assert_frame_equal(result, expected)

        # multi-level series
        s = Series(np.arange(len(ix.get_values())), index=ix)
        result = s.loc['A1':'A3', :, ['C1', 'C3']]
        row_keys = [(a, b, c, d)
                    for a, b, c, d in s.index.values
                    if a in a_labels and c in ('C1', 'C3')]
        expected = s.loc[row_keys]
        tm.assert_series_equal(result, expected)

        # boolean indexers
        mask = df.loc[:, ('a', 'bar')] > 5
        result = df.loc[(every, mask), :]
        expected = df.iloc[[2, 3]]
        tm.assert_frame_equal(result, expected)

        with pytest.raises(ValueError):
            df.loc[(every, np.array([True, False])), :]

        # ambiguous cases
        # these can be multiply interpreted (e.g. in this case
        # as df.loc[slice(None),[1]] as well
        with pytest.raises(KeyError):
            df.loc[every, [1]]

        result = df.loc[(every, [1]), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        # not lexsorted
        assert df.index.lexsort_depth == 2
        df = df.sort_index(level=1, axis=0)
        assert df.index.lexsort_depth == 0

        msg = ('MultiIndex slicing requires the index to be '
               r'lexsorted: slicing on levels \[1\], lexsort depth 0')
        with tm.assert_raises_regex(UnsortedIndexError, msg):
            df.loc[(every, slice('bar')), :]

        # GH 16734: not sorted, but no real slicing
        mask = df.loc[:, ('a', 'bar')] > 5
        result = df.loc[(every, mask), :]
        tm.assert_frame_equal(result, df.iloc[[1, 3], :])
Ejemplo n.º 15
0
    def test_loc_axis_arguments(self):
        """Exercise ``df.loc(axis=...)`` on a frame with MultiIndex rows and columns.

        The valid axis spellings used here (``0``, ``"index"``, ``1`` and
        ``"columns"``) are compared against equivalent explicit ``.loc``
        selections; ``-1``, ``2`` and ``"foo"`` are expected to raise
        ``ValueError``.
        """
        level_specs = [("A", 4), ("B", 2), ("C", 4), ("D", 2)]
        row_index = MultiIndex.from_product(
            [_mklbl(prefix, count) for prefix, count in level_specs]
        )
        col_index = MultiIndex.from_tuples(
            [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
            names=["lvl0", "lvl1"],
        )
        n_rows, n_cols = len(row_index), len(col_index)
        values = np.arange(n_rows * n_cols, dtype="int64").reshape((n_rows, n_cols))
        df = DataFrame(values, index=row_index, columns=col_index)
        # Sort the rows first, then the columns.
        df = df.sort_index()
        df = df.sort_index(axis=1)

        first_level_keep = ("A1", "A2", "A3")
        third_level_keep = ("C1", "C3")

        # axis 0: slice on the first level, list-select on the third
        row_keys = [
            (a, b, c, d)
            for a, b, c, d in df.index.values
            if a in first_level_keep and c in third_level_keep
        ]
        expected = df.loc[row_keys]
        result = df.loc(axis=0)["A1":"A3", :, ["C1", "C3"]]
        tm.assert_frame_equal(result, expected)

        # axis given by name: only the third level is restricted
        row_keys = [
            (a, b, c, d)
            for a, b, c, d in df.index.values
            if c in third_level_keep
        ]
        expected = df.loc[row_keys]
        result = df.loc(axis="index")[:, :, ["C1", "C3"]]
        tm.assert_frame_equal(result, expected)

        # axis 1: the integer and the string spelling must select the same columns
        for column_axis in (1, "columns"):
            expected = df.loc[:, (slice(None), "foo")]
            result = df.loc(axis=column_axis)[:, "foo"]
            tm.assert_frame_equal(result, expected)

        # invalid axis
        for bad_axis in (-1, 2, "foo"):
            msg = f"No axis named {bad_axis} for object type DataFrame"
            with pytest.raises(ValueError, match=msg):
                df.loc(axis=bad_axis)[:, :, ["C1", "C3"]]
Ejemplo n.º 16
0
    def test_per_axis_per_level_doc_examples(self):
        """Run per-axis, per-level slicing examples on a MultiIndex frame.

        Each row selection is written both as a plain tuple of slicers and
        via ``pd.IndexSlice`` and compared against an explicit list of row
        keys. Slicing on the columns before they are sorted is expected to
        raise ``UnsortedIndexError``.
        """
        # IndexSlice is the shorthand for building per-level slicers
        idx = pd.IndexSlice

        # frame from indexing.rst / advanced
        level_specs = (("A", 4), ("B", 2), ("C", 4), ("D", 2))
        row_index = MultiIndex.from_product(
            [_mklbl(prefix, count) for prefix, count in level_specs]
        )
        col_index = MultiIndex.from_tuples(
            [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
            names=["lvl0", "lvl1"],
        )
        n_rows, n_cols = len(row_index), len(col_index)
        values = np.arange(n_rows * n_cols, dtype="int64").reshape((n_rows, n_cols))
        df = DataFrame(values, index=row_index, columns=col_index)

        first_level_keep = ("A1", "A2", "A3")
        third_level_keep = ("C1", "C3")

        # slice on the first level, list-select on the third
        row_keys = [
            (a, b, c, d)
            for a, b, c, d in df.index.values
            if a in first_level_keep and c in third_level_keep
        ]
        expected = df.loc[row_keys]
        row_indexers = (
            (slice("A1", "A3"), slice(None), ["C1", "C3"]),
            idx["A1":"A3", :, ["C1", "C3"]],
        )
        for row_indexer in row_indexers:
            result = df.loc[row_indexer, :]
            tm.assert_frame_equal(result, expected)

        # only the third level is restricted
        row_keys = [
            (a, b, c, d)
            for a, b, c, d in df.index.values
            if c in third_level_keep
        ]
        expected = df.loc[row_keys]
        row_indexers = (
            (slice(None), slice(None), ["C1", "C3"]),
            idx[:, :, ["C1", "C3"]],
        )
        for row_indexer in row_indexers:
            result = df.loc[row_indexer, :]
            tm.assert_frame_equal(result, expected)

        # not sorted: the columns have not been sorted at this point
        msg = (
            "MultiIndex slicing requires the index to be lexsorted: "
            r"slicing on levels \[1\], lexsort depth 1"
        )
        with pytest.raises(UnsortedIndexError, match=msg):
            df.loc["A1", ("a", slice("foo"))]

        # GH 16734: not sorted, but no real slicing
        result = df.loc["A1", (slice(None), "foo")]
        expected = df.loc["A1"].iloc[:, [0, 2]]
        tm.assert_frame_equal(result, expected)

        df = df.sort_index(axis=1)

        # slicing: results are discarded, these only need to run without raising
        df.loc["A1", (slice(None), "foo")]
        df.loc[(slice(None), slice(None), ["C1", "C3"]), (slice(None), "foo")]

        # setitem
        df.loc(axis=0)[:, :, ["C1", "C3"]] = -10
Ejemplo n.º 17
0
 def test_slice_with_zero_step_raises(self, indexer_sl):
     """Slicing a Series through ``indexer_sl`` with step 0 must raise ValueError."""
     labels = _mklbl("A", 20)
     ser = Series(np.arange(20), index=labels)
     # Same slice object the literal ``[::0]`` produces.
     zero_step = slice(None, None, 0)
     with pytest.raises(ValueError, match="slice step cannot be zero"):
         indexer_sl(ser)[zero_step]