Example #1
0
    def test_constructor(self) -> None:
        """Check the attributes set up by the ``PandasMultiIndex`` constructor.

        Verifies the dimension name, the wrapped pandas index, level names
        and per-level dtypes, the error raised when the dimension name
        collides with a level name, and the level names assigned to an
        unnamed multi-index.
        """
        level_names = ("foo", "bar")
        foo_values = np.array([0, 0, 1], dtype="int64")
        bar_values = np.array([1.1, 1.2, 1.3], dtype="float64")
        named_midx = pd.MultiIndex.from_arrays(
            [foo_values, bar_values], names=level_names
        )

        wrapped = PandasMultiIndex(named_midx, "x")

        assert wrapped.dim == "x"
        assert wrapped.index.equals(named_midx)
        assert wrapped.index.names == level_names
        assert wrapped.index.name == "x"
        expected_dtypes = {"foo": foo_values.dtype, "bar": bar_values.dtype}
        assert wrapped.level_coords_dtype == expected_dtypes

        # Passing an existing level name ("foo") as the dimension must fail.
        conflict_pattern = ".*conflicting multi-index level name.*"
        with pytest.raises(ValueError, match=conflict_pattern):
            PandasMultiIndex(named_midx, "foo")

        # default level names
        unnamed_midx = pd.MultiIndex.from_arrays([foo_values, bar_values])
        wrapped = PandasMultiIndex(unnamed_midx, "x")
        assert wrapped.index.names == ("x_level_0", "x_level_1")
Example #2
0
    def test_rename(self) -> None:
        """Check renaming of a level and of the dimension.

        An empty rename must hand back the very same object; renaming a
        level or the dimension must keep ``level_coords_dtype`` in sync.
        """
        dtypes = {"one": "<U1", "two": np.int32}
        pd_midx = pd.MultiIndex.from_product(
            [["a", "b"], [1, 2]], names=("one", "two")
        )
        index = PandasMultiIndex(pd_midx, "x", level_coords_dtype=dtypes)

        # shortcut
        assert index.rename({}, {}) is index

        level_renamed = index.rename({"two": "three"}, {})
        assert level_renamed.index.names == ["one", "three"]
        assert level_renamed.dim == "x"
        expected_dtypes = {"one": "<U1", "three": np.int32}
        assert level_renamed.level_coords_dtype == expected_dtypes

        dim_renamed = index.rename({}, {"x": "y"})
        assert dim_renamed.index.names == ["one", "two"]
        assert dim_renamed.dim == "y"
        assert dim_renamed.level_coords_dtype == dtypes
Example #3
0
    def test_from_variables(self) -> None:
        """Check ``PandasMultiIndex.from_variables`` and its input validation.

        Two 1-d variables on dimension ``x`` must give a multi-index named
        ``x`` whose levels carry the variable names; a 2-d variable or
        variables on different dimensions must raise ``ValueError``.
        """
        level_vars = {
            "level1": xr.Variable(
                "x", [1, 2, 3], attrs={"unit": "m"}, encoding={"dtype": np.int32}
            ),
            "level2": xr.Variable(
                "x", ["a", "b", "c"], attrs={"unit": "m"}, encoding={"dtype": "U"}
            ),
        }

        index = PandasMultiIndex.from_variables(level_vars)

        expected_idx = pd.MultiIndex.from_arrays(
            [level_vars["level1"].data, level_vars["level2"].data]
        )
        assert index.dim == "x"
        assert index.index.equals(expected_idx)
        assert index.index.name == "x"
        assert index.index.names == ["level1", "level2"]

        two_dim_var = xr.Variable(("x", "y"), [[1, 2, 3], [4, 5, 6]])
        with pytest.raises(
            ValueError, match=r".*only accepts 1-dimensional variables.*"
        ):
            PandasMultiIndex.from_variables({"var": two_dim_var})

        other_dim_var = xr.Variable("y", [4, 5, 6])
        mismatched = {"level1": level_vars["level1"], "level3": other_dim_var}
        with pytest.raises(
            ValueError, match=r"unmatched dimensions for multi-index variables.*"
        ):
            PandasMultiIndex.from_variables(mismatched)
Example #4
0
    def test_unstack(self) -> None:
        """Check that ``unstack`` returns one index per level plus a pandas index."""
        pd_midx = pd.MultiIndex.from_product(
            [["a", "b"], [1, 2, 3]], names=["one", "two"]
        )

        per_level, returned_midx = PandasMultiIndex(pd_midx, "x").unstack()

        assert list(per_level) == ["one", "two"]
        # Each level is expected back as a plain index on its own dimension.
        for level, values in (("one", ["a", "b"]), ("two", [1, 2, 3])):
            assert per_level[level].equals(PandasIndex(values, level))
        assert returned_midx.equals(pd_midx)
Example #5
0
    def test_copy(self) -> None:
        """Check that ``copy`` yields an equal index backed by a new pandas object.

        The copy must compare equal to the source index, must not share the
        underlying ``pd.MultiIndex`` object, and must keep the dimension
        name and the per-level coordinate dtypes.
        """
        # "<U1" is the NumPy string for a one-character unicode dtype
        # (byte-order mark first, then kind and length).
        level_coords_dtype = {"one": "<U1", "two": np.int32}
        expected = PandasMultiIndex(
            pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two")),
            "x",
            level_coords_dtype=level_coords_dtype,
        )
        actual = expected.copy()

        assert actual.index.equals(expected.index)
        assert actual.index is not expected.index
        assert actual.dim == expected.dim
        assert actual.level_coords_dtype == expected.level_coords_dtype
Example #6
0
    def test_join(self):
        """Check default and outer joins between a multi-index and a subset of it.

        The default join is expected to equal the two-entry subset, the
        outer join the full index; both must keep ``level_coords_dtype``.
        """
        dtypes = {"one": "<U2", "two": "i"}
        full_midx = pd.MultiIndex.from_product(
            [["a", "aa"], [1, 2]], names=("one", "two")
        )
        whole = PandasMultiIndex(full_midx, "x", level_coords_dtype=dtypes)
        head = PandasMultiIndex(full_midx[0:2], "x", level_coords_dtype=dtypes)

        # (keyword arguments passed to join, index the result should equal)
        cases = (({}, head), ({"how": "outer"}, whole))
        for join_kwargs, expected in cases:
            joined = whole.join(head, **join_kwargs)
            assert joined.equals(expected)
            assert joined.level_coords_dtype == dtypes
Example #7
0
    def unique_indexes(self) -> list[PandasIndex]:
        """Return three distinct indexes: ``x``, ``y`` and a multi-index ``z``.

        ``x`` and ``y`` wrap plain integer pandas indexes; ``z`` wraps a
        2x2 product multi-index with levels ``one`` and ``two``.
        """
        single_dim = [
            PandasIndex(pd.Index(values, name=dim), dim)
            for dim, values in (("x", [1, 2, 3]), ("y", [4, 5, 6]))
        ]
        z_levels = pd.MultiIndex.from_product(
            [["a", "b"], [1, 2]], names=["one", "two"]
        )
        return [*single_dim, PandasMultiIndex(z_levels, "z")]
Example #8
0
    def test_concat(self) -> None:
        """Check that concatenating two slices along ``x`` rebuilds the full index."""
        dtypes = {"foo": np.int32, "bar": "<U1"}
        full_midx = pd.MultiIndex.from_product(
            [[0, 1, 2], ["a", "b"]], names=("foo", "bar")
        )

        def wrap(pd_midx):
            # Every index in this test shares the dimension and level dtypes.
            return PandasMultiIndex(pd_midx, "x", level_coords_dtype=dtypes)

        pieces = [wrap(full_midx[:2]), wrap(full_midx[2:])]
        expected = wrap(full_midx)

        actual = PandasMultiIndex.concat(pieces, "x")
        assert actual.equals(expected)
        assert actual.level_coords_dtype == expected.level_coords_dtype
Example #9
0
    def test_create_variables(self) -> None:
        """Check that ``create_variables`` reproduces the given index variables.

        Each created variable must be identical to its template (same
        dtype included), and each level variable must match the dtype
        recorded in ``level_coords_dtype``.
        """
        foo_values = np.array([0, 0, 1], dtype="int64")
        bar_values = np.array([1.1, 1.2, 1.3], dtype="float64")
        pd_midx = pd.MultiIndex.from_arrays(
            [foo_values, bar_values], names=("foo", "bar")
        )
        templates = {
            "x": IndexVariable("x", pd_midx),
            "foo": IndexVariable("x", foo_values, attrs={"unit": "m"}),
            "bar": IndexVariable("x", bar_values, encoding={"fill_value": 0}),
        }

        index = PandasMultiIndex(pd_midx, "x")
        created = index.create_variables(templates)

        for name, template in templates.items():
            result = created[name]
            assert_identical(result, template)
            assert result.dtype == template.dtype
            if name == "x":
                # Only the level variables have a level_coords_dtype entry.
                continue
            assert result.dtype == index.level_coords_dtype[name]
Example #10
0
    def test_stack_non_unique(self) -> None:
        """Check the codes and levels produced when a stacked variable has duplicates."""
        x_var = xr.Variable("x", pd.Index(["b", "a"]), attrs={"foo": "bar"})
        y_var = xr.Variable("y", pd.Index([1, 1, 2]))

        stacked = PandasMultiIndex.stack({"x": x_var, "y": y_var}, "z").index

        # The repeated 1 in "y" shares code 0; "x" levels keep input order.
        expected_codes = [[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1]]
        np.testing.assert_array_equal(stacked.codes, expected_codes)
        np.testing.assert_array_equal(stacked.levels[0], ["b", "a"])
        np.testing.assert_array_equal(stacked.levels[1], [1, 2])
Example #11
0
    def test_stack(self) -> None:
        """Check ``PandasMultiIndex.stack`` on two variables and its dimension check.

        Stacking ``x`` and ``y`` into ``z`` must give levels named after
        the variables with product-ordered codes; stacking two variables
        that share a dimension must raise ``ValueError``.
        """
        x_var = xr.Variable("x", pd.Index(["b", "a"]), attrs={"foo": "bar"})
        y_var = xr.Variable("y", pd.Index([1, 3, 2]))

        index = PandasMultiIndex.stack({"x": x_var, "y": y_var}, "z")

        assert index.dim == "z"
        assert index.index.names == ["x", "y"]
        expected_codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
        np.testing.assert_array_equal(index.index.codes, expected_codes)

        # Both variables below live on the same dimension "x".
        same_dim_vars = {
            "x": xr.Variable("x", ["a", "b"]),
            "x2": xr.Variable("x", [1, 2]),
        }
        with pytest.raises(
            ValueError, match=r"conflicting dimensions for multi-index product.*"
        ):
            PandasMultiIndex.stack(same_dim_vars, "z")
Example #12
0
    def test_from_variables(self):
        """Check ``from_variables`` when it returns an ``(index, variables)`` pair.

        The created variables must be keyed ``x``, ``level1``, ``level2``
        and match the inputs; a 2-d variable or variables on different
        dimensions must raise ``ValueError``.
        """
        level1 = xr.Variable(
            "x", [1, 2, 3], attrs={"unit": "m"}, encoding={"dtype": np.int32}
        )
        level2 = xr.Variable(
            "x", ["a", "b", "c"], attrs={"unit": "m"}, encoding={"dtype": "U"}
        )

        index, created_vars = PandasMultiIndex.from_variables(
            {"level1": level1, "level2": level2}
        )

        expected_idx = pd.MultiIndex.from_arrays([level1.data, level2.data])
        assert index.dim == "x"
        assert index.index.equals(expected_idx)

        assert list(created_vars) == ["x", "level1", "level2"]
        xr.testing.assert_equal(
            xr.IndexVariable("x", expected_idx), created_vars["x"]
        )
        for name, source in (("level1", level1), ("level2", level2)):
            xr.testing.assert_identical(
                source.to_index_variable(), created_vars[name]
            )

        two_dim_var = xr.Variable(("x", "y"), [[1, 2, 3], [4, 5, 6]])
        with pytest.raises(
            ValueError, match=r".*only accepts 1-dimensional variables.*"
        ):
            PandasMultiIndex.from_variables({"var": two_dim_var})

        other_dim_var = xr.Variable("y", [4, 5, 6])
        mismatched = {"level1": level1, "level3": other_dim_var}
        with pytest.raises(
            ValueError, match=r"unmatched dimensions for variables.*"
        ):
            PandasMultiIndex.from_variables(mismatched)
Example #13
0
    def test_from_pandas_index(self):
        """Check ``from_pandas_index`` on a two-level pandas multi-index.

        The returned index must reuse the given pandas object, and the
        returned variables must cover the dimension ``x`` and both levels.
        """
        level_values = {"foo": [1, 2, 3], "bar": [4, 5, 6]}
        pd_midx = pd.MultiIndex.from_arrays(
            list(level_values.values()), names=tuple(level_values)
        )

        index, created_vars = PandasMultiIndex.from_pandas_index(pd_midx, "x")

        assert index.dim == "x"
        assert index.index is pd_midx
        assert index.index.names == ("foo", "bar")
        xr.testing.assert_identical(
            created_vars["x"], IndexVariable("x", pd_midx)
        )
        for level, values in level_values.items():
            xr.testing.assert_identical(
                created_vars[level], IndexVariable("x", values)
            )
Example #14
0
    def test_sel(self) -> None:
        """Check ``sel`` with a tuple-bounded slice and with invalid label queries."""
        pd_midx = pd.MultiIndex.from_product(
            [["a", "b"], [1, 2]], names=("one", "two")
        )
        index = PandasMultiIndex(pd_midx, "x")

        # test tuples inside slice are considered as scalar indexer values
        tuple_slice = slice(("a", 1), ("b", 2))
        result = index.sel({"x": tuple_slice})
        assert result.dim_indexers == {"x": slice(0, 4)}

        # (exception type, message pattern or None, labels passed to sel)
        failing_queries = (
            (KeyError, r"not all values found", {"x": [0]}),
            (KeyError, None, {"x": 0}),
            (ValueError, r"cannot provide labels for both.*", {"one": 0, "x": "a"}),
            (ValueError, r"invalid multi-index level names", {"x": {"three": 0}}),
            (IndexError, None, {"x": (slice(None), 1, "no_level")}),
        )
        for exc_type, pattern, labels in failing_queries:
            with pytest.raises(exc_type, match=pattern):
                index.sel(labels)
Example #15
0
    def test_map_index_queries(self) -> None:
        """Check ``indexing.map_index_queries`` for a range of ``x`` label queries.

        Queries are run against a plain 1-d dataset and against a data
        array indexed by a three-level multi-index (``one``, ``two``,
        ``three``); each result is compared field by field with a
        hand-built ``IndexSelResult``.
        """

        def make_expected(
            x_indexer,
            x_index,
            other_vars,
            drop_coords,
            drop_indexes,
            rename_dims,
        ):
            # Build the expected result from the index that should remain
            # after selection plus any extra (scalar) variables.
            created_vars = x_index.create_variables()
            return indexing.IndexSelResult(
                dim_indexers={"x": x_indexer},
                indexes=dict.fromkeys(created_vars, x_index),
                variables={**created_vars, **other_vars},
                drop_coords=drop_coords,
                drop_indexes=drop_indexes,
                rename_dims=rename_dims,
            )

        def check_query(
            data: T_Xarray,
            x: Any,
            expected: indexing.IndexSelResult,
        ) -> None:
            # Run the query on "x" and compare every field of the result.
            results = indexing.map_index_queries(data, {"x": x})

            assert results.dim_indexers.keys() == expected.dim_indexers.keys()
            assert_array_equal(
                results.dim_indexers["x"], expected.dim_indexers["x"]
            )

            assert results.indexes.keys() == expected.indexes.keys()
            for name in results.indexes:
                assert results.indexes[name].equals(expected.indexes[name])

            assert results.variables.keys() == expected.variables.keys()
            for name in results.variables:
                assert_array_equal(
                    results.variables[name], expected.variables[name]
                )

            assert set(results.drop_coords) == set(expected.drop_coords)
            assert set(results.drop_indexes) == set(expected.drop_indexes)
            assert results.rename_dims == expected.rename_dims

        def leading_true(count):
            # Boolean mask over the 8 multi-index entries, first `count` set.
            return [True] * count + [False] * (8 - count)

        def expected_one_is_a(x_indexer):
            # Expected result when only level "one" is fixed to "a".
            remaining = PandasMultiIndex(
                pd.MultiIndex.from_product(
                    [[1, 2], [-1, -2]], names=("two", "three")
                ),
                "x",
            )
            return make_expected(
                x_indexer,
                remaining,
                {"one": Variable((), "a")},
                [],
                ["one"],
                {},
            )

        def expected_one_and_two_fixed():
            # Expected result when "one" is fixed to "a" and "two" to 1.
            return make_expected(
                leading_true(2),
                PandasIndex(pd.Index([-1, -2]), "three"),
                {"one": Variable((), "a"), "two": Variable((), 1)},
                ["x"],
                ["one", "two"],
                {"x": "three"},
            )

        data = Dataset({"x": ("x", [1, 2, 3])})
        pd_midx = pd.MultiIndex.from_product(
            [["a", "b"], [1, 2], [-1, -2]], names=("one", "two", "three")
        )
        mdata = DataArray(range(8), [("x", pd_midx)])

        # Scalar labels on the plain dataset, in several scalar flavours.
        for scalar_label in (1, np.int32(1), Variable([], 1)):
            check_query(data, scalar_label, indexing.IndexSelResult({"x": 0}))
        check_query(mdata, ("a", 1, -1), indexing.IndexSelResult({"x": 0}))

        # Tuple and scalar labels that fix only some of the levels.
        check_query(mdata, ("a", 1), expected_one_and_two_fixed())
        check_query(mdata, "a", expected_one_is_a(slice(0, 4, None)))
        check_query(mdata, ("a",), expected_one_is_a(leading_true(4)))

        # List-of-tuples and slice labels.
        check_query(
            mdata,
            [("a", 1, -1), ("b", 2, -2)],
            indexing.IndexSelResult({"x": [0, 7]}),
        )
        check_query(
            mdata,
            slice("a", "b"),
            indexing.IndexSelResult({"x": slice(0, 8, None)}),
        )
        check_query(
            mdata,
            slice(("a", 1), ("b", 1)),
            indexing.IndexSelResult({"x": slice(0, 6, None)}),
        )

        # Dict labels keyed by level name.
        check_query(
            mdata,
            {"one": "a", "two": 1, "three": -1},
            indexing.IndexSelResult({"x": 0}),
        )
        check_query(mdata, {"one": "a", "two": 1}, expected_one_and_two_fixed())

        expected = make_expected(
            [True, False, True, False, False, False, False, False],
            PandasIndex(pd.Index([1, 2]), "two"),
            {"one": Variable((), "a"), "three": Variable((), -1)},
            ["x"],
            ["one", "three"],
            {"x": "two"},
        )
        check_query(mdata, {"one": "a", "three": -1}, expected)

        check_query(mdata, {"one": "a"}, expected_one_is_a(leading_true(4)))
Example #16
0
    def test_query(self):
        """Check ``query`` with a tuple-bounded slice and with invalid label queries."""
        pd_midx = pd.MultiIndex.from_product(
            [["a", "b"], [1, 2]], names=("one", "two")
        )
        index = PandasMultiIndex(pd_midx)

        # test tuples inside slice are considered as scalar indexer values
        tuple_slice = slice(("a", 1), ("b", 2))
        assert index.query({"x": tuple_slice}) == (slice(0, 4), None)

        # (exception type, message pattern or None, labels passed to query)
        failing_queries = (
            (KeyError, r"not all values found", {"x": [0]}),
            (KeyError, None, {"x": 0}),
            (ValueError, r"cannot provide labels for both.*", {"one": 0, "x": "a"}),
            (ValueError, r"invalid multi-index level names", {"x": {"three": 0}}),
            (IndexError, None, {"x": (slice(None), 1, "no_level")}),
        )
        for exc_type, pattern, labels in failing_queries:
            with pytest.raises(exc_type, match=pattern):
                index.query(labels)