def test_constructor(self) -> None:
    """Check the dim, level names and level dtypes stored by the constructor."""
    foo_values = np.array([0, 0, 1], dtype="int64")
    bar_values = np.array([1.1, 1.2, 1.3], dtype="float64")
    named_midx = pd.MultiIndex.from_arrays(
        [foo_values, bar_values], names=("foo", "bar")
    )

    index = PandasMultiIndex(named_midx, "x")

    assert index.dim == "x"
    assert index.index.equals(named_midx)
    assert index.index.names == ("foo", "bar")
    assert index.index.name == "x"
    expected_dtypes = {"foo": foo_values.dtype, "bar": bar_values.dtype}
    assert index.level_coords_dtype == expected_dtypes

    # Passing a level name ("foo") as the dimension name is expected to raise.
    with pytest.raises(ValueError, match=".*conflicting multi-index level name.*"):
        PandasMultiIndex(named_midx, "foo")

    # default level names
    unnamed_midx = pd.MultiIndex.from_arrays([foo_values, bar_values])
    index = PandasMultiIndex(unnamed_midx, "x")
    assert index.index.names == ("x_level_0", "x_level_1")
def test_rename(self) -> None:
    """Check renaming a level, renaming the dimension, and the no-op case."""
    dtypes = {"one": "<U1", "two": np.int32}
    midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two"))
    index = PandasMultiIndex(midx, "x", level_coords_dtype=dtypes)

    # shortcut: two empty mappings must hand back the very same object
    unchanged = index.rename({}, {})
    assert unchanged is index

    # rename one level; the dimension stays "x"
    level_renamed = index.rename({"two": "three"}, {})
    assert level_renamed.index.names == ["one", "three"]
    assert level_renamed.dim == "x"
    assert level_renamed.level_coords_dtype == {"one": "<U1", "three": np.int32}

    # rename the dimension; the levels and their dtypes stay untouched
    dim_renamed = index.rename({}, {"x": "y"})
    assert dim_renamed.index.names == ["one", "two"]
    assert dim_renamed.dim == "y"
    assert dim_renamed.level_coords_dtype == dtypes
def test_from_variables(self) -> None:
    """Check building a multi-index from 1-d variables and the error cases."""
    level1 = xr.Variable(
        "x", [1, 2, 3], attrs={"unit": "m"}, encoding={"dtype": np.int32}
    )
    level2 = xr.Variable(
        "x", ["a", "b", "c"], attrs={"unit": "m"}, encoding={"dtype": "U"}
    )

    level_vars = {"level1": level1, "level2": level2}
    index = PandasMultiIndex.from_variables(level_vars)

    expected_idx = pd.MultiIndex.from_arrays([level1.data, level2.data])
    assert index.dim == "x"
    assert index.index.equals(expected_idx)
    assert index.index.name == "x"
    assert index.index.names == ["level1", "level2"]

    # A 2-dimensional variable must be rejected.
    two_dim_var = xr.Variable(("x", "y"), [[1, 2, 3], [4, 5, 6]])
    with pytest.raises(
        ValueError, match=r".*only accepts 1-dimensional variables.*"
    ):
        PandasMultiIndex.from_variables({"var": two_dim_var})

    # Levels defined along different dimensions ("x" vs "y") must be rejected.
    level3 = xr.Variable("y", [4, 5, 6])
    mismatched_vars = {"level1": level1, "level3": level3}
    with pytest.raises(
        ValueError, match=r"unmatched dimensions for multi-index variables.*"
    ):
        PandasMultiIndex.from_variables(mismatched_vars)
def test_unstack(self) -> None:
    """Check that unstacking yields one index per level plus a pandas index."""
    product_midx = pd.MultiIndex.from_product(
        [["a", "b"], [1, 2, 3]], names=["one", "two"]
    )
    stacked = PandasMultiIndex(product_midx, "x")

    per_level, pandas_idx = stacked.unstack()

    assert list(per_level) == ["one", "two"]
    assert per_level["one"].equals(PandasIndex(["a", "b"], "one"))
    assert per_level["two"].equals(PandasIndex([1, 2, 3], "two"))
    assert pandas_idx.equals(product_midx)
def test_copy(self) -> None:
    """Check that ``copy`` keeps dim and level dtypes but not the pandas object.

    The copy must wrap an equal pandas index that is a distinct object from
    the one wrapped by the source index.
    """
    # "<U1" is the well-formed numpy spelling of a 1-character unicode dtype
    # (byte-order marker first, then kind and length).
    level_coords_dtype = {"one": "<U1", "two": np.int32}
    expected = PandasMultiIndex(
        pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two")),
        "x",
        level_coords_dtype=level_coords_dtype,
    )

    actual = expected.copy()

    assert actual.index.equals(expected.index)
    # equal content, but not the same underlying pandas object
    assert actual.index is not expected.index
    assert actual.dim == expected.dim
    assert actual.level_coords_dtype == expected.level_coords_dtype
def test_join(self):
    """Check the default join and the outer join of two multi-indexes."""
    midx = pd.MultiIndex.from_product([["a", "aa"], [1, 2]], names=("one", "two"))
    dtypes = {"one": "<U2", "two": "i"}
    full = PandasMultiIndex(midx, "x", level_coords_dtype=dtypes)
    # `head` only holds the first two entries of `full`
    head = PandasMultiIndex(midx[0:2], "x", level_coords_dtype=dtypes)

    # default join: the result must equal the smaller index
    joined = full.join(head)
    assert joined.equals(head)
    assert joined.level_coords_dtype == dtypes

    # outer join: the result must equal the larger index
    joined = full.join(head, how="outer")
    assert joined.equals(full)
    assert joined.level_coords_dtype == dtypes
def unique_indexes(self) -> list[PandasIndex]:
    """Return three distinct indexes for dimensions "x", "y" and "z".

    "x" and "y" are single pandas indexes; "z" is a multi-index with the
    levels "one" and "two".
    """
    z_levels = pd.MultiIndex.from_product(
        [["a", "b"], [1, 2]], names=["one", "two"]
    )
    return [
        PandasIndex(pd.Index([1, 2, 3], name="x"), "x"),
        PandasIndex(pd.Index([4, 5, 6], name="y"), "y"),
        PandasMultiIndex(z_levels, "z"),
    ]
def test_concat(self) -> None:
    """Check that concatenating two slices along "x" rebuilds the full index."""
    full_midx = pd.MultiIndex.from_product(
        [[0, 1, 2], ["a", "b"]], names=("foo", "bar")
    )
    dtypes = {"foo": np.int32, "bar": "<U1"}

    # split the full index into its first two entries and the remainder
    first_part = PandasMultiIndex(full_midx[:2], "x", level_coords_dtype=dtypes)
    second_part = PandasMultiIndex(full_midx[2:], "x", level_coords_dtype=dtypes)
    expected = PandasMultiIndex(full_midx, "x", level_coords_dtype=dtypes)

    actual = PandasMultiIndex.concat([first_part, second_part], "x")

    assert actual.equals(expected)
    assert actual.level_coords_dtype == expected.level_coords_dtype
def test_create_variables(self) -> None:
    """Check that created variables match the templates, including dtypes."""
    foo_values = np.array([0, 0, 1], dtype="int64")
    bar_values = np.array([1.1, 1.2, 1.3], dtype="float64")
    midx = pd.MultiIndex.from_arrays([foo_values, bar_values], names=("foo", "bar"))

    # template variables: the dimension coordinate plus one per level
    index_vars = {
        "x": IndexVariable("x", midx),
        "foo": IndexVariable("x", foo_values, attrs={"unit": "m"}),
        "bar": IndexVariable("x", bar_values, encoding={"fill_value": 0}),
    }

    index = PandasMultiIndex(midx, "x")
    created = index.create_variables(index_vars)

    for name, expected in index_vars.items():
        got = created[name]
        assert_identical(got, expected)
        assert got.dtype == expected.dtype
        # only level coordinates have an entry in level_coords_dtype
        if name != "x":
            assert got.dtype == index.level_coords_dtype[name]
def test_stack_non_unique(self) -> None:
    """Check codes and levels when a stacked variable has repeated values."""
    x_var = xr.Variable("x", pd.Index(["b", "a"]), attrs={"foo": "bar"})
    # "y" holds the value 1 twice
    y_var = xr.Variable("y", pd.Index([1, 1, 2]))

    stacked = PandasMultiIndex.stack({"x": x_var, "y": y_var}, "z")
    stacked_midx = stacked.index

    expected_codes = [[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1]]
    np.testing.assert_array_equal(stacked_midx.codes, expected_codes)
    np.testing.assert_array_equal(stacked_midx.levels[0], ["b", "a"])
    np.testing.assert_array_equal(stacked_midx.levels[1], [1, 2])
def test_stack(self) -> None:
    """Check stacking two 1-d variables into "z" and the conflicting-dim error."""
    x_var = xr.Variable("x", pd.Index(["b", "a"]), attrs={"foo": "bar"})
    y_var = xr.Variable("y", pd.Index([1, 3, 2]))

    stacked = PandasMultiIndex.stack({"x": x_var, "y": y_var}, "z")

    assert stacked.dim == "z"
    assert stacked.index.names == ["x", "y"]
    expected_codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
    np.testing.assert_array_equal(stacked.index.codes, expected_codes)

    # Two variables that share the dimension "x" cannot form a product.
    same_dim_vars = {
        "x": xr.Variable("x", ["a", "b"]),
        "x2": xr.Variable("x", [1, 2]),
    }
    with pytest.raises(
        ValueError, match=r"conflicting dimensions for multi-index product.*"
    ):
        PandasMultiIndex.stack(same_dim_vars, "z")
def test_from_variables(self):
    """Check the index and coordinate variables built from 1-d variables.

    In this variant ``from_variables`` is unpacked into an index and a
    mapping of index variables.
    """
    level1 = xr.Variable(
        "x", [1, 2, 3], attrs={"unit": "m"}, encoding={"dtype": np.int32}
    )
    level2 = xr.Variable(
        "x", ["a", "b", "c"], attrs={"unit": "m"}, encoding={"dtype": "U"}
    )

    level_vars = {"level1": level1, "level2": level2}
    index, index_vars = PandasMultiIndex.from_variables(level_vars)

    expected_idx = pd.MultiIndex.from_arrays([level1.data, level2.data])
    assert index.dim == "x"
    assert index.index.equals(expected_idx)

    # the dimension coordinate comes first, followed by one entry per level
    assert list(index_vars) == ["x", "level1", "level2"]
    xr.testing.assert_equal(xr.IndexVariable("x", expected_idx), index_vars["x"])
    xr.testing.assert_identical(level1.to_index_variable(), index_vars["level1"])
    xr.testing.assert_identical(level2.to_index_variable(), index_vars["level2"])

    # A 2-dimensional variable must be rejected.
    two_dim_var = xr.Variable(("x", "y"), [[1, 2, 3], [4, 5, 6]])
    with pytest.raises(
        ValueError, match=r".*only accepts 1-dimensional variables.*"
    ):
        PandasMultiIndex.from_variables({"var": two_dim_var})

    # Levels defined along different dimensions ("x" vs "y") must be rejected.
    level3 = xr.Variable("y", [4, 5, 6])
    mismatched_vars = {"level1": level1, "level3": level3}
    with pytest.raises(ValueError, match=r"unmatched dimensions for variables.*"):
        PandasMultiIndex.from_variables(mismatched_vars)
def test_from_pandas_index(self):
    """Check wrapping an existing pandas MultiIndex along dimension "x"."""
    source_midx = pd.MultiIndex.from_arrays(
        [[1, 2, 3], [4, 5, 6]], names=("foo", "bar")
    )

    index, index_vars = PandasMultiIndex.from_pandas_index(source_midx, "x")

    assert index.dim == "x"
    # the pandas object is expected to be reused as-is, not copied
    assert index.index is source_midx
    assert index.index.names == ("foo", "bar")

    # one variable for the dimension itself and one per level, all along "x"
    xr.testing.assert_identical(index_vars["x"], IndexVariable("x", source_midx))
    xr.testing.assert_identical(index_vars["foo"], IndexVariable("x", [1, 2, 3]))
    xr.testing.assert_identical(index_vars["bar"], IndexVariable("x", [4, 5, 6]))
def test_sel(self) -> None:
    """Check slice-of-tuples selection and the error cases of ``sel``."""
    midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two"))
    index = PandasMultiIndex(midx, "x")

    # test tuples inside slice are considered as scalar indexer values
    result = index.sel({"x": slice(("a", 1), ("b", 2))})
    assert result.dim_indexers == {"x": slice(0, 4)}

    # a list holding a label that is absent from the index
    with pytest.raises(KeyError, match=r"not all values found"):
        index.sel({"x": [0]})

    # a scalar label that is absent from the index
    with pytest.raises(KeyError):
        index.sel({"x": 0})

    # labels given for a level and for the dimension at the same time
    with pytest.raises(ValueError, match=r"cannot provide labels for both.*"):
        index.sel({"one": 0, "x": "a"})

    # a dict label that names a level which does not exist
    with pytest.raises(ValueError, match=r"invalid multi-index level names"):
        index.sel({"x": {"three": 0}})

    # a tuple label with more entries than the index has levels
    with pytest.raises(IndexError):
        index.sel({"x": (slice(None), 1, "no_level")})
def test_map_index_queries(self) -> None:
    """Exercise ``indexing.map_index_queries`` on plain and multi-indexed data."""

    def build_expected(
        x_indexer,
        x_index,
        other_vars,
        drop_coords,
        drop_indexes,
        rename_dims,
    ):
        # Assemble the IndexSelResult expected for a selection along "x":
        # the variables created by `x_index` plus the extra `other_vars`.
        coord_vars = x_index.create_variables()
        all_vars = dict(coord_vars)
        all_vars.update(other_vars)
        return indexing.IndexSelResult(
            dim_indexers={"x": x_indexer},
            indexes=dict.fromkeys(coord_vars, x_index),
            variables=all_vars,
            drop_coords=drop_coords,
            drop_indexes=drop_indexes,
            rename_dims=rename_dims,
        )

    def check_query(
        data: T_Xarray,
        x: Any,
        expected: indexing.IndexSelResult,
    ) -> None:
        # Run the query {"x": x} against `data` and compare field by field.
        actual = indexing.map_index_queries(data, {"x": x})

        assert actual.dim_indexers.keys() == expected.dim_indexers.keys()
        assert_array_equal(actual.dim_indexers["x"], expected.dim_indexers["x"])

        assert actual.indexes.keys() == expected.indexes.keys()
        for name, idx in actual.indexes.items():
            assert idx.equals(expected.indexes[name])

        assert actual.variables.keys() == expected.variables.keys()
        for name, var in actual.variables.items():
            assert_array_equal(var, expected.variables[name])

        # order is irrelevant for the dropped names
        assert set(actual.drop_coords) == set(expected.drop_coords)
        assert set(actual.drop_indexes) == set(expected.drop_indexes)
        assert actual.rename_dims == expected.rename_dims

    def two_three_index():
        # Fresh multi-index over the levels "two" and "three", along "x".
        return PandasMultiIndex(
            pd.MultiIndex.from_product([[1, 2], [-1, -2]], names=("two", "three")),
            "x",
        )

    data = Dataset({"x": ("x", [1, 2, 3])})
    mindex = pd.MultiIndex.from_product(
        [["a", "b"], [1, 2], [-1, -2]], names=("one", "two", "three")
    )
    mdata = DataArray(range(8), [("x", mindex)])

    # scalar labels of several flavours on a plain index
    for label in (1, np.int32(1), Variable([], 1)):
        check_query(data, label, indexing.IndexSelResult({"x": 0}))

    # full tuple on the multi-index
    check_query(mdata, ("a", 1, -1), indexing.IndexSelResult({"x": 0}))

    # partial tuple: levels "one" and "two" are fixed, "three" remains
    expected = build_expected(
        x_indexer=[True] * 2 + [False] * 6,
        x_index=PandasIndex(pd.Index([-1, -2]), "three"),
        other_vars={"one": Variable((), "a"), "two": Variable((), 1)},
        drop_coords=["x"],
        drop_indexes=["one", "two"],
        rename_dims={"x": "three"},
    )
    check_query(mdata, ("a", 1), expected)

    # scalar label for the first level
    expected = build_expected(
        x_indexer=slice(0, 4, None),
        x_index=two_three_index(),
        other_vars={"one": Variable((), "a")},
        drop_coords=[],
        drop_indexes=["one"],
        rename_dims={},
    )
    check_query(mdata, "a", expected)

    # one-element tuple for the first level
    expected = build_expected(
        x_indexer=[True] * 4 + [False] * 4,
        x_index=two_three_index(),
        other_vars={"one": Variable((), "a")},
        drop_coords=[],
        drop_indexes=["one"],
        rename_dims={},
    )
    check_query(mdata, ("a",), expected)

    # list of full tuples
    check_query(
        mdata,
        [("a", 1, -1), ("b", 2, -2)],
        indexing.IndexSelResult({"x": [0, 7]}),
    )

    # slices of scalars and of tuples
    check_query(
        mdata,
        slice("a", "b"),
        indexing.IndexSelResult({"x": slice(0, 8, None)}),
    )
    check_query(
        mdata,
        slice(("a", 1), ("b", 1)),
        indexing.IndexSelResult({"x": slice(0, 6, None)}),
    )

    # dict labels naming every level
    check_query(
        mdata,
        {"one": "a", "two": 1, "three": -1},
        indexing.IndexSelResult({"x": 0}),
    )

    # dict labels naming "one" and "two"; "three" remains
    expected = build_expected(
        x_indexer=[True] * 2 + [False] * 6,
        x_index=PandasIndex(pd.Index([-1, -2]), "three"),
        other_vars={"one": Variable((), "a"), "two": Variable((), 1)},
        drop_coords=["x"],
        drop_indexes=["one", "two"],
        rename_dims={"x": "three"},
    )
    check_query(mdata, {"one": "a", "two": 1}, expected)

    # dict labels naming "one" and "three"; "two" remains
    expected = build_expected(
        x_indexer=[True, False, True, False, False, False, False, False],
        x_index=PandasIndex(pd.Index([1, 2]), "two"),
        other_vars={"one": Variable((), "a"), "three": Variable((), -1)},
        drop_coords=["x"],
        drop_indexes=["one", "three"],
        rename_dims={"x": "two"},
    )
    check_query(mdata, {"one": "a", "three": -1}, expected)

    # dict label naming only "one"; "two" and "three" remain as a multi-index
    expected = build_expected(
        x_indexer=[True] * 4 + [False] * 4,
        x_index=two_three_index(),
        other_vars={"one": Variable((), "a")},
        drop_coords=[],
        drop_indexes=["one"],
        rename_dims={},
    )
    check_query(mdata, {"one": "a"}, expected)
def test_query(self):
    """Check slice-of-tuples lookup and the error cases of ``query``."""
    midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two"))
    index = PandasMultiIndex(midx)

    # test tuples inside slice are considered as scalar indexer values
    tuple_slice = slice(("a", 1), ("b", 2))
    assert index.query({"x": tuple_slice}) == (slice(0, 4), None)

    # a list holding a label that is absent from the index
    with pytest.raises(KeyError, match=r"not all values found"):
        index.query({"x": [0]})

    # a scalar label that is absent from the index
    with pytest.raises(KeyError):
        index.query({"x": 0})

    # labels given for a level and for the dimension at the same time
    with pytest.raises(ValueError, match=r"cannot provide labels for both.*"):
        index.query({"one": 0, "x": "a"})

    # a dict label that names a level which does not exist
    with pytest.raises(ValueError, match=r"invalid multi-index level names"):
        index.query({"x": {"three": 0}})

    # a tuple label with more entries than the index has levels
    with pytest.raises(IndexError):
        index.query({"x": (slice(None), 1, "no_level")})