Python proper_unstackの例、ndcsv.proper_unstack.proper_unstack Pythonの例

コード例 #1

0

ファイルを表示

def test_proper_unstack_other_mi():
    """Unstack the ``row`` MultiIndex while ``col`` keeps its own MultiIndex.

    The expected array has dims ``(col, x, w)``: the levels of the unstacked
    index appear after the dimension that was left alone.
    """
    row_index = pandas.MultiIndex.from_tuples(
        [("x0", "w0"), ("x0", "w1"), ("x1", "w0"), ("x1", "w1")],
        names=["x", "w"],
    )
    col_labels = [("y0", "z0"), ("y0", "z1"), ("y1", "z0"), ("y1", "z1")]

    stacked = xarray.DataArray(
        [[1, 2, 3, 4], [5, 6, 7, 8], [1, 2, 3, 4], [5, 6, 7, 8]],
        dims=["row", "col"],
        coords={
            "row": row_index,
            "col": pandas.MultiIndex.from_tuples(col_labels, names=["y", "z"]),
        },
    )
    # 'col' is still a MultiIndex in the expected output
    expected = xarray.DataArray(
        [[[1, 5], [1, 5]], [[2, 6], [2, 6]], [[3, 7], [3, 7]], [[4, 8], [4, 8]]],
        dims=["col", "x", "w"],
        coords={
            "col": pandas.MultiIndex.from_tuples(col_labels, names=["y", "z"]),
            "x": ["x0", "x1"],
            "w": ["w0", "w1"],
        },
    )

    actual = proper_unstack(stacked, "row")
    xarray.testing.assert_equal(actual, expected)

コード例 #2

0

ファイルを表示

def test_proper_unstack_order():
    """proper_unstack must keep labels in the order they appear in the index.

    The plain ``DataArray.unstack`` result is expected to differ, which is
    checked at the end of the test.
    """
    counts = ["first", "second", "third", "fourth"]
    # Note: using MultiIndex.from_tuples is NOT the same thing as
    # round-tripping DataArray.stack().unstack(), as the latter is not
    # affected by the re-ordering issue
    labels = [[x, count] for x in ("x1", "x0") for count in counts]
    stacked_index = pandas.MultiIndex.from_tuples(labels, names=["x", "count"])
    stacked = xarray.DataArray(
        numpy.arange(8),
        dims=["dim_0"],
        coords={"dim_0": stacked_index},
    )

    expected = xarray.DataArray(
        [[0, 1, 2, 3], [4, 5, 6, 7]],
        dims=["x", "count"],
        coords={"x": ["x1", "x0"], "count": ["first", "second", "third", "fourth"]},
    )
    actual = proper_unstack(stacked, "dim_0")
    xarray.testing.assert_equal(actual, expected)

    # Order is different when going through the stock unstack()
    with pytest.raises(AssertionError):
        xarray.testing.assert_equal(actual, stacked.unstack("dim_0"))

コード例 #3

0

ファイルを表示

ファイル: test_proper_unstack.py プロジェクト: jcclin/ndcsv

def test_proper_unstack_other_mi():
    """Check that only the requested MultiIndex dimension gets unstacked."""

    def col_index():
        # Fresh MultiIndex for the 'col' dim, which this test never unstacks
        return pandas.MultiIndex.from_tuples(
            [('y0', 'z0'), ('y0', 'z1'), ('y1', 'z0'), ('y1', 'z1')],
            names=['y', 'z'])

    row_index = pandas.MultiIndex.from_tuples(
        [('x0', 'w0'), ('x0', 'w1'), ('x1', 'w0'), ('x1', 'w1')],
        names=['x', 'w'])
    values = [[1, 2, 3, 4],
              [5, 6, 7, 8],
              [1, 2, 3, 4],
              [5, 6, 7, 8]]
    source = xarray.DataArray(
        values,
        dims=['row', 'col'],
        coords={'row': row_index, 'col': col_index()})

    result = proper_unstack(source, 'row')

    # 'x' and 'w' (the levels of 'row') show up after 'col'
    wanted_values = [[[1, 5], [1, 5]],
                     [[2, 6], [2, 6]],
                     [[3, 7], [3, 7]],
                     [[4, 8], [4, 8]]]
    wanted = xarray.DataArray(
        wanted_values,
        dims=['col', 'x', 'w'],
        coords={'col': col_index(),
                'x': ['x0', 'x1'],
                'w': ['w0', 'w1']})
    xarray.testing.assert_equal(result, wanted)

コード例 #4

0

ファイルを表示

ファイル: test_proper_unstack.py プロジェクト: jcclin/ndcsv

def test_proper_unstack_order():
    """Labels must come out of proper_unstack in first-seen order."""
    counts = ['first', 'second', 'third', 'fourth']
    # Note: using MultiIndex.from_tuples is NOT the same thing as
    # round-tripping DataArray.stack().unstack(), as the latter is not
    # affected by the re-ordering issue
    pairs = [['x1', c] for c in counts] + [['x0', c] for c in counts]
    mindex = pandas.MultiIndex.from_tuples(pairs, names=['x', 'count'])
    flat = xarray.DataArray(
        numpy.arange(8), dims=['dim_0'], coords={'dim_0': mindex})

    got = proper_unstack(flat, 'dim_0')
    want = xarray.DataArray(
        [[0, 1, 2, 3], [4, 5, 6, 7]],
        dims=['x', 'count'],
        coords={'x': ['x1', 'x0'],
                'count': ['first', 'second', 'third', 'fourth']})
    xarray.testing.assert_equal(got, want)

    stock_unstacked = flat.unstack('dim_0')
    with pytest.raises(AssertionError):
        # Order is different
        xarray.testing.assert_equal(got, stock_unstacked)

コード例 #5

0

ファイルを表示

ファイル: test_proper_unstack.py プロジェクト: jcclin/ndcsv

def test_proper_unstack_mixed_coords():
    """Round-trip stack -> proper_unstack with mixed-type coordinate labels."""
    labels = {'r': [1, 'x0'],
              'c': [1, 2.2, '3', 'fourth']}
    original = xarray.DataArray(
        [[0, 1, 2, 3], [4, 5, 6, 7]], dims=['r', 'c'], coords=labels)

    stacked = original.stack(s=['r', 'c'])
    restored = proper_unstack(stacked, 's')

    xarray.testing.assert_equal(original, restored)

コード例 #6

0

ファイルを表示

def test_proper_unstack_mixed_coords():
    """Stacking then proper_unstack-ing must give back the original array,
    even when the coordinate labels mix ints, floats and strings.
    """
    data = [[0, 1, 2, 3], [4, 5, 6, 7]]
    before = xarray.DataArray(
        data,
        dims=["r", "c"],
        coords={
            "r": [1, "x0"],
            "c": [1, 2.2, "3", "fourth"],
        },
    )
    after = proper_unstack(before.stack(s=["r", "c"]), "s")
    xarray.testing.assert_equal(before, after)

コード例 #7

0

ファイルを表示

ファイル: test_proper_unstack.py プロジェクト: jcclin/ndcsv

def test_proper_unstack_dtype():
    """Test that we don't accidentally end up with dtype=O for the coords

    A datetime coordinate and an integer coordinate are stacked together and
    then unstacked again; the result must equal the starting array.
    """
    dates = pandas.to_datetime(['2000/01/01', '2000/01/02'])
    original = xarray.DataArray(
        [[0, 1, 2, 3], [4, 5, 6, 7]],
        dims=['r', 'c'],
        coords={'r': dates, 'c': [1, 2, 3, 4]})

    stacked = original.stack(s=['r', 'c'])
    restored = proper_unstack(stacked, 's')

    xarray.testing.assert_equal(original, restored)

コード例 #8

0

ファイルを表示

def test_proper_unstack_dtype():
    """Test that we don't accidentally end up with dtype=O for the coords"""
    coords = {
        "r": pandas.to_datetime(["2000/01/01", "2000/01/02"]),
        "c": [1, 2, 3, 4],
    }
    before = xarray.DataArray(
        [[0, 1, 2, 3], [4, 5, 6, 7]],
        dims=["r", "c"],
        coords=coords,
    )
    # Stack both dims into 's', then undo it with the function under test
    after = proper_unstack(before.stack(s=["r", "c"]), "s")
    xarray.testing.assert_equal(before, after)

コード例 #9

0

ファイルを表示

def _write_csv_dataarray(array: xarray.DataArray, buf: IO) -> None:
    """Write :class:`xarray.DataArray` to buffer

    :param array:
        array to serialise. Coords spanning more than one dimension are
        rejected.
    :param buf:
        writable buffer receiving the output
    :raises ValueError:
        if any coord has more than one dimension
    """
    # 0D (scalar) array: a single line holding the value
    if array.ndim == 0:
        buf.write(f"{array.values}\n")
        return

    # Keep track of non-index coordinates, which get renamed to `name (dim)`.
    # Note that scalar (a-dimensional) coords are silently discarded
    renames = {}
    for k, v in array.coords.items():
        n_coord_dims = len(v.dims)
        if n_coord_dims > 1:
            raise ValueError(
                f"Multi-dimensional coord '{k}' is not supported by the NDCSV format"
            )
        if n_coord_dims == 1 and v.dims[0] != k:
            renames[k] = f"{k} ({v.dims[0]})"
    array = array.rename(renames)

    if array.ndim > 2:
        # Automatically stack dims beyond the first.
        # In the case where there's already a MultiIndex on a dim beyond
        # the first, first unstack them and then stack them again back all
        # together.
        for dim in array.dims[1:]:
            has_mindex = isinstance(array.get_index(dim), pandas.MultiIndex)
            if has_mindex:
                # Note: unstacked dims end up on the right
                array = proper_unstack(array, dim)
        # The __columns__ label is completely arbitrary and we're going
        # to lose it in a few moments when dumping to CSV.
        column_dims = array.dims[1:]
        array = array.stack(__columns__=column_dims)

    # non-index coords are lost when converting to pandas.
    # Incorporate them into the MultiIndex
    for dim in array.dims:
        from_mindex = isinstance(array.coords[dim].to_index(), pandas.MultiIndex)
        if from_mindex:
            array = array.reset_index(dim)
        elif dim not in array.coords:
            # Force default RangeIndex
            array.coords[dim] = array.coords[dim]

        attached = list(array[dim].coords)
        if attached == [dim]:
            # Only the dim's own coord is present; nothing to fold in
            continue
        index_name = dim if from_mindex else f"{dim}_mindex"
        array = array.set_index({index_name: attached})  # type: ignore

    _write_csv_pandas(array.to_pandas(), buf)

コード例 #10

0

ファイルを表示

ファイル: test_proper_unstack.py プロジェクト: jcclin/ndcsv

def test_proper_unstack_dataset():
    """proper_unstack on a Dataset: only the variable using 'col' changes
    shape, and every resulting coord must keep a unicode dtype.
    """
    col_index = pandas.MultiIndex.from_tuples(
        [('u0', 'v0'), ('u0', 'v1'), ('u1', 'v0'), ('u1', 'v1')],
        names=['u', 'v'])
    foo_stacked = xarray.DataArray(
        [[1, 2, 3, 4],
         [5, 6, 7, 8]],
        dims=['x', 'col'],
        coords={'x': ['x0', 'x1'], 'col': col_index})
    ds_in = xarray.Dataset(
        {'foo': foo_stacked, 'bar': ('w', [1, 2]), 'baz': numpy.pi})

    ds_out = proper_unstack(ds_in, 'col')

    foo_unstacked = xarray.DataArray(
        [[[1, 2], [3, 4]], [[5, 6], [7, 8]]],
        dims=['x', 'u', 'v'],
        coords={'x': ['x0', 'x1'],
                'u': ['u0', 'u1'],
                'v': ['v0', 'v1']})
    ds_expected = xarray.Dataset(
        {'foo': foo_unstacked, 'bar': ('w', [1, 2]), 'baz': numpy.pi})
    xarray.testing.assert_equal(ds_out, ds_expected)

    for coord_name in ds_out.coords:
        assert ds_out.coords[coord_name].dtype.kind == 'U'

コード例 #11

0

ファイルを表示

def test_proper_unstack_dataset():
    """Unstack a MultiIndex dim of a Dataset holding 2D, 1D and scalar
    variables, then check both the values and the coords' dtype kind.
    """

    def make_dataset(foo):
        # 'bar' and 'baz' do not use the 'col' dim
        return xarray.Dataset({"foo": foo, "bar": ("w", [1, 2]), "baz": numpy.pi})

    stacked_foo = xarray.DataArray(
        [[1, 2, 3, 4], [5, 6, 7, 8]],
        dims=["x", "col"],
        coords={
            "x": ["x0", "x1"],
            "col": pandas.MultiIndex.from_tuples(
                [("u0", "v0"), ("u0", "v1"), ("u1", "v0"), ("u1", "v1")],
                names=["u", "v"],
            ),
        },
    )
    actual = proper_unstack(make_dataset(stacked_foo), "col")

    unstacked_foo = xarray.DataArray(
        [[[1, 2], [3, 4]], [[5, 6], [7, 8]]],
        dims=["x", "u", "v"],
        coords={"x": ["x0", "x1"], "u": ["u0", "u1"], "v": ["v0", "v1"]},
    )
    xarray.testing.assert_equal(actual, make_dataset(unstacked_foo))

    for name in actual.coords:
        assert actual.coords[name].dtype.kind == "U"

コード例 #12

0

ファイルを表示

def _unpack(xa: DataArray, dim: Hashable, unstack: bool = True) -> DataArray:
    """Deal with MultiIndex and non-index coords

    Coords lying on ``dim`` whose name matches ``name (other_dim)`` are
    treated as non-index coords and renamed to ``name``; all other coords on
    ``dim`` are treated as levels of a stacked index.

    :param DataArray xa:
        array where all MultiIndex'es have been reset
    :param str dim:
        dim to unstack (dim_0 or dim_1). This function does nothing if
        the dim is not present at all
    :param bool unstack:
        If True, unstack all index dims using first-seen order
    :returns:
        the processed array; ``xa`` itself when ``dim`` is not one of its
        dimensions
    :raises ValueError:
        if, after unstacking, a non-index coord has different values for the
        same value of its dimension
    """
    if dim not in xa.dims:
        # Honour the documented contract: nothing to do for a missing dim.
        # Without this guard every collection below stays empty and the final
        # branch fails its ``len(dims) == 1`` assertion.
        return xa

    rename_map = {}
    dims = []
    index_coords = []
    nonindex_coords = []

    for k, v in xa.coords.items():
        assert len(v.dims) == 1
        if v.dims[0] == dim:
            # Non-index coords are formatted as `name (dim)`
            m = re.match(r"(.+) \((.+)\)$", k)
            if m:
                coord_name, coord_dim = m.group(1), m.group(2)
                # Non-index coordinate
                rename_map[k] = coord_name
                nonindex_coords.append((k, coord_dim))
                if coord_dim not in dims:
                    dims.append(coord_dim)
            else:
                # Stacked dimension
                index_coords.append(k)
                if k not in dims:
                    dims.append(k)

    # If multiple index coordinates, set a MultiIndex for them
    # Leave non-index coordinates out
    if len(dims) > 1:
        # Unstack MultiIndex, using a first-seen order
        xa = xa.set_index({dim: index_coords})  # type: ignore
        if unstack:
            xa = proper_unstack(xa, dim)
            # Now non-index coords will have become multi-dimensional
            # Drop extra dims if there is no ambiguity, otherwise raise error
            for coord, coord_dim in nonindex_coords:
                cvalue = xa.coords[coord]
                # Reference slice: first element along every dim other than
                # the coord's own one
                slice0 = cvalue.isel(
                    {
                        other_dim: 0
                        for other_dim in cvalue.dims if other_dim != coord_dim
                    },
                    drop=True,
                )
                if (cvalue == slice0).all():
                    xa.coords[coord] = slice0
                else:
                    raise ValueError(
                        f"Non-index coord {coord} has different values for the same "
                        f"value of its dimension {coord_dim}")
        # Finally rename non-index coords
        xa = xa.rename(rename_map)

    elif len(nonindex_coords) == 1 and not index_coords:
        # Special case where the dim will be y (x)
        assert len(dims) == 1
        assert len(rename_map) == 1
        new_dim = nonindex_coords[0][1]
        old_dim, coord_name = next(iter(rename_map.items()))
        # NOTE(review): this reads xa.dims[0] rather than ``dim``; presumably
        # the array is 1D along ``dim`` in this case - confirm against callers
        coord_value = xa.coords[xa.dims[0]]
        xa = xa.rename({old_dim: new_dim})
        # Replace the index coord on the renamed dim with a non-index coord
        # carrying the same values
        del xa.coords[xa.dims[0]]
        xa.coords[coord_name] = (xa.dims[0], coord_value.data)

    else:
        assert len(dims) == 1
        # Rename dim_0, dim_1 as index coord (if necessary)
        xa = xa.rename({dim: dims[0]})
        # Finally rename non-index coords
        xa = xa.rename(rename_map)
    return xa