Exemple #1
0
def read_indices(group):
    """Read the index elements of the ``obs`` and ``var`` children of ``group``.

    For each child, the ``"_index"`` attribute (fetched with ``_read_attr``)
    names the entry inside that child which holds the index; that entry is
    decoded with ``read_elem``.

    Returns:
        A ``(obs_idx, var_idx)`` tuple.
    """

    def _load_axis_index(axis_name):
        # Look up the child group, then follow its "_index" attribute to the
        # entry that stores the index values.
        axis_group = group[axis_name]
        index_entry = axis_group[_read_attr(axis_group.attrs, "_index")]
        return read_elem(index_entry)

    # Tuple items are evaluated left to right, so "obs" is read before "var".
    return _load_axis_index("obs"), _load_axis_index("var")
Exemple #2
0
def read_dataframe_0_1_0(elem):
    """Assemble a ``pandas.DataFrame`` from the entries stored under ``elem``.

    The ``"column-order"`` attribute lists the column entries (each read with
    ``read_series``) and the ``"_index"`` attribute names the entry holding
    the index. When that name is anything other than the literal ``"_index"``
    it is also used as the name of the resulting index.
    """
    column_names = _read_attr(elem.attrs, "column-order")
    index_name = _read_attr(elem.attrs, "_index")

    # Read the columns first, then the index.
    column_data = {}
    for name in column_names:
        column_data[name] = read_series(elem[name])
    index_values = read_series(elem[index_name])

    frame = pd.DataFrame(
        column_data,
        index=index_values,
        columns=list(column_names),
    )
    if index_name != "_index":
        frame.index.name = index_name
    return frame
Exemple #3
0
def get_spec(
    elem: "Union[h5py.Dataset, h5py.Group, zarr.Group, zarr.Dataset]",
) -> IOSpec:
    """Build the ``IOSpec`` for ``elem`` from its encoding attributes.

    Reads the ``"encoding-type"`` and ``"encoding-version"`` attributes via
    ``_read_attr``, passing ``""`` as the third argument (presumably the
    fallback used when the attribute is absent -- confirm against
    ``_read_attr``), and hands the resulting mapping to ``proc_spec``.
    """
    encoding_type = _read_attr(elem.attrs, "encoding-type", "")
    encoding_version = _read_attr(elem.attrs, "encoding-version", "")
    return proc_spec({
        "encoding-type": encoding_type,
        "encoding-version": encoding_version,
    })
Exemple #4
0
def test_io_spec(store, value, encoding_type):
    """Write ``value`` into ``store`` and check its encoding type and round trip."""
    elem_name = f"key_for_{encoding_type}"
    write_elem(store, elem_name, value, dataset_kwargs={})

    # The written element must be tagged with the expected encoding type.
    recorded_type = _read_attr(store[elem_name].attrs, "encoding-type")
    assert encoding_type == recorded_type

    # Reading the element back must reproduce the original value.
    round_tripped = read_elem(store[elem_name])
    assert_equal(value, round_tripped)
Exemple #5
0
def test_write_to_root(store):
    """Write a generated AnnData at the root key ``"/"`` and read it back."""
    original = gen_adata((3, 2))

    write_elem(store, "/", original)
    round_tripped = read_elem(store)

    # The store itself should carry the "anndata" encoding tag.
    recorded_type = _read_attr(store.attrs, "encoding-type")
    assert "anndata" == recorded_type
    assert_equal(round_tripped, original)
Exemple #6
0
def read_series(dataset: h5py.Dataset) -> Union[np.ndarray, pd.Categorical]:
    """Read ``dataset``, rebuilding a categorical when it carries categories.

    If ``dataset`` has no ``"categories"`` attribute it is decoded with
    ``read_elem`` and returned as is. Otherwise the attribute names a sibling
    entry (looked up in the dataset's parent) that holds the categories, and
    the values of ``dataset`` are used as codes for
    ``pd.Categorical.from_codes``. The ``"ordered"`` attribute of the
    categories entry, read with ``False`` as the third ``_read_attr``
    argument, sets the ordering flag.
    """
    if "categories" not in dataset.attrs:
        return read_elem(dataset)

    # For reading older dataframes
    if isinstance(dataset, ZarrArray):
        import zarr

        # rstrip() removes a *set of characters*, not a suffix. This yields the
        # parent path as long as the name has the form "<parent>/<basename>",
        # because stripping stops at the "/" before the basename.
        parent_path = dataset.name.rstrip(dataset.basename)
        container = zarr.open(dataset.store)[parent_path]
    else:
        container = dataset.parent

    categories_entry = container[_read_attr(dataset.attrs, "categories")]
    category_values = read_elem(categories_entry)
    is_ordered = bool(_read_attr(categories_entry.attrs, "ordered", False))
    codes = read_elem(dataset)
    return pd.Categorical.from_codes(codes, category_values, ordered=is_ordered)
Exemple #7
0
def test_io_spec_raw(store):
    """Check that ``.raw`` is written with the ``"raw"`` encoding type and round-trips."""
    original = gen_adata((3, 2))
    original.raw = original

    write_elem(store, "adata", original)

    # The nested raw element must be tagged with the "raw" encoding type.
    recorded_type = _read_attr(store["adata/raw"].attrs, "encoding-type")
    assert "raw" == recorded_type

    round_tripped = read_elem(store["adata"])
    assert_equal(round_tripped.raw, original.raw)
Exemple #8
0
def test_hdf5_attribute_conversion(tmp_path, teststring, encoding, length):
    """Store ``teststring`` as an HDF5 string attribute and read it back.

    The attribute is created with an h5py string dtype built from ``encoding``
    and ``length``; ``_read_attr`` must return a value equal to
    ``teststring``.
    """
    h5_path = tmp_path / "attributes.h5"
    with h5py.File(h5_path, "w") as h5_file:
        dataset = h5_file.create_dataset("dset", data=np.arange(10))
        attr_manager = dataset.attrs
        string_dtype = h5py.h5t.string_dtype(encoding=encoding, length=length)
        attr_manager.create("string", teststring, dtype=string_dtype)

        # Read back while the file is still open.
        assert_equal(teststring, _read_attr(attr_manager, "string"))
Exemple #9
0
def read_dataframe_partial(elem,
                           *,
                           items=None,
                           indices=(slice(None, None), slice(None, None))):
    """Read a subset of the dataframe stored under ``elem``.

    Args:
        elem: Container whose ``"column-order"`` attribute lists the column
            entries and whose ``"_index"`` attribute names the index entry.
        items: Optional collection of column names to keep. Columns are
            returned in ``"column-order"`` order; ``None`` keeps all of them.
        indices: Sequence of selections; only ``indices[0]`` is used, and it
            is passed to ``read_elem_partial`` for every column and the index.

    Returns:
        A ``pandas.DataFrame``. Its index is named after the index entry
        unless that entry is literally called ``"_index"``.
    """
    stored_order = _read_attr(elem.attrs, "column-order")
    if items is None:
        selected = list(stored_order)
    else:
        selected = [name for name in stored_order if name in items]
    index_name = _read_attr(elem.attrs, "_index")

    # Read the columns first, then the index, all with the same selection.
    column_data = {}
    for name in selected:
        column_data[name] = read_elem_partial(elem[name], indices=indices[0])
    index_values = read_elem_partial(elem[index_name], indices=indices[0])

    frame = pd.DataFrame(
        column_data,
        index=index_values,
        columns=list(selected),
    )
    if index_name != "_index":
        frame.index.name = index_name
    return frame
Exemple #10
0
def read_categorical(elem, *, items=None, indices=(slice(None), )):
    """Read a subset of the categorical stored under ``elem``.

    The ``"codes"`` entry is read with ``read_elem_partial`` using
    ``indices``, the ``"categories"`` entry is read in full with
    ``read_elem``, and the ``"ordered"`` attribute supplies the ordering
    flag. ``items`` is accepted but not used by this function.
    """
    selected_codes = read_elem_partial(elem["codes"], indices=indices)
    category_values = read_elem(elem["categories"])
    is_ordered = _read_attr(elem.attrs, "ordered")
    return pd.Categorical.from_codes(
        codes=selected_codes,
        categories=category_values,
        ordered=is_ordered,
    )
Exemple #11
0
def read_categorical(elem):
    """Rebuild a ``pandas.Categorical`` from the entries stored under ``elem``.

    The ``"codes"`` and ``"categories"`` entries are decoded with
    ``read_elem``; the ``"ordered"`` attribute supplies the ordering flag.
    """
    code_values = read_elem(elem["codes"])
    category_values = read_elem(elem["categories"])
    is_ordered = _read_attr(elem.attrs, "ordered")
    return pd.Categorical.from_codes(
        codes=code_values,
        categories=category_values,
        ordered=is_ordered,
    )