def read_indices(group):
    """Return the ``(obs_index, var_index)`` arrays stored under ``group``.

    Each of the ``obs`` and ``var`` sub-groups names its index column in the
    ``_index`` attribute; that child element is read with ``read_elem``.
    """

    def _read_index(subgroup):
        # "_index" attr holds the key of the child element storing the index.
        index_key = _read_attr(subgroup.attrs, "_index")
        return read_elem(subgroup[index_key])

    return _read_index(group["obs"]), _read_index(group["var"])
def read_dataframe_0_1_0(elem):
    """Read a dataframe stored with the legacy v0.1.0 encoding.

    Column order comes from the ``column-order`` attribute; the index column
    is named by the ``_index`` attribute.
    """
    col_order = _read_attr(elem.attrs, "column-order")
    index_key = _read_attr(elem.attrs, "_index")
    data = {}
    for col in col_order:
        data[col] = read_series(elem[col])
    frame = pd.DataFrame(
        data,
        index=read_series(elem[index_key]),
        columns=list(col_order),
    )
    # A literal "_index" key means the index had no name when written.
    if index_key != "_index":
        frame.index.name = index_key
    return frame
def get_spec(
    elem: "Union[h5py.Dataset, h5py.Group, zarr.Group, zarr.Dataset]",
) -> IOSpec:
    """Build the ``IOSpec`` for a stored element from its encoding attributes."""
    attrs = elem.attrs
    spec_args = {}
    for key in ("encoding-type", "encoding-version"):
        # Elements written before the spec system lack these attrs; default "".
        spec_args[key] = _read_attr(attrs, key, "")
    return proc_spec(spec_args)
def test_io_spec(store, value, encoding_type):
    """Round-trip ``value`` through the store and check its encoding tag."""
    elem_key = f"key_for_{encoding_type}"
    write_elem(store, elem_key, value, dataset_kwargs={})
    written = store[elem_key]
    assert encoding_type == _read_attr(written.attrs, "encoding-type")
    assert_equal(value, read_elem(written))
def test_write_to_root(store):
    """Writing an AnnData at "/" tags the root group and round-trips."""
    adata = gen_adata((3, 2))
    write_elem(store, "/", adata)
    roundtripped = read_elem(store)
    assert "anndata" == _read_attr(store.attrs, "encoding-type")
    assert_equal(roundtripped, adata)
def read_series(dataset: h5py.Dataset) -> Union[np.ndarray, pd.Categorical]:
    """Read a dataframe column, reconstructing legacy categoricals.

    Older files stored categorical columns as a codes dataset with a
    ``categories`` attribute naming a sibling dataset of category values;
    newer files are handled entirely by ``read_elem``.

    Parameters
    ----------
    dataset
        The on-disk column (h5py or zarr dataset).

    Returns
    -------
    A plain array, or a ``pd.Categorical`` for legacy categorical columns.
    """
    # For reading older dataframes
    if "categories" in dataset.attrs:
        if isinstance(dataset, ZarrArray):
            import zarr

            # BUG FIX: str.rstrip strips a *character set*, not a suffix, so
            # ``name.rstrip(basename)`` could mangle the parent path. Remove
            # the basename suffix explicitly instead.
            parent_name = dataset.name[: -len(dataset.basename)].rstrip("/") or "/"
            parent = zarr.open(dataset.store)[parent_name]
        else:
            parent = dataset.parent
        # "categories" attr names the sibling dataset of category values.
        categories_dset = parent[_read_attr(dataset.attrs, "categories")]
        categories = read_elem(categories_dset)
        ordered = bool(_read_attr(categories_dset.attrs, "ordered", False))
        return pd.Categorical.from_codes(
            read_elem(dataset), categories, ordered=ordered
        )
    else:
        return read_elem(dataset)
def test_io_spec_raw(store):
    """``.raw`` is written with its own "raw" encoding tag and round-trips."""
    adata = gen_adata((3, 2))
    adata.raw = adata
    write_elem(store, "adata", adata)
    assert _read_attr(store["adata/raw"].attrs, "encoding-type") == "raw"
    roundtripped = read_elem(store["adata"])
    assert_equal(roundtripped.raw, adata.raw)
def test_hdf5_attribute_conversion(tmp_path, teststring, encoding, length):
    """Strings written as HDF5 attributes come back unchanged via _read_attr."""
    path = tmp_path / "attributes.h5"
    with h5py.File(path, "w") as file:
        attrs = file.create_dataset("dset", data=np.arange(10)).attrs
        # Write with an explicit string dtype to exercise encoding/length.
        string_dtype = h5py.h5t.string_dtype(encoding=encoding, length=length)
        attrs.create("string", teststring, dtype=string_dtype)
        assert_equal(teststring, _read_attr(attrs, "string"))
def read_dataframe_partial(
    elem, *, items=None, indices=(slice(None, None), slice(None, None))
):
    """Read a subset of a stored dataframe.

    ``items`` restricts which columns are read (on-disk order is preserved);
    ``indices[0]`` selects the rows to read from each column and the index.
    """
    col_order = _read_attr(elem.attrs, "column-order")
    if items is None:
        columns = list(col_order)
    else:
        # Keep the on-disk ordering while filtering to the requested columns.
        columns = [col for col in col_order if col in items]
    index_key = _read_attr(elem.attrs, "_index")
    row_sel = indices[0]
    frame = pd.DataFrame(
        {col: read_elem_partial(elem[col], indices=row_sel) for col in columns},
        index=read_elem_partial(elem[index_key], indices=row_sel),
        columns=list(columns),
    )
    # A literal "_index" key means the index had no name when written.
    if index_key != "_index":
        frame.index.name = index_key
    return frame
def read_categorical(elem, *, items=None, indices=(slice(None),)):
    """Read a (possibly row-subset) categorical from its codes/categories."""
    # Only the codes are subset; the category values are always read whole.
    codes = read_elem_partial(elem["codes"], indices=indices)
    category_values = read_elem(elem["categories"])
    is_ordered = _read_attr(elem.attrs, "ordered")
    return pd.Categorical.from_codes(
        codes=codes, categories=category_values, ordered=is_ordered
    )
def read_categorical(elem):
    """Reconstruct a ``pd.Categorical`` from its stored codes and categories."""
    codes = read_elem(elem["codes"])
    category_values = read_elem(elem["categories"])
    is_ordered = _read_attr(elem.attrs, "ordered")
    return pd.Categorical.from_codes(
        codes=codes, categories=category_values, ordered=is_ordered
    )