def test_concat_size_0_dim():
    """Concatenating with a zero-width operand keeps the expected shape."""
    # Regression test: https://github.com/theislab/anndata/issues/526
    left = gen_adata((5, 10))
    right = gen_adata((5, 0))
    assert concat([left, right], axis=0).shape == (10, 0)
    assert concat([left, right], axis=1).shape == (5, 10)
def test_batch_key(axis):
    """concat should add a label column only when ``label`` is provided."""
    attr = ("obs", "var")[axis]

    def get_annot(adata):
        return getattr(adata, attr)

    lhs = gen_adata((10, 10))
    rhs = gen_adata((10, 12))
    input_columns = get_annot(lhs).columns.union(get_annot(rhs).columns)

    # Without a label, no columns beyond the inputs' should appear.
    annot = get_annot(concat([lhs, rhs], axis=axis))
    assert list(annot.columns.difference(input_columns)) == []

    # With a label, exactly one new column — named after the label — appears.
    batch_annot = get_annot(concat([lhs, rhs], axis=axis, label="batch"))
    assert list(batch_annot.columns.difference(input_columns)) == ["batch"]
def test_concat_names(axis):
    """Duplicate names stay duplicated unless ``index_unique`` is given."""
    attr = ("obs", "var")[axis]

    def get_annot(adata):
        return getattr(adata, attr)

    lhs = gen_adata((10, 10))
    rhs = gen_adata((10, 10))

    assert not get_annot(concat([lhs, rhs], axis=axis)).index.is_unique
    assert get_annot(concat([lhs, rhs], axis=axis, index_unique="-")).index.is_unique
def test_concat_interface_errors():
    """Invalid arguments to ``concat`` must raise ``ValueError``."""
    adatas = [gen_adata((5, 10)), gen_adata((5, 10))]
    bad_calls = [
        partial(concat, adatas, axis=3),  # axis out of range
        partial(concat, adatas, join="not implemented"),  # unknown join strategy
        partial(concat, []),  # nothing to concatenate
    ]
    for call in bad_calls:
        with pytest.raises(ValueError):
            call()
def test_concat_outer_aligned_mapping(elem):
    """Outer join fills aligned-mapping entries missing from one input."""
    adata_a = gen_adata((5, 5))
    adata_b = gen_adata((3, 5))
    del adata_b.obsm[elem]  # the element is absent from one of the inputs

    joined = concat({"a": adata_a, "b": adata_b}, join="outer", label="group")
    # Rows originating from "b" should be filled with the appropriate null value.
    filled_part = joined.obsm[elem][joined.obs["group"] == "b"]
    check_filled_like(filled_part, elem_name=f"obsm/{elem}")
def test_concatenate_size_0_dim():
    """``AnnData.concatenate`` handles a zero-width operand in either order."""
    # Regression test: https://github.com/theislab/anndata/issues/526
    a = gen_adata((5, 10))
    b = gen_adata((5, 0))
    # FIX: these comparisons previously lacked ``assert`` and were no-op
    # expressions.  With the default inner join, the empty var axis of ``b``
    # intersects everything away, so both orders yield a (10, 0) result.
    assert a.concatenate([b]).shape == (10, 0)
    assert b.concatenate([a]).shape == (10, 0)
def test_transposed_concat(array_type, axis, join_type, merge_strategy, fill_val):
    """Concatenating transposed inputs on the flipped axis then transposing back
    must match concatenating the originals directly."""
    lhs = gen_adata((10, 10), X_type=array_type)
    rhs = gen_adata((10, 12), X_type=array_type)
    flipped_axis = abs(axis - 1)

    direct = concat([lhs, rhs], axis=axis, join=join_type, merge=merge_strategy)
    via_transpose = concat(
        [lhs.T, rhs.T], axis=flipped_axis, join=join_type, merge=merge_strategy
    ).T

    assert_equal(direct, via_transpose)
def test_concat_size_0_dim(axis, join_type, merge_strategy, shape):
    """Concatenation with a 0-sized operand keeps the expected shape, and an
    outer join fills the newly-introduced entries with the proper null value."""
    # https://github.com/theislab/anndata/issues/526
    a = gen_adata((5, 7))
    b = gen_adata(shape)
    alt_axis = 1 - axis
    dim = ("obs", "var")[axis]
    expected_size = expected_shape(a, b, axis=axis, join=join_type)
    result = concat(
        {"a": a, "b": b},
        axis=axis,
        join=join_type,
        merge=merge_strategy,
        pairwise=True,
        index_unique="-",
    )
    assert result.shape == expected_size

    if join_type == "outer":
        # Check new entries along axis of concatenation.
        # Entries coming from ``b`` carry the "-b" suffix (index_unique="-");
        # entries new along the other axis are those absent from ``a``.
        axis_new_inds = axis_labels(result, axis).str.endswith("-b")
        altaxis_new_inds = ~axis_labels(result, alt_axis).isin(axis_labels(a, alt_axis))
        axis_idx = make_idx_tuple(axis_new_inds, axis)
        altaxis_idx = make_idx_tuple(altaxis_new_inds, 1 - axis)

        # X and every layer must be null-filled at the newly-created positions.
        check_filled_like(result.X[axis_idx], elem_name="X")
        check_filled_like(result.X[altaxis_idx], elem_name="X")
        for k, elem in getattr(result, "layers").items():
            check_filled_like(elem[axis_idx], elem_name=f"layers/{k}")
            check_filled_like(elem[altaxis_idx], elem_name=f"layers/{k}")

        if shape[axis] > 0:
            # Slice ``result`` back down to the part that came from ``b`` and
            # restore b's names so aligned mappings can be compared directly.
            b_result = result[axis_idx].copy()
            mapping_elem = f"{dim}m"
            setattr(b_result, f"{dim}_names", getattr(b, f"{dim}_names"))
            for k, result_elem in getattr(b_result, mapping_elem).items():
                elem_name = f"{mapping_elem}/{k}"
                # pd.concat can have unintuitive return types. is similar to numpy promotion
                if isinstance(result_elem, pd.DataFrame):
                    assert_equal(
                        getattr(b, mapping_elem)[k].astype(object),
                        result_elem.astype(object),
                        elem_name=elem_name,
                    )
                else:
                    assert_equal(
                        getattr(b, mapping_elem)[k],
                        result_elem,
                        elem_name=elem_name,
                    )
def test_to_memory_full(tmp_path, array_type):
    """``to_memory`` on a backed object reproduces the in-memory original."""
    backed_pth = tmp_path / "backed.h5ad"
    mem = gen_adata((15, 10), X_type=array_type)
    mem.raw = gen_adata((15, 12), X_type=array_type)
    mem.write_h5ad(backed_pth, compression="lzf")

    backed = ad.read_h5ad(backed_pth, backed="r")
    assert_equal(mem, backed.to_memory())

    # After removing ``.raw`` from both sides they should still agree.
    del backed.raw
    del mem.raw
    assert_equal(mem, backed.to_memory())
def test_setting_dim_index(dim):
    """Assigning a new index to ``{dim}_names`` propagates into aligned
    mapping DataFrames, and doing so on a view resolves it into a copy
    without touching the original object."""
    index_attr = f"{dim}_names"
    mapping_attr = f"{dim}m"

    orig = gen_adata((5, 5))
    orig.raw = orig
    curr = orig.copy()
    view = orig[:, :]
    new_idx = pd.Index(list("abcde"), name="letters")

    setattr(curr, index_attr, new_idx)
    pd.testing.assert_index_equal(getattr(curr, index_attr), new_idx)
    # The new index must propagate into the aligned DataFrame in obsm/varm.
    pd.testing.assert_index_equal(getattr(curr, mapping_attr)["df"].index, new_idx)
    # raw's obs_names track the parent's obs_names.
    pd.testing.assert_index_equal(curr.obs_names, curr.raw.obs_names)

    # Testing view behaviour
    setattr(view, index_attr, new_idx)
    assert not view.is_view  # setting an index resolves the view into a copy
    pd.testing.assert_index_equal(getattr(view, index_attr), new_idx)
    pd.testing.assert_index_equal(getattr(view, mapping_attr)["df"].index, new_idx)
    # The original must be untouched, so its index now differs from the view's.
    with pytest.raises(AssertionError):
        pd.testing.assert_index_equal(getattr(view, index_attr), getattr(orig, index_attr))
    assert_equal(view, curr, exact=True)
def test_backed_raw_subset(tmp_path, subset_func, subset_func2):
    """Subsetting a backed AnnData (carrying ``.raw``) matches the in-memory
    subset, both when written back to disk and when loaded into memory."""
    backed_pth = tmp_path / "backed.h5ad"
    final_pth = tmp_path / "final.h5ad"
    mem_adata = gen_adata((10, 10))
    mem_adata.raw = mem_adata
    obs_idx = subset_func(mem_adata.obs_names)
    var_idx = subset_func2(mem_adata.var_names)
    mem_adata.write(backed_pth)

    # A backed view must compare equal to the corresponding in-memory view.
    backed_adata = ad.read_h5ad(backed_pth, backed="r")
    backed_v = backed_adata[obs_idx, var_idx]
    assert backed_v.is_view
    mem_v = mem_adata[obs_idx, var_idx]
    assert_equal(backed_v, mem_v)  # meaningful as objects are not equivalent?

    # Writing the backed view out and reading it back round-trips.
    backed_v.write_h5ad(final_pth)
    final_adata = ad.read_h5ad(final_pth)
    # todo: Figure out why this doesn’t work if I don’t copy
    assert_equal(final_adata, mem_v.copy())

    # todo: breaks when removing this line, b/c backed_v.X is not accessible
    backed_v = ad.read_h5ad(backed_pth, backed="r")[obs_idx, var_idx]
    del final_adata.raw  # .raw is dropped when loading backed into memory.
    assert_equal(final_adata, backed_v.to_memory())  # assert loading into memory
def test_readloom_deprecations(tmp_path):
    """Deprecated ``read_loom`` arguments still work, emit FutureWarning, and
    conflict loudly when combined with their replacements."""
    loom_pth = tmp_path / "test.loom"
    adata_src = gen_adata((5, 10), obsm_types=[np.ndarray], varm_types=[np.ndarray])
    adata_src.write_loom(loom_pth, write_obsm_varm=True)

    # obsm_names -> obsm_mapping
    obsm_mapping = {"df": adata_src.obs.columns}
    with pytest.warns(FutureWarning):
        depr_result = ad.read_loom(loom_pth, obsm_names=obsm_mapping)
    actual_result = ad.read_loom(loom_pth, obsm_mapping=obsm_mapping)
    assert_equal(actual_result, depr_result)
    # Passing both the old and the new spelling is ambiguous and must raise.
    with pytest.raises(ValueError, match="ambiguous"):
        ad.read_loom(loom_pth, obsm_mapping=obsm_mapping, obsm_names=obsm_mapping)

    # varm_names -> varm_mapping
    varm_mapping = {"df": adata_src.var.columns}
    with pytest.warns(FutureWarning):
        depr_result = ad.read_loom(loom_pth, varm_names=varm_mapping)
    actual_result = ad.read_loom(loom_pth, varm_mapping=varm_mapping)
    assert_equal(actual_result, depr_result)
    with pytest.raises(ValueError, match="ambiguous"):
        ad.read_loom(loom_pth, varm_mapping=varm_mapping, varm_names=varm_mapping)

    # positional -> keyword
    with pytest.warns(FutureWarning, match="sparse"):
        depr_result = ad.read_loom(loom_pth, True)
    actual_result = ad.read_loom(loom_pth, sparse=True)
    # FIX: compare types by identity rather than equality (flake8 E721).
    assert type(depr_result.X) is type(actual_result.X)
def test_hdf5_compression_opts(tmp_path, compression, compression_opts):
    """Every shaped dataset in the written file honours the compression args."""
    # https://github.com/theislab/anndata/issues/497
    pth = Path(tmp_path) / "adata.h5ad"
    adata = gen_adata((10, 8))

    write_kwargs = {}
    if compression is not None:
        write_kwargs["compression"] = compression
    if compression_opts is not None:
        write_kwargs["compression_opts"] = compression_opts
    adata.write_h5ad(pth, **write_kwargs)

    failures = []

    def check_compressed(key, value):
        # Scalar datasets cannot be compressed, so only shaped ones count.
        if not isinstance(value, h5py.Dataset) or value.shape == ():
            return
        if compression is not None and value.compression != compression:
            failures.append(key)
        elif compression_opts is not None and value.compression_opts != compression_opts:
            failures.append(key)

    with h5py.File(pth) as f:
        f.visititems(check_compressed)

    if failures:
        msg = "\n\t".join(failures)
        raise AssertionError(f"These elements were not compressed correctly:\n\t{msg}")

    assert_equal(adata, ad.read_h5ad(pth))
def test_inplace_subset_obs(matrix_type, subset_func):
    """``_inplace_subset_obs`` must match subsetting through a view + copy."""
    orig = gen_adata((30, 30), X_type=matrix_type)
    subset_idx = subset_func(orig.obs_names)

    modified = orig.copy()
    from_view = orig[subset_idx, :].copy()
    modified._inplace_subset_obs(subset_idx)

    assert_equal(asarray(from_view.X), asarray(modified.X), exact=True)
    assert_equal(from_view.obs, modified.obs, exact=True)
    assert_equal(from_view.var, modified.var, exact=True)
    for k in from_view.obsm:
        assert_equal(asarray(from_view.obsm[k]), asarray(modified.obsm[k]), exact=True)
    for k in from_view.varm:
        assert_equal(asarray(from_view.varm[k]), asarray(modified.varm[k]), exact=True)
        # The var axis is untouched by an obs subset, so varm must also still
        # equal the original's.
        assert_equal(asarray(orig.varm[k]), asarray(modified.varm[k]), exact=True)
    for k in from_view.layers:
        assert_equal(asarray(from_view.layers[k]), asarray(modified.layers[k]), exact=True)
def test_sparse_to_dense_disk(tmp_path, mtx_format, to_convert):
    """Writing with ``as_dense`` gives equal results from memory and from disk."""
    mem_pth = tmp_path / "orig.h5ad"
    dense_from_mem_pth = tmp_path / "dense_mem.h5ad"
    dense_from_disk_pth = tmp_path / "dense_disk.h5ad"

    mem = gen_adata((50, 50), mtx_format)
    mem.raw = mem
    mem.write_h5ad(mem_pth)
    disk = ad.read_h5ad(mem_pth, backed="r")

    mem.write_h5ad(dense_from_mem_pth, as_dense=to_convert)
    disk.write_h5ad(dense_from_disk_pth, as_dense=to_convert)

    # The converted keys must be stored as plain (dense) HDF5 datasets.
    for pth in (dense_from_mem_pth, dense_from_disk_pth):
        with h5py.File(pth, "r") as f:
            for k in to_convert:
                assert isinstance(f[k], h5py.Dataset)

    # Reading back — in memory or backed — must round-trip to both originals.
    for backed in (None, "r"):
        from_mem = ad.read_h5ad(dense_from_mem_pth, backed=backed)
        from_disk = ad.read_h5ad(dense_from_disk_pth, backed=backed)
        assert_equal(mem, from_mem)
        assert_equal(mem, from_disk)
        assert_equal(disk, from_mem)
        assert_equal(disk, from_disk)
def test_attr_deletion():
    """Deleting each attribute resets it to the pristine default."""
    full = gen_adata((30, 30))
    # Empty has just X, obs_names, var_names
    empty = AnnData(None, obs=full.obs[[]], var=full.var[[]])

    attrs = ["X", "obs", "var", "obsm", "varm", "obsp", "varp", "layers", "uns"]
    for attr in attrs:
        delattr(full, attr)
        assert_equal(getattr(full, attr), getattr(empty, attr))

    # Once everything is deleted, the two objects are exactly equal.
    assert_equal(full, empty, exact=True)
def test_transpose():
    """Transposing swaps axes and shares uns with the original."""
    adata = gen_adata((5, 3))
    # Prefix varp keys so they differ from obsp keys and a mix-up is visible.
    adata.varp = {f"varp_{key}": val for key, val in adata.varp.items()}

    transposed = adata.T
    # uns is shared: mutations on the transpose show up on the original.
    transposed.uns["test123"] = 1
    assert "test123" in adata.uns

    assert_equal(transposed.X.shape, (3, 5))
    assert_equal(transposed.obsp.keys(), adata.varp.keys())
def test_write_to_root(store):
    """An AnnData written at a store's root reads back identically."""
    adata = gen_adata((3, 2))
    write_elem(store, "/", adata)
    from_disk = read_elem(store)
    # The root group should carry the AnnData encoding tag.
    assert _read_attr(store.attrs, "encoding-type") == "anndata"
    assert_equal(from_disk, adata)
def test_view_setattr_machinery(attr, subset_func, subset_func2):
    """Setting an attribute on a view must not corrupt anything."""
    adata = gen_adata((10, 10))
    view = adata[subset_func(adata.obs_names), subset_func2(adata.var_names)]
    expected = view.copy()
    # Assign the attribute back through the view machinery, then compare.
    setattr(view, attr, getattr(expected, attr))
    assert_equal(expected, view, exact=True)
def test_view_delattr(attr):
    """Deleting an attribute on a view resolves it and resets the attribute."""
    base = gen_adata((10, 10))
    # Indexing into obs and var just to get indexes
    subset = base[5:7, :5]
    empty = ad.AnnData(subset.X, obs=subset.obs[[]], var=subset.var[[]])

    delattr(subset, attr)

    # The deletion must materialize the view...
    assert not subset.is_view
    # ...and the deleted attribute must now hold its default value.
    assert_equal(getattr(subset, attr), getattr(empty, attr))
def test_adata_in_uns(tmp_path, diskfmt):
    """Nested AnnData objects inside ``uns`` survive a write/read round trip."""
    pth = tmp_path / f"adatas_in_uns.{diskfmt}"

    def read(pth):
        return getattr(ad, f"read_{diskfmt}")(pth)

    def write(adata, pth):
        getattr(adata, f"write_{diskfmt}")(pth)

    orig = gen_adata((4, 5))
    orig.uns["adatas"] = {"a": gen_adata((1, 2)), "b": gen_adata((12, 8))}
    # Nest one more level, including a .raw, to stress the recursion.
    nested = gen_adata((2, 5))
    nested.raw = gen_adata((2, 7))
    orig.uns["adatas"]["b"].uns["another_one"] = nested

    write(orig, pth)
    assert_equal(orig, read(pth))
def test_concat_categories_from_mapping():
    """Passing a mapping to concat equals passing its values plus ``keys``."""
    mapping = {"a": gen_adata((10, 10)), "b": gen_adata((10, 10))}
    from_mapping = partial(concat, mapping)
    from_list = partial(concat, list(mapping.values()), keys=list(mapping.keys()))

    # The equivalence must hold for every keyword combination used here.
    for kwargs in (
        {},
        {"label": "batch"},
        {"index_unique": "-"},
        {"label": "group", "index_unique": "+"},
    ):
        assert_equal(from_mapping(**kwargs), from_list(**kwargs))
def test_double_index(subset_func, subset_func2):
    """Indexing both axes at once equals indexing them one at a time."""
    adata = gen_adata((10, 10))
    obs_subset = subset_func(adata.obs_names)
    var_subset = subset_func2(adata.var_names)

    at_once = adata[obs_subset, var_subset]
    chained = adata[obs_subset, :][:, var_subset]

    assert np.all(asarray(at_once.X) == asarray(chained.X))
    assert np.all(at_once.obs == chained.obs)
    assert np.all(at_once.var == chained.var)
def test_maintain_layers(backing_h5ad):
    """Layer types and contents are preserved through an h5ad round trip."""
    n_obs, n_var = 100, 101
    orig = gen_adata((n_obs, n_var))
    orig.write(backing_h5ad)
    curr = ad.read(backing_h5ad)

    # Dense layer: same type and same values.
    assert type(orig.layers["array"]) is type(curr.layers["array"])
    assert np.all(orig.layers["array"] == curr.layers["array"])
    # Sparse layer: same type; compare values via the dense equality matrix.
    assert type(orig.layers["sparse"]) is type(curr.layers["sparse"])
    assert np.all((orig.layers["sparse"] == curr.layers["sparse"]).toarray())
def test_io_spec_raw(store):
    """``raw`` is written with its own encoding tag and reads back equal."""
    adata = gen_adata((3, 2))
    adata.raw = adata
    write_elem(store, "adata", adata)
    # The raw subgroup must be tagged with the "raw" encoding type.
    assert _read_attr(store["adata/raw"].attrs, "encoding-type") == "raw"
    from_disk = read_elem(store["adata"])
    assert_equal(from_disk.raw, adata.raw)
def test_assert_equal():
    """Exercise ``assert_equal`` across ndarrays, AnnData objects, sparse
    matrices, and categoricals, including cases that must fail."""
    # ndarrays
    assert_equal(np.ones((10, 10)), np.ones((10, 10)))
    assert_equal(  # Should this require an exact test?
        np.ones((10, 10), dtype="i8"), np.ones((10, 10), dtype="f8")
    )
    assert_equal(
        np.array(list(ascii_letters)), np.array(list(ascii_letters)), exact=True
    )
    # Reversed contents must not compare equal.
    with pytest.raises(AssertionError):
        assert_equal(np.array(list(ascii_letters)), np.array(list(ascii_letters))[::-1])

    adata = gen_adata((10, 10))
    adata.raw = adata.copy()
    assert_equal(adata, adata.copy(), exact=True)
    # TODO: I’m not sure this is good behaviour, I’ve disabled in for now.
    # assert_equal(
    #     adata,
    #     adata[
    #         np.random.permutation(adata.obs_names),
    #         np.random.permutation(adata.var_names),
    #     ].copy(),
    #     exact=False,
    # )

    # A missing layer must fail with a message that mentions "layers".
    adata2 = adata.copy()
    to_modify = list(adata2.layers.keys())[0]
    del adata2.layers[to_modify]
    with pytest.raises(AssertionError) as missing_layer_error:
        assert_equal(adata, adata2)
    assert "layers" in str(missing_layer_error.value)
    # `to_modify` will be in pytest info

    # A changed layer value must fail and name the offending layer.
    adata2 = adata.copy()
    adata2.layers[to_modify][0, 0] = adata2.layers[to_modify][0, 0] + 1
    with pytest.raises(AssertionError) as changed_layer_error:
        assert_equal(adata, adata2)
    assert "layers" in str(changed_layer_error.value)
    assert to_modify in str(changed_layer_error.value)

    assert_equal(adata.obs, adata.obs.copy(), exact=True)

    # Sparse/dense comparisons across formats.
    csr = sparse.random(100, 100, format="csr")
    csc = csr.tocsc()
    dense = csr.toarray()
    assert_equal(csr, csc)
    assert_equal(csc, dense)
    assert_equal(dense, csc)

    # Categoricals: orderedness only matters when comparing exactly.
    unordered_cat = pd.Categorical(list("aabdcc"), ordered=False)
    ordered_cat = pd.Categorical(list("aabdcc"), ordered=True)
    assert_equal(unordered_cat, unordered_cat.copy())
    assert_equal(ordered_cat, ordered_cat.copy())
    assert_equal(ordered_cat, unordered_cat, exact=False)
    with pytest.raises(AssertionError):
        assert_equal(ordered_cat, unordered_cat, exact=True)
def test_zarr_chunk_X(tmp_path):
    """The ``chunks`` argument is honoured when writing X to zarr."""
    import zarr

    zarr_pth = Path(tmp_path) / "test.zarr"
    adata = gen_adata((100, 100), X_type=np.array)
    adata.write_zarr(zarr_pth, chunks=(10, 10))

    # As of v2.3.2 zarr won’t take a Path, so stringify it first.
    z = zarr.open(str(zarr_pth))
    assert z["X"].chunks == (10, 10)
    assert_equal(ad.read_zarr(zarr_pth), adata)
def test_inplace_subset_no_X(subset_func, dim):
    """In-place subsetting with no X matrix matches subsetting a view."""
    orig = gen_adata((30, 30))
    del orig.X
    subset_idx = subset_func(getattr(orig, f"{dim}_names"))

    expected = subset_dim(orig, **{dim: subset_idx}).copy()
    modified = orig.copy()
    getattr(modified, f"_inplace_subset_{dim}")(subset_idx)

    assert_equal(modified, expected, exact=True)
def test_view_failed_delitem(attr):
    """A failed ``del`` on a view leaves both view and parent untouched."""
    adata = gen_adata((10, 10))
    view = adata[5:7, :][:, :5]
    adata_hash = joblib.hash(adata)
    view_hash = joblib.hash(view)

    # Deleting a nonexistent key must fail...
    with pytest.raises(KeyError):
        getattr(view, attr).__delitem__("not a key")

    # ...without resolving the view or mutating either object.
    assert view.is_view
    assert adata_hash == joblib.hash(adata)
    assert view_hash == joblib.hash(view)
def test_view_delattr(attr, subset_func):
    """Deleting an attribute on a view resets it without touching the parent."""
    base = gen_adata((10, 10))
    orig_hash = joblib.hash(base)
    subset = base[subset_func(base.obs_names), subset_func(base.var_names)]
    empty = ad.AnnData(obs=subset.obs[[]], var=subset.var[[]])

    delattr(subset, attr)

    # Deletion resolves the view into a real object with the default value.
    assert not subset.is_view
    assert_equal(getattr(subset, attr), getattr(empty, attr))
    assert orig_hash == joblib.hash(base)  # Original should not be modified