def test_hdf5_compression_opts(tmp_path, compression, compression_opts):
    """Check that compression settings passed to ``write_h5ad`` reach every dataset.

    Writes an AnnData with the requested (parametrized) compression settings,
    then walks the resulting HDF5 file and collects every dataset whose
    compression does not match what was asked for.
    """
    # https://github.com/theislab/anndata/issues/497
    pth = Path(tmp_path) / "adata.h5ad"
    adata = gen_adata((10, 8))
    kwargs = {}
    if compression is not None:
        kwargs["compression"] = compression
    if compression_opts is not None:
        kwargs["compression_opts"] = compression_opts
    not_compressed = []  # keys of datasets with wrong compression settings
    adata.write_h5ad(pth, **kwargs)

    def check_compressed(key, value):
        # visititems callback: scalar datasets (shape == ()) are skipped,
        # since they are not chunked/compressed by h5py
        if isinstance(value, h5py.Dataset) and value.shape != ():
            if compression is not None and value.compression != compression:
                not_compressed.append(key)
            elif (
                compression_opts is not None
                and value.compression_opts != compression_opts
            ):
                not_compressed.append(key)

    with h5py.File(pth) as f:
        f.visititems(check_compressed)

    if not_compressed:
        msg = "\n\t".join(not_compressed)
        raise AssertionError(f"These elements were not compressed correctly:\n\t{msg}")

    # Compression must not change the round-tripped values
    assert_equal(adata, ad.read_h5ad(pth))
def test_readwrite_zarr(typ, tmp_path):
    """Round-trip an AnnData (including ``.raw``) through zarr and verify contents."""
    X = typ(X_list)
    adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict)
    adata_src.raw = adata_src
    # Before writing, the string obs column is not categorical yet
    assert not is_categorical_dtype(adata_src.obs["oanno1"])
    adata_src.write_zarr(tmp_path / "test_zarr_dir", chunks=True)
    adata = ad.read_zarr(tmp_path / "test_zarr_dir")
    # After the round trip, string columns come back as categoricals
    assert is_categorical_dtype(adata.obs["oanno1"])
    assert not is_categorical_dtype(adata.obs["oanno2"])
    assert adata.obs.index.tolist() == ["name1", "name2", "name3"]
    assert adata.obs["oanno1"].cat.categories.tolist() == ["cat1", "cat2"]
    assert adata.obs["oanno1c"].cat.categories.tolist() == ["cat1"]
    assert is_categorical_dtype(adata.raw.var["vanno2"])
    pd.testing.assert_frame_equal(adata.obs, adata_src.obs)
    pd.testing.assert_frame_equal(adata.var, adata_src.var)
    assert np.all(adata.var.index == adata_src.var.index)
    assert adata.var.index.dtype == adata_src.var.index.dtype
    # .raw round-trips with matching container type and values
    assert type(adata.raw.X) is type(adata_src.raw.X)
    assert np.allclose(asarray(adata.raw.X), asarray(adata_src.raw.X))
    assert np.all(adata.raw.var == adata_src.raw.var)
    # uns scalars keep integer-ness and exact types
    assert isinstance(adata.uns["uns4"]["a"], (int, np.integer))
    assert isinstance(adata_src.uns["uns4"]["a"], (int, np.integer))
    assert type(adata.uns["uns4"]["c"]) is type(adata_src.uns["uns4"]["c"])
    assert_equal(adata, adata_src)
def test_readloom_deprecations(tmp_path):
    """Deprecated ``read_loom`` arguments still work, warn, and match the new API.

    Covers three deprecations: ``obsm_names`` -> ``obsm_mapping``,
    ``varm_names`` -> ``varm_mapping``, and positional ``sparse``.
    Passing both the old and new spelling must raise a ValueError.
    """
    loom_pth = tmp_path / "test.loom"
    adata_src = gen_adata((5, 10), obsm_types=[np.ndarray], varm_types=[np.ndarray])
    adata_src.write_loom(loom_pth, write_obsm_varm=True)

    # obsm_names -> obsm_mapping
    obsm_mapping = {"df": adata_src.obs.columns}
    with pytest.warns(FutureWarning):
        depr_result = ad.read_loom(loom_pth, obsm_names=obsm_mapping)
    actual_result = ad.read_loom(loom_pth, obsm_mapping=obsm_mapping)
    assert_equal(actual_result, depr_result)
    with pytest.raises(ValueError, match="ambiguous"):
        ad.read_loom(loom_pth, obsm_mapping=obsm_mapping, obsm_names=obsm_mapping)

    # varm_names -> varm_mapping
    varm_mapping = {"df": adata_src.var.columns}
    with pytest.warns(FutureWarning):
        depr_result = ad.read_loom(loom_pth, varm_names=varm_mapping)
    actual_result = ad.read_loom(loom_pth, varm_mapping=varm_mapping)
    assert_equal(actual_result, depr_result)
    with pytest.raises(ValueError, match="ambiguous"):
        ad.read_loom(loom_pth, varm_mapping=varm_mapping, varm_names=varm_mapping)

    # positional -> keyword
    with pytest.warns(FutureWarning, match="sparse"):
        depr_result = ad.read_loom(loom_pth, True)
    actual_result = ad.read_loom(loom_pth, sparse=True)
    # `is` (identity) is the correct comparison for type objects
    assert type(depr_result.X) is type(actual_result.X)
def test_backwards_compat_files(archive_dir):
    """Archived files written by old anndata versions are still readable.

    Reading each archived file must emit an ``OldFormatWarning``, and the
    h5ad and zarr copies must decode to exactly equal objects.
    """
    loaded = []
    for reader, fname in [
        (ad.read_h5ad, "adata.h5ad"),
        (ad.read_zarr, "adata.zarr.zip"),
    ]:
        with pytest.warns(ad.OldFormatWarning):
            loaded.append(reader(archive_dir / fname))

    from_h5ad, from_zarr = loaded
    assert_equal(from_h5ad, from_zarr, exact=True)
def test_write_large_categorical(tmp_path, diskfmt):
    """Round-trip an AnnData whose obs has ~10k distinct categorical labels."""
    M = 30_000  # number of observations
    N = 1000  # number of variables
    ls = np.array(list(ascii_letters))

    def random_cats(n):
        # Build n unique random strings of 5-29 letters each
        cats = {
            "".join(np.random.choice(ls, np.random.choice(range(5, 30))))
            for _ in range(n)
        }
        while len(cats) < n:  # For the rare case that there’s duplicates
            cats |= random_cats(n - len(cats))
        return cats

    cats = np.array(sorted(random_cats(10_000)))
    adata_pth = tmp_path / f"adata.{diskfmt}"
    n_cats = len(np.unique(cats))
    orig = ad.AnnData(
        # Nearly-empty sparse X: the point of the test is the obs columns
        csr_matrix(([1], ([0], [0])), shape=(M, N)),
        obs=dict(
            # cat1: raw string array, cat2: explicitly constructed Categorical
            cat1=cats[np.random.choice(n_cats, M)],
            cat2=pd.Categorical.from_codes(np.random.choice(n_cats, M), cats),
        ),
    )
    getattr(orig, f"write_{diskfmt}")(adata_pth)
    curr = getattr(ad, f"read_{diskfmt}")(adata_pth)
    assert_equal(orig, curr)
def test_assert_equal_alignedmapping_empty():
    """Empty aligned mappings compare equal only when parent indices match."""
    chars = np.array(list(ascii_letters))

    def build(pool):
        # AnnData with empty obs/var frames over random indices from `pool`
        return ad.AnnData(
            X=np.zeros((10, 10)),
            obs=pd.DataFrame([], index=np.random.choice(pool, 10, replace=False)),
            var=pd.DataFrame([], index=np.random.choice(pool, 10, replace=False)),
        )

    adata = build(chars[:20])
    diff_idx = build(chars[20:])
    same_idx = ad.AnnData(adata.X, obs=adata.obs.copy(), var=adata.var.copy())

    for attr in ["obsm", "varm", "layers", "obsp", "varp"]:
        with pytest.raises(AssertionError):
            assert_equal(getattr(adata, attr), getattr(diff_idx, attr))
        assert_equal(getattr(adata, attr), getattr(same_idx, attr))
def test_readwrite_h5ad(typ, dataset_kwargs, backing_h5ad):
    """Round-trip through two successive h5ad writes preserves everything.

    Writes to the backing file, reads it back, writes that to a second path,
    then verifies the final read against the source. The intermediate
    directory is managed with a context manager so it is cleaned up
    deterministically (previously ``TemporaryDirectory()`` was only cleaned
    up whenever the GC finalized it).
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        mid_pth = Path(tmpdir) / "mid.h5ad"
        X = typ(X_list)
        adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict)
        # Before writing, the string obs column is not categorical yet
        assert not is_categorical_dtype(adata_src.obs["oanno1"])
        adata_src.raw = adata_src

        # write -> read -> write -> read chain
        adata_src.write(backing_h5ad, **dataset_kwargs)
        adata_mid = ad.read(backing_h5ad)
        adata_mid.write(mid_pth, **dataset_kwargs)
        adata = ad.read_h5ad(mid_pth)

    # After the round trip, string columns come back as categoricals
    assert is_categorical_dtype(adata.obs["oanno1"])
    assert not is_categorical_dtype(adata.obs["oanno2"])
    assert adata.obs.index.tolist() == ["name1", "name2", "name3"]
    assert adata.obs["oanno1"].cat.categories.tolist() == ["cat1", "cat2"]
    assert is_categorical_dtype(adata.raw.var["vanno2"])
    assert np.all(adata.obs == adata_src.obs)
    assert np.all(adata.var == adata_src.var)
    assert np.all(adata.var.index == adata_src.var.index)
    assert adata.var.index.dtype == adata_src.var.index.dtype
    # .raw round-trips with matching container types and values
    assert type(adata.raw.X) is type(adata_src.raw.X)
    assert type(adata.raw.varm) is type(adata_src.raw.varm)
    assert np.allclose(asarray(adata.raw.X), asarray(adata_src.raw.X))
    assert np.all(adata.raw.var == adata_src.raw.var)
    # uns scalars keep integer-ness and exact types
    assert isinstance(adata.uns["uns4"]["a"], (int, np.integer))
    assert isinstance(adata_src.uns["uns4"]["a"], (int, np.integer))
    assert type(adata.uns["uns4"]["c"]) is type(adata_src.uns["uns4"]["c"])
    assert_equal(adata, adata_src)
def check_filled_like(x, fill_value=None, elem_name=None):
    """Assert that ``x`` equals ``filled_like`` of itself.

    When ``fill_value`` is None, ``filled_like`` is called with its own
    default fill; otherwise the given fill value is forwarded.
    """
    if fill_value is None:
        expected = filled_like(x)
    else:
        expected = filled_like(x, fill_value=fill_value)
    assert_equal(x, expected, elem_name=elem_name)
def test_concatenate_uns(unss, merge_strategy, result, value_gen): """ Test that concatenation works out for different strategies and sets of values. Params ------ unss Set of patterns for values in uns. compat Strategy to use for merging uns. result Pattern we expect to see for the given input and strategy. value_gen Maps values in unss and results to another set of values. This is for checking that we're comparing values correctly. For example `[{"a": 1}, {"a": 1}]` may get mapped to `[{"a": [1, 2, 3]}, {"a": [1, 2, 3]}]`. """ # So we can see what the initial pattern was meant to be print(merge_strategy, "\n", unss, "\n", result) result, *unss = permute_nested_values([result] + unss, value_gen) adatas = [uns_ad(uns) for uns in unss] assert_equal( adatas[0].concatenate(adatas[1:], uns_merge=merge_strategy).uns, result, elem_name="uns", )
def test_copy():
    """``copy()`` of an AnnData without X equals the original."""
    obs_index = [f"cell{i:03}" for i in range(100)]
    var_index = [f"gene{i:03}" for i in range(200)]
    adata = AnnData(
        None,
        obs=pd.DataFrame(index=obs_index),
        var=pd.DataFrame(index=var_index),
    )
    assert_equal(adata.copy(), adata)
def check_rep_results(func, X, *, fields=("layer", "obsm"), **kwargs):
    """Check that a computation gives consistent results across representations.

    Applies ``func`` with the data stored in ``X`` and, separately, in each
    representation named in ``fields``, then blanks out the inputs and
    asserts all resulting objects are equal.

    Parameters
    ----------
    func
        Callable applied to an AnnData; selects its input via keyword
        arguments like ``layer="layer"`` or ``obsm="obsm"``.
    X
        The data the function is applied to.
    fields
        Representations (besides X) to exercise. The default is now an
        immutable tuple to avoid the shared-mutable-default pitfall;
        callers passing their own list are unaffected.
    kwargs
        Extra arguments forwarded to ``func``.
    """
    # Gen data: a template AnnData where every representation holds zeros
    empty_X = np.zeros(shape=X.shape, dtype=X.dtype)
    adata = sc.AnnData(
        X=empty_X.copy(),
        layers={"layer": empty_X.copy()},
        obsm={"obsm": empty_X.copy()},
    )
    adata_X = adata.copy()
    adata_X.X = X.copy()
    adatas_proc = {}
    for field in fields:
        cur = adata.copy()
        sc.get._set_obs_rep(cur, X.copy(), **{field: field})
        adatas_proc[field] = cur
    # Apply function to each representation
    func(adata_X, **kwargs)
    for field in fields:
        func(adatas_proc[field], **{field: field}, **kwargs)
    # Reset the input slots so only side effects elsewhere are compared
    adata_X.X = empty_X.copy()
    for field in fields:
        sc.get._set_obs_rep(adatas_proc[field], empty_X.copy(), **{field: field})
    # Every pair of representations must agree, and each must agree with X
    for field_a, field_b in permutations(fields, 2):
        assert_equal(adatas_proc[field_a], adatas_proc[field_b])
    for field in fields:
        assert_equal(adata_X, adatas_proc[field])
def test_setting_dim_index(dim):
    """Setting obs_names/var_names propagates to mapping DataFrames and raw,
    and resolves views into real objects."""
    index_attr = f"{dim}_names"
    mapping_attr = f"{dim}m"
    orig = gen_adata((5, 5))
    orig.raw = orig
    curr = orig.copy()
    view = orig[:, :]
    new_idx = pd.Index(list("abcde"), name="letters")
    setattr(curr, index_attr, new_idx)
    pd.testing.assert_index_equal(getattr(curr, index_attr), new_idx)
    # The DataFrame stored in obsm/varm follows the new names
    pd.testing.assert_index_equal(getattr(curr, mapping_attr)["df"].index, new_idx)
    # obs_names stay in sync between the object and its raw
    pd.testing.assert_index_equal(curr.obs_names, curr.raw.obs_names)

    # Testing view behaviour
    setattr(view, index_attr, new_idx)
    # Assigning to a view must turn it into an independent object
    assert not view.is_view
    pd.testing.assert_index_equal(getattr(view, index_attr), new_idx)
    pd.testing.assert_index_equal(getattr(view, mapping_attr)["df"].index, new_idx)
    # The parent object must be left untouched by the assignment on the view
    with pytest.raises(AssertionError):
        pd.testing.assert_index_equal(
            getattr(view, index_attr), getattr(orig, index_attr)
        )
    assert_equal(view, curr, exact=True)
def check_rep_results(func, X, **kwargs):
    """Check that ``func`` mutates an AnnData consistently whether its input
    lives in X, a layer, or an obsm entry."""

    def zeros():
        # Fresh all-zero array matching X's shape and dtype
        return np.zeros(shape=X.shape, dtype=X.dtype)

    # Three objects, each carrying the real data in a different slot
    adata_X = sc.AnnData(
        X=X.copy(), layers={"layer": zeros()}, obsm={"obsm": zeros()}
    )
    adata_layer = sc.AnnData(
        X=zeros(), layers={"layer": X.copy()}, obsm={"obsm": zeros()}
    )
    adata_obsm = sc.AnnData(
        X=zeros(), layers={"layer": zeros()}, obsm={"obsm": X.copy()}
    )

    # Apply function, targeting the slot that holds the data
    func(adata_X, **kwargs)
    func(adata_layer, layer="layer", **kwargs)
    func(adata_obsm, obsm="obsm", **kwargs)

    # Reset the input slots so only side effects elsewhere are compared
    adata_X.X = zeros()
    adata_layer.layers["layer"] = zeros()
    adata_obsm.obsm["obsm"] = zeros()

    # Check equality
    assert_equal(adata_X, adata_layer)
    assert_equal(adata_X, adata_obsm)
def test_backed_indexing_zarr_var(ondisk_equivalent_adata_zarr, subset_func_zarr):
    """Var-axis subsetting of zarr-backed matrices matches in-memory results."""
    csr_mem, csr_disk, csc_disk = ondisk_equivalent_adata_zarr
    mem_X = csr_mem.X
    idx = subset_func_zarr(csr_mem.var_names)
    # Both CSR- and CSC-backed on-disk matrices must agree with memory
    for disk_mtx in (csr_disk, csc_disk):
        assert_equal(mem_X[:, idx], disk_mtx[:, idx])
def test_nan_merge(axis, join_type, array_type):
    """Test merge="same" behaviour when arrays contain NaN values."""
    # concat_dim = ("obs", "var")[axis]
    alt_dim = ("var", "obs")[axis]
    mapping_attr = f"{alt_dim}m"
    adata_shape = (20, 10)
    arr = array_type(
        sparse.random(adata_shape[1 - axis], 10, density=0.1, format="csr")
    )
    arr_nan = arr.copy()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=sparse.SparseEfficiencyWarning)
        # Scatter a handful of NaNs into random positions
        for _ in range(10):
            arr_nan[
                np.random.choice(arr.shape[0]), np.random.choice(arr.shape[1])
            ] = np.nan

    _data = {"X": sparse.csr_matrix(adata_shape), mapping_attr: {"arr": arr_nan}}
    orig1 = AnnData(**_data)
    orig2 = AnnData(**_data)
    result = concat([orig1, orig2], axis=axis, merge="same")
    # Identical NaN-containing arrays must survive the merge
    assert_equal(getattr(orig1, mapping_attr), getattr(result, mapping_attr))

    orig_nonan = AnnData(
        **{"X": sparse.csr_matrix(adata_shape), mapping_attr: {"arr": arr}}
    )
    result_nonan = concat([orig1, orig_nonan], axis=axis, merge="same")
    # Arrays differing between inputs (nan vs. non-nan) must be dropped
    assert len(getattr(result_nonan, mapping_attr)) == 0
def test_write_string_types(tmp_path, diskfmt):
    """np.str_ column names write fine; bytes keys must raise a TypeError
    naming the offending key."""
    # https://github.com/theislab/anndata/issues/456
    adata_pth = tmp_path / f"adata.{diskfmt}"
    adata = ad.AnnData(
        np.ones((3, 3)),
        obs=pd.DataFrame(
            np.ones((3, 2)),
            # Mix a builtin str and a numpy string scalar as column names
            columns=["a", np.str_("b")],
            index=["a", "b", "c"],
        ),
    )
    write = getattr(adata, f"write_{diskfmt}")
    read = getattr(ad, f"read_{diskfmt}")
    write(adata_pth)
    from_disk = read(adata_pth)
    assert_equal(adata, from_disk)

    adata.obs[b"c"] = np.zeros(3)
    # This should error, and tell you which key is at fault
    with pytest.raises(TypeError, match=str(b"c")):
        write(adata_pth)
def test_recipe_weinreb():
    """Just tests for failure for now: recipe_weinreb17 with copy=True must
    run cleanly and leave its input unmodified."""
    adata = sc.datasets.pbmc68k_reduced().raw.to_adata()
    adata.X = adata.X.toarray()
    before = adata.copy()
    sc.pp.recipe_weinreb17(adata, log=False, copy=True)
    assert_equal(before, adata)
def test_concatenate_layers(array_type, join_type):
    """X and an identical layer stay equal through concatenation."""
    adatas = []
    for _ in range(5):
        mtx = array_type(sparse.random(100, 200, format="csr"))
        # Store the same matrix as both X and a layer
        adatas.append(AnnData(X=mtx, layers={"a": mtx}))

    merged = adatas[0].concatenate(adatas[1:], join=join_type)
    assert_equal(merged.X, merged.layers["a"])
def test_copy_view():
    """Copying a strided view of an X-less AnnData yields an equal object."""
    obs_index = [f"cell{i:03}" for i in range(100)]
    var_index = [f"gene{i:03}" for i in range(200)]
    adata = AnnData(
        None,
        obs=pd.DataFrame(index=obs_index),
        var=pd.DataFrame(index=var_index),
    )
    # Reversed, every-other-element view on both axes
    v = adata[::-2, ::-2]
    assert_equal(v.copy(), v)
def test_io_spec(store, value, encoding_type):
    """``write_elem`` records the expected encoding-type and round-trips the value."""
    key = f"key_for_{encoding_type}"
    write_elem(store, key, value, dataset_kwargs={})

    written_type = _read_attr(store[key].attrs, "encoding-type")
    assert written_type == encoding_type

    roundtripped = read_elem(store[key])
    assert_equal(value, roundtripped)
def test_transpose():
    """Transposing swaps shape and varp/obsp; uns stays shared with the parent."""
    adata = gen_adata((5, 3))
    adata.varp = {f"varp_{k}": v for k, v in adata.varp.items()}
    transposed = adata.T
    # Mutating uns on the transpose is visible on the original (shared uns)
    transposed.uns["test123"] = 1
    assert "test123" in adata.uns
    assert_equal(transposed.X.shape, (3, 5))
    assert_equal(transposed.obsp.keys(), adata.varp.keys())
def test_attr_deletion():
    """Deleting each attribute resets it to its empty default."""
    full = gen_adata((30, 30))
    # Reference object carrying just X, obs_names, var_names
    empty = AnnData(None, obs=full.obs[[]], var=full.var[[]])

    attrs = ("X", "obs", "var", "obsm", "varm", "obsp", "varp", "layers", "uns")
    for attr in attrs:
        delattr(full, attr)
        assert_equal(getattr(full, attr), getattr(empty, attr))

    # After deleting everything, the object equals the empty reference exactly
    assert_equal(full, empty, exact=True)
def test_regress_out_constants():
    """regress_out on an all-constant X must be a no-op."""
    # Two constant columns: all zeros and all ones
    X = np.hstack((np.full((10, 1), 0.0), np.full((10, 1), 1.0)))
    adata = AnnData(X)
    adata.obs["percent_mito"] = np.random.rand(adata.X.shape[0])
    adata.obs["n_counts"] = adata.X.sum(axis=1)
    before = adata.copy()
    sc.pp.regress_out(adata, keys=["n_counts", "percent_mito"])
    assert_equal(adata, before)
def test_view_setattr_machinery(attr, subset_func, subset_func2):
    # Tests that setting attributes on a view doesn't mess anything up too bad
    adata = gen_adata((10, 10))
    view = adata[subset_func(adata.obs_names), subset_func2(adata.var_names)]
    expected = view.copy()
    # Assign the attribute back onto the view; the view must end up
    # exactly equal to its plain copy
    setattr(view, attr, getattr(expected, attr))
    assert_equal(expected, view, exact=True)
def test_write_to_root(store):
    """An AnnData written at the store root round-trips and is tagged 'anndata'."""
    orig = gen_adata((3, 2))
    write_elem(store, "/", orig)
    roundtrip = read_elem(store)

    assert _read_attr(store.attrs, "encoding-type") == "anndata"
    assert_equal(roundtrip, orig)
def test_readwrite_h5ad_one_dimension(typ, backing_h5ad):
    """A single-column subset survives an h5ad round trip."""
    adata_src = ad.AnnData(typ(X_list), obs=obs_dict, var=var_dict, uns=uns_dict)
    adata_one = adata_src[:, 0].copy()  # keep only the first variable
    adata_one.write(backing_h5ad)
    roundtrip = ad.read(backing_h5ad)
    assert roundtrip.shape == (3, 1)
    assert_equal(roundtrip, adata_one)
def test_view_delattr(attr):
    """Deleting an attribute on a view resolves it and resets the attribute."""
    base = gen_adata((10, 10))
    # Indexing into obs and var just to get indexes
    subset = base[5:7, :5]
    reference = ad.AnnData(subset.X, obs=subset.obs[[]], var=subset.var[[]])

    delattr(subset, attr)
    assert not subset.is_view
    # Should now have same value as default
    assert_equal(getattr(subset, attr), getattr(reference, attr))
def test_create_from_sparse_df():
    """AnnData built from a sparse DataFrame matches one built from the matrix."""
    mtx = sp.random(20, 30, density=0.2)
    obs_names = [f"obs{i}" for i in range(20)]
    var_names = [f"var{i}" for i in range(30)]
    sparse_df = pd.DataFrame.sparse.from_spmatrix(
        mtx, index=obs_names, columns=var_names
    )

    from_df = AnnData(sparse_df)
    from_mtx = AnnData(
        mtx, obs=pd.DataFrame(index=obs_names), var=pd.DataFrame(index=var_names)
    )
    assert_equal(from_df, from_mtx)
    # Sparsity must be preserved, not densified
    assert issparse(from_df.X)
def test_raw_set_as_none(adata_raw):
    # Test for theislab/anndata#445: `del adata.raw` and `adata.raw = None`
    # must leave equal objects behind
    via_del = adata_raw
    via_none = adata_raw.copy()

    del via_del.raw
    via_none.raw = None

    assert_equal(via_del, via_none)
def test_raw_rw(adata_raw, backing_h5ad):
    """``.raw`` survives writing to and reading from an h5ad file."""
    adata_raw.write(backing_h5ad)
    roundtrip = ad.read(backing_h5ad)

    assert_equal(roundtrip, adata_raw, exact=True)
    # The main object is the trimmed view; raw keeps all three variables
    assert adata_raw.var_names.tolist() == ["var1", "var2"]
    assert adata_raw.raw.var_names.tolist() == ["var1", "var2", "var3"]
    assert adata_raw.raw[:, 0].X.tolist() == [[1], [4], [7]]