Beispiel #1
0
def test_hdf5_compression_opts(tmp_path, compression, compression_opts):
    """Check that compression settings given to ``write_h5ad`` reach the datasets.

    A generated AnnData is written with ``compression`` and/or
    ``compression_opts`` (each one is only forwarded when it is not ``None``).
    Every non-scalar HDF5 dataset in the resulting file is then inspected, and
    the test fails with a list of all datasets whose settings differ from the
    requested ones. Finally the file is read back and compared to the source.
    """
    # https://github.com/theislab/anndata/issues/497
    pth = Path(tmp_path) / "adata.h5ad"
    adata = gen_adata((10, 8))
    kwargs = {}
    if compression is not None:
        kwargs["compression"] = compression
    if compression_opts is not None:
        kwargs["compression_opts"] = compression_opts
    not_compressed = []

    adata.write_h5ad(pth, **kwargs)

    def check_compressed(key, value):
        # Groups and scalar datasets (shape ``()``) are skipped.
        # NOTE(review): presumably scalars are excluded because they cannot
        # carry compression filters -- confirm against h5py.
        if isinstance(value, h5py.Dataset) and value.shape != ():
            if compression is not None and value.compression != compression:
                not_compressed.append(key)
            elif (compression_opts is not None
                  and value.compression_opts != compression_opts):
                not_compressed.append(key)

    # Open read-only explicitly: this pass only inspects the file, and the
    # default mode of ``h5py.File`` has differed between h5py releases.
    with h5py.File(pth, "r") as f:
        f.visititems(check_compressed)

    if not_compressed:
        msg = "\n\t".join(not_compressed)
        raise AssertionError(
            f"These elements were not compressed correctly:\n\t{msg}")

    assert_equal(adata, ad.read_h5ad(pth))
Beispiel #2
0
def test_readwrite_zarr(typ, tmp_path):
    """Round-trip an AnnData (including ``raw``) through a zarr store.

    The source object is built from the module-level fixtures with ``X``
    converted by ``typ``; after writing and reading, dtypes, indices, ``raw``
    and ``uns`` contents are compared against the source.
    """
    zarr_dir = tmp_path / "test_zarr_dir"

    src = ad.AnnData(typ(X_list), obs=obs_dict, var=var_dict, uns=uns_dict)
    src.raw = src
    # Before writing, the column is not yet categorical
    assert not is_categorical_dtype(src.obs["oanno1"])
    src.write_zarr(zarr_dir, chunks=True)

    loaded = ad.read_zarr(zarr_dir)

    # obs: dtypes, index and categories
    loaded_obs = loaded.obs
    assert is_categorical_dtype(loaded_obs["oanno1"])
    assert not is_categorical_dtype(loaded_obs["oanno2"])
    assert loaded_obs.index.tolist() == ["name1", "name2", "name3"]
    assert loaded_obs["oanno1"].cat.categories.tolist() == ["cat1", "cat2"]
    assert loaded_obs["oanno1c"].cat.categories.tolist() == ["cat1"]
    assert is_categorical_dtype(loaded.raw.var["vanno2"])

    # obs / var frames match the source
    pd.testing.assert_frame_equal(loaded.obs, src.obs)
    pd.testing.assert_frame_equal(loaded.var, src.var)
    assert np.all(loaded.var.index == src.var.index)
    assert loaded.var.index.dtype == src.var.index.dtype

    # raw keeps its container type and values
    assert type(loaded.raw.X) is type(src.raw.X)
    assert np.allclose(asarray(loaded.raw.X), asarray(src.raw.X))
    assert np.all(loaded.raw.var == src.raw.var)

    # nested uns values keep their types
    assert isinstance(loaded.uns["uns4"]["a"], (int, np.integer))
    assert isinstance(src.uns["uns4"]["a"], (int, np.integer))
    assert type(loaded.uns["uns4"]["c"]) is type(src.uns["uns4"]["c"])

    assert_equal(loaded, src)
Beispiel #3
0
def test_readloom_deprecations(tmp_path):
    """Check the deprecated call styles of ``read_loom``.

    For each renamed keyword (``obsm_names`` -> ``obsm_mapping``,
    ``varm_names`` -> ``varm_mapping``) the old name must emit a
    ``FutureWarning`` and give the same result as the new name, and passing
    both must raise a ``ValueError`` mentioning "ambiguous". Passing
    ``sparse`` positionally must also warn.
    """
    loom_pth = tmp_path / "test.loom"
    adata_src = gen_adata((5, 10),
                          obsm_types=[np.ndarray],
                          varm_types=[np.ndarray])
    adata_src.write_loom(loom_pth, write_obsm_varm=True)

    # obsm_names -> obsm_mapping
    obsm_mapping = {"df": adata_src.obs.columns}
    with pytest.warns(FutureWarning):
        depr_result = ad.read_loom(loom_pth, obsm_names=obsm_mapping)
    actual_result = ad.read_loom(loom_pth, obsm_mapping=obsm_mapping)
    assert_equal(actual_result, depr_result)
    with pytest.raises(ValueError, match="ambiguous"):
        ad.read_loom(loom_pth,
                     obsm_mapping=obsm_mapping,
                     obsm_names=obsm_mapping)

    # varm_names -> varm_mapping
    varm_mapping = {"df": adata_src.var.columns}
    with pytest.warns(FutureWarning):
        depr_result = ad.read_loom(loom_pth, varm_names=varm_mapping)
    actual_result = ad.read_loom(loom_pth, varm_mapping=varm_mapping)
    assert_equal(actual_result, depr_result)
    with pytest.raises(ValueError, match="ambiguous"):
        ad.read_loom(loom_pth,
                     varm_mapping=varm_mapping,
                     varm_names=varm_mapping)

    # positional -> keyword
    with pytest.warns(FutureWarning, match="sparse"):
        depr_result = ad.read_loom(loom_pth, True)
    actual_result = ad.read_loom(loom_pth, sparse=True)
    # Exact type identity is intended, so compare with ``is`` rather than ``==``
    assert type(depr_result.X) is type(actual_result.X)
Beispiel #4
0
def test_backwards_compat_files(archive_dir):
    """Archived h5ad and zarr files each warn about the old format and load identically."""
    h5ad_file = archive_dir / "adata.h5ad"
    zarr_file = archive_dir / "adata.zarr.zip"

    # Each reader is wrapped separately so that both must emit the warning
    with pytest.warns(ad.OldFormatWarning):
        h5ad_result = ad.read_h5ad(h5ad_file)
    with pytest.warns(ad.OldFormatWarning):
        zarr_result = ad.read_zarr(zarr_file)

    assert_equal(h5ad_result, zarr_result, exact=True)
Beispiel #5
0
def test_write_large_categorical(tmp_path, diskfmt):
    """Round-trip obs columns drawn from 10,000 distinct random strings.

    One column holds plain strings, the other a ``pd.Categorical`` built from
    codes; the object is written and read with the ``write_{diskfmt}`` /
    ``read_{diskfmt}`` pair and compared to the original.
    """
    n_obs = 30_000
    n_var = 1000
    letters = np.array(list(ascii_letters))

    def random_cats(n):
        # Build a set of ``n`` distinct random strings, 5 to 29 letters long
        names = set()
        for _ in range(n):
            length = np.random.choice(range(5, 30))
            names.add("".join(np.random.choice(letters, length)))
        while len(names) < n:  # For the rare case that there are duplicates
            names |= random_cats(n - len(names))
        return names

    cats = np.array(sorted(random_cats(10_000)))
    n_cats = len(np.unique(cats))
    target = tmp_path / f"adata.{diskfmt}"

    obs_columns = {
        "cat1": cats[np.random.choice(n_cats, n_obs)],
        "cat2": pd.Categorical.from_codes(
            np.random.choice(n_cats, n_obs), cats),
    }
    # X holds a single stored value at position (0, 0)
    X = csr_matrix(([1], ([0], [0])), shape=(n_obs, n_var))
    orig = ad.AnnData(X, obs=obs_columns)

    write = getattr(orig, f"write_{diskfmt}")
    read = getattr(ad, f"read_{diskfmt}")
    write(target)
    curr = read(target)
    assert_equal(orig, curr)
def test_assert_equal_alignedmapping_empty():
    """Empty aligned mappings compare equal only when their parents share indices.

    Three objects without any mapping entries are built: a reference, one
    whose obs/var names come from a disjoint set of letters, and one reusing
    the reference's obs/var frames.
    """
    letters = np.array(list(ascii_letters))
    first_pool = letters[:20]
    second_pool = letters[20:]

    def empty_frame(pool):
        # Column-less frame indexed by 10 distinct letters drawn from ``pool``
        return pd.DataFrame(
            [], index=np.random.choice(pool, 10, replace=False))

    adata = ad.AnnData(
        X=np.zeros((10, 10)),
        obs=empty_frame(first_pool),
        var=empty_frame(first_pool),
    )
    diff_idx = ad.AnnData(
        X=np.zeros((10, 10)),
        obs=empty_frame(second_pool),
        var=empty_frame(second_pool),
    )
    same_idx = ad.AnnData(adata.X, obs=adata.obs.copy(), var=adata.var.copy())

    for name in ("obsm", "varm", "layers", "obsp", "varp"):
        reference = getattr(adata, name)
        with pytest.raises(AssertionError):
            assert_equal(reference, getattr(diff_idx, name))
        assert_equal(getattr(adata, name), getattr(same_idx, name))
Beispiel #7
0
def test_readwrite_h5ad(typ, dataset_kwargs, backing_h5ad):
    """Round-trip an AnnData through two h5ad files and compare with the source.

    The source object is written to ``backing_h5ad``, read back, written again
    to a scratch file, and read once more; the final object is checked against
    the source for dtypes, indices, ``raw`` and ``uns`` contents.
    """
    X = typ(X_list)
    adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict)
    assert not is_categorical_dtype(adata_src.obs["oanno1"])
    adata_src.raw = adata_src
    adata_src.write(backing_h5ad, **dataset_kwargs)

    adata_mid = ad.read(backing_h5ad)

    # Use the temporary directory as a context manager so it is removed
    # deterministically (also when an assertion fails) instead of relying on
    # the object's finalizer.
    with tempfile.TemporaryDirectory() as tmpdir:
        mid_pth = Path(tmpdir) / "mid.h5ad"
        adata_mid.write(mid_pth, **dataset_kwargs)

        adata = ad.read_h5ad(mid_pth)
        assert is_categorical_dtype(adata.obs["oanno1"])
        assert not is_categorical_dtype(adata.obs["oanno2"])
        assert adata.obs.index.tolist() == ["name1", "name2", "name3"]
        assert adata.obs["oanno1"].cat.categories.tolist() == ["cat1", "cat2"]
        assert is_categorical_dtype(adata.raw.var["vanno2"])
        assert np.all(adata.obs == adata_src.obs)
        assert np.all(adata.var == adata_src.var)
        assert np.all(adata.var.index == adata_src.var.index)
        assert adata.var.index.dtype == adata_src.var.index.dtype
        assert type(adata.raw.X) is type(adata_src.raw.X)
        assert type(adata.raw.varm) is type(adata_src.raw.varm)
        assert np.allclose(asarray(adata.raw.X), asarray(adata_src.raw.X))
        assert np.all(adata.raw.var == adata_src.raw.var)
        assert isinstance(adata.uns["uns4"]["a"], (int, np.integer))
        assert isinstance(adata_src.uns["uns4"]["a"], (int, np.integer))
        assert type(adata.uns["uns4"]["c"]) is type(adata_src.uns["uns4"]["c"])
        assert_equal(adata, adata_src)
Beispiel #8
0
def check_filled_like(x, fill_value=None, elem_name=None):
    """Assert that ``x`` equals ``filled_like(x)``.

    ``fill_value`` is only forwarded to ``filled_like`` when it is not
    ``None``; ``elem_name`` is passed through to ``assert_equal``.
    """
    fill_kwargs = {} if fill_value is None else {"fill_value": fill_value}
    expected = filled_like(x, **fill_kwargs)
    assert_equal(x, expected, elem_name=elem_name)
Beispiel #9
0
def test_concatenate_uns(unss, merge_strategy, result, value_gen):
    """
    Check that merging ``uns`` during concatenation yields the expected pattern.

    Params
    ------
    unss
        Set of patterns for values in uns.
    merge_strategy
        Strategy to use for merging uns (passed as ``uns_merge``).
    result
        Pattern we expect to see for the given input and strategy.
    value_gen
        Maps values in unss and results to another set of values. This is for checking that
        we're comparing values correctly. For example `[{"a": 1}, {"a": 1}]` may get mapped
        to `[{"a": [1, 2, 3]}, {"a": [1, 2, 3]}]`.
    """
    # Printed so the initial pattern is visible in the test output
    print(merge_strategy, "\n", unss, "\n", result)

    # The expected result is permuted together with the inputs
    expected, *permuted = permute_nested_values([result] + unss, value_gen)
    adatas = [uns_ad(uns) for uns in permuted]
    head, tail = adatas[0], adatas[1:]

    merged_uns = head.concatenate(tail, uns_merge=merge_strategy).uns
    assert_equal(merged_uns, expected, elem_name="uns")
Beispiel #10
0
def test_copy():
    """A copy of an AnnData created with ``X=None`` compares equal to the original."""
    cells = pd.DataFrame(index=[f"cell{i:03}" for i in range(100)])
    genes = pd.DataFrame(index=[f"gene{i:03}" for i in range(200)])
    adata = AnnData(None, obs=cells, var=genes)

    duplicate = adata.copy()
    assert_equal(duplicate, adata)
Beispiel #11
0
def check_rep_results(func, X, *, fields=("layer", "obsm"), **kwargs):
    """Checks that the results of a computation add values/ mutate the anndata object in a consistent way.

    ``func`` is applied once to an object holding ``X`` in ``.X`` and once per
    entry of ``fields`` to an object holding ``X`` in that representation
    (selected by passing ``<field>=<field>`` to ``func``). Afterwards the data
    slots are reset to zeros and all objects are compared pairwise, so any
    remaining difference comes from what ``func`` added or changed elsewhere.

    Params
    ------
    func
        Callable taking an AnnData plus keyword arguments.
    X
        Array to process; must support ``.copy()``, ``.shape`` and ``.dtype``.
    fields
        Names used both as the keyword passed to ``func`` and as the key under
        which the data is stored. The default is a tuple so that no mutable
        object is shared between calls; any iterable of names is accepted.
    **kwargs
        Forwarded to every call of ``func``.
    """
    # Gen data
    empty_X = np.zeros(shape=X.shape, dtype=X.dtype)
    adata = sc.AnnData(
        X=empty_X.copy(),
        layers={"layer": empty_X.copy()},
        obsm={"obsm": empty_X.copy()},
    )

    adata_X = adata.copy()
    adata_X.X = X.copy()

    adatas_proc = {}
    for field in fields:
        cur = adata.copy()
        sc.get._set_obs_rep(cur, X.copy(), **{field: field})
        adatas_proc[field] = cur

    # Apply function
    func(adata_X, **kwargs)
    for field in fields:
        func(adatas_proc[field], **{field: field}, **kwargs)

    # Reset X
    adata_X.X = empty_X.copy()
    for field in fields:
        sc.get._set_obs_rep(adatas_proc[field], empty_X.copy(),
                            **{field: field})

    # Every representation must agree with every other one, and with .X
    for field_a, field_b in permutations(fields, 2):
        assert_equal(adatas_proc[field_a], adatas_proc[field_b])
    for field in fields:
        assert_equal(adata_X, adatas_proc[field])
Beispiel #12
0
def test_setting_dim_index(dim):
    """Assigning new ``{dim}_names`` updates the aligned ``{dim}m`` frame.

    The same assignment is made on a copy and on a view; the view must stop
    being a view, differ from its parent's index, and end up equal to the
    copy.
    """
    names_attr = f"{dim}_names"
    aligned_attr = f"{dim}m"

    orig = gen_adata((5, 5))
    orig.raw = orig
    curr = orig.copy()
    view = orig[:, :]
    new_idx = pd.Index(list("abcde"), name="letters")

    def check_renamed(target):
        # Both the names attribute and the "df" entry must carry the new index
        pd.testing.assert_index_equal(getattr(target, names_attr), new_idx)
        aligned_df = getattr(target, aligned_attr)["df"]
        pd.testing.assert_index_equal(aligned_df.index, new_idx)

    setattr(curr, names_attr, new_idx)
    check_renamed(curr)
    pd.testing.assert_index_equal(curr.obs_names, curr.raw.obs_names)

    # Testing view behaviour
    setattr(view, names_attr, new_idx)
    assert not view.is_view
    check_renamed(view)
    # The parent object must keep its original index
    with pytest.raises(AssertionError):
        pd.testing.assert_index_equal(getattr(view, names_attr),
                                      getattr(orig, names_attr))
    assert_equal(view, curr, exact=True)
Beispiel #13
0
def check_rep_results(func, X, **kwargs):
    """Checks that the results of a computation add values/ mutate the anndata object in a consistent way.

    Three objects are built that hold ``X`` in ``.X``, ``layers["layer"]`` and
    ``obsm["obsm"]`` respectively (zeros elsewhere). ``func`` is applied to
    each with the matching selector, the slot that held ``X`` is zeroed again,
    and the objects are compared.
    """

    def blank():
        # Fresh zero array shaped like X; a new one per slot, never shared
        return np.zeros(shape=X.shape, dtype=X.dtype)

    # Gen data
    adata_X = sc.AnnData(
        X=X.copy(), layers={"layer": blank()}, obsm={"obsm": blank()})
    adata_layer = sc.AnnData(
        X=blank(), layers={"layer": X.copy()}, obsm={"obsm": blank()})
    adata_obsm = sc.AnnData(
        X=blank(), layers={"layer": blank()}, obsm={"obsm": X.copy()})

    # Apply function
    func(adata_X, **kwargs)
    func(adata_layer, layer="layer", **kwargs)
    func(adata_obsm, obsm="obsm", **kwargs)

    # Reset the slot that carried the data in each object
    adata_X.X = blank()
    adata_layer.layers["layer"] = blank()
    adata_obsm.obsm["obsm"] = blank()

    # Check equality
    for other in (adata_layer, adata_obsm):
        assert_equal(adata_X, other)
def test_backed_indexing_zarr_var(ondisk_equivalent_adata_zarr,
                                  subset_func_zarr):
    """Column-indexing the on-disk objects matches indexing the in-memory ``X``."""
    csr_mem, csr_disk, csc_disk = ondisk_equivalent_adata_zarr
    mem_X = csr_mem.X
    var_idx = subset_func_zarr(csr_mem.var_names)
    for on_disk in (csr_disk, csc_disk):
        assert_equal(mem_X[:, var_idx], on_disk[:, var_idx])
Beispiel #15
0
def test_nan_merge(axis, join_type, array_type):
    """With ``merge="same"``, identical arrays containing NaN are kept.

    Two objects sharing the same NaN-containing array in the mapping of the
    non-concatenated dimension must keep it after ``concat``; pairing one of
    them with a NaN-free variant must leave that mapping empty.
    ``join_type`` is accepted but not used in the body.
    """
    mapping_attr = ("varm", "obsm")[axis]
    shape = (20, 10)

    clean = array_type(
        sparse.random(shape[1 - axis], 10, density=0.1, format="csr")
    )
    with_nan = clean.copy()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=sparse.SparseEfficiencyWarning)
        # Place NaN at 10 randomly chosen positions (repeats are possible)
        for _ in range(10):
            row = np.random.choice(clean.shape[0])
            col = np.random.choice(clean.shape[1])
            with_nan[row, col] = np.nan

    nan_kwargs = {
        "X": sparse.csr_matrix(shape),
        mapping_attr: {"arr": with_nan},
    }
    orig1 = AnnData(**nan_kwargs)
    orig2 = AnnData(**nan_kwargs)
    merged = concat([orig1, orig2], axis=axis, merge="same")

    assert_equal(getattr(orig1, mapping_attr), getattr(merged, mapping_attr))

    clean_kwargs = {
        "X": sparse.csr_matrix(shape),
        mapping_attr: {"arr": clean},
    }
    orig_nonan = AnnData(**clean_kwargs)
    merged_nonan = concat([orig1, orig_nonan], axis=axis, merge="same")

    assert len(getattr(merged_nonan, mapping_attr)) == 0
Beispiel #16
0
def test_write_string_types(tmp_path, diskfmt):
    """Check which column-name types can be written.

    Column names given as ``str`` and ``np.str_`` must round-trip through the
    ``write_{diskfmt}`` / ``read_{diskfmt}`` pair; a ``bytes`` column name
    must make writing fail with a ``TypeError`` that names the key.
    """
    # https://github.com/theislab/anndata/issues/456
    adata_pth = tmp_path / f"adata.{diskfmt}"

    adata = ad.AnnData(
        np.ones((3, 3)),
        obs=pd.DataFrame(
            np.ones((3, 2)),
            columns=["a", np.str_("b")],
            index=["a", "b", "c"],
        ),
    )

    write = getattr(adata, f"write_{diskfmt}")
    read = getattr(ad, f"read_{diskfmt}")

    write(adata_pth)
    from_disk = read(adata_pth)

    assert_equal(adata, from_disk)

    adata.obs[b"c"] = np.zeros(3)
    # This should error, and tell you which key is at fault.
    # repr() produces the same "b'c'" text as str() on a bytes object, but
    # does not trigger BytesWarning when Python runs with -b / -bb.
    with pytest.raises(TypeError, match=repr(b"c")):
        write(adata_pth)
Beispiel #17
0
def test_recipe_weinreb():
    """Run ``recipe_weinreb17`` with ``copy=True`` and check the input is unchanged."""
    # Just tests for failure for now
    adata = sc.datasets.pbmc68k_reduced().raw.to_adata()
    adata.X = adata.X.toarray()
    snapshot = adata.copy()

    # The returned copy is discarded; only the input object is inspected
    sc.pp.recipe_weinreb17(adata, log=False, copy=True)

    assert_equal(snapshot, adata)
Beispiel #18
0
def test_concatenate_layers(array_type, join_type):
    """After concatenation, ``X`` and a layer built from the same arrays still match."""

    def make_adata():
        # The same array object is used for both X and the "a" layer
        mat = array_type(sparse.random(100, 200, format="csr"))
        return AnnData(X=mat, layers={"a": mat})

    first, *others = (make_adata() for _ in range(5))

    merged = first.concatenate(others, join=join_type)
    assert_equal(merged.X, merged.layers["a"])
Beispiel #19
0
def test_copy_view():
    """Copying a reverse-strided view of an ``X``-less AnnData gives an equal object."""
    cells = pd.DataFrame(index=[f"cell{i:03}" for i in range(100)])
    genes = pd.DataFrame(index=[f"gene{i:03}" for i in range(200)])
    adata = AnnData(None, obs=cells, var=genes)

    # Every second element of both dimensions, in reverse order
    view = adata[::-2, ::-2]
    assert_equal(view.copy(), view)
Beispiel #20
0
def test_io_spec(store, value, encoding_type):
    """Write ``value``, check its recorded ``encoding-type``, and read it back."""
    key = f"key_for_{encoding_type}"
    write_elem(store, key, value, dataset_kwargs={})

    recorded = _read_attr(store[key].attrs, "encoding-type")
    assert encoding_type == recorded

    roundtripped = read_elem(store[key])
    assert_equal(value, roundtripped)
Beispiel #21
0
def test_transpose():
    """Check shape, ``uns`` sharing and ``varp`` -> ``obsp`` keys after ``.T``."""
    adata = gen_adata((5, 3))

    # Prefix the varp keys so they can be recognised on the transposed object
    renamed = {}
    for name, mat in adata.varp.items():
        renamed[f"varp_{name}"] = mat
    adata.varp = renamed

    transposed = adata.T
    # A key added through the transposed object must show up on the original
    transposed.uns["test123"] = 1
    assert "test123" in adata.uns

    assert_equal(transposed.X.shape, (3, 5))
    assert_equal(transposed.obsp.keys(), adata.varp.keys())
Beispiel #22
0
def test_attr_deletion():
    """Deleting each attribute resets it to the value found on an empty AnnData."""
    full = gen_adata((30, 30))
    # Reference object: X is None and obs/var keep only the indices of `full`
    empty = AnnData(None, obs=full.obs[[]], var=full.var[[]])

    deletable = (
        "X", "obs", "var", "obsm", "varm", "obsp", "varp", "layers", "uns",
    )
    for name in deletable:
        delattr(full, name)
        assert_equal(getattr(full, name), getattr(empty, name))

    # With everything deleted the two objects must match exactly
    assert_equal(full, empty, exact=True)
def test_regress_out_constants():
    """``regress_out`` on constant columns must leave the object unchanged."""
    zeros_col = np.full((10, 1), 0.0)
    ones_col = np.full((10, 1), 1.0)
    adata = AnnData(np.hstack((zeros_col, ones_col)))

    n_obs = adata.X.shape[0]
    adata.obs['percent_mito'] = np.random.rand(n_obs)
    adata.obs['n_counts'] = adata.X.sum(axis=1)
    expected = adata.copy()

    sc.pp.regress_out(adata, keys=['n_counts', 'percent_mito'])
    assert_equal(adata, expected)
Beispiel #24
0
def test_view_setattr_machinery(attr, subset_func, subset_func2):
    """Re-assigning an attribute on a view with its own value keeps the contents."""
    # Tests that setting attributes on a view doesn't mess anything up too bad
    adata = gen_adata((10, 10))
    obs_sel = subset_func(adata.obs_names)
    var_sel = subset_func2(adata.var_names)
    view = adata[obs_sel, var_sel]

    expected = view.copy()
    same_value = getattr(expected, attr)
    setattr(view, attr, same_value)
    assert_equal(expected, view, exact=True)
Beispiel #25
0
def test_write_to_root(store):
    """Write an AnnData at the store root and read it back from the store itself."""
    adata = gen_adata((3, 2))

    write_elem(store, "/", adata)
    roundtripped = read_elem(store)

    root_encoding = _read_attr(store.attrs, "encoding-type")
    assert "anndata" == root_encoding
    assert_equal(roundtripped, adata)
Beispiel #26
0
def test_readwrite_h5ad_one_dimension(typ, backing_h5ad):
    """Round-trip an AnnData reduced to a single variable through h5ad."""
    src = ad.AnnData(typ(X_list), obs=obs_dict, var=var_dict, uns=uns_dict)
    # Keep only the first variable
    single_var = src[:, 0].copy()
    single_var.write(backing_h5ad)

    loaded = ad.read(backing_h5ad)
    assert loaded.shape == (3, 1)
    assert_equal(loaded, single_var)
Beispiel #27
0
def test_view_delattr(attr):
    """Deleting an attribute on a view makes it a real object with the default value."""
    base = gen_adata((10, 10))
    view = base[5:7, :5]
    # Reference with the same X and indices; obs/var are indexed only to get
    # column-less frames
    reference = ad.AnnData(view.X, obs=view.obs[[]], var=view.var[[]])

    delattr(view, attr)
    assert not view.is_view
    # Should now have same value as default
    assert_equal(getattr(view, attr), getattr(reference, attr))
Beispiel #28
0
def test_create_from_sparse_df():
    """An AnnData built from a sparse DataFrame equals one built from the matrix and names."""
    n_obs, n_var = 20, 30
    s = sp.random(n_obs, n_var, density=0.2)
    obs_names = [f"obs{i}" for i in range(n_obs)]
    var_names = [f"var{i}" for i in range(n_var)]

    sparse_df = pd.DataFrame.sparse.from_spmatrix(
        s, index=obs_names, columns=var_names)
    from_df = AnnData(sparse_df)
    from_matrix = AnnData(
        s,
        obs=pd.DataFrame(index=obs_names),
        var=pd.DataFrame(index=var_names),
    )

    assert_equal(from_df, from_matrix)
    assert issparse(from_df.X)
Beispiel #29
0
def test_raw_set_as_none(adata_raw):
    """Deleting ``raw`` and assigning ``None`` to it give equal objects."""
    # Test for theislab/anndata#445
    deleted = adata_raw
    # Copy is taken before ``raw`` is removed from the fixture object
    assigned = adata_raw.copy()

    del deleted.raw
    assigned.raw = None

    assert_equal(deleted, assigned)
Beispiel #30
0
def test_raw_rw(adata_raw, backing_h5ad):
    """An AnnData with ``raw`` survives an h5ad round trip exactly."""
    adata_raw.write(backing_h5ad)
    reloaded = ad.read(backing_h5ad)

    assert_equal(reloaded, adata_raw, exact=True)

    # The fixture object itself must still expose the expected names and values
    assert adata_raw.var_names.tolist() == ["var1", "var2"]
    raw = adata_raw.raw
    assert raw.var_names.tolist() == ["var1", "var2", "var3"]
    assert raw[:, 0].X.tolist() == [[1], [4], [7]]