Beispiel #1
0
def test_concat_size_0_dim():
    """Check result shapes when one operand has a zero-length var axis.

    See https://github.com/theislab/anndata/issues/526
    """
    full = gen_adata((5, 10))
    no_vars = gen_adata((5, 0))

    along_obs = concat([full, no_vars], axis=0)
    assert along_obs.shape == (10, 0)

    along_var = concat([full, no_vars], axis=1)
    assert along_var.shape == (5, 10)
Beispiel #2
0
def test_batch_key(axis):
    """Test that concat only adds a label column when ``label`` is given."""
    lhs = gen_adata((10, 10))
    rhs = gen_adata((10, 12))

    def annot_columns(adata):
        # Columns of ``obs`` (axis 0) or ``var`` (axis 1).
        return getattr(adata, ("obs", "var")[axis]).columns

    def added_columns(combined):
        # Columns of the concatenated annotation found in neither input.
        input_columns = annot_columns(lhs).union(annot_columns(rhs))
        return list(annot_columns(combined).difference(input_columns))

    unlabelled = concat([lhs, rhs], axis=axis)
    assert added_columns(unlabelled) == []

    labelled = concat([lhs, rhs], axis=axis, label="batch")
    assert added_columns(labelled) == ["batch"]
Beispiel #3
0
def test_concat_names(axis):
    """Index along ``axis`` is non-unique by default and unique with ``index_unique``."""
    annot_attr = ("obs", "var")[axis]

    lhs = gen_adata((10, 10))
    rhs = gen_adata((10, 10))

    plain = concat([lhs, rhs], axis=axis)
    assert not getattr(plain, annot_attr).index.is_unique

    suffixed = concat([lhs, rhs], axis=axis, index_unique="-")
    assert getattr(suffixed, annot_attr).index.is_unique
Beispiel #4
0
def test_concat_interface_errors():
    """Each invalid ``concat`` call below is expected to raise ``ValueError``."""
    adatas = [gen_adata((5, 10)) for _ in range(2)]

    invalid_calls = (
        lambda: concat(adatas, axis=3),
        lambda: concat(adatas, join="not implemented"),
        lambda: concat([]),
    )
    for invalid_call in invalid_calls:
        with pytest.raises(ValueError):
            invalid_call()
Beispiel #5
0
def test_concat_outer_aligned_mapping(elem):
    """Outer-join two objects where only one has ``obsm[elem]``.

    The rows of the result that come from the object lacking the element are
    passed to ``check_filled_like``.
    """
    with_elem = gen_adata((5, 5))
    without_elem = gen_adata((3, 5))
    del without_elem.obsm[elem]

    merged = concat(
        {"a": with_elem, "b": without_elem}, join="outer", label="group"
    )
    from_b = merged.obs["group"] == "b"

    check_filled_like(merged.obsm[elem][from_b], elem_name=f"obsm/{elem}")
Beispiel #6
0
def test_concatenate_size_0_dim():
    """``AnnData.concatenate`` with an operand that has a zero-length var axis.

    See https://github.com/theislab/anndata/issues/526
    """
    a = gen_adata((5, 10))
    b = gen_adata((5, 0))

    # Mostly testing that this doesn't error, but also pin the resulting
    # shape: a bare comparison expression would be evaluated and discarded.
    assert a.concatenate([b]).shape == (10, 0)
    assert b.concatenate([a]).shape == (10, 0)
Beispiel #7
0
def test_transposed_concat(array_type, axis, join_type, merge_strategy, fill_val):
    """Concatenating directly must equal transpose -> concat on other axis -> transpose.

    ``fill_val`` is accepted but not used in the body (presumably a fixture
    kept for parametrization -- confirm against the fixture definitions).
    """
    lhs = gen_adata((10, 10), X_type=array_type)
    rhs = gen_adata((10, 12), X_type=array_type)
    shared_kwargs = dict(join=join_type, merge=merge_strategy)

    direct = concat([lhs, rhs], axis=axis, **shared_kwargs)

    transposed_inputs = [lhs.T, rhs.T]
    other_axis = abs(axis - 1)
    via_transpose = concat(transposed_inputs, axis=other_axis, **shared_kwargs).T

    assert_equal(direct, via_transpose)
Beispiel #8
0
def test_concat_size_0_dim(axis, join_type, merge_strategy, shape):
    """Concatenate a (5, 7) object with one of ``shape`` and check the result.

    The result shape is always checked; for outer joins the newly created
    regions are passed to ``check_filled_like`` and, when ``b`` is non-empty
    along ``axis``, its aligned mapping elements are compared to the result.

    See https://github.com/theislab/anndata/issues/526
    """
    a = gen_adata((5, 7))
    b = gen_adata(shape)
    alt_axis = 1 - axis
    dim = ("obs", "var")[axis]

    expected_size = expected_shape(a, b, axis=axis, join=join_type)
    inputs = {"a": a, "b": b}
    result = concat(
        inputs,
        axis=axis,
        join=join_type,
        merge=merge_strategy,
        pairwise=True,
        index_unique="-",
    )
    assert result.shape == expected_size

    if join_type != "outer":
        return

    # Entries contributed by ``b`` along the concatenation axis
    from_b = axis_labels(result, axis).str.endswith("-b")
    # Labels on the other axis that ``a`` did not have
    not_in_a = ~axis_labels(result, alt_axis).isin(axis_labels(a, alt_axis))
    concat_idx = make_idx_tuple(from_b, axis)
    other_idx = make_idx_tuple(not_in_a, alt_axis)

    check_filled_like(result.X[concat_idx], elem_name="X")
    check_filled_like(result.X[other_idx], elem_name="X")
    for key, layer in result.layers.items():
        check_filled_like(layer[concat_idx], elem_name=f"layers/{key}")
        check_filled_like(layer[other_idx], elem_name=f"layers/{key}")

    if shape[axis] > 0:
        names_attr = f"{dim}_names"
        mapping_attr = f"{dim}m"

        b_result = result[concat_idx].copy()
        # Restore b's own labels (the result carries the "-b" suffix)
        setattr(b_result, names_attr, getattr(b, names_attr))

        expected_mapping = getattr(b, mapping_attr)
        for key, actual in getattr(b_result, mapping_attr).items():
            expected = expected_mapping[key]
            # pd.concat can have unintuitive return types (similar to numpy
            # promotion), so dataframes are compared as object dtype.
            if isinstance(actual, pd.DataFrame):
                expected = expected.astype(object)
                actual = actual.astype(object)
            assert_equal(expected, actual, elem_name=f"{mapping_attr}/{key}")
Beispiel #9
0
def test_to_memory_full(tmp_path, array_type):
    """``to_memory`` on a backed object equals the in-memory one, with and without ``.raw``."""
    in_memory = gen_adata((15, 10), X_type=array_type)
    in_memory.raw = gen_adata((15, 12), X_type=array_type)

    h5ad_path = tmp_path / "backed.h5ad"
    in_memory.write_h5ad(h5ad_path, compression="lzf")
    on_disk = ad.read_h5ad(h5ad_path, backed="r")

    def check_matches():
        assert_equal(in_memory, on_disk.to_memory())

    check_matches()

    # Test that raw can be removed
    for adata in (on_disk, in_memory):
        del adata.raw
    check_matches()
Beispiel #10
0
def test_setting_dim_index(dim):
    """Assigning ``{dim}_names`` updates the aligned ``"df"`` element, on copies and views."""
    names_attr = f"{dim}_names"
    aligned_attr = f"{dim}m"
    check_index = pd.testing.assert_index_equal

    orig = gen_adata((5, 5))
    orig.raw = orig
    curr = orig.copy()
    view = orig[:, :]
    new_idx = pd.Index(list("abcde"), name="letters")

    # Plain (non-view) object
    setattr(curr, names_attr, new_idx)
    check_index(getattr(curr, names_attr), new_idx)
    check_index(getattr(curr, aligned_attr)["df"].index, new_idx)
    check_index(curr.obs_names, curr.raw.obs_names)

    # Testing view behaviour: the assignment must turn the view into a
    # standalone object and leave the original's index unchanged.
    setattr(view, names_attr, new_idx)
    assert not view.is_view
    check_index(getattr(view, names_attr), new_idx)
    check_index(getattr(view, aligned_attr)["df"].index, new_idx)
    with pytest.raises(AssertionError):
        check_index(getattr(view, names_attr), getattr(orig, names_attr))
    assert_equal(view, curr, exact=True)
Beispiel #11
0
def test_backed_raw_subset(tmp_path, subset_func, subset_func2):
    """Subset a backed object carrying ``.raw``, write it, and compare with the in-memory subset."""
    source_pth = tmp_path / "backed.h5ad"
    subset_pth = tmp_path / "final.h5ad"

    in_memory = gen_adata((10, 10))
    in_memory.raw = in_memory
    obs_idx = subset_func(in_memory.obs_names)
    var_idx = subset_func2(in_memory.var_names)
    in_memory.write(source_pth)

    backed_adata = ad.read_h5ad(source_pth, backed="r")
    backed_view = backed_adata[obs_idx, var_idx]
    assert backed_view.is_view
    memory_view = in_memory[obs_idx, var_idx]
    # meaningful as objects are not equivalent?
    assert_equal(backed_view, memory_view)
    backed_view.write_h5ad(subset_pth)

    reloaded = ad.read_h5ad(subset_pth)
    # todo: Figure out why this doesn't work if I don't copy
    assert_equal(reloaded, memory_view.copy())

    # todo: breaks when removing this line, b/c backed_view.X is not accessible
    backed_view = ad.read_h5ad(source_pth, backed="r")[obs_idx, var_idx]
    # .raw is dropped when loading backed into memory.
    del reloaded.raw
    # assert loading into memory
    assert_equal(reloaded, backed_view.to_memory())
Beispiel #12
0
def test_readloom_deprecations(tmp_path):
    """Deprecated ``read_loom`` call styles warn and match the current keyword form.

    Covers three cases: ``obsm_names`` -> ``obsm_mapping``, ``varm_names`` ->
    ``varm_mapping``, and passing ``sparse`` positionally. Passing both the
    old and the new mapping keyword is expected to raise ``ValueError``.
    """
    loom_pth = tmp_path / "test.loom"
    adata_src = gen_adata((5, 10),
                          obsm_types=[np.ndarray],
                          varm_types=[np.ndarray])
    adata_src.write_loom(loom_pth, write_obsm_varm=True)

    # obsm_names -> obsm_mapping
    obsm_mapping = {"df": adata_src.obs.columns}
    with pytest.warns(FutureWarning):
        depr_result = ad.read_loom(loom_pth, obsm_names=obsm_mapping)
    actual_result = ad.read_loom(loom_pth, obsm_mapping=obsm_mapping)
    assert_equal(actual_result, depr_result)
    with pytest.raises(ValueError, match="ambiguous"):
        ad.read_loom(loom_pth,
                     obsm_mapping=obsm_mapping,
                     obsm_names=obsm_mapping)

    # varm_names -> varm_mapping
    varm_mapping = {"df": adata_src.var.columns}
    with pytest.warns(FutureWarning):
        depr_result = ad.read_loom(loom_pth, varm_names=varm_mapping)
    actual_result = ad.read_loom(loom_pth, varm_mapping=varm_mapping)
    assert_equal(actual_result, depr_result)
    with pytest.raises(ValueError, match="ambiguous"):
        ad.read_loom(loom_pth,
                     varm_mapping=varm_mapping,
                     varm_names=varm_mapping)

    # positional -> keyword
    with pytest.warns(FutureWarning, match="sparse"):
        depr_result = ad.read_loom(loom_pth, True)
    actual_result = ad.read_loom(loom_pth, sparse=True)
    # Exact type identity is intended here, so compare the type objects with `is`.
    assert type(depr_result.X) is type(actual_result.X)
Beispiel #13
0
def test_hdf5_compression_opts(tmp_path, compression, compression_opts):
    """Written h5ad datasets carry the requested compression settings.

    ``compression`` / ``compression_opts`` are forwarded to ``write_h5ad``
    only when they are not ``None``. Every non-scalar dataset in the file is
    then checked, and the names of all mismatching datasets are reported
    together in a single ``AssertionError``. Finally the file must read back
    equal to the original object.

    See https://github.com/theislab/anndata/issues/497
    """
    pth = Path(tmp_path) / "adata.h5ad"
    adata = gen_adata((10, 8))
    kwargs = {}
    if compression is not None:
        kwargs["compression"] = compression
    if compression_opts is not None:
        kwargs["compression_opts"] = compression_opts
    not_compressed = []

    adata.write_h5ad(pth, **kwargs)

    def check_compressed(key, value):
        # Visitor for ``visititems``: records datasets with wrong settings.
        # Scalar datasets (shape ``()``) are skipped.
        if isinstance(value, h5py.Dataset) and value.shape != ():
            if compression is not None and value.compression != compression:
                not_compressed.append(key)
            elif (compression_opts is not None
                  and value.compression_opts != compression_opts):
                not_compressed.append(key)

    # Explicit read-only mode: the file is only inspected here.
    with h5py.File(pth, "r") as f:
        f.visititems(check_compressed)

    if not_compressed:
        msg = "\n\t".join(not_compressed)
        raise AssertionError(
            f"These elements were not compressed correctly:\n\t{msg}")

    assert_equal(adata, ad.read_h5ad(pth))
Beispiel #14
0
def test_inplace_subset_obs(matrix_type, subset_func):
    """``_inplace_subset_obs`` must give the same object as copying an obs-subset view."""
    orig = gen_adata((30, 30), X_type=matrix_type)
    subset_idx = subset_func(orig.obs_names)

    modified = orig.copy()
    from_view = orig[subset_idx, :].copy()
    modified._inplace_subset_obs(subset_idx)

    def same_array(expected, actual):
        # Compare after converting both sides with ``asarray``.
        assert_equal(asarray(expected), asarray(actual), exact=True)

    same_array(from_view.X, modified.X)
    for annot in ("obs", "var"):
        assert_equal(
            getattr(from_view, annot), getattr(modified, annot), exact=True
        )

    for key in from_view.obsm:
        same_array(from_view.obsm[key], modified.obsm[key])
    for key in from_view.varm:
        same_array(from_view.varm[key], modified.varm[key])
        # var-aligned arrays are also compared against the untouched original
        same_array(orig.varm[key], modified.varm[key])
    for key in from_view.layers:
        same_array(from_view.layers[key], modified.layers[key])
Beispiel #15
0
def test_sparse_to_dense_disk(tmp_path, mtx_format, to_convert):
    """Writing with ``as_dense`` stores the listed keys as HDF5 datasets.

    The conversion is done both from an in-memory object and from a backed
    one; both outputs must read back equal to both sources.
    """
    mem_pth = tmp_path / "orig.h5ad"
    dense_from_mem_pth = tmp_path / "dense_mem.h5ad"
    dense_from_disk_pth = tmp_path / "dense_disk.h5ad"

    mem = gen_adata((50, 50), mtx_format)
    mem.raw = mem
    mem.write_h5ad(mem_pth)
    disk = ad.read_h5ad(mem_pth, backed="r")

    mem.write_h5ad(dense_from_mem_pth, as_dense=to_convert)
    disk.write_h5ad(dense_from_disk_pth, as_dense=to_convert)

    # Every converted key must be stored as a plain dataset in both files.
    for dense_pth in (dense_from_mem_pth, dense_from_disk_pth):
        with h5py.File(dense_pth, "r") as f:
            for key in to_convert:
                assert isinstance(f[key], h5py.Dataset)

    for backed in [None, "r"]:
        from_mem = ad.read_h5ad(dense_from_mem_pth, backed=backed)
        from_disk = ad.read_h5ad(dense_from_disk_pth, backed=backed)
        for source in (mem, disk):
            for reloaded in (from_mem, from_disk):
                assert_equal(source, reloaded)
Beispiel #16
0
def test_attr_deletion():
    """Deleting each attribute resets it to the value of a freshly built empty object."""
    deletable = (
        "X",
        "obs",
        "var",
        "obsm",
        "varm",
        "obsp",
        "varp",
        "layers",
        "uns",
    )
    full = gen_adata((30, 30))
    # Reference object: X is None, obs/var keep their index but no columns
    empty = AnnData(None, obs=full.obs[[]], var=full.var[[]])

    for name in deletable:
        delattr(full, name)
        assert_equal(getattr(full, name), getattr(empty, name))

    assert_equal(full, empty, exact=True)
Beispiel #17
0
def test_transpose():
    """Check ``.T``: ``uns`` writes are visible on the source, X shape flips, ``varp`` keys become ``obsp`` keys."""
    adata = gen_adata((5, 3))

    prefixed_varp = {}
    for key, value in adata.varp.items():
        prefixed_varp[f"varp_{key}"] = value
    adata.varp = prefixed_varp

    transposed = adata.T
    transposed.uns["test123"] = 1

    assert "test123" in adata.uns
    assert_equal(transposed.X.shape, (3, 5))
    assert_equal(transposed.obsp.keys(), adata.varp.keys())
Beispiel #18
0
def test_write_to_root(store):
    """Write an AnnData at the store root, then check the encoding attribute and the round trip."""
    adata = gen_adata((3, 2))
    write_elem(store, "/", adata)

    roundtripped = read_elem(store)
    root_encoding = _read_attr(store.attrs, "encoding-type")

    assert root_encoding == "anndata"
    assert_equal(roundtripped, adata)
Beispiel #19
0
def test_view_setattr_machinery(attr, subset_func, subset_func2):
    """Setting an attribute on a view to its own copied value must not change the data."""
    # Tests that setting attributes on a view doesn't mess anything up too bad
    adata = gen_adata((10, 10))
    obs_sel = subset_func(adata.obs_names)
    var_sel = subset_func2(adata.var_names)
    view = adata[obs_sel, var_sel]

    expected = view.copy()
    same_value = getattr(expected, attr)
    setattr(view, attr, same_value)

    assert_equal(expected, view, exact=True)
Beispiel #20
0
def test_view_delattr(attr):
    """Deleting an attribute on a view actualizes it and resets the attribute to its default."""
    base = gen_adata((10, 10))
    view = base[5:7, :5]
    # ``obs[[]]`` / ``var[[]]`` select no columns: only the indexes are kept
    reference = ad.AnnData(view.X, obs=view.obs[[]], var=view.var[[]])

    delattr(view, attr)

    assert not view.is_view
    # Should now have same value as default
    expected = getattr(reference, attr)
    assert_equal(getattr(view, attr), expected)
Beispiel #21
0
def test_adata_in_uns(tmp_path, diskfmt):
    """AnnData objects nested inside ``uns`` (two levels deep, one with ``.raw``) survive a round trip."""
    pth = tmp_path / f"adatas_in_uns.{diskfmt}"

    def read(path):
        # Dispatch to ``ad.read_<diskfmt>``
        return getattr(ad, f"read_{diskfmt}")(path)

    def write(adata, path):
        # Dispatch to ``adata.write_<diskfmt>``
        return getattr(adata, f"write_{diskfmt}")(path)

    orig = gen_adata((4, 5))
    nested = {
        "a": gen_adata((1, 2)),
        "b": gen_adata((12, 8)),
    }
    orig.uns["adatas"] = nested

    doubly_nested = gen_adata((2, 5))
    doubly_nested.raw = gen_adata((2, 7))
    orig.uns["adatas"]["b"].uns["another_one"] = doubly_nested

    write(orig, pth)
    assert_equal(orig, read(pth))
Beispiel #22
0
def test_concat_categories_from_mapping():
    """Passing a mapping to ``concat`` equals passing its values with ``keys=``."""
    mapping = {
        "a": gen_adata((10, 10)),
        "b": gen_adata((10, 10)),
    }
    keys = list(mapping)
    adatas = list(mapping.values())

    option_sets = (
        {},
        {"label": "batch"},
        {"index_unique": "-"},
        {"label": "group", "index_unique": "+"},
    )
    for options in option_sets:
        from_mapping = concat(mapping, **options)
        from_iterable = concat(adatas, keys=keys, **options)
        assert_equal(from_mapping, from_iterable)
Beispiel #23
0
def test_double_index(subset_func, subset_func2):
    """Indexing both axes at once must match indexing obs first and var second."""
    adata = gen_adata((10, 10))
    obs_subset = subset_func(adata.obs_names)
    var_subset = subset_func2(adata.var_names)

    at_once = adata[obs_subset, var_subset]
    rows_only = adata[obs_subset, :]
    stepwise = rows_only[:, var_subset]

    assert np.all(asarray(at_once.X) == asarray(stepwise.X))
    assert np.all(at_once.obs == stepwise.obs)
    assert np.all(at_once.var == stepwise.var)
Beispiel #24
0
def test_maintain_layers(backing_h5ad):
    """Layer types and values are preserved by an h5ad write/read round trip.

    Checks the ``"array"`` and ``"sparse"`` layers of a generated object.
    """
    M, N = 100, 101
    orig = gen_adata((M, N))
    orig.write(backing_h5ad)
    # Use the explicit h5ad reader rather than the legacy ``ad.read`` alias.
    curr = ad.read_h5ad(backing_h5ad)

    assert type(orig.layers["array"]) is type(curr.layers["array"])
    assert np.all(orig.layers["array"] == curr.layers["array"])
    assert type(orig.layers["sparse"]) is type(curr.layers["sparse"])
    # The sparse comparison result is densified so ``np.all`` sees an array.
    assert np.all((orig.layers["sparse"] == curr.layers["sparse"]).toarray())
Beispiel #25
0
def test_io_spec_raw(store):
    """``.raw`` is written with encoding-type ``"raw"`` and reads back equal."""
    adata = gen_adata((3, 2))
    adata.raw = adata
    write_elem(store, "adata", adata)

    raw_encoding = _read_attr(store["adata/raw"].attrs, "encoding-type")
    assert raw_encoding == "raw"

    roundtripped = read_elem(store["adata"])
    assert_equal(roundtripped.raw, adata.raw)
Beispiel #26
0
def test_assert_equal():
    """Exercise ``assert_equal`` on arrays, AnnData, dataframes, sparse matrices and categoricals."""
    # --- ndarrays ---
    letters = list(ascii_letters)
    assert_equal(np.ones((10, 10)), np.ones((10, 10)))
    # Should this require an exact test?
    assert_equal(np.ones((10, 10), dtype="i8"), np.ones((10, 10), dtype="f8"))
    assert_equal(np.array(letters), np.array(letters), exact=True)
    with pytest.raises(AssertionError):
        assert_equal(np.array(letters), np.array(letters)[::-1])

    # --- AnnData ---
    adata = gen_adata((10, 10))
    adata.raw = adata.copy()
    assert_equal(adata, adata.copy(), exact=True)
    # TODO: comparing against a copy with randomly permuted obs_names and
    # var_names using exact=False is disabled for now; not sure that passing
    # would be good behaviour.

    # A missing layer must be reported under "layers"
    without_layer = adata.copy()
    to_modify = next(iter(without_layer.layers.keys()))
    del without_layer.layers[to_modify]
    with pytest.raises(AssertionError) as missing_layer_error:
        assert_equal(adata, without_layer)
    assert "layers" in str(missing_layer_error.value)

    # A changed layer must be reported with its key
    # (`to_modify` will be in pytest info)
    changed_layer = adata.copy()
    changed_layer.layers[to_modify][0, 0] = (
        changed_layer.layers[to_modify][0, 0] + 1
    )
    with pytest.raises(AssertionError) as changed_layer_error:
        assert_equal(adata, changed_layer)
    error_text = str(changed_layer_error.value)
    assert "layers" in error_text
    assert to_modify in str(changed_layer_error.value)

    # --- dataframes ---
    assert_equal(adata.obs, adata.obs.copy(), exact=True)

    # --- sparse vs. dense ---
    csr = sparse.random(100, 100, format="csr")
    csc = csr.tocsc()
    dense = csr.toarray()
    for left, right in ((csr, csc), (csc, dense), (dense, csc)):
        assert_equal(left, right)

    # --- categoricals ---
    values = list("aabdcc")
    unordered_cat = pd.Categorical(values, ordered=False)
    ordered_cat = pd.Categorical(values, ordered=True)

    for cat in (unordered_cat, ordered_cat):
        assert_equal(cat, cat.copy())
    assert_equal(ordered_cat, unordered_cat, exact=False)
    with pytest.raises(AssertionError):
        assert_equal(ordered_cat, unordered_cat, exact=True)
Beispiel #27
0
def test_zarr_chunk_X(tmp_path):
    """``write_zarr(chunks=...)`` sets the chunk shape of ``X`` and the data round-trips."""
    import zarr

    chunk_shape = (10, 10)
    zarr_pth = Path(tmp_path) / "test.zarr"

    adata = gen_adata((100, 100), X_type=np.array)
    adata.write_zarr(zarr_pth, chunks=chunk_shape)

    # As of v2.3.2 zarr won't take a Path, hence the str conversion
    opened = zarr.open(str(zarr_pth))
    assert opened["X"].chunks == chunk_shape

    assert_equal(ad.read_zarr(zarr_pth), adata)
Beispiel #28
0
def test_inplace_subset_no_X(subset_func, dim):
    """In-place subsetting along ``dim`` equals copying a subset view when ``X`` was deleted."""
    orig = gen_adata((30, 30))
    del orig.X

    names = getattr(orig, f"{dim}_names")
    subset_idx = subset_func(names)

    modified = orig.copy()
    from_view = subset_dim(orig, **{dim: subset_idx}).copy()

    inplace_subset = getattr(modified, f"_inplace_subset_{dim}")
    inplace_subset(subset_idx)

    assert_equal(modified, from_view, exact=True)
Beispiel #29
0
def test_view_failed_delitem(attr):
    """A failed key deletion on a view leaves both the view and its parent unchanged."""
    adata = gen_adata((10, 10))
    view = adata[5:7, :][:, :5]
    adata_hash = joblib.hash(adata)
    view_hash = joblib.hash(view)

    mapping = getattr(view, attr)
    with pytest.raises(KeyError):
        del mapping["not a key"]

    # Still a view, and the hashes taken before the attempt still match
    assert view.is_view
    assert joblib.hash(adata) == adata_hash
    assert joblib.hash(view) == view_hash
Beispiel #30
0
def test_view_delattr(attr, subset_func):
    """Deleting an attribute on a view actualizes it without modifying the parent."""
    base = gen_adata((10, 10))
    base_hash = joblib.hash(base)

    obs_sel = subset_func(base.obs_names)
    var_sel = subset_func(base.var_names)
    view = base[obs_sel, var_sel]
    # Reference object holding only the view's obs/var indexes (no columns)
    reference = ad.AnnData(obs=view.obs[[]], var=view.var[[]])

    delattr(view, attr)

    assert not view.is_view
    # Should now have same value as default
    assert_equal(getattr(view, attr), getattr(reference, attr))
    # Original should not be modified
    assert joblib.hash(base) == base_hash