Ejemplo n.º 1
0
def convert(
        input,
        output,
        chunk_size=16 * 1024 * 1024,
        genome=None,
        overwrite=False
):
    """Convert a matrix file between on-disk formats, chosen by file extension.

    Supported conversions:

    * ``.h5`` / ``.loom`` -> ``.zarr``: copy the HDF5 hierarchy into a Zarr
      directory store with ``zarr.copy_all``.
    * ``.h5`` / ``.loom`` -> ``.h5ad``: load with ``read_10x_h5`` and save
      with ``adata.write``.
    * ``.h5ad`` -> ``.zarr``: open the file in backed read-only mode and save
      with ``adata.write_zarr`` using row-wise chunks.

    Args:
        input: Path of the file to convert; its extension selects the reader.
        output: Destination path; its extension selects the writer.
        chunk_size: Number of input-file bytes per chunk. Only used for
            ``.h5ad`` -> ``.zarr``, where the input file size divided by this
            value (rounded up) gives the number of row chunks.
        genome: Name of the top-level group to read for ``.h5`` -> ``.h5ad``.
            When falsy it is taken from the file if the file has exactly one
            top-level group.
        overwrite: Passed to ``zarr.group`` for ``.h5`` -> ``.zarr``. The
            other conversions do not honour it yet (see TODOs).

    Raises:
        Exception: If the input or output extension is not supported, or if
            ``genome`` is not given and cannot be inferred from the file.
    """
    input_ext = splitext(input)[1]
    output_ext = splitext(output)[1]

    print('converting: %s to %s' % (input, output))

    if input_ext in ('.h5', '.loom'):
        if output_ext == '.zarr':
            # Convert 10x (HDF5) to Zarr.
            # Open read-only and close the handle once the copy is finished.
            with h5py.File(input, 'r') as source:
                # NOTE(review): the tree objects are built but not printed;
                # presumably they were meant as a visual preview.
                zarr.tree(source)

                store = zarr.DirectoryStore(output)
                dest = zarr.group(store=store, overwrite=overwrite)

                # following fails if without_attrs=False (the default), possibly related to https://github.com/h5py/h5py/issues/973
                zarr.copy_all(source, dest, log=sys.stdout, without_attrs=True)
                zarr.tree(dest)
        elif output_ext == '.h5ad':
            if not genome:
                # Infer the genome from the file when it is unambiguous.
                with h5py.File(input, 'r') as source:
                    keys = list(source.keys())
                if len(keys) == 1:
                    genome = keys[0]
                else:
                    raise Exception(
                        'Set --genome flag when converting from 10x HDF5 (.h5) to Anndata HDF5 (.h5ad); top-level groups in file %s: %s'
                        % (input, ','.join(keys))
                    )
            adata = read_10x_h5(input, genome=genome)

            # TODO: respect overwrite flag
            adata.write(output)
        else:
            # Previously this case fell through silently and converted nothing.
            raise Exception('Unrecognized output extension: %s' % output_ext)

    elif input_ext == '.h5ad':
        # Reject unsupported targets before touching the input file.
        if output_ext != '.zarr':
            raise Exception('Unrecognized output extension: %s' % output_ext)

        adata = read_h5ad(input, backed='r')
        (r, c) = adata.shape

        # Ceiling divisions. Floor division keeps both values integral on
        # Python 3, where ``/`` would produce floats that are not valid
        # chunk lengths.
        chunks = (getsize(input) - 1) // chunk_size + 1
        rows_per_chunk = (r - 1) // chunks + 1

        print('converting %s (%dx%d) to %s in %d chunks (%d rows each)' % (input, r, c, output, chunks, rows_per_chunk))

        # TODO: respect overwrite flag

        adata.write_zarr(
            make_store(output),
            chunks=(rows_per_chunk, c)
        )
    else:
        raise Exception('Unrecognized input extension: %s' % input_ext)
Ejemplo n.º 2
0
def test_tree(zarr_version):
    """Check that ``zarr.tree(group)`` renders the same as ``group.tree()``.

    Builds a small nested hierarchy (two groups under the root, two more
    under ``bar``, and one dataset under ``bar/qux``) and compares both the
    ``repr`` and ``str`` renderings of the two tree objects.
    """
    root = zarr.group(**_init_creation_kwargs(zarr_version))

    # Populate the hierarchy: foo, bar/baz, bar/qux/baz (dataset).
    root.create_group('foo')
    bar = root.create_group('bar')
    bar.create_group('baz')
    qux = bar.create_group('qux')
    qux.create_dataset('baz', shape=100, chunks=10)

    # Both textual renderings must agree between the two entry points.
    for render in (repr, str):
        assert render(zarr.tree(root)) == render(root.tree())
Ejemplo n.º 3
0
    def _open_session(self, mode="r"):
        """Open the Zarr container at ``self.root_dir`` and bind ``self._handle``.

        ``self._handle`` is set to the node at ``self.path`` inside the opened
        container. If ``zarr.open`` raises ``ValueError`` the method returns
        without setting ``self._handle``.

        Args:
            mode: Access mode forwarded to ``zarr.open``.
        """
        try:
            root = zarr.open(self.root_dir, mode=mode)  # don't create it
        except ValueError:
            # nothing to open here, unlikely a zarr dataset
            return

        self._handle = root[self.path]

        # Preview the internal structure. The tree is only built when DEBUG
        # output is enabled, and it is sent to the logger rather than being
        # discarded.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("%s", zarr.tree(self._handle))
Ejemplo n.º 4
0
def show_meta(input):
    """Return a ``zarr.tree`` view of the hierarchy stored in ``input``.

    Args:
        input: Path to the file or store; its extension selects how it is
            opened. ``.h5``, ``.h5ad`` and ``.loom`` are opened read-only with
            ``h5py.File``; ``.zarr`` is opened through ``make_store`` and
            ``zarr.open``.

    Returns:
        The object returned by ``zarr.tree`` for a recognized extension,
        otherwise ``None``.
    """
    input_ext = splitext(input)[1]

    if input_ext in (".h5", ".h5ad", ".loom"):
        # NOTE(review): the HDF5 handle is not closed here; presumably the
        # returned tree still reads from it. Confirm before adding a close.
        source = h5py.File(input, "r")
        return zarr.tree(source)

    if input_ext == ".zarr":
        group = zarr.open(make_store(input))
        # Use the qualified name, matching the HDF5 branch, so this does not
        # depend on a separate ``from zarr import tree``.
        return zarr.tree(group)

    # Unrecognized extension: keep the original implicit ``None`` result.
    return None
import h5py
import sys
import zarr

# Convert 10x (HDF5) to Zarr
# The source file is opened read-only and closed when the copy is finished.
with h5py.File("/Downloads/1M_neurons_filtered_gene_bc_matrices_h5.h5", "r") as source:
    # NOTE(review): the tree object is built but not printed; presumably it
    # was meant as a visual preview in an interactive session.
    zarr.tree(source)

    store = zarr.DirectoryStore('data/10x.zarr')
    dest = zarr.group(store=store, overwrite=True)
    # following fails if without_attrs=False (the default), possibly related to https://github.com/h5py/h5py/issues/973
    zarr.copy_all(source, dest, log=sys.stdout, without_attrs=True)


Ejemplo n.º 6
0
import zarr

from constants import ZARR_DB

# Open the Zarr container located at ZARR_DB.
root = zarr.open(ZARR_DB)  # compare with size of plink file
# NOTE(review): the statements below are bare expressions whose values are
# discarded when run as a plain script; presumably they were written for an
# interactive session where the result is echoed. Confirm the intended use.
root.info
root['/chromosome-1/calls'].info
# Build a tree view of the hierarchy under root.
zarr.tree(root)