def convert(
    input,
    output,
    chunk_size=16 * 1024 * 1024,
    genome=None,
    overwrite=False,
):
    """Convert a dataset between on-disk formats, selected by file extension.

    Supported conversions:

    * ``.h5`` / ``.loom`` -> ``.zarr``: the HDF5 hierarchy is copied into a
      Zarr directory store (attributes are not copied).
    * ``.h5`` / ``.loom`` -> ``.h5ad``: the input is read with
      ``read_10x_h5`` and written back out with ``adata.write``.
    * ``.h5ad`` -> ``.zarr``: the input is opened in backed mode and written
      with ``adata.write_zarr``, chunked along rows.

    Args:
        input: Path of the file to read; its extension selects the reader.
        output: Path to write; its extension selects the writer.
        chunk_size: Target number of input-file bytes per chunk. Only used
            for ``.h5ad`` -> ``.zarr``, where it determines how many rows go
            into each chunk.
        genome: Top-level HDF5 group to read for ``.h5``/``.loom`` ->
            ``.h5ad``. When falsy, the file's only top-level group is used.
        overwrite: Forwarded to ``zarr.group`` for HDF5 -> ``.zarr``. The
            other conversions do not honour it yet (see TODOs below).

    Raises:
        Exception: If the input or output extension is not recognised, or
            if ``genome`` is not given and the input has zero or several
            top-level groups.
    """
    input_ext = splitext(input)[1]
    output_ext = splitext(output)[1]

    print('converting: %s to %s' % (input, output))

    if input_ext == '.h5' or input_ext == '.loom':
        if output_ext == '.zarr':
            # Convert 10x (HDF5) to Zarr.
            # Open read-only and close the handle once the copy is done.
            with h5py.File(input, 'r') as source:
                # zarr.tree() only returns a viewer object; print it so the
                # hierarchy actually shows up next to the copy log.
                print(zarr.tree(source))

                store = zarr.DirectoryStore(output)
                dest = zarr.group(store=store, overwrite=overwrite)

                # following fails if without_attrs=False (the default), possibly related to https://github.com/h5py/h5py/issues/973
                zarr.copy_all(source, dest, log=sys.stdout, without_attrs=True)
                print(zarr.tree(dest))
        elif output_ext == '.h5ad':
            if not genome:
                # Fall back to the single top-level group, if there is
                # exactly one; otherwise the caller has to choose.
                with h5py.File(input, 'r') as source:
                    keys = list(source.keys())
                if len(keys) == 1:
                    genome = keys[0]
                else:
                    raise Exception(
                        'Set --genome flag when converting from 10x HDF5 (.h5) to Anndata HDF5 (.h5ad); top-level groups in file %s: %s'
                        % (input, ','.join(keys))
                    )

            adata = read_10x_h5(input, genome=genome)

            # TODO: respect overwrite flag
            adata.write(output)
        else:
            # Same failure mode as the .h5ad branch below, instead of
            # silently doing nothing.
            raise Exception('Unrecognized output extension: %s' % output_ext)
    elif input_ext == '.h5ad':
        adata = read_h5ad(input, backed='r')
        (r, c) = adata.shape

        # Ceiling divisions. Floor division (//) keeps both values integral
        # on Python 3, where "/" would produce floats.
        chunks = (getsize(input) - 1) // chunk_size + 1
        rows_per_chunk = (r - 1) // chunks + 1

        if output_ext == '.zarr':
            print('converting %s (%dx%d) to %s in %d chunks (%d rows each)' % (input, r, c, output, chunks, rows_per_chunk))

            # TODO: respect overwrite flag
            adata.write_zarr(
                make_store(output),
                chunks=(rows_per_chunk, c)
            )
        else:
            raise Exception('Unrecognized output extension: %s' % output_ext)
    else:
        raise Exception('Unrecognized input extension: %s' % input_ext)
def test_tree(zarr_version):
    """Check that ``zarr.tree(group)`` renders the same as ``group.tree()``.

    Builds a small hierarchy (``foo``, ``bar/baz``, ``bar/qux/baz``) and
    compares both the ``repr`` and the ``str`` of the two tree objects.
    """
    creation_kwargs = _init_creation_kwargs(zarr_version)
    root = zarr.group(**creation_kwargs)

    # Hierarchy under test: two top-level groups, one of them nested.
    root.create_group('foo')
    bar = root.create_group('bar')
    bar.create_group('baz')
    qux = bar.create_group('qux')
    qux.create_dataset('baz', shape=100, chunks=10)

    # The convenience function and the method must agree in both renderings.
    for render in (repr, str):
        assert render(zarr.tree(root)) == render(root.tree())
def _open_session(self, mode="r"):
    """Open the Zarr hierarchy at ``self.root_dir`` and bind ``self._handle``.

    ``self._handle`` is set to the node at ``self.path`` inside the opened
    hierarchy. If ``zarr.open`` raises ``ValueError`` the method returns
    without touching ``self._handle``.

    Args:
        mode: Persistence mode forwarded to ``zarr.open``.
    """
    try:
        z = zarr.open(self.root_dir, mode=mode)  # don't create it
    except ValueError:
        # nothing to open here, unlikely a zarr dataset
        return

    self._handle = z[self.path]

    # Preview the internal structure. zarr.tree() only returns a viewer
    # object, so it has to be handed to the logger to be seen at all. The
    # guard avoids building the tree when DEBUG output is disabled.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("%s", zarr.tree(self._handle))
def show_meta(input):
    """Return a tree view of the hierarchy stored in ``input``.

    Args:
        input: Path to an HDF5-based file (``.h5``, ``.h5ad``, ``.loom``) or
            to a ``.zarr`` store.

    Returns:
        The object produced by ``zarr.tree`` for the opened hierarchy, or
        ``None`` when the extension is not one of the above.
    """
    input_ext = splitext(input)[1]

    if input_ext in (".h5", ".h5ad", ".loom"):
        # NOTE(review): the HDF5 handle is deliberately left open; the
        # returned tree object presumably reads from it when rendered, so
        # closing it here looks unsafe. Confirm before adding a `with`.
        hdf5_file = h5py.File(input, "r")
        return zarr.tree(hdf5_file)

    if input_ext == ".zarr":
        store = make_store(input)
        # Read-only: inspecting metadata must never create a hierarchy.
        root = zarr.open(store, mode="r")
        # Use zarr.tree here too, matching the HDF5 branch.
        return zarr.tree(root)

    return None
import sys

import h5py
import zarr

# Convert 10x (HDF5) to Zarr.
# The input is opened read-only and closed as soon as the copy finishes.
with h5py.File("/Downloads/1M_neurons_filtered_gene_bc_matrices_h5.h5", "r") as source:
    # zarr.tree() only returns a viewer object; print it so the hierarchy is
    # shown when this runs as a script.
    print(zarr.tree(source))

    store = zarr.DirectoryStore('data/10x.zarr')
    dest = zarr.group(store=store, overwrite=True)

    # following fails if without_attrs=False (the default), possibly related to https://github.com/h5py/h5py/issues/973
    zarr.copy_all(source, dest, log=sys.stdout, without_attrs=True)
import zarr

from constants import ZARR_DB

# This script only inspects the store, so open it read-only.
root = zarr.open(ZARR_DB, mode="r")

# compare with size of plink file
# Bare expressions show nothing when run as a script, so print each report.
print(root.info)
print(root['/chromosome-1/calls'].info)
print(zarr.tree(root))