def _upload_individuals(self, mesh_binaries, generate_manifests):
  cf = CloudFiles(self.layer_path)

  content_type = "model/mesh"
  if self.options["encoding"] == "draco":
    content_type = "model/x.draco"

  cf.puts(
    (
      (
        f"{self._mesh_dir}/{segid}:{self.options['lod']}:{self._bounds.to_filename()}",
        mesh_binary
      )
      for segid, mesh_binary in mesh_binaries.items()
    ),
    compress=self._encoding_to_compression_dict[self.options['encoding']],
    cache_control=self.options['cache_control'],
    content_type=content_type,
  )

  if generate_manifests:
    cf.put_jsons(
      (
        (
          f"{self._mesh_dir}/{segid}:{self.options['lod']}",
          {
            "fragments": [
              f"{segid}:{self.options['lod']}:{self._bounds.to_filename()}"
            ]
          }
        )
        for segid in mesh_binaries.keys()
      ),
      compress=None,
      cache_control=self.options['cache_control'],
    )
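
# Below is a minimal, hypothetical sketch of the layout _upload_individuals
# produces, assuming a local file:// path; the segid (888), lod (0), and
# bounds string are illustrative, not from the original code.
import tempfile
from cloudfiles import CloudFiles

tmp_cf = CloudFiles("file://" + tempfile.mkdtemp())

# Fragment files are named "{mesh_dir}/{segid}:{lod}:{bounds}"; the manifest
# "{mesh_dir}/{segid}:{lod}" lists the fragment basenames.
tmp_cf.put("mesh/888:0:0-64_0-64_0-64", b"mesh bytes", content_type="model/mesh")
tmp_cf.put_json("mesh/888:0", { "fragments": [ "888:0:0-64_0-64_0-64" ] })

assert tmp_cf.get_json("mesh/888:0")["fragments"] == [ "888:0:0-64_0-64_0-64" ]
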
def test_get_json_order(s3, protocol):
  from cloudfiles import CloudFiles
  url = compute_url(protocol, 'get_json_order')
  cf = CloudFiles(url)

  N = 5300
  cf.put_jsons(( (str(z), [ z ]) for z in range(N) ))
  contents = cf.get_json(( str(z) for z in range(N) ))

  for z, content in enumerate(contents):
    assert content[0] == z

  cf.delete(( str(z) for z in range(N) ))
class JSONLayerBase(BaseLayerBackend):
    """A directory with one JSON text file per section."""

    def __init__(self, path, backend, reference=None, overwrite=True, **kwargs):
        super().__init__(**kwargs)
        self.path = path
        self.dtype = 'O'
        self.backend = backend
        self.cf = CloudFiles(self.path, progress=False)

    def __str__(self):
        return "JSON {}".format(self.path)

    def get_sublayer(self, name, layer_type=None, path=None, **kwargs):
        # Resolve layer_type before using it to build the default path.
        if layer_type is None:
            layer_type = self.get_layer_type()
        if path is None:
            path = os.path.join(self.path, layer_type, name)
        return self.backend.create_layer(path=path, reference=self,
                layer_type=layer_type, **kwargs)

    def get_filename(self, z):
        return f'{z:06d}'

    def read_backend(self, bcube, **kwargs):
        z_range = bcube.z_range()
        corgie_logger.debug(f'Read from {str(self)}, z: {z_range}')
        data = []
        for z in z_range:
            f = self.cf.get_json(self.get_filename(z))
            data.append(f)
        return data

    def write_backend(self, data, bcube, **kwargs):
        z_range = range(*bcube.z_range())
        assert len(data) == len(z_range)
        corgie_logger.debug(f'Write to {str(self)}, z: {z_range}')
        filepaths = [self.get_filename(z) for z in z_range]
        self.cf.put_jsons(zip(filepaths, data), cache_control='no-cache')
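
# A minimal sketch of the storage convention JSONLayerBase relies on: one
# zero-padded JSON file per z section, written with no-cache headers. The
# local file:// path and sample data are illustrative.
import tempfile
from cloudfiles import CloudFiles

cf = CloudFiles("file://" + tempfile.mkdtemp(), progress=False)

z_range = range(3)
data = [ {"z": z} for z in z_range ]
filepaths = [ f"{z:06d}" for z in z_range ]    # mirrors get_filename()

cf.put_jsons(zip(filepaths, data), cache_control='no-cache')
assert cf.get_json("000001") == {"z": 1}
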
def MeshManifestPrefixTask(
  layer_path: str,
  prefix: str,
  lod: int = 0,
  mesh_dir: Optional[str] = None
):
  """
  Finalize mesh generation by post-processing chunk fragment
  lists into mesh fragment manifests. These are necessary
  for neuroglancer to know which mesh fragments to download
  for a given segid.

  If we parallelize using single-digit prefixes ['0','1',...,'9'],
  all meshes will be correctly processed. But if we use two-digit
  prefixes ['10','11',...,'99'], meshes with segids in [0,9] won't
  get processed and need to be handled specifically by creating
  tasks that each process a single mesh: ['0:','1:',...,'9:'].
  A complete prefix set is sketched after this function.
  """
  cf = CloudFiles(layer_path)
  info = cf.get_json('info')

  if mesh_dir is None and 'mesh' in info:
    mesh_dir = info['mesh']

  prefix = cf.join(mesh_dir, prefix)

  segids = defaultdict(list)
  regexp = re.compile(r'(\d+):(\d+):')
  for filename in cf.list(prefix=prefix):
    filename = os.path.basename(filename)
    # re.match anchors at the beginning (^); re.search scans the whole string.
    matches = re.search(regexp, filename)
    if not matches:
      continue

    segid, mlod = matches.groups()
    segid, mlod = int(segid), int(mlod)

    if mlod != lod:
      continue

    segids[segid].append(filename)

  items = (
    (f"{mesh_dir}/{segid}:{lod}", { "fragments": frags })
    for segid, frags in segids.items()
  )
  cf.put_jsons(items)
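
# A hedged sketch of generating a complete, non-overlapping prefix set for
# MeshManifestPrefixTask, per the docstring above: the two-digit prefixes
# '10'..'99' cover every multi-digit segid (its first two digits form a
# number in [10, 99]), while '0:'..'9:' each match exactly one single-digit
# segid. mesh_manifest_prefixes is a hypothetical helper.
def mesh_manifest_prefixes():
  prefixes = [ f"{i}:" for i in range(10) ]       # single-digit segids only
  prefixes += [ str(i) for i in range(10, 100) ]  # all segids with >= 2 digits
  return prefixes

# e.g. tasks = [
#   partial(MeshManifestPrefixTask, layer_path, prefix)
#   for prefix in mesh_manifest_prefixes()
# ]
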
def MeshManifestFilesystemTask(
  layer_path: str,
  lod: int = 0,
  mesh_dir: Optional[str] = None,
):
  cf = CloudFiles(layer_path)
  info = cf.get_json('info')

  if mesh_dir is None and 'mesh' in info:
    mesh_dir = info['mesh']

  filepath = cloudfiles.paths.asfilepath(cf.join(layer_path, mesh_dir))

  segids = defaultdict(list)
  regexp = re.compile(r'(\d+):(\d+):')
  for entry in os.scandir(filepath):
    if not entry.is_file():
      continue

    filename = os.path.basename(entry.name)
    # re.match anchors at the beginning (^); re.search scans the whole string.
    matches = re.search(regexp, filename)
    if not matches:
      continue

    segid, mlod = matches.groups()
    segid, mlod = int(segid), int(mlod)

    if mlod != lod:
      continue

    # strip the compression extension (e.g. .gz) from on-disk fragment files
    filename, ext = os.path.splitext(filename)
    segids[segid].append(filename)

  items = (
    (f"{mesh_dir}/{segid}:{lod}", { "fragments": frags })
    for segid, frags in segids.items()
  )
  cf.put_jsons(items)
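
# Quick check of the fragment-filename regex shared by both manifest tasks:
# fragment files ("{segid}:{lod}:{bounds}") match; manifest files
# ("{segid}:{lod}") and other files do not. Filenames are illustrative.
import re
regexp = re.compile(r'(\d+):(\d+):')

for filename in ("888:0:0-64_0-64_0-64", "888:0", "info"):
  matches = re.search(regexp, filename)
  print(filename, "->", matches.groups() if matches else None)
# 888:0:0-64_0-64_0-64 -> ('888', '0')
# 888:0 -> None
# info -> None
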
def create_sharded_multires_mesh_tasks(
  cloudpath:str,
  shard_index_bytes=2**13,
  minishard_index_bytes=2**15,
  num_lod:int = 1,
  draco_compression_level:int = 1,
  vertex_quantization_bits:int = 16,
  minishard_index_encoding="gzip",
  mesh_dir:Optional[str] = None,
  spatial_index_db:Optional[str] = None
) -> Iterator[MultiResShardedMeshMergeTask]:
  configure_multires_info(
    cloudpath,
    vertex_quantization_bits,
    mesh_dir
  )

  cv = CloudVolume(cloudpath, progress=True, spatial_index_db=spatial_index_db)
  cv.mip = cv.mesh.meta.mip

  # 17 sec to download for pinky100
  all_labels = cv.mesh.spatial_index.query(cv.bounds * cv.resolution)

  (shard_bits, minishard_bits, preshift_bits) = \
    compute_shard_params_for_hashed(
      num_labels=len(all_labels),
      shard_index_bytes=int(shard_index_bytes),
      minishard_index_bytes=int(minishard_index_bytes),
    )

  spec = ShardingSpecification(
    type='neuroglancer_uint64_sharded_v1',
    preshift_bits=preshift_bits,
    hash='murmurhash3_x86_128',
    minishard_bits=minishard_bits,
    shard_bits=shard_bits,
    minishard_index_encoding=minishard_index_encoding,
    data_encoding="raw", # draco encoded meshes
  )

  cv.mesh.meta.info['sharding'] = spec.to_dict()
  cv.mesh.meta.commit_info()

  # rebuild b/c sharding changes the mesh source class
  cv = CloudVolume(cloudpath)

  # perf: ~66.5k hashes/sec on M1 ARM64
  shardfn = lambda lbl: cv.mesh.reader.spec.compute_shard_location(lbl).shard_number

  shard_labels = defaultdict(list)
  for label in tqdm(all_labels, desc="Hashes"):
    shard_labels[shardfn(label)].append(label)
  del all_labels

  cf = CloudFiles(cv.mesh.meta.layerpath, progress=True)
  files = (
    (str(shardno) + '.labels', labels)
    for shardno, labels in shard_labels.items()
  )
  cf.put_jsons(
    files,
    compress="gzip",
    cache_control="no-cache",
    total=len(shard_labels),
  )

  cv.provenance.processing.append({
    'method': {
      'task': 'MultiResShardedMeshMergeTask',
      'cloudpath': cloudpath,
      'mip': cv.mesh.meta.mip,
      'num_lod': num_lod,
      'vertex_quantization_bits': vertex_quantization_bits,
      'preshift_bits': preshift_bits,
      'minishard_bits': minishard_bits,
      'shard_bits': shard_bits,
      'mesh_dir': mesh_dir,
      'draco_compression_level': draco_compression_level,
    },
    'by': operator_contact(),
    'date': strftime('%Y-%m-%d %H:%M %Z'),
  })
  cv.commit_provenance()

  return [
    partial(MultiResShardedMeshMergeTask,
      cloudpath, shard_no,
      num_lod=num_lod,
      mesh_dir=mesh_dir,
      spatial_index_db=spatial_index_db,
      draco_compression_level=draco_compression_level,
    )
    for shard_no in shard_labels.keys()
  ]
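
# Sketch of the "{shardno}.labels" convention used above: each shard number
# maps to a gzipped JSON list of the labels hashed into it, which the
# per-shard merge task later fetches. The path and labels are illustrative.
import tempfile
from collections import defaultdict
from cloudfiles import CloudFiles

shard_labels = defaultdict(list)
shard_labels[0] = [ 17, 942 ]
shard_labels[1] = [ 8 ]

cf = CloudFiles("file://" + tempfile.mkdtemp())
cf.put_jsons(
  ( (str(shardno) + '.labels', labels) for shardno, labels in shard_labels.items() ),
  compress="gzip",
  cache_control="no-cache",
  total=len(shard_labels),
)
assert cf.get_json("0.labels") == [ 17, 942 ]
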
def create_sharded_skeleton_merge_tasks(
  layer_path, dust_threshold, tick_threshold,
  shard_index_bytes=2**13,
  minishard_index_bytes=2**15,
  minishard_index_encoding='gzip',
  data_encoding='gzip',
  max_cable_length=None,
  spatial_index_db=None
):
  cv = CloudVolume(layer_path, progress=True, spatial_index_db=spatial_index_db)
  cv.mip = cv.skeleton.meta.mip

  # 17 sec to download for pinky100
  all_labels = cv.skeleton.spatial_index.query(cv.bounds * cv.resolution)

  (shard_bits, minishard_bits, preshift_bits) = \
    compute_shard_params_for_hashed(
      num_labels=len(all_labels),
      shard_index_bytes=int(shard_index_bytes),
      minishard_index_bytes=int(minishard_index_bytes),
    )

  spec = ShardingSpecification(
    type='neuroglancer_uint64_sharded_v1',
    preshift_bits=preshift_bits,
    hash='murmurhash3_x86_128',
    minishard_bits=minishard_bits,
    shard_bits=shard_bits,
    minishard_index_encoding=minishard_index_encoding,
    data_encoding=data_encoding,
  )

  cv.skeleton.meta.info['sharding'] = spec.to_dict()
  cv.skeleton.meta.commit_info()

  # rebuild b/c sharding changes the skeleton source class
  cv = CloudVolume(layer_path, progress=True, spatial_index_db=spatial_index_db)
  cv.mip = cv.skeleton.meta.mip

  # perf: ~36k hashes/sec
  shardfn = lambda lbl: cv.skeleton.reader.spec.compute_shard_location(lbl).shard_number

  shard_labels = defaultdict(list)
  for label in tqdm(all_labels, desc="Hashes"):
    shard_labels[shardfn(label)].append(label)

  cf = CloudFiles(cv.skeleton.meta.layerpath, progress=True)
  files = (
    (str(shardno) + '.labels', labels)
    for shardno, labels in shard_labels.items()
  )
  cf.put_jsons(
    files,
    compress="gzip",
    cache_control="no-cache",
    total=len(shard_labels),
  )

  cv.provenance.processing.append({
    'method': {
      'task': 'ShardedSkeletonMergeTask',
      'cloudpath': layer_path,
      'mip': cv.skeleton.meta.mip,
      'dust_threshold': dust_threshold,
      'tick_threshold': tick_threshold,
      'max_cable_length': max_cable_length,
      'preshift_bits': preshift_bits,
      'minishard_bits': minishard_bits,
      'shard_bits': shard_bits,
    },
    'by': operator_contact(),
    'date': strftime('%Y-%m-%d %H:%M %Z'),
  })
  cv.commit_provenance()

  return [
    ShardedSkeletonMergeTask(
      layer_path, shard_no,
      dust_threshold, tick_threshold,
      max_cable_length=max_cable_length,
    )
    for shard_no in shard_labels.keys()
  ]
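
# A hedged sketch of what lands in info['sharding'] above, assuming the
# ShardingSpecification import path from CloudVolume's precomputed backend;
# the bit counts are illustrative stand-ins for the output of
# compute_shard_params_for_hashed.
from cloudvolume.datasource.precomputed.sharding import ShardingSpecification

spec = ShardingSpecification(
  type='neuroglancer_uint64_sharded_v1',
  preshift_bits=3,
  hash='murmurhash3_x86_128',
  minishard_bits=6,
  shard_bits=7,
  minishard_index_encoding='gzip',
  data_encoding='gzip',
)
print(spec.to_dict())  # JSON-serializable, e.g. {'@type': 'neuroglancer_uint64_sharded_v1', ...}
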
def create_sharded_multires_mesh_from_unsharded_tasks(
  src:str,
  dest:str,
  shard_index_bytes=2**13,
  minishard_index_bytes=2**15,
  min_shards:int = 1,
  num_lod:int = 1,
  draco_compression_level:int = 1,
  vertex_quantization_bits:int = 16,
  minishard_index_encoding="gzip",
  mesh_dir:Optional[str] = None,
) -> Iterator[MultiResShardedMeshMergeTask]:
  configure_multires_info(
    dest,
    vertex_quantization_bits,
    mesh_dir
  )

  cv_src = CloudVolume(src)
  cf = CloudFiles(cv_src.mesh.meta.layerpath)

  all_labels = []
  SEGID_RE = re.compile(r'(\d+):0(?:\.gz|\.br|\.zstd)?$')
  for path in cf.list():
    match = SEGID_RE.search(path)
    if match is None:
      continue
    (segid,) = match.groups()
    all_labels.append(int(segid))

  (shard_bits, minishard_bits, preshift_bits) = \
    compute_shard_params_for_hashed(
      num_labels=len(all_labels),
      shard_index_bytes=int(shard_index_bytes),
      minishard_index_bytes=int(minishard_index_bytes),
      min_shards=int(min_shards),
    )

  cv_dest = CloudVolume(dest, mesh_dir=mesh_dir)
  cv_dest.mesh.meta.info["mip"] = cv_src.mesh.meta.mip
  cv_dest.commit_info()

  spec = ShardingSpecification(
    type='neuroglancer_uint64_sharded_v1',
    preshift_bits=preshift_bits,
    hash='murmurhash3_x86_128',
    minishard_bits=minishard_bits,
    shard_bits=shard_bits,
    minishard_index_encoding=minishard_index_encoding,
    data_encoding="raw", # draco encoded meshes
  )

  cv_dest.mesh.meta.info['sharding'] = spec.to_dict()
  cv_dest.mesh.meta.commit_info()

  # rebuild b/c sharding changes the mesh source class
  cv_dest = CloudVolume(dest, mesh_dir=mesh_dir)

  # perf: ~66.5k hashes/sec on M1 ARM64
  shardfn = lambda lbl: cv_dest.mesh.reader.spec.compute_shard_location(lbl).shard_number

  shard_labels = defaultdict(list)
  for label in tqdm(all_labels, desc="Hashes"):
    shard_labels[shardfn(label)].append(label)
  del all_labels

  cf = CloudFiles(cv_dest.mesh.meta.layerpath, progress=True)
  files = (
    (str(shardno) + '.labels', labels)
    for shardno, labels in shard_labels.items()
  )
  cf.put_jsons(
    files,
    compress="gzip",
    cache_control="no-cache",
    total=len(shard_labels),
  )

  cv_dest.provenance.processing.append({
    'method': {
      'task': 'MultiResShardedFromUnshardedMeshMergeTask',
      'src': src,
      'dest': dest,
      'num_lod': num_lod,
      'vertex_quantization_bits': vertex_quantization_bits,
      'preshift_bits': preshift_bits,
      'minishard_bits': minishard_bits,
      'shard_bits': shard_bits,
      'mesh_dir': mesh_dir,
      'draco_compression_level': draco_compression_level,
    },
    'by': operator_contact(),
    'date': strftime('%Y-%m-%d %H:%M %Z'),
  })
  cv_dest.commit_provenance()

  return [
    partial(MultiResShardedFromUnshardedMeshMergeTask,
      src=src,
      dest=dest,
      shard_no=shard_no,
      num_lod=num_lod,
      mesh_dir=mesh_dir,
      draco_compression_level=draco_compression_level,
    )
    for shard_no in shard_labels.keys()
  ]
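
# Quick check of SEGID_RE above: it matches unsharded LOD-0 manifest
# filenames, with or without a compression suffix, but not fragment files.
# Example paths are illustrative.
import re
SEGID_RE = re.compile(r'(\d+):0(?:\.gz|\.br|\.zstd)?$')

for path in ("mesh/123:0", "mesh/123:0.gz", "mesh/123:0:0-64_0-64_0-64", "info"):
  match = SEGID_RE.search(path)
  print(path, "->", match.group(1) if match else None)
# mesh/123:0 -> 123
# mesh/123:0.gz -> 123
# mesh/123:0:0-64_0-64_0-64 -> None  (fragment file, not a manifest)
# info -> None
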
def create_sharded_skeletons_from_unsharded_tasks(
  src: str,
  dest: str,
  shard_index_bytes=2**13,
  minishard_index_bytes=2**15,
  min_shards: int = 1,
  minishard_index_encoding='gzip',
  data_encoding='gzip',
  skel_dir: Optional[str] = None,
) -> Iterator[ShardedFromUnshardedSkeletonMergeTask]:
  cv_src = CloudVolume(src)
  cv_src.mip = cv_src.skeleton.meta.mip

  cf = CloudFiles(cv_src.skeleton.meta.layerpath)

  all_labels = []
  SEGID_RE = re.compile(r'(\d+)(?:\.gz|\.br|\.zstd)?$')
  for path in cf.list():
    match = SEGID_RE.search(path)
    if match is None:
      continue
    (segid,) = match.groups()
    all_labels.append(int(segid))

  cv_dest = CloudVolume(dest, skel_dir=skel_dir)
  cv_dest.skeleton.meta.info = copy.deepcopy(cv_src.skeleton.meta.info)
  cv_dest.skeleton.meta.info["vertex_attributes"] = [
    attr for attr in cv_dest.skeleton.meta.info["vertex_attributes"]
    if attr["data_type"] in ("float32", "float64")
  ]

  (shard_bits, minishard_bits, preshift_bits) = \
    compute_shard_params_for_hashed(
      num_labels=len(all_labels),
      shard_index_bytes=int(shard_index_bytes),
      minishard_index_bytes=int(minishard_index_bytes),
      min_shards=int(min_shards),
    )

  spec = ShardingSpecification(
    type='neuroglancer_uint64_sharded_v1',
    preshift_bits=preshift_bits,
    hash='murmurhash3_x86_128',
    minishard_bits=minishard_bits,
    shard_bits=shard_bits,
    minishard_index_encoding=minishard_index_encoding,
    data_encoding=data_encoding,
  )

  cv_dest.skeleton.meta.info['sharding'] = spec.to_dict()
  cv_dest.skeleton.meta.commit_info()

  # rebuild b/c sharding changes the skeleton source class
  cv_dest = CloudVolume(dest, skel_dir=skel_dir)

  # perf: ~66.5k hashes/sec on M1 ARM64
  shardfn = lambda lbl: cv_dest.skeleton.reader.spec.compute_shard_location(lbl).shard_number

  shard_labels = defaultdict(list)
  for label in tqdm(all_labels, desc="Hashes"):
    shard_labels[shardfn(label)].append(label)
  del all_labels

  cf = CloudFiles(cv_dest.skeleton.meta.layerpath, progress=True)
  files = (
    (str(shardno) + '.labels', labels)
    for shardno, labels in shard_labels.items()
  )
  cf.put_jsons(
    files,
    compress="gzip",
    cache_control="no-cache",
    total=len(shard_labels),
  )

  cv_dest.provenance.processing.append({
    'method': {
      'task': 'ShardedFromUnshardedSkeletonMergeTask',
      'src': src,
      'dest': dest,
      'preshift_bits': preshift_bits,
      'minishard_bits': minishard_bits,
      'shard_bits': shard_bits,
      'skel_dir': skel_dir,
    },
    'by': operator_contact(),
    'date': strftime('%Y-%m-%d %H:%M %Z'),
  })
  cv_dest.commit_provenance()

  return [
    partial(
      ShardedFromUnshardedSkeletonMergeTask,
      src=src,
      dest=dest,
      shard_no=shard_no,
      skel_dir=skel_dir,
    )
    for shard_no in shard_labels.keys()
  ]
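
# Sketch of the vertex_attributes filter above: only float32/float64
# attributes survive the copy into the sharded destination info. The sample
# attribute list is illustrative.
info = { "vertex_attributes": [
  { "id": "radius", "data_type": "float32", "num_components": 1 },
  { "id": "vertex_types", "data_type": "uint8", "num_components": 1 },
]}
info["vertex_attributes"] = [
  attr for attr in info["vertex_attributes"]
  if attr["data_type"] in ("float32", "float64")
]
assert [ a["id"] for a in info["vertex_attributes"] ] == [ "radius" ]
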