def skeldir(cloudpath):
  cf = CloudFiles(cloudpath)
  info = cf.get_json('info')

  skel_dir = 'skeletons/'
  if 'skeletons' in info:
    skel_dir = info['skeletons']

  return skel_dir
def execute(self):
  cf = CloudFiles(self.layer_path)
  self._info = cf.get_json('info')

  if self.mesh_dir is None and 'mesh' in self._info:
    self.mesh_dir = self._info['mesh']

  self._generate_manifests(cf)
def fetch_info(self):
  cf = CloudFiles(self.cloudpath, secrets=self.config.secrets)
  self.attributes["root"] = cf.get_json("attributes.json")

  if 'pixelResolution' in self.attributes["root"]:
    resolution = self.attributes["root"]["pixelResolution"]["dimensions"]
  else:
    resolution = self.attributes["root"]["resolution"]

  scale_dirs = [
    cf.join(f"s{i}", "attributes.json")
    for i in range(len(self.attributes["root"]["scales"]))
  ]
  scale_attrs = cf.get_json(scale_dirs)
  self.attributes["scales"] = scale_attrs

  # glossing over that each scale can have
  # a different data type, but usually it
  # should all be the same
  data_type = scale_attrs[0]["dataType"]

  info = PrecomputedMetadata.create_info(
    num_channels=1,
    layer_type="image",
    data_type=data_type,
    encoding=scale_attrs[0]["compression"]["type"],
    resolution=resolution,
    voxel_offset=[0, 0, 0],
    volume_size=scale_attrs[0]["dimensions"][:3],
    chunk_size=scale_attrs[0]["blockSize"],
  )

  for scale in scale_attrs[1:]:
    self.add_scale(
      scale["downsamplingFactors"],
      chunk_size=scale["blockSize"],
      encoding=scale["compression"]["type"],
      info=info,
    )

  return info
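# Illustrative sketch (not part of the source): the shape of the N5
# attributes.json documents that fetch_info() above reads. The key names
# are taken directly from the accessors in the code; the concrete values
# are invented placeholders.
example_root_attributes = {
  "pixelResolution": {"dimensions": [8, 8, 40]},  # or a plain "resolution" list
  "scales": [[1, 1, 1], [2, 2, 1]],               # one entry per s0, s1, ... directory
}
example_scale_attributes = {  # e.g. s0/attributes.json
  "dataType": "uint8",
  "compression": {"type": "gzip"},
  "dimensions": [2048, 2048, 512],
  "blockSize": [64, 64, 64],
  "downsamplingFactors": [1, 1, 1],
}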
def test_get_json_order(s3, protocol):
  from cloudfiles import CloudFiles
  url = compute_url(protocol, 'get_json_order')
  cf = CloudFiles(url)

  N = 5300
  cf.put_jsons(( (str(z), [ z ]) for z in range(N) ))
  contents = cf.get_json(( str(z) for z in range(N) ))

  for z, content in enumerate(contents):
    assert content[0] == z

  cf.delete(( str(z) for z in range(N) ))
class JSONLayerBase(BaseLayerBackend):
  """A directory with one text file per section"""

  def __init__(self, path, backend, reference=None, overwrite=True, **kwargs):
    super().__init__(**kwargs)
    self.path = path
    self.dtype = 'O'
    self.backend = backend
    self.cf = CloudFiles(self.path, progress=False)

  def __str__(self):
    return "JSON {}".format(self.path)

  def get_sublayer(self, name, layer_type=None, path=None, **kwargs):
    if path is None:
      path = os.path.join(self.path, layer_type, name)
    if layer_type is None:
      layer_type = self.get_layer_type()
    return self.backend.create_layer(
      path=path, reference=self, layer_type=layer_type, **kwargs
    )

  def get_filename(self, z):
    return f'{z:06d}'

  def read_backend(self, bcube, **kwargs):
    z_range = bcube.z_range()
    corgie_logger.debug(f'Read from {str(self)}, z: {z_range}')
    data = []
    for z in z_range:
      f = self.cf.get_json(self.get_filename(z))
      data.append(f)
    return data

  def write_backend(self, data, bcube, **kwargs):
    z_range = range(*bcube.z_range())
    assert len(data) == len(z_range)
    corgie_logger.debug(f'Write to {str(self)}, z: {z_range}')
    filepaths = [self.get_filename(z) for z in z_range]
    self.cf.put_jsons(zip(filepaths, data), cache_control='no-cache')
def MeshManifestPrefixTask(
  layer_path: str,
  prefix: str,
  lod: int = 0,
  mesh_dir: Optional[str] = None
):
  """
  Finalize mesh generation by post-processing chunk fragment
  lists into mesh fragment manifests. These are necessary
  for neuroglancer to know which mesh fragments to download
  for a given segid.

  If we parallelize using single digit prefixes ['0','1',...,'9'],
  all meshes will be correctly processed. But if we use two-digit
  prefixes ['10','11',...,'99'], meshes with segids in [0,9] won't
  get processed and need to be handled specially by creating tasks
  that each process a single mesh: ['0:','1:',...,'9:']
  """
  cf = CloudFiles(layer_path)
  info = cf.get_json('info')

  if mesh_dir is None and 'mesh' in info:
    mesh_dir = info['mesh']

  prefix = cf.join(mesh_dir, prefix)
  segids = defaultdict(list)

  regexp = re.compile(r'(\d+):(\d+):')
  for filename in cf.list(prefix=prefix):
    filename = os.path.basename(filename)
    # `match` anchors at the beginning (^); `search` scans the whole string
    matches = re.search(regexp, filename)
    if not matches:
      continue

    segid, mlod = matches.groups()
    segid, mlod = int(segid), int(mlod)

    if mlod != lod:
      continue

    segids[segid].append(filename)

  items = (
    (f"{mesh_dir}/{segid}:{lod}", { "fragments": frags })
    for segid, frags in segids.items()
  )
  cf.put_jsons(items)
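# Illustrative sketch (not part of the source): why two-character numeric
# prefixes miss single-digit segids, as the docstring above describes.
# Fragment filenames begin with "<segid>:<lod>:"; the bounding-box suffix
# shown here is an invented placeholder.
two_digit_prefixes = [str(i) for i in range(10, 100)]  # '10' .. '99'
assert any("123:0:0-64_0-64_0-64".startswith(p) for p in two_digit_prefixes)
assert not any("7:0:0-64_0-64_0-64".startswith(p) for p in two_digit_prefixes)
# Hence the extra per-segid tasks with prefixes '0:' .. '9:' cover segids 0-9.
assert "7:0:0-64_0-64_0-64".startswith("7:")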
def test_read_write(s3, protocol, num_threads, green):
  from cloudfiles import CloudFiles, exceptions

  url = compute_url(protocol, "rw")
  cf = CloudFiles(url, num_threads=num_threads, green=green)

  content = b'some_string'
  cf.put('info', content, compress=None, cache_control='no-cache')
  cf['info2'] = content

  assert cf.get('info') == content
  assert cf['info2'] == content
  assert cf['info2', 0:3] == content[0:3]
  assert cf['info2', :] == content[:]
  assert cf.get('nonexistentfile') is None

  assert cf.get('info', return_dict=True) == { "info": content }
  assert cf.get(['info', 'info2'], return_dict=True) == {
    "info": content,
    "info2": content,
  }

  del cf['info2']
  assert cf.exists('info2') == False

  num_infos = max(num_threads, 1)
  results = cf.get([ 'info' for i in range(num_infos) ])

  assert len(results) == num_infos
  assert results[0]['path'] == 'info'
  assert results[0]['content'] == content
  assert all(map(lambda x: x['error'] is None, results))
  assert cf.get(['nonexistentfile'])[0]['content'] is None

  cf.delete('info')

  cf.put_json('info', { 'omg': 'wow' }, cache_control='no-cache')
  results = cf.get_json('info')
  assert results == { 'omg': 'wow' }

  cf.delete('info')

  if protocol == 'file':
    rmtree(url)
def __init__(self,
             volume_path: str,
             mip: int = 0,
             expand_margin_size: Cartesian = Cartesian(0, 0, 0),
             expand_direction: int = None,
             fill_missing: bool = False,
             validate_mip: int = None,
             blackout_sections: bool = None,
             dry_run: bool = False,
             name: str = 'cutout'):
  super().__init__(name=name)
  self.volume_path = volume_path
  self.mip = mip
  self.fill_missing = fill_missing
  self.validate_mip = validate_mip
  self.blackout_sections = blackout_sections
  self.dry_run = dry_run

  if isinstance(expand_margin_size, tuple):
    expand_margin_size = Cartesian.from_collection(expand_margin_size)

  if expand_direction == 1:
    expand_margin_size = (0, 0, 0, *expand_margin_size)
  elif expand_direction == -1:
    expand_margin_size = (*expand_margin_size, 0, 0, 0)
  else:
    assert expand_direction is None
  self.expand_margin_size = expand_margin_size

  if blackout_sections:
    stor = CloudFiles(volume_path)
    self.blackout_section_ids = stor.get_json(
      'blackout_section_ids.json')['section_ids']

  verbose = (logging.getLogger().getEffectiveLevel() <= 30)
  self.vol = CloudVolume(
    self.volume_path,
    bounded=False,
    fill_missing=self.fill_missing,
    progress=verbose,
    mip=self.mip,
    cache=False,
    green_threads=True)
def create_xfer_meshes_tasks(
  src:str,
  dest:str,
  mesh_dir:Optional[str] = None,
  magnitude=2,
):
  cv_src = CloudVolume(src)
  cf_dest = CloudFiles(dest)

  if not mesh_dir:
    info = cf_dest.get_json("info")
    if info.get("mesh", None):
      mesh_dir = info.get("mesh")

  cf_dest.put_json(f"{mesh_dir}/info", cv_src.mesh.meta.info)

  alphabet = [ str(i) for i in range(10) ]
  if cv_src.mesh.meta.is_sharded():
    alphabet += [ 'a', 'b', 'c', 'd', 'e', 'f' ]

  prefixes = itertools.product(*([ alphabet ] * magnitude))
  prefixes = [ "".join(x) for x in prefixes ]

  # explicitly enumerate all prefixes smaller than the magnitude.
  for i in range(1, magnitude):
    explicit_prefix = itertools.product(*([ alphabet ] * i))
    explicit_prefix = [ "".join(x) for x in explicit_prefix ]

    if cv_src.mesh.meta.is_sharded():
      prefixes += [ f"{x}." for x in explicit_prefix ]
    else:
      prefixes += [ f"{x}:0" for x in explicit_prefix ]

  return [
    partial(TransferMeshFilesTask,
      src=src,
      dest=dest,
      prefix=prefix,
      mesh_dir=mesh_dir,
    )
    for prefix in prefixes
  ]
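# Illustrative sketch (not part of the source): the prefix set produced by
# create_xfer_meshes_tasks for an unsharded mesh layer with magnitude=2.
import itertools

alphabet = [str(i) for i in range(10)]
prefixes = ["".join(x) for x in itertools.product(alphabet, alphabet)]  # '00' .. '99'
prefixes += [f"{x}:0" for x in alphabet]  # '0:0' .. '9:0' catch single-digit segids
assert len(prefixes) == 110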
def MeshManifestFilesystemTask(
  layer_path: str,
  lod: int = 0,
  mesh_dir: Optional[str] = None,
):
  cf = CloudFiles(layer_path)
  info = cf.get_json('info')

  if mesh_dir is None and 'mesh' in info:
    mesh_dir = info['mesh']

  filepath = cloudfiles.paths.asfilepath(cf.join(layer_path, mesh_dir))
  segids = defaultdict(list)

  regexp = re.compile(r'(\d+):(\d+):')
  for entry in os.scandir(filepath):
    if not entry.is_file():
      continue

    filename = os.path.basename(entry.name)
    # `match` anchors at the beginning (^); `search` scans the whole string
    matches = re.search(regexp, filename)
    if not matches:
      continue

    segid, mlod = matches.groups()
    segid, mlod = int(segid), int(mlod)

    if mlod != lod:
      continue

    filename, ext = os.path.splitext(filename)
    segids[segid].append(filename)

  items = (
    (f"{mesh_dir}/{segid}:{lod}", { "fragments": frags })
    for segid, frags in segids.items()
  )
  cf.put_jsons(items)
def configure_multires_info(
  cloudpath:str,
  vertex_quantization_bits:int,
  mesh_dir:str
):
  """
  Computes properties and uploads a multires mesh info file
  """
  assert vertex_quantization_bits in (10, 16)

  vol = CloudVolume(cloudpath)

  mesh_dir = mesh_dir or vol.info.get("mesh", None)

  if not "mesh" in vol.info:
    vol.info['mesh'] = mesh_dir
    vol.commit_info()

  res = vol.meta.resolution(vol.mesh.meta.mip)

  cf = CloudFiles(cloudpath)
  info_filename = f'{mesh_dir}/info'
  mesh_info = cf.get_json(info_filename) or {}
  new_mesh_info = copy.deepcopy(mesh_info)
  new_mesh_info['@type'] = "neuroglancer_multilod_draco"
  new_mesh_info['vertex_quantization_bits'] = vertex_quantization_bits
  new_mesh_info['transform'] = [
    res[0], 0,      0,      0,
    0,      res[1], 0,      0,
    0,      0,      res[2], 0,
  ]
  new_mesh_info['lod_scale_multiplier'] = 1.0

  if new_mesh_info != mesh_info:
    cf.put_json(
      info_filename, new_mesh_info,
      cache_control="no-cache"
    )
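# Illustrative sketch (not part of the source): the multires mesh info file
# written above, assuming a mesh-mip resolution of 8 x 8 x 40 (placeholder
# values). The 12-element transform is a pure scaling by the resolution.
example_multires_mesh_info = {
  "@type": "neuroglancer_multilod_draco",
  "vertex_quantization_bits": 16,
  "transform": [
    8, 0, 0,  0,
    0, 8, 0,  0,
    0, 0, 40, 0,
  ],
  "lod_scale_multiplier": 1.0,
}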
def test_http_read():
  from cloudfiles import CloudFiles, exceptions
  cf = CloudFiles("https://storage.googleapis.com/seunglab-test/test_v0/black/")
  info = cf.get_json('info')

  assert info == {
    "data_type": "uint8",
    "num_channels": 1,
    "scales": [
      {
        "chunk_sizes": [[64, 64, 50]],
        "encoding": "raw",
        "key": "6_6_30",
        "resolution": [6, 6, 30],
        "size": [1024, 1024, 100],
        "voxel_offset": [0, 0, 0],
      },
    ],
    "type": "image",
  }
def create_meshing_tasks(
  layer_path, mip, shape=(448, 448, 448),
  simplification=True, max_simplification_error=40,
  mesh_dir=None, cdn_cache=False, dust_threshold=None,
  object_ids=None, progress=False, fill_missing=False,
  encoding='precomputed', spatial_index=True,
  sharded=False, compress='gzip'
):
  shape = Vec(*shape)

  vol = CloudVolume(layer_path, mip)

  if mesh_dir is None:
    mesh_dir = 'mesh_mip_{}_err_{}'.format(mip, max_simplification_error)

  if not 'mesh' in vol.info:
    vol.info['mesh'] = mesh_dir
    vol.commit_info()

  cf = CloudFiles(layer_path)
  info_filename = '{}/info'.format(mesh_dir)
  mesh_info = cf.get_json(info_filename) or {}
  mesh_info['@type'] = 'neuroglancer_legacy_mesh'
  mesh_info['mip'] = int(vol.mip)
  mesh_info['chunk_size'] = shape.tolist()
  if spatial_index:
    mesh_info['spatial_index'] = {
      'resolution': vol.resolution.tolist(),
      'chunk_size': (shape * vol.resolution).tolist(),
    }
  cf.put_json(info_filename, mesh_info)

  class MeshTaskIterator(FinelyDividedTaskIterator):
    def task(self, shape, offset):
      return MeshTask(
        shape=shape.clone(),
        offset=offset.clone(),
        layer_path=layer_path,
        mip=vol.mip,
        simplification_factor=(0 if not simplification else 100),
        max_simplification_error=max_simplification_error,
        mesh_dir=mesh_dir,
        cache_control=('' if cdn_cache else 'no-cache'),
        dust_threshold=dust_threshold,
        progress=progress,
        object_ids=object_ids,
        fill_missing=fill_missing,
        encoding=encoding,
        spatial_index=spatial_index,
        sharded=sharded,
        compress=compress,
      )

    def on_finish(self):
      vol.provenance.processing.append({
        'method': {
          'task': 'MeshTask',
          'layer_path': layer_path,
          'mip': vol.mip,
          'shape': shape.tolist(),
          'simplification': simplification,
          'max_simplification_error': max_simplification_error,
          'mesh_dir': mesh_dir,
          'fill_missing': fill_missing,
          'cdn_cache': cdn_cache,
          'dust_threshold': dust_threshold,
          'encoding': encoding,
          'object_ids': object_ids,
          'spatial_index': spatial_index,
          'sharded': sharded,
          'compress': compress,
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
      })
      vol.commit_provenance()

  return MeshTaskIterator(vol.mip_bounds(mip), shape)
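# Hypothetical usage sketch (not part of the source): enqueueing the
# generated MeshTasks locally. Assumes the LocalTaskQueue API of the
# python-task-queue package and a placeholder layer path.
from taskqueue import LocalTaskQueue

tasks = create_meshing_tasks(
  'gs://example-bucket/example-dataset/seg',  # placeholder path
  mip=3,
  shape=(448, 448, 448),
)
tq = LocalTaskQueue(parallel=4)
tq.insert(tasks)
tq.execute()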
def create_spatial_index_mesh_tasks(
  cloudpath:str,
  shape:Tuple[int,int,int] = (448,448,448),
  mip:int = 0,
  fill_missing:bool = False,
  compress:Optional[Union[str,bool]] = 'gzip',
  mesh_dir:Optional[str] = None
):
  """
  The main way to add a spatial index is to use the MeshTask,
  but old or broken datasets may need it to be reconstituted.
  An alternative use is to create the spatial index over a
  different area size than the mesh task.
  """
  shape = Vec(*shape)

  vol = CloudVolume(cloudpath, mip=mip)

  if mesh_dir is None:
    mesh_dir = f"mesh_mip_{mip}_err_40"

  if not "mesh" in vol.info:
    vol.info['mesh'] = mesh_dir
    vol.commit_info()

  cf = CloudFiles(cloudpath)
  info_filename = '{}/info'.format(mesh_dir)
  mesh_info = cf.get_json(info_filename) or {}
  new_mesh_info = copy.deepcopy(mesh_info)
  new_mesh_info['@type'] = new_mesh_info.get('@type', 'neuroglancer_legacy_mesh')
  new_mesh_info['mip'] = new_mesh_info.get("mip", int(vol.mip))
  new_mesh_info['chunk_size'] = shape.tolist()
  new_mesh_info['spatial_index'] = {
    'resolution': vol.resolution.tolist(),
    'chunk_size': (shape * vol.resolution).tolist(),
  }
  if new_mesh_info != mesh_info:
    cf.put_json(info_filename, new_mesh_info)

  class SpatialIndexMeshTaskIterator(FinelyDividedTaskIterator):
    def task(self, shape, offset):
      return partial(MeshSpatialIndex,
        cloudpath=cloudpath,
        shape=shape,
        offset=offset,
        mip=int(mip),
        fill_missing=bool(fill_missing),
        compress=compress,
        mesh_dir=mesh_dir,
      )

    def on_finish(self):
      vol.provenance.processing.append({
        'method': {
          'task': 'MeshSpatialIndex',
          'cloudpath': vol.cloudpath,
          'shape': shape.tolist(),
          'mip': int(mip),
          'mesh_dir': mesh_dir,
          'fill_missing': fill_missing,
          'compress': compress,
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
      })
      vol.commit_provenance()

  return SpatialIndexMeshTaskIterator(vol.bounds, shape)
def create_spatial_index_skeleton_tasks(
  cloudpath: str,
  shape: Tuple[int, int, int] = (448, 448, 448),
  mip: int = 0,
  fill_missing: bool = False,
  compress: Optional[Union[str, bool]] = 'gzip',
  skel_dir: Optional[str] = None
):
  """
  The main way to add a spatial index is to use the SkeletonTask,
  but old or broken datasets may need it to be reconstituted.
  An alternative use is to create the spatial index over a
  different area size than the skeleton task.
  """
  shape = Vec(*shape)

  vol = CloudVolume(cloudpath, mip=mip)

  if skel_dir is None and not vol.info.get("skeletons", None):
    skel_dir = f"skeletons_mip_{mip}"
  elif skel_dir is None and vol.info.get("skeletons", None):
    skel_dir = vol.info["skeletons"]

  if not "skeletons" in vol.info:
    vol.info['skeletons'] = skel_dir
    vol.commit_info()

  cf = CloudFiles(cloudpath)
  info_filename = cf.join(skel_dir, 'info')
  skel_info = cf.get_json(info_filename) or {}
  new_skel_info = copy.deepcopy(skel_info)
  new_skel_info['@type'] = new_skel_info.get('@type', 'neuroglancer_skeletons')
  new_skel_info['mip'] = new_skel_info.get("mip", int(vol.mip))
  new_skel_info['chunk_size'] = shape.tolist()
  new_skel_info['spatial_index'] = {
    'resolution': vol.resolution.tolist(),
    'chunk_size': (shape * vol.resolution).tolist(),
  }
  if new_skel_info != skel_info:
    cf.put_json(info_filename, new_skel_info)

  vol = CloudVolume(cloudpath, mip=mip)  # reload spatial_index

  class SpatialIndexSkeletonTaskIterator(FinelyDividedTaskIterator):
    def task(self, shape, offset):
      return partial(SpatialIndexTask,
        cloudpath=cloudpath,
        shape=shape,
        offset=offset,
        subdir=skel_dir,
        precision=vol.skeleton.spatial_index.precision,
        mip=int(mip),
        fill_missing=bool(fill_missing),
        compress=compress,
      )

    def on_finish(self):
      vol.provenance.processing.append({
        'method': {
          'task': 'SpatialIndexTask',
          'cloudpath': vol.cloudpath,
          'shape': shape.tolist(),
          'mip': int(mip),
          'subdir': skel_dir,
          'fill_missing': fill_missing,
          'compress': compress,
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
      })
      vol.commit_provenance()

  return SpatialIndexSkeletonTaskIterator(vol.bounds, shape)