def _compute_meshes(self):
    with Storage(self.layer_path) as storage:
        data = self._data[:, :, :, 0].T
        self._mesher.mesh(data)

        for obj_id in self._mesher.ids():
            if self.options['remap_table'] is None:
                remapped_id = obj_id
            else:
                remapped_id = self._remap_list[obj_id]

            storage.put_file(
                file_path='{}/{}:{}:{}'.format(
                    self._mesh_dir, remapped_id, self.options['lod'],
                    self._bounds.to_filename()
                ),
                content=self._create_mesh(obj_id),
                compress=True,
                cache_control=self.options['cache_control'],
            )

            if self.options['generate_manifests']:
                fragments = []
                fragments.append('{}:{}:{}'.format(
                    remapped_id, self.options['lod'],
                    self._bounds.to_filename()
                ))

                storage.put_file(
                    file_path='{}/{}:{}'.format(
                        self._mesh_dir, remapped_id, self.options['lod']),
                    content=json.dumps({"fragments": fragments}),
                    content_type='application/json',
                    cache_control=self.options['cache_control'],
                )
def fetch_z_levels(self):
    bounds = Bbox(self.offset, self.shape[:3] + self.offset)

    levelfilenames = [
        'levels/{}/{}'.format(self.mip, z)
        for z in range(bounds.minpt.z, bounds.maxpt.z)
    ]

    with Storage(self.levels_path) as stor:
        levels = stor.get_files(levelfilenames)

    errors = [
        level['filename']
        for level in levels if level['content'] is None
    ]

    if len(errors):
        raise Exception(
            ", ".join(errors) +
            " were not defined. Did you run a LuminanceLevelsTask for these slices?"
        )

    levels = [(
        int(os.path.basename(item['filename'])),
        json.loads(item['content'].decode('utf-8'))
    ) for item in levels]

    levels.sort(key=lambda x: x[0])
    levels = [x[1] for x in levels]
    return [np.array(x['levels'], dtype=np.uint64) for x in levels]
def fetch_z_levels(self, bounds):
    """
    Read out the histograms for each corresponding section.

    TODO: use local cache for the z levels
    """
    levelfilenames = [
        'levels/{}/{}'.format(self.mip, z)
        for z in range(bounds.minpt.z, bounds.maxpt.z)
    ]

    with Storage(self.levels_path) as stor:
        levels = stor.get_files(levelfilenames)

    errors = [
        level['filename']
        for level in levels if level['content'] is None
    ]

    if len(errors):
        raise Exception(
            ", ".join(errors) +
            " were not defined. Did you run a LuminanceLevelsTask for these slices?"
        )

    levels = [(
        int(os.path.basename(item['filename'])),
        json.loads(item['content'].decode('utf-8'))
    ) for item in levels]
    levels.sort(key=lambda x: x[0])
    levels = [x[1] for x in levels]
    return [np.array(x['levels'], dtype=np.uint64) for x in levels]
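# Hedged usage sketch of the per-section histograms returned by fetch_z_levels:
# convert one histogram into clip bounds for contrast normalization. The 1%/99%
# cutoffs are illustrative defaults, not values taken from the surrounding code.
import numpy as np

def clip_bounds_from_levels(levels: np.ndarray, lower: float = 0.01, upper: float = 0.99):
    # Normalized cumulative histogram, then find the intensities at which the
    # lower and upper fractions of voxels are reached.
    cdf = np.cumsum(levels).astype(np.float64)
    cdf /= cdf[-1]
    lo = int(np.searchsorted(cdf, lower))
    hi = int(np.searchsorted(cdf, upper))
    return lo, hi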
def put_chunk_edges(
    edges_dir: str, chunk_coordinates: np.ndarray, edges_d, compression_level: int
) -> None:
    """
    :param edges_dir: cloudvolume storage path
    :type str:
    :param chunk_coordinates: chunk coords x,y,z
    :type np.ndarray:
    :param edges_d: edges_d with keys "in", "cross", "between"
    :type dict:
    :param compression_level: zstandard compression level (1-22, higher - better ratio)
    :type int:
    :return None:
    """
    chunk_edges = ChunkEdgesMsg()
    chunk_edges.in_chunk.CopyFrom(serialize(edges_d[EDGE_TYPES.in_chunk]))
    chunk_edges.between_chunk.CopyFrom(serialize(edges_d[EDGE_TYPES.between_chunk]))
    chunk_edges.cross_chunk.CopyFrom(serialize(edges_d[EDGE_TYPES.cross_chunk]))

    cctx = zstd.ZstdCompressor(level=compression_level)
    chunk_str = "_".join(str(coord) for coord in chunk_coordinates)

    # filename format - edges_x_y_z.serialization.compression
    file = f"edges_{chunk_str}.proto.zst"
    with Storage(edges_dir) as storage:
        storage.put_file(
            file_path=file,
            content=cctx.compress(chunk_edges.SerializeToString()),
            compress=None,
            cache_control="no-cache",
        )
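# A standalone round-trip sketch of the zstandard framing used by
# put_chunk_edges, assuming only the `zstandard` package (aliased as `zstd`
# above). The payload here is a stand-in bytestring, not a real ChunkEdgesMsg.
import zstandard as zstd

def _zstd_roundtrip_example(payload: bytes, level: int = 17) -> bytes:
    compressed = zstd.ZstdCompressor(level=level).compress(payload)
    # ZstdCompressor embeds the content size in the frame header, so
    # ZstdDecompressor.decompress needs no explicit size hint here.
    return zstd.ZstdDecompressor().decompress(compressed)

assert _zstd_roundtrip_example(b"edges_0_0_0") == b"edges_0_0_0"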
def create_info_file_from_build(layer_path, layer_type, resolution, encoding):
    assert layer_type in ('image', 'segmentation', 'affinities')

    with Storage(layer_path) as storage:
        bounds, build_chunk_size = compute_build_bounding_box(storage)
        data_type, num_channels = get_build_data_type_and_shape(storage)

    neuroglancer_chunk_size = find_closest_divisor(
        build_chunk_size, closest_to=[64, 64, 64])

    info = CloudVolume.create_new_info(
        num_channels=num_channels,
        layer_type=layer_type,
        data_type=data_type,
        encoding=encoding,
        resolution=resolution,
        voxel_offset=bounds.minpt.tolist(),
        volume_size=bounds.size3(),
        mesh=(layer_type == 'segmentation'),
        chunk_size=list(map(int, neuroglancer_chunk_size)),
    )

    vol = CloudVolume(layer_path, mip=0, info=info).commit_info()
    vol = create_downsample_scales(
        layer_path, mip=0, ds_shape=build_chunk_size, axis='z')

    return vol.info
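# find_closest_divisor is defined elsewhere in this codebase; this is only a
# hedged sketch of the behavior assumed above (per axis, pick the divisor of
# the build chunk size nearest the target of 64), not the real helper.
def find_closest_divisor_sketch(to_divide, closest_to):
    def closest(n, target):
        divisors = [d for d in range(1, n + 1) if n % d == 0]
        return min(divisors, key=lambda d: abs(d - target))
    return [closest(int(n), int(t)) for n, t in zip(to_divide, closest_to)]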
def remesh_pending(cg: ChunkedGraph):
    mesh_dir = cg.meta.dataset_info["mesh"]
    mesh_info = cg.meta.custom_data.get("mesh", {})
    unsharded_mesh_path = join(
        cg.meta.data_source.WATERSHED,
        mesh_dir,
        cg.meta.dataset_info["mesh_metadata"]["unsharded_mesh_dir"],
    )

    pending_path = f"{unsharded_mesh_path}/in-progress"
    for task in _get_pending_tasks(pending_path):
        fname, l2ids = task
        print(f"remeshing IDs {l2ids} from {fname}")
        remeshing(
            cg,
            l2ids,
            stop_layer=mesh_info["max_layer"],
            mip=mesh_info["mip"],
            max_err=mesh_info["max_error"],
            cv_sharded_mesh_dir=mesh_dir,
            cv_unsharded_mesh_path=unsharded_mesh_path,
        )
        with Storage(pending_path) as storage:  # pylint: disable=not-context-manager
            storage.delete_file(fname)
        print(f"remesh job for {fname} with {l2ids} complete")
def execute(self):
    with Storage(self.layer_path) as storage:
        self._info = json.loads(storage.get_file('info').decode('utf8'))

        if self.mesh_dir is None and 'mesh' in self._info:
            self.mesh_dir = self._info['mesh']

        self._generate_manifests(storage)
def __iter__(self):
    with Storage(cloudpath) as storage:
        for filename in storage.list_files(prefix='build/'):
            yield IngestTask(
                chunk_path=storage.get_path_to_file('build/' + filename),
                chunk_encoding='npz',
                layer_path=cloudpath,
            )
def _get_pending_tasks(pending_path: str) -> list:
    # uint64 added to the import so the dtype below is defined in this scope
    from numpy import frombuffer, uint64

    tasks = []
    with Storage(pending_path) as storage:  # pylint: disable=not-context-manager
        for f in storage.get_files(list(storage.list_files(prefix=REMESH_PREFIX))):
            tasks.append((f["filename"], frombuffer(f["content"], dtype=uint64)))
    return tasks
def _compute_meshes(self):
    with Storage(self.layer_path) as storage:
        data = self._data[:, :, :, 0].T
        self._mesher.mesh(data.flatten(), *data.shape[:3])

        for obj_id in self._mesher.ids():
            storage.put_file(
                file_path='{}/{}:{}:{}'.format(
                    self._mesh_dir, obj_id, self.lod, self._bounds.to_filename()),
                content=self._create_mesh(obj_id),
                compress=True,
            )
def load_raw_skeletons():
    print("Downloading list of files...")
    print(cv.skeleton.meta.layerpath)
    with Storage(cv.skeleton.meta.layerpath, progress=True) as stor:
        all_files = list(stor.list_files())

    all_files = [
        fname for fname in all_files
        if os.path.splitext(fname)[1] == '.frags'
    ]

    print("Downloading files...")
    with Storage(cv.skeleton.meta.layerpath, progress=True) as stor:
        all_files = stor.get_files(all_files)

    # CHECKPOINT?

    for i, res in enumerate(tqdm(all_files, desc='Unpickling')):
        all_files[i] = pickle.loads(res['content'])

    # group by segid
    unfused_skeletons = defaultdict(list)
    while all_files:
        fragment = all_files.pop()
        for label, skel_frag in fragment.items():
            unfused_skeletons[label].append(skel_frag)

    # CHECKPOINT?

    skeletons = crt_dict()
    labels = list(unfused_skeletons.keys())
    for label in tqdm(labels, desc='Simple Merging'):
        skels = unfused_skeletons[label]
        skeleton = PrecomputedSkeleton.simple_merge(skels)
        skeleton.id = label
        skeleton.extra_attributes = [
            attr for attr in skeleton.extra_attributes
            if attr['data_type'] == 'float32'
        ]
        skeletons[label] = skeleton
        del unfused_skeletons[label]

    return skeletons
def get_highest_child_nodes_with_meshes(
    cg,
    node_id: np.uint64,
    stop_layer=2,
    start_layer=None,
    verify_existence=False,
    bounding_box=None,
    flexible_start_layer=None,
):
    if flexible_start_layer is not None:
        # Get highest children that are at flexible_start_layer or below
        # (do this because of skip connections)
        candidates = cg.get_children_at_layer(node_id, flexible_start_layer, True)
    elif start_layer is None:
        candidates = np.array([node_id], dtype=np.uint64)
    else:
        candidates = cg.get_subgraph_nodes(
            node_id,
            bounding_box=bounding_box,
            bb_is_coordinate=True,
            return_layers=[start_layer],
        )

    if verify_existence:
        valid_node_ids = []
        with Storage(cg.cv_mesh_path) as stor:
            while True:
                filenames = [get_mesh_name(cg, c) for c in candidates]

                time_start = time.time()
                existence_dict = stor.files_exist(filenames)
                print("Existence took: %.3fs" % (time.time() - time_start))

                missing_meshes = []
                for mesh_key in existence_dict:
                    node_id = np.uint64(mesh_key.split(":")[0])
                    if existence_dict[mesh_key]:
                        valid_node_ids.append(node_id)
                    else:
                        if cg.get_chunk_layer(node_id) > stop_layer:
                            missing_meshes.append(node_id)

                time_start = time.time()
                if missing_meshes:
                    candidates = cg.get_children(missing_meshes, flatten=True)
                else:
                    break
                print("ChunkedGraph lookup took: %.3fs" % (time.time() - time_start))
    else:
        valid_node_ids = candidates
    return valid_node_ids
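# get_mesh_name is defined elsewhere in this codebase and is not shown here.
# Based on how its output is parsed above (the node id precedes the first ':')
# and on the fragment names built in compute_mesh_centroids_of_l2_ids below,
# it plausibly resembles this hedged sketch; the real helper may differ.
def get_mesh_name_sketch(cg, node_id, lod: int = 0) -> str:
    chunk_bbox = meshgen_utils.get_chunk_bbox_str(cg, cg.get_chunk_id(node_id))
    return f"{node_id}:{lod}:{chunk_bbox}"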
def _create_manifest_files_thread(args):
    cg_info, cv_path, cv_mesh_dir, root_id_start, root_id_end, \
        highest_mesh_level = args

    cg = ChunkedGraph(**cg_info)

    with Storage(cv_path) as cv_storage:
        for root_seg_id in range(root_id_start, root_id_end):
            root_id = cg.get_node_id(
                np.uint64(root_seg_id),
                cg.get_chunk_id(layer=int(cg.n_layers), x=0, y=0, z=0))
def test_mesh_manifests():
    directory = '/tmp/removeme/mesh_manifests/'
    layer_path = 'file://' + directory
    mesh_dir = 'mesh_mip_3_error_40'

    delete_layer(layer_path)

    to_path = lambda filename: os.path.join(directory, mesh_dir, filename)

    n_segids = 100
    n_lods = 2
    n_fragids = 5

    with Storage(layer_path) as stor:
        stor.put_file('info', '{"mesh":"mesh_mip_3_error_40"}'.encode('utf8'))

    for segid in range(n_segids):
        for lod in range(n_lods):
            for fragid in range(n_fragids):
                filename = '{}:{}:{}'.format(segid, lod, fragid)
                lib.touch(to_path(filename))

    for i in range(10):
        MeshManifestTask(layer_path=layer_path, prefix=i, lod=0).execute()

    for segid in range(n_segids):
        for fragid in range(n_fragids):
            filename = '{}:0'.format(segid)
            assert os.path.exists(to_path(filename))
            filename = '{}:1'.format(segid)
            assert not os.path.exists(to_path(filename))

    for i in range(10):
        MeshManifestTask(layer_path=layer_path, prefix=i, lod=1).execute()

    for segid in range(n_segids):
        for fragid in range(n_fragids):
            filename = '{}:0'.format(segid)
            assert os.path.exists(to_path(filename))
            filename = '{}:1'.format(segid)
            assert os.path.exists(to_path(filename))

    with open(to_path('50:0'), 'r') as f:
        content = json.loads(f.read())
        assert content == {
            "fragments": ["50:0:0", "50:0:1", "50:0:2", "50:0:3", "50:0:4"]
        }

    if os.path.exists(directory):
        shutil.rmtree(directory)
def mesh_node_and_parents(node_id, cg, cv_path, cv_mesh_dir=None, mip=3,
                          highest_mesh_level=1, create_manifest_root=True,
                          lod=0):
    layer = cg.get_chunk_layer(node_id)
    parents = [node_id] + list(cg.get_all_parents(node_id))

    for i_layer in range(layer, highest_mesh_level + 1):
        mesh_single_component(parents[i_layer], cg=cg, cv_path=cv_path,
                              cv_mesh_dir=cv_mesh_dir, mip=mip)

    if create_manifest_root:
        with Storage(cv_path) as cv_storage:
            create_manifest_file(cg=cg, cv_storage=cv_storage,
                                 cv_mesh_dir=cv_mesh_dir, node_id=parents[-1],
                                 highest_mesh_level=highest_mesh_level,
                                 mip=mip, lod=lod)
def compute_mesh_centroids_of_l2_ids(cg, l2_ids, flatten=False):
    """
    Given a list of l2_ids, return a tuple containing a dict that maps l2_ids to
    their mesh's centroid (a global coordinate), and a list of the l2_ids for
    which the mesh does not exist.

    :param cg: ChunkedGraph object
    :param l2_ids: Sequence[np.uint64]
    :return: centroids (Dict[np.uint64, np.ndarray], or a flat list if
        ``flatten``), and a List[np.uint64] of l2_ids whose mesh could not be
        fetched
    """
    fragments_to_fetch = [
        f"{l2_id}:0:{meshgen_utils.get_chunk_bbox_str(cg, cg.get_chunk_id(l2_id))}"
        for l2_id in l2_ids
    ]
    if flatten:
        centroids_with_chunk_boundary_points = []
    else:
        centroids_with_chunk_boundary_points = {}
    last_l2_id = None
    failed_l2_ids = []

    with Storage(cg.cv_mesh_path) as storage:
        files_contents = storage.get_files(fragments_to_fetch)
        fragment_map = {}
        for i in range(len(files_contents)):
            fragment_map[files_contents[i]["filename"]] = files_contents[i]
        for i in range(len(fragments_to_fetch)):
            fragment_to_fetch = fragments_to_fetch[i]
            l2_id = l2_ids[i]
            try:
                fragment = fragment_map[fragment_to_fetch]
                if fragment["content"] is not None and fragment["error"] is None:
                    mesh = meshgen.decode_draco_mesh_buffer(fragment["content"])
                    if flatten:
                        centroids_with_chunk_boundary_points.extend(
                            compute_centroid_with_chunk_boundary(
                                cg, mesh["vertices"], l2_id, last_l2_id
                            )
                        )
                    else:
                        centroids_with_chunk_boundary_points[
                            l2_id
                        ] = compute_centroid_with_chunk_boundary(
                            cg, mesh["vertices"], l2_id, last_l2_id
                        )
            except:
                failed_l2_ids.append(l2_id)
            last_l2_id = l2_id
    return centroids_with_chunk_boundary_points, failed_l2_ids
def get_highest_child_nodes_with_meshes(cg, node_id: np.uint64, stop_layer=1,
                                        start_layer=None, verify_existence=False):
    if start_layer is None:
        start_layer = cg.n_layers

    # FIXME: Read those from config
    HIGHEST_MESH_LAYER = min(start_layer, cg.n_layers - 3)
    MESH_MIP = 2

    highest_node = get_downstream_multi_child_node(cg, node_id, stop_layer)
    highest_node_layer = cg.get_chunk_layer(highest_node)

    if highest_node_layer <= HIGHEST_MESH_LAYER:
        candidates = [highest_node]
    else:
        candidates = cg.get_subgraph_nodes(
            highest_node, return_layers=[HIGHEST_MESH_LAYER])

    if verify_existence:
        valid_node_ids = []
        with Storage(cg.cv_mesh_path) as stor:
            while True:
                filenames = [get_mesh_name(cg, c, MESH_MIP) for c in candidates]
                existence_dict = stor.files_exist(filenames)

                missing_meshes = []
                for mesh_key in existence_dict:
                    node_id = np.uint64(mesh_key.split(':')[0])
                    if existence_dict[mesh_key]:
                        valid_node_ids.append(node_id)
                    else:
                        if cg.get_chunk_layer(node_id) > stop_layer:
                            missing_meshes.append(node_id)

                if missing_meshes:
                    candidates = cg.get_children(missing_meshes, flatten=True)
                else:
                    break
    else:
        valid_node_ids = candidates
    return valid_node_ids
def cache(task, cloudpath):
    layer_path, filename = os.path.split(cloudpath)

    classname = task.__class__.__name__
    lcldir = mkdir(os.path.join('/tmp/', classname))
    lclpath = os.path.join(lcldir, filename)

    if os.path.exists(lclpath):
        with open(lclpath, 'rb') as f:
            filestr = f.read()
    else:
        with Storage(layer_path, n_threads=0) as stor:
            filestr = stor.get_file(filename)

        with open(lclpath, 'wb') as f:
            f.write(filestr)

    return filestr
def remesh(cg: ChunkedGraph, operation_id: int, l2ids: ndarray):
    from cloudvolume.storage import SimpleStorage as Storage

    mesh_info = cg.meta.custom_data.get("mesh", {})
    mesh_dir, unsharded_mesh_path, bucket_path, file_name = get_remesh_info(
        cg, operation_id)
    remeshing(
        cg,
        l2ids,
        stop_layer=mesh_info["max_layer"],
        mip=mesh_info["mip"],
        max_err=mesh_info["max_error"],
        cv_sharded_mesh_dir=mesh_dir,
        cv_unsharded_mesh_path=unsharded_mesh_path,
    )
    with Storage(bucket_path) as storage:  # pylint: disable=not-context-manager
        storage.delete_file(file_name)
def create_segment_metadata_file(cv_input_path, cv_output_path,
                                 output_filename, override_default_size_limit):
    mip = 0
    cv = CloudVolume(cv_input_path, mip)
    max_allowed_size = np.int64(3000000000)
    vol_size = np.prod(cv.volume_size)
    if vol_size > max_allowed_size and not override_default_size_limit:
        raise ValueError(
            f'Volume size of {vol_size} exceeds maximum of 3 billion voxels')

    volume_bbox = Bbox(cv.voxel_offset, cv.shape[0:3] + cv.voxel_offset)
    data = cv[volume_bbox]
    unique_segids = np.unique(data, return_counts=True)
    del data

    arr = np.array([])
    for x in zip(unique_segids[0], unique_segids[1]):
        if x[0] != 0:
            arr = np.append(arr, {"segmentId": str(x[0]), "voxelCount": x[1]})

    with Storage(cv_output_path) as storage:
        storage.put_file(file_path=output_filename,
                         content=json.dumps(arr, cls=NumpyEncoder),
                         compress=False,
                         cache_control='no-cache')
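# NumpyEncoder is referenced above but not defined in this excerpt. A minimal
# sketch that would let json.dumps handle the numpy array and numpy scalars
# built in create_segment_metadata_file might look like this (an assumption,
# not the project's actual class).
import json
import numpy as np

class NumpyEncoderSketch(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)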
def get_chunk_edges(
    edges_dir: str, chunks_coordinates: List[np.ndarray], cv_threads: int = 1
) -> Dict:
    """
    :param edges_dir: cloudvolume storage path
    :type str:
    :param chunks_coordinates: list of chunk coords for which to load edges
    :type List[np.ndarray]:
    :param cv_threads: cloudvolume storage client thread count
    :type int:
    :return: dictionary {"edge_type": Edges}
    """
    fnames = []
    for chunk_coords in chunks_coordinates:
        chunk_str = "_".join(str(coord) for coord in chunk_coords)
        # filename format - edges_x_y_z.serialization.compression
        fnames.append(f"edges_{chunk_str}.proto.zst")

    storage = (
        Storage(edges_dir, n_threads=cv_threads)
        if cv_threads > 1
        else SimpleStorage(edges_dir)
    )

    chunk_edge_dicts = []
    with storage:
        files = storage.get_files(fnames)
        for _file in files:
            # cv error
            if _file["error"]:
                raise ValueError(_file["error"])
            # empty chunk
            if not _file["content"]:
                continue
            edges_dict = _decompress_edges(_file["content"])
            chunk_edge_dicts.append(edges_dict)
    return concatenate_chunk_edges(chunk_edge_dicts)
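# _decompress_edges is not shown in this excerpt. A hedged sketch of its likely
# shape, assuming it is the inverse of put_chunk_edges above and that a
# `deserialize` counterpart to `serialize` exists; the real implementation may
# differ.
def _decompress_edges_sketch(content: bytes) -> dict:
    dctx = zstd.ZstdDecompressor()
    chunk_edges = ChunkEdgesMsg()
    chunk_edges.ParseFromString(dctx.decompress(content))
    return {
        EDGE_TYPES.in_chunk: deserialize(chunk_edges.in_chunk),
        EDGE_TYPES.between_chunk: deserialize(chunk_edges.between_chunk),
        EDGE_TYPES.cross_chunk: deserialize(chunk_edges.cross_chunk),
    }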
def execute(self):
    srccv = CloudVolume(self.src_path, mip=self.mip, fill_missing=True)

    # Accumulate a histogram of the luminance levels
    nbits = np.dtype(srccv.dtype).itemsize * 8
    levels = np.zeros(shape=(2 ** nbits,), dtype=np.uint64)

    bounds = Bbox(self.offset, self.shape[:3] + self.offset)
    bounds = Bbox.clamp(bounds, srccv.bounds)

    bboxes = self.select_bounding_boxes(bounds)
    for bbox in bboxes:
        img2d = srccv[bbox.to_slices()].reshape((bbox.volume()))
        cts = np.bincount(img2d)
        levels[0:len(cts)] += cts.astype(np.uint64)

    covered_area = sum([bbx.volume() for bbx in bboxes])

    bboxes = [(bbox.volume(), bbox.size3()) for bbox in bboxes]
    bboxes.sort(key=lambda x: x[0])
    biggest = bboxes[-1][1]

    output = {
        "levels": levels.tolist(),
        "patch_size": biggest.tolist(),
        "num_patches": len(bboxes),
        "coverage_ratio": covered_area / self.shape.rectVolume(),
    }

    path = self.levels_path if self.levels_path else self.src_path
    path = os.path.join(path, 'levels')

    with Storage(path, n_threads=0) as stor:
        stor.put_json(
            file_path="{}/{}".format(self.mip, self.offset.z),
            content=output,
            cache_control='no-cache',
        )
missing_report = []
invalid_report = []
success_report = []

for dataset in datasets:
    layers = ls(dataset)
    for layer in layers:
        if not valid_paths.match(layer):
            continue
        if 'removeme' in layer:
            continue

        with Storage(layer, n_threads=0) as stor:
            if not stor.exists('provenance'):
                missing_report.append(layer)
            else:
                prov = stor.get_file('provenance')
                try:
                    prov = DataLayerProvenance().from_json(prov)
                except:
                    invalid_report.append(layer)
                else:
                    success_report.append(layer)

RESET_COLOR = "\033[m"
YELLOW = "\033[1;93m"
RED = '\033[1;91m'
    pool = pathos.pools.ProcessPool(parallel)
    for skel in pool.uimap(complex_merge, skeletons.values()):
        merged_skeletons[skel.id] = skel.to_precomputed()
        pbar.update(1)
    pool.close()
    pool.join()
    pool.clear()

    return merged_skeletons

if has_checkpoint('complex-merge'):
    merged_skeletons = load_checkpoint('complex-merge')
else:
    skeletons = checkpoint('simple-merge', load_raw_skeletons)
    postprocessfn = lambda: postprocess(skeletons)
    merged_skeletons = checkpoint('complex-merge', postprocessfn)
    del skeletons
    del postprocessfn

shard_files = synthesize_shard_files(spec, merged_skeletons, progress=True)

uploadable = [(fname, data) for fname, data in shard_files.items()]
with Storage(cv.skeleton.meta.layerpath) as stor:
    stor.put_files(
        files=uploadable,
        compress=False,
        content_type='application/octet-stream',
        cache_control='no-cache',
    )
def _download_input_chunk(self, bounds):
    storage = Storage(self.layer_path, n_threads=0)
    relpath = 'build/{}'.format(bounds.to_filename())
    return storage.get_file(relpath)
def ingest(args):
    """
    Ingest an HDF file to a CloudVolume bucket
    """
    if args.local_hdf_path:
        hdf_file = h5py.File(args.local_hdf_path, "r")
    else:
        with Storage(args.cloud_src_path) as storage:
            hdf_file = h5py.File(storage.get_file(args.cloud_hdf_filename), "r")

    cur_hdf_group = hdf_file
    for group_name in args.hdf_keys_to_dataset:
        cur_hdf_group = cur_hdf_group[group_name]
    hdf_dataset = cur_hdf_group

    if args.zyx:
        dataset_shape = np.array(
            [hdf_dataset.shape[2], hdf_dataset.shape[1], hdf_dataset.shape[0]])
    else:
        dataset_shape = np.array([*hdf_dataset.shape])

    if args.layer_type == "image":
        data_type = "uint8"
    else:
        data_type = "uint64"

    voxel_offset = args.voxel_offset
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type=args.layer_type,
        data_type=data_type,
        encoding="raw",
        resolution=args.resolution,
        voxel_offset=voxel_offset,
        chunk_size=args.chunk_size,
        volume_size=dataset_shape,
    )
    provenance = {
        "description": args.provenance_description,
        "owners": [args.owner]
    }
    vol = CloudVolume(args.dst_path, info=info, provenance=provenance)
    vol.commit_info()
    vol.commit_provenance()

    all_files = set()
    for x in np.arange(voxel_offset[0], voxel_offset[0] + dataset_shape[0],
                       args.chunk_size[0]):
        for y in np.arange(voxel_offset[1], voxel_offset[1] + dataset_shape[1],
                           args.chunk_size[1]):
            for z in np.arange(voxel_offset[2], voxel_offset[2] + dataset_shape[2],
                               args.chunk_size[2]):
                all_files.add(tuple((x, y, z)))

    progress_dir = mkdir("progress/")  # unlike os.mkdir doesn't crash on preexisting
    done_files = set()
    for done_file in os.listdir(progress_dir):
        # progress filenames are str() of the chunk start tuple, e.g. "(0, 0, 0)";
        # parse them back into integer tuples so the set difference below works
        done_files.add(tuple(int(c) for c in done_file.strip("()").split(",")))
    to_upload = all_files.difference(done_files)

    for chunk_start_tuple in to_upload:
        chunk_start = np.array(list(chunk_start_tuple))
        end_of_dataset = np.array(voxel_offset) + dataset_shape
        chunk_end = chunk_start + np.array(args.chunk_size)
        chunk_end = Vec(*chunk_end)
        chunk_end = Vec.clamp(chunk_end, Vec(0, 0, 0), end_of_dataset)
        chunk_hdf_start = chunk_start - voxel_offset
        chunk_hdf_end = chunk_end - voxel_offset

        if args.zyx:
            chunk = hdf_dataset[chunk_hdf_start[2]:chunk_hdf_end[2],
                                chunk_hdf_start[1]:chunk_hdf_end[1],
                                chunk_hdf_start[0]:chunk_hdf_end[0]]
            chunk = chunk.T
        else:
            chunk = hdf_dataset[chunk_hdf_start[0]:chunk_hdf_end[0],
                                chunk_hdf_start[1]:chunk_hdf_end[1],
                                chunk_hdf_start[2]:chunk_hdf_end[2]]

        print("Processing ", chunk_start_tuple)
        array = np.array(chunk, dtype=np.dtype(data_type), order="F")
        vol[chunk_start[0]:chunk_end[0],
            chunk_start[1]:chunk_end[1],
            chunk_start[2]:chunk_end[2]] = array
        touch(os.path.join(progress_dir, str(chunk_start_tuple)))
def run_task_bundle(settings: Mapping, roi: Tuple[slice, slice, slice]):
    # Remember: DB must be cleared before starting a whole new run
    with open("/secrets/mysql") as passwd:
        mysql_conn = MySQLdb.connect(
            host=settings["mysql"]["host"],
            user=settings["mysql"]["user"],
            db=settings["mysql"]["db"],
            passwd=passwd.read().strip())

    cgraph = chunkedgraph.ChunkedGraph(
        table_id=settings["chunkedgraph"]["table_id"],
        instance_id=settings["chunkedgraph"]["instance_id"])

    # Things to check:
    # - Agglomeration and Input Watershed have the same offset/size
    # - Taskbundle Offset and ROI is a multiple of cgraph.chunksize
    # - Output Watershed chunksize must be a multiple of cgraph.chunksize
    agglomeration_input = CloudVolume(
        settings["layers"]["agglomeration_path_input"], bounded=False)
    watershed_input = CloudVolume(
        settings["layers"]["watershed_path_input"], bounded=False)
    watershed_output = CloudVolume(
        settings["layers"]["watershed_path_output"], bounded=False, autocrop=True)

    regiongraph_input = Storage(settings["regiongraph"]["regiongraph_path_input"])
    regiongraph_output = Storage(settings["regiongraph"]["regiongraph_path_output"])
    regiongraph_chunksize = tuple(settings["regiongraph"]["chunksize"])

    chunkgraph_chunksize = np.array(cgraph.chunk_size, dtype=np.int)
    output_watershed_chunksize = np.array(watershed_output.underlying, dtype=np.int)
    outer_chunksize = np.maximum(chunkgraph_chunksize,
                                 output_watershed_chunksize, dtype=np.int)

    # Iterate through TaskBundle using a minimal chunk size that is a multiple
    # of the output watershed chunk size and the Chunked Graph chunk size.
    for ox in range(roi[0].start, roi[0].stop, outer_chunksize[0]):
        for oy in range(roi[1].start, roi[1].stop, outer_chunksize[1]):
            for oz in range(roi[2].start, roi[2].stop, outer_chunksize[2]):
                watershed_output_buffer = np.zeros((*outer_chunksize, 1),
                                                   dtype=np.uint64)

                # Iterate through ChunkGraph chunk-sized tasks:
                for ix_start in range(0, outer_chunksize[0], chunkgraph_chunksize[0]):
                    for iy_start in range(0, outer_chunksize[1], chunkgraph_chunksize[1]):
                        for iz_start in range(0, outer_chunksize[2], chunkgraph_chunksize[2]):
                            ix_end = ix_start + chunkgraph_chunksize[0]
                            iy_end = iy_start + chunkgraph_chunksize[1]
                            iz_end = iz_start + chunkgraph_chunksize[2]

                            # One voxel overlap in each dimension to get
                            # consistent labeling across chunks
                            edgetask_roi = (
                                slice(ox + ix_start, ox + ix_end + 1),
                                slice(oy + iy_start, oy + iy_end + 1),
                                slice(oz + iz_start, oz + iz_end + 1))

                            edgetask = EdgeTask(
                                cgraph=cgraph,
                                mysql_conn=mysql_conn,
                                agglomeration_input=agglomeration_input,
                                watershed_input=watershed_input,
                                regiongraph_input=regiongraph_input,
                                regiongraph_output=regiongraph_output,
                                regiongraph_chunksize=regiongraph_chunksize,
                                roi=edgetask_roi)
                            edgetask.execute()

                            # Write relabeled ChunkGraph chunk to (possibly larger)
                            # watershed-chunk aligned buffer
                            watershed_output_buffer[
                                ix_start:ix_end, iy_start:iy_end, iz_start:iz_end, :] = \
                                edgetask.get_relabeled_watershed()

                watershed_output[
                    ox:ox + outer_chunksize[0],
                    oy:oy + outer_chunksize[1],
                    oz:oz + outer_chunksize[2], :] = watershed_output_buffer
def create_storage(layer_name='layer'):
    stor_path = os.path.join(layer_path, layer_name)
    return Storage('file://' + stor_path, n_threads=0)
""" import sys import os from tqdm import tqdm from cloudvolume.lib import max2 from cloudvolume import CloudVolume, Storage, Bbox layer_path = sys.argv[1] cv = CloudVolume(layer_path) bboxes = [] with Storage(layer_path) as stor: for filename in tqdm(stor.list_files(prefix=cv.key), desc="Computing Bounds"): bboxes.append( Bbox.from_filename(filename) ) bounds = Bbox.expand(*bboxes) chunk_size = reduce(max2, map(lambda bbox: bbox.size3(), bboxes)) print('bounds={} (size: {}); chunk_size={}'.format(bounds, bounds.size3(), chunk_size))
def record_remesh_ids(cg: ChunkedGraph, operation_id: int, l2ids: ndarray):
    from cloudvolume.storage import SimpleStorage as Storage

    _, _, bucket_path, file_name = get_remesh_info(cg, operation_id)
    with Storage(bucket_path) as storage:  # pylint: disable=not-context-manager
        storage.put_file(file_path=file_name, content=l2ids.tobytes())