def upload_segments(input_path, precomputed_path, num_mips):
    """Uploads segmentation data from local to precomputed path.

    Arguments:
        input_path: The filepath to the root directory of the octree data with consensus-swcs folder.
        precomputed_path: CloudVolume precomputed path or url.
        num_mips: The number of resolutions to upload (for info file).
    """
    check_type(input_path, str)
    check_precomputed(precomputed_path)
    check_type(num_mips, (int, np.integer))
    if num_mips < 1:
        raise ValueError(f"Number of resolutions should be > 0, not {num_mips}")

    (_, _, vox_size, img_size, origin) = get_volume_info(
        input_path,
        num_mips,
    )
    vols = create_cloud_volume(
        precomputed_path,
        img_size,
        vox_size,
        num_mips,
        layer_type="segmentation",
    )

    swc_dir = Path(input_path) / "consensus-swcs"
    segments, segids = create_skel_segids(str(swc_dir), origin)
    for skel in segments:
        vols[0].skeleton.upload(skel)
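# Illustrative usage sketch for upload_segments (the directory and destination
# below are hypothetical placeholders, not real project data): the octree root
# must contain a consensus-swcs/ folder of SWC files, and the destination is
# any CloudVolume precomputed path (file://, s3://, or gs://).
def _example_upload_segments():
    octree_root = "/data/example_octree"                  # hypothetical local octree root
    dest = "file:///tmp/example_precomputed_segments"     # hypothetical precomputed path
    upload_segments(octree_root, dest, num_mips=2)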
def __init__(
    self,
    url: str,  # = "s3://open-neurodata/brainlit/brain1"
    mip: int = 0,
    url_segments: Optional[str] = None,
):
    check_precomputed(url)
    check_type(mip, (int, np.integer))
    self.url = url
    self.cv = CloudVolume(url, parallel=False)
    if mip < 0 or mip >= len(self.cv.scales):
        raise ValueError(f"{mip} should be between 0 and {len(self.cv.scales)}.")
    self.mip = mip
    self.chunk_size = self.cv.scales[self.mip]["chunk_sizes"][0]
    self.scales = self.cv.scales[self.mip]["resolution"]

    self.url_segments = url_segments
    if url_segments is None:
        try:  # default is to add _segments
            self.cv_segments = CloudVolume(url + "_segments", parallel=False)
            self.url_segments = url + "_segments"
        except InfoUnavailableError:
            warnings.warn(
                UserWarning(
                    f"Segmentation volume not found at {url + '_segments'}, defaulting to None."
                )
            )
            self.cv_segments = None
    else:
        check_precomputed(url_segments)
        self.cv_segments = CloudVolume(url_segments, parallel=False)
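# Illustrative construction sketch for the __init__ above. The enclosing class
# name is not shown in this snippet; NeuroglancerSession is assumed here, and
# the URLs are hypothetical placeholders. If url_segments is omitted, the
# constructor tries url + "_segments" and falls back to None with a warning.
def _example_session():
    sess = NeuroglancerSession(
        url="s3://example-bucket/example_brain",                     # hypothetical image volume
        mip=0,
        url_segments="s3://example-bucket/example_brain_segments",   # hypothetical segmentation volume
    )
    return sess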
def set_url_segments(self, seg_url: str):
    """Sets the url_segments and cv_segments attributes.

    Arguments:
        seg_url: CloudVolume precomputed path to segmentation data.
    """
    check_precomputed(seg_url)
    self.url_segments = seg_url
    self.cv_segments = CloudVolume(self.url_segments, parallel=False)
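# Illustrative sketch for set_url_segments: pointing an already constructed
# session at a different segmentation volume. The URL is a hypothetical
# placeholder.
def _example_set_url_segments(sess):
    sess.set_url_segments("s3://example-bucket/example_brain_segments")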
def upload_segments(
    input_path, precomputed_path, num_mips, benchmarking: Optional[bool] = False
):
    """Uploads segmentation data from local to precomputed path.

    Arguments:
        input_path: The filepath to the root directory of the octree data with consensus-swcs folder.
        precomputed_path: CloudVolume precomputed path or url.
        num_mips: The number of resolutions to upload (for info file).
        benchmarking: Optional, scales swc benchmarking data.
    """
    check_type(input_path, str)
    check_precomputed(precomputed_path)
    check_type(num_mips, (int, np.integer))
    if num_mips < 1:
        raise ValueError(f"Number of resolutions should be > 0, not {num_mips}")

    if benchmarking:
        # Getting swc scaling parameters from the benchmarking file name
        f = Path(input_path).parts[4].split("_")
        image = f[0]
        date = type_to_date[image]
        scale = scales[date]
        (_, _, vox_size, img_size, origin) = get_volume_info(
            input_path,
            num_mips,
            benchmarking=True,
        )
        chunk_size = [int(i) for i in img_size]
    else:
        (_, _, vox_size, img_size, origin) = get_volume_info(
            input_path,
            num_mips,
        )
        chunk_size = None

    vols = create_cloud_volume(
        precomputed_path,
        img_size,
        vox_size,
        num_mips,
        layer_type="segmentation",
        chunk_size=chunk_size,
    )

    swc_dir = Path(input_path) / "consensus-swcs"
    segments, segids = create_skel_segids(str(swc_dir), origin, benchmarking)
    for skel in segments:
        if benchmarking:
            skel.vertices /= scale  # Dividing vertices by scale factor
        vols[0].skeleton.upload(skel)
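# Illustrative sketch for the benchmarking path of upload_segments: the path
# below is a hypothetical placeholder. When benchmarking=True, the swc scale is
# looked up from the fifth path component (Path(input_path).parts[4]), whose
# text before the first "_" must be a key of the module-level type_to_date
# mapping, so the octree root has to sit at that depth in the filesystem.
def _example_upload_benchmarking_segments():
    octree_root = "/home/user/data/test_1-gfp/example_octree"   # hypothetical; parts[4] == "test_1-gfp"
    dest = "file:///tmp/example_benchmarking_segments"          # hypothetical precomputed path
    upload_segments(octree_root, dest, num_mips=1, benchmarking=True)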
def create_cloud_volume(
    precomputed_path: str,
    img_size: Sequence[int],
    voxel_size: Sequence[Union[int, float]],
    num_resolutions: int,
    chunk_size: Optional[Sequence[int]] = None,
    parallel: Optional[bool] = False,
    layer_type: Optional[str] = "image",
    dtype: Optional[str] = None,
    commit_info: Optional[bool] = True,
) -> CloudVolumePrecomputed:
    """Create CloudVolume object and info file.

    Handles both image volumes and segmentation volumes from octree structure.

    Arguments:
        precomputed_path: cloudvolume path
        img_size: x, y, z voxel dimensions of tiff images.
        voxel_size: x, y, z dimensions of highest res voxel size (nm).
        num_resolutions: The number of resolutions to upload.
        chunk_size: The size of chunks to use for upload. If None, uses img_size/4.
        parallel: Whether to upload chunks in parallel.
        layer_type: The type of cloudvolume object to create.
        dtype: The data type of the volume. If None, uses default for layer type.
        commit_info: Whether to create an info file at the path, defaults to True.

    Returns:
        vols: List of CloudVolume objects designated for upload.
    """
    # defaults
    if chunk_size is None:
        chunk_size = [int(i / 4) for i in img_size]  # /2 took 42 hrs
    if dtype is None:
        if layer_type == "image":
            dtype = "uint16"
        elif layer_type == "segmentation" or layer_type == "annotation":
            dtype = "uint64"
        else:
            raise ValueError(
                f"layer type is {layer_type}, when it should be 'image', 'segmentation', or 'annotation'"
            )

    # check inputs
    check_precomputed(precomputed_path)
    check_size(img_size, allow_float=False)
    check_size(voxel_size)
    check_type(num_resolutions, (int, np.integer))
    if num_resolutions < 1:
        raise ValueError(
            f"Number of resolutions should be > 0, not {num_resolutions}"
        )
    check_size(chunk_size)
    check_type(parallel, bool)
    check_type(layer_type, str)
    if layer_type not in ["image", "segmentation", "annotation"]:
        raise ValueError(
            f"{layer_type} should be 'image', 'segmentation', or 'annotation'"
        )
    check_type(dtype, str)
    if dtype not in ["uint16", "uint64"]:
        raise ValueError(f"{dtype} should be 'uint16' or 'uint64'")
    check_type(commit_info, bool)

    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type=layer_type,
        data_type=dtype,  # Channel images might be 'uint8'
        encoding="raw",  # raw, jpeg, compressed_segmentation, fpzip, kempressed
        resolution=voxel_size,  # Voxel scaling, units are in nanometers
        voxel_offset=[0, 0, 0],  # x,y,z offset in voxels from the origin
        chunk_size=chunk_size,  # units are voxels
        volume_size=[i * 2 ** (num_resolutions - 1) for i in img_size],
    )
    vol = CloudVolume(precomputed_path, info=info, parallel=parallel)
    for i in range(num_resolutions):
        vol.add_scale((2 ** i, 2 ** i, 2 ** i), chunk_size=chunk_size)

    if commit_info:
        vol.commit_info()
    if layer_type == "image" or layer_type == "annotation":
        vols = [
            CloudVolume(precomputed_path, mip=i, parallel=parallel)
            for i in range(num_resolutions - 1, -1, -1)
        ]
    elif layer_type == "segmentation":
        info.update(skeletons="skeletons")
        skel_info = {
            "@type": "neuroglancer_skeletons",
            "transform": [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
            "vertex_attributes": [
                {"id": "radius", "data_type": "float32", "num_components": 1},
                {"id": "vertex_types", "data_type": "float32", "num_components": 1},
                {"id": "vertex_color", "data_type": "float32", "num_components": 4},
            ],
        }
        with storage.SimpleStorage(vol.cloudpath) as stor:
            stor.put_json(str(Path("skeletons") / "info"), skel_info)
        vols = [vol]
    return vols
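# Illustrative sketch for create_cloud_volume: writes an info file for a
# 2-resolution image layer. The destination and sizes are hypothetical
# placeholders; voxel_size is in nanometers, img_size is the per-tile voxel
# shape, and the full volume_size is derived as img_size * 2**(num_resolutions - 1).
def _example_create_cloud_volume():
    vols = create_cloud_volume(
        precomputed_path="file:///tmp/example_precomputed_image",  # hypothetical destination
        img_size=[1024, 1024, 512],    # hypothetical tile dimensions (voxels)
        voxel_size=[300, 300, 1000],   # hypothetical highest-res voxel size (nm)
        num_resolutions=2,
        layer_type="image",
    )
    # For image layers, vols holds one CloudVolume per resolution, from
    # mip num_resolutions-1 down to mip 0.
    return vols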
def upload_volumes(
    input_path: str,
    precomputed_path: str,
    num_mips: int,
    parallel: bool = False,
    chosen: int = -1,
):
    """Uploads image data from local to a precomputed path.

    Specify num_mips for additional resolutions. If `chosen` is used, an info file will not be generated.

    Arguments:
        input_path: The filepath to the root directory of the octree image data.
        precomputed_path: CloudVolume precomputed path or url.
        num_mips: The number of resolutions to upload.
        parallel: Whether to upload in parallel. Default is False.
        chosen: If not -1, uploads only that specific mip. Default is -1.
    """
    check_type(input_path, str)
    check_precomputed(precomputed_path)
    check_type(num_mips, (int, np.integer))
    if num_mips < 1:
        raise ValueError(f"Number of resolutions should be > 0, not {num_mips}")
    check_type(parallel, bool)
    check_type(chosen, (int, np.integer))
    if chosen < -1 or chosen >= num_mips:
        raise ValueError(f"{chosen} should be -1, or between 0 and {num_mips-1}")

    (files_ordered, paths_bin, vox_size, img_size, _) = get_volume_info(
        input_path,
        num_mips,
    )

    if chosen != -1:
        commit_info = False
    else:
        commit_info = True
    vols = create_cloud_volume(
        precomputed_path,
        img_size,
        vox_size,
        num_mips,
        parallel=parallel,
        layer_type="image",
        commit_info=commit_info,
    )

    num_procs = min(
        math.floor(
            virtual_memory().total / (img_size[0] * img_size[1] * img_size[2] * 8)
        ),
        cpu_count(),
    )

    start = time.time()
    if chosen == -1:
        for mip, vol in enumerate(vols):
            try:
                with tqdm_joblib(
                    tqdm(
                        desc="Creating precomputed volume",
                        total=len(files_ordered[mip]),
                    )
                ) as progress_bar:
                    Parallel(num_procs, timeout=1800)(
                        delayed(process)(
                            f,
                            b,
                            vols[mip],
                        )
                        for f, b in zip(files_ordered[mip], paths_bin[mip])
                    )
                print(f"\nFinished mip {mip}, took {time.time()-start} seconds")
                start = time.time()
            except Exception as e:
                print(e)
                print("timed out on a slice. moving on to the next step of pipeline")
    else:
        try:
            with tqdm_joblib(
                tqdm(
                    desc="Creating precomputed volume",
                    total=len(files_ordered[chosen]),
                )
            ) as progress_bar:
                Parallel(num_procs, timeout=1800)(
                    delayed(process)(
                        f,
                        b,
                        vols[chosen],
                    )
                    for f, b in zip(files_ordered[chosen], paths_bin[chosen])
                )
            print(f"\nFinished mip {chosen}, took {time.time()-start} seconds")
        except Exception as e:
            print(e)
            print("timed out on a slice. moving on to the next step of pipeline")
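# Illustrative sketch for upload_volumes: uploads every resolution level of a
# hypothetical octree to a hypothetical destination. Passing a non-negative
# `chosen` would instead upload only that single resolution and skip writing
# the info file.
def _example_upload_volumes():
    octree_root = "/data/example_octree"                 # hypothetical octree root
    dest = "file:///tmp/example_precomputed_image"       # hypothetical precomputed path
    upload_volumes(octree_root, dest, num_mips=3, parallel=False, chosen=-1)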
def upload_volumes(
    input_path: str,
    precomputed_path: str,
    num_mips: int,
    parallel: bool = False,
    chosen: int = -1,
    benchmarking: Optional[bool] = False,
    continue_upload: Optional[Tuple[int, int]] = (0, 0),
):
    """Uploads image data from local to a precomputed path.

    Specify num_mips for additional resolutions. If `chosen` is used, an info file will not be generated.

    Arguments:
        input_path: The filepath to the root directory of the octree image data.
        precomputed_path: CloudVolume precomputed path or url.
        num_mips: The number of resolutions to upload.
        parallel: Whether to upload in parallel. Default is False.
        chosen: If not -1, uploads only that specific mip. Default is -1.
        benchmarking: For scaling purposes, true if uploading benchmarking data. Default is False.
        continue_upload: Used to continue an upload. Default (0, 0).
            The tuple (layer_idx, iter) containing the layer index and iteration to start from.
    """
    check_type(input_path, str)
    check_precomputed(precomputed_path)
    check_type(num_mips, (int, np.integer))
    if num_mips < 1:
        raise ValueError(f"Number of resolutions should be > 0, not {num_mips}")
    check_type(parallel, bool)
    check_type(chosen, int)
    check_type(benchmarking, bool)
    check_iterable_type(continue_upload, int)
    if chosen < -1 or chosen >= num_mips:
        raise ValueError(f"{chosen} should be -1, or between 0 and {num_mips-1}")
    if chosen != -1:
        commit_info = False
    else:
        commit_info = True

    if benchmarking:
        (files_ordered, bin_paths, vox_size, img_size, _) = get_volume_info(
            input_path, num_mips, benchmarking=True
        )
        vols = create_cloud_volume(
            precomputed_path,
            img_size,
            vox_size,
            num_mips,
            chunk_size=[int(i) for i in img_size],
            parallel=parallel,
            layer_type="image",
            commit_info=commit_info,
        )
    else:
        (files_ordered, bin_paths, vox_size, img_size, _) = get_volume_info(
            input_path,
            num_mips,
        )
        vols = create_cloud_volume(
            precomputed_path,
            img_size,
            vox_size,
            num_mips,
            parallel=parallel,
            layer_type="image",
            commit_info=commit_info,
        )

    num_procs = min(
        math.floor(
            virtual_memory().total / (img_size[0] * img_size[1] * img_size[2] * 8)
        ),
        cpu_count(),
    )

    # skip already uploaded layers
    vols2 = vols[continue_upload[0]:]
    files_ordered2 = files_ordered[continue_upload[0]:]
    bin_paths2 = bin_paths[continue_upload[0]:]
    # skip already uploaded files on the current layer
    files_ordered2[0] = files_ordered2[0][continue_upload[1]:]
    bin_paths2[0] = bin_paths2[0][continue_upload[1]:]

    start = time.time()
    if chosen == -1:
        for mip, vol in enumerate(vols2):
            try:
                with tqdm_joblib(
                    tqdm(
                        desc=f"Creating precomputed volume at layer index {mip+continue_upload[0]}",
                        total=len(files_ordered2[mip]),
                    )
                ) as progress_bar:
                    Parallel(num_procs, timeout=1800)(
                        delayed(process)(
                            f,
                            b,
                            vols2[mip],
                        )
                        for f, b in zip(
                            files_ordered2[mip],
                            bin_paths2[mip],
                        )
                    )
                print(
                    f"\nFinished layer index {mip+continue_upload[0]}, took {time.time()-start} seconds"
                )
                start = time.time()
            except Exception as e:
                print(e)
                print(
                    f"timed out on a chunk on layer index {mip+continue_upload[0]}. moving on to the next step of pipeline"
                )
    else:
        try:
            with tqdm_joblib(
                tqdm(
                    desc=f"Creating precomputed volume at mip {chosen}",
                    total=len(files_ordered[chosen][continue_upload[1]:]),
                )
            ) as progress_bar:
                Parallel(num_procs, timeout=1800, verbose=0)(
                    delayed(process)(
                        f,
                        b,
                        vols[chosen],
                    )
                    for f, b in zip(
                        files_ordered[chosen][continue_upload[1]:],
                        bin_paths[chosen][continue_upload[1]:],
                    )
                )
            print(
                f"\nFinished layer index {chosen}, took {time.time()-start} seconds"
            )
        except Exception as e:
            print(e)
            print(f"timed out on a chunk on layer index {chosen}.")
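# Illustrative sketch for resuming an interrupted upload with continue_upload:
# the tuple (layer_idx, iter) skips the layers below layer_idx and the first
# `iter` files of that layer. Paths and values are hypothetical placeholders.
def _example_resume_upload():
    octree_root = "/data/example_octree"                 # hypothetical octree root
    dest = "file:///tmp/example_precomputed_image"       # hypothetical precomputed path
    # resume at layer index 1, starting from file 250 of that layer
    upload_volumes(octree_root, dest, num_mips=3, continue_upload=(1, 250))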