import os
import time
from pathlib import Path
from typing import Optional, Sequence, Union

import numpy as np
from cloudvolume import CloudVolume, storage
from cloudvolume.frontends.precomputed import CloudVolumePrecomputed

# Project-local helpers assumed importable alongside this module: creator_utils
# (edge-file utilities), mu (multiprocessing utilities), and the
# check_precomputed / check_size / check_type input validators.


def check_stored_cv_files(dataset_name="basil"):
    """ Tests if all files were downloaded

    :param dataset_name: str
    """
    if "basil" == dataset_name:
        cv_url = "gs://nkem/basil_4k_oldnet/region_graph/"
    elif "pinky40" == dataset_name:
        cv_url = "gs://nkem/pinky40_v11/mst_trimmed_sem_remap/region_graph/"
    elif "pinky100" == dataset_name:
        cv_url = "gs://nkem/pinky100_v0/region_graph/"
    else:
        raise Exception("Could not identify region graph resource")

    with storage.SimpleStorage(cv_url) as cv_st:
        dir_path = creator_utils.dir_from_layer_name(
            creator_utils.layer_name_from_cv_url(cv_st.layer_path))

        file_paths = list(cv_st.list_files())

    c = 0
    n_file_paths = len(file_paths)
    time_start = time.time()
    for i_fp, fp in enumerate(file_paths):
        if i_fp % 1000 == 1:
            dt = time.time() - time_start
            eta = dt / i_fp * n_file_paths - dt
            print("%d / %d - dt: %.3fs - eta: %.3fs" %
                  (i_fp, n_file_paths, dt, eta))

        if not os.path.exists(dir_path + fp[:-4] + ".h5"):
            print(dir_path + fp[:-4] + ".h5")
            c += 1

    print("%d files were missing" % c)
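
# A minimal usage sketch, assuming GCS credentials are configured and the files
# were previously fetched with download_and_store_cv_files() defined below.
def _example_check_pinky100():
    # Prints a progress/ETA line every 1000 files, then the number of region
    # graph files that are missing locally.
    check_stored_cv_files(dataset_name="pinky100")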

def create_skeleton_layer(s3_bucket, skel_res, img_dims, num_res=7):
    """Creates segmentation layer for skeletons

    Arguments:
        s3_bucket {str} -- path to precomputed skeleton destination
        skel_res {list} -- x,y,z dimensions of highest res voxel size (nm)
        img_dims {list} -- x,y,z voxel dimensions of tiff images

    Keyword Arguments:
        num_res {int} -- number of image resolutions to be downsampled

    Returns:
        vol {cloudvolume.CloudVolume} -- CloudVolume to upload skeletons to
    """
    # create cloudvolume info
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type="segmentation",
        data_type="uint64",  # Channel images might be 'uint8'
        encoding="raw",  # raw, jpeg, compressed_segmentation, fpzip, kempressed
        # Voxel scaling, units are in nanometers
        resolution=skel_res,
        voxel_offset=[0, 0, 0],  # x,y,z offset in voxels from the origin
        # Pick a convenient size for your underlying chunk representation
        # Powers of two are recommended, doesn't need to cover image exactly
        chunk_size=[int(i / 4) for i in img_dims],
        # chunk_size=[128, 128, 64],  # units are voxels
        volume_size=[i * 2 ** (num_res - 1) for i in img_dims],  # units are voxels
        skeletons="skeletons",
    )
    skel_info = {
        "@type": "neuroglancer_skeletons",
        "transform": [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
        "vertex_attributes": [
            {"id": "radius", "data_type": "float32", "num_components": 1},
            {"id": "vertex_types", "data_type": "float32", "num_components": 1},
            {"id": "vertex_color", "data_type": "float32", "num_components": 4},
        ],
    }
    # get cloudvolume info
    vol = CloudVolume(s3_bucket, info=info, parallel=True)
    [vol.add_scale((2 ** i, 2 ** i, 2 ** i)) for i in range(num_res)]  # num_res - 1
    vol.commit_info()

    # upload skeleton info to /skeletons/ dir
    with storage.SimpleStorage(vol.cloudpath) as stor:
        stor.put_json(str(Path("skeletons") / "info"), skel_info)
    return vol
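
# A minimal usage sketch for creating the skeleton layer. The bucket path and
# dimensions below are hypothetical; s3 credentials must be configured.
def _example_skeleton_layer():
    return create_skeleton_layer(
        s3_bucket="s3://example-bucket/skeleton_layer",  # hypothetical destination
        skel_res=[9.8, 9.8, 30.0],   # nm per voxel at highest resolution (example values)
        img_dims=[1024, 1024, 512],  # voxel dimensions of one tiff image (example values)
        num_res=7,
    )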

def _download_and_store_cv_files_thread(args):
    """ Helper thread to download files from google cloud """
    chunk_id, cv_url, file_paths, olduint32 = args

    # Reset connection pool to make cloud-volume compatible with parallelizing
    storage.reset_connection_pools()

    n_file_paths = len(file_paths)
    time_start = time.time()
    with storage.SimpleStorage(cv_url) as cv_st:
        for i_fp, fp in enumerate(file_paths):
            if i_fp % 100 == 1:
                dt = time.time() - time_start
                eta = dt / i_fp * n_file_paths - dt
                print("%d: %d / %d - dt: %.3fs - eta: %.3fs" %
                      (chunk_id, i_fp, n_file_paths, dt, eta))

            creator_utils.download_and_store_edge_file(cv_st, fp)

def download_and_store_cv_files(dataset_name="basil", n_threads=10,
                                olduint32=False):
    """ Downloads files from google cloud using cloud-volume

    :param dataset_name: str
    :param n_threads: int
    :param olduint32: bool
    """
    if "basil" == dataset_name:
        cv_url = "gs://nkem/basil_4k_oldnet/region_graph/"
    elif "pinky40" == dataset_name:
        cv_url = "gs://nkem/pinky40_v11/mst_trimmed_sem_remap/region_graph/"
    elif "pinky100" == dataset_name:
        cv_url = "gs://nkem/pinky100_v0/region_graph/"
    else:
        raise Exception("Could not identify region graph resource")

    with storage.SimpleStorage(cv_url) as cv_st:
        dir_path = creator_utils.dir_from_layer_name(
            creator_utils.layer_name_from_cv_url(cv_st.layer_path))

        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

        file_paths = list(cv_st.list_files())

    file_chunks = np.array_split(file_paths, n_threads * 3)
    multi_args = []
    for i_file_chunk, file_chunk in enumerate(file_chunks):
        multi_args.append([i_file_chunk, cv_url, file_chunk, olduint32])

    # Run parallelizing
    if n_threads == 1:
        mu.multiprocess_func(_download_and_store_cv_files_thread,
                             multi_args, n_threads=n_threads,
                             verbose=True, debug=n_threads == 1)
    else:
        mu.multisubprocess_func(_download_and_store_cv_files_thread,
                                multi_args, n_threads=n_threads)
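
# End-to-end sketch tying download and verification together, assuming GCS
# credentials and enough local disk for the region graph files: fetch the edge
# files in parallel subprocesses, then confirm each one exists locally as .h5.
def _example_mirror_basil():
    download_and_store_cv_files(dataset_name="basil", n_threads=10)
    check_stored_cv_files(dataset_name="basil")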

def create_cloud_volume(
    precomputed_path: str,
    img_size: Sequence[int],
    voxel_size: Sequence[Union[int, float]],
    num_resolutions: int,
    chunk_size: Optional[Sequence[int]] = None,
    parallel: Optional[bool] = False,
    layer_type: Optional[str] = "image",
    dtype: Optional[str] = None,
    commit_info: Optional[bool] = True,
) -> CloudVolumePrecomputed:
    """Create CloudVolume object and info file.

    Handles both image volumes and segmentation volumes from octree structure.

    Arguments:
        precomputed_path: cloudvolume path
        img_size: x, y, z voxel dimensions of tiff images.
        voxel_size: x, y, z dimensions of highest res voxel size (nm).
        num_resolutions: The number of resolutions to upload.
        chunk_size: The size of chunks to use for upload. If None, uses img_size/4.
        parallel: Whether to upload chunks in parallel.
        layer_type: The type of cloudvolume object to create.
        dtype: The data type of the volume. If None, uses default for layer type.
        commit_info: Whether to create an info file at the path, defaults to True.

    Returns:
        vols: List of volumes designated for upload: one per mip level (lowest
            resolution first) for image and annotation layers, a single-element
            list for segmentation layers.
    """
    # defaults
    if chunk_size is None:
        chunk_size = [int(i / 4) for i in img_size]  # /2 took 42 hrs
    if dtype is None:
        if layer_type == "image":
            dtype = "uint16"
        elif layer_type == "segmentation" or layer_type == "annotation":
            dtype = "uint64"
        else:
            raise ValueError(
                f"layer_type is {layer_type}, when it should be "
                "'image', 'segmentation', or 'annotation'")

    # check inputs
    check_precomputed(precomputed_path)
    check_size(img_size, allow_float=False)
    check_size(voxel_size)
    check_type(num_resolutions, (int, np.integer))
    if num_resolutions < 1:
        raise ValueError(
            f"Number of resolutions should be > 0, not {num_resolutions}")
    check_size(chunk_size)
    check_type(parallel, bool)
    check_type(layer_type, str)
    if layer_type not in ["image", "segmentation", "annotation"]:
        raise ValueError(
            f"{layer_type} should be 'image', 'segmentation', or 'annotation'")
    check_type(dtype, str)
    if dtype not in ["uint16", "uint64"]:
        raise ValueError(f"{dtype} should be 'uint16' or 'uint64'")
    check_type(commit_info, bool)

    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type=layer_type,
        data_type=dtype,  # Channel images might be 'uint8'
        encoding="raw",  # raw, jpeg, compressed_segmentation, fpzip, kempressed
        resolution=voxel_size,  # Voxel scaling, units are in nanometers
        voxel_offset=[0, 0, 0],  # x,y,z offset in voxels from the origin
        chunk_size=chunk_size,  # units are voxels
        volume_size=[i * 2**(num_resolutions - 1) for i in img_size],
    )
    vol = CloudVolume(precomputed_path, info=info, parallel=parallel)
    [
        vol.add_scale((2**i, 2**i, 2**i), chunk_size=chunk_size)
        for i in range(num_resolutions)
    ]
    if commit_info:
        vol.commit_info()
    if layer_type == "image" or layer_type == "annotation":
        vols = [
            CloudVolume(precomputed_path, mip=i, parallel=parallel)
            for i in range(num_resolutions - 1, -1, -1)
        ]
    elif layer_type == "segmentation":
        info.update(skeletons="skeletons")
        skel_info = {
            "@type": "neuroglancer_skeletons",
            "transform": [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
            "vertex_attributes": [
                {"id": "radius", "data_type": "float32", "num_components": 1},
                {"id": "vertex_types", "data_type": "float32", "num_components": 1},
                {"id": "vertex_color", "data_type": "float32", "num_components": 4},
            ],
        }
        with storage.SimpleStorage(vol.cloudpath) as stor:
            stor.put_json(str(Path("skeletons") / "info"), skel_info)
        vols = [vol]

    return vols
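
# A minimal usage sketch for an image layer (hypothetical path and sizes). The
# returned list is ordered from lowest resolution (mip num_resolutions - 1) to
# highest (mip 0), so vols[-1] is the full-resolution volume; data is written
# into a CloudVolume with ordinary array slicing.
def _example_image_layer():
    vols = create_cloud_volume(
        precomputed_path="file:///tmp/example_image_layer",  # hypothetical path
        img_size=[512, 512, 256],
        voxel_size=[9.8, 9.8, 30.0],  # example values, in nm
        num_resolutions=3,
        layer_type="image",
    )
    return vols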

def create_cloud_volume(
    precomputed_path,
    img_size,
    voxel_size,
    num_resolutions=2,
    chunk_size=None,
    parallel=False,
    layer_type="image",
    dtype=None,
):
    """Create CloudVolume volume object and info file.

    Arguments:
        precomputed_path {str} -- cloudvolume path
        img_size {list} -- x,y,z voxel dimensions of tiff images
        voxel_size {list} -- x,y,z dimensions of highest res voxel size (nm)

    Keyword Arguments:
        num_resolutions {int} -- the number of resolutions to upload
        chunk_size {list} -- size of chunks to upload. If None, uses img_size/2.
        parallel {bool} -- whether to upload chunks in parallel
        layer_type {str} -- one of "image" or "segmentation"
        dtype {str} -- one of "uint16" or "uint64". If None, uses default for layer type.

    Returns:
        vols {list of cloudvolume.CloudVolume} -- volumes to upload to: one per
            mip level (lowest resolution first) for image layers, a
            single-element list for segmentation layers
    """
    if chunk_size is None:
        chunk_size = [int(i / 2) for i in img_size]
    if dtype is None:
        if layer_type == "image":
            dtype = "uint16"
        elif layer_type == "segmentation":
            dtype = "uint64"
        else:
            raise ValueError(
                f"layer_type is {layer_type}, when it should be "
                "'image' or 'segmentation'")

    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type=layer_type,
        data_type=dtype,  # Channel images might be 'uint8'
        encoding="raw",  # raw, jpeg, compressed_segmentation, fpzip, kempressed
        resolution=voxel_size,  # Voxel scaling, units are in nanometers
        voxel_offset=[0, 0, 0],  # x,y,z offset in voxels from the origin
        chunk_size=chunk_size,  # units are voxels
        volume_size=[i * 2**(num_resolutions - 1) for i in img_size],
        # volume_size=img_size,  # e.g. a cubic millimeter dataset
        skeletons="skeletons",
    )
    vol = CloudVolume(precomputed_path, info=info, parallel=parallel)
    [
        vol.add_scale((2**i, 2**i, 2**i), chunk_size=chunk_size)
        for i in range(num_resolutions)
    ]
    vol.commit_info()
    if layer_type == "image":
        vols = [
            CloudVolume(precomputed_path, mip=i, parallel=parallel)
            for i in range(num_resolutions - 1, -1, -1)
        ]
    elif layer_type == "segmentation":
        skel_info = {
            "@type": "neuroglancer_skeletons",
            "transform": [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
            "vertex_attributes": [
                {"id": "radius", "data_type": "float32", "num_components": 1},
                {"id": "vertex_types", "data_type": "float32", "num_components": 1},
                {"id": "vertex_color", "data_type": "float32", "num_components": 4},
            ],
        }
        with storage.SimpleStorage(vol.cloudpath) as stor:
            stor.put_json(str(Path("skeletons") / "info"), skel_info)
        vols = [vol]

    return vols
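
# A minimal usage sketch for the segmentation variant (hypothetical path and
# sizes): a single-element list comes back, and that one volume is the target
# for skeleton uploads described by the /skeletons/info file written above.
def _example_segmentation_layer():
    (vol,) = create_cloud_volume(
        precomputed_path="file:///tmp/example_skeleton_layer",  # hypothetical path
        img_size=[512, 512, 256],
        voxel_size=[9.8, 9.8, 30.0],  # example values, in nm
        num_resolutions=2,
        layer_type="segmentation",
    )
    return vol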