def test_downsample_higher_mip():
    """Downsampling starting from a mip > 0 should append further scales."""
    delete_layer()
    storage, data = create_layer(size=(512, 512, 64, 1), offset=(3, 7, 11))
    cv = CloudVolume(storage.layer_path)
    cv.info['scales'] = cv.info['scales'][:1]  # keep only the base scale
    tq = MockTaskQueue()
    cv.commit_info()

    # First pass: mips 1 and 2 from the base layer.
    tasks = create_downsampling_tasks(storage.layer_path, mip=0, num_mips=2)
    tq.insert_all(tasks)
    cv.refresh_info()
    assert len(cv.available_mips) == 3

    # Second pass: continue from mip 1 to create mip 3.
    tasks = create_downsampling_tasks(storage.layer_path, mip=1, num_mips=2)
    tq.insert_all(tasks)
    cv.refresh_info()
    assert len(cv.available_mips) == 4

    cv.mip = 3
    assert cv[:, :, :].shape == (64, 64, 64, 1)
def make_info_file(brain, home_dir, volume_size, type_vol = "647", commit=True):
    """Create the precomputed info (and provenance) files for a brain volume.

    Returns the CloudVolume handle at file://<home_dir>/<brain>/<type_vol>.
    """
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type="image",  # "image" or "segmentation"
        data_type="uint16",  # 32 not necessary for Princeton atlas, but was for Allen atlas
        encoding="raw",  # other options: "jpeg", "compressed_segmentation" (req. uint32 or uint64)
        resolution=[1810, 1810, 2000],  # X,Y,Z values in nanometers, 40 microns in each dim.
        voxel_offset=[0, 0, 0],  # X,Y,Z offset in voxels
        chunk_size=[1024, 1024, 32],  # rechunk of image X,Y,Z in voxels,
        volume_size=volume_size,  # X,Y,Z size in voxels
    )

    # If you"re using amazon or the local file system, you can replace "gs" with "s3" or "file"
    vol = CloudVolume("file://" + home_dir + "/" + brain + "/" + type_vol, info=info)
    vol.provenance.description = "TP tracing"
    vol.provenance.owners = ["*****@*****.**"]  # list of contact email addresses

    if commit:
        vol.commit_info()  # generates gs://bucket/dataset/layer/info json file
        vol.commit_provenance()  # generates gs://bucket/dataset/layer/provenance json file
    print("Created CloudVolume info file: ", vol.info_cloudpath)
    return vol
def test_downsample_no_offset_2x2x2():
    """Isotropic 2x2x2 downsampling of a 512^3 volume at the origin."""
    delete_layer()
    cf, data = create_layer(size=(512, 512, 512, 1), offset=(0, 0, 0))
    cv = CloudVolume(cf.cloudpath)
    assert len(cv.scales) == 1
    assert len(cv.available_mips) == 1

    cv.commit_info()

    tq = MockTaskQueue()
    tasks = create_downsampling_tasks(
        cf.cloudpath, mip=0, num_mips=3, compress=None, factor=(2, 2, 2)
    )
    tq.insert_all(tasks)

    cv.refresh_info()
    assert len(cv.available_mips) == 4

    for m, edge in enumerate((512, 256, 128, 64)):
        assert np.array_equal(cv.mip_volume_size(m), [edge, edge, edge])

    slice64 = np.s_[0:64, 0:64, 0:64]

    cv.mip = 0
    assert np.all(cv[slice64] == data[slice64])

    # Each mip m should equal an average-pooled 2^m x 2^m x 2^m reduction.
    for m in (1, 2, 3):
        f = 2 ** m
        ds, = tinybrain.downsample_with_averaging(data, factor=[f, f, f, 1])
        cv.mip = m
        assert np.all(cv[slice64] == ds[slice64])
def test_downsample_no_offset():
    """Anisotropic (2,2,1) downsampling of a 1024x1024x128 volume at origin."""
    delete_layer()
    storage, data = create_layer(size=(1024, 1024, 128, 1), offset=(0, 0, 0))
    cv = CloudVolume(storage.layer_path)
    assert len(cv.scales) == 5
    assert len(cv.available_mips) == 5

    cv.commit_info()

    # Fix: create_downsampling_tasks takes the layer path first and returns an
    # iterable of tasks; the old call passed MockTaskQueue() positionally as
    # the layer path. Enqueue via the mock queue like the sibling tests do.
    tq = MockTaskQueue()
    tasks = create_downsampling_tasks(storage.layer_path, mip=0, num_mips=4)
    tq.insert_all(tasks)

    cv.refresh_info()
    assert len(cv.available_mips) == 5
    assert np.array_equal(cv.mip_volume_size(0), [1024, 1024, 128])
    assert np.array_equal(cv.mip_volume_size(1), [512, 512, 128])
    assert np.array_equal(cv.mip_volume_size(2), [256, 256, 128])
    assert np.array_equal(cv.mip_volume_size(3), [128, 128, 128])
    assert np.array_equal(cv.mip_volume_size(4), [64, 64, 128])

    slice64 = np.s_[0:64, 0:64, 0:64]

    cv.mip = 0
    assert np.all(cv[slice64] == data[slice64])

    # Each mip halves x and y relative to the previous one; z is preserved.
    data_ds1 = downsample.downsample_with_averaging(data, factor=[2, 2, 1, 1])
    cv.mip = 1
    assert np.all(cv[slice64] == data_ds1[slice64])

    data_ds2 = downsample.downsample_with_averaging(data_ds1, factor=[2, 2, 1, 1])
    cv.mip = 2
    assert np.all(cv[slice64] == data_ds2[slice64])

    data_ds3 = downsample.downsample_with_averaging(data_ds2, factor=[2, 2, 1, 1])
    cv.mip = 3
    assert np.all(cv[slice64] == data_ds3[slice64])

    data_ds4 = downsample.downsample_with_averaging(data_ds3, factor=[2, 2, 1, 1])
    cv.mip = 4
    assert np.all(cv[slice64] == data_ds4[slice64])
def test_downsample_w_missing():
    """fill_missing=False raises on absent chunks; fill_missing=True zeroes them."""
    delete_layer()
    storage, data = create_layer(size=(512, 512, 128, 1), offset=(3, 7, 11))
    cv = CloudVolume(storage.layer_path)
    assert len(cv.scales) == 1
    assert len(cv.available_mips) == 1

    # Remove the uploaded chunks, then commit the in-memory info so the
    # layer metadata exists but the underlying data does not.
    delete_layer()
    cv.commit_info()

    tq = MockTaskQueue()

    # Downsampling empty storage without fill_missing must raise.
    try:
        tasks = create_downsampling_tasks(
            storage.layer_path, mip=0, num_mips=3, fill_missing=False)
        tq.insert_all(tasks)
    except EmptyVolumeException:
        pass

    tasks = create_downsampling_tasks(
        storage.layer_path, mip=0, num_mips=3, fill_missing=True)
    tq.insert_all(tasks)

    cv.refresh_info()
    assert len(cv.available_mips) == 4
    for m, (w, h) in enumerate(((512, 512), (256, 256), (128, 128), (64, 64))):
        assert np.array_equal(cv.mip_volume_size(m), [w, h, 128])
    assert np.all(cv.mip_voxel_offset(3) == (0, 0, 11))

    cv.mip = 0
    cv.fill_missing = True
    assert np.count_nonzero(cv[3:67, 7:71, 11:75]) == 0
def test_background_color():
    """fill_missing should paint absent chunks with the configured background_color."""
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type='image',
        data_type='uint8',
        encoding='raw',
        resolution=[1, 1, 1],
        voxel_offset=[0, 0, 0],
        volume_size=[128, 128, 1],
        mesh='mesh',
        chunk_size=[64, 64, 1],
    )

    vol = CloudVolume('file:///tmp/cloudvolume/empty_volume', mip=0, info=info)
    vol.commit_info()
    vol.cache.flush()

    # Bounded read of an entirely missing volume.
    vol = CloudVolume('file:///tmp/cloudvolume/empty_volume', mip=0,
                      background_color=1, fill_missing=True)
    assert np.count_nonzero(vol[:] - 1) == 0

    # Unbounded read past the edge of the volume.
    vol = CloudVolume('file:///tmp/cloudvolume/empty_volume', mip=0,
                      background_color=1, fill_missing=True, bounded=False)
    assert np.count_nonzero(vol[0:129, 0:129, 0:1] - 1) == 0

    # Same, but through the parallel download path.
    vol = CloudVolume('file:///tmp/cloudvolume/empty_volume', mip=0,
                      background_color=1, fill_missing=True, bounded=False,
                      parallel=2)
    assert np.count_nonzero(vol[0:129, 0:129, 0:1] - 1) == 0

    vol.cache.flush()
    delete_layer('/tmp/cloudvolume/empty_volume')
def make_info_file(volume_size, resolution, layer_dir, voxel_offset=[0, 0, 0], commit=True):
    """
    ---PURPOSE---
    Make the cloudvolume info file.
    ---INPUT---
    volume_size   [Nx,Ny,Nz] in voxels, e.g. [2160,2560,1271]
    resolution    size of x,y,z voxels in nanometers, e.g. [5000,5000,10000]
    layer_dir     local filesystem directory the layer is written to
    voxel_offset  [x,y,z] offset of the volume origin, in voxels
    commit        if True, will write the info/provenance file to disk.
                  if False, just creates it in memory
    """
    # NOTE(review): mutable default argument ([0, 0, 0]) — harmless here since
    # it is never mutated, but a None sentinel would be the safer idiom.
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type='segmentation',  # 'image' or 'segmentation'
        data_type='uint8',  #
        encoding='raw',  # other options: 'jpeg', 'compressed_segmentation' (req. uint32 or uint64)
        resolution=resolution,  # Size of X,Y,Z pixels in nanometers,
        voxel_offset=voxel_offset,  # values X,Y,Z values in voxels
        chunk_size=[1024, 1024, 1],  # rechunk of image X,Y,Z in voxels -- only used for downsampling task I think
        volume_size=volume_size,  # X,Y,Z size in voxels
    )
    vol = CloudVolume(f'file://{layer_dir}', info=info)
    vol.provenance.description = "Test on spock for profiling precomputed creation"
    vol.provenance.owners = ['*****@*****.**']  # list of contact email addresses
    if commit:
        vol.commit_info()  # generates info json file
        vol.commit_provenance()  # generates provenance json file
    print("Created CloudVolume info file: ", vol.info_cloudpath)
    return vol
def prepare_precomputed(precomputed_path, offset, size, resolution,
                        chunk_size, factor=(2,2,1), dtype='uint32'):
    """Create and commit a single-mip local segmentation layer; return its handle."""
    # Strict access settings except for non-aligned writes, which are allowed.
    cv_args = dict(
        bounded=True,
        fill_missing=False,
        autocrop=False,
        cache=False,
        compress_cache=None,
        cdn_cache=False,
        progress=False,
        provenance=None,
        compress=True,
        non_aligned_writes=True,
        parallel=False,
    )
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type='segmentation',
        data_type=dtype,
        # encoding='compressed_segmentation',
        encoding='raw',
        resolution=list(resolution),
        voxel_offset=np.array(offset),
        volume_size=np.array(size),
        chunk_size=chunk_size,
        max_mip=0,
        factor=factor,
    )
    cv = CloudVolume('file://' + precomputed_path, mip=0, info=info, **cv_args)
    cv.commit_info()
    return cv
def configure_multires_info(
    cloudpath:str,
    vertex_quantization_bits:int,
    mesh_dir:str
):
    """
    Computes properties and uploads a multires
    mesh info file
    """
    assert vertex_quantization_bits in (10, 16)

    vol = CloudVolume(cloudpath)
    mesh_dir = mesh_dir or vol.info.get("mesh", None)

    if "mesh" not in vol.info:
        vol.info['mesh'] = mesh_dir
        vol.commit_info()

    res = vol.meta.resolution(vol.mesh.meta.mip)

    cf = CloudFiles(cloudpath)
    info_filename = f'{mesh_dir}/info'
    mesh_info = cf.get_json(info_filename) or {}

    # Build the desired info off a copy so we can detect whether
    # anything actually changed before uploading.
    updated_info = copy.deepcopy(mesh_info)
    updated_info['@type'] = "neuroglancer_multilod_draco"
    updated_info['vertex_quantization_bits'] = vertex_quantization_bits
    updated_info['transform'] = [
        res[0], 0,      0,      0,
        0,      res[1], 0,      0,
        0,      0,      res[2], 0,
    ]
    updated_info['lod_scale_multiplier'] = 1.0

    if updated_info != mesh_info:
        cf.put_json(info_filename, updated_info, cache_control="no-cache")
def test_quantize_affinities():
    """Quantizing the first affinity channel to uint8 should match a manual scale."""
    qpath = 'file:///tmp/removeme/quantized/'

    delete_layer()
    delete_layer(qpath)

    storage, _ = create_layer(
        size=(256, 256, 128, 3), offset=(0, 0, 0), layer_type="affinities")
    cv = CloudVolume(storage.layer_path)

    shape = (128, 128, 64)
    slices = np.s_[:shape[0], :shape[1], :shape[2], :1]

    # Expected result: first channel scaled into uint8 range.
    data = cv[slices]
    data *= 255.0
    data = data.astype(np.uint8)

    task = QuantizeAffinitiesTask(
        source_layer_path=storage.layer_path,
        dest_layer_path=qpath,
        shape=shape,
        offset=(0, 0, 0),
    )

    # Write an info file describing the quantized destination layer.
    info = create_quantized_affinity_info(storage.layer_path, qpath, shape)
    qcv = CloudVolume(qpath, info=info)
    qcv.commit_info()
    create_downsample_scales(qpath, mip=0, ds_shape=shape)

    task.execute()

    qcv.mip = 0
    qdata = qcv[slices]

    assert np.all(data.shape == qdata.shape)
    assert np.all(data == qdata)
    assert data.dtype == np.uint8
def create_meshing_tasks(task_queue, layer_path, mip, shape=Vec(512, 512, 512)):
    """Enqueue one MeshTask per grid cell covering the volume at `mip`."""
    shape = Vec(*shape)
    max_simplification_error = 40

    vol = CloudVolume(layer_path, mip)

    # Record the mesh directory in the info file once, if absent.
    if 'mesh' not in vol.info:
        vol.info['mesh'] = 'mesh_mip_{}_err_{}'.format(
            mip, max_simplification_error)
        vol.commit_info()

    grid = xyzrange(vol.bounds.minpt, vol.bounds.maxpt, shape)
    for startpt in tqdm(grid, desc="Inserting Mesh Tasks"):
        task_queue.insert(MeshTask(
            layer_path=layer_path,
            mip=vol.mip,
            shape=shape.clone(),
            offset=startpt.clone(),
            max_simplification_error=max_simplification_error,
        ))
    task_queue.wait('Uploading MeshTasks')
def test_transfer():
    # Bbox version
    delete_layer()
    cv, _ = create_layer(size=(128, 64, 64, 1), offset=(0, 0, 0))

    cv.transfer_to('file:///tmp/removeme/transfer/', cv.bounds)

    listing = os.listdir('/tmp/removeme/transfer/1_1_1/')
    assert '0-64_0-64_0-64.gz' in listing
    assert len(listing) == 2
    assert os.path.exists('/tmp/removeme/transfer/info')
    assert os.path.exists('/tmp/removeme/transfer/provenance')

    # A second transfer must not clobber the destination's existing info.
    dcv = CloudVolume("file:///tmp/removeme/transfer")
    dcv.info["dont_touch_me_bro"] = True
    dcv.commit_info()

    cv.transfer_to('file:///tmp/removeme/transfer/', cv.bounds)

    dcv.refresh_info()
    assert 'dont_touch_me_bro' in dcv.info
def test_downsample_with_offset():
    """Downsampling must rescale the voxel offset along with the volume."""
    delete_layer()
    storage, data = create_layer(size=(512, 512, 128, 1), offset=(3, 7, 11))
    cv = CloudVolume(storage.layer_path)
    assert len(cv.scales) == 1
    assert len(cv.available_mips) == 1

    cv.commit_info()

    tq = MockTaskQueue()
    tasks = create_downsampling_tasks(storage.layer_path, mip=0, num_mips=3)
    tq.insert_all(tasks)

    cv.refresh_info()
    assert len(cv.available_mips) == 4
    assert np.array_equal(cv.mip_volume_size(0), [512, 512, 128])
    assert np.array_equal(cv.mip_volume_size(1), [256, 256, 128])
    assert np.array_equal(cv.mip_volume_size(2), [128, 128, 128])
    assert np.array_equal(cv.mip_volume_size(3), [64, 64, 128])

    # The (3, 7, 11) offset shrinks in x and y with each 2x2x1 downsample.
    assert np.all(cv.mip_voxel_offset(3) == (0, 0, 11))

    cv.mip = 0
    assert np.all(cv[3:67, 7:71, 11:75] == data[0:64, 0:64, 0:64])

    checks = (
        (1, [2, 2, 1, 1], np.s_[1:33, 3:35, 11:75], np.s_[0:32, 0:32, 0:64]),
        (2, [4, 4, 1, 1], np.s_[0:16, 1:17, 11:75], np.s_[0:16, 0:16, 0:64]),
        (3, [8, 8, 1, 1], np.s_[0:8, 0:8, 11:75], np.s_[0:8, 0:8, 0:64]),
    )
    for m, fac, cv_slice, ds_slice in checks:
        ds, = tinybrain.downsample_with_averaging(data, factor=fac)
        cv.mip = m
        assert np.all(cv[cv_slice] == ds[ds_slice])
def create_downsample_scales(
    layer_path, mip, ds_shape, axis='z',
    preserve_chunk_size=False, chunk_size=None,
    encoding=None
):
    """
    Add downsample scale entries to a layer's info file and commit it.

    layer_path: cloudpath of the layer to modify
    mip: base mip level the downsamples are derived from
    ds_shape: shape of the region a downsample task will process
    axis: the axis preserved (not reduced) during plane downsampling
    preserve_chunk_size: reuse the base mip's chunk size for new scales
    chunk_size: explicit chunk size for the newly added scales
    encoding: encoding for the new scales (defaults to base mip's encoding)

    Returns the result of vol.commit_info().
    """
    vol = CloudVolume(layer_path, mip)
    shape = min2(vol.volume_size, ds_shape)

    # sometimes we downsample a base layer of 512x512
    # into underlying chunks of 64x64 which permits more scales
    underlying_mip = (mip + 1) if (mip + 1) in vol.available_mips else mip
    underlying_shape = vol.mip_underlying(underlying_mip).astype(np.float32)

    if chunk_size:
        underlying_shape = Vec(*chunk_size).astype(np.float32)

    # The preserved axis must not limit how many scales are generated.
    toidx = {'x': 0, 'y': 1, 'z': 2}
    preserved_idx = toidx[axis]
    underlying_shape[preserved_idx] = float('inf')

    scales = downsample_scales.compute_plane_downsampling_scales(
        size=shape,
        preserve_axis=axis,
        max_downsampled_size=int(min(*underlying_shape)),
    )
    scales = scales[1:]  # omit (1,1,1)
    # Express each factor relative to mip 0 by folding in the current ratio.
    scales = [list(map(int, vol.downsample_ratio * Vec(*factor3))) for factor3 in scales]

    if len(scales) == 0:
        print("WARNING: No scales generated.")

    for scale in scales:
        vol.add_scale(scale, encoding=encoding, chunk_size=chunk_size)

    # Decide which chunk size the new scales should carry.
    if chunk_size is None:
        if preserve_chunk_size or len(scales) == 0:
            chunk_size = vol.scales[mip]['chunk_sizes']
        else:
            chunk_size = vol.scales[mip + 1]['chunk_sizes']
    else:
        chunk_size = [chunk_size]

    if encoding is None:
        encoding = vol.scales[mip]['encoding']

    # Apply the chosen chunk size to every newly added scale.
    for i in range(mip + 1, mip + len(scales) + 1):
        vol.scales[i]['chunk_sizes'] = chunk_size

    return vol.commit_info()
def test_mesh(compress):
    """Meshing a labeled cube should produce exactly one remapped mesh file."""
    delete_layer()
    storage, _ = create_layer(
        size=(64, 64, 64, 1), offset=(0, 0, 0), layer_type="segmentation")
    cv = CloudVolume(storage.layer_path)

    # A cube of ones with a one-voxel shell of zeros around it.
    data = np.zeros(shape=(64, 64, 64, 1), dtype=np.uint32)
    data[1:-1, 1:-1, 1:-1, :] = 1
    cv[0:64, 0:64, 0:64] = data

    cv.info['mesh'] = 'mesh'
    cv.commit_info()

    t = MeshTask(
        shape=(64, 64, 64),
        offset=(0, 0, 0),
        layer_path=storage.layer_path,
        mip=0,
        remap_table={"1": "10"},
        low_padding=0,
        high_padding=1,
        compress=compress,
    )
    t.execute()

    assert storage.get_file('mesh/10:0:0-64_0-64_0-64') is not None
    assert list(storage.list_files('mesh/')) == ['mesh/10:0:0-64_0-64_0-64']
def create_image_layer(s3_bucket, tif_dimensions, voxel_size, num_resolutions):
    """Creates segmentation layer for skeletons

    Arguments:
        s3_bucket {str} -- path to SWC file
        voxel_size {list} -- 3 floats for voxel size in nm
        num_resolutions {int} -- number of resolutions for the image

    Returns:
        vols {list} -- List of num_resolutions CloudVolume objects, starting from lowest resolution
    """
    # create cloudvolume info
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type="image",
        data_type="uint16",  # Channel images might be 'uint8'
        encoding="raw",  # raw, jpeg, compressed_segmentation, fpzip, kempressed
        resolution=voxel_size,  # Voxel scaling, units are in nanometers
        voxel_offset=[0, 0, 0],  # x,y,z offset in voxels from the origin
        # Pick a convenient size for your underlying chunk representation
        # Powers of two are recommended, doesn't need to cover image exactly
        chunk_size=[int(d / 4) for d in tif_dimensions],  # units are voxels
        # USING MAXIMUM VOLUME size
        volume_size=[i * 2 ** (num_resolutions - 1) for i in tif_dimensions],
    )
    # get cloudvolume info
    vol = CloudVolume(s3_bucket, info=info, parallel=False)  # compress = False

    # Fix: use a plain loop for the side-effecting add_scale calls rather than
    # building and discarding a list comprehension.
    # scales resolution up, volume size down
    for i in range(num_resolutions):
        vol.add_scale((2 ** i, 2 ** i, 2 ** i))  # ignore chunk size
    vol.commit_info()

    vols = [
        CloudVolume(s3_bucket, mip=i, parallel=False)  # parallel False, compress
        for i in range(num_resolutions - 1, -1, -1)
    ]
    return vols
def create_downsample_scales(layer_path, mip, ds_shape, axis='z',
                             preserve_chunk_size=False):
    """Add plane-downsample scales to a layer's info file and commit it."""
    vol = CloudVolume(layer_path, mip)
    shape = min2(vol.volume_size, ds_shape)

    # sometimes we downsample a base layer of 512x512
    # into underlying chunks of 64x64 which permits more scales
    underlying_mip = (mip + 1) if (mip + 1) in vol.available_mips else mip
    underlying_shape = vol.mip_underlying(underlying_mip).astype(np.float32)

    # The preserved axis never limits the number of generated scales.
    axis_index = {'x': 0, 'y': 1, 'z': 2}[axis]
    underlying_shape[axis_index] = float('inf')

    scales = downsample_scales.compute_plane_downsampling_scales(
        size=shape,
        preserve_axis=axis,
        max_downsampled_size=int(min(*underlying_shape)),
    )[1:]  # omit (1,1,1)

    for ratio in scales:
        # Fold in the current downsample ratio so factors are mip-0 relative.
        vol.add_scale(list(map(int, vol.downsample_ratio * Vec(*ratio))))

    if preserve_chunk_size:
        for i in range(1, len(vol.scales)):
            vol.scales[i]['chunk_sizes'] = vol.scales[0]['chunk_sizes']

    return vol.commit_info()
def ingest(args):
    """
    Ingest an HDF file to a CloudVolume bucket

    Reads the dataset named by args.hdf_keys_to_dataset, writes an info and
    provenance file to args.dst_path, then uploads the volume chunk by chunk,
    recording completed chunks in progress/ so an interrupted run can resume.
    """
    if args.local_hdf_path:
        hdf_file = h5py.File(args.local_hdf_path, "r")
    else:
        with Storage(args.cloud_src_path) as storage:
            hdf_file = h5py.File(storage.get_file(args.cloud_hdf_filename), "r")

    # Walk down to the requested dataset inside the HDF hierarchy.
    cur_hdf_group = hdf_file
    for group_name in args.hdf_keys_to_dataset:
        cur_hdf_group = cur_hdf_group[group_name]
    hdf_dataset = cur_hdf_group

    if args.zyx:
        dataset_shape = np.array(
            [hdf_dataset.shape[2], hdf_dataset.shape[1], hdf_dataset.shape[0]])
    else:
        dataset_shape = np.array([*hdf_dataset.shape])

    if args.layer_type == "image":
        data_type = "uint8"
    else:
        data_type = "uint64"

    voxel_offset = args.voxel_offset
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type=args.layer_type,
        data_type=data_type,
        encoding="raw",
        resolution=args.resolution,
        voxel_offset=voxel_offset,
        chunk_size=args.chunk_size,
        volume_size=dataset_shape,
    )
    provenance = {
        "description": args.provenance_description,
        "owners": [args.owner]
    }
    vol = CloudVolume(args.dst_path, info=info, provenance=provenance)
    vol.commit_info()
    vol.commit_provenance()

    # Enumerate chunk start coordinates as plain int tuples so they compare
    # equal to the tuples parsed back from progress filenames.
    all_files = set()
    for x in np.arange(voxel_offset[0], voxel_offset[0] + dataset_shape[0],
                       args.chunk_size[0]):
        for y in np.arange(voxel_offset[1], voxel_offset[1] + dataset_shape[1],
                           args.chunk_size[1]):
            for z in np.arange(voxel_offset[2], voxel_offset[2] + dataset_shape[2],
                               args.chunk_size[2]):
                all_files.add((int(x), int(y), int(z)))

    progress_dir = mkdir("progress/")  # unlike os.mkdir doesn't crash on prexisting
    done_files = set()
    for done_file in os.listdir(progress_dir):
        # Fix: progress filenames are "x,y,z". The old code wrote str((x, y, z))
        # but parsed with split(","), yielding string tuples like ('(0', ' 64',
        # ' 0)') that never matched all_files — so resume re-uploaded everything.
        done_files.add(tuple(int(coord) for coord in done_file.split(",")))
    to_upload = all_files.difference(done_files)

    for chunk_start_tuple in to_upload:
        chunk_start = np.array(list(chunk_start_tuple))
        end_of_dataset = np.array(voxel_offset) + dataset_shape
        chunk_end = chunk_start + np.array(args.chunk_size)
        chunk_end = Vec(*chunk_end)
        chunk_end = Vec.clamp(chunk_end, Vec(0, 0, 0), end_of_dataset)
        chunk_hdf_start = chunk_start - voxel_offset
        chunk_hdf_end = chunk_end - voxel_offset
        if args.zyx:
            chunk = hdf_dataset[chunk_hdf_start[2]:chunk_hdf_end[2],
                                chunk_hdf_start[1]:chunk_hdf_end[1],
                                chunk_hdf_start[0]:chunk_hdf_end[0]]
            chunk = chunk.T
        else:
            chunk = hdf_dataset[chunk_hdf_start[0]:chunk_hdf_end[0],
                                chunk_hdf_start[1]:chunk_hdf_end[1],
                                chunk_hdf_start[2]:chunk_hdf_end[2]]
        print("Processing ", chunk_start_tuple)
        array = np.array(chunk, dtype=np.dtype(data_type), order="F")
        vol[chunk_start[0]:chunk_end[0],
            chunk_start[1]:chunk_end[1],
            chunk_start[2]:chunk_end[2]] = array
        # Mark the chunk done using the same "x,y,z" format parsed above.
        touch(os.path.join(progress_dir, ",".join(map(str, chunk_start_tuple))))
def transfer_to(self, cloudpath, bbox, mip, block_size=None, compress=True, compress_level=None):
    """
    Transfer files from one storage location to another, bypassing
    volume painting. This enables using a single CloudVolume instance
    to transfer big volumes. In some cases, gsutil or aws s3 cli tools
    may be more appropriate. This method is provided for convenience. It
    may be optimized for better performance over time as demand requires.

    cloudpath (str): path to storage layer
    bbox (Bbox object): ROI to transfer
    mip (int): resolution level
    block_size (int): number of file chunks to transfer per I/O batch.
    compress (bool): Set to False to upload as uncompressed
    """
    from cloudvolume import CloudVolume

    if mip is None:
        mip = self.config.mip

    if self.is_sharded(mip):
        raise exceptions.UnsupportedFormatError(
            f"Sharded sources are not supported. got: {self.meta.cloudpath}"
        )

    # The requested bbox must already be chunk aligned; otherwise raw file
    # copying would produce misaligned data in the destination.
    bbox = Bbox.create(bbox, self.meta.bounds(mip))
    realized_bbox = bbox.expand_to_chunk_size(
        self.meta.chunk_size(mip), offset=self.meta.voxel_offset(mip))
    realized_bbox = Bbox.clamp(realized_bbox, self.meta.bounds(mip))

    if bbox != realized_bbox:
        raise exceptions.AlignmentError(
            "Unable to transfer non-chunk aligned bounding boxes. Requested: {}, Realized: {}"
            .format(bbox, realized_bbox))

    # Estimate compressed chunk size to choose how many chunks per batch.
    default_block_size_MB = 50  # MB
    chunk_MB = self.meta.chunk_size(mip).rectVolume() * np.dtype(
        self.meta.dtype).itemsize * self.meta.num_channels
    if self.meta.layer_type == 'image':
        # kind of an average guess for some EM datasets, have seen up to 1.9x and as low as 1.1
        # affinites are also images, but have very different compression ratios. e.g. 3x for kempressed
        chunk_MB /= 1.3
    else:  # segmentation
        chunk_MB /= 100.0  # compression ratios between 80 and 800....
    chunk_MB /= 1024.0 * 1024.0

    if block_size:
        step = block_size
    else:
        step = int(default_block_size_MB // chunk_MB) + 1

    try:
        destvol = CloudVolume(cloudpath, mip=mip)
    except exceptions.InfoUnavailableError:
        # Destination has no info file: seed it from the source metadata.
        destvol = CloudVolume(cloudpath, mip=mip, info=self.meta.info,
                              provenance=self.meta.provenance.serialize())
        destvol.commit_info()
        destvol.commit_provenance()
    except exceptions.ScaleUnavailableError:
        # Destination exists but lacks this mip: append the missing scales.
        destvol = CloudVolume(cloudpath)
        # NOTE(review): starting at len(destvol.scales) + 1 appears to skip
        # the first missing scale (index len(destvol.scales)) — looks like an
        # off-by-one; confirm against upstream before changing.
        for i in range(len(destvol.scales) + 1, len(self.meta.scales)):
            destvol.scales.append(self.meta.scales[i])
        destvol.commit_info()
        destvol.commit_provenance()

    if destvol.image.is_sharded(mip):
        raise exceptions.UnsupportedFormatError(
            f"Sharded destinations are not supported. got: {destvol.cloudpath}"
        )

    num_blocks = np.ceil(
        self.meta.bounds(mip).volume() / self.meta.chunk_size(mip).rectVolume()) / step
    num_blocks = int(np.ceil(num_blocks))

    cloudpaths = chunknames(bbox, self.meta.bounds(mip),
                            self.meta.key(mip), self.meta.chunk_size(mip),
                            protocol=self.meta.path.protocol)

    pbar = tqdm(
        desc='Transferring Blocks of {} Chunks'.format(step),
        unit='blocks',
        disable=(not self.config.progress),
        total=num_blocks,
    )

    cfsrc = CloudFiles(self.meta.cloudpath, secrets=self.config.secrets)
    cfdest = CloudFiles(cloudpath)

    def check(files):
        # Fail loudly if any source chunk was missing or errored mid-copy.
        errors = [
            file for file in files if \
            (file['content'] is None or file['error'] is not None)
        ]
        if errors:
            error_paths = [f['path'] for f in errors]
            raise exceptions.EmptyFileException(
                "{} were empty or had IO errors.".format(", ".join(error_paths)))
        return files

    with pbar:
        # Copy raw (already encoded) chunks in batches of `step`, transcoding
        # the compression on the fly if the destination setting differs.
        for srcpaths in sip(cloudpaths, step):
            files = check(cfsrc.get(srcpaths, raw=True))
            cfdest.puts(
                compression.transcode(files, encoding=compress,
                                      level=compress_level, in_place=True),
                compress=compress,
                content_type=tx.content_type(destvol),
                raw=True
            )
            pbar.update()
def save_cloudvolume(img, path, mode, origin, mip=0, resolution=None,
                     flip_xy=False, voxel_offset=None, volume_size=None,
                     chunk_size=(64, 64, 64), factor=(2, 2, 2)):
    """Save images to a CloudVolume layer.

    Parameters
    ----------
    img : array_like
        The image/volume to save.
    path : str
        The directory to write the layer to (must include a protocol,
        e.g. ``file://``).
    mode : {'image', 'segmentation'}
        Layer type; segmentation layers are written compressed.
    origin : sequence of int
        Position of this block in the full volume; origin[0] provides the
        z start index for placing the block.
    mip : int
        Highest mip level to write; the block is strided down by `factor`
        for each successive level.
    resolution, voxel_offset, volume_size, chunk_size, factor
        Passed through to CloudVolume.create_new_info when the info file
        does not exist yet.
    """
    if mode not in ['image', 'segmentation']:
        raise ValueError(
            'Invalid mode {}. Must be one of "image", "segmentation"'.format(
                mode))
    if not re.search(r'^[a-zA-Z\d]+://$', path.split(os.path.sep)[0]):
        raise ValueError('No protocol specified in {}.'.format(path))

    if not os.path.isfile(os.path.join(path, 'info')):
        # Only MPI rank 0 writes the info file; everyone else waits below.
        if MPI.COMM_WORLD.Get_rank() == 0:
            # The two modes differ only in dtype/encoding, so build one info.
            if mode == 'image':
                data_type, encoding = 'uint8', 'raw'
            else:
                data_type, encoding = 'uint32', 'compressed_segmentation'
            info = CloudVolume.create_new_info(
                num_channels=img.shape[-1],
                layer_type=mode,
                data_type=data_type,
                encoding=encoding,
                resolution=resolution,
                # Fix: was `voxel_offset=offset` — `offset` is undefined here;
                # the parameter is named voxel_offset.
                voxel_offset=voxel_offset,
                volume_size=list(volume_size),
                chunk_size=chunk_size,
                max_mip=mip,
                factor=factor)
            if mode == 'segmentation' and mip >= 1:
                # Propagate the block size required by compressed_segmentation
                # to every downsampled scale.
                for i in range(1, mip + 1):
                    info['scales'][i]['compressed_segmentation_block_size'] = \
                        info['scales'][0]['compressed_segmentation_block_size']
            cv_args = dict(
                bounded=True, fill_missing=True, autocrop=False,
                cache=False, compress_cache=None, cdn_cache=False,
                progress=False, info=info, provenance=None, compress=True,
                non_aligned_writes=True, parallel=1)
            cv = CloudVolume(path, mip=0, **cv_args)
            cv.commit_info()
        if MPI.COMM_WORLD.Get_size() > 1:
            MPI.COMM_WORLD.barrier()

    # Reorder axes so the array is x, y, z for CloudVolume.
    if flip_xy:
        img = np.transpose(img, axes=(1, 2, 0))
    else:
        img = np.transpose(img, axes=(2, 1, 0))

    cv_args = dict(
        bounded=True, fill_missing=True, autocrop=False,
        cache=False, compress_cache=None, cdn_cache=False,
        progress=False, info=None, provenance=None,
        compress=(mode == 'segmentation'),
        non_aligned_writes=True, parallel=1)

    for m in range(mip + 1):
        cv = CloudVolume(path, mip=m, **cv_args)
        offset = cv.mip_voxel_offset(m)
        step = np.power(np.asarray(factor), m)
        cv_z_start = origin[0] // step[2] + offset[2]
        cv_z_size = img.shape[2]
        # Fix: was `= loaded_vol`, an undefined name — write the (possibly
        # downsampled) image block.
        cv[:, :, cv_z_start:cv_z_start + cv_z_size] = img
        # Naive stride-based downsample for the next mip level.
        img = img[::factor[0], ::factor[1], ::factor[2]]

    return cv
def create_hypersquare_ingest_tasks(hypersquare_bucket_name, dataset_name,
                                    hypersquare_chunk_size, resolution,
                                    voxel_offset, volume_size, overlap):
    """
    Commit info files for the image and segmentation layers of a hypersquare
    dataset, then return an iterator yielding one HyperSquareTask per volume
    directory found in the hypersquare bucket.
    """
    def crtinfo(layer_type, dtype, encoding):
        # Shared info-file template; only type/dtype/encoding differ per layer.
        return CloudVolume.create_new_info(
            num_channels=1,
            layer_type=layer_type,
            data_type=dtype,
            encoding=encoding,
            resolution=resolution,
            voxel_offset=voxel_offset,
            volume_size=volume_size,
            chunk_size=[56, 56, 56],
        )

    imginfo = crtinfo('image', 'uint8', 'jpeg')
    seginfo = crtinfo('segmentation', 'uint16', 'raw')

    # NOTE(review): `scales` is computed but never used below — confirm
    # whether downsample scales were meant to be added to the info files.
    scales = downsample_scales.compute_plane_downsampling_scales(
        hypersquare_chunk_size)[1:]  # omit (1,1,1)

    IMG_LAYER_NAME = 'image'
    SEG_LAYER_NAME = 'segmentation'

    imgvol = CloudVolume(dataset_name, IMG_LAYER_NAME, 0, info=imginfo)
    segvol = CloudVolume(dataset_name, SEG_LAYER_NAME, 0, info=seginfo)

    print("Creating info files for image and segmentation...")
    imgvol.commit_info()
    segvol.commit_info()

    def crttask(volname, tasktype, layer_name):
        return HyperSquareTask(
            bucket_name=hypersquare_bucket_name,
            dataset_name=dataset_name,
            layer_name=layer_name,
            volume_dir=volname,
            layer_type=tasktype,
            overlap=overlap,
            resolution=resolution,
        )

    print("Listing hypersquare bucket...")
    volumes_listing = lib.gcloud_ls('gs://{}/'.format(hypersquare_bucket_name))

    # download this from:
    # with open('e2198_volumes.json', 'r') as f:
    #   volumes_listing = json.loads(f.read())

    # Keep only the volume directory name from each gs:// listing entry.
    volumes_listing = [x.split('/')[-2] for x in volumes_listing]

    class CreateHypersquareIngestTaskIterator(object):
        def __len__(self):
            return len(volumes_listing)

        def __iter__(self):
            # Currently only segmentation tasks are yielded; the image task
            # creation is intentionally commented out.
            for cloudpath in volumes_listing:
                # img_task = crttask(cloudpath, 'image', IMG_LAYER_NAME)
                yield crttask(cloudpath, 'segmentation', SEG_LAYER_NAME)
                # seg_task.execute()

    return CreateHypersquareIngestTaskIterator()
def create_contrast_normalization_tasks(src_path, dest_path, levels_path=None,
                                        shape=None, mip=0, clip_fraction=0.01,
                                        fill_missing=False, translate=(0, 0, 0),
                                        minval=None, maxval=None, bounds=None):
    """Return an iterator of ContrastNormalizationTasks covering `bounds`.

    Copies the source info file to the destination (truncated to `mip`),
    creates downsample scales, records provenance, and yields one task per
    `shape`-sized gridpoint.
    """
    srcvol = CloudVolume(src_path, mip=mip)
    try:
        dvol = CloudVolume(dest_path, mip=mip)
    except Exception:  # no info file
        info = copy.deepcopy(srcvol.info)
        dvol = CloudVolume(dest_path, mip=mip, info=info)
        dvol.info['scales'] = dvol.info['scales'][:mip + 1]
        dvol.commit_info()

    if shape is None:  # fix: was `shape == None`
        shape = Bbox((0, 0, 0), (2048, 2048, 64))
        shape = shape.shrink_to_chunk_size(dvol.underlying).size3()
        # Fix: guard against bounds=None (the default) — the old code called
        # bounds.size3() unconditionally and crashed when no bounds were given.
        if bounds is not None:
            shape = Vec.clamp(shape, (1, 1, 1), bounds.size3())
    shape = Vec(*shape)

    create_downsample_scales(dest_path, mip=mip, ds_shape=shape,
                             preserve_chunk_size=True)
    dvol.refresh_info()

    bounds = get_bounds(srcvol, bounds, shape, mip)

    class ContrastNormalizationTaskIterator(object):
        def __len__(self):
            return int(reduce(operator.mul, np.ceil(bounds.size3() / shape)))

        def __iter__(self):
            for startpt in xyzrange(bounds.minpt, bounds.maxpt, shape):
                # Trim the final tasks at the volume edge.
                task_shape = min2(shape.clone(), srcvol.bounds.maxpt - startpt)
                yield ContrastNormalizationTask(
                    src_path=src_path,
                    dest_path=dest_path,
                    levels_path=levels_path,
                    shape=task_shape,
                    offset=startpt.clone(),
                    clip_fraction=clip_fraction,
                    mip=mip,
                    fill_missing=fill_missing,
                    translate=translate,
                    minval=minval,
                    maxval=maxval,
                )

    dvol.provenance.processing.append({
        'method': {
            'task': 'ContrastNormalizationTask',
            'src_path': src_path,
            'dest_path': dest_path,
            'shape': Vec(*shape).tolist(),
            'clip_fraction': clip_fraction,
            'mip': mip,
            'translate': Vec(*translate).tolist(),
            'minval': minval,
            'maxval': maxval,
            'bounds': [bounds.minpt.tolist(), bounds.maxpt.tolist()],
        },
        'by': OPERATOR_CONTACT,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    dvol.commit_provenance()

    return ContrastNormalizationTaskIterator()
def create_transfer_tasks(src_layer_path, dest_layer_path, chunk_size=None,
                          shape=Vec(2048, 2048, 64), fill_missing=False,
                          translate=(0, 0, 0), bounds=None, mip=0,
                          preserve_chunk_size=True, encoding=None):
    """
    Transfer data from one data layer to another. It's possible
    to transfer from a lower resolution mip level within a given
    bounding box. The bounding box should be specified in terms of
    the highest resolution.
    """
    shape = Vec(*shape)
    vol = CloudVolume(src_layer_path, mip=mip)
    # translate is given at mip 0; rescale it to the working mip.
    translate = Vec(*translate) // vol.downsample_ratio

    if not chunk_size:
        chunk_size = vol.info['scales'][mip]['chunk_sizes'][0]
    chunk_size = Vec(*chunk_size)

    try:
        dvol = CloudVolume(dest_layer_path, mip=mip)
    except Exception:  # no info file
        # Seed the destination with a copy of the source's info file.
        info = copy.deepcopy(vol.info)
        dvol = CloudVolume(dest_layer_path, info=info)
        dvol.commit_info()

    if encoding is not None:
        dvol.info['scales'][mip]['encoding'] = encoding
    # Truncate scales above the working mip and pin the chunk size.
    dvol.info['scales'] = dvol.info['scales'][:mip + 1]
    dvol.info['scales'][mip]['chunk_sizes'] = [chunk_size.tolist()]
    dvol.commit_info()

    create_downsample_scales(dest_layer_path, mip=mip, ds_shape=shape,
                             preserve_chunk_size=preserve_chunk_size,
                             encoding=encoding)

    if bounds is None:
        bounds = vol.bounds.clone()
    else:
        # bounds are specified at mip 0; convert and clamp to the destination.
        bounds = vol.bbox_to_mip(bounds, mip=0, to_mip=mip)
        bounds = Bbox.clamp(bounds, dvol.bounds)

    dvol_bounds = dvol.mip_bounds(mip).clone()

    class TransferTaskIterator(object):
        def __len__(self):
            return int(reduce(operator.mul, np.ceil(bounds.size3() / shape)))

        def __iter__(self):
            for startpt in xyzrange(bounds.minpt, bounds.maxpt, shape):
                # Trim tasks that poke past the destination's bounds.
                task_shape = min2(shape.clone(), dvol_bounds.maxpt - startpt)
                yield TransferTask(
                    src_path=src_layer_path,
                    dest_path=dest_layer_path,
                    shape=task_shape,
                    offset=startpt.clone(),
                    fill_missing=fill_missing,
                    translate=translate,
                    mip=mip,
                )

    job_details = {
        'method': {
            'task': 'TransferTask',
            'src': src_layer_path,
            'dest': dest_layer_path,
            'shape': list(map(int, shape)),
            'fill_missing': fill_missing,
            'translate': list(map(int, translate)),
            'bounds': [bounds.minpt.tolist(), bounds.maxpt.tolist()],
            'mip': mip,
        },
        'by': OPERATOR_CONTACT,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    }

    # Record provenance on the destination (re-opened to pick up new scales)
    # and, when supported, on the source as well.
    dvol = CloudVolume(dest_layer_path)
    dvol.provenance.sources = [src_layer_path]
    dvol.provenance.processing.append(job_details)
    dvol.commit_provenance()

    if vol.path.protocol != 'boss':
        vol.provenance.processing.append(job_details)
        vol.commit_provenance()

    return TransferTaskIterator()
def create_meshing_tasks(
    layer_path, mip, shape=(448, 448, 448), simplification=True,
    max_simplification_error=40, mesh_dir=None, cdn_cache=False,
    dust_threshold=None, object_ids=None, progress=False,
    fill_missing=False, encoding='precomputed', spatial_index=True,
    sharded=False, compress='gzip'
  ):
  """Create MeshTasks that tile the given mip level of a segmentation layer."""
  shape = Vec(*shape)
  vol = CloudVolume(layer_path, mip)

  if mesh_dir is None:
    mesh_dir = 'mesh_mip_{}_err_{}'.format(mip, max_simplification_error)

  # Register the mesh directory in the volume info if it is not there yet.
  if not 'mesh' in vol.info:
    vol.info['mesh'] = mesh_dir
    vol.commit_info()

  cloud_files = CloudFiles(layer_path)
  info_filename = '{}/info'.format(mesh_dir)
  mesh_info = cloud_files.get_json(info_filename) or {}
  mesh_info['@type'] = 'neuroglancer_legacy_mesh'
  mesh_info['mip'] = int(vol.mip)
  mesh_info['chunk_size'] = shape.tolist()
  if spatial_index:
    mesh_info['spatial_index'] = {
      'resolution': vol.resolution.tolist(),
      'chunk_size': (shape * vol.resolution).tolist(),
    }
  cloud_files.put_json(info_filename, mesh_info)

  # 0 disables simplification entirely; 100 is the standard factor.
  simplification_factor = 100 if simplification else 0
  cache_control = '' if cdn_cache else 'no-cache'

  class MeshTaskIterator(FinelyDividedTaskIterator):
    def task(self, shape, offset):
      return MeshTask(
        shape=shape.clone(),
        offset=offset.clone(),
        layer_path=layer_path,
        mip=vol.mip,
        simplification_factor=simplification_factor,
        max_simplification_error=max_simplification_error,
        mesh_dir=mesh_dir,
        cache_control=cache_control,
        dust_threshold=dust_threshold,
        progress=progress,
        object_ids=object_ids,
        fill_missing=fill_missing,
        encoding=encoding,
        spatial_index=spatial_index,
        sharded=sharded,
        compress=compress,
      )

    def on_finish(self):
      vol.provenance.processing.append({
        'method': {
          'task': 'MeshTask',
          'layer_path': layer_path,
          'mip': vol.mip,
          'shape': shape.tolist(),
          'simplification': simplification,
          'max_simplification_error': max_simplification_error,
          'mesh_dir': mesh_dir,
          'fill_missing': fill_missing,
          'cdn_cache': cdn_cache,
          'dust_threshold': dust_threshold,
          'encoding': encoding,
          'object_ids': object_ids,
          'spatial_index': spatial_index,
          'sharded': sharded,
          'compress': compress,
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
      })
      vol.commit_provenance()

  return MeshTaskIterator(vol.mip_bounds(mip), shape)
def test_redirects(): info = CloudVolume.create_new_info( num_channels=1, # Increase this number when we add more tests for RGB layer_type='image', data_type='uint8', encoding='raw', resolution=[1, 1, 1], voxel_offset=[0, 0, 0], volume_size=[128, 128, 64], mesh='mesh', chunk_size=[64, 64, 64], ) vol = CloudVolume('file:///tmp/cloudvolume/redirects_0', mip=0, info=info) vol.commit_info() vol.refresh_info() vol.info['redirect'] = 'file:///tmp/cloudvolume/redirects_0' vol.commit_info() vol.refresh_info() del vol.info['redirect'] for i in range(0, 10): info['redirect'] = 'file:///tmp/cloudvolume/redirects_' + str(i + 1) vol = CloudVolume('file:///tmp/cloudvolume/redirects_' + str(i), mip=0, info=info) vol.commit_info() else: del vol.info['redirect'] vol.commit_info() vol = CloudVolume('file:///tmp/cloudvolume/redirects_0', mip=0) assert vol.cloudpath == 'file:///tmp/cloudvolume/redirects_9' info['redirect'] = 'file:///tmp/cloudvolume/redirects_10' vol = CloudVolume('file:///tmp/cloudvolume/redirects_9', mip=0, info=info) vol.commit_info() try: CloudVolume('file:///tmp/cloudvolume/redirects_0', mip=0) assert False except exceptions.TooManyRedirects: pass vol = CloudVolume('file:///tmp/cloudvolume/redirects_9', max_redirects=0) del vol.info['redirect'] vol.commit_info() vol = CloudVolume('file:///tmp/cloudvolume/redirects_5', max_redirects=0) vol.info['redirect'] = 'file:///tmp/cloudvolume/redirects_1' vol.commit_info() try: vol = CloudVolume('file:///tmp/cloudvolume/redirects_5') assert False except exceptions.CyclicRedirect: pass vol.info['redirect'] = 'file:///tmp/cloudvolume/redirects_6' vol.commit_info() vol = CloudVolume('file:///tmp/cloudvolume/redirects_1') try: vol[:, :, :] = 1 assert False except exceptions.ReadOnlyException: pass for i in range(0, 10): delete_layer('/tmp/cloudvolume/redirects_' + str(i))
def setup_environment(dry_run, volume_start, volume_stop, volume_size, layer_path,
                      max_ram_size, output_patch_size, input_patch_size, channel_num,
                      dtype, output_patch_overlap, crop_chunk_margin, mip,
                      thumbnail_mip, max_mip, queue_name, visibility_timeout,
                      thumbnail, encoding, voxel_size, overwrite_info, verbose):
    """Prepare storage info files and produce tasks.

    Works out a near-optimal output chunk/block size for the given RAM budget
    by brute-force search over patch counts, optionally uploads the
    CloudVolume info files (skipped on dry_run), and returns the list of
    bounding boxes covering the output volume.

    NOTE(review): queue_name and visibility_timeout are accepted but unused
    in this body — presumably consumed by the caller/CLI layer; confirm.
    All coordinates are z, y, x ordered (reversed before CloudVolume, which
    is Fortran ordered).
    """
    assert not (volume_stop is None and volume_size is None)
    if isinstance(volume_start, tuple):
        volume_start = Vec(*volume_start)
    if isinstance(volume_stop, tuple):
        volume_stop = Vec(*volume_stop)
    if isinstance(volume_size, tuple):
        volume_size = Vec(*volume_size)

    if input_patch_size is None:
        input_patch_size = output_patch_size

    if volume_size:
        assert volume_stop is None
        volume_stop = volume_start + volume_size
    else:
        volume_size = volume_stop - volume_start
    print('\noutput volume start: ' + tuple2string(volume_start))
    print('output volume stop: ' + tuple2string(volume_stop))
    print('output volume size: ' + tuple2string(volume_size))

    if output_patch_overlap is None:
        # use 50% patch overlap in default
        output_patch_overlap = tuple(s // 2 for s in output_patch_size)
    assert output_patch_overlap[1] == output_patch_overlap[2]

    if crop_chunk_margin is None:
        crop_chunk_margin = output_patch_overlap
    assert crop_chunk_margin[1] == crop_chunk_margin[2]
    print('margin size: ' + tuple2string(crop_chunk_margin))

    if thumbnail:
        # thumbnail requires maximum mip level of 5
        thumbnail_mip = max(thumbnail_mip, 5)

    patch_stride = tuple(
        s - o for s, o in zip(output_patch_size, output_patch_overlap))
    # total number of voxels per patch in one stride
    # FIX: np.product is deprecated and removed in NumPy 2.0; use np.prod.
    patch_voxel_num = np.prod(patch_stride)
    # use half of the maximum ram size to store output buffer
    ideal_total_patch_num = int(
        max_ram_size * 1e9 / 2 / 4 / channel_num / patch_voxel_num)
    # the xy size should be the same
    assert output_patch_size[1] == output_patch_size[2]

    # compute the output chunk/block size in cloud storage
    # assume that the crop margin size is the same with the patch overlap
    patch_num_start = int(ideal_total_patch_num**(1. / 3.) / 2)
    patch_num_stop = patch_num_start * 3

    # find the patch number solution with minimum cost by bruteforce search
    cost = sys.float_info.max
    patch_num = None
    # patch number in x and y
    max_factor = 2**max_mip
    factor = 2**mip
    for pnxy in range(patch_num_start, patch_num_stop):
        # xy extent must be divisible by the largest downsampling factor
        if (pnxy * patch_stride[2] + output_patch_overlap[2] -
                2 * crop_chunk_margin[2]) % max_factor != 0:
            continue
        # patch number in z
        for pnz in range(patch_num_start, patch_num_stop):
            if (pnz * patch_stride[0] + output_patch_overlap[0] -
                    2 * crop_chunk_margin[0]) % factor != 0:
                continue
            # cost penalizes deviation of the total patch count from ideal
            current_cost = (
                pnxy * pnxy * pnz / ideal_total_patch_num - 1)**2
            if current_cost < cost:
                cost = current_cost
                patch_num = (pnz, pnxy, pnxy)

    print('\n--input-patch-size ', tuple2string(input_patch_size))
    print('--output-patch-size ', tuple2string(output_patch_size))
    print('--output-patch-overlap ', tuple2string(output_patch_overlap))
    print('--output-patch-stride ', tuple2string(patch_stride))
    print('--patch-num ', patch_num)

    assert mip >= 0
    block_mip = (mip + thumbnail_mip) // 2
    block_factor = 2**block_mip

    output_chunk_size = tuple(
        n * s + o - 2 * c for n, s, o, c in zip(
            patch_num, patch_stride, output_patch_overlap, crop_chunk_margin))

    input_chunk_size = tuple(
        ocs + ccm * 2 + ips - ops
        for ocs, ccm, ips, ops in zip(output_chunk_size, crop_chunk_margin,
                                      input_patch_size, output_patch_size))

    expand_margin_size = tuple(
        (ics - ocs) // 2
        for ics, ocs in zip(input_chunk_size, output_chunk_size))

    input_chunk_start = tuple(
        vs - ccm - (ips - ops) // 2
        for vs, ccm, ips, ops in zip(volume_start, crop_chunk_margin,
                                     input_patch_size, output_patch_size))

    block_size = (output_chunk_size[0] // factor,
                  output_chunk_size[1] // block_factor,
                  output_chunk_size[2] // block_factor)

    print('\n--input-chunk-size ' + tuple2string(input_chunk_size))
    print('--input-volume-start ' + tuple2string(input_chunk_start))
    print('--output-chunk-size ' + tuple2string(output_chunk_size))
    print('cutout expand margin size ' + tuple2string(expand_margin_size))
    print('output volume start: ' + tuple2string(volume_start))
    print('block size ' + tuple2string(block_size))
    print('RAM size of each block: ',
          np.prod(output_chunk_size) / 1024 / 1024 / 1024 * 4 * channel_num,
          ' GB')
    voxel_utilization = np.prod(output_chunk_size) / np.prod(
        patch_num) / np.prod(output_patch_size)
    print('voxel utilization: {:.2f}'.format(voxel_utilization))

    if not dry_run:
        storage = SimpleStorage(layer_path)
        thumbnail_layer_path = os.path.join(layer_path, 'thumbnail')
        thumbnail_storage = SimpleStorage(thumbnail_layer_path)
        if not overwrite_info:
            print('\ncheck that we are not overwriting existing info file.')
            assert storage.exists('info')
            assert thumbnail_storage.exists('info')

        print('create and upload info file to ', layer_path)
        # Note that cloudvolume use fortran order rather than C order
        info = CloudVolume.create_new_info(channel_num,
                                           layer_type='image',
                                           data_type=dtype,
                                           encoding=encoding,
                                           resolution=voxel_size[::-1],
                                           voxel_offset=volume_start[::-1],
                                           volume_size=volume_size[::-1],
                                           chunk_size=block_size[::-1],
                                           max_mip=mip)
        vol = CloudVolume(layer_path, info=info)
        if overwrite_info:
            vol.commit_info()

        thumbnail_factor = 2**thumbnail_mip
        thumbnail_block_size = (output_chunk_size[0] // factor,
                                output_chunk_size[1] // thumbnail_factor,
                                output_chunk_size[2] // thumbnail_factor)
        print('thumbnail block size: ' + tuple2string(thumbnail_block_size))
        thumbnail_info = CloudVolume.create_new_info(
            1,
            layer_type='image',
            data_type='uint8',
            encoding='raw',
            resolution=voxel_size[::-1],
            voxel_offset=volume_start[::-1],
            volume_size=volume_size[::-1],
            chunk_size=thumbnail_block_size[::-1],
            max_mip=thumbnail_mip)
        thumbnail_vol = CloudVolume(thumbnail_layer_path, info=thumbnail_info)
        if overwrite_info:
            thumbnail_vol.commit_info()

    print('create a list of bounding boxes...')
    roi_start = (volume_start[0],
                 volume_start[1] // factor,
                 volume_start[2] // factor)
    roi_size = (volume_size[0],
                volume_size[1] // factor,
                volume_size[2] // factor)
    roi_stop = tuple(s + z for s, z in zip(roi_start, roi_size))

    # create bounding boxes and ingest to queue
    bboxes = create_bounding_boxes(output_chunk_size,
                                   roi_start=roi_start,
                                   roi_stop=roi_stop,
                                   verbose=verbose)
    print('total number of tasks: ', len(bboxes))
    if verbose > 1:
        print('bounding boxes: ', bboxes)

    return bboxes
def create_graphene_meshing_tasks(
  cloudpath, timestamp, mip,
  simplification=True, max_simplification_error=40,
  mesh_dir=None, cdn_cache=False, object_ids=None,
  progress=False, fill_missing=False, sharding=None,
  draco_compression_level=1, bounds=None
):
  """Create GrapheneMeshTasks tiling a graphene segmentation at the given mip.

  Raises ValueError when mip is below the watershed mip level.
  """
  cv = CloudVolume(cloudpath, mip=mip)

  if mip < cv.meta.watershed_mip:
    raise ValueError("Must mesh at or above the watershed mip level. Watershed MIP: {} Got: {}".format(
      cv.meta.watershed_mip, mip
    ))

  if mesh_dir is None:
    mesh_dir = 'meshes'

  # BUG FIX: the original assigned cv.info['mesh'] and only afterwards
  # tested "if not 'mesh' in cv.info" — which was then always False, so
  # commit_info() never ran. Capture whether the key was missing first.
  mesh_key_missing = 'mesh' not in cv.info
  cv.info['mesh'] = mesh_dir  # necessary to set the mesh.commit_info() dir right
  if mesh_key_missing:
    cv.commit_info()

  # Graph chunks are defined at the watershed resolution; rescale to mip.
  watershed_downsample_ratio = cv.resolution // cv.meta.resolution(cv.meta.watershed_mip)
  shape = Vec(*cv.meta.graph_chunk_size) // watershed_downsample_ratio

  cv.mesh.meta.info['@type'] = 'neuroglancer_legacy_mesh'
  cv.mesh.meta.info['mip'] = cv.mip
  cv.mesh.meta.info['chunk_size'] = list(shape)
  if sharding:
    cv.mesh.meta.info['sharding'] = sharding
  cv.mesh.meta.commit_info()

  # 0 disables simplification; 100 is the standard factor.
  simplification = (0 if not simplification else 100)

  class GrapheneMeshTaskIterator(FinelyDividedTaskIterator):
    def task(self, shape, offset):
      return GrapheneMeshTask(
        cloudpath=cloudpath,
        shape=shape.clone(),
        offset=offset.clone(),
        mip=int(mip),
        simplification_factor=simplification,
        max_simplification_error=max_simplification_error,
        draco_compression_level=draco_compression_level,
        mesh_dir=mesh_dir,
        cache_control=('' if cdn_cache else 'no-cache'),
        progress=progress,
        fill_missing=fill_missing,
        timestamp=timestamp,
      )

    def on_finish(self):
      cv.provenance.processing.append({
        'method': {
          'task': 'GrapheneMeshTask',
          'cloudpath': cv.cloudpath,
          'shape': cv.meta.graph_chunk_size,
          'mip': int(mip),
          'simplification': simplification,
          'max_simplification_error': max_simplification_error,
          'mesh_dir': mesh_dir,
          'fill_missing': fill_missing,
          'cdn_cache': cdn_cache,
          'timestamp': timestamp,
          'draco_compression_level': draco_compression_level,
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
      })
      cv.commit_provenance()

  if bounds is None:
    bounds = cv.meta.bounds(mip).clone()
  else:
    bounds = cv.bbox_to_mip(bounds, mip=0, to_mip=mip)
    bounds = Bbox.clamp(bounds, cv.bounds)

  # Graphene meshes must align to graph chunk boundaries.
  bounds = bounds.expand_to_chunk_size(shape, cv.voxel_offset)

  return GrapheneMeshTaskIterator(bounds, shape)
def create_structures(animal):
    """
    This is the important method called from main. This does all the work.
    Args:
        animal: string to identify the animal/stack

    Returns:
        Nothing, creates a directory of the precomputed volume. Copy this directory
        somewhere apache can read it. e.g.,
        /net/birdstore/Active_Atlas_Data/data_root/pipeline_data/
    """
    sqlController = SqlController(animal)
    fileLocationManager = FileLocationManager(animal)
    # Set all relevant directories
    THUMBNAIL_PATH = os.path.join(fileLocationManager.prep, 'CH1', 'thumbnail')
    CSV_PATH = '/net/birdstore/Active_Atlas_Data/data_root/atlas_data/foundation_brain_annotations'
    CLEANED = os.path.join(fileLocationManager.prep, 'CH1', 'thumbnail_cleaned')
    PRECOMPUTE_PATH = f'/net/birdstore/Active_Atlas_Data/data_root/atlas_data/foundation_brain_annotations/{animal}'

    width = sqlController.scan_run.width
    height = sqlController.scan_run.height
    width = int(width * SCALING_FACTOR)
    height = int(height * SCALING_FACTOR)
    aligned_shape = np.array((width, height))
    THUMBNAILS = sorted(os.listdir(THUMBNAIL_PATH))
    num_section = len(THUMBNAILS)
    structure_dict = sqlController.get_structures_dict()

    # Normalize the CSV 'vertices' column into parseable Python literals:
    # collapse whitespace/newlines to commas and squeeze repeated commas.
    csvfile = os.path.join(CSV_PATH, f'{animal}_annotation.csv')
    hand_annotations = pd.read_csv(csvfile)
    hand_annotations['vertices'] = hand_annotations['vertices'] \
        .apply(lambda x: x.replace(' ', ','))\
        .apply(lambda x: x.replace('\n', ','))\
        .apply(lambda x: x.replace(',]', ']'))\
        .apply(lambda x: x.replace(',,', ','))\
        .apply(lambda x: x.replace(',,', ','))\
        .apply(lambda x: x.replace(',,', ',')).apply(lambda x: x.replace(',,', ','))
    hand_annotations['vertices'] = hand_annotations['vertices'].apply(lambda x: ast.literal_eval(x))

    structures = list(hand_annotations['name'].unique())
    section_structure_vertices = defaultdict(dict)
    for structure in tqdm(structures):
        contour_annotations, first_sec, last_sec = get_contours_from_annotations(
            animal, structure, hand_annotations, densify=4)
        for section in contour_annotations:
            section_structure_vertices[section][structure] = contour_annotations[section][structure][1]

    ##### Reproduce create_clean transform
    section_offset = {}
    for file_name in tqdm(THUMBNAILS):
        filepath = os.path.join(THUMBNAIL_PATH, file_name)
        img = io.imread(filepath)
        section = int(file_name.split('.')[0])
        # Center each section image within the aligned canvas.
        section_offset[section] = (aligned_shape - img.shape[:2][::-1]) // 2

    ##### Reproduce create_alignment transform
    image_name_list = sorted(os.listdir(CLEANED))
    anchor_idx = len(image_name_list) // 2
    transformation_to_previous_sec = {}
    for i in range(1, len(image_name_list)):
        fixed_fn = os.path.splitext(image_name_list[i - 1])[0]
        moving_fn = os.path.splitext(image_name_list[i])[0]
        transformation_to_previous_sec[i] = load_consecutive_section_transform(
            animal, moving_fn, fixed_fn)

    # Compose pairwise transforms into transforms to the anchor section.
    transformation_to_anchor_sec = {}
    for moving_idx in range(len(image_name_list)):
        if moving_idx == anchor_idx:
            transformation_to_anchor_sec[image_name_list[moving_idx]] = np.eye(3)
        elif moving_idx < anchor_idx:
            T_composed = np.eye(3)
            for i in range(anchor_idx, moving_idx, -1):
                T_composed = np.dot(np.linalg.inv(transformation_to_previous_sec[i]), T_composed)
            transformation_to_anchor_sec[image_name_list[moving_idx]] = T_composed
        else:
            T_composed = np.eye(3)
            for i in range(anchor_idx + 1, moving_idx + 1):
                T_composed = np.dot(transformation_to_previous_sec[i], T_composed)
            transformation_to_anchor_sec[image_name_list[moving_idx]] = T_composed

    warp_transforms = create_warp_transforms(animal, transformation_to_anchor_sec, 'thumbnail', 'thumbnail')
    ordered_transforms = sorted(warp_transforms.items())
    section_transform = {}
    for section, transform in ordered_transforms:
        section_num = int(section.split('.')[0])
        transform = np.linalg.inv(transform)
        section_transform[section_num] = transform

    ##### Alignment of annotation coordinates
    keys = [k for k in structure_dict.keys()]
    # This missing_sections will need to be manually built up from Beth's spreadsheet
    missing_sections = {k: [117] for k in keys}
    fill_sections = defaultdict(dict)
    pr5_sections = []
    other_structures = set()
    volume = np.zeros((aligned_shape[1], aligned_shape[0], num_section), dtype=np.uint8)
    for section in section_structure_vertices:
        template = np.zeros((aligned_shape[1], aligned_shape[0]), dtype=np.uint8)
        for structure in section_structure_vertices[section]:
            points = np.array(section_structure_vertices[section][structure])
            points = points // 32
            points = points + section_offset[section]  # create_clean offset
            points = transform_create_alignment(points, section_transform[section])  # create_alignment transform
            points = points.astype(np.int32)

            # FIX: narrowed bare `except:` to KeyError — the only expected
            # failure is a missing dict key; a bare except also swallowed
            # KeyboardInterrupt/SystemExit.
            try:
                missing_list = missing_sections[structure]
            except KeyError:
                missing_list = []

            if section in missing_list:
                fill_sections[structure][section] = points

            if 'pr5' in structure.lower():
                pr5_sections.append(section)

            try:
                # structure dict returns a list of [description, color] for each key
                color = structure_dict[structure][1]
            except KeyError:
                # FIX: was a bare `except:`; unknown structures get a default color
                color = 255
                other_structures.add(structure)

            cv2.polylines(template, [points], True, color, 2, lineType=cv2.LINE_AA)
        volume[:, :, section - 1] = template

    # fill up missing sections
    template = np.zeros((aligned_shape[1], aligned_shape[0]), dtype=np.uint8)
    for structure, v in fill_sections.items():
        color = structure_dict[structure][1]
        for section, points in v.items():
            cv2.polylines(template, [points], True, color, 2, lineType=cv2.LINE_AA)
            volume[:, :, section] = template

    volume_filepath = os.path.join(CSV_PATH, f'{animal}_annotations.npy')
    volume = np.swapaxes(volume, 0, 1)
    print('Saving:', volume_filepath, 'with shape', volume.shape)
    with open(volume_filepath, 'wb') as file:
        np.save(file, volume)

    # now use 9-1 notebook to convert to a precomputed.
    # Voxel resolution in nanometer (how much nanometer each element in numpy array represent)
    resol = (14464, 14464, 20000)
    # Voxel offset
    offset = (0, 0, 0)
    # Layer type
    layer_type = 'segmentation'
    # number of channels
    num_channels = 1
    # segmentation properties in the format of [(number1, label1), (number2, label2) ...]
    # where number is an integer that is in the volume and label is a string that describes that segmenetation
    segmentation_properties = [(number, f'{structure}: {label}')
                               for structure, (label, number) in structure_dict.items()]
    extra_structures = ['Pr5', 'VTg', 'DRD', 'IF', 'MPB', 'Op', 'RPC', 'LSO', 'MVe', 'CnF',
                        'pc', 'DTgC', 'LPB', 'Pr5DM', 'DTgP', 'RMC', 'VTA', 'IPC', 'DRI', 'LDTg',
                        'IPA', 'PTg', 'DTg', 'IPL', 'SuVe', 'Sol', 'IPR', '8n', 'Dk', 'IO',
                        'Cb', 'Pr5VL', 'APT', 'Gr', 'RR', 'InC', 'X', 'EW']
    segmentation_properties += [(len(structure_dict) + index + 1, structure)
                                for index, structure in enumerate(extra_structures)]

    cloudpath = f'file://{PRECOMPUTE_PATH}'
    info = CloudVolume.create_new_info(
        num_channels=num_channels,
        layer_type=layer_type,
        data_type=str(volume.dtype),  # Channel images might be 'uint8'
        encoding='raw',  # raw, jpeg, compressed_segmentation, fpzip, kempressed
        resolution=resol,  # Voxel scaling, units are in nanometers
        voxel_offset=offset,  # x,y,z offset in voxels from the origin
        chunk_size=[64, 64, 64],  # units are voxels
        volume_size=volume.shape,  # e.g. a cubic millimeter dataset
    )
    vol = CloudVolume(cloudpath, mip=0, info=info, compress=False)
    vol.commit_info()
    vol[:, :, :] = volume[:, :, :]

    vol.info['segment_properties'] = 'names'
    vol.commit_info()

    segment_properties_path = os.path.join(PRECOMPUTE_PATH, 'names')
    os.makedirs(segment_properties_path, exist_ok=True)

    info = {
        "@type": "neuroglancer_segment_properties",
        "inline": {
            "ids": [str(number) for number, label in segmentation_properties],
            "properties": [{
                "id": "label",
                "description": "Name of structures",
                "type": "label",
                "values": [str(label) for number, label in segmentation_properties]
            }]
        }
    }
    print('Creating names in', segment_properties_path)
    with open(os.path.join(segment_properties_path, 'info'), 'w') as file:
        json.dump(info, file, indent=2)

    # Setting parallel to a number > 1 hangs the script. It still runs fast with parallel=1
    tq = LocalTaskQueue(parallel=1)
    tasks = tc.create_downsampling_tasks(cloudpath, compress=False)  # Downsample the volumes
    tq.insert(tasks)
    tq.execute()
    print('Finished')
def setup_environment(dry_run, volume_start, volume_stop, volume_size, layer_path,
                      max_ram_size, output_patch_size, input_patch_size, channel_num,
                      dtype, output_patch_overlap, crop_chunk_margin, mip,
                      thumbnail_mip, max_mip, thumbnail, encoding, voxel_size,
                      overwrite_info):
    """Prepare storage info files and produce tasks."""
    assert not (volume_stop is None and volume_size is None)
    if isinstance(volume_start, tuple):
        volume_start = Vec(*volume_start)
    if isinstance(volume_stop, tuple):
        volume_stop = Vec(*volume_stop)
    if isinstance(volume_size, tuple):
        volume_size = Vec(*volume_size)

    if input_patch_size is None:
        input_patch_size = output_patch_size

    # Exactly one of volume_stop / volume_size is given; derive the other.
    if volume_size is not None:
        assert len(volume_size) == 3
        assert volume_stop is None
        volume_stop = volume_start + volume_size
    else:
        volume_size = volume_stop - volume_start
    print('\noutput volume start: ' + tuple2string(volume_start))
    print('output volume stop: ' + tuple2string(volume_stop))
    print('output volume size: ' + tuple2string(volume_size))

    if output_patch_overlap is None:
        # use 50% patch overlap in default
        output_patch_overlap = tuple(s // 2 for s in output_patch_size)
    assert output_patch_overlap[1] == output_patch_overlap[2]

    if crop_chunk_margin is None:
        crop_chunk_margin = output_patch_overlap
    assert crop_chunk_margin[1] == crop_chunk_margin[2]
    print('margin size: ' + tuple2string(crop_chunk_margin))

    if thumbnail:
        # thumbnail requires maximum mip level of 5
        thumbnail_mip = max(thumbnail_mip, 5)

    block_size, output_chunk_size, factor = get_optimized_block_size(
        output_patch_size, output_patch_overlap, max_ram_size,
        channel_num, max_mip, crop_chunk_margin,
        input_patch_size, mip, thumbnail_mip, volume_start
    )

    if not dry_run:
        storage = SimpleStorage(layer_path)
        thumbnail_layer_path = os.path.join(layer_path, 'thumbnail')
        thumbnail_storage = SimpleStorage(thumbnail_layer_path)

        if not overwrite_info:
            print('\ncheck that we are not overwriting existing info file.')
            assert storage.exists('info')
            assert thumbnail_storage.exists('info')

        if overwrite_info:
            print('create and upload info file to ', layer_path)
            # Note that cloudvolume use fortran order rather than C order
            info = CloudVolume.create_new_info(channel_num,
                                               layer_type='image',
                                               data_type=dtype,
                                               encoding=encoding,
                                               resolution=voxel_size[::-1],
                                               voxel_offset=volume_start[::-1],
                                               volume_size=volume_size[::-1],
                                               chunk_size=block_size[::-1],
                                               max_mip=mip)
            vol = CloudVolume(layer_path, info=info)
            vol.commit_info()

            thumbnail_factor = 2**thumbnail_mip
            thumbnail_block_size = (output_chunk_size[0] // factor,
                                    output_chunk_size[1] // thumbnail_factor,
                                    output_chunk_size[2] // thumbnail_factor)
            print('thumbnail block size: ' + tuple2string(thumbnail_block_size))
            thumbnail_info = CloudVolume.create_new_info(
                1,
                layer_type='image',
                data_type='uint8',
                encoding='raw',
                resolution=voxel_size[::-1],
                voxel_offset=volume_start[::-1],
                volume_size=volume_size[::-1],
                chunk_size=thumbnail_block_size[::-1],
                max_mip=thumbnail_mip)
            thumbnail_vol = CloudVolume(thumbnail_layer_path, info=thumbnail_info)
            thumbnail_vol.commit_info()

    print('create a list of bounding boxes...')
    roi_start = (volume_start[0],
                 volume_start[1] // factor,
                 volume_start[2] // factor)
    roi_size = (volume_size[0],
                volume_size[1] // factor,
                volume_size[2] // factor)
    roi_stop = tuple(s + z for s, z in zip(roi_start, roi_size))

    # create bounding boxes and ingest to queue
    bboxes = BoundingBoxes.from_manual_setup(
        output_chunk_size, roi_start=roi_start, roi_stop=roi_stop)
    logging.info(f'total number of tasks: {len(bboxes)}')
    logging.debug(f'bounding boxes: {bboxes}')

    print(yellow(
        'Note that you should reuse the printed out parameters in the production run.' +
        ' These parameters are not ingested to AWS SQS queue.'))

    return bboxes
def create_spatial_index_mesh_tasks( cloudpath:str, shape:Tuple[int,int,int] = (448,448,448), mip:int = 0, fill_missing:bool = False, compress:Optional[Union[str,bool]] = 'gzip', mesh_dir:Optional[str] = None ): """ The main way to add a spatial index is to use the MeshTask, but old datasets or broken datasets may need it to be reconstituted. An alternative use is create the spatial index over a different area size than the mesh task. """ shape = Vec(*shape) vol = CloudVolume(cloudpath, mip=mip) if mesh_dir is None: mesh_dir = f"mesh_mip_{mip}_err_40" if not "mesh" in vol.info: vol.info['mesh'] = mesh_dir vol.commit_info() cf = CloudFiles(cloudpath) info_filename = '{}/info'.format(mesh_dir) mesh_info = cf.get_json(info_filename) or {} new_mesh_info = copy.deepcopy(mesh_info) new_mesh_info['@type'] = new_mesh_info.get('@type', 'neuroglancer_legacy_mesh') new_mesh_info['mip'] = new_mesh_info.get("mip", int(vol.mip)) new_mesh_info['chunk_size'] = shape.tolist() new_mesh_info['spatial_index'] = { 'resolution': vol.resolution.tolist(), 'chunk_size': (shape * vol.resolution).tolist(), } if new_mesh_info != mesh_info: cf.put_json(info_filename, new_mesh_info) class SpatialIndexMeshTaskIterator(FinelyDividedTaskIterator): def task(self, shape, offset): return partial(MeshSpatialIndex, cloudpath=cloudpath, shape=shape, offset=offset, mip=int(mip), fill_missing=bool(fill_missing), compress=compress, mesh_dir=mesh_dir, ) def on_finish(self): vol.provenance.processing.append({ 'method': { 'task': 'MeshSpatialIndex', 'cloudpath': vol.cloudpath, 'shape': shape.tolist(), 'mip': int(mip), 'mesh_dir': mesh_dir, 'fill_missing': fill_missing, 'compress': compress, }, 'by': operator_contact(), 'date': strftime('%Y-%m-%d %H:%M %Z'), }) vol.commit_provenance() return SpatialIndexMeshTaskIterator(vol.bounds, shape)