def put_file(self, file_path, content, content_type, compress, cache_control=None):
    path = self.get_path_to_file(file_path)
    mkdir(os.path.dirname(path))

    # keep default as gzip
    if compress == "br":
        path += ".br"
    elif compress:
        path += '.gz'

    if content \
        and content_type \
        and re.search('json|te?xt', content_type) \
        and type(content) is str:
        content = content.encode('utf-8')

    try:
        with open(path, 'wb') as f:
            f.write(content)
    except IOError as err:
        # retry the write once; a second failure propagates
        with open(path, 'wb') as f:
            f.write(content)
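# A minimal sketch (illustration only, not part of the class above) of the filename
# suffix rule put_file applies: "br" maps to ".br", any other truthy compress value
# maps to ".gz", and a falsy value leaves the path unchanged.
def compressed_suffix(compress):
    if compress == "br":
        return ".br"
    elif compress:
        return ".gz"
    return ""

assert compressed_suffix("br") == ".br"
assert compressed_suffix("gzip") == ".gz"
assert compressed_suffix(None) == ""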
def main(): """ Runs the script to upload big brain files organized as octree (see https://github.com/neurodata/mouselight_code/issues/1) to S3 in neuroglancer format. Example: >> python upload_to_neuroglancer.py s3://mouse-light-viz/precomputed_volumes/brain1 /cis/local/jacs/data/jacsstorage/samples/2018-08-01/ """ parser = argparse.ArgumentParser( "Convert a folder of SWC files to neuroglancer format and upload them to the given S3 bucket location." ) parser.add_argument( "s3_bucket", help="S3 bucket path of the form s3://<bucket-name>/<path-to-layer>", ) parser.add_argument( "image_dir", help= "Path to local directory where image hierarchy lives. Assuming it is formatted as a resolution octree.", ) parser.add_argument( "--chosen_res", help= "Specified resolution to upload. 0 is highest. Default uploads all", default=-1, type=int, ) parser.add_argument("--channel", help="Channel number to upload. Default is 0", default=0, type=int) parser.add_argument( "--num_resolutions", help= "Number of resoltions for which downsampling has been done. Default: 7", default=7, type=int, ) args = parser.parse_args() files_ordered, bin_paths, vox_size, tiff_dims = get_volume_info( args.image_dir, args.num_resolutions, args.channel) vols = create_image_layer(args.s3_bucket, tiff_dims, vox_size, args.num_resolutions) pbar = tqdm(enumerate(zip(files_ordered, bin_paths)), total=len(files_ordered)) mkdir("./progress/") for idx, item in pbar: if args.chosen_res == -1: pbar.set_description_str( f"uploading chunks to resolution {args.num_resolutions - idx - 1}..." ) upload_chunks(vols[idx], item[0], item[1], parallel=True) else: if idx == (args.num_resolutions - args.chosen_res - 1): pbar.set_description_str( f"uploading chunks to resolution {args.num_resolutions - idx - 1}..." ) upload_chunks(vols[idx], item[0], item[1], parallel=True)
def test_download_upload_file(green):
    delete_layer()
    cv, _ = create_layer(size=(50, 50, 50, 1), offset=(0, 0, 0))
    cv.green_threads = green

    mkdir('/tmp/file/')
    cv.download_to_file('/tmp/file/test', cv.bounds)
    cv2 = CloudVolume('file:///tmp/file/test2/', info=cv.info)
    cv2.upload_from_file('/tmp/file/test', cv.bounds)

    assert np.all(cv2[:] == cv[:])

    shutil.rmtree('/tmp/file/')
def test_numpy_memmap():
    delete_layer()
    cv, data = create_layer(size=(50, 50, 50, 1), offset=(0, 0, 0))

    mkdir('/tmp/file/test/')
    with open("/tmp/file/test/chunk.data", "wb") as f:
        f.write(data.tobytes("F"))

    fp = np.memmap(
        "/tmp/file/test/chunk.data",
        dtype=data.dtype,
        mode='r',
        shape=(50, 50, 50, 1),
        order='F',
    )
    cv[:] = fp[:]

    shutil.rmtree('/tmp/file/')
def xy_path_projection(paths, labels, N=0):
    """Used for debugging paths."""
    if type(paths) != list:
        paths = [paths]

    projection = np.zeros((labels.shape[0], labels.shape[1]), dtype=np.uint8)
    outline = labels.any(axis=-1).astype(np.uint8) * 77
    outline = outline.reshape((labels.shape[0], labels.shape[1]))
    projection += outline

    for path in paths:
        for coord in path:
            projection[coord[0], coord[1]] = 255

    projection = Image.fromarray(projection.T, 'L')
    N = str(N).zfill(3)
    mkdir('./saved_images/projections')
    projection.save('./saved_images/projections/{}.png'.format(N), 'PNG')
def cache(task, cloudpath):
    layer_path, filename = os.path.split(cloudpath)

    classname = task.__class__.__name__
    lcldir = mkdir(os.path.join('/tmp/', classname))
    lclpath = os.path.join(lcldir, filename)

    if os.path.exists(lclpath):
        with open(lclpath, 'rb') as f:
            filestr = f.read()
    else:
        cf = CloudFiles(layer_path)
        filestr = cf.get(filename)

        with open(lclpath, 'wb') as f:
            f.write(filestr)

    return filestr
def cache(task, cloudpath):
    layer_path, filename = os.path.split(cloudpath)

    classname = task.__class__.__name__
    lcldir = mkdir(os.path.join('/tmp/', classname))
    lclpath = os.path.join(lcldir, filename)

    if os.path.exists(lclpath):
        with open(lclpath, 'rb') as f:
            filestr = f.read()
    else:
        with Storage(layer_path, n_threads=0) as stor:
            filestr = stor.get_file(filename)

        with open(lclpath, 'wb') as f:
            f.write(filestr)

    return filestr
def forge(src, scale, const, pdrf_scale, pdrf_exponent,
          soma_detect, soma_accept, soma_scale, soma_const,
          anisotropy, dust, progress, fill_holes,
          fix_avocados, fix_branches, fix_borders,
          parallel, max_paths, outdir):
    """Skeletonize an input image and write out SWCs."""
    labels = np.load(src)

    skels = kimimaro.skeletonize(
        labels,
        teasar_params={
            "scale": scale,
            "const": const,
            "pdrf_scale": pdrf_scale,
            "pdrf_exponent": pdrf_exponent,
            "soma_detection_threshold": soma_detect,
            "soma_acceptance_threshold": soma_accept,
            "soma_invalidation_scale": soma_scale,
            "soma_invalidation_const": soma_const,
            "max_paths": max_paths,
        },
        anisotropy=anisotropy,
        dust_threshold=dust,
        progress=progress,
        fill_holes=fill_holes,
        fix_avocados=fix_avocados,
        fix_branching=fix_branches,
        fix_borders=fix_borders,
        parallel=parallel,
    )

    directory = mkdir(outdir)

    for label, skel in skels.items():
        fname = os.path.join(directory, f"{label}.swc")
        with open(fname, "wt") as f:
            f.write(skel.to_swc())

    if progress:
        print(f"kimimaro: wrote {len(skels)} skeletons to {directory}")
def parallel_upload_chunks(vol, files, bin_paths, chunk_size, num_workers):
    """Push tif images as chunks into a CloudVolume object in parallel.

    Arguments:
        vol {cloudvolume.CloudVolume} -- volume that will contain image data
        files {list} -- strings of tif image filepaths
        bin_paths {list} -- binary paths to tif files
        chunk_size {list} -- 3 ints for original tif image dimensions
        num_workers {int} -- max number of concurrently running jobs
    """
    tiff_jobs = int(num_workers / 2) if num_workers == cpu_count() else num_workers

    # tiffs = Parallel(tiff_jobs, backend="loky", verbose=50)(
    #     delayed(tf.imread)("/".join(i)) for i in files
    # )
    # ranges = Parallel(tiff_jobs, backend='threading')(
    #     delayed(get_data_ranges)(i, chunk_size) for i in bin_paths
    # )
    # print("loaded tiffs and bin paths")

    progress_dir = mkdir("./progress/" + str(vol.mip))  # unlike os.mkdir, doesn't crash on preexisting
    done_files = set([z for z in os.listdir(progress_dir)])
    vol_ = CloudVolume(vol.layer_cloudpath, parallel=False, mip=vol.mip)

    # all_files = set([str(range) for range in ranges])
    # to_upload = [z for z in list(all_files.difference(done_files))]
    # vol_ = CloudVolume(vol.layer_cloudpath, parallel=False, mip=vol.mip)
    # Parallel(tiff_jobs, verbose=50)(
    #     delayed(upload_chunk)(vol_, r, i, progress_dir, to_upload)
    #     for r, i in zip(ranges, tiffs)
    # )

    Parallel(tiff_jobs, timeout=1800, verbose=10)(
        delayed(process)(f, b, chunk_size, vol_, progress_dir, done_files)
        for f, b in zip(files, bin_paths)
    )
brains = ["PRV_50hr-019", "20201001_10_57_49_hsv_36h_6","20201001_10_01_03_hsv_36h_5", "20201001_15_39_26_hsv_28h_4","20201001_17_13_35_hsv_28h_2", "20200930_18_34_47_hsv_28hr_3"] #for array job parallelization print(os.environ["SLURM_ARRAY_TASK_ID"]) jobid = int(os.environ["SLURM_ARRAY_TASK_ID"]) #setting dirs brain = brains[jobid] #make sure this is the folder hierarchy you want to keep home_dir = "/jukebox/scratch/zmd/save/contra_ipsi_projection_studies_20191125" tif_dir = "/jukebox/scratch/zmd/%s/transformed_annotations/single_tifs" % brain type_vol = "atlas" #change if needed, just makes the folder name this #get x,y,z resolution image = Image.open(os.path.join(tif_dir, os.listdir(tif_dir)[0])) x, y = image.size volume_size = [x, y, len(os.listdir(tif_dir))] vol = make_info_file(brain, home_dir, volume_size, type_vol = type_vol) #setup run progress_dir = mkdir(home_dir + "/progress_"+brain+"_"+type_vol) done_files = set([ int(z) for z in os.listdir(progress_dir) ]) all_files = set(range(vol.bounds.minpt.z, vol.bounds.maxpt.z)) to_upload = [ (vol,int(z)) for z in list(all_files.difference(done_files)) ] print("\n # of files to process: %s \n" % len(to_upload)) to_upload.sort() #run print("Running processor...\n") with ProcessPoolExecutor(max_workers=12) as executor: executor.map(process, to_upload) #downsample print("Downsampling...\n") make_demo_downsample(type_vol, mip_start=0,num_mips=5)
animal_dataset_dict = {
    '201810_adultacutePC_ymaze_cfos': [
        'dadult_pc_crus1_1', 'dadult_pc_crus1_2',
        'dadult_pc_crus1_3', 'dadult_pc_crus1_4',
        'dadult_pc_crus1_5'],
    '202002_cfos': [
        'an2_vecctrl_ymaze', 'an3_vecctrl_ymaze',
        'an4_vecctrl_ymaze', 'an9_vecctrl_ymaze',
        'an10_vecctrl_ymaze', 'an1_crus1_lat', 'an2_crus1_lat',
        'an4_crus1_lat', 'an5_crus1_lat', 'an6_crus1_lat',
        'an7_crus1_lat', 'an10_crus1_lat', 'an11_crus1_lat',
        'an13_crus1_lat', 'an19_crus1_lat', 'an4_saline', 'an5_cno'],
}

for dataset in animal_dataset_dict.keys():
    animal_id_list = animal_dataset_dict[dataset]
    for animal_id in animal_id_list:
        # Update the info file and save it
        print(f"Dataset: {dataset}, animal_id: {animal_id}")
        layer_dir = os.path.join(viz_dir, dataset, f'rawatlas_{animal_id}_iso')
        vol = CloudVolume(f'file://{layer_dir}')
        info_dict = vol.info
        info_dict['segment_properties'] = "segment_properties"
        info_filename = '/'.join(vol.info_cloudpath.split('/')[2:])
        with open(info_filename, 'w') as outfile:
            json.dump(info_dict, outfile, sort_keys=True, indent=2)
        print(f"amended info file to include 'segment_properties' key: {info_filename}")

        # copy over the segment_properties directory
        seg_props_dir = os.path.join(layer_dir, 'segment_properties')
        mkdir(seg_props_dir)
        dest_seg_props_info = os.path.join(seg_props_dir, 'info')
        shutil.copyfile(src_seg_props_info, dest_seg_props_info)
        print("copied over segment_properties info file")
def ingest(args):
    """
    Ingest an HDF file to a CloudVolume bucket
    """
    if args.local_hdf_path:
        hdf_file = h5py.File(args.local_hdf_path, "r")
    else:
        with Storage(args.cloud_src_path) as storage:
            hdf_file = h5py.File(storage.get_file(args.cloud_hdf_filename), "r")

    cur_hdf_group = hdf_file
    for group_name in args.hdf_keys_to_dataset:
        cur_hdf_group = cur_hdf_group[group_name]
    hdf_dataset = cur_hdf_group

    if args.zyx:
        dataset_shape = np.array(
            [hdf_dataset.shape[2], hdf_dataset.shape[1], hdf_dataset.shape[0]])
    else:
        dataset_shape = np.array([*hdf_dataset.shape])

    if args.layer_type == "image":
        data_type = "uint8"
    else:
        data_type = "uint64"

    voxel_offset = args.voxel_offset
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type=args.layer_type,
        data_type=data_type,
        encoding="raw",
        resolution=args.resolution,
        voxel_offset=voxel_offset,
        chunk_size=args.chunk_size,
        volume_size=dataset_shape,
    )
    provenance = {
        "description": args.provenance_description,
        "owners": [args.owner]
    }
    vol = CloudVolume(args.dst_path, info=info, provenance=provenance)
    vol.commit_info()
    vol.commit_provenance()

    all_files = set()
    for x in np.arange(voxel_offset[0], voxel_offset[0] + dataset_shape[0],
                       args.chunk_size[0]):
        for y in np.arange(voxel_offset[1], voxel_offset[1] + dataset_shape[1],
                           args.chunk_size[1]):
            for z in np.arange(voxel_offset[2], voxel_offset[2] + dataset_shape[2],
                               args.chunk_size[2]):
                all_files.add(tuple((x, y, z)))

    progress_dir = mkdir("progress/")  # unlike os.mkdir, doesn't crash on preexisting
    done_files = set()
    for done_file in os.listdir(progress_dir):
        done_files.add(tuple(done_file.split(",")))
    to_upload = all_files.difference(done_files)

    for chunk_start_tuple in to_upload:
        chunk_start = np.array(list(chunk_start_tuple))
        end_of_dataset = np.array(voxel_offset) + dataset_shape
        chunk_end = chunk_start + np.array(args.chunk_size)
        chunk_end = Vec(*chunk_end)
        chunk_end = Vec.clamp(chunk_end, Vec(0, 0, 0), end_of_dataset)
        chunk_hdf_start = chunk_start - voxel_offset
        chunk_hdf_end = chunk_end - voxel_offset

        if args.zyx:
            chunk = hdf_dataset[chunk_hdf_start[2]:chunk_hdf_end[2],
                                chunk_hdf_start[1]:chunk_hdf_end[1],
                                chunk_hdf_start[0]:chunk_hdf_end[0], ]
            chunk = chunk.T
        else:
            chunk = hdf_dataset[chunk_hdf_start[0]:chunk_hdf_end[0],
                                chunk_hdf_start[1]:chunk_hdf_end[1],
                                chunk_hdf_start[2]:chunk_hdf_end[2], ]

        print("Processing ", chunk_start_tuple)
        array = np.array(chunk, dtype=np.dtype(data_type), order="F")
        vol[chunk_start[0]:chunk_end[0],
            chunk_start[1]:chunk_end[1],
            chunk_start[2]:chunk_end[2], ] = array
        touch(os.path.join(progress_dir, str(chunk_start_tuple)))
print("success") if __name__ == "__main__": """ First command line arguments """ step = sys.argv[1] viz_dir = sys.argv[2] animal_id = sys.argv[3] print(f"Viz_dir: {viz_dir}") print(f"Animal id: {animal_id}") rawcells_pth = os.path.join('/jukebox/wang/Jess/lightsheet_output', '201904_ymaze_cfos','processed',f'an{animal_id}','clearmap_cluster_output', 'cells.npy') layer_name = f'rawcells_an{animal_id}_dilated' layer_dir = os.path.join(viz_dir,layer_name) """ Make progress dir """ progress_dir = mkdir(viz_dir + f'/progress_{layer_name}') # unlike os.mkdir doesn't crash on prexisting """ Raw cells have the same dimensions as raw data """ x_dim = 2160 y_dim = 2560 full_sizedatafld = os.path.join('/jukebox/wang/Jess/lightsheet_output/', '201904_ymaze_cfos/processed',f'an{animal_id}','full_sizedatafld') rawdata_path = glob.glob(full_sizedatafld + f'/an{animal_id}*647*')[0] all_slices = glob.glob(f"{rawdata_path}/*tif") z_dim = len(all_slices) x_scale_nm, y_scale_nm,z_scale_nm = 5000,5000,10000 # the same for all datasets """ Handle the different steps """ if step == 'step0': print("step 0")
    with LocalTaskQueue(parallel=8) as tq:
        tasks = tc.create_meshing_tasks(cloudpath, mip=0, shape=(256, 256, 256))
        tq.insert_all(tasks)
        tasks = tc.create_mesh_manifest_tasks(cloudpath)
        tq.insert_all(tasks)
    print("Done!")


if __name__ == '__main__':
    """ Fill the CloudVolume() instance with data from the tif slices """
    vol = make_info_file()
    """ Now load the tifffile in its entirety """
    image = np.array(tifffile.imread(atlas_file), dtype=np.uint32, order='F')  # F stands for fortran order
    z_dim, y_dim, x_dim = image.shape
    print(image.shape)

    progress_dir = mkdir(home_dir + '/progress_allenatlas_2017/')  # unlike os.mkdir, doesn't crash on preexisting
    done_files = set([int(z) for z in os.listdir(progress_dir)])
    all_files = set(range(vol.bounds.minpt.z, vol.bounds.maxpt.z))

    to_upload = [int(z) for z in list(all_files.difference(done_files))]
    to_upload.sort()
    print("Remaining slices to upload are:", to_upload)

    with ProcessPoolExecutor(max_workers=8) as executor:
        executor.map(process_slice, to_upload)
def test_sharded():
    skel = Skeleton(
        [
            (0, 0, 0), (1, 0, 0), (2, 0, 0),
            (0, 1, 0), (0, 2, 0), (0, 3, 0),
        ],
        edges=[(0, 1), (1, 2), (3, 4), (4, 5), (3, 5)],
        segid=1,
        extra_attributes=[{
            "id": "radius",
            "data_type": "float32",
            "num_components": 1,
        }]).physical_space()

    skels = {}
    for i in range(10):
        sk = skel.clone()
        sk.id = i
        skels[i] = sk.to_precomputed()

    mkdir('/tmp/removeme/skeletons/sharded/skeletons')
    with open('/tmp/removeme/skeletons/sharded/info', 'wt') as f:
        f.write(jsonify(info))

    for idxenc in ('raw', 'gzip'):
        for dataenc in ('raw', 'gzip'):
            spec = ShardingSpecification(
                'neuroglancer_uint64_sharded_v1',
                preshift_bits=1,
                hash='murmurhash3_x86_128',
                minishard_bits=2,
                shard_bits=1,
                minishard_index_encoding=idxenc,
                data_encoding=dataenc,
            )
            skel_info['sharding'] = spec.to_dict()

            with open('/tmp/removeme/skeletons/sharded/skeletons/info', 'wt') as f:
                f.write(jsonify(skel_info))

            files = spec.synthesize_shards(skels)
            for fname in files.keys():
                with open('/tmp/removeme/skeletons/sharded/skeletons/' + fname, 'wb') as f:
                    f.write(files[fname])

            cv = CloudVolume('file:///tmp/removeme/skeletons/sharded/')
            assert cv.skeleton.meta.mip == 3

            for i in range(10):
                sk = cv.skeleton.get(i).physical_space()
                sk.id = 1
                assert sk == skel

            labels = []
            for fname in files.keys():
                lbls = cv.skeleton.reader.list_labels(fname, path='skeletons')
                labels += list(lbls)
            labels.sort()
            assert labels == list(range(10))

            for filename, shard in files.items():
                decoded_skels = cv.skeleton.reader.disassemble_shard(shard)
                for label, binary in decoded_skels.items():
                    Skeleton.from_precomputed(binary)

            exists = cv.skeleton.reader.exists(list(range(11)), path='skeletons')
            assert exists == {
                0: 'skeletons/0.shard',
                1: 'skeletons/0.shard',
                2: 'skeletons/0.shard',
                3: 'skeletons/0.shard',
                4: 'skeletons/0.shard',
                5: 'skeletons/0.shard',
                6: 'skeletons/0.shard',
                7: 'skeletons/0.shard',
                8: 'skeletons/1.shard',
                9: 'skeletons/1.shard',
                10: None,
            }

    shutil.rmtree('/tmp/removeme/skeletons')
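# A rough sketch (illustration only, not from the test above) of how the
# ShardingSpecification bit parameters partition a label id into shard and
# minishard numbers. For simplicity it assumes the 'identity' hash; the test
# above uses 'murmurhash3_x86_128', which additionally mixes the preshifted id.
def shard_and_minishard(label, preshift_bits=1, minishard_bits=2, shard_bits=1):
    hashed = label >> preshift_bits  # 'identity' hash: no further mixing
    minishard = hashed & ((1 << minishard_bits) - 1)
    shard = (hashed >> minishard_bits) & ((1 << shard_bits) - 1)
    return shard, minishard

# With these parameters, labels 0-7 fall in shard 0 and labels 8-9 in shard 1,
# matching the file layout asserted in the test above.
assert [shard_and_minishard(i)[0] for i in range(10)] == [0] * 8 + [1] * 2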
def threaded_upload_chunks(
    meta, cache,
    img, mip, chunk_ranges,
    compress, cdn_cache, progress,
    n_threads=DEFAULT_THREADS,
    delete_black_uploads=False,
    background_color=0,
    green=False,
    compress_level=None,
):
    if cache.enabled:
        mkdir(cache.path)

    while img.ndim < 4:
        img = img[..., np.newaxis]

    remote = CloudFiles(meta.cloudpath, progress=progress)
    local = CloudFiles('file://' + cache.path, progress=progress)

    def do_upload(imgchunk, cloudpath):
        encoded = chunks.encode(imgchunk, meta.encoding(mip),
                                meta.compressed_segmentation_block_size(mip))

        remote.put(
            path=cloudpath,
            content=encoded,
            content_type=content_type(meta.encoding(mip)),
            compress=should_compress(meta.encoding(mip), compress, cache),
            compression_level=compress_level,
            cache_control=cdn_cache_control(cdn_cache),
        )

        if cache.enabled:
            local.put(
                path=cloudpath,
                content=encoded,
                content_type=content_type(meta.encoding(mip)),
                compress=should_compress(meta.encoding(mip), compress, cache, iscache=True),
            )

    def do_delete(cloudpath):
        remote.delete(cloudpath)

        if cache.enabled:
            local.delete(cloudpath)

    def process(startpt, endpt, spt, ept):
        if np.array_equal(spt, ept):
            return

        imgchunk = img[startpt.x:endpt.x, startpt.y:endpt.y, startpt.z:endpt.z, :]

        # handle the edge of the dataset
        clamp_ept = min2(ept, meta.bounds(mip).maxpt)
        newept = clamp_ept - spt
        imgchunk = imgchunk[:newept.x, :newept.y, :newept.z, :]

        filename = "{}-{}_{}-{}_{}-{}".format(
            spt.x, clamp_ept.x,
            spt.y, clamp_ept.y,
            spt.z, clamp_ept.z,
        )

        cloudpath = meta.join(meta.key(mip), filename)

        if delete_black_uploads:
            if np.any(imgchunk != background_color):
                do_upload(imgchunk, cloudpath)
            else:
                do_delete(cloudpath)
        else:
            do_upload(imgchunk, cloudpath)

    schedule_jobs(
        fns=(partial(process, *vals) for vals in chunk_ranges),
        concurrency=n_threads,
        progress=('Uploading' if progress else None),
        total=len(chunk_ranges),
        green=green,
    )
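# A minimal sketch (illustration only, not part of threaded_upload_chunks) of the
# chunk filename convention built by `process` above:
# "xstart-xend_ystart-yend_zstart-zend". The bounds used here are made-up examples.
def chunk_filename(spt, ept):
    return "{}-{}_{}-{}_{}-{}".format(spt[0], ept[0], spt[1], ept[1], spt[2], ept[2])

assert chunk_filename((0, 0, 64), (64, 64, 128)) == "0-64_0-64_64-128"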
    image.close()
    touch(os.path.join(progress_dir, str(z)))
    print("success")
    return


if __name__ == "__main__":
    """ First command line arguments """
    step = sys.argv[1]  # "step0", "step1", "step2" or "step3"
    sample_name = sys.argv[2]  # e.g. "zimmerman_02-f12"
    raw_atlas_dir = sys.argv[3]  # Path to single_tifs directory
    assert os.path.exists(raw_atlas_dir)
    viz_dir = os.path.join(
        '/jukebox/LightSheetData/lightserv/cz15/zimmerman_02',
        sample_name, 'imaging_request_1', 'viz', 'raw_atlas')
    mkdir(viz_dir)  # does not crash on preexisting
    cpus = os.cpu_count()
    if cpus > 16:
        cpus = 16
    layer_name = f"{sample_name}_raw_atlas"

    # Make directories for orig layer, destination layer
    # orig - just for uploading mip=-1
    orig_layer_name = layer_name + '_rechunkme'
    orig_layer_dir = os.path.join(viz_dir, orig_layer_name)
    mkdir(orig_layer_dir)
    progress_dir = mkdir(viz_dir + f'/progress_{orig_layer_name}')  # unlike os.mkdir, doesn't crash on preexisting
    # dest - where the rechunked layer will live