def correct_stitched_data(data_s3_path, out_s3_path, num_procs=12):
    # create vol
    vol = CloudVolume(data_s3_path)
    # pick the lowest-resolution mip level whose voxel size is still below 10 um
    mip = 0
    for i in range(len(vol.scales)):
        if vol.scales[i]['resolution'][0] < 10000:
            mip = i
    vol_ds = CloudVolume(data_s3_path, mip, parallel=True, fill_missing=True)
    # create new vol if it doesn't exist
    vol_bc = CloudVolume(out_s3_path, info=vol.info.copy())
    vol_bc.commit_info()
    # download image at low res
    data = sitk.GetImageFromArray(np.squeeze(vol_ds[:, :, :]).T)
    # CloudVolume resolution is in nanometers; SimpleITK spacing here is in microns
    data.SetSpacing(np.array(vol_ds.scales[mip]['resolution']) / 1000)
    # estimate the multiplicative bias field on the downsampled volume
    bias = get_bias_field(data, scale=0.125)
    bias_slices = [bias[:, :, i] for i in range(bias.GetSize()[-1])]
    try:
        with tqdm_joblib(
            tqdm(desc="Uploading bias corrected data...", total=len(bias_slices))
        ) as progress_bar:
            Parallel(num_procs, timeout=3600, verbose=10)(
                delayed(process_slice)(bias_slice, z, data_s3_path, out_s3_path)
                for z, bias_slice in enumerate(bias_slices)
            )
    except Exception:
        print("timed out on bias correcting slice. moving to next step.")
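
# get_bias_field (used above and again in correct_raw_data below) is not defined in
# this excerpt. The sketch below is a minimal stand-in for its assumed behavior:
# N4ITK bias field estimation on a shrunken copy of the image, with the fitted log
# bias field evaluated back on the full-resolution grid. The function name, the
# shrink-factor heuristic, and the Otsu mask are illustrative assumptions, and
# GetLogBiasFieldAsImage requires SimpleITK >= 2.0.
def _get_bias_field_sketch(image, scale=0.25):
    import SimpleITK as sitk

    img_f = sitk.Cast(image, sitk.sitkFloat32)
    # fit N4 on a downsampled copy to keep the estimation cheap
    shrink_factor = max(1, int(round(1.0 / scale)))
    small = sitk.Shrink(img_f, [shrink_factor] * img_f.GetDimension())
    # foreground mask so the fit is not dominated by empty background
    mask = sitk.OtsuThreshold(small, 0, 1, 200)
    corrector = sitk.N4BiasFieldCorrectionImageFilter()
    corrector.Execute(small, mask)
    # evaluate the fitted log bias field at full resolution and exponentiate
    log_bias = corrector.GetLogBiasFieldAsImage(img_f)
    return sitk.Exp(log_bias)
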
def create_precomputed_volume(
    input_path, voxel_size, precomputed_path, extension='tif', num_mips=8
):
    files_slices = list(
        enumerate(np.sort(glob(f'{input_path}/*/*.{extension}')).tolist())
    )
    zs = [i[0] for i in files_slices]
    files = np.array([i[1] for i in files_slices])
    img_size = get_image_dims(files)
    # convert voxel size from um to nm
    vol = create_cloud_volume(
        precomputed_path,
        img_size,
        voxel_size * 1000,
        parallel=False,
        num_downsampling_levels=num_mips,
    )
    # num procs to use based on available memory
    num_procs = min(
        math.floor(virtual_memory().total / (img_size[0] * img_size[1] * 8)),
        joblib.cpu_count(),
    )
    try:
        with tqdm_joblib(
            tqdm(desc="Creating precomputed volume", total=len(files))
        ) as progress_bar:
            Parallel(num_procs, timeout=1800, verbose=10)(
                delayed(process)(z, f, vol.layer_cloudpath, num_mips)
                for z, f in zip(zs, files)
            )
    except Exception as e:
        print(e)
        print("timed out on a slice. moving on to the next step of pipeline")
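
# create_cloud_volume is not shown in this excerpt. The sketch below is one plausible
# implementation built on CloudVolume.create_new_info; the data type, chunk size, and
# downsampling factor are illustrative assumptions, not the pipeline's actual settings.
def _create_cloud_volume_sketch(
    precomputed_path, img_size, voxel_size_nm, parallel=False, num_downsampling_levels=8
):
    from cloudvolume import CloudVolume

    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type='image',
        data_type='uint16',          # assumed bit depth of the acquired tiles
        encoding='raw',
        resolution=list(voxel_size_nm),   # nm per voxel at mip 0
        voxel_offset=[0, 0, 0],
        chunk_size=[512, 512, 1],    # one chunk per (x, y) block of a single slice
        volume_size=list(img_size),  # [x, y, z]
        max_mip=num_downsampling_levels - 1,
        factor=(2, 2, 1),            # downsample in-plane only
    )
    vol = CloudVolume(precomputed_path, info=info, parallel=parallel, compress=False)
    vol.commit_info()
    return vol
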
def download_raw_data(in_bucket_path, channel, outdir, log_s3_path=None):
    input_s3_url = S3Url(in_bucket_path.strip("/"))
    in_bucket_name = input_s3_url.bucket
    in_path = input_s3_url.key
    total_n_jobs = cpu_count()
    # get list of all tiles to download for given channel
    all_files = get_list_of_files_to_process(in_bucket_name, in_path, channel)
    total_files = len(all_files)
    # download all the files as tiff
    files_per_proc = math.ceil(total_files / total_n_jobs) + 1
    work = chunks(all_files, files_per_proc)
    with tqdm_joblib(
        tqdm(desc="Downloading tiles", total=total_n_jobs)
    ) as progress_bar:
        Parallel(n_jobs=total_n_jobs, verbose=10)(
            delayed(download_tiles)(files, in_bucket_name, outdir) for files in work
        )
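
# chunks is a small helper used here and in correct_raw_data below but not defined in
# this excerpt. A minimal sketch of its assumed behavior: split a sequence into
# consecutive blocks of at most n items so each joblib worker receives one block.
def _chunks_sketch(seq, n):
    # yield successive n-sized slices from seq
    for i in range(0, len(seq), n):
        yield list(seq[i:i + n])
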
        type=str,
        default=None,
    )
    args = parser.parse_args()

    # for all channels in experiment
    for i in range(args.num_channels):
        output_s3_path = args.output_s3_path.strip("/")
        colm_pipeline(
            args.input_s3_path,
            f"{output_s3_path}/CHN0{i}",
            i,
            args.autofluorescence_channel,
            args.raw_data_path,
            args.stitched_data_path,
            args.log_s3_path,
        )
        if i < args.num_channels - 1:
            # delete raw tiles and stitched results for this channel before the next one
            directories_to_remove = glob(f"{args.raw_data_path}/LOC*")
            directories_to_remove.extend(glob(f"{args.stitched_data_path}/RES*"))
            with tqdm_joblib(
                tqdm(
                    desc=f"Delete files from CHN0{i}",
                    total=len(directories_to_remove),
                )
            ) as progress_bar:
                Parallel(-1)(delayed(shutil.rmtree)(f) for f in directories_to_remove)
            # make sure to delete mdata.bin from terastitcher
            os.remove(f"{args.raw_data_path}/mdata.bin")
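
# tqdm_joblib, used throughout these scripts, is not defined in this excerpt. The sketch
# below is the widely used recipe it is assumed to follow: temporarily patch joblib's
# batch-completion callback so each finished batch advances the provided tqdm bar.
# Treat it as an illustrative stand-in, not the original implementation.
import contextlib

@contextlib.contextmanager
def _tqdm_joblib_sketch(tqdm_object):
    import joblib

    class TqdmBatchCompletionCallback(joblib.parallel.BatchCompletionCallBack):
        def __call__(self, *args, **kwargs):
            # advance the progress bar by the number of tasks in this batch
            tqdm_object.update(n=self.batch_size)
            return super().__call__(*args, **kwargs)

    old_callback = joblib.parallel.BatchCompletionCallBack
    joblib.parallel.BatchCompletionCallBack = TqdmBatchCompletionCallback
    try:
        yield tqdm_object
    finally:
        # always restore joblib's original callback and close the bar
        joblib.parallel.BatchCompletionCallBack = old_callback
        tqdm_object.close()
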
def correct_raw_data(
    raw_data_path,
    channel,
    subsample_factor=2,
    log_s3_path=None,
    background_correction=True,
):
    total_n_jobs = cpu_count()
    # overwrite existing raw data with corrected data
    outdir = raw_data_path

    # get list of all tiles to correct for given channel
    all_files = np.sort(glob.glob(f'{raw_data_path}/*/*.tiff'))
    if background_correction:
        background_val = get_background_value(raw_data_path)
    else:
        # no background estimate; subtract nothing during tile correction
        background_val = 0
    total_files = len(all_files)

    bias_path = f'{outdir}/CHN0{channel}_bias.tiff'
    if os.path.exists(bias_path):
        bias = tf.imread(bias_path)
    else:
        # subsample tiles
        files_cb = all_files[::subsample_factor]
        num_files = len(files_cb)
        # compute running sums in parallel
        sums = Parallel(total_n_jobs, verbose=10)(
            delayed(sum_tiles)(f)
            for f in chunks(files_cb, math.ceil(num_files / total_n_jobs) + 1)
        )
        sums = [i[:, :, None] for i in sums]
        mean_tile = np.squeeze(np.sum(np.concatenate(sums, axis=2), axis=2)) / num_files
        if background_correction:
            # subtract background out from bias correction
            mean_tile -= background_val
        mean_tile = sitk.GetImageFromArray(mean_tile)
        # get the bias correction tile using N4ITK
        bias = sitk.GetArrayFromImage(get_bias_field(mean_tile, scale=1.0))
        # save bias tile to local directory
        tf.imsave(bias_path, bias.astype('float32'))

    # save bias tile to S3
    if log_s3_path:
        s3 = boto3.resource('s3')
        img = Image.fromarray(bias)
        fp = BytesIO()
        img.save(fp, format='TIFF')
        # reset pointer to beginning of file
        fp.seek(0)
        log_s3_url = S3Url(log_s3_path.strip('/'))
        bias_path = f'{log_s3_url.key}/CHN0{channel}_bias.tiff'
        s3.Object(log_s3_url.bucket, bias_path).upload_fileobj(fp)

    # correct all the files and save them
    files_per_proc = math.ceil(total_files / total_n_jobs) + 1
    work = chunks(all_files, files_per_proc)
    with tqdm_joblib(
        tqdm(desc="Correcting tiles", total=total_n_jobs)
    ) as progress_bar:
        Parallel(n_jobs=total_n_jobs, verbose=10)(
            delayed(correct_tiles)(files, outdir, bias, background_val)
            for files in work
        )
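
# correct_tiles and sum_tiles are not shown in this excerpt. The sketch below gives one
# plausible per-tile flat-field correction consistent with the call above: subtract the
# measured background, divide by the N4 bias estimate, and write the tile back out.
# It is an illustrative assumption, not the original helper; in the caller above
# outdir == raw_data_path, so corrected tiles simply overwrite the originals.
def _correct_tiles_sketch(tile_paths, outdir, bias, background_val=0):
    import numpy as np
    import tifffile as tf

    for path in tile_paths:
        tile = tf.imread(path).astype('float32')
        # clip at zero so background subtraction never produces negative intensities
        corrected = np.clip(tile - background_val, 0, None) / bias
        tf.imwrite(path, corrected.astype('uint16'))
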