Example 1
def correct_stitched_data(data_s3_path, out_s3_path, num_procs=12):
    # open the stitched source volume
    vol = CloudVolume(data_s3_path)
    mip = 0
    for i in range(len(vol.scales)):
        # pick the coarsest mip whose resolution is still finer than 10 um (10000 nm)
        if vol.scales[i]['resolution'][0] < 10000:
            mip = i
    vol_ds = CloudVolume(data_s3_path, mip, parallel=True, fill_missing=True)

    # create the output volume if it doesn't exist
    vol_bc = CloudVolume(out_s3_path, info=vol.info.copy())
    vol_bc.commit_info()

    # download image at low res
    data = sitk.GetImageFromArray(np.squeeze(vol_ds[:, :, :]).T)
    data.SetSpacing(np.array(vol_ds.scales[mip]['resolution']) / 1000)

    bias = get_bias_field(data, scale=0.125)
    bias_slices = [bias[:, :, i] for i in range(bias.GetSize()[-1])]
    try:
        with tqdm_joblib(
                tqdm(desc=f"Uploading bias corrected data...",
                     total=len(bias_slices))) as progress_bar:
            Parallel(num_procs, timeout=3600,
                     verbose=10)(delayed(process_slice)(
                         bias_slice, z, data_s3_path, out_s3_path)
                                 for z, bias_slice in enumerate(bias_slices))
    except Exception as e:
        print(e)
        print("timed out while bias-correcting a slice. moving to next step.")
Example 2
def create_precomputed_volume(
    input_path,
    voxel_size,
    precomputed_path,
    extension='tif',
    num_mips=8
):

    files_slices = list(enumerate(np.sort(glob(f'{input_path}/*/*.{extension}')).tolist()))
    zs = [i[0] for i in files_slices]
    files = np.array([i[1] for i in files_slices])

    img_size = get_image_dims(files)
    # convert voxel size from um to nm
    vol = create_cloud_volume(precomputed_path, img_size, voxel_size * 1000,
                              parallel=False, num_downsampling_levels=num_mips)

    # num procs to use based on available memory (8 bytes per pixel per slice)
    num_procs = min(math.floor(virtual_memory().total/(img_size[0]*img_size[1] * 8)), joblib.cpu_count())

    try:
        with tqdm_joblib(tqdm(desc="Creating precomputed volume", total=len(files))) as progress_bar:
            Parallel(num_procs, timeout=1800, verbose=10)(
                delayed(process)(
                    z,
                    f,
                    vol.layer_cloudpath,
                    num_mips,
                ) for z, f in zip(zs, files)
            )
    except Exception as e:
        print(e)
        print("timed out on a slice. moving on to the next step of pipeline")
Example 3
def download_raw_data(in_bucket_path, channel, outdir, log_s3_path=None):

    input_s3_url = S3Url(in_bucket_path.strip("/"))
    in_bucket_name = input_s3_url.bucket
    in_path = input_s3_url.key
    total_n_jobs = cpu_count()

    # get list of all tiles to correct for a given channel
    all_files = get_list_of_files_to_process(in_bucket_name, in_path, channel)
    total_files = len(all_files)

    # download all the files as tiff
    files_per_proc = math.ceil(total_files / total_n_jobs) + 1
    work = chunks(all_files, files_per_proc)
    with tqdm_joblib(tqdm(desc="Downloading tiles",
                          total=total_n_jobs)) as progress_bar:
        Parallel(n_jobs=total_n_jobs, verbose=10)(
            delayed(download_tiles)(files, in_bucket_name, outdir)
            for files in work)
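Two small helpers recur in Examples 3 and 5 without being shown: chunks, which splits a file list into fixed-size batches for the workers, and S3Url, which splits an s3:// URL into bucket and key. Plausible minimal versions (sketches, not the originals) would be:

from urllib.parse import urlparse

def chunks(lst, n):
    # yield successive batches of at most n items from lst
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

class S3Url:
    # split "s3://bucket/some/key" into .bucket and .key
    def __init__(self, url):
        self._parsed = urlparse(url, allow_fragments=False)

    @property
    def bucket(self):
        return self._parsed.netloc

    @property
    def key(self):
        return self._parsed.path.lstrip("/")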
Example 4
        type=str,
        default=None,
    )

    args = parser.parse_args()

    # for all channels in experiment
    for i in range(args.num_channels):
        output_s3_path = args.output_s3_path.strip("/")
        colm_pipeline(
            args.input_s3_path,
            f"{output_s3_path}/CHN0{i}",
            i,
            args.autofluorescence_channel,
            args.raw_data_path,
            args.stitched_data_path,
            args.log_s3_path,
        )
        if i < args.num_channels - 1:
            # delete all tiff files in raw_data_path
            directories_to_remove = glob(f"{args.raw_data_path}/LOC*")
            directories_to_remove.extend(glob(f"{args.stitched_data_path}/RES*"))
            with tqdm_joblib(
                tqdm(
                    desc=f"Delete files from CHN0{i}", total=len(directories_to_remove)
                )
            ) as progress_bar:
                Parallel(n_jobs=-1)(delayed(shutil.rmtree)(f) for f in directories_to_remove)
            # make sure to delete mdata.bin from TeraStitcher
            os.remove(f"{args.raw_data_path}/mdata.bin")
Example 5
def correct_raw_data(raw_data_path,
                     channel,
                     subsample_factor=2,
                     log_s3_path=None,
                     background_correction=True):

    total_n_jobs = cpu_count()
    # overwrite existing raw data with corrected data
    outdir = raw_data_path

    # get list of all tiles to correct for a given channel
    all_files = np.sort(glob.glob(f'{raw_data_path}/*/*.tiff'))
    if background_correction:
        background_val = get_background_value(raw_data_path)
    else:
        # default so correct_tiles below still receives a defined value
        background_val = 0
    total_files = len(all_files)

    bias_path = f'{outdir}/CHN0{channel}_bias.tiff'
    if os.path.exists(bias_path):
        bias = tf.imread(bias_path)
    else:
        # subsample tiles
        files_cb = all_files[::subsample_factor]
        num_files = len(files_cb)

        # compute running sums in parallel, one chunk of tiles per worker
        sums = Parallel(total_n_jobs, verbose=10)(
            delayed(sum_tiles)(f)
            for f in chunks(files_cb,
                            math.ceil(num_files / total_n_jobs) + 1))
        # average the per-chunk sums into a single mean tile
        mean_tile = np.sum(np.stack(sums, axis=2), axis=2) / num_files
        if background_correction:
            # subtract background out from bias correction
            mean_tile -= background_val
        mean_tile = sitk.GetImageFromArray(mean_tile)

        # get the bias correction tile using N4ITK
        bias = sitk.GetArrayFromImage(get_bias_field(mean_tile, scale=1.0))

        # save bias tile to local directory
        tf.imsave(bias_path, bias.astype('float32'))

    # save bias tile to S3
    if log_s3_path:
        s3 = boto3.resource('s3')
        img = Image.fromarray(bias)
        fp = BytesIO()
        img.save(fp, format='TIFF')
        # reset pointer to beginning of file
        fp.seek(0)
        log_s3_url = S3Url(log_s3_path.strip('/'))
        bias_key = f'{log_s3_url.key}/CHN0{channel}_bias.tiff'
        s3.Object(log_s3_url.bucket, bias_key).upload_fileobj(fp)

    # correct all the files and save them
    files_per_proc = math.ceil(total_files / total_n_jobs) + 1
    work = chunks(all_files, files_per_proc)
    with tqdm_joblib(tqdm(desc="Correcting tiles",
                          total=total_n_jobs)) as progress_bar:
        Parallel(n_jobs=total_n_jobs, verbose=10)(
            delayed(correct_tiles)(files, outdir, bias, background_val)
            for files in work)
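Example 5's workers, sum_tiles and correct_tiles, are not shown either. From how they are called, sum_tiles must return a pixel-wise sum over one batch of tiles, and correct_tiles must apply the flat-field model (tile - background) / bias and rewrite each tile. A hypothetical sketch (the output layout and dtype are assumptions, not the original code):

import os
import numpy as np
import tifffile as tf

def sum_tiles(files):
    # pixel-wise running sum over one batch of tile paths
    total = None
    for f in files:
        tile = tf.imread(f).astype(np.float64)
        total = tile if total is None else total + tile
    return total

def correct_tiles(files, outdir, bias, background_val=0):
    # flat-field correction: subtract background, divide by bias field
    for f in files:
        tile = tf.imread(f).astype(np.float64)
        corrected = np.clip((tile - background_val) / bias, 0, None)
        # assumes tiles live one directory below raw_data_path, matching the glob above
        out_path = os.path.join(outdir, *f.split(os.sep)[-2:])
        tf.imwrite(out_path, corrected.astype(np.uint16))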