Exemple #1
0
def colm_pipeline(input_s3_path,
                  output_s3_path,
                  channel_of_interest,
                  autofluorescence_channel,
                  raw_data_path,
                  stitched_data_path,
                  log_s3_path=None):
    """
    Run the COLM pipeline for a single channel: download the raw tiles, compute
    stitching alignments if needed, bias correct the tiles, stitch them, upload
    a precomputed volume and bias correct that volume in place.

    input_s3_path: S3 path to raw COLM data. Should be of the form s3://<bucket>/<experiment>
    output_s3_path: S3 path to store precomputed volume. Precomputed volumes for each channel will be stored under this path. Should be of the form s3://<bucket>/<path_to_precomputed>
    channel_of_interest: Channel number to operate on. Should be a single integer.
    autofluorescence_channel: Autofluorescence channel number. Should be a single integer. Not read by this function.
    raw_data_path: Local path where corrected raw data will be stored.
    stitched_data_path: Local path where stitched slices will be stored.
    log_s3_path: S3 path at which pipeline intermediates can be stored including bias correction tile and xml files from Terastitcher.

    """
    # parse both S3 paths up front; the output URL is parsed but not used again here
    source_url = S3Url(input_s3_path.strip('/'))
    destination_url = S3Url(output_s3_path.strip('/'))

    # pull the raw tiles for this channel from the VW0 folder to local storage
    download_raw_data(f'{source_url.url}/VW0/',
                      channel_of_interest,
                      raw_data_path,
                      log_s3_path=log_s3_path)

    # reuse stitching files from the log path when they can be downloaded;
    # otherwise alignments are only computed when operating on channel 0
    alignments_downloaded = download_terastitcher_files(log_s3_path,
                                                        raw_data_path)
    if not alignments_downloaded and channel_of_interest == 0:
        run_terastitcher(raw_data_path,
                         stitched_data_path,
                         input_s3_path,
                         log_s3_path=log_s3_path,
                         compute_only=True)

    # bias correct every tile (the bias tile is saved under log_s3_path)
    correct_raw_data(raw_data_path,
                     channel_of_interest,
                     log_s3_path=log_s3_path)

    # stitch the corrected tiles
    stitch_metadata = run_terastitcher(raw_data_path,
                                       stitched_data_path,
                                       input_s3_path,
                                       log_s3_path=log_s3_path,
                                       stitch_only=True)

    # downsample the stitched data and upload it to S3
    voxel_size = np.array(stitch_metadata['voxel_size'])
    create_precomputed_volume(stitched_data_path, voxel_size, output_s3_path)

    # whole-brain bias correction; the corrected data overwrites the original
    # precomputed volume so nothing is duplicated on S3 (higher S3 cost)
    correct_stitched_data(output_s3_path, output_s3_path)
Exemple #2
0
def run_terastitcher(
    raw_data_path,
    stitched_data_path,
    input_s3_path,
    log_s3_path=None,
    stitch_only=False,
    compute_only=False,
):
    """
    Generate the Terastitcher commands for a dataset, run them one by one and
    optionally save the resulting xml files to S3.

    raw_data_path: Local path of the raw data; xml files are collected from here.
    stitched_data_path: Local path handed to the command generator for stitched output.
    input_s3_path: S3 path of the raw data; its bucket and key are passed to the command generator.
    log_s3_path: If given (and stitch_only is False), xml files in raw_data_path are uploaded under this S3 path.
    stitch_only: Select the STITCH_ONLY steps. Takes precedence over compute_only.
    compute_only: Select the COMPUTE_ONLY steps.

    Returns the metadata produced by generate_stitching_commands.
    """
    source_url = S3Url(input_s3_path.strip("/"))

    # stitch_only wins over compute_only; with neither flag every step runs
    do_steps = (
        STITCH_ONLY if stitch_only
        else COMPUTE_ONLY if compute_only
        else ALL_STEPS
    )

    metadata, commands = generate_stitching_commands(
        stitched_data_path,
        raw_data_path,
        source_url.bucket,
        source_url.key,
        do_steps,
    )

    # echo each command before running it; exit statuses are not checked
    for command in commands:
        print(command)
        subprocess.run(shlex.split(command))

    # nothing to upload without a log path, or when only stitching
    if not log_s3_path or stitch_only:
        return metadata

    log_url = S3Url(log_s3_path.strip("/"))
    xml_files = glob(f"{raw_data_path}/*.xml")
    for xml_file in tqdm(xml_files, desc="saving xml files to S3"):
        file_name = xml_file.split("/")[-1]
        upload_file_to_s3(xml_file, log_url.bucket,
                          f"{log_url.key}/{file_name}")

    return metadata
Exemple #3
0
def get_layer_json(s3_layer_path, affine_matrix):
    """
    Build the neuroglancer layer description for a precomputed volume.

    s3_layer_path: Path to the precomputed layer; opened with CloudVolume and parsed with S3Url.
    affine_matrix: Affine matrix with translations in microns in its last column, or None to use a 4x4 identity. The caller's matrix is never modified.

    Returns a dict holding the layer type, source url and transform, shader
    settings and a name taken from the last component of the S3 key.
    """
    vol = CloudVolume(s3_layer_path)
    s3_url = S3Url(s3_layer_path)
    # this is in units of m
    dimensions = minimum_ngl_json['dimensions']
    output_resolution = np.array(
        [dimensions[axis][0] for axis in ('x', 'y', 'z')])

    if affine_matrix is None:
        affine_matrix = np.eye(4)
    else:
        # Work on a float copy: dividing in place would rescale the caller's
        # matrix again on every call and fails outright for integer arrays.
        affine_matrix = np.array(affine_matrix, dtype=float)
        # convert translations from microns to voxels and convert output resolution from m to um
        affine_matrix[:3, -1] /= output_resolution * 1e6

    # volumes in this bucket are addressed through the dlab-colm https endpoint
    if s3_url.bucket == 'colm-precomputed-volumes':
        url = f'precomputed://https://dlab-colm.neurodata.io/{s3_url.key}'
    else:
        url = f'precomputed://{s3_layer_path}'

    layer_data = {
        'type': vol.layer_type,
        'source': {
            'url': url,
            "transform": {
                # last column here is x, y, z translations respectively
                "matrix": affine_matrix[:3, :].tolist(),
                "outputDimensions":
                copy.deepcopy(minimum_ngl_json['dimensions'])
            },
        },
        'tab': 'source',
        'shader':
        '#uicontrol vec3 color color(default="white")\n#uicontrol float min slider(default=0, min=0, max=1, step=0.001)\n#uicontrol float max slider(default=1, min=0, max=1, step=0.001)\n#uicontrol float brightness slider(default=0, min=-1, max=1, step=0.1)\n#uicontrol float contrast slider(default=0, min=-3, max=3, step=0.1)\n\nfloat scale(float x) {\n  return (x - min) / (max - min);\n}\n\nvoid main() {\n  emitRGB(\n    color * vec3(\n      scale(\n        toNormalized(getDataValue()))\n       + brightness) * exp(contrast)\n  );\n}',
        'shaderControls': {
            'max': 0.005
        },
        'blend': 'default',
        'name': s3_url.key.split('/')[-1]
    }

    return layer_data
Exemple #4
0
def register(input_s3_path, output_s3_path, log_s3_path, orientation,
             fixed_scale, translation, rotation):
    """
    Download the data at input_s3_path, build an initial affine and run the
    MATLAB registration script on it.

    input_s3_path: S3 path of the channel to register; the last two components of its key are used as <experiment>/<channel>.
    output_s3_path: Not read by this function.
    log_s3_path: If truthy, the registration output directory is synced to this S3 path.
    orientation, fixed_scale, translation, rotation: Forwarded to get_affine_matrix; fixed_scale is also passed to the MATLAB script.
    """
    print(input_s3_path)
    key_parts = S3Url(input_s3_path).key.split('/')
    exp, channel = key_parts[-2], key_parts[-1]

    # all local files live under the user's home directory
    base_path = os.path.expanduser('~/')
    registration_prefix = f'{base_path}/{exp}_{channel}_registration/'
    target_name = f'{base_path}/autofluorescence_data.tif'

    # fetch the data to register; download_data returns its voxel size
    print("downloading data for registration...")
    voxel_size = download_data(input_s3_path, target_name)

    # fetch the 10um atlas annotation only if it is not already on disk
    annotation_path = os.path.expanduser(
        '~/CloudReg/registration/atlases/ara_annotation_10um.tif')
    if not os.path.exists(annotation_path):
        download_data(ara_annotation_data_link(10),
                      annotation_path,
                      desired_resolution=10000)

    # initial affine between the data and the atlas at resolution 100
    atlas_s3_path = ara_average_data_link(100)
    initial_affine = get_affine_matrix(translation, rotation,
                                       atlas_orientation, orientation,
                                       fixed_scale, atlas_s3_path)

    # serialize the affine as a MATLAB literal: ', ' within rows, '; ' between rows
    affine_string = '; '.join(
        ', '.join(map(str, row)) for row in initial_affine)
    matlab_registration_command = f'''
        matlab -nodisplay -nosplash -nodesktop -r \"base_path=\'{base_path}\';target_name=\'{target_name}\';registration_prefix=\'{registration_prefix}\';dxJ0={voxel_size};fixed_scale={fixed_scale};initial_affine=[{affine_string}];run(\'~/CloudReg/registration/registration_script_mouse_GN.m\')\"
    '''
    print(matlab_registration_command)
    subprocess.run(shlex.split(matlab_registration_command))

    # save results to S3 by syncing the registration directory
    if log_s3_path:
        aws_cli(['s3', 'sync', registration_prefix, log_s3_path])
Exemple #5
0
def get_layer_json(s3_layer_path, affine_matrix, output_resolution):
    """
    Build the neuroglancer layer description for a precomputed volume.

    s3_layer_path: Path to the precomputed layer; opened with CloudVolume and parsed with S3Url.
    affine_matrix: Affine matrix with translations in microns in its last column, or None to use a 4x4 identity. The caller's matrix is never modified.
    output_resolution: Output resolution in meters; it is multiplied by 1e6 to rescale the translations and passed to get_output_dimensions_json.

    Returns a dict holding the layer type, source url and transform, shader
    settings and a name taken from the last component of the S3 key.
    """
    vol = CloudVolume(s3_layer_path)
    s3_url = S3Url(s3_layer_path)

    if affine_matrix is None:
        affine_matrix = np.eye(4)
    else:
        # Work on a float copy: dividing in place would rescale the caller's
        # matrix again on every call and fails outright for integer arrays.
        affine_matrix = np.array(affine_matrix, dtype=float)
        # convert translations from microns to voxels and convert output resolution from m to um
        affine_matrix[:3, -1] /= output_resolution * 1e6

    # volumes in this bucket are addressed through the dlab-colm https endpoint
    if s3_url.bucket == "colm-precomputed-volumes":
        url = f"precomputed://https://dlab-colm.neurodata.io/{s3_url.key}"
    else:
        url = f"precomputed://{s3_layer_path}"

    layer_data = {
        "type": vol.layer_type,
        "source": {
            "url": url,
            "transform": {
                # last column here is x, y, z translations respectively
                "matrix": affine_matrix[:3, :].tolist(),
                "outputDimensions":
                get_output_dimensions_json(output_resolution),
            },
        },
        "tab": "source",
        "shader":
        '#uicontrol vec3 color color(default="white")\n#uicontrol float min slider(default=0, min=0, max=1, step=0.001)\n#uicontrol float max slider(default=1, min=0, max=1, step=0.001)\n#uicontrol float brightness slider(default=0, min=-1, max=1, step=0.1)\n#uicontrol float contrast slider(default=0, min=-3, max=3, step=0.1)\n\nfloat scale(float x) {\n  return (x - min) / (max - min);\n}\n\nvoid main() {\n  emitRGB(\n    color * vec3(\n      scale(\n        toNormalized(getDataValue()))\n       + brightness) * exp(contrast)\n  );\n}',
        "shaderControls": {
            "max": 0.005
        },
        "blend": "default",
        "name": s3_url.key.split("/")[-1],
    }

    return layer_data
Exemple #6
0
def download_raw_data(in_bucket_path, channel, outdir, log_s3_path=None):
    """
    Download every tile of one channel from S3 into outdir, in parallel.

    in_bucket_path: S3 path whose bucket and key locate the tiles.
    channel: Channel used to select the files to process.
    outdir: Local directory handed to download_tiles.
    log_s3_path: Accepted but not read by this function.
    """
    source_url = S3Url(in_bucket_path.strip("/"))
    bucket_name, key_prefix = source_url.bucket, source_url.key
    n_workers = cpu_count()

    # every tile to fetch for the requested channel
    tile_keys = get_list_of_files_to_process(bucket_name, key_prefix, channel)

    # split the tiles into batches, one delayed download_tiles call per batch
    batch_size = math.ceil(len(tile_keys) / n_workers) + 1
    batches = chunks(tile_keys, batch_size)

    progress = tqdm(desc="Downloading tiles", total=n_workers)
    with tqdm_joblib(progress):
        Parallel(n_jobs=n_workers, verbose=10)(
            delayed(download_tiles)(batch, bucket_name, outdir)
            for batch in batches)
Exemple #7
0
def register(
    input_s3_path,
    output_s3_path,
    log_s3_path,
    orientation,
    fixed_scale,
    translation,
    rotation,
    missing_data_correction,
    grid_correction,
    bias_correction,
    regularization,
    num_iterations,
):
    """
    Download the data at input_s3_path, run the MATLAB registration script on
    it, upload the high resolution deformed labels and print a viz link.

    input_s3_path: S3 path of the channel to register; the last two components of its key are used as <experiment>/<channel>.
    output_s3_path: S3 path the deformed labels image is ingested to.
    log_s3_path: If truthy, the registration output directory is synced to this S3 path.
    orientation, fixed_scale, translation, rotation: Forwarded to get_affine_matrix; fixed_scale is also passed to the MATLAB script.
    missing_data_correction, grid_correction, bias_correction: Flags converted with int() and passed to the MATLAB script.
    regularization: Passed to the MATLAB script as sigmaR.
    num_iterations: Passed to the MATLAB script as niter.
    """
    print(input_s3_path)
    key_parts = S3Url(input_s3_path).key.split("/")
    exp, channel = key_parts[-2], key_parts[-1]

    # all local files live under the user's home directory
    base_path = os.path.expanduser("~/")
    registration_prefix = f"{base_path}/{exp}_{channel}_registration/"
    target_name = f"{base_path}/autofluorescence_data.tif"

    # fetch the data to register; download_data returns its voxel size
    print("downloading data for registration...")
    voxel_size = download_data(input_s3_path, target_name)

    # fetch the 10um atlas annotation only if it is not already on disk
    annotation_path = os.path.expanduser(
        "~/CloudReg/registration/atlases/ara_annotation_10um.tif")
    if not os.path.exists(annotation_path):
        download_data(ara_annotation_data_link(10),
                      annotation_path,
                      desired_resolution=10000)

    # initial affine between the data and the atlas at resolution 100
    atlas_s3_path = ara_average_data_link(100)
    initial_affine = get_affine_matrix(
        translation,
        rotation,
        atlas_orientation,
        orientation,
        fixed_scale,
        atlas_s3_path,
    )

    # serialize the affine as a MATLAB literal: ', ' within rows, '; ' between rows
    affine_string = "; ".join(
        ", ".join(map(str, row)) for row in initial_affine)
    matlab_registration_command = f"""
        matlab -nodisplay -nosplash -nodesktop -r \"niter={num_iterations};sigmaR={regularization};missing_data_correction={int(missing_data_correction)};grid_correction={int(grid_correction)};bias_correction={int(bias_correction)};base_path=\'{base_path}\';target_name=\'{target_name}\';registration_prefix=\'{registration_prefix}\';dxJ0={voxel_size};fixed_scale={fixed_scale};initial_affine=[{affine_string}];run(\'~/CloudReg/registration/registration_script_mouse_GN.m\')\"
    """
    print(matlab_registration_command)
    subprocess.run(shlex.split(matlab_registration_command))

    # save results to S3 by syncing the registration directory
    if log_s3_path:
        aws_cli(["s3", "sync", registration_prefix, log_s3_path])

    # upload the high resolution deformed labels image to S3
    deformed_labels_path = (
        f"{registration_prefix}/downloop_2_labels_to_target_highres.img")
    ingest_image_stack(
        output_s3_path,
        voxel_size,
        deformed_labels_path,
        "img",
        "uint64",
    )

    # build and print a visualization link at 5 microns (resolution given in m)
    viz_resolution = np.array([5] * 3) / 1e6
    viz_link = create_viz_link([input_s3_path, output_s3_path],
                               output_resolution=viz_resolution)
    banner = "###################"
    print(banner)
    print(f"VIZ LINK: {viz_link}")
    print(banner)
Exemple #8
0
def colm_pipeline(
    input_s3_path,
    output_s3_path,
    channel_of_interest,
    autofluorescence_channel,
    raw_data_path,
    stitched_data_path,
    log_s3_path=None,
):
    """
    Run the COLM pipeline for a single channel: download the raw tiles, compute
    stitching alignments if needed, bias correct the tiles, stitch them, upload
    a precomputed volume, bias correct that volume in place and print a viz link.

    input_s3_path: S3 path to raw COLM data. Should be of the form s3://<bucket>/<experiment>
    output_s3_path: S3 path to store precomputed volume. Precomputed volumes for each channel will be stored under this path. Should be of the form s3://<bucket>/<path_to_precomputed>
    channel_of_interest: Channel number to operate on. Should be a single integer.
    autofluorescence_channel: Autofluorescence channel number. Should be a single integer. Not read by this function.
    raw_data_path: Local path where corrected raw data will be stored.
    stitched_data_path: Local path where stitched slices will be stored.
    log_s3_path: S3 path at which pipeline intermediates can be stored including bias correction tile and xml files from Terastitcher.

    """
    # parse both S3 paths up front; the output URL is parsed but not used again here
    source_url = S3Url(input_s3_path.strip("/"))
    destination_url = S3Url(output_s3_path.strip("/"))

    # pull the raw tiles for this channel from the VW0 folder to local storage
    download_raw_data(
        f"{source_url.url}/VW0/",
        channel_of_interest,
        raw_data_path,
        log_s3_path=log_s3_path,
    )

    # reuse stitching files from the log path when they can be downloaded;
    # otherwise alignments are only computed when operating on channel 0
    alignments_downloaded = download_terastitcher_files(log_s3_path, raw_data_path)
    if not alignments_downloaded and channel_of_interest == 0:
        run_terastitcher(
            raw_data_path,
            stitched_data_path,
            input_s3_path,
            log_s3_path=log_s3_path,
            compute_only=True,
        )

    # bias correct every tile (the bias tile is saved under log_s3_path)
    correct_raw_data(raw_data_path, channel_of_interest, log_s3_path=log_s3_path)

    # stitch the corrected tiles using the available alignments
    stitch_metadata = run_terastitcher(
        raw_data_path,
        stitched_data_path,
        input_s3_path,
        log_s3_path=log_s3_path,
        stitch_only=True,
    )

    # the stitched slices are read from the first RES* entry of the output directory
    resolution_dirs = glob(f"{stitched_data_path}/RES*")
    stitched_path = resolution_dirs[0]
    voxel_size = np.array(stitch_metadata["voxel_size"])
    # downsample the stitched data and upload it to S3
    create_precomputed_volume(stitched_path, voxel_size, output_s3_path)

    # whole-brain bias correction; the corrected data overwrites the original
    # precomputed volume so nothing is duplicated on S3 (higher S3 cost)
    correct_stitched_data(output_s3_path, output_s3_path)

    # print a visualization link at 5 microns (resolution given in m)
    viz_resolution = np.array([5] * 3) / 1e6
    viz_link = create_viz_link([output_s3_path], output_resolution=viz_resolution)
    banner = "###################"
    print(banner)
    print(f"VIZ LINK: {viz_link}")
    print(banner)
def correct_raw_data(raw_data_path,
                     channel,
                     subsample_factor=2,
                     log_s3_path=None,
                     background_correction=True):
    """
    Bias correct every raw tile under raw_data_path, overwriting the tiles in place.

    A bias tile is loaded from <raw_data_path>/CHN0<channel>_bias.tiff when that
    file exists. Otherwise it is estimated from the mean of a subsample of the
    tiles via get_bias_field and saved to that path.

    raw_data_path: Local directory searched for tiles matching */*.tiff; corrected tiles are written back here.
    channel: Channel number, used only to name the bias tile.
    subsample_factor: Every subsample_factor-th tile contributes to the mean tile.
    log_s3_path: If truthy, the bias tile is also uploaded under this S3 path.
    background_correction: If True, a background value from get_background_value is subtracted from the mean tile and forwarded to correct_tiles.
    """
    total_n_jobs = cpu_count()
    # overwrite existing raw data with corrected data
    outdir = raw_data_path

    # get list of all tiles to correct for given channel
    all_files = np.sort(glob.glob(f'{raw_data_path}/*/*.tiff'))
    total_files = len(all_files)

    # Always bind background_val: it is handed to correct_tiles below even when
    # background correction is disabled (previously an UnboundLocalError).
    # NOTE(review): 0 is assumed to mean "no background" for correct_tiles --
    # confirm against its implementation.
    if background_correction:
        background_val = get_background_value(raw_data_path)
    else:
        background_val = 0

    bias_path = f'{outdir}/CHN0{channel}_bias.tiff'
    if os.path.exists(bias_path):
        # reuse a previously computed bias tile
        bias = tf.imread(bias_path)
    else:
        # subsample tiles
        files_cb = all_files[::subsample_factor]
        num_files = len(files_cb)

        # compute running sums in parallel; the chunk size is the same value
        # as before (math.ceil around a floor division was a no-op)
        chunk_size = num_files // total_n_jobs + 1
        sums = Parallel(total_n_jobs, verbose=10)(
            delayed(sum_tiles)(f) for f in chunks(files_cb, chunk_size))
        # stack the partial sums along a third axis and add them up
        sums = [i[:, :, None] for i in sums]
        mean_tile = np.squeeze(np.sum(np.concatenate(sums, axis=2),
                                      axis=2)) / num_files
        if background_correction:
            # subtract background out from bias correction
            mean_tile -= background_val
        mean_tile = sitk.GetImageFromArray(mean_tile)

        # get the bias correction tile using N4ITK
        bias = sitk.GetArrayFromImage(get_bias_field(mean_tile, scale=1.0))

        # save bias tile to local directory
        tf.imsave(bias_path, bias.astype('float32'))

    # save bias tile to S3
    if log_s3_path:
        s3 = boto3.resource('s3')
        img = Image.fromarray(bias)
        fp = BytesIO()
        img.save(fp, format='TIFF')
        # reset pointer to beginning of file
        fp.seek(0)
        log_s3_url = S3Url(log_s3_path.strip('/'))
        # separate name so the local bias path is not shadowed by the S3 key
        bias_s3_key = f'{log_s3_url.key}/CHN0{channel}_bias.tiff'
        s3.Object(log_s3_url.bucket, bias_s3_key).upload_fileobj(fp)

    # correct all the files and save them
    files_per_proc = math.ceil(total_files / total_n_jobs) + 1
    work = chunks(all_files, files_per_proc)
    with tqdm_joblib(tqdm(desc="Correcting tiles", total=total_n_jobs)):
        Parallel(n_jobs=total_n_jobs, verbose=10)(
            delayed(correct_tiles)(files, outdir, bias, background_val)
            for files in work)