def view_segmentations(version, raw_scale, seg_names=[], seg_scales=[], bb=np.s_[:]):
    folder = os.path.join(ROOT, version, 'images', 'local')
    raw_file = os.path.join(folder, 'sbem-6dpf-1-whole-raw.xml')
    raw_file = get_data_path(raw_file, return_absolute_path=True)
    raw_key = get_key(False, time_point=0, setup_id=0, scale=raw_scale)

    with z5py.File(raw_file, 'r') as f:
        ds = f[raw_key]
        ds.n_threads = 16
        raw = ds[bb]
        ref_shape = raw.shape

    data = [to_source(raw, name='raw')]

    for seg_name, seg_scale in zip(seg_names, seg_scales):
        seg_file = os.path.join(folder, seg_name + '.xml')
        seg_file = get_data_path(seg_file, return_absolute_path=True)
        seg_key = get_key(False, time_point=0, setup_id=0, scale=seg_scale)
        with z5py.File(seg_file, 'r') as f:
            ds = f[seg_key]
            ds.n_threads = 16
            seg = ds[bb].astype('uint32')
        if seg.shape != ref_shape:
            # FIXME this will fail with bounding box
            print("Resize", ref_shape)
            # the resize wrapper is already a viewer source, so append it directly
            data.append(ResizeWrapper(to_source(seg, name=seg_name), ref_shape))
        else:
            data.append(to_source(seg, name=seg_name))

    view(*data)
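A minimal usage sketch (the version string, segmentation name and bounding box are placeholders, not taken from the original scripts): view a crop of the raw data at scale 3 together with the cell segmentation at the same scale.

bb = np.s_[200:300, 2000:3000, 2000:3000]
view_segmentations('1.0.1', raw_scale=3,
                   seg_names=['sbem-6dpf-1-whole-segmented-cells'],
                   seg_scales=[3], bb=bb)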
def segment_chromatin(version, ilastik_project, ilastik_directory):
    version_folder = os.path.join(ROOT, version)
    assert os.path.exists(version_folder), version_folder

    raw_path = os.path.join(version_folder, 'images', 'local',
                            'sbem-6dpf-1-whole-raw.xml')
    raw_path = get_data_path(raw_path, return_absolute_path=True)
    nucleus_seg_path = os.path.join(version_folder, 'images', 'local',
                                    'sbem-6dpf-1-whole-segmented-nuclei.xml')
    nucleus_seg_path = get_data_path(nucleus_seg_path,
                                     return_absolute_path=True)
    nuclei_table = os.path.join(version_folder, 'tables',
                                'sbem-6dpf-1-whole-segmented-nuclei-labels',
                                'default.csv')

    tmp_input = 'tmp_chromatin_prediction/tmp_input'
    tmp_output = 'tmp_chromatin_prediction/tmp_output'
    os.makedirs(tmp_input, exist_ok=True)
    os.makedirs(tmp_output, exist_ok=True)
    final_output = './chromatin_prediction.h5'

    # in general run on the cluster - 256 GB RAM, 32 cores
    chromatin_segmentation_workflow(nuclei_table,
                                    nucleus_seg_path,
                                    ilastik_project,
                                    ilastik_directory,
                                    tmp_input,
                                    tmp_output,
                                    final_output,
                                    raw_path,
                                    chunk_size=3000,
                                    cores=32,
                                    memory=254000)
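A hedged example call; the ilastik project and installation paths below are placeholders and would normally point to the trained chromatin classifier and a local ilastik install.

segment_chromatin('1.0.1',
                  ilastik_project='/path/to/chromatin_classifier.ilp',
                  ilastik_directory='/path/to/ilastik-1.3.3-Linux')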
Example #3
def make_nuclei_tables(old_folder,
                       folder,
                       name,
                       tmp_folder,
                       resolution,
                       target='slurm',
                       max_jobs=100,
                       seg_has_changed=True):
    # make the table folder
    table_folder = os.path.join(folder, 'tables', name)
    os.makedirs(table_folder, exist_ok=True)

    seg_key = get_seg_key(folder, name, scale=0)
    seg_path = get_seg_path(folder, name, seg_key)

    # make the basic attributes table
    base_out = os.path.join(table_folder, 'default.csv')
    base_attributes(seg_path,
                    seg_key,
                    base_out,
                    resolution,
                    tmp_folder,
                    target=target,
                    max_jobs=max_jobs,
                    correct_anchors=True)

    # make the morphology attribute table
    xml_raw = os.path.join(folder, 'images', 'local',
                           'sbem-6dpf-1-whole-raw.xml')
    raw_path = get_data_path(xml_raw, return_absolute_path=True)
    chromatin_seg_path = get_seg_path(folder,
                                      'sbem-6dpf-1-whole-segmented-chromatin')
    morpho_out = os.path.join(table_folder, 'morphology.csv')
    write_morphology_nuclei(raw_path, seg_path, chromatin_seg_path, base_out,
                            morpho_out, tmp_folder, target, max_jobs)

    # mapping to extrapolated intensities
    mask_name = 'sbem-6dpf-1-whole-segmented-extrapolated'
    k1 = get_seg_key(folder, name, 1)
    k2 = get_seg_key(folder, mask_name, 0)
    extrapol_mask = os.path.join(folder, 'images', 'local',
                                 '%s.xml' % mask_name)
    extrapol_mask = get_data_path(extrapol_mask, return_absolute_path=True)
    extrapol_out = os.path.join(table_folder,
                                'extrapolated_intensity_correction.csv')
    extrapolated_intensities(seg_path, k1, extrapol_mask, k2, extrapol_out,
                             tmp_folder, target, max_jobs)

    write_additional_table_file(table_folder)
def _to_bdv_s3(file_format,
               dataset_folder, dataset_name, storage,
               service_endpoint, bucket_name, region):
    new_format = file_format + ".s3"
    os.makedirs(os.path.join(dataset_folder, "images", new_format.replace(".", "-")), exist_ok=True)

    xml = storage["relativePath"]
    xml_remote = xml.replace(file_format.replace(".", "-"), new_format.replace(".", "-"))

    # the absolute xml paths
    xml_path = os.path.join(dataset_folder, xml)
    xml_remote_path = os.path.join(dataset_folder, xml_remote)
    data_rel_path = os.path.join(os.path.split(xml)[0], get_data_path(xml_path))
    data_abs_path = os.path.join(dataset_folder, data_rel_path)
    if not os.path.exists(data_abs_path):
        warn(f"Could not find data path at {data_abs_path} corresponding to xml {xml_path}")
    path_in_bucket = os.path.join(dataset_name, data_rel_path)

    # copy to the xml for remote data
    copy_xml_as_n5_s3(xml_path, xml_remote_path,
                      service_endpoint=service_endpoint,
                      bucket_name=bucket_name,
                      path_in_bucket=path_in_bucket,
                      region=region,
                      bdv_type=new_format)
    return new_format, {"relativePath": xml_remote}
Example #5
def make_n5_files(version):
    version_folder = os.path.join(ROOT, version)

    # default chunk size
    default_chunks = 3 * (128, )
    # special chunk sizes
    chunk_dict = {'sbem-6dpf-1-whole-raw': None}  # don't copy raw yet

    copied = []

    xmls = glob(os.path.join(version_folder, 'images', 'local', '*.xml'))
    for xml in xmls:
        name = os.path.splitext(os.path.split(xml)[1])[0]
        chunks = chunk_dict.get(name, default_chunks)
        # chunks None means we skip copying for now
        if chunks is None:
            continue

        h5_path = get_data_path(xml, return_absolute_path=True)
        n5_path = os.path.splitext(h5_path)[0] + '.n5'
        copied.append(h5_path)
        if os.path.exists(n5_path):
            continue

        # load resolution from xml
        resolution = get_resolution(xml, 0)
        copy_to_bdv_n5(h5_path, n5_path, chunks, resolution)

    return copied
Example #6
def create_auxiliary_gene_file(meds_root, out_file, return_result=False):
    all_genes_dset = 'genes'
    names_dset = 'gene_names'

    # get all the prospr gene xmls in the image folder
    med_files = glob(os.path.join(meds_root, "prospr*.xml"))
    # filter out prospr files that are not genes (= segmented regions and virtual cells)
    med_files = [name for name in med_files if 'segmented' not in name]
    med_files = [name for name in med_files if 'virtual' not in name]

    # get the gene names from filenames
    gene_names = [os.path.splitext(os.path.basename(f))[0] for f in med_files]
    # cut off the preceding 'prospr-...' prefix
    gene_names = ['-'.join(name.split('-')[4:]) for name in gene_names]
    num_genes = len(gene_names)
    assert num_genes == len(med_files)

    # get the data paths from the xmls
    med_files = [
        get_data_path(med_file, return_absolute_path=True)
        for med_file in med_files
    ]

    is_h5 = os.path.splitext(med_files[0])[1] == '.h5'
    med_key = get_key(is_h5, time_point=0, setup_id=0, scale=0)
    with open_file(med_files[0], 'r') as f:
        spatial_shape = f[med_key].shape

    shape = (num_genes, ) + spatial_shape

    # iterate through med files and write down binarized into one file
    with open_file(out_file) as f:
        out_dset = f.create_dataset(all_genes_dset,
                                    shape=shape,
                                    dtype='bool',
                                    chunks=(1, 64, 64, 64),
                                    compression='gzip')
        out_dset.n_threads = 8

        for i, med_file in enumerate(tqdm(med_files)):
            is_h5 = os.path.splitext(med_file)[1] == '.h5'
            med_key = get_key(is_h5, time_point=0, setup_id=0, scale=0)
            with open_file(med_file, 'r') as f2:
                ds = f2[med_key]
                this_shape = ds.shape
                if this_shape != spatial_shape:
                    raise RuntimeError("Incompatible shapes %s, %s" %
                                       (str(this_shape), str(spatial_shape)))
                ds.n_threads = 8
                data = ds[:]
            out_dset[i] = data

        gene_names_ascii = [n.encode('ascii', 'ignore') for n in gene_names]
        f.create_dataset(names_dset, data=gene_names_ascii, dtype='S40')

    if return_result:
        # reload the binarized version
        with open_file(out_file, 'r') as f:
            all_genes = f[all_genes_dset][:]
        return all_genes
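A small sketch of how the auxiliary file written above could be read back for a single gene; it only relies on the 'genes' and 'gene_names' datasets created by the function and assumes the same open_file helper is available. The function name is hypothetical.

def load_gene_volume(aux_file, gene_name):
    # look up the gene index by name and return its binarized volume
    with open_file(aux_file, 'r') as f:
        names = [n.decode('ascii') for n in f['gene_names'][:]]
        idx = names.index(gene_name)
        return f['genes'][idx]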
Example #7
def export_meshes(xml_path,
                  table_path,
                  cell_ids,
                  out_folder,
                  scale,
                  resolution=None,
                  n_jobs=16):
    os.makedirs(out_folder, exist_ok=True)

    if resolution is None:
        resolution = get_resolution(xml_path, 0)
        if scale > 0:
            resolution = [re * 2**scale for re in resolution]

    # load the segmentation dataset
    path = get_data_path(xml_path, return_absolute_path=True)
    key = 'setup0/timepoint0/s%i' % scale
    f = z5py.File(path, 'r')
    ds = f[key]
    ds.n_threads = 8

    # load the default table to get the bounding boxes
    if table_path is None:
        bb_starts, bb_stops = None, None
    else:
        bb_starts, bb_stops = load_bounding_boxes(table_path, resolution)

    def _mesh(cell_id):
        out_path = os.path.join(out_folder, 'mesh_%i.obj' % cell_id)
        export_mesh(cell_id, ds, bb_starts, bb_stops, resolution, out_path)

    print("Computing meshes ...")
    with futures.ThreadPoolExecutor(n_jobs) as tp:
        list(tqdm(tp.map(_mesh, cell_ids), total=len(cell_ids)))
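A hypothetical call (paths and cell ids are placeholders): export meshes for a few cells from the cell segmentation at scale 2, using the default table to look up the bounding boxes.

cell_ids = [1001, 2050, 3333]
export_meshes('data/1.0.1/images/local/sbem-6dpf-1-whole-segmented-cells.xml',
              'data/1.0.1/tables/sbem-6dpf-1-whole-segmented-cells/default.csv',
              cell_ids, out_folder='./meshes', scale=2)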
def upload_source(dataset_folder, metadata, data_format, bucket_name, s3_prefix="embl", client="minio"):
    if data_format.endswith(".s3"):
        base_format = data_format[:-len(".s3")]
        raise ValueError(f"Cannot upload data in format {data_format}, use format {base_format} instead.")
    s3_format = data_format + ".s3"

    if data_format.startswith("bdv"):
        local_xml = os.path.join(dataset_folder, metadata["image"]["imageData"][data_format]["relativePath"])
        remote_xml = os.path.join(dataset_folder, metadata["image"]["imageData"][s3_format]["relativePath"])

        data_path = get_data_path(local_xml, return_absolute_path=True)
        path_in_bucket = read_path_in_bucket(remote_xml)

    elif data_format == "ome.zarr":
        data_path = os.path.join(dataset_folder, metadata["image"]["imageData"][data_format]["relativePath"])
        s3_address = metadata["image"]["imageData"][s3_format]["s3Address"]
        bucket_end_pos = s3_address.find(bucket_name) + len(bucket_name) + 1
        path_in_bucket = s3_address[bucket_end_pos:]

    else:
        raise ValueError(f"Invalid data format {data_format}")

    if client != "minio":
        raise ValueError(f"Invalid client {client}, currently only minio is supported")

    assert os.path.exists(data_path)
    cmd = ["mc", "cp", "-r", f"{data_path}/", f"{s3_prefix}/{bucket_name}/{path_in_bucket}/"]
    subprocess.run(cmd)
Example #9
def get_seg_path(folder, name, key=None):
    xml_path = os.path.join(folder, 'images', 'local', '%s.xml' % name)
    path = get_data_path(xml_path, return_absolute_path=True)
    assert os.path.exists(path), path
    if key is not None:
        with open_file(path, 'r') as f:
            assert key in f, "%s not in %s" % (key, path)
    return path
Example #10
def _remove_image_data(storage_type, path):
    if storage_type.startswith("bdv"):  # bdv data: remove xml and data
        data_path = bdv_metadata.get_data_path(path, return_absolute_path=True)
        os.remove(path)
        if storage_type.endswith("n5"):
            rmtree(data_path)
        else:
            os.remove(data_path)
    else:  # ome.zarr data, can just rmtree
        rmtree(path)
Example #11
def update_n5_xmls(version):
    version_folder = os.path.join(ROOT, version)
    xmls = glob(os.path.join(version_folder, 'images', 'local', '*.xml'))
    for xml in xmls:
        data_rel_path = get_data_path(xml)
        # is this already n5? -> continue
        if os.path.splitext(data_rel_path)[1] == '.n5':
            continue

        # get the absolute path and check if the corresponding n5 file exists
        data_abs_path = get_data_path(xml, return_absolute_path=True)
        new_abs_path = os.path.splitext(data_abs_path)[0] + '.n5'
        # n5 file is not there? -> continue
        if not os.path.exists(new_abs_path):
            continue

        # write the new relative path
        new_rel_path = os.path.splitext(data_rel_path)[0] + '.n5'
        copy_xml_with_newpath(xml, xml, new_rel_path, data_format='bdv.n5')
Example #12
def copy_and_check_image_dict(folder, new_folder):
    image_dict_in = os.path.join(folder, 'images', 'images.json')
    image_dict_out = os.path.join(new_folder, 'images', 'images.json')
    with open(image_dict_in) as f:
        image_dict = json.load(f)

    for name, properties in image_dict.items():

        invalid_keys = set(properties.keys()) - IMAGE_DICT_KEYS
        if len(invalid_keys) > 0:
            raise RuntimeError("Validating image dict: invalid keys %s" % str(invalid_keys))

        storage = properties['Storage']
        # validate local xml location
        xml = storage['local']
        xml = os.path.join(new_folder, 'images', xml)
        if not os.path.exists(xml):
            raise RuntimeError("Validating image dict: could not find %s" % xml)

        # validate data location
        data_path = get_data_path(xml, return_absolute_path=True)
        if not os.path.exists(data_path):
            raise RuntimeError("Validating image dict: could not find %s" % data_path)

        # validate remote xml location
        if 'remote' in storage:
            xml = storage['remote']
            xml = os.path.join(new_folder, 'images', xml)
            if not os.path.exists(xml):
                raise RuntimeError("Validating image dict: could not find %s" % xml)

        # validate tables
        if 'TableFolder' in properties:
            # check that we have the table folder
            table_folder = os.path.join(new_folder, properties['TableFolder'])
            if not os.path.exists(table_folder):
                raise RuntimeError("Validating image dict: could not find %s" % table_folder)
            default_table = os.path.join(table_folder, 'default.csv')

            # check that we have the default table
            if not os.path.exists(default_table):
                raise RuntimeError("Validating image dict: could not find %s" % default_table)

            # if we have an additional table file, check that the additional tables exist
            additional_table_file = os.path.join(table_folder, 'additional_tables.txt')
            if os.path.exists(additional_table_file):
                with open(additional_table_file, 'r') as f:
                    for fname in f:
                        additional_table = os.path.join(table_folder, fname.rstrip('\n'))
                        if not os.path.exists(additional_table):
                            raise RuntimeError("Validating image dict: could not find %s" % additional_table)

    with open(image_dict_out, 'w') as f:
        json.dump(image_dict, f)
Example #13
def copy_file(xml_in, xml_out, storage='local'):
    if storage == 'local':
        data_path = get_data_path(xml_in, return_absolute_path=True)
        bdv_format = get_bdv_format(xml_in)
        xml_dir = os.path.split(xml_out)[0]
        data_path = os.path.relpath(data_path, start=xml_dir)
        copy_xml_with_newpath(xml_in, xml_out, data_path,
                              path_type='relative', data_format=bdv_format)
    elif storage == 'remote':
        shutil.copyfile(xml_in, xml_out)
    else:
        raise ValueError("Invalid storage spec %s" % storage)
Example #14
def eval_seg(version):
    seg_path = os.path.join(ROOT, version, 'images', 'local', NAME + '.xml')
    seg_path = get_data_path(seg_path, return_absolute_path=True)
    if seg_path.endswith('.n5'):
        key = 'setup0/timepoint0/s0'
    else:
        key = 't00000/s00/0/cells'

    fp, fn, tot = eval_nuclei(seg_path, key, ANNOTATIONS)
    print("Evaluation yields:")
    print("False positives:", fp)
    print("False negatives:", fn)
    print("Total number of annotations:", tot)
def fix_chunks_dataset(source, scale, corrupted_chunks):
    bucket = 'platybrowser'

    source_s3_key = read_path_in_bucket(source)
    local_ds_path = get_data_path(source.replace('remote', 'local'),
                                  return_absolute_path=True)
    ds_key = f'setup0/timepoint0/s{scale}'

    return fix_corrupted_chunks_minio(corrupted_chunks,
                                      local_ds_path,
                                      ds_key,
                                      bucket,
                                      source_s3_key,
                                      ds_key,
                                      server='embl')
def copy_xml_file(xml_in, xml_out, file_format):
    if file_format in ('bdv.hdf5', 'bdv.n5'):
        data_path = get_data_path(xml_in, return_absolute_path=True)
        bdv_format = get_bdv_format(xml_in)
        xml_dir = os.path.split(xml_out)[0]
        data_path = os.path.relpath(data_path, start=xml_dir)
        copy_xml_with_newpath(xml_in,
                              xml_out,
                              data_path,
                              path_type='relative',
                              data_format=bdv_format)
    elif file_format == 'bdv.n5.s3':
        shutil.copyfile(xml_in, xml_out)
    else:
        raise ValueError(f"Invalid file format {file_format}")
Example #17
def make_remote_xmls(version):
    version_folder = os.path.join(ROOT, version)
    xmls = glob(os.path.join(version_folder, 'images', 'local', '*.xml'))

    # iterate over the xmls, check if the target is an n5 file
    # if it is, make xml with correct path in bucket in the remote folder
    for xml in xmls:
        data_path = get_data_path(xml, return_absolute_path=True)
        if not data_path.endswith('.n5'):
            continue
        path_in_bucket = os.path.relpath(data_path, ROOT)
        if 'local' in path_in_bucket:
            path_in_bucket = path_in_bucket.replace('local', 'remote')

        xml_out = xml.replace('local', 'remote')
        write_s3_xml(xml, xml_out, path_in_bucket)
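Taken together with make_n5_files and update_n5_xmls above, a plausible ordering of these steps for one version might look as follows; whether the original pipeline chains them exactly like this is an assumption.

version = '1.0.1'
make_n5_files(version)      # convert the h5 containers referenced by the local xmls to n5
update_n5_xmls(version)     # repoint the local xmls to the new n5 files
make_remote_xmls(version)   # write s3 xmls for everything that now has an n5 target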
Example #18
def move_image_file(image_folder, xml_path):
    name = os.path.splitext(os.path.split(xml_path)[1])[0]
    new_name = look_up_filename(name)

    # get the linked hdf5 path
    image_path = get_data_path(xml_path, return_absolute_path=True)

    # move the xml to 'images/local'
    new_xml_path = os.path.join(image_folder, 'local', new_name + '.xml')
    if DRY_RUN:
        print("Moving", xml_path, "to", new_xml_path)
    else:
        shutil.move(xml_path, new_xml_path)

    # if the hdf5 file is in the same folder, move it to 'images/local' as well
    h5_is_local = len(
        os.path.relpath(image_path,
                        os.path.split(xml_path)[0]).split('/')) == 1
    if h5_is_local:
        new_image_path = os.path.join(image_folder, 'local', new_name + '.h5')
        if DRY_RUN:
            print("Moving", image_path, "to", new_image_path)
        else:
            assert os.path.exists(image_path), image_path
            shutil.move(image_path, new_image_path)
    # if not, construct the new correct data path
    else:
        # the new image path might be in rawdata; in this case there is no '/local'
        # subfolder; if it is in a version folder, it is in '/local'
        im_root, im_name = os.path.split(image_path)
        # take care of 'segmentations'
        if os.path.split(im_root)[1] == 'segmentations':
            im_root = os.path.join(os.path.split(im_root)[0], 'images')
        new_image_path = os.path.join(im_root, new_name + '.h5')
        if not os.path.exists(new_image_path):
            new_image_path = os.path.join(im_root, 'local', new_name + '.h5')

    new_rel_data_path = os.path.relpath(new_image_path,
                                        os.path.split(new_xml_path)[0])
    if DRY_RUN:
        print("Setting new xml path to", new_rel_data_path)
    else:
        assert os.path.exists(new_image_path), new_image_path
        # set path in xml
        copy_xml_with_newpath(new_xml_path, new_xml_path, new_rel_data_path)

    return new_name
Example #19
def get_seg_path(folder, name):
    # check if we have a data subfolder; if so, load
    # the segmentation from there
    data_folder = os.path.join(folder, 'images', 'local')
    data_folder = data_folder if os.path.exists(data_folder) else folder

    # check if we have an xml
    path = os.path.join(data_folder, '%s.xml' % name)
    # read h5 path from the xml
    if os.path.exists(path):
        path = get_data_path(path, return_absolute_path=True)
        if not os.path.exists(path):
            raise RuntimeError("Invalid path in xml")
        return path
    else:
        raise RuntimeError(
            "The specified folder does not contain segmentation file with name %s"
            % name)
Example #20
def _check_data(storage, format_, name, dataset_folder, require_local_data,
                require_remote_data, assert_true, assert_equal):
    # checks for bdv format
    if format_.startswith("bdv"):
        path = os.path.join(dataset_folder, storage["relativePath"])
        assert_true(os.path.exists(path),
                    f"Could not find data for {name} at {path}")

        # check that the source name and name in the xml agree for bdv formats
        bdv_name = get_name(path, setup_id=0)
        msg = f"{path}: Source name and name in bdv metadata disagree: {name} != {bdv_name}"
        assert_equal(name, bdv_name, msg)

        # check that the remote s3 address exists
        if format_.endswith(".s3") and require_remote_data:
            _check_bdv_n5_s3(path, assert_true)

        # check that the referenced local file path exists
        elif require_local_data:
            data_path = get_data_path(path, return_absolute_path=True)
            assert_true(os.path.exists(data_path))

    # local ome.zarr check: source name and name in the ome.zarr metadata agree
    elif format_ == "ome.zarr" and require_local_data:
        path = os.path.join(dataset_folder, storage["relativePath"])
        assert_true(os.path.exists(path),
                    f"Could not find data for {name} at {path}")

        with open_file(path, "r", ext=".zarr") as f:
            ome_name = f.attrs["multiscales"][0]["name"]
        # we can't do this check if we only load a sub-channel
        if "channel" not in storage:
            assert_equal(
                name, ome_name,
                f"Source name and name in ngff metadata don't match: {name} != {ome_name}"
            )

    # remote ome.zarr check:
    elif format_ == "ome.zarr.s3" and require_remote_data:
        s3_address = storage["s3Address"]
        channel = storage.get("channel")
        _check_ome_zarr_s3(s3_address, name, assert_true, assert_equal,
                           channel)
Example #21
def migrate_rawfolder():
    raw_folder = os.path.join(ROOT, 'rawdata')
    xmls = glob(os.path.join(raw_folder, "*.xml"))

    for xml_path in xmls:
        name = os.path.splitext(os.path.split(xml_path)[1])[0]
        new_name = look_up_filename(name)

        # get the linked hdf5 path
        image_path = get_data_path(xml_path, return_absolute_path=True)

        # move the xml to 'images/local'
        new_xml_path = os.path.join(raw_folder, new_name + '.xml')
        if DRY_RUN:
            print("Moving", xml_path, "to", new_xml_path)
        else:
            shutil.move(xml_path, new_xml_path)

        new_image_path = os.path.join(raw_folder, new_name + '.h5')
        if DRY_RUN:
            print("Moving", image_path, "to", new_image_path)
        else:
            assert os.path.exists(image_path), image_path
            shutil.move(image_path, new_image_path)

        new_rel_data_path = new_name + '.h5'
        if DRY_RUN:
            print("Setting new xml path to", new_rel_data_path)
        else:
            assert os.path.exists(new_image_path), new_image_path
            # set path in xml
            copy_xml_with_newpath(new_xml_path, new_xml_path,
                                  new_rel_data_path)

        # rename the tables folder if it exists
        table_folder = os.path.join(raw_folder, 'tables', name)
        if os.path.exists(table_folder):
            new_table_folder = os.path.join(raw_folder, 'tables', new_name)
            if DRY_RUN:
                print("Rename", table_folder, "to", new_table_folder)
            else:
                os.rename(table_folder, new_table_folder)
Example #22
def cutout_data(tag, name, scale, bb_start, bb_stop):
    assert all(sta < sto for sta, sto in zip(bb_start, bb_stop))

    path = os.path.join('data', tag, name_to_path(name))
    path = get_data_path(path, return_absolute_path=True)
    resolution = get_res_level(scale)

    base_scale = name_to_base_scale(name)
    assert base_scale <= scale, "%s does not support scale %i; minimum is %i" % (
        name, scale, base_scale)
    data_scale = scale - base_scale

    bb_start_ = [int(sta / re) for sta, re in zip(bb_start, resolution)][::-1]
    bb_stop_ = [int(sto / re) for sto, re in zip(bb_stop, resolution)][::-1]
    bb = tuple(slice(sta, sto) for sta, sto in zip(bb_start_, bb_stop_))

    key = 't00000/s00/%i/cells' % data_scale
    with h5py.File(path, 'r') as f:
        ds = f[key]
        data = ds[bb]
    return data
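A hedged usage sketch (tag, name and coordinates are placeholders): cut out a small block from the raw data at scale 3, with the bounding box given in the physical units expected by get_res_level.

cutout = cutout_data('1.0.1', 'raw', scale=3,
                     bb_start=[100.0, 120.0, 50.0],
                     bb_stop=[110.0, 135.0, 60.0])
print(cutout.shape)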
Example #23
    def check_dataset(self, dataset_folder, exp_shape, raw_name, file_format="bdv.n5"):
        # validate the full project
        validate_project(
            self.root, assert_true=self.assertTrue, assert_in=self.assertIn, assert_equal=self.assertEqual
        )

        # check the raw data
        folder_name = file_format.replace(".", "-")
        if file_format.startswith("bdv"):
            xml_path = os.path.join(dataset_folder, "images", folder_name, f"{raw_name}.xml")
            raw_path = get_data_path(xml_path, return_absolute_path=True)
            is_h5 = file_format == "bdv.hdf5"
            key = get_key(is_h5, 0, 0, 0)
        else:
            self.assertEqual(file_format, "ome.zarr")
            raw_path = os.path.join(dataset_folder, "images", folder_name, f"{raw_name}.ome.zarr")
            key = "s0"

        with open_file(raw_path, "r") as f:
            data = f[key][:]
            shape = data.shape
        self.assertEqual(shape, exp_shape)
        self.assertFalse(np.allclose(data, 0.))
Example #24
def compare_seg_to_ref(seg_path, seg_key, version, with_roi, target, max_jobs):
    ref_path = os.path.join(ROOT, version, 'images', 'local',
                            'sbem-6dpf-1-whole-segmented-cells.xml')
    ref_path = get_data_path(ref_path, return_absolute_path=True)

    if ref_path.endswith('.n5'):
        ref_key = 'setup0/timepoint0/s0'
    else:
        ref_key = 't00000/s00/0/cells'

    shape = check_segmentations(ref_path, ref_key, seg_path, seg_key)

    halo = [100, 1024, 1024]
    if with_roi:
        roi_begin = [sh // 2 - ha for sh, ha in zip(shape, halo)]
        roi_end = [sh // 2 + ha for sh, ha in zip(shape, halo)]
    else:
        roi_begin = roi_end = None

    tmp_folder = './tmp_partition_comparison_%s' % seg_key
    # partition_comparison is assumed to return (vi-split, vi-merge, adapted rand error)
    vis, vim, ari = partition_comparison(seg_path,
                                         seg_key,
                                         ref_path,
                                         ref_key,
                                         tmp_folder,
                                         target,
                                         max_jobs,
                                         roi_begin=roi_begin,
                                         roi_end=roi_end)
    print("Have evaluated segmentation:")
    print(seg_path, ":", seg_key)
    print("against refetence:")
    print(ref_path, ":", ref_key)
    print("Result:")
    print("VI:", vis, "(split)", vim, "(merge)", vis + vim, "(total)")
    print("Adapted Rand error:", ari)
def eval_seg(version, semantic_eval):
    seg_path = os.path.join(ROOT, version, 'images', 'local', NAME + '.xml')
    seg_path = get_data_path(seg_path, return_absolute_path=True)
    table_path = os.path.join(ROOT, version, 'tables', NAME, 'regions.csv')
    if seg_path.endswith('.n5'):
        key = 'setup0/timepoint0/s0'
    else:
        key = 't00000/s00/0/cells'

    if semantic_eval:
        semantic_mapping = load_semantic_mapping(table_path)
    else:
        semantic_mapping = None

    ignore_ids = get_ignore_seg_ids(table_path)
    fm, fs, tot = eval_cells(seg_path,
                             key,
                             ANNOTATIONS,
                             ignore_seg_ids=ignore_ids,
                             semantic_mapping=semantic_mapping)
    print("Evaluation yields:")
    print("False merges:", fm)
    print("False splits:", fs)
    print("Total number of annotations:", tot)
def xml_to_h5_path(xml_path):
    path = get_data_path(xml_path, return_absolute_path=True)
    return path
Example #27
def make_cell_tables(old_folder,
                     folder,
                     name,
                     tmp_folder,
                     resolution,
                     target='slurm',
                     max_jobs=100,
                     seg_has_changed=True):
    # make the table folder
    table_folder = os.path.join(folder, 'tables', name)
    os.makedirs(table_folder, exist_ok=True)

    seg_key = get_seg_key(folder, name, scale=0)
    seg_path = get_seg_path(folder, name, seg_key)

    # make the basic attributes table
    base_out = os.path.join(table_folder, 'default.csv')
    label_ids = base_attributes(seg_path,
                                seg_key,
                                base_out,
                                resolution,
                                tmp_folder,
                                target=target,
                                max_jobs=max_jobs,
                                correct_anchors=False)

    # make table with cell nucleus mapping
    nuc_mapping_table = os.path.join(table_folder, 'cells_to_nuclei.csv')
    nuc_path = get_seg_path(folder, 'sbem-6dpf-1-whole-segmented-nuclei',
                            seg_key)
    map_cells_to_nuclei(label_ids, seg_path, nuc_path, nuc_mapping_table,
                        tmp_folder, target, max_jobs)

    # add a column with (somewhat stringent) cell criterion to the default table
    add_cell_criterion_column(base_out, nuc_mapping_table)

    # make table with gene mapping
    aux_gene_xml = os.path.join(folder, 'misc',
                                'prospr-6dpf-1-whole_meds_all_genes.xml')
    aux_gene_path = get_data_path(aux_gene_xml, return_absolute_path=True)
    if not os.path.exists(aux_gene_path):
        raise RuntimeError("Can't find auxiliary gene file @ %s" %
                           aux_gene_path)
    gene_out = os.path.join(table_folder, 'genes.csv')
    gene_assignment_table(seg_path, aux_gene_path, gene_out, label_ids,
                          tmp_folder, target)

    # make table with gene mapping via VCs
    vc_name = 'prospr-6dpf-1-whole-virtual-cells'
    vc_vol_path = get_seg_path(folder, vc_name)
    vc_key = get_seg_key(folder, vc_name, scale=0)
    vc_expression_path = os.path.join(folder, 'tables', vc_name,
                                      'profile_clust_curated.csv')
    med_expression_path = gene_out
    vc_out = os.path.join(table_folder, 'vc_assignments.csv')
    vc_assignment_table(seg_path, vc_vol_path, vc_key, vc_expression_path,
                        med_expression_path, vc_out, tmp_folder, target)

    # region and semantic mapping
    region_out = os.path.join(table_folder, 'regions.csv')
    # need to make sure the inputs are copied / updated in
    # the segmentation folder beforehand
    segmentation_folder = os.path.join(folder, 'images', 'local')
    region_attributes(seg_path, region_out, segmentation_folder, label_ids,
                      tmp_folder, target, max_jobs)

    # make table with morphology
    xml_raw = os.path.join(folder, 'images', 'local',
                           'sbem-6dpf-1-whole-raw.xml')
    raw_path = get_data_path(xml_raw, return_absolute_path=True)
    morpho_out = os.path.join(table_folder, 'morphology.csv')
    write_morphology_cells(raw_path, seg_path, nuc_path, base_out, morpho_out,
                           nuc_mapping_table, region_out, tmp_folder, target,
                           max_jobs)

    # mapping to extrapolated intensities
    mask_name = 'sbem-6dpf-1-whole-segmented-extrapolated'
    k1 = get_seg_key(folder, name, 3)
    k2 = get_seg_key(folder, mask_name, 0)
    extrapol_mask = os.path.join(folder, 'images', 'local',
                                 '%s.xml' % mask_name)
    extrapol_mask = get_data_path(extrapol_mask, return_absolute_path=True)
    extrapol_out = os.path.join(table_folder,
                                'extrapolated_intensity_correction.csv')
    extrapolated_intensities(seg_path, k1, extrapol_mask, k2, extrapol_out,
                             tmp_folder, target, max_jobs)

    # TODO need to update the neuron trace table as well
    old_ganglia_table = os.path.join(old_folder, 'tables', name,
                                     'ganglia_ids.csv')
    new_ganglia_table = os.path.join(table_folder, 'ganglia_ids.csv')
    old_gcluster_table = os.path.join(old_folder, 'tables', name,
                                      'gene_clusters.csv')
    new_gcluster_table = os.path.join(table_folder, 'gene_clusters.csv')
    old_symm_pair_table = os.path.join(old_folder, 'tables', name,
                                       'symmetric_cells.csv')
    new_symm_pair_table = os.path.join(table_folder, 'symmetric_cells.csv')
    old_mcluster_table = os.path.join(old_folder, 'tables', name,
                                      'morphology_clusters.csv')
    new_mcluster_table = os.path.join(table_folder, 'morphology_clusters.csv')
    # we only need to trigger the label id propagation if the segmentation was updated
    if seg_has_changed:
        id_lut = os.path.join(
            folder, 'misc',
            'new_id_lut_sbem-6dpf-1-whole-segmented-cells.json')

        # update the cell id column of the cilia cell_id_mapping table
        cilia_name = 'sbem-6dpf-1-whole-segmented-cilia'
        old_cilia_table = os.path.join(old_folder, 'tables', cilia_name,
                                       'cell_mapping.csv')
        new_cilia_table = os.path.join(folder, 'tables', cilia_name,
                                       'cell_mapping.csv')
        propagate_attributes(id_lut,
                             old_cilia_table,
                             new_cilia_table,
                             'cell_id',
                             override=True)

        # update the ganglia id mapping table, gene clusters and symmetric pairs
        propagate_attributes(id_lut,
                             old_ganglia_table,
                             new_ganglia_table,
                             'label_id',
                             override=True)
        propagate_attributes(id_lut,
                             old_gcluster_table,
                             new_gcluster_table,
                             'label_id',
                             override=True)
        propagate_attributes(id_lut,
                             old_symm_pair_table,
                             new_symm_pair_table,
                             'label_id',
                             override=True)
        propagate_attributes(id_lut,
                             old_mcluster_table,
                             new_mcluster_table,
                             'label_id',
                             override=True)

    else:
        # otherwise, need to copy the ganglia, gene cluster and symmetric pair table
        make_squashed_link(old_ganglia_table, new_ganglia_table)
        make_squashed_link(old_gcluster_table, new_gcluster_table)
        make_squashed_link(old_symm_pair_table, new_symm_pair_table)
        make_squashed_link(old_mcluster_table, new_mcluster_table)

    write_additional_table_file(table_folder)
def make_traces_table(traces,
                      reference_scale,
                      resolution,
                      out_path,
                      seg_infos={}):
    """ Make table from traces compatible with the platy browser.
    """

    files = {}
    datasets = {}
    for seg_name, seg_info in seg_infos.items():

        seg_path = seg_info['path']
        if seg_path.endswith('.xml'):
            seg_path = get_data_path(seg_path, return_absolute_path=True)
        seg_scale = seg_info['scale']
        is_h5 = is_h5_file(seg_path)
        seg_key = get_key(is_h5, time_point=0, setup_id=0, scale=seg_scale)
        f = open_file(seg_path, 'r')
        ds = f[seg_key]

        if len(files) == 0:
            ref_shape = ds.shape
        else:
            assert ds.shape == ref_shape, "%s, %s" % (str(
                ds.shape), str(ref_shape))

        files[seg_name] = f
        datasets[seg_name] = ds

    table = []
    for nid, vals in tqdm(traces.items()):

        coords = vals_to_coords(vals, resolution)
        bb_min = coords.min(axis=0)
        bb_max = coords.max(axis=0) + 1

        # get spatial attributes
        anchor = coords[0].astype('float32') * resolution / 1000.
        bb_min = bb_min.astype('float32') * resolution / 1000.
        bb_max = bb_max.astype('float32') * resolution / 1000.

        # get cell and nucleus ids
        point_slice = tuple(slice(int(c), int(c) + 1) for c in coords[0])
        # attributes:
        # label_id
        # anchor_x anchor_y anchor_z
        # bb_min_x bb_min_y bb_min_z bb_max_x bb_max_y bb_max_z
        # n_points + seg ids
        attributes = [
            nid, anchor[2], anchor[1], anchor[0], bb_min[2], bb_min[1],
            bb_min[0], bb_max[2], bb_max[1], bb_max[0],
            len(coords)
        ]

        for ds in datasets.values():
            seg_id = ds[point_slice][0, 0, 0]
            attributes += [seg_id]

        table.append(attributes)

    for f in files.values():
        f.close()

    table = np.array(table, dtype='float32')
    header = [
        'label_id', 'anchor_x', 'anchor_y', 'anchor_z', 'bb_min_x', 'bb_min_y',
        'bb_min_z', 'bb_max_x', 'bb_max_y', 'bb_max_z', 'n_points'
    ]
    header += ['%s_id' % seg_name for seg_name in seg_infos]

    table = pd.DataFrame(table, columns=header)
    table.to_csv(out_path, index=False, sep='\t')
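A sketch of the expected seg_infos layout (names, paths, scales and the resolution are placeholders): each entry provides a segmentation path, either an xml or the container itself, plus the scale level at which the trace anchor point is looked up; traces is assumed to be loaded beforehand as a dict of trace id to coordinate values.

seg_infos = {
    'cell': {'path': 'images/local/sbem-6dpf-1-whole-segmented-cells.xml', 'scale': 2},
    'nucleus': {'path': 'images/local/sbem-6dpf-1-whole-segmented-nuclei.xml', 'scale': 0},
}
make_traces_table(traces, reference_scale=0, resolution=[0.025, 0.02, 0.02],
                  out_path='./traces_default.csv', seg_infos=seg_infos)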
Example #29
def copy_xml_with_relpath(xml_in, xml_out):
    path = get_data_path(xml_in, return_absolute_path=True)
    xml_root = os.path.split(xml_out)[0]
    path = os.path.relpath(path, xml_root)
    copy_xml_with_newpath(xml_in, xml_out, path, path_type="relative")
Example #30
def copy_xml_with_abspath(xml_in, xml_out):
    path = get_data_path(xml_in, return_absolute_path=True)
    copy_xml_with_newpath(xml_in, xml_out, path, path_type='absolute')