Example #1
def create_auxiliary_gene_file(meds_root, out_file, return_result=False):
    all_genes_dset = 'genes'
    names_dset = 'gene_names'

    # get all the prospr gene xmls in the image folder
    med_files = glob(os.path.join(meds_root, "prospr*.xml"))
    # filter out prospr files that are not genes (= segmented regions and virtual cells)
    med_files = [name for name in med_files if 'segmented' not in name]
    med_files = [name for name in med_files if 'virtual' not in name]

    # get the gene names from filenames
    gene_names = [os.path.splitext(os.path.basename(f))[0] for f in med_files]
    # cut off the preceding prospr-... part
    gene_names = ['-'.join(name.split('-')[4:]) for name in gene_names]
    num_genes = len(gene_names)
    assert num_genes == len(med_files)

    # get the data paths from the xmls
    med_files = [
        get_data_path(med_file, return_absolute_path=True)
        for med_file in med_files
    ]

    is_h5 = os.path.splitext(med_files[0])[1] == '.h5'
    med_key = get_key(is_h5, time_point=0, setup_id=0, scale=0)
    with open_file(med_files[0], 'r') as f:
        spatial_shape = f[med_key].shape

    shape = (num_genes, ) + spatial_shape

    # iterate over the med files and write them, binarized, into one output file
    with open_file(out_file) as f:
        out_dset = f.create_dataset(all_genes_dset,
                                    shape=shape,
                                    dtype='bool',
                                    chunks=(1, 64, 64, 64),
                                    compression='gzip')
        out_dset.n_threads = 8

        for i, med_file in enumerate(tqdm(med_files)):
            is_h5 = os.path.splitext(med_file)[1] == '.h5'
            med_key = get_key(is_h5, time_point=0, setup_id=0, scale=0)
            with open_file(med_file, 'r') as f2:
                ds = f2[med_key]
                this_shape = ds.shape
                if this_shape != spatial_shape:
                    raise RuntimeError("Incompatible shapes %s, %s" %
                                       (str(this_shape), str(spatial_shape)))
                ds.n_threads = 8
                data = ds[:]
            out_dset[i] = data

        gene_names_ascii = [n.encode('ascii', 'ignore') for n in gene_names]
        f.create_dataset(names_dset, data=gene_names_ascii, dtype='S40')

    if return_result:
        # reload the binarized version
        with open_file(out_file, 'r') as f:
            all_genes = f[all_genes_dset][:]
        return all_genes
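For reference, a minimal call sketch for the function above; the folder and file names are placeholders, not taken from the original project:

# Hypothetical paths; adjust to the actual image folder and desired output file.
all_genes = create_auxiliary_gene_file(
    meds_root='data/images/local',    # folder containing the prospr-*.xml files
    out_file='data/prospr_genes.h5',  # combined, binarized gene stack
    return_result=True                # also load the result back into memory
)
print(all_genes.shape)  # (num_genes,) + spatial_shape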
Example #2
def copy_to_bdv_n5(in_file, out_file, chunks, resolution,
                   n_threads=32, start_scale=0):

    n_scales = get_number_of_scales(in_file, 0, 0)
    scale_factors = get_scale_factors(in_file, 0)
    # double check newly implemented functions in pybdv
    assert n_scales == len(scale_factors)

    scale_factors = normalize_scale_factors(scale_factors, start_scale)

    for out_scale, in_scale in enumerate(range(start_scale, n_scales)):
        in_key = get_key(True, 0, 0, in_scale)
        out_key = get_key(False, 0, 0, out_scale)

        if chunks is None:
            with open_file(in_file, 'r') as f:
                chunks_ = f[in_key].chunks
        else:
            chunks_ = chunks

        copy_dataset(in_file, in_key, out_file, out_key,
                     convert_dtype=False,
                     chunks=chunks_,
                     n_threads=n_threads)
        copy_attributes(in_file, in_key, out_file, out_key)

    write_n5_metadata(out_file, scale_factors, resolution, setup_id=0)
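A hedged usage sketch for copy_to_bdv_n5; the file names and resolution below are made up, and the input is assumed to be a bdv.hdf5 pyramid, as implied by get_key(True, ...) above:

# Hypothetical invocation: convert an existing bdv.hdf5 pyramid to bdv.n5,
# reusing the chunking of each input scale level.
copy_to_bdv_n5(
    in_file='volume.h5',
    out_file='volume.n5',
    chunks=None,                     # None -> copy the chunks of the input datasets
    resolution=[0.025, 0.02, 0.02],  # z, y, x in micrometer
    n_threads=16,
    start_scale=0
)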
Example #3
def view_segmentations(version, raw_scale, seg_names=[], seg_scales=[], bb=np.s_[:]):
    folder = os.path.join(ROOT, version, 'images', 'local')
    raw_file = os.path.join(folder, 'sbem-6dpf-1-whole-raw.xml')
    raw_file = get_data_path(raw_file, return_absolute_path=True)
    raw_key = get_key(False, time_point=0, setup_id=0, scale=raw_scale)

    with z5py.File(raw_file, 'r') as f:
        ds = f[raw_key]
        ds.n_threads = 16
        raw = ds[bb]
        ref_shape = raw.shape

    data = [to_source(raw, name='raw')]

    for seg_name, seg_scale in zip(seg_names, seg_scales):
        seg_file = os.path.join(folder, seg_name + '.xml')
        seg_file = get_data_path(seg_file, return_absolute_path=True)
        seg_key = get_key(False, time_point=0, setup_id=0, scale=seg_scale)
        with z5py.File(seg_file, 'r') as f:
            ds = f[seg_key]
            ds.n_threads = 16
            seg = ds[bb].astype('uint32')
            if seg.shape != ref_shape:
                # FIXME this will fail with bounding box
                print("Resize", ref_shape)
                seg = ResizeWrapper(to_source(seg, name=seg_name), ref_shape)
        data.append(to_source(seg, name=seg_name))

    view(*data)
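For illustration, a hypothetical call of view_segmentations; the version string and scale levels are placeholders, the segmentation name is taken from other examples on this page:

view_segmentations(
    version='0.6.5',
    raw_scale=3,
    seg_names=['sbem-6dpf-1-whole-segmented-cells'],
    seg_scales=[2],
    bb=np.s_[200:300, :, :]  # optional bounding box into the raw volume
)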
Example #4
def get_seg_key_xml(xml_path, scale):
    bdv_format = get_bdv_format(xml_path)
    if bdv_format == 'bdv.hdf5':
        return get_key(True, time_point=0, setup_id=0, scale=scale)
    elif bdv_format == 'bdv.n5':
        return get_key(False, time_point=0, setup_id=0, scale=scale)
    else:
        raise RuntimeError("Invalid bdv format: %s" % bdv_format)
Example #5
def import_traces(input_folder, out_path,
                  reference_path, reference_scale,
                  resolution, scale_factors,
                  radius=2, chunks=None, max_jobs=8,
                  unit='micrometer', source_name=None):
    """ Import trace data into the mobie format.

    input_folder [str] - folder with traces to be imported.
    out_path [str] - where to save the segmentation
    reference_path [str] - path to the reference volume
    reference_scale [str] - scale to use for reference
    resolution [list[float]] - resolution of the traces in micrometers
    scale_factors [list[list[int]]] - scale factors for down-sampling
    radius [int] - radius to write for the traces
    chunks [list[int]] - chunks for the traces volume
    max_jobs [int] - number of threads to use for down-samling
    unit [str] - physical unit (default: micrometer)
    source_name [str] - name of the source (default: None)
    """

    traces = parse_traces(input_folder)

    # check that we are compatible with bdv (ids need to be smaller than int16 max)
    max_id = np.iinfo('int16').max
    max_trace_id = max(traces.keys())
    if max_trace_id > max_id:
        raise RuntimeError("Can't export id %i > %i" % (max_trace_id, max_id))

    is_h5 = is_h5py(reference_path)
    ref_key = get_key(is_h5, timepoint=0, setup_id=0, scale=reference_scale)
    with open_file(reference_path, 'r') as f:
        ds = f[ref_key]
        shape = ds.shape
        if chunks is None:
            chunks = ds.chunks

    key0 = get_key(is_h5, timepoint=0, setup_id=0, scale=0)
    print("Writing traces ...")
    traces_to_volume(traces, out_path, key0, shape, resolution, chunks, radius, max_jobs)

    print("Downscaling traces ...")
    make_scales(out_path, scale_factors, downscale_mode='max',
                ndim=3, setup_id=0, is_h5=is_h5,
                chunks=chunks, n_threads=max_jobs)

    xml_path = os.path.splitext(out_path)[0] + '.xml'
    # we assume that the resolution is in nanometer, but want to write in microns for bdv
    bdv_res = [res / 1000. for res in resolution]
    write_xml_metadata(xml_path, out_path, unit, bdv_res, is_h5,
                       setup_id=0, timepoint=0, setup_name=source_name,
                       affine=None, attributes={'channel': {'id': 0}},
                       overwrite=False, overwrite_data=False, enforce_consistency=False)
    bdv_scale_factors = [[1, 1, 1]] + scale_factors
    if is_h5:
        write_h5_metadata(out_path, bdv_scale_factors)
    else:
        write_n5_metadata(out_path, bdv_scale_factors, bdv_res)
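A call sketch under the assumptions spelled out in the docstring; all paths and numbers are placeholders:

import_traces(
    input_folder='traces/',         # folder with the traced neurons
    out_path='traces.n5',
    reference_path='raw.n5',
    reference_scale=3,
    resolution=[25.0, 20.0, 20.0],  # nanometer, converted to micrometer for bdv
    scale_factors=[[2, 2, 2]] * 3,
    radius=2,
    max_jobs=8,
    source_name='traces'
)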
Example #6
def map_ids(path1,
            path2,
            out_path,
            tmp_folder,
            max_jobs,
            target,
            prefix,
            key1=None,
            key2=None,
            scale=0):
    task = NodeLabelWorkflow

    config_folder = os.path.join(tmp_folder, 'configs')
    write_default_global_config(config_folder)
    configs = task.get_config()

    conf = configs['merge_node_labels']
    conf.update({'threads_per_job': 8, 'mem_limit': 16})
    with open(os.path.join(config_folder, 'merge_node_labels.config'),
              'w') as f:
        json.dump(conf, f)

    if key1 is None:
        is_h5 = is_h5_file(path1)
        key1 = get_key(is_h5, time_point=0, setup_id=0, scale=scale)
    if key2 is None:
        is_h5 = is_h5_file(path2)
        key2 = get_key(is_h5, time_point=0, setup_id=0, scale=scale)

    tmp_path = os.path.join(tmp_folder, 'data.n5')
    tmp_key = prefix
    t = task(tmp_folder=tmp_folder,
             config_dir=config_folder,
             target=target,
             max_jobs=max_jobs,
             ws_path=path1,
             ws_key=key1,
             input_path=path2,
             input_key=key2,
             output_path=tmp_path,
             output_key=tmp_key,
             prefix=prefix,
             max_overlap=True,
             serialize_counts=True)
    ret = luigi.build([t], local_scheduler=True)
    if not ret:
        raise RuntimeError("Id-mapping failed")

    ds = z5py.File(tmp_path)[tmp_key]
    lut = ds[:]
    assert lut.ndim == 2
    lut = dict(zip(range(len(lut)), lut.tolist()))

    with open(out_path, 'w') as f:
        json.dump(lut, f)
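A hypothetical invocation of map_ids, mapping every id in one segmentation to the maximally overlapping id in another; the paths and prefix are placeholders:

map_ids('cells_v1.n5', 'cells_v2.n5',
        out_path='id_mapping.json',
        tmp_folder='tmp_map_ids',
        max_jobs=16,
        target='local',
        prefix='cells_v1_to_v2',
        scale=2)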
Example #7
def downsample(ref_path, in_path, in_key, out_path, resolution, tmp_folder,
               target, max_jobs):
    ref_is_h5 = is_h5_file(ref_path)
    gkey = get_key(ref_is_h5, 0, 0)
    with open_file(ref_path, 'r') as f:
        g = f[gkey]
        levels = list(g.keys())
        levels.sort()

        sample_factors = []
        for level in range(1, len(levels)):
            k0 = get_key(ref_is_h5, 0, 0, level - 1)
            k1 = get_key(ref_is_h5, 0, 0, level)
            ds0 = f[k0]
            ds1 = f[k1]

            s0 = ds0.shape
            s1 = ds1.shape
            factor = [
                int(round(float(sh0) / sh1, 0)) for sh0, sh1 in zip(s0, s1)
            ]

            sample_factors.append(factor)
        assert len(sample_factors) == len(levels) - 1

    config_dir = os.path.join(tmp_folder, 'configs')
    task = DownscalingWorkflow
    config = task.get_config()['downscaling']
    config.update({'library': 'skimage', 'time_limit': 360, 'mem_limit': 8})
    with open(os.path.join(config_dir, 'downscaling.config'), 'w') as f:
        json.dump(config, f)

    halos = len(sample_factors) * [[0, 0, 0]]

    # TODO this needs merge of
    # https://github.com/constantinpape/cluster_tools/pull/17
    t = task(tmp_folder=tmp_folder,
             config_dir=config_dir,
             max_jobs=max_jobs,
             target=target,
             input_path=in_path,
             input_key=in_key,
             scale_factors=sample_factors,
             halos=halos,
             metadata_format='bdv',
             metadata_dict={
                 'resolution': resolution,
                 'unit': 'micrometer'
             },
             output_path=out_path)
    ret = luigi.build([t], local_scheduler=True)
    if not ret:
        raise RuntimeError("Downscaling failed")
Example #8
def traces_to_volume(traces,
                     reference_vol_path,
                     reference_scale,
                     out_path,
                     resolution,
                     scale_factors,
                     radius=2,
                     chunks=None,
                     n_threads=8):
    """ Export traces as segmentation compatible with the platy-browser.
    """

    # check that we are compatible with bdv (ids need to be smaller than int16 max)
    max_id = np.iinfo('int16').max
    max_trace_id = max(traces.keys())
    if max_trace_id > max_id:
        raise RuntimeError("Can't export id %i > %i" % (max_trace_id, max_id))

    is_h5 = is_h5_file(reference_vol_path)
    ref_key = get_key(is_h5, time_point=0, setup_id=0, scale=reference_scale)
    with open_file(reference_vol_path, 'r') as f:
        ds = f[ref_key]
        shape = ds.shape
        if chunks is None:
            chunks = ds.chunks

    is_h5 = is_h5_file(out_path)
    key0 = get_key(is_h5, time_point=0, setup_id=0, scale=0)
    print("Writing traces ...")
    write_vol_from_traces(traces, out_path, key0, shape, resolution, chunks,
                          radius, n_threads)

    print("Downscaling traces ...")
    make_scales(out_path,
                scale_factors,
                downscale_mode='max',
                ndim=3,
                setup_id=0,
                is_h5=is_h5,
                chunks=chunks,
                n_threads=n_threads)

    xml_path = os.path.splitext(out_path)[0] + '.xml'
    # we assume that the resolution is in nanometer, but want to write in microns for bdv
    bdv_res = [res / 1000. for res in resolution]
    unit = 'micrometer'
    write_xml_metadata(xml_path, out_path, unit, bdv_res, is_h5)
    bdv_scale_factors = [[1, 1, 1]] + scale_factors
    if is_h5:
        write_h5_metadata(out_path, bdv_scale_factors)
    else:
        write_n5_metadata(out_path, bdv_scale_factors, bdv_res)
Example #9
def add_timepoint(in_path, out_path, tp, channel, mode, in_key=None):
    if in_key is None:
        in_key = get_key(is_h5=True, timepoint=tp, setup_id=channel, scale=0)
    out_key = get_key(is_h5=False, timepoint=tp, setup_id=0, scale=0)

    # skip timepoints that have been copied already
    with open_file(out_path, 'r') as f:
        if out_key in f:
            return

    convert_to_bdv(in_path, in_key, out_path, SCALE_FACTORS,
                   downscale_mode=mode, resolution=RESOLUTION,
                   unit='micrometer', setup_id=0, timepoint=tp)
Example #10
def intensity_correction(in_path,
                         out_path,
                         mask_path,
                         mask_key,
                         trafo_path,
                         tmp_folder,
                         resolution,
                         target='slurm',
                         max_jobs=250):
    trafo_ext = os.path.splitext(trafo_path)[1]
    if trafo_ext == '.csv':
        trafo_path = csv_to_json(trafo_path)
    elif trafo_ext != '.json':
        raise ValueError("Expect trafo as json.")

    in_is_h5 = is_h5_file(in_path)
    out_is_h5 = is_h5_file(out_path)

    key = get_key(in_is_h5, 0, 0, 0)
    out_key = get_key(out_is_h5, 0, 0, 0)
    validate_trafo(trafo_path, in_path, key)

    config_dir = os.path.join(tmp_folder, 'configs')
    write_default_global_config(config_dir)

    task = LinearTransformationWorkflow
    conf = task.get_config()['linear']
    conf.update({'time_limit': 360, 'mem_limit': 8})
    with open(os.path.join(config_dir, 'linear.config'), 'w') as f:
        json.dump(conf, f)

    t = task(tmp_folder=tmp_folder,
             config_dir=config_dir,
             target=target,
             max_jobs=max_jobs,
             input_path=in_path,
             input_key=key,
             mask_path=mask_path,
             mask_key=mask_key,
             output_path=out_path,
             output_key=out_key,
             transformation=trafo_path)
    ret = luigi.build([t], local_scheduler=True)
    if not ret:
        raise RuntimeError("Transformation failed")

    downsample(in_path, out_path, out_key, out_path, resolution, tmp_folder,
               target, max_jobs)
Example #11
    def test_multi_timepoint(self):
        from pybdv import make_bdv
        from pybdv.metadata import get_time_range

        n_timepoints = 6
        shape = (64,) * 3

        tp_data = []
        tp_setups = []
        for tp in range(n_timepoints):
            data = np.random.rand(*shape)
            # make sure that we at least have 2 setup ids that agree
            setup_id = np.random.randint(0, 20) if tp > 1 else 0
            make_bdv(data, self.out_path, setup_id=setup_id, timepoint=tp)
            tp_data.append(data)
            tp_setups.append(setup_id)

        tstart, tstop = get_time_range(self.xml_path)
        self.assertEqual(tstart, 0)
        self.assertEqual(tstop, n_timepoints - 1)

        for tp in range(n_timepoints):
            setup_id = tp_setups[tp]
            tp_key = get_key(self.is_h5, timepoint=tp, setup_id=setup_id, scale=0)
            with open_file(self.out_path, 'r') as f:
                data = f[tp_key][:]
            data_exp = tp_data[tp]
            self.assertTrue(np.allclose(data, data_exp))
Example #12
    def test_multi_setup(self):
        from pybdv import make_bdv
        from pybdv.metadata import get_affine
        shape = (64,) * 3
        n_views = 2

        data_dict = {}
        affine_dict = {}

        for vid in range(n_views):
            data = np.random.rand(*shape).astype('float32')
            affine = {'trafo1': [round(aff, 4) for aff in np.random.rand(12)],
                      'trafo2': [round(aff, 4) for aff in np.random.rand(12)]}
            make_bdv(data, self.out_path, setup_id=vid, affine=affine)
            data_dict[vid] = data
            affine_dict[vid] = affine

        # check implicit setup id
        data = np.random.rand(*shape).astype('float32')
        make_bdv(data, self.out_path)
        data_dict[n_views] = data

        for vid in range(n_views + 1):
            expected_key = get_key(self.is_h5, timepoint=0, setup_id=vid, scale=0)
            with open_file(self.out_path, 'r') as f:
                self.assertTrue(expected_key in f)
                data = f[expected_key][:]
            exp_data = data_dict[vid]
            self.assertTrue(np.allclose(data, exp_data))

        # check affine trafos (only for explicit setup-ids)
        for vid in range(n_views):
            affine = affine_dict[vid]
            affine_out = get_affine(self.xml_path, vid)
            self.assertEqual(affine, affine_out)
Example #13
def add_segmentations(seg_paths):
    path0 = seg_paths[0]
    seg_name = 'lm-cells'
    out_path = f'./data/{DS_NAME}/images/local/{seg_name}.n5'
    key = 'data'

    if not os.path.exists(out_path.replace('.n5', '.xml')):
        add_segmentation(
            path0, key,
            './data', DS_NAME,
            segmentation_name=seg_name,
            resolution=RESOLUTION,
            chunks=CHUNKS,
            scale_factors=SCALE_FACTORS,
            max_jobs=8,
            add_default_table=False
        )

    assert os.path.exists(out_path)
    for tp, path in enumerate(seg_paths[1:], 1):
        add_timepoint(path, out_path, tp, channel=0, mode='nearest',
                      in_key=key)
        tp_key = get_key(False, tp, 0, 0)
        add_max_id(out_path, tp_key, out_path, tp_key,
                   tmp_folder=f'tmp_max_ids/tp{tp}',
                   target='local',
                   max_jobs=8)
Example #14
def make_table(seg_path, n_timepoints, out_path):
    tmp_tables = './tmp_tables'

    table = None

    for tp in range(n_timepoints):
        tmp_folder = os.path.join(tmp_tables, f'table{tp}')
        res_path = os.path.join(tmp_folder, 'table2.csv')

        if os.path.exists(res_path):
            this_table = pd.read_csv(res_path, sep='\t')
        else:
            tmp_path = os.path.join(tmp_folder, 'table.csv')
            key = get_key(False, timepoint=tp, setup_id=0, scale=0)
            compute_default_table(seg_path, key, tmp_path,
                                  resolution=RESOLUTION, tmp_folder=tmp_folder,
                                  target='local', max_jobs=8)
            this_table = pd.read_csv(tmp_path, sep='\t')
            this_table = update_table(this_table, tp, seg_path, key)
            this_table.to_csv(res_path, sep='\t', index=False)

        if table is None:
            table = this_table
        else:
            table = pd.concat([table, this_table])

    table.to_csv(out_path, sep='\t', index=False)
Example #15
    def check_segmentation(self, dataset_folder, name):
        self.assertTrue(os.path.exists(dataset_folder))
        exp_data = self.data

        # check the segmentation metadata
        metadata = read_dataset_metadata(dataset_folder)
        self.assertIn(name, metadata["sources"])
        validate_source_metadata(name, metadata["sources"][name],
                                 dataset_folder)

        # check the segmentation data
        seg_path = os.path.join(dataset_folder, "images", "bdv-n5",
                                f"{name}.n5")
        self.assertTrue(os.path.exists(seg_path))
        key = get_key(False, 0, 0, 0)
        with open_file(seg_path, "r") as f:
            data = f[key][:]
        self.assertTrue(np.array_equal(data, exp_data))

        # check the table
        table_path = os.path.join(dataset_folder, "tables", name,
                                  "default.tsv")
        self.assertTrue(os.path.exists(table_path), table_path)
        table = pd.read_csv(table_path, sep="\t")

        label_ids = table["label_id"].values
        exp_label_ids = np.unique(data)
        if 0 in exp_label_ids:
            exp_label_ids = exp_label_ids[1:]
        self.assertTrue(np.array_equal(label_ids, exp_label_ids))
Example #16
def check_result(timepoint):
    import napari

    print("Checking results for timepoint", timepoint)

    halo = [64, 384, 384]

    key = get_key(is_h5=True, timepoint=timepoint, setup_id=0, scale=0)
    with open_file(PATH, 'r') as f:
        ds = f[key]
        bb = tuple(
            slice(sh // 2 - ha, sh // 2 + ha)
            for sh, ha in zip(ds.shape, halo))
        raw = ds[bb]

    pred_path = os.path.join(
        'tmp_plantseg/tp_%03i/PreProcessing' % timepoint,
        'generic_light_sheet_3d_unet/PostProcessing/raw_predictions.h5')
    with open_file(pred_path, 'r') as f:
        pred = f['predictions'][bb]

    seg_path = 'tmp_plantseg/tp_%03i/segmentation.h5' % timepoint
    with open_file(seg_path, 'r') as f:
        seg = f['data'][bb]

    with napari.gui_qt():
        viewer = napari.Viewer()
        viewer.add_image(raw)
        viewer.add_image(pred)
        viewer.add_labels(seg)
Example #17
    def _get_number_of_labels(self):
        seg_path = (self.cell_segmentation_path if self.compute_cell_features
                    else self.nucleus_segmentation_path)
        is_h5 = os.path.splitext(seg_path)[1].lower() in ('.hdf', '.hdf5', '.h5')
        key = get_key(is_h5, time_point=0, setup_id=0, scale=0)
        with vu.file_reader(seg_path, 'r') as f:
            n_labels = int(f[key].attrs['maxId']) + 1
        return n_labels
Example #18
    def get_scale_key(self, scale):
        if self.metadata_format == 'paintera':
            prefix = 's%i' % scale
            out_key = os.path.join(self.output_key_prefix, prefix)
        else:
            is_h5 = self.metadata_format in ('bdv', 'bdv.hdf5')
            # TODO support multiple set-ups for multi-channel data
            out_key = get_key(is_h5, time_point=0, setup_id=0, scale=scale)
        return out_key
Example #19
    def check_seg(self, exp_data, scales):
        key = get_key(False, 0, 0, 0)
        with open_file(self.out_path, 'r') as f:
            ds = f[key]
            data = ds[:]
            max_id = ds.attrs['maxId']
        self.assertEqual(data.shape, exp_data.shape)
        self.assertTrue(np.array_equal(data, exp_data))
        self.assertAlmostEqual(max_id, data.max())

        exp_shape = data.shape
        for scale, scale_factor in enumerate(scales, 1):
            key = get_key(False, 0, 0, scale)
            with open_file(self.out_path, 'r') as f:
                self.assertIn(key, f)
                this_shape = f[key].shape
            exp_shape = sample_shape(exp_shape, scale_factor)
            self.assertEqual(this_shape, exp_shape)
Example #20
    def check_result(self):
        with open_file(self.in_path, 'r') as f:
            exp = f['data'][:]

        key = get_key(self.is_h5, timepoint=0, setup_id=0, scale=0)
        with open_file(self.out_path, 'r') as f:
            self.assertTrue(key in f)
            res = f[key][:]

        self.assertEqual(res.shape, exp.shape)
        self.assertTrue(np.allclose(res, exp))
Example #21
def number_of_voxels():
    p = '../data/rawdata/sbem-6dpf-1-whole-raw.n5'
    is_h5 = is_h5_file(p)
    key = get_key(is_h5, setup_id=0, time_point=0, scale=0)
    with open_file(p, 'r') as f:  # open_file handles both n5 and hdf5
        ds = f[key]
        shape = ds.shape
    n_vox = np.prod(list(shape))
    print("Number of voxel:")
    print(n_vox)
    print("corresponds to")
    print(float(n_vox) / 1e12, "TVoxel")
Example #22
def get_data_key(file_format, scale, path=None):
    if file_format.startswith("bdv"):
        is_h5 = file_format == "bdv.hdf5"
        key = get_key(is_h5, timepoint=0, setup_id=0, scale=scale)
    elif file_format == "ome.zarr":
        assert path is not None
        with open_file(path, "r") as f:
            mscales = f.attrs["multiscales"][0]
            key = mscales["datasets"][0]["path"]
    else:
        raise NotImplementedError(file_format)
    return key
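A small usage sketch for get_data_key; the ome.zarr path is a placeholder and only needed for that format, where the key is read from the multiscales metadata:

key_h5 = get_data_key('bdv.hdf5', scale=0)
key_n5 = get_data_key('bdv.n5', scale=1)
key_zarr = get_data_key('ome.zarr', scale=0, path='image.ome.zarr')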
Example #23
def segment_timepoint(timepoint, gpu=None):
    # create the input data for this timepoint
    tmp_folder = 'tmp_plantseg/tp_%03i' % timepoint
    os.makedirs(tmp_folder, exist_ok=True)

    if gpu is not None:
        assert isinstance(gpu, int)
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)

    with ChangeDir(tmp_folder):
        raw_path = './raw.h5'
        config_path = './config.yaml'

        res_path = './segmentation.h5'
        if os.path.exists(res_path):
            return

        with open(TEMPLATE_CONFIG, 'r') as f:
            config = yaml.safe_load(f)
        config['path'] = raw_path
        with open(config_path, 'w') as f:
            yaml.dump(config, f)

        with open_file(raw_path, 'a') as f:
            if 'raw' not in f:
                key = get_key(is_h5=True,
                              timepoint=timepoint,
                              setup_id=0,
                              scale=0)
                with open_file(PATH, 'r') as f_in:
                    raw = f_in[key][:]
                f.create_dataset('raw', data=raw, chunks=(32, 128, 128))

        cmd = [PYTHON, PLANTSEG, '--config', config_path]
        run(cmd)

        print("Run post-processing ...")
        seg_path = 'PreProcessing/generic_light_sheet_3d_unet/MultiCut/raw_predictions_multicut.h5'
        with open_file(seg_path, 'r') as f, open_file(res_path, 'a') as f_out:
            seg = f['segmentation'][:]
            ids, sizes = np.unique(seg, return_counts=True)
            if 0 in ids:
                ids += 1
                seg += 1
            bg_id = ids[np.argmax(sizes)]
            seg[seg == bg_id] = 0
            seg = seg.astype('uint32')
            vigra.analysis.relabelConsecutive(seg,
                                              out=seg,
                                              start_label=1,
                                              keep_zeros=True)
            f_out.create_dataset('data', data=seg, compression='gzip')
Example #24
def append_nephridia_table():
    table_path = '../../data/0.6.5/tables/sbem-6dpf-1-whole-segmented-cilia/cell_mapping.csv'
    table = pd.read_csv(table_path, sep='\t')
    cell_ids = table['cell_id'].values
    cell_ids = np.unique(cell_ids)
    if cell_ids[0] == 0:
        cell_ids = cell_ids[1:]

    out_table_path = '../../data/0.6.5/tables/sbem-6dpf-1-whole-segmented-cells/regions.csv'
    seg_path = '../../data/0.6.5/segmentations/sbem-6dpf-1-whole-segmented-cells.n5'
    nep_path = '../../data/0.6.5/segmentations/sbem-6dpf-1-whole-segmented-nephridia.n5'

    table = pd.read_csv(out_table_path, sep='\t')
    new_col = np.zeros(len(table), dtype='float32')

    print("Loading volumes ...")
    scale = 4
    is_h5 = is_h5_file(seg_path)
    key = get_key(is_h5, setup_id=0, time_point=0, scale=scale)
    with open_file(seg_path, 'r') as f:
        seg = f[key][:]

    scale = 0
    is_h5 = is_h5_file(nep_path)
    key = get_key(is_h5, setup_id=0, time_point=0, scale=scale)
    with open_file(nep_path, 'r') as f:
        nep = f[key][:]
    assert nep.shape == seg.shape

    print("Iterating over cells ...")
    for cid in cell_ids:
        nid = np.unique(nep[seg == cid])
        if 0 in nid:
            nid = nid[1:]
        assert len(nid) == 1
        new_col[cid] = nid

    table['nephridia'] = new_col
    table.to_csv(out_table_path, sep='\t', index=False)
Example #25
    def _test_simple(self, shape, affine=None):
        from pybdv import make_bdv
        data = np.random.rand(*shape).astype('float32')

        make_bdv(data, self.out_path, affine=affine)

        key = get_key(self.is_h5, timepoint=0, setup_id=0, scale=0)
        self.assertTrue(os.path.exists(self.out_path))
        with open_file(self.out_path, 'r') as f:
            self.assertTrue(key in f)
            ds = f[key]
            self.assertEqual(ds.shape, shape)
            out_data = ds[:]
        self.assertTrue(np.allclose(data, out_data))
Example #26
    def test_multi_threaded(self):
        from pybdv import make_bdv
        shape = (128,) * 3
        chunks = (64,) * 3

        data = np.random.rand(*shape)
        scale_factors = 2 * [[2, 2, 2]]

        make_bdv(data, self.out_path, chunks=chunks,
                 n_threads=4, downscale_factors=scale_factors)
        key = get_key(self.is_h5, timepoint=0, setup_id=0, scale=0)
        with open_file(self.out_path, 'r') as f:
            d = f[key][:]
        self.assertTrue(np.allclose(d, data))
Example #27
def make_nephridia_segmentation():
    table_path = '../../data/0.6.5/tables/sbem-6dpf-1-whole-segmented-cilia/cell_mapping.csv'
    seg_path = '../../data/0.6.5/segmentations/sbem-6dpf-1-whole-segmented-cells.n5'

    out_path = '../../data/0.6.5/segmentations/sbem-6dpf-1-whole-segmented-nephridia.xml'

    table = pd.read_csv(table_path, sep='\t')
    cell_ids = table['cell_id'].values
    cell_ids = np.unique(cell_ids)
    if cell_ids[0] == 0:
        cell_ids = cell_ids[1:]
    print(cell_ids)

    scale = 4
    is_h5 = is_h5_file(seg_path)
    key = get_key(is_h5, setup_id=0, time_point=0, scale=scale)
    with open_file(seg_path, 'r') as f:
        ds = f[key]
        seg = ds[:].astype('uint32')
        bshape = (32, 256, 256)

        tmp = np.zeros_like(seg)
        print("Isin ...")
        tmp = elf.parallel.isin(seg,
                                cell_ids,
                                out=tmp,
                                n_threads=16,
                                verbose=True,
                                block_shape=bshape)
        print("Label ...")
        tmp = vigra.analysis.labelVolumeWithBackground(tmp)

        print("Size filter ...")
        ids, counts = elf.parallel.unique(tmp,
                                          return_counts=True,
                                          n_threads=16,
                                          verbose=True,
                                          block_shape=bshape)
        keep_ids = np.argsort(counts)[::-1]
        keep_ids = ids[keep_ids[:3]]
        assert keep_ids[0] == 0

        out = np.zeros(tmp.shape, dtype='uint8')
        for new_id, keep_id in enumerate(keep_ids[1:], 1):
            out[tmp == keep_id] = new_id

    factors = 3 * [[2, 2, 2]]
    res = [.4, .32, .32]
    make_bdv(out, out_path, factors, resolution=res, unit='micrometer')
Example #28
    def test_custom_chunks(self):
        from pybdv import make_bdv
        shape = (128,) * 3
        chunks = (64, 42, 59)

        data = np.random.rand(*shape)
        make_bdv(data, self.out_path, chunks=chunks)

        key = get_key(self.is_h5, timepoint=0, setup_id=0, scale=0)
        with open_file(self.out_path, 'r') as f:
            ds = f[key]
            chunks_out = ds.chunks
            d = ds[:]
            self.assertEqual(chunks, chunks_out)
        self.assertTrue(np.allclose(d, data))
Example #29
        def _check(exp_data, exp_sf, exp_attrs, exp_affine):
            key = get_key(self.is_h5, timepoint=0, setup_id=0, scale=0)
            with open_file(self.out_path, 'r') as f:
                data = f[key][:]
            self.assertTrue(np.allclose(data, exp_data))

            sf = get_scale_factors(self.out_path, setup_id=0)
            sf = absolute_to_relative_scale_factors(sf)
            self.assertEqual(sf, [[1, 1, 1]] + exp_sf)

            attrs = get_attributes(self.xml_path, setup_id=0)
            self.assertEqual(attrs, exp_attrs)

            affine = get_affine(self.xml_path, setup_id=0, timepoint=0)['affine0']
            self.assertTrue(np.allclose(np.array(affine), np.array(exp_affine), atol=1e-4))
Example #30
def write_h5_files(table, folder, raw_seg_path):
    """
    Writes individual h5 file for each row in the table, equal to the bounding box of that object
    + a 10 pixel border on all dimensions

    Args:
        table [pd.Dataframe] - table of nucleus statistics
        folder [str] - a temporary folder to write files to
        raw_seg_path [str] - path to the raw segmentation .h5
    """

    for row in table.itertuples(index=False):

        # min max coordinates in microns for segmentation
        minmax_seg = [
            row.bb_min_x, row.bb_min_y, row.bb_min_z, row.bb_max_x,
            row.bb_max_y, row.bb_max_z
        ]

        # raw scale (from xml) for 2x downsampled
        raw_scale = [0.02, 0.02, 0.025]

        # slice for raw file
        raw_slice = calculate_slice(raw_scale, minmax_seg, addBorder=True)
        is_h5 = is_h5_file(raw_seg_path)
        raw_key = get_key(is_h5, setup_id=0, time_point=0, scale=1)
        with open_file(raw_seg_path, 'r') as f:
            # get 2x downsampled nuclei
            data = f[raw_key]
            img_array = data[raw_slice]

        # write h5 file for nucleus
        result_path = folder + os.sep + str(row.label_id) + '.h5'
        with open_file(result_path, 'a') as f:

            # use 64^3 chunks if the cutout is at least 64 in every dimension,
            # otherwise fall back to a single chunk of the full shape
            if all(sh >= 64 for sh in img_array.shape):
                chunks = (64, 64, 64)
            else:
                chunks = img_array.shape

            f.create_dataset('dataset',
                             chunks=chunks,
                             compression='gzip',
                             shape=img_array.shape,
                             dtype=img_array.dtype)
            f['dataset'][:] = img_array
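Finally, a hypothetical call of write_h5_files, assuming a nucleus table with the bb_min_*/bb_max_* columns used above; the paths are placeholders:

import pandas as pd

table = pd.read_csv('nuclei_default.tsv', sep='\t')
write_h5_files(table, folder='tmp_nucleus_cutouts',
               raw_seg_path='sbem-6dpf-1-whole-raw.h5')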