Example #1
    def _test_2d(self, matrix, out_file=None, sigma=None, **kwargs):
        from elf.transformation import transform_subvolume_affine
        shape = (512, 512)
        x = np.random.rand(*shape)
        exp = affine_transform(x, matrix, **kwargs)

        if out_file is not None:
            with open_file(out_file) as f:
                f.create_dataset('tmp', data=x, chunks=(64, 64))
            f = open_file(out_file, 'r')
            x = f['tmp']

        bbs = [
            np.s_[:, :], np.s_[:256, :256], np.s_[37:115, 226:503],
            np.s_[:200, :], np.s_[:, 10:115]
        ]
        for bb in bbs:
            bb, _ = normalize_index(bb, shape)
            res = transform_subvolume_affine(x,
                                             matrix,
                                             bb,
                                             sigma=sigma,
                                             **kwargs)
            exp_bb = exp[bb]

            self.assertEqual(res.shape, exp_bb.shape)
            if sigma is None:
                self.assertTrue(np.allclose(res, exp_bb))
            else:
                # the smoothed result differs from the reference, so just check it is not all zeros
                self.assertTrue(not np.allclose(res, 0))

        if out_file is not None:
            f.close()
Example #2
    def _test_3d(self, matrix, out_file=None, **kwargs):
        from elf.transformation import transform_subvolume_affine
        shape = 3 * (64, )
        x = np.random.rand(*shape)
        exp = affine_transform(x, matrix, **kwargs)

        if out_file is not None:
            with open_file(out_file) as f:
                f.create_dataset('tmp', data=x, chunks=3 * (16, ))
            f = open_file(out_file, 'r')
            x = f['tmp']

        bbs = [
            np.s_[:, :, :], np.s_[:32, :32, :32], np.s_[1:31, 5:27, 3:13],
            np.s_[4:19, :, 22:], np.s_[1:29], np.s_[:, 15:27, :],
            np.s_[:, 1:3, 4:14]
        ]
        for bb in bbs:
            bb, _ = normalize_index(bb, shape)
            res = transform_subvolume_affine(x, matrix, bb, **kwargs)
            exp_bb = exp[bb]

            self.assertEqual(res.shape, exp_bb.shape)
            self.assertTrue(np.allclose(res, exp_bb))

        if out_file is not None:
            f.close()
Example #3
def create_auxiliary_gene_file(meds_root, out_file, return_result=False):
    all_genes_dset = 'genes'
    names_dset = 'gene_names'

    # get all the prospr gene xmls in the image folder
    med_files = glob(os.path.join(meds_root, "prospr*.xml"))
    # filter out prospr files that are not genes (i.e. segmented regions and virtual cells)
    med_files = [name for name in med_files if 'segmented' not in name]
    med_files = [name for name in med_files if 'virtual' not in name]

    # get the gene names from filenames
    gene_names = [os.path.splitext(os.path.basename(f))[0] for f in med_files]
    # cut off the preceding prospr-... part
    gene_names = ['-'.join(name.split('-')[4:]) for name in gene_names]
    num_genes = len(gene_names)
    assert num_genes == len(med_files)

    # get the data paths from the xmls
    med_files = [
        get_data_path(med_file, return_absolute_path=True)
        for med_file in med_files
    ]

    is_h5 = os.path.splitext(med_files[0])[1] == '.h5'
    med_key = get_key(is_h5, time_point=0, setup_id=0, scale=0)
    with open_file(med_files[0], 'r') as f:
        spatial_shape = f[med_key].shape

    shape = (num_genes, ) + spatial_shape

    # iterate through med files and write down binarized into one file
    with open_file(out_file) as f:
        out_dset = f.create_dataset(all_genes_dset,
                                    shape=shape,
                                    dtype='bool',
                                    chunks=(1, 64, 64, 64),
                                    compression='gzip')
        out_dset.n_threads = 8

        for i, med_file in enumerate(tqdm(med_files)):
            is_h5 = os.path.splitext(med_file)[1] == '.h5'
            med_key = get_key(is_h5, time_point=0, setup_id=0, scale=0)
            with open_file(med_file, 'r') as f2:
                ds = f2[med_key]
                this_shape = ds.shape
                if this_shape != spatial_shape:
                    raise RuntimeError("Incompatible shapes %s, %s" %
                                       (str(this_shape), str(spatial_shape)))
                ds.n_threads = 8
                data = ds[:]
            out_dset[i] = data

        gene_names_ascii = [n.encode('ascii', 'ignore') for n in gene_names]
        f.create_dataset(names_dset, data=gene_names_ascii, dtype='S40')

    if return_result:
        # reload the binarized version
        with open_file(out_file, 'r') as f:
            all_genes = f[all_genes_dset][:]
        return all_genes
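
A minimal usage sketch for the helper above. The paths are hypothetical and open_file, get_data_path and get_key are assumed to be imported as in the surrounding module:

# hypothetical input folder with the prospr MED xmls and an output file
meds_root = "/data/platy/images/local"
out_file = "/data/platy/analysis/prospr_genes.h5"

# write the binarized gene volumes plus the gene names and reload them as an array
all_genes = create_auxiliary_gene_file(meds_root, out_file, return_result=True)
print(all_genes.shape, all_genes.dtype)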
Example #4
def check_result(timepoint):
    import napari

    print("Checking results for timepoint", timepoint)

    halo = [64, 384, 384]

    key = get_key(is_h5=True, timepoint=timepoint, setup_id=0, scale=0)
    with open_file(PATH, 'r') as f:
        ds = f[key]
        bb = tuple(
            slice(sh // 2 - ha, sh // 2 + ha)
            for sh, ha in zip(ds.shape, halo))
        raw = ds[bb]

    pred_path = os.path.join(
        'tmp_plantseg/tp_%03i/PreProcessing' % timepoint,
        'generic_light_sheet_3d_unet/PostProcessing/raw_predictions.h5')
    with open_file(pred_path, 'r') as f:
        pred = f['predictions'][bb]

    seg_path = 'tmp_plantseg/tp_%03i/segmentation.h5' % timepoint
    with open_file(seg_path, 'r') as f:
        seg = f['data'][bb]

    with napari.gui_qt():
        viewer = napari.Viewer()
        viewer.add_image(raw)
        viewer.add_image(pred)
        viewer.add_labels(seg)
Example #5
    def load_data(self, cil_id):
        if cil_id in (0, 1):
            return None
        cell_seg_key = 't00000/s00/%i/cells' % (self.scale - 1, )
        cil_seg_key = 't00000/s00/%i/cells' % (self.scale + 1, )
        raw_key = 't00000/s00/%i/cells' % self.scale

        bb = self.bbs[cil_id]
        with open_file(self.raw_path, 'r') as f:
            ds = f[raw_key]
            raw = ds[bb]

        with open_file(self.cilia_seg_path, 'r') as f:
            ds = f[cil_seg_key]
            cil_seg = ds[bb].astype('uint32')
        cil_mask = cil_seg == cil_id
        cil_mask = 2 * cil_mask.astype('uint32')

        cell_id = self.id_mapping[cil_id]
        if cell_id == 0 or np.isnan(cell_id):
            cell_seg = None
        else:
            with open_file(self.cell_seg_path, 'r') as f:
                ds = f[cell_seg_key]
                cell_seg = ds[bb].astype('uint32')
                cell_seg = (cell_seg == cell_id).astype('uint32')

        return raw, cil_seg, cil_mask, cell_seg
Example #6
def mc_segmentation(bb, mc_blocks, filename_raw, filename_mem, filename_sv):
    f = open_file(filename_raw, 'r')
    data_raw = f['data'][bb].astype(np.float32)
    shape = data_raw.shape

    f = open_file(filename_mem, 'r')
    data_mem = f['data'][bb].astype(np.float32).reshape(data_raw.shape)
    assert data_mem.shape == shape

    f = open_file(filename_sv, 'r')
    data_sv = f['data'][bb].astype('uint64')
    assert data_sv.shape == shape

    print("Final shape:", shape)

    # run blockwise segmentation
    # note: rf is a pre-trained random forest expected to be defined at module level
    print("Start segmentation")
    segmentation = elf_workflow.multicut_segmentation(
        raw=data_raw,
        boundaries=data_mem,
        rf=rf,
        use_2dws=False,
        watershed=data_sv,
        multicut_solver='blockwise-multicut',  # alternative: 'kernighan-lin'
        solver_kwargs={'internal_solver': 'kernighan-lin',
                       'block_shape': mc_blocks},
        n_threads=8)
    print('segmentation is done')
    return segmentation
Example #7
def segment(input_path, input_prefix, output_path, output_key, n_workers):
    with open_file(input_path, 'r') as f, open_file(output_path, 'a') as f_out:

        ds_fg = f[os.path.join(input_prefix, 'foreground')]
        ds_fg.n_threads = n_workers

        ds_affs = f[os.path.join(input_prefix, 'affinities')]
        ds_affs.n_threads = n_workers
        print("Loading affinities ...")
        affs = ds_affs[:]

        print("Loading mask ...")
        mask = ds_fg[:] > 0.5
        strides = [4, 4, 4]

        print("Run mutex watershed ...")
        seg = blockwise_mutex_watershed(affs, OFFSETS, strides,
                                        block_shape=ds_fg.chunks,
                                        randomize_strides=True,
                                        mask=mask,
                                        n_threads=n_workers)

        print("Writing result ...")
        ds_out = f_out.require_dataset(output_key,
                                       shape=ds_fg.shape,
                                       chunks=ds_fg.chunks,
                                       compression='gzip',
                                       dtype='uint64')
        ds_out.n_threads = n_workers
        ds_out[:] = seg
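
A usage sketch with placeholder paths; OFFSETS and blockwise_mutex_watershed are assumed to be defined as in the snippet above:

# hypothetical prediction container holding 'foreground' and 'affinities' datasets
segment(input_path="/data/predictions.n5", input_prefix="predictions",
        output_path="/data/segmentation.n5", output_key="segmentation/mws",
        n_workers=16)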
Example #8
def predict_boundaries_2d(in_path, out_path, checkpoint, device=torch.device('cuda')):
    model = get_model()
    state = torch.load(checkpoint)['model_state']
    model.load_state_dict(state)
    model.to(device)
    model.eval()

    with open_file(in_path, 'r') as f:
        raw = f['raw'][:]

    prediction = np.zeros_like(raw, dtype='float32')

    with torch.no_grad():
        for z in range(raw.shape[0]):
            input_ = raw[z].astype('float32') / 255.
            input_ = torch.from_numpy(input_[None, None]).to(device)
            pred = model(input_).cpu().numpy()[0, 0]
            prediction[z] = pred

    with open_file(out_path, 'a') as f:
        ds = f.require_dataset('boundaries', prediction.shape, compression='gzip', dtype='float32',
                               chunks=(1,) + prediction.shape[1:])
        ds[:] = prediction

    return prediction
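
A small usage sketch for the 2d boundary prediction above. The paths are hypothetical, and get_model, torch, numpy and open_file are assumed to be available as in the surrounding script:

# hypothetical inputs: an h5 file with a 'raw' stack and a trained checkpoint
in_path = "/data/example/raw_stack.h5"
out_path = "/data/example/boundaries.h5"
checkpoint = "./checkpoints/boundary_model/best.pt"

# fall back to the CPU if no GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
prediction = predict_boundaries_2d(in_path, out_path, checkpoint, device=device)
print(prediction.shape)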
Example #9
    def __setstate__(self, state):
        raw_path, raw_key = state["raw_path"], state["raw_key"]
        label_path, label_key = state["label_path"], state["label_key"]
        roi = state["roi"]
        try:
            raw = open_file(raw_path, mode="r")[raw_key]
            if roi is not None:
                raw = RoiWrapper(raw, (slice(None),) + roi) if state["_with_channels"] else\
                    RoiWrapper(raw, roi)
            state["raw"] = raw
        except Exception:
            msg = f"SegmentationDataset could not be deserialized because of missing {raw_path}, {raw_key}.\n"
            msg += "The dataset is deserialized in order to allow loading trained models from a checkpoint.\n"
            msg += "But it cannot be used for further training and wil throw an error."
            warnings.warn(msg)
            state["raw"] = None

        try:
            labels = open_file(label_path, mode="r")[label_key]
            if roi is not None:
                labels = RoiWrapper(labels, (slice(None),) + roi) if state["_with_label_channels"] else\
                    RoiWrapper(labels, roi)
            state["labels"] = labels
        except Exception:
            msg = f"SegmentationDataset could not be deserialized because of missing {label_path}, {label_key}.\n"
            msg += "The dataset is deserialized in order to allow loading trained models from a checkpoint.\n"
            msg += "But it cannot be used for further training and wil throw an error."
            warnings.warn(msg)
            state["labels"] = None

        self.__dict__.update(state)
Example #10
def eval_nuclei(seg_path,
                seg_key,
                annotation_path,
                annotation_key=None,
                min_radius=6):
    """ Evaluate the nucleus segmentation by computing
    the percentage of false positive and false negative nucleus annotations
    in manually annotated validation slices.
    """
    eval_res = {}
    with open_file(seg_path, 'r') as f_seg, open_file(annotation_path, 'r') as f_ann:
        ds_seg = f_seg[seg_key]
        g = f_ann if annotation_key is None else f_ann[annotation_key]

        def visit_annotation(name, node):
            nonlocal eval_res
            if is_dataset(node):
                print("Evaluating:", name)
                res = eval_slice(ds_seg, node, min_radius)
                eval_res = merge_evaluations(res, eval_res)
                # for debugging
                # print("current eval:", eval_res)
            else:
                print("Group:", name)

        g.visititems(visit_annotation)

    return to_scores(eval_res)
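
A hedged usage sketch for eval_nuclei; the paths and keys below are made up for illustration, and the helpers used above (eval_slice, merge_evaluations, to_scores) are assumed to be defined in the same module:

# hypothetical segmentation and annotation files
scores = eval_nuclei(seg_path="/data/nuclei/segmentation.n5",
                     seg_key="setup0/timepoint0/s0",
                     annotation_path="/data/nuclei/annotations.h5",
                     annotation_key="validation_slices",
                     min_radius=6)
print("Nucleus evaluation scores:", scores)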
Example #11
def predict_affinities(checkpoint, gpu_ids, input_path, input_key, output_path,
                       output_key):
    model = get_model()
    state_dict = torch.load(checkpoint)['model_state']
    model.load_state_dict(state_dict)

    block_shape = (96, 96, 96)
    halo = (32, 32, 32)

    with open_file(input_path, 'r') as f_in, open_file(output_path, 'a') as f_out:
        ds_in = f_in[input_key]
        shape = ds_in.shape

        ds_fg = f_out.require_dataset(os.path.join(output_key, 'foreground'),
                                      shape=shape,
                                      chunks=block_shape,
                                      compression='gzip',
                                      dtype='float32')

        aff_shape = (model.out_channels - 1, ) + shape
        ds_affs = f_out.require_dataset(os.path.join(output_key, 'affinities'),
                                        shape=aff_shape,
                                        chunks=(1, ) + block_shape,
                                        compression='gzip',
                                        dtype='float32')

        outputs = [(ds_fg, np.s_[0]), (ds_affs, np.s_[1:])]

        predict_with_halo(ds_in,
                          model,
                          gpu_ids,
                          block_shape,
                          halo,
                          output=outputs)
Example #12
def check_segmentations(ref_path, ref_key, seg_path, seg_key):
    with open_file(ref_path, 'r') as f:
        shape = f[ref_key].shape

    with open_file(seg_path, 'r') as f:
        seg_shape = f[seg_key].shape
        assert shape == seg_shape, "%s, %s" % (str(shape), str(seg_shape))
    return shape
Example #13
def make_small_example_data():
    bb = np.s_[:25, :512, :512]
    with open_file('./data/data.n5') as f:
        raw = f['raw'][bb]
        ws = f['watersheds'][bb]

    with open_file('./data/small_data.n5') as f:
        # assumed intent: write the cutouts as the small example data
        f.create_dataset('raw', data=raw, compression='gzip', chunks=(25, 256, 256))
        f.create_dataset('watersheds', data=ws, compression='gzip', chunks=(25, 256, 256))
Example #14
def rank_false_merges(problem_path,
                      graph_key,
                      feat_key,
                      morpho_key,
                      node_label_path,
                      node_label_key,
                      ignore_ids,
                      out_path_ids,
                      out_path_scores,
                      n_threads,
                      n_candidates,
                      heuristic=weight_quantile_heuristic):
    g = ndist.Graph(problem_path, graph_key, n_threads)
    with open_file(problem_path, 'r') as f:
        ds = f[feat_key]
        ds.n_threads = n_threads
        probs = ds[:, 0]

        ds = f[morpho_key]
        ds.n_threads = n_threads
        sizes = ds[:, 1]

    with open_file(node_label_path, 'r') as f:
        ds = f[node_label_key]
        ds.n_threads = n_threads
        node_labels = ds[:]

    seg_ids = np.arange(len(sizes), dtype='uint64')
    seg_ids = seg_ids[np.argsort(sizes)[::-1]][:n_candidates]
    seg_ids = seg_ids[~np.isin(seg_ids, ignore_ids.tolist() + [0])]
    max_size = sizes[seg_ids].max()
    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [
            tp.submit(heuristic, seg_id, g, node_labels, sizes,
                      max_size, probs) for seg_id in seg_ids
        ]
        fm_scores = np.array([t.result() for t in tasks])

    # print("Id:", seg_ids[0])
    # sc = weight_quantile_heuristic(seg_ids[0], g,
    #                                node_labels, sizes, max_size, probs)
    # print("Score:", sc)
    # return

    # sort ids by score (decreasing)
    sorter = np.argsort(fm_scores)[::-1]
    seg_ids = seg_ids[sorter]
    fm_scores = fm_scores[sorter]

    with open(out_path_scores, 'w') as f:
        json.dump(fm_scores.tolist(), f)
    with open(out_path_ids, 'w') as f:
        json.dump(seg_ids.tolist(), f)
Example #15
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)
        config = self.get_task_config()

        with open_file(self.input_path, 'r') as f:
            dtype = f[self.input_key].dtype
        chunks = config['chunks']
        if chunks is None:
            chunks = block_shape
        compression = config['compression']

        with open_file(self.output_path, 'a') as f:
            f.require_dataset(self.output_key,
                              shape=self.shape,
                              chunks=chunks,
                              compression=compression,
                              dtype=dtype)

        trafo_file = self.update_transformations()
        # we don't need any additional config besides the paths
        config.update({
            "input_path": self.input_path,
            "input_key": self.input_key,
            "output_path": self.output_path,
            "output_key": self.output_key,
            "transformation_file": trafo_file,
            "elastix_directory": self.elastix_directory,
            "tmp_folder": self.tmp_folder
        })

        block_list = vu.blocks_in_volume(self.shape, block_shape, roi_begin,
                                         roi_end)
        self._write_log("scheduled %i blocks to run" % len(block_list))

        # prime and run the jobs
        n_jobs = min(len(block_list), self.max_jobs)
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Example #16
def copy_attributes(in_file, in_key, out_file, out_key):
    with open_file(in_file, 'r') as fin, open_file(out_file) as fout:
        ds_in = fin[in_key]
        ds_out = fout[out_key]
        for k, v in ds_in.attrs.items():
            if isinstance(v, numbers.Real):
                v = float(v)
            elif isinstance(v, numbers.Integral):
                v = int(v)
            elif isinstance(v, np.ndarray):
                v = v.tolist()
            ds_out.attrs[k] = v
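
A short usage sketch; the file names below are placeholders. copy_attributes copies dataset attributes between the containers handled by open_file, converting numpy scalars and arrays to plain Python types first:

# hypothetical source and target containers
copy_attributes("input.h5", "raw", "output.n5", "setup0/timepoint0/s0")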
Example #17
def _load_node_labes(initial_path, initial_key, save_path, save_key):
    if os.path.exists(save_path) and save_key in open_file(save_path, 'r'):
        with open_file(save_path, 'r') as f:
            node_labels = f[save_key][:]
    else:
        with open_file(initial_path, 'r') as f:
            node_labels = f[initial_key][:]

    if node_labels.ndim == 2:
        node_labels = node_labels[:, 1]
    assert node_labels.ndim == 1
    return node_labels.astype('uint32')
Example #18
def segment_timepoint(timepoint, gpu=None):
    # create the input data for this timepoint
    tmp_folder = 'tmp_plantseg/tp_%03i' % timepoint
    os.makedirs(tmp_folder, exist_ok=True)

    if gpu is not None:
        assert isinstance(gpu, int)
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)

    with ChangeDir(tmp_folder):
        raw_path = './raw.h5'
        config_path = './config.yaml'

        res_path = './segmentation.h5'
        if os.path.exists(res_path):
            return

        with open(TEMPLATE_CONFIG, 'r') as f:
            config = yaml.safe_load(f)
        config['path'] = raw_path
        with open(config_path, 'w') as f:
            yaml.dump(config, f)

        with open_file(raw_path, 'a') as f:
            if 'raw' not in f:
                key = get_key(is_h5=True,
                              timepoint=timepoint,
                              setup_id=0,
                              scale=0)
                with open_file(PATH, 'r') as f_in:
                    raw = f_in[key][:]
                f.create_dataset('raw', data=raw, chunks=(32, 128, 128))

        cmd = [PYTHON, PLANTSEG, '--config', config_path]
        run(cmd)

        print("Run post-processing ...")
        seg_path = 'PreProcessing/generic_light_sheet_3d_unet/MultiCut/raw_predictions_multicut.h5'
        with open_file(seg_path, 'r') as f, open_file(res_path, 'a') as f_out:
            seg = f['segmentation'][:]
            ids, sizes = np.unique(seg, return_counts=True)
            if 0 in ids:
                ids += 1
                seg += 1
            bg_id = ids[np.argmax(sizes)]
            seg[seg == bg_id] = 0
            seg = seg.astype('uint32')
            vigra.analysis.relabelConsecutive(seg,
                                              out=seg,
                                              start_label=1,
                                              keep_zeros=True)
            f_out.create_dataset('data', data=seg, compression='gzip')
Example #19
def compute_mean_and_std():
    key = 'setup0/timepoint0/s1'
    f = open_file(RAW_PATH, 'r')
    ds = f[key]

    mask_key = 'setup0/timepoint0/s0'
    mask = open_file(MASK_PATH)[mask_key][:].astype('bool')
    mask = ResizedVolume(mask, ds.shape)

    m, s = mean_and_std(ds, mask=mask, n_threads=16, verbose=True)
    print("Computed mean and standard deviation:")
    print("Mean:", m)
    print("Standard deviation:", s)
Example #20
def _predict(model, raw, trainer, gpu_ids, save_path, sample_id):

    save_key = f"sample{sample_id}"
    if save_path is not None and os.path.exists(save_path):
        with open_file(save_path, 'r') as f:
            if save_key in f:
                print("Loading predictions for sample", sample_id, "from file")
                ds = f[save_key]
                ds.n_threads = 8
                return ds[:]

    normalizer = get_normalizer(trainer)
    dataset = trainer.val_loader.dataset
    ndim = dataset.ndim
    if isinstance(dataset, ConcatDataset):
        patch_shape = dataset.datasets[0].patch_shape
    else:
        patch_shape = dataset.patch_shape

    if ndim == 2 and len(patch_shape) == 3:
        patch_shape = patch_shape[1:]
    assert len(patch_shape) == ndim

    # choose a small halo and set the correct block shape
    halo = (32, 32) if ndim == 2 else (8, 16, 16)
    block_shape = tuple(psh - 2 * ha for psh, ha in zip(patch_shape, halo))

    if save_path is None:
        output = None
    else:
        f = open_file(save_path, 'a')
        out_shape = (trainer.model.out_channels, ) + raw.shape
        chunks = (1, ) + block_shape
        output = f.create_dataset(save_key,
                                  shape=out_shape,
                                  chunks=chunks,
                                  compression='gzip',
                                  dtype='float32')

    gpu_ids = [int(gpu) if gpu != 'cpu' else gpu for gpu in gpu_ids]
    pred = predict_with_halo(raw,
                             model,
                             gpu_ids,
                             block_shape,
                             halo,
                             preprocess=normalizer,
                             output=output)
    if output is not None:
        f.close()

    return pred
Example #21
    def init_dataset(self):
        data_path = os.path.join(self.test_folder, "data.h5")
        data_key = "data"
        with open_file(data_path, "a") as f:
            f.create_dataset(data_key, data=np.random.rand(*self.shape))

        seg_path = os.path.join(self.test_folder, "seg.h5")
        with open_file(seg_path, "a") as f:
            f.create_dataset(data_key,
                             data=np.random.randint(0, 100, size=self.shape))

        scales = [[2, 2, 2]]
        max_jobs = min(4, mp.cpu_count())

        tmp_folder = os.path.join(self.test_folder, "tmp-init-raw")
        mobie.add_image(data_path,
                        data_key,
                        self.root,
                        self.dataset_name,
                        self.raw_name,
                        resolution=(1, 1, 1),
                        chunks=self.chunks,
                        scale_factors=scales,
                        tmp_folder=tmp_folder,
                        max_jobs=max_jobs)

        tmp_folder = os.path.join(self.test_folder, "tmp-init-seg")
        mobie.add_segmentation(seg_path,
                               data_key,
                               self.root,
                               self.dataset_name,
                               self.seg_name,
                               resolution=(1, 1, 1),
                               chunks=self.chunks,
                               scale_factors=scales,
                               tmp_folder=tmp_folder,
                               max_jobs=max_jobs)

        display_settings = [
            mobie.metadata.get_image_display("image-group-0", [self.raw_name]),
            mobie.metadata.get_segmentation_display("segmentation-group-1",
                                                    [self.seg_name]),
        ]
        source_transforms = [
            mobie.metadata.get_affine_source_transform(
                [self.raw_name, self.seg_name], np.random.rand(12))
        ]
        mobie.create_view(os.path.join(self.root, self.dataset_name),
                          "my-view", [[self.raw_name], [self.seg_name]],
                          display_settings=display_settings,
                          source_transforms=source_transforms)
Example #22
def map_cells_to_nuclei(label_ids, seg_path, nuc_path, out_path,
                        tmp_folder, target, max_jobs,
                        overlap_threshold=.25):

    # choose the keys of the same size
    if seg_path.endswith('.n5'):
        seg_key = 'setup0/timepoint0/s2'
    else:
        seg_key = 't00000/s00/2/cells'
    if nuc_path.endswith('.n5'):
        nuc_key = 'setup0/timepoint0/s0'
    else:
        nuc_key = 't00000/s00/0/cells'
    with open_file(seg_path, 'r') as f:
        shape1 = f[seg_key].shape
    with open_file(nuc_path, 'r') as f:
        shape2 = f[nuc_key].shape
    assert shape1 == shape2

    # compute the pixel-wise overlap of cells with nuclei
    cids_to_nids = node_labels(seg_path, seg_key,
                               nuc_path, nuc_key, prefix='nuc_to_cells',
                               tmp_folder=tmp_folder, target=target, max_jobs=max_jobs,
                               max_overlap=False, ignore_label=0)
    cids_to_nids = overlaps_to_ids(cids_to_nids, overlap_threshold)

    # compute the pixel-wise overlap of nuclei with cells
    nids_to_cids = node_labels(nuc_path, nuc_key,
                               seg_path, seg_key, prefix='cells_to_nuc',
                               tmp_folder=tmp_folder, target=target, max_jobs=max_jobs,
                               max_overlap=False, ignore_label=0)
    nids_to_cids = overlaps_to_ids(nids_to_cids, overlap_threshold)

    # only keep cell ids that have overlap with a single nucleus
    cids_to_nids = {label_id: ovlp_ids[0] for label_id, ovlp_ids in cids_to_nids.items()
                    if len(ovlp_ids) == 1}

    # only keep nucleus ids that have overlap with a single cell
    nids_to_cids = {label_id: ovlp_ids[0] for label_id, ovlp_ids in nids_to_cids.items()
                    if len(ovlp_ids) == 1}

    # only keep cell ids for which overlap-ids agree
    cids_to_nids = {label_id: ovlp_id for label_id, ovlp_id in cids_to_nids.items()
                    if nids_to_cids.get(ovlp_id, 0) == label_id}

    data = np.array([cids_to_nids.get(label_id, 0) for label_id in label_ids])

    col_names = ['label_id', 'nucleus_id']
    data = np.concatenate([label_ids[:, None], data[:, None]], axis=1)
    write_csv(out_path, data, col_names)
Example #23
def _resize(path, native_resolution, target_resolution):
    assert len(native_resolution) == len(target_resolution)
    scale_factor = tuple(
        nres / tres
        for nres, tres in zip(native_resolution, target_resolution))
    paths = glob(os.path.join(path, "*.h5"))

    # check if anything needs to be resized
    need_resize = []
    for pp in paths:
        with open_file(pp, "r") as f:
            for name, obj in f.items():
                rescaled_name = f"rescaled/{name}"
                if is_group(obj):
                    continue
                if rescaled_name in f:
                    this_resolution = f[rescaled_name].attrs["resolution"]
                    correct_res = all(
                        np.isclose(this_re, target_re) for this_re, target_re
                        in zip(this_resolution, target_resolution))
                    if correct_res:
                        continue
                need_resize.append(pp)

    # resize if necessary
    need_resize = list(set(need_resize))
    for pp in need_resize:
        with open_file(pp, mode="a") as f:
            if "rescaled" in f:
                del f["rescaled"]
            for name, obj in f.items():
                if is_group(obj):
                    continue
                rescaled_name = f"rescaled/{name}"
                print("Resizing", pp, name)
                print("from resolution (microns)", native_resolution, "to",
                      target_resolution)
                print("with scale factor", scale_factor)

                vol = obj[:]
                if name == "raw":
                    vol = rescale(vol, scale_factor,
                                  preserve_range=True).astype(vol.dtype)
                else:
                    vol = rescale(vol,
                                  scale_factor,
                                  preserve_range=True,
                                  order=0,
                                  anti_aliasing=False).astype(vol.dtype)
                ds = f.create_dataset(rescaled_name,
                                      data=vol,
                                      compression="gzip")
                ds.attrs["resolution"] = target_resolution
Example #24
def run_segmentation_for_cubes(filename_raw=None,
                               dataset_raw='/t00000/s00/0/cells',
                               filename_mem=None,
                               filename_sv=None,
                               rf=None,
                               n_threads=1,
                               beta=0.5,
                               output_folder=None,
                               result_pattern='{}_{}_{}.h5',
                               cube_size=(1024, 1024, 1024),
                               overlap=(256, 256, 256),
                               block_size=[256, 256, 256],
                               start_index=(0, 0, 0),
                               end_index=None):

    step_z, step_y, step_x = (cube_size[0] - overlap[0],
                              cube_size[1] - overlap[1],
                              cube_size[2] - overlap[2])
    with open_file(filename_raw, 'r') as f:
        shape = f[dataset_raw].shape
    nz, ny, nx = shape
    if not end_index:
        end_index = shape
    error_file = output_folder + 'errors_mc.txt'
    if not os.path.exists(error_file):
        with open(error_file, 'w') as f:
            f.write(
                "Here will be the indices of cubes with errors in multicut.\n")
    for z in range(start_index[0], end_index[0], step_z):
        for y in range(start_index[1], end_index[1], step_y):
            for x in range(start_index[2], end_index[2], step_x):
                bb = np.s_[z:min(z + cube_size[0], nz),
                           y:min(y + cube_size[1], ny),
                           x:min(x + cube_size[2], nx)]
                filename_results = result_pattern.format(z, y, x)
                if not (os.path.exists(output_folder + filename_results)):
                    segmentation = mc_segmentation(bb,
                                                   mc_blocks=block_size,
                                                   filename_raw=filename_raw,
                                                   dataset_raw=dataset_raw,
                                                   filename_mem=filename_mem,
                                                   filename_sv=filename_sv,
                                                   rf=rf,
                                                   n_threads=n_threads,
                                                   beta=beta,
                                                   error_file=error_file)
                    f = open_file(output_folder + filename_results, 'w')
                    f.create_dataset('data',
                                     data=segmentation,
                                     compression="gzip")
Example #25
def align_seg(compute_offset):

    points_mobie = [[1077, 525], [941, 564], [848, 1314], [467, 959],
                    [432, 826], [976, 758], [1134, 907]]
    points_amira = [[1008, 315], [875, 350], [777, 1103], [394, 747],
                    [357, 616], [908, 547], [1064, 695]]

    if compute_offset:
        diff = [[mo - am for mo, am in zip(pm, pa)]
                for pm, pa in zip(points_mobie, points_amira)]
        offset = np.array(diff).mean(axis=0)
        offset = np.round(offset).astype('int').tolist()
        offset = [777] + offset
        print(offset)
        return

    ds_name = 'cell1'

    p = _get_path(ds_name)
    with open_file(p, mode='r', ext='') as f:
        ds = f['*.tif']
        shape = ds.shape
        ds.n_threads = 8
        print("Load raw1")
        raw1 = ds[:]
        # raw1 = ResizedVolume(raw1, (raw1.shape[0] // 2, raw1.shape[1], raw1.shape[2]))[:]

    roi = np.s_[777:-223, :, :]
    raw_p = f'/g/emcf/pape/sponge-fibsem-project/data/{ds_name}/images/local/fibsem-raw.n5'
    with open_file(raw_p, 'r') as f:
        ds = f['setup0/timepoint0/s0']
        ds.n_threads = 8
        ds = RoiWrapper(ds, roi)
        rshape = ds.shape
        print("Load raw2")
        raw2 = ds[:]

    scale_factor = [float(rs) / sh for rs, sh in zip(rshape, shape)]
    print(ds_name)
    print("Seg-shape  :", shape)
    print("Mobie-shape:", rshape)
    print("Factor     :", scale_factor)

    print("Start viewer")
    import napari
    with napari.gui_qt():
        viewer = napari.Viewer()
        viewer.add_image(raw1, name='amira')
        viewer.add_image(raw2, name='mobie')
Example #26
def mc_segmentation(bb, mc_blocks, filename_raw, filename_mem, filename_sv):
    f = open_file(filename_raw, 'r')
    data_raw = f['/t00000/s00/0/cells'][bb].astype(np.float32)
    shape = data_raw.shape

    if np.min(data_raw) == np.max(data_raw):
        # print('no raw data ')
        # print(np.min(data_raw))
        return np.zeros(shape)

    f = open_file(filename_mem, 'r')
    data_mem = f['data'][bb].astype(np.float32).reshape(data_raw.shape)
    assert data_mem.shape == shape

    f = open_file(filename_sv, 'r')
    data_sv = f['data'][bb].astype('uint32')
    assert data_sv.shape == shape

    data_sv, maxlabel, mapping = vigra.analysis.relabelConsecutive(data_sv)

    # print("Final shape:", shape)

    # print('sv', np.min(data_sv), np.max(data_sv))

    if np.min(data_sv) == np.max(data_sv):
        # print('no superpixels')
        return np.zeros(shape)
    try:
        # run blockwise segmentation
        # print("Start segmentation")
        segmentation = elf_workflow.multicut_segmentation(
            raw=data_raw,
            boundaries=data_mem,
            rf=rf,
            use_2dws=False,
            watershed=data_sv,
            multicut_solver='blockwise-multicut',
            solver_kwargs={
                'internal_solver': 'kernighan-lin',
                'block_shape': mc_blocks
            },
            n_threads=16,
            beta=0.6)
        # print('segmentation is done')
        return segmentation
    except RuntimeError:
        error_cubes.append(bb)
        # print('runtime error in segmentation')
        return np.zeros(shape)
Example #27
def write_h5_files(table, folder, raw_seg_path):
    """
    Writes individual h5 file for each row in the table, equal to the bounding box of that object
    + a 10 pixel border on all dimensions

    Args:
        table [pd.Dataframe] - table of nucleus statistics
        folder [str] - a temporary folder to write files to
        raw_seg_path [str] - path to the raw segmentation .h5
    """

    for row in table.itertuples(index=False):

        # min max coordinates in microns for segmentation
        minmax_seg = [
            row.bb_min_x, row.bb_min_y, row.bb_min_z, row.bb_max_x,
            row.bb_max_y, row.bb_max_z
        ]

        # raw scale (from xml) for 2x downsampled
        raw_scale = [0.02, 0.02, 0.025]

        # slice for raw file
        raw_slice = calculate_slice(raw_scale, minmax_seg, addBorder=True)
        is_h5 = is_h5_file(raw_seg_path)
        raw_key = get_key(is_h5, setup=0, time_point=0, scale=1)
        with open_file(raw_seg_path, 'r') as f:
            # get 2x downsampled nuclei
            data = f[raw_key]
            img_array = data[raw_slice]

        # write h5 file for nucleus
        result_path = folder + os.sep + str(row.label_id) + '.h5'
        with open_file(result_path, 'a') as f:

            # check dataset is bigger than 64x64x64
            if (img_array.shape[0] >= 64 and img_array.shape[1] >= 64
                    and img_array.shape[2] >= 64):
                chunks = (64, 64, 64)
            else:
                chunks = img_array.shape

            f.create_dataset('dataset',
                             chunks=chunks,
                             compression='gzip',
                             shape=img_array.shape,
                             dtype=img_array.dtype)
            f['dataset'][:] = img_array
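
A hedged usage sketch; the paths are placeholders, pandas is assumed to be installed, and the table is assumed to provide the bounding-box columns accessed above:

import os
import pandas as pd

# hypothetical inputs: a nucleus statistics table and the raw bdv/h5 file
table = pd.read_csv("/data/nuclei/default.csv", sep="\t")
os.makedirs("./tmp_nuclei", exist_ok=True)
write_h5_files(table, folder="./tmp_nuclei", raw_seg_path="/data/rawdata/sbem-raw.h5")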
Example #28
def prefilter_blocks(mask_path, mask_key,
                     shape, block_shape,
                     save_file, n_threads=48):
    if os.path.exists(save_file):
        print("Loading block list from file")
        with open(save_file) as f:
            return json.load(f)

    with open_file(mask_path, 'r') as f:
        ds = f[mask_key]
        mask = ResizedVolume(ds, shape=shape, order=0)

        blocking = nt.blocking([0, 0, 0], shape, block_shape)
        n_blocks = blocking.numberOfBlocks

        def check_block(block_id):
            block = blocking.getBlock(block_id)
            bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))
            d = mask[bb]
            if d.sum() > 0:
                return block_id
            else:
                return None

        print("Computing block list ...")
        with futures.ThreadPoolExecutor(n_threads) as tp:
            blocks = list(tqdm(tp.map(check_block, range(n_blocks)), total=n_blocks))
        blocks = [bid for bid in blocks if bid is not None]

    with open(save_file, 'w') as f:
        json.dump(blocks, f)
    return blocks
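
A usage sketch with made-up paths and shapes, assuming nifty.tools (as nt) and ResizedVolume are available as in the function above:

# hypothetical full-resolution shape and processing block shape
shape = (1024, 2048, 2048)
block_shape = (64, 512, 512)
blocks = prefilter_blocks("/data/mask.n5", "setup0/timepoint0/s0",
                          shape, block_shape,
                          save_file="./block_list.json", n_threads=16)
print("Number of blocks with foreground:", len(blocks))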
Example #29
def parse_simple_htm(folder, pattern="*.h5", exclude_names=None):
    """Parse simple htm layout, see e.g. example data at
    https://owncloud.gwdg.de/index.php/s/eu8JMlUFZ82ccHT
    """
    files = glob(os.path.join(folder, pattern))
    files.sort()

    # get the channel and label names
    channel_names = []
    label_names = []
    with io.open_file(files[0], "r") as f:
        for name, obj in f.items():
            if exclude_names is not None and name in exclude_names:
                continue
            if io.is_dataset(obj):
                channel_names.append(name)
            elif io.is_group(obj) and name == "segmentation":
                for label_name, label in obj.items():
                    if exclude_names is not None and label_name in exclude_names:
                        continue
                    if io.is_dataset(label):
                        label_names.append(f"segmentation/{label_name}")

    assert channel_names
    image_data = {name: _load_channel_simple(files, name) for name in channel_names}
    label_data = None if not label_names else {name: _load_channel_simple(files, name) for name in label_names}
    return image_data, label_data
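
A minimal usage sketch for the htm parser; the folder path is hypothetical, and _load_channel_simple plus the io module are assumed from the surrounding code:

# hypothetical folder with one h5 file per position
image_data, label_data = parse_simple_htm("/data/htm_plate", pattern="*.h5",
                                           exclude_names=["metadata"])
print("Channels:", list(image_data.keys()))
if label_data is not None:
    print("Labels:", list(label_data.keys()))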
Example #30
def debug_vol():
    path = '../data.n5'
    key = 'volumes/cilia/segmentation'
    f = open_file(path)
    ds = f[key]
    shape = ds.shape
    block_shape = ds.chunks

    roi_begin = [7216, 12288, 7488]
    roi_end = [8640, 19040, 11392]

    blocks, blocking = blocks_in_volume(shape,
                                        block_shape,
                                        roi_begin,
                                        roi_end,
                                        return_blocking=True)
    print("Have", len(blocks), "blocks in roi")

    # check reading all blocks
    for block_id in blocks:
        print("Check block", block_id)
        block = blocking.getBlock(block_id)
        bb = block_to_bb(block)
        d = ds[bb]
        print("Have block", block_id)

    print("All checks passsed")