def _test_2d(self, matrix, out_file=None, sigma=None, **kwargs):
    from elf.transformation import transform_subvolume_affine
    shape = (512, 512)
    x = np.random.rand(*shape)
    exp = affine_transform(x, matrix, **kwargs)

    if out_file is not None:
        with open_file(out_file) as f:
            x = f.create_dataset('tmp', data=x, chunks=(64, 64))
        f = open_file(out_file, 'r')
        x = f['tmp']

    bbs = [
        np.s_[:, :], np.s_[:256, :256], np.s_[37:115, 226:503],
        np.s_[:200, :], np.s_[:, 10:115]
    ]
    for bb in bbs:
        bb, _ = normalize_index(bb, shape)
        res = transform_subvolume_affine(x, matrix, bb, sigma=sigma, **kwargs)

        exp_bb = exp[bb]
        self.assertEqual(res.shape, exp_bb.shape)
        if sigma is None:
            self.assertTrue(np.allclose(res, exp_bb))
        else:
            # with smoothing the result differs from the scipy reference,
            # so only check that it is not all zero
            # (note: '~np.allclose(...)' on a python bool is always truthy)
            self.assertTrue(not np.allclose(res, 0))

    if out_file is not None:
        f.close()

def _test_3d(self, matrix, out_file=None, **kwargs):
    from elf.transformation import transform_subvolume_affine
    shape = 3 * (64,)
    x = np.random.rand(*shape)
    exp = affine_transform(x, matrix, **kwargs)

    if out_file is not None:
        with open_file(out_file) as f:
            x = f.create_dataset('tmp', data=x, chunks=3 * (16,))
        f = open_file(out_file, 'r')
        x = f['tmp']

    bbs = [
        np.s_[:, :, :], np.s_[:32, :32, :32], np.s_[1:31, 5:27, 3:13],
        np.s_[4:19, :, 22:], np.s_[1:29], np.s_[:, 15:27, :],
        np.s_[:, 1:3, 4:14]
    ]
    for bb in bbs:
        bb, _ = normalize_index(bb, shape)
        res = transform_subvolume_affine(x, matrix, bb, **kwargs)

        exp_bb = exp[bb]
        self.assertEqual(res.shape, exp_bb.shape)
        self.assertTrue(np.allclose(res, exp_bb))

    if out_file is not None:
        f.close()

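# A minimal sketch (hedged) of how the test helpers above could be invoked.
# The matrix is an illustrative 2d homogeneous affine (scale plus translation),
# not one taken from the actual test suite; kwargs are forwarded to
# scipy.ndimage.affine_transform (e.g. order=0 for nearest-neighbor interpolation).
def _example_matrix_2d():
    return np.array([[1.1, 0.0, 5.0],
                     [0.0, 0.9, -3.0],
                     [0.0, 0.0, 1.0]])

# usage sketch: self._test_2d(_example_matrix_2d(), order=0)
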
def create_auxiliary_gene_file(meds_root, out_file, return_result=False):
    all_genes_dset = 'genes'
    names_dset = 'gene_names'

    # get all the prospr gene xmls in the image folder
    med_files = glob(os.path.join(meds_root, "prospr*.xml"))
    # filter out prospr files that are not genes (= segmented regions and virtual cells)
    med_files = [name for name in med_files if 'segmented' not in name]
    med_files = [name for name in med_files if 'virtual' not in name]

    # get the gene names from the filenames
    gene_names = [os.path.splitext(os.path.basename(f))[0] for f in med_files]
    # cut off the preceding prospr-... part
    gene_names = ['-'.join(name.split('-')[4:]) for name in gene_names]
    num_genes = len(gene_names)
    assert num_genes == len(med_files)

    # get the data paths from the xmls
    med_files = [get_data_path(med_file, return_absolute_path=True)
                 for med_file in med_files]

    is_h5 = os.path.splitext(med_files[0])[1] == '.h5'
    med_key = get_key(is_h5, time_point=0, setup_id=0, scale=0)
    with open_file(med_files[0], 'r') as f:
        spatial_shape = f[med_key].shape

    shape = (num_genes,) + spatial_shape

    # iterate over the med files and write them binarized into one file
    with open_file(out_file) as f:
        out_dset = f.create_dataset(all_genes_dset, shape=shape, dtype='bool',
                                    chunks=(1, 64, 64, 64), compression='gzip')
        out_dset.n_threads = 8

        for i, med_file in enumerate(tqdm(med_files)):
            is_h5 = os.path.splitext(med_file)[1] == '.h5'
            med_key = get_key(is_h5, time_point=0, setup_id=0, scale=0)
            with open_file(med_file, 'r') as f2:
                ds = f2[med_key]
                this_shape = ds.shape
                if this_shape != spatial_shape:
                    raise RuntimeError("Incompatible shapes %s, %s" % (str(this_shape),
                                                                       str(spatial_shape)))
                ds.n_threads = 8
                data = ds[:]
            out_dset[i] = data

        gene_names_ascii = [n.encode('ascii', 'ignore') for n in gene_names]
        f.create_dataset(names_dset, data=gene_names_ascii, dtype='S40')

    if return_result:
        # reload the binarized version
        with open_file(out_file, 'r') as f:
            all_genes = f[all_genes_dset][:]
        return all_genes

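# Usage sketch (hedged): the paths below are hypothetical placeholders, not the
# actual project layout. With return_result=True the binarized gene volume of
# shape (num_genes,) + spatial_shape is loaded back and returned.
def _example_create_gene_file():
    all_genes = create_auxiliary_gene_file('/path/to/prospr/meds',
                                           './prospr_genes.h5',
                                           return_result=True)
    print("Created gene volume with shape", all_genes.shape)
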
def check_result(timepoint):
    import napari

    print("Checking results for timepoint", timepoint)
    halo = [64, 384, 384]
    key = get_key(is_h5=True, timepoint=timepoint, setup_id=0, scale=0)
    with open_file(PATH, 'r') as f:
        ds = f[key]
        bb = tuple(slice(sh // 2 - ha, sh // 2 + ha)
                   for sh, ha in zip(ds.shape, halo))
        raw = ds[bb]

    pred_path = os.path.join('tmp_plantseg/tp_%03i/PreProcessing' % timepoint,
                             'generic_light_sheet_3d_unet/PostProcessing/raw_predictions.h5')
    with open_file(pred_path, 'r') as f:
        pred = f['predictions'][bb]

    seg_path = 'tmp_plantseg/tp_%03i/segmentation.h5' % timepoint
    with open_file(seg_path, 'r') as f:
        seg = f['data'][bb]

    with napari.gui_qt():
        viewer = napari.Viewer()
        viewer.add_image(raw)
        viewer.add_image(pred)
        viewer.add_labels(seg)

def load_data(self, cil_id):
    if cil_id in (0, 1):
        return None

    cell_seg_key = 't00000/s00/%i/cells' % (self.scale - 1,)
    cil_seg_key = 't00000/s00/%i/cells' % (self.scale + 1,)
    raw_key = 't00000/s00/%i/cells' % self.scale

    bb = self.bbs[cil_id]
    with open_file(self.raw_path, 'r') as f:
        ds = f[raw_key]
        raw = ds[bb]

    with open_file(self.cilia_seg_path, 'r') as f:
        ds = f[cil_seg_key]
        cil_seg = ds[bb].astype('uint32')
    cil_mask = cil_seg == cil_id
    cil_mask = 2 * cil_mask.astype('uint32')

    cell_id = self.id_mapping[cil_id]
    # the cilium is unmapped if the cell id is 0 or nan;
    # nan compares unequal to itself, so check with np.isnan
    if cell_id == 0 or np.isnan(cell_id):
        cell_seg = None
    else:
        with open_file(self.cell_seg_path, 'r') as f:
            ds = f[cell_seg_key]
            cell_seg = ds[bb].astype('uint32')
        cell_seg = (cell_seg == cell_id).astype('uint32')

    return raw, cil_seg, cil_mask, cell_seg

def mc_segmentation(bb, mc_blocks, filename_raw, filename_mem, filename_sv):
    f = open_file(filename_raw, 'r')
    data_raw = f['data'][bb].astype(np.float32)
    shape = data_raw.shape

    f = open_file(filename_mem, 'r')
    data_mem = f['data'][bb].astype(np.float32).reshape(data_raw.shape)
    assert data_mem.shape == shape

    f = open_file(filename_sv, 'r')
    data_sv = f['data'][bb].astype('uint64')
    assert data_sv.shape == shape

    print("Final shape:", shape)

    # run blockwise segmentation
    print("Start segmentation")
    segmentation = elf_workflow.multicut_segmentation(
        raw=data_raw, boundaries=data_mem, rf=rf, use_2dws=False,
        watershed=data_sv, multicut_solver='blockwise-multicut',
        solver_kwargs={'internal_solver': 'kernighan-lin',
                       'block_shape': mc_blocks},
        n_threads=8
    )  # alternatively: multicut_solver='kernighan-lin'
    print('segmentation is done')
    return segmentation

def segment(input_path, input_prefix, output_path, output_key, n_workers):
    with open_file(input_path, 'r') as f, open_file(output_path, 'a') as f_out:
        ds_fg = f[os.path.join(input_prefix, 'foreground')]
        ds_fg.n_threads = n_workers
        ds_affs = f[os.path.join(input_prefix, 'affinities')]
        ds_affs.n_threads = n_workers

        print("Loading affinities ...")
        affs = ds_affs[:]
        print("Loading mask ...")
        mask = ds_fg[:] > 0.5

        strides = [4, 4, 4]
        print("Run mutex watershed ...")
        seg = blockwise_mutex_watershed(affs, OFFSETS, strides,
                                        block_shape=ds_fg.chunks,
                                        randomize_strides=True,
                                        mask=mask, n_threads=n_workers)

        print("Writing result ...")
        ds_out = f_out.require_dataset(output_key, shape=ds_fg.shape,
                                       chunks=ds_fg.chunks,
                                       compression='gzip', dtype='uint64')
        ds_out.n_threads = n_workers
        ds_out[:] = seg

def predict_boundaries_2d(in_path, out_path, checkpoint, device=torch.device('cuda')):
    model = get_model()
    state = torch.load(checkpoint)['model_state']
    model.load_state_dict(state)
    model.to(device)
    model.eval()

    with open_file(in_path, 'r') as f:
        raw = f['raw'][:]

    # predict the boundaries slice by slice
    prediction = np.zeros_like(raw, dtype='float32')
    with torch.no_grad():
        for z in range(raw.shape[0]):
            input_ = raw[z].astype('float32') / 255.
            input_ = torch.from_numpy(input_[None, None]).to(device)
            pred = model(input_).cpu().numpy()[0, 0]
            prediction[z] = pred

    with open_file(out_path, 'a') as f:
        ds = f.require_dataset('boundaries', prediction.shape, compression='gzip',
                               dtype='float32', chunks=(1,) + prediction.shape[1:])
        ds[:] = prediction
    return prediction

def __setstate__(self, state):
    raw_path, raw_key = state["raw_path"], state["raw_key"]
    label_path, label_key = state["label_path"], state["label_key"]
    roi = state["roi"]

    try:
        raw = open_file(raw_path, mode="r")[raw_key]
        if roi is not None:
            raw = RoiWrapper(raw, (slice(None),) + roi) if state["_with_channels"] else\
                RoiWrapper(raw, roi)
        state["raw"] = raw
    except Exception:
        msg = f"SegmentationDataset could not be deserialized because of missing {raw_path}, {raw_key}.\n"
        msg += "The dataset is deserialized in order to allow loading trained models from a checkpoint.\n"
        msg += "But it cannot be used for further training and will throw an error."
        warnings.warn(msg)
        state["raw"] = None

    try:
        labels = open_file(label_path, mode="r")[label_key]
        if roi is not None:
            labels = RoiWrapper(labels, (slice(None),) + roi) if state["_with_label_channels"] else\
                RoiWrapper(labels, roi)
        state["labels"] = labels
    except Exception:
        msg = f"SegmentationDataset could not be deserialized because of missing {label_path}, {label_key}.\n"
        msg += "The dataset is deserialized in order to allow loading trained models from a checkpoint.\n"
        msg += "But it cannot be used for further training and will throw an error."
        warnings.warn(msg)
        state["labels"] = None

    self.__dict__.update(state)

def eval_nuclei(seg_path, seg_key, annotation_path, annotation_key=None, min_radius=6):
    """ Evaluate the nucleus segmentation by computing the percentage of
    false positive and false negative nucleus annotations
    in manually annotated validation slices.
    """
    eval_res = {}
    with open_file(seg_path, 'r') as f_seg, open_file(annotation_path, 'r') as f_ann:
        ds_seg = f_seg[seg_key]
        g = f_ann if annotation_key is None else f_ann[annotation_key]

        def visit_annotation(name, node):
            nonlocal eval_res
            if is_dataset(node):
                print("Evaluating:", name)
                res = eval_slice(ds_seg, node, min_radius)
                eval_res = merge_evaluations(res, eval_res)
                # for debugging
                # print("current eval:", eval_res)
            else:
                print("Group:", name)

        g.visititems(visit_annotation)

    return to_scores(eval_res)

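# Usage sketch (hedged): paths and keys are hypothetical placeholders. The
# annotations are expected as datasets (one per validation slice) that
# 'visititems' can traverse; the returned scores come from 'to_scores'.
def _example_eval_nuclei():
    scores = eval_nuclei('./nuclei_segmentation.n5', 'setup0/timepoint0/s0',
                         './annotations.h5', annotation_key='validation_slices')
    print("Nucleus evaluation scores:", scores)
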
def predict_affinities(checkpoint, gpu_ids, input_path, input_key, output_path, output_key):
    model = get_model()
    state_dict = torch.load(checkpoint)['model_state']
    model.load_state_dict(state_dict)

    block_shape = (96, 96, 96)
    halo = (32, 32, 32)

    with open_file(input_path, 'r') as f_in, open_file(output_path, 'a') as f_out:
        ds_in = f_in[input_key]
        shape = ds_in.shape

        # the first output channel is the foreground prediction,
        # the remaining channels are the affinities
        ds_fg = f_out.require_dataset(os.path.join(output_key, 'foreground'),
                                      shape=shape, chunks=block_shape,
                                      compression='gzip', dtype='float32')
        aff_shape = (model.out_channels - 1,) + shape
        ds_affs = f_out.require_dataset(os.path.join(output_key, 'affinities'),
                                        shape=aff_shape, chunks=(1,) + block_shape,
                                        compression='gzip', dtype='float32')

        outputs = [(ds_fg, np.s_[0]), (ds_affs, np.s_[1:])]
        predict_with_halo(ds_in, model, gpu_ids, block_shape, halo, output=outputs)

def check_segmentations(ref_path, ref_key, seg_path, seg_key):
    with open_file(ref_path, 'r') as f:
        shape = f[ref_key].shape
    with open_file(seg_path, 'r') as f:
        seg_shape = f[seg_key].shape
    assert shape == seg_shape, "%s, %s" % (str(shape), str(seg_shape))
    return shape

def make_small_example_data():
    bb = np.s_[:25, :512, :512]
    with open_file('./data/data.n5') as f:
        raw = f['raw'][bb]
        ws = f['watersheds'][bb]
    # write the cutouts to the small example file
    # (assumption: the small file mirrors the 'raw' / 'watersheds' layout;
    #  the original body was left empty here)
    with open_file('./data/small_data.n5') as f:
        f.create_dataset('raw', data=raw, compression='gzip')
        f.create_dataset('watersheds', data=ws, compression='gzip')

def rank_false_merges(problem_path, graph_key, feat_key, morpho_key,
                      node_label_path, node_label_key, ignore_ids,
                      out_path_ids, out_path_scores, n_threads, n_candidates,
                      heuristic=weight_quantile_heuristic):
    g = ndist.Graph(problem_path, graph_key, n_threads)
    with open_file(problem_path, 'r') as f:
        ds = f[feat_key]
        ds.n_threads = n_threads
        probs = ds[:, 0]

        ds = f[morpho_key]
        ds.n_threads = n_threads
        sizes = ds[:, 1]

    with open_file(node_label_path, 'r') as f:
        ds = f[node_label_key]
        ds.n_threads = n_threads
        node_labels = ds[:]

    # take the n largest segments as false-merge candidates
    seg_ids = np.arange(len(sizes), dtype='uint64')
    seg_ids = seg_ids[np.argsort(sizes)[::-1]][:n_candidates]
    seg_ids = seg_ids[~np.isin(seg_ids, ignore_ids.tolist() + [0])]
    max_size = sizes[seg_ids].max()

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [tp.submit(heuristic, seg_id, g, node_labels,
                           sizes, max_size, probs)
                 for seg_id in seg_ids]
        fm_scores = np.array([t.result() for t in tasks])

    # for debugging:
    # sc = weight_quantile_heuristic(seg_ids[0], g, node_labels, sizes, max_size, probs)

    # sort the ids by score (decreasing)
    sorter = np.argsort(fm_scores)[::-1]
    seg_ids = seg_ids[sorter]
    fm_scores = fm_scores[sorter]

    with open(out_path_scores, 'w') as f:
        json.dump(fm_scores.tolist(), f)
    with open(out_path_ids, 'w') as f:
        json.dump(seg_ids.tolist(), f)

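# Usage sketch (hedged): paths and keys are hypothetical placeholders for an
# exported graph problem; ignore_ids is expected to be a numpy array of segment
# ids that should be excluded from the ranking.
def _example_rank_false_merges():
    ignore_ids = np.array([0], dtype='uint64')
    rank_false_merges('./problem.n5', 's0/graph', 'features', 's0/morphology',
                      './node_labels.n5', 'node_labels', ignore_ids,
                      './fm_ids.json', './fm_scores.json',
                      n_threads=16, n_candidates=1000)
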
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    config = self.get_task_config()

    with open_file(self.input_path, 'r') as f:
        dtype = f[self.input_key].dtype

    chunks = config['chunks']
    if chunks is None:
        chunks = block_shape
    compression = config['compression']
    # open in append mode, since we create the output dataset here
    with open_file(self.output_path, 'a') as f:
        f.require_dataset(self.output_key, shape=self.shape, chunks=chunks,
                          compression=compression, dtype=dtype)

    trafo_file = self.update_transformations()

    # we don't need any additional config besides the paths
    config.update({"input_path": self.input_path, "input_key": self.input_key,
                   "output_path": self.output_path, "output_key": self.output_key,
                   "transformation_file": trafo_file,
                   "elastix_directory": self.elastix_directory,
                   "tmp_folder": self.tmp_folder})

    block_list = vu.blocks_in_volume(self.shape, block_shape, roi_begin, roi_end)
    self._write_log("scheduled %i blocks to run" % len(block_list))

    # prime and run the jobs
    n_jobs = min(len(block_list), self.max_jobs)
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)

def copy_attributes(in_file, in_key, out_file, out_key):
    with open_file(in_file, 'r') as fin, open_file(out_file) as fout:
        ds_in = fin[in_key]
        ds_out = fout[out_key]
        for k, v in ds_in.attrs.items():
            # check Integral before Real: ints are also instances of numbers.Real,
            # so the other order would cast integer attributes to float
            if isinstance(v, numbers.Integral):
                v = int(v)
            elif isinstance(v, numbers.Real):
                v = float(v)
            elif isinstance(v, np.ndarray):
                v = v.tolist()
            ds_out.attrs[k] = v

def _load_node_labes(initial_path, initial_key, save_path, save_key):
    # load the node labels from the save file if they were already computed
    if os.path.exists(save_path):
        with open_file(save_path, 'r') as f:
            if save_key in f:
                node_labels = f[save_key][:]
                assert node_labels.ndim == 1
                return node_labels.astype('uint32')

    with open_file(initial_path, 'r') as f:
        node_labels = f[initial_key][:]
    if node_labels.ndim == 2:
        node_labels = node_labels[:, 1]
    assert node_labels.ndim == 1
    return node_labels.astype('uint32')

def segment_timepoint(timepoint, gpu=None):
    # create the input data for this timepoint
    tmp_folder = 'tmp_plantseg/tp_%03i' % timepoint
    os.makedirs(tmp_folder, exist_ok=True)

    if gpu is not None:
        assert isinstance(gpu, int)
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)

    with ChangeDir(tmp_folder):
        raw_path = './raw.h5'
        config_path = './config.yaml'
        res_path = './segmentation.h5'
        if os.path.exists(res_path):
            return

        with open(TEMPLATE_CONFIG, 'r') as f:
            config = yaml.safe_load(f)
        config['path'] = raw_path
        with open(config_path, 'w') as f:
            yaml.dump(config, f)

        with open_file(raw_path, 'a') as f:
            if 'raw' not in f:
                key = get_key(is_h5=True, timepoint=timepoint, setup_id=0, scale=0)
                with open_file(PATH, 'r') as f_in:
                    raw = f_in[key][:]
                f.create_dataset('raw', data=raw, chunks=(32, 128, 128))

        cmd = [PYTHON, PLANTSEG, '--config', config_path]
        run(cmd)

        print("Run post-processing ...")
        seg_path = 'PreProcessing/generic_light_sheet_3d_unet/MultiCut/raw_predictions_multicut.h5'
        with open_file(seg_path, 'r') as f, open_file(res_path, 'a') as f_out:
            seg = f['segmentation'][:]

            # make sure zero is not a segment id, it is reserved for the background
            ids, sizes = np.unique(seg, return_counts=True)
            if 0 in ids:
                ids += 1
                seg += 1
            # set the largest segment (= background) to zero and relabel consecutively
            bg_id = ids[np.argmax(sizes)]
            seg[seg == bg_id] = 0
            seg = seg.astype('uint32')
            vigra.analysis.relabelConsecutive(seg, out=seg, start_label=1, keep_zeros=True)
            f_out.create_dataset('data', data=seg, compression='gzip')

def compute_mean_and_std():
    key = 'setup0/timepoint0/s1'
    f = open_file(RAW_PATH, 'r')
    ds = f[key]

    mask_key = 'setup0/timepoint0/s0'
    mask = open_file(MASK_PATH)[mask_key][:].astype('bool')
    mask = ResizedVolume(mask, ds.shape)

    m, s = mean_and_std(ds, mask=mask, n_threads=16, verbose=True)
    print("Computed mean and standard deviation:")
    print("Mean:", m)
    print("Standard deviation:", s)

def _predict(model, raw, trainer, gpu_ids, save_path, sample_id):
    save_key = f"sample{sample_id}"
    if save_path is not None and os.path.exists(save_path):
        with open_file(save_path, 'r') as f:
            if save_key in f:
                print("Loading predictions for sample", sample_id, "from file")
                ds = f[save_key]
                ds.n_threads = 8
                return ds[:]

    normalizer = get_normalizer(trainer)
    dataset = trainer.val_loader.dataset
    ndim = dataset.ndim
    if isinstance(dataset, ConcatDataset):
        patch_shape = dataset.datasets[0].patch_shape
    else:
        patch_shape = dataset.patch_shape
    if ndim == 2 and len(patch_shape) == 3:
        patch_shape = patch_shape[1:]
    assert len(patch_shape) == ndim

    # choose a small halo and set the correct block shape
    halo = (32, 32) if ndim == 2 else (8, 16, 16)
    block_shape = tuple(psh - 2 * ha for psh, ha in zip(patch_shape, halo))

    if save_path is None:
        output = None
    else:
        f = open_file(save_path, 'a')
        out_shape = (trainer.model.out_channels,) + raw.shape
        chunks = (1,) + block_shape
        output = f.create_dataset(save_key, shape=out_shape, chunks=chunks,
                                  compression='gzip', dtype='float32')

    gpu_ids = [int(gpu) if gpu != 'cpu' else gpu for gpu in gpu_ids]
    pred = predict_with_halo(raw, model, gpu_ids, block_shape, halo,
                             preprocess=normalizer, output=output)
    if output is not None:
        f.close()

    return pred

def init_dataset(self):
    data_path = os.path.join(self.test_folder, "data.h5")
    data_key = "data"
    with open_file(data_path, "a") as f:
        f.create_dataset(data_key, data=np.random.rand(*self.shape))
    seg_path = os.path.join(self.test_folder, "seg.h5")
    with open_file(seg_path, "a") as f:
        f.create_dataset(data_key, data=np.random.randint(0, 100, size=self.shape))

    scales = [[2, 2, 2]]
    max_jobs = min(4, mp.cpu_count())

    tmp_folder = os.path.join(self.test_folder, "tmp-init-raw")
    mobie.add_image(data_path, data_key, self.root, self.dataset_name, self.raw_name,
                    resolution=(1, 1, 1), chunks=self.chunks, scale_factors=scales,
                    tmp_folder=tmp_folder, max_jobs=max_jobs)

    tmp_folder = os.path.join(self.test_folder, "tmp-init-seg")
    mobie.add_segmentation(seg_path, data_key, self.root, self.dataset_name, self.seg_name,
                           resolution=(1, 1, 1), chunks=self.chunks, scale_factors=scales,
                           tmp_folder=tmp_folder, max_jobs=max_jobs)

    display_settings = [
        mobie.metadata.get_image_display("image-group-0", [self.raw_name]),
        mobie.metadata.get_segmentation_display("segmentation-group-1", [self.seg_name]),
    ]
    source_transforms = [
        mobie.metadata.get_affine_source_transform([self.raw_name, self.seg_name],
                                                   np.random.rand(12))
    ]
    mobie.create_view(os.path.join(self.root, self.dataset_name), "my-view",
                      [[self.raw_name], [self.seg_name]],
                      display_settings=display_settings,
                      source_transforms=source_transforms)

def map_cells_to_nuclei(label_ids, seg_path, nuc_path, out_path,
                        tmp_folder, target, max_jobs, overlap_threshold=.25):
    # choose the keys of the same size
    if seg_path.endswith('.n5'):
        seg_key = 'setup0/timepoint0/s2'
    else:
        seg_key = 't00000/s00/2/cells'
    if nuc_path.endswith('.n5'):
        nuc_key = 'setup0/timepoint0/s0'
    else:
        nuc_key = 't00000/s00/0/cells'

    with open_file(seg_path, 'r') as f:
        shape1 = f[seg_key].shape
    with open_file(nuc_path, 'r') as f:
        shape2 = f[nuc_key].shape
    assert shape1 == shape2

    # compute the pixel-wise overlap of cells with nuclei
    cids_to_nids = node_labels(seg_path, seg_key, nuc_path, nuc_key,
                               prefix='nuc_to_cells', tmp_folder=tmp_folder,
                               target=target, max_jobs=max_jobs,
                               max_overlap=False, ignore_label=0)
    cids_to_nids = overlaps_to_ids(cids_to_nids, overlap_threshold)

    # compute the pixel-wise overlap of nuclei with cells
    nids_to_cids = node_labels(nuc_path, nuc_key, seg_path, seg_key,
                               prefix='cells_to_nuc', tmp_folder=tmp_folder,
                               target=target, max_jobs=max_jobs,
                               max_overlap=False, ignore_label=0)
    nids_to_cids = overlaps_to_ids(nids_to_cids, overlap_threshold)

    # only keep cell ids that overlap with a single nucleus
    cids_to_nids = {label_id: ovlp_ids[0]
                    for label_id, ovlp_ids in cids_to_nids.items()
                    if len(ovlp_ids) == 1}
    # only keep nucleus ids that overlap with a single cell
    nids_to_cids = {label_id: ovlp_ids[0]
                    for label_id, ovlp_ids in nids_to_cids.items()
                    if len(ovlp_ids) == 1}

    # only keep cell ids for which the overlap ids agree
    cids_to_nids = {label_id: ovlp_id
                    for label_id, ovlp_id in cids_to_nids.items()
                    if nids_to_cids.get(ovlp_id, 0) == label_id}

    data = np.array([cids_to_nids.get(label_id, 0) for label_id in label_ids])
    col_names = ['label_id', 'nucleus_id']
    data = np.concatenate([label_ids[:, None], data[:, None]], axis=1)
    write_csv(out_path, data, col_names)

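# Usage sketch (hedged): the paths and the 'target' value are hypothetical
# placeholders; label_ids is expected to be a numpy array with all cell ids
# for which a nucleus assignment should be written to the output csv.
def _example_map_cells_to_nuclei():
    label_ids = np.arange(100, dtype='uint64')
    map_cells_to_nuclei(label_ids, './cells.n5', './nuclei.n5',
                        './cells_to_nuclei.csv',
                        tmp_folder='./tmp_map_nuclei', target='local', max_jobs=16)
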
def _resize(path, native_resolution, target_resolution):
    assert len(native_resolution) == len(target_resolution)
    scale_factor = tuple(nres / tres for nres, tres
                         in zip(native_resolution, target_resolution))
    paths = glob(os.path.join(path, "*.h5"))

    # check if anything needs to be resized
    need_resize = []
    for pp in paths:
        with open_file(pp, "r") as f:
            for name, obj in f.items():
                rescaled_name = f"rescaled/{name}"
                if is_group(obj):
                    continue
                if rescaled_name in f:
                    this_resolution = f[rescaled_name].attrs["resolution"]
                    correct_res = all(np.isclose(this_re, target_re)
                                      for this_re, target_re
                                      in zip(this_resolution, target_resolution))
                    if correct_res:
                        continue
                need_resize.append(pp)

    # resize if necessary
    need_resize = list(set(need_resize))
    for pp in need_resize:
        with open_file(pp, mode="a") as f:
            if "rescaled" in f:
                del f["rescaled"]
            for name, obj in f.items():
                print("Resizing", pp, name)
                print("from resolution (microns)", native_resolution, "to", target_resolution)
                print("with scale factor", scale_factor)
                vol = obj[:]
                if name == "raw":
                    vol = rescale(vol, scale_factor, preserve_range=True).astype(vol.dtype)
                else:
                    # use nearest-neighbor interpolation for segmentations
                    vol = rescale(vol, scale_factor, preserve_range=True,
                                  order=0, anti_aliasing=False).astype(vol.dtype)
                ds = f.create_dataset(f"rescaled/{name}", data=vol, compression="gzip")
                ds.attrs["resolution"] = target_resolution

def run_segmentation_for_cubes(filename_raw=None, dataset_raw='/t00000/s00/0/cells',
                               filename_mem=None, filename_sv=None, rf=None,
                               n_threads=1, beta=0.5, output_folder=None,
                               result_pattern='{}_{}_{}.h5',
                               cube_size=(1024, 1024, 1024), overlap=(256, 256, 256),
                               block_size=(256, 256, 256),
                               start_index=(0, 0, 0), end_index=None):
    step_z = cube_size[0] - overlap[0]
    step_y = cube_size[1] - overlap[1]
    step_x = cube_size[2] - overlap[2]

    with open_file(filename_raw, 'r') as f:
        shape = f[dataset_raw].shape
    nz, ny, nx = shape

    if not end_index:
        end_index = shape

    error_file = output_folder + 'errors_mc.txt'
    if not os.path.exists(error_file):
        with open(error_file, 'w') as f:
            f.write("Here will be indexes of cubes with errors in multicut.\n")

    # iterate over the cubes with overlap and segment each one
    for z in range(start_index[0], end_index[0], step_z):
        for y in range(start_index[1], end_index[1], step_y):
            for x in range(start_index[2], end_index[2], step_x):
                bb = np.s_[z:min(z + cube_size[0], nz),
                           y:min(y + cube_size[1], ny),
                           x:min(x + cube_size[2], nx)]
                filename_results = result_pattern.format(z, y, x)
                if not os.path.exists(output_folder + filename_results):
                    segmentation = mc_segmentation(bb, mc_blocks=block_size,
                                                   filename_raw=filename_raw,
                                                   dataset_raw=dataset_raw,
                                                   filename_mem=filename_mem,
                                                   filename_sv=filename_sv,
                                                   rf=rf, n_threads=n_threads,
                                                   beta=beta, error_file=error_file)
                    with open_file(output_folder + filename_results, 'w') as f:
                        f.create_dataset('data', data=segmentation, compression="gzip")

def align_seg(compute_offset):
    points_mobie = [[1077, 525], [941, 564], [848, 1314], [467, 959],
                    [432, 826], [976, 758], [1134, 907]]
    points_amira = [[1008, 315], [875, 350], [777, 1103], [394, 747],
                    [357, 616], [908, 547], [1064, 695]]

    if compute_offset:
        # estimate the offset as the mean difference of the corresponding points
        diff = [[mo - am for mo, am in zip(pm, pa)]
                for pm, pa in zip(points_mobie, points_amira)]
        offset = np.array(diff).mean(axis=0)
        offset = np.round(offset).astype('int').tolist()
        offset = [777] + offset
        print(offset)
        return

    ds_name = 'cell1'
    p = _get_path(ds_name)
    with open_file(p, mode='r', ext='') as f:
        ds = f['*.tif']
        shape = ds.shape
        ds.n_threads = 8
        print("Load raw1")
        raw1 = ds[:]
        # raw1 = ResizedVolume(raw1, (raw1.shape[0] // 2, raw1.shape[1], raw1.shape[2]))[:]

    roi = np.s_[777:-223, :, :]
    raw_p = f'/g/emcf/pape/sponge-fibsem-project/data/{ds_name}/images/local/fibsem-raw.n5'
    with open_file(raw_p, 'r') as f:
        ds = f['setup0/timepoint0/s0']
        ds.n_threads = 8
        ds = RoiWrapper(ds, roi)
        rshape = ds.shape
        print("Load raw2")
        raw2 = ds[:]

    scale_factor = [float(rs) / sh for rs, sh in zip(rshape, shape)]
    print(ds_name)
    print("Seg-shape  :", shape)
    print("Mobie-shape:", rshape)
    print("Factor     :", scale_factor)

    print("Start viewer")
    import napari
    with napari.gui_qt():
        viewer = napari.Viewer()
        viewer.add_image(raw1, name='amira')
        viewer.add_image(raw2, name='mobie')

def mc_segmentation(bb, mc_blocks, filename_raw, filename_mem, filename_sv):
    f = open_file(filename_raw, 'r')
    data_raw = f['/t00000/s00/0/cells'][bb].astype(np.float32)
    shape = data_raw.shape
    # skip blocks without raw data
    if np.min(data_raw) == np.max(data_raw):
        return np.zeros(shape)

    f = open_file(filename_mem, 'r')
    data_mem = f['data'][bb].astype(np.float32).reshape(data_raw.shape)
    assert data_mem.shape == shape

    f = open_file(filename_sv, 'r')
    data_sv = f['data'][bb].astype('uint32')
    assert data_sv.shape == shape
    data_sv, maxlabel, mapping = vigra.analysis.relabelConsecutive(data_sv)

    # skip blocks without superpixels
    if np.min(data_sv) == np.max(data_sv):
        return np.zeros(shape)

    try:
        # run the blockwise multicut segmentation
        segmentation = elf_workflow.multicut_segmentation(
            raw=data_raw, boundaries=data_mem, rf=rf, use_2dws=False,
            watershed=data_sv, multicut_solver='blockwise-multicut',
            solver_kwargs={'internal_solver': 'kernighan-lin',
                           'block_shape': mc_blocks},
            n_threads=16, beta=0.6
        )
        return segmentation
    except RuntimeError:
        # remember the blocks for which the multicut failed
        error_cubes.append(bb)
        return np.zeros(shape)

def write_h5_files(table, folder, raw_seg_path):
    """ Writes an individual h5 file for each row in the table, equal to the
    bounding box of that object plus a 10 pixel border in all dimensions.

    Args:
        table [pd.Dataframe] - table of nucleus statistics
        folder [str] - a temporary folder to write files to
        raw_seg_path [str] - path to the raw segmentation .h5
    """
    for row in table.itertuples(index=False):
        # min / max coordinates in microns for the segmentation
        minmax_seg = [row.bb_min_x, row.bb_min_y, row.bb_min_z,
                      row.bb_max_x, row.bb_max_y, row.bb_max_z]

        # raw scale (from the xml) for the 2x downsampled raw data
        raw_scale = [0.02, 0.02, 0.025]

        # slice for the raw file
        raw_slice = calculate_slice(raw_scale, minmax_seg, addBorder=True)

        is_h5 = is_h5_file(raw_seg_path)
        raw_key = get_key(is_h5, setup=0, time_point=0, scale=1)
        with open_file(raw_seg_path, 'r') as f:
            # get the 2x downsampled nuclei
            data = f[raw_key]
            img_array = data[raw_slice]

        # write the h5 file for this nucleus
        result_path = folder + os.sep + str(row.label_id) + '.h5'
        with open_file(result_path, 'a') as f:
            # only use 64^3 chunks if the dataset is at least that big
            if all(sh >= 64 for sh in img_array.shape):
                chunks = (64, 64, 64)
            else:
                chunks = img_array.shape
            f.create_dataset('dataset', chunks=chunks, compression='gzip',
                             shape=img_array.shape, dtype=img_array.dtype)
            f['dataset'][:] = img_array

def prefilter_blocks(mask_path, mask_key, shape, block_shape, save_file, n_threads=48):
    if os.path.exists(save_file):
        print("Loading block list from file")
        with open(save_file) as f:
            return json.load(f)

    with open_file(mask_path, 'r') as f:
        ds = f[mask_key]
        mask = ResizedVolume(ds, shape=shape, order=0)
        blocking = nt.blocking([0, 0, 0], shape, block_shape)
        n_blocks = blocking.numberOfBlocks

        # keep only the blocks that contain foreground in the mask
        def check_block(block_id):
            block = blocking.getBlock(block_id)
            bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))
            d = mask[bb]
            if d.sum() > 0:
                return block_id
            else:
                return None

        print("Computing block list ...")
        with futures.ThreadPoolExecutor(n_threads) as tp:
            blocks = list(tqdm(tp.map(check_block, range(n_blocks)), total=n_blocks))
        blocks = [bid for bid in blocks if bid is not None]

    with open(save_file, 'w') as f:
        json.dump(blocks, f)
    return blocks

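# Usage sketch (hedged): paths, shape and block shape are hypothetical
# placeholders. The block list is cached in 'save_file', so repeated calls skip
# the mask check and just reload the json.
def _example_prefilter_blocks():
    blocks = prefilter_blocks('./mask.n5', 'setup0/timepoint0/s0',
                              shape=(1024, 2048, 2048), block_shape=(64, 512, 512),
                              save_file='./blocks.json')
    print("Found", len(blocks), "blocks with foreground")
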
def parse_simple_htm(folder, pattern="*.h5", exclude_names=None):
    """Parse simple htm layout, see e.g. example data at
    https://owncloud.gwdg.de/index.php/s/eu8JMlUFZ82ccHT
    """
    files = glob(os.path.join(folder, pattern))
    files.sort()

    # get the channel and label names
    channel_names = []
    label_names = []
    with io.open_file(files[0], "r") as f:
        for name, obj in f.items():
            if exclude_names is not None and name in exclude_names:
                continue
            if io.is_dataset(obj):
                channel_names.append(name)
            elif io.is_group(obj) and name == "segmentation":
                for label_name, label in obj.items():
                    if exclude_names is not None and label_name in exclude_names:
                        continue
                    if io.is_dataset(label):
                        label_names.append(f"segmentation/{label_name}")

    assert channel_names
    image_data = {name: _load_channel_simple(files, name) for name in channel_names}
    # return label_data as None if no label names were found
    label_data = None if not label_names else {name: _load_channel_simple(files, name)
                                               for name in label_names}
    return image_data, label_data

def debug_vol():
    path = '../data.n5'
    key = 'volumes/cilia/segmentation'
    f = open_file(path)
    ds = f[key]
    shape = ds.shape
    block_shape = ds.chunks

    roi_begin = [7216, 12288, 7488]
    roi_end = [8640, 19040, 11392]

    blocks, blocking = blocks_in_volume(shape, block_shape, roi_begin, roi_end,
                                        return_blocking=True)
    print("Have", len(blocks), "blocks in roi")

    # check reading all blocks
    for block_id in blocks:
        print("Check block", block_id)
        block = blocking.getBlock(block_id)
        bb = block_to_bb(block)
        d = ds[bb]
        print("Have block", block_id)
    print("All checks passed")