def create_auxiliary_gene_file(meds_root, out_file, return_result=False):
    all_genes_dset = 'genes'
    names_dset = 'gene_names'

    # get all the prospr gene xmls in the image folder
    med_files = glob(os.path.join(meds_root, "prospr*.xml"))
    # filter out prospr files that are not genes (= segmented regions and virtual cells)
    med_files = [name for name in med_files if 'segmented' not in name]
    med_files = [name for name in med_files if 'virtual' not in name]

    # get the gene names from the filenames
    gene_names = [os.path.splitext(os.path.basename(f))[0] for f in med_files]
    # cut off the preceding prospr-... part
    gene_names = ['-'.join(name.split('-')[4:]) for name in gene_names]
    num_genes = len(gene_names)
    assert num_genes == len(med_files)

    # get the data paths from the xmls
    med_files = [get_data_path(med_file, return_absolute_path=True)
                 for med_file in med_files]

    is_h5 = os.path.splitext(med_files[0])[1] == '.h5'
    med_key = get_key(is_h5, time_point=0, setup_id=0, scale=0)
    with open_file(med_files[0], 'r') as f:
        spatial_shape = f[med_key].shape

    shape = (num_genes,) + spatial_shape

    # iterate through the med files and write them binarized into one file
    with open_file(out_file) as f:
        out_dset = f.create_dataset(all_genes_dset, shape=shape, dtype='bool',
                                    chunks=(1, 64, 64, 64), compression='gzip')
        out_dset.n_threads = 8

        for i, med_file in enumerate(tqdm(med_files)):
            is_h5 = os.path.splitext(med_file)[1] == '.h5'
            med_key = get_key(is_h5, time_point=0, setup_id=0, scale=0)
            with open_file(med_file, 'r') as f2:
                ds = f2[med_key]
                this_shape = ds.shape
                if this_shape != spatial_shape:
                    raise RuntimeError("Incompatible shapes %s, %s" % (str(this_shape),
                                                                       str(spatial_shape)))
                ds.n_threads = 8
                data = ds[:]
            out_dset[i] = data

        gene_names_ascii = [n.encode('ascii', 'ignore') for n in gene_names]
        f.create_dataset(names_dset, data=gene_names_ascii, dtype='S40')

    if return_result:
        # reload the binarized version
        with open_file(out_file, 'r') as f:
            all_genes = f[all_genes_dset][:]
        return all_genes

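# Hypothetical usage sketch for create_auxiliary_gene_file (not part of the
# original source): assumes prospr MED xmls live under 'data/meds' and writes
# the binarized gene stack to an n5 file; both paths are illustrative.
def _example_create_auxiliary_gene_file():
    all_genes = create_auxiliary_gene_file('data/meds', 'data/prospr-genes.n5',
                                           return_result=True)
    print(all_genes.shape)  # (num_genes,) + spatial_shape
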
def copy_to_bdv_n5(in_file, out_file, chunks, resolution,
                   n_threads=32, start_scale=0):
    n_scales = get_number_of_scales(in_file, 0, 0)
    scale_factors = get_scale_factors(in_file, 0)
    # double check the newly implemented functions in pybdv
    assert n_scales == len(scale_factors)

    scale_factors = normalize_scale_factors(scale_factors, start_scale)

    for out_scale, in_scale in enumerate(range(start_scale, n_scales)):
        in_key = get_key(True, 0, 0, in_scale)
        out_key = get_key(False, 0, 0, out_scale)

        if chunks is None:
            with open_file(in_file, 'r') as f:
                chunks_ = f[in_key].chunks
        else:
            chunks_ = chunks

        copy_dataset(in_file, in_key, out_file, out_key,
                     convert_dtype=False, chunks=chunks_, n_threads=n_threads)
        copy_attributes(in_file, in_key, out_file, out_key)

    write_n5_metadata(out_file, scale_factors, resolution, setup_id=0)

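# Hypothetical usage sketch for copy_to_bdv_n5 (not from the original source):
# converts an existing bdv.hdf5 scale pyramid to bdv.n5, dropping scales below
# start_scale. The file paths and the resolution are assumptions for illustration.
def _example_copy_to_bdv_n5():
    copy_to_bdv_n5('data/raw.h5', 'data/raw.n5',
                   chunks=None,  # None keeps the chunking of the input
                   resolution=[0.025, 0.02, 0.02],  # micrometer, illustrative
                   n_threads=16, start_scale=1)
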
def view_segmentations(version, raw_scale, seg_names=[], seg_scales=[], bb=np.s_[:]):
    folder = os.path.join(ROOT, version, 'images', 'local')

    raw_file = os.path.join(folder, 'sbem-6dpf-1-whole-raw.xml')
    raw_file = get_data_path(raw_file, return_absolute_path=True)
    raw_key = get_key(False, time_point=0, setup_id=0, scale=raw_scale)
    with z5py.File(raw_file, 'r') as f:
        ds = f[raw_key]
        ds.n_threads = 16
        raw = ds[bb]
    ref_shape = raw.shape

    data = [to_source(raw, name='raw')]
    for seg_name, seg_scale in zip(seg_names, seg_scales):
        seg_file = os.path.join(folder, seg_name + '.xml')
        seg_file = get_data_path(seg_file, return_absolute_path=True)
        seg_key = get_key(False, time_point=0, setup_id=0, scale=seg_scale)
        with z5py.File(seg_file, 'r') as f:
            ds = f[seg_key]
            ds.n_threads = 16
            seg = ds[bb].astype('uint32')
        if seg.shape != ref_shape:
            # FIXME this will fail with a bounding box
            print("Resize", ref_shape)
            seg = ResizeWrapper(to_source(seg, name=seg_name), ref_shape)
        data.append(to_source(seg, name=seg_name))

    view(*data)

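# Hypothetical usage sketch for view_segmentations (not from the original source):
# loads scale 3 of the raw data and one segmentation at scale 2, restricted to a
# bounding box. The version string and segmentation name are illustrative.
def _example_view_segmentations():
    bb = np.s_[100:200, 500:1500, 500:1500]
    view_segmentations('1.0.0', raw_scale=3,
                       seg_names=['sbem-6dpf-1-whole-segmented-cells'],
                       seg_scales=[2], bb=bb)
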
def get_seg_key_xml(xml_path, scale):
    bdv_format = get_bdv_format(xml_path)
    if bdv_format == 'bdv.hdf5':
        return get_key(True, time_point=0, setup_id=0, scale=scale)
    elif bdv_format == 'bdv.n5':
        return get_key(False, time_point=0, setup_id=0, scale=scale)
    else:
        raise RuntimeError("Invalid bdv format: %s" % bdv_format)

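# Hypothetical usage sketch for get_seg_key_xml (not from the original source):
# the xml path is illustrative; the returned key can be used directly with the
# data file resolved via get_data_path.
def _example_get_seg_key_xml():
    xml_path = 'data/sbem-6dpf-1-whole-segmented-cells.xml'
    key = get_seg_key_xml(xml_path, scale=2)
    data_path = get_data_path(xml_path, return_absolute_path=True)
    with open_file(data_path, 'r') as f:
        print(f[key].shape)
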
def import_traces(input_folder, out_path,
                  reference_path, reference_scale,
                  resolution, scale_factors,
                  radius=2, chunks=None, max_jobs=8,
                  unit='micrometer', source_name=None):
    """ Import trace data into the mobie format.

    input_folder [str] - folder with traces to be imported.
    out_path [str] - where to save the segmentation.
    reference_path [str] - path to the reference volume.
    reference_scale [int] - scale to use for the reference.
    resolution [list[float]] - resolution of the traces in nanometer.
    scale_factors [list[list[int]]] - scale factors for down-sampling.
    radius [int] - radius to write for the traces.
    chunks [list[int]] - chunks for the traces volume.
    max_jobs [int] - number of threads to use for down-sampling.
    unit [str] - physical unit (default: micrometer).
    source_name [str] - name of the source (default: None).
    """
    traces = parse_traces(input_folder)

    # check that we are compatible with bdv (ids need to be smaller than int16 max)
    max_id = np.iinfo('int16').max
    max_trace_id = max(traces.keys())
    if max_trace_id > max_id:
        raise RuntimeError("Can't export id %i > %i" % (max_trace_id, max_id))

    is_h5 = is_h5py(reference_path)
    ref_key = get_key(is_h5, timepoint=0, setup_id=0, scale=reference_scale)
    with open_file(reference_path, 'r') as f:
        ds = f[ref_key]
        shape = ds.shape
        if chunks is None:
            chunks = ds.chunks

    key0 = get_key(is_h5, timepoint=0, setup_id=0, scale=0)
    print("Writing traces ...")
    traces_to_volume(traces, out_path, key0, shape, resolution, chunks, radius, max_jobs)

    print("Downscaling traces ...")
    make_scales(out_path, scale_factors, downscale_mode='max',
                ndim=3, setup_id=0, is_h5=is_h5,
                chunks=chunks, n_threads=max_jobs)

    xml_path = os.path.splitext(out_path)[0] + '.xml'
    # we assume that the resolution is in nanometer, but want to write microns for bdv
    bdv_res = [res / 1000. for res in resolution]
    write_xml_metadata(xml_path, out_path, unit, bdv_res, is_h5,
                       setup_id=0, timepoint=0, setup_name=source_name,
                       affine=None, attributes={'channel': {'id': 0}},
                       overwrite=False, overwrite_data=False, enforce_consistency=False)

    bdv_scale_factors = [[1, 1, 1]] + scale_factors
    if is_h5:
        write_h5_metadata(out_path, bdv_scale_factors)
    else:
        write_n5_metadata(out_path, bdv_scale_factors, bdv_res)

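# Hypothetical usage sketch for import_traces (not from the original source):
# all paths, the resolution (in nanometer, per the docstring) and the scale
# factors are assumptions for illustration.
def _example_import_traces():
    import_traces('data/traces', 'data/traces.n5',
                  reference_path='data/raw.n5', reference_scale=3,
                  resolution=[25., 20., 20.],  # nanometer
                  scale_factors=[[2, 2, 2], [2, 2, 2]],
                  radius=2, max_jobs=8, source_name='traces')
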
def map_ids(path1, path2, out_path, tmp_folder, max_jobs, target,
            prefix, key1=None, key2=None, scale=0):
    task = NodeLabelWorkflow
    config_folder = os.path.join(tmp_folder, 'configs')
    write_default_global_config(config_folder)

    configs = task.get_config()
    conf = configs['merge_node_labels']
    conf.update({'threads_per_job': 8, 'mem_limit': 16})
    with open(os.path.join(config_folder, 'merge_node_labels.config'), 'w') as f:
        json.dump(conf, f)

    if key1 is None:
        is_h5 = is_h5_file(path1)
        key1 = get_key(is_h5, time_point=0, setup_id=0, scale=scale)
    if key2 is None:
        is_h5 = is_h5_file(path2)
        key2 = get_key(is_h5, time_point=0, setup_id=0, scale=scale)

    tmp_path = os.path.join(tmp_folder, 'data.n5')
    tmp_key = prefix

    t = task(tmp_folder=tmp_folder, config_dir=config_folder,
             target=target, max_jobs=max_jobs,
             ws_path=path1, ws_key=key1,
             input_path=path2, input_key=key2,
             output_path=tmp_path, output_key=tmp_key,
             prefix=prefix, max_overlap=True, serialize_counts=True)
    ret = luigi.build([t], local_scheduler=True)
    if not ret:
        raise RuntimeError("Id-mapping failed")

    ds = z5py.File(tmp_path)[tmp_key]
    lut = ds[:]
    assert lut.ndim == 2
    lut = dict(zip(range(len(lut)), lut.tolist()))

    with open(out_path, 'w') as f:
        json.dump(lut, f)

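# Hypothetical usage sketch for map_ids (not from the original source): maps the
# ids of one segmentation onto another via maximum overlap and serializes the
# resulting look-up table as json. Paths and the prefix are illustrative.
def _example_map_ids():
    map_ids('data/seg_cells.n5', 'data/seg_nuclei.n5', 'cells_to_nuclei.json',
            tmp_folder='tmp_map_ids', max_jobs=8, target='local',
            prefix='cells_to_nuclei', scale=0)
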
def downsample(ref_path, in_path, in_key, out_path, resolution,
               tmp_folder, target, max_jobs):
    ref_is_h5 = is_h5_file(ref_path)
    gkey = get_key(ref_is_h5, 0, 0)
    with open_file(ref_path, 'r') as f:
        g = f[gkey]
        levels = list(g.keys())
        levels.sort()

        sample_factors = []
        for level in range(1, len(levels)):
            k0 = get_key(ref_is_h5, 0, 0, level - 1)
            k1 = get_key(ref_is_h5, 0, 0, level)
            ds0 = f[k0]
            ds1 = f[k1]
            s0 = ds0.shape
            s1 = ds1.shape
            factor = [int(round(float(sh0) / sh1, 0))
                      for sh0, sh1 in zip(s0, s1)]
            sample_factors.append(factor)
    assert len(sample_factors) == len(levels) - 1

    config_dir = os.path.join(tmp_folder, 'configs')
    task = DownscalingWorkflow
    config = task.get_config()['downscaling']
    config.update({'library': 'skimage', 'time_limit': 360, 'mem_limit': 8})
    with open(os.path.join(config_dir, 'downscaling.config'), 'w') as f:
        json.dump(config, f)

    halos = len(sample_factors) * [[0, 0, 0]]
    # TODO this needs the merge of
    # https://github.com/constantinpape/cluster_tools/pull/17
    t = task(tmp_folder=tmp_folder, config_dir=config_dir,
             max_jobs=max_jobs, target=target,
             input_path=in_path, input_key=in_key,
             scale_factors=sample_factors, halos=halos,
             metadata_format='bdv',
             metadata_dict={'resolution': resolution, 'unit': 'micrometer'},
             output_path=out_path)
    ret = luigi.build([t], local_scheduler=True)
    if not ret:
        raise RuntimeError("Downscaling failed")

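# Hypothetical usage sketch for downsample (not from the original source): reads
# the scale pyramid layout from a reference bdv file and replicates it for the
# input dataset. Paths and the resolution are illustrative.
def _example_downsample():
    downsample(ref_path='data/raw.n5',
               in_path='data/corrected.n5', in_key=get_key(False, 0, 0, 0),
               out_path='data/corrected.n5',
               resolution=[0.025, 0.02, 0.02],
               tmp_folder='tmp_downsample', target='local', max_jobs=8)
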
def traces_to_volume(traces, reference_vol_path, reference_scale, out_path,
                     resolution, scale_factors, radius=2, chunks=None, n_threads=8):
    """ Export traces as a segmentation compatible with the platy-browser. """
    # check that we are compatible with bdv (ids need to be smaller than int16 max)
    max_id = np.iinfo('int16').max
    max_trace_id = max(traces.keys())
    if max_trace_id > max_id:
        raise RuntimeError("Can't export id %i > %i" % (max_trace_id, max_id))

    is_h5 = is_h5_file(reference_vol_path)
    ref_key = get_key(is_h5, time_point=0, setup_id=0, scale=reference_scale)
    with open_file(reference_vol_path, 'r') as f:
        ds = f[ref_key]
        shape = ds.shape
        if chunks is None:
            chunks = ds.chunks

    is_h5 = is_h5_file(out_path)
    key0 = get_key(is_h5, time_point=0, setup_id=0, scale=0)
    print("Writing traces ...")
    write_vol_from_traces(traces, out_path, key0, shape, resolution,
                          chunks, radius, n_threads)

    print("Downscaling traces ...")
    make_scales(out_path, scale_factors, downscale_mode='max',
                ndim=3, setup_id=0, is_h5=is_h5,
                chunks=chunks, n_threads=n_threads)

    xml_path = os.path.splitext(out_path)[0] + '.xml'
    # we assume that the resolution is in nanometer, but want to write microns for bdv
    bdv_res = [res / 1000. for res in resolution]
    unit = 'micrometer'
    write_xml_metadata(xml_path, out_path, unit, bdv_res, is_h5)

    bdv_scale_factors = [[1, 1, 1]] + scale_factors
    if is_h5:
        write_h5_metadata(out_path, bdv_scale_factors)
    else:
        write_n5_metadata(out_path, bdv_scale_factors, bdv_res)

def add_timepoint(in_path, out_path, tp, channel, mode, in_key=None):
    if in_key is None:
        in_key = get_key(is_h5=True, timepoint=tp, setup_id=channel, scale=0)
    out_key = get_key(is_h5=False, timepoint=tp, setup_id=0, scale=0)

    # skip timepoints that have been copied already
    with open_file(out_path, 'r') as f:
        if out_key in f:
            return

    convert_to_bdv(in_path, in_key, out_path,
                   SCALE_FACTORS, downscale_mode=mode,
                   resolution=RESOLUTION, unit='micrometer',
                   setup_id=0, timepoint=tp)

def intensity_correction(in_path, out_path, mask_path, mask_key,
                         trafo_path, tmp_folder, resolution,
                         target='slurm', max_jobs=250):
    trafo_ext = os.path.splitext(trafo_path)[1]
    if trafo_ext == '.csv':
        trafo_path = csv_to_json(trafo_path)
    elif trafo_ext != '.json':
        raise ValueError("Expect trafo as csv or json, got %s" % trafo_ext)

    in_is_h5 = is_h5_file(in_path)
    out_is_h5 = is_h5_file(out_path)
    key = get_key(in_is_h5, 0, 0, 0)
    out_key = get_key(out_is_h5, 0, 0, 0)
    validate_trafo(trafo_path, in_path, key)

    config_dir = os.path.join(tmp_folder, 'configs')
    write_default_global_config(config_dir)

    task = LinearTransformationWorkflow
    conf = task.get_config()['linear']
    conf.update({'time_limit': 360, 'mem_limit': 8})
    with open(os.path.join(config_dir, 'linear.config'), 'w') as f:
        json.dump(conf, f)

    t = task(tmp_folder=tmp_folder, config_dir=config_dir,
             target=target, max_jobs=max_jobs,
             input_path=in_path, input_key=key,
             mask_path=mask_path, mask_key=mask_key,
             output_path=out_path, output_key=out_key,
             transformation=trafo_path)
    ret = luigi.build([t], local_scheduler=True)
    if not ret:
        raise RuntimeError("Transformation failed")

    downsample(in_path, out_path, out_key, out_path,
               resolution, tmp_folder, target, max_jobs)

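# Hypothetical usage sketch for intensity_correction (not from the original
# source): applies a linear intensity transformation inside the mask and then
# builds the scale pyramid. Paths and the resolution are illustrative.
def _example_intensity_correction():
    intensity_correction('data/raw.n5', 'data/raw-corrected.n5',
                         mask_path='data/mask.n5', mask_key='data',
                         trafo_path='intensity_trafo.json',
                         tmp_folder='tmp_correction',
                         resolution=[0.025, 0.02, 0.02],
                         target='local', max_jobs=8)
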
def test_multi_timepoint(self):
    from pybdv import make_bdv
    from pybdv.metadata import get_time_range
    n_timepoints = 6
    shape = (64,) * 3

    tp_data = []
    tp_setups = []
    for tp in range(n_timepoints):
        data = np.random.rand(*shape)
        # make sure that we at least have 2 setup ids that agree
        setup_id = np.random.randint(0, 20) if tp > 1 else 0
        make_bdv(data, self.out_path, setup_id=setup_id, timepoint=tp)
        tp_data.append(data)
        tp_setups.append(setup_id)

    tstart, tstop = get_time_range(self.xml_path)
    self.assertEqual(tstart, 0)
    self.assertEqual(tstop, n_timepoints - 1)

    for tp in range(n_timepoints):
        setup_id = tp_setups[tp]
        tp_key = get_key(self.is_h5, timepoint=tp, setup_id=setup_id, scale=0)
        with open_file(self.out_path, 'r') as f:
            data = f[tp_key][:]
        data_exp = tp_data[tp]
        self.assertTrue(np.allclose(data, data_exp))

def test_multi_setup(self):
    from pybdv import make_bdv
    from pybdv.metadata import get_affine
    shape = (64,) * 3
    n_views = 2

    data_dict = {}
    affine_dict = {}

    for vid in range(n_views):
        data = np.random.rand(*shape).astype('float32')
        affine = {'trafo1': [round(aff, 4) for aff in np.random.rand(12)],
                  'trafo2': [round(aff, 4) for aff in np.random.rand(12)]}
        make_bdv(data, self.out_path, setup_id=vid, affine=affine)
        data_dict[vid] = data
        affine_dict[vid] = affine

    # check implicit setup id
    data = np.random.rand(*shape).astype('float32')
    make_bdv(data, self.out_path)
    data_dict[n_views] = data

    for vid in range(n_views + 1):
        expected_key = get_key(self.is_h5, timepoint=0, setup_id=vid, scale=0)
        with open_file(self.out_path, 'r') as f:
            self.assertTrue(expected_key in f)
            data = f[expected_key][:]
        exp_data = data_dict[vid]
        self.assertTrue(np.allclose(data, exp_data))

    # check affine trafos (only for explicit setup-ids)
    for vid in range(n_views):
        affine = affine_dict[vid]
        affine_out = get_affine(self.xml_path, vid)
        self.assertEqual(affine, affine_out)

def add_segmentations(seg_paths):
    path0 = seg_paths[0]
    seg_name = 'lm-cells'
    out_path = f'./data/{DS_NAME}/images/local/{seg_name}.n5'
    key = 'data'

    if not os.path.exists(out_path.replace('.n5', '.xml')):
        add_segmentation(path0, key,
                         './data', DS_NAME, segmentation_name=seg_name,
                         resolution=RESOLUTION, chunks=CHUNKS,
                         scale_factors=SCALE_FACTORS, max_jobs=8,
                         add_default_table=False)
    assert os.path.exists(out_path)

    for tp, path in enumerate(seg_paths[1:], 1):
        add_timepoint(path, out_path, tp, channel=0, mode='nearest', in_key=key)
        tp_key = get_key(False, tp, 0, 0)
        add_max_id(out_path, tp_key, out_path, tp_key,
                   tmp_folder=f'tmp_max_ids/tp{tp}', target='local', max_jobs=8)

def make_table(seg_path, n_timepoints, out_path):
    tmp_tables = './tmp_tables'
    table = None
    for tp in range(n_timepoints):
        tmp_folder = os.path.join(tmp_tables, f'table{tp}')
        res_path = os.path.join(tmp_folder, 'table2.csv')
        if os.path.exists(res_path):
            this_table = pd.read_csv(res_path, sep='\t')
        else:
            tmp_path = os.path.join(tmp_folder, 'table.csv')
            key = get_key(False, timepoint=tp, setup_id=0, scale=0)
            compute_default_table(seg_path, key, tmp_path,
                                  resolution=RESOLUTION, tmp_folder=tmp_folder,
                                  target='local', max_jobs=8)
            this_table = pd.read_csv(tmp_path, sep='\t')
            this_table = update_table(this_table, tp, seg_path, key)
            this_table.to_csv(res_path, sep='\t', index=False)

        if table is None:
            table = this_table
        else:
            table = pd.concat([table, this_table])

    table.to_csv(out_path, sep='\t', index=False)

def check_segmentation(self, dataset_folder, name):
    self.assertTrue(os.path.exists(dataset_folder))
    exp_data = self.data

    # check the segmentation metadata
    metadata = read_dataset_metadata(dataset_folder)
    self.assertIn(name, metadata["sources"])
    validate_source_metadata(name, metadata["sources"][name], dataset_folder)

    # check the segmentation data
    seg_path = os.path.join(dataset_folder, "images", "bdv-n5", f"{name}.n5")
    self.assertTrue(os.path.exists(seg_path))
    key = get_key(False, 0, 0, 0)
    with open_file(seg_path, "r") as f:
        data = f[key][:]
    self.assertTrue(np.array_equal(data, exp_data))

    # check the table
    table_path = os.path.join(dataset_folder, "tables", name, "default.tsv")
    self.assertTrue(os.path.exists(table_path), table_path)
    table = pd.read_csv(table_path, sep="\t")

    label_ids = table["label_id"].values
    exp_label_ids = np.unique(data)
    if 0 in exp_label_ids:
        exp_label_ids = exp_label_ids[1:]
    self.assertTrue(np.array_equal(label_ids, exp_label_ids))

def check_result(timepoint):
    import napari
    print("Checking results for timepoint", timepoint)
    halo = [64, 384, 384]

    key = get_key(is_h5=True, timepoint=timepoint, setup_id=0, scale=0)
    with open_file(PATH, 'r') as f:
        ds = f[key]
        bb = tuple(slice(sh // 2 - ha, sh // 2 + ha)
                   for sh, ha in zip(ds.shape, halo))
        raw = ds[bb]

    pred_path = os.path.join('tmp_plantseg/tp_%03i/PreProcessing' % timepoint,
                             'generic_light_sheet_3d_unet/PostProcessing/raw_predictions.h5')
    with open_file(pred_path, 'r') as f:
        pred = f['predictions'][bb]

    seg_path = 'tmp_plantseg/tp_%03i/segmentation.h5' % timepoint
    with open_file(seg_path, 'r') as f:
        seg = f['data'][bb]

    with napari.gui_qt():
        viewer = napari.Viewer()
        viewer.add_image(raw)
        viewer.add_image(pred)
        viewer.add_labels(seg)

def _get_number_of_labels(self):
    seg_path = self.cell_segmentation_path if self.compute_cell_features \
        else self.nucleus_segmentation_path
    is_h5 = os.path.splitext(seg_path)[1].lower() in ('.hdf', '.hdf5', '.h5')
    key = get_key(is_h5, time_point=0, setup_id=0, scale=0)
    with vu.file_reader(seg_path, 'r') as f:
        n_labels = int(f[key].attrs['maxId']) + 1
    return n_labels

def get_scale_key(self, scale):
    if self.metadata_format == 'paintera':
        prefix = 's%i' % scale
        out_key = os.path.join(self.output_key_prefix, prefix)
    else:
        is_h5 = self.metadata_format in ('bdv', 'bdv.hdf5')
        # TODO support multiple set-ups for multi-channel data
        out_key = get_key(is_h5, time_point=0, setup_id=0, scale=scale)
    return out_key

def check_seg(self, exp_data, scales):
    key = get_key(False, 0, 0, 0)
    with open_file(self.out_path, 'r') as f:
        ds = f[key]
        data = ds[:]
        max_id = ds.attrs['maxId']
    self.assertEqual(data.shape, exp_data.shape)
    self.assertTrue(np.array_equal(data, exp_data))
    self.assertAlmostEqual(max_id, data.max())

    exp_shape = data.shape
    for scale, scale_factor in enumerate(scales, 1):
        key = get_key(False, 0, 0, scale)
        with open_file(self.out_path, 'r') as f:
            self.assertIn(key, f)
            this_shape = f[key].shape
        exp_shape = sample_shape(exp_shape, scale_factor)
        self.assertEqual(this_shape, exp_shape)

def check_result(self):
    with open_file(self.in_path, 'r') as f:
        exp = f['data'][:]
    key = get_key(self.is_h5, timepoint=0, setup_id=0, scale=0)
    with open_file(self.out_path, 'r') as f:
        self.assertTrue(key in f)
        res = f[key][:]
    self.assertEqual(res.shape, exp.shape)
    self.assertTrue(np.allclose(res, exp))

def number_of_voxels():
    p = '../data/rawdata/sbem-6dpf-1-whole-raw.n5'
    is_h5 = is_h5_file(p)
    key = get_key(is_h5, setup_id=0, time_point=0, scale=0)
    with open_file(p, 'r') as f:
        ds = f[key]
        shape = ds.shape

    n_vox = np.prod(list(shape))
    print("Number of voxels:")
    print(n_vox)
    print("corresponds to")
    print(float(n_vox) / 1e12, "TVoxel")

def get_data_key(file_format, scale, path=None):
    if file_format.startswith("bdv"):
        is_h5 = file_format == "bdv.hdf5"
        key = get_key(is_h5, timepoint=0, setup_id=0, scale=scale)
    elif file_format == "ome.zarr":
        assert path is not None
        with open_file(path, "r") as f:
            mscales = f.attrs["multiscales"][0]
            key = mscales["datasets"][0]["path"]
    else:
        raise NotImplementedError(file_format)
    return key

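# Hypothetical usage sketch for get_data_key (not from the original source):
# the ome.zarr branch needs the path to look up the multiscale metadata; the
# zarr path below is illustrative.
def _example_get_data_key():
    key_n5 = get_data_key('bdv.n5', scale=2)
    key_zarr = get_data_key('ome.zarr', scale=0, path='data/image.ome.zarr')
    print(key_n5, key_zarr)
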
def segment_timepoint(timepoint, gpu=None):
    # create the input data for this timepoint
    tmp_folder = 'tmp_plantseg/tp_%03i' % timepoint
    os.makedirs(tmp_folder, exist_ok=True)

    if gpu is not None:
        assert isinstance(gpu, int)
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)

    with ChangeDir(tmp_folder):
        raw_path = './raw.h5'
        config_path = './config.yaml'
        res_path = './segmentation.h5'
        if os.path.exists(res_path):
            return

        with open(TEMPLATE_CONFIG, 'r') as f:
            config = yaml.safe_load(f)
        config['path'] = raw_path
        with open(config_path, 'w') as f:
            yaml.dump(config, f)

        with open_file(raw_path, 'a') as f:
            if 'raw' not in f:
                key = get_key(is_h5=True, timepoint=timepoint, setup_id=0, scale=0)
                with open_file(PATH, 'r') as f_in:
                    raw = f_in[key][:]
                f.create_dataset('raw', data=raw, chunks=(32, 128, 128))

        cmd = [PYTHON, PLANTSEG, '--config', config_path]
        run(cmd)

        print("Run post-processing ...")
        seg_path = 'PreProcessing/generic_light_sheet_3d_unet/MultiCut/raw_predictions_multicut.h5'
        with open_file(seg_path, 'r') as f, open_file(res_path, 'a') as f_out:
            seg = f['segmentation'][:]

            ids, sizes = np.unique(seg, return_counts=True)
            if 0 in ids:
                ids += 1
                seg += 1
            # map the largest segment (assumed to be the background) to zero
            bg_id = ids[np.argmax(sizes)]
            seg[seg == bg_id] = 0

            seg = seg.astype('uint32')
            vigra.analysis.relabelConsecutive(seg, out=seg, start_label=1, keep_zeros=True)
            f_out.create_dataset('data', data=seg, compression='gzip')

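# Hypothetical usage sketch for segment_timepoint (not from the original source):
# runs the plantseg segmentation for a range of timepoints, one after the other
# on gpu 0; the timepoint range is illustrative.
def _example_segment_timepoints():
    for tp in range(4):
        segment_timepoint(tp, gpu=0)
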
def append_nephridia_table():
    table_path = '../../data/0.6.5/tables/sbem-6dpf-1-whole-segmented-cilia/cell_mapping.csv'
    table = pd.read_csv(table_path, sep='\t')
    cell_ids = table['cell_id'].values
    cell_ids = np.unique(cell_ids)
    if cell_ids[0] == 0:
        cell_ids = cell_ids[1:]

    out_table_path = '../../data/0.6.5/tables/sbem-6dpf-1-whole-segmented-cells/regions.csv'
    seg_path = '../../data/0.6.5/segmentations/sbem-6dpf-1-whole-segmented-cells.n5'
    nep_path = '../../data/0.6.5/segmentations/sbem-6dpf-1-whole-segmented-nephridia.n5'

    table = pd.read_csv(out_table_path, sep='\t')
    new_col = np.zeros(len(table), dtype='float32')

    print("Loading volumes ...")
    scale = 4
    is_h5 = is_h5_file(seg_path)
    key = get_key(is_h5, setup_id=0, time_point=0, scale=scale)
    with open_file(seg_path, 'r') as f:
        seg = f[key][:]

    scale = 0
    is_h5 = is_h5_file(nep_path)
    key = get_key(is_h5, setup_id=0, time_point=0, scale=scale)
    with open_file(nep_path, 'r') as f:
        nep = f[key][:]
    assert nep.shape == seg.shape

    print("Iterating over cells ...")
    for cid in cell_ids:
        nid = np.unique(nep[seg == cid])
        if 0 in nid:
            nid = nid[1:]
        assert len(nid) == 1
        new_col[cid] = nid

    table['nephridia'] = new_col
    table.to_csv(out_table_path, sep='\t', index=False)

def _test_simple(self, shape, affine=None):
    from pybdv import make_bdv
    data = np.random.rand(*shape).astype('float32')
    make_bdv(data, self.out_path, affine=affine)

    key = get_key(self.is_h5, timepoint=0, setup_id=0, scale=0)
    self.assertTrue(os.path.exists(self.out_path))
    with open_file(self.out_path, 'r') as f:
        self.assertTrue(key in f)
        ds = f[key]
        self.assertEqual(ds.shape, shape)
        out_data = ds[:]
    self.assertTrue(np.allclose(data, out_data))

def test_multi_threaded(self):
    from pybdv import make_bdv
    shape = (128,) * 3
    chunks = (64,) * 3
    data = np.random.rand(*shape)
    scale_factors = 2 * [[2, 2, 2]]
    make_bdv(data, self.out_path, chunks=chunks, n_threads=4,
             downscale_factors=scale_factors)

    key = get_key(self.is_h5, timepoint=0, setup_id=0, scale=0)
    with open_file(self.out_path, 'r') as f:
        d = f[key][:]
    self.assertTrue(np.allclose(d, data))

def make_nephridia_segmentation():
    table_path = '../../data/0.6.5/tables/sbem-6dpf-1-whole-segmented-cilia/cell_mapping.csv'
    seg_path = '../../data/0.6.5/segmentations/sbem-6dpf-1-whole-segmented-cells.n5'
    out_path = '../../data/0.6.5/segmentations/sbem-6dpf-1-whole-segmented-nephridia.xml'

    table = pd.read_csv(table_path, sep='\t')
    cell_ids = table['cell_id'].values
    cell_ids = np.unique(cell_ids)
    if cell_ids[0] == 0:
        cell_ids = cell_ids[1:]
    print(cell_ids)

    scale = 4
    is_h5 = is_h5_file(seg_path)
    key = get_key(is_h5, setup_id=0, time_point=0, scale=scale)
    with open_file(seg_path, 'r') as f:
        ds = f[key]
        seg = ds[:].astype('uint32')

    bshape = (32, 256, 256)
    tmp = np.zeros_like(seg)
    print("Isin ...")
    tmp = elf.parallel.isin(seg, cell_ids, out=tmp, n_threads=16,
                            verbose=True, block_shape=bshape)
    print("Label ...")
    tmp = vigra.analysis.labelVolumeWithBackground(tmp)

    print("Size filter ...")
    ids, counts = elf.parallel.unique(tmp, return_counts=True, n_threads=16,
                                      verbose=True, block_shape=bshape)
    # keep only the three largest components; the largest is expected to be the background
    keep_ids = np.argsort(counts)[::-1]
    keep_ids = ids[keep_ids[:3]]
    assert keep_ids[0] == 0

    out = np.zeros(tmp.shape, dtype='uint8')
    for new_id, keep_id in enumerate(keep_ids[1:], 1):
        out[tmp == keep_id] = new_id

    factors = 3 * [[2, 2, 2]]
    res = [.4, .32, .32]
    make_bdv(out, out_path, factors, resolution=res, unit='micrometer')

def test_custom_chunks(self):
    from pybdv import make_bdv
    shape = (128,) * 3
    chunks = (64, 42, 59)
    data = np.random.rand(*shape)
    make_bdv(data, self.out_path, chunks=chunks)

    key = get_key(self.is_h5, timepoint=0, setup_id=0, scale=0)
    with open_file(self.out_path, 'r') as f:
        ds = f[key]
        chunks_out = ds.chunks
        d = ds[:]
    self.assertEqual(chunks, chunks_out)
    self.assertTrue(np.allclose(d, data))

def _check(exp_data, exp_sf, exp_attrs, exp_affine):
    key = get_key(self.is_h5, timepoint=0, setup_id=0, scale=0)
    with open_file(self.out_path, 'r') as f:
        data = f[key][:]
    self.assertTrue(np.allclose(data, exp_data))

    sf = get_scale_factors(self.out_path, setup_id=0)
    sf = absolute_to_relative_scale_factors(sf)
    self.assertEqual(sf, [[1, 1, 1]] + exp_sf)

    attrs = get_attributes(self.xml_path, setup_id=0)
    self.assertEqual(attrs, exp_attrs)

    affine = get_affine(self.xml_path, setup_id=0, timepoint=0)['affine0']
    self.assertTrue(np.allclose(np.array(affine), np.array(exp_affine), atol=1e-4))

def write_h5_files(table, folder, raw_seg_path):
    """ Write an individual h5 file for each row in the table, equal to the
    bounding box of that object + a 10 pixel border in all dimensions.

    Args:
        table [pd.Dataframe] - table of nucleus statistics
        folder [str] - a temporary folder to write files to
        raw_seg_path [str] - path to the raw segmentation .h5
    """
    for row in table.itertuples(index=False):
        # min / max coordinates of the segmented object in microns
        minmax_seg = [row.bb_min_x, row.bb_min_y, row.bb_min_z,
                      row.bb_max_x, row.bb_max_y, row.bb_max_z]

        # raw scale (from the xml) for the 2x downsampled raw data
        raw_scale = [0.02, 0.02, 0.025]

        # slice for the raw file
        raw_slice = calculate_slice(raw_scale, minmax_seg, addBorder=True)

        is_h5 = is_h5_file(raw_seg_path)
        raw_key = get_key(is_h5, setup_id=0, time_point=0, scale=1)
        with open_file(raw_seg_path, 'r') as f:
            # get the 2x downsampled nuclei
            data = f[raw_key]
            img_array = data[raw_slice]

        # write the h5 file for this nucleus
        result_path = folder + os.sep + str(row.label_id) + '.h5'
        with open_file(result_path, 'a') as f:
            # use 64x64x64 chunks only if the volume is at least that big
            if all(sh >= 64 for sh in img_array.shape):
                chunks = (64, 64, 64)
            else:
                chunks = img_array.shape
            f.create_dataset('dataset', chunks=chunks, compression='gzip',
                             shape=img_array.shape, dtype=img_array.dtype)
            f['dataset'][:] = img_array

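# Hypothetical usage sketch for write_h5_files (not from the original source):
# reads a nucleus statistics table with bounding box columns and writes one h5
# cutout per nucleus. The table and data paths are illustrative.
def _example_write_h5_files():
    table = pd.read_csv('data/nucleus_table.tsv', sep='\t')
    write_h5_files(table, 'tmp_nuclei', 'data/sbem-6dpf-1-whole-raw.h5')
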