def view_segmentations(version, raw_scale, seg_names=None, seg_scales=None, bb=np.s_[:]):
    # avoid mutable default arguments
    seg_names = [] if seg_names is None else seg_names
    seg_scales = [] if seg_scales is None else seg_scales

    folder = os.path.join(ROOT, version, 'images', 'local')

    # load the raw data at the requested scale
    raw_file = os.path.join(folder, 'sbem-6dpf-1-whole-raw.xml')
    raw_file = get_data_path(raw_file, return_absolute_path=True)
    raw_key = get_key(False, time_point=0, setup_id=0, scale=raw_scale)
    with z5py.File(raw_file, 'r') as f:
        ds = f[raw_key]
        ds.n_threads = 16
        raw = ds[bb]
    ref_shape = raw.shape
    data = [to_source(raw, name='raw')]

    # load the segmentations and resize them to the raw shape if necessary
    for seg_name, seg_scale in zip(seg_names, seg_scales):
        seg_file = os.path.join(folder, seg_name + '.xml')
        seg_file = get_data_path(seg_file, return_absolute_path=True)
        seg_key = get_key(False, time_point=0, setup_id=0, scale=seg_scale)
        with z5py.File(seg_file, 'r') as f:
            ds = f[seg_key]
            ds.n_threads = 16
            seg = ds[bb].astype('uint32')
        if seg.shape != ref_shape:
            # FIXME this will fail with bounding box
            print("Resize", ref_shape)
            seg = ResizeWrapper(to_source(seg, name=seg_name), ref_shape)
            data.append(seg)
        else:
            data.append(to_source(seg, name=seg_name))

    view(*data)
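# Hedged usage sketch (not part of the original module): view the raw data
# together with one segmentation in a small bounding box. The version, the
# segmentation name, the scales and the bounding box are hypothetical
# placeholders, not values taken from the repository.
def _example_view_segmentations():
    bb = np.s_[100:200, 2000:3000, 2000:3000]
    view_segmentations('1.0.1', raw_scale=3,
                       seg_names=['sbem-6dpf-1-whole-segmented-cells'],
                       seg_scales=[2], bb=bb)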
def segment_chromatin(version, ilastik_project, ilastik_directory):
    version_folder = os.path.join(ROOT, version)
    assert os.path.exists(version_folder), version_folder

    raw_path = os.path.join(version_folder, 'images', 'local', 'sbem-6dpf-1-whole-raw.xml')
    raw_path = get_data_path(raw_path, return_absolute_path=True)

    nucleus_seg_path = os.path.join(version_folder, 'images', 'local',
                                    'sbem-6dpf-1-whole-segmented-nuclei.xml')
    nucleus_seg_path = get_data_path(nucleus_seg_path, return_absolute_path=True)
    nuclei_table = os.path.join(version_folder, 'tables',
                                'sbem-6dpf-1-whole-segmented-nuclei-labels', 'default.csv')

    tmp_input = 'tmp_chromatin_prediction/tmp_input'
    tmp_output = 'tmp_chromatin_prediction/tmp_output'
    os.makedirs(tmp_input, exist_ok=True)
    os.makedirs(tmp_output, exist_ok=True)
    final_output = './chromatin_prediction.h5'

    # in general run on cluster - 256GB ram, 32 cores
    chromatin_segmentation_workflow(nuclei_table, nucleus_seg_path,
                                    ilastik_project, ilastik_directory,
                                    tmp_input, tmp_output,
                                    final_output, raw_path,
                                    chunk_size=3000,
                                    cores=32, memory=254000)
def make_nuclei_tables(old_folder, folder, name, tmp_folder, resolution,
                       target='slurm', max_jobs=100, seg_has_changed=True):
    # make the table folder
    table_folder = os.path.join(folder, 'tables', name)
    os.makedirs(table_folder, exist_ok=True)

    seg_key = get_seg_key(folder, name, scale=0)
    seg_path = get_seg_path(folder, name, seg_key)

    # make the basic attributes table
    base_out = os.path.join(table_folder, 'default.csv')
    base_attributes(seg_path, seg_key, base_out, resolution,
                    tmp_folder, target=target, max_jobs=max_jobs,
                    correct_anchors=True)

    # make the morphology attribute table
    xml_raw = os.path.join(folder, 'images', 'local', 'sbem-6dpf-1-whole-raw.xml')
    raw_path = get_data_path(xml_raw, return_absolute_path=True)
    chromatin_seg_path = get_seg_path(folder, 'sbem-6dpf-1-whole-segmented-chromatin')
    morpho_out = os.path.join(table_folder, 'morphology.csv')
    write_morphology_nuclei(raw_path, seg_path, chromatin_seg_path,
                            base_out, morpho_out,
                            tmp_folder, target, max_jobs)

    # mapping to extrapolated intensities
    mask_name = 'sbem-6dpf-1-whole-segmented-extrapolated'
    k1 = get_seg_key(folder, name, 1)
    k2 = get_seg_key(folder, mask_name, 0)
    extrapol_mask = os.path.join(folder, 'images', 'local', '%s.xml' % mask_name)
    extrapol_mask = get_data_path(extrapol_mask, return_absolute_path=True)
    extrapol_out = os.path.join(table_folder, 'extrapolated_intensity_correction.csv')
    extrapolated_intensities(seg_path, k1, extrapol_mask, k2,
                             extrapol_out, tmp_folder, target, max_jobs)

    write_additional_table_file(table_folder)
def _to_bdv_s3(file_format, dataset_folder, dataset_name, storage,
               service_endpoint, bucket_name, region):
    new_format = file_format + ".s3"
    os.makedirs(os.path.join(dataset_folder, "images", new_format.replace(".", "-")),
                exist_ok=True)

    xml = storage["relativePath"]
    xml_remote = xml.replace(file_format.replace(".", "-"), new_format.replace(".", "-"))

    # the absolute xml paths
    xml_path = os.path.join(dataset_folder, xml)
    xml_remote_path = os.path.join(dataset_folder, xml_remote)

    data_rel_path = os.path.join(os.path.split(xml)[0], get_data_path(xml_path))
    data_abs_path = os.path.join(dataset_folder, data_rel_path)
    if not os.path.exists(data_abs_path):
        warn(f"Could not find data path at {data_abs_path} corresponding to xml {xml_path}")
    path_in_bucket = os.path.join(dataset_name, data_rel_path)

    # copy the xml for the remote data
    copy_xml_as_n5_s3(xml_path, xml_remote_path,
                      service_endpoint=service_endpoint,
                      bucket_name=bucket_name,
                      path_in_bucket=path_in_bucket,
                      region=region,
                      bdv_type=new_format)
    return new_format, {"relativePath": xml_remote}
def make_n5_files(version):
    version_folder = os.path.join(ROOT, version)

    # default chunk size
    default_chunks = 3 * (128,)
    # special chunk sizes
    chunk_dict = {'sbem-6dpf-1-whole-raw': None}  # don't copy raw yet

    copied = []
    xmls = glob(os.path.join(version_folder, 'images', 'local', '*.xml'))
    for xml in xmls:
        name = os.path.splitext(os.path.split(xml)[1])[0]
        chunks = chunk_dict.get(name, default_chunks)
        # chunks None means we skip copying for now
        if chunks is None:
            continue

        h5_path = get_data_path(xml, return_absolute_path=True)
        n5_path = os.path.splitext(h5_path)[0] + '.n5'
        copied.append(h5_path)
        if os.path.exists(n5_path):
            continue

        # load resolution from xml
        resolution = get_resolution(xml, 0)
        copy_to_bdv_n5(h5_path, n5_path, chunks, resolution)

    return copied
def create_auxiliary_gene_file(meds_root, out_file, return_result=False):
    all_genes_dset = 'genes'
    names_dset = 'gene_names'

    # get all the prospr gene xmls in the image folder
    med_files = glob(os.path.join(meds_root, "prospr*.xml"))
    # filter out the prospr files that are not genes (= segmented regions and virtual cells)
    med_files = [name for name in med_files if 'segmented' not in name]
    med_files = [name for name in med_files if 'virtual' not in name]

    # get the gene names from the file names
    gene_names = [os.path.splitext(os.path.basename(f))[0] for f in med_files]
    # cut off the preceding prospr-... part
    gene_names = ['-'.join(name.split('-')[4:]) for name in gene_names]
    num_genes = len(gene_names)
    assert num_genes == len(med_files)

    # get the data paths from the xmls
    med_files = [get_data_path(med_file, return_absolute_path=True)
                 for med_file in med_files]

    is_h5 = os.path.splitext(med_files[0])[1] == '.h5'
    med_key = get_key(is_h5, time_point=0, setup_id=0, scale=0)
    with open_file(med_files[0], 'r') as f:
        spatial_shape = f[med_key].shape
    shape = (num_genes,) + spatial_shape

    # iterate over the med files and write them, binarized, into one file
    with open_file(out_file) as f:
        out_dset = f.create_dataset(all_genes_dset, shape=shape, dtype='bool',
                                    chunks=(1, 64, 64, 64), compression='gzip')
        out_dset.n_threads = 8

        for i, med_file in enumerate(tqdm(med_files)):
            is_h5 = os.path.splitext(med_file)[1] == '.h5'
            med_key = get_key(is_h5, time_point=0, setup_id=0, scale=0)
            with open_file(med_file, 'r') as f2:
                ds = f2[med_key]
                this_shape = ds.shape
                if this_shape != spatial_shape:
                    raise RuntimeError("Incompatible shapes %s, %s" % (str(this_shape),
                                                                       str(spatial_shape)))
                ds.n_threads = 8
                data = ds[:]
            out_dset[i] = data

        gene_names_ascii = [n.encode('ascii', 'ignore') for n in gene_names]
        f.create_dataset(names_dset, data=gene_names_ascii, dtype='S40')

    if return_result:
        # reload the binarized version
        with open_file(out_file, 'r') as f:
            all_genes = f[all_genes_dset][:]
        return all_genes
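# Hedged usage sketch (not part of the original module): read the binarized
# gene volumes and their names back from the auxiliary file written above.
# The output file name is a hypothetical placeholder.
def _example_read_auxiliary_genes():
    with open_file('prospr-6dpf-1-whole_meds_all_genes.h5', 'r') as f:
        all_genes = f['genes'][:]  # bool array of shape (num_genes,) + spatial shape
        gene_names = [n.decode('ascii') for n in f['gene_names'][:]]
    print(len(gene_names), all_genes.shape)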
def export_meshes(xml_path, table_path, cell_ids, out_folder, scale,
                  resolution=None, n_jobs=16):
    os.makedirs(out_folder, exist_ok=True)

    if resolution is None:
        resolution = get_resolution(xml_path, 0)
    if scale > 0:
        resolution = [re * 2 ** scale for re in resolution]

    # load the segmentation dataset
    path = get_data_path(xml_path, return_absolute_path=True)
    key = 'setup0/timepoint0/s%i' % scale
    f = z5py.File(path, 'r')
    ds = f[key]
    ds.n_threads = 8

    # load the default table to get the bounding boxes
    if table_path is None:
        bb_starts, bb_stops = None, None
    else:
        bb_starts, bb_stops = load_bounding_boxes(table_path, resolution)

    def _mesh(cell_id):
        out_path = os.path.join(out_folder, 'mesh_%i.obj' % cell_id)
        export_mesh(cell_id, ds, bb_starts, bb_stops, resolution, out_path)

    print("Computing meshes ...")
    with futures.ThreadPoolExecutor(n_jobs) as tp:
        list(tqdm(tp.map(_mesh, cell_ids), total=len(cell_ids)))
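# Hedged usage sketch (not part of the original module): export meshes for a
# few cells at a given scale. The xml path, table path and cell ids are
# hypothetical placeholders.
def _example_export_meshes():
    xml_path = 'data/1.0.1/images/local/sbem-6dpf-1-whole-segmented-cells.xml'
    table_path = 'data/1.0.1/tables/sbem-6dpf-1-whole-segmented-cells/default.csv'
    export_meshes(xml_path, table_path, cell_ids=[1350, 4507, 5312],
                  out_folder='./meshes', scale=2)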
def upload_source(dataset_folder, metadata, data_format, bucket_name,
                  s3_prefix="embl", client="minio"):
    if data_format.endswith(".s3"):
        # note: str.rstrip strips characters, not a suffix, so slice instead
        base_format = data_format[:-len(".s3")]
        raise ValueError(f"Cannot upload data in format {data_format}, use format {base_format} instead.")
    s3_format = data_format + ".s3"

    if data_format.startswith("bdv"):
        local_xml = os.path.join(dataset_folder,
                                 metadata["image"]["imageData"][data_format]["relativePath"])
        remote_xml = os.path.join(dataset_folder,
                                  metadata["image"]["imageData"][s3_format]["relativePath"])
        data_path = get_data_path(local_xml, return_absolute_path=True)
        path_in_bucket = read_path_in_bucket(remote_xml)
    elif data_format == "ome.zarr":
        data_path = os.path.join(dataset_folder,
                                 metadata["image"]["imageData"][data_format]["relativePath"])
        s3_address = metadata["image"]["imageData"][s3_format]["s3Address"]
        bucket_end_pos = s3_address.find(bucket_name) + len(bucket_name) + 1
        path_in_bucket = s3_address[bucket_end_pos:]
    else:
        raise ValueError(f"Invalid data format {data_format}")

    if client != "minio":
        raise ValueError(f"Invalid client {client}, currently only minio is supported")

    assert os.path.exists(data_path)
    cmd = ["mc", "cp", "-r", f"{data_path}/", f"{s3_prefix}/{bucket_name}/{path_in_bucket}/"]
    subprocess.run(cmd)
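# Hedged usage sketch (not part of the original module): upload a bdv.n5
# source whose metadata was loaded from the dataset's source list. The dataset
# folder, source name and bucket name are hypothetical placeholders, and the
# layout of `sources` (source name -> metadata dict) is an assumption here.
def _example_upload_source(sources):
    metadata = sources['sbem-6dpf-1-whole-raw']
    upload_source('data/1.0.1', metadata, data_format='bdv.n5',
                  bucket_name='platybrowser')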
def get_seg_path(folder, name, key=None):
    xml_path = os.path.join(folder, 'images', 'local', '%s.xml' % name)
    path = get_data_path(xml_path, return_absolute_path=True)
    assert os.path.exists(path), path
    if key is not None:
        with open_file(path, 'r') as f:
            assert key in f, "%s not in %s" % (key, path)
    return path
def _remove_image_data(storage_type, path):
    if storage_type.startswith("bdv"):
        # bdv data: remove the xml and the linked data
        data_path = bdv_metadata.get_data_path(path, return_absolute_path=True)
        os.remove(path)
        if storage_type.endswith("n5"):
            rmtree(data_path)
        else:
            os.remove(data_path)
    else:
        # ome.zarr data: can just rmtree
        rmtree(path)
def update_n5_xmls(version):
    version_folder = os.path.join(ROOT, version)
    xmls = glob(os.path.join(version_folder, 'images', 'local', '*.xml'))

    for xml in xmls:
        data_rel_path = get_data_path(xml)
        # is this already n5? -> continue
        # (note: splitext returns a tuple, so we need the extension at index 1)
        if os.path.splitext(data_rel_path)[1] == '.n5':
            continue

        # get the absolute path and check if the corresponding n5 file exists
        data_abs_path = get_data_path(xml, return_absolute_path=True)
        new_abs_path = os.path.splitext(data_abs_path)[0] + '.n5'
        # n5 file is not there? -> continue
        if not os.path.exists(new_abs_path):
            continue

        # write the new relative path
        new_rel_path = os.path.splitext(data_rel_path)[0] + '.n5'
        copy_xml_with_newpath(xml, xml, new_rel_path, data_format='bdv.n5')
def copy_and_check_image_dict(folder, new_folder):
    image_dict_in = os.path.join(folder, 'images', 'images.json')
    image_dict_out = os.path.join(new_folder, 'images', 'images.json')
    with open(image_dict_in) as f:
        image_dict = json.load(f)

    for name, properties in image_dict.items():
        # make sure the properties contain only valid keys
        invalid_keys = set(properties.keys()) - IMAGE_DICT_KEYS
        if len(invalid_keys) > 0:
            raise RuntimeError("Validating image dict: invalid keys %s" % str(invalid_keys))

        storage = properties['Storage']

        # validate the local xml location
        xml = storage['local']
        xml = os.path.join(new_folder, 'images', xml)
        if not os.path.exists(xml):
            raise RuntimeError("Validating image dict: could not find %s" % xml)

        # validate the data location
        data_path = get_data_path(xml, return_absolute_path=True)
        if not os.path.exists(data_path):
            raise RuntimeError("Validating image dict: could not find %s" % data_path)

        # validate the remote xml location
        if 'remote' in storage:
            xml = storage['remote']
            xml = os.path.join(new_folder, 'images', xml)
            if not os.path.exists(xml):
                raise RuntimeError("Validating image dict: could not find %s" % xml)

        # validate the tables
        if 'TableFolder' in properties:
            # check that we have the table folder
            table_folder = os.path.join(new_folder, properties['TableFolder'])
            if not os.path.exists(table_folder):
                raise RuntimeError("Validating image dict: could not find %s" % table_folder)

            # check that we have the default table
            default_table = os.path.join(table_folder, 'default.csv')
            if not os.path.exists(default_table):
                raise RuntimeError("Validating image dict: could not find %s" % default_table)

            # if we have an additional table file, check that the additional tables exist
            additional_table_file = os.path.join(table_folder, 'additional_tables.txt')
            if os.path.exists(additional_table_file):
                with open(additional_table_file, 'r') as f:
                    for fname in f:
                        additional_table = os.path.join(table_folder, fname.rstrip('\n'))
                        if not os.path.exists(additional_table):
                            raise RuntimeError("Validating image dict: could not find %s" % additional_table)

    with open(image_dict_out, 'w') as f:
        json.dump(image_dict, f)
def copy_file(xml_in, xml_out, storage='local'):
    if storage == 'local':
        data_path = get_data_path(xml_in, return_absolute_path=True)
        bdv_format = get_bdv_format(xml_in)
        xml_dir = os.path.split(xml_out)[0]
        data_path = os.path.relpath(data_path, start=xml_dir)
        copy_xml_with_newpath(xml_in, xml_out, data_path,
                              path_type='relative', data_format=bdv_format)
    elif storage == 'remote':
        shutil.copyfile(xml_in, xml_out)
    else:
        raise ValueError("Invalid storage spec %s" % storage)
def eval_seg(version):
    seg_path = os.path.join(ROOT, version, 'images', 'local', NAME + '.xml')
    seg_path = get_data_path(seg_path, return_absolute_path=True)
    if seg_path.endswith('.n5'):
        key = 'setup0/timepoint0/s0'
    else:
        key = 't00000/s00/0/cells'

    fp, fn, tot = eval_nuclei(seg_path, key, ANNOTATIONS)
    print("Evaluation yields:")
    print("False positives:", fp)
    print("False negatives:", fn)
    print("Total number of annotations:", tot)
def fix_chunks_dataset(source, scale, corrupted_chunks):
    bucket = 'platybrowser'
    source_s3_key = read_path_in_bucket(source)
    local_ds_path = get_data_path(source.replace('remote', 'local'),
                                  return_absolute_path=True)
    ds_key = f'setup0/timepoint0/s{scale}'
    return fix_corrupted_chunks_minio(corrupted_chunks,
                                      local_ds_path, ds_key,
                                      bucket, source_s3_key, ds_key,
                                      server='embl')
def copy_xml_file(xml_in, xml_out, file_format):
    if file_format in ('bdv.hdf5', 'bdv.n5'):
        data_path = get_data_path(xml_in, return_absolute_path=True)
        bdv_format = get_bdv_format(xml_in)
        xml_dir = os.path.split(xml_out)[0]
        data_path = os.path.relpath(data_path, start=xml_dir)
        copy_xml_with_newpath(xml_in, xml_out, data_path,
                              path_type='relative', data_format=bdv_format)
    elif file_format == 'bdv.n5.s3':
        shutil.copyfile(xml_in, xml_out)
    else:
        raise ValueError(f"Invalid file format {file_format}")
def make_remote_xmls(version):
    version_folder = os.path.join(ROOT, version)
    xmls = glob(os.path.join(version_folder, 'images', 'local', '*.xml'))

    # iterate over the xmls and check if the target is an n5 file;
    # if it is, make an xml with the correct path in the bucket in the remote folder
    for xml in xmls:
        data_path = get_data_path(xml, return_absolute_path=True)
        if not data_path.endswith('.n5'):
            continue

        path_in_bucket = os.path.relpath(data_path, ROOT)
        if 'local' in path_in_bucket:
            path_in_bucket = path_in_bucket.replace('local', 'remote')

        xml_out = xml.replace('local', 'remote')
        write_s3_xml(xml, xml_out, path_in_bucket)
def move_image_file(image_folder, xml_path):
    name = os.path.splitext(os.path.split(xml_path)[1])[0]
    new_name = look_up_filename(name)

    # get the linked hdf5 path
    image_path = get_data_path(xml_path, return_absolute_path=True)

    # move the xml to 'images/local'
    new_xml_path = os.path.join(image_folder, 'local', new_name + '.xml')
    if DRY_RUN:
        print("Moving", xml_path, "to", new_xml_path)
    else:
        shutil.move(xml_path, new_xml_path)

    # if the hdf5 file is in the same folder, move it to 'images/local' as well
    h5_is_local = len(os.path.relpath(image_path, os.path.split(xml_path)[0]).split('/')) == 1
    if h5_is_local:
        new_image_path = os.path.join(image_folder, 'local', new_name + '.h5')
        if DRY_RUN:
            print("Moving", image_path, "to", new_image_path)
        else:
            assert os.path.exists(image_path), image_path
            shutil.move(image_path, new_image_path)
    # if not, construct the new correct data path
    else:
        # the new image path might be in rawdata; in this case there is no '/local'
        # subfolder; if it is in a version folder, it is in '/local'
        im_root, im_name = os.path.split(image_path)
        # take care of 'segmentations'
        if os.path.split(im_root)[1] == 'segmentations':
            im_root = os.path.join(os.path.split(im_root)[0], 'images')
        new_image_path = os.path.join(im_root, new_name + '.h5')
        if not os.path.exists(new_image_path):
            new_image_path = os.path.join(im_root, 'local', new_name + '.h5')

    new_rel_data_path = os.path.relpath(new_image_path, os.path.split(new_xml_path)[0])
    if DRY_RUN:
        print("Setting new xml path to", new_rel_data_path)
    else:
        assert os.path.exists(new_image_path), new_image_path
        # set the path in the xml
        copy_xml_with_newpath(new_xml_path, new_xml_path, new_rel_data_path)

    return new_name
def get_seg_path(folder, name):
    # check if we have a data sub-folder; if we do, load
    # the segmentation from there
    data_folder = os.path.join(folder, 'images', 'local')
    data_folder = data_folder if os.path.exists(data_folder) else folder

    # check if we have an xml
    path = os.path.join(data_folder, '%s.xml' % name)
    # read the h5 path from the xml
    if os.path.exists(path):
        path = get_data_path(path, return_absolute_path=True)
        if not os.path.exists(path):
            raise RuntimeError("Invalid path in xml")
        return path
    else:
        raise RuntimeError("The specified folder does not contain segmentation file with name %s" % name)
def _check_data(storage, format_, name, dataset_folder,
                require_local_data, require_remote_data,
                assert_true, assert_equal):
    # checks for bdv format
    if format_.startswith("bdv"):
        path = os.path.join(dataset_folder, storage["relativePath"])
        assert_true(os.path.exists(path), f"Could not find data for {name} at {path}")

        # check that the source name and name in the xml agree for bdv formats
        bdv_name = get_name(path, setup_id=0)
        msg = f"{path}: Source name and name in bdv metadata disagree: {name} != {bdv_name}"
        assert_equal(name, bdv_name, msg)

        # check that the remote s3 address exists
        if format_.endswith(".s3") and require_remote_data:
            _check_bdv_n5_s3(path, assert_true)
        # check that the referenced local file path exists
        elif require_local_data:
            data_path = get_data_path(path, return_absolute_path=True)
            assert_true(os.path.exists(data_path))

    # local ome.zarr check: source name and name in the ome.zarr metadata agree
    elif format_ == "ome.zarr" and require_local_data:
        path = os.path.join(dataset_folder, storage["relativePath"])
        assert_true(os.path.exists(path), f"Could not find data for {name} at {path}")
        with open_file(path, "r", ext=".zarr") as f:
            ome_name = f.attrs["multiscales"][0]["name"]
        # we can't do this check if we only load a sub-channel
        if "channel" not in storage:
            assert_equal(
                name, ome_name,
                f"Source name and name in ngff metadata don't match: {name} != {ome_name}"
            )

    # remote ome.zarr check
    elif format_ == "ome.zarr.s3" and require_remote_data:
        s3_address = storage["s3Address"]
        channel = storage.get("channel")
        _check_ome_zarr_s3(s3_address, name, assert_true, assert_equal, channel)
def migrate_rawfolder():
    raw_folder = os.path.join(ROOT, 'rawdata')
    xmls = glob(os.path.join(raw_folder, "*.xml"))

    for xml_path in xmls:
        name = os.path.splitext(os.path.split(xml_path)[1])[0]
        new_name = look_up_filename(name)

        # get the linked hdf5 path
        image_path = get_data_path(xml_path, return_absolute_path=True)

        # move the xml to 'images/local'
        new_xml_path = os.path.join(raw_folder, new_name + '.xml')
        if DRY_RUN:
            print("Moving", xml_path, "to", new_xml_path)
        else:
            shutil.move(xml_path, new_xml_path)

        new_image_path = os.path.join(raw_folder, new_name + '.h5')
        if DRY_RUN:
            print("Moving", image_path, "to", new_image_path)
        else:
            assert os.path.exists(image_path), image_path
            shutil.move(image_path, new_image_path)

        new_rel_data_path = new_name + '.h5'
        if DRY_RUN:
            print("Setting new xml path to", new_rel_data_path)
        else:
            assert os.path.exists(new_image_path), new_image_path
            # set the path in the xml
            copy_xml_with_newpath(new_xml_path, new_xml_path, new_rel_data_path)

        # rename the tables folder if it exists
        table_folder = os.path.join(raw_folder, 'tables', name)
        if os.path.exists(table_folder):
            new_table_folder = os.path.join(raw_folder, 'tables', new_name)
            if DRY_RUN:
                print("Rename", table_folder, "to", new_table_folder)
            else:
                os.rename(table_folder, new_table_folder)
def cutout_data(tag, name, scale, bb_start, bb_stop):
    assert all(sta < sto for sta, sto in zip(bb_start, bb_stop))

    path = os.path.join('data', tag, name_to_path(name))
    path = get_data_path(path, return_absolute_path=True)
    resolution = get_res_level(scale)

    base_scale = name_to_base_scale(name)
    assert base_scale <= scale, "%s does not support scale %i; minimum is %i" % (name, scale, base_scale)
    data_scale = scale - base_scale

    # convert the physical bounding box to voxel coordinates
    # (axis order reversed from xyz to zyx)
    bb_start_ = [int(sta / re) for sta, re in zip(bb_start, resolution)][::-1]
    bb_stop_ = [int(sto / re) for sto, re in zip(bb_stop, resolution)][::-1]
    bb = tuple(slice(sta, sto) for sta, sto in zip(bb_start_, bb_stop_))

    key = 't00000/s00/%i/cells' % data_scale
    with h5py.File(path, 'r') as f:
        ds = f[key]
        data = ds[bb]
    return data
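# Hedged usage sketch (not part of the original module): cut out a block of
# data given a bounding box in the dataset's physical units (xyz order, as the
# function expects). The tag, name and coordinates are hypothetical placeholders.
def _example_cutout_data():
    data = cutout_data('0.6.5', 'raw', scale=3,
                       bb_start=[100., 100., 60.], bb_stop=[150., 150., 80.])
    print(data.shape)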
def check_dataset(self, dataset_folder, exp_shape, raw_name, file_format="bdv.n5"):
    # validate the full project
    validate_project(
        self.root,
        assert_true=self.assertTrue,
        assert_in=self.assertIn,
        assert_equal=self.assertEqual
    )

    # check the raw data
    folder_name = file_format.replace(".", "-")
    if file_format.startswith("bdv"):
        xml_path = os.path.join(dataset_folder, "images", folder_name, f"{raw_name}.xml")
        raw_path = get_data_path(xml_path, return_absolute_path=True)
        is_h5 = file_format == "bdv.hdf5"
        key = get_key(is_h5, 0, 0, 0)
    else:
        self.assertEqual(file_format, "ome.zarr")
        raw_path = os.path.join(dataset_folder, "images", folder_name, f"{raw_name}.ome.zarr")
        key = "s0"

    with open_file(raw_path, "r") as f:
        data = f[key][:]
    shape = data.shape
    self.assertEqual(shape, exp_shape)
    self.assertFalse(np.allclose(data, 0.))
def compare_seg_to_ref(seg_path, seg_key, version, with_roi, target, max_jobs):
    ref_path = os.path.join(ROOT, version, 'images', 'local',
                            'sbem-6dpf-1-whole-segmented-cells.xml')
    ref_path = get_data_path(ref_path, return_absolute_path=True)
    if ref_path.endswith('.n5'):
        ref_key = 'setup0/timepoint0/s0'
    else:
        ref_key = 't00000/s00/0/cells'
    shape = check_segmentations(ref_path, ref_key, seg_path, seg_key)

    halo = [100, 1024, 1024]
    if with_roi:
        roi_begin = [sh // 2 - ha for sh, ha in zip(shape, halo)]
        roi_end = [sh // 2 + ha for sh, ha in zip(shape, halo)]
    else:
        roi_begin = roi_end = None

    tmp_folder = './tmp_partition_comparison_%s' % seg_key
    # assuming partition_comparison returns (vi-split, vi-merge, adapted rand error),
    # which is what the print-out below expects
    vis, vim, ari = partition_comparison(seg_path, seg_key,
                                         ref_path, ref_key,
                                         tmp_folder, target, max_jobs,
                                         roi_begin=roi_begin, roi_end=roi_end)

    print("Have evaluated segmentation:")
    print(seg_path, ":", seg_key)
    print("against reference:")
    print(ref_path, ":", ref_key)
    print("Result:")
    print("VI:", vis, "(split)", vim, "(merge)", vis + vim, "(total)")
    print("Adapted Rand error:", ari)
def eval_seg(version, semantic_eval):
    seg_path = os.path.join(ROOT, version, 'images', 'local', NAME + '.xml')
    seg_path = get_data_path(seg_path, return_absolute_path=True)
    table_path = os.path.join(ROOT, version, 'tables', NAME, 'regions.csv')
    if seg_path.endswith('.n5'):
        key = 'setup0/timepoint0/s0'
    else:
        key = 't00000/s00/0/cells'

    if semantic_eval:
        semantic_mapping = load_semantic_mapping(table_path)
    else:
        semantic_mapping = None

    ignore_ids = get_ignore_seg_ids(table_path)
    fm, fs, tot = eval_cells(seg_path, key, ANNOTATIONS,
                             ignore_seg_ids=ignore_ids,
                             semantic_mapping=semantic_mapping)
    print("Evaluation yields:")
    print("False merges:", fm)
    print("False splits:", fs)
    print("Total number of annotations:", tot)
def xml_to_h5_path(xml_path):
    path = get_data_path(xml_path, return_absolute_path=True)
    return path
def make_cell_tables(old_folder, folder, name, tmp_folder, resolution,
                     target='slurm', max_jobs=100, seg_has_changed=True):
    # make the table folder
    table_folder = os.path.join(folder, 'tables', name)
    os.makedirs(table_folder, exist_ok=True)

    seg_key = get_seg_key(folder, name, scale=0)
    seg_path = get_seg_path(folder, name, seg_key)

    # make the basic attributes table
    base_out = os.path.join(table_folder, 'default.csv')
    label_ids = base_attributes(seg_path, seg_key, base_out, resolution,
                                tmp_folder, target=target, max_jobs=max_jobs,
                                correct_anchors=False)

    # make table with cell nucleus mapping
    nuc_mapping_table = os.path.join(table_folder, 'cells_to_nuclei.csv')
    nuc_path = get_seg_path(folder, 'sbem-6dpf-1-whole-segmented-nuclei', seg_key)
    map_cells_to_nuclei(label_ids, seg_path, nuc_path, nuc_mapping_table,
                        tmp_folder, target, max_jobs)

    # add a column with (somewhat stringent) cell criterion to the default table
    add_cell_criterion_column(base_out, nuc_mapping_table)

    # make table with gene mapping
    aux_gene_xml = os.path.join(folder, 'misc', 'prospr-6dpf-1-whole_meds_all_genes.xml')
    aux_gene_path = get_data_path(aux_gene_xml, return_absolute_path=True)
    if not os.path.exists(aux_gene_path):
        raise RuntimeError("Can't find auxiliary gene file @ %s" % aux_gene_path)
    gene_out = os.path.join(table_folder, 'genes.csv')
    gene_assignment_table(seg_path, aux_gene_path, gene_out, label_ids,
                          tmp_folder, target)

    # make table with gene mapping via VCs
    vc_name = 'prospr-6dpf-1-whole-virtual-cells'
    vc_vol_path = get_seg_path(folder, vc_name)
    vc_key = get_seg_key(folder, vc_name, scale=0)
    vc_expression_path = os.path.join(folder, 'tables', vc_name, 'profile_clust_curated.csv')
    med_expression_path = gene_out
    vc_out = os.path.join(table_folder, 'vc_assignments.csv')
    vc_assignment_table(seg_path, vc_vol_path, vc_key,
                        vc_expression_path, med_expression_path,
                        vc_out, tmp_folder, target)

    # region and semantic mapping
    region_out = os.path.join(table_folder, 'regions.csv')
    # need to make sure the inputs are copied / updated in
    # the segmentation folder beforehand
    segmentation_folder = os.path.join(folder, 'images', 'local')
    region_attributes(seg_path, region_out, segmentation_folder, label_ids,
                      tmp_folder, target, max_jobs)

    # make table with morphology
    xml_raw = os.path.join(folder, 'images', 'local', 'sbem-6dpf-1-whole-raw.xml')
    raw_path = get_data_path(xml_raw, return_absolute_path=True)
    morpho_out = os.path.join(table_folder, 'morphology.csv')
    write_morphology_cells(raw_path, seg_path, nuc_path, base_out,
                           morpho_out, nuc_mapping_table, region_out,
                           tmp_folder, target, max_jobs)

    # mapping to extrapolated intensities
    mask_name = 'sbem-6dpf-1-whole-segmented-extrapolated'
    k1 = get_seg_key(folder, name, 3)
    k2 = get_seg_key(folder, mask_name, 0)
    extrapol_mask = os.path.join(folder, 'images', 'local', '%s.xml' % mask_name)
    extrapol_mask = get_data_path(extrapol_mask, return_absolute_path=True)
    extrapol_out = os.path.join(table_folder, 'extrapolated_intensity_correction.csv')
    extrapolated_intensities(seg_path, k1, extrapol_mask, k2,
                             extrapol_out, tmp_folder, target, max_jobs)

    # TODO need to update the neuron trace table as well
    old_ganglia_table = os.path.join(old_folder, 'tables', name, 'ganglia_ids.csv')
    new_ganglia_table = os.path.join(table_folder, 'ganglia_ids.csv')
    old_gcluster_table = os.path.join(old_folder, 'tables', name, 'gene_clusters.csv')
    new_gcluster_table = os.path.join(table_folder, 'gene_clusters.csv')
    old_symm_pair_table = os.path.join(old_folder, 'tables', name, 'symmetric_cells.csv')
    new_symm_pair_table = os.path.join(table_folder, 'symmetric_cells.csv')
    old_mcluster_table = os.path.join(old_folder, 'tables', name, 'morphology_clusters.csv')
    new_mcluster_table = os.path.join(table_folder, 'morphology_clusters.csv')

    # we only need to trigger the label id propagation if the segmentation was updated
    if seg_has_changed:
        id_lut = os.path.join(folder, 'misc',
                              'new_id_lut_sbem-6dpf-1-whole-segmented-cells.json')

        # update the cell id column of the cilia cell_id_mapping table
        cilia_name = 'sbem-6dpf-1-whole-segmented-cilia'
        old_cilia_table = os.path.join(old_folder, 'tables', cilia_name, 'cell_mapping.csv')
        new_cilia_table = os.path.join(folder, 'tables', cilia_name, 'cell_mapping.csv')
        propagate_attributes(id_lut, old_cilia_table, new_cilia_table,
                             'cell_id', override=True)

        # update the ganglia id mapping table, gene clusters and symmetric pairs
        propagate_attributes(id_lut, old_ganglia_table, new_ganglia_table,
                             'label_id', override=True)
        propagate_attributes(id_lut, old_gcluster_table, new_gcluster_table,
                             'label_id', override=True)
        propagate_attributes(id_lut, old_symm_pair_table, new_symm_pair_table,
                             'label_id', override=True)
        propagate_attributes(id_lut, old_mcluster_table, new_mcluster_table,
                             'label_id', override=True)
    else:
        # otherwise, need to copy the ganglia, gene cluster and symmetric pair tables
        make_squashed_link(old_ganglia_table, new_ganglia_table)
        make_squashed_link(old_gcluster_table, new_gcluster_table)
        make_squashed_link(old_symm_pair_table, new_symm_pair_table)
        make_squashed_link(old_mcluster_table, new_mcluster_table)

    write_additional_table_file(table_folder)
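# Hedged usage sketch (not part of the original module): build the cell tables
# when updating from one version folder to the next. The version folders and
# the resolution (in micrometer, zyx order) are illustrative placeholders.
def _example_make_cell_tables():
    make_cell_tables(old_folder='data/1.0.0', folder='data/1.0.1',
                     name='sbem-6dpf-1-whole-segmented-cells',
                     tmp_folder='./tmp_cell_tables',
                     resolution=[0.025, 0.02, 0.02],
                     target='local', max_jobs=8)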
def make_traces_table(traces, reference_scale, resolution, out_path, seg_infos=None):
    """ Make table from traces compatible with the platy browser. """
    # avoid a mutable default argument
    seg_infos = {} if seg_infos is None else seg_infos
    files = {}
    datasets = {}

    # open all segmentation datasets from which we read ids for the trace anchors
    for seg_name, seg_info in seg_infos.items():
        seg_path = seg_info['path']
        if seg_path.endswith('.xml'):
            seg_path = get_data_path(seg_path, return_absolute_path=True)
        seg_scale = seg_info['scale']
        is_h5 = is_h5_file(seg_path)
        seg_key = get_key(is_h5, time_point=0, setup_id=0, scale=seg_scale)
        f = open_file(seg_path, 'r')
        ds = f[seg_key]

        if len(files) == 0:
            ref_shape = ds.shape
        else:
            assert ds.shape == ref_shape, "%s, %s" % (str(ds.shape), str(ref_shape))

        files[seg_name] = f
        datasets[seg_name] = ds

    table = []
    for nid, vals in tqdm(traces.items()):
        coords = vals_to_coords(vals, resolution)
        bb_min = coords.min(axis=0)
        bb_max = coords.max(axis=0) + 1

        # get spatial attributes
        anchor = coords[0].astype('float32') * resolution / 1000.
        bb_min = bb_min.astype('float32') * resolution / 1000.
        bb_max = bb_max.astype('float32') * resolution / 1000.

        # get cell and nucleus ids
        point_slice = tuple(slice(int(c), int(c) + 1) for c in coords[0])

        # attributes:
        # label_id
        # anchor_x anchor_y anchor_z
        # bb_min_x bb_min_y bb_min_z bb_max_x bb_max_y bb_max_z
        # n_points + seg ids
        attributes = [nid,
                      anchor[2], anchor[1], anchor[0],
                      bb_min[2], bb_min[1], bb_min[0],
                      bb_max[2], bb_max[1], bb_max[0],
                      len(coords)]
        for ds in datasets.values():
            seg_id = ds[point_slice][0, 0, 0]
            attributes += [seg_id]

        table.append(attributes)

    for f in files.values():
        f.close()

    table = np.array(table, dtype='float32')
    header = ['label_id',
              'anchor_x', 'anchor_y', 'anchor_z',
              'bb_min_x', 'bb_min_y', 'bb_min_z',
              'bb_max_x', 'bb_max_y', 'bb_max_z',
              'n_points']
    header += ['%s_id' % seg_name for seg_name in seg_infos]

    table = pd.DataFrame(table, columns=header)
    table.to_csv(out_path, index=False, sep='\t')
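# Hedged usage sketch (not part of the original module): the expected layout
# of the seg_infos argument, a dict mapping a segmentation name to its path
# and scale. The paths and scales are hypothetical placeholders.
def _example_make_traces_table(traces, resolution):
    seg_infos = {
        'cell': {'path': 'images/local/sbem-6dpf-1-whole-segmented-cells.xml', 'scale': 2},
        'nucleus': {'path': 'images/local/sbem-6dpf-1-whole-segmented-nuclei.xml', 'scale': 1},
    }
    make_traces_table(traces, reference_scale=0, resolution=resolution,
                      out_path='./traces_default.csv', seg_infos=seg_infos)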
def copy_xml_with_relpath(xml_in, xml_out):
    path = get_data_path(xml_in, return_absolute_path=True)
    xml_root = os.path.split(xml_out)[0]
    path = os.path.relpath(path, xml_root)
    copy_xml_with_newpath(xml_in, xml_out, path, path_type='relative')
def copy_xml_with_abspath(xml_in, xml_out):
    path = get_data_path(xml_in, return_absolute_path=True)
    copy_xml_with_newpath(xml_in, xml_out, path, path_type='absolute')