def migrate_bookmark_file(bookmark_file, dataset_folder, is_default=False, parse_source_name=None):
    dataset_metadata = metadata.read_dataset_metadata(dataset_folder)
    all_sources = dataset_metadata["sources"]
    all_views = dataset_metadata["views"]

    with open(bookmark_file) as f:
        bookmarks = json.load(f)

    new_bookmarks = {}
    for name, bookmark in bookmarks.items():
        new_bookmarks[name] = migrate_bookmark(
            name, bookmark, all_sources, all_views, parse_source_name=parse_source_name
        )

    if is_default:
        all_views.update(new_bookmarks)
        dataset_metadata["views"] = all_views
        metadata.write_dataset_metadata(dataset_folder, dataset_metadata)
    else:
        write_metadata(bookmark_file, {"bookmarks": new_bookmarks})
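
# A minimal usage sketch for migrate_bookmark_file. The misc/bookmarks/*.json layout used
# here is an assumption for illustration; adapt the paths to the actual dataset layout.
def _example_migrate_bookmarks(dataset_folder):
    # the default bookmark file is merged into the dataset's views ...
    migrate_bookmark_file(
        os.path.join(dataset_folder, "misc", "bookmarks", "default.json"),
        dataset_folder, is_default=True,
    )
    # ... any other bookmark file is rewritten in place with the migrated bookmarks
    migrate_bookmark_file(
        os.path.join(dataset_folder, "misc", "bookmarks", "extra_bookmarks.json"),
        dataset_folder, is_default=False,
    )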
def _check_remote_metadata(self, file_format, service_endpoint, bucket_name):
    dataset_folder = os.path.join(self.root, self.dataset_name)
    dataset_metadata = read_dataset_metadata(dataset_folder)
    validate_with_schema(dataset_metadata, "dataset")

    new_file_format = file_format + ".s3"
    sources = dataset_metadata["sources"]
    for name, source in sources.items():
        source_type = list(source.keys())[0]
        storage = source[source_type]["imageData"]
        self.assertIn(new_file_format, storage)
        if new_file_format.startswith("bdv"):
            xml = storage[new_file_format]["relativePath"]
            xml_path = os.path.join(dataset_folder, xml)
            self.assertTrue(os.path.exists(xml_path))
            _, ep, bn, _ = parse_s3_xml(xml_path)
            self.assertEqual(ep, service_endpoint)
            self.assertEqual(bn, bucket_name)
        else:
            address = storage[new_file_format]["s3Address"]
            self.assertTrue(address.startswith(service_endpoint))

    proj_metadata = read_project_metadata(self.root)
    validate_with_schema(proj_metadata, "project")
    file_formats = proj_metadata["imageDataFormats"]
    self.assertIn(file_format, file_formats)
    self.assertIn(new_file_format, file_formats)
def check_segmentation(self, dataset_folder, name):
    self.assertTrue(os.path.exists(dataset_folder))
    exp_data = self.data

    # check the segmentation metadata
    metadata = read_dataset_metadata(dataset_folder)
    self.assertIn(name, metadata["sources"])
    validate_source_metadata(name, metadata["sources"][name], dataset_folder)

    # check the segmentation data
    seg_path = os.path.join(dataset_folder, "images", "bdv-n5", f"{name}.n5")
    self.assertTrue(os.path.exists(seg_path))
    key = get_key(False, 0, 0, 0)
    with open_file(seg_path, "r") as f:
        data = f[key][:]
    self.assertTrue(np.array_equal(data, exp_data))

    # check the table
    table_path = os.path.join(dataset_folder, "tables", name, "default.tsv")
    # pass the path as the assertion message instead of the no-op trailing tuple
    self.assertTrue(os.path.exists(table_path), table_path)
    table = pd.read_csv(table_path, sep="\t")
    label_ids = table["label_id"].values
    exp_label_ids = np.unique(data)
    if 0 in exp_label_ids:
        exp_label_ids = exp_label_ids[1:]
    self.assertTrue(np.array_equal(label_ids, exp_label_ids))
def migrate_data_spec(dataset_folder):
    """Update to the new imageData and tableData spec.

    See https://github.com/mobie/mobie.github.io/issues/49 for details.
    """
    dataset_metadata = metadata.read_dataset_metadata(dataset_folder)
    sources = dataset_metadata["sources"]
    file_formats = {"bdv.n5"}

    # update the source spec
    new_sources = {}
    for source_name, source in sources.items():
        new_source = deepcopy(source)
        source_type = list(new_source.keys())[0]

        image_data = new_source[source_type].pop("imageDataLocations")
        relative_xml = image_data["fileSystem"]
        new_image_data = {
            "bdv.n5": {"relativePath": relative_xml}
        }
        if "s3store" in image_data:
            new_image_data["bdv.n5.s3"] = {"relativePath": image_data["s3store"]}
            file_formats.add("bdv.n5.s3")
        new_source[source_type]["imageData"] = new_image_data

        if "tableDataLocation" in source[source_type]:
            table_location = new_source[source_type].pop("tableDataLocation")
            new_source[source_type]["tableData"] = get_table_metadata(table_location)

        new_sources[source_name] = new_source
    dataset_metadata["sources"] = new_sources

    # update the view spec
    views = dataset_metadata["views"]
    new_views = {}
    for name, view in views.items():
        if metadata.is_grid_view(view):
            new_view = deepcopy(view)
            new_trafos = []
            for trafo in view["sourceTransforms"]:
                if "grid" in trafo:
                    new_trafo = deepcopy(trafo)
                    table_location = new_trafo["grid"].pop("tableDataLocation")
                    new_trafo["grid"]["tableData"] = get_table_metadata(table_location)
                else:
                    new_trafo = trafo
                new_trafos.append(new_trafo)
            new_view["sourceTransforms"] = new_trafos
            new_views[name] = new_view
        else:
            new_views[name] = view
    dataset_metadata["views"] = new_views

    metadata.write_dataset_metadata(dataset_folder, dataset_metadata)
    return list(file_formats)
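
# A minimal sketch of the source rewrite performed by migrate_data_spec. The paths below are
# hypothetical, and the exact "tableData" layout is whatever get_table_metadata returns.
_EXAMPLE_OLD_SOURCE = {
    "segmentation": {
        "imageDataLocations": {"fileSystem": "images/local/seg.xml", "s3store": "images/remote/seg.xml"},
        "tableDataLocation": "tables/seg",
    }
}
# after the migration the same source would look roughly like this:
_EXAMPLE_NEW_SOURCE = {
    "segmentation": {
        "imageData": {
            "bdv.n5": {"relativePath": "images/local/seg.xml"},
            "bdv.n5.s3": {"relativePath": "images/remote/seg.xml"},
        },
        "tableData": {"tsv": {"relativePath": "tables/seg"}},  # actual layout comes from get_table_metadata
    }
}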
def migrate_sources(folder):
    sources = metadata.read_dataset_metadata(folder)["sources"]
    for source_name, source in sources.items():
        source_type = list(source.keys())[0]
        storage = source[source_type]["imageData"]
        for storage_type, loc in storage.items():
            xml = os.path.join(folder, loc["relativePath"])
            write_name(xml, 0, source_name)
            if storage_type == "bdv.n5.s3":
                remove_authentication_field(xml)
def test_bdv_importer(self):
    add_bdv_image(self.xml_path, self.root, self.dataset_name, tmp_folder=self.tmp_folder)
    validate_project(self.root)

    meta = read_dataset_metadata(f"{self.root}/{self.dataset_name}")
    self.assertIn(self.image_name, meta["sources"])

    im_path = meta["sources"][self.image_name]["image"]["imageData"]["bdv.n5"]["relativePath"]
    im_path = os.path.join(self.root, self.dataset_name, im_path).replace("xml", "n5")
    self.assertTrue(os.path.exists(im_path))
    with open_file(im_path, "r") as f:
        data = f["setup0/timepoint0/s0"][:]
    self.assertTrue(np.allclose(data, self.data))
def _test_float(self, dtype):
    def float_data(shape):
        data = np.random.rand(*shape).astype(dtype)
        return data

    self.test_init_from_hdf5(float_data)
    ds_folder = os.path.join(self.root, self.dataset_name)
    mdata = read_dataset_metadata(ds_folder)
    clims = mdata["views"]["test-raw"]["sourceDisplays"][0]["imageDisplay"]["contrastLimits"]
    c0, c1 = clims
    self.assertEqual(c0, 0.0)
    self.assertEqual(c1, 1.0)
def check_data(self, dataset_folder, name):
    exp_data = self.data

    # check the image metadata
    metadata = read_dataset_metadata(dataset_folder)
    sources = metadata["sources"]
    self.assertIn(name, sources)
    validate_source_metadata(name, sources[name], dataset_folder)

    # check the image data
    im_path = os.path.join(dataset_folder, "images", "bdv-n5", f"{name}.n5")
    self.assertTrue(os.path.exists(im_path))
    key = get_key(False, 0, 0, 0)
    with open_file(im_path, "r") as f:
        data = f[key][:]
    self.assertTrue(np.array_equal(data, exp_data))
def migrate_name_spec(dataset_folder):
    """Update to name changes in https://github.com/mobie/mobie.github.io/pull/74.
    """
    ds_metadata = metadata.read_dataset_metadata(dataset_folder)
    views = ds_metadata["views"]
    new_views = _update_views(views, dataset_folder)
    ds_metadata["views"] = new_views
    metadata.write_dataset_metadata(dataset_folder, ds_metadata)

    extra_view_files = glob(
        os.path.join(dataset_folder, "misc", "views", "*.json")
    )
    for view_file in extra_view_files:
        with open(view_file) as f:
            views = json.load(f)["views"]
        new_views = _update_views(views, dataset_folder)
        with open(view_file, "w") as f:
            json.dump({"views": new_views}, f)
def check_data(self, source_names, is_seg):
    ds_folder = os.path.join(self.root, self.ds_name)
    ds_meta = metadata.read_dataset_metadata(ds_folder)
    source_type = "segmentation" if is_seg else "image"
    sources = ds_meta["sources"]
    expected_sources = self.segs if is_seg else self.images
    self.assertEqual(len(source_names), len(expected_sources))

    def _check_table(data, name):
        tab_path = os.path.join(self.root, self.ds_name, "tables", name, "default.tsv")
        self.assertTrue(os.path.exists(tab_path))
        tab = pd.read_csv(tab_path, sep="\t")

        # check seg-ids and sizes
        seg_ids, counts = np.unique(data, return_counts=True)
        if seg_ids[0] == 0:
            seg_ids, counts = seg_ids[1:], counts[1:]
        self.assertTrue(np.allclose(tab["label_id"].values, seg_ids))
        self.assertTrue(np.allclose(tab["n_pixels"].values, counts))

        # check anchors
        for seg_id in seg_ids:
            anchor = (
                int(np.round(tab["anchor_y"].loc[tab["label_id"] == seg_id].values[0])),
                int(np.round(tab["anchor_x"].loc[tab["label_id"] == seg_id].values[0])),
            )
            anchor_id = data[anchor]
            self.assertEqual(seg_id, anchor_id)

    for im_id, name in enumerate(source_names):
        self.assertIn(name, sources)
        source = sources[name]
        data_path = source[source_type]["imageData"]["ome.zarr"]["relativePath"]
        data_path = os.path.join(ds_folder, data_path)
        self.assertTrue(os.path.exists(data_path))
        with open_file(data_path, "r") as f:
            data = f["s0"][:]
        expected = expected_sources[im_id]
        self.assertTrue(np.allclose(expected, data))
        if is_seg:
            _check_table(data, name)
def migrate_grid_spec(dataset_folder):
    """Update to the new grid and sourceAnnotationDisplay spec.

    See https://github.com/mobie/mobie-viewer-fiji/issues/343 for details.
    """
    ds_meta = metadata.read_dataset_metadata(dataset_folder)
    views = ds_meta["views"]
    new_views = update_views(views)
    update_tables(new_views, dataset_folder)
    ds_meta["views"] = new_views
    metadata.write_dataset_metadata(dataset_folder, ds_meta)

    views_folder = os.path.join(dataset_folder, "misc", "views")
    view_files = glob(os.path.join(views_folder, "*.json"))
    for view_file in view_files:
        with open(view_file, "r") as f:
            views = json.load(f)["views"]
        new_views = update_views(views)
        update_tables(new_views, dataset_folder)
        metadata.utils.write_metadata(view_file, {"views": new_views})
def check_traces(self, dataset_folder, trace_name):
    self.assertTrue(os.path.exists(dataset_folder))

    # check the trace (segmentation) metadata
    metadata = read_dataset_metadata(dataset_folder)
    self.assertIn(trace_name, metadata['sources'])
    validate_source_metadata(trace_name, metadata['sources'][trace_name], dataset_folder,
                             assert_true=self.assertTrue, assert_equal=self.assertEqual)

    # check the table
    table_path = os.path.join(dataset_folder, 'tables', trace_name, 'default.tsv')
    self.assertTrue(os.path.exists(table_path))
    table = pd.read_csv(table_path, sep='\t')
    label_ids = table['label_id'].values
    exp_label_ids = np.arange(1, self.n_traces + 1)
    self.assertTrue(np.array_equal(label_ids, exp_label_ids))
def _test_int(self, dtype, int_to_uint=False):
    def int_data(shape):
        if dtype == "int8":
            min_, max_ = -127, 127
        elif dtype == "uint8":
            min_, max_ = 0, 255
        elif dtype == "int16":
            min_, max_ = -32000, 32000
        elif dtype == "uint16":
            min_, max_ = 0, 64000
        else:
            min_, max_ = 0, int(1e6)
        data = np.random.randint(min_, max_, size=shape, dtype=dtype)
        return data

    self.test_init_from_hdf5(int_data, int_to_uint=int_to_uint)
    ds_folder = os.path.join(self.root, self.dataset_name)
    mdata = read_dataset_metadata(ds_folder)
    clims = mdata["views"]["test-raw"]["sourceDisplays"][0]["imageDisplay"]["contrastLimits"]
    c0, c1 = clims
    self.assertEqual(c0, np.iinfo(dtype).min)
    self.assertEqual(c1, np.iinfo(dtype).max)
def test_plate_grid_view(self):
    from mobie.htm import get_merged_plate_grid_view
    from mobie.htm.grid_views import _get_default_site_table, _get_default_well_table

    ds_folder = os.path.join(self.root, self.ds_name)
    metadata = read_dataset_metadata(ds_folder)

    source_prefixes = ["a"]
    source_types = ["image"]
    source_settings = [{"color": "white"}]
    menu_name = "images"

    def to_site_name(source_name, prefix):
        return source_name[len(prefix):]

    def to_well_name(site_name):
        return site_name.split("-")[1]

    site_table = _get_default_site_table(ds_folder, metadata, source_prefixes,
                                         to_site_name, to_well_name, None)
    well_table = _get_default_well_table(ds_folder, metadata, source_prefixes,
                                         to_site_name, to_well_name, None)

    view = get_merged_plate_grid_view(metadata, source_prefixes, source_types, source_settings,
                                      menu_name, to_site_name, to_well_name,
                                      site_table=site_table, well_table=well_table)
    validate_view_metadata(view, dataset_folder=ds_folder, assert_true=self.assertTrue)
def migrate_view_spec(dataset_folder):
    """Move the "default" view of a source from nested inside the source to dataset.json:views.

    See https://github.com/mobie/mobie.github.io/issues/46 for details.
    """
    dataset_metadata = metadata.read_dataset_metadata(dataset_folder)
    sources = dataset_metadata['sources']
    views = dataset_metadata['views']

    new_sources = {}
    for source_name, source in sources.items():
        new_source = deepcopy(source)
        source_type = list(new_source.keys())[0]
        view = new_source[source_type].pop('view')
        new_sources[source_name] = new_source
        views[source_name] = view

    dataset_metadata['sources'] = new_sources
    dataset_metadata['views'] = views
    metadata.write_dataset_metadata(dataset_folder, dataset_metadata)
def add_traces(input_folder, root, dataset_name, traces_name,
               reference_name, reference_scale,
               resolution, scale_factors, chunks,
               menu_name=None, file_format="bdv.n5",
               view=None, max_jobs=multiprocessing.cpu_count(),
               add_default_table=True, seg_infos={},
               unit='micrometer', description=None):
    """ Add traces to an existing MoBIE dataset.

    Currently supports the nmx and swc formats.

    Arguments:
        input_folder [str] - input folder with trace files.
        root [str] - data root folder.
        dataset_name [str] - name of the dataset the traces should be added to.
        traces_name [str] - name of the trace source (added as a segmentation).
        reference_name [str] - name of the reference data, from which the shape
            of the trace volume will be derived.
        reference_scale [int] - scale level of the reference data to use.
        resolution [list[float]] - resolution of the traces in micrometer.
        scale_factors [list[list[int]]] - scale factors used for down-sampling.
        chunks [list[int]] - chunks for the data.
        menu_name [str] - menu item for this source.
            If none is given will be created based on the image name. (default: None)
        file_format [str] - the file format used to store the data internally (default: bdv.n5)
        view [dict] - default view settings for this source (default: None)
        max_jobs [int] - number of jobs (default: number of cores)
        add_default_table [bool] - whether to add the default table (default: True)
        seg_infos [dict] - segmentation information that will be added to the table (default: {})
        unit [str] - physical unit of the coordinate system (default: micrometer)
        description [str] - description for the traces (default: None)
    """
    view = utils.require_dataset_and_view(root, dataset_name, file_format,
                                          source_type="segmentation",
                                          source_name=traces_name,
                                          menu_name=menu_name, view=view,
                                          is_default_dataset=False)

    # get the path to the reference data
    dataset_folder = os.path.join(root, dataset_name)
    # NOTE: we require that the reference data and traces are in the same file format
    # and that it is a file format that can be read locally, i.e. has 'relativePath'
    source_metadata = metadata.read_dataset_metadata(dataset_folder)['sources'][reference_name]
    source_metadata = source_metadata[list(source_metadata.keys())[0]]['imageData']
    assert file_format in source_metadata
    assert 'relativePath' in source_metadata[file_format]
    reference_path = os.path.join(dataset_folder, source_metadata[file_format]['relativePath'])
    if file_format.startswith('bdv'):
        reference_path = get_data_path(reference_path, return_absolute_path=True)

    data_path, image_metadata_path = utils.get_internal_paths(dataset_folder, file_format, traces_name)

    # import the trace data
    import_traces(input_folder, data_path, reference_path, reference_scale,
                  resolution=resolution, scale_factors=scale_factors, chunks=chunks,
                  max_jobs=max_jobs, unit=unit, source_name=traces_name)

    # compute the default trace table
    if add_default_table:
        table_folder = os.path.join(dataset_folder, 'tables', traces_name)
        table_path = os.path.join(table_folder, 'default.tsv')
        os.makedirs(table_folder, exist_ok=True)
        compute_trace_default_table(input_folder, table_path, resolution, seg_infos=seg_infos)
    else:
        table_folder = None

    metadata.add_source_to_dataset(dataset_folder, 'segmentation', traces_name, image_metadata_path,
                                   view=view, table_folder=table_folder, description=description)
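
# A minimal usage sketch for add_traces. All paths, source names, and scale parameters below
# are hypothetical placeholders to be adapted to the actual project; only the call signature
# follows the function defined above.
def _example_add_traces():
    add_traces(
        input_folder="/path/to/trace-files",    # folder with nmx/swc files (hypothetical)
        root="/path/to/mobie-project/data",     # MoBIE data root (hypothetical)
        dataset_name="my-dataset",
        traces_name="neuron-traces",
        reference_name="em-raw",                # existing source that defines the trace volume shape
        reference_scale=0,
        resolution=[0.01, 0.01, 0.01],
        scale_factors=[[2, 2, 2], [2, 2, 2], [2, 2, 2]],
        chunks=[64, 64, 64],
        menu_name="traces",
    )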
def add_open_organelle_data(address, root, internal_path,
                            source_name=None, dataset_name=None,
                            # region="us-west-2",  # we don't seem to need this
                            anon=True, view=None, menu_name=None,
                            is_default_dataset=False, overwrite=False):
    """ Add data from open organelle to a MoBIE project.

    Arguments:
        address [str] - address of the open organelle data (S3 endpoint, bucket and container).
        root [str] - data root folder of the MoBIE project.
        internal_path [str] - path of the data inside the open organelle container.
        source_name [str] - name for the new source (default: None)
        dataset_name [str] - name of the dataset the source will be added to.
            If None, the bucket name is used. (default: None)
        anon [bool] - whether to access the S3 bucket anonymously (default: True)
        view [dict] - default view settings for this source (default: None)
        menu_name [str] - menu name for this source.
            If none will be derived from the source name. (default: None)
        is_default_dataset [bool] - whether to set this dataset as the project default (default: False)
        overwrite [bool] - whether to overwrite the source if it exists already (default: False)
    """
    if not s3_utils.have_boto():
        raise RuntimeError(
            "boto3 is required to access open organelle data. Please install it."
        )

    file_format = 'openOrganelle.s3'
    if not metadata.project_exists(root):
        metadata.create_project_metadata(root, [file_format])

    endpoint, bucket, container = parse_address(address)
    dataset_name = bucket if dataset_name is None else dataset_name
    ds_exists = metadata.dataset_exists(root, dataset_name)

    ds_folder = os.path.join(root, dataset_name)
    if ds_exists:
        ds_metadata = metadata.read_dataset_metadata(ds_folder)
        sources, views = ds_metadata['sources'], ds_metadata['views']
    else:
        sources, views = {}, {}

    client = s3_utils.get_client(endpoint, anon=anon)
    name, source, view = get_source(client, bucket, container, internal_path, endpoint,
                                    ds_folder, source_name, view, menu_name)
    if name in sources:
        if overwrite:
            print("The source", name, "exists already and will be overwritten")
        else:
            print("The source", name, "exists already and will not be overwritten")
            return
    sources[name] = source
    views[name] = view

    if ds_exists:
        ds_metadata['sources'] = sources
        ds_metadata['views'] = views
        metadata.write_dataset_metadata(ds_folder, ds_metadata)
    else:
        os.makedirs(ds_folder, exist_ok=True)
        default_view = views[list(views.keys())[0]]
        # uiSelectionGroup is a property of the view itself, not of its sourceDisplays
        default_view["uiSelectionGroup"] = "bookmarks"
        views["default"] = default_view
        metadata.create_dataset_metadata(ds_folder, sources=sources, views=views)
        metadata.add_dataset(root, dataset_name, is_default_dataset)

    validate_project(root)
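
# A minimal usage sketch for add_open_organelle_data. The address, internal path, and target
# folder below are hypothetical placeholders; only the call signature follows the function
# defined above.
def _example_add_open_organelle():
    add_open_organelle_data(
        address="https://janelia-cosem.s3.amazonaws.com/jrc_hela-2/jrc_hela-2.n5",  # hypothetical address
        root="/path/to/mobie-project/data",   # MoBIE data root (hypothetical)
        internal_path="em/fibsem-uint16",     # hypothetical path inside the container
        dataset_name="jrc_hela-2",
        anon=True,
    )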