def load_dataset(src_dir, remap, flip):
    ds = open_dataset(src_dir)
    if len(remap) > 1 and remap != "xyz"[: len(remap)]:
        # build the axis mapping, e.g. "zxy" -> {"x": "z", "y": "x", "z": "y"}
        remap = {a: b for a, b in zip("xyz", remap)}
        ds.remap_tiling_axes(remap)
    if flip:
        ds.flip_tiling_axes(list(flip))
    return ds
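# A minimal usage sketch for `load_dataset` (hypothetical path, not part of the
# original scripts): remap="zxy" builds the mapping {"x": "z", "y": "x", "z": "y"}
# before it is applied to the tiling axes, and flip="xy" mirrors those two axes.
#
#   ds = load_dataset("/data/tiled_acquisition", remap="zxy", flip="xy")
#   print(ds.tile_shape)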
def net(ctx, path, gap):
    """Generate net of data blocks."""
    try:
        from utoolbox.util.preview import cuboid_net
    except ImportError:
        logger.error("please install `utoolbox-image` to support surface view")
        return

    show_trace = logger.getEffectiveLevel() <= logging.DEBUG
    ds = open_dataset(path, show_trace=show_trace)

    for time, _ in TimeSeriesDatasetIterator(ds):
        if time is None:
            break
        else:
            raise TypeError(
                "net generation currently does not support time series dataset"
            )

    # calculate scale factor for nets
    scale = _normalized_scale(ds.voxel_size)
    # updated resolution
    res = scale[0] * ds.voxel_size[0]
    desc = ", ".join(f"{k}:{v:.3f}" for k, v in zip("xyz", reversed(scale)))
    logger.debug(f"net scale ({desc}), effective resolution {res:.3f} um")

    # IJ hyperstack order T[Z][C]YXS
    # re-purpose axis meaning:
    #   - Z, slice
    #   - C, channel
    iterator = TiledDatasetIterator(ds, axes="zyx", return_key=True, return_format="index")
    for tile, t_ds in iterator:
        tile_desc = "-".join(
            f"{label}{ax:03d}" for label, ax in zip("xyz", reversed(tile))
        )
        desc = f"tile-{tile_desc}"
        for view, v_ds in MultiViewDatasetIterator(t_ds):
            if view:
                desc = f"view-{view}_{desc}"
            nets = []
            for channel, c_ds in MultiChannelDatasetIterator(v_ds):
                array = ds[c_ds]
                net = cuboid_net(array, scale, gap=gap)
                nets.append(net)

            # reshape to TZCYXS
            ny, nx = nets[0].shape
            nets = np.stack(nets, axis=0)
            nets.shape = 1, 1, len(nets), ny, nx, 1

            tifffile.imwrite(
                f"{desc}.tif",
                nets,
                imagej=True,
                resolution=(res, res),
                metadata={"unit": "um"},
            )
def aszarr(path, verbose, remap, flip, host, output):
    """
    Convert arbitrary dataset into Zarr dataset format.

    If OUTPUT is not specified, it will default to 'SOURCE.zarr'
    \f
    Args:
        path (str): path to the original dataset
        verbose (int, optional): how verbose the logger should behave
        output (str, optional): path to the destination
    """
    # we know this is annoying, silence it
    logging.getLogger("tifffile").setLevel(logging.ERROR)

    # convert verbose level
    verbose = 2 if verbose > 2 else verbose
    level = {0: "WARNING", 1: "INFO", 2: "DEBUG"}.get(verbose)
    coloredlogs.install(
        level=level,
        fmt="%(asctime)s %(levelname)s %(message)s",
        datefmt="%H:%M:%S",
    )

    # ensure we do not have an ambiguous input path
    src_path = os.path.abspath(path)

    logger.info("loading source dataset")
    show_trace = logger.getEffectiveLevel() <= logging.DEBUG
    ds = open_dataset(src_path, show_trace=show_trace)
    ds = _remap_and_flip(ds, remap, flip)

    # generate the output path
    if output is None:
        parent, dname = os.path.split(src_path)
        dst_path = os.path.join(parent, f"{dname}.zarr")
    else:
        dst_path = output
    logger.info(f'converted dataset will be saved to "{dst_path}"')

    dump, overwrite = True, False
    if os.path.exists(dst_path):
        # output already exists, ask user what's next
        dump, overwrite = button_dialog(
            title="Zarr dataset exists",
            text="What should we do?",
            buttons=[
                ("Skip", (False, None)),
                ("Update", (True, False)),
                ("Overwrite", (True, True)),
            ],
        ).run()

    if dump:
        with get_client(address=host):
            ZarrDataset.dump(dst_path, ds, overwrite=overwrite)

    logger.info("zarr dataset conversion complete")
def main(src_dir, dst_dir, dry_run, downsamples, chunk):
    """
    Convert Micro-Manager dataset to BigDataViewer compliant XML/HDF5 format.
    \f
    Args:
        src_dir (str): path to the MM dataset
        dst_dir (str, optional): where to save the BDV dataset
        dry_run (bool, optional): save XML only
        downsamples (tuple of int, optional): downsample ratio along (X, Y, Z) axis
        chunk (tuple of int, optional): chunk size
    """
    ds_src = open_dataset(src_dir, show_trace=True)

    if dst_dir is None:
        dst_dir = f"{src_dir}_bdv"

    if os.path.exists(dst_dir):
        dump = button_dialog(
            title="BDV dataset exists",
            text="What should we do?",
            buttons=[("Cancel", False), ("Overwrite", True)],
        ).run()
        if dump:
            # we have to unlink first
            logger.warning("remove previous dataset dump")
            rmtree(dst_dir)
    else:
        dump = True

    if dump:
        # NOTE we should already have dealt with FileExistsError
        os.mkdir(dst_dir)

        # ensure downsamples is wrapped as a list of ratios
        if isinstance(downsamples[0], int):
            downsamples = [downsamples]
        # reverse downsampling ratio, (X, Y, Z) -> (Z, Y, X)
        downsamples = [tuple(reversed(s)) for s in downsamples]

        # reverse chunk size, (X, Y, Z) -> (Z, Y, X)
        chunk = tuple(reversed(chunk))

        logger.info("convert to BDV dataset")
        try:
            BigDataViewerDataset.dump(
                dst_dir,
                ds_src,
                pyramid=downsamples,
                chunks=chunk,
                dry_run=dry_run,
            )
        except InvalidChunkSizeError as err:
            logger.error(str(err))
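# Worked example of the axis-order handling above (hypothetical values): a
# CLI-supplied `downsamples=(4, 4, 2)` in (X, Y, Z) order is wrapped to
# `[(4, 4, 2)]` and reversed to `[(2, 4, 4)]` in (Z, Y, X) order before being
# passed as `pyramid`; likewise `chunk=(64, 128, 128)` becomes `(128, 128, 64)`.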
def test_dump_from_latticescope(ds_src_dir, ds_dst_dir, overwrite=False):
    logger.info("loading source dataset")
    ds_src = open_dataset(ds_src_dir, show_trace=True)

    ds_src.remap_tiling_axes({"x": "z", "y": "x", "z": "y"})
    ds_src.flip_tiling_axes(["x", "y"])

    pprint(ds_src.metadata)

    with get_client(address="localhost:8786", auto_spawn=False):
        logger.info("convert to zarr dataset")
        ZarrDataset.dump(ds_dst_dir, ds_src, overwrite=overwrite)
def label(ctx, ds_path, level):
    """
    Export specific label from a ZarrDataset.
    \f
    Args:
        ds_path (str): path to the dataset
    """
    show_trace = logger.getEffectiveLevel() <= logging.DEBUG
    ds = open_dataset(ds_path, show_trace=show_trace)
    if not isinstance(ds, ZarrDataset):
        raise TypeError("input is not a ZarrDataset")

    labels = ds.labels
    if len(labels) < 2:
        if len(labels) == 1:
            desc = "Only a single label exists in this dataset.\n"
        else:
            desc = "Cannot find a valid label (something very wrong?)\n"
        message_dialog(title="Unable to export label", text=desc).run()
        return

    values = [(label,) * 2 for label in labels]
    label = radiolist_dialog(
        title="Found multiple labels",
        text="Which label would you like to export?",
        values=values,
    ).run()
    if label is None:
        logger.info("cancelled")
        return

    # reload dataset
    logger.info(f'reload dataset with default label "{label}"')
    ds = open_dataset(ds_path, label=label, show_trace=show_trace)
def mosaic(ctx, path, screen_size):
    """
    Generate mosaic for each layer.
    \f
    Args:
        path (str): path to the dataset
        screen_size (str, optional): screen size to fit the result in
    """
    show_trace = logger.getEffectiveLevel() <= logging.DEBUG
    ds = open_dataset(path, show_trace=show_trace)

    _, dy, dx = ds.voxel_size

    iz = 0
    for tz, ds_xy in TiledDatasetIterator(ds, axes="z", return_key=True):
        if tz:
            logger.info(f"iterate over z tile, {tz}")

        # populate layers
        layer = []
        for ds_x in TiledDatasetIterator(ds_xy, axes="y", return_key=False):
            row = []
            for uuid in TiledDatasetIterator(ds_x, axes="x", return_key=False):
                row.append(ds[uuid])
            layer.append(row)
        layer = da.block(layer)

        sampler = None
        for mosaic in layer:
            if sampler is None:
                ratio = _estaimte_resize_ratio(mosaic, resolution=screen_size)
                sampler = (slice(None, None, ratio),) * 2
            mosaic = mosaic[sampler]

            print(iz)
            tifffile.imwrite(
                f"mosaic_z{iz:05}.tif",
                mosaic,
                imagej=True,
                resolution=(dx, dy),
                metadata={"unit": "um"},
            )
            iz += 1
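# The helper `_estaimte_resize_ratio` is defined elsewhere in the repo; a rough
# sketch of the assumed behavior (pick an integer stride so the largest in-plane
# dimension fits within the requested screen size) might look like the
# commented-out illustration below. This is an assumption, not the actual
# implementation.
#
#   def _estaimte_resize_ratio(array, resolution=1080):
#       ratio = max(s / resolution for s in array.shape[-2:])
#       return max(int(math.ceil(ratio)), 1)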
def main(ds_src_dir, ds_dst_dir, client=None):
    logger.info("loading source dataset")
    ds_src = open_dataset(ds_src_dir)
    pprint(ds_src.metadata)

    logger.info("dump dataset info")
    for key, value in TiledDatasetIterator(ds_src, return_key=True, return_format="both"):
        print(key)
        print(value)
        print()

    with pd.option_context("display.max_rows", None):
        print(">> tile_coords")
        print(ds_src.tile_coords)
        print()
        print(">> inventory")
        print(ds_src.inventory)
        print()

    if os.path.exists(ds_dst_dir):
        dump = button_dialog(
            title="BDV dataset exists",
            text="What should we do?",
            buttons=[
                ("Skip", False),
                ("Overwrite", True),
            ],
        ).run()
        if dump:
            # we have to unlink first
            logger.warning("remove previous dataset dump")
            rmtree(ds_dst_dir)
    else:
        dump = True

    if dump:
        logger.info("convert to BDV dataset")
        BigDataViewerDataset.dump(
            ds_dst_dir, ds_src, pyramid=[(1, 1, 1), (2, 4, 4)], chunks=(16, 128, 128)
        )
def coords(ctx, ds_path, csv_path, precision):
    """
    Export filename-coordinate mapping.
    \f
    Args:
        ds_path (str): path to the dataset
        csv_path (str): where to dump the CSV output
        precision (int, optional): maximum number of decimal places
    """
    show_trace = logger.getEffectiveLevel() <= logging.DEBUG
    ds = open_dataset(ds_path, show_trace=show_trace)
    if not isinstance(ds, TiledDataset):
        raise TypeError("only tiled dataset contains coordinate information")
    if isinstance(ds, SessionDataset):
        raise ValueError("session-based dataset cannot cherry pick internal arrays")

    # reload dataset with an alternative class that returns file paths instead of arrays
    class DumpFilename(type(ds)):
        @property
        def read_func(self):
            def func(uri, shape, dtype):
                return uri

            return func

    logger.debug("reload with DumpFilename")
    with change_logging_level(logging.ERROR):
        ds = DumpFilename.load(ds_path)

    # iterate over uuid and re-interpret the result
    logger.info("mapping UUID to actual filename")
    inventory = ds.inventory.reset_index(name="uuid")
    filenames = [ds[uuid] if uuid else "" for uuid in inventory["uuid"]]
    inventory["filename"] = filenames
    # for multi-file stacks, explode to expand lists to separate rows
    inventory = inventory.explode("filename")
    # drop uuid column
    inventory.drop("uuid", axis="columns", inplace=True)

    # extract real world coordinates
    coords = ds.tile_coords.reset_index()

    # merge tables
    index_col_names = [name for name in coords.columns if name.startswith("tile_")]
    inventory = inventory.merge(coords, how="left", on=index_col_names)

    # rename columns, e.g. "tile_x" -> "ix", "x_coord" -> "x"
    ax = [k.split("_")[1] for k in index_col_names]
    inventory.rename(
        columns={k: f"i{v}" for k, v in zip(index_col_names, ax)}, inplace=True
    )
    inventory.rename(columns={f"{k}_coord": k for k in ax}, inplace=True)

    inventory.to_csv(
        csv_path,
        sep=",",
        index=False,  # ignore row number
        header=True,  # we need column headers
        float_format=f"%.{precision}f",  # user-specified decimal places
    )
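# After the renaming above, the exported CSV carries one row per file: tile
# indices as `ix`/`iy` (and `iz` when the dataset tiles along z), real-world
# coordinates as `x`/`y`/`z`, plus the `filename` column. Column order follows
# the merged DataFrame; a hypothetical 2D-tiled row (with precision=4) could
# look like
#
#   ix,iy,filename,x,y
#   0,0,tile_000.tif,0.0000,0.0000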
def info(ctx, path, show_all):
    """
    Dump dataset info.

    This script is not designed with performance in mind; it may take some time
    to compile all the info into human-friendly text.
    \f
    Args:
        path (str): path to the dataset
        show_all (bool, optional): list all attributes
    """
    show_trace = logger.getEffectiveLevel() <= logging.DEBUG
    ds = open_dataset(path, show_trace=show_trace)
    print()

    # basic type
    path = os.path.abspath(path)
    printi(f'Dataset in "{os.path.basename(path)}" is a "{type(ds).__name__}"', indent=1)
    if isinstance(ds, SessionDataset):
        printi(f'(Using internal path "{ds.path}")', indent=1)
    print()

    # setup
    setup = _extract_setup_keys(ds)
    n_keys = []
    for k, v in setup.items():
        n_v = len(v)
        if n_v > 1:
            k = k + "s"
        n_keys.append(f"{n_v} {k}")
    desc = " / ".join(n_keys)
    printi(desc, indent=1)

    # setup - statistics
    n_missing = (ds.inventory.values == "").sum()
    if n_missing > 0:
        printi(f"({n_missing} missing data)", indent=1)

    # setup - detail
    if show_all:
        rows = []
        for i, values0 in enumerate(setup.values()):
            values = []
            # reformat
            for value in values0:
                if value is None:
                    value = "-"
                elif isinstance(value, pd.Timedelta):
                    value = format_timespan(value)
                values.append(value)
            rows.append(values)
        rows = zip_longest(*rows, fillvalue="")
        table_str = format_pretty_table(rows, setup.keys())
        # add indent
        table_str = table_str.split("\n")
        for row in table_str:
            printi(row, indent=1)
    print()

    # labels
    if isinstance(ds, ZarrDataset):
        printi("Available labels", indent=1)
        for label in ds.labels:
            printi(f"- {label}", indent=2)
        print()

    # timeseries
    # TODO time information

    # tiles
    if setup["tile"][0] is not None:
        m, M = ds.tile_coords.min().to_dict(), ds.tile_coords.max().to_dict()
        shape, _ = ds._load_array_info()
        s = {
            k: dv * nv
            for k, dv, nv in zip("xyz", reversed(ds.voxel_size), reversed(shape))
        }
        ax_names, extents = [], []
        for ax in TILE_INDEX_STR:
            ax = ax.split("_")[1]
            index = f"{ax}_coord"
            if index in m:
                delta = round(M[index] - m[index], 4) + s[ax]
                extents.append(delta)
                ax_names.append(ax)

        # tiles - index
        printi("Number of tiles", indent=1)
        desc = [f"{k}:{v}" for k, v in zip(ax_names, reversed(ds.tile_shape))]
        desc = ", ".join(desc)
        printi(f"({desc})", indent=2)
        print()

        # tiles - coords
        printi("Tile extents (um)", indent=1)
        desc = [f"{k}:{v}" for k, v in zip(ax_names, extents)]
        desc = ", ".join(desc)
        printi(f"({desc})", indent=2)
        print()

    # voxel size
    if isinstance(ds, DenseDataset):
        printi("Voxel size (um)", indent=1)
        voxel_size = [round(v, 4) for v in reversed(ds.voxel_size)]
        desc = [f"{k}:{v}" for k, v in zip("xyz", voxel_size)]
        desc = ", ".join(desc)
        printi(f"({desc})", indent=2)
        print()
coloredlogs.install(
    level="DEBUG",  # assumed verbosity for this standalone script
    fmt="%(asctime)s %(levelname)s %(message)s",
    datefmt="%H:%M:%S",
)

logger = logging.getLogger(__name__)

use_local = True
if use_local:
    logger.info("using local cluster")
    cluster = LocalCluster(n_workers=4, threads_per_worker=4)
    client = Client(cluster)
else:
    logger.info("using remote cluster")
    client = Client("10.109.20.6:8786")
logger.info(client)

src_ds = open_dataset("Y:/ARod/4F/20200317_No5_CamA")
print(src_ds.inventory)

logger.info(f"tile by {src_ds.tile_shape}")

# INPUT (x, y, z) -> TRUE (z, x, y)
src_ds.remap_tiling_axes({"x": "z", "y": "x", "z": "y"})
src_ds.flip_tiling_axes(["x", "y"])
print(src_ds.inventory)

# import ipdb; ipdb.set_trace()

dst_dir = "Y:/ARod/4F/20200317_No5_CamA"
preview_mip(src_ds, dst_dir)