Example #1
def load_dataset(src_dir, remap, flip):
    """Open a dataset and optionally remap/flip its tiling axes."""
    ds = open_dataset(src_dir)
    # only remap when the requested order differs from the default "xyz"
    if len(remap) > 1 and remap != "xyz"[:len(remap)]:
        remap = {a: b for a, b in zip("xyz", remap)}
        ds.remap_tiling_axes(remap)
    if flip:
        ds.flip_tiling_axes(list(flip))
    return ds
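
A minimal usage sketch of the convention above (the dataset path and axis strings are made-up examples): the remap string is zipped against "xyz", so remap="zxy" yields the same mapping that later examples pass explicitly.

# hypothetical call: remap="zxy" maps x->z, y->x, z->y; flip="xy" flips those two axes
# ds = load_dataset("/path/to/dataset", remap="zxy", flip="xy")

# the dict built internally for remap="zxy"
remap = {a: b for a, b in zip("xyz", "zxy")}
assert remap == {"x": "z", "y": "x", "z": "y"}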
Example #2
def net(ctx, path, gap):
    """Generate net of data blocks."""
    try:
        from utoolbox.util.preview import cuboid_net
    except ImportError:
        logger.error("please install `utoolbox-image` to support surface view")
        return

    show_trace = logger.getEffectiveLevel() <= logging.DEBUG
    ds = open_dataset(path, show_trace=show_trace)

    for time, _ in TimeSeriesDatasetIterator(ds):
        if time is None:
            break
        else:
            raise TypeError(
                "net generation currently does not support time series dataset"
            )

    # calculate scale factor for nets
    scale = _normalized_scale(ds.voxel_size)
    # updated resolution
    res = scale[0] * ds.voxel_size[0]

    desc = ", ".join(f"{k}:{v:.3f}" for k, v in zip("xyz", reversed(scale)))
    logger.debug(f"net scale ({desc}), effective resolution {res:.3f} um")

    # IJ hyperstack order T[Z][C]YXS
    # re-purpose axis meaning:
    #   - Z, slice
    #   - C, channel
    iterator = TiledDatasetIterator(ds,
                                    axes="zyx",
                                    return_key=True,
                                    return_format="index")
    for tile, t_ds in iterator:
        tile_desc = "-".join(f"{label}{ax:03d}"
                             for label, ax in zip("xyz", reversed(tile)))
        desc = f"tile-{tile_desc}"
        for view, v_ds in MultiViewDatasetIterator(t_ds):
            # build the name per view so prefixes do not accumulate across views
            v_desc = f"view-{view}_{desc}" if view else desc
            nets = []
            for channel, c_ds in MultiChannelDatasetIterator(v_ds):
                array = ds[c_ds]
                net = cuboid_net(array, scale, gap=gap)
                nets.append(net)

            # reshape to TZCYXS
            ny, nx = nets[0].shape
            nets = np.stack(nets, axis=0)
            nets.shape = 1, 1, len(nets), ny, nx, 1
            tifffile.imwrite(
                f"{desc}.tif",
                nets,
                imagej=True,
                resolution=(res, res),
                metadata={"unit": "um"},
            )
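
For reference, the final reshape simply wraps the stacked 2-D nets into the TZCYXS axis order that tifffile's ImageJ writer expects; a self-contained sketch with dummy sizes (three 64x128 channels are arbitrary choices):

import numpy as np

# three single-channel 2-D "nets" (dummy data)
nets = [np.zeros((64, 128), dtype=np.uint16) for _ in range(3)]

ny, nx = nets[0].shape
nets = np.stack(nets, axis=0)            # (C, Y, X)
nets.shape = 1, 1, len(nets), ny, nx, 1  # (T, Z, C, Y, X, S)
assert nets.shape == (1, 1, 3, 64, 128, 1)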
Example #3
def aszarr(path, verbose, remap, flip, host, output):
    """
    Convert arbitrary dataset into Zarr dataset format.

    If OUTPUT is not specified, it will default to 'SOURCE.zarr'
    \f

    Args:
        path (str): path to the original dataset
        verbose (int, optional): how verbose should the logger behave
        remap (str, optional): remapped tiling axes order, e.g. "zxy"
        flip (str, optional): tiling axes to flip
        host (str, optional): scheduler address used for the conversion
        output (str, optional): path to the destination
    """
    # we know this is annoying, silence it
    logging.getLogger("tifffile").setLevel(logging.ERROR)

    # convert verbose level
    verbose = 2 if verbose > 2 else verbose
    level = {0: "WARNING", 1: "INFO", 2: "DEBUG"}.get(verbose)
    coloredlogs.install(level=level,
                        fmt="%(asctime)s %(levelname)s %(message)s",
                        datefmt="%H:%M:%S")

    # ensure we do not have ambiguous input
    src_path = os.path.abspath(path)

    logger.info("loading source dataset")
    show_trace = logger.getEffectiveLevel() <= logging.DEBUG
    ds = open_dataset(src_path, show_trace=show_trace)
    ds = _remap_and_flip(ds, remap, flip)

    # generate the output
    if output is None:
        parent, dname = os.path.split(src_path)
        dst_path = os.path.join(parent, f"{dname}.zarr")
    else:
        dst_path = output
    logger.info(f'converted dataset will be saved to "{dst_path}"')

    dump, overwrite = True, False
    if os.path.exists(dst_path):
        # output already exists, ask user what's next
        dump, overwrite = button_dialog(
            title="Zarr dataset exists",
            text="What should we do?",
            buttons=[
                ("Skip", (False, None)),
                ("Update", (True, False)),
                ("Overwrite", (True, True)),
            ],
        ).run()

    if dump:
        with get_client(address=host):
            ZarrDataset.dump(dst_path, ds, overwrite=overwrite)

    logger.info("complete zarr dataset conversion")
Example #4
def main(src_dir, dst_dir, dry_run, downsamples, chunk):
    """
    Convert Micro-Manager dataset to BigDataViewer compliant XML/HDF5 format.
    \f

    Args:
        src_dir (str): path to the MM dataset
        dst_dir (str, optional): where to save the BDV dataset
        dry_run (bool, optional): save XML only
        downsamples (tuple of int, optional): downsample ratio along (X, Y, Z) axis
        chunk (tuple of int, optional): chunk size
    """
    ds_src = open_dataset(src_dir, show_trace=True)

    if dst_dir is None:
        dst_dir = f"{src_dir}_bdv"

    if os.path.exists(dst_dir):
        dump = button_dialog(
            title="BDV dataset exists",
            text="What should we do?",
            buttons=[("Cancel", False), ("Overwrite", True)],
        ).run()
        if dump:
            # we have to unlink first
            logger.warning("remove previous dataset dump")
            rmtree(dst_dir)
    else:
        dump = True

    if dump:
        # NOTE FileExistsError should already be handled at this point
        os.mkdir(dst_dir)

        # ensure downsamples is wrapped
        if isinstance(downsamples[0], int):
            downsamples = [downsamples]
        # reverse downsampling ratio
        downsamples = [tuple(reversed(s)) for s in downsamples]

        # reverse chunk size
        chunk = tuple(reversed(chunk))

        logger.info("convert to zarr dataset")
        try:
            BigDataViewerDataset.dump(
                dst_dir,
                ds_src,
                pyramid=downsamples,
                chunks=chunk,
                dry_run=dry_run,
            )
        except InvalidChunkSizeError as err:
            logger.error(str(err))
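
The pyramid and chunk arguments above arrive in (X, Y, Z) order and are reversed to the (Z, Y, X) order used internally; a short sketch of the wrap-and-reverse step with an arbitrary single-level ratio:

downsamples = (1, 2, 4)          # one level, (X, Y, Z)
if isinstance(downsamples[0], int):
    downsamples = [downsamples]  # wrap a single level into a pyramid list
downsamples = [tuple(reversed(s)) for s in downsamples]
assert downsamples == [(4, 2, 1)]  # now (Z, Y, X)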
Example #5
def test_dump_from_latticescope(ds_src_dir, ds_dst_dir, overwrite=False):
    logger.info("loading source dataset")
    ds_src = open_dataset(ds_src_dir, show_trace=True)

    ds_src.remap_tiling_axes({"x": "z", "y": "x", "z": "y"})
    ds_src.flip_tiling_axes(["x", "y"])

    pprint(ds_src.metadata)

    with get_client(address="localhost:8786", auto_spawn=False):
        logger.info("convert to zarr dataset")
        ZarrDataset.dump(ds_dst_dir, ds_src, overwrite=overwrite)
Example #6
def label(ctx, ds_path, level):
    """
    Export specific label from a ZarrDataset.
    \f

    Args:
        ds_path (str): path to the dataset
    """

    show_trace = logger.getEffectiveLevel() <= logging.DEBUG
    ds = open_dataset(ds_path, show_trace=show_trace)

    if not isinstance(ds, ZarrDataset):
        raise TypeError("input is not a ZarrDataset")

    labels = ds.labels
    if len(labels) < 2:
        if len(labels) == 1:
            desc = "Only a single label exists in this dataset.\n"
        else:
            desc = "Cannot find a valid label (something very wrong?)\n"
        message_dialog(title="Unable to export label", text=desc).run()
        return

    values = [(label, ) * 2 for label in labels]
    label = radiolist_dialog(
        title="Found multiple labels",
        text="Which label would you like to export?",
        values=values,
    ).run()

    if label is None:
        logger.info("cancelled")
        return

    # reload dataset
    logger.info(f'reload dataset with default label "{label}"')
    ds = open_dataset(ds_path, label=label, show_trace=show_trace)
Example #7
def mosaic(ctx, path, screen_size):
    """
    Generate mosaic for each layer.
    \f

    Args:
        path (str): path to the dataset
        screen_size (str, optional): screen size to fit the result in
    """
    show_trace = logger.getEffectiveLevel() <= logging.DEBUG
    ds = open_dataset(path, show_trace=show_trace)

    _, dy, dx = ds.voxel_size

    iz = 0
    for tz, ds_xy in TiledDatasetIterator(ds, axes="z", return_key=True):
        if tz:
            logger.info(f"iterate over z tile, {tz}")

        # populating layers
        layer = []
        for ds_x in TiledDatasetIterator(ds_xy, axes="y", return_key=False):
            row = []
            for uuid in TiledDatasetIterator(ds_x, axes="x", return_key=False):
                row.append(ds[uuid])
            layer.append(row)
        layer = da.block(layer)

        sampler = None
        for mosaic in layer:
            if sampler is None:
                ratio = _estaimte_resize_ratio(mosaic, resolution=screen_size)
                sampler = (slice(None, None, ratio), ) * 2
            mosaic = mosaic[sampler]

            print(iz)

            tifffile.imwrite(
                f"mosaic_z{iz:05}.tif",
                mosaic,
                imagej=True,
                resolution=(dx, dy),
                metadata={"unit": "um"},
            )

            iz += 1
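
The sampler above downsamples each mosaic by strided slicing along Y and X; a standalone sketch (the 4x ratio is an arbitrary choice):

import numpy as np

mosaic = np.arange(64 * 64).reshape(64, 64)
ratio = 4
sampler = (slice(None, None, ratio), ) * 2  # keep every 4th row and column
assert mosaic[sampler].shape == (16, 16)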
Example #8
def main(ds_src_dir, ds_dst_dir, client=None):
    logger.info("loading source dataset")
    ds_src = open_dataset(ds_src_dir)

    pprint(ds_src.metadata)

    logger.info("dump dataset info")
    for key, value in TiledDatasetIterator(ds_src,
                                           return_key=True,
                                           return_format="both"):
        print(key)
        print(value)
        print()

    with pd.option_context("display.max_rows", None):
        print(">> tile_coords")
        print(ds_src.tile_coords)
        print()
        print(">> inventory")
        print(ds_src.inventory)
        print()

    if os.path.exists(ds_dst_dir):
        dump = button_dialog(
            title="BDV dataset exists",
            text="What should we do?",
            buttons=[
                ("Skip", False),
                ("Overwrite", True),
            ],
        ).run()
        if dump:
            # we have to unlink first
            logger.warning("remove previous dataset dump")
            rmtree(ds_dst_dir)
    else:
        dump = True

    if dump:
        logger.info("convert to zarr dataset")
        BigDataViewerDataset.dump(ds_dst_dir,
                                  ds_src,
                                  pyramid=[(1, 1, 1), (2, 4, 4)],
                                  chunks=(16, 128, 128))
Example #9
def coords(ctx, ds_path, csv_path, precision):
    """
    Export filename-coordinate mapping.
    \f

    Args:
        ds_path (str): path to the dataset
        csv_path (str): where to dump the CSV output
        precision (int, optional): maximum number of decimal places
    """

    show_trace = logger.getEffectiveLevel() <= logging.DEBUG
    ds = open_dataset(ds_path, show_trace=show_trace)

    if not isinstance(ds, TiledDataset):
        raise TypeError("only tiled dataset contains coordinate information")
    if isinstance(ds, SessionDataset):
        raise ValueError(
            "session-based dataset cannot cherry pick internal arrays")

    # reload dataset with alternative class
    class DumpFilename(type(ds)):
        @property
        def read_func(self):
            def func(uri, shape, dtype):
                return uri

            return func

    logger.debug("reload with DumpFilename")
    with change_logging_level(logging.ERROR):
        ds = DumpFilename.load(ds_path)

    # iterate over uuid and re-interpret the result
    logger.info("mapping UUID to actual filename")
    inventory = ds.inventory.reset_index(name="uuid")
    filenames = [ds[uuid] if uuid else "" for uuid in inventory["uuid"]]
    inventory["filename"] = filenames
    # for multi-file stacks, we explode to expand lists to separate rows
    inventory = inventory.explode("filename")
    # drop uuid column
    inventory.drop("uuid", axis="columns", inplace=True)

    # extract real world coordinate
    coords = ds.tile_coords.reset_index()

    # merge tables
    index_col_names = [
        name for name in coords.columns if name.startswith("tile_")
    ]
    inventory = inventory.merge(coords, how="left", on=index_col_names)

    # rename columns
    ax = [k.split("_")[1] for k in index_col_names]
    inventory.rename(columns={k: f"i{v}"
                              for k, v in zip(index_col_names, ax)},
                     inplace=True)
    inventory.rename(columns={f"{k}_coord": k for k in ax}, inplace=True)

    inventory.to_csv(
        csv_path,
        sep=",",
        index=False,  # ignore row number
        header=True,  # we need column headers
        float_format=f"%.{precision}f",  # limit decimal places
    )
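
After the merge, tile index columns such as "tile_x" become "ix" and coordinate columns such as "x_coord" become plain "x"; a toy illustration of the same rename logic (the sample values are made up):

import pandas as pd

inventory = pd.DataFrame({
    "tile_x": [0, 1],
    "x_coord": [0.0, 102.4],
    "filename": ["a.tif", "b.tif"],
})
index_col_names = [name for name in inventory.columns if name.startswith("tile_")]
ax = [k.split("_")[1] for k in index_col_names]
inventory.rename(columns={k: f"i{v}" for k, v in zip(index_col_names, ax)}, inplace=True)
inventory.rename(columns={f"{k}_coord": k for k in ax}, inplace=True)
assert list(inventory.columns) == ["ix", "x", "filename"]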
Example #10
def info(ctx, path, show_all):
    """
    Dump dataset info.

    This script is not designed with performance in mind; it may take some time
    to compile all the info into human-friendly text.
    \f

    Args:
        path (str): path to the dataset
        show_all (bool, optional): list all attributes
    """

    show_trace = logger.getEffectiveLevel() <= logging.DEBUG
    ds = open_dataset(path, show_trace=show_trace)

    print()

    # basic type
    path = os.path.abspath(path)
    printi(f'Dataset in "{os.path.basename(path)}" is a "{type(ds).__name__}"',
           indent=1)
    if isinstance(ds, SessionDataset):
        printi(f'(Using internal path "{ds.path}")', indent=1)

    print()

    # setup
    setup = _extract_setup_keys(ds)
    n_keys = []
    for k, v in setup.items():
        n_v = len(v)
        if n_v > 1:
            k = k + "s"
        n_keys.append(f"{n_v} {k}")
    desc = " / ".join(n_keys)
    printi(desc, indent=1)

    # setup - statistics
    n_missing = (ds.inventory.values == "").sum()
    if n_missing > 0:
        printi(f"({n_missing} missing data)", indent=1)

    # setup - detail
    if show_all:
        rows = []
        for i, values0 in enumerate(setup.values()):
            values = []
            # reformat
            for value in values0:
                if value is None:
                    value = "-"
                elif isinstance(value, pd.Timedelta):
                    value = format_timespan(value)
                values.append(value)
            rows.append(values)
        rows = zip_longest(*rows, fillvalue="")
        table_str = format_pretty_table(rows, setup.keys())
        # add indent
        table_str = table_str.split("\n")
        for row in table_str:
            printi(row, indent=1)

    print()

    # labels
    if isinstance(ds, ZarrDataset):
        printi("Available labels", indent=1)

        for label in ds.labels:
            printi(f"- {label}", indent=2)

    print()

    # timeseries
    # TODO time information

    # tiles
    if setup["tile"][0] is not None:
        m, M = ds.tile_coords.min().to_dict(), ds.tile_coords.max().to_dict()
        shape, _ = ds._load_array_info()
        s = {
            k: dv * nv
            for k, dv, nv in zip("xyz", reversed(ds.voxel_size), reversed(shape))
        }
        ax_names, extents = [], []
        for ax in TILE_INDEX_STR:
            ax = ax.split("_")[1]
            index = f"{ax}_coord"
            if index in m:
                delta = round(M[index] - m[index], 4) + s[ax]
                extents.append(delta)
                ax_names.append(ax)

        # tiles - index
        printi("Number of tiles", indent=1)
        desc = [f"{k}:{v}" for k, v in zip(ax_names, reversed(ds.tile_shape))]
        desc = ", ".join(desc)
        printi(f"({desc})", indent=2)

        print()

        # tiles - coords
        printi("Tile extents (um)", indent=1)
        desc = [f"{k}:{v}" for k, v in zip(ax_names, extents)]
        desc = ", ".join(desc)
        printi(f"({desc})", indent=2)

        print()

    # voxel size
    if isinstance(ds, DenseDataset):
        printi("Voxel size (um)", indent=1)

        voxel_size = [round(v, 4) for v in reversed(ds.voxel_size)]

        desc = [f"{k}:{v}" for k, v in zip("xyz", voxel_size)]
        desc = ", ".join(desc)
        printi(f"({desc})", indent=2)

    print()
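
The tile extent reported above is the span between the smallest and largest tile anchor coordinates plus the physical size of one tile (voxel size times array shape); a worked toy example with made-up numbers:

# two tiles along x anchored at 0 um and 100 um,
# each 512 px wide at 0.25 um/px -> 128 um of coverage per tile
min_coord, max_coord = 0.0, 100.0
tile_width_um = 512 * 0.25
extent_x = round(max_coord - min_coord, 4) + tile_width_um
assert extent_x == 228.0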
Example #11
                        datefmt="%H:%M:%S")

    logger = logging.getLogger(__name__)

    use_local = True

    if use_local:
        logger.info("using local cluster")
        cluster = LocalCluster(n_workers=4, threads_per_worker=4)
        client = Client(cluster)
    else:
        logger.info("using remote cluster")
        client = Client("10.109.20.6:8786")
    logger.info(client)

    src_ds = open_dataset("Y:/ARod/4F/20200317_No5_CamA")
    print(src_ds.inventory)

    logger.info(f"tile by {src_ds.tile_shape}")

    # INPUT (x, y, z) -> TRUE (z, x, y)
    src_ds.remap_tiling_axes({"x": "z", "y": "x", "z": "y"})
    src_ds.flip_tiling_axes(["x", "y"])

    print(src_ds.inventory)

    # import ipdb; ipdb.set_trace()

    dst_dir = "Y:/ARod/4F/20200317_No5_CamA"
    preview_mip(src_ds, dst_dir)