Example #1
def sizefilter(filename_src,
               dataset_src,
               filename_tgt,
               dataset_tgt,
               thr,
               dat_file=None):

    srcf = zarr.open(filename_src, mode="r")
    if not os.path.exists(filename_tgt):
        os.makedirs(filename_tgt)
    tgtf = zarr.open(filename_tgt, mode="a")
    tgtf.empty(
        name=dataset_tgt,
        shape=srcf[dataset_src].shape,
        compressor=numcodecs.GZip(6),
        dtype="uint64",
        chunks=srcf[dataset_src].chunks,
    )
    tgt = np.array(srcf[dataset_src][:])
    ids, counts = np.unique(tgt, return_counts=True)
    if dat_file is not None:
        np.savetxt(dat_file, counts, "%.4g")
    remove_ids = []
    for id, count in zip(ids, counts):
        if count <= thr:
            remove_ids.append(id)

    tgt[np.isin(tgt, remove_ids)] = BG_VAL
    tgtf[dataset_tgt][:] = tgt.astype(np.uint64)
    tgtf[dataset_tgt].attrs["offset"] = srcf[dataset_src].attrs["offset"]
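# A minimal, self-contained sketch (not part of the original module) of the pattern
# sizefilter() relies on: create a GZip-compressed dataset in a target container,
# write an array into it, and carry the "offset" attribute over. Paths and dataset
# names below are hypothetical.
import numpy as np
import numcodecs
import zarr

src = zarr.open("demo_src.n5", mode="a")
src.create_dataset("labels",
                   data=np.arange(8 ** 3, dtype="uint64").reshape(8, 8, 8),
                   chunks=(4, 4, 4),
                   compressor=numcodecs.GZip(6),
                   overwrite=True)
src["labels"].attrs["offset"] = (0, 0, 0)

tgt = zarr.open("demo_tgt.n5", mode="a")
tgt.empty(name="labels_filtered",
          shape=src["labels"].shape,
          chunks=src["labels"].chunks,
          dtype="uint64",
          compressor=numcodecs.GZip(6))
tgt["labels_filtered"][:] = src["labels"][:]
tgt["labels_filtered"].attrs["offset"] = src["labels"].attrs["offset"]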
Example #2
    def run(self):
        thrs = [127, 42]
        progress = 0.0
        self.set_progress_percentage(progress)
        for s in self.samples:
            filename = os.path.join(os.path.dirname(self.input().fn),
                                    s + ".n5")
            dataset_src = "clefts_cropped"
            dataset_tgt = "clefts_cropped_thr{0:}"
            f = zarr.open(filename, mode="a")
            for t in thrs:
                f.empty(
                    name=dataset_tgt.format(t),
                    shape=f[dataset_src].shape,
                    compressor=numcodecs.GZip(6),
                    dtype="uint8",
                    chunks=f[dataset_src].chunks,
                )
                f[dataset_tgt.format(t)][:] = (f[dataset_src][:] > t).astype(
                    np.uint8)
                f[dataset_tgt.format(
                    t)].attrs["offset"] = f[dataset_src].attrs["offset"]
            progress += 100.0 / len(self.samples)
            try:
                self.set_progress_percentage(progress)
            except:
                pass
        done = self.output().open("w")
        done.close()
Example #3
    def test_read_zarr(self):
        from z5py.dataset import Dataset
        dtypes = list(Dataset._dtype_dict.keys())
        zarr_compressors = {'blosc': numcodecs.Blosc(),
                            'zlib': numcodecs.Zlib(),
                            'raw': None,
                            'bzip2': numcodecs.BZ2()}

        # the conda-forge version of numcodecs is not up-to-date
        # for python 3.5 and GZip is missing;
        # that's why we check explicitly here so the test doesn't fail
        if hasattr(numcodecs, 'GZip'):
            zarr_compressors.update({'gzip': numcodecs.GZip()})

        zarr.open(self.path)
        for dtype in dtypes:
            for compression in zarr_compressors:
                data = np.random.randint(0, 127, size=self.shape).astype(dtype)
                # write the data with zarr
                key = 'test_%s_%s' % (dtype, compression)
                ar = zarr.open(os.path.join(self.path, key), mode='w',
                               shape=self.shape, chunks=self.chunks,
                               dtype=dtype, compressor=zarr_compressors[compression])
                ar[:] = data
                # read with z5py
                out = z5py.File(self.path)[key][:]
                self.assertEqual(data.shape, out.shape)
                self.assertTrue(np.allclose(data, out))
Example #4
def slicefilter(
    filename_src, dataset_src, filename_tgt, dataset_tgt, thr, dat_file=None
):

    srcf = zarr.open(filename_src, mode="r")
    if not os.path.exists(filename_tgt):
        os.makedirs(filename_tgt)
    tgtf = zarr.open(filename_tgt, mode="a")
    tgtf.empty(
        name=dataset_tgt,
        shape=srcf[dataset_src].shape,
        compressor=numcodecs.GZip(6),
        dtype="uint64",
        chunks=srcf[dataset_src].chunks,
    )
    tgt = np.array(srcf[dataset_src][:])

    ids, relabeled = np.unique(tgt, return_inverse=True)
    relabeled = relabeled.reshape(tgt.shape) + 1
    if BG_VAL1 in ids:
        relabeled[tgt == BG_VAL1] = 0
    if BG_VAL2 in ids:
        relabeled[tgt == BG_VAL2] = 0

    obj_slices = scipy.ndimage.measurements.find_objects(relabeled)
    set_to_bg = []
    for k, obs in enumerate(obj_slices):
        # find_objects returns None for labels that have no voxels
        if obs is not None:
            if relabeled[obs].shape[0] <= thr:
                set_to_bg.append(k + 1)

    tgt[np.isin(relabeled, set_to_bg)] = 0
    tgtf[dataset_tgt][:] = tgt.astype(np.uint64)
    tgtf[dataset_tgt].attrs["offset"] = srcf[dataset_src].attrs["offset"]
Example #5
def cc2(
    filename_src, dataset_src_high_thr, dataset_src_low_thr, filename_tgt, dataset_tgt
):
    srcf = zarr.open(filename_src, mode="r")
    if not os.path.exists(filename_tgt):
        os.makedirs(filename_tgt)
    assert (
        srcf[dataset_src_high_thr].attrs["offset"]
        == srcf[dataset_src_low_thr].attrs["offset"]
    )
    assert srcf[dataset_src_high_thr].shape == srcf[dataset_src_low_thr].shape
    tgtf = zarr.open(filename_tgt, mode="a")
    tgtf.empty(
        name=dataset_tgt,
        shape=srcf[dataset_src_high_thr].shape,
        compressor=numcodecs.GZip(6),
        dtype="uint64",
        chunks=srcf[dataset_src_high_thr].chunks,
    )
    data_high_thr = np.array(srcf[dataset_src_high_thr][:])
    data_low_thr = np.array(srcf[dataset_src_low_thr][:])
    tgt = np.ones(data_low_thr.shape, dtype=np.uint64)
    maxid = scipy.ndimage.label(data_low_thr, output=tgt)
    maxes = scipy.ndimage.maximum(
        data_high_thr, labels=tgt, index=list(range(1, maxid + 1))
    )
    maxes = np.array([0] + list(maxes))
    factors = maxes[tgt]

    tgt *= factors.astype(np.uint64)
    maxid = scipy.ndimage.label(tgt, output=tgt)
    tgtf[dataset_tgt][:] = tgt.astype(np.uint64)
    tgtf[dataset_tgt].attrs["offset"] = srcf[dataset_src_high_thr].attrs["offset"]
    tgtf[dataset_tgt].attrs["max_id"] = maxid
Example #6
def crop_to_seg(filename_src, dataset_src, filename_tgt, dataset_tgt, offset, shape):
    srcf = zarr.open(filename_src, mode="r")
    if not os.path.exists(filename_tgt):
        os.makedirs(filename_tgt)
    tgtf = zarr.open(filename_tgt, mode="a")
    chunk_size = tuple(min(c, s) for c, s in zip(srcf[dataset_src].chunks, shape))
    if os.path.exists(os.path.join(filename_tgt, dataset_tgt)):
        assert (
            tgtf[dataset_tgt].shape == shape
            and tgtf[dataset_tgt].dtype == srcf[dataset_src].dtype
            and tgtf[dataset_tgt].chunks == chunk_size
        )
        skip_ds_creation = True

    else:
        skip_ds_creation = False
    if not skip_ds_creation:
        tgtf.empty(
            name=dataset_tgt,
            shape=shape,
            compression=numcodecs.GZip(6),
            dtype=srcf[dataset_src].dtype,
            chunks=chunk_size,
        )
    bb = tuple(slice(off, off + sh, None) for off, sh in zip(offset, shape))
    tgtf[dataset_tgt][:] = srcf[dataset_src][bb]
    tgtf[dataset_tgt].attrs["offset"] = offset[::-1]
Example #7
def save_mask(data_file, data_dsname_raw, data_dsname_mask, chunks, cut_axis):
    f = zarr.open(data_file, "a")

    raw_ds = f[data_dsname_raw]
    mask_ds = f.empty(name=data_dsname_mask, shape=raw_ds.shape, chunks=(256,256,256), dtype=np.uint64,
                      compressor=numcodecs.GZip(6))
    mask_ds.attrs["pixelResolution"] = raw_ds.attrs["pixelResolution"]
    start = (0,0,0)
    end = mask_ds.shape
    boundary = int(0.5 * mask_ds.shape[cut_axis])
    for z, y, x in itertools.product(range(start[0], end[0], chunks[0]),
                                     range(start[1], end[1], chunks[1]),
                                     range(start[2], end[2], chunks[2])):
        sl = (
            slice(z, min(z+chunks[0], end[0])),
            slice(y, min(y+chunks[1], end[1])),
            slice(x, min(x+chunks[2], end[2]))
        )


        if sl[cut_axis].stop <= boundary:
            mask_ds[sl] = np.ones(get_slice_shape(sl), dtype=np.uint64)
        elif boundary <= sl[cut_axis].start:
            mask_ds[sl] = np.zeros(get_slice_shape(sl), dtype=np.uint64)
        else:
            sl_before = tuple(slice(sl[k].start, sl[k].stop, sl[k].step) if k!=cut_axis else slice(sl[k].start, boundary,
                                                                                                   sl[k].step) for k in
                              range(3))
            mask_ds[sl_before] = np.ones(get_slice_shape(sl_before), dtype=np.uint64)
            sl_after = tuple(slice(sl[k].start, sl[k].stop, sl[k].step) if k!=cut_axis else slice(boundary, sl[k].stop,
                                                                                                   sl[k].step) for k in
                              range(3))
            mask_ds[sl_after] = np.zeros(get_slice_shape(sl_after), dtype=np.uint64)
Example #8
def _decode_codec_metadata(meta: Mapping) -> Optional[Codec]:
    if meta is None:
        return None

    # only support gzip for now
    if meta['codec'] != 'https://purl.org/zarr/spec/codec/gzip/1.0':
        raise NotImplementedError
    codec = numcodecs.GZip(level=meta['configuration']['level'])
    return codec
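# A minimal usage sketch for _decode_codec_metadata() (the metadata values below are
# illustrative, but the codec URI is the one checked above):
gzip_meta = {
    "codec": "https://purl.org/zarr/spec/codec/gzip/1.0",
    "configuration": {"level": 5},
}
codec = _decode_codec_metadata(gzip_meta)    # numcodecs.GZip(level=5)
assert _decode_codec_metadata(None) is None  # absent metadata means "no compressor"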
Example #9
def add_ds(target, name, shape, dtype, chunks, resolution, offset, **kwargs):
    logging.info("Preparing dataset {0:} in {1:}".format(name, target.path))
    ds = target.empty(name=name,
                      shape=shape,
                      chunks=chunks,
                      dtype=dtype,
                      compressor=numcodecs.GZip(6))
    ds.attrs["resolution"] = resolution
    ds.attrs["offset"] = offset
    for k in kwargs:
        ds.attrs[k] = kwargs[k]
    return ds
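# A minimal usage sketch for add_ds() (hypothetical container path, dataset name and
# metadata; assumes the module-level imports logging, zarr and numcodecs):
root = zarr.open("demo_container.n5", mode="a")
labels = add_ds(root,
                name="labels",
                shape=(64, 64, 64),
                dtype="uint64",
                chunks=(32, 32, 32),
                resolution=(4, 4, 4),
                offset=(0, 0, 0),
                description="demo dataset")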
Example #10
    def run(self):
        thr_high = 127
        thr_low = 42
        dataset_src = "clefts_cropped_thr{0:}"
        dataset_tgt = "clefts_cropped_thr{0:}_cc{1:}".format(thr_high, thr_low)
        progress = 0.0
        self.set_progress_percentage(progress)
        for s in self.samples:
            filename = os.path.join(os.path.dirname(self.input().fn), s + ".n5")
            f = zarr.open(filename, mode="a")
            assert (
                f[dataset_src.format(thr_high)].attrs["offset"]
                == f[dataset_src.format(thr_low)].attrs["offset"]
            )
            assert (
                f[dataset_src.format(thr_high)].shape
                == f[dataset_src.format(thr_low)].shape
            )
            f.empty(
                name=dataset_tgt,
                shape=f[dataset_src.format(thr_high)].shape,
                compressor=numcodecs.GZip(6),
                dtype="uint64",
                chunks=f[dataset_src.format(thr_high)].chunks,
            )
            data_high_thr = np.array(f[dataset_src.format(thr_high)][:])
            data_low_thr = np.array(f[dataset_src.format(thr_low)][:])
            tgt = np.ones(data_low_thr.shape, dtype=np.uint64)
            maxid = scipy.ndimage.label(data_low_thr, output=tgt)
            maxes = scipy.ndimage.maximum(
                data_high_thr, labels=tgt, index=list(range(1, maxid + 1))
            )
            maxes = np.array([0] + list(maxes))
            factors = maxes[tgt]
            tgt *= factors.astype(np.uint64)
            maxid = scipy.ndimage.label(tgt, output=tgt)
            f[dataset_tgt][:] = tgt.astype(np.uint64)
            f[dataset_tgt].attrs["offset"] = f[dataset_src.format(thr_high)].attrs[
                "offset"
            ]
            f[dataset_tgt].attrs["max_id"] = maxid
            progress += 100.0 / len(self.samples)
            try:
                self.set_progress_percentage(progress)
            except:
                pass
        done = self.output().open("w")
        done.close()
Example #11
def cc(filename_src, dataset_src, filename_tgt, dataset_tgt):
    srcf = zarr.open(filename_src, mode="r")
    if not os.path.exists(filename_tgt):
        os.makedirs(filename_tgt)
    tgtf = zarr.open(filename_tgt, mode="a")
    tgtf.empty(name=dataset_tgt,
               shape=srcf[dataset_src].shape,
               compressor=numcodecs.GZip(6),
               dtype="uint64",
               chunks=srcf[dataset_src].chunks)
    data = np.array(srcf[dataset_src][:])
    tgt = np.ones(data.shape, dtype=np.uint64)
    maxid = scipy.ndimage.label(data, output=tgt)
    tgtf[dataset_tgt][:] = tgt.astype(np.uint64)
    if "offset" in srcf[dataset_src].attrs.keys():
        tgtf[dataset_tgt].attrs["offset"] = srcf[dataset_src].attrs["offset"]
    tgtf[dataset_tgt].attrs["max_id"] = maxid
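# A minimal usage sketch for cc() (hypothetical paths and dataset names; assumes the
# module-level imports os, numpy as np, scipy.ndimage, zarr and numcodecs):
inf = zarr.open("cc_demo_in.n5", mode="a")
inf.create_dataset("binary",
                   data=(np.random.rand(32, 32, 32) > 0.95).astype("uint8"),
                   chunks=(16, 16, 16),
                   compressor=numcodecs.GZip(6),
                   overwrite=True)
cc("cc_demo_in.n5", "binary", "cc_demo_out.n5", "binary_cc")
out = zarr.open("cc_demo_out.n5", mode="r")["binary_cc"]
print(out.attrs["max_id"], "connected components")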
Example #12
def threshold(filename_src, dataset_src, filename_tgt, dataset_tgt, thr):

    srcf = zarr.open(filename_src, mode="r")
    if not os.path.exists(filename_tgt):
        os.makedirs(filename_tgt)
    tgtf = zarr.open(filename_tgt, mode="a")
    tgtf.create_dataset(
        name=dataset_tgt,
        shape=srcf[dataset_src].shape,
        compressor=numcodecs.GZip(6),
        dtype="uint8",
        chunks=srcf[dataset_src].chunks,
    )
    ds = srcf[dataset_src][:]
    print(np.sum(ds > thr))
    print(np.min(ds))
    print(np.max(ds))
    tgtf[dataset_tgt][:] = (srcf[dataset_src][:] > thr).astype(np.uint8)
    if "offset" in srcf[dataset_src].attrs.keys():
        tgtf[dataset_tgt].attrs["offset"] = srcf[dataset_src].attrs["offset"]
Example #13
def initialize_group(
    group_path: Pathlike,
    arrays: Sequence[NDArray[Any]],
    array_paths: Sequence[str],
    chunks: Sequence[int],
    group_attrs: Dict[str, Any] = {},
    compressor: Codec = numcodecs.GZip(-1),
    array_attrs: Optional[Sequence[Dict[str, Any]]] = None,
    modes: Tuple[AccessMode, AccessMode] = ("w", "w"),
    group_kwargs: Dict[str, Any] = {},
    array_kwargs: Dict[str, Any] = {},
) -> zarr.hierarchy.Group:
    group_access_mode, array_access_mode = modes
    group = access(group_path,
                   mode=group_access_mode,
                   attrs=group_attrs,
                   **group_kwargs)

    if array_attrs is None:
        _array_attrs: Tuple[Dict[str, Any], ...] = ({},) * len(arrays)
    else:
        _array_attrs = array_attrs

    for name, arr, attrs, chnks in zip(array_paths,
                                       arrays,
                                       _array_attrs,
                                       chunks):
        path = os.path.join(group.path, name)
        z_arr = zarr.open_array(
            store=group.store,
            mode=array_access_mode,
            fill_value=0,
            path=path,
            shape=arr.shape,
            dtype=arr.dtype,
            chunks=chnks,
            compressor=compressor,
            **array_kwargs)
        z_arr.attrs.update(attrs)

    return group
Example #14
    def run(self):
        progress = 0.0
        self.set_progress_percentage(progress)
        if "unaligned" in self.de:
            aligned = False
        else:
            aligned = True
        for s in self.samples:
            filename = os.path.join(os.path.dirname(self.input().fn), self.de,
                                    s + ".n5")
            datasets_src = ["clefts", "pre_dist", "post_dist"]
            datasets_tgt = [
                "clefts_cropped", "pre_dist_cropped", "post_dist_cropped"
            ]
            off = offsets[s][aligned]
            sh = shapes[s][aligned]
            f = zarr.open(filename, mode="a")
            for dss, dst in zip(datasets_src, datasets_tgt):
                chunk_size = tuple(
                    min(c, shi) for c, shi in zip(f[dss].chunks, sh))
                f.create_dataset(
                    name=dst,
                    shape=sh,
                    compressor=numcodecs.GZip(6),
                    dtype=f[dss].dtype,
                    chunks=chunk_size,
                )
                bb = tuple(slice(o, o + shi, None) for o, shi in zip(off, sh))
                f[dst][:] = f[dss][bb]
                f[dst].attrs["offset"] = off[::-1]

                progress += 100.0 / (len(self.samples) * len(datasets_src))
                try:
                    self.set_progress_percentage(progress)
                except:
                    pass

        done = self.output().open("w")
        done.close()
Example #15
def set_mask_to_zero(
    filename_src,
    dataset_src,
    filename_mask,
    dataset_mask,
    filename_tgt,
    dataset_tgt,
    offset,
    shape,
):
    logging.info("setting mask to zero for " + filename_src + "/" +
                 dataset_src)
    srcf = zarr.open(filename_src, mode="r")
    maskf = zarr.open(filename_mask, mode="r")

    if not os.path.exists(filename_tgt):
        os.makedirs(filename_tgt)
    tgtf = zarr.open(filename_tgt, mode="a")
    # grps = ""
    # for grp in dataset_tgt.split("/")[:-1]:
    #     grps += grp
    #     if not os.path.exists(os.path.join(filename_tgt, grps)):
    #         tgtf.create_group(grps)
    #     grps += "/"
    chunk_size = tuple(
        min(c, s) for c, s in zip(srcf[dataset_src].chunks, shape))

    tgtf.empty(
        dataset_tgt,
        shape=shape,
        compressor=numcodecs.GZip(6),
        dtype=srcf[dataset_src].dtype,
        chunks=chunk_size,
    )
    a = srcf[dataset_src][:]
    a[maskf[dataset_mask][:] == 0] = 0
    tgtf[dataset_tgt][:] = a
    tgtf[dataset_tgt].attrs["offset"] = offset[::-1]
Example #16
import zarr
import numcodecs
from skimage.data import astronaut

# choose chunks s.t. we do have overhanging edge-chunks
CHUNKS = (100, 100, 1)
STR_TO_COMPRESSOR = {
    'gzip': numcodecs.GZip(),
    'blosc': numcodecs.Blosc(),
    'zlib': numcodecs.Zlib()
}


def generate_zarr_format(compressors=['gzip', 'blosc', 'zlib', None]):
    path = '../data/zarr.zr'
    im = astronaut()

    f = zarr.open(path)
    for compressor in compressors:
        name = compressor if compressor is not None else 'raw'
        compressor_impl = STR_TO_COMPRESSOR[
            compressor] if compressor is not None else None
        f.create_dataset(name,
                         data=im,
                         chunks=CHUNKS,
                         compressor=compressor_impl)
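# A minimal read-back sketch (not part of the original script): after running
# generate_zarr_format(), the written datasets can be inspected with zarr directly.
def check_zarr_format(path='../data/zarr.zr'):
    f = zarr.open(path, mode='r')
    for name, ds in f.arrays():
        print(name, ds.shape, ds.dtype, ds.compressor)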


# this needs PR https://github.com/zarr-developers/zarr/pull/309
def generate_n5_format(compressors=['gzip', None]):
    path = '../data/zarr.n5'
Example #17
def prepare_cell_inference(n_jobs, raw_data_path, dataset_id, sigma, raw_ds,
                           setup_path, output_path, factor, min_sc, max_sc,
                           float_range, safe_scale, n_cpus,
                           finish_interrupted):
    # assert os.path.exists(setup_path), "Path to experiment directory does not exist"
    # sys.path.append(setup_path)
    # import unet_template
    if raw_data_path.endswith('/'):
        raw_data_path = raw_data_path[:-1]
    assert os.path.exists(
        raw_data_path
    ), "Path to N5 dataset with raw data and mask does not exist"
    # assert os.path.exists(os.path.join(setup_path, "blur.meta"))
    rf = zarr.open(raw_data_path, mode="r")
    assert raw_ds in rf, "Raw data not present in N5 dataset"
    shape_vc = rf[raw_ds].shape

    output_dir, out_file = get_output_paths(raw_data_path, setup_path,
                                            output_path)

    if not finish_interrupted:
        names = blur_tf(size, sigma)
        input_shape_vc = Coordinate(size)

        output_shape_wc = Coordinate(size) * voxel_size
        output_shape_vc = Coordinate(size)
        chunk_shape_vc = Coordinate(size)
        chunk_shape_wc = output_shape_wc

        full_shape_wc = Coordinate(shape_vc) * voxel_size
        full_shape_vc_output = Coordinate(shape_vc)

        # offset file, e.g. "(...)/setup01/HeLa_Cell2_4x4x4nm/offsets_volumes_masks_foreground_shape180x180x180.json"
        offset_filename = "offsets_{0:}_shape{1:}x{2:}x{3:}.json".format(
            mask_ds.replace("/", "_"), *output_shape_wc)
        offset_file = os.path.join(output_dir, offset_filename)

        # prepare datasets
        factor, scale, shift = get_contrast_adjustment(rf, raw_ds, factor,
                                                       min_sc, max_sc)

        f = zarr.open(out_file)
        dataset_target_keys = ["raw_blurred"]
        for dstk in dataset_target_keys:
            if dstk not in f:
                ds = f.empty(name=dstk,
                             shape=full_shape_vc_output,
                             compressor=numcodecs.GZip(6),
                             dtype="uint8",
                             chunks=chunk_shape_vc)
            else:
                ds = f[dstk]
            ds.attrs["resolution"] = tuple(voxel_size)[::-1]
            ds.attrs["offset"] = (0, 0, 0)
            ds.attrs["raw_data_path"] = raw_data_path
            ds.attrs["raw_ds"] = raw_ds
            ds.attrs["parent_dataset_id"] = dataset_id
            ds.attrs["sigma"] = sigma
            ds.attrs["raw_scale"] = scale
            ds.attrs["raw_shift"] = shift
            ds.attrs["raw_normalize_factor"] = factor
            ds.attrs["float_range"] = float_range
            ds.attrs["safe_scale"] = safe_scale

        if not os.path.exists(offset_file):
            generate_full_list(offset_file, output_shape_wc, raw_data_path,
                               raw_ds)
        shapes_file = os.path.join(
            setup_path, "shapes_steps_{0:}x{1:}x{2:}.json".format(*size))
        if not os.path.exists(shapes_file):
            shapes = {
                "input_shape_vc": tuple(int(isv) for isv in input_shape_vc),
                "output_shape_vc": tuple(int(osv) for osv in output_shape_vc),
                "chunk_shape_vc": tuple(int(csv) for csv in chunk_shape_vc)
            }
            with open(shapes_file, "w") as f:
                json.dump(shapes, f)

    p_proc = re.compile(r"list_gpu_\d+_\S+_processed.txt")
    print(any([p_proc.match(f) is not None for f in os.listdir(out_file)]))
    if any([p_proc.match(f) is not None for f in os.listdir(out_file)]):
        print("Redistributing offset lists over {0:} jobs".format(n_jobs))
        redistribute_offset_lists(list(range(n_jobs)), out_file)
    else:
        with open(offset_file, 'r') as f:
            offset_list = json.load(f)
            offset_list_from_precomputed(offset_list, list(range(n_jobs)),
                                         out_file)
    return input_shape_vc, output_shape_vc, chunk_shape_vc
Example #18
    def run(self):

        src = os.path.join(config_loader.get_config()["synapses"]["cremieval_path"], "{0:}/{1:}.n5")
        tgt = os.path.join(os.path.dirname(self.input().fn), "{0:}", "{1:}.n5")
        output_shape = (71, 650, 650)
        gpu_list = []
        for i in range(8):
            nvsmi = subprocess.Popen(
                "nvidia-smi -d PIDS -q -i {0:}".format(i),
                shell=True,
                stdout=subprocess.PIPE,
            ).stdout.read().decode()  # stdout.read() returns bytes; decode for the substring check
            if "None" in nvsmi:
                gpu_list.append(i)
        completed = []
        for de in self.data_eval:
            for s in self.samples:
                srcf = zarr.open(src.format(de, s), mode="r")
                shape = srcf["volumes/raw"].shape
                tgtf = zarr.open(tgt.format(de, s), mode="a")
                if not os.path.exists(os.path.join(tgt.format(de, s), "clefts")):
                    tgtf.empty(
                        name="clefts",
                        shape=shape,
                        compressor=numcodecs.GZip(6),
                        dtype="uint8",
                        chunks=output_shape,
                    )
                    completed.append(False)
                else:
                    if self.check_completeness()[0]:
                        completed.append(True)
                    else:
                        completed.append(False)
                if not os.path.exists(os.path.join(tgt.format(de, s), "pre_dist")):
                    tgtf.empty(
                        name="pre_dist",
                        shape=shape,
                        compressor=numcodecs.GZip(6),
                        dtype="uint8",
                        chunks=output_shape,
                    )
                    completed.append(False)
                else:
                    if self.check_completeness()[0]:
                        completed.append(True)
                    else:
                        completed.append(False)

                if not os.path.exists(os.path.join(tgt.format(de, s), "post_dist")):

                    tgtf.empty(
                        name="post_dist",
                        shape=shape,
                        compressor=numcodecs.GZip(6),
                        dtype="uint8",
                        chunks=output_shape,
                    )
                    completed.append(False)
                else:
                    if self.check_completeness()[0]:
                        completed.append(True)
                    else:
                        completed.append(False)
                get_offset_lists(
                    shape, gpu_list, tgt.format(de, s), output_shape=output_shape
                )
        if all(completed):
            self.finish()
            return
        self.submit_inference(self.data_eval, gpu_list)

        reprocess_attempts = 0
        while reprocess_attempts < 4:
            complete, reprocess_list = self.check_completeness(gpu_list)
            if complete:
                self.finish()
                return
            else:
                self.set_status_message(
                    "Reprocessing {0:}, try {1:}".format(
                        list(reprocess_list), reprocess_attempts
                    )
                )
                self.submit_inference(tuple(reprocess_list), gpu_list)
                reprocess_attempts += 1
        if reprocess_attempts >= 4:
            raise AssertionError
Example #19
    def _ensure_datasets_exist(self, volume_config):
        dtype = volume_config["zarr"]["creation-settings"]["dtype"]
        create_if_necessary = volume_config["zarr"]["create-if-necessary"]
        writable = volume_config["zarr"]["writable"]
        if writable is None:
            writable = create_if_necessary

        mode = 'r'
        if writable:
            mode = 'a'
        self._filemode = mode

        block_shape = volume_config["zarr"]["creation-settings"]["chunk-shape"][::-1]

        global_offset = volume_config["zarr"]["global-offset"][::-1]
        bounding_box_zyx = np.array(volume_config["geometry"]["bounding-box"])[:,::-1]
        creation_shape = np.array(volume_config["zarr"]["creation-settings"]["shape"][::-1])
        replace_default_entries(creation_shape, bounding_box_zyx[1] - global_offset)

        compression = volume_config["zarr"]["creation-settings"]["compression"]
        if compression == 'gzip':
            compressor = numcodecs.GZip()
        elif compression.startswith('blosc-'):
            cname = compression[len('blosc-'):]
            compressor = numcodecs.Blosc(cname)
        else:
            assert compression == "", f"Unimplemented compression: {compression}"

        if create_if_necessary:
            max_scale = volume_config["zarr"]["creation-settings"]["max-scale"]
            if max_scale == -1:
                if -1 in creation_shape:
                    raise RuntimeError("Can't auto-determine the appropriate max-scale to create "
                                       "(or extend) the data with, because you didn't specify a "
                                       "volume creation shape (or bounding box).")
                max_scale = choose_pyramid_depth(creation_shape, 512)

            available_scales = [*range(1+max_scale)]
        else:
            available_scales = volume_config["geometry"]["available-scales"]

            if not os.path.exists(self._path):
                raise RuntimeError(f"File does not exist: {self._path}\n"
                                   "You did not specify 'create-if-necessary' in the config, so I won't create it.\n")

            if self._dataset_name and not os.path.exists(f"{self._path}/{self._dataset_name}"):
                raise RuntimeError(f"File does not exist: {self._path}/{self._dataset_name}\n"
                                   "You did not specify 'create-if-necessary' in the config, so I won't create it.\n")

        for scale in available_scales:
            if scale == 0:
                name = self._dataset_name
            else:
                name = self._dataset_name[:-1] + f'{scale}'

            if name not in self.zarr_file:
                if not writable:
                    raise RuntimeError(f"Dataset for scale {scale} does not exist, and you "
                                       "didn't specify 'writable' in the config, so I won't create it.")

                if dtype == "auto":
                    raise RuntimeError(f"Can't create Zarr array {self._path}/{self._dataset_name}: "
                                       "No dtype specified in the config.")

                # Use 128 if the user didn't specify a chunkshape
                replace_default_entries(block_shape, 3*[128])

                # zarr misbehaves if the chunks are larger than the shape,
                # which could happen here if we aren't careful (for higher scales).
                scaled_shape = (creation_shape // (2**scale))
                chunks = np.minimum(scaled_shape, block_shape).tolist()
                if (chunks != block_shape) and (scale == 0):
                    logger.warning(f"Block shape ({block_shape}) is too small for "
                                   f"the dataset shape ({creation_shape}). Shrinking block shape.")

                self._zarr_datasets[scale] = self.zarr_file.create_dataset( name,
                                                                            shape=scaled_shape.tolist(),
                                                                            dtype=np.dtype(dtype),
                                                                            chunks=chunks,
                                                                            compressor=compressor )
Example #20
import numpy as np
import numcodecs
import zarr


def write_n5(path, shape, block_size, compressor):
  store = zarr.N5Store(path)
  data = np.arange(np.prod(shape), dtype=np.uint16)
  data = data.reshape(shape)
  data_transpose = data.transpose()
  z = zarr.zeros(
      data_transpose.shape,
      chunks=block_size[::-1],
      store=store,
      dtype=data.dtype,
      overwrite=True,
      compressor=compressor)
  z[...] = data_transpose


write_n5(path='raw', shape=[5, 4], block_size=[3, 2], compressor=None)
write_n5(
    path='gzip', shape=[5, 4], block_size=[3, 2], compressor=numcodecs.GZip())
write_n5(
    path='bzip2', shape=[5, 4], block_size=[3, 2], compressor=numcodecs.BZ2())
write_n5(
    path='xz',
    shape=[5, 4],
    block_size=[3, 2],
    compressor=numcodecs.LZMA(preset=4))
write_n5(
    path='blosc', shape=[5, 4], block_size=[3, 2], compressor=numcodecs.Blosc())
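# A minimal read-back sketch (illustrative): reopen the N5 arrays written above and
# check them against the source data.
expected = np.arange(20, dtype=np.uint16).reshape([5, 4])
for name in ('raw', 'gzip', 'bzip2', 'xz', 'blosc'):
    arr = zarr.open(store=zarr.N5Store(name), mode='r')
    np.testing.assert_array_equal(arr[...].transpose(), expected)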
Example #21
from typing import Tuple, List, Union
from pathlib import Path

import logging
import time
import dask
import dask.array as da
from fst.pyramid import lazy_pyramid, get_downsampled_offset
import numcodecs
import zarr

OUTPUT_DTYPES = ("same", "uint8", "uint16")
OUTPUT_FMTS = ("n5",)

program_name = "dat_to_n5.py"

grid_spacing_unit = "nm"
max_chunksize = 1024
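# level=-1 falls back to zlib's default compression level (currently equivalent to 6)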
compressor = numcodecs.GZip(level=-1)
# all channels will be stored as subgroups under root_group_path
root_group_path = Path("volumes/raw/")
# raw data are stored z | c y x, we will split images into two channels along the channel axis
channel_dim = 0
downscale_factor = 2

# set up logging
logger = logging.getLogger(program_name)
c_handler = logging.StreamHandler()
c_formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
c_handler.setFormatter(c_formatter)
logger.addHandler(c_handler)
logger.setLevel(logging.INFO)

Example #22
def prepare_cell_inference(n_jobs, raw_data_path, dataset_id, iteration,
                           raw_ds, mask_ds, setup_path, output_path, factor,
                           min_sc, max_sc, float_range, safe_scale, n_cpus,
                           finish_interrupted):
    assert os.path.exists(
        setup_path), "Path to experiment directory does not exist"
    sys.path.append(setup_path)
    import setup_config

    if raw_data_path.endswith('/'):
        raw_data_path = raw_data_path[:-1]
    assert os.path.exists(
        raw_data_path
    ), "Path to N5 dataset with raw data and mask does not exist"
    assert os.path.exists(
        os.path.join(
            setup_path,
            "{0:}_train_checkpoint_{1:}.meta".format(setup_config.network_name,
                                                     iteration)))
    assert os.path.exists(
        os.path.join(
            setup_path, "{0:}_train_checkpoint_{1:}.index".format(
                setup_config.network_name, iteration)))
    assert os.path.exists(
        os.path.join(
            setup_path,
            "{0:}_train_checkpoint_{1:}.data-00000-of-00001".format(
                setup_config.network_name, iteration)))
    assert os.path.exists(
        os.path.join(setup_path, setup_config.network_name + "_io_names.json"))
    rf = zarr.open(raw_data_path, mode="r")
    assert raw_ds in rf, "Raw data not present in N5 dataset"
    if mask_ds is not None:
        assert mask_ds in rf, "Mask data not present in N5 dataset"
    shape_vc = rf[raw_ds].shape

    output_dir, out_file = get_output_paths(raw_data_path, setup_path,
                                            output_path)

    if not finish_interrupted:
        net_name, input_shape_vc, output_shape_vc = setup_config.build_net(
            steps=setup_config.steps_inference, mode="inference")
        voxel_size_input = setup_config.voxel_size
        voxel_size_output = setup_config.voxel_size

        output_shape_wc = Coordinate(output_shape_vc) * voxel_size_output
        chunk_shape_vc = output_shape_vc
        chunk_shape_wc = Coordinate(output_shape_vc) * voxel_size_output

        full_shape_wc = Coordinate(shape_vc) * voxel_size_input
        full_shape_vc_output = full_shape_wc / voxel_size_output

        # offset file, e.g. "(...)/setup01/HeLa_Cell2_4x4x4nm/offsets_volumes_masks_foreground_shape180x180x180.json"
        if mask_ds is not None:
            offset_filename = "offsets_{0:}_shape{1:}x{2:}x{3:}.json".format(
                mask_ds.replace("/", "_"), *output_shape_wc)
        else:
            offset_filename = "offsets_{0:}_shape{1:}x{2:}x{3:}.json".format(
                "nomask", *output_shape_wc)
        offset_file = os.path.join(output_dir, offset_filename)

        # prepare datasets
        factor, scale, shift = get_contrast_adjustment(rf, raw_ds, factor,
                                                       min_sc, max_sc)

        f = zarr.open(out_file)
        for out_name in setup_config.output_names:
            if out_name not in f:
                ds = f.empty(name=out_name + "_predicted",
                             shape=full_shape_vc_output,
                             compressor=numcodecs.GZip(6),
                             dtype="uint8",
                             chunks=chunk_shape_vc)
            else:
                ds = f[out_name + "_predicted"]
            ds.attrs["resolution"] = tuple(voxel_size_output)[::-1]
            ds.attrs["offset"] = (0, 0, 0)
            ds.attrs["raw_data_path"] = raw_data_path
            ds.attrs["raw_ds"] = raw_ds
            ds.attrs["parent_dataset_id"] = dataset_id
            ds.attrs["iteration"] = iteration
            ds.attrs["raw_scale"] = scale
            ds.attrs["raw_shift"] = shift
            ds.attrs["raw_normalize_factor"] = factor
            ds.attrs["float_range"] = float_range
            ds.attrs["safe_scale"] = safe_scale

        if not os.path.exists(offset_file):
            if mask_ds is not None:
                generate_list_for_mask(offset_file, output_shape_wc,
                                       raw_data_path, mask_ds, n_cpus)
            else:
                generate_full_list(offset_file, output_shape_wc, raw_data_path,
                                   raw_ds)
        shapes_file = os.path.join(
            setup_path,
            "shapes_steps{0:}.json".format(setup_config.steps_inference))
        if not os.path.exists(shapes_file):
            shapes = {
                "input_shape_vc": tuple(int(isv) for isv in input_shape_vc),
                "output_shape_vc": tuple(int(osv) for osv in output_shape_vc),
                "chunk_shape_vc": tuple(int(csv) for csv in chunk_shape_vc)
            }
            with open(shapes_file, "w") as f:
                json.dump(shapes, f)

    p_proc = re.compile(r"list_gpu_\d+_\S+_processed.txt")
    print(any([p_proc.match(f) is not None for f in os.listdir(out_file)]))
    if any([p_proc.match(f) is not None for f in os.listdir(out_file)]):
        print("Redistributing offset lists over {0:} jobs".format(n_jobs))
        redistribute_offset_lists(list(range(n_jobs)), out_file)
    else:
        with open(offset_file, 'r') as f:
            offset_list = json.load(f)
            offset_list_from_precomputed(offset_list, list(range(n_jobs)),
                                         out_file)
    return input_shape_vc, output_shape_vc, chunk_shape_vc
Example #23
                    help="Maximum distance for stardist computation",
                    type=float,
                    default=None)
args = parser.parse_args()
directory = args.directory
max_dist = args.max_dist

# generate a dataset with a binary sphere
sphere = raster_geometry.sphere(200, 70).astype(
    np.uint64)  # image size: 200, radius: 70
sphere2 = raster_geometry.sphere(200, 50).astype(np.uint64)
sphere = (sphere + sphere2 - 1).astype(np.uint64)
f = zarr.open(os.path.join(directory, "sphere.n5"), mode="a")
f.create_dataset(name="sphere",
                 shape=sphere.shape,
                 compressor=numcodecs.GZip(6),
                 dtype=sphere.dtype,
                 chunks=(50, 50, 50),
                 overwrite=True)
f["sphere"].attrs["offset"] = (0, 0, 0)
f["sphere"].attrs["resolution"] = (1, 1, 1)
f["sphere"][:] = sphere

# declare arrays to use
labels = gp.ArrayKey("LABELS")
stardists = gp.ArrayKey("STARDIST")

# prepare requests
scan_request = gp.BatchRequest()
scan_request[stardists] = gp.Roi((0, 0, 0), (50, 50, 50))
request = gp.BatchRequest()
Example #24
def prepare_cell_inference(
    n_jobs: int,
    raw_data_path: str,
    dataset_id: str,
    iteration: int,
    raw_ds: str,
    mask_ds: Optional[str],
    setup_path: str,
    output_path: Optional[str],
    factor: Optional[int],
    min_sc: Optional[int],
    max_sc: Optional[int],
    float_range: Tuple[int, int],
    safe_scale: bool,
    n_cpus: int,
    finish_interrupted: bool,
    resolution: Optional[Tuple[int, int, int]] = None
) -> Tuple[Coordinate, Coordinate, Coordinate]:
    """
    Set up output directories for inference, prepare offset lists and read input shape, output shape and chunk shape
    from network setup.

    Args:
        n_jobs: Number of jobs to split inference over.
        raw_data_path: Path to n5 container that contains raw data.
        dataset_id: Identifier of parent dataset.
        iteration: Iteration to pull inference for.
        raw_ds: Dataset in n5 container (`raw_data_path`) for raw data.
        mask_ds: Dataset in n5 container (`raw_data_path`) for mask data. Can be None if no mask exists.
        setup_path: Path containing setup.
        output_path: N5 container to save output to, autogenerated if None.
        factor: Factor to normalize raw data by; inferred from the datatype if None.
        min_sc: Minimum intensity (mapped to -1).
        max_sc: Maximum intensity (mapped to 1).
        float_range: Range of output floats for conversion to uint8.
        safe_scale: If True, values are scaled such that all values within `float_range` fall within (0, 255)
            and are not cropped. If False, values at the lower end of `float_range` may be scaled to < 0 and then
            cropped to 0.
        n_cpus: Number of CPUs to use per job.
        finish_interrupted: Whether this run should finish a previously interrupted inference job.
        resolution: Resolution of the raw data; inferred from metadata if None.

    Returns:
        Input shape, output shape and chunk shape in voxel coordinates.
    """
    # todo: use setup_utils instead
    assert os.path.exists(
        setup_path), "Path to experiment directory does not exist"
    sys.path.append(setup_path)
    import unet_template

    if raw_data_path.endswith('/'):
        raw_data_path = raw_data_path[:-1]
    assert os.path.exists(
        raw_data_path
    ), "Path to N5 dataset with raw data and mask does not exist"
    assert os.path.exists(
        os.path.join(setup_path,
                     "unet_train_checkpoint_{0:}.meta".format(iteration)))
    assert os.path.exists(
        os.path.join(setup_path,
                     "unet_train_checkpoint_{0:}.index".format(iteration)))
    assert os.path.exists(
        os.path.join(
            setup_path,
            "unet_train_checkpoint_{0:}.data-00000-of-00001".format(
                iteration)))
    assert os.path.exists(os.path.join(setup_path, "net_io_names.json"))
    rf = zarr.open(raw_data_path, mode="r")
    assert raw_ds in rf, "Raw data not present in N5 dataset"
    if mask_ds is not None:
        assert mask_ds in rf, "Mask data not present in N5 dataset"
    shape_vc = rf[raw_ds].shape

    output_dir, out_file = get_output_paths(raw_data_path, setup_path,
                                            output_path, iteration)

    if not finish_interrupted:
        net_name, input_shape_vc, output_shape_vc = unet_template.build_net(
            steps=unet_template.steps_inference, mode="inference")
        voxel_size_input = unet_template.voxel_size_input
        voxel_size_output = unet_template.voxel_size

        output_shape_wc = Coordinate(output_shape_vc) * voxel_size_output
        chunk_shape_vc = output_shape_vc
        chunk_shape_wc = Coordinate(output_shape_vc) * voxel_size_output

        full_shape_wc = Coordinate(shape_vc) * voxel_size_input
        full_shape_vc_output = full_shape_wc / voxel_size_output

        # offset file, e.g. "(...)/setup01/HeLa_Cell2_4x4x4nm/offsets_volumes_masks_foreground_shape180x180x180.json"
        if mask_ds is not None:
            offset_filename = "offsets_{0:}_shape{1:}x{2:}x{3:}.json".format(
                mask_ds.replace("/", "_"), *output_shape_wc)
        else:
            offset_filename = "offsets_{0:}_shape{1:}x{2:}x{3:}.json".format(
                "nomask", *output_shape_wc)
        offset_file = os.path.join(output_dir, offset_filename)

        # prepare datasets
        factor, scale, shift = get_contrast_adjustment(rf, raw_ds, factor,
                                                       min_sc, max_sc)

        f = zarr.open(out_file)
        for label in unet_template.labels:
            if label.labelname not in f:
                ds = f.empty(name=label.labelname,
                             shape=full_shape_vc_output,
                             compressor=numcodecs.GZip(6),
                             dtype="uint8",
                             chunks=chunk_shape_vc)
            else:
                ds = f[label.labelname]
            ds.attrs["resolution"] = tuple(voxel_size_output)[::-1]
            ds.attrs["offset"] = (0, 0, 0)
            ds.attrs["raw_data_path"] = raw_data_path
            ds.attrs["raw_ds"] = raw_ds
            ds.attrs["parent_dataset_id"] = dataset_id
            ds.attrs["iteration"] = iteration
            ds.attrs["raw_scale"] = scale
            ds.attrs["raw_shift"] = shift
            ds.attrs["raw_normalize_factor"] = factor
            ds.attrs["float_range"] = float_range
            ds.attrs["safe_scale"] = safe_scale
            ds.attrs["mask_ds"] = mask_ds

        if not os.path.exists(offset_file):
            if mask_ds is not None:
                generate_list_for_mask(offset_file, output_shape_wc,
                                       raw_data_path, mask_ds, n_cpus)
            else:
                generate_full_list(offset_file,
                                   output_shape_wc,
                                   raw_data_path,
                                   raw_ds,
                                   raw_voxel_size=resolution)
        shapes_file = os.path.join(
            setup_path,
            "shapes_steps{0:}.json".format(unet_template.steps_inference))
        if not os.path.exists(shapes_file):
            shapes = {
                "input_shape_vc": tuple(int(isv) for isv in input_shape_vc),
                "output_shape_vc": tuple(int(osv) for osv in output_shape_vc),
                "chunk_shape_vc": tuple(int(csv) for csv in chunk_shape_vc)
            }
            with open(shapes_file, "w") as f:
                json.dump(shapes, f)

    p_proc = re.compile(r"list_gpu_\d+_\S+_processed.txt")
    print(any([p_proc.match(f) is not None for f in os.listdir(out_file)]))
    if any([p_proc.match(f) is not None for f in os.listdir(out_file)]):
        print("Redistributing offset lists over {0:} jobs".format(n_jobs))
        redistribute_offset_lists(list(range(n_jobs)), out_file)
    else:
        with open(offset_file, 'r') as f:
            offset_list = json.load(f)
            offset_list_from_precomputed(offset_list, list(range(n_jobs)),
                                         out_file)
    return input_shape_vc, output_shape_vc, chunk_shape_vc