def correct_attributes(
        h5path_in,
        h5path_aux='',
        axlab='',
        elsize=[],
        ):
    """Correct the element-size and axis-label attributes of a h5 dataset.

    Parameters
    ----------
    h5path_in : str
        Path handed to ``utils.h5_load``; the attributes of this dataset
        are rewritten.
    h5path_aux : str, optional
        Path of a second dataset. Its element size / axis labels are used
        for whichever of `elsize` / `axlab` was not given explicitly.
    axlab : str or sequence of str, optional
        Axis labels to write.
    elsize : sequence of float, optional
        Element sizes to write (never modified in place).

    Notes
    -----
    When `elsize` or `axlab` has fewer entries than ``ds_in.ndim``, a single
    trailing ``1`` / ``'c'`` is appended.
    """
    h5file_in, ds_in, _, _ = utils.h5_load(h5path_in)

    # Stays None when no auxiliary file is opened, so the cleanup below
    # does not trip over an unbound name.
    h5file_aux = None
    if h5path_aux:
        h5file_aux, _, h5elsize, h5axlab = utils.h5_load(h5path_aux)
        elsize = elsize or h5elsize
        axlab = axlab or h5axlab

    # FIXME: sloppy insertion (only one trailing entry is ever added)
    if len(elsize) < ds_in.ndim:
        elsize = np.append(elsize, 1)
    if len(axlab) < ds_in.ndim:
        # Build a new object: works for the str default and avoids mutating
        # a list owned by the caller.
        if isinstance(axlab, str):
            axlab = axlab + 'c'
        else:
            axlab = list(axlab) + ['c']

    utils.h5_write_attributes(ds_in, element_size_um=elsize, axislabels=axlab)

    # Best-effort close of whatever was opened; each handle is tried on its
    # own so a failure on one does not leave the other open.
    for h5file in (h5file_in, h5file_aux):
        try:
            h5file.close()
        except (ValueError, AttributeError):
            pass
def construct_wsmask(outpaths, ds_mma, h5path_mds='', h5path_mmm='',
                     MAdilation=0, elsize=None, axlab=None):
    """Construct a mask of valid myelin voxels.

    Starts from an all-True mask shaped like `ds_mma` and successively
    restricts it:

    * if `MAdilation` is non-zero: to the dilation of `ds_mma` (that many
      iterations) with the `ds_mma` voxels themselves removed; the dilated
      mask is saved as step ``'madil{MAdilation:02d}'``;
    * if `h5path_mmm` is given: to the boolean dataset loaded from it;
    * if `h5path_mds` is given: to the boolean dataset loaded from it.

    The result is saved as step ``'wsmask'`` and returned.
    """
    mask_mmregion = np.ones_like(ds_mma, dtype='bool')

    if MAdilation:
        mask_distance = binary_dilation(ds_mma, iterations=MAdilation)
        np.logical_and(mask_mmregion, mask_distance, mask_mmregion)
        # Remove the ds_mma voxels from the dilated region. The original
        # used `mask - ds_mma`; NumPy no longer supports `-` on boolean
        # arrays, so the set difference is spelled out explicitly.
        np.logical_and(mask_mmregion, np.logical_not(ds_mma), mask_mmregion)
        utils.save_step(outpaths, 'madil{:02d}'.format(MAdilation),
                        mask_distance.astype('bool'), elsize, axlab)

    if h5path_mmm:
        ds_mmm = utils.h5_load(h5path_mmm, load_data=True, dtype='bool')[0]
        np.logical_and(mask_mmregion, ds_mmm, mask_mmregion)

    if h5path_mds:
        ds_mds = utils.h5_load(h5path_mds, load_data=True, dtype='bool')[0]
        np.logical_and(mask_mmregion, ds_mds, mask_mmregion)

    utils.save_step(outpaths, 'wsmask', mask_mmregion.astype('bool'),
                    elsize, axlab)

    return mask_mmregion
def CC_clf_gtgen(
        h5path_in,
        h5path_mask,
        outputfile='',
        ):
    """Generate a ground-truth forward map for the 2D labels.

    A label is marked True when it occurs inside the (one-step dilated)
    mask loaded from `h5path_mask` and nowhere outside it. The boolean
    forward map, indexed by label value, is saved with ``np.save`` to
    `outputfile`; entry 0 is always False.
    """
    # get 2D labels and mask of identified 3D MA compartment
    labels = utils.h5_load(h5path_in, load_data=True)[0]
    maskMA = utils.h5_load(h5path_mask, load_data=True)[0]

    # split the labels in MA and notMA
    labelsALL = np.unique(labels)
    maskdil = binary_dilation(maskMA)
    labelsMA = np.unique(labels[maskdil])
    labelsNOTMA = np.unique(labels[~maskdil])

    # filter labels that are split between compartments
    labelsTRUE = set(labelsMA) - set(labelsNOTMA)
    labelsFALSE = set(labelsALL) - set(labelsMA)
    print(len(labelsTRUE), len(labelsFALSE))

    # generate final ground truth forward map
    # The map is indexed by label VALUE, so it needs maxlabel + 1 entries;
    # sizing it by the number of unique labels overflows as soon as the
    # labels are not a contiguous 0..N range.
    maxlabel = int(labelsALL.max()) if labelsALL.size else 0
    y = np.zeros(maxlabel + 1, dtype='bool')
    for l in labelsTRUE:
        y[l] = True
    y[0] = False

    np.save(outputfile, y)
def get_mask(h5path_in1, h5path_in2):
    """Load the set of masks for method4.

    Returns ``~ds_in1 | ds_in2`` for the two datasets loaded from the
    given paths.
    """
    # NOTE(review): every other ``load_data=True`` call in this code base
    # takes the loaded array from index 0 of the return value; this
    # function used to unpack four values and use the second one.
    # Aligned with the other callers -- confirm against utils.h5_load.
    ds_in1 = utils.h5_load(h5path_in1, load_data=True)[0]
    ds_in2 = utils.h5_load(h5path_in2, load_data=True)[0]

    mask = ~ds_in1 | ds_in2

    return mask
def fill_holes_m4(labels, h5path_md, h5path_mm, h5path_mx):
    """Fill holes in labels.

    Runs ``fill_holes_watershed`` twice, once with the mask
    ``~ds_md | ds_mm`` and once with ``~ds_md | ds_mx``, and returns the
    element-wise maximum of the two results.
    """
    # NOTE(review): every other ``load_data=True`` call in this code base
    # takes the loaded array from index 0 of the return value; this
    # function used index 1. Aligned with the other callers -- confirm
    # against utils.h5_load.
    ds_md = utils.h5_load(h5path_md, load_data=True)[0]

    ds_mm = utils.h5_load(h5path_mm, load_data=True)[0]
    MMlabels = fill_holes_watershed(labels, ~ds_md | ds_mm)

    ds_mx = utils.h5_load(h5path_mx, load_data=True)[0]
    MXlabels = fill_holes_watershed(labels, ~ds_md | ds_mx)

    return np.maximum(MMlabels, MXlabels)
def CC_2Dto3D(
        h5path_in,
        h5path_out='',
        protective=False,
        ):
    """Label connected components in 3D from the 2D-generated mask.

    The non-zero voxels of the input dataset are passed to ``label`` and
    the result is written to a uint32 output created by
    ``utils.h5_write``. Returns None when the output check reports
    "CANCELLED" or when both files close cleanly; returns the output
    array/dataset when closing raises ValueError or AttributeError.
    """
    # Output check (only performed when an output path is given).
    if h5path_out:
        status, info = utils.h5_check(h5path_out, protective)
        print(info)
        if status == "CANCELLED":
            return

    # Input for reading, output for writing.
    h5file_in, ds_in, elsize, axlab = utils.h5_load(h5path_in)
    h5file_out, ds_out = utils.h5_write(None, ds_in.shape, 'uint32',
                                        h5path_out,
                                        element_size_um=elsize,
                                        axislabels=axlab)

    # NOTE: scipy has much lower memory consumption than scikit-image
    # TODO: input mask directly, not labelimage?
    foreground = ds_in[:, :, :] != 0
    ds_out[:] = label(foreground)

    # Close the files; hand back the output if there is nothing to close.
    try:
        h5file_in.close()
        h5file_out.close()
    except (ValueError, AttributeError):
        return ds_out
def map_to_volume(fw, h5path_in, h5path_out, h5path_mask=''):
    """Map the prediction to a labelvolume.

    Labels whose entry in the forward map `fw` is False are set to 0 and
    the result is written to `h5path_out`. When `h5path_mask` is given,
    the boolean version of that volume is written there as well.
    """
    src_file, src_ds, elsize, axlab = utils.h5_load(h5path_in)
    labels = src_ds[:]
    src_file.close()

    attrs = dict(element_size_um=elsize, axislabels=axlab)

    # Label volume restricted to the labels selected by the forward map.
    lab_file, lab_ds = utils.h5_write(None, labels.shape, labels.dtype,
                                      h5path_out, **attrs)
    selected = fw[labels]
    labels[~selected] = 0
    lab_ds[:] = labels
    lab_file.close()

    # Optional boolean mask of the remaining labels.
    if h5path_mask:
        mask_file, mask_ds = utils.h5_write(None, labels.shape, 'bool',
                                            h5path_mask, **attrs)
        mask_ds[:] = labels.astype('bool')
        mask_file.close()
def filter_NoR(
        h5path_in,
        h5path_2D,
        h5path_out='',
        save_steps=False,
        protective=False,
        ):
    """Filter nodes of ranvier.

    The output starts as a copy of the input labels. For every label, the
    voxels in slices (first axis) where that label does NOT occur in the
    corresponding slice of the 2D label volume are set to 0.

    Returns None when the output check reports "CANCELLED" or when the
    output file closes cleanly; returns the output array/dataset when
    closing raises ValueError or AttributeError.
    """
    # check output paths
    outpaths = {'out': h5path_out}
    status = utils.output_check(outpaths, save_steps, protective)
    if status == "CANCELLED":
        return

    # open data for reading
    h5file_in, ds_in, elsize, axlab = utils.h5_load(h5path_in)
    h5file_2D, ds_2D, _, _ = utils.h5_load(h5path_2D)

    # open data for writing
    h5file_out, ds_out = utils.h5_write(ds_in, ds_in.shape, ds_in.dtype,
                                        h5path_out,
                                        element_size_um=elsize,
                                        axislabels=axlab)

    # Per-slice label sets, kept in a list so that position == slice index
    # (a dict does not guarantee iteration order on older interpreters).
    labelsets = [set(np.unique(ds_2D[i, :, :]))
                 for i in range(ds_2D.shape[0])]

    # m[l][i] is True when label l occurs in slice i of the 2D labels.
    ulabels = np.unique(ds_in)
    m = {l: np.array([l in lsv for lsv in labelsets], dtype=bool)
         for l in ulabels}

    for prop in regionprops(ds_in):
        z, y, x, Z, Y, X = tuple(prop.bbox)
        # Work on a copy so the region's own image is left untouched.
        mask = np.array(prop.image, dtype=bool)
        mask[m[prop.label][z:Z], :, :] = False
        # Read-modify-write: indexing an h5py dataset yields a copy, so
        # `ds_out[...][mask] = 0` would never reach the file.
        block = ds_out[z:Z, y:Y, x:X]
        block[mask] = 0
        ds_out[z:Z, y:Y, x:X] = block

    # close and return
    h5file_in.close()
    h5file_2D.close()
    try:
        h5file_out.close()
    except (ValueError, AttributeError):
        return ds_out
def downsample_blockwise(
        h5path_in,
        blockreduce=[3, 3, 3],
        func='np.amax',
        dataslices=None,
        h5path_out='',
        save_steps=False,
        protective=False,
        ):
    """Downsample volume by blockwise reduction.

    With ``func == 'expand'`` the selected data are repeated
    ``blockreduce[axis]`` times along every axis; otherwise they are passed
    to ``block_reduce`` with the function named by `func`.

    Returns None when the output check reports "CANCELLED" or when both
    files close cleanly; returns the output array/dataset when closing
    raises ValueError or AttributeError.
    """
    # Check if any output paths already exist.
    outpaths = {'out': h5path_out}
    if utils.output_check(outpaths, save_steps, protective) == "CANCELLED":
        return

    # Open the inputfile for reading.
    # TODO: option to get the input data passed
    h5file_in, ds_in, elsize, axlab = utils.h5_load(h5path_in)

    # Get the matrix size and resolution of the outputdata.
    outsize, elsize = get_new_sizes(func, blockreduce, ds_in.shape, elsize)

    # Open the outputfile for writing and create the dataset or output array.
    h5file_out, ds_out = utils.h5_write(None, outsize, ds_in.dtype,
                                        h5path_out,
                                        element_size_um=elsize,
                                        axislabels=axlab)

    # Get the slice objects for the input data.
    slices = utils.get_slice_objects_prc(dataslices, ds_in.shape)
    first = slices[0]

    # Reformat the data to the outputsize.
    if func == 'expand':
        expanded = ds_in[first, ...]
        for axis in range(ds_out.ndim):
            expanded = np.repeat(expanded, blockreduce[axis], axis=axis)
        ds_out[first, ...] = expanded
    else:
        # TODO: flexible mapping from in to out
        # now:
        # the reduction factor of the first axis must be 1;
        # the extent of the remaining axes must be full
        # NOTE(review): `func` is evaluated with eval(); only pass trusted
        # strings here.
        ds_out[first, ...] = block_reduce(ds_in[first, ...],
                                          block_size=tuple(blockreduce),
                                          func=eval(func))

    # Close the h5 files or return the output array.
    try:
        h5file_in.close()
        h5file_out.close()
    except (ValueError, AttributeError):
        return ds_out
def merge_watershed(labels, labelsets=None,
                    h5path_data='', h5path_mmm='',
                    min_labelsize=10, searchradius=[100, 30, 30]):
    """Find candidates for label merge based on watershed.

    For every region in `labels`, the search regions obtained from
    ``find_region_coordinates`` for the directions 'down' and 'up' are
    handed to ``find_candidate_ws``.

    Parameters
    ----------
    labels : ndarray
        Label volume.
    labelsets : dict, optional
        Label sets to extend; a fresh dict is used when omitted.
    h5path_data, h5path_mmm : str
        Paths of the data volume and of an optional boolean mask (an
        all-False mask is used when `h5path_mmm` is empty).
    min_labelsize, searchradius
        Passed on to the helper functions.

    Returns
    -------
    (labelsets, labels_filled)
        The label sets returned by the last helper call and a copy of
        `labels` in which the processed regions are replaced by the
        watershed output.
    """
    # A `{}` default would be shared between calls; it is threaded through
    # find_candidate_ws and returned, so start from a fresh dict instead.
    if labelsets is None:
        labelsets = {}

    rp_nt = regionprops(labels)
    labels_filled = np.copy(labels)

    ds_data = utils.h5_load(h5path_data, load_data=True)[0]
    if h5path_mmm:
        ds_mask = utils.h5_load(h5path_mmm, load_data=True, dtype='bool')[0]
    else:
        ds_mask = np.zeros_like(ds_data, dtype='bool')

    for prop in rp_nt:
        # investigate image region above and below bbox
        for direction in ['down', 'up']:
            print('processing {}, direction {}'.format(prop.label, direction))

            C = find_region_coordinates(direction, labels,
                                        prop, searchradius)
            x, X, y, Y, z, Z = C
            if (z == 0) or (z == labels.shape[0] - 1):
                continue

            # TODO: improve searchregion by projecting along axon direction
            # TODO: improve searchregion by not taking the groundplane of
            # the whole label region
            imregion = labels[z:Z, y:Y, x:X]
            labels_in_region = np.unique(imregion)
            print(labels_in_region)

            if len(labels_in_region) < 2:
                continue  # label 0 and prop.label assumed to be there

            labelsets, wsout = find_candidate_ws(
                direction, labelsets, prop, imregion,
                ds_data[z:Z, y:Y, x:X], ds_mask[z:Z, y:Y, x:X],
                min_labelsize)

            if wsout is not None:
                labels_filled[z:Z, y:Y, x:X] = np.copy(wsout)

    return labelsets, labels_filled
def get_boundarymask(h5path_mask, masktype='invdil'):
    """Load or generate a mask.

    The boolean mask loaded from `h5path_mask` is

    * eroded with ``ball(3)`` for ``masktype == 'ero'``;
    * inverted and dilated (7 iterations) for ``masktype == 'invdil'``,
      after which the first and last four slices along the first axis are
      cleared;
    * returned unchanged for any other `masktype`.
    """
    loaded = utils.h5_load(h5path_mask, load_data=True, dtype='bool')[0]

    if masktype == 'ero':
        return binary_erosion(loaded, ball(3))

    if masktype == 'invdil':
        dilated = scipy_binary_dilation(~loaded, iterations=7, border_value=0)
        dilated[:4, :, :] = False
        dilated[-4:, :, :] = False
        return dilated

    return loaded
def splitblocks(
        h5path_in,
        dset_name,
        dataslices=None,
        blocksize=[500, 500, 500],
        margin=[20, 20, 20],
        blockrange=[],
        usempi=False,
        outputdir='',
        save_steps=False,
        protective=False,
        ):
    """Split a h5 dataset into blocks.

    The block layout comes from ``get_blocks``; every selected block is
    written with ``write_block``. Output files are named
    ``<dset_name>_{}<postfix>.h5`` inside `outputdir`, which defaults to
    ``blocks_<blocksize[0]>`` next to the input file. With `blockrange`
    only ``blocks[blockrange[0]:blockrange[1]]`` are processed.
    """
    # Prepare for processing with MPI.
    mpi_info = utils.get_mpi_info(usempi)

    # Determine the outputpaths.
    basepath, h5path_dset = h5path_in.split('.h5/')
    datadir, fname = os.path.split(basepath)
    postfix = fname.split(dset_name)[-1]
    if not outputdir:
        outputdir = os.path.join(datadir,
                                 'blocks_{:04d}'.format(blocksize[0]))
    utils.mkdir_p(outputdir)
    fname = '{}_{}{}.h5'.format(dset_name, '{}', postfix)
    h5path_tpl = os.path.join(outputdir, fname, h5path_dset)

    # Open data for reading.
    h5file_in, ds_in, elsize, axlab = utils.h5_load(h5path_in,
                                                    comm=mpi_info['comm'])

    # Divide the data into a series of blocks.
    blocks = get_blocks(ds_in.shape, blocksize, margin,
                        h5path_tpl, dataslices)
    if blockrange:
        blocks = blocks[blockrange[0]:blockrange[1]]
    series = np.array(range(0, len(blocks)), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    # Write blocks to the outputfile(s).
    for blocknr in series:
        write_block(ds_in, elsize, axlab, blocks[blocknr])

    # Close the input file (best effort).
    try:
        h5file_in.close()
    except (ValueError, AttributeError, UnboundLocalError):
        pass
def map_labels(
        h5path_in,
        h5path_out='',
        save_steps=False,
        protective=False,
        ):
    """Map groups of labels to a single label.

    The label sets are unpickled from
    ``<root of h5path_out>_<output dataset name>.pickle`` and applied to
    the input labels with ``utils.forward_map``. With `save_steps` a second
    mapping that starts from an all-zero forward map is saved as step
    'stitched'.

    Returns None when the output check reports "CANCELLED" or when both
    files close cleanly; returns the output dataset when closing raises
    ValueError or AttributeError.
    """
    # check output path
    outpaths = {'out': h5path_out, 'stitched': ''}
    root, ds_main = outpaths['out'].split('.h5')
    grpname = ds_main + "_steps"
    for dsname in list(outpaths):
        outpaths[dsname] = os.path.join(root + '.h5' + grpname, dsname)
    status = utils.output_check(outpaths, save_steps, protective)
    if status == "CANCELLED":
        return

    # open data for reading
    h5file_in, ds_in, elsize, axlab = utils.h5_load(h5path_in)

    # open data for writing
    h5file_out, ds_out = utils.h5_write(None, ds_in.shape, ds_in.dtype,
                                        h5path_out,
                                        element_size_um=elsize,
                                        axislabels=axlab)

    # load the pickled set of neighbours
    # Pickles are binary data: the file must be opened in 'rb' mode.
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # load label sets written by this pipeline.
    lsroot = h5path_out.split('.h5')[0]
    lspath = '{}_{}.pickle'.format(lsroot, ds_out.name[1:])
    with open(lspath, "rb") as f:
        labelsets = pickle.load(f)

    # apply forward map
    # Identity map for the labels that are present, 0 everywhere else
    # (built vectorised; a per-label membership test is quadratic).
    ulabels = np.unique(ds_in[:])
    maxlabel = np.amax(ulabels)
    fw = np.zeros(int(maxlabel) + 1, dtype=int)
    fw[ulabels] = ulabels
    labels = utils.forward_map(np.array(fw), ds_in[:], labelsets)

    if save_steps:
        fw = np.zeros(maxlabel + 1, dtype='i')
        MAlabels = utils.forward_map(np.array(fw), ds_in[:], labelsets)
        utils.save_step(outpaths, 'stitched', MAlabels, elsize, axlab)

    ds_out[:, :, :] = labels

    # close and return
    try:
        h5file_in.close()
        h5file_out.close()
    except (ValueError, AttributeError):
        return ds_out
def CC_2Dprops(
        h5path_labels,
        basename,
        map_propnames,
        usempi=False,
        h5path_out='',
        protective=False,
        ):
    """Map the labels/properties.

    For every property name, the forward map stored in
    ``<basename>_<propname>.npy`` is indexed with the label volume and the
    result is written to the dataset ``<h5path_out>/<propname>``. With MPI
    enabled the property names are distributed over the processes.
    """
    # check output paths
    if '.h5' in h5path_out:
        for propname in map_propnames:
            # Check the dataset that is actually written for this property
            # (the original computed this path but checked h5path_out).
            h5path_prop = os.path.join(h5path_out, propname)
            status, info = utils.h5_check(h5path_prop, protective)
            print(info)
            if status == "CANCELLED":
                return

    # open data for reading
    h5file_in, ds_in, elsize, axlab = utils.h5_load(h5path_labels)

    # prepare mpi
    n_props = len(map_propnames)
    series = np.array(range(0, n_props), dtype=int)
    mpi_info = utils.get_mpi_info(usempi)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    for i in series:
        propname = map_propnames[i]
        print("processing prop %s" % propname)

        nppath = '{}_{}.npy'.format(basename, propname)
        fw = np.load(nppath)

        # open data for writing
        h5path_prop = os.path.join(h5path_out, propname)
        h5file_prop, ds_prop = utils.h5_write(None, ds_in.shape, fw.dtype,
                                              h5path_prop,
                                              element_size_um=elsize,
                                              axislabels=axlab,
                                              comm=mpi_info['comm'])
        ds_prop[:] = fw[ds_in[:]]
        h5file_prop.close()

    # close and return
    h5file_in.close()
def get_wsmask(outpaths, ds_mma, h5path_wsmask='',
               h5path_mds='', h5path_mmm='',
               MAdilation=0, elsize=None, axlab=None):
    """Load or construct a mask of the myelin region.

    When `h5path_wsmask` is given the boolean mask is loaded from it;
    otherwise it is built by ``construct_wsmask`` from the remaining
    arguments.
    """
    if not h5path_wsmask:
        return construct_wsmask(outpaths, ds_mma, h5path_mds, h5path_mmm,
                                MAdilation, elsize, axlab)

    return utils.h5_load(h5path_wsmask, load_data=True, dtype='bool')[0]
print(len(labelsTRUE), len(labelsFALSE))

## generate final ground truth labels
# The forward map is indexed by label VALUE, so it needs maxlabel + 1
# entries; sizing it by the number of unique labels overflows as soon as
# the labels are not a contiguous 0..N range.
y = np.zeros(int(np.amax(labelsALL)) + 1, dtype='bool')
for l in labelsTRUE:
    y[l] = True
y[0] = False
np.save(gtpath, y)


# In[11]:

# map the groundtruth labels to a volume
h5file_in, ds_in, elsize, axlab = utils.h5_load(labelpath)
h5path_out = os.path.join(datadir, '{}_gt.h5'.format(dataset))

# class0: voxels whose label is NOT in the ground-truth set
h5file_out, ds_out = utils.h5_write(None, ds_in.shape, 'uint8',
                                    os.path.join(h5path_out, 'class0'),
                                    element_size_um=elsize,
                                    axislabels=axlab)
ds_out[:] = ~y[ds_in[:]]
h5file_out.close()

# class1: voxels whose label is in the ground-truth set
h5file_out, ds_out = utils.h5_write(None, ds_in.shape, 'uint8',
                                    os.path.join(h5path_out, 'class1'),
                                    element_size_um=elsize,
                                    axislabels=axlab)
ds_out[:] = y[ds_in[:]]
def stack2stack(
        inputfile,
        outputfile,
        dset_name='',
        blockoffset=[],
        datatype='',
        uint8conv=False,
        inlayout='',
        outlayout='',
        elsize=[],
        chunksize=[],
        additional_outputs=[],
        nzfills=5,
        dataslices=None,
        save_steps=False,
        protective=False,
        ):
    """Convert/select/downscale/transpose/... an hdf5 dataset.

    The data selected from `inputfile` are loaded with
    ``utils.load_dataset`` and written once for every extension in
    `additional_outputs` plus the extension of `outputfile`:
    '.nii' -> ``utils.write_to_nifti``, '.h5' -> ``utils.h5_write``,
    '.tif'/'.png'/'.jpg' (data with fewer than 4 dimensions only) ->
    ``utils.write_to_img``.

    Layout, element size and datatype default to those of the input
    dataset. When `dset_name` is given, `dataslices` is derived from it via
    ``utils.split_filename``.

    Returns the loaded data, or None when the output check reports
    "CANCELLED".
    """
    # output root and exts
    root, ext = split_filepath(outputfile)
    outexts = list(set(additional_outputs + [ext]))

    # Check if any output paths already exist.
    outpaths = {'out': outputfile, 'addext': (root, additional_outputs)}
    status = utils.output_check(outpaths, save_steps, protective)
    if status == "CANCELLED":
        return

    h5file_in, ds_in, h5elsize, h5axlab = utils.h5_load(inputfile)

    try:
        ndim = ds_in.ndim
    except AttributeError:
        ndim = len(ds_in.dims)

    # data layout
    # FIXME: h5axlab not necessarily xyzct!
    # FIXME: this forces a possibly erroneous outlayout when inlayout is
    # not found
    inlayout = inlayout or ''.join(h5axlab) or 'zyxct'[0:ndim]
    outlayout = outlayout or inlayout
    in2out = [inlayout.index(l) for l in outlayout]

    # element size
    elsize = elsize or (h5elsize[in2out]
                        if h5elsize is not None else None)

    # chunksize
    # NOTE(review): as written, a non-empty `chunksize` is used as given
    # and an empty one yields True; the `ds_in.chunks` fallback cannot be
    # reached. Left unchanged -- confirm the intended behaviour.
    if chunksize is not None:
        chunksize = tuple(chunksize) or (
            True if not any(chunksize)
            else (tuple([ds_in.chunks[i] for i in in2out])
                  if ds_in.chunks else None)
            )

    # datatype
    datatype = datatype or ds_in.dtype

    if dset_name:
        _, x, X, y, Y, z, Z = utils.split_filename(dset_name, blockoffset)
        slices = {'x': [x, X, 1], 'y': [y, Y, 1], 'z': [z, Z, 1]}
        if ndim > 3:
            C = ds_in.shape[inlayout.index('c')]
            slices['c'] = [0, C, 1]
        sliceslist = [slices[dim] for dim in inlayout]
        dataslices = [item for sl in sliceslist for item in sl]

    # get the selected and transformed data
    # TODO: most memory-efficient solution
    data = utils.load_dataset(ds_in, elsize, inlayout, outlayout,
                              datatype, dataslices, uint8conv)[0]

    h5file_in.close()

    # write the data
    for ext in outexts:

        if '.nii' in ext:
            if data.dtype == 'float16':
                data = data.astype('float')
            utils.write_to_nifti(root + '.nii.gz', data, elsize)

        if '.h5' in ext:
            utils.h5_write(data, data.shape, data.dtype,
                           outputfile,
                           element_size_um=elsize,
                           axislabels=outlayout,
                           chunks=chunksize)

        if (('.tif' in ext) |
                ('.png' in ext) |
                ('.jpg' in ext)) & (data.ndim < 4):
            if data.ndim == 2:
                # atleast_3d is a NumPy function, not an ndarray method;
                # it appends the new axis last, matching the extra 'z'.
                data = np.atleast_3d(data)
                outlayout += 'z'
            utils.write_to_img(root, data, outlayout, nzfills, ext)

    return data
def watershed_ics(
        h5path_in,
        masks=[],
        h5path_seeds='',
        seed_size=64,
        lower_threshold=None,
        upper_threshold=None,
        invert=False,
        h5path_out='',
        save_steps=False,
        protective=False,
        ):
    """Perform watershed on the intracellular space compartments.

    Seeds are loaded from `h5path_seeds` or, when that is empty, generated:
    voxels with ``lower_threshold < value <= upper_threshold`` are
    labelled, objects smaller than `seed_size` are removed and the labels
    are made sequential. A threshold of None leaves that side unbounded.
    The watershed of the (optionally negated) input is constrained to the
    mask returned by ``utils.string_masks``.

    Returns None when the output check reports "CANCELLED" or when all
    files close cleanly; returns ``(ds_out, ds_sds, ds_mask)`` when closing
    raises ValueError or AttributeError.
    """
    # check output paths
    outpaths = {'out': h5path_out, 'seeds': h5path_seeds, 'mask': ''}
    root, ds_main = outpaths['out'].split('.h5')
    grpname = ds_main + "_steps"
    for dsname in list(outpaths):
        outpaths[dsname] = os.path.join(root + '.h5' + grpname, dsname)
    status = utils.output_check(outpaths, save_steps, protective)
    if status == "CANCELLED":
        return

    # open data for reading
    h5file_in, ds_in, elsize, axlab = utils.h5_load(h5path_in)

    # open data for writing
    h5file_out, ds_out = utils.h5_write(None, ds_in.shape[:3], 'uint32',
                                        h5path_out,
                                        element_size_um=elsize,
                                        axislabels=axlab)

    # load/generate the seeds
    if h5path_seeds:
        h5file_sds, ds_sds, _, _ = utils.h5_load(h5path_seeds)
    else:
        h5file_sds, ds_sds = utils.h5_write(None, ds_in.shape[:3], 'uint32',
                                            outpaths['seeds'],
                                            element_size_um=elsize,
                                            axislabels=axlab)
        # Apply only the thresholds that were given. Testing `is not None`
        # keeps a threshold of 0 meaningful, and skipping an absent bound
        # avoids `amin - 1` wrapping around for unsigned data.
        data = ds_in[:]
        seedmask = np.ones(data.shape, dtype='bool')
        if lower_threshold is not None:
            seedmask &= data > lower_threshold
        if upper_threshold is not None:
            seedmask &= data <= upper_threshold
        ds_sds[:] = seedmask
        ds_sds[:], _ = label(ds_sds[:])
        ds_sds[:] = remove_small_objects(ds_sds[:], min_size=seed_size)
        ds_sds[:] = relabel_sequential(ds_sds[:])[0]
        # NOTE: numpy/scipy/skimage inplace is not written when using hdf5,
        # so the `out=` / `in_place=True` variants cannot be used on
        # ds_sds[:]; results are assigned back explicitly instead.

    # determine the mask
    mask = np.ones(ds_in.shape[:3], dtype='bool')
    mask = utils.string_masks(masks, mask)
    h5file_mask, ds_mask = utils.h5_write(None, ds_in.shape[:3], 'uint8',
                                          outpaths['mask'],
                                          element_size_um=elsize,
                                          axislabels=axlab)
    ds_mask[:] = mask

    # perform the watershed
    if invert:
        ds_out[:] = watershed(-ds_in[:], ds_sds[:], mask=ds_mask[:])
    else:
        ds_out[:] = watershed(ds_in[:], ds_sds[:], mask=ds_mask[:])

    # close and return
    h5file_in.close()
    try:
        h5file_out.close()
        h5file_sds.close()
        h5file_mask.close()
    except (ValueError, AttributeError):
        return ds_out, ds_sds, ds_mask
def evaluate_overlaps(
        h5path_in,
        slicedim,
        offsets,
        threshold_overlap,
        do_map_labels=False,
        h5path_mm='',
        min_labelsize=0,
        close=None,
        relabel_from=0,
        usempi=False,
        h5path_out='',
        save_steps=False,
        protective=False,
        ):
    """Check for slicewise overlaps between labels.

    Every slice ``i`` along `slicedim` is compared with the slices
    ``i + 1 .. i + offsets - 1`` using ``merge_neighbours``. Each process
    pickles its label sets to
    ``<h5 root>_<out dataset>_host-<host>_rank-<rank>.pickle``; rank 0 then
    combines all of those files, writes the merged sets as txt and pickle
    and, with `do_map_labels`, calls ``map_labels``.
    """
    # prepare mpi
    # TODO: could allow selection of slices/subset here
    mpi_info = utils.get_mpi_info(usempi)

    # open data for reading
    h5file_in, ds_in, _, _ = utils.h5_load(h5path_in, comm=mpi_info['comm'])

    n_slices = ds_in.shape[slicedim] - offsets
    series = np.array(range(0, n_slices), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    # merge overlapping neighbours
    labelsets = {}
    for i in series:
        print("processing slice {}".format(i))
        for j in range(1, offsets):
            data_section = utils.get_slice(ds_in, i, slicedim)
            nb_section = utils.get_slice(ds_in, i+j, slicedim)
            labelsets = merge_neighbours(labelsets,
                                         data_section, nb_section,
                                         threshold_overlap)

    # dump the list of overlapping neighbours in a pickle
    h5root = h5file_in.filename.split('.h5')[0]
    ds_out_name = os.path.split(h5path_out)[1]
    mname = "host-{}_rank-{:02d}".format(socket.gethostname(),
                                         mpi_info['rank'])
    lsroot = '{}_{}_{}'.format(h5root, ds_out_name, mname)
    utils.write_labelsets(labelsets, lsroot, ['pickle'])

    h5file_in.close()

    # wait for all processes to finish
    if mpi_info['enabled']:
        mpi_info['comm'].Barrier()

    # let one process combine the overlaps found in the separate processes
    if mpi_info['rank'] == 0:
        lsroot = '{}_{}'.format(h5root, ds_out_name)
        match = "{}_host*_rank*.pickle".format(lsroot)
        for ppath in glob.glob(match):
            # Pickles are binary data: the file must be opened in 'rb'
            # mode.
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; only load label sets written by this pipeline.
            with open(ppath, "rb") as f:
                newlabelsets = pickle.load(f)
            for lsk, lsv in newlabelsets.items():
                labelsets = utils.classify_label_set(labelsets, lsv, lsk)

        utils.write_labelsets(labelsets, lsroot, ['txt', 'pickle'])

        if do_map_labels:
            # NOTE(review): this passes eight positional arguments; verify
            # that the map_labels in scope accepts (h5path_in, h5path_mm,
            # min_labelsize, close, relabel_from, h5path_out, save_steps,
            # protective).
            map_labels(h5path_in, h5path_mm,
                       min_labelsize, close, relabel_from,
                       h5path_out, save_steps, protective)
def filter_labels(
        h5path_in,
        h5path_mm='',
        min_labelsize=0,
        close=None,
        relabel_from=0,
        h5path_out='',
        save_steps=False,
        protective=False,
        ):
    """Filter a label volume: close, mask, remove small labels, relabel.

    In order: the labels are closed with ``close_labels`` (if `close` is
    not None; voxels in the mask from `h5path_mm` are then zeroed and the
    step 'closed' is optionally saved), labels smaller than `min_labelsize`
    are removed, and the labels are renumbered sequentially starting at
    `relabel_from` (if larger than 1).

    Returns None when the output check reports "CANCELLED" or when both
    files close cleanly; returns the output array/dataset when closing
    raises ValueError or AttributeError.
    """
    # check output path
    outpaths = {'out': h5path_out, 'closed': ''}
    root, ds_main = outpaths['out'].split('.h5')
    grpname = ds_main + "_steps"
    for dsname in list(outpaths):
        outpaths[dsname] = os.path.join(root + '.h5' + grpname, dsname)
    if utils.output_check(outpaths, save_steps, protective) == "CANCELLED":
        return

    # open data for reading / writing
    h5file_in, ds_in, elsize, axlab = utils.h5_load(h5path_in)
    h5file_out, ds_out = utils.h5_write(None, ds_in.shape, ds_in.dtype,
                                        h5path_out,
                                        element_size_um=elsize,
                                        axislabels=axlab)

    labels = ds_in[:]

    # NOTE(review): the mask removal and the 'closed' step are taken to
    # belong to the closing stage -- confirm against the original layout.
    if close is not None:
        labels = close_labels(labels, close)
        if h5path_mm:
            print('removing voxels in mask')
            h5file_mm, ds_mm, _, _ = utils.h5_load(h5path_mm)
            in_mask = ds_mm[:].astype('bool')
            labels[in_mask] = 0
            h5file_mm.close()
        if save_steps:
            utils.save_step(outpaths, 'closed', labels, elsize, axlab)

    if min_labelsize:
        print('removing small labels')
        remove_small_objects(labels, min_size=min_labelsize, in_place=True)

    if relabel_from > 1:
        print('relabeling from {}'.format(relabel_from))
        relabeled = relabel_sequential(labels, relabel_from)
        labels = relabeled[0]
        # TODO: save mapping?

    ds_out[:, :, :] = labels

    # close and return
    try:
        h5file_in.close()
        h5file_out.close()
    except (ValueError, AttributeError):
        return ds_out
def mergeblocks(
        h5paths_in,
        blockoffset=[0, 0, 0],
        blocksize=[],
        margin=[0, 0, 0],
        fullsize=[],
        is_labelimage=False,
        relabel=False,
        neighbourmerge=False,
        save_fwmap=False,
        blockreduce=[],
        func='np.amax',
        datatype='',
        usempi=False,
        h5path_out='',
        save_steps=False,
        protective=False,
        ):
    """Merge blocks of data into a single hdf5 file.

    The output has size ``fullsize - blockoffset`` (divided by
    `blockreduce` and rounded up when that is given; the element size is
    scaled accordingly). For 4D input the size of the fourth input axis is
    appended. Each block is handled by ``process_block``; a block that
    raises is reported and skipped.

    Returns None when the output check reports "CANCELLED" or when the
    output file closes cleanly; returns the output array/dataset when
    closing raises ValueError or AttributeError.
    """
    # prepare mpi
    mpi_info = utils.get_mpi_info(usempi)
    series = np.array(range(0, len(h5paths_in)), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    # TODO: save_steps
    # check output paths
    outpaths = {'out': h5path_out}
    status = utils.output_check(outpaths, save_steps, protective)
    if status == "CANCELLED":
        return

    # open data for reading
    h5file_in, ds_in, elsize, axlab = utils.h5_load(h5paths_in[0],
                                                    comm=mpi_info['comm'])
    try:
        ndim = ds_in.ndim
    except AttributeError:
        ndim = len(ds_in.dims)

    # get the size of the outputfile
    # TODO: option to derive fullsize from dset_names?
    if blockreduce:
        datasize = np.subtract(fullsize, blockoffset)
        # The builtin float replaces the removed `np.float` alias.
        outsize = [int(np.ceil(d / float(b)))
                   for d, b in zip(datasize, blockreduce)]
        elsize = [e * b for e, b in zip(elsize, blockreduce)]
    else:  # FIXME: 'zyx(c)' stack assumed
        outsize = np.subtract(fullsize, blockoffset)

    if ndim == 4:
        outsize = list(outsize) + [ds_in.shape[3]]  # TODO: flexible insert

    datatype = datatype or ds_in.dtype
    chunks = ds_in.chunks or None

    h5file_in.close()

    # open data for writing
    h5file_out, ds_out = utils.h5_write(data=None, shape=outsize,
                                        dtype=datatype,
                                        h5path_full=h5path_out,
                                        chunks=chunks,
                                        element_size_um=elsize,
                                        axislabels=axlab,
                                        comm=mpi_info['comm'])

    # merge the datasets
    maxlabel = 0
    for i in series:
        h5path_in = h5paths_in[i]
        # Best effort by design: a failing block is reported and the merge
        # continues with the next one.
        try:
            maxlabel = process_block(h5path_in, ndim, blockreduce, func,
                                     blockoffset, blocksize, margin,
                                     fullsize,
                                     ds_out,
                                     is_labelimage, relabel,
                                     neighbourmerge, save_fwmap,
                                     maxlabel, usempi, mpi_info)
            print('processed block {:03d}: {}'.format(i, h5path_in))
        except Exception as e:
            print('failed block {:03d}: {}'.format(i, h5path_in))
            print(e)

    # close and return
    try:
        h5file_out.close()
    except (ValueError, AttributeError):
        return ds_out
def separate_sheaths(
        h5path_in,
        h5path_lmm='',
        h5path_wsmask='',
        h5path_mask='',
        h5path_mmm='',
        MAdilation=0,
        h5path_dist='',
        sigmoidweighting=0,
        margin=50,
        h5path_out='',
        save_steps=False,
        protective=False,
        ):
    """Separate the myelin compartment into individual myelin sheaths.

    A watershed is run on a distance map, seeded with the grey-dilated
    input labels and constrained to the mask from ``get_wsmask``. The
    distance map is, in order of precedence:

    * loaded from `h5path_dist`;
    * the sigmoid-weighted transform from ``distance_transform_sw``
      (if `sigmoidweighting`), based on the sheaths loaded from
      `h5path_lmm` or on a first simple watershed;
    * the Euclidean distance transform of the background of the input.

    Returns None when the output check reports "CANCELLED" or when the
    output file closes cleanly; returns the output array/dataset when
    closing raises ValueError or AttributeError.
    """
    # check output paths
    outpaths = {'out': h5path_out, 'wsmask': '',
                'madil{:02d}'.format(MAdilation): '',
                'distance_simple': '',
                'sheaths_simple': '',
                'distance_sigmod': '',
                }
    root, ds_main = outpaths['out'].split('.h5')
    grpname = ds_main + "_steps"
    for dsname in list(outpaths):
        outpaths[dsname] = os.path.join(root + '.h5' + grpname, dsname)
    status = utils.output_check(outpaths, save_steps, protective)
    if status == "CANCELLED":
        return

    # open data for reading
    h5file_in, ds_in, elsize, axlab = utils.h5_load(h5path_in)
    ds_mma = ds_in[:] != 0

    # open data for writing
    h5file_out, ds_out = utils.h5_write(None, ds_in.shape, ds_in.dtype,
                                        h5path_out,
                                        element_size_um=elsize,
                                        axislabels=axlab)

    # load/calculate a mask to constrain the watershed in
    mask_mmregion = get_wsmask(outpaths, ds_mma, h5path_wsmask,
                               h5path_mask, h5path_mmm, MAdilation,
                               elsize, axlab)

    elsize_abs = np.absolute(elsize)
    seeds = grey_dilation(ds_in, size=(3, 3, 3))

    # load/calculate the distance transform
    if h5path_dist:
        h5file_dist, ds_dist, _, _ = utils.h5_load(h5path_dist)
        dist = ds_dist[:]
    elif sigmoidweighting:
        if h5path_lmm:
            ds_lmm = utils.h5_load(h5path_lmm, load_data=True)[0]
        else:
            dist = distance_transform_edt(~ds_mma, sampling=elsize_abs)
            dist = img_as_float(dist)
            utils.save_step(outpaths, 'distance_simple',
                            dist, elsize, axlab)
            ds_lmm = watershed(dist, seeds, mask=mask_mmregion)
            utils.save_step(outpaths, 'sheaths_simple',
                            ds_lmm, elsize, axlab)
        dist, _ = distance_transform_sw(ds_in, ds_lmm, elsize_abs,
                                        sigmoidweighting, margin)
        utils.save_step(outpaths, 'distance_sigmod',
                        dist, elsize, axlab)
        # TODO: save median widths mw
    else:
        dist = distance_transform_edt(~ds_mma, sampling=elsize_abs)
        utils.save_step(outpaths, 'distance_simple',
                        dist, elsize, axlab)

    # A single watershed for every branch: previously a distance map loaded
    # from `h5path_dist` was never used and the output stayed unwritten.
    ds_out[:] = watershed(dist, seeds, mask=mask_mmregion)

    # close and return
    h5file_in.close()
    if h5path_dist:
        h5file_dist.close()
    try:
        h5file_out.close()
    except (ValueError, AttributeError):
        return ds_out
def get_fibre_stats(h5path_in, stats=[]):
    """Calculate slicewise statistics of the labels in a volume.

    Parameters
    ----------
    h5path_in : str
        Path of the label volume (loaded with ``utils.h5_load``).
    stats : sequence of str, optional
        Extra statistics to compute; recognised names are 'AD' (equivalent
        diameter), 'centroid', 'eccentricity' and 'solidity'. The area is
        always computed.

    Returns
    -------
    dict
        Maps each statistic name to a float array of shape
        ``(n_slices, n_labels)`` (``(n_slices, n_labels, 2)`` for
        'centroid'). Column ``j`` belongs to the ``j``-th unique label
        after the first one (the first unique value is skipped as
        background); entries for labels absent from a slice are NaN.
    """
    ds_in = utils.h5_load(h5path_in, load_data=True)[0]

    ulabels = np.unique(ds_in)
    Nlabels = len(ulabels) - 1
    n_slices = ds_in.shape[0]

    # statistic name -> regionprops attribute, for the scalar statistics
    scalar_attrs = [
        ('AD', 'equivalent_diameter'),
        ('eccentricity', 'eccentricity'),
        ('solidity', 'solidity'),
    ]
    requested = [(name, attr) for name, attr in scalar_attrs
                 if name in stats]
    want_centroid = 'centroid' in stats

    props = {'area': np.full([n_slices, Nlabels], np.nan, dtype='float')}
    if 'AD' in stats:
        props['AD'] = np.copy(props['area'])
    if want_centroid:
        props['centroid'] = np.full([n_slices, Nlabels, 2], np.nan,
                                    dtype='float')
    if 'eccentricity' in stats:
        props['eccentricity'] = np.copy(props['area'])
    if 'solidity' in stats:
        props['solidity'] = np.copy(props['area'])

    for i in range(n_slices):
        regions = {prop.label: prop for prop in regionprops(ds_in[i, :, :])}

        for j, l in enumerate(ulabels[1:]):
            prop = regions.get(l)
            if prop is None:
                continue  # label not present in this slice: keep NaN
            props['area'][i, j] = prop.area
            for name, attr in requested:
                props[name][i, j] = getattr(prop, attr)
            if want_centroid:
                props['centroid'][i, j, :] = prop.centroid

    return props
def CC_2Dfilter(
        h5path_labels,
        map_propnames,
        criteria,
        h5path_int='',
        slicedim=0,
        usempi=False,
        outputfile='',
        protective=False,
        ):
    """Get forward mapping of labels/properties filtered by criteria.

    `criteria` holds, in order: min_area, max_area, max_intensity_mb,
    max_eccentricity, min_solidity, min_euler_number, min_extent. The
    labels are checked slice by slice when a 2D criterion is set (or when
    all criteria are None), otherwise the whole volume is checked at once
    on rank 0. One forward map per property is saved to
    ``<root of outputfile>_<propname>.npy``.

    Returns, on rank 0, the last forward map written (None when
    `map_propnames` is empty); other ranks return None.
    """
    (min_area,
     max_area,
     max_intensity_mb,
     max_eccentricity,
     min_solidity,
     min_euler_number,
     min_extent) = criteria

    # prepare mpi
    mpi_info = utils.get_mpi_info(usempi)

    # TODO: check output path

    # open data for reading
    h5file_mm, ds_mm, _, _ = utils.h5_load(h5path_labels,
                                           comm=mpi_info['comm'])
    if h5path_int:
        h5file_mb, ds_mb, _, _ = utils.h5_load(h5path_int,
                                               comm=mpi_info['comm'])
    else:
        ds_mb = None
    # mask used as intensity image in mean_intensity criterium

    # get the maximum labelvalue in the input
    root = h5path_labels.split('.h5')[0]
    maxlabel = get_maxlabel(root, ds_mm)

    # prepare mpi
    n_slices = ds_mm.shape[slicedim]
    series = np.array(range(0, n_slices), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]
        if mpi_info['rank'] == 0:
            fws_reduced = np.zeros((maxlabel + 1, len(map_propnames)),
                                   dtype='float')
        else:
            fws_reduced = None

    fws = np.zeros((maxlabel + 1, len(map_propnames)),
                   dtype='float')

    mapall = criteria.count(None) == len(criteria)

    # pick labels observing the constraints
    go2D = ((max_eccentricity is not None) or
            (min_solidity is not None) or
            (min_euler_number is not None) or
            mapall)
    if go2D:
        for i in series:
            slcMM = utils.get_slice(ds_mm, i, slicedim)
            if h5path_int:
                slcMB = utils.get_slice(ds_mb, i, slicedim)  # , 'bool'
            else:
                slcMB = None
            fws = check_constraints(slcMM, fws, map_propnames,
                                    criteria, slcMB, mapall)
        if mpi_info['enabled']:
            mpi_info['comm'].Reduce(fws, fws_reduced, op=MPI.MAX, root=0)
        else:
            fws_reduced = fws
    else:
        if mpi_info['rank'] == 0:
            fws = check_constraints(ds_mm, fws, map_propnames,
                                    criteria, ds_mb, mapall)
            fws_reduced = fws

    # write the forward maps to a numpy vector
    outarray = None  # stays None when there are no properties to map
    if mpi_info['rank'] == 0:
        # Probe a central slice ALONG `slicedim` for the property
        # datatypes; indexing the first axis with an index derived from
        # another axis can run out of bounds.
        slc = n_slices // 2
        slcMM = utils.get_slice(ds_mm, slc, slicedim)
        slcMB = utils.get_slice(ds_mb, slc, slicedim) if h5path_int else None
        datatypes = get_prop_datatypes(slcMM, map_propnames, slcMB)
        root = outputfile.split('.h5')[0]
        for i, propname in enumerate(map_propnames):
            nppath = '{}_{}.npy'.format(root, propname)
            outarray = np.array(fws_reduced[:, i], dtype=datatypes[i])
            np.save(nppath, outarray)

    # close and return
    h5file_mm.close()
    if h5path_int:
        h5file_mb.close()

    if mpi_info['rank'] == 0:
        return outarray
def CC_2D(
        h5path_in,
        h5path_mask='',
        slicedim=0,
        usempi=False,
        h5path_out='',
        protective=False,
        ):
    """Label connected components in all slices.

    In every slice along `slicedim` the False voxels of the input
    (restricted to the mask from `h5path_mask` when given) are labelled.
    Labels are made unique across slices by adding the running maximum
    (serial) or ``10000 * slice index`` (MPI). The maximum label is saved
    to ``<root of h5path_out>.npy`` by the last rank.

    Returns None when the output check reports "CANCELLED" or when all
    files close cleanly; returns the output array/dataset when closing
    raises ValueError or AttributeError.
    """
    # check output path
    if '.h5' in h5path_out:
        status, info = utils.h5_check(h5path_out, protective)
        print(info)
        if status == "CANCELLED":
            return

    # open data for reading
    h5file_mm, ds_mm, elsize, axlab = utils.h5_load(h5path_in)
    if h5path_mask:
        h5file_md, ds_md, _, _ = utils.h5_load(h5path_mask)

    # prepare mpi
    # TODO: could allow selection of slices/subset here
    mpi_info = utils.get_mpi_info(usempi)
    n_slices = ds_mm.shape[slicedim]
    series = np.array(range(0, n_slices), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    # open data for writing
    h5file_out, ds_out = utils.h5_write(None, ds_mm.shape, 'uint32',
                                        h5path_out,
                                        element_size_um=elsize,
                                        axislabels=axlab,
                                        comm=mpi_info['comm'])

    # slicewise labeling
    maxlabel = 0
    for i in series:

        slcMM = utils.get_slice(ds_mm, i, slicedim, 'bool')
        if h5path_mask:
            slcMD = utils.get_slice(ds_md, i, slicedim, 'bool')
            labels, num = label(np.logical_and(~slcMM, slcMD),
                                return_num=True)
        else:
            labels, num = label(~slcMM, return_num=True)
        print("found %d labels in slice %d" % (num, i))

        # Offset the labelled voxels only. Offsetting every `~slcMM` voxel
        # also turned masked-out background (label 0) into a non-zero
        # label whenever a mask was used.
        labelled = labels > 0
        if mpi_info['enabled']:
            # NOTE: assumed max number of labels in slice is 10000
            labels[labelled] += 10000 * i
            if i == n_slices - 1:
                maxlabel = np.amax(labels)
        else:
            labels[labelled] += maxlabel
            maxlabel += num

        if slicedim == 0:
            ds_out[i, :, :] = labels
        elif slicedim == 1:
            ds_out[:, i, :] = labels
        elif slicedim == 2:
            ds_out[:, :, i] = labels

    # save the maximum labelvalue in the dataset
    print("found %d labels" % (maxlabel))
    if mpi_info['rank'] == mpi_info['size'] - 1:
        root = h5path_out.split('.h5')[0]
        fpath = root + '.npy'
        np.save(fpath, np.array([maxlabel]))

    # close and return
    try:
        h5file_mm.close()
        h5file_out.close()
        if h5path_mask:
            h5file_md.close()
    except (ValueError, AttributeError):
        return ds_out
from __future__ import print_function

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os

from wmem import utils

# Location of the datasets to display.
datadir = "/Users/michielk/oxdata/P01/EM/Myrf_01/SET-B/B-NT-S10-2f_ROI_00"

# Open the data volume.
fname = 'B-NT-S10-2f_ROI_00ds7'
dset0 = 'data'
h5path_in = os.path.join(datadir, '{}.h5/{}'.format(fname, dset0))
h5file_in, ds_in, elsize, axlab = utils.h5_load(h5path_in)

# Open the label volume; its element size and axis labels are discarded so
# that the values loaded with the data volume are not silently overwritten.
fname = 'B-NT-S10-2f_ROI_00ds7_labels'
dset0 = 'labelMA_core2D_pred'
h5path_lb = os.path.join(datadir, '{}.h5/{}'.format(fname, dset0))
h5file_lb, ds_lb, _, _ = utils.h5_load(h5path_lb)

# Index of the slice (first axis) to display.
V = 0

# Colormap with a black first entry followed by random colours.
# np.random.rand requires integer dimensions: passing the float 1e6 raises
# a TypeError, hence the explicit int().
cmap = matplotlib.colors.ListedColormap(np.vstack(
    ((0, 0, 0), np.random.rand(int(1e6), 3)))
)

# Two panels side by side; the left one shows the data slice.
fig, axs = plt.subplots(1, 2)
ax = axs[0]
ax.imshow(ds_in[V, :, :], cmap=plt.get_cmap('Greys'))
ax.set_title('orig')
ax = axs[1]
def downsample_slices(
        inputdir,
        outputdir,
        regex='*.tif',
        ds_factor=4,
        dataslices=None,
        usempi=False,
        protective=False,
        ):
    """Downsample a series of 2D images.

    Input is either a hdf5 dataset (`inputdir` contains '.h5') or a
    directory of images matching `regex`; output is either a hdf5 dataset
    (`outputdir` contains '.h5') or a directory that receives one image per
    input file.  Every selected slice is resized in-plane by `ds_factor`.

    Args:
        inputdir: path to a hdf5 dataset or to a directory with images.
        outputdir: path to a hdf5 dataset or to an output directory.
        regex: glob pattern selecting the images in `inputdir`.
        ds_factor: integer in-plane downsample factor.
        dataslices: passed to `utils.get_slice_objects_prc` to select data.
        usempi: passed to `utils.get_mpi_info`.
        protective: passed to the output checks.

    Returns:
        None.

    Raises:
        NotImplementedError: for hdf5 input with directory output, because
            the output filenames are derived from the input filenames.
    """

    h5_in = '.h5' in inputdir
    h5_out = '.h5' in outputdir

    if h5_in and not h5_out:
        # This combination previously failed with a NameError on `files`.
        raise NotImplementedError(
            'hdf5 input can only be written to hdf5 output')

    # Start with unbound handles, so that closing at the end is safe for
    # directory input/output (previously an uncaught NameError).
    h5file_in = h5file_out = None

    if h5_out:
        status, info = utils.h5_check(outputdir, protective)
        print(info)
        if status == "CANCELLED":
            return

    if h5_in:
        # FIXME: assumed zyx for now
        h5file_in, ds_in, elsize, axlab = utils.h5_load(inputdir)
        zyxdims = ds_in.shape
        dtype = ds_in.dtype
    else:
        # Get the list of input filepaths.
        files = sorted(glob.glob(os.path.join(inputdir, regex)))
        first = io.imread(files[0])
        zyxdims = [len(files)] + list(first.shape)
        dtype = first.dtype
        # The element size is unknown for image files.
        # NOTE(review): assumes utils.h5_write accepts
        # element_size_um=None -- confirm.
        elsize = None
        axlab = 'zyx'

    if h5_out:
        if elsize is not None:
            # Downsampling makes each output voxel cover `ds_factor` input
            # voxels in-plane, so the element size grows by that factor.
            elsize[1] = elsize[1] * ds_factor
            elsize[2] = elsize[2] * ds_factor
        # Integer division: dataset shapes must be integers.
        outsize = [zyxdims[0],
                   zyxdims[1] // ds_factor,
                   zyxdims[2] // ds_factor]
        h5file_out, ds_out = utils.h5_write(None, outsize, dtype,
                                            outputdir,
                                            element_size_um=elsize,
                                            axislabels=axlab)
    else:
        # Get the list of output filepaths.
        utils.mkdir_p(outputdir)
        outpaths = []
        for fpath in files:
            root, ext = os.path.splitext(fpath)
            tail = os.path.split(root)[1]
            outpaths.append(os.path.join(outputdir, tail + ext))
        # Check if any output paths already exist.
        status = utils.output_check_dir(outpaths, protective)
        if status == "CANCELLED":
            return

    # Get the slice objects for the input data.
    slices = utils.get_slice_objects_prc(dataslices, zyxdims)

    # Prepare for processing with MPI.
    mpi_info = utils.get_mpi_info(usempi)
    series = np.array(range(slices[0].start,
                            slices[0].stop,
                            slices[0].step), dtype=int)
    if mpi_info['enabled']:
        series = utils.scatter_series(mpi_info, series)[0]

    # Downsample and save the images.
    for slc in series:
        if h5_in:
            sub = ds_in[slc, slices[1], slices[2]]
        else:
            sub = io.imread(files[slc])[slices[1], slices[2]]

        # Integer division: the target shape must consist of integers.
        img_ds = resize(sub, (sub.shape[0] // ds_factor,
                              sub.shape[1] // ds_factor))

        if h5_out:
            ds_out[slc, :, :] = img_ds
        else:
            imsave(outpaths[slc], img_ds)

    # Close whichever hdf5 files were opened; each one independently, so a
    # failure on one handle does not leave the other open.
    for h5file in (h5file_in, h5file_out):
        if h5file is None:
            continue
        try:
            h5file.close()
        except (ValueError, AttributeError):
            pass
def nodes_of_ranvier(
        h5path_in,
        min_labelsize=0,
        remove_small_labels=False,
        h5path_boundarymask='',
        merge_methods=['neighbours'],
        overlap_threshold=20,
        h5path_data='',
        h5path_mmm='',
        searchradius=[100, 30, 30],
        h5path_out='',
        save_steps=False,
        protective=False,
        ):
    """Find labels that do not traverse through the volume.

    Labels are classified by the borders they touch: the first four slices
    ('bottom'), the last four slices ('top') and the boundary mask
    ('sides').  Labels touching at most one of these are treated as not
    traversing the volume; after removing small and short labels they are
    passed to `merge_labels`.  The resulting label sets are applied to the
    input labels with `utils.forward_map` and written to the output
    dataset, and are also written to '<root>_<dataset>_automerged' files.

    NOTE: neither `merge_methods` nor `searchradius` is modified here; they
    are only handed to `merge_labels`.

    Returns None when the output check is cancelled or when the output
    file closed normally; returns the output dataset when closing it
    raised ValueError/AttributeError.
    """

    # check output paths
    outpaths = {'out': h5path_out,
                'largelabels': '', 'smalllabelmask': '',
                'boundarymask': '',
                'labels_nt': '', 'labels_tv': '',
                'filled': '',
                }
    # NOTE: `h5path_out` must contain '.h5' exactly once for this unpack.
    root, ds_main = outpaths['out'].split('.h5')
    grpname = ds_main + "_steps"
    for dsname in outpaths:
        outpaths[dsname] = os.path.join(root + '.h5' + grpname, dsname)
    status = utils.output_check(outpaths, save_steps, protective)
    if status == "CANCELLED":
        return

    # open data for reading
    h5file_in, ds_in, elsize, axlab = utils.h5_load(h5path_in)
    labels = ds_in[:]  # FIXME: do we make a copy, or use ds_out?

    # open data for writing
    h5file_out, ds_out = utils.h5_write(None, ds_in.shape, ds_in.dtype,
                                        h5path_out,
                                        element_size_um=elsize,
                                        axislabels=axlab)

    # start with the set of all labels
    ulabels = np.unique(labels)
    maxlabel = np.amax(ulabels)
    labelset = set(ulabels)
    print("number of labels in labelvolume: {}".format(len(labelset)))

    # get the labelsets that touch the borders
    sidesmask = get_boundarymask(h5path_boundarymask, 'invdil')
    ls_bot = set(np.unique(labels[:4, :, :]))
    ls_top = set(np.unique(labels[-4:, :, :]))
    ls_sides = set(np.unique(labels[sidesmask]))
    ls_border = ls_bot | ls_top | ls_sides
    ls_centre = labelset - ls_border

    # get the labels that touch exactly one border
    # A three-way symmetric difference (bot ^ top ^ sides) also keeps the
    # labels that touch all three borders, although those clearly traverse
    # the volume; hence the explicit removal of every label that is found
    # on two or more borders.
    ls_multi = ((ls_bot & ls_top) |
                (ls_bot & ls_sides) |
                (ls_top & ls_sides))
    ls_once = ls_border - ls_multi
    ls_nt = ls_centre | ls_once

    # filter labels on size
    root = os.path.splitext(h5file_out.filename)[0]
    ls_small = utils.filter_on_size(labels, labelset, min_labelsize,
                                    remove_small_labels,
                                    save_steps, root, ds_out.name[1:],
                                    outpaths, elsize, axlab)[2]
    labelset -= ls_small
    ls_nt -= ls_small

    # filter labels on height
    ls_short = filter_on_heigth(labels, 5)
    labelset -= ls_short
    ls_nt -= ls_short

    ls_tv = labelset - ls_nt
    print('number of large, long labels: {}'.format(len(labelset)))
    print('number of large, long in-volume labels: {}'.format(len(ls_nt)))
    print('number of large, long through-volume labels: {}'.format(len(ls_tv)))

    # map the large labels that don't traverse the volume
    fw_nt = np.zeros(maxlabel + 1, dtype='i')
    for lab in ls_nt:
        fw_nt[lab] = lab
    labels_nt = fw_nt[labels]

    # automated label merge
    # NOTE: merging uses a fixed minimal label size of 10, independent of
    # the `min_labelsize` argument (which is only used for the size filter
    # above); a separate name avoids shadowing that argument.
    labelsets = {}
    merge_min_labelsize = 10
    labelsets, filled = merge_labels(labels_nt, labelsets, merge_methods,
                                     overlap_threshold,
                                     h5path_data, h5path_mmm,
                                     merge_min_labelsize, searchradius)

    fw = np.zeros(maxlabel + 1, dtype='i')
    ds_out[:] = utils.forward_map(np.array(fw), labels, labelsets)

    if save_steps:

        utils.save_step(outpaths, 'boundarymask', sidesmask, elsize, axlab)

        utils.save_step(outpaths, 'labels_nt', labels_nt, elsize, axlab)

        fw_tv = np.zeros(maxlabel + 1, dtype='i')
        for lab in ls_tv:
            fw_tv[lab] = lab
        labels_tv = fw_tv[labels]
        utils.save_step(outpaths, 'labels_tv', labels_tv, elsize, axlab)

        if filled is not None:
            fw = np.zeros(maxlabel + 1, dtype='i')
            filled = utils.forward_map(np.array(fw), filled, labelsets)
            utils.save_step(outpaths, 'filled', filled, elsize, axlab)

    # write the label sets that were merged
    filestem = '{}_{}_automerged'.format(root, ds_out.name[1:])
    utils.write_labelsets(labelsets, filestem,
                          filetypes=['txt', 'pickle'])

    # close and return
    h5file_in.close()
    try:
        h5file_out.close()
    except (ValueError, AttributeError):
        return ds_out
def CC_3D(
        h5path_in,
        h5path_mask='',
        min_size_maskMM=0,
        min_area=0,
        h5path_out='',
        protective=False,
        ):
    """Label connected components in a 3D stack.

    The input dataset is inverted (optionally after removing components
    smaller than `min_size_maskMM` from it) and the connected components
    of the result are labelled.  Labels smaller than `min_area` are
    removed, as is the largest label (assumed to be the unmyelinated axon
    compartment); the remaining labels are relabelled sequentially and
    written to the output dataset.

    NOTE: the dataset at `h5path_mask` is opened and closed, but is not
    used in the labelling.

    Returns None when the output check is cancelled or when all files
    closed normally; returns the label array when closing a file raised
    ValueError/AttributeError.
    """

    # check output path
    if '.h5' in h5path_out:
        status, info = utils.h5_check(h5path_out, protective)
        print(info)
        if status == "CANCELLED":
            return

    # open data for reading
    h5file_mm, ds_mm, elsize, axlab = utils.h5_load(h5path_in)
    if h5path_mask:
        h5file_md, ds_md, _, _ = utils.h5_load(h5path_mask)

    # open data for writing
    h5file_out, ds_out = utils.h5_write(None, ds_mm.shape, 'uint32',
                                        h5path_out,
                                        element_size_um=elsize,
                                        axislabels=axlab)

    # 3D labeling with label size constraints
    # NOTE: could save memory here by applying the constraints to input before
    if min_size_maskMM:
        mask = label(ds_mm[:], return_num=False, connectivity=None)
        remove_small_objects(mask, min_size_maskMM, in_place=True)
        mask = ~mask.astype('bool')
    else:
        # Cast before inverting: on a non-boolean dataset `~` is a bitwise
        # NOT, which does not yield the logical complement.
        mask = ~ds_mm[:].astype('bool')

    labels = label(mask, return_num=False, connectivity=None)

    if min_area:
        remove_small_objects(labels, min_area, in_place=True)

    # remove the largest label (assumed unmyelinated axon compartment)
    # Nothing to remove when no label is left at this point.
    rp = regionprops(labels)
    if rp:
        largest = max(rp, key=lambda prop: prop.area)
        labels[labels == largest.label] = 0

    labels = relabel_sequential(labels)[0]

    print('maxlabel: {}'.format(np.amax(labels)))
    ds_out[:] = labels

    # close and return
    # The input files are closed before the output, so that a failing
    # close on the output handle cannot leave the mask file open.
    try:
        h5file_mm.close()
        if h5path_mask:
            h5file_md.close()
        h5file_out.close()
    except (ValueError, AttributeError):
        return labels
def process_block(h5path_in, ndim, blockreduce, func, blockoffset,
                  blocksize, margin, fullsize,
                  ds_out,
                  is_labelimage, relabel, neighbourmerge, save_fwmap,
                  maxlabel, usempi, mpi_info):
    """Write a block of data into a hdf5 file.

    The block at `h5path_in` is copied into `ds_out` at the position
    derived from its filename.  For label images the block can first be
    relabelled (`relabel`), reduced blockwise (`blockreduce` with `func`)
    and merged with labels already present in the overlap with its
    neighbours (`neighbourmerge`).

    NOTE: `maxlabel` is updated locally when relabelling, but is not
    returned; `usempi` is not used in this function.

    Returns None.  The input file is closed on every exit path.
    """

    # open data for reading
    h5file_in, ds_in, _, _ = utils.h5_load(h5path_in)

    try:
        # get the indices into the input and output datasets
        # TODO: get indices from attributes
        # NOTE:
        # x, X, y, Y, z, Z are indices into the full dataset
        # ix, iX, iy, iY, iz, iZ are indices into the input dataset
        # ox, oX, oy, oY, oz, oZ are indices into the output dataset
        _, x, X, y, Y, z, Z = utils.split_filename(
            h5file_in.filename,
            [blockoffset[2], blockoffset[1], blockoffset[0]])
        (oz, oZ), (iz, iZ) = margins(z, Z, blocksize[0],
                                     margin[0], fullsize[0])
        (oy, oY), (iy, iY) = margins(y, Y, blocksize[1],
                                     margin[1], fullsize[1])
        (ox, oX), (ix, iX) = margins(x, X, blocksize[2],
                                     margin[2], fullsize[2])
        ixyz = ix, iX, iy, iY, iz, iZ
        oxyz = ox, oX, oy, oY, oz, oZ

        # simply copy the data from input to output
        # NOTE: it is assumed that the inputs are not 4D labelimages
        if ndim == 4:
            ds_out[oz:oZ, oy:oY, ox:oX, :] = ds_in[iz:iZ, iy:iY, ix:iX, :]
            return

        plain_copy = ((not is_labelimage) or
                      ((not relabel) and
                       (not neighbourmerge) and
                       (not blockreduce)))
        if plain_copy:
            ds_out[oz:oZ, oy:oY, ox:oX] = ds_in[iz:iZ, iy:iY, ix:iX]
            return

        # forward map to relabel the blocks in the output
        if relabel:
            fw, maxlabel = relabel_block(ds_in, maxlabel, mpi_info)
            if save_fwmap:
                root = os.path.splitext(h5file_in.filename)[0]
                fpath = '{}_{}.npy'.format(root, ds_in.name[1:])
                np.save(fpath, fw)
            if (not neighbourmerge) and (not blockreduce):
                ds_out[oz:oZ, oy:oY, ox:oX] = fw[ds_in[iz:iZ, iy:iY, ix:iX]]
                return
        else:
            # identity map over all label values in the block
            fw = np.array(range(0, int(np.amax(ds_in[:])) + 1))

        # blockwise reduction of input datasets
        # Tested on truthiness, consistent with the checks above, so that
        # an empty `blockreduce` means 'no reduction' everywhere.
        if blockreduce:
            data, ixyz, oxyz = blockreduce_datablocks(ds_in, ds_out,
                                                      ixyz, oxyz,
                                                      blockreduce, func)
            ix, iX, iy, iY, iz, iZ = ixyz
            ox, oX, oy, oY, oz, oZ = oxyz
            # A tuple rather than a generator expression, so the reduced
            # margins can be indexed and read more than once.
            margin = tuple(int(margin[i] / blockreduce[i])
                           for i in range(0, 3))
            if not neighbourmerge:
                ds_out[oz:oZ, oy:oY, ox:oX] = fw[data]
                return
        else:
            data = ds_in[iz:iZ, iy:iY, ix:iX]

        # merge overlapping labels
        fw = merge_overlap(fw, data, ds_out, oxyz, ixyz, margin)
        ds_out[oz:oZ, oy:oY, ox:oX] = fw[data]

    finally:
        h5file_in.close()