def watershed_cube(self):
    writeVerbose = False
    #writeVerbose = self.dpWatershedTypes_verbose
    readVerbose = False
    #readVerbose = self.dpWatershedTypes_verbose

    # load the probability data, allocate as array of volumes instead of 4D ndarray to maintain C-order volumes
    probs = [None]*self.ntypes; bwseeds = [None]*self.nfg_types
    if self.srclabels:
        # this code path is typically not used, in favor of the label checker for fully labeled 3d gt components.
        # but some ground truth (for example, 2d ECS cases) was only labeled with voxel type,
        # so this is used to create ground truth components from the voxel types.
        loadh5 = emLabels.readLabels(srcfile=self.srclabels, chunk=self.chunk.tolist(),
            offset=self.offset.tolist(), size=self.size.tolist(), data_type='uint16', verbose=writeVerbose)
        self.datasize = loadh5.datasize; self.chunksize = loadh5.chunksize; self.attrs = loadh5.data_attrs
        # pre-allocate for the srclabels method; labeled areas are set to prob of 1 below
        for i in range(self.ntypes):
            probs[i] = np.zeros(self.size, dtype=emProbabilities.PROBS_DTYPE, order='C')
        if self.TminSrc < 2:
            # simple method with no "cleaning"
            for i in range(self.ntypes): probs[i][loadh5.data_cube==i] = 1
        else:
            # optionally "clean" labels by removing small bg and fg components for each foreground type
            fgbwlabels = np.zeros(self.size, dtype=bool)
            for i in range(self.nfg_types):
                # background connected components and threshold
                comps, nlbls = nd.measurements.label(loadh5.data_cube!=i+1)
                comps, sizes = emLabels.thresholdSizes(comps, minSize=self.TminSrc)
                # foreground connected components and threshold
                comps, nlbls = nd.measurements.label(comps==0)
                comps, sizes = emLabels.thresholdSizes(comps, minSize=self.TminSrc)
                # keep track of the mask over all foreground types
                bwlabels = (comps > 0); fgbwlabels = np.logical_or(fgbwlabels, bwlabels)
                probs[i+1][bwlabels] = 1
            # set the background type as all areas that are not in any foreground type after "cleaning"
            probs[0][np.logical_not(fgbwlabels)] = 1
    else:
        # check if background is in the prob file
        hdf = h5py.File(self.probfile,'r'); has_bg = self.bg_type in hdf; hdf.close()
        for i in range(0 if has_bg else 1, self.ntypes):
            loadh5 = dpLoadh5.readData(srcfile=self.probfile, dataset=self.types[i], chunk=self.chunk.tolist(),
                offset=self.offset.tolist(), size=self.size.tolist(), data_type=emProbabilities.PROBS_STR_DTYPE,
                verbose=readVerbose)
            self.datasize = loadh5.datasize; self.chunksize = loadh5.chunksize; self.attrs = loadh5.data_attrs
            probs[i] = loadh5.data_cube; del loadh5
        # if background was not in the hdf5 then create it as 1-sum(fg type probs)
        if not has_bg:
            probs[0] = np.ones_like(probs[1])
            for i in range(1,self.ntypes): probs[0] -= probs[i]
            #assert( (probs[0] >= 0).all() ) # comment for speed
            probs[0][probs[0] < 0] = 0 # rectify

    # save some of the parameters as attributes
    self.attrs['types'] = self.types; self.attrs['fg_types'] = self.fg_types
    self.attrs['fg_types_labels'] = self.fg_types_labels

    # save the connectivity structure and warping LUT because they are used on each iteration (for speed)
    self.bwconn = nd.morphology.generate_binary_structure(dpLoadh5.ND, self.connectivity)
    self.bwconn2d = self.bwconn[:,:,1]; self.simpleLUT = None

    # load the warpings if warping mode is enabled
    warps = None
    if self.warpfile:
        warps = [None]*self.nwarps
        for i in range(self.nwarps):
            loadh5 = dpLoadh5.readData(srcfile=self.warpfile, dataset=self.warp_datasets[i],
                chunk=self.chunk.tolist(), offset=self.offset.tolist(), size=self.size.tolist(),
                verbose=readVerbose)
            warps[i] = loadh5.data_cube; del loadh5

    # xxx - may need to revisit cropping, only intended to be used with the warping method.
    if self.docrop: c = self.cropborder; s = self.size    # c and s are reserved for cropping; do not reuse them below

    # optionally apply filters in an attempt to fill small background (membrane) probability gaps.
    if self.close_bg > 0:
        # create structuring element
        n = 2*self.close_bg + 1; h = self.close_bg
        strel = np.zeros((n,n,n),dtype=bool); strel[h,h,h] = 1
        strel = nd.binary_dilation(strel,iterations=self.close_bg)

        # xxx - this was the only thing tried here that helped some, but it didn't work well against the skeletons
        probs[0] = nd.grey_closing( probs[0], structure=strel )
        for i in range(self.nfg_types): probs[i+1] = nd.grey_opening( probs[i+1], structure=strel )
        # xxx - this gave worse results
        #probs[0] = nd.maximum_filter( probs[0], footprint=strel )
        # xxx - this had almost no effect
        #probs[0] = nd.grey_closing( probs[0], structure=strel )

    # argmax produces the winner-take-all type assignment for each voxel.
    # the background type was put first, so a voxType of zero is background (membrane).
    voxType = np.concatenate([x.reshape(x.shape + (1,)) for x in probs], axis=3).argmax(axis=3)

    # write out the winning type for each voxel,
    # saving some params from this watershed run in the attributes
    d = self.attrs.copy(); d['thresholds'] = self.Ts; d['Tmins'] = self.Tmins
    data = voxType.astype(emVoxelType.VOXTYPE_DTYPE)
    if self.docrop: data = data[c[0]:s[0]-c[0],c[1]:s[1]-c[1],c[2]:s[2]-c[2]]
    emVoxelType.writeVoxType(outfile=self.outlabels, chunk=self.chunk.tolist(),
        offset=self.offset_crop.tolist(), size=self.size_crop.tolist(), datasize=self.datasize.tolist(),
        chunksize=self.chunksize.tolist(), verbose=writeVerbose, attrs=d, data=data)

    # only allow a voxel to be included in the type of component that had max prob for that voxel.
    # do this by setting the non-winning probabilities to zero.
    for i in range(self.ntypes): probs[i][voxType != i] = 0

    # create a type mask for each foreground type to select only the current voxel type (winner-take-all from network)
    voxTypeSel = [None]*self.nfg_types; voxTypeNotSel = [None]*self.nfg_types
    for i in range(self.nfg_types):
        voxTypeSel[i] = (voxType == i+1)
        # create an inverted version, only used for complete fill, not for warping (which requires C-contiguous),
        # so apply the crop here if cropping is enabled
        voxTypeNotSel[i] = np.logical_not(voxTypeSel[i])
        if self.docrop: voxTypeNotSel[i] = voxTypeNotSel[i][c[0]:s[0]-c[0],c[1]:s[1]-c[1],c[2]:s[2]-c[2]]

    # need C-contiguous probabilities for binary_warping.
    for i in range(self.nfg_types):
        if not probs[i+1].flags.contiguous or np.isfortran(probs[i+1]):
            probs[i+1] = np.ascontiguousarray(probs[i+1])

    # iteratively apply the thresholds, each time only keeping components that have fallen under size Tmin.
    # at the last iteration keep all remaining components.
    # do this separately for each foreground type.
    for k in range(self.nTmin):
        for i in range(self.nfg_types): bwseeds[i] = np.zeros(self.size, dtype=bool, order='C')
        for i in range(self.nthresh):
            if self.dpWatershedTypes_verbose:
                print('creating supervoxels at threshold = %.8f with Tmin = %d' % (self.Ts[i], self.Tmins[k]))
                t = time.time()
            types_labels = [None]*self.nfg_types; types_uclabels = [None]*self.nfg_types
            if self.skeletonize: types_sklabels = [None]*self.nfg_types
            types_nlabels = np.zeros((self.nfg_types,),dtype=np.int64)
            types_ucnlabels = np.zeros((self.nfg_types,),dtype=np.int64)
            for j in range(self.nfg_types):
                # run connected components on the current foreground probabilities at this threshold
                labels, nlabels = nd.measurements.label(probs[j+1] > self.Ts[i], self.bwconn)
                # merge the current thresholded components with the previous seeds to get the current bwlabels
                bwlabels = np.logical_or(labels, bwseeds[j])
                # take the current components under the size threshold and merge them with the seeds for the next iteration
                if i < self.nthresh-1:
                    labels, sizes = emLabels.thresholdSizes(labels, minSize=-self.Tmins[k])
                    bwseeds[j] = np.logical_or(labels, bwseeds[j])

                # this if/elif switch determines the main method for creating the labels.
                # xxx - make cropping happen in a more efficient way, in particular to avoid filling cropped areas
                if self.method == 'overlap':
                    # definite advantage to this method over the others, but the cost is that it is about 2-3 times slower.
                    # labels are linked per zslice using precalculated slice-to-slice warpings based on the probs.
                    labels, nlabels = self.label_overlap(bwlabels, voxTypeSel[j], warps)

                    # xxx - add switches to only optionally export the unconnected labels
                    #uclabels = labels; ucnlabels = nlabels;

                    # crop right after the labels are created and stay uncropped from here.
                    # xxx - labels will be wrong unless the method implicitly handled the cropping during the labeling.
                    #   currently only the warping method does this; cropping is not needed for the other methods anyways.
                    if self.docrop: labels = labels[c[0]:s[0]-c[0],c[1]:s[1]-c[1],c[2]:s[2]-c[2]]

                    # this method can not create true unconnected 3d labels, but they should be unconnected in 2d.
                    # NOTE: currently this only removes 6-connectivity, no matter what connectivity is specified
                    # xxx - some method of removing adjacencies with arbitrary connectivity?
                    uclabels, ucnlabels = emLabels.remove_adjacencies(labels)
                elif self.method == 'skim-ws':
                    # xxx - still trying to evaluate if there is any advantage to this more traditional watershed.
                    #   it does not leave a non-adjacency boundary and is about 1.5 times slower than bwmorph

                    # run connected components on the thresholded labels merged with the previous seeds
                    labels, nlabels = nd.measurements.label(bwlabels, self.bwconn)
                    # run a true watershed based on the current foreground probs using the current components as markers
                    labels = morph.watershed(probs[j+1], labels, connectivity=self.bwconn, mask=voxTypeSel[j])
                    # remove any adjacencies created during the watershed
                    # NOTE: currently this only removes 6-connectivity, no matter what connectivity is specified
                    # xxx - some method of removing adjacencies with arbitrary connectivity?
                    uclabels, ucnlabels = emLabels.remove_adjacencies(labels)
                else:
                    if self.method == 'comps-ws' and i > 1:
                        # this is an alternative to the traditional watershed that warps out based only on stepping
                        #   back through the thresholds in reverse order. it has the advantage of non-adjacency.
                        # may help slightly for small supervoxels but did not show much metric improvement in
                        #   terms of large-scale connectivity (against skeletons).
                        # about 4-5 times slower than the regular warping method.

                        # make an unconnected version of bwlabels by warping out, but with a mask only for this type.
                        # everything above the current threshold is already labeled, so only the gray thresholds
                        #   starting below the current threshold level are needed.
                        bwlabels, diff, self.simpleLUT = binary_warping(bwlabels, np.ones(self.size,dtype=bool),
                            mask=voxTypeSel[j], borderval=False, slow=True, simpleLUT=self.simpleLUT,
                            connectivity=self.connectivity, gray=probs[j+1],
                            grayThresholds=self.Ts[i-1::-1].astype(np.float32, order='C'))
                    else:
                        assert( self.method == 'comps' ) # bad method option
                        # make an unconnected version of bwlabels by warping out, but with a mask only for this type
                        bwlabels, diff, self.simpleLUT = binary_warping(bwlabels, np.ones(self.size,dtype=bool),
                            mask=voxTypeSel[j], borderval=False, slow=True, simpleLUT=self.simpleLUT,
                            connectivity=self.connectivity)

                    # run connected components on the thresholded labels merged with the previous seeds (warped out)
                    uclabels, ucnlabels = nd.measurements.label(bwlabels, self.bwconn)
                    # in this case the normal labels are the same as the unconnected labels because of the warping
                    labels = uclabels; nlabels = ucnlabels

                # optionally make a skeletonized version of the unconnected labels
                # xxx - revisit this, currently not being used for anything, started as a method to skeletonize GT
                if self.skeletonize:
                    # method to skeletonize using max range endpoints only
                    sklabels, sknlabels = emLabels.ucskeletonize(uclabels, mask=voxTypeSel[j],
                        sampling=self.attrs['scale'] if 'scale' in self.attrs else None)
                    assert( sknlabels == ucnlabels )

                # fill out these labels so that they cover the remaining voxels based on voxType.
                # this uses the bwdist method for finding nearest neighbors, so connectivity can be violated.
                #   this is mitigated by first filling out the background using the warping transformation
                #   (or watershed) above, so this step only fills in the remaining voxels for the
                #   current foreground voxType.
                labels = emLabels.nearest_neighbor_fill(labels, mask=voxTypeNotSel[j],
                    sampling=self.attrs['scale'] if 'scale' in self.attrs else None)

                # save the component labels generated for this type
                types_labels[j] = labels.astype(emLabels.LBLS_DTYPE, copy=False)
                types_uclabels[j] = uclabels.astype(emLabels.LBLS_DTYPE, copy=False)
                types_nlabels[j] = nlabels if self.fg_types_labels[j] < 0 else 1
                types_ucnlabels[j] = ucnlabels if self.fg_types_labels[j] < 0 else 1
                if self.skeletonize: types_sklabels[j] = sklabels.astype(emLabels.LBLS_DTYPE, copy=False)

            # merge the fg component labels. they cannot overlap because voxel type is winner-take-all.
            nlabels = 0; ucnlabels = 0
            labels = np.zeros(self.size_crop, dtype=emLabels.LBLS_DTYPE)
            uclabels = np.zeros(self.size_crop, dtype=emLabels.LBLS_DTYPE)
            if self.skeletonize: sklabels = np.zeros(self.size, dtype=emLabels.LBLS_DTYPE)
            for j in range(self.nfg_types):
                sel = (types_labels[j] > 0); ucsel = (types_uclabels[j] > 0)
                if self.skeletonize: sksel = (types_sklabels[j] > 0)
                if self.fg_types_labels[j] < 0:
                    labels[sel] += (types_labels[j][sel] + nlabels)
                    uclabels[ucsel] += (types_uclabels[j][ucsel] + ucnlabels)
                    if self.skeletonize: sklabels[sksel] += (types_sklabels[j][sksel] + ucnlabels)
                    nlabels += types_nlabels[j]; ucnlabels += types_ucnlabels[j]
                else:
                    labels[sel] = self.fg_types_labels[j]
                    uclabels[ucsel] = self.fg_types_labels[j]
                    if self.skeletonize: sklabels[sksel] = self.fg_types_labels[j]
                    nlabels += 1; ucnlabels += 1

            if self.dpWatershedTypes_verbose:
                print('\tnlabels = %d' % (nlabels,))
                #print('\tnlabels = %d %d' % (nlabels,labels.max())) # for debug only
                #assert(nlabels == labels.max()) # sanity check for non-overlapping voxTypeSel, comment for speed
                print('\tdone in %.4f s' % (time.time() - t,))

            # make a fully filled-out version using bwdist nearest foreground neighbor
            wlabels = emLabels.nearest_neighbor_fill(labels, mask=None,
                sampling=self.attrs['scale'] if 'scale' in self.attrs else None)

            # write out the results
            if self.nTmin == 1: subgroups = ['%.8f' % (self.Ts[i],)]
            else: subgroups = ['%d' % (self.Tmins[k],), '%.8f' % (self.Ts[i],)]
            d = self.attrs.copy(); d['threshold'] = self.Ts[i]
            d['types_nlabels'] = types_nlabels; d['Tmin'] = self.Tmins[k]
            emLabels.writeLabels(outfile=self.outlabels, chunk=self.chunk.tolist(), offset=self.offset_crop.tolist(),
                size=self.size_crop.tolist(), datasize=self.datasize.tolist(), chunksize=self.chunksize.tolist(),
                data=labels, verbose=writeVerbose, attrs=d, strbits=self.outlabelsbits,
                subgroups=['with_background']+subgroups )
            emLabels.writeLabels(outfile=self.outlabels, chunk=self.chunk.tolist(), offset=self.offset_crop.tolist(),
                size=self.size_crop.tolist(), datasize=self.datasize.tolist(), chunksize=self.chunksize.tolist(),
                data=wlabels, verbose=writeVerbose, attrs=d, strbits=self.outlabelsbits,
                subgroups=['zero_background']+subgroups )
            d['type_nlabels'] = types_ucnlabels
            emLabels.writeLabels(outfile=self.outlabels, chunk=self.chunk.tolist(), offset=self.offset_crop.tolist(),
                size=self.size_crop.tolist(), datasize=self.datasize.tolist(), chunksize=self.chunksize.tolist(),
                data=uclabels, verbose=writeVerbose, attrs=d, strbits=self.outlabelsbits,
                subgroups=['no_adjacencies']+subgroups )
            if self.skeletonize:
                emLabels.writeLabels(outfile=self.outlabels, chunk=self.chunk.tolist(),
                    offset=self.offset_crop.tolist(), size=self.size_crop.tolist(),
                    datasize=self.datasize.tolist(), chunksize=self.chunksize.tolist(), data=sklabels,
                    verbose=writeVerbose, attrs=d, strbits=self.outlabelsbits,
                    subgroups=['skeletonized']+subgroups )
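
# The loops above build supervoxels by sweeping an increasing set of probability thresholds and
# carrying forward, as seeds, only the components that have already fallen under the size Tmin:
# large components keep splitting at higher thresholds while small ones are frozen early. The
# function below is a minimal, standalone sketch of that seed-accumulation idea on a single
# probability volume, assuming only numpy and scipy; the name iterative_components_sketch is
# hypothetical, and emLabels.thresholdSizes is approximated inline with np.bincount.
def iterative_components_sketch(probs, thresholds, tmin, bwconn=None):
    """probs: 3d float array of fg probabilities; thresholds: increasing threshold values."""
    import numpy as np
    import scipy.ndimage as nd
    if bwconn is None: bwconn = nd.generate_binary_structure(3, 1)   # 6-connectivity
    seeds = np.zeros(probs.shape, dtype=bool); bw = seeds
    for i, T in enumerate(thresholds):
        labels, nlabels = nd.label(probs > T, bwconn)
        # current mask: components at this threshold merged with the frozen seeds
        bw = np.logical_or(labels > 0, seeds)
        if i < len(thresholds) - 1:
            # freeze components whose size has fallen under tmin for all later thresholds
            sizes = np.bincount(labels.ravel())[1:]               # sizes of labels 1..nlabels
            small = np.flatnonzero(sizes < tmin) + 1
            seeds = np.logical_or(np.isin(labels, small), seeds)
    # at the last threshold all remaining components are kept
    return nd.label(bw, bwconn)
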
def clean(self):
    # smoothing operates on each label object one at a time
    if self.smooth:
        if self.dpCleanLabels_verbose:
            print('Smoothing labels object by object'); t = time.time()
        # threshold sizes to remove empty labels
        self.data_cube, sizes = emLabels.thresholdSizes(self.data_cube, minSize=1)

        # xxx - fix old comments from the matlab meshing code
        # xxx - local parameters, expose these if any need to change them is found
        rad = 5                   # amount to pad (needs to be greater than one because of the smoothing)
        contour_level = 0.5       # binary threshold for calculating the surface mesh
        smooth_size = [3, 3, 3]
        #emptyLabel = 65535       # should define this in attribs?

        sizes = np.array(self.data_cube.shape); sz = sizes + 2*rad
        image_with_zeros = np.zeros(sz, dtype=self.data_cube.dtype)    # zero-padded volume
        image_with_zeros[rad:-rad,rad:-rad,rad:-rad] = self.data_cube  # embed label array into zeros array
        image_with_brd = np.lib.pad(self.data_cube,((rad,rad), (rad,rad), (rad,rad)),'edge')
        nSeeds = self.data_cube.max()

        # do not smooth ECS labels
        sel_ECS, ECS_label = self.getECS(image_with_brd)
        if self.dpCleanLabels_verbose and ECS_label:
            print('\tignoring ECS label %d' % (ECS_label,))

        # get bounding boxes for each supervoxel in the zero-padded label volume
        svox_bnd = nd.measurements.find_objects(image_with_zeros)

        # iterate over labels
        lbls = np.zeros(sz, dtype=self.data_cube.dtype)
        assert( nSeeds == len(svox_bnd) )
        for j in range(nSeeds):
            if ECS_label and j+1 == ECS_label: continue
            #if self.dpCleanLabels_verbose:
            #    print('Smoothing label %d / %d' % (j+1,nSeeds)); t = time.time()
            pbnd = tuple([slice(x.start-rad,x.stop+rad) for x in svox_bnd[j]])
            Lcrp = (image_with_brd[pbnd] == j+1).astype(np.double)
            Lfilt = nd.filters.uniform_filter(Lcrp, size=smooth_size, mode='constant')
            # in case smoothing puts the object below the contour level, use it without smoothing
            if not (Lfilt > contour_level).any(): Lfilt = Lcrp
            # assign the smoothed output for the current label
            lbls[pbnd][Lfilt > contour_level] = j+1

        # put the ECS labels back
        if ECS_label: lbls[sel_ECS] = ECS_label

        if self.dpCleanLabels_verbose:
            print('\tdone in %.4f s' % (time.time() - t))
        self.data_cube = lbls[rad:-rad,rad:-rad,rad:-rad]

    if self.remove_adjacencies:
        labels = self.data_cube.astype(np.uint32, copy=True, order='C')
        sel_ECS, ECS_label = self.getECS(labels); labels[sel_ECS] = 0
        if self.dpCleanLabels_verbose:
            print('Removing adjacencies with conn %d%s' % (self.fg_connectivity,
                ', ignoring ECS label %d' % (ECS_label,) if ECS_label else ''))
            t = time.time()
        self.data_cube = emLabels.remove_adjacencies_nconn(labels, bwconn=self.fgbwconn)
        if ECS_label: self.data_cube[sel_ECS] = ECS_label
        if self.dpCleanLabels_verbose:
            print('\tdone in %.4f s' % (time.time() - t))

    if self.minsize > 0:
        labels = self.data_cube
        sel_ECS, ECS_label = self.getECS(labels); labels[sel_ECS] = 0
        if self.dpCleanLabels_verbose:
            print('Scrubbing labels with minsize %d%s' % (self.minsize,
                ', ignoring ECS label %d' % (ECS_label,) if ECS_label else ''))
            print('\tnlabels = %d before re-label' % (labels.max(),))
            t = time.time()
        selbg = np.logical_and((labels == 0), np.logical_not(sel_ECS))
        labels, sizes = emLabels.thresholdSizes(labels, minSize=self.minsize)
        if self.minsize_fill:
            if self.dpCleanLabels_verbose: print('Nearest neighbor fill of scrubbed labels')
            labels = emLabels.nearest_neighbor_fill(labels, mask=selbg, sampling=self.data_attrs['scale'])
        nlabels = sizes.size
        labels, nlabels = self.setECS(labels, sel_ECS, ECS_label, nlabels)
        self.data_cube = labels
        # set this so that get_svox_type or write_voxel_type below still work after the re-label
        self.data_attrs['types_nlabels'] = [nlabels]
        if self.dpCleanLabels_verbose:
            print('\tnlabels = %d after re-label' % (nlabels,))
            print('\tdone in %.4f s' % (time.time() - t))

    if self.cavity_fill:
        if self.dpCleanLabels_verbose:
            print('Removing cavities using conn %d' % (self.bg_connectivity,)); t = time.time()
        selbg = (self.data_cube == 0)
        if self.dpCleanLabels_verbose:
            print('\tnumber bg vox before = %d' % (selbg.sum(dtype=np.int64),))
        # pad with background so that a single border-connected background component exists
        labels = np.ones([x + 2 for x in self.data_cube.shape], dtype=bool)
        labels[1:-1,1:-1,1:-1] = selbg
        # don't connect the top and bottom xy planes
        labels[1:-1,1:-1,0] = 0; labels[1:-1,1:-1,-1] = 0
        labels, nlabels = nd.measurements.label(labels, self.bgbwconn)
        # cavities are background voxels that are not in the border-connected background component
        msk = np.logical_and((labels[1:-1,1:-1,1:-1] != labels[0,0,0]), selbg); del labels
        self.data_cube[msk] = 0; selbg[msk] = 0
        self.data_cube = emLabels.nearest_neighbor_fill(self.data_cube, mask=selbg,
            sampling=self.data_attrs['scale'])
        if self.dpCleanLabels_verbose:
            print('\tdone in %.4f s' % (time.time() - t))
            print('\tnumber bg vox after = %d' % ((self.data_cube==0).sum(dtype=np.int64),))

    if self.relabel:
        labels = self.data_cube
        sel_ECS, ECS_label = self.getECS(labels); labels[sel_ECS] = 0
        if self.dpCleanLabels_verbose:
            print('Relabeling fg components with conn %d%s' % (self.fg_connectivity,
                ', ignoring ECS label %d' % (ECS_label,) if ECS_label else ''))
            print('\tnlabels = %d, max = %d, before re-label' % (len(np.unique(labels)), labels.max()))
            t = time.time()
        labels, nlabels = nd.measurements.label(labels, self.fgbwconn)
        labels, nlabels = self.setECS(labels, sel_ECS, ECS_label, nlabels)
        self.data_cube = labels
        if self.dpCleanLabels_verbose:
            print('\tnlabels = %d after re-label' % (nlabels,))
            print('\tdone in %.4f s' % (time.time() - t))

    # this step is always last, as it writes a new voxel_type depending on the cleaning that was done
    if self.get_svox_type or self.write_voxel_type:
        if self.dpCleanLabels_verbose:
            print('Recomputing supervoxel types and re-ordering labels'); t = time.time()
        voxType = emVoxelType.readVoxType(srcfile=self.srcfile, chunk=self.chunk.tolist(),
            offset=self.offset.tolist(), size=self.size.tolist())
        voxel_type = voxType.data_cube.copy(order='C')
        labels = self.data_cube.copy(order='C')
        #nlabels = labels.max(); assert(nlabels == self.data_attrs['types_nlabels'][0])
        nlabels = sum(self.data_attrs['types_nlabels'])
        ntypes = len(voxType.data_attrs['types'])
        supervoxel_type, voxel_type = emLabels.type_components(labels, voxel_type, nlabels, ntypes)
        assert( supervoxel_type.size == nlabels )
        # reorder the labels so that supervoxels are grouped by / in order of supervoxel type
        remap = np.zeros((nlabels+1,), dtype=self.data_cube.dtype)
        remap[np.argsort(supervoxel_type)+1] = np.arange(1,nlabels+1,dtype=self.data_cube.dtype)
        self.data_cube = remap[self.data_cube]
        types_nlabels = [(supervoxel_type==x).sum(dtype=np.int64) for x in range(1,ntypes)]
        assert( sum(types_nlabels) == nlabels ) # fails if the voxel types do not match the supervoxels
        self.data_attrs['types_nlabels'] = types_nlabels
        if self.write_voxel_type:
            if self.dpCleanLabels_verbose:
                print('Rewriting voxel type pixel data based on supervoxel types')
            d = voxType.data_attrs.copy()
            #d['types_nlabels'] =
            emVoxelType.writeVoxType(outfile=self.outfile, chunk=self.chunk.tolist(),
                offset=self.offset.tolist(), size=self.size.tolist(), datasize=voxType.datasize.tolist(),
                chunksize=voxType.chunksize.tolist(), data=voxel_type.astype(emVoxelType.VOXTYPE_DTYPE),
                attrs=d)
        if self.dpCleanLabels_verbose:
            print('\tdone in %.4f s' % (time.time() - t))
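
# The cavity_fill step above detects enclosed background by padding the volume with a one-voxel
# background border, labeling the background, and treating any background component that is not
# connected to the border component (the one containing the padded corner) as a cavity to be
# re-filled from the nearest labels. Below is a minimal standalone sketch of that test, assuming
# only numpy and scipy; the name find_cavities_sketch is hypothetical, and the trick above of
# disconnecting the top and bottom z-planes is omitted for brevity.
def find_cavities_sketch(labels, bgconn=None):
    """labels: 3d integer label volume with 0 as background. Returns bool mask of enclosed bg."""
    import numpy as np
    import scipy.ndimage as nd
    if bgconn is None: bgconn = nd.generate_binary_structure(3, 3)   # 26-connectivity
    selbg = (labels == 0)
    padded = np.ones([x + 2 for x in labels.shape], dtype=bool)
    padded[1:-1,1:-1,1:-1] = selbg
    comps, ncomps = nd.label(padded, bgconn)
    # background voxels not in the border-connected component are cavities
    return np.logical_and(comps[1:-1,1:-1,1:-1] != comps[0,0,0], selbg)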