def __iter__(self):
    """Iterate over the volumes of ``self.cubeIter``, loading the labels for each one.

    For every volume the size / chunk / offset are stored on ``self``, ``self.inith5()`` is called and
    the labels are read with ``emLabels.readLabels``. On the first pass the source file is built from
    the first file path / prefix plus the volume suffix, otherwise ``self.srcfile`` is read.

    Yields:
        tuple: ``(cur_data, cur_attrs, cur_ncomps, n)`` where ``cur_data`` is the label cube cast to
        ``self.data_type_out``, ``cur_attrs`` are the data attributes read from the file (with
        ``'datasize'`` added), ``cur_ncomps`` is the sum of the ``'types_nlabels'`` attribute and
        ``n`` is the zero-based index of the volume.
    """
    # NOTE: cubeIter must stay the first zip argument, the order determines how it is consumed.
    for volume_info, n in zip(self.cubeIter, range(self.cubeIter.volume_size)):
        self.volume_info = volume_info
        _, size, chunk, offset, suffix, _, _, _ = volume_info
        self.size = size
        self.chunk = chunk
        self.offset = offset
        self.inith5()

        if self.dpCubeStitcher_verbose:
            dims = self.chunk.tolist() + self.size.tolist() + self.offset.tolist()
            print('Loading chunk %d %d %d, size %d %d %d, offset %d %d %d' % tuple(dims))
            tic = time.time()

        if self.first_pass:
            srcfile = os.path.join(self.filepaths[0], self.fileprefixes[0] + suffix + '.h5')
        else:
            srcfile = self.srcfile

        reader = emLabels.readLabels(
            srcfile=srcfile,
            chunk=self.chunk.tolist(),
            subgroups=self.subgroups,
            offset=self.offset.tolist(),
            size=self.size.tolist(),
            verbose=self.dpLoadh5_verbose,
        )
        # every volume has to use the same hdf5 chunksize as the stitcher itself
        assert( (self.chunksize == reader.chunksize).all() )

        cur_attrs = reader.data_attrs
        cur_ncomps = cur_attrs['types_nlabels'].sum()
        cur_data = reader.data_cube.astype(self.data_type_out)
        cur_attrs['datasize'] = reader.datasize
        # xxx - make these as option? need if each volume being read is a portion of a larger labeled volume
        #cur_data = emLabels.relabel_sequential(cur_data)
        #cur_ncomps = cur_data.max()

        if self.dpCubeStitcher_verbose:
            print('\tdone in %.4f s, ncomps = %d' % (time.time() - tic, cur_ncomps))

        yield cur_data, cur_attrs, cur_ncomps, n
def watershed_cube(self):
    """Create supervoxel labels for the current chunk from per-type probabilities.

    Steps, in order:

    1. Load one probability volume per voxel type from ``self.probfile``, or build binary
       "probabilities" from the voxel types stored in ``self.srclabels``.
    2. Optionally filter the probabilities (``self.close_bg``), take the per-voxel argmax as the
       voxel type and write it to ``self.outlabels``.
    3. For every ``Tmin`` / threshold pair, label each foreground type with ``self.method``
       ('overlap', 'skim-ws', 'comps' or 'comps-ws'), merge the types into a single label volume and
       write the 'with_background', 'zero_background', 'no_adjacencies' and (if ``self.skeletonize``)
       'skeletonized' label sets to ``self.outlabels``.

    Side effects: sets ``self.datasize``, ``self.chunksize``, ``self.attrs``, ``self.bwconn``,
    ``self.bwconn2d`` and ``self.simpleLUT``.

    Raises:
        AssertionError: if ``self.method`` is not one of the supported methods, or if the number of
            skeletonized labels differs from the number of unconnected labels.
    """
    # verbosity handed to the hdf5 readers / writers; swap in self.dpWatershedTypes_verbose to debug I/O
    writeVerbose = False
    readVerbose = False

    # load the probability data, allocate as array of volumes instead of 4D ndarray to maintain C-order volumes
    probs = [None]*self.ntypes
    bwseeds = [None]*self.nfg_types
    if self.srclabels:
        # this code path is typically not used in favor of the label checker for fully labeled 3d gt components.
        # but, some ground truth (for example, 2d ECS cases) was only labeled with voxel type,
        #   so this is used to create ground truth components from the voxel types.
        loadh5 = emLabels.readLabels(srcfile=self.srclabels, chunk=self.chunk.tolist(),
            offset=self.offset.tolist(), size=self.size.tolist(), data_type='uint16', verbose=writeVerbose)
        self.datasize = loadh5.datasize; self.chunksize = loadh5.chunksize; self.attrs = loadh5.data_attrs

        # pre-allocate for srclabels method, labeled areas are set to prob of 1 below
        for i in range(self.ntypes):
            probs[i] = np.zeros(self.size, dtype=emProbabilities.PROBS_DTYPE, order='C')

        if self.TminSrc < 2:
            # simple method with no "cleaning"
            for i in range(self.ntypes):
                probs[i][loadh5.data_cube == i] = 1
        else:
            # optionally "clean" labels by removing small bg and fg components for each foreground type
            # (builtin bool instead of np.bool, which does not exist in NumPy 1.24 - 1.26)
            fgbwlabels = np.zeros(self.size, dtype=bool)
            for i in range(self.nfg_types):
                # background connected components and threshold
                comps, _ = nd.label(loadh5.data_cube != i+1)
                comps, _ = emLabels.thresholdSizes(comps, minSize=self.TminSrc)
                # foreground connected components and threshold
                comps, _ = nd.label(comps == 0)
                comps, _ = emLabels.thresholdSizes(comps, minSize=self.TminSrc)
                # keep track of mask for all foreground types
                bwlabels = (comps > 0)
                fgbwlabels = np.logical_or(fgbwlabels, bwlabels)
                probs[i+1][bwlabels] = 1
            # set background type as all areas that are not in foreground types after "cleaning"
            probs[0][np.logical_not(fgbwlabels)] = 1
    else:
        # check if background is in the prob file; the context manager closes the file even on error
        with h5py.File(self.probfile, 'r') as hdf:
            has_bg = self.bg_type in hdf
        for i in range(0 if has_bg else 1, self.ntypes):
            loadh5 = dpLoadh5.readData(srcfile=self.probfile, dataset=self.types[i], chunk=self.chunk.tolist(),
                offset=self.offset.tolist(), size=self.size.tolist(),
                data_type=emProbabilities.PROBS_STR_DTYPE, verbose=readVerbose)
            self.datasize = loadh5.datasize; self.chunksize = loadh5.chunksize; self.attrs = loadh5.data_attrs
            probs[i] = loadh5.data_cube
            del loadh5
        # if background was not in hdf5 then create it as 1-sum(fg type probs)
        if not has_bg:
            probs[0] = np.ones_like(probs[1])
            for i in range(1, self.ntypes):
                probs[0] -= probs[i]
            #assert( (probs[0] >= 0).all() ) # comment for speed
            probs[0][probs[0] < 0] = 0 # rectify

    # save some of the parameters as attributes
    self.attrs['types'] = self.types
    self.attrs['fg_types'] = self.fg_types
    self.attrs['fg_types_labels'] = self.fg_types_labels

    # voxel sampling for the distance based fills below. self.attrs is used as a mapping, so the
    #   presence of the 'scale' *key* is what matters; hasattr() alone never sees a mapping key.
    #   the attribute test is kept as well so anything that worked before still works.
    if 'scale' in self.attrs or hasattr(self.attrs, 'scale'):
        sampling = self.attrs['scale']
    else:
        sampling = None

    # save connnetivity structure and warping LUT because used on each iteration (for speed)
    self.bwconn = nd.generate_binary_structure(dpLoadh5.ND, self.connectivity)
    self.bwconn2d = self.bwconn[:,:,1]
    self.simpleLUT = None

    # load the warpings if warping mode is enabled
    warps = None
    if self.warpfile:
        warps = [None]*self.nwarps
        for i in range(self.nwarps):
            loadh5 = dpLoadh5.readData(srcfile=self.warpfile, dataset=self.warp_datasets[i],
                chunk=self.chunk.tolist(), offset=self.offset.tolist(), size=self.size.tolist(),
                verbose=readVerbose)
            warps[i] = loadh5.data_cube
            del loadh5

    # xxx - may need to revisit cropping, only intended to be used with warping method.
    crop = None     # index that removes self.cropborder from each side of a full-size volume
    if self.docrop:
        c = self.cropborder; s = self.size
        crop = (slice(c[0], s[0]-c[0]), slice(c[1], s[1]-c[1]), slice(c[2], s[2]-c[2]))

    # optionally apply filters in attempt to fill small background (membrane) probability gaps.
    if self.close_bg > 0:
        # create structuring element by dilating a single center voxel close_bg times
        n = 2*self.close_bg + 1; h = self.close_bg
        strel = np.zeros((n,n,n), dtype=bool)
        strel[h,h,h] = 1
        strel = nd.binary_dilation(strel, iterations=self.close_bg)

        # xxx - this was the only thing tried here that helped some but didn't work well against the skeletons
        probs[0] = nd.grey_closing( probs[0], structure=strel )
        for i in range(self.nfg_types):
            probs[i+1] = nd.grey_opening( probs[i+1], structure=strel )
        # xxx - this gave worse results
        #probs[0] = nd.maximum_filter( probs[0], footprint=strel )
        # xxx - this had almost no effect
        #probs[0] = nd.grey_closing( probs[0], structure=strel )

    # argmax produces the winner-take-all assignment for each supervoxel.
    # background type was put first, so voxType of zero is background (membrane).
    voxType = np.concatenate([x.reshape(x.shape + (1,)) for x in probs], axis=3).argmax(axis=3)

    # write out the winning type for each voxel
    # save some params from this watershed run in the attributes
    d = self.attrs.copy(); d['thresholds'] = self.Ts; d['Tmins'] = self.Tmins
    data = voxType.astype(emVoxelType.VOXTYPE_DTYPE)
    if self.docrop:
        data = data[crop]
    emVoxelType.writeVoxType(outfile=self.outlabels, chunk=self.chunk.tolist(),
        offset=self.offset_crop.tolist(), size=self.size_crop.tolist(), datasize=self.datasize.tolist(),
        chunksize=self.chunksize.tolist(), verbose=writeVerbose, attrs=d, data=data)

    # only allow a voxel to be included in the type of component that had max prob for that voxel.
    # do this by setting the non-winning probabilities to zero.
    for i in range(self.ntypes):
        probs[i][voxType != i] = 0

    # create a type mask for each foreground type to select only current voxel type (winner-take-all from network)
    voxTypeSel = [None]*self.nfg_types
    voxTypeNotSel = [None]*self.nfg_types
    for i in range(self.nfg_types):
        voxTypeSel[i] = (voxType == i+1)
        # create an inverted version, only used for complete fill not for warping (which requires C-contiguous),
        #   so apply crop here if cropping enabled
        voxTypeNotSel[i] = np.logical_not(voxTypeSel[i])
        if self.docrop:
            voxTypeNotSel[i] = voxTypeNotSel[i][crop]

    # need C-contiguous probabilities for binary_warping.
    for i in range(self.nfg_types):
        if not probs[i+1].flags.contiguous or np.isfortran(probs[i+1]):
            probs[i+1] = np.ascontiguousarray(probs[i+1])

    # iteratively apply thresholds, each time only keeping components that have fallen under size Tmin.
    # at last iteration keep all remaining components.
    # do this separately for foreground types.
    for k in range(self.nTmin):
        for i in range(self.nfg_types):
            bwseeds[i] = np.zeros(self.size, dtype=bool, order='C')
        for i in range(self.nthresh):
            if self.dpWatershedTypes_verbose:
                print('creating supervoxels at threshold = %.8f with Tmin = %d' % (self.Ts[i], self.Tmins[k]))
                t = time.time()
            types_labels = [None]*self.nfg_types
            types_uclabels = [None]*self.nfg_types
            if self.skeletonize:
                types_sklabels = [None]*self.nfg_types
            types_nlabels = np.zeros((self.nfg_types,), dtype=np.int64)
            types_ucnlabels = np.zeros((self.nfg_types,), dtype=np.int64)
            for j in range(self.nfg_types):
                # run connected components at this threshold on labels
                labels, nlabels = nd.label(probs[j+1] > self.Ts[i], self.bwconn)

                # merge the current thresholded components with the previous seeds to get current bwlabels
                bwlabels = np.logical_or(labels, bwseeds[j])

                # take the current components under threshold and merge with the seeds for the next iteration
                if i < self.nthresh-1:
                    labels, _ = emLabels.thresholdSizes(labels, minSize=-self.Tmins[k])
                    bwseeds[j] = np.logical_or(labels, bwseeds[j])

                # this if/elif switch determines the main method for creating the labels.
                # xxx - make cropping to be done in more efficient way, particular to avoid filling cropped areas
                if self.method == 'overlap':
                    # definite advantage to this method over other methods, but cost is about 2-3 times slower.
                    # labels are linked per zslice using precalculated slice to slice warpings based on the probs.
                    labels, nlabels = self.label_overlap(bwlabels, voxTypeSel[j], warps)

                    # xxx - add switches to only optionally export the unconnected labels
                    #uclabels = labels; ucnlabels = nlabels;

                    # crop right after the labels are created and stay uncropped from here.
                    # xxx - labels will be wrong unless method implicitly handled the cropping during the labeling.
                    #   currently only the warping method is doing, don't need cropping for other methods anyways.
                    if self.docrop:
                        labels = labels[crop]

                    # this method can not create true unconnected 3d labels, but should be unconnected in 2d.
                    # NOTE: currently this only removes 6-connectivity, no matter what specified connecitity is
                    # xxx - some method of removing adjacencies with arbitrary connectivity?
                    uclabels, ucnlabels = emLabels.remove_adjacencies(labels)
                elif self.method == 'skim-ws':
                    # xxx - still trying to evaluate if there is any advantage to this more traditional watershed.
                    #   it does not leave a non-adjacency boundary and is about 1.5 times slower than bwmorph

                    # run connected components on the thresholded labels merged with previous seeds
                    labels, nlabels = nd.label(bwlabels, self.bwconn)

                    # run a true watershed based the current foreground probs using current components as markers
                    labels = morph.watershed(probs[j+1], labels, connectivity=self.bwconn, mask=voxTypeSel[j])

                    # remove any adjacencies created during the watershed
                    # NOTE: currently this only removes 6-connectivity, no matter what specified connecitity is
                    # xxx - some method of removing adjacencies with arbitrary connectivity?
                    uclabels, ucnlabels = emLabels.remove_adjacencies(labels)
                else:
                    # bad method option. 'comps-ws' has to be accepted here too: for i <= 1 it uses the
                    #   plain warping branch below, which previously tripped an assert allowing 'comps' only.
                    assert self.method in ('comps', 'comps-ws'), 'bad method option'
                    if self.method == 'comps-ws' and i > 1:
                        # this is an alternative to the traditional watershed that warps out only based on stepping
                        #   back through the thresholds in reverse order. has advantages of non-connectivity.
                        # may help slightly for small supervoxels but did not show much improved metrics in
                        #   terms of large-scale connectivity (against skeletons)
                        # about 4-5 times slower than regular warping method.

                        # make an unconnected version of bwlabels by warping out but with mask only for this type
                        # everything above current threshold is already labeled, so only need to use gray thresholds
                        #    starting below the current threshold level.
                        bwlabels, _, self.simpleLUT = binary_warping(bwlabels, np.ones(self.size, dtype=bool),
                            mask=voxTypeSel[j], borderval=False, slow=True, simpleLUT=self.simpleLUT,
                            connectivity=self.connectivity, gray=probs[j+1],
                            grayThresholds=self.Ts[i-1::-1].astype(np.float32, order='C'))
                    else:
                        # make an unconnected version of bwlabels by warping out but with mask only for this type
                        bwlabels, _, self.simpleLUT = binary_warping(bwlabels, np.ones(self.size, dtype=bool),
                            mask=voxTypeSel[j], borderval=False, slow=True, simpleLUT=self.simpleLUT,
                            connectivity=self.connectivity)

                    # run connected components on the thresholded labels merged with previous seeds (warped out)
                    uclabels, ucnlabels = nd.label(bwlabels, self.bwconn)

                    # in this case the normal labels are the same as the unconnected labels because of warping
                    labels = uclabels; nlabels = ucnlabels

                # optionally make a skeletonized version of the unconnected labels
                # xxx - revisit this, currently not being used for anything, started as a method to skeletonize GT
                if self.skeletonize:
                    # method to skeletonize using max range endpoints only
                    sklabels, sknlabels = emLabels.ucskeletonize(uclabels, mask=voxTypeSel[j], sampling=sampling)
                    assert( sknlabels == ucnlabels )

                # fill out these labels out so that they fill in remaining voxels based on voxType.
                # this uses bwdist method for finding nearest neighbors, so connectivity can be violoated.
                # this is mitigated by first filling out background using the warping transformation
                #   (or watershed) above, then this step is only to fill in remaining voxels for the
                #   current foreground voxType.
                labels = emLabels.nearest_neighbor_fill(labels, mask=voxTypeNotSel[j], sampling=sampling)

                # save the components labels generated for this type
                types_labels[j] = labels.astype(emLabels.LBLS_DTYPE, copy=False)
                types_uclabels[j] = uclabels.astype(emLabels.LBLS_DTYPE, copy=False)
                # types with a fixed (non-negative) label count as a single component
                types_nlabels[j] = nlabels if self.fg_types_labels[j] < 0 else 1
                types_ucnlabels[j] = ucnlabels if self.fg_types_labels[j] < 0 else 1
                if self.skeletonize:
                    types_sklabels[j] = sklabels.astype(emLabels.LBLS_DTYPE, copy=False)

            # merge the fg components labels. they can not overlap because voxel type is winner-take-all.
            nlabels = 0; ucnlabels = 0
            labels = np.zeros(self.size_crop, dtype=emLabels.LBLS_DTYPE)
            uclabels = np.zeros(self.size_crop, dtype=emLabels.LBLS_DTYPE)
            if self.skeletonize:
                # size_crop like the other merged volumes; this is also the size that gets written below
                sklabels = np.zeros(self.size_crop, dtype=emLabels.LBLS_DTYPE)
            for j in range(self.nfg_types):
                sel = (types_labels[j] > 0)
                ucsel = (types_uclabels[j] > 0)
                if self.skeletonize:
                    sksel = (types_sklabels[j] > 0)
                if self.fg_types_labels[j] < 0:
                    # offset the labels of this type by the number of labels merged so far
                    labels[sel] += (types_labels[j][sel] + nlabels)
                    uclabels[ucsel] += (types_uclabels[j][ucsel] + ucnlabels)
                    if self.skeletonize:
                        sklabels[sksel] += (types_sklabels[j][sksel] + ucnlabels)
                    nlabels += types_nlabels[j]; ucnlabels += types_ucnlabels[j]
                else:
                    # the whole type is assigned its single fixed label
                    labels[sel] = self.fg_types_labels[j]
                    uclabels[ucsel] = self.fg_types_labels[j]
                    if self.skeletonize:
                        sklabels[sksel] = self.fg_types_labels[j]
                    nlabels += 1; ucnlabels += 1

            if self.dpWatershedTypes_verbose:
                print('\tnlabels = %d' % (nlabels,))
                #print('\tnlabels = %d %d' % (nlabels,labels.max())) # for debug only
                #assert(nlabels == labels.max()) # sanity check for non-overlapping voxTypeSel, comment for speed
                print('\tdone in %.4f s' % (time.time() - t,))

            # make a fully-filled out version using bwdist nearest foreground neighbor
            wlabels = emLabels.nearest_neighbor_fill(labels, mask=None, sampling=sampling)

            # write out the results
            if self.nTmin == 1:
                subgroups = ['%.8f' % (self.Ts[i],)]
            else:
                subgroups = ['%d' % (self.Tmins[k],), '%.8f' % (self.Ts[i],)]
            d = self.attrs.copy(); d['threshold'] = self.Ts[i]
            d['types_nlabels'] = types_nlabels; d['Tmin'] = self.Tmins[k]
            # arguments shared by all label writes; d is passed by reference so later updates are seen
            write_args = dict(outfile=self.outlabels, chunk=self.chunk.tolist(),
                offset=self.offset_crop.tolist(), size=self.size_crop.tolist(),
                datasize=self.datasize.tolist(), chunksize=self.chunksize.tolist(),
                verbose=writeVerbose, attrs=d, strbits=self.outlabelsbits)
            emLabels.writeLabels(data=labels, subgroups=['with_background']+subgroups, **write_args)
            emLabels.writeLabels(data=wlabels, subgroups=['zero_background']+subgroups, **write_args)
            # NOTE(review): key is 'type_nlabels' (singular) while the key written above is
            #   'types_nlabels'; looks like a typo, left as is because it changes the stored attributes.
            d['type_nlabels'] = types_ucnlabels
            emLabels.writeLabels(data=uclabels, subgroups=['no_adjacencies']+subgroups, **write_args)
            if self.skeletonize:
                emLabels.writeLabels(data=sklabels, subgroups=['skeletonized']+subgroups, **write_args)
# Pre-allocate one fully masked array per metric, indexed (segmentation, chunk[, parameter[, 2]]).
metrics = {}
metrics["are_gala"] = ma.array(np.zeros((nsegs, nchunks, mparams), dtype=np.double), mask=True)
metrics["are_precrec_gala"] = ma.array(np.zeros((nsegs, nchunks, mparams, 2), dtype=np.double), mask=True)
metrics["split_vi_gala"] = ma.array(np.zeros((nsegs, nchunks, mparams, 2), dtype=np.double), mask=True)
metrics["nlabels"] = ma.array(np.zeros((nsegs, nchunks, mparams), dtype=np.int64), mask=True)
# nsegparams repeated across chunks just for convenience
metrics["nsegparams"] = ma.array(np.zeros((nsegs, nchunks, mparams), dtype=np.double), mask=True)
#'voxel_sizes_skel' : [[None]*nchunks]*nsegs,
metrics["cat_error"] = ma.array(np.zeros((nsegs, nchunks), dtype=np.double), mask=True)
# make every metric array available as a module-level name as well
globals().update(metrics)

for j, chunk in zip(range(nchunks), chunks):
    # load ground truth and components from segmented labels file
    loadh5 = emLabels.readLabels(srcfile=gth5, chunk=chunk, offset=offset, size=size)
    gtComps = loadh5.data_cube
    gtIsECS = gtComps == gt_ECS_label

    # voxel-type view of the ground truth: 1 for labeled non-ECS voxels, 2 for ECS voxels, 0 elsewhere.
    # must be built before gtComps is modified below.
    gtLbls = np.zeros(gtComps.shape, dtype=np.uint8)
    gtLbls[(gtComps > 0) & ~gtIsECS] = 1
    gtLbls[gtIsECS] = 2

    # zero the ECS voxels, then give the largest remaining label the ECS label value
    gtComps[gtIsECS] = 0
    n = gtComps.max()
    gtComps[gtComps == n] = gt_ECS_label
    gtnlabels = n - 1

    for i, seg, segp in zip(range(nsegs), segmentations, segpaths):
        fps = os.path.join(segp, seg)
        print("calculating metrics for " + seg + " chunk %d %d %d" % tuple(chunk))
        t = time.time()
# Pre-allocate one fully masked array per metric, indexed (segmentation, chunk, parameter[, 2]).
metrics = {}
metrics['are_gala'] = ma.array(np.zeros((nsegs, nchunks, mparams), dtype=np.double), mask=True)
metrics['are_precrec_gala'] = ma.array(np.zeros((nsegs, nchunks, mparams, 2), dtype=np.double), mask=True)
metrics['split_vi_gala'] = ma.array(np.zeros((nsegs, nchunks, mparams, 2), dtype=np.double), mask=True)
#'nlabels_skel' : ma.array(np.zeros((nsegs,nchunks),dtype=np.int64),mask=True),
#'voxel_sizes_skel' : [[None]*nchunks]*nsegs,
# make every metric array available as a module-level name as well
globals().update(metrics)

for i, seg, segp in zip(range(nsegs), segmentations, segpaths):
    for j, chunk in zip(range(nchunks), chunks[i]):
        # xxx - potential savings here for same chunks, currently was not worth the effort relative to load time
        # load ground truth and components from segmented labels file
        loadh5 = emLabels.readLabels(srcfile=gth5[i], chunk=chunk, offset=offset, size=size)
        gtComps = loadh5.data_cube
        gtIsECS = gtComps == gt_ECS_label

        # zero the ECS voxels, then give the largest remaining label the ECS label value
        gtComps[gtIsECS] = 0
        n = gtComps.max()
        gtComps[gtComps == n] = gt_ECS_label
        gtnlabels = n - 1

        fps = os.path.join(segp, seg)
        print('calculating metrics for ' + seg + ' chunk %d %d %d' % tuple(chunk))
        t = time.time()

        # one set of segmented components per segmentation parameter
        for k, prm in zip(range(nparams[i]), segparams[i]):
            loadh5 = emLabels.readLabels(
                srcfile=fps,
                chunk=chunk,
                offset=offset,
                size=size,
                subgroups=subgroups[i] + ['%.8f' % (prm,)],
            )
            segComps = loadh5.data_cube

            # calculate the ISBI2013 rand error (gala, excludes gt background) using the full out components
            are, prec, rec = ev.adapted_rand_error(segComps, gtComps, all_stats=True)
            #are, prec, rec = adapted_rand_error( gtComps, segComps, nogtbg=True)
            are_gala[i, j, k] = are
            are_precrec_gala[i, j, k, :] = np.array([prec, rec])
# distinguishable colors lut, read as raw bytes and arranged as one RGB row per color
dlut = np.fromfile('/usr/local/Fiji.app/luts/distinguish2.lut', dtype=np.uint8).reshape([3,-1]).T

# different four/five colors options, 90's couch used for ECS paper:
#({'e9' '6d' '63' '7f' 'ca' '9f' 'f4' 'ba' '70' '85' 'c1' 'f5'})/255,[3 4])'; % sth else
#({'39' 'ff' '14' 'f3' 'f3' '15' '00' 'be' 'ff' 'ec' '13' '41'})/255,[3 4])'; % neons
# https://kuler.adobe.com/Theme-26-color-theme-3895203/
#({'04' '68' 'bf' '14' 'a6' '70' 'f2' 'bc' '1b' 'f2' '29' '29'})/255,[3 4])'; % 90's couch
# 90's couch plus one, one row of hex RGB components per color
c4lut = np.array(
    [[int(x, 16) for x in rgb] for rgb in [
        ['99', '99', '99'],
        ['04', '68', 'bf'],
        ['14', 'a6', '70'],
        ['f2', 'bc', '1b'],
        ['f2', '29', '29'],
        ['da', 'cc', 'ab'],
    ]],
    dtype=np.uint8,
)

for j, chunk in zip(range(nchunks), chunks):
    # load ground truth and components from segmented labels file
    loadh5 = emLabels.readLabels(srcfile=gth5, chunk=chunk, offset=offset, size=size)
    gtComps = loadh5.data_cube
    gtIsECS = gtComps == gt_ECS_label

    # voxel-type view of the ground truth: 1 for labeled non-ECS voxels, 2 for ECS voxels, 0 elsewhere
    gtLbls = np.zeros(gtComps.shape, dtype=np.uint8)
    gtLbls[(gtComps > 0) & ~gtIsECS] = 1
    gtLbls[gtIsECS] = 2

    # voxel types of the first two segmentations; attrs ends up holding those of the second one
    fn1 = os.path.join(segpaths[0], segmentations[0])
    loadh5 = emVoxelType.readVoxType(srcfile=fn1, chunk=chunk, offset=offset, size=size)
    outLbls1 = loadh5.data_cube
    attrs = loadh5.data_attrs

    fn2 = os.path.join(segpaths[1], segmentations[1])
    loadh5 = emVoxelType.readVoxType(srcfile=fn2, chunk=chunk, offset=offset, size=size)
    outLbls2 = loadh5.data_cube
    attrs = loadh5.data_attrs

    chunk_str = 'x%04d_y%04d_z%04d' % tuple(chunk)
    print('exporting images for ' + chunk_str)
    t = time.time()