def main(pred_file, result_folder='.', pred_keys=['volumes/pred_affs'],
         result_key='volumes/instances', patchshape=[7, 7, 7],
         chunksize=[92, 92, 92], num_workers=8, remove_small_comps=0,
         save_mip=False, **kwargs):
    sample = os.path.basename(pred_file).split('.')[0]
    result_file = os.path.join(result_folder, sample + '.zarr')
    tmp_key = 'volumes/tmp_worker'

    if num_workers == 1:
        single_worker(pred_file, result_file, num_workers, **kwargs)
    else:
        non_overlapping_chessboard_worker(
            pred_file, result_file, pred_keys, tmp_key, num_workers,
            chunksize, patchshape, result_folder=result_folder, **kwargs)

    # stitch blocks
    instances = stitch_vote_instances(result_file, tmp_key, result_key,
                                      chunksize, patchshape)

    # convert result to hdf
    if kwargs['output_format'] == 'hdf':
        zarr2hdf(result_file,
                 hdf_file=os.path.join(result_folder, sample + '.hdf'),
                 zarr_key=['volumes/instances'],
                 hdf_key=['volumes/instances'])

    if remove_small_comps > 0:
        instances = remove_small_components(instances, remove_small_comps)
        instances = relabel(instances)

    io.imsave(os.path.join(result_folder, sample + '.tif'),
              instances.astype(np.uint16),
              plugin='tifffile')

    if save_mip:
        colored = color(np.max(instances, axis=0))
        io.imsave(os.path.join(result_folder, sample + '.png'),
                  colored.astype(np.uint8))
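# zarr2hdf is called above but not defined in this section. A minimal sketch
# of what it is assumed to do (copy the listed zarr datasets into the
# corresponding hdf5 datasets), relying on the module-level zarr/h5py/numpy
# imports; the actual helper may differ.
def zarr2hdf(zarr_file, hdf_file, zarr_key, hdf_key):
    zf = zarr.open(zarr_file, mode='r')
    with h5py.File(hdf_file, 'w') as hf:
        for zk, hk in zip(zarr_key, hdf_key):
            hf.create_dataset(hk, data=np.array(zf[zk]), compression='gzip')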
# command-line variant; assumes args has been parsed and t1 = datetime.now()
# was taken above (the fragment starts mid-script)
if args.num_workers == 1:
    single_worker(pred_file, out_file, args.num_workers)
else:
    non_overlapping_chessboard_worker(
        pred_file, out_file, args.in_key, tmp_key, args.num_workers,
        args.chunksize, args.patchshape, **kwargs)

instances = stitch_vote_instances(out_file, tmp_key, args.out_key,
                                  args.chunksize, args.patchshape)

relabeled = instances    # fall back to unfiltered instances if no cleaning
if args.remove_small_comps > 0:
    cleaned = remove_small_components(instances, args.remove_small_comps)
    relabeled = relabel(cleaned)
    io.imsave(out_file.replace('.zarr', '.tif'),
              relabeled.astype(np.uint16), plugin='tifffile')

if args.save_mip:
    colored = color(np.max(relabeled, axis=0))
    io.imsave(out_file.replace('.zarr', '_mip.png'),
              colored.astype(np.uint8))

zarr2hdf(out_file, hdf_file=out_file.replace('.zarr', '.hdf'),
         zarr_key=['volumes/instances'], hdf_key=['volumes/instances'])

logger.info("%s", datetime.now() - t1)
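# remove_small_components and relabel are assumed helpers; minimal sketches
# consistent with their use above (drop instance ids below a voxel-count
# threshold, then map the surviving ids onto a consecutive range). The
# actual implementations may differ.
def remove_small_components(instances, min_size):
    labels, counts = np.unique(instances, return_counts=True)
    small = labels[counts < min_size]
    instances[np.isin(instances, small)] = 0
    return instances


def relabel(instances):
    relabeled = np.zeros_like(instances)
    # np.unique returns sorted ids, so background 0 (if present) maps to 0
    for new_id, old_id in enumerate(np.unique(instances)):
        relabeled[instances == old_id] = new_id
    return relabeled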
def main(pred_file, result_folder='.', **kwargs):
    """Calls vote_instances blockwise and stitches them afterwards.

    Args:
        pred_file (``string``):
            Filename of prediction. Should be zarr.
        result_folder (``string``):
            Path to result folder.
        **kwargs (``dict``):
            All arguments needed for vote_instances and stitching.
            In the following only the stitching arguments are listed
            (maybe they should be renamed?):

                aff_key
                fg_key
                res_key
                overlapping_inst
                only_bb
                patchshape
                patch_threshold
                ignore_small_comps
                skeletonize_foreground
                chunksize
                num_parallel_blocks

            Output / postprocessing:

                save_mip
                remove_small_comps
                dilate_instances
    """
    assert os.path.exists(pred_file), \
        'Prediction file {} does not exist. Please check!'.format(pred_file)

    sample = os.path.basename(pred_file).split('.')[0]
    result_file = os.path.join(result_folder, sample + '.zarr')
    kwargs['result_folder'] = result_folder
    kwargs['return_intermediates'] = True

    # dataset keys
    aff_key = kwargs['aff_key']
    fg_key = kwargs.get('fg_key')
    res_key = kwargs.get('res_key', 'vote_instances')
    cleaned_mask_key = 'volumes/foreground_cleaned'
    tmp_key = 'volumes/blocks'

    # read input shape
    if pred_file.endswith('.zarr'):
        in_f = zarr.open(pred_file, mode='a')
    else:
        raise NotImplementedError

    aff_shape = in_f[aff_key].shape
    channel_order = [slice(0, aff_shape[0])]
    pred_keys = [aff_key]
    if kwargs['overlapping_inst']:
        numinst_shape = in_f[fg_key].shape
        channel_order.append(slice(0, numinst_shape[0]))
        pred_keys += [fg_key]
        assert aff_shape[1:] == numinst_shape[1:], \
            'Please check: affinity and numinst shape do not match!'
    input_shape = aff_shape[1:]    # without first channel dimension

    # get bounding box for foreground
    if kwargs.get('only_bb'):
        mid = np.prod(kwargs['patchshape']) // 2
        if cleaned_mask_key in in_f:
            pred_keys += [cleaned_mask_key]
            channel_order.append(slice(0, 1))
            shape = np.array(in_f[cleaned_mask_key].attrs['fg_shape'])
            bb_offset = np.array(in_f[cleaned_mask_key].attrs['offset'])
        else:
            mask = np.array(in_f[aff_key][mid])
            mask = mask > kwargs['patch_threshold']
            if np.sum(mask) == 0:
                logger.info('Volume has no foreground voxel, returning...')
                return

            # remove small components
            if kwargs.get('ignore_small_comps', 0) > 0:
                mask = clean_mask(mask, np.ones([3] * mask.ndim),
                                  kwargs.get('ignore_small_comps'))
                mask = mask.astype(np.uint8)

            # skeletonize mask (flylight specific)
            if kwargs.get('skeletonize_foreground'):
                mask = skeletonize_3d(mask) > 0
                mask = mask.astype(np.uint8)

            # save processed mask to input file
            if kwargs.get('ignore_small_comps', 0) > 0 or \
               kwargs.get('skeletonize_foreground'):
                compressor = Blosc(cname='zstd', clevel=3,
                                   shuffle=Blosc.BITSHUFFLE)
                in_f.create_dataset(
                    cleaned_mask_key,
                    data=np.reshape(mask, (1, ) + mask.shape),
                    shape=(1, ) + mask.shape,
                    compressor=compressor,
                    dtype=np.uint8,
                    overwrite=True)

            fg_coords = np.transpose(np.nonzero(mask))
            bb_min = np.min(fg_coords, axis=0)
            bb_max = np.max(fg_coords, axis=0)
            shape = bb_max - bb_min + 1
            bb_offset = bb_min
            if kwargs.get('ignore_small_comps', 0) > 0 or \
               kwargs.get('skeletonize_foreground'):
                in_f[cleaned_mask_key].attrs['offset'] = [
                    int(off) for off in bb_offset]
                in_f[cleaned_mask_key].attrs['fg_shape'] = [
                    int(dim) for dim in shape]
    else:
        shape = input_shape
        bb_offset = [0] * len(shape)
    if len(shape) == 2:
        shape = (1, ) + tuple(shape)
    logger.info("input shape: {}, bb cropped shape: {}, offset: {}".format(
        input_shape, shape, bb_offset))

    # create offset lists
    offsets = get_offsets(shape, kwargs['chunksize'])
    # offsets = [offset + bb_offset for offset in offsets]
    logger.info("processing {} blocks".format(len(offsets)))
    logger.debug("blocks: {}".format(offsets))

    def init(l):
        global mutex
        mutex = l
    mutex = Lock()
    if kwargs['num_parallel_blocks'] > 1:
        pool = Pool(processes=kwargs['num_parallel_blocks'],
                    initializer=init, initargs=(mutex, ))
        pool.map(
            functools.partial(blockwise_vote_instances, pred_file,
                              pred_keys, result_file, tmp_key, shape,
                              channel_order, bb_offset, kwargs),
            offsets)
        pool.close()
        pool.join()
    else:
        kwargs['mutex'] = mutex
        for idx, offset in enumerate(offsets):
            logger.info("start block idx: %s/%s (file %s)",
                        idx, len(offsets), sample)
            blockwise_vote_instances(pred_file, pred_keys, result_file,
                                     tmp_key, shape, channel_order,
                                     bb_offset, kwargs, offset)

    # stitch blocks
    # child_pid = os.fork()
    # if child_pid == 0:
    #     # child process
    instances = stitch_vote_instances(
        result_file, tmp_key, res_key, input_shape, bb_offset, shape,
        pred_file, pred_keys, channel_order, **kwargs)

    # save mip
    if kwargs.get('save_mip', False):
        colored = color(np.max(instances, axis=0))
        io.imsave(os.path.join(result_folder, sample + '.png'),
                  colored.astype(np.uint8))

    # remove small components
    remove_small_comps = kwargs.get('remove_small_comps', 0)
    if remove_small_comps > 0:
        instances = remove_small_components(instances, remove_small_comps)
        instances = relabel(instances)

    io.imsave(os.path.join(result_folder, sample + '.tif'),
              instances.astype(np.uint16),
              plugin='tifffile')
    if kwargs.get('save_mip', False):
        colored = color(np.max(instances, axis=0))
        io.imsave(os.path.join(result_folder, sample + '_cleaned.png'),
                  colored.astype(np.uint8))

    if kwargs['output_format'] == 'hdf':
        with h5py.File(os.path.join(result_folder, sample + '.hdf'),
                       'w') as hf:
            hf.create_dataset(res_key,
                              data=instances.astype(np.uint16),
                              dtype=np.uint16,
                              compression='gzip')

            if kwargs.get("dilate_instances", False):
                logger.info("dilating")
                instdil = np.copy(instances)
                for lbl in np.unique(instances):
                    if lbl == 0:
                        continue
                    label_mask = instdil == lbl
                    dilated_label_mask = ndimage.binary_dilation(
                        label_mask, iterations=1)
                    instdil[dilated_label_mask] = lbl
                hf.create_dataset(res_key + "_dil_1",
                                  data=instdil.astype(np.uint16),
                                  dtype=np.uint16,
                                  compression='gzip')
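# get_offsets is assumed to tile the (possibly bb-cropped) volume into a
# regular grid of chunksize-sized blocks; a minimal sketch, the actual helper
# may order or filter the offsets differently. For example,
# get_offsets((4, 10, 10), (2, 5, 5)) would yield 2*2*2 = 8 offsets.
import itertools

def get_offsets(shape, chunksize):
    return [np.array(coords) for coords in itertools.product(
        *[range(0, int(s), int(c)) for s, c in zip(shape, chunksize)])]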
def visualize_instances(label_fn, label_key, output_file, max_axis=None,
                        show_outline=False, raw_file=None, raw_key=None):
    # read labeling
    if label_fn.endswith('.zarr'):
        inf = zarr.open(label_fn, mode='r')
    elif label_fn.endswith('.hdf'):
        inf = h5py.File(label_fn, 'r')
    else:
        raise NotImplementedError
    label = np.squeeze(np.array(inf[label_key]))
    print(label.shape, label.dtype)
    if label_fn.endswith('.hdf'):
        inf.close()

    if max_axis is not None:
        label = np.max(label, axis=max_axis)

    # read raw if given
    raw = None
    if raw_file is not None and raw_key is not None:
        if raw_file.endswith('.zarr'):
            inf = zarr.open(raw_file, mode='r')
        elif raw_file.endswith('.hdf'):
            inf = h5py.File(raw_file, 'r')
        else:
            raise NotImplementedError
        raw = np.squeeze(np.array(inf[raw_key]))
        if raw_file.endswith('.hdf'):
            inf.close()

    if show_outline:
        # label = label.astype(np.uint16)
        # for i in range(label.shape[0]):
        #     label[i] *= (i+1)
        labels, locations = np.unique(label, return_index=True)
        print(labels)
        locations = np.delete(locations, np.where(labels == 0))
        labels = np.delete(labels, np.where(labels == 0))

        # for different colormaps, see https://colorcet.pyviz.org/
        colormap = cc.glasbey_light
        # uncomment to choose randomly from colormap
        # colormap = np.random.choice(colormap, size=len(labels),
        #                             replace=(len(labels) > len(colormap)))

        if raw is None:
            colored = np.zeros(label.shape[1:] + (3, ), dtype=np.uint8)
        else:
            # convert raw to np.uint8
            # heads up: assuming normalized raw between [0, 1]
            raw = (raw * 255).astype(np.uint8)
            colored = np.stack([raw] * 3, axis=-1)

        for i, (lbl, loc) in enumerate(zip(labels, locations)):
            if lbl == 0:
                continue
            # heads up: assuming one instance per channel
            c = np.unravel_index(loc, label.shape)[0]
            outline = ndimage.distance_transform_cdt(label[c] == lbl) == 1
            # colored[outline, :] = np.random.randint(0, 255, 3)
            colored[outline, :] = hex_to_rgb(colormap[i])
    else:
        colored = color(label)

    io.imsave(output_file, colored.astype(np.uint8))
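# hex_to_rgb and color are assumed helpers; minimal sketches consistent with
# their use above (colormap entries are hex strings such as '#d60000' from
# cc.glasbey_light, and color produces a uint8 RGB rendering of a label
# image). The actual implementations may differ.
def hex_to_rgb(hex_str):
    hex_str = hex_str.lstrip('#')
    return [int(hex_str[i:i + 2], 16) for i in (0, 2, 4)]


def color(label):
    colored = np.zeros(label.shape + (3, ), dtype=np.uint8)
    for i, lbl in enumerate(np.unique(label)):
        if lbl == 0:
            continue    # background stays black
        colored[label == lbl] = hex_to_rgb(
            cc.glasbey_light[i % len(cc.glasbey_light)])
    return colored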
def main(pred_file, result_folder='.', **kwargs):
    assert os.path.exists(pred_file), \
        'Prediction file {} does not exist. Please check!'.format(pred_file)

    sample = os.path.basename(pred_file).split('.')[0]
    # skip samples known to be problematic
    if sample == 'GMR_38F04_AE_01-20181005_63_G5':
        return
    if sample == 'BJD_127B01_AE_01-20171124_64_H5':
        return
    result_file = os.path.join(result_folder, sample + '.zarr')
    aff_key = kwargs['aff_key']
    fg_key = kwargs.get('fg_key')

    # read input shape
    if pred_file.endswith('.zarr'):
        in_f = zarr.open(pred_file, mode='r')
    else:
        raise NotImplementedError

    aff_shape = in_f[aff_key].shape
    channel_order = [slice(0, aff_shape[0])]
    pred_keys = [aff_key]
    if kwargs['overlapping_inst']:
        numinst_shape = in_f[fg_key].shape
        channel_order.append(slice(0, numinst_shape[0]))
        pred_keys += [fg_key]
        assert aff_shape[1:] == numinst_shape[1:], \
            'Please check: affinity and numinst shape do not match!'
    input_shape = aff_shape[1:]    # without first channel dimension

    # check if blocks should only be within bounding box
    if kwargs.get('only_bb'):
        mid = np.prod(kwargs['patchshape']) // 2
        mask = np.array(in_f[aff_key][mid])
        mask = mask > kwargs['patch_threshold']
        if np.sum(mask) == 0:
            logger.warning("bb empty")
            return
        if kwargs.get('ignore_small_comps', 0) > 0:
            labeled = ndimage.label(mask,
                                    np.ones([3] * len(input_shape)))[0]
            labels, counts = np.unique(labeled, return_counts=True)
            labels = labels[counts <= kwargs.get('ignore_small_comps')]
            labeled = replace(labeled, np.array(labels),
                              np.array([0] * len(labels)))
            print('num small comps: ', len(labels))
            # for label in labels:
            #     mask[labeled == label] = 0
            mask = labeled > 0
        fg_coords = np.transpose(np.nonzero(mask))
        bb_min = np.min(fg_coords, axis=0)
        bb_max = np.max(fg_coords, axis=0)
        # TODO for l1 data
        # bb_min = np.array([31, 31, 0])
        # bb_max = np.array([input_shape[0]-30, input_shape[1]-30,
        #                    input_shape[2]])
        shape = bb_max - bb_min + 1
        bb_offset = bb_min
    else:
        shape = input_shape
        bb_offset = [0] * len(shape)
    if len(shape) == 2:
        shape = (1, ) + tuple(shape)
        bb_offset = [0] * len(shape)
    logger.info("input shape: %s, bb cropped shape: %s, offset: %s",
                input_shape, shape, bb_offset)

    if pred_file.endswith('.hdf'):
        in_f.close()

    # create offset lists
    offsets = get_chessboard_offsets(shape, kwargs['chunksize'])
    # offsets = [offset + bb_offset for offset in offsets]
    logger.info('num blocks: %s', len(offsets))
    logger.info("%s", offsets)

    # create temporary zarr dataset for blocks (2^dim x shape)
    tmp_key = 'volumes/tmp_worker'
    skip_tmp_worker = False
    if not os.path.exists(result_file):
        compressor = Blosc(cname='zstd', clevel=3, shuffle=Blosc.BITSHUFFLE)
        f = zarr.open(result_file, mode='w')
        f.create_dataset(tmp_key,
                         shape=(2**len(shape), ) + tuple(shape),
                         compressor=compressor,
                         dtype=np.uint32,
                         chunks=(1, ) + tuple(kwargs['chunksize']))
    else:
        f = zarr.open(result_file, mode='r')
        if tmp_key in f:
            skip_tmp_worker = True

    def init(l):
        global mutex
        mutex = l

    if not skip_tmp_worker:
        mutex = Lock()
        if kwargs['num_parallel_blocks'] > 1:
            pool = Pool(processes=kwargs['num_parallel_blocks'],
                        initializer=init, initargs=(mutex, ))
            pool.map(
                functools.partial(blockwise_vote_instances, pred_file,
                                  pred_keys, result_file, tmp_key, shape,
                                  channel_order, bb_offset, kwargs),
                offsets)
            pool.close()
            pool.join()
        else:
            kwargs['mutex'] = mutex
            for idx, offset in enumerate(offsets):
                # if idx < 7 or idx > 8:
                #     continue
                logger.info("start block idx: %s/%s (file %s)",
                            idx, len(offsets), sample)
                blockwise_vote_instances(pred_file, pred_keys, result_file,
                                         tmp_key, shape, channel_order,
                                         bb_offset, kwargs, offset)
    else:
        logger.info("skipping tmp_worker (blocks already exist?)")
    # stitch blocks
    res_key = kwargs.get('res_key', 'vote_instances')
    logger.info("%s", kwargs)
    instances = stitch_vote_instances(result_file, tmp_key, res_key,
                                      input_shape, bb_offset, **kwargs)

    # save mip
    save_mip = kwargs.get('save_mip', False)
    if save_mip:
        colored = color(np.max(instances, axis=0))
        io.imsave(os.path.join(result_folder, sample + '.png'),
                  colored.astype(np.uint8))

    # remove small components
    remove_small_comps = kwargs.get('remove_small_comps', 0)
    if remove_small_comps > 0:
        instances = remove_small_components(instances, remove_small_comps)
        instances = relabel(instances)

    io.imsave(os.path.join(result_folder, sample + '.tif'),
              instances.astype(np.uint16),
              plugin='tifffile')
    if save_mip:
        colored = color(np.max(instances, axis=0))
        io.imsave(os.path.join(result_folder, sample + '_cleaned.png'),
                  colored.astype(np.uint8))

    if kwargs['output_format'] == 'hdf':
        with h5py.File(os.path.join(result_folder, sample + '.hdf'),
                       'w') as hf:
            hf.create_dataset(res_key,
                              data=instances.astype(np.uint16),
                              dtype=np.uint16,
                              compression='gzip')

            if kwargs.get("dilate_instances", False):
                logger.info("dilating")
                instdil = np.copy(instances)
                for lbl in np.unique(instances):
                    if lbl == 0:
                        continue
                    label_mask = instdil == lbl
                    dilated_label_mask = ndimage.binary_dilation(
                        label_mask, iterations=1)
                    instdil[dilated_label_mask] = lbl
                hf.create_dataset(res_key + "_dil_1",
                                  data=instdil.astype(np.uint16),
                                  dtype=np.uint16,
                                  compression='gzip')
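# get_chessboard_offsets is assumed to enumerate block offsets on a regular
# grid where each block falls into one of 2**ndim parity ("chessboard")
# classes; blocks of the same class do not touch and can be processed
# independently, which matches the (2**ndim,) + shape tmp dataset created
# above. A minimal sketch under these assumptions; the actual helper may
# order the offsets by class or differ in other details.
import itertools

def get_chessboard_offsets(shape, chunksize):
    return [np.array(coords) for coords in itertools.product(
        *[range(0, int(s), int(c)) for s, c in zip(shape, chunksize)])]


def chessboard_class(offset, chunksize):
    # parity class in [0, 2**ndim): bit d is the parity of the block index
    # along dimension d
    return sum(((int(o) // int(c)) % 2) << d
               for d, (o, c) in enumerate(zip(offset, chunksize)))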