def watershed(viewer):
    nonlocal ws_id, next_id
    nonlocal node_labels
    nonlocal node_label_history
    nonlocal sub_nodes, sub_edges
    nonlocal sub_graph, sub_weights
    nonlocal mapping

    if mask_id is None:
        print("Need to select segment to run watershed")
        return

    if ws_id != mask_id or sub_graph is None:
        print("Computing sub-graph for", mask_id, "...")
        sub_nodes = np.where(node_labels == mask_id)[0].astype('uint64')
        sub_edges, _ = graph.extractSubgraphFromNodes(sub_nodes, allowInvalidNodes=True)
        sub_weights = weights[sub_edges]

        nodes_relabeled, max_id, mapping = vigra.analysis.relabelConsecutive(
            sub_nodes, start_label=0, keep_zeros=False
        )
        sub_uvs = uv_ids[sub_edges]
        sub_uvs = nt.takeDict(mapping, sub_uvs)

        n_nodes = max_id + 1
        sub_graph = nifty.graph.undirectedGraph(n_nodes)
        sub_graph.insertEdges(sub_uvs)
        ws_id = mask_id

    mask = viewer.layers['mask'].data
    seeds = viewer.layers['seeds'].data
    seeds[np.logical_not(mask)] = 0

    seed_ids = np.unique(seeds)[1:]
    seed_nodes = np.zeros(sub_graph.numberOfNodes, dtype='uint64')
    for seed_id in seed_ids:
        seeded = np.unique(ws_base[seeds == seed_id])
        if seeded[0] == 0:
            seeded = seeded[1:]
        seeded = nt.takeDict(mapping, seeded)
        seed_nodes[seeded] = seed_id

    print("Computing graph watershed")
    sub_labels = nifty.graph.edgeWeightedWatershedsSegmentation(sub_graph, seed_nodes, sub_weights)

    node_label_history.append(node_labels.copy())
    node_labels[sub_nodes] = sub_labels + (next_id - 1)

    mask_node_labels = np.zeros_like(node_labels)
    mask_node_labels[sub_nodes] = sub_labels
    mask = _seg_from_labels(mask_node_labels)
    viewer.layers['mask'].data = mask

    # TODO should also update the seg, but for now skip this to speed this up
    # seg = _seg_from_labels(node_labels)
    # viewer.layers['segments'].data = seg
    next_id = int(node_labels.max()) + 1
def _apply_node_labels(seg, node_labels, allow_empty_assignments):
    # choose the appropriate mapping:
    # - 1d np.array -> just apply it
    # - 2d np.array -> extract the local dict and apply
    # - dict        -> extract the local dict and apply
    apply_array = not isinstance(node_labels, dict) and node_labels.ndim == 1
    if apply_array:
        seg = nt.take(node_labels, seg)
    else:
        # this copies the dict and hence is extremely RAM hungry,
        # so we make the dict as small as possible
        this_labels = np.unique(seg)
        if isinstance(node_labels, dict):
            # do we allow for assignments that are not in the assignment table?
            if allow_empty_assignments:
                this_assignment = {label: node_labels.get(label, label) for label in this_labels}
            else:
                this_assignment = {label: node_labels[label] for label in this_labels}
        else:
            this_assignment = node_labels[:, 1][np.in1d(node_labels[:, 0], this_labels)]
            this_assignment = {label: this_assignment[ii] for ii, label in enumerate(this_labels)}
        seg = nt.takeDict(this_assignment, seg)
    return seg
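# A minimal usage sketch, not part of the original code: toy data showing the
# three mapping formats accepted by _apply_node_labels (dense 1d array,
# (old id, new id) table, plain dict). The _demo_ name and data are hypothetical;
# np and nt are the module-level imports used throughout.
def _demo_apply_node_labels():
    seg = np.array([[1, 2], [2, 3]], dtype='uint64')
    dense = np.array([0, 10, 20, 30], dtype='uint64')              # 1d array: indexed by the seg value
    table = np.array([[1, 10], [2, 20], [3, 30]], dtype='uint64')  # 2d array of (old id, new id) pairs
    as_dict = {1: 10, 2: 20, 3: 30}                                # plain dict: old id -> new id
    res_dense = _apply_node_labels(seg.copy(), dense, False)
    res_table = _apply_node_labels(seg.copy(), table, False)
    res_dict = _apply_node_labels(seg.copy(), as_dict, False)
    # all three formats express the same mapping and must agree
    assert (res_dense == res_table).all() and (res_table == res_dict).all()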
def watershed(viewer):
    nonlocal node_labels, seeds
    print("Run watershed from seed layer ...")
    layers = viewer.layers
    ws = layers['ws'].data
    seeds = layers['seeds'].data

    new_node_labels = self.graph_watershed(graph, probs, ws, seeds, mapping)
    if new_node_labels is None:
        print("Did not find any seeds, doing nothing")
        return
    else:
        node_labels = new_node_labels

    label_dict = {wsid: node_labels[mapping[wsid]] for wsid in ws_ids}
    label_dict[0] = 0
    seg = nt.takeDict(label_dict, ws)
    layers['seg'].data = seg
    print("... done")
def load_subgraph(self, node_ids):
    # weird, this sometimes happens ...
    if len(node_ids) == 0:
        return None, None, None

    inner_edges, _ = self.graph.extractSubgraphFromNodes(node_ids, allowInvalidNodes=True)
    assert len(inner_edges) > 0

    nodes_relabeled, max_id, mapping = vigra.analysis.relabelConsecutive(
        node_ids, start_label=0, keep_zeros=False
    )
    uv_ids = self.uv_ids[inner_edges]
    uv_ids = nt.takeDict(mapping, uv_ids)

    # get rid of the paintera ignore label
    pt_ignore_label = 18446744073709551615  # uint64 max
    edge_mask = (uv_ids == pt_ignore_label).sum(axis=1) == 0
    uv_ids = uv_ids[edge_mask]
    if len(uv_ids) == 0:
        return None, None, None

    max_id = int(nodes_relabeled.max())
    assert uv_ids.max() <= max_id
    n_nodes = max_id + 1
    graph = nifty.graph.undirectedGraph(n_nodes)
    graph.insertEdges(uv_ids)

    probs = self.probs[inner_edges]
    assert len(probs) == graph.numberOfEdges
    return graph, probs, mapping
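# A minimal sketch, not part of the original code, of the recurring subgraph
# idiom used above and in several functions below: extract the inner edges for
# a node set, relabel the nodes to a consecutive range with vigra, map the edge
# endpoints through the relabeling dict with nt.takeDict, and build a small
# nifty graph from the result. The toy graph and _demo_ name are hypothetical.
def _demo_subgraph_idiom():
    full_graph = nifty.graph.undirectedGraph(6)
    full_graph.insertEdges(np.array([[0, 1], [1, 2], [2, 3], [3, 4], [4, 5]], dtype='uint64'))
    node_ids = np.array([2, 3, 4], dtype='uint64')
    inner_edges, _ = full_graph.extractSubgraphFromNodes(node_ids)
    _, max_id, mapping = vigra.analysis.relabelConsecutive(node_ids, start_label=0, keep_zeros=False)
    # map the edge endpoints from global ids to the consecutive local ids
    sub_uvs = nt.takeDict(mapping, full_graph.uvIds()[inner_edges])
    sub_graph = nifty.graph.undirectedGraph(max_id + 1)
    sub_graph.insertEdges(sub_uvs)
    assert sub_graph.numberOfEdges == 2  # the edges 2-3 and 3-4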
def propagate_lut(lut_path, ids):
    with open(lut_path) as f:
        lut = json.load(f)
    lut = {int(k): v for k, v in lut.items()}
    if isinstance(lut[0], list):
        lut = {k: v[0] for k, v in lut.items()}
    return nt.takeDict(lut, np.array(ids, dtype='uint32')).tolist()
def _write_block_res(ds_in, ds_out, block_id, blocking, block_res):
    fu.log("start processing block %i" % block_id)
    block = blocking.getBlock(block_id)
    bb = vu.block_to_bb(block)

    ws = ds_in[bb]
    seg = nt.takeDict(block_res, ws)
    ds_out[bb] = seg
    fu.log_block_success(block_id)
def solve_object(obj_id):
    # try to load the object's lifted edges and costs; skip the object if they are not present
    lifted_uvs = lifted_uv_ds.read_chunk((obj_id,))
    if lifted_uvs is None:
        return None
    n_lifted_edges = len(lifted_uvs) // 2
    lifted_uvs = lifted_uvs.reshape((n_lifted_edges, 2))
    lifted_costs = lifted_costs_ds.read_chunk((obj_id,))
    assert lifted_costs is not None
    assert len(lifted_costs) == len(lifted_uvs), "%i, %i" % (len(lifted_costs), len(lifted_uvs))

    # get the node ids for this object
    obj_mask = assignments == obj_id
    node_ids = np.where(obj_mask)[0].astype('uint64')

    inner_edges, _ = graph.extractSubgraphFromNodes(node_ids)
    sub_uvs = uv_ids[inner_edges]
    sub_costs = costs[inner_edges]
    assert len(sub_uvs) == len(sub_costs)

    # relabel all nodes consecutively
    nodes_relabeled, max_id, mapping = vigra.analysis.relabelConsecutive(
        node_ids, start_label=0, keep_zeros=False
    )
    sub_uvs = nt.takeDict(mapping, sub_uvs)
    lifted_uvs = nt.takeDict(mapping, lifted_uvs)

    n_local_nodes = max_id + 1
    sub_graph = nifty.graph.undirectedGraph(n_local_nodes)
    sub_graph.insertEdges(sub_uvs)

    sub_assignments = agglomerator(sub_graph, sub_costs, lifted_uvs, lifted_costs,
                                   time_limit=time_limit)
    vigra.analysis.relabelConsecutive(sub_assignments, out=sub_assignments,
                                      start_label=1, keep_zeros=False)
    return obj_mask, sub_assignments
def propagate_ids(root, src_version, trgt_version, seg_name, ids):
    """ Propagate a list of ids from the source version to the target version.
    """
    version_file = os.path.join(root, 'versions.json')
    with open(version_file) as f:
        versions = json.load(f)
    versions.sort()

    lut_name = 'new_id_lut_%s.json' % seg_name
    src_lut_file = os.path.join(root, src_version, 'misc', lut_name)
    if not os.path.exists(src_lut_file):
        raise ValueError("Src lut %s does not exist." % src_lut_file)
    trgt_lut_file = os.path.join(root, trgt_version, 'misc', lut_name)
    if not os.path.exists(trgt_lut_file):
        raise ValueError("Target lut %s does not exist." % trgt_lut_file)

    def get_abs_lut(lut):
        return os.path.abspath(os.path.realpath(lut))

    # follow links from the src lut to the target lut and pick up
    # all existing luts on the way
    luts = []
    exclude_luts = [get_abs_lut(src_lut_file)]
    lut = src_lut_file
    version = src_version
    while True:
        abs_lut = get_abs_lut(lut)
        if abs_lut not in exclude_luts:
            luts.append(abs_lut)
            exclude_luts.append(abs_lut)
        version_index = versions.index(version)
        version = versions[version_index + 1]
        lut = os.path.join(root, version, 'misc', lut_name)
        if version == trgt_version:
            abs_lut = get_abs_lut(lut)
            if abs_lut not in luts:
                luts.append(abs_lut)
            break

    def load_lut(lut_path):
        with open(lut_path) as f:
            lut = json.load(f)
        lut = {int(k): v for k, v in lut.items()}
        return lut

    luts = [load_lut(lut) for lut in luts]

    # propagate the ids through all luts
    propagated = np.array(ids, dtype='uint64')
    for lut in luts:
        propagated = nt.takeDict(lut, propagated)
    return propagated.tolist()
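# A minimal sketch, not part of the original code, of the propagation step
# above: applying the per-version luts in order chains the id mappings across
# versions. The two toy luts and the _demo_ name are hypothetical.
def _demo_chained_luts():
    luts = [{1: 2, 3: 3}, {2: 5, 3: 7}]
    propagated = np.array([1, 3], dtype='uint64')
    for lut in luts:
        propagated = nt.takeDict(lut, propagated)
    # 1 -> 2 -> 5 and 3 -> 3 -> 7
    assert propagated.tolist() == [5, 7]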
def _fragment_segment_assignment(self, dep):
    if self.assignment_path == '':
        # get the fragment max id
        with z5py.File(self.path) as f:
            max_id = f[self.label_in_key].attrs['maxId']
        return dep, max_id
    else:
        assert self.assignment_key != ''
        assert os.path.exists(self.assignment_path), self.assignment_path
        # TODO should make this a task
        with z5py.File(self.assignment_path) as f, z5py.File(self.path) as f_out:
            assignments = f[self.assignment_key][:]
            n_fragments = len(assignments)

            # find the fragments which have a non-trivial assignment
            segment_ids, counts = np.unique(assignments, return_counts=True)
            seg_ids_to_counts = {seg_id: count for seg_id, count in zip(segment_ids, counts)}
            fragment_ids_to_counts = nt.takeDict(seg_ids_to_counts, assignments)
            fragment_ids = np.arange(n_fragments, dtype='uint64')

            non_triv_fragments = fragment_ids[fragment_ids_to_counts > 1]
            non_triv_segments = assignments[non_triv_fragments]
            non_triv_segments += n_fragments

            # determine the overall max id
            max_id = int(non_triv_segments.max())

            # TODO do we need to assign a special value to the ignore label (0) ?
            frag_to_seg = np.vstack((non_triv_fragments, non_triv_segments))

            # fragment_ids = np.arange(n_fragments, dtype='uint64')
            # assignments += n_fragments
            # frag_to_seg = np.vstack((fragment_ids, assignments))
            # max_id = int(frag_to_seg.max())

            out_key = os.path.join(self.label_out_key, 'fragment-segment-assignment')
            chunks = (1, frag_to_seg.shape[1])
            f_out.require_dataset(out_key, data=frag_to_seg, shape=frag_to_seg.shape,
                                  compression='gzip', chunks=chunks)
        return dep, max_id
def _write_block_with_offsets(ds_in, ds_out, blocking, block_id, node_labels, offsets):
    fu.log("start processing block %i" % block_id)
    off = offsets[block_id]
    block = blocking.getBlock(block_id)
    bb = vu.block_to_bb(block)
    seg = ds_in[bb]
    seg[seg != 0] += off
    # choose the appropriate function for array or dictionary
    if isinstance(node_labels, np.ndarray):
        seg = nt.take(node_labels, seg)
    else:
        seg = nt.takeDict(node_labels, seg)
    ds_out[bb] = seg
    fu.log_block_success(block_id)
def graph_watershed(graph, probs, ws, seed_points, mapping):
    seed_ids = np.unique(seed_points)[1:]
    if len(seed_ids) == 0:
        return None

    seeds = np.zeros(graph.numberOfNodes, dtype='uint64')
    # TODO this is what takes a long time for large volumes I guess,
    # should speed it up
    for seed_id in seed_ids:
        mask = seed_points == seed_id
        seed_nodes = np.unique(ws[mask])
        if seed_nodes[0] == 0:
            seed_nodes = seed_nodes[1:]
        seed_nodes = nt.takeDict(mapping, seed_nodes)
        seeds[seed_nodes] = seed_id

    node_labels = nifty.graph.edgeWeightedWatershedsSegmentation(graph, seeds, probs)
    return node_labels
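# A minimal sketch, not part of the original code, of
# nifty.graph.edgeWeightedWatershedsSegmentation on a toy path graph: seeds
# grow along the cheapest edges, so the expensive edge in the middle becomes
# the boundary between the two seeded regions. Toy data and _demo_ name are
# hypothetical.
def _demo_edge_weighted_watershed():
    toy_graph = nifty.graph.undirectedGraph(4)
    toy_graph.insertEdges(np.array([[0, 1], [1, 2], [2, 3]], dtype='uint64'))
    edge_weights = np.array([0.1, 0.9, 0.1], dtype='float32')  # edge 1-2 is expensive
    seeds = np.array([1, 0, 0, 2], dtype='uint64')             # seed node 0 and node 3
    labels = nifty.graph.edgeWeightedWatershedsSegmentation(toy_graph, seeds, edge_weights)
    assert (labels == np.array([1, 1, 2, 2])).all()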
def fix_object(object_id):
    # find the nodes corresponding to this object
    node_ids = np.where(assignments == object_id)[0].astype('uint64')

    # extract the subgraph corresponding to this object;
    # we allow for invalid nodes here,
    # which can occur for un-connected graphs resulting from bad masks ...
    inner_edges, _ = graph.extractSubgraphFromNodes(node_ids, allowInvalidNodes=True)
    sub_uvs = uv_ids[inner_edges]

    # relabel the sub-nodes / edges
    nodes_relabeled, max_id, mapping = vigra.analysis.relabelConsecutive(
        node_ids, start_label=0, keep_zeros=False
    )
    sub_uvs = nt.takeDict(mapping, sub_uvs)
    n_local_nodes = max_id + 1

    # make the sub-graph and get the edge costs
    sub_graph = nifty.graph.undirectedGraph(n_local_nodes)
    sub_graph.insertEdges(sub_uvs)
    sub_features = features[inner_edges]
    assert len(sub_features) == sub_graph.numberOfEdges

    # get the seeds from the mapped nuclei
    sub_node_labels = node_labels[node_ids]
    nucleus_ids = merge_objects[object_id]
    seeds = np.zeros(n_local_nodes, dtype='uint64')
    for seed_id, n_id in enumerate(nucleus_ids):
        has_seed = sub_node_labels == n_id
        seeds[has_seed] = seed_id + 1

    # check that we have at least two seeds present;
    # note that we can have discrepancies here due to differences in the mapping strategies
    if len(np.unique(seeds)) < 3:
        return None

    # resolve by graph watershed
    sub_result = nifty.graph.edgeWeightedWatershedsSegmentation(sub_graph, seeds, sub_features)
    return sub_result
def _apply_node_labels(seg, node_labels, allow_empty_assignments):
    # choose the appropriate mapping:
    # - 1d np.array -> just apply it
    # - 2d np.array -> extract the local dict and apply
    # - dict        -> extract the local dict and apply
    apply_array = not isinstance(node_labels, dict) and node_labels.ndim == 1
    if apply_array:
        assert seg.max() < len(node_labels), "Max id %i exceeds number of node labels %i" % (
            seg.max(), len(node_labels)
        )
        seg = nt.take(node_labels, seg)
    else:
        # this copies the dict and hence is extremely RAM hungry,
        # so we make the dict as small as possible
        this_labels = np.unique(seg)
        if isinstance(node_labels, dict):
            # do we allow for assignments that are not in the assignment table?
            if allow_empty_assignments:
                this_assignment = {label: node_labels.get(label, label) for label in this_labels}
            else:
                this_assignment = {label: node_labels[label] for label in this_labels}
        else:
            this_assignment = node_labels[:, 1][np.in1d(node_labels[:, 0], this_labels)]
            this_assignment = {label: this_assignment[ii] for ii, label in enumerate(this_labels)}
        # FIXME this casts to uint32, which can lead to nasty overflows
        seg = nt.takeDict(this_assignment, seg)
    return seg
def _split_segment_impl(self, fragment_ids, seed_fragments):
    sub_edges, _ = self.graph.extractSubgraphFromNodes(fragment_ids, allowInvalidNodes=True)
    sub_uvs = self.uv_ids[sub_edges]
    sub_weights = self.weights[sub_edges]
    assert len(sub_edges) == len(sub_weights)

    # relabel the local fragment ids
    nodes, max_id, mapping = vigra.analysis.relabelConsecutive(
        fragment_ids, start_label=0, keep_zeros=False
    )
    sub_uvs = nt.takeDict(mapping, sub_uvs)
    n_sub_nodes = max_id + 1

    # build the watershed problem and run the watershed
    sub_graph = nifty.graph.undirectedGraph(n_sub_nodes)
    sub_graph.insertEdges(sub_uvs)

    # make the seeds
    sub_seeds = np.zeros(n_sub_nodes, dtype='uint64')
    seed_id = 1
    # TODO vectorize
    for seed_group in seed_fragments:
        for seed_fragment in seed_group:
            # assert seed_fragment in fragment_ids, str(seed_fragment)
            mapped_id = mapping.get(seed_fragment, None)
            # FIXME I don't really know why this would happen, do assignments go stale ?
            if mapped_id is None:
                print("Warning: could not find seed-fragment", seed_fragment)
                continue
            sub_seeds[mapped_id] = seed_id
        seed_id += 1

    # TODO support other splitting options, e.g. LMC
    # run the graph watershed
    sub_assignment = nifty.graph.edgeWeightedWatershedsSegmentation(sub_graph, sub_seeds, sub_weights)
    assert len(sub_assignment) == n_sub_nodes == len(fragment_ids)
    return sub_assignment
def extract_from_commit(path, key, scale=0, relabel_output=False, n_threads=8):
    """ Extract the corrected segmentation from a committed project and return it as an array.
    """
    f = open_file(path, 'r')
    g = f[key]

    # make sure this is a paintera group
    seg_key = 'data'
    assignment_in_key = 'fragment-segment-assignment'
    assert seg_key in g
    have_assignments = assignment_in_key in g

    seg_in_key = os.path.join(seg_key, 's%i' % scale)
    # TODO support label multiset here !
    ds = g[seg_in_key]
    ds.n_threads = n_threads
    seg = ds[:]

    if have_assignments:
        fragment_ids = np.unique(seg)
        assignments = g[assignment_in_key][:].T
        assignments = make_dense_assignments(fragment_ids, assignments)
        if relabel_output:
            # relabelConsecutive returns (relabeled, max_id, mapping)
            assignments[:, 1] = vigra.analysis.relabelConsecutive(
                assignments[:, 1], start_label=1, keep_zeros=True
            )[0]
        assignments = dict(zip(assignments[:, 0], assignments[:, 1]))
        seg = nt.takeDict(assignments, seg)
    elif relabel_output:
        seg = vigra.analysis.relabelConsecutive(seg, start_label=1, keep_zeros=True)[0]

    return seg
def _relabel(block_id):
    block = blocking.getBlock(block_id)
    bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))

    # check if we have a mask and, if we do, whether
    # we have any pixels in the mask
    if mask is not None:
        m = mask[bb].astype('bool')
        if m.sum() == 0:
            return None

    d = data[bb]
    if mask is None or m.sum() == m.size:
        un_block = np.unique(d)
        mapping_block = {un: mapping[un] for un in un_block}
        o = nt.takeDict(mapping_block, d)
    else:
        v = d[m]
        un_block = np.unique(v)
        mapping_block = {un: mapping[un] for un in un_block}
        o = d.copy()
        o[m] = nt.takeDict(mapping_block, v)
    out[bb] = o
def debug(debug_folder, n_threads=8):
    with open_file(os.path.join(debug_folder, 'data.n5')) as f:
        ds = f['raw']
        ds.n_threads = n_threads
        raw = ds[:]

        ds = f['ws']
        ds.n_threads = n_threads
        ws = ds[:]

        ds = f['seeds']
        ds.n_threads = n_threads
        seed_points = ds[:]

    with open(os.path.join(debug_folder, 'mapping.json'), 'r') as f:
        mapping = json.load(f)
    mapping = {int(k): v for k, v in mapping.items()}

    uv_ids = np.load(os.path.join(debug_folder, 'graph.npy'))
    n_nodes = int(uv_ids.max()) + 1
    graph = nifty.graph.undirectedGraph(n_nodes)
    graph.insertEdges(uv_ids)
    probs = np.load(os.path.join(debug_folder, 'probs.npy'))

    node_labels = CorrectionTool.graph_watershed(graph, probs, ws, seed_points, mapping)

    ws_ids = np.unique(ws)[1:]
    label_dict = {wsid: node_labels[mapping[wsid]] for wsid in ws_ids}
    label_dict[0] = 0
    seg = nt.takeDict(label_dict, ws)

    with napari.gui_qt():
        viewer = napari.Viewer()
        viewer.add_image(raw, name='raw')
        viewer.add_labels(ws, name='ws')
        viewer.add_labels(seg, name='seg')
def propagate_table():
    table_path = './20191030_table_ciliaID_cellID'
    table = pd.read_csv(table_path, sep='\t')

    cilia_ids = table['cilia_id'].values.astype('uint32')
    cell_ids = table['cell_id'].values
    cell_ids[np.isinf(cell_ids)] = 0
    cell_ids[np.isnan(cell_ids)] = 0
    cell_ids = cell_ids.astype('uint32')

    cell_id_mapping = {24024: 24723, 22925: 23531, 22700: 23296, 22699: 23295,
                       22584: 23199, 22515: 23132, 22182: 22827, 22181: 22826,
                       21915: 22549, 21911: 22546, 21910: 22545, 21904: 22541,
                       21594: 22214, 21590: 22211, 0: 0}
    unique_vals, unique_counts = np.unique(list(cell_id_mapping.values()), return_counts=True)
    print(unique_vals)
    assert (unique_counts == 1).all()
    cell_ids = nt.takeDict(cell_id_mapping, cell_ids)

    cilia_id_mapping = '../../data/0.6.2/misc/new_id_lut_sbem-6dpf-1-whole-segmented-cilia.json'
    with open(cilia_id_mapping) as f:
        cilia_id_mapping = json.load(f)
    cilia_id_mapping = {int(k): v for k, v in cilia_id_mapping.items()}
    cilia_ids = [cilia_id_mapping.get(cil_id, 0) for cil_id in cilia_ids]
    cilia_ids = np.array(cilia_ids)

    valid_mask = cilia_ids != 0
    cilia_ids = cilia_ids[valid_mask]
    cell_ids = cell_ids[valid_mask]

    sorter = np.argsort(cilia_ids)
    cilia_ids = cilia_ids[sorter]
    cell_ids = cell_ids[sorter]

    table_out = './20191030_table_ciliaID_cellID_out'
    new_table = np.concatenate([cilia_ids[:, None], cell_ids[:, None]], axis=1)
    new_table = pd.DataFrame(new_table, columns=['label_id', 'cell_id'])
    new_table.to_csv(table_out, sep='\t', index=False)
def _write_block(ds_in, ds_out, blocking, block_id, node_labels):
    fu.log("start processing block %i" % block_id)
    block = blocking.getBlock(block_id)
    bb = vu.block_to_bb(block)
    seg = ds_in[bb]

    # check if this block is empty and don't write if it is
    if np.sum(seg != 0) == 0:
        fu.log_block_success(block_id)
        return

    # choose the appropriate function for array or dictionary
    if isinstance(node_labels, np.ndarray):
        # this should actually amount to the same as
        # seg = node_labels[seg]
        seg = nt.take(node_labels, seg)
    else:
        # this copies the dict and hence is extremely RAM hungry,
        # so we make the dict as small as possible
        this_labels = nt.unique(seg)
        this_assignment = {label: node_labels[label] for label in this_labels}
        seg = nt.takeDict(this_assignment, seg)

    ds_out[bb] = seg
    fu.log_block_success(block_id)
def _solve_component(component_id, graph, uv_ids, graph_labels, costs, agglomerator):
    fu.log("start processing block %i" % component_id)

    # get the nodes belonging to the current component
    nodes = np.where(graph_labels == component_id)[0].astype('uint64')

    inner_edges, _ = graph.extractSubgraphFromNodes(nodes)
    sub_uvs = uv_ids[inner_edges]
    assert len(sub_uvs) == len(inner_edges)

    # if we have only a single node (i.e. no edges), return None
    if len(sub_uvs) == 0:
        fu.log_block_success(component_id)
        return None

    # relabel the sub-nodes and associated uv-ids for more efficient processing
    nodes_relabeled, max_id, mapping = vigra.analysis.relabelConsecutive(
        nodes, start_label=0, keep_zeros=False
    )
    sub_uvs = nt.takeDict(mapping, sub_uvs)
    n_local_nodes = max_id + 1
    sub_graph = nifty.graph.undirectedGraph(n_local_nodes)
    sub_graph.insertEdges(sub_uvs)

    sub_costs = costs[inner_edges]
    assert len(sub_costs) == sub_graph.numberOfEdges

    sub_result = agglomerator(sub_graph, sub_costs)
    sub_edgeresult = sub_result[sub_uvs[:, 0]] != sub_result[sub_uvs[:, 1]]
    assert len(sub_edgeresult) == len(inner_edges)
    cut_edge_ids = inner_edges[sub_edgeresult]

    fu.log_block_success(component_id)
    return cut_edge_ids
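# A minimal sketch, not part of the original code, of the cut-edge idiom used
# above: an edge is cut exactly when its two endpoints carry different node
# labels. Toy data and _demo_ name are hypothetical.
def _demo_cut_edges():
    sub_uvs = np.array([[0, 1], [1, 2], [2, 3]], dtype='uint64')
    node_result = np.array([0, 0, 1, 1], dtype='uint64')
    edge_is_cut = node_result[sub_uvs[:, 0]] != node_result[sub_uvs[:, 1]]
    # only the edge 1-2 crosses the label boundary
    assert (edge_is_cut == np.array([False, True, False])).all()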
def check_exported(paintera_path, old_assignment_key, assignment_key,
                   table_path, table_key, scale_factor,
                   raw_path, raw_key, ws_path, ws_key, check_ids):
    print("Start to check exported node labels")
    import napari
    import nifty.tools as nt

    with open_file(paintera_path, 'r') as f:
        ds = f[old_assignment_key]
        ds.n_threads = 8
        old_assignments = ds[:].T

        ds = f[assignment_key]
        ds.n_threads = 8
        assignments = ds[:].T

    fragment_ids, segment_ids = assignments[:, 0], assignments[:, 1]
    old_fragment_ids, old_segment_ids = old_assignments[:, 0], old_assignments[:, 1]
    assert np.array_equal(fragment_ids, old_fragment_ids)

    print("Loading bounding boxes ...")
    bounding_boxes = get_bounding_boxes(table_path, table_key, scale_factor)
    print("... done")

    with open_file(raw_path, 'r') as fraw, open_file(ws_path, 'r') as fws:
        ds_raw = fraw[raw_key]
        ds_raw.n_threads = 8
        ds_ws = fws[ws_key]
        ds_ws.n_threads = 8
        ds_ws = LabelMultisetWrapper(ds_ws)

        for seg_id in check_ids:
            print("Check object", seg_id)
            bb = bounding_boxes[seg_id]
            print("Within bounding box", bb)
            raw = ds_raw[bb]
            ws = ds_ws[bb]

            id_mask = old_segment_ids == seg_id
            ws_ids = fragment_ids[id_mask]
            seg_mask = np.isin(ws, ws_ids)
            ws[~seg_mask] = 0

            ids_old = old_segment_ids[id_mask]
            dict_old = {wid: oid for wid, oid in zip(ws_ids, ids_old)}
            dict_old[0] = 0
            seg_old = nt.takeDict(dict_old, ws)

            ids_new = segment_ids[id_mask]
            dict_new = {wid: nid for wid, nid in zip(ws_ids, ids_new)}
            dict_new[0] = 0
            seg_new = nt.takeDict(dict_new, ws)

            with napari.gui_qt():
                viewer = napari.Viewer()
                viewer.add_image(raw, name='raw')
                viewer.add_labels(seg_mask, name='seg-mask')
                viewer.add_labels(seg_old, name='old-seg')
                viewer.add_labels(seg_new, name='new-seg')
def two_pass_assignments(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    path = config['path']
    key = config['key']
    assignments_path = config['assignments_path']
    assignments_key = config['assignments_key']
    relabel_key = config['relabel_key']
    block_shape = config['block_shape']
    tmp_folder = config['tmp_folder']

    with vu.file_reader(path, 'r') as f:
        ds = f[key]
        shape = ds.shape

    blocking = nt.blocking([0, 0, 0], list(shape), block_shape)
    n_blocks = blocking.numberOfBlocks

    # load the block assignments
    pattern = os.path.join(tmp_folder, 'mws_two_pass_assignments_block_%i.npy')
    assignments = []
    for block_id in range(n_blocks):
        save_path = pattern % block_id
        # NOTE we only have assignments for some of the blocks
        # due to the checkerboard processing (and potentially a roi)
        if os.path.exists(save_path):
            assignments.append(np.load(save_path))
    assignments = np.concatenate(assignments, axis=0).astype('uint64')
    fu.log("Loaded assignments of shape %s" % str(assignments.shape))

    # load the relabeling and use it to relabel the assignments
    with vu.file_reader(assignments_path, 'r') as f:
        relabeling = f[relabel_key][:]
    # expected format of the relabeling:
    # array[n_labels, 2]
    # first column holds the old ids,
    # second column holds the corresponding new (consecutive!) ids
    assert relabeling.ndim == 2
    assert relabeling.shape[1] == 2

    n_labels = len(relabeling)
    old_to_new = dict(zip(relabeling[:, 0], relabeling[:, 1]))
    assignments = nt.takeDict(old_to_new, assignments)
    assert n_labels > assignments.max(), "%i, %i" % (n_labels, assignments.max())

    fu.log("merge %i labels with ufd" % n_labels)
    ufd = nufd.ufd(n_labels)
    ufd.merge(assignments)
    node_labels = ufd.elementLabeling()

    # make sure 0 is mapped to 0
    # TODO should refactor this into a util function and use it
    # wherever we need it after ufd labeling
    if node_labels[0] != 0:
        # we have 0 in the labels -> need to remap
        if 0 in node_labels:
            node_labels[node_labels == 0] = node_labels.max() + 1
        node_labels[0] = 0
    vigra.analysis.relabelConsecutive(node_labels, out=node_labels, start_label=1, keep_zeros=True)

    with vu.file_reader(assignments_path) as f:
        chunk_size = min(int(1e6), len(node_labels))
        chunks = (chunk_size,)
        ds = f.create_dataset(assignments_key, data=node_labels, compression='gzip', chunks=chunks)

    fu.log_job_success(job_id)
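# A minimal sketch, not part of the original code, of the union-find step
# above: pairs of node ids are merged and elementLabeling() returns one label
# per element, identical within each merged set. The toy data and _demo_ name
# are hypothetical; nufd is the nifty ufd module imported by the surrounding
# code.
def _demo_ufd_merge():
    ufd = nufd.ufd(5)
    pairs = np.array([[0, 1], [3, 4]], dtype='uint64')
    ufd.merge(pairs)
    labeling = ufd.elementLabeling()
    # merged elements share a label, the untouched element keeps its own
    assert labeling[0] == labeling[1] and labeling[3] == labeling[4]
    assert labeling[2] not in (labeling[0], labeling[3])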
def propagate_attributes(id_mapping_path, table_path, output_path, column_name,
                         merge_rule=None, override=False):
    """ Propagate an id column to new ids.
    """
    # if the output already exists and we don't override, we assume that the
    # propagation was already done and return; with override, remove the old output first
    if os.path.exists(output_path) and override:
        if os.path.islink(output_path):
            os.unlink(output_path)
        else:
            os.remove(output_path)
    elif os.path.exists(output_path) and not override:
        return

    with open(id_mapping_path, 'r') as f:
        id_mapping = json.load(f)

    # we have two different versions of the id mapping:
    # the old one that only saves the mapped ids
    # and the new one that also saves the mapped counts.
    # in the second case, we use the counts to decide the labeling for
    # mapped ids that result from several merged previous ids
    id_mapping = {int(k): v for k, v in id_mapping.items()}
    if isinstance(id_mapping[0], list):
        mapping_counts = np.array([v[1] for v in id_mapping.values()])
        id_mapping = {k: v[0] for k, v in id_mapping.items()}
    else:
        mapping_counts = None

    assert os.path.exists(table_path), table_path
    table = pd.read_csv(table_path, sep='\t')
    id_col = table[column_name].values
    id_col[np.isnan(id_col)] = 0
    id_col = id_col.astype('uint32')

    # use the mapping counts to decide the mapped ids for merges
    if mapping_counts is not None:
        mapping_keys = np.array([int(key) for key in id_mapping.keys()])
        mapping_values = np.array([int(val) for val in id_mapping.values()])
        unique_vals, val_counts = np.unique(mapping_values, return_counts=True)
        merged_ids = unique_vals[val_counts > 1]
        if merged_ids[0] == 0:
            merged_ids = merged_ids[1:]

        # this could be sped up with np.unique tricks, but I don't expect there to be
        # many merged ids between versions, so this should not matter for now
        keep_mask = np.ones(len(id_col), dtype='bool')
        for merged_id in merged_ids:
            id_mask = mapping_values == merged_id
            source_ids = mapping_keys[id_mask]
            ids_sorted = np.argsort(mapping_counts[id_mask])[::-1]
            drop_ids = source_ids[ids_sorted[1:]]
            keep_mask[np.isin(id_col, drop_ids)] = False

        columns = table.columns
        table = table.values
        table = table[keep_mask]
        table = pd.DataFrame(table, columns=columns)
        id_col = id_col[keep_mask]
        assert len(table) == len(id_col)

    # map the values for the id column
    id_col = nt.takeDict(id_mapping, id_col)
    table[column_name] = id_col
    table.to_csv(output_path, index=False, sep='\t')
def relabel_sequential(data, unique_values):
    # map each unique value to its position in the enumeration;
    # start at 1 unless 0 is among the values, so that a background
    # value of 0 keeps its meaning
    start_val = 0 if unique_values[0] == 0 else 1
    relabeling = {val: ii for ii, val in enumerate(unique_values, start_val)}
    return nt.takeDict(relabeling, data)
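# A quick usage sketch with hypothetical data, not part of the original code:
# the values [3, 7, 7, 9] become [1, 2, 2, 3], since 0 is not among the unique
# values and the enumeration hence starts at 1.
def _demo_relabel_sequential():
    data = np.array([3, 7, 7, 9], dtype='uint64')
    res = relabel_sequential(data, np.unique(data))
    assert (res == np.array([1, 2, 2, 3])).all()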
def _apply_watershed_with_seeds(input_, dt, initial_seeds, config, mask, offset):
    apply_2d = config.get('apply_ws_2d', True)
    size_filter = config.get('size_filter', 25)
    sigma_weights = config.get('sigma_weights', 2.)
    alpha = config.get('alpha', 0.8)

    # apply the watersheds in 2d
    if apply_2d:
        ws = np.zeros_like(input_, dtype='uint64')
        for z in range(ws.shape[0]):
            dtz = dt[z]

            # get the initial seeds for this slice
            # and a mask for the initial seeds
            initial_seeds_z = initial_seeds[z]
            initial_seed_mask = initial_seeds_z != 0
            # don't place maxima at initial seeds
            dtz[initial_seed_mask] = 0

            seeds = _make_seeds(dtz, config)
            # remove seeds in the mask
            if mask is not None:
                seeds[mask[z]] = 0

            # add the offset to the seeds
            seeds[seeds != 0] += offset
            # add the initial seeds
            seeds[initial_seed_mask] = initial_seeds_z[initial_seed_mask]

            # we need to remap the seeds consecutively, because vigra
            # watersheds can only handle uint32 seeds, and we WILL overflow uint32
            seeds, _, old_to_new = vigra.analysis.relabelConsecutive(seeds, start_label=1,
                                                                     keep_zeros=True)
            new_to_old = {new: old for old, new in old_to_new.items()}

            # run the watershed
            hmap = _make_hmap(input_[z], dtz, alpha, sigma_weights)
            wsz, max_id = vu.watershed(hmap, seeds=seeds, size_filter=size_filter,
                                       exclude=initial_seeds_z)
            wsz = wsz.astype('uint64')

            # mask the result if we have a mask
            if mask is not None:
                wsz[mask[z]] = 0
                inv_mask = np.logical_not(mask[z])
                # NOTE we might not have any pixels in the mask for a 2d slice
                max_id = int(wsz[inv_mask].max()) if inv_mask.sum() > 0 else 0

            # increase the offset
            offset += max_id
            # map back to the original ids
            wsz = nt.takeDict(new_to_old, wsz)
            ws[z] = wsz
        # return ws

    # apply the watersheds in 3d
    else:
        # find the seeds
        seeds = _make_seeds(dt, config)
        # remove seeds in the mask
        if mask is not None:
            seeds[mask] = 0
        seeds[seeds != 0] += offset

        # add the initial seeds
        initial_seed_mask = initial_seeds != 0
        seeds[initial_seed_mask] = initial_seeds[initial_seed_mask]

        # we need to remap the seeds consecutively, because vigra
        # watersheds can only handle uint32 seeds, and we WILL overflow uint32
        seeds, _, old_to_new = vigra.analysis.relabelConsecutive(seeds, start_label=1,
                                                                 keep_zeros=True)
        new_to_old = {new: old for old, new in old_to_new.items()}

        # run the watershed
        initial_seed_ids = np.unique(initial_seeds[initial_seed_mask])
        hmap = _make_hmap(input_, dt, alpha, sigma_weights)
        ws, max_id = vu.watershed(hmap, seeds=seeds, size_filter=size_filter,
                                  exclude=initial_seed_ids)
        ws = ws.astype('uint64')
        ws = nt.takeDict(new_to_old, ws)
        if mask is not None:
            ws[mask] = 0

    return ws
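# A minimal sketch, not part of the original code, of the remap-back idiom used
# in _apply_watershed_with_seeds above: relabel the ids to a compact
# uint32-safe range with vigra, process, then restore the original ids with
# nt.takeDict on the inverted mapping. Toy data and _demo_ name are hypothetical.
def _demo_remap_back():
    seeds = np.array([11, 42, 42, 99], dtype='uint64')
    seeds_small, _, old_to_new = vigra.analysis.relabelConsecutive(seeds, start_label=1,
                                                                   keep_zeros=False)
    new_to_old = {new: old for old, new in old_to_new.items()}
    restored = nt.takeDict(new_to_old, seeds_small)
    # the round trip recovers the original ids
    assert (restored == seeds).all()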
def _solve_block_problem(block_id, graph, uv_ids, ds_nodes, costs, solver,
                         ignore_label, blocking, out, time_limit):
    fu.log("Start processing block %i" % block_id)

    # load the nodes in this sub-block and map them
    # to our current node-labeling
    chunk_id = blocking.blockGridPosition(block_id)
    nodes = ds_nodes.read_chunk(chunk_id)
    if nodes is None:
        fu.log_block_success(block_id)
        return

    # if we have an ignore label, remove zero from the nodes
    # (nodes are sorted, so it will always be at pos 0)
    if ignore_label and nodes[0] == 0:
        nodes = nodes[1:]
        removed_ignore_label = True
        if len(nodes) == 0:
            fu.log_block_success(block_id)
            return
    else:
        removed_ignore_label = False

    # we allow for invalid nodes here,
    # which can occur for un-connected graphs resulting from bad masks ...
    inner_edges, outer_edges = graph.extractSubgraphFromNodes(nodes, allowInvalidNodes=True)

    # if we have no inner edges, return the outer edges as cut edges
    if len(inner_edges) == 0:
        if len(nodes) > 1:
            assert removed_ignore_label,\
                "Can only have trivial sub-graphs for more than one node if we removed the ignore label"
        cut_edge_ids = outer_edges
        sub_result = None
        fu.log("Block %i: has no inner edges" % block_id)
    # otherwise solve the multicut for this block
    else:
        fu.log("Block %i: Solving sub-block with %i nodes and %i edges" % (block_id, len(nodes),
                                                                           len(inner_edges)))
        sub_uvs = uv_ids[inner_edges]

        # relabel the sub-nodes and associated uv-ids for more efficient processing
        nodes_relabeled, max_id, mapping = vigra.analysis.relabelConsecutive(
            nodes, start_label=0, keep_zeros=False
        )
        sub_uvs = nt.takeDict(mapping, sub_uvs)
        n_local_nodes = max_id + 1
        sub_graph = nifty.graph.undirectedGraph(n_local_nodes)
        sub_graph.insertEdges(sub_uvs)

        sub_costs = costs[inner_edges]
        assert len(sub_costs) == sub_graph.numberOfEdges

        # solve the multicut and relabel the result
        sub_result = solver(sub_graph, sub_costs, time_limit=time_limit)
        assert len(sub_result) == len(nodes), "%i, %i" % (len(sub_result), len(nodes))

        sub_edgeresult = sub_result[sub_uvs[:, 0]] != sub_result[sub_uvs[:, 1]]
        assert len(sub_edgeresult) == len(inner_edges)
        cut_edge_ids = inner_edges[sub_edgeresult]
        cut_edge_ids = np.concatenate([cut_edge_ids, outer_edges])

        _, res_max_id, _ = vigra.analysis.relabelConsecutive(sub_result, start_label=1,
                                                             keep_zeros=False, out=sub_result)
        fu.log("Block %i: Subresult has %i unique ids" % (block_id, res_max_id))

        # IMPORTANT !!!
        # we can only add back the ignore label after getting the edge-result !!!
        if removed_ignore_label:
            sub_result = np.concatenate((np.zeros(1, dtype='uint64'), sub_result))

    # get the chunk id of this block
    block = blocking.getBlock(block_id)
    chunk_id = tuple(beg // sh for beg, sh in zip(block.begin, blocking.blockShape))

    # serialize the cut-edge-ids and the (local) node labeling
    ds_edge_res = out['cut_edge_ids']
    fu.log("Block %i: Serializing %i cut edges" % (block_id, len(cut_edge_ids)))
    ds_edge_res.write_chunk(chunk_id, cut_edge_ids, True)
    if sub_result is not None:
        ds_node_res = out['node_result']
        fu.log("Block %i: Serializing %i node results" % (block_id, len(sub_result)))
        ds_node_res.write_chunk(chunk_id, sub_result, True)

    fu.log_block_success(block_id)
def _apply_watershed_with_seeds(input_, dt, offset, initial_seeds, config, mask=None):
    apply_2d = config.get('apply_ws_2d', True)
    sigma_seeds = config.get('sigma_seeds', 2.)
    size_filter = config.get('size_filter', 25)
    sigma_weights = config.get('sigma_weights', 2.)
    alpha = config.get('alpha', 0.2)

    # apply the watersheds in 2d
    if apply_2d:
        ws = np.zeros_like(input_, dtype='uint64')
        for z in range(ws.shape[0]):

            # smooth the distance transform if specified
            dtz = vu.apply_filter(dt[z], 'gaussianSmoothing',
                                  sigma_seeds) if sigma_seeds != 0 else dt[z]

            # get the initial seeds for this slice
            # and a mask for the initial seeds
            initial_seeds_z = initial_seeds[z]
            initial_seed_mask = initial_seeds_z != 0
            # don't place maxima at initial seeds
            dtz[initial_seed_mask] = 0

            seeds = vigra.analysis.localMaxima(dtz, marker=np.nan,
                                               allowAtBorder=True, allowPlateaus=True)
            seeds = vigra.analysis.labelImageWithBackground(np.isnan(seeds).view('uint8'))

            # remove seeds in the mask
            if mask is not None:
                seeds[mask[z]] = 0

            # add the offset to the seeds
            seeds[seeds != 0] += offset
            # add the initial seeds
            seeds[initial_seed_mask] = initial_seeds_z[initial_seed_mask]

            # we need to remap the seeds consecutively, because vigra
            # watersheds can only handle uint32 seeds, and we WILL overflow uint32
            seeds, _, old_to_new = vigra.analysis.relabelConsecutive(seeds, start_label=1,
                                                                     keep_zeros=True)
            new_to_old = {new: old for old, new in old_to_new.items()}

            # run the watershed
            hmap = _make_hmap(input_[z], dtz, alpha, sigma_weights)
            wsz, max_id = vu.watershed(hmap, seeds=seeds, size_filter=size_filter,
                                       exclude=initial_seeds_z)

            # mask the result if we have a mask
            if mask is not None:
                wsz[mask[z]] = 0
                inv_mask = np.logical_not(mask[z])
                # NOTE we might not have any pixels in the mask for a 2d slice
                max_id = int(wsz[inv_mask].max()) if inv_mask.sum() > 0 else 0

            # increase the offset
            offset += max_id
            # map back to the original ids
            wsz = nt.takeDict(new_to_old, wsz)
            ws[z] = wsz
        # return ws

    # apply the watersheds in 3d
    else:
        if sigma_seeds != 0:
            dt = vu.apply_filter(dt, 'gaussianSmoothing', sigma_seeds)

        # find the seeds
        seeds = vigra.analysis.localMaxima3D(dt, marker=np.nan,
                                             allowAtBorder=True, allowPlateaus=True)
        seeds = vigra.analysis.labelVolumeWithBackground(np.isnan(seeds).view('uint8'))

        # remove seeds in the mask
        if mask is not None:
            seeds[mask] = 0
        seeds[seeds != 0] += offset

        # add the initial seeds
        initial_seed_mask = initial_seeds != 0
        seeds[initial_seed_mask] = initial_seeds[initial_seed_mask]

        # we need to remap the seeds consecutively, because vigra
        # watersheds can only handle uint32 seeds, and we WILL overflow uint32
        seeds, _, old_to_new = vigra.analysis.relabelConsecutive(seeds, start_label=1,
                                                                 keep_zeros=True)
        new_to_old = {new: old for old, new in old_to_new.items()}

        # run the watershed
        initial_seed_ids = np.unique(initial_seeds[initial_seed_mask])
        hmap = _make_hmap(input_, dt, alpha, sigma_weights)
        ws, max_id = vu.watershed(hmap, seeds=seeds, size_filter=size_filter,
                                  exclude=initial_seed_ids)
        ws = nt.takeDict(new_to_old, ws)
        if mask is not None:
            ws[mask] = 0

    return ws