Example #1
        def watershed(viewer):
            nonlocal ws_id, next_id
            nonlocal node_labels
            nonlocal node_label_history
            nonlocal sub_nodes, sub_edges
            nonlocal sub_graph, sub_weights
            nonlocal mapping

            if mask_id is None:
                print("Need to select segment to run watershed")
                return

            if ws_id != mask_id or sub_graph is None:
                print("Computing sub-graph for", mask_id, " ...")
                sub_nodes = np.where(
                    node_labels == mask_id)[0].astype('uint64')
                sub_edges, _ = graph.extractSubgraphFromNodes(
                    sub_nodes, allowInvalidNodes=True)
                sub_weights = weights[sub_edges]

                nodes_relabeled, max_id, mapping = vigra.analysis.relabelConsecutive(
                    sub_nodes, start_label=0, keep_zeros=False)
                sub_uvs = uv_ids[sub_edges]
                sub_uvs = nt.takeDict(mapping, sub_uvs)

                n_nodes = max_id + 1
                sub_graph = nifty.graph.undirectedGraph(n_nodes)
                sub_graph.insertEdges(sub_uvs)
                ws_id = mask_id

            mask = viewer.layers['mask'].data
            seeds = viewer.layers['seeds'].data
            seeds[np.logical_not(mask)] = 0
            seed_ids = np.unique(seeds)[1:]

            seed_nodes = np.zeros(sub_graph.numberOfNodes, dtype='uint64')
            for seed_id in seed_ids:
                seeded = np.unique(ws_base[seeds == seed_id])
                if seeded[0] == 0:
                    seeded = seeded[1:]
                seeded = nt.takeDict(mapping, seeded)
                seed_nodes[seeded] = seed_id

            print("Computing graph watershed")
            sub_labels = nifty.graph.edgeWeightedWatershedsSegmentation(
                sub_graph, seed_nodes, sub_weights)
            node_label_history.append(node_labels.copy())

            node_labels[sub_nodes] = sub_labels + (next_id - 1)

            mask_node_labels = np.zeros_like(node_labels)
            mask_node_labels[sub_nodes] = sub_labels
            mask = _seg_from_labels(mask_node_labels)
            viewer.layers['mask'].data = mask

            # TODO should also update the seg, but for now skip this to speed this up
            # seg = _seg_from_labels(node_labels)
            # viewer.layers['segments'].data = seg

            next_id = int(node_labels.max()) + 1
Example #2
def _apply_node_labels(seg, node_labels, allow_empty_assignments):
    # choose the appropriate mapping:
    # - 1d np.array -> just apply it
    # - 2d np.array -> extract the local dict and apply
    # - dict -> extract the local dict and apply
    apply_array = not isinstance(node_labels, dict) and node_labels.ndim == 1
    if apply_array:
        seg = nt.take(node_labels, seg)
    else:
        # this copies the dict and hence is extremely RAM hungry
        # so we make the dict as small as possible
        this_labels = np.unique(seg)
        if isinstance(node_labels, dict):
            # do we allow for assignments that are not in the assignment table?
            if allow_empty_assignments:
                this_assignment = {
                    label: node_labels.get(label, label)
                    for label in this_labels
                }
            else:
                this_assignment = {
                    label: node_labels[label]
                    for label in this_labels
                }
        else:
            this_assignment = node_labels[:, 1][np.in1d(node_labels[:, 0], this_labels)]
            this_assignment = {
                label: this_assignment[ii]
                for ii, label in enumerate(this_labels)
            }
        seg = nt.takeDict(this_assignment, seg)
    return seg
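Example #2 dispatches between nt.take for a dense 1d label array and nt.takeDict for a dict. A minimal sketch of the two call styles with toy inputs (assuming, as a comment in a later example suggests, that nt.take amounts to fancy indexing):

import numpy as np
import nifty.tools as nt

seg = np.array([0, 2, 1, 2], dtype='uint64')
dense = np.array([0, 10, 20], dtype='uint64')   # position == old id
sparse = {0: 0, 1: 10, 2: 20}                   # old id -> new id

# both calls map every element of seg through the lookup
assert np.array_equal(nt.take(dense, seg), dense[seg])
assert np.array_equal(nt.takeDict(sparse, seg), dense[seg])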
Example #3
            def watershed(viewer):
                nonlocal node_labels, seeds

                print("Run watershed from seed layer ...")
                layers = viewer.layers
                ws = layers['ws'].data
                seeds = layers['seeds'].data

                new_node_labels = self.graph_watershed(graph, probs, ws, seeds,
                                                       mapping)
                if new_node_labels is None:
                    print("Did not find any seeds, doing nothing")
                    return
                else:
                    node_labels = new_node_labels

                label_dict = {
                    wsid: node_labels[mapping[wsid]]
                    for wsid in ws_ids
                }
                label_dict[0] = 0
                seg = nt.takeDict(label_dict, ws)

                layers['seg'].data = seg
                print("... done")
Example #4
    def load_subgraph(self, node_ids):
        # weird, this sometimes happens ...
        if len(node_ids) == 0:
            return None, None, None

        inner_edges, _ = self.graph.extractSubgraphFromNodes(
            node_ids, allowInvalidNodes=True)
        assert len(inner_edges) > 0
        nodes_relabeled, max_id, mapping = vigra.analysis.relabelConsecutive(
            node_ids, start_label=0, keep_zeros=False)
        uv_ids = self.uv_ids[inner_edges]
        uv_ids = nt.takeDict(mapping, uv_ids)

        # get rid of paintera ignore label
        pt_ignore_label = 18446744073709551615
        edge_mask = (uv_ids == pt_ignore_label).sum(axis=1) == 0
        uv_ids = uv_ids[edge_mask]
        if len(uv_ids) == 0:
            return None, None, None

        max_id = int(nodes_relabeled.max())
        assert uv_ids.max() <= max_id

        n_nodes = max_id + 1
        graph = nifty.graph.undirectedGraph(n_nodes)
        graph.insertEdges(uv_ids)

        probs = self.probs[inner_edges]
        assert len(probs) == graph.numberOfEdges
        return graph, probs, mapping
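Most of these snippets share the same relabeling idiom: vigra.analysis.relabelConsecutive returns the relabeled ids, the new max id, and an old-to-new dict, and nt.takeDict then pushes that dict over the 2-column uv-ids. A toy sketch of just the idiom:

import numpy as np
import vigra
import nifty.tools as nt

node_ids = np.array([4, 9, 17], dtype='uint64')
uv_ids = np.array([[4, 9], [9, 17]], dtype='uint64')

# mapping is {4: 0, 9: 1, 17: 2}
_, max_id, mapping = vigra.analysis.relabelConsecutive(
    node_ids, start_label=0, keep_zeros=False)
local_uvs = nt.takeDict(mapping, uv_ids)   # [[0, 1], [1, 2]]
assert int(local_uvs.max()) == max_id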
Example #5
def propagate_lut(lut_path, ids):
    with open(lut_path) as f:
        lut = json.load(f)
    lut = {int(k): v for k, v in lut.items()}
    if isinstance(lut[0], list):
        lut = {k: v[0] for k, v in lut.items()}
    return nt.takeDict(lut, np.array(ids, dtype='uint32')).tolist()
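A toy round trip through this function (the file name and values are made up; note that the function expects key 0 to be present in the LUT):

import json

with open('lut.json', 'w') as f:
    json.dump({0: 0, 1: 5, 2: 6}, f)   # json stores the keys as strings
print(propagate_lut('lut.json', [1, 2, 1]))   # [5, 6, 5]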
Example #6
def _write_block_res(ds_in, ds_out, block_id, blocking, block_res):
    fu.log("start processing block %i" % block_id)
    block = blocking.getBlock(block_id)
    bb = vu.block_to_bb(block)
    ws = ds_in[bb]

    seg = nt.takeDict(block_res, ws)
    ds_out[bb] = seg
    fu.log_block_success(block_id)
Example #7
    def solve_object(obj_id):

        # try to load the object's edges and costs and continue if not present
        lifted_uvs = lifted_uv_ds.read_chunk((obj_id, ))
        if lifted_uvs is None:
            return None
        n_lifted_edges = len(lifted_uvs) // 2
        lifted_uvs = lifted_uvs.reshape((n_lifted_edges, 2))
        lifted_costs = lifted_costs_ds.read_chunk((obj_id, ))
        assert lifted_costs is not None
        assert len(lifted_costs) == len(
            lifted_uvs), "%i, %i" % (len(lifted_costs), len(lifted_uvs))

        # get node ids for this object
        obj_mask = assignments == obj_id
        node_ids = np.where(obj_mask)[0].astype('uint64')
        inner_edges, _ = graph.extractSubgraphFromNodes(node_ids)

        sub_uvs = uv_ids[inner_edges]
        sub_costs = costs[inner_edges]
        assert len(sub_uvs) == len(sub_costs)

        # relabel all consecutive
        nodes_relabeled, max_id, mapping = vigra.analysis.relabelConsecutive(
            node_ids, start_label=0, keep_zeros=False)
        sub_uvs = nt.takeDict(mapping, sub_uvs)
        lifted_uvs = nt.takeDict(mapping, lifted_uvs)

        n_local_nodes = max_id + 1
        sub_graph = nifty.graph.undirectedGraph(n_local_nodes)
        sub_graph.insertEdges(sub_uvs)

        sub_assignments = agglomerator(sub_graph,
                                       sub_costs,
                                       lifted_uvs,
                                       lifted_costs,
                                       time_limit=time_limit)
        vigra.analysis.relabelConsecutive(sub_assignments,
                                          out=sub_assignments,
                                          start_label=1,
                                          keep_zeros=False)
        return obj_mask, sub_assignments
Example #8
def propagate_ids(root, src_version, trgt_version, seg_name, ids):
    """ Propagate list of ids from source version to target version.
    """
    version_file = os.path.join(root, 'versions.json')
    with open(version_file) as f:
        versions = json.load(f)
    versions.sort()

    lut_name = 'new_id_lut_%s.json' % seg_name
    src_lut_file = os.path.join(root, src_version, 'misc', lut_name)
    if not os.path.exists(src_lut_file):
        raise ValueError("Src lut %s does not exist." % src_lut_file)
    trgt_lut_file = os.path.join(root, trgt_version, 'misc', lut_name)
    if not os.path.exists(trgt_lut_file):
        raise ValueError("Target lut %s does not exist." % trgt_lut_file)

    def get_abs_lut(lut):
        return os.path.abspath(os.path.realpath(lut))

    # follow links from src-lut to target lut and pick up
    # all existing luts on the way.
    luts = []
    exclude_luts = [get_abs_lut(src_lut_file)]
    lut = src_lut_file
    version = src_version
    while True:

        abs_lut = get_abs_lut(lut)
        if abs_lut not in exclude_luts:
            luts.append(abs_lut)
            exclude_luts.append(abs_lut)

        version_index = versions.index(version)
        version = versions[version_index + 1]
        lut = os.path.join(root, version, 'misc', lut_name)
        if version == trgt_version:
            abs_lut = get_abs_lut(lut)
            if abs_lut not in luts:
                luts.append(abs_lut)
            break

    def load_lut(lut_path):
        with open(lut_path) as f:
            lut = json.load(f)
        lut = {int(k): v for k, v in lut.items()}
        return lut

    luts = [load_lut(lut) for lut in luts]

    # propagate ids through all luts
    propagated = np.array(ids, dtype='uint64')
    for lut in luts:
        propagated = nt.takeDict(lut, propagated)
    return propagated.tolist()
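The core of the propagation is repeated dict application; a toy sketch with two made-up LUTs, where the second one merges two ids:

import numpy as np
import nifty.tools as nt

lut_a = {1: 5, 2: 6}   # version 0 -> version 1
lut_b = {5: 7, 6: 7}   # version 1 -> version 2 (5 and 6 are merged)

propagated = np.array([1, 2], dtype='uint64')
for lut in (lut_a, lut_b):
    propagated = nt.takeDict(lut, propagated)
print(propagated.tolist())   # [7, 7]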
Example #9
    def _fragment_segment_assignment(self, dep):
        if self.assignment_path == '':
            # get the fragment max id
            with z5py.File(self.path) as f:
                max_id = f[self.label_in_key].attrs['maxId']
            return dep, max_id
        else:
            assert self.assignment_key != ''
            assert os.path.exists(self.assignment_path), self.assignment_path
            # TODO should make this a task
            with z5py.File(self.assignment_path) as f, z5py.File(
                    self.path) as f_out:
                assignments = f[self.assignment_key][:]
                n_fragments = len(assignments)

                # find the fragments which have non-trivial assignment
                segment_ids, counts = np.unique(assignments,
                                                return_counts=True)
                seg_ids_to_counts = {
                    seg_id: count
                    for seg_id, count in zip(segment_ids, counts)
                }
                fragment_ids_to_counts = nt.takeDict(seg_ids_to_counts,
                                                     assignments)
                fragment_ids = np.arange(n_fragments, dtype='uint64')

                non_triv_fragments = fragment_ids[fragment_ids_to_counts > 1]
                non_triv_segments = assignments[non_triv_fragments]
                non_triv_segments += n_fragments

                # determine the overall max id
                max_id = int(non_triv_segments.max())

                # TODO do we need to assign a special value to ignore label (0) ?
                frag_to_seg = np.vstack(
                    (non_triv_fragments, non_triv_segments))

                # fragment_ids = np.arange(n_fragments, dtype='uint64')
                # assignments += n_fragments
                # frag_to_seg = np.vstack((fragment_ids, assignments))

                # max_id = int(frag_to_seg.max())

                out_key = os.path.join(self.label_out_key,
                                       'fragment-segment-assignment')
                chunks = (1, frag_to_seg.shape[1])
                f_out.require_dataset(out_key,
                                      data=frag_to_seg,
                                      shape=frag_to_seg.shape,
                                      compression='gzip',
                                      chunks=chunks)
            return dep, max_id
Example #10
def _write_block_with_offsets(ds_in, ds_out, blocking, block_id, node_labels,
                              offsets):
    fu.log("start processing block %i" % block_id)
    off = offsets[block_id]
    block = blocking.getBlock(block_id)
    bb = vu.block_to_bb(block)
    seg = ds_in[bb]
    seg[seg != 0] += off
    # choose the appropriate function for array or dictionary
    if isinstance(node_labels, np.ndarray):
        seg = nt.take(node_labels, seg)
    else:
        seg = nt.takeDict(node_labels, seg)
    ds_out[bb] = seg
    fu.log_block_success(block_id)
Example #11
    def graph_watershed(graph, probs, ws, seed_points, mapping):
        seed_ids = np.unique(seed_points)[1:]
        if len(seed_ids) == 0:
            return None

        seeds = np.zeros(graph.numberOfNodes, dtype='uint64')
        # TODO this is what takes a long time for large volumes I guess
        # should speed it up
        for seed_id in seed_ids:
            mask = seed_points == seed_id
            seed_nodes = np.unique(ws[mask])
            if seed_nodes[0] == 0:
                seed_nodes = seed_nodes[1:]
            seed_nodes = nt.takeDict(mapping, seed_nodes)
            seeds[seed_nodes] = seed_id

        node_labels = nifty.graph.edgeWeightedWatershedsSegmentation(
            graph, seeds, probs)
        return node_labels
Example #12
    def fix_object(object_id):
        # find the nodes corresponding to this object
        node_ids = np.where(assignments == object_id)[0].astype('uint64')

        # extract the subgraph corresponding to this object
        # we allow for invalid nodes here,
        # which can occur for un-connected graphs resulting from bad masks ...
        inner_edges, _ = graph.extractSubgraphFromNodes(node_ids,
                                                        allowInvalidNodes=True)
        sub_uvs = uv_ids[inner_edges]

        # relabel the sub-nodes / edges
        nodes_relabeled, max_id, mapping = vigra.analysis.relabelConsecutive(
            node_ids, start_label=0, keep_zeros=False)
        sub_uvs = nt.takeDict(mapping, sub_uvs)
        n_local_nodes = max_id + 1

        # make sub-graph and get the edge costs
        sub_graph = nifty.graph.undirectedGraph(n_local_nodes)
        sub_graph.insertEdges(sub_uvs)
        sub_features = features[inner_edges]
        assert len(sub_features) == sub_graph.numberOfEdges

        # get the seeds from the mapped nuclei
        sub_node_labels = node_labels[node_ids]
        nucleus_ids = merge_objects[object_id]

        seeds = np.zeros(n_local_nodes, dtype='uint64')
        for seed_id, n_id in enumerate(nucleus_ids):
            has_seed = sub_node_labels == n_id
            seeds[has_seed] = seed_id + 1

        # check that we have at least two seeds present
        # (np.unique includes the background label 0, hence the 3)
        # note that we can have discrepancies here due to differences in mapping strategies
        if len(np.unique(seeds)) < 3:
            return None

        # resolve by graph watershed
        sub_result = nifty.graph.edgeWeightedWatershedsSegmentation(
            sub_graph, seeds, sub_features)
        return sub_result
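The seeded splitting in these examples bottoms out in nifty.graph.edgeWeightedWatershedsSegmentation, which assigns every node to one of the non-zero seeds. A toy sketch on a three-node path graph, assuming the convention consistent with the usage here that seeds grow across low-weight edges first:

import numpy as np
import nifty.graph

graph = nifty.graph.undirectedGraph(3)
graph.insertEdges(np.array([[0, 1], [1, 2]], dtype='uint64'))

seeds = np.array([1, 0, 2], dtype='uint64')   # nodes 0 and 2 carry seeds
weights = np.array([0.1, 0.9])                # edge 0-1 is cheap, edge 1-2 expensive
node_labels = nifty.graph.edgeWeightedWatershedsSegmentation(
    graph, seeds, weights)
# node 1 should join the seed at node 0: node_labels == [1, 1, 2]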
Example #13
def _apply_node_labels(seg, node_labels, allow_empty_assignments):
    # choose the appropriate mapping:
    # - 1d np.array -> just apply it
    # - 2d np.array -> extract the local dict and apply
    # - dict -> extract the local dict and apply
    apply_array = not isinstance(node_labels, dict) and node_labels.ndim == 1
    if apply_array:
        assert seg.max() < len(
            node_labels), "Max id %i exceeds number of node labels %i" % (
                seg.max(), len(node_labels))
        seg = nt.take(node_labels, seg)
    else:
        # this copies the dict and hence is extremely RAM hungry
        # so we make the dict as small as possible
        this_labels = np.unique(seg)
        if isinstance(node_labels, dict):
            # do we allow for assignments that are not in the assignment table?
            if allow_empty_assignments:
                this_assignment = {
                    label: node_labels.get(label, label)
                    for label in this_labels
                }
            else:
                this_assignment = {
                    label: node_labels[label]
                    for label in this_labels
                }
        else:
            this_assignment = node_labels[:, 1][np.in1d(node_labels[:, 0], this_labels)]
            this_assignment = {
                label: this_assignment[ii]
                for ii, label in enumerate(this_labels)
            }
        # FIXME this casts to uint32 which can lead to nasty over-flows
        seg = nt.takeDict(this_assignment, seg)
    return seg
Example #14
    def _split_segment_impl(self, fragment_ids, seed_fragments):
        sub_edges, _ = self.graph.extractSubgraphFromNodes(
            fragment_ids, allowInvalidNodes=True)
        sub_uvs = self.uv_ids[sub_edges]
        sub_weights = self.weights[sub_edges]
        assert len(sub_edges) == len(sub_weights)

        # relabel the local fragment ids
        nodes, max_id, mapping = vigra.analysis.relabelConsecutive(
            fragment_ids, start_label=0, keep_zeros=False)
        sub_uvs = nt.takeDict(mapping, sub_uvs)
        n_sub_nodes = max_id + 1
        # build watershed problem and run watershed
        sub_graph = nifty.graph.undirectedGraph(n_sub_nodes)
        sub_graph.insertEdges(sub_uvs)

        # make seeds
        sub_seeds = np.zeros(n_sub_nodes, dtype='uint64')
        seed_id = 1
        # TODO vectorize
        for seed_group in seed_fragments:
            for seed_fragment in seed_group:
                # assert seed_fragment in fragment_ids, str(seed_fragment)
                mapped_id = mapping.get(seed_fragment, None)
                # FIXME I don't really know why this would happen, do assignments go stale ?
                if mapped_id is None:
                    print("Warning: could not find seed-fragment",
                          seed_fragment)
                    continue
                sub_seeds[mapped_id] = seed_id
            seed_id += 1

        # TODO support other splitting options, e.g. LMC
        # run graph watershed
        sub_assignment = nifty.graph.edgeWeightedWatershedsSegmentation(
            sub_graph, sub_seeds, sub_weights)
        assert len(sub_assignment) == n_sub_nodes == len(fragment_ids)
        return sub_assignment
Example #15
def extract_from_commit(path, key, scale=0, relabel_output=False, n_threads=8):
    """ Extract corrected segmentation from commited project
    and return it as array.
    """
    f = open_file(path, 'r')
    g = f[key]

    # make sure this is a paintera group
    seg_key = 'data'
    assignment_in_key = 'fragment-segment-assignment'
    assert seg_key in g
    have_assignments = assignment_in_key in g
    seg_in_key = os.path.join(seg_key, 's%i' % scale)

    # TODO support label multiset here !
    ds = g[seg_in_key]
    ds.n_threads = n_threads
    seg = ds[:]

    if have_assignments:
        fragment_ids = np.unique(seg)
        assignments = g[assignment_in_key][:].T
        assignments = make_dense_assignments(fragment_ids, assignments)
        if relabel_output:
            # relabelConsecutive returns (labels, max_label, mapping); keep the labels
            assignments[:, 1] = vigra.analysis.relabelConsecutive(
                assignments[:, 1], start_label=1, keep_zeros=True)[0]
        assignments = dict(zip(assignments[:, 0], assignments[:, 1]))
        seg = nt.takeDict(assignments, seg)

    elif relabel_output:
        # relabelConsecutive returns (labels, max_label, mapping); keep the labels
        seg = vigra.analysis.relabelConsecutive(
            seg, start_label=1, keep_zeros=True)[0]

    return seg
Example #16
    def _relabel(block_id):
        block = blocking.getBlock(block_id)
        bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))

        # check if we have a mask and if we do if we
        # have pixels in the mask
        if mask is not None:
            m = mask[bb].astype('bool')
            if m.sum() == 0:
                return None

        d = data[bb]
        if mask is None or m.sum() == m.size:
            un_block = np.unique(d)
            mapping_block = {un: mapping[un] for un in un_block}
            o = nt.takeDict(mapping_block, d)
        else:
            v = d[m]
            un_block = np.unique(v)
            mapping_block = {un: mapping[un] for un in un_block}
            o = d.copy()
            o[m] = nt.takeDict(mapping_block, v)
        out[bb] = o
Example #17
    def debug(debug_folder, n_threads=8):
        with open_file(os.path.join(debug_folder, 'data.n5')) as f:
            ds = f['raw']
            ds.n_threads = n_threads
            raw = ds[:]

            ds = f['ws']
            ds.n_threads = n_threads
            ws = ds[:]

            ds = f['seeds']
            ds.n_threads = n_threads
            seed_points = ds[:]

        with open(os.path.join(debug_folder, 'mapping.json'), 'r') as f:
            mapping = json.load(f)
        mapping = {int(k): v for k, v in mapping.items()}

        uv_ids = np.load(os.path.join(debug_folder, 'graph.npy'))
        n_nodes = int(uv_ids.max()) + 1
        graph = nifty.graph.undirectedGraph(n_nodes)
        graph.insertEdges(uv_ids)
        probs = np.load(os.path.join(debug_folder, 'probs.npy'))

        node_labels = CorrectionTool.graph_watershed(graph, probs, ws,
                                                     seed_points, mapping)

        ws_ids = np.unique(ws)[1:]
        label_dict = {wsid: node_labels[mapping[wsid]] for wsid in ws_ids}
        label_dict[0] = 0
        seg = nt.takeDict(label_dict, ws)

        with napari.gui_qt():
            viewer = napari.Viewer()
            viewer.add_image(raw, name='raw')
            viewer.add_labels(ws, name='ws')
            viewer.add_labels(seg, name='seg')
Example #18
def propagate_table():
    table_path = './20191030_table_ciliaID_cellID'
    table = pd.read_csv(table_path, sep='\t')
    cilia_ids = table['cilia_id'].values.astype('uint32')
    cell_ids = table['cell_id'].values
    cell_ids[np.isinf(cell_ids)] = 0
    cell_ids[np.isnan(cell_ids)] = 0
    cell_ids = cell_ids.astype('uint32')

    cell_id_mapping = {24024: 24723, 22925: 23531, 22700: 23296, 22699: 23295,
                       22584: 23199, 22515: 23132, 22182: 22827, 22181: 22826,
                       21915: 22549, 21911: 22546, 21910: 22545, 21904: 22541,
                       21594: 22214, 21590: 22211, 0: 0}
    unique_vals, unique_counts = np.unique(list(cell_id_mapping.values()), return_counts=True)
    print(unique_vals)
    assert (unique_counts == 1).all()
    cell_ids = nt.takeDict(cell_id_mapping, cell_ids)

    cilia_id_mapping = '../../data/0.6.2/misc/new_id_lut_sbem-6dpf-1-whole-segmented-cilia.json'
    with open(cilia_id_mapping) as f:
        cilia_id_mapping = json.load(f)
    cilia_id_mapping = {int(k): v for k, v in cilia_id_mapping.items()}

    cilia_ids = [cilia_id_mapping.get(cil_id, 0) for cil_id in cilia_ids]
    cilia_ids = np.array(cilia_ids)

    valid_mask = ~(cilia_ids == 0)
    cilia_ids = cilia_ids[valid_mask]
    cell_ids = cell_ids[valid_mask]
    sorter = np.argsort(cilia_ids)
    cilia_ids = cilia_ids[sorter]
    cell_ids = cell_ids[sorter]

    table_out = './20191030_table_ciliaID_cellID_out'
    new_table = np.concatenate([cilia_ids[:, None], cell_ids[:, None]], axis=1)
    new_table = pd.DataFrame(new_table, columns=['label_id', 'cell_id'])
    new_table.to_csv(table_out, sep='\t', index=False)
Example #19
def _write_block(ds_in, ds_out, blocking, block_id, node_labels):
    fu.log("start processing block %i" % block_id)
    block = blocking.getBlock(block_id)
    bb = vu.block_to_bb(block)
    seg = ds_in[bb]
    # check if this block is empty and don't write if it is
    if np.sum(seg != 0) == 0:
        fu.log_block_success(block_id)
        return

    # choose the appropriate function for array or dictionary
    if isinstance(node_labels, np.ndarray):
        # this should actually amount to the same as
        # seg = node_labels[seg]
        seg = nt.take(node_labels, seg)
    else:
        # this copies the dict and hence is extremely RAM hungry
        # so we make the dict as small as possible
        this_labels = np.unique(seg)
        this_assignment = {label: node_labels[label] for label in this_labels}
        seg = nt.takeDict(this_assignment, seg)

    ds_out[bb] = seg
    fu.log_block_success(block_id)
Example #20
def _solve_component(component_id, graph, uv_ids, graph_labels, costs,
                     agglomerator):
    fu.log("start processing block %i" % component_id)

    # get the nodes belonging to the current
    # component
    nodes = np.where(graph_labels == component_id)[0].astype('uint64')

    inner_edges, _ = graph.extractSubgraphFromNodes(nodes)
    sub_uvs = uv_ids[inner_edges]
    assert len(sub_uvs) == len(inner_edges)

    # if we have only a single node (i.e. no edges), return None
    if len(sub_uvs) == 0:
        fu.log_block_success(component_id)
        return None

    # relabel the sub-nodes and associated uv-ids for more efficient processing
    nodes_relabeled, max_id, mapping = vigra.analysis.relabelConsecutive(
        nodes, start_label=0, keep_zeros=False)
    sub_uvs = nt.takeDict(mapping, sub_uvs)
    n_local_nodes = max_id + 1
    sub_graph = nifty.graph.undirectedGraph(n_local_nodes)
    sub_graph.insertEdges(sub_uvs)

    sub_costs = costs[inner_edges]
    assert len(sub_costs) == sub_graph.numberOfEdges

    sub_result = agglomerator(sub_graph, sub_costs)
    sub_edgeresult = sub_result[sub_uvs[:, 0]] != sub_result[sub_uvs[:, 1]]

    assert len(sub_edgeresult) == len(inner_edges)
    cut_edge_ids = inner_edges[sub_edgeresult]

    fu.log_block_success(component_id)
    return cut_edge_ids
Example #21
def check_exported(paintera_path, old_assignment_key, assignment_key,
                   table_path, table_key, scale_factor, raw_path, raw_key,
                   ws_path, ws_key, check_ids):
    print("Start to check exported node labels")
    import napari
    import nifty.tools as nt

    with open_file(paintera_path, 'r') as f:
        ds = f[old_assignment_key]
        ds.n_threads = 8
        old_assignments = ds[:].T

        ds = f[assignment_key]
        ds.n_threads = 8
        assignments = ds[:].T

    fragment_ids, segment_ids = assignments[:, 0], assignments[:, 1]
    old_fragment_ids, old_segment_ids = old_assignments[:, 0], old_assignments[:, 1]
    assert np.array_equal(fragment_ids, old_fragment_ids)

    print("Loading bounding boxes ...")
    bounding_boxes = get_bounding_boxes(table_path, table_key, scale_factor)
    print("... done")
    with open_file(raw_path, 'r') as fraw, open_file(ws_path, 'r') as fws:

        ds_raw = fraw[raw_key]
        ds_raw.n_threads = 8

        ds_ws = fws[ws_key]
        ds_ws.n_threads = 8
        ds_ws = LabelMultisetWrapper(ds_ws)

        for seg_id in check_ids:
            print("Check object", seg_id)
            bb = bounding_boxes[seg_id]
            print("Within bounding box", bb)

            raw = ds_raw[bb]
            ws = ds_ws[bb]

            id_mask = old_segment_ids == seg_id
            ws_ids = fragment_ids[id_mask]
            seg_mask = np.isin(ws, ws_ids)
            ws[~seg_mask] = 0

            ids_old = old_segment_ids[id_mask]
            dict_old = {wid: oid for wid, oid in zip(ws_ids, ids_old)}
            dict_old[0] = 0
            seg_old = nt.takeDict(dict_old, ws)

            ids_new = segment_ids[id_mask]
            dict_new = {wid: oid for wid, oid in zip(ws_ids, ids_new)}
            dict_new[0] = 0
            seg_new = nt.takeDict(dict_new, ws)

            with napari.gui_qt():
                viewer = napari.Viewer()
                viewer.add_image(raw, name='raw')
                viewer.add_labels(seg_mask, name='seg-mask')
                viewer.add_labels(seg_old, name='old-seg')
                viewer.add_labels(seg_new, name='new-seg')
Example #22
def two_pass_assignments(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)

    path = config['path']
    key = config['key']
    assignments_path = config['assignments_path']
    assignments_key = config['assignments_key']
    relabel_key = config['relabel_key']
    block_shape = config['block_shape']
    tmp_folder = config['tmp_folder']

    with vu.file_reader(path, 'r') as f:
        ds = f[key]
        shape = ds.shape

    blocking = nt.blocking([0, 0, 0], list(shape), block_shape)
    n_blocks = blocking.numberOfBlocks

    # load block assignments
    pattern = os.path.join(tmp_folder, 'mws_two_pass_assignments_block_%i.npy')
    assignments = []
    for block_id in range(n_blocks):
        save_path = pattern % block_id
        # NOTE, we only have assignments for some of the blocks
        # due to checkerboard processing (and potentially roi)
        if os.path.exists(save_path):
            assignments.append(np.load(save_path))
    assignments = np.concatenate(assignments, axis=0).astype('uint64')
    fu.log("Loaded assignments of shape %s" % str(assignments.shape))

    # load the relabeling and use it to relabel the assignments
    with vu.file_reader(assignments_path, 'r') as f:
        relabeling = f[relabel_key][:]
    # expected format of relabeling:
    # array[n_labels, 2]
    # first column holds the old ids
    # second column holds the corresponding new (consecutive!) ids
    assert relabeling.ndim == 2
    assert relabeling.shape[1] == 2

    n_labels = len(relabeling)
    old_to_new = dict(zip(relabeling[:, 0], relabeling[:, 1]))
    assignments = nt.takeDict(old_to_new, assignments)
    assert n_labels > assignments.max(), "%i, %i" % (n_labels,
                                                     assignments.max())

    fu.log("merge %i labels with ufd" % n_labels)
    ufd = nufd.ufd(n_labels)
    ufd.merge(assignments)
    node_labels = ufd.elementLabeling()

    # make sure 0 is mapped to 0
    # TODO should refactor this into util function and use it
    # wherever we need it after ufd labeling
    if node_labels[0] != 0:
        # we have 0 in labels -> need to remap
        if 0 in node_labels:
            node_labels[node_labels == 0] = node_labels.max() + 1
        node_labels[0] = 0

    vigra.analysis.relabelConsecutive(node_labels,
                                      out=node_labels,
                                      start_label=1,
                                      keep_zeros=True)

    with vu.file_reader(assignments_path) as f:
        chunk_size = min(int(1e6), len(node_labels))
        chunks = (chunk_size, )
        ds = f.create_dataset(assignments_key,
                              data=node_labels,
                              compression='gzip',
                              chunks=chunks)

    fu.log_job_success(job_id)
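The ufd step above follows the usual nifty union-find pattern: allocate n labels, merge 2-column pairs, then read off a labeling per element. A toy sketch with made-up pairs:

import numpy as np
import nifty.ufd as nufd

pairs = np.array([[1, 2], [3, 4]], dtype='uint64')   # merge 1 with 2, and 3 with 4
ufd = nufd.ufd(5)   # elements 0..4
ufd.merge(pairs)
node_labels = ufd.elementLabeling()
# 1 and 2 now share a label, as do 3 and 4; 0 stays on its own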
Example #23
def propagate_attributes(id_mapping_path,
                         table_path,
                         output_path,
                         column_name,
                         merge_rule=None,
                         override=False):
    """ Propagate id column to new ids.
    """
    # if the output already exists, we assume that the propagation
    # was already done and we just continue
    if os.path.exists(output_path) and override:
        if os.path.islink(output_path):
            os.unlink(output_path)
        else:
            os.remove(output_path)
    elif os.path.exists(output_path) and not override:
        return

    with open(id_mapping_path, 'r') as f:
        id_mapping = json.load(f)

    # we have two different versions of the id mapping:
    # the old one that only saves the mapped ids
    # and the new one that also saves the mapped counts
    # in the second case, we use the counts to decide the labeling for
    # mapping ids that result from several merged previous ids
    id_mapping = {int(k): v for k, v in id_mapping.items()}
    if isinstance(id_mapping[0], list):
        mapping_counts = np.array([v[1] for v in id_mapping.values()])
        id_mapping = {k: v[0] for k, v in id_mapping.items()}
    else:
        mapping_counts = None

    assert os.path.exists(table_path), table_path
    table = pd.read_csv(table_path, sep='\t')
    id_col = table[column_name].values
    id_col[np.isnan(id_col)] = 0
    id_col = id_col.astype('uint32')

    # use mapping counts to decide the mapped ids for merges
    if mapping_counts is not None:

        mapping_keys = np.array([int(key) for key in id_mapping.keys()])
        mapping_values = np.array([int(val) for val in id_mapping.values()])
        unique_vals, val_counts = np.unique(mapping_values, return_counts=True)
        merged_ids = unique_vals[val_counts > 1]
        if merged_ids[0] == 0:
            merged_ids = merged_ids[1:]

        # this could be sped up with np.unique tricks, but I don't expect there to be many merged ids
        # between versions, so this should not matter for now
        keep_mask = np.ones(len(id_col), dtype='bool')
        for merged_id in merged_ids:
            id_mask = mapping_values == merged_id
            source_ids = mapping_keys[id_mask]
            ids_sorted = np.argsort(mapping_counts[id_mask])[::-1]
            drop_ids = source_ids[ids_sorted[1:]]
            keep_mask[np.isin(id_col, drop_ids)] = False

        columns = table.columns
        table = table.values
        table = table[keep_mask]
        table = pd.DataFrame(table, columns=columns)
        id_col = id_col[keep_mask]
        assert len(table) == len(id_col)

    # map values for the id col
    id_col = nt.takeDict(id_mapping, id_col)
    table[column_name] = id_col

    table.to_csv(output_path, index=False, sep='\t')
Example #24
def relabel_sequential(data, unique_values):
    start_val = 0 if unique_values[0] == 0 else 1
    relabeling = {val: ii for ii, val in enumerate(unique_values, start_val)}
    return nt.takeDict(relabeling, data)
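A toy call of relabel_sequential, assuming numpy and nifty.tools are imported as np and nt as in the other examples:

data = np.array([0, 7, 7, 9], dtype='uint64')
out = relabel_sequential(data, np.unique(data))
# the unique values (0, 7, 9) map to (0, 1, 2), so out == [0, 1, 1, 2]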
Example #25
def _apply_watershed_with_seeds(input_, dt, initial_seeds, config, mask,
                                offset):
    apply_2d = config.get('apply_ws_2d', True)
    size_filter = config.get('size_filter', 25)
    sigma_weights = config.get('sigma_weights', 2.)
    alpha = config.get('alpha', 0.8)

    # apply the watersheds in 2d
    if apply_2d:
        ws = np.zeros_like(input_, dtype='uint64')
        for z in range(ws.shape[0]):

            dtz = dt[z]
            # get the initial seeds for this slice
            # and a mask for the initial seeds
            initial_seeds_z = initial_seeds[z]
            initial_seed_mask = initial_seeds_z != 0
            # don't place maxima at initial seeds
            dtz[initial_seed_mask] = 0

            seeds = _make_seeds(dtz, config)
            # remove seeds in mask
            if mask is not None:
                seeds[mask[z]] = 0

            # add offset to seeds
            seeds[seeds != 0] += offset
            # add initial seeds
            seeds[initial_seed_mask] = initial_seeds_z[initial_seed_mask]

            # we need to remap the seeds consecutively, because vigra
            # watersheds can only handle uint32 seeds, and we WILL overflow uint32
            seeds, _, old_to_new = vigra.analysis.relabelConsecutive(
                seeds, start_label=1, keep_zeros=True)
            new_to_old = {new: old for old, new in old_to_new.items()}

            # run watershed
            hmap = _make_hmap(input_[z], dtz, alpha, sigma_weights)
            wsz, max_id = vu.watershed(hmap,
                                       seeds=seeds,
                                       size_filter=size_filter,
                                       exclude=initial_seeds_z)
            wsz = wsz.astype('uint64')
            # mask the result if we have a mask
            if mask is not None:
                wsz[mask[z]] = 0
                inv_mask = np.logical_not(mask[z])
                # NOTE we might not have any pixels in mask for 2d slice
                max_id = int(wsz[inv_mask].max()) if inv_mask.sum() > 0 else 0

            # increase the offset
            offset += max_id
            # map back to original ids
            wsz = nt.takeDict(new_to_old, wsz)
            ws[z] = wsz
        #
        return ws

    # apply the watersheds in 3d
    else:
        # find seeds
        seeds = _make_seeds(dt, config)
        # remove seeds in mask
        if mask is not None:
            seeds[mask] = 0
        seeds[seeds != 0] += offset

        # add the initial seeds
        initial_seed_mask = initial_seeds != 0
        seeds[initial_seed_mask] = initial_seeds[initial_seed_mask]

        # we need to remap the seeds consecutively, because vigra
        # watersheds can only handle uint32 seeds, and we WILL overflow uint32
        seeds, _, old_to_new = vigra.analysis.relabelConsecutive(
            seeds, start_label=1, keep_zeros=True)
        new_to_old = {new: old for old, new in old_to_new.items()}

        # run watershed
        initial_seed_ids = np.unique(initial_seeds[initial_seed_mask])
        hmap = _make_hmap(input_, dt, alpha, sigma_weights)
        ws, max_id = vu.watershed(hmap,
                                  seeds=seeds,
                                  size_filter=size_filter,
                                  exclude=initial_seed_ids)
        ws = ws.astype('uint64')
        ws = nt.takeDict(new_to_old, ws)
        if mask is not None:
            ws[mask] = 0
        return ws
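The uint32 workaround in this example is a round trip: compress the uint64 seeds to consecutive ids, run the 32-bit-limited watershed, then restore the original ids by inverting the mapping with nt.takeDict. A minimal sketch of just the round trip:

import numpy as np
import vigra
import nifty.tools as nt

seeds = np.array([[0, 5000000000], [5000000001, 0]], dtype='uint64')
small, _, old_to_new = vigra.analysis.relabelConsecutive(
    seeds, start_label=1, keep_zeros=True)
new_to_old = {new: old for old, new in old_to_new.items()}
# ... a uint32-only routine would run on `small` here ...
restored = nt.takeDict(new_to_old, small)
assert np.array_equal(restored, seeds)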
Example #26
def _solve_block_problem(block_id, graph, uv_ids, ds_nodes, costs, solver,
                         ignore_label, blocking, out, time_limit):
    fu.log("Start processing block %i" % block_id)

    # load the nodes in this sub-block and map them
    # to our current node-labeling
    chunk_id = blocking.blockGridPosition(block_id)
    nodes = ds_nodes.read_chunk(chunk_id)
    if nodes is None:
        fu.log_block_success(block_id)
        return

    # if we have an ignore label, remove zero from the nodes
    # (nodes are sorted, so it will always be at pos 0)
    if ignore_label and nodes[0] == 0:
        nodes = nodes[1:]
        removed_ignore_label = True
        if len(nodes) == 0:
            fu.log_block_success(block_id)
            return
    else:
        removed_ignore_label = False

    # we allow for invalid nodes here,
    # which can occur for un-connected graphs resulting from bad masks ...
    inner_edges, outer_edges = graph.extractSubgraphFromNodes(
        nodes, allowInvalidNodes=True)

    # if we have no inner edges, return
    # the outer edges as cut edges
    if len(inner_edges) == 0:
        if len(nodes) > 1:
            assert removed_ignore_label,\
                "Can only have trivial sub-graphs for more than one node if we removed ignore label"
        cut_edge_ids = outer_edges
        sub_result = None
        fu.log("Block %i: has no inner edges" % block_id)
    # otherwise solve the multicut for this block
    else:
        fu.log("Block %i: Solving sub-block with %i nodes and %i edges" %
               (block_id, len(nodes), len(inner_edges)))
        sub_uvs = uv_ids[inner_edges]
        # relabel the sub-nodes and associated uv-ids for more efficient processing
        nodes_relabeled, max_id, mapping = vigra.analysis.relabelConsecutive(
            nodes, start_label=0, keep_zeros=False)
        sub_uvs = nt.takeDict(mapping, sub_uvs)
        n_local_nodes = max_id + 1
        sub_graph = nifty.graph.undirectedGraph(n_local_nodes)
        sub_graph.insertEdges(sub_uvs)

        sub_costs = costs[inner_edges]
        assert len(sub_costs) == sub_graph.numberOfEdges

        # solve multicut and relabel the result
        sub_result = solver(sub_graph, sub_costs, time_limit=time_limit)
        assert len(sub_result) == len(nodes), "%i, %i" % (len(sub_result),
                                                          len(nodes))

        sub_edgeresult = sub_result[sub_uvs[:, 0]] != sub_result[sub_uvs[:, 1]]
        assert len(sub_edgeresult) == len(inner_edges)
        cut_edge_ids = inner_edges[sub_edgeresult]
        cut_edge_ids = np.concatenate([cut_edge_ids, outer_edges])

        _, res_max_id, _ = vigra.analysis.relabelConsecutive(sub_result,
                                                             start_label=1,
                                                             keep_zeros=False,
                                                             out=sub_result)
        fu.log("Block %i: Subresult has %i unique ids" %
               (block_id, res_max_id))
        # IMPORTANT !!!
        # we can only add back the ignore label after getting the edge-result !!!
        if removed_ignore_label:
            sub_result = np.concatenate((np.zeros(1,
                                                  dtype='uint64'), sub_result))

    # get chunk id of this block
    block = blocking.getBlock(block_id)
    chunk_id = tuple(beg // sh
                     for beg, sh in zip(block.begin, blocking.blockShape))

    # serialize the cut-edge-ids and the (local) node labeling
    ds_edge_res = out['cut_edge_ids']
    fu.log("Block %i: Serializing %i cut edges" %
           (block_id, len(cut_edge_ids)))
    ds_edge_res.write_chunk(chunk_id, cut_edge_ids, True)

    if sub_result is not None:
        ds_node_res = out['node_result']
        fu.log("Block %i: Serializing %i node results" %
               (block_id, len(sub_result)))
        ds_node_res.write_chunk(chunk_id, sub_result, True)

    fu.log_block_success(block_id)
Example #27
def _apply_watershed_with_seeds(input_, dt, offset,
                                initial_seeds, config, mask=None):
    apply_2d = config.get('apply_ws_2d', True)
    sigma_seeds = config.get('sigma_seeds', 2.)
    size_filter = config.get('size_filter', 25)
    sigma_weights = config.get('sigma_weights', 2.)
    alpha = config.get('alpha', 0.2)

    # apply the watersheds in 2d
    if apply_2d:
        ws = np.zeros_like(input_, dtype='uint64')
        for z in range(ws.shape[0]):

            # smooth the distance transform if specified
            dtz = vu.apply_filter(dt[z], 'gaussianSmoothing',
                                  sigma_seeds) if sigma_seeds != 0 else dt[z]

            # get the initial seeds for this slice
            # and a mask for the initial seeds
            initial_seeds_z = initial_seeds[z]
            initial_seed_mask = initial_seeds_z != 0
            # don't place maxima at initial seeds
            dtz[initial_seed_mask] = 0

            seeds = vigra.analysis.localMaxima(dtz, marker=np.nan,
                                               allowAtBorder=True, allowPlateaus=True)
            seeds = vigra.analysis.labelImageWithBackground(np.isnan(seeds).view('uint8'))
            # remove seeds in mask
            if mask is not None:
                seeds[mask[z]] = 0

            # add offset to seeds
            seeds[seeds != 0] += offset
            # add initial seeds
            seeds[initial_seed_mask] = initial_seeds_z[initial_seed_mask]

            # we need to remap the seeds consecutively, because vigra
            # watersheds can only handle uint32 seeds, and we WILL overflow uint32
            seeds, _, old_to_new = vigra.analysis.relabelConsecutive(seeds,
                                                                     start_label=1,
                                                                     keep_zeros=True)
            new_to_old = {new: old for old, new in old_to_new.items()}

            # run watershed
            hmap = _make_hmap(input_[z], dtz, alpha, sigma_weights)
            wsz, max_id = vu.watershed(hmap, seeds=seeds, size_filter=size_filter,
                                       exclude=initial_seeds_z)
            # mask the result if we have a mask
            if mask is not None:
                wsz[mask[z]] = 0
                inv_mask = np.logical_not(mask[z])
                # NOTE we might not have any pixels in mask for 2d slice
                max_id = int(wsz[inv_mask].max()) if inv_mask.sum() > 0 else 0

            # increase the offset
            offset += max_id
            # map back to original ids
            wsz = nt.takeDict(new_to_old, wsz)
            ws[z] = wsz
        #
        return ws

    # apply the watersheds in 3d
    else:
        if sigma_seeds != 0:
            dt = vu.apply_filter(dt, 'gaussianSmoothing', sigma_seeds)

        # find seeds
        seeds = vigra.analysis.localMaxima3D(dt, marker=np.nan,
                                             allowAtBorder=True, allowPlateaus=True)
        seeds = vigra.analysis.labelVolumeWithBackground(np.isnan(seeds).view('uint8'))
        # remove seeds in mask
        if mask is not None:
            seeds[mask] = 0
        seeds[seeds != 0] += offset

        # add the initial seeds
        initial_seed_mask = initial_seeds != 0
        seeds[initial_seed_mask] = initial_seeds[initial_seed_mask]

        # we need to remap the seeds consecutively, because vigra
        # watersheds can only handle uint32 seeds, and we WILL overflow uint32
        seeds, _, old_to_new = vigra.analysis.relabelConsecutive(seeds,
                                                                 start_label=1,
                                                                 keep_zeros=True)
        new_to_old = {new: old for old, new in old_to_new.items()}

        # run watershed
        initial_seed_ids = np.unique(initial_seeds[initial_seed_mask])
        hmap = _make_hmap(input_, dt, alpha, sigma_weights)
        ws, max_id = vu.watershed(hmap, seeds=seeds, size_filter=size_filter,
                                  exclude=initial_seed_ids)
        ws = nt.takeDict(new_to_old, ws)
        if mask is not None:
            ws[mask] = 0
        return ws