Example #1
def debug_subresult(block_id=1):
    from cremi_tools.viewer.volumina import view
    path = '/g/kreshuk/data/arendt/platyneris_v1/membrane_training_data/validation/segmentation/val_block_01.n5'
    tmp_folder = './tmp_plat_val'
    block_prefix = os.path.join(path, 's0', 'sub_graphs', 'block_')

    graph = ndist.Graph(os.path.join(path, 'graph'))
    block_path = block_prefix + str(block_id)
    nodes = ndist.loadNodes(block_path)
    nodes = nodes[1:]
    inner_edges, outer_edges, sub_uvs = graph.extractSubgraphFromNodes(nodes)

    block_res_path = os.path.join(
        tmp_folder, 'subproblem_results/s0_block%i.npy' % block_id)
    res = np.load(block_res_path)

    merge_edges = np.ones(graph.numberOfEdges, dtype='bool')
    merge_edges[res] = False
    merge_edges[outer_edges] = False

    uv_ids = graph.uvIds()
    n_nodes = int(uv_ids.max()) + 1
    ufd = nufd.ufd(n_nodes)
    ufd.merge(uv_ids[merge_edges])
    node_labels = ufd.elementLabeling()

    ws = z5py.File(path)['volumes/watershed'][:]
    seg = nt.take(node_labels, ws)
    view([ws, seg])
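The core pattern above can be reproduced on a toy graph: treat the stored subproblem result as cut edge ids, contract every remaining edge with a union-find, and read off a node labeling. A minimal sketch using the same nifty.ufd calls (all values made up for illustration; nufd is assumed to alias nifty.ufd, matching the call above):

import numpy as np
import nifty.ufd as nufd

# toy graph: 5 nodes, 4 edges given as uv-pairs
uv_ids = np.array([[0, 1], [1, 2], [2, 3], [3, 4]], dtype='uint64')
cut_edges = np.array([1, 3])  # edge ids reported as cut by the solver

merge_edges = np.ones(len(uv_ids), dtype='bool')
merge_edges[cut_edges] = False

ufd = nufd.ufd(5)
ufd.merge(uv_ids[merge_edges])  # contract all edges that are not cut
node_labels = ufd.elementLabeling()
print(node_labels)  # nodes 0/1 share a label, as do nodes 2/3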
Example #2
def debug_subresult(block_id=1):
    example_path = '/home/cpape/Work/data/isbi2012/cluster_example/isbi_train.n5'
    block_prefix = os.path.join(example_path, 's0', 'sub_graphs', 'block_')

    graph = ndist.Graph(os.path.join(example_path, 'graph'))
    block_path = block_prefix + str(block_id)
    nodes = ndist.loadNodes(block_path)
    inner_edges, outer_edges, sub_uvs = graph.extractSubgraphFromNodes(nodes)

    block_res_path = './tmp/subproblem_results/s0_block%i.npy' % block_id
    res = np.load(block_res_path)

    merge_edges = np.ones(graph.numberOfEdges, dtype='bool')
    merge_edges[res] = False
    merge_edges[outer_edges] = False

    uv_ids = graph.uvIds()
    n_nodes = int(uv_ids.max()) + 1
    ufd = nifty.ufd.ufd(n_nodes)
    ufd.merge(uv_ids[merge_edges])
    node_labels = ufd.elementLabeling()

    ws = z5py.File(example_path)['volumes/watersheds'][:]
    rag = nrag.gridRag(ws, numberOfLabels=n_nodes)
    seg = nrag.projectScalarNodeDataToPixels(rag, node_labels)
    view([ws, seg])
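Examples #1 and #2 project node labels back to pixels in two ways: a plain lookup with nifty.tools.take, and nrag.projectScalarNodeDataToPixels via the region adjacency graph. A small sketch contrasting the two on a toy segmentation (arrays made up; assumes a standard nifty installation):

import numpy as np
import nifty.tools as nt
import nifty.graph.rag as nrag

seg = np.array([[0, 0, 1],
                [2, 2, 1]], dtype='uint32')
node_labels = np.array([0, 1, 1], dtype='uint32')

# variant 1: look up each pixel's node label directly
projected = nt.take(node_labels, seg)

# variant 2: project through the region adjacency graph
rag = nrag.gridRag(seg, numberOfLabels=3)
projected_rag = nrag.projectScalarNodeDataToPixels(rag, node_labels)
assert np.array_equal(projected, projected_rag)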
Example #3
def edges_from_skeletons(path, ws_key, labels_key,
                         skel_key, assignment_key, out_key,
                         graph_path, graph_key, n_threads):
    f = z5py.File(path)
    ds_ws = f[ws_key]
    ds_skel = f[skel_key]
    n_labels = ds_skel.shape[0]

    ds_labels = f[labels_key]
    ds_labels.n_threads = n_threads
    gt_labels = ds_labels[:]

    ds_assignment = f[assignment_key]
    ds_assignment.n_threads = n_threads
    assignment = ds_assignment[:]

    rag = ndist.Graph(os.path.join(graph_path, graph_key), n_threads)
    ds_out = f.require_dataset(out_key, shape=(n_labels,), chunks=(1,), compression='gzip',
                               dtype='uint64')

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [tp.submit(skeleton_to_edges, seg_id, ds_ws, ds_skel,
                           gt_labels, assignment, rag, ds_out)
                 for seg_id in range(n_labels)]
        [t.result() for t in tasks]
Example #4
def insert(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # path to the reduced problem
    graph_path = config['graph_path']
    graph_key = config['graph_key']
    decomposition_path = config['decomposition_path']
    # path where the node labeling shall be written
    output_path = config['output_path']
    output_key = config['output_key']
    n_threads = config['threads_per_job']

    tmp_folder = config['tmp_folder']
    n_jobs = config['n_jobs']

    # load the graph
    graph = ndist.Graph(os.path.join(graph_path, graph_key),
                        numberOfThreads=n_threads)
    with vu.file_reader(graph_path, 'r') as f:
        ignore_label = f[graph_key].attrs['ignoreLabel']

    # load the cut edges from initial decomposition
    with vu.file_reader(decomposition_path, 'r') as f:
        ds = f['cut_edges']
        ds.n_threads = n_threads
        cut_edges_decomp = ds[:]

    # load all the sub results
    cut_edges = np.concatenate([
        np.load(
            os.path.join(tmp_folder, 'subproblem_results',
                         'job%i.npy' % job)) for job in range(n_jobs)
    ])
    cut_edges = np.unique(cut_edges).astype('uint64')
    cut_edges = np.concatenate([cut_edges_decomp, cut_edges])

    edge_labels = np.zeros(graph.numberOfEdges, dtype='bool')
    edge_labels[cut_edges] = 1

    node_labeling = ndist.connectedComponents(graph, edge_labels, ignore_label)

    n_nodes = len(node_labeling)
    node_shape = (n_nodes, )
    chunks = (min(n_nodes, 524288), )
    with vu.file_reader(output_path) as f:
        ds = f.require_dataset(output_key,
                               dtype='uint64',
                               shape=node_shape,
                               chunks=chunks,
                               compression='gzip')
        ds.n_threads = n_threads
        ds[:] = node_labeling
    fu.log('saving results to %s' % output_path)
    fu.log('and key %s' % output_key)
    fu.log_job_success(job_id)
Example #5
def solve_subproblems(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    problem_path = config['problem_path']
    scale = config['scale']
    block_shape = config['block_shape']
    block_list = config['block_list']
    n_threads = config['threads_per_job']
    agglomerator_key = config['agglomerator']
    time_limit = config.get('time_limit_solver', None)

    fu.log("reading problem from %s" % problem_path)
    problem = z5py.N5File(problem_path)
    shape = problem.attrs['shape']

    # load the costs
    costs_key = 's%i/costs' % scale
    fu.log("reading costs from path in problem: %s" % costs_key)
    ds = problem[costs_key]
    ds.n_threads = n_threads
    costs = ds[:]

    # load the graph
    graph_key = 's%i/graph' % scale
    fu.log("reading graph from path in problem: %s" % graph_key)
    graph = ndist.Graph(os.path.join(problem_path, graph_key),
                        numberOfThreads=n_threads)
    uv_ids = graph.uvIds()
    # check if the problem has an ignore-label
    ignore_label = problem[graph_key].attrs['ignoreLabel']
    fu.log("ignore label is %s" % ('true' if ignore_label else 'false'))

    fu.log("using agglomerator %s" % agglomerator_key)
    agglomerator = su.key_to_agglomerator(agglomerator_key)

    # the output group
    out = problem['s%i/sub_results' % scale]

    # TODO this should be an n5 varlen dataset as well and
    # then this is just another dataset in problem path
    block_prefix = os.path.join(problem_path, 's%i' % scale, 'sub_graphs',
                                'block_')
    blocking = nt.blocking([0, 0, 0], shape, list(block_shape))

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [
            tp.submit(_solve_block_problem, block_id, graph, uv_ids,
                      block_prefix, costs, agglomerator, ignore_label,
                      blocking, out, time_limit) for block_id in block_list
        ]
        [t.result() for t in tasks]

    fu.log_job_success(job_id)
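The blocking object that drives the per-block parallelism here, and in most of the examples below, comes from nifty.tools. A tiny demonstration of the accessors used above, with made-up shapes:

import nifty.tools as nt

blocking = nt.blocking([0, 0, 0], [100, 100, 100], [50, 50, 50])
print(blocking.numberOfBlocks)  # 2 * 2 * 2 = 8 blocks

# blocks can be queried with a halo; the outer block is grown by the halo,
# clipped at the roi boundary
block = blocking.getBlockWithHalo(0, [1, 1, 1])
print(block.innerBlock.begin, block.innerBlock.end)  # [0, 0, 0] [50, 50, 50]
print(block.outerBlock.begin, block.outerBlock.end)  # [0, 0, 0] [51, 51, 51]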
Example #6
def solve_subproblems(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    problem_path = config['problem_path']
    scale = config['scale']
    block_shape = config['block_shape']
    block_list = config['block_list']
    n_threads = config['threads_per_job']
    agglomerator_key = config['agglomerator']
    time_limit = config.get('time_limit_solver', None)

    fu.log("reading problem from %s" % problem_path)
    problem = z5py.N5File(problem_path)
    shape = problem['s0/graph'].attrs['shape']

    # load the costs
    costs_key = 's%i/costs' % scale
    fu.log("reading costs from path in problem: %s" % costs_key)
    ds = problem[costs_key]
    ds.n_threads = n_threads
    costs = ds[:]

    # load the graph
    graph_key = 's%i/graph' % scale
    fu.log("reading graph from path in problem: %s" % graph_key)
    graph = ndist.Graph(problem_path, graph_key, numberOfThreads=n_threads)
    uv_ids = graph.uvIds()
    # check if the problem has an ignore-label
    ignore_label = problem[graph_key].attrs['ignore_label']
    fu.log("ignore label is %s" % ('true' if ignore_label else 'false'))

    fu.log("using solver %s" % agglomerator_key)
    solver = get_multicut_solver(agglomerator_key)

    # the output group
    out = problem['s%i/sub_results' % scale]

    node_ds_key = 's%i/sub_graphs/nodes' % scale
    ds_nodes = problem[node_ds_key]

    blocking = nt.blocking([0, 0, 0], shape, list(block_shape))

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [
            tp.submit(_solve_block_problem, block_id, graph, uv_ids, ds_nodes,
                      costs, solver, ignore_label, blocking, out, time_limit)
            for block_id in block_list
        ]
        [t.result() for t in tasks]

    fu.log_job_success(job_id)
Example #7
def set_up_problem():
    path = './exp_data/exp_data.n5'

    # load the graph and edge probs
    f = z5py.File(path)
    ds = f['features']
    probs = ds[:, 0]
    g = ndist.Graph(os.path.join(path, 's0/graph'))
    graph = nifty.graph.UndirectedGraph(g.numberOfNodes + 1)
    graph.insertEdges(g.uvIds())

    # add lifted edges up to nhood 3
    nhood = 2
    obj = nlmc.liftedMulticutObjective(graph)
    obj.insertLiftedEdgesBfs(nhood)
    lifted_uvs = obj.liftedUvIds()
    print("Number of lifted edges:")
    print(len(lifted_uvs))

    chunks = (int(1e6), 2)
    f.create_dataset('s0/lifted_nh',
                     data=lifted_uvs,
                     chunks=chunks,
                     compression='gzip')

    # set the lifted costs according to the max. prob. along the shortest path
    print("Calculating costs ...")

    def find_costs_from_sp(lifted_id):
        print(lifted_id, "/", len(lifted_uvs))
        sp = nifty.graph.ShortestPathDijkstra(graph)
        u, v = lifted_uvs[lifted_id]
        edge_path = sp.runSingleSourceSingleTarget(probs, u, v, False)
        max_prob = np.max(probs[edge_path])
        return max_prob

    # p = find_costs_from_sp(0)
    # print(p)
    # return

    n_threads = 8
    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [
            tp.submit(find_costs_from_sp, i) for i in range(len(lifted_uvs))
        ]
        costs = np.array([t.result() for t in tasks])
    assert len(costs) == len(lifted_uvs)
    costs = probs_to_costs(costs)
    chunks = (int(1e6), )
    f.create_dataset('s0/lifted_costs',
                     data=costs,
                     chunks=chunks,
                     compression='gzip')
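probs_to_costs is defined elsewhere in this code base and not shown here. A common convention for turning a boundary probability into a signed multicut cost is the log-odds transform; a hypothetical sketch of such a function, purely for illustration:

import numpy as np

def probs_to_costs_sketch(probs, eps=1e-6):
    # clip so the logarithm stays finite at p = 0 and p = 1
    p = np.clip(probs, eps, 1. - eps)
    # log-odds: low boundary probability yields a positive (attractive) cost,
    # high boundary probability a negative (repulsive) one
    return np.log((1. - p) / p)

print(probs_to_costs_sketch(np.array([0.1, 0.5, 0.9])))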
Example #8
def rank_false_merges(problem_path,
                      graph_key,
                      feat_key,
                      morpho_key,
                      node_label_path,
                      node_label_key,
                      ignore_ids,
                      out_path_ids,
                      out_path_scores,
                      n_threads,
                      n_candidates,
                      heuristic=weight_quantile_heuristic):
    g = ndist.Graph(problem_path, graph_key, n_threads)
    with open_file(problem_path, 'r') as f:
        ds = f[feat_key]
        ds.n_threads = n_threads
        probs = ds[:, 0]

        ds = f[morpho_key]
        ds.n_threads = n_threads
        sizes = ds[:, 1]

    with open_file(node_label_path, 'r') as f:
        ds = f[node_label_key]
        ds.n_threads = n_threads
        node_labels = ds[:]

    seg_ids = np.arange(len(sizes), dtype='uint64')
    seg_ids = seg_ids[np.argsort(sizes)[::-1]][:n_candidates]
    seg_ids = seg_ids[~np.isin(seg_ids, ignore_ids.tolist() + [0])]
    max_size = sizes[seg_ids].max()
    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [
            tp.submit(heuristic, seg_id, g, node_labels, sizes,
                      max_size, probs) for seg_id in seg_ids
        ]
        fm_scores = np.array([t.result() for t in tasks])

    # print("Id:", seg_ids[0])
    # sc = weight_quantile_heuristic(seg_ids[0], g,
    #                                node_labels, sizes, max_size, probs)
    # print("Score:", sc)
    # return

    # sort ids by score (decreasing)
    sorter = np.argsort(fm_scores)[::-1]
    seg_ids = seg_ids[sorter]
    fm_scores = fm_scores[sorter]

    with open(out_path_scores, 'w') as f:
        json.dump(fm_scores.tolist(), f)
    with open(out_path_ids, 'w') as f:
        json.dump(seg_ids.tolist(), f)
Example #9
def solve_subproblems(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    costs_path = config['costs_path']
    costs_key = config['costs_key']
    graph_path = config['graph_path']
    graph_key = config['graph_key']
    decomposition_path = config['decomposition_path']
    tmp_folder = config['tmp_folder']
    component_list = config['block_list']
    n_threads = config['threads_per_job']
    agglomerator_key = config['agglomerator']

    with vu.file_reader(costs_path, 'r') as f:
        ds = f[costs_key]
        ds.n_threads = n_threads
        costs = ds[:]

    with vu.file_reader(decomposition_path, 'r') as f:
        ds = f['graph_labels']
        ds.n_threads = n_threads
        graph_labels = ds[:]

    # load the graph
    graph = ndist.Graph(os.path.join(graph_path, graph_key),
                        numberOfThreads=n_threads)
    uv_ids = graph.uvIds()
    agglomerator = su.key_to_agglomerator(agglomerator_key)

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [
            tp.submit(_solve_component, component_id, graph, uv_ids,
                      graph_labels, costs, agglomerator)
            for component_id in component_list
        ]
        results = [t.result() for t in tasks]

    cut_edge_ids = np.concatenate([res for res in results if res is not None])
    cut_edge_ids = np.unique(cut_edge_ids)

    res_folder = os.path.join(tmp_folder, 'subproblem_results')
    job_res_path = os.path.join(res_folder, 'job%i.npy' % job_id)
    fu.log("saving cut edge results to %s" % job_res_path)
    np.save(job_res_path, cut_edge_ids)
    fu.log_job_success(job_id)
Example #10
def multicut_step1(graph_path,
                   block_prefix,
                   scale,
                   tmp_folder,
                   agglomerator_key,
                   initial_block_shape,
                   block_file,
                   n_threads,
                   cut_outer_edges=True):

    t0 = time.time()
    agglomerator = AGGLOMERATORS[agglomerator_key]
    costs = z5py.File(os.path.join(tmp_folder, 'merged_graph.n5/s%i' % scale),
                      use_zarr_format=False)['costs'][:]

    block_ids = np.load(block_file)

    shape = z5py.File(graph_path).attrs['shape']
    factor = 2**scale
    block_shape = [factor * bs for bs in initial_block_shape]

    # TODO we should have symlinks instead of the if else
    if scale == 0:
        graph_path_ = os.path.join(graph_path, 'graph')
    else:
        graph_path_ = os.path.join(tmp_folder, 'merged_graph.n5',
                                   's%i' % scale)
    graph = ndist.Graph(graph_path_)

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [
            tp.submit(solve_block_subproblem, block_id, graph, block_prefix,
                      costs, agglomerator, shape, block_shape, cut_outer_edges)
            for block_id in block_ids
        ]
        results = [t.result() for t in tasks]

    results = [res for res in results if res is not None]
    if len(results) > 0:
        cut_edge_ids = np.concatenate(results)
        cut_edge_ids = np.unique(cut_edge_ids).astype('uint64')
    else:
        cut_edge_ids = np.zeros(0, dtype='uint64')

    job_id = int(os.path.split(block_file)[1].split('_')[3][:-4])
    np.save(os.path.join(tmp_folder, '1_output_s%i_%i.npy' % (scale, job_id)),
            cut_edge_ids)

    print("Success job %i" % job_id)
    print("In %f s" % (time.time() - t0, ))
Example #11
def solve_subproblems(graph_path,
                      costs_path,
                      scale,
                      job_id,
                      config_path,
                      tmp_folder,
                      cut_outer_edges=True):

    # TODO support more agglomerators
    agglomerator_key = 'multicut_kl'
    agglomerator = AGGLOMERATORS[agglomerator_key]
    costs = z5py.File(costs_path)['costs'][:]

    with open(config_path) as f:
        config = json.load(f)
        initial_block_shape = config['block_shape']
        n_threads = config['n_threads']
        block_ids = config['block_list']

    shape = z5py.File(graph_path).attrs['shape']
    factor = 2**scale
    block_shape = [factor * bs for bs in initial_block_shape]

    if scale == 0:
        graph_path_ = os.path.join(graph_path, 'graph')
        block_prefix = os.path.join(graph_path, 'sub_graphs', 's0', 'block_')
    else:
        graph_path_ = os.path.join(tmp_folder, 'merged_graph.n5',
                                   's%i' % scale)
        block_prefix = os.path.join(graph_path_, 'sub_graphs', 'block_')

    # TODO parallelize ?!
    # load the complete graph
    graph = ndist.Graph(graph_path_)

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [
            tp.submit(solve_block_subproblem, block_id, graph, block_prefix,
                      costs, agglomerator, shape, block_shape, tmp_folder,
                      scale, cut_outer_edges) for block_id in block_ids
        ]
        results = [t.result() for t in tasks]

    results = [res for res in results if res is not None]
    if len(results) > 0:
        cut_edge_ids = np.concatenate(results)
        cut_edge_ids = np.unique(cut_edge_ids).astype('uint64')
    else:
        cut_edge_ids = np.zeros(0, dtype='uint64')
Example #12
    def _check_subresults(self):
        f = z5py.File(self.input_path)
        f_out = z5py.File(self.output_path)
        ds_ws = f[self.input_key]

        shape = ds_ws.shape
        blocking = nt.blocking([0, 0, 0], list(shape), self.block_shape)

        f_graph = z5py.File(self.output_path)
        halo = [1, 1, 1]
        for block_id in range(blocking.numberOfBlocks):
            # get the block with the appropriate halo
            # and the corresponding bounding box
            block = blocking.getBlockWithHalo(block_id, halo)
            outer_block, inner_block = block.outerBlock, block.innerBlock
            bb = tuple(
                slice(beg, end)
                for beg, end in zip(inner_block.begin, outer_block.end))
            # check that the rois are correct
            block_key = os.path.join('s0', 'sub_graphs', 'block_%i' % block_id)
            roi_begin = f_out[block_key].attrs['roiBegin']
            roi_end = f_out[block_key].attrs['roiEnd']
            self.assertEqual(inner_block.begin, roi_begin)
            self.assertEqual(outer_block.end, roi_end)

            # load the graph
            graph_path = os.path.join(self.output_path, block_key)
            graph = ndist.Graph(graph_path)
            nodes_deser = ndist.loadNodes(graph_path)

            # load the segmentation and check that the nodes
            # are correct
            seg = ds_ws[bb]
            nodes = graph.nodes()
            nodes_ws = np.unique(seg)
            self.assertTrue(np.allclose(nodes_ws, nodes_deser))
            self.assertTrue(np.allclose(nodes_ws, nodes))

            # compute the rag and check that the graph is correct
            rag = nrag.gridRag(seg, numberOfLabels=int(seg.max()) + 1)
            # number of nodes in nifty can be larger
            self.assertGreaterEqual(rag.numberOfNodes, graph.numberOfNodes)
            self.assertEqual(rag.numberOfEdges, graph.numberOfEdges)
            self.assertTrue(np.allclose(rag.uvIds(), graph.uvIds()))
Example #13
    def test_graph_connected_components(self):
        from cluster_tools.postprocess import ConnectedComponentsWorkflow
        task = ConnectedComponentsWorkflow

        self.compute_graph(ignore_label=False)

        # check the graph again
        g = self.compute_nifty_graph()
        g1 = ndist.Graph(self.output_path, self.graph_key)
        self.assertEqual(g.numberOfNodes, g1.numberOfNodes)
        self.assertEqual(g.numberOfEdges, g1.numberOfEdges)
        self.assertTrue(np.allclose(g.uvIds(), g1.uvIds()))

        assignment_key = 'initial_assignments'
        assignments = self.make_assignments(g, self.output_path,
                                            assignment_key)

        # compute expected components
        expected = nifty.graph.connectedComponentsFromNodeLabels(
            g, assignments)
        vigra.analysis.relabelConsecutive(expected, out=expected)

        out_key = 'connected_components'
        t = task(tmp_folder=self.tmp_folder,
                 config_dir=self.config_folder,
                 target=self.target,
                 max_jobs=self.max_jobs,
                 problem_path=self.output_path,
                 graph_key=self.graph_key,
                 assignment_path=self.output_path,
                 assignment_key=assignment_key,
                 output_path=self.output_path,
                 assignment_out_key=out_key)
        ret = luigi.build([t], local_scheduler=True)
        self.assertTrue(ret)

        # load the output components
        with z5py.File(self.output_path) as f:
            results = f[out_key][:]

        # compare
        self.assertEqual(results.shape, expected.shape)
        ri, _ = rand_index(results, expected)
        self.assertAlmostEqual(ri, 0.)
Example #14
def graph_connected_components(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    problem_path = config['problem_path']
    graph_key = config['graph_key']
    assignment_path = config['assignment_path']
    assignment_key = config['assignment_key']
    output_path = config['output_path']
    output_key = config['output_key']
    n_threads = config.get('n_threads', 8)

    with vu.file_reader(assignment_path, 'r') as f:
        ds_ass = f[assignment_key]
        ds_ass.n_threads = n_threads
        assignments = ds_ass[:]
        chunks = ds_ass.chunks

    graph = ndist.Graph(os.path.join(problem_path, graph_key), n_threads)
    # TODO check if we actually have an ignore label
    assignments = ndist.connectedComponentsFromNodes(graph, assignments, True)
    vigra.analysis.relabelConsecutive(assignments,
                                      out=assignments,
                                      start_label=1,
                                      keep_zeros=True)

    with vu.file_reader(output_path) as f:
        ds_out = f.require_dataset(output_key,
                                   shape=assignments.shape,
                                   chunks=chunks,
                                   compression='gzip',
                                   dtype='uint64')
        ds_out.n_threads = n_threads
        ds_out[:] = assignments

    fu.log_job_success(job_id)
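vigra.analysis.relabelConsecutive, used above, compacts an arbitrary label set into consecutive ids; with keep_zeros=True the background label 0 is left untouched. A quick illustration with made-up labels:

import numpy as np
import vigra

labels = np.array([0, 7, 7, 42, 3], dtype='uint32')
relabeled, max_id, _ = vigra.analysis.relabelConsecutive(labels,
                                                         start_label=1,
                                                         keep_zeros=True)
print(relabeled, max_id)  # zeros stay zero, the other ids become 1 .. max_id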
Example #15
    def check_result(self, seg_key):
        # check shapes
        with z5py.File(self.input_path) as f:
            seg = f[seg_key]
            seg.n_threads = 8
            seg = seg[:]
            shape = seg.shape
        with z5py.File(self.output_path) as f:
            shape_ = tuple(f[self.graph_key].attrs['shape'])
        self.assertEqual(shape, shape_)

        # check graph
        # compute nifty rag
        rag = nrag.gridRag(seg, numberOfLabels=int(seg.max()) + 1)

        # load the graph
        graph = ndist.Graph(self.output_path, self.output_key)

        self.assertEqual(rag.numberOfNodes, graph.numberOfNodes)
        self.assertEqual(rag.numberOfEdges, graph.numberOfEdges)
        self.assertTrue(np.array_equal(rag.uvIds(), graph.uvIds()))
Example #16
    def test_components_dist_toy(self):
        uv_ids, node_labels, expected_cc = self.toy_problem()

        graph_path = os.path.join(self.tmp_folder, 'graph.n5')
        with z5py.File(graph_path) as f:
            g = f.create_group('graph')
            g.attrs['numberOfEdges'] = len(uv_ids)
            g.create_dataset('edges',
                             data=uv_ids,
                             chunks=(int(1e5), 2),
                             compression='raw')

        g = ndist.Graph(graph_path, 'graph')
        self.assertEqual(g.numberOfNodes, uv_ids.max() + 1)
        self.assertEqual(g.numberOfEdges, len(uv_ids))

        result = ndist.connectedComponentsFromNodes(g, node_labels, True)
        self.assertEqual(len(result), len(expected_cc))

        ri, _ = rand_index(result, expected_cc)
        self.assertAlmostEqual(ri, 0.)
Example #17
def compute_energy(name):
    exp_path = './exp_data/%s.n5' % name
    f = z5py.File(exp_path)

    g = ndist.Graph(os.path.join(exp_path, 's0', 'graph'))
    graph = nifty.graph.undirectedGraph(g.numberOfNodes + 1)
    graph.insertEdges(g.uvIds())

    costs = f['s0/costs'][:]
    lifted_costs = f['s0/lifted_costs_%s' % name][:]
    lifted_uvs = f['s0/lifted_nh_%s' % name][:]

    obj = nlmc.liftedMulticutObjective(graph)
    obj.setGraphEdgesCosts(costs)
    obj.setCosts(lifted_uvs, lifted_costs)

    path = '/g/kreshuk/data/FIB25/cutout.n5'
    res_key = 'node_labels/%s' % name
    with z5py.File(path) as f:
        node_labels = f[res_key][:]

    e = obj.evalNodeLabels(node_labels)
    return e
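Evaluating a labeling against a lifted multicut objective needs only the graph, the local costs, and the lifted edges with their costs. A toy sketch of the same calls, assuming nlmc aliases nifty.graph.opt.lifted_multicut as the snippet above suggests:

import numpy as np
import nifty
import nifty.graph.opt.lifted_multicut as nlmc

graph = nifty.graph.undirectedGraph(4)
graph.insertEdges(np.array([[0, 1], [1, 2], [2, 3]], dtype='uint64'))

obj = nlmc.liftedMulticutObjective(graph)
obj.setGraphEdgesCosts(np.array([1., -1., 1.]))
# a single lifted edge between the two chain ends
obj.setCosts(np.array([[0, 3]], dtype='uint64'), np.array([-0.5]))

# energy of a node labeling that cuts the middle edge and the lifted edge
print(obj.evalNodeLabels(np.array([0, 0, 1, 1], dtype='uint64')))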
Example #18
    def _write_graph(self):
        graph_path = os.path.join(self.input_path, self.graph_key)
        graph = ndist.Graph(graph_path)
        uv_ids = graph.uvIds().astype('uint32')

        # the ilastik graph serialization corresponds to the serialization of
        # vigra::AdjacencyListGraph, see
        # https://github.com/constantinpape/vigra/blob/master/include/vigra/adjacency_list_graph.hxx#L536

        # first element: node and edge numbers, max node and edge ids
        n_nodes = graph.numberOfNodes
        n_edges = graph.numberOfEdges
        serialization = [
            np.array([n_nodes, n_edges, graph.maxNodeId, graph.maxEdgeId],
                     dtype='uint32')
        ]
        # second element: uv-ids
        serialization.append(uv_ids.flatten())
        # third element: node neighborhoods (convenience function implemented in cpp for this)
        serialization.append(graph.flattenedNeighborhoods().astype('uint32'))
        serialization = np.concatenate(serialization)

        ilastik_graph_key = 'preprocessing/graph/graph'
        ilastik_seed_key = 'preprocessing/graph/nodeSeeds'
        ilastik_res_key = 'preprocessing/graph/resultSegmentation'
        with h5py.File(self.output_path, 'a') as f:
            f.create_dataset(ilastik_graph_key,
                             data=serialization,
                             compression='gzip')
            # initialize node seed labels and result labels with zeros
            f.create_dataset(ilastik_seed_key,
                             shape=(graph.maxNodeId + 1, ),
                             dtype='uint8')
            f.create_dataset(ilastik_res_key,
                             shape=(graph.maxNodeId + 1, ),
                             dtype='uint8')
            f['preprocessing/graph'].attrs['numNodes'] = n_nodes
Example #19
    def __init__(self,
                 graph_path,
                 graph_key,
                 weights_path,
                 weights_key,
                 n_threads,
                 ignore_label=None):
        self.n_threads = n_threads

        # load graph and weights
        self.graph = ndist.Graph(os.path.join(graph_path, graph_key),
                                 n_threads)
        self.uv_ids = self.graph.uvIds()

        weight_ds = z5py.File(weights_path)[weights_key]
        weight_ds.n_threads = self.n_threads
        self.weights = (weight_ds[:, 0].squeeze()
                        if weight_ds.ndim == 2 else weight_ds[:])
        assert len(self.weights) == self.graph.numberOfEdges

        # we need to set edges to the ignore label to be maximally repulsive
        if ignore_label is not None:
            ignore_mask = (self.uv_ids == ignore_label).any(axis=1)
            self.weights[ignore_mask] = 1.
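The ignore-label handling at the end of __init__ is worth isolating: every edge incident to the ignore label is forced to the maximally repulsive weight. In plain numpy terms:

import numpy as np

uv_ids = np.array([[0, 1], [0, 2], [1, 2]], dtype='uint64')
weights = np.array([0.2, 0.5, 0.9])

ignore_label = 0
# an edge is masked if either of its endpoints is the ignore label
ignore_mask = (uv_ids == ignore_label).any(axis=1)
weights[ignore_mask] = 1.
print(weights)  # -> [1.  1.  0.9]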
Example #20
    def load_all_data(self):
        self.ds_raw = open_file(self.raw_path)[self.raw_key]
        self.ds_ws = open_file(self.ws_path)[self.ws_key]
        if self.ds_ws.attrs.get('isLabelMultiset', False):
            self.ds_ws = LabelMultisetWrapper(self.ds_ws)

        self.shape = self.ds_raw.shape
        assert self.ds_ws.shape == self.shape

        with open_file(self.node_label_path, 'r') as f:
            self.node_labels = f[self.node_label_key][:]

        with open(self.false_merge_id_path) as f:
            self.false_merge_ids = np.array(json.load(f))

        if os.path.exists(self.processed_ids_file):
            with open(self.processed_ids_file) as f:
                self.processed_ids = json.load(f)
        else:
            self.processed_ids = []

        if os.path.exists(self.bg_ids_file):
            with open(self.bg_ids_file) as f:
                self.background_ids = json.load(f)
        else:
            self.background_ids = []

        already_processed = np.in1d(self.false_merge_ids, self.processed_ids)
        missing_ids = self.false_merge_ids[~already_processed]

        if os.path.exists(self.annotation_path):
            with open(self.annotation_path) as f:
                self.annotations = json.load(f)
        else:
            self.annotations = {}

        self.next_queue = queue.Queue()
        for mi in missing_ids:
            self.next_queue.put_nowait(mi)

        self.graph = ndist.Graph(self.problem_path, self.graph_key,
                                 self.n_threads)
        self.uv_ids = self.graph.uvIds()
        assert len(self.uv_ids) > 0

        with open_file(self.problem_path, 'r') as f:
            ds = f[self.feat_key]
            ds.n_threads = self.n_threads

            self.probs = ds[:, 0]

        # morphology table entries
        # id (1)
        # size (1)
        # com (3)
        # bb-min (3)
        # bb-max (3)
        with open_file(self.table_path, 'r') as f:
            table = f[self.table_key][:]
        self.bb_starts = table[:, 5:8]
        self.bb_stops = table[:, 8:11]
        self.bb_starts /= self.scale_factor
        self.bb_stops /= self.scale_factor
Example #21
    def check_subresults(self, seg_key):
        f = z5py.File(self.input_path)
        f_out = z5py.File(self.output_path)
        ds_ws = f[seg_key]

        full_graph = ndist.Graph(self.output_path, self.output_key)

        shape = ds_ws.shape
        blocking = nt.blocking([0, 0, 0], list(shape), self.block_shape)

        ds_nodes = f_out["s0/sub_graphs/nodes"]
        ds_edges = f_out["s0/sub_graphs/edges"]
        ds_edge_ids = f_out["s0/sub_graphs/edge_ids"]

        halo = [1, 1, 1]
        for block_id in range(blocking.numberOfBlocks):
            # get the block with the appropriate halo
            # and the corresponding bounding box
            block = blocking.getBlockWithHalo(block_id, halo)
            outer_block, inner_block = block.outerBlock, block.innerBlock
            bb1 = tuple(
                slice(beg, end)
                for beg, end in zip(inner_block.begin, inner_block.end))
            bb2 = tuple(
                slice(beg, end)
                for beg, end in zip(outer_block.begin, inner_block.end))
            # load the nodes
            chunk_id = blocking.blockGridPosition(block_id)
            nodes_deser = ds_nodes.read_chunk(chunk_id)

            # load the segmentation and check that the nodes
            # are correct
            seg1 = ds_ws[bb1]
            nodes_ws = np.unique(seg1)
            self.assertTrue(np.array_equal(nodes_ws, nodes_deser))

            # load the edges and construct the graph
            edges = ds_edges.read_chunk(chunk_id)
            if edges is None:
                self.assertEqual(len(nodes_ws), 1)
                continue
            edges = edges.reshape((edges.size // 2, 2))
            graph = ndist.Graph(edges)

            # compute the rag and check that the graph is correct
            seg2 = ds_ws[bb2]

            # check the graph nodes (only if we have edges)
            if graph.numberOfEdges > 0:
                nodes = graph.nodes()
                nodes_ws2 = np.unique(seg2)
                self.assertTrue(np.array_equal(nodes_ws2, nodes))

            rag = nrag.gridRag(seg2, numberOfLabels=int(seg2.max()) + 1)
            # number of nodes in nifty can be larger
            self.assertGreaterEqual(rag.numberOfNodes, graph.numberOfNodes)
            self.assertEqual(rag.numberOfEdges, graph.numberOfEdges)
            uv_ids = graph.uvIds()
            self.assertTrue(np.array_equal(rag.uvIds(), uv_ids))

            if graph.numberOfEdges == 0:
                continue

            # check the edge ids
            edge_ids = ds_edge_ids.read_chunk(chunk_id)
            self.assertEqual(len(edge_ids), graph.numberOfEdges)
            expected_ids = full_graph.findEdges(uv_ids)
            self.assertTrue(np.array_equal(edge_ids, expected_ids))
Example #22
def solve_lifted_subproblems(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    problem_path = config['problem_path']
    scale = config['scale']
    block_shape = config['block_shape']
    block_list = config['block_list']

    lifted_prefix = config['lifted_prefix']
    agglomerator_key = config['agglomerator']
    time_limit = config.get('time_limit_solver', None)
    n_threads = config.get('threads_per_job', 1)

    fu.log("reading problem from %s" % problem_path)
    problem = z5py.N5File(problem_path)
    shape = problem.attrs['shape']

    # load the costs
    # NOTE we use different cost identifiers for multicut and lifted multicut
    # in order to run both in the same n5-container.
    # However, for scale level 0 the costs come from the CostsWorkflow and
    # hence the identifier is identical
    costs_key = 's%i/costs_lmc' % scale if scale > 0 else 's0/costs'
    fu.log("reading costs from path in problem: %s" % costs_key)
    ds = problem[costs_key]
    ds.n_threads = n_threads
    costs = ds[:]

    # load the graph
    # NOTE we use different graph identifiers for multicut and lifted multicut
    # in order to run both in the same n5-container.
    # However, for scale level 0 the graph comes from the GraphWorkflow and
    # hence the identifier is identical
    graph_key = 's%i/graph_lmc' % scale if scale > 0 else 's0/graph'
    fu.log("reading graph from path in problem: %s" % graph_key)
    graph = ndist.Graph(os.path.join(problem_path, graph_key),
                        numberOfThreads=n_threads)
    uv_ids = graph.uvIds()
    # check if the problem has an ignore-label
    ignore_label = problem[graph_key].attrs['ignoreLabel']
    fu.log("ignore label is %s" % ('true' if ignore_label else 'false'))

    fu.log("using agglomerator %s" % agglomerator_key)
    lifted_agglomerator = su.key_to_lifted_agglomerator(agglomerator_key)
    # TODO enable different multicut agglomerator
    agglomerator = su.key_to_agglomerator(agglomerator_key)

    # load the lifted edges and costs
    nh_key = 's%i/lifted_nh_%s' % (scale, lifted_prefix)
    lifted_costs_key = 's%i/lifted_costs_%s' % (scale, lifted_prefix)
    ds = problem[nh_key]
    fu.log("reading lifted uvs")
    ds.n_threads = n_threads
    lifted_uvs = ds[:]

    fu.log("reading lifted costs")
    ds = problem[lifted_costs_key]
    ds.n_threads = n_threads
    lifted_costs = ds[:]

    # the output group
    out = problem['s%i/sub_results_lmc' % scale]

    # NOTE we use different sub-graph identifiers for multicut and lifted multicut
    # in order to run both in the same n5-container.
    # However, for scale level 0 the sub-graphs come from the GraphWorkflow and
    # are hence identical
    sub_graph_identifier = 'sub_graphs' if scale == 0 else 'sub_graphs_lmc'
    block_prefix = os.path.join(problem_path, 's%i' % scale,
                                sub_graph_identifier, 'block_')
    blocking = nt.blocking([0, 0, 0], shape, list(block_shape))

    fu.log("start processsing %i blocks" % len(block_list))
    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [
            tp.submit(_solve_block_problem, block_id, graph, uv_ids,
                      block_prefix, costs, lifted_uvs, lifted_costs,
                      lifted_agglomerator, agglomerator, ignore_label,
                      blocking, out, time_limit) for block_id in block_list
        ]
        [t.result() for t in tasks]

    fu.log_job_success(job_id)
Example #23
def resolve_individual_objects(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    problem_path = config['problem_path']

    objects_path = config['objects_path']
    objects_group = config['objects_group']
    assignment_in_path = config['assignment_in_path']
    assignment_in_key = config['assignment_in_key']
    assignment_out_path = config['assignment_out_path']
    assignment_out_key = config['assignment_out_key']

    agglomerator_key = config['agglomerator']
    time_limit = config.get('time_limit_solver', None)
    n_threads = config.get('threads_per_job', 1)

    fu.log("reading problem from %s" % problem_path)
    problem = vu.file_reader(problem_path)

    # load the costs
    costs_key = 's0/costs'
    fu.log("reading costs from path in problem: %s" % costs_key)
    ds = problem[costs_key]
    ds.n_threads = n_threads
    costs = ds[:]

    # load the graph
    graph_key = 's0/graph'
    fu.log("reading graph from path in problem: %s" % graph_key)
    graph = ndist.Graph(os.path.join(problem_path, graph_key),
                        numberOfThreads=n_threads)

    fu.log("using agglomerator %s" % agglomerator_key)
    agglomerator = su.key_to_lifted_agglomerator(agglomerator_key)

    # load assignments
    f = vu.file_reader(assignment_in_path)
    ds = f[assignment_in_key]
    ds.n_threads = n_threads
    assignments = ds[:]

    # load the object group
    objects = vu.file_reader(objects_path)[objects_group]
    new_assignments = _solve_objects(objects, graph, assignments, costs,
                                     agglomerator, n_threads, time_limit)

    chunks = ds.chunks
    f = vu.file_reader(assignment_out_path)
    ds = f.require_dataset(assignment_out_key,
                           shape=new_assignments.shape,
                           chunks=chunks,
                           compression='gzip',
                           dtype='uint64')
    ds.n_threads = n_threads
    ds[:] = new_assignments

    fu.log_job_success(job_id)
Example #24
def lifted_problem_from_segmentation(rag,
                                     watershed,
                                     input_segmentation,
                                     overlap_threshold,
                                     graph_depth,
                                     same_segment_cost,
                                     different_segment_cost,
                                     mode='all',
                                     n_threads=None):
    """ Compute lifted problem from segmentation by mapping segments to
        watershed superpixels.

    Arguments:
        rag [RegionAdjacencyGraph] - the region adjacency graph
        watershed [np.ndarray] - the watershed over segmentation
        input_segmentation [np.ndarray] - Segmentation used to determine node attribution.
        overlap_threshold [float] - minimal overlap to assign a segment id to node
        graph_depth [int] - maximal graph depth up to which
            lifted edges will be included
        same_segment_cost [float] - costs for edges between nodes with same segment id attribution
        different_segment_cost [float] - costs for edges between nodes with different segment id attribution
        mode [str] - mode for insertion of lifted edges. Can be
            "all" - lifted edges will be inserted in between all nodes with attribution
            "different" - lifted edges will only be inserted in between nodes attributed to different classes
            "same" - lifted edges will only be inserted in between nodes attribted to the same class
            (default: "different")
        n_threads [int] - number of threads used for the calculation (default: None)
    """
    n_threads = multiprocessing.cpu_count() if n_threads is None else n_threads
    assert input_segmentation.shape == watershed.shape

    # compute the overlaps
    ovlp_comp = ngt.overlap(watershed, input_segmentation)
    ws_ids = np.unique(watershed)
    n_labels = ws_ids[-1] + 1
    assert n_labels == rag.numberOfNodes, "%i, %i" % (n_labels,
                                                      rag.numberOfNodes)

    # initialise the arrays for node labels, to be
    # dense in the watershed id space (even if some ws-ids are not present)
    node_labels = np.zeros(n_labels, dtype='uint64')

    # extract the overlap values and node labels from the overlap
    # computation results
    overlaps = [
        ovlp_comp.overlapArraysNormalized(ws_id, sorted=False)
        for ws_id in ws_ids
    ]
    node_label_vals = np.array([ovlp[0][0] for ovlp in overlaps])
    overlap_values = np.array([ovlp[1][0] for ovlp in overlaps])
    node_label_vals[overlap_values < overlap_threshold] = 0
    assert len(node_label_vals) == len(ws_ids)
    node_labels[ws_ids] = node_label_vals

    # find all lifted edges up to the graph depth between mapped nodes
    # NOTE we need to convert to the different graph type for now, but
    # it would be nice to support all nifty graphs at some point
    uv_ids = rag.uvIds()
    g_temp = ndist.Graph(uv_ids)

    lifted_uvs = ndist.liftedNeighborhoodFromNodeLabels(
        g_temp,
        node_labels,
        graph_depth,
        mode=mode,
        numberOfThreads=n_threads,
        ignoreLabel=0)
    # make sure that the lifted uv ids are in range of the node labels
    assert lifted_uvs.max() < rag.numberOfNodes, "%i, %i" % (int(
        lifted_uvs.max()), rag.numberOfNodes)
    lifted_labels = node_labels[lifted_uvs]
    lifted_costs = np.zeros_like(lifted_labels, dtype='float32')

    same_mask = lifted_labels[:, 0] == lifted_labels[:, 1]
    lifted_costs[same_mask] = same_segment_cost
    lifted_costs[~same_mask] = different_segment_cost

    return lifted_uvs, lifted_costs
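A hypothetical toy invocation of the function above, assuming it and its nifty dependencies are importable (the 2x4 arrays are made up; every watershed region lies fully inside one segment, so the overlap threshold is met):

import numpy as np
import nifty.graph.rag as nrag

watershed = np.array([[0, 0, 1, 1],
                      [2, 2, 3, 3]], dtype='uint32')
segmentation = np.array([[1, 1, 2, 2],
                         [1, 1, 2, 2]], dtype='uint32')
rag = nrag.gridRag(watershed, numberOfLabels=4)

lifted_uvs, lifted_costs = lifted_problem_from_segmentation(
    rag, watershed, segmentation,
    overlap_threshold=0.5, graph_depth=2,
    same_segment_cost=1.0, different_segment_cost=-1.0)
print(lifted_uvs, lifted_costs)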
Example #25
def lifted_problem_from_probabilities(rag,
                                      watershed,
                                      input_maps,
                                      assignment_threshold,
                                      graph_depth,
                                      feats_to_costs=feats_to_costs_default,
                                      mode='different',
                                      n_threads=None):
    """ Compute lifted problem from probability maps by mapping them to superpixels.

    Arguments:
        rag [RegionAdjacencyGraph] - the region adjacency graph
        watershed [np.ndarray] - the watershed over segmentation
        input_maps [list[np.ndarray]] - list of probability maps. Each
            map must have the same shape as the watersheds and each map is
            treated as the probability to correspond to a different class.
        assignment_threshold [float] - minimal expression level to assign a
            class to a graph node (= watershed segment)
        graph_depth [int] - maximal graph depth up to which
            lifted edges will be included
        feats_to_costs [callable] - function to calculate the lifted costs from the
            class assignment probabilities. It receives as inputs `lifted_labels`,
            which stores the two classes assigned to a lifted edge, and `lifted_features`,
            which stores the two assignment probabilities. (default: feats_to_costs_default).
        mode [str] - mode for insertion of lifted edges. Can be
            "all" - lifted edges will be inserted in between all nodes with attribution
            "different" - lifted edges will only be inserted in between nodes attributed to different classes
            "same" - lifted edges will only be inserted in between nodes attribted to the same class
            (default: "different")
        n_threads [int] - number of threads used for the calculation (default: None)
    """

    n_threads = multiprocessing.cpu_count() if n_threads is None else n_threads
    # validate inputs
    assert isinstance(input_maps, (list, tuple))
    assert all(isinstance(inp, np.ndarray) for inp in input_maps)
    shape = watershed.shape
    assert all(inp.shape == shape for inp in input_maps)

    # map the probability maps to superpixels - we only map to superpixels which
    # have a larger mean expression than `assignment_threshold`

    # TODO handle the dtype conversion for vigra gracefully somehow ...
    # think about supporting uint8 input and normalizing

    # TODO how do we handle cases where the same superpixel is mapped to
    # more than one class ?

    n_nodes = int(watershed.max()) + 1
    node_labels = np.zeros(n_nodes, dtype='uint64')
    node_features = np.zeros(n_nodes, dtype='float32')
    # TODO we could allow for more features that could then be used for the cost estimation
    for class_id, inp in enumerate(input_maps):
        mean_prob = vigra.analysis.extractRegionFeatures(
            inp, watershed, features=['mean'])['mean']
        # we can in principle map multiple classes here, and right now will just override
        class_mask = mean_prob > assignment_threshold
        node_labels[class_mask] = class_id
        node_features[class_mask] = mean_prob[class_mask]

    # find all lifted edges up to the graph depth between mapped nodes
    # NOTE we need to convert to the different graph type for now, but
    # it would be nice to support all nifty graphs at some point
    uv_ids = rag.uvIds()
    g_temp = ndist.Graph(uv_ids)

    lifted_uvs = ndist.liftedNeighborhoodFromNodeLabels(
        g_temp,
        node_labels,
        graph_depth,
        mode=mode,
        numberOfThreads=n_threads,
        ignoreLabel=0)
    lifted_labels = node_labels[lifted_uvs]
    lifted_features = node_features[lifted_uvs]

    lifted_costs = feats_to_costs(lifted_labels, lifted_features)
    return lifted_uvs, lifted_costs
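The per-class node assignment above hinges on vigra's region statistics: the mean of each probability map inside every watershed region. That step in isolation, with a made-up map:

import numpy as np
import vigra

prob = np.random.rand(2, 4).astype('float32')
watershed = np.array([[0, 0, 1, 1],
                      [2, 2, 3, 3]], dtype='uint32')

mean_prob = vigra.analysis.extractRegionFeatures(
    prob, watershed, features=['mean'])['mean']
# regions whose mean probability exceeds the threshold get the class assigned
class_mask = mean_prob > 0.5
print(mean_prob, class_mask)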
Example #26
def decompose(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    costs_path = config['costs_path']
    costs_key = config['costs_key']
    graph_path = config['graph_path']
    graph_key = config['graph_key']
    output_path = config['output_path']
    n_threads = config['threads_per_job']

    with vu.file_reader(costs_path, 'r') as f:
        ds = f[costs_key]
        ds.n_threads = n_threads
        costs = ds[:]

    with vu.file_reader(graph_path, 'r') as f:
        ignore_label = f[graph_key].attrs['ignoreLabel']

    # load the graph
    graph = ndist.Graph(os.path.join(graph_path, graph_key),
                        numberOfThreads=n_threads)

    # mark repulsive edges as cut
    edge_labels = costs < 0

    # find the connected components
    labels = ndist.connectedComponents(graph, edge_labels, ignore_label)
    labels, max_id, _ = vigra.analysis.relabelConsecutive(labels,
                                                          keep_zeros=True,
                                                          start_label=1)

    # find the edges between connected components
    # which will be cut
    uv_ids = graph.uvIds()
    cut_edges = labels[uv_ids[:, 0]] != labels[uv_ids[:, 1]]
    cut_edges = np.where(cut_edges)[0].astype('uint64')

    n_nodes = labels.shape[0]
    node_shape = (n_nodes, )
    node_chunks = (min(n_nodes, 524288), )

    n_edges = cut_edges.shape[0]
    edge_shape = (n_edges, )
    edge_chunks = (min(n_edges, 524288), )

    with vu.file_reader(output_path) as f:
        ds = f.require_dataset('graph_labels',
                               dtype='uint64',
                               shape=node_shape,
                               chunks=node_chunks,
                               compression='gzip')
        ds.n_threads = n_threads
        ds[:] = labels
        ds.attrs['max_id'] = max_id

        ds = f.require_dataset('cut_edges',
                               dtype='uint64',
                               shape=edge_shape,
                               chunks=edge_chunks,
                               compression='gzip')
        ds.n_threads = n_threads
        ds[:] = cut_edges

    fu.log_job_success(job_id)
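The decomposition boils down to two array operations: threshold the costs to obtain cut edges, then mark every edge whose endpoints fall into different components. The second step in isolation:

import numpy as np

# component label per node, and the graph's uv-ids
labels = np.array([1, 1, 2, 2, 3])
uv_ids = np.array([[0, 1], [1, 2], [2, 3], [3, 4]])

cut_edges = labels[uv_ids[:, 0]] != labels[uv_ids[:, 1]]
print(np.where(cut_edges)[0])  # -> [1 3]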
Example #27
def _accumulate_block(block_id, blocking, ds_in, ds_labels, ds_edges, ds_out,
                      filters, sigmas, halo, ignore_label, apply_in_2d,
                      channel_agglomeration):

    fu.log("start processing block %i" % block_id)
    chunk_pos = blocking.blockGridPosition(block_id)

    # load edges and construct the graph if this block has edges
    edges = ds_edges.read_chunk(chunk_pos)
    if edges is None:
        fu.log("block %i has no edges" % block_id)
        fu.log_block_success(block_id)
        return
    edges = edges.reshape((edges.size // 2, 2))
    graph = ndist.Graph(edges)

    shape = ds_labels.shape
    # get the bounding box
    if sum(halo) > 0:
        block = blocking.getBlockWithHalo(block_id, halo)
        block_shape = block.outerBlock.shape
        bb_in = vu.block_to_bb(block.outerBlock)
        bb = vu.block_to_bb(block.innerBlock)
        bb_local = vu.block_to_bb(block.innerBlockLocal)
        # increase inner bounding box by 1 in positive direction
        # in accordance with the graph extraction
        bb = tuple(
            slice(b.start, min(b.stop + 1, sh)) for b, sh in zip(bb, shape))
        bb_local = tuple(
            slice(b.start, min(b.stop + 1, bsh))
            for b, bsh in zip(bb_local, block_shape))
    else:
        block = blocking.getBlock(block_id)
        bb = vu.block_to_bb(block)
        bb = tuple(
            slice(b.start, min(b.stop + 1, sh)) for b, sh in zip(bb, shape))
        bb_in = bb
        bb_local = slice(None)

    input_dim = ds_in.ndim
    # TODO make choice of channels optional
    if input_dim == 4:
        bb_in = (slice(0, 3), ) + bb_in

    input_ = vu.normalize(ds_in[bb_in])
    if input_dim == 4:
        assert channel_agglomeration is not None
        input_ = getattr(np, channel_agglomeration)(input_, axis=0)

    # load labels
    labels = ds_labels[bb]

    # TODO pre-smoothing ?!
    # accumulate the edge features
    edge_features = [
        _accumulate_filter(input_, graph, labels, bb_local, filter_name, sigma,
                           ignore_label, filter_name == filters[-1]
                           and sigma == sigmas[-1], apply_in_2d)
        for filter_name in filters for sigma in sigmas
    ]
    edge_features = np.concatenate(edge_features, axis=1)

    # save the features
    fu.log("saving feature result of shape %s" % str(edge_features.shape))
    ds_out.write_chunk(chunk_pos, edge_features.flatten(), True)
    fu.log_block_success(block_id)
    return edge_features.shape[1]
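The channel_agglomeration handling above resolves a numpy reduction by name, so a config string can choose between mean, max, etc. In isolation:

import numpy as np

channel_agglomeration = 'max'  # could also be 'mean', 'min', 'sum', ...
data = np.random.rand(3, 8, 8).astype('float32')  # channel axis first
reduced = getattr(np, channel_agglomeration)(data, axis=0)
print(reduced.shape)  # -> (8, 8)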
Example #28
def _accumulate_block(block_id, blocking, ds_in, ds_labels, out_prefix,
                      graph_block_prefix, filters, sigmas, halo, ignore_label,
                      apply_in_2d, channel_agglomeration):

    fu.log("start processing block %i" % block_id)
    # load graph and check if this block has edges
    graph = ndist.Graph(graph_block_prefix + str(block_id))
    if graph.numberOfEdges == 0:
        fu.log("block %i has no edges" % block_id)
        fu.log_block_success(block_id)
        return

    shape = ds_labels.shape
    # get the bounding box
    if sum(halo) > 0:
        block = blocking.getBlockWithHalo(block_id, halo)
        block_shape = block.outerBlock.shape
        bb_in = vu.block_to_bb(block.outerBlock)
        bb = vu.block_to_bb(block.innerBlock)
        bb_local = vu.block_to_bb(block.innerBlockLocal)
        # increase inner bounding box by 1 in positive direction
        # in accordance with the graph extraction
        bb = tuple(
            slice(b.start, min(b.stop + 1, sh)) for b, sh in zip(bb, shape))
        bb_local = tuple(
            slice(b.start, min(b.stop + 1, bsh))
            for b, bsh in zip(bb_local, block_shape))
    else:
        block = blocking.getBlock(block_id)
        bb = vu.block_to_bb(block)
        bb = tuple(
            slice(b.start, min(b.stop + 1, sh)) for b, sh in zip(bb, shape))
        bb_in = bb
        bb_local = slice(None)

    input_dim = ds_in.ndim
    # TODO make choice of channels optional
    if input_dim == 4:
        bb_in = (slice(0, 3), ) + bb_in

    input_ = vu.normalize(ds_in[bb_in])
    if input_dim == 4:
        assert channel_agglomeration is not None
        input_ = getattr(np, channel_agglomeration)(input_, axis=0)

    # load labels
    labels = ds_labels[bb]

    # TODO pre-smoothing ?!
    # accumulate the edge features
    edge_features = [
        _accumulate_filter(input_, graph, labels, bb_local, filter_name, sigma,
                           ignore_label, filter_name == filters[-1]
                           and sigma == sigmas[-1], apply_in_2d)
        for filter_name in filters for sigma in sigmas
    ]
    edge_features = np.concatenate(edge_features, axis=1)

    # save the features
    save_path = out_prefix + str(block_id)
    fu.log("saving feature result of shape %s to %s" %
           (str(edge_features.shape), save_path))
    save_root, save_key = os.path.split(save_path)
    with z5py.N5File(save_root) as f:
        f.create_dataset(save_key,
                         data=edge_features,
                         chunks=edge_features.shape)

    fu.log_block_success(block_id)
Example #29
0
import nifty.distributed as ndist

from carving.big_correction import segmentation_correction
from elf.io import open_file

path = './data/data.n5'
raw_root = 'raw'
ws_root = 'watersheds'
node_label_key = 'node_labels/initial'
save_key = 'node_labels/corrected'

scale = 0
n_scales = 3

with_graph = True

if with_graph:
    graph = ndist.Graph(path, 's0/graph', 4)
    with open_file(path, 'r') as f:
        weights = f['features'][:, 0]
else:
    graph, weights = None, None

segmentation_correction(path,
                        raw_root,
                        scale,
                        path,
                        ws_root,
                        scale,
                        path,
                        node_label_key,
                        path,
                        save_key,
Example #30
def fix_merges(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    path = config['path']
    problem_path = config['problem_path']
    merge_object_path = config['merge_object_path']

    assignment_key = config['assignment_key']
    out_key = config['out_key']

    graph_key = config['graph_key']
    features_key = config['features_key']
    node_label_key = config['node_label_key']

    from_costs = config['from_costs']
    relabel = config['relabel']
    n_threads = config['threads_per_job']

    # load the merge objects
    with open(merge_object_path) as f:
        merge_objects = json.load(f)

    if len(merge_objects) == 0:
        fu.log("no merges to resolve")
        ln_src = os.path.join(path, assignment_key)
        ln_dst = os.path.join(path, out_key)
        os.symlink(ln_src, ln_dst)
        return

    fu.log("resolving %i merges" % len(merge_objects))

    fu.log("reading problem from %s" % problem_path)
    f = vu.file_reader(path)
    problem = vu.file_reader(problem_path, 'r')

    # load the graph
    fu.log("reading graph from path in problem: %s" % graph_key)
    graph = ndist.Graph(problem_path, graph_key, numberOfThreads=n_threads)

    # load the assignments
    ds = f[assignment_key]
    chunks = ds.chunks
    ds.n_threads = n_threads
    assignments = ds[:]

    # load the costs
    ds = problem[features_key]
    ds.n_threads = n_threads
    if ds.ndim == 2:
        features = ds[:, 0].squeeze()
    else:
        features = ds[:]
    if from_costs:
        minc = features.min()
        fu.log("Mapping costs with range %f to %f to range 0 to 1" %
               (minc, features.max()))
        features -= minc
        features /= features.max()
        features = 1. - features

    # load the node labels
    ds = problem[node_label_key]
    ds.n_threads = n_threads
    node_labels = ds[:]

    assignments = fix_merge_assignments(graph, assignments, merge_objects,
                                        node_labels, features, n_threads)

    # relabel and save assignments
    if relabel:
        vigra.analysis.relabelConsecutive(assignments,
                                          out=assignments,
                                          start_label=1,
                                          keep_zeros=True)
    ds = f.create_dataset(out_key,
                          shape=assignments.shape,
                          chunks=chunks,
                          dtype='uint64',
                          compression='gzip')
    ds.n_threads = n_threads
    ds[:] = assignments

    fu.log_job_success(job_id)