# NOTE: imports reconstructed from the calls below; `fu`, `vu` and `su` are assumed to be
# the cluster_tools function / volume / segmentation utility modules, `ndist` and `nt`
# the nifty distributed and tools sub-modules.
import os
import json
from concurrent import futures

import numpy as np
import z5py
import nifty
import nifty.tools as nt
import nifty.distributed as ndist

import cluster_tools.utils.function_utils as fu
import cluster_tools.utils.volume_utils as vu
import cluster_tools.utils.segmentation_utils as su


def solve_subproblems(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    problem_path = config['problem_path']
    scale = config['scale']
    block_shape = config['block_shape']
    block_list = config['block_list']
    n_threads = config['threads_per_job']
    agglomerator_key = config['agglomerator']
    time_limit = config.get('time_limit_solver', None)

    fu.log("reading problem from %s" % problem_path)
    problem = z5py.N5File(problem_path)
    shape = problem.attrs['shape']

    # load the costs
    costs_key = 's%i/costs' % scale
    fu.log("reading costs from path in problem: %s" % costs_key)
    ds = problem[costs_key]
    ds.n_threads = n_threads
    costs = ds[:]

    # load the graph
    graph_key = 's%i/graph' % scale
    fu.log("reading graph from path in problem: %s" % graph_key)
    graph = ndist.Graph(os.path.join(problem_path, graph_key),
                        numberOfThreads=n_threads)
    uv_ids = graph.uvIds()
    # check if the problem has an ignore-label
    ignore_label = problem[graph_key].attrs['ignoreLabel']
    fu.log("ignore label is %s" % ('true' if ignore_label else 'false'))

    fu.log("using agglomerator %s" % agglomerator_key)
    agglomerator = su.key_to_agglomerator(agglomerator_key)

    # the output group
    out = problem['s%i/sub_results' % scale]

    # TODO this should be an n5 varlen dataset as well;
    # then it would just be another dataset in the problem path
    block_prefix = os.path.join(problem_path, 's%i' % scale, 'sub_graphs',
                                'block_')
    blocking = nt.blocking([0, 0, 0], shape, list(block_shape))

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [
            tp.submit(_solve_block_problem, block_id, graph, uv_ids,
                      block_prefix, costs, agglomerator, ignore_label,
                      blocking, out, time_limit) for block_id in block_list
        ]
        [t.result() for t in tasks]

    fu.log_job_success(job_id)
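

# --- Hedged usage sketch (not part of the original source): it only illustrates how a
# config with the keys read by solve_subproblems above could be written to disk and passed
# in. All paths and values are placeholders; 'kernighan-lin' is an assumed key understood
# by su.key_to_agglomerator.
def _example_run_solve_subproblems(tmp_folder='./tmp', job_id=0):
    config = {'problem_path': '/path/to/problem.n5',   # hypothetical n5 container
              'scale': 0,
              'block_shape': [50, 512, 512],
              'block_list': [0, 1, 2, 3],
              'threads_per_job': 4,
              'agglomerator': 'kernighan-lin',
              'time_limit_solver': None}
    config_path = os.path.join(tmp_folder, 'solve_subproblems_job%i.json' % job_id)
    with open(config_path, 'w') as f:
        json.dump(config, f)
    solve_subproblems(job_id, config_path)
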
def solve_subproblems(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    costs_path = config['costs_path']
    costs_key = config['costs_key']
    graph_path = config['graph_path']
    graph_key = config['graph_key']
    decomposition_path = config['decomposition_path']
    tmp_folder = config['tmp_folder']
    component_list = config['block_list']
    n_threads = config['threads_per_job']
    agglomerator_key = config['agglomerator']

    with vu.file_reader(costs_path, 'r') as f:
        ds = f[costs_key]
        ds.n_threads = n_threads
        costs = ds[:]

    with vu.file_reader(decomposition_path, 'r') as f:
        ds = f['graph_labels']
        ds.n_threads = n_threads
        graph_labels = ds[:]

    # load the graph
    graph = ndist.Graph(os.path.join(graph_path, graph_key),
                        numberOfThreads=n_threads)
    uv_ids = graph.uvIds()
    agglomerator = su.key_to_agglomerator(agglomerator_key)

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [
            tp.submit(_solve_component, component_id, graph, uv_ids,
                      graph_labels, costs, agglomerator)
            for component_id in component_list
        ]
        results = [t.result() for t in tasks]

    cut_edge_ids = np.concatenate([res for res in results if res is not None])
    cut_edge_ids = np.unique(cut_edge_ids)

    res_folder = os.path.join(tmp_folder, 'subproblem_results')
    job_res_path = os.path.join(res_folder, 'job%i.npy' % job_id)
    fu.log("saving cut edge results to %s" % job_res_path)
    np.save(job_res_path, cut_edge_ids)
    fu.log_job_success(job_id)
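

# --- Hedged merge sketch (not part of the original source): it shows how the per-job
# 'job%i.npy' files written by solve_subproblems above could be combined into a single
# array of unique cut edge ids. The number of jobs and the tmp_folder layout are assumptions.
def _example_merge_cut_edges(tmp_folder, n_jobs):
    res_folder = os.path.join(tmp_folder, 'subproblem_results')
    job_results = [np.load(os.path.join(res_folder, 'job%i.npy' % job_id))
                   for job_id in range(n_jobs)]
    # an edge cut by several jobs only needs to be counted once
    return np.unique(np.concatenate(job_results))
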
def solve_global(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # path to the reduced problem
    problem_path = config['problem_path']
    # path where the node labeling shall be written
    assignment_path = config['assignment_path']
    assignment_key = config['assignment_key']
    scale = config['scale']
    agglomerator_key = config['agglomerator']
    n_threads = config['threads_per_job']
    time_limit = config.get('time_limit_solver', None)

    fu.log("using agglomerator %s" % agglomerator_key)
    if time_limit is None:
        fu.log("agglomeration without time limit")
    else:
        fu.log("agglomeration time limit %i" % time_limit)
    agglomerator = su.key_to_agglomerator(agglomerator_key)

    with vu.file_reader(problem_path, 'r') as f:
        group = f['s%i' % scale]
        graph_group = group['graph']
        ignore_label = graph_group.attrs['ignoreLabel']

        ds = graph_group['edges']
        ds.n_threads = n_threads
        uv_ids = ds[:]
        n_edges = len(uv_ids)

        # we only need to load the initial node labeling if at least
        # one reduction step was performed, i.e. if scale > 0
        if scale > 0:
            ds = group['node_labeling']
            ds.n_threads = n_threads
            initial_node_labeling = ds[:]

        ds = group['costs']
        ds.n_threads = n_threads
        costs = ds[:]
        assert len(costs) == n_edges, "%i, %i" % (len(costs), n_edges)

    n_nodes = int(uv_ids.max()) + 1
    fu.log("creating graph with %i nodes an %i edges" % (n_nodes, len(uv_ids)))
    graph = nifty.graph.undirectedGraph(n_nodes)
    graph.insertEdges(uv_ids)
    fu.log("start agglomeration")
    node_labeling = agglomerator(graph, costs,
                                 n_threads=n_threads,
                                 time_limit=time_limit)
    fu.log("finished agglomeration")

    # get the labeling of initial nodes
    if scale > 0:
        initial_node_labeling = node_labeling[initial_node_labeling]
    else:
        initial_node_labeling = node_labeling
    n_nodes = len(initial_node_labeling)

    # make sure zero is mapped to 0 if we have an ignore label
    if ignore_label and initial_node_labeling[0] != 0:
        new_max_label = int(node_labeling.max() + 1)
        initial_node_labeling[initial_node_labeling == 0] = new_max_label
        initial_node_labeling[0] = 0

    node_shape = (n_nodes,)
    chunks = (min(n_nodes, 524288),)
    with vu.file_reader(assignment_path) as f:
        ds = f.require_dataset(assignment_key, dtype='uint64',
                               shape=node_shape,
                               chunks=chunks,
                               compression='gzip')
        ds.n_threads = n_threads
        ds[:] = initial_node_labeling

    fu.log('saving results to %s:%s' % (assignment_path, assignment_key))
    fu.log_job_success(job_id)
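

# --- Hedged read-back sketch (not part of the original source): it shows how the node
# assignments written by solve_global above could be loaded again and applied to an array
# of node ids (e.g. a flat watershed over-segmentation) via numpy indexing. The paths and
# keys are placeholders.
def _example_apply_assignments(assignment_path, assignment_key, node_ids):
    with vu.file_reader(assignment_path, 'r') as f:
        ds = f[assignment_key]
        assignments = ds[:]
    # map every node id to the segment id it was merged into
    return assignments[node_ids]
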
def solve_lifted_subproblems(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    problem_path = config['problem_path']
    scale = config['scale']
    block_shape = config['block_shape']
    block_list = config['block_list']

    lifted_prefix = config['lifted_prefix']
    agglomerator_key = config['agglomerator']
    time_limit = config.get('time_limit_solver', None)
    n_threads = config.get('threads_per_job', 1)

    fu.log("reading problem from %s" % problem_path)
    problem = z5py.N5File(problem_path)
    shape = problem.attrs['shape']

    # load the costs
    # NOTE we use different cost identifiers for multicut and lifted multicut
    # in order to run both in the same n5-container.
    # However, for scale level 0 the costs come from the CostsWorkflow and
    # hence the identifier is identical
    costs_key = 's%i/costs_lmc' % scale if scale > 0 else 's0/costs'
    fu.log("reading costs from path in problem: %s" % costs_key)
    ds = problem[costs_key]
    ds.n_threads = n_threads
    costs = ds[:]

    # load the graph
    # NOTE we use different graph identifiers for multicut and lifted multicut
    # in order to run both in the same n5-container.
    # However, for scale level 0 the graph comes from the GraphWorkflow and
    # hence the identifier is identical
    graph_key = 's%i/graph_lmc' % scale if scale > 0 else 's0/graph'
    fu.log("reading graph from path in problem: %s" % graph_key)
    graph = ndist.Graph(os.path.join(problem_path, graph_key),
                        numberOfThreads=n_threads)
    uv_ids = graph.uvIds()
    # check if the problem has an ignore-label
    ignore_label = problem[graph_key].attrs['ignoreLabel']
    fu.log("ignore label is %s" % ('true' if ignore_label else 'false'))

    fu.log("using agglomerator %s" % agglomerator_key)
    lifted_agglomerator = su.key_to_lifted_agglomerator(agglomerator_key)
    # TODO enable different multicut agglomerator
    agglomerator = su.key_to_agglomerator(agglomerator_key)

    # load the lifted edges and costs
    nh_key = 's%i/lifted_nh_%s' % (scale, lifted_prefix)
    lifted_costs_key = 's%i/lifted_costs_%s' % (scale, lifted_prefix)
    ds = problem[nh_key]
    fu.log("reading lifted uvs")
    ds.n_threads = n_threads
    lifted_uvs = ds[:]

    fu.log("reading lifted costs")
    ds = problem[lifted_costs_key]
    ds.n_threads = n_threads
    lifted_costs = ds[:]

    # the output group
    out = problem['s%i/sub_results_lmc' % scale]

    # NOTE we use different sub-graph identifiers for multicut and lifted multicut
    # in order to run both in the same n5-container.
    # However, for scale level 0 the sub-graphs come from the GraphWorkflow and
    # are hence identical
    sub_graph_identifier = 'sub_graphs' if scale == 0 else 'sub_graphs_lmc'
    block_prefix = os.path.join(problem_path, 's%i' % scale,
                                sub_graph_identifier, 'block_')
    blocking = nt.blocking([0, 0, 0], shape, list(block_shape))

    fu.log("start processsing %i blocks" % len(block_list))
    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [
            tp.submit(_solve_block_problem, block_id, graph, uv_ids,
                      block_prefix, costs, lifted_uvs, lifted_costs,
                      lifted_agglomerator, agglomerator, ignore_label,
                      blocking, out, time_limit) for block_id in block_list
        ]
        [t.result() for t in tasks]

    fu.log_job_success(job_id)
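

# --- Hedged usage sketch (not part of the original source), analogous to the one after
# solve_subproblems but with the additional 'lifted_prefix' entry that selects the
# lifted_nh_<prefix> / lifted_costs_<prefix> datasets read above. All values are placeholders.
def _example_run_solve_lifted_subproblems(tmp_folder='./tmp', job_id=0):
    config = {'problem_path': '/path/to/problem.n5',   # hypothetical n5 container
              'scale': 0,
              'block_shape': [50, 512, 512],
              'block_list': [0, 1, 2, 3],
              'lifted_prefix': 'lifted_edges',          # hypothetical prefix chosen by the workflow
              'agglomerator': 'kernighan-lin',          # assumed agglomerator key
              'threads_per_job': 4}
    config_path = os.path.join(tmp_folder, 'solve_lifted_subproblems_job%i.json' % job_id)
    with open(config_path, 'w') as f:
        json.dump(config, f)
    solve_lifted_subproblems(job_id, config_path)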