Example #1
def _run_inference(blocking, block_list, halo, ds_in, ds_out, mask, preprocess,
                   predict, channel_mapping, n_threads):

    block_shape = blocking.blockShape
    dtypes = [dso.dtype for dso in ds_out]
    dtype = dtypes[0]
    assert all(dtp == dtype for dtp in dtypes)

    @dask.delayed
    def log1(block_id):
        fu.log("start processing block %i" % block_id)
        return block_id

    @dask.delayed
    def load_input(block_id):
        block = blocking.getBlock(block_id)

        # if we have a mask, check if this block is in mask
        if mask is not None:
            bb = vu.block_to_bb(block)
            bb_mask = mask[bb]
            if np.sum(bb_mask) == 0:
                return block_id, None

        return block_id, _load_input(ds_in, block.begin, block_shape, halo)

    @dask.delayed
    def preprocess_impl(inputs):
        block_id, data = inputs
        if data is None:
            return block_id, None
        data = preprocess(data)
        return block_id, data

    @dask.delayed
    def predict_impl(inputs):
        block_id, data = inputs
        if data is None:
            return block_id, None
        data = predict(data)
        return block_id, data

    @dask.delayed
    def write_output(inputs):
        block_id, output = inputs

        if output is None:
            return block_id

        out_shape = output.shape
        if len(out_shape) == 3:
            assert len(ds_out) == 1
        bb = vu.block_to_bb(blocking.getBlock(block_id))

        # check if we need to crop the output
        actual_shape = [b.stop - b.start for b in bb]
        if actual_shape != block_shape:
            block_bb = tuple(slice(0, ash) for ash in actual_shape)
            if output.ndim == 4:
                block_bb = (slice(None), ) + block_bb
            output = output[block_bb]

        # cast to uint8 if necessary
        if dtype == 'uint8':
            output = _to_uint8(output)

        # write the output to our output dataset(s)
        for dso, chann_mapping in zip(ds_out, channel_mapping):
            chan_start, chan_stop = chann_mapping

            if dso.ndim == 3:
                assert chan_stop - chan_start == 1
                out_bb = bb
            else:
                assert output.ndim == 4
                assert chan_stop - chan_start == dso.shape[0]
                out_bb = (slice(None), ) + bb

            if output.ndim == 4:
                outp = output[chan_start:chan_stop].squeeze()
            else:
                outp = output

            dso[out_bb] = outp

        return block_id

    @dask.delayed
    def log2(block_id):
        fu.log_block_success(block_id)
        return 1

    # iterate over the blocks in block list, get the input data and predict
    results = []
    for block_id in block_list:
        res = tz.pipe(block_id, log1, load_input, preprocess_impl,
                      predict_impl, write_output, log2)
        results.append(res)

    success = dask.compute(*results,
                           scheduler='threads',
                           num_workers=n_threads)
    fu.log('Finished prediction for %i blocks' % sum(success))
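
The per-block pipeline above chains its stages with dask.delayed and toolz' pipe. A minimal, self-contained sketch of that pattern (hypothetical stage names, no real I/O), assuming dask and toolz are installed:

import dask
import toolz as tz

@dask.delayed
def load_stage(block_id):
    # stand-in for load_input: return (block_id, data)
    return block_id, [block_id] * 3

@dask.delayed
def process_stage(inputs):
    # stand-in for preprocess/predict: transform the data
    block_id, data = inputs
    return block_id, [x * 2 for x in data]

@dask.delayed
def write_stage(inputs):
    # stand-in for write_output/log: report success for this block
    return 1

results = [tz.pipe(block_id, load_stage, process_stage, write_stage)
           for block_id in range(4)]
success = dask.compute(*results, scheduler='threads', num_workers=2)
print(sum(success))  # 4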
Example #2
def _ds_block(blocking, block_id, ds_in, ds_out, scale_factor, halo, sampler):
    fu.log("start processing block %i" % block_id)

    # load the block (output dataset / downsampled) coordinates
    if halo is None:
        block = blocking.getBlock(block_id)
        local_bb = np.s_[:]
        in_bb = vu.block_to_bb(block)
        out_bb = vu.block_to_bb(block)
        out_shape = block.shape
    else:
        halo_ds = [ha // scale_factor for ha in halo] if isinstance(scale_factor, int) else\
            [ha // sf for sf, ha in zip(scale_factor, halo)]
        block = blocking.getBlockWithHalo(block_id, halo_ds)
        in_bb = vu.block_to_bb(block.outerBlock)
        out_bb = vu.block_to_bb(block.innerBlock)
        local_bb = vu.block_to_bb(block.innerBlockLocal)
        out_shape = block.outerBlock.shape

    # check if we have channels
    ndim = ds_in.ndim
    in_shape = ds_in.shape
    if ndim == 4:
        in_shape = in_shape[1:]

    # upsample the input bounding box
    if isinstance(scale_factor, int):
        in_bb = tuple(slice(ib.start * scale_factor, min(ib.stop * scale_factor, sh))
                      for ib, sh in zip(in_bb, in_shape))
    else:
        in_bb = tuple(slice(ib.start * sf, min(ib.stop * sf, sh))
                      for ib, sf, sh in zip(in_bb, scale_factor, in_shape))
    # load the input
    if ndim == 4:
        in_bb = (slice(None),) + in_bb
        out_bb = (slice(None),) + out_bb
        local_bb = (slice(None),) + local_bb
    x = ds_in[in_bb]

    # don't sample empty blocks
    if np.sum(x != 0) == 0:
        fu.log_block_success(block_id)
        return

    dtype = x.dtype
    if np.dtype(dtype) != np.dtype('float32'):
        x = x.astype('float32')

    if ndim == 4:
        n_channels = x.shape[0]
        out = np.zeros((n_channels,) + tuple(out_shape), dtype=dtype)
        for c in range(n_channels):
            out[c] = _ds_vol(x[c], out_shape, sampler, scale_factor, dtype)
    else:
        out = _ds_vol(x, out_shape, sampler, scale_factor, dtype)

    try:
        ds_out[out_bb] = out[local_bb]
    except IndexError:
        raise(IndexError("%s, %s, %s" % (str(out_bb), str(local_bb), str(out.shape))))

    # log block success
    fu.log_block_success(block_id)
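
The coordinate arithmetic above (halo downscaling, then upsampling the output bounding box back to input resolution) can be checked in isolation; a small sketch with made-up values:

scale_factor = 2
in_shape = (100, 100, 100)
halo = [4, 4, 4]

# halo in downsampled (output) coordinates
halo_ds = [ha // scale_factor for ha in halo]  # [2, 2, 2]

# a bounding box in output coordinates, upsampled to input coordinates
out_bb = (slice(10, 20), slice(0, 5), slice(45, 50))
in_bb = tuple(slice(b.start * scale_factor, min(b.stop * scale_factor, sh))
              for b, sh in zip(out_bb, in_shape))
print(in_bb)  # (slice(20, 40), slice(0, 10), slice(90, 100))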
Example #3
def merge_uniques(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)
    n_jobs = config['n_jobs']
    tmp_folder = config['tmp_folder']
    n_threads = config['threads_per_job']
    output_path = config['output_path']
    output_key = config['output_key']

    def _read_input(job_id):
        return np.load(
            os.path.join(tmp_folder, 'find_uniques_job_%i.npy' % job_id))

    fu.log("read uniques")
    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [tp.submit(_read_input, job_id) for job_id in range(n_jobs)]
        uniques = np.concatenate([t.result() for t in tasks])

    fu.log("compute uniques")
    uniques = np.unique(uniques)
    fu.log("found %i unique values" % len(uniques))

    fu.log("saving results to %s/%s" % (output_path, output_key))
    with vu.file_reader(output_path) as f:
        chunk_size = min(int(1e6), len(uniques))
        chunks = (chunk_size, )
        ds = f.create_dataset(output_key,
                              shape=uniques.shape,
                              dtype='uint64',
                              compression='gzip',
                              chunks=chunks)
        ds.n_threads = n_threads
        ds[:] = uniques

    # log success
    fu.log_job_success(job_id)
Example #4
def _read_subresults(ds_results,
                     block_node_prefix,
                     blocking,
                     block_list,
                     n_threads,
                     initial_node_labeling=None):
    def read_subres(block_id):
        block = blocking.getBlock(block_id)
        # load nodes corresponding to this block
        block_path = block_node_prefix + str(block_id)
        nodes = ndist.loadNodes(block_path)
        # load the sub result for this block
        chunk = tuple(beg // bs
                      for beg, bs in zip(block.begin, blocking.blockShape))
        subres = ds_results.read_chunk(chunk)

        # subres is None -> this block has the ignore label
        # and has no edges. Note that this does not imply that the
        # block ONLY has the ignore label (or only one ordinary node),
        # because multiple ordinary nodes could be separated by the ignore label
        # and thus not share an edge.
        if subres is None:
            assert 0 in nodes
            return None

        assert len(nodes) == len(
            subres), "block %i: %i, %i" % (block_id, len(nodes), len(subres))
        return nodes, subres, int(subres.max()) + 1

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [tp.submit(read_subres, block_id) for block_id in block_list]
        results = [t.result() for t in tasks]

    # filter and get results
    block_list = [
        block_id for block_id, res in zip(block_list, results)
        if res is not None
    ]
    block_nodes = [res[0] for res in results if res is not None]
    block_res = [res[1] for res in results if res is not None]
    block_offsets = np.array([res[2] for res in results if res is not None],
                             dtype='uint64')

    # get the offsets and add them to the block results to make these unique
    block_offsets = np.roll(block_offsets, 1)
    block_offsets[0] = 0
    block_offsets = np.cumsum(block_offsets)
    block_res = [bres + boff for bres, boff in zip(block_res, block_offsets)]

    # apply the node labeling
    if initial_node_labeling is not None:
        fu.log("Apply initial node labeling to block nodes")
        block_nodes = [initial_node_labeling[nodes] for nodes in block_nodes]

    # construct result dicts for each block
    # keep zero mapped to zero
    block_results = [{
        node_id: res_id if node_id != 0 else 0
        for node_id, res_id in zip(bnodes, bres)
    } for bnodes, bres in zip(block_nodes, block_res)]
    return block_list, block_results
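
The np.roll / np.cumsum trick above turns per-block id counts into cumulative offsets, so block-local result ids become globally unique. A tiny numpy sketch with toy values:

import numpy as np

# per-block local results (toy values)
block_res = [np.arange(3, dtype='uint64'),
             np.arange(5, dtype='uint64'),
             np.arange(2, dtype='uint64')]
block_offsets = np.array([int(r.max()) + 1 for r in block_res], dtype='uint64')

# shift by one and accumulate: each block is offset by the id count of all previous blocks
block_offsets = np.roll(block_offsets, 1)
block_offsets[0] = 0
block_offsets = np.cumsum(block_offsets)
print(block_offsets)  # [0 3 8]

unique_res = [res + off for res, off in zip(block_res, block_offsets)]
print(unique_res[1])  # [3 4 5 6 7]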
def remove_noise_objects(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)

    input_path = config['input_path']
    input_key = config['input_key']
    graph_path = config['graph_path']
    graph_key = config['graph_key']
    output_path = config['output_path']
    output_key = config['output_key']
    output_graph_path = config['output_graph_path']
    output_graph_key = config['output_graph_key']

    with vu.file_reader(input_path) as f:
        input = f[input_key][:]
    with vu.file_reader(graph_path) as f:
        nodes = f[graph_key]['nodes'][:]
        edges = f[graph_key]['edges'][:]


    # First, remove all components except the largest one
    bg_candidates = [input[0, 0, 0], input[0, 0, -1], input[0, -1, 0], input[0, -1, -1],
                     input[-1, 0, 0], input[-1, 0, -1], input[-1, -1, 0], input[-1, -1, -1]]
    bg_id = max(bg_candidates, key=bg_candidates.count)
    fu.log("Background id is %d" % bg_id)
    
    adj_m = np.zeros((len(nodes), len(nodes)))
    for edge in edges:
        if edge[0] != bg_id and edge[1] != bg_id:
            adj_m[edge[0], edge[1]] = 1
    
    n_comp, comp = scipy.sparse.csgraph.connected_components(adj_m, directed=False, return_labels=True)   
    fu.log("Found %d connected components" % n_comp)
    
    max_size = 0
    max_comp = 0
    for component in set(comp):
        if component != bg_id:
            compsize = 0
            for label in nodes[comp==component]:
                compsize += (input == label).sum()
            if compsize > max_size:
                max_size = compsize
                max_comp = component

    to_remove = nodes[comp!=max_comp]
    for rc in to_remove:
        fu.log("Cleaning up id %d" % rc)
        if rc != bg_id:
            input[input==rc] = bg_id
            nodes[nodes==rc] = bg_id
            edges[edges==rc] = bg_id
    
    # Remove duplicates from graph
    nodes = np.unique(nodes)
    edge_new = []
    for edge in edges:
        if edge[0] == edge[1]:
            pass
        elif edge[1] < edge[0]:
            edge_new.append([edge[1], edge[0]])
        else:
            edge_new.append([edge[0], edge[1]])
            
    edges = np.array(edge_new, dtype=edges.dtype)
    edges = np.unique(edges, axis=0)
        
        

    # Now do the size-based merging, as a way to compensate for some leftover oversegmentation.
    # Skip this step if we have too few objects (since that means we are early in the development
    # of the embryo, and there are small polar bodies which we don't want to merge with the cells)
    if len(nodes) >= 10:
        sizes = []
        for node in nodes:
            sizes.append((input==node).sum())
        size_median = np.median(sizes)
        min_cell_size = size_median * 0.3
        
        sizes = np.array(sizes)
        node_sorted = np.argsort(sizes)
        for i in range(len(node_sorted)):
            node = node_sorted[i]
            if sizes[node] < min_cell_size:
                fu.log('Cell %d is too small '%node)
                neighbor_edges = edges[np.logical_or(edges[:,0]==node, edges[:,1]==node)]
                neighbors = neighbor_edges[neighbor_edges != node].reshape(-1)
                if len(neighbors)>0:
                    smallest_neighbor_i = np.argmin(sizes[neighbors])
                    smallest_neighbor = neighbors[smallest_neighbor_i]
                    input[input==node] = smallest_neighbor
                    sizes[smallest_neighbor] += sizes[node]
                    sizes[node] = 0
                    node_sorted = np.argsort(sizes)
                    edges[edges==node] = smallest_neighbor
                    fu.log('Merging %d with %d'% (node, smallest_neighbor))
    
        
    with vu.file_reader(output_path,'w') as f:
        ds = f.require_dataset(output_key, dtype='uint32', shape=input.shape, compression='gzip')
        ds[:] = input
        
    with vu.file_reader(output_graph_path,'w') as f:
        ds = f.require_dataset(output_graph_key+"/nodes", dtype='uint32', shape=nodes.shape, compression='gzip')
        ds[:] = nodes
        ds = f.require_dataset(output_graph_key+"/edges", dtype='uint32', shape=edges.shape, compression='gzip')
        ds[:] = edges
        
    fu.log_job_success(job_id)
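
remove_noise_objects finds the largest foreground component with scipy's connected components on a dense adjacency matrix. A minimal sketch of that step on a toy graph (assumed values):

import numpy as np
from scipy.sparse.csgraph import connected_components

# toy graph: nodes 0..4, node 0 is background
edges = np.array([[1, 2], [3, 4]])
bg_id = 0

adj = np.zeros((5, 5))
for u, v in edges:
    if u != bg_id and v != bg_id:
        adj[u, v] = 1

n_comp, comp = connected_components(adj, directed=False, return_labels=True)
print(n_comp, comp)  # 3 [0 1 1 2 2] -> components {0}, {1, 2}, {3, 4}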
def solve_lifted_global(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # path to the reduced problem
    problem_path = config['problem_path']
    # path where the node labeling shall be written
    assignment_path = config['assignment_path']
    assignment_key = config['assignment_key']

    lifted_prefix = config['lifted_prefix']
    scale = config['scale']
    agglomerator_key = config['agglomerator']
    n_threads = config['threads_per_job']
    time_limit = config.get('time_limit_solver', None)

    fu.log("using agglomerator %s" % agglomerator_key)
    solver = get_lifted_multicut_solver(agglomerator_key)

    with vu.file_reader(problem_path) as f:
        group = f['s%i' % scale]
        graph_group = group['graph'] if scale == 0 else group['graph_lmc']
        ignore_label = graph_group.attrs['ignore_label']

        ds = graph_group['edges']
        ds.n_threads = n_threads
        uv_ids = ds[:]
        n_edges = len(uv_ids)
        n_nodes = int(uv_ids.max()) + 1

        if scale > 0:
            ds = group['node_labeling_lmc']
            ds.n_threads = n_threads
            initial_node_labeling = ds[:]

        ds = group['costs'] if scale == 0 else group['costs_lmc']
        ds.n_threads = n_threads
        costs = ds[:]
        assert len(costs) == n_edges, "%i, %i" % (len(costs), n_edges)

        ds = group['lifted_nh_%s' % lifted_prefix]
        ds.n_threads = n_threads
        lifted_uvs = ds[:]

        ds = group['lifted_costs_%s' % lifted_prefix]
        ds.n_threads = n_threads
        lifted_costs = ds[:]

    graph = nifty.graph.undirectedGraph(n_nodes)
    graph.insertEdges(uv_ids)
    fu.log("start agglomeration")
    node_labeling = solver(graph,
                           costs,
                           lifted_uvs,
                           lifted_costs,
                           n_threads=n_threads,
                           time_limit=time_limit)
    fu.log("finished agglomeration")

    if scale > 0:
        # get the labeling of initial nodes
        initial_node_labeling = node_labeling[initial_node_labeling]
    else:
        initial_node_labeling = node_labeling
    n_nodes = len(initial_node_labeling)

    # make sure zero is mapped to 0 if we have an ignore label
    if ignore_label and initial_node_labeling[0] != 0:
        new_max_label = int(node_labeling.max() + 1)
        initial_node_labeling[initial_node_labeling == 0] = new_max_label
        initial_node_labeling[0] = 0

    # make node labeling consecutive
    vigra.analysis.relabelConsecutive(initial_node_labeling,
                                      start_label=1,
                                      keep_zeros=True,
                                      out=initial_node_labeling)

    # write assignments
    node_shape = (n_nodes, )
    chunks = (min(n_nodes, 524288), )
    with vu.file_reader(assignment_path) as f:
        ds = f.require_dataset(assignment_key,
                               dtype='uint64',
                               shape=node_shape,
                               chunks=chunks,
                               compression='gzip')
        ds.n_threads = n_threads
        ds[:] = initial_node_labeling

    fu.log('saving results to %s:%s' % (assignment_path, assignment_key))
    fu.log_job_success(job_id)
Example #7
def _apply_node_labels(costs, uv_ids, mode, labels, max_repulsive,
                       max_attractive):
    # TODO for now we assume binary node labeling,
    # but of course we could also have something more fancy with
    # multiple label ids
    n_nodes = len(labels)
    max_node_id = int(uv_ids.max())
    assert max_node_id + 1 <= n_nodes, "%i, %i" % (max_node_id, n_nodes)
    with_label = np.arange(n_nodes, dtype='uint64')[labels > 0]
    fu.log("number of nodes with label %i / %i" % (len(with_label), n_nodes))
    if mode == 'ignore':
        fu.log("Node-label mode: ignore")
        # ignore mode: set all edges that connect to a node with label to max repulsive
        edges_with_label = np.in1d(uv_ids, with_label).reshape(uv_ids.shape)
        edges_with_label = edges_with_label.any(axis=1)
        costs[edges_with_label] = max_repulsive
    elif mode == 'isolate':
        # isolate mode: set all edges connecting a labeled node to an unlabeled node to max repulsive
        fu.log("Node-label mode: isolate")
        # and set all edges connecting two labeled nodes to max attractive
        edges_with_label = np.in1d(uv_ids, with_label).reshape(uv_ids.shape)
        label_sum = edges_with_label.sum(axis=1)
        att_edges = label_sum == 2
        rep_edges = label_sum == 1
        fu.log("number of attractive edges: %i / %i" %
               (att_edges.sum(), len(att_edges)))
        fu.log("number of repulsive edges: %i / %i" %
               (rep_edges.sum(), len(rep_edges)))
        costs[att_edges] = max_attractive
        costs[rep_edges] = max_repulsive
    elif mode == 'ignore_transition':
        fu.log("Node-label mode: ignore_transition")
        labels_mapped_to_edges = labels[uv_ids]
        transition = labels_mapped_to_edges[:, 0] != labels_mapped_to_edges[:,
                                                                            1]
        costs[transition] = max_repulsive
        fu.log("number of repulsive edges: %i / %i" %
               (transition.sum(), len(transition)))
    else:
        raise RuntimeError("Invalid label mode: %s" % mode)
    return costs
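
The masks above are built by mapping node labels onto edges with np.in1d over the flattened uv_ids. A small numpy sketch of the three modes on toy inputs:

import numpy as np

uv_ids = np.array([[0, 1], [1, 2], [2, 3]], dtype='uint64')
labels = np.array([0, 1, 1, 0])  # nodes 1 and 2 carry the label
with_label = np.arange(len(labels), dtype='uint64')[labels > 0]

edges_with_label = np.in1d(uv_ids, with_label).reshape(uv_ids.shape)
print(edges_with_label.any(axis=1))       # 'ignore': [ True  True  True]
print(edges_with_label.sum(axis=1) == 2)  # 'isolate' attractive: [False  True False]

labels_on_edges = labels[uv_ids]
print(labels_on_edges[:, 0] != labels_on_edges[:, 1])  # 'ignore_transition': [ True False  True]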
def background_size_filter(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # read the config
    with open(config_path) as f:
        config = json.load(f)
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']
    block_list = config['block_list']
    block_shape = config['block_shape']
    res_path = config['res_path']

    # get the shape
    with vu.file_reader(input_path, 'r') as f:
        shape = f[input_key].shape
    blocking = nt.blocking(roiBegin=[0, 0, 0],
                           roiEnd=list(shape),
                           blockShape=list(block_shape))

    discard_ids = np.load(res_path)
    fu.log("Discarding %i ids" % len(discard_ids))

    same_file = input_path == output_path
    in_place = same_file and input_key == output_key

    if in_place:
        with vu.file_reader(input_path) as f:
            ds = f[input_key]
            [
                apply_block(block_id, blocking, ds, ds, discard_ids)
                for block_id in block_list
            ]
    elif same_file:
        with vu.file_reader(input_path) as f:
            ds_in = f[input_key]
            ds_out = f[output_key]
            [
                apply_block(block_id, blocking, ds_in, ds_out, discard_ids)
                for block_id in block_list
            ]
    else:
        with vu.file_reader(input_path,
                            'r') as f_in, vu.file_reader(output_path) as f_out:
            ds_in = f_in[input_key]
            ds_out = f_out[output_key]
            [
                apply_block(block_id, blocking, ds_in, ds_out, discard_ids)
                for block_id in block_list
            ]

    # copy the 'maxId' attribute if present
    if job_id == 0 and not in_place:
        with vu.file_reader(input_path, 'r') as f:
            attrs = f[input_key].attrs
            max_id = attrs.get('maxId', None)
        if max_id is not None:
            with vu.file_reader(output_path) as f:
                f[output_key].attrs['maxId'] = max_id

    fu.log_job_success(job_id)
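
apply_block is not shown in this example. A plausible, hypothetical sketch of what it does (set all ids listed in discard_ids inside one block to background), assuming nifty-style blocks with begin/end attributes:

import numpy as np

def apply_block(block_id, blocking, ds_in, ds_out, discard_ids):
    # hypothetical implementation: map discarded ids to 0 block-wise
    block = blocking.getBlock(block_id)
    bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))
    seg = ds_in[bb]
    seg[np.isin(seg, discard_ids)] = 0
    ds_out[bb] = seg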
Example #9
def _mws_block_pass2(block_id, blocking, ds_in, ds_out, mask, offsets, strides,
                     randomize_strides, halo, noise_level, max_block_id,
                     tmp_folder):
    fu.log("(Pass2) start processing block %i" % block_id)

    block = blocking.getBlockWithHalo(block_id, halo)
    in_bb = vu.block_to_bb(block.outerBlock)

    if mask is None:
        # if we don't have a mask, initialize with fully 'in-mask' volume
        # bb_mask = np.ones(tuple(b.stop - b.begin for b in in_bb),
        #                   dtype='bool')
        bb_mask = None
    else:
        bb_mask = mask[in_bb].astype('bool')
        if np.sum(bb_mask) == 0:
            fu.log_block_success(block_id)
            return

    # TODO does this make sense ?
    # set the mask for parts of indirect neighbor blocks
    # (which are also part of pass 2) to 0
    # bb_mask = mask_corners(bb_mask, halo)

    aff_bb = (slice(None), ) + in_bb
    affs = vu.normalize(ds_in[aff_bb])

    # load seeds
    seeds = ds_out[in_bb]
    seed_ids = np.unique(seeds)
    if seed_ids[0] == 0:
        seed_ids = seed_ids[1:]

    # load the serialized state for the neighboring (pass1) blocks
    # and find relevant edges between seed ids

    seed_edges = []
    seed_edge_weights = []
    attractive_mask = []

    for axis in range(3):
        for to_lower in (False, True):
            ngb_id = blocking.getNeighborId(block_id, axis, to_lower)

            # get path to state serialization and check if it exists
            save_path = os.path.join(tmp_folder,
                                     'seg_state_block%i.h5' % ngb_id)
            if not os.path.exists(save_path):
                continue

            with vu.file_reader(save_path) as f:
                # first, load the edges and see if they have overlap with our seed ids
                ngb_edges = f['edges'][:]
                ngb_edge_mask = np.in1d(ngb_edges,
                                        seed_ids).reshape(ngb_edges.shape)
                ngb_edge_mask = ngb_edge_mask.all(axis=1)

                # if we have edges, load the corresponding weights
                # and attractive / repulsive state
                if ngb_edge_mask.sum() > 0:
                    ngb_edges = ngb_edges[ngb_edge_mask]
                    ngb_weights = f['weights'][:][ngb_edge_mask]
                    ngb_attractive_edges = f['attractive_edge_mask'][:][
                        ngb_edge_mask]

                    seed_edges.append(ngb_edges)
                    seed_edge_weights.append(ngb_weights)
                    attractive_mask.append(ngb_attractive_edges)

    seed_edges = np.concatenate(seed_edges, axis=0)
    seed_edge_weights = np.concatenate(seed_edge_weights)
    attractive_mask = np.concatenate(attractive_mask)
    assert len(seed_edges) == len(seed_edge_weights) == len(attractive_mask)

    repulsive_mask = np.logical_not(attractive_mask)
    attractive_edges, repulsive_edges = seed_edges[
        attractive_mask], seed_edges[repulsive_mask]
    attractive_weights, repulsive_weights = seed_edge_weights[
        attractive_mask], seed_edge_weights[repulsive_mask]

    # run mws segmentation with seeds
    seed_state = {
        'attractive': (attractive_edges, attractive_weights),
        'repulsive': (repulsive_edges, repulsive_weights)
    }
    seg, grid_graph = mutex_watershed_with_seeds(
        affs,
        offsets,
        seeds,
        strides=strides,
        mask=bb_mask,
        randomize_strides=randomize_strides,
        noise_level=noise_level,
        return_graph=True,
        seed_state=seed_state)
    # offset with lowest block coordinate
    offset_id = block_id * np.prod(blocking.blockShape)
    vigra.analysis.relabelConsecutive(seg,
                                      start_label=offset_id,
                                      keep_zeros=True,
                                      out=seg)

    # find assignment of seed ids to segmentation ids
    assignments = grid_graph.get_seed_assignments_from_node_labels(
        seg.flatten())

    # get the cropped segmentation
    local_bb = vu.block_to_bb(block.innerBlockLocal)
    seg_crop = seg[local_bb]

    # filter the assignments from ids that are not in the crop
    crop_ids = np.unique(seg_crop)
    filter_mask = np.in1d(assignments[:, 1], crop_ids)
    assignments = assignments[filter_mask]

    # store assignments to tmp folder
    save_path = os.path.join(
        tmp_folder, 'mws_two_pass_assignments_block_%i.npy' % block_id)
    np.save(save_path, assignments)

    out_bb = vu.block_to_bb(block.innerBlock)
    ds_out[out_bb] = seg_crop

    # write max-id for the last block
    if block_id == max_block_id:
        _write_nlabels(ds_out, seg)
    # log block success
    fu.log_block_success(block_id)
def merge_lifted_problems(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    path = config['path']
    prefixs = config['prefixs']
    out_prefix = config['out_prefix']
    n_threads = config.get('threads_per_job', 1)

    f = z5py.File(path)
    edge_root = 's0/lifted_nh_%s'
    cost_root = 's0/lifted_costs_%s'

    edges = []
    costs = []
    for prefix in prefixs:
        edge_key = edge_root % prefix
        cost_key = cost_root % prefix

        ds_edges = f[edge_key]
        ds_edges.n_threads = n_threads
        this_edges = ds_edges[:]

        ds_costs = f[cost_key]
        ds_costs.n_threads = n_threads
        this_costs = ds_costs[:]

        assert len(this_costs) == len(this_edges)
        edges.append(this_edges)
        costs.append(this_costs)

    # TODO would be cleaner to
    # - sort the edges again
    # - see if any of the edges are duplicate and add up costs if they are
    edges = np.concatenate(edges, axis=0)
    costs = np.concatenate(costs, axis=0)

    edge_out_key = edge_root % out_prefix
    edge_chunks = (min(len(edges), 100000), 2)
    ds_edges_out = f.require_dataset(edge_out_key,
                                     shape=edges.shape,
                                     compression='gzip',
                                     dtype=edges.dtype,
                                     chunks=edge_chunks)
    ds_edges_out.n_threads = n_threads
    ds_edges_out[:] = edges

    cost_out_key = cost_root % out_prefix
    cost_chunks = (min(len(costs), 100000), )
    ds_costs_out = f.require_dataset(cost_out_key,
                                     shape=costs.shape,
                                     compression='gzip',
                                     dtype=costs.dtype,
                                     chunks=cost_chunks)
    ds_costs_out.n_threads = n_threads
    ds_costs_out[:] = costs

    fu.log_job_success(job_id)
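
The TODO above (sort the concatenated lifted edges and add up costs of duplicates) could be implemented roughly like this numpy sketch, assuming edges are stored as (u, v) pairs with u < v:

import numpy as np

edges = np.array([[0, 1], [2, 3], [0, 1]], dtype='uint64')
costs = np.array([0.5, -1.0, 0.25])

# unique edges plus, for every original edge, the index of its unique representative
unique_edges, inv = np.unique(edges, axis=0, return_inverse=True)

# sum up the costs of duplicate edges
merged_costs = np.zeros(len(unique_edges), dtype=costs.dtype)
np.add.at(merged_costs, inv.ravel(), costs)
print(unique_edges)  # [[0 1] [2 3]]
print(merged_costs)  # [ 0.75 -1.  ]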
Example #11
def graph_watershed_assignments(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    # load from config
    assignment_path = config['assignment_path']
    assignment_key = config['assignment_key']
    problem_path = config['problem_path']
    graph_key = config['graph_key']
    features_key = config['features_key']
    filter_nodes_path = config['filter_nodes_path']
    output_path = config['output_path']
    output_key = config['output_key']
    relabel = config['relabel']
    from_costs = config['from_costs']
    n_threads = config.get('threads_per_job', 1)

    # load the uv-ids, features and assignments
    fu.log("Read features and edges from %s" % problem_path)
    with vu.file_reader(problem_path, 'r') as f:
        ds = f['%s/edges' % graph_key]
        ds.n_threads = n_threads
        uv_ids = ds[:]
        n_nodes = int(uv_ids.max()) + 1

        ds = f[features_key]
        ds.n_threads = n_threads
        if ds.ndim == 2:
            features = ds[:, 0].squeeze()
        else:
            features = ds[:]

    if from_costs:
        minc = features.min()
        fu.log("Mapping costs with range %f to %f to range 0 to 1" %
               (minc, features.max()))
        features -= minc
        features /= features.max()
        features = 1. - features

    fu.log("Read assignments from %s" % assignment_path)
    with vu.file_reader(assignment_path, 'r') as f:
        ds = f[assignment_key]
        ds.n_threads = n_threads
        chunks = ds.chunks
        assignments = ds[:]
    assert n_nodes == len(assignments),\
        "Expected number of nodes %i and number of assignments %i does not agree" % (n_nodes, len(assignments))

    seed_offset = int(assignments.max()) + 1

    # load the discard ids
    discard_ids = np.load(filter_nodes_path)
    assert 0 not in discard_ids, "Breaks logic"

    # build the new graph
    graph = nifty.graph.undirectedGraph(n_nodes)
    graph.insertEdges(uv_ids)

    # run graph watershed to get the new assignments
    # map zero label to new id
    assignments[assignments == 0] = seed_offset

    discard_mask = np.in1d(assignments, discard_ids)
    assignments[discard_mask] = 0

    n_discard = int(discard_mask.sum())
    fu.log("Discarding %i / %i fragments" % (n_discard, assignments.size))
    fu.log("Start grah watershed")
    assignments = nifty.graph.edgeWeightedWatershedsSegmentation(
        graph, assignments, features)
    fu.log("Finished graph watershed")
    assignments[assignments == seed_offset] = 0

    if relabel:
        max_id = vigra.analysis.relabelConsecutive(assignments,
                                                   out=assignments,
                                                   start_label=1,
                                                   keep_zeros=True)[1]
        fu.log("Max-id after relabeling: %i (before was %i)" %
               (max_id, seed_offset - 1))

    with vu.file_reader(output_path) as f:
        ds = f.require_dataset(output_key,
                               shape=assignments.shape,
                               chunks=chunks,
                               compression='gzip',
                               dtype='uint64')
        ds.n_threads = n_threads
        ds[:] = assignments

    fu.log_job_success(job_id)
Example #12
def find_labeling(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)
    n_jobs = config['n_jobs']
    tmp_folder = config['tmp_folder']
    input_path = config['input_path']
    input_key = config['input_key']
    n_threads = config['threads_per_job']
    assignment_path = config['assignment_path']

    def _read_input(job_id):
        return np.load(
            os.path.join(tmp_folder, 'find_uniques_job_%i.npy' % job_id))

    # TODO this could be parallelized
    fu.log("read uniques")
    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [tp.submit(_read_input, job_id) for job_id in range(n_jobs)]
        uniques = np.concatenate([t.result() for t in tasks])
    fu.log("compute uniques")
    # uniques = nt.unique(uniques)
    uniques = np.unique(uniques)
    fu.log("relabel")
    _, max_id, mapping = vigra.analysis.relabelConsecutive(uniques,
                                                           keep_zeros=True,
                                                           start_label=1)

    fu.log("saving results to %s" % assignment_path)
    with open(assignment_path, 'wb') as f:
        pickle.dump(mapping, f)
    # log success
    fu.log_job_success(job_id)
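
find_labeling pickles the mapping returned by vigra's relabelConsecutive. The effect is just a sparse-to-consecutive id mapping with zero kept fixed; a pure-numpy sketch of the same idea (not the vigra implementation):

import numpy as np

uniques = np.array([0, 5, 17, 42], dtype='uint64')

new_labels = np.zeros(len(uniques), dtype='uint64')
new_labels[uniques != 0] = np.arange(1, int((uniques != 0).sum()) + 1, dtype='uint64')
mapping = dict(zip(uniques.tolist(), new_labels.tolist()))
print(mapping)  # {0: 0, 5: 1, 17: 2, 42: 3}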
Example #13
def inference(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    checkpoint_path = config['checkpoint_path']
    block_shape = config['block_shape']
    block_list = config['block_list']
    halo = config['halo']
    framework = config['framework']
    n_threads = config['threads_per_job']

    fu.log("run iference with framework %s, with %i threads" %
           (framework, n_threads))

    output_keys = config['output_keys']
    channel_mapping = config['channel_mapping']

    if config.get('set_visible_device', False):
        os.environ['CUDA_VISIBLE_DEVICES'] = str(job_id)
        fu.log("setting cuda visible devices to %i" % job_id)
    gpu = 0

    fu.log("Loading model from %s" % checkpoint_path)
    predict = get_predictor(framework)(checkpoint_path, halo, gpu=gpu)
    fu.log("Have model")
    preprocess = get_preprocessor(framework)

    shape = vu.get_shape(input_path, input_key)
    blocking = nt.blocking(roiBegin=[0, 0, 0],
                           roiEnd=list(shape),
                           blockShape=list(block_shape))

    with vu.file_reader(input_path,
                        'r') as f_in, vu.file_reader(output_path) as f_out:

        ds_in = f_in[input_key]
        ds_out = [f_out[key] for key in output_keys]

        if 'mask_path' in config:
            mask = vu.load_mask(config['mask_path'], config['mask_key'], shape)
        else:
            mask = None
        _run_inference(blocking, block_list, halo, ds_in, ds_out, mask,
                       preprocess, predict, channel_mapping, n_threads)
    fu.log_job_success(job_id)
Example #14
def log1(block_id):
    fu.log("start processing block %i" % block_id)
    return block_id
Example #15
def reduce_problem(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    problem_path = config['problem_path']
    initial_block_shape = config['block_shape']
    scale = config['scale']
    block_list = config['block_list']
    accumulation_method = config.get('accumulation_method', 'sum')
    n_threads = config['threads_per_job']
    roi_begin = config.get('roi_begin', None)
    roi_end = config.get('roi_end', None)

    # get the number of nodes and uv-ids at this scale level
    # as well as the initial node labeling
    fu.log("read problem from %s" % problem_path)
    graph_key = 's%i/graph' % scale
    with vu.file_reader(problem_path, 'r') as f:
        # load graph nodes and edges
        group = f[graph_key]
        shape = group.attrs['shape']

        # nodes
        # we only need to load the nodes for scale 0
        # otherwise, we already know that they are consecutive
        if scale == 0:
            ds = group['nodes']
            ds.n_threads = n_threads
            nodes = ds[:]
            n_nodes = len(nodes)
        else:
            n_nodes = group.attrs['numberOfNodes']
            nodes = np.arange(n_nodes, dtype='uint64')

        # edges
        ds = group['edges']
        ds.n_threads = n_threads
        uv_ids = ds[:]
        n_edges = len(uv_ids)

        # read initial node labeling
        if scale == 0:
            initial_node_labeling = None
        else:
            ds = f['s%i/node_labeling' % scale]
            ds.n_threads = n_threads
            initial_node_labeling = ds[:]

    costs_key = 's%i/costs' % scale
    with vu.file_reader(problem_path) as f:
        ds = f[costs_key]
        ds.n_threads = n_threads
        costs = ds[:]
    assert len(costs) == n_edges, "%i, %i" % (len(costs), n_edges)

    block_shape = [bsh * 2**scale for bsh in initial_block_shape]
    blocking = nt.blocking([0, 0, 0], shape, block_shape)

    # get the new node assignment
    fu.log("merge nodes")
    n_new_nodes, node_labeling, new_initial_node_labeling = _merge_nodes(
        problem_path, scale, blocking, block_list, nodes, uv_ids,
        initial_node_labeling, n_threads)
    # get the new edge assignment
    fu.log("get new edge ids")
    new_uv_ids, edge_labeling, new_costs = _get_new_edges(
        uv_ids, node_labeling, costs, accumulation_method, n_threads)

    # serialize the input graph and costs for the next scale level
    fu.log("serialize new problem to %s/s%i" % (problem_path, scale + 1))
    n_new_edges = _serialize_new_problem(problem_path, n_new_nodes, new_uv_ids,
                                         node_labeling, edge_labeling,
                                         new_costs, new_initial_node_labeling,
                                         shape, scale, initial_block_shape,
                                         n_threads, roi_begin, roi_end)

    fu.log("Reduced graph from %i to %i nodes; %i to %i edges." %
           (n_nodes, n_new_nodes, n_edges, n_new_edges))
    fu.log_job_success(job_id)
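
_get_new_edges is not shown here. Its core for the 'sum' accumulation method, mapping uv-ids through the node labeling, dropping self-edges, deduplicating, and accumulating costs, might look roughly like this hypothetical sketch:

import numpy as np

def get_new_edges_sum(uv_ids, node_labeling, costs):
    # hypothetical sketch: map edges to the merged (coarse) node ids
    uv_new = node_labeling[uv_ids]
    # drop edges that became self-edges after merging
    keep = uv_new[:, 0] != uv_new[:, 1]
    uv_new, costs = uv_new[keep], costs[keep]
    # enforce u < v so duplicates line up, then merge duplicates and sum their costs
    uv_new = np.sort(uv_new, axis=1)
    unique_uv, inv = np.unique(uv_new, axis=0, return_inverse=True)
    new_costs = np.zeros(len(unique_uv), dtype=costs.dtype)
    np.add.at(new_costs, inv.ravel(), costs)
    return unique_uv, new_costs
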
def merge_offsets(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)
    tmp_folder = config['tmp_folder']
    n_jobs = config['n_jobs']
    save_path = config['save_path']
    n_blocks = config['n_blocks']
    save_prefix = config['save_prefix']

    offsets = {}
    for block_job_id in range(n_jobs):
        path = os.path.join(tmp_folder,
                            '%s_%i.json' % (save_prefix, block_job_id))
        with open(path, 'r') as f:
            offsets.update(json.load(f))
        os.remove(path)

    # NOTE: the block-id keys in 'offsets' are stored as str, so we can't just use
    # 'sorted(offsets.items())' because it would string-sort!
    blocks = list(map(int, list(offsets.keys())))
    offset_list = list(offsets.values())
    assert len(blocks) == len(offset_list) == n_blocks
    fu.log("merging offsets for %i blocks" % n_blocks)

    key_sort = np.argsort(blocks)
    offset_list = np.array([offset_list[k] for k in key_sort], dtype='uint64')
    last_offset = offset_list[-1]

    empty_blocks = np.where(offset_list == 0)[0].tolist()

    offset_list = np.roll(offset_list, 1)
    offset_list[0] = 0
    offset_list = np.cumsum(offset_list).tolist()
    assert len(offset_list) == n_blocks, "%i, %i" % (len(offset_list), n_blocks)

    n_labels = offset_list[-1] + last_offset + 1
    fu.log("number of empty blocks: %i / %i" % (len(empty_blocks), n_blocks))
    fu.log("total number of labels: %i" % n_labels)

    fu.log("dumping offsets to %s" % save_path)
    with open(save_path, 'w') as f:
        json.dump({'offsets': offset_list,
                   'empty_blocks': empty_blocks,
                   'n_labels': n_labels}, f)
    fu.log_job_success(job_id)
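
The comment in merge_offsets matters because json keys are always strings; sorting them lexicographically would put '10' before '2'. A quick check of the numeric sort used above:

import numpy as np

offsets = {'10': 4, '2': 7, '1': 3}  # json keys are strings
blocks = list(map(int, offsets.keys()))
offset_list = list(offsets.values())

key_sort = np.argsort(blocks)  # numeric order: 1, 2, 10
print([offset_list[k] for k in key_sort])  # [3, 7, 4]
print([k for k, _ in sorted(offsets.items())])  # ['1', '10', '2'] -- string sort is wrong
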
def agglomerative_clustering(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # path to the reduced problem
    problem_path = config['problem_path']
    # path where the node labeling shall be written
    assignment_path = config['assignment_path']
    assignment_key = config['assignment_key']
    features_path = config['features_path']
    features_key = config['features_key']

    threshold = config['threshold']
    n_threads = config['threads_per_job']

    scale = 0
    with vu.file_reader(problem_path) as f:
        group = f['s%i' % scale]
        graph_group = group['graph']
        ignore_label = graph_group.attrs['ignoreLabel']

        ds = graph_group['edges']
        ds.n_threads = n_threads
        uv_ids = ds[:]
        n_edges = len(uv_ids)

    with vu.file_reader(features_path) as f:
        ds = f[features_key]
        ds.n_threads = n_threads
        edge_features = ds[:, 0].squeeze()
        edge_sizes = ds[:, -1].squeeze()
        assert len(edge_features) == n_edges

    n_nodes = int(uv_ids.max()) + 1
    fu.log("creating graph with %i nodes an %i edges" % (n_nodes, len(uv_ids)))
    graph = nifty.graph.undirectedGraph(n_nodes)
    graph.insertEdges(uv_ids)
    fu.log("start agglomeration")
    # TODO also support vanilla agglomerative clustering
    node_labeling = su.mala_clustering(graph, edge_features, edge_sizes,
                                       threshold)
    fu.log("finished agglomeration")

    n_nodes = len(node_labeling)

    # make sure zero is mapped to 0 if we have an ignore label
    if ignore_label and node_labeling[0] != 0:
        new_max_label = int(node_labeling.max() + 1)
        node_labeling[node_labeling == 0] = new_max_label
        node_labeling[0] = 0

    node_shape = (n_nodes, )
    chunks = (min(n_nodes, 524288), )
    with vu.file_reader(assignment_path) as f:
        ds = f.require_dataset(assignment_key,
                               dtype='uint64',
                               shape=node_shape,
                               chunks=chunks,
                               compression='gzip')
        ds.n_threads = n_threads
        ds[:] = node_labeling

    fu.log('saving results to %s:%s' % (assignment_path, assignment_key))
    fu.log_job_success(job_id)
Example #18
def solve_subproblems(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    problem_path = config['problem_path']
    scale = config['scale']
    block_shape = config['block_shape']
    block_list = config['block_list']
    n_threads = config['threads_per_job']
    agglomerator_key = config['agglomerator']
    time_limit = config.get('time_limit_solver', None)

    fu.log("reading problem from %s" % problem_path)
    problem = z5py.N5File(problem_path)
    shape = problem.attrs['shape']

    # load the costs
    costs_key = 's%i/costs' % scale
    fu.log("reading costs from path in problem: %s" % costs_key)
    ds = problem[costs_key]
    ds.n_threads = n_threads
    costs = ds[:]

    # load the graph
    graph_key = 's%i/graph' % scale
    fu.log("reading graph from path in problem: %s" % graph_key)
    graph = ndist.Graph(os.path.join(problem_path, graph_key),
                        numberOfThreads=n_threads)
    uv_ids = graph.uvIds()
    # check if the problem has an ignore-label
    ignore_label = problem[graph_key].attrs['ignoreLabel']
    fu.log("ignore label is %s" % ('true' if ignore_label else 'false'))

    fu.log("using agglomerator %s" % agglomerator_key)
    agglomerator = su.key_to_agglomerator(agglomerator_key)

    # the output group
    out = problem['s%i/sub_results' % scale]

    # TODO this should be a n5 varlen dataset as well and
    # then this is just another dataset in problem path
    block_prefix = os.path.join(problem_path, 's%i' % scale, 'sub_graphs',
                                'block_')
    blocking = nt.blocking([0, 0, 0], shape, list(block_shape))

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [
            tp.submit(_solve_block_problem, block_id, graph, uv_ids,
                      block_prefix, costs, agglomerator, ignore_label,
                      blocking, out, time_limit) for block_id in block_list
        ]
        [t.result() for t in tasks]

    fu.log_job_success(job_id)
Example #19
def _accumulate_block(block_id, blocking, ds_in, ds_labels, out_prefix,
                      graph_block_prefix, blocks_prefix, filters, sigmas, halo,
                      ignore_label, apply_in_2d, channel_agglomeration):

    fu.log("start processing block %i" % block_id)
    # load graph and check if this block has edges
    graph = ndist.Graph(graph_block_prefix + str(block_id))
    if graph.numberOfEdges == 0:
        fu.log("block %i has no edges" % block_id)
        fu.log_block_success(block_id)
        return

    shape = ds_labels.shape
    # get the bounding boxes
    if sum(halo) > 0:
        block = blocking.getBlockWithHalo(block_id, halo)
        block_shape = block.outerBlock.shape
        bb_in = vu.block_to_bb(block.outerBlock)
        bb = vu.block_to_bb(block.innerBlock)
        bb_local = vu.block_to_bb(block.innerBlockLocal)
        # increase inner bounding box by 1 in positive direction
        # in accordance with the graph extraction
        bb = tuple(
            slice(b.start, min(b.stop + 1, sh)) for b, sh in zip(bb, shape))
        bb_local = tuple(
            slice(b.start, min(b.stop + 1, bsh))
            for b, bsh in zip(bb_local, block_shape))
    else:
        block = blocking.getBlock(block_id)
        bb = vu.block_to_bb(block)
        bb = tuple(
            slice(b.start, min(b.stop + 1, sh)) for b, sh in zip(bb, shape))
        bb_in = bb
        bb_local = slice(None)

    input_dim = ds_in.ndim
    # TODO make choice of channels optional
    if input_dim == 4:
        bb_in = (slice(0, 3), ) + bb_in

    input_ = vu.normalize(ds_in[bb_in])
    if input_dim == 4:
        assert channel_agglomeration is not None
        input_ = getattr(np, channel_agglomeration)(input_, axis=0)

    # load labels
    labels = ds_labels[bb]

    # TODO pre-smoothing ?!
    # accumulate the edge features
    edge_features = [
        _accumulate_filter(input_, graph, labels, bb_local, filter_name, sigma,
                           ignore_label, filter_name == filters[-1]
                           and sigma == sigmas[-1], apply_in_2d)
        for filter_name in filters for sigma in sigmas
    ]
    edge_features = np.concatenate(edge_features, axis=1)

    # save the features
    save_path = out_prefix + str(block_id)
    fu.log("saving feature result of shape %s to %s" %
           (str(edge_features.shape), save_path))
    save_root, save_key = os.path.split(save_path)
    with z5py.N5File(save_root) as f:
        f.create_dataset(save_key,
                         data=edge_features,
                         chunks=edge_features.shape)

    fu.log_block_success(block_id)
def merge_assignments(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)
    output_path = config['output_path']
    output_key = config['output_key']

    tmp_folder = config['tmp_folder']
    n_jobs = config['n_jobs']
    offset_path = config['offset_path']
    save_prefix = config['save_prefix']

    with open(offset_path) as f:
        n_labels = int(json.load(f)['n_labels'])
    labels = np.arange(n_labels, dtype='uint64')

    # load and remove assignments
    assignments = [
        np.load(
            os.path.join(tmp_folder,
                         '%s_%i.npy' % (save_prefix, block_job_id)))
        for block_job_id in range(n_jobs)
    ]
    # for block_job_id in range(n_jobs):
    #     os.remove(os.path.join(tmp_folder,
    #                            'assignments_%i.npy' % block_job_id))

    if all(ass.size for ass in assignments):
        assignments = np.concatenate(assignments, axis=0)
        assignments = np.unique(assignments, axis=0)
        assert assignments.shape[1] == 2
        fu.log("have %i pairs of node assignments" % len(assignments))
        have_assignments = True
    else:
        fu.log(
            "did not find any node assignments, label assignment will be identity"
        )
        have_assignments = False

    ufd = nufd.boost_ufd(labels)
    if have_assignments:
        assert int(assignments.max()) + 1 <= n_labels, "%i, %i" % (
            int(assignments.max()) + 1, n_labels)
        ufd.merge(assignments)

    label_assignments = ufd.find(labels)
    label_assignments, max_id, _ = vigra.analysis.relabelConsecutive(
        label_assignments, keep_zeros=True, start_label=1)
    assert len(label_assignments) == n_labels
    fu.log("reducing the number of labels from %i to %i" %
           (n_labels, max_id + 1))

    chunks = (min(65334, n_labels), )
    with vu.file_reader(output_path) as f:
        f.create_dataset(output_key,
                         data=label_assignments,
                         compression='gzip',
                         chunks=chunks)

    fu.log_job_success(job_id)
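
nufd.boost_ufd provides the union-find used above. A minimal pure-Python stand-in (a sketch, not the real nifty_ufd API) showing how pairwise assignments are merged into connected label groups:

import numpy as np

class SimpleUfd:
    # minimal union-find with path compression, as a stand-in for nufd.boost_ufd
    def __init__(self, labels):
        self.parent = {int(lab): int(lab) for lab in labels}

    def find(self, x):
        root = x
        while self.parent[root] != root:
            root = self.parent[root]
        while self.parent[x] != root:  # path compression
            self.parent[x], x = root, self.parent[x]
        return root

    def merge(self, pairs):
        for u, v in pairs:
            self.parent[self.find(int(u))] = self.find(int(v))

labels = np.arange(6, dtype='uint64')
ufd = SimpleUfd(labels)
ufd.merge(np.array([[1, 2], [2, 3], [4, 5]], dtype='uint64'))
print([ufd.find(int(lab)) for lab in labels])  # [0, 3, 3, 3, 5, 5]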
Example #21
def probs_to_costs(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)

    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']
    features_path = config['features_path']
    features_key = config['features_key']
    # config for cost transformations
    invert_inputs = config.get('invert_inputs', False)
    transform_to_costs = config.get('transform_to_costs', True)
    weight_edges = config.get('weight_edges', False)
    weighting_exponent = config.get('weighting_exponent', 1.)
    beta = config.get('beta', 0.5)

    # additional node labels
    node_labels = config.get('node_labels', None)

    n_threads = config['threads_per_job']

    fu.log("reading input from %s:%s" % (input_path, input_key))
    with vu.file_reader(input_path) as f:
        ds = f[input_key]
        ds.n_threads = n_threads
        # we might have 1d or 2d inputs, depending on input from features or random forest
        slice_ = slice(None) if ds.ndim == 1 else (slice(None), slice(0, 1))
        costs = ds[slice_].squeeze()

    # normalize to range 0, 1
    min_, max_ = costs.min(), costs.max()
    fu.log('input-range: %f %f' % (min_, max_))
    fu.log('%f +- %f' % (costs.mean(), costs.std()))

    if invert_inputs:
        fu.log("inverting probability inputs")
        costs = 1. - costs

    if transform_to_costs:
        fu.log("converting probability inputs to costs")
        if weight_edges:
            fu.log("weighting edges by size")
            # the edge sizes are at the last feature index
            with vu.file_reader(features_path) as f:
                ds = f[features_key]
                n_features = ds.shape[1]
                ds.n_threads = n_threads
                edge_sizes = ds[:, n_features - 1:n_features].squeeze()
        else:
            fu.log("no edge weighting")
            edge_sizes = None

        costs = _transform_probabilities_to_costs(
            costs,
            beta=beta,
            edge_sizes=edge_sizes,
            weighting_exponent=weighting_exponent)

        # adjust edges of nodes with labels if given
        if node_labels is not None:
            fu.log("have node labels")
            max_repulsive = 5 * costs.min()
            max_attractive = 5 * costs.max()
            fu.log("maximally attractive edge weight %f" % max_attractive)
            fu.log("maximally repulsive edge weight %f" % max_repulsive)
            with vu.file_reader(features_path, 'r') as f:
                ds = f['s0/graph/edges']
                ds.n_threads = n_threads
                uv_ids = ds[:]
            for mode, path_key in node_labels.items():
                path, key = path_key
                fu.log("applying node labels with mode %s from %s:%s" %
                       (mode, path, key))
                with vu.file_reader(path, 'r') as f:
                    ds = f[key]
                    ds.n_threads = n_threads
                    labels = ds[:]
                costs = _apply_node_labels(costs, uv_ids, mode, labels,
                                           max_repulsive, max_attractive)

    with vu.file_reader(output_path) as f:
        ds = f[output_key]
        ds.n_threads = n_threads
        ds[:] = costs

    fu.log_job_success(job_id)
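
_transform_probabilities_to_costs is not shown in this example. A sketch under the assumption that it follows the standard multicut log-ratio transform with a beta prior and optional edge-size weighting:

import numpy as np

def transform_probabilities_to_costs(probs, beta=0.5, edge_sizes=None,
                                     weighting_exponent=1.):
    # hypothetical sketch of the usual multicut cost transform:
    # p close to 1 (likely boundary) -> repulsive (negative) cost,
    # p close to 0 -> attractive (positive) cost
    p = np.clip(probs, 0.001, 0.999)
    costs = np.log((1. - p) / p) + np.log((1. - beta) / beta)
    if edge_sizes is not None:
        weight = (edge_sizes / edge_sizes.max()) ** weighting_exponent
        costs *= weight
    return costs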
Example #22
def write(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("loading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # read I/O config
    input_path = config['input_path']
    input_key = config['input_key']

    # check if we write in-place
    if 'output_path' in config:
        output_path = config['output_path']
        output_key = config['output_key']
        in_place = False
    else:
        in_place = True

    block_shape = config['block_shape']
    block_list = config['block_list']
    n_threads = config.get('threads_per_core', 1)
    allow_empty_assignments = config.get('allow_empty_assignments', False)

    # read node assignments
    assignment_path = config['assignment_path']
    assignment_key = config.get('assignment_key', None)
    fu.log("loading node labels from %s" % assignment_path)
    node_labels = _load_assignments(assignment_path, assignment_key, n_threads)

    offset_path = config.get('offset_path', None)

    # if we write in-place, we only need to open one file and one dataset
    if in_place:
        with vu.file_reader(input_path) as f:
            ds_in = f[input_key]
            ds_out = ds_in

            shape = ds_in.shape
            blocking = nt.blocking([0, 0, 0], list(shape), list(block_shape))

            if offset_path is None:
                _write(ds_in, ds_out, blocking, block_list, n_threads,
                       node_labels, allow_empty_assignments)
            else:
                _write_with_offsets(ds_in, ds_out, blocking, block_list,
                                    n_threads, node_labels, offset_path,
                                    allow_empty_assignments)
        # write the max-label
        # for job 0
        if job_id == 0:
            _write_maxlabel(input_path, input_key, node_labels)

    else:
        # even if we do not write in-place, we might still write to the same output_file,
        # but different datasets
        # hdf5 does not like opening a file twice, so we need to check for this
        if input_path == output_path:
            with vu.file_reader(input_path) as f:
                ds_in = f[input_key]
                ds_out = f[output_key]

                shape = ds_in.shape
                blocking = nt.blocking([0, 0, 0], list(shape),
                                       list(block_shape))

                if offset_path is None:
                    _write(ds_in, ds_out, blocking, block_list, n_threads,
                           node_labels, allow_empty_assignments)
                else:
                    _write_with_offsets(ds_in, ds_out, blocking, block_list,
                                        n_threads, node_labels, offset_path,
                                        allow_empty_assignments)
        else:
            with vu.file_reader(
                    input_path,
                    'r') as f_in, vu.file_reader(output_path) as f_out:
                ds_in = f_in[input_key]
                ds_out = f_out[output_key]

                shape = ds_in.shape
                blocking = nt.blocking([0, 0, 0], list(shape),
                                       list(block_shape))

                if offset_path is None:
                    _write(ds_in, ds_out, blocking, block_list, n_threads,
                           node_labels, allow_empty_assignments)
                else:
                    _write_with_offsets(ds_in, ds_out, blocking, block_list,
                                        n_threads, node_labels, offset_path,
                                        allow_empty_assignments)
        # write the max-label
        # for job 0
        if job_id == 0:
            _write_maxlabel(output_path, output_key, node_labels)

    fu.log_job_success(job_id)
Example #23
0
def sub_solutions(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    problem_path = config['problem_path']
    scale = config['scale']
    block_shape = config['block_shape']
    block_list = config['block_list']
    n_threads = config['threads_per_job']
    output_path = config['output_path']
    output_key = config['output_key']
    ws_path = config['ws_path']
    ws_key = config['ws_key']

    sub_result_identifier = config.get('sub_result_identifier', 'sub_results')
    sub_graph_identifier = config.get('sub_graph_identifier', 'sub_graphs')

    fu.log("reading problem from %s" % problem_path)
    problem = z5py.N5File(problem_path)
    shape = problem.attrs['shape']

    blocking = nt.blocking([0, 0, 0], list(shape), list(block_shape))

    # we need to project the ws labels back to the original labeling
    # for this, we first need to load the initial node labeling
    if scale > 1:
        node_label_key = 's%i/node_labeling' % scale
        fu.log("scale %i > 1; reading node labeling from %s" %
               (scale, node_label_key))
        ds_node_labeling = problem[node_label_key]
        ds_node_labeling.n_threads = n_threads
        initial_node_labeling = ds_node_labeling[:]
    else:
        initial_node_labeling = None

    # read the sub results
    ds_results = problem['s%i/%s/node_result' % (scale, sub_result_identifier)]
    # TODO should be varlen dataset
    fu.log("reading subresults")
    block_node_prefix = os.path.join(problem_path, 's%i' % scale,
                                     sub_graph_identifier, 'block_')
    block_list, block_results = _read_subresults(ds_results, block_node_prefix,
                                                 blocking, block_list,
                                                 n_threads,
                                                 initial_node_labeling)

    fu.log("writing subresults")
    # write the resulting segmentation
    with vu.file_reader(output_path) as f_out, vu.file_reader(ws_path,
                                                              'r') as f_in:
        ds_in = f_in[ws_key]
        ds_out = f_out[output_key]
        with futures.ThreadPoolExecutor(n_threads) as tp:
            tasks = [
                tp.submit(_write_block_res, ds_in, ds_out, block_id, blocking,
                          block_res)
                for block_id, block_res in zip(block_list, block_results)
            ]
            [t.result() for t in tasks]
    fu.log_job_success(job_id)
def _solve_block_problem(block_id, graph, uv_ids, block_prefix, costs,
                         lifted_uvs, lifted_costs, lifted_agglomerator,
                         agglomerator, ignore_label, blocking, out,
                         time_limit):
    fu.log("Start processing block %i" % block_id)

    # load the nodes in this sub-block and map them
    # to our current node-labeling
    block_path = block_prefix + str(block_id)
    assert os.path.exists(block_path), block_path
    nodes = ndist.loadNodes(block_path)
    # if we have an ignore label, remove zero from the nodes
    # (nodes are sorted, so it will always be at pos 0)
    if ignore_label and nodes[0] == 0:
        nodes = nodes[1:]
        removed_ignore_label = True
        if len(nodes) == 0:
            fu.log_block_success(block_id)
            return
    else:
        removed_ignore_label = False

    # we allow for invalid nodes here,
    # which can occur for un-connected graphs resulting from bad masks ...
    inner_edges, outer_edges = graph.extractSubgraphFromNodes(
        nodes, allowInvalidNodes=True)

    # if we have no inner edges, return
    # the outer edges as cut edges
    if len(inner_edges) == 0:
        if len(nodes) > 1:
            assert removed_ignore_label,\
                "Can only have trivial sub-graphs for more than one node if we removed ignore label"
        cut_edge_ids = outer_edges
        sub_result = None
        fu.log("Block %i: has no inner edges" % block_id)
    # otherwise solve the multicut for this block
    else:
        # find the lifted uv-ids that correspond to the inner edges
        inner_lifted_edges = _find_lifted_edges(lifted_uvs, nodes)
        fu.log(
            "Block %i: Solving sub-block with %i nodes, %i edges and %i lifted edges"
            %
            (block_id, len(nodes), len(inner_edges), len(inner_lifted_edges)))
        sub_uvs = uv_ids[inner_edges]
        # relabel the sub-nodes and associated uv-ids for more efficient processing
        nodes_relabeled, max_id, mapping = vigra.analysis.relabelConsecutive(
            nodes, start_label=0, keep_zeros=False)
        sub_uvs = nt.takeDict(mapping, sub_uvs)
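        # e.g. nodes = [3, 7, 9] gives nodes_relabeled = [0, 1, 2] and mapping = {3: 0, 7: 1, 9: 2};
        # takeDict then applies this mapping element-wise to the sub uv-ids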
        n_local_nodes = max_id + 1
        sub_graph = nifty.graph.undirectedGraph(n_local_nodes)
        sub_graph.insertEdges(sub_uvs)

        sub_costs = costs[inner_edges]
        assert len(sub_costs) == sub_graph.numberOfEdges

        # we only need to run lifted multicut if we have lifted edges in
        # the subgraph
        if len(inner_lifted_edges) > 0:
            fu.log("Block %i: have lifted edges, using the lifted multicut solver" % block_id)
            sub_lifted_uvs = nt.takeDict(mapping,
                                         lifted_uvs[inner_lifted_edges])
            sub_lifted_costs = lifted_costs[inner_lifted_edges]

            # solve multicut and relabel the result
            sub_result = lifted_agglomerator(sub_graph,
                                             sub_costs,
                                             sub_lifted_uvs,
                                             sub_lifted_costs,
                                             time_limit=time_limit)

        # otherwise we run normal multicut
        else:
            fu.log("Block %i: don't have lifted edges and use multicut solver")
            # solve multicut and relabel the result
            sub_result = agglomerator(sub_graph,
                                      sub_costs,
                                      time_limit=time_limit)

        assert len(sub_result) == len(nodes), "%i, %i" % (len(sub_result),
                                                          len(nodes))
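        # an edge is a cut edge iff its two endpoint nodes are assigned to different components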
        sub_edgeresult = sub_result[sub_uvs[:, 0]] != sub_result[sub_uvs[:, 1]]
        assert len(sub_edgeresult) == len(inner_edges)
        cut_edge_ids = inner_edges[sub_edgeresult]
        cut_edge_ids = np.concatenate([cut_edge_ids, outer_edges])

        _, res_max_id, _ = vigra.analysis.relabelConsecutive(sub_result,
                                                             start_label=1,
                                                             keep_zeros=False,
                                                             out=sub_result)
        fu.log("Block %i: Subresult has %i unique ids" %
               (block_id, res_max_id))
        # IMPORTANT !!!
        # we can only add back the ignore label after getting the edge-result !!!
        if removed_ignore_label:
            sub_result = np.concatenate((np.zeros(1,
                                                  dtype='uint64'), sub_result))

    # get chunk id of this block
    block = blocking.getBlock(block_id)
    chunk_id = tuple(beg // sh
                     for beg, sh in zip(block.begin, blocking.blockShape))
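    # e.g. block.begin = (64, 128, 0) with blockShape = (64, 64, 64) gives chunk_id = (1, 2, 0)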

    # serialize the cut-edge-ids and the (local) node labeling
    ds_edge_res = out['cut_edge_ids']
    fu.log("Block %i: Serializing %i cut edges" %
           (block_id, len(cut_edge_ids)))
    ds_edge_res.write_chunk(chunk_id, cut_edge_ids, True)
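    # the trailing True writes the chunk in varlen mode, since the number of cut edges differs per block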

    if sub_result is not None:
        ds_node_res = out['node_result']
        fu.log("Block %i: Serializing %i node results" %
               (block_id, len(sub_result)))
        ds_node_res.write_chunk(chunk_id, sub_result, True)

    fu.log_block_success(block_id)
def transformix_coordinate(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # read the config
    with open(config_path) as f:
        config = json.load(f)

    input_path = config['input_path']
    input_key = config['input_key']

    output_path = config['output_path']
    output_key = config['output_key']

    transformation_file = config['transformation_file']
    elastix_dir = config['elastix_directory']
    tmp_folder = config['tmp_folder']

    block_list = config['block_list']
    block_shape = config['block_shape']

    fu.log("Applying registration with:")
    fu.log("transformation_file: %s" % transformation_file)
    fu.log("elastix_directory: %s" % elastix_dir)

    transformix_bin = os.path.join(elastix_dir, 'bin', 'transformix')
    # set the ld library path
    lib_path = os.environ.get('LD_LIBRARY_PATH', '')  # avoid a KeyError if the variable is unset
    elastix_lib_path = os.path.join(elastix_dir, 'lib')
    os.environ['LD_LIBRARY_PATH'] = f"{lib_path}:{elastix_lib_path}"
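    # the transformix binary needs the elastix libraries on the library path when it is invoked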

    with open_file(input_path, 'r') as f_in, open_file(output_path,
                                                       'a') as f_out:

        ds_in = f_in[input_key]
        ds_out = f_out[output_key]
        shape = ds_out.shape

        blocking = nt.blocking([0, 0, 0], shape, block_shape)

        for block_id in block_list:
            fu.log("start processing block %i" % block_id)
            process_block(ds_in, ds_out, blocking, block_id, transformix_bin,
                          transformation_file, tmp_folder)
            fu.log_block_success(block_id)

    fu.log_job_success(job_id)
def solve_lifted_subproblems(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    problem_path = config['problem_path']
    scale = config['scale']
    block_shape = config['block_shape']
    block_list = config['block_list']

    lifted_prefix = config['lifted_prefix']
    agglomerator_key = config['agglomerator']
    time_limit = config.get('time_limit_solver', None)
    n_threads = config.get('threads_per_job', 1)

    fu.log("reading problem from %s" % problem_path)
    problem = z5py.N5File(problem_path)
    shape = problem.attrs['shape']

    # load the costs
    # NOTE we use different cost identifiers for multicut and lifted multicut
    # in order to run both in the same n5-container.
    # However, for scale level 0 the costs come from the CostsWorkflow and
    # hence the identifier is identical
    costs_key = 's%i/costs_lmc' % scale if scale > 0 else 's0/costs'
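    # e.g. scale 2 reads 's2/costs_lmc', while scale 0 falls back to 's0/costs'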
    fu.log("reading costs from path in problem: %s" % costs_key)
    ds = problem[costs_key]
    ds.n_threads = n_threads
    costs = ds[:]

    # load the graph
    # NOTE we use different graph identifiers for multicut and lifted multicut
    # in order to run both in the same n5-container.
    # However, for scale level 0 the graph comes from the GraphWorkflow and
    # hence the identifier is identical
    graph_key = 's%i/graph_lmc' % scale if scale > 0 else 's0/graph'
    fu.log("reading graph from path in problem: %s" % graph_key)
    graph = ndist.Graph(os.path.join(problem_path, graph_key),
                        numberOfThreads=n_threads)
    uv_ids = graph.uvIds()
    # check if the problem has an ignore-label
    ignore_label = problem[graph_key].attrs['ignoreLabel']
    fu.log("ignore label is %s" % ('true' if ignore_label else 'false'))

    fu.log("using agglomerator %s" % agglomerator_key)
    lifted_agglomerator = su.key_to_lifted_agglomerator(agglomerator_key)
    # TODO enable different multicut agglomerator
    agglomerator = su.key_to_agglomerator(agglomerator_key)

    # load the lifted edges and costs
    nh_key = 's%i/lifted_nh_%s' % (scale, lifted_prefix)
    lifted_costs_key = 's%i/lifted_costs_%s' % (scale, lifted_prefix)
    ds = problem[nh_key]
    fu.log("reading lifted uvs")
    ds.n_threads = n_threads
    lifted_uvs = ds[:]

    fu.log("reading lifted costs")
    ds = problem[lifted_costs_key]
    ds.n_threads = n_threads
    lifted_costs = ds[:]

    # the output group
    out = problem['s%i/sub_results_lmc' % scale]

    # NOTE we use different sub-graph identifiers for multicut and lifted multicut
    # in order to run both in the same n5-container.
    # However, for scale level 0 the sub-graphs come from the GraphWorkflow and
    # are hence identical
    sub_graph_identifier = 'sub_graphs' if scale == 0 else 'sub_graphs_lmc'
    block_prefix = os.path.join(problem_path, 's%i' % scale,
                                sub_graph_identifier, 'block_')
    blocking = nt.blocking([0, 0, 0], shape, list(block_shape))

    fu.log("start processsing %i blocks" % len(block_list))
    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [
            tp.submit(_solve_block_problem, block_id, graph, uv_ids,
                      block_prefix, costs, lifted_uvs, lifted_costs,
                      lifted_agglomerator, agglomerator, ignore_label,
                      blocking, out, time_limit) for block_id in block_list
        ]
        [t.result() for t in tasks]

    fu.log_job_success(job_id)
def learn_rf(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)

    features_dict = config['features_dict']
    labels_dict = config['labels_dict']
    output_path = config['output_path']
    n_threads = config['threads_per_job']
    n_trees = config.get('n_trees', 100)

    features = []
    labels = []

    # TODO enable multiple feature paths
    # NOTE we assert that the keys of both dicts are identical in the main class
    for key, feat_path in features_dict.items():
        label_path = labels_dict[key]
        fu.log("reading featurs from %s:%s, labels from %s:%s" %
               tuple(feat_path + label_path))

        with vu.file_reader(feat_path[0]) as f:
            ds = f[feat_path[1]]
            ds.n_threads = n_threads
            feats = ds[:]

        with vu.file_reader(label_path[0]) as f:
            ds = f[label_path[1]]
            ds.n_threads = n_threads
            label = ds[:]
        assert len(label) == len(feats)

        # check if we have an ignore label
        ignore_mask = label != -1
        n_ignore = int(ignore_mask.size - np.sum(ignore_mask))
        if n_ignore > 0:
            fu.log("removing %i examples due to ignore mask" % n_ignore)
            feats = feats[ignore_mask]
            label = label[ignore_mask]

        features.append(feats)
        labels.append(label)

    features = np.concatenate(features, axis=0)
    labels = np.concatenate(labels, axis=0)

    fu.log("start learning random forest with %i examples and %i features" %
           features.shape)
    rf = RandomForestClassifier(n_estimators=n_trees, n_jobs=n_threads)
    rf.fit(features, labels)

    fu.log("saving random forest to %s" % output_path)
    with open(output_path, 'wb') as f:
        pickle.dump(rf, f)

    fu.log_job_success(job_id)
Example #28
0
def _merge_nodes(problem_path, scale, blocking, block_list, nodes, uv_ids,
                 initial_node_labeling, n_threads):
    # load the cut edge ids
    n_edges = len(uv_ids)
    cut_edge_ids = _load_cut_edges(problem_path, scale, blocking, block_list,
                                   n_threads)
    assert len(cut_edge_ids) < n_edges, "%i >= %i, does not reduce problem" % (
        len(cut_edge_ids), n_edges)

    merge_edges = np.ones(n_edges, dtype='bool')
    merge_edges[cut_edge_ids] = False
    fu.log('merging %i / %i edges' % (np.sum(merge_edges), n_edges))

    # merge node pairs with ufd
    ufd = nufd.boost_ufd(nodes)
    ufd.merge(uv_ids[merge_edges])
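    # after merging, all nodes connected by non-cut edges share a single representative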

    # get the node results and label them consecutively
    node_labeling = ufd.find(nodes)
    node_labeling, max_new_id, _ = relabelConsecutive(node_labeling,
                                                      start_label=0,
                                                      keep_zeros=False)
    assert node_labeling[0] == 0
    # FIXME this looks fishy, redo !!!
    # # make sure that zero is still mapped to zero
    # if node_labeling[0] != 0:
    #     # if it isn't, swap labels accordingly
    #     zero_label = node_labeling[0]
    #     to_relabel = node_labeling == 0
    #     node_labeling[node_labeling == zero_label] = 0
    #     node_labeling[to_relabel] = zero_label
    n_new_nodes = max_new_id + 1
    fu.log("have %i nodes in new node labeling" % n_new_nodes)

    # get the labeling of initial nodes
    if initial_node_labeling is None:
        # if we don't have an initial node labeling, we are in the first scale.
        # here, the graph nodes might not be consecutive / not start at zero.
        # to keep the node labeling valid, we must make the labeling consecutive by inserting zeros
        fu.log("don't have an initial node labeling")

        # check if `nodes` are consecutive and start at zero
        node_max_id = int(nodes.max())
        if node_max_id + 1 != len(nodes):
            fu.log("nodes are not consecutve and/or don't start at zero")
            fu.log("inflating node labels accordingly")
            node_labeling = nt.inflateLabeling(nodes, node_labeling,
                                               node_max_id)
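            # e.g. nodes = [0, 2, 5] with labels [0, 1, 2] is (assumed to be) expanded to a dense
            # labeling of length node_max_id + 1, with fill values for the missing node ids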

        new_initial_node_labeling = node_labeling
    else:
        fu.log("mapping new node labeling to the labeling of initial (= scale 0) nodes")
        # NOTE access like this is ok because all node labelings will be consecutive
        new_initial_node_labeling = node_labeling[initial_node_labeling]
        assert len(new_initial_node_labeling) == len(initial_node_labeling)

    return n_new_nodes, node_labeling, new_initial_node_labeling
Example #29
0
def find_labeling(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)
    n_jobs = config['n_jobs']
    tmp_folder = config['tmp_folder']
    n_threads = config['threads_per_job']
    assignment_path = config['assignment_path']
    assignment_key = config['assignment_key']

    def _read_input(job_id):
        return np.load(
            os.path.join(tmp_folder, 'find_uniques_job_%i.npy' % job_id))

    fu.log("read uniques")
    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [tp.submit(_read_input, job_id) for job_id in range(n_jobs)]
        uniques = np.concatenate([t.result() for t in tasks])

    fu.log("compute uniques")
    uniques = np.unique(uniques)

    if uniques[0] == 0:
        start_label = 0
        stop_label = len(uniques)
    else:
        start_label = 1
        stop_label = len(uniques) + 1
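    # this keeps id 0 mapped to 0 if it is present; otherwise label 0 stays reserved as background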
    fu.log("relabel to new max-id %i" % stop_label)
    new_ids = np.arange(start_label, stop_label, dtype='uint64')
    assignments = np.concatenate([uniques[:, None], new_ids[:, None]], axis=1)
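    # assignments maps old ids to new consecutive ids: column 0 = original id, column 1 = new id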

    fu.log("saving results to %s/%s" % (assignment_path, assignment_key))
    with vu.file_reader(assignment_path) as f:
        chunk_size = min(int(1e6), len(assignments))
        chunks = (chunk_size, 2)
        ds = vu.force_dataset(f,
                              assignment_key,
                              shape=assignments.shape,
                              dtype='uint64',
                              compression='gzip',
                              chunks=chunks)
        ds.n_threads = n_threads
        ds[:] = assignments

    # log success
    fu.log_job_success(job_id)
Example #30
0
def solve_global(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # path to the reduced problem
    problem_path = config['problem_path']
    # path where the node labeling shall be written
    assignment_path = config['assignment_path']
    assignment_key = config['assignment_key']
    scale = config['scale']
    agglomerator_key = config['agglomerator']
    n_threads = config['threads_per_job']
    time_limit = config.get('time_limit_solver', None)

    fu.log("using agglomerator %s" % agglomerator_key)
    if time_limit is None:
        fu.log("agglomeration without time limit")
    else:
        fu.log("agglomeration time limit %i" % time_limit)
    agglomerator = su.key_to_agglomerator(agglomerator_key)

    with vu.file_reader(problem_path, 'r') as f:
        group = f['s%i' % scale]
        graph_group = group['graph']
        ignore_label = graph_group.attrs['ignoreLabel']

        ds = graph_group['edges']
        ds.n_threads = n_threads
        uv_ids = ds[:]
        n_edges = len(uv_ids)
        n_nodes = int(uv_ids.max() + 1)
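        # the graph nodes are assumed to be consecutive here, so max uv-id + 1 gives the node count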

        # we only need to load the initial node labeling if at
        # least one reduction step was performed i.e. scale > 0
        if scale > 0:
            ds = group['node_labeling']
            ds.n_threads = n_threads
            initial_node_labeling = ds[:]

        ds = group['costs']
        ds.n_threads = n_threads
        costs = ds[:]
        assert len(costs) == n_edges, "%i, %i" % (len(costs), n_edges)

    fu.log("creating graph with %i nodes an %i edges" % (n_nodes, len(uv_ids)))
    graph = nifty.graph.undirectedGraph(n_nodes)
    graph.insertEdges(uv_ids)
    fu.log("start agglomeration")
    node_labeling = agglomerator(graph,
                                 costs,
                                 n_threads=n_threads,
                                 time_limit=time_limit)
    fu.log("finished agglomeration")

    # get the labeling of initial nodes
    if scale > 0:
        initial_node_labeling = node_labeling[initial_node_labeling]
    else:
        initial_node_labeling = node_labeling
    n_nodes = len(initial_node_labeling)

    # make sure zero is mapped to 0 if we have an ignore label
    if ignore_label and initial_node_labeling[0] != 0:
        new_max_label = int(node_labeling.max() + 1)
        initial_node_labeling[initial_node_labeling == 0] = new_max_label
        initial_node_labeling[0] = 0
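        # nodes that were mapped to 0 get a fresh label and the ignore node itself is reset to 0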

    # make node labeling consecutive
    vigra.analysis.relabelConsecutive(initial_node_labeling,
                                      start_label=1,
                                      keep_zeros=True,
                                      out=initial_node_labeling)

    # write node labeling
    node_shape = (n_nodes, )
    chunks = (min(n_nodes, 524288), )
    with vu.file_reader(assignment_path) as f:
        ds = f.require_dataset(assignment_key,
                               dtype='uint64',
                               shape=node_shape,
                               chunks=chunks,
                               compression='gzip')
        ds.n_threads = n_threads
        ds[:] = initial_node_labeling

    fu.log('saving results to %s:%s' % (assignment_path, assignment_key))
    fu.log_job_success(job_id)