Example #1
def agglomerate(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # read the input config
    input_path = config['input_path']
    input_key = config['input_key']
    shape = list(vu.get_shape(input_path, input_key))
    if len(shape) == 4:
        shape = shape[1:]

    block_shape = list(config['block_shape'])
    block_list = config['block_list']

    # read the output config
    output_path = config['output_path']
    output_key = config['output_key']

    # get the blocking
    blocking = nt.blocking([0, 0, 0], shape, block_shape)

    # submit blocks
    with vu.file_reader(input_path, 'r') as f_in, vu.file_reader(output_path) as f_out:
        ds_in = f_in[input_key]
        assert ds_in.ndim in (3, 4)
        ds_out = f_out[output_key]
        assert ds_out.ndim == 3
        for block_id in block_list:
            _agglomerate_block(blocking, block_id, ds_in, ds_out, config)

    # log success
    fu.log_job_success(job_id)
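The per-block helpers such as _agglomerate_block are not part of this listing. As a rough, hypothetical sketch (not the library's actual implementation), a block worker typically turns the block id into a numpy slicing via the blocking object, using the same begin/end pattern that appears in the merge_morphology example further down:

def _example_block_worker(blocking, block_id, ds_in, ds_out):
    # look up the coordinate range covered by this block
    block = blocking.getBlock(block_id)
    bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))
    # read the block, run the actual per-block computation (omitted here) and write back
    data = ds_in[bb]
    ds_out[bb] = data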
Example #2
def region_features(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path, 'r') as f:
        config = json.load(f)

    block_list = config['block_list']
    input_path = config['input_path']
    input_key = config['input_key']
    labels_path = config['labels_path']
    labels_key = config['labels_key']
    output_path = config['output_path']
    output_key = config['output_key']
    block_shape = config['block_shape']
    ignore_label = config['ignore_label']

    with vu.file_reader(input_path) as f_in,\
            vu.file_reader(labels_path) as f_l,\
            vu.file_reader(output_path) as f_out:

        ds_in = f_in[input_key]
        ds_labels = f_l[labels_key]
        ds_out = f_out[output_key]

        shape = ds_out.shape
        blocking = nt.blocking([0, 0, 0], shape, block_shape)

        for block_id in block_list:
            _block_features(block_id, blocking, ds_in, ds_labels, ds_out,
                            ignore_label)

    fu.log_job_success(job_id)
Example #3
def insert(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # path to the reduced problem
    graph_path = config['graph_path']
    graph_key = config['graph_key']
    decomposition_path = config['decomposition_path']
    # path where the node labeling shall be written
    output_path = config['output_path']
    output_key = config['output_key']
    n_threads = config['threads_per_job']

    tmp_folder = config['tmp_folder']
    n_jobs = config['n_jobs']

    # load the graph
    graph = ndist.Graph(os.path.join(graph_path, graph_key),
                        numberOfThreads=n_threads)
    with vu.file_reader(graph_path, 'r') as f:
        ignore_label = f[graph_key].attrs['ignoreLabel']

    # load the cut edges from initial decomposition
    with vu.file_reader(decomposition_path, 'r') as f:
        ds = f['cut_edges']
        ds.n_threads = n_threads
        cut_edges_decomp = ds[:]

    # load all the sub results
    cut_edges = np.concatenate([
        np.load(
            os.path.join(tmp_folder, 'subproblem_results',
                         'job%i.npy' % job_id)) for job_id in range(n_jobs)
    ])
    cut_edges = np.unique(cut_edges).astype('uint64')
    cut_edges = np.concatenate([cut_edges_decomp, cut_edges])

    edge_labels = np.zeros(graph.numberOfEdges, dtype='bool')
    edge_labels[cut_edges] = 1

    node_labeling = ndist.connectedComponents(graph, edge_labels, ignore_label)

    n_nodes = len(node_labeling)
    node_shape = (n_nodes, )
    chunks = (min(n_nodes, 524288), )
    with vu.file_reader(output_path) as f:
        ds = f.require_dataset(output_key,
                               dtype='uint64',
                               shape=node_shape,
                               chunks=chunks,
                               compression='gzip')
        ds.n_threads = n_threads
        ds[:] = node_labeling
    fu.log('saving results to %s' % output_path)
    fu.log('and key %s' % output_key)
    fu.log_job_success(job_id)
Example #4
def block_statistics(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    path = config['path']
    key = config['key']
    tmp_folder = config['tmp_folder']

    block_shape = config['block_shape']
    block_list = config['block_list']

    with vu.file_reader(path, 'r') as f:
        shape = f[key].shape

    blocking = nt.blocking([0, 0, 0], list(shape), list(block_shape))

    with vu.file_reader(path, 'r') as f_in:
        ds = f_in[key]
        block_stats = [
            _compute_block_stats(block_id, blocking, ds)
            for block_id in block_list
        ]

    save_path = os.path.join(tmp_folder,
                             'block_statistics_job%i.json' % job_id)
    job_stats = merge_stats(block_stats)
    with open(save_path, 'w') as f:
        json.dump(job_stats, f)

    fu.log_job_success(job_id)
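merge_stats itself is not shown on this page. A minimal sketch of what merging per-block statistics could look like, assuming each block entry only records a voxel count, sum, minimum and maximum (the real helper may track more fields):

def merge_stats_example(stat_list):
    # counts and sums add up, min/max reduce, the mean is recomputed from the totals
    merged = {'count': 0, 'sum': 0.0, 'min': float('inf'), 'max': float('-inf')}
    for stat in stat_list:
        merged['count'] += stat['count']
        merged['sum'] += stat['sum']
        merged['min'] = min(merged['min'], stat['min'])
        merged['max'] = max(merged['max'], stat['max'])
    merged['mean'] = merged['sum'] / max(merged['count'], 1)
    return merged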
Example #5
def merge_region_features(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path, 'r') as f:
        config = json.load(f)
    output_path = config['output_path']
    output_key = config['output_key']
    tmp_path = config['tmp_path']
    tmp_key = config['tmp_key']
    node_block_list = config['block_list']
    node_chunk_size = config['node_chunk_size']

    with vu.file_reader(output_path) as f,\
            vu.file_reader(tmp_path) as f_in:

        ds_in = f_in[tmp_key]
        ds = f[output_key]
        n_nodes = ds.shape[0]

        node_blocking = nt.blocking([0], [n_nodes], [node_chunk_size])
        node_begin = node_blocking.getBlock(node_block_list[0]).begin[0]
        node_end = node_blocking.getBlock(node_block_list[-1]).end[0]

        shape = list(ds_in.shape)
        chunks = list(ds_in.chunks)
        blocking = nt.blocking([0, 0, 0], shape, chunks)

        _extract_and_merge_region_features(blocking, ds_in, ds, node_begin, node_end)

    fu.log_job_success(job_id)
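The 1d node blocking above works like the 3d case: each block covers a contiguous node range. For illustration, with hypothetical numbers:

import nifty.tools as nt

node_blocking = nt.blocking([0], [1000], [256])
block = node_blocking.getBlock(1)
# block.begin[0] == 256, block.end[0] == 512,
# i.e. block 1 covers the node range [256, 512)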
Example #6
def sparse_lifted_neighborhood(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    graph_path = config['graph_path']
    graph_key = config['graph_key']
    node_label_path = config['node_label_path']
    node_label_key = config['node_label_key']
    output_path = config['output_path']
    output_key = config['output_key']

    n_threads = config.get('threads_per_job', 1)
    graph_depth = config['nh_graph_depth']
    node_ignore_label = config['node_ignore_label']

    mode = config.get('mode', 'all')
    fu.log("lifted nh mode set to %s, depth set to %i" % (mode, graph_depth))
    fu.log("have ignore label: %i" % node_ignore_label)

    fu.log("start lifted neighborhood extraction for depth %i" % graph_depth)
    ndist.computeLiftedNeighborhoodFromNodeLabels(
        graph_path, graph_key, node_label_path, node_label_key, output_path,
        output_key, graph_depth, n_threads, mode, node_ignore_label)
    with vu.file_reader(output_path, 'r') as f:
        n_lifted = f[output_key].shape[0]
    fu.log("extracted %i lifted edges" % n_lifted)
    fu.log_job_success(job_id)
Example #7
def block_faces(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)
    input_path = config['input_path']
    input_key = config['input_key']
    block_list = config['block_list']
    tmp_folder = config['tmp_folder']
    offsets_path = config['offsets_path']
    block_shape = config['block_shape']

    with open(offsets_path) as f:
        offsets = json.load(f)['offsets']

    with vu.file_reader(input_path, 'r') as f:
        ds = f[input_key]
        shape = list(ds.shape)

        blocking = nt.blocking([0, 0, 0], shape, block_shape)
        assignments = [
            _process_faces(block_id, blocking, ds, offsets)
            for block_id in block_list
        ]
    # filter out empty assignments
    assignments = [ass for ass in assignments if ass is not None]
    assignments = np.concatenate(assignments, axis=0)
    assignments = np.unique(assignments, axis=0)

    save_path = os.path.join(tmp_folder, 'assignments_%i.npy' % job_id)
    np.save(save_path, assignments)
    fu.log_job_success(job_id)
Example #8
def merge_morphology(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']

    block_list = config['block_list']
    out_shape = config['out_shape']
    out_chunks = config['out_chunks']

    blocking = nt.blocking([0], out_shape[:1], out_chunks[:1])

    # merge and serialize the overlaps
    for block_id in block_list:
        block = blocking.getBlock(block_id)
        label_begin = block.begin[0]
        label_end = block.end[0]
        ndist.mergeAndSerializeMorphology(os.path.join(input_path, input_key),
                                          os.path.join(output_path, output_key),
                                          labelBegin=label_begin, labelEnd=label_end)
    fu.log_job_success(job_id)
Example #9
def block_components(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']
    block_list = config['block_list']
    tmp_folder = config['tmp_folder']
    block_shape = config['block_shape']
    threshold = config['threshold']
    threshold_mode = config['threshold_mode']

    mask_path = config.get('mask_path', '')
    mask_key = config.get('mask_key', '')

    channel = config.get('channel', None)

    fu.log("Applying threshold %f with mode %s" % (threshold, threshold_mode))

    with vu.file_reader(input_path, 'r') as f_in,\
        vu.file_reader(output_path) as f_out:

        ds_in = f_in[input_key]
        ds_out = f_out[output_key]

        shape = ds_in.shape
        if channel is not None:
            shape = shape[1:]
        assert len(shape) == 3

        blocking = nt.blocking([0, 0, 0], list(shape), block_shape)

        if mask_path != '':
            # note that the mask is usually small enough to keep it
            # in memory (and we interpolate to get to the full volume);
            # if this does not hold, this code needs to be changed!
            mask = vu.load_mask(mask_path, mask_key, shape)
            offsets = [
                _cc_block_with_mask(block_id, blocking, ds_in, ds_out,
                                    threshold, threshold_mode, mask, channel)
                for block_id in block_list
            ]

        else:
            offsets = [
                _cc_block(block_id, blocking, ds_in, ds_out, threshold,
                          threshold_mode, channel) for block_id in block_list
            ]

    offset_dict = {block_id: off for block_id, off in zip(block_list, offsets)}
    save_path = os.path.join(tmp_folder,
                             'connected_components_offsets_%i.json' % job_id)
    with open(save_path, 'w') as f:
        json.dump(offset_dict, f)
    fu.log_job_success(job_id)
Example #10
def skeletonize(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # read the input config
    input_path = config['input_path']
    input_key = config['input_key']

    output_path = config['output_path']
    output_key = config['output_key']
    skeleton_format = config['skeleton_format']
    n_threads = config.get('threads_per_job', 1)

    # load input segmentation
    with vu.file_reader(input_path) as f_in:
        ds_in = f_in[input_key]
        ds_in.n_threads = n_threads
        seg = ds_in[:]

    fu.log("writing output in format %s" % skeleton_format)
    fu.log("to %s:%s" % (output_path, output_key))
    if skeleton_format == 'volume':
        _skeletonize_to_volume(seg, output_path, output_key, config)
    elif skeleton_format == 'swc':
        _skeletonize_to_swc(seg, output_path, output_key, config)
    elif skeleton_format == 'n5':
        _skeletonize_to_n5(seg, output_path, output_key, config)
    else:
        raise RuntimeError("Format %s not supported" % skeleton_format)

    # log success
    fu.log_job_success(job_id)
Example #11
def orphan_assignments(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    # load from config
    assignment_path = config['assignment_path']
    assignment_key = config['assignment_key']
    graph_path = config['graph_path']
    graph_key = config['graph_key']
    output_path = config['output_path']
    output_key = config['output_key']
    relabel = config['relabel']
    n_threads = config.get('threads_per_job', 1)

    # load the uv-ids and assignments
    with vu.file_reader(graph_path) as f:
        ds = f['%s/edges' % graph_key]
        ds.n_threads = n_threads
        uv_ids = ds[:]
    with vu.file_reader(assignment_path) as f:
        ds = f[assignment_key]
        ds.n_threads = n_threads
        chunks = ds.chunks
        assignments = ds[:]

    n_new_nodes = int(assignments.max()) + 1
    # find the new uv-ids
    edge_mapping = nt.EdgeMapping(uv_ids, assignments, numberOfThreads=n_threads)
    new_uv_ids = edge_mapping.newUvIds()

    # find all orphans = segments that have node degree one
    ids, node_degrees = np.unique(new_uv_ids, return_counts=True)
    orphans = ids[node_degrees == 1]
    n_orphans = len(orphans)
    fu.log("Found %i orphans of %i clusters" % (n_orphans, n_new_nodes))

    # make graph for fast neighbor search
    graph = nifty.graph.undirectedGraph(n_new_nodes)
    graph.insertEdges(new_uv_ids)

    orphan_assignments = np.array([next(graph.nodeAdjacency(orphan_id))[0]
                                   for orphan_id in orphans],)
    assert len(orphan_assignments) == n_orphans, "%i, %i" % (len(orphan_assignments), n_orphans)
    assignments[orphans] = orphan_assignments.astype('uint64')

    if relabel:
        vigra.analysis.relabelConsecutive(assignments, out=assignments,
                                          start_label=1, keep_zeros=True)

    with vu.file_reader(output_path) as f:
        ds = f.require_dataset(output_key, shape=assignments.shape, chunks=chunks,
                               compression='gzip', dtype='uint64')
        ds[:] = assignments

    fu.log_job_success(job_id)
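The orphan detection only needs the uv-ids: a node that occurs in exactly one edge has degree one. A small standalone illustration with numpy:

import numpy as np

new_uv_ids = np.array([[0, 1], [1, 2], [2, 3]])
ids, node_degrees = np.unique(new_uv_ids, return_counts=True)
orphans = ids[node_degrees == 1]
# orphans == [0, 3]: nodes 0 and 3 appear in only a single edge;
# each orphan is then assigned to its single neighbor via graph.nodeAdjacency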
Example #12
def check_sub_graphs(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # read the config
    with open(config_path) as f:
        config = json.load(f)
    ws_path = config['ws_path']
    ws_key = config['ws_key']
    graph_block_prefix = config['graph_block_prefix']
    block_shape = config['block_shape']
    block_list = config['block_list']
    tmp_folder = config['tmp_folder']

    with vu.file_reader(ws_path, 'r') as f:
        ds = f[ws_key]
        shape = list(ds.shape)
        blocking = nt.blocking([0, 0, 0], shape, block_shape)
        violating_blocks = [
            check_block(block_id, blocking, ds, graph_block_prefix)
            for block_id in block_list
        ]
        violating_blocks = [vb for vb in violating_blocks if vb is not None]
    save_path = os.path.join(tmp_folder, 'failed_blocks_job_%i.json' % job_id)
    with open(save_path, 'w') as f:
        json.dump(violating_blocks, f)

    # log success
    fu.log_job_success(job_id)
Example #13
def gradients(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)
    path_dict = config['path_dict']
    output_path = config['output_path']
    output_key = config['output_key']
    block_list = config['block_list']
    block_shape = config['block_shape']
    average_gradient = config['average_gradient']

    with open(path_dict) as f:
        path_dict = json.load(f)

    input_datasets = []
    for path in sorted(path_dict):
        input_datasets.append(vu.file_reader(path, 'r')[path_dict[path]])

    # 5 pix should be enough halo to make gradient computation correct
    halo = 3 * [5]
    with vu.file_reader(output_path) as f:
        ds = f[output_key]
        shape = ds.shape if average_gradient else ds.shape[1:]
        blocking = nt.blocking([0, 0, 0], list(shape), block_shape)
        [
            _gradients_block(block_id, blocking, input_datasets, ds, halo,
                             average_gradient) for block_id in block_list
        ]
    fu.log_job_success(job_id)
Example #14
def copy_and_crop(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']

    roi_start = config['roi_start']
    roi_size = config['roi_size']
    bb = tuple(slice(rs, rs + size) for rs, size in zip(roi_start, roi_size))
    max_threads = config.get('threads_per_job', 1)

    ds_in = z5py.File(input_path)[input_key]
    ds_in.n_threads = max_threads
    seg = ds_in[bb]
    max_id = int(seg.max())

    f = z5py.File(output_path)
    ds_out = f.require_dataset(output_key,
                               shape=seg.shape,
                               chunks=ds_in.chunks,
                               compression='gzip',
                               dtype='uint64')
    ds_out.n_threads = max_threads
    ds_out[:] = seg
    ds_out.attrs['maxId'] = max_id

    fu.log_job_success(job_id)
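The bounding box derived from roi_start and roi_size is simply a tuple of slices; for example, with hypothetical values:

roi_start = [10, 20, 30]
roi_size = [64, 64, 64]
bb = tuple(slice(rs, rs + size) for rs, size in zip(roi_start, roi_size))
# bb == (slice(10, 74), slice(20, 84), slice(30, 94))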
Example #15
def conseq_labels(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    print("RUN")
    # get the config
    with open(config_path, 'r') as f:
        config = json.load(f)
    output_path = config['output_path']
    output_key = config['output_key']
    input_path = config['input_path']
    input_key = config['input_key']

    # load the labels
    with vu.file_reader(input_path, 'r') as f:
        labels = f[input_key][:]
        unique = np.unique(labels)
        output = np.zeros(labels.shape, dtype=labels.dtype)
        next_l = 0
        for l in sorted(list(unique)):
            output[labels == l] = next_l
            next_l = next_l + 1

        with vu.file_reader(output_path, 'w') as fout:
            fout.create_dataset(output_key,
                                data=output,
                                chunks=f[input_key].chunks,
                                compression='gzip')

    fu.log_job_success(job_id)
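The per-label loop in conseq_labels can also be written as a single vectorized lookup; a sketch of the equivalent mapping (it starts the new labels at 0, like the loop above):

import numpy as np

labels = np.array([[4, 9], [4, 2]])
unique = np.unique(labels)                # [2, 4, 9]
output = np.searchsorted(unique, labels)  # [[1, 2], [1, 0]]
# every label is replaced by its index in the sorted unique values,
# so the output labels are consecutive starting at 0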
Example #16
def unique_block_labels(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # read the config
    with open(config_path) as f:
        config = json.load(f)
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']

    block_list = config['block_list']
    block_shape = config['block_shape']
    is_multiset = config['is_multiset']

    # open the input file
    with vu.file_reader(input_path,
                        'r') as f, vu.file_reader(output_path) as f_out:
        ds = f[input_key]
        ds_out = f_out[output_key]
        chunks = ds.chunks
        shape = ds.shape
        assert tuple(chunks) == tuple(block_shape),\
            "Chunks %s and block shape %s must agree" % (str(chunks), str(block_shape))

        blocking = nt.blocking([0, 0, 0], shape, block_shape)

        if is_multiset:
            _uniques_multiset(ds, ds_out, blocking, block_list)
        else:
            _uniques_default(ds, ds_out, blocking, block_list)

    # log success
    fu.log_job_success(job_id)
Example #17
def id_filter(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    node_label_path = config['node_label_path']
    node_label_key = config['node_label_key']
    output_path = config['output_path']
    filter_labels = np.array(config['filter_labels'], dtype='uint64')

    with vu.file_reader(node_label_path, 'r') as f:
        node_labels = f[node_label_key][:]

    # find the node ids that overlap with the filter labels
    filter_mask = np.in1d(node_labels, filter_labels)
    filter_ids = np.where(filter_mask)[0].tolist()

    fu.log("%i ids will be filtered" % len(filter_ids))
    fu.log("saving filter ids to %s" % output_path)
    with open(output_path, 'w') as f:
        json.dump(filter_ids, f)

    fu.log_job_success(job_id)
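np.in1d returns a boolean mask over node_labels, so np.where yields the node ids whose label is among the filter labels; a tiny worked example:

import numpy as np

node_labels = np.array([5, 0, 7, 5, 2], dtype='uint64')
filter_labels = np.array([5, 2], dtype='uint64')
filter_ids = np.where(np.in1d(node_labels, filter_labels))[0].tolist()
# filter_ids == [0, 3, 4]: these nodes carry label 5 or 2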
Example #18
def block_morphology(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']

    block_shape = config['block_shape']
    block_list = config['block_list']

    with vu.file_reader(input_path, 'r') as f:
        shape = f[input_key].shape

    blocking = nt.blocking([0, 0, 0],
                           list(shape),
                           list(block_shape))

    with vu.file_reader(input_path, 'r') as f_in:
        ds_in = f_in[input_key]
        [_morphology_for_block(block_id, blocking, ds_in,
                               output_path, output_key)
         for block_id in block_list]
    fu.log_job_success(job_id)
Example #19
def blocks_from_mask(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)

    mask_path = config['mask_path']
    mask_key = config['mask_key']
    output_path = config['output_path']
    shape = config['shape']
    block_shape = config['block_shape']
    n_threads = config.get('threads_per_job', 1)

    # NOTE we assume that the mask is small and will fit into memory
    with vu.file_reader(mask_path, 'r') as f:
        ds = f[mask_key]
        ds.n_threads = n_threads
        mask_data = ds[:]
    mask = ResizedVolume(mask_data, tuple(shape))

    blocking = nt.blocking([0, 0, 0], shape, list(block_shape))
    blocks_in_mask = _get_blocks_in_mask(mask, blocking, n_threads)

    with open(output_path, 'w') as f:
        json.dump(blocks_in_mask, f)

    fu.log_job_success(job_id)
Example #20
def watershed_from_seeds(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # read the input config
    input_path = config['input_path']
    input_key = config['input_key']
    shape = list(vu.get_shape(input_path, input_key))
    if len(shape) == 4:
        shape = shape[1:]

    block_shape = list(config['block_shape'])
    block_list = config['block_list']

    # TODO seeds and output might be identical
    # in that case we would need in-place logic if we
    # want to support h5 (it's fine with n5 as is)
    # read the seed and output config
    seeds_path = config['seeds_path']
    seeds_key = config['seeds_key']
    output_path = config['output_path']
    output_key = config['output_key']

    # check if we have a mask
    with_mask = 'mask_path' in config
    if with_mask:
        mask_path = config['mask_path']
        mask_key = config['mask_key']

    # get the blocking
    blocking = nt.blocking([0, 0, 0], shape, block_shape)

    # submit blocks
    with vu.file_reader(input_path, 'r') as f_in,\
         vu.file_reader(seeds_path, 'r') as f_seeds,\
         vu.file_reader(output_path) as f_out:

        ds_in = f_in[input_key]
        assert ds_in.ndim in (3, 4)
        ds_seeds = f_seeds[seeds_key]
        assert ds_seeds.ndim == 3
        ds_out = f_out[output_key]
        assert ds_out.ndim == 3

        # note that the mask is usually small enough to keep it
        # in memory (and we interpolate to get to the full volume);
        # if this does not hold, this code needs to be changed!
        if with_mask:
            mask = vu.load_mask(mask_path, mask_key, shape)
            for block_id in block_list:
                _ws_block_masked(blocking, block_id, ds_in, ds_seeds, ds_out,
                                 mask, config)

        else:
            for block_id in block_list:
                _ws_block(blocking, block_id, ds_in, ds_seeds, ds_out, config)
    # log success
    fu.log_job_success(job_id)
Example #21
def label_block_mapping(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # read the config
    with open(config_path) as f:
        config = json.load(f)
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']
    number_of_labels = config['number_of_labels']

    roi_begin = config.get('roi_begin', None)
    roi_end = config.get('roi_end', None)
    assert (roi_begin is None) == (roi_end is None)

    # we need to turn `None` rois to empty lists,
    # because I don't really understand how pybind11 handles None yet
    if roi_begin is None:
        roi_begin = []
        roi_end = []

    n_threads = config.get('threads_per_job', 1)

    ndist.serializeBlockMapping(os.path.join(input_path, input_key),
                                os.path.join(output_path,
                                             output_key), number_of_labels,
                                n_threads, roi_begin, roi_end)

    # log success
    fu.log_job_success(job_id)
Example #22
def sparse_lifted_neighborhood(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    graph_path = config['graph_path']
    graph_key = config['graph_key']
    node_label_path = config['node_label_path']
    node_label_key = config['node_label_key']
    output_path = config['output_path']
    output_key = config['output_key']

    n_threads = config.get('threads_per_job', 1)
    graph_depth = config['nh_graph_depth']

    mode = config.get('mode', 'all')
    fu.log("lifted nh mode set to %s, depth set to %i" % (mode, graph_depth))

    fu.log("start lifted neighborhood extraction for depth %i" % graph_depth)
    ndist.computeLiftedNeighborhoodFromNodeLabels(
        os.path.join(graph_path, graph_key),
        os.path.join(node_label_path, node_label_key),
        os.path.join(output_path, output_key), graph_depth, n_threads, mode)

    fu.log_job_success(job_id)
Example #23
def skeleton_evaluation(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # read the input config
    input_path = config['input_path']
    input_key = config['input_key']

    skeleton_path = config['skeleton_path']
    skeleton_key = config['skeleton_key']

    output_path = config['output_path']
    skeleton_format = config['skeleton_format']
    n_threads = config.get('threads_per_job', 1)

    # TODO adapt nskel.SkeletonMetrics to new n5 skeleton format
    skeleton_ids = os.listdir(os.path.join(skeleton_path, skeleton_key))
    skeleton_ids = [int(sk) for sk in skeleton_ids if sk.isdigit()]
    skeleton_ids.sort()
    metrics = nskel.SkeletonMetrics(os.path.join(input_path, input_key),
                                    os.path.join(skeleton_path, skeleton_key),
                                    skeleton_ids, n_threads)

    # TODO expose parameters for different eval options
    correct, split, merge, n_merges = metrics.computeGoogleScore(n_threads)
    res = {'correct': correct, 'split': split, 'merge': merge, 'n_merges': n_merges}
    with open(output_path, 'w') as f:
        json.dump(res, f)

    # log success
    fu.log_job_success(job_id)
Example #24
def merge_predictions(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path, 'r') as f:
        config = json.load(f)

    output_path = config['output_path']
    output_key = config['output_key']
    tmp_prefix = config['tmp_prefix']
    halo = config['halo']
    n_channels = config['n_channels']

    shape = vu.get_shape(output_path, output_key)
    if len(shape) > 3:
        shape = shape[-3:]
    block_shape = config['block_shape']
    blocking = nt.blocking([0, 0, 0], shape, block_shape)

    # TODO we could parallelize this
    with vu.file_reader(output_path) as f:
        ds = f[output_key]
        for block_id in range(blocking.numberOfBlocks):
            _merge_block(block_id, blocking, ds, tmp_prefix, halo, n_channels)

    fu.log_job_success(job_id)
Example #25
def simple_stitch_edges(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)

    graph_path = config['graph_path']
    labels_path = config['labels_path']
    labels_key = config['labels_key']
    n_edges = config['n_edges']
    block_list = config['block_list']
    block_shape = config['block_shape']

    out_path = config['out_path']
    out_key = 'job_results/job_%i' % job_id

    subgraph_key = 's0/sub_graphs'
    res = ndist.findBlockBoundaryEdges(graph_path, subgraph_key, labels_path,
                                       labels_key, n_edges, block_shape,
                                       block_list)
    fu.log('Found %i / %i block boundary edges' % (res.sum(), len(res)))

    with vu.file_reader(out_path) as f:
        chunks = (min(int(1e6), len(res)), )
        vu.force_dataset(f,
                         out_key,
                         data=res.astype('uint8'),
                         compression='gzip',
                         chunks=chunks,
                         shape=res.shape)

    fu.log_job_success(job_id)
Example #26
def insert_affinities(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)
    affinity_path = config['affinity_path']
    affinity_key = config['affinity_key']
    objects_path = config['objects_path']
    objects_key = config['objects_key']

    block_list = config['block_list']
    block_shape = config['block_shape']
    offsets = config['offsets']

    with vu.file_reader(affinity_path) as f_in, vu.file_reader(
            objects_path) as f_obj:
        ds = f_in[affinity_key]
        shape = ds.shape[1:]

        # TODO actually check that objects are on a lower scale
        ds_objs = f_obj[objects_key]
        objects = vu.InterpolatedVolume(ds_objs, shape)

        blocking = nt.blocking([0, 0, 0], list(shape), block_shape)
        [
            _insert_affinities_block(block_id, blocking, ds, objects, offsets)
            for block_id in block_list
        ]

    fu.log_job_success(job_id)
Example #27
def merge_statistics(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    n_jobs = config['n_jobs']
    tmp_folder = config['tmp_folder']
    output_path = config['output_path']

    job_stats = []
    for stat_job_id in range(n_jobs):
        job_path = os.path.join(tmp_folder,
                                'block_statistics_job%i.json' % stat_job_id)
        with open(job_path) as f:
            job_stat = json.load(f)
        job_stats.append(job_stat)

    stats = merge_stats(job_stats)
    with open(output_path, 'w') as f:
        json.dump(stats, f)

    fu.log_job_success(job_id)
Example #28
def merge_offsets(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)
    tmp_folder = config['tmp_folder']
    n_jobs = config['n_jobs']
    save_path = config['save_path']
    n_blocks = config['n_blocks']

    offsets = {}
    for block_job_id in range(n_jobs):
        path = os.path.join(tmp_folder,
                            'connected_components_offsets_%i.json' % block_job_id)
        with open(path, 'r') as f:
            offsets.update(json.load(f))
        os.remove(path)

    # json keys are strings, so sort by the integer block id
    offset_list = [v for _, v in sorted(offsets.items(), key=lambda kv: int(kv[0]))]
    offset_list = np.array(offset_list, dtype='uint64')
    empty_blocks = np.where(offset_list == 0)[0].tolist()

    offset_list = np.roll(offset_list, 1)
    offset_list[0] = 0
    offset_list = np.cumsum(offset_list).tolist()
    assert len(offset_list) == n_blocks, "%i, %i" % (len(offset_list), n_blocks)

    fu.log("dumping offsets to %s" % save_path)
    with open(save_path, 'w') as f:
        json.dump({'offsets': offset_list,
                   'empty_blocks': empty_blocks}, f)
    fu.log_job_success(job_id)
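The roll/cumsum step in merge_offsets converts the per-block label counts into the starting offset of each block; a small worked example with made-up counts:

import numpy as np

offset_list = np.array([3, 0, 5, 2], dtype='uint64')    # new labels per block
empty_blocks = np.where(offset_list == 0)[0].tolist()   # [1]
offset_list = np.roll(offset_list, 1)                   # [2, 3, 0, 5]
offset_list[0] = 0                                      # [0, 3, 0, 5]
offset_list = np.cumsum(offset_list).tolist()           # [0, 3, 3, 8]
# block 0 starts its labels at 0, block 1 at 3,
# block 2 (empty) also at 3 and block 3 at 8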
Example #29
def minfilter(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # input/output files
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']

    # blocks and task config
    block_list = config['block_list']
    block_shape = config['block_shape']
    filter_shape = config['filter_shape']

    with vu.file_reader(input_path,
                        'r') as f_in, vu.file_reader(output_path) as f_out:
        ds_in = f_in[input_key]
        ds_out = f_out[output_key]
        shape = ds_in.shape

        blocking = nt.blocking(roiBegin=[0, 0, 0],
                               roiEnd=list(shape),
                               blockShape=list(block_shape))

        # TODO is half of the halo really enough halo ?
        halo = list(fshape // 2 for fshape in filter_shape)
        [
            _minfilter_block(block_id, blocking, halo, ds_in, ds_out,
                             filter_shape) for block_id in block_list
        ]
    # log success
    fu.log_job_success(job_id)
Example #30
def solve_subproblems(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    problem_path = config['problem_path']
    scale = config['scale']
    block_shape = config['block_shape']
    block_list = config['block_list']
    n_threads = config['threads_per_job']
    agglomerator_key = config['agglomerator']
    time_limit = config.get('time_limit_solver', None)

    fu.log("reading problem from %s" % problem_path)
    problem = z5py.N5File(problem_path)
    shape = problem.attrs['shape']

    # load the costs
    costs_key = 's%i/costs' % scale
    fu.log("reading costs from path in problem: %s" % costs_key)
    ds = problem[costs_key]
    ds.n_threads = n_threads
    costs = ds[:]

    # load the graph
    graph_key = 's%i/graph' % scale
    fu.log("reading graph from path in problem: %s" % graph_key)
    graph = ndist.Graph(os.path.join(problem_path, graph_key),
                        numberOfThreads=n_threads)
    uv_ids = graph.uvIds()
    # check if the problem has an ignore-label
    ignore_label = problem[graph_key].attrs['ignoreLabel']
    fu.log("ignore label is %s" % ('true' if ignore_label else 'false'))

    fu.log("using agglomerator %s" % agglomerator_key)
    agglomerator = su.key_to_agglomerator(agglomerator_key)

    # the output group
    out = problem['s%i/sub_results' % scale]

    # TODO this should be an n5 varlen dataset as well and
    # then this is just another dataset in problem path
    block_prefix = os.path.join(problem_path, 's%i' % scale, 'sub_graphs',
                                'block_')
    blocking = nt.blocking([0, 0, 0], shape, list(block_shape))

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [
            tp.submit(_solve_block_problem, block_id, graph, uv_ids,
                      block_prefix, costs, agglomerator, ignore_label,
                      blocking, out, time_limit) for block_id in block_list
        ]
        [t.result() for t in tasks]

    fu.log_job_success(job_id)