def agglomerate(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # read the input config
    input_path = config['input_path']
    input_key = config['input_key']
    shape = list(vu.get_shape(input_path, input_key))
    if len(shape) == 4:
        shape = shape[1:]
    block_shape = list(config['block_shape'])
    block_list = config['block_list']

    # read the output config
    output_path = config['output_path']
    output_key = config['output_key']

    # get the blocking
    blocking = nt.blocking([0, 0, 0], shape, block_shape)

    # submit blocks
    with vu.file_reader(input_path, 'r') as f_in, vu.file_reader(output_path) as f_out:
        ds_in = f_in[input_key]
        assert ds_in.ndim in (3, 4)
        ds_out = f_out[output_key]
        assert ds_out.ndim == 3

        for block_id in block_list:
            _agglomerate_block(blocking, block_id, ds_in, ds_out, config)

    # log success
    fu.log_job_success(job_id)
def region_features(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path, 'r') as f:
        config = json.load(f)
    block_list = config['block_list']
    input_path = config['input_path']
    input_key = config['input_key']
    labels_path = config['labels_path']
    labels_key = config['labels_key']
    output_path = config['output_path']
    output_key = config['output_key']
    block_shape = config['block_shape']
    ignore_label = config['ignore_label']

    with vu.file_reader(input_path) as f_in,\
            vu.file_reader(labels_path) as f_l,\
            vu.file_reader(output_path) as f_out:

        ds_in = f_in[input_key]
        ds_labels = f_l[labels_key]
        ds_out = f_out[output_key]

        shape = ds_out.shape
        blocking = nt.blocking([0, 0, 0], shape, block_shape)
        for block_id in block_list:
            _block_features(block_id, blocking, ds_in,
                            ds_labels, ds_out, ignore_label)

    fu.log_job_success(job_id)
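
# NOTE: `_block_features` is not defined in this file. The sketch below is a
# minimal, numpy-only guess at what it might look like; the (count, mean) row
# layout of `ds_out` and the per-row writes are assumptions, not the actual
# implementation.
import numpy as np

def _block_features(block_id, blocking, ds_in, ds_labels, ds_out, ignore_label):
    # load the data and label block
    block = blocking.getBlock(block_id)
    bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))
    data = ds_in[bb]
    labels = ds_labels[bb]

    # compute per-label count and mean with plain numpy
    ids, inverse = np.unique(labels, return_inverse=True)
    inverse = inverse.ravel()
    counts = np.bincount(inverse)
    sums = np.bincount(inverse, weights=data.astype('float64').ravel())
    means = sums / counts

    # drop the ignore label if given
    if ignore_label is not None:
        keep = ids != ignore_label
        ids, counts, means = ids[keep], counts[keep], means[keep]

    # write one (count, mean) row per label id (assumed output layout)
    for lid, cnt, mean in zip(ids, counts, means):
        ds_out[int(lid), :] = np.array([cnt, mean])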
def insert(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    # path to the reduced problem
    graph_path = config['graph_path']
    graph_key = config['graph_key']
    decomposition_path = config['decomposition_path']
    # path where the node labeling shall be written
    output_path = config['output_path']
    output_key = config['output_key']

    n_threads = config['threads_per_job']
    tmp_folder = config['tmp_folder']
    n_jobs = config['n_jobs']

    # load the graph
    graph = ndist.Graph(os.path.join(graph_path, graph_key),
                        numberOfThreads=n_threads)
    with vu.file_reader(graph_path, 'r') as f:
        ignore_label = f[graph_key].attrs['ignoreLabel']

    # load the cut edges from the initial decomposition
    with vu.file_reader(decomposition_path, 'r') as f:
        ds = f['cut_edges']
        ds.n_threads = n_threads
        cut_edges_decomp = ds[:]

    # load all the sub results; the loop variable must not shadow
    # this function's `job_id` argument
    cut_edges = np.concatenate([np.load(os.path.join(tmp_folder, 'subproblem_results',
                                                     'job%i.npy' % sub_job_id))
                                for sub_job_id in range(n_jobs)])
    cut_edges = np.unique(cut_edges).astype('uint64')
    cut_edges = np.concatenate([cut_edges_decomp, cut_edges])

    edge_labels = np.zeros(graph.numberOfEdges, dtype='bool')
    edge_labels[cut_edges] = 1
    node_labeling = ndist.connectedComponents(graph, edge_labels, ignore_label)

    n_nodes = len(node_labeling)
    node_shape = (n_nodes,)
    chunks = (min(n_nodes, 524288),)
    with vu.file_reader(output_path) as f:
        ds = f.require_dataset(output_key, dtype='uint64',
                               shape=node_shape, chunks=chunks,
                               compression='gzip')
        ds.n_threads = n_threads
        ds[:] = node_labeling

    fu.log('saving results to %s' % output_path)
    fu.log('and key %s' % output_key)
    fu.log_job_success(job_id)
def block_statistics(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    path = config['path']
    key = config['key']
    tmp_folder = config['tmp_folder']
    block_shape = config['block_shape']
    block_list = config['block_list']

    with vu.file_reader(path, 'r') as f_in:
        ds = f_in[key]
        shape = ds.shape
        blocking = nt.blocking([0, 0, 0], list(shape), list(block_shape))
        block_stats = [_compute_block_stats(block_id, blocking, ds)
                       for block_id in block_list]

    save_path = os.path.join(tmp_folder, 'block_statistics_job%i.json' % job_id)
    job_stats = merge_stats(block_stats)
    with open(save_path, 'w') as f:
        json.dump(job_stats, f)
    fu.log_job_success(job_id)
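
# NOTE: `_compute_block_stats` and `merge_stats` are not shown in this file.
# A minimal sketch, assuming the statistics are dicts of count/mean/var/min/max:
# means merge by count-weighting, variances via the law of total variance.
# The same `merge_stats` would then also serve `merge_statistics` below.
import numpy as np

def _compute_block_stats(block_id, blocking, ds):
    # load the block and compute simple statistics over it
    block = blocking.getBlock(block_id)
    bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))
    data = ds[bb].astype('float64')
    return {'count': data.size,
            'mean': float(data.mean()),
            'var': float(data.var()),
            'min': float(data.min()),
            'max': float(data.max())}

def merge_stats(stats):
    # combine per-block statistics into global ones
    counts = np.array([st['count'] for st in stats], dtype='float64')
    means = np.array([st['mean'] for st in stats])
    variances = np.array([st['var'] for st in stats])
    n = counts.sum()
    mean = (counts * means).sum() / n
    # law of total variance: within-block plus between-block contribution
    var = (counts * (variances + (means - mean) ** 2)).sum() / n
    return {'count': int(n), 'mean': float(mean), 'var': float(var),
            'min': min(st['min'] for st in stats),
            'max': max(st['max'] for st in stats)}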
def merge_region_features(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path, 'r') as f:
        config = json.load(f)
    output_path = config['output_path']
    output_key = config['output_key']
    tmp_path = config['tmp_path']
    tmp_key = config['tmp_key']
    node_block_list = config['block_list']
    node_chunk_size = config['node_chunk_size']

    with vu.file_reader(output_path) as f,\
            vu.file_reader(tmp_path) as f_in:

        ds_in = f_in[tmp_key]
        ds = f[output_key]

        n_nodes = ds.shape[0]
        node_blocking = nt.blocking([0], [n_nodes], [node_chunk_size])
        node_begin = node_blocking.getBlock(node_block_list[0]).begin[0]
        node_end = node_blocking.getBlock(node_block_list[-1]).end[0]

        shape = list(ds_in.shape)
        chunks = list(ds_in.chunks)
        blocking = nt.blocking([0, 0, 0], shape, chunks)

        _extract_and_merge_region_features(blocking, ds_in, ds, node_begin, node_end)

    fu.log_job_success(job_id)
def sparse_lifted_neighborhood(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    graph_path = config['graph_path']
    graph_key = config['graph_key']
    node_label_path = config['node_label_path']
    node_label_key = config['node_label_key']
    output_path = config['output_path']
    output_key = config['output_key']

    n_threads = config.get('threads_per_job', 1)
    graph_depth = config['nh_graph_depth']
    node_ignore_label = config['node_ignore_label']
    mode = config.get('mode', 'all')

    fu.log("lifted nh mode set to %s, depth set to %i" % (mode, graph_depth))
    fu.log("have ignore label: %i" % node_ignore_label)
    fu.log("start lifted neighborhood extraction for depth %i" % graph_depth)
    ndist.computeLiftedNeighborhoodFromNodeLabels(graph_path, graph_key,
                                                  node_label_path, node_label_key,
                                                  output_path, output_key,
                                                  graph_depth, n_threads,
                                                  mode, node_ignore_label)

    with vu.file_reader(output_path, 'r') as f:
        n_lifted = f[output_key].shape[0]
    fu.log("extracted %i lifted edges" % n_lifted)
    fu.log_job_success(job_id)
def block_faces(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    input_path = config['input_path']
    input_key = config['input_key']
    block_list = config['block_list']
    tmp_folder = config['tmp_folder']
    offsets_path = config['offsets_path']
    block_shape = config['block_shape']

    with open(offsets_path) as f:
        offsets = json.load(f)['offsets']

    with vu.file_reader(input_path, 'r') as f:
        ds = f[input_key]
        shape = list(ds.shape)
        blocking = nt.blocking([0, 0, 0], shape, block_shape)
        assignments = [_process_faces(block_id, blocking, ds, offsets)
                       for block_id in block_list]

    # filter out empty assignments
    assignments = [ass for ass in assignments if ass is not None]
    assignments = np.concatenate(assignments, axis=0)
    assignments = np.unique(assignments, axis=0)

    save_path = os.path.join(tmp_folder, 'assignments_%i.npy' % job_id)
    np.save(save_path, assignments)
    fu.log_job_success(job_id)
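
# NOTE: `_process_faces` is not defined in this file. A hedged sketch of the
# idea: pair up the labels on both sides of each upper block face, mapped to
# global ids via the per-block offsets. The neighbor lookup through
# `blocking.getNeighborId` and the nonzero masking are assumptions.
import numpy as np

def _process_faces(block_id, blocking, ds, offsets):
    block = blocking.getBlock(block_id)
    pairs = []
    for axis in range(3):
        if block.end[axis] == ds.shape[axis]:
            continue  # no neighbor in this direction
        # two-voxel slab straddling the face to the next block
        bb = tuple(slice(block.begin[d], block.end[d]) if d != axis else
                   slice(block.end[axis] - 1, block.end[axis] + 1)
                   for d in range(3))
        slab = ds[bb]
        this_face = np.take(slab, 0, axis=axis).ravel()
        ngb_face = np.take(slab, 1, axis=axis).ravel()
        # map block-local labels to global ids via the block offsets
        ngb_id = blocking.getNeighborId(block_id, axis=axis, lower=False)
        this_labels = this_face + offsets[block_id]
        ngb_labels = ngb_face + offsets[ngb_id]
        # only record pairs where both sides carry a label
        mask = np.logical_and(this_face != 0, ngb_face != 0)
        pairs.append(np.stack([this_labels[mask], ngb_labels[mask]], axis=1))
    if not pairs:
        return None
    pairs = np.concatenate(pairs, axis=0)
    return pairs if len(pairs) > 0 else None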
def merge_morphology(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']
    block_list = config['block_list']
    out_shape = config['out_shape']
    out_chunks = config['out_chunks']

    blocking = nt.blocking([0], out_shape[:1], out_chunks[:1])

    # merge and serialize the overlaps
    for block_id in block_list:
        block = blocking.getBlock(block_id)
        label_begin = block.begin[0]
        label_end = block.end[0]
        ndist.mergeAndSerializeMorphology(os.path.join(input_path, input_key),
                                          os.path.join(output_path, output_key),
                                          labelBegin=label_begin,
                                          labelEnd=label_end)
    fu.log_job_success(job_id)
def block_components(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']

    block_list = config['block_list']
    tmp_folder = config['tmp_folder']
    block_shape = config['block_shape']

    threshold = config['threshold']
    threshold_mode = config['threshold_mode']

    mask_path = config.get('mask_path', '')
    mask_key = config.get('mask_key', '')
    channel = config.get('channel', None)

    fu.log("Applying threshold %f with mode %s" % (threshold, threshold_mode))

    with vu.file_reader(input_path, 'r') as f_in,\
            vu.file_reader(output_path) as f_out:

        ds_in = f_in[input_key]
        ds_out = f_out[output_key]

        shape = ds_in.shape
        if channel is not None:
            shape = shape[1:]
        assert len(shape) == 3
        blocking = nt.blocking([0, 0, 0], list(shape), block_shape)

        if mask_path != '':
            # note that the mask is usually small enough to keep it
            # in memory (and we interpolate to get to the full volume)
            # if this does not hold need to change this code!
            mask = vu.load_mask(mask_path, mask_key, shape)
            offsets = [_cc_block_with_mask(block_id, blocking, ds_in, ds_out,
                                           threshold, threshold_mode, mask, channel)
                       for block_id in block_list]
        else:
            offsets = [_cc_block(block_id, blocking, ds_in, ds_out,
                                 threshold, threshold_mode, channel)
                       for block_id in block_list]

    offset_dict = {block_id: off for block_id, off in zip(block_list, offsets)}
    save_path = os.path.join(tmp_folder,
                             'connected_components_offsets_%i.json' % job_id)
    with open(save_path, 'w') as f:
        json.dump(offset_dict, f)
    fu.log_job_success(job_id)
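
# NOTE: `_cc_block` is not defined in this file. A minimal sketch using
# scipy; the 'greater' / 'less' mode names are assumptions. It returns the
# block's component count, which `merge_offsets` below turns into global
# label offsets.
import numpy as np
from scipy import ndimage

def _cc_block(block_id, blocking, ds_in, ds_out, threshold, threshold_mode, channel):
    # load the block, selecting a channel for 4d inputs
    block = blocking.getBlock(block_id)
    bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))
    data = ds_in[(channel,) + bb] if channel is not None else ds_in[bb]

    # apply the threshold
    if threshold_mode == 'greater':
        binary = data > threshold
    elif threshold_mode == 'less':
        binary = data < threshold
    else:
        raise ValueError("Unsupported threshold mode %s" % threshold_mode)

    # label the connected components of this block
    labeled, n_components = ndimage.label(binary)
    ds_out[bb] = labeled.astype(ds_out.dtype)
    return n_components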
def skeletonize(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # read the input config
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']
    skeleton_format = config['skeleton_format']
    n_threads = config.get('threads_per_job', 1)

    # load the input segmentation
    with vu.file_reader(input_path) as f_in:
        ds_in = f_in[input_key]
        ds_in.n_threads = n_threads
        seg = ds_in[:]

    fu.log("writing output in format %s" % skeleton_format)
    fu.log("to %s:%s" % (output_path, output_key))
    if skeleton_format == 'volume':
        _skeletonize_to_volume(seg, output_path, output_key, config)
    elif skeleton_format == 'swc':
        _skeletonize_to_swc(seg, output_path, output_key, config)
    elif skeleton_format == 'n5':
        _skeletonize_to_n5(seg, output_path, output_key, config)
    else:
        raise RuntimeError("Format %s not supported" % skeleton_format)

    # log success
    fu.log_job_success(job_id)
def orphan_assignments(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    # load from config
    assignment_path = config['assignment_path']
    assignment_key = config['assignment_key']
    graph_path = config['graph_path']
    graph_key = config['graph_key']
    output_path = config['output_path']
    output_key = config['output_key']
    relabel = config['relabel']
    n_threads = config.get('threads_per_job', 1)

    # load the uv-ids and assignments
    with vu.file_reader(graph_path) as f:
        ds = f['%s/edges' % graph_key]
        ds.n_threads = n_threads
        uv_ids = ds[:]
    with vu.file_reader(assignment_path) as f:
        ds = f[assignment_key]
        ds.n_threads = n_threads
        chunks = ds.chunks
        assignments = ds[:]
    n_new_nodes = int(assignments.max()) + 1

    # find the new uv-ids
    edge_mapping = nt.EdgeMapping(uv_ids, assignments, numberOfThreads=n_threads)
    new_uv_ids = edge_mapping.newUvIds()

    # find all orphans = segments that have node degree one
    ids, node_degrees = np.unique(new_uv_ids, return_counts=True)
    orphans = ids[node_degrees == 1]
    n_orphans = len(orphans)
    fu.log("Found %i orphans of %i clusters" % (n_orphans, n_new_nodes))

    # make graph for fast neighbor search
    graph = nifty.graph.undirectedGraph(n_new_nodes)
    graph.insertEdges(new_uv_ids)

    # assign each orphan to its single neighbor
    orphan_assignments = np.array([next(graph.nodeAdjacency(orphan_id))[0]
                                   for orphan_id in orphans])
    assert len(orphan_assignments) == n_orphans,\
        "%i, %i" % (len(orphan_assignments), n_orphans)
    assignments[orphans] = orphan_assignments.astype('uint64')

    if relabel:
        vigra.analysis.relabelConsecutive(assignments, out=assignments,
                                          start_label=1, keep_zeros=True)

    with vu.file_reader(output_path) as f:
        ds = f.require_dataset(output_key, shape=assignments.shape, chunks=chunks,
                               compression='gzip', dtype='uint64')
        ds[:] = assignments
    fu.log_job_success(job_id)
def check_sub_graphs(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # read the config
    with open(config_path) as f:
        config = json.load(f)
    ws_path = config['ws_path']
    ws_key = config['ws_key']
    graph_block_prefix = config['graph_block_prefix']
    block_shape = config['block_shape']
    block_list = config['block_list']
    tmp_folder = config['tmp_folder']

    with vu.file_reader(ws_path, 'r') as f:
        ds = f[ws_key]
        shape = list(ds.shape)
        blocking = nt.blocking([0, 0, 0], shape, block_shape)
        violating_blocks = [check_block(block_id, blocking, ds, graph_block_prefix)
                            for block_id in block_list]

    violating_blocks = [vb for vb in violating_blocks if vb is not None]
    save_path = os.path.join(tmp_folder, 'failed_blocks_job_%i.json' % job_id)
    with open(save_path, 'w') as f:
        json.dump(violating_blocks, f)
    # log success
    fu.log_job_success(job_id)
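
# NOTE: `check_block` is not defined in this file. A hedged sketch, assuming
# the serialized sub-graph nodes can be read with `ndist.loadNodes` (an
# assumption about the nifty.distributed API) and that a block is violating
# when its watershed ids disagree with the stored graph nodes.
import numpy as np

def check_block(block_id, blocking, ds, graph_block_prefix):
    # labels actually present in the watershed block
    block = blocking.getBlock(block_id)
    bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))
    ws_ids = np.unique(ds[bb])

    # nodes stored in the serialized sub-graph for this block
    graph_nodes = ndist.loadNodes(graph_block_prefix + str(block_id))

    # report the block if the two id sets disagree
    if not np.array_equal(ws_ids, np.unique(graph_nodes)):
        return block_id
    return None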
def gradients(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    path_dict = config['path_dict']
    output_path = config['output_path']
    output_key = config['output_key']
    block_list = config['block_list']
    block_shape = config['block_shape']
    average_gradient = config['average_gradient']

    with open(path_dict) as f:
        path_dict = json.load(f)
    input_datasets = []
    for path in sorted(path_dict):
        input_datasets.append(vu.file_reader(path, 'r')[path_dict[path]])

    # 5 pix should be enough halo to make gradient computation correct
    halo = 3 * [5]
    with vu.file_reader(output_path) as f:
        ds = f[output_key]
        shape = ds.shape if average_gradient else ds.shape[1:]
        blocking = nt.blocking([0, 0, 0], list(shape), block_shape)
        [_gradients_block(block_id, blocking, input_datasets, ds, halo, average_gradient)
         for block_id in block_list]

    fu.log_job_success(job_id)
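
# NOTE: `_gradients_block` is not shown in this file. A sketch using
# np.gradient with the halo pattern implied above; the exact channel layout
# (mean vs. one channel per input) is an assumption.
import numpy as np

def _gradients_block(block_id, blocking, input_datasets, ds, halo, average_gradient):
    # read the block with halo so that gradients at the block border are correct
    block = blocking.getBlockWithHalo(block_id, halo)
    outer_bb = tuple(slice(beg, end) for beg, end
                     in zip(block.outerBlock.begin, block.outerBlock.end))
    inner_bb = tuple(slice(beg, end) for beg, end
                     in zip(block.innerBlock.begin, block.innerBlock.end))
    local_bb = tuple(slice(beg, end) for beg, end
                     in zip(block.innerBlockLocal.begin, block.innerBlockLocal.end))

    # compute a gradient magnitude per input dataset
    grads = []
    for ds_in in input_datasets:
        data = ds_in[outer_bb].astype('float32')
        gz, gy, gx = np.gradient(data)
        grads.append(np.sqrt(gz ** 2 + gy ** 2 + gx ** 2))

    if average_gradient:
        # single output channel: mean over the inputs
        ds[inner_bb] = np.mean(grads, axis=0)[local_bb]
    else:
        # one output channel per input
        out = np.stack([grad[local_bb] for grad in grads], axis=0)
        ds[(slice(None),) + inner_bb] = out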
def copy_and_crop(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']
    roi_start = config['roi_start']
    roi_size = config['roi_size']
    bb = tuple(slice(rs, rs + size) for rs, size in zip(roi_start, roi_size))
    max_threads = config.get('threads_per_job', 1)

    ds_in = z5py.File(input_path)[input_key]
    ds_in.n_threads = max_threads
    seg = ds_in[bb]
    max_id = int(seg.max())

    f = z5py.File(output_path)
    ds_out = f.require_dataset(output_key, shape=seg.shape, chunks=ds_in.chunks,
                               compression='gzip', dtype='uint64')
    ds_out.n_threads = max_threads
    ds_out[:] = seg
    ds_out.attrs['maxId'] = max_id

    fu.log_job_success(job_id)
def conseq_labels(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path, 'r') as f:
        config = json.load(f)
    output_path = config['output_path']
    output_key = config['output_key']
    input_path = config['input_path']
    input_key = config['input_key']

    # load the labels; read the chunks while the file is still open
    with vu.file_reader(input_path, 'r') as f:
        labels = f[input_key][:]
        chunks = f[input_key].chunks

    # map the labels to consecutive values, preserving their order
    unique = np.unique(labels)
    output = np.zeros(labels.shape, dtype=labels.dtype)
    next_l = 0
    for l in unique:
        output[labels == l] = next_l
        next_l += 1

    with vu.file_reader(output_path, 'w') as fout:
        fout.create_dataset(output_key, data=output, chunks=chunks,
                            compression='gzip')
    fu.log_job_success(job_id)
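
# NOTE: the per-label loop above scales with the number of labels times the
# volume size; np.unique with return_inverse computes the same consecutive
# relabeling in a single pass. A minimal, self-contained alternative
# (the function name is hypothetical):
import numpy as np

def conseq_labels_vectorized(labels):
    # the inverse indices into the sorted unique array are exactly the
    # consecutive ids the loop above produces
    _, inverse = np.unique(labels, return_inverse=True)
    return inverse.reshape(labels.shape).astype(labels.dtype)

# example: [[7, 7], [2, 9]] -> [[1, 1], [0, 2]]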
def unique_block_labels(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # read the config
    with open(config_path) as f:
        config = json.load(f)
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']
    block_list = config['block_list']
    block_shape = config['block_shape']
    is_multiset = config['is_multiset']

    # open the input file
    with vu.file_reader(input_path, 'r') as f, vu.file_reader(output_path) as f_out:
        ds = f[input_key]
        ds_out = f_out[output_key]

        chunks = ds.chunks
        shape = ds.shape
        assert tuple(chunks) == tuple(block_shape),\
            "Chunks %s and block shape %s must agree" % (str(chunks), str(block_shape))
        blocking = nt.blocking([0, 0, 0], shape, block_shape)

        if is_multiset:
            _uniques_multiset(ds, ds_out, blocking, block_list)
        else:
            _uniques_default(ds, ds_out, blocking, block_list)

    # log success
    fu.log_job_success(job_id)
def id_filter(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    node_label_path = config['node_label_path']
    node_label_key = config['node_label_key']
    output_path = config['output_path']
    filter_labels = np.array(config['filter_labels'], dtype='uint64')

    with vu.file_reader(node_label_path, 'r') as f:
        node_labels = f[node_label_key][:]

    # find the node ids that overlap with the filter labels
    filter_mask = np.in1d(node_labels, filter_labels)
    filter_ids = np.where(filter_mask)[0].tolist()

    fu.log("%i ids will be filtered" % len(filter_ids))
    fu.log("saving filter ids to %s" % output_path)
    with open(output_path, 'w') as f:
        json.dump(filter_ids, f)
    fu.log_job_success(job_id)
def block_morphology(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']
    block_shape = config['block_shape']
    block_list = config['block_list']

    with vu.file_reader(input_path, 'r') as f_in:
        ds_in = f_in[input_key]
        shape = ds_in.shape
        blocking = nt.blocking([0, 0, 0], list(shape), list(block_shape))
        [_morphology_for_block(block_id, blocking, ds_in, output_path, output_key)
         for block_id in block_list]

    fu.log_job_success(job_id)
def blocks_from_mask(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    mask_path = config['mask_path']
    mask_key = config['mask_key']
    output_path = config['output_path']
    shape = config['shape']
    block_shape = config['block_shape']
    n_threads = config.get('threads_per_job', 1)

    # NOTE we assume that the mask is small and will fit into memory
    with vu.file_reader(mask_path, 'r') as f:
        ds = f[mask_key]
        ds.n_threads = n_threads
        mask_data = ds[:]
    mask = ResizedVolume(mask_data, tuple(shape))

    blocking = nt.blocking([0, 0, 0], shape, list(block_shape))
    blocks_in_mask = _get_blocks_in_mask(mask, blocking, n_threads)

    with open(output_path, 'w') as f:
        json.dump(blocks_in_mask, f)
    fu.log_job_success(job_id)
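
# NOTE: `_get_blocks_in_mask` is not shown in this file. A minimal
# thread-parallel sketch: keep every block that contains at least one
# foreground mask voxel.
from concurrent import futures
import numpy as np

def _get_blocks_in_mask(mask, blocking, n_threads):
    def check_block(block_id):
        block = blocking.getBlock(block_id)
        bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))
        # keep the block if any voxel of the (interpolated) mask is set
        return block_id if np.any(mask[bb]) else None

    with futures.ThreadPoolExecutor(n_threads) as tp:
        results = tp.map(check_block, range(blocking.numberOfBlocks))
    return [block_id for block_id in results if block_id is not None]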
def watershed_from_seeds(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # read the input config
    input_path = config['input_path']
    input_key = config['input_key']
    shape = list(vu.get_shape(input_path, input_key))
    if len(shape) == 4:
        shape = shape[1:]
    block_shape = list(config['block_shape'])
    block_list = config['block_list']

    # TODO seeds and output might be identical
    # in that case we would need in-place logic if we
    # want to support h5 (it's fine with n5 as is)
    # read the seed and output config
    seeds_path = config['seeds_path']
    seeds_key = config['seeds_key']
    output_path = config['output_path']
    output_key = config['output_key']

    # check if we have a mask
    with_mask = 'mask_path' in config
    if with_mask:
        mask_path = config['mask_path']
        mask_key = config['mask_key']

    # get the blocking
    blocking = nt.blocking([0, 0, 0], shape, block_shape)

    # submit blocks
    with vu.file_reader(input_path, 'r') as f_in,\
            vu.file_reader(seeds_path, 'r') as f_seeds,\
            vu.file_reader(output_path) as f_out:

        ds_in = f_in[input_key]
        assert ds_in.ndim in (3, 4)
        # read the seeds from the seed file, not from the output file
        ds_seeds = f_seeds[seeds_key]
        assert ds_seeds.ndim == 3
        ds_out = f_out[output_key]
        assert ds_out.ndim == 3

        # note that the mask is usually small enough to keep it
        # in memory (and we interpolate to get to the full volume)
        # if this does not hold need to change this code!
        if with_mask:
            mask = vu.load_mask(mask_path, mask_key, shape)
            for block_id in block_list:
                _ws_block_masked(blocking, block_id,
                                 ds_in, ds_seeds, ds_out, mask, config)
        else:
            for block_id in block_list:
                _ws_block(blocking, block_id, ds_in, ds_seeds, ds_out, config)
    # log success
    fu.log_job_success(job_id)
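
# NOTE: `_ws_block` is not defined in this file. A minimal sketch using
# skimage's seeded watershed; the real implementation may use a halo and a
# different filter pipeline, and assuming channel 0 holds the boundary map
# for 4d inputs is a guess.
import numpy as np
from skimage.segmentation import watershed

def _ws_block(blocking, block_id, ds_in, ds_seeds, ds_out, config):
    # load height map and seeds for this block
    block = blocking.getBlock(block_id)
    bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))
    hmap = ds_in[(0,) + bb] if ds_in.ndim == 4 else ds_in[bb]
    seeds = ds_seeds[bb]

    # grow the seeds with a seeded watershed on the height map
    ws = watershed(hmap, markers=seeds)
    ds_out[bb] = ws.astype(ds_out.dtype)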
def label_block_mapping(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # read the config
    with open(config_path) as f:
        config = json.load(f)
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']
    number_of_labels = config['number_of_labels']
    roi_begin = config.get('roi_begin', None)
    roi_end = config.get('roi_end', None)
    assert (roi_begin is None) == (roi_end is None)
    # we need to turn `None` rois to empty lists,
    # because I don't really understand how pybind11 handles None yet
    if roi_begin is None:
        roi_begin = []
        roi_end = []
    n_threads = config.get('threads_per_job', 1)

    ndist.serializeBlockMapping(os.path.join(input_path, input_key),
                                os.path.join(output_path, output_key),
                                number_of_labels, n_threads,
                                roi_begin, roi_end)
    # log success
    fu.log_job_success(job_id)
def sparse_lifted_neighborhood(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    graph_path = config['graph_path']
    graph_key = config['graph_key']
    node_label_path = config['node_label_path']
    node_label_key = config['node_label_key']
    output_path = config['output_path']
    output_key = config['output_key']

    n_threads = config.get('threads_per_job', 1)
    graph_depth = config['nh_graph_depth']
    mode = config.get('mode', 'all')

    fu.log("lifted nh mode set to %s, depth set to %i" % (mode, graph_depth))
    fu.log("start lifted neighborhood extraction for depth %i" % graph_depth)
    ndist.computeLiftedNeighborhoodFromNodeLabels(
        os.path.join(graph_path, graph_key),
        os.path.join(node_label_path, node_label_key),
        os.path.join(output_path, output_key),
        graph_depth, n_threads, mode)
    fu.log_job_success(job_id)
def skeleton_evaluation(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # read the input config
    input_path = config['input_path']
    input_key = config['input_key']
    skeleton_path = config['skeleton_path']
    skeleton_key = config['skeleton_key']
    output_path = config['output_path']
    skeleton_format = config['skeleton_format']
    n_threads = config.get('threads_per_job', 1)

    # TODO adapt nskel.SkeletonMetrics to new n5 skeleton format
    # list the skeleton ids from the skeleton group
    skeleton_folder = os.path.join(skeleton_path, skeleton_key)
    skeleton_ids = os.listdir(skeleton_folder)
    skeleton_ids = [int(sk) for sk in skeleton_ids if sk.isdigit()]
    skeleton_ids.sort()

    metrics = nskel.SkeletonMetrics(os.path.join(input_path, input_key),
                                    skeleton_folder, skeleton_ids, n_threads)

    # TODO expose parameters for different eval options
    correct, split, merge, n_merges = metrics.computeGoogleScore(n_threads)
    res = {'correct': correct, 'split': split,
           'merge': merge, 'n_merges': n_merges}
    with open(output_path, 'w') as f:
        json.dump(res, f)
    # log success
    fu.log_job_success(job_id)
def merge_predictions(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path, 'r') as f:
        config = json.load(f)
    output_path = config['output_path']
    output_key = config['output_key']
    tmp_prefix = config['tmp_prefix']
    halo = config['halo']
    n_channels = config['n_channels']

    shape = vu.get_shape(output_path, output_key)
    if len(shape) > 3:
        shape = shape[-3:]
    block_shape = config['block_shape']
    blocking = nt.blocking([0, 0, 0], shape, block_shape)

    # TODO we could parallelize this
    with vu.file_reader(output_path) as f:
        ds = f[output_key]
        for block_id in range(blocking.numberOfBlocks):
            _merge_block(block_id, blocking, ds, tmp_prefix, halo, n_channels)

    fu.log_job_success(job_id)
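
# NOTE: `_merge_block` is not shown in this file. A hedged sketch; the
# '%s_block%i.npy' naming scheme under `tmp_prefix` is an assumption about
# how the prediction blocks were saved.
import os
import numpy as np

def _merge_block(block_id, blocking, ds, tmp_prefix, halo, n_channels):
    # the per-block prediction file (assumed naming scheme)
    block_path = '%s_block%i.npy' % (tmp_prefix, block_id)
    if not os.path.exists(block_path):
        return  # block was not predicted, e.g. because it was masked out
    pred = np.load(block_path)

    # crop the halo and write the inner part to the output dataset
    block = blocking.getBlockWithHalo(block_id, halo)
    inner_bb = tuple(slice(beg, end) for beg, end
                     in zip(block.innerBlock.begin, block.innerBlock.end))
    local_bb = tuple(slice(beg, end) for beg, end
                     in zip(block.innerBlockLocal.begin, block.innerBlockLocal.end))
    if n_channels > 1:
        ds[(slice(None),) + inner_bb] = pred[(slice(None),) + local_bb]
    else:
        ds[inner_bb] = pred[local_bb]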
def simple_stitch_edges(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    graph_path = config['graph_path']
    labels_path = config['labels_path']
    labels_key = config['labels_key']
    n_edges = config['n_edges']
    block_list = config['block_list']
    block_shape = config['block_shape']

    out_path = config['out_path']
    out_key = 'job_results/job_%i' % job_id
    subgraph_key = 's0/sub_graphs'

    res = ndist.findBlockBoundaryEdges(graph_path, subgraph_key,
                                       labels_path, labels_key,
                                       n_edges, block_shape, block_list)
    fu.log('Found %i / %i block boundary edges' % (res.sum(), len(res)))

    with vu.file_reader(out_path) as f:
        chunks = (min(int(1e6), len(res)),)
        vu.force_dataset(f, out_key, data=res.astype('uint8'),
                         compression='gzip', chunks=chunks, shape=res.shape)
    fu.log_job_success(job_id)
def insert_affinities(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    affinity_path = config['affinity_path']
    affinity_key = config['affinity_key']
    objects_path = config['objects_path']
    objects_key = config['objects_key']

    block_list = config['block_list']
    block_shape = config['block_shape']
    offsets = config['offsets']

    with vu.file_reader(affinity_path) as f_in, vu.file_reader(objects_path) as f_obj:
        ds = f_in[affinity_key]
        shape = ds.shape[1:]

        # TODO actually check that objects are on a lower scale
        ds_objs = f_obj[objects_key]
        objects = vu.InterpolatedVolume(ds_objs, shape)

        blocking = nt.blocking([0, 0, 0], list(shape), block_shape)
        [_insert_affinities_block(block_id, blocking, ds, objects, offsets)
         for block_id in block_list]

    fu.log_job_success(job_id)
def merge_statistics(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    n_jobs = config['n_jobs']
    tmp_folder = config['tmp_folder']
    output_path = config['output_path']

    # load and merge the per-job statistics
    job_stats = []
    for stat_job_id in range(n_jobs):
        job_path = os.path.join(tmp_folder,
                                'block_statistics_job%i.json' % stat_job_id)
        with open(job_path) as f:
            job_stat = json.load(f)
        job_stats.append(job_stat)
    stats = merge_stats(job_stats)

    with open(output_path, 'w') as f:
        json.dump(stats, f)
    fu.log_job_success(job_id)
def merge_offsets(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    tmp_folder = config['tmp_folder']
    n_jobs = config['n_jobs']
    save_path = config['save_path']
    n_blocks = config['n_blocks']

    # gather the per-block component counts from all jobs
    offsets = {}
    for block_job_id in range(n_jobs):
        path = os.path.join(tmp_folder,
                            'connected_components_offsets_%i.json' % block_job_id)
        with open(path, 'r') as f:
            offsets.update(json.load(f))
        os.remove(path)

    # json keys are strings, so sort by the numeric block id
    offset_list = np.array([v for _, v in sorted(offsets.items(),
                                                 key=lambda kv: int(kv[0]))],
                           dtype='uint64')
    empty_blocks = np.where(offset_list == 0)[0].tolist()

    # exclusive cumulative sum: each block's offset is the number of
    # components in all preceding blocks
    offset_list = np.roll(offset_list, 1)
    offset_list[0] = 0
    offset_list = np.cumsum(offset_list).tolist()
    assert len(offset_list) == n_blocks, "%i, %i" % (len(offset_list), n_blocks)

    fu.log("dumping offsets to %s" % save_path)
    with open(save_path, 'w') as f:
        json.dump({'offsets': offset_list, 'empty_blocks': empty_blocks}, f)
    fu.log_job_success(job_id)
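
# NOTE: a small worked example of the exclusive prefix sum computed above
# (the demo function is hypothetical, added only for illustration):
def _demo_offsets():
    # per-block component counts for four blocks
    counts = np.array([3, 0, 5, 2], dtype='uint64')
    # roll, zero the first entry, then cumsum
    offsets = np.roll(counts, 1)
    offsets[0] = 0
    offsets = np.cumsum(offsets)
    # offsets == [0, 3, 3, 8]: block 2's local labels 1..5 become 4..8 globally
    return offsets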
def minfilter(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # input/output files
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']

    # blocks and task config; note that `block_shape` must be read
    # from the config as well, it is needed for the blocking below
    block_list = config['block_list']
    block_shape = config['block_shape']
    filter_shape = config['filter_shape']

    with vu.file_reader(input_path, 'r') as f_in, vu.file_reader(output_path) as f_out:
        ds_in = f_in[input_key]
        ds_out = f_out[output_key]
        shape = ds_in.shape
        blocking = nt.blocking(roiBegin=[0, 0, 0],
                               roiEnd=list(shape),
                               blockShape=list(block_shape))

        # TODO is half of the halo really enough halo ?
        halo = list(fshape // 2 for fshape in filter_shape)
        [_minfilter_block(block_id, blocking, halo, ds_in, ds_out, filter_shape)
         for block_id in block_list]

    # log success
    fu.log_job_success(job_id)
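
# NOTE: `_minfilter_block` is not shown in this file. A sketch using scipy's
# minimum_filter with the halo pattern from above.
from scipy.ndimage import minimum_filter

def _minfilter_block(block_id, blocking, halo, ds_in, ds_out, filter_shape):
    # read the block with a halo so the filter is correct at block borders
    block = blocking.getBlockWithHalo(block_id, halo)
    outer_bb = tuple(slice(beg, end) for beg, end
                     in zip(block.outerBlock.begin, block.outerBlock.end))
    inner_bb = tuple(slice(beg, end) for beg, end
                     in zip(block.innerBlock.begin, block.innerBlock.end))
    local_bb = tuple(slice(beg, end) for beg, end
                     in zip(block.innerBlockLocal.begin, block.innerBlockLocal.end))

    data = ds_in[outer_bb]
    filtered = minimum_filter(data, size=filter_shape)
    ds_out[inner_bb] = filtered[local_bb]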
def solve_subproblems(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    problem_path = config['problem_path']
    scale = config['scale']
    block_shape = config['block_shape']
    block_list = config['block_list']
    n_threads = config['threads_per_job']
    agglomerator_key = config['agglomerator']
    time_limit = config.get('time_limit_solver', None)

    fu.log("reading problem from %s" % problem_path)
    problem = z5py.N5File(problem_path)
    shape = problem.attrs['shape']

    # load the costs
    costs_key = 's%i/costs' % scale
    fu.log("reading costs from path in problem: %s" % costs_key)
    ds = problem[costs_key]
    ds.n_threads = n_threads
    costs = ds[:]

    # load the graph
    graph_key = 's%i/graph' % scale
    fu.log("reading graph from path in problem: %s" % graph_key)
    graph = ndist.Graph(os.path.join(problem_path, graph_key),
                        numberOfThreads=n_threads)
    uv_ids = graph.uvIds()

    # check if the problem has an ignore-label
    ignore_label = problem[graph_key].attrs['ignoreLabel']
    fu.log("ignore label is %s" % ('true' if ignore_label else 'false'))

    fu.log("using agglomerator %s" % agglomerator_key)
    agglomerator = su.key_to_agglomerator(agglomerator_key)

    # the output group
    out = problem['s%i/sub_results' % scale]

    # TODO this should be a n5 varlen dataset as well and
    # then this is just another dataset in problem path
    block_prefix = os.path.join(problem_path, 's%i' % scale,
                                'sub_graphs', 'block_')
    blocking = nt.blocking([0, 0, 0], shape, list(block_shape))

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [tp.submit(_solve_block_problem,
                           block_id, graph, uv_ids, block_prefix,
                           costs, agglomerator, ignore_label,
                           blocking, out, time_limit)
                 for block_id in block_list]
        [t.result() for t in tasks]
    fu.log_job_success(job_id)