def debug_subresult(block_id=1):
    from cremi_tools.viewer.volumina import view
    path = '/g/kreshuk/data/arendt/platyneris_v1/membrane_training_data/validation/segmentation/val_block_01.n5'
    tmp_folder = './tmp_plat_val'
    block_prefix = os.path.join(path, 's0', 'sub_graphs', 'block_')
    graph = ndist.Graph(os.path.join(path, 'graph'))
    block_path = block_prefix + str(block_id)
    nodes = ndist.loadNodes(block_path)
    nodes = nodes[1:]
    inner_edges, outer_edges, sub_uvs = graph.extractSubgraphFromNodes(nodes)

    block_res_path = os.path.join(tmp_folder,
                                  'subproblem_results/s0_block%i.npy' % block_id)
    res = np.load(block_res_path)

    merge_edges = np.ones(graph.numberOfEdges, dtype='bool')
    merge_edges[res] = False
    merge_edges[outer_edges] = False

    uv_ids = graph.uvIds()
    n_nodes = int(uv_ids.max()) + 1
    ufd = nufd.ufd(n_nodes)
    ufd.merge(uv_ids[merge_edges])
    node_labels = ufd.elementLabeling()

    ws = z5py.File(path)['volumes/watershed'][:]
    seg = nt.take(node_labels, ws)
    view([ws, seg])
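# A minimal sketch of the projection step used in debug_subresult above:
# nifty.tools.take performs the same lookup as numpy fancy indexing
# (node_labels[ws]) to map node labels back to pixels. The arrays here are
# made up for illustration.
import numpy as np
import nifty.tools as nt

example_node_labels = np.array([0, 2, 2, 3], dtype='uint64')
example_ws = np.array([[0, 1], [2, 3]], dtype='uint64')
example_seg = nt.take(example_node_labels, example_ws)
assert np.array_equal(example_seg, example_node_labels[example_ws])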
def debug_subresult(block_id=1):
    example_path = '/home/cpape/Work/data/isbi2012/cluster_example/isbi_train.n5'
    block_prefix = os.path.join(example_path, 's0', 'sub_graphs', 'block_')
    graph = ndist.Graph(os.path.join(example_path, 'graph'))
    block_path = block_prefix + str(block_id)
    nodes = ndist.loadNodes(block_path)
    inner_edges, outer_edges, sub_uvs = graph.extractSubgraphFromNodes(nodes)

    block_res_path = './tmp/subproblem_results/s0_block%i.npy' % block_id
    res = np.load(block_res_path)

    merge_edges = np.ones(graph.numberOfEdges, dtype='bool')
    merge_edges[res] = False
    merge_edges[outer_edges] = False

    uv_ids = graph.uvIds()
    n_nodes = int(uv_ids.max()) + 1
    ufd = nifty.ufd.ufd(n_nodes)
    ufd.merge(uv_ids[merge_edges])
    node_labels = ufd.elementLabeling()

    ws = z5py.File(example_path)['volumes/watersheds'][:]
    rag = nrag.gridRag(ws, numberOfLabels=n_nodes)
    seg = nrag.projectScalarNodeDataToPixels(rag, node_labels)
    view([ws, seg])
def edges_from_skeletons(path, ws_key, labels_key, skel_key,
                         assignment_key, out_key,
                         graph_path, graph_key, n_threads):
    f = z5py.File(path)
    ds_ws = f[ws_key]
    ds_skel = f[skel_key]
    n_labels = ds_skel.shape[0]

    ds_labels = f[labels_key]
    ds_labels.n_threads = n_threads
    gt_labels = ds_labels[:]

    ds_assignment = f[assignment_key]
    ds_assignment.n_threads = n_threads
    assignment = ds_assignment[:]

    rag = ndist.Graph(os.path.join(graph_path, graph_key), n_threads)
    ds_out = f.require_dataset(out_key, shape=(n_labels,), chunks=(1,),
                               compression='gzip', dtype='uint64')

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [tp.submit(skeleton_to_edges, seg_id, ds_ws, ds_skel,
                           gt_labels, assignment, rag, ds_out)
                 for seg_id in range(n_labels)]
        [t.result() for t in tasks]
def insert(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # path to the reduced problem
    graph_path = config['graph_path']
    graph_key = config['graph_key']
    decomposition_path = config['decomposition_path']
    # path where the node labeling shall be written
    output_path = config['output_path']
    output_key = config['output_key']

    n_threads = config['threads_per_job']
    tmp_folder = config['tmp_folder']
    n_jobs = config['n_jobs']

    # load the graph
    graph = ndist.Graph(os.path.join(graph_path, graph_key),
                        numberOfThreads=n_threads)
    with vu.file_reader(graph_path, 'r') as f:
        ignore_label = f[graph_key].attrs['ignoreLabel']

    # load the cut edges from the initial decomposition
    with vu.file_reader(decomposition_path, 'r') as f:
        ds = f['cut_edges']
        ds.n_threads = n_threads
        cut_edges_decomp = ds[:]

    # load all the sub results (use a separate name for the comprehension
    # variable so it does not shadow this job's id)
    cut_edges = np.concatenate([np.load(os.path.join(tmp_folder, 'subproblem_results',
                                                     'job%i.npy' % sub_job_id))
                                for sub_job_id in range(n_jobs)])
    cut_edges = np.unique(cut_edges).astype('uint64')
    cut_edges = np.concatenate([cut_edges_decomp, cut_edges])

    edge_labels = np.zeros(graph.numberOfEdges, dtype='bool')
    edge_labels[cut_edges] = 1

    node_labeling = ndist.connectedComponents(graph, edge_labels, ignore_label)

    n_nodes = len(node_labeling)
    node_shape = (n_nodes,)
    chunks = (min(n_nodes, 524288),)
    with vu.file_reader(output_path) as f:
        ds = f.require_dataset(output_key, dtype='uint64',
                               shape=node_shape, chunks=chunks,
                               compression='gzip')
        ds.n_threads = n_threads
        ds[:] = node_labeling

    fu.log('saving results to %s' % output_path)
    fu.log('and key %s' % output_key)
    fu.log_job_success(job_id)
def solve_subproblems(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    problem_path = config['problem_path']
    scale = config['scale']
    block_shape = config['block_shape']
    block_list = config['block_list']
    n_threads = config['threads_per_job']
    agglomerator_key = config['agglomerator']
    time_limit = config.get('time_limit_solver', None)

    fu.log("reading problem from %s" % problem_path)
    problem = z5py.N5File(problem_path)
    shape = problem.attrs['shape']

    # load the costs
    costs_key = 's%i/costs' % scale
    fu.log("reading costs from path in problem: %s" % costs_key)
    ds = problem[costs_key]
    ds.n_threads = n_threads
    costs = ds[:]

    # load the graph
    graph_key = 's%i/graph' % scale
    fu.log("reading graph from path in problem: %s" % graph_key)
    graph = ndist.Graph(os.path.join(problem_path, graph_key),
                        numberOfThreads=n_threads)
    uv_ids = graph.uvIds()
    # check if the problem has an ignore-label
    ignore_label = problem[graph_key].attrs['ignoreLabel']
    fu.log("ignore label is %s" % ('true' if ignore_label else 'false'))

    fu.log("using agglomerator %s" % agglomerator_key)
    agglomerator = su.key_to_agglomerator(agglomerator_key)

    # the output group
    out = problem['s%i/sub_results' % scale]

    # TODO this should be a n5 varlen dataset as well and
    # then this is just another dataset in problem path
    block_prefix = os.path.join(problem_path, 's%i' % scale,
                                'sub_graphs', 'block_')
    blocking = nt.blocking([0, 0, 0], shape, list(block_shape))

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [tp.submit(_solve_block_problem,
                           block_id, graph, uv_ids, block_prefix,
                           costs, agglomerator, ignore_label,
                           blocking, out, time_limit)
                 for block_id in block_list]
        [t.result() for t in tasks]
    fu.log_job_success(job_id)
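# Hedged sketch of the `_solve_block_problem` helper referenced above. It is
# reconstructed from the subgraph-extraction pattern used by the debug
# functions in this file and is not the verbatim implementation; in
# particular, the call convention of `agglomerator` and the serialization
# into `out` are assumptions.
import numpy as np
import vigra
import nifty
import nifty.distributed as ndist

def _solve_block_problem(block_id, graph, uv_ids, block_prefix, costs,
                         agglomerator, ignore_label, blocking, out, time_limit):
    # load this block's nodes and extract the corresponding subgraph
    nodes = ndist.loadNodes(block_prefix + str(block_id))
    if ignore_label and nodes[0] == 0:
        nodes = nodes[1:]
    inner_edges, outer_edges, sub_uvs = graph.extractSubgraphFromNodes(nodes)
    if len(sub_uvs) == 0:
        return

    # relabel the sub uv-ids to consecutive node ids and build the local graph
    sub_uvs, max_id, _ = vigra.analysis.relabelConsecutive(sub_uvs, start_label=0,
                                                           keep_zeros=False)
    sub_graph = nifty.graph.undirectedGraph(int(max_id) + 1)
    sub_graph.insertEdges(sub_uvs)

    # solve the local problem; edges cut by the local node labeling are
    # reported, and outer edges are always cut so that no merge crosses
    # the block boundary
    sub_result = agglomerator(sub_graph, costs[inner_edges], time_limit=time_limit)
    cut_mask = sub_result[sub_uvs[:, 0]] != sub_result[sub_uvs[:, 1]]
    cut_edge_ids = np.concatenate([inner_edges[cut_mask], outer_edges])
    # the actual worker would serialize cut_edge_ids into `out` here, e.g.
    # into the chunk at blocking.blockGridPosition(block_id)
    return cut_edge_ids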
def solve_subproblems(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    problem_path = config['problem_path']
    scale = config['scale']
    block_shape = config['block_shape']
    block_list = config['block_list']
    n_threads = config['threads_per_job']
    agglomerator_key = config['agglomerator']
    time_limit = config.get('time_limit_solver', None)

    fu.log("reading problem from %s" % problem_path)
    problem = z5py.N5File(problem_path)
    shape = problem['s0/graph'].attrs['shape']

    # load the costs
    costs_key = 's%i/costs' % scale
    fu.log("reading costs from path in problem: %s" % costs_key)
    ds = problem[costs_key]
    ds.n_threads = n_threads
    costs = ds[:]

    # load the graph
    graph_key = 's%i/graph' % scale
    fu.log("reading graph from path in problem: %s" % graph_key)
    graph = ndist.Graph(problem_path, graph_key, numberOfThreads=n_threads)
    uv_ids = graph.uvIds()
    # check if the problem has an ignore-label
    ignore_label = problem[graph_key].attrs['ignore_label']
    fu.log("ignore label is %s" % ('true' if ignore_label else 'false'))

    fu.log("using solver %s" % agglomerator_key)
    solver = get_multicut_solver(agglomerator_key)

    # the output group
    out = problem['s%i/sub_results' % scale]

    node_ds_key = 's%i/sub_graphs/nodes' % scale
    ds_nodes = problem[node_ds_key]
    blocking = nt.blocking([0, 0, 0], shape, list(block_shape))

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [tp.submit(_solve_block_problem,
                           block_id, graph, uv_ids, ds_nodes,
                           costs, solver, ignore_label,
                           blocking, out, time_limit)
                 for block_id in block_list]
        [t.result() for t in tasks]
    fu.log_job_success(job_id)
def set_up_problem():
    path = './exp_data/exp_data.n5'

    # load the graph and edge probs
    f = z5py.File(path)
    ds = f['features']
    probs = ds[:, 0]
    g = ndist.Graph(os.path.join(path, 's0/graph'))
    graph = nifty.graph.UndirectedGraph(g.numberOfNodes + 1)
    graph.insertEdges(g.uvIds())

    # add lifted edges up to the given bfs depth
    nhood = 2
    obj = nlmc.liftedMulticutObjective(graph)
    obj.insertLiftedEdgesBfs(nhood)
    lifted_uvs = obj.liftedUvIds()
    print("Number of lifted edges:")
    print(len(lifted_uvs))
    chunks = (int(1e6), 2)
    f.create_dataset('s0/lifted_nh', data=lifted_uvs, chunks=chunks,
                     compression='gzip')

    # set the lifted costs according to the max prob. along the shortest path
    print("Calculating costs ...")

    def find_costs_from_sp(lifted_id):
        print(lifted_id, "/", len(lifted_uvs))
        sp = nifty.graph.ShortestPathDijkstra(graph)
        u, v = lifted_uvs[lifted_id]
        edge_path = sp.runSingleSourceSingleTarget(probs, u, v, False)
        max_prob = np.max(probs[edge_path])
        return max_prob

    # p = find_costs_from_sp(0)
    # print(p)
    # return

    n_threads = 8
    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [tp.submit(find_costs_from_sp, i)
                 for i in range(len(lifted_uvs))]
        costs = np.array([t.result() for t in tasks])
    assert len(costs) == len(lifted_uvs)

    costs = probs_to_costs(costs)
    chunks = (int(1e6),)
    f.create_dataset('s0/lifted_costs', data=costs, chunks=chunks,
                     compression='gzip')
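# `probs_to_costs` is not defined in this snippet. A minimal sketch, assuming
# the standard log-likelihood-ratio mapping commonly used to turn boundary
# probabilities into signed multicut costs; the weighting details of the
# actual implementation may differ.
import numpy as np

def probs_to_costs(probs, beta=.5, eps=1e-6):
    # clip to avoid log(0)
    p = np.clip(probs, eps, 1. - eps)
    # probabilities close to 1 (likely boundary) become repulsive (negative),
    # probabilities close to 0 become attractive (positive); beta shifts
    # the decision boundary
    return np.log((1. - p) / p) + np.log((1. - beta) / beta)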
def rank_false_merges(problem_path, graph_key, feat_key, morpho_key,
                      node_label_path, node_label_key,
                      ignore_ids, out_path_ids, out_path_scores,
                      n_threads, n_candidates,
                      heuristic=weight_quantile_heuristic):
    g = ndist.Graph(problem_path, graph_key, n_threads)
    with open_file(problem_path, 'r') as f:
        ds = f[feat_key]
        ds.n_threads = n_threads
        probs = ds[:, 0]

        ds = f[morpho_key]
        ds.n_threads = n_threads
        sizes = ds[:, 1]

    with open_file(node_label_path, 'r') as f:
        ds = f[node_label_key]
        ds.n_threads = n_threads
        node_labels = ds[:]

    # consider the largest segments as candidates, without the ignore ids
    seg_ids = np.arange(len(sizes), dtype='uint64')
    seg_ids = seg_ids[np.argsort(sizes)[::-1]][:n_candidates]
    seg_ids = seg_ids[~np.isin(seg_ids, ignore_ids.tolist() + [0])]
    max_size = sizes[seg_ids].max()

    # use the passed heuristic to score the candidates
    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [tp.submit(heuristic, seg_id, g, node_labels,
                           sizes, max_size, probs)
                 for seg_id in seg_ids]
        fm_scores = np.array([t.result() for t in tasks])

    # print("Id:", seg_ids[0])
    # sc = weight_quantile_heuristic(seg_ids[0], g,
    #                                node_labels, sizes, max_size, probs)
    # print("Score:", sc)
    # return

    # sort ids by score (decreasing)
    sorter = np.argsort(fm_scores)[::-1]
    seg_ids = seg_ids[sorter]
    fm_scores = fm_scores[sorter]

    with open(out_path_scores, 'w') as f:
        json.dump(fm_scores.tolist(), f)
    with open(out_path_ids, 'w') as f:
        json.dump(seg_ids.tolist(), f)
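# Hedged sketch of `weight_quantile_heuristic` (not defined in this snippet):
# score a candidate object by a high quantile of the boundary probabilities
# on the edges between its watershed nodes, weighted by relative object size.
# The exact quantile and weighting of the real implementation are assumptions.
import numpy as np

def weight_quantile_heuristic(seg_id, graph, node_labels, sizes,
                              max_size, probs, quantile=.9):
    node_ids = np.where(node_labels == seg_id)[0].astype('uint64')
    # extractSubgraphFromNodes returns (inner_edges, outer_edges, sub_uvs)
    inner_edges = graph.extractSubgraphFromNodes(node_ids)[0]
    if len(inner_edges) == 0:
        return 0.
    score = np.quantile(probs[inner_edges], quantile)
    # larger objects are more likely to contain a false merge
    return score * (sizes[seg_id] / max_size)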
def solve_subproblems(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    costs_path = config['costs_path']
    costs_key = config['costs_key']
    graph_path = config['graph_path']
    graph_key = config['graph_key']
    decomposition_path = config['decomposition_path']
    tmp_folder = config['tmp_folder']
    component_list = config['block_list']
    n_threads = config['threads_per_job']
    agglomerator_key = config['agglomerator']

    with vu.file_reader(costs_path, 'r') as f:
        ds = f[costs_key]
        ds.n_threads = n_threads
        costs = ds[:]

    with vu.file_reader(decomposition_path, 'r') as f:
        ds = f['graph_labels']
        ds.n_threads = n_threads
        graph_labels = ds[:]

    # load the graph
    graph = ndist.Graph(os.path.join(graph_path, graph_key),
                        numberOfThreads=n_threads)
    uv_ids = graph.uvIds()

    agglomerator = su.key_to_agglomerator(agglomerator_key)

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [tp.submit(_solve_component, component_id,
                           graph, uv_ids, graph_labels,
                           costs, agglomerator)
                 for component_id in component_list]
        results = [t.result() for t in tasks]

    cut_edge_ids = np.concatenate([res for res in results if res is not None])
    cut_edge_ids = np.unique(cut_edge_ids)

    res_folder = os.path.join(tmp_folder, 'subproblem_results')
    job_res_path = os.path.join(res_folder, 'job%i.npy' % job_id)
    fu.log("saving cut edge results to %s" % job_res_path)
    np.save(job_res_path, cut_edge_ids)
    fu.log_job_success(job_id)
def multicut_step1(graph_path, block_prefix, scale, tmp_folder,
                   agglomerator_key, initial_block_shape,
                   block_file, n_threads, cut_outer_edges=True):
    t0 = time.time()
    agglomerator = AGGLOMERATORS[agglomerator_key]
    costs = z5py.File(os.path.join(tmp_folder, 'merged_graph.n5/s%i' % scale),
                      use_zarr_format=False)['costs'][:]

    block_ids = np.load(block_file)

    shape = z5py.File(graph_path).attrs['shape']
    factor = 2**scale
    block_shape = [factor * bs for bs in initial_block_shape]

    # TODO we should have symlinks instead of the if else
    if scale == 0:
        graph_path_ = os.path.join(graph_path, 'graph')
    else:
        graph_path_ = os.path.join(tmp_folder, 'merged_graph.n5', 's%i' % scale)
    graph = ndist.Graph(graph_path_)

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [tp.submit(solve_block_subproblem, block_id, graph,
                           block_prefix, costs, agglomerator,
                           shape, block_shape, cut_outer_edges)
                 for block_id in block_ids]
        results = [t.result() for t in tasks]

    results = [res for res in results if res is not None]
    if len(results) > 0:
        cut_edge_ids = np.concatenate(results)
        cut_edge_ids = np.unique(cut_edge_ids).astype('uint64')
    else:
        cut_edge_ids = np.zeros(0, dtype='uint64')

    # parse the job id from the block file name
    job_id = int(os.path.split(block_file)[1].split('_')[3][:-4])
    np.save(os.path.join(tmp_folder, '1_output_s%i_%i.npy' % (scale, job_id)),
            cut_edge_ids)
    print("Success job %i" % job_id)
    print("In %f s" % (time.time() - t0,))
def solve_subproblems(graph_path, costs_path, scale, job_id,
                      config_path, tmp_folder, cut_outer_edges=True):
    # TODO support more agglomerators
    agglomerator_key = 'multicut_kl'
    agglomerator = AGGLOMERATORS[agglomerator_key]
    costs = z5py.File(costs_path)['costs'][:]

    with open(config_path) as f:
        config = json.load(f)
        initial_block_shape = config['block_shape']
        n_threads = config['n_threads']
        block_ids = config['block_list']

    shape = z5py.File(graph_path).attrs['shape']
    factor = 2**scale
    block_shape = [factor * bs for bs in initial_block_shape]

    if scale == 0:
        graph_path_ = os.path.join(graph_path, 'graph')
        block_prefix = os.path.join(graph_path, 'sub_graphs', 's0', 'block_')
    else:
        graph_path_ = os.path.join(tmp_folder, 'merged_graph.n5', 's%i' % scale)
        block_prefix = os.path.join(graph_path_, 'sub_graphs', 'block_')

    # TODO parallelize ?!
    # load the complete graph
    graph = ndist.Graph(graph_path_)

    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [tp.submit(solve_block_subproblem, block_id, graph,
                           block_prefix, costs, agglomerator,
                           shape, block_shape, tmp_folder, scale,
                           cut_outer_edges)
                 for block_id in block_ids]
        results = [t.result() for t in tasks]

    results = [res for res in results if res is not None]
    if len(results) > 0:
        cut_edge_ids = np.concatenate(results)
        cut_edge_ids = np.unique(cut_edge_ids).astype('uint64')
    else:
        cut_edge_ids = np.zeros(0, dtype='uint64')
def _check_subresults(self):
    f = z5py.File(self.input_path)
    f_out = z5py.File(self.output_path)
    ds_ws = f[self.input_key]

    shape = ds_ws.shape
    blocking = nt.blocking([0, 0, 0], list(shape), self.block_shape)

    f_graph = z5py.File(self.output_path)
    halo = [1, 1, 1]
    for block_id in range(blocking.numberOfBlocks):
        # get the block with the appropriate halo
        # and the corresponding bounding box
        block = blocking.getBlockWithHalo(block_id, halo)
        outer_block, inner_block = block.outerBlock, block.innerBlock
        bb = tuple(slice(beg, end) for beg, end in zip(inner_block.begin,
                                                       outer_block.end))

        # check that the rois are correct
        block_key = os.path.join('s0', 'sub_graphs', 'block_%i' % block_id)
        roi_begin = f_out[block_key].attrs['roiBegin']
        roi_end = f_out[block_key].attrs['roiEnd']
        self.assertEqual(inner_block.begin, roi_begin)
        self.assertEqual(outer_block.end, roi_end)

        # load the graph
        graph_path = os.path.join(self.output_path, block_key)
        graph = ndist.Graph(graph_path)
        nodes_deser = ndist.loadNodes(graph_path)

        # load the segmentation and check that the nodes are correct
        seg = ds_ws[bb]
        nodes = graph.nodes()
        nodes_ws = np.unique(seg)
        self.assertTrue(np.allclose(nodes_ws, nodes_deser))
        self.assertTrue(np.allclose(nodes_ws, nodes))

        # compute the rag and check that the graph is correct
        rag = nrag.gridRag(seg, numberOfLabels=int(seg.max()) + 1)
        # number of nodes in nifty can be larger
        self.assertGreaterEqual(rag.numberOfNodes, graph.numberOfNodes)
        self.assertEqual(rag.numberOfEdges, graph.numberOfEdges)
        self.assertTrue(np.allclose(rag.uvIds(), graph.uvIds()))
def test_graph_connected_components(self):
    from cluster_tools.postprocess import ConnectedComponentsWorkflow
    task = ConnectedComponentsWorkflow
    self.compute_graph(ignore_label=False)

    # check the graph again
    g = self.compute_nifty_graph()
    g1 = ndist.Graph(self.output_path, self.graph_key)
    self.assertEqual(g.numberOfNodes, g1.numberOfNodes)
    self.assertEqual(g.numberOfEdges, g1.numberOfEdges)
    self.assertTrue(np.allclose(g.uvIds(), g1.uvIds()))

    assignment_key = 'initial_assignments'
    assignments = self.make_assignments(g, self.output_path, assignment_key)

    # compute expected components
    expected = nifty.graph.connectedComponentsFromNodeLabels(g, assignments)
    vigra.analysis.relabelConsecutive(expected, out=expected)

    out_key = 'connected_components'
    t = task(tmp_folder=self.tmp_folder, config_dir=self.config_folder,
             target=self.target, max_jobs=self.max_jobs,
             problem_path=self.output_path, graph_key=self.graph_key,
             assignment_path=self.output_path, assignment_key=assignment_key,
             output_path=self.output_path, assignment_out_key=out_key)
    ret = luigi.build([t], local_scheduler=True)
    self.assertTrue(ret)

    # load the output components
    with z5py.File(self.output_path) as f:
        results = f[out_key][:]

    # compare
    self.assertEqual(results.shape, expected.shape)
    ri, _ = rand_index(results, expected)
    self.assertAlmostEqual(ri, 0.)
def graph_connected_components(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    problem_path = config['problem_path']
    graph_key = config['graph_key']
    assignment_path = config['assignment_path']
    assignment_key = config['assignment_key']
    output_path = config['output_path']
    output_key = config['output_key']
    n_threads = config.get('n_threads', 8)

    with vu.file_reader(assignment_path, 'r') as f:
        ds_ass = f[assignment_key]
        ds_ass.n_threads = n_threads
        assignments = ds_ass[:]
        chunks = ds_ass.chunks

    graph = ndist.Graph(os.path.join(problem_path, graph_key), n_threads)
    # TODO check if we actually have an ignore label
    assignments = ndist.connectedComponentsFromNodes(graph, assignments, True)
    vigra.analysis.relabelConsecutive(assignments, out=assignments,
                                      start_label=1, keep_zeros=True)

    with vu.file_reader(output_path) as f:
        ds_out = f.require_dataset(output_key, shape=assignments.shape,
                                   chunks=chunks, compression='gzip',
                                   dtype='uint64')
        ds_out.n_threads = n_threads
        ds_out[:] = assignments

    fu.log_job_success(job_id)
def check_result(self, seg_key):
    # check shapes
    with z5py.File(self.input_path) as f:
        seg = f[seg_key]
        seg.n_threads = 8
        seg = seg[:]
        shape = seg.shape
    with z5py.File(self.output_path) as f:
        shape_ = tuple(f[self.graph_key].attrs['shape'])
    self.assertEqual(shape, shape_)

    # check graph
    # compute nifty rag
    rag = nrag.gridRag(seg, numberOfLabels=int(seg.max()) + 1)
    # load the graph
    graph = ndist.Graph(self.output_path, self.output_key)

    self.assertEqual(rag.numberOfNodes, graph.numberOfNodes)
    self.assertEqual(rag.numberOfEdges, graph.numberOfEdges)
    self.assertTrue(np.array_equal(rag.uvIds(), graph.uvIds()))
def test_components_dist_toy(self):
    uv_ids, node_labels, expected_cc = self.toy_problem()

    graph_path = os.path.join(self.tmp_folder, 'graph.n5')
    with z5py.File(graph_path) as f:
        g = f.create_group('graph')
        g.attrs['numberOfEdges'] = len(uv_ids)
        g.create_dataset('edges', data=uv_ids,
                         chunks=(int(1e5), 2), compression='raw')

    g = ndist.Graph(graph_path, 'graph')
    self.assertEqual(g.numberOfNodes, uv_ids.max() + 1)
    self.assertEqual(g.numberOfEdges, len(uv_ids))

    result = ndist.connectedComponentsFromNodes(g, node_labels, True)
    self.assertEqual(len(result), len(expected_cc))
    ri, _ = rand_index(result, expected_cc)
    self.assertAlmostEqual(ri, 0.)
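# Hedged sketch of a `toy_problem` consistent with the test above: a small
# graph, node labels, and the components expected from merging all edges
# whose endpoints carry the same label.
import numpy as np

def toy_problem():
    uv_ids = np.array([[0, 1], [1, 2], [3, 4]], dtype='uint64')
    node_labels = np.array([1, 1, 2, 3, 3], dtype='uint64')
    # edge (0, 1) connects nodes with the same label and is merged,
    # edge (1, 2) connects different labels and stays cut, edge (3, 4) is
    # merged; expected components: {0, 1}, {2}, {3, 4}
    expected_cc = np.array([0, 0, 1, 2, 2], dtype='uint64')
    return uv_ids, node_labels, expected_cc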
def compute_energy(name):
    exp_path = './exp_data/%s.n5' % name
    f = z5py.File(exp_path)

    g = ndist.Graph(os.path.join(exp_path, 's0', 'graph'))
    graph = nifty.graph.undirectedGraph(g.numberOfNodes + 1)
    graph.insertEdges(g.uvIds())

    costs = f['s0/costs'][:]
    lifted_costs = f['s0/lifted_costs_%s' % name][:]
    lifted_uvs = f['s0/lifted_nh_%s' % name][:]

    obj = nlmc.liftedMulticutObjective(graph)
    obj.setGraphEdgesCosts(costs)
    obj.setCosts(lifted_uvs, lifted_costs)

    path = '/g/kreshuk/data/FIB25/cutout.n5'
    res_key = 'node_labels/%s' % name
    with z5py.File(path) as f:
        node_labels = f[res_key][:]

    e = obj.evalNodeLabels(node_labels)
    return e
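# Hypothetical usage of compute_energy: the experiment names below are made
# up; they stand for different lifted-cost variants stored in ./exp_data.
for name in ('lifted_a', 'lifted_b'):
    print(name, '->', compute_energy(name))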
def _write_graph(self):
    graph_path = os.path.join(self.input_path, self.graph_key)
    graph = ndist.Graph(graph_path)
    uv_ids = graph.uvIds().astype('uint32')

    # the ilastik graph serialization corresponds to the serialization of
    # vigra::AdjacencyListGraph, see
    # https://github.com/constantinpape/vigra/blob/master/include/vigra/adjacency_list_graph.hxx#L536
    # first element: node and edge numbers, max node and edge ids
    n_nodes = graph.numberOfNodes
    n_edges = graph.numberOfEdges
    serialization = [np.array([n_nodes, n_edges,
                               graph.maxNodeId, graph.maxEdgeId],
                              dtype='uint32')]
    # second element: uv-ids
    serialization.append(uv_ids.flatten())
    # third element: node neighborhoods (convenience function
    # implemented in C++ for this)
    serialization.append(graph.flattenedNeighborhoods().astype('uint32'))
    serialization = np.concatenate(serialization)

    ilastik_graph_key = 'preprocessing/graph/graph'
    ilastik_seed_key = 'preprocessing/graph/nodeSeeds'
    ilastik_res_key = 'preprocessing/graph/resultSegmentation'
    with h5py.File(self.output_path) as f:
        f.create_dataset(ilastik_graph_key, data=serialization,
                         compression='gzip')
        # initialize node seed labels and result labels with zeros
        f.create_dataset(ilastik_seed_key, shape=(graph.maxNodeId + 1,),
                         dtype='uint8')
        f.create_dataset(ilastik_res_key, shape=(graph.maxNodeId + 1,),
                         dtype='uint8')
        f['preprocessing/graph'].attrs['numNodes'] = n_nodes
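# Hedged sketch: reading the serialization written above back from the
# ilastik project file (the file name is made up). The layout follows the
# comments in _write_graph: a header of four uint32 values, then the
# flattened uv-ids, then the flattened node neighborhoods.
import h5py

with h5py.File('example_project.ilp', 'r') as f:
    ser = f['preprocessing/graph/graph'][:]
n_nodes, n_edges = int(ser[0]), int(ser[1])
max_node_id, max_edge_id = int(ser[2]), int(ser[3])
uv_ids = ser[4:4 + 2 * n_edges].reshape((n_edges, 2))
neighborhoods = ser[4 + 2 * n_edges:]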
def __init__(self, graph_path, graph_key,
             weights_path, weights_key,
             n_threads, ignore_label=None):
    self.n_threads = n_threads

    # load graph and weights
    self.graph = ndist.Graph(os.path.join(graph_path, graph_key), n_threads)
    self.uv_ids = self.graph.uvIds()

    weight_ds = z5py.File(weights_path)[weights_key]
    weight_ds.n_threads = self.n_threads
    self.weights = weight_ds[:, 0].squeeze() if weight_ds.ndim == 2 else weight_ds[:]
    assert len(self.weights) == self.graph.numberOfEdges

    # we need to set edges to the ignore label to be maximally repulsive
    if ignore_label is not None:
        ignore_mask = (self.uv_ids == ignore_label).any(axis=1)
        self.weights[ignore_mask] = 1.
def load_all_data(self):
    self.ds_raw = open_file(self.raw_path)[self.raw_key]
    self.ds_ws = open_file(self.ws_path)[self.ws_key]
    if self.ds_ws.attrs.get('isLabelMultiset', False):
        self.ds_ws = LabelMultisetWrapper(self.ds_ws)

    self.shape = self.ds_raw.shape
    assert self.ds_ws.shape == self.shape

    with open_file(self.node_label_path, 'r') as f:
        self.node_labels = f[self.node_label_key][:]

    with open(self.false_merge_id_path) as f:
        self.false_merge_ids = np.array(json.load(f))

    if os.path.exists(self.processed_ids_file):
        with open(self.processed_ids_file) as f:
            self.processed_ids = json.load(f)
    else:
        self.processed_ids = []

    if os.path.exists(self.bg_ids_file):
        with open(self.bg_ids_file) as f:
            self.background_ids = json.load(f)
    else:
        self.background_ids = []

    already_processed = np.in1d(self.false_merge_ids, self.processed_ids)
    missing_ids = self.false_merge_ids[~already_processed]

    if os.path.exists(self.annotation_path):
        with open(self.annotation_path) as f:
            self.annotations = json.load(f)
    else:
        self.annotations = {}

    self.next_queue = queue.Queue()
    for mi in missing_ids:
        self.next_queue.put_nowait(mi)

    self.graph = ndist.Graph(self.problem_path, self.graph_key, self.n_threads)
    self.uv_ids = self.graph.uvIds()
    assert len(self.uv_ids) > 0

    with open_file(self.problem_path, 'r') as f:
        ds = f[self.feat_key]
        ds.n_threads = self.n_threads
        self.probs = ds[:, 0]

    # morphology table entries:
    # id (1)
    # size (1)
    # com (3)
    # bb-min (3)
    # bb-max (3)
    with open_file(self.table_path, 'r') as f:
        table = f[self.table_key][:]
    self.bb_starts = table[:, 5:8]
    self.bb_stops = table[:, 8:11]
    self.bb_starts /= self.scale_factor
    self.bb_stops /= self.scale_factor
def check_subresults(self, seg_key):
    f = z5py.File(self.input_path)
    f_out = z5py.File(self.output_path)
    ds_ws = f[seg_key]

    full_graph = ndist.Graph(self.output_path, self.output_key)

    shape = ds_ws.shape
    blocking = nt.blocking([0, 0, 0], list(shape), self.block_shape)

    ds_nodes = f_out["s0/sub_graphs/nodes"]
    ds_edges = f_out["s0/sub_graphs/edges"]
    ds_edge_ids = f_out["s0/sub_graphs/edge_ids"]

    halo = [1, 1, 1]
    for block_id in range(blocking.numberOfBlocks):
        # get the block with the appropriate halo
        # and the corresponding bounding boxes
        block = blocking.getBlockWithHalo(block_id, halo)
        outer_block, inner_block = block.outerBlock, block.innerBlock
        bb1 = tuple(slice(beg, end) for beg, end in zip(inner_block.begin,
                                                        inner_block.end))
        bb2 = tuple(slice(beg, end) for beg, end in zip(outer_block.begin,
                                                        inner_block.end))

        # load the nodes
        chunk_id = blocking.blockGridPosition(block_id)
        nodes_deser = ds_nodes.read_chunk(chunk_id)

        # load the segmentation and check that the nodes are correct
        seg1 = ds_ws[bb1]
        nodes_ws = np.unique(seg1)
        self.assertTrue(np.array_equal(nodes_ws, nodes_deser))

        # load the edges and construct the graph
        edges = ds_edges.read_chunk(chunk_id)
        if edges is None:
            self.assertEqual(len(nodes_ws), 1)
            continue
        edges = edges.reshape((edges.size // 2, 2))
        graph = ndist.Graph(edges)

        # compute the rag and check that the graph is correct
        seg2 = ds_ws[bb2]

        # check the graph nodes (only if we have edges)
        if graph.numberOfEdges > 0:
            nodes = graph.nodes()
            nodes_ws2 = np.unique(seg2)
            self.assertTrue(np.array_equal(nodes_ws2, nodes))

        rag = nrag.gridRag(seg2, numberOfLabels=int(seg2.max()) + 1)
        # number of nodes in nifty can be larger
        self.assertGreaterEqual(rag.numberOfNodes, graph.numberOfNodes)
        self.assertEqual(rag.numberOfEdges, graph.numberOfEdges)
        uv_ids = graph.uvIds()
        self.assertTrue(np.array_equal(rag.uvIds(), uv_ids))

        if graph.numberOfEdges == 0:
            continue

        # check the edge ids
        edge_ids = ds_edge_ids.read_chunk(chunk_id)
        self.assertEqual(len(edge_ids), graph.numberOfEdges)
        expected_ids = full_graph.findEdges(uv_ids)
        self.assertTrue(np.array_equal(edge_ids, expected_ids))
def solve_lifted_subproblems(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    problem_path = config['problem_path']
    scale = config['scale']
    block_shape = config['block_shape']
    block_list = config['block_list']
    lifted_prefix = config['lifted_prefix']
    agglomerator_key = config['agglomerator']
    time_limit = config.get('time_limit_solver', None)
    n_threads = config.get('threads_per_job', 1)

    fu.log("reading problem from %s" % problem_path)
    problem = z5py.N5File(problem_path)
    shape = problem.attrs['shape']

    # load the costs
    # NOTE we use different cost identifiers for multicut and lifted multicut
    # in order to run both in the same n5-container.
    # However, for scale level 0 the costs come from the CostsWorkflow and
    # hence the identifier is identical
    costs_key = 's%i/costs_lmc' % scale if scale > 0 else 's0/costs'
    fu.log("reading costs from path in problem: %s" % costs_key)
    ds = problem[costs_key]
    ds.n_threads = n_threads
    costs = ds[:]

    # load the graph
    # NOTE we use different graph identifiers for multicut and lifted multicut
    # in order to run both in the same n5-container.
    # However, for scale level 0 the graph comes from the GraphWorkflow and
    # hence the identifier is identical
    graph_key = 's%i/graph_lmc' % scale if scale > 0 else 's0/graph'
    fu.log("reading graph from path in problem: %s" % graph_key)
    graph = ndist.Graph(os.path.join(problem_path, graph_key),
                        numberOfThreads=n_threads)
    uv_ids = graph.uvIds()
    # check if the problem has an ignore-label
    ignore_label = problem[graph_key].attrs['ignoreLabel']
    fu.log("ignore label is %s" % ('true' if ignore_label else 'false'))

    fu.log("using agglomerator %s" % agglomerator_key)
    lifted_agglomerator = su.key_to_lifted_agglomerator(agglomerator_key)
    # TODO enable different multicut agglomerator
    agglomerator = su.key_to_agglomerator(agglomerator_key)

    # load the lifted edges and costs
    nh_key = 's%i/lifted_nh_%s' % (scale, lifted_prefix)
    lifted_costs_key = 's%i/lifted_costs_%s' % (scale, lifted_prefix)
    ds = problem[nh_key]
    fu.log("reading lifted uvs")
    ds.n_threads = n_threads
    lifted_uvs = ds[:]

    fu.log("reading lifted costs")
    ds = problem[lifted_costs_key]
    ds.n_threads = n_threads
    lifted_costs = ds[:]

    # the output group
    out = problem['s%i/sub_results_lmc' % scale]

    # NOTE we use different sub-graph identifiers for multicut and lifted multicut
    # in order to run both in the same n5-container.
    # However, for scale level 0 the sub-graphs come from the GraphWorkflow and
    # are hence identical
    sub_graph_identifier = 'sub_graphs' if scale == 0 else 'sub_graphs_lmc'
    block_prefix = os.path.join(problem_path, 's%i' % scale,
                                sub_graph_identifier, 'block_')
    blocking = nt.blocking([0, 0, 0], shape, list(block_shape))

    fu.log("start processing %i blocks" % len(block_list))
    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [tp.submit(_solve_block_problem,
                           block_id, graph, uv_ids, block_prefix,
                           costs, lifted_uvs, lifted_costs,
                           lifted_agglomerator, agglomerator,
                           ignore_label, blocking, out, time_limit)
                 for block_id in block_list]
        [t.result() for t in tasks]
    fu.log_job_success(job_id)
def resolve_inidividual_objects(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    problem_path = config['problem_path']
    objects_path = config['objects_path']
    objects_group = config['objects_group']
    assignment_in_path = config['assignment_in_path']
    assignment_in_key = config['assignment_in_key']
    assignment_out_path = config['assignment_out_path']
    assignment_out_key = config['assignment_out_key']
    agglomerator_key = config['agglomerator']
    time_limit = config.get('time_limit_solver', None)
    n_threads = config.get('threads_per_job', 1)

    fu.log("reading problem from %s" % problem_path)
    problem = vu.file_reader(problem_path)

    # load the costs
    costs_key = 's0/costs'
    fu.log("reading costs from path in problem: %s" % costs_key)
    ds = problem[costs_key]
    ds.n_threads = n_threads
    costs = ds[:]

    # load the graph
    graph_key = 's0/graph'
    fu.log("reading graph from path in problem: %s" % graph_key)
    graph = ndist.Graph(os.path.join(problem_path, graph_key),
                        numberOfThreads=n_threads)

    fu.log("using agglomerator %s" % agglomerator_key)
    agglomerator = su.key_to_lifted_agglomerator(agglomerator_key)

    # load assignments
    f = vu.file_reader(assignment_in_path)
    ds = f[assignment_in_key]
    ds.n_threads = n_threads
    assignments = ds[:]

    # load the object group
    objects = vu.file_reader(objects_path)[objects_group]

    new_assignments = _solve_objects(objects, graph, assignments, costs,
                                     agglomerator, n_threads, time_limit)

    chunks = ds.chunks
    f = vu.file_reader(assignment_out_path)
    ds = f.require_dataset(assignment_out_key, shape=new_assignments.shape,
                           chunks=chunks, compression='gzip', dtype='uint64')
    ds.n_threads = n_threads
    ds[:] = new_assignments
    fu.log_job_success(job_id)
def lifted_problem_from_segmentation(rag, watershed, input_segmentation,
                                     overlap_threshold, graph_depth,
                                     same_segment_cost, different_segment_cost,
                                     mode='all', n_threads=None):
    """ Compute lifted problem from segmentation by mapping segments to
        watershed superpixels.

    Arguments:
        rag [RegionAdjacencyGraph] - the region adjacency graph
        watershed [np.ndarray] - the watershed over-segmentation
        input_segmentation [np.ndarray] - segmentation used to determine node attribution
        overlap_threshold [float] - minimal overlap to assign a segment id to node
        graph_depth [int] - maximal graph depth up to which
            lifted edges will be included
        same_segment_cost [float] - costs for edges between nodes with same segment id attribution
        different_segment_cost [float] - costs for edges between nodes with different segment id attribution
        mode [str] - mode for insertion of lifted edges. Can be
            "all" - lifted edges will be inserted in between all nodes with attribution
            "different" - lifted edges will only be inserted in between nodes attributed to different classes
            "same" - lifted edges will only be inserted in between nodes attributed to the same class
            (default: "all")
        n_threads [int] - number of threads used for the calculation (default: None)
    """
    n_threads = multiprocessing.cpu_count() if n_threads is None else n_threads
    assert input_segmentation.shape == watershed.shape

    # compute the overlaps
    ovlp_comp = ngt.overlap(watershed, input_segmentation)
    ws_ids = np.unique(watershed)
    n_labels = ws_ids[-1] + 1
    assert n_labels == rag.numberOfNodes, "%i, %i" % (n_labels, rag.numberOfNodes)

    # initialise the array for node labels, to be
    # dense in the watershed id space (even if some ws-ids are not present)
    node_labels = np.zeros(n_labels, dtype='uint64')

    # extract the overlap values and node labels from the
    # overlap computation results
    overlaps = [ovlp_comp.overlapArraysNormalized(ws_id, sorted=False)
                for ws_id in ws_ids]
    node_label_vals = np.array([ovlp[0][0] for ovlp in overlaps])
    overlap_values = np.array([ovlp[1][0] for ovlp in overlaps])
    node_label_vals[overlap_values < overlap_threshold] = 0
    assert len(node_label_vals) == len(ws_ids)
    node_labels[ws_ids] = node_label_vals

    # find all lifted edges up to the graph depth between mapped nodes
    # NOTE we need to convert to the different graph type for now, but
    # it would be nice to support all nifty graphs at some point
    uv_ids = rag.uvIds()
    g_temp = ndist.Graph(uv_ids)

    lifted_uvs = ndist.liftedNeighborhoodFromNodeLabels(g_temp, node_labels, graph_depth,
                                                        mode=mode, numberOfThreads=n_threads,
                                                        ignoreLabel=0)
    # make sure that the lifted uv ids are in range of the node labels
    assert lifted_uvs.max() < rag.numberOfNodes, "%i, %i" % (int(lifted_uvs.max()),
                                                             rag.numberOfNodes)
    lifted_labels = node_labels[lifted_uvs]
    # one cost per lifted edge
    lifted_costs = np.zeros(len(lifted_uvs), dtype='float32')

    same_mask = lifted_labels[:, 0] == lifted_labels[:, 1]
    lifted_costs[same_mask] = same_segment_cost
    lifted_costs[~same_mask] = different_segment_cost

    return lifted_uvs, lifted_costs
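# Hedged usage sketch for lifted_problem_from_segmentation. The file names
# and parameter values are made up; they only illustrate the call signature.
import numpy as np
import nifty.graph.rag as nrag

ws = np.load('watershed.npy')                # hypothetical over-segmentation
seg = np.load('semantic_segmentation.npy')   # hypothetical input segmentation
rag = nrag.gridRag(ws, numberOfLabels=int(ws.max()) + 1)
lifted_uvs, lifted_costs = lifted_problem_from_segmentation(
    rag, ws, seg,
    overlap_threshold=.5, graph_depth=4,
    same_segment_cost=5., different_segment_cost=-5.
)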
def lifted_problem_from_probabilities(rag, watershed, input_maps,
                                      assignment_threshold, graph_depth,
                                      feats_to_costs=feats_to_costs_default,
                                      mode='different', n_threads=None):
    """ Compute lifted problem from probability maps by mapping them to superpixels.

    Arguments:
        rag [RegionAdjacencyGraph] - the region adjacency graph
        watershed [np.ndarray] - the watershed over-segmentation
        input_maps [list[np.ndarray]] - list of probability maps. Each
            map must have the same shape as the watersheds and each map is
            treated as the probability to correspond to a different class.
        assignment_threshold [float] - minimal expression level to assign a
            class to a graph node (= watershed segment)
        graph_depth [int] - maximal graph depth up to which
            lifted edges will be included
        feats_to_costs [callable] - function to calculate the lifted costs from the
            class assignment probabilities. It receives as inputs `lifted_labels`,
            which stores the two classes assigned to a lifted edge, and
            `lifted_features`, which stores the two assignment probabilities.
            (default: feats_to_costs_default)
        mode [str] - mode for insertion of lifted edges. Can be
            "all" - lifted edges will be inserted in between all nodes with attribution
            "different" - lifted edges will only be inserted in between nodes attributed to different classes
            "same" - lifted edges will only be inserted in between nodes attributed to the same class
            (default: "different")
        n_threads [int] - number of threads used for the calculation (default: None)
    """
    n_threads = multiprocessing.cpu_count() if n_threads is None else n_threads

    # validate inputs
    assert isinstance(input_maps, (list, tuple))
    assert all(isinstance(inp, np.ndarray) for inp in input_maps)
    shape = watershed.shape
    assert all(inp.shape == shape for inp in input_maps)

    # map the probability maps to superpixels - we only map to superpixels which
    # have a larger mean expression than `assignment_threshold`

    # TODO handle the dtype conversion for vigra gracefully somehow ...
    # think about supporting uint8 input and normalizing
    # TODO how do we handle cases where the same superpixel is mapped to
    # more than one class ?
    n_nodes = int(watershed.max()) + 1
    node_labels = np.zeros(n_nodes, dtype='uint64')
    node_features = np.zeros(n_nodes, dtype='float32')
    # TODO we could allow for more features that could then be used for the cost estimation
    for class_id, inp in enumerate(input_maps):
        mean_prob = vigra.analysis.extractRegionFeatures(inp, watershed,
                                                         features=['mean'])['mean']
        # we can in principle map multiple classes here, and right now will just override
        class_mask = mean_prob > assignment_threshold
        node_labels[class_mask] = class_id
        node_features[class_mask] = mean_prob[class_mask]

    # find all lifted edges up to the graph depth between mapped nodes
    # NOTE we need to convert to the different graph type for now, but
    # it would be nice to support all nifty graphs at some point
    uv_ids = rag.uvIds()
    g_temp = ndist.Graph(uv_ids)

    lifted_uvs = ndist.liftedNeighborhoodFromNodeLabels(g_temp, node_labels, graph_depth,
                                                        mode=mode, numberOfThreads=n_threads,
                                                        ignoreLabel=0)
    lifted_labels = node_labels[lifted_uvs]
    lifted_features = node_features[lifted_uvs]

    lifted_costs = feats_to_costs(lifted_labels, lifted_features)
    return lifted_uvs, lifted_costs
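# Hedged usage sketch for lifted_problem_from_probabilities, reusing `rag`
# and `ws` from the sketch above. The probability maps are made up, and a
# custom feats_to_costs is shown to illustrate the expected callable
# signature; the cost rule itself is an illustrative assumption.
import numpy as np

def my_feats_to_costs(lifted_labels, lifted_features):
    # attractive for same-class pairs, repulsive for different-class pairs
    costs = np.where(lifted_labels[:, 0] == lifted_labels[:, 1], 5., -5.)
    return costs.astype('float32')

prob_maps = [np.load('class0_probs.npy'),   # hypothetical per-class
             np.load('class1_probs.npy')]   # probability maps
lifted_uvs, lifted_costs = lifted_problem_from_probabilities(
    rag, ws, prob_maps,
    assignment_threshold=.6, graph_depth=4,
    feats_to_costs=my_feats_to_costs
)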
def decompose(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)
    # input configs
    costs_path = config['costs_path']
    costs_key = config['costs_key']
    graph_path = config['graph_path']
    graph_key = config['graph_key']
    output_path = config['output_path']
    n_threads = config['threads_per_job']

    with vu.file_reader(costs_path, 'r') as f:
        ds = f[costs_key]
        ds.n_threads = n_threads
        costs = ds[:]

    with vu.file_reader(graph_path, 'r') as f:
        ignore_label = f[graph_key].attrs['ignoreLabel']

    # load the graph
    graph = ndist.Graph(os.path.join(graph_path, graph_key),
                        numberOfThreads=n_threads)

    # mark repulsive edges as cut
    edge_labels = costs < 0
    # find the connected components
    labels = ndist.connectedComponents(graph, edge_labels, ignore_label)
    labels, max_id, _ = vigra.analysis.relabelConsecutive(labels, keep_zeros=True,
                                                          start_label=1)

    # find the edges between connected components, which will be cut
    uv_ids = graph.uvIds()
    cut_edges = labels[uv_ids[:, 0]] != labels[uv_ids[:, 1]]
    cut_edges = np.where(cut_edges)[0].astype('uint64')

    n_nodes = labels.shape[0]
    node_shape = (n_nodes,)
    node_chunks = (min(n_nodes, 524288),)
    n_edges = cut_edges.shape[0]
    edge_shape = (n_edges,)
    edge_chunks = (min(n_edges, 524288),)

    with vu.file_reader(output_path) as f:
        ds = f.require_dataset('graph_labels', dtype='uint64',
                               shape=node_shape, chunks=node_chunks,
                               compression='gzip')
        ds.n_threads = n_threads
        ds[:] = labels
        ds.attrs['max_id'] = max_id

        ds = f.require_dataset('cut_edges', dtype='uint64',
                               shape=edge_shape, chunks=edge_chunks,
                               compression='gzip')
        ds.n_threads = n_threads
        ds[:] = cut_edges

    fu.log_job_success(job_id)
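# Toy illustration of the decomposition above: edges with negative costs are
# marked as cut and connected components are computed on the remaining graph.
# ndist.Graph is constructed directly from uv-ids here, as in the tests in
# this file; the numbers are made up.
import numpy as np
import nifty.distributed as ndist

toy_uv_ids = np.array([[1, 2], [2, 3], [3, 4]], dtype='uint64')
toy_graph = ndist.Graph(toy_uv_ids)
toy_costs = np.array([2., -1., 3.])
toy_edge_labels = toy_costs < 0
# edge (2, 3) is repulsive and gets cut, so {1, 2} and {3, 4} end up in
# different components (the last argument is the ignore-label flag)
toy_labels = ndist.connectedComponents(toy_graph, toy_edge_labels, True)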
def _accumulate_block(block_id, blocking,
                      ds_in, ds_labels, ds_edges, ds_out,
                      filters, sigmas, halo, ignore_label,
                      apply_in_2d, channel_agglomeration):
    fu.log("start processing block %i" % block_id)
    chunk_pos = blocking.blockGridPosition(block_id)

    # load the edges and construct the graph if this block has edges
    edges = ds_edges.read_chunk(chunk_pos)
    if edges is None:
        fu.log("block %i has no edges" % block_id)
        fu.log_block_success(block_id)
        return
    edges = edges.reshape((edges.size // 2, 2))
    graph = ndist.Graph(edges)

    shape = ds_labels.shape
    # get the bounding box
    if sum(halo) > 0:
        block = blocking.getBlockWithHalo(block_id, halo)
        block_shape = block.outerBlock.shape
        bb_in = vu.block_to_bb(block.outerBlock)
        bb = vu.block_to_bb(block.innerBlock)
        bb_local = vu.block_to_bb(block.innerBlockLocal)
        # increase the inner bounding box by 1 in the positive direction,
        # in accordance with the graph extraction
        bb = tuple(slice(b.start, min(b.stop + 1, sh))
                   for b, sh in zip(bb, shape))
        bb_local = tuple(slice(b.start, min(b.stop + 1, bsh))
                         for b, bsh in zip(bb_local, block_shape))
    else:
        block = blocking.getBlock(block_id)
        bb = vu.block_to_bb(block)
        bb = tuple(slice(b.start, min(b.stop + 1, sh))
                   for b, sh in zip(bb, shape))
        bb_in = bb
        bb_local = slice(None)

    input_dim = ds_in.ndim
    # TODO make choice of channels optional
    if input_dim == 4:
        bb_in = (slice(0, 3),) + bb_in

    input_ = vu.normalize(ds_in[bb_in])
    if input_dim == 4:
        assert channel_agglomeration is not None
        input_ = getattr(np, channel_agglomeration)(input_, axis=0)

    # load labels
    labels = ds_labels[bb]

    # TODO pre-smoothing ?!
    # accumulate the edge features
    edge_features = [_accumulate_filter(input_, graph, labels, bb_local,
                                        filter_name, sigma, ignore_label,
                                        filter_name == filters[-1] and sigma == sigmas[-1],
                                        apply_in_2d)
                     for filter_name in filters for sigma in sigmas]
    edge_features = np.concatenate(edge_features, axis=1)

    # save the features
    fu.log("saving feature result of shape %s" % str(edge_features.shape))
    ds_out.write_chunk(chunk_pos, edge_features.flatten(), True)
    fu.log_block_success(block_id)
    return edge_features.shape[1]
def _accumulate_block(block_id, blocking,
                      ds_in, ds_labels, out_prefix, graph_block_prefix,
                      filters, sigmas, halo, ignore_label,
                      apply_in_2d, channel_agglomeration):
    fu.log("start processing block %i" % block_id)

    # load the graph and check if this block has edges
    graph = ndist.Graph(graph_block_prefix + str(block_id))
    if graph.numberOfEdges == 0:
        fu.log("block %i has no edges" % block_id)
        fu.log_block_success(block_id)
        return

    shape = ds_labels.shape
    # get the bounding box
    if sum(halo) > 0:
        block = blocking.getBlockWithHalo(block_id, halo)
        block_shape = block.outerBlock.shape
        bb_in = vu.block_to_bb(block.outerBlock)
        bb = vu.block_to_bb(block.innerBlock)
        bb_local = vu.block_to_bb(block.innerBlockLocal)
        # increase the inner bounding box by 1 in the positive direction,
        # in accordance with the graph extraction
        bb = tuple(slice(b.start, min(b.stop + 1, sh))
                   for b, sh in zip(bb, shape))
        bb_local = tuple(slice(b.start, min(b.stop + 1, bsh))
                         for b, bsh in zip(bb_local, block_shape))
    else:
        block = blocking.getBlock(block_id)
        bb = vu.block_to_bb(block)
        bb = tuple(slice(b.start, min(b.stop + 1, sh))
                   for b, sh in zip(bb, shape))
        bb_in = bb
        bb_local = slice(None)

    input_dim = ds_in.ndim
    # TODO make choice of channels optional
    if input_dim == 4:
        bb_in = (slice(0, 3),) + bb_in

    input_ = vu.normalize(ds_in[bb_in])
    if input_dim == 4:
        assert channel_agglomeration is not None
        input_ = getattr(np, channel_agglomeration)(input_, axis=0)

    # load labels
    labels = ds_labels[bb]

    # TODO pre-smoothing ?!
    # accumulate the edge features
    edge_features = [_accumulate_filter(input_, graph, labels, bb_local,
                                        filter_name, sigma, ignore_label,
                                        filter_name == filters[-1] and sigma == sigmas[-1],
                                        apply_in_2d)
                     for filter_name in filters for sigma in sigmas]
    edge_features = np.concatenate(edge_features, axis=1)

    # save the features
    save_path = out_prefix + str(block_id)
    fu.log("saving feature result of shape %s to %s" % (str(edge_features.shape),
                                                        save_path))
    save_root, save_key = os.path.split(save_path)
    with z5py.N5File(save_root) as f:
        f.create_dataset(save_key, data=edge_features,
                         chunks=edge_features.shape)
    fu.log_block_success(block_id)
from carving.big_correction import segmentation_correction
from elf.io import open_file
import nifty.distributed as ndist

path = './data/data.n5'
raw_root = 'raw'
ws_root = 'watersheds'
node_label_key = 'node_labels/initial'
save_key = 'node_labels/corrected'

scale = 0
n_scales = 3

with_graph = True
if with_graph:
    graph = ndist.Graph(path, 's0/graph', 4)
    with open_file('./data/data.n5', 'r') as f:
        weights = f['features'][:, 0]
else:
    graph, weights = None, None

segmentation_correction(path, raw_root, scale,
                        path, ws_root, scale,
                        path, node_label_key,
                        path, save_key,
def fix_merges(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    path = config['path']
    problem_path = config['problem_path']
    merge_object_path = config['merge_object_path']
    assignment_key = config['assignment_key']
    out_key = config['out_key']
    graph_key = config['graph_key']
    features_key = config['features_key']
    node_label_key = config['node_label_key']
    from_costs = config['from_costs']
    relabel = config['relabel']
    n_threads = config['threads_per_job']

    # load the merge objects
    with open(merge_object_path) as f:
        merge_objects = json.load(f)

    if len(merge_objects) == 0:
        fu.log("no merges to resolve")
        ln_src = os.path.join(path, assignment_key)
        ln_dst = os.path.join(path, out_key)
        os.symlink(ln_src, ln_dst)
        return
    fu.log("resolving %i merges" % len(merge_objects))

    fu.log("reading problem from %s" % problem_path)
    f = vu.file_reader(path)
    problem = vu.file_reader(problem_path, 'r')

    # load the graph
    fu.log("reading graph from path in problem: %s" % graph_key)
    graph = ndist.Graph(problem_path, graph_key, numberOfThreads=n_threads)

    # load the assignments
    ds = f[assignment_key]
    chunks = ds.chunks
    ds.n_threads = n_threads
    assignments = ds[:]

    # load the costs
    ds = problem[features_key]
    ds.n_threads = n_threads
    if ds.ndim == 2:
        features = ds[:, 0].squeeze()
    else:
        features = ds[:]
    if from_costs:
        minc = features.min()
        fu.log("Mapping costs with range %f to %f to range 0 to 1" % (minc, features.max()))
        features -= minc
        features /= features.max()
        features = 1. - features

    # load the node labels
    ds = problem[node_label_key]
    ds.n_threads = n_threads
    node_labels = ds[:]

    assignments = fix_merge_assignments(graph, assignments, merge_objects,
                                        node_labels, features, n_threads)

    # relabel and save the assignments
    if relabel:
        vigra.analysis.relabelConsecutive(assignments, out=assignments,
                                          start_label=1, keep_zeros=True)
    ds = f.create_dataset(out_key, shape=assignments.shape, chunks=chunks,
                          dtype='uint64', compression='gzip')
    ds.n_threads = n_threads
    ds[:] = assignments
    fu.log_job_success(job_id)