def scale_to_boundaries(job_id, config_path):
    """Run ``_scale_block`` on every block assigned to this job.

    The JSON job config supplies the input, boundary and output datasets,
    the ``offset``, the erosion settings (``erode_by`` and the optional
    ``erode_3d``, which defaults to True), the ``channel``, the block shape
    and the list of block ids.

    Args:
        job_id: Integer job id, used for the log messages.
        config_path: Path to the JSON config file of this job.
    """
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as cfg_file:
        cfg = json.load(cfg_file)

    # dataset locations
    src_path, src_key = cfg['input_path'], cfg['input_key']
    dst_path, dst_key = cfg['output_path'], cfg['output_key']
    bd_path, bd_key = cfg['boundaries_path'], cfg['boundaries_key']
    offset = cfg['offset']

    # processing parameters
    erode_by = cfg['erode_by']
    erode_3d = cfg.get('erode_3d', True)
    channel = cfg['channel']
    block_shape = list(cfg['block_shape'])
    block_ids = cfg['block_list']

    with vu.file_reader(src_path, 'r') as f_src,\
            vu.file_reader(bd_path, 'r') as f_bd,\
            vu.file_reader(dst_path) as f_dst:
        boundaries = f_bd[bd_key]
        target = f_dst[dst_key]

        # the blocking is defined on the output dataset; the input is
        # wrapped in a ResizedVolume constructed with the output's shape
        target_shape = target.shape
        blocking = nt.blocking([0, 0, 0], list(target_shape), block_shape)
        source = ResizedVolume(f_src[src_key], target_shape)

        for block_id in block_ids:
            _scale_block(block_id, blocking, source, boundaries, target,
                         offset, erode_by, erode_3d, channel)

    # log success
    fu.log_job_success(job_id)
def watershed(job_id, config_path):
    """Run ``_ws_block`` on every block assigned to this job.

    The blocking is built on the spatial shape of the input dataset; if the
    input is 4d its leading axis is dropped first. A mask is loaded through
    ``vu.load_mask`` when the config contains ``mask_path``.

    Args:
        job_id: Integer job id, used for the log messages.
        config_path: Path to the JSON config file of this job.
    """
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as cfg_file:
        config = json.load(cfg_file)

    # input dataset and the spatial shape the blocking is defined on
    in_path, in_key = config['input_path'], config['input_key']
    spatial_shape = list(vu.get_shape(in_path, in_key))
    if len(spatial_shape) == 4:
        spatial_shape = spatial_shape[1:]

    block_shape = list(config['block_shape'])
    block_ids = config['block_list']

    # output dataset
    out_path, out_key = config['output_path'], config['output_key']

    blocking = nt.blocking([0, 0, 0], spatial_shape, block_shape)

    with vu.file_reader(in_path, 'r') as f_in, vu.file_reader(out_path) as f_out:
        ds_in = f_in[in_key]
        assert ds_in.ndim in (3, 4)
        ds_out = f_out[out_key]
        assert ds_out.ndim == 3

        # the mask is optional
        mask = None
        if 'mask_path' in config:
            mask = vu.load_mask(config['mask_path'], config['mask_key'],
                                spatial_shape)

        for block_id in block_ids:
            _ws_block(blocking, block_id, ds_in, ds_out, mask, config)

    # log success
    fu.log_job_success(job_id)
def _merge_graph(graph_path, output_key, scale, block_list, blocking, shape, n_threads):
    """Merge the sub-graphs of ``block_list`` and record ``shape``.

    Calls ``ndist.mergeSubgraphs`` on the sub-graphs stored under
    ``s<scale>/sub_graphs`` in ``graph_path``, writing to ``output_key``,
    and afterwards stores ``shape`` in the ``shape`` attribute of
    ``output_key``.

    Note: ``blocking`` is accepted but not used by this function.
    """
    merge_kwargs = dict(subgraphKey='s%i/sub_graphs' % scale,
                        blockIds=block_list,
                        outKey=output_key,
                        numberOfThreads=n_threads,
                        serializeToVarlen=False)
    ndist.mergeSubgraphs(graph_path, **merge_kwargs)

    with vu.file_reader(graph_path) as container:
        merged = container[output_key]
        merged.attrs['shape'] = shape
def graph_connected_components(job_id, config_path):
    """Compute connected components of node assignments on the graph.

    Loads the assignments, passes them with the graph to
    ``ndist.connectedComponentsFromNodes``, relabels the result
    consecutively starting at 1 (zeros are kept) and writes it to the
    output dataset, reusing the chunks of the assignment dataset.

    Args:
        job_id: Integer job id, used for the log messages.
        config_path: Path to the JSON config file of this job.
    """
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as cfg_file:
        cfg = json.load(cfg_file)
    problem_path, graph_key = cfg['problem_path'], cfg['graph_key']
    assignment_path, assignment_key = cfg['assignment_path'], cfg['assignment_key']
    output_path, output_key = cfg['output_path'], cfg['output_key']
    n_threads = cfg.get('n_threads', 8)

    # load the full assignment vector and remember its chunking
    with vu.file_reader(assignment_path, 'r') as f_in:
        ds_in = f_in[assignment_key]
        ds_in.n_threads = n_threads
        node_labels = ds_in[:]
        chunks = ds_in.chunks

    graph = ndist.Graph(os.path.join(problem_path, graph_key), n_threads)
    # TODO check if we actually have an ignore label
    node_labels = ndist.connectedComponentsFromNodes(graph, node_labels, True)
    vigra.analysis.relabelConsecutive(node_labels, out=node_labels,
                                      start_label=1, keep_zeros=True)

    with vu.file_reader(output_path) as f_out:
        ds_out = f_out.require_dataset(output_key,
                                       shape=node_labels.shape,
                                       chunks=chunks,
                                       compression='gzip',
                                       dtype='uint64')
        ds_out.n_threads = n_threads
        ds_out[:] = node_labels

    fu.log_job_success(job_id)
def threshold(job_id, config_path):
    """Run ``_threshold_block`` on every block assigned to this job.

    If ``channel`` is set in the config, the leading axis of the input shape
    is dropped before the (3d) blocking is built.

    Args:
        job_id: Integer job id, used for the log messages.
        config_path: Path to the JSON config file of this job.
    """
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as cfg_file:
        cfg = json.load(cfg_file)

    in_path, in_key = cfg['input_path'], cfg['input_key']
    out_path, out_key = cfg['output_path'], cfg['output_key']
    block_ids = cfg['block_list']
    block_shape = cfg['block_shape']

    threshold_value = cfg['threshold']
    mode = cfg['threshold_mode']
    sigma = cfg.get('sigma_prefilter', 0)
    channel = cfg.get('channel', None)

    fu.log("Applying threshold %f with mode %s" % (threshold_value, mode))

    with vu.file_reader(in_path, 'r') as f_in, vu.file_reader(out_path) as f_out:
        ds_in = f_in[in_key]
        ds_out = f_out[out_key]

        # spatial shape: without the leading axis when a channel is selected
        spatial_shape = ds_in.shape if channel is None else ds_in.shape[1:]
        assert len(spatial_shape) == 3

        blocking = nt.blocking([0, 0, 0], list(spatial_shape), block_shape)
        for block_id in block_ids:
            _threshold_block(block_id, blocking, ds_in, ds_out,
                             threshold_value, mode, channel, sigma)

    fu.log_job_success(job_id)
def skeletonize(job_id, config_path):
    """Skeletonize all non-zero ids of a segmentation in a single job.

    Loads the complete input segmentation into memory, collects its unique
    ids (id 0 is treated as ignore id and skipped), calls ``skeletonize_mp``
    and writes the returned volume to the output dataset.

    Args:
        job_id: Integer job id, used for the log messages.
        config_path: Path to the JSON config file of this job.
    """
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # read the input config
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']
    n_threads = config.get('threads_per_job', 1)

    # load input segmentation; the input is only read, so open it read-only
    # like the other workers do for their inputs
    with vu.file_reader(input_path, 'r') as f_in:
        ds_in = f_in[input_key]
        ds_in.n_threads = n_threads
        seg = ds_in[:]

    # TODO size filtering ?
    # find unique ids in the segmentation
    ids = np.unique(seg)

    # if 0 in ids, discard it (ignore id);
    # np.unique returns sorted values, so 0 can only be the first entry
    if ids[0] == 0:
        ids = ids[1:]
    fu.log("computing skeletons for %i ids" % len(ids))

    # FIXME this is too slow because skeletonize 3d does not lift gil
    # skel_vol = skeletonize_multi_threaded(seg, ids, n_threads)
    skel_vol = skeletonize_mp(seg, ids, n_threads)

    # write the output
    with vu.file_reader(output_path) as f_out:
        ds_out = f_out[output_key]
        ds_out.n_threads = n_threads
        ds_out[:] = skel_vol

    # log success
    fu.log_job_success(job_id)
def run_impl(self):
    """Create the output dataset and schedule the blockwise jobs.

    The output is a gzip-compressed ``uint64`` dataset with the spatial shape
    of the input (leading axis dropped for 4d inputs) and chunks of half the
    block shape. On a retry the stored ``self.block_list`` is reused.
    """
    # get the global config and init configs
    (shebang, block_shape, roi_begin,
     roi_end, block_list_path) = self.global_config_values(True)
    self.init(shebang)

    # spatial shape of the input
    shape = vu.get_shape(self.input_path, self.input_key)
    if len(shape) == 4:
        shape = shape[1:]

    # load the watershed config
    ws_config = self.get_task_config()

    # require output dataset
    # TODO read chunks from config
    chunks = tuple(extent // 2 for extent in block_shape)
    with vu.file_reader(self.output_path) as f_out:
        f_out.require_dataset(self.output_key, shape=shape, chunks=chunks,
                              compression='gzip', dtype='uint64')

    # add input / output locations and the block shape to the config
    ws_config.update(input_path=self.input_path,
                     input_key=self.input_key,
                     output_path=self.output_path,
                     output_key=self.output_key,
                     block_shape=block_shape)

    if self.n_retries != 0:
        # retry: reuse the stored block list
        block_list = self.block_list
        self.clean_up_for_retry(block_list)
    else:
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end,
                                         block_list_path=block_list_path)
    self._write_log('scheduling %i blocks to be processed' % len(block_list))
    n_jobs = min(len(block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, ws_config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def run_impl(self):
    """Create the ``float32`` output dataset and schedule the blockwise jobs.

    When ``self.blockwise`` equals False the block shape is replaced by the
    full shape of the input dataset. The output dataset is chunked with the
    block shape. On a retry the stored ``self.block_list`` is reused.
    """
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # comparison kept as an equality check on purpose (not `not ...`)
    if self.blockwise == False:  # noqa: E712
        with vu.file_reader(self.input_path) as f_in:
            block_shape = f_in[self.input_key].shape

    # load the task config
    config = self.get_task_config()

    # TODO make the scale at which we extract features accessible
    # add input / label / output locations, block shape and features
    config.update(input_path=self.input_path,
                  input_key=self.input_key,
                  labels_path=self.labels_path,
                  labels_key=self.labels_key,
                  output_path=self.output_path,
                  output_key=self.output_key,
                  block_shape=block_shape,
                  feature_list=self.feature_list)

    # TODO support multi-channel
    shape = vu.get_shape(self.input_path, self.input_key)

    # require the temporary output data-set
    z5py.File(self.output_path).require_dataset(self.output_key,
                                                shape=shape,
                                                compression='gzip',
                                                chunks=tuple(block_shape),
                                                dtype='float32')

    if self.n_retries != 0:
        # retry: reuse the stored block list
        block_list = self.block_list
        self.clean_up_for_retry(block_list)
    else:
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)

    n_jobs = min(len(block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def _read_num_features(self, block_ids):
    """Return the second shape entry of the first existing feature block.

    Blocks are looked up as ``blocks/block_<id>`` below ``self.output_path``;
    ids whose path does not exist on disk are skipped.

    Raises:
        AssertionError: If none of ``block_ids`` has an existing block.
    """
    n_feats = None
    with vu.file_reader(self.output_path) as f_out:
        for block_id in block_ids:
            key = os.path.join('blocks', 'block_%i' % block_id)
            if os.path.exists(os.path.join(self.output_path, key)):
                n_feats = f_out[key].shape[1]
                break
    assert n_feats is not None, "No valid feature block found"
    return n_feats
def run_impl(self):
    """Require the output group and schedule the blockwise jobs.

    The job config receives the input / label locations, the output path,
    the block shape, ``blocks_prefix`` and the prefix of the scale-0
    sub-graph blocks inside ``self.graph_path``. On a retry the stored
    ``self.block_list`` is reused.
    """
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # load the task config
    config = self.get_task_config()

    # require output group
    with vu.file_reader(self.output_path) as f_out:
        f_out.require_group(self.blocks_prefix)

    # TODO make the scale at which we extract features accessible
    graph_block_prefix = os.path.join(self.graph_path, 's0',
                                      'sub_graphs', 'block_')
    config.update(input_path=self.input_path,
                  input_key=self.input_key,
                  labels_path=self.labels_path,
                  labels_key=self.labels_key,
                  output_path=self.output_path,
                  block_shape=block_shape,
                  blocks_prefix=self.blocks_prefix,
                  graph_block_prefix=graph_block_prefix)

    if self.n_retries != 0:
        # retry: reuse the stored block list
        block_list = self.block_list
        self.clean_up_for_retry(block_list)
    else:
        # spatial shape of the input (leading axis dropped for 4d data)
        shape = vu.get_shape(self.input_path, self.input_key)
        if len(shape) == 4:
            shape = shape[1:]
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)

    n_jobs = min(len(block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def embedding_distances(job_id, config_path):
    """Run ``_embedding_distances_block`` on every block assigned to this job.

    ``config['path_dict']`` is the path of a JSON file that maps container
    paths to dataset keys; the datasets are opened read-only, ordered by
    sorted container path. All input containers are closed again when the
    job finishes or fails.

    Args:
        job_id: Integer job id, used for the log messages.
        config_path: Path to the JSON config file of this job.
    """
    # local import: only this function needs it
    from contextlib import ExitStack

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    path_dict = config['path_dict']
    output_path = config['output_path']
    output_key = config['output_key']
    block_list = config['block_list']
    block_shape = config['block_shape']
    offsets = config['offsets']
    norm = config['norm']

    # TODO support thresholding
    # both keys must be present in the config, but thresholding itself is
    # not implemented yet, hence the assertion
    threshold = config['threshold']
    threshold_mode = config['threshold_mode']  # noqa: F841 (not used yet)
    assert threshold is None

    with open(path_dict) as f:
        path_dict = json.load(f)

    with ExitStack() as stack:
        # keep every input container open while the blocks are processed
        # and close all of them on exit (previously they were never closed)
        input_datasets = [
            stack.enter_context(vu.file_reader(path, 'r'))[path_dict[path]]
            for path in sorted(path_dict)
        ]

        f_out = stack.enter_context(vu.file_reader(output_path))
        ds = f_out[output_key]

        # the blocking is built without the leading axis of the output
        shape = ds.shape[1:]
        blocking = nt.blocking([0, 0, 0], list(shape), block_shape)

        for block_id in block_list:
            _embedding_distances_block(block_id, blocking, input_datasets,
                                       ds, offsets, norm)

    fu.log_job_success(job_id)
def run_impl(self):
    """Build the job config, optionally require the output, schedule jobs.

    The block list is computed on the 3d spatial shape of the input (leading
    axis dropped for 4d inputs). Only when ``self.output_key`` is not None
    an output dataset is required; it gets a leading channel axis when
    ``self.n_channels > 1``.
    """
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # load the task config
    config = self.get_task_config()

    shape = vu.get_shape(self.input_path, self.input_key)
    # FIXME we should be able to specify xyzc vs cyzx
    if len(shape) == 4:
        shape = shape[1:]
    assert len(shape) == 3
    block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)

    # add input / output paths, ilastik settings and the block shape
    config.update(input_path=self.input_path,
                  input_key=self.input_key,
                  output_path=self.output_path,
                  halo=self.halo,
                  ilastik_project=self.ilastik_project,
                  ilastik_folder=self.ilastik_folder,
                  block_shape=block_shape,
                  tmp_folder=self.tmp_folder)

    # if the output key is not None, we have a z5 file and
    # need to require the dataset
    if self.output_key is not None:
        config['output_key'] = self.output_key

        out_shape = shape
        out_chunks = tuple(extent // 2 for extent in block_shape)
        if self.n_channels > 1:
            # prepend the channel axis, one channel per chunk
            out_shape = (self.n_channels, ) + out_shape
            out_chunks = (1, ) + out_chunks

        dtype = config.get('dtype', 'float32')
        with vu.file_reader(self.output_path) as f_out:
            f_out.require_dataset(self.output_key, shape=out_shape,
                                  chunks=out_chunks, dtype=dtype,
                                  compression='gzip')

    n_jobs = min(len(block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def find_uniques(job_id, config_path):
    """Collect the unique values (and optionally counts) of this job's blocks.

    The unique values of all blocks are merged and saved to
    ``find_uniques_job_<job_id>.npy`` in ``tmp_folder``. If
    ``return_counts`` is set, the per-block counts are summed per value and
    saved to ``counts_job_<job_id>.npy``, aligned with the saved values.

    Args:
        job_id: Integer job id, used for the log messages and file names.
        config_path: Path to the JSON config file of this job.
    """
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # read the config
    with open(config_path) as f:
        config = json.load(f)
    input_path = config['input_path']
    input_key = config['input_key']
    block_list = config['block_list']
    block_shape = config['block_shape']
    tmp_folder = config['tmp_folder']
    return_counts = config['return_counts']

    # open the input file
    with vu.file_reader(input_path, 'r') as f:
        ds = f[input_key]
        is_label_multiset = ds.attrs.get("isLabelMultiset", False)
        if is_label_multiset:
            ds = LabelMultisetWrapper(ds)

        shape = ds.shape
        blocking = nt.blocking(roiBegin=[0, 0, 0],
                               roiEnd=list(shape),
                               blockShape=list(block_shape))

        # find uniques for all blocks
        uniques = [uniques_in_block(block_id, blocking, ds, return_counts)
                   for block_id in block_list]

    if return_counts:
        # with return_counts each block result is a (values, counts) pair
        block_values = np.concatenate([un[0] for un in uniques])
        block_counts = np.concatenate([un[1] for un in uniques]).astype('uint64')

        # Sum the counts per unique value through the inverse mapping.
        # This needs memory proportional to the number of distinct values
        # only, instead of a dense array of size ``max_id + 1``.
        unique_values, inverse = np.unique(block_values, return_inverse=True)
        counts = np.zeros(len(unique_values), dtype='uint64')
        # np.add.at accumulates correctly for repeated indices
        np.add.at(counts, inverse, block_counts)

        count_path = os.path.join(tmp_folder, 'counts_job_%i.npy' % job_id)
        np.save(count_path, counts)
    else:
        unique_values = np.unique(np.concatenate(uniques))

    # save the uniques for this job
    save_path = os.path.join(tmp_folder, 'find_uniques_job_%i.npy' % job_id)
    fu.log("saving results to %s" % save_path)
    np.save(save_path, unique_values)

    # log success
    fu.log_job_success(job_id)
def edge_labels(job_id, config_path):
    """Derive binary edge labels from the node labels of each edge's ends.

    An edge is labeled 1 if its two nodes carry different labels and 0
    otherwise. With ``ignore_label_gt`` set, edges touching a node with
    label 0 are labeled -1. The result is stored as ``int8``.

    Args:
        job_id: Integer job id, used for the log messages.
        config_path: Path to the JSON config file of this job.
    """
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path, 'r') as cfg_file:
        cfg = json.load(cfg_file)
    output_path, output_key = cfg['output_path'], cfg['output_key']
    graph_path, graph_key = cfg['graph_path'], cfg['graph_key']
    overlap_path, overlap_key = cfg['overlap_path'], cfg['overlap_key']
    ignore_label_gt = cfg.get('ignore_label_gt', False)

    # load the node overlaps
    with vu.file_reader(overlap_path, 'r') as f_ovlp:
        node_labels = f_ovlp[overlap_key][:]

    # load the uv ids
    with vu.file_reader(graph_path, 'r') as f_graph:
        uv_ids = f_graph[graph_key]['edges'][:]

    # labels at both ends of every edge
    labels_u = node_labels[uv_ids[:, 0]]
    labels_v = node_labels[uv_ids[:, 1]]
    labels = (labels_u != labels_v).astype('int8')

    if ignore_label_gt:
        touches_ignore = np.logical_or(labels_u == 0, labels_v == 0)
        labels[touches_ignore] = -1

    chunks = (min(262144, len(labels)), )
    with vu.file_reader(output_path) as f_out:
        f_out.create_dataset(output_key, data=labels, chunks=chunks,
                             compression='gzip')

    fu.log_job_success(job_id)
def run_impl(self):
    """Require the output dataset and run the skeletonization as one job.

    The output is a gzip-compressed ``uint64`` dataset with the shape of the
    input; its chunks are (25, 256, 256) clipped to that shape.
    """
    # only the shebang of the global config is needed here
    shebang = self.global_config_values()[0]
    self.init(shebang)

    # shape of the input dataset
    with vu.file_reader(self.input_path, 'r') as f_in:
        shape = f_in[self.input_key].shape

    # load the skeletonize config
    task_config = self.get_task_config()

    # require output dataset, chunks never exceed the shape
    default_chunks = (25, 256, 256)
    chunks = tuple(min(extent, chunk)
                   for extent, chunk in zip(shape, default_chunks))
    with vu.file_reader(self.output_path) as f_out:
        f_out.require_dataset(self.output_key, shape=shape, chunks=chunks,
                              compression='gzip', dtype='uint64')

    # add input and output locations to the config
    task_config.update(input_path=self.input_path,
                       input_key=self.input_key,
                       output_path=self.output_path,
                       output_key=self.output_key)

    # prime and run the single job (no block list)
    n_jobs = 1
    self.prepare_jobs(n_jobs, None, task_config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def upsample_skeletons(job_id, config_path):
    """Run ``_upsample_block`` on every block assigned to this job.

    The blocking is built on the shape of the input dataset. The scale
    factor handed to ``_upsample_block`` is the per-axis integer ratio
    between the input shape and the skeleton shape.

    The config must contain ``block_shape`` in addition to the dataset
    locations and ``block_list``; ``halo`` and ``pixel_pitch`` are optional
    and default to None.

    Args:
        job_id: Integer job id, used for the log messages.
        config_path: Path to the JSON config file of this job.
    """
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # read the input config
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']
    skeleton_path = config['skeleton_path']
    skeleton_key = config['skeleton_key']

    block_list = config['block_list']
    # the blocking below needs the block shape; it was previously used
    # without ever being read from the config
    block_shape = config['block_shape']
    halo = config.get('halo', None)
    pixel_pitch = config.get('pixel_pitch', None)

    with vu.file_reader(input_path) as f_in,\
            vu.file_reader(skeleton_path) as f_skel,\
            vu.file_reader(output_path) as f_out:

        # bind the datasets that are handed to the block function
        ds_in = f_in[input_key]
        ds_skel = f_skel[skeleton_key]
        ds_out = f_out[output_key]

        shape = ds_in.shape
        skel_shape = ds_skel.shape
        scale_factor = tuple(sh // sksh for sh, sksh in zip(shape, skel_shape))
        blocking = nt.blocking([0, 0, 0], list(shape), list(block_shape))

        for block_id in block_list:
            _upsample_block(block_id, blocking, halo, ds_in, ds_out,
                            ds_skel, scale_factor, pixel_pitch)

    # log success
    fu.log_job_success(job_id)
def run_impl(self):
    """Require the output dataset and schedule the blockwise jobs.

    The output dataset has the shape and dtype of the input. Its chunks are
    taken from the ``chunks`` entry of the task config and fall back to the
    chunks of the input dataset when that entry is None. The block list is
    computed on the input shape without its leading axis.

    Raises:
        AssertionError: If the block shape is not a multiple of the
            spatial chunks (``chunks[1:]``).
    """
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    config = self.get_task_config()
    config.update({'input_path': self.input_path,
                   'input_key': self.input_key,
                   'output_path': self.output_path,
                   'output_key': self.output_key,
                   'objects_path': self.objects_path,
                   'objects_key': self.objects_key,
                   'offsets': self.offsets,
                   'block_shape': block_shape})

    shape = vu.get_shape(self.input_path, self.input_key)

    # read dtype and chunks in one go and close the file again
    # (the file used to be opened twice and never closed)
    with vu.file_reader(self.input_path, 'r') as f_in:
        ds_in = f_in[self.input_key]
        dtype = ds_in.dtype
        input_chunks = ds_in.chunks

    chunks = config.get('chunks', None)
    if chunks is None:
        chunks = input_chunks
    # chunks[0] belongs to the leading axis, which is not part of the blocking
    assert all(bs % ch == 0 for bs, ch in zip(block_shape, chunks[1:]))

    with vu.file_reader(self.output_path) as f:
        f.require_dataset(self.output_key, shape=tuple(shape),
                          chunks=tuple(chunks), dtype=dtype,
                          compression='gzip')

    # the blocks are defined without the leading axis
    shape = shape[1:]
    block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    n_jobs = min(len(block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def run_impl(self):
    """Require the output dataset and schedule the blockwise jobs.

    The output is a gzip-compressed ``uint64`` dataset with the shape of the
    input; its chunks are (25, 256, 256) clipped to that shape. The job
    config receives the dataset locations and the block shape the block
    list was computed with. On a retry the stored ``self.block_list`` is
    reused.
    """
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # get the shape of the input dataset
    with vu.file_reader(self.input_path, 'r') as f:
        shape = f[self.input_key].shape

    # load the upsample_skeletons config
    task_config = self.get_task_config()

    # require output dataset; clip the chunks so that they never exceed
    # the shape of small volumes
    chunks = (25, 256, 256)
    chunks = tuple(min(sh, ch) for sh, ch in zip(shape, chunks))
    with vu.file_reader(self.output_path) as f:
        f.require_dataset(self.output_key, shape=shape, chunks=chunks,
                          compression='gzip', dtype='uint64')

    # update the config with input and output paths and keys
    # as well as block shape (the jobs need it to interpret the block ids)
    task_config.update({'input_path': self.input_path,
                        'input_key': self.input_key,
                        'skeleton_path': self.skeleton_path,
                        'skeleton_key': self.skeleton_key,
                        'output_path': self.output_path,
                        'output_key': self.output_key,
                        'block_shape': block_shape})

    if self.n_retries == 0:
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
        self._write_log("scheduled %i blocks to run" % len(block_list))
    else:
        block_list = self.block_list
        self.clean_up_for_retry(block_list)

    n_jobs = min(len(block_list), self.max_jobs)
    self.prepare_jobs(n_jobs, block_list, task_config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def block_edge_features(job_id, config_path):
    """Accumulate edge features for the blocks assigned to this job.

    Without ``filters`` in the config ``_accumulate`` is called (passing
    ``offsets``); otherwise ``_accumulate_with_filters`` is called, which
    requires ``sigmas`` and does not allow ``offsets``. Job 0 additionally
    writes the number of features to the ``n_features`` attribute of the
    output dataset.

    Args:
        job_id: Integer job id, used for the log messages.
        config_path: Path to the JSON config file of this job.
    """
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path, 'r') as cfg_file:
        cfg = json.load(cfg_file)

    block_list = cfg['block_list']
    input_path, input_key = cfg['input_path'], cfg['input_key']
    labels_path, labels_key = cfg['labels_path'], cfg['labels_key']
    output_path = cfg['output_path']
    block_shape = cfg['block_shape']
    graph_path = cfg['graph_path']
    subgraph_key = cfg['subgraph_key']
    output_key = cfg['output_key']

    # offsets for accumulation of affinity maps
    offsets = cfg.get('offsets', None)
    filters = cfg.get('filters', None)
    sigmas = cfg.get('sigmas', None)
    apply_in_2d = cfg.get('apply_in_2d', False)
    halo = cfg.get('halo', [0, 0, 0])
    channel_agglomeration = cfg.get('channel_agglomeration', 'mean')
    assert channel_agglomeration in ('mean', 'max', 'min', None)

    # leading arguments shared by both accumulation functions
    shared_args = (input_path, input_key, labels_path, labels_key,
                   graph_path, subgraph_key, output_path, output_key,
                   block_list)

    if filters is not None:
        assert offsets is None, "Filters and offsets are not supported"
        assert sigmas is not None, "Need sigma values"
        n_feats = _accumulate_with_filters(*shared_args, block_shape,
                                           filters, sigmas, halo,
                                           apply_in_2d, channel_agglomeration)
    else:
        n_feats = _accumulate(*shared_args, offsets)

    # we need to serialize the number of features for job 0
    if job_id == 0:
        with vu.file_reader(output_path) as f_out:
            f_out[output_key].attrs['n_features'] = n_feats

    fu.log_job_success(job_id)
def find_labeling(job_id, config_path):
    """Build a consecutive relabeling from the per-job unique values.

    Loads ``find_uniques_job_<i>.npy`` for all ``n_jobs`` jobs from
    ``tmp_folder``, merges them and pairs each unique value with a new
    consecutive id. The new ids start at 0 if the smallest unique value is
    0, otherwise at 1. The (old id, new id) table is written as ``uint64``.

    Args:
        job_id: Integer job id, used for the log messages.
        config_path: Path to the JSON config file of this job.
    """
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as cfg_file:
        cfg = json.load(cfg_file)
    n_jobs = cfg['n_jobs']
    tmp_folder = cfg['tmp_folder']
    n_threads = cfg['threads_per_job']
    assignment_path = cfg['assignment_path']
    assignment_key = cfg['assignment_key']

    def _load_job_uniques(uniques_job_id):
        file_name = 'find_uniques_job_%i.npy' % uniques_job_id
        return np.load(os.path.join(tmp_folder, file_name))

    fu.log("read uniques")
    with futures.ThreadPoolExecutor(n_threads) as pool:
        per_job = list(pool.map(_load_job_uniques, range(n_jobs)))
    uniques = np.concatenate(per_job)

    fu.log("compute uniques")
    uniques = np.unique(uniques)

    # keep 0 mapped to 0 if it is present, otherwise start counting at 1
    start_label = 0 if uniques[0] == 0 else 1
    stop_label = start_label + len(uniques)

    fu.log("relabel to new max-id %i" % stop_label)
    new_ids = np.arange(start_label, stop_label, dtype='uint64')
    # two columns: old id, new id
    assignments = np.concatenate([uniques.reshape(-1, 1),
                                  new_ids.reshape(-1, 1)], axis=1)

    fu.log("saving results to %s/%s" % (assignment_path, assignment_key))
    with vu.file_reader(assignment_path) as f_out:
        chunks = (min(int(1e6), len(assignments)), 2)
        ds = vu.force_dataset(f_out, assignment_key,
                              shape=assignments.shape, dtype='uint64',
                              compression='gzip', chunks=chunks)
        ds.n_threads = n_threads
        ds[:] = assignments

    # log success
    fu.log_job_success(job_id)
def _prepare_output(self, config):
    """Require the 1d output dataset and set up the blocking over labels.

    The label range is split into blocks of ``chunk_len`` entries (default
    1000), capped at ``self.number_of_labels``.

    Args:
        config: Task config; updated in place with ``number_of_labels`` and
            ``block_len``.

    Returns:
        Tuple ``(config, n_jobs, block_list)``.
    """
    n_labels = self.number_of_labels

    # make the blocking
    block_len = min(n_labels, config.get('chunk_len', 1000))
    block_list = vu.blocks_in_volume((n_labels,), (block_len,))
    n_jobs = min(len(block_list), self.max_jobs)

    # require output dataset
    with vu.file_reader(self.output_path) as f_out:
        f_out.require_dataset(self.output_key, shape=(n_labels,),
                              chunks=(1,), compression='gzip',
                              dtype='uint64')

    # update the config
    config.update(number_of_labels=n_labels, block_len=block_len)
    return config, n_jobs, block_list
def run_impl(self):
    """Copy graph attributes to the output group and schedule the jobs.

    ``shape`` and ``ignore_label`` are read from ``s0/sub_graphs`` in
    ``self.graph_path`` and written to the attributes of the output group.
    The job config receives the unscaled block shape; the block list itself
    is computed with the block shape multiplied by ``2**self.scale``. A
    single job is used when ``self.merge_complete_graph`` is set.
    """
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # load the task config
    config = self.get_task_config()

    # get the shape and write shape and ignore label to our output file
    with vu.file_reader(self.graph_path) as f_graph:
        src_attrs = f_graph['s0/sub_graphs'].attrs
        shape = tuple(src_attrs['shape'])
        ignore_label = src_attrs['ignore_label']

        out_group = f_graph.require_group(self.output_key)
        out_group.attrs['ignore_label'] = ignore_label
        out_group.attrs['shape'] = shape

    # the config gets the block shape before it is scaled below
    config.update(graph_path=self.graph_path,
                  block_shape=block_shape,
                  scale=self.scale,
                  merge_complete_graph=self.merge_complete_graph,
                  output_key=self.output_key)

    # block shape at the current scale
    factor = 2**self.scale
    block_shape = tuple(extent * factor for extent in block_shape)

    if self.n_retries != 0:
        # retry: reuse the stored block list
        block_list = self.block_list
        self.clean_up_for_retry(block_list)
    else:
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)

    if self.merge_complete_graph:
        n_jobs = 1
    else:
        n_jobs = min(len(block_list), self.max_jobs)

    self._initialize_datasets(shape, block_shape)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def copy_volume(job_id, config_path):
    """Copy the blocks assigned to this job via ``_copy_blocks``.

    The blocking is built on the full shape of the input dataset. The
    optional ``roi_begin`` from the config is forwarded to ``_copy_blocks``.

    Args:
        job_id: Integer job id, used for the log messages.
        config_path: Path to the JSON config file of this job.
    """
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as cfg_file:
        cfg = json.load(cfg_file)

    # input, blocking and output settings
    in_path, in_key = cfg['input_path'], cfg['input_key']
    block_shape = list(cfg['block_shape'])
    block_ids = cfg['block_list']
    out_path, out_key = cfg['output_path'], cfg['output_key']

    # check if we offset by roi
    roi_begin = cfg.get('roi_begin', None)
    n_threads = cfg.get('threads_per_job', 1)

    with vu.file_reader(in_path, 'r') as f_in, vu.file_reader(out_path) as f_out:
        ds_in = f_in[in_key]
        ds_in.n_threads = n_threads
        ds_out = f_out[out_key]
        ds_out.n_threads = n_threads

        blocking = nt.blocking([0, 0, 0], list(ds_in.shape), block_shape)
        _copy_blocks(ds_in, ds_out, blocking, block_ids, roi_begin)

    # log success
    fu.log_job_success(job_id)
def check_components(job_id, config_path):
    """Run ``_check_components_impl`` and store the violating ids, if any.

    The maximal number of chunks per label is the product of the per-axis
    ratios between block shape and chunks. If ``_check_components_impl``
    returns a non-empty result it is written to the output dataset;
    otherwise nothing is written.

    Args:
        job_id: Integer job id, used for the log messages.
        config_path: Path to the JSON config file of this job.

    Raises:
        AssertionError: If block shape and chunks do not yield exactly
            8 chunks per block (see the TODO below).
    """
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # read the config
    with open(config_path) as f:
        config = json.load(f)
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']
    block_shape = config['block_shape']
    chunks = config['chunks']
    n_labels = config['n_labels']

    chunks_per_block = [bs // ch for bs, ch in zip(block_shape, chunks)]
    max_chunks_per_label = np.prod(chunks_per_block)
    # TODO don't hard-code assertion to special case for
    # [512, 512, 50], [256, 256, 25]
    assert max_chunks_per_label == 8

    n_threads = config.get('threads_per_job', 1)

    # keep the input file open only while it is needed and close it
    # afterwards (the handle used to be leaked)
    with vu.file_reader(input_path) as f_in:
        ds_in = f_in[input_key]
        violating_ids = _check_components_impl(ds_in, max_chunks_per_label,
                                               n_threads, n_labels)

    if violating_ids.size > 0:
        fu.log("have %i violationg_ids" % violating_ids.shape[0])
        vchunks = (min(10000, violating_ids.shape[0]), 2)
        with vu.file_reader(output_path) as f:
            f.create_dataset(output_key, data=violating_ids, chunks=vchunks)
    else:
        fu.log("no violating ids")

    # log success
    fu.log_job_success(job_id)
def stack_predictions(job_id, config_path):
    """Run ``stack_block`` on every block assigned to this job.

    The blocking is built on the shape of the raw dataset; the dtype of the
    output dataset is passed to ``stack_block`` as a string.

    Args:
        job_id: Integer job id, used for the log messages.
        config_path: Path to the JSON config file of this job.
    """
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path, 'r') as cfg_file:
        cfg = json.load(cfg_file)

    raw_path, raw_key = cfg['raw_path'], cfg['raw_key']
    pred_path, pred_key = cfg['pred_path'], cfg['pred_key']
    out_path, out_key = cfg['output_path'], cfg['output_key']
    block_shape = cfg['block_shape']
    block_ids = cfg['block_list']

    with vu.file_reader(raw_path, 'r') as f_raw,\
            vu.file_reader(pred_path, 'r') as f_pred,\
            vu.file_reader(out_path) as f_out:
        ds_raw = f_raw[raw_key]
        ds_pred = f_pred[pred_key]
        ds_out = f_out[out_key]

        out_dtype = str(ds_out.dtype)
        blocking = nt.blocking([0, 0, 0], ds_raw.shape, block_shape)

        for block_id in block_ids:
            stack_block(block_id, blocking, ds_raw, ds_pred, ds_out, out_dtype)

    fu.log_job_success(job_id)
def costs_from_node_labels(job_id, config_path):
    """Run ``_costs_for_edge_block`` on every edge block of this job.

    The node labels are loaded into memory completely. The 1d blocking runs
    over the first axis of the neighborhood dataset in steps of
    ``chunk_size``.

    Args:
        job_id: Integer job id, used for the log messages.
        config_path: Path to the JSON config file of this job.
    """
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as cfg_file:
        cfg = json.load(cfg_file)

    nh_path, nh_key = cfg['nh_path'], cfg['nh_key']
    node_label_path = cfg['node_label_path']
    node_label_key = cfg['node_label_key']
    out_path, out_key = cfg['output_path'], cfg['output_key']

    chunk_size = cfg['chunk_size']
    inter_label_cost = cfg['inter_label_cost']
    intra_label_cost = cfg['intra_label_cost']
    block_ids = cfg['block_list']

    with vu.file_reader(node_label_path, 'r') as f_labels:
        node_labels = f_labels[node_label_key][:]

    with vu.file_reader(nh_path) as f_nh, vu.file_reader(out_path) as f_out:
        ds_nh = f_nh[nh_key]
        ds_costs = f_out[out_key]

        # one block per chunk of lifted edges
        n_lifted_edges = ds_nh.shape[0]
        blocking = nt.blocking([0], [n_lifted_edges], [chunk_size])

        for block_id in block_ids:
            _costs_for_edge_block(block_id, blocking, ds_nh, ds_costs,
                                  node_labels, inter_label_cost,
                                  intra_label_cost)

    fu.log_job_success(job_id)
def create_multiset(job_id, config_path):
    """Run ``_create_multiset_block`` on every block assigned to this job.

    The blocking is built on the shape of the output dataset. An input with
    the ``isLabelMultiset`` attribute set is wrapped in
    ``LabelMultisetWrapper``. Job 0 additionally calls ``write_metadata``
    with the ``maxId`` attribute of the input.

    Args:
        job_id: Integer job id, used for the log messages.
        config_path: Path to the JSON config file of this job.
    """
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as cfg_file:
        cfg = json.load(cfg_file)

    # input and blocking settings
    in_path, in_key = cfg['input_path'], cfg['input_key']
    block_shape = list(cfg['block_shape'])
    block_ids = cfg['block_list']

    # output settings; the blocking follows the output shape
    out_path, out_key = cfg['output_path'], cfg['output_key']
    out_shape = list(vu.get_shape(out_path, out_key))
    blocking = nt.blocking([0, 0, 0], out_shape, block_shape)

    with vu.file_reader(in_path, 'r') as f_in, vu.file_reader(out_path) as f_out:
        ds_in = f_in[in_key]
        if ds_in.attrs.get('isLabelMultiset', False):
            ds_in = LabelMultisetWrapper(ds_in)
        ds_out = f_out[out_key]

        for block_id in block_ids:
            _create_multiset_block(blocking, block_id, ds_in, ds_out)

        # only the first job writes the metadata
        if job_id == 0:
            write_metadata(ds_out, ds_in.attrs['maxId'])

    # log success
    fu.log_job_success(job_id)
def run_impl(self):
    """Require the 1d cost dataset and schedule jobs over edge blocks.

    The output is a gzip-compressed ``float32`` dataset with one entry per
    row of the neighborhood dataset. Edge blocks and chunks both hold at
    most 262144 entries. All job management calls are made with
    ``self.prefix``.
    """
    # get the global config and init configs
    shebang = self.global_config_values()[0]
    self.init(shebang)

    # load the task config
    config = self.get_task_config()

    # number of lifted edges = first axis of the neighborhood dataset
    with vu.file_reader(self.nh_path, 'r') as f_nh:
        n_lifted_edges = f_nh[self.nh_key].shape[0]

    # chunk size = 64**3
    chunk_size = min(262144, n_lifted_edges)
    with vu.file_reader(self.output_path) as f_out:
        f_out.require_dataset(self.output_key, shape=(n_lifted_edges,),
                              chunks=(chunk_size,), compression='gzip',
                              dtype='float32')

    # add neighborhood / node label / output locations and the chunk size
    config.update(nh_path=self.nh_path,
                  nh_key=self.nh_key,
                  node_label_path=self.node_label_path,
                  node_label_key=self.node_label_key,
                  output_path=self.output_path,
                  output_key=self.output_key,
                  chunk_size=chunk_size)

    edge_block_list = vu.blocks_in_volume([n_lifted_edges], [chunk_size])
    n_jobs = min(self.max_jobs, len(edge_block_list))

    # prime and run the jobs
    prefix = self.prefix
    self.prepare_jobs(n_jobs, edge_block_list, config, prefix)
    self.submit_jobs(n_jobs, prefix)

    # wait till jobs finish and check for job success
    self.wait_for_jobs(prefix)
    self.check_jobs(n_jobs, prefix)
def filling_size_filter(job_id, config_path):
    """Run ``apply_block`` on every block assigned to this job.

    The ids to discard are loaded from ``res_path``. Three cases are
    distinguished by how the files are opened: in-place (same file and same
    key), same file with different keys, and separate input / output files.

    Args:
        job_id: Integer job id, used for the log messages.
        config_path: Path to the JSON config file of this job.
    """
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # read the config
    with open(config_path) as cfg_file:
        cfg = json.load(cfg_file)
    input_path, input_key = cfg['input_path'], cfg['input_key']
    hmap_path, hmap_key = cfg['hmap_path'], cfg['hmap_key']
    output_path, output_key = cfg['output_path'], cfg['output_key']
    block_ids = cfg['block_list']
    block_shape = cfg['block_shape']
    res_path = cfg['res_path']

    # get the shape
    with vu.file_reader(input_path, 'r') as f_shape:
        ds = f_shape[input_key]
        shape = f_shape[input_key].shape

    blocking = nt.blocking(roiBegin=[0, 0, 0],
                           roiEnd=list(shape),
                           blockShape=list(block_shape))
    discard_ids = np.load(res_path)

    def _process(ds_hmap, ds_in, ds_out):
        # apply the filter to all blocks of this job
        for block_id in block_ids:
            apply_block(block_id, blocking, ds_hmap, ds_in, ds_out, discard_ids)

    same_file = input_path == output_path
    in_place = same_file and input_key == output_key

    if in_place:
        # input and output are the same dataset
        with vu.file_reader(input_path) as f_io,\
                vu.file_reader(hmap_path, 'r') as f_hmap:
            ds = f_io[input_key]
            _process(f_hmap[hmap_key], ds, ds)
    elif same_file:
        # two datasets inside the same file
        with vu.file_reader(input_path) as f_io,\
                vu.file_reader(hmap_path, 'r') as f_hmap:
            ds_in = f_io[input_key]
            ds_out = f_io[output_key]
            _process(f_hmap[hmap_key], ds_in, ds_out)
    else:
        # separate files for input and output
        with vu.file_reader(input_path, 'r') as f_in,\
                vu.file_reader(output_path) as f_out,\
                vu.file_reader(hmap_path, 'r') as f_hmap:
            ds_in = f_in[input_key]
            ds_out = f_out[output_key]
            _process(f_hmap[hmap_key], ds_in, ds_out)

    fu.log_job_success(job_id)
def run_impl(self):
    """Require the output dataset and run one ``_mws_pass`` per block list.

    The input must be 4d with one channel per offset. ``chunks`` and
    ``compression`` are removed from the task config and used for the
    ``uint64`` output dataset; chunks default to half the block shape and
    are clipped to the spatial shape. The block lists come from
    ``vu.make_checkerboard_block_lists`` and are processed in order, with
    the pass index stored in ``config['pass']``.
    """
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # split the input shape into channels and spatial shape
    full_shape = vu.get_shape(self.input_path, self.input_key)
    assert len(full_shape) == 4, "Need 4d input for MWS"
    n_channels, shape = full_shape[0], full_shape[1:]

    # TODO make optional which channels to choose
    assert len(self.offsets) == n_channels,\
        "%i, %i" % (len(self.offsets), n_channels)
    assert all(len(off) == 3 for off in self.offsets)

    config = self.get_task_config()
    config.update(input_path=self.input_path,
                  input_key=self.input_key,
                  output_path=self.output_path,
                  output_key=self.output_key,
                  block_shape=block_shape,
                  offsets=self.offsets,
                  halo=self.halo,
                  tmp_folder=self.tmp_folder)

    # check if we have a mask and add to the config if we do
    if self.mask_path != '':
        assert self.mask_key != ''
        config.update(mask_path=self.mask_path, mask_key=self.mask_key)

    # get chunks, falling back to half the block shape
    chunks = config.pop('chunks', None)
    if chunks is None:
        chunks = tuple(extent // 2 for extent in block_shape)
    # clip chunks
    chunks = tuple(min(chunk, extent) for chunk, extent in zip(chunks, shape))

    # make output dataset
    compression = config.pop('compression', 'gzip')
    with vu.file_reader(self.output_path) as f_out:
        f_out.require_dataset(self.output_key, shape=shape, dtype='uint64',
                              compression=compression, chunks=chunks)

    blocking = nt.blocking([0, 0, 0], list(shape), list(block_shape))
    block_lists = vu.make_checkerboard_block_lists(blocking, roi_begin, roi_end)

    # we need the max-block-id to write out max-label-id later
    config['max_block_id'] = max(max(block_list) for block_list in block_lists)

    for pass_id, block_list in enumerate(block_lists):
        config['pass'] = pass_id
        self._mws_pass(block_list, config, 'pass_%i' % pass_id)