def make_new_fragment_segment_assignment():
    path1 = '/g/kreshuk/data/arendt/platyneris_v1/data.n5'
    k1 = 'volumes/paintera/proofread_cells_multiset/data/s0'
    path2 = './data.n5'
    k2 = 'volumes/segmentation2/s0'

    tmp_folder = './tmp_subdivision_labels'
    config_folder = os.path.join(tmp_folder, 'configs')

    set_default_qos('high')
    write_default_global_config(config_folder)

    target = 'slurm'
    max_jobs = 200

    # map each paintera fragment id to its segment id in the new segmentation
    assignments = node_labels(path1, k1, path2, k2, 'new-frag-seg',
                              tmp_folder, target, max_jobs)
    assert assignments.ndim == 1

    # build the (fragment-id, segment-id) assignment table
    assignments = np.concatenate([
        np.arange(len(assignments), dtype='uint64')[:, None],
        assignments[:, None]
    ], axis=1)

    with z5py.File(path2) as f:
        f.create_dataset('node_labels/fragment-segment-assignment2', data=assignments,
                         compression='gzip', chunks=assignments.shape)
def cell_segmentation_workflow(path, aff_path, mask_path, mask_key, region_path, region_key,
                               use_curated_affs, use_lmc, tmp_folder, target, max_jobs,
                               roi_begin=None, roi_end=None):
    # number of jobs and threads for target
    assert target in ('slurm', 'local')
    if target == 'local':
        max_jobs_mc = 1
        max_threads = 16
    else:
        max_jobs_mc = 15
        max_threads = 8

    config_dir = os.path.join(tmp_folder, 'configs')
    write_default_global_config(config_dir, roi_begin, roi_end)

    run_watershed(path, aff_path, use_curated_affs, mask_path, mask_key,
                  tmp_folder, target, max_jobs)
    if use_lmc:
        run_lmc(path, aff_path, use_curated_affs, region_path, region_key,
                tmp_folder, target, max_threads, max_jobs, max_jobs_mc)
    else:
        run_mc(path, aff_path, use_curated_affs,
               tmp_folder, target, max_threads, max_jobs, max_jobs_mc)

    # postprocessing:
    # 1.) compute sizes for size threshold
    run_morphology(path, use_curated_affs, use_lmc, tmp_folder, target, max_jobs)

    identifier = 'result'
    # we unmerge only if we also use lmc, because this takes nuclei into account
    if use_lmc:
        # 2.) unmerge cells with more than one assigned nucleus
        unmerge_nuclei(path, use_curated_affs, tmp_folder, target, max_jobs, max_threads)
        identifier = 'filtered_unmerge'

    # 3.) filter sizes with graph watershed
    filter_size(path, use_curated_affs, use_lmc, identifier,
                target, tmp_folder, max_jobs, max_threads)
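# Example (sketch): a hypothetical invocation of cell_segmentation_workflow.
# All paths and keys below are placeholders, not the project's actual data
# locations; the n5 containers, masks and region labels are assumed to exist.
def _example_cell_segmentation():
    cell_segmentation_workflow(path='./data.n5',
                               aff_path='./affinities.n5',
                               mask_path='./mask.n5', mask_key='volumes/mask',
                               region_path='./regions.n5', region_key='volumes/regions',
                               use_curated_affs=True, use_lmc=True,
                               tmp_folder='./tmp_cell_seg',
                               target='local', max_jobs=8)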
def downscale(path, in_key, out_key, tmp_folder, max_jobs, target, n_scales=5):
    task = DownscalingWorkflow

    config_folder = os.path.join(tmp_folder, 'configs')
    write_default_global_config(config_folder)

    configs = task.get_config()
    config = configs['downscaling']
    config.update({'mem_limit': 8, 'time_limit': 120,
                   'library_kwargs': {'order': 0}})  # order 0 = nearest neighbor, for label data
    with open(os.path.join(config_folder, 'downscaling.config'), 'w') as f:
        json.dump(config, f)

    # downscale by a factor of 2 in each dimension at every scale level
    scale_factors = [[2, 2, 2]] * n_scales
    halos = [[0, 0, 0]] * n_scales

    t = task(tmp_folder=tmp_folder, config_dir=config_folder,
             target=target, max_jobs=max_jobs,
             input_path=path, input_key=in_key,
             output_key_prefix=out_key,
             scale_factors=scale_factors, halos=halos,
             metadata_format='paintera')
    ret = luigi.build([t], local_scheduler=True)
    if not ret:
        raise RuntimeError("Downscaling the segmentation failed")

    # propagate the max id attribute to all scale levels
    with z5py.File(path, 'r') as f:
        ds = f[in_key]
        max_id = ds.attrs['maxId']
    for scale in range(n_scales + 1):
        scale_key = '%s/s%i' % (out_key, scale)
        add_max_id(path, scale_key, max_id=max_id)
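# Example (sketch): downscaling a segmentation into a paintera-style multiscale
# group. The path and keys are placeholders; 'volumes/segmentation/s0' is assumed
# to hold the full-resolution label volume with a 'maxId' attribute, and the
# scale levels are written next to it under 'volumes/segmentation'.
def _example_downscale():
    downscale(path='./data.n5',
              in_key='volumes/segmentation/s0',
              out_key='volumes/segmentation',
              tmp_folder='./tmp_downscale',
              max_jobs=8, target='local', n_scales=5)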
def compute_baseline_tables(version, target, max_jobs):
    path = BASELINE_ROOT
    folder = os.path.join(ROOT, version, 'images', 'local')
    for name in BASELINE_NAMES:
        key = 'volumes/cells/%s/filtered_size' % name
        out_path = '%s.csv' % name
        # skip baselines for which the table has been computed already
        if os.path.exists(out_path):
            continue

        tmp_folder = './tmp_regions_%s' % name
        config_folder = os.path.join(tmp_folder, 'configs')
        write_default_global_config(config_folder)

        label_ids = get_label_ids(path, key)
        region_attributes(path, out_path, folder, label_ids,
                          tmp_folder, target=target, max_jobs=max_jobs,
                          key_seg=key)
def make_proofreading_project(project_folder, tmp_folder,
                              assignments, block_labels, block_roi,
                              target, max_jobs):
    if len(block_labels) == 0:
        return

    # don't do anything if we have a paintera project already
    if os.path.exists(os.path.join(project_folder, 'attributes.json')):
        return

    os.makedirs(project_folder, exist_ok=True)
    config_dir = os.path.join(tmp_folder, 'configs')

    roi_begin, roi_end = block_roi
    write_default_global_config(config_dir, roi_begin, roi_end)
    with open(os.path.join(config_dir, 'global.config'), 'r') as f:
        block_shape = json.load(f)['block_shape']

    data_path = os.path.join(project_folder, 'data.n5')
    f = z5py.File(data_path)
    f.require_group('volumes')

    # make a link to the raw data
    raw_out_key = 'volumes/raw'
    if raw_out_key not in f:
        print("Make raw symlink")
        raw_in = os.path.join(RAW_PATH, RAW_KEY)
        raw_out = os.path.join(data_path, raw_out_key)
        os.symlink(raw_in, raw_out)

    # get the relevant fragment segment assignments for this block
    print("Get assignment mask")
    assignment_mask = np.isin(assignments[:, 1], block_labels)
    assert assignment_mask.sum() > 0
    block_assignments = assignments[assignment_mask]
    assert block_assignments.shape[0] == assignment_mask.sum()
    assert block_assignments.shape[1] == 2
    print("Sub assignments have the shape:", block_assignments.shape)

    # copy the relevant part of the fragment segment assignment
    print("Copy the assignments")
    g_out = f.require_group('volumes/paintera')
    save_assignments = block_assignments.T
    ds_ass = g_out.require_dataset('fragment-segment-assignment',
                                   shape=save_assignments.shape,
                                   chunks=save_assignments.shape,
                                   compression='gzip', dtype='uint64')
    ds_ass[:] = save_assignments

    # copy the relevant parts of the watersheds
    print("Copy the watersheds")
    ws_ids = block_assignments[:, 0]
    copy_watersheds(PAINTERA_PATH, os.path.join(PAINTERA_KEY, 'data/s0'),
                    data_path, 'volumes/watershed',
                    ws_ids, tmp_folder, target, max_jobs)

    # make the paintera data
    res = [0.025, 0.01, 0.01]
    restrict_sets = [-1, -1, 5, 4, 4, 3, 3, 1]
    print("Make new paintera data")
    set_default_roi(roi_begin, roi_end)
    set_default_block_shape(block_shape)
    convert_to_paintera_format(data_path, raw_out_key, 'volumes/watershed', 'volumes/paintera',
                               label_scale=1, resolution=res,
                               tmp_folder=tmp_folder, target=target, max_jobs=max_jobs,
                               max_threads=16, convert_to_label_multisets=True,
                               restrict_sets=restrict_sets)
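# Example (sketch): setting up a single proofreading project. The label list and
# ROI are hypothetical values; 'assignments' is the (n_fragments, 2) fragment-segment
# table loaded elsewhere, with segment ids in the second column.
def _example_proofreading_project(assignments):
    block_labels = np.array([101, 102, 103], dtype='uint64')  # segment ids belonging to this block
    block_roi = ([0, 0, 0], [256, 2048, 2048])                # (roi_begin, roi_end) in pixels
    make_proofreading_project(project_folder='./project01',
                              tmp_folder='./tmp_project01',
                              assignments=assignments,
                              block_labels=block_labels,
                              block_roi=block_roi,
                              target='local', max_jobs=8)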
def export_selected_projects(projects, rois_to_blocks, target, max_jobs):
    """ Export only the selected projects and fill in the rest with the old
    global paintera project. This means we need to keep ids consistent
    between projects.
    """
    project_folders = [os.path.join(PROJECT_ROOT, 'project%02i' % project_id)
                       for project_id in projects]
    assert all(os.path.exists(pfolder) for pfolder in project_folders)

    tmp_folder = './tmp_export'
    tmp_path = os.path.join(tmp_folder, 'data.n5')

    #
    # load the original paintera data
    #

    # copy the watershed segmentation
    ws_in_key = os.path.join(PAINTERA_KEY, 'data', 's0')
    ws_out_key = 'volumes/watershed'
    copy_watersheds(PAINTERA_PATH, ws_in_key,
                    tmp_path, ws_out_key,
                    None, tmp_folder, target, max_jobs)
    with z5py.File(tmp_path, 'r') as f:
        max_id = f[ws_out_key].attrs['maxId']

    # load the fragment segment assignments
    ass_key = os.path.join(PAINTERA_KEY, 'fragment-segment-assignment')
    with z5py.File(PAINTERA_PATH, 'r') as f:
        assignments = f[ass_key][:].T

    #
    # load the corrections from the projects and insert them
    #
    for project_folder in project_folders:
        proj_id = int(project_folder[-2:])
        tmp_project = os.path.join(tmp_folder, 'tmp_proj%i' % proj_id)

        project_path = os.path.join(project_folder, 'data.n5')
        project_in_root = 'volumes/paintera'
        project_in_key = os.path.join(project_in_root, 'data', 's0')

        # set the bounding box for this project
        config_dir = os.path.join(tmp_project, 'configs')
        rb, re = rois_to_blocks[proj_id]
        set_default_block_shape([50, 512, 512])
        write_default_global_config(config_dir, rb, re)

        # copy this project's watersheds, offsetting everything by the current max id
        copy_watersheds(project_path, project_in_key,
                        tmp_path, ws_out_key,
                        None, tmp_project, target, max_jobs,
                        offset=max_id, insert_mode=True)

        # update the fragment segment assignment
        project_ass_key = os.path.join(project_in_root, 'fragment-segment-assignment')
        with z5py.File(project_path, 'r') as f:
            this_assignments = f[project_ass_key][:].T
        # offset the assignments
        this_assignments += max_id
        assignments = np.concatenate([assignments, this_assignments], axis=0)

        # update the max id
        max_id = int(assignments.max())

    # write the new segmentation
    seg_out_key = 'volumes/segmentation2/s0'
    serialize_segmentation(tmp_path, ws_out_key, TMP_PATH, seg_out_key,
                           assignments, tmp_folder, target, max_jobs)
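# Example (sketch): exporting two corrected projects. The project ids and the
# rois_to_blocks mapping (project id -> (roi_begin, roi_end)) are placeholder
# values for illustration only.
def _example_export_projects():
    projects = [1, 3]
    rois_to_blocks = {1: ([0, 0, 0], [256, 2048, 2048]),
                      3: ([256, 0, 0], [512, 2048, 2048])}
    export_selected_projects(projects, rois_to_blocks,
                             target='slurm', max_jobs=200)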
def make_root_seg(tmp_folder, target, max_jobs):
    in_path = SEG_PATH
    in_key = SEG_KEY + '/s0'
    ws_path = PAINTERA_PATH
    ws_key = PAINTERA_KEY + '/data/s0'
    out_path = TMP_PATH
    out_key = 'volumes/segmentation'
    assignment_out_key = 'node_labels/fragment_segment_assignment'

    config_dir = os.path.join(tmp_folder, 'configs')
    write_default_global_config(config_dir)
    tmp_path = os.path.join(tmp_folder, 'data.n5')

    # get the current fragment segment assignment
    assignments = node_labels(ws_path, ws_key,
                              in_path, in_key, 'rootseg',
                              tmp_folder, target=target, max_jobs=max_jobs,
                              max_overlap=True, ignore_label=None)

    # find the unique ids of the watersheds
    unique_key = 'uniques'
    find_uniques(ws_path, ws_key, tmp_path, unique_key,
                 tmp_folder, config_dir, max_jobs, target)
    with z5py.File(tmp_path, 'r') as f:
        ds = f[unique_key]
        ws_ids = ds[:]

    # convert to paintera fragment segment assignments:
    # restrict the assignments to the watershed ids and relabel them
    # consecutively, starting from the id offset
    id_offset = int(ws_ids.max()) + 1
    assignments = assignments[ws_ids]
    assignments = vigra.analysis.relabelConsecutive(assignments, start_label=id_offset,
                                                    keep_zeros=True)[0]
    assert len(assignments) == len(ws_ids), "%i, %i" % (len(assignments), len(ws_ids))
    paintera_assignments = np.concatenate([ws_ids[:, None], assignments[:, None]], axis=1).T

    assignment_tmp_key = 'tmp_assignments'
    with z5py.File(tmp_path) as f:
        ds = f.require_dataset(assignment_tmp_key, shape=paintera_assignments.shape,
                               compression='gzip', chunks=paintera_assignments.shape,
                               dtype='uint64')
        ds[:] = paintera_assignments

    # make and serialize new assignments
    print("Serializing assignments ...")
    serialize_assignments(tmp_folder,
                          tmp_path, assignment_tmp_key,
                          tmp_path, unique_key,
                          out_path, assignment_out_key,
                          locked_segments=None, relabel_output=False,
                          map_to_background=None)

    # write the new segmentation
    print("Serializing new segmentation ...")
    serialize_merged_segmentation(ws_path, ws_key,
                                  out_path, out_key,
                                  out_path, assignment_out_key,
                                  tmp_folder, max_jobs, target)
def compute_connected_components(ws_path, ws_key, seg_path, seg_key,
                                 out_path, node_label_key, cc_key,
                                 tmp_folder, target, max_jobs,
                                 graph_key='graph', ignore_label=True):
    config_folder = os.path.join(tmp_folder, 'configs')
    write_default_global_config(config_folder)

    #
    # compute the graph
    #
    task = GraphWorkflow
    configs = task.get_config()

    conf = configs['initial_sub_graphs']
    conf.update({'ignore_label': ignore_label})
    with open(os.path.join(config_folder, 'initial_sub_graphs.config'), 'w') as f:
        json.dump(conf, f)

    n_threads = 8
    task_names = ['merge_sub_graphs', 'map_edge_ids']
    for tt in task_names:
        conf = configs[tt]
        conf.update({'threads_per_job': n_threads, 'mem_limit': 128})
        with open(os.path.join(config_folder, '%s.config' % tt), 'w') as f:
            json.dump(conf, f)

    t = task(tmp_folder=tmp_folder, max_jobs=max_jobs,
             config_dir=config_folder, target=target,
             input_path=ws_path, input_key=ws_key,
             graph_path=out_path, output_key=graph_key)
    ret = luigi.build([t], local_scheduler=True)
    assert ret, "Graph computation failed"

    #
    # compute the node labels
    #
    task = NodeLabelWorkflow
    t = task(tmp_folder=tmp_folder, max_jobs=max_jobs,
             target=target, config_dir=config_folder,
             ws_path=ws_path, ws_key=ws_key,
             input_path=seg_path, input_key=seg_key,
             output_path=out_path, output_key=node_label_key,
             ignore_label=0 if ignore_label else None)
    ret = luigi.build([t], local_scheduler=True)
    assert ret, "Node label computation failed"

    with z5py.File(out_path, 'r') as f:
        node_labels = f[node_label_key][:]

    #
    # load the graph and check for connected components
    #
    ccs = _cc_nifty(out_path, graph_key, node_labels, ignore_label)
    return node_labels, ccs
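# Example (sketch): checking whether the segments of a merged segmentation are
# spatially connected with respect to the watershed graph. The paths and keys
# are placeholders; a segment whose node labeling splits into more than one
# connected component indicates a spurious (non-local) merge.
def _example_connected_components():
    node_labels, ccs = compute_connected_components(
        ws_path='./data.n5', ws_key='volumes/watershed',
        seg_path='./data.n5', seg_key='volumes/segmentation/s0',
        out_path='./tmp_cc/data.n5',
        node_label_key='node_labels/segmentation',
        cc_key='node_labels/connected_components',
        tmp_folder='./tmp_cc', target='local', max_jobs=8)
    return node_labels, ccs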