def run_glia_prediction(e3=False):
    """
    Submit the glia prediction jobs for all supervoxel (SV) storage
    directories and check the result for completeness.

    The storage directories of the SV ``SegmentationDataset``
    (``sd.so_dir_paths``) are split into ``global_params.NGPU_TOTAL * 2``
    chunks. Every chunk is handed to one ``qu.QSUB_script`` job which requests
    a single GPU (``--gres=gpu:1``). Afterwards ``find_missing_sv_views`` is
    called with ``woglia=False`` and every returned SV ID that is part of the
    pruned RAG is treated as an error.

    Parameters
    ----------
    e3 : bool
        If truthy, the script ``predict_sv_views_chunked_e3`` is submitted and
        the model is passed as the string ``'get_glia_model_e3'``. Otherwise
        ``predict_sv_views_chunked`` is submitted with the properties of the
        model returned by ``get_glia_model()``. The flag is evaluated once by
        truthiness so that model selection and job submission always agree.

    Raises
    ------
    ValueError
        If ``find_missing_sv_views`` reports SVs which are nodes of the
        pruned RAG.
    """
    use_e3 = bool(e3)
    log = initialize_logging('glia_prediction',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    # only append to this key if needed (for e.g. different versions,
    # change accordingly in 'axoness_mapping.py')
    pred_key = "glia_probas"

    # Load initial RAG from Knossos mergelist text file.
    g = nx.read_edgelist(global_params.config.pruned_rag_path,
                         nodetype=np.uint)
    all_sv_ids_in_rag = np.array(list(g.nodes()), dtype=np.uint)
    log.debug('Found {} CCs with a total of {} SVs in inital RAG.'.format(
        nx.number_connected_components(g), g.number_of_nodes()))

    # chunk them
    sd = SegmentationDataset("sv",
                             working_dir=global_params.config.working_dir)
    multi_params = chunkify(sd.so_dir_paths, global_params.NGPU_TOTAL * 2)

    # get model properties
    if use_e3:
        model_kwargs = 'get_glia_model_e3'
    else:
        m = get_glia_model()
        model_kwargs = dict(model_path=m._path,
                            normalize_data=m.normalize_data,
                            imposed_batch_size=m.imposed_batch_size,
                            nb_labels=m.nb_labels,
                            channels_to_load=m.channels_to_load)
        # GPUs are made available for every job via slurm, so no random
        # assignment is needed. The same dict object is shared by all
        # job parameter lists built below.
        model_kwargs["init_gpu"] = 0

    # all other kwargs like obj_type='sv' and version are the current SV
    # SegmentationDataset by default
    so_kwargs = dict(working_dir=global_params.config.working_dir)
    # for glia views set woglia to False (because glia are included),
    # raw_only to True
    pred_kwargs = dict(woglia=False, pred_key=pred_key, verbose=False,
                       raw_only=True)
    multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs]
                    for par in multi_params]

    if use_e3:
        # TODO: using two GPUs on a single node seems to be error-prone
        #  -> wb13 froze when processing example_cube=2
        n_cores = global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE
        if 'example_cube' in global_params.config.working_dir:
            # do not run two predictions in parallel
            n_cores = global_params.NCORES_PER_NODE
        qu.QSUB_script(multi_params, "predict_sv_views_chunked_e3", log=log,
                       n_max_co_processes=global_params.NGPU_TOTAL,
                       script_folder=None, n_cores=n_cores, suffix="_glia",
                       additional_flags="--gres=gpu:1",
                       remove_jobfolder=True)
    else:
        qu.QSUB_script(
            multi_params, "predict_sv_views_chunked", log=log,
            n_max_co_processes=global_params.NGPU_TOTAL,
            n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE,
            suffix="_glia", additional_flags="--gres=gpu:1",
            remove_jobfolder=True)

    log.info('Finished glia prediction. Checking completeness.')
    res = find_missing_sv_views(sd, woglia=False,
                                n_cores=global_params.NCORES_PER_NODE)
    # A set gives constant-time membership tests; scanning the numpy array
    # for every missing ID would be linear in the RAG size each time.
    rag_sv_ids = set(all_sv_ids_in_rag.tolist())
    # TODO: decide whether missing SVs which are *not* part of the RAG
    #  should be used/reported as well
    missing_contained_in_rag = [el for el in res if el in rag_sv_ids]
    if missing_contained_in_rag:
        msg = "Not all SVs were predicted! {}/{} missing:\n" \
              "{}".format(len(missing_contained_in_rag),
                          len(all_sv_ids_in_rag),
                          missing_contained_in_rag[:100])
        log.error(msg)
        raise ValueError(msg)
    log.info('Success.')
def run_glia_rendering(max_n_jobs=None):
    """
    Render the SV views which are required for the glia prediction.

    Uses the pruned RAG (stored as edge list .bz2 file) which is computed in
    `init_cell_subcell_sds`. Its connected components (CCs) are handled in
    three groups:

    * CCs with more than ``RENDERING_MAX_NB_SV`` nodes are passed through a
      queue to two ``_run_huge_ssv_render_worker`` processes.
    * CCs whose entry in the dict returned by ``create_ccsize_dict`` is below
      ``global_params.min_cc_size_ssv`` are ignored.
    * All remaining CCs are shuffled (numpy seed 0), split into `max_n_jobs`
      chunks and submitted via ``qu.QSUB_script`` to the script
      ``render_views_glia_removal``.

    Afterwards ``find_missing_sv_views`` is called with ``woglia=False`` and
    every returned SV ID that is part of the RAG is treated as an error.

    Parameters
    ----------
    max_n_jobs : int or None
        Number of chunks the regular-sized CCs are split into. If None,
        ``global_params.NGPU_TOTAL * 4`` is used when
        ``global_params.PYOPENGL_PLATFORM == 'egl'`` and
        ``global_params.NCORE_TOTAL * 4`` otherwise.

    Raises
    ------
    ValueError
        If the output queue of the huge-SSV workers does not hold one entry
        per huge CC, or if ``find_missing_sv_views`` reports SVs which are
        nodes of the RAG.
    """
    if max_n_jobs is None:
        if global_params.PYOPENGL_PLATFORM == 'egl':
            max_n_jobs = global_params.NGPU_TOTAL * 4
        else:
            max_n_jobs = global_params.NCORE_TOTAL * 4
    log = initialize_logging('glia_view_rendering',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)
    log.info("Preparing RAG.")
    np.random.seed(0)

    # view rendering prior to glia removal, choose SSD accordingly
    # glia removal is based on the initial RAG and does not require
    # explicitly stored SSVs
    # TODO: refactor how splits are stored, currently those are stored at
    #  ssv_tmp
    version = "tmp"

    G = nx.read_edgelist(global_params.config.pruned_rag_path,
                         nodetype=np.uint)
    # `nx.connected_component_subgraphs` was removed in networkx 2.4; this is
    # the documented equivalent (independent graph copies, same order).
    cc_gs = sorted((G.subgraph(c).copy() for c in nx.connected_components(G)),
                   key=len, reverse=True)
    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)

    # generate parameter for view rendering of individual SSV
    # TODO: remove SVs below minimum size (-> global_params.min_cc_size_ssv)
    sds = SegmentationDataset("sv",
                              working_dir=global_params.config.working_dir)
    # cached bounding boxes multiplied by the dataset scaling, keyed by SV ID
    bbs = sds.load_cached_data('bounding_box') * sds.scaling
    sv_size_dict = dict(zip(sds.ids, bbs))
    ccsize_dict = create_ccsize_dict(cc_gs, sv_size_dict,
                                     is_connected_components=True)

    big_ssv = []
    small_ssv = []
    for g in cc_gs:
        if g.number_of_nodes() > RENDERING_MAX_NB_SV:
            big_ssv.append(g)
        elif ccsize_dict[next(iter(g.nodes()))] < global_params.min_cc_size_ssv:
            continue  # ignore this CC
        else:
            small_ssv.append(g)

    log.info("View rendering for glia separation started.")
    # identify huge SSVs and process them on the entire cluster
    if big_ssv:
        n_threads = 2
        log.info("Processing {} huge SSVs in {} threads on the entire cluster"
                 ".".format(len(big_ssv), n_threads))
        q_in = Queue()
        q_out = Queue()
        for kk, g in enumerate(big_ssv):
            q_in.put((kk, g, version))
        # one -1 sentinel per worker process
        for _ in range(n_threads):
            q_in.put(-1)
        ps = [Process(target=_run_huge_ssv_render_worker, args=(q_in, q_out))
              for _ in range(n_threads)]
        for p in ps:
            p.start()
            time.sleep(0.5)
        q_in.close()
        q_in.join_thread()
        for p in ps:
            p.join()
        if q_out.qsize() != len(big_ssv):
            raise ValueError(
                'Not all `_run_huge_ssv_render_worker` jobs completed successfully.'
            )

    # render small SSV without overhead and single cpus on whole cluster
    multi_params = small_ssv
    np.random.shuffle(multi_params)
    multi_params = chunkify(multi_params, max_n_jobs)
    # list of SSV IDs and SSD parameters need to be given to a single QSUB job
    multi_params = [(ixs, global_params.config.working_dir, version)
                    for ixs in multi_params]
    qu.QSUB_script(
        multi_params, "render_views_glia_removal", log=log,
        n_max_co_processes=global_params.NGPU_TOTAL,
        n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE,
        additional_flags="--gres=gpu:1", remove_jobfolder=True)

    # check completeness
    log.info(
        'Finished view rendering for glia separation. Checking completeness.')
    sd = SegmentationDataset("sv",
                             working_dir=global_params.config.working_dir)
    res = find_missing_sv_views(sd, woglia=False,
                                n_cores=global_params.NCORES_PER_NODE)
    # A set gives constant-time membership tests; scanning the numpy array
    # for every missing ID would be linear in the RAG size each time.
    rag_sv_ids = set(all_sv_ids_in_rag.tolist())
    # TODO: decide whether missing SVs which are *not* part of the RAG
    #  should be used/reported as well
    missing_contained_in_rag = [el for el in res if el in rag_sv_ids]
    if missing_contained_in_rag:
        msg = "Not all SVs were rendered completely! {}/{} missing:\n" \
              "{}".format(len(missing_contained_in_rag),
                          len(all_sv_ids_in_rag),
                          missing_contained_in_rag[:100])
        log.error(msg)
        raise ValueError(msg)
    log.info('All SVs now contain views required for glia prediction.')
def run_glia_rendering():
    """
    Render the SV views required for glia removal, based on the initial RAG.

    Steps:

    1. Build a graph from the text file at
       ``global_params.config.init_rag_path``; the first two integers found
       on every line are added as an edge.
    2. Add every SV ID of the SV ``SegmentationDataset`` that is not yet a
       node as a single-node connected component (CC).
    3. Write the CCs via ``knossos_ml_from_ccs`` to ``initial_rag.txt``.
    4. Render CCs with more than ``RENDERING_MAX_NB_SV`` SVs one after
       another through ``SuperSegmentationObject.render_views``.
    5. Shuffle the remaining CCs (numpy seed 0), split them into 2000 chunks
       and submit them via ``qu.QSUB_script`` to the script
       ``render_views_glia_removal``.
    6. Call ``find_missing_sv_views`` with ``woglia=False`` and fail if any
       returned SV ID is a node of the graph.

    NOTE(review): this file contains an earlier definition
    ``run_glia_rendering(max_n_jobs=None)``. Being defined later, this
    definition rebinds the name; confirm which one is intended to be active.

    Raises
    ------
    RuntimeError
        If ``find_missing_sv_views`` reports SVs which are nodes of the
        graph.
    """
    log = initialize_logging('glia_view_rendering',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    np.random.seed(0)

    # view rendering prior to glia removal, choose SSD accordingly
    version = "tmp"

    # glia removal is based on the initial RAG and does not require
    # explicitly stored SSVs
    G = nx.Graph()  # TODO: Add factory method for initial RAG
    id_pattern = re.compile(r'(\d+)')
    with open(global_params.config.init_rag_path, 'r') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines
            edges = [int(v) for v in id_pattern.findall(line)]
            G.add_edge(edges[0], edges[1])

    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)
    log.info("Found {} SVs in initial RAG.".format(len(all_sv_ids_in_rag)))

    # add single SV connected components to initial graph
    sd = SegmentationDataset(obj_type='sv',
                             working_dir=global_params.config.working_dir)
    sv_ids = sd.ids
    diff = np.array(list(set(sv_ids).difference(set(all_sv_ids_in_rag))))
    log.info('Found {} single connected component SVs which were missing'
             ' in initial RAG.'.format(len(diff)))
    for ix in diff:
        G.add_node(ix)

    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)
    log.info("Found {} SVs in initial RAG after adding size-one connected "
             "components. Writing kml text file".format(
                 len(all_sv_ids_in_rag)))

    # `nx.connected_component_subgraphs` was removed in networkx 2.4; this is
    # the documented equivalent. The CCs are computed only once and reused
    # for the kml output and the rendering parameters.
    cc_graphs = [G.subgraph(c).copy() for c in nx.connected_components(G)]

    # write out readable format for 'glia_prediction.py'
    ccs = [list(cc.nodes()) for cc in cc_graphs]
    kml = knossos_ml_from_ccs([np.sort(cc)[0] for cc in ccs], ccs)
    # NOTE(review): no path separator is inserted here, i.e. this assumes
    # that `working_dir` ends with one - confirm.
    with open(global_params.config.working_dir + "initial_rag.txt", 'w') as f:
        f.write(kml)

    # generate parameter for view rendering of individual SSV
    log.info("Starting view rendering.")
    # Plain lists are used on purpose: graphs are sized iterables, so wrapping
    # them in `np.array` may make numpy try to unpack them element-wise.
    big_ssv = [g for g in cc_graphs
               if g.number_of_nodes() > RENDERING_MAX_NB_SV]
    small_ssv = [g for g in cc_graphs
                 if g.number_of_nodes() <= RENDERING_MAX_NB_SV]

    # identify huge SSVs and process them individually on whole cluster
    for kk, g in enumerate(big_ssv[::-1]):
        # Create SSV object
        sv_ixs = np.sort(list(g.nodes()))
        log.info("Processing SSV [{}/{}] with {} SVs on whole cluster.".format(
            kk + 1, len(big_ssv), len(sv_ixs)))
        sso = SuperSegmentationObject(
            sv_ixs[0], working_dir=global_params.config.working_dir,
            version=version, create=False, sv_ids=sv_ixs)
        # nodes of sso._rag need to be SV
        new_G = nx.Graph()
        for e in g.edges():
            new_G.add_edge(sso.get_seg_obj("sv", e[0]),
                           sso.get_seg_obj("sv", e[1]))
        sso._rag = new_G
        sso.render_views(add_cellobjects=False, cellobjects_only=False,
                         skip_indexviews=True, woglia=False,
                         qsub_pe="openmp", overwrite=True,
                         qsub_co_jobs=global_params.NCORE_TOTAL)

    # render small SSV without overhead and single cpus on whole cluster
    multi_params = small_ssv
    np.random.shuffle(multi_params)
    multi_params = chunkify(multi_params, 2000)
    # list of SSV IDs and SSD parameters need to be given to a single QSUB job
    multi_params = [(ixs, global_params.config.working_dir, version)
                    for ixs in multi_params]
    qu.QSUB_script(multi_params, "render_views_glia_removal",
                   n_max_co_processes=global_params.NCORE_TOTAL,
                   pe="openmp", queue=None, script_folder=None, suffix="")

    # check completeness
    sd = SegmentationDataset("sv",
                             working_dir=global_params.config.working_dir)
    res = find_missing_sv_views(sd, woglia=False, n_cores=10)
    # A set gives constant-time membership tests; scanning the numpy array
    # for every missing ID would be linear in the RAG size each time.
    rag_sv_ids = set(all_sv_ids_in_rag.tolist())
    missing_not_contained_in_rag = []
    missing_contained_in_rag = []
    for el in res:
        if el in rag_sv_ids:
            missing_contained_in_rag.append(el)
        else:
            missing_not_contained_in_rag.append(el)
    if missing_not_contained_in_rag:
        log.info("{} SVs were not rendered but also not part of the initial "
                 "RAG: {}".format(len(missing_not_contained_in_rag),
                                  missing_not_contained_in_rag))
    if missing_contained_in_rag:
        msg = "Not all SSVs were rendered completely! Missing:\n" \
              "{}".format(missing_contained_in_rag)
        log.error(msg)
        raise RuntimeError(msg)