def run_create_rag():
    """
    If ``global_params.config.prior_glia_removal==True``:
        stores pruned RAG at ``global_params.config.pruned_rag_path``, required
        for all glia removal steps.
        :func:`~syconn.exec.exec_multiview.run_glia_splitting` will finally
        store the ``neuron_rag.bz2`` at the currently active working directory.
    else:
        stores pruned RAG at ``global_params.config.working_dir + /glia/neuron_rag.bz2``,
        required by :func:`~syconn.exec.exec_multiview.run_create_neuron_ssd`.
    """
    log = initialize_logging('create_rag', global_params.config.working_dir +
                             '/logs/', overwrite=True)
    # Crop RAG according to cell SVs found during SD generation and apply size threshold
    G = nx.read_edgelist(global_params.config.init_rag_path, nodetype=np.uint)
    if 0 in G.nodes():
        G.remove_node(0)
        log.warning('Found background node 0 in original graph. Removing.')
    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)
    log.info("Found {} SVs in initial RAG.".format(len(all_sv_ids_in_rag)))

    # add single SV connected components to initial graph
    sd = SegmentationDataset(obj_type='sv',
                             working_dir=global_params.config.working_dir)
    sv_ids = sd.ids
    diff = np.array(list(set(sv_ids).difference(set(all_sv_ids_in_rag))))
    log.info('Found {} single-element connected component SVs which were missing'
             ' in initial RAG.'.format(len(diff)))
    for ix in diff:
        G.add_edge(ix, ix)
    log.debug("Found {} SVs in initial RAG after adding size-one connected "
              "components.".format(G.number_of_nodes()))

    # remove small connected components
    sv_size_dict = {}
    bbs = sd.load_cached_data('bounding_box') * sd.scaling
    for ii in range(len(sd.ids)):
        sv_size_dict[sd.ids[ii]] = bbs[ii]
    ccsize_dict = create_ccsize_dict(G, sv_size_dict)
    log.debug("Finished preparation of SSV size dictionary based "
              "on bounding box diagonal of corresponding SVs.")
    before_cnt = len(G.nodes())
    for ix in list(G.nodes()):
        if ccsize_dict[ix] < global_params.config['glia']['min_cc_size_ssv']:
            G.remove_node(ix)
    cc_gs = list(nx.connected_component_subgraphs(G))
    log.info("Removed {} SVs from RAG because of size. Final RAG contains {}"
             " SVs in {} CCs.".format(before_cnt - G.number_of_nodes(),
                                      G.number_of_nodes(), len(cc_gs)))
    nx.write_edgelist(G, global_params.config.pruned_rag_path)

    if not global_params.config.prior_glia_removal:
        os.makedirs(global_params.config.working_dir + '/glia/', exist_ok=True)
        shutil.copy(global_params.config.pruned_rag_path,
                    global_params.config.working_dir + '/glia/neuron_rag.bz2')
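# Minimal, self-contained sketch (not part of SyConn) of the size-based pruning
# idea used above: nodes whose connected component falls below a size threshold
# are dropped. The per-node sizes and the threshold are made up for
# illustration; SyConn derives them from `create_ccsize_dict` (bounding-box
# based) and the 'min_cc_size_ssv' config entry.
import networkx as nx


def prune_small_ccs(g: nx.Graph, node_size: dict, min_cc_size: float) -> nx.Graph:
    g = g.copy()
    for cc in list(nx.connected_components(g)):
        # accumulate the size of all supervoxels in this connected component
        if sum(node_size[n] for n in cc) < min_cc_size:
            g.remove_nodes_from(cc)
    return g


toy = nx.Graph([(1, 2), (3, 3)])       # one two-node CC and one singleton
sizes = {1: 5.0, 2: 5.0, 3: 1.0}
pruned = prune_small_ccs(toy, sizes, min_cc_size=4.0)
print(sorted(pruned.nodes()))          # -> [1, 2]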
def run_matrix_export():
    # cache cell attributes
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    ssd.save_dataset_deep()
    log = initialize_logging('synapse_analysis', global_params.config.working_dir +
                             '/logs/', overwrite=True)
    sd_syn_ssv = SegmentationDataset(working_dir=global_params.config.working_dir,
                                     obj_type='syn_ssv')

    # as an alternative to the skeletons, use vertex predictions or
    # sample_locations, ~3.5h @ 300 cpus
    # TODO: requires speed-up; one could collect properties only for synapses
    #  above the probability threshold:
    #  synssv_ids = synssv_ids[syn_prob > .5]
    #  ssv_partners = ssv_partners[syn_prob > .5]
    # One could also re-use the cached synssv IDs (computed during mapping of
    # synssv to SSVs) -> saves finding SSV ID indices in synapse arrays
    # (-> slow for many synapses)
    cps.collect_properties_from_ssv_partners(global_params.config.working_dir,
                                             debug=True)

    # collect the new object attributes gathered above (partner axoness,
    # celltypes, synapse probabilities etc.); no need to compute
    # size/rep_coord etc. -> recompute=False
    dataset_analysis(sd_syn_ssv, compute_meshprops=False, recompute=False)
    log.info('Synapse property collection from SSVs finished.')

    # export_matrix
    log.info('Exporting connectivity matrix now.')
    dest_folder = global_params.config.working_dir + '/connectivity_matrix/'
    cps.export_matrix(dest_folder=dest_folder)
    log.info('Connectivity matrix was exported to "{}".'.format(dest_folder))
def __init__(self, syconn_path='', logger=None):
    """
    Initializes a SyConn backend for operation. This includes in-memory
    initialization of the most important caches. Currently, SyConn Gate does
    not support backend data changes and the server needs to restart for
    changes to take effect. If the backend data is changed while the server is
    running, old content might be served. All backend functions must return
    dicts.

    :param syconn_path: str
    """
    self.logger = logger
    self.logger.info('Initializing SyConn backend')

    self.ssd = ss.SuperSegmentationDataset(syconn_path, sso_locking=False)
    self.logger.info('SuperSegmentation dataset initialized.')

    self.sds = dict(syn_ssv=SegmentationDataset(working_dir=syconn_path,
                                                obj_type='syn_ssv'))

    # flat array representation of all synapses
    self.conn_dict = conn.load_cached_data_dict()
    self.logger.info('In memory cache of synapses initialized.')

    # directed networkx graph of connectivity
    self.conn_graph = conn.connectivity_to_nx_graph(self.conn_dict)
    self.logger.info('Connectivity graph initialized.')
def sd_init(co: str, max_n_jobs: int, log: Optional[Logger] = None):
    """
    Initialize :class:`~syconn.reps.segmentation.SegmentationDataset` of given
    supervoxel type `co`.

    Args:
        co: Cellular organelle identifier (e.g. 'mi', 'vc', ...).
        max_n_jobs: Number of parallel jobs.
        log: Logger.
    """
    sd_seg = SegmentationDataset(obj_type=co,
                                 working_dir=global_params.config.working_dir,
                                 version="0")
    multi_params = chunkify(sd_seg.so_dir_paths, max_n_jobs)
    so_kwargs = dict(working_dir=global_params.config.working_dir, obj_type=co)
    multi_params = [[par, so_kwargs] for par in multi_params]

    if not global_params.config.use_new_meshing and (co != "sv" or (
            co == "sv" and global_params.config.allow_mesh_gen_cells)):
        _ = qu.QSUB_script(multi_params, "mesh_caching", suffix=co,
                           remove_jobfolder=False,
                           n_max_co_processes=global_params.NCORE_TOTAL, log=log)

    if co == "sv":
        _ = qu.QSUB_script(multi_params, "sample_location_caching",
                           n_max_co_processes=global_params.NCORE_TOTAL,
                           suffix=co, remove_jobfolder=True, log=log)

    # write mesh properties to attribute dictionaries if old meshing is active
    if not global_params.config.use_new_meshing:
        sd_proc.dataset_analysis(sd_seg, recompute=False, compute_meshprops=True)
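# Hedged usage sketch: how a pipeline script might initialize one
# SegmentationDataset per supervoxel type. The organelle identifiers and the
# job count below are illustrative, not taken from a specific config.
for organelle in ['mi', 'vc', 'sj']:
    sd_init(organelle, max_n_jobs=100)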
def push_so_attr(self, so_id, so_type, attr_key, attr_value):
    """
    Generic attribute push; returns an empty string if everything went well.
    Could be optimized with the assumption that all attributes have been
    cached as numpy arrays.

    Parameters
    ----------
    so_id : int
    so_type : str
    attr_key : str
    attr_value :

    Returns
    -------
    bytes
        Empty string if everything went well.
    """
    if so_type not in self.sds:
        self.sds[so_type] = SegmentationDataset(obj_type=so_type)
    sd = self.sds[so_type]
    try:
        so = sd.get_segmentation_object(so_id)
        so.save_attributes([attr_key], [attr_value])
        return ""
    except Exception as e:
        return str(e)
def run_matrix_export():
    """
    Export the connectivity matrix as a ``.csv`` file to the
    ``connectivity_matrix`` folder of the currently active working directory.
    Also collects the following synapse properties from prior analysis steps:

        * 'partner_axoness': Cell compartment type (axon: 1, dendrite: 0,
          soma: 2, en-passant bouton: 3, terminal bouton: 4) of the partner
          neurons.
        * 'partner_spiness': Spine compartment predictions of both neurons.
        * 'partner_celltypes': Celltypes of both neurons.
        * 'latent_morph': Local morphology embeddings of the pre- and post-
          synaptic partners.

    Examples:
        See :class:`~syconn.reps.segmentation.SegmentationDataset` for examples.
    """
    # cache cell attributes
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    ssd.save_dataset_deep()
    log = initialize_logging('synapse_analysis', global_params.config.working_dir +
                             '/logs/', overwrite=True)
    sd_syn_ssv = SegmentationDataset(working_dir=global_params.config.working_dir,
                                     obj_type='syn_ssv')

    # as an alternative to the skeletons, use vertex predictions or
    # sample_locations, ~3.5h @ 300 cpus
    # TODO: requires speed-up; one could collect properties only for synapses
    #  above the probability threshold:
    #  synssv_ids = synssv_ids[syn_prob > .5]
    #  ssv_partners = ssv_partners[syn_prob > .5]
    # One could also re-use the cached synssv IDs (computed during mapping of
    # synssv to SSVs) -> saves finding SSV ID indices in synapse arrays
    # (-> slow for many synapses)
    cps.collect_properties_from_ssv_partners(global_params.config.working_dir,
                                             debug=True)

    # collect the new object attributes gathered above (partner axoness,
    # celltypes, synapse probabilities etc.); no need to compute
    # size/rep_coord etc. -> recompute=False
    dataset_analysis(sd_syn_ssv, compute_meshprops=False, recompute=False)
    log.info('Synapse property collection from SSVs finished.')

    # export_matrix
    log.info('Exporting connectivity matrix now.')
    dest_folder = global_params.config.working_dir + '/connectivity_matrix/'
    cps.export_matrix(dest_folder=dest_folder)
    log.info('Connectivity matrix was exported to "{}".'.format(dest_folder))
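# Hedged usage sketch: run the export after synapse analysis has completed,
# then inspect the CSV output. The filename 'conn_mat.csv' is hypothetical;
# check what `cps.export_matrix` actually writes to the destination folder.
import pandas as pd

run_matrix_export()
dest = global_params.config.working_dir + '/connectivity_matrix/'
df = pd.read_csv(dest + 'conn_mat.csv')  # hypothetical filename
print(df.head())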
def run_glia_splitting():
    log = initialize_logging('glia_splitting', global_params.config.working_dir +
                             '/logs/', overwrite=False)
    # path to networkx file containing the initial rag,
    # TODO: create alternative formats
    G = nx.Graph()  # TODO: Make this more general
    with open(global_params.config.init_rag_path, 'r') as f:
        for l in f.readlines():
            edges = [int(v) for v in re.findall(r'(\d+)', l)]
            G.add_edge(edges[0], edges[1])

    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)
    log.info("Found {} SVs in initial RAG.".format(len(all_sv_ids_in_rag)))

    # add single SV connected components to initial graph
    sd = SegmentationDataset(obj_type='sv',
                             working_dir=global_params.config.working_dir)
    sv_ids = sd.ids
    diff = np.array(list(set(sv_ids).difference(set(all_sv_ids_in_rag))))
    log.info('Found {} single connected component SVs which were'
             ' missing in initial RAG.'.format(len(diff)))
    for ix in diff:
        G.add_node(ix)
    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)
    log.info("Found {} SVs in initial RAG after adding size-one connected "
             "components. Writing RAG to pkl.".format(len(all_sv_ids_in_rag)))

    if not os.path.isdir(global_params.config.working_dir + "/glia/"):
        os.makedirs(global_params.config.working_dir + "/glia/")
    transform_rag_edgelist2pkl(G)

    # first perform glia splitting based on multi-view predictions, results are
    # stored at SuperSegmentationDataset ssv_gliaremoval
    qsub_glia_splitting()

    # collect all neuron and glia SVs and store them in numpy array
    collect_glia_sv()

    # here use reconnected RAG or initial rag
    recon_nx = G
    # create glia / neuron RAGs
    write_glia_rag(recon_nx, global_params.min_cc_size_ssv, suffix=rag_suffix)
    log.info("Finished glia splitting. Resulting RAGs are stored at {}."
             "".format(global_params.config.working_dir + "/glia/"))
def __init__(self, syconn_path: str = '', logger=None, synthresh=0.5,
             axodend_only=True):
    """
    Initializes a SyConn backend for operation. This includes in-memory
    initialization of the most important caches. Currently, SyConn Gate does
    not support backend data changes and the server needs to restart for
    changes to take effect. If the backend data is changed while the server is
    running, old content might be served. All backend functions must return
    dicts.

    Args:
        syconn_path:
        logger:
        synthresh: All synapses below `synthresh` will be excluded.
        axodend_only: If True, only axo-dendritic synapses will be loaded.
    """
    self.logger = logger
    self.logger.info('Initializing SyConn backend')

    self.ssd = ss.SuperSegmentationDataset(syconn_path, sso_locking=False)
    self.logger.info('SuperSegmentation dataset initialized.')

    self.sds = dict(syn_ssv=SegmentationDataset(working_dir=syconn_path,
                                                obj_type='syn_ssv'))
    self.nb_cpus = cpu_count()
    self.synthresh = synthresh
    self.axodend_only = axodend_only

    # flat array representation of all synapses
    self.conn_dict = conn.load_cached_data_dict()
    self.logger.info('In memory cache of synapses initialized.')

    # directed networkx graph of connectivity
    self.conn_graph = conn.connectivity_to_nx_graph(self.conn_dict)
    self.logger.info('Connectivity graph initialized.')
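# Hedged usage sketch: constructing the backend for a given working directory.
# `SyConnBackend` is assumed here as the name of the enclosing class of the
# constructor above; the path and logger setup are illustrative only.
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('syconn_gate')
backend = SyConnBackend('/path/to/working_dir', logger=logger,
                        synthresh=0.5, axodend_only=True)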
def pull_so_attr(self, so_id, so_type, attr_key):
    """
    Generic attribute pull; returns an empty string if the key does not exist.
    Could be optimized with the assumption that all attributes have been
    cached as numpy arrays.

    Parameters
    ----------
    so_id : int
    so_type : str
    attr_key : str

    Returns
    -------
    str
    """
    if so_type not in self.sds:
        self.sds[so_type] = SegmentationDataset(obj_type=so_type)
    sd = self.sds[so_type]
    so = sd.get_segmentation_object(so_id)
    so.load_attr_dict()
    if attr_key not in so.attr_dict:
        return ''
    return so.attr_dict[attr_key]
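# Hedged sketch of a push/pull round trip, assuming `backend` is an initialized
# backend instance and that a 'syn_ssv' object with this ID exists. The object
# ID and the attribute key 'my_flag' are hypothetical.
err = backend.push_so_attr(so_id=123, so_type='syn_ssv',
                           attr_key='my_flag', attr_value=1)
assert err == ""  # empty string signals success
print(backend.pull_so_attr(so_id=123, so_type='syn_ssv', attr_key='my_flag'))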
def run_glia_prediction(e3=False):
    log = initialize_logging('glia_prediction', global_params.config.working_dir +
                             '/logs/', overwrite=False)
    # only append to this key if needed
    # (for e.g. different versions, change accordingly in 'axoness_mapping.py')
    pred_key = "glia_probas"

    # Load initial RAG from Knossos mergelist text file.
    init_rag_p = global_params.config.working_dir + "initial_rag.txt"
    assert os.path.isfile(init_rag_p), \
        "Initial RAG could not be found at %s." % init_rag_p
    init_rag = parse_cc_dict_from_kml(init_rag_p)
    log.info('Found {} CCs with a total of {} SVs in initial RAG.'
             ''.format(len(init_rag), np.sum([len(v) for v in init_rag.values()])))

    # chunk them
    sd = SegmentationDataset("sv", working_dir=global_params.config.working_dir)
    multi_params = chunkify(sd.so_dir_paths, 100)

    # get model properties
    if e3:
        model_kwargs = 'get_glia_model_e3'
    else:
        m = get_glia_model()
        model_kwargs = dict(model_path=m._path,
                            normalize_data=m.normalize_data,
                            imposed_batch_size=m.imposed_batch_size,
                            nb_labels=m.nb_labels,
                            channels_to_load=m.channels_to_load)

    # all other kwargs like obj_type='sv' and version are the current SV
    # SegmentationDataset by default
    so_kwargs = dict(working_dir=global_params.config.working_dir)
    # for glia views set woglia to False (because glia are included),
    # raw_only to True
    pred_kwargs = dict(woglia=False, pred_key=pred_key, verbose=False,
                       raw_only=True)
    multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs]
                    for par in multi_params]

    if e3:
        path_to_out = qu.QSUB_script(
            multi_params, "predict_sv_views_chunked_e3", n_max_co_processes=15,
            pe="openmp", queue=None, script_folder=None, n_cores=10,
            suffix="_glia", additional_flags="--gres=gpu:1")  # removed -V
    else:
        # randomly assign to gpu 0 or 1
        for par in multi_params:
            mk = par[1]
            # GPUs are made available for every job via slurm, no need for
            # random assignments: np.random.rand(0, 2)
            mk["init_gpu"] = 0
        path_to_out = qu.QSUB_script(
            multi_params, "predict_sv_views_chunked", n_max_co_processes=25,
            pe="openmp", queue=None, n_cores=10, suffix="_glia",
            script_folder=None, additional_flags="--gres=gpu:1")  # removed -V

    log.info('Finished glia prediction. Checking completeness.')
    res = find_missing_sv_attributes(sd, pred_key, n_cores=10)
    if len(res) > 0:
        log.error("Attribute '{}' missing for following"
                  " SVs:\n{}".format(pred_key, res))
    else:
        log.info('Success.')
def run_glia_rendering(max_n_jobs=None):
    """
    Uses the pruned RAG (stored as edge list .bz2 file) which is computed in
    `init_cell_subcell_sds`.

    Parameters
    ----------
    max_n_jobs : int, optional
        Number of parallel rendering jobs.
    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NGPU_TOTAL * 4 \
            if global_params.PYOPENGL_PLATFORM == 'egl' \
            else global_params.NCORE_TOTAL * 4
    log = initialize_logging('glia_view_rendering',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)
    log.info("Preparing RAG.")
    np.random.seed(0)

    # view rendering prior to glia removal, choose SSD accordingly
    # glia removal is based on the initial RAG and does not require explicitly
    # stored SSVs
    # TODO: refactor how splits are stored, currently those are stored at ssv_tmp
    version = "tmp"

    G = nx.read_edgelist(global_params.config.pruned_rag_path, nodetype=np.uint)
    cc_gs = sorted(list(nx.connected_component_subgraphs(G)), key=len,
                   reverse=True)
    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)

    # generate parameter for view rendering of individual SSV
    # TODO: remove SVs below minimum size (-> global_params.min_cc_size_ssv)
    sds = SegmentationDataset("sv", working_dir=global_params.config.working_dir)
    sv_size_dict = {}
    bbs = sds.load_cached_data('bounding_box') * sds.scaling
    for ii in range(len(sds.ids)):
        sv_size_dict[sds.ids[ii]] = bbs[ii]
    ccsize_dict = create_ccsize_dict(cc_gs, sv_size_dict,
                                     is_connected_components=True)

    multi_params = cc_gs
    big_ssv = []
    small_ssv = []
    for g in multi_params:
        if g.number_of_nodes() > RENDERING_MAX_NB_SV:
            big_ssv.append(g)
        elif ccsize_dict[list(g.nodes())[0]] < global_params.min_cc_size_ssv:
            pass  # ignore this CC
        else:
            small_ssv.append(g)

    log.info("View rendering for glia separation started.")
    # identify huge SSVs and process them on the entire cluster
    if len(big_ssv) > 0:
        n_threads = 2
        log.info("Processing {} huge SSVs in {} threads on the entire cluster"
                 ".".format(len(big_ssv), n_threads))
        q_in = Queue()
        q_out = Queue()
        for kk, g in enumerate(big_ssv):
            q_in.put((kk, g, version))
        for _ in range(n_threads):
            q_in.put(-1)
        ps = [Process(target=_run_huge_ssv_render_worker, args=(q_in, q_out))
              for _ in range(n_threads)]
        for p in ps:
            p.start()
            time.sleep(0.5)
        q_in.close()
        q_in.join_thread()
        for p in ps:
            p.join()
        if q_out.qsize() != len(big_ssv):
            raise ValueError('Not all `_run_huge_ssv_render_worker` jobs '
                             'completed successfully.')

    # render small SSVs without overhead and single cpus on whole cluster
    multi_params = small_ssv
    np.random.shuffle(multi_params)
    multi_params = chunkify(multi_params, max_n_jobs)
    # list of SSV IDs and SSD parameters need to be given to a single QSUB job
    multi_params = [(ixs, global_params.config.working_dir, version)
                    for ixs in multi_params]
    _ = qu.QSUB_script(multi_params, "render_views_glia_removal", log=log,
                       n_max_co_processes=global_params.NGPU_TOTAL,
                       n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE,
                       additional_flags="--gres=gpu:1", remove_jobfolder=True)

    # check completeness
    log.info('Finished view rendering for glia separation. Checking completeness.')
    sd = SegmentationDataset("sv", working_dir=global_params.config.working_dir)
    res = find_missing_sv_views(sd, woglia=False,
                                n_cores=global_params.NCORES_PER_NODE)
    missing_not_contained_in_rag = []
    missing_contained_in_rag = []
    for el in res:
        if el not in all_sv_ids_in_rag:
            missing_not_contained_in_rag.append(el)  # TODO: decide whether to use or not
        else:
            missing_contained_in_rag.append(el)
    if len(missing_contained_in_rag) != 0:
        msg = "Not all SVs were rendered completely! {}/{} missing:\n" \
              "{}".format(len(missing_contained_in_rag),
                          len(all_sv_ids_in_rag),
                          missing_contained_in_rag[:100])
        log.error(msg)
        raise ValueError(msg)
    else:
        log.info('All SVs now contain views required for glia prediction.')
# -*- coding: utf-8 -*-
# SyConn - Synaptic connectivity inference toolkit
#
# Copyright (c) 2016 - now
# Max Planck Institute of Neurobiology, Martinsried, Germany
# Authors: Philipp Schubert, Joergen Kornfeld
import os

from syconn.mp import batchjob_utils as mu
from syconn.reps.segmentation import SegmentationDataset
from syconn.handler.basics import chunkify

if __name__ == "__main__":
    script_folder = os.path.abspath(os.path.dirname(__file__) +
                                    "/../qsub_scripts/")
    sds = SegmentationDataset("cs", version="33",
                              working_dir="/wholebrain/scratch/areaxfs/")
    multi_params = chunkify(list(sds.sos), 1000)
    path_to_out = mu.QSUB_script(multi_params, "map_cs_properties",
                                 n_max_co_processes=40, pe="openmp",
                                 queue=None, script_folder=script_folder)
def run_syn_generation(chunk_size: Tuple[int, int, int] = (512, 512, 512),
                       n_folders_fs: int = 10000,
                       max_n_jobs: Optional[int] = None,
                       cube_of_interest_bb: Optional[np.ndarray] = None):
    """
    Run the synapse generation. Will create
    :class:`~syconn.reps.segmentation.SegmentationDataset` objects with
    the following versions:

        * 'cs': Contact site objects between supervoxels.
        * 'syn': Objects representing the overlap between 'cs' and the initial
          synaptic junction predictions. Note: These objects effectively
          represent synapse fragments between supervoxels.
        * 'syn_ssv': Agglomerated 'syn' objects based on the supervoxel graph.

    Args:
        chunk_size: The size of processed cubes.
        n_folders_fs: Number of folders used to create the folder structure in
            each :class:`~syconn.reps.segmentation.SegmentationDataset`.
        max_n_jobs: Number of parallel jobs.
        cube_of_interest_bb: Defines the bounding box of the cube to process.
            By default this is set to (np.zeros(3), kd.boundary).
    """
    if max_n_jobs is None:
        max_n_jobs = global_params.config.ncore_total * 2
    log = initialize_logging('synapse_generation',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)

    kd_seg_path = global_params.config.kd_seg_path
    kd = kd_factory(kd_seg_path)
    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=np.int), kd.boundary]

    ces.extract_contact_sites(chunk_size=chunk_size, log=log,
                              max_n_jobs=max_n_jobs,
                              cube_of_interest_bb=cube_of_interest_bb,
                              n_folders_fs=n_folders_fs)
    log.info('SegmentationDataset of type "cs" and "syn" was generated.')

    # TODO: add check for SSD existence, which is required at this point
    # This creates an SD of type 'syn_ssv'
    cps.combine_and_split_syn(
        global_params.config.working_dir, resume_job=False,
        cs_gap_nm=global_params.config['cell_objects']['cs_gap_nm'],
        log=log, n_folders_fs=n_folders_fs)
    log.info('Synapse objects were created.')

    sd_syn_ssv = SegmentationDataset(
        working_dir=global_params.config.working_dir, obj_type='syn_ssv')
    dataset_analysis(sd_syn_ssv, compute_meshprops=True)
    log.info('SegmentationDataset of type "syn_ssv" was generated.')

    cps.map_objects_to_synssv(global_params.config.working_dir, log=log)
    log.info('Cellular organelles were mapped to "syn_ssv".')

    cps.classify_synssv_objects(global_params.config.working_dir, log=log)
    log.info('Synapse prediction finished.')

    log.info('Collecting and writing syn-ssv objects to SSV attribute '
             'dictionary.')
    # This needs to be run after `classify_synssv_objects` and before
    # `map_synssv_objects` if the latter uses thresholding for synaptic objects
    # just collect new data: ``recompute=False``
    dataset_analysis(sd_syn_ssv, compute_meshprops=False, recompute=False)
    # TODO: decide whether this should happen after prob thresholding or not
    map_synssv_objects(log=log)
    log.info('Finished.')
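# Hedged usage sketch: restrict synapse extraction to a sub-cube of the
# dataset. The bounding box (given as [offset, upper corner] in voxels) and
# the chunk size below are illustrative only.
import numpy as np

bb = np.array([[0, 0, 0], [1024, 1024, 512]])
run_syn_generation(chunk_size=(256, 256, 256), cube_of_interest_bb=bb)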
def run_glia_prediction(e3=False):
    log = initialize_logging('glia_prediction', global_params.config.working_dir +
                             '/logs/', overwrite=False)
    # only append to this key if needed
    # (for e.g. different versions, change accordingly in 'axoness_mapping.py')
    pred_key = "glia_probas"

    # Load initial RAG from Knossos mergelist text file.
    g = nx.read_edgelist(global_params.config.pruned_rag_path, nodetype=np.uint)
    all_sv_ids_in_rag = np.array(list(g.nodes()), dtype=np.uint)
    log.debug('Found {} CCs with a total of {} SVs in initial RAG.'.format(
        nx.number_connected_components(g), g.number_of_nodes()))

    # chunk them
    sd = SegmentationDataset("sv", working_dir=global_params.config.working_dir)
    multi_params = chunkify(sd.so_dir_paths, global_params.NGPU_TOTAL * 2)

    # get model properties
    if e3 is True:
        model_kwargs = 'get_glia_model_e3'
    else:
        m = get_glia_model()
        model_kwargs = dict(model_path=m._path,
                            normalize_data=m.normalize_data,
                            imposed_batch_size=m.imposed_batch_size,
                            nb_labels=m.nb_labels,
                            channels_to_load=m.channels_to_load)

    # all other kwargs like obj_type='sv' and version are the current SV
    # SegmentationDataset by default
    so_kwargs = dict(working_dir=global_params.config.working_dir)
    # for glia views set woglia to False (because glia are included),
    # raw_only to True
    pred_kwargs = dict(woglia=False, pred_key=pred_key, verbose=False,
                       raw_only=True)
    multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs]
                    for par in multi_params]

    if e3 is True:
        # TODO: using two GPUs on a single node seems to be error-prone
        #  -> wb13 froze when processing example_cube=2
        n_cores = global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE
        if 'example_cube' in global_params.config.working_dir:
            n_cores = global_params.NCORES_PER_NODE  # do not run two predictions in parallel
        qu.QSUB_script(multi_params, "predict_sv_views_chunked_e3", log=log,
                       n_max_co_processes=global_params.NGPU_TOTAL,
                       script_folder=None, n_cores=n_cores, suffix="_glia",
                       additional_flags="--gres=gpu:1", remove_jobfolder=True)
    else:
        # randomly assign to gpu 0 or 1
        for par in multi_params:
            mk = par[1]
            # GPUs are made available for every job via slurm,
            # no need for random assignments: np.random.rand(0, 2)
            mk["init_gpu"] = 0
        _ = qu.QSUB_script(multi_params, "predict_sv_views_chunked", log=log,
                           n_max_co_processes=global_params.NGPU_TOTAL,
                           n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE,
                           suffix="_glia", additional_flags="--gres=gpu:1",
                           remove_jobfolder=True)

    log.info('Finished glia prediction. Checking completeness.')
    res = find_missing_sv_views(sd, woglia=False,
                                n_cores=global_params.NCORES_PER_NODE)
    missing_not_contained_in_rag = []
    missing_contained_in_rag = []
    for el in res:
        if el not in all_sv_ids_in_rag:
            missing_not_contained_in_rag.append(el)  # TODO: decide whether to use or not
        else:
            missing_contained_in_rag.append(el)
    if len(missing_contained_in_rag) != 0:
        msg = "Not all SVs were predicted! {}/{} missing:\n" \
              "{}".format(len(missing_contained_in_rag),
                          len(all_sv_ids_in_rag),
                          missing_contained_in_rag[:100])
        log.error(msg)
        raise ValueError(msg)
    else:
        log.info('Success.')
def helper_func(paths):
    num_locs = []
    for p in paths:
        loc_dc = CompressedStorage(p + '/locations.pkl', read_only=True,
                                   disable_locking=True)
        sample_locs = [np.concatenate(sl) for sl in loc_dc.values()]
        num_locs += [len(sl) for sl in sample_locs]
    return num_locs


# TODO: make this a test on toy data (which has to be created and added to the repo)
if __name__ == '__main__':
    # performed on SSD at '/wholebrain/songbird/j0126/areaxfs_v6//ssv_0/', 17Jan2019
    ssd = SuperSegmentationDataset(
        working_dir='/wholebrain/songbird/j0126/areaxfs_v6/')
    sd = SegmentationDataset(
        obj_type='sv', working_dir='/wholebrain/songbird/j0126/areaxfs_v6/')

    # # Statistics of SSVs in dataset
    # all_paths = chunkify(glob.glob(ssd.path + "/so_storage/*/*/*/"), 500)
    # num_samplelocs = start_multiprocess_imap(helper_func, all_paths, nb_cpus=20)
    # num_samplelocs = np.concatenate(num_samplelocs)  # transform list of lists into 1D array
    # print('#SSVs: {}\nMean #sample_locs: {}\nTotal #sample_locs: {}'.format(
    #     len(ssd.ssv_ids), np.mean(num_samplelocs), np.sum(num_samplelocs)))

    # # Statistics of SVs in the original dataset
    # all_paths = chunkify(sd.so_dir_paths, 500)
    # num_samplelocs = start_multiprocess_imap(helper_func_sd, all_paths, nb_cpus=20)
    # num_samplelocs = np.concatenate(num_samplelocs)  # transform list of lists into 1D array
    # print('#SVs: {}\nMean #sample_locs: {}\nTotal #sample_locs: {}'.format(
    #     len(sd.ids), np.mean(num_samplelocs), np.sum(num_samplelocs)))

    ssvs = ssd.get_super_segmentation_object([26607617, 27525127])
def run_glia_rendering():
    log = initialize_logging('glia_view_rendering',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    np.random.seed(0)

    # view rendering prior to glia removal, choose SSD accordingly
    # glia removal is based on the initial RAG and does not require explicitly
    # stored SSVs
    version = "tmp"

    G = nx.Graph()  # TODO: Add factory method for initial RAG
    with open(global_params.config.init_rag_path, 'r') as f:
        for l in f.readlines():
            edges = [int(v) for v in re.findall(r'(\d+)', l)]
            G.add_edge(edges[0], edges[1])

    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)
    log.info("Found {} SVs in initial RAG.".format(len(all_sv_ids_in_rag)))

    # add single SV connected components to initial graph
    sd = SegmentationDataset(obj_type='sv',
                             working_dir=global_params.config.working_dir)
    sv_ids = sd.ids
    diff = np.array(list(set(sv_ids).difference(set(all_sv_ids_in_rag))))
    log.info('Found {} single connected component SVs which were missing'
             ' in initial RAG.'.format(len(diff)))
    for ix in diff:
        G.add_node(ix)
    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)
    log.info("Found {} SVs in initial RAG after adding size-one connected "
             "components. Writing kml text file.".format(len(all_sv_ids_in_rag)))

    # write out readable format for 'glia_prediction.py'
    ccs = [[n for n in cc] for cc in nx.connected_component_subgraphs(G)]
    kml = knossos_ml_from_ccs([np.sort(cc)[0] for cc in ccs], ccs)
    with open(global_params.config.working_dir + "initial_rag.txt", 'w') as f:
        f.write(kml)

    # generate parameter for view rendering of individual SSV
    log.info("Starting view rendering.")
    multi_params = []
    for cc in nx.connected_component_subgraphs(G):
        multi_params.append(cc)
    multi_params = np.array(multi_params)

    # identify huge SSVs and process them individually on whole cluster
    nb_svs = np.array([g.number_of_nodes() for g in multi_params])
    big_ssv = multi_params[nb_svs > RENDERING_MAX_NB_SV]

    for kk, g in enumerate(big_ssv[::-1]):
        # Create SSV object
        sv_ixs = np.sort(list(g.nodes()))
        log.info("Processing SSV [{}/{}] with {} SVs on whole cluster.".format(
            kk + 1, len(big_ssv), len(sv_ixs)))
        sso = SuperSegmentationObject(
            sv_ixs[0], working_dir=global_params.config.working_dir,
            version=version, create=False, sv_ids=sv_ixs)
        # nodes of sso._rag need to be SV
        new_G = nx.Graph()
        for e in g.edges():
            new_G.add_edge(sso.get_seg_obj("sv", e[0]),
                           sso.get_seg_obj("sv", e[1]))
        sso._rag = new_G
        sso.render_views(add_cellobjects=False, cellobjects_only=False,
                         skip_indexviews=True, woglia=False, qsub_pe="openmp",
                         overwrite=True, qsub_co_jobs=global_params.NCORE_TOTAL)

    # render small SSVs without overhead and single cpus on whole cluster
    multi_params = multi_params[nb_svs <= RENDERING_MAX_NB_SV]
    np.random.shuffle(multi_params)
    multi_params = chunkify(multi_params, 2000)
    # list of SSV IDs and SSD parameters need to be given to a single QSUB job
    multi_params = [(ixs, global_params.config.working_dir, version)
                    for ixs in multi_params]
    path_to_out = qu.QSUB_script(multi_params, "render_views_glia_removal",
                                 n_max_co_processes=global_params.NCORE_TOTAL,
                                 pe="openmp", queue=None, script_folder=None,
                                 suffix="")

    # check completeness
    sd = SegmentationDataset("sv", working_dir=global_params.config.working_dir)
    res = find_missing_sv_views(sd, woglia=False, n_cores=10)
    missing_not_contained_in_rag = []
    missing_contained_in_rag = []
    for el in res:
        if el not in all_sv_ids_in_rag:
            missing_not_contained_in_rag.append(el)
        else:
            missing_contained_in_rag.append(el)
    if len(missing_not_contained_in_rag):
        log.info("{} SVs were not rendered but are also not part of the initial "
                 "RAG: {}".format(len(missing_not_contained_in_rag),
                                  missing_not_contained_in_rag))
    if len(missing_contained_in_rag) != 0:
        msg = "Not all SSVs were rendered completely! Missing:\n" \
              "{}".format(missing_contained_in_rag)
        log.error(msg)
        raise RuntimeError(msg)
def run_syn_generation(chunk_size=(512, 512, 512), n_folders_fs=10000,
                       max_n_jobs=None, cube_of_interest_bb=None):
    """
    Parameters
    ----------
    chunk_size :
    n_folders_fs :
    max_n_jobs :
    cube_of_interest_bb : Tuple[np.ndarray]
        Defines the bounding box of the cube to process. By default this is
        set to (np.zeros(3), kd.boundary).

    Returns
    -------

    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
    log = initialize_logging('synapse_generation',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)

    kd_seg_path = global_params.config.kd_seg_path
    kd = kd_factory(kd_seg_path)
    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=np.int), kd.boundary]

    ces.extract_contact_sites(chunk_size=chunk_size, log=log,
                              max_n_jobs=max_n_jobs,
                              cube_of_interest_bb=cube_of_interest_bb,
                              n_folders_fs=n_folders_fs)
    log.info('SegmentationDataset of type "cs" and "syn" was generated.')

    # TODO: add check for SSD existence, which is required at this point
    # This creates an SD of type 'syn_ssv'
    cps.combine_and_split_syn(global_params.config.working_dir,
                              resume_job=False,
                              cs_gap_nm=global_params.cs_gap_nm,
                              log=log, n_folders_fs=n_folders_fs)
    log.info('Synapse objects were created.')

    sd_syn_ssv = SegmentationDataset(
        working_dir=global_params.config.working_dir, obj_type='syn_ssv')
    dataset_analysis(sd_syn_ssv, compute_meshprops=True)
    log.info('SegmentationDataset of type "syn_ssv" was generated.')

    cps.map_objects_to_synssv(global_params.config.working_dir, log=log)
    log.info('Cellular organelles were mapped to "syn_ssv".')

    cps.classify_synssv_objects(global_params.config.working_dir, log=log)
    log.info('Synapse property prediction finished.')

    log.info('Collecting and writing syn-ssv objects to SSV attribute '
             'dictionary.')
    # This needs to be run after `classify_synssv_objects` and before
    # `map_synssv_objects` if the latter uses thresholding for synaptic objects
    dataset_analysis(sd_syn_ssv, compute_meshprops=False,
                     recompute=False)  # just collect new data
    # TODO: decide whether this should happen after prob thresholding or not
    map_synssv_objects(log=log)
    log.info('Finished.')
def run_create_sds(chunk_size=None, n_folders_fs=10000, max_n_jobs=None,
                   generate_sv_meshes=False, load_from_kd_overlaycubes=False,
                   cube_of_interest_bb=None):
    """
    Parameters
    ----------
    chunk_size :
    max_n_jobs : int
    n_folders_fs :
    generate_sv_meshes :
    load_from_kd_overlaycubes : bool
        Load prob/seg data from overlaycubes instead of raw cubes.
    cube_of_interest_bb : Tuple[np.ndarray]
        Defines the bounding box of the cube to process. By default this is
        set to (np.zeros(3), kd.boundary).

    Returns
    -------

    """
    if chunk_size is None:
        chunk_size = [512, 512, 512]
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 3
    log = initialize_logging('create_sds', global_params.config.working_dir +
                             '/logs/', overwrite=False)

    # Sets initial values of object
    kd = kd_factory(global_params.config.kd_seg_path)
    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=np.int), kd.boundary]
    size = cube_of_interest_bb[1] - cube_of_interest_bb[0] + 1
    offset = cube_of_interest_bb[0]

    # TODO: get rid of explicit voxel extraction, all info necessary should be
    #  extracted at the beginning, e.g. size, bounding box etc., and then
    #  refactor to only use those cached attributes!
    # resulting ChunkDataset, required for SV extraction --
    # Object extraction - 2h, the same has to be done for all cell organelles
    cd_dir = global_params.config.working_dir + "chunkdatasets/sv/"
    # Class that contains a dict of chunks (with coordinates) after initializing it
    cd = chunky.ChunkDataset()
    cd.initialize(kd, kd.boundary, chunk_size, cd_dir, box_coords=[0, 0, 0],
                  fit_box_size=True)
    log.info('Generating SegmentationDatasets for cell and cell '
             'organelle supervoxels.')
    oew.from_ids_to_objects(
        cd, "sv", overlaydataset_path=global_params.config.kd_seg_path,
        n_chunk_jobs=max_n_jobs, hdf5names=["sv"], n_max_co_processes=None,
        n_folders_fs=n_folders_fs, use_combined_extraction=True, size=size,
        offset=offset)

    # Object Processing -- Perform after mapping to also cache mapping ratios
    sd = SegmentationDataset("sv", working_dir=global_params.config.working_dir)
    sd_proc.dataset_analysis(sd, recompute=True, compute_meshprops=False)
    log.info("Extracted {} cell SVs. Preparing rendering locations "
             "(and meshes if not provided).".format(len(sd.ids)))
    start = time.time()

    # chunk them
    multi_params = chunkify(sd.so_dir_paths, max_n_jobs)
    # all other kwargs like obj_type='sv' and version are the current SV
    # SegmentationDataset by default
    so_kwargs = dict(working_dir=global_params.config.working_dir, obj_type='sv')
    multi_params = [[par, so_kwargs] for par in multi_params]
    if generate_sv_meshes:
        _ = qu.QSUB_script(multi_params, "mesh_caching",
                           n_max_co_processes=global_params.NCORE_TOTAL)
    _ = qu.QSUB_script(multi_params, "sample_location_caching",
                       n_max_co_processes=global_params.NCORE_TOTAL)
    # recompute=False: only collect new sample_location property
    sd_proc.dataset_analysis(sd, compute_meshprops=True, recompute=False)
    log.info('Finished preparation of cell SVs after {:.0f}s.'.format(
        time.time() - start))

    # create SegmentationDataset for each cell organelle
    for co in global_params.existing_cell_organelles:
        start = time.time()
        cd_dir = global_params.config.working_dir + "chunkdatasets/{}/".format(co)
        cd.initialize(kd, kd.boundary, chunk_size, cd_dir,
                      box_coords=[0, 0, 0], fit_box_size=True)
        log.info('Started object extraction of cellular organelles "{}" from '
                 '{} chunks.'.format(co, len(cd.chunk_dict)))
        prob_kd_path_dict = {co: getattr(global_params.config,
                                         'kd_{}_path'.format(co))}
        # This creates a SegmentationDataset of type 'co'
        # get probability threshold
        prob_thresh = global_params.config.entries["Probathresholds"][co]

        path = "{}/knossosdatasets/{}_seg/".format(
            global_params.config.working_dir, co)
        target_kd = knossosdataset.KnossosDataset()
        target_kd.initialize_without_conf(path, kd.boundary, kd.scale,
                                          kd.experiment_name, mags=[1, ])
        target_kd = knossosdataset.KnossosDataset()
        target_kd.initialize_from_knossos_path(path)
        oew.from_probabilities_to_objects(
            cd, co,
            # membrane_kd_path=global_params.config.kd_barrier_path,  # TODO: currently does not exist
            prob_kd_path_dict=prob_kd_path_dict, thresholds=[prob_thresh],
            workfolder=global_params.config.working_dir, hdf5names=[co],
            n_max_co_processes=None, target_kd=target_kd,
            n_folders_fs=n_folders_fs, debug=False, size=size, offset=offset,
            load_from_kd_overlaycubes=load_from_kd_overlaycubes)
        sd_co = SegmentationDataset(
            obj_type=co, working_dir=global_params.config.working_dir)

        # TODO: check if this is faster than the alternative below
        sd_proc.dataset_analysis(sd_co, recompute=True, compute_meshprops=False)
        multi_params = chunkify(sd_co.so_dir_paths, max_n_jobs)
        so_kwargs = dict(working_dir=global_params.config.working_dir,
                         obj_type=co)
        multi_params = [[par, so_kwargs] for par in multi_params]
        _ = qu.QSUB_script(multi_params, "mesh_caching",
                           n_max_co_processes=global_params.NCORE_TOTAL)
        sd_proc.dataset_analysis(sd_co, recompute=False, compute_meshprops=True)
        # # Old alternative, requires many more reads/writes than the above solution
        # sd_proc.dataset_analysis(sd_co, recompute=True, compute_meshprops=True)

        # About 0.2 h per object class
        log.info('Started mapping of {} cellular organelles of type "{}" to '
                 'cell SVs.'.format(len(sd_co.ids), co))
        sd_proc.map_objects_to_sv(sd, co, global_params.config.kd_seg_path,
                                  n_jobs=max_n_jobs)
        log.info('Finished preparation of {} "{}"-SVs after {:.0f}s.'
                 ''.format(len(sd_co.ids), co, time.time() - start))
def run_create_neuron_ssd(prior_glia_removal=True):
    """
    Creates SuperSegmentationDataset with version 0.

    Parameters
    ----------
    prior_glia_removal : bool
        If False, will apply filtering to create SSO objects above minimum
        size (see global_params.min_cc_size_ssv) and cache SV sample locations.

    Returns
    -------

    """
    log = initialize_logging('create_neuron_ssd',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    suffix = global_params.rag_suffix
    # TODO: the following paths currently require prior glia-splitting
    g_p = "{}/glia/neuron_rag{}.bz2".format(global_params.config.working_dir,
                                            suffix)
    rag_g = nx.read_edgelist(g_p, nodetype=np.uint)

    # e.g. if rag was not created by glia splitting procedure this filtering is required
    if not prior_glia_removal:
        sd = SegmentationDataset("sv",
                                 working_dir=global_params.config.working_dir)
        sv_size_dict = {}
        bbs = sd.load_cached_data('bounding_box') * sd.scaling
        for ii in range(len(sd.ids)):
            sv_size_dict[sd.ids[ii]] = bbs[ii]
        ccsize_dict = create_ccsize_dict(rag_g, sv_size_dict)
        log.debug("Finished preparation of SSV size dictionary based "
                  "on bounding box diagonal of corresponding SVs.")
        before_cnt = len(rag_g.nodes())
        for ix in list(rag_g.nodes()):
            if ccsize_dict[ix] < global_params.min_cc_size_ssv:
                rag_g.remove_node(ix)
        log.debug("Removed %d neuron CCs because of size." %
                  (before_cnt - len(rag_g.nodes())))

    ccs = nx.connected_components(rag_g)
    cc_dict = {}
    for cc in ccs:
        cc_arr = np.array(list(cc))
        cc_dict[np.min(cc_arr)] = cc_arr
    cc_dict_inv = {}
    for ssv_id, cc in cc_dict.items():
        for sv_id in cc:
            cc_dict_inv[sv_id] = ssv_id
    log.info('Parsed RAG from {} with {} SSVs and {} SVs.'.format(
        g_p, len(cc_dict), len(cc_dict_inv)))

    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir, version='0',
        ssd_type="ssv", sv_mapping=cc_dict_inv)
    # create cache-arrays for frequently used attributes
    # also executes 'ssd.save_dataset_shallow()'
    ssd.save_dataset_deep(n_max_co_processes=global_params.NCORE_TOTAL)

    exec_skeleton.run_skeleton_generation()

    log.info('Finished SSD initialization. Starting cellular '
             'organelle mapping.')

    # map cellular organelles to SSVs
    # TODO: increase number of jobs in the next two QSUB submissions and sort
    #  by SSV size (descending)
    ssd_proc.aggregate_segmentation_object_mappings(
        ssd, global_params.existing_cell_organelles, qsub_pe="openmp")
    ssd_proc.apply_mapping_decisions(
        ssd, global_params.existing_cell_organelles, qsub_pe="openmp")
    log.info('Finished mapping of cellular organelles to SSVs. '
             'Writing individual SSV graphs.')

    # Write SSV RAGs
    pbar = tqdm.tqdm(total=len(ssd.ssv_ids), mininterval=0.5)
    for ssv in ssd.ssvs:
        # get all nodes in CC of this SSV
        if len(cc_dict[ssv.id]) > 1:  # CCs with 1 node do not exist in the global RAG
            n_list = nx.node_connected_component(rag_g, ssv.id)
            # get SSV RAG as subgraph
            ssv_rag = nx.subgraph(rag_g, n_list)
        else:
            # ssv.id is the minimal SV ID, and therefore the only SV in this case
            ssv_rag = nx.Graph()
            ssv_rag.add_edge(ssv.id, ssv.id)
        nx.write_edgelist(ssv_rag, ssv.edgelist_path)
        pbar.update(1)
    pbar.close()
    log.info('Finished saving individual SSV RAGs.')
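# Minimal sketch (illustration only) of the SV-to-SSV mapping built above:
# every SV is assigned to the SSV whose ID is the smallest SV ID within its
# connected component. The toy RAG and IDs below are made up.
import networkx as nx
import numpy as np

rag = nx.Graph([(10, 11), (11, 12), (20, 20)])  # two connected components
sv_to_ssv = {}
for cc in nx.connected_components(rag):
    ssv_id = int(np.min(list(cc)))
    for sv_id in cc:
        sv_to_ssv[sv_id] = ssv_id
print(sv_to_ssv)  # -> {10: 10, 11: 10, 12: 10, 20: 20}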