def sd_init(co: str, max_n_jobs: int, log: Optional[Logger] = None): """ Initialize :class:`~syconn.reps.segmentation.SegmentationDataset` of given supervoxel type `co`. Args: co: Cellular organelle identifier (e.g. 'mi', 'vc', ...). max_n_jobs: Number of parallel jobs. log: Logger. """ sd_seg = SegmentationDataset(obj_type=co, working_dir=global_params.config.working_dir, version="0") multi_params = chunkify(sd_seg.so_dir_paths, max_n_jobs) so_kwargs = dict(working_dir=global_params.config.working_dir, obj_type=co) multi_params = [[par, so_kwargs] for par in multi_params] if not global_params.config.use_new_meshing and (co != "sv" or (co == "sv" and global_params.config.allow_mesh_gen_cells)): _ = qu.QSUB_script(multi_params, "mesh_caching", suffix=co, remove_jobfolder=False, n_max_co_processes=global_params.NCORE_TOTAL, log=log) if co == "sv": _ = qu.QSUB_script(multi_params, "sample_location_caching", n_max_co_processes=global_params.NCORE_TOTAL, suffix=co, remove_jobfolder=True, log=log) # write mesh properties to attribute dictionaries if old meshing is active if not global_params.config.use_new_meshing: sd_proc.dataset_analysis(sd_seg, recompute=False, compute_meshprops=True)
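
# Hedged usage sketch (illustration only, not part of the original pipeline):
# `sd_init` would typically be called once per supervoxel type, reusing a shared
# logger. The organelle list `global_params.existing_cell_organelles` is the one
# referenced further below in `run_create_sds`; all names are assumed to be
# imported as in the surrounding module.
log = initialize_logging('sd_init_example', global_params.config.working_dir + '/logs/')
for co in ['sv'] + list(global_params.existing_cell_organelles):
    sd_init(co, max_n_jobs=global_params.NCORE_TOTAL * 2, log=log)
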
def run_spiness_prediction(max_n_jobs_gpu: Optional[int] = None,
                           max_n_jobs: Optional[int] = None):
    """
    Will store semantic spine labels inside ``ssv.label_dict('vertex')['spiness']``.

    Todo:
        * run rendering chunk-wise instead of on-the-fly and then perform
          prediction chunk-wise as well, adopt from spiness step.

    Args:
        max_n_jobs_gpu: Number of parallel GPU jobs. Used for the inference.
        max_n_jobs: Number of parallel CPU jobs. Used for the mapping step.
    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
    if max_n_jobs_gpu is None:
        max_n_jobs_gpu = global_params.NGPU_TOTAL * 2
    log = initialize_logging('spine_identification',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)

    # run semantic spine segmentation on multi views
    sd = ssd.get_segmentationdataset("sv")
    # chunk them
    multi_params = chunkify(sd.so_dir_paths, max_n_jobs_gpu)
    # set model properties
    model_kwargs = dict(src=global_params.config.mpath_spiness, multi_gpu=False)
    so_kwargs = dict(working_dir=global_params.config.working_dir)
    pred_kwargs = dict(pred_key=global_params.semseg2mesh_spines['semseg_key'])
    multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs]
                    for par in multi_params]
    log.info('Starting spine prediction.')
    qu.QSUB_script(multi_params, "predict_spiness_chunked", log=log,
                   n_max_co_processes=global_params.NGPU_TOTAL,
                   n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE,
                   suffix="", additional_flags="--gres=gpu:1",
                   remove_jobfolder=True)
    log.info('Finished spine prediction.')

    # map semantic spine segmentation of multi views on SSV mesh
    # TODO: CURRENTLY HIGH MEMORY CONSUMPTION
    if not ssd.mapping_dict_exists:
        raise ValueError('Mapping dict does not exist.')
    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])
    # sort ssv ids according to their number of SVs (descending)
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)
    # add ssd parameters
    kwargs_semseg2mesh = global_params.semseg2mesh_spines
    kwargs_semsegforcoords = global_params.semseg2coords_spines
    multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir,
                     kwargs_semseg2mesh, kwargs_semsegforcoords)
                    for ssv_ids in multi_params]
    log.info('Starting mapping of spine predictions to neurite surfaces.')
    qu.QSUB_script(multi_params, "map_spiness",
                   n_max_co_processes=global_params.NCORE_TOTAL,
                   n_cores=4, suffix="", additional_flags="",
                   remove_jobfolder=True, log=log)
    log.info('Finished spine mapping.')

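
# Hedged sketch of how the per-vertex spine labels written by
# `run_spiness_prediction` might be read back. The key is the one named in the
# docstring above (`global_params.semseg2mesh_spines['semseg_key']`, i.e.
# 'spiness'); whether `label_dict('vertex')` loads lazily or requires a prior
# cache call is an assumption here.
ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
ssv_id = ssd.ssv_ids[0]
ssv = ssd.get_super_segmentation_object(ssv_id)
spine_labels = ssv.label_dict('vertex')[global_params.semseg2mesh_spines['semseg_key']]
print('Vertex spine labels for SSV {}: {} entries'.format(ssv_id, len(spine_labels)))
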
def run_spiness_prediction(max_n_jobs_gpu=None, max_n_jobs=None): if max_n_jobs is None: max_n_jobs = global_params.NCORE_TOTAL * 2 if max_n_jobs_gpu is None: max_n_jobs_gpu = global_params.NGPU_TOTAL * 2 log = initialize_logging('spine_identification', global_params.config.working_dir + '/logs/', overwrite=False) ssd = SuperSegmentationDataset( working_dir=global_params.config.working_dir) pred_key = "spiness" # run semantic spine segmentation on multi views sd = ssd.get_segmentationdataset("sv") # chunk them multi_params = chunkify(sd.so_dir_paths, max_n_jobs_gpu) # set model properties model_kwargs = dict(src=global_params.config.mpath_spiness, multi_gpu=False) so_kwargs = dict(working_dir=global_params.config.working_dir) pred_kwargs = dict(pred_key=pred_key) multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs] for par in multi_params] log.info('Starting spine prediction.') qu.QSUB_script(multi_params, "predict_spiness_chunked", log=log, n_max_co_processes=global_params.NGPU_TOTAL, n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE, suffix="", additional_flags="--gres=gpu:1", remove_jobfolder=True) log.info('Finished spine prediction.') # map semantic spine segmentation of multi views on SSV mesh # TODO: CURRENTLY HIGH MEMORY CONSUMPTION if not ssd.mapping_dict_exists: raise ValueError('Mapping dict does not exist.') multi_params = np.array(ssd.ssv_ids, dtype=np.uint) nb_svs_per_ssv = np.array( [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids]) # sort ssv ids according to their number of SVs (descending) multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]] multi_params = chunkify(multi_params, max_n_jobs) # add ssd parameters kwargs_semseg2mesh = dict(semseg_key=pred_key, force_recompute=True) multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir, kwargs_semseg2mesh) for ssv_ids in multi_params] log.info('Starting mapping of spine predictions to neurite surfaces.') qu.QSUB_script(multi_params, "map_spiness", n_max_co_processes=global_params.NCORE_TOTAL, n_cores=4, suffix="", additional_flags="", remove_jobfolder=True, log=log) log.info('Finished spine mapping.')
def run_morphology_embedding(max_n_jobs=None): if max_n_jobs is None: max_n_jobs = global_params.NGPU_TOTAL * 2 log = initialize_logging('morphology_embedding', global_params.config.working_dir + '/logs/', overwrite=False) ssd = SuperSegmentationDataset( working_dir=global_params.config.working_dir) pred_key_appendix = "" multi_params = np.array(ssd.ssv_ids, dtype=np.uint) nb_svs_per_ssv = np.array( [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids]) # sort ssv ids according to their number of SVs (descending) multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]] multi_params = chunkify(multi_params, max_n_jobs) # add ssd parameters multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir, pred_key_appendix) for ssv_ids in multi_params] qu.QSUB_script(multi_params, "generate_morphology_embedding", n_max_co_processes=global_params.NGPU_TOTAL, n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE, log=log, suffix="", additional_flags="--gres=gpu:1", remove_jobfolder=True) log.info('Finished extraction of cell morphology embedding.')
def run_morphology_embedding(max_n_jobs: Optional[int] = None):
    """
    Infer local morphology embeddings for all neuron reconstructions based on
    the triplet-loss trained cellular morphology learning network (tCMN).

    Args:
        max_n_jobs: Number of parallel jobs.

    Notes:
        Requires :func:`~run_create_neuron_ssd`, :func:`~run_neuron_rendering` and
        :func:`~syconn.exec.skeleton.run_skeleton_generation`.
    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NGPU_TOTAL * 2
    log = initialize_logging('morphology_embedding',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    pred_key_appendix = ""

    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])
    # sort ssv ids according to their number of SVs (descending)
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)
    # add ssd parameters
    multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir,
                     pred_key_appendix) for ssv_ids in multi_params]
    qu.QSUB_script(multi_params, "generate_morphology_embedding",
                   n_max_co_processes=global_params.NGPU_TOTAL,
                   n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE,
                   log=log, suffix="", additional_flags="--gres=gpu:1",
                   remove_jobfolder=True)
    log.info('Finished extraction of cell morphology embedding.')

def run_morphology_embedding(): log = initialize_logging('morphology_embedding', global_params.config.working_dir + '/logs/', overwrite=False) ssd = SuperSegmentationDataset( working_dir=global_params.config.working_dir) pred_key_appendix = "" multi_params = np.array(ssd.ssv_ids, dtype=np.uint) nb_svs_per_ssv = np.array( [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids]) # sort ssv ids according to their number of SVs (descending) multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]] multi_params = chunkify(multi_params, 2000) # add ssd parameters multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir, pred_key_appendix) for ssv_ids in multi_params] qu.QSUB_script(multi_params, "generate_morphology_embedding", pe="openmp", queue=None, n_cores=10, suffix="", additional_flags="--gres=gpu:1", resume_job=False) # removed -V (used with QSUB) log.info('Finished extraction of cell morphology embedding.')
def run_skeleton_generation(max_n_jobs=None): if max_n_jobs is None: max_n_jobs = global_params.NCORE_TOTAL * 2 log = initialize_logging('skeleton_generation', global_params.config.working_dir + '/logs/', overwrite=False) ssd = SuperSegmentationDataset( working_dir=global_params.config.working_dir) # TODO: think about using create_sso_skeleton_fast if underlying RAG # obeys spatial correctness (> 10x faster) # list of SSV IDs and SSD parameters need to be given to a single QSUB job multi_params = ssd.ssv_ids nb_svs_per_ssv = np.array( [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids]) ordering = np.argsort(nb_svs_per_ssv) multi_params = multi_params[ordering[::-1]] multi_params = chunkify(multi_params, max_n_jobs) # add ssd parameters multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir) for ssv_ids in multi_params] # create SSV skeletons, requires SV skeletons! log.info('Starting skeleton generation of {} SSVs.'.format(len( ssd.ssv_ids))) qu.QSUB_script(multi_params, "export_skeletons_new", log=log, n_max_co_processes=global_params.NCORE_TOTAL, remove_jobfolder=True) log.info('Finished skeleton generation.')
def run_axoness_mapping(max_n_jobs=None): if max_n_jobs is None: max_n_jobs = global_params.NCORE_TOTAL * 2 """Maps axon prediction of rendering locations onto SSV skeletons""" log = initialize_logging('axon_mapping', global_params.config.working_dir + '/logs/', overwrite=False) pred_key_appendix = "" # Working directory has to be changed globally in global_params ssd = SuperSegmentationDataset( working_dir=global_params.config.working_dir) multi_params = np.array(ssd.ssv_ids, dtype=np.uint) # sort ssv ids according to their number of SVs (descending) nb_svs_per_ssv = np.array( [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids]) multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]] multi_params = chunkify(multi_params, max_n_jobs) multi_params = [(par, pred_key_appendix) for par in multi_params] log.info('Starting axoness mapping.') _ = qu.QSUB_script(multi_params, "map_viewaxoness2skel", log=log, n_max_co_processes=global_params.NCORE_TOTAL, suffix="", n_cores=1, remove_jobfolder=True) # TODO: perform completeness check log.info('Finished axoness mapping.')
def run_celltype_prediction(max_n_jobs_gpu: Optional[int] = None):
    """
    Run the celltype inference based on the ``img2scalar`` CMN.

    Args:
        max_n_jobs_gpu: Number of parallel GPU jobs.

    Notes:
        Requires :func:`~run_create_neuron_ssd` and :func:`~run_neuron_rendering`.
    """
    if max_n_jobs_gpu is None:
        max_n_jobs_gpu = global_params.NGPU_TOTAL * 2
    log = initialize_logging('celltype_prediction',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    # shuffle SV IDs
    np.random.seed(0)

    log.info('Starting cell type prediction.')
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])
    multi_params = ssd.ssv_ids
    ordering = np.argsort(nb_svs_per_ssv)
    multi_params = multi_params[ordering[::-1]]
    max_n_jobs_gpu = np.max([max_n_jobs_gpu, len(multi_params) // 200])  # at most 200 SSVs per job
    multi_params = chunkify(multi_params, max_n_jobs_gpu)
    # job parameters will be read sequentially, i.e. in order to provide only
    # one list as parameter one needs an additional axis
    multi_params = [(ixs, ) for ixs in multi_params]

    path_to_out = qu.QSUB_script(multi_params, "predict_cell_type", log=log,
                                 n_max_co_processes=global_params.NNODES_TOTAL,
                                 suffix="", additional_flags="--gres=gpu:1",
                                 n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE,
                                 remove_jobfolder=True)
    log.info('Finished prediction of {} SSVs. Checking completeness.'.format(len(ordering)))
    out_files = glob.glob(path_to_out + "*.pkl")
    err = []
    for fp in out_files:
        with open(fp, "rb") as f:
            local_err = pkl.load(f)
        err += list(local_err)
    if len(err) > 0:
        msg = "{} errors occurred for SSVs with ID: {}".format(
            len(err), [el[0] for el in err])
        log.error(msg)
        raise ValueError(msg)
    else:
        log.info('Success.')

def run_skeleton_generation(max_n_jobs: Optional[int] = None, map_myelin: Optional[bool] = None): """ Generate the cell reconstruction skeletons. Args: max_n_jobs: Number of parallel jobs. map_myelin: Map myelin predictions at every ``skeleton['nodes']`` in :py:attr:`~syconn.reps.super_segmentation_object.SuperSegmentationObject.skeleton`. """ if map_myelin is None: map_myelin = os.path.isdir(global_params.config.working_dir + '/knossosdatasets/myelin/') if max_n_jobs is None: max_n_jobs = global_params.NCORE_TOTAL * 2 log = initialize_logging('skeleton_generation', global_params.config.working_dir + '/logs/', overwrite=False) ssd = SuperSegmentationDataset( working_dir=global_params.config.working_dir) # TODO: think about using create_sso_skeleton_fast if underlying RAG # obeys spatial correctness (> 10x faster) # list of SSV IDs and SSD parameters need to be given to a single QSUB job multi_params = ssd.ssv_ids nb_svs_per_ssv = np.array( [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids]) ordering = np.argsort(nb_svs_per_ssv) multi_params = multi_params[ordering[::-1]] multi_params = chunkify(multi_params, max_n_jobs) # add ssd parameters multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir, map_myelin) for ssv_ids in multi_params] # create SSV skeletons, requires SV skeletons! log.info('Starting skeleton generation of {} SSVs.'.format(len( ssd.ssv_ids))) qu.QSUB_script(multi_params, "export_skeletons_new", log=log, n_max_co_processes=global_params.NCORE_TOTAL, remove_jobfolder=True, n_cores=2) log.info('Finished skeleton generation.')
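
# Hedged usage sketch: `map_myelin` defaults to whether a myelin KnossosDataset
# exists at <working_dir>/knossosdatasets/myelin/ (see the check in the function
# above); it can also be forced explicitly, e.g. when that dataset is added later.
run_skeleton_generation(max_n_jobs=global_params.NCORE_TOTAL * 2, map_myelin=True)
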
def run_semsegaxoness_mapping(max_n_jobs: Optional[int] = None): """ Map semantic segmentation results of the 2D projections onto the cell reconstruction mesh. Generates the following attributes by default in :py:attr:`~syconn.reps.super_segmentation_object.SuperSegmentationObject.skeleton`: * "axoness": Vertex predictions mapped to skeleton (see ``global_params.map_properties_semsegax``. * "axoness_avg10000": Sliding window average along skeleton (10um traversal length). * "axoness_avg10000_comp_maj": Majority vote on connected components after removing the soma. Args: max_n_jobs: Number of parallel jobs. Notes: Requires :func:`~run_create_neuron_ssd`, :func:`~run_neuron_rendering`, :func:`~run_semsegaxoness_prediction` and :func:`~syconn.exec.skeleton.run_skeleton_generation`. """ if max_n_jobs is None: max_n_jobs = global_params.NCORE_TOTAL * 2 """Maps axon prediction of rendering locations onto SSV skeletons""" log = initialize_logging('axon_mapping', global_params.config.working_dir + '/logs/', overwrite=False) pred_key_appendix = "" # Working directory has to be changed globally in global_params ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir) multi_params = np.array(ssd.ssv_ids, dtype=np.uint) # sort ssv ids according to their number of SVs (descending) nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids]) multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]] multi_params = chunkify(multi_params, max_n_jobs) multi_params = [(par, pred_key_appendix) for par in multi_params] log.info('Starting axoness mapping.') _ = qu.QSUB_script(multi_params, "map_semsegaxoness2skel", log=log, n_max_co_processes=global_params.NCORE_TOTAL, suffix="", n_cores=1, remove_jobfolder=True) # TODO: perform completeness check log.info('Finished axoness mapping.')
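
# Hedged sketch for inspecting the skeleton attributes listed in the docstring
# above after `run_semsegaxoness_mapping` has finished. Whether `ssv.skeleton`
# is populated lazily or needs an explicit load call is an assumption; the keys
# ('axoness', 'axoness_avg10000', 'axoness_avg10000_comp_maj') are taken from
# the docstring.
ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
ssv = ssd.get_super_segmentation_object(ssd.ssv_ids[0])
axoness_per_node = ssv.skeleton['axoness_avg10000_comp_maj']  # one label per skeleton node
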
def run_celltype_prediction(max_n_jobs=100): log = initialize_logging('celltype_prediction', global_params.config.working_dir + '/logs/', overwrite=False) ssd = SuperSegmentationDataset( working_dir=global_params.config.working_dir) # shuffle SV IDs np.random.seed(0) log.info('Starting cell type prediction.') nb_svs_per_ssv = np.array( [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids]) multi_params = ssd.ssv_ids ordering = np.argsort(nb_svs_per_ssv) multi_params = multi_params[ordering[::-1]] max_n_jobs = np.max([max_n_jobs, len(multi_params) // 200]) # at most 200 SSV per job multi_params = chunkify(multi_params, max_n_jobs) # job parameter will be read sequentially, i.e. in order to provide only # one list as parameter one needs an additonal axis multi_params = [(ixs, ) for ixs in multi_params] # TODO: switch n_max_co_processes to `global_params.NGPUS_TOTAL` as soon as EGL ressource allocation works! path_to_out = qu.QSUB_script(multi_params, "predict_cell_type", n_max_co_processes=global_params.NNODES_TOTAL, suffix="", additional_flags="--gres=gpu:2", n_cores=global_params.NCORES_PER_NODE) log.info('Finished prediction of {} SSVs. Checking completeness.' ''.format(len(ordering))) out_files = glob.glob(path_to_out + "*.pkl") err = [] for fp in out_files: with open(fp, "rb") as f: local_err = pkl.load(f) err += list(local_err) if len(err) > 0: log.error("{} errors occurred for SSVs with ID: " "{}".format(len(err), [el[0] for el in err])) else: log.info('Success.')
def map_myelin_global(max_n_jobs: Optional[int] = None): """ Stand-alone myelin mapping to cell reconstruction skeletons. See kwarg ``map_myelin`` in :func:`run_skeleton_generation` for a mapping right after skeleton generation. Args: max_n_jobs: Number of parallel jobs. """ if max_n_jobs is None: max_n_jobs = global_params.NCORE_TOTAL * 2 log = initialize_logging('myelin_mapping', global_params.config.working_dir + '/logs/', overwrite=False) ssd = SuperSegmentationDataset( working_dir=global_params.config.working_dir) # TODO: think about using create_sso_skeleton_fast if underlying RAG # obeys spatial correctness (> 10x faster) # list of SSV IDs and SSD parameters need to be given to a single QSUB job multi_params = ssd.ssv_ids nb_svs_per_ssv = np.array( [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids]) ordering = np.argsort(nb_svs_per_ssv) multi_params = multi_params[ordering[::-1]] multi_params = chunkify(multi_params, max_n_jobs) # add ssd parameters multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir) for ssv_ids in multi_params] # create SSV skeletons, requires SV skeletons! log.info('Starting myelin mapping of {} SSVs.'.format(len(ssd.ssv_ids))) qu.QSUB_script(multi_params, "map_myelin2skel", log=log, n_max_co_processes=global_params.NCORE_TOTAL, remove_jobfolder=True, n_cores=2) log.info('Finished myelin mapping.')
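
# Hedged usage sketch: stand-alone myelin mapping for skeletons that were
# generated without `map_myelin=True` in `run_skeleton_generation` above.
map_myelin_global(max_n_jobs=global_params.NCORE_TOTAL * 2)
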
def run_axoness_mapping(max_n_jobs: Optional[int] = None): """ Map ``img2scalar`` CMN results of the 2D projections onto the cell reconstruction mesh. See :func:`~run_semsegaxoness_mapping` for the semantic segmentation approach. Args: max_n_jobs: Number of parallel jobs. Notes: Requires :func:`~run_create_neuron_ssd`, :func:`~run_neuron_rendering`, :func:`run_axoness_prediction` and :func:`~syconn.exec.skeleton.run_skeleton_generation`. """ if max_n_jobs is None: max_n_jobs = global_params.NCORE_TOTAL * 2 """Maps axon prediction of rendering locations onto SSV skeletons""" log = initialize_logging('axon_mapping', global_params.config.working_dir + '/logs/', overwrite=False) pred_key_appendix = "" # Working directory has to be changed globally in global_params ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir) multi_params = np.array(ssd.ssv_ids, dtype=np.uint) # sort ssv ids according to their number of SVs (descending) nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids]) multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]] multi_params = chunkify(multi_params, max_n_jobs) multi_params = [(par, pred_key_appendix) for par in multi_params] log.info('Starting axoness mapping.') _ = qu.QSUB_script(multi_params, "map_viewaxoness2skel", log=log, n_max_co_processes=global_params.NCORE_TOTAL, suffix="", n_cores=1, remove_jobfolder=True) # TODO: perform completeness check log.info('Finished axoness mapping.')
def run_semsegaxoness_prediction(max_n_jobs_gpu=None): """ Will store semantic axoness labels as `view_properties_semsegax['semseg_key']` inside ssv.label_dict('vertex')[semseg_key] TODO: run rendering chunk-wise instead of on-the-fly and then perform prediction chunk-wise as well, adopt from spiness step Parameters ---------- max_n_jobs_gpu : int Returns ------- """ if max_n_jobs_gpu is None: max_n_jobs_gpu = global_params.NGPU_TOTAL * 2 log = initialize_logging('axoness_prediction', global_params.config.working_dir + '/logs/', overwrite=False) ssd = SuperSegmentationDataset( working_dir=global_params.config.working_dir) # shuffle SV IDs np.random.seed(0) log.info('Starting axoness prediction.') nb_svs_per_ssv = np.array( [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids]) multi_params = ssd.ssv_ids ordering = np.argsort(nb_svs_per_ssv) multi_params = multi_params[ordering[::-1]] max_n_jobs_gpu = np.max([max_n_jobs_gpu, len(multi_params) // 100 ]) # at most 100 SSV per job multi_params = chunkify(multi_params, max_n_jobs_gpu) # job parameter will be read sequentially, i.e. in order to provide only # one list as parameter one needs an additonal axis multi_params = [(ixs, ) for ixs in multi_params] path_to_out = qu.QSUB_script(multi_params, "predict_axoness_semseg", log=log, n_max_co_processes=global_params.NNODES_TOTAL, suffix="", additional_flags="--gres=gpu:1", n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE, remove_jobfolder=True) log.info('Finished prediction of {} SSVs. Checking completeness.' ''.format(len(ordering))) out_files = glob.glob(path_to_out + "*.pkl") err = [] for fp in out_files: with open(fp, "rb") as f: local_err = pkl.load(f) err += list(local_err) if len(err) > 0: msg = "{} errors occurred for SSVs with ID: " \ "{}".format(len(err), [el[0] for el in err]) log.error(msg) raise ValueError(msg) else: log.info('Success.')
def _run_neuron_rendering_small_helper(max_n_jobs=None): if max_n_jobs is None: max_n_jobs = global_params.NGPU_TOTAL * 4 if global_params.PYOPENGL_PLATFORM == 'egl' \ else global_params.NCORE_TOTAL * 4 log = initialize_logging('neuron_view_rendering_small', global_params.config.working_dir + '/logs/') # view rendering prior to glia removal, choose SSD accordingly ssd = SuperSegmentationDataset( working_dir=global_params.config.working_dir) # TODO: use actual size criteria, e.g. number of sampling locations nb_svs_per_ssv = np.array( [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids]) # render normal size SSVs size_mask = nb_svs_per_ssv <= global_params.RENDERING_MAX_NB_SV if 'example' in global_params.config.working_dir and np.sum( ~size_mask) == 0: # generate at least one (artificial) huge SSV size_mask[:1] = False size_mask[1:] = True multi_params = ssd.ssv_ids[size_mask] # sort ssv ids according to their number of SVs (descending) ordering = np.argsort(nb_svs_per_ssv[size_mask]) multi_params = multi_params[ordering[::-1]] multi_params = chunkify(multi_params, max_n_jobs) # list of SSV IDs and SSD parameters need to be given to a single QSUB job multi_params = [(ixs, global_params.config.working_dir) for ixs in multi_params] log.info('Started rendering of {} SSVs. '.format(np.sum(size_mask))) # generic if global_params.PYOPENGL_PLATFORM == 'osmesa': # utilize all CPUs path_to_out = qu.QSUB_script( multi_params, "render_views", log=log, n_max_co_processes=global_params.NCORE_TOTAL, remove_jobfolder=False) elif global_params.PYOPENGL_PLATFORM == 'egl': # utilize 1 GPU per task # run EGL on single node: 20 parallel jobs if global_params.config.working_dir is not None and 'example_cube' in \ global_params.config.working_dir: n_cores = 1 n_parallel_jobs = global_params.NCORES_PER_NODE path_to_out = qu.QSUB_script(multi_params, "render_views", n_max_co_processes=n_parallel_jobs, log=log, additional_flags="--gres=gpu:2", n_cores=n_cores, remove_jobfolder=False) # run on whole cluster else: n_cores = global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE n_parallel_jobs = global_params.NGPU_TOTAL path_to_out = qu.QSUB_script(multi_params, "render_views_egl", n_max_co_processes=n_parallel_jobs, log=log, additional_flags="--gres=gpu:1", n_cores=n_cores, remove_jobfolder=True) else: raise RuntimeError('Specified OpenGL platform "{}" not supported.' ''.format(global_params.PYOPENGL_PLATFORM)) log.info('Finished rendering of {}/{} SSVs.'.format( len(ordering), len(nb_svs_per_ssv)))
def run_glia_rendering():
    log = initialize_logging('glia_view_rendering',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    np.random.seed(0)

    # view rendering prior to glia removal, choose SSD accordingly
    version = "tmp"  # glia removal is based on the initial RAG and does not require explicitly stored SSVs

    G = nx.Graph()  # TODO: Add factory method for initial RAG
    with open(global_params.config.init_rag_path, 'r') as f:
        for l in f.readlines():
            edges = [int(v) for v in re.findall(r'(\d+)', l)]
            G.add_edge(edges[0], edges[1])

    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)
    log.info("Found {} SVs in initial RAG.".format(len(all_sv_ids_in_rag)))

    # add single SV connected components to initial graph
    sd = SegmentationDataset(obj_type='sv',
                             working_dir=global_params.config.working_dir)
    sv_ids = sd.ids
    diff = np.array(list(set(sv_ids).difference(set(all_sv_ids_in_rag))))
    log.info('Found {} single connected component SVs which were missing'
             ' in initial RAG.'.format(len(diff)))
    for ix in diff:
        G.add_node(ix)

    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)
    log.info("Found {} SVs in initial RAG after adding size-one connected "
             "components. Writing kml text file".format(len(all_sv_ids_in_rag)))

    # write out readable format for 'glia_prediction.py'
    ccs = [[n for n in cc] for cc in nx.connected_component_subgraphs(G)]
    kml = knossos_ml_from_ccs([np.sort(cc)[0] for cc in ccs], ccs)
    with open(global_params.config.working_dir + "initial_rag.txt", 'w') as f:
        f.write(kml)

    # generate parameter for view rendering of individual SSV
    log.info("Starting view rendering.")
    multi_params = []
    for cc in nx.connected_component_subgraphs(G):
        multi_params.append(cc)
    multi_params = np.array(multi_params)

    # identify huge SSVs and process them individually on whole cluster
    nb_svs = np.array([g.number_of_nodes() for g in multi_params])
    big_ssv = multi_params[nb_svs > RENDERING_MAX_NB_SV]
    for kk, g in enumerate(big_ssv[::-1]):
        # Create SSV object
        sv_ixs = np.sort(list(g.nodes()))
        log.info("Processing SSV [{}/{}] with {} SVs on whole cluster.".format(
            kk + 1, len(big_ssv), len(sv_ixs)))
        sso = SuperSegmentationObject(sv_ixs[0],
                                      working_dir=global_params.config.working_dir,
                                      version=version, create=False, sv_ids=sv_ixs)
        # nodes of sso._rag need to be SV
        new_G = nx.Graph()
        for e in g.edges():
            new_G.add_edge(sso.get_seg_obj("sv", e[0]),
                           sso.get_seg_obj("sv", e[1]))
        sso._rag = new_G
        sso.render_views(add_cellobjects=False, cellobjects_only=False,
                         skip_indexviews=True, woglia=False, qsub_pe="openmp",
                         overwrite=True, qsub_co_jobs=global_params.NCORE_TOTAL)

    # render small SSV without overhead and single cpus on whole cluster
    multi_params = multi_params[nb_svs <= RENDERING_MAX_NB_SV]
    np.random.shuffle(multi_params)
    multi_params = chunkify(multi_params, 2000)
    # list of SSV IDs and SSD parameters need to be given to a single QSUB job
    multi_params = [(ixs, global_params.config.working_dir, version)
                    for ixs in multi_params]
    path_to_out = qu.QSUB_script(multi_params, "render_views_glia_removal",
                                 n_max_co_processes=global_params.NCORE_TOTAL,
                                 pe="openmp", queue=None, script_folder=None,
                                 suffix="")

    # check completeness
    sd = SegmentationDataset("sv", working_dir=global_params.config.working_dir)
    res = find_missing_sv_views(sd, woglia=False, n_cores=10)
    missing_not_contained_in_rag = []
    missing_contained_in_rag = []
    for el in res:
        if el not in all_sv_ids_in_rag:
            missing_not_contained_in_rag.append(el)
        else:
            missing_contained_in_rag.append(el)
    if len(missing_not_contained_in_rag):
        log.info("{} SVs were not rendered but also not part of the initial "
                 "RAG: {}".format(len(missing_not_contained_in_rag),
                                  missing_not_contained_in_rag))
    if len(missing_contained_in_rag) != 0:
        msg = "Not all SSVs were rendered completely! Missing:\n" \
              "{}".format(missing_contained_in_rag)
        log.error(msg)
        raise RuntimeError(msg)

def run_glia_prediction(e3=False): log = initialize_logging('glia_prediction', global_params.config.working_dir + '/logs/', overwrite=False) # only append to this key if needed (for e.g. different versions, change accordingly in 'axoness_mapping.py') pred_key = "glia_probas" # Load initial RAG from Knossos mergelist text file. init_rag_p = global_params.config.working_dir + "initial_rag.txt" assert os.path.isfile(init_rag_p), "Initial RAG could not be found at %s."\ % init_rag_p init_rag = parse_cc_dict_from_kml(init_rag_p) log.info('Found {} CCs with a total of {} SVs in inital RAG.' ''.format(len(init_rag), np.sum([len(v) for v in init_rag.values()]))) # chunk them sd = SegmentationDataset("sv", working_dir=global_params.config.working_dir) multi_params = chunkify(sd.so_dir_paths, 100) # get model properties if e3 == True: model_kwargs = 'get_glia_model_e3' else: m = get_glia_model() model_kwargs = dict(model_path=m._path, normalize_data=m.normalize_data, imposed_batch_size=m.imposed_batch_size, nb_labels=m.nb_labels, channels_to_load=m.channels_to_load) # all other kwargs like obj_type='sv' and version are the current SV SegmentationDataset by default so_kwargs = dict(working_dir=global_params.config.working_dir) # for glia views set woglia to False (because glia are included), # raw_only to True pred_kwargs = dict(woglia=False, pred_key=pred_key, verbose=False, raw_only=True) multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs] for par in multi_params] if e3 == True: path_to_out = qu.QSUB_script( multi_params, "predict_sv_views_chunked_e3", n_max_co_processes=15, pe="openmp", queue=None, script_folder=None, n_cores=10, suffix="_glia", additional_flags="--gres=gpu:1") # removed -V else: # randomly assign to gpu 0 or 1 for par in multi_params: mk = par[1] # GPUs are made available for every job via slurm, no need for random assignments: np.random.rand(0, 2) mk["init_gpu"] = 0 path_to_out = qu.QSUB_script( multi_params, "predict_sv_views_chunked", n_max_co_processes=25, pe="openmp", queue=None, n_cores=10, suffix="_glia", script_folder=None, additional_flags="--gres=gpu:1") # removed -V log.info('Finished glia prediction. Checking completeness.') res = find_missing_sv_attributes(sd, pred_key, n_cores=10) if len(res) > 0: log.error("Attribute '{}' missing for follwing" " SVs:\n{}".format(pred_key, res)) else: log.info('Success.')
def run_neuron_rendering(max_n_jobs=2000): log = initialize_logging('neuron_view_rendering', global_params.config.working_dir + '/logs/') # TODO: currently working directory has to be set globally in global_params # and is not adjustable here because all qsub jobs will start a script # referring to 'global_params.config.working_dir' # view rendering prior to glia removal, choose SSD accordingly ssd = SuperSegmentationDataset( working_dir=global_params.config.working_dir) # TODO: use actual size criteria, e.g. number of sampling locations nb_svs_per_ssv = np.array( [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids]) # render normal size SSVs size_mask = nb_svs_per_ssv <= global_params.RENDERING_MAX_NB_SV multi_params = ssd.ssv_ids[size_mask] # TODO: move from osmesa to egl, egl rendering worker (10 cpus, 1 gpu) then should utilize more threads for bigger # SSVs, and run more SSVs in parallel if they are small # sort ssv ids according to their number of SVs (descending) ordering = np.argsort(nb_svs_per_ssv[size_mask]) multi_params = multi_params[ordering[::-1]] multi_params = chunkify(multi_params, max_n_jobs) # list of SSV IDs and SSD parameters need to be given to a single QSUB job multi_params = [(ixs, global_params.config.working_dir) for ixs in multi_params] log.info('Started rendering of {} SSVs. '.format(np.sum(size_mask))) if np.sum(~size_mask) > 0: log.info('{} huge SSVs will be rendered afterwards using the whole' ' cluster.'.format(np.sum(~size_mask))) # generic # TODO: switch n_cores to `global_params.NGPUS_TOTAL` as soon as EGL ressource allocation works! if global_params.PYOPENGL_PLATFORM == 'osmesa': # utilize all CPUs path_to_out = qu.QSUB_script( multi_params, "render_views", n_max_co_processes=global_params.NCORE_TOTAL) elif global_params.PYOPENGL_PLATFORM == 'egl': # utilize 1 GPU per task # TODO: use render_views_egl script path_to_out = qu.QSUB_script( multi_params, "render_views", n_max_co_processes=global_params.NNODES_TOTAL, additional_flags="--gres=gpu:2", n_cores=global_params.NCORES_PER_NODE) else: raise RuntimeError('Specified OpenGL platform "{}" not supported.' ''.format(global_params.PYOPENGL_PLATFORM)) if np.sum(~size_mask) > 0: log.info('Finished rendering of {}/{} SSVs.'.format( len(ordering), len(nb_svs_per_ssv))) # identify huge SSVs and process them individually on whole cluster big_ssv = ssd.ssv_ids[~size_mask] for kk, ssv_id in enumerate(big_ssv): ssv = ssd.get_super_segmentation_object(ssv_id) log.info( "Processing SSV [{}/{}] with {} SVs on whole cluster.".format( kk + 1, len(big_ssv), len(ssv.sv_ids))) ssv.render_views(add_cellobjects=True, cellobjects_only=False, woglia=True, qsub_pe="openmp", overwrite=True, qsub_co_jobs=global_params.NCORE_TOTAL, skip_indexviews=False, resume_job=False) log.info('Finished rendering of all SSVs. Checking completeness.') res = find_incomplete_ssv_views(ssd, woglia=True, n_cores=global_params.NCORES_PER_NODE) if len(res) != 0: msg = "Not all SSVs were rendered completely! Missing:\n{}".format(res) log.error(msg) raise RuntimeError(msg) else: log.info('Success.')
def run_create_sds(chunk_size=None, n_folders_fs=10000, max_n_jobs=None,
                   generate_sv_meshes=False, load_from_kd_overlaycubes=False,
                   cube_of_interest_bb=None):
    """
    Parameters
    ----------
    chunk_size :
    max_n_jobs : int
    n_folders_fs :
    generate_sv_meshes :
    load_from_kd_overlaycubes : bool
        Load prob/seg data from overlaycubes instead of raw cubes.
    cube_of_interest_bb : Tuple[np.ndarray]
        Defines the bounding box of the cube to process. By default this is
        set to (np.zeros(3), kd.boundary).

    Returns
    -------

    """
    if chunk_size is None:
        chunk_size = [512, 512, 512]
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 3
    log = initialize_logging('create_sds',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)

    # Sets initial values of object
    kd = kd_factory(global_params.config.kd_seg_path)
    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=np.int), kd.boundary]
    size = cube_of_interest_bb[1] - cube_of_interest_bb[0] + 1
    offset = cube_of_interest_bb[0]

    # TODO: get rid of explicit voxel extraction, all info necessary should be
    #  extracted at the beginning, e.g. size, bounding box etc., and then refactor
    #  to only use those cached attributes!
    # resulting ChunkDataset, required for SV extraction --
    # Object extraction - 2h, the same has to be done for all cell organelles
    cd_dir = global_params.config.working_dir + "chunkdatasets/sv/"
    # Class that contains a dict of chunks (with coordinates) after initializing it
    cd = chunky.ChunkDataset()
    cd.initialize(kd, kd.boundary, chunk_size, cd_dir, box_coords=[0, 0, 0],
                  fit_box_size=True)
    log.info('Generating SegmentationDatasets for cell and cell '
             'organelle supervoxels.')
    oew.from_ids_to_objects(
        cd, "sv", overlaydataset_path=global_params.config.kd_seg_path,
        n_chunk_jobs=max_n_jobs, hdf5names=["sv"], n_max_co_processes=None,
        n_folders_fs=n_folders_fs, use_combined_extraction=True, size=size,
        offset=offset)

    # Object Processing -- Perform after mapping to also cache mapping ratios
    sd = SegmentationDataset("sv", working_dir=global_params.config.working_dir)
    sd_proc.dataset_analysis(sd, recompute=True, compute_meshprops=False)
    log.info("Extracted {} cell SVs. Preparing rendering locations "
             "(and meshes if not provided).".format(len(sd.ids)))
    start = time.time()
    # chunk them
    multi_params = chunkify(sd.so_dir_paths, max_n_jobs)
    # all other kwargs like obj_type='sv' and version are the current SV
    # SegmentationDataset by default
    so_kwargs = dict(working_dir=global_params.config.working_dir, obj_type='sv')
    multi_params = [[par, so_kwargs] for par in multi_params]
    if generate_sv_meshes:
        _ = qu.QSUB_script(multi_params, "mesh_caching",
                           n_max_co_processes=global_params.NCORE_TOTAL)
    _ = qu.QSUB_script(multi_params, "sample_location_caching",
                       n_max_co_processes=global_params.NCORE_TOTAL)
    # recompute=False: only collect new sample_location property
    sd_proc.dataset_analysis(sd, compute_meshprops=True, recompute=False)
    log.info('Finished preparation of cell SVs after {:.0f}s.'.format(
        time.time() - start))

    # create SegmentationDataset for each cell organelle
    for co in global_params.existing_cell_organelles:
        start = time.time()
        cd_dir = global_params.config.working_dir + "chunkdatasets/{}/".format(co)
        cd.initialize(kd, kd.boundary, chunk_size, cd_dir, box_coords=[0, 0, 0],
                      fit_box_size=True)
        log.info('Started object extraction of cellular organelles "{}" from '
                 '{} chunks.'.format(co, len(cd.chunk_dict)))
        prob_kd_path_dict = {co: getattr(global_params.config,
                                         'kd_{}_path'.format(co))}
        # This creates a SegmentationDataset of type 'co'
        prob_thresh = global_params.config.entries["Probathresholds"][co]  # get probability threshold
        path = "{}/knossosdatasets/{}_seg/".format(
            global_params.config.working_dir, co)
        target_kd = knossosdataset.KnossosDataset()
        target_kd.initialize_without_conf(path, kd.boundary, kd.scale,
                                          kd.experiment_name, mags=[1, ])
        target_kd = knossosdataset.KnossosDataset()
        target_kd.initialize_from_knossos_path(path)
        oew.from_probabilities_to_objects(
            cd, co,
            # membrane_kd_path=global_params.config.kd_barrier_path,  # TODO: currently does not exist
            prob_kd_path_dict=prob_kd_path_dict, thresholds=[prob_thresh],
            workfolder=global_params.config.working_dir, hdf5names=[co],
            n_max_co_processes=None, target_kd=target_kd,
            n_folders_fs=n_folders_fs, debug=False, size=size, offset=offset,
            load_from_kd_overlaycubes=load_from_kd_overlaycubes)
        sd_co = SegmentationDataset(obj_type=co,
                                    working_dir=global_params.config.working_dir)

        # TODO: check if this is faster than the alternative below
        sd_proc.dataset_analysis(sd_co, recompute=True, compute_meshprops=False)
        multi_params = chunkify(sd_co.so_dir_paths, max_n_jobs)
        so_kwargs = dict(working_dir=global_params.config.working_dir, obj_type=co)
        multi_params = [[par, so_kwargs] for par in multi_params]
        _ = qu.QSUB_script(multi_params, "mesh_caching",
                           n_max_co_processes=global_params.NCORE_TOTAL)
        sd_proc.dataset_analysis(sd_co, recompute=False, compute_meshprops=True)
        # # Old alternative, requires many more reads/writes than above solution
        # sd_proc.dataset_analysis(sd_co, recompute=True, compute_meshprops=True)

        # About 0.2 h per object class
        log.info('Started mapping of {} cellular organelles of type "{}" to '
                 'cell SVs.'.format(len(sd_co.ids), co))
        sd_proc.map_objects_to_sv(sd, co, global_params.config.kd_seg_path,
                                  n_jobs=max_n_jobs)
        log.info('Finished preparation of {} "{}"-SVs after {:.0f}s.'
                 ''.format(len(sd_co.ids), co, time.time() - start))

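
# Hedged usage sketch: restricting SegmentationDataset creation to a sub-cube via
# `cube_of_interest_bb`, as described in the docstring above. The bounding-box
# values are placeholders, not defaults from the original code.
run_create_sds(chunk_size=[512, 512, 512],
               cube_of_interest_bb=[np.zeros(3, dtype=np.int),
                                    np.array([2048, 2048, 1024])])
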
def run_axoness_prediction(max_n_jobs_gpu=None, e3=False): log = initialize_logging('axon_prediction', global_params.config.working_dir + '/logs/', overwrite=False) if max_n_jobs_gpu is None: max_n_jobs_gpu = global_params.NGPU_TOTAL * 2 # here because all qsub jobs will start a script referring to 'global_params.config.working_dir' ssd = SuperSegmentationDataset( working_dir=global_params.config.working_dir) sd = ssd.get_segmentationdataset("sv") # chunk them multi_params = chunkify(sd.so_dir_paths, max_n_jobs_gpu) pred_key = "axoness_probas" # leave this fixed because it is used all over # get model properties log.info( 'Performing axon prediction of neuron views. Labels will be stored ' 'on SV level in the attribute dict with key "{}"'.format(pred_key)) if e3 is True: model_kwargs = 'get_axoness_model_e3' else: m = get_axoness_model() model_kwargs = dict(model_path=m._path, normalize_data=m.normalize_data, imposed_batch_size=m.imposed_batch_size, nb_labels=m.nb_labels, channels_to_load=m.channels_to_load) #all other kwargs like obj_type='sv' and version are the current SV SegmentationDataset by default so_kwargs = dict(working_dir=global_params.config.working_dir) # for axoness views set woglia to True (because glia were removed beforehand), # raw_only to False pred_kwargs = dict(woglia=True, pred_key=pred_key, verbose=False, raw_only=False) multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs] for par in multi_params] if e3 is True: # TODO: using two GPUs on a single node seems to be error-prone # -> wb13 froze when processing example_cube=2 n_cores = global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE if 'example_cube' in global_params.config.working_dir: n_cores = global_params.NCORES_PER_NODE # do not run two predictions in parallel _ = qu.QSUB_script(multi_params, "predict_sv_views_chunked_e3", log=log, n_max_co_processes=global_params.NGPU_TOTAL, n_cores=n_cores, suffix="_axoness", additional_flags="--gres=gpu:1", remove_jobfolder=True) else: for par in multi_params: mk = par[1] # Single GPUs are made available for every job via slurm, no need for random assignments. mk["init_gpu"] = 0 # np.random.rand(0, 2) _ = qu.QSUB_script(multi_params, "predict_sv_views_chunked", log=log, n_max_co_processes=global_params.NGPU_TOTAL // 2, n_cores=global_params.NCORES_PER_NODE, suffix="_axoness", additional_flags="--gres=gpu:1", remove_jobfolder=True) log.info('Finished axon prediction. Now checking for missing predictions.') res = find_missing_sv_attributes_in_ssv( ssd, pred_key, n_cores=global_params.NCORES_PER_NODE) if len(res) > 0: log.error("Attribute '{}' missing for follwing" " SVs:\n{}".format(pred_key, res)) else: log.info('Success.')
def _run_neuron_rendering_big_helper(max_n_jobs=None): if max_n_jobs is None: max_n_jobs = global_params.NNODES_TOTAL * 2 log = initialize_logging('neuron_view_rendering_big', global_params.config.working_dir + '/logs/') # view rendering prior to glia removal, choose SSD accordingly ssd = SuperSegmentationDataset( working_dir=global_params.config.working_dir) # TODO: use actual size criteria, e.g. number of sampling locations nb_svs_per_ssv = np.array( [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids]) # render normal size SSVs size_mask = nb_svs_per_ssv <= global_params.RENDERING_MAX_NB_SV if 'example' in global_params.config.working_dir and np.sum( ~size_mask) == 0: # generate at least one (artificial) huge SSV size_mask[:1] = False size_mask[1:] = True # sort ssv ids according to their number of SVs (descending) # list of SSV IDs and SSD parameters need to be given to a single QSUB job if np.sum(~size_mask) > 0: log.info('{} huge SSVs will be rendered on the cluster.'.format( np.sum(~size_mask))) # identify huge SSVs and process them individually on whole cluster big_ssv = ssd.ssv_ids[~size_mask] # # TODO: Currently high memory consumption when rendering index views! take into account # # when multiprocessing # # TODO: refactor `render_sso_coords_multiprocessing` and then use `QSUB_render_views_egl` # # here! # render normal views only n_cores = global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE n_parallel_jobs = global_params.NGPU_TOTAL render_kwargs = dict(add_cellobjects=True, woglia=True, overwrite=True, skip_indexviews=True) sso_kwargs = dict(working_dir=global_params.config.working_dir, nb_cpus=n_cores, enable_locking_so=False, enable_locking=False) # sort ssv ids according to their number of SVs (descending) ordering = np.argsort(nb_svs_per_ssv[~size_mask]) multi_params = big_ssv[ordering[::-1]] multi_params = chunkify(multi_params, max_n_jobs) # list of SSV IDs and SSD parameters need to be given to a single QSUB job multi_params = [(ixs, sso_kwargs, render_kwargs) for ixs in multi_params] path_to_out = qu.QSUB_script(multi_params, "render_views", n_max_co_processes=n_parallel_jobs, log=log, additional_flags="--gres=gpu:1", n_cores=n_cores, remove_jobfolder=True) # # render index-views only for ssv_id in big_ssv: ssv = SuperSegmentationObject( ssv_id, working_dir=global_params.config.working_dir) render_sso_coords_multiprocessing(ssv, global_params.config.working_dir, verbose=True, return_views=False, disable_batchjob=False, n_jobs=n_parallel_jobs, n_cores=n_cores, render_indexviews=True) log.info('Finished rendering of {}/{} SSVs.'.format( len(big_ssv), len(nb_svs_per_ssv)))
# -*- coding: utf-8 -*- # SyConn - Synaptic connectivity inference toolkit # # Copyright (c) 2016 - now # Max Planck Institute of Neurobiology, Martinsried, Germany # Authors: Philipp Schubert, Joergen Kornfeld import os import numpy as np from syconn.mp import batchjob_utils as mu from syconn.handler.basics import chunkify # path to the folder containing the scripts # "/your/qsub_script_folder/" script_folder = os.path.abspath(os.path.dirname(__file__)) # get example arguments for our jobs (600 arrays of size 10) params = np.arange(6000).reshape((-1, 10)) # Create a list of arguments; each element is input for an executed script. # We have created 300 jobs, each with 2 arrays params = chunkify(params, 300) mu.QSUB_script(params, "print", pe="openmp", queue=None, script_folder=script_folder, n_max_co_processes=40)
# -*- coding: utf-8 -*- # SyConn - Synaptic connectivity inference toolkit # # Copyright (c) 2016 - now # Max Planck Institute of Neurobiology, Martinsried, Germany # Authors: Philipp Schubert, Joergen Kornfeld import os from syconn.mp import batchjob_utils as mu from syconn.reps.segmentation import SegmentationDataset from syconn.handler.basics import chunkify if __name__ == "__main__": script_folder = os.path.abspath( os.path.dirname(__file__) + "/../qsub_scripts/") sds = SegmentationDataset("cs", version="33", working_dir="/wholebrain/scratch/areaxfs/") multi_params = chunkify(list(sds.sos), 1000) path_to_out = mu.QSUB_script(multi_params, "map_cs_properties", n_max_co_processes=40, pe="openmp", queue=None, script_folder=script_folder)
def run_axoness_prediction(max_n_jobs_gpu: Optional[int] = None, e3: bool = True): """ Run the axoness inference based on the ``img2scalar`` CMN. See :func:`~run_semsegaxoness_prediction` for the semantic segmentation model. Args: max_n_jobs_gpu: Number of parallel jobs. e3: If True, use elektronn3 models. Notes: Requires :func:`~run_create_neuron_ssd`, :func:`~run_neuron_rendering` and :func:`~syconn.exec.skeleton.run_skeleton_generation`. """ log = initialize_logging('axon_prediction', global_params.config.working_dir + '/logs/', overwrite=False) if max_n_jobs_gpu is None: max_n_jobs_gpu = global_params.config.ngpu_total * 2 # here because all qsub jobs will start a script referring to # 'global_params.config.working_dir' ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir) sd = ssd.get_segmentationdataset("sv") # chunk them multi_params = chunkify(sd.so_dir_paths, max_n_jobs_gpu) pred_key = "axoness_probas" # leave this fixed because it is used all over # get model properties log.info('Performing axon prediction of neuron views. Labels will be stored ' 'on SV level in the attribute dict with key "{}"'.format(pred_key)) if e3 is True: model_kwargs = 'get_axoness_model_e3' else: m = get_axoness_model() model_kwargs = dict(model_path=m._path, normalize_data=m.normalize_data, imposed_batch_size=m.imposed_batch_size, nb_labels=m.nb_labels, channels_to_load=m.channels_to_load) # all other kwargs like obj_type='sv' and version are the current SV # SegmentationDataset by default so_kwargs = dict(working_dir=global_params.config.working_dir) # for axoness views set woglia to True (because glia were removed beforehand), # raw_only to False pred_kwargs = dict(woglia=True, pred_key=pred_key, verbose=False, raw_only=False) multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs] for par in multi_params] if e3 is True: # TODO: using two GPUs on a single node seems to be error-prone # -> wb13 froze when processing example_cube=2 n_cores = global_params.config['ncores_per_node'] // global_params.config['ngpus_per_node'] _ = qu.QSUB_script(multi_params, "predict_sv_views_chunked_e3", log=log, n_max_co_processes=global_params.config.ngpu_total, n_cores=n_cores, suffix="_axoness", additional_flags="--gres=gpu:1", remove_jobfolder=True) else: for par in multi_params: mk = par[1] # SLURM is GPU aware, no need for random assignments. mk["init_gpu"] = 0 # np.random.rand(0, 2) _ = qu.QSUB_script(multi_params, "predict_sv_views_chunked", log=log, n_max_co_processes=global_params.config.ngpu_total // 2, n_cores=global_params.config['ncores_per_node'], suffix="_axoness", additional_flags="--gres=gpu:1", remove_jobfolder=True) log.info('Finished axon prediction. Now checking for missing predictions.') res = find_missing_sv_attributes_in_ssv(ssd, pred_key, n_cores=global_params.config['ncores_per_node']) if len(res) > 0: log.error("Attribute '{}' missing for follwing" " SVs:\n{}".format(pred_key, res)) else: log.info('Success.')
def run_semsegaxoness_prediction(max_n_jobs_gpu: Optional[int] = None): """ Will store semantic axoness labels as ``view_properties_semsegax['semseg_key']`` inside ``ssv.label_dict('vertex')``. Todo: * run rendering chunk-wise instead of on-the-fly and then perform prediction chunk-wise as well, adopt from spiness step. Args: max_n_jobs_gpu: Number of parallel GPU jobs. Returns: """ if max_n_jobs_gpu is None: max_n_jobs_gpu = global_params.config.ngpu_total * 2 log = initialize_logging('axoness_prediction', global_params.config.working_dir+ '/logs/', overwrite=False) ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir) # shuffle SV IDs np.random.seed(0) log.info('Starting axoness prediction.') nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids]) multi_params = ssd.ssv_ids ordering = np.argsort(nb_svs_per_ssv) multi_params = multi_params[ordering[::-1]] max_n_jobs_gpu = np.max([max_n_jobs_gpu, len(multi_params) // 100]) # at most 100 SSV per job multi_params = chunkify(multi_params, max_n_jobs_gpu) # job parameter will be read sequentially, i.e. in order to provide only # one list as parameter one needs an additonal axis multi_params = [(ixs, ) for ixs in multi_params] if not 'example' in global_params.config.working_dir: n_cores = global_params.config['ncores_per_node'] // global_params.config['ngpus_per_node'] else: n_cores = global_params.config['ncores_per_node'] path_to_out = qu.QSUB_script(multi_params, "predict_axoness_semseg", log=log, n_max_co_processes=global_params.config['nnodes_total'], suffix="", additional_flags="--gres=gpu:1", n_cores=n_cores, remove_jobfolder=False) log.info('Finished prediction of {} SSVs. Checking completeness.' ''.format(len(ordering))) out_files = glob.glob(path_to_out + "*.pkl") err = [] for fp in out_files: with open(fp, "rb") as f: local_err = pkl.load(f) if local_err is not None: err += list(local_err) if len(err) > 0: msg = "{} errors occurred for SSVs with ID: " \ "{}".format(len(err), [el[0] for el in err]) log.error(msg) raise ValueError(msg) else: log.info('Success.') shutil.rmtree(os.path.abspath(path_to_out + "/../"), ignore_errors=True)
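
# Hedged sketch of how the vertex-level axoness labels written by
# `run_semsegaxoness_prediction` might be read back. The key follows the
# docstring above (`view_properties_semsegax['semseg_key']`); the exact location
# of that dict in `global_params` is an assumption.
ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
ssv = ssd.get_super_segmentation_object(ssd.ssv_ids[0])
ax_labels = ssv.label_dict('vertex')[global_params.view_properties_semsegax['semseg_key']]
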
def run_glia_prediction(e3=False): log = initialize_logging('glia_prediction', global_params.config.working_dir + '/logs/', overwrite=False) # only append to this key if needed (for e.g. different versions, change accordingly in 'axoness_mapping.py') pred_key = "glia_probas" # Load initial RAG from Knossos mergelist text file. g = nx.read_edgelist(global_params.config.pruned_rag_path, nodetype=np.uint) all_sv_ids_in_rag = np.array(list(g.nodes()), dtype=np.uint) log.debug('Found {} CCs with a total of {} SVs in inital RAG.'.format( nx.number_connected_components(g), g.number_of_nodes())) # chunk them sd = SegmentationDataset("sv", working_dir=global_params.config.working_dir) multi_params = chunkify(sd.so_dir_paths, global_params.NGPU_TOTAL * 2) # get model properties if e3 == True: model_kwargs = 'get_glia_model_e3' else: m = get_glia_model() model_kwargs = dict(model_path=m._path, normalize_data=m.normalize_data, imposed_batch_size=m.imposed_batch_size, nb_labels=m.nb_labels, channels_to_load=m.channels_to_load) # all other kwargs like obj_type='sv' and version are the current SV SegmentationDataset by default so_kwargs = dict(working_dir=global_params.config.working_dir) # for glia views set woglia to False (because glia are included), # raw_only to True pred_kwargs = dict(woglia=False, pred_key=pred_key, verbose=False, raw_only=True) multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs] for par in multi_params] if e3 is True: # TODO: using two GPUs on a single node seems to be error-prone # -> wb13 froze when processing example_cube=2 n_cores = global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE if 'example_cube' in global_params.config.working_dir: n_cores = global_params.NCORES_PER_NODE # do not run two predictions in parallel qu.QSUB_script(multi_params, "predict_sv_views_chunked_e3", log=log, n_max_co_processes=global_params.NGPU_TOTAL, script_folder=None, n_cores=n_cores, suffix="_glia", additional_flags="--gres=gpu:1", remove_jobfolder=True) else: # randomly assign to gpu 0 or 1 for par in multi_params: mk = par[1] # GPUs are made available for every job via slurm, # no need for random assignments: np.random.rand(0, 2) mk["init_gpu"] = 0 _ = qu.QSUB_script(multi_params, "predict_sv_views_chunked", log=log, n_max_co_processes=global_params.NGPU_TOTAL, n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE, suffix="_glia", additional_flags="--gres=gpu:1", remove_jobfolder=True) log.info('Finished glia prediction. Checking completeness.') res = find_missing_sv_views(sd, woglia=False, n_cores=global_params.NCORES_PER_NODE) missing_not_contained_in_rag = [] missing_contained_in_rag = [] for el in res: if el not in all_sv_ids_in_rag: missing_not_contained_in_rag.append( el) # TODO: decide whether to use or not else: missing_contained_in_rag.append(el) if len(missing_contained_in_rag) != 0: msg = "Not all SVs were predicted! {}/{} missing:\n" \ "{}".format(len(missing_contained_in_rag), len(all_sv_ids_in_rag), missing_contained_in_rag[:100]) log.error(msg) raise ValueError(msg) else: log.info('Success.')
def _run_neuron_rendering_small_helper(max_n_jobs: Optional[int] = None): """ Render the default views as defined in ``global_params`` [WIP] of small neuron reconstructions. Helper method of :func:`~run_neuron_rendering`. Args: max_n_jobs: Number of parallel jobs. Notes: Requires :func:`~run_create_neuron_ssd`. """ if max_n_jobs is None: max_n_jobs = global_params.config.ngpu_total * 4 if \ global_params.config['pyopengl_platform'] == 'egl' \ else global_params.config.ncore_total * 4 log = initialize_logging('neuron_view_rendering_small', global_params.config.working_dir + '/logs/') # view rendering prior to glia removal, choose SSD accordingly ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir) # TODO: use actual size criteria, e.g. number of sampling locations nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids]) # render normal size SSVs size_mask = nb_svs_per_ssv <= global_params.config['glia']['rendering_max_nb_sv'] if 'example' in global_params.config.working_dir and np.sum(~size_mask) == 0: # generate at least one (artificial) huge SSV size_mask[:1] = False size_mask[1:] = True multi_params = ssd.ssv_ids[size_mask] # sort ssv ids according to their number of SVs (descending) ordering = np.argsort(nb_svs_per_ssv[size_mask]) multi_params = multi_params[ordering[::-1]] multi_params = chunkify(multi_params, max_n_jobs) # list of SSV IDs and SSD parameters need to be given to a single QSUB job multi_params = [(ixs, global_params.config.working_dir) for ixs in multi_params] log.info('Started rendering of {} SSVs. '.format(np.sum(size_mask))) if global_params.config['pyopengl_platform'] == 'osmesa': # utilize all CPUs qu.QSUB_script(multi_params, "render_views", log=log, suffix='_small', n_max_co_processes=global_params.config.ncore_total, remove_jobfolder=False) elif global_params.config['pyopengl_platform'] == 'egl': # utilize 1 GPU per task # run EGL on single node: 20 parallel jobs if not qu.batchjob_enabled(): n_cores = 1 n_parallel_jobs = global_params.config['ncores_per_node'] qu.QSUB_script(multi_params, "render_views", suffix='_small', n_max_co_processes=n_parallel_jobs, log=log, additional_flags="--gres=gpu:2", disable_batchjob=True, n_cores=n_cores, remove_jobfolder=True) # run on whole cluster else: n_cores = global_params.config['ncores_per_node'] // global_params.config['ngpus_per_node'] n_parallel_jobs = global_params.config.ngpu_total qu.QSUB_script(multi_params, "render_views_egl", suffix='_small', n_max_co_processes=n_parallel_jobs, log=log, additional_flags="--gres=gpu:1", n_cores=n_cores, remove_jobfolder=True) else: raise RuntimeError('Specified OpenGL platform "{}" not supported.' ''.format(global_params.config['pyopengl_platform'])) log.info('Finished rendering of {}/{} SSVs.'.format(len(ordering), len(nb_svs_per_ssv)))
def run_glia_rendering(max_n_jobs=None):
    """
    Uses the pruned RAG (stored as edge list .bz2 file) which is computed
    in `init_cell_subcell_sds`.

    Parameters
    ----------
    max_n_jobs :

    Returns
    -------

    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NGPU_TOTAL * 4 \
            if global_params.PYOPENGL_PLATFORM == 'egl' \
            else global_params.NCORE_TOTAL * 4
    log = initialize_logging('glia_view_rendering',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)
    log.info("Preparing RAG.")
    np.random.seed(0)

    # view rendering prior to glia removal, choose SSD accordingly
    # glia removal is based on the initial RAG and does not require explicitly stored SSVs
    # TODO: refactor how splits are stored, currently those are stored at ssv_tmp
    version = "tmp"

    G = nx.read_edgelist(global_params.config.pruned_rag_path, nodetype=np.uint)
    cc_gs = sorted(list(nx.connected_component_subgraphs(G)), key=len, reverse=True)
    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)

    # generate parameter for view rendering of individual SSV
    # TODO: remove SVs below minimum size (-> global_params.min_cc_size_ssv)
    sds = SegmentationDataset("sv", working_dir=global_params.config.working_dir)
    sv_size_dict = {}
    bbs = sds.load_cached_data('bounding_box') * sds.scaling
    for ii in range(len(sds.ids)):
        sv_size_dict[sds.ids[ii]] = bbs[ii]
    ccsize_dict = create_ccsize_dict(cc_gs, sv_size_dict,
                                     is_connected_components=True)

    multi_params = cc_gs
    big_ssv = []
    small_ssv = []
    for g in multi_params:
        if g.number_of_nodes() > RENDERING_MAX_NB_SV:
            big_ssv.append(g)
        elif ccsize_dict[list(g.nodes())[0]] < global_params.min_cc_size_ssv:
            pass  # ignore this CC
        else:
            small_ssv.append(g)

    log.info("View rendering for glia separation started.")
    # identify huge SSVs and process them on the entire cluster
    if len(big_ssv) > 0:
        n_threads = 2
        log.info("Processing {} huge SSVs in {} threads on the entire cluster"
                 ".".format(len(big_ssv), n_threads))
        q_in = Queue()
        q_out = Queue()
        for kk, g in enumerate(big_ssv):
            q_in.put((kk, g, version))
        for _ in range(n_threads):
            q_in.put(-1)
        ps = [Process(target=_run_huge_ssv_render_worker, args=(q_in, q_out))
              for _ in range(n_threads)]
        for p in ps:
            p.start()
            time.sleep(0.5)
        q_in.close()
        q_in.join_thread()
        for p in ps:
            p.join()
        if q_out.qsize() != len(big_ssv):
            raise ValueError('Not all `_run_huge_ssv_render_worker` jobs '
                             'completed successfully.')

    # render small SSV without overhead and single cpus on whole cluster
    multi_params = small_ssv
    np.random.shuffle(multi_params)
    multi_params = chunkify(multi_params, max_n_jobs)
    # list of SSV IDs and SSD parameters need to be given to a single QSUB job
    multi_params = [(ixs, global_params.config.working_dir, version)
                    for ixs in multi_params]
    _ = qu.QSUB_script(multi_params, "render_views_glia_removal", log=log,
                       n_max_co_processes=global_params.NGPU_TOTAL,
                       n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE,
                       additional_flags="--gres=gpu:1", remove_jobfolder=True)

    # check completeness
    log.info('Finished view rendering for glia separation. Checking completeness.')
    sd = SegmentationDataset("sv", working_dir=global_params.config.working_dir)
    res = find_missing_sv_views(sd, woglia=False,
                                n_cores=global_params.NCORES_PER_NODE)
    missing_not_contained_in_rag = []
    missing_contained_in_rag = []
    for el in res:
        if el not in all_sv_ids_in_rag:
            missing_not_contained_in_rag.append(el)  # TODO: decide whether to use or not
        else:
            missing_contained_in_rag.append(el)
    if len(missing_contained_in_rag) != 0:
        msg = "Not all SVs were rendered completely! {}/{} missing:\n" \
              "{}".format(len(missing_contained_in_rag), len(all_sv_ids_in_rag),
                          missing_contained_in_rag[:100])
        log.error(msg)
        raise ValueError(msg)
    else:
        log.info('All SVs now contain views required for glia prediction.')

# # Copyright (c) 2016 - now # Max Planck Institute of Neurobiology, Martinsried, Germany # Authors: Philipp Schubert, Joergen Kornfeld import os from syconn.mp import batchjob_utils as qu from syconn.mp.mp_utils import start_multiprocess from syconn.reps.super_segmentation_dataset import SuperSegmentationDataset from syconn.handler.basics import chunkify from syconn.proc.mapping import map_glia_fraction import numpy as np import itertools if __name__ == "__main__": script_folder = os.path.dirname( os.path.abspath(__file__)) + "/../../syconn/QSUB_scripts/" print(script_folder) ssds = SuperSegmentationDataset( working_dir="/wholebrain/scratch/areaxfs3/", version="0") multi_params = ssds.ssv_ids np.random.shuffle(multi_params) multi_params = chunkify(multi_params, 2000) path_to_out = qu.QSUB_script( multi_params, "render_sso_ortho", #"export_skeletons_new", #"map_viewaxoness2skel", n_max_co_processes=100, pe="openmp", queue=None, script_folder=script_folder, suffix="", n_cores=1)