def map_ids(wd, n_jobs=1000, qsub_pe=None, qsub_queue=None, nb_cpus=None,
            n_max_co_processes=None, chunk_size=(128, 128, 128), debug=False):
    global_params.wd = wd
    kd = kd_factory(global_params.config.kd_seg_path)
    cd_dir = global_params.config.working_dir + "chunkdatasets/sv/"
    cd_cell = chunky.ChunkDataset()
    cd_cell.initialize(kd, kd.boundary, chunk_size, cd_dir,
                       box_coords=[0, 0, 0], fit_box_size=True)

    multi_params = []
    chunkify_id = 0
    for coord_chunk in chunkify(
            [cd_cell.chunk_dict[key].coordinates for key in cd_cell.chunk_dict],
            100):
        multi_params.append([coord_chunk, chunk_size, wd, chunkify_id])
        chunkify_id += 1

    sm.start_multiprocess_imap(_map_ids_thread, multi_params,
                               nb_cpus=n_max_co_processes, verbose=debug,
                               debug=debug)
def _map_ids_thread(args):
    coord_list = args[0]
    chunk_size = args[1]
    wd = args[2]
    chunkify_id = args[3]
    worker_sv_dc = {}
    kd_obj = {}
    stri = wd + '/voxel_%s.pkl' % chunkify_id
    f = open(stri, "wb")
    for obj_type in global_params.existing_cell_organelles:
        kd_obj[obj_type] = kd_factory(
            global_params.config.entries['Paths']['kd_%s' % obj_type])
    kd_cell = kd_factory(global_params.config.kd_seg_path)
    for coord in coord_list:
        seg_cell = kd_cell.from_overlaycubes_to_matrix(
            offset=coord, size=chunk_size).flatten()
        seg_obj = {}
        for obj in kd_obj:
            # seg_obj[obj] = kd_obj[obj].from_overlaycubes_to_matrix(
            #     offset=coord, size=chunk_size).flatten()
            seg_obj[obj] = create_toy_data(chunk_size, 3).flatten()
        for unique_cell_id in np.unique(seg_cell):
            if unique_cell_id in worker_sv_dc:
                continue
            # fresh count dict per cell ID; assigning one shared dict instance
            # to every ID would merge the counts of all cells
            worker_sv_dc[unique_cell_id] = {obj_type: {}
                                            for obj_type in kd_obj}
        for vox in range(len(seg_cell)):
            cell_id = seg_cell[vox]
            for obj in kd_obj:
                j = seg_obj[obj][vox]
                if j in worker_sv_dc[cell_id][obj]:
                    worker_sv_dc[cell_id][obj][j] += 1
                else:
                    worker_sv_dc[cell_id][obj][j] = 1
    pickle.dump(worker_sv_dc, f)
    f.close()
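# Illustration (not part of the pipeline): each worker writes a
# voxel_<chunkify_id>.pkl file whose content maps cell supervoxel IDs to
# per-organelle voxel-overlap counts, i.e. {cell_id: {obj_type: {obj_id:
# n_voxels}}}. The toy arrays below are assumptions used only to show the
# expected layout of that mapping.
def _map_ids_example():
    seg_cell = np.array([1, 1, 2, 2])          # hypothetical cell segmentation
    seg_obj = {'mi': np.array([0, 5, 5, 0])}   # hypothetical organelle IDs
    counts = {cid: {'mi': {}} for cid in np.unique(seg_cell)}  # fresh dict per ID
    for vox, cell_id in enumerate(seg_cell):
        obj_id = seg_obj['mi'][vox]
        counts[cell_id]['mi'][obj_id] = counts[cell_id]['mi'].get(obj_id, 0) + 1
    return counts  # -> {1: {'mi': {0: 1, 5: 1}}, 2: {'mi': {5: 1, 0: 1}}}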
def run_syn_generation(chunk_size: Tuple[int, int, int] = (512, 512, 512),
                       n_folders_fs: int = 10000,
                       max_n_jobs: Optional[int] = None,
                       cube_of_interest_bb: Optional[np.ndarray] = None):
    """
    Run the synapse generation. Will create
    :class:`~syconn.reps.segmentation.SegmentationDataset` objects with the
    following versions:

        * 'cs': Contact site objects between supervoxels.
        * 'syn': Objects representing the overlap between 'cs' and the initial
          synaptic junction predictions. Note: These objects effectively
          represent synapse fragments between supervoxels.
        * 'syn_ssv': Agglomerated 'syn' objects based on the supervoxel graph.

    Args:
        chunk_size: The size of processed cubes.
        n_folders_fs: Number of folders used to create the folder structure in
            each :class:`~syconn.reps.segmentation.SegmentationDataset`.
        max_n_jobs: Number of parallel jobs.
        cube_of_interest_bb: Defines the bounding box of the cube to process.
            By default this is set to (np.zeros(3), kd.boundary).
    """
    if max_n_jobs is None:
        max_n_jobs = global_params.config.ncore_total * 2
    log = initialize_logging('synapse_generation',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)
    kd_seg_path = global_params.config.kd_seg_path
    kd = kd_factory(kd_seg_path)

    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=np.int), kd.boundary]

    ces.extract_contact_sites(chunk_size=chunk_size, log=log,
                              max_n_jobs=max_n_jobs,
                              cube_of_interest_bb=cube_of_interest_bb,
                              n_folders_fs=n_folders_fs)
    log.info('SegmentationDataset of type "cs" and "syn" was generated.')

    # TODO: add check for SSD existence, which is required at this point
    # This creates an SD of type 'syn_ssv'
    cps.combine_and_split_syn(
        global_params.config.working_dir, resume_job=False,
        cs_gap_nm=global_params.config['cell_objects']['cs_gap_nm'],
        log=log, n_folders_fs=n_folders_fs)
    log.info('Synapse objects were created.')

    sd_syn_ssv = SegmentationDataset(
        working_dir=global_params.config.working_dir, obj_type='syn_ssv')
    dataset_analysis(sd_syn_ssv, compute_meshprops=True)
    log.info('SegmentationDataset of type "syn_ssv" was generated.')

    cps.map_objects_to_synssv(global_params.config.working_dir, log=log)
    log.info('Cellular organelles were mapped to "syn_ssv".')

    cps.classify_synssv_objects(global_params.config.working_dir, log=log)
    log.info('Synapse prediction finished.')

    log.info('Collecting and writing syn-ssv objects to SSV attribute '
             'dictionary.')
    # This needs to be run after `classify_synssv_objects` and before
    # `map_synssv_objects` if the latter uses thresholding for synaptic objects
    # just collect new data: ``recompute=False``
    dataset_analysis(sd_syn_ssv, compute_meshprops=False, recompute=False)
    # TODO: decide whether this should happen after prob thresholding or not
    map_synssv_objects(log=log)
    log.info('Finished.')
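# Usage sketch (assumptions: the cell and organelle SegmentationDatasets and
# the SuperSegmentationDataset from the preceding pipeline steps already exist
# in global_params.config.working_dir; the sub-volume below is illustrative).
def _example_run_syn_generation():
    bb = np.array([[0, 0, 0], [1024, 1024, 512]])  # hypothetical bounding box
    run_syn_generation(chunk_size=(512, 512, 512), n_folders_fs=10000,
                       cube_of_interest_bb=bb)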
def load_seg_data(offset):
    global_params.wd = "/u/mariakaw/SyConn/example_cube1"
    kd_cell = basics.kd_factory(global_params.config.kd_seg_path)
    return kd_cell.from_overlaycubes_to_matrix((256, 256, 256), offset)
def init_cell_subcell_sds(chunk_size: Optional[Tuple[int, int, int]] = None,
                          n_folders_fs: int = 10000,
                          n_folders_fs_sc: int = 10000,
                          max_n_jobs: Optional[int] = None,
                          load_cellorganelles_from_kd_overlaycubes: bool = False,
                          transf_func_kd_overlay: Optional[Callable] = None,
                          cube_of_interest_bb: Optional[np.ndarray] = None):
    """
    Todo:
        * Don't extract sj objects and replace their use-cases with syn
          objects (?).

    Args:
        chunk_size: Size of the cubes which are processed by each worker.
        n_folders_fs: Number of folders used to create the folder structure in
            the resulting :class:`~syconn.reps.segmentation.SegmentationDataset`
            for the cell supervoxels (``version='sv'``).
        n_folders_fs_sc: Number of folders used to create the folder structure
            in the resulting
            :class:`~syconn.reps.segmentation.SegmentationDataset` for the cell
            organelle supervoxels (e.g. ``version='mi'``).
        max_n_jobs: Number of parallel jobs.
        load_cellorganelles_from_kd_overlaycubes: Load prob./seg. data of the
            cell organelles from overlay cubes instead of raw cubes.
        transf_func_kd_overlay: Transformation applied on the prob. map or
            segmentation data.
        cube_of_interest_bb: Bounding box of the (sub-) volume of the dataset
            which is processed.
    """
    log = initialize_logging('create_sds', global_params.config.working_dir +
                             '/logs/', overwrite=True)
    if transf_func_kd_overlay is None:
        transf_func_kd_overlay = {k: None for k in
                                  global_params.existing_cell_organelles}
    if chunk_size is None:
        chunk_size = [512, 512, 512]
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
    # loading cached data or adapt number of jobs/cache size dynamically,
    # dependent on the dataset
    kd = kd_factory(global_params.config.kd_seg_path)
    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=np.int), kd.boundary]

    log.info('Converting predictions of cellular organelles to KnossosDatasets'
             ' for every type available: {}.'.format(
                 global_params.existing_cell_organelles))
    start = time.time()
    ps = [Process(target=kd_init, args=[co, chunk_size, transf_func_kd_overlay,
                                        load_cellorganelles_from_kd_overlaycubes,
                                        cube_of_interest_bb, log])
          for co in global_params.existing_cell_organelles]
    for p in ps:
        p.start()
        time.sleep(5)
    for p in ps:
        p.join()
    log.info('Finished KD generation after {:.0f}s.'.format(time.time() - start))

    log.info('Generating SegmentationDatasets for subcellular structures {} and'
             ' cell supervoxels.'.format(global_params.existing_cell_organelles))
    start = time.time()
    sd_proc.map_subcell_extract_props(
        global_params.config.kd_seg_path,
        global_params.config.kd_organelle_seg_paths,
        n_folders_fs=n_folders_fs, n_folders_fs_sc=n_folders_fs_sc,
        n_chunk_jobs=max_n_jobs, cube_of_interest_bb=cube_of_interest_bb,
        chunk_size=chunk_size, log=log)
    log.info('Finished extraction and mapping after {:.2f}s.'
             ''.format(time.time() - start))

    log.info('Caching properties of subcellular structures {} and cell'
             ' supervoxels'.format(global_params.existing_cell_organelles))
    start = time.time()
    ps = [Process(target=sd_init, args=[co, max_n_jobs, log])
          for co in ["sv"] + global_params.existing_cell_organelles]
    for p in ps:
        p.start()
        time.sleep(5)
    for p in ps:
        p.join()
    log.info('Finished SD caching after {:.2f}s.'
             ''.format(time.time() - start))
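# Usage sketch (assumptions: 'mi', 'vc' and 'sj' are the entries of
# global_params.existing_cell_organelles; the thresholding transform for 'sj'
# is illustrative only and not the project default).
def _example_init_cell_subcell_sds():
    transf = {'mi': None, 'vc': None,
              'sj': lambda x: (x > 100).astype(np.uint8)}  # hypothetical binarization
    init_cell_subcell_sds(chunk_size=(512, 512, 512),
                          load_cellorganelles_from_kd_overlaycubes=False,
                          transf_func_kd_overlay=transf)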
def run_syn_generation(chunk_size=(512, 512, 512), n_folders_fs=10000,
                       max_n_jobs=None, cube_of_interest_bb=None):
    """
    Parameters
    ----------
    chunk_size : Tuple[int, int, int]
        The size of processed cubes.
    n_folders_fs : int
        Number of folders used to create the folder structure in each
        SegmentationDataset.
    max_n_jobs : int
        Number of parallel jobs.
    cube_of_interest_bb : Tuple[np.ndarray]
        Defines the bounding box of the cube to process. By default this is
        set to (np.zeros(3), kd.boundary).

    Returns
    -------

    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
    log = initialize_logging('synapse_generation',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)
    kd_seg_path = global_params.config.kd_seg_path
    kd = kd_factory(kd_seg_path)

    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=np.int), kd.boundary]

    ces.extract_contact_sites(chunk_size=chunk_size, log=log,
                              max_n_jobs=max_n_jobs,
                              cube_of_interest_bb=cube_of_interest_bb,
                              n_folders_fs=n_folders_fs)
    log.info('SegmentationDataset of type "cs" and "syn" was generated.')

    # TODO: add check for SSD existence, which is required at this point
    # This creates an SD of type 'syn_ssv'
    cps.combine_and_split_syn(global_params.config.working_dir,
                              resume_job=False,
                              cs_gap_nm=global_params.cs_gap_nm,
                              log=log, n_folders_fs=n_folders_fs)
    log.info('Synapse objects were created.')

    sd_syn_ssv = SegmentationDataset(
        working_dir=global_params.config.working_dir, obj_type='syn_ssv')
    dataset_analysis(sd_syn_ssv, compute_meshprops=True)
    log.info('SegmentationDataset of type "syn_ssv" was generated.')

    cps.map_objects_to_synssv(global_params.config.working_dir, log=log)
    log.info('Cellular organelles were mapped to "syn_ssv".')

    cps.classify_synssv_objects(global_params.config.working_dir, log=log)
    log.info('Synapse property prediction finished.')

    log.info('Collecting and writing syn-ssv objects to SSV attribute '
             'dictionary.')
    # This needs to be run after `classify_synssv_objects` and before
    # `map_synssv_objects` if the latter uses thresholding for synaptic objects
    dataset_analysis(sd_syn_ssv, compute_meshprops=False,
                     recompute=False)  # just collect new data
    # TODO: decide whether this should happen after prob thresholding or not
    map_synssv_objects(log=log)
    log.info('Finished.')
if __name__ == "__main__":
    dictionary_elements = []
    seg_dict = []
    dictionary_elements.append("mi")
    dictionary_elements.append("vc")
    dictionary_elements.append("sj")
    # dictionary_elements.append("sv")
    # dictionary_elements.append("cs")
    offset = (10, 10, 10)
    print(global_params.wd)
    global_params.wd = '/wholebrain/u/atultm/SyConn/example_cube1/'
    cd_dir = global_params.config.working_dir + "chunkdatasets/sv/"
    chunk_size = [128] * 3
    kd = kd_factory(global_params.config.kd_seg_path)
    cd_cell = chunky.ChunkDataset()
    cd_cell.initialize(kd, kd.boundary, chunk_size, cd_dir,
                       box_coords=[0, 0, 0], fit_box_size=True)
    ch = cd_cell.chunk_dict[0]
    seg_cell = kd.from_overlaycubes_to_matrix(offset=ch.coordinates,
                                              size=ch.size)
    # for element in dictionary_elements:
    #     cd_dir = global_params.config.working_dir + "chunkdatasets/" + element + "/"
    #     # Class that contains a dict of chunks (with coordinates) after initializing it
    #     cd_mi = chunky.ChunkDataset()
    #     cd_mi.initialize(kd, kd.boundary, chunk_size, cd_dir,
    #                      box_coords=[0, 0, 0], fit_box_size=True)
    #     ch = cd_mi.chunk_dict[0]
def run_create_sds(chunk_size=None, n_folders_fs=10000, max_n_jobs=None,
                   generate_sv_meshes=False, load_from_kd_overlaycubes=False,
                   cube_of_interest_bb=None):
    """
    Parameters
    ----------
    chunk_size : Tuple[int, int, int]
        Size of the cubes which are processed by each worker.
    max_n_jobs : int
        Number of parallel jobs.
    n_folders_fs : int
        Number of folders used to create the folder structure in each
        SegmentationDataset.
    generate_sv_meshes : bool
        Cache meshes of the cell supervoxels.
    load_from_kd_overlaycubes : bool
        Load prob/seg data from overlaycubes instead of raw cubes.
    cube_of_interest_bb : Tuple[np.ndarray]
        Defines the bounding box of the cube to process. By default this is
        set to (np.zeros(3), kd.boundary).

    Returns
    -------

    """
    if chunk_size is None:
        chunk_size = [512, 512, 512]
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 3
    log = initialize_logging('create_sds', global_params.config.working_dir +
                             '/logs/', overwrite=False)

    # Sets initial values of object
    kd = kd_factory(global_params.config.kd_seg_path)
    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=np.int), kd.boundary]
    size = cube_of_interest_bb[1] - cube_of_interest_bb[0] + 1
    offset = cube_of_interest_bb[0]
    # TODO: get rid of explicit voxel extraction, all info necessary should be
    #  extracted at the beginning, e.g. size, bounding box etc., and then
    #  refactor to only use those cached attributes!
    # resulting ChunkDataset, required for SV extraction --
    # Object extraction - 2h, the same has to be done for all cell organelles
    cd_dir = global_params.config.working_dir + "chunkdatasets/sv/"
    # Class that contains a dict of chunks (with coordinates) after
    # initializing it
    cd = chunky.ChunkDataset()
    cd.initialize(kd, kd.boundary, chunk_size, cd_dir,
                  box_coords=[0, 0, 0], fit_box_size=True)
    log.info('Generating SegmentationDatasets for cell and cell '
             'organelle supervoxels.')
    oew.from_ids_to_objects(
        cd, "sv", overlaydataset_path=global_params.config.kd_seg_path,
        n_chunk_jobs=max_n_jobs, hdf5names=["sv"], n_max_co_processes=None,
        n_folders_fs=n_folders_fs, use_combined_extraction=True, size=size,
        offset=offset)

    # Object Processing -- Perform after mapping to also cache mapping ratios
    sd = SegmentationDataset("sv",
                             working_dir=global_params.config.working_dir)
    sd_proc.dataset_analysis(sd, recompute=True, compute_meshprops=False)

    log.info("Extracted {} cell SVs. Preparing rendering locations "
             "(and meshes if not provided).".format(len(sd.ids)))
    start = time.time()
    # chunk them
    multi_params = chunkify(sd.so_dir_paths, max_n_jobs)
    # all other kwargs like obj_type='sv' and version are the current SV
    # SegmentationDataset by default
    so_kwargs = dict(working_dir=global_params.config.working_dir,
                     obj_type='sv')
    multi_params = [[par, so_kwargs] for par in multi_params]
    if generate_sv_meshes:
        _ = qu.QSUB_script(multi_params, "mesh_caching",
                           n_max_co_processes=global_params.NCORE_TOTAL)
    _ = qu.QSUB_script(multi_params, "sample_location_caching",
                       n_max_co_processes=global_params.NCORE_TOTAL)
    # recompute=False: only collect new sample_location property
    sd_proc.dataset_analysis(sd, compute_meshprops=True, recompute=False)
    log.info('Finished preparation of cell SVs after {:.0f}s.'
             ''.format(time.time() - start))

    # create SegmentationDataset for each cell organelle
    for co in global_params.existing_cell_organelles:
        start = time.time()
        cd_dir = global_params.config.working_dir + \
            "chunkdatasets/{}/".format(co)
        cd.initialize(kd, kd.boundary, chunk_size, cd_dir,
                      box_coords=[0, 0, 0], fit_box_size=True)
        log.info('Started object extraction of cellular organelles "{}" from '
                 '{} chunks.'.format(co, len(cd.chunk_dict)))
        prob_kd_path_dict = {
            co: getattr(global_params.config, 'kd_{}_path'.format(co))
        }
        # This creates a SegmentationDataset of type 'co'
        prob_thresh = global_params.config.entries["Probathresholds"][co]  # get probability threshold
        path = "{}/knossosdatasets/{}_seg/".format(
            global_params.config.working_dir, co)
        target_kd = knossosdataset.KnossosDataset()
        target_kd.initialize_without_conf(path, kd.boundary, kd.scale,
                                          kd.experiment_name, mags=[1, ])
        target_kd = knossosdataset.KnossosDataset()
        target_kd.initialize_from_knossos_path(path)
        oew.from_probabilities_to_objects(
            cd, co,
            # membrane_kd_path=global_params.config.kd_barrier_path,  # TODO: currently does not exist
            prob_kd_path_dict=prob_kd_path_dict, thresholds=[prob_thresh],
            workfolder=global_params.config.working_dir, hdf5names=[co],
            n_max_co_processes=None, target_kd=target_kd,
            n_folders_fs=n_folders_fs, debug=False, size=size, offset=offset,
            load_from_kd_overlaycubes=load_from_kd_overlaycubes)
        sd_co = SegmentationDataset(
            obj_type=co, working_dir=global_params.config.working_dir)

        # TODO: check if this is faster than the alternative below
        sd_proc.dataset_analysis(sd_co, recompute=True,
                                 compute_meshprops=False)
        multi_params = chunkify(sd_co.so_dir_paths, max_n_jobs)
        so_kwargs = dict(working_dir=global_params.config.working_dir,
                         obj_type=co)
        multi_params = [[par, so_kwargs] for par in multi_params]
        _ = qu.QSUB_script(multi_params, "mesh_caching",
                           n_max_co_processes=global_params.NCORE_TOTAL)
        sd_proc.dataset_analysis(sd_co, recompute=False,
                                 compute_meshprops=True)
        # # Old alternative, requires many more reads/writes than the above
        # # solution:
        # sd_proc.dataset_analysis(sd_co, recompute=True,
        #                          compute_meshprops=True)

        # About 0.2 h per object class
        log.info('Started mapping of {} cellular organelles of type "{}" to '
                 'cell SVs.'.format(len(sd_co.ids), co))
        sd_proc.map_objects_to_sv(sd, co, global_params.config.kd_seg_path,
                                  n_jobs=max_n_jobs)
        log.info('Finished preparation of {} "{}"-SVs after {:.0f}s.'
                 ''.format(len(sd_co.ids), co, time.time() - start))
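# Usage sketch (assumptions: global_params.config points to an initialized
# working directory with "Probathresholds" entries for every organelle type;
# the sub-volume below is illustrative).
def _example_run_create_sds():
    bb = [np.zeros(3, dtype=np.int64), np.array([2048, 2048, 1024])]  # hypothetical bounding box
    run_create_sds(chunk_size=[512, 512, 512], n_folders_fs=10000,
                   generate_sv_meshes=True, cube_of_interest_bb=bb)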