def generate_subcell_kd_from_proba(co, chunk_size=None, transf_func_kd_overlay=None,
                                   load_cellorganelles_from_kd_overlaycubes=False,
                                   cube_of_interest_bb=None, log=None, **kwargs):
    """
    Generate KnossosDatasets for a given subcellular structure key (e.g. 'mi').
    The required initial data format is a ChunkDataset located at
    ``"{}/chunkdatasets/{}/".format(global_params.config.working_dir, co)``.
    The resulting KD will be stored at
    ``"{}/knossosdatasets/{}_seg/".format(global_params.config.working_dir, co)``.
    See :func:`~syconn.extraction.object_extraction_wrapper.from_probabilities_to_kd`
    for details of the conversion process from the initial probability map to the
    SV segmentation. Default: thresholding and connected components; thresholds are
    set via the `config.ini` file, check
    ``syconn.global_params.config.entries["Probathresholds"]`` of an initialized
    :class:`~syconn.handler.config.DynConfig` object.

    Parameters
    ----------
    co : str
        Subcellular structure key, e.g. 'mi'.
    chunk_size : Tuple
        Edge lengths of the chunks processed at a time.
    transf_func_kd_overlay : callable
        Transformation applied to the loaded overlay data.
    load_cellorganelles_from_kd_overlaycubes : bool
    cube_of_interest_bb : Tuple[Tuple[int]] or np.ndarray
        Bounding box of the sub-volume to process.
    log : logger
    """
    if chunk_size is None:
        chunk_size = [512, 512, 512]
    if log is None:
        log = log_extraction
    if transf_func_kd_overlay is None:
        # default to no transformation for the requested structure
        transf_func_kd_overlay = {co: None}
    kd = basics.kd_factory(global_params.config.kd_seg_path)
    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=np.int), kd.boundary]
    size = cube_of_interest_bb[1] - cube_of_interest_bb[0] + 1
    offset = cube_of_interest_bb[0]
    cd_dir = "{}/chunkdatasets/{}/".format(global_params.config.working_dir, co)
    cd = chunky.ChunkDataset()
    cd.initialize(kd, kd.boundary, chunk_size, cd_dir, box_coords=[0, 0, 0],
                  fit_box_size=True, list_of_coords=[])
    log.info('Started object extraction of cellular organelles "{}" from '
             '{} chunks.'.format(co, len(cd.chunk_dict)))
    prob_kd_path_dict = {co: getattr(global_params.config, 'kd_{}_path'.format(co))}
    # This creates a SegmentationDataset of type 'co'
    prob_thresh = global_params.config.entries["Probathresholds"][co]  # get probability threshold
    # `from_probabilities_to_kd` will export a KD at `path`, remove if already existing
    path = global_params.config.kd_organelle_seg_paths[co]
    if os.path.isdir(path):
        log.debug('Found existing KD at {}. Removing it now.'.format(path))
        shutil.rmtree(path)
    target_kd = knossosdataset.KnossosDataset()
    scale = np.array(global_params.config.entries["Dataset"]["scaling"],
                     dtype=np.float32)
    target_kd.initialize_without_conf(path, kd.boundary, scale,
                                      kd.experiment_name, mags=[1, ])
    target_kd = knossosdataset.KnossosDataset()
    target_kd.initialize_from_knossos_path(path)
    from_probabilities_to_kd(cd, co,
                             # membrane_kd_path=global_params.config.kd_barrier_path,  # TODO: currently does not exist
                             prob_kd_path_dict=prob_kd_path_dict,
                             thresholds=[prob_thresh],
                             hdf5names=[co], n_max_co_processes=None,
                             target_kd=target_kd, debug=False, size=size,
                             offset=offset,
                             load_from_kd_overlaycubes=load_cellorganelles_from_kd_overlaycubes,
                             transf_func_kd_overlay=transf_func_kd_overlay[co],
                             log=log, **kwargs)
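
# Usage sketch (illustrative): generate the mitochondria segmentation KD from its
# probability map. This assumes `global_params.wd` points to an initialized working
# directory and that `global_params.config.kd_mi_path` refers to an existing
# probability-map KnossosDataset; the parameter values are examples only:
#
#   generate_subcell_kd_from_proba('mi', chunk_size=[256, 256, 256])
#
# The thresholded, connected-component-labelled result is then written to
# `global_params.config.kd_organelle_seg_paths['mi']`.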
def map_ids(wd, n_jobs=1000, qsub_pe=None, qsub_queue=None, nb_cpus=None,
            n_max_co_processes=None, chunk_size=(128, 128, 128), debug=False):
    global_params.wd = wd
    kd = kd_factory(global_params.config.kd_seg_path)
    cd_dir = global_params.config.working_dir + "chunkdatasets/sv/"
    cd_cell = chunky.ChunkDataset()
    cd_cell.initialize(kd, kd.boundary, chunk_size, cd_dir,
                       box_coords=[0, 0, 0], fit_box_size=True)
    multi_params = []
    chunkify_id = 0
    for coord_chunk in chunkify([cd_cell.chunk_dict[key].coordinates
                                 for key in cd_cell.chunk_dict], 100):
        multi_params.append([coord_chunk, chunk_size, wd, chunkify_id])
        chunkify_id += 1
    sm.start_multiprocess_imap(_map_ids_thread, multi_params,
                               nb_cpus=n_max_co_processes, verbose=debug,
                               debug=debug)
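
# Usage sketch (illustrative): re-map supervoxel ids of an existing working
# directory on a single node:
#
#   map_ids('/path/to/working_dir/', n_max_co_processes=20)
#
# Note that in the implementation above only `wd`, `chunk_size`, `debug` and
# `n_max_co_processes` are actually used; `n_jobs`, `qsub_pe`, `qsub_queue` and
# `nb_cpus` are accepted for interface compatibility only.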
def extract_contact_sites(n_max_co_processes: Optional[int] = None,
                          chunk_size: Optional[Tuple[int, int, int]] = None,
                          log: Optional[Logger] = None,
                          max_n_jobs: Optional[int] = None,
                          cube_of_interest_bb: Optional[np.ndarray] = None,
                          n_folders_fs: int = 1000):
    """
    Extracts contact sites and their overlap with `sj` objects and stores them
    in a :class:`~syconn.reps.segmentation.SegmentationDataset` of type ``cs``
    and ``syn`` respectively. If synapse type is available, this information
    will be stored as the voxel-ratio per class in the attribute dictionary of
    the ``syn`` objects (keys: ``sym_prop``, ``asym_prop``).

    Notes:
        Replaced ``find_contact_sites``, ``extract_agg_contact_sites``,
        ``syn_gen_via_cset`` and ``extract_synapse_type``.

    Args:
        n_max_co_processes: Number of parallel workers.
        chunk_size: Sub-cube volume which is processed at a time.
        log: Logger.
        max_n_jobs: Maximum number of jobs.
        cube_of_interest_bb: Sub-volume of the data set which is processed.
            Default: Entire data set.
        n_folders_fs: Number of folders used for organizing supervoxel data.
    """
    if extract_cs_syntype is None:
        msg = '`extract_contact_sites` requires the cythonized method ' \
              '`extract_cs_syntype`. Use `find_contact_sites` and others ' \
              'for contact site processing.'
        log_extraction.error(msg)
        raise ImportError(msg)
    kd = kd_factory(global_params.config.kd_seg_path)
    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=np.int), kd.boundary]
    if chunk_size is None:
        chunk_size = (512, 512, 512)
    size = cube_of_interest_bb[1] - cube_of_interest_bb[0] + 1
    offset = cube_of_interest_bb[0]

    # Initial contact site extraction
    cd_dir = global_params.config.temp_path + "/chunkdatasets/cs/"
    # Class that contains a dict of chunks (with coordinates) after initializing it
    cset = chunky.ChunkDataset()
    cset.initialize(kd, kd.boundary, chunk_size, cd_dir, box_coords=[0, 0, 0],
                    fit_box_size=True)

    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
    if log is None:
        log = log_extraction
    if size is not None and offset is not None:
        chunk_list, _ = calculate_chunk_numbers_for_box(cset, offset, size)
    else:
        chunk_list = [ii for ii in range(len(cset.chunk_dict))]
    # shuffle chunk list to get a more balanced work-load
    rand_ixs = np.arange(len(chunk_list))
    np.random.shuffle(rand_ixs)
    chunk_list = np.array(chunk_list)[rand_ixs]

    os.makedirs(cset.path_head_folder, exist_ok=True)
    multi_params = []
    # TODO: currently pickles Chunk objects -> job submission might be slow
    for chunk_k in chunkify(chunk_list, max_n_jobs):
        multi_params.append([[cset.chunk_dict[k] for k in chunk_k],
                             global_params.config.kd_seg_path])

    if not qu.batchjob_enabled():
        results = start_multiprocess_imap(_contact_site_extraction_thread,
                                          multi_params, debug=False,
                                          nb_cpus=n_max_co_processes)
    else:
        path_to_out = qu.QSUB_script(multi_params, "contact_site_extraction",
                                     n_max_co_processes=n_max_co_processes,
                                     log=log)
        out_files = glob.glob(path_to_out + "/*")
        results = []
        for out_file in out_files:
            with open(out_file, 'rb') as f:
                results.append(pkl.load(f))
        shutil.rmtree(os.path.abspath(path_to_out + "/../"), ignore_errors=True)

    # reduce step
    cs_props = [{}, defaultdict(list), {}]
    syn_props = [{}, defaultdict(list), {}]
    tot_sym_cnt = {}
    tot_asym_cnt = {}
    for curr_props, curr_syn_props, asym_cnt, sym_cnt in results:
        merge_prop_dicts([cs_props, curr_props])
        merge_prop_dicts([syn_props, curr_syn_props])
        merge_type_dicts([tot_asym_cnt, asym_cnt])
        merge_type_dicts([tot_sym_cnt, sym_cnt])
    log.info('Finished contact site (#objects: {}) and synapse (#objects: {})'
             ' extraction.'.format(len(cs_props[0]), len(syn_props[0])))
    if len(syn_props[0]) == 0:
        log.critical('WARNING: Did not find any synapses during extraction step.')
    # TODO: extract syn objects! maybe replace sj_0 Segmentation dataset by the overlapping CS<->
    #  sj objects -> run syn. extraction and sd_generation in parallel and return mi_0, vc_0 and
    #  syn_0 -> use syns as new sjs during rendering!
    #  -> Run CS generation in parallel with mapping to at least get the syn objects before
    #  rendering the neuron views (which need subcellular structures, there one can then use mi,
    #  vc and syn (instead of sj))
    dict_paths = []
    # dump intermediate results
    # TODO: size filter here or during write-out? TODO: use config parameter
    dict_p = "{}/cs_prop_dict.pkl".format(global_params.config.temp_path)
    with open(dict_p, "wb") as f:
        pkl.dump(cs_props, f)
    del cs_props
    dict_paths.append(dict_p)

    dict_p = "{}/syn_prop_dict.pkl".format(global_params.config.temp_path)
    with open(dict_p, "wb") as f:
        pkl.dump(syn_props, f)
    del syn_props
    dict_paths.append(dict_p)

    # convert counting dicts to store ratio of syn. type voxels
    dict_p = "{}/cs_sym_cnt.pkl".format(global_params.config.temp_path)
    with open(dict_p, "wb") as f:
        pkl.dump(tot_sym_cnt, f)
    del tot_sym_cnt
    dict_paths.append(dict_p)

    dict_p = "{}/cs_asym_cnt.pkl".format(global_params.config.temp_path)
    with open(dict_p, "wb") as f:
        pkl.dump(tot_asym_cnt, f)
    del tot_asym_cnt
    dict_paths.append(dict_p)

    # write cs and syn segmentation to KD and SD
    chunky.save_dataset(cset)
    kd = kd_factory(global_params.config.kd_seg_path)
    # convert ChunkDataset to syn and cs KD
    # TODO: spawn in parallel
    for obj_type in ['cs', 'syn']:
        path = "{}/knossosdatasets/{}_seg/".format(
            global_params.config.working_dir, obj_type)
        if os.path.isdir(path):
            log.debug('Found existing KD at {}. Removing it now.'.format(path))
            shutil.rmtree(path)
        target_kd = knossosdataset.KnossosDataset()
        scale = np.array(global_params.config.entries["Dataset"]["scaling"])
        target_kd.initialize_without_conf(path, kd.boundary, scale,
                                          kd.experiment_name, mags=[1, ])
        target_kd = knossosdataset.KnossosDataset()
        target_kd.initialize_from_knossos_path(path)
        export_cset_to_kd_batchjob(cset, target_kd, obj_type, [obj_type],
                                   offset=offset, size=size, stride=chunk_size,
                                   as_raw=False, orig_dtype=np.uint64,
                                   unified_labels=False,
                                   n_max_co_processes=n_max_co_processes,
                                   log=log)
        log.debug('Finished conversion of ChunkDataset ({}) into KnossosDataset'
                  ' ({})'.format(cset.path_head_folder, target_kd.knossos_path))

    # Write SD
    max_n_jobs = global_params.NNODES_TOTAL * 2
    path = "{}/knossosdatasets/syn_seg/".format(global_params.config.working_dir)
    path_cs = "{}/knossosdatasets/cs_seg/".format(global_params.config.working_dir)
    storage_location_ids = rep_helper.get_unique_subfold_ixs(n_folders_fs)
    multi_params = [(sv_id_block, n_folders_fs, path, path_cs) for sv_id_block in
                    basics.chunkify(storage_location_ids, max_n_jobs)]
    if not qu.batchjob_enabled():
        start_multiprocess_imap(_write_props_to_syn_singlenode_thread,
                                multi_params, nb_cpus=1, debug=False)
    else:
        qu.QSUB_script(multi_params, "write_props_to_syn_singlenode", log=log,
                       n_cores=global_params.NCORES_PER_NODE,
                       n_max_co_processes=global_params.NNODES_TOTAL,
                       remove_jobfolder=True)
    sd = segmentation.SegmentationDataset(
        working_dir=global_params.config.working_dir, obj_type='syn', version=0)
    dataset_analysis(sd, recompute=True, compute_meshprops=False)
    sd = segmentation.SegmentationDataset(
        working_dir=global_params.config.working_dir, obj_type='cs', version=0)
    dataset_analysis(sd, recompute=True, compute_meshprops=False)

    for p in dict_paths:
        os.remove(p)
    shutil.rmtree(cd_dir, ignore_errors=True)
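
# Usage sketch (illustrative; assumes an initialized working directory and, if
# batch jobs are enabled, a configured cluster environment). Restricting the
# extraction to a sub-volume via `cube_of_interest_bb` is optional:
#
#   bb = np.array([[0, 0, 0], [2048, 2048, 1024]])
#   extract_contact_sites(n_max_co_processes=20, chunk_size=(512, 512, 512),
#                         cube_of_interest_bb=bb)
#
# Afterwards the 'cs' and 'syn' SegmentationDatasets (version 0) are available
# in the working directory.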
from __future__ import absolute_import, division, print_function
# builtins is either provided by Python 3 or by the "future" module for Python 2 (http://python-future.org/)
from builtins import range, map, zip, filter, round, next, input, bytes, hex, oct, chr, int
from functools import reduce

from knossos_utils import knossosdataset
from knossos_utils import chunky

kd = knossosdataset.KnossosDataset()
kd.initialize_from_knossos_path("/path/to/knossosdir/")

cd = chunky.ChunkDataset()
# Example: Initialize the chunkdataset to span the whole knossosdataset with
# chunk edge length 512; box_size refers to the box the chunkdataset is
# operating on, which can also be a subset of the total volume. Use box_coords
# to define the offset of your box.
cd.initialize(kd, kd.boundary, [512, 512, 512], "/path/to/cd_home/",
              box_coords=[0, 0, 0], fit_box_size=True)

# After initializing once, the cd can be loaded via
cd = chunky.load_dataset("/path/to/cd_home/")

# All chunks are accessible via the chunk_dict. Say one wants chunk number 10:
chunk = cd.chunk_dict[10]

# Raw data should never be saved in the cd. One can load it with
raw = cd.chunk_dict[0].raw_data(show_progress=True)
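
# A possible follow-up (sketch): iterate over all chunks and fetch the raw data
# of each chunk directly from the underlying KnossosDataset. `coordinates` and
# `size` are attributes of the Chunk objects stored in `chunk_dict`; the
# knossos_utils call follows the (size, offset) usage of the dataset methods
# shown elsewhere in this document.
for chunk_id in cd.chunk_dict:
    ch = cd.chunk_dict[chunk_id]
    raw = kd.from_raw_cubes_to_matrix(ch.size, ch.coordinates)
    # ... process `raw` here ...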
if __name__ == "__main__":
    dictionary_elements = []
    seg_dict = []
    dictionary_elements.append("mi")
    dictionary_elements.append("vc")
    dictionary_elements.append("sj")
    # dictionary_elements.append("sv")
    # dictionary_elements.append("cs")
    offset = (10, 10, 10)
    print(global_params.wd)
    global_params.wd = '/wholebrain/u/atultm/SyConn/example_cube1/'
    cd_dir = global_params.config.working_dir + "chunkdatasets/sv/"
    chunk_size = [128] * 3
    kd = kd_factory(global_params.config.kd_seg_path)
    cd_cell = chunky.ChunkDataset()
    cd_cell.initialize(kd, kd.boundary, chunk_size, cd_dir,
                       box_coords=[0, 0, 0], fit_box_size=True)
    ch = cd_cell.chunk_dict[0]
    seg_cell = kd.from_overlaycubes_to_matrix(offset=ch.coordinates,
                                              size=ch.size)
    # for element in dictionary_elements:
    #     cd_dir = global_params.config.working_dir + "chunkdatasets/" + element + "/"
    #     # Class that contains a dict of chunks (with coordinates) after initializing it
    #     cd_mi = chunky.ChunkDataset()
    #     cd_mi.initialize(kd, kd.boundary, chunk_size, cd_dir,
    #                      box_coords=[0, 0, 0], fit_box_size=True)
    #     ch = cd_mi.chunk_dict[0]
    #     input_file_folder = element + "_stitched_components"
def run_create_sds(chunk_size=None, n_folders_fs=10000, max_n_jobs=None,
                   generate_sv_meshes=False, load_from_kd_overlaycubes=False,
                   cube_of_interest_bb=None):
    """
    Parameters
    ----------
    chunk_size : Tuple
        Edge lengths of the chunks processed at a time.
    max_n_jobs : int
    n_folders_fs : int
    generate_sv_meshes : bool
    load_from_kd_overlaycubes : bool
        Load prob/seg data from overlaycubes instead of raw cubes.
    cube_of_interest_bb : Tuple[np.ndarray]
        Defines the bounding box of the cube to process. By default this is
        set to (np.zeros(3), kd.boundary).

    Returns
    -------

    """
    if chunk_size is None:
        chunk_size = [512, 512, 512]
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 3
    log = initialize_logging('create_sds', global_params.config.working_dir +
                             '/logs/', overwrite=False)

    # Sets initial values of object
    kd = kd_factory(global_params.config.kd_seg_path)
    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=np.int), kd.boundary]
    size = cube_of_interest_bb[1] - cube_of_interest_bb[0] + 1
    offset = cube_of_interest_bb[0]

    # TODO: get rid of explicit voxel extraction, all info necessary should be extracted
    #  at the beginning, e.g. size, bounding box etc., and then refactor to only use
    #  those cached attributes!
    # resulting ChunkDataset, required for SV extraction --
    # Object extraction - 2h, the same has to be done for all cell organelles
    cd_dir = global_params.config.working_dir + "chunkdatasets/sv/"
    # Class that contains a dict of chunks (with coordinates) after initializing it
    cd = chunky.ChunkDataset()
    cd.initialize(kd, kd.boundary, chunk_size, cd_dir,
                  box_coords=[0, 0, 0], fit_box_size=True)
    log.info('Generating SegmentationDatasets for cell and cell '
             'organelle supervoxels.')
    oew.from_ids_to_objects(
        cd, "sv", overlaydataset_path=global_params.config.kd_seg_path,
        n_chunk_jobs=max_n_jobs, hdf5names=["sv"], n_max_co_processes=None,
        n_folders_fs=n_folders_fs, use_combined_extraction=True, size=size,
        offset=offset)

    # Object Processing -- Perform after mapping to also cache mapping ratios
    sd = SegmentationDataset("sv", working_dir=global_params.config.working_dir)
    sd_proc.dataset_analysis(sd, recompute=True, compute_meshprops=False)

    log.info("Extracted {} cell SVs. Preparing rendering locations "
             "(and meshes if not provided).".format(len(sd.ids)))
    start = time.time()

    # chunk them
    multi_params = chunkify(sd.so_dir_paths, max_n_jobs)
    # all other kwargs like obj_type='sv' and version default to the current
    # SV SegmentationDataset
    so_kwargs = dict(working_dir=global_params.config.working_dir, obj_type='sv')
    multi_params = [[par, so_kwargs] for par in multi_params]
    if generate_sv_meshes:
        _ = qu.QSUB_script(multi_params, "mesh_caching",
                           n_max_co_processes=global_params.NCORE_TOTAL)
    _ = qu.QSUB_script(multi_params, "sample_location_caching",
                       n_max_co_processes=global_params.NCORE_TOTAL)
    # recompute=False: only collect new sample_location property
    sd_proc.dataset_analysis(sd, compute_meshprops=True, recompute=False)
    log.info('Finished preparation of cell SVs after {:.0f}s.'.format(
        time.time() - start))

    # create SegmentationDataset for each cell organelle
    for co in global_params.existing_cell_organelles:
        start = time.time()
        cd_dir = global_params.config.working_dir + "chunkdatasets/{}/".format(co)
        cd.initialize(kd, kd.boundary, chunk_size, cd_dir,
                      box_coords=[0, 0, 0], fit_box_size=True)
        log.info('Started object extraction of cellular organelles "{}" from '
                 '{} chunks.'.format(co, len(cd.chunk_dict)))
        prob_kd_path_dict = {co: getattr(global_params.config,
                                         'kd_{}_path'.format(co))}
        # This creates a SegmentationDataset of type 'co'
        prob_thresh = global_params.config.entries["Probathresholds"][co]  # get probability threshold
        path = "{}/knossosdatasets/{}_seg/".format(
            global_params.config.working_dir, co)
        target_kd = knossosdataset.KnossosDataset()
        target_kd.initialize_without_conf(path, kd.boundary, kd.scale,
                                          kd.experiment_name, mags=[1, ])
        target_kd = knossosdataset.KnossosDataset()
        target_kd.initialize_from_knossos_path(path)
        oew.from_probabilities_to_objects(
            cd, co,
            # membrane_kd_path=global_params.config.kd_barrier_path,  # TODO: currently does not exist
            prob_kd_path_dict=prob_kd_path_dict,
            thresholds=[prob_thresh],
            workfolder=global_params.config.working_dir,
            hdf5names=[co], n_max_co_processes=None, target_kd=target_kd,
            n_folders_fs=n_folders_fs, debug=False, size=size, offset=offset,
            load_from_kd_overlaycubes=load_from_kd_overlaycubes)

        sd_co = SegmentationDataset(
            obj_type=co, working_dir=global_params.config.working_dir)

        # TODO: check if this is faster than the alternative below
        sd_proc.dataset_analysis(sd_co, recompute=True, compute_meshprops=False)
        multi_params = chunkify(sd_co.so_dir_paths, max_n_jobs)
        so_kwargs = dict(working_dir=global_params.config.working_dir, obj_type=co)
        multi_params = [[par, so_kwargs] for par in multi_params]
        _ = qu.QSUB_script(multi_params, "mesh_caching",
                           n_max_co_processes=global_params.NCORE_TOTAL)
        sd_proc.dataset_analysis(sd_co, recompute=False, compute_meshprops=True)
        # # Old alternative, requires many more reads/writes than the above solution
        # sd_proc.dataset_analysis(sd_co, recompute=True, compute_meshprops=True)

        # About 0.2 h per object class
        log.info('Started mapping of {} cellular organelles of type "{}" to '
                 'cell SVs.'.format(len(sd_co.ids), co))
        sd_proc.map_objects_to_sv(sd, co, global_params.config.kd_seg_path,
                                  n_jobs=max_n_jobs)
        log.info('Finished preparation of {} "{}"-SVs after {:.0f}s.'
                 ''.format(len(sd_co.ids), co, time.time() - start))
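
# Usage sketch (illustrative; assumes `global_params.wd` has been set and that
# the cell segmentation plus organelle probability maps exist as KnossosDatasets):
#
#   bb = [np.zeros(3, dtype=np.int), np.array([1024, 1024, 512])]
#   run_create_sds(chunk_size=[512, 512, 512], n_folders_fs=10000,
#                  generate_sv_meshes=True, cube_of_interest_bb=bb)
#
# This creates the 'sv' SegmentationDataset and one SegmentationDataset per
# entry in `global_params.existing_cell_organelles`, including the mapping of
# organelle objects to cell supervoxels.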