def generate_subcell_kd_from_proba(co, chunk_size=None, transf_func_kd_overlay=None,
                                   load_cellorganelles_from_kd_overlaycubes=False,
                                   cube_of_interest_bb=None, log=None, **kwargs):
    """
    Generate a KnossosDataset for the given subcellular structure key (e.g. 'mi').
    The required initial data format is a ChunkDataset located at
    ``"{}/chunkdatasets/{}/".format(global_params.config.working_dir, co)``.
    The resulting KD will be stored at
    ``"{}/knossosdatasets/{}_seg/".format(global_params.config.working_dir, co)``.
    See :func:`~syconn.extraction.object_extraction_wrapper.from_probabilities_to_kd` for details
    of the conversion from the initial probability map to the object segmentation. By default,
    thresholding and connected components are applied; the thresholds are set via the
    `config.ini` file, see ``syconn.global_params.config.entries["Probathresholds"]`` of an
    initialized :class:`~syconn.handler.config.DynConfig` object.

    Parameters
    ----------
    co : str
        Key of the subcellular structure, e.g. 'mi', 'vc' or 'sj'.
    chunk_size : Tuple
        Size of the cubes processed at a time. Default: [512, 512, 512].
    transf_func_kd_overlay : Optional[Dict[str, callable]]
        Transformation applied to data loaded from overlay cubes, keyed by `co`.
    load_cellorganelles_from_kd_overlaycubes : bool
        Load the organelle labels from KD overlay cubes instead of probability maps.
    cube_of_interest_bb : Tuple[Tuple[int]] or np.ndarray
        Sub-volume of the data set which is processed. Default: entire data set.
    log : logger
        Logger instance. Default: the module-level extraction logger.
    """
    if chunk_size is None:
        chunk_size = [512, 512, 512]
    if transf_func_kd_overlay is None:
        # default: no transformation applied to the overlay data
        transf_func_kd_overlay = {co: None}
    if log is None:
        log = log_extraction
    kd = basics.kd_factory(global_params.config.kd_seg_path)
    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=int), kd.boundary]
    size = cube_of_interest_bb[1] - cube_of_interest_bb[0] + 1
    offset = cube_of_interest_bb[0]
    cd_dir = "{}/chunkdatasets/{}/".format(global_params.config.working_dir, co)
    cd = chunky.ChunkDataset()
    cd.initialize(kd, kd.boundary, chunk_size, cd_dir,
                  box_coords=[0, 0, 0], fit_box_size=True,
                  list_of_coords=[])
    log.info('Started object extraction of cellular organelles "{}" from '
             '{} chunks.'.format(co, len(cd.chunk_dict)))
    prob_kd_path_dict = {co: getattr(global_params.config, 'kd_{}_path'.format(co))}
    # probability threshold for `co`, set via the 'Probathresholds' entry in config.ini
    prob_thresh = global_params.config.entries["Probathresholds"][co]

    # `from_probabilities_to_kd` will export a KD at `path`; remove it if it already exists
    path = global_params.config.kd_organelle_seg_paths[co]
    if os.path.isdir(path):
        log.debug('Found existing KD at {}. Removing it now.'.format(path))
        shutil.rmtree(path)
    target_kd = knossosdataset.KnossosDataset()
    scale = np.array(global_params.config.entries["Dataset"]["scaling"], dtype=np.float32)
    target_kd.initialize_without_conf(path, kd.boundary, scale, kd.experiment_name, mags=[1, ])
    # re-open the newly created dataset from disk
    target_kd = knossosdataset.KnossosDataset()
    target_kd.initialize_from_knossos_path(path)
    # membrane_kd_path=global_params.config.kd_barrier_path is not passed, TODO: currently does not exist
    from_probabilities_to_kd(cd, co,
                             prob_kd_path_dict=prob_kd_path_dict, thresholds=[prob_thresh],
                             hdf5names=[co], n_max_co_processes=None, target_kd=target_kd,
                             debug=False, size=size, offset=offset,
                             load_from_kd_overlaycubes=load_cellorganelles_from_kd_overlaycubes,
                             transf_func_kd_overlay=transf_func_kd_overlay[co], log=log, **kwargs)
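
# Usage sketch (assumption, not part of the original source): with the working
# directory and the per-organelle probability KnossosDatasets configured in
# global_params.config, the mitochondria ('mi') segmentation KD could be
# generated like this.
if __name__ == "__main__":
    generate_subcell_kd_from_proba('mi', chunk_size=[256, 256, 256])

    # Illustration only (not SyConn's implementation): the default conversion
    # described in the docstring, thresholding followed by connected
    # components, boils down to the following on a single probability cube.
    import numpy as np
    from scipy import ndimage
    proba = np.random.rand(64, 64, 64)          # placeholder probability map
    foreground = proba > 0.5                    # threshold from 'Probathresholds'
    labels, n_objs = ndimage.label(foreground)  # one ID per connected component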
Example 2
def map_ids(wd,
            n_jobs=1000,
            qsub_pe=None,
            qsub_queue=None,
            nb_cpus=None,
            n_max_co_processes=None,
            chunk_size=(128, 128, 128),
            debug=False):
    """
    Chunk-wise ID mapping over the cell supervoxel ChunkDataset of the working
    directory `wd`; jobs are distributed via ``start_multiprocess_imap``.
    Note: `qsub_pe`, `qsub_queue` and `nb_cpus` are currently unused.
    """
    global_params.wd = wd
    kd = kd_factory(global_params.config.kd_seg_path)

    cd_dir = global_params.config.working_dir + "chunkdatasets/sv/"

    cd_cell = chunky.ChunkDataset()
    cd_cell.initialize(kd,
                       kd.boundary,
                       chunk_size,
                       cd_dir,
                       box_coords=[0, 0, 0],
                       fit_box_size=True)

    # split the chunk coordinates into 100 batches and build the job parameters
    multi_params = []
    for chunkify_id, coord_chunk in enumerate(chunkify(
            [cd_cell.chunk_dict[key].coordinates for key in cd_cell.chunk_dict], 100)):
        multi_params.append([coord_chunk, chunk_size, wd, chunkify_id])

    sm.start_multiprocess_imap(_map_ids_thread,
                               multi_params,
                               nb_cpus=n_max_co_processes,
                               verbose=debug,
                               debug=debug)
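
# Usage sketch (assumption, not part of the original source): `map_ids` only
# needs the working directory; chunking and job submission are handled as shown
# above. The path below is a placeholder.
if __name__ == "__main__":
    map_ids('/path/to/working_dir/', n_max_co_processes=20)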
def extract_contact_sites(n_max_co_processes: Optional[int] = None,
                          chunk_size: Optional[Tuple[int, int, int]] = None,
                          log: Optional[Logger] = None,
                          max_n_jobs: Optional[int] = None,
                          cube_of_interest_bb: Optional[np.ndarray] = None,
                          n_folders_fs: int = 1000):
    """
    Extracts contact sites and their overlap with `sj` objects and stores them in a
    :class:`~syconn.reps.segmentation.SegmentationDataset` of type ``cs`` and ``syn``
    respectively. If synapse type is available, this information will be stored
    as the voxel-ratio per class in the attribute dictionary of the ``syn``
    objects (keys: ``sym_prop``, ``asym_prop``).

    Notes:
        Replaced ``find_contact_sites``, ``extract_agg_contact_sites``,
        ``syn_gen_via_cset`` and ``extract_synapse_type``.

    Args:
        n_max_co_processes: Number of parallel workers.
        chunk_size: Sub-cube volume which is processed at a time.
        log: Logger.
        max_n_jobs: Maximum number of jobs.
        cube_of_interest_bb: Sub-volume of the data set which is processed.
            Default: Entire data set.
        n_folders_fs: Number of folders used for organizing supervoxel data.

    """
    if extract_cs_syntype is None:
        msg = '`extract_contact_sites` requires the cythonized method ' \
              '`extract_cs_syntype`. Use `find_contact_sites` and others ' \
              'for contact site processing.'
        log_extraction.error(msg)
        raise ImportError(msg)
    kd = kd_factory(global_params.config.kd_seg_path)
    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=int), kd.boundary]
    if chunk_size is None:
        chunk_size = (512, 512, 512)
    size = cube_of_interest_bb[1] - cube_of_interest_bb[0] + 1
    offset = cube_of_interest_bb[0]

    # Initial contact site extraction
    cd_dir = global_params.config.temp_path + "/chunkdatasets/cs/"
    # Class that contains a dict of chunks (with coordinates) after initializing it
    cset = chunky.ChunkDataset()
    cset.initialize(kd,
                    kd.boundary,
                    chunk_size,
                    cd_dir,
                    box_coords=[0, 0, 0],
                    fit_box_size=True)

    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
    if log is None:
        log = log_extraction
    if size is not None and offset is not None:
        chunk_list, _ = \
            calculate_chunk_numbers_for_box(cset, offset, size)
    else:
        chunk_list = [ii for ii in range(len(cset.chunk_dict))]
    # shuffle the chunk list to get a more balanced workload
    rand_ixs = np.arange(len(chunk_list))
    np.random.shuffle(rand_ixs)
    chunk_list = np.array(chunk_list)[rand_ixs]

    os.makedirs(cset.path_head_folder, exist_ok=True)
    multi_params = []
    # TODO: currently pickles Chunk objects -> job submission might be slow
    for chunk_k in chunkify(chunk_list, max_n_jobs):
        multi_params.append([[cset.chunk_dict[k] for k in chunk_k],
                             global_params.config.kd_seg_path])

    if not qu.batchjob_enabled():
        results = start_multiprocess_imap(_contact_site_extraction_thread,
                                          multi_params,
                                          debug=False,
                                          nb_cpus=n_max_co_processes)
    else:
        path_to_out = qu.QSUB_script(multi_params,
                                     "contact_site_extraction",
                                     n_max_co_processes=n_max_co_processes,
                                     log=log)
        out_files = glob.glob(path_to_out + "/*")
        results = []
        for out_file in out_files:
            with open(out_file, 'rb') as f:
                results.append(pkl.load(f))
        shutil.rmtree(os.path.abspath(path_to_out + "/../"),
                      ignore_errors=True)
    # reduce step
    cs_props = [{}, defaultdict(list), {}]
    syn_props = [{}, defaultdict(list), {}]
    tot_sym_cnt = {}
    tot_asym_cnt = {}
    for curr_props, curr_syn_props, asym_cnt, sym_cnt in results:
        merge_prop_dicts([cs_props, curr_props])
        merge_prop_dicts([syn_props, curr_syn_props])
        merge_type_dicts([tot_asym_cnt, asym_cnt])
        merge_type_dicts([tot_sym_cnt, sym_cnt])
    log.info('Finished contact site (#objects: {}) and synapse (#objects: {})'
             ' extraction.'.format(len(cs_props[0]), len(syn_props[0])))
    if len(syn_props[0]) == 0:
        log.critical(
            'WARNING: Did not find any synapses during extraction step.')
    # TODO: extract syn objects! maybe replace sj_0 Segmentation dataset by the overlapping CS<->
    #  sj objects -> run syn. extraction and sd_generation in parallel and return mi_0, vc_0 and
    #  syn_0 -> use syns as new sjs during rendering!
    #  -> Run CS generation in parallel with mapping to at least get the syn objects before
    #  rendering the neuron views (which need subcellular structures, there one can then use mi,
    #  vc and syn (instead of sj))
    dict_paths = []
    # dump intermediate results
    # TODO: size filter here or during write-out? TODO: use config parameter
    dict_p = "{}/cs_prop_dict.pkl".format(global_params.config.temp_path)
    with open(dict_p, "wb") as f:
        pkl.dump(cs_props, f)
    del cs_props
    dict_paths.append(dict_p)

    dict_p = "{}/syn_prop_dict.pkl".format(global_params.config.temp_path)
    with open(dict_p, "wb") as f:
        pkl.dump(syn_props, f)
    del syn_props
    dict_paths.append(dict_p)

    # convert counting dicts to store ratio of syn. type voxels
    dict_p = "{}/cs_sym_cnt.pkl".format(global_params.config.temp_path)
    with open(dict_p, "wb") as f:
        pkl.dump(tot_sym_cnt, f)
    del tot_sym_cnt
    dict_paths.append(dict_p)

    dict_p = "{}/cs_asym_cnt.pkl".format(global_params.config.temp_path)
    with open(dict_p, "wb") as f:
        pkl.dump(tot_asym_cnt, f)
    del tot_asym_cnt
    dict_paths.append(dict_p)

    # write cs and syn segmentation to KD and SD
    chunky.save_dataset(cset)
    kd = kd_factory(global_params.config.kd_seg_path)
    # convert the ChunkDataset into 'cs' and 'syn' KnossosDatasets
    # TODO: spawn in parallel
    for obj_type in ['cs', 'syn']:
        path = "{}/knossosdatasets/{}_seg/".format(
            global_params.config.working_dir, obj_type)
        if os.path.isdir(path):
            log.debug('Found existing KD at {}. Removing it now.'.format(path))
            shutil.rmtree(path)
        target_kd = knossosdataset.KnossosDataset()
        scale = np.array(global_params.config.entries["Dataset"]["scaling"])
        target_kd.initialize_without_conf(path, kd.boundary, scale,
                                          kd.experiment_name, mags=[1, ])
        # re-open the newly created dataset from disk
        target_kd = knossosdataset.KnossosDataset()
        target_kd.initialize_from_knossos_path(path)
        export_cset_to_kd_batchjob(cset,
                                   target_kd,
                                   obj_type, [obj_type],
                                   offset=offset,
                                   size=size,
                                   stride=chunk_size,
                                   as_raw=False,
                                   orig_dtype=np.uint64,
                                   unified_labels=False,
                                   n_max_co_processes=n_max_co_processes,
                                   log=log)
        log.debug(
            'Finished conversion of ChunkDataset ({}) into KnossosDataset'
            ' ({})'.format(cset.path_head_folder, target_kd.knossos_path))

    # Write SD
    max_n_jobs = global_params.NNODES_TOTAL * 2
    path = "{}/knossosdatasets/syn_seg/".format(
        global_params.config.working_dir)
    path_cs = "{}/knossosdatasets/cs_seg/".format(
        global_params.config.working_dir)
    storage_location_ids = rep_helper.get_unique_subfold_ixs(n_folders_fs)
    multi_params = [
        (sv_id_block, n_folders_fs, path, path_cs)
        for sv_id_block in basics.chunkify(storage_location_ids, max_n_jobs)
    ]

    if not qu.batchjob_enabled():
        start_multiprocess_imap(_write_props_to_syn_singlenode_thread,
                                multi_params,
                                nb_cpus=1,
                                debug=False)
    else:
        qu.QSUB_script(multi_params,
                       "write_props_to_syn_singlenode",
                       log=log,
                       n_cores=global_params.NCORES_PER_NODE,
                       n_max_co_processes=global_params.NNODES_TOTAL,
                       remove_jobfolder=True)

    sd = segmentation.SegmentationDataset(
        working_dir=global_params.config.working_dir,
        obj_type='syn',
        version=0)
    dataset_analysis(sd, recompute=True, compute_meshprops=False)
    sd = segmentation.SegmentationDataset(
        working_dir=global_params.config.working_dir, obj_type='cs', version=0)
    dataset_analysis(sd, recompute=True, compute_meshprops=False)

    for p in dict_paths:
        os.remove(p)
    shutil.rmtree(cd_dir, ignore_errors=True)
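
# Usage sketch (assumption, not part of the original source): restricting the
# extraction to a sub-volume by passing `cube_of_interest_bb` as
# [offset, upper corner] in voxel coordinates of the segmentation dataset.
if __name__ == "__main__":
    bb = [np.zeros(3, dtype=int), np.array([1024, 1024, 512])]
    extract_contact_sites(chunk_size=(256, 256, 256), cube_of_interest_bb=bb,
                          n_max_co_processes=20)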
Example 4
from __future__ import absolute_import, division, print_function
# builtins is either provided by Python 3 or by the "future" module for Python 2 (http://python-future.org/)
from builtins import range, map, zip, filter, round, next, input, bytes, hex, oct, chr, int
from functools import reduce

from knossos_utils import knossosdataset
from knossos_utils import chunky
kd = knossosdataset.KnossosDataset()
kd.initialize_from_knossos_path("/path/to/knossosdir/")

cd = chunky.ChunkDataset()

# Example: initialize the ChunkDataset to span the whole KnossosDataset with a
# chunk edge length of 512. box_size refers to the box the ChunkDataset operates
# on; this can also be a subset of the total volume. Use box_coords to define
# the offset of that box.
cd.initialize(kd,
              kd.boundary, [512, 512, 512],
              "/path/to/cd_home/",
              box_coords=[0, 0, 0],
              fit_box_size=True)

# After it has been initialized once, the cd can be loaded via
cd = chunky.load_dataset("/path/to/cd_home/")

# All chunks are accessible via the chunk_dict. Say one wants number 10
chunk = cd.chunk_dict[10]

# Raw data should never be saved in the cd; it can be loaded with
raw = cd.chunk_dict[0].raw_data(show_progress=True)
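
# A small extension of the example above (sketch; attribute names as used in the
# snippets of this document): iterate over all chunks and print their offsets
# and sizes without loading any data.
for chunk_id, chunk in cd.chunk_dict.items():
    print(chunk_id, chunk.coordinates, chunk.size)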
Example 5
# imports assumed for this standalone snippet (not shown in the original)
from knossos_utils import chunky
from syconn import global_params
from syconn.handler.basics import kd_factory

if __name__ == "__main__":
    dictionary_elements = []
    seg_dict = []
    dictionary_elements.append("mi")
    dictionary_elements.append("vc")
    dictionary_elements.append("sj")
    #dictionary_elements.append("sv")
    #dictionary_elements.append("cs")
    offset = (10, 10, 10)
    print(global_params.wd)
    global_params.wd = '/wholebrain/u/atultm/SyConn/example_cube1/'
    cd_dir = global_params.config.working_dir + "chunkdatasets/sv/"
    chunk_size = [128] * 3
    kd = kd_factory(global_params.config.kd_seg_path)
    cd_cell = chunky.ChunkDataset()
    cd_cell.initialize(kd, kd.boundary, chunk_size, cd_dir,
                       box_coords=[0, 0, 0], fit_box_size=True)

    ch = cd_cell.chunk_dict[0]

    seg_cell = kd.from_overlaycubes_to_matrix(offset=ch.coordinates,
                                              size=ch.size)
    # for element in dictionary_elements:
    #     cd_dir = global_params.config.working_dir + "chunkdatasets/" + element + "/"
    #     # Class that contains a dict of chunks (with coordinates) after initializing it
    #     cd_mi = chunky.ChunkDataset()
    #     cd_mi.initialize(kd, kd.boundary, chunk_size, cd_dir,
    #                   box_coords=[0, 0, 0], fit_box_size=True)
    #     ch = cd_mi.chunk_dict[0]
    #     input_file_folder = element + "_stitched_components"
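
    # Sketch (assumption, not part of the original snippet): `seg_cell` holds the
    # supervoxel IDs of the first chunk as a numpy array; a quick sanity check is
    # to count the distinct IDs it contains.
    import numpy as np
    print('Chunk 0 contains {} distinct supervoxel IDs.'.format(len(np.unique(seg_cell))))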
Example 6
def run_create_sds(chunk_size=None,
                   n_folders_fs=10000,
                   max_n_jobs=None,
                   generate_sv_meshes=False,
                   load_from_kd_overlaycubes=False,
                   cube_of_interest_bb=None):
    """

    Parameters
    ----------
    chunk_size :
    max_n_jobs : int
    n_folders_fs :
    generate_sv_meshes :
    load_from_kd_overlaycubes : bool
        Load prob/seg data from overlaycubes instead of raw cubes.
    cube_of_interest_bb : Tuple[np.ndarray]
        Defines the bounding box of the cube to process. By default this is
        set to (np.zoers(3); kd.boundary).


    Returns
    -------

    """
    if chunk_size is None:
        chunk_size = [512, 512, 512]
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 3
    log = initialize_logging('create_sds',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)

    # Sets initial values of object
    kd = kd_factory(global_params.config.kd_seg_path)
    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=int), kd.boundary]
    size = cube_of_interest_bb[1] - cube_of_interest_bb[0] + 1
    offset = cube_of_interest_bb[0]
    # TODO: get rid of explicit voxel extraction, all info necessary should be extracted
    #  at the beginning, e.g. size, bounding box etc and then refactor to only use those cached attributes!
    # resulting ChunkDataset, required for SV extraction
    # Object extraction takes ~2 h; the same has to be done for all cell organelles
    cd_dir = global_params.config.working_dir + "chunkdatasets/sv/"
    # Class that contains a dict of chunks (with coordinates) after initializing it
    cd = chunky.ChunkDataset()
    cd.initialize(kd,
                  kd.boundary,
                  chunk_size,
                  cd_dir,
                  box_coords=[0, 0, 0],
                  fit_box_size=True)
    log.info('Generating SegmentationDatasets for cell and cell '
             'organelle supervoxels.')
    oew.from_ids_to_objects(
        cd,
        "sv",
        overlaydataset_path=global_params.config.kd_seg_path,
        n_chunk_jobs=max_n_jobs,
        hdf5names=["sv"],
        n_max_co_processes=None,
        n_folders_fs=n_folders_fs,
        use_combined_extraction=True,
        size=size,
        offset=offset)

    # Object Processing -- Perform after mapping to also cache mapping ratios
    sd = SegmentationDataset("sv",
                             working_dir=global_params.config.working_dir)
    sd_proc.dataset_analysis(sd, recompute=True, compute_meshprops=False)

    log.info("Extracted {} cell SVs. Preparing rendering locations "
             "(and meshes if not provided).".format(len(sd.ids)))
    start = time.time()
    # chunk them
    multi_params = chunkify(sd.so_dir_paths, max_n_jobs)
    # all other kwargs (e.g. obj_type='sv' and version) default to the current SV SegmentationDataset
    so_kwargs = dict(working_dir=global_params.config.working_dir,
                     obj_type='sv')
    multi_params = [[par, so_kwargs] for par in multi_params]
    if generate_sv_meshes:
        _ = qu.QSUB_script(multi_params,
                           "mesh_caching",
                           n_max_co_processes=global_params.NCORE_TOTAL)
    _ = qu.QSUB_script(multi_params,
                       "sample_location_caching",
                       n_max_co_processes=global_params.NCORE_TOTAL)
    # recompute=False: only collect new sample_location property
    sd_proc.dataset_analysis(sd, compute_meshprops=True, recompute=False)
    log.info(
        'Finished preparation of cell SVs after {:.0f}s.'.format(time.time() -
                                                                 start))
    # create SegmentationDataset for each cell organelle
    for co in global_params.existing_cell_organelles:
        start = time.time()
        cd_dir = global_params.config.working_dir + "chunkdatasets/{}/".format(
            co)
        cd.initialize(kd,
                      kd.boundary,
                      chunk_size,
                      cd_dir,
                      box_coords=[0, 0, 0],
                      fit_box_size=True)
        log.info('Started object extraction of cellular organelles "{}" from '
                 '{} chunks.'.format(co, len(cd.chunk_dict)))
        prob_kd_path_dict = {co: getattr(global_params.config,
                                         'kd_{}_path'.format(co))}
        # This creates a SegmentationDataset of type `co`
        prob_thresh = global_params.config.entries["Probathresholds"][co]  # get probability threshold

        path = "{}/knossosdatasets/{}_seg/".format(
            global_params.config.working_dir, co)
        target_kd = knossosdataset.KnossosDataset()
        target_kd.initialize_without_conf(path, kd.boundary, kd.scale,
                                          kd.experiment_name, mags=[1, ])
        # re-open the newly created dataset from disk
        target_kd = knossosdataset.KnossosDataset()
        target_kd.initialize_from_knossos_path(path)
        # membrane_kd_path=global_params.config.kd_barrier_path is not passed, TODO: currently does not exist
        oew.from_probabilities_to_objects(
            cd, co,
            prob_kd_path_dict=prob_kd_path_dict,
            thresholds=[prob_thresh],
            workfolder=global_params.config.working_dir,
            hdf5names=[co],
            n_max_co_processes=None,
            target_kd=target_kd,
            n_folders_fs=n_folders_fs,
            debug=False,
            size=size,
            offset=offset,
            load_from_kd_overlaycubes=load_from_kd_overlaycubes)
        sd_co = SegmentationDataset(
            obj_type=co, working_dir=global_params.config.working_dir)

        # TODO: check if this is faster than the alternative below
        sd_proc.dataset_analysis(sd_co,
                                 recompute=True,
                                 compute_meshprops=False)
        multi_params = chunkify(sd_co.so_dir_paths, max_n_jobs)
        so_kwargs = dict(working_dir=global_params.config.working_dir,
                         obj_type=co)
        multi_params = [[par, so_kwargs] for par in multi_params]
        _ = qu.QSUB_script(multi_params,
                           "mesh_caching",
                           n_max_co_processes=global_params.NCORE_TOTAL)
        sd_proc.dataset_analysis(sd_co,
                                 recompute=False,
                                 compute_meshprops=True)
        # # Old alternative, requires many more reads/writes than the solution above
        # sd_proc.dataset_analysis(sd_co, recompute=True, compute_meshprops=True)

        # About 0.2 h per object class
        log.info('Started mapping of {} cellular organelles of type "{}" to '
                 'cell SVs.'.format(len(sd_co.ids), co))
        sd_proc.map_objects_to_sv(sd,
                                  co,
                                  global_params.config.kd_seg_path,
                                  n_jobs=max_n_jobs)
        log.info('Finished preparation of {} "{}"-SVs after {:.0f}s.'
                 ''.format(len(sd_co.ids), co,
                           time.time() - start))