def map_ids(wd,
            n_jobs=1000,
            qsub_pe=None,
            qsub_queue=None,
            nb_cpus=None,
            n_max_co_processes=None,
            chunk_size=(128, 128, 128),
            debug=False):

    global_params.wd = wd
    kd = kd_factory(global_params.config.kd_seg_path)

    cd_dir = global_params.config.working_dir + "chunkdatasets/sv/"

    cd_cell = chunky.ChunkDataset()
    cd_cell.initialize(kd,
                       kd.boundary,
                       chunk_size,
                       cd_dir,
                       box_coords=[0, 0, 0],
                       fit_box_size=True)

    multi_params = []
    chunkify_id = 0
    for coord_chunk in chunkify(
        [cd_cell.chunk_dict[key].coordinates for key in cd_cell.chunk_dict],
            100):
        multi_params.append([coord_chunk, chunk_size, wd, chunkify_id])
        chunkify_id += 1

    sm.start_multiprocess_imap(_map_ids_thread,
                               multi_params,
                               nb_cpus=n_max_co_processes,
                               verbose=debug,
                               debug=debug)


def _map_ids_thread(args):
    # Worker executed by `map_ids`: for every chunk, count per cell-supervoxel
    # id how many voxels of each organelle id (currently replaced by toy data)
    # overlap it, then pickle the per-worker result to `voxel_<chunkify_id>.pkl`.
    coord_list = args[0]
    chunk_size = args[1]
    wd = args[2]
    chunkify_id = args[3]

    worker_sv_dc = {}
    kd_obj = {}
    small_dc = {}
    stri = wd + '/voxel_%s.pkl' % chunkify_id
    f = open(stri, "wb")

    for obj_type in global_params.existing_cell_organelles:
        small_dc[obj_type] = {}
        kd_obj[obj_type] = kd_factory(
            global_params.config.entries['Paths']['kd_%s' % obj_type])

    kd_cell = kd_factory(global_params.config.kd_seg_path)

    for coord in coord_list:
        seg_cell = kd_cell.from_overlaycubes_to_matrix(
            offset=coord, size=chunk_size).flatten()

        seg_obj = {}
        for obj in kd_obj:
            # seg_obj[obj] = kd_obj[obj].from_overlaycubes_to_matrix(offset=coord, size=chunk_size).flatten()
            seg_obj[obj] = create_toy_data(chunk_size, 3).flatten()

        for unique_cell_id in np.unique(seg_cell):
            if unique_cell_id in worker_sv_dc:
                continue
            # each cell id needs its own counter dicts; assigning `small_dc`
            # directly would alias a single shared dict across all cell ids
            worker_sv_dc[unique_cell_id] = {obj_type: {} for obj_type in small_dc}

        for vox in range(len(seg_cell)):
            cell_id = seg_cell[vox]

            for obj in kd_obj:
                j = seg_obj[obj][vox]
                if j in worker_sv_dc[cell_id][obj]:
                    worker_sv_dc[cell_id][obj][j] += 1
                else:
                    worker_sv_dc[cell_id][obj][j] = 1

    pickle.dump(worker_sv_dc, f)
    f.close()
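
A possible follow-up (not part of the original module; ``merge_worker_voxel_dicts`` is a hypothetical helper): each worker of `map_ids` leaves a `voxel_<chunkify_id>.pkl` file in the working directory, and the per-chunk counts could be merged into one mapping along these lines.

import glob
import pickle


def merge_worker_voxel_dicts(wd):
    # merge all per-worker pickles into a single
    # {cell_id: {obj_type: {obj_id: voxel_count}}} dictionary
    merged = {}
    for fname in glob.glob(wd + '/voxel_*.pkl'):
        with open(fname, 'rb') as fh:
            worker_sv_dc = pickle.load(fh)
        for cell_id, obj_dcs in worker_sv_dc.items():
            merged_cell = merged.setdefault(cell_id, {})
            for obj_type, counts in obj_dcs.items():
                merged_obj = merged_cell.setdefault(obj_type, {})
                for obj_id, cnt in counts.items():
                    merged_obj[obj_id] = merged_obj.get(obj_id, 0) + cnt
    return merged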
Example #3
def run_syn_generation(chunk_size: Tuple[int, int, int] = (512, 512, 512),
                       n_folders_fs: int = 10000,
                       max_n_jobs: Optional[int] = None,
                       cube_of_interest_bb: Optional[np.ndarray] = None):
    """
    Run the synapse generation. Will create
    :class:`~syconn.reps.segmentation.SegmentationDataset` objects with
    the following versions:
        * 'cs': Contact site objects between supervoxels.
        * 'syn': Objects representing the overlap between 'cs' and the initial
          synaptic junction predictions. Note: These objects effectively represent
          synapse fragments between supervoxels.
        * 'syn_ssv': Agglomerated 'syn' objects based on the supervoxel graph.

    Args:
        chunk_size: The size of processed cubes.
        n_folders_fs: Number of folders used to create the folder structure in
            each :class:`~syconn.reps.segmentation.SegmentationDataset`.
        max_n_jobs: Number of parallel jobs.
        cube_of_interest_bb: Defines the bounding box of the cube to process.
            By default this is set to (np.zeros(3), kd.boundary).
    """
    if max_n_jobs is None:
        max_n_jobs = global_params.config.ncore_total * 2

    log = initialize_logging('synapse_generation',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)

    kd_seg_path = global_params.config.kd_seg_path
    kd = kd_factory(kd_seg_path)

    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=int), kd.boundary]  # np.int was removed in recent numpy

    ces.extract_contact_sites(chunk_size=chunk_size,
                              log=log,
                              max_n_jobs=max_n_jobs,
                              cube_of_interest_bb=cube_of_interest_bb,
                              n_folders_fs=n_folders_fs)
    log.info('SegmentationDataset of type "cs" and "syn" was generated.')

    # # TODO: add check for SSD existence, which is required at this point
    # # This creates an SD of type 'syn_ssv'
    cps.combine_and_split_syn(
        global_params.config.working_dir,
        resume_job=False,
        cs_gap_nm=global_params.config['cell_objects']['cs_gap_nm'],
        log=log,
        n_folders_fs=n_folders_fs)
    log.info('Synapse objects were created.')

    sd_syn_ssv = SegmentationDataset(
        working_dir=global_params.config.working_dir, obj_type='syn_ssv')

    dataset_analysis(sd_syn_ssv, compute_meshprops=True)
    log.info('SegmentationDataset of type "syn_ssv" was generated.')

    cps.map_objects_to_synssv(global_params.config.working_dir, log=log)
    log.info('Cellular organelles were mapped to "syn_ssv".')

    cps.classify_synssv_objects(global_params.config.working_dir, log=log)
    log.info('Synapse prediction finished.')

    log.info('Collecting and writing syn-ssv objects to SSV attribute '
             'dictionary.')
    # This needs to be run after `classify_synssv_objects` and before
    # `map_synssv_objects` if the latter uses thresholding for synaptic objects
    # just collect new data: ``recompute=False``
    dataset_analysis(sd_syn_ssv, compute_meshprops=False, recompute=False)
    # TODO: decide whether this should happen after prob thresholding or not
    map_synssv_objects(log=log)
    log.info('Finished.')
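
A minimal, hypothetical usage sketch for the routine above; the bounding box and job count are placeholders and ``global_params.wd`` is assumed to already point at a prepared working directory.

import numpy as np

# restrict synapse extraction to a 1024**3 sub-volume (placeholder values)
bb = np.array([np.zeros(3, dtype=int), [1024, 1024, 1024]])
run_syn_generation(chunk_size=(512, 512, 512), max_n_jobs=100,
                   cube_of_interest_bb=bb)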
Example #4
def load_seg_data(offset):
    global_params.wd = "/u/mariakaw/SyConn/example_cube1"
    kd_cell = basics.kd_factory(global_params.config.kd_seg_path)
    return kd_cell.from_overlaycubes_to_matrix((256, 256, 256), offset)
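
A quick usage sketch for the loader above; the offset is an arbitrary example and the hard-coded working directory inside the function is assumed to exist.

seg = load_seg_data(offset=(0, 0, 0))
print(seg.shape)  # a (256, 256, 256) segmentation cube is expected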
Example #5
def init_cell_subcell_sds(chunk_size: Optional[Tuple[int, int, int]] = None,
                          n_folders_fs: int = 10000, n_folders_fs_sc: int = 10000,
                          max_n_jobs: Optional[int] = None,
                          load_cellorganelles_from_kd_overlaycubes: bool = False,
                          transf_func_kd_overlay: Optional[Callable] = None,
                          cube_of_interest_bb: Optional[np.ndarray] = None):
    """
    Todo:
        * Don't extract sj objects and replace their use-cases with syn objects (?).

    Args:
        chunk_size: Size of the cubes which are processed by each worker.
        n_folders_fs: Number of folders used to create the folder structure in
            the resulting :class:`~syconn.reps.segmentation.SegmentationDataset`
            for the cell supervoxels (``version='sv'``).
        n_folders_fs_sc: Number of folders used to create the folder structure in
            the resulting :class:`~syconn.reps.segmentation.SegmentationDataset`
            for the cell organelle supervoxels (e.g. ``version='mi'``).
        max_n_jobs: Number of parallel jobs.
        load_cellorganelles_from_kd_overlaycubes: Load the cell organelle
            prob./seg. data from overlay cubes instead of raw cubes.
        transf_func_kd_overlay: Transformation applied on the prob. map or segmentation
            data.
        cube_of_interest_bb: Bounding box of the (sub-)volume of the dataset
            which is processed.
    """
    log = initialize_logging('create_sds', global_params.config.working_dir +
                             '/logs/', overwrite=True)
    if transf_func_kd_overlay is None:
        transf_func_kd_overlay = {k: None for k in global_params.existing_cell_organelles}
    if chunk_size is None:
        chunk_size = [512, 512, 512]
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
        # loading cached data or adapt number of jobs/cache size dynamically, dependent on the
        # dataset
    kd = kd_factory(global_params.config.kd_seg_path)
    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=int), kd.boundary]

    log.info('Converting predictions of cellular organelles to KnossosDatasets for every '
             'type available: {}.'.format(global_params.existing_cell_organelles))
    start = time.time()
    ps = [Process(target=kd_init, args=[co, chunk_size, transf_func_kd_overlay,
                                        load_cellorganelles_from_kd_overlaycubes,
                                        cube_of_interest_bb, log])
          for co in global_params.existing_cell_organelles]
    for p in ps:
        p.start()
        time.sleep(5)
    for p in ps:
        p.join()
    log.info('Finished KD generation after {:.0f}s.'.format(time.time() - start))

    log.info('Generating SegmentationDatasets for subcellular structures {} and'
             ' cell supervoxels.'.format(global_params.existing_cell_organelles))
    start = time.time()
    sd_proc.map_subcell_extract_props(
        global_params.config.kd_seg_path, global_params.config.kd_organelle_seg_paths,
        n_folders_fs=n_folders_fs, n_folders_fs_sc=n_folders_fs_sc, n_chunk_jobs=max_n_jobs,
        cube_of_interest_bb=cube_of_interest_bb, chunk_size=chunk_size, log=log)
    log.info('Finished extraction and mapping after {:.2f}s.'
             ''.format(time.time() - start))

    log.info('Caching properties of subcellular structures {} and cell'
             ' supervoxels'.format(global_params.existing_cell_organelles))
    start = time.time()
    ps = [Process(target=sd_init, args=[co, max_n_jobs, log])
          for co in ["sv"] + global_params.existing_cell_organelles]
    for p in ps:
        p.start()
        time.sleep(5)
    for p in ps:
        p.join()
    log.info('Finished SD caching after {:.2f}s.'
             ''.format(time.time() - start))
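
A hypothetical invocation of the initialization routine above, e.g. for a 2048**3 sub-volume; the size is a placeholder and ``global_params.wd`` is assumed to be configured.

import numpy as np

bb = np.array([np.zeros(3, dtype=int), [2048, 2048, 2048]])
init_cell_subcell_sds(chunk_size=(512, 512, 512),
                      load_cellorganelles_from_kd_overlaycubes=True,
                      cube_of_interest_bb=bb)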
Example #6
def run_syn_generation(chunk_size=(512, 512, 512),
                       n_folders_fs=10000,
                       max_n_jobs=None,
                       cube_of_interest_bb=None):
    """

    Parameters
    ----------
    chunk_size :
    n_folders_fs :
    max_n_jobs :
    cube_of_interest_bb : Tuple[np.ndarray]
        Defines the bounding box of the cube to process. By default this is
        set to (np.zoers(3); kd.boundary).

    Returns
    -------

    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2

    log = initialize_logging('synapse_generation',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)

    kd_seg_path = global_params.config.kd_seg_path
    kd = kd_factory(kd_seg_path)

    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=int), kd.boundary]

    ces.extract_contact_sites(chunk_size=chunk_size,
                              log=log,
                              max_n_jobs=max_n_jobs,
                              cube_of_interest_bb=cube_of_interest_bb,
                              n_folders_fs=n_folders_fs)
    log.info('SegmentationDataset of type "cs" and "syn" was generated.')

    # TODO: add check for SSD existence, which is required at this point
    # This creates an SD of type 'syn_ssv'
    cps.combine_and_split_syn(global_params.config.working_dir,
                              resume_job=False,
                              cs_gap_nm=global_params.cs_gap_nm,
                              log=log,
                              n_folders_fs=n_folders_fs)
    log.info('Synapse objects were created.')
    sd_syn_ssv = SegmentationDataset(
        working_dir=global_params.config.working_dir, obj_type='syn_ssv')

    dataset_analysis(sd_syn_ssv, compute_meshprops=True)
    log.info('SegmentationDataset of type "syn_ssv" was generated.')

    cps.map_objects_to_synssv(global_params.config.working_dir, log=log)
    log.info('Cellular organelles were mapped to "syn_ssv".')

    cps.classify_synssv_objects(global_params.config.working_dir, log=log)
    log.info('Synapse property prediction finished.')

    log.info('Collecting and writing syn-ssv objects to SSV attribute '
             'dictionary.')
    # This needs to be run after `classify_synssv_objects` and before
    # `map_synssv_objects` if the latter uses thresholding for synaptic objects
    dataset_analysis(sd_syn_ssv, compute_meshprops=False,
                     recompute=False)  # just collect new data
    # TODO: decide whether this should happen after prob thresholding or not
    map_synssv_objects(log=log)
    log.info('Finished.')
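
Once the pipeline above has finished, the resulting 'syn_ssv' objects could be inspected along these lines (hedged sketch; only attributes already used in these examples are assumed):

sd_syn_ssv = SegmentationDataset(
    working_dir=global_params.config.working_dir, obj_type='syn_ssv')
print(len(sd_syn_ssv.ids))  # number of agglomerated synapse objects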
Example #7

if __name__ == "__main__":
    dictionary_elements = []
    seg_dict = []
    dictionary_elements.append("mi")
    dictionary_elements.append("vc")
    dictionary_elements.append("sj")
    #dictionary_elements.append("sv")
    #dictionary_elements.append("cs")
    offset = (10, 10, 10)
    print(global_params.wd)
    global_params.wd = '/wholebrain/u/atultm/SyConn/example_cube1/'
    cd_dir = global_params.config.working_dir + "chunkdatasets/sv/"
    chunk_size = [128] * 3
    kd = kd_factory(global_params.config.kd_seg_path)
    cd_cell = chunky.ChunkDataset()
    cd_cell.initialize(kd, kd.boundary, chunk_size, cd_dir,
                       box_coords=[0, 0, 0], fit_box_size=True)

    ch = cd_cell.chunk_dict[0]

    seg_cell = kd.from_overlaycubes_to_matrix(offset=ch.coordinates,
                                              size=ch.size)
    # for element in dictionary_elements:
    #     cd_dir = global_params.config.working_dir + "chunkdatasets/" + element + "/"
    #     # Class that contains a dict of chunks (with coordinates) after initializing it
    #     cd_mi = chunky.ChunkDataset()
    #     cd_mi.initialize(kd, kd.boundary, chunk_size, cd_dir,
    #                   box_coords=[0, 0, 0], fit_box_size=True)
    #     ch = cd_mi.chunk_dict[0]
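    # Hedged sketch (not in the original script): one way the commented loop
    # above could be completed, mirroring the `seg_cell` load for each
    # organelle type. Which KnossosDataset holds each organelle segmentation
    # is an assumption here; the `kd_<type>_path` lookup copies the pattern
    # used in the other examples on this page.
    seg_per_type = {}
    for element in dictionary_elements:
        cd_dir = global_params.config.working_dir + "chunkdatasets/" + element + "/"
        cd_obj = chunky.ChunkDataset()
        cd_obj.initialize(kd, kd.boundary, chunk_size, cd_dir,
                          box_coords=[0, 0, 0], fit_box_size=True)
        ch_obj = cd_obj.chunk_dict[0]
        kd_obj = kd_factory(getattr(global_params.config,
                                    'kd_{}_path'.format(element)))
        seg_per_type[element] = kd_obj.from_overlaycubes_to_matrix(
            offset=ch_obj.coordinates, size=ch_obj.size)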
Example #8
def run_create_sds(chunk_size=None,
                   n_folders_fs=10000,
                   max_n_jobs=None,
                   generate_sv_meshes=False,
                   load_from_kd_overlaycubes=False,
                   cube_of_interest_bb=None):
    """

    Parameters
    ----------
    chunk_size : Tuple[int, int, int]
        Size of the cubes which are processed by each worker.
    max_n_jobs : int
        Number of parallel jobs.
    n_folders_fs : int
        Number of folders used to create the folder structure in each
        resulting SegmentationDataset.
    generate_sv_meshes : bool
        Whether to cache meshes for the cell supervoxels.
    load_from_kd_overlaycubes : bool
        Load prob/seg data from overlaycubes instead of raw cubes.
    cube_of_interest_bb : Tuple[np.ndarray]
        Defines the bounding box of the cube to process. By default this is
        set to (np.zeros(3), kd.boundary).


    Returns
    -------

    """
    if chunk_size is None:
        chunk_size = [512, 512, 512]
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 3
    log = initialize_logging('create_sds',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)

    # Sets initial values of object
    kd = kd_factory(global_params.config.kd_seg_path)
    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=int), kd.boundary]
    size = cube_of_interest_bb[1] - cube_of_interest_bb[0] + 1
    offset = cube_of_interest_bb[0]
    # TODO: get rid of explicit voxel extraction, all info necessary should be extracted
    #  at the beginning, e.g. size, bounding box etc and then refactor to only use those cached attributes!
    # Resulting ChunkDataset, required for SV extraction.
    # Object extraction (~2 h); the same has to be done for all cell organelles.
    cd_dir = global_params.config.working_dir + "chunkdatasets/sv/"
    # Class that contains a dict of chunks (with coordinates) after initializing it
    cd = chunky.ChunkDataset()
    cd.initialize(kd,
                  kd.boundary,
                  chunk_size,
                  cd_dir,
                  box_coords=[0, 0, 0],
                  fit_box_size=True)
    log.info('Generating SegmentationDatasets for cell and cell '
             'organelle supervoxels.')
    oew.from_ids_to_objects(
        cd,
        "sv",
        overlaydataset_path=global_params.config.kd_seg_path,
        n_chunk_jobs=max_n_jobs,
        hdf5names=["sv"],
        n_max_co_processes=None,
        n_folders_fs=n_folders_fs,
        use_combined_extraction=True,
        size=size,
        offset=offset)

    # Object Processing -- Perform after mapping to also cache mapping ratios
    sd = SegmentationDataset("sv",
                             working_dir=global_params.config.working_dir)
    sd_proc.dataset_analysis(sd, recompute=True, compute_meshprops=False)

    log.info("Extracted {} cell SVs. Preparing rendering locations "
             "(and meshes if not provided).".format(len(sd.ids)))
    start = time.time()
    # chunk them
    multi_params = chunkify(sd.so_dir_paths, max_n_jobs)
    # all other kwargs like obj_type='sv' and version default to the current SV SegmentationDataset
    so_kwargs = dict(working_dir=global_params.config.working_dir,
                     obj_type='sv')
    multi_params = [[par, so_kwargs] for par in multi_params]
    if generate_sv_meshes:
        _ = qu.QSUB_script(multi_params,
                           "mesh_caching",
                           n_max_co_processes=global_params.NCORE_TOTAL)
    _ = qu.QSUB_script(multi_params,
                       "sample_location_caching",
                       n_max_co_processes=global_params.NCORE_TOTAL)
    # recompute=False: only collect new sample_location property
    sd_proc.dataset_analysis(sd, compute_meshprops=True, recompute=False)
    log.info(
        'Finished preparation of cell SVs after {:.0f}s.'.format(time.time() -
                                                                 start))
    # create SegmentationDataset for each cell organelle
    for co in global_params.existing_cell_organelles:
        start = time.time()
        cd_dir = global_params.config.working_dir + "chunkdatasets/{}/".format(
            co)
        cd.initialize(kd,
                      kd.boundary,
                      chunk_size,
                      cd_dir,
                      box_coords=[0, 0, 0],
                      fit_box_size=True)
        log.info('Started object extraction of cellular organelles "{}" from '
                 '{} chunks.'.format(co, len(cd.chunk_dict)))
        prob_kd_path_dict = {
            co: getattr(global_params.config, 'kd_{}_path'.format(co))
        }
        # This creates a SegmentationDataset of type 'co'
        prob_thresh = global_params.config.entries["Probathresholds"][
            co]  # get probability threshold

        path = "{}/knossosdatasets/{}_seg/".format(
            global_params.config.working_dir, co)
        # Create the target KnossosDataset on disk, then re-open it from its path.
        target_kd = knossosdataset.KnossosDataset()
        target_kd.initialize_without_conf(path, kd.boundary, kd.scale,
                                          kd.experiment_name, mags=[1])
        target_kd = knossosdataset.KnossosDataset()
        target_kd.initialize_from_knossos_path(path)
        oew.from_probabilities_to_objects(
            cd,
            co,  # membrane_kd_path=global_params.config.kd_barrier_path,  # TODO: currently does not exist
            prob_kd_path_dict=prob_kd_path_dict,
            thresholds=[prob_thresh],
            workfolder=global_params.config.working_dir,
            hdf5names=[co],
            n_max_co_processes=None,
            target_kd=target_kd,
            n_folders_fs=n_folders_fs,
            debug=False,
            size=size,
            offset=offset,
            load_from_kd_overlaycubes=load_from_kd_overlaycubes)
        sd_co = SegmentationDataset(
            obj_type=co, working_dir=global_params.config.working_dir)

        # TODO: check if this is faster than the alternative below
        sd_proc.dataset_analysis(sd_co,
                                 recompute=True,
                                 compute_meshprops=False)
        multi_params = chunkify(sd_co.so_dir_paths, max_n_jobs)
        so_kwargs = dict(working_dir=global_params.config.working_dir,
                         obj_type=co)
        multi_params = [[par, so_kwargs] for par in multi_params]
        _ = qu.QSUB_script(multi_params,
                           "mesh_caching",
                           n_max_co_processes=global_params.NCORE_TOTAL)
        sd_proc.dataset_analysis(sd_co,
                                 recompute=False,
                                 compute_meshprops=True)
        # # Old alternative, requires many more reads/writes than the solution above
        # sd_proc.dataset_analysis(sd_co, recompute=True, compute_meshprops=True)

        # About 0.2 h per object class
        log.info('Started mapping of {} cellular organelles of type "{}" to '
                 'cell SVs.'.format(len(sd_co.ids), co))
        sd_proc.map_objects_to_sv(sd,
                                  co,
                                  global_params.config.kd_seg_path,
                                  n_jobs=max_n_jobs)
        log.info('Finished preparation of {} "{}"-SVs after {:.0f}s.'
                 ''.format(len(sd_co.ids), co,
                           time.time() - start))
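
A hypothetical end-to-end ordering sketch: the SegmentationDatasets produced by `run_create_sds` are a prerequisite for the synapse pipeline shown in the earlier examples; the volume below is a placeholder and ``global_params.wd`` is assumed to be set.

import numpy as np

bb = np.array([np.zeros(3, dtype=int), [2048, 2048, 2048]])
run_create_sds(chunk_size=[512, 512, 512], generate_sv_meshes=True,
               cube_of_interest_bb=bb)
run_syn_generation(cube_of_interest_bb=bb)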