Example #1
def run_create_rag():
    """
    If ``global_params.config.prior_glia_removal==True``:
        stores pruned RAG at ``global_params.config.pruned_rag_path``, required for all glia
        removal steps. :func:`~syconn.exec.exec_multiview.run_glia_splitting`
        will finally store the ``neuron_rag.bz2`` at the currently active working directory.
    else:
        stores pruned RAG at ``global_params.config.working_dir + /glia/neuron_rag.bz2``,
        required by :func:`~syconn.exec.exec_multiview.run_create_neuron_ssd`.
    """
    log = initialize_logging('create_rag',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)
    # Crop RAG according to cell SVs found during SD generation and apply size threshold
    G = nx.read_edgelist(global_params.config.init_rag_path, nodetype=np.uint)
    if 0 in G.nodes():
        G.remove_node(0)
        log.warning('Found background node 0 in original graph. Removing.')
    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)
    log.info("Found {} SVs in initial RAG.".format(len(all_sv_ids_in_rag)))

    # add single SV connected components to initial graph
    sd = SegmentationDataset(obj_type='sv',
                             working_dir=global_params.config.working_dir)
    sv_ids = sd.ids
    diff = np.array(list(set(sv_ids).difference(set(all_sv_ids_in_rag))))
    log.info(
        'Found {} single-element connected component SVs which were missing'
        ' in initial RAG.'.format(len(diff)))

    for ix in diff:
        G.add_edge(ix, ix)

    log.debug("Found {} SVs in initial RAG after adding size-one connected "
              "components.".format(G.number_of_nodes()))

    # remove small connected components
    sv_size_dict = {}
    bbs = sd.load_cached_data('bounding_box') * sd.scaling
    for ii in range(len(sd.ids)):
        sv_size_dict[sd.ids[ii]] = bbs[ii]
    ccsize_dict = create_ccsize_dict(G, sv_size_dict)
    log.debug("Finished preparation of SSV size dictionary based "
              "on bounding box diagonal of corresponding SVs.")
    before_cnt = len(G.nodes())
    for ix in list(G.nodes()):
        if ccsize_dict[ix] < global_params.config['glia']['min_cc_size_ssv']:
            G.remove_node(ix)
    cc_gs = list(nx.connected_component_subgraphs(G))
    log.info("Removed {} SVs from RAG because of size. Final RAG contains {}"
             " SVs in {} CCs.".format(before_cnt - G.number_of_nodes(),
                                      G.number_of_nodes(), len(cc_gs)))
    nx.write_edgelist(G, global_params.config.pruned_rag_path)

    if not global_params.config.prior_glia_removal:
        os.makedirs(global_params.config.working_dir + '/glia/', exist_ok=True)
        shutil.copy(global_params.config.pruned_rag_path,
                    global_params.config.working_dir + '/glia/neuron_rag.bz2')
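
The size-based pruning above relies on create_ccsize_dict, which aggregates per-SV bounding-box diagonals over each connected component. A minimal, self-contained sketch of the same idea with a hypothetical helper and scalar toy sizes (not SyConn's actual implementation):

import networkx as nx

def prune_small_ccs(g, sv_sizes, min_cc_size):
    """Remove every node whose connected component falls below `min_cc_size`."""
    g = g.copy()
    for cc in list(nx.connected_components(g)):
        if sum(sv_sizes[n] for n in cc) < min_cc_size:
            g.remove_nodes_from(cc)
    return g

toy_rag = nx.Graph([(1, 2), (2, 3), (4, 5)])
toy_rag.add_edge(6, 6)  # single-SV component, added like the `diff` loop above
pruned = prune_small_ccs(toy_rag, {1: 10, 2: 10, 3: 10, 4: 1, 5: 1, 6: 0.5},
                         min_cc_size=5)
print(sorted(pruned.nodes()))  # -> [1, 2, 3]
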
Example #2
def run_matrix_export():
    # cache cell attributes
    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir)
    ssd.save_dataset_deep()
    log = initialize_logging('synapse_analysis',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)
    sd_syn_ssv = SegmentationDataset(
        working_dir=global_params.config.working_dir, obj_type='syn_ssv')

    # as an alternative to the skeletons, use vertex predictions or
    # sample_locations, ~3.5h @ 300 cpus
    # TODO: requires speed-up; one could collect properties only for synapses >
    #  probability threshold
    #     synssv_ids = synssv_ids[syn_prob > .5]
    #     ssv_partners = ssv_partners[syn_prob > .5]
    # One could also re-use the cached synssv IDs (computed during mapping of
    # synssv to SSVs) -> saves finding SSV ID indices in synapse arrays (->
    # slow for many synapses)
    cps.collect_properties_from_ssv_partners(global_params.config.working_dir,
                                             debug=True)
    # collect the new object attributes gathered above (partner axoness, cell
    # types, synapse probabilities, etc.); size/rep_coord etc. do not need to
    # be recomputed -> recompute=False
    dataset_analysis(sd_syn_ssv, compute_meshprops=False, recompute=False)
    log.info('Synapse property collection from SSVs finished.')

    # export_matrix
    log.info('Exporting connectivity matrix now.')
    dest_folder = global_params.config.working_dir + '/connectivity_matrix/'
    cps.export_matrix(dest_folder=dest_folder)
    log.info('Connectivity matrix was exported to "{}".'.format(dest_folder))
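
The TODO above suggests restricting the collection to synapses above a probability threshold. A hedged sketch of that masking step with made-up array names and values (the cached attribute names may differ):

import numpy as np

syn_prob = np.array([0.2, 0.9, 0.7, 0.4])
synssv_ids = np.array([11, 12, 13, 14])
ssv_partners = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])

mask = syn_prob > 0.5
print(synssv_ids[mask])    # -> [12 13]
print(ssv_partners[mask])  # partner rows of the kept synapses only
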
Example #3
    def __init__(self, syconn_path='', logger=None):
        """
        Initializes a SyConn backend for operation.
        This includes in-memory initialization of the
        most important caches. Currently, SyConn Gate
        does not support backend data changes and the server needs
        to restart for changes to be valid. If the backend data
        is changed while the server is running, old content
        might be served.
        All backend functions must return dicts.

        :param syconn_path: str
        """
        self.logger = logger
        self.logger.info('Initializing SyConn backend')

        self.ssd = ss.SuperSegmentationDataset(syconn_path,
                                               sso_locking=False)

        self.logger.info('SuperSegmentation dataset initialized.')

        self.sds = dict(syn_ssv=SegmentationDataset(working_dir=syconn_path,
                                                    obj_type='syn_ssv'))

        # flat array representation of all synapses
        self.conn_dict = conn.load_cached_data_dict()
        self.logger.info('In memory cache of synapses initialized.')
        # directed networkx graph of connectivity
        self.conn_graph = conn.connectivity_to_nx_graph(self.conn_dict)
        self.logger.info('Connectivity graph initialized.')
Example #4
def sd_init(co: str, max_n_jobs: int, log: Optional[Logger] = None):
    """
    Initialize :class:`~syconn.reps.segmentation.SegmentationDataset` of given
    supervoxel type `co`.

    Args:
        co: Cellular organelle identifier (e.g. 'mi', 'vc', ...).
        max_n_jobs: Number of parallel jobs.
        log: Logger.
    """
    sd_seg = SegmentationDataset(obj_type=co, working_dir=global_params.config.working_dir,
                                 version="0")
    multi_params = chunkify(sd_seg.so_dir_paths, max_n_jobs)
    so_kwargs = dict(working_dir=global_params.config.working_dir, obj_type=co)
    multi_params = [[par, so_kwargs] for par in multi_params]

    if not global_params.config.use_new_meshing and (co != "sv" or (co == "sv" and
            global_params.config.allow_mesh_gen_cells)):
        _ = qu.QSUB_script(multi_params, "mesh_caching", suffix=co, remove_jobfolder=False,
                           n_max_co_processes=global_params.NCORE_TOTAL, log=log)

    if co == "sv":
        _ = qu.QSUB_script(multi_params, "sample_location_caching",
                           n_max_co_processes=global_params.NCORE_TOTAL,
                           suffix=co, remove_jobfolder=True, log=log)

    # write mesh properties to attribute dictionaries if old meshing is active
    if not global_params.config.use_new_meshing:
        sd_proc.dataset_analysis(sd_seg, recompute=False, compute_meshprops=True)
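
`chunkify(sd_seg.so_dir_paths, max_n_jobs)` splits the storage paths into batches for the job scheduler. A minimal stand-in that illustrates the behavior (the real syconn.handler.basics.chunkify may differ in detail):

import numpy as np

def chunkify_sketch(seq, n_chunks):
    # Split `seq` into at most `n_chunks` roughly equal, non-empty chunks.
    return [list(c) for c in np.array_split(np.asarray(seq, dtype=object), n_chunks)
            if len(c) > 0]

paths = ['so_storage/00', 'so_storage/01', 'so_storage/02']
print(chunkify_sketch(paths, 2))  # -> [['so_storage/00', 'so_storage/01'], ['so_storage/02']]
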
Example #5
    def push_so_attr(self, so_id, so_type, attr_key, attr_value):
        """
        Generic attribute push; returns an empty string if the write succeeded,
        otherwise the error message. Could be optimized with the assumption
        that all attributes have been cached as numpy arrays.

        Parameters
        ----------
        so_id : int
        so_type : str
        attr_key : str
        attr_value :

        Returns
        -------
        str
            Empty string if everything went well, otherwise the error message.
        """
        if so_type not in self.sds:
            self.sds[so_type] = SegmentationDataset(obj_type=so_type)
        sd = self.sds[so_type]
        try:
            so = sd.get_segmentation_object(so_id)
            so.save_attributes([attr_key], [attr_value])
            return ""
        except Exception as e:
            return str(e)
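
A hypothetical round trip through push_so_attr and its counterpart pull_so_attr (Example #9); `backend` stands for an initialized SyConn backend instance, and the attribute key/value are made up:

def annotate_object(backend, so_id):
    # hypothetical usage of the backend methods shown above
    err = backend.push_so_attr(so_id, 'syn_ssv', 'review_note', 'checked')
    if err != "":
        raise RuntimeError('push failed: ' + err)
    return backend.pull_so_attr(so_id, 'syn_ssv', 'review_note')  # -> 'checked'
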
Example #6
def run_matrix_export():
    """
    Export the matrix as a ``.csv`` file at the ``connectivity_matrix`` folder
    of the currently active working directory.
    Also collects the following synapse properties from prior analysis
    steps:
        * 'partner_axoness': Cell compartment type (axon: 1, dendrite: 0, soma: 2,
            en-passant bouton: 3, terminal bouton: 4) of the partner neurons.
        * 'partner_spiness': Spine compartment predictions of both neurons.
        * 'partner_celltypes': Cell types of both neurons.
        * 'latent_morph': Local morphology embeddings of the pre- and post-
            synaptic partners.

    Examples:
        See :class:`~syconn.reps.segmentation.SegmentationDataset` for examples.
    """
    # cache cell attributes
    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir)
    ssd.save_dataset_deep()
    log = initialize_logging('synapse_analysis',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)

    sd_syn_ssv = SegmentationDataset(
        working_dir=global_params.config.working_dir, obj_type='syn_ssv')

    # as an alternative to the skeletons, use vertex predictions or
    # sample_locations, ~3.5h @ 300 cpus
    # TODO: requires speed-up; one could collect properties only for synapses >
    #  probability threshold
    #     synssv_ids = synssv_ids[syn_prob > .5]
    #     ssv_partners = ssv_partners[syn_prob > .5]
    # One could also re-use the cached synssv IDs (computed during mapping of
    # synssv to SSVs) -> saves finding SSV ID indices in synapse arrays (->
    # slow for many synapses)
    cps.collect_properties_from_ssv_partners(global_params.config.working_dir,
                                             debug=True)
    # collect the new object attributes gathered above (partner axoness, cell
    # types, synapse probabilities, etc.); size/rep_coord etc. do not need to
    # be recomputed -> recompute=False
    dataset_analysis(sd_syn_ssv, compute_meshprops=False, recompute=False)
    log.info('Synapse property collection from SSVs finished.')

    # export_matrix
    log.info('Exporting connectivity matrix now.')
    dest_folder = global_params.config.working_dir + '/connectivity_matrix/'
    cps.export_matrix(dest_folder=dest_folder)
    log.info('Connectivity matrix was exported to "{}".'.format(dest_folder))
Example #7
def run_glia_splitting():
    log = initialize_logging('glia_splitting',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    # path to networkx file containing the initial rag, TODO: create alternative formats
    G = nx.Graph()  # TODO: Make this more general
    with open(global_params.config.init_rag_path, 'r') as f:
        for l in f.readlines():
            edges = [int(v) for v in re.findall(r'(\d+)', l)]
            G.add_edge(edges[0], edges[1])

    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)
    log.info("Found {} SVs in initial RAG.".format(len(all_sv_ids_in_rag)))

    # add single SV connected components to initial graph
    sd = SegmentationDataset(obj_type='sv',
                             working_dir=global_params.config.working_dir)
    sv_ids = sd.ids
    diff = np.array(list(set(sv_ids).difference(set(all_sv_ids_in_rag))))
    log.info('Found {} single-SV connected components which were'
             ' missing in the initial RAG.'.format(len(diff)))

    for ix in diff:
        G.add_node(ix)

    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)
    log.info("Found {} SVs in initial RAG after adding size-one connected "
             "components. Writing RAG to pkl.".format(len(all_sv_ids_in_rag)))

    if not os.path.isdir(global_params.config.working_dir + "/glia/"):
        os.makedirs(global_params.config.working_dir + "/glia/")
    transform_rag_edgelist2pkl(G)

    # first perform glia splitting based on multi-view predictions, results are
    # stored at SuperSegmentationDataset ssv_gliaremoval
    qsub_glia_splitting()

    # collect all neuron and glia SVs and store them in numpy array
    collect_glia_sv()

    # # here use reconnected RAG or initial rag
    recon_nx = G
    # create glia / neuron RAGs
    write_glia_rag(recon_nx, global_params.min_cc_size_ssv, suffix=rag_suffix)
    log.info("Finished glia splitting. Resulting RAGs are stored at {}."
             "".format(global_params.config.working_dir + "/glia/"))
Example #8
    def __init__(self,
                 syconn_path: str = '',
                 logger=None,
                 synthresh=0.5,
                 axodend_only=True):
        """
        Initializes a SyConn backend for operation.
        This includes in-memory initialization of the
        most important caches. Currently, SyConn Gate
        does not support backend data changes and the server needs
        to restart for changes to be valid. If the backend data
        is changed while the server is running, old content
        might be served.
        All backend functions must return dicts.

        Args:
            syconn_path:
            logger:
            synthresh: All synapses below `synthresh` will be excluded.
            axodend_only: If True, only axo-dendritic synapses will be loaded.
        """
        self.logger = logger
        self.logger.info('Initializing SyConn backend')

        self.ssd = ss.SuperSegmentationDataset(syconn_path, sso_locking=False)

        self.logger.info('SuperSegmentation dataset initialized.')

        self.sds = dict(syn_ssv=SegmentationDataset(working_dir=syconn_path,
                                                    obj_type='syn_ssv'))
        self.nb_cpus = cpu_count()
        self.synthresh = synthresh
        self.axodend_only = axodend_only
        # flat array representation of all synapses
        self.conn_dict = conn.load_cached_data_dict()
        self.logger.info('In memory cache of synapses initialized.')
        # directed networkx graph of connectivity
        self.conn_graph = conn.connectivity_to_nx_graph(self.conn_dict)
        self.logger.info('Connectivity graph initialized.')
Example #9
    def pull_so_attr(self, so_id, so_type, attr_key):
        """
        Generic attribute pull, return empty string if key did not exist. Could be optimized
        with the assumption that all attributes have been cached as numpy arrays.

        Parameters
        ----------
        so_id : int
        so_type : str
        attr_key : str

        Returns
        -------
        str
        """
        if so_type not in self.sds:
            self.sds[so_type] = SegmentationDataset(obj_type=so_type)
        sd = self.sds[so_type]
        so = sd.get_segmentation_object(so_id)
        so.load_attr_dict()
        if attr_key not in so.attr_dict:
            return ''
        return so.attr_dict[attr_key]
Example #10
def run_glia_prediction(e3=False):
    log = initialize_logging('glia_prediction',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    # only append to this key if needed (e.g. for different versions; change accordingly in 'axoness_mapping.py')
    pred_key = "glia_probas"
    # Load the initial RAG from the Knossos mergelist text file.
    init_rag_p = global_params.config.working_dir + "initial_rag.txt"
    assert os.path.isfile(init_rag_p), "Initial RAG could not be found at %s."\
                                       % init_rag_p
    init_rag = parse_cc_dict_from_kml(init_rag_p)
    log.info('Found {} CCs with a total of {} SVs in initial RAG.'
             ''.format(len(init_rag),
                       np.sum([len(v) for v in init_rag.values()])))
    # chunk them
    sd = SegmentationDataset("sv",
                             working_dir=global_params.config.working_dir)
    multi_params = chunkify(sd.so_dir_paths, 100)
    # get model properties
    if e3:
        model_kwargs = 'get_glia_model_e3'
    else:
        m = get_glia_model()
        model_kwargs = dict(model_path=m._path,
                            normalize_data=m.normalize_data,
                            imposed_batch_size=m.imposed_batch_size,
                            nb_labels=m.nb_labels,
                            channels_to_load=m.channels_to_load)
    # all other kwargs like obj_type='sv' and version are the current SV SegmentationDataset by default
    so_kwargs = dict(working_dir=global_params.config.working_dir)
    # for glia views set woglia to False (because glia are included),
    #  raw_only to True
    pred_kwargs = dict(woglia=False,
                       pred_key=pred_key,
                       verbose=False,
                       raw_only=True)

    multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs]
                    for par in multi_params]
    if e3:
        path_to_out = qu.QSUB_script(
            multi_params,
            "predict_sv_views_chunked_e3",
            n_max_co_processes=15,
            pe="openmp",
            queue=None,
            script_folder=None,
            n_cores=10,
            suffix="_glia",
            additional_flags="--gres=gpu:1")  # removed -V
    else:
        # randomly assign to gpu 0 or 1
        for par in multi_params:
            mk = par[1]
            # GPUs are made available for every job via slurm, no need for random assignments: np.random.rand(0, 2)
            mk["init_gpu"] = 0
        path_to_out = qu.QSUB_script(
            multi_params,
            "predict_sv_views_chunked",
            n_max_co_processes=25,
            pe="openmp",
            queue=None,
            n_cores=10,
            suffix="_glia",
            script_folder=None,
            additional_flags="--gres=gpu:1")  # removed -V
    log.info('Finished glia prediction. Checking completeness.')
    res = find_missing_sv_attributes(sd, pred_key, n_cores=10)
    if len(res) > 0:
        log.error("Attribute '{}' missing for follwing"
                  " SVs:\n{}".format(pred_key, res))
    else:
        log.info('Success.')
Example #11
def run_glia_rendering(max_n_jobs=None):
    """
    Uses the pruned RAG (stored as edge list .bz2 file) which is computed
     in `init_cell_subcell_sds`.

    Parameters
    ----------
    max_n_jobs :

    Returns
    -------

    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NGPU_TOTAL * 4 if global_params.PYOPENGL_PLATFORM == 'egl' \
            else global_params.NCORE_TOTAL * 4
    log = initialize_logging('glia_view_rendering',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)
    log.info("Preparing RAG.")
    np.random.seed(0)

    # view rendering prior to glia removal, choose SSD accordingly
    # glia removal is based on the initial RAG and does not require explicitly stored SSVs
    # TODO: refactor how splits are stored, currently those are stored at ssv_tmp
    version = "tmp"

    G = nx.read_edgelist(global_params.config.pruned_rag_path,
                         nodetype=np.uint)

    cc_gs = sorted(list(nx.connected_component_subgraphs(G)),
                   key=len,
                   reverse=True)
    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)

    # generate parameter for view rendering of individual SSV
    # TODO: remove SVs below minimum size (-> global_params.min_cc_size_ssv)
    sds = SegmentationDataset("sv",
                              working_dir=global_params.config.working_dir)
    sv_size_dict = {}
    bbs = sds.load_cached_data('bounding_box') * sds.scaling
    for ii in range(len(sds.ids)):
        sv_size_dict[sds.ids[ii]] = bbs[ii]
    ccsize_dict = create_ccsize_dict(cc_gs,
                                     sv_size_dict,
                                     is_connected_components=True)

    multi_params = cc_gs
    big_ssv = []
    small_ssv = []
    for g in multi_params:
        if g.number_of_nodes() > RENDERING_MAX_NB_SV:
            big_ssv.append(g)
        elif ccsize_dict[list(g.nodes())[0]] < global_params.min_cc_size_ssv:
            pass  # ignore this CC
        else:
            small_ssv.append(g)

    log.info("View rendering for glia separation started.")
    # # identify huge SSVs and process them on the entire cluster
    if len(big_ssv) > 0:
        n_threads = 2
        log.info("Processing {} huge SSVs in {} threads on the entire cluster"
                 ".".format(len(big_ssv), n_threads))
        q_in = Queue()
        q_out = Queue()
        for kk, g in enumerate(big_ssv):
            q_in.put((kk, g, version))
        for _ in range(n_threads):
            q_in.put(-1)
        ps = [
            Process(target=_run_huge_ssv_render_worker, args=(q_in, q_out))
            for _ in range(n_threads)
        ]
        for p in ps:
            p.start()
            time.sleep(0.5)
        q_in.close()
        q_in.join_thread()
        for p in ps:
            p.join()
        if q_out.qsize() != len(big_ssv):
            raise ValueError(
                'Not all `_run_huge_ssv_render_worker` jobs completed successfully.'
            )
    # render small SSV without overhead and single cpus on whole cluster
    multi_params = small_ssv
    np.random.shuffle(multi_params)
    multi_params = chunkify(multi_params, max_n_jobs)
    # list of SSV IDs and SSD parameters need to be given to a single QSUB job
    multi_params = [(ixs, global_params.config.working_dir, version)
                    for ixs in multi_params]
    _ = qu.QSUB_script(multi_params,
                       "render_views_glia_removal",
                       log=log,
                       n_max_co_processes=global_params.NGPU_TOTAL,
                       n_cores=global_params.NCORES_PER_NODE //
                       global_params.NGPUS_PER_NODE,
                       additional_flags="--gres=gpu:1",
                       remove_jobfolder=True)

    # check completeness
    log.info(
        'Finished view rendering for glia separation. Checking completeness.')
    sd = SegmentationDataset("sv",
                             working_dir=global_params.config.working_dir)
    res = find_missing_sv_views(sd,
                                woglia=False,
                                n_cores=global_params.NCORES_PER_NODE)
    missing_not_contained_in_rag = []
    missing_contained_in_rag = []
    for el in res:
        if el not in all_sv_ids_in_rag:
            missing_not_contained_in_rag.append(
                el)  # TODO: decide whether to use or not
        else:
            missing_contained_in_rag.append(el)
    if len(missing_contained_in_rag) != 0:
        msg = "Not all SVs were rendered completely! {}/{} missing:\n" \
              "{}".format(len(missing_contained_in_rag), len(all_sv_ids_in_rag),
                          missing_contained_in_rag[:100])
        log.error(msg)
        raise ValueError(msg)
    else:
        log.info('All SVs now contain views required for glia prediction.')
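
The huge-SSV branch above distributes work via a task queue and one sentinel (-1) per worker process. A reduced, runnable sketch of that pattern in which the rendering step is replaced by a trivial computation:

from multiprocessing import Process, Queue

def _worker(q_in, q_out):
    while True:
        task = q_in.get()
        if task == -1:          # sentinel: no more work for this worker
            break
        q_out.put(task * 10)    # stand-in for the actual rendering step

if __name__ == '__main__':
    q_in, q_out = Queue(), Queue()
    tasks = [1, 2, 3]
    n_workers = 2
    for t in tasks:
        q_in.put(t)
    for _ in range(n_workers):
        q_in.put(-1)
    procs = [Process(target=_worker, args=(q_in, q_out)) for _ in range(n_workers)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(sorted(q_out.get() for _ in tasks))  # -> [10, 20, 30]
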
Example #12
# -*- coding: utf-8 -*-
# SyConn - Synaptic connectivity inference toolkit
#
# Copyright (c) 2016 - now
# Max Planck Institute of Neurobiology, Martinsried, Germany
# Authors: Philipp Schubert, Joergen Kornfeld
import os
from syconn.mp import batchjob_utils as mu
from syconn.reps.segmentation import SegmentationDataset
from syconn.handler.basics import chunkify

if __name__ == "__main__":
    script_folder = os.path.abspath(
        os.path.dirname(__file__) + "/../qsub_scripts/")
    sds = SegmentationDataset("cs",
                              version="33",
                              working_dir="/wholebrain/scratch/areaxfs/")
    multi_params = chunkify(list(sds.sos), 1000)
    path_to_out = mu.QSUB_script(multi_params,
                                 "map_cs_properties",
                                 n_max_co_processes=40,
                                 pe="openmp",
                                 queue=None,
                                 script_folder=script_folder)
Example #13
def run_syn_generation(chunk_size: Tuple[int, int, int] = (512, 512, 512),
                       n_folders_fs: int = 10000,
                       max_n_jobs: Optional[int] = None,
                       cube_of_interest_bb: Optional[np.ndarray] = None):
    """
    Run the synapse generation. Will create
    :class:`~syconn.reps.segmentation.SegmentationDataset` objects with
    the following versions:
        * 'cs': Contact site objects between supervoxels.
        * 'syn': Objects representing the overlap between 'cs' and the initial
          synaptic junction predictions. Note: These objects effectively represent
          synapse fragments between supervoxels.
        * 'syn_ssv': Agglomerated 'syn' objects based on the supervoxel graph.

    Args:
        chunk_size: The size of processed cubes.
        n_folders_fs: Number of folders used to create the folder structure in
            each :class:`~syconn.reps.segmentation.SegmentationDataset`.
        max_n_jobs: Number of parallel jobs.
        cube_of_interest_bb: Defines the bounding box of the cube to process.
            By default this is set to (np.zeros(3), kd.boundary).
    """
    if max_n_jobs is None:
        max_n_jobs = global_params.config.ncore_total * 2

    log = initialize_logging('synapse_generation',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)

    kd_seg_path = global_params.config.kd_seg_path
    kd = kd_factory(kd_seg_path)

    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=np.int), kd.boundary]

    ces.extract_contact_sites(chunk_size=chunk_size,
                              log=log,
                              max_n_jobs=max_n_jobs,
                              cube_of_interest_bb=cube_of_interest_bb,
                              n_folders_fs=n_folders_fs)
    log.info('SegmentationDataset of type "cs" and "syn" was generated.')

    # # TODO: add check for SSD existence, which is required at this point
    # # This creates an SD of type 'syn_ssv'
    cps.combine_and_split_syn(
        global_params.config.working_dir,
        resume_job=False,
        cs_gap_nm=global_params.config['cell_objects']['cs_gap_nm'],
        log=log,
        n_folders_fs=n_folders_fs)
    log.info('Synapse objects were created.')

    sd_syn_ssv = SegmentationDataset(
        working_dir=global_params.config.working_dir, obj_type='syn_ssv')

    dataset_analysis(sd_syn_ssv, compute_meshprops=True)
    log.info('SegmentationDataset of type "syn_ssv" was generated.')

    cps.map_objects_to_synssv(global_params.config.working_dir, log=log)
    log.info('Cellular organelles were mapped to "syn_ssv".')

    cps.classify_synssv_objects(global_params.config.working_dir, log=log)
    log.info('Synapse prediction finished.')

    log.info('Collecting and writing syn-ssv objects to SSV attribute '
             'dictionary.')
    # This needs to be run after `classify_synssv_objects` and before
    # `map_synssv_objects` if the latter uses thresholding for synaptic objects
    # just collect new data: ``recompute=False``
    dataset_analysis(sd_syn_ssv, compute_meshprops=False, recompute=False)
    # TODO: decide whether this should happen after prob thresholding or not
    map_synssv_objects(log=log)
    log.info('Finished.')
Example #14
def run_glia_prediction(e3=False):
    log = initialize_logging('glia_prediction',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    # only append to this key if needed (e.g. for different versions; change accordingly in 'axoness_mapping.py')
    pred_key = "glia_probas"

    # Load the pruned RAG (networkx edge list).
    g = nx.read_edgelist(global_params.config.pruned_rag_path,
                         nodetype=np.uint)
    all_sv_ids_in_rag = np.array(list(g.nodes()), dtype=np.uint)

    log.debug('Found {} CCs with a total of {} SVs in initial RAG.'.format(
        nx.number_connected_components(g), g.number_of_nodes()))
    # chunk them
    sd = SegmentationDataset("sv",
                             working_dir=global_params.config.working_dir)
    multi_params = chunkify(sd.so_dir_paths, global_params.NGPU_TOTAL * 2)
    # get model properties
    if e3:
        model_kwargs = 'get_glia_model_e3'
    else:
        m = get_glia_model()
        model_kwargs = dict(model_path=m._path,
                            normalize_data=m.normalize_data,
                            imposed_batch_size=m.imposed_batch_size,
                            nb_labels=m.nb_labels,
                            channels_to_load=m.channels_to_load)
    # all other kwargs like obj_type='sv' and version are the current SV SegmentationDataset by default
    so_kwargs = dict(working_dir=global_params.config.working_dir)
    # for glia views set woglia to False (because glia are included),
    #  raw_only to True
    pred_kwargs = dict(woglia=False,
                       pred_key=pred_key,
                       verbose=False,
                       raw_only=True)

    multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs]
                    for par in multi_params]
    if e3 is True:
        # TODO: using two GPUs on a single node seems to be error-prone
        #  -> wb13 froze when processing example_cube=2
        n_cores = global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE
        if 'example_cube' in global_params.config.working_dir:
            n_cores = global_params.NCORES_PER_NODE  # do not run two predictions in parallel
        qu.QSUB_script(multi_params,
                       "predict_sv_views_chunked_e3",
                       log=log,
                       n_max_co_processes=global_params.NGPU_TOTAL,
                       script_folder=None,
                       n_cores=n_cores,
                       suffix="_glia",
                       additional_flags="--gres=gpu:1",
                       remove_jobfolder=True)
    else:
        # randomly assign to gpu 0 or 1
        for par in multi_params:
            mk = par[1]
            # GPUs are made available for every job via slurm,
            # no need for random assignments: np.random.rand(0, 2)
            mk["init_gpu"] = 0
        _ = qu.QSUB_script(multi_params,
                           "predict_sv_views_chunked",
                           log=log,
                           n_max_co_processes=global_params.NGPU_TOTAL,
                           n_cores=global_params.NCORES_PER_NODE //
                           global_params.NGPUS_PER_NODE,
                           suffix="_glia",
                           additional_flags="--gres=gpu:1",
                           remove_jobfolder=True)
    log.info('Finished glia prediction. Checking completeness.')
    res = find_missing_sv_views(sd,
                                woglia=False,
                                n_cores=global_params.NCORES_PER_NODE)
    missing_not_contained_in_rag = []
    missing_contained_in_rag = []
    for el in res:
        if el not in all_sv_ids_in_rag:
            missing_not_contained_in_rag.append(
                el)  # TODO: decide whether to use or not
        else:
            missing_contained_in_rag.append(el)
    if len(missing_contained_in_rag) != 0:
        msg = "Not all SVs were predicted! {}/{} missing:\n" \
              "{}".format(len(missing_contained_in_rag), len(all_sv_ids_in_rag),
                          missing_contained_in_rag[:100])
        log.error(msg)
        raise ValueError(msg)
    else:
        log.info('Success.')
Example #15
def helper_func(paths):
    # signature inferred from the `start_multiprocess_imap(helper_func, ...)` call below
    num_locs = []
    for p in paths:
        loc_dc = CompressedStorage(p + '/locations.pkl',
                                   read_only=True,
                                   disable_locking=True)
        sample_locs = [np.concatenate(sl) for sl in loc_dc.values()]
        num_locs += [len(sl) for sl in sample_locs]
    return num_locs


# TODO: make this a test on toy data (which has to be created and added to the repo)
if __name__ == '__main__':
    # performed on SSD at '/wholebrain/songbird/j0126/areaxfs_v6//ssv_0/', 17Jan2019
    ssd = SuperSegmentationDataset(
        working_dir='/wholebrain/songbird/j0126/areaxfs_v6/')
    sd = SegmentationDataset(
        obj_type='sv', working_dir='/wholebrain/songbird/j0126/areaxfs_v6/')

    # # Statistics of SSVs in datatset
    # all_paths = chunkify(glob.glob(ssd.path + "/so_storage/*/*/*/"), 500)
    # num_samplelocs = start_multiprocess_imap(helper_func, all_paths, nb_cpus=20)
    # num_samplelocs = np.concatenate(num_samplelocs)  # transform list of lists into 1D array
    # print('#SSVs: {}\nMean #sample_locs: {}\nTotal #sample_locs: {}'.format(len(ssd.ssv_ids),
    #                                                 np.mean(num_samplelocs), np.sum(num_samplelocs)))
    # # Statistics of SVs in the original datatset
    # all_paths = chunkify(sd.so_dir_paths, 500)
    # num_samplelocs = start_multiprocess_imap(helper_func_sd, all_paths, nb_cpus=20)
    # num_samplelocs = np.concatenate(num_samplelocs)  # transform list of lists into 1D array
    # print('#SVs: {}\nMean #sample_locs: {}\nTotal #sample_locs: {}'.format(len(sd.ids),
    #                                                 np.mean(num_samplelocs), np.sum(num_samplelocs)))

    ssvs = ssd.get_super_segmentation_object([26607617, 27525127])
Example #16
def run_glia_rendering():
    log = initialize_logging('glia_view_rendering',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    np.random.seed(0)

    # view rendering prior to glia removal, choose SSD accordingly
    version = "tmp"  # glia removal is based on the initial RAG and does not require explicitly stored SSVs

    G = nx.Graph()  # TODO: Add factory method for initial RAG
    with open(global_params.config.init_rag_path, 'r') as f:
        for l in f.readlines():
            edges = [int(v) for v in re.findall(r'(\d+)', l)]
            G.add_edge(edges[0], edges[1])

    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)
    log.info("Found {} SVs in initial RAG.".format(len(all_sv_ids_in_rag)))

    # add single SV connected components to initial graph
    sd = SegmentationDataset(obj_type='sv',
                             working_dir=global_params.config.working_dir)
    sv_ids = sd.ids
    diff = np.array(list(set(sv_ids).difference(set(all_sv_ids_in_rag))))
    log.info('Found {} single-SV connected components which were missing'
             ' in the initial RAG.'.format(len(diff)))

    for ix in diff:
        G.add_node(ix)

    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)
    log.info("Found {} SVs in initial RAG after adding size-one connected "
             "components. Writing kml text file".format(
                 len(all_sv_ids_in_rag)))

    # write out readable format for 'glia_prediction.py'
    ccs = [[n for n in cc] for cc in nx.connected_component_subgraphs(G)]
    kml = knossos_ml_from_ccs([np.sort(cc)[0] for cc in ccs], ccs)
    with open(global_params.config.working_dir + "initial_rag.txt", 'w') as f:
        f.write(kml)

    # generate parameter for view rendering of individual SSV
    log.info("Starting view rendering.")
    multi_params = []
    for cc in nx.connected_component_subgraphs(G):
        multi_params.append(cc)
    multi_params = np.array(multi_params)

    # identify huge SSVs and process them individually on whole cluster
    nb_svs = np.array([g.number_of_nodes() for g in multi_params])
    big_ssv = multi_params[nb_svs > RENDERING_MAX_NB_SV]

    for kk, g in enumerate(big_ssv[::-1]):
        # Create SSV object
        sv_ixs = np.sort(list(g.nodes()))
        log.info("Processing SSV [{}/{}] with {} SVs on whole cluster.".format(
            kk + 1, len(big_ssv), len(sv_ixs)))
        sso = SuperSegmentationObject(
            sv_ixs[0],
            working_dir=global_params.config.working_dir,
            version=version,
            create=False,
            sv_ids=sv_ixs)
        # nodes of sso._rag need to be SV
        new_G = nx.Graph()
        for e in g.edges():
            new_G.add_edge(sso.get_seg_obj("sv", e[0]),
                           sso.get_seg_obj("sv", e[1]))
        sso._rag = new_G
        sso.render_views(add_cellobjects=False,
                         cellobjects_only=False,
                         skip_indexviews=True,
                         woglia=False,
                         qsub_pe="openmp",
                         overwrite=True,
                         qsub_co_jobs=global_params.NCORE_TOTAL)

    # render small SSV without overhead and single cpus on whole cluster
    multi_params = multi_params[nb_svs <= RENDERING_MAX_NB_SV]
    np.random.shuffle(multi_params)
    multi_params = chunkify(multi_params, 2000)

    # list of SSV IDs and SSD parameters need to be given to a single QSUB job
    multi_params = [(ixs, global_params.config.working_dir, version)
                    for ixs in multi_params]
    path_to_out = qu.QSUB_script(multi_params,
                                 "render_views_glia_removal",
                                 n_max_co_processes=global_params.NCORE_TOTAL,
                                 pe="openmp",
                                 queue=None,
                                 script_folder=None,
                                 suffix="")

    # check completeness
    sd = SegmentationDataset("sv",
                             working_dir=global_params.config.working_dir)
    res = find_missing_sv_views(sd, woglia=False, n_cores=10)
    missing_not_contained_in_rag = []
    missing_contained_in_rag = []
    for el in res:
        if el not in all_sv_ids_in_rag:
            missing_not_contained_in_rag.append(el)
        else:
            missing_contained_in_rag.append(el)
    if len(missing_not_contained_in_rag):
        log.info("%d SVs were not rendered but also not part of the initial"
                 "RAG: {}".format(missing_not_contained_in_rag))
    if len(missing_contained_in_rag) != 0:
        msg = "Not all SSVs were rendered completely! Missing:\n" \
              "{}".format(missing_contained_in_rag)
        log.error(msg)
        raise RuntimeError(msg)
Example #17
def run_syn_generation(chunk_size=(512, 512, 512),
                       n_folders_fs=10000,
                       max_n_jobs=None,
                       cube_of_interest_bb=None):
    """

    Parameters
    ----------
    chunk_size :
    n_folders_fs :
    max_n_jobs :
    cube_of_interest_bb : Tuple[np.ndarray]
        Defines the bounding box of the cube to process. By default this is
        set to (np.zeros(3), kd.boundary).

    Returns
    -------

    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2

    log = initialize_logging('synapse_generation',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)

    kd_seg_path = global_params.config.kd_seg_path
    kd = kd_factory(kd_seg_path)

    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=np.int), kd.boundary]

    ces.extract_contact_sites(chunk_size=chunk_size,
                              log=log,
                              max_n_jobs=max_n_jobs,
                              cube_of_interest_bb=cube_of_interest_bb,
                              n_folders_fs=n_folders_fs)
    log.info('SegmentationDataset of type "cs" and "syn" was generated.')

    # TODO: add check for SSD existence, which is required at this point
    # This creates an SD of type 'syn_ssv'
    cps.combine_and_split_syn(global_params.config.working_dir,
                              resume_job=False,
                              cs_gap_nm=global_params.cs_gap_nm,
                              log=log,
                              n_folders_fs=n_folders_fs)
    log.info('Synapse objects were created.')
    #
    sd_syn_ssv = SegmentationDataset(
        working_dir=global_params.config.working_dir, obj_type='syn_ssv')

    dataset_analysis(sd_syn_ssv, compute_meshprops=True)
    log.info('SegmentationDataset of type "syn_ssv" was generated.')

    cps.map_objects_to_synssv(global_params.config.working_dir, log=log)
    log.info('Cellular organelles were mapped to "syn_ssv".')

    cps.classify_synssv_objects(global_params.config.working_dir, log=log)
    log.info('Synapse property prediction finished.')

    log.info('Collecting and writing syn-ssv objects to SSV attribute '
             'dictionary.')
    # This needs to be run after `classify_synssv_objects` and before
    # `map_synssv_objects` if the latter uses thresholding for synaptic objects
    dataset_analysis(sd_syn_ssv, compute_meshprops=False,
                     recompute=False)  # just collect new data
    # TODO: decide whether this should happen after prob thresholding or not
    map_synssv_objects(log=log)
    log.info('Finished.')
Example #18
def run_create_sds(chunk_size=None,
                   n_folders_fs=10000,
                   max_n_jobs=None,
                   generate_sv_meshes=False,
                   load_from_kd_overlaycubes=False,
                   cube_of_interest_bb=None):
    """

    Parameters
    ----------
    chunk_size :
    max_n_jobs : int
    n_folders_fs :
    generate_sv_meshes :
    load_from_kd_overlaycubes : bool
        Load prob/seg data from overlaycubes instead of raw cubes.
    cube_of_interest_bb : Tuple[np.ndarray]
        Defines the bounding box of the cube to process. By default this is
        set to (np.zeros(3), kd.boundary).


    Returns
    -------

    """
    if chunk_size is None:
        chunk_size = [512, 512, 512]
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 3
    log = initialize_logging('create_sds',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)

    # Sets initial values of object
    kd = kd_factory(global_params.config.kd_seg_path)
    if cube_of_interest_bb is None:
        cube_of_interest_bb = [np.zeros(3, dtype=np.int), kd.boundary]
    size = cube_of_interest_bb[1] - cube_of_interest_bb[0] + 1
    offset = cube_of_interest_bb[0]
    # TODO: get rid of explicit voxel extraction, all info necessary should be extracted
    #  at the beginning, e.g. size, bounding box etc and then refactor to only use those cached attributes!
    # resulting ChunkDataset, required for SV extraction --
    # Object extraction - 2h, the same has to be done for all cell organelles
    cd_dir = global_params.config.working_dir + "chunkdatasets/sv/"
    # Class that contains a dict of chunks (with coordinates) after initializing it
    cd = chunky.ChunkDataset()
    cd.initialize(kd,
                  kd.boundary,
                  chunk_size,
                  cd_dir,
                  box_coords=[0, 0, 0],
                  fit_box_size=True)
    log.info('Generating SegmentationDatasets for cell and cell '
             'organelle supervoxels.')
    oew.from_ids_to_objects(
        cd,
        "sv",
        overlaydataset_path=global_params.config.kd_seg_path,
        n_chunk_jobs=max_n_jobs,
        hdf5names=["sv"],
        n_max_co_processes=None,
        n_folders_fs=n_folders_fs,
        use_combined_extraction=True,
        size=size,
        offset=offset)

    # Object Processing -- Perform after mapping to also cache mapping ratios
    sd = SegmentationDataset("sv",
                             working_dir=global_params.config.working_dir)
    sd_proc.dataset_analysis(sd, recompute=True, compute_meshprops=False)

    log.info("Extracted {} cell SVs. Preparing rendering locations "
             "(and meshes if not provided).".format(len(sd.ids)))
    start = time.time()
    # chunk them
    multi_params = chunkify(sd.so_dir_paths, max_n_jobs)
    # all other kwargs like obj_type='sv' and version are the current SV SegmentationDataset by default
    so_kwargs = dict(working_dir=global_params.config.working_dir,
                     obj_type='sv')
    multi_params = [[par, so_kwargs] for par in multi_params]
    if generate_sv_meshes:
        _ = qu.QSUB_script(multi_params,
                           "mesh_caching",
                           n_max_co_processes=global_params.NCORE_TOTAL)
    _ = qu.QSUB_script(multi_params,
                       "sample_location_caching",
                       n_max_co_processes=global_params.NCORE_TOTAL)
    # recompute=False: only collect new sample_location property
    sd_proc.dataset_analysis(sd, compute_meshprops=True, recompute=False)
    log.info(
        'Finished preparation of cell SVs after {:.0f}s.'.format(time.time() -
                                                                 start))
    # create SegmentationDataset for each cell organelle
    for co in global_params.existing_cell_organelles:
        start = time.time()
        cd_dir = global_params.config.working_dir + "chunkdatasets/{}/".format(
            co)
        cd.initialize(kd,
                      kd.boundary,
                      chunk_size,
                      cd_dir,
                      box_coords=[0, 0, 0],
                      fit_box_size=True)
        log.info('Started object extraction of cellular organelles "{}" from '
                 '{} chunks.'.format(co, len(cd.chunk_dict)))
        prob_kd_path_dict = {
            co: getattr(global_params.config, 'kd_{}_path'.format(co))
        }
        # This creates a SegmentationDataset of type 'co'
        # get probability threshold
        prob_thresh = global_params.config.entries["Probathresholds"][co]

        path = "{}/knossosdatasets/{}_seg/".format(
            global_params.config.working_dir, co)
        target_kd = knossosdataset.KnossosDataset()
        target_kd.initialize_without_conf(path, kd.boundary, kd.scale,
                                          kd.experiment_name, mags=[1])
        target_kd = knossosdataset.KnossosDataset()
        target_kd.initialize_from_knossos_path(path)
        oew.from_probabilities_to_objects(
            cd,
            co,  # membrane_kd_path=global_params.config.kd_barrier_path,  # TODO: currently does not exist
            prob_kd_path_dict=prob_kd_path_dict,
            thresholds=[prob_thresh],
            workfolder=global_params.config.working_dir,
            hdf5names=[co],
            n_max_co_processes=None,
            target_kd=target_kd,
            n_folders_fs=n_folders_fs,
            debug=False,
            size=size,
            offset=offset,
            load_from_kd_overlaycubes=load_from_kd_overlaycubes)
        sd_co = SegmentationDataset(
            obj_type=co, working_dir=global_params.config.working_dir)

        # TODO: check if this is faster than the alternative below
        sd_proc.dataset_analysis(sd_co,
                                 recompute=True,
                                 compute_meshprops=False)
        multi_params = chunkify(sd_co.so_dir_paths, max_n_jobs)
        so_kwargs = dict(working_dir=global_params.config.working_dir,
                         obj_type=co)
        multi_params = [[par, so_kwargs] for par in multi_params]
        _ = qu.QSUB_script(multi_params,
                           "mesh_caching",
                           n_max_co_processes=global_params.NCORE_TOTAL)
        sd_proc.dataset_analysis(sd_co,
                                 recompute=False,
                                 compute_meshprops=True)
        # # Old alternative, requires many more reads/writes than the solution above
        # sd_proc.dataset_analysis(sd_co, recompute=True, compute_meshprops=True)

        # About 0.2 h per object class
        log.info('Started mapping of {} cellular organelles of type "{}" to '
                 'cell SVs.'.format(len(sd_co.ids), co))
        sd_proc.map_objects_to_sv(sd,
                                  co,
                                  global_params.config.kd_seg_path,
                                  n_jobs=max_n_jobs)
        log.info('Finished preparation of {} "{}"-SVs after {:.0f}s.'
                 ''.format(len(sd_co.ids), co,
                           time.time() - start))
Example #19
def run_create_neuron_ssd(prior_glia_removal=True):
    """
    Creates SuperSegmentationDataset with version 0.

    Parameters
    ----------
    prior_glia_removal : bool
        If False, a size filter is applied so that only SSVs above the minimum
        size (see ``global_params.min_cc_size_ssv``) are created, and SV sample
        locations are cached.

    Returns
    -------

    """
    log = initialize_logging('create_neuron_ssd',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    suffix = global_params.rag_suffix
    # TODO: the following paths currently require prior glia-splitting
    g_p = "{}/glia/neuron_rag{}.bz2".format(global_params.config.working_dir,
                                            suffix)
    rag_g = nx.read_edgelist(g_p, nodetype=np.uint)
    # this filtering is required e.g. if the RAG was not created by the glia-splitting procedure
    if not prior_glia_removal:
        sd = SegmentationDataset("sv",
                                 working_dir=global_params.config.working_dir)

        sv_size_dict = {}
        bbs = sd.load_cached_data('bounding_box') * sd.scaling
        for ii in range(len(sd.ids)):
            sv_size_dict[sd.ids[ii]] = bbs[ii]
        ccsize_dict = create_ccsize_dict(rag_g, sv_size_dict)
        log.debug("Finished preparation of SSV size dictionary based "
                  "on bounding box diagional of corresponding SVs.")
        before_cnt = len(rag_g.nodes())
        for ix in list(rag_g.nodes()):
            if ccsize_dict[ix] < global_params.min_cc_size_ssv:
                rag_g.remove_node(ix)
        log.debug("Removed %d neuron CCs because of size." %
                  (before_cnt - len(rag_g.nodes())))

    ccs = nx.connected_components(rag_g)
    cc_dict = {}
    for cc in ccs:
        cc_arr = np.array(list(cc))
        cc_dict[np.min(cc_arr)] = cc_arr

    cc_dict_inv = {}
    for ssv_id, cc in cc_dict.items():
        for sv_id in cc:
            cc_dict_inv[sv_id] = ssv_id
    log.info('Parsed RAG from {} with {} SSVs and {} SVs.'.format(
        g_p, len(cc_dict), len(cc_dict_inv)))

    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir,
        version='0',
        ssd_type="ssv",
        sv_mapping=cc_dict_inv)
    # create cache-arrays for frequently used attributes
    # also executes 'ssd.save_dataset_shallow()'
    ssd.save_dataset_deep(n_max_co_processes=global_params.NCORE_TOTAL)

    exec_skeleton.run_skeleton_generation()

    log.info('Finished SSD initialization. Starting cellular '
             'organelle mapping.')

    # map cellular organelles to SSVs
    # TODO: increase number of jobs in the next two QSUB submissions and sort by SSV size (descending)
    ssd_proc.aggregate_segmentation_object_mappings(
        ssd, global_params.existing_cell_organelles, qsub_pe="openmp")
    ssd_proc.apply_mapping_decisions(ssd,
                                     global_params.existing_cell_organelles,
                                     qsub_pe="openmp")
    log.info('Finished mapping of cellular organelles to SSVs. '
             'Writing individual SSV graphs.')

    # Write SSV RAGs
    pbar = tqdm.tqdm(total=len(ssd.ssv_ids), mininterval=0.5)
    for ssv in ssd.ssvs:
        # get all nodes in CC of this SSV
        # CCs with a single node do not exist in the global RAG
        if len(cc_dict[ssv.id]) > 1:
            n_list = nx.node_connected_component(rag_g, ssv.id)
            # get SSV RAG as subgraph
            ssv_rag = nx.subgraph(rag_g, n_list)
        else:
            ssv_rag = nx.Graph()
            # ssv.id is the minimal SV ID, and therefore the only SV in this case
            ssv_rag.add_edge(ssv.id, ssv.id)
        nx.write_edgelist(ssv_rag, ssv.edgelist_path)
        pbar.update(1)
    pbar.close()
    log.info('Finished saving individual SSV RAGs.')
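
A compact sketch of the SV-to-SSV mapping built above: each connected component of the RAG becomes one SSV, identified by its smallest SV ID (toy graph, hypothetical IDs):

import networkx as nx
import numpy as np

rag = nx.Graph([(10, 11), (11, 12), (20, 21)])
cc_dict = {}
for cc in nx.connected_components(rag):
    cc_arr = np.array(list(cc))
    cc_dict[int(np.min(cc_arr))] = cc_arr

cc_dict_inv = {int(sv_id): ssv_id for ssv_id, cc in cc_dict.items() for sv_id in cc}
print(cc_dict_inv)  # e.g. {10: 10, 11: 10, 12: 10, 20: 20, 21: 20}
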