Example #1
def run_create_rag():
    """
    If ``global_params.config.prior_glia_removal==True``:
        stores pruned RAG at ``global_params.config.pruned_rag_path``, required for all glia
        removal steps. :func:`~syconn.exec.exec_multiview.run_glia_splitting`
        will finally store the ``neuron_rag.bz2`` at the currently active working directory.
    else:
        stores pruned RAG at ``global_params.config.working_dir + /glia/neuron_rag.bz2``,
        required by :func:`~syconn.exec.exec_multiview.run_create_neuron_ssd`.
    """
    log = initialize_logging('create_rag',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)
    # Crop RAG according to cell SVs found during SD generation and apply size threshold
    G = nx.read_edgelist(global_params.config.init_rag_path, nodetype=np.uint)
    if 0 in G.nodes():
        G.remove_node(0)
        log.warning('Found background node 0 in original graph. Removing.')
    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)
    log.info("Found {} SVs in initial RAG.".format(len(all_sv_ids_in_rag)))

    # add single SV connected components to initial graph
    sd = SegmentationDataset(obj_type='sv',
                             working_dir=global_params.config.working_dir)
    sv_ids = sd.ids
    diff = np.array(list(set(sv_ids).difference(set(all_sv_ids_in_rag))))
    log.info(
        'Found {} single-element connected component SVs which were missing'
        ' in initial RAG.'.format(len(diff)))

    for ix in diff:
        G.add_edge(ix, ix)

    log.debug("Found {} SVs in initial RAG after adding size-one connected "
              "components.".format(G.number_of_nodes()))

    # remove small connected components
    sv_size_dict = {}
    bbs = sd.load_cached_data('bounding_box') * sd.scaling
    for ii in range(len(sd.ids)):
        sv_size_dict[sd.ids[ii]] = bbs[ii]
    ccsize_dict = create_ccsize_dict(G, sv_size_dict)
    log.debug("Finished preparation of SSV size dictionary based "
              "on bounding box diagonal of corresponding SVs.")
    before_cnt = len(G.nodes())
    for ix in list(G.nodes()):
        if ccsize_dict[ix] < global_params.config['glia']['min_cc_size_ssv']:
            G.remove_node(ix)
    # nx.connected_component_subgraphs was removed in NetworkX 2.4; build the
    # component subgraphs explicitly instead
    cc_gs = [G.subgraph(c).copy() for c in nx.connected_components(G)]
    log.info("Removed {} SVs from RAG because of size. Final RAG contains {}"
             " SVs in {} CCs.".format(before_cnt - G.number_of_nodes(),
                                      G.number_of_nodes(), len(cc_gs)))
    nx.write_edgelist(G, global_params.config.pruned_rag_path)

    if not global_params.config.prior_glia_removal:
        os.makedirs(global_params.config.working_dir + '/glia/', exist_ok=True)
        shutil.copy(global_params.config.pruned_rag_path,
                    global_params.config.working_dir + '/glia/neuron_rag.bz2')
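
The pruning above relies on ``create_ccsize_dict``, which is not shown in this
example. A minimal sketch of the underlying idea, assuming the size of a
connected component is the diagonal of the union of its members' bounding
boxes (the exact aggregation used by SyConn may differ):

import networkx as nx
import numpy as np

def ccsize_dict_sketch(g, sv_bb_dict):
    """Map every node to the size of its connected component.

    ``sv_bb_dict`` maps SV ID -> (2, 3) array of min/max coordinates in
    physical units (the cached voxel bounding box multiplied by the scaling).
    """
    ccsize = {}
    for cc in nx.connected_components(g):
        bbs = np.array([sv_bb_dict[n] for n in cc])  # shape (N, 2, 3)
        # diagonal of the union bounding box of all member SVs (assumed metric)
        diag = np.linalg.norm(bbs[:, 1].max(axis=0) - bbs[:, 0].min(axis=0))
        for n in cc:
            ccsize[n] = diag
    return ccsize

With such a dictionary, the pruning step amounts to dropping every node whose
value falls below the ``min_cc_size_ssv`` threshold, as done above.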
Example #2
def run_glia_rendering(max_n_jobs=None):
    """
    Uses the pruned RAG (stored as edge list .bz2 file) which is computed
     in `init_cell_subcell_sds`.

    Parameters
    ----------
    max_n_jobs :

    Returns
    -------

    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NGPU_TOTAL * 4 if global_params.PYOPENGL_PLATFORM == 'egl' \
            else global_params.NCORE_TOTAL * 4
    log = initialize_logging('glia_view_rendering',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)
    log.info("Preparing RAG.")
    np.random.seed(0)

    # view rendering prior to glia removal, choose SSD accordingly
    # glia removal is based on the initial RAG and does not require explicitly stored SSVs
    # TODO: refactor how splits are stored, currently those are stored at ssv_tmp
    version = "tmp"

    G = nx.read_edgelist(global_params.config.pruned_rag_path,
                         nodetype=np.uint)

    # nx.connected_component_subgraphs was removed in NetworkX 2.4
    cc_gs = sorted((G.subgraph(c).copy() for c in nx.connected_components(G)),
                   key=len, reverse=True)
    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)

    # generate parameter for view rendering of individual SSV
    # TODO: remove SVs below minimum size (-> global_params.min_cc_size_ssv)
    sds = SegmentationDataset("sv",
                              working_dir=global_params.config.working_dir)
    sv_size_dict = {}
    bbs = sds.load_cached_data('bounding_box') * sds.scaling
    for ii in range(len(sds.ids)):
        sv_size_dict[sds.ids[ii]] = bbs[ii]
    ccsize_dict = create_ccsize_dict(cc_gs,
                                     sv_size_dict,
                                     is_connected_components=True)

    multi_params = cc_gs
    big_ssv = []
    small_ssv = []
    for g in multi_params:
        if g.number_of_nodes() > RENDERING_MAX_NB_SV:
            big_ssv.append(g)
        elif ccsize_dict[list(g.nodes())[0]] < global_params.min_cc_size_ssv:
            pass  # ignore this CC
        else:
            small_ssv.append(g)

    log.info("View rendering for glia separation started.")
    # identify huge SSVs and process them on the entire cluster
    if len(big_ssv) > 0:
        n_threads = 2
        log.info("Processing {} huge SSVs in {} threads on the entire cluster"
                 ".".format(len(big_ssv), n_threads))
        q_in = Queue()
        q_out = Queue()
        for kk, g in enumerate(big_ssv):
            q_in.put((kk, g, version))
        for _ in range(n_threads):
            q_in.put(-1)
        ps = [
            Process(target=_run_huge_ssv_render_worker, args=(q_in, q_out))
            for _ in range(n_threads)
        ]
        for p in ps:
            p.start()
            time.sleep(0.5)
        q_in.close()
        q_in.join_thread()
        for p in ps:
            p.join()
        if q_out.qsize() != len(big_ssv):
            raise ValueError(
                'Not all `_run_huge_ssv_render_worker` jobs completed successfully.'
            )
    # render small SSVs without overhead, using single CPUs across the whole cluster
    multi_params = small_ssv
    np.random.shuffle(multi_params)
    multi_params = chunkify(multi_params, max_n_jobs)
    # the list of SSV IDs and the SSD parameters need to be passed to each QSUB job
    multi_params = [(ixs, global_params.config.working_dir, version)
                    for ixs in multi_params]
    _ = qu.QSUB_script(multi_params,
                       "render_views_glia_removal",
                       log=log,
                       n_max_co_processes=global_params.NGPU_TOTAL,
                       n_cores=global_params.NCORES_PER_NODE //
                       global_params.NGPUS_PER_NODE,
                       additional_flags="--gres=gpu:1",
                       remove_jobfolder=True)

    # check completeness
    log.info(
        'Finished view rendering for glia separation. Checking completeness.')
    sd = SegmentationDataset("sv",
                             working_dir=global_params.config.working_dir)
    res = find_missing_sv_views(sd,
                                woglia=False,
                                n_cores=global_params.NCORES_PER_NODE)
    missing_not_contained_in_rag = []
    missing_contained_in_rag = []
    for el in res:
        if el not in all_sv_ids_in_rag:
            # TODO: decide whether to use or not
            missing_not_contained_in_rag.append(el)
        else:
            missing_contained_in_rag.append(el)
    if len(missing_contained_in_rag) != 0:
        msg = "Not all SVs were rendered completely! {}/{} missing:\n" \
              "{}".format(len(missing_contained_in_rag), len(all_sv_ids_in_rag),
                          missing_contained_in_rag[:100])
        log.error(msg)
        raise ValueError(msg)
    else:
        log.info('All SVs now contain views required for glia prediction.')
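
``_run_huge_ssv_render_worker`` is referenced but not shown above. A minimal
sketch of a compatible worker, assuming it consumes ``(index, graph, version)``
tuples from the input queue until it receives the ``-1`` sentinel and reports
every finished job on the output queue (the actual rendering call is elided):

def _huge_ssv_render_worker_sketch(q_in, q_out):
    """Consume (index, graph, version) jobs until the -1 sentinel arrives."""
    while True:
        job = q_in.get()
        if job == -1:  # one sentinel is queued per worker process
            break
        kk, g, version = job
        # ... render the views of the SSV defined by graph `g` here ...
        q_out.put(kk)  # the main process compares q_out.qsize() to len(big_ssv)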
Example #3
def run_create_neuron_ssd(prior_glia_removal=True):
    """
    Creates SuperSegmentationDataset with version 0.

    Parameters
    ----------
    prior_glia_removal : bool
        If False, will apply filtering to create SSO objects above minimum size, see global_params.min_cc_size_ssv
         and cache SV sample locations.

    Returns
    -------

    """
    log = initialize_logging('create_neuron_ssd',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    suffix = global_params.rag_suffix
    # TODO: the following paths currently require prior glia-splitting
    g_p = "{}/glia/neuron_rag{}.bz2".format(global_params.config.working_dir,
                                            suffix)
    rag_g = nx.read_edgelist(g_p, nodetype=np.uint)
    # this filtering is required if, e.g., the RAG was not created by the glia-splitting procedure
    if not prior_glia_removal:
        sd = SegmentationDataset("sv",
                                 working_dir=global_params.config.working_dir)

        sv_size_dict = {}
        bbs = sd.load_cached_data('bounding_box') * sd.scaling
        for ii in range(len(sd.ids)):
            sv_size_dict[sd.ids[ii]] = bbs[ii]
        ccsize_dict = create_ccsize_dict(rag_g, sv_size_dict)
        log.debug("Finished preparation of SSV size dictionary based "
                  "on bounding box diagional of corresponding SVs.")
        before_cnt = len(rag_g.nodes())
        for ix in list(rag_g.nodes()):
            if ccsize_dict[ix] < global_params.min_cc_size_ssv:
                rag_g.remove_node(ix)
        log.debug("Removed %d neuron CCs because of size." %
                  (before_cnt - len(rag_g.nodes())))

    ccs = nx.connected_components(rag_g)
    cc_dict = {}
    for cc in ccs:
        cc_arr = np.array(list(cc))
        cc_dict[np.min(cc_arr)] = cc_arr

    cc_dict_inv = {}
    for ssv_id, cc in cc_dict.items():
        for sv_id in cc:
            cc_dict_inv[sv_id] = ssv_id
    log.info('Parsed RAG from {} with {} SSVs and {} SVs.'.format(
        g_p, len(cc_dict), len(cc_dict_inv)))

    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir,
        version='0',
        ssd_type="ssv",
        sv_mapping=cc_dict_inv)
    # create cache-arrays for frequently used attributes
    # this also executes 'ssd.save_dataset_shallow()'
    ssd.save_dataset_deep(n_max_co_processes=global_params.NCORE_TOTAL)

    exec_skeleton.run_skeleton_generation()

    log.info('Finished SSD initialization. Starting cellular '
             'organelle mapping.')

    # map cellular organelles to SSVs
    # TODO: increase number of jobs in the next two QSUB submissions and sort by SSV size (descending)
    ssd_proc.aggregate_segmentation_object_mappings(
        ssd, global_params.existing_cell_organelles, qsub_pe="openmp")
    ssd_proc.apply_mapping_decisions(ssd,
                                     global_params.existing_cell_organelles,
                                     qsub_pe="openmp")
    log.info('Finished mapping of cellular organelles to SSVs. '
             'Writing individual SSV graphs.')

    # Write SSV RAGs
    pbar = tqdm.tqdm(total=len(ssd.ssv_ids), mininterval=0.5)
    for ssv in ssd.ssvs:
        # get all nodes in the CC of this SSV
        if len(cc_dict[ssv.id]) > 1:  # CCs with 1 node do not exist in the global RAG
            n_list = nx.node_connected_component(rag_g, ssv.id)
            # get SSV RAG as subgraph
            ssv_rag = nx.subgraph(rag_g, n_list)
        else:
            ssv_rag = nx.Graph()
            # ssv.id is the minimal SV ID, and therefore the only SV in this case
            ssv_rag.add_edge(ssv.id, ssv.id)
        nx.write_edgelist(ssv_rag, ssv.edgelist_path)
        pbar.update(1)
    pbar.close()
    log.info('Finished saving individual SSV RAGs.')
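
Taken together, the three examples cover the RAG-centric part of a SyConn run.
A hedged sketch of how they might be chained, with ``run_glia_splitting`` assumed
from the docstring of Example #1 and the intermediate glia prediction elided:

def rag_pipeline_sketch():
    run_create_rag()  # Example #1: prune the initial RAG and store it
    if global_params.config.prior_glia_removal:
        run_glia_rendering()  # Example #2: render views used for glia prediction
        # ... glia prediction ...
        run_glia_splitting()  # assumed; stores glia/neuron_rag.bz2 (see Example #1)
    run_create_neuron_ssd(  # Example #3: build the SSD from the final RAG
        prior_glia_removal=global_params.config.prior_glia_removal)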