Example #1
def preds2kzip(pred_folder: str, out_path: str, ssd_path: str, col_lookup: dict,
               label_mappings: Optional[List[Tuple[int, int]]] = None):
    pred_folder = os.path.expanduser(pred_folder)
    out_path = os.path.expanduser(out_path)
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    files = glob.glob(pred_folder + '*_preds.pkl')
    ssd = SuperSegmentationDataset(ssd_path)
    for file in tqdm(files):
        hc_voxeled = preds2hc(file)
        sso_id = int(re.findall(r"/sso_(\d+).", file)[0])
        sso = ssd.get_super_segmentation_object(sso_id)

        verts = sso.mesh[1].reshape(-1, 3)
        hc = HybridCloud(nodes=hc_voxeled.nodes, edges=hc_voxeled.edges, node_labels=hc_voxeled.node_labels,
                         pred_node_labels=hc_voxeled.pred_node_labels, vertices=verts)
        hc.nodel2vertl()
        hc.prednodel2predvertl()
        if label_mappings is not None:
            hc.map_labels(label_mappings)

        cols = np.array([col_lookup[el] for el in hc.pred_labels.squeeze()], dtype=np.uint8)
        sso.mesh2kzip(out_path + f'p_{sso_id}.k.zip', ext_color=cols)
        cols = np.array([col_lookup[el] for el in hc.labels.squeeze()], dtype=np.uint8)
        sso.mesh2kzip(out_path + f't_{sso_id}.k.zip', ext_color=cols)

        comments = list(hc.pred_node_labels.reshape(-1))
        for node in range(len(hc.nodes)):
            if hc.pred_node_labels[node] != hc.node_labels[node] and hc.pred_node_labels[node] != -1:
                comments[node] = 'e' + str(comments[node])
        sso.save_skeleton_to_kzip(out_path + f'p_{sso_id}.k.zip', comments=comments)
        comments = hc.node_labels.reshape(-1)
        sso.save_skeleton_to_kzip(out_path + f't_{sso_id}.k.zip', comments=comments)
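A minimal usage sketch for the function above; the folders, the colour table and the label mapping are hypothetical placeholders, not values from the original project:

# Hypothetical call to preds2kzip; all paths and the colour lookup are placeholders.
col_lookup = {-1: (127, 127, 127, 255), 0: (76, 92, 158, 255), 1: (255, 125, 125, 255)}
preds2kzip(pred_folder='~/predictions/',
           out_path='~/kzips/',
           ssd_path='/wholebrain/scratch/areaxfs3/',
           col_lookup=col_lookup,
           label_mappings=[(3, 1)])  # e.g. map label 3 onto label 1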
Example #2
def run_matrix_export():
    # cache cell attributes
    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir)
    ssd.save_dataset_deep()
    log = initialize_logging('synapse_analysis',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)
    sd_syn_ssv = SegmentationDataset(
        working_dir=global_params.config.working_dir, obj_type='syn_ssv')

    # as an alternative to the skeletons, use vertex predictions or
    # sample_locations, ~3.5h @ 300 cpus
    # TODO: requires speed-up; one could collect properties only for synapses >
    #  probability threshold
    #     synssv_ids = synssv_ids[syn_prob > .5]
    #     ssv_partners = ssv_partners[syn_prob > .5]
    # One could also re-use the cached synssv IDs (computed during mapping of
    # synssv to SSVs) -> saves finding SSV ID indices in synapse arrays (->
    # slow for many synapses)
    cps.collect_properties_from_ssv_partners(global_params.config.working_dir,
                                             debug=True)
    # collect the new object attributes added above (partner axoness, cell types,
    # synapse probabilities etc.); no need to compute size/rep_coord etc. ->
    # recompute=False
    dataset_analysis(sd_syn_ssv, compute_meshprops=False, recompute=False)
    log.info('Synapse property collection from SSVs finished.')

    # export_matrix
    log.info('Exporting connectivity matrix now.')
    dest_folder = global_params.config.working_dir + '/connectivity_matrix/'
    cps.export_matrix(dest_folder=dest_folder)
    log.info('Connectivity matrix was exported to "{}".'.format(dest_folder))
Example #3
def test_multiprocessed_vs_serial_rendering():
    # TODO: use example data and improve logging, see test_backend.py
    working_dir = "/wholebrain/scratch/areaxfs3/"
    render_indexview = True

    ssc = SuperSegmentationDataset(working_dir)
    ssv = ssc.get_super_segmentation_object(29753344)
    ssv.nb_cpus = cpu_count()
    exlocs = np.concatenate(ssv.sample_locations())
    exlocs = exlocs[:1000]
    views = render_sso_coords_multiprocessing(
        ssv,
        working_dir,
        rendering_locations=exlocs,
        render_indexviews=render_indexview,
        n_jobs=10,
        verbose=True)

    # overwrite any precomputed caches by re-initialization of SSV
    ssv = ssc.get_super_segmentation_object(29753344)
    ssv.nb_cpus = cpu_count()
    exlocs = np.concatenate(ssv.sample_locations())
    exlocs = exlocs[:1000]
    if render_indexview:
        views2 = render_sso_coords_index_views(ssv, exlocs, verbose=True)
    else:
        views2 = render_sso_coords(ssv, exlocs, verbose=True)

    print('Fraction of different index values in index-views: {:.4f}'
          ''.format(np.sum(views != views2) / np.prod(views.shape)))
    assert np.all(views == views2)
Example #4
def run_spiness_prediction(max_n_jobs_gpu: Optional[int] = None,
                           max_n_jobs: Optional[int] = None):
    """
    Will store semantic spine labels inside ``ssv.label_dict('vertex')['spiness']``.

    Todo:
        * run rendering chunk-wise instead of on-the-fly and then perform
          prediction chunk-wise as well, adopt from spiness step.

    Args:
        max_n_jobs_gpu: Number of parallel GPU jobs. Used for the inference.
        max_n_jobs: Number of parallel CPU jobs. Used for the mapping step.
    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
    if max_n_jobs_gpu is None:
        max_n_jobs_gpu = global_params.NGPU_TOTAL * 2
    log = initialize_logging('spine_identification', global_params.config.working_dir
                             + '/logs/', overwrite=False)
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)

    # run semantic spine segmentation on multi views
    sd = ssd.get_segmentationdataset("sv")
    # chunk them
    multi_params = chunkify(sd.so_dir_paths, max_n_jobs_gpu)
    # set model properties
    model_kwargs = dict(src=global_params.config.mpath_spiness,
                        multi_gpu=False)
    so_kwargs = dict(working_dir=global_params.config.working_dir)
    pred_kwargs = dict(pred_key=global_params.semseg2mesh_spines['semseg_key'])
    multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs]
                    for par in multi_params]
    log.info('Starting spine prediction.')
    qu.QSUB_script(multi_params, "predict_spiness_chunked", log=log,
                   n_max_co_processes=global_params.NGPU_TOTAL,
                   n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE,
                   suffix="",  additional_flags="--gres=gpu:1",
                   remove_jobfolder=True)
    log.info('Finished spine prediction.')
    # map semantic spine segmentation of multi views on SSV mesh
    # TODO: CURRENTLY HIGH MEMORY CONSUMPTION
    if not ssd.mapping_dict_exists:
        raise ValueError('Mapping dict does not exist.')
    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id]) for ssv_id
                               in ssd.ssv_ids])
    # sort ssv ids according to their number of SVs (descending)
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)
    # add ssd parameters
    kwargs_semseg2mesh = global_params.semseg2mesh_spines
    kwargs_semsegforcoords = global_params.semseg2coords_spines
    multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir,
                     kwargs_semseg2mesh, kwargs_semsegforcoords) for ssv_ids in multi_params]
    log.info('Starting mapping of spine predictions to neurite surfaces.')
    qu.QSUB_script(multi_params, "map_spiness", n_max_co_processes=global_params.NCORE_TOTAL,
                   n_cores=4, suffix="", additional_flags="", remove_jobfolder=True, log=log)
    log.info('Finished spine mapping.')
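A short, hedged read-back sketch for the stored spine labels: it assumes the prediction and mapping steps above have finished, uses the vertex label key named in the docstring, and the import path is an assumption as well:

# Read back the per-vertex spine labels written by the pipeline above (sketch).
import numpy as np
from syconn import global_params
from syconn.reps.super_segmentation import SuperSegmentationDataset  # assumed module path

ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
ssv = ssd.get_super_segmentation_object(ssd.ssv_ids[0])
spine_labels = ssv.label_dict('vertex')['spiness']  # one label per mesh vertex
print(spine_labels.shape, np.unique(spine_labels))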
Example #5
def run_spiness_prediction(max_n_jobs_gpu=None, max_n_jobs=None):
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
    if max_n_jobs_gpu is None:
        max_n_jobs_gpu = global_params.NGPU_TOTAL * 2
    log = initialize_logging('spine_identification',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir)
    pred_key = "spiness"

    # run semantic spine segmentation on multi views
    sd = ssd.get_segmentationdataset("sv")
    # chunk them
    multi_params = chunkify(sd.so_dir_paths, max_n_jobs_gpu)
    # set model properties
    model_kwargs = dict(src=global_params.config.mpath_spiness,
                        multi_gpu=False)
    so_kwargs = dict(working_dir=global_params.config.working_dir)
    pred_kwargs = dict(pred_key=pred_key)
    multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs]
                    for par in multi_params]
    log.info('Starting spine prediction.')
    qu.QSUB_script(multi_params,
                   "predict_spiness_chunked",
                   log=log,
                   n_max_co_processes=global_params.NGPU_TOTAL,
                   n_cores=global_params.NCORES_PER_NODE //
                   global_params.NGPUS_PER_NODE,
                   suffix="",
                   additional_flags="--gres=gpu:1",
                   remove_jobfolder=True)
    log.info('Finished spine prediction.')
    # map semantic spine segmentation of multi views on SSV mesh
    # TODO: CURRENTLY HIGH MEMORY CONSUMPTION
    if not ssd.mapping_dict_exists:
        raise ValueError('Mapping dict does not exist.')
    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    nb_svs_per_ssv = np.array(
        [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids])
    # sort ssv ids according to their number of SVs (descending)
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)
    # add ssd parameters
    kwargs_semseg2mesh = dict(semseg_key=pred_key, force_recompute=True)
    multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir,
                     kwargs_semseg2mesh) for ssv_ids in multi_params]
    log.info('Starting mapping of spine predictions to neurite surfaces.')
    qu.QSUB_script(multi_params,
                   "map_spiness",
                   n_max_co_processes=global_params.NCORE_TOTAL,
                   n_cores=4,
                   suffix="",
                   additional_flags="",
                   remove_jobfolder=True,
                   log=log)
    log.info('Finished spine mapping.')
Example #6
def run_matrix_export():
    """
    Export the matrix as a ``.csv`` file at the ``connectivity_matrix`` folder
    of the currently active working directory.
    Also collects the following synapse properties from prior analysis
    steps:
        * 'partner_axoness': Cell compartment type (axon: 1, dendrite: 0, soma: 2,
            en-passant bouton: 3, terminal bouton: 4) of the partner neurons.
        * 'partner_spiness': Spine compartment predictions of both neurons.
        * 'partner_celltypes': Cell type of both neurons.
        * 'latent_morph': Local morphology embeddings of the pre- and post-
            synaptic partners.

    Examples:
        See :class:`~syconn.reps.segmentation.SegmentationDataset` for examples.
    """
    # cache cell attributes
    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir)
    ssd.save_dataset_deep()
    log = initialize_logging('synapse_analysis',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)

    sd_syn_ssv = SegmentationDataset(
        working_dir=global_params.config.working_dir, obj_type='syn_ssv')

    # as an alternative to the skeletons, use vertex predictions or
    # sample_locations, ~3.5h @ 300 cpus
    # TODO: requires speed-up; one could collect properties only for synapses >
    #  probability threshold
    #     synssv_ids = synssv_ids[syn_prob > .5]
    #     ssv_partners = ssv_partners[syn_prob > .5]
    # One could also re-use the cached synssv IDs (computed during mapping of
    # synssv to SSVs) -> saves finding SSV ID indices in synapse arrays (->
    # slow for many synapses)
    cps.collect_properties_from_ssv_partners(global_params.config.working_dir,
                                             debug=True)
    # collect the new object attributes added above (partner axoness, cell types,
    # synapse probabilities etc.); no need to compute size/rep_coord etc. ->
    # recompute=False
    dataset_analysis(sd_syn_ssv, compute_meshprops=False, recompute=False)
    log.info('Synapse property collection from SSVs finished.')

    # export_matrix
    log.info('Exporting connectivity matrix now.')
    dest_folder = global_params.config.working_dir + '/connectivity_matrix/'
    cps.export_matrix(dest_folder=dest_folder)
    log.info('Connectivity matrix was exported to "{}".'.format(dest_folder))
Example #7
def run_morphology_embedding(max_n_jobs=None):
    if max_n_jobs is None:
        max_n_jobs = global_params.NGPU_TOTAL * 2
    log = initialize_logging('morphology_embedding',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir)
    pred_key_appendix = ""

    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    nb_svs_per_ssv = np.array(
        [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids])
    # sort ssv ids according to their number of SVs (descending)
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)
    # add ssd parameters
    multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir,
                     pred_key_appendix) for ssv_ids in multi_params]
    qu.QSUB_script(multi_params,
                   "generate_morphology_embedding",
                   n_max_co_processes=global_params.NGPU_TOTAL,
                   n_cores=global_params.NCORES_PER_NODE //
                   global_params.NGPUS_PER_NODE,
                   log=log,
                   suffix="",
                   additional_flags="--gres=gpu:1",
                   remove_jobfolder=True)
    log.info('Finished extraction of cell morphology embedding.')
Example #8
def run_axoness_mapping(max_n_jobs=None):
    """Maps axon predictions of rendering locations onto SSV skeletons."""
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
    log = initialize_logging('axon_mapping',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    pred_key_appendix = ""
    # Working directory has to be changed globally in global_params
    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir)

    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    # sort ssv ids according to their number of SVs (descending)
    nb_svs_per_ssv = np.array(
        [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids])
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)

    multi_params = [(par, pred_key_appendix) for par in multi_params]
    log.info('Starting axoness mapping.')
    _ = qu.QSUB_script(multi_params,
                       "map_viewaxoness2skel",
                       log=log,
                       n_max_co_processes=global_params.NCORE_TOTAL,
                       suffix="",
                       n_cores=1,
                       remove_jobfolder=True)
    # TODO: perform completeness check
    log.info('Finished axoness mapping.')
Example #9
def run_neuron_rendering(max_n_jobs: Optional[int] = None):
    """
    Render the default views as defined in ``global_params`` [WIP].

    Args:
        max_n_jobs: Number of parallel jobs.

    Notes:
        Requires :func:`~run_create_neuron_ssd`.
    """
    log = initialize_logging('neuron_view_rendering',
                             global_params.config.working_dir + '/logs/')
    ps = [Process(target=_run_neuron_rendering_big_helper, args=(max_n_jobs, )),
          Process(target=_run_neuron_rendering_small_helper, args=(max_n_jobs, ))]
    for p in ps:
        p.start()
        time.sleep(10)
    for p in ps:
        p.join()
    log.info('Finished rendering of all SSVs. Checking completeness.')
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    res = find_incomplete_ssv_views(ssd, woglia=True, n_cores=global_params.NCORES_PER_NODE)
    if len(res) != 0:
        msg = "Not all SSVs were rendered completely! {}/{} missing:\n" \
              "{}".format(len(res), len(ssd.ssv_ids),
                          res[:10])
        log.error(msg)
        raise RuntimeError(msg)
    log.info('Success.')
Example #10
def run_morphology_embedding(max_n_jobs: Optional[int] = None):
    """
    Infer local morphology embeddings for all neuron reconstructions based on
    the triplet-loss trained cellular morphology learning network (tCMN).

    Args:
        max_n_jobs: Number of parallel jobs.

    Notes:
        Requires :func:`~run_create_neuron_ssd`, :func:`~run_neuron_rendering` and
        :func:`~syconn.exec.skeleton.run_skeleton_generation`.
    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NGPU_TOTAL * 2
    log = initialize_logging('morphology_embedding', global_params.config.working_dir
                             + '/logs/', overwrite=False)
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    pred_key_appendix = ""

    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id]) for ssv_id
                               in ssd.ssv_ids])
    # sort ssv ids according to their number of SVs (descending)
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)
    # add ssd parameters
    multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir,
                     pred_key_appendix) for ssv_ids in multi_params]
    qu.QSUB_script(multi_params, "generate_morphology_embedding",
                   n_max_co_processes=global_params.NGPU_TOTAL,
                   n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE,
                   log=log, suffix="", additional_flags="--gres=gpu:1",
                   remove_jobfolder=True)
    log.info('Finished extraction of cell morphology embedding.')
Example #11
def run_morphology_embedding():
    log = initialize_logging('morphology_embedding',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir)
    pred_key_appendix = ""

    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    nb_svs_per_ssv = np.array(
        [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids])
    # sort ssv ids according to their number of SVs (descending)
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, 2000)
    # add ssd parameters
    multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir,
                     pred_key_appendix) for ssv_ids in multi_params]
    qu.QSUB_script(multi_params,
                   "generate_morphology_embedding",
                   pe="openmp",
                   queue=None,
                   n_cores=10,
                   suffix="",
                   additional_flags="--gres=gpu:1",
                   resume_job=False)  # removed -V (used with QSUB)
    log.info('Finished extraction of cell morphology embedding.')
Example #12
class data:
    ssc = SuperSegmentationDataset('/wholebrain/scratch/areaxfs3/')
    ssv = ssc.get_super_segmentation_object(29753344)

    def __init__(self):
        ssc = SuperSegmentationDataset('/wholebrain/scratch/areaxfs3/')
        ssv = ssc.get_super_segmentation_object(29753344)
        #self.ssc = ssc.get_super_segmentation_object(29753344)
        exloc = np.array([5602, 4173, 4474]) * ssv.scaling
        self.exlocs = np.concatenate(ssv.sample_locations())
Example #13
def load_celltype_ctgt(m):
    ct = SSVCelltype(None, None)
    ssv_ids = list(ct.train_d.squeeze()) + list(ct.valid_d.squeeze())
    ssv_labels = list(ct.train_l) + list(ct.valid_l)
    ssv_labels = np.concatenate([[l] * 3 for l in ssv_labels])
    ssd = SuperSegmentationDataset(working_dir="/wholebrain/scratch/areaxfs/",
                                   version="6")
    predict_latent_ssd(ssd, m, ssv_ids)
    latent = load_latent_data(ssd, ssv_ids)
    return latent, ssv_labels
Example #14
def run_celltype_prediction(max_n_jobs_gpu: Optional[int] = None):
    """
    Run the celltype inference based on the ``img2scalar`` CMN.

    Args:
        max_n_jobs_gpu: Number of parallel GPU jobs.

    Notes:
        Requires :func:`~run_create_neuron_ssd` and :func:`~run_neuron_rendering`.
    """
    if max_n_jobs_gpu is None:
        max_n_jobs_gpu = global_params.NGPU_TOTAL * 2
    log = initialize_logging('celltype_prediction', global_params.config.working_dir + '/logs/',
                             overwrite=False)
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    # shuffle SV IDs
    np.random.seed(0)

    log.info('Starting cell type prediction.')
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])
    multi_params = ssd.ssv_ids
    ordering = np.argsort(nb_svs_per_ssv)
    multi_params = multi_params[ordering[::-1]]
    max_n_jobs_gpu = np.max([max_n_jobs_gpu, len(multi_params) // 200])  # at most 200 SSV per job
    multi_params = chunkify(multi_params, max_n_jobs_gpu)
    # job parameters are read sequentially, i.e. to pass only a single list
    # as a parameter an additional axis is needed
    multi_params = [(ixs, ) for ixs in multi_params]

    path_to_out = qu.QSUB_script(multi_params, "predict_cell_type", log=log,
                                 n_max_co_processes=global_params.NNODES_TOTAL,
                                 suffix="", additional_flags="--gres=gpu:1",
                                 n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE,
                                 remove_jobfolder=True)
    log.info('Finished prediction of {} SSVs. Checking completeness.'
             ''.format(len(ordering)))
    out_files = glob.glob(path_to_out + "*.pkl")
    err = []
    for fp in out_files:
        with open(fp, "rb") as f:
            local_err = pkl.load(f)
        err += list(local_err)
    if len(err) > 0:
        msg = "{} errors occurred for SSVs with ID: " \
              "{}".format(len(err), [el[0] for el in err])
        log.error(msg)
        raise ValueError(msg)
    else:
        log.info('Success.')
Example #15
def run_semsegaxoness_mapping(max_n_jobs: Optional[int] = None):
    """
    Map semantic segmentation results of the 2D projections onto the cell
    reconstruction mesh.
    Generates the following attributes by default in
    :py:attr:`~syconn.reps.super_segmentation_object.SuperSegmentationObject.skeleton`:
        * "axoness": Vertex predictions mapped to skeleton (see
          ``global_params.map_properties_semsegax``).
        * "axoness_avg10000": Sliding window average along skeleton (10um traversal length).
        * "axoness_avg10000_comp_maj": Majority vote on connected components after removing the
          soma.

    Args:
        max_n_jobs: Number of parallel jobs.

    Notes:
        Requires :func:`~run_create_neuron_ssd`, :func:`~run_neuron_rendering`,
        :func:`~run_semsegaxoness_prediction` and
        :func:`~syconn.exec.skeleton.run_skeleton_generation`.
    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
    """Maps axon prediction of rendering locations onto SSV skeletons"""
    log = initialize_logging('axon_mapping', global_params.config.working_dir + '/logs/',
                             overwrite=False)
    pred_key_appendix = ""
    # Working directory has to be changed globally in global_params
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)

    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    # sort ssv ids according to their number of SVs (descending)
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id]) for ssv_id
                               in ssd.ssv_ids])
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)

    multi_params = [(par, pred_key_appendix) for par in multi_params]
    log.info('Starting axoness mapping.')
    _ = qu.QSUB_script(multi_params, "map_semsegaxoness2skel", log=log,
                       n_max_co_processes=global_params.NCORE_TOTAL,
                       suffix="", n_cores=1, remove_jobfolder=True)
    # TODO: perform completeness check
    log.info('Finished axoness mapping.')
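A hedged sketch of how the mapped skeleton attributes listed in the docstring could be inspected afterwards (same import assumptions as in the spine read-back sketch above):

# Inspect the per-node compartment labels written by the mapping step (sketch).
import numpy as np
from syconn import global_params
from syconn.reps.super_segmentation import SuperSegmentationDataset  # assumed module path

ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
ssv = ssd.get_super_segmentation_object(ssd.ssv_ids[0])
ssv.load_skeleton()
ax = ssv.skeleton['axoness_avg10000_comp_maj']  # one label per skeleton node
print(np.unique(ax, return_counts=True))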
Example #16
def run_celltype_prediction(max_n_jobs=100):
    log = initialize_logging('celltype_prediction',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir)
    # shuffle SV IDs
    np.random.seed(0)

    log.info('Starting cell type prediction.')
    nb_svs_per_ssv = np.array(
        [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids])
    multi_params = ssd.ssv_ids
    ordering = np.argsort(nb_svs_per_ssv)
    multi_params = multi_params[ordering[::-1]]
    max_n_jobs = np.max([max_n_jobs,
                         len(multi_params) // 200])  # at most 200 SSV per job
    multi_params = chunkify(multi_params, max_n_jobs)
    # job parameters are read sequentially, i.e. to pass only a single list
    # as a parameter an additional axis is needed
    multi_params = [(ixs, ) for ixs in multi_params]

    # TODO: switch n_max_co_processes to `global_params.NGPUS_TOTAL` as soon as EGL resource allocation works!
    path_to_out = qu.QSUB_script(multi_params,
                                 "predict_cell_type",
                                 n_max_co_processes=global_params.NNODES_TOTAL,
                                 suffix="",
                                 additional_flags="--gres=gpu:2",
                                 n_cores=global_params.NCORES_PER_NODE)
    log.info('Finished prediction of {} SSVs. Checking completeness.'
             ''.format(len(ordering)))
    out_files = glob.glob(path_to_out + "*.pkl")
    err = []
    for fp in out_files:
        with open(fp, "rb") as f:
            local_err = pkl.load(f)
        err += list(local_err)
    if len(err) > 0:
        log.error("{} errors occurred for SSVs with ID: "
                  "{}".format(len(err), [el[0] for el in err]))
    else:
        log.info('Success.')
Example #17
def get_sso_specs(set_path: str, out_path: str, ssd: SuperSegmentationDataset):
    set_path = os.path.expanduser(set_path)
    out_path = os.path.expanduser(out_path)
    files = glob.glob(set_path + '*.pkl')
    total_edge_length = 0
    total_voxel_size = 0
    for file in tqdm(files):
        sso_id = int(re.findall(r"/sso_(\d+).", file)[0])
        sso = ssd.get_super_segmentation_object(sso_id)
        total_edge_length += sso.total_edge_length()
        total_voxel_size += sso.size
        info = f'{sso_id}:\nskeleton path length:\t{sso.total_edge_length()}\nvoxel size:\t{sso.size}\n\n'
        with open(out_path, 'a') as f:
            f.write(info)
    with open(out_path, 'a') as f:
        f.write(
            f'total edge length: {total_edge_length}\ntotal voxel size: {total_voxel_size}'
        )
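A hypothetical invocation of get_sso_specs, in the spirit of the commented-out call in Example #21; the pickle folder, the output file and the dataset path are placeholders:

from syconn.reps.super_segmentation import SuperSegmentationDataset  # assumed module path

# Placeholder paths; the dataset path mirrors the ones used elsewhere in these examples.
ssd = SuperSegmentationDataset("/wholebrain/songbird/j0126/areaxfs_v6/")
get_sso_specs('~/working_dir/gt/voxeled/train/',
              '~/working_dir/gt/voxeled/train_info.txt', ssd=ssd)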
Example #18
def run_axoness_mapping(max_n_jobs: Optional[int] = None):
    """
    Map ``img2scalar`` CMN results of the 2D projections onto the cell
    reconstruction mesh. See :func:`~run_semsegaxoness_mapping` for the
    semantic segmentation approach.

    Args:
        max_n_jobs: Number of parallel jobs.

    Notes:
        Requires :func:`~run_create_neuron_ssd`, :func:`~run_neuron_rendering`,
        :func:`run_axoness_prediction` and
        :func:`~syconn.exec.skeleton.run_skeleton_generation`.
    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
    """Maps axon prediction of rendering locations onto SSV skeletons"""
    log = initialize_logging('axon_mapping', global_params.config.working_dir + '/logs/',
                             overwrite=False)
    pred_key_appendix = ""
    # Working directory has to be changed globally in global_params
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)

    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    # sort ssv ids according to their number of SVs (descending)
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id]) for ssv_id
                               in ssd.ssv_ids])
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)

    multi_params = [(par, pred_key_appendix) for par in multi_params]
    log.info('Starting axoness mapping.')
    _ = qu.QSUB_script(multi_params, "map_viewaxoness2skel", log=log,
                       n_max_co_processes=global_params.NCORE_TOTAL,
                       suffix="", n_cores=1, remove_jobfolder=True)
    # TODO: perform completeness check
    log.info('Finished axoness mapping.')
Example #19
def run_neuron_rendering(max_n_jobs=None):
    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir)

    log = initialize_logging('neuron_view_rendering',
                             global_params.config.working_dir + '/logs/')
    ps = [
        Process(target=_run_neuron_rendering_big_helper, args=(max_n_jobs, )),
        Process(target=_run_neuron_rendering_small_helper, args=(max_n_jobs, ))
    ]
    for p in ps:
        p.start()
        time.sleep(10)
    for p in ps:
        p.join()
    log.info('Finished rendering of all SSVs. Checking completeness.')
    res = find_incomplete_ssv_views(ssd,
                                    woglia=True,
                                    n_cores=global_params.NCORES_PER_NODE)
    if len(res) != 0:
        msg = "Not all SSVs were rendered completely! Missing:\n{}".format(res)
        log.error(msg)
        raise RuntimeError(msg)
    log.info('Success.')
Example #20
def run_axoness_prediction(max_n_jobs_gpu=None, e3=False):
    log = initialize_logging('axon_prediction',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    if max_n_jobs_gpu is None:
        max_n_jobs_gpu = global_params.NGPU_TOTAL * 2
    # needed here because all QSUB jobs will start a script that refers to 'global_params.config.working_dir'
    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir)
    sd = ssd.get_segmentationdataset("sv")
    # chunk them
    multi_params = chunkify(sd.so_dir_paths, max_n_jobs_gpu)
    pred_key = "axoness_probas"  # leave this fixed because it is used all over
    # get model properties
    log.info(
        'Performing axon prediction of neuron views. Labels will be stored '
        'on SV level in the attribute dict with key "{}"'.format(pred_key))
    if e3 is True:
        model_kwargs = 'get_axoness_model_e3'
    else:
        m = get_axoness_model()
        model_kwargs = dict(model_path=m._path,
                            normalize_data=m.normalize_data,
                            imposed_batch_size=m.imposed_batch_size,
                            nb_labels=m.nb_labels,
                            channels_to_load=m.channels_to_load)

    # all other kwargs, like obj_type='sv' and version, default to the current SV SegmentationDataset
    so_kwargs = dict(working_dir=global_params.config.working_dir)
    # for axoness views set woglia to True (because glia were removed beforehand),
    #  raw_only to False
    pred_kwargs = dict(woglia=True,
                       pred_key=pred_key,
                       verbose=False,
                       raw_only=False)
    multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs]
                    for par in multi_params]

    if e3 is True:
        # TODO: using two GPUs on a single node seems to be error-prone
        #  -> wb13 froze when processing example_cube=2
        n_cores = global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE
        if 'example_cube' in global_params.config.working_dir:
            n_cores = global_params.NCORES_PER_NODE  # do not run two predictions in parallel
        _ = qu.QSUB_script(multi_params,
                           "predict_sv_views_chunked_e3",
                           log=log,
                           n_max_co_processes=global_params.NGPU_TOTAL,
                           n_cores=n_cores,
                           suffix="_axoness",
                           additional_flags="--gres=gpu:1",
                           remove_jobfolder=True)
    else:
        for par in multi_params:
            mk = par[1]
            # Single GPUs are made available for every job via slurm, no need for random assignments.
            mk["init_gpu"] = 0  # np.random.rand(0, 2)
        _ = qu.QSUB_script(multi_params,
                           "predict_sv_views_chunked",
                           log=log,
                           n_max_co_processes=global_params.NGPU_TOTAL // 2,
                           n_cores=global_params.NCORES_PER_NODE,
                           suffix="_axoness",
                           additional_flags="--gres=gpu:1",
                           remove_jobfolder=True)
    log.info('Finished axon prediction. Now checking for missing predictions.')
    res = find_missing_sv_attributes_in_ssv(
        ssd, pred_key, n_cores=global_params.NCORES_PER_NODE)
    if len(res) > 0:
        log.error("Attribute '{}' missing for the following"
                  " SVs:\n{}".format(pred_key, res))
    else:
        log.info('Success.')
Example #21
                    n_list.append(n_dict[i])
                except KeyError:
                    n_list.append(0)
                try:
                    v_list.append(v_dict[i])
                except KeyError:
                    v_list.append(0)
            spec_writer.writerow([
                sso_id,
                int(sso.total_edge_length()), sso.size, *n_list, *v_list
            ])
        out_file.write('\n\n\n\n')
    out_file.close()


if __name__ == '__main__':
    # paths = dict(TRAIN='~/working_dir/gt/cmn/dnh/voxeled/',
    #              TEST='~/working_dir/gt/cmn/dnh/voxeled/evaluation/')

    paths = dict(TEST='~/working_dir/gt/cmn/ads/train/voxeled/')

    # ssds = dict(TRAIN=SuperSegmentationDataset("/wholebrain/scratch/areaxfs3/"),
    #             TEST=SuperSegmentationDataset("/wholebrain/songbird/j0126/areaxfs_v6/"))

    ssds = dict(TEST=SuperSegmentationDataset("/wholebrain/scratch/areaxfs3/"))

    dataspecs2csv(paths, '~/working_dir/gt/cmn/ads/test.csv', ssds)

    # get_sso_specs('~/thesis/gt/20_09_27/voxeled/train/', '~/thesis/gt/20_09_27/voxeled/train_info.txt',
    #               ssd=SuperSegmentationDataset("/wholebrain/songbird/j0126/areaxfs_v6/"))
Example #22
def worker_split(id_queue: Queue,
                 chunk_queue: Queue,
                 ssd: SuperSegmentationDataset,
                 ctx: int,
                 base_node_dst: int,
                 parts: Dict[str, List[int]],
                 labels_itf: str,
                 label_mappings: List[Tuple[int, int]],
                 split_jitter: int = 0):
    """
    Args:
        id_queue: Input queue with cell ids.
        chunk_queue: Output queue with cell chunks.
        ssd: SuperSegmentationDataset which contains the cells to which the chunkhandler should get applied.
        ctx: Context size for splitting.
        base_node_dst: Distance between base nodes. Corresponds to redundancy / number of chunks per cell.
        parts: Information about the cell surface and organelles: tuples like (voxel_param, feature),
            keyed by an identifier compatible with SyConn (e.g. 'sv' or 'mi').
        labels_itf: Label identifier for existing label predictions within the sso objects of the ssd dataset.
        label_mappings: Tuples where label at index 0 should get mapped to label at index 1.
        split_jitter: Deviation from the context size during splitting.
    """
    while True:
        if not id_queue.empty():
            ssv_id = id_queue.get()
            sso = ssd.get_super_segmentation_object(ssv_id)
            vert_dc = {}
            label_dc = {}
            encoding = {}
            offset = 0
            obj_bounds = {}
            for ix, k in enumerate(parts):
                pcd = o3d.geometry.PointCloud()
                verts = sso.load_mesh(k)[1].reshape(-1, 3)
                pcd.points = o3d.utility.Vector3dVector(verts)
                pcd, idcs = pcd.voxel_down_sample_and_trace(
                    parts[k][0], pcd.get_min_bound(), pcd.get_max_bound())
                idcs = np.max(idcs, axis=1)
                vert_dc[k] = np.asarray(pcd.points)
                obj_bounds[k] = [offset, offset + len(pcd.points)]
                offset += len(pcd.points)
                if k == 'sv':
                    # prepare mask for filtering background / unpredicted points
                    mask = None
                    if labels_itf == 'axoness':
                        # 0: dendrite, 1: axon, 2: soma, 3: bouton, 4: terminal, 5/6: background/unpredicted
                        labels_total = sso.label_dict()[labels_itf][idcs]
                        mask = labels_total < 5
                        labels_total = labels_total[mask]
                    elif labels_itf == 'spiness':
                        # 1: head, 0: neck, 2: shaft, 3: other, 4/5: background/unpredicted
                        labels_total = sso.label_dict()['axoness'][idcs]
                        spiness = sso.label_dict()['spiness'][idcs]
                        mask = np.logical_not(
                            np.logical_or(
                                labels_total > 4,
                                np.logical_and(labels_total == 0,
                                               spiness > 3)))
                        labels_total = labels_total[mask]
                        spiness = spiness[mask]
                        labels_total[labels_total != 0] = 3
                        labels_total[labels_total == 0] = spiness[labels_total
                                                                  == 0]
                    else:
                        labels_total = sso.label_dict()[labels_itf][idcs]
                        mask = np.ones(len(labels_total)).astype(bool)
                    labels = labels_total
                    vert_dc[k] = vert_dc[k][mask]
                else:
                    labels = np.ones(len(vert_dc[k])) + ix + labels_total.max()
                    encoding[k] = ix + 1 + labels_total.max()
                label_dc[k] = labels
            sample_feats = np.concatenate([[parts[k][1]] * len(vert_dc[k])
                                           for k in parts]).reshape(-1, 1)
            sample_feats = label_binarize(sample_feats,
                                          classes=np.arange(len(parts)))
            sample_pts = np.concatenate([vert_dc[k] for k in parts])
            sample_labels = np.concatenate([label_dc[k] for k in parts])
            # mark cellular organelles to be excluded from loss calculation - see torchhandler for use of no_pred
            no_pred = list(encoding.keys())
            if not sso.load_skeleton():
                raise ValueError(f"Couldn't find skeleton of {sso}")
            nodes, edges = sso.skeleton['nodes'] * sso.scaling, sso.skeleton[
                'edges']
            hc = HybridCloud(nodes,
                             edges,
                             vertices=sample_pts,
                             features=sample_feats,
                             obj_bounds=obj_bounds,
                             no_pred=no_pred,
                             labels=sample_labels,
                             encoding=encoding)
            if label_mappings is not None:
                hc.map_labels(label_mappings)
            _ = hc.verts2node
            jitter = random.randint(0, split_jitter)
            node_arrs, source_nodes = splitting.split_single(
                hc, ctx + jitter, base_node_dst)
            for ix, node_arr in enumerate(node_arrs):
                sample, _ = objects.extract_cloud_subset(hc, node_arr)
                chunk_queue.put(sample)
        else:
            time.sleep(0.5)
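A minimal wiring sketch for the worker above; the queue setup, the (voxel_param, feature) entries in 'parts', the context sizes and the number of queued cells are illustrative assumptions, not values from the original pipeline:

# Spawn the splitting worker and feed it cell IDs (placeholder parameters).
from multiprocessing import Process, Queue
from syconn.reps.super_segmentation import SuperSegmentationDataset  # assumed module path

ssd = SuperSegmentationDataset('/wholebrain/scratch/areaxfs3/')
id_q, chunk_q = Queue(), Queue()
parts = {'sv': (80, 0), 'mi': (100, 1)}  # (voxel_param, feature) per identifier
w = Process(target=worker_split,
            args=(id_q, chunk_q, ssd, 20000, 10000, parts, 'axoness', None, 2000))
w.daemon = True  # the worker loops forever, so let it die with the parent process
w.start()
for ssv_id in ssd.ssv_ids[:10]:
    id_q.put(ssv_id)
sample = chunk_q.get()  # blocks until the first extracted chunk arrives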
Example #23
def _run_neuron_rendering_small_helper(max_n_jobs: Optional[int] = None):
    """
    Render the default views as defined in ``global_params`` [WIP] of small
    neuron reconstructions. Helper method of :func:`~run_neuron_rendering`.

    Args:
        max_n_jobs: Number of parallel jobs.

    Notes:
        Requires :func:`~run_create_neuron_ssd`.
    """

    if max_n_jobs is None:
        max_n_jobs = global_params.config.ngpu_total * 4 if \
            global_params.config['pyopengl_platform'] == 'egl' \
            else global_params.config.ncore_total * 4
    log = initialize_logging('neuron_view_rendering_small',
                             global_params.config.working_dir + '/logs/')
    # view rendering prior to glia removal, choose SSD accordingly
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)

    #  TODO: use actual size criteria, e.g. number of sampling locations
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])

    # render normal size SSVs
    size_mask = nb_svs_per_ssv <= global_params.config['glia']['rendering_max_nb_sv']
    if 'example' in global_params.config.working_dir and np.sum(~size_mask) == 0:
        # generate at least one (artificial) huge SSV
        size_mask[:1] = False
        size_mask[1:] = True

    multi_params = ssd.ssv_ids[size_mask]
    # sort ssv ids according to their number of SVs (descending)
    ordering = np.argsort(nb_svs_per_ssv[size_mask])
    multi_params = multi_params[ordering[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)
    # list of SSV IDs and SSD parameters need to be given to a single QSUB job
    multi_params = [(ixs, global_params.config.working_dir) for ixs in multi_params]
    log.info('Started rendering of {} SSVs. '.format(np.sum(size_mask)))

    if global_params.config['pyopengl_platform'] == 'osmesa':  # utilize all CPUs
        qu.QSUB_script(multi_params, "render_views", log=log, suffix='_small',
                       n_max_co_processes=global_params.config.ncore_total,
                       remove_jobfolder=False)
    elif global_params.config['pyopengl_platform'] == 'egl':  # utilize 1 GPU per task
        # run EGL on single node: 20 parallel jobs
        if not qu.batchjob_enabled():
            n_cores = 1
            n_parallel_jobs = global_params.config['ncores_per_node']
            qu.QSUB_script(multi_params, "render_views", suffix='_small',
                           n_max_co_processes=n_parallel_jobs, log=log,
                           additional_flags="--gres=gpu:2", disable_batchjob=True,
                           n_cores=n_cores, remove_jobfolder=True)
        # run on whole cluster
        else:
            n_cores = global_params.config['ncores_per_node'] // global_params.config['ngpus_per_node']
            n_parallel_jobs = global_params.config.ngpu_total
            qu.QSUB_script(multi_params, "render_views_egl", suffix='_small',
                           n_max_co_processes=n_parallel_jobs, log=log,
                           additional_flags="--gres=gpu:1",
                           n_cores=n_cores, remove_jobfolder=True)
    else:
        raise RuntimeError('Specified OpenGL platform "{}" not supported.'
                           ''.format(global_params.config['pyopengl_platform']))
    log.info('Finished rendering of {}/{} SSVs.'.format(len(ordering),
                                                        len(nb_svs_per_ssv)))
Example #24
    ssv = ssc.get_super_segmentation_object(29753344)

    def __init__(self):
        ssc = SuperSegmentationDataset('/wholebrain/scratch/areaxfs3/')
        ssv = ssc.get_super_segmentation_object(29753344)
        #self.ssc = ssc.get_super_segmentation_object(29753344)
        exloc = np.array([5602, 4173, 4474]) * ssv.scaling
        self.exlocs = np.concatenate(ssv.sample_locations())


if __name__ == '__main__':
    # TODO: use toy data and improve logging, see test_backend.py
    working_dir = "/wholebrain/scratch/areaxfs3/"
    render_indexview = True
    now = time.time()
    ssc = SuperSegmentationDataset(working_dir)
    ssv = ssc.get_super_segmentation_object(29753344)
    exlocs = np.concatenate(ssv.sample_locations())
    exlocs = exlocs[::30]
    print("Example location array:", exlocs.shape)
    print(working_dir)
    now2 = time.time()
    print("time for reading data")
    print(now2 - now)
    """
    i = 0
    exlocs = chunkify_successive(exlocs, 10)
    params = []
    for ex in exlocs:
        params.append([exlocs, i])
        i=i+1
Example #25
def run_create_neuron_ssd():
    """
    Creates SuperSegmentationDataset with `version=0`.
    """
    log = initialize_logging('create_neuron_ssd',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    suffix = global_params.rag_suffix
    g_p = "{}/glia/neuron_rag{}.bz2".format(global_params.config.working_dir,
                                            suffix)
    rag_g = nx.read_edgelist(g_p, nodetype=np.uint)
    # this filtering is required e.g. if the RAG was not created by the glia splitting procedure

    ccs = nx.connected_components(rag_g)
    cc_dict = {}
    for cc in ccs:
        cc_arr = np.array(list(cc))
        cc_dict[np.min(cc_arr)] = cc_arr

    cc_dict_inv = {}
    for ssv_id, cc in cc_dict.items():
        for sv_id in cc:
            cc_dict_inv[sv_id] = ssv_id
    log.info('Parsed RAG from {} with {} SSVs and {} SVs.'.format(
        g_p, len(cc_dict), len(cc_dict_inv)))

    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir,
        version='0',
        ssd_type="ssv",
        sv_mapping=cc_dict_inv)
    # create cache-arrays for frequently used attributes
    ssd.save_dataset_deep(n_max_co_processes=global_params.NCORE_TOTAL
                          )  # also executes 'ssd.save_dataset_shallow()'

    exec_skeleton.run_skeleton_generation()

    log.info('Finished SSD initialization. Starting cellular '
             'organelle mapping.')

    # map cellular organelles to SSVs
    # TODO: sort by SSV size (descending)
    ssd_proc.aggregate_segmentation_object_mappings(
        ssd, global_params.existing_cell_organelles)
    ssd_proc.apply_mapping_decisions(ssd,
                                     global_params.existing_cell_organelles)
    log.info('Finished mapping of cellular organelles to SSVs. '
             'Writing individual SSV graphs.')

    # Write SSV RAGs
    pbar = tqdm.tqdm(total=len(ssd.ssv_ids), mininterval=0.5)
    for ssv in ssd.ssvs:
        # get all nodes in CC of this SSV
        # CCs with 1 node do not exist in the global RAG
        if len(cc_dict[ssv.id]) > 1:
            n_list = nx.node_connected_component(rag_g, ssv.id)
            # get SSV RAG as subgraph
            ssv_rag = nx.subgraph(rag_g, n_list)
        else:
            ssv_rag = nx.Graph()
            # ssv.id is the minimal SV ID, and therefore the only SV in this case
            ssv_rag.add_edge(ssv.id, ssv.id)
        nx.write_edgelist(ssv_rag, ssv.edgelist_path)
        pbar.update(1)
    pbar.close()
    log.info('Finished saving individual SSV RAGs.')
Example #26
def _run_neuron_rendering_big_helper(max_n_jobs=None):
    if max_n_jobs is None:
        max_n_jobs = global_params.NNODES_TOTAL * 2
    log = initialize_logging('neuron_view_rendering_big',
                             global_params.config.working_dir + '/logs/')
    # view rendering prior to glia removal, choose SSD accordingly
    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir)

    #  TODO: use actual size criteria, e.g. number of sampling locations
    nb_svs_per_ssv = np.array(
        [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids])

    # render normal size SSVs
    size_mask = nb_svs_per_ssv <= global_params.RENDERING_MAX_NB_SV
    if 'example' in global_params.config.working_dir and np.sum(
            ~size_mask) == 0:
        # generate at least one (artificial) huge SSV
        size_mask[:1] = False
        size_mask[1:] = True
    # sort ssv ids according to their number of SVs (descending)
    # list of SSV IDs and SSD parameters need to be given to a single QSUB job
    if np.sum(~size_mask) > 0:
        log.info('{} huge SSVs will be rendered on the cluster.'.format(
            np.sum(~size_mask)))
        # identify huge SSVs and process them individually on whole cluster
        big_ssv = ssd.ssv_ids[~size_mask]

        # # TODO: Currently high memory consumption when rendering index views! take into account
        # #  when multiprocessing
        # # TODO: refactor `render_sso_coords_multiprocessing` and then use `QSUB_render_views_egl`
        # #  here!
        # render normal views only
        n_cores = global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE
        n_parallel_jobs = global_params.NGPU_TOTAL
        render_kwargs = dict(add_cellobjects=True,
                             woglia=True,
                             overwrite=True,
                             skip_indexviews=True)
        sso_kwargs = dict(working_dir=global_params.config.working_dir,
                          nb_cpus=n_cores,
                          enable_locking_so=False,
                          enable_locking=False)

        # sort ssv ids according to their number of SVs (descending)
        ordering = np.argsort(nb_svs_per_ssv[~size_mask])
        multi_params = big_ssv[ordering[::-1]]
        multi_params = chunkify(multi_params, max_n_jobs)
        # list of SSV IDs and SSD parameters need to be given to a single QSUB job
        multi_params = [(ixs, sso_kwargs, render_kwargs)
                        for ixs in multi_params]
        path_to_out = qu.QSUB_script(multi_params,
                                     "render_views",
                                     n_max_co_processes=n_parallel_jobs,
                                     log=log,
                                     additional_flags="--gres=gpu:1",
                                     n_cores=n_cores,
                                     remove_jobfolder=True)
        # render index-views only
        for ssv_id in big_ssv:
            ssv = SuperSegmentationObject(
                ssv_id, working_dir=global_params.config.working_dir)
            render_sso_coords_multiprocessing(ssv,
                                              global_params.config.working_dir,
                                              verbose=True,
                                              return_views=False,
                                              disable_batchjob=False,
                                              n_jobs=n_parallel_jobs,
                                              n_cores=n_cores,
                                              render_indexviews=True)
        log.info('Finished rendering of {}/{} SSVs.'.format(
            len(big_ssv), len(nb_svs_per_ssv)))
Example #27
from syconn import global_params

path_storage_file = sys.argv[1]
path_out_file = sys.argv[2]

with open(path_storage_file, 'rb') as f:
    args = []
    while True:
        try:
            args.append(pkl.load(f))
        except EOFError:
            break

ssv_ids = args[0]
version = args[1]
version_dict = args[2]
working_dir = args[3]

ssd = SuperSegmentationDataset(working_dir=working_dir,
                               version=version,
                               version_dict=version_dict)
for ssv in ssd.get_super_segmentation_object(ssv_ids):
    ssv.load_skeleton()
    ssv.skeleton["myelin"] = map_myelin2coords(ssv.skeleton["nodes"], mag=4)
    majorityvote_skeleton_property(
        ssv, prop_key='myelin', max_dist=global_params.DIST_AXONESS_AVERAGING)
    ssv.save_skeleton()

with open(path_out_file, "wb") as f:
    pkl.dump("0", f)
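For context, a companion sketch of how the argument storage file read above could be written: the reader calls pkl.load repeatedly until EOFError, so each argument is simply dumped in sequence. The file name and argument values are placeholders:

import pickle as pkl

# Placeholder job arguments mirroring what the reader script above expects.
ssv_ids = [29753344]
version = '0'
version_dict = {}
working_dir = '/wholebrain/scratch/areaxfs3/'

with open('storage_0.pkl', 'wb') as f:  # placeholder file name
    for arg in (ssv_ids, version, version_dict, working_dir):
        pkl.dump(arg, f)  # recovered in the same order by the reader loop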
Example #28
def _run_neuron_rendering_small_helper(max_n_jobs=None):
    if max_n_jobs is None:
        max_n_jobs = global_params.NGPU_TOTAL * 4 if global_params.PYOPENGL_PLATFORM == 'egl' \
            else global_params.NCORE_TOTAL * 4
    log = initialize_logging('neuron_view_rendering_small',
                             global_params.config.working_dir + '/logs/')
    # view rendering prior to glia removal, choose SSD accordingly
    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir)

    #  TODO: use actual size criteria, e.g. number of sampling locations
    nb_svs_per_ssv = np.array(
        [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids])

    # render normal size SSVs
    size_mask = nb_svs_per_ssv <= global_params.RENDERING_MAX_NB_SV
    if 'example' in global_params.config.working_dir and np.sum(
            ~size_mask) == 0:
        # generate at least one (artificial) huge SSV
        size_mask[:1] = False
        size_mask[1:] = True

    multi_params = ssd.ssv_ids[size_mask]
    # sort ssv ids according to their number of SVs (descending)
    ordering = np.argsort(nb_svs_per_ssv[size_mask])
    multi_params = multi_params[ordering[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)
    # list of SSV IDs and SSD parameters need to be given to a single QSUB job
    multi_params = [(ixs, global_params.config.working_dir)
                    for ixs in multi_params]
    log.info('Started rendering of {} SSVs. '.format(np.sum(size_mask)))
    # generic
    if global_params.PYOPENGL_PLATFORM == 'osmesa':  # utilize all CPUs
        path_to_out = qu.QSUB_script(
            multi_params,
            "render_views",
            log=log,
            n_max_co_processes=global_params.NCORE_TOTAL,
            remove_jobfolder=False)
    elif global_params.PYOPENGL_PLATFORM == 'egl':  # utilize 1 GPU per task
        # run EGL on single node: 20 parallel jobs
        if global_params.config.working_dir is not None and 'example_cube' in \
                global_params.config.working_dir:
            n_cores = 1
            n_parallel_jobs = global_params.NCORES_PER_NODE
            path_to_out = qu.QSUB_script(multi_params,
                                         "render_views",
                                         n_max_co_processes=n_parallel_jobs,
                                         log=log,
                                         additional_flags="--gres=gpu:2",
                                         n_cores=n_cores,
                                         remove_jobfolder=False)
        # run on whole cluster
        else:
            n_cores = global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE
            n_parallel_jobs = global_params.NGPU_TOTAL
            path_to_out = qu.QSUB_script(multi_params,
                                         "render_views_egl",
                                         n_max_co_processes=n_parallel_jobs,
                                         log=log,
                                         additional_flags="--gres=gpu:1",
                                         n_cores=n_cores,
                                         remove_jobfolder=True)
    else:
        raise RuntimeError('Specified OpenGL platform "{}" not supported.'
                           ''.format(global_params.PYOPENGL_PLATFORM))
    log.info('Finished rendering of {}/{} SSVs.'.format(
        len(ordering), len(nb_svs_per_ssv)))
Example #29
def GT_generation(kzip_paths,
                  ssd_version,
                  gt_type,
                  nb_views,
                  dest_dir=None,
                  n_voting=40,
                  ws=(256, 128),
                  comp_window=8e3):
    """
    Generates a .npy GT file from all kzip paths.

    Parameters
    ----------
    kzip_paths : List[str]
    gt_type : str
    n_voting : int
        Number of collected nodes during BFS for majority vote (label smoothing)
    Returns
    -------

    """
    sso_ids = [
        int(re.findall(r"/(\d+).", kzip_path)[0]) for kzip_path in kzip_paths
    ]
    ssd = SuperSegmentationDataset()
    if not np.all([
            ssv.lookup_in_attribute_dict("size") is not None
            for ssv in ssd.get_super_segmentation_object(sso_ids)
    ]):
        print("Not all SSV IDs are part of the current SSD. IDs: {}".format(
            [sso_id for sso_id in sso_ids if sso_id not in ssd.ssv_ids]))
        kzip_paths = np.array(kzip_paths)[np.array([
            ssv.lookup_in_attribute_dict("size") is not None
            for ssv in ssd.get_super_segmentation_object(sso_ids)
        ])]
        print("Ignoring missing IDs. Using {} k.zip files for GT "
              "generation,".format(len(kzip_paths)))
    if dest_dir is None:
        dest_dir = os.path.expanduser("~/{}_semseg/".format(gt_type))
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
    dest_p_cache = "{}/cache_{}votes/".format(dest_dir, n_voting)
    params = [(p, ssd_version, gt_type, n_voting, nb_views, ws, comp_window,
               dest_p_cache) for p in kzip_paths]
    if not os.path.isdir(dest_p_cache):
        os.makedirs(dest_p_cache)
    start_multiprocess_imap(gt_generation_helper,
                            params,
                            nb_cpus=cpu_count(),
                            debug=False)
    # TODO: in case GT is too big to hold all views in memory
    # if gt_type == 'axgt':
    #     return
    # Create Dataset splits for training, validation and test
    all_raw_views = []
    all_label_views = []
    # all_index_views = []  # Removed index views
    print("Writing views.")
    for ii in range(len(kzip_paths)):
        sso_id = int(re.findall(r"/(\d+).", kzip_paths[ii])[0])
        dest_p = "{}/{}/".format(dest_p_cache, sso_id)
        raw_v = np.load(dest_p + "raw.npy")
        label_v = np.load(dest_p + "label.npy")
        # index_v = np.load(dest_p + "index.npy")  # Removed index views
        all_raw_views.append(raw_v)
        all_label_views.append(label_v)
        # all_index_views.append(index_v)  # Removed index views
    all_raw_views = np.concatenate(all_raw_views)
    all_label_views = np.concatenate(all_label_views)
    # all_index_views = np.concatenate(all_index_views)  # Removed index views
    print("{} view locations collected. Shuffling views.".format(
        len(all_label_views)))
    np.random.seed(0)
    ixs = np.arange(len(all_raw_views))
    np.random.shuffle(ixs)
    all_raw_views = all_raw_views[ixs]
    all_label_views = all_label_views[ixs]
    # all_index_views = all_index_views[ixs]  # Removed index views
    print("Swapping axes.")
    all_raw_views = all_raw_views.swapaxes(2, 1)
    all_label_views = all_label_views.swapaxes(2, 1)
    # all_index_views = all_index_views.swapaxes(2, 1)  # Removed index views
    print("Reshaping arrays.")
    all_raw_views = all_raw_views.reshape((-1, 4, ws[1], ws[0]))
    all_label_views = all_label_views.reshape((-1, 1, ws[1], ws[0]))
    # # all_index_views = all_index_views.reshape((-1, 1, 128, 256))  # Removed index views
    # # all_raw_views = np.concatenate([all_raw_views, all_index_views], axis=1)  # Removed index views
    raw_train, raw_valid, label_train, label_valid = train_test_split(
        all_raw_views, all_label_views, train_size=0.9, shuffle=False)
    # # raw_valid, raw_test, label_valid, label_test = train_test_split(raw_other, label_other, train_size=0.5, shuffle=False)  # Removed index views
    print("Writing h5 files.")
    os.makedirs(dest_dir, exist_ok=True)
    # chunk output data
    for ii in range(5):
        save_to_h5py([raw_train[ii::5]],
                     dest_dir + "/raw_train_{}.h5".format(ii), ["raw"])
        save_to_h5py([raw_valid[ii::5]],
                     dest_dir + "/raw_valid_{}.h5".format(ii), ["raw"])
        # save_to_h5py([raw_test], dest_dir + "/raw_test.h5",
        # ["raw"])  # Removed index views
        save_to_h5py([label_train[ii::5]],
                     dest_dir + "/label_train_{}.h5".format(ii), ["label"])
        save_to_h5py([label_valid[ii::5]],
                     dest_dir + "/label_valid_{}.h5".format(ii), ["label"])
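A hypothetical call to the GT generation routine above; the k.zip folder, SSD version, GT type and view parameters are placeholders:

import glob
import os

# k.zip files are assumed to be named by their SSO ID (see the regex above).
kzip_paths = glob.glob(os.path.expanduser('~/semseg_gt_kzips/*.k.zip'))
GT_generation(kzip_paths, ssd_version='6', gt_type='spgt', nb_views=2,
              n_voting=40, ws=(256, 128), comp_window=8e3)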
Example #30
def run_semsegaxoness_prediction(max_n_jobs_gpu=None):
    """
    Will store semantic axoness labels under ``view_properties_semsegax['semseg_key']`` inside
    ``ssv.label_dict('vertex')[semseg_key]``.
    TODO: run rendering chunk-wise instead of on-the-fly and then perform
     prediction chunk-wise as well, adopt from spiness step

    Parameters
    ----------
    max_n_jobs_gpu : int

    Returns
    -------

    """
    if max_n_jobs_gpu is None:
        max_n_jobs_gpu = global_params.NGPU_TOTAL * 2
    log = initialize_logging('axoness_prediction',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    ssd = SuperSegmentationDataset(
        working_dir=global_params.config.working_dir)
    # shuffle SV IDs
    np.random.seed(0)

    log.info('Starting axoness prediction.')
    nb_svs_per_ssv = np.array(
        [len(ssd.mapping_dict[ssv_id]) for ssv_id in ssd.ssv_ids])
    multi_params = ssd.ssv_ids
    ordering = np.argsort(nb_svs_per_ssv)
    multi_params = multi_params[ordering[::-1]]
    max_n_jobs_gpu = np.max([max_n_jobs_gpu,
                             len(multi_params) // 100
                             ])  # at most 100 SSV per job
    multi_params = chunkify(multi_params, max_n_jobs_gpu)
    # job parameters are read sequentially, i.e. to pass only a single list
    # as a parameter an additional axis is needed
    multi_params = [(ixs, ) for ixs in multi_params]

    path_to_out = qu.QSUB_script(multi_params,
                                 "predict_axoness_semseg",
                                 log=log,
                                 n_max_co_processes=global_params.NNODES_TOTAL,
                                 suffix="",
                                 additional_flags="--gres=gpu:1",
                                 n_cores=global_params.NCORES_PER_NODE //
                                 global_params.NGPUS_PER_NODE,
                                 remove_jobfolder=True)
    log.info('Finished prediction of {} SSVs. Checking completeness.'
             ''.format(len(ordering)))
    out_files = glob.glob(path_to_out + "*.pkl")
    err = []
    for fp in out_files:
        with open(fp, "rb") as f:
            local_err = pkl.load(f)
        err += list(local_err)
    if len(err) > 0:
        msg = "{} errors occurred for SSVs with ID: " \
              "{}".format(len(err), [el[0] for el in err])
        log.error(msg)
        raise ValueError(msg)
    else:
        log.info('Success.')