def preds2kzip(pred_folder: str, out_path: str, ssd_path: str, col_lookup: dict,
               label_mappings: Optional[List[Tuple[int, int]]] = None):
    pred_folder = os.path.expanduser(pred_folder)
    out_path = os.path.expanduser(out_path)
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    files = glob.glob(pred_folder + '*_preds.pkl')
    ssd = SuperSegmentationDataset(ssd_path)
    for file in tqdm(files):
        hc_voxeled = preds2hc(file)
        sso_id = int(re.findall(r"/sso_(\d+).", file)[0])
        sso = ssd.get_super_segmentation_object(sso_id)
        verts = sso.mesh[1].reshape(-1, 3)
        hc = HybridCloud(nodes=hc_voxeled.nodes, edges=hc_voxeled.edges,
                         node_labels=hc_voxeled.node_labels,
                         pred_node_labels=hc_voxeled.pred_node_labels,
                         vertices=verts)
        hc.nodel2vertl()
        hc.prednodel2predvertl()
        if label_mappings is not None:
            hc.map_labels(label_mappings)
        cols = np.array([col_lookup[el] for el in hc.pred_labels.squeeze()], dtype=np.uint8)
        sso.mesh2kzip(out_path + f'p_{sso_id}.k.zip', ext_color=cols)
        cols = np.array([col_lookup[el] for el in hc.labels.squeeze()], dtype=np.uint8)
        sso.mesh2kzip(out_path + f't_{sso_id}.k.zip', ext_color=cols)
        comments = list(hc.pred_node_labels.reshape(-1))
        for node in range(len(hc.nodes)):
            if hc.pred_node_labels[node] != hc.node_labels[node] and hc.pred_node_labels[node] != -1:
                comments[node] = 'e' + str(comments[node])
        sso.save_skeleton_to_kzip(out_path + f'p_{sso_id}.k.zip', comments=comments)
        comments = hc.node_labels.reshape(-1)
        sso.save_skeleton_to_kzip(out_path + f't_{sso_id}.k.zip', comments=comments)
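# Hedged usage sketch (not part of the original module): one way `preds2kzip` could be
# called. The paths, the RGBA colour lookup and the label mapping below are illustrative
# assumptions only; `preds2kzip` expects folder paths with a trailing slash.
def _example_preds2kzip():
    col_lookup = {0: (128, 128, 128, 255),   # e.g. dendrite
                  1: (255, 100, 100, 255),   # e.g. axon
                  2: (100, 255, 100, 255)}   # e.g. soma
    preds2kzip('~/predictions/', '~/kzips/',
               '/wholebrain/songbird/j0126/areaxfs_v6/', col_lookup,
               label_mappings=[(3, 1), (4, 1)])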
def run_matrix_export():
    # cache cell attributes
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    ssd.save_dataset_deep()
    log = initialize_logging('synapse_analysis',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)
    sd_syn_ssv = SegmentationDataset(working_dir=global_params.config.working_dir,
                                     obj_type='syn_ssv')

    # as an alternative to the skeletons, use vertex predictions or
    # sample_locations, ~3.5h @ 300 cpus
    # TODO: requires speed-up; one could collect properties only for synapses >
    #  probability threshold
    #  synssv_ids = synssv_ids[syn_prob > .5]
    #  ssv_partners = ssv_partners[syn_prob > .5]
    # One could also re-use the cached synssv IDs (computed during mapping of
    # synssv to SSVs) -> saves finding SSV ID indices in synapse arrays
    # (-> slow for many synapses)
    cps.collect_properties_from_ssv_partners(global_params.config.working_dir,
                                             debug=True)

    # collect the new object attributes gathered above (partner axoness, celltypes,
    # synapse probabilities etc.); no need to compute size/rep_coord etc.
    # -> recompute=False
    dataset_analysis(sd_syn_ssv, compute_meshprops=False, recompute=False)
    log.info('Synapse property collection from SSVs finished.')

    # export_matrix
    log.info('Exporting connectivity matrix now.')
    dest_folder = global_params.config.working_dir + '/connectivity_matrix/'
    cps.export_matrix(dest_folder=dest_folder)
    log.info('Connectivity matrix was exported to "{}".'.format(dest_folder))
def test_multiprocessed_vs_serial_rendering():
    # TODO: use example data and improve logging, see test_backend.py
    working_dir = "/wholebrain/scratch/areaxfs3/"
    render_indexview = True
    ssc = SuperSegmentationDataset(working_dir)
    ssv = ssc.get_super_segmentation_object(29753344)
    ssv.nb_cpus = cpu_count()
    exlocs = np.concatenate(ssv.sample_locations())
    exlocs = exlocs[:1000]
    views = render_sso_coords_multiprocessing(
        ssv, working_dir, rendering_locations=exlocs,
        render_indexviews=render_indexview, n_jobs=10, verbose=True)

    # overwrite any precomputed caches by re-initialization of SSV
    ssv = ssc.get_super_segmentation_object(29753344)
    ssv.nb_cpus = cpu_count()
    exlocs = np.concatenate(ssv.sample_locations())
    exlocs = exlocs[:1000]
    if render_indexview:
        views2 = render_sso_coords_index_views(ssv, exlocs, verbose=True)
    else:
        views2 = render_sso_coords(ssv, exlocs, verbose=True)
    print('Fraction of different index values in index-views: {:.4f}'
          ''.format(np.sum(views != views2) / np.prod(views.shape)))
    assert np.all(views == views2)
def run_spiness_prediction(max_n_jobs_gpu: Optional[int] = None,
                           max_n_jobs: Optional[int] = None):
    """
    Will store semantic spine labels inside ``ssv.label_dict('vertex')['spiness']``.

    Todo:
        * run rendering chunk-wise instead of on-the-fly and then perform
          prediction chunk-wise as well, adopt from spiness step.

    Args:
        max_n_jobs_gpu: Number of parallel GPU jobs. Used for the inference.
        max_n_jobs: Number of parallel CPU jobs. Used for the mapping step.
    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
    if max_n_jobs_gpu is None:
        max_n_jobs_gpu = global_params.NGPU_TOTAL * 2
    log = initialize_logging('spine_identification',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)

    # run semantic spine segmentation on multi views
    sd = ssd.get_segmentationdataset("sv")
    # chunk them
    multi_params = chunkify(sd.so_dir_paths, max_n_jobs_gpu)
    # set model properties
    model_kwargs = dict(src=global_params.config.mpath_spiness, multi_gpu=False)
    so_kwargs = dict(working_dir=global_params.config.working_dir)
    pred_kwargs = dict(pred_key=global_params.semseg2mesh_spines['semseg_key'])
    multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs]
                    for par in multi_params]
    log.info('Starting spine prediction.')
    qu.QSUB_script(multi_params, "predict_spiness_chunked", log=log,
                   n_max_co_processes=global_params.NGPU_TOTAL,
                   n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE,
                   suffix="", additional_flags="--gres=gpu:1",
                   remove_jobfolder=True)
    log.info('Finished spine prediction.')

    # map semantic spine segmentation of multi views on SSV mesh
    # TODO: CURRENTLY HIGH MEMORY CONSUMPTION
    if not ssd.mapping_dict_exists:
        raise ValueError('Mapping dict does not exist.')
    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])
    # sort ssv ids according to their number of SVs (descending)
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)
    # add ssd parameters
    kwargs_semseg2mesh = global_params.semseg2mesh_spines
    kwargs_semsegforcoords = global_params.semseg2coords_spines
    multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir,
                     kwargs_semseg2mesh, kwargs_semsegforcoords)
                    for ssv_ids in multi_params]
    log.info('Starting mapping of spine predictions to neurite surfaces.')
    qu.QSUB_script(multi_params, "map_spiness",
                   n_max_co_processes=global_params.NCORE_TOTAL,
                   n_cores=4, suffix="", additional_flags="",
                   remove_jobfolder=True, log=log)
    log.info('Finished spine mapping.')
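# Hedged sketch (assumption, not original code): after `run_spiness_prediction` has
# finished, the per-vertex spine labels should be retrievable from the vertex label
# dict under the key configured in `global_params.semseg2mesh_spines['semseg_key']`.
def _example_read_spine_labels(ssv_id: int):
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    ssv = ssd.get_super_segmentation_object(ssv_id)
    semseg_key = global_params.semseg2mesh_spines['semseg_key']
    vertex_labels = ssv.label_dict('vertex')[semseg_key]  # one label per mesh vertex
    return vertex_labels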
def run_spiness_prediction(max_n_jobs_gpu=None, max_n_jobs=None):
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
    if max_n_jobs_gpu is None:
        max_n_jobs_gpu = global_params.NGPU_TOTAL * 2
    log = initialize_logging('spine_identification',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    pred_key = "spiness"

    # run semantic spine segmentation on multi views
    sd = ssd.get_segmentationdataset("sv")
    # chunk them
    multi_params = chunkify(sd.so_dir_paths, max_n_jobs_gpu)
    # set model properties
    model_kwargs = dict(src=global_params.config.mpath_spiness, multi_gpu=False)
    so_kwargs = dict(working_dir=global_params.config.working_dir)
    pred_kwargs = dict(pred_key=pred_key)
    multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs]
                    for par in multi_params]
    log.info('Starting spine prediction.')
    qu.QSUB_script(multi_params, "predict_spiness_chunked", log=log,
                   n_max_co_processes=global_params.NGPU_TOTAL,
                   n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE,
                   suffix="", additional_flags="--gres=gpu:1",
                   remove_jobfolder=True)
    log.info('Finished spine prediction.')

    # map semantic spine segmentation of multi views on SSV mesh
    # TODO: CURRENTLY HIGH MEMORY CONSUMPTION
    if not ssd.mapping_dict_exists:
        raise ValueError('Mapping dict does not exist.')
    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])
    # sort ssv ids according to their number of SVs (descending)
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)
    # add ssd parameters
    kwargs_semseg2mesh = dict(semseg_key=pred_key, force_recompute=True)
    multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir,
                     kwargs_semseg2mesh) for ssv_ids in multi_params]
    log.info('Starting mapping of spine predictions to neurite surfaces.')
    qu.QSUB_script(multi_params, "map_spiness",
                   n_max_co_processes=global_params.NCORE_TOTAL,
                   n_cores=4, suffix="", additional_flags="",
                   remove_jobfolder=True, log=log)
    log.info('Finished spine mapping.')
def run_matrix_export():
    """
    Export the connectivity matrix as a ``.csv`` file to the ``connectivity_matrix``
    folder of the currently active working directory. Also collects the following
    synapse properties from prior analysis steps:

        * 'partner_axoness': Cell compartment type (axon: 1, dendrite: 0, soma: 2,
          en-passant bouton: 3, terminal bouton: 4) of the partner neurons.
        * 'partner_spiness': Spine compartment predictions of both neurons.
        * 'partner_celltypes': Cell type of both neurons.
        * 'latent_morph': Local morphology embeddings of the pre- and post-
          synaptic partners.

    Examples:
        See :class:`~syconn.reps.segmentation.SegmentationDataset` for examples.
    """
    # cache cell attributes
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    ssd.save_dataset_deep()
    log = initialize_logging('synapse_analysis',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)
    sd_syn_ssv = SegmentationDataset(working_dir=global_params.config.working_dir,
                                     obj_type='syn_ssv')

    # as an alternative to the skeletons, use vertex predictions or
    # sample_locations, ~3.5h @ 300 cpus
    # TODO: requires speed-up; one could collect properties only for synapses >
    #  probability threshold
    #  synssv_ids = synssv_ids[syn_prob > .5]
    #  ssv_partners = ssv_partners[syn_prob > .5]
    # One could also re-use the cached synssv IDs (computed during mapping of
    # synssv to SSVs) -> saves finding SSV ID indices in synapse arrays
    # (-> slow for many synapses)
    cps.collect_properties_from_ssv_partners(global_params.config.working_dir,
                                             debug=True)

    # collect the new object attributes gathered above (partner axoness, celltypes,
    # synapse probabilities etc.); no need to compute size/rep_coord etc.
    # -> recompute=False
    dataset_analysis(sd_syn_ssv, compute_meshprops=False, recompute=False)
    log.info('Synapse property collection from SSVs finished.')

    # export_matrix
    log.info('Exporting connectivity matrix now.')
    dest_folder = global_params.config.working_dir + '/connectivity_matrix/'
    cps.export_matrix(dest_folder=dest_folder)
    log.info('Connectivity matrix was exported to "{}".'.format(dest_folder))
def run_morphology_embedding(max_n_jobs=None):
    if max_n_jobs is None:
        max_n_jobs = global_params.NGPU_TOTAL * 2
    log = initialize_logging('morphology_embedding',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    pred_key_appendix = ""

    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])
    # sort ssv ids according to their number of SVs (descending)
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)
    # add ssd parameters
    multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir,
                     pred_key_appendix) for ssv_ids in multi_params]
    qu.QSUB_script(multi_params, "generate_morphology_embedding",
                   n_max_co_processes=global_params.NGPU_TOTAL,
                   n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE,
                   log=log, suffix="", additional_flags="--gres=gpu:1",
                   remove_jobfolder=True)
    log.info('Finished extraction of cell morphology embedding.')
def run_axoness_mapping(max_n_jobs=None):
    """Maps axon prediction of rendering locations onto SSV skeletons."""
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
    log = initialize_logging('axon_mapping',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    pred_key_appendix = ""
    # Working directory has to be changed globally in global_params
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    # sort ssv ids according to their number of SVs (descending)
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)
    multi_params = [(par, pred_key_appendix) for par in multi_params]
    log.info('Starting axoness mapping.')
    _ = qu.QSUB_script(multi_params, "map_viewaxoness2skel", log=log,
                       n_max_co_processes=global_params.NCORE_TOTAL,
                       suffix="", n_cores=1, remove_jobfolder=True)
    # TODO: perform completeness check
    log.info('Finished axoness mapping.')
def run_neuron_rendering(max_n_jobs: Optional[int] = None):
    """
    Render the default views as defined in ``global_params`` [WIP].

    Args:
        max_n_jobs: Number of parallel jobs.

    Notes:
        Requires :func:`~run_create_neuron_ssd`.
    """
    log = initialize_logging('neuron_view_rendering',
                             global_params.config.working_dir + '/logs/')
    ps = [Process(target=_run_neuron_rendering_big_helper, args=(max_n_jobs, )),
          Process(target=_run_neuron_rendering_small_helper, args=(max_n_jobs, ))]
    for p in ps:
        p.start()
        time.sleep(10)
    for p in ps:
        p.join()
    log.info('Finished rendering of all SSVs. Checking completeness.')
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    res = find_incomplete_ssv_views(ssd, woglia=True,
                                    n_cores=global_params.NCORES_PER_NODE)
    if len(res) != 0:
        msg = "Not all SSVs were rendered completely! {}/{} missing:\n" \
              "{}".format(len(res), len(ssd.ssv_ids), res[:10])
        log.error(msg)
        raise RuntimeError(msg)
    log.info('Success.')
def run_morphology_embedding(max_n_jobs: Optional[int] = None):
    """
    Infer local morphology embeddings for all neuron reconstructions based on
    a triplet-loss trained cellular morphology learning network (tCMN).

    Args:
        max_n_jobs: Number of parallel jobs.

    Notes:
        Requires :func:`~run_create_neuron_ssd`, :func:`~run_neuron_rendering` and
        :func:`~syconn.exec.skeleton.run_skeleton_generation`.
    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NGPU_TOTAL * 2
    log = initialize_logging('morphology_embedding',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    pred_key_appendix = ""

    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])
    # sort ssv ids according to their number of SVs (descending)
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)
    # add ssd parameters
    multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir,
                     pred_key_appendix) for ssv_ids in multi_params]
    qu.QSUB_script(multi_params, "generate_morphology_embedding",
                   n_max_co_processes=global_params.NGPU_TOTAL,
                   n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE,
                   log=log, suffix="", additional_flags="--gres=gpu:1",
                   remove_jobfolder=True)
    log.info('Finished extraction of cell morphology embedding.')
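# Hedged sketch (assumption, not original code): the prerequisites listed in the
# docstring above imply roughly this calling order within a full pipeline run. The
# skeleton step lives in `syconn.exec.skeleton` (referenced in the docstring) and is
# only indicated as a comment here.
def _example_embedding_pipeline_order():
    run_create_neuron_ssd()
    run_neuron_rendering()
    # from syconn.exec import skeleton as exec_skeleton
    # exec_skeleton.run_skeleton_generation()
    run_morphology_embedding()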
def run_morphology_embedding():
    log = initialize_logging('morphology_embedding',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    pred_key_appendix = ""
    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])
    # sort ssv ids according to their number of SVs (descending)
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, 2000)
    # add ssd parameters
    multi_params = [(ssv_ids, ssd.version, ssd.version_dict, ssd.working_dir,
                     pred_key_appendix) for ssv_ids in multi_params]
    qu.QSUB_script(multi_params, "generate_morphology_embedding",
                   pe="openmp", queue=None, n_cores=10, suffix="",
                   additional_flags="--gres=gpu:1", resume_job=False)
    # removed -V (used with QSUB)
    log.info('Finished extraction of cell morphology embedding.')
class data:
    ssc = SuperSegmentationDataset('/wholebrain/scratch/areaxfs3/')
    ssv = ssc.get_super_segmentation_object(29753344)

    def __init__(self):
        ssc = SuperSegmentationDataset('/wholebrain/scratch/areaxfs3/')
        ssv = ssc.get_super_segmentation_object(29753344)
        # self.ssc = ssc.get_super_segmentation_object(29753344)
        exloc = np.array([5602, 4173, 4474]) * ssv.scaling
        self.exlocs = np.concatenate(ssv.sample_locations())
def load_celltype_ctgt(m):
    ct = SSVCelltype(None, None)
    ssv_ids = list(ct.train_d.squeeze()) + list(ct.valid_d.squeeze())
    ssv_labels = list(ct.train_l) + list(ct.valid_l)
    ssv_labels = np.concatenate([[l] * 3 for l in ssv_labels])
    ssd = SuperSegmentationDataset(working_dir="/wholebrain/scratch/areaxfs/",
                                   version="6")
    predict_latent_ssd(ssd, m, ssv_ids)
    latent = load_latent_data(ssd, ssv_ids)
    return latent, ssv_labels
def run_celltype_prediction(max_n_jobs_gpu: Optional[int] = None):
    """
    Run the celltype inference based on the ``img2scalar`` CMN.

    Args:
        max_n_jobs_gpu: Number of parallel GPU jobs.

    Notes:
        Requires :func:`~run_create_neuron_ssd` and :func:`~run_neuron_rendering`.
    """
    if max_n_jobs_gpu is None:
        max_n_jobs_gpu = global_params.NGPU_TOTAL * 2
    log = initialize_logging('celltype_prediction',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    # shuffle SV IDs
    np.random.seed(0)
    log.info('Starting cell type prediction.')
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])
    multi_params = ssd.ssv_ids
    ordering = np.argsort(nb_svs_per_ssv)
    multi_params = multi_params[ordering[::-1]]
    max_n_jobs_gpu = np.max([max_n_jobs_gpu, len(multi_params) // 200])  # at most 200 SSVs per job
    multi_params = chunkify(multi_params, max_n_jobs_gpu)
    # job parameters will be read sequentially, i.e. in order to provide only
    # one list as parameter one needs an additional axis
    multi_params = [(ixs, ) for ixs in multi_params]
    path_to_out = qu.QSUB_script(multi_params, "predict_cell_type", log=log,
                                 n_max_co_processes=global_params.NNODES_TOTAL,
                                 suffix="", additional_flags="--gres=gpu:1",
                                 n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE,
                                 remove_jobfolder=True)
    log.info('Finished prediction of {} SSVs. Checking completeness.'
             ''.format(len(ordering)))
    out_files = glob.glob(path_to_out + "*.pkl")
    err = []
    for fp in out_files:
        with open(fp, "rb") as f:
            local_err = pkl.load(f)
        err += list(local_err)
    if len(err) > 0:
        msg = "{} errors occurred for SSVs with ID: " \
              "{}".format(len(err), [el[0] for el in err])
        log.error(msg)
        raise ValueError(msg)
    else:
        log.info('Success.')
def run_semsegaxoness_mapping(max_n_jobs: Optional[int] = None):
    """
    Map semantic segmentation results of the 2D projections onto the cell
    reconstruction mesh. Generates the following attributes by default in
    :py:attr:`~syconn.reps.super_segmentation_object.SuperSegmentationObject.skeleton`:

        * "axoness": Vertex predictions mapped to skeleton (see
          ``global_params.map_properties_semsegax``).
        * "axoness_avg10000": Sliding window average along skeleton
          (10 um traversal length).
        * "axoness_avg10000_comp_maj": Majority vote on connected components
          after removing the soma.

    Args:
        max_n_jobs: Number of parallel jobs.

    Notes:
        Requires :func:`~run_create_neuron_ssd`, :func:`~run_neuron_rendering`,
        :func:`~run_semsegaxoness_prediction` and
        :func:`~syconn.exec.skeleton.run_skeleton_generation`.
    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
    # Maps axon prediction of rendering locations onto SSV skeletons
    log = initialize_logging('axon_mapping',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    pred_key_appendix = ""
    # Working directory has to be changed globally in global_params
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    # sort ssv ids according to their number of SVs (descending)
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)
    multi_params = [(par, pred_key_appendix) for par in multi_params]
    log.info('Starting axoness mapping.')
    _ = qu.QSUB_script(multi_params, "map_semsegaxoness2skel", log=log,
                       n_max_co_processes=global_params.NCORE_TOTAL,
                       suffix="", n_cores=1, remove_jobfolder=True)
    # TODO: perform completeness check
    log.info('Finished axoness mapping.')
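# Hedged sketch (assumption, not original code): after the mapping step, the skeleton
# attributes listed in the docstring above should be accessible on the SSV skeleton dict.
def _example_read_axoness_skeleton(ssv_id: int):
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    ssv = ssd.get_super_segmentation_object(ssv_id)
    ssv.load_skeleton()
    per_node = ssv.skeleton["axoness"]                    # raw vertex-to-node mapping
    smoothed = ssv.skeleton["axoness_avg10000"]           # 10 um sliding-window average
    comp_maj = ssv.skeleton["axoness_avg10000_comp_maj"]  # per-component majority vote
    return per_node, smoothed, comp_maj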
def run_celltype_prediction(max_n_jobs=100):
    log = initialize_logging('celltype_prediction',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    # shuffle SV IDs
    np.random.seed(0)
    log.info('Starting cell type prediction.')
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])
    multi_params = ssd.ssv_ids
    ordering = np.argsort(nb_svs_per_ssv)
    multi_params = multi_params[ordering[::-1]]
    max_n_jobs = np.max([max_n_jobs, len(multi_params) // 200])  # at most 200 SSVs per job
    multi_params = chunkify(multi_params, max_n_jobs)
    # job parameters will be read sequentially, i.e. in order to provide only
    # one list as parameter one needs an additional axis
    multi_params = [(ixs, ) for ixs in multi_params]
    # TODO: switch n_max_co_processes to `global_params.NGPU_TOTAL` as soon as
    #  EGL resource allocation works!
    path_to_out = qu.QSUB_script(multi_params, "predict_cell_type",
                                 n_max_co_processes=global_params.NNODES_TOTAL,
                                 suffix="", additional_flags="--gres=gpu:2",
                                 n_cores=global_params.NCORES_PER_NODE)
    log.info('Finished prediction of {} SSVs. Checking completeness.'
             ''.format(len(ordering)))
    out_files = glob.glob(path_to_out + "*.pkl")
    err = []
    for fp in out_files:
        with open(fp, "rb") as f:
            local_err = pkl.load(f)
        err += list(local_err)
    if len(err) > 0:
        log.error("{} errors occurred for SSVs with ID: "
                  "{}".format(len(err), [el[0] for el in err]))
    else:
        log.info('Success.')
def get_sso_specs(set_path: str, out_path: str, ssd: SuperSegmentationDataset):
    set_path = os.path.expanduser(set_path)
    out_path = os.path.expanduser(out_path)
    files = glob.glob(set_path + '*.pkl')
    total_edge_length = 0
    total_voxel_size = 0
    for file in tqdm(files):
        sso_id = int(re.findall(r"/sso_(\d+).", file)[0])
        sso = ssd.get_super_segmentation_object(sso_id)
        total_edge_length += sso.total_edge_length()
        total_voxel_size += sso.size
        info = (f'{sso_id}:\n'
                f'skeleton path length:\t{sso.total_edge_length()}\n'
                f'voxel size:\t{sso.size}\n\n')
        with open(out_path, 'a') as f:
            f.write(info)
    with open(out_path, 'a') as f:
        f.write(f'total edge length: {total_edge_length}\n'
                f'total voxel size: {total_voxel_size}')
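# Hedged usage sketch (assumption): the paths and working directory below are
# illustrative only. `get_sso_specs` appends per-cell skeleton path length and voxel
# size, followed by the totals, to a plain-text summary file.
def _example_get_sso_specs():
    ssd = SuperSegmentationDataset("/wholebrain/songbird/j0126/areaxfs_v6/")
    get_sso_specs('~/working_dir/gt/cmn/ads/train/voxeled/',
                  '~/working_dir/gt/cmn/ads/train_info.txt', ssd)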
def run_axoness_mapping(max_n_jobs: Optional[int] = None):
    """
    Map ``img2scalar`` CMN results of the 2D projections onto the cell
    reconstruction mesh. See :func:`~run_semsegaxoness_mapping` for the
    semantic segmentation approach.

    Args:
        max_n_jobs: Number of parallel jobs.

    Notes:
        Requires :func:`~run_create_neuron_ssd`, :func:`~run_neuron_rendering`,
        :func:`run_axoness_prediction` and
        :func:`~syconn.exec.skeleton.run_skeleton_generation`.
    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NCORE_TOTAL * 2
    # Maps axon prediction of rendering locations onto SSV skeletons
    log = initialize_logging('axon_mapping',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    pred_key_appendix = ""
    # Working directory has to be changed globally in global_params
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    multi_params = np.array(ssd.ssv_ids, dtype=np.uint)
    # sort ssv ids according to their number of SVs (descending)
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])
    multi_params = multi_params[np.argsort(nb_svs_per_ssv)[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)
    multi_params = [(par, pred_key_appendix) for par in multi_params]
    log.info('Starting axoness mapping.')
    _ = qu.QSUB_script(multi_params, "map_viewaxoness2skel", log=log,
                       n_max_co_processes=global_params.NCORE_TOTAL,
                       suffix="", n_cores=1, remove_jobfolder=True)
    # TODO: perform completeness check
    log.info('Finished axoness mapping.')
def run_neuron_rendering(max_n_jobs=None):
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    log = initialize_logging('neuron_view_rendering',
                             global_params.config.working_dir + '/logs/')
    ps = [Process(target=_run_neuron_rendering_big_helper, args=(max_n_jobs, )),
          Process(target=_run_neuron_rendering_small_helper, args=(max_n_jobs, ))]
    for p in ps:
        p.start()
        time.sleep(10)
    for p in ps:
        p.join()
    log.info('Finished rendering of all SSVs. Checking completeness.')
    res = find_incomplete_ssv_views(ssd, woglia=True,
                                    n_cores=global_params.NCORES_PER_NODE)
    if len(res) != 0:
        msg = "Not all SSVs were rendered completely! Missing:\n{}".format(res)
        log.error(msg)
        raise RuntimeError(msg)
    log.info('Success.')
def run_axoness_prediction(max_n_jobs_gpu=None, e3=False):
    log = initialize_logging('axon_prediction',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    if max_n_jobs_gpu is None:
        max_n_jobs_gpu = global_params.NGPU_TOTAL * 2
    # here because all qsub jobs will start a script referring to
    # 'global_params.config.working_dir'
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    sd = ssd.get_segmentationdataset("sv")
    # chunk them
    multi_params = chunkify(sd.so_dir_paths, max_n_jobs_gpu)
    pred_key = "axoness_probas"  # leave this fixed because it is used all over
    # get model properties
    log.info('Performing axon prediction of neuron views. Labels will be stored '
             'on SV level in the attribute dict with key "{}"'.format(pred_key))
    if e3 is True:
        model_kwargs = 'get_axoness_model_e3'
    else:
        m = get_axoness_model()
        model_kwargs = dict(model_path=m._path,
                            normalize_data=m.normalize_data,
                            imposed_batch_size=m.imposed_batch_size,
                            nb_labels=m.nb_labels,
                            channels_to_load=m.channels_to_load)
    # all other kwargs like obj_type='sv' and version are the current SV
    # SegmentationDataset by default
    so_kwargs = dict(working_dir=global_params.config.working_dir)
    # for axoness views set woglia to True (because glia were removed beforehand),
    # raw_only to False
    pred_kwargs = dict(woglia=True, pred_key=pred_key, verbose=False,
                       raw_only=False)
    multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs]
                    for par in multi_params]
    if e3 is True:
        # TODO: using two GPUs on a single node seems to be error-prone
        #  -> wb13 froze when processing example_cube=2
        n_cores = global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE
        if 'example_cube' in global_params.config.working_dir:
            n_cores = global_params.NCORES_PER_NODE  # do not run two predictions in parallel
        _ = qu.QSUB_script(multi_params, "predict_sv_views_chunked_e3", log=log,
                           n_max_co_processes=global_params.NGPU_TOTAL,
                           n_cores=n_cores, suffix="_axoness",
                           additional_flags="--gres=gpu:1",
                           remove_jobfolder=True)
    else:
        for par in multi_params:
            mk = par[1]
            # Single GPUs are made available for every job via slurm,
            # no need for random assignments.
            mk["init_gpu"] = 0  # np.random.rand(0, 2)
        _ = qu.QSUB_script(multi_params, "predict_sv_views_chunked", log=log,
                           n_max_co_processes=global_params.NGPU_TOTAL // 2,
                           n_cores=global_params.NCORES_PER_NODE,
                           suffix="_axoness", additional_flags="--gres=gpu:1",
                           remove_jobfolder=True)
    log.info('Finished axon prediction. Now checking for missing predictions.')
    res = find_missing_sv_attributes_in_ssv(ssd, pred_key,
                                            n_cores=global_params.NCORES_PER_NODE)
    if len(res) > 0:
        log.error("Attribute '{}' missing for following"
                  " SVs:\n{}".format(pred_key, res))
    else:
        log.info('Success.')
                n_list.append(n_dict[i])
            except KeyError:
                n_list.append(0)
            try:
                v_list.append(v_dict[i])
            except KeyError:
                v_list.append(0)
        spec_writer.writerow([sso_id, int(sso.total_edge_length()), sso.size,
                              *n_list, *v_list])
    out_file.write('\n\n\n\n')
    out_file.close()


if __name__ == '__main__':
    # paths = dict(TRAIN='~/working_dir/gt/cmn/dnh/voxeled/',
    #              TEST='~/working_dir/gt/cmn/dnh/voxeled/evaluation/')
    paths = dict(TEST='~/working_dir/gt/cmn/ads/train/voxeled/')
    # ssds = dict(TRAIN=SuperSegmentationDataset("/wholebrain/scratch/areaxfs3/"),
    #             TEST=SuperSegmentationDataset("/wholebrain/songbird/j0126/areaxfs_v6/"))
    ssds = dict(TEST=SuperSegmentationDataset("/wholebrain/scratch/areaxfs3/"))
    dataspecs2csv(paths, '~/working_dir/gt/cmn/ads/test.csv', ssds)
    # get_sso_specs('~/thesis/gt/20_09_27/voxeled/train/',
    #               '~/thesis/gt/20_09_27/voxeled/train_info.txt',
    #               ssd=SuperSegmentationDataset("/wholebrain/songbird/j0126/areaxfs_v6/"))
def worker_split(id_queue: Queue, chunk_queue: Queue, ssd: SuperSegmentationDataset,
                 ctx: int, base_node_dst: int, parts: Dict[str, List[int]],
                 labels_itf: str, label_mappings: List[Tuple[int, int]],
                 split_jitter: int = 0):
    """
    Args:
        id_queue: Input queue with cell ids.
        chunk_queue: Output queue with cell chunks.
        ssd: SuperSegmentationDataset which contains the cells to which the
            chunkhandler should get applied.
        ctx: Context size for splitting.
        base_node_dst: Distance between base nodes. Corresponds to redundancy /
            number of chunks per cell.
        parts: Information about cell surface and organelles. Tuples like
            (voxel_param, feature) keyed by identifier compatible with syconn
            (e.g. 'sv' or 'mi').
        labels_itf: Label identifier for existing label predictions within the
            sso objects of the ssd dataset.
        label_mappings: Tuples where label at index 0 should get mapped to label
            at index 1.
        split_jitter: Deviation from context size during splitting.
    """
    while True:
        if not id_queue.empty():
            ssv_id = id_queue.get()
            sso = ssd.get_super_segmentation_object(ssv_id)
            vert_dc = {}
            label_dc = {}
            encoding = {}
            offset = 0
            obj_bounds = {}
            for ix, k in enumerate(parts):
                pcd = o3d.geometry.PointCloud()
                verts = sso.load_mesh(k)[1].reshape(-1, 3)
                pcd.points = o3d.utility.Vector3dVector(verts)
                pcd, idcs = pcd.voxel_down_sample_and_trace(
                    parts[k][0], pcd.get_min_bound(), pcd.get_max_bound())
                idcs = np.max(idcs, axis=1)
                vert_dc[k] = np.asarray(pcd.points)
                obj_bounds[k] = [offset, offset + len(pcd.points)]
                offset += len(pcd.points)
                if k == 'sv':
                    # prepare mask for filtering background / unpredicted points
                    mask = None
                    if labels_itf == 'axoness':
                        # 0: dendrite, 1: axon, 2: soma, 3: bouton, 4: terminal,
                        # 5/6: background/unpredicted
                        labels_total = sso.label_dict()[labels_itf][idcs]
                        mask = labels_total < 5
                        labels_total = labels_total[mask]
                    elif labels_itf == 'spiness':
                        # 1: head, 0: neck, 2: shaft, 3: other, 4/5: background/unpredicted
                        labels_total = sso.label_dict()['axoness'][idcs]
                        spiness = sso.label_dict()['spiness'][idcs]
                        mask = np.logical_not(
                            np.logical_or(labels_total > 4,
                                          np.logical_and(labels_total == 0, spiness > 3)))
                        labels_total = labels_total[mask]
                        spiness = spiness[mask]
                        labels_total[labels_total != 0] = 3
                        labels_total[labels_total == 0] = spiness[labels_total == 0]
                    else:
                        labels_total = sso.label_dict()[labels_itf][idcs]
                        mask = np.ones(len(labels_total)).astype(bool)
                    labels = labels_total
                    vert_dc[k] = vert_dc[k][mask]
                else:
                    labels = np.ones(len(vert_dc[k])) + ix + labels_total.max()
                    encoding[k] = ix + 1 + labels_total.max()
                label_dc[k] = labels
            sample_feats = np.concatenate(
                [[parts[k][1]] * len(vert_dc[k]) for k in parts]).reshape(-1, 1)
            sample_feats = label_binarize(sample_feats, classes=np.arange(len(parts)))
            sample_pts = np.concatenate([vert_dc[k] for k in parts])
            sample_labels = np.concatenate([label_dc[k] for k in parts])
            # mark cellular organelles to be excluded from loss calculation -
            # see torchhandler for use of no_pred
            no_pred = list(encoding.keys())
            if not sso.load_skeleton():
                raise ValueError(f'Couldn\'t find skeleton of {sso}')
            nodes, edges = sso.skeleton['nodes'] * sso.scaling, sso.skeleton['edges']
            hc = HybridCloud(nodes, edges, vertices=sample_pts, features=sample_feats,
                             obj_bounds=obj_bounds, no_pred=no_pred,
                             labels=sample_labels, encoding=encoding)
            if label_mappings is not None:
                hc.map_labels(label_mappings)
            _ = hc.verts2node
            jitter = random.randint(0, split_jitter)
            node_arrs, source_nodes = splitting.split_single(hc, ctx + jitter,
                                                             base_node_dst)
            for ix, node_arr in enumerate(node_arrs):
                sample, _ = objects.extract_cloud_subset(hc, node_arr)
                chunk_queue.put(sample)
        else:
            time.sleep(0.5)
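# Hedged sketch (assumption, not original code): `worker_split` is written to run as a
# long-lived worker fed by an id queue. One plausible way to wire it up; the queue
# setup, the `parts` voxel/feature values, the context sizes and the label mapping are
# illustrative only.
def _example_start_split_worker(ssd: SuperSegmentationDataset):
    id_queue, chunk_queue = Queue(), Queue()
    parts = {'sv': [80, 0], 'mi': [100, 1]}  # (voxel_param, feature) per identifier
    worker = Process(target=worker_split,
                     args=(id_queue, chunk_queue, ssd, 20000, 10000, parts,
                           'spiness', [(3, 1)], 2000))
    worker.start()
    for ssv_id in ssd.ssv_ids[:10]:
        id_queue.put(ssv_id)
    return worker, chunk_queue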
def _run_neuron_rendering_small_helper(max_n_jobs: Optional[int] = None):
    """
    Render the default views as defined in ``global_params`` [WIP] of small
    neuron reconstructions. Helper method of :func:`~run_neuron_rendering`.

    Args:
        max_n_jobs: Number of parallel jobs.

    Notes:
        Requires :func:`~run_create_neuron_ssd`.
    """
    if max_n_jobs is None:
        max_n_jobs = global_params.config.ngpu_total * 4 if \
            global_params.config['pyopengl_platform'] == 'egl' \
            else global_params.config.ncore_total * 4
    log = initialize_logging('neuron_view_rendering_small',
                             global_params.config.working_dir + '/logs/')
    # view rendering prior to glia removal, choose SSD accordingly
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)

    # TODO: use actual size criteria, e.g. number of sampling locations
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])

    # render normal size SSVs
    size_mask = nb_svs_per_ssv <= global_params.config['glia']['rendering_max_nb_sv']
    if 'example' in global_params.config.working_dir and np.sum(~size_mask) == 0:
        # generate at least one (artificial) huge SSV
        size_mask[:1] = False
        size_mask[1:] = True

    multi_params = ssd.ssv_ids[size_mask]
    # sort ssv ids according to their number of SVs (descending)
    ordering = np.argsort(nb_svs_per_ssv[size_mask])
    multi_params = multi_params[ordering[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)
    # list of SSV IDs and SSD parameters need to be given to a single QSUB job
    multi_params = [(ixs, global_params.config.working_dir) for ixs in multi_params]
    log.info('Started rendering of {} SSVs. '.format(np.sum(size_mask)))

    if global_params.config['pyopengl_platform'] == 'osmesa':  # utilize all CPUs
        qu.QSUB_script(multi_params, "render_views", log=log, suffix='_small',
                       n_max_co_processes=global_params.config.ncore_total,
                       remove_jobfolder=False)
    elif global_params.config['pyopengl_platform'] == 'egl':  # utilize 1 GPU per task
        # run EGL on single node: 20 parallel jobs
        if not qu.batchjob_enabled():
            n_cores = 1
            n_parallel_jobs = global_params.config['ncores_per_node']
            qu.QSUB_script(multi_params, "render_views", suffix='_small',
                           n_max_co_processes=n_parallel_jobs, log=log,
                           additional_flags="--gres=gpu:2",
                           disable_batchjob=True, n_cores=n_cores,
                           remove_jobfolder=True)
        # run on whole cluster
        else:
            n_cores = global_params.config['ncores_per_node'] // global_params.config['ngpus_per_node']
            n_parallel_jobs = global_params.config.ngpu_total
            qu.QSUB_script(multi_params, "render_views_egl", suffix='_small',
                           n_max_co_processes=n_parallel_jobs, log=log,
                           additional_flags="--gres=gpu:1", n_cores=n_cores,
                           remove_jobfolder=True)
    else:
        raise RuntimeError('Specified OpenGL platform "{}" not supported.'
                           ''.format(global_params.config['pyopengl_platform']))
    log.info('Finished rendering of {}/{} SSVs.'.format(len(ordering),
                                                        len(nb_svs_per_ssv)))
if __name__ == '__main__':
    # TODO: use toy data and improve logging, see test_backend.py
    working_dir = "/wholebrain/scratch/areaxfs3/"
    render_indexview = True
    now = time.time()
    ssc = SuperSegmentationDataset(working_dir)
    ssv = ssc.get_super_segmentation_object(29753344)
    exlocs = np.concatenate(ssv.sample_locations())
    exlocs = exlocs[::30]
    print("Example location array:", exlocs.shape)
    print(working_dir)
    now2 = time.time()
    print("time for reading data")
    print(now2 - now)
    """
    i = 0
    exlocs = chunkify_successive(exlocs, 10)
    params = []
    for ex in exlocs:
        params.append([exlocs, i])
        i = i + 1
def run_create_neuron_ssd():
    """
    Creates a SuperSegmentationDataset with ``version=0``.
    """
    log = initialize_logging('create_neuron_ssd',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    suffix = global_params.rag_suffix
    g_p = "{}/glia/neuron_rag{}.bz2".format(global_params.config.working_dir, suffix)
    rag_g = nx.read_edgelist(g_p, nodetype=np.uint)

    # e.g. if rag was not created by glia splitting procedure this filtering is required
    ccs = nx.connected_components(rag_g)
    cc_dict = {}
    for cc in ccs:
        cc_arr = np.array(list(cc))
        cc_dict[np.min(cc_arr)] = cc_arr
    cc_dict_inv = {}
    for ssv_id, cc in cc_dict.items():
        for sv_id in cc:
            cc_dict_inv[sv_id] = ssv_id
    log.info('Parsed RAG from {} with {} SSVs and {} SVs.'.format(
        g_p, len(cc_dict), len(cc_dict_inv)))

    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir,
                                   version='0', ssd_type="ssv",
                                   sv_mapping=cc_dict_inv)
    # create cache-arrays for frequently used attributes;
    # also executes 'ssd.save_dataset_shallow()'
    ssd.save_dataset_deep(n_max_co_processes=global_params.NCORE_TOTAL)

    exec_skeleton.run_skeleton_generation()

    log.info('Finished SSD initialization. Starting cellular organelle mapping.')

    # map cellular organelles to SSVs
    # TODO: sort by SSV size (descending)
    ssd_proc.aggregate_segmentation_object_mappings(
        ssd, global_params.existing_cell_organelles)
    ssd_proc.apply_mapping_decisions(ssd, global_params.existing_cell_organelles)
    log.info('Finished mapping of cellular organelles to SSVs. '
             'Writing individual SSV graphs.')

    # Write SSV RAGs
    pbar = tqdm.tqdm(total=len(ssd.ssv_ids), mininterval=0.5)
    for ssv in ssd.ssvs:
        # get all nodes in CC of this SSV
        if len(cc_dict[ssv.id]) > 1:  # CCs with 1 node do not exist in the global RAG
            n_list = nx.node_connected_component(rag_g, ssv.id)
            # get SSV RAG as subgraph
            ssv_rag = nx.subgraph(rag_g, n_list)
        else:
            ssv_rag = nx.Graph()
            # ssv.id is the minimal SV ID, and therefore the only SV in this case
            ssv_rag.add_edge(ssv.id, ssv.id)
        nx.write_edgelist(ssv_rag, ssv.edgelist_path)
        pbar.update(1)
    pbar.close()
    log.info('Finished saving individual SSV RAGs.')
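# Hedged sketch (assumption, not original code): the per-SSV RAGs written above can be
# read back with networkx from `ssv.edgelist_path`; the SSV id is illustrative.
def _example_load_ssv_rag(ssv_id: int):
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir,
                                   version='0')
    ssv = ssd.get_super_segmentation_object(ssv_id)
    ssv_rag = nx.read_edgelist(ssv.edgelist_path, nodetype=np.uint)
    return ssv_rag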
def _run_neuron_rendering_big_helper(max_n_jobs=None):
    if max_n_jobs is None:
        max_n_jobs = global_params.NNODES_TOTAL * 2
    log = initialize_logging('neuron_view_rendering_big',
                             global_params.config.working_dir + '/logs/')
    # view rendering prior to glia removal, choose SSD accordingly
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)

    # TODO: use actual size criteria, e.g. number of sampling locations
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])

    # render normal size SSVs
    size_mask = nb_svs_per_ssv <= global_params.RENDERING_MAX_NB_SV
    if 'example' in global_params.config.working_dir and np.sum(~size_mask) == 0:
        # generate at least one (artificial) huge SSV
        size_mask[:1] = False
        size_mask[1:] = True
    # sort ssv ids according to their number of SVs (descending)
    # list of SSV IDs and SSD parameters need to be given to a single QSUB job
    if np.sum(~size_mask) > 0:
        log.info('{} huge SSVs will be rendered on the cluster.'.format(np.sum(~size_mask)))
        # identify huge SSVs and process them individually on whole cluster
        big_ssv = ssd.ssv_ids[~size_mask]

        # TODO: Currently high memory consumption when rendering index views!
        #  Take into account when multiprocessing.
        # TODO: refactor `render_sso_coords_multiprocessing` and then use
        #  `QSUB_render_views_egl` here!
        # render normal views only
        n_cores = global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE
        n_parallel_jobs = global_params.NGPU_TOTAL
        render_kwargs = dict(add_cellobjects=True, woglia=True, overwrite=True,
                             skip_indexviews=True)
        sso_kwargs = dict(working_dir=global_params.config.working_dir,
                          nb_cpus=n_cores, enable_locking_so=False,
                          enable_locking=False)
        # sort ssv ids according to their number of SVs (descending)
        ordering = np.argsort(nb_svs_per_ssv[~size_mask])
        multi_params = big_ssv[ordering[::-1]]
        multi_params = chunkify(multi_params, max_n_jobs)
        # list of SSV IDs and SSD parameters need to be given to a single QSUB job
        multi_params = [(ixs, sso_kwargs, render_kwargs) for ixs in multi_params]
        path_to_out = qu.QSUB_script(multi_params, "render_views",
                                     n_max_co_processes=n_parallel_jobs, log=log,
                                     additional_flags="--gres=gpu:1",
                                     n_cores=n_cores, remove_jobfolder=True)

        # render index-views only
        for ssv_id in big_ssv:
            ssv = SuperSegmentationObject(ssv_id,
                                          working_dir=global_params.config.working_dir)
            render_sso_coords_multiprocessing(ssv, global_params.config.working_dir,
                                              verbose=True, return_views=False,
                                              disable_batchjob=False,
                                              n_jobs=n_parallel_jobs,
                                              n_cores=n_cores,
                                              render_indexviews=True)
        log.info('Finished rendering of {}/{} SSVs.'.format(len(big_ssv),
                                                            len(nb_svs_per_ssv)))
import sys
import pickle as pkl

from syconn import global_params
# Note: the remaining names used below (SuperSegmentationDataset, map_myelin2coords,
# majorityvote_skeleton_property) are imported from syconn in the full batch-job
# script; those import lines are not part of this excerpt.

path_storage_file = sys.argv[1]
path_out_file = sys.argv[2]

with open(path_storage_file, 'rb') as f:
    args = []
    while True:
        try:
            args.append(pkl.load(f))
        except EOFError:
            break

ssv_ids = args[0]
version = args[1]
version_dict = args[2]
working_dir = args[3]

ssd = SuperSegmentationDataset(working_dir=working_dir, version=version,
                               version_dict=version_dict)
for ssv in ssd.get_super_segmentation_object(ssv_ids):
    ssv.load_skeleton()
    ssv.skeleton["myelin"] = map_myelin2coords(ssv.skeleton["nodes"], mag=4)
    majorityvote_skeleton_property(ssv, prop_key='myelin',
                                   max_dist=global_params.DIST_AXONESS_AVERAGING)
    ssv.save_skeleton()

with open(path_out_file, "wb") as f:
    pkl.dump("0", f)
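# Hedged sketch (assumption, not original code): after the batch job above has run,
# the per-node myelin annotation should be available on the skeleton of each
# processed SSV.
def _example_read_myelin(ssv_id, working_dir):
    ssd = SuperSegmentationDataset(working_dir=working_dir)
    ssv = ssd.get_super_segmentation_object(ssv_id)
    ssv.load_skeleton()
    return ssv.skeleton["myelin"]  # one value per skeleton node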
def _run_neuron_rendering_small_helper(max_n_jobs=None):
    if max_n_jobs is None:
        max_n_jobs = global_params.NGPU_TOTAL * 4 if global_params.PYOPENGL_PLATFORM == 'egl' \
            else global_params.NCORE_TOTAL * 4
    log = initialize_logging('neuron_view_rendering_small',
                             global_params.config.working_dir + '/logs/')
    # view rendering prior to glia removal, choose SSD accordingly
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)

    # TODO: use actual size criteria, e.g. number of sampling locations
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])

    # render normal size SSVs
    size_mask = nb_svs_per_ssv <= global_params.RENDERING_MAX_NB_SV
    if 'example' in global_params.config.working_dir and np.sum(~size_mask) == 0:
        # generate at least one (artificial) huge SSV
        size_mask[:1] = False
        size_mask[1:] = True

    multi_params = ssd.ssv_ids[size_mask]
    # sort ssv ids according to their number of SVs (descending)
    ordering = np.argsort(nb_svs_per_ssv[size_mask])
    multi_params = multi_params[ordering[::-1]]
    multi_params = chunkify(multi_params, max_n_jobs)
    # list of SSV IDs and SSD parameters need to be given to a single QSUB job
    multi_params = [(ixs, global_params.config.working_dir) for ixs in multi_params]
    log.info('Started rendering of {} SSVs. '.format(np.sum(size_mask)))
    # generic
    if global_params.PYOPENGL_PLATFORM == 'osmesa':  # utilize all CPUs
        path_to_out = qu.QSUB_script(multi_params, "render_views", log=log,
                                     n_max_co_processes=global_params.NCORE_TOTAL,
                                     remove_jobfolder=False)
    elif global_params.PYOPENGL_PLATFORM == 'egl':  # utilize 1 GPU per task
        # run EGL on single node: 20 parallel jobs
        if global_params.config.working_dir is not None and \
                'example_cube' in global_params.config.working_dir:
            n_cores = 1
            n_parallel_jobs = global_params.NCORES_PER_NODE
            path_to_out = qu.QSUB_script(multi_params, "render_views",
                                         n_max_co_processes=n_parallel_jobs,
                                         log=log, additional_flags="--gres=gpu:2",
                                         n_cores=n_cores, remove_jobfolder=False)
        # run on whole cluster
        else:
            n_cores = global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE
            n_parallel_jobs = global_params.NGPU_TOTAL
            path_to_out = qu.QSUB_script(multi_params, "render_views_egl",
                                         n_max_co_processes=n_parallel_jobs,
                                         log=log, additional_flags="--gres=gpu:1",
                                         n_cores=n_cores, remove_jobfolder=True)
    else:
        raise RuntimeError('Specified OpenGL platform "{}" not supported.'
                           ''.format(global_params.PYOPENGL_PLATFORM))
    log.info('Finished rendering of {}/{} SSVs.'.format(len(ordering),
                                                        len(nb_svs_per_ssv)))
def GT_generation(kzip_paths, ssd_version, gt_type, nb_views, dest_dir=None,
                  n_voting=40, ws=(256, 128), comp_window=8e3):
    """
    Generates a .npy GT file from all kzip paths.

    Parameters
    ----------
    kzip_paths : List[str]
    gt_type : str
    n_voting : int
        Number of collected nodes during BFS for majority vote (label smoothing).

    Returns
    -------

    """
    sso_ids = [int(re.findall(r"/(\d+).", kzip_path)[0]) for kzip_path in kzip_paths]
    ssd = SuperSegmentationDataset()
    if not np.all([ssv.lookup_in_attribute_dict("size") is not None
                   for ssv in ssd.get_super_segmentation_object(sso_ids)]):
        print("Not all SSV IDs are part of the current SSD. IDs: {}".format(
            [sso_id for sso_id in sso_ids if sso_id not in ssd.ssv_ids]))
        kzip_paths = np.array(kzip_paths)[np.array(
            [ssv.lookup_in_attribute_dict("size") is not None
             for ssv in ssd.get_super_segmentation_object(sso_ids)])]
        print("Ignoring missing IDs. Using {} k.zip files for GT "
              "generation.".format(len(kzip_paths)))
    if dest_dir is None:
        dest_dir = os.path.expanduser("~/{}_semseg/".format(gt_type))
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
    dest_p_cache = "{}/cache_{}votes/".format(dest_dir, n_voting)
    params = [(p, ssd_version, gt_type, n_voting, nb_views, ws, comp_window,
               dest_p_cache) for p in kzip_paths]
    if not os.path.isdir(dest_p_cache):
        os.makedirs(dest_p_cache)
    start_multiprocess_imap(gt_generation_helper, params, nb_cpus=cpu_count(),
                            debug=False)
    # TODO: in case GT is too big to hold all views in memory
    # if gt_type == 'axgt':
    #     return

    # Create Dataset splits for training, validation and test
    all_raw_views = []
    all_label_views = []
    # all_index_views = []  # Removed index views
    print("Writing views.")
    for ii in range(len(kzip_paths)):
        sso_id = int(re.findall(r"/(\d+).", kzip_paths[ii])[0])
        dest_p = "{}/{}/".format(dest_p_cache, sso_id)
        raw_v = np.load(dest_p + "raw.npy")
        label_v = np.load(dest_p + "label.npy")
        # index_v = np.load(dest_p + "index.npy")  # Removed index views
        all_raw_views.append(raw_v)
        all_label_views.append(label_v)
        # all_index_views.append(index_v)  # Removed index views
    all_raw_views = np.concatenate(all_raw_views)
    all_label_views = np.concatenate(all_label_views)
    # all_index_views = np.concatenate(all_index_views)  # Removed index views
    print("{} view locations collected. Shuffling views.".format(len(all_label_views)))
    np.random.seed(0)
    ixs = np.arange(len(all_raw_views))
    np.random.shuffle(ixs)
    all_raw_views = all_raw_views[ixs]
    all_label_views = all_label_views[ixs]
    # all_index_views = all_index_views[ixs]  # Removed index views
    print("Swapping axes.")
    all_raw_views = all_raw_views.swapaxes(2, 1)
    all_label_views = all_label_views.swapaxes(2, 1)
    # all_index_views = all_index_views.swapaxes(2, 1)  # Removed index views
    print("Reshaping arrays.")
    all_raw_views = all_raw_views.reshape((-1, 4, ws[1], ws[0]))
    all_label_views = all_label_views.reshape((-1, 1, ws[1], ws[0]))
    # all_index_views = all_index_views.reshape((-1, 1, 128, 256))  # Removed index views
    # all_raw_views = np.concatenate([all_raw_views, all_index_views], axis=1)  # Removed index views
    raw_train, raw_valid, label_train, label_valid = train_test_split(
        all_raw_views, all_label_views, train_size=0.9, shuffle=False)
    # raw_valid, raw_test, label_valid, label_test = train_test_split(
    #     raw_other, label_other, train_size=0.5, shuffle=False)  # Removed index views
    print("Writing h5 files.")
    os.makedirs(dest_dir, exist_ok=True)
    # chunk output data
    for ii in range(5):
        save_to_h5py([raw_train[ii::5]],
                     dest_dir + "/raw_train_{}.h5".format(ii), ["raw"])
        save_to_h5py([raw_valid[ii::5]],
                     dest_dir + "/raw_valid_{}.h5".format(ii), ["raw"])
        # save_to_h5py([raw_test], dest_dir + "/raw_test.h5", ["raw"])  # Removed index views
        save_to_h5py([label_train[ii::5]],
                     dest_dir + "/label_train_{}.h5".format(ii), ["label"])
        save_to_h5py([label_valid[ii::5]],
                     dest_dir + "/label_valid_{}.h5".format(ii), ["label"])
def run_semsegaxoness_prediction(max_n_jobs_gpu=None):
    """
    Will store semantic axoness labels as ``view_properties_semsegax['semseg_key']``
    inside ``ssv.label_dict('vertex')[semseg_key]``.

    TODO: run rendering chunk-wise instead of on-the-fly and then perform
     prediction chunk-wise as well, adopt from spiness step.

    Parameters
    ----------
    max_n_jobs_gpu : int

    Returns
    -------

    """
    if max_n_jobs_gpu is None:
        max_n_jobs_gpu = global_params.NGPU_TOTAL * 2
    log = initialize_logging('axoness_prediction',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    ssd = SuperSegmentationDataset(working_dir=global_params.config.working_dir)
    # shuffle SV IDs
    np.random.seed(0)
    log.info('Starting axoness prediction.')
    nb_svs_per_ssv = np.array([len(ssd.mapping_dict[ssv_id])
                               for ssv_id in ssd.ssv_ids])
    multi_params = ssd.ssv_ids
    ordering = np.argsort(nb_svs_per_ssv)
    multi_params = multi_params[ordering[::-1]]
    max_n_jobs_gpu = np.max([max_n_jobs_gpu, len(multi_params) // 100])  # at most 100 SSVs per job
    multi_params = chunkify(multi_params, max_n_jobs_gpu)
    # job parameters will be read sequentially, i.e. in order to provide only
    # one list as parameter one needs an additional axis
    multi_params = [(ixs, ) for ixs in multi_params]
    path_to_out = qu.QSUB_script(multi_params, "predict_axoness_semseg", log=log,
                                 n_max_co_processes=global_params.NNODES_TOTAL,
                                 suffix="", additional_flags="--gres=gpu:1",
                                 n_cores=global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE,
                                 remove_jobfolder=True)
    log.info('Finished prediction of {} SSVs. Checking completeness.'
             ''.format(len(ordering)))
    out_files = glob.glob(path_to_out + "*.pkl")
    err = []
    for fp in out_files:
        with open(fp, "rb") as f:
            local_err = pkl.load(f)
        err += list(local_err)
    if len(err) > 0:
        msg = "{} errors occurred for SSVs with ID: " \
              "{}".format(len(err), [el[0] for el in err])
        log.error(msg)
        raise ValueError(msg)
    else:
        log.info('Success.')