def _create_output_h5(input_file, output_file, edges_root, n_edges):
    mode = 'r+' if os.path.exists(output_file) else 'w'
    out_h5 = h5py.File(output_file, mode=mode)
    add_hdf5_version(out_h5)
    add_hdf5_magic(out_h5)
    root_grp = out_h5.create_group(edges_root) if edges_root not in out_h5 else out_h5[edges_root]

    if 'source_node_id' not in root_grp:
        root_grp.create_dataset('source_node_id', (n_edges, ), dtype=np.uint64)
    if 'target_node_id' not in root_grp:
        root_grp.create_dataset('target_node_id', (n_edges, ), dtype=np.uint64)
    if 'edge_type_id' not in root_grp:
        root_grp.create_dataset('edge_type_id', (n_edges, ), dtype=np.uint32)
    if 'edge_group_id' not in root_grp:
        root_grp.create_dataset('edge_group_id', (n_edges, ), dtype=np.uint32)
    if 'edge_group_index' not in root_grp:
        root_grp.create_dataset('edge_group_index', (n_edges, ), dtype=np.uint32)

    with h5py.File(input_file, 'r') as in_h5:
        for h5obj in in_h5[edges_root].values():
            if isinstance(h5obj, h5py.Group):
                root_grp.copy(h5obj, h5obj.name)

    return root_grp
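# A hedged usage sketch for _create_output_h5 above. The file paths, population path, and edge
# count are hypothetical placeholders, not values taken from the original code; it assumes h5py,
# numpy (np) and the SONATA helpers are available as in the surrounding functions. The helper
# pre-allocates the five core SONATA edge columns in the output file, copies any sub-groups
# (model groups, indices) from the input population, and hands back the still-open output group
# for the caller to fill, e.g. with re-sorted columns.
def _example_prepare_output():
    out_grp = _create_output_h5(
        input_file='network/a_to_b_edges.h5',          # hypothetical existing edges file
        output_file='network/a_to_b_edges.sorted.h5',  # hypothetical output path
        edges_root='/edges/a_to_b',                    # hypothetical population path
        n_edges=1000                                   # hypothetical edge count
    )
    # The core columns are allocated but still empty; the caller writes the (e.g. re-sorted)
    # data into them and then closes the file handle.
    out_grp.file.close()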
def create_multipop_h5():
    spikes_df = pd.read_csv('spike_files/spikes.multipop.csv', sep=' ')
    lgn_spikes_df = spikes_df[spikes_df['population'] == 'lgn']
    tw_spikes_df = spikes_df[spikes_df['population'] == 'tw']

    with h5py.File('spike_files/spikes.multipop.h5', 'w') as h5:
        add_hdf5_magic(h5)
        add_hdf5_version(h5)

        lgn_grp = h5.create_group('/spikes/lgn')
        lgn_grp.attrs['sorting'] = 'by_id'
        ts_ds = lgn_grp.create_dataset('timestamps', data=lgn_spikes_df['timestamps'], dtype=np.float64)
        ts_ds.attrs['units'] = 'milliseconds'
        lgn_grp.create_dataset('node_ids', data=lgn_spikes_df['node_ids'], dtype=np.uint64)

        tw_grp = h5.create_group('/spikes/tw')
        tw_grp.attrs['sorting'] = 'by_id'
        ts_ds = tw_grp.create_dataset('timestamps', data=tw_spikes_df['timestamps'], dtype=np.float64)
        ts_ds.attrs['units'] = 'milliseconds'
        tw_grp.create_dataset('node_ids', data=tw_spikes_df['node_ids'], dtype=np.uint64)
def test_oldsonata_reader():
    # A special reader for an older version of the spikes format
    tmp_h5 = tempfile.NamedTemporaryFile(suffix='.h5')
    with h5py.File(tmp_h5.name, 'w') as h5:
        add_hdf5_magic(h5)
        add_hdf5_version(h5)
        h5.create_dataset('/spikes/gids', data=[0, 0, 0, 0, 2, 1, 2], dtype=np.uint)
        h5.create_dataset('/spikes/timestamps', data=[0.25, 0.5, 0.75, 1.0, 3.0, 0.001, 2.0], dtype=np.double)

    st = SonataOldReader(path=tmp_h5.name)
    assert np.all(st.populations == [pop_na])
    assert st.n_spikes() == 7
    assert set(st.node_ids()) == {0, 1, 2}
    assert np.allclose(np.sort(st.get_times(0)), [0.25, 0.50, 0.75, 1.0])

    df = st.to_dataframe()
    assert df.shape == (7, 3)
    assert set(df.columns) == {'timestamps', 'population', 'node_ids'}

    all_spikes = list(st.spikes())
    assert len(all_spikes) == 7
    assert isinstance(all_spikes[0][0], (np.double, float))  # np.float alias removed in NumPy >= 1.24
    assert all_spikes[0][1] == pop_na
    assert isinstance(all_spikes[0][2], (int, np.uint))      # np.int alias removed in NumPy >= 1.24
def _create_h5_file(self):
    self._h5_handle = h5py.File(self._file_name, 'w', driver='mpio', comm=MPI.COMM_WORLD)
    add_hdf5_version(self._h5_handle)
    add_hdf5_magic(self._h5_handle)
def _create_h5file(self):
    fdir = os.path.dirname(os.path.abspath(self._file_name))
    if not os.path.exists(fdir):
        os.mkdir(fdir)
    self.h5_base = h5py.File(self._file_name, 'w')
    add_hdf5_version(self.h5_base)
    add_hdf5_magic(self.h5_base)
def _create_h5_file(self):
    self._h5_handle = h5py.File(self._file_name, 'w', driver='mpio', comm=io.bmtk_world_comm.comm)
    add_hdf5_version(self._h5_handle)
    add_hdf5_magic(self._h5_handle)
def to_hdf5(self, hdf5_file, sort_order=None, gid_map=None):
    if self._mpi_rank == 0:
        with h5py.File(hdf5_file, 'w') as h5:
            add_hdf5_magic(h5)
            add_hdf5_version(h5)
            self._count_spikes()
            spikes_grp = h5.create_group('/spikes')
            spikes_grp.attrs['sorting'] = 'none' if sort_order is None else sort_order
            time_ds = spikes_grp.create_dataset('timestamps', shape=(self._spike_count, ), dtype=np.float64)
            gid_ds = spikes_grp.create_dataset('gids', shape=(self._spike_count, ), dtype=np.uint64)

            def file_write_fnc_identity(time, gid, indx):
                time_ds[indx] = time
                gid_ds[indx] = gid

            def file_write_fnc_transform(time, gid, indx):
                time_ds[indx] = time
                gid_ds[indx] = gid_map[gid]

            file_write_fnc = file_write_fnc_identity if gid_map is None else file_write_fnc_transform
            self._to_file(hdf5_file, sort_order, file_write_fnc)
def update_aibs_edges(edges_file, edge_types_file, trg_network, src_network, population_name=None,
                      output_dir='output'):
    population_name = population_name if population_name is not None else '{}_to_{}'.format(src_network, trg_network)

    edges_h5 = h5py.File(edges_file, 'r')
    src_gids = edges_h5['/src_gids']
    n_edges = len(src_gids)

    # Expand the per-target /edge_ptr offsets into one target gid per edge
    trg_gids = np.zeros(n_edges, dtype=np.uint64)
    start = edges_h5['/edge_ptr'][0]
    for trg_gid, end in enumerate(edges_h5['/edge_ptr'][1:]):
        trg_gids[start:end] = [trg_gid]*(end-start)
        start = end

    edges_output_fn = os.path.join(output_dir, '{}_{}_edges.h5'.format(src_network, trg_network))
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    with h5py.File(edges_output_fn, 'w') as hf:
        add_hdf5_magic(hf)
        add_hdf5_version(hf)
        grp = hf.create_group('/edges/{}'.format(population_name))

        grp.create_dataset('target_node_id', data=trg_gids, dtype='uint64')
        grp['target_node_id'].attrs['node_population'] = trg_network
        grp.create_dataset('source_node_id', data=edges_h5['src_gids'], dtype='uint64')
        grp['source_node_id'].attrs['node_population'] = src_network
        grp.create_dataset('edge_group_id', data=np.zeros(n_edges), dtype='uint32')
        grp.create_dataset('edge_group_index', data=np.arange(0, n_edges))
        grp.create_dataset('edge_type_id', data=edges_h5['edge_types'])
        grp.create_dataset('0/nsyns', data=edges_h5['num_syns'], dtype='uint32')
        grp.create_group('0/dynamics_params')

        create_index(trg_gids, grp, index_type=INDEX_TARGET)
        create_index(src_gids, grp, index_type=INDEX_SOURCE)

    update_edge_types_file(edge_types_file, src_network, trg_network, output_dir)
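# A small worked example (hypothetical values, numpy assumed imported as np) of the /edge_ptr
# expansion performed in update_aibs_edges above: in the old AIBS layout, edge_ptr holds one
# offset per target gid, so consecutive offsets delimit that gid's block of rows in /src_gids.
# Expanding it yields one target gid per edge.
edge_ptr = np.array([0, 2, 5, 5, 7])   # 4 target gids, 7 edges; gid 2 has no edges
trg_gids = np.zeros(edge_ptr[-1], dtype=np.uint64)
start = edge_ptr[0]
for trg_gid, end in enumerate(edge_ptr[1:]):
    trg_gids[start:end] = trg_gid
    start = end
assert np.array_equal(trg_gids, [0, 0, 1, 1, 1, 3, 3])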
def _create_ecp_file(self, sim):
    dt = sim.dt
    tstop = sim.tstop
    self._nsteps = int(round(tstop / dt))

    # create file to temporarily store ecp data on each rank
    self._tmp_ecp_handle = h5py.File(self._tmp_ecp_file, 'a')
    self._tmp_ecp_handle.create_dataset('data', (self._nsteps, self._rel_nsites),
                                        maxshape=(None, self._rel_nsites), chunks=True)

    # only the primary node will need to save the final ecp
    if MPI_RANK == 0:
        with h5py.File(self._ecp_output, 'w') as f5:
            add_hdf5_magic(f5)
            add_hdf5_version(f5)
            f5.create_dataset('data', (self._nsteps, self._rel_nsites),
                              maxshape=(None, self._rel_nsites), chunks=True)
            f5.attrs['dt'] = dt
            f5.attrs['tstart'] = 0.0
            f5.attrs['tstop'] = tstop

            # Save channels. Currently we record from all channels; may want to be more selective in the future.
            f5.create_dataset('channel_id', data=np.arange(self._rel.nsites))

    pc.barrier()
def _create_h5file(self):
    fdir = os.path.dirname(os.path.abspath(self._interm_fpath))
    if not os.path.exists(fdir):
        os.mkdir(fdir)
    self._h5_handle = h5py.File(self._interm_fpath, self._mode)
    add_hdf5_version(self._h5_handle)
    add_hdf5_magic(self._h5_handle)
def __init__(self, file_path, src_pop, trg_pop):
    # TODO: Merge with NetworkBuilder code for building SONATA files
    self._nsyns = 0
    self._n_biosyns = 0
    self._n_pointsyns = 0
    self._block_size = 5

    self._pop_name = '{}_{}'.format(src_pop, trg_pop)
    # self._h5_file = h5py.File(os.path.join(network_dir, '{}_edges.h5'.format(self._pop_name)), 'w')
    self._h5_file = h5py.File(file_path, 'w')
    add_hdf5_magic(self._h5_file)
    add_hdf5_version(self._h5_file)
    self._pop_root = self._h5_file.create_group('/edges/{}'.format(self._pop_name))

    self._pop_root.create_dataset('edge_group_id', (self._block_size, ), dtype=np.uint16,
                                  chunks=(self._block_size, ), maxshape=(None, ))
    self._pop_root.create_dataset('source_node_id', (self._block_size, ), dtype=np.uint64,
                                  chunks=(self._block_size, ), maxshape=(None, ))
    self._pop_root['source_node_id'].attrs['node_population'] = src_pop
    self._pop_root.create_dataset('target_node_id', (self._block_size, ), dtype=np.uint64,
                                  chunks=(self._block_size, ), maxshape=(None, ))
    self._pop_root['target_node_id'].attrs['node_population'] = trg_pop
    self._pop_root.create_dataset('edge_type_id', (self._block_size, ), dtype=np.uint32,
                                  chunks=(self._block_size, ), maxshape=(None, ))
    self._pop_root.create_dataset('0/syn_weight', (self._block_size, ), dtype=np.float64,
                                  chunks=(self._block_size, ), maxshape=(None, ))
    self._pop_root.create_dataset('0/sec_id', (self._block_size, ), dtype=np.uint64,
                                  chunks=(self._block_size, ), maxshape=(None, ))
    self._pop_root.create_dataset('0/sec_x', (self._block_size, ), dtype=np.float64,
                                  chunks=(self._block_size, ), maxshape=(None, ))
    self._pop_root.create_dataset('1/syn_weight', (self._block_size, ), dtype=np.float64,
                                  chunks=(self._block_size, ), maxshape=(None, ))
def write_sonata_itr(path, spiketrain_reader, mode='w', sort_order=SortOrder.none, units='ms',
                     population_renames=None, **kwargs):
    path_dir = os.path.dirname(path)
    if MPI_rank == 0 and path_dir and not os.path.exists(path_dir):
        os.makedirs(path_dir)

    spiketrain_reader.flush()
    comm_barrier()

    conv_factor = find_conversion(spiketrain_reader.units, units)
    if MPI_rank == 0:
        h5 = h5py.File(path, mode=mode)
        add_hdf5_magic(h5)
        add_hdf5_version(h5)
        spikes_root = h5.create_group('/spikes') if '/spikes' not in h5 else h5['/spikes']
        population_renames = population_renames or {}

    for pop_name in spiketrain_reader.populations:
        n_spikes = spiketrain_reader.n_spikes(pop_name)
        if n_spikes <= 0:
            continue

        if MPI_rank == 0:
            spikes_grp = spikes_root.create_group('{}'.format(population_renames.get(pop_name, pop_name)))
            if sort_order != SortOrder.unknown:
                spikes_grp.attrs['sorting'] = sort_order.value

            timestamps_ds = spikes_grp.create_dataset('timestamps', shape=(n_spikes, ), dtype=np.float64)
            timestamps_ds.attrs['units'] = units
            node_ids_ds = spikes_grp.create_dataset('node_ids', shape=(n_spikes, ), dtype=np.uint64)

        for i, spk in enumerate(spiketrain_reader.spikes(populations=pop_name, sort_order=sort_order)):
            if MPI_rank == 0:
                timestamps_ds[i] = spk[0] * conv_factor
                node_ids_ds[i] = spk[2]

    comm_barrier()
def test_sonata_reader():
    # Test ability to read an existing sonata file
    tmp_h5 = tempfile.NamedTemporaryFile(suffix='.h5')
    with h5py.File(tmp_h5.name, 'w') as h5:
        add_hdf5_magic(h5)
        add_hdf5_version(h5)
        h5.create_dataset('/spikes/V1/node_ids', data=[0, 0, 0, 0, 2, 1, 2], dtype=np.uint)
        h5.create_dataset('/spikes/V1/timestamps', data=[0.25, 0.5, 0.75, 1.0, 3.0, 0.001, 2.0], dtype=np.double)
        h5.create_dataset('/spikes/V2/node_ids', data=[10, 10, 10], dtype=np.uint)
        h5.create_dataset('/spikes/V2/timestamps', data=[4.0, 4.0, 4.0], dtype=np.double)
        h5.create_group('/spikes/V3')

    st = SonataSTReader(path=tmp_h5.name, default_population='V1')
    assert set(st.populations) == {'V1', 'V2'}
    assert st.n_spikes() == 7
    assert st.n_spikes(population='V1') == 7
    assert st.n_spikes(population='V2') == 3

    assert set(st.node_ids()) == {0, 1, 2}
    assert set(st.node_ids(population='V1')) == {0, 1, 2}
    assert np.all(st.node_ids(population='V2') == [10])

    assert np.allclose(np.sort(st.get_times(0)), [0.25, 0.50, 0.75, 1.0])
    assert np.allclose(st.get_times(1, population='V1'), [0.001])
    assert np.allclose(st.get_times(10, population='V2'), [4.0, 4.0, 4.0])

    df = st.to_dataframe()
    assert len(df) == 10
    assert set(df.columns) == {'timestamps', 'population', 'node_ids'}

    df = st.to_dataframe(populations='V1', sort_order=sort_order.by_id, with_population_col=False)
    assert len(df) == 7
    assert set(df.columns) == {'timestamps', 'node_ids'}
    assert np.all(np.diff(df['node_ids']) >= 0)

    all_spikes = list(st.spikes())
    assert len(all_spikes) == 10
    assert isinstance(all_spikes[0][0], (np.double, float))  # np.float alias removed in NumPy >= 1.24
    assert isinstance(all_spikes[0][1], string_types)
    assert isinstance(all_spikes[0][2], (int, np.uint))      # np.int alias removed in NumPy >= 1.24
def write_sonata(path, spiketrain_reader, mode='w', sort_order=SortOrder.none, units='ms',
                 population_renames=None, **kwargs):
    path_dir = os.path.dirname(path)
    if MPI_rank == 0 and path_dir and not os.path.exists(path_dir):
        os.makedirs(path_dir)

    spiketrain_reader.flush()
    comm_barrier()

    populations = spiketrain_reader.populations
    spikes_root = None
    if MPI_rank == 0:
        h5 = h5py.File(path, mode=mode)
        add_hdf5_magic(h5)
        add_hdf5_version(h5)
        spikes_root = h5.create_group('/spikes') if '/spikes' not in h5 else h5['/spikes']

    for pop_name in populations:
        if MPI_rank == 0 and pop_name in spikes_root:
            # Problem if file already contains /spikes/<pop_name>
            # TODO: append new data to old spikes?!?
            raise ValueError(
                'sonata file {} already contains a spikes group {}, '.format(path, pop_name) +
                'skipping (use option mode="w" to overwrite)'
            )

        pop_df = spiketrain_reader.to_dataframe(populations=pop_name, with_population_col=False,
                                                sort_order=sort_order, on_rank='root')
        if MPI_rank == 0:
            spikes_pop_grp = spikes_root.create_group(pop_name)
            if sort_order != SortOrder.unknown:
                spikes_pop_grp.attrs['sorting'] = sort_order.value

            spikes_pop_grp.create_dataset('timestamps', data=pop_df['timestamps'])
            spikes_pop_grp['timestamps'].attrs['units'] = spiketrain_reader.units()
            spikes_pop_grp.create_dataset('node_ids', data=pop_df['node_ids'])

    comm_barrier()
def create_single_pop_h5():
    h5_file_old = h5py.File('spike_files/spikes.old.h5', 'r')
    node_ids = h5_file_old['/spikes/gids']
    timestamps = h5_file_old['/spikes/timestamps']

    with h5py.File('spike_files/spikes.one_pop.h5', 'w') as h5:
        add_hdf5_magic(h5)
        add_hdf5_version(h5)
        core_grp = h5.create_group('/spikes/v1')
        core_grp.attrs['sorting'] = 'by_time'
        ts_ds = core_grp.create_dataset('timestamps', data=timestamps, dtype=np.float64)
        ts_ds.attrs['units'] = 'milliseconds'
        nids_ds = core_grp.create_dataset('node_ids', data=node_ids, dtype=np.uint64)
def to_hdf5(self, hdf5_file, sort_order=None, gid_map=None):
    if self._mpi_rank == 0:
        with h5py.File(hdf5_file, 'w') as h5:
            add_hdf5_magic(h5)
            add_hdf5_version(h5)
            self._count_spikes(recount=True)
            spikes_grp = h5.create_group('/spikes')
            spikes_grp.attrs['sorting'] = 'none' if sort_order is None else sort_order
            time_ds = spikes_grp.create_dataset('timestamps', shape=(self._spike_count, ),
                                                maxshape=(None, ), dtype=np.float64)
            gid_ds = spikes_grp.create_dataset('gids', shape=(self._spike_count, ),
                                               maxshape=(None, ), dtype=np.uint64)

            def resize_data():
                # There have been (unreproducible) MPI conditions where _count_spikes() is not correct,
                # even with proper barriers and file flushing. Add a quick fix for the case when
                # converting csv to hdf5.
                self._count_spikes(recount=True)
                time_ds.resize((self._spike_count, ))
                gid_ds.resize((self._spike_count, ))

            def file_write_fnc_identity(time, gid, indx):
                if indx >= self._spike_count:
                    resize_data()
                time_ds[indx] = time
                gid_ds[indx] = gid

            def file_write_fnc_transform(time, gid, indx):
                if indx >= self._spike_count:
                    resize_data()
                time_ds[indx] = time
                gid_ds[indx] = gid_map[gid]

            file_write_fnc = file_write_fnc_identity if gid_map is None else file_write_fnc_transform
            self._to_file(hdf5_file, sort_order, file_write_fnc)
def test_load_sonata():
    warnings.simplefilter("ignore", UserWarning)

    # Sonata adaptor's factory method
    tmp_sonata = tempfile.NamedTemporaryFile(suffix='.h5')
    with h5py.File(tmp_sonata.name, 'w') as h5:
        add_hdf5_magic(h5)
        add_hdf5_version(h5)
        h5.create_dataset('/spikes/V1/node_ids', data=[0, 0, 0, 0, 2, 1, 2], dtype=np.uint)
        h5.create_dataset('/spikes/V1/timestamps', data=[0.25, 0.5, 0.75, 1.0, 3.0, 0.001, 2.0], dtype=np.double)
        h5.create_dataset('/spikes/V2/node_ids', data=[10, 10, 10], dtype=np.uint)
        h5.create_dataset('/spikes/V2/timestamps', data=[4.0, 4.0, 4.0], dtype=np.double)
        h5.create_group('/spikes/V3')

    tmp_sonata_old = tempfile.NamedTemporaryFile(suffix='.h5')
    with h5py.File(tmp_sonata_old.name, 'w') as h5:
        add_hdf5_magic(h5)
        add_hdf5_version(h5)
        h5.create_dataset('/spikes/gids', data=[0, 0, 0, 0, 2, 1, 2], dtype=np.uint)
        h5.create_dataset('/spikes/timestamps', data=[0.25, 0.5, 0.75, 1.0, 3.0, 0.001, 2.0], dtype=np.double)

    tmp_sonata_empty = tempfile.NamedTemporaryFile(suffix='.h5')
    with h5py.File(tmp_sonata_empty.name, 'w') as h5:
        add_hdf5_magic(h5)
        add_hdf5_version(h5)
        h5.create_group('/spikes/')

    sr = load_sonata_file(tmp_sonata.name)
    assert isinstance(sr, SonataSTReader)

    sr = load_sonata_file(tmp_sonata_old.name)
    assert isinstance(sr, SonataOldReader)

    sr = load_sonata_file(tmp_sonata_empty.name)
    assert isinstance(sr, EmptySonataReader)
def write_sonata(path, spiketrain_reader, mode='a', sort_order=SortOrder.none, units='ms',
                 population_renames=None, **kwargs):
    path_dir = os.path.dirname(path)
    if path_dir and not os.path.exists(path_dir):
        os.makedirs(path_dir)

    conv_factor = find_conversion(spiketrain_reader.units, units)
    with h5py.File(path, mode=mode) as h5:
        add_hdf5_magic(h5)
        add_hdf5_version(h5)

        # Even if there are no spikes (thus no populations to report), still create the /spikes group.
        spikes_root = h5.create_group('/spikes')
        population_renames = population_renames or {}
        for pop_name in spiketrain_reader.populations:
            n_spikes = spiketrain_reader.n_spikes(pop_name)
            if n_spikes <= 0:
                continue

            spikes_grp = spikes_root.create_group('{}'.format(population_renames.get(pop_name, pop_name)))
            if sort_order != SortOrder.unknown:
                spikes_grp.attrs['sorting'] = sort_order.value

            timestamps_ds = spikes_grp.create_dataset('timestamps', shape=(n_spikes, ), dtype=np.float64)
            timestamps_ds.attrs['units'] = units
            node_ids_ds = spikes_grp.create_dataset('node_ids', shape=(n_spikes, ), dtype=np.uint64)

            for i, spk in enumerate(spiketrain_reader.spikes(populations=pop_name, sort_order=sort_order)):
                timestamps_ds[i] = spk[0] * conv_factor
                node_ids_ds[i] = spk[2]
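# A hedged sanity-check sketch of the layout the write_sonata variants above produce (the path
# below is hypothetical; h5py is assumed imported as in the surrounding code): one
# /spikes/<population> group per population, each holding equal-length 'timestamps' (float64,
# with a 'units' attribute) and 'node_ids' (uint64) datasets, plus an optional 'sorting'
# attribute on the group.
def _inspect_spikes_file(path='output/spikes.h5'):   # hypothetical path
    with h5py.File(path, 'r') as h5:
        for pop_name, pop_grp in h5['/spikes'].items():
            timestamps = pop_grp['timestamps'][()]
            node_ids = pop_grp['node_ids'][()]
            assert len(timestamps) == len(node_ids)
            print(pop_name, len(timestamps),
                  pop_grp['timestamps'].attrs.get('units'),
                  pop_grp.attrs.get('sorting', 'unknown'))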
def test_sort(sort_func, sort_params):
    tmp_edges_h5 = tempfile.NamedTemporaryFile(suffix='.h5')
    source_node_ids = np.tile([0, 1], 5)
    target_node_ids = np.arange(20, 0, -2, dtype=int)
    edge_type_ids = np.repeat([103, 100, 104, 101, 102], 2)
    edge_group_ids = np.repeat([1, 0], 5)
    edge_group_indices = np.tile(range(5), 2)
    n_edges = 10

    with h5py.File(tmp_edges_h5.name, 'w') as h5:
        add_hdf5_magic(h5)
        add_hdf5_version(h5)
        h5.create_dataset('/edges/a_to_b/source_node_id', data=source_node_ids)
        h5['/edges/a_to_b/source_node_id'].attrs['node_population'] = 'a'
        h5.create_dataset('/edges/a_to_b/target_node_id', data=target_node_ids)
        h5['/edges/a_to_b/target_node_id'].attrs['node_population'] = 'b'
        h5.create_dataset('/edges/a_to_b/edge_group_id', data=edge_group_ids)
        h5.create_dataset('/edges/a_to_b/edge_group_index', data=edge_group_indices)
        h5.create_dataset('/edges/a_to_b/edge_type_id', data=edge_type_ids)

        for grp_id in np.unique(h5['/edges/a_to_b/source_node_id'][()]):
            model_grp = h5.create_group('/edges/a_to_b/{}'.format(grp_id))
            grp_mask = edge_group_ids == grp_id
            model_grp.create_dataset('src_id', data=source_node_ids[grp_mask])
            model_grp.create_dataset('trg_id', data=target_node_ids[grp_mask])
            model_grp.create_dataset('et_id', data=edge_type_ids[grp_mask])

    # Sort by source_node_id
    sorted_tmp_edges_h5 = tempfile.NamedTemporaryFile(suffix='.h5')
    sort_func(
        input_edges_path=tmp_edges_h5.name,
        output_edges_path=sorted_tmp_edges_h5.name,
        edges_population='/edges/a_to_b',
        sort_by='source_node_id',
        **sort_params
    )
    with h5py.File(sorted_tmp_edges_h5.name, 'r') as h5:
        assert np.all(np.diff(h5['/edges/a_to_b/source_node_id'][()]) >= 0)
        _check_edges(h5, n_edges=n_edges)

    # Sort by target_node_id
    sorted_tmp_edges_h5 = tempfile.NamedTemporaryFile(suffix='.h5')
    sort_func(
        input_edges_path=tmp_edges_h5.name,
        output_edges_path=sorted_tmp_edges_h5.name,
        edges_population='/edges/a_to_b',
        sort_by='target_node_id',
        **sort_params
    )
    with h5py.File(sorted_tmp_edges_h5.name, 'r') as h5:
        assert np.all(np.diff(h5['/edges/a_to_b/target_node_id'][()]) >= 0)
        _check_edges(h5, n_edges=n_edges)

    # Sort by edge_type_id
    sorted_tmp_edges_h5 = tempfile.NamedTemporaryFile(suffix='.h5')
    sort_func(
        input_edges_path=tmp_edges_h5.name,
        output_edges_path=sorted_tmp_edges_h5.name,
        edges_population='/edges/a_to_b',
        sort_by='edge_type_id',
        **sort_params
    )
    with h5py.File(sorted_tmp_edges_h5.name, 'r') as h5:
        assert np.all(np.diff(h5['/edges/a_to_b/edge_type_id'][()]) >= 0)
        _check_edges(h5, n_edges=n_edges)

    # Sort by edge_group_id
    sorted_tmp_edges_h5 = tempfile.NamedTemporaryFile(suffix='.h5')
    sort_func(
        input_edges_path=tmp_edges_h5.name,
        output_edges_path=sorted_tmp_edges_h5.name,
        edges_population='/edges/a_to_b',
        sort_by='edge_group_id',
        **sort_params
    )
    with h5py.File(sorted_tmp_edges_h5.name, 'r') as h5:
        assert np.all(np.diff(h5['/edges/a_to_b/edge_group_id'][()]) >= 0)
        _check_edges(h5, n_edges=n_edges)
def test_create_index(indexer_func, indexer_args):
    tmp_edges_h5 = tempfile.NamedTemporaryFile(suffix='.h5')
    n_edges = 20
    source_node_ids = np.tile([0, 1, 3, 4], 5)
    target_node_ids = np.repeat([73, 72, 52, 4], 5)
    edge_type_ids = np.random.choice([100, 102, 103], size=n_edges, replace=True)
    edge_group_ids = np.full(n_edges, fill_value=0)
    edge_group_indices = np.arange(0, n_edges, dtype=int)  # np.int alias removed in NumPy >= 1.24

    with h5py.File(tmp_edges_h5.name, 'w') as h5:
        add_hdf5_magic(h5)
        add_hdf5_version(h5)
        h5.create_dataset('/edges/a_to_b/source_node_id', data=source_node_ids)
        h5['/edges/a_to_b/source_node_id'].attrs['node_population'] = 'a'
        h5.create_dataset('/edges/a_to_b/target_node_id', data=target_node_ids)
        h5['/edges/a_to_b/target_node_id'].attrs['node_population'] = 'b'
        h5.create_dataset('/edges/a_to_b/edge_group_id', data=edge_group_ids)
        h5.create_dataset('/edges/a_to_b/edge_group_index', data=edge_group_indices)
        h5.create_dataset('/edges/a_to_b/edge_type_id', data=edge_type_ids)
        h5.create_group('/edges/a_to_b/0')

    # Add target_node_id index
    indexer_func(
        edges_file=tmp_edges_h5.name,
        edges_population='/edges/a_to_b',
        index_type='target_node_id',
        **indexer_args
    )
    with h5py.File(tmp_edges_h5.name, 'r') as h5:
        _check_index(
            h5=h5,
            index_col='/edges/a_to_b/target_node_id',
            id_to_range_col='/edges/a_to_b/indices/target_to_source/node_id_to_range',
            range_to_edge_col='/edges/a_to_b/indices/target_to_source/range_to_edge_id',
            n_edges=n_edges
        )

    # Add source_node_id index
    indexer_func(
        edges_file=tmp_edges_h5.name,
        edges_population='/edges/a_to_b',
        index_type='source_node_id',
        **indexer_args
    )
    with h5py.File(tmp_edges_h5.name, 'r') as h5:
        _check_index(
            h5=h5,
            index_col='/edges/a_to_b/source_node_id',
            id_to_range_col='/edges/a_to_b/indices/source_to_target/node_id_to_range',
            range_to_edge_col='/edges/a_to_b/indices/source_to_target/range_to_edge_id',
            n_edges=n_edges
        )

    # Add edge_type_id index
    indexer_func(
        edges_file=tmp_edges_h5.name,
        edges_population='/edges/a_to_b',
        index_type='edge_type_id',
        **indexer_args
    )
    with h5py.File(tmp_edges_h5.name, 'r') as h5:
        _check_index(
            h5=h5,
            index_col='/edges/a_to_b/edge_type_id',
            id_to_range_col='/edges/a_to_b/indices/edge_type_to_index/node_id_to_range',
            range_to_edge_col='/edges/a_to_b/indices/edge_type_to_index/range_to_edge_id',
            n_edges=n_edges
        )
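# A hedged sketch of how the index checked above might be consumed. It assumes the standard
# SONATA index layout (my reading of the specification, not something the test itself shows):
# node_id_to_range[node_id] is a [start, end) row range into range_to_edge_id, and each of those
# rows is in turn a [start, end) range of contiguous edge indices.
def _edges_for_target(h5, edges_population, target_node_id):
    idx_grp = h5['{}/indices/target_to_source'.format(edges_population)]
    row_start, row_end = idx_grp['node_id_to_range'][target_node_id]
    edge_ids = []
    for edge_start, edge_end in idx_grp['range_to_edge_id'][row_start:row_end]:
        edge_ids.extend(range(edge_start, edge_end))
    return edge_ids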
def __init__(self, network_dir, grp_keys):
    self._network_dir = network_dir
    self._grp_keys = list(grp_keys)
    self._edge_counts = {(s, t): 0 for s, t in self._grp_keys}
    self._biophys_edge_count = {(s, t): 0 for s, t in self._grp_keys}
    self._point_edge_count = {(s, t): 0 for s, t in self._grp_keys}
    self._tmp_files = {(s, t): [] for s, t in self._grp_keys}

    # Collect the per-rank temporary edge files and tally the edge counts for each population
    for (src_pop, trg_pop), r in product(self._grp_keys, range(N_HOSTS)):
        fname = '.core{}.{}_{}_edges.h5'.format(r, src_pop, trg_pop)
        fpath = os.path.join(self._network_dir, fname)
        if not os.path.exists(fpath):
            io.log_warning('Expected file {} is missing'.format(fpath))

        h5file = h5py.File(fpath, 'r')
        edges_grp = h5file['/edges/{}_{}'.format(src_pop, trg_pop)]
        self._tmp_files[(src_pop, trg_pop)].append(edges_grp)
        self._edge_counts[(src_pop, trg_pop)] += len(edges_grp['source_node_id'])
        self._biophys_edge_count[(src_pop, trg_pop)] += len(edges_grp['0/syn_weight'])
        self._point_edge_count[(src_pop, trg_pop)] += len(edges_grp['1/syn_weight'])

    # Merge each population's temporary files into a single <src>_<trg>_edges.h5 file
    for (src_pop, trg_pop), in_grps in self._tmp_files.items():
        out_h5 = h5py.File(os.path.join(self._network_dir, '{}_{}_edges.h5'.format(src_pop, trg_pop)), 'w')
        add_hdf5_magic(out_h5)
        add_hdf5_version(out_h5)
        pop_root = out_h5.create_group('/edges/{}_{}'.format(src_pop, trg_pop))
        n_edges_total = self._edge_counts[(src_pop, trg_pop)]
        n_edges_bio = self._biophys_edge_count[(src_pop, trg_pop)]
        n_edges_point = self._point_edge_count[(src_pop, trg_pop)]

        pop_root.create_dataset('source_node_id', (n_edges_total, ), dtype=np.uint64)
        pop_root['source_node_id'].attrs['node_population'] = src_pop
        pop_root.create_dataset('target_node_id', (n_edges_total, ), dtype=np.uint64)
        pop_root['target_node_id'].attrs['node_population'] = trg_pop
        pop_root.create_dataset('edge_group_id', (n_edges_total, ), dtype=np.uint16)
        pop_root.create_dataset('edge_group_index', (n_edges_total, ), dtype=np.uint16)
        pop_root.create_dataset('edge_type_id', (n_edges_total, ), dtype=np.uint32)
        pop_root.create_dataset('0/syn_weight', (n_edges_bio, ), dtype=np.float64)
        pop_root.create_dataset('0/sec_id', (n_edges_bio, ), dtype=np.uint64)
        pop_root.create_dataset('0/sec_x', (n_edges_bio, ), dtype=np.float64)
        pop_root.create_dataset('1/syn_weight', (n_edges_point, ), dtype=np.float64)

        total_offset = 0
        bio_offset = 0
        point_offset = 0
        for grp in in_grps:
            n_ds = len(grp['source_node_id'])
            pop_root['source_node_id'][total_offset:(total_offset + n_ds)] = grp['source_node_id'][()]
            pop_root['target_node_id'][total_offset:(total_offset + n_ds)] = grp['target_node_id'][()]
            pop_root['edge_group_id'][total_offset:(total_offset + n_ds)] = grp['edge_group_id'][()]
            pop_root['edge_group_index'][total_offset:(total_offset + n_ds)] = grp['edge_group_index'][()]
            pop_root['edge_type_id'][total_offset:(total_offset + n_ds)] = grp['edge_type_id'][()]
            total_offset += n_ds

            n_ds = len(grp['0/syn_weight'])
            pop_root['0/syn_weight'][bio_offset:(bio_offset + n_ds)] = grp['0/syn_weight'][()]
            pop_root['0/sec_id'][bio_offset:(bio_offset + n_ds)] = grp['0/sec_id'][()]
            pop_root['0/sec_x'][bio_offset:(bio_offset + n_ds)] = grp['0/sec_x'][()]
            bio_offset += n_ds

            n_ds = len(grp['1/syn_weight'])
            pop_root['1/syn_weight'][point_offset:(point_offset + n_ds)] = grp['1/syn_weight'][()]
            point_offset += n_ds

            # Close and delete the per-rank temporary file once its data has been copied
            fname = grp.file.filename
            grp.file.close()
            if os.path.exists(fname):
                os.remove(fname)

        self._create_index(pop_root, index_type='target')
        self._create_index(pop_root, index_type='source')
        out_h5.close()
def _create_h5_file(self):
    self._h5_handle = h5py.File(self._file_name, 'w')
    add_hdf5_version(self._h5_handle)
    add_hdf5_magic(self._h5_handle)
import glob

import h5py

from bmtk.utils.sonata.utils import add_hdf5_magic, add_hdf5_version


# Add the SONATA magic/version attributes to every HDF5 file in the current directory
for h5file in glob.glob('*.h5'):
    with h5py.File(h5file, 'r+') as h5:
        add_hdf5_version(h5)
        add_hdf5_magic(h5)
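# For context, a rough sketch of what add_hdf5_magic/add_hdf5_version write, based on the SONATA
# specification rather than on bmtk's source: a root-level 'magic' attribute (uint32 0x0A7A) and a
# root-level 'version' attribute. The exact representation bmtk uses for the version value may differ.
import numpy as np

def _add_sonata_attrs(h5_handle):
    h5_handle['/'].attrs['magic'] = np.uint32(0x0A7A)
    h5_handle['/'].attrs['version'] = [0, 1]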