def test_write_dataset_list_fillvalue(self):
    # Write a 5x4 array wrapped in H5DataIO with fillvalue=-1, then check
    # that both the stored values and the fill value survive the write.
    wrapped = H5DataIO(np.arange(20).reshape(5, 4), fillvalue=-1)
    builder = DatasetBuilder('test_dataset', wrapped, attributes={})
    self.io.write_dataset(self.f, builder)

    written = self.f['test_dataset']
    values_match = np.all(written[:] == wrapped.data)
    self.assertTrue(values_match)
    self.assertEqual(written.fillvalue, -1)
def test_write_dataset_list_chunked(self):
    # Write a 5x2x3 array with an explicit chunk shape and confirm that the
    # resulting dataset reports the same chunking.
    chunk_shape = (1, 1, 3)
    wrapped = H5DataIO(np.arange(30).reshape(5, 2, 3), chunks=chunk_shape)
    builder = DatasetBuilder('test_dataset', wrapped, attributes={})
    self.io.write_dataset(self.f, builder)

    written = self.f['test_dataset']
    values_match = np.all(written[:] == wrapped.data)
    self.assertTrue(values_match)
    self.assertEqual(written.chunks, chunk_shape)
def test_warning_on_non_gzip_compression(self):
    # gzip is expected to be accepted silently, while szip and lzf are each
    # expected to trigger exactly one warning. In all cases the requested
    # filter name must still end up in io_settings.

    # Make sure no warning is issued when using gzip
    with warnings.catch_warnings(record=True) as w:
        # Record every warning regardless of the filters active in the runner
        warnings.simplefilter("always")
        dset = H5DataIO(np.arange(30), compression='gzip')
        self.assertEqual(len(w), 0)
        self.assertEqual(dset.io_settings['compression'], 'gzip')

    # Make sure a warning is issued when using szip
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        dset = H5DataIO(np.arange(30), compression='szip')
        self.assertEqual(len(w), 1)
        self.assertEqual(dset.io_settings['compression'], 'szip')

    # Make sure a warning is issued when using lzf
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        dset = H5DataIO(np.arange(30), compression='lzf')
        self.assertEqual(len(w), 1)
        self.assertEqual(dset.io_settings['compression'], 'lzf')
def test_dataio_list_data(self):
    # A plain Python list wrapped in H5DataIO should be usable as TimeSeries
    # data: the sample count and the values must both be preserved.
    num_samples = 100
    data = list(range(num_samples))
    ts1 = TimeSeries('test_ts1', H5DataIO(data), 'grams', starting_time=0.0, rate=0.1)
    self.assertEqual(ts1.num_samples, num_samples)
    # Use a unittest assertion instead of a bare ``assert`` so the check is
    # not stripped when Python runs with -O and failures show a useful diff.
    self.assertEqual(data, list(ts1.data))
def test_write_dataset_list_compress(self):
    # Requesting compression with compress=True is expected to produce a
    # gzip-compressed dataset holding the original values.
    wrapped = H5DataIO(np.arange(30).reshape(5, 2, 3), compress=True)
    builder = DatasetBuilder('test_dataset', wrapped, attributes={})
    self.io.write_dataset(self.f, builder)

    written = self.f['test_dataset']
    values_match = np.all(written[:] == wrapped.data)
    self.assertTrue(values_match)
    self.assertEqual(written.compression, 'gzip')
def test_link_h5py_dataset_h5dataio_input(self):
    # First write a regular dataset, then write a second one whose data is
    # the already-written h5py dataset wrapped with link_data=True. The
    # second entry is expected to be stored as a soft link.
    source_builder = DatasetBuilder('test_dataset', np.arange(10), attributes={})
    self.io.write_dataset(self.f, source_builder)

    linked_input = H5DataIO(data=self.f['test_dataset'], link_data=True)
    link_builder = DatasetBuilder('test_softlink', linked_input, attributes={})
    self.io.write_dataset(self.f, link_builder)

    link = self.f.get('test_softlink', getlink=True)
    self.assertTrue(isinstance(link, SoftLink))
def test_dataio_list_data(self):
    # A list of 100 values wrapped in H5DataIO should give a TimeSeries
    # that reports 100 samples.
    wrapped = H5DataIO(list(range(100)))
    series = TimeSeries(
        'test_ts1',
        'unit test test_DataIO',
        wrapped,
        'grams',
        starting_time=0.0,
        rate=0.1,
    )
    self.assertEqual(series.num_samples, 100)
def test_dataio_dci_data(self):
    # For data fed from a generator through a DataChunkIterator, the test
    # expects the TimeSeries to report num_samples as -1.
    chunk_iter = DataChunkIterator(data=(i for i in range(100)))
    wrapped = H5DataIO(chunk_iter)
    series = TimeSeries(
        'test_ts1',
        'unit test test_DataIO',
        wrapped,
        'grams',
        starting_time=0.0,
        rate=0.1,
    )
    self.assertEqual(series.num_samples, -1)
def test_write_dataset_custom_fillvalue(self):
    # Write a TimeSeries whose data carries a custom fill value through
    # NWBHDF5IO, then reopen the file with h5py and verify the dataset.
    a = H5DataIO(np.arange(20).reshape(5, 4), fillvalue=-1)
    ts = TimeSeries('ts_name', a, 'A', timestamps=np.arange(5))
    self.nwbfile.add_acquisition(ts)
    with NWBHDF5IO(self.path, 'w') as io:
        io.write(self.nwbfile)
    # Open the file in a context manager so the handle is always closed,
    # even when one of the assertions below fails.
    with File(self.path, 'r') as infile:
        dset = infile['/acquisition/ts_name/data']
        self.assertTrue(np.all(dset[:] == a.data))
        self.assertEqual(dset.fillvalue, -1)
def test_dataio_dci_data(self):
    # Wrap a generator-backed DataChunkIterator in H5DataIO and use it as
    # TimeSeries data. The test expects num_samples to be -1 and the values
    # obtained by iterating the wrapper to match a fresh generator.

    def generator_factory():
        # Each call returns a new, unconsumed generator over 0..99
        return (i for i in range(100))

    data = H5DataIO(DataChunkIterator(data=generator_factory()))
    ts1 = TimeSeries('test_ts1', data, 'grams', starting_time=0.0, rate=0.1)
    self.assertEqual(ts1.num_samples, -1)
    for xi, yi in zip(data, generator_factory()):
        # unittest assertion instead of a bare ``assert`` so the check is
        # kept under -O and reported through the test framework
        self.assertTrue(np.allclose(xi, yi))
def test_copy_h5py_dataset_h5dataio_input(self):
    # Write a regular dataset, then write it again via H5DataIO with
    # link_data=False while write_dataset itself is asked to link. The
    # result is expected to be a hard link holding the same values.
    source_builder = DatasetBuilder('test_dataset', np.arange(10), attributes={})
    self.io.write_dataset(self.f, source_builder)

    # Force dataset copy
    copied_input = H5DataIO(data=self.f['test_dataset'], link_data=False)
    copy_builder = DatasetBuilder('test_copy', copied_input, attributes={})
    # Make sure the default behavior is set to link the data
    self.io.write_dataset(self.f, copy_builder, link_data=True)

    copy_link = self.f.get('test_copy', getlink=True)
    self.assertTrue(isinstance(copy_link, HardLink))
    source_values = self.f['test_dataset'][:].tolist()
    copied_values = self.f['test_copy'][:].tolist()
    self.assertListEqual(source_values, copied_values)
def test_gzip_timestamps(self):
    # Timestamps wrapped in H5DataIO with compression='gzip' should be
    # written as a gzip-compressed dataset.
    ts = TimeSeries('ts_name', [1, 2, 3], 'A',
                    timestamps=H5DataIO(np.array([1., 2., 3.]), compression='gzip'))
    self.nwbfile.add_acquisition(ts)
    with NWBHDF5IO(self.path, 'w') as io:
        io.write(self.nwbfile)
    # confirm that the dataset was indeed compressed
    # (context manager guarantees the read handle is closed; assertEqual
    # replaces the deprecated assertEquals alias)
    with File(self.path, 'r') as infile:
        self.assertEqual(
            infile['/acquisition/ts_name/timestamps'].compression, 'gzip')
def test_warning_on_setting_io_options_on_h5dataset_input(self):
    # Passing I/O options together with an already-written h5py dataset is
    # expected to produce one warning per option (seven options are set).
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(10), attributes={}))
    with warnings.catch_warnings(record=True) as w:
        # Record every warning regardless of the filters active in the
        # runner, so the count below does not depend on ambient settings.
        warnings.simplefilter("always")
        H5DataIO(self.f['test_dataset'],
                 compression='gzip',
                 compression_opts=4,
                 fletcher32=True,
                 shuffle=True,
                 maxshape=(10, 20),
                 chunks=(10,),
                 fillvalue=100)
        self.assertEqual(len(w), 7)
def test_write_dataset_list_disable_default_compress(self):
    # With compression=False the compression options should be dropped
    # (with a warning) and the written dataset should be uncompressed.
    with warnings.catch_warnings(record=True) as w:
        # Record every warning regardless of the filters active in the runner
        warnings.simplefilter("always")
        a = H5DataIO(np.arange(30).reshape(5, 2, 3),
                     compression=False,
                     compression_opts=5)
        self.assertEqual(len(w), 1)  # We expect a warning that compression options are being ignored
        # The key is 'compression_opts'; the previous spelling
        # 'compression_ops' could never be present, making the check vacuous.
        self.assertFalse('compression_opts' in a.io_settings)
        self.assertFalse('compression' in a.io_settings)

    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={}))
    dset = self.f['test_dataset']
    self.assertTrue(np.all(dset[:] == a.data))
    self.assertEqual(dset.compression, None)
def test_write_dataset_list_compress(self):
    # Write an array with explicit gzip settings and confirm every
    # requested filter option is reported by the written dataset.
    wrapped = H5DataIO(
        np.arange(30).reshape(5, 2, 3),
        compression='gzip',
        compression_opts=5,
        shuffle=True,
        fletcher32=True,
    )
    builder = DatasetBuilder('test_dataset', wrapped, attributes={})
    self.io.write_dataset(self.f, builder)

    written = self.f['test_dataset']
    values_match = np.all(written[:] == wrapped.data)
    self.assertTrue(values_match)
    self.assertEqual(written.compression, 'gzip')
    self.assertEqual(written.compression_opts, 5)
    self.assertEqual(written.shuffle, True)
    self.assertEqual(written.fletcher32, True)
def write_nwb(cell_data, fpath='../data/soltesz_data.nwb', compress=True):
    """Write population spike data to an NWB file.

    Parameters
    ----------
    cell_data: dict
        output of get_neuroh5_cell_data. The keys 'unique_cell_types',
        'cell_type_indices', 'cell_index', 'value' and 'value_pointer'
        are read.
    fpath: str
        path of the NWB file to write
    compress: bool, optional
        if True, will compress all data. default=True

    Returns
    -------
    None

    """
    if compress:
        cell_data = {
            key: H5DataIO(val, compress=True)
            for key, val in cell_data.items()
        }

    # Metadata is derived from the file's base name. os.path.split(...)[0]
    # would return the directory part instead, and slicing a fixed number
    # of characters does not reliably strip the extension.
    fname = os.path.basename(fpath)
    source = os.path.splitext(fname)[0]

    f = NWBFile(file_name=fname,
                source=source,
                session_description=source,
                identifier=source,
                session_start_time=datetime.now(),
                lab='Soltesz',
                institution='Stanford')

    population_module = f.create_processing_module(name='spikes',
                                                   source='source',
                                                   description='description')

    population_module.add_container(
        CatCellInfo(name='Cell Types',
                    source=source,
                    values=cell_data['unique_cell_types'],
                    indices=cell_data['cell_type_indices'],
                    cell_index=cell_data['cell_index']))

    population_module.add_container(
        PopulationSpikeTimes(name='Population Spike Times',
                             source=source,
                             cell_index=cell_data['cell_index'],
                             value=cell_data['value'],
                             pointer=cell_data['value_pointer']))

    with NWBHDF5IO(fpath, mode='w') as io:
        # The file object must be handed to the writer; calling write()
        # without it leaves nothing to serialize.
        io.write(f)
def test_write_dataset_data_chunk_iterator_with_compression(self):
    # Stream ten values through a DataChunkIterator wrapped with explicit
    # gzip and chunk settings, then verify contents and filter options.
    chunk_iter = DataChunkIterator(data=np.arange(10), buffer_size=2)
    io_options = dict(
        compression='gzip',
        compression_opts=5,
        shuffle=True,
        fletcher32=True,
        chunks=(2,),
    )
    wrapped = H5DataIO(data=chunk_iter, **io_options)
    builder = DatasetBuilder('test_dataset', wrapped, attributes={})
    self.io.write_dataset(self.f, builder)

    written = self.f['test_dataset']
    self.assertListEqual(written[:].tolist(), list(range(10)))
    self.assertEqual(written.compression, 'gzip')
    self.assertEqual(written.compression_opts, 5)
    self.assertEqual(written.shuffle, True)
    self.assertEqual(written.fletcher32, True)
    self.assertEqual(written.chunks, (2,))
def test_write_dataset_custom_compress(self):
    # Write a TimeSeries whose data carries explicit gzip settings through
    # NWBHDF5IO, then reopen the file with h5py and verify the dataset.
    a = H5DataIO(np.arange(30).reshape(5, 2, 3),
                 compression='gzip',
                 compression_opts=5,
                 shuffle=True,
                 fletcher32=True)
    ts = TimeSeries('ts_name', a, 'A', timestamps=np.arange(5))
    self.nwbfile.add_acquisition(ts)
    with NWBHDF5IO(self.path, 'w') as io:
        io.write(self.nwbfile)
    # Open the file in a context manager so the handle is always closed,
    # even when one of the assertions below fails.
    with File(self.path, 'r') as infile:
        dset = infile['/acquisition/ts_name/data']
        self.assertTrue(np.all(dset[:] == a.data))
        self.assertEqual(dset.compression, 'gzip')
        self.assertEqual(dset.compression_opts, 5)
        self.assertEqual(dset.shuffle, True)
        self.assertEqual(dset.fletcher32, True)
def test_write_dataset_iterable_multidimensional_array_compression(self):
    # Feed a 5x2x3 array through DataChunkIterator.from_iterable with gzip
    # settings and verify shape, contents and filter options on disk.
    expected = np.arange(30).reshape(5, 2, 3)
    chunk_iter = DataChunkIterator.from_iterable(iter(expected), buffer_size=2)
    wrapped = H5DataIO(
        data=chunk_iter,
        compression='gzip',
        compression_opts=5,
        shuffle=True,
        fletcher32=True,
    )
    builder = DatasetBuilder('test_dataset', wrapped, attributes={})
    self.io.write_dataset(self.f, builder)

    written = self.f['test_dataset']
    self.assertEqual(written.shape, expected.shape)
    self.assertListEqual(written[:].tolist(), expected.tolist())
    self.assertEqual(written.compression, 'gzip')
    self.assertEqual(written.compression_opts, 5)
    self.assertEqual(written.shuffle, True)
    self.assertEqual(written.fletcher32, True)
def test_pass_through_of_recommended_chunks(self):
    # A DataChunkIterator subclass that recommends a chunk shape of
    # (5, 1, 1) should lead to a dataset written with exactly that chunking.

    class DC(DataChunkIterator):
        def recommended_chunk_shape(self):
            return (5, 1, 1)

    chunk_iter = DC(data=np.arange(30).reshape(5, 2, 3))
    wrapped = H5DataIO(
        data=chunk_iter,
        compression='gzip',
        compression_opts=5,
        shuffle=True,
        fletcher32=True,
    )
    builder = DatasetBuilder('test_dataset', wrapped, attributes={})
    self.io.write_dataset(self.f, builder)

    written = self.f['test_dataset']
    self.assertEqual(written.chunks, (5, 1, 1))
    self.assertEqual(written.compression, 'gzip')
    self.assertEqual(written.compression_opts, 5)
    self.assertEqual(written.shuffle, True)
    self.assertEqual(written.fletcher32, True)
def test_write_dataset_datachunkiterator_with_compression(self):
    # Write a TimeSeries whose data is a compressed DataChunkIterator
    # through NWBHDF5IO, then reopen the file with h5py and verify shape,
    # contents and filter options of the written dataset.
    a = np.arange(30).reshape(5, 2, 3)
    aiter = iter(a)
    daiter = DataChunkIterator.from_iterable(aiter, buffer_size=2)
    wrapped_daiter = H5DataIO(data=daiter,
                              compression='gzip',
                              compression_opts=5,
                              shuffle=True,
                              fletcher32=True)
    ts = TimeSeries('ts_name', wrapped_daiter, 'A', timestamps=np.arange(5))
    self.nwbfile.add_acquisition(ts)
    with NWBHDF5IO(self.path, 'w') as io:
        io.write(self.nwbfile)
    # Open the file in a context manager so the handle is always closed,
    # even when one of the assertions below fails.
    with File(self.path, 'r') as infile:
        dset = infile['/acquisition/ts_name/data']
        self.assertEqual(dset.shape, a.shape)
        self.assertListEqual(dset[:].tolist(), a.tolist())
        self.assertEqual(dset.compression, 'gzip')
        self.assertEqual(dset.compression_opts, 5)
        self.assertEqual(dset.shuffle, True)
        self.assertEqual(dset.fletcher32, True)
def main():
    """Build an example NWBFile step by step from synthetic data.

    The function generates random "ephys" and "spatial" arrays, creates an
    NWBFile, writes it to ``example.h5`` and removes that file again, and
    then adds a device, an electrode group, electrodes, time series, data
    interfaces, epochs and compressed time series to the in-memory file.

    The ``# <name>: start`` / ``# <name>: end`` comments delimit named
    sections of the script.
    NOTE(review): these markers are presumably consumed by documentation
    tooling that extracts the snippets - confirm before moving or renaming.
    """
    import os.path

    # prerequisites: start
    import numpy as np

    rate = 10.0
    # Fixed seed so the generated data is reproducible
    np.random.seed(1234)
    data_len = 1000
    ephys_data = np.random.rand(data_len)
    ephys_timestamps = np.arange(data_len) / rate
    # Every tenth ephys timestamp is used for the spatial data
    spatial_timestamps = ephys_timestamps[::10]
    spatial_data = np.cumsum(np.random.normal(size=(2, len(spatial_timestamps))), axis=-1).T
    # prerequisites: end

    # create-nwbfile: start
    from datetime import datetime
    from dateutil.tz import tzlocal
    from pynwb import NWBFile

    f = NWBFile(
        'the PyNWB tutorial',
        'my first synthetic recording',
        'EXAMPLE_ID',
        datetime.now(tzlocal()),
        experimenter='Dr. Bilbo Baggins',
        lab='Bag End Laboratory',
        institution='University of Middle Earth at the Shire',
        experiment_description=
        'I went on an adventure with thirteen dwarves to reclaim vast treasures.',
        session_id='LONELYMTN')
    # create-nwbfile: end

    # save-nwbfile: start
    from pynwb import NWBHDF5IO

    filename = "example.h5"
    io = NWBHDF5IO(filename, mode='w')
    io.write(f)
    io.close()
    # save-nwbfile: end

    # The file was only written to demonstrate saving; delete it again
    os.remove(filename)

    # create-device: start
    device = f.create_device(name='trodes_rig123', source="a source")
    # create-device: end

    # create-electrode-groups: start
    electrode_name = 'tetrode1'
    source = "an hypothetical source"
    description = "an example tetrode"
    location = "somewhere in the hippocampus"

    electrode_group = f.create_electrode_group(electrode_name,
                                               source=source,
                                               description=description,
                                               location=location,
                                               device=device)
    # create-electrode-groups: end

    # create-electrode-table-region: start
    # Add four electrodes, all belonging to the group created above
    for idx in [1, 2, 3, 4]:
        f.add_electrode(idx,
                        x=1.0,
                        y=2.0,
                        z=3.0,
                        imp=float(-idx),
                        location='CA1',
                        filtering='none',
                        description='channel %s' % idx,
                        group=electrode_group)

    electrode_table_region = f.create_electrode_table_region(
        [0, 2], 'the first and third electrodes')
    # create-electrode-table-region: end

    # create-timeseries: start
    from pynwb.ecephys import ElectricalSeries
    from pynwb.behavior import SpatialSeries

    ephys_ts = ElectricalSeries(
        'test_ephys_data',
        'an hypothetical source',
        ephys_data,
        electrode_table_region,
        timestamps=ephys_timestamps,
        # Alternatively, could specify starting_time and rate as follows
        # starting_time=ephys_timestamps[0],
        # rate=rate,
        resolution=0.001,
        comments=
        "This data was randomly generated with numpy, using 1234 as the seed",
        description="Random numbers generated with numpy.random.rand")
    f.add_acquisition(ephys_ts)

    spatial_ts = SpatialSeries(
        'test_spatial_timeseries',
        'a stumbling rat',
        spatial_data,
        'origin on x,y-plane',
        timestamps=spatial_timestamps,
        resolution=0.1,
        comments="This data was generated with numpy, using 1234 as the seed",
        description="This 2D Brownian process generated with "
        "np.cumsum(np.random.normal(size=(2, len(spatial_timestamps))), axis=-1).T"
    )
    f.add_acquisition(spatial_ts)
    # create-timeseries: end

    # create-data-interface: start
    from pynwb.ecephys import LFP
    from pynwb.behavior import Position

    # ephys_ts and spatial_ts are rebound here; the epochs added further
    # down reference these interface-created series.
    lfp = f.add_acquisition(LFP('a hypothetical source'))
    ephys_ts = lfp.create_electrical_series(
        'test_ephys_data',
        'an hypothetical source',
        ephys_data,
        electrode_table_region,
        timestamps=ephys_timestamps,
        resolution=0.001,
        comments=
        "This data was randomly generated with numpy, using 1234 as the seed",  # noqa: E501
        description="Random numbers generated with numpy.random.rand")

    pos = f.add_acquisition(Position('a hypothetical source'))
    spatial_ts = pos.create_spatial_series(
        'test_spatial_timeseries',
        'a stumbling rat',
        spatial_data,
        'origin on x,y-plane',
        timestamps=spatial_timestamps,
        resolution=0.1,
        comments="This data was generated with numpy, using 1234 as the seed",
        description="This 2D Brownian process generated with "
        "np.cumsum(np.random.normal(size=(2, len(spatial_timestamps))), axis=-1).T"
    )  # noqa: E501
    # create-data-interface: end

    # create-epochs: start
    epoch_tags = ('example_epoch', )

    # Both epochs cover the same interval and the same two time series
    f.add_epoch(name='epoch1',
                start_time=0.0,
                stop_time=1.0,
                tags=epoch_tags,
                description="the first test epoch",
                timeseries=[ephys_ts, spatial_ts])

    f.add_epoch(name='epoch2',
                start_time=0.0,
                stop_time=1.0,
                tags=epoch_tags,
                description="the second test epoch",
                timeseries=[ephys_ts, spatial_ts])
    # create-epochs: end

    # create-compressed-timeseries: start
    from pynwb.ecephys import ElectricalSeries
    from pynwb.behavior import SpatialSeries
    from pynwb.form.backends.hdf5 import H5DataIO

    # Same series as above, but data and timestamps are wrapped in H5DataIO
    # with compress=True and the series are stored under different names.
    ephys_ts = ElectricalSeries(
        'test_compressed_ephys_data',
        'an hypothetical source',
        H5DataIO(ephys_data, compress=True),
        electrode_table_region,
        timestamps=H5DataIO(ephys_timestamps, compress=True),
        resolution=0.001,
        comments=
        "This data was randomly generated with numpy, using 1234 as the seed",
        description="Random numbers generated with numpy.random.rand")
    f.add_acquisition(ephys_ts)

    spatial_ts = SpatialSeries(
        'test_compressed_spatial_timeseries',
        'a stumbling rat',
        H5DataIO(spatial_data, compress=True),
        'origin on x,y-plane',
        timestamps=H5DataIO(spatial_timestamps, compress=True),
        resolution=0.1,
        comments="This data was generated with numpy, using 1234 as the seed",
        description="This 2D Brownian process generated with "
        "np.cumsum(np.random.normal(size=(2, len(spatial_timestamps))), axis=-1).T"
    )
    f.add_acquisition(spatial_ts)
def test_warning_on_linking_of_regular_array(self):
    # Requesting link_data=True for a plain numpy array is expected to
    # produce one warning and leave link_data switched off.
    with warnings.catch_warnings(record=True) as w:
        # Record every warning regardless of the filters active in the
        # runner, so the count below does not depend on ambient settings.
        warnings.simplefilter("always")
        dset = H5DataIO(np.arange(30), link_data=True)
        self.assertEqual(len(w), 1)
        self.assertEqual(dset.link_data, False)