def test_write_dataset_list_chunked(self):
    a = H5DataIO(np.arange(30).reshape(5, 2, 3),
                 chunks=(1, 1, 3))
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={}))
    dset = self.f['test_dataset']
    self.assertTrue(np.all(dset[:] == a.data))
    self.assertEqual(dset.chunks, (1, 1, 3))
def test_dataio_list_data(self):
    length = 100
    data = list(range(length))
    ts1 = TimeSeries('test_ts1', H5DataIO(data), 'grams', starting_time=0.0, rate=0.1)
    self.assertEqual(ts1.num_samples, length)
    assert data == list(ts1.data)
def test_link_h5py_dataset_h5dataio_input(self):
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(10), attributes={}))
    self.io.write_dataset(self.f, DatasetBuilder('test_softlink',
                                                 H5DataIO(data=self.f['test_dataset'],
                                                          link_data=True),
                                                 attributes={}))
    self.assertTrue(isinstance(self.f.get('test_softlink', getlink=True), SoftLink))
def to_nwb(self, nwbfile: NWBFile) -> NWBFile:
    events = self.value.set_index('cell_roi_id')

    ophys_module = nwbfile.processing['ophys']
    dff_interface = ophys_module.data_interfaces['dff']
    traces = dff_interface.roi_response_series['traces']
    seg_interface = ophys_module.data_interfaces['image_segmentation']

    cell_specimen_table = (
        seg_interface.plane_segmentations['cell_specimen_table'])
    cell_specimen_df = cell_specimen_table.to_dataframe()

    # We only want to store the subset of rois that have events data
    rois_with_events_indices = [cell_specimen_df.index.get_loc(label)
                                for label in events.index]
    roi_table_region = cell_specimen_table.create_roi_table_region(
        description="Cells with detected events",
        region=rois_with_events_indices)

    events_data = np.vstack(events['events'])
    events = OphysEventDetection(
        # time x rois instead of rois x time
        # store using compression since sparse
        data=H5DataIO(events_data.T, compression=True),
        lambdas=events['lambda'].values,
        noise_stds=events['noise_std'].values,
        unit='N/A',
        rois=roi_table_region,
        timestamps=traces.timestamps)

    ophys_module.add_data_interface(events)

    return nwbfile
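A minimal standalone sketch of the compression round trip this method relies on: wrapping sparse data in H5DataIO(compression=True) (which the tests below show defaults to gzip) and confirming the on-disk dataset is compressed. The file path, session metadata, and series name here are illustrative, not from the source.

    # Hedged sketch: verify an H5DataIO(compression=True) dataset lands on disk gzip-compressed.
    # Assumes pynwb and h5py are installed; all names and values below are made up for illustration.
    import numpy as np
    from datetime import datetime, timezone
    from h5py import File
    from pynwb import NWBFile, NWBHDF5IO, TimeSeries, H5DataIO

    nwbfile = NWBFile('demo session', 'demo_id', datetime.now(timezone.utc))
    sparse = np.zeros((1000, 4))   # mostly zeros, like detected-events traces
    sparse[::50, :] = 1.0
    nwbfile.add_acquisition(TimeSeries(name='events_demo',
                                       data=H5DataIO(sparse, compression=True),  # True -> gzip
                                       unit='N/A', rate=30.0, starting_time=0.0))
    path = 'demo_events.nwb'
    with NWBHDF5IO(path, 'w') as io:
        io.write(nwbfile)
    with File(path, 'r') as f:
        assert f['/acquisition/events_demo/data'].compression == 'gzip'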
def test_write_dataset_list_enable_default_compress(self):
    a = H5DataIO(np.arange(30).reshape(5, 2, 3),
                 compression=True)
    self.assertEqual(a.io_settings['compression'], 'gzip')
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={}))
    dset = self.f['test_dataset']
    self.assertTrue(np.all(dset[:] == a.data))
    self.assertEqual(dset.compression, 'gzip')
def test_build_dataio_datachunkiterator(self):  # hdmf#512
    """Test building of a dataset with no dtype and no data_type with value DataIO wrapping a DCI."""
    container = Baz('my_baz', H5DataIO(DataChunkIterator(['a', 'b', 'c', 'd']), chunks=True), 'value1')
    builder = self.type_map.build(container)
    self.assertIsInstance(builder.get('data'), H5DataIO)
    self.assertIsInstance(builder.get('data').data, DataChunkIterator)
def test_write_dataset_custom_chunks(self):
    a = H5DataIO(np.arange(30).reshape(5, 2, 3),
                 chunks=(1, 1, 3))
    ts = TimeSeries('ts_name', a, 'A', timestamps=np.arange(5))
    self.nwbfile.add_acquisition(ts)
    with NWBHDF5IO(self.path, 'w') as io:
        io.write(self.nwbfile)
    with File(self.path, 'r') as infile:
        dset = infile['/acquisition/ts_name/data']
        self.assertTrue(np.all(dset[:] == a.data))
        self.assertEqual(dset.chunks, (1, 1, 3))
def test_write_dataset_custom_fillvalue(self):
    a = H5DataIO(np.arange(20).reshape(5, 4), fillvalue=-1)
    ts = TimeSeries('ts_name', a, 'A', timestamps=np.arange(5))
    self.nwbfile.add_acquisition(ts)
    with NWBHDF5IO(self.path, 'w') as io:
        io.write(self.nwbfile, cache_spec=False)
    with File(self.path, 'r') as f:
        dset = f['/acquisition/ts_name/data']
        self.assertTrue(np.all(dset[:] == a.data))
        self.assertEqual(dset.fillvalue, -1)
def test_warning_on_non_gzip_compression(self):
    # Make sure no warning is issued when using gzip
    with warnings.catch_warnings(record=True) as w:
        dset = H5DataIO(np.arange(30), compression='gzip')
        self.assertEqual(len(w), 0)
        self.assertEqual(dset.io_settings['compression'], 'gzip')
    # Make sure a warning is issued when using szip (not all HDF5 installs support it)
    with warnings.catch_warnings(record=True) as w:
        dset = H5DataIO(np.arange(30), compression='szip')
        self.assertEqual(len(w), 1)
        self.assertEqual(dset.io_settings['compression'], 'szip')
    # Make sure a warning is issued when using lzf
    with warnings.catch_warnings(record=True) as w:
        dset = H5DataIO(np.arange(30), compression='lzf')
        self.assertEqual(len(w), 1)
        self.assertEqual(dset.io_settings['compression'], 'lzf')
def test_gzip_timestamps(self):
    ts = TimeSeries(name='ts_name', data=[1, 2, 3], unit='A',
                    timestamps=H5DataIO(np.array([1., 2., 3.]), compression='gzip'))
    self.nwbfile.add_acquisition(ts)
    with NWBHDF5IO(self.path, 'w') as io:
        io.write(self.nwbfile, cache_spec=False)
    # confirm that the dataset was indeed compressed
    with File(self.path, 'r') as f:
        self.assertEqual(f['/acquisition/ts_name/timestamps'].compression, 'gzip')
def test_write_dataset_custom_chunks(self):
    a = H5DataIO(np.arange(30).reshape(5, 2, 3),
                 chunks=(1, 1, 3))
    ts = TimeSeries(name='ts_name', data=a, unit='A', timestamps=np.arange(5.))
    self.nwbfile.add_acquisition(ts)
    with NWBHDF5IO(self.path, 'w') as io:
        io.write(self.nwbfile, cache_spec=False)
    with File(self.path, 'r') as f:
        dset = f['/acquisition/ts_name/data']
        self.assertTrue(np.all(dset[:] == a.data))
        self.assertEqual(dset.chunks, (1, 1, 3))
def test_copy_h5py_dataset_h5dataio_input(self):
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(10), attributes={}))
    self.io.write_dataset(self.f,
                          DatasetBuilder('test_copy',
                                         H5DataIO(data=self.f['test_dataset'],
                                                  link_data=False),  # Force dataset copy
                                         attributes={}),
                          link_data=True)  # Make sure the default behavior is set to link the data
    self.assertTrue(isinstance(self.f.get('test_copy', getlink=True), HardLink))
    self.assertListEqual(self.f['test_dataset'][:].tolist(),
                         self.f['test_copy'][:].tolist())
def test_warning_on_setting_io_options_on_h5dataset_input(self):
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(10), attributes={}))
    with warnings.catch_warnings(record=True) as w:
        H5DataIO(self.f['test_dataset'],
                 compression='gzip',
                 compression_opts=4,
                 fletcher32=True,
                 shuffle=True,
                 maxshape=(10, 20),
                 chunks=(10,),
                 fillvalue=100)
    self.assertEqual(len(w), 7)
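A hedged sketch of the pattern this warning steers you toward: filter options (compression, chunking, fillvalue, etc.) belong where the data is first created, and wrapping an existing h5py Dataset in H5DataIO is only meaningful for choosing link-vs-copy behavior. File and dataset names here are illustrative.

    # Hedged sketch, assuming hdmf and h5py; 'source.h5' and 'test_dataset' are illustrative names.
    import numpy as np
    from h5py import File
    from hdmf.backends.hdf5 import H5DataIO

    # Filter options are applied when the data is first written out:
    compressed = H5DataIO(np.arange(10), compression='gzip', compression_opts=4)

    # An already-written h5py Dataset keeps its creation-time filters; wrap it
    # without filter kwargs (no warnings) just to control linking on the next write:
    with File('source.h5', 'a') as f:
        if 'test_dataset' not in f:
            f.create_dataset('test_dataset', data=np.arange(10))
        reused = H5DataIO(f['test_dataset'], link_data=True)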
def test_gzip_timestamps(self):
    ts = TimeSeries('ts_name', [1, 2, 3], 'A',
                    timestamps=H5DataIO(np.array([1., 2., 3.]), compression='gzip'))
    self.nwbfile.add_acquisition(ts)
    with NWBHDF5IO(self.path, 'w') as io:
        io.write(self.nwbfile)
    # confirm that the dataset was indeed compressed
    with File(self.path, 'r') as infile:
        self.assertEqual(infile['/acquisition/ts_name/timestamps'].compression, 'gzip')
def test_write_dataset_list_disable_default_compress(self):
    with warnings.catch_warnings(record=True) as w:
        a = H5DataIO(np.arange(30).reshape(5, 2, 3),
                     compression=False,
                     compression_opts=5)
        # We expect a warning that the compression options are being ignored
        self.assertEqual(len(w), 1)
        self.assertFalse('compression_opts' in a.io_settings)
        self.assertFalse('compression' in a.io_settings)
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={}))
    dset = self.f['test_dataset']
    self.assertTrue(np.all(dset[:] == a.data))
    self.assertEqual(dset.compression, None)
def test_write_dataset_list_compress(self):
    a = H5DataIO(np.arange(30).reshape(5, 2, 3),
                 compression='gzip',
                 compression_opts=5,
                 shuffle=True,
                 fletcher32=True)
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={}))
    dset = self.f['test_dataset']
    self.assertTrue(np.all(dset[:] == a.data))
    self.assertEqual(dset.compression, 'gzip')
    self.assertEqual(dset.compression_opts, 5)
    self.assertEqual(dset.shuffle, True)
    self.assertEqual(dset.fletcher32, True)
def test_dataio_dci_data(self):

    def generator_factory():
        return (i for i in range(100))

    data = H5DataIO(DataChunkIterator(data=generator_factory()))
    ts1 = TimeSeries('test_ts1', data, 'grams', starting_time=0.0, rate=0.1)
    with self.assertWarnsWith(UserWarning,
                              'The data attribute on this TimeSeries (named: test_ts1) has a '
                              '__len__, but it cannot be read'):
        self.assertIs(ts1.num_samples, None)
    for xi, yi in zip(data, generator_factory()):
        assert np.allclose(xi, yi)
def test_dataio_dci_data(self):

    def generator_factory():
        return (i for i in range(100))

    data = H5DataIO(DataChunkIterator(data=generator_factory()))
    ts1 = TimeSeries('test_ts1', data, 'grams', starting_time=0.0, rate=0.1)
    self.assertEqual(ts1.num_samples, -1)
    for xi, yi in zip(data, generator_factory()):
        assert np.allclose(xi, yi)
def test_write_dataset_data_chunk_iterator_with_compression(self):
    dci = DataChunkIterator(data=np.arange(10), buffer_size=2)
    wrapped_dci = H5DataIO(data=dci,
                           compression='gzip',
                           compression_opts=5,
                           shuffle=True,
                           fletcher32=True,
                           chunks=(2,))
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', wrapped_dci, attributes={}))
    dset = self.f['test_dataset']
    self.assertListEqual(dset[:].tolist(), list(range(10)))
    self.assertEqual(dset.compression, 'gzip')
    self.assertEqual(dset.compression_opts, 5)
    self.assertEqual(dset.shuffle, True)
    self.assertEqual(dset.fletcher32, True)
    self.assertEqual(dset.chunks, (2,))
def test_write_dataset_iterable_multidimensional_array_compression(self):
    a = np.arange(30).reshape(5, 2, 3)
    aiter = iter(a)
    daiter = DataChunkIterator.from_iterable(aiter, buffer_size=2)
    wrapped_daiter = H5DataIO(data=daiter,
                              compression='gzip',
                              compression_opts=5,
                              shuffle=True,
                              fletcher32=True)
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', wrapped_daiter, attributes={}))
    dset = self.f['test_dataset']
    self.assertEqual(dset.shape, a.shape)
    self.assertListEqual(dset[:].tolist(), a.tolist())
    self.assertEqual(dset.compression, 'gzip')
    self.assertEqual(dset.compression_opts, 5)
    self.assertEqual(dset.shuffle, True)
    self.assertEqual(dset.fletcher32, True)
def test_write_dataset_custom_compress(self):
    a = H5DataIO(np.arange(30).reshape(5, 2, 3),
                 compression='gzip',
                 compression_opts=5,
                 shuffle=True,
                 fletcher32=True)
    ts = TimeSeries('ts_name', a, 'A', timestamps=np.arange(5))
    self.nwbfile.add_acquisition(ts)
    with NWBHDF5IO(self.path, 'w') as io:
        io.write(self.nwbfile, cache_spec=False)
    with File(self.path, 'r') as f:
        dset = f['/acquisition/ts_name/data']
        self.assertTrue(np.all(dset[:] == a.data))
        self.assertEqual(dset.compression, 'gzip')
        self.assertEqual(dset.compression_opts, 5)
        self.assertEqual(dset.shuffle, True)
        self.assertEqual(dset.fletcher32, True)
def add_ekg(nwbfile, ecog_path, ekg_elecs):
    if os.path.split(ecog_path)[1] == 'RawHTK':
        rate, data = readhtks(ecog_path, ekg_elecs)
    elif os.path.split(ecog_path)[1] == 'ecog.mat':
        with File(ecog_path, 'r') as f:
            data = f['ecogDS']['data'][:, ekg_elecs]
            rate = f['ecogDS']['sampFreq'][:].ravel()[0]
    elif os.path.split(ecog_path)[1] == 'raw.mat':
        rate, data = load_wavs(ecog_path, ekg_elecs)

    ekg_ts = TimeSeries('EKG', H5DataIO(data, compression='gzip'), unit='V', rate=rate,
                        conversion=.001, description='electrocardiography')
    nwbfile.add_acquisition(ekg_ts)
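For context, a hedged usage sketch of add_ekg; the session path, block layout, and EKG channel indices below are hypothetical, not from the source.

    # Hypothetical invocation of add_ekg; note that ecog_path must end in one of
    # 'RawHTK', 'ecog.mat', or 'raw.mat', or rate/data are never assigned.
    ecog_path = '/data/EC000/B1/RawHTK'   # hypothetical raw-data directory
    ekg_elecs = [256, 257]                # hypothetical indices of the EKG channels
    add_ekg(nwbfile, ecog_path, ekg_elecs)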
def test_pass_through_of_recommended_chunks(self):

    class DC(DataChunkIterator):
        def recommended_chunk_shape(self):
            return (5, 1, 1)

    dci = DC(data=np.arange(30).reshape(5, 2, 3))
    wrapped_dci = H5DataIO(data=dci,
                           compression='gzip',
                           compression_opts=5,
                           shuffle=True,
                           fletcher32=True)
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', wrapped_dci, attributes={}))
    dset = self.f['test_dataset']
    self.assertEqual(dset.chunks, (5, 1, 1))
    self.assertEqual(dset.compression, 'gzip')
    self.assertEqual(dset.compression_opts, 5)
    self.assertEqual(dset.shuffle, True)
    self.assertEqual(dset.fletcher32, True)
def test_write_dataset_datachunkiterator_with_compression(self):
    a = np.arange(30).reshape(5, 2, 3)
    aiter = iter(a)
    daiter = DataChunkIterator.from_iterable(aiter, buffer_size=2)
    wrapped_daiter = H5DataIO(data=daiter,
                              compression='gzip',
                              compression_opts=5,
                              shuffle=True,
                              fletcher32=True)
    ts = TimeSeries(name='ts_name', data=wrapped_daiter, unit='A', timestamps=np.arange(5.))
    self.nwbfile.add_acquisition(ts)
    with NWBHDF5IO(self.path, 'w') as io:
        io.write(self.nwbfile, cache_spec=False)
    with File(self.path, 'r') as f:
        dset = f['/acquisition/ts_name/data']
        self.assertEqual(dset.shape, a.shape)
        self.assertListEqual(dset[:].tolist(), a.tolist())
        self.assertEqual(dset.compression, 'gzip')
        self.assertEqual(dset.compression_opts, 5)
        self.assertEqual(dset.shuffle, True)
        self.assertEqual(dset.fletcher32, True)
def test_build_dataio(self):
    bar_spec = GroupSpec(
        'A test group specification with a data type',
        data_type_def='Bar',
        datasets=[
            DatasetSpec('an example dataset',
                        'text',
                        name='data',
                        shape=(None,),
                        attributes=[AttributeSpec('attr2', 'an example integer attribute', 'int')])
        ],
        attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')])
    type_map = self.customSetUp(bar_spec)
    type_map.register_map(Bar, BarMapper)
    bar_inst = Bar('my_bar', H5DataIO(['a', 'b', 'c', 'd'], chunks=True), 'value1', 10)
    builder = type_map.build(bar_inst)
    self.assertIsInstance(builder.get('data').data, H5DataIO)
def test_h5dataio_array_conversion_datachunkiterator(self):
    # Test that H5DataIO.__array__ raises when wrapping a DataChunkIterator
    test_speed = DataChunkIterator(data=[10., 20.])
    data = H5DataIO(test_speed)
    with self.assertRaises(NotImplementedError):
        np.isfinite(data)  # Force call of H5DataIO.__array__
def test_h5dataio_array_conversion_list(self):
    # Test that H5DataIO.__array__ is working when wrapping a python list
    test_speed = [10., 20.]
    data = H5DataIO(test_speed)
    self.assertTrue(np.all(np.isfinite(data)))  # Force call of H5DataIO.__array__
def test_h5dataio_array_conversion_numpy(self):
    # Test that H5DataIO.__array__ is working when wrapping an ndarray
    test_speed = np.array([10., 20.])
    data = H5DataIO(test_speed)
    self.assertTrue(np.all(np.isfinite(data)))  # Force call of H5DataIO.__array__
def test_write_dataset_list_fillvalue(self):
    a = H5DataIO(np.arange(20).reshape(5, 4), fillvalue=-1)
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={}))
    dset = self.f['test_dataset']
    self.assertTrue(np.all(dset[:] == a.data))
    self.assertEqual(dset.fillvalue, -1)
def test_warning_on_linking_of_regular_array(self):
    with warnings.catch_warnings(record=True) as w:
        dset = H5DataIO(np.arange(30),
                        link_data=True)
        self.assertEqual(len(w), 1)
        self.assertEqual(dset.link_data, False)