def test_multidimensional_list(self):
    """A nested 5x2x3 list is chunked into five (1, 2, 3) slices along axis 0."""
    nested = np.arange(30).reshape(5, 2, 3).tolist()
    dci = DataChunkIterator(nested)
    self.assertTupleEqual(dci.maxshape, (5, 2, 3))
    self.assertEqual(dci.dtype, np.dtype(int))
    num_chunks = 0
    for chunk in dci:
        self.assertTupleEqual(chunk.data.shape, (1, 2, 3))
        num_chunks += 1
    self.assertEqual(num_chunks, 5)
    self.assertTupleEqual(dci.recommended_data_shape(), (5, 2, 3))
    self.assertIsNone(dci.recommended_chunk_shape())
def test_none_iter(self):
    """A DataChunkIterator over None reports no shape/dtype and yields zero chunks.

    Fix: use ``assertIsNone`` instead of ``assertEqual(x, None)`` — comparing
    to None with ``==`` is an anti-pattern; identity is the intended check.
    """
    dci = DataChunkIterator(None)
    self.assertIsNone(dci.maxshape)
    self.assertIsNone(dci.dtype)
    count = 0
    for chunk in dci:
        # This body should never execute; assert on the chunk anyway if it does.
        self.assertIsNone(chunk.data)
        self.assertIsNone(chunk.selection)
        count += 1
    self.assertEqual(count, 0)
    self.assertIsNone(dci.recommended_data_shape())
    self.assertIsNone(dci.recommended_chunk_shape())
def test_standard_iterator_unbuffered(self):
    """range(10) with buffer_size=1 yields ten single-element chunks."""
    dci = DataChunkIterator(data=range(10), buffer_size=1)
    self.assertEqual(dci.dtype, np.dtype(int))
    self.assertTupleEqual(dci.maxshape, (10,))
    # Recommended data shape is checked both before and after iteration.
    self.assertTupleEqual(dci.recommended_data_shape(), (10,))
    chunks_seen = 0
    for chunk in dci:
        self.assertEqual(chunk.data.shape[0], 1)
        chunks_seen += 1
    self.assertEqual(chunks_seen, 10)
    self.assertTupleEqual(dci.recommended_data_shape(), (10,))
    self.assertIsNone(dci.recommended_chunk_shape())
def test_DataChunkIterators_match(self):
    """Two identically shaped DataChunkIterators compare as shape-equal."""
    # Compare data chunk iterators
    it_a = DataChunkIterator(data=np.arange(10).reshape(2, 5))
    it_b = DataChunkIterator(data=np.arange(10).reshape(2, 5))
    res = assertEqualShape(it_a, it_b)
    self.assertTrue(res.result)
    self.assertIsNone(res.error)
    self.assertTupleEqual(res.ignored, ())
    self.assertTupleEqual(res.unmatched, ())
    self.assertTupleEqual(res.shape1, (2, 5))
    self.assertTupleEqual(res.shape2, (2, 5))
    self.assertTupleEqual(res.axes1, (0, 1))
    self.assertTupleEqual(res.axes2, (0, 1))
def test_DataChunkIterator_ignore_undetermined_axis(self):
    """An undetermined (None) axis is recorded as ignored when ignore_undetermined=True."""
    left = DataChunkIterator(data=np.arange(10).reshape(2, 5), maxshape=(None, 5))
    right = DataChunkIterator(data=np.arange(10).reshape(2, 5))
    res = ShapeValidator.assertEqualShape(left, right, ignore_undetermined=True)
    self.assertTrue(res.result)
    self.assertIsNone(res.error)
    # Axis pair (0, 0) could not be compared and was skipped, not failed.
    self.assertTupleEqual(res.ignored, ((0, 0),))
    self.assertTupleEqual(res.unmatched, ())
    self.assertTupleEqual(res.shape1, (None, 5))
    self.assertTupleEqual(res.shape2, (2, 5))
    self.assertTupleEqual(res.axes1, (0, 1))
    self.assertTupleEqual(res.axes2, (0, 1))
def test_DataChunkIterator_error_on_undetermined_axis(self):
    """Shape comparison with an undetermined (None) axis fails when
    ignore_undetermined=False, reporting AXIS_LEN_ERROR for that axis pair.

    Fix: ``assertEquals`` is a deprecated alias (removed in Python 3.12);
    use ``assertEqual``.
    """
    d1 = DataChunkIterator(data=np.arange(10).reshape(2, 5), maxshape=(None, 5))
    d2 = DataChunkIterator(data=np.arange(10).reshape(2, 5))
    res = ShapeValidator.assertEqualShape(d1, d2, ignore_undetermined=False)
    self.assertFalse(res.result)
    self.assertEqual(res.error, 'AXIS_LEN_ERROR')
    self.assertTupleEqual(res.ignored, ())
    self.assertTupleEqual(res.unmatched, ((0, 0),))
    self.assertTupleEqual(res.shape1, (None, 5))
    self.assertTupleEqual(res.shape2, (2, 5))
    self.assertTupleEqual(res.axes1, (0, 1))
    self.assertTupleEqual(res.axes2, (0, 1))
def test_numpy_iter_unmatched_buffer_size(self):
    """arange(10) with buffer_size=3 yields chunks of length 3, 3, 3 and a final 1."""
    arr = np.arange(10)
    dci = DataChunkIterator(data=arr, buffer_size=3)
    self.assertTupleEqual(dci.maxshape, arr.shape)
    self.assertEqual(dci.dtype, arr.dtype)
    n_chunks = 0
    for chunk in dci:
        expected_len = 3 if n_chunks < 3 else 1
        self.assertEqual(chunk.data.shape[0], expected_len)
        n_chunks += 1
    self.assertEqual(n_chunks, 4)
    self.assertTupleEqual(dci.recommended_data_shape(), arr.shape)
    self.assertIsNone(dci.recommended_chunk_shape())
def test_write_dataset_iterable_multidimensional_array(self):
    """Write a 5x2x3 array through a buffered DataChunkIterator and read it back intact."""
    expected = np.arange(30).reshape(5, 2, 3)
    daiter = DataChunkIterator.from_iterable(iter(expected), buffer_size=2)
    builder = DatasetBuilder('test_dataset', daiter, attributes={})
    self.io.write_dataset(self.f, builder)
    dset = self.f['test_dataset']
    self.assertListEqual(dset[:].tolist(), expected.tolist())
def test__chunked_iter_fill_numpy_unmatched_buffer_size(self):  # noqa: F811
    """Fill a dataset from a nested-list iterator whose buffer size (3) does not divide the data."""
    expected = np.arange(30).reshape(5, 2, 3)
    dci = DataChunkIterator(data=expected.tolist(), buffer_size=3)
    # Dunder-style name, so no private-name mangling applies here.
    my_dset = HDF5IO.__chunked_iter_fill__(self.f, 'test_dataset', dci)
    self.assertTrue(np.all(my_dset[:] == expected))
    self.assertTupleEqual(my_dset.shape, expected.shape)
def test_dataio_dci_data(self):
    """A TimeSeries over generator-backed H5DataIO data reports unknown length (-1)."""
    wrapped = H5DataIO(DataChunkIterator(data=(i for i in range(100))))
    ts1 = TimeSeries('test_ts1', 'unit test test_DataIO', wrapped, 'grams',
                     starting_time=0.0, rate=0.1)
    self.assertEqual(ts1.num_samples, -1)
def test_write_dataset_datachunkiterator(self):
    """Round-trip a DataChunkIterator-backed TimeSeries through NWBHDF5IO.

    Fix: the verification ``File`` handle was never closed; open it with a
    context manager so the HDF5 handle is released even if an assertion fails.
    """
    a = np.arange(30).reshape(5, 2, 3)
    aiter = iter(a)
    daiter = DataChunkIterator.from_iterable(aiter, buffer_size=2)
    ts = TimeSeries('ts_name', daiter, 'A', timestamps=np.arange(5))
    self.nwbfile.add_acquisition(ts)
    with NWBHDF5IO(self.path, 'w') as io:
        io.write(self.nwbfile)
    with File(self.path, 'r') as infile:
        dset = infile['/acquisition/ts_name/data']
        self.assertListEqual(dset[:].tolist(), a.tolist())
def test_dci_data_arr(self):
    """Array-valued generator data: num_samples is unknown (-1) and chunks match the source."""
    def generator_factory():
        return (np.array([i, i + 1]) for i in range(100))

    data = DataChunkIterator(data=generator_factory())
    ts1 = TimeSeries('test_ts1', data, 'grams', starting_time=0.0, rate=0.1)
    self.assertEqual(ts1.num_samples, -1)
    # A fresh generator from the same factory supplies the expected values.
    for got, expected in zip(data, generator_factory()):
        assert np.allclose(got, expected)
def test_dataio_dci_data(self):
    """Scalar generator data wrapped in H5DataIO: unknown length, values preserved."""
    def generator_factory():
        return (i for i in range(100))

    data = H5DataIO(DataChunkIterator(data=generator_factory()))
    ts1 = TimeSeries('test_ts1', 'unit test test_DataIO', data, 'grams',
                     starting_time=0.0, rate=0.1)
    self.assertEqual(ts1.num_samples, -1)
    # A fresh generator from the same factory supplies the expected values.
    for got, expected in zip(data, generator_factory()):
        assert np.allclose(got, expected)
def test_write_dataset_data_chunk_iterator_with_compression(self):
    """H5DataIO filter settings (gzip/shuffle/fletcher32/chunks) survive a DCI write."""
    dci = DataChunkIterator(data=np.arange(10), buffer_size=2)
    wrapped_dci = H5DataIO(data=dci,
                           compression='gzip',
                           compression_opts=5,
                           shuffle=True,
                           fletcher32=True,
                           chunks=(2,))
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', wrapped_dci, attributes={}))
    dset = self.f['test_dataset']
    self.assertListEqual(dset[:].tolist(), list(range(10)))
    # Every requested filter option must be reflected on the created dataset.
    self.assertEqual(dset.compression, 'gzip')
    self.assertEqual(dset.compression_opts, 5)
    self.assertEqual(dset.shuffle, True)
    self.assertEqual(dset.fletcher32, True)
    self.assertEqual(dset.chunks, (2,))
def test_write_dataset_iterable_multidimensional_array_compression(self):
    """Compression options on H5DataIO are honored for an iterator-backed 3-D dataset."""
    expected = np.arange(30).reshape(5, 2, 3)
    daiter = DataChunkIterator.from_iterable(iter(expected), buffer_size=2)
    wrapped_daiter = H5DataIO(data=daiter,
                              compression='gzip',
                              compression_opts=5,
                              shuffle=True,
                              fletcher32=True)
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', wrapped_daiter, attributes={}))
    dset = self.f['test_dataset']
    self.assertEqual(dset.shape, expected.shape)
    self.assertListEqual(dset[:].tolist(), expected.tolist())
    # Every requested filter option must be reflected on the created dataset.
    self.assertEqual(dset.compression, 'gzip')
    self.assertEqual(dset.compression_opts, 5)
    self.assertEqual(dset.shuffle, True)
    self.assertEqual(dset.fletcher32, True)
def test_write_dataset_datachunkiterator_with_compression(self):
    """Round-trip a compressed DCI-backed TimeSeries and verify the filter settings.

    Fix: the verification ``File`` handle was never closed; open it with a
    context manager so the HDF5 handle is released even if an assertion fails.
    """
    a = np.arange(30).reshape(5, 2, 3)
    aiter = iter(a)
    daiter = DataChunkIterator.from_iterable(aiter, buffer_size=2)
    wrapped_daiter = H5DataIO(data=daiter,
                              compression='gzip',
                              compression_opts=5,
                              shuffle=True,
                              fletcher32=True)
    ts = TimeSeries('ts_name', wrapped_daiter, 'A', timestamps=np.arange(5))
    self.nwbfile.add_acquisition(ts)
    with NWBHDF5IO(self.path, 'w') as io:
        io.write(self.nwbfile)
    with File(self.path, 'r') as infile:
        dset = infile['/acquisition/ts_name/data']
        self.assertEqual(dset.shape, a.shape)
        self.assertListEqual(dset[:].tolist(), a.tolist())
        self.assertEqual(dset.compression, 'gzip')
        self.assertEqual(dset.compression_opts, 5)
        self.assertEqual(dset.shuffle, True)
        self.assertEqual(dset.fletcher32, True)
def test__chunked_iter_fill_list_matched_buffer_size(self):
    """Fill a dataset from a nested-list iterator whose buffer size (1) divides the data evenly."""
    expected = np.arange(30).reshape(5, 2, 3)
    dci = DataChunkIterator(data=expected.tolist(), buffer_size=1)
    my_dset = self.io.__chunked_iter_fill__(self.f, 'test_dataset', dci)
    self.assertTrue(np.all(my_dset[:] == expected))
    self.assertTupleEqual(my_dset.shape, expected.shape)
def test_dtype(self):
    """The iterator reports the dtype of the underlying numpy array (int32 here)."""
    source = np.arange(30, dtype='int32').reshape(5, 2, 3)
    daiter = DataChunkIterator.from_iterable(iter(source), buffer_size=2)
    self.assertEqual(daiter.dtype, source.dtype)
def test__chunked_iter_fill_iterator_unmatched_buffer_size(self):
    """__chunked_iter_fill__ handles a range() source whose length is not a multiple of the buffer."""
    dci = DataChunkIterator(data=range(10), buffer_size=3)
    filled = HDF5IO.__chunked_iter_fill__(self.f, 'test_dataset', dci)
    self.assertListEqual(filled[:].tolist(), list(range(10)))
def test_write_dataset_data_chunk_iterator(self):
    """write_dataset consumes a buffered DataChunkIterator and stores every value."""
    dci = DataChunkIterator(data=np.arange(10), buffer_size=2)
    builder = DatasetBuilder('test_dataset', dci, attributes={})
    self.io.write_dataset(self.f, builder)
    self.assertListEqual(self.f['test_dataset'][:].tolist(), list(range(10)))
while (x < 0.5 and num_chunks < max_chunks): val = np.asarray([sin(random() * 2 * pi) for i in range(chunk_length)]) x = random() num_chunks += 1 yield val return #################### # Step 2: Wrap the generator in a DataChunkIterator # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # from pynwb.form.data_utils import DataChunkIterator data = DataChunkIterator(data=iter_sin(10)) #################### # Step 3: Write the data as usual # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # # Here we use our wrapped generator to create the data for a synthetic time series. write_test_file(filename='basic_iterwrite_example.nwb', data=data) #################### # Discussion # ^^^^^^^^^^ # Note, we here actually do not know how long our timeseries will be. print(
def test_maxshape(self):
    """An iterable-backed iterator reports maxshape (None, 2, 3): axis 0 length is unknown."""
    source = np.arange(30).reshape(5, 2, 3)
    daiter = DataChunkIterator.from_iterable(iter(source), buffer_size=2)
    self.assertEqual(daiter.maxshape, (None, 2, 3))
def iter_sin(chunk_length=10, max_chunks=100):
    """Generator creating a random number of chunks (but at most max_chunks)
    of length chunk_length containing random samples of sin([0, 2pi]).
    """
    x = 0
    num_chunks = 0
    # Keep producing chunks while the random draw stays below 0.5 and the cap
    # has not been reached; x is redrawn after each chunk is built.
    while x < 0.5 and num_chunks < max_chunks:
        samples = np.asarray([sin(random() * 2 * pi) for _ in range(chunk_length)])
        x = random()
        num_chunks += 1
        yield samples
    return


# Step 2: Wrap the generator in a DataChunkIterator
from pynwb.form.data_utils import DataChunkIterator

data = DataChunkIterator(data=iter_sin(10))

# Step 3: Write the data as usual
# Here we use our wrapped generator to create the data for a synthetic time series.
write_test_file(filename='NWB_Tutorial_Trial.nwb', data=data)

# Report what the iterator learned about the stream after the write.
print(
    "maxshape=%s, recommended_data_shape=%s, dtype=%s" % (str(data.maxshape),
                                                          str(data.recommended_data_shape()),
                                                          str(data.dtype)))