def __read_dataset(self, h5obj, name=None):
    """Build a DatasetBuilder from an h5py Dataset.

    Scalar datasets whose value is a non-empty string are treated as a
    possible HDF5 object path and dereferenced when the path resolves.
    1-D datasets are wrapped according to dtype (strings pass through,
    uint64 is read as object references, compound dtypes become tables);
    higher-rank data is kept as the lazy h5py.Dataset.

    :param h5obj: the h5py.Dataset to read
    :param name: builder name; defaults to the dataset's basename
    :return: a DatasetBuilder marked as already written
    """
    kwargs = {
        "attributes": self.__read_attrs(h5obj),
        "dtype": h5obj.dtype,
        "maxshape": h5obj.shape,
    }
    if name is None:
        name = str(os.path.basename(h5obj.name))
    kwargs['source'] = h5obj.file.filename
    ndims = len(h5obj.shape)
    if ndims == 0:
        # read scalar
        scalar = h5obj[()]
        if isinstance(scalar, bytes):
            scalar = scalar.decode('UTF-8')
        deref_obj = None
        if isinstance(scalar, str) and scalar != '':
            # the string may be an object path within the file; best-effort lookup
            try:
                deref_obj = h5obj.file[scalar]
            except Exception:
                # was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
                # are no longer swallowed while keeping best-effort semantics
                pass
        if deref_obj is not None:
            # TODO (AJTRITT): This should call __read_ref to support Group references
            target = self.__set_rgroup(deref_obj)
            target_builder = self.__read_dataset(target)
            self.__set_built(target.file.filename, target.id, target_builder)
            kwargs['data'] = ReferenceBuilder(target_builder)
        else:
            kwargs["data"] = scalar
    elif ndims == 1:
        d = None
        if h5obj.dtype.kind == 'O' and len(h5obj) > 0:
            elem1 = h5obj[0]
            if isinstance(elem1, (str, bytes)):
                d = h5obj
        elif h5obj.dtype == 'uint64' and len(h5obj) > 0:
            # read list of object references
            d = BuilderH5ReferenceDataset(HDMFArray(h5obj), self)
        elif h5obj.dtype.kind == 'V':
            # compound (table) dtype; flag which columns hold references
            cpd_dt = h5obj.dtype
            ref_cols = [cpd_dt[i] == 'uint64' for i in range(len(cpd_dt))]
            d = BuilderH5TableDataset(HDMFArray(h5obj), self, ref_cols)
        else:
            d = h5obj
        kwargs["data"] = d
    else:
        # multi-dimensional: keep the h5py.Dataset for lazy access
        kwargs["data"] = h5obj
    ret = DatasetBuilder(name, **kwargs)
    ret.written = True
    return ret
def setUp(self):
    """Create a FooBucket with two Foos and the matching builders by hand."""
    super().setUp()
    # (name, range start, range stop, attr1, attr2) for each Foo
    foo_specs = [
        ('my_foo1', 0, 10, 'value1', 10),
        ('my_foo2', 10, 20, 'value2', 20),
    ]
    self.foo_bucket = FooBucket(
        'test_foo_bucket',
        [Foo(fname, list(range(lo, hi)), attr1, attr2)
         for fname, lo, hi, attr1, attr2 in foo_specs])
    self.foo_builders = {}
    for fname, lo, hi, attr1, attr2 in foo_specs:
        data_builder = DatasetBuilder('my_data', list(range(lo, hi)),
                                      attributes={'attr2': attr2})
        self.foo_builders[fname] = GroupBuilder(
            fname,
            datasets={'my_data': data_builder},
            attributes={'attr1': attr1,
                        'namespace': CORE_NAMESPACE,
                        'data_type': 'Foo',
                        'object_id': self.foo_bucket.foos[fname].object_id})
    self.setUpBucketBuilder()
    self.setUpBucketSpec()
    self.spec_catalog.register_spec(self.bucket_spec, 'test.yaml')
    self.type_map.register_container_type(CORE_NAMESPACE, 'FooBucket', FooBucket)
    self.type_map.register_map(FooBucket, self.setUpBucketMapper())
    self.manager = BuildManager(self.type_map)
def setUpBuilder(self):
    """Construct the expected GroupBuilder for a basic TimeSeries."""
    data_builder = DatasetBuilder(
        'data', list(range(100, 200, 10)),
        attributes={'unit': 'SIunit', 'conversion': 1.0, 'resolution': 0.1})
    timestamps_builder = DatasetBuilder(
        'timestamps', list(range(10)),
        attributes={'unit': 'Seconds', 'interval': 1})
    group_attrs = {
        'namespace': base.CORE_NAMESPACE,
        'neurodata_type': 'TimeSeries',
        'description': 'no description',
        'comments': 'no comments',
        'help': 'General time series object',
    }
    return GroupBuilder('test_timeseries',
                        attributes=group_attrs,
                        datasets={'data': data_builder,
                                  'timestamps': timestamps_builder})
def setUpBuilder(self):
    """Expected builder tree for a Units table with two ragged columns."""
    id_ds = DatasetBuilder(
        'id', [0, 1],
        attributes={'neurodata_type': 'ElementIdentifiers', 'namespace': 'core'})
    spike_times = DatasetBuilder(
        'spike_times', [0, 1, 2, 3, 4, 5],
        attributes={'neurodata_type': 'VectorData', 'namespace': 'core',
                    'description': 'the spike times for each unit'})
    spike_times_idx = DatasetBuilder(
        'spike_times_index', [3, 6],
        attributes={'neurodata_type': 'VectorIndex', 'namespace': 'core',
                    'target': ReferenceBuilder(spike_times)})
    obs = DatasetBuilder(
        'obs_intervals', [[0, 1], [2, 3], [2, 5], [6, 7]],
        attributes={'neurodata_type': 'VectorData', 'namespace': 'core',
                    'description': 'the observation intervals for each unit'})
    obs_idx = DatasetBuilder(
        'obs_intervals_index', [2, 4],
        attributes={'neurodata_type': 'VectorIndex', 'namespace': 'core',
                    'target': ReferenceBuilder(obs)})
    # key each dataset builder by its own name
    column_builders = {ds.name: ds
                       for ds in (id_ds, spike_times, spike_times_idx, obs, obs_idx)}
    return GroupBuilder(
        'UnitsTest',
        attributes={'neurodata_type': 'Units', 'namespace': 'core',
                    'description': 'a simple table for testing Units',
                    'colnames': (b'spike_times', b'obs_intervals',)},
        datasets=column_builders)
def test_invalid_isodatetime_array(self):
    """A scalar datetime where an array of datetimes is expected should fail validation."""
    scalar_time = datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())
    builder = GroupBuilder(
        'my_bar',
        attributes={'data_type': 'Bar', 'attr1': 'a string attribute'},
        datasets=[
            DatasetBuilder('data', 100, attributes={'attr2': 10}),
            DatasetBuilder('time', scalar_time),
            # 'time_array' is deliberately scalar to trigger the error
            DatasetBuilder('time_array', scalar_time),
        ])
    results = self.vmap.get_validator('Bar').validate(builder)
    self.assertEqual(len(results), 1)
    self.assertIsInstance(results[0], ExpectedArrayError)  # noqa: F405
    self.assertEqual(results[0].name, 'Bar/time_array')
def test_link_h5py_dataset_h5dataio_input(self):
    """Wrapping an existing h5py dataset in H5DataIO with link_data=True yields a SoftLink."""
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(10), attributes={}))
    wrapped = H5DataIO(data=self.f['test_dataset'], link_data=True)
    self.io.write_dataset(self.f, DatasetBuilder('test_softlink', wrapped, attributes={}))
    link = self.f.get('test_softlink', getlink=True)
    self.assertIsInstance(link, SoftLink)
def setUpBuilder(self):
    """Expected builder for a RoiResponseSeries with a DynamicTableRegion of ROIs."""
    ps_builder = TestPlaneSegmentation.get_plane_segmentation_builder(self)
    data_ds = DatasetBuilder(
        'data', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        attributes={'unit': 'lumens', 'conversion': 1.0, 'resolution': 0.0})
    timestamps_ds = DatasetBuilder(
        'timestamps', [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
        attributes={'unit': 'Seconds', 'interval': 1})
    rois_ds = DatasetBuilder(
        'rois', data=[0],
        attributes={'help': 'a subset (i.e. slice or region) of a DynamicTable',
                    'description': 'the first of two ROIs',
                    'table': ReferenceBuilder(ps_builder),
                    'namespace': 'core',
                    'neurodata_type': 'DynamicTableRegion'})
    return GroupBuilder(
        'test_roi_response_series',
        attributes={
            'namespace': base.CORE_NAMESPACE,
            'comments': 'no comments',
            'description': 'no description',
            'neurodata_type': 'RoiResponseSeries',
            'help': ('ROI responses over an imaging plane. Each element on the second dimension of data[] '
                     'should correspond to the signal from one ROI')},
        datasets={'data': data_ds, 'timestamps': timestamps_ds, 'rois': rois_ds})
def test_copy_h5py_dataset_input(self):
    """Writing an existing h5py dataset with link_data=False copies the data."""
    source_builder = DatasetBuilder('test_dataset', np.arange(10), attributes={})
    self.io.write_dataset(self.f, source_builder)
    copy_builder = DatasetBuilder('test_copy', self.f['test_dataset'], attributes={})
    self.io.write_dataset(self.f, copy_builder, link_data=False)
    self.assertIsInstance(self.f.get('test_copy', getlink=True), HardLink)
    self.assertListEqual(self.f['test_dataset'][:].tolist(),
                         self.f['test_copy'][:].tolist())
def test_intersecting_datasets(self):
    """deep_update overwrites the data of a dataset present in both groups."""
    target = GroupBuilder(
        'gb1', datasets={'dataset2': DatasetBuilder('dataset2', [1, 2, 3])})
    source = GroupBuilder(
        'gb2', datasets={'dataset2': DatasetBuilder('dataset2', [4, 5, 6])})
    target.deep_update(source)
    self.assertIn('dataset2', target)
    self.assertListEqual(target['dataset2'].data, source['dataset2'].data)
def test_mutually_exclusive_datasets(self):
    """deep_update copies over a dataset that exists only in the other group."""
    target = GroupBuilder(
        'gb1', datasets={'dataset1': DatasetBuilder('dataset1', [1, 2, 3])})
    source = GroupBuilder(
        'gb2', datasets={'dataset2': DatasetBuilder('dataset2', [4, 5, 6])})
    target.deep_update(source)
    self.assertIn('dataset2', target)
    self.assertListEqual(target['dataset2'].data, source['dataset2'].data)
def test_copy_h5py_dataset_h5dataio_input(self):
    """H5DataIO(link_data=False) forces a copy even when the write call asks to link."""
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(10), attributes={}))
    forced_copy = H5DataIO(data=self.f['test_dataset'], link_data=False)  # force dataset copy
    # link_data=True here confirms the per-dataset H5DataIO setting takes precedence
    self.io.write_dataset(self.f,
                          DatasetBuilder('test_copy', forced_copy, attributes={}),
                          link_data=True)
    self.assertIsInstance(self.f.get('test_copy', getlink=True), HardLink)
    self.assertListEqual(self.f['test_dataset'][:].tolist(),
                         self.f['test_copy'][:].tolist())
def test_repr(self):
    """DatasetBuilder.__repr__ shows the builder path, class name, attributes, and data."""
    parent = GroupBuilder('gb1')
    builder = DatasetBuilder(
        name='db1',
        data=[1, 2, 3],
        dtype=int,
        attributes={'attr2': 10},
        maxshape=10,
        chunks=True,
        parent=parent,
        source='source',
    )
    expected = "gb1/db1 DatasetBuilder {'attributes': {'attr2': 10}, 'data': [1, 2, 3]}"
    self.assertEqual(repr(builder), expected)
def test_write_dataset_iterable_multidimensional_array(self):
    """A DataChunkIterator over a 3-D array round-trips through write_dataset."""
    expected = np.arange(30).reshape(5, 2, 3)
    chunked = DataChunkIterator.from_iterable(iter(expected), buffer_size=2)
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', chunked, attributes={}))
    written = self.f['test_dataset']
    self.assertListEqual(written[:].tolist(), expected.tolist())
def test_write_dataset_list_chunked(self):
    """An explicit chunk shape given via H5DataIO is applied to the written dataset."""
    wrapped = H5DataIO(np.arange(30).reshape(5, 2, 3), chunks=(1, 1, 3))
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', wrapped, attributes={}))
    written = self.f['test_dataset']
    self.assertTrue(np.all(written[:] == wrapped.data))
    self.assertEqual(written.chunks, (1, 1, 3))
def test_write_dataset_string(self):
    """A python str is written as a scalar dataset and read back as str."""
    value = 'test string'
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', value, attributes={}))
    written = self.f['test_dataset']
    self.assertTupleEqual(written.shape, ())
    self.assertEqual(written[()], value)
def test_construct_scalar_compound_dataset(self):
    """Test construct on a compound h5py.Dataset with shape (1, ) for scalar spec does not resolve the data.

    The temporary HDF5 file is now removed in a ``finally`` block so it is
    cleaned up even when one of the assertions fails (the original version
    leaked ``test.h5`` on failure).
    """
    comp_type = np.dtype([('id', np.uint64), ('attr1', h5py.special_dtype(vlen=str))])
    try:
        with h5py.File('test.h5', 'w') as file:
            test_ds = file.create_dataset(name='test_ds',
                                          data=np.array((1, 'text'), dtype=comp_type),
                                          shape=(1, ),
                                          dtype=comp_type)
            expected = BazScalarCompound(
                name='MyBaz',
                data=(1, 'text'),
            )
            builder = DatasetBuilder(
                name='MyBaz',
                data=test_ds,
                attributes={
                    'data_type': 'BazScalarCompound',
                    'namespace': CORE_NAMESPACE,
                    'object_id': expected.object_id
                },
            )
            container = self.mapper.construct(builder, self.manager)
            # the compound data must stay as the lazy h5py.Dataset, not be resolved
            self.assertEqual(type(container.data), h5py.Dataset)
            self.assertContainerEqual(container, expected)
    finally:
        # always clean up the scratch file, pass or fail
        if os.path.exists('test.h5'):
            os.remove('test.h5')
def test_build_empty_data(self):
    """Test building of a Data object with empty data."""
    baz_inc_spec = DatasetSpec(doc='doc', data_type_inc='Baz', quantity=ZERO_OR_MANY)
    baz_holder_spec = GroupSpec(doc='doc', data_type_def='BazHolder', datasets=[baz_inc_spec])
    self.spec_catalog.register_spec(baz_holder_spec, 'test.yaml')
    self.type_map.register_container_type(CORE_NAMESPACE, 'BazHolder', BazHolder)
    self.holder_mapper = ObjectMapper(baz_holder_spec)
    empty_baz = Baz('MyBaz', [], 'abcdefghijklmnopqrstuvwxyz')
    holder = BazHolder('holder', [empty_baz])
    result = self.holder_mapper.build(holder, self.manager)
    baz_attrs = {
        'baz_attr': 'abcdefghijklmnopqrstuvwxyz',
        'data_type': 'Baz',
        'namespace': 'test_core',
        'object_id': empty_baz.object_id,
    }
    expected = GroupBuilder(
        name='holder',
        datasets=[DatasetBuilder(name='MyBaz', data=[], attributes=baz_attrs)])
    self.assertBuilderEqual(result, expected)
def test_is_empty_false_group_dataset(self):
    """Test is_empty() when group has a subgroup with a dataset"""
    subgroup = GroupBuilder(
        'my_subgroup',
        datasets={'my_dataset': DatasetBuilder('my_dataset')})
    parent = GroupBuilder('gb', {'my_subgroup': subgroup})
    self.assertFalse(parent.is_empty())
def test_build_data(self):
    """Test building a container which contains an untyped empty subgroup and an untyped non-empty dataset."""
    bucket = BasicBucket(name='test_bucket', untyped_dataset=3, untyped_array_dataset=[3])
    # an optional untyped empty group builder will NOT be created by default
    expected = GroupBuilder(
        name='test_bucket',
        datasets={'untyped_dataset': DatasetBuilder(name='untyped_dataset', data=3),
                  'untyped_array_dataset': DatasetBuilder(name='untyped_array_dataset', data=[3])},
        attributes={'namespace': CORE_NAMESPACE,
                    'data_type': 'BasicBucket',
                    'object_id': bucket.object_id},
    )
    result = self.manager.build(bucket)
    self.assertDictEqual(result, expected)
def setUpBuilder(self):
    """Expected builder for an ElectricalSeries with an electrodes table region."""
    table_builder = self.get_table_builder(self)
    raw_data = list(zip(range(10), range(10, 20)))
    stamps = [i / 10 for i in range(10)]
    data_ds = DatasetBuilder(
        'data', raw_data,
        attributes={'unit': 'volt', 'conversion': 1.0, 'resolution': 0.0})
    timestamps_ds = DatasetBuilder(
        'timestamps', stamps,
        attributes={'unit': 'Seconds', 'interval': 1})
    electrodes_ds = DatasetBuilder(
        'electrodes', data=[0, 2],
        attributes={'neurodata_type': 'DynamicTableRegion',
                    'namespace': 'core',
                    'table': ReferenceBuilder(table_builder),
                    'description': 'the first and third electrodes',
                    'help': 'a subset (i.e. slice or region) of a DynamicTable'})
    return GroupBuilder(
        'test_eS',
        attributes={'namespace': base.CORE_NAMESPACE,
                    'comments': 'no comments',
                    'description': 'no description',
                    'neurodata_type': 'ElectricalSeries',
                    'help': 'Stores acquired voltage data from extracellular recordings'},
        datasets={'data': data_ds,
                  'timestamps': timestamps_ds,
                  'electrodes': electrodes_ds})
def test_write_dataset_list_enable_default_compress(self):
    """compression=True on H5DataIO selects gzip and is applied on write."""
    wrapped = H5DataIO(np.arange(30).reshape(5, 2, 3), compression=True)
    self.assertEqual(wrapped.io_settings['compression'], 'gzip')
    self.io.write_dataset(self.f, DatasetBuilder('test_dataset', wrapped, attributes={}))
    written = self.f['test_dataset']
    self.assertTrue(np.all(written[:] == wrapped.data))
    self.assertEqual(written.compression, 'gzip')
def test_constructor(self):
    """LinkBuilder stores its target builder, name, parent, and source."""
    parent = GroupBuilder('gb1')
    target = DatasetBuilder('db1', [1, 2, 3])
    link = LinkBuilder(target, 'link_name', parent, 'link_source')
    self.assertIs(link.builder, target)
    self.assertEqual(link.name, 'link_name')
    self.assertIs(link.parent, parent)
    self.assertEqual(link.source, 'link_source')
def test_set_dataset(self):
    """set_dataset registers the dataset under its name and sets its parent."""
    group = GroupBuilder('gb')
    dataset = DatasetBuilder('db', list(range(10)))
    group.set_dataset(dataset)
    self.assertIs(dataset.parent, group)
    self.assertIn('db', group.obj_type)
    self.assertIn('db', group.datasets)
    self.assertIs(group['db'], dataset)
def test_set_exists_wrong_type(self):
    """Setting a dataset under a name already used by a subgroup raises ValueError."""
    parent = GroupBuilder('gb1')
    child_group = GroupBuilder('gb2')
    clashing_dataset = DatasetBuilder('gb2')
    parent.set_group(child_group)
    msg = "'gb2' already exists in gb1.groups, cannot set in datasets."
    with self.assertRaisesWith(ValueError, msg):
        parent.set_dataset(clashing_dataset)
def setUpBuilder(self):
    """Expected builder for an ImagingPlane with one optical channel and a device link."""
    optchan_builder = GroupBuilder(
        'optchan1',
        attributes={'neurodata_type': 'OpticalChannel',
                    'namespace': 'core',
                    'help': 'Metadata about an optical channel used to record from an imaging plane'},
        datasets={'description': DatasetBuilder('description', 'a fake OpticalChannel'),
                  'emission_lambda': DatasetBuilder('emission_lambda', 500.)},
    )
    device_builder = GroupBuilder(
        'dev1',
        attributes={'neurodata_type': 'Device',
                    'namespace': 'core',
                    'help': 'A recording device e.g. amplifier'})
    plane_datasets = {
        'description': DatasetBuilder('description', 'a fake ImagingPlane'),
        'excitation_lambda': DatasetBuilder('excitation_lambda', 600.),
        'imaging_rate': DatasetBuilder('imaging_rate', 300.),
        'indicator': DatasetBuilder('indicator', 'GFP'),
        'location': DatasetBuilder('location', 'somewhere in the brain'),
    }
    return GroupBuilder(
        'imgpln1',
        attributes={'neurodata_type': 'ImagingPlane',
                    'namespace': 'core',
                    'help': 'Metadata about an imaging plane'},
        datasets=plane_datasets,
        groups={'optchan1': optchan_builder},
        links={'device': LinkBuilder(device_builder, 'device')},
    )
def setUp(self):
    """Build an NWBFile container and the equivalent builder tree by hand."""
    self.manager = get_manager()
    self.path = "test_pynwb_io_hdf5.nwb"
    self.start_time = datetime(1970, 1, 1, 12, tzinfo=tzutc())
    self.create_date = datetime(2017, 4, 15, 12, tzinfo=tzlocal())
    self.container = NWBFile(session_description='a test NWB File',
                             identifier='TEST123',
                             session_start_time=self.start_time,
                             file_create_date=self.create_date)
    ts = TimeSeries(name='test_timeseries', data=list(range(100, 200, 10)),
                    unit='SIunit', timestamps=np.arange(10.), resolution=0.1)
    self.container.add_acquisition(ts)
    # hand-built builder tree mirroring the container above
    data_ds = DatasetBuilder(
        'data', list(range(100, 200, 10)),
        attributes={'unit': 'SIunit', 'conversion': 1.0, 'resolution': 0.1})
    timestamps_ds = DatasetBuilder(
        'timestamps', np.arange(10.),
        attributes={'unit': 'seconds', 'interval': 1})
    ts_builder = GroupBuilder('test_timeseries',
                              attributes={'neurodata_type': 'TimeSeries'},
                              datasets={'data': data_ds, 'timestamps': timestamps_ds})
    root_groups = {
        'acquisition': GroupBuilder('acquisition',
                                    groups={'test_timeseries': ts_builder}),
        'analysis': GroupBuilder('analysis'),
        'general': GroupBuilder('general'),
        'processing': GroupBuilder('processing'),
        'stimulus': GroupBuilder('stimulus',
                                 groups={'presentation': GroupBuilder('presentation'),
                                         'templates': GroupBuilder('templates')}),
    }
    root_datasets = {
        'file_create_date': DatasetBuilder('file_create_date', [self.create_date.isoformat()]),
        'identifier': DatasetBuilder('identifier', 'TEST123'),
        'session_description': DatasetBuilder('session_description', 'a test NWB File'),
        'nwb_version': DatasetBuilder('nwb_version', '1.0.6'),
        'session_start_time': DatasetBuilder('session_start_time', self.start_time.isoformat()),
    }
    self.builder = GroupBuilder('root',
                                groups=root_groups,
                                datasets=root_datasets,
                                attributes={'neurodata_type': 'NWBFile'})
def setUpBuilder(self):
    """Expected builder for an IntracellularElectrode group with a device link."""
    device = GroupBuilder('device_name',
                          attributes={'help': 'A recording device e.g. amplifier',
                                      'namespace': 'core',
                                      'neurodata_type': 'Device'})
    # text fields of the electrode, keyed by dataset name
    field_values = {
        'slice': u'tissue slice',
        'resistance': u'something measured in ohms',
        'seal': u'sealing method',
        'description': u'a fake electrode object',
        'location': u'Springfield Elementary School',
        'filtering': u'a meaningless free-form text field',
        'initial_access_resistance': u'I guess this changes',
    }
    elec_datasets = {key: DatasetBuilder(key, data=value)
                     for key, value in field_values.items()}
    return GroupBuilder('elec0',
                        attributes={'help': 'Metadata about an intracellular electrode',
                                    'namespace': 'core',
                                    'neurodata_type': 'IntracellularElectrode'},
                        datasets=elec_datasets,
                        links={'device': LinkBuilder(device, 'device')})
def test_valid_isodatetime(self):
    """A datetime scalar and a datetime array both validate without errors."""
    when = datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())
    builder = GroupBuilder(
        'my_bar',
        attributes={'data_type': 'Bar', 'attr1': 'a string attribute'},
        datasets=[
            DatasetBuilder('data', 100, attributes={'attr2': 10}),
            DatasetBuilder('time', when),
            DatasetBuilder('time_array', [when]),
        ])
    results = self.vmap.get_validator('Bar').validate(builder)
    self.assertEqual(len(results), 0)
def test_build(self):
    ''' Test default mapping functionality when no attributes are nested '''
    container = Baz('MyBaz', list(range(10)), 'abcdefghijklmnopqrstuvwxyz')
    result = self.mapper.build(container, self.manager)
    expected = DatasetBuilder('MyBaz', list(range(10)),
                              attributes={'baz_attr': 'abcdefghijklmnopqrstuvwxyz'})
    self.assertBuilderEqual(result, expected)
def test_is_empty_false_group_dataset(self):
    """Test is_empty() when group has a subgroup with a dataset"""
    inner = GroupBuilder('my_subgroup',
                         datasets={'my_dataset': DatasetBuilder('my_dataset')})
    outer = GroupBuilder('gb', {'my_subgroup': inner})
    self.assertEqual(outer.is_empty(), False)