def test_round_trip_container(self):
    """Write an AlignedDynamicTable to disk and verify it reads back equal."""
    category_names = ['test1', 'test2', 'test3']
    num_rows = 10

    # build one DynamicTable of three VectorData columns per category name
    categories = []
    for val in category_names:
        columns = []
        for t in ['c1', 'c2', 'c3']:
            columns.append(VectorData(name=t,
                                      description=val + t + ' description',
                                      data=np.arange(num_rows)))
        categories.append(DynamicTable(name=val,
                                       description=val + " description",
                                       columns=columns))

    curr = AlignedDynamicTable(name='test_aligned_table',
                               description='Test aligned container',
                               category_tables=categories)

    with HDF5IO(self.path, manager=get_manager(), mode='w') as io:
        io.write(curr)

    with HDF5IO(self.path, manager=get_manager(), mode='r') as io:
        incon = io.read()
        self.assertListEqual(incon.categories, curr.categories)
        for n in category_names:
            assert_frame_equal(incon[n], curr[n])
def test_roundtrip(self):
    """Round-trip TestTable through HDF5 and validate against the cached spec.

    NOTE: this does not use H5RoundTripMixin because this requires custom validation.
    """
    test_table = self.TestTable(name='test_table', description='my test table')
    test_table.add_column('dynamic_column', 'this is a dynamic column')
    test_table.add_row(
        my_col=3.0, indexed_col=[1.0, 3.0], dynamic_column=4, optional_col2=.5,
    )

    self.filename = os.path.join(self.test_dir, 'test_TestTable.h5')

    with HDF5IO(self.filename, manager=self.manager, mode='w') as write_io:
        write_io.write(test_table, cache_spec=True)

    self.reader = HDF5IO(self.filename, manager=self.manager, mode='r')
    read_container = self.reader.read()

    self.assertIsNotNone(str(test_table))  # added as a test to make sure printing works
    self.assertIsNotNone(str(read_container))

    # make sure we get a completely new object
    self.assertNotEqual(id(test_table), id(read_container))

    # the name of the root container of a file is always 'root' (see h5tools.py ROOT_NAME)
    # thus, ignore the name of the container when comparing original container vs read container
    self.assertContainerEqual(read_container, test_table, ignore_name=True)

    builder = self.reader.read_builder()
    # TODO fix ValueError: No specification for 'Container' in namespace 'test_core'
    validator = ValidatorMap(self.manager.namespace_catalog.get_namespace(name=CORE_NAMESPACE))
    errors = validator.validate(builder)
    if errors:
        # raise once with ALL errors; raising inside a loop reported only the first
        raise Exception('\n'.join(str(err) for err in errors))

    self.reader.close()
def roundtripContainer(self, cache_spec=False):
    """Build self.nwbfile, write it to self.filename, and read it back.

    The IO objects are stored on self (self.writer / self.reader) —
    presumably so tearDown can close them; verify before restructuring.
    The read file is left available as self.read_nwbfile.
    """
    self.build_nwbfile()
    self.writer = HDF5IO(self.filename, manager=get_manager(), mode='w')
    self.writer.write(self.nwbfile, cache_spec=cache_spec)
    self.writer.close()
    # a fresh manager is used for reading so the read truly rebuilds containers
    self.reader = HDF5IO(self.filename, manager=get_manager(), mode='r')
    self.read_nwbfile = self.reader.read()
def test_write_cache_spec(self):
    ''' Round-trip test for writing spec and reading it back in '''
    # pass File modes explicitly: h5py's no-mode default is deprecated/ambiguous
    # ('a' matches the old default used when writing; 'r' for verification)
    with File(self.path, 'a') as fil:
        with HDF5IO(self.path, manager=self.manager, file=fil, mode='a') as io:
            io.write(self.container)
    with File(self.path, 'r') as f:
        self.assertIn('specifications', f)

    # load the cached 'core' namespace back from the file
    ns_catalog = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace)
    HDF5IO.load_namespaces(ns_catalog, self.path, namespaces=['core'])
    original_ns = self.manager.namespace_catalog.get_namespace('core')
    cached_ns = ns_catalog.get_namespace('core')
    self.maxDiff = None
    # compare namespace metadata fields
    for key in ('author', 'contact', 'doc', 'full_name', 'name'):
        with self.subTest(namespace_field=key):
            self.assertEqual(original_ns[key], cached_ns[key])
    # compare every registered type's spec against the cached copy
    for dt in original_ns.get_registered_types():
        with self.subTest(neurodata_type=dt):
            original_spec = original_ns.get_spec(dt)
            cached_spec = cached_ns.get_spec(dt)
            with self.subTest(test='data_type spec read back in'):
                self.assertIsNotNone(cached_spec)
            with self.subTest(test='cached spec preserved original spec'):
                self.assertDictEqual(original_spec, cached_spec)
def roundtripContainer(self):
    """Round-trip self.container via a data file plus a file of links to it.

    Fix: the link-file reader is kept open (stored on self) instead of being
    closed by a ``with`` block before the caller inspects the returned
    container — h5py reads datasets lazily, so returning a container backed
    by a closed file yields broken data access.
    """
    data_file = NWBFile(
        session_description='a test file',
        identifier='data_file',
        session_start_time=self.start_time
    )
    data_file.add_acquisition(self.container)

    with HDF5IO(self.data_filename, 'w', manager=get_manager()) as self.data_write_io:
        self.data_write_io.write(data_file)

    # the link file must be written while the data file is still open so the
    # external links can resolve the source datasets
    with HDF5IO(self.data_filename, 'r', manager=get_manager()) as self.data_read_io:
        data_file_obt = self.data_read_io.read()

        with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io:
            link_file = NWBFile(
                session_description='a test file',
                identifier='link_file',
                session_start_time=self.start_time
            )
            link_file.add_acquisition(TimeSeries(
                name='test_mod_ts',
                unit='V',
                data=data_file_obt.get_acquisition('data_ts'),
                timestamps=H5DataIO(
                    data=data_file_obt.get_acquisition('data_ts').timestamps,
                    link_data=True
                )
            ))
            link_write_io.write(link_file)

    # keep the reader open; tearDown is responsible for closing it
    self.link_file_reader = HDF5IO(self.link_filename, 'r', manager=get_manager())
    return self.getContainer(self.link_file_reader.read())
def test_write_clobber(self):
    """Opening an existing file with mode 'w-' must raise UnsupportedOperation."""
    with HDF5IO(self.path, manager=self.manager, mode='a') as io:
        io.write(self.container)

    expected_msg = ("Unable to open file %s in 'w-' mode. File already exists."
                    % self.path)
    with self.assertRaisesWith(UnsupportedOperation, expected_msg):
        with HDF5IO(self.path, manager=self.manager, mode='w-') as io:
            pass
def test_read(self):
    """Write the container, re-open the file, and spot-check what is read back."""
    writer = HDF5IO(self.path, manager=self.manager, mode='a')
    writer.write(self.container)
    writer.close()

    reader = HDF5IO(self.path, manager=self.manager, mode='a')
    container = reader.read()
    self.assertIsInstance(container, NWBFile)
    raw_ts = container.acquisition
    self.assertEqual(len(raw_ts), 1)
    self.assertEqual(len(container.analysis), 1)
    for ts in raw_ts.values():
        self.assertIsInstance(ts, TimeSeries)
    reader.close()
def test_write_clobber(self):
    """Mode 'w-' must refuse to overwrite an existing file."""
    io = HDF5IO(self.path, manager=self.manager, mode='a')
    io.write(self.container)
    io.close()

    f = File(self.path)  # noqa: F841
    # the "file exists" failure surfaces as IOError on py2 and OSError on py3
    assert_file_exists = IOError if six.PY2 else OSError
    with self.assertRaises(assert_file_exists):
        io = HDF5IO(self.path, manager=self.manager, mode='w-')
        io.write(self.container)
        io.close()
def test_read(self): """ Test reading the NWBFile using HDF5IO """ hdf5io = HDF5IO(self.filename, manager=self.manager, mode='w') hdf5io.write(self.nwbfile) hdf5io.close() hdf5io = HDF5IO(self.filename, manager=self.manager, mode='r') container = hdf5io.read() self.assertIsInstance(container, NWBFile) self.assertEqual(len(container.acquisition), 1) self.assertEqual(len(container.analysis), 1) for v in container.acquisition.values(): self.assertIsInstance(v, TimeSeries) self.assertContainerEqual(container, self.nwbfile) hdf5io.close()
def testInFromMatNWB(self):
    """Read the MatNWB-written file and compare its container with PyNWB's."""
    filename = 'MatNWB.' + self.__class__.__name__ + '.testOutToPyNWB.nwb'
    with HDF5IO(filename, manager=get_manager(), mode='r') as reader:
        mat_file = reader.read()
        mat_container = self.getContainer(mat_file)
        py_container = self.getContainer(self.file)
        self.assertContainerEqual(mat_container, py_container)
def test_write_cache_spec(self):
    """Roundtrip test for writing spec and reading it back in."""
    with HDF5IO(self.path, manager=self.manager, mode="a") as writer:
        writer.write(self.container)
    with File(self.path, 'r') as hfile:
        self.assertIn('specifications', hfile)
    self._check_spec()
def test_read_builder(self):
    """Write the builder and confirm an equal builder is read back."""
    self.maxDiff = None
    # context manager guarantees the file is closed even if the assertion fails
    # (the original leaked the handle on failure because close() came last)
    with HDF5IO(self.path, manager=self.manager, mode='a') as io:
        io.write_builder(self.builder)
        builder = io.read_builder()
        self.assertBuilderEqual(builder, self.builder)
def test_write_no_cache_spec(self):
    ''' Round-trip test for not writing spec '''
    with HDF5IO(self.path, manager=self.manager, mode="a") as io:
        io.write(self.container, cache_spec=False)
    # open read-only explicitly; h5py's no-mode default is deprecated/ambiguous
    with File(self.path, 'r') as f:
        self.assertNotIn('specifications', f)
def test_write_attribute_reference_builder(self):
    """An attribute set to a builder is written as an HDF5 object Reference."""
    self.builder.set_attribute('ref_attribute', self.foo_builder)
    # context manager guarantees the writer is closed even if write_builder raises
    with HDF5IO(self.path, manager=self.manager, mode='a') as writer:
        writer.write_builder(self.builder)
    f = self.check_fields()
    self.assertIsInstance(f.attrs['ref_attribute'], Reference)
    # dereferencing the attribute must land on the foo1 group
    self.assertEqual(f['test_bucket/foo_holder/foo1'], f[f.attrs['ref_attribute']])
def test_write_cache_spec_injected(self):
    """Roundtrip test for writing spec and reading it back in when HDF5IO is passed an open h5py.File."""
    with File(self.path, 'w') as hfile:
        with HDF5IO(self.path, manager=self.manager, file=hfile, mode='a') as writer:
            writer.write(self.container)
    with File(self.path, 'r') as hfile:
        self.assertIn('specifications', hfile)
    self._check_spec()
def test_dataset_shape(self):
    """get_data_shape on a read-back dataset reports the written shape."""
    self.maxDiff = None
    # context manager guarantees the file is closed even if the assertion fails
    # (the original leaked the handle on failure because close() came last)
    with HDF5IO(self.path, manager=self.manager, mode='a') as io:
        io.write_builder(self.builder)
        builder = io.read_builder()
        dset = builder['test_bucket']['foo_holder']['foo1']['my_data'].data
        self.assertEqual(get_data_shape(dset), (10,))
def test_overwrite_written(self):
    """A builder read from disk is marked written; unmarking it must fail."""
    self.maxDiff = None
    # context manager guarantees the file is closed even if the assertion fails
    # (the original leaked the handle on failure because close() came last)
    with HDF5IO(self.path, manager=self.manager, mode='a') as io:
        io.write_builder(self.builder)
        builder = io.read_builder()
        with self.assertRaisesWith(ValueError, "cannot change written to not written"):
            builder.written = False
def roundtripContainer(self, cache_spec=False):
    """Write an NWBFile containing the test container, read it back, and
    return the read-back container.

    On failure to extract the container, the reader is closed and cleared so
    teardown does not operate on a broken handle, then the error propagates.
    """
    description = 'a file to test writing and reading a %s' % self.container_type
    identifier = 'TEST_%s' % self.container_type
    nwbfile = NWBFile(description, identifier, self.start_time,
                      file_create_date=self.create_date)
    self.addContainer(nwbfile)

    self.writer = HDF5IO(self.filename, manager=get_manager(), mode='w')
    self.writer.write(nwbfile, cache_spec=cache_spec)
    self.writer.close()

    self.reader = HDF5IO(self.filename, manager=get_manager(), mode='r')
    self.read_nwbfile = self.reader.read()
    try:
        return self.getContainer(self.read_nwbfile)
    except Exception:
        self.reader.close()
        self.reader = None
        # bare raise re-raises the active exception with its original traceback
        # ("raise e" adds a redundant frame and is the less idiomatic form)
        raise
def roundtripContainer(self):
    """Round-trip a TimeSeries through a data file and a file of links to it.

    The link file's TimeSeries refers to the data file's TimeSeries data
    directly and links to its timestamps dataset via H5DataIO(link_data=True).
    Returns the container extracted from the read-back link file.
    """
    # create and write data file
    data_file = NWBFile(
        session_description='a test file',
        identifier='data_file',
        session_start_time=self.start_time
    )
    data_file.add_acquisition(self.container)

    with HDF5IO(self.data_filename, 'w', manager=get_manager()) as data_write_io:
        data_write_io.write(data_file)

    # read data file
    with HDF5IO(self.data_filename, 'r', manager=get_manager()) as self.data_read_io:
        data_file_obt = self.data_read_io.read()

        # write "link file" with timeseries.data that is an external link to the timeseries in "data file"
        # also link timeseries.timestamps.data to the timeseries.timestamps in "data file"
        with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io:
            link_file = NWBFile(
                session_description='a test file',
                identifier='link_file',
                session_start_time=self.start_time
            )
            self.link_container = TimeSeries(
                name='test_mod_ts',
                unit='V',
                data=data_file_obt.get_acquisition('data_ts'),  # test direct link
                timestamps=H5DataIO(
                    data=data_file_obt.get_acquisition('data_ts').timestamps,
                    link_data=True  # test with setting link data
                )
            )
            link_file.add_acquisition(self.link_container)
            link_write_io.write(link_file)

    # note that self.link_container contains a link to a dataset that is now closed

    # read the link file
    # the reader is deliberately NOT a context manager: the returned container
    # reads its datasets lazily from this still-open file
    self.link_read_io = HDF5IO(self.link_filename, 'r', manager=get_manager())
    self.read_nwbfile = self.link_read_io.read()
    return self.getContainer(self.read_nwbfile)
def _check_spec(self):
    """Compare every namespace and type spec cached in the file with the originals."""
    ns_catalog = NamespaceCatalog()
    HDF5IO.load_namespaces(ns_catalog, self.path)
    self.maxDiff = None
    for namespace in self.manager.namespace_catalog.namespaces:
        with self.subTest(namespace=namespace):
            original_ns = self.manager.namespace_catalog.get_namespace(namespace)
            cached_ns = ns_catalog.get_namespace(namespace)

            # check every namespace metadata field except 'schema'
            # (schema fields will not match, so skip that one)
            fields = list(original_ns.keys())
            fields.remove('schema')
            for ns_field in fields:
                with self.subTest(namespace_field=ns_field):
                    self.assertEqual(original_ns[ns_field], cached_ns[ns_field])

            # check every registered type's spec against the cached copy
            for dt in original_ns.get_registered_types():
                with self.subTest(data_type=dt):
                    original_spec = original_ns.get_spec(dt)
                    cached_spec = cached_ns.get_spec(dt)
                    with self.subTest('Data type spec is read back in'):
                        self.assertIsNotNone(cached_spec)
                    with self.subTest('Cached spec matches original spec'):
                        self.assertDictEqual(original_spec, cached_spec)
def test_nwbio(self):
    """After writing self.container, the file has the expected NWB layout."""
    with HDF5IO(self.path, manager=self.manager, mode='a') as io:
        io.write(self.container)
    # open read-only explicitly — h5py's no-mode default is deprecated/ambiguous
    with File(self.path, 'r') as f:
        self.assertIn('acquisition', f)
        self.assertIn('analysis', f)
        self.assertIn('general', f)
        self.assertIn('processing', f)
        self.assertIn('file_create_date', f)
        self.assertIn('identifier', f)
        self.assertIn('session_description', f)
        self.assertIn('session_start_time', f)
        acq = f.get('acquisition')
        self.assertIn('test_timeseries', acq)
def test_link_root(self):
    """Linking a root-level container from one file into another preserves each
    container's source-file path."""
    # create and write data file
    data_file = NWBFile(
        session_description='a test file',
        identifier='data_file',
        session_start_time=self.start_time
    )
    data_file.add_acquisition(self.container)
    with HDF5IO(self.data_filename, 'w', manager=get_manager()) as data_write_io:
        data_write_io.write(data_file)

    # read data file
    manager = get_manager()
    with HDF5IO(self.data_filename, 'r', manager=manager) as data_read_io:
        data_file_obt = data_read_io.read()
        link_file = NWBFile(
            session_description='a test file',
            identifier='link_file',
            session_start_time=self.start_time
        )
        link_container = data_file_obt.acquisition[self.container.name]
        link_file.add_acquisition(link_container)
        # adding to link_file must not reparent the container away from data_file_obt
        self.assertIs(link_container.parent, data_file_obt)
        # NOTE(review): the same manager is reused here — presumably so the
        # container is written as a link back to the data file rather than
        # copied; confirm against HDF5IO's linking behavior
        with HDF5IO(self.link_filename, 'w', manager=manager) as link_write_io:
            link_write_io.write(link_file)

    # read the link file, check container sources
    with HDF5IO(self.link_filename, 'r+', manager=get_manager()) as link_file_reader:
        read_nwbfile = link_file_reader.read()
        # the linked container still reports the data file as its source...
        self.assertNotEqual(read_nwbfile.acquisition[self.container.name].container_source,
                            read_nwbfile.container_source)
        self.assertEqual(read_nwbfile.acquisition[self.container.name].container_source,
                         self.data_filename)
        # ...while the file object itself reports the link file
        self.assertEqual(read_nwbfile.container_source, self.link_filename)
def test_nwbio(self):
    """Write via an injected h5py.File, then check the top-level NWB layout."""
    # pass the mode explicitly — h5py's no-mode default is deprecated/ambiguous
    # ('a' matches the HDF5IO mode used below)
    fil = File(self.path, 'a')
    io = HDF5IO(self.path, manager=self.manager, file=fil, mode="a")
    io.write(self.container)
    io.close()
    # fix: the verification handle was previously never closed
    with File(self.path, 'r') as f:
        self.assertIn('acquisition', f)
        self.assertIn('analysis', f)
        self.assertIn('general', f)
        self.assertIn('processing', f)
        self.assertIn('file_create_date', f)
        self.assertIn('identifier', f)
        self.assertIn('session_description', f)
        self.assertIn('session_start_time', f)
        acq = f.get('acquisition')
        self.assertIn('test_timeseries', acq)
def test_write_context_manager(self):
    """HDF5IO used as a context manager writes the builder correctly."""
    with HDF5IO(self.path, manager=self.manager, mode='a') as out:
        out.write_builder(self.builder)
    self.check_fields()
def test_write(self): """ Test writing the NWBFile using HDF5IO """ hdf5io = HDF5IO(self.filename, manager=self.manager, mode='a') hdf5io.write(self.nwbfile) hdf5io.close()
def testOutToMatNWB(self):
    """Write self.file to an .nwb file for MatNWB to read back."""
    filename = 'PyNWB.' + type(self).__name__ + '.testOutToMatNWB.nwb'
    with HDF5IO(filename, manager=get_manager(), mode='w') as writer:
        writer.write(self.file)
    self.assertTrue(os.path.isfile(filename))
def main(): ep = """ use --nspath to validate against an extension. If --ns is not specified, validate against all namespaces in namespace file. """ parser = ArgumentParser(description="Validate an NWB file", epilog=ep) parser.add_argument("paths", type=str, nargs='+', help="NWB file paths") parser.add_argument('-p', '--nspath', type=str, help="the path to the namespace YAML file") parser.add_argument("-n", "--ns", type=str, help="the namespace to validate against") feature_parser = parser.add_mutually_exclusive_group(required=False) feature_parser.add_argument("--cached-namespace", dest="cached_namespace", action='store_true', help="Use the cached namespace (default).") feature_parser.add_argument('--no-cached-namespace', dest="cached_namespace", action='store_false', help="Don't use the cached namespace.") parser.set_defaults(cached_namespace=True) args = parser.parse_args() ret = 0 if args.nspath: if not os.path.isfile(args.nspath): print("The namespace file {} is not a valid file.".format( args.nspath), file=sys.stderr) sys.exit(1) if args.cached_namespace: print( "Turning off validation against cached namespace information" "as --nspath was passed.", file=sys.stderr) args.cached_namespace = False for path in args.paths: if not os.path.isfile(path): print("The file {} does not exist.".format(path), file=sys.stderr) ret = 1 continue if args.cached_namespace: catalog = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace) namespaces = HDF5IO.load_namespaces(catalog, path).keys() if len(namespaces) > 0: tm = TypeMap(catalog) manager = BuildManager(tm) specloc = "cached namespace information" else: manager = None namespaces = available_namespaces() specloc = "pynwb namespace information" print("The file {} has no cached namespace information. 
" "Falling back to {}.".format(path, specloc), file=sys.stderr) elif args.nspath: catalog = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace) namespaces = catalog.load_namespaces(args.nspath) if len(namespaces) == 0: print("Could not load namespaces from file {}.".format( args.nspath), file=sys.stderr) sys.exit(1) tm = TypeMap(catalog) manager = BuildManager(tm) specloc = "--nspath namespace information" else: manager = None namespaces = available_namespaces() specloc = "pynwb namespace information" if args.ns: if args.ns in namespaces: namespaces = [args.ns] else: print("The namespace {} could not be found in {}.".format( args.ns, specloc), file=sys.stderr) ret = 1 continue with NWBHDF5IO(path, mode='r', manager=manager) as io: for ns in namespaces: print("Validating {} against {} using namespace {}.".format( path, specloc, ns)) ret = ret or _validate_helper(io=io, namespace=ns) sys.exit(ret)
def test_write(self):
    """Write self.container to disk; passing means no exception was raised."""
    writer = HDF5IO(self.path, manager=self.manager, mode='a')
    writer.write(self.container)
    writer.close()
# Script: read a gzipped FASTA file, one-hot-encode and pack the DNA
# sequences, and write them to an HDF5 file as a DNATable.
from dna_table import DNATable
from hdmf.backends.hdf5 import HDF5IO, H5DataIO
import hdmf.common as common
import os

# NOTE(review): SeqConcat and pack_ohe_dna are used below but never imported —
# presumably they come from a sibling module (e.g. dna_table); confirm and
# import them explicitly, otherwise this script raises NameError.

# input FASTA (gzipped) and output HDF5 paths, relative to this script
fnapath = "../../../../deep_index/gtdb/test_data/genomes/all/GCA/000/989/525/GCA_000989525.1_ASM98952v1/GCA_000989525.1_ASM98952v1_cds_from_genomic.fna.gz"
h5path = "../../../../deep_index/gtdb/test_data/genomes/all/GCA/000/989/525/GCA_000989525.1_ASM98952v1/seq.h5"

# ## Read Fasta sequence
print("reading %s" % (fnapath))
fasize = os.path.getsize(fnapath)
print("Fasta size:", fasize)
sc = SeqConcat()
# data/seqindex/ltags: concatenated sequence data, per-sequence index, and locus tags
data, seqindex, ltags = sc._read_path(fnapath)

# ## Pack sequence and write to HDF5 file
packed, padded = pack_ohe_dna(data)

# gzip-compress each column when writing
table = DNATable('root', 'a test table',
                 H5DataIO(ltags, compression='gzip'),
                 H5DataIO(packed, compression='gzip'),
                 H5DataIO(seqindex, compression='gzip'))
with HDF5IO(h5path, 'w', manager=common.get_manager()) as io:
    io.write(table)

# report the resulting file size for comparison with the FASTA input
print("reading %s" % (h5path))
h5size = os.path.getsize(h5path)
print("HDF5 size:", h5size)
def test_write_builder(self):
    """A builder written through write_builder produces the expected fields."""
    # context manager guarantees the writer is closed even if write_builder
    # raises, and matches the style of test_write_context_manager
    with HDF5IO(self.path, manager=self.manager, mode='a') as writer:
        writer.write_builder(self.builder)
    self.check_fields()