def __init__(self, filenames, selection='/*', chunk_size=5000, **kw):
    super(H5Reader, self).__init__(chunksize=chunk_size)
    self._is_reader = True
    self._is_random_accessible = True

    from pyerna.coordinates.data.data_in_memory import (DataInMemoryCuboidRandomAccessStrategy,
                                                        DataInMemoryJaggedRandomAccessStrategy,
                                                        DataInMemoryLinearRandomAccessStrategy,
                                                        DataInMemoryLinearItrajRandomAccessStrategy)
    self._ra_cuboid = DataInMemoryCuboidRandomAccessStrategy(self, 3)
    self._ra_jagged = DataInMemoryJaggedRandomAccessStrategy(self, 3)
    self._ra_linear_strategy = DataInMemoryLinearRandomAccessStrategy(self, 2)
    self._ra_linear_itraj_strategy = DataInMemoryLinearItrajRandomAccessStrategy(self, 3)

    # set selection first, so we can use it in the filename setter.
    self.selection = selection

    # we count data sets as itrajs, because a hdf5 file can contain multiple data sets.
    from collections import defaultdict
    self._itraj_dataset_mapping = defaultdict(int)

    # we explicitly do not want to cache anything for H5, because the user can provide
    # different selections and the interface of the cache does not allow for such a
    # mapping (1:1 relation filename:(dimension, len)).
    from pyerna.util.contexts import settings
    with settings(use_trajectory_lengths_cache=False):
        self.filenames = filenames

    # we need to override the ntraj attribute to be equal to the itraj counter,
    # so that all data sets are respected.
    self._ntraj = self._itraj_counter

    # sanity
    if self._itraj_counter == 0:
        raise ValueError('Your provided selection did not match anything in your '
                         'provided files. Check the log output.')
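# --- usage sketch (not part of the class) ---
# A minimal, hedged example of driving the constructor above. The module
# path `pyerna.coordinates.data.h5_reader` and the public `ntraj` attribute
# are assumptions inferred from this code, not verified here; h5py is only
# used to create a test file.
import h5py
import numpy as np
from pyerna.coordinates.data.h5_reader import H5Reader

with h5py.File('traj.h5', 'w') as fh:
    # two top-level data sets; each one matched by '/*' becomes its own itraj
    fh.create_dataset('ds_a', data=np.random.random((100, 3)))
    fh.create_dataset('ds_b', data=np.random.random((200, 3)))

reader = H5Reader('traj.h5', selection='/*', chunk_size=50)
assert reader.ntraj == 2  # the itraj counter saw both data sets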
def test_chunksize_max_memory(self):
    from pyerna.util.contexts import settings
    data = np.random.random((10000, 10))
    max_size = 1024
    with settings(default_chunksize=str(max_size)):
        r = DataInMemory(data)
        for itraj, x in r.iterator():
            self.assertLessEqual(x.nbytes, max_size)
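# Sanity check of the byte-cap arithmetic the test above relies on
# (pure illustration, independent of pyerna): with 10 float64 columns a
# frame occupies 80 bytes, so a 1024-byte chunk holds at most 12 frames.
import numpy as np

frame_bytes = 10 * np.dtype(np.float64).itemsize  # 80 bytes per frame
assert 1024 // frame_bytes == 12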
def test_invalid_data_in_input_inf(self):
    self.d[1][-1] = np.inf
    r = DataInMemory(self.d, chunksize=5)
    it = r.iterator()
    from pyerna.coordinates.data._base.datasource import InvalidDataInStreamException
    from pyerna.util.contexts import settings
    with settings(coordinates_check_output=True):
        with self.assertRaises(InvalidDataInStreamException) as cm:
            for itraj, X in it:
                pass
def test_exception_getoutput_invalid_data(self):
    """ensure we get a proper exception if invalid data is contained in the stream"""
    from pyerna.util.contexts import settings
    data = np.ones(10)
    data[-1] = np.nan
    reader = pyerna.coordinates.source(data)

    from pyerna.coordinates.data._base.datasource import InvalidDataInStreamException
    with settings(coordinates_check_output=True), self.assertRaises(InvalidDataInStreamException):
        reader.get_output()
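# Sketch of the validation that `coordinates_check_output=True` enables in
# the two tests above (the idea only -- `validate_chunk` is a hypothetical
# helper, not pyerna's actual implementation).
import numpy as np

def validate_chunk(X):
    # reject chunks containing NaN or +/-inf, as the stream check would
    if not np.all(np.isfinite(X)):
        raise ValueError('invalid data (NaN/inf) in stream chunk')

validate_chunk(np.ones((5, 2)))  # finite data passes silently
try:
    validate_chunk(np.array([1.0, np.nan]))
except ValueError as e:
    print(e)  # the NaN chunk is rejected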
def test_max_size(self):
    data = [np.random.random((150, 10)) for _ in range(150)]
    max_size = 1
    files = []
    with TemporaryDirectory() as td, settings(traj_info_max_size=max_size, show_progress_bars=False):
        for i, arr in enumerate(data):
            f = os.path.join(td, "%s.txt" % i)
            # save as txt to enforce creation of offsets
            np.savetxt(f, arr)
            files.append(f)
        pyerna.coordinates.source(files)

        self.assertLessEqual(os.stat(self.db.database_filename).st_size / 1024,
                             config.traj_info_max_size)
        self.assertGreater(self.db.num_entries, 0)
def test_featurereader_xtc(self):
    # cause cache failures
    with settings(use_trajectory_lengths_cache=False):
        reader = FeatureReader(xtcfiles, pdbfile)

    results = {}
    for f in xtcfiles:
        traj_info = self.db[f, reader]
        results[f] = traj_info.ndim, traj_info.length, traj_info.offsets

    expected = {}
    for f in xtcfiles:
        with mdtraj.open(f) as fh:
            length = len(fh)
            ndim = fh.read(1)[0].shape[1]
            offsets = fh.offsets if hasattr(fh, 'offsets') else []
            expected[f] = ndim, length, offsets

    np.testing.assert_equal(results, expected)
def test_config_2(self):
    self.pg.show_progress = False
    with settings(show_progress_bars=True):
        assert not self.pg.show_progress
def test_config_override(self):
    self.pg.show_progress = True
    with settings(show_progress_bars=False):
        assert not self.pg.show_progress
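# Both progress tests are consistent with a property that ANDs the
# per-instance flag with the global config switch: an instance set to False
# stays hidden even when the global switch is on (test_config_2), and a
# global False hides an instance set to True (test_config_override).
# A sketch of that pattern (`ProgressSketch` is hypothetical, and the
# top-level `config` import is assumed from the cache test above):
from pyerna import config

class ProgressSketch(object):
    def __init__(self):
        self._show_progress = True

    @property
    def show_progress(self):
        # visible only if BOTH the instance flag and the global config allow it
        return self._show_progress and config.show_progress_bars

    @show_progress.setter
    def show_progress(self, value):
        self._show_progress = bool(value)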