def setUpClass(cls):
    """Prepare shared fixtures: three random in-memory data arrays,
    random-access stride index arrays, and three .binpos trajectory
    files written to a fresh temporary directory."""
    cls.tmpdir = tempfile.mkdtemp('test_random_access')
    cls.dim = 5
    # three in-memory "trajectories" of 100/20/20 frames each
    cls.data = [np.random.random((n, cls.dim)).astype(np.float32)
                for n in (100, 20, 20)]
    # (traj_index, frame_index) pairs used as random-access strides
    cls.stride = np.asarray([[0, 1], [0, 3], [0, 3], [0, 5],
                             [0, 6], [0, 7], [2, 1], [2, 1]])
    cls.stride2 = np.asarray([[2, 0]])
    cls.topfile = pkg_resources.resource_filename(__name__, 'data/test.pdb')
    # matching on-disk trajectories (same lengths as cls.data entries)
    files = []
    for length in (100, 20, 20):
        fname, _, _ = create_traj(cls.topfile, dir=cls.tmpdir,
                                  format=".binpos", length=length)
        files.append(fname)
    cls.data_feature_reader = files
def setUpClass(cls):
    """Create two fake trajectories (3 atoms; coordinates are simply a
    running range over all frames) in a fresh temporary directory."""
    cls.tmpdir = tempfile.mkdtemp('test_feature_reader')
    cls.topfile = pkg_resources.resource_filename(__name__, 'data/test.pdb')
    cls.trajfile, cls.xyz, cls.n_frames = create_traj(cls.topfile,
                                                      dir=cls.tmpdir)
    cls.trajfile2, cls.xyz2, cls.n_frames2 = create_traj(cls.topfile,
                                                         dir=cls.tmpdir)
    # returned so callers can chain on the configured class object
    return cls
def test_fragmented_reader_random_access(self):
    """Random access on a source mixing plain and fragmented trajectories
    must return the same frames as loading the files directly."""
    with TemporaryDirectory() as td:
        # three 20-frame trajectories with shifted coordinate offsets
        created = [create_traj(start=idx * 10, dir=td, length=20)[0]
                   for idx in range(3)]
        topfile = get_top()
        # middle entry is a fragmented trajectory made of two pieces
        trajfiles = [created[0], (created[0], created[1]), created[2]]
        source = coor.source(trajfiles, top=topfile)
        assert isinstance(source, FragmentedTrajectoryReader)
        for chunksize in [0, 2, 3, 100000]:
            out = source.get_output(stride=self.stride, chunk=chunksize)
            keys = np.unique(self.stride[:, 0])
            for traj_idx, coords in enumerate(out):
                if traj_idx not in keys:
                    continue
                ref = mdtraj.load(trajfiles[traj_idx], top=topfile)
                frames = np.array(
                    self.stride[self.stride[:, 0] == traj_idx][:, 1])
                np.testing.assert_equal(
                    coords, ref.xyz[frames].reshape(-1, 3 * 3))
def _test_ra_with_format(format, stride):
    """Write three trajectories in *format*, read them back with the given
    random-access *stride*, and compare against a direct mdtraj load.
    Created files are always removed, even on failure."""
    from pyerna.coordinates.tests.test_featurereader import create_traj
    topfile = pkg_resources.resource_filename(__name__, 'data/test.pdb')
    trajfiles = []
    for _ in range(3):
        fname, _, _ = create_traj(topfile, format=format)
        trajfiles.append(fname)
    try:
        source = coor.source(trajfiles, top=topfile)
        source.chunksize = 2
        out = source.get_output(stride=stride)
        selected = np.unique(stride[:, 0])
        for traj_idx, coords in enumerate(out):
            if traj_idx not in selected:
                continue
            ref = mdtraj.load(trajfiles[traj_idx], top=topfile)
            frames = np.array(stride[stride[:, 0] == traj_idx][:, 1])
            np.testing.assert_equal(coords, ref.xyz[frames].reshape(-1, 9))
    finally:
        # best-effort cleanup of the temporary trajectory files
        for t in trajfiles:
            try:
                os.unlink(t)
            except EnvironmentError:
                pass
def test_fragmented_xtc(self):
    """A fragmented source concatenates its pieces in order."""
    from pyerna.coordinates.tests.util import create_traj
    top_file = pkg_resources.resource_filename(__name__, 'data/test.pdb')
    trajfiles = []
    for _ in range(3):
        fname, _, _ = create_traj(top_file)
        trajfiles.append(fname)
    try:
        # three trajectories: one consisting of all three, one consisting of the first,
        # one consisting of the first and the last
        source = coor.source(
            [trajfiles, [trajfiles[0]], [trajfiles[0], trajfiles[2]]],
            top=top_file)
        source.chunksize = 1000
        out = source.get_output(stride=1)
        trajs = [mdtraj.load(f, top=top_file).xyz.reshape(-1, 9)
                 for f in trajfiles]
        np.testing.assert_equal(out[0], np.vstack(trajs))
        np.testing.assert_equal(out[1], trajs[0])
        np.testing.assert_equal(out[2], np.vstack((trajs[0], trajs[2])))
    finally:
        # best-effort cleanup of the temporary trajectory files
        for t in trajfiles:
            try:
                os.unlink(t)
            except EnvironmentError:
                pass
def test_lagged_iterator(self):
    """Lagged iteration yields matching X/Y pairs of the expected length
    regardless of the requested chunk size."""
    import pyerna.coordinates as coor
    from pyerna.coordinates.tests.util import create_traj, get_top
    trajectory_length = 4720
    lagtime = 1000
    n_trajs = 15
    top = get_top()
    trajs_data = [create_traj(top=top, length=trajectory_length)
                  for _ in range(n_trajs)]
    trajs = [t[0] for t in trajs_data]
    xyzs = [t[1].reshape(-1, 9) for t in trajs_data]
    reader = coor.source(trajs, top=top, chunksize=5000)
    # chunk sizes around the trajectory length, plus the None/0 sentinels
    for chunk in [None, 0, trajectory_length, trajectory_length + 1,
                  trajectory_length + 1000]:
        it = reader.iterator(lag=lagtime, chunk=chunk, return_trajindex=True)
        with it:
            for itraj, X, Y in it:
                np.testing.assert_equal(X.shape, Y.shape)
                np.testing.assert_equal(X.shape[0],
                                        trajectory_length - lagtime)
                np.testing.assert_array_almost_equal(
                    X, xyzs[itraj][:trajectory_length - lagtime])
                np.testing.assert_array_almost_equal(
                    Y, xyzs[itraj][lagtime:])
def test_lagged_access_small_files(self):
    """ itraj 0 should be skipped, since it is too short."""
    top = self.topfile
    # first trajectory (10 frames) is shorter than the lag time of 11
    trajs = [
        create_traj(top=top, length=10, format='.xtc', dir=self.tmpdir)[0],
        create_traj(top=top, length=20, format='.xtc', dir=self.tmpdir)[0],
    ]
    reader = source(trajs, top=top)
    it = reader.iterator(lag=11, chunk=0)
    res = {}
    with it:
        for itraj, x, y in it:
            res[itraj] = (x.shape, y.shape)
    self.assertNotIn(0, res)
    self.assertIn(1, res)
def test_trajs_larger_than_frame_index(self):
    """ file list is larger than largest traj file """
    import re
    from pyerna.coordinates.tests.util import create_traj, get_top
    files = [create_traj(length=10)[0] for _ in range(20)]
    # request frame i from trajectory i: out of range for most entries,
    # since every trajectory has only 10 frames
    inds = np.vstack((np.arange(20), np.arange(20))).T
    with self.assertRaises(ValueError) as cm:
        _frames_from_file(files, top=get_top(), frames=inds)
    matches = re.match(r".*10\).*is larger than trajectory length.*\= 10",
                       cm.exception.args[0])
    assert matches
def test_fragmented_reader(self):
    """Lengths reported by a fragmented reader equal the sum of its parts."""
    top_file = pkg_resources.resource_filename(__name__, 'data/test.pdb')
    trajfiles = []
    nframes = []
    with TemporaryDirectory() as wd:
        for _ in range(3):
            fname, _, length = create_traj(top_file, dir=wd)
            trajfiles.append(fname)
            nframes.append(length)
        # three trajectories: one consisting of all three, one consisting of the first,
        # one consisting of the first and the last
        reader = api.source(
            [trajfiles, [trajfiles[0]], [trajfiles[0], trajfiles[2]]],
            top=top_file)
        np.testing.assert_equal(
            reader.trajectory_lengths(),
            [sum(nframes), nframes[0], nframes[0] + nframes[2]])
def test_lagged_iterator_optimized(self):
    """Strided + lagged iteration through the random-access fast path
    returns the same frames as plain numpy slicing."""
    import pyerna.coordinates as coor
    from pyerna.coordinates.tests.util import create_traj, get_top
    from pyerna.coordinates.util.patches import iterload
    trajectory_length = 4720
    lagtime = 20
    n_trajs = 15
    # one past the threshold forces the random-access code path
    stride = iterload.MAX_STRIDE_SWITCH_TO_RA + 1
    top = get_top()
    trajs_data = [create_traj(top=top, length=trajectory_length)
                  for _ in range(n_trajs)]
    trajs = [t[0] for t in trajs_data]
    xyzs = [t[1].reshape(-1, 9)[::stride] for t in trajs_data]
    xyzs_lagged = [t[1].reshape(-1, 9)[lagtime::stride]
                   for t in trajs_data]
    reader = coor.source(trajs, stride=stride, top=top, chunksize=5000)
    memory_cutoff = iterload.MEMORY_CUTOFF
    try:
        # shrink the cutoff so the optimization actually triggers,
        # restored in the finally clause below
        iterload.MEMORY_CUTOFF = 8
        it = reader.iterator(stride=stride, lag=lagtime, chunk=5000,
                             return_trajindex=True)
        with it:
            curr_itraj = 0
            pos = 0  # frame offset within the current trajectory
            for itraj, X, Y in it:
                if itraj != curr_itraj:
                    curr_itraj = itraj
                    pos = 0
                np.testing.assert_equal(X.shape, Y.shape)
                n = len(X)
                np.testing.assert_array_almost_equal(
                    X, xyzs[itraj][pos:pos + n])
                np.testing.assert_array_almost_equal(
                    Y, xyzs_lagged[itraj][pos:pos + n])
                pos += n
    finally:
        iterload.MEMORY_CUTOFF = memory_cutoff
def test_fragmented_reader_random_access1(self):
    """Random access over a fragmented source, collecting md.Trajectory
    chunks and joining them, must match a direct mdtraj load."""
    with TemporaryDirectory() as td:
        created = [create_traj(start=idx * 10, dir=td, length=20)[0]
                   for idx in range(3)]
        topfile = get_top()
        # first entry is a two-piece fragmented trajectory
        trajfiles = [(created[0], created[1]), created[0], created[2]]
        source = coor.source(trajfiles, top=topfile)
        assert isinstance(source, FragmentedTrajectoryReader)
        # make every underlying reader yield md.Trajectory objects
        # NOTE(review): the `r = r[0]` fallback for non-list readers looks
        # suspicious -- confirm the reader layout this relies on.
        for r in source._readers:
            if not isinstance(r, (list, tuple)):
                r = r[0]
            for _r in r:
                _r._return_traj_obj = True
        from collections import defaultdict
        for chunksize in [0, 2, 3, 100000]:
            frames = defaultdict(list)
            with source.iterator(chunk=chunksize, return_trajindex=True,
                                 stride=self.stride) as it:
                for itraj, t in it:
                    frames[itraj].append(t)
            # join the chunks of each trajectory back together
            dest = []
            for itraj in frames.keys():
                dest.append(frames[itraj][0])
                for t in frames[itraj][1:]:
                    dest[-1] = dest[-1].join(t)
            keys = np.unique(self.stride[:, 0])
            for i, coords in enumerate(dest):
                if i in keys:
                    ref = mdtraj.load(trajfiles[i], top=topfile)
                    np.testing.assert_equal(
                        coords.xyz,
                        ref.xyz[np.array(
                            self.stride[self.stride[:, 0] == i][:, 1])],
                        err_msg="not equal for chunksize=%s" % chunksize)
def test_with_fragmented_reader(self):
    """save_traj with random-access indices on a fragmented source."""
    from pyerna.util.files import TemporaryDirectory
    trajlen = 35
    # trajectory 0 (first trajectory, is trajfiles[2])   -> skipped entirely
    # trajectory 1 (second trajectory, is {trajfiles[0], trajfiles[1]}):
    #   fragment 1 -> frames 0, 1, 2, 10
    #   fragment 2 -> frames 1 (global 36) and 34 (global 69)
    # trajectory 2 (third trajectory, is trajfiles[2])   -> frame 5
    ra_indices = np.array(
        [[1, 0], [1, 1], [1, 2], [1, 10],
         [1, trajlen + 1], [1, 2 * trajlen - 1],
         [2, 5]], dtype=int)
    with TemporaryDirectory() as td:
        trajfiles = []
        xyzs = []
        for i in range(3):
            tf, xyz, _ = create_traj(start=i * 10, dir=td, length=trajlen)
            trajfiles.append(tf)
            xyzs.append(xyz)
        topfile = get_top()
        frag_traj = [trajfiles[2],
                     [trajfiles[0], trajfiles[1]],
                     trajfiles[2]]
        expected = np.vstack((
            xyzs[0][np.array([0, 1, 2, 10]), :],  # fragment 1 frames
            xyzs[1][np.array([1, 34])],           # fragment 2 frames
            np.array([xyzs[2][5, :]]),            # trajectory 2, frame 5
        ))
        reader = coor.source(frag_traj, top=topfile)
        # the result must be independent of the chunk size used
        for cs in range(1, 10):
            traj = save_traj(reader, ra_indices, None, chunksize=cs)
            np.testing.assert_almost_equal(traj.xyz, expected)
def test_RA_high_stride(self):
    """ ensure we use a random access pattern for high strides chunksize combinations to avoid memory issues."""
    from pyerna.coordinates.util.patches import iterload
    from unittest.mock import patch
    n = int(1e5)
    n_bytes = 3 * 3 * 8 * n  # ~8Mb
    savable_formats_mdtra_18 = ('.xtc', '.trr', '.dcd', '.h5', '.binpos',
                                '.nc', '.netcdf', '.ncdf', '.tng')
    for ext in savable_formats_mdtra_18:
        traj = create_traj(length=n, dir=self.tmpdir, format=ext)[0]
        # temporarily overwrite the memory cutoff with a smaller value, to trigger the switch to RA stride.
        with patch('pyerna.coordinates.util.patches.iterload.MEMORY_CUTOFF',
                   n_bytes - 1):
            r = coor.source(traj, top=get_top())
            # high stride -> RA iterator expected
            it = r.iterator(stride=1000, chunk=100000)
            next(it)
            assert (iterload._DEACTIVATE_RANDOM_ACCESS_OPTIMIZATION
                    or it._mditer.is_ra_iter)
            out_ra = r.get_output(stride=1000, chunk=10000)
            # stride 1 -> ordinary sequential iterator expected
            it = r.iterator(stride=1)
            next(it)
            assert (iterload._DEACTIVATE_RANDOM_ACCESS_OPTIMIZATION
                    or not it._mditer.is_ra_iter)
            out = r.get_output(stride=1000)
            # both code paths must yield identical frames
            np.testing.assert_equal(out_ra, out)
            # check max stride exceeding
            it = r.iterator(stride=iterload.MAX_STRIDE_SWITCH_TO_RA + 1)
            next(it)
            assert (iterload._DEACTIVATE_RANDOM_ACCESS_OPTIMIZATION
                    or it._mditer.is_ra_iter)
            it = r.iterator(stride=iterload.MAX_STRIDE_SWITCH_TO_RA)
            next(it)
            assert (iterload._DEACTIVATE_RANDOM_ACCESS_OPTIMIZATION
                    or not it._mditer.is_ra_iter)