def setUpClass(cls):
    """Build the class-wide fixtures used by the random access tests.

    Sets on ``cls``:

    * ``tmpdir`` -- a fresh scratch directory,
    * ``dim`` / ``data`` -- three random float32 arrays with ``dim`` columns
      and 100, 20 and 20 rows,
    * ``stride`` / ``stride2`` -- two-column integer index arrays
      (presumably (trajectory index, frame index) pairs -- confirm against
      the tests that consume them),
    * ``topfile`` -- path of the packaged ``data/test.pdb`` resource,
    * ``data_feature_reader`` -- three ``.binpos`` trajectory files written
      to ``tmpdir`` with the same lengths as the arrays in ``data``.
    """
    # Number of rows / frames of the three in-memory arrays and trajectories.
    frame_counts = (100, 20, 20)

    cls.tmpdir = tempfile.mkdtemp('test_random_access')
    cls.dim = 5
    cls.data = [
        np.random.random((n_rows, cls.dim)).astype(np.float32)
        for n_rows in frame_counts
    ]
    cls.stride = np.asarray([[0, 1], [0, 3], [0, 3], [0, 5], [0, 6],
                             [0, 7], [2, 1], [2, 1]])
    cls.stride2 = np.asarray([[2, 0]])
    cls.topfile = pkg_resources.resource_filename(__name__,
                                                  'data/test.pdb')
    # Only the file name (first element of create_traj's result) is kept.
    cls.data_feature_reader = [
        create_traj(cls.topfile,
                    dir=cls.tmpdir,
                    format=".binpos",
                    length=n_frames)[0]
        for n_frames in frame_counts
    ]
Esempio n. 2
0
    def setUpClass(cls):
        """Create a scratch directory and two trajectory files for the class.

        Stores the scratch directory (``tmpdir``), the packaged topology path
        (``topfile``) and, for each of the two trajectories written by
        ``create_traj``, its file name, coordinates and frame count
        (``trajfile``/``xyz``/``n_frames`` and the ``*2`` counterparts).

        Returns ``cls`` itself.
        """
        scratch_dir = tempfile.mkdtemp('test_feature_reader')
        cls.tmpdir = scratch_dir

        topology = pkg_resources.resource_filename(__name__, 'data/test.pdb')
        cls.topfile = topology

        # Per the original note: these are fake trajectories with 3 atoms
        # whose coordinates are just a range over all frames.
        first = create_traj(topology, dir=scratch_dir)
        cls.trajfile, cls.xyz, cls.n_frames = first
        second = create_traj(topology, dir=scratch_dir)
        cls.trajfile2, cls.xyz2, cls.n_frames2 = second

        return cls
Esempio n. 3
0
    def test_fragmented_xtc(self):
        """Fragmented sources must yield their fragment files stacked in order.

        Three trajectory files are created and combined into three fragmented
        trajectories; the output of each is compared with the vertically
        stacked coordinates loaded through mdtraj.
        """
        from pyemma.coordinates.tests.util import create_traj

        top_file = pkg_resources.resource_filename(__name__, 'data/test.pdb')
        trajfiles = [create_traj(top_file)[0] for _ in range(3)]
        try:
            # Three trajectories: all three files, only the first file, and
            # the first file followed by the last one.
            fragments = [
                trajfiles,
                [trajfiles[0]],
                [trajfiles[0], trajfiles[2]],
            ]
            source = coor.source(fragments, top=top_file)
            source.chunksize = 1000
            out = source.get_output(stride=1)

            # Reference coordinates per file, flattened to 9 columns.
            trajs = [
                mdtraj.load(path, top=top_file).xyz.reshape(-1, 9)
                for path in trajfiles
            ]
            expected = [
                np.vstack(trajs),
                trajs[0],
                np.vstack((trajs[0], trajs[2])),
            ]
            for idx, reference in enumerate(expected):
                np.testing.assert_equal(out[idx], reference)
        finally:
            # Best-effort removal of the generated files.
            for path in trajfiles:
                try:
                    os.unlink(path)
                except EnvironmentError:
                    pass
    def test_fragmented_reader_random_access(self):
        """Random access output of a fragmented reader must match mdtraj.

        For several chunk sizes, the output obtained with ``self.stride`` is
        compared, for every trajectory index that occurs in the first stride
        column, with the frames selected by the second stride column.
        """
        with TemporaryDirectory() as td:
            created = [
                create_traj(start=i * 10, dir=td, length=20)[0]
                for i in range(3)
            ]
            topfile = get_top()

            # Second entry is a fragmented trajectory made of two files.
            trajfiles = [created[0], (created[0], created[1]), created[2]]

            source = coor.source(trajfiles, top=topfile)
            assert isinstance(source, FragmentedTrajectoryReader)

            for chunksize in [0, 2, 3, 100000]:
                out = source.get_output(stride=self.stride, chunk=chunksize)
                keys = np.unique(self.stride[:, 0])
                for i, coords in enumerate(out):
                    if i not in keys:
                        continue
                    traj = mdtraj.load(trajfiles[i], top=topfile)
                    # Rows of the stride array that address trajectory i.
                    rows_for_traj = self.stride[self.stride[:, 0] == i]
                    frame_ids = np.array(rows_for_traj[:, 1])
                    expected = traj.xyz[frame_ids].reshape(-1, 3 * 3)
                    np.testing.assert_equal(coords, expected)
Esempio n. 5
0
    def test_fragmented_reader(self):
        """A fragmented source must survive ``self.compare`` unchanged.

        Builds a fragmented source from three generated trajectory files,
        hands it with the expected parameters to ``self.compare`` and checks
        that the object returned by it produces the same output.
        """
        from pyemma.coordinates.tests.util import create_traj
        from pyemma.util.files import TemporaryDirectory

        top_file = pkg_resources.resource_filename(__name__, 'data/test.pdb')
        chunksize = 232

        with TemporaryDirectory() as d:
            trajfiles = [create_traj(top_file, dir=d)[0] for _ in range(3)]
            first, last = trajfiles[0], trajfiles[2]
            # Three trajectories: all three files, only the first file, and
            # the first file followed by the last one.
            frag_trajs = [trajfiles, [first], [first, last]]
            source = coor.source(frag_trajs, top=top_file, chunksize=chunksize)

            params = {
                'chunksize': chunksize,
                'ndim': source.ndim,
                '_trajectories': trajfiles
            }
            restored = self.compare(source, params)

            expected_output = source.get_output()
            np.testing.assert_equal(expected_output, restored.get_output())
def _test_ra_with_format(format, stride):
    """Check random access output for trajectories written in ``format``.

    Three trajectory files are created with the given file format, read with
    a chunk size of 2 and the output obtained with ``stride`` is compared to
    the frames mdtraj returns for the same indices.

    :param format: file format passed on to ``create_traj``.
    :param stride: two-column index array; column 0 is compared with the
        trajectory index, column 1 is used to select frames.
    """
    from pyemma.coordinates.tests.test_featurereader import create_traj

    topfile = pkg_resources.resource_filename(__name__, 'data/test.pdb')
    trajfiles = [create_traj(topfile, format=format)[0] for _ in range(3)]
    try:
        source = coor.source(trajfiles, top=topfile)
        source.chunksize = 2
        out = source.get_output(stride=stride)

        keys = np.unique(stride[:, 0])
        for i, coords in enumerate(out):
            if i not in keys:
                continue
            traj = mdtraj.load(trajfiles[i], top=topfile)
            # Frame indices requested for trajectory i.
            rows_for_traj = stride[stride[:, 0] == i]
            frame_ids = np.array(rows_for_traj[:, 1])
            np.testing.assert_equal(coords,
                                    traj.xyz[frame_ids].reshape(-1, 9))
    finally:
        # Best-effort removal of the generated files.
        for path in trajfiles:
            try:
                os.unlink(path)
            except EnvironmentError:
                pass
    def test_lagged_iterator(self):
        """Lagged iteration must return correctly shifted coordinate pairs.

        For several chunk sizes (each covering a whole trajectory, or
        ``None``/``0``), every yielded pair ``(X, Y)`` is expected to hold
        the first ``trajectory_length - lagtime`` frames and the frames from
        ``lagtime`` onwards, respectively.
        """
        import pyemma.coordinates as coor
        from pyemma.coordinates.tests.util import create_traj, get_top

        trajectory_length = 4720
        lagtime = 1000
        n_trajs = 15
        n_pairs = trajectory_length - lagtime

        top = get_top()
        trajs = []
        xyzs = []
        for _ in range(n_trajs):
            created = create_traj(top=top, length=trajectory_length)
            trajs.append(created[0])
            xyzs.append(created[1].reshape(-1, 9))

        reader = coor.source(trajs, top=top, chunksize=5000)

        chunk_sizes = [
            None,
            0,
            trajectory_length,
            trajectory_length + 1,
            trajectory_length + 1000,
        ]
        for chunk in chunk_sizes:
            it = reader.iterator(lag=lagtime,
                                 chunk=chunk,
                                 return_trajindex=True)
            with it:
                for itraj, X, Y in it:
                    reference = xyzs[itraj]
                    np.testing.assert_equal(X.shape, Y.shape)
                    np.testing.assert_equal(X.shape[0], n_pairs)
                    np.testing.assert_array_almost_equal(
                        X, reference[:n_pairs])
                    np.testing.assert_array_almost_equal(
                        Y, reference[lagtime:])
Esempio n. 8
0
    def test_RA_high_stride(self):
        """Ensure a random access pattern is used for high stride/chunksize
        combinations to avoid memory issues.

        For every listed file format a long trajectory is written and read
        back three ways:

        * with ``MEMORY_CUTOFF`` patched below the estimated data size, the
          iterator must switch to random access (``is_ra_iter``),
        * without the patch and with ``stride=1`` it must not, and both
          strided outputs must be equal,
        * a stride above ``MAX_STRIDE_SWITCH_TO_RA`` must switch to random
          access, a stride equal to it must not.
        """
        # Prefer the standard library's mock; fall back to the ``mock``
        # backport only where ``unittest.mock`` is unavailable.
        try:
            from unittest.mock import patch
        except ImportError:
            from mock import patch
        from pyemma.coordinates.util.patches import iterload

        n = int(1e5)
        n_bytes = 3 * 3 * 8 * n  # 7.2e6 bytes (~7 MB)
        savable_formats_mdtraj_18 = ('.xtc', '.trr', '.dcd', '.h5', '.binpos',
                                     '.nc', '.netcdf', '.ncdf', '.tng')
        for ext in savable_formats_mdtraj_18:
            traj = create_traj(length=n, dir=self.tmpdir, format=ext)[0]

            # Temporarily overwrite the memory cutoff with a smaller value,
            # to trigger the switch to RA stride.
            with patch(
                    'pyemma.coordinates.util.patches.iterload.MEMORY_CUTOFF',
                    n_bytes - 1):
                r = coor.source(traj, top=get_top())
                it = r.iterator(stride=1000, chunk=100000)
                assert it._mditer.is_ra_iter

                out_ra = r.get_output(stride=1000, chunk=10000)

            # With the original cutoff restored, plain iteration is expected.
            it = r.iterator(stride=1)
            assert not it._mditer.is_ra_iter
            out = r.get_output(stride=1000)
            np.testing.assert_equal(out_ra, out)

            # Check max stride exceeding.
            it = r.iterator(stride=iterload.MAX_STRIDE_SWITCH_TO_RA + 1)
            assert it._mditer.is_ra_iter

            it = r.iterator(stride=iterload.MAX_STRIDE_SWITCH_TO_RA)
            assert not it._mditer.is_ra_iter
Esempio n. 9
0
    def test_lagged_access_small_files(self):
        """ itraj 0 should be skipped, since it is too short.

        Two ``.xtc`` files with 10 and 20 frames are iterated with a lag of
        11; only trajectory index 1 may show up in the results.
        """
        top = self.topfile
        trajs = [
            create_traj(top=top, length=n_frames, format='.xtc',
                        dir=self.tmpdir)[0]
            for n_frames in (10, 20)
        ]

        reader = source(trajs, top=top)
        it = reader.iterator(lag=11, chunk=0)

        # Shapes of the instantaneous/lagged chunks, keyed by trajectory index.
        shapes_by_traj = {}
        with it:
            for itraj, x, y in it:
                shapes_by_traj[itraj] = (x.shape, y.shape)

        self.assertNotIn(0, shapes_by_traj)
        self.assertIn(1, shapes_by_traj)
Esempio n. 10
0
    def test_trajs_larger_than_frame_index(self):
        """ file list is larger than largest traj file

        Twenty trajectories with 10 frames each are created and frame ``i``
        is requested from trajectory ``i``; indices of 10 and above do not
        exist, so ``_frames_from_file`` must raise a ``ValueError`` whose
        message names the offending index and the trajectory length.
        """
        import os
        import re
        from pyemma.coordinates.tests.util import create_traj, get_top

        files = [create_traj(length=10)[0] for _ in range(20)]
        try:
            # Row i is (trajectory i, frame i).
            inds = np.vstack((np.arange(20), np.arange(20))).T

            with self.assertRaises(ValueError) as cm:
                _frames_from_file(files, top=get_top(), frames=inds)

            # Raw string: ``\)`` and ``\=`` are regex escapes, not string
            # escapes, and are invalid escape sequences in a plain literal.
            matches = re.match(
                r".*10\).*is larger than trajectory length.*\= 10",
                cm.exception.args[0])
            self.assertIsNotNone(matches, cm.exception.args[0])
        finally:
            # Best-effort removal of the generated files.
            for path in files:
                try:
                    os.unlink(path)
                except OSError:
                    pass
Esempio n. 11
0
    def setUpClass(cls):
        """Prepare trajectory fixtures and attach one loader test per format.

        After delegating to the parent ``setUpClass``, two trajectories are
        written to a fresh scratch directory. For every file extension
        reported by ``traj._savers()`` (except a fixed skip list) another
        trajectory is written in that format and a test method created by
        ``create_loader_case`` is attached to the class under the name
        ``'test_loader_' + extension``.

        Returns whatever the parent ``setUpClass`` returned.
        """
        c = super(TestFeatureReader, cls).setUpClass()

        # Per the original note: fake trajectories with 3 atoms whose
        # coordinates are just a range over all frames.
        cls.tmpdir = tempfile.mkdtemp('test_feature_reader')
        cls.topfile = pkg_resources.resource_filename(__name__,
                                                      'data/test.pdb')

        first = create_traj(cls.topfile, dir=cls.tmpdir)
        cls.trajfile, cls.xyz, cls.n_frames = first
        second = create_traj(cls.topfile, dir=cls.tmpdir)
        cls.trajfile2, cls.xyz2, cls.n_frames2 = second

        # Extensions for which no loader test is generated.
        skipped_formats = ('.crd', '.mdcrd', '.h5', '.ncrst', '.lh5')

        traj = mdtraj.load(cls.trajfile, top=cls.topfile)
        for fo in traj._savers():
            if fo not in skipped_formats:
                log.debug("creating traj for " + fo)
                traj_file = create_traj(cls.topfile, format=fo,
                                        dir=cls.tmpdir)[0]
                test_mtd = create_loader_case(traj_file, cls.topfile)
                method_name = 'test_loader_' + fo
                test_mtd.__name__ = method_name
                setattr(cls, method_name, test_mtd)

        return c
Esempio n. 12
0
 def test_fragmented_reader(self):
     """Fragmented trajectories must report the summed fragment lengths."""
     top_file = pkg_resources.resource_filename(__name__, 'data/test.pdb')
     with TemporaryDirectory() as wd:
         created = [create_traj(top_file, dir=wd) for _ in range(3)]
         # create_traj yields (file name, <unused>, number of frames).
         trajfiles = [entry[0] for entry in created]
         nframes = [entry[2] for entry in created]

         # Three trajectories: all three files, only the first file, and
         # the first file followed by the last one.
         fragments = [trajfiles, [trajfiles[0]], [trajfiles[0], trajfiles[2]]]
         reader = api.source(fragments, top=top_file)

         expected_lengths = [
             sum(nframes),
             nframes[0],
             nframes[0] + nframes[2],
         ]
         np.testing.assert_equal(reader.trajectory_lengths(),
                                 expected_lengths)
    def test_lagged_iterator_optimized(self):
        """Lagged iteration with a large stride must yield matching chunks.

        Uses a stride one above ``iterload.MAX_STRIDE_SWITCH_TO_RA`` and
        temporarily sets ``iterload.MEMORY_CUTOFF`` to 8. Consecutive chunks
        of each trajectory are compared with the strided coordinates and the
        strided coordinates shifted by ``lagtime``.
        """
        import pyemma.coordinates as coor
        from pyemma.coordinates.tests.util import create_traj, get_top
        from pyemma.coordinates.util.patches import iterload

        trajectory_length = 4720
        lagtime = 20
        n_trajs = 15
        stride = iterload.MAX_STRIDE_SWITCH_TO_RA + 1

        top = get_top()
        trajs_data = [
            create_traj(top=top, length=trajectory_length)
            for _ in range(n_trajs)
        ]
        trajs = [entry[0] for entry in trajs_data]
        flat_coords = [entry[1].reshape(-1, 9) for entry in trajs_data]
        xyzs = [coords[::stride] for coords in flat_coords]
        xyzs_lagged = [coords[lagtime::stride] for coords in flat_coords]

        reader = coor.source(trajs, stride=stride, top=top, chunksize=5000)

        # Remember the module-wide cutoff so it can be restored afterwards.
        memory_cutoff = iterload.MEMORY_CUTOFF
        try:
            iterload.MEMORY_CUTOFF = 8
            it = reader.iterator(stride=stride,
                                 lag=lagtime,
                                 chunk=5000,
                                 return_trajindex=True)
            with it:
                curr_itraj = 0
                offset = 0  # rows of the current trajectory seen so far
                for itraj, X, Y in it:
                    if itraj != curr_itraj:
                        # A new trajectory starts: reset the row offset.
                        curr_itraj, offset = itraj, 0
                    np.testing.assert_equal(X.shape, Y.shape)
                    n_rows = len(X)
                    window = slice(offset, offset + n_rows)
                    np.testing.assert_array_almost_equal(
                        X, xyzs[itraj][window])
                    np.testing.assert_array_almost_equal(
                        Y, xyzs_lagged[itraj][window])
                    offset += n_rows
        finally:
            iterload.MEMORY_CUTOFF = memory_cutoff
    def test_fragmented_reader_random_access1(self):
        """Random access iteration over a fragmented reader, as trajectories.

        The underlying readers are switched to return trajectory objects.
        For several chunk sizes the chunks yielded for each trajectory index
        are joined and compared with the frames mdtraj loads for the indices
        in ``self.stride``.

        The comparison is keyed by the trajectory index the iterator
        reported. Pairing results by their position in a compacted list
        would mis-align them as soon as a trajectory is absent from the
        stride array.
        """
        from collections import defaultdict

        with TemporaryDirectory() as td:
            trajfiles = []
            for i in range(3):
                trajfiles.append(
                    create_traj(start=i * 10, dir=td, length=20)[0])
            topfile = get_top()
            # First entry is a fragmented trajectory made of two files.
            trajfiles = [(trajfiles[0], trajfiles[1]), trajfiles[0],
                         trajfiles[2]]

            source = coor.source(trajfiles, top=topfile)
            assert isinstance(source, FragmentedTrajectoryReader)

            # NOTE(review): ``r[0]`` is taken when ``r`` is *not* a
            # list/tuple -- confirm against the layout of ``_readers``.
            for r in source._readers:
                if not isinstance(r, (list, tuple)):
                    r = r[0]
                for _r in r:
                    _r._return_traj_obj = True

            keys = np.unique(self.stride[:, 0])
            for chunksize in [0, 2, 3, 100000]:
                # Chunks yielded by the iterator, grouped by trajectory index.
                frames = defaultdict(list)
                with source.iterator(chunk=chunksize,
                                     return_trajindex=True,
                                     stride=self.stride) as it:
                    for itraj, t in it:
                        frames[itraj].append(t)

                for itraj, chunks in frames.items():
                    if itraj not in keys:
                        continue
                    # Concatenate all chunks of this trajectory.
                    joined = chunks[0]
                    for t in chunks[1:]:
                        joined = joined.join(t)

                    traj = mdtraj.load(trajfiles[itraj], top=topfile)
                    frame_ids = np.array(
                        self.stride[self.stride[:, 0] == itraj][:, 1])
                    np.testing.assert_equal(
                        joined.xyz,
                        traj.xyz[frame_ids],
                        err_msg="not equal for chunksize=%s" % chunksize)
Esempio n. 15
0
    def test_with_fragmented_reader(self):
        """``save_traj`` must honour random access indices on fragments.

        Index layout used below:

        * trajectory 0 (``trajfiles[2]``) is skipped,
        * trajectory 1 (``trajfiles[0]`` followed by ``trajfiles[1]``)
          contributes frames 0, 1, 2, 10 of its first fragment and frames
          1 and 34 of its second fragment (global indices 36 and 69),
        * trajectory 2 (``trajfiles[2]``) contributes frame 5.
        """
        from pyemma.util.files import TemporaryDirectory

        trajlen = 35
        ra_indices = np.array(
            [
                [1, 0],
                [1, 1],
                [1, 2],
                [1, 10],
                [1, trajlen + 1],
                [1, 2 * trajlen - 1],
                [2, 5],
            ],
            dtype=int)

        with TemporaryDirectory() as td:
            trajfiles = []
            xyzs = []
            for i in range(3):
                created = create_traj(start=i * 10, dir=td, length=trajlen)
                trajfiles.append(created[0])
                xyzs.append(created[1])

            topfile = get_top()
            frag_traj = [
                trajfiles[2],
                [trajfiles[0], trajfiles[1]],
                trajfiles[2],
            ]

            # Expected coordinates, in the order of ``ra_indices``.
            expected = np.vstack((
                xyzs[0][np.array([0, 1, 2, 10]), :],
                xyzs[1][np.array([1, 34])],
                np.array([(xyzs[2][5, :])]),
            ))

            reader = coor.source(frag_traj, top=topfile)

            for cs in range(1, 10):
                traj = save_traj(reader, ra_indices, None, chunksize=cs)
                np.testing.assert_almost_equal(traj.xyz, expected)