예제 #1
0
    def test_write_to_csv_propagate_filenames(self):
        """write_to_csv derives output names from the input file names
        (extension swapped) and the written data round-trips via a reader."""
        from pyerna.coordinates import source, tica
        with TemporaryDirectory() as td:
            # three identical random trajectories saved as .npy files
            data = [np.random.random((20, 3))] * 3
            fns = [
                os.path.join(td, f)
                for f in ('blah.npy', 'blub.npy', 'foo.npy')
            ]
            for x, fn in zip(data, fns):
                np.save(fn, x)
            reader = source(fns)
            assert reader.filenames == fns
            tica_obj = tica(reader, lag=1, dim=2)
            # should write one ".exotic" file per input trajectory
            tica_obj.write_to_csv(extension=".exotic", chunksize=3)
            res = sorted([
                os.path.abspath(x) for x in glob(td + os.path.sep + '*.exotic')
            ])
            self.assertEqual(len(res), len(fns))
            # output names are the input names with the extension replaced
            desired_fns = sorted([s.replace('.npy', '.exotic') for s in fns])
            self.assertEqual(res, desired_fns)

            # compare written results
            expected = tica_obj.get_output()
            actual = source(list(s.replace('.npy', '.exotic')
                                 for s in fns)).get_output()
            assert len(actual) == len(fns)
            for a, e in zip(actual, expected):
                np.testing.assert_allclose(a, e)
예제 #2
0
    def test_fragmented_trajs(self):
        """Merge two fragmented readers (two fragments each) and verify the
        combined output equals the concatenated reference columns."""
        first_half = np.arange(20)
        second_half = np.arange(20, 40)

        # two identical fragmented sources, each one trajectory of two pieces
        readers = [source([(first_half, second_half)]) for _ in range(2)]
        merged = SourcesMerger(readers)

        result = merged.get_output()
        # each source contributes one column 0..39
        column = np.atleast_2d(np.arange(40))
        reference = [np.concatenate((column, column), axis=0).T]

        np.testing.assert_equal(result, reference)
예제 #3
0
    def test_length_and_content_feature_reader_and_TICA(self):
        """Strided output of a feature reader and of TICA has consistent
        lengths, and reader content matches the strided reference data."""
        for stride in range(1, 100, 23):
            r = coor.source(self.trajnames, top=self.temppdb)
            t = coor.tica(data=r, lag=2, dim=2)

            # subsample data
            out_tica = t.get_output(stride=stride)
            out_reader = r.get_output(stride=stride)

            # get length in different ways
            len_tica = [x.shape[0] for x in out_tica]
            len_reader = [x.shape[0] for x in out_reader]
            len_trajs = t.trajectory_lengths(stride=stride)
            # ceil-division: number of frames that survive the stride
            len_ref = [(x.shape[0]-1)//stride+1 for x in self.data]
            # print 'len_ref', len_ref

            # compare length
            np.testing.assert_equal(len_trajs, len_ref)
            self.assertTrue(len_ref == len_tica)
            self.assertTrue(len_ref == len_reader)

            # compare content (reader)
            for ref_data, test_data in zip(self.data, out_reader):
                # flatten per-frame coordinates to (n_frames, n_atoms * 3)
                ref_data_reshaped = ref_data.reshape((ref_data.shape[0], ref_data.shape[1]*3))
                self.assertTrue(np.allclose(ref_data_reshaped[::stride, :], test_data, atol=1E-3))
예제 #4
0
 def test_non_matching_lengths(self):
     """SourcesMerger must raise ValueError when sources disagree in length."""
     base = self.readers[1].data
     # truncate the last trajectory so lengths no longer match
     mismatched = [base[0], base[1], base[2][:20]]
     self.readers.append(source(mismatched))
     with self.assertRaises(ValueError) as ctx:
         SourcesMerger(self.readers)
     # error message should mention the length-matching requirement
     self.assertIn('matching', ctx.exception.args[0])
예제 #5
0
File: vamp.py Project: hackyhacker/PyERNA
    def partial_fit(self, X):
        """ incrementally update the covariances and mean.

        Parameters
        ----------
        X: array, list of arrays, PyEMMA reader
            input data.

        Returns
        -------
        model : the updated model object.
            NOTE(review): sibling partial_fit implementations return ``self``
            (sklearn convention) — confirm returning the model is intended.

        Notes
        -----
        The projection matrix is first being calculated upon its first access.
        """
        from pyerna.coordinates import source
        iterable = source(X)

        # dimensionality check only applies when dim is a fixed integer
        if isinstance(self.dim, int):
            indim = iterable.dimension()
            if not self.dim <= indim:
                raise RuntimeError(
                    "requested more output dimensions (%i) than dimension"
                    " of input data (%i)" % (self.dim, indim))

        self._covar = self._init_covar(partial=True)
        self._covar.partial_fit(iterable)
        # push the updated covariance statistics into the model
        self.model.update_model_params(
            mean_0=self._covar.mean,  # TODO: inefficient, fixme
            mean_t=self._covar.mean_tau,
            C00=self._covar.C00_,
            C0t=self._covar.C0t_,
            Ctt=self._covar.Ctt_)

        # mark stale so derived quantities are recomputed on next access
        self._estimated = False
        return self.model
예제 #6
0
    def test_with_save_traj(self):
        """save_traj on a fragmented reader retrieves frames that lie in
        different fragments of the same fragmented trajectory."""
        path = pkg_resources.resource_filename(__name__, 'data') + os.path.sep

        pdb_file = os.path.join(path, 'bpti_ca.pdb')
        traj_files = [
            os.path.join(path, 'bpti_001-033.xtc'),
            os.path.join(path, 'bpti_034-066.xtc'),
            os.path.join(path, 'bpti_067-100.xtc')
        ]

        # wrapping the file list makes the three pieces ONE fragmented trajectory
        source_frag = coor.source([traj_files], top=pdb_file)
        full_data = source_frag.get_output()[0]
        # (trajectory index, global frame index) pairs
        last_frame_fragment_0 = [0,32]
        first_frame_fragment_1 = [0,33]
        first_frame_fragment_2 = [0,66]

        # flatten a trajectory's coordinates to (n_frames, n_atoms * 3)
        reshape = lambda f: f.xyz.reshape((f.xyz.shape[0],f.xyz.shape[1] * f.xyz.shape[2])).squeeze()

        # Frames in the first fragment:
        frames = coor.save_traj(source_frag, [last_frame_fragment_0], None)
        np.testing.assert_equal(reshape(frames), full_data[32])

        # Frames the first and second fragments
        frames = coor.save_traj(source_frag, [last_frame_fragment_0, first_frame_fragment_1], None)
        np.testing.assert_equal(reshape(frames), full_data[np.array([32, 33])])

        # Frames only in the second fragment
        frames = coor.save_traj(source_frag, [first_frame_fragment_1], None)
        np.testing.assert_equal(reshape(frames), full_data[33])

        # Frames only in the second and third fragment
        frames = coor.save_traj(source_frag, [first_frame_fragment_1, first_frame_fragment_2], None)
        np.testing.assert_equal(reshape(frames), full_data[np.array([33, 66])])
예제 #7
0
    def test_fragmented_xtc(self):
        """Fragmented xtc readers reproduce the mdtraj-loaded coordinates."""
        from pyerna.coordinates.tests.util import create_traj

        top_file = pkg_resources.resource_filename(__name__, 'data/test.pdb')
        trajfiles = []
        for _ in range(3):
            fname, _, _ = create_traj(top_file)
            trajfiles.append(fname)
        try:
            # three trajectories: one consisting of all three, one consisting of the first,
            # one consisting of the first and the last
            fragments = [trajfiles, [trajfiles[0]], [trajfiles[0], trajfiles[2]]]
            reader = coor.source(fragments, top=top_file)
            reader.chunksize = 1000

            out = reader.get_output(stride=1)
            # ground truth: load each file with mdtraj and flatten per frame
            trajs = [mdtraj.load(f, top=top_file).xyz.reshape(-1, 9)
                     for f in trajfiles]

            np.testing.assert_equal(out[0], np.vstack(trajs))
            np.testing.assert_equal(out[1], trajs[0])
            np.testing.assert_equal(out[2], np.vstack((trajs[0], trajs[2])))
        finally:
            # best-effort cleanup of the generated trajectory files
            for fname in trajfiles:
                try:
                    os.unlink(fname)
                except EnvironmentError:
                    pass
예제 #8
0
    def _test_fragment_reader(self, file_format, stride, lag, chunksize):
        """Iterate a fragmented reader (all files as ONE trajectory) with the
        given stride/lag/chunksize and compare against the stacked raw data."""
        trajs = self.test_trajs[file_format]

        # wrapping the file list in another list creates one fragmented trajectory
        reader = coor.source([trajs], top=self.pdb_file, chunksize=chunksize)
        assert isinstance(reader, FragmentedTrajectoryReader)

        data = np.vstack(self.traj_data)
        itraj = None

        if lag > 0:
            collected = []
            collected_lagged = []
            for itraj, X, Y in reader.iterator(stride=stride, lag=lag):
                collected.append(X)
                collected_lagged.append(Y)
            assert collected
            assert collected_lagged
            assert len(collected) == len(collected_lagged)
            collected = np.vstack(collected)
            collected_lagged = np.vstack(collected_lagged)
            # the unlagged stream is truncated to the lagged stream's length
            np.testing.assert_allclose(data[::stride][0:len(collected_lagged)], collected, atol=self.eps,
                                                    err_msg="lag={}, stride={}, cs={}".format(lag, stride, chunksize
                                                 ))
            np.testing.assert_allclose(data[lag::stride], collected_lagged, atol=self.eps)
        else:
            collected = []
            for itraj, X in reader.iterator(stride=stride):
                collected.append(X)
            assert collected
            collected = np.vstack(collected)
            np.testing.assert_allclose(data[::stride], collected, atol=self.eps)
            assert itraj == 0 # only one trajectory
예제 #9
0
    def test_lagged_iterator(self):
        """Lagged X/Y pairs have equal shapes and match the head and the
        lag-shifted tail of each trajectory, for a range of chunk settings."""
        import pyerna.coordinates as coor
        from pyerna.coordinates.tests.util import create_traj, get_top

        trajectory_length = 4720
        lagtime = 1000
        n_trajs = 15

        top = get_top()
        trajs_data = [
            create_traj(top=top, length=trajectory_length)
            for _ in range(n_trajs)
        ]
        trajs = [t[0] for t in trajs_data]
        # reference coordinates flattened to (n_frames, n_atoms * 3)
        xyzs = [t[1].reshape(-1, 9) for t in trajs_data]

        reader = coor.source(trajs, top=top, chunksize=5000)

        # edge cases: default, no chunking, exact length, longer than data
        for chunk in [
                None, 0, trajectory_length, trajectory_length + 1,
                trajectory_length + 1000
        ]:
            it = reader.iterator(lag=lagtime,
                                 chunk=chunk,
                                 return_trajindex=True)
            with it:
                for itraj, X, Y in it:
                    np.testing.assert_equal(X.shape, Y.shape)
                    np.testing.assert_equal(X.shape[0],
                                            trajectory_length - lagtime)
                    np.testing.assert_array_almost_equal(
                        X, xyzs[itraj][:trajectory_length - lagtime])
                    np.testing.assert_array_almost_equal(
                        Y, xyzs[itraj][lagtime:])
예제 #10
0
    def setUp(self):
        """Create three readers over compatible data: an MD feature reader,
        an in-memory array reader of matching shape, and a TICA transform."""
        self.readers = []
        data_dir = pkg_resources.resource_filename('pyerna.coordinates.tests',
                                                   'data')
        # three md trajs
        trajs = glob(data_dir + "/bpti_0*.xtc")
        top = os.path.join(data_dir, 'bpti_ca.pdb')
        self.readers.append(source(trajs, top=top))
        self.readers[0].featurizer.add_all()
        ndim = self.readers[0].ndim
        # three random arrays
        lengths = self.readers[0].trajectory_lengths()
        arrays = [np.random.random((length, ndim)) for length in lengths]
        self.readers.append(source(arrays))

        # a transform (TICA on the array reader) as the third source
        self.readers.append(tica(self.readers[-1], dim=20))
예제 #11
0
 def test_MD_data(self):
     """Smoke test: TICA on a tiny MD data set must not raise."""
     # this is too little data to get reasonable results. We just test to avoid exceptions
     data_path = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
     self.pdb_file = os.path.join(data_path, 'bpti_ca.pdb')
     self.xtc_file = os.path.join(data_path, 'bpti_mini.xtc')
     reader = source(self.xtc_file, top=self.pdb_file)
     # constructing the estimator without an exception is the assertion
     tica(reader, lag=1)
예제 #12
0
File: pca.py Project: hackyhacker/PyERNA
    def partial_fit(self, X):
        """Incrementally update the PCA estimate with a new chunk of data.

        Parameters
        ----------
        X : array, list of arrays, or reader
            input data.
        """
        from pyerna.coordinates import source

        data_source = source(X)
        self._estimate(data_source, partial=True)
        # derived quantities are recomputed lazily on next access
        self._estimated = False
        return self
예제 #13
0
 def test_parametrize_with_stride(self):
     """TICA over several strides: eigenvalues stay <= 1, and a RuntimeError
     is only acceptable when the lag is incompatible with the stride."""
     lag_time = 5
     for stride in range(1, 100, 23):
         reader = coor.source(self.trajnames, top=self.temppdb)
         try:
             model = coor.tica(reader, lag=lag_time, stride=stride, dim=2)
             # force_eigenvalues_le_one=True enables an internal consistency check in TICA
             self.assertTrue(np.all(model.eigenvalues <= 1.0 + 1.E-12))
         except RuntimeError:
             # only tolerated when lag is not a multiple of the stride
             assert lag_time % stride != 0
예제 #14
0
 def test_pass_reader(self):
     """_frames_from_file accepts an already-constructed reader instance."""
     from pyerna.coordinates import source
     reader = source(self.trajfiles, top=self.pdbfile)
     reader.in_memory = True
     # NOTE(review): np.random.randint(0, 1) always yields 0, so only
     # trajectory index 0 is ever drawn — confirm whether a wider range
     # (e.g. the number of trajectories) was intended.
     inds = np.vstack((np.random.randint(0, 1), np.random.randint(0,
                                                                  100))).T
     traj_test = _frames_from_file(reader.filenames,
                                   self.pdbfile,
                                   inds,
                                   reader=reader)
예제 #15
0
    def _test_base_reader(self, file_format, stride, skip, chunksize, transform):
        """Iterate a reader with stride/skip at the given chunksize and check
        chunk shapes, per-trajectory content and total frame accounting."""
        trajs = self.test_trajs[file_format]
        reader = coor.source(trajs, top=self.pdb_file, chunksize=chunksize)

        if transform == 'identity':
            reader = util.create_transform(reader)

        if chunksize is not None:
            np.testing.assert_equal(reader.chunksize, chunksize)

        it = reader.iterator(stride=stride, skip=skip, lag=0, chunk=chunksize)

        assert it.chunksize is not None
        if chunksize is None:
            # reader picks a chunksize bounded by the configured memory limit
            max_frames = max_chunksize_from_config(reader.output_type().itemsize)
            assert it.chunksize <= max_frames
            # now we set the chunksize to max_frames, to be able to compare the actual shapes of iterator output.
            chunksize = max_frames

        # reference data after skip/stride; trajectories left empty are skipped
        traj_data = [data[skip::stride] for data in self.traj_data]
        valid_itraj = [i for i, x in enumerate(traj_data) if len(x) > 0]
        output = defaultdict(list)

        with it:
            current_itraj = None
            t = t_total = 0
            for itraj, chunk in it:
                # reset t upon next trajectory
                if itraj != current_itraj:
                    current_itraj = itraj
                    t = 0

                # every chunk obeys the chunksize bound (0 means one big chunk)
                assert len(chunk) <= chunksize or chunksize == 0, '%s' % it
                if chunksize != 0 and len(traj_data[itraj]) - t >= chunksize:
                    assert len(chunk) == chunksize
                elif chunksize == 0:
                    assert len(chunk) == len(traj_data[itraj])

                output[itraj].append(chunk)

                t += len(chunk)
                t_total += len(chunk)

            for itraj in valid_itraj:
                assert itraj in output.keys()

            for itraj in output.keys():
                assert itraj in valid_itraj
                output[itraj] = np.vstack(output[itraj])
                np.testing.assert_allclose(output[itraj], traj_data[itraj], atol=self.eps)

            # total frame count must match the reader's own bookkeeping
            assert t_total == sum(len(x) for x in output.values())
            assert t_total == reader.n_frames_total(stride=stride, skip=skip)
예제 #16
0
    def setUpClass(cls):
        """Build a reader -> TICA -> k-means pipeline on the BPTI test data
        and save the clustering (with its streaming chain) to a temp file.

        Side effects: sets ``cls.model_file`` (path the tests load) and
        ``cls.tmpdir`` (the private directory that holds it).
        """
        from pyerna.datasets import get_bpti_test_data

        d = get_bpti_test_data()
        trajs, top = d['trajs'], d['top']
        s = source(trajs, top=top)

        t = tica(s, lag=1)

        c = cluster_kmeans(t)
        # tempfile.mktemp() is deprecated and race-prone: another process may
        # claim the returned name before save() creates the file. Creating a
        # private directory and placing the file inside it avoids the race
        # while still handing save() a non-existing path.
        cls.tmpdir = tempfile.mkdtemp()
        cls.model_file = os.path.join(cls.tmpdir, 'pyerna_model')
        c.save(cls.model_file, save_streaming_chain=True)
예제 #17
0
    def _test_lagged_reader(self, file_format, stride, skip, chunksize, lag):
        """Lagged iteration: collected chunks and lagged chunks must match the
        skip/stride reference data, with the unlagged stream truncated."""
        trajs = self.test_trajs[file_format]
        reader = coor.source(trajs, top=self.pdb_file, chunksize=chunksize)

        it = reader.iterator(stride=stride, skip=skip, lag=lag, chunk=chunksize)
        traj_data = [data[skip::stride] for data in self.traj_data]
        traj_data_lagged = [data[skip + lag::stride] for data in self.traj_data]
        # trajectories whose lagged part is empty yield nothing at all
        valid_itrajs = [i for i, x in enumerate(traj_data_lagged) if len(x) > 0]

        assert it.chunksize is not None
        if chunksize is None:
            # reader chose a chunksize bounded by the configured memory limit
            chunksize = max_chunksize_from_config(reader.output_type().itemsize)

        with it:
            current_itraj = None
            t = t_total = 0
            collected = defaultdict(list)
            collected_lag = defaultdict(list)

            for itraj, chunk, chunk_lagged in it:
                # reset t upon next trajectory
                if itraj != current_itraj:
                    current_itraj = itraj
                    t = 0
                assert len(chunk) <= chunksize or chunksize == 0
                if chunksize != 0 and len(traj_data[itraj]) - t >= chunksize:
                    assert len(chunk) <= chunksize
                elif chunksize == 0:
                    assert len(chunk) == len(chunk_lagged) == len(traj_data_lagged[itraj])
                collected[itraj].append(chunk)
                collected_lag[itraj].append(chunk_lagged)

                t += len(chunk)
                t_total += len(chunk)

        for itraj in valid_itrajs:
            assert itraj in collected.keys()
            assert itraj in collected_lag.keys()

        assert set(collected.keys()) == set(collected_lag.keys())
        for itraj in collected.keys():
            assert itraj in valid_itrajs
            collected[itraj] = np.vstack(collected[itraj])
            collected_lag[itraj] = np.vstack(collected_lag[itraj])
            # unlagged data is truncated to the length of the lagged data.
            max_len = len(traj_data_lagged[itraj])
            np.testing.assert_allclose(collected[itraj], traj_data[itraj][:max_len], atol=self.eps)
            np.testing.assert_allclose(collected_lag[itraj], traj_data_lagged[itraj], atol=self.eps)

        assert t_total == sum(len(x) for x in collected.values())
        assert t_total == reader.n_frames_total(stride=stride, skip=skip+lag)
예제 #18
0
    def test_feature_correlation_MD(self):
        """feature_TIC_correlation matches a reference correlation computed
        directly from the feature and TICA output trajectories."""
        # Copying from the test_MD_data
        path = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
        self.pdb_file = os.path.join(path, 'bpti_ca.pdb')
        self.xtc_file = os.path.join(path, 'bpti_mini.xtc')
        inp = source(self.xtc_file, top=self.pdb_file)
        ticamini = tica(inp, lag=1, kinetic_map=False)

        feature_traj = ticamini.data_producer.get_output()[0]
        tica_traj = ticamini.get_output()[0]
        test_corr = ticamini.feature_TIC_correlation
        # reference correlation from an independent helper implementation
        true_corr = mycorrcoef(feature_traj, tica_traj, ticamini.lag)
        #assert np.isclose(test_corr, true_corr).all()
        np.testing.assert_allclose(test_corr, true_corr, atol=1.E-8)
예제 #19
0
    def partial_fit(self, X):
        """ incrementally update the estimates

        Parameters
        ----------
        X: array, list of arrays, PyEMMA reader
            input data.
        """
        from pyerna.coordinates import source

        self._estimate(source(X), partial_fit=True)
        # NOTE(review): this sets _estimated = True, while sibling
        # partial_fit implementations set it to False to force lazy
        # recomputation — confirm this difference is intentional.
        self._estimated = True

        return self
예제 #20
0
    def test_with_fragmented_reader_chunksize_0(self):
        """save_traj via a fragmented reader with chunksize=0 matches the
        mdtraj-based reference extraction."""
        # deliberately regroup the bpti files into a fake fragmented trajectory
        fragmented = [
            [self.trajfiles[0], self.trajfiles[1]],
            self.trajfiles[2],
            self.trajfiles[2],
        ]
        frag_reader = coor.source(fragmented, top=self.pdbfile, chunksize=0)
        assert frag_reader.chunksize == 0

        extracted = save_traj(frag_reader, self.sets, None)
        reference = save_traj_w_md_load_frame(self.reader, self.sets)

        # coordinate-level comparison of the two trajectory objects
        found_diff, errmsg = compare_coords_md_trajectory_objects(
            extracted, reference, atom=0)
        np.testing.assert_equal(extracted.xyz, reference.xyz)
        self.assertFalse(found_diff, errmsg)
예제 #21
0
    def test_feature_correlation_MD(self):
        """feature_PC_correlation agrees with np.corrcoef of features vs PCs."""
        # Copying from the test_MD_data
        path = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
        self.pdb_file = os.path.join(path, 'bpti_ca.pdb')
        self.xtc_file = os.path.join(path, 'bpti_mini.xtc')
        inp = source(self.xtc_file, top=self.pdb_file)
        pcamini = pca(inp)

        feature_traj = pcamini.data_producer.get_output()[0]
        nfeat = feature_traj.shape[1]
        pca_traj = pcamini.get_output()[0]
        npcs = pca_traj.shape[1]

        test_corr = pcamini.feature_PC_correlation
        # cross-block of the joint correlation matrix: features (rows) x PCs (cols)
        true_corr = np.corrcoef(feature_traj.T, pca_traj.T)[:nfeat, -npcs:]
        np.testing.assert_allclose(test_corr, true_corr, atol=1.E-8)
예제 #22
0
    def test_min_rmsd(self):
        """Assignment with the minRMSD metric uses every one of the 9 centers."""
        import pyerna.datasets as data
        d = data.get_bpti_test_data()
        reader = coor.source(d['trajs'], top=d['top'])

        # pick 3 frames from each of the 3 trajectories as cluster centers
        N_centers = 9
        centers = np.asarray((reader.ra_itraj_jagged[0, [0, 1, 7]],
                              reader.ra_itraj_jagged[1, [32, 1, 23]],
                              reader.ra_itraj_jagged[2, [17, 8, 15]])
                             ).reshape((N_centers, -1))
        dtraj = coor.assign_to_centers(reader, centers=centers, metric='minRMSD', return_dtrajs=True)

        # centers are actual data frames, so each must be assigned at least once
        num_assigned_states = len(np.unique(np.concatenate(dtraj)))
        self.assertEqual(num_assigned_states, N_centers,
                         "assigned states=%s out of %s possible ones."
                         % (num_assigned_states, N_centers))
예제 #23
0
    def test_content_data_in_memory(self):
        """An in-memory reader reproduces its arrays exactly for several strides."""
        # generate random trajectories of varying length
        n_trajs = 10
        trajs = []
        for _ in range(n_trajs):
            length = int(np.random.rand()*1000+10)
            trajs.append(np.random.randn(length, 10).astype(np.float32))

        # read data
        reader = coor.source(trajs)

        # in-memory data must round-trip bit-exactly under every stride
        for stride in range(1, 10, 3):
            strided_output = reader.get_output(stride=stride)
            for raw, read_back in zip(trajs, strided_output):
                self.assertTrue(np.all(raw[::stride] == read_back))  # here we can test exact equality
예제 #24
0
    def test_assignment_multithread_minrsmd(self):
        """minRMSD assignment gives identical results with 1 and 2 worker jobs."""
        # re-do assignment with multiple threads and compare results
        import pyerna.datasets as data
        d = data.get_bpti_test_data()
        reader = coor.source(d['trajs'], top=d['top'])

        # 3 frames from each of the 3 trajectories serve as centers
        N_centers = 9
        centers = np.asarray((reader.ra_itraj_jagged[0, [0, 1, 7]],
                              reader.ra_itraj_jagged[1, [32, 1, 23]],
                              reader.ra_itraj_jagged[2, [17, 8, 15]])
                             ).reshape((N_centers, -1))
        chunksize = 1000

        assignment_mp = coor.assign_to_centers(reader, centers, n_jobs=2, chunksize=chunksize, metric='minRMSD')
        assignment_sp = coor.assign_to_centers(reader, centers, n_jobs=1, chunksize=chunksize, metric='minRMSD')

        np.testing.assert_equal(assignment_mp, assignment_sp)
예제 #25
0
    def test_lagged_iterator_optimized(self):
        """Lagged iteration with a stride just past MAX_STRIDE_SWITCH_TO_RA
        (presumably the random-access code path — confirm in iterload) still
        yields correct chunks."""
        import pyerna.coordinates as coor
        from pyerna.coordinates.tests.util import create_traj, get_top
        from pyerna.coordinates.util.patches import iterload

        trajectory_length = 4720
        lagtime = 20
        n_trajs = 15
        # one past the threshold so the alternative stride handling is taken
        stride = iterload.MAX_STRIDE_SWITCH_TO_RA + 1

        top = get_top()
        trajs_data = [
            create_traj(top=top, length=trajectory_length)
            for _ in range(n_trajs)
        ]
        trajs = [t[0] for t in trajs_data]
        # strided / strided+lagged reference coordinates, flattened per frame
        xyzs = [t[1].reshape(-1, 9)[::stride] for t in trajs_data]
        xyzs_lagged = [
            t[1].reshape(-1, 9)[lagtime::stride] for t in trajs_data
        ]

        reader = coor.source(trajs, stride=stride, top=top, chunksize=5000)

        memory_cutoff = iterload.MEMORY_CUTOFF
        try:
            # shrink the global cutoff so the optimized branch is exercised
            iterload.MEMORY_CUTOFF = 8
            it = reader.iterator(stride=stride,
                                 lag=lagtime,
                                 chunk=5000,
                                 return_trajindex=True)
            with it:
                curr_itraj = 0
                t = 0
                for itraj, X, Y in it:
                    # reset the frame offset at each new trajectory
                    if itraj != curr_itraj:
                        curr_itraj = itraj
                        t = 0
                    np.testing.assert_equal(X.shape, Y.shape)
                    l = len(X)
                    np.testing.assert_array_almost_equal(
                        X, xyzs[itraj][t:t + l])
                    np.testing.assert_array_almost_equal(
                        Y, xyzs_lagged[itraj][t:t + l])
                    t += l
        finally:
            # always restore the module-level cutoff for other tests
            iterload.MEMORY_CUTOFF = memory_cutoff
    def test_partial_fit(self):
        """Feeding trajectories one-by-one via partial_fit reproduces the
        covariances and eigenvalues of a single full estimation."""
        from pyerna.coordinates import source
        reader = source(self.trajnames, top=self.temppdb)
        reader_output = reader.get_output()

        # cover the supported output parameterizations
        for output_params in [{
                'kinetic_map': False
        }, {
                'kinetic_map': True
        }, {
                'kinetic_map': False,
                'commute_map': True
        }]:
            params = {'lag': 10, 'dim': self.dim}
            params.update(output_params)

            tica_obj = tica(**params)
            tica_obj.partial_fit(reader_output[0])
            assert not tica_obj._estimated
            # access eigenvectors to force diagonalization
            tica_obj.eigenvectors
            assert tica_obj._estimated

            # feeding more data invalidates the previous diagonalization
            tica_obj.partial_fit(reader_output[1])
            assert not tica_obj._estimated

            tica_obj.eigenvalues
            assert tica_obj._estimated

            for traj in reader_output[2:]:
                tica_obj.partial_fit(traj)

            # reference
            ref = tica(reader, **params)

            np.testing.assert_allclose(tica_obj.cov, ref.cov, atol=1e-15)
            np.testing.assert_allclose(tica_obj.cov_tau,
                                       ref.cov_tau,
                                       atol=1e-15)

            np.testing.assert_allclose(tica_obj.eigenvalues,
                                       ref.eigenvalues,
                                       atol=1e-15)
예제 #27
0
    def setUp(self):
        """Prepare trajectory files, reproducible random frame sets, output
        paths and a reference trajectory for the save_traj tests."""
        self.eps = 1e-10
        path = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
        self.pdbfile = os.path.join(path, 'bpti_ca.pdb')
        self.trajfiles = [
            os.path.join(path, 'bpti_001-033.xtc'),
            os.path.join(path, 'bpti_034-066.xtc'),
            os.path.join(path, 'bpti_067-100.xtc')
        ]

        # Create random sets of files and frames to be retrieved from trajfiles
        n_members_set1 = 10
        n_members_set2 = 20
        from pyerna.util.contexts import numpy_random_seed
        # fixed seed -> reproducible (trajectory index, frame index) pairs
        with numpy_random_seed(34):
            # column 0: trajectory index (0 or 2); column 1: frame index < 32
            set_1 = np.vstack((np.random.permutation(
                [0, 2] * n_members_set1)[:n_members_set1],
                               np.random.randint(32, size=n_members_set1))).T

            set_2 = np.vstack((np.random.permutation(
                [0, 2] * n_members_set2)[:n_members_set2],
                               np.random.randint(32, size=n_members_set2))).T

        self.sets = [set_1, set_2]

        self.subdir = tempfile.mkdtemp(suffix='save_trajs_test/')
        self.outfile = os.path.join(self.subdir, 'save_traj_test.xtc')

        # Instantiate the reader
        self.reader = coor.source(self.trajfiles, top=self.pdbfile)
        self.reader.chunksize = 30
        # NOTE(review): plain concatenation relies on self.subdir ending with
        # a separator (the mkdtemp suffix ends in '/'); os.path.join would be
        # more robust — confirm before changing.
        self.n_pass_files = [
            self.subdir + 'n_pass.set_%06u.xtc' % ii
            for ii in range(len(self.sets))
        ]
        self.one_pass_files = [
            self.subdir + '1_pass.set_%06u.xtc' % ii
            for ii in range(len(self.sets))
        ]

        self.traj_ref = save_traj_w_md_load_frame(self.reader, self.sets)
        self.strides = [2, 3, 5]
예제 #28
0
File: tica.py Project: hackyhacker/PyERNA
    def partial_fit(self, X):
        """ incrementally update the covariances and mean.

        Parameters
        ----------
        X: array, list of arrays, PyEMMA reader
            input data.

        Notes
        -----
        The projection matrix is first being calculated upon its first access.
        """
        from pyerna.coordinates import source
        iterable = source(X, chunksize=self.chunksize)

        # refuse to request more output dimensions than the input provides
        indim = iterable.dimension()
        if not self.dim <= indim:
            raise RuntimeError(
                "requested more output dimensions (%i) than dimension"
                " of input data (%i)" % (self.dim, indim))
        # lazily create the running covariance estimator on the first call
        if self._covar is None:
            self._covar = LaggedCovariance(c00=True,
                                           c0t=True,
                                           ctt=False,
                                           remove_data_mean=True,
                                           reversible=self.reversible,
                                           lag=self.lag,
                                           bessel=False,
                                           stride=self.stride,
                                           skip=self.skip,
                                           weights=self.weights,
                                           ncov_max=self.ncov_max)
        self._covar.partial_fit(iterable)
        # push the updated covariance statistics into the model
        self.model.update_model_params(
            mean=self._covar.mean,  # TODO: inefficient, fixme
            cov=self._covar.C00_,
            cov_tau=self._covar.C0t_)

        # mark stale so the eigendecomposition is redone on next access
        self._estimated = False

        return self
예제 #29
0
    def test_with_fragmented_reader(self):
        """save_traj resolves random-access (traj, frame) indices that span
        fragments of a fragmented trajectory, for several chunk sizes."""
        from pyerna.util.files import TemporaryDirectory
        trajlen = 35
        # trajectory 0 (first trajectory, is trajfiles[2])
        #   -> skipped
        # trajectory 1 (second trajectory, is {trajfiles[0], trajfiles[1]})
        #   fragment 1:
        #       -> frames 0,1,2,10
        #   fragment 2:
        #       -> frames 1 (i.e., 36) and 34 (i.e., 69)
        # trajectory 2 (third trajectory, is trajfiles[2])
        #   -> frame 5
        ra_indices = np.array([[1, 0], [1, 1], [1, 2], [1, 10],
                               [1, trajlen + 1], [1, 2 * trajlen - 1], [2, 5]],
                              dtype=int)
        with TemporaryDirectory() as td:

            trajfiles = []
            xyzs = []
            for i in range(3):
                # staggered start frames make the three files distinguishable
                tf, xyz, _ = create_traj(start=i * 10, dir=td, length=trajlen)
                trajfiles.append(tf)
                xyzs.append(xyz)

            topfile = get_top()
            frag_traj = [
                trajfiles[2], [trajfiles[0], trajfiles[1]], trajfiles[2]
            ]

            # expected frames assembled directly from the raw coordinates
            expected = xyzs[0][np.array([0, 1, 2, 10]), :], xyzs[1][np.array(
                [1, 34])], np.array([(xyzs[2][5, :])])
            expected = np.vstack(expected)

            reader = coor.source(frag_traj, top=topfile)

            for cs in range(1, 10):
                traj = save_traj(reader, ra_indices, None, chunksize=cs)
                np.testing.assert_almost_equal(traj.xyz, expected)
예제 #30
0
    def _test_base_reader_with_random_access_stride(self, file_format, stride, chunksize):
        """Iterate a reader with a random-access 'stride' — an array of
        (itraj, frame) index pairs — and compare chunks to directly indexed data."""
        trajs = self.test_trajs[file_format]
        reader = coor.source(trajs, top=self.pdb_file, chunksize=chunksize)

        if chunksize is not None:
            np.testing.assert_equal(reader.chunksize, chunksize)

        it = reader.iterator(stride=stride, lag=0, chunk=chunksize)

        assert it.chunksize is not None
        if chunksize is None:
            # reader picks a chunksize bounded by the configured memory limit
            max_frames = max_chunksize_from_config(reader.output_type().itemsize)
            assert it.chunksize <= max_frames
            # now we set the chunksize to max_frames, to be able to compare the actual shapes of iterator output.
            chunksize = max_frames
        # for each trajectory i, select the frames listed for i in the stride array
        traj_data = [data[stride[stride[:, 0] == i][:, 1]] for i, data in enumerate(self.traj_data)]

        with it:
            current_itraj = None
            t = t_total = 0
            for itraj, chunk in it:
                # reset t upon next trajectory
                if itraj != current_itraj:
                    current_itraj = itraj
                    t = 0

                # every chunk obeys the chunksize bound (0 means one big chunk)
                assert len(chunk) <= chunksize or chunksize == 0
                if chunksize != 0 and len(traj_data[itraj]) - t >= chunksize:
                    assert len(chunk) == chunksize
                elif chunksize == 0:
                    assert len(chunk) == len(traj_data[itraj])

                np.testing.assert_allclose(chunk, traj_data[itraj][t:t+len(chunk)], atol=self.eps)

                t += len(chunk)
                t_total += len(chunk)
            # total frames must agree with the reader's and iterator's counts
            assert t_total == reader.n_frames_total(stride=stride, skip=0) == it.n_frames_total()