예제 #1
0
    def test_score_vs_MSM(self):
        """Compare MSM scores against VAMP scores on matching data splits.

        For each of 'VAMP1', 'VAMP2' and 'VAMPE', a non-reversible MSM is
        estimated from one part of ``self.dtrajs`` and scored on the other
        part, while a VAMP model with ``dim=1.0`` is estimated and scored the
        same way on ``self.trajs``. Both scores have to agree to 3 decimal
        places (2 for 'VAMPE').
        """
        from pyemma.util.contexts import numpy_random_seed

        seed = 32
        # Both splits run under the same seed, presumably so that the same
        # fragments are selected from the continuous and the discrete data.
        # NOTE(review): other callers unpack cvsplit_dtrajs(...) as
        # (train, test); here the first element is used for scoring and the
        # second one for fitting. This is applied consistently to both splits,
        # so the comparison is unaffected -- confirm that it is intentional.
        with numpy_random_seed(seed):
            trajs_scoring, trajs_fitting = cvsplit_dtrajs(self.trajs)
        with numpy_random_seed(seed):
            dtrajs_scoring, dtrajs_fitting = cvsplit_dtrajs(self.dtrajs)

        # (score method, number of decimal places that must match)
        cases = (('VAMP1', 3), ('VAMP2', 3), ('VAMPE', 2))

        for method, decimals in cases:
            msm = estimate_markov_model(dtrajs=dtrajs_fitting,
                                        lag=self.lag,
                                        reversible=False)
            msm_score = msm.score(dtrajs_scoring,
                                  score_method=method,
                                  score_k=None)

            vamp = pyemma_api_vamp(data=trajs_fitting,
                                   lag=self.lag,
                                   dim=1.0)
            vamp_score = vamp.score(test_data=trajs_scoring,
                                    score_method=method)

            self.assertAlmostEqual(msm_score,
                                   vamp_score,
                                   places=decimals,
                                   msg=method)
예제 #2
0
 def test_blocksplit_dtrajs_cvsplit(self):
     """Block-split two short trajectories and check that a CV split of the
     blocks yields a non-empty training set and a non-empty test set.

     The check is repeated for lag times 1 to 4, using non-sliding blocks
     with ``shift=0``.
     """
     source_dtrajs = [
         np.arange(11),  # states 0..10
         np.array([0, 1, 9, 10]),
     ]
     for lag in (1, 2, 3, 4):
         blocks = blocksplit_dtrajs(source_dtrajs,
                                    lag=lag,
                                    sliding=False,
                                    shift=0)
         train_part, test_part = cvsplit_dtrajs(blocks)
         # Normalise both halves first, then check that neither is empty.
         train_list = ensure_dtraj_list(train_part)
         test_list = ensure_dtraj_list(test_part)
         assert len(train_list) > 0
         assert len(test_list) > 0
예제 #3
0
    def score_cv(self, dtrajs, n=10, score_method=None, score_k=None):
        """ Cross-validated score of this MSM estimator using the variational approach for Markov processes [1]_ [2]_ [3]_ .

        In each of ``n`` repetitions the data is split into training and test
        data, a clone of this estimator (same parameters) is fitted to the
        training data, and the fitted model is scored on the test data.
        Currently only one way of splitting is implemented, where for each
        repetition the data is randomly divided into two approximately equally
        large sets of discrete trajectory fragments with lengths of at least
        the lagtime.

        Currently only implemented using dense matrices - will be slow for large state spaces.

        Parameters
        ----------
        dtrajs : list of arrays
            Discrete trajectories. They are split into training and test
            fragments anew in every repetition.
        n : int, optional, default=10
            Number of repetitions of the cross-validation. Use large n to get solid
            means of the score.
        score_method : str or None, optional, default=None
            Overwrite scoring method to be used if desired. If `None`, the estimators scoring
            method will be used.
            Available scores are based on the variational approach for Markov processes [1]_ [2]_ :

            *  'VAMP1'  Sum of singular values of the symmetrized transition matrix [2]_ .
                        If the MSM is reversible, this is equal to the sum of transition
                        matrix eigenvalues, also called Rayleigh quotient [1]_ [3]_ .
            *  'VAMP2'  Sum of squared singular values of the symmetrized transition matrix [2]_ .
                        If the MSM is reversible, this is equal to the kinetic variance [4]_ .

        score_k : int or None
            The maximum number of eigenvalues or singular values used in the
            score. If set to None, all available eigenvalues will be used.

        Returns
        -------
        scores : ndarray
            One score per repetition, in the order in which the repetitions
            were run.

        Raises
        ------
        ValueError
            If ``self.count_mode`` is neither "sliding" nor "sample".

        References
        ----------
        .. [1] Noe, F. and F. Nueske: A variational approach to modeling slow processes
            in stochastic dynamical systems. SIAM Multiscale Model. Simul. 11, 635-655 (2013).
        .. [2] Wu, H and F. Noe: Variational approach for learning Markov processes
            from time series data (in preparation).
        .. [3] McGibbon, R and V. S. Pande: Variational cross-validation of slow
            dynamical modes in molecular kinetics, J. Chem. Phys. 142, 124105 (2015).
        .. [4] Noe, F. and C. Clementi: Kinetic distance and kinetic maps from molecular
            dynamics simulation. J. Chem. Theory Comput. 11, 5002-5011 (2015).

        """
        dtrajs = ensure_dtraj_list(dtrajs)  # normalise the input format

        from pyemma.msm.estimators._dtraj_stats import cvsplit_dtrajs
        supported_modes = ('sliding', 'sample')
        if self.count_mode not in supported_modes:
            raise ValueError(
                'score_cv currently only supports count modes "sliding" and "sample"'
            )
        use_sliding = self.count_mode == 'sliding'

        from pyemma._ext.sklearn.base import clone
        # Fitting happens on a clone; the same clone is refitted every round.
        worker = clone(self)

        def _single_round():
            # One repetition: block-split, CV-split, fit on train, score on test.
            blocks = self._blocksplit_dtrajs(dtrajs, use_sliding)
            train_part, test_part = cvsplit_dtrajs(blocks)
            worker.fit(train_part)
            return worker.score(test_part,
                                score_method=score_method,
                                score_k=score_k)

        return _np.array([_single_round() for _ in range(n)])