def test_transition_matrix(self):
    """Print the transition matrix and its square beside their eigen-reconstructions.

    Nothing is asserted here; each directly computed matrix is printed right
    before the matrix rebuilt from the right eigenvectors, the real parts of
    the eigenvalues raised to the matching power, and the left eigenvectors,
    so that the two can be compared by eye.
    """
    model = self.smm0

    def rebuild(power):
        # R * diag(real(eigenvalues) ** power) * L, evaluated in that order
        right = model.eigenvectors_right()
        spectrum = np.diag(np.power(model.eigenvalues().real, power))
        return mdot(right, spectrum, model.eigenvectors_left())

    print("Transition Matrix:\n")
    print(model.trans)
    print(rebuild(1))

    print("Transition Matrix Squared:\n")
    print(mdot(model.trans, model.trans))
    print(rebuild(2))
    def test_propagate(self):
        """Compare propagation through ``lcnsmm2`` with propagation through ``lcnsmm2.eval(0)``.

        A number of intermediate results are printed for manual inspection,
        then two properties are asserted at ``k = int(self.timeendpoint)``:

        * propagating ``p0_1`` through the model gives the same result as
          propagating it through ``eval(0)``;
        * ``p0_0`` is left unchanged by the propagation.
        """
        nsmm = self.lcnsmm2
        start = self.p0_1
        trans = self.trans2

        # Propagation of the model itself for 0..3 steps ...
        for steps in range(4):
            print(nsmm.propagate(start, steps))
        # ... and of the model evaluated at 0 for the same step counts.
        for steps in range(4):
            print(nsmm.eval(0).propagate(start, steps))

        # Reference values: p0, p0*T, p0*T^2, p0*T^3 built from plain matrix products.
        print(start)
        power = trans
        print(np.dot(start, power))
        for _ in range(2):
            power = np.dot(trans, power)
            print(np.dot(start, power))
        print(mdot(start, trans, trans, trans))
        print(mdot(start.T, trans, trans, trans))

        # Two-step propagation of each unit vector.
        for unit in ([1.0, 0, 0], [0, 1.0, 0], [0, 0, 1.0]):
            print(nsmm.eval(0).propagate(np.array(unit), 2))

        k = int(self.timeendpoint)

        # If the nonstationary MM is actually stationary, the result must match eval(0).propagate
        through_model = nsmm.propagate(start, k)
        through_eval = nsmm.eval(0).propagate(start, k)
        self.assertTrue(np.allclose(through_model, through_eval))

        # If MM0 and MM1 share the same stationary distribution, it must stay stationary
        self.assertTrue(np.allclose(nsmm.propagate(self.p0_0, k), self.p0_0))
Esempio n. 3
0
def vamp_e_score(K, C00_train, C0t_train, Ctt_train, C00_test, C0t_test, Ctt_test, k=None):
    r""" Computes the VAMP-E score of a kinetic model.

    The singular functions U, V and singular values S of the training model
    are obtained from ``_svd_sym_koopman(K, C00_train, Ctt_train)`` and are
    then scored against the covariances of the test data:

        :math:`\mathrm{trace}(2 V S U^T C_{0t}^{test} - V S U^T C_{00}^{test} U S V^T C_{tt}^{test})`

    The model covariances might have been subject to symmetrization or
    reweighting, depending on the type of model used. The covariances C00,
    C0t and Ctt of the test data are direct empirical estimates.

    Parameters
    ----------
    K : ndarray
        operator (transition matrix or Koopman matrix) of the training model.
        It is handed to ``_svd_sym_koopman`` together with `C00_train` and
        `Ctt_train`; presumably of shape (n, n) - confirm against the caller.
    C00_train : ndarray(n, n)
        covariance matrix of the training data, defined by
        :math:`C_{00}^{train} = (T-\tau)^{-1} \sum_{t=0}^{T-\tau} x_t x_t^T`
    C0t_train : ndarray(n, n)
        time-lagged covariance matrix of the training data, defined by
        :math:`C_{0t}^{train} = (T-\tau)^{-1} \sum_{t=0}^{T-\tau} x_t x_{t+\tau}^T`.
        Not used by this function; kept for a uniform signature.
    Ctt_train : ndarray(n, n)
        covariance matrix of the training data, defined by
        :math:`C_{tt}^{train} = (T-\tau)^{-1} \sum_{t=0}^{T-\tau} x_{t+\tau} x_{t+\tau}^T`
    C00_test : ndarray(n, n)
        covariance matrix of the test data, defined by
        :math:`C_{00}^{test} = (T-\tau)^{-1} \sum_{t=0}^{T-\tau} x_t x_t^T`
    C0t_test : ndarray(n, n)
        time-lagged covariance matrix of the test data, defined by
        :math:`C_{0t}^{test} = (T-\tau)^{-1} \sum_{t=0}^{T-\tau} x_t x_{t+\tau}^T`
    Ctt_test : ndarray(n, n)
        covariance matrix of the test data, defined by
        :math:`C_{tt}^{test} = (T-\tau)^{-1} \sum_{t=0}^{T-\tau} x_{t+\tau} x_{t+\tau}^T`
    k : int, optional, default=None
        number of slow processes to consider in the score. If None, all
        singular components are used.

    Returns
    -------
    vampE : float
        VAMP-E score

    """
    # SVD of symmetrized operator in empirical distribution
    U, s, V = _svd_sym_koopman(K, C00_train, Ctt_train)
    if k is not None:
        # keep only the k leading singular components
        U = U[:, :k]
        s = s[:k]
        V = V[:, :k]
    # Build S outside the branch so that it also exists for k=None
    # (previously the default call failed with a NameError on S).
    S = np.diag(s)
    score = np.trace(2.0 * mdot(V, S, U.T, C0t_test) - mdot(V, S, U.T, C00_test, U, S, V.T, Ctt_test))
    return score
Esempio n. 4
0
def _svd_sym_koopman(K, C00_train, Ctt_train):
    """ Computes the SVD of the symmetrized Koopman operator in the empirical distribution.

    Parameters
    ----------
    K : ndarray
        operator to decompose; it is left-multiplied by `C00_train` first.
    C00_train : ndarray
        instantaneous covariance matrix of the training data.
    Ctt_train : ndarray
        covariance matrix of the time-shifted training data.

    Returns
    -------
    U : ndarray
        left singular vectors of the symmetrized operator, multiplied from the
        left by the inverse square root of `C00_train`.
    S : ndarray
        singular values as returned by ``np.linalg.svd`` (one-dimensional).
    V : ndarray
        right singular vectors, projected with the inverse square root of
        `Ctt_train` and returned transposed, i.e. with one vector per column.
    """
    from pyemma._ext.variational.solvers.direct import spd_inv_sqrt
    # Each inverse square root is needed twice; compute it only once.
    C00_inv_sqrt = spd_inv_sqrt(C00_train)
    Ctt_inv_sqrt = spd_inv_sqrt(Ctt_train)
    # reweight operator to empirical distribution
    C0t_re = mdot(C00_train, K)
    # symmetrized operator and SVD
    K_sym = mdot(C00_inv_sqrt, C0t_re, Ctt_inv_sqrt)
    U, S, Vt = np.linalg.svd(K_sym, compute_uv=True, full_matrices=False)
    # projects back to singular functions of K
    U = mdot(C00_inv_sqrt, U)
    Vt = mdot(Vt, Ctt_inv_sqrt)
    return U, S, Vt.T
    def is_scalable_tm(self):
        """
        Check whether the stored transition matrix decomposition is "scalable".

        The method takes no arguments; it reads ``self.transD``, ``self.transV``
        and ``self.transU`` and forms

            delta = transV * ln(transD) * transU

        where the logarithm is applied to the diagonal entries of ``transD``.
        NOTE(review): presumably transD holds the eigenvalues and transV is the
        inverse of transU, so that delta is ln(trans) - confirm against the
        code that sets these attributes.

        For large scaling factors (tau), the scaling of the transition matrix
        approaches

            I + (1/tau) ln(trans)

        which is a transition matrix for sufficiently large tau if
           1. all diagonal elements of ln(trans) are <= 0
           2. all off-diagonal elements of ln(trans) are >= 0

        :return: bool - True if both conditions hold for delta, otherwise False
        """
        # The diagonal decomposition gives a fast calculation of the natural log.
        log_of_diagonal = np.diag(np.log(np.diag(self.transD)))
        ln_trans = mdot(self.transV, log_of_diagonal, self.transU)

        # The strict triangles are zero-filled elsewhere, so the two ">= 0" tests
        # also look at entries that are trivially fine (twice the needed checks).
        below = np.tril(ln_trans, -1)
        above = np.triu(ln_trans, 1)

        diagonal_ok = np.all(np.diag(ln_trans) <= 0)
        below_ok = np.all(below >= 0)
        above_ok = np.all(above >= 0)
        return bool(diagonal_ok and below_ok and above_ok)
Esempio n. 6
0
    def is_scalable_tm(self, transd, transu, transv=None):
        """
        Decide whether a transition matrix, given by its diagonal decomposition, is "scalable".

        A transition matrix has all non-negative entries and rows that sum to one.
        The arguments hold the diagonal decomposition

           trans = transv * transd * transu

        with transd diagonal and transv the inverse of transu.

        Scaling trans by a factor tau means finding the transition matrix S with

           S = trans^(1/tau)

        For any matrix M,

           M = exp( ln(M) )
           M^(1/tau) = exp( (1/tau)*ln(M) )

        and when |(1/tau)*ln(M)| << 1,

           M^(1/tau) ~ I + (1/tau)*ln(M)

        so for large tau the scaled matrix approaches I + (1/tau)*ln(trans).
        That matrix has non-negative elements for sufficiently large tau if and only if
          1. all diagonal elements of ln(trans) are <= 0
          2. all off-diagonal elements of ln(trans) are >= 0

        A matrix trans with these two properties is called "scalable".

        The matrix logarithm is defined through its Taylor series,

           ln(I + X) = X - (1/2) X^2 + (1/3) X^3 ...

        from which it follows that it can be evaluated quickly via the decomposition:

           ln(trans) = transv * ln(transd) * transu

        where ln(transd) is obtained by taking the natural logarithm of each
        diagonal entry of transd.

        :param transd: ndarray - diagonal array
        :param transu: ndarray - left eigenvector matrix
        :param transv: ndarray (default=None) - inverse matrix of transu; computed
                       with np.linalg.inv(transu) when not given
        :return: bool - True if trans = transv*transd*transu is scalable, otherwise False
        """
        inverse_u = np.linalg.inv(transu) if transv is None else transv

        # natural logarithm of the transition matrix
        log_of_diagonal = np.diag(np.log(np.diag(transd)))
        ln_trans = mdot(inverse_u, log_of_diagonal, transu)

        # strict lower and strict upper triangular parts (zero everywhere else)
        below = np.tril(ln_trans, -1)
        above = np.triu(ln_trans, 1)

        diagonal_ok = np.all(np.diag(ln_trans) <= 0)
        below_ok = np.all(below >= 0)
        above_ok = np.all(above >= 0)
        return bool(diagonal_ok and below_ok and above_ok)
Esempio n. 7
0
    def propagate(self, p0, k):
        r""" Propagates the initial distribution p0 k times

        Evaluates

        .. math::

            p_k = p_0^T P^k

        through the eigendecomposition of the transition matrix. If the lag
        time of transition matrix :math:`P` is :math:`\tau`, the result is the
        probability distribution at time :math:`k \tau`.

        When the length of `p0` equals ``self.nstates_obs``, `p0` is first
        mapped with ``self.observation_probabilities``, propagated, and then
        mapped back with the same matrix before normalization.

        Parameters
        ----------
        p0 : ndarray(n)
            Initial distribution. Vector of size of the active set.

        k : int
            Number of time steps

        Returns
        -------
        pk : ndarray(n)
            Distribution after k steps, normalized to sum to one. Vector of
            size of the active set.

        """
        p0 = _types.ensure_ndarray(p0, ndim=1, kind='numeric')
        assert _types.is_int(k) and k >= 0, 'k must be a non-negative integer'

        # zero steps: nothing to propagate, only normalize
        if k == 0:
            return p0 / p0.sum()

        # are we on microstates space? if so, project to hidden before computing
        on_observed_space = len(p0) == self.nstates_obs
        if on_observed_space:
            p0 = _np.dot(self.observation_probabilities, p0)

        self._ensure_eigendecomposition(self.nstates)
        from pyemma.util.linalg import mdot
        right = self.eigenvectors_right()
        powered_spectrum = _np.diag(_np.power(self.eigenvalues(), k))
        left = self.eigenvectors_left()
        pk = mdot(p0.T, right, powered_spectrum, left)

        # convert back to microstate space
        if on_observed_space:
            pk = _np.dot(pk, self.observation_probabilities)

        # normalize to 1.0 and return
        total = pk.sum()
        return pk / total
Esempio n. 8
0
    def propagate(self, p0, k):
        r""" Propagates the initial distribution p0 k times

        Computes the product

        .. math::

            p_k = p_0^T P^k

        If the lag time of transition matrix :math:`P` is :math:`\tau`, this
        will provide the probability distribution at time :math:`k \tau`.

        For a sparse model the product is formed by k successive
        vector-matrix multiplications; for a dense model the
        eigendecomposition of :math:`P` is used and the real part of the
        result is taken.

        Parameters
        ----------
        p0 : ndarray(n,)
            Initial distribution. Vector of size of the active set.

        k : int
            Number of time steps

        Returns
        -------
        pk : ndarray(n,)
            Distribution after k steps, normalized to sum to one. Vector of
            size of the active set.

        """
        p0 = _types.ensure_ndarray(p0,
                                   ndim=1,
                                   size=self.nstates,
                                   kind='numeric')
        assert _types.is_int(k) and k >= 0, 'k must be a non-negative integer'

        if k == 0:  # simply return p0 normalized
            return p0 / p0.sum()

        if self.is_sparse:  # sparse: we don't have a full eigenvalue set, so just propagate
            # numpy.dot does not understand scipy.sparse matrices, so the
            # matrix's own dot method is used: P^T p equals p^T P for a 1-d p.
            # This form also works if transition_matrix is a dense ndarray.
            transposed = self.transition_matrix.T
            pk = _np.array(p0)
            for _ in range(k):
                pk = transposed.dot(pk)
        else:  # dense: employ eigenvalue decomposition
            self._ensure_eigendecomposition(self.nstates)
            from pyemma.util.linalg import mdot
            pk = mdot(p0.T, self.eigenvectors_right(),
                      _np.diag(_np.power(self.eigenvalues(), k)),
                      self.eigenvectors_left()).real
        # normalize to 1.0 and return
        return pk / pk.sum()
Esempio n. 9
0
    def score(self, test_model=None, score_method='VAMP2'):
        """Compute the VAMP score for this model or the cross-validation score between self and a second model.

        Parameters
        ----------
        test_model : VAMPModel, optional, default=None

            If `test_model` is not None, this method computes the cross-validation score
            between self and `test_model`. It is assumed that self was estimated from
            the "training" data and `test_model` was estimated from the "test" data. The
            score is computed for one realization of self and `test_model`. Estimation
            of the average cross-validation score and partitioning of data into test and
            training part is not performed by this method.

            If `test_model` is None, this method computes the VAMP score for the model
            contained in self.

        score_method : str, optional, default='VAMP2'
            Available scores are based on the variational approach for Markov processes [1]_:

            *  'VAMP1'  Sum of singular values of the half-weighted Koopman matrix [1]_ .
                        If the model is reversible, this is equal to the sum of
                        Koopman matrix eigenvalues, also called Rayleigh quotient [1]_.
            *  'VAMP2'  Sum of squared singular values of the half-weighted Koopman matrix [1]_ .
                        If the model is reversible, this is equal to the kinetic variance [2]_ .
            *  'VAMPE'  Approximation error of the estimated Koopman operator with respect to
                        the true Koopman operator up to an additive constant [1]_ .

        Returns
        -------
        score : float
            If `test_model` is not None, returns the cross-validation VAMP score between
            self and `test_model`. Otherwise return the selected VAMP-score of self.
            The contribution (+1) of the constant singular functions is included.

        Raises
        ------
        ValueError
            If `score_method` is not one of 'VAMP1', 'VAMP2' or 'VAMPE'.

        References
        ----------
        .. [1] Wu, H. and Noe, F. 2017. Variational approach for learning Markov processes from time series data.
            arXiv:1707.04659v1
        .. [2] Noe, F. and Clementi, C. 2015. Kinetic distance and kinetic maps from molecular dynamics simulation.
            J. Chem. Theory. Comput. doi:10.1021/acs.jctc.5b00553
        """
        # TODO: implement for TICA too
        if test_model is None:
            test_model = self
        dim = self.dimension()
        Uk = self.U[:, 0:dim]
        Vk = self.V[:, 0:dim]
        if score_method == 'VAMP1' or score_method == 'VAMP2':
            # half-weighted Koopman matrix of the test model in the basis of self
            A = spd_inv_sqrt(Uk.T.dot(test_model.C00).dot(Uk))
            B = Uk.T.dot(test_model.C0t).dot(Vk)
            C = spd_inv_sqrt(Vk.T.dot(test_model.Ctt).dot(Vk))
            ABC = mdot(A, B, C)
            if score_method == 'VAMP1':
                res = np.linalg.norm(ABC, ord='nuc')
            else:
                res = np.linalg.norm(ABC, ord='fro')**2
        elif score_method == 'VAMPE':
            Sk = np.diag(self.singular_values[0:dim])
            res = np.trace(2.0 * mdot(Vk, Sk, Uk.T, test_model.C0t) - mdot(
                Vk, Sk, Uk.T, test_model.C00, Uk, Sk, Vk.T, test_model.Ctt))
        else:
            raise ValueError('"score" should be one of VAMP1, VAMP2 or VAMPE')
        # Every branch above either assigns res or raises, so no further check
        # is needed. The former truthiness assertion on res also rejected a
        # legitimate score of exactly 0.0.
        # add the contribution (+1) of the constant singular functions to the result
        return res + 1
Esempio n. 10
0
def vamp_2_score(K, C00_train, C0t_train, Ctt_train, C00_test, C0t_test, Ctt_test, k=None):
    r""" Computes the VAMP-2 score of a kinetic model.

    The singular functions U and V of the training model are obtained from
    ``_svd_sym_koopman(K, C00_train, Ctt_train)`` and are then scored against
    the covariances of the test data: the score is the squared Frobenius norm of

        :math:`(U^T C_{00}^{test} U)^{-1/2} \, U^T C_{0t}^{test} V \, (V^T C_{tt}^{test} V)^{-1/2}`

    The model covariances might have been subject to symmetrization or
    reweighting, depending on the type of model used. The covariances C00,
    C0t and Ctt of the test data are direct empirical estimates.

    Parameters
    ----------
    K : ndarray
        operator (transition matrix or Koopman matrix) of the training model.
        It is handed to ``_svd_sym_koopman`` together with `C00_train` and
        `Ctt_train`; presumably of shape (n, n) - confirm against the caller.
    C00_train : ndarray(n, n)
        covariance matrix of the training data, defined by
        :math:`C_{00}^{train} = (T-\tau)^{-1} \sum_{t=0}^{T-\tau} x_t x_t^T`
    C0t_train : ndarray(n, n)
        time-lagged covariance matrix of the training data, defined by
        :math:`C_{0t}^{train} = (T-\tau)^{-1} \sum_{t=0}^{T-\tau} x_t x_{t+\tau}^T`.
        Not used by this function.
    Ctt_train : ndarray(n, n)
        covariance matrix of the training data, defined by
        :math:`C_{tt}^{train} = (T-\tau)^{-1} \sum_{t=0}^{T-\tau} x_{t+\tau} x_{t+\tau}^T`
    C00_test : ndarray(n, n)
        covariance matrix of the test data, defined by
        :math:`C_{00}^{test} = (T-\tau)^{-1} \sum_{t=0}^{T-\tau} x_t x_t^T`
    C0t_test : ndarray(n, n)
        time-lagged covariance matrix of the test data, defined by
        :math:`C_{0t}^{test} = (T-\tau)^{-1} \sum_{t=0}^{T-\tau} x_t x_{t+\tau}^T`
    Ctt_test : ndarray(n, n)
        covariance matrix of the test data, defined by
        :math:`C_{tt}^{test} = (T-\tau)^{-1} \sum_{t=0}^{T-\tau} x_{t+\tau} x_{t+\tau}^T`
    k : int, optional, default=None
        number of slow processes to consider in the score. If None, all
        singular components are used.

    Returns
    -------
    vamp2 : float
        VAMP-2 score

    """
    from pyemma._ext.variational.solvers.direct import spd_inv_sqrt

    # SVD of symmetrized operator in empirical distribution; the singular
    # values themselves are not needed for this score.
    left, _, right = _svd_sym_koopman(K, C00_train, Ctt_train)
    if k is not None:
        left, right = left[:, :k], right[:, :k]

    whiten_left = spd_inv_sqrt(mdot(left.T, C00_test, left))
    cross_cov = mdot(left.T, C0t_test, right)
    whiten_right = spd_inv_sqrt(mdot(right.T, Ctt_test, right))

    # squared Frobenius norm, equal to the sum of squares of singular values
    half_weighted = mdot(whiten_left, cross_cov, whiten_right)
    return np.linalg.norm(half_weighted, ord='fro') ** 2