def estimate_transition_matrix_reversible(C,
                                          Xinit=None,
                                          maxiter=1000000,
                                          maxerr=1e-8,
                                          return_statdist=False,
                                          return_conv=False,
                                          warn_not_converged=True):
    """
    Iterative method for estimating a maximum likelihood reversible transition matrix.

    The fixed-point update applied to the matrix X on the nonzero pattern of C + C^T is:
        x_ij = (c_ij + c_ji) / ((c_i / x_i) + (c_j / x_j))
    followed by a renormalization so that the updated entries sum to one.
    Here c_i and x_i are the row sums of C and X. The transition matrix is
    obtained at the end as t_ij = x_ij / x_i.
    Please note that there is a better (=faster) iteration that has been described in
    Prinz et al, J. Chem. Phys. 134, p. 174105 (2011). We should implement that too.

    Parameters
    ----------
    C : ndarray (n,n)
        count matrix. Must be fully connected, otherwise a ValueError is raised.
    Xinit : ndarray (n,n), optional, default=None
        initial value for the matrix of absolute transition probabilities. Unless set otherwise,
        the initial value is obtained from the module helper ``__initX(C)``; according to the
        original documentation this is X = diag(pi) T, where T is a nonreversible transition
        matrix estimated from C, i.e. T_ij = c_ij / sum_k c_ik, and pi is its stationary
        distribution.
        NOTE: when Xinit is given, the array is updated in place by the iteration.
    maxiter : int, default=1000000
        bound on the iteration counter. The counter starts at 1 and the loop runs while
        it is smaller than maxiter - 1. If return_conv is True, the history arrays are
        allocated with length maxiter.
    maxerr : float, default=1e-8
        convergence tolerance. This specifies the maximum change of the Euclidean norm of relative
        stationary probabilities (x_i = sum_k x_ik). The relative stationary probability changes
        e_i = (x_i^(1) - x_i^(2))/(x_i^(1) + x_i^(2)) are used in order to track changes in small
        probabilities. The Euclidean norm of the change vector, |e_i|_2, is compared to maxerr.
    return_statdist : bool, default=False
        If set to true, the stationary distribution is also returned
    return_conv : bool, default=False
        If set to true, the likelihood history and the pi_change history is returned.
    warn_not_converged : bool, default=True
        Issues a NotConvergedWarning if the iteration stopped without converging.

    Returns
    -------
    T or (T,pi) or (T,lhist,pi_changes) or (T,pi,lhist,pi_changes)
    T : ndarray (n,n)
        transition matrix. This is the only return for return_statdist = False, return_conv = False
    (pi) : ndarray (n)
        stationary distribution (row sums of X). Only returned if return_statdist = True
    (lhist) : ndarray (k)
        likelihood history. Entry 0 is the likelihood of the initial estimate, followed by
        one entry per iteration. Only returned if return_conv = True
    (pi_changes) : ndarray (k)
        history of the change in the stationary distribution, same length as lhist
        (entry 0 is left at zero). Only returned if return_conv = True

    Raises
    ------
    ValueError
        If the count matrix C is not fully connected.
    """
    from msmtools.estimation import is_connected
    from msmtools.estimation import log_likelihood
    # check input: the exception must actually be raised, not merely constructed
    if not is_connected(C):
        raise ValueError('Count matrix is not fully connected. ' +
                         'Need fully connected count matrix for ' +
                         'reversible transition matrix estimation.')
    converged = False
    n = np.shape(C)[0]
    # initialization
    C2 = C + C.T  # reversibly counted matrix
    nz = np.nonzero(C2)  # only entries on this pattern are ever updated
    csum = np.sum(C, axis=1)  # row sums C
    X = Xinit
    if X is None:
        X = __initX(C)  # initial X
    xsum = np.sum(X, axis=1)  # row sums x
    D = np.zeros((n, n))  # helper matrix, reused across iterations
    # if convergence history requested, initialize variables
    if return_conv:
        diffs = np.zeros(maxiter)
        # likelihood
        lhist = np.zeros(maxiter)
        T = X / xsum[:, np.newaxis]
        lhist[0] = log_likelihood(C, T)
    # iteration
    i = 1
    while (i < maxiter - 1) and (not converged):
        # c_i / x_i
        c_over_x = csum / xsum
        # d_ij = (c_i/x_i) + (c_j/x_j): column broadcast first, then row broadcast
        D[:] = c_over_x[:, np.newaxis]
        D += c_over_x
        # update estimate
        X[nz] = C2[nz] / D[nz]
        X[nz] /= np.sum(X[nz])  # renormalize
        xsumnew = np.sum(X, axis=1)
        # compute difference in pi
        diff = __relative_error(xsum, xsumnew)
        # update pi
        xsum = xsumnew
        # any convergence history wanted?
        if return_conv:
            # update T and likelihood
            T = X / xsum[:, np.newaxis]
            lhist[i] = log_likelihood(C, T)
            diffs[i] = diff
        # converged?
        converged = (diff < maxerr)
        i += 1
    # finalize and return
    T = X / xsum[:, np.newaxis]
    if warn_not_converged and not converged:
        warnings.warn(
            "Reversible transition matrix estimation didn't converge.",
            msmtools.util.exceptions.NotConvergedWarning)
    if return_statdist and return_conv:
        return (T, xsum, lhist[0:i], diffs[0:i])
    if return_statdist:
        return (T, xsum)
    if return_conv:
        return (T, lhist[0:i], diffs[0:i])
    return T  # else just return T
예제 #2
0
 def _log_likelihood_biased(C, T, E, mhat, ws):
     """Evaluate the AMM likelihood.

     Returns ``msmest.log_likelihood(C, T)`` minus the ``ws``-weighted sum of
     squared differences between ``mhat`` and ``E``.
     """
     unbiased_term = msmest.log_likelihood(C, T)
     # Penalty for the mismatch between mhat and E, weighted by ws.
     squared_deviation = (mhat - E)**2.
     bias_term = -_np.sum(ws * squared_deviation)
     return unbiased_term + bias_term
예제 #3
0
 def test_count_matrix(self):
     """Small test case: log_likelihood of (self.C1, self.T1) against a hand-written value."""
     # Reference value spelled out as a product of transition probabilities.
     expected = np.log(0.8 * 0.2**3 * 0.9**3 * 0.1)
     actual = log_likelihood(self.C1, self.T1)
     assert_allclose(actual, expected)