Example 1
 def test_connected_sets(self):
     """Directed"""
     cc = connected_sets(self.C)
     for i in range(len(cc)):
         self.assertTrue(np.all(self.cc_directed[i] == np.sort(cc[i])))
     """Undirected"""
     cc = connected_sets(self.C, directed=False)
     for i in range(len(cc)):
         self.assertTrue(np.all(self.cc_undirected[i] == np.sort(cc[i])))
Example 2
    def _compute_connected_sets(C, mincount_connectivity, strong=True):
        """ Computes the connected sets of C.

        C : count matrix
        mincount_connectivity : float
            Minimum count which counts as a connection.
        strong : boolean
            True: Seek strongly connected sets. False: Seek weakly connected sets.
        Returns
        -------
        Cconn, S
        """
        import msmtools.estimation as msmest
        import scipy.sparse as scs
        if scs.issparse(C):
            Cconn = C.tocsr(copy=True)
            Cconn.data[Cconn.data < mincount_connectivity] = 0
            Cconn.eliminate_zeros()
        else:
            Cconn = C.copy()
            Cconn[np.where(Cconn < mincount_connectivity)] = 0

        # treat each connected set separately
        S = msmest.connected_sets(Cconn, directed=strong)
        return S
Example 3
def compute_connected_sets(C, connectivity_threshold, directed=True):
    """ Computes the connected sets of a count matrix C.

    C : (N, N) np.ndarray
        count matrix
    mincount_connectivity : float
        Minimum count required to be included in the connected set computation.
    directed : boolean
        True: Seek connected sets in the directed graph. False: Seek connected sets in the undirected graph.
    Returns
    -------
    A list of arrays, each array representing a connected set by enumerating the respective states. The list is in
    descending order by size of connected set.
    """
    import msmtools.estimation as msmest
    import scipy.sparse as scs
    if connectivity_threshold > 0:
        if scs.issparse(C):
            Cconn = C.tocsr(copy=True)
            Cconn.data[Cconn.data < connectivity_threshold] = 0
            Cconn.eliminate_zeros()
        else:
            Cconn = C.copy()
            Cconn[np.where(Cconn < connectivity_threshold)] = 0
    else:
        Cconn = C
    # treat each connected set separately
    S = msmest.connected_sets(Cconn, directed=directed)
    return S
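For illustration, a minimal usage sketch on a toy count matrix (assuming msmtools is installed and the function above is in scope; the indicated outputs follow from the docstring and may differ in exact formatting):

import numpy as np

C = np.array([[10, 1, 0],
              [2,  0, 3],
              [0,  0, 4]])

# without thresholding, states 0 and 1 exchange counts in both directions,
# so {0, 1} forms a strongly connected set and {2} stands alone
print(compute_connected_sets(C, connectivity_threshold=0))  # e.g. [array([0, 1]), array([2])]

# a threshold of 2 removes the single 0 -> 1 count, splitting {0, 1}
print(compute_connected_sets(C, connectivity_threshold=2))  # three singleton sets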
Example 4
    def count_lagged(self, lag, count_mode='sliding'):
        r""" Counts transitions at given lag time

        Parameters
        ----------
        lag : int
            lagtime in trajectory steps

        count_mode : str, optional, default='sliding'
            mode to obtain count matrices from discrete trajectories. Should be one of:

            * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts
              at time indexes

              .. math:: (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

            * 'effective' : Uses an estimate of the transition counts that are
              statistically uncorrelated. Recommended when used with a
              Bayesian MSM.

            * 'sample' : A trajectory of length T will have :math:`T / \tau` counts
              at time indexes

              .. math:: (0 \rightarrow \tau), (\tau \rightarrow 2\tau), ..., (((T/\tau)-1)\tau \rightarrow T)


        """
        # store lag time
        self._lag = lag

        # Compute count matrix
        count_mode = count_mode.lower()
        if count_mode == 'sliding':
            self._C = msmest.count_matrix(self._dtrajs, lag, sliding=True)
        elif count_mode == 'sample':
            self._C = msmest.count_matrix(self._dtrajs, lag, sliding=False)
        elif count_mode == 'effective':
            self._C = msmest.effective_count_matrix(self._dtrajs, lag)
        else:
            raise ValueError('Count mode ' + count_mode + ' is unknown.')

        # Compute reversibly connected sets
        self._connected_sets = msmest.connected_sets(self._C)

        # set sizes and count matrices on reversibly connected sets
        self._connected_set_sizes = np.zeros((len(self._connected_sets)))
        self._C_sub = np.empty(len(self._connected_sets), dtype=object)
        for i in range(len(self._connected_sets)):
            # set size
            self._connected_set_sizes[i] = len(self._connected_sets[i])
            # submatrix
            self._C_sub[i] = submatrix(self._C, self._connected_sets[i])

        # largest connected set
        lcs = self._connected_sets[0]

        # mapping from full to lcs
        self._full2lcs = -1 * np.ones(self._nstates, dtype=int)
        self._full2lcs[lcs] = np.arange(len(lcs), dtype=int)

        # remember that this function was called
        self._counted_at_lag = True
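To make the 'sliding' versus 'sample' distinction above concrete, here is a small sketch that calls msmtools directly on a toy trajectory (a hedged illustration, not part of the class):

import numpy as np
import msmtools.estimation as msmest

dtraj = [np.array([0, 0, 1, 1, 0, 0])]
# sliding: one count per window start, i.e. T - tau = 4 transition counts
print(msmest.count_matrix(dtraj, 2, sliding=True).toarray())
# sample: non-overlapping, lag-spaced windows, i.e. 2 transition counts here
print(msmest.count_matrix(dtraj, 2, sliding=False).toarray())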
Example 5
def rdl_decomposition(P, reversible=True):
    # TODO: this treatment is probably not meaningful for weakly connected matrices.
    import msmtools.estimation as msmest
    import msmtools.analysis as msmana
    # output matrices
    n = np.shape(P)[0]
    if reversible:
        dtype = np.float64
        norm = 'reversible'
    else:
        dtype = complex
        norm = 'standard'
    R = np.zeros((n, n), dtype=dtype)
    D = np.zeros((n, n), dtype=dtype)
    L = np.zeros((n, n), dtype=dtype)
    # treat each strongly connected set separately
    S = msmest.connected_sets(P)
    for s in S:
        indices = np.ix_(s, s)
        if len(s) > 1:
            right_eigvec, eigval_diag, left_eigvec = msmana.rdl_decomposition(
                P[s, :][:, s], norm=norm)
            # write to full
            R[indices] = right_eigvec
            D[indices] = eigval_diag
            L[indices] = left_eigvec
        else:  # just one element. Write 1's
            R[indices] = 1
            D[indices] = 1
            L[indices] = 1
    # done
    return R, D, L
Example 6
def transform_transition_matrix_connected(transition_matrix):

    connected_nodes = connected_sets(transition_matrix)[0]
    connected_matrix = np.take(transition_matrix, connected_nodes, axis=0)
    connected_matrix = np.take(connected_matrix, connected_nodes, axis=1)

    #removed_nodes = [element[0] for element in connected_sets(transition_matrix)[1:]]
    removed_nodes = [
        element for element in range(0, transition_matrix.shape[0])
        if element not in connected_nodes
    ]
    removed_nodes.sort()
    removed_nodes.reverse()
    return connected_matrix, removed_nodes
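A short usage note: the removed state indices come back sorted in descending order, so callers can delete them one by one from aligned data structures without shifting later indices. A minimal sketch (assuming a connected_sets function like the one in Example 7 is in scope):

import numpy as np

T = np.array([[0.9, 0.1, 0.0],
              [0.1, 0.9, 0.0],
              [0.0, 0.0, 1.0]])
Tc, removed = transform_transition_matrix_connected(T)
# Tc is the 2x2 block for states {0, 1}; removed == [2]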
Example 7
def connected_sets(C, mincount_connectivity=0, strong=True):
    """ Computes the connected sets of C.

    C : count matrix
    mincount_connectivity : float
        Minimum count which counts as a connection.
    strong : boolean
        True: Seek strongly connected sets. False: Seek weakly connected sets.

    """
    import msmtools.estimation as msmest
    Cconn = C.copy()
    Cconn[np.where(C < mincount_connectivity)] = 0
    # treat each connected set separately
    S = msmest.connected_sets(Cconn, directed=strong)
    return S
Example 9
def transform_transition_matrix_connected(transition_matrix):
    #TODO: I would prefer to use msmtools because the functions there are unit-tested...
    raise NotImplementedError('consider using msmtools or remove this line.')

    connected_nodes = connected_sets(transition_matrix)[0]
    connected_matrix = np.take(transition_matrix, connected_nodes, axis=0)
    connected_matrix = np.take(connected_matrix, connected_nodes, axis=1)

    #removed_nodes = [element[0] for element in connected_sets(transition_matrix)[1:]]
    removed_nodes = [
        element for element in range(0, transition_matrix.shape[0])
        if element not in connected_nodes
    ]
    removed_nodes.sort()
    removed_nodes.reverse()
    return connected_matrix, removed_nodes
Example 10
def stationary_distribution(C, P):
    # import emma
    import msmtools.estimation as msmest
    import msmtools.analysis as msmana
    # disconnected sets
    n = np.shape(C)[0]
    ctot = np.sum(C)
    pi = np.zeros((n))
    # treat each connected set separately
    S = msmest.connected_sets(C)
    for s in S:
        # compute weight
        w = np.sum(C[s,:]) / ctot
        pi[s] = w * msmana.statdist(P[s,:][:,s])
    # reinforce normalization
    pi /= np.sum(pi)
    return pi
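Each connected set is weighted by its share of the total counts, and within a set the stationary vector of the corresponding sub-transition matrix is used. A minimal sketch with toy matrices (assuming the function above is in scope):

import numpy as np

C = np.array([[5, 5, 0], [5, 5, 0], [0, 0, 10]])
P = np.array([[0.5, 0.5, 0.0], [0.5, 0.5, 0.0], [0.0, 0.0, 1.0]])
# the set {0, 1} carries 20 of 30 counts (weight 2/3) and {2} the rest,
# so the result is approximately [1/3, 1/3, 1/3]
print(stationary_distribution(C, P))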
Example 11
def estimate_P(C, reversible = True, fixed_statdist=None):
    # import emma
    import msmtools.estimation as msmest
    # output matrix. Initially eye
    n = np.shape(C)[0]
    P = np.eye((n), dtype=np.float64)
    # treat each connected set separately
    S = msmest.connected_sets(C)
    for s in S:
        if len(s) > 1: # if there's only one state, there's nothing to estimate and we leave it with diagonal 1
            # compute transition sub-matrix on s
            Cs = C[s,:][:,s]
            Ps = msmest.transition_matrix(Cs, reversible = reversible, mu=fixed_statdist)
            # write back to the full matrix; note that P[s, :][:, s] = Ps
            # would assign to a temporary copy, so use an index grid instead
            P[np.ix_(s, s)] = Ps
    # done
    return P
Example 12
def rdl_decomposition(P, reversible=True):
    # TODO: this treatment is probably not meaningful for weakly connected matrices.
    import msmtools.estimation as msmest
    import msmtools.analysis as msmana
    # output matrices
    n = np.shape(P)[0]
    if reversible:
        dtype = np.float64
    else:
        dtype = complex
    R = np.zeros((n, n), dtype=dtype)
    D = np.zeros((n, n), dtype=dtype)
    L = np.zeros((n, n), dtype=dtype)
    # treat each strongly connected set separately
    S = msmest.connected_sets(P)
    for s in S:
        I = np.ix_(s, s)
        if len(s) > 1:
            if reversible:
                r, d, l = msmana.rdl_decomposition(P[s, :][:, s],
                                                   norm='reversible')
                # everything must be real-valued - this should rather be handled by msmtools
                R[I] = r.real
                D[I] = d.real
                L[I] = l.real
            else:
                r, d, l = msmana.rdl_decomposition(P[s, :][:, s],
                                                   norm='standard')
                # write to full
                R[I] = r
                D[I] = d
                L[I] = l
        else:  # just one element. Write 1's
            R[I] = 1
            D[I] = 1
            L[I] = 1
    # done
    return R, D, L
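As a quick sanity check, for a reversible and fully connected P the three factors should reproduce the input, P = R D L. A minimal sketch (assuming the function above is in scope):

import numpy as np

P = np.array([[0.9, 0.1],
              [0.2, 0.8]])
R, D, L = rdl_decomposition(P)
assert np.allclose(R @ D @ L, P)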
Example 14
    def _estimate(self, dtrajs):
        """ Estimate MSM """

        if self.core_set is not None:
            raise NotImplementedError(
                'Core set MSMs currently not compatible with {}.'.format(
                    self.__class__.__name__))

        # remove last lag steps from dtrajs:
        dtrajs_lag = [traj[:-self.lag] for traj in dtrajs]

        # get trajectory counts. This sets _C_full and _nstates_full
        dtrajstats = self._get_dtraj_stats(dtrajs_lag)
        self._C_full = dtrajstats.count_matrix()  # full count matrix
        self._nstates_full = self._C_full.shape[0]  # number of states

        # set active set. This is at the same time a mapping from active to full
        if self.connectivity == 'largest':
            self.active_set = dtrajstats.largest_connected_set
        else:
            raise NotImplementedError(
                'OOM based MSM estimation is only implemented for connectivity=\'largest\'.'
            )

        # FIXME: setting is_estimated before so that we can start using the parameters just set, but this is not clean!
        # is estimated
        self._is_estimated = True

        # if active set is empty, we can't do anything.
        if _np.size(self.active_set) == 0:
            raise RuntimeError('Active set is empty. Cannot estimate MSM.')

        # active count matrix and number of states
        self._C_active = dtrajstats.count_matrix(subset=self.active_set)
        self._nstates = self._C_active.shape[0]

        # computed derived quantities
        # back-mapping from full to lcs
        self._full2active = -1 * _np.ones(dtrajstats.nstates, dtype=int)
        self._full2active[self.active_set] = _np.arange(len(self.active_set))

        # Estimate transition matrix
        if self.connectivity == 'largest':
            # Re-sampling:
            if self.rank_Ct == 'bootstrap_counts':
                Ceff_full = msmest.effective_count_matrix(dtrajs_lag, self.lag)
                from pyerna.util.linalg import submatrix
                Ceff = submatrix(Ceff_full, self.active_set)
                smean, sdev = bootstrapping_count_matrix(Ceff, nbs=self.nbs)
            else:
                smean, sdev = bootstrapping_dtrajs(dtrajs_lag,
                                                   self.lag,
                                                   self._nstates_full,
                                                   nbs=self.nbs,
                                                   active_set=self._active_set)
            # Estimate two step count matrices:
            C2t = twostep_count_matrix(dtrajs, self.lag, self._nstates_full)
            # Rank decision:
            rank_ind = rank_decision(smean, sdev, tol=self.tol_rank)
            # Estimate OOM components:
            Xi, omega, sigma, l = oom_components(self._C_full.toarray(),
                                                 C2t,
                                                 rank_ind=rank_ind,
                                                 lcc=self.active_set)
            # Compute transition matrix:
            P, lcc_new = equilibrium_transition_matrix(
                Xi, omega, sigma, reversible=self.reversible)
        else:
            raise NotImplementedError(
                'OOM based MSM estimation is only implemented for connectivity=\'largest\'.'
            )

        # Update active set and derived quantities:
        if lcc_new.size < self._nstates:
            self._active_set = self._active_set[lcc_new]
            self._C_active = dtrajstats.count_matrix(subset=self.active_set)
            self._nstates = self._C_active.shape[0]
            self._full2active = -1 * _np.ones(dtrajstats.nstates, dtype=int)
            self._full2active[self.active_set] = _np.arange(
                len(self.active_set))
            warnings.warn(
                "Caution: Re-estimation of count matrix resulted in reduction of the active set."
            )

        # continue sparse or dense?
        if not self.sparse:
            # converting count matrices to arrays. As a result the
            # transition matrix and all subsequent properties will be
            # computed using dense arrays and dense matrix algebra.
            self._C_full = self._C_full.toarray()
            self._C_active = self._C_active.toarray()

        # Done. We set our own model parameters, so this estimator is
        # equal to the estimated model.
        self._dtrajs_full = dtrajs
        self._connected_sets = msmest.connected_sets(self._C_full)
        self._Xi = Xi
        self._omega = omega
        self._sigma = sigma
        self._eigenvalues_OOM = l
        self._rank_ind = rank_ind
        self._oom_rank = self._sigma.size
        self._C2t = C2t
        self.set_model_params(P=P,
                              pi=None,
                              reversible=self.reversible,
                              dt_model=self.timestep_traj.get_scaled(self.lag))

        return self
Example 15
# so we want to put those into one single state and construct a
# matrix with only 2 states, A and B, as follows:

# old state -> new state
# 0 -> A
# 1 -> A
# 2 -> B

# the membership matrix chi defines this map
# each row gives us a binary encoding for a given
# state 0, 1, 2 to a state A, B

# state  goes to A, B
chi = np.array([
    [1, 0],  # for state 0
    [1, 0],  # for state 1
    [0, 1],  # for state 2
])

# this could be a 'fuzzy' state assignment as well.

# the resulting 'coarse' transition matrix is this.
# compare the probabilities to the above one.

print(cg_transition_matrix(Tmat_full, chi))

C = np.array([[10, 1, 0], [2, 0, 3], [0, 0, 4]])
cc_directed = connected_sets(C)

pdb.set_trace()
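Note that cg_transition_matrix and Tmat_full are not defined in this snippet. One common, flux-preserving way to coarse-grain a transition matrix with a membership matrix chi weights the fine states by their stationary probabilities; the following is a hedged sketch of such a helper, an assumption about what cg_transition_matrix might do rather than the snippet's actual implementation:

import numpy as np
from msmtools.analysis import stationary_distribution

def cg_transition_matrix(T, chi):
    """Coarse-grain T onto the sets defined by chi (hypothetical helper)."""
    pi = stationary_distribution(T)         # stationary weights of fine states
    W = chi.T @ np.diag(pi)                 # (m, n) membership-weighted states
    M = W @ chi                             # coarse normalization matrix
    return np.linalg.solve(M, W @ T @ chi)  # row-stochastic coarse matrix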
Example 16
def pcca(P, m):
    """
    PCCA+ spectral clustering method with optimized memberships [1]_

    Clusters the first m eigenvectors of a transition matrix in order to cluster the states.
    This function does not assume that the transition matrix is fully connected. Disconnected sets
    will automatically define the first metastable states, with perfect membership assignments.

    Parameters
    ----------
    P : ndarray (n,n)
        Transition matrix.

    m : int
        Number of clusters to group to.

    Returns
    -------
    chi : ndarray (n x m)
        A matrix containing the probability or membership of each state to be assigned to each cluster.
        The rows sum to 1.

    References
    ----------
    [1] S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
        application to Markov state models and data classification.
        Adv Data Anal Classif 7, 147-179 (2013).
    [2] F. Noe, multiset PCCA and HMMs, in preparation.

    """
    # imports
    from msmtools.estimation import connected_sets
    from msmtools.analysis import eigenvalues, is_transition_matrix, hitting_probability

    # validate input
    n = np.shape(P)[0]
    if (m > n):
        raise ValueError(
            "Number of metastable states m = " + str(m) +
            " exceeds number of states of transition matrix n = " + str(n))
    if not is_transition_matrix(P):
        raise ValueError("Input matrix is not a transition matrix.")

    # prepare output
    chi = np.zeros((n, m))

    # test connectivity
    components = connected_sets(P)
    n_components = len(components)

    # store components as closed (with positive equilibrium distribution)
    # or as transition states (with vanishing equilibrium distribution)
    closed_components = []
    transition_states = []
    for i in range(n_components):
        component = components[i]  # np.argwhere(labels==i).flatten()
        rest = list(set(range(n)) - set(component))
        # is component closed?
        if (np.sum(P[component, :][:, rest]) == 0):
            closed_components.append(component)
        else:
            transition_states.append(component)
    n_closed_components = len(closed_components)
    closed_states = np.concatenate(closed_components)
    if len(transition_states) == 0:
        transition_states = np.array([], dtype=int)
    else:
        transition_states = np.concatenate(transition_states)

    # check if we have enough clusters to support the disconnected sets
    if (m < len(closed_components)):
        raise ValueError("Number of metastable states m = " + str(m) +
                         " is too small. Transition matrix has " +
                         str(len(closed_components)) +
                         " disconnected components")

    # We collect eigenvalues in order to decide which
    closed_components_Psub = []
    closed_components_ev = []
    closed_components_enum = []
    for i in range(n_closed_components):
        component = closed_components[i]
        # print "component ",i," ",component
        # compute eigenvalues in submatrix
        Psub = P[component, :][:, component]
        closed_components_Psub.append(Psub)
        closed_components_ev.append(eigenvalues(Psub))
        closed_components_enum.append(i * np.ones((component.size), dtype=int))

    # flatten (components may differ in size, so concatenate instead of np.array)
    closed_components_ev_flat = np.concatenate(closed_components_ev)
    closed_components_enum_flat = np.concatenate(closed_components_enum)
    # which components should be clustered?
    component_indexes = closed_components_enum_flat[np.argsort(
        closed_components_ev_flat)][0:m]
    # cluster each component
    ipcca = 0
    for i in range(n_closed_components):
        component = closed_components[i]
        # how many PCCA states in this component?
        m_by_component = np.shape(np.argwhere(component_indexes == i))[0]

        # if 1, then the result is trivial
        if (m_by_component == 1):
            chi[component, ipcca] = 1.0
            ipcca += 1
        elif (m_by_component > 1):
            #print "submatrix: ",closed_components_Psub[i]
            chi[component, ipcca:ipcca + m_by_component] = _pcca_connected(
                closed_components_Psub[i], m_by_component)
            ipcca += m_by_component
        else:
            raise RuntimeError("Component " + str(i) + " spuriously has " +
                               str(m_by_component) + " pcca sets")

    # finally assign all transition states
    # print "chi\n", chi
    # print "transition states: ",transition_states
    # print "closed states: ", closed_states
    if (transition_states.size > 0):
        # make all closed states absorbing, so we can see which closed state we hit first
        Pabs = P.copy()
        Pabs[closed_states, :] = 0.0
        Pabs[closed_states, closed_states] = 1.0
        for i in range(closed_states.size):
            # hitting probability to each closed state
            h = hitting_probability(Pabs, closed_states[i])
            for j in range(transition_states.size):
                # transition states belong to closed states with the hitting probability, and inherit their chi
                chi[transition_states[j]] += h[transition_states[j]] * chi[
                    closed_states[i]]

    # check if we have m metastable sets. If less than m, we must raise
    nmeta = np.count_nonzero(chi.sum(axis=0))
    assert m <= nmeta, str(m) + " metastable states requested, but transition matrix only has " + str(nmeta) \
                       + ". Consider using a prior or requesting fewer metastable states."

    # print "chi\n", chi
    return chi
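A minimal usage sketch on a transition matrix with two uncoupled blocks; the disconnected sets then become the two metastable states with hard 0/1 memberships (toy input assumed):

import numpy as np

P = np.array([[0.9, 0.1, 0.0, 0.0],
              [0.1, 0.9, 0.0, 0.0],
              [0.0, 0.0, 0.8, 0.2],
              [0.0, 0.0, 0.2, 0.8]])
chi = pcca(P, 2)
# rows 0-1 belong fully to one cluster, rows 2-3 to the other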
Example 17
def _pcca_connected(P, n, return_rot=False):
    """
    PCCA+ spectral clustering method with optimized memberships [1]_

    Clusters the first n eigenvectors of a transition matrix in order to cluster the states.
    This function assumes that the transition matrix is fully connected.

    Parameters
    ----------
    P : ndarray (n,n)
        Transition matrix.

    n : int
        Number of clusters to group to.

    Returns
    -------
    chi by default, or (chi,rot) if return_rot = True

    chi : ndarray (n x m)
        A matrix containing the probability or membership of each state to be assigned to each cluster.
        The rows sum to 1.

    rot_mat : ndarray (m x m)
        A rotation matrix that rotates the dominant eigenvectors to yield the PCCA memberships, i.e.:
        chi = np.dot(evecs, rot_matrix).

    References
    ----------
    [1] S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
        application to Markov state models and data classification.
        Adv Data Anal Classif 7, 147-179 (2013).

    """

    # test connectivity
    from msmtools.estimation import connected_sets

    labels = connected_sets(P)
    n_components = len(labels)
    if (n_components > 1):
        raise ValueError(
            "Transition matrix is disconnected. Cannot use pcca_connected.")

    from msmtools.analysis import stationary_distribution

    pi = stationary_distribution(P)
    # print "statdist = ",pi

    from msmtools.analysis import is_reversible

    if not is_reversible(P, mu=pi):
        raise ValueError(
            "Transition matrix does not fulfill detailed balance. "
            "Make sure to call pcca with a reversible transition matrix estimate"
        )
    # TODO: Susanna mentioned that she has a potential fix for nonreversible matrices by replacing each complex conjugate
    #      pair by the real and imaginary components of one of the two vectors. We could use this but would then need to
    #      orthonormalize all eigenvectors e.g. using Gram-Schmidt orthonormalization. Currently there is no theoretical
    #      foundation for this, so I'll skip it for now.

    # right eigenvectors, ordered
    from msmtools.analysis import eigenvectors

    evecs = eigenvectors(P, n)

    # orthonormalize
    for i in range(n):
        evecs[:, i] /= math.sqrt(np.dot(evecs[:, i] * pi, evecs[:, i]))
    # make first eigenvector positive
    evecs[:, 0] = np.abs(evecs[:, 0])

    # Is there a significant complex component?
    if not np.all(np.isreal(evecs)):
        warnings.warn(
            "The given transition matrix has complex eigenvectors, so it doesn't exactly fulfill detailed balance; "
            "forcing eigenvectors to be real and continuing. Be aware that this is not theoretically solid."
        )
    evecs = np.real(evecs)

    # create initial solution using PCCA+. This could have negative memberships
    (chi, rot_matrix) = _pcca_connected_isa(evecs, n)

    #print "initial chi = \n",chi

    # optimize the rotation matrix with PCCA++.
    rot_matrix = _opt_soft(evecs, rot_matrix, n)

    # These memberships should be nonnegative
    memberships = np.dot(evecs[:, :], rot_matrix)

    # We might still have numerical errors. Force memberships to be in [0,1]
    # print "memberships unnormalized: ",memberships
    memberships = np.maximum(0.0, memberships)
    memberships = np.minimum(1.0, memberships)
    # print "memberships unnormalized: ",memberships
    for i in range(0, np.shape(memberships)[0]):
        memberships[i] /= np.sum(memberships[i])

    # print "final chi = \n",chi

    return memberships
Example 18
    def _estimate(self, dtrajs):
        """
            Parameters
            ----------
            dtrajs : list containing ndarrays(dtype=int) or ndarray(n, dtype=int) or :class:`pyemma.msm.util.dtraj_states.DiscreteTrajectoryStats`
                discrete trajectories, stored as integer ndarrays (arbitrary size)
                or a single ndarray for only one trajectory.
            **params :
                Other keyword parameters if different from the settings when this estimator was constructed

            Returns
            -------
            MSM : :class:`pyemma.msm.EstimatedMSM` or :class:`pyemma.msm.MSM`

        """
        # ensure right format
        dtrajs = ensure_dtraj_list(dtrajs)
        # harvest discrete statistics
        if isinstance(dtrajs, _DiscreteTrajectoryStats):
            dtrajstats = dtrajs
        else:
            # compute and store discrete trajectory statistics
            dtrajstats = _DiscreteTrajectoryStats(dtrajs)
            # check if this MSM seems too large to be dense
            if dtrajstats.nstates > 4000 and not self.sparse:
                self.logger.warning(
                    'Building a dense MSM with ' + str(dtrajstats.nstates) +
                    ' states. This can be '
                    'inefficient or unfeasible in terms of both runtime and memory consumption. '
                    'Consider using sparse=True.')

        # count lagged
        dtrajstats.count_lagged(self.lag, count_mode=self.count_mode)

        # full count matrix and number of states
        self._C_full = dtrajstats.count_matrix()
        self._nstates_full = self._C_full.shape[0]

        # set active set. This is at the same time a mapping from active to full
        if self.connectivity == 'largest':
            if self.statdist_constraint is None:
                # statdist not given - full connectivity on all states
                self.active_set = dtrajstats.largest_connected_set
            else:
                # statdist given - simple connectivity on all nonzero probability states
                nz = _np.nonzero(self.statdist_constraint)[0]
                Cnz = dtrajstats.count_matrix(subset=nz)
                self.active_set = nz[msmest.largest_connected_set(
                    Cnz, directed=False)]
        else:
            # for 'None' and 'all' all visited states are active
            self.active_set = dtrajstats.visited_set

        # FIXME: setting is_estimated before so that we can start using the parameters just set, but this is not clean!
        # is estimated
        self._is_estimated = True

        # active count matrix and number of states
        self._C_active = dtrajstats.count_matrix(subset=self.active_set)
        self._nstates = self._C_active.shape[0]

        # computed derived quantities
        # back-mapping from full to lcs
        self._full2active = -1 * _np.ones(dtrajstats.nstates, dtype=int)
        self._full2active[self.active_set] = _np.arange(len(self.active_set))

        # restrict stationary distribution to active set
        if self.statdist_constraint is None:
            statdist_active = None
        else:
            statdist_active = self.statdist_constraint[self.active_set]
            statdist_active /= statdist_active.sum()  # renormalize

        # Estimate transition matrix
        if self.connectivity == 'largest':
            P = msmest.transition_matrix(self._C_active,
                                         reversible=self.reversible,
                                         mu=statdist_active,
                                         maxiter=self.maxiter,
                                         maxerr=self.maxerr)
        elif self.connectivity == 'none':
            # reversible mode only possible if active set is connected
            # - in this case all visited states are connected and thus
            # this mode is identical to 'largest'
            if self.reversible and not msmest.is_connected(self._C_active):
                raise ValueError(
                    'Reversible MSM estimation is not possible with connectivity mode \'none\', '
                    +
                    'because the set of all visited states is not reversibly connected'
                )
            P = msmest.transition_matrix(self._C_active,
                                         reversible=self.reversible,
                                         mu=statdist_active,
                                         maxiter=self.maxiter,
                                         maxerr=self.maxerr)
        else:
            raise NotImplementedError(
                'MSM estimation with connectivity=%s is currently not implemented.' %
                self.connectivity)

        # continue sparse or dense?
        if not self.sparse:
            # converting count matrices to arrays. As a result the
            # transition matrix and all subsequent properties will be
            # computed using dense arrays and dense matrix algebra.
            self._C_full = self._C_full.toarray()
            self._C_active = self._C_active.toarray()
            P = P.toarray()

        # Done. We set our own model parameters, so this estimator is
        # equal to the estimated model.
        self._dtrajs_full = dtrajs
        self._connected_sets = msmest.connected_sets(self._C_full)
        self.set_model_params(P=P,
                              pi=statdist_active,
                              reversible=self.reversible,
                              dt_model=self.timestep_traj.get_scaled(self.lag))

        return self
Example 19
    def count_lagged(self,
                     lag,
                     count_mode='sliding',
                     mincount_connectivity='1/n',
                     show_progress=True):
        r""" Counts transitions at given lag time

        Parameters
        ----------
        lag : int
            lagtime in trajectory steps

        count_mode : str, optional, default='sliding'
            mode to obtain count matrices from discrete trajectories. Should be one of:

            * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts
              at time indexes

              .. math:: (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

            * 'effective' : Uses an estimate of the transition counts that are
              statistically uncorrelated. Recommended when used with a
              Bayesian MSM.

            * 'sample' : A trajectory of length T will have :math:`T / \tau` counts
              at time indexes

              .. math:: (0 \rightarrow \tau), (\tau \rightarrow 2\tau), ..., (((T/\tau)-1)\tau \rightarrow T)

        show_progress: bool, default=True
            show the progress for the expensive effective count mode computation.

        """
        # store lag time
        self._lag = lag

        # Compute count matrix
        count_mode = count_mode.lower()
        if count_mode == 'sliding':
            self._C = msmest.count_matrix(self._dtrajs, lag, sliding=True)
        elif count_mode == 'sample':
            self._C = msmest.count_matrix(self._dtrajs, lag, sliding=False)
        elif count_mode == 'effective':
            from pyemma.util.reflection import getargspec_no_self
            argspec = getargspec_no_self(msmest.effective_count_matrix)
            kw = {}
            if show_progress and 'callback' in argspec.args:
                from pyemma._base.progress import ProgressReporter
                from pyemma._base.parallel import get_n_jobs

                pg = ProgressReporter()
                # this is a fast operation
                C_temp = msmest.count_matrix(self._dtrajs, lag, sliding=True)
                pg.register(C_temp.nnz, 'compute statistical inefficiencies')
                del C_temp
                callback = lambda: pg.update(1)
                kw['callback'] = callback
                kw['n_jobs'] = get_n_jobs()

            self._C = msmest.effective_count_matrix(self._dtrajs, lag, **kw)
        else:
            raise ValueError('Count mode ' + count_mode + ' is unknown.')

        # store mincount_connectivity
        if mincount_connectivity == '1/n':
            mincount_connectivity = 1.0 / np.shape(self._C)[0]
        self._mincount_connectivity = mincount_connectivity

        # Compute reversibly connected sets
        if self._mincount_connectivity > 0:
            self._connected_sets = \
                self._compute_connected_sets(self._C, mincount_connectivity=self._mincount_connectivity)
        else:
            self._connected_sets = msmest.connected_sets(self._C)

        # set sizes and count matrices on reversibly connected sets
        self._connected_set_sizes = np.zeros((len(self._connected_sets)))
        self._C_sub = np.empty(len(self._connected_sets), dtype=object)
        for i in range(len(self._connected_sets)):
            # set size
            self._connected_set_sizes[i] = len(self._connected_sets[i])
            # submatrix
            # self._C_sub[i] = submatrix(self._C, self._connected_sets[i])

        # largest connected set
        self._lcs = self._connected_sets[0]

        # if lcs has no counts, make lcs empty
        if submatrix(self._C, self._lcs).sum() == 0:
            self._lcs = np.array([], dtype=int)

        # mapping from full to lcs
        self._full2lcs = -1 * np.ones((self._nstates), dtype=int)
        self._full2lcs[self._lcs] = np.arange(len(self._lcs))

        # remember that this function was called
        self._counted_at_lag = True
Example 20
 def weakly_connected_sets(self):
     return msmest.connected_sets(self._Tij, directed=False)
Example 21
    def _estimate(self, dtrajs):
        if self.E is None or self.w is None or self.m is None:
            raise ValueError("E, w or m was not specified. Stopping.")

        # get trajectory counts. This sets _C_full and _nstates_full
        dtrajstats = self._get_dtraj_stats(dtrajs)
        self._C_full = dtrajstats.count_matrix()  # full count matrix
        self._nstates_full = self._C_full.shape[0]  # number of states

        # set active set. This is at the same time a mapping from active to full
        if self.connectivity == 'largest':
            # statdist not given - full connectivity on all states
            self.active_set = dtrajstats.largest_connected_set
        else:
            # for 'None' and 'all' all visited states are active
            self.active_set = dtrajstats.visited_set

        # FIXME: setting is_estimated before so that we can start using the parameters just set, but this is not clean!
        # is estimated
        self._is_estimated = True

        # if active set is empty, we can't do anything.
        if _np.size(self.active_set) == 0:
            raise RuntimeError('Active set is empty. Cannot estimate AMM.')

        # active count matrix and number of states
        self._C_active = dtrajstats.count_matrix(subset=self.active_set)
        self._nstates = self._C_active.shape[0]

        # computed derived quantities
        # back-mapping from full to lcs
        self._full2active = -1 * _np.ones(dtrajstats.nstates, dtype=int)
        self._full2active[self.active_set] = _np.arange(len(self.active_set))

        # slice out active states from E matrix

        _dset = list(set(_np.concatenate(self._dtrajs_full)))
        _rras = [_dset.index(s) for s in self.active_set]
        self.E_active = self.E[_rras]

        if not self.sparse:
            self._C_active = self._C_active.toarray()
            self._C_full = self._C_full.toarray()

        # reversibly counted
        self._C2 = 0.5 * (self._C_active + self._C_active.T)
        self._nz = _np.nonzero(self._C2)
        self._csum = _np.sum(self._C_active, axis=1)  # row sums C

        # get ranges of Markov model expectation values
        if self.support_ci == 1:
            self.E_min = _np.min(self.E_active, axis=0)
            self.E_max = _np.max(self.E_active, axis=0)
        else:
            # PyEMMA confidence interval calculation fails sometimes with conf=1.0
            self.E_min, self.E_max = _ci(self.E_active, conf=self.support_ci)

        # dimensions of E matrix
        self.n_mstates_active, self.n_exp_active = _np.shape(self.E_active)

        assert self.n_exp_active == len(self.w)
        assert self.n_exp_active == len(self.m)

        self.count_outside = []
        self.count_inside = []
        self._lls = []

        i = 0
        # Determine which experimental values are outside the support as defined by the Confidence interval
        for emi, ema, mm, mw in zip(self.E_min, self.E_max, self.m, self.w):
            if mm < emi or ema < mm:
                self.logger.info(
                    "Experimental value %f is outside the support (%f,%f)" %
                    (mm, emi, ema))
                self.count_outside.append(i)
            else:
                self.count_inside.append(i)
            i = i + 1

        self.logger.info(
            "Total experimental constraints outside support %d of %d" %
            (len(self.count_outside), len(self.E_min)))

        # A number of initializations
        self.P, self.pi = msmest.tmatrix(self._C_active,
                                         reversible=True,
                                         return_statdist=True)
        self.lagrange = _np.zeros(self.m.shape)
        self._pihat = self.pi.copy()
        self._update_mhat()
        self._dmhat = 1e-1 * _np.ones(_np.shape(self.mhat))

        # Determine number of slices of R-tensors computable at once with the given cache size
        self._slicesz = _np.floor(self.max_cache /
                                  (self.P.nbytes / 1.e6)).astype(int)
        # compute first bundle of slices
        self._update_Rslices(0)

        self._ll_old = self._log_likelihood_biased(self._C_active, self.P,
                                                   self.m, self.mhat, self.w)

        self._lls = [self._ll_old]

        # make sure everything is initialized

        self._update_pihat()
        self._update_mhat()

        self._update_Q()
        self._update_X_and_pi()

        self._ll_old = self._log_likelihood_biased(self._C_active, self.P,
                                                   self.m, self.mhat, self.w)
        self._update_G()

        #
        # Main estimation algorithm
        # 2-step algorithm, lagrange multipliers and pihat have different convergence criteria
        # when the lagrange multipliers have converged, pihat is updated until the log-likelihood has converged (changes are smaller than 1e-3).
        # These do not always converge together, but usually within a few steps of each other.
        # A better heuristic for the latter may be necessary. For realistic cases (the two ubiquitin examples in [1])
        # this yielded results very similar to those with more stringent convergence criteria (changes smaller than 1e-9) with convergence times
        # which are seconds instead of tens of minutes.
        #

        converged = False  # Convergence flag for lagrange multipliers
        i = 0
        die = False
        while i <= self.maxiter:
            pihat_old = self._pihat.copy()
            self._update_pihat()
            if not _np.all(self._pihat > 0):
                self._pihat = pihat_old.copy()
                die = True
                self.logger.warning(
                    "pihat does not have a finite probability for all states, terminating"
                )
            self._update_mhat()
            self._update_Q()
            if i > 1:
                X_old = self.X.copy()
                self._update_X_and_pi()
                if _np.any(self.X[self._nz] < 0) and i > 0:
                    die = True
                    self.logger.warning(
                        "Warning: new X is not proportional to C... reverting to previous step and terminating"
                    )
                    self.X = X_old.copy()

            if not converged:
                self._newton_lagrange()
            else:  # once Lagrange multipliers are converged compute likelihood here
                P = self.X / self.pi[:, None]
                _ll_new = self._log_likelihood_biased(self._C_active, P,
                                                      self.m, self.mhat,
                                                      self.w)
                self._lls.append(_ll_new)

            # General case fixed-point iteration
            if len(self.count_outside) > 0:
                if i > 1 and _np.all(
                    (_np.abs(self._dmhat) /
                     self.sigmas) < self.eps) and not converged:
                    self.logger.info(
                        "Converged Lagrange multipliers after %i steps..." % i)
                    converged = True
            # Special case
            else:
                if _np.abs(self._lls[-2] - self._lls[-1]) < 1e-8:
                    self.logger.info(
                        "Converged Lagrange multipliers after %i steps..." % i)
                    converged = True
            # if Lagrange multipliers are converged, check whether log-likelihood has converged
            if converged and _np.abs(self._lls[-2] - self._lls[-1]) < 1e-8:
                self.logger.info("Converged pihat after %i steps..." % i)
                die = True
            if die:
                break
            if i == self.maxiter:
                self.logger.info("Failed to converge within %i iterations. "
                                 "Consider increasing maxiter (now=%i)" %
                                 (i, self.maxiter))
            i += 1

        _P = msmest.tmatrix(self._C_active, reversible=True, mu=self._pihat)

        self._connected_sets = msmest.connected_sets(self._C_full)
        self.set_model_params(P=_P,
                              pi=self._pihat,
                              reversible=True,
                              dt_model=self.timestep_traj.get_scaled(self.lag))

        return self
Example 22
    def count_lagged(self,
                     lag,
                     count_mode='sliding',
                     mincount_connectivity='1/n',
                     show_progress=True,
                     n_jobs=None,
                     name='',
                     core_set=None,
                     milestoning_method='last_core'):
        r""" Counts transitions at given lag time

        Parameters
        ----------
        lag : int
            lagtime in trajectory steps

        count_mode : str, optional, default='sliding'
            mode to obtain count matrices from discrete trajectories. Should be one of:

            * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts
              at time indexes

              .. math:: (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

            * 'effective' : Uses an estimate of the transition counts that are
              statistically uncorrelated. Recommended when used with a
              Bayesian MSM.

            * 'sample' : A trajectory of length T will have :math:`T / \tau` counts
              at time indexes

              .. math:: (0 \rightarrow \tau), (\tau \rightarrow 2\tau), ..., (((T/\tau)-1)\tau \rightarrow T)

        show_progress: bool, default=True
            show the progress for the expensive effective count mode computation.

        n_jobs: int or None, default=None
            number of jobs for the parallel effective count computation;
            if None, a default is chosen automatically.
        """
        # store lag time
        self._lag = lag

        # Compute count matrix
        count_mode = count_mode.lower()
        if core_set is not None and count_mode in ('sliding', 'sample'):
            if milestoning_method == 'last_core':

                # assign -1 frames to last visited core
                for d in self._dtrajs:
                    assert d[0] != -1
                    while -1 in d:
                        mask = (d == -1)
                        d[mask] = d[np.roll(mask, -1)]
                self._C = msmest.count_matrix(self._dtrajs,
                                              lag,
                                              sliding=count_mode == 'sliding')

            else:
                raise NotImplementedError(
                    'Milestoning method {} not implemented.'.format(
                        milestoning_method))

        elif count_mode == 'sliding':
            self._C = msmest.count_matrix(self._dtrajs, lag, sliding=True)
        elif count_mode == 'sample':
            self._C = msmest.count_matrix(self._dtrajs, lag, sliding=False)
        elif count_mode == 'effective':
            if core_set is not None:
                raise RuntimeError(
                    'Cannot estimate core set MSM with effective counting.')
            from pyerna.util.reflection import getargspec_no_self
            argspec = getargspec_no_self(msmest.effective_count_matrix)
            kw = {}
            from pyerna.util.contexts import nullcontext
            ctx = nullcontext()
            if 'callback' in argspec.args:  # msmtools effective cmatrix ready for multiprocessing?
                from pyerna._base.progress import ProgressReporter
                from pyerna._base.parallel import get_n_jobs

                kw['n_jobs'] = get_n_jobs() if n_jobs is None else n_jobs

                if show_progress:
                    pg = ProgressReporter()
                    # this is a fast operation
                    C_temp = msmest.count_matrix(self._dtrajs,
                                                 lag,
                                                 sliding=True)
                    pg.register(
                        C_temp.nnz,
                        '{}: compute stat. inefficiencies'.format(name),
                        stage=0)
                    del C_temp
                    kw['callback'] = pg.update
                    ctx = pg.context(stage=0)
            with ctx:
                self._C = msmest.effective_count_matrix(
                    self._dtrajs, lag, **kw)
        else:
            raise ValueError('Count mode ' + count_mode + ' is unknown.')

        # store mincount_connectivity
        if mincount_connectivity == '1/n':
            mincount_connectivity = 1.0 / np.shape(self._C)[0]
        self._mincount_connectivity = mincount_connectivity

        # Compute reversibly connected sets
        if self._mincount_connectivity > 0:
            self._connected_sets = \
                self._compute_connected_sets(self._C, mincount_connectivity=self._mincount_connectivity)
        else:
            self._connected_sets = msmest.connected_sets(self._C)

        # set sizes and count matrices on reversibly connected sets
        self._connected_set_sizes = np.zeros((len(self._connected_sets)))
        self._C_sub = np.empty(len(self._connected_sets), dtype=object)
        for i in range(len(self._connected_sets)):
            # set size
            self._connected_set_sizes[i] = len(self._connected_sets[i])
            # submatrix
            # self._C_sub[i] = submatrix(self._C, self._connected_sets[i])

        # largest connected set
        self._lcs = self._connected_sets[0]

        # if lcs has no counts, make lcs empty
        if submatrix(self._C, self._lcs).sum() == 0:
            self._lcs = np.array([], dtype=int)

        # mapping from full to lcs
        self._full2lcs = -1 * np.ones((self._nstates), dtype=int)
        self._full2lcs[self._lcs] = np.arange(len(self._lcs))

        # remember that this function was called
        self._counted_at_lag = True
Example 23
def estimate_pi_error(dtrajs,
                      orig_msm,
                      ntrails=10,
                      conf_interval=0.68,
                      return_samples=False):
    """
    Estimate boostrap error for stationary probability
    
    :param dtrajs: list of np.array, discrete trajectories
    :param orig_msm: pyemma.msm.MarkovModel
    Only used for reference of lag time and to incorporate ML 
    stationary distribution to data frame
    :param ntrails: int, the number of bootstrap samples to draw. 
    :param conf_interval: float 0 < conf_interval < 1
    
    :return:
    pandas.DataFrame instance containing ML MSM pi and bootstrap error
    """
    from pyemma.util.statistics import confidence_interval

    #pi_samples = np.zeros((ntrails, len(orig_msm.nstates)))
    pi_samples = np.zeros((ntrails, orig_msm.count_matrix_full.shape[0]))
    all_states = np.arange(start=0,
                           stop=orig_msm.count_matrix_full.shape[0],
                           step=1)
    for trial in tqdm(range(ntrails)):
        try:
            bs_sample = np.random.choice(len(dtrajs),
                                         size=len(dtrajs),
                                         replace=True)
            dtraj_sample = list(np.array(dtrajs)[bs_sample])

            msm = pyemma.msm.estimate_markov_model(dtraj_sample,
                                                   lag=orig_msm.lag)
            stationary_probs = msm.pi
            if len(connected_sets(msm.count_matrix_full)) > 1:
                disconnected_states = [
                    element for element in all_states
                    if element not in connected_sets(msm.count_matrix_full)[0]
                ]
                if len(disconnected_states) > 0:
                    for element in disconnected_states:
                        stationary_probs = np.insert(stationary_probs, element,
                                                     0)

            #pi_samples[trial, msm.active_set] = stationary_probs

            pi_samples[trial, all_states] = stationary_probs
        except Exception as e:
            pdb.set_trace()
            print(e)
    if return_samples:
        return pi_samples

    std = pi_samples.std(axis=0)
    lower_confidence, upper_confidence = confidence_interval(
        pi_samples, conf_interval)

    probabilities = pd.DataFrame(
        np.array([
            orig_msm.active_set, orig_msm.pi, std, lower_confidence,
            upper_confidence
        ]).T,
        columns=['State', 'StatDist', 'Std', 'LowerConf', 'UpperConf'],
    )

    # type cast to int
    probabilities['State'] = probabilities['State'].astype(int)

    return probabilities
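A hedged usage sketch with synthetic discrete trajectories (assumes the module-level imports used above, e.g. numpy, pandas, tqdm, pyemma and connected_sets, are present):

import numpy as np
import pyemma

dtrajs = [np.random.randint(0, 3, size=1000) for _ in range(5)]
msm = pyemma.msm.estimate_markov_model(dtrajs, lag=1)
df = estimate_pi_error(dtrajs, msm, ntrails=20, conf_interval=0.68)
print(df.head())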
Example 24
    def _estimate(self, dtrajs):
        # ensure right format
        dtrajs = ensure_dtraj_list(dtrajs)
        # harvest discrete statistics
        if isinstance(dtrajs, _DiscreteTrajectoryStats):
            dtrajstats = dtrajs
        else:
            # compute and store discrete trajectory statistics
            dtrajstats = _DiscreteTrajectoryStats(dtrajs)
            # check if this MSM seems too large to be dense
            if dtrajstats.nstates > 4000 and not self.sparse:
                self.logger.warning('Building a dense MSM with ' + str(dtrajstats.nstates) + ' states. This can be '
                                  'inefficient or unfeasible in terms of both runtime and memory consumption. '
                                  'Consider using sparse=True.')

        # count lagged
        dtrajstats.count_lagged(self.lag, count_mode=self.count_mode)

        # full count matrix and number of states
        self._C_full = dtrajstats.count_matrix()
        self._nstates_full = self._C_full.shape[0]

        # set active set. This is at the same time a mapping from active to full
        if self.connectivity == 'largest':
            if self.statdist_constraint is None:
                # statdist not given - full connectivity on all states
                self.active_set = dtrajstats.largest_connected_set
            else:
                active_set = self._prepare_input_revpi(self._C_full,
                                                       self.statdist_constraint)
                self.active_set = active_set
        else:
            # for 'None' and 'all' all visited states are active
            self.active_set = dtrajstats.visited_set

        # FIXME: setting is_estimated before so that we can start using the parameters just set, but this is not clean!
        # is estimated
        self._is_estimated = True

        # if active set is empty, we can't do anything.
        if _np.size(self.active_set) == 0:
            raise RuntimeError('Active set is empty. Cannot estimate MSM.')

        # active count matrix and number of states
        self._C_active = dtrajstats.count_matrix(subset=self.active_set)
        self._nstates = self._C_active.shape[0]

        # computed derived quantities
        # back-mapping from full to lcs
        self._full2active = -1 * _np.ones((dtrajstats.nstates), dtype=int)
        self._full2active[self.active_set] = _np.arange(len(self.active_set))

        # restrict stationary distribution to active set
        if self.statdist_constraint is None:
            statdist_active = None
        else:
            statdist_active = self.statdist_constraint[self.active_set]
            statdist_active /= statdist_active.sum()  # renormalize

        # Estimate transition matrix
        if self.connectivity == 'largest':
            P = msmest.transition_matrix(self._C_active, reversible=self.reversible,
                                         mu=statdist_active, maxiter=self.maxiter,
                                         maxerr=self.maxerr)
        elif self.connectivity == 'none':
            # reversible mode only possible if active set is connected
            # - in this case all visited states are connected and thus
            # this mode is identical to 'largest'
            if self.reversible and not msmest.is_connected(self._C_active):
                raise ValueError('Reversible MSM estimation is not possible with connectivity mode "none", '
                                 'because the set of all visited states is not reversibly connected')
            P = msmest.transition_matrix(self._C_active, reversible=self.reversible,
                                         mu=statdist_active,
                                         maxiter=self.maxiter, maxerr=self.maxerr)
        else:
            raise NotImplementedError(
                'MSM estimation with connectivity=%s is currently not implemented.' % self.connectivity)

        # continue sparse or dense?
        if not self.sparse:
            # converting count matrices to arrays. As a result the
            # transition matrix and all subsequent properties will be
            # computed using dense arrays and dense matrix algebra.
            self._C_full = self._C_full.toarray()
            self._C_active = self._C_active.toarray()
            P = P.toarray()

        # Done. We set our own model parameters, so this estimator is
        # equal to the estimated model.
        self._dtrajs_full = dtrajs
        self._connected_sets = msmest.connected_sets(self._C_full)
        self.set_model_params(P=P, pi=statdist_active, reversible=self.reversible,
                              dt_model=self.timestep_traj.get_scaled(self.lag))

        return self
Example 25
 def strongly_connected_sets(self):
     return msmest.connected_sets(self._Tij, directed=True)