Example #1
0
    def update(self, Tij, Pi=None):
        r""" Updates the transition matrix and recomputes all derived quantities

        Parameters
        ----------
        Tij : array_like
            Hidden transition matrix. A copy is stored; its first dimension must
            equal the number of hidden states of this model.
        Pi : array_like, optional, default=None
            Non-negative weights of the initial distribution; they are normalized
            to sum to one. If None, the stationary distribution of `Tij` is used.
            For a stationary model a given `Pi` must agree with the stationary
            distribution of `Tij`.

        Raises
        ------
        AssertionError
            If `Tij` is not a transition matrix, has an unexpected number of
            states, is not reversible although a reversible model was requested,
            or if `Pi` has negative entries or contradicts a stationary model.

        """
        # EMMA imports
        from msmtools import analysis as msmana

        # save a copy of the transition matrix
        self._Tij = np.array(Tij)
        assert msmana.is_transition_matrix(
            self._Tij), 'Given transition matrix is not a stochastic matrix'
        assert self._Tij.shape[
            0] == self._nstates, 'Given transition matrix has unexpected number of states '

        # initial / stationary distribution
        if Pi is not None:
            # Convert before comparing: `Pi >= 0` on a plain list or tuple
            # raises TypeError instead of checking the entries.
            Pi = np.array(Pi)
            assert np.all(
                Pi >= 0
            ), 'Given initial distribution contains negative elements.'
            Pi = Pi / np.sum(Pi)  # ensure normalization (creates a new array)

        if self._stationary:
            pT = msmana.stationary_distribution(self._Tij)
            if Pi is None:  # stationary and no stationary distribution fixed, so computing it from trans. mat.
                self._Pi = pT
            else:  # stationary but stationary distribution is fixed, so the transition matrix must be consistent
                assert np.allclose(Pi, pT), 'Stationary HMM requested, but given distribution is not the ' \
                                            'stationary distribution of the given transition matrix.'
                self._Pi = Pi
        else:
            if Pi is None:  # no initial distribution given, so use stationary distribution anyway
                self._Pi = msmana.stationary_distribution(self._Tij)
            else:
                self._Pi = Pi

        # reversible: check the stored ndarray copy, i.e. the same object that
        # passed the transition-matrix checks above, not the raw input.
        if self._reversible:
            assert msmana.is_reversible(
                self._Tij
            ), 'Reversible HMM requested, but given transition matrix is not reversible.'

        # try to do eigendecomposition by default, because it's very cheap for hidden transition matrices
        from scipy.linalg import LinAlgError
        try:
            if self._reversible:
                self._R, self._D, self._L = msmana.rdl_decomposition(
                    self._Tij, norm='reversible')
                # everything must be real-valued
                self._R = self._R.real
                self._D = self._D.real
                self._L = self._L.real
            else:
                self._R, self._D, self._L = msmana.rdl_decomposition(
                    self._Tij, norm='standard')
            self._eigenvalues = np.diag(self._D)
            self._spectral_decomp_available = True
        except LinAlgError:
            # Best effort: the model stays usable, only spectral quantities are missing.
            logger().warn('Eigendecomposition failed for transition matrix\n' +
                          str(self._Tij) +
                          '\nspectral properties will not be available')
            self._spectral_decomp_available = False
Example #2
0
    def update(self, Tij, Pi=None):
        r""" Updates the transition matrix and recomputes all derived quantities

        Parameters
        ----------
        Tij : array_like
            Hidden transition matrix. A copy is stored; its first dimension must
            equal the number of hidden states of this model.
        Pi : array_like, optional, default=None
            Non-negative weights of the initial distribution; they are normalized
            to sum to one. If None, the stationary distribution of `Tij` is used.
            For a stationary model a given `Pi` must agree with the stationary
            distribution of `Tij`.

        Raises
        ------
        AssertionError
            If `Tij` is not a transition matrix, has an unexpected number of
            states, is not reversible although a reversible model was requested,
            or if `Pi` has negative entries or contradicts a stationary model.

        """
        # EMMA imports
        from msmtools import analysis as msmana

        # save a copy of the transition matrix
        self._Tij = np.array(Tij)
        assert msmana.is_transition_matrix(self._Tij), 'Given transition matrix is not a stochastic matrix'
        assert self._Tij.shape[0] == self._nstates, 'Given transition matrix has unexpected number of states '

        # initial / stationary distribution
        if Pi is not None:
            # Convert before comparing: `Pi >= 0` on a plain list or tuple
            # raises TypeError instead of checking the entries.
            Pi = np.array(Pi)
            assert np.all(Pi >= 0), 'Given initial distribution contains negative elements.'
            Pi = Pi / np.sum(Pi)  # ensure normalization (creates a new array)

        if self._stationary:
            pT = msmana.stationary_distribution(self._Tij)
            if Pi is None: # stationary and no stationary distribution fixed, so computing it from trans. mat.
                self._Pi = pT
            else: # stationary but stationary distribution is fixed, so the transition matrix must be consistent
                assert np.allclose(Pi, pT), 'Stationary HMM requested, but given distribution is not the ' \
                                            'stationary distribution of the given transition matrix.'
                self._Pi = Pi
        else:
            if Pi is None: # no initial distribution given, so use stationary distribution anyway
                self._Pi = msmana.stationary_distribution(self._Tij)
            else:
                self._Pi = Pi

        # reversible: check the stored ndarray copy, i.e. the same object that
        # passed the transition-matrix checks above, not the raw input.
        if self._reversible:
            assert msmana.is_reversible(self._Tij), 'Reversible HMM requested, but given transition matrix is not reversible.'

        # try to do eigendecomposition by default, because it's very cheap for hidden transition matrices
        from scipy.linalg import LinAlgError
        try:
            if self._reversible:
                self._R, self._D, self._L = msmana.rdl_decomposition(self._Tij, norm='reversible')
                # everything must be real-valued
                self._R = self._R.real
                self._D = self._D.real
                self._L = self._L.real
            else:
                self._R, self._D, self._L = msmana.rdl_decomposition(self._Tij, norm='standard')
            self._eigenvalues = np.diag(self._D)
            self._spectral_decomp_available = True
        except LinAlgError:
            # Best effort: the model stays usable, only spectral quantities are missing.
            logger().warn('Eigendecomposition failed for transition matrix\n'+str(self._Tij)+
                          '\nspectral properties will not be available')
            self._spectral_decomp_available = False
Example #3
0
    def setUp(self):
        """Seed the global RNG and estimate a reference MSM from a simulated trajectory."""
        # Remember the RNG state that was active before reseeding.
        self.state = np.random.mtrand.get_state()
        # Reseed the rng to enforce 'deterministic' behavior
        np.random.mtrand.seed(42)

        # Meta-stable birth-death chain on 7 states: q and p are the two
        # probability vectors handed to BirthDeathChain.
        b = 2
        small = 10**(-b)
        q = np.zeros(7)
        p = np.zeros(7)
        q[1:] = 0.5
        p[0:-1] = 0.5
        q[2] = 1.0 - small
        q[4] = small
        p[2] = small
        p[4] = 1.0 - small

        chain = BirthDeathChain(q, p)
        P = chain.transition_matrix()
        self.dtraj = generate_traj(P, 10000, start=0)
        self.tau = 1

        # Estimate MSM: count matrix -> largest connected set -> reversible estimate
        self.C_MSM = count_matrix(self.dtraj, self.tau, sliding=True)
        self.lcc_MSM = largest_connected_set(self.C_MSM)
        self.Ccc_MSM = largest_connected_submatrix(self.C_MSM, lcc=self.lcc_MSM)
        self.P_MSM = transition_matrix(self.Ccc_MSM, reversible=True)
        self.mu_MSM = stationary_distribution(self.P_MSM)
        self.k = 3
        self.ts = timescales(self.P_MSM, k=self.k, tau=self.tau)
Example #4
0
 def sample(self, N=1, return_statdist=False):
     """Advance the sampler and return the current transition matrix.

     Parameters
     ----------
     N : int, optional, default=1
         Forwarded unchanged to ``self.update(N=N)``.
     return_statdist : bool, optional, default=False
         If True, also return the stationary distribution of ``self.P``.

     Returns
     -------
     ``self.P``, or the tuple ``(self.P, pi)`` when `return_statdist` is True.
     """
     self.update(N=N)
     if not return_statdist:
         return self.P
     pi = stationary_distribution(self.P)
     return self.P, pi
Example #5
0
def coarsegrain(P, n):
    r"""
    Coarse-grains transition matrix P to n sets using PCCA

    With the membership matrix M returned by ``pcca(P, n)``, this function
    computes

    .. math::
        \tilde{P} = (M^T M)^{-1} M^T P M

    and afterwards rescales every row of the result to sum to one.

    See [2]_ for the derivation of this form from the coarse-graining method first derived in [1]_.

    Parameters
    ----------
    P : ndarray (n_fine, n_fine)
        Fine-grained transition matrix.
    n : int
        Number of coarse-grained sets.

    Returns
    -------
    P_coarse : ndarray
        Row-normalized coarse-grained transition matrix.

    References
    ----------
    .. [1] S. Kube and M. Weber
        A coarse graining method for the identification of transition rates between molecular conformations.
        J. Chem. Phys. 126, 024103 (2007)
    .. [2] F. Noe, H. Wu, J.-H. Prinz and N. Plattner:
        Projected and hidden Markov models for calculating kinetics and metastable states of complex molecules
        J. Chem. Phys. 139, 184114 (2013)
    """
    M = pcca(P, n)
    # coarse-grained transition matrix
    W = np.linalg.inv(np.dot(M.T, M))
    A = np.dot(np.dot(M.T, P), M)
    P_coarse = np.dot(W, A)

    # Weight the rows by the coarse-grained stationary distribution and
    # renormalize each row to eliminate numerical errors in the row sums.
    # NOTE(review): no explicit symmetrization of X is performed here; the row
    # weights cancel in the normalization, so this only enforces row sums of one.
    from msmtools.analysis import stationary_distribution
    pi_coarse = np.dot(M.T, stationary_distribution(P))
    X = np.dot(np.diag(pi_coarse), P_coarse)
    P_coarse = X / X.sum(axis=1)[:, None]

    return P_coarse
Example #6
0
    def test_trajectories(self):
        """Check count, start-state statistics and stopping behavior of generate_trajs."""
        P = np.array([[0.9, 0.1], [0.1, 0.9]])
        n_trajs = 10
        length = 10

        # number of returned trajectories equals the requested number
        trajs = msmgen.generate_trajs(P, n_trajs, length, start=0)
        assert len(trajs) == n_trajs

        # length-1 trajectories without a fixed start: the empirical distribution
        # of start states should be close to the stationary distribution of P
        one_step = msmgen.generate_trajs(P, 1000, 1)
        starts = np.concatenate(one_step).astype(int)
        pi = msmana.stationary_distribution(P)
        piest = msmest.count_states(starts) / 1000.0
        assert np.max(np.abs(pi - piest)) < 0.025

        # stopping state equal to the starting state: stop immediately
        n_trajs = 10
        trajs = msmgen.generate_trajs(P, n_trajs, length, start=0, stop=0)
        assert all(traj.size == 1 for traj in trajs)

        # a trajectory either runs to full length or ends at the stopping
        # state, and never visits the stopping state earlier
        n_trajs = 100
        stop = 1
        trajs = msmgen.generate_trajs(P, n_trajs, length, start=0, stop=stop)
        for traj in trajs:
            assert traj.size == length or traj[-1] == stop
            assert stop not in traj[:-1]
Example #7
0
    def setUpClass(cls) -> None:
        """Seed the global RNG and estimate the reference MSM stored on the class."""
        # Remember the RNG state that was active before reseeding.
        cls.state = np.random.mtrand.get_state()

        # Reseed the rng to enforce 'deterministic' behavior
        np.random.mtrand.seed(42)

        # Meta-stable birth-death chain on 7 states
        b = 2
        small = 10 ** (-b)
        q = np.zeros(7)
        p = np.zeros(7)
        q[1:] = 0.5
        p[0:-1] = 0.5
        q[2] = 1.0 - small
        q[4] = small
        p[2] = small
        p[4] = 1.0 - small

        chain = BirthDeathChain(q, p)
        P = chain.transition_matrix()
        cls.dtraj = generate_traj(P, 10000, start=0)
        cls.tau = 1

        # Estimate MSM, reusing the default `maxerr` of MaximumLikelihoodMSM.
        import inspect
        spec = inspect.getfullargspec(MaximumLikelihoodMSM)
        # The -1 offset presumably skips `self`, assuming every other argument
        # has a default value -- TODO confirm against the constructor signature.
        maxerr_position = spec.args.index('maxerr') - 1
        default_maxerr = spec.defaults[maxerr_position]

        cls.C_MSM = msmest.count_matrix(cls.dtraj, cls.tau, sliding=True)
        cls.lcc_MSM = msmest.largest_connected_set(cls.C_MSM)
        cls.Ccc_MSM = msmest.largest_connected_submatrix(cls.C_MSM, lcc=cls.lcc_MSM)
        cls.P_MSM = msmest.transition_matrix(cls.Ccc_MSM, reversible=True, maxerr=default_maxerr)
        cls.mu_MSM = msmana.stationary_distribution(cls.P_MSM)
        cls.k = 3
        cls.ts = msmana.timescales(cls.P_MSM, k=cls.k, tau=cls.tau)
Example #8
0
def force_spectroscopy_model():
    """
    Build a fixed three-state HMM with Gaussian outputs, intended to be
    representative of single-molecule force spectroscopy experiments.

    The initial distribution is the stationary distribution of the hard-coded
    transition matrix.

    Returns
    -------
    model : HMM
        The synthetic HMM model.

    Examples
    --------

    >>> model = force_spectroscopy_model()

    """
    nstates = 3

    # State emission probabilities: one Gaussian per hidden state.
    means = [3.0, 4.7, 5.6]
    sigmas = [1.0, 0.3, 0.2]
    output_model = GaussianOutputModel(nstates, means=means, sigmas=sigmas)

    # Reversible transition matrix.
    Tij = np.array([
        [0.98, 0.01540412, 0.00459588],
        [0.06331175, 0.9, 0.03668825],
        [0.00339873, 0.00660127, 0.99],
    ])

    # Use stationary distribution as initial distribution
    import msmtools.analysis as msmana
    pi = msmana.stationary_distribution(Tij)

    # Assemble the HMM from initial distribution, transition matrix and outputs.
    from bhmm import HMM
    return HMM(pi, Tij, output_model)
Example #9
0
def transition_matrix_reversible_pisym(C):
    r"""
    Estimates reversible transition matrix as follows:

    .. math::
        p_{ij} = c_{ij} / c_i \text{ where } c_i = \sum_j c_{ij}

        \pi_j = \sum_i \pi_i p_{ij}

        x_{ij} = \pi_i p_{ij} + \pi_j p_{ji}

        p^{rev}_{ij} = x_{ij} / x_i \text{ where } x_i = \sum_j x_{ij}

    In words: takes the nonreversible transition matrix estimate, uses its
    stationary distribution to compute an equilibrium correlation matrix,
    symmetrizes that correlation matrix and then normalizes to the reversible
    transition matrix estimate.

    Parameters
    ----------
    C: ndarray, shape (n,n)
        count matrix

    Returns
    -------
    T: Estimated transition matrix

    """
    from msmtools.analysis import stationary_distribution

    # nonreversible estimate and its stationary distribution
    T_nonrev = transition_matrix_non_reversible(C)
    pi = stationary_distribution(T_nonrev)

    # equilibrium correlation matrix, then its symmetrized version
    corr = pi[:, None] * T_nonrev
    sym = corr.T + corr

    # row-normalize to obtain the reversible estimate
    row_sums = sym.sum(axis=1)
    return sym / row_sums[:, None]
Example #10
0
def pcca(P, m):
    """PCCA+ spectral clustering method with optimized memberships [1]_

    Clusters the first m eigenvectors of a transition matrix in order to cluster the states.
    This function does not assume that the transition matrix is fully connected. Disconnected sets
    will automatically define the first metastable states, with perfect membership assignments.

    Parameters
    ----------
    P : ndarray (n,n)
        Transition matrix. A scipy sparse matrix is converted to a dense
        array (with a warning).

    m : int
        Number of clusters to group to. Must satisfy ``0 < m <= n``.

    Returns
    -------
    PCCAModel
        Built from the coarse-grained transition matrix, the coarse-grained
        stationary distribution, the memberships and the output matrix.

    References
    ----------
    [1] S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
        application to Markov state models and data classification.
        Adv Data Anal Classif 7, 147-179 (2013).
    """
    assert 0 < m <= P.shape[0]

    from scipy.sparse import issparse
    if issparse(P):
        warnings.warn(
            'PCCA is only implemented for dense matrices, '
            'converting sparse transition matrix to dense ndarray.',
            stacklevel=2)
        P = P.toarray()

    # memberships
    # TODO: can be improved. pcca computes stationary distribution internally, we don't need to compute it twice.
    from msmtools.analysis.dense.pcca import pcca as _algorithm_impl
    memberships = _algorithm_impl(P, m)

    # stationary distribution
    # TODO: in msmtools we recomputed this from P, we actually want to use pi from the msm obj, but this caused #1208
    from msmtools.analysis import stationary_distribution
    pi = stationary_distribution(P)

    # coarse-grained stationary distribution
    pi_coarse = np.dot(memberships.T, pi)

    # HMM output matrix, renormalized to make it row-stochastic
    inv_pi_coarse = np.diag(1.0 / pi_coarse)
    B = mdot(inv_pi_coarse, memberships.T, np.diag(pi))
    B /= B.sum(axis=1)[:, None]

    # coarse-grained transition matrix: (M^T M)^{-1} (M^T P M)
    overlap_inv = np.linalg.inv(np.dot(memberships.T, memberships))
    projected = np.dot(np.dot(memberships.T, P), memberships)
    P_coarse = np.dot(overlap_inv, projected)

    # weight rows by pi_coarse and renormalize to eliminate numerical errors
    weighted = np.dot(np.diag(pi_coarse), P_coarse)
    P_coarse = weighted / weighted.sum(axis=1)[:, None]

    return PCCAModel(P_coarse, pi_coarse, memberships, B)
def initial_guess_gaussian_from_data(dtrajs, n_hidden_states, reversible):
    r""" Makes an initial guess :class:`HMM <HiddenMarkovStateModel>` with Gaussian output model.

    A Gaussian mixture model is fitted to the pooled observations using
    `scikit-learn <https://scikit-learn.org/>`_; its means and covariances
    define the output model. Transition counts are then accumulated from the
    normalized per-frame state probabilities of consecutive frames.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Trajectories which are used for making the initial guess.
    n_hidden_states : int
        Number of hidden states.
    reversible : bool
        Whether the hidden transition matrix is estimated so that it is reversible.

    Returns
    -------
    hmm_init : HiddenMarkovStateModel
        An initial guess for the HMM

    See Also
    --------
    GaussianOutputModel : The type of output model this heuristic uses.
    initial_guess_discrete_from_data : Initial guess with :class:`Discrete output model <sktime.markov.hmm.DiscreteOutputModel>`.
    initial_guess_discrete_from_msm : Initial guess from an already
                                      existing :class:`MSM <sktime.markov.msm.MarkovStateModel>`
                                      with discrete output model.
    """
    from sklearn.mixture import GaussianMixture
    import msmtools.estimation as msmest
    import msmtools.analysis as msmana

    dtrajs = ensure_dtraj_list(dtrajs)

    # Fit the mixture on all observations at once (one feature column).
    pooled = np.concatenate(dtrajs)
    gmm = GaussianMixture(n_components=n_hidden_states)
    gmm.fit(pooled[:, None])
    output_model = GaussianOutputModel(
        n_hidden_states,
        means=gmm.means_[:, 0],
        sigmas=np.sqrt(gmm.covariances_[:, 0]),
    )

    # Fractional transition counts between hidden states.
    Nij = np.zeros((n_hidden_states, n_hidden_states))
    for traj in dtrajs:
        n_frames = traj.shape[0]
        # per-frame output probabilities, normalized over the hidden states
        pobs = output_model.to_state_probability_trajectory(traj)
        pobs /= pobs.sum(axis=1)[:, None]
        # every pair of consecutive frames contributes an outer product
        for frame in range(n_frames - 1):
            Nij += np.outer(pobs[frame, :], pobs[frame + 1, :])

    # Transition matrix maximum likelihood estimate and its stationary distribution.
    Tij = msmest.transition_matrix(Nij, reversible=reversible)
    pi = msmana.stationary_distribution(Tij)
    return HiddenMarkovStateModel(transition_model=Tij,
                                  output_model=output_model,
                                  initial_distribution=pi)
Example #12
0
    def run(self, maxiter=100000, on_error='raise'):
        """Estimate the rate matrix as ``(T - I) / dt`` from a reversible transition matrix.

        ``self.T`` is estimated from the counts ``self.C``. If ``self.pi`` is
        None it is set to the stationary distribution of ``self.T``; otherwise
        it is passed to the estimator as ``mu``.

        Parameters
        ----------
        maxiter : int, optional
            Not used by this implementation.
        on_error : str, optional
            Not used by this implementation.

        Returns
        -------
        The rate matrix, which is also stored in ``self.K``.
        """
        from msmtools.estimation import transition_matrix
        from msmtools.analysis import stationary_distribution

        if self.pi is not None:
            self.T = transition_matrix(self.C, reversible=True, mu=self.pi)
        else:
            self.T = transition_matrix(self.C, reversible=True)
            self.pi = stationary_distribution(self.T)

        identity = np.eye(self.N)
        self.K = (self.T - identity) / self.dt
        return self.K
Example #13
0
 def test_stats(self):
     """Start states of length-1 trajectories should follow the stationary distribution."""
     n_trajs = 5000
     trajs = msmgen.generate_trajs(self.P, n_trajs, 1,
                                   random_state=self.random_state)
     starts = np.concatenate(trajs).astype(int)
     pi = msmana.stationary_distribution(self.P)
     # empirical frequency of each start state
     piest = msmest.count_states(starts) / float(n_trajs)
     np.testing.assert_allclose(piest, pi, atol=0.025)
Example #14
0
def init_model_gaussian1d(observations, n_states, lag, reversible=True):
    """Generate an initial model with 1D-Gaussian output densities

    Parameters
    ----------
    observations : list of ndarray((T_i), dtype=float)
        list of arrays of length T_i with observation data
    n_states : int
        The number of states.
    lag : int
        Passed on to the HMM constructor as ``lag``.
    reversible : bool, optional, default=True
        Passed on to the transition matrix estimator.

    Returns
    -------
    model : HMM
        Initial model with the estimated transition matrix, its stationary
        distribution as initial distribution, and the fitted output model.

    Examples
    --------

    Generate initial model for a gaussian output model.

    >>> from sktime.markovprocess.bhmm import testsystems
    >>> model, observations, states = testsystems.generate_synthetic_observations(output='gaussian')
    >>> initial_model = init_model_gaussian1d(observations, model.n_states, lag=1)

    """
    from sklearn.mixture import GaussianMixture
    import msmtools.estimation as msmest
    import msmtools.analysis as msmana

    # Fit a Gaussian mixture model to all observations at once to obtain the
    # emission distributions.
    pooled = np.concatenate(observations)
    gmm = GaussianMixture(n_components=n_states)
    gmm.fit(pooled[:, None])
    output_model = GaussianOutputModel(
        n_states,
        means=gmm.means_[:, 0],
        sigmas=np.sqrt(gmm.covariances_[:, 0]),
    )

    # Fractional transition counts between states.
    Nij = np.zeros((n_states, n_states))
    for traj in observations:
        n_frames = traj.shape[0]
        # per-frame output probabilities, normalized over the states
        pobs = output_model.p_obs(traj)
        pobs /= pobs.sum(axis=1)[:, None]
        # every pair of consecutive frames contributes an outer product
        for frame in range(n_frames - 1):
            Nij += np.outer(pobs[frame, :], pobs[frame + 1, :])

    # Transition matrix maximum likelihood estimate and its stationary distribution.
    Tij = msmest.transition_matrix(Nij, reversible=reversible)
    pi = msmana.stationary_distribution(Tij)

    return HMM(pi, Tij, output_model, lag=lag)
Example #15
0
    def stationary_distribution(self):
        r""" Compute stationary distribution of hidden states if possible.

        Raises
        ------
        AssertionError
            If the hidden transition matrix is not connected (checked with
            ``strong=False``), so that no unique stationary distribution exists.

        """
        connected = _tmatrix_disconnected.is_connected(self._Tij, strong=False)
        assert connected, \
            'No unique stationary distribution because transition matrix is not connected'
        import msmtools.analysis as msmana
        pi = msmana.stationary_distribution(self._Tij)
        return pi
Example #16
0
    def stationary_distribution(self):
        r""" Compute stationary distribution of hidden states if possible.

        Raises
        ------
        AssertionError
            If the hidden transition matrix is not connected (checked with
            ``strong=False``), so that no unique stationary distribution exists.

        """
        connected = _tmatrix_disconnected.is_connected(self._Tij, strong=False)
        assert connected, \
            'No unique stationary distribution because transition matrix is not connected'
        import msmtools.analysis as msmana
        pi = msmana.stationary_distribution(self._Tij)
        return pi
Example #17
0
    def stationary_distribution(self):
        r""" Compute stationary distribution of hidden states if possible.

        Raises
        ------
        RuntimeError
            If the transition matrix is not connected (checked with
            ``directed=False``), so that no unique stationary distribution exists.

        """
        from msmtools.analysis import is_connected, stationary_distribution

        # Connectivity is checked on the public `transition_matrix` attribute,
        # while the distribution itself is computed from `self._Tij`.
        connected = is_connected(self.transition_matrix, directed=False)
        if connected:
            return stationary_distribution(self._Tij)
        raise RuntimeError(
            'No unique stationary distribution because transition matrix is not connected'
        )
Example #18
0
    def set_model_params(self, P, m):
        """Run PCCA on P and store memberships and coarse-grained quantities.

        Sets ``P``, ``m``, ``_M`` (memberships), ``_pi`` (stationary
        distribution of P), ``_pi_coarse``, ``_B`` (row-normalized output
        matrix) and ``_P_coarse`` (row-normalized coarse-grained transition
        matrix) on this object.

        Parameters
        ----------
        P : ndarray (n,n)
            Transition matrix. A scipy sparse matrix is converted to a dense
            array (with a warning).
        m : int
            Number of clusters to group to.
        """
        # remember input
        from scipy.sparse import issparse
        if issparse(P):
            warnings.warn(
                'PCCA is only implemented for dense matrices, '
                'converting sparse transition matrix to dense ndarray.')
            P = P.toarray()
        self.P = P
        self.m = m

        # pcca coarse-graining
        # --------------------
        # PCCA memberships
        # TODO: can be improved. pcca computes stationary distribution internally, we don't need to compute it twice.
        from msmtools.analysis.dense.pcca import pcca
        self._M = pcca(P, m)

        # stationary distribution
        # TODO: in msmtools we recomputed this from P, we actually want to use pi from the msm obj, but this caused #1208
        from msmtools.analysis import stationary_distribution
        self._pi = stationary_distribution(P)

        # coarse-grained stationary distribution
        self._pi_coarse = np.dot(self._M.T, self._pi)

        # HMM output matrix
        self._B = np.dot(np.dot(np.diag(1.0 / self._pi_coarse), self._M.T),
                         np.diag(self._pi))
        # renormalize B to make it row-stochastic (a single pass suffices; the
        # rows already sum to one afterwards)
        self._B /= self._B.sum(axis=1)[:, None]

        # coarse-grained transition matrix
        W = np.linalg.inv(np.dot(self._M.T, self._M))
        A = np.dot(np.dot(self._M.T, P), self._M)
        self._P_coarse = np.dot(W, A)

        # weight rows by pi_coarse and renormalize to eliminate numerical errors
        X = np.dot(np.diag(self._pi_coarse), self._P_coarse)
        self._P_coarse = X / X.sum(axis=1)[:, None]
Example #19
0
    def trajectory(self, N, start=None, stop=None):
        """
        Generates a trajectory realization of at most N steps, starting from `start`

        Parameters
        ----------
        N : int
            trajectory length
        start : int, optional, default = None
            starting state. If not given, will sample from the stationary distribution of P
        stop : int or int-array-like, optional, default = None
            stopping set. If given, the trajectory will be stopped before N steps
            once a state of the stop set is reached

        Returns
        -------
        traj : ndarray of int
            Visited states. Shorter than N when a stopping state was reached;
            the stopping state is the last entry in that case.

        """
        # check input
        stop = types.ensure_int_vector_or_None(stop, require_order=False)

        if start is None:
            # The sampler for the stationary distribution is built once and
            # kept on the instance for later calls.
            if self.mudist is None:
                import msmtools.analysis as msmana

                mu = msmana.stationary_distribution(self.P)
                states = list(range(self.n))
                self.mudist = scipy.stats.rv_discrete(values=(states, mu))
            start = self.mudist.rvs()

        # boolean mask over all states: True marks a stopping state
        stopat = np.zeros(self.n, dtype=bool)
        if stop is not None:
            for state in np.array(stop):
                stopat[state] = True

        traj = np.zeros(N, dtype=int)
        traj[0] = start
        # already at stopping state?
        if stopat[traj[0]]:
            return traj[:1]

        # draw each next state from the generator attached to the current state
        for step in range(1, N):
            traj[step] = self.rgs[traj[step - 1]].rvs()
            if stopat[traj[step]]:
                return traj[:step + 1]
        return traj
Example #20
0
def is_reversible(P):
    """ Returns if P is reversible on its weakly connected sets

    For every set returned by ``connected_sets(P, strong=False)`` the
    corresponding submatrix must be a transition matrix whose
    stationary-weighted version ``pi_i * P_ij`` is symmetric (up to the
    default tolerances of ``np.allclose``).
    """
    import msmtools.analysis as msmana

    for component in connected_sets(P, strong=False):
        sub = P[component, :][:, component]
        # a block that is not even a transition matrix cannot be reversible
        if not msmana.is_transition_matrix(sub):
            return False
        weights = msmana.stationary_distribution(sub)
        flux = weights[:, None] * sub
        if not np.allclose(flux, flux.T):
            return False
    # every component passed
    return True
Example #21
0
def is_reversible(P):
    """ Returns if P is reversible on its weakly connected sets

    Each set from ``connected_sets(P, strong=False)`` is checked on its own:
    the restricted matrix has to be a transition matrix and has to satisfy
    ``pi_i * P_ij == pi_j * P_ji`` within ``np.allclose`` tolerance.
    """
    import msmtools.analysis as msmana

    def _block_is_reversible(indices):
        # restrict P to the rows and columns of this set
        block = P[indices, :][:, indices]
        if not msmana.is_transition_matrix(block):
            return False  # isn't even a transition matrix!
        pi = msmana.stationary_distribution(block)
        weighted = pi[:, None] * block
        return bool(np.allclose(weighted, weighted.T))

    weak_sets = connected_sets(P, strong=False)
    for indices in weak_sets:
        if not _block_is_reversible(indices):
            return False
    return True
Example #22
0
    def run(self, maxiter=100000, on_error='raise'):
        """Estimate a rate matrix from the matrix logarithm of ``T @ T``.

        ``self.T`` is estimated reversibly from the counts ``self.C``. If
        ``self.pi`` is None it is set to the stationary distribution of
        ``self.T``; otherwise it is passed to the estimator as ``mu``. The rate
        matrix is ``logm(T T) / (2 dt)`` with negative entries clipped to zero
        and the diagonal reset from ``sum1`` of the off-diagonal part.

        Parameters
        ----------
        maxiter : int, optional
            Not used by this implementation.
        on_error : str, optional
            Not used by this implementation.

        Returns
        -------
        The rate matrix, which is also stored in ``self.K``.
        """
        from msmtools.estimation import transition_matrix
        from msmtools.analysis import stationary_distribution

        if self.pi is not None:
            self.T = transition_matrix(self.C, reversible=True, mu=self.pi)
        else:
            self.T = transition_matrix(self.C, reversible=True)
            self.pi = stationary_distribution(self.T)

        squared = np.dot(self.T, self.T)
        log_rates = sp.linalg.logm(squared) / (2.0 * self.dt)
        # clip negative entries to zero
        self.K = np.maximum(np.array(log_rates), 0)
        # zero the diagonal first so that it does not enter sum1(K), then set
        # it to the negative of that sum
        np.fill_diagonal(self.K, 0)
        np.fill_diagonal(self.K, -sum1(self.K))

        return self.K
Example #23
0
def enforce_reversible_on_closed(P):
    """ Enforces transition matrix P to be reversible on its closed sets.

    Returns a copy of P in which every diagonal block belonging to a set from
    ``closed_sets(P)`` is replaced by its symmetrized, row-normalized version.
    Entries outside these blocks are left as in P.
    """
    import msmtools.analysis as msmana

    result = P.copy()
    # treat each closed set separately
    for closed in closed_sets(P):
        block_index = np.ix_(closed, closed)
        block = P[block_index]
        # stationary probability of the block
        pi_block = msmana.stationary_distribution(block)
        # symmetrize the stationary-weighted block
        flux = pi_block[:, None] * P[block_index]
        flux = 0.5 * (flux + flux.T)
        # normalize rows
        result[block_index] = flux / flux.sum(axis=1)[:, None]
    return result
Example #24
0
    def stationary_distribution(self):
        """The stationary distribution of the genotype-phenotype-map.

        Read from the node attribute ``"stationary_distribution"`` when it is
        already present; otherwise computed from ``self.transition_matrix``,
        stored as that node attribute and read back.
        """
        attribute = "stationary_distribution"
        cached = nx.get_node_attributes(self, name=attribute)
        if cached:
            return cached

        # Probabilities are keyed by position (0, 1, ...); this assumes the
        # nodes are labelled by those integers -- TODO confirm.
        probabilities = mana.stationary_distribution(self.transition_matrix)
        nx.set_node_attributes(self,
                               name=attribute,
                               values=dict(enumerate(probabilities)))

        return nx.get_node_attributes(self, name=attribute)
Example #25
0
def enforce_reversible_on_closed(P):
    """ Enforces transition matrix P to be reversible on its closed sets.

    Returns a copy of P in which every diagonal block belonging to a set from
    ``closed_sets(P)`` is replaced by its symmetrized, row-normalized version.
    Entries outside these blocks are left as in P.
    """
    import msmtools.analysis as msmana
    Prev = P.copy()
    # treat each closed set separately
    for s in closed_sets(P):
        block = np.ix_(s, s)
        # compute stationary probability
        pi_s = msmana.stationary_distribution(P[block])
        # symmetrize
        X_s = pi_s[:, None] * P[block]
        X_s = 0.5 * (X_s + X_s.T)
        # normalize
        Prev[block] = X_s / X_s.sum(axis=1)[:, None]
    return Prev
Example #26
0
 def _transition_matrix_samples(self, msm, given_pi):
     """Check shape, stochasticity and reversibility of all sampled transition matrices.

     `given_pi` is only used in the failure message.
     """
     import msmtools.analysis as msmana

     samples = [sample.transition_matrix for sample in msm.samples]
     # shape
     expected_shape = (self.nsamples, self.n_states, self.n_states)
     assert np.array_equal(np.shape(samples), expected_shape)
     # consistency
     for P in samples:
         assert msmana.is_transition_matrix(P)
         try:
             assert msmana.is_reversible(P)
         except AssertionError:
             # Redo the detailed-balance comparison here so that the failure
             # reports the offending entries.
             from msmtools.analysis import stationary_distribution
             mu = stationary_distribution(P)
             flux = mu[:, np.newaxis] * P
             np.testing.assert_allclose(flux, np.transpose(flux), atol=1e-12,
                                        err_msg="P not reversible, given_pi={}".format(given_pi))
Example #27
0
def stationary_distribution(rate_matrix):
    """Compute the stationary distribution of a rate matrix.

    The stationary distribution of ``jump_matrix(rate_matrix)`` is divided by
    the negated diagonal of `rate_matrix` and then normalized to sum to one.

    Parameters
    ----------
    rate_matrix : (M, M) array_like
        A transition rate matrix, with row sums equal to zero.

    Returns
    -------
    (M,) ndarray
        The stationary distribution of `rate_matrix`.

    """
    jump_pi = msmana.stationary_distribution(jump_matrix(rate_matrix))
    # unnormalized weights
    weights = -jump_pi / np.diagonal(rate_matrix)
    total = weights.sum()
    return weights / total
Example #28
0
 def test_two_state_model(self):
     """Test the creation of a simple two-state HMM model with analytical parameters.
     """
     import msmtools.analysis as msmana

     # Parameters of the two-state model.
     n_states = 2
     means = [-1, +1]
     sigmas = [1, 1]
     Tij = testsystems.generate_transition_matrix(reversible=True)
     # the stationary distribution serves as initial distribution
     Pi = msmana.stationary_distribution(Tij)

     output_model = GaussianOutputModel(n_states, means=means, sigmas=sigmas)
     model = bhmm.HMM(Pi, Tij, output_model)

     # The model must report back exactly what it was built from.
     assert_array_almost_equal(model.transition_matrix, Tij)
     assert_array_almost_equal(model.stationary_distribution, Pi)
     assert np.allclose(model.output_model.means, np.array(means))
     assert np.allclose(model.output_model.sigmas, np.array(sigmas))
Example #29
0
 def test_two_state_model(self):
     """Test the creation of a simple two-state HMM model with analytical parameters.
     """
     from bhmm import HMM
     # Create a simple two-state model.
     nstates = 2
     Tij = testsystems.generate_transition_matrix(reversible=True)
     # stationary distribution
     import msmtools.analysis as msmana
     Pi = msmana.stationary_distribution(Tij)
     from bhmm import GaussianOutputModel
     means = [-1, +1]
     sigmas = [1, 1]
     output_model = GaussianOutputModel(nstates, means=means, sigmas=sigmas)
     # Use the locally imported class: this test imports `HMM` itself and
     # should not rely on a module-level `bhmm` name being available.
     model = HMM(Pi, Tij, output_model)
     # Test model is correct.
     assert_array_almost_equal(model.transition_matrix, Tij)
     assert_array_almost_equal(model.stationary_distribution, Pi)
     assert(np.allclose(model.output_model.means, np.array(means)))
     assert(np.allclose(model.output_model.sigmas, np.array(sigmas)))
Example #30
0
def transition_matrix_reversible_pisym(C, return_statdist=False, **kwargs):
    r"""
    Reversible transition matrix estimate obtained by symmetrizing the
    stationary flux of the nonreversible estimate:

    .. math::
        p_{ij} = c_{ij} / c_i \quad \text{where} \quad c_i = \sum_j c_{ij}

        \pi_j = \sum_i \pi_i p_{ij}

        x_{ij} = \pi_i p_{ij} + \pi_j p_{ji}

        p^{rev}_{ij} = x_{ij} / x_i \quad \text{where} \quad x_i = \sum_j x_{ij}

    In words: the nonreversible transition matrix is estimated first, its
    stationary distribution is used to build an equilibrium correlation
    matrix, that matrix is symmetrized, and the result is row-normalized to
    give the reversible estimate.

    Parameters
    ----------
    C: ndarray, shape (n,n)
        count matrix
    return_statdist: bool, optional, default=False
        If True, also return the stationary distribution that was computed
        from the nonreversible estimate.
    **kwargs:
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    T: Estimated transition matrix
    pi: stationary distribution of the nonreversible estimate
        (only returned if return_statdist is True)

    """
    from msmtools.analysis import stationary_distribution

    # Nonreversible estimate and its stationary vector.
    nonrev_estimate = transition_matrix_non_reversible(C)
    statdist = stationary_distribution(nonrev_estimate)

    # Equilibrium correlation matrix diag(pi) * T, then symmetrized.
    flux = scipy.sparse.diags(statdist).dot(nonrev_estimate)
    symmetric_flux = flux.T + flux

    # Row-normalize the symmetric matrix to obtain a stochastic matrix.
    row_mass = np.array(symmetric_flux.sum(axis=1)).squeeze()
    T_rev = scipy.sparse.diags(1.0 / row_mass).dot(symmetric_flux)

    return (T_rev, statdist) if return_statdist else T_rev
Example #31
0
    def test_pcca_coarsegrain(self):
        """Compare PCCA coarse-graining with an explicitly computed reference.

        The reference coarse-grained matrix is assembled from the PCCA
        memberships via restriction/interpolation operators; both the
        ``coarsegrain`` function and the ``PCCA`` object must reproduce it
        and yield row-stochastic matrices.
        """
        from msmtools.analysis import stationary_distribution

        n_coarse = 3
        # fine-grained transition matrix
        P = np.array([[0.9, 0.1, 0.0, 0.0, 0.0],
                      [0.1, 0.89, 0.01, 0.0, 0.0],
                      [0.0, 0.1, 0.8, 0.1, 0.0],
                      [0.0, 0.0, 0.01, 0.79, 0.2],
                      [0.0, 0.0, 0.0, 0.2, 0.8]])
        pi = stationary_distribution(P)

        # --- reference expression (Susanna + Marcus) ---
        memberships = pcca(P, n_coarse)
        pi_coarse = np.dot(memberships.T, pi)
        # restriction and interpolation operators
        restriction = memberships.T
        interpolation = np.dot(np.dot(np.diag(pi), memberships),
                               np.diag(1.0 / pi_coarse))
        left_factor = np.linalg.inv(np.dot(restriction, interpolation)).T
        right_factor = np.dot(np.dot(interpolation.T, P), restriction.T)
        Pc_ref = np.dot(left_factor, right_factor)
        # -----------------------------------------------

        unit_rows = np.ones(n_coarse)

        # Functional interface.
        from .pcca import coarsegrain
        Pc = coarsegrain(P, n_coarse)
        assert np.max(np.abs(Pc - Pc_ref)) < 1e-10  # matches the reference
        assert np.allclose(Pc.sum(axis=1), unit_rows)  # mass conservation

        # Object interface.
        from .pcca import PCCA
        clustering = PCCA(P, n_coarse)
        coarse_T = clustering.coarse_grained_transition_matrix
        assert np.max(np.abs(coarse_T - Pc_ref)) < 1e-10  # matches the reference
        # coarse-grained stationary distribution agrees with the one computed here
        assert np.max(
            np.abs(clustering.coarse_grained_stationary_probability - pi_coarse)
        ) < 1e-10
        # mass conservation
        assert np.allclose(
            clustering.coarse_grained_transition_matrix.sum(axis=1), unit_rows)
Example #32
0
def force_spectroscopy_model():
    """
    Build a fixed three-state HMM meant to resemble single-molecule force
    spectroscopy data.

    The model uses Gaussian emissions with hard-coded means and sigmas, a
    hard-coded 3x3 transition matrix, and the stationary distribution of
    that matrix as the initial distribution.

    Returns
    -------
    model : HMM
        The synthetic HMM model.

    Examples
    --------

    >>> model = force_spectroscopy_model()

    """
    import msmtools.analysis as msmana
    from bhmm import HMM

    # Transition matrix (described as reversible by the original author).
    transition_matrix = np.array([[0.98, 0.01540412, 0.00459588],
                                  [0.06331175, 0.9, 0.03668825],
                                  [0.00339873, 0.00660127, 0.99]])

    # State emission probabilities: one Gaussian per hidden state.
    emissions = GaussianOutputModel(3,
                                    means=[3.0, 4.7, 5.6],
                                    sigmas=[1.0, 0.3, 0.2])

    # The stationary distribution doubles as the initial distribution.
    initial_distribution = msmana.stationary_distribution(transition_matrix)

    return HMM(initial_distribution, transition_matrix, emissions)
Example #33
0
def plot_markov_model(
    P, pos=None, state_sizes=None, state_scale=1.0, state_colors='#ff5500', state_labels='auto',
    minflux=1e-6, arrow_scale=1.0, arrow_curvature=1.0, arrow_labels='weights',
    arrow_label_format='%2.e', max_width=12, max_height=12, figpadding=0.2, show_frame=False,
    ax=None, **textkwargs):
    r"""Network representation of MSM transition matrix

    This visualization is not optimized for large matrices. It is meant to be
    used for the visualization of small models with up to 10-20 states, e.g.
    obtained by a HMM coarse-graining. If used with large network, the automatic
    node positioning will be very slow and may still look ugly.

    Parameters
    ----------
    P : ndarray(n,n) or MSM object with attribute 'transition_matrix'
        Transition matrix or MSM object. The input is copied, so the caller's
        matrix is not modified by the flux thresholding.
    pos : ndarray(n,2), optional, default=None
        User-defined positions to draw the states on. If not given, will try
        to place them automatically.
    state_sizes : ndarray(n), optional, default=None
        User-defined areas of the discs drawn for each state. If not given,
        the stationary probability of P will be used.
    state_scale : float, optional, default=1.0
        Scale factor forwarded unchanged to ``NetworkPlot.plot_network``
        (presumably a relative scaling of the state discs).
    state_colors : string, ndarray(n), or list, optional, default='#ff5500' (orange)
        string :
            a Hex code for a single color used for all states
        array :
            n values in [0,1] which will result in a grayscale plot
        list :
            of len = nstates, with a color for each state. The list can mix strings, RGB values and
            hex codes, e.g. :py:obj:`state_colors` = ['g', 'red', [.23, .34, .35], '#ff5500'] is
            possible.
    state_labels : list of strings, optional, default is 'auto'
        A list with a label for each state, to be displayed at the center
        of each node/state. If left to 'auto', the labels are automatically set to the state
        indices.
    minflux : float, optional, default=1e-6
        The minimal flux (p_i * p_ij) for a transition to be drawn. Entries of
        P whose flux is below this value are set to zero before plotting.
    arrow_scale : float, optional, default=1.0
        Relative arrow scale. Set to a value different from 1 to increase
        or decrease the arrow width.
    arrow_curvature : float, optional, default=1.0
        Relative arrow curvature. Set to a value different from 1 to make
        arrows more or less curved.
    arrow_labels : 'weights', None or a ndarray(n,n) with label strings. Optional, default='weights'
        Strings to be placed upon arrows. If None, no labels will be used.
        If 'weights', the elements of P will be used. If a matrix of strings is
        given by the user these will be used.
    arrow_label_format : str, optional, default='%2.e'
        The numeric format to print the arrow labels
    max_width = 12
        The maximum figure width
    max_height = 12
        The maximum figure height
    figpadding = 0.2
        The relative figure size used for the padding
    show_frame: boolean (default=False)
        Draw a frame around the network.
    ax : matplotlib Axes object, optional, default=None
        The axes to plot to. When set to None a new Axes (and Figure) object will be used.
    textkwargs : optional argument for the text of the state and arrow labels.
        See http://matplotlib.org/api/text_api.html#matplotlib.text.Text for more info. The
        parameter 'size' refers to the size of the state and arrow labels and overwrites the
        matplotlib default. The parameter 'arrow_label_size' is only used for the arrow labels;
        please note that 'arrow_label_size' is not part of matplotlib.text.Text's set of parameters
        and will raise an exception when passed to matplotlib.text.Text directly.

    Returns
    -------
    fig, pos : matplotlib.Figure, ndarray(n,2)
    a Figure object containing the plot and the positions of states.
    Can be used later to plot a different network representation (e.g. the flux)

    Examples
    --------
    >>> import numpy as np
    >>> P = np.array([[0.8,  0.15, 0.05,  0.0,  0.0],
    ...              [0.1,  0.75, 0.05, 0.05, 0.05],
    ...              [0.05,  0.1,  0.8,  0.0,  0.05],
    ...              [0.0,  0.2, 0.0,  0.8,  0.0],
    ...              [0.0,  0.02, 0.02, 0.0,  0.96]])
    >>> plot_markov_model(P) # doctest:+ELLIPSIS
    (<matplotlib.figure.Figure..., array...)

    """
    from msmtools import analysis as msmana
    if isinstance(P, _np.ndarray):
        P = P.copy()
    else:
        # MSM object? then get transition matrix first
        P = P.transition_matrix.copy()

    # The stationary distribution is needed for the default state sizes and
    # for the flux threshold; solve for it at most once, and only if needed.
    apply_flux_threshold = minflux > 0
    if state_sizes is None or apply_flux_threshold:
        mu = msmana.stationary_distribution(P)
        if state_sizes is None:
            state_sizes = mu
        if apply_flux_threshold:
            # F[i, j] = mu_i * p_ij; suppress transitions carrying too little flux.
            F = _np.dot(_np.diag(mu), P)
            I, J = _np.where(F < minflux)
            P[I, J] = 0.0

    plot = NetworkPlot(P, pos=pos, ax=ax)
    fig = plot.plot_network(
        state_sizes=state_sizes, state_scale=state_scale, state_colors=state_colors,
        state_labels=state_labels, arrow_scale=arrow_scale, arrow_curvature=arrow_curvature,
        arrow_labels=arrow_labels, arrow_label_format=arrow_label_format, max_width=max_width,
        max_height=max_height, figpadding=figpadding, xticks=False, yticks=False,
        show_frame=show_frame, **textkwargs)
    return fig, plot.pos
Example #34
0
def dalton_model(nstates=3, omin=-5, omax=5, sigma_min=0.5, sigma_max=2.0,
                 lifetime_max=100, lifetime_min=10, reversible=True, output='gaussian'):
    """
    Construct a test multistate model with regular spaced emission means (linearly interpolated between omin and omax)
    and variable emission widths (linearly interpolated between sigma_min and sigma_max).

    Parameters
    ----------
    nstates : int, optional, default = 3
        number of hidden states
    omin : float, optional, default = -5
        mean position of the first state.
    omax : float, optional, default = 5
        mean position of the last state.
    sigma_min : float, optional, default = 0.5
        The width of the observed gaussian distribution for the first state
    sigma_max : float, optional, default = 2.0
        The width of the observed gaussian distribution for the last state
    lifetime_max : float, optional, default = 100
        maximum lifetime of any state
    lifetime_min : float, optional, default = 10
        minimum lifetime of any state
    reversible : bool, optional, default=True
        If True, the row-stochastic transition matrix will be reversible.
    output : str, optional, default='gaussian'
        Output model to use, one of ['gaussian', 'discrete']

    Returns
    -------
    model : HMM
        The synthetic HMM model.

    Raises
    ------
    ValueError
        If `output` is neither 'gaussian' nor 'discrete'.

    Examples
    --------

    Generate default model.

    >>> model = dalton_model()

    Generate model with specified number of states.

    >>> model = dalton_model(nstates=5)

    Generate non-reversible model.

    >>> model = dalton_model(reversible=False)

    Generate a discrete output model.

    >>> model = dalton_model(output='discrete')

    """
    import msmtools.analysis as msmana
    from bhmm import HMM

    # parameters
    means = np.linspace(omin, omax, num=nstates)
    sigmas = np.linspace(sigma_min, sigma_max, num=nstates)

    # Define state emission probabilities.
    if output == 'gaussian':
        output_model = GaussianOutputModel(nstates, means=means, sigmas=sigmas)
    elif output == 'discrete':
        # B[i,j] is the probability that state i produces symbol j, where
        # nsymbols = nstates. The weight is a Gaussian-shaped kernel in the
        # *squared* separation of the means, so it is symmetric in (i, j),
        # largest on the diagonal and never exceeds 1 before normalization.
        separation = means[:, None] - means[None, :]
        B = np.exp(-0.5 * separation ** 2 / np.outer(sigmas, sigmas))
        # normalize each row to a probability distribution over symbols
        B /= B.sum(axis=1)[:, None]
        output_model = DiscreteOutputModel(B)
    else:
        raise ValueError("output = '%s' unknown, must be one of ['gaussian', 'discrete']" % output)

    Tij = generate_transition_matrix(nstates, lifetime_max=lifetime_max, lifetime_min=lifetime_min,
                                     reversible=reversible)

    # The stationary distribution is used as the initial distribution.
    Pi = msmana.stationary_distribution(Tij)

    # Construct HMM with these parameters.
    model = HMM(Pi, Tij, output_model)

    return model
Example #35
0
def init_model_gaussian1d(observations, nstates, reversible=True):
    """Generate an initial model with 1D-Gaussian output densities

    A Gaussian mixture model is fitted to the pooled observations to obtain
    the emission parameters. Fractional transition counts are then
    accumulated from the normalized per-frame output probabilities of
    consecutive frames, and a transition matrix is estimated from them.

    Parameters
    ----------
    observations : list of ndarray((T_i), dtype=float)
        list of arrays of length T_i with observation data
    nstates : int
        The number of states.
    reversible : bool, optional, default=True
        Passed to ``msmtools.estimation.transition_matrix`` when estimating
        the transition matrix from the fractional counts.

    Returns
    -------
    model : HMM
        HMM built from the stationary distribution of the estimated
        transition matrix, that transition matrix, and the fitted Gaussian
        output model.

    Examples
    --------

    Generate initial model for a gaussian output model.

    >>> from bhmm import testsystems
    >>> [model, observations, states] = testsystems.generate_synthetic_observations(output='gaussian')
    >>> initial_model = init_model_gaussian1d(observations, model.nstates)

    """
    import msmtools.analysis as msmana
    import msmtools.estimation as msmest
    from bhmm import GaussianOutputModel
    from bhmm._external.sklearn import mixture

    # Pool all observations with a single concatenation. The leading empty
    # array keeps the configured dtype in the type promotion and makes an
    # empty observation list concatenate to an empty array.
    collected_observations = np.concatenate(
        [np.array([], dtype=config.dtype)] + [np.ravel(o_t) for o_t in observations])

    # Fit a Gaussian mixture model to obtain the emission distributions.
    gmm = mixture.GMM(n_components=nstates)
    gmm.fit(collected_observations[:, None])
    output_model = GaussianOutputModel(nstates, means=gmm.means_[:, 0], sigmas=np.sqrt(gmm.covars_[:, 0]))

    logger().info("Gaussian output model:\n"+str(output_model))
    logger().info("GMM weights: %s" % str(gmm.weights_))

    # Compute fractional state memberships.
    Nij = np.zeros([nstates, nstates], np.float64)
    for o_t in observations:
        # output probability, one row per frame (assumed shape (T, nstates))
        pobs = output_model.p_obs(o_t)
        # normalize each frame to a distribution over states
        pobs /= pobs.sum(axis=1)[:, None]
        # Sum over t of outer(pobs[t], pobs[t+1]) written as one matrix
        # product; contributes nothing for trajectories shorter than 2 frames.
        Nij += np.dot(pobs[:-1].T, pobs[1:])

        logger().info("Nij\n"+str(Nij))

    # Compute transition matrix maximum likelihood estimate.
    Tij = msmest.transition_matrix(Nij, reversible=reversible)
    pi = msmana.stationary_distribution(Tij)

    # Build the model; the initial distribution is the stationary
    # distribution of Tij (the GMM weights are only logged).
    model = HMM(pi, Tij, output_model)

    return model
Example #36
0
 def test_statdist(self):
     """``stationary_distribution`` must reproduce the fixture's own result.

     The reference vector is taken from ``self.bdc.stationary_distribution()``
     and compared with the one computed from ``self.bdc.transition_matrix()``.
     """
     chain = self.bdc
     transition = chain.transition_matrix()
     reference = chain.stationary_distribution()
     assert_allclose(reference, stationary_distribution(transition))
Example #37
0
def tpt(T, A, B, mu=None, qminus=None, qplus=None, rate_matrix=False):
    r""" Computes the A->B reactive flux using transition path theory (TPT)

    Parameters
    ----------
    T : (M, M) ndarray or scipy.sparse matrix
        Transition matrix (default) or Rate matrix (if rate_matrix=True)
    A : array_like
        List of integer state labels for set A
    B : array_like
        List of integer state labels for set B
    mu : (M,) ndarray (optional)
        Stationary vector. Computed from T if not given.
    qminus : (M,) ndarray (optional)
        Backward committor for A->B reaction. Computed if not given.
    qplus : (M,) ndarray (optional)
        Forward committor for A-> B reaction. Computed if not given.
    rate_matrix = False : boolean
        By default (False), T is a transition matrix.
        If set to a true value, T is a rate matrix (not implemented).

    Returns
    -------
    tpt: msmtools.flux.ReactiveFlux object
        A python object containing the reactive A->B flux network
        and several additional quantities, such as stationary probability,
        committors and set definitions.

    Raises
    ------
    ValueError
        If A or B is empty, if A or B has more entries than T has states,
        if a label in A or B is not smaller than the number of states, or
        if T is not a transition matrix.
    NotImplementedError
        If rate_matrix is true.

    Notes
    -----
    The central object used in transition path theory is
    the forward and backward committor function.

    TPT (originally introduced in [1]) for continuous systems has a
    discrete version outlined in [2]. Here, we use the transition
    matrix formulation described in [3].

    See also
    --------
    msmtools.analysis.committor, ReactiveFlux

    References
    ----------
    .. [1] W. E and E. Vanden-Eijnden.
        Towards a theory of transition paths.
        J. Stat. Phys. 123: 503-523 (2006)
    .. [2] P. Metzner, C. Schuette and E. Vanden-Eijnden.
        Transition Path Theory for Markov Jump Processes.
        Multiscale Model Simul 7: 1192-1219 (2009)
    .. [3] F. Noe, Ch. Schuette, E. Vanden-Eijnden, L. Reich and T. Weikl:
        Constructing the Full Ensemble of Folding Pathways from Short Off-Equilibrium Simulations.
        Proc. Natl. Acad. Sci. USA, 106, 19011-19016 (2009)

    """
    # Cheap argument validation first, before touching any numerics.
    if len(A) == 0 or len(B) == 0:
        raise ValueError('set A or B is empty')
    n = T.shape[0]
    # Labels index into an n x n matrix, so the largest admissible label is
    # n - 1; a label equal to n is already out of range.
    if len(A) > n or len(B) > n or max(A) >= n or max(B) >= n:
        raise ValueError(
            'set A or B defines more states, than given transition matrix.')
    # Use truthiness rather than identity so that values such as 1 or
    # numpy.True_ are not silently treated as "transition matrix".
    if rate_matrix:
        raise NotImplementedError(
            'TPT with rate matrix is not yet implemented - But it is very simple, so feel free to do it.'
        )

    import msmtools.analysis as msmana

    if not msmana.is_transition_matrix(T):
        raise ValueError('given matrix T is not a transition matrix')

    # we can compute the following properties from either dense or sparse T
    # stationary dist
    if mu is None:
        mu = msmana.stationary_distribution(T)
    # forward committor
    if qplus is None:
        qplus = msmana.committor(T, A, B, forward=True)
    # backward committor
    if qminus is None:
        if msmana.is_reversible(T, mu=mu):
            # for reversible dynamics the backward committor is 1 - q+
            qminus = 1.0 - qplus
        else:
            qminus = msmana.committor(T, A, B, forward=False, mu=mu)
    # gross flux
    grossflux = flux_matrix(T, mu, qminus, qplus, netflux=False)
    # net flux
    netflux = to_netflux(grossflux)

    # construct flux object
    from .reactive_flux import ReactiveFlux

    F = ReactiveFlux(A,
                     B,
                     netflux,
                     mu=mu,
                     qminus=qminus,
                     qplus=qplus,
                     gross_flux=grossflux)
    # done
    return F
Example #38
0
def _pcca_connected(P, n, return_rot=False):
    """
    PCCA+ spectral clustering method with optimized memberships [1]_

    Clusters the first n eigenvectors of a transition matrix in order to cluster the states.
    This function assumes that the transition matrix is fully connected.

    Parameters
    ----------
    P : ndarray (N,N)
        Transition matrix.

    n : int
        Number of clusters to group to.

    return_rot : bool, optional, default=False
        If True, the optimized rotation matrix is returned together with
        the memberships.

    Returns
    -------
    chi by default, or (chi, rot_matrix) if return_rot = True

    chi : ndarray (N x n)
        A matrix containing the probability or membership of each state to be assigned to each cluster.
        The rows sum to 1.

    rot_matrix : ndarray (n x n)
        A rotation matrix that rotates the dominant eigenvectors to yield the PCCA memberships, i.e.:
        chi = np.dot(evec, rot_matrix)
        Note that the returned chi is additionally clipped to [0,1] and
        row-normalized, so it can deviate slightly from this product.

    Raises
    ------
    ValueError
        If the transition matrix is disconnected or not reversible.

    References
    ----------
    [1] S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
        application to Markov state models and data classification.
        Adv Data Anal Classif 7, 147-179 (2013).

    """
    from msmtools.analysis import eigenvectors
    from msmtools.analysis import is_reversible
    from msmtools.analysis import stationary_distribution
    from msmtools.estimation import connected_sets

    # test connectivity
    n_components = len(connected_sets(P))
    if n_components > 1:
        raise ValueError(
            "Transition matrix is disconnected. Cannot use pcca_connected.")

    pi = stationary_distribution(P)

    if not is_reversible(P, mu=pi):
        raise ValueError(
            "Transition matrix does not fulfill detailed balance. "
            "Make sure to call pcca with a reversible transition matrix estimate"
        )
    # TODO: Susanna mentioned that she has a potential fix for nonreversible matrices by replacing each complex conjugate
    #      pair by the real and imaginary components of one of the two vectors. We could use this but would then need to
    #      orthonormalize all eigenvectors e.g. using Gram-Schmidt orthonormalization. Currently there is no theoretical
    #      foundation for this, so I'll skip it for now.

    # right eigenvectors, ordered
    evecs = eigenvectors(P, n)

    # normalize each eigenvector with respect to the pi-weighted scalar product
    for i in range(n):
        evecs[:, i] /= math.sqrt(np.dot(evecs[:, i] * pi, evecs[:, i]))
    # make first eigenvector positive
    evecs[:, 0] = np.abs(evecs[:, 0])

    # Is there a significant complex component?
    # (np.all replaces np.alltrue, which was removed in NumPy 2.0)
    if not np.all(np.isreal(evecs)):
        warnings.warn(
            "The given transition matrix has complex eigenvectors, so it doesn't exactly fulfill detailed balance "
            "forcing eigenvectors to be real and continuing. Be aware that this is not theoretically solid."
        )
    evecs = np.real(evecs)

    # create initial solution using PCCA+. This could have negative memberships;
    # only its rotation matrix is used as the starting point below.
    (_, rot_matrix) = _pcca_connected_isa(evecs, n)

    # optimize the rotation matrix with PCCA++.
    rot_matrix = _opt_soft(evecs, rot_matrix, n)

    # These memberships should be nonnegative
    memberships = np.dot(evecs[:, :], rot_matrix)

    # We might still have numerical errors. Force memberships to be in [0,1]
    memberships = np.clip(memberships, 0.0, 1.0)
    # renormalize every row so the memberships of each state sum to 1
    memberships /= memberships.sum(axis=1)[:, None]

    if return_rot:
        return memberships, rot_matrix
    return memberships