Example #1
0
def calculate_all_to_all_mfpt(tprob, populations=None):
    """
    Calculate the all-states by all-state matrix of mean first passage
    times.

    This uses the fundamental matrix formalism, and should be much faster
    than GetMFPT for calculating many MFPTs.

    Parameters
    ----------
    tprob : matrix
        transition probability matrix
    populations : array_like, float
        optional argument, the populations of each state. If not supplied,
        it will be computed from scratch

    Returns
    -------
    MFPT : array, float
        MFPT in time units of LagTime, square array for MFPT from i -> j

    Raises
    ------
    ValueError
        If the leading eigenvector has imaginary parts, or if the number of
        populations does not match the size of `tprob`.

    See Also
    --------
    GetMFPT : function
        for calculating a subset of the MFPTs, with functionality for including
        a set of sinks
    """

    msm_analysis.check_transition(tprob)

    if scipy.sparse.issparse(tprob):
        # scipy.linalg.inv below works on dense arrays only.
        tprob = tprob.toarray()
        logger.warning('calculate_all_to_all_mfpt does not support sparse linear algebra')

    if populations is None:
        eigens = msm_analysis.get_eigenvectors(tprob, 5)
        leading_vector = eigens[1][:, 0]
        if np.count_nonzero(np.imag(leading_vector)) != 0:
            raise ValueError('First eigenvector has imaginary parts')
        populations = np.real(leading_vector)

    populations = np.asarray(populations).ravel()
    num_states = len(populations)
    if tprob.shape[0] != num_states:
        raise ValueError("Shape of tprob and populations vector don't match")

    # Every row of the limiting matrix equals `populations`, so broadcasting a
    # (1, n) row against tprob subtracts it without materialising the matrix.
    z = scipy.linalg.inv(np.eye(num_states) - (tprob - populations[np.newaxis, :]))

    # mfpt[i, j] = (z[j, j] - z[i, j]) / populations[j]
    diag = np.arange(num_states)
    mfpt = (z[diag, diag][np.newaxis, :] - z) / populations[np.newaxis, :]

    return mfpt
Example #2
0
def calculate_mfpt(sinks, tprob, lag_time=1.):
    """
    Gets the Mean First Passage Time (MFPT) for all states to a *set*
    of sinks.

    The input matrix is left untouched; the linear system is assembled on a
    private copy.

    Parameters
    ----------
    sinks : array, int
        indices of the sink states
    tprob : matrix
        transition probability matrix
    lag_time : float
        the lag time used to create T (dictates units of the answer)

    Returns
    -------
    MFPT : array, float
        MFPT in time units of lag_time, for each state (in order of state
        index)

    See Also
    --------
    calculate_all_to_all_mfpt : function
        A more efficient way to calculate all the MFPTs in a network
    """

    sinks = _ensure_iterable(sinks)
    msm_analysis.check_transition(tprob)

    n = tprob.shape[0]
    sparse = scipy.sparse.isspmatrix(tprob)

    # The sink rows are overwritten below, so work on a copy rather than on
    # the caller's matrix (tolil() alone does not copy a LIL input).
    if sparse:
        system = tprob.tolil(copy=True)
    else:
        system = tprob.copy()

    # A sink row becomes 2 on the diagonal and 0 elsewhere, which turns into
    # an identity row once the identity is subtracted; together with a zero
    # right-hand side this pins the MFPT of every sink to 0.
    for state in sinks:
        system[state, :] = 0.0
        system[state, state] = 2.0

    if sparse:
        system = (system - scipy.sparse.eye(n, n)).tocsr()
    else:
        system = system - np.eye(n)

    rhs = -1 * np.ones(n)
    for state in sinks:
        rhs[state] = 0.0

    if sparse:
        solution = scipy.sparse.linalg.spsolve(system, rhs)
    else:
        solution = np.linalg.solve(system, rhs)

    return lag_time * solution
Example #3
0
def calculate_net_fluxes(sources, sinks, tprob, populations=None, committors=None):
    """
    Computes the transition path theory net flux matrix.

    For every nonzero entry (i, j) of the flux matrix returned by
    `calculate_fluxes`, the net flux is the forward flux minus the reverse
    flux, clipped at zero.

    Parameters
    ----------
    sources : array_like, int
        The set of unfolded/reactant states.
    sinks : array_like, int
        The set of folded/product states.
    tprob : mm_matrix
        The transition matrix.

    Returns
    -------
    net_fluxes : mm_matrix
        The net flux matrix: a dense array when `tprob` is dense, a LIL
        sparse matrix when `tprob` is sparse.

    Optional Parameters
    -------------------
    populations : nd_array, float
        The equilibrium populations, if not provided is re-calculated
    committors : nd_array, float
        The committors associated with `sources`, `sinks`, and `tprob`.
        If not provided, is calculated from scratch. If provided, `sources`
        and `sinks` are ignored.
    """

    sources, sinks = _check_sources_sinks(sources, sinks)
    msm_analysis.check_transition(tprob)

    num_states = tprob.shape[0]
    is_sparse = scipy.sparse.issparse(tprob)

    gross = calculate_fluxes(sources, sinks, tprob, populations, committors)
    nonzero_index = gross.nonzero()

    # the output container mirrors the storage type of the input
    if is_sparse:
        net_flux = scipy.sparse.lil_matrix((num_states, num_states))
    else:
        net_flux = np.zeros((num_states, num_states))

    for i, j in zip(nonzero_index[0], nonzero_index[1]):
        net_flux[i, j] = max(0, gross[i, j] - gross[j, i])

    return net_flux
Example #4
0
def calculate_ensemble_mfpt(sources, sinks, tprob, lag_time):
    """
    Calculates the average 'Folding Time' of an MSM defined by T and a LagTime.
    The Folding Time is the average of the MFPTs (to F) of all the states in U.

    Note here 'Folding Time' is defined as the avg MFPT of {U}, to {F}.
    Consider this carefully. This is probably NOT the experimental folding time!

    Parameters
    ----------
    sources : array, int
        indices of the source states
    sinks : array, int
        indices of the sink states
    tprob : matrix
        transition probability matrix
    lag_time : float
        the lag time used to create T (dictates units of the answer)

    Returns
    -------
    avg : float
        the average of the MFPTs
    std : float
        the standard deviation of the MFPTs

    References
    ----------
    .. [1] Metzner, P., Schutte, C. & Vanden-Eijnden, E. Transition path theory 
           for Markov jump processes. Multiscale Model. Simul. 7, 1192–1219 
           (2009).
    .. [2] Berezhkovskii, A., Hummer, G. & Szabo, A. Reactive flux and folding 
           pathways in network models of coarse-grained protein dynamics. J. 
           Chem. Phys. 130, 205102 (2009).
    """

    sources, sinks = _check_sources_sinks(sources, sinks)
    msm_analysis.check_transition(tprob)

    # MFPT from every state to the sink set; only the source entries are kept
    mfpt_to_sinks = calculate_mfpt(sinks, tprob, lag_time)

    source_times = np.zeros(len(sources))
    for slot, state in enumerate(sources):
        source_times[slot] = mfpt_to_sinks[state]

    return np.average(source_times), np.std(source_times)
Example #5
0
def calculate_ensemble_mfpt(sources, sinks, tprob, lag_time):
    """
    Calculates the average 'Folding Time' of an MSM defined by T and a LagTime.
    The Folding Time is the average of the MFPTs (to F) of all the states in U.

    Note here 'Folding Time' is defined as the avg MFPT of {U}, to {F}.
    Consider this carefully. This is probably NOT the experimental folding time!

    Parameters
    ----------
    sources : array, int
        indices of the source states
    sinks : array, int
        indices of the sink states
    tprob : matrix
        transition probability matrix
    lag_time : float
        the lag time used to create T (dictates units of the answer)

    Returns
    -------
    avg : float
        the average of the MFPTs
    std : float
        the standard deviation of the MFPTs
    """

    sources, sinks = _check_sources_sinks(sources, sinks)
    msm_analysis.check_transition(tprob)

    all_state_mfpt = calculate_mfpt(sinks, tprob, lag_time)

    # pick out the passage time of each source state, in the order given
    num_sources = len(sources)
    times = np.zeros(num_sources)
    for position, source_state in zip(range(num_sources), sources):
        times[position] = all_state_mfpt[source_state]

    mean_time = np.average(times)
    spread = np.std(times)
    return mean_time, spread
Example #6
0
def find_top_paths(sources, sinks, tprob, num_paths=10, node_wipe=False, net_flux=None):
    r"""
    Calls the Dijkstra algorithm to find the top `num_paths` pathways.

    Does this iteratively by first finding the top flux path, then cutting that
    path and relaxing to find the second top path. Continues until `num_paths`
    have been found or no path with nonzero flux remains.

    Parameters
    ----------
    sources : array_like, int
        The indices of the source states
    sinks : array_like, int
        Indices of sink states
    tprob : matrix
        The transition matrix. Only validated and used when `net_flux` is
        not supplied.
    num_paths : int
        The number of paths to find

    Returns
    -------
    Paths : list of lists
        The nodes transversed in each path
    Bottlenecks : list of tuples
        The nodes between which exists the path bottleneck
    Fluxes : list of floats
        The flux through each path

    Optional Parameters
    -------------------
    node_wipe : bool
        If true, removes the bottleneck-generating node from the graph, instead
        of just the bottleneck (not recommended, a debugging functionality)
    net_flux : sparse matrix
        Matrix of the net flux from `sources` to `sinks`, see function `net_flux`.
        If not provided, is calculated from scratch. If provided, `tprob` is
        ignored. Bottleneck entries are zeroed as paths are found, so a
        supplied matrix may be modified in place.

    To Do
    -----
    -- Add periodic flow check

    References
    ----------
    .. [1] Dijkstra, E. W. (1959). "A note on two problems in connexion with 
           graphs". Numerische Mathematik 1: 269–271. doi:10.1007/BF01386390.
    """

    # first, do some checking on the input, esp. `sources` and `sinks`
    # we want to make sure all objects are iterable and the sets are disjoint
    sources, sinks = _check_sources_sinks(sources, sinks)

    # check to see if we get net_flux for free, otherwise calculate it.
    # `is None` is required here: the truth value of a matrix is ambiguous.
    if net_flux is None:
        msm_analysis.check_transition(tprob)
        net_flux = calculate_net_fluxes(sources, sinks, tprob)

    # initialize objects
    paths = []
    fluxes = []
    bottlenecks = []

    if scipy.sparse.issparse(net_flux):
        net_flux = net_flux.tolil()

    # run the initial Dijkstra pass
    pi, b = Dijkstra(sources, sinks, net_flux)

    logger.info("Path Num | Path | Bottleneck | Flux")

    i = 1  # 1-based number of the path currently being extracted
    while True:

        # First find the highest flux pathway
        (path, (b1, b2), flux) = _backtrack(sinks, b, pi, net_flux)

        # Add each result to a Paths, Bottlenecks, Fluxes list
        if flux == 0:
            logger.info("Only %d possible pathways found. Stopping backtrack.", len(paths))
            break
        paths.append(path)
        bottlenecks.append((b1, b2))
        fluxes.append(flux)
        logger.info("%s | %s | %s | %s ", i, path, (b1, b2), flux)

        # Cut the bottleneck, start relaxing from B side of the cut
        if node_wipe:
            net_flux[:, b2] = 0
            logger.info("Wiped node: %s", b2)
        else:
            net_flux[b1, b2] = 0

        G = scipy.sparse.find(net_flux)
        Q = [b2]
        b, pi, net_flux = _back_relax(b2, b, pi, net_flux)

        # Then relax the graph and repeat
        # But only if another path is still wanted after this one
        if i != num_paths:
            while len(Q) > 0:
                w = Q.pop()
                for v in G[1][np.where(G[0] == w)]:
                    if pi[v] == w:
                        b, pi, net_flux = _back_relax(v, b, pi, net_flux)
                        Q.append(v)
                Q = sorted(Q, key=lambda v: b[v])

        i += 1
        if i == num_paths + 1:
            break

    return paths, bottlenecks, fluxes
Example #7
0
def calculate_all_to_all_mfpt(tprob, populations=None):
    """
    Calculate the all-states by all-state matrix of mean first passage
    times.

    This uses the fundamental matrix formalism, and should be much faster
    than GetMFPT for calculating many MFPTs.

    Parameters
    ----------
    tprob : matrix
        transition probability matrix
    populations : array_like, float
        optional argument, the populations of each state. If not supplied,
        it will be computed from scratch

    Returns
    -------
    MFPT : array, float
        MFPT in time units of LagTime, square array for MFPT from i -> j

    Raises
    ------
    ValueError
        If the leading eigenvector has imaginary parts, or if the number of
        populations does not match the size of `tprob`.

    See Also
    --------
    GetMFPT : function
        for calculating a subset of the MFPTs, with functionality for including
        a set of sinks

    References
    ----------
    .. [1] Metzner, P., Schutte, C. & Vanden-Eijnden, E. Transition path theory 
           for Markov jump processes. Multiscale Model. Simul. 7, 1192–1219 
           (2009).
    .. [2] Berezhkovskii, A., Hummer, G. & Szabo, A. Reactive flux and folding 
           pathways in network models of coarse-grained protein dynamics. J. 
           Chem. Phys. 130, 205102 (2009).
    """

    msm_analysis.check_transition(tprob)

    if scipy.sparse.issparse(tprob):
        # scipy.linalg.inv below works on dense arrays only.
        tprob = tprob.toarray()
        logger.warning('calculate_all_to_all_mfpt does not support sparse linear algebra')

    if populations is None:
        eigens = msm_analysis.get_eigenvectors(tprob, 1)
        stationary = eigens[1][:, 0]
        if np.count_nonzero(np.imag(stationary)) != 0:
            raise ValueError('First eigenvector has imaginary parts')
        populations = np.real(stationary)

    populations = np.asarray(populations).ravel()
    num_states = len(populations)
    if tprob.shape[0] != num_states:
        raise ValueError("Shape of tprob and populations vector don't match")

    # Each row of the limiting matrix is the population vector; subtracting a
    # broadcast (1, n) row is equivalent and avoids the deprecated np.matrix.
    limiting_row = populations[np.newaxis, :]
    z = scipy.linalg.inv(np.eye(num_states) - (tprob - limiting_row))

    # mfpt[i, j] = (z[j, j] - z[i, j]) / populations[j]
    diag = np.arange(num_states)
    mfpt = (z[diag, diag][np.newaxis, :] - z) / limiting_row

    return mfpt
Example #8
0
def calculate_mfpt(sinks, tprob, lag_time=1.):
    """
    Gets the Mean First Passage Time (MFPT) for all states to a *set*
    of sinks.

    The caller's `tprob` is not modified; the linear system is built on a
    copy.

    Parameters
    ----------
    sinks : array, int
        indices of the sink states
    tprob : matrix
        transition probability matrix
    lag_time : float
        the lag time used to create T (dictates units of the answer)

    Returns
    -------
    MFPT : array, float
        MFPT in time units of lag_time, for each state (in order of state
        index)

    See Also
    --------
    calculate_all_to_all_mfpt : function
        A more efficient way to calculate all the MFPTs in a network

    References
    ----------
    .. [1] Metzner, P., Schutte, C. & Vanden-Eijnden, E. Transition path theory 
           for Markov jump processes. Multiscale Model. Simul. 7, 1192–1219 
           (2009).
    .. [2] Berezhkovskii, A., Hummer, G. & Szabo, A. Reactive flux and folding 
           pathways in network models of coarse-grained protein dynamics. J. 
           Chem. Phys. 130, 205102 (2009).
    """

    sinks = _ensure_iterable(sinks)
    msm_analysis.check_transition(tprob)

    n = tprob.shape[0]
    is_sparse = scipy.sparse.isspmatrix(tprob)

    # Copy before editing rows: a dense array, or a matrix that is already
    # LIL, would otherwise be overwritten in the caller's namespace.
    if is_sparse:
        work = tprob.tolil(copy=True)
    else:
        work = tprob.copy()

    # Setting the sink diagonal to 2 leaves exactly 1 after the identity is
    # subtracted, so each sink row reads MFPT[sink] = RHS[sink] = 0.
    for state in sinks:
        work[state, :] = 0.0
        work[state, state] = 2.0

    if is_sparse:
        work = (work - scipy.sparse.eye(n, n)).tocsr()
    else:
        work = work - np.eye(n)

    RHS = -1 * np.ones(n)
    for state in sinks:
        RHS[state] = 0.0

    if is_sparse:
        MFPT = lag_time * scipy.sparse.linalg.spsolve(work, RHS)
    else:
        MFPT = lag_time * np.linalg.solve(work, RHS)

    return MFPT
Example #9
0
def calculate_avg_TP_time(sources, sinks, tprob, lag_time):
    """
    Calculates the Average Transition Path Time for MSM with: T, LagTime.
    The TPTime is the average of the MFPTs (to F) of all the states
    immediately adjacent to U, with the U states effectively deleted.

    Note here 'TP Time' is defined as the avg MFPT of all adjacent states to {U},
    to {F}, ignoring {U}.

    Consider this carefully.

    Parameters
    ----------
    sources : array, int
        indices of the unfolded states
    sinks : array, int
        indices of the folded states
    tprob : matrix
        transition probability matrix
    lag_time : float
        the lag time used to create T (dictates units of the answer)

    Returns
    -------
    avg : float
        the average of the MFPTs
    std : float
        the standard deviation of the MFPTs

    Notes
    -----
    For every source state u the quantity averaged is
    sum_i tprob[u, i] * MFPT[i], taken over the states i outside the source
    set; entries that come out exactly zero are left out of the statistics.
    The MFPTs themselves are solved on the unmodified `tprob`.

    References
    ----------
    .. [1] Metzner, P., Schutte, C. & Vanden-Eijnden, E. Transition path theory 
           for Markov jump processes. Multiscale Model. Simul. 7, 1192–1219 
           (2009).
    .. [2] Berezhkovskii, A., Hummer, G. & Szabo, A. Reactive flux and folding 
           pathways in network models of coarse-grained protein dynamics. J. 
           Chem. Phys. 130, 205102 (2009).
    """

    sources, sinks = _check_sources_sinks(sources, sinks)
    msm_analysis.check_transition(tprob)

    n = tprob.shape[0]
    if scipy.sparse.issparse(tprob):
        T = tprob.tolil()  # read-only view used for element access
        P = scipy.sparse.lil_matrix((n, n))
    else:
        T = tprob
        P = np.zeros((n, n))

    # P keeps only the transitions leading out of the source set
    for u in sources:
        for i in range(n):
            if i not in sources:
                P[u, i] = T[u, i]

    # NOTE(review): the summary above speaks of deleting the source states,
    # yet the MFPTs have always been solved on the full `tprob`; the earlier
    # row/column zeroing of a scratch matrix never fed into the result (and
    # divided by zero), so it is not reproduced here. Confirm which
    # definition is intended.
    X = np.asarray(calculate_mfpt(sinks, tprob, lag_time)).ravel()

    # matrix-vector product; `.dot` means the same for dense and sparse P
    TP = np.asarray(P.dot(X)).ravel()
    TPtimes = [time for time in TP if time != 0]

    return np.average(TPtimes), np.std(TPtimes)
Example #10
0
def calculate_net_fluxes(sources, sinks, tprob, populations=None, committors=None):
    """
    Computes the transition path theory net flux matrix.

    The net flux between two states is the flux in the forward direction
    minus the flux in the reverse direction, with negative values replaced
    by zero. Only pairs with a nonzero forward flux are visited.

    Parameters
    ----------
    sources : array_like, int
        The set of unfolded/reactant states.
    sinks : array_like, int
        The set of folded/product states.
    tprob : mm_matrix
        The transition matrix.

    Returns
    -------
    net_fluxes : mm_matrix
        The net flux matrix (dense array for dense `tprob`, LIL sparse
        matrix for sparse `tprob`)

    Optional Parameters
    -------------------
    populations : nd_array, float
        The equilibrium populations, if not provided is re-calculated
    committors : nd_array, float
        The committors associated with `sources`, `sinks`, and `tprob`.
        If not provided, is calculated from scratch. If provided, `sources`
        and `sinks` are ignored.

    References
    ----------
    .. [1] Metzner, P., Schutte, C. & Vanden-Eijnden, E. Transition path theory 
           for Markov jump processes. Multiscale Model. Simul. 7, 1192–1219 
           (2009).
    .. [2] Berezhkovskii, A., Hummer, G. & Szabo, A. Reactive flux and folding 
           pathways in network models of coarse-grained protein dynamics. J. 
           Chem. Phys. 130, 205102 (2009).
    """

    sources, sinks = _check_sources_sinks(sources, sinks)
    msm_analysis.check_transition(tprob)

    use_dense = not scipy.sparse.issparse(tprob)
    size = tprob.shape[0]

    flux = calculate_fluxes(sources, sinks, tprob, populations, committors)
    row_index, col_index = flux.nonzero()[0], flux.nonzero()[1]

    net_flux = np.zeros((size, size)) if use_dense else scipy.sparse.lil_matrix((size, size))

    for k in range(len(row_index)):
        src, dst = row_index[k], col_index[k]
        difference = flux[src, dst] - flux[dst, src]
        net_flux[src, dst] = max(0, difference)

    return net_flux
Example #11
0
def calculate_fluxes(sources, sinks, tprob, populations=None, committors=None):
    """
    Compute the transition path theory flux matrix.

    The off-diagonal entries are
    ``populations[i] * (1 - committors[i]) * tprob[i, j] * committors[j]``;
    the diagonal is set to zero.

    Parameters
    ----------
    sources : array_like, int
        The set of unfolded/reactant states.
    sinks : array_like, int
        The set of folded/product states.
    tprob : mm_matrix
        The transition matrix.

    Returns
    -------
    fluxes : mm_matrix
        The flux matrix (dense for dense `tprob`, LIL sparse matrix for
        sparse `tprob`)

    Raises
    ------
    ValueError
        If the populations have to be computed and the leading eigenvector
        has imaginary components.

    Optional Parameters
    -------------------
    populations : nd_array, float
        The equilibrium populations, if not provided is re-calculated
    committors : nd_array, float
        The committors associated with `sources`, `sinks`, and `tprob`.
        If not provided, is calculated from scratch. If provided, `sources`
        and `sinks` are ignored.
    """

    sources, sinks = _check_sources_sinks(sources, sinks)
    msm_analysis.check_transition(tprob)

    dense = not scipy.sparse.issparse(tprob)

    # check if we got the populations
    if populations is None:
        eigens = msm_analysis.get_eigenvectors(tprob, 5)
        leading_vector = eigens[1][:, 0]
        if np.count_nonzero(np.imag(leading_vector)) != 0:
            raise ValueError('First eigenvector has imaginary components')
        populations = np.real(leading_vector)

    # check if we got the committors
    if committors is None:
        committors = calculate_committors(sources, sinks, tprob)

    # perform the flux computation: fluxes = X . tprob . Y with diagonal
    # X = diag(populations * (1 - committors)) and Y = diag(committors)
    n = tprob.shape[0]
    left_diagonal = populations * (1.0 - committors)

    if dense:
        diag = np.arange(n)
        X = np.zeros((n, n))
        Y = np.zeros((n, n))
        X[diag, diag] = left_diagonal
        Y[diag, diag] = committors
        fluxes = np.dot(np.dot(X, tprob), Y)
        fluxes[diag, diag] = np.zeros(n)
    else:
        X = scipy.sparse.lil_matrix((n, n))
        Y = scipy.sparse.lil_matrix((n, n))
        X.setdiag(left_diagonal)
        Y.setdiag(committors)
        # np.dot is not aware of sparse matrices; use the sparse .dot method
        fluxes = X.tocsr().dot(tprob.tocsr()).dot(Y.tocsr())
        fluxes = fluxes.tolil()
        fluxes.setdiag(np.zeros(n))

    return fluxes
Example #12
0
def calculate_hub_score(tprob, waypoint):
    """
    Calculate the hub score for the state `waypoint`.

    The "hub score" is a measure of how well traveled a certain state or
    set of states is in a network. Specifically, it is the fraction of
    times that a walker visits a state en route from some state A to another
    state B, averaged over all combinations of A and B.


    Parameters
    ----------
    tprob : matrix
        The transition probability matrix
    waypoint : int
        The index of the intermediate state. Any integer type (including
        NumPy integer scalars) or a length-one sequence holding the index is
        accepted.

    Returns
    -------
    Hc : float
        The hub score for the state `waypoint`

    Raises
    ------
    ValueError
        If `waypoint` is neither an integer nor a length-one sequence, or if
        it is not a valid state index of `tprob`.
    ZeroDivisionError
        If the network has fewer than three states, since the average is
        normalised by (N - 1) * (N - 2).

    See Also
    --------
    calculate_fraction_visits : function
        Calculate the fraction of times a state is visited on pathways going
        from a set of "sources" to a set of "sinks".
    calculate_all_hub_scores : function
        A more efficient way to compute the hub score for every state in a
        network.

    Notes
    -----
    Employs dense linear algebra,
      memory use scales as N^2
      cycle use scales as N^5

    References
    ----------
    ..[1] Dickson & Brooks (2012), J. Chem. Theory Comput.,
        Article ASAP DOI: 10.1021/ct300537s
    """
    import numbers

    msm_analysis.check_transition(tprob)

    # typecheck: numbers.Integral also covers NumPy integer scalars, which an
    # exact `type(...) != int` comparison would reject
    if not isinstance(waypoint, numbers.Integral):
        if hasattr(waypoint, '__len__') and len(waypoint) == 1:
            waypoint = waypoint[0]
        else:
            raise ValueError('Must pass waypoints as int or list/array of ints')

    # find out which states to include in A, B (i.e. everything but C)
    N = tprob.shape[0]
    states_to_include = list(range(N))
    states_to_include.remove(waypoint)

    # calculate the hub score: sum over every ordered pair of distinct
    # endpoints (the waypoint itself is already excluded from the list)
    Hc = 0.0
    for s1 in states_to_include:
        for s2 in states_to_include:
            if s1 != s2:
                Hc += calculate_fraction_visits(tprob, waypoint,
                                                s1, s2, return_cond_Q=False)

    Hc /= ((N - 1) * (N - 2))

    return Hc
Example #13
0
def calculate_committors(sources, sinks, tprob):
    """
    Get the forward committors of the reaction sources -> sinks.

    Solves the linear system (I - tprob) Q = 0 on the intermediate states
    with the boundary conditions Q = 0 on `sources` and Q = 1 on `sinks`.

    Parameters
    ----------
    sources : array_like, int
        The set of unfolded/reactant states.
    sinks : array_like, int
        The set of folded/product states.
    tprob : mm_matrix
        The transition matrix.

    Returns
    -------
    Q : array_like
        The forward committors for the reaction U -> F.

    Raises
    ------
    AssertionError
        If a computed committor falls outside [0, 1] by more than a small
        numerical tolerance.
    """

    sources, sinks = _check_sources_sinks(sources, sinks)
    msm_analysis.check_transition(tprob)

    dense = not scipy.sparse.issparse(tprob)
    if not dense:
        tprob = tprob.tolil()

    # construct the committor problem
    n = tprob.shape[0]

    if dense:
        T = np.eye(n) - tprob
    else:
        T = scipy.sparse.eye(n, n, 0, format='lil') - tprob
        T = T.tolil()

    # boundary states: clear the row and column, keep a unit diagonal so the
    # solution there is dictated by the right-hand side alone
    for a in sources:
        T[a, :] = 0.0
        T[:, a] = 0.0
        T[a, a] = 1.0

    for b in sinks:
        T[b, :] = 0.0
        T[:, b] = 0.0
        T[b, b] = 1.0

    # indicator vector of the sink set
    IdB = np.zeros(n)
    IdB[sinks] = 1.0

    # probability of stepping directly into a sink; this compensates for the
    # sink columns removed from T above
    if dense:
        RHS = np.dot(tprob, IdB)
    else:
        RHS = tprob.dot(IdB)

    RHS[sources] = 0.0
    RHS[sinks] = 1.0

    # solve for the committors
    if dense:
        Q = np.linalg.solve(T, RHS)
    else:
        Q = scipy.sparse.linalg.spsolve(T.tocsr(), RHS)

    # allow for round-off in the linear solve when checking the range
    epsilon = 0.001
    assert np.all(Q <= 1.0 + epsilon)
    assert np.all(Q >= 0.0 - epsilon)

    return Q
Example #14
0
def calculate_avg_TP_time(sources, sinks, tprob, lag_time):
    """
    Calculates the Average Transition Path Time for MSM with: T, LagTime.
    The TPTime is the average of the MFPTs (to F) of all the states
    immediately adjacent to U, with the U states effectively deleted.

    Note here 'TP Time' is defined as the avg MFPT of all adjacent states to {U},
    to {F}, ignoring {U}.

    Consider this carefully.

    Parameters
    ----------
    sources : array, int
        indices of the unfolded states
    sinks : array, int
        indices of the folded states
    tprob : matrix
        transition probability matrix
    lag_time : float
        the lag time used to create T (dictates units of the answer)

    Returns
    -------
    avg : float
        the average of the MFPTs
    std : float
        the standard deviation of the MFPTs

    Notes
    -----
    For each source state u the averaged quantity is
    sum_i tprob[u, i] * MFPT[i] over the states i that are not sources;
    values that are exactly zero are excluded from the statistics. The MFPTs
    are solved on the unmodified `tprob`.
    """

    sources, sinks = _check_sources_sinks(sources, sinks)
    msm_analysis.check_transition(tprob)

    n = tprob.shape[0]
    if scipy.sparse.issparse(tprob):
        transitions = tprob.tolil()  # only read from, never written to
        P = scipy.sparse.lil_matrix((n, n))
    else:
        transitions = tprob
        P = np.zeros((n, n))

    # keep the source rows, restricted to columns outside the source set
    for u in sources:
        for i in range(n):
            if i not in sources:
                P[u, i] = transitions[u, i]

    # NOTE(review): the summary talks about deleting the source states, but
    # the MFPT solve has always run on the full `tprob`; the former zeroing
    # and renormalising of a scratch matrix had no effect on the result (and
    # divided by zero), so it is omitted. Confirm the intended definition.
    mfpt = np.asarray(calculate_mfpt(sinks, tprob, lag_time)).ravel()

    # `.dot` is a matrix-vector product for both dense and sparse P
    TP = np.asarray(P.dot(mfpt)).ravel()
    TPtimes = [time for time in TP if time != 0]

    return np.average(TPtimes), np.std(TPtimes)
Example #15
0
def calculate_committors(sources, sinks, tprob):
    """
    Get the forward committors of the reaction sources -> sinks.

    Builds the linear system (I - tprob) Q = RHS, pins the source states to
    0 and the sink states to 1, and solves it with a dense or sparse solver
    depending on the storage of `tprob`.

    Parameters
    ----------
    sources : array_like, int
        The set of unfolded/reactant states.
    sinks : array_like, int
        The set of folded/product states.
    tprob : mm_matrix
        The transition matrix.

    Returns
    -------
    Q : array_like
        The forward committors for the reaction U -> F.

    References
    ----------
    .. [1] Metzner, P., Schutte, C. & Vanden-Eijnden, E. Transition path theory 
           for Markov jump processes. Multiscale Model. Simul. 7, 1192–1219 
           (2009).
    .. [2] Berezhkovskii, A., Hummer, G. & Szabo, A. Reactive flux and folding 
           pathways in network models of coarse-grained protein dynamics. J. 
           Chem. Phys. 130, 205102 (2009).
    """

    sources, sinks = _check_sources_sinks(sources, sinks)
    msm_analysis.check_transition(tprob)

    is_sparse = scipy.sparse.issparse(tprob)
    if is_sparse:
        tprob = tprob.tolil()

    # construct the committor problem
    n = tprob.shape[0]

    if is_sparse:
        system = scipy.sparse.eye(n, n, 0, format='lil') - tprob
        system = system.tolil()
    else:
        system = np.eye(n) - tprob

    # sources first, then sinks: wipe the row and column of every boundary
    # state and leave a 1 on its diagonal
    for boundary in (sources, sinks):
        for state in boundary:
            system[state, :] = 0.0
            system[:, state] = 0.0
            system[state, state] = 1.0

    sink_indicator = np.zeros(n)
    sink_indicator[sinks] = 1.0

    if is_sparse:
        rhs = tprob.dot(sink_indicator)
    else:
        rhs = np.dot(tprob, sink_indicator)

    # boundary conditions
    rhs[sources] = 0.0
    rhs[sinks] = 1.0

    # solve for the committors
    if is_sparse:
        Q = scipy.sparse.linalg.spsolve(system.tocsr(), rhs)
    else:
        Q = np.linalg.solve(system, rhs)

    # committors are probabilities; tolerate a little numerical noise
    tolerance = 0.001
    assert np.all(Q <= 1.0 + tolerance)
    assert np.all(Q >= 0.0 - tolerance)

    return Q
Example #16
0
def calculate_fluxes(sources, sinks, tprob, populations=None, committors=None):
    """
    Compute the transition path theory flux matrix.

    The off-diagonal entries are
    ``populations[i] * (1 - committors[i]) * tprob[i, j] * committors[j]``;
    the diagonal is set to zero.

    Parameters
    ----------
    sources : array_like, int
        The set of unfolded/reactant states.
    sinks : array_like, int
        The set of folded/product states.
    tprob : mm_matrix
        The transition matrix.

    Returns
    -------
    fluxes : mm_matrix
        The flux matrix (dense for dense `tprob`, LIL sparse matrix for
        sparse `tprob`)

    Raises
    ------
    ValueError
        If the populations have to be computed and the leading eigenvector
        has imaginary components.

    Optional Parameters
    -------------------
    populations : nd_array, float
        The equilibrium populations, if not provided is re-calculated
    committors : nd_array, float
        The committors associated with `sources`, `sinks`, and `tprob`.
        If not provided, is calculated from scratch. If provided, `sources`
        and `sinks` are ignored.

    References
    ----------
    .. [1] Metzner, P., Schutte, C. & Vanden-Eijnden, E. Transition path theory 
           for Markov jump processes. Multiscale Model. Simul. 7, 1192–1219 
           (2009).
    .. [2] Berezhkovskii, A., Hummer, G. & Szabo, A. Reactive flux and folding 
           pathways in network models of coarse-grained protein dynamics. J. 
           Chem. Phys. 130, 205102 (2009).
    """

    sources, sinks = _check_sources_sinks(sources, sinks)
    msm_analysis.check_transition(tprob)

    dense = not scipy.sparse.issparse(tprob)

    # check if we got the populations
    if populations is None:
        eigens = msm_analysis.get_eigenvectors(tprob, 1)
        stationary = eigens[1][:, 0]
        if np.count_nonzero(np.imag(stationary)) != 0:
            raise ValueError('First eigenvector has imaginary components')
        populations = np.real(stationary)

    # check if we got the committors
    if committors is None:
        committors = calculate_committors(sources, sinks, tprob)

    # perform the flux computation: fluxes = X . tprob . Y, where X and Y are
    # diagonal matrices scaling the rows and the columns respectively
    n = tprob.shape[0]
    row_scale = populations * (1.0 - committors)

    if dense:
        diag = np.arange(n)
        X = np.zeros((n, n))
        Y = np.zeros((n, n))
        X[diag, diag] = row_scale
        Y[diag, diag] = committors
        fluxes = np.dot(np.dot(X, tprob), Y)
        fluxes[diag, diag] = np.zeros(n)
    else:
        X = scipy.sparse.lil_matrix((n, n))
        Y = scipy.sparse.lil_matrix((n, n))
        X.setdiag(row_scale)
        Y.setdiag(committors)
        # sparse matrices must be multiplied with their own .dot method
        fluxes = (X.tocsr().dot(tprob.tocsr())).dot(Y.tocsr())
        fluxes = fluxes.tolil()
        fluxes.setdiag(np.zeros(n))

    return fluxes
Example #17
0
def calculate_fraction_visits(tprob, waypoint, source, sink, return_cond_Q=False):
    """
    Calculate the fraction of times a walker on `tprob` going from `source`
    to `sink` will travel through the state `waypoint` en route.

    Computes the conditional committors q^{ABC^+} and uses them to find the
    fraction of paths mentioned above. The conditional committors can be
    returned alongside the fraction by setting `return_cond_Q`.

    Note that in the notation of Dickson et. al. this computes h_c(A,B), with
        source    = A
        sink      = B
        waypoint  = C

    Parameters
    ----------
    tprob : matrix
        The transition probability matrix. Anything that is not exactly a
        numpy ndarray must provide a `todense()` method.
    waypoint : int
        The index of the intermediate state
    source : int
        The index of the source state
    sink : int
        The index of the sink state
    return_cond_Q : bool
        Whether or not to return the conditional committors

    `waypoint`, `source` and `sink` may each be a Python int, a numpy
    integer, or a length-1 sequence holding one of those.

    Returns
    -------
    fraction_paths : float
        The fraction of times a walker going from `source` -> `sink` stops
        by `waypoint` on its way.
    cond_Q : nd_array, float (optional)
        Optionally returned (`return_cond_Q`), in the original state order.

    Raises
    ------
    TypeError
        If `tprob` cannot be converted to a dense array, or if any of
        `source`, `sink`, `waypoint` is not a single integer index.
    ValueError
        If `source`, `sink` and `waypoint` are not pairwise distinct.

    See Also
    --------
    calculate_hub_score : function
        Compute the 'hub score', the weighted fraction of visits for an
        entire network.
    calculate_all_hub_scores : function
        Wrapper to compute all the hub scores in a network.

    Notes
    -----
    Employs dense linear algebra,
      memory use scales as N^2
      cycle use scales as N^3

    References
    ----------
    ..[1] Dickson & Brooks (2012), J. Chem. Theory Comput.,
          Article ASAP DOI: 10.1021/ct300537s
    """

    # check `tprob` and make sure we work on a plain dense ndarray
    msm_analysis.check_transition(tprob)
    # Exact-type check on purpose: ndarray subclasses such as np.matrix must
    # not slip through, because their `*` is a matrix product and the
    # element-wise products further down rely on plain ndarray semantics.
    if type(tprob) is not np.ndarray:
        try:
            dense_tprob = tprob.todense()
        except AttributeError as e:
            raise TypeError('Argument `tprob` must be convertable to a dense '
                            'numpy array. \n%s' % e)
        # todense() hands back an np.matrix; strip the matrix semantics
        tprob = np.asarray(dense_tprob)

    # typecheck - reduce each of source/sink/waypoint to a single plain int
    indices = []
    for data in (source, sink, waypoint):
        if hasattr(data, '__len__') and len(data) == 1:
            data = data[0]
        # bool is a subclass of int but is never a meaningful state index
        if isinstance(data, bool) or not isinstance(data, (int, np.integer)):
            raise TypeError('Arguments source/sink/waypoint must be an int')
        indices.append(int(data))
    source, sink, waypoint = indices

    if (source == waypoint) or (sink == waypoint) or (sink == source):
        raise ValueError('source, sink, waypoint must all be disjoint!')

    N = tprob.shape[0]
    Q = calculate_committors([source], [sink], tprob)

    # permute the transition matrix into canonical form - the last three
    # rows/columns become source (-3), sink (-2) and waypoint (-1)
    Bsink_indices = [source, sink, waypoint]
    perm = np.arange(N)
    perm = np.delete(perm, Bsink_indices)
    perm = np.append(perm, Bsink_indices)
    T = MSMLib.permute_mat(tprob, perm)

    # extract P, R
    n = N - len(Bsink_indices)
    P = T[:n, :n]
    R = T[:n, n:]

    # calculate the conditional committors ( B = N*R ), B[i,j] is the prob
    # state i ends in j, where j runs over the source + sink + waypoint
    # (waypoint is position -1)
    B = np.dot(np.linalg.inv(np.eye(n) - P), R)

    # add probs for the sinks, waypoint / b[i] is P( i --> {C & not A, B} )
    # (0 for the source and sink entries, 1 for the waypoint itself)
    b = np.append(B[:, -1].flatten(), [0.0] * (len(Bsink_indices) - 1) + [1.0])
    cond_Q = b * Q[waypoint]

    epsilon = 1e-6  # some numerical give, hard-coded
    assert cond_Q.shape == (N,)
    assert np.all(cond_Q <= 1.0 + epsilon)
    assert np.all(cond_Q >= 0.0 - epsilon)
    assert np.all(cond_Q <= Q[perm] + epsilon)

    # finally, calculate the fraction of paths h_C(A,B) (eq. 7 in [1]);
    # T[-3, :] is the source row of the permuted matrix
    fraction_paths = np.sum(T[-3, :] * cond_Q) / np.sum(T[-3, :] * Q[perm])

    assert fraction_paths <= 1.0
    assert fraction_paths >= 0.0

    if return_cond_Q:
        cond_Q = cond_Q[np.argsort(perm)]  # put back in orig. order
        return fraction_paths, cond_Q
    else:
        return fraction_paths
def _run_trial(arg_dict):
    """
    Run one simulated adaptive-sampling trial against a reference model.

    Starting from one initial trajectory, each round asks the sampler for
    per-state weights, draws starting states from them, generates new
    trajectories from the reference transition matrix, re-estimates a
    transition matrix from everything collected so far and records how far
    that estimate is from the reference.

    Parameters
    ----------
    arg_dict : dict
        Trial settings, looked up by key. Required keys:

        rounds_of_sampling : int
            Number of adaptive sampling rounds.
        simultaneous_samplers : int
            Number of trajectories started in every round.
        size_of_intial_data : int
            Length of the initial trajectory (key spelling is historical).
        length_of_sampling_trajs : int
            Each sampled trajectory holds this many entries plus one.
        SamplerObject : object
            Must offer `sample(counts)`; the result is used as the
            probabilities of a `scipy.stats.rv_discrete` over the states.

        Optional keys (a missing key is treated like None):

        transition_matrix : matrix
            Reference transition matrix. When absent or falsy, a random one
            is built from a symmetrized random count matrix.
        num_states : int
            Size of the random reference model; only consulted when no
            `transition_matrix` is given.
        observable_function : callable
            Called on a transition matrix; its value on the reference
            matrix is the target that per-round values are compared with.

    Returns
    -------
    distance_to_target : ndarray, float
        Per round: square root of the summed squared stored entries of
        (estimate - reference), divided by the number of states.
    obs_distance : ndarray, float
        Per round: absolute observable error; stays zero when no
        `observable_function` is given.

    Raises
    ------
    KeyError
        If a required key is missing from `arg_dict`.
    Exception
        If `observable_function` fails on the reference matrix.
    """

    # Read the settings explicitly. exec()-ing the keys into the local
    # namespace runs arbitrary key text as code and cannot create function
    # locals on Python 3.
    rounds_of_sampling = arg_dict['rounds_of_sampling']
    simultaneous_samplers = arg_dict['simultaneous_samplers']
    size_of_intial_data = arg_dict['size_of_intial_data']
    length_of_sampling_trajs = arg_dict['length_of_sampling_trajs']
    adaptive_sampler = arg_dict['SamplerObject']
    transition_matrix = arg_dict.get('transition_matrix')
    num_states = arg_dict.get('num_states')
    observable_function = arg_dict.get('observable_function')

    # initialize data structures to hold output
    distance_to_target = np.zeros(rounds_of_sampling)
    obs_distance = np.zeros(rounds_of_sampling)

    # the assignments array will hold all of the output of all simulations
    # (row 0 is the initial data, -1 marks unused entries)
    n_trajs = rounds_of_sampling * simultaneous_samplers + 1
    traj_length = max(size_of_intial_data, length_of_sampling_trajs + 1)
    assignments = -1.0 * np.ones((n_trajs, traj_length))

    # initialize the "true" transition matrix
    # Anything exposing `.shape` counts as a supplied matrix: the truth value
    # of a multi-element array or sparse matrix is ambiguous and raises.
    matrix_given = (hasattr(transition_matrix, 'shape')
                    or bool(transition_matrix))
    if matrix_given:
        T = transition_matrix
        num_states = T.shape[0]
    else:
        assert num_states is not None and num_states > 0
        C_rand = np.random.randint(0, 100, (num_states, num_states))
        C_rand += C_rand.T
        T = MSMLib.estimate_transition_matrix(C_rand)
    T = sparse.csr_matrix(T)
    msm_analysis.check_transition(T)

    if observable_function:
        try:
            obs_goal = observable_function(T)
        except Exception as e:
            # report the underlying error in a way that works on py2 and py3
            sys.stderr.write('%s\n' % (e,))
            raise Exception("Error evaluating function: %s" % observable_function.__name__)

    assignments[0, :size_of_intial_data] = msm_analysis.sample(T, None, size_of_intial_data)

    # iterate, adding simulation time
    for sampling_round in range(rounds_of_sampling):

        # apply the adaptive sampling method - we need to be true to what a
        # real simulation would actually see for the counts matrix
        mod_assignments = assignments.copy()
        mapping = MSMLib.renumber_states(mod_assignments)
        C_mod = MSMLib.get_count_matrix_from_assignments(mod_assignments)
        T_mod = MSMLib.estimate_transition_matrix(C_mod)
        adaptive_sampling_multivariate = adaptive_sampler.sample(C_mod)

        # choose the states to sample from (in the original indexing)
        state_inds = np.arange(len(adaptive_sampling_multivariate))
        sampler = stats.rv_discrete(name='sampler',
                                    values=[state_inds, adaptive_sampling_multivariate])
        starting_states = sampler.rvs(size=simultaneous_samplers)
        starting_states = mapping[starting_states]

        # start new 'simulations' in each of those states
        s_ind = length_of_sampling_trajs + 1
        for i, init_state in enumerate(starting_states):
            a_ind = sampling_round * simultaneous_samplers + i + 1
            assignments[a_ind, :s_ind] = msm_analysis.sample(T, init_state, s_ind)

        # build a new MSM from all the simulation so far
        C_raw = MSMLib.get_count_matrix_from_assignments(assignments, n_states=num_states)
        C_raw = C_raw + C_raw.T  # might want to add trimming, etc.
        T_pred = MSMLib.estimate_transition_matrix(C_raw)

        # calculate the error between the real transition matrix and our best prediction
        assert T.shape == T_pred.shape
        squared_error = ((T_pred - T).data ** 2).sum()
        distance_to_target[sampling_round] = np.sqrt(squared_error) / float(num_states)

        if observable_function:
            obs_distance[sampling_round] = np.abs(observable_function(T_mod) - obs_goal)

    return distance_to_target, obs_distance