Ejemplo n.º 1
0
    def submodel(self, states: np.ndarray):
        r"""Return a count model restricted to a selection of states.

        The count matrix, the state symbols and the state histogram (the
        latter two only if they are set) are restricted to ``states``; the
        full count matrix and full state histogram are passed on unchanged.

        Parameters
        ----------
        states : array_like
            Indices of the states to restrict to. A scalar is treated as a
            one-element selection.

        Returns
        -------
        submodel : TransitionCountModel
            A submodel restricted to the requested states.

        Raises
        ------
        ValueError
            If the largest requested state index is not smaller than
            ``self.n_states``.
        """
        states = np.atleast_1d(states)
        if np.max(states) >= self.n_states:
            # Report the model's valid index range (0 .. n_states - 1), not
            # the offending input value.
            raise ValueError(
                "Tried restricting model to states that are not represented! "
                "States range from 0 to {}.".format(self.n_states - 1))
        sub_count_matrix = submatrix(self.count_matrix, states)
        # Optional per-state attributes are only restricted when present.
        if self.state_symbols is not None:
            sub_symbols = self.state_symbols[states]
        else:
            sub_symbols = None
        if self.state_histogram is not None:
            sub_state_histogram = self.state_histogram[states]
        else:
            sub_state_histogram = None
        return TransitionCountModel(
            sub_count_matrix,
            self.counting_mode,
            self.lagtime,
            sub_state_histogram,
            state_symbols=sub_symbols,
            count_matrix_full=self.count_matrix_full,
            state_histogram_full=self.state_histogram_full)
Ejemplo n.º 2
0
def largest_connected_submatrix(C, directed=True, lcc=None):
    r"""Restrict a count matrix to its largest connected set.

    The count matrix is interpreted as the weight matrix of a graph.
    Unless the largest connected set is supplied by the caller, it is
    determined with `largest_connected_set`; the count matrix is then
    restricted to the vertices of that set with `submatrix`.

    Parameters
    ----------
    C : scipy.sparse matrix or numpy ndarray
        Count matrix specifying edge weights
    directed : bool, optional
        Whether connected components are computed for a directed or an
        undirected graph. Default is True. Only used when `lcc` is None.
    lcc : (M,) ndarray, optional
        The largest connected set. If given, it is used as-is and no
        connectivity analysis is performed.

    Returns
    -------
    C_cc : matrix
        Count matrix restricted to the vertices (states) of the largest
        connected set, as produced by `submatrix`.

    """
    # Only run the connectivity analysis when no set was handed in.
    members = largest_connected_set(C, directed=directed) if lcc is None else lcc
    return submatrix(C, members)
def bootstrapping_dtrajs(dtrajs, lag, N_full, nbs=10000, active_set=None):
    """
    Perform trajectory based re-sampling of count-matrix singular values.

    For every bootstrap sample, ``len(dtrajs)`` trajectories are drawn with
    replacement, their lagged transition counts are summed into one count
    matrix, and the singular values of that matrix are computed. Mean and
    standard deviation over all samples are returned.

    Parameters
    ----------
    dtrajs : list of discrete trajectories
        Each trajectory is a 1-d integer sequence of state indices.
    lag : int
        Lag time (in trajectory steps), must be non-negative.
    N_full : int
        Number of states in discrete trajectories.
    nbs : int, optional
        Number of bootstrapping samples
    active_set : ndarray, optional
        Indices of active set, all count matrices will be restricted
        to active set.

    Returns
    -------
    smean : ndarray(N,)
        Mean values of singular values, where N is ``active_set.size`` if
        an active set is given and ``N_full`` otherwise.
    sdev : ndarray(N,)
        Standard deviations of singular values.

    Raises
    ------
    ValueError
        If `lag` is negative or `dtrajs` is empty.
    """
    if lag < 0:
        raise ValueError("lag must be non-negative, got {}.".format(lag))
    # Get the number of simulations:
    Q = len(dtrajs)
    if Q == 0:
        raise ValueError("dtrajs must contain at least one trajectory.")
    # Get the number of states in the active set:
    N = N_full if active_set is None else active_set.size

    # Build up a matrix of count matrices for each simulation. Size is Q*N^2:
    traj_ind = []
    state1 = []
    state2 = []
    for q, traj in enumerate(dtrajs):
        traj = np.asarray(traj)
        # Number of (t, t + lag) pairs; an explicit length (instead of the
        # slice ``[:-lag]``) keeps lag == 0 and too-short trajectories valid.
        n_pairs = max(traj.size - lag, 0)
        traj_ind.append(np.full(n_pairs, q, dtype=np.intp))
        state1.append(traj[:n_pairs])
        state2.append(traj[lag:])
    traj_inds = np.concatenate(traj_ind)
    # Flatten each transition (i, j) to the column index i * N_full + j.
    pairs = N_full * np.concatenate(state1) + np.concatenate(state2)
    data = np.ones(pairs.size)
    # Duplicate (row, column) entries are summed on conversion to CSR, so
    # row q holds the flattened count matrix of trajectory q.
    Ct_traj = scipy.sparse.coo_matrix((data, (traj_inds, pairs)),
                                      shape=(Q, N_full * N_full))
    Ct_traj = Ct_traj.tocsr()

    # Perform re-sampling:
    svals = np.zeros((nbs, N))
    for s in range(nbs):
        # Choose selection:
        sel = np.random.choice(Q, Q, replace=True)
        # Compute count matrix for selection:
        Ct_sel = Ct_traj[sel, :].sum(axis=0)
        Ct_sel = np.asarray(Ct_sel).reshape((N_full, N_full))
        if active_set is not None:
            Ct_sel = submatrix(Ct_sel, active_set)
        svals[s, :] = scl.svdvals(Ct_sel)
    # Compute mean and uncertainties:
    smean = np.mean(svals, axis=0)
    sdev = np.std(svals, axis=0)

    return smean, sdev