def submodel(self, states: np.ndarray):
    r"""Return a count model restricted to a selection of states.

    Parameters
    ----------
    states : array_like
        Indices of the states to restrict to. A scalar is treated as a
        one-element selection.

    Returns
    -------
    submodel : TransitionCountModel
        A new model built from the count matrix restricted to ``states``.
        The state symbols and the state histogram are restricted in the same
        way when they are set; the counting mode, the lagtime, the full count
        matrix and the full state histogram are passed through unchanged.

    Raises
    ------
    ValueError
        If the largest requested state index is not smaller than
        ``self.n_states``.
    """
    states = np.atleast_1d(states)
    if np.max(states) >= self.n_states:
        # Report the valid index range of this model rather than echoing the
        # offending index back as if it were the upper bound.
        raise ValueError(
            "Tried restricting model to states that are not represented! "
            "States range from 0 to {}.".format(self.n_states - 1))

    sub_count_matrix = submatrix(self.count_matrix, states)
    sub_symbols = (
        self.state_symbols[states] if self.state_symbols is not None else None
    )
    sub_state_histogram = (
        self.state_histogram[states] if self.state_histogram is not None else None
    )
    return TransitionCountModel(
        sub_count_matrix,
        self.counting_mode,
        self.lagtime,
        sub_state_histogram,
        state_symbols=sub_symbols,
        count_matrix_full=self.count_matrix_full,
        state_histogram_full=self.state_histogram_full,
    )
def largest_connected_submatrix(C, directed=True, lcc=None):
    r"""Restrict a count matrix to its largest connected set of states.

    The count matrix is interpreted as the weight matrix of a graph. Unless
    the largest connected set is supplied by the caller, it is determined
    with ``largest_connected_set``; the count matrix is then restricted to
    those states.

    Parameters
    ----------
    C : scipy.sparse matrix or numpy ndarray
        Count matrix specifying edge weights.
    directed : bool, optional
        Whether connectivity is evaluated on a directed or an undirected
        graph. Only used when `lcc` is not given. Default is True.
    lcc : (M,) ndarray, optional
        Precomputed largest connected set. When given, it is used as-is and
        no connectivity analysis is performed.

    Returns
    -------
    C_cc : matrix
        The result of ``submatrix(C, lcc)``, i.e. the count matrix restricted
        to the states of the largest connected set.
    """
    connected_states = (
        largest_connected_set(C, directed=directed) if lcc is None else lcc
    )
    return submatrix(C, connected_states)
def bootstrapping_dtrajs(dtrajs, lag, N_full, nbs=10000, active_set=None):
    """Perform trajectory-based re-sampling of count-matrix singular values.

    For every bootstrap sample, as many trajectories as are in `dtrajs` are
    drawn with replacement, their lagged transition counts are summed into a
    single count matrix, and the singular values of that matrix are recorded.

    Parameters
    ----------
    dtrajs : list of discrete trajectories
        Each trajectory is a one-dimensional sequence of state indices.
    lag : int
        Lag time in trajectory steps; must be non-negative.
    N_full : int
        Number of states in discrete trajectories.
    nbs : int, optional
        Number of bootstrapping samples.
    active_set : array_like, optional
        Indices of active set, all count matrices will be restricted
        to active set.

    Returns
    -------
    smean : ndarray(N,)
        Mean values of singular values over all bootstrap samples, where N is
        the size of the active set, or `N_full` if no active set is given.
    sdev : ndarray(N,)
        Standard deviations of singular values over all bootstrap samples.

    Raises
    ------
    ValueError
        If `lag` is negative or `dtrajs` is empty.
    """
    if lag < 0:
        raise ValueError("lag must be non-negative, got {}.".format(lag))
    # Number of simulations.
    n_trajs = len(dtrajs)
    if n_trajs == 0:
        raise ValueError("dtrajs must contain at least one trajectory.")

    # Number of states the singular values are computed for.
    if active_set is not None:
        active_set = np.asarray(active_set)
        n_active = active_set.size
    else:
        n_active = N_full

    # Encode each transition (i -> j) of trajectory q as the entry
    # (q, N_full * i + j) of a sparse matrix, so that every row holds the
    # flattened count matrix of one trajectory.
    traj_ids = []
    pair_ids = []
    for q, traj in enumerate(dtrajs):
        traj = np.asarray(traj)
        n_pairs = max(len(traj) - lag, 0)
        # Slice by explicit length: ``traj[:-lag]`` would be empty for
        # lag == 0 and no longer match ``traj[lag:]``.
        origin = traj[:n_pairs]
        target = traj[lag:lag + n_pairs]
        traj_ids.append(np.full(n_pairs, q, dtype=np.intp))
        pair_ids.append(N_full * origin + target)
    traj_inds = np.concatenate(traj_ids)
    pairs = np.concatenate(pair_ids)
    data = np.ones(pairs.size)
    Ct_traj = scipy.sparse.coo_matrix(
        (data, (traj_inds, pairs)), shape=(n_trajs, N_full * N_full))
    # Converting to CSR sums duplicate entries, turning repeated transitions
    # into counts, and allows row selection below.
    Ct_traj = Ct_traj.tocsr()

    # Perform re-sampling.
    svals = np.zeros((nbs, n_active))
    for s in range(nbs):
        # Draw trajectories with replacement.
        sel = np.random.choice(n_trajs, n_trajs, replace=True)
        # Sum the selected rows into one flattened count matrix.
        Ct_sel = Ct_traj[sel, :].sum(axis=0)
        Ct_sel = np.asarray(Ct_sel).reshape((N_full, N_full))
        if active_set is not None:
            Ct_sel = submatrix(Ct_sel, active_set)
        svals[s, :] = scl.svdvals(Ct_sel)

    # Compute mean and uncertainties.
    smean = np.mean(svals, axis=0)
    sdev = np.std(svals, axis=0)
    return smean, sdev