def build_msm_from_counts(counts, lag_time, symmetrize, return_rev_counts=False, trim=True):
    """
    Estimate the transition probability matrix from the counts matrix.

    Parameters
    ----------
    counts : matrix
        the MSM counts matrix
    lag_time :
        the lag time to build the msm with, in frames. Kept for backward
        compatibility; this function does not use it.
    symmetrize : {'MLE', 'Transpose', None}
        symmetrization scheme so that we have reversible counts
        (compared case-insensitively)
    return_rev_counts : bool
        Kept for backward compatibility; this function does not use it and
        always returns the reversible counts.
    trim : bool
        whether to run `ergodic_trim` on the counts first

    Returns
    -------
    counts : matrix
        the counts matrix (trimmed when `trim` is True)
    rev_counts : matrix
        the estimate of the reversible counts
    t_matrix : matrix
        the transition probability matrix
    populations : ndarray
        the populations, normalized to sum to one
    mapping : ndarray
        the mapping returned by `ergodic_trim`, or the identity mapping
        ``np.arange(n_states)`` when `trim` is False

    Raises
    ------
    ValueError
        If `symmetrize` is not one of the supported schemes.
    """
    symmetrize = str(symmetrize).lower()
    if symmetrize not in ['mle', 'transpose', 'none']:
        raise ValueError(
            "Invalid symmetrization scheme requested: %s. Exiting." % symmetrize)

    if trim:
        counts, mapping = ergodic_trim(counts)
    else:
        # Previously `mapping` was left unbound here, so the final return
        # raised a NameError whenever trim=False.
        mapping = np.arange(counts.shape[0])

    # Apply a symmetrization scheme
    if symmetrize == 'mle':
        rev_counts = mle_reversible_count_matrix(counts, prior=0.0)
    elif symmetrize == 'transpose':
        rev_counts = 0.5 * (counts + counts.transpose())
    else:  # 'none'
        rev_counts = counts

    t_matrix = estimate_transition_matrix(rev_counts)

    if symmetrize == 'none':
        # No reversible counts to sum: take the first eigenvector instead.
        populations = msm_analysis.get_eigenvectors(t_matrix, 5)[1][:, 0]
    else:
        populations = np.array(rev_counts.sum(0)).flatten()
    populations /= populations.sum()

    return counts, rev_counts, t_matrix, populations, mapping
def fit(self, sequences, y=None):
    """Estimate model parameters.

    Counts transitions, optionally adds prior counts and trims to the
    ergodic subset, symmetrizes the counts according to
    ``self.reversible_type``, and stores the resulting transition matrix
    and normalized populations on ``self``.

    Parameters
    ----------
    sequences : list
        List of integer sequences, each of which is one-dimensional
    y : unused parameter

    Returns
    -------
    self

    Raises
    ------
    RuntimeError
        If ``self.reversible_type`` is not a recognised value.
    """
    if self.n_states is None:
        self.n_states = np.max([np.max(seq) for seq in sequences]) + 1

    from msmbuilder import MSMLib
    # Silences MSMLib's info-level logging (module-wide side effect).
    MSMLib.logger.info = lambda *args: None
    from msmbuilder.msm_analysis import get_eigenvectors
    from msmbuilder.MSMLib import mle_reversible_count_matrix, estimate_transition_matrix, ergodic_trim

    self.rawcounts_ = self._count_transitions(sequences)
    if self.prior_counts > 0:
        densified = self.rawcounts_.todense() + self.prior_counts
        self.rawcounts_ = scipy.sparse.csr_matrix(densified)

    # STEP (1): Ergodic trimming
    if self.ergodic_trim:
        self.rawcounts_, mapping = ergodic_trim(scipy.sparse.csr_matrix(self.rawcounts_))
        # Keep only the states that survived trimming (-1 marks a dropped state).
        self.mapping_ = dict((old, new) for old, new in enumerate(mapping) if new != -1)
    else:
        self.mapping_ = dict(zip(np.arange(self.n_states), np.arange(self.n_states)))

    # STEP (2): Reversible counts matrix
    scheme = self.reversible_type
    uses_mle = scheme in ("mle", "MLE")
    uses_transpose = scheme in ("transpose", "Transpose")
    if uses_mle:
        self.countsmat_ = mle_reversible_count_matrix(self.rawcounts_)
    elif uses_transpose:
        self.countsmat_ = 0.5 * (self.rawcounts_ + self.rawcounts_.T)
    elif scheme is None:
        self.countsmat_ = self.rawcounts_
    else:
        raise RuntimeError()

    # STEP (3): transition matrix
    self.transmat_ = estimate_transition_matrix(self.countsmat_)

    # STEP (3.5): Stationary eigenvector
    if uses_mle or uses_transpose:
        self.populations_ = np.array(self.countsmat_.sum(0)).flatten()
    else:
        # scheme is None here; every other value raised above.
        self.populations_ = get_eigenvectors(self.transmat_, 5)[1][:, 0]
    self.populations_ /= self.populations_.sum()  # ensure normalization

    return self
def plot_distribution(mixture_model, grid, t_matrix=None, eigen=1, n_contours=80):
    """Plot the mixture distribution.

    Draws contours of the negated ``mixture_model.score`` over `grid` and
    scatters the model means on top.

    Parameters
    ----------
    mixture_model : object
        Must provide ``score(samples)`` and a ``means_`` attribute.
    grid : tuple
        ``(xx, yy)`` coordinate arrays of identical shape.
    t_matrix : matrix, optional
        When given, the last of the `eigen` requested eigenvectors sets the
        marker sizes (by magnitude) and colors (red for positive entries,
        blue otherwise).
    eigen : int
        Number of eigenvectors requested from ``msma.get_eigenvectors``.
    n_contours : int
        Number of contour levels.
    """
    xx, yy = grid
    mixture_samples = np.c_[xx.ravel(), yy.ravel()]
    contour_data = -mixture_model.score(mixture_samples).reshape(xx.shape)

    if t_matrix is None:
        sizes = 300
        colors = 'y'
    else:
        _, vecs = msma.get_eigenvectors(t_matrix, n_eigs=eigen)
        sizes = vecs[:, -1] * 300
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(vecs[:, -1])
        colors = ['r' if s > 0 else 'b' for s in sizes]
        sizes = np.abs(sizes)

    # Plot means
    means = mixture_model.means_
    pp.scatter(means[:, 0], means[:, 1], c=colors, s=sizes)
    pp.contour(xx, yy, contour_data, n_contours)
def __init__(self, T, num_macrostates, flux_cutoff=None):
    """Base class for PCCA and PCCA+.

    Computes the leading eigenpairs of `T`, optionally trims them by flux,
    and stores populations and right eigenvectors on the instance.

    Parameters
    ----------
    T : csr sparse matrix
        Transition matrix
    num_macrostates : int
        Desired number of macrostates
    flux_cutoff : float, optional
        Can be set to discard low-flux eigenvectors. When given,
        ``self.num_macrostates`` becomes the number of eigenvalues kept.
    """
    self.T = T
    self.num_macrostates = num_macrostates

    self.eigenvalues, self.left_eigenvectors = msm_analysis.get_eigenvectors(
        T, self.num_macrostates)
    # Return value is ignored, so this presumably normalizes in place.
    utils.normalize_left_eigenvectors(self.left_eigenvectors)

    # Identity test: `!= None` invokes rich comparison and is unsafe for
    # array-like arguments.
    if flux_cutoff is not None:
        self.eigenvalues, self.left_eigenvectors = utils.trim_eigenvectors_by_flux(
            self.eigenvalues, self.left_eigenvectors, flux_cutoff)
        self.num_macrostates = len(self.eigenvalues)

    self.populations = self.left_eigenvectors[:, 0]
    self.num_microstates = len(self.populations)

    # Construct properly normalized right eigenvectors
    self.right_eigenvectors = utils.construct_right_eigenvectors(
        self.left_eigenvectors, self.populations, self.num_macrostates)
def run(tProb, observable, init_pops=None, num_vecs=10, output='evec_amps.h5'):
    """Compute per-eigenvector amplitudes of an observable and save them.

    Parameters
    ----------
    tProb : matrix
        Transition probability matrix.
    observable : ndarray
        One value per state; its first dimension must match `tProb`.
    init_pops : ndarray, optional
        Initial populations; normalized to sum to one. Defaults to uniform.
    num_vecs : int
        Number of dynamical eigenvectors (``num_vecs + 1`` are computed,
        the first being treated as the equilibrium vector).
    output : str
        File the eigenvalues and amplitudes are written to with ``io.saveh``.

    Notes
    -----
    Eigenpairs are cached in ``eigs<num_vecs>.h5`` in the working directory
    and re-used on later calls.
    """
    if init_pops is None:
        init_pops = np.ones(tProb.shape[0]).astype(float) / float(tProb.shape[0])
    else:
        init_pops = init_pops.astype(float)
    init_pops /= init_pops.sum()

    assert (observable.shape[0] == init_pops.shape[0])
    assert (observable.shape[0] == tProb.shape[0])

    cache_fn = 'eigs%d.h5' % num_vecs
    try:
        f = io.loadh(cache_fn)
        vals = f['vals']
        vecsL = f['vecs']
    except Exception:
        # Best-effort cache: any load failure falls back to recomputing.
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still
        # propagate instead of triggering an eigen-decomposition.
        vals, vecsL = msm_analysis.get_eigenvectors(tProb, num_vecs + 1, right=False)
        io.saveh(cache_fn, vals=vals, vecs=vecsL)

    equil = vecsL[:, 0] / vecsL[:, 0].sum()
    equil_col = np.reshape(equil, (-1, 1))

    dyn_vecsL = vecsL[:, 1:]
    # normalize the left and right eigenvectors
    dyn_vecsL /= np.sqrt(np.sum(dyn_vecsL * dyn_vecsL / equil_col, axis=0))
    dyn_vecsR = dyn_vecsL / equil_col

    amps = dyn_vecsL.T.dot(observable) * dyn_vecsR.T.dot(init_pops)

    io.saveh(output, evals=vals[1:], amplitudes=amps)
    logger.info("saved output to %s" % output)
def test_get_eigenvectors_left():
    """Both eigen-solvers must return genuine left eigenvectors of the MSM."""
    # just some random counts
    N = 100
    n_eigs = 10
    counts = np.random.randint(1, 10, size=(N, N))
    transmat, pi = build_msm(scipy.sparse.csr_matrix(counts), 'MLE')[1:3]

    values0, vectors0 = get_eigenvectors(transmat, n_eigs)
    values1, vectors1 = get_reversible_eigenvectors(transmat, n_eigs)
    values2, vectors2 = get_reversible_eigenvectors(transmat, n_eigs, populations=pi)

    # check that the eigenvalues are the same using the two methods
    np.testing.assert_array_almost_equal(values0, values1)

    def check_eigenpairs(values, vectors):
        # v is a left eigenvector with eigenvalue lam iff (T^T v) / v == lam
        # in every component.
        for lam, vec in zip(values, vectors.T):
            ratio = (transmat.T.dot(vec) / vec).flatten()
            np.testing.assert_array_almost_equal(ratio, np.ones(N) * lam)

    # the first eigenvector of every solver must be the populations
    for vectors in (vectors0, vectors1, vectors2):
        np.testing.assert_array_almost_equal(pi, vectors[:, 0])

    # check that the eigenvectors returned by both methods are _actually_
    # left eigenvectors of the transmat
    check_eigenpairs(values0, vectors0)
    check_eigenpairs(values1, vectors1)
    check_eigenpairs(values2, vectors2)
def __init__(self, T, num_macrostates, flux_cutoff=None):
    """Base class for PCCA and PCCA+.

    Computes the leading eigenpairs of `T`, optionally trims them by flux,
    and stores populations and right eigenvectors on the instance.

    Parameters
    ----------
    T : csr sparse matrix
        Transition matrix
    num_macrostates : int
        Desired number of macrostates
    flux_cutoff : float, optional
        Can be set to discard low-flux eigenvectors. When given,
        ``self.num_macrostates`` becomes the number of eigenvalues kept.
    """
    self.T = T
    self.num_macrostates = num_macrostates

    self.eigenvalues, self.left_eigenvectors = msm_analysis.get_eigenvectors(T, self.num_macrostates)
    # Return value is ignored, so this presumably normalizes in place.
    utils.normalize_left_eigenvectors(self.left_eigenvectors)

    # Identity test: `!= None` invokes rich comparison and is unsafe for
    # array-like arguments.
    if flux_cutoff is not None:
        self.eigenvalues, self.left_eigenvectors = utils.trim_eigenvectors_by_flux(
            self.eigenvalues, self.left_eigenvectors, flux_cutoff)
        self.num_macrostates = len(self.eigenvalues)

    self.populations = self.left_eigenvectors[:, 0]
    self.num_microstates = len(self.populations)

    # Construct properly normalized right eigenvectors
    self.right_eigenvectors = utils.construct_right_eigenvectors(
        self.left_eigenvectors, self.populations, self.num_macrostates)
def build_msm(counts, symmetrize='MLE', ergodic_trimming=True):
    """
    Estimates the transition probability matrix from the counts matrix.

    Parameters
    ----------
    counts : matrix
        the MSM counts matrix
    symmetrize : {'MLE', 'Transpose', None}
        symmetrization scheme so that we have reversible counts
        (compared case-insensitively)
    ergodic_trimming : bool (optional)
        whether or not to trim states to achieve an ergodic model

    Returns
    -------
    rev_counts : matrix
        the estimate of the reversible counts
    t_matrix : matrix
        the transition probability matrix
    populations : ndarray, float
        the equilibrium populations of each state
    mapping : ndarray, int
        a mapping from the passed counts matrix to the new counts and
        transition matrices (the identity when no trimming is done)

    Raises
    ------
    ValueError
        If `symmetrize` is not one of the supported schemes.
    """
    symmetrize = str(symmetrize).lower()
    if symmetrize not in ['mle', 'transpose', 'none']:
        raise ValueError(
            "Invalid symmetrization scheme requested: %s. Exiting." % symmetrize)

    if ergodic_trimming:
        counts, mapping = ergodic_trim(counts)
    else:
        mapping = np.arange(counts.shape[0])

    # Apply a symmetrization scheme. The scheme was validated above, so the
    # final branch can only be 'none'.
    if symmetrize == 'mle':
        rev_counts = mle_reversible_count_matrix(counts)
    elif symmetrize == 'transpose':
        rev_counts = 0.5 * (counts + counts.transpose())
    else:
        rev_counts = counts

    t_matrix = estimate_transition_matrix(rev_counts)

    if symmetrize == 'none':
        # No reversible counts to sum: take the first eigenvector instead.
        populations = msm_analysis.get_eigenvectors(t_matrix, 5)[1][:, 0]
    else:
        populations = np.array(rev_counts.sum(0)).flatten()
    populations /= populations.sum()  # ensure normalization

    return rev_counts, t_matrix, populations, mapping
def calculate_all_to_all_mfpt(tprob, populations=None):
    """
    Calculate the all-states by all-state matrix of mean first passage times.

    This uses the fundamental matrix formalism, and should be much faster
    than GetMFPT for calculating many MFPTs.

    Parameters
    ----------
    tprob : matrix
        transition probability matrix (sparse input is converted to dense)
    populations : array_like, float
        optional argument, the populations of each state. If not supplied,
        it will be computed from scratch

    Returns
    -------
    MFPT : array, float
        MFPT in time units of LagTime, square array for MFPT from i -> j

    Raises
    ------
    ValueError
        If the computed first eigenvector has imaginary parts, or if the
        length of `populations` does not match `tprob`.

    See Also
    --------
    GetMFPT : function for calculating a subset of the MFPTs, with
        functionality for including a set of sinks
    """
    msm_analysis.check_transition(tprob)

    if scipy.sparse.issparse(tprob):
        tprob = tprob.toarray()
        logger.warning('calculate_all_to_all_mfpt does not support sparse linear algebra')

    if populations is None:
        eigens = msm_analysis.get_eigenvectors(tprob, 5)
        if np.count_nonzero(np.imag(eigens[1][:, 0])) != 0:
            raise ValueError('First eigenvector has imaginary parts')
        populations = np.real(eigens[1][:, 0])
    populations = np.asarray(populations)

    # ensure that tprob is a transition matrix
    msm_analysis.check_transition(tprob)
    num_states = len(populations)
    if tprob.shape[0] != num_states:
        raise ValueError("Shape of tprob and populations vector don't match")

    # Every row of the limiting matrix is the population vector. Built with
    # np.outer so no np.matrix objects are involved.
    limiting_matrix = np.outer(np.ones(num_states), populations)
    z = scipy.linalg.inv(np.eye(num_states) - (tprob - limiting_matrix))

    # mfpt[i,j] = (z[j,j] - z[i,j]) / pi[j]; broadcasting runs over columns j.
    mfpt = (np.diag(z) - z) / populations

    return mfpt
def GetRateMatrix(T, EigAns=None, FixNegativity=True):
    """Estimate a rate matrix from the eigen-decomposition of T.

    Computes ``K = diag(1/p) * ev * diag(-log|lam|) * ev^T`` where ``ev``
    holds the eigenvectors (as columns), ``p`` is the first eigenvector and
    each column is first rescaled so that ``sum(ev_i**2 / p) == 1``.
    To check the result, compare ``scipy.linalg.expm(-K)`` with `T`.

    Parameters
    ----------
    T : matrix
        Transition matrix; only ``T.shape[0]`` is read unless `EigAns` is None.
    EigAns : tuple (eigenvalues, eigenvectors), optional
        Precomputed eigen-decomposition with one eigenvector per state.
        Computed with ``get_eigenvectors(T, NumStates)`` when omitted.
        NOTE: the eigenvector array is rescaled in place.
    FixNegativity : bool
        When true, ``RemoveRateDiagonal`` is applied to K before returning.

    Returns
    -------
    K : ndarray
        The rate matrix.
    """
    NumStates = T.shape[0]
    if EigAns is None:
        EigAns = get_eigenvectors(T, NumStates)
    print("Done Getting Eigenvectors")

    ev = EigAns[1]
    # `p` is a view of column 0, so it follows that column's rescaling below.
    p = ev[:, 0]
    for i in range(NumStates):
        ev[:, i] /= np.dot(ev[:, i] / p, ev[:, i]) ** .5

    print("Getting evT and deleting old ev.")
    ev = np.real(ev).copy()
    # Magnitudes are used so that negative eigenvalues still have a real log.
    lam = np.abs(EigAns[0])
    del EigAns  # drop the reference before the large products below

    D = scipy.sparse.dia_matrix((1 / p, 0), (NumStates, NumStates))
    K = D.dot(ev)
    print("Done 1st mm")

    L = scipy.sparse.dia_matrix((-np.log(lam), 0), (NumStates, NumStates))
    # (L * K^T)^T == K * L, i.e. scale column i of K by -log(lam_i).
    K = L.dot(K.transpose())
    print("Done 2nd mm")

    K = np.array(K.transpose(), dtype=lam.dtype, order="C")
    ev = np.array(ev.transpose(), dtype=lam.dtype, order="C")
    K = np.dot(K, ev)
    print("Done 3rd mm")

    if FixNegativity:
        # This enforces "reasonable" constraints on the rate matrix,
        # e.g. negative off diagonals and positive diagonals
        RemoveRateDiagonal(K)
    return K
def estimate_mle_populations(matrix):
    """Return populations for `matrix` using the active msmbuilder version.

    Dispatches on the module-level ``msmb_version``: '2.8.2' takes the first
    eigenvector of the estimated transition matrix, '3.2.0' uses
    ``MarkovStateModel._fit_mle``. Any other version returns None.
    """
    if msmb_version == '2.8.2':
        t_matrix = estimate_transition_matrix(matrix)
        # NOTE(review): `kwargs` is not a parameter of this function, so it
        # must exist at module level or this raises NameError -- confirm.
        eigen_pair = get_eigenvectors(t_matrix, 1, **kwargs)
        return eigen_pair[1][:, 0]

    if msmb_version == '3.2.0':
        model = MarkovStateModel()
        return model._fit_mle(matrix)[1]

    return None
def GetRateMatrix(T, EigAns=None, FixNegativity=True):
    """Estimate a rate matrix from the eigen-decomposition of T.

    Computes ``K = diag(1/p) * ev * diag(-log|lam|) * ev^T`` where ``ev``
    holds the eigenvectors (as columns), ``p`` is the first eigenvector and
    each column is first rescaled so that ``sum(ev_i**2 / p) == 1``.
    To check the result, compare ``scipy.linalg.expm(-K)`` with `T`.

    Parameters
    ----------
    T : matrix
        Transition matrix; only ``T.shape[0]`` is read unless `EigAns` is None.
    EigAns : tuple (eigenvalues, eigenvectors), optional
        Precomputed eigen-decomposition with one eigenvector per state.
        Computed with ``get_eigenvectors(T, NumStates)`` when omitted.
        NOTE: the eigenvector array is rescaled in place.
    FixNegativity : bool
        When true, ``RemoveRateDiagonal`` is applied to K before returning.

    Returns
    -------
    K : ndarray
        The rate matrix.
    """
    NumStates = T.shape[0]
    if EigAns is None:
        EigAns = get_eigenvectors(T, NumStates)
    print("Done Getting Eigenvectors")

    ev = EigAns[1]
    # `p` is a view of column 0, so it follows that column's rescaling below.
    p = ev[:, 0]
    for i in range(NumStates):
        ev[:, i] /= np.dot(ev[:, i] / p, ev[:, i]) ** .5

    print("Getting evT and deleting old ev.")
    ev = np.real(ev).copy()
    # Magnitudes are used so that negative eigenvalues still have a real log.
    lam = np.abs(EigAns[0])
    del EigAns  # drop the reference before the large products below

    D = scipy.sparse.dia_matrix((1 / p, 0), (NumStates, NumStates))
    K = D.dot(ev)
    print("Done 1st mm")

    L = scipy.sparse.dia_matrix((-np.log(lam), 0), (NumStates, NumStates))
    # (L * K^T)^T == K * L, i.e. scale column i of K by -log(lam_i).
    K = L.dot(K.transpose())
    print("Done 2nd mm")

    K = np.array(K.transpose(), dtype=lam.dtype, order="C")
    ev = np.array(ev.transpose(), dtype=lam.dtype, order="C")
    K = np.dot(K, ev)
    print("Done 3rd mm")

    if FixNegativity:
        # This enforces "reasonable" constraints on the rate matrix,
        # e.g. negative off diagonals and positive diagonals
        RemoveRateDiagonal(K)
    return K
def kl_equilib(gold_eq, comp_tmatrix):
    """Return the KL divergence of comp_tmatrix's equilibrium from gold_eq.

    Parameters
    ----------
    gold_eq : ndarray
        Reference equilibrium populations.
    comp_tmatrix : matrix
        Transition matrix whose leading eigenvector is compared to `gold_eq`.

    Returns
    -------
    kl : float
        ``sum(log(gold_eq / comp_eq) * gold_eq)``.
    """
    comp_vals, comp_vecs = msma.get_eigenvectors(comp_tmatrix, n_eigs=1)

    # Sanity check: the leading eigenvalue should be 1. Only warns.
    if np.abs(comp_vals[0] - 1.0) > EPS:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Warning, comp eigenvalue is {}".format(comp_vals[0]))

    # Do KL
    # NOTE(review): this takes the first *row* of comp_vecs. If eigenvectors
    # are stored as columns, `comp_vecs[:, 0]` is probably intended -- confirm
    # together with whatever produces `gold_eq`.
    comp_eq = comp_vecs[0]
    kl = np.sum(np.log(gold_eq / comp_eq) * gold_eq)
    return kl
def get_implied_timescales(t_matrix, n_timescales=4, lag_time=1):
    """Get implied timescales from a transition matrix.

    Parameters
    ----------
    t_matrix : matrix
        Transition matrix.
    n_timescales : int
        Number of timescales to return.
    lag_time : number
        Lag time used to scale the timescales.

    Returns
    -------
    ndarray of length `n_timescales`
        ``-lag_time / log(eigenvalue)`` for the eigenvalues after the first,
        zero-padded on the right. If anything fails, a message is printed
        and an all-zero array is returned instead.
    """
    try:
        vals, _ = msma.get_eigenvectors(t_matrix, n_eigs=n_timescales + 1)
        implied_timescales = -lag_time / np.log(vals[1:])
        n_missing = n_timescales - len(implied_timescales)
        return np.pad(implied_timescales, (0, n_missing), mode='constant')
    except Exception:
        # Deliberate best-effort: callers always receive an array.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("+++ Error getting implied timescales +++")
        return np.zeros(n_timescales)
def get_implied_timescales(t_matrix, n_timescales=4, lag_time=1):
    """Get implied timescales from a transition matrix.

    Parameters
    ----------
    t_matrix : matrix
        Transition matrix.
    n_timescales : int
        Number of timescales to return.
    lag_time : number
        Lag time used to scale the timescales.

    Returns
    -------
    ndarray of length `n_timescales`
        ``-lag_time / log(eigenvalue)`` for the eigenvalues after the first,
        zero-padded on the right. If anything fails, a message is printed
        and an all-zero array is returned instead.
    """
    try:
        vals, _ = msma.get_eigenvectors(t_matrix, n_eigs=n_timescales + 1)
        implied_timescales = -lag_time / np.log(vals[1:])
        n_missing = n_timescales - len(implied_timescales)
        return np.pad(implied_timescales, (0, n_missing), mode='constant')
    except Exception:
        # Deliberate best-effort: callers always receive an array.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("+++ Error getting implied timescales +++")
        return np.zeros(n_timescales)
def main(dir, coarse, lag, type):
    """Scatter-plot MSM eigenvector components over (CoM separation, RMSD).

    For eigenvectors 1..3 of the model's transition matrix, every retained
    state is drawn at its CoM/RMSD coordinates, colored by its eigenvector
    component and sized by that component's magnitude. Each figure is saved
    as ``2deigs<i>_com_prmsd.pdf`` in the model directory and then shown.

    Parameters
    ----------
    dir : str
        Root directory holding the ``Coarsed_r10_gen`` data and model dirs.
    coarse, lag :
        Values substituted into the data and model directory names.
    type :
        Unused.
    """
    data = dict()
    data['rmsd'] = numpy.loadtxt('%s/Coarsed_r10_gen/Coarsed%s_r10_Gens.rmsd.dat' % (dir, coarse), usecols=(2,))
    com = numpy.loadtxt('%s/Coarsed_r10_gen/Coarsed%s_r10_Gens.vmd_com.dat' % (dir, coarse), usecols=(1,))
    # Normalize by the first entry, then drop that entry.
    com = [value / com[0] for value in com]
    data['com'] = com[1:]

    modeldir = '%s/msml%s_coarse_r10_d%s/' % (dir, lag, coarse)
    # Loaded but not used below; kept so a missing file still fails here.
    pops = numpy.loadtxt('%s/Populations.dat' % modeldir)
    mapping = numpy.loadtxt('%s/Mapping.dat' % modeldir)

    # Keep only states present in the model (-1 marks a dropped state).
    kept = [x for x in range(len(data['rmsd'])) if mapping[x] != -1]
    map_com = numpy.array([data['com'][x] for x in kept])
    map_rmsd = numpy.array([data['rmsd'][x] for x in kept])

    T = mmread('%s/tProb.mtx' % modeldir)
    eigs_m = msm_analysis.get_eigenvectors(T, 10)
    ordercom = numpy.argsort(map_com)
    cm = pylab.cm.get_cmap('RdYlBu_r')  # blue will be negative components, red positive

    print(numpy.shape(eigs_m[1][:, 1]))
    for i in range(1, 4):
        component = eigs_m[1][:, i]
        # Index of the most negative component of this eigenvector.
        print(numpy.where(component == min(component)))

        ordered = eigs_m[1][ordercom, i]
        pylab.scatter(map_com[ordercom], map_rmsd[ordercom], c=ordered,
                      cmap=cm, s=1000 * abs(ordered), alpha=0.5)
        print(map_com[ordercom][numpy.argmax(ordered)])
        print(ordered[1])

        pylab.subplots_adjust(left=0.1, right=1.02, bottom=0.10, top=0.85, wspace=0, hspace=0)
        CB = pylab.colorbar()
        l, b, w, h = pylab.gca().get_position().bounds
        ll, bb, ww, hh = CB.ax.get_position().bounds
        # Shrink the colorbar to 80% of the axes height, centered vertically.
        CB.ax.set_position([ll, b + 0.1 * h, ww, h * 0.8])
        pylab.ylabel(r'p53 RMSD to Bound Conformation ($\AA$)')
        pylab.xlabel(r'p53 to S100B($\beta$$\beta$) CoM Separation ($\AA$)')
        pylab.ylim(0, max(map_rmsd))
        pylab.savefig('%s/2deigs%i_com_prmsd.pdf' % (modeldir, i), dpi=300)
        pylab.show()
def test_eigenvector_norm():
    """Normalized left/right eigenvectors agree across solvers and are bi-orthonormal."""
    N = 100
    n_eigs = 10
    counts = np.random.randint(1, 10, size=(N, N))
    transmat, pi = build_msm(scipy.sparse.csr_matrix(counts), 'MLE')[1:3]

    left_values0, left_vectors0 = get_eigenvectors(
        transmat, n_eigs, right=False, normalized=True)
    right_values0, right_vectors0 = get_eigenvectors(
        transmat, n_eigs, right=True, normalized=True)

    left_values1, left_vectors1 = get_reversible_eigenvectors(
        transmat, n_eigs, right=False, normalized=True)
    right_values1, right_vectors1 = get_reversible_eigenvectors(
        transmat, n_eigs, right=True, normalized=True)

    np.testing.assert_array_almost_equal(left_values0, right_values0)
    np.testing.assert_array_almost_equal(left_values1, right_values1)

    def first_row_signs(vectors):
        # Sign of each column's first component, as a broadcastable row.
        return np.sign(vectors[0].reshape((1, -1)))

    # Eigenvectors are only defined up to sign, so flip the reversible
    # solver's columns to match the reference solver before comparing.
    test_left_vectors1 = (left_vectors1 * first_row_signs(left_vectors0)
                          * first_row_signs(left_vectors1))
    test_right_vectors1 = (right_vectors1 * first_row_signs(right_vectors0)
                           * first_row_signs(right_vectors1))

    np.testing.assert_array_almost_equal(left_vectors0, test_left_vectors1)
    np.testing.assert_array_almost_equal(right_vectors0, test_right_vectors1)

    Id = np.eye(10)
    for left, right in ((left_vectors0, right_vectors0),
                        (left_vectors1, right_vectors1)):
        np.testing.assert_array_almost_equal(np.abs(left.T.dot(right)), Id)
def analyze_msm(t_matrix, centroids, desc, neigen=4, show=False):
    """Analyze a particular msm.

    Prints the eigenvalues and the derived ``-1 / log(eigenvalue)`` values
    for every eigenvalue after the first, and optionally plots the
    eigenvectors via ``plot_eigens``.

    Returns
    -------
    oolambda : ndarray
        ``-1 / log(val[1:])``.
    """
    val, vec = msma.get_eigenvectors(t_matrix, neigen)
    slow_vals = val[1:]
    oolambda = -1.0 / np.log(slow_vals)

    print("\n%s" % desc)
    print("Eigenvalues:\t%s" % str(val))
    print("1/lambda:\t%s" % str(oolambda))

    if show:
        plot_eigens(centroids, vec, val, desc)

    return oolambda
def step(self):
    """Move the walker to a state drawn from the first eigenvector of tprob.

    ``self.tprob()`` supplies a ``mapping`` (point -> integer state index)
    and a transition matrix. One state index is sampled with the first
    eigenvector as multinomial probabilities, and the walker is set to the
    point that maps to it.
    """
    mapping, tprob = self.tprob()

    # if we can guarentee that the counts matrix is reversible, we can
    # do this faster without the eigensolver, but I don't want to do that yet.
    vectors = msm_analysis.get_eigenvectors(tprob, min(5, tprob.shape[0]))[1]
    populations = vectors[:, 0]

    # this chooses the point from the multinomial, but its now indexed
    # in the mapped (integer) space
    chosen = np.where(np.random.multinomial(1, populations) == 1)[0][0]

    # back out the chosen item as a tuple, such that mapping[k] == chosen.
    # `.items()` works on both Python 2 and 3; `.iteritems()` is Python 2 only.
    k = next(k for k, v in mapping.items() if v == chosen)
    self.walker.set_point(k)
def step(self):
    """Move the walker to a state drawn from the first eigenvector of tprob.

    ``self.tprob()`` supplies a ``mapping`` (point -> integer state index)
    and a transition matrix. One state index is sampled with the first
    eigenvector as multinomial probabilities, and the walker is set to the
    point that maps to it.
    """
    mapping, tprob = self.tprob()

    # if we can guarentee that the counts matrix is reversible, we can
    # do this faster without the eigensolver, but I don't want to do that yet.
    vectors = msm_analysis.get_eigenvectors(tprob, min(5, tprob.shape[0]))[1]
    populations = vectors[:, 0]

    # this chooses the point from the multinomial, but its now indexed
    # in the mapped (integer) space
    chosen = np.where(np.random.multinomial(1, populations) == 1)[0][0]

    # back out the chosen item as a tuple, such that mapping[k] == chosen.
    # `.items()` works on both Python 2 and 3; `.iteritems()` is Python 2 only.
    k = next(k for k, v in mapping.items() if v == chosen)
    self.walker.set_point(k)
def get_eigenvalues(count_matrix):
    """Return the MSM eigenvalues for a sparse count matrix, largest first.

    States with no outgoing counts get a self-count of one before the MSM
    is built. The symmetrization scheme and number of eigenvalues come from
    the module-level ``args``.

    Returns
    -------
    vals : ndarray or None
        Eigenvalues sorted in descending order, or None when the MSM could
        not be built.
    """
    # States whose row sums to zero have no outgoing transitions.
    bad_states = np.array(np.where(count_matrix.sum(axis=1) == 0)[0]).flatten()

    i_ary, j_ary = count_matrix.nonzero()
    i_ary = np.concatenate((i_ary, bad_states))
    j_ary = np.concatenate((j_ary, bad_states))
    new_data = np.concatenate((count_matrix.data, np.ones(len(bad_states))))

    # Formatted so the output matches the Python 2 `print a, b, c, d` form.
    print("%s %s %s %s" % (i_ary.shape, count_matrix.data.shape,
                           new_data.shape, len(bad_states)))

    count_matrix = scipy.sparse.csr_matrix((new_data, (i_ary, j_ary)))
    print("%s %s" % (count_matrix.data.shape, count_matrix.nonzero()[0].shape))

    try:
        t_matrix = MSMLib.build_msm(count_matrix, symmetrize=args.symmetrize)[1]
    except Exception:
        # Best-effort: a failed build yields None. `Exception` (not a bare
        # except) so Ctrl-C / SystemExit are not swallowed.
        return None

    vals = msm_analysis.get_eigenvectors(t_matrix, args.num_vals, epsilon=1)[0]
    vals.sort()
    return vals[::-1]
def set_coordinate_as_eigvector2(self, lag_time=1, symmetrize='transpose'):
    """
    Use the second MSM eigenvector as the reaction coordinate.

    The MSM is estimated from ``self.counts`` with the given lag time and
    symmetrization method; the result is stored in
    ``self.reaction_coordinate_values``.

    Parameters
    ----------
    lag_time : int
        The MSM lag time to use (in units of frames) in the estimation
        of the MSM transition probability matrix from the `counts` matrix.

    symmetrize : str {'mle', 'transpose', 'none'}
        Which symmetrization method to employ in the estimation of the
        MSM transition probability matrix from the `counts` matrix.
    """
    # NOTE(review): some versions of build_msm_from_counts return a tuple
    # (counts, rev_counts, t_matrix, populations, mapping) rather than the
    # bare transition matrix -- confirm against the installed MSMLib.
    t_matrix = MSMLib.build_msm_from_counts(self.counts, lag_time, symmetrize)

    _, eigenvectors = get_eigenvectors(t_matrix, 5)
    second_eigenvector = eigenvectors[:, 1]
    self.reaction_coordinate_values = second_eigenvector.flatten()
def set_coordinate_as_eigvector2(self, lag_time=1, symmetrize='transpose'):
    """
    Use the second MSM eigenvector as the reaction coordinate.

    The MSM is estimated from ``self.counts`` with the given lag time and
    symmetrization method; the result is stored in
    ``self.reaction_coordinate_values``.

    Parameters
    ----------
    lag_time : int
        The MSM lag time to use (in units of frames) in the estimation
        of the MSM transition probability matrix from the `counts` matrix.

    symmetrize : str {'mle', 'transpose', 'none'}
        Which symmetrization method to employ in the estimation of the
        MSM transition probability matrix from the `counts` matrix.
    """
    # NOTE(review): some versions of build_msm_from_counts return a tuple
    # (counts, rev_counts, t_matrix, populations, mapping) rather than the
    # bare transition matrix -- confirm against the installed MSMLib.
    t_matrix = MSMLib.build_msm_from_counts(self.counts, lag_time, symmetrize)

    _, eigenvectors = get_eigenvectors(t_matrix, 5)
    second_eigenvector = eigenvectors[:, 1]
    self.reaction_coordinate_values = second_eigenvector.flatten()
def get_implied_timescales(self, i, num_vals=2):
    """Print and return ``-1 / log(eigenvalue)`` for model `i`.

    Parameters
    ----------
    i : int
        Index into ``self.all_models``.
    num_vals : int
        Number of eigenvalues (after the first) to convert.

    Returns
    -------
    list
        One value per eigenvalue after the first.
    """
    # Get the model
    model = get_model_from_sql(self.all_models[i])
    transition_mat = model.transition_matrix
    lag_time = model.lag_time  # read but not used below

    print('\n\n')
    print(i)

    # Only reports the problem; the computation proceeds regardless.
    if not msma.is_transition_matrix(transition_mat):
        print("%d is not a transition matrix!" % i)

    all_vals, _ = msma.get_eigenvectors(transition_mat, num_vals + 1, epsilon=1.0)
    oo_lambda = [-1.0 / np.log(val) for val in all_vals[1:]]

    print(oo_lambda)
    return oo_lambda
#!/usr/bin/env python
# Compute eigenvalues/eigenvectors of a transition matrix read from a
# Matrix Market file and save them to an HDF5 file. Refuses to overwrite
# an existing output file.
from msmbuilder import io
from msmbuilder import msm_analysis
from scipy.io import mmread
from argparse import ArgumentParser
import os

parser = ArgumentParser()
parser.add_argument(
    '-t', dest='tProb', default='./tProb.mtx',
    help='transition matrix')
parser.add_argument(
    '-o', dest='output', default='./eigs.h5',
    help='output filename')
parser.add_argument(
    '-n', dest='num_vecs', default=500, type=int,
    help='number of eigenvectors to find.')
args = parser.parse_args()

# Checked before the (expensive) eigen-decomposition is started.
if os.path.exists(args.output):
    raise Exception("path (%s) exists!" % args.output)

tProb = mmread(args.tProb)
vals, vecs = msm_analysis.get_eigenvectors(tProb, args.num_vecs)[:2]

io.saveh(args.output, vals=vals, vecs=vecs)
def kl_equilib_setup(gold_tmatrix):
    """Save gold equilibrium population.

    Computes the leading eigenpair of `gold_tmatrix`, asserts that the
    eigenvalue is within EPS of one, and returns ``gold_vecs[0]``.
    """
    gold_vals, gold_vecs = msma.get_eigenvectors(gold_tmatrix, n_eigs=1)
    leading_val = gold_vals[0]
    assert np.abs(leading_val - 1.0) < EPS, 'Gold eigenval is {}'.format(leading_val)

    # NOTE(review): this is the first *row* of the eigenvector array. If
    # eigenvectors are stored as columns, `gold_vecs[:, 0]` is probably
    # intended -- confirm against the consumers of this value.
    return gold_vecs[0]
def GetEigenvectors_Right(*args, **kwargs):
    """Deprecated alias for ``msm_analysis.get_eigenvectors(..., right=True)``.

    Emits a warning, forces ``right=True`` (overriding any value the caller
    passed) and forwards all other arguments unchanged.
    """
    # The message previously advertised the keyword as `Right`, but the
    # keyword actually forwarded below is lowercase `right`.
    warnings.warn(
        'GetEigenvectors_Right is deprecated; use get_eigenvectors() '
        'with the keyword right=True')
    kwargs['right'] = True
    return msm_analysis.get_eigenvectors(*args, **kwargs)
def build_msm(counts, symmetrize='MLE', ergodic_trimming=True):
    """
    Estimates the transition probability matrix from the counts matrix.

    Parameters
    ----------
    counts : scipy.sparse.csr_matrix
        the MSM counts matrix
    symmetrize : {'MLE', 'Transpose', None}
        symmetrization scheme so that we have reversible counts
        (compared case-insensitively)
    ergodic_trimming : bool (optional)
        whether or not to trim states to achieve an ergodic model

    Returns
    -------
    rev_counts : matrix
        the estimate of the reversible counts
    t_matrix : matrix
        the transition probability matrix
    populations : ndarray, float
        the equilibrium populations of each state
    mapping : ndarray, int
        a mapping from the passed counts matrix to the new counts and
        transition matrices (the identity when no trimming is done)

    Raises
    ------
    ValueError
        If `symmetrize` is not one of the supported schemes, or if MLE
        symmetrization is requested with ``ergodic_trimming=False``.
    """
    symmetrize = str(symmetrize).lower()
    if symmetrize not in ['mle', 'transpose', 'none']:
        raise ValueError("Invalid symmetrization scheme "
                         "requested: %s. Exiting." % symmetrize)

    if ergodic_trimming:
        counts, mapping = ergodic_trim(counts)
    else:
        mapping = np.arange(counts.shape[0])

    # Apply a symmetrization scheme. The scheme was validated above, so the
    # final branch can only be 'none'.
    if symmetrize == 'mle':
        if not ergodic_trimming:
            raise ValueError("MLE symmetrization requires ergodic trimming.")
        rev_counts = mle_reversible_count_matrix(counts)
    elif symmetrize == 'transpose':
        rev_counts = 0.5 * (counts + counts.transpose())
    else:
        rev_counts = counts

    t_matrix = estimate_transition_matrix(rev_counts)

    if symmetrize == 'none':
        # No reversible counts to sum: take the first eigenvector instead.
        populations = msm_analysis.get_eigenvectors(t_matrix, 5)[1][:, 0]
    else:
        populations = np.array(rev_counts.sum(0)).flatten()
    populations /= populations.sum()  # ensure normalization

    return rev_counts, t_matrix, populations, mapping
def kl_equilib_setup(gold_tmatrix):
    """Save gold equilibrium population.

    Computes the leading eigenpair of `gold_tmatrix`, asserts that the
    eigenvalue is within EPS of one, and returns ``gold_vecs[0]``.
    """
    gold_vals, gold_vecs = msma.get_eigenvectors(gold_tmatrix, n_eigs=1)
    leading_val = gold_vals[0]
    assert np.abs(leading_val - 1.0) < EPS, 'Gold eigenval is {}'.format(leading_val)

    # NOTE(review): this is the first *row* of the eigenvector array. If
    # eigenvectors are stored as columns, `gold_vecs[:, 0]` is probably
    # intended -- confirm against the consumers of this value.
    return gold_vecs[0]
def build_msm_from_counts(counts, lag_time, symmetrize, return_rev_counts=False, trim=True):
    """
    Estimate the transition probability matrix from the counts matrix.

    Parameters
    ----------
    counts : matrix
        the MSM counts matrix
    lag_time :
        the lag time to build the msm with, in frames. Kept for backward
        compatibility; this function does not use it.
    symmetrize : {'MLE', 'Transpose', None}
        symmetrization scheme so that we have reversible counts
        (compared case-insensitively)
    return_rev_counts : bool
        Kept for backward compatibility; this function does not use it and
        always returns the reversible counts.
    trim : bool
        whether to run `ergodic_trim` on the counts first

    Returns
    -------
    counts : matrix
        the counts matrix (trimmed when `trim` is True)
    rev_counts : matrix
        the estimate of the reversible counts
    t_matrix : matrix
        the transition probability matrix
    populations : ndarray
        the populations, normalized to sum to one
    mapping : ndarray
        the mapping returned by `ergodic_trim`, or the identity mapping
        ``np.arange(n_states)`` when `trim` is False

    Raises
    ------
    ValueError
        If `symmetrize` is not one of the supported schemes.
    """
    symmetrize = str(symmetrize).lower()
    if symmetrize not in ['mle', 'transpose', 'none']:
        raise ValueError(
            "Invalid symmetrization scheme requested: %s. Exiting." % symmetrize)

    if trim:
        counts, mapping = ergodic_trim(counts)
    else:
        # Previously `mapping` was left unbound here, so the final return
        # raised a NameError whenever trim=False.
        mapping = np.arange(counts.shape[0])

    # Apply a symmetrization scheme
    if symmetrize == 'mle':
        rev_counts = mle_reversible_count_matrix(counts, prior=0.0)
    elif symmetrize == 'transpose':
        rev_counts = 0.5 * (counts + counts.transpose())
    else:  # 'none'
        rev_counts = counts

    t_matrix = estimate_transition_matrix(rev_counts)

    if symmetrize == 'none':
        # No reversible counts to sum: take the first eigenvector instead.
        populations = msm_analysis.get_eigenvectors(t_matrix, 5)[1][:, 0]
    else:
        populations = np.array(rev_counts.sum(0)).flatten()
    populations /= populations.sum()

    return counts, rev_counts, t_matrix, populations, mapping
# try to find eigenvectors in a file
file_list = glob.glob("eigs*.h5")
try:
    # An empty file_list raises IndexError here, which also triggers the
    # recompute fallback below.
    fn = file_list[0]
    f = io.loadh(fn)
    vals = f['vals'][:num_vecs + 1]
    vecs = f['vecs'][:, :num_vecs + 1]
except Exception:
    # Best-effort cache: any failure falls back to recomputing. `Exception`
    # (not a bare except) so Ctrl-C / SystemExit are not swallowed.
    vals, vecs = msm_analysis.get_eigenvectors(T, num_vecs + 1)

vecs = vecs.real
pi = vecs[:, 0] / vecs[:, 0].sum()
# NOTE(review): this divides by the pi-weighted squared norm itself, not its
# square root -- confirm whether a sqrt is intended.
vecs /= np.sum(np.square(vecs) / pi.reshape((-1, 1)), axis=0)

ord_param_ind = ord_param.argsort()
sorted_ord_param = ord_param[ord_param_ind]
# First sorted position of every distinct order-parameter value except the
# smallest one.
state_lines = np.array([np.where(sorted_ord_param == i)[0][0]
                        for i in np.unique(ord_param)])[1:]

for i in range(num_vecs):
    figure()
    eval = vals[i + 1]
def GetEigenvectors_Right(*args, **kwargs):
    """Deprecated alias for ``msm_analysis.get_eigenvectors(..., right=True)``.

    Emits a warning, forces ``right=True`` (overriding any value the caller
    passed) and forwards all other arguments unchanged.
    """
    # The message previously advertised the keyword as `Right`, but the
    # keyword actually forwarded below is lowercase `right`.
    warnings.warn(
        'GetEigenvectors_Right is deprecated; use get_eigenvectors() '
        'with the keyword right=True')
    kwargs['right'] = True
    return msm_analysis.get_eigenvectors(*args, **kwargs)
def calculate_fluxes(sources, sinks, tprob, populations=None, committors=None):
    """
    Compute the transition path theory flux matrix.

    The flux from i to j is ``populations[i] * (1 - committors[i]) *
    tprob[i, j] * committors[j]``, with the diagonal set to zero.

    Parameters
    ----------
    sources : array_like, int
        The set of unfolded/reactant states.
    sinks : array_like, int
        The set of folded/product states.
    tprob : mm_matrix
        The transition matrix.
    populations : nd_array, float, optional
        The equilibrium populations, if not provided is re-calculated
    committors : nd_array, float, optional
        The committors associated with `sources`, `sinks`, and `tprob`.
        If not provided, is calculated from scratch. If provided, `sources`
        and `sinks` are only validated, not otherwise used.

    Returns
    -------
    fluxes : mm_matrix
        The flux matrix: a dense array when `tprob` is dense, a sparse LIL
        matrix when `tprob` is sparse.

    Raises
    ------
    ValueError
        If the populations have to be computed and the first eigenvector
        has imaginary components.

    References
    ----------
    .. [1] Metzner, P., Schutte, C. & Vanden-Eijnden, E. Transition path theory
           for Markov jump processes. Multiscale Model. Simul. 7, 1192–1219
           (2009).
    .. [2] Berezhkovskii, A., Hummer, G. & Szabo, A. Reactive flux and folding
           pathways in network models of coarse-grained protein dynamics. J.
           Chem. Phys. 130, 205102 (2009).
    """
    sources, sinks = _check_sources_sinks(sources, sinks)
    msm_analysis.check_transition(tprob)

    # check if we got the populations
    if populations is None:
        eigens = msm_analysis.get_eigenvectors(tprob, 1)
        if np.count_nonzero(np.imag(eigens[1][:, 0])) != 0:
            raise ValueError('First eigenvector has imaginary components')
        populations = np.real(eigens[1][:, 0])

    # check if we got the committors
    if committors is None:
        committors = calculate_committors(sources, sinks, tprob)

    # perform the flux computation: fluxes = X * tprob * Y with diagonal
    # X = populations * (1 - committors) and diagonal Y = committors.
    n = tprob.shape[0]
    reactive_weights = populations * (1.0 - committors)

    if scipy.sparse.issparse(tprob):
        X = scipy.sparse.lil_matrix((n, n))
        Y = scipy.sparse.lil_matrix((n, n))
        X.setdiag(reactive_weights)
        Y.setdiag(committors)

        fluxes = (X.tocsr().dot(tprob.tocsr())).dot(Y.tocsr())
        fluxes = fluxes.tolil()
        fluxes.setdiag(np.zeros(n))
    else:
        diagonal = (np.arange(n), np.arange(n))
        X = np.zeros((n, n))
        Y = np.zeros((n, n))
        X[diagonal] = reactive_weights
        Y[diagonal] = committors

        fluxes = np.dot(np.dot(X, tprob), Y)
        fluxes[diagonal] = np.zeros(n)

    return fluxes
def pcca_plus(T, N, flux_cutoff=None, do_minimization=True,
              objective_function="crisp_metastability"):
    """Perform PCCA+ lumping.

    Parameters
    ----------
    T : csr sparse matrix
        Transition matrix
    N : int
        Desired (maximum) number of macrostates. If `flux_cutoff` is given,
        this is replaced by the number of eigenvectors that survive
        trimming.
    flux_cutoff : float, optional
        If desired, discard eigenvectors with flux below this value.
    do_minimization : bool, optional
        If False, skip the optimization of the transformation matrix.
        In general, minimization is recommended.
    objective_function : str, optional
        Name of the objective function, forwarded unchanged to `opt_soft`.
        The default is 'crisp_metastability'; 'metastability' is another
        documented choice. See objective for details.

    Returns
    -------
    A : ndarray
        The transformation matrix.
    chi : ndarray
        The membership matrix
    vr : ndarray
        The right eigenvectors.
    microstate_mapping : ndarray
        Mapping from microstates to macrostates.

    Notes
    -----
    PCCA+ is used to construct a "lumped" state decomposition. First,
    the eigenvalues and eigenvectors are computed for a transition matrix.
    An optimization problem is then used to estimate a mapping from
    microstates to macrostates.

    For each microstate i, microstate_mapping[i] is chosen as the
    macrostate with the largest membership (chi) value.

    The membership matrix chi is given by chi = dot(vr, A).

    Finally, the transformation matrix A is the output of a constrained
    optimization problem.

    References
    ----------
    .. [1] Deuflhard P, et al. "Identification of almost invariant
           aggregates in reversible nearly uncoupled markov chains,"
           Linear Algebra Appl., vol 315 pp 39-59, 2000.
    .. [2] Deuflhard P, Weber, M., "Robust perron cluster analysis in
           conformation dynamics," Linear Algebra Appl., vol 398
           pp 161-184 2005.
    .. [3] Kube S, Weber M. "A coarse graining method for the
           identification of transition rates between molecular
           conformations," J. Chem. Phys., vol 126 pp 24103-024113, 2007.

    See Also
    --------
    PCCA
    """
    lam, vl = msm_analysis.get_eigenvectors(T, N)
    # The return value is discarded, so this presumably normalizes `vl`
    # in place.
    normalize_left_eigenvectors(vl)

    if flux_cutoff is not None:
        lam, vl = trim_eigenvectors_by_flux(lam, vl, flux_cutoff)
        N = len(lam)

    pi = vl[:, 0]

    # Turn each left eigenvector into a right eigenvector: divide by the
    # first (stationary) column, make the first component non-negative, and
    # scale so that sum(pi * vr_i ** 2) == 1.
    vr = vl.copy()
    for i in range(N):
        vr[:, i] /= pi
        vr[:, i] *= np.sign(vr[0, i])
        vr[:, i] /= np.sqrt(dot(vr[:, i] * pi, vr[:, i]))

    A, chi, microstate_mapping = opt_soft(
        vr, N, pi, lam, T,
        do_minimization=do_minimization,
        objective_function=objective_function)

    return A, chi, vr, microstate_mapping
def PCCA(T, num_macro, tolerance=1E-5, flux_cutoff=None):
    """Create a lumped model using the PCCA algorithm.

    1.  Iterate over the eigenvectors, starting with the slowest.
    2.  Calculate the spread of that eigenvector within each existing
        macrostate.
    3.  Pick the macrostate with the largest eigenvector spread.
    4.  Split the macrostate based on the sign of the eigenvector.

    Parameters
    ----------
    T : csr sparse matrix
        A transition matrix
    num_macro : int
        The desired number of states.
    tolerance : float, optional
        Specifies the numerical cutoff to use when splitting states based
        on sign: microstates of the selected macrostate whose eigenvector
        component is >= `tolerance` are moved to the new macrostate.
    flux_cutoff : float, optional
        If enabled, discard eigenvectors with flux below this value.

    Returns
    -------
    microstate_mapping : ndarray
        mapping from the Microstate indices to the Macrostate indices

    Notes
    -----
    To construct a Macrostate MSM, you then need to map your Assignment
    data to the new states (e.g. MSMLib.apply_mapping_to_assignments).

    A split can move no microstates at all (none reach `tolerance`), which
    leaves the new macrostate empty. Empty macrostates are given a spread of
    zero instead of raising, so the returned mapping may then contain fewer
    than `num_macro` distinct labels.

    References
    ----------
    .. [1] Deuflhard P, et al. "Identification of almost invariant
           aggregates in reversible nearly uncoupled markov chains,"
           Linear Algebra Appl., vol 315 pp 39-59, 2000.
    """
    n = T.shape[0]
    lam, vl = msm_analysis.get_eigenvectors(T, num_macro)
    # The return value is discarded, so this presumably normalizes `vl`
    # in place.
    normalize_left_eigenvectors(vl)

    if flux_cutoff is not None:
        lam, vl = trim_eigenvectors_by_flux(lam, vl, flux_cutoff)
        num_macro = len(lam)

    pi = vl[:, 0]

    # Turn each left eigenvector into a right eigenvector: divide by the
    # first (stationary) column, make the first component non-negative, and
    # scale so that sum(pi * vr_i ** 2) == 1.
    vr = vl.copy()
    for i in range(num_macro):
        vr[:, i] /= pi
        vr[:, i] *= np.sign(vr[0, i])
        vr[:, i] /= np.sqrt(dot(vr[:, i] * pi, vr[:, i]))

    # Remove the stationary eigenvector; only the remaining ones drive splits.
    vr = vr[:, 1:]

    microstate_mapping = np.zeros(n, 'int')

    def spread(x):
        """Return max - min of `x`, or 0.0 for an empty selection."""
        if x.size == 0:
            return 0.0
        return x.max() - x.min()

    # One split per remaining eigenvector: to get 2 states we split once.
    for i in range(num_macro - 1):
        v = vr[:, i]
        all_spreads = np.array(
            [spread(v[microstate_mapping == k]) for k in range(i + 1)])
        state_to_split = np.argmax(all_spreads)
        in_split_state = microstate_mapping == state_to_split
        microstate_mapping[in_split_state & (v >= tolerance)] = i + 1

    return microstate_mapping
def PCCA(T, num_macro, tolerance=1E-5, flux_cutoff=None):
    """Create a lumped model using the PCCA algorithm.

    1.  Iterate over the eigenvectors, starting with the slowest.
    2.  Calculate the spread of that eigenvector within each existing
        macrostate.
    3.  Pick the macrostate with the largest eigenvector spread.
    4.  Split the macrostate based on the sign of the eigenvector.

    Parameters
    ----------
    T : csr sparse matrix
        A transition matrix
    num_macro : int
        The desired number of states.
    tolerance : float, optional
        Specifies the numerical cutoff to use when splitting states based
        on sign: microstates of the selected macrostate whose eigenvector
        component is >= `tolerance` are moved to the new macrostate.
    flux_cutoff : float, optional
        If enabled, discard eigenvectors with flux below this value.

    Returns
    -------
    microstate_mapping : ndarray
        mapping from the Microstate indices to the Macrostate indices

    Notes
    -----
    To construct a Macrostate MSM, you then need to map your Assignment
    data to the new states (e.g. MSMLib.apply_mapping_to_assignments).

    If a split moves no microstates (none reach `tolerance`), the new
    macrostate stays empty. An empty macrostate is assigned a spread of zero
    rather than raising on an empty reduction, so the result can contain
    fewer than `num_macro` distinct labels.

    References
    ----------
    .. [1] Deuflhard P, et al. "Identification of almost invariant
           aggregates in reversible nearly uncoupled markov chains,"
           Linear Algebra Appl., vol 315 pp 39-59, 2000.
    """
    n = T.shape[0]
    lam, vl = msm_analysis.get_eigenvectors(T, num_macro)
    # The return value is discarded, so this presumably normalizes `vl`
    # in place.
    normalize_left_eigenvectors(vl)

    if flux_cutoff is not None:
        lam, vl = trim_eigenvectors_by_flux(lam, vl, flux_cutoff)
        num_macro = len(lam)

    pi = vl[:, 0]

    # Convert left eigenvectors to right eigenvectors (divide by the first
    # column), fix the sign of the first component, and normalize with
    # respect to the pi-weighted inner product.
    vr = vl.copy()
    for i in range(num_macro):
        vr[:, i] /= pi
        vr[:, i] *= np.sign(vr[0, i])
        vr[:, i] /= np.sqrt(dot(vr[:, i] * pi, vr[:, i]))

    # Drop the stationary eigenvector; it is not used for splitting.
    vr = vr[:, 1:]

    microstate_mapping = np.zeros(n, 'int')

    def spread(x):
        """Return max - min of `x`, or 0.0 for an empty selection."""
        if x.size == 0:
            return 0.0
        return x.max() - x.min()

    # Thus, if we want 2 states, we split once.
    for i in range(num_macro - 1):
        v = vr[:, i]
        all_spreads = np.array(
            [spread(v[microstate_mapping == k]) for k in range(i + 1)])
        state_to_split = np.argmax(all_spreads)
        in_split_state = microstate_mapping == state_to_split
        microstate_mapping[in_split_state & (v >= tolerance)] = i + 1

    return microstate_mapping
# Script fragment: load (or compute and cache) eigenvalues of a transition
# matrix, convert the positive ones to timescales and draw them as
# horizontal lines.
# NOTE(review): `options`, `np` and `msm_analysis` are not defined in this
# fragment -- presumably set up earlier in the script; confirm.
from matplotlib.pyplot import *
from scipy.io import mmread
import os, sys, re
matplotlib.rcParams['font.size']=22
#import warnings
#warnings.filterwarnings("ignore",category=DeprecationWarning)

# Reuse a previously saved eigenvalue file when one exists for this
# writeFN / num_vals combination.
if os.path.exists( options.writeFN +'%d.npy'%options.num_vals ):
    print "Found %s, and using these values rather than recalculating them." % ( options.writeFN+'%d.npy'%options.num_vals )
    Vals = np.load( options.writeFN+'%d.npy'%options.num_vals )
else:
    T = mmread( options.T_FN )
    # Request one extra value because the first one is dropped just below.
    Vals,Vecs = msm_analysis.get_eigenvectors( T, options.num_vals+1 )
    Vals = Vals.real[1:]
    np.save( options.writeFN + '%d.npy' % options.num_vals, Vals )
    print "Saved values"

# Keep only positive values: the logarithm below needs them.
Vals = Vals[np.where(Vals > 0)]

figure()
subplot(132)
# Timescale for each value: -(lag / divisor) / ln(value).
Taus = - options.lag / options.divisor / np.log( Vals )
hlines( Taus, 0, 1, color=options.color)
if options.y_lim == None:
    # Default y-range: the powers of ten that bracket the timescales.
    ylim([10**int(np.log10(Taus.min())),10**int(np.log10(Taus.max())+1)])
else:
    # NOTE(review): the fragment is cut off here; the body of this else
    # branch is not visible.
def main(modeldir, gensfile, write=False):
    """Report and plot the extreme components of MSM eigenvectors.

    Creates ``<modeldir>/eig-states/`` if it does not exist, writes the five
    largest (and, in the ``else`` branch, five smallest) eigenvector
    components to ``eig-states/eiginfo.txt``, and saves a scatter plot of
    COM distance versus RMSD coloured and sized by eigenvector component.

    Parameters
    ----------
    modeldir : str
        Directory holding ``tProb.mtx``, ``Populations.dat``,
        ``Mapping.dat`` and ``Assignments.Fixed.h5``. The text before the
        first ``'Data'`` in this path is used to locate
        ``ProjectInfo.yaml``.
    gensfile : str
        Path to the generators ``.lh5`` file. The ``.rmsd.dat`` and
        ``.vmd_com.dat`` files are looked up next to it.
    write : bool, optional
        If True, `get_structure` is called for the selected states.

    NOTE(review): the source text of this function had lost its indentation;
    the block nesting below is reconstructed from syntax and should be
    confirmed, in particular where the "mins" section and the plotting code
    sit relative to the ``if i==0`` / ``else`` branches.
    """
    if not os.path.exists('%s/eig-states/' % modeldir):
        os.mkdir('%s/eig-states/' % modeldir)
    # NOTE(review): this handle is never closed inside the function.
    ohandle=open('%s/eig-states/eiginfo.txt' % modeldir, 'w')
    project=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    gens=Trajectory.load_from_lhdf(gensfile)
    T=mmread('%s/tProb.mtx' % modeldir)
    data=dict()
    data['rmsd']=numpy.loadtxt('%s.rmsd.dat' % gensfile.split('.lh5')[0])
    com=numpy.loadtxt('%s.vmd_com.dat' % gensfile.split('.lh5')[0], usecols=(1,))
    # The first COM entry is dropped before pairing with the RMSD values.
    data['com']=com[1:]
    # `pops` is loaded but not used below.
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    # NOTE(review): `map` shadows the builtin of the same name.
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)

    # Keep RMSD/COM values only for entries whose mapping is not -1.
    map_rmsd=[]
    map_com=[]
    for x in range(0, len(data['rmsd'])):
        if map[x]!=-1:
            map_com.append(data['com'][x])
            map_rmsd.append(data['rmsd'][x])
    map_com=numpy.array(map_com)
    map_rmsd=numpy.array(map_rmsd)

    # The transition matrix is read a second time here (same path as above).
    T=mmread('%s/tProb.mtx' % modeldir)
    eigs_m=msm_analysis.get_eigenvectors(T, 10)
    cm=pylab.cm.get_cmap('RdYlBu_r') #blue will be negative components, red positive
    print numpy.shape(eigs_m[1][:,1])

    # range(0,1) yields only i == 0, so with this nesting the else branch
    # below is not executed.
    for i in range(0,1):
        # State indices sorted by ascending component of eigenvector i.
        order=numpy.argsort(eigs_m[1][:,i])
        if i==0:
            maxes=[]
            gen_maxes=[]
            values=[]
            ohandle.write('eig%s maxes\n' % i)
            ohandle.write('state\tgenstate\tmagnitude\trmsd\tcom\n')
            # The five states with the largest components, largest first.
            for n in order[::-1][:5]:
                gen_maxes.append(numpy.where(map==n)[0])
                maxes.append(n)
                values.append(eigs_m[1][n,i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map==n)[0], eigs_m[1][n,i], map_rmsd[n], map_com[n]))
            print "maxes at ", maxes, values
            maxes=numpy.array(maxes)
            if write==True:
                get_structure(modeldir, i, gen_maxes, maxes, gens, project, ass, type='max')
        else:
            maxes=[]
            gen_maxes=[]
            values=[]
            ohandle.write('eig%s maxes\n' % i)
            for n in order[::-1][:5]:
                gen_maxes.append(numpy.where(map==n)[0])
                maxes.append(n)
                values.append(eigs_m[1][n,i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map==n)[0], eigs_m[1][n,i], map_rmsd[n], map_com[n]))
            print "maxes at ", maxes, values
            order=numpy.argsort(eigs_m[1][:,i])
            mins=[]
            gen_mins=[]
            values=[]
            ohandle.write('eig%s mins\n' % i)
            # The five states with the smallest components.
            for n in order[:5]:
                gen_mins.append(numpy.where(map==n)[0])
                mins.append(n)
                values.append(eigs_m[1][n,i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map==n)[0], eigs_m[1][n,i], map_rmsd[n], map_com[n]))
            print "mins at ", mins, values
            if write==True:
                get_structure(modeldir, i, gen_maxes, maxes, gens, project, ass, type='max')
                get_structure(modeldir, i, gen_mins, mins, gens, project, ass, type='min')

        # Scatter plot: colour is the eigenvector component, marker size is
        # 1000 times its absolute value.
        pylab.scatter(map_com[order], map_rmsd[order], c=eigs_m[1][order,i], cmap=cm, s=1000*abs(eigs_m[1][order,i]), alpha=0.5)
        print map_com[order][numpy.argmax(eigs_m[1][order,i])]
        print eigs_m[1][order,i][1]
        CB=pylab.colorbar()
        # Shrink the colorbar to 80% of the axes height, offset by 10%.
        l,b,w,h=pylab.gca().get_position().bounds
        ll, bb, ww, hh=CB.ax.get_position().bounds
        CB.ax.set_position([ll, b+0.1*h, ww, h*0.8])
        CB.set_label('Eig%s Magnitudes' % i)
        ylabel=pylab.ylabel('Ligand RMSD to Xtal ($\AA$)')
        xlabel=pylab.xlabel(r'P Active Site - L COM Distance ($\AA$)')
        pylab.legend(loc=8, frameon=False)
        pylab.savefig('%s/2deigs%i_com_prmsd.png' %(modeldir, i),dpi=300)
def calculate_all_to_all_mfpt(tprob, populations=None):
    """
    Calculate the all-states by all-state matrix of mean first passage times.

    This uses the fundamental matrix formalism, and should be much faster
    than GetMFPT for calculating many MFPTs.

    Parameters
    ----------
    tprob : matrix
        transition probability matrix. A scipy sparse matrix is converted to
        a dense array (a warning is logged), because the computation inverts
        a dense matrix.
    populations : array_like, float
        optional argument, the populations of each state. If not supplied,
        it is taken from the first eigenvector returned by
        `msm_analysis.get_eigenvectors`.

    Returns
    -------
    MFPT : array, float
        MFPT in time units of LagTime, square array for MFPT from i -> j

    Raises
    ------
    ValueError
        If the populations have to be computed and the first eigenvector
        has imaginary parts, or if the number of populations does not match
        the number of rows of `tprob`.

    See Also
    --------
    GetMFPT : function for calculating a subset of the MFPTs,
        with functionality for including a set of sinks

    References
    ----------
    .. [1] Metzner, P., Schutte, C. & Vanden-Eijnden, E. Transition path
           theory for Markov jump processes. Multiscale Model. Simul. 7,
           1192-1219 (2009).
    .. [2] Berezhkovskii, A., Hummer, G. & Szabo, A. Reactive flux and
           folding pathways in network models of coarse-grained protein
           dynamics. J. Chem. Phys. 130, 205102 (2009).
    """
    # Ensure that tprob is a transition matrix.
    msm_analysis.check_transition(tprob)

    if scipy.sparse.issparse(tprob):
        tprob = tprob.toarray()
        logger.warning('calculate_all_to_all_mfpt does not support sparse linear algebra')

    if populations is None:
        eigens = msm_analysis.get_eigenvectors(tprob, 1)
        if np.count_nonzero(np.imag(eigens[1][:, 0])) != 0:
            raise ValueError('First eigenvector has imaginary parts')
        populations = np.real(eigens[1][:, 0])

    populations = np.asarray(populations)
    num_states = len(populations)
    if tprob.shape[0] != num_states:
        raise ValueError("Shape of tprob and populations vector don't match")

    # Limiting matrix: every row is a copy of the populations vector.
    limiting_matrix = np.outer(np.ones(num_states), populations)
    z = scipy.linalg.inv(np.eye(num_states) - (tprob - limiting_matrix))

    # mfpt[i, j] = (z[j, j] - z[i, j]) / populations[j]; broadcasting applies
    # the diagonal and the populations column by column.
    mfpt = (np.diag(z) - z) / populations

    return mfpt
# Script fragment: scatter-plot the first four eigenvectors of the
# ./l6 transition matrix against per-state COM distance and RMSD.
from scipy.io import *
from msmbuilder import Trajectory
import numpy
import pylab
from msmbuilder import msm_analysis

T=mmread('./l6/tProb.mtx')
# NOTE(review): `map` shadows the builtin of the same name.
map=numpy.loadtxt('./l6/Mapping.dat')
# Only column 1 of each data file is read.
com_dist=numpy.loadtxt('Gens_com_dist.dat', usecols=(1,))
prmsd=numpy.loadtxt('Gens_p53_rmsd.dat', usecols=(1,))

# Keep only the entries whose mapping value is not -1.
frames=numpy.where(map!=-1)[0]
stateprmsd=prmsd[frames]
statecom=com_dist[frames]

eigs_m=msm_analysis.get_eigenvectors(T, 10)
#import pdb
#pdb.set_trace()
# `order` is only referenced by the commented-out scatter call below.
order=numpy.argsort(stateprmsd)
ordercom=numpy.argsort(statecom)
cm=pylab.cm.get_cmap('RdYlBu_r') #blue will be negative components, red positive
print numpy.shape(eigs_m[1][:,1])
print len(stateprmsd)
print len(frames)

# One scatter call per eigenvector i: colour is the component value, marker
# size is 1000 times its absolute value; the RMSD axis is scaled by 10.
for i in range(0,4):
    #pylab.scatter(statermsd[order], statehelix[order]*100., c=eigs_m[1][:,i], cmap=cm, s=1000*abs(eigs_m[1][:,i]), alpha=0.7)
    pylab.scatter(statecom[ordercom], stateprmsd[ordercom]*10., c=eigs_m[1][ordercom,i], cmap=cm, s=1000*abs(eigs_m[1][ordercom,i]), alpha=0.5)
def pcca_plus(T, N, flux_cutoff=None, do_minimization=True,
              objective_function="crisp_metastability"):
    """Perform PCCA+ lumping.

    Parameters
    ----------
    T : csr sparse matrix
        Transition matrix
    N : int
        Desired (maximum) number of macrostates. When `flux_cutoff` is
        given, it is replaced by the number of eigenvectors left after
        trimming.
    flux_cutoff : float, optional
        If desired, discard eigenvectors with flux below this value.
    do_minimization : bool, optional
        If False, skip the optimization of the transformation matrix.
        In general, minimization is recommended.
    objective_function : str, optional
        Name of the objective function, passed through to `opt_soft`.
        Defaults to 'crisp_metastability'; 'metastability' is another
        documented choice. See objective for details.

    Returns
    -------
    A : ndarray
        The transformation matrix.
    chi : ndarray
        The membership matrix
    vr : ndarray
        The right eigenvectors.
    microstate_mapping : ndarray
        Mapping from microstates to macrostates.

    Notes
    -----
    PCCA+ is used to construct a "lumped" state decomposition. First,
    the eigenvalues and eigenvectors are computed for a transition matrix.
    An optimization problem is then used to estimate a mapping from
    microstates to macrostates.

    For each microstate i, microstate_mapping[i] is chosen as the
    macrostate with the largest membership (chi) value.

    The membership matrix chi is given by chi = dot(vr, A).

    Finally, the transformation matrix A is the output of a constrained
    optimization problem.

    References
    ----------
    .. [1] Deuflhard P, et al. "Identification of almost invariant
           aggregates in reversible nearly uncoupled markov chains,"
           Linear Algebra Appl., vol 315 pp 39-59, 2000.
    .. [2] Deuflhard P, Weber, M., "Robust perron cluster analysis in
           conformation dynamics," Linear Algebra Appl., vol 398
           pp 161-184 2005.
    .. [3] Kube S, Weber M. "A coarse graining method for the
           identification of transition rates between molecular
           conformations," J. Chem. Phys., vol 126 pp 24103-024113, 2007.

    See Also
    --------
    PCCA
    """
    lam, vl = msm_analysis.get_eigenvectors(T, N)
    # The return value is discarded, so this presumably normalizes `vl`
    # in place.
    normalize_left_eigenvectors(vl)

    if flux_cutoff is not None:
        lam, vl = trim_eigenvectors_by_flux(lam, vl, flux_cutoff)
        N = len(lam)

    pi = vl[:, 0]

    # Right eigenvectors from left ones: divide by the first column, make
    # the first component non-negative, then normalize so that
    # sum(pi * vr_i ** 2) == 1.
    vr = vl.copy()
    for i in range(N):
        vr[:, i] /= pi
        vr[:, i] *= np.sign(vr[0, i])
        vr[:, i] /= np.sqrt(dot(vr[:, i] * pi, vr[:, i]))

    A, chi, microstate_mapping = opt_soft(
        vr, N, pi, lam, T,
        do_minimization=do_minimization,
        objective_function=objective_function)

    return A, chi, vr, microstate_mapping
def calculate_fluxes(sources, sinks, tprob, populations=None, committors=None):
    """
    Compute the transition path theory flux matrix.

    The off-diagonal entry (i, j) of the result is
    ``populations[i] * (1 - committors[i]) * tprob[i, j] * committors[j]``;
    the diagonal is set to zero.

    Parameters
    ----------
    sources : array_like, int
        The set of unfolded/reactant states.
    sinks : array_like, int
        The set of folded/product states.
    tprob : mm_matrix
        The transition matrix (dense array or scipy sparse matrix).
    populations : nd_array, float, optional
        The equilibrium populations. If not provided, they are taken from
        the first eigenvector returned by `msm_analysis.get_eigenvectors`.
    committors : nd_array, float, optional
        The committors associated with `sources`, `sinks`, and `tprob`.
        If not provided, they are computed with `calculate_committors`.
        If provided, `sources` and `sinks` are still passed through
        `_check_sources_sinks` but are otherwise unused.

    Returns
    -------
    fluxes : mm_matrix
        The flux matrix. For a scipy sparse `tprob` the result is converted
        to LIL format; otherwise it is the result of dense `np.dot` products.

    Raises
    ------
    ValueError
        If the populations have to be computed and the first eigenvector
        has imaginary components.
    """
    sources, sinks = _check_sources_sinks(sources, sinks)
    msm_analysis.check_transition(tprob)

    # Fall back to the first eigenvector when no populations were supplied.
    if populations is None:
        eigens = msm_analysis.get_eigenvectors(tprob, 5)
        if np.count_nonzero(np.imag(eigens[1][:, 0])) != 0:
            raise ValueError('First eigenvector has imaginary components')
        populations = np.real(eigens[1][:, 0])

    # Fall back to computing the committors from scratch.
    if committors is None:
        committors = calculate_committors(sources, sinks, tprob)

    n = tprob.shape[0]
    # Diagonal of the left factor X: population weighted by (1 - committor).
    left_diagonal = populations * (1.0 - committors)

    if scipy.sparse.issparse(tprob):
        X = scipy.sparse.lil_matrix((n, n))
        Y = scipy.sparse.lil_matrix((n, n))
        X.setdiag(left_diagonal)
        Y.setdiag(committors)

        # np.dot is not aware of scipy sparse matrices; the sparse .dot
        # method performs the intended matrix product.
        fluxes = (X.tocsr().dot(tprob.tocsr())).dot(Y.tocsr())
        fluxes = fluxes.tolil()
        fluxes.setdiag(np.zeros(n))
    else:
        diagonal = np.arange(n)
        X = np.zeros((n, n))
        Y = np.zeros((n, n))
        X[diagonal, diagonal] = left_diagonal
        Y[diagonal, diagonal] = committors

        fluxes = np.dot(np.dot(X, tprob), Y)
        fluxes[diagonal, diagonal] = 0.0

    return fluxes