def _param_finish(self):
    if self._force_eigenvalues_le_one:
        assert self._N_mean == self._N_cov, 'inconsistency in C(0) and mu'
        assert self._N_cov == self._N_cov_tau, 'inconsistency in C(0) and C(tau)'

    # symmetrize covariance matrices
    self.cov = self.cov + self.cov.T
    self.cov *= 0.5
    self.cov_tau = self.cov_tau + self.cov_tau.T
    self.cov_tau *= 0.5

    # norm
    self.cov /= self._N_cov - 2
    self.cov_tau /= self._N_cov_tau - 2

    # diagonalize with low rank approximation
    self._logger.debug("diagonalize Cov and Cov_tau.")
    self._eigenvalues, self._eigenvectors = \
        eig_corr(self.cov, self.cov_tau, self._epsilon)
    self._logger.debug("finished diagonalisation.")

    # compute cumulative variance
    self._cumvar = np.cumsum(self._eigenvalues ** 2)
    self._cumvar /= self._cumvar[-1]

    if len(self._skipped_trajs) >= 1:
        self._skipped_trajs = np.asarray(self._skipped_trajs)
        self._logger.warn(
            "Had to skip %u trajectories for being too short. "
            "Their indexes are in self._skipped_trajs."
            % len(self._skipped_trajs))
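# A minimal, hypothetical helper (not part of the snippets here) showing how the
# cumulative kinetic variance computed above is typically used: pick the smallest
# number of independent components whose cumulative variance reaches a cutoff.
# The name dimension_from_cumvar and the var_cutoff default are illustrative
# assumptions, not taken from the original code.
import numpy as np

def dimension_from_cumvar(cumvar, var_cutoff=0.95):
    # cumvar is monotonically increasing and ends at 1.0, so searchsorted returns
    # the index of the first component at which the cutoff is reached
    return int(np.searchsorted(cumvar, var_cutoff)) + 1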
def TestObjective(c, sp, tp, M):
    n = c.shape[0]
    iu = np.triu_indices(sp * tp)
    # rebuild the symmetric time-lagged covariance from the first half of c
    Ctau = np.zeros((sp * tp, sp * tp))
    Ctau[iu] = c[:n // 2]
    Ctau = Ctau + Ctau.T - np.diag(np.diag(Ctau))
    # rebuild the symmetric instantaneous covariance from the second half of c
    C0 = np.zeros((sp * tp, sp * tp))
    C0[iu] = c[n // 2:]
    C0 = C0 + C0.T - np.diag(np.diag(C0))
    # solve the generalized eigenvalue problem; return the negated sum of the
    # leading M eigenvalues
    D, _ = pla.eig_corr(C0, Ctau)
    D = D[:M]
    return -np.sum(D)
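# A hedged usage sketch for TestObjective above. The random matrices are purely
# illustrative: the upper triangles of a symmetric "time-lagged" matrix and a
# symmetric positive-definite "instantaneous" matrix are packed into one vector c
# (Ctau first, C0 second), exactly the layout TestObjective unpacks. The alias
# pla -> pyemma.util.linalg is an assumption based on the other snippets.
import numpy as np
from pyemma.util import linalg as pla  # assumed alias

sp, tp, M = 4, 2, 2
d = sp * tp
rng = np.random.RandomState(0)
A = rng.rand(d, d)
C0_test = A.dot(A.T) + d * np.eye(d)   # symmetric positive definite
Ctau_test = 0.5 * (A + A.T)            # symmetric test matrix
iu = np.triu_indices(d)
c = np.hstack((Ctau_test[iu], C0_test[iu]))
score = TestObjective(c, sp, tp, M)    # negated sum of the leading M eigenvalues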
def _opentica_npz(ticanpzfile):
    r"""Open a simon-type tica .npz file and return some variables."""
    lag_str = os.path.basename(ticanpzfile).replace('tica_', '').replace('.npz', '')
    trajdata = _np.load(ticanpzfile, encoding='latin1')
    icov, icovtau = trajdata['tica_cov'], trajdata['tica_cov_tau']
    l, U = eig_corr(icov, icovtau)
    tica_mean = trajdata['tica_mean']
    data = trajdata['projdat']
    corr = input2output_corr(icov, U)
    return lag_str, data, corr, tica_mean, l, U
def param_finish(self):
    # norm
    self.cov /= self.N - 1
    self.cov_tau /= self.N - self.lag * self.number_of_trajectories() - 1

    # symmetrize covariance matrices
    self.cov = self.cov + self.cov.T
    self.cov /= 2.0
    self.cov_tau = self.cov_tau + self.cov_tau.T
    self.cov_tau /= 2.0

    # diagonalize with low rank approximation
    self.eigenvalues, self.eigenvectors = \
        eig_corr(self.cov, self.cov_tau, self.epsilon)
def _diagonalize(self):
    # diagonalize with low rank approximation
    self._logger.debug("diagonalize Cov and Cov_tau.")
    eigenvalues, eigenvectors = eig_corr(self.cov, self.cov_tau, self.epsilon)
    self._logger.debug("finished diagonalisation.")

    # compute cumulative variance
    cumvar = np.cumsum(eigenvalues ** 2)
    cumvar /= cumvar[-1]

    self._model.update_model_params(cumvar=cumvar,
                                    eigenvalues=eigenvalues,
                                    eigenvectors=eigenvectors)
    self._estimated = True
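# The estimators above all delegate the "diagonalize with low rank approximation"
# step to eig_corr. The sketch below is a minimal, assumed reimplementation of
# that idea (not the pyemma code): whiten with the directions of C(0) whose
# variance exceeds epsilon, then solve an ordinary symmetric eigenproblem for
# C(tau) in the whitened space.
import numpy as np

def eig_corr_sketch(C0, Ctau, epsilon=1e-6):
    # spectral decomposition of the symmetric instantaneous covariance
    s, V = np.linalg.eigh(C0)
    keep = s > epsilon                    # low-rank approximation of C(0)
    L = V[:, keep] / np.sqrt(s[keep])     # whitening transform
    # symmetric eigenproblem in the whitened space
    l, R = np.linalg.eigh(L.T.dot(Ctau).dot(L))
    order = np.argsort(l)[::-1]           # sort by decreasing eigenvalue
    return l[order], L.dot(R[:, order])   # eigenvalues, eigenvectors in input space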
def _param_finish(self):
    if self._force_eigenvalues_le_one:
        assert self._N_cov == self._N_cov_tau, 'inconsistency in C(0) and C(tau)'

    # symmetrize covariance matrices
    self.cov = self.cov + self.cov.T
    self.cov *= 0.5
    self.cov_tau = self.cov_tau + self.cov_tau.T
    self.cov_tau *= 0.5

    # norm
    self.cov /= self._N_cov - 1
    self.cov_tau /= self._N_cov_tau - 1

    # diagonalize with low rank approximation
    self._logger.info("diagonalize Cov and Cov_tau")
    self.eigenvalues, self.eigenvectors = \
        eig_corr(self.cov, self.cov_tau, self._epsilon)
    self._logger.info("finished diagonalisation.")
# Tail of the objective function (its def line is not part of this snippet):
    C0 = np.zeros((sp * tp, sp * tp))
    C0[iu] = c[n // 2:]
    C0 = C0 + C0.T - np.diag(np.diag(C0))
    D, _ = pla.eig_corr(C0, Ctau)
    D = D[:M]
    return -np.sum(D)

# Load the test case:
sp = 4
tp = 2
R = 2
M = 2
Ctau = np.load("TestCtau.npy")
C0 = np.load("TestC0.npy")
D, X = pla.eig_corr(C0, Ctau)
D = D[:M]
X = X[:, :M]

# Check the perturbation theory:
eps_array = 1e-7 * np.array([2, 1, 0.8, 0.6, 0.4, 0.2, 0.1, 0.05, 0.01,
                             0.005, 0.001, 0.0005, 0.0001, 0.00005, 0.00001])
lambdas = np.zeros(eps_array.shape[0])
lambdasp = np.zeros(eps_array.shape[0])
q = 0
iu = np.triu_indices(Ctau.shape[0])
for eps in eps_array:
    # Create perturbation for Ctau:
    pe1 = eps * np.random.rand(iu[0].shape[0])
    Ctaue = np.zeros(Ctau.shape)
    Ctaue[iu] = pe1
    Ctaue = Ctaue + Ctaue.T - np.diag(np.diag(Ctaue))
# Tail of a function that packs the upper triangles of Ctau and C0 into one
# vector (its def line is not part of this snippet):
    Ctaup = Ctaup[iu]
    C0p = np.reshape(C0p, (R * tp, R * tp))
    C0p = C0p[iu]
    C = np.hstack((Ctaup, C0p))
    return C

# Set parameters:
sp = 4
tp = 2
R = 3
M = 2

# Load the test matrices:
Ctau = np.load("TestCtau.npy")
C0 = np.load("TestC0.npy")

# Solve the full problem:
D, X = pla.eig_corr(C0, Ctau)
D = D[:M]
X = X[:, :M]

# Create a U for testing:
X2 = np.reshape(X, (sp, tp * M)).copy()
U, _, _ = scl.svd(X2, full_matrices=False)
U = U[:, 1:R].copy()
u = U.flatten()

# Run the function:
Ctaup = np.reshape(Ctau, (sp, tp, sp, tp))
C0p = np.reshape(C0, (sp, tp, sp, tp))
C = TestObjective(u, Ctaup, C0p, R, tp)

# Compute the Jacobian numerically:
f = ft.partial(TestObjective, Ctau=Ctaup, C0=C0p, R=R, tp=tp)
def _estimate(self, iterable, **kw):
    r"""Chunk-based parameterization of TICA.

    Iterates over all data and estimates the mean, covariance and
    time-lagged covariance. Finally, the generalized eigenvalue problem
    is solved to determine the independent components.
    """
    indim = iterable.dimension()
    assert indim > 0, "zero dimension from data source!"
    assert self.dim <= indim, (
        "requested more output dimensions (%i) than dimension"
        " of input data (%i)" % (self.dim, indim))

    self._logger.debug(
        "Running TICA with tau=%i; Estimating two covariance matrices"
        " with dimension (%i, %i)" % (self._lag, indim, indim))

    if not any(iterable.trajectory_lengths(self.stride) > self.lag):
        raise ValueError(
            "Not a single dataset [longest=%i] is longer than"
            " the lag time [%i]."
            % (max(iterable.trajectory_lengths(self.stride)), self.lag))

    self._skipped_trajs = np.fromiter(
        (i for i in range(self._ntraj)
         if iterable.trajectory_length(i) < self.lag),
        dtype=int)

    it = iterable.iterator(lag=self.lag, return_trajindex=False)
    with it:
        # register progress
        n_chunks = it._n_chunks
        self._progress_register(n_chunks, "calculate mean+cov", 0)
        nsave = int(max(log(ceil(n_chunks), 2), 2))
        self._logger.debug("using %s moments for %i chunks" % (nsave, n_chunks))
        covar = running_covar(xx=True, xy=True, yy=False,
                              remove_mean=self.remove_mean,
                              symmetrize=True, nsave=nsave)
        for X, Y in it:
            covar.add(X, Y)
            # counting chunks and log of eta
            self._progress_update(1, stage=0)

    cov, cov_tau = covar.cov_XX(), covar.cov_XY()

    # diagonalize with low rank approximation
    self._logger.debug("diagonalize Cov and Cov_tau.")
    eigenvalues, eigenvectors = eig_corr(cov, cov_tau, self.epsilon)
    self._logger.debug("finished diagonalisation.")

    # compute cumulative variance
    cumvar = np.cumsum(eigenvalues ** 2)
    cumvar /= cumvar[-1]

    if len(self._skipped_trajs) >= 1:
        self._logger.warning(
            "Had to skip %u trajectories for being too short (len<lag). "
            "Their indices are in tica_obj._skipped_trajs."
            % len(self._skipped_trajs))

    self._model.update_model_params(mean=covar.mean_X(),
                                    cov=cov, cov_tau=cov_tau,
                                    cumvar=cumvar,
                                    eigenvalues=eigenvalues,
                                    eigenvectors=eigenvectors)
    return self._model
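# A self-contained sketch of what the chunked _estimate above accumulates, for a
# single in-memory trajectory X of shape (n_frames, n_features). All names here
# are illustrative assumptions; only the eig_corr call is taken from the
# snippets above.
import numpy as np
from pyemma.util.linalg import eig_corr

def tica_direct_sketch(X, lag, epsilon=1e-6):
    X0, Xt = X[:-lag], X[lag:]
    # remove the mean over both the instantaneous and the time-lagged frames
    mean = 0.5 * (X0.mean(axis=0) + Xt.mean(axis=0))
    X0, Xt = X0 - mean, Xt - mean
    n = X0.shape[0]
    cov = X0.T.dot(X0) / (n - 1)
    # symmetrized time-lagged covariance, as in the snippets above
    cov_tau = 0.5 * (X0.T.dot(Xt) + Xt.T.dot(X0)) / (n - 1)
    eigenvalues, eigenvectors = eig_corr(cov, cov_tau, epsilon)
    cumvar = np.cumsum(eigenvalues ** 2)
    cumvar /= cumvar[-1]
    return eigenvalues, eigenvectors, cumvar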
try:
    linalg.cholesky(S)
    pd = True
except linalg.LinAlgError:
    pd = False
    print('WARNING: Overlap matrix is not positive definite.')

#------------------------------------------------
# Solve generalized eigenvalue problem
#------------------------------------------------
if symmetricC and symmetricS:
    print('Correlation matrix and overlap matrix symmetric.')
    if rankdef or not pd:
        from pyemma.util.linalg import eig_corr
        print('Using pyemma.util.linalg.eig_corr()')
        eigenValues, eigenVectors = eig_corr(S, C)
    else:
        print('Using scipy.linalg.eigh()')
        eigenValues, eigenVectors = linalg.eigh(C, S)
    idx = eigenValues.argsort()[::-1]
    eigenValues = eigenValues[idx]
    eigenVectors = eigenVectors[:, idx]
else:
    print('Correlation matrix and/or overlap matrix not symmetric.')
    if rankdef or not pd:
        print('Using pyemma.util.linalg.eig_corr()')
        eigenValues, eigenVectors = eig_corr(S, C)
    else:
        # general (non-symmetric) solver; eigenvalues may be complex
        print('Using scipy.linalg.eig()')
        eigenValues, eigenVectors = linalg.eig(C, S)
    idx = eigenValues.argsort()[::-1]
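# A compact, assumed wrapper around the same fallback logic as the script above:
# use pyemma's eig_corr when the overlap matrix S is rank-deficient or not
# positive definite, otherwise the dense generalized symmetric solver. The
# function name and arguments are illustrative, not part of the original script.
import numpy as np
from scipy import linalg
from pyemma.util.linalg import eig_corr

def solve_gev(C, S, rankdef=False):
    try:
        linalg.cholesky(S)
        pd = True
    except linalg.LinAlgError:
        pd = False
    if rankdef or not pd:
        # regularized solver for rank-deficient / non-positive-definite S
        eigenValues, eigenVectors = eig_corr(S, C)
    else:
        eigenValues, eigenVectors = linalg.eigh(C, S)
        idx = eigenValues.argsort()[::-1]          # sort descending
        eigenValues, eigenVectors = eigenValues[idx], eigenVectors[:, idx]
    return eigenValues, eigenVectors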