def calcPCA(self, ensemble, logger):
    """Run a PCA on *ensemble* and export its leading modes as an NMD file.

    The PCA object is titled with ``ensemble.getTitle()``. Its first ten
    modes are written to ``<title>_pca_modes.nmd`` together with
    ``self.selection_ref_structure``. When ``self.vmd`` compares equal to
    ``True`` the written file is also handed to ``prody.viewNMDinVMD``.

    Parameters
    ----------
    ensemble
        ProDy ensemble holding the structure information.
    logger
        Object whose ``info`` method receives the progress messages.

    Returns
    -------
    tuple
        ``(pca, outputname)``: the ``prody.PCA`` object and the name of the
        NMD file that was written.
    """
    logger.info("Calculate PCA")

    title = ensemble.getTitle()
    model = prody.PCA(title)
    model.buildCovariance(ensemble)

    logger.info("PCA")
    model.calcModes()
    logger.info(repr(model))

    nmd_name = title + "_pca_modes.nmd"
    leading_modes = model[:10]
    prody.writeNMD(nmd_name, leading_modes, self.selection_ref_structure)

    # Deliberately an equality test (not truthiness) to keep the original
    # behaviour for non-boolean values of ``self.vmd``.
    if self.vmd == True:  # noqa: E712
        prody.viewNMDinVMD(nmd_name)

    logger.info(f"PCA is saved in: {nmd_name}")
    return model, nmd_name
def calcPCA(filenameEnsemble):
    """Build a PCA from the C-alpha atoms of a multi-model PDB file.

    The file is parsed with ``subset='ca'``, its coordinate sets are loaded
    into an ensemble named ``'ensemble'``, the ensemble is iteratively
    superposed, and a PCA titled ``'Ubiquitin'`` is computed from it.

    Parameters
    ----------
    filenameEnsemble
        Argument forwarded to ``dy.parsePDB`` (``dy`` is the module alias
        used by this file, presumably ProDy).

    Returns
    -------
    The PCA object after ``buildCovariance`` and ``calcModes`` have run.
    """
    ca_atoms = dy.parsePDB(filenameEnsemble, subset='ca')

    # Reference coordinates first, then every coordinate set of the file.
    conformations = dy.Ensemble('ensemble')
    conformations.setCoords(ca_atoms.getCoords())
    conformations.addCoordset(ca_atoms.getCoordsets())
    conformations.iterpose()

    model = dy.PCA('Ubiquitin')
    model.buildCovariance(conformations)
    model.calcModes()
    return model
def test_eigenvalues(self):
    """Check PCAMetric's biggest eigenvalue against ProDy's first eigenvalue.

    Both values are derived from the fixtures on ``testPCAMetric`` and must
    agree to 10 decimal places.
    """
    # Value under test: covariance matrix and eigenvalue from PCAMetric.
    metric_cov = PCAMetric.create_covariance_matrix(testPCAMetric.coordsets)
    metric_top_eigenvalue = PCAMetric.calculate_biggest_eigenvalue(metric_cov)

    # Reference value: a ProDy PCA restricted to a single mode.
    reference = prody.PCA('pcametric_pca')
    reference.buildCovariance(testPCAMetric.ensemble)
    reference.calcModes(n_modes=1)
    reference_top_eigenvalue = reference.getEigvals()[0]

    self.assertAlmostEqual(
        reference_top_eigenvalue, metric_top_eigenvalue, places=10)
def test_covariance_matrix_vs_prody(self):
    """Check PCAMetric's covariance matrix against the one built by ProDy.

    The two matrices are computed from the ``testPCAMetric`` fixtures and
    must agree element-wise to 10 decimal places.
    """
    # Value under test: covariance matrix from PCAMetric.
    my_cov_matrix = PCAMetric.create_covariance_matrix(
        testPCAMetric.coordsets)

    # Reference value: covariance matrix built by ProDy.
    pca = prody.PCA('pcametric_pca')
    pca.buildCovariance(testPCAMetric.ensemble)
    # Use the public accessor rather than the private ``_cov`` attribute so
    # the test does not depend on ProDy's internal attribute layout.
    prody_cov_matrix = pca.getCovariance()

    numpy.testing.assert_almost_equal(
        my_cov_matrix, prody_cov_matrix, decimal=10)
def get_pca_fluctuations(ensemble, limit=3):
    """
    Compute squared fluctuations from the leading PCA modes of an ensemble.

    A PCA is built from the covariance of *ensemble*, its modes are
    calculated with the library's default settings, and the squared
    fluctuations of the first *limit* modes are returned.

    Parameters
    ----------
    ensemble
        pd.PDBEnsemble object
    limit
        number of PCA modes to consider

    Returns
    -------
    array of squared fluctuations per aligned residue
    """
    model = pd.PCA()
    model.buildCovariance(ensemble)
    model.calcModes()

    leading_modes = model[:limit]
    return pd.calcSqFlucts(leading_modes)
def _select_atoms(atoms, selstr):
    """Return the *selstr* selection of *atoms*; raise if nothing matches."""
    selection = atoms.select(selstr)
    if selection is None:
        raise ValueError('selection {0} do not match any atoms'.format(
            repr(selstr)))
    return selection


def _limit_blas_threads(nproc):
    """Return a threadpoolctl context limiting BLAS to *nproc* threads."""
    try:
        from threadpoolctl import threadpool_limits
    except ImportError:
        raise ImportError(
            'Please install threadpoolctl to control threads')
    return threadpool_limits(limits=nproc, user_api="blas")


def _pca_from_trajectory(pca, dcd, nmodes, aligned, quiet):
    """Fill *pca* from the frames of *dcd* and return the ensemble used."""
    if len(dcd) > 1000:
        # More than 1000 frames: build the covariance matrix from the
        # trajectory object itself and diagonalize it.
        pca.buildCovariance(dcd, aligned=aligned, quiet=quiet)
        pca.calcModes(nmodes)
        return dcd
    # Otherwise slice all frames into an ensemble and use SVD.
    ensemble = dcd[:]
    if not aligned:
        ensemble.iterpose(quiet=quiet)
    pca.performSVD(ensemble)
    return ensemble


def _parse_mode_indices(spec, logger):
    """Turn a projection spec such as ``'1-3 4,5 6'`` into 0-based indices.

    Whitespace-separated items are either ``a-b`` ranges (list of indices),
    comma-separated lists (list of indices) or single numbers (one int).
    Items that cannot be converted to integers are reported and skipped.
    """
    indices = []
    # ``spec`` may be None when figures were requested via ``figall`` only.
    for item in (spec or '').split():
        try:
            if '-' in item:
                bounds = item.split('-')
                if len(bounds) == 2:
                    indices.append(
                        list(range(int(bounds[0]) - 1, int(bounds[1]))))
            elif ',' in item:
                indices.append([int(i) - 1 for i in item.split(',')])
            else:
                indices.append(int(item) - 1)
        except ValueError:
            logger.warning('Ignoring invalid projection specification '
                           '{0}.'.format(repr(item)))
    return indices


def prody_pca(coords, **kwargs):
    """Perform PCA calculations for PDB or DCD format *coords* file.

    Options missing from *kwargs* are filled in from the module-level
    ``DEFAULTS`` mapping. The options read here are:

    * input/selection: ``psf``, ``pdb``, ``select``, ``altloc``, ``aligned``
    * calculation: ``nmodes``, ``nproc``, ``quiet``
    * output location: ``outdir``, ``prefix``
    * numerical output: ``outnpz``, ``outscipion``, ``extend``, ``outall``,
      ``outeig``, ``outcov``, ``outcc``, ``outhm``, ``outsf``, ``outproj``,
      ``numdelim``, ``numext``, ``numformat``
    * figures: ``figall``, ``figcc``, ``figsf``, ``figproj``, ``figformat``,
      ``figwidth``, ``figheight``, ``figdpi``

    An NMD file ``<prefix>.nmd`` is always written to ``outdir``; the other
    files are written only when the corresponding option is set.

    :raises IOError: when ``outdir`` is not an existing directory
    :raises ValueError: when the input has fewer than two frames/models,
        when the selection matches no atoms, or when the selection size does
        not match the number of atoms in the DCD file
    :raises ImportError: when ``nproc`` is set but threadpoolctl is missing
    """

    for key in DEFAULTS:
        if key not in kwargs:
            kwargs[key] = DEFAULTS[key]

    from numbers import Integral
    from os.path import isdir, splitext, join

    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import prody
    LOGGER = prody.LOGGER

    prefix = kwargs.get('prefix')
    nmodes = kwargs.get('nmodes')
    selstr = kwargs.get('select')
    quiet = kwargs.pop('quiet', False)
    altloc = kwargs.get('altloc')
    aligned = kwargs.get('aligned')
    nproc = kwargs.get('nproc')

    # Look through a trailing ".gz" to find the real file type.
    ext = splitext(coords)[1].lower()
    if ext == '.gz':
        ext = splitext(coords[:-3])[1].lower()

    if ext == '.dcd':
        pdb = kwargs.get('psf') or kwargs.get('pdb')
        if pdb:
            if splitext(pdb)[1].lower() == '.psf':
                pdb = prody.parsePSF(pdb)
            else:
                # Forward the alternate-location option only when one was
                # given, so parsePDB otherwise keeps its own default.
                parse_kwargs = {} if altloc is None else {'altloc': altloc}
                pdb = prody.parsePDB(pdb, **parse_kwargs)
        dcd = prody.DCDFile(coords)
        if prefix == '_pca' or prefix == '_eda':
            prefix = dcd.getTitle() + prefix

        if len(dcd) < 2:
            raise ValueError('DCD file must have multiple frames')
        if pdb:
            select = _select_atoms(pdb, selstr)
            if pdb.numAtoms() == dcd.numAtoms():
                dcd.setAtoms(select)
                LOGGER.info('{0} atoms are selected for calculations.'.format(
                    len(select)))
            else:
                if select.numAtoms() != dcd.numAtoms():
                    raise ValueError('number of selected atoms ({0}) does '
                                     'not match number of atoms in the DCD '
                                     'file ({1})'.format(
                                         select.numAtoms(), dcd.numAtoms()))
                if pdb.numCoordsets():
                    dcd.setCoords(select.getCoords())
        else:
            # No topology given: wrap the DCD coordinates in a bare atom
            # group so the NMD writer still has atoms to work with.
            select = prody.AtomGroup()
            select.setCoords(dcd.getCoords())
        pca = prody.PCA(dcd.getTitle())

        if nproc:
            with _limit_blas_threads(nproc):
                ensemble = _pca_from_trajectory(
                    pca, dcd, nmodes, aligned, quiet)
        else:
            ensemble = _pca_from_trajectory(pca, dcd, nmodes, aligned, quiet)
        nmodes = pca.numModes()

    else:
        pdb = prody.parsePDB(coords)
        if pdb.numCoordsets() < 2:
            raise ValueError('PDB file must contain multiple models')

        if prefix == '_pca' or prefix == '_eda':
            prefix = pdb.getTitle() + prefix

        # Validate the selection before its length is logged.
        select = _select_atoms(pdb, selstr)
        LOGGER.info('{0} atoms are selected for calculations.'.format(
            len(select)))
        LOGGER.info('{0} atoms will be used for PCA calculations.'.format(
            len(select)))
        ensemble = prody.Ensemble(select)
        pca = prody.PCA(pdb.getTitle())
        if not aligned:
            ensemble.iterpose()

        if nproc:
            with _limit_blas_threads(nproc):
                pca.performSVD(ensemble)
        else:
            pca.performSVD(ensemble)

    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(pca, join(outdir, prefix))

    if kwargs.get('outscipion'):
        prody.writeScipionModes(outdir, pca)

    prody.writeNMD(join(outdir, prefix + '.nmd'), pca[:nmodes], select)

    extend = kwargs.get('extend')
    if extend:
        if pdb:
            if extend == 'all':
                extended = prody.extendModel(pca[:nmodes], select, pdb)
            else:
                extended = prody.extendModel(pca[:nmodes], select,
                                             select | pdb.bb)
            prody.writeNMD(
                join(outdir, prefix + '_extended_' + extend + '.nmd'),
                *extended)
        else:
            LOGGER.warn('Model could not be extended, provide a PDB or '
                        'PSF file.')

    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    ext = kwargs.get('numext')
    numformat = kwargs.get('numformat')

    if outall or kwargs.get('outeig'):
        prody.writeArray(join(outdir, prefix + '_evectors' + ext),
                         pca.getArray(), delimiter=delim, format=numformat)
        prody.writeArray(join(outdir, prefix + '_evalues' + ext),
                         pca.getEigvals(), delimiter=delim, format=numformat)

    if outall or kwargs.get('outcov'):
        prody.writeArray(join(outdir, prefix + '_covariance' + ext),
                         pca.getCovariance(), delimiter=delim,
                         format=numformat)

    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        # The cross-correlation matrix feeds both the array and the heatmap.
        cross_corr = prody.calcCrossCorr(pca)
        if outall or kwargs.get('outcc'):
            prody.writeArray(
                join(outdir, prefix + '_cross-correlations' + ext),
                cross_corr, delimiter=delim, format=numformat)
        if outall or kwargs.get('outhm'):
            resnums = select.getResnums()
            hmargs = {} if resnums is None else {'resnums': resnums}
            prody.writeHeatmap(
                join(outdir, prefix + '_cross-correlations.hm'),
                cross_corr, xlabel='Residue', ylabel='Residue',
                title=pca.getTitle() + ' cross-correlations', **hmargs)

    if outall or kwargs.get('outsf'):
        prody.writeArray(join(outdir, prefix + '_sqfluct' + ext),
                         prody.calcSqFlucts(pca), delimiter=delim,
                         format=numformat)

    if outall or kwargs.get('outproj'):
        prody.writeArray(join(outdir, prefix + '_proj' + ext),
                         prody.calcProjection(ensemble, pca),
                         delimiter=delim, format=numformat)

    figall = kwargs.get('figall')
    figcc = kwargs.get('figcc')
    figsf = kwargs.get('figsf')
    figproj = kwargs.get('figproj')

    if figall or figcc or figsf or figproj:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning('Matplotlib could not be imported. '
                           'Figures are not saved.')
        else:
            prody.SETTINGS['auto_show'] = False
            LOGGER.info('Saving graphical output.')
            figformat = kwargs.get('figformat')
            width = kwargs.get('figwidth')
            height = kwargs.get('figheight')
            dpi = kwargs.get('figdpi')

            figformat = figformat.lower()
            if figall or figcc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(pca)
                plt.savefig(join(outdir, prefix + '_cc.' + figformat),
                            dpi=dpi, format=figformat)
                plt.close('all')
            if figall or figsf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(pca)
                plt.savefig(join(outdir, prefix + '_sf.' + figformat),
                            dpi=dpi, format=figformat)
                plt.close('all')
            if figall or figproj:
                for index in _parse_mode_indices(figproj, LOGGER):
                    plt.figure(figsize=(width, height))
                    prody.showProjection(ensemble, pca[index])
                    # File names use 1-based mode numbers joined by "_".
                    if isinstance(index, Integral):
                        index = [index]
                    labels = [str(i + 1) for i in index]
                    plt.savefig(join(
                        outdir,
                        prefix + '_proj_' + '_'.join(labels) + '.' +
                        figformat), dpi=dpi, format=figformat)
                    plt.close('all')
# NOTE(review): this fragment was recovered from a single collapsed line; the
# nesting below is reconstructed from the statements themselves -- confirm it
# against the surrounding script.

# Write a transformed copy of every input PDB into output_dir, applying the
# transformation stored on the matching ensemble conformation.
os.mkdir(output_dir)
for pdb_FN, conf in zip(new_pdb_FNs, ensemble):
    trans = conf.getTransformation()
    chain_coords = prody.parsePDB(pdb_FN)
    trans.apply(chain_coords)
    outFN = os.path.join(output_dir, os.path.basename(pdb_FN))
    prody.writePDB(outFN, chain_coords)

# print() with a single argument behaves the same on Python 2 and Python 3.
print('There are %d structures in the ensemble\n' % len(ensemble))

print("\n*** Principal Components Analysis ***")
pca_FN = 'prody.pca.npz'
if os.path.exists(pca_FN):
    # Reuse a previously saved model instead of recomputing it.
    pca = prody.loadModel(pca_FN)
else:
    pca = prody.PCA()
    pca.buildCovariance(ensemble)  # Build covariance matrix
    pca.calcModes()  # Calculate modes
    # Drop the 8-character '.pca.npz' suffix; the saved file is expected to
    # be found again under pca_FN on the next run.
    prody.saveModel(pca, filename=pca_FN[:-8])

if not os.path.isdir('figures'):
    os.makedirs('figures')

import matplotlib.pyplot as plt

if not os.path.isfile('rmsd.png'):
    rmsd = prody.calcRMSD(ensemble)
    plt.clf()
    plt.plot(rmsd)
    plt.xlabel('Conformation index')
    plt.ylabel('RMSD (A)')