def assign_pcs(args):
    """Project a structure or trajectory file onto selected EDA modes.

    *args* is a single 6-tuple ``(fn, topf, eda, pcs, sel, outf)`` so the
    function can be handed to map-style callers:

    * ``fn``   -- path of the coordinate file (``*pdb`` or ``*.dcd``)
    * ``topf`` -- PDB file used as topology/reference for a DCD input
    * ``eda``  -- mode container, indexed with ``pcs``
    * ``pcs``  -- indices of the modes to project onto
    * ``sel``  -- ProDy selection string
    * ``outf`` -- output text file for the DCD projection, or ``None``

    Returns the projection array for DCD input.  Returns ``None`` for PDB
    input (the projection is only printed) and for unsupported file types.
    """
    path, topology, modes, mode_ids, selection, out_path = args

    is_pdb = path.endswith("pdb")
    is_dcd = path.endswith(".dcd")

    if not (is_pdb or is_dcd):
        print("Unsupport file type: %s" % path)
        return None

    if is_pdb:
        atoms = prody.parsePDB(path).select(selection).copy()
        single = prody.Ensemble('A single pdb file ensemble')
        single.setCoords(atoms.getCoords())
        single.addCoordset(atoms.getCoordsets())
        single.iterpose()
        # NOTE(review): this branch only prints the projection; it neither
        # writes ``outf`` nor returns the array -- confirm that is intended.
        print(prody.calcProjection(single, modes[mode_ids]))
        return None

    # DCD trajectory: the topology PDB supplies atoms and reference coords.
    reference = prody.parsePDB(topology)
    chosen = reference.select(selection)
    trajectory = prody.Trajectory(path)
    trajectory.link(reference)
    trajectory.setCoords(reference)
    trajectory.setAtoms(chosen)
    projection = prody.calcProjection(trajectory, modes[mode_ids])

    if out_path is not None:
        column_names = ["PC%d" % (i + 1) for i in mode_ids]
        np.savetxt(out_path, projection, fmt="%.4f",
                   header=" ".join(column_names), comments="")
    return projection
def makeLandscape(pdb, ensemble, sel="name CA", nstep=100, prefix="landscape"):
    """Build a regular grid in the PC1/PC2 plane and its structures.

    A PCA is computed on *ensemble* restricted to the atoms of *pdb* matching
    *sel*.  The projections of the ensemble onto the first two modes define
    the bounds of an ``nstep`` x ``nstep`` grid; every grid point is mapped
    back to coordinates with ``z2xyz``.

    Parameters
    ----------
    pdb : object with a ``select`` method (e.g. a ProDy atom group)
    ensemble : ProDy ensemble; its atom selection is changed in place
    sel : selection string applied to *pdb*
    nstep : number of grid points along each principal component
    prefix : unused; kept for backward compatibility

    Returns
    -------
    (proj_land, ensemble_land)
        ``proj_land`` is an ``(nstep * nstep, 2)`` array of grid points and
        ``ensemble_land`` a ProDy ensemble holding one coordinate set per
        grid point.

    Raises
    ------
    ValueError
        If *sel* matches no atoms in *pdb*.
    """
    import numpy as np
    import prody

    # Restrict the ensemble to the requested atoms.
    atoms = pdb.select(sel)
    if atoms is None:
        raise ValueError(
            'selection {0} does not match any atoms'.format(repr(sel)))
    ensemble.setAtoms(atoms)

    # PCA of the (selected) ensemble.
    pca = prody.PCA("PCA of Transition Path")
    pca.buildCovariance(ensemble)
    pca.calcModes()

    # Grid spanning the observed range of the first two projections.
    proj0 = prody.calcProjection(ensemble, pca[0])
    proj1 = prody.calcProjection(ensemble, pca[1])
    x = np.linspace(np.min(proj0), np.max(proj0), nstep)
    y = np.linspace(np.min(proj1), np.max(proj1), nstep)
    xv, yv = np.meshgrid(x, y, sparse=False, indexing="ij")
    proj_land = np.column_stack((xv.ravel(), yv.ravel()))

    # Map every grid point back to coordinates.
    # NOTE(review): assumes z2xyz returns a 2-D array whose rows are
    # flattened xyz triplets -- confirm against its definition.
    coords_land = z2xyz(proj_land, ensemble, pca[(0, 1)])
    # Integer division: reshape() rejects the float produced by ``/``.
    coords_land = coords_land.reshape(
        (coords_land.shape[0], coords_land.shape[1] // 3, 3))

    ensemble_land = prody.Ensemble()
    ensemble_land.addCoordset(coords_land)
    return (proj_land, ensemble_land)
def prody_pca(opt):
    """Perform PCA calculations based on command line arguments.

    *opt* is the parsed command-line namespace.  The attributes read here
    are: ``outdir``, ``subparser``, ``coords``, ``prefix``, ``nmodes``,
    ``select``, ``psf``, ``pdb``, ``npz``, ``all``, ``delim``, ``ext``,
    ``numformat``, ``eigen``, ``covar``, ``ccorr``, ``sqflucts``, ``proj``,
    ``figures``, ``cc``, ``sf``, ``sp``, ``figformat``, ``width``,
    ``height`` and ``dpi``.

    Input problems are reported through ``opt.subparser.error``.  Numerical
    output and figures are written to ``opt.outdir`` with names starting
    with the prefix.
    """
    outdir = opt.outdir
    if not os.path.isdir(outdir):
        opt.subparser.error('{0:s} is not a valid path'.format(outdir))

    import prody
    LOGGER = prody.LOGGER

    coords = opt.coords
    prefix = opt.prefix
    nmodes, selstr = opt.nmodes, opt.select

    if os.path.splitext(coords)[1].lower() == '.dcd':
        ag = opt.psf or opt.pdb
        if ag:
            if os.path.splitext(ag)[1].lower() == '.psf':
                ag = prody.parsePSF(ag)
            else:
                ag = prody.parsePDB(ag)
        dcd = prody.DCDFile(coords)
        if len(dcd) < 2:
            opt.subparser.error("DCD file must contain multiple frames.")
        if ag:
            dcd.setAtomGroup(ag)
            select = dcd.select(selstr)
            if select is None:
                opt.subparser.error(
                    'Selection "{0:s}" do not match any atoms.'
                    .format(selstr))
            LOGGER.info('{0:d} atoms are selected for calculations.'
                        .format(len(select)))
        else:
            select = prody.AtomGroup()
            select.setCoords(dcd.getCoords())
        pca = prody.PCA(dcd.getTitle())
        # ``ensemble`` is needed later for projections, so it has to be
        # defined in this branch as well as in the PDB branch.
        if len(dcd) > 1000:
            pca.buildCovariance(dcd)
            pca.calcModes(nmodes)
            ensemble = dcd
        else:
            ensemble = dcd[:]
            pca.performSVD(ensemble)
    else:
        pdb = prody.parsePDB(coords)
        if pdb.numCoordsets() < 2:
            opt.subparser.error("PDB file must contain multiple models.")
        if prefix == '_pca':
            prefix = pdb.getTitle() + '_pca'
        select = pdb.select(selstr)
        # Check for an empty selection before anything calls len() on it.
        if select is None:
            opt.subparser.error('Selection "{0:s}" do not match any atoms.'
                                .format(selstr))
        LOGGER.info('{0:d} atoms are selected for calculations.'
                    .format(len(select)))
        LOGGER.info('{0:d} atoms will be used for PCA calculations.'
                    .format(len(select)))
        ensemble = prody.Ensemble(select)
        pca = prody.PCA(pdb.getTitle())
        ensemble.iterpose()
        pca.performSVD(ensemble)

    LOGGER.info('Writing numerical output.')
    if opt.npz:
        prody.saveModel(pca)
    prody.writeNMD(os.path.join(outdir, prefix + '.nmd'), pca[:nmodes],
                   select)

    outall = opt.all
    delim, ext, numformat = opt.delim, opt.ext, opt.numformat

    if outall or opt.eigen:
        prody.writeArray(os.path.join(outdir, prefix + '_evectors'+ext),
                         pca.getArray(), delimiter=delim, format=numformat)
        prody.writeArray(os.path.join(outdir, prefix + '_evalues'+ext),
                         pca.getEigenvalues(), delimiter=delim,
                         format=numformat)
    if outall or opt.covar:
        prody.writeArray(os.path.join(outdir, prefix + '_covariance'+ext),
                         pca.getCovariance(), delimiter=delim,
                         format=numformat)
    if outall or opt.ccorr:
        prody.writeArray(os.path.join(outdir, prefix +
                                      '_cross-correlations' + ext),
                         prody.calcCrossCorr(pca), delimiter=delim,
                         format=numformat)
    if outall or opt.sqflucts:
        prody.writeArray(os.path.join(outdir, prefix + '_sqfluct'+ext),
                         prody.calcSqFlucts(pca), delimiter=delim,
                         format=numformat)
    if outall or opt.proj:
        prody.writeArray(os.path.join(outdir, prefix + '_proj'+ext),
                         prody.calcProjection(ensemble, pca),
                         delimiter=delim, format=numformat)

    figall, cc, sf, sp = opt.figures, opt.cc, opt.sf, opt.sp
    if not (figall or cc or sf or sp):
        return

    try:
        import matplotlib.pyplot as plt
    except ImportError:
        LOGGER.warning('Matplotlib could not be imported. '
                       'Figures are not saved.')
        return

    LOGGER.info('Saving graphical output.')
    figformat, width, height, dpi = \
        opt.figformat, opt.width, opt.height, opt.dpi
    figformat = figformat.lower()

    if figall or cc:
        plt.figure(figsize=(width, height))
        prody.showCrossCorr(pca)
        plt.savefig(os.path.join(outdir, prefix + '_cc.'+figformat),
                    dpi=dpi, format=figformat)
        plt.close('all')

    if figall or sf:
        plt.figure(figsize=(width, height))
        prody.showSqFlucts(pca)
        plt.savefig(os.path.join(outdir, prefix + '_sf.'+figformat),
                    dpi=dpi, format=figformat)
        plt.close('all')

    if figall or sp:
        # Each whitespace-separated item is "i", "i,j,..." or "i-j"
        # (1-based); ``sp`` may be empty when only all figures are requested.
        indices = []
        for item in (sp or '').split():
            try:
                if '-' in item:
                    first, last = item.split('-')
                    indices.append(list(range(int(first) - 1, int(last))))
                elif ',' in item:
                    indices.append([int(i) - 1 for i in item.split(',')])
                else:
                    indices.append(int(item) - 1)
            except ValueError:
                LOGGER.warning('Projection specification {0} is not valid '
                               'and is skipped.'.format(repr(item)))

        for index in indices:
            plt.figure(figsize=(width, height))
            prody.showProjection(ensemble, pca[index])
            if isinstance(index, int):
                index = [index]
            labels = [str(i + 1) for i in index]
            plt.savefig(os.path.join(outdir, prefix + '_proj_' +
                                     '_'.join(labels) + '.' + figformat),
                        dpi=dpi, format=figformat)
            plt.close('all')
def prody_pca(coords, **kwargs):
    """Perform PCA calculations for PDB or DCD format *coords* file.

    Options missing from *kwargs* are filled in from ``DEFAULTS``.  The keys
    read here are: ``outdir``, ``prefix``, ``nmodes``, ``select``, ``psf``,
    ``pdb``, ``aligned``, ``extend``, ``outnpz``, ``outall``, ``outeig``,
    ``outcov``, ``outcc``, ``outhm``, ``outsf``, ``outproj``, ``numdelim``,
    ``numext``, ``numformat``, ``figall``, ``figcc``, ``figsf``,
    ``figproj``, ``figformat``, ``figwidth``, ``figheight`` and ``figdpi``.

    Raises :exc:`IOError` when ``outdir`` is not a directory and
    :exc:`ValueError` when the input has fewer than two frames/models, when
    the selection matches no atoms, or when the number of selected atoms
    does not match the DCD file.
    """
    for key in DEFAULTS:
        if key not in kwargs:
            kwargs[key] = DEFAULTS[key]

    from os.path import isdir, splitext, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import prody
    LOGGER = prody.LOGGER

    prefix = kwargs.get('prefix')
    nmodes = kwargs.get('nmodes')
    selstr = kwargs.get('select')

    # Look through a trailing ".gz" to find the real file type.
    ext = splitext(coords)[1].lower()
    if ext == '.gz':
        ext = splitext(coords[:-3])[1].lower()

    if ext == '.dcd':
        pdb = kwargs.get('psf') or kwargs.get('pdb')
        if pdb:
            if splitext(pdb)[1].lower() == '.psf':
                pdb = prody.parsePSF(pdb)
            else:
                pdb = prody.parsePDB(pdb)
        dcd = prody.DCDFile(coords)
        if prefix == '_pca' or prefix == '_eda':
            prefix = dcd.getTitle() + prefix

        if len(dcd) < 2:
            raise ValueError('DCD file must have multiple frames')
        if pdb:
            select = pdb.select(selstr)
            # Fail clearly instead of with an error on ``None`` below.
            if select is None:
                raise ValueError('selection {0} do not match any atoms'
                                 .format(repr(selstr)))
            if pdb.numAtoms() == dcd.numAtoms():
                dcd.setAtoms(select)
                LOGGER.info('{0} atoms are selected for calculations.'
                            .format(len(select)))
            else:
                if select.numAtoms() != dcd.numAtoms():
                    raise ValueError('number of selected atoms ({0}) does '
                                     'not match number of atoms in the DCD '
                                     'file ({1})'.format(select.numAtoms(),
                                                         dcd.numAtoms()))
                if pdb.numCoordsets():
                    dcd.setCoords(select.getCoords())
        else:
            select = prody.AtomGroup()
            select.setCoords(dcd.getCoords())

        pca = prody.PCA(dcd.getTitle())
        if len(dcd) > 1000:
            pca.buildCovariance(dcd, aligned=kwargs.get('aligned'))
            pca.calcModes(nmodes)
            ensemble = dcd
        else:
            ensemble = dcd[:]
            if not kwargs.get('aligned'):
                ensemble.iterpose()
            pca.performSVD(ensemble)
    else:
        pdb = prody.parsePDB(coords)
        if pdb.numCoordsets() < 2:
            raise ValueError('PDB file must contain multiple models')
        if prefix == '_pca' or prefix == '_eda':
            prefix = pdb.getTitle() + prefix
        select = pdb.select(selstr)
        # Check for an empty selection before anything calls len() on it.
        if select is None:
            raise ValueError('selection {0} do not match any atoms'
                             .format(repr(selstr)))
        LOGGER.info('{0} atoms are selected for calculations.'
                    .format(len(select)))
        LOGGER.info('{0} atoms will be used for PCA calculations.'
                    .format(len(select)))
        ensemble = prody.Ensemble(select)
        pca = prody.PCA(pdb.getTitle())
        if not kwargs.get('aligned'):
            ensemble.iterpose()
        pca.performSVD(ensemble)

    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(pca, join(outdir, prefix))
    prody.writeNMD(join(outdir, prefix + '.nmd'), pca[:nmodes], select)

    extend = kwargs.get('extend')
    if extend:
        if pdb:
            if extend == 'all':
                extended = prody.extendModel(pca[:nmodes], select, pdb)
            else:
                extended = prody.extendModel(pca[:nmodes], select,
                                             select | pdb.bb)
            prody.writeNMD(join(outdir, prefix + '_extended_' +
                                extend + '.nmd'), *extended)
        else:
            prody.LOGGER.warn('Model could not be extended, provide a PDB or '
                              'PSF file.')

    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    ext = kwargs.get('numext')
    numformat = kwargs.get('numformat')

    if outall or kwargs.get('outeig'):
        prody.writeArray(join(outdir, prefix + '_evectors'+ext),
                         pca.getArray(), delimiter=delim, format=numformat)
        prody.writeArray(join(outdir, prefix + '_evalues'+ext),
                         pca.getEigvals(), delimiter=delim, format=numformat)

    if outall or kwargs.get('outcov'):
        prody.writeArray(join(outdir, prefix + '_covariance'+ext),
                         pca.getCovariance(), delimiter=delim,
                         format=numformat)

    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        cross_corr = prody.calcCrossCorr(pca)
        if outall or kwargs.get('outcc'):
            prody.writeArray(join(outdir, prefix +
                                  '_cross-correlations' + ext),
                             cross_corr, delimiter=delim, format=numformat)
        if outall or kwargs.get('outhm'):
            resnums = select.getResnums()
            hmargs = {} if resnums is None else {'resnums': resnums}
            prody.writeHeatmap(join(outdir, prefix + '_cross-correlations.hm'),
                               cross_corr, xlabel='Residue', ylabel='Residue',
                               title=pca.getTitle() + ' cross-correlations',
                               **hmargs)

    if outall or kwargs.get('outsf'):
        prody.writeArray(join(outdir, prefix + '_sqfluct'+ext),
                         prody.calcSqFlucts(pca), delimiter=delim,
                         format=numformat)

    if outall or kwargs.get('outproj'):
        prody.writeArray(join(outdir, prefix + '_proj'+ext),
                         prody.calcProjection(ensemble, pca),
                         delimiter=delim, format=numformat)

    figall = kwargs.get('figall')
    cc = kwargs.get('figcc')
    sf = kwargs.get('figsf')
    sp = kwargs.get('figproj')

    if not (figall or cc or sf or sp):
        return

    try:
        import matplotlib.pyplot as plt
    except ImportError:
        LOGGER.warning('Matplotlib could not be imported. '
                       'Figures are not saved.')
        return

    prody.SETTINGS['auto_show'] = False
    LOGGER.info('Saving graphical output.')
    figformat = kwargs.get('figformat')
    width = kwargs.get('figwidth')
    height = kwargs.get('figheight')
    dpi = kwargs.get('figdpi')
    figformat = figformat.lower()

    if figall or cc:
        plt.figure(figsize=(width, height))
        prody.showCrossCorr(pca)
        plt.savefig(join(outdir, prefix + '_cc.'+figformat),
                    dpi=dpi, format=figformat)
        plt.close('all')

    if figall or sf:
        plt.figure(figsize=(width, height))
        prody.showSqFlucts(pca)
        plt.savefig(join(outdir, prefix + '_sf.'+figformat),
                    dpi=dpi, format=figformat)
        plt.close('all')

    if figall or sp:
        # Each whitespace-separated item is "i", "i,j,..." or "i-j"
        # (1-based); ``sp`` may be empty when only ``figall`` is set.
        indices = []
        for item in (sp or '').split():
            try:
                if '-' in item:
                    first, last = item.split('-')
                    indices.append(list(range(int(first) - 1, int(last))))
                elif ',' in item:
                    indices.append([int(i) - 1 for i in item.split(',')])
                else:
                    indices.append(int(item) - 1)
            except ValueError:
                LOGGER.warning('Projection specification {0} is not valid '
                               'and is skipped.'.format(repr(item)))

        for index in indices:
            plt.figure(figsize=(width, height))
            prody.showProjection(ensemble, pca[index])
            if isinstance(index, int):
                index = [index]
            labels = [str(i + 1) for i in index]
            plt.savefig(join(outdir, prefix + '_proj_' +
                             '_'.join(labels) + '.' + figformat),
                        dpi=dpi, format=figformat)
            plt.close('all')
def prody_pca(coords, **kwargs):
    """Perform PCA calculations for PDB or DCD format *coords* file.

    Options missing from *kwargs* are filled in from ``DEFAULTS``.  The keys
    read here are: ``outdir``, ``prefix``, ``nmodes``, ``select``,
    ``quiet``, ``altloc``, ``psf``, ``pdb``, ``aligned``, ``nproc``,
    ``extend``, ``outnpz``, ``outscipion``, ``outall``, ``outeig``,
    ``outcov``, ``outcc``, ``outhm``, ``outsf``, ``outproj``, ``numdelim``,
    ``numext``, ``numformat``, ``figall``, ``figcc``, ``figsf``,
    ``figproj``, ``figformat``, ``figwidth``, ``figheight`` and ``figdpi``.

    When ``nproc`` is set, the decomposition runs inside
    ``threadpoolctl.threadpool_limits(limits=nproc, user_api="blas")``.

    Raises :exc:`IOError` when ``outdir`` is not a directory,
    :exc:`ValueError` when the input has fewer than two frames/models, when
    the selection matches no atoms, or when the number of selected atoms
    does not match the DCD file, and :exc:`ImportError` when ``nproc`` is
    set but ``threadpoolctl`` is not installed.
    """
    for key in DEFAULTS:
        if key not in kwargs:
            kwargs[key] = DEFAULTS[key]

    from os.path import isdir, splitext, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import prody
    LOGGER = prody.LOGGER

    prefix = kwargs.get('prefix')
    nmodes = kwargs.get('nmodes')
    selstr = kwargs.get('select')
    quiet = kwargs.pop('quiet', False)
    altloc = kwargs.get('altloc')
    aligned = kwargs.get('aligned')
    nproc = kwargs.get('nproc')

    # Look through a trailing ".gz" to find the real file type.
    ext = splitext(coords)[1].lower()
    if ext == '.gz':
        ext = splitext(coords[:-3])[1].lower()

    if ext == '.dcd':
        pdb = kwargs.get('psf') or kwargs.get('pdb')
        if pdb:
            if splitext(pdb)[1].lower() == '.psf':
                pdb = prody.parsePSF(pdb)
            else:
                # Forward the option under parsePDB's ``altloc`` keyword,
                # and only when set, so the parser default still applies.
                parse_kwargs = {} if altloc is None else {'altloc': altloc}
                pdb = prody.parsePDB(pdb, **parse_kwargs)
        dcd = prody.DCDFile(coords)
        if prefix == '_pca' or prefix == '_eda':
            prefix = dcd.getTitle() + prefix

        if len(dcd) < 2:
            raise ValueError('DCD file must have multiple frames')
        if pdb:
            select = pdb.select(selstr)
            # Fail clearly instead of with an error on ``None`` below.
            if select is None:
                raise ValueError('selection {0} do not match any atoms'
                                 .format(repr(selstr)))
            if pdb.numAtoms() == dcd.numAtoms():
                dcd.setAtoms(select)
                LOGGER.info('{0} atoms are selected for calculations.'.format(
                    len(select)))
            else:
                if select.numAtoms() != dcd.numAtoms():
                    raise ValueError('number of selected atoms ({0}) does '
                                     'not match number of atoms in the DCD '
                                     'file ({1})'.format(
                                         select.numAtoms(), dcd.numAtoms()))
                if pdb.numCoordsets():
                    dcd.setCoords(select.getCoords())
        else:
            select = prody.AtomGroup()
            select.setCoords(dcd.getCoords())

        pca = prody.PCA(dcd.getTitle())

        if nproc:
            try:
                from threadpoolctl import threadpool_limits
            except ImportError:
                raise ImportError(
                    'Please install threadpoolctl to control threads')
            with threadpool_limits(limits=nproc, user_api="blas"):
                ensemble = _prody_pca_from_dcd(pca, dcd, nmodes, aligned,
                                               quiet)
        else:
            ensemble = _prody_pca_from_dcd(pca, dcd, nmodes, aligned, quiet)
        nmodes = pca.numModes()
    else:
        pdb = prody.parsePDB(coords)
        if pdb.numCoordsets() < 2:
            raise ValueError('PDB file must contain multiple models')
        if prefix == '_pca' or prefix == '_eda':
            prefix = pdb.getTitle() + prefix
        select = pdb.select(selstr)
        # Check for an empty selection before anything calls len() on it.
        if select is None:
            raise ValueError('selection {0} do not match any atoms'.format(
                repr(selstr)))
        LOGGER.info('{0} atoms are selected for calculations.'.format(
            len(select)))
        LOGGER.info('{0} atoms will be used for PCA calculations.'.format(
            len(select)))
        ensemble = prody.Ensemble(select)
        pca = prody.PCA(pdb.getTitle())
        if not aligned:
            ensemble.iterpose()

        if nproc:
            try:
                from threadpoolctl import threadpool_limits
            except ImportError:
                raise ImportError(
                    'Please install threadpoolctl to control threads')
            with threadpool_limits(limits=nproc, user_api="blas"):
                pca.performSVD(ensemble)
        else:
            pca.performSVD(ensemble)

    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(pca, join(outdir, prefix))

    if kwargs.get('outscipion'):
        prody.writeScipionModes(outdir, pca)

    prody.writeNMD(join(outdir, prefix + '.nmd'), pca[:nmodes], select)

    extend = kwargs.get('extend')
    if extend:
        if pdb:
            if extend == 'all':
                extended = prody.extendModel(pca[:nmodes], select, pdb)
            else:
                extended = prody.extendModel(pca[:nmodes], select,
                                             select | pdb.bb)
            prody.writeNMD(
                join(outdir, prefix + '_extended_' + extend + '.nmd'),
                *extended)
        else:
            prody.LOGGER.warn('Model could not be extended, provide a PDB or '
                              'PSF file.')

    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    ext = kwargs.get('numext')
    numformat = kwargs.get('numformat')

    if outall or kwargs.get('outeig'):
        prody.writeArray(join(outdir, prefix + '_evectors' + ext),
                         pca.getArray(), delimiter=delim, format=numformat)
        prody.writeArray(join(outdir, prefix + '_evalues' + ext),
                         pca.getEigvals(), delimiter=delim, format=numformat)

    if outall or kwargs.get('outcov'):
        prody.writeArray(join(outdir, prefix + '_covariance' + ext),
                         pca.getCovariance(), delimiter=delim,
                         format=numformat)

    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        cross_corr = prody.calcCrossCorr(pca)
        if outall or kwargs.get('outcc'):
            prody.writeArray(join(outdir, prefix +
                                  '_cross-correlations' + ext),
                             cross_corr, delimiter=delim, format=numformat)
        if outall or kwargs.get('outhm'):
            resnums = select.getResnums()
            hmargs = {} if resnums is None else {'resnums': resnums}
            prody.writeHeatmap(join(outdir, prefix + '_cross-correlations.hm'),
                               cross_corr, xlabel='Residue', ylabel='Residue',
                               title=pca.getTitle() + ' cross-correlations',
                               **hmargs)

    if outall or kwargs.get('outsf'):
        prody.writeArray(join(outdir, prefix + '_sqfluct' + ext),
                         prody.calcSqFlucts(pca), delimiter=delim,
                         format=numformat)

    if outall or kwargs.get('outproj'):
        prody.writeArray(join(outdir, prefix + '_proj' + ext),
                         prody.calcProjection(ensemble, pca),
                         delimiter=delim, format=numformat)

    figall = kwargs.get('figall')
    cc = kwargs.get('figcc')
    sf = kwargs.get('figsf')
    sp = kwargs.get('figproj')

    if not (figall or cc or sf or sp):
        return

    try:
        import matplotlib.pyplot as plt
    except ImportError:
        LOGGER.warning('Matplotlib could not be imported. '
                       'Figures are not saved.')
        return

    prody.SETTINGS['auto_show'] = False
    LOGGER.info('Saving graphical output.')
    figformat = kwargs.get('figformat')
    width = kwargs.get('figwidth')
    height = kwargs.get('figheight')
    dpi = kwargs.get('figdpi')
    figformat = figformat.lower()

    if figall or cc:
        plt.figure(figsize=(width, height))
        prody.showCrossCorr(pca)
        plt.savefig(join(outdir, prefix + '_cc.' + figformat),
                    dpi=dpi, format=figformat)
        plt.close('all')

    if figall or sf:
        plt.figure(figsize=(width, height))
        prody.showSqFlucts(pca)
        plt.savefig(join(outdir, prefix + '_sf.' + figformat),
                    dpi=dpi, format=figformat)
        plt.close('all')

    if figall or sp:
        # Each whitespace-separated item is "i", "i,j,..." or "i-j"
        # (1-based); ``sp`` may be empty when only ``figall`` is set.
        indices = []
        for item in (sp or '').split():
            try:
                if '-' in item:
                    first, last = item.split('-')
                    indices.append(list(range(int(first) - 1, int(last))))
                elif ',' in item:
                    indices.append([int(i) - 1 for i in item.split(',')])
                else:
                    indices.append(int(item) - 1)
            except ValueError:
                LOGGER.warning('Projection specification {0} is not valid '
                               'and is skipped.'.format(repr(item)))

        for index in indices:
            plt.figure(figsize=(width, height))
            prody.showProjection(ensemble, pca[index])
            if isinstance(index, Integral):
                index = [index]
            labels = [str(i + 1) for i in index]
            plt.savefig(join(
                outdir, prefix + '_proj_' + '_'.join(labels) + '.' +
                figformat), dpi=dpi, format=figformat)
            plt.close('all')


def _prody_pca_from_dcd(pca, dcd, nmodes, aligned, quiet):
    """Fill *pca* from the trajectory *dcd* and return the projection source.

    Trajectories longer than 1000 frames go through ``buildCovariance`` and
    ``calcModes(nmodes)``, and *dcd* itself is returned.  Shorter ones are
    sliced with ``dcd[:]``, superposed with ``iterpose`` unless *aligned*
    is true, decomposed with ``performSVD``, and the slice is returned.
    """
    if len(dcd) > 1000:
        pca.buildCovariance(dcd, aligned=aligned, quiet=quiet)
        pca.calcModes(nmodes)
        return dcd

    ensemble = dcd[:]
    if not aligned:
        ensemble.iterpose(quiet=quiet)
    pca.performSVD(ensemble)
    return ensemble
import matplotlib.pyplot as plt if not os.path.isfile('rmsd.png'): rmsd = prody.calcRMSD(ensemble) plt.clf() plt.plot(rmsd); plt.xlabel('Conformation index'); plt.ylabel('RMSD (A)'); plt.title('RMSD %f (%f)'%(rmsd.mean(), rmsd.std())) plt.savefig('figures/rmsd.png') if not os.path.isfile('blastPCA.png'): pc_ind0 = 0 pc_ind1 = 1 xtal_projection = prody.calcProjection(ensemble, pca[:20], rmsd=False) plt.clf() plt.plot(xtal_projection[:,pc_ind0],xtal_projection[:,pc_ind1],'ks') # titles = ['%s%s'%(pdb_id,chain_id) for (pdb_id,chain_id) in chain_hits] titles = [] for map in mappings: titles += [map[0][0]._title[13:17]+'%d'%r \ for r in range(len(map[0][0].getCoordsets()))] for label, x, y in zip(titles, \ xtal_projection[:,pc_ind0], \ xtal_projection[:,pc_ind1]): plt.annotate(label, xy = (x, y), xytext = (-20, 20), textcoords = 'offset points', ha = 'right', va = 'bottom',