def calc_average(trajectory, cutoff):
    """Build a per-atom "network average" structure from *trajectory*.

    The frames of *trajectory* are iteratively superposed, *cutoff* is
    rescaled by the mean RMSD of the superposed ensemble, and each atom's
    position is replaced by the value ``get_network_average`` returns for
    that atom's coordinates across all frames.

    Parameters
    ----------
    trajectory : prody atom container
        Must provide ``getCoords``, ``getNames``, ``getResnums`` and
        ``getResnames`` and be accepted by ``prody.PDBEnsemble``.
    cutoff : float
        Multiplier applied to the mean ensemble RMSD; the product is the
        cutoff handed to ``get_network_average``.

    Returns
    -------
    prody.AtomGroup
        Atom group titled 'Cartesian average coordinates' carrying the
        names/resnums/resnames of *trajectory* and the averaged coordinates.
    """
    output = prody.AtomGroup('Cartesian average coordinates')
    # Working array that is overwritten row by row below. It is fetched
    # separately from the array handed to setCoords() so that the in-place
    # writes never alias the coordinates stored inside `output`.
    output_coords = trajectory.getCoords()
    output.setCoords(trajectory.getCoords())
    output.setNames(trajectory.getNames())
    output.setResnums(trajectory.getResnums())
    output.setResnames(trajectory.getResnames())

    ensemble = prody.PDBEnsemble(trajectory)
    ensemble.iterpose()

    # The caller's cutoff is relative; scale it by the mean RMSD.
    cutoff = numpy.mean(ensemble.getRMSDs()) * cutoff
    # Single-argument print(...) is valid on both Python 2 and Python 3,
    # unlike the print statement.
    print('Using cutoff of {}'.format(cutoff))

    input_coords = ensemble.getCoordsets()
    n_atoms = output_coords.shape[0]
    for i in range(n_atoms):
        # NOTE(review): the message says "residue" but the loop runs over the
        # first axis of the coordinate array (one row per atom) -- confirm
        # the intended wording.
        print('Computing residue {} of {}:'.format(i + 1, n_atoms))
        # Slice [:, i, :] hands over atom i's position in every frame.
        average = get_network_average(input_coords[:, i, :], cutoff)
        output_coords[i, :] = average

    output.setCoords(output_coords)
    return output
def to_prody(self, res):
    """Wrap this residue's atoms in a ProDy ``Residue``.

    Coordinates come from stacking ``self.bb + self.sc``, atom names from
    ``self.atom_names``, the three-letter residue name from ``self.name``
    via ``VOCAB._int2char`` and ``ONE_TO_THREE_LETTER_MAP``, and the residue
    number from ``res.getResnum()``.
    """
    import prody as pr

    atom_group = pr.AtomGroup()
    stacked = torch.stack(self.bb + self.sc)
    atom_group.setCoords(stacked.numpy())

    atom_names = self.atom_names
    n_atoms = len(atom_names)
    atom_group.setNames(atom_names)

    # Every atom shares the same residue name and residue number.
    one_letter = VOCAB._int2char[self.name]
    three_letter = ONE_TO_THREE_LETTER_MAP[one_letter]
    atom_group.setResnames([three_letter] * n_atoms)
    atom_group.setResnums([res.getResnum()] * n_atoms)

    # NOTE(review): the index list is all zeros, one entry per atom --
    # confirm this is what pr.Residue expects here.
    return pr.Residue(atom_group, [0] * n_atoms, None)
def test_get_ligand_with_no_ligands(self):
    # get_ligand is expected to return None when handed a freshly
    # constructed AtomGroup (no atoms) and None for the optional arguments.
    cruncher = file_manipulation.PDBCruncer()
    ligand = cruncher.get_ligand(
        "dummy_protein_id",
        "dummy_filename",
        None,
        prody.AtomGroup(),
        None,
    )
    self.assertIsNone(ligand)
def convert_chimera_molecule_to_prody(molecule):
    """
    Build a ProDy atom group from a Chimera molecule.

    Parameters
    ----------
    molecule : chimera.Molecule

    Returns
    -------
    prody_molecule : prody.AtomGroup
        Atom group carrying coordinates, elements, serials, names, residue
        numbers, residue names, chain ids, B-factors, masses, title and
        bonds copied from ``molecule``. Residue numbers are shifted so that
        the smallest residue position in ``molecule`` becomes 0.
    chimera2prody : dict
        Maps each Chimera atom's ``serialNumber`` to the index of that atom
        in the ProDy atom group (its row in ``getCoords()``).

    Raises
    ------
    TypeError
        If an AttributeError occurs anywhere during the conversion, which is
        taken to mean ``molecule`` is not a chimera.Molecule.
    """
    prody_molecule = prody.AtomGroup()
    try:
        chimera2prody = {}
        coords, elements, serials = [], [], []
        names, resnums, resnames = [], [], []
        chids, betas, masses = [], [], []

        # Residue numbers are stored relative to the lowest position.
        first_position = min(res.id.position for res in molecule.residues)

        for index, atom in enumerate(molecule.atoms):
            serial = atom.serialNumber
            residue = atom.residue
            element = atom.element

            chimera2prody[serial] = index
            coords.append(tuple(atom.coord()))
            elements.append(element.name)
            serials.append(serial)
            names.append(atom.name)
            resnums.append(residue.id.position - first_position)
            resnames.append(residue.type)
            chids.append(residue.id.chainId)
            masses.append(element.mass)
            betas.append(atom.bfactor)

        prody_molecule.setCoords(coords)
        prody_molecule.setElements(elements)
        prody_molecule.setSerials(serials)
        prody_molecule.setNames(names)
        prody_molecule.setResnums(resnums)
        prody_molecule.setResnames(resnames)
        prody_molecule.setChids(chids)
        prody_molecule.setBetas(betas)
        prody_molecule.setMasses(masses)
        prody_molecule.setTitle(str(molecule.name))

        # Bonds are expressed as pairs of ProDy indices, looked up through
        # the serial-number map built above.
        bond_pairs = []
        for bond in molecule.bonds:
            first, second = bond.atoms[0], bond.atoms[1]
            bond_pairs.append((chimera2prody[first.serialNumber],
                               chimera2prody[second.serialNumber]))
        prody_molecule.setBonds(bond_pairs)
    except AttributeError:
        raise TypeError(
            'Attribute not found. Molecule must be a chimera.Molecule')

    return prody_molecule, chimera2prody
def calc_average(trajectory):
    """Return an AtomGroup holding the mean coordinates of *trajectory*.

    The frames are iteratively superposed through a ``prody.PDBEnsemble``
    and then averaged along the first (coordinate-set) axis. Names, residue
    numbers and residue names are copied from *trajectory*.
    """
    averaged = prody.AtomGroup('Cartesian average coordinates')
    # Seed the group with the trajectory's coordinates before copying the
    # per-atom labels; the coordinates are replaced by the mean below.
    averaged.setCoords(trajectory.getCoords())
    averaged.setNames(trajectory.getNames())
    averaged.setResnums(trajectory.getResnums())
    averaged.setResnames(trajectory.getResnames())

    frames = prody.PDBEnsemble(trajectory)
    frames.iterpose()

    mean_coords = numpy.mean(frames.getCoordsets(), axis=0)
    averaged.setCoords(mean_coords)
    return averaged
def get_closest_frame(trajectory, average_structure):
    """Return the frame of *trajectory* with the lowest RMSD to *average_structure*.

    *average_structure* is passed to the ensemble's ``setCoords`` before the
    frames are superposed; the frame with the smallest value in
    ``getRMSDs()`` is copied, as superposed, into the returned AtomGroup.
    Names, residue numbers and residue names are taken from *trajectory*.
    """
    # NOTE(review): the title still reads 'Cartesian average coordinates'
    # although this holds a single frame -- confirm whether that is intended.
    closest = prody.AtomGroup('Cartesian average coordinates')
    closest.setCoords(trajectory.getCoords())
    closest.setNames(trajectory.getNames())
    closest.setResnums(trajectory.getResnums())
    closest.setResnames(trajectory.getResnames())

    frames = prody.PDBEnsemble(trajectory)
    frames.setCoords(average_structure)
    frames.superpose()

    best = numpy.argmin(frames.getRMSDs())
    closest.setCoords(frames.getCoordsets(best))
    return closest
def _limit_blas_threads(nproc):
    """Return a threadpoolctl context manager capping BLAS threads at *nproc*."""
    try:
        from threadpoolctl import threadpool_limits
    except ImportError:
        raise ImportError('Please install threadpoolctl to control threads')
    return threadpool_limits(limits=nproc, user_api="blas")


def _pca_from_dcd(pca, dcd, nmodes, aligned, quiet):
    """Fill *pca* from the trajectory *dcd*; return ``(ensemble, nmodes)``."""
    if len(dcd) > 1000:
        # Long trajectories: build the covariance matrix from the file
        # instead of slicing all frames out of it.
        pca.buildCovariance(dcd, aligned=aligned, quiet=quiet)
        pca.calcModes(nmodes)
        ensemble = dcd
    else:
        ensemble = dcd[:]
        if not aligned:
            ensemble.iterpose(quiet=quiet)
        pca.performSVD(ensemble)
    return ensemble, pca.numModes()


def _parse_projection_indices(spec):
    """Parse a projection spec into zero-based mode indices.

    *spec* is a whitespace-separated list of items, each one of ``"N"``
    (single mode -> int), ``"A-B"`` (inclusive range -> list) or ``"A,B,..."``
    (explicit set -> list). Returns ``(indices, rejected)`` where *rejected*
    lists the items that could not be parsed.
    """
    indices = []
    rejected = []
    for item in (spec or '').split():
        try:
            if '-' in item:
                bounds = item.split('-')
                if len(bounds) == 2:
                    indices.append(
                        list(range(int(bounds[0]) - 1, int(bounds[1]))))
                else:
                    rejected.append(item)
            elif ',' in item:
                indices.append([int(i) - 1 for i in item.split(',')])
            else:
                indices.append(int(item) - 1)
        except ValueError:
            # Non-numeric piece: skip this item, keep the remaining ones.
            rejected.append(item)
    return indices, rejected


def prody_pca(coords, **kwargs):
    """Perform PCA calculations for PDB or DCD format *coords* file.

    Options missing from *kwargs* are filled in from ``DEFAULTS``. The file
    type is decided from the extension of *coords* (a trailing ``.gz`` is
    ignored): ``.dcd`` is read as a trajectory, optionally paired with a
    topology given by the ``psf`` or ``pdb`` option; anything else is parsed
    as a multi-model PDB file.

    Options read from *kwargs*: ``outdir``, ``prefix``, ``nmodes``,
    ``select``, ``quiet``, ``altloc``, ``psf``, ``pdb``, ``nproc``,
    ``aligned``, ``extend``, the numerical output switches ``outnpz``,
    ``outscipion``, ``outall``, ``outeig``, ``outcov``, ``outcc``, ``outhm``,
    ``outsf``, ``outproj`` with ``numdelim``/``numext``/``numformat``, and
    the figure switches ``figall``, ``figcc``, ``figsf``, ``figproj`` with
    ``figformat``/``figwidth``/``figheight``/``figdpi``.

    Raises
    ------
    IOError
        If ``outdir`` is not an existing directory.
    ValueError
        If the input has fewer than two frames/models, the selection matches
        no atoms, or the selected atom count differs from the DCD atom count.
    ImportError
        If ``nproc`` is set but threadpoolctl is not installed.
    """

    for key in DEFAULTS:
        kwargs.setdefault(key, DEFAULTS[key])

    from os.path import isdir, splitext, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import prody
    LOGGER = prody.LOGGER

    prefix = kwargs.get('prefix')
    nmodes = kwargs.get('nmodes')
    selstr = kwargs.get('select')
    quiet = kwargs.pop('quiet', False)
    altloc = kwargs.get('altloc')
    aligned = kwargs.get('aligned')
    nproc = kwargs.get('nproc')

    ext = splitext(coords)[1].lower()
    if ext == '.gz':
        ext = splitext(coords[:-3])[1].lower()

    if ext == '.dcd':
        pdb = kwargs.get('psf') or kwargs.get('pdb')
        if pdb:
            if splitext(pdb)[1].lower() == '.psf':
                pdb = prody.parsePSF(pdb)
            else:
                # Fix: this used to pass the undefined name ``altlocs``.
                # Only forward the option when it is set so parsePDB keeps
                # its own default otherwise.
                parse_kwargs = {} if altloc is None else {'altloc': altloc}
                pdb = prody.parsePDB(pdb, **parse_kwargs)
        dcd = prody.DCDFile(coords)
        if prefix == '_pca' or prefix == '_eda':
            prefix = dcd.getTitle() + prefix

        if len(dcd) < 2:
            raise ValueError('DCD file must have multiple frames')
        if pdb:
            select = pdb.select(selstr)
            if select is None:
                raise ValueError('selection {0} do not match any atoms'.format(
                    repr(selstr)))
            if pdb.numAtoms() == dcd.numAtoms():
                dcd.setAtoms(select)
                LOGGER.info('{0} atoms are selected for calculations.'.format(
                    len(select)))
            else:
                # The topology has a different atom count than the DCD, so
                # the selection itself must match the DCD atom count.
                if select.numAtoms() != dcd.numAtoms():
                    raise ValueError('number of selected atoms ({0}) does '
                                     'not match number of atoms in the DCD '
                                     'file ({1})'.format(
                                         select.numAtoms(), dcd.numAtoms()))
                if pdb.numCoordsets():
                    dcd.setCoords(select.getCoords())
        else:
            select = prody.AtomGroup()
            select.setCoords(dcd.getCoords())
        pca = prody.PCA(dcd.getTitle())

        if nproc:
            with _limit_blas_threads(nproc):
                ensemble, nmodes = _pca_from_dcd(
                    pca, dcd, nmodes, aligned, quiet)
        else:
            ensemble, nmodes = _pca_from_dcd(pca, dcd, nmodes, aligned, quiet)

    else:
        pdb = prody.parsePDB(coords)
        if pdb.numCoordsets() < 2:
            raise ValueError('PDB file must contain multiple models')

        if prefix == '_pca' or prefix == '_eda':
            prefix = pdb.getTitle() + prefix

        select = pdb.select(selstr)
        # Fix: validate before len(select); previously len(None) raised a
        # TypeError and this ValueError was unreachable.
        if select is None:
            raise ValueError('selection {0} do not match any atoms'.format(
                repr(selstr)))
        LOGGER.info('{0} atoms are selected for calculations.'.format(
            len(select)))
        LOGGER.info('{0} atoms will be used for PCA calculations.'.format(
            len(select)))
        ensemble = prody.Ensemble(select)
        pca = prody.PCA(pdb.getTitle())
        if not aligned:
            ensemble.iterpose()

        if nproc:
            with _limit_blas_threads(nproc):
                pca.performSVD(ensemble)
        else:
            pca.performSVD(ensemble)

    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(pca, join(outdir, prefix))

    if kwargs.get('outscipion'):
        prody.writeScipionModes(outdir, pca)

    prody.writeNMD(join(outdir, prefix + '.nmd'), pca[:nmodes], select)

    extend = kwargs.get('extend')
    if extend:
        if pdb:
            if extend == 'all':
                extended = prody.extendModel(pca[:nmodes], select, pdb)
            else:
                extended = prody.extendModel(pca[:nmodes], select,
                                             select | pdb.bb)
            prody.writeNMD(
                join(outdir, prefix + '_extended_' + extend + '.nmd'),
                *extended)
        else:
            prody.LOGGER.warn('Model could not be extended, provide a PDB or '
                              'PSF file.')

    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    numext = kwargs.get('numext')
    numformat = kwargs.get('numformat')

    if outall or kwargs.get('outeig'):
        prody.writeArray(join(outdir, prefix + '_evectors' + numext),
                         pca.getArray(), delimiter=delim, format=numformat)
        prody.writeArray(join(outdir, prefix + '_evalues' + numext),
                         pca.getEigvals(), delimiter=delim, format=numformat)
    if outall or kwargs.get('outcov'):
        prody.writeArray(join(outdir, prefix + '_covariance' + numext),
                         pca.getCovariance(), delimiter=delim,
                         format=numformat)
    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        cross_corr = prody.calcCrossCorr(pca)
        if outall or kwargs.get('outcc'):
            prody.writeArray(
                join(outdir, prefix + '_cross-correlations' + numext),
                cross_corr, delimiter=delim, format=numformat)
        if outall or kwargs.get('outhm'):
            resnums = select.getResnums()
            hmargs = {} if resnums is None else {'resnums': resnums}
            prody.writeHeatmap(
                join(outdir, prefix + '_cross-correlations.hm'),
                cross_corr, xlabel='Residue', ylabel='Residue',
                title=pca.getTitle() + ' cross-correlations', **hmargs)
    if outall or kwargs.get('outsf'):
        prody.writeArray(join(outdir, prefix + '_sqfluct' + numext),
                         prody.calcSqFlucts(pca), delimiter=delim,
                         format=numformat)
    if outall or kwargs.get('outproj'):
        prody.writeArray(join(outdir, prefix + '_proj' + numext),
                         prody.calcProjection(ensemble, pca),
                         delimiter=delim, format=numformat)

    figall = kwargs.get('figall')
    fig_cc = kwargs.get('figcc')
    fig_sf = kwargs.get('figsf')
    fig_proj = kwargs.get('figproj')

    if figall or fig_cc or fig_sf or fig_proj:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning('Matplotlib could not be imported. '
                           'Figures are not saved.')
        else:
            prody.SETTINGS['auto_show'] = False
            LOGGER.info('Saving graphical output.')
            figformat = kwargs.get('figformat')
            width = kwargs.get('figwidth')
            height = kwargs.get('figheight')
            dpi = kwargs.get('figdpi')

            figformat = figformat.lower()
            if figall or fig_cc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(pca)
                plt.savefig(join(outdir, prefix + '_cc.' + figformat),
                            dpi=dpi, format=figformat)
                plt.close('all')
            if figall or fig_sf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(pca)
                plt.savefig(join(outdir, prefix + '_sf.' + figformat),
                            dpi=dpi, format=figformat)
                plt.close('all')
            if figall or fig_proj:
                # A missing spec (figall without figproj) yields no
                # projection figures instead of crashing on None.split().
                indices, rejected = _parse_projection_indices(fig_proj)
                if rejected:
                    LOGGER.warning(
                        'Ignoring invalid projection specification(s): '
                        '{0}'.format(', '.join(rejected)))
                for index in indices:
                    plt.figure(figsize=(width, height))
                    prody.showProjection(ensemble, pca[index])
                    if isinstance(index, Integral):
                        index = [index]
                    # File names use one-based mode numbers.
                    labels = [str(i + 1) for i in index]
                    plt.savefig(join(
                        outdir,
                        prefix + '_proj_' + '_'.join(labels) + '.' +
                        figformat), dpi=dpi, format=figformat)
                    plt.close('all')