def toMolecule(self, formalcharges=False, ids=None):
    """
    Return the moleculekit.molecule.Molecule

    Parameters
    ----------
    formalcharges: bool
        If True, the formal charges are used instead of partial ones
    ids: list or np.ndarray
        The conformer ids to store in the moleculekit Molecule object. If None, all are returned
        Default: None

    Returns
    -------
    mol: moleculekit.molecule.Molecule
        The moleculekit Molecule object

    Raises
    ------
    NoConformerError
        If the molecule has no conformers (``self.numFrames == 0``). The exception class is local to this method.
    ValueError
        If ``ids`` is neither a list nor a numpy array.
    """
    from moleculekit.molecule import Molecule

    class NoConformerError(Exception):
        pass

    if ids is None:
        ids = np.arange(self.numFrames)

    if self.numFrames == 0:
        raise NoConformerError(
            "No Conformers are found in the molecule. Generate at least one conformer."
        )
    elif not isinstance(ids, list) and not isinstance(ids, np.ndarray):
        raise ValueError('The argument ids should be a list of confomer ids')

    mol = Molecule()
    mol.empty(self.numAtoms)
    mol.record[:] = 'HETATM'
    mol.resname[:] = self.ligname[:3]
    mol.resid[:] = self._resid
    mol.coords = self._coords[:, :, ids]
    mol.name[:] = self._name
    mol.element[:] = self._element
    if formalcharges:
        mol.charge[:] = self._formalcharge
    else:
        mol.charge[:] = self._charge
    # One box column per exported conformer, so that the box frame count
    # matches the third axis of mol.coords even when only a subset is selected.
    mol.box = np.zeros((3, len(ids)), dtype=np.float32)
    mol.viewname = self.ligname
    mol.bonds = self._bonds
    mol.bondtype = self._bondtype
    mol.atomtype = self._atomtype

    return mol
def setUpClass(self):
    """Build the shared ``mol`` fixture: the protein part of 3PTB, auto-segmented and prepared with proteinPrepare."""
    from moleculekit.home import home
    from moleculekit.molecule import Molecule
    from moleculekit.tools.preparation import proteinPrepare
    from moleculekit.tools.autosegment import autoSegment

    # Directory holding the voxel-descriptor test data
    self.testf = os.path.join(home(), 'test-data', 'test-voxeldescriptors')

    pdb_path = os.path.join(self.testf, '3PTB.pdb')
    protein = Molecule(pdb_path)
    protein.filter('protein')

    segmented = autoSegment(protein, field='both')
    prepared = proteinPrepare(segmented)

    # Bonds are guessed on the prepared structure
    prepared.bonds = prepared._guessBonds()
    self.mol = prepared
def setUpClass(self):
    """Build the shared ``mol`` fixture: the protein part of 3PTB, auto-segmented and prepared with systemPrepare at pH 7.0."""
    from moleculekit.home import home
    from moleculekit.molecule import Molecule
    from moleculekit.tools.preparation import systemPrepare
    from moleculekit.tools.autosegment import autoSegment

    # Directory holding the voxel-descriptor test data
    self.testf = os.path.join(home(), "test-data", "test-voxeldescriptors")

    pdb_path = os.path.join(self.testf, "3PTB.pdb")
    protein = Molecule(pdb_path)
    protein.filter("protein")

    segmented = autoSegment(protein, field="both")
    prepared = systemPrepare(segmented, pH=7.0)

    # Bonds are guessed on the prepared structure
    prepared.bonds = prepared._guessBonds()
    self.mol = prepared
def test_metricsphericalcoordinate(self):
    """Check MetricSphericalCoordinate.project against the stored reference array and exercise getMapping."""
    from moleculekit.molecule import Molecule
    from moleculekit.home import home
    from os import path

    def datafile(*parts):
        # Resolve a file below the 'test-projections' data directory
        return path.join(home(dataDir='test-projections'), *parts)

    mol = Molecule(datafile('trajectory', 'filtered.pdb'))
    # The reference is the structure as loaded, before the trajectory is read in
    ref = mol.copy()
    mol.read(datafile('trajectory', 'traj.xtc'))
    mol.bonds = mol._guessBonds()

    first_sel = 'resname MOL'
    second_sel = 'within 8 of resid 98'

    res = MetricSphericalCoordinate(ref, first_sel, second_sel).project(mol)
    # The mapping is only computed to make sure the call works; its value is not checked
    _ = MetricSphericalCoordinate(ref, first_sel, second_sel).getMapping(mol)

    ref_array = np.load(datafile('metricsphericalcoordinate', 'res.npy'))
    assert np.allclose(res, ref_array, rtol=0, atol=1e-04)
def pdb2psf_CA(pdb_name_in, psf_name_out, bonds=True, angles=True):
    """
    Write a CA-only PSF topology built from a PDB file.

    The structure is filtered down to its ``name CA`` atoms, each atom is
    typed through ``CA_MAP`` (keyed by ``(resname, name)``), and consecutive
    atoms are connected into a linear chain.

    Parameters
    ----------
    pdb_name_in : str
        Path of the input structure, passed to ``Molecule``.
    psf_name_out : str
        Path of the file written by ``mol.write``.
    bonds : bool
        If True, bond every atom ``i`` to atom ``i + 1``. Otherwise no bonds are written.
    angles : bool
        If True, add an angle for every consecutive triplet ``(i, i + 1, i + 2)``.
        Otherwise no angles are written.

    Raises
    ------
    KeyError
        If a ``(resname, name)`` pair is missing from ``CA_MAP``.
    """
    mol = Molecule(pdb_name_in)
    mol.filter("name CA")

    n = mol.numAtoms
    atom_types = [
        CA_MAP[(resname, name)] for resname, name in zip(mol.resname, mol.name)
    ]

    idx = np.arange(n)

    # Slicing (instead of reshaping to n - 1 / n - 2 rows) yields correctly
    # shaped empty arrays for very short or empty selections rather than
    # failing on a negative dimension.
    if bonds:
        bond_array = np.column_stack((idx[:-1], idx[1:]))
    else:
        bond_array = np.empty([0, 2], dtype=np.int32)

    if angles:
        angle_array = np.column_stack((idx[:-2], idx[1:-1], idx[2:]))
    else:
        angle_array = np.empty([0, 3], dtype=np.int32)

    mol.atomtype = np.array(atom_types)
    mol.bonds = bond_array
    mol.angles = angle_array
    mol.write(psf_name_out)
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True,
          saltconc=0, saltanion=None, saltcation=None, disulfide=None, teleap=None, teleapimports=None,
          execute=True, atomtypes=None, offlibraries=None, gbsa=False, igb=2):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <moleculekit.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi/prep/in` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix (base file name) of the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of pairs of atomselection strings
        If None it will guess disulfide bonds. Otherwise provide a list pairs of atomselection strings for each pair of
        residues forming the disulfide bridge.
    teleap : str
        Path to teLeap executable used to build the system for AMBER
    teleapimports : list
        A list of paths to pass to teLeap '-I' flag, i.e. directories to be searched
        Default: determined from :func:`amber.defaultAmberHome <htmd.builder.amber.defaultAmberHome>` and
        :func:`amber.htmdAmberHome <htmd.builder.amber.htmdAmberHome>`
    execute : bool
        If False, building is disabled: only the input script needed by tleap (and the input structure files) are
        written and None is returned. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.
    gbsa : bool
        Modify radii for GBSA implicit water model
    igb : int
        GB model. Select: 1 for mbondi, 2 and 5 for mbondi2, 7 for bondi and 8 for mbondi3.
        Check section 4. The Generalized Born/Surface Area Model of the AMBER manual.

    Returns
    -------
    molbuilt : :class:`Molecule <moleculekit.molecule.Molecule>` object
        The built system in a Molecule object, or None if `execute` is False

    Raises
    ------
    NameError
        If the teLeap executable cannot be found, an input mol2/pdb file could not be written, or teLeap could not
        be executed.
    RuntimeError
        If an atom type definition is not a triplet, an OFF library is missing, no teLeap import path is found, or
        the built prmtop/crd files cannot be read.
    BuildError
        If the teLeap log contains errors or no output files were generated.

    Example
    -------
    >>> from htmd.ui import *  # doctest: +SKIP
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    >>> # More complex example
    >>> disu = [['segid P and resid 157', 'segid P and resid 13'], ['segid K and resid 1', 'segid K and resid 25']]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if teleap is None:
        teleap = _findTeLeap()
    else:
        if shutil.which(teleap) is None:
            raise NameError(
                'Could not find executable: `{}` in the PATH. Cannot build for AMBER.'
                .format(teleap))

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)

    # Normalise once so that a single file name is not iterated character by character further down
    ff = ensurelist(ff)
    topo = ensurelist(topo)
    param = ensurelist(param)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    # The context manager guarantees the script is flushed and closed even if one of the checks below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        for i, force in enumerate(ff):
            if not os.path.isfile(force):
                force = _locateFile(force, 'ff', teleap)
                if force is None:
                    continue
            newname = 'ff{}_{}'.format(i, os.path.basename(force))
            shutil.copy(force, os.path.join(outdir, newname))
            f.write('source {}\n'.format(newname))
        f.write('\n')

        if gbsa:
            gbmodels = {
                1: 'mbondi',
                2: 'mbondi2',
                5: 'mbondi2',
                7: 'bondi',
                8: 'mbondi3'
            }
            f.write('set default PBradii {}\n\n'.format(gbmodels[igb]))

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError(
                        'Atom type definitions have to be triplets. Check the AMBER documentation.'
                    )
                f.write(' {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            offlibraries = ensurelist(offlibraries)
            for i, off in enumerate(offlibraries):
                if not os.path.isfile(off):
                    raise RuntimeError(
                        'Could not find off-library in location {}'.format(off))
                newname = 'offlib{}_{}'.format(i, os.path.basename(off))
                shutil.copy(off, os.path.join(outdir, newname))
                f.write('loadoff {}\n'.format(newname))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for i, p in enumerate(param):
            if not os.path.isfile(p):
                p = _locateFile(p, 'param', teleap)
                if p is None:
                    continue
            newname = 'param{}_{}'.format(i, os.path.basename(p))
            shutil.copy(p, os.path.join(outdir, newname))
            f.write('loadamberparams {}\n'.format(newname))
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for i, t in enumerate(topo):
            if not os.path.isfile(t):
                t = _locateFile(t, 'topo', teleap)
                if t is None:
                    continue
            newname = 'topo{}_{}'.format(i, os.path.basename(t))
            shutil.copy(t, os.path.join(outdir, newname))
            f.write('loadamberprep {}\n'.format(newname))
        f.write('\n')

        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError(
                        'Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize:
            # TODO: Remove this once we deprecate the class
            from htmd.builder.builder import DisulfideBridge
            from moleculekit.molecule import UniqueResidueID
            if disulfide is not None and len(disulfide) != 0 and isinstance(
                    disulfide[0], DisulfideBridge):
                newdisu = []
                for d in disulfide:
                    r1 = UniqueResidueID.fromMolecule(
                        mol, 'resid {} and segname {}'.format(d.resid1, d.segid1))
                    r2 = UniqueResidueID.fromMolecule(
                        mol, 'resid {} and segname {}'.format(d.resid2, d.segid2))
                    newdisu.append([r1, r2])
                disulfide = newdisu
            # TODO: Remove up to here ----------------------

            if disulfide is not None and len(disulfide) != 0 and isinstance(
                    disulfide[0][0], str):
                disulfide = convertDisulfide(mol, disulfide)

            if disulfide is None:
                logger.info('Detecting disulfide bonds.')
                disulfide = detectDisulfideBonds(mol)

            # Fix structure to match the disulfide patching
            if len(disulfide) != 0:
                torem = np.zeros(mol.numAtoms, dtype=bool)
                f.write('# Adding disulfide bonds\n')
                for d in disulfide:
                    # Rename the residues to CYX if there is a disulfide bond
                    atoms1 = d[0].selectAtoms(mol, indexes=False)
                    atoms2 = d[1].selectAtoms(mol, indexes=False)
                    mol.resname[atoms1] = 'CYX'
                    mol.resname[atoms2] = 'CYX'
                    # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                    torem |= (atoms1 & (mol.name == 'HG')) | (atoms2 & (mol.name == 'HG'))
                    # Convert to stupid amber residue numbering
                    uqseqid = sequenceID(
                        (mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
                    uqres1 = int(np.unique(uqseqid[atoms1]))
                    uqres2 = int(np.unique(uqseqid[atoms2]))
                    f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
                f.write('\n')
                mol.remove(torem, _logger=False)

        # Calculate the bounding box and store it in the CRD file
        f.write('setBox mol "vdw"\n\n')

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
    logger.debug('Writing PDB file for input to tleap.')
    pdbname = os.path.join(outdir, 'input.pdb')

    # mol2 files have atomtype, here we only write parts not coming from mol2
    # We need to write the input.pdb at the end since we modify the resname for disulfide bridges in mol
    mol.write(pdbname, mol.atomtype == '')
    if not os.path.isfile(pdbname):
        raise NameError(
            'Could not write a PDB file out of the given Molecule.')

    molbuilt = None
    if execute:
        if not teleapimports:
            teleapimports = []
            # Source default Amber (i.e. the same paths tleap imports)
            amberhome = defaultAmberHome(teleap=teleap)
            teleapimports += [
                os.path.join(amberhome, s)
                for s in _defaultAmberSearchPaths.values()
            ]
            if len(teleapimports) == 0:
                raise RuntimeWarning(
                    'No default Amber force-field found. Check teLeap location: {}'
                    .format(teleap))
            # Source HTMD Amber paths that contain ffs
            htmdamberdir = htmdAmberHome()
            teleapimports += [
                os.path.join(htmdamberdir, os.path.dirname(fname))
                for fname in ff
                if os.path.isfile(os.path.join(htmdamberdir, fname))
            ]
            if len(teleapimports) == 0:
                raise RuntimeError(
                    'No default Amber force-field imports found. Check '
                    '`htmd.builder.amber.defaultAmberHome()` and `htmd.builder.amber.htmdAmberHome()`'
                )
        # Set import flags for teLeap
        teleapimportflags = []
        for p in teleapimports:
            teleapimportflags.append('-I')
            teleapimportflags.append(str(p))
        logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            # tleap.in references its inputs by relative name, hence the chdir; the finally clause makes sure
            # the caller's working directory is restored even when teLeap cannot be run
            with open(logpath, 'w') as logfile:
                cmd = [teleap, '-f', './tleap.in']
                cmd[1:1] = teleapimportflags
                logger.debug(cmd)
                try:
                    call(cmd, stdout=logfile)
                except Exception as e:
                    raise NameError('teLeap failed at execution') from e
            errors = _logParser(logpath)
        finally:
            os.chdir(currdir)
        if errors:
            raise BuildError(errors + [
                'Check {} for further information on errors in building.'.
                format(logpath)
            ])
        logger.info('Finished building.')

        # The output files are named after `prefix`, matching the saveamberparm line written to tleap.in
        crdfile = os.path.join(outdir, prefix + '.crd')
        prmtopfile = os.path.join(outdir, prefix + '.prmtop')
        if os.path.exists(crdfile) and \
                os.path.getsize(crdfile) != 0 and \
                os.path.getsize(prmtopfile) != 0:
            try:
                molbuilt = Molecule(prmtopfile)
                molbuilt.read(crdfile)
            except Exception as e:
                raise RuntimeError(
                    'Failed at reading structure.prmtop/structure.crd due to error: {}'
                    .format(e))
        else:
            raise BuildError(
                'No structure pdb/prmtop file was generated. Check {} for errors in building.'
                .format(logpath))

        if ionize:
            shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(
                totalcharge,
                nwater,
                saltconc=saltconc,
                anion=saltanion,
                cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom,
                                 nanion, ncation)
            # Redo the whole build but now with ions included. The GBSA settings and the teLeap import paths
            # are forwarded so that the final system is built with the same options as the first pass.
            return build(newmol,
                         ff=ff,
                         topo=topo,
                         param=param,
                         prefix=prefix,
                         outdir=outdir,
                         caps={},
                         ionize=False,
                         execute=execute,
                         saltconc=saltconc,
                         disulfide=disulfide,
                         teleap=teleap,
                         teleapimports=teleapimports,
                         atomtypes=atomtypes,
                         offlibraries=offlibraries,
                         gbsa=gbsa,
                         igb=igb)

    # Nothing was built when execute is False, so there is nothing to write out or check
    if molbuilt is not None:
        tmpbonds = molbuilt.bonds
        molbuilt.bonds = []  # Removing the bonds to speed up writing
        molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
        molbuilt.bonds = tmpbonds  # Restoring the bonds
        detectCisPeptideBonds(molbuilt)  # Warn in case of cis bonds
    return molbuilt
def pdb2psf_CACB(pdb_name_in, psf_name_out, bonds=True, angles=True):
    """
    Write a CA/CB PSF topology built from a PDB file.

    The structure is filtered down to its ``name CA CB`` atoms and each atom
    is typed through ``CACB_MAP`` (keyed by ``(resname, name)``). Consecutive
    CA atoms are chained together and every CB is attached to the atom
    directly preceding it in the filtered structure.

    Parameters
    ----------
    pdb_name_in : str
        Path of the input structure, passed to ``Molecule``.
    psf_name_out : str
        Path of the file written by ``mol.write``.
    bonds : bool
        If True, write CA(i)-CA(i+1) bonds and one bond per CB. Otherwise no bonds are written.
    angles : bool
        If True, write CA-CA-CA angles and, for every non-GLY residue, the
        angles between its CB, its CA and the neighbouring CAs.
        Otherwise no angles are written.

    Raises
    ------
    KeyError
        If a ``(resname, name)`` pair is missing from ``CACB_MAP``.
    """
    mol = Molecule(pdb_name_in)
    mol.filter("name CA CB")

    atom_types = [
        CACB_MAP[(resname, name)] for resname, name in zip(mol.resname, mol.name)
    ]

    CA_idx = []
    CB_idx = []
    for i, name in enumerate(mol.name):
        if name[:2] == "CA":
            CA_idx.append(i)
        else:
            CB_idx.append(i)

    # Explicit integer dtype keeps the index arrays integral even when a list
    # is empty (e.g. no CB atoms in an all-glycine chain).
    ca = np.array(CA_idx, dtype=int)
    cb = np.array(CB_idx, dtype=int)

    if bonds:
        CA_bonds = np.column_stack((ca[:-1], ca[1:]))
        # NOTE(review): assumes each CB directly follows its own CA in the
        # filtered structure, so the partner atom is at index CB - 1.
        CB_bonds = np.column_stack((cb - 1, cb))
        bond_array = np.concatenate((CA_bonds, CB_bonds))
    else:
        bond_array = np.empty([0, 2], dtype=np.int32)

    if angles:
        CA_angles = np.column_stack((ca[:-2], ca[1:-1], ca[2:]))

        CB_angles = []
        cbn = 0  # position of the current residue's CB inside CB_idx
        last_pos = len(CA_idx) - 1
        # Tracking the position with enumerate avoids a list.index() search
        # per residue and keeps the "first CA" test correct even if the first
        # CA is not atom 0.
        for pos, i in enumerate(CA_idx):
            # NOTE(review): assumes every non-GLY residue has a CB atom
            if mol.resname[i] != "GLY":
                if pos != 0:
                    CB_angles.append([CA_idx[pos - 1], i, CB_idx[cbn]])
                if pos != last_pos:
                    CB_angles.append([CA_idx[pos + 1], i, CB_idx[cbn]])
                cbn += 1
        # reshape keeps the (0, 3) shape when there are no CB angles so the
        # concatenation below does not fail
        CB_angles = np.array(CB_angles, dtype=int).reshape(-1, 3)
        angle_array = np.concatenate((CA_angles, CB_angles))
    else:
        angle_array = np.empty([0, 3], dtype=np.int32)

    mol.atomtype = np.array(atom_types)
    mol.bonds = bond_array
    mol.angles = angle_array
    mol.write(psf_name_out)