def listDihedrals(filename):
    """Return the soft dihedrals of a molecule as atom indexes and atom names.

    The molecule is read from `filename`, renamed, and written in several
    formats into a scratch directory. The external MATCH, gen_xpsf and
    gen_soft_list programs are then run there and the `soft-dih-list.txt`
    file found afterwards is parsed. Setting up the input for gen_soft_list
    is the only reason for all of the intermediate files.

    Parameters
    ----------
    filename : str
        Path of the molecule file, passed to `Molecule`.

    Returns
    -------
    ll1 : list of list of int
        One entry per line of `soft-dih-list.txt` holding the integers found
        on that line (used below as 1-based atom indexes).
    ll2 : list of list of str
        Same layout as `ll1`, with every index replaced by the stripped,
        upper-cased name of the corresponding atom.

    Raises
    ------
    subprocess.CalledProcessError
        If any of the external programs exits with a non-zero status.
    """
    ll1 = []
    ll2 = []
    env = Parameterization._preflight_test(None)
    mol = Molecule(filename)
    mol = Parameterization._rename_mol(mol)
    mol.bonds = mol._guessBonds()

    # Work inside a temporary directory that is removed afterwards
    with tempfile.TemporaryDirectory() as td:
        pwd = os.getcwd()
        os.chdir(td)
        try:
            # Create an empty parameter file
            # NOTE(review): presumably required by the external tools - confirm
            with open("mol.prm", "w"):
                pass
            mol.write("mol.pdb")
            mol.write("mol-opt.xyz")
            mol.write("mol.xpsf", type="psf")

            for charge in range(-1, 2):
                subprocess.check_output(
                    [env['BIN_MATCH'], "-charge", str(charge),
                     "-forcefield", "top_all36_cgenff_new", "mol.pdb"],
                    stderr=subprocess.STDOUT, shell=False, stdin=None)
            subprocess.check_output(
                [env['BIN_GEN_XPSF'], "mol.rtf", "mol.xpsf", "MOL"],
                stderr=subprocess.STDOUT, shell=False, stdin=None)
            subprocess.check_output(
                [env['BIN_GEN_SOFT_LIST']],
                stderr=subprocess.STDOUT, shell=False, stdin=None)

            with open("soft-dih-list.txt", "r") as f:
                for line in f:
                    indexes = [int(m) for m in line.split()]
                    ll1.append(indexes)
                    # Indexes in the file are 1-based, mol.name is 0-based
                    ll2.append([mol.name[idx - 1].strip().upper()
                                for idx in indexes])
        finally:
            # Always leave the scratch directory before it gets deleted,
            # also when one of the steps above fails
            os.chdir(pwd)
    return ll1, ll2
def to_molecule(self):
    """Build an htmd `Molecule` out of the data exposed by this object.

    The new molecule gets one atom per `get_natoms()`. Atom names and
    elements are both taken from `get_elements()`, charges from
    `get_charges()`, and coordinates from `get_coords()` (stored at index 0
    of the last `coords` axis). Every atom is placed in residue 1, named
    after the first three characters of `get_name()`. Bonds and bond types
    come from `get_bonds()`.

    Returns
    -------
    Molecule
        The newly created molecule.
    """
    from htmd.molecule.molecule import Molecule

    xyz = self.get_coords()
    atom_elements = self.get_elements()

    result = Molecule()
    result.empty(self.get_natoms())

    # Per-atom fields
    result.resname[:] = self.get_name()[:3]
    result.resid[:] = 1
    result.name[:] = atom_elements
    result.element[:] = atom_elements
    result.charge[:] = self.get_charges()
    result.coords[:, :, 0] = xyz

    # Molecule-wide fields
    result.viewname = self.get_name()
    bond_pairs, bond_types = self.get_bonds()
    result.bonds = bond_pairs
    result.bondtype = bond_types
    return result
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True,
          saltconc=0, saltanion=None, saltcation=None, disulfide=None, tleap='tleap', execute=True,
          atomtypes=None, offlibraries=None):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of :class:`DisulfideBridge <htmd.builder.builder.DisulfideBridge>` objects
        If None it will guess disulfide bonds. Otherwise provide a list of `DisulfideBridge` objects.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    NameError
        If tleap cannot be found or fails to start, if the input PDB/mol2 files cannot be written or if tleap did not
        produce non-empty `<prefix>.crd` and `<prefix>.prmtop` files.
    RuntimeError
        If an entry of `atomtypes` is not a triplet.

    Example
    -------
    >>> from htmd.ui import *
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    ...
    >>> # More complex example
    >>> disu = [DisulfideBridge('P', 157, 'P', 13), DisulfideBridge('K', 1, 'K', 25)]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if shutil.which(tleap) is None:
        raise NameError('Could not find executable: `{}` in the PATH. Cannot build for AMBER.'.format(tleap))
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)
    _checkResidueInsertions(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    # The context manager closes tleap.in also when one of the checks below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        if isinstance(ff, str):
            ff = [ff]
        for force in ff:
            f.write('source ' + force + '\n')
        f.write('\n')

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError('Atom type definitions have to be triplets. Check the AMBER documentation.')
                f.write(' {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            if not isinstance(offlibraries, (list, tuple)):
                offlibraries = [offlibraries, ]
            for off in offlibraries:
                f.write('loadoff {}\n\n'.format(off))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for p in param:
            try:
                shutil.copy(p, outdir)
            except OSError:
                # Not a local file: reference it by the given name and let tleap look it up
                f.write('loadamberparams ' + p + '\n')
                logger.info("File {:s} not found, assuming its present on the standard Amber location".format(p))
            else:
                f.write('loadamberparams ' + os.path.basename(p) + '\n')
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for t in topo:
            shutil.copy(t, outdir)
            f.write('loadamberprep ' + os.path.basename(t) + '\n')
        f.write('\n')

        # Detect disulfide bridges if not defined by user
        if disulfide is None and not ionize:
            logger.info('Detecting disulfide bonds.')
            disulfide = detectDisulfideBonds(mol)

        # Fix structure to match the disulfide patching
        if not ionize and len(disulfide) != 0:
            for d in disulfide:
                # Rename the residues to CYX if there is a disulfide bond
                atoms1 = (mol.segid == d.segid1) & (mol.resid == d.resid1)
                atoms2 = (mol.segid == d.segid2) & (mol.resid == d.resid2)
                mol.resname[atoms1] = 'CYX'
                mol.resname[atoms2] = 'CYX'
                # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare). Both residues are handled in a
                # single call: removing atoms changes the number of atoms and would invalidate the second mask.
                mol.remove((atoms1 | atoms2) & (mol.name == 'HG'), _logger=False)

        # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
        logger.debug('Writing PDB file for input to tleap.')
        pdbname = os.path.join(outdir, 'input.pdb')
        # mol2 files have atomtype, here we only write parts not coming from mol2
        mol.write(pdbname, mol.atomtype == '')
        if not os.path.isfile(pdbname):
            raise NameError('Could not write a PDB file out of the given Molecule.')
        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize and len(disulfide) != 0:
            f.write('# Adding disulfide bonds\n')
            # Convert to the sequential residue numbering used by tleap
            uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
            for d in disulfide:
                uqres1 = int(np.unique(uqseqid[(mol.segid == d.segid1) & (mol.resid == d.resid1)]))
                uqres2 = int(np.unique(uqseqid[(mol.segid == d.segid2) & (mol.resid == d.resid2)]))
                f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
            f.write('\n')

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    molbuilt = None
    if execute:
        # Source paths of extra dirs (our dirs, not amber default)
        htmdamberdir = os.path.abspath(os.path.join(home(), 'builder', 'amberfiles'))
        sourcepaths = [htmdamberdir]
        sourcepaths += [os.path.join(htmdamberdir, os.path.dirname(fname))
                        for fname in ff if os.path.isfile(os.path.join(htmdamberdir, fname))]
        extrasource = []
        for p in sourcepaths:
            extrasource.append('-I')
            extrasource.append('{}'.format(p))
        logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                cmd = [tleap, '-f', './tleap.in']
                cmd[1:1] = extrasource
                try:
                    call(cmd, stdout=logfile)
                except Exception as err:
                    raise NameError('tleap failed at execution') from err
        finally:
            # Restore the working directory also when tleap could not be run
            os.chdir(currdir)
        logger.info('Finished building.')

        # tleap was asked to write <prefix>.prmtop / <prefix>.crd, so those are the files to look for
        crdfile = os.path.join(outdir, prefix + '.crd')
        prmtopfile = os.path.join(outdir, prefix + '.prmtop')
        if all(os.path.isfile(x) and os.path.getsize(x) != 0 for x in (crdfile, prmtopfile)):
            molbuilt = Molecule(prmtopfile)
            molbuilt.read(crdfile)
        else:
            raise NameError('No structure pdb/prmtop file was generated. Check {} for errors in building.'
                            .format(logpath))

        if ionize:
            shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            ff='amber', anion=saltanion,
                                                                            cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap, atomtypes=atomtypes,
                         offlibraries=offlibraries)

        tmpbonds = molbuilt.bonds
        molbuilt.bonds = []  # Removing the bonds to speed up writing
        molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
        molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True,
          saltconc=0, saltanion=None, saltcation=None, disulfide=None, tleap=None, execute=True, atomtypes=None,
          offlibraries=None, gbsa=False, igb=2):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi/prep/in` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of pairs of atomselection strings
        If None it will guess disulfide bonds. Otherwise provide a list pairs of atomselection strings for each pair of
        residues forming the disulfide bridge.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.
    gbsa : bool
        Modify radii for GBSA implicit water model
    igb : int
        GB model. Select: 1 for mbondi, 2 and 5 for mbondi2, 7 for bondi and 8 for mbondi3.
        Check section 4. The Generalized Born/Surface Area Model of the AMBER manual.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    NameError
        If the given tleap cannot be found or fails to start, or if the input PDB/mol2 files cannot be written.
    RuntimeError
        If an entry of `atomtypes` is not a triplet or an OFF library file does not exist.
    KeyError
        If `gbsa` is enabled and `igb` is not one of 1, 2, 5, 7, 8.
    BuildError
        If errors are found in the tleap log or tleap did not produce non-empty `<prefix>.crd` and `<prefix>.prmtop`
        files.

    Example
    -------
    >>> from htmd.ui import *  # doctest: +SKIP
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    >>> # More complex example
    >>> disu = [['segid P and resid 157', 'segid P and resid 13'], ['segid K and resid 1', 'segid K and resid 25']]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if tleap is None:
        tleap = _findTleap()
    elif shutil.which(tleap) is None:
        raise NameError('Could not find executable: `{}` in the PATH. Cannot build for AMBER.'.format(tleap))

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    # The context manager closes tleap.in also when one of the checks below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        if isinstance(ff, str):
            ff = [ff]
        for i, force in enumerate(ff):
            if not os.path.isfile(force):
                force = _locateFile(force, 'ff', tleap)
                if force is None:
                    continue
            # Copies are prefixed with their position so equally named files do not overwrite each other
            newname = 'ff{}_{}'.format(i, os.path.basename(force))
            shutil.copy(force, os.path.join(outdir, newname))
            f.write('source {}\n'.format(newname))
        f.write('\n')

        if gbsa:
            gbmodels = {1: 'mbondi', 2: 'mbondi2', 5: 'mbondi2', 7: 'bondi', 8: 'mbondi3'}
            f.write('set default PBradii {}\n\n'.format(gbmodels[igb]))

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError('Atom type definitions have to be triplets. Check the AMBER documentation.')
                f.write(' {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            offlibraries = ensurelist(offlibraries)
            # Enumerate the libraries themselves instead of reusing the index left over from the forcefield loop
            for i, off in enumerate(offlibraries):
                if not os.path.isfile(off):
                    raise RuntimeError('Could not find off-library in location {}'.format(off))
                newname = 'offlib{}_{}'.format(i, os.path.basename(off))
                shutil.copy(off, os.path.join(outdir, newname))
                f.write('loadoff {}\n'.format(newname))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for i, p in enumerate(param):
            if not os.path.isfile(p):
                p = _locateFile(p, 'param', tleap)
                if p is None:
                    continue
            newname = 'param{}_{}'.format(i, os.path.basename(p))
            shutil.copy(p, os.path.join(outdir, newname))
            f.write('loadamberparams {}\n'.format(newname))
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for i, t in enumerate(topo):
            if not os.path.isfile(t):
                t = _locateFile(t, 'topo', tleap)
                if t is None:
                    continue
            newname = 'topo{}_{}'.format(i, os.path.basename(t))
            shutil.copy(t, os.path.join(outdir, newname))
            f.write('loadamberprep {}\n'.format(newname))
        f.write('\n')

        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize:
            # TODO: Remove this once we deprecate the class
            from htmd.builder.builder import DisulfideBridge
            from htmd.molecule.molecule import UniqueResidueID
            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0], DisulfideBridge):
                newdisu = []
                for d in disulfide:
                    r1 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid1, d.segid1))
                    r2 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid2, d.segid2))
                    newdisu.append([r1, r2])
                disulfide = newdisu
            # TODO: Remove up to here ----------------------

            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0][0], str):
                disulfide = convertDisulfide(mol, disulfide)

            if disulfide is None:
                logger.info('Detecting disulfide bonds.')
                disulfide = detectDisulfideBonds(mol)

            # Fix structure to match the disulfide patching
            if len(disulfide) != 0:
                torem = np.zeros(mol.numAtoms, dtype=bool)
                # Convert to the sequential residue numbering used by tleap
                uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
                f.write('# Adding disulfide bonds\n')
                for d in disulfide:
                    # Rename the residues to CYX if there is a disulfide bond
                    atoms1 = d[0].selectAtoms(mol, indexes=False)
                    atoms2 = d[1].selectAtoms(mol, indexes=False)
                    mol.resname[atoms1] = 'CYX'
                    mol.resname[atoms2] = 'CYX'
                    # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare). They are only collected
                    # here and removed after the loop so the masks of later bridges stay valid.
                    torem |= (atoms1 & (mol.name == 'HG')) | (atoms2 & (mol.name == 'HG'))
                    uqres1 = int(np.unique(uqseqid[atoms1]))
                    uqres2 = int(np.unique(uqseqid[atoms2]))
                    f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
                f.write('\n')
                mol.remove(torem, _logger=False)

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
    logger.debug('Writing PDB file for input to tleap.')
    pdbname = os.path.join(outdir, 'input.pdb')
    # mol2 files have atomtype, here we only write parts not coming from mol2
    # We need to write the input.pdb at the end since we modify the resname for disulfide bridges in mol
    mol.write(pdbname, mol.atomtype == '')
    if not os.path.isfile(pdbname):
        raise NameError('Could not write a PDB file out of the given Molecule.')

    molbuilt = None
    if execute:
        # Source paths of extra dirs (our dirs, not amber default)
        htmdamberdir = os.path.abspath(os.path.join(home(), 'builder', 'amberfiles'))
        sourcepaths = [htmdamberdir]
        sourcepaths += [os.path.join(htmdamberdir, os.path.dirname(fname))
                        for fname in ff if os.path.isfile(os.path.join(htmdamberdir, fname))]
        extrasource = []
        for p in sourcepaths:
            extrasource.append('-I')
            extrasource.append('{}'.format(p))
        logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                cmd = [tleap, '-f', './tleap.in']
                cmd[1:1] = extrasource
                try:
                    call(cmd, stdout=logfile)
                except Exception as err:
                    raise NameError('tleap failed at execution') from err
            errors = _logParser(logpath)
        finally:
            # Restore the working directory also when tleap could not be run
            os.chdir(currdir)
        if errors:
            raise BuildError(errors + ['Check {} for further information on errors in building.'.format(logpath)])
        logger.info('Finished building.')

        # tleap was asked to write <prefix>.prmtop / <prefix>.crd, so those are the files to look for
        crdfile = os.path.join(outdir, prefix + '.crd')
        prmtopfile = os.path.join(outdir, prefix + '.prmtop')
        if all(os.path.isfile(x) and os.path.getsize(x) != 0 for x in (crdfile, prmtopfile)):
            molbuilt = Molecule(prmtopfile)
            molbuilt.read(crdfile)
        else:
            raise BuildError('No structure pdb/prmtop file was generated. Check {} for errors in building.'
                             .format(logpath))

        if ionize:
            shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            anion=saltanion, cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included. The GBSA settings are forwarded as well so the
            # final system is built with the radii the caller asked for.
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap, atomtypes=atomtypes,
                         offlibraries=offlibraries, gbsa=gbsa, igb=igb)

        tmpbonds = molbuilt.bonds
        molbuilt.bonds = []  # Removing the bonds to speed up writing
        molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
        molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap='tleap', execute=True):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize the system.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files. Default: ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    topo : list of str
        A list of topology `prepi` files.
    param : list of str
        A list of parameter `frcmod` files. Entries that exist as files are copied to `outdir`; other entries are
        handed to tleap under the given name.
    prefix : str
        The prefix for the generated pdb, prmtop and crd files
    outdir : str
        The path to the output directory
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps of that segment.
        e.g. caps['P'] = ['ACE', 'NME']. Default: will apply ACE and NME caps to proteins and no caps
        to the rest.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : np.ndarray
        Accepted for interface compatibility. Disulfide patching is disabled in this function, so the value is only
        forwarded to the rebuild performed after ionization and no bonds are written.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    NameError
        If tleap cannot be found or fails to start, if the input PDB file cannot be written or if tleap did not
        produce non-empty `<prefix>.pdb` and `<prefix>.prmtop` files.

    Example
    -------
    >>> ffs = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    >>> molbuilt = amber.build(mol, ff=ffs, outdir='/tmp/build', saltconc=0.15)
    """
    # Remove pdb bonds!
    mol = mol.copy()
    mol.bonds = []
    if shutil.which(tleap) is None:
        raise NameError('Could not find executable: `' + tleap + '` in the PATH. Cannot build for AMBER.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    if topo is None:
        topo = []
    if param is None:
        param = []
    if caps is None:
        caps = _defaultCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    logger.info('Converting CHARMM membranes to AMBER.')
    mol = _charmmLipid2Amber(mol)

    _applyCaps(mol, caps)

    # The context manager closes tleap.in also when one of the steps below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        for force in ff:
            f.write('source ' + force + '\n')
        f.write('\n')

        # Loading TIP3P water parameters
        f.write('# Loading ions and TIP3P water parameters\n')
        f.write('loadamberparams frcmod.ionsjc_tip3p\n\n')

        # Loading the user supplied frcmod parameters, which were previously accepted but never used
        if param:
            f.write('# Loading parameter files\n')
            for p in param:
                if os.path.isfile(p):
                    shutil.copy(p, outdir)
                    f.write('loadamberparams ' + os.path.basename(p) + '\n')
                else:
                    # Not a local file: reference it by the given name and let tleap look it up
                    f.write('loadamberparams ' + p + '\n')
            f.write('\n')

        # Printing out topologies
        logger.info('Writing prepi files.')
        f.write('# Loading prepi topologies\n')
        for t in topo:
            shutil.copy(t, outdir)
            f.write('loadamberprep ' + os.path.basename(t) + '\n')
        f.write('\n')

        # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
        logger.info('Writing PDB file for input to tleap.')
        pdbname = os.path.join(outdir, 'input.pdb')
        mol.write(pdbname)
        if not os.path.isfile(pdbname):
            raise NameError('Could not write a PDB file out of the given Molecule.')
        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        # NOTE: no disulfide patches are written; `disulfide` is only forwarded to the rebuild after ionization

        f.write('# Writing out the results\n')
        f.write('savepdb mol ' + prefix + '.pdb\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    molbuilt = None
    if execute:
        logpath = os.path.abspath('{}/log.txt'.format(outdir))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                try:
                    call([tleap, '-f', './tleap.in'], stdout=logfile)
                except Exception as err:
                    raise NameError('tleap failed at execution') from err
        finally:
            # Restore the working directory also when tleap could not be run
            os.chdir(currdir)
        logger.info('Finished building.')

        # tleap was asked to write <prefix>.pdb / .prmtop / .crd, so those are the files to look for
        pdbfile = os.path.join(outdir, prefix + '.pdb')
        crdfile = os.path.join(outdir, prefix + '.crd')
        prmtopfile = os.path.join(outdir, prefix + '.prmtop')
        if all(os.path.isfile(x) and os.path.getsize(x) != 0 for x in (pdbfile, prmtopfile)):
            molbuilt = Molecule(pdbfile)
            molbuilt.read(prmtopfile)
            molbuilt.bonds = []  # Causes problems in ionization mol.remove and mol._removeBonds
        else:
            raise NameError('No structure pdb/prmtop file was generated. Check {} for errors in building.'
                            .format(logpath))

        if ionize:
            shutil.move(pdbfile, os.path.join(outdir, prefix + '.noions.pdb'))
            shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            ff='amber', anion=saltanion,
                                                                            cation=saltcation)
            newmol = ionizePlace(molbuilt, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap)
    return molbuilt
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap=None, execute=True, atomtypes=None,
          offlibraries=None, gbsa=False, igb=2):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi/prep/in` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated pdb, prmtop and crd files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of pairs of atomselection strings
        If None it will guess disulfide bonds. Otherwise provide a list pairs of atomselection strings for each pair of
        residues forming the disulfide bridge.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.
    gbsa : bool
        Modify radii for GBSA implicit water model
    igb : int
        GB model. Select: 1 for mbondi, 2 and 5 for mbondi2, 7 for bondi and 8 for mbondi3.
        Check section 4. The Generalized Born/Surface Area Model of the AMBER manual.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    NameError
        If the tleap executable cannot be found or executed, or if the input pdb/mol2 files cannot be written.
    RuntimeError
        If an atom type definition is not a triplet or an OFF library file does not exist.
    KeyError
        If `gbsa` is True and `igb` is not one of 1, 2, 5, 7, 8.
    BuildError
        If tleap reports errors or does not produce the prmtop/crd files.

    Example
    -------
    >>> from htmd.ui import *  # doctest: +SKIP
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    >>> # More complex example
    >>> disu = [['segid P and resid 157', 'segid P and resid 13'], ['segid K and resid 1', 'segid K and resid 25']]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if tleap is None:
        tleap = _findTleap()
    elif shutil.which(tleap) is None:
        raise NameError('Could not find executable: `{}` in the PATH. Cannot build for AMBER.'.format(tleap))

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)

    # Accept a single file name wherever a list of files is expected. Without this a bare string would be
    # iterated character by character in the loops below.
    if isinstance(ff, str):
        ff = [ff]
    if isinstance(topo, str):
        topo = [topo]
    if isinstance(param, str):
        param = [param]

    _missingSegID(mol)
    _checkMixedSegment(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    # The context manager guarantees tleap.in is closed even if one of the validation errors below is raised
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        for i, force in enumerate(ff):
            if not os.path.isfile(force):
                force = _locateFile(force, 'ff', tleap)
                if force is None:
                    continue
            newname = 'ff{}_{}'.format(i, os.path.basename(force))
            shutil.copy(force, os.path.join(outdir, newname))
            f.write('source {}\n'.format(newname))
        f.write('\n')

        if gbsa:
            gbmodels = {1: 'mbondi', 2: 'mbondi2', 5: 'mbondi2', 7: 'bondi', 8: 'mbondi3'}
            f.write('set default PBradii {}\n\n'.format(gbmodels[igb]))

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError('Atom type definitions have to be triplets. Check the AMBER documentation.')
                f.write(' {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            offlibraries = ensurelist(offlibraries)
            # Enumerate here: each library needs its own index so that two libraries sharing a basename do not
            # overwrite each other, and so the name does not depend on a leftover index from the forcefield loop.
            for i, off in enumerate(offlibraries):
                if not os.path.isfile(off):
                    raise RuntimeError('Could not find off-library in location {}'.format(off))
                newname = 'offlib{}_{}'.format(i, os.path.basename(off))
                shutil.copy(off, os.path.join(outdir, newname))
                f.write('loadoff {}\n'.format(newname))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for i, p in enumerate(param):
            if not os.path.isfile(p):
                p = _locateFile(p, 'param', tleap)
                if p is None:
                    continue
            newname = 'param{}_{}'.format(i, os.path.basename(p))
            shutil.copy(p, os.path.join(outdir, newname))
            f.write('loadamberparams {}\n'.format(newname))
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for i, t in enumerate(topo):
            if not os.path.isfile(t):
                t = _locateFile(t, 'topo', tleap)
                if t is None:
                    continue
            newname = 'topo{}_{}'.format(i, os.path.basename(t))
            shutil.copy(t, os.path.join(outdir, newname))
            f.write('loadamberprep {}\n'.format(newname))
        f.write('\n')

        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        # Atoms carrying an atomtype are written per segment to mol2 files and combined with the pdb part in tleap
        hastype = mol.atomtype != ''
        if np.sum(hastype) != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[hastype])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize:
            # TODO: Remove this once we deprecate the class
            from htmd.builder.builder import DisulfideBridge
            from htmd.molecule.molecule import UniqueResidueID
            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0], DisulfideBridge):
                newdisu = []
                for d in disulfide:
                    r1 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid1, d.segid1))
                    r2 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid2, d.segid2))
                    newdisu.append([r1, r2])
                disulfide = newdisu
            # TODO: Remove up to here ----------------------

            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0][0], str):
                disulfide = convertDisulfide(mol, disulfide)

            if disulfide is None:
                logger.info('Detecting disulfide bonds.')
                disulfide = detectDisulfideBonds(mol)

            # Fix structure to match the disulfide patching
            if len(disulfide) != 0:
                torem = np.zeros(mol.numAtoms, dtype=bool)
                f.write('# Adding disulfide bonds\n')
                for d in disulfide:
                    # Rename the residues to CYX if there is a disulfide bond
                    atoms1 = d[0].selectAtoms(mol, indexes=False)
                    atoms2 = d[1].selectAtoms(mol, indexes=False)
                    mol.resname[atoms1] = 'CYX'
                    mol.resname[atoms2] = 'CYX'
                    # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                    torem |= (atoms1 & (mol.name == 'HG')) | (atoms2 & (mol.name == 'HG'))
                    # Convert to the sequential residue numbering used by tleap
                    uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
                    uqres1 = int(np.unique(uqseqid[atoms1]))
                    uqres2 = int(np.unique(uqseqid[atoms2]))
                    f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
                f.write('\n')
                mol.remove(torem, _logger=False)

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
    logger.debug('Writing PDB file for input to tleap.')
    pdbname = os.path.join(outdir, 'input.pdb')

    # mol2 files have atomtype, here we only write parts not coming from mol2
    # We need to write the input.pdb at the end since we modify the resname for disulfide bridges in mol
    mol.write(pdbname, mol.atomtype == '')
    if not os.path.isfile(pdbname):
        raise NameError('Could not write a PDB file out of the given Molecule.')

    molbuilt = None
    if execute:
        # Source paths of extra dirs (our dirs, not amber default)
        htmdamberdir = os.path.abspath(os.path.join(home(), 'builder', 'amberfiles'))
        sourcepaths = [htmdamberdir]
        sourcepaths += [os.path.join(htmdamberdir, os.path.dirname(fname))
                        for fname in ff if os.path.isfile(os.path.join(htmdamberdir, fname))]
        extrasource = []
        for p in sourcepaths:
            extrasource.append('-I')
            extrasource.append('{}'.format(p))

        logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            # tleap.in references its input files by relative name, so tleap has to run inside outdir
            with open(logpath, 'w') as logfile:
                cmd = [tleap, '-f', './tleap.in']
                cmd[1:1] = extrasource
                try:
                    call(cmd, stdout=logfile)
                except Exception as err:
                    raise NameError('tleap failed at execution') from err
            errors = _logParser(logpath)
        finally:
            # Always restore the caller's working directory, also when tleap or the log parsing fails
            os.chdir(currdir)
        if errors:
            raise BuildError(errors + ['Check {} for further information on errors in building.'.format(logpath)])
        logger.info('Finished building.')

        # The output names follow `prefix` because that is what the saveamberparm line in tleap.in uses
        crdfile = os.path.join(outdir, prefix + '.crd')
        prmtopfile = os.path.join(outdir, prefix + '.prmtop')
        if os.path.isfile(crdfile) and os.path.getsize(crdfile) != 0 and \
                os.path.isfile(prmtopfile) and os.path.getsize(prmtopfile) != 0:
            molbuilt = Molecule(prmtopfile)
            molbuilt.read(crdfile)
        else:
            raise BuildError('No structure pdb/prmtop file was generated. Check {} for errors in building.'.format(logpath))

        if ionize:
            shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            anion=saltanion, cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included. gbsa/igb have to be forwarded, otherwise the final
            # topology is written without the requested PBradii.
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap, atomtypes=atomtypes,
                         offlibraries=offlibraries, gbsa=gbsa, igb=igb)

        # Only reached when tleap was executed, so molbuilt is a Molecule here
        tmpbonds = molbuilt.bonds
        molbuilt.bonds = []  # Removing the bonds to speed up writing
        molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
        molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt