def test_tmscore(self):
    # Regression test: molTMscore on the bundled 'tmscore' data set must
    # reproduce the reference TM-scores and RMSDs listed below.
    from htmd.molecule.molecule import Molecule

    def datafile(name):
        # Path of a file inside the 'tmscore' test-data directory.
        return os.path.join(home(dataDir='tmscore'), name)

    referenceTM = np.array([0.21418524, 0.2367377, 0.23433833, 0.21362964, 0.20935164,
                            0.20279461, 0.27012895, 0.22675238, 0.21230793, 0.2372011])
    referenceRMSD = np.array([3.70322128, 3.43637027, 3.188193, 3.84455877, 3.53053882,
                              3.46781854, 2.93777629, 2.97978692, 2.70792428, 2.63051318])

    trajmol = Molecule(datafile('filtered.pdb'))
    trajmol.read(datafile('traj.xtc'))
    refmol = Molecule(datafile('ntl9_2hbb.pdb'))

    computedTM, computedRMSD = molTMscore(
        trajmol, refmol, trajmol.atomselect('protein'), refmol.atomselect('protein'))

    for computed, reference in ((computedTM, referenceTM), (computedRMSD, referenceRMSD)):
        self.assertTrue(np.allclose(computed, reference))
def __init__(self, sims, refmol, trajalnstr, refalnstr, atomsel, centerstr):
    """Store the reference molecule and the selection strings.

    The reference alignment selection is evaluated immediately on `refmol`.
    When `_singleMolfile(sims)` reports that all simulations share one
    molfile, the trajectory alignment, atom and centering selections are
    evaluated once on that molfile and cached in the `_pc_*` attributes
    (pc = pre-calculated); otherwise those attributes stay None.
    """
    self._refmol = refmol
    self._refalnsel = self._refmol.atomselect(refalnstr)
    self._trajalnsel = trajalnstr
    self._centersel = centerstr
    self._atomsel = atomsel

    # pc = Pre-calculated
    self._pc_trajalnsel = None
    self._pc_atomsel = None
    self._pc_centersel = None

    single, molfile = _singleMolfile(sims)
    if not single:
        return

    shared = Molecule(molfile)
    self._pc_trajalnsel = shared.atomselect(trajalnstr)
    self._pc_atomsel = shared.atomselect(atomsel)
    self._pc_centersel = shared.atomselect(centerstr)
def _filtSim(i, sims, outFolder, filterSel):
    """Write filtered copies of the trajectories of simulation `sims[i]`.

    Parameters
    ----------
    i : int
        Index into `sims` of the simulation to process.
    sims : list
        Simulation objects; the code reads their `trajectory`, `molfile` and
        `simid` attributes.
    outFolder : str
        Folder receiving one sub-directory per simulation. The returned Sim
        points to `<outFolder>/filtered.pdb` as its molfile.
    filterSel : str
        Atom selection string passed to `Molecule.atomselect`; only the
        selected atoms are written out.

    Returns
    -------
    Sim or None
        A new Sim whose parent is `sims[i]` and whose trajectories are the
        files listed in the simulation's output directory, or None when the
        molfile of the simulation could not be loaded.
    """
    sim = sims[i]
    name = _simName(sim.trajectory[0])
    directory = path.join(outFolder, name)
    if not path.exists(directory):
        makedirs(directory)

    logger.debug('Processing trajectory ' + name)

    fmolfile = path.join(outFolder, 'filtered.pdb')
    (traj, outtraj) = _renameSims(sim.trajectory, name, outFolder)
    if not traj:
        # _renameSims returned nothing to process: just collect what is
        # already present in the output directory.
        ftrajectory = _listXTCs(path.join(outFolder, name))
        return Sim(simid=sim.simid, parent=sim, input=None, trajectory=ftrajectory, molfile=fmolfile)

    try:
        mol = Molecule(sim.molfile)
    except Exception:
        # Best effort: one unreadable molfile must not abort the whole
        # filtering run. `Exception` (not a bare except) keeps
        # KeyboardInterrupt/SystemExit propagating.
        logger.warning('Error! Skipping simulation ' + name)
        return

    sel = mol.atomselect(filterSel)

    for j, trajfile in enumerate(traj):
        try:
            mol.read(trajfile)
        except IOError as e:
            # Format the exception itself: `e.strerror` is None for IOErrors
            # raised with a plain message, which made string concatenation
            # fail with a TypeError.
            logger.warning('{}, skipping trajectory'.format(e))
            break
        mol.write(outtraj[j], sel)

    ftrajectory = _listXTCs(path.join(outFolder, name))
    return Sim(simid=sim.simid, parent=sim, input=None, trajectory=ftrajectory, molfile=fmolfile)
def __init__(self, sims, sel, simple):
    """Store the selection string and the `simple` flag.

    When `_singleMolfile(sims)` reports that all simulations share one
    molfile, the selection is evaluated once on it and cached in `_pc_sel`
    (pc = pre-calculated); otherwise `_pc_sel` stays None.
    """
    self._pc_sel = None
    self._sel = sel
    self._simple = simple

    single, molfile = _singleMolfile(sims)
    if not single:
        return

    shared = Molecule(molfile)
    self._pc_sel = shared.atomselect(sel)
def __init__(self, sims, protsel, dih=None, sincos=True):
    """Store the protein selection, the dihedral list and the sin/cos flag.

    When `_singleMolfile(sims)` reports that all simulations share one
    molfile, `_dihedralPrecalc` is run once on it with the evaluated protein
    selection and the result is cached in `_pc_dih` (pc = pre-calculated);
    otherwise `_pc_dih` stays None.
    """
    self._protsel = protsel
    self._sincos = sincos
    # TODO: Calculate the dihedral
    self._dih = dih
    self._pc_dih = None

    single, molfile = _singleMolfile(sims)
    if not single:
        return

    shared = Molecule(molfile)
    protatoms = shared.atomselect(protsel)
    self._pc_dih = self._dihedralPrecalc(shared, protatoms)
def test_tmscore(self):
    # Compare molTMscore output for the bundled 'tmscore' trajectory against
    # the reference values hard-coded below.
    from htmd.molecule.molecule import Molecule

    wantedTM = np.array([
        0.21418524, 0.2367377, 0.23433833, 0.21362964, 0.20935164,
        0.20279461, 0.27012895, 0.22675238, 0.21230793, 0.2372011,
    ])
    wantedRMSD = np.array([
        3.70322128, 3.43637027, 3.188193, 3.84455877, 3.53053882,
        3.46781854, 2.93777629, 2.97978692, 2.70792428, 2.63051318,
    ])

    structfile = os.path.join(home(dataDir='tmscore'), 'filtered.pdb')
    moving = Molecule(structfile)
    trajfile = os.path.join(home(dataDir='tmscore'), 'traj.xtc')
    moving.read(trajfile)
    nativefile = os.path.join(home(dataDir='tmscore'), 'ntl9_2hbb.pdb')
    native = Molecule(nativefile)

    movingsel = moving.atomselect('protein')
    nativesel = native.atomselect('protein')
    gotTM, gotRMSD = molTMscore(moving, native, movingsel, nativesel)

    tm_matches = np.allclose(gotTM, wantedTM)
    self.assertTrue(tm_matches)
    rmsd_matches = np.allclose(gotRMSD, wantedRMSD)
    self.assertTrue(rmsd_matches)
def _filtSim(i, sims, outFolder, filterSel):
    """Write filtered copies of the trajectories of simulation `sims[i]`.

    Parameters
    ----------
    i : int
        Index into `sims` of the simulation to process.
    sims : list
        Simulation objects; the code reads their `trajectory`, `molfile` and
        `simid` attributes.
    outFolder : str
        Folder receiving one sub-directory per simulation. The returned Sim
        points to `<outFolder>/filtered.pdb` as its molfile.
    filterSel : str
        Atom selection string passed to `Molecule.atomselect`; only the
        selected atoms are written out.

    Returns
    -------
    Sim or None
        A new Sim whose parent is `sims[i]`, with the trajectories detected
        in the simulation's output directory and the frame count returned by
        `_getNumFrames`, or None when the molfile could not be loaded.
    """
    # Imported locally, as in the original code, but outside the `try` so a
    # broken installation surfaces as an ImportError instead of being
    # reported as a skipped simulation.
    from htmd.molecule.molecule import Molecule

    sim = sims[i]
    name = _simName(sim.trajectory[0])
    directory = path.join(outFolder, name)
    if not path.exists(directory):
        makedirs(directory)

    logger.debug('Processing trajectory ' + name)

    fmolfile = path.join(outFolder, 'filtered.pdb')

    def _result():
        # Build the Sim describing whatever is currently in the output dir.
        ftrajectory = _autoDetectTrajectories(path.join(outFolder, name))
        numframes = _getNumFrames(sim, ftrajectory)
        return Sim(simid=sim.simid, parent=sim, input=None, trajectory=ftrajectory,
                   molfile=fmolfile, numframes=numframes)

    (traj, outtraj) = _renameSims(sim.trajectory, name, outFolder)
    if not traj:
        return _result()

    try:
        mol = Molecule(sim.molfile)
    except Exception:
        # Best effort: one unreadable molfile must not abort the whole
        # filtering run. `Exception` (not a bare except) keeps
        # KeyboardInterrupt/SystemExit propagating.
        logger.warning('Error! Skipping simulation ' + name)
        return

    sel = mol.atomselect(filterSel)

    for j, trajfile in enumerate(traj):
        try:
            mol.read(trajfile)
        except IOError as e:
            logger.warning('{}, skipping trajectory'.format(e))
            break
        mol.write(outtraj[j], sel)

    return _result()
def _filtSim(i, sims, outFolder, filterSel):
    """Write filtered copies of the trajectories of simulation `sims[i]`.

    Parameters
    ----------
    i : int
        Index into `sims` of the simulation to process.
    sims : list
        Simulation objects; the code reads their `trajectory`, `molfile` and
        `simid` attributes.
    outFolder : str
        Folder receiving one sub-directory per simulation. The returned Sim
        points to `<outFolder>/filtered.pdb` as its molfile.
    filterSel : str
        Atom selection string passed to `Molecule.atomselect`; only the
        selected atoms are written out.

    Returns
    -------
    Sim or None
        A new Sim whose parent is `sims[i]` and whose trajectories are the
        files listed in the simulation's output directory, or None when the
        molfile of the simulation could not be loaded.
    """
    sim = sims[i]
    name = _simName(sim.trajectory[0])
    directory = path.join(outFolder, name)
    if not path.exists(directory):
        makedirs(directory)

    logger.debug('Processing trajectory ' + name)

    fmolfile = path.join(outFolder, 'filtered.pdb')
    (traj, outtraj) = _renameSims(sim.trajectory, name, outFolder)
    if not traj:
        # _renameSims returned nothing to process: just collect what is
        # already present in the output directory.
        ftrajectory = _listTrajectories(path.join(outFolder, name))
        return Sim(simid=sim.simid, parent=sim, input=None, trajectory=ftrajectory, molfile=fmolfile)

    try:
        mol = Molecule(sim.molfile)
    except Exception:
        # Best effort: one unreadable molfile must not abort the whole
        # filtering run. `Exception` (not a bare except) keeps
        # KeyboardInterrupt/SystemExit propagating.
        logger.warning('Error! Skipping simulation ' + name)
        return

    sel = mol.atomselect(filterSel)

    for j, trajfile in enumerate(traj):
        try:
            mol.read(trajfile)
        except IOError as e:
            # Format the exception itself: `e.strerror` is None for IOErrors
            # raised with a plain message, which made string concatenation
            # fail with a TypeError.
            logger.warning('{}, skipping trajectory'.format(e))
            break
        mol.write(outtraj[j], sel)

    ftrajectory = _listTrajectories(path.join(outFolder, name))
    return Sim(simid=sim.simid, parent=sim, input=None, trajectory=ftrajectory, molfile=fmolfile)
def build(mol, topo=None, param=None, stream=None, prefix='structure', outdir='./build', caps=None, ionize=True,
          saltconc=0, saltanion=None, saltcation=None, disulfide=None, patches=None, noregen=None, psfgen=None,
          execute=True, _clean=True):
    """ Builds a system for CHARMM

    Uses VMD and psfgen to build a system for CHARMM. Additionally it allows for ionization and adding of disulfide
    bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    topo : list of str
        A list of topology `rtf` files.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available topology files.
        Default: ['top/top_all36_prot.rtf', 'top/top_all36_lipid.rtf', 'top/top_water_ions.rtf']
    param : list of str
        A list of parameter `prm` files.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available parameter files.
        Default: ['par/par_all36_prot_mod.prm', 'par/par_all36_lipid.prm', 'par/par_water_ions.prm']
    stream : list of str
        A list of stream `str` files containing topologies and parameters.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available stream files.
        Default: ['str/prot/toppar_all36_prot_arg0.str']
    prefix : str
        The prefix for the generated pdb and psf files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps of that segment.
        e.g. caps['P'] = ['first ACE', 'last CT3'] or caps['P'] = ['first none', 'last none'].
        Default: will apply ACE and CT3 caps to proteins and none caps to the rest.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration (in Molar) to add to the system after neutralization.
    saltanion : {'CLA'}
        The anion type. Please use only CHARMM ion atom names.
    saltcation : {'SOD', 'MG', 'POT', 'CES', 'CAL', 'ZN2'}
        The cation type. Please use only CHARMM ion atom names.
    disulfide : list of :class:`DisulfideBridge <htmd.builder.builder.DisulfideBridge>` objects
        If None it will guess disulfide bonds. Otherwise provide a list of `DisulfideBridge` objects.
    patches : list of str
        Any further patches the user wants to apply
    noregen : list of str
        A list of patches that must not be regenerated (angles and dihedrals)
        Default: ['FHEM', 'PHEM', 'PLOH', 'PLO2', 'PLIG', 'PSUL']
    psfgen : str
        Path to psfgen executable used to build for CHARMM
    execute : bool
        Disable building. Will only write out the input script needed by psfgen. Does not include ionization.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object

    Raises
    ------
    FileNotFoundError
        If `psfgen` is None and no executable psfgen can be found in the PATH.
    BuildError
        If the psfgen log contains errors or the `<prefix>.pdb` / `<prefix>.psf` files were not generated.

    Example
    -------
    >>> from htmd import *
    >>> mol = Molecule("3PTB")
    >>> mol.filter("not resname BEN")
    >>> mol.renumberResidues()
    >>> molbuilt = charmm.build(mol, outdir='/tmp/build', ionize=False)  # doctest: +ELLIPSIS
    Bond between A: [serial 185 resid 42 resname CYS chain A segid 0]
                 B: [serial 298 resid 58 resname CYS chain A segid 0]...
    >>> # More complex example
    >>> topos  = ['top/top_all36_prot.rtf', './benzamidine.rtf', 'top/top_water_ions.rtf']
    >>> params = ['par/par_all36_prot_mod.prm', './benzamidine.prm', 'par/par_water_ions.prm']
    >>> disu = [DisulfideBridge('P', 157, 'P', 13), DisulfideBridge('K', 1, 'K', 25)]
    >>> molbuilt = charmm.build(mol, topo=topos, param=params, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Work on a copy so the caller's Molecule is never modified
    mol = mol.copy()
    _missingSegID(mol)
    _checkMixedSegment(mol)
    _checkResidueInsertions(mol)
    if psfgen is None:
        psfgen = shutil.which('psfgen', mode=os.X_OK)
        if not psfgen:
            raise FileNotFoundError('Could not find psfgen executable, or no execute permissions are given. '
                                    'Run `conda install psfgen`.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    if _clean:
        _cleanOutDir(outdir)
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if stream is None:
        stream = defaultStream()
    if caps is None:
        caps = _defaultCaps(mol)
    # patches that must _not_ be regenerated
    if noregen is None:
        noregen = ['FHEM', 'PHEM', 'PLOH', 'PLO2', 'PLIG', 'PSUL']

    # Copies, so the lists passed by the caller are not extended below
    alltopo = topo.copy()
    allparam = param.copy()

    # Splitting the stream files and adding them to the list of parameter and topology files
    charmmdir = path.join(home(), 'builder', 'charmmfiles')
    for s in stream:
        # Names not starting with '.' are first looked up in the bundled CHARMM files
        if s[0] != '.' and path.isfile(path.join(charmmdir, s)):
            s = path.join(charmmdir, s)
        outrtf, outprm = _prepareStream(s)
        alltopo.append(outrtf)
        allparam.append(outprm)

    if patches is None:
        patches = []
    if isinstance(patches, str):
        patches = [patches]
    allpatches = []
    allpatches += patches
    # Find protonated residues and add patches for them
    allpatches += _protonationPatches(mol)

    # The context manager closes the script even if one of the steps below raises
    with open(path.join(outdir, 'build.vmd'), 'w') as f:
        f.write('# psfgen file generated by charmm.build\n')
        f.write('package require psfgen;\n')
        f.write('psfcontext reset;\n\n')

        # Copying and printing out the topologies
        if not path.exists(path.join(outdir, 'topologies')):
            os.makedirs(path.join(outdir, 'topologies'))
        for i in range(len(alltopo)):
            if alltopo[i][0] != '.' and path.isfile(path.join(charmmdir, alltopo[i])):
                alltopo[i] = path.join(charmmdir, alltopo[i])
            # The index prefix keeps files with identical basenames apart
            localname = '{}.'.format(i) + path.basename(alltopo[i])
            shutil.copy(alltopo[i], path.join(outdir, 'topologies', localname))
            f.write('topology ' + path.join('topologies', localname) + '\n')
        f.write('\n')

        _printAliases(f)

        # Printing out segments
        if not path.exists(path.join(outdir, 'segments')):
            os.makedirs(path.join(outdir, 'segments'))
        logger.info('Writing out segments.')
        segments = _getSegments(mol)
        wateratoms = mol.atomselect('water')
        for seg in segments:
            pdbname = 'segment' + seg + '.pdb'
            segatoms = mol.segid == seg
            mol.write(path.join(outdir, 'segments', pdbname), sel=segatoms)

            segwater = wateratoms & segatoms
            f.write('segment ' + seg + ' {\n')
            if np.all(segatoms == segwater):  # If segment only contains waters, set: auto none
                f.write('\tauto none\n')
            f.write('\tpdb ' + path.join('segments', pdbname) + '\n')
            if caps is not None and seg in caps:
                for c in caps[seg]:
                    f.write('\t' + c + '\n')
            f.write('}\n')
            f.write('coordpdb ' + path.join('segments', pdbname) + ' ' + seg + '\n\n')

        # Printing out patches for the disulfide bridges
        if disulfide is None:
            disulfide = detectDisulfideBonds(mol)

        if len(disulfide) != 0:
            for d in disulfide:
                f.write('patch DISU {}:{} {}:{}\n'.format(d.segid1, d.resid1, d.segid2, d.resid2))
            f.write('\n')

        # The second whitespace-separated token of a patch line is compared against `noregen`
        noregenpatches = [p for p in allpatches if p.split()[1] in noregen]
        regenpatches = [p for p in allpatches if p.split()[1] not in noregen]

        # Printing regenerable patches
        if len(regenpatches) != 0:
            for p in regenpatches:
                f.write(p + '\n')
            f.write('\n')

        # Regenerate angles and dihedrals
        f.write('regenerate angles dihedrals\n')
        f.write('\n')

        # Printing non-regenerable patches
        if len(noregenpatches) != 0:
            for p in noregenpatches:
                f.write(p + '\n')
            f.write('\n')

        f.write('guesscoord\n')
        f.write('writepsf ' + prefix + '.psf\n')
        f.write('writepdb ' + prefix + '.pdb\n')

    if allparam is not None:
        combine(allparam, path.join(outdir, 'parameters'))

    molbuilt = None
    if execute:
        logpath = os.path.abspath('{}/log.txt'.format(outdir))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                call([psfgen, './build.vmd'], stdout=logfile)
            errors = _logParser(logpath)
        finally:
            # Always restore the caller's working directory, even on failure
            os.chdir(currdir)
        if errors:
            raise BuildError(errors + ['Check {} for further information on errors in building.'.format(logpath)])
        logger.info('Finished building.')

        # The script above writes `<prefix>.pdb/.psf`, so look for exactly those files
        # (previously hard-coded to 'structure.*', which broke any non-default prefix).
        builtpdb = path.join(outdir, prefix + '.pdb')
        builtpsf = path.join(outdir, prefix + '.psf')
        if path.isfile(builtpdb) and path.isfile(builtpsf):
            molbuilt = Molecule(builtpdb)
            molbuilt.read(builtpsf)
        else:
            raise BuildError('No structure pdb/psf file was generated. Check {} for errors in building.'.format(logpath))

        if ionize:
            # Stash everything produced by the first pass before rebuilding with ions
            os.makedirs(path.join(outdir, 'pre-ionize'))
            data = glob(path.join(outdir, '*'))
            for item in data:
                shutil.move(item, path.join(outdir, 'pre-ionize'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            ff='charmm', anion=saltanion,
                                                                            cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included
            return build(newmol, topo=alltopo, param=allparam, stream=[], prefix=prefix, outdir=outdir, ionize=False,
                         caps=caps, execute=execute, saltconc=saltconc, disulfide=disulfide, patches=patches,
                         noregen=noregen, psfgen=psfgen, _clean=False)
    # NOTE(review): with execute=False `molbuilt` is None here - confirm both helpers tolerate None.
    _checkFailedAtoms(molbuilt)
    _recoverProtonations(molbuilt)
    return molbuilt
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True,
          saltconc=0, saltanion=None, saltcation=None, disulfide=None, tleap=None, execute=True, atomtypes=None,
          offlibraries=None, gbsa=False, igb=2):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi/prep/in` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated pdb and psf files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of pairs of atomselection strings
        If None it will guess disulfide bonds. Otherwise provide a list pairs of atomselection strings for each pair of
        residues forming the disulfide bridge.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.
    gbsa : bool
        Modify radii for GBSA implicit water model
    igb : int
        GB model. Select: 1 for mbondi, 2 and 5 for mbondi2, 7 for bondi and 8 for mbondi3.
        Check section 4. The Generalized Born/Surface Area Model of the AMBER manual.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object

    Raises
    ------
    NameError
        If the given `tleap` executable is not in the PATH, if tleap could not be executed, or if the input
        PDB/mol2 files could not be written.
    RuntimeError
        If an entry of `atomtypes` is not a triplet or an OFF library file does not exist.
    BuildError
        If the tleap log contains errors or no non-empty `<prefix>.crd` / `<prefix>.prmtop` files were generated.

    Example
    -------
    >>> from htmd.ui import *  # doctest: +SKIP
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    >>> # More complex example
    >>> disu = [['segid P and resid 157', 'segid P and resid 13'], ['segid K and resid 1', 'segid K and resid 25']]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if tleap is None:
        tleap = _findTleap()
    elif shutil.which(tleap) is None:
        raise NameError(
            'Could not find executable: `{}` in the PATH. Cannot build for AMBER.'
            .format(tleap))

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    # The context manager closes the script even if one of the steps below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        if isinstance(ff, str):
            ff = [ff]
        for i, force in enumerate(ff):
            if not os.path.isfile(force):
                force = _locateFile(force, 'ff', tleap)
                if force is None:
                    continue
            # The index prefix keeps files with identical basenames apart
            newname = 'ff{}_{}'.format(i, os.path.basename(force))
            shutil.copy(force, os.path.join(outdir, newname))
            f.write('source {}\n'.format(newname))
        f.write('\n')

        if gbsa:
            gbmodels = {
                1: 'mbondi',
                2: 'mbondi2',
                5: 'mbondi2',
                7: 'bondi',
                8: 'mbondi3'
            }
            f.write('set default PBradii {}\n\n'.format(gbmodels[igb]))

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError(
                        'Atom type definitions have to be triplets. Check the AMBER documentation.'
                    )
                f.write(' {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            offlibraries = ensurelist(offlibraries)
            # Use a dedicated index: this loop previously reused the stale `i` of the forcefield loop, which
            # is undefined for an empty `ff` and gave every library the same number.
            for j, off in enumerate(offlibraries):
                if not os.path.isfile(off):
                    raise RuntimeError(
                        'Could not find off-library in location {}'.format(off))
                newname = 'offlib{}_{}'.format(j, os.path.basename(off))
                shutil.copy(off, os.path.join(outdir, newname))
                f.write('loadoff {}\n'.format(newname))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for i, p in enumerate(param):
            if not os.path.isfile(p):
                p = _locateFile(p, 'param', tleap)
                if p is None:
                    continue
            newname = 'param{}_{}'.format(i, os.path.basename(p))
            shutil.copy(p, os.path.join(outdir, newname))
            f.write('loadamberparams {}\n'.format(newname))
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for i, t in enumerate(topo):
            if not os.path.isfile(t):
                t = _locateFile(t, 'topo', tleap)
                if t is None:
                    continue
            newname = 'topo{}_{}'.format(i, os.path.basename(t))
            shutil.copy(t, os.path.join(outdir, newname))
            f.write('loadamberprep {}\n'.format(newname))
        f.write('\n')

        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        # Atoms carrying an atomtype are written to per-segment mol2 files and combined with the PDB part
        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError(
                        'Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize:
            # TODO: Remove this once we deprecate the class
            from htmd.builder.builder import DisulfideBridge
            from htmd.molecule.molecule import UniqueResidueID
            if disulfide is not None and len(disulfide) != 0 and isinstance(
                    disulfide[0], DisulfideBridge):
                newdisu = []
                for d in disulfide:
                    r1 = UniqueResidueID.fromMolecule(
                        mol, 'resid {} and segname {}'.format(d.resid1, d.segid1))
                    r2 = UniqueResidueID.fromMolecule(
                        mol, 'resid {} and segname {}'.format(d.resid2, d.segid2))
                    newdisu.append([r1, r2])
                disulfide = newdisu
            # TODO: Remove up to here ----------------------

            if disulfide is not None and len(disulfide) != 0 and isinstance(
                    disulfide[0][0], str):
                disulfide = convertDisulfide(mol, disulfide)

            if disulfide is None:
                logger.info('Detecting disulfide bonds.')
                disulfide = detectDisulfideBonds(mol)

            # Fix structure to match the disulfide patching
            if len(disulfide) != 0:
                torem = np.zeros(mol.numAtoms, dtype=bool)
                f.write('# Adding disulfide bonds\n')
                for d in disulfide:
                    # Rename the residues to CYX if there is a disulfide bond
                    atoms1 = d[0].selectAtoms(mol, indexes=False)
                    atoms2 = d[1].selectAtoms(mol, indexes=False)
                    mol.resname[atoms1] = 'CYX'
                    mol.resname[atoms2] = 'CYX'
                    # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                    torem |= (atoms1 & (mol.name == 'HG')) | (atoms2 & (mol.name == 'HG'))
                    # Convert to stupid amber residue numbering
                    uqseqid = sequenceID(
                        (mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
                    uqres1 = int(np.unique(uqseqid[atoms1]))
                    uqres2 = int(np.unique(uqseqid[atoms2]))
                    f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
                f.write('\n')
                mol.remove(torem, _logger=False)

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
    logger.debug('Writing PDB file for input to tleap.')
    pdbname = os.path.join(outdir, 'input.pdb')

    # mol2 files have atomtype, here we only write parts not coming from mol2
    # We need to write the input.pdb at the end since we modify the resname for disulfide bridges in mol
    mol.write(pdbname, mol.atomtype == '')
    if not os.path.isfile(pdbname):
        raise NameError(
            'Could not write a PDB file out of the given Molecule.')

    molbuilt = None
    if execute:
        # Source paths of extra dirs (our dirs, not amber default)
        htmdamberdir = os.path.abspath(
            os.path.join(home(), 'builder', 'amberfiles'))
        sourcepaths = [htmdamberdir]
        sourcepaths += [
            os.path.join(htmdamberdir, os.path.dirname(fname)) for fname in ff
            if os.path.isfile(os.path.join(htmdamberdir, fname))
        ]
        extrasource = []
        for p in sourcepaths:
            extrasource.append('-I')
            extrasource.append('{}'.format(p))

        logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            # Same command line as before: tleap, the -I include dirs, then the script
            cmd = [tleap] + extrasource + ['-f', './tleap.in']
            with open(logpath, 'w') as logfile:
                try:
                    call(cmd, stdout=logfile)
                except Exception as err:
                    # Keep raising NameError for existing callers, but chain the cause and do not
                    # intercept KeyboardInterrupt/SystemExit like the former bare except did.
                    raise NameError('tleap failed at execution') from err
            errors = _logParser(logpath)
        finally:
            # Always restore the caller's working directory, even on failure
            os.chdir(currdir)
        if errors:
            raise BuildError(errors + [
                'Check {} for further information on errors in building.'.
                format(logpath)
            ])
        logger.info('Finished building.')

        # The script above writes `<prefix>.crd/.prmtop`, so look for exactly those files (previously
        # hard-coded to 'structure.*'). Checking existence of both avoids an OSError from getsize.
        crdfile = os.path.join(outdir, prefix + '.crd')
        prmtopfile = os.path.join(outdir, prefix + '.prmtop')
        if os.path.isfile(crdfile) and os.path.getsize(crdfile) != 0 and \
                os.path.isfile(prmtopfile) and os.path.getsize(prmtopfile) != 0:
            molbuilt = Molecule(prmtopfile)
            molbuilt.read(crdfile)
        else:
            raise BuildError(
                'No structure pdb/prmtop file was generated. Check {} for errors in building.'
                .format(logpath))

        if ionize:
            shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(
                totalcharge,
                nwater,
                saltconc=saltconc,
                anion=saltanion,
                cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom,
                                 nanion, ncation)
            # Redo the whole build but now with ions included.
            # gbsa/igb are forwarded so the final system keeps the requested PBradii.
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={},
                         ionize=False, execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap,
                         atomtypes=atomtypes, offlibraries=offlibraries, gbsa=gbsa, igb=igb)

    # Nothing was built when execute=False, so there is nothing to write out
    if molbuilt is not None:
        tmpbonds = molbuilt.bonds
        molbuilt.bonds = []  # Removing the bonds to speed up writing
        molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
        molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap='tleap', execute=True):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files. Default: ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    topo : list of str
        A list of topology `prepi` files.
    param : list of str
        A list of parameter `frcmod` files.
    prefix : str
        The prefix for the generated pdb and psf files
    outdir : str
        The path to the output directory
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps of that segment.
        e.g. caps['P'] = ['ACE', 'NME']. Default: will apply ACE and NME caps to proteins and no caps
        to the rest.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : np.ndarray
        If None it will guess disulfide bonds. Otherwise provide a 2D array where each row is a pair of atom indexes
        that makes a disulfide bond
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object

    Raises
    ------
    NameError
        If `tleap` is not in the PATH, if the input PDB could not be written, if tleap could not be executed,
        or if no non-empty `<prefix>.pdb` / `<prefix>.prmtop` files were generated.

    Example
    -------
    >>> ffs = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    >>> molbuilt = amber.build(mol, ff=ffs, outdir='/tmp/build', saltconc=0.15)
    """
    # Remove pdb bonds!
    mol = mol.copy()
    mol.bonds = []
    if shutil.which(tleap) is None:
        raise NameError('Could not find executable: `' + tleap + '` in the PATH. Cannot build for AMBER.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    if isinstance(ff, str):
        # A single forcefield given as a plain string would otherwise be iterated character by character
        ff = [ff]
    if topo is None:
        topo = []
    if param is None:
        param = []
    if caps is None:
        caps = _defaultCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    logger.info('Converting CHARMM membranes to AMBER.')
    mol = _charmmLipid2Amber(mol)

    _applyCaps(mol, caps)

    # The context manager closes the script even if one of the steps below raises
    with open(path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        for force in ff:
            f.write('source ' + force + '\n')
        f.write('\n')

        # Loading TIP3P water parameters
        f.write('# Loading ions and TIP3P water parameters\n')
        f.write('loadamberparams frcmod.ionsjc_tip3p\n\n')

        # Loading user parameters. `param` used to be accepted but never written to the script, so
        # user-supplied frcmod files were silently ignored.
        f.write('# Loading parameter files\n')
        for p in param:
            shutil.copy(p, outdir)
            f.write('loadamberparams ' + path.basename(p) + '\n')
        f.write('\n')

        # Printing out topologies
        logger.info('Writing prepi files.')
        f.write('# Loading prepi topologies\n')
        for t in topo:
            shutil.copy(t, outdir)
            f.write('loadamberprep ' + path.basename(t) + '\n')
        f.write('\n')

        # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
        logger.info('Writing PDB file for input to tleap.')
        pdbname = path.join(outdir, 'input.pdb')
        mol.write(pdbname)
        if not os.path.isfile(pdbname):
            raise NameError('Could not write a PDB file out of the given Molecule.')

        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        # Printing out patches for the disulfide bridges
        # NOTE: disulfide patching is disabled in this version; `disulfide` is only forwarded to the
        # post-ionization rebuild. The disabled implementation is kept here for reference:
        # if disulfide is None and not ionize:
        #     logger.info('Detecting disulfide bonds.')
        #     disulfide = detectDisulfideBonds(mol)
        #
        # if not ionize and len(disulfide) != 0:  # Only make disu bonds after ionizing!
        #     f.write('# Adding disulfide bonds\n')
        #     for d in disulfide:
        #         # Convert to stupid amber residue numbering
        #         uqseqid = sequenceID(mol.resid)
        #         uqres1 = int(np.unique(uqseqid[mol.atomselect('segid {} and resid {}'.format(d.segid1, d.resid1))]))
        #         uqres2 = int(np.unique(uqseqid[mol.atomselect('segid {} and resid {}'.format(d.segid2, d.resid2))]))
        #         # Rename the CYS to CYX if there is a disulfide bond
        #         mol.set('resname', 'CYX', sel='segid {} and resid {}'.format(d.segid1, d.resid1))
        #         mol.set('resname', 'CYX', sel='segid {} and resid {}'.format(d.segid2, d.resid2))
        #         f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
        #     f.write('\n')

        f.write('# Writing out the results\n')
        f.write('savepdb mol ' + prefix + '.pdb\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    molbuilt = None
    if execute:
        logpath = os.path.abspath('{}/log.txt'.format(outdir))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                try:
                    call([tleap, '-f', './tleap.in'], stdout=logfile)
                except Exception as err:
                    # Keep raising NameError for existing callers, but chain the cause and do not
                    # intercept KeyboardInterrupt/SystemExit like the former bare except did.
                    raise NameError('tleap failed at execution') from err
        finally:
            # Always restore the caller's working directory, even on failure
            os.chdir(currdir)
        logger.info('Finished building.')

        # The script above writes `<prefix>.*`, so look for exactly those files (previously hard-coded to
        # 'structure.*'). Checking existence first makes a missing file raise the intended NameError
        # instead of an OSError from getsize.
        builtpdb = path.join(outdir, prefix + '.pdb')
        builtprmtop = path.join(outdir, prefix + '.prmtop')
        builtcrd = path.join(outdir, prefix + '.crd')
        if path.isfile(builtpdb) and path.getsize(builtpdb) != 0 and \
                path.isfile(builtprmtop) and path.getsize(builtprmtop) != 0:
            molbuilt = Molecule(builtpdb)
            molbuilt.read(builtprmtop)
            molbuilt.bonds = []  # Causes problems in ionization mol.remove and mol._removeBonds
        else:
            raise NameError('No structure pdb/prmtop file was generated. Check {} for errors in building.'.format(logpath))

        if ionize:
            shutil.move(builtpdb, path.join(outdir, prefix + '.noions.pdb'))
            shutil.move(builtcrd, path.join(outdir, prefix + '.noions.crd'))
            shutil.move(builtprmtop, path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            ff='amber', anion=saltanion,
                                                                            cation=saltcation)
            newmol = ionizePlace(molbuilt, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap)
    return molbuilt
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap='tleap', execute=True):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files. A single string is also accepted.
        Default: ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    topo : list of str
        A list of topology `prepi` files.
    param : list of str
        A list of parameter `frcmod` files.
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps of that segment.
        e.g. caps['P'] = ['ACE', 'NME']. Default: will apply ACE and NME caps to proteins and no caps to the rest.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of disulfide bridge objects
        If None it will guess disulfide bonds. Otherwise provide an iterable of objects exposing the `segid1`,
        `resid1`, `segid2` and `resid2` attributes of the two bonded cysteines.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    NameError
        If tleap cannot be found or executed, if the input PDB cannot be written or if tleap did not generate
        the prmtop/crd files.

    Example
    -------
    >>> ffs = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    >>> molbuilt = amber.build(mol, ff=ffs, outdir='/tmp/build', saltconc=0.15)
    """
    # Remove pdb bonds!
    mol = mol.copy()
    mol.bonds = np.empty((0, 2), dtype=np.uint32)
    if shutil.which(tleap) is None:
        raise NameError('Could not find executable: `' + tleap + '` in the PATH. Cannot build for AMBER.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    elif isinstance(ff, str):
        ff = [ff]
    if topo is None:
        topo = []
    if param is None:
        param = []
    if caps is None:
        caps = _defaultCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    logger.info('Converting CHARMM membranes to AMBER.')
    mol = _charmmLipid2Amber(mol)

    #_checkProteinGaps(mol)
    _applyCaps(mol, caps)

    # Disulfide bonds are only made after ionizing, i.e. in the final (ionize=False) pass. The cysteines have to
    # be renamed to CYX *before* the PDB is written, otherwise tleap never sees the renamed residues.
    bondlines = []
    if not ionize:
        if disulfide is None:
            logger.info('Detecting disulfide bonds.')
            disulfide = detectDisulfideBonds(mol)
        if len(disulfide) != 0:
            # Convert to amber residue numbering. Renaming residues does not alter it, so compute it only once
            uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0] - 1
            for d in disulfide:
                sel1 = 'segid {} and resid {}'.format(d.segid1, d.resid1)
                sel2 = 'segid {} and resid {}'.format(d.segid2, d.resid2)
                uqres1 = int(np.unique(uqseqid[mol.atomselect(sel1)]))
                uqres2 = int(np.unique(uqseqid[mol.atomselect(sel2)]))
                # Rename the CYS to CYX if there is a disulfide bond
                mol.set('resname', 'CYX', sel=sel1)
                mol.set('resname', 'CYX', sel=sel2)
                bondlines.append('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))

    with open(path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        for force in ff:
            f.write('source ' + force + '\n')
        f.write('\n')

        # Loading TIP3P water parameters
        f.write('# Loading ions and TIP3P water parameters\n')
        f.write('loadamberparams frcmod.ionsjc_tip3p\n\n')

        # Loading user parameters
        f.write('# Loading parameter files\n')
        for p in param:
            shutil.copy(p, outdir)
            f.write('loadamberparams ' + path.basename(p) + '\n')
        f.write('\n')

        # Printing out topologies
        f.write('# Loading prepi topologies\n')
        for t in topo:
            shutil.copy(t, outdir)
            f.write('loadamberprep ' + path.basename(t) + '\n')
        f.write('\n')

        # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
        logger.info('Writing PDB file for input to tleap.')
        pdbname = path.join(outdir, 'input.pdb')
        mol.write(pdbname)
        if not os.path.isfile(pdbname):
            raise NameError('Could not write a PDB file out of the given Molecule.')
        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        # Printing out patches for the disulfide bridges
        if bondlines:
            f.write('# Adding disulfide bonds\n')
            f.writelines(bondlines)
            f.write('\n')

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    molbuilt = None
    if execute:
        # Source paths of extra dirs
        htmdamberdir = path.join(home(), 'builder', 'amberfiles')
        sourcepaths = [htmdamberdir]
        sourcepaths += [path.join(htmdamberdir, path.dirname(fname)) for fname in ff]
        # Every flag and every path has to be its own argv entry, a single joined string is not parsed by tleap
        cmd = [tleap]
        for p in sourcepaths:
            cmd += ['-I', p]
        cmd += ['-f', './tleap.in']

        logpath = os.path.abspath('{}/log.txt'.format(outdir))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                try:
                    call(cmd, stdout=logfile)
                except Exception as err:
                    raise NameError('tleap failed at execution') from err
        finally:
            # Always restore the working directory, even if tleap could not be started
            os.chdir(currdir)
        logger.info('Finished building.')

        crdfile = path.join(outdir, prefix + '.crd')
        prmtopfile = path.join(outdir, prefix + '.prmtop')
        # Missing files are treated like empty ones so that the user gets pointed to the tleap log
        built = all(path.isfile(fname) and path.getsize(fname) != 0 for fname in (crdfile, prmtopfile))
        if not built:
            raise NameError('No structure pdb/prmtop file was generated. Check {} for errors in building.'
                            .format(logpath))
        molbuilt = Molecule(prmtopfile)
        molbuilt.read(crdfile)

        if ionize:
            shutil.move(crdfile, path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile, path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            ff='amber', anion=saltanion,
                                                                            cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap)
        molbuilt.write(path.join(outdir, prefix + '.pdb'))
    return molbuilt
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap='tleap', execute=True):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files. A single string is also accepted.
        Default: ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    topo : list of str
        A list of topology `prepi` files.
    param : list of str
        A list of parameter `frcmod` files. Files which cannot be copied to `outdir` are passed to tleap by name.
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of disulfide bridge objects
        If None it will guess disulfide bonds. Otherwise provide an iterable of objects exposing the `segid1`,
        `resid1`, `segid2` and `resid2` attributes of the two bonded cysteines.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    NameError
        If tleap cannot be found or executed, if the input PDB/mol2 files cannot be written or if tleap did not
        generate the prmtop/crd files.

    Example
    -------
    >>> ffs = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    >>> molbuilt = amber.build(mol, ff=ffs, outdir='/tmp/build', saltconc=0.15)
    """
    # Remove pdb bonds!
    mol = mol.copy()
    mol.bonds = np.empty((0, 2), dtype=np.uint32)
    if shutil.which(tleap) is None:
        raise NameError('Could not find executable: `' + tleap + '` in the PATH. Cannot build for AMBER.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    elif isinstance(ff, str):
        ff = [ff]
    if topo is None:
        topo = []
    if param is None:
        param = []
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    logger.info('Converting CHARMM membranes to AMBER.')
    mol = _charmmLipid2Amber(mol)

    #_checkProteinGaps(mol)
    _applyProteinCaps(mol, caps)

    # Disulfide bonds are only made after ionizing, i.e. in the final (ionize=False) pass. Both guessed and
    # user-provided bridges need their residues prepared before the structure files are written.
    if not ionize:
        if disulfide is None:
            logger.info('Detecting disulfide bonds.')
            disulfide = detectDisulfideBonds(mol)
        for d in disulfide:
            sel1 = 'segid {} and resid {}'.format(d.segid1, d.resid1)
            sel2 = 'segid {} and resid {}'.format(d.segid2, d.resid2)
            # Rename the CYS to CYX if there is a disulfide bond
            mol.set('resname', 'CYX', sel=sel1)
            mol.set('resname', 'CYX', sel=sel2)
            # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
            mol.remove('name HG and ' + sel1, _logger=False)
            mol.remove('name HG and ' + sel2, _logger=False)

    with open(path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        for force in ff:
            f.write('source ' + force + '\n')
        f.write('\n')

        # Loading TIP3P water parameters
        f.write('# Loading ions and TIP3P water parameters\n')
        f.write('loadamberparams frcmod.ionsjc_tip3p\n\n')

        # Loading user parameters
        f.write('# Loading parameter files\n')
        for p in param:
            try:
                shutil.copy(p, outdir)
            except OSError:
                # Not a file we can copy: let tleap resolve the name itself
                f.write('loadamberparams ' + p + '\n')
                logger.info("Path {:s} not found, assuming a standard AmberTools file.".format(p))
            else:
                f.write('loadamberparams ' + path.basename(p) + '\n')
        f.write('\n')

        # Printing out topologies
        f.write('# Loading prepi topologies\n')
        for t in topo:
            shutil.copy(t, outdir)
            f.write('loadamberprep ' + path.basename(t) + '\n')
        f.write('\n')

        # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
        logger.info('Writing PDB file for input to tleap.')
        pdbname = path.join(outdir, 'input.pdb')
        # mol2 files have atomtype, here we only write parts not coming from mol2
        frommol2 = mol.atomtype != ''
        mol.write(pdbname, ~frommol2)
        if not os.path.isfile(pdbname):
            raise NameError('Could not write a PDB file out of the given Molecule.')
        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        if np.sum(frommol2) != 0:
            logger.info('Writing mol2 files for input to tleap.')
            combstr = 'mol = combine {mol'
            for s in np.unique(mol.segid[frommol2]):
                name = 'segment{}'.format(s)
                mol2name = path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, frommol2 & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize and len(disulfide) != 0:
            f.write('# Adding disulfide bonds\n')
            # Convert to amber residue numbering. The molecule is not modified anymore, so compute it only once
            uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0] - 1
            for d in disulfide:
                uqres1 = int(np.unique(uqseqid[mol.atomselect('segid {} and resid {}'.format(d.segid1, d.resid1))]))
                uqres2 = int(np.unique(uqseqid[mol.atomselect('segid {} and resid {}'.format(d.segid2, d.resid2))]))
                f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
            f.write('\n')

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    molbuilt = None
    if execute:
        # Source paths of extra dirs (our dirs, not amber default)
        htmdamberdir = path.abspath(path.join(home(), 'builder', 'amberfiles'))
        sourcepaths = [htmdamberdir]
        sourcepaths += [path.join(htmdamberdir, path.dirname(fname)) for fname in ff
                        if path.isfile(path.join(htmdamberdir, fname))]
        cmd = [tleap]
        for p in sourcepaths:
            cmd += ['-I', '{}'.format(p)]
        cmd += ['-f', './tleap.in']

        logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                try:
                    call(cmd, stdout=logfile)
                except Exception as err:
                    raise NameError('tleap failed at execution') from err
        finally:
            # Always restore the working directory, even if tleap could not be started
            os.chdir(currdir)
        logger.info('Finished building.')

        crdfile = path.join(outdir, prefix + '.crd')
        prmtopfile = path.join(outdir, prefix + '.prmtop')
        # Missing files are treated like empty ones so that the user gets pointed to the tleap log
        built = all(path.isfile(fname) and path.getsize(fname) != 0 for fname in (crdfile, prmtopfile))
        if not built:
            raise NameError('No structure pdb/prmtop file was generated. Check {} for errors in building.'
                            .format(logpath))
        molbuilt = Molecule(prmtopfile)
        molbuilt.read(crdfile)

        if ionize:
            shutil.move(crdfile, path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile, path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            ff='amber', anion=saltanion,
                                                                            cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap)
        molbuilt.write(path.join(outdir, prefix + '.pdb'))
    return molbuilt
def write(self, inputdir, outputdir):
    """ Write the equilibration protocol

    Writes the equilibration protocol and files into a folder for execution
    using files inside the inputdir directory

    Parameters
    ----------
    inputdir : str
        Path to a directory containing the files produced by a build process.
    outputdir : str
        Directory where to write the equilibration setup files.

    Raises
    ------
    RuntimeError
        If no constraints have been defined. The check is done before anything is written to `outputdir`.

    Examples
    --------
    >>> md = Equilibration()
    >>> md.write('./build','./equil')
    """
    self._findFiles(inputdir)
    self._amberFixes()

    from htmd.units import convert
    numsteps = convert(self.timeunits, 'timesteps', self.runtime, timestep=self.acemd.timestep)

    pdbfile = os.path.join(inputdir, self.acemd.coordinates)
    inmol = Molecule(pdbfile)

    if np.any(inmol.atomselect('lipids')) and not self.useconstantratio:
        logger.warning('Lipids detected in input structure. We highly recommend setting useconstantratio=True '
                       'for membrane simulations.')

    if self.constraintsteps is None:
        constrsteps = int(numsteps / 2)
    else:
        constrsteps = int(self.constraintsteps)

    if isinstance(self.acemd.TCL, tuple):
        # The first element of the tuple is the template which still has to be filled in
        tcl = list(self.acemd.TCL)
        tcl[0] = tcl[0].format(NUMSTEPS=numsteps, KCONST=self.fb_k,
                               REFINDEX=' '.join(map(str, inmol.get('index', self.fb_reference))),
                               SELINDEX=' '.join(map(str, inmol.get('index', self.fb_selection))),
                               BOX=' '.join(map(str, self.fb_box)),
                               NVTSTEPS=self.nvtsteps, CONSTRAINTSTEPS=constrsteps, TEMPERATURE=self.temperature)
        self.acemd.TCL = tcl[0] + tcl[1]
    else:
        logger.warning('{} default TCL was already formatted.'.format(self.__class__.__name__))

    if self.acemd.celldimension is None and self.acemd.extendedsystem is None:
        coords = inmol.get('coords', sel='water')
        vacuum = coords.size == 0
        if vacuum:  # It's a vacuum simulation
            coords = inmol.get('coords', sel='all')
        dim = np.max(coords, axis=0) - np.min(coords, axis=0)
        if vacuum:
            dim = dim + 12.
        self.acemd.celldimension = '{} {} {}'.format(dim[0], dim[1], dim[2])

    if self.useconstantratio:
        self.acemd.useconstantratio = 'on'

    # Fail before setup() overwrites the output directory instead of leaving a half-written protocol behind
    if len(self.constraints) == 0:
        raise RuntimeError('You have not defined any constraints for the Equilibration (constraints={}).')

    self.acemd.setup(inputdir, outputdir, overwrite=True)

    # Adding constraints by writing them to the consref file
    inconsreffile = os.path.join(inputdir, self.acemd.consref)
    consrefmol = Molecule(inconsreffile)
    consrefmol.set('occupancy', 0)
    consrefmol.set('beta', 0)
    for sel in self.constraints:
        consrefmol.set('beta', self.constraints[sel], sel)
    outconsreffile = os.path.join(outputdir, self.acemd.consref)
    consrefmol.write(outconsreffile)
def write(self, inputdir, outputdir):
    """ Write the equilibration protocol

    Writes the equilibration protocol and files into a folder for execution
    using files inside the inputdir directory

    Parameters
    ----------
    inputdir : str
        Path to a directory containing the files produced by a build process.
    outputdir : str
        Directory where to write the equilibration setup files.

    Raises
    ------
    RuntimeError
        If the class of `self.acemd` does not match the protocol version, or if a version 2 protocol has no
        constraints defined.
    RuntimeWarning
        If restraints are set on a version 2 protocol.

    Examples
    --------
    >>> md = Equilibration()
    >>> md.write('./build','./equil')
    """
    from htmd.molecule.molecule import Molecule

    # Do version consistency check. The two cases are mutually exclusive, so either one alone is an error.
    if (self._version == 2 and not isinstance(self.acemd, Acemd2)) or \
            (self._version == 3 and not isinstance(self.acemd, Acemd)):
        raise RuntimeError('Acemd object version ({}) inconsistent with protocol version at instantiation '
                           '({})'.format(type(self.acemd), self._version))

    self._findFiles(inputdir)
    self._amberFixes()

    from htmd.units import convert
    numsteps = convert(self.timeunits, 'timesteps', self.runtime, timestep=self.acemd.timestep)
    if self._version == 3:
        self.acemd.temperature = self.temperature
        self.acemd.thermostattemp = self.temperature
        self.acemd.run = str(numsteps)

    pdbfile = os.path.join(inputdir, self.acemd.coordinates)
    inmol = Molecule(pdbfile)

    if np.any(inmol.atomselect('lipids')) and not self.useconstantratio:
        logger.warning('Lipids detected in input structure. We highly recommend setting useconstantratio=True '
                       'for membrane simulations.')

    if self.constraintsteps is None:
        constrsteps = int(numsteps / 2)
    else:
        constrsteps = int(self.constraintsteps)

    if self._version == 2:
        if self.restraints:
            raise RuntimeWarning('restraints are only available on {}(_version=3)'.format(self.__class__.__name__))
        # Fail before setup() overwrites the output directory instead of leaving a half-written protocol behind
        if len(self.constraints) == 0:
            raise RuntimeError('You have not defined any constraints for the {} ('
                               'constraints={{}}).'.format(self.__class__.__name__))
        if isinstance(self.acemd.TCL, tuple):
            # The first element of the tuple is the template which still has to be filled in
            tcl = list(self.acemd.TCL)
            tcl[0] = tcl[0].format(NUMSTEPS=numsteps, KCONST=self.fb_k,
                                   REFINDEX=' '.join(map(str, inmol.get('index', self.fb_reference))),
                                   SELINDEX=' '.join(map(str, inmol.get('index', self.fb_selection))),
                                   BOX=' '.join(map(str, self.fb_box)),
                                   NVTSTEPS=self.nvtsteps, CONSTRAINTSTEPS=constrsteps,
                                   TEMPERATURE=self.temperature)
            self.acemd.TCL = tcl[0] + tcl[1]
        else:
            logger.warning('{} default TCL was already formatted.'.format(self.__class__.__name__))
    elif self._version == 3:
        if self.restraints is not None:
            logger.info('Using user-provided restraints and ignoring constraints and fb_potential')
            self.acemd.restraints = self.restraints
        else:
            logger.warning('Converting constraints and fb_potential to restraints. This is a convenience '
                           'functional conversion. We recommend start using restraints with '
                           '{}(_version=3)'.format(self.__class__.__name__))
            restraints = list()
            # convert constraints to restraints and add them
            if self.constraints is not None:
                restraints += self._constraints2restraints(constrsteps)
            # convert fb_potential to restraints and add them
            if self.fb_k > 0:
                restraints += self._fb_potential2restraints(inputdir)
            self.acemd.restraints = restraints

    if self.acemd.celldimension is None and self.acemd.extendedsystem is None:
        coords = inmol.get('coords', sel='water')
        vacuum = coords.size == 0
        if vacuum:  # It's a vacuum simulation
            coords = inmol.get('coords', sel='all')
        dim = np.max(coords, axis=0) - np.min(coords, axis=0)
        if vacuum:
            dim += 12.
        self.acemd.celldimension = '{} {} {}'.format(dim[0], dim[1], dim[2])

    if self.useconstantratio:
        self.acemd.useconstantratio = 'on'

    self.acemd.setup(inputdir, outputdir, overwrite=True)

    if self._version == 2:
        # Adding constraints by writing them to the consref file
        inconsreffile = os.path.join(inputdir, self.acemd.consref)
        consrefmol = Molecule(inconsreffile)
        consrefmol.set('occupancy', 0)
        consrefmol.set('beta', 0)
        for sel in self.constraints:
            consrefmol.set('beta', self.constraints[sel], sel)
        outconsreffile = os.path.join(outputdir, self.acemd.consref)
        consrefmol.write(outconsreffile)
class MutualInformation:
    def __init__(self, model, mol=None, fstep=0.1, skip=1):
        """ Class that calculates the mutual information of protein residues.

        Parameters
        ----------
        model : :class:`Model <htmd.model.Model>` object
            A Model object with a calculated MSM
        mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
            A reference molecule from which to obtain structural information. By default model.data.simlist[0].molfile
            will be used.
        fstep : float
            The frame step of the simulations
        skip : int
            Frame skipping

        Examples
        --------
        >>> from htmd.mutualinformation import MutualInformation
        >>> from htmd.ui import *
        >>>
        >>> sims = simlist(glob('./filtered/*/'), './filtered/filtered.pdb')
        >>> mol = Molecule('./filtered/filtered.pdb')
        >>>
        >>> metr = Metric(sims)
        >>> metr.set(MetricDihedral())
        >>> datadih = metr.project()
        >>> datadih.fstep = 0.1
        >>> datadih.dropTraj()
        >>>
        >>> metr = Metric(datadih.simlist)
        >>> metr.set(MetricSelfDistance('protein and name CA', metric='contacts'))
        >>> dataco = metr.project()
        >>> dataco.fstep = 0.1
        >>> dataco.dropTraj()
        >>>
        >>> tica = TICA(datadih, 20, units='ns')
        >>> datatica = tica.project(3)
        >>> datatica.cluster(MiniBatchKMeans(n_clusters=1500))
        >>> model = Model(datatica)
        >>> model.markovModel(12, 4, units='ns')
        >>>
        >>> mu = MutualInformation(model)
        >>> mu.calculate()
        >>> mu.saveMI('./mi_matrix.npy')
        >>> mu.weightGraph(dataco, 0.005)
        >>> mu.save_graphml('./weightgraph.graphml')
        """
        self.model = model
        self.mol = mol
        if mol is None:
            self.mol = Molecule(self.model.data.simlist[0].molfile)

        self._computeChiDihedrals(fstep=fstep, skip=skip)

        self.bindihcat = self._histogram()  # Lasts two minutes
        self.resids = self.mol.get('resid', 'name CA')
        # Maps a resid to the row/column it occupies in the matrices, -1 for resids without a CA atom
        self.residmap = np.full(self.mol.resid.max() + 1, -1, dtype=int)
        self.residmap[self.resids] = np.arange(len(self.resids))

        self.mi_matrix = None
        self.graph_array = None
        self.graph = None

    def calculate(self):
        """ Calculates the mutual information between all pairs of chi1 dihedrals and stores the resulting
        residue-by-residue matrix in `self.mi_matrix`.
        """
        from htmd.config import _config
        from htmd.parallelprogress import ParallelExecutor
        numchi = self.chi.numDimensions
        statdist = self.model.msm.stationary_distribution
        stconcat = np.concatenate(self.model.data.St)
        microcat = self.model.micro_ofcluster[stconcat]

        aprun = ParallelExecutor(n_jobs=_config['ncpus'])
        res = aprun(total=numchi, desc='Calculating MI')(
            delayed(self._parallelAll)(numchi, dih1, 4, self.model.micronum, self.bindihcat, microcat, statdist)
            for dih1 in range(numchi))

        MI_all = np.zeros((len(self.resids), len(self.resids)))
        atomidx = self.chi.description.atomIndexes
        for dihcounts, pairs in res:
            for dihc, (dih1, dih2) in zip(dihcounts, pairs):
                if dih1 == dih2:
                    continue
                # The residue of a dihedral is taken from the first atom which defines it
                resid1 = self.residmap[self.mol.resid[atomidx[dih1][0]]]
                resid2 = self.residmap[self.mol.resid[atomidx[dih2][0]]]
                MI_all[resid1][resid2] = self._calcMutualInfo(dihc)
        self.mi_matrix = self._cleanautocorrelations(MI_all)

    def _computeChiDihedrals(self, fstep=0.1, skip=1):
        """ Projects the simulations of the model on the chi1 dihedrals of the protein and stores the result in
        `self.chi`.
        """
        protmol = self.mol.copy()
        protmol.filter('protein')
        caidx = self.mol.atomselect('protein and name CA')

        chis = []
        for residue in self.mol.resid[caidx]:
            ch = Dihedral.chi1(protmol, residue)
            if ch is not None:
                chis.append(ch)

        metr = Metric(self.model.data.simlist, skip=skip)
        metr.set(MetricDihedral(chis, sincos=False))
        data = metr.project()
        data.fstep = fstep
        self.chi = data

    def _histogram(self):
        """ Discretizes all dihedral values of `self.chi` into four states.

        Returns
        -------
        binneddih : np.ndarray
            Float array with the same shape as the concatenated dihedral data containing the state (0 to 3) of
            each value.
        """
        condata = np.concatenate(self.chi.dat)
        bins = np.array([-180, -150, -90, -30, 0, 30, 90, 150, 180])
        # State of each np.digitize index (0 = below -180 ... 9 = 180 and above):
        # [-90, -30) -> 1, [30, 90) -> 2, anything within 30 degrees of +-180 -> 3, everything else -> 0
        states = np.array([0, 3, 0, 1, 0, 0, 2, 0, 3, 3], dtype=float)
        return states[np.digitize(condata, bins)]

    def _calcMutualInfo(self, contingency):
        """ Calculates the mutual information of a (possibly non-integer) contingency matrix. """
        # Ripped out of sklearn since it converts floats to integers without warning which breaks our use-case
        from math import log
        nzx, nzy = np.nonzero(contingency)
        nz_val = contingency[nzx, nzy]
        contingency_sum = contingency.sum()
        pi = np.ravel(contingency.sum(axis=1))
        pj = np.ravel(contingency.sum(axis=0))
        log_contingency_nm = np.log(nz_val)
        contingency_nm = nz_val / contingency_sum
        # Don't need to calculate the full outer product, just for non-zeroes
        outer = pi.take(nzx) * pj.take(nzy)
        log_outer = -np.log(outer) + log(pi.sum()) + log(pj.sum())
        mi = (contingency_nm * (log_contingency_nm - log(contingency_sum)) +
              contingency_nm * log_outer)
        return mi.sum()

    def _parallelAll(self, numdih, dih1, numbins, micronum, bindihcat, microcat, stat_dist):
        """ Calculates the stationary-distribution weighted joint state counts of dihedral `dih1` with every
        dihedral `dih2 >= dih1`.

        Returns
        -------
        results : list of np.ndarray
            One (numbins, numbins) matrix per dihedral pair
        resultpairs : list of tuple
            The (dih1, dih2) pair corresponding to each matrix
        """
        results = []
        resultpairs = []
        for dih2 in range(dih1, numdih):
            dihcounts = np.zeros((numbins, numbins, micronum))
            # Find pairs of dihedrals (keys) and which absolute frames they occur in (vals)
            df = pd.DataFrame({'a': list(zip(bindihcat[:, dih1], bindihcat[:, dih2]))})
            gg = df.groupby(by=df.a).groups
            for pair in gg:
                microsofpairs = microcat[gg[pair]]  # Get the microstates of all frames having given dihedral pair
                microsofpairs = microsofpairs[microsofpairs != -1]  # Delete dropped clusters
                # Count number of frames with that pair for each microstate (0, max_micro_seen)
                counts = np.bincount(microsofpairs)
                dihcounts[int(pair[0]), int(pair[1]), :len(counts)] += counts  # Add the counts
            dihcounts /= dihcounts.sum(axis=0).sum(axis=0)  # Normalize all slices by total counts in each microstate
            dihcounts *= stat_dist  # Multiply by stationary distribution of each state
            dihcounts = np.sum(dihcounts, axis=2)  # Calculate the weighted sum over all states
            results.append(dihcounts)
            resultpairs.append((dih1, dih2))
        return results, resultpairs

    def _cleanautocorrelations(self, mi):
        """ Zeroes in-place the diagonal and the first off-diagonals of `mi` (each residue with itself and with
        its direct neighbours) and returns it.
        """
        rows, cols = np.indices(mi.shape)
        mi[np.abs(rows - cols) < 2] = 0
        return mi

    def saveMI(self, path):
        """ Saves the mutual information matrix to `path` with np.save. """
        np.save(path, self.mi_matrix)

    def loadMI(self, path):
        """ Loads a mutual information matrix from `path` with np.load. """
        self.mi_matrix = np.load(path)

    @staticmethod
    def _segmentsOfGraphNodes(edges, resids, segids):
        """ Returns a dictionary resid -> segid for all residues which appear in the 'source' or 'target' column
        of the `edges` DataFrame.
        """
        # Compare values, not row labels: an edge stored in row 0 counts as much as any other
        connected = set(edges['source']) | set(edges['target'])
        return {key: value for key, value in zip(resids, segids) if key in connected}

    def weightGraph(self, datacontacts, mi_threshold, time_treshold=0.6):
        """ Builds a weighted residue graph out of the mutual information matrix and stores it in `self.graph`.

        Two residues are connected if their mutual information is above `mi_threshold` and they are in contact
        in more than a fraction `time_treshold` of all frames.

        Parameters
        ----------
        datacontacts : MetricData-like object
            Contact projection of the simulations. Its `dat` and `description.atomIndexes` attributes are used.
        mi_threshold : float
            Minimum mutual information of an edge
        time_treshold : float
            Minimum fraction of frames in which a contact has to be formed
        """
        # NOTE(review): the networkx calls below (from_pandas_dataframe, connected_component_subgraphs and the
        # argument order of set_node_attributes) look like the networkx 1.x API - confirm the pinned version.
        if len(self.mol.get('resid', 'name CA')) != len(self.resids):
            raise Exception('The length of the protein doesn\'t match the Mutual Information data')

        contactcat = np.concatenate(datacontacts.dat)
        numres = len(self.resids)
        contacts_matrix = np.zeros([numres, numres])
        for i in range(contactcat.shape[1]):
            counter = np.count_nonzero(contactcat[:, i])
            resid1 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][0]]]
            resid2 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][1]]]
            contacts_matrix[resid1][resid2] = counter

        self.graph_array = np.zeros([numres, numres])
        mask = (self.mi_matrix > mi_threshold) & (contacts_matrix > (time_treshold * contactcat.shape[0]))
        self.graph_array[mask] = self.mi_matrix[mask]

        # Only the strict upper triangle is used, every pair becomes a single undirected edge
        intermed = []
        for source in range(self.graph_array.shape[0]):
            for target in range(source + 1, self.graph_array.shape[1]):
                if self.graph_array[source, target] != 0:
                    intermed.append([int(self.resids[source]), int(self.resids[target]),
                                     float(self.graph_array[source, target])])

        import pandas as pd
        import networkx as nx
        from sklearn.cluster.spectral import SpectralClustering

        edges = pd.DataFrame(intermed, columns=['source', 'target', 'weight'])
        edges[['source', 'target']] = edges[['source', 'target']].astype(type('int', (int, ), {}))
        edges['weight'] = edges['weight'].astype(type('float', (float, ), {}))
        G = nx.from_pandas_dataframe(edges, 'source', 'target', ['weight'])

        ## setSegment
        segids = self.mol.get('segid', 'name CA')
        seg_res_dict = self._segmentsOfGraphNodes(edges, self.resids, segids)
        nx.set_node_attributes(G, 'Segment', seg_res_dict)

        ## set
        if not nx.is_connected(G):
            G = max(nx.connected_component_subgraphs(G), key=len)
        flow_cent = nx.current_flow_betweenness_centrality(G, weight='weight')
        nx.set_node_attributes(G, 'flowcent', flow_cent)

        Spectre = SpectralClustering(n_clusters=10, affinity='precomputed')
        model = Spectre.fit_predict(self.graph_array)
        model = model.astype(type('float', (float, ), {}))
        spectral_dict = {key: value for (key, value) in zip(self.resids, model) if key in G.nodes()}
        nx.set_node_attributes(G, 'spectral', spectral_dict)
        self.graph = G

    def save_graphml(self, path):
        """ Writes the graph created by weightGraph to `path` in graphml format. """
        import networkx as nx
        nx.write_graphml(self.graph, path)

    # def save_pdf(self, graphpath, outpath):
    #     print(self.graph_array)
    #     self.save_graphml(graphpath)
    #     compile_java('./GephiGraph.java')
    #     compile_java('./GraphTest.java')
    #     execute_java('GraphTest', graphpath, outpath)


# if __name__ == '__main__':
#     from htmd.mutualinformation import MutualInformation
#     from htmd import *
#
#     sims = simlist(glob('/shared/adria/2ov5/adaptive_amber/batches/1/filtered/*/'),
#                    '/shared/adria/2ov5/adaptive_amber/batches/1/filtered/filtered.pdb')
#     mol = Molecule('/shared/adria/2ov5/adaptive_amber/batches/1/filtered/filtered.pdb')
#
#     metr = Metric(sims[0:100])
#     metr.set(MetricDihedral())
#     datadih = metr.project()
#     datadih.fstep = 0.1
#     datadih.dropTraj()
#
#     metr = Metric(datadih.simlist)
#     metr.set(MetricSelfDistance('protein and name CA', metric='contacts', threshold=8))
#     dataco = metr.project()
#     dataco.fstep = 0.1
#     dataco.dropTraj()
#
#     tica = TICA(datadih, 20, units='ns')
#     datatica = tica.project(3)
#     datatica.cluster(MiniBatchKMeans(n_clusters=1500))
#     model = Model(datatica)
#     model.markovModel(12, 4, units='ns')
#
#     mu = MutualInformation(model)
#     mu.calculate()
#     mu.saveMI('/tmp/mi_matrix.npy')
#     mu.weightGraph(dataco, 0.005)
#     mu.save_graphml('/tmp/weightgraph_0-005.graphml')
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap='tleap', execute=True, atomtypes=None,
          offlibraries=None):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of :class:`DisulfideBridge <htmd.builder.builder.DisulfideBridge>` objects
        If None it will guess disulfide bonds. Otherwise provide a list of `DisulfideBridge` objects.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    NameError
        If the tleap executable cannot be found, an input file cannot be written, tleap fails to execute or
        tleap does not produce the expected output files.
    RuntimeError
        If an entry of `atomtypes` is not a triplet.

    Example
    -------
    >>> from htmd.ui import *
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    ...
    >>> # More complex example
    >>> disu = [DisulfideBridge('P', 157, 'P', 13), DisulfideBridge('K', 1, 'K', 25)]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if shutil.which(tleap) is None:
        raise NameError('Could not find executable: `{}` in the PATH. Cannot build for AMBER.'.format(tleap))

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)
    _checkResidueInsertions(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    # The context manager closes tleap.in even if one of the checks below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        if isinstance(ff, str):
            ff = [ff]
        for force in ff:
            f.write('source ' + force + '\n')
        f.write('\n')

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError('Atom type definitions have to be triplets. Check the AMBER documentation.')
                f.write('    {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            if not isinstance(offlibraries, list) and not isinstance(offlibraries, tuple):
                offlibraries = [offlibraries, ]
            for off in offlibraries:
                f.write('loadoff {}\n\n'.format(off))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for p in param:
            try:
                shutil.copy(p, outdir)
            except OSError:
                # Not a local file: let tleap resolve the name itself
                f.write('loadamberparams ' + p + '\n')
                logger.info("File {:s} not found, assuming its present on the standard Amber location".format(p))
            else:
                f.write('loadamberparams ' + os.path.basename(p) + '\n')
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for t in topo:
            shutil.copy(t, outdir)
            f.write('loadamberprep ' + os.path.basename(t) + '\n')
        f.write('\n')

        # Detect disulfide bridges if not defined by user
        if disulfide is None and not ionize:
            logger.info('Detecting disulfide bonds.')
            disulfide = detectDisulfideBonds(mol)

        # Fix structure to match the disulfide patching
        if not ionize and len(disulfide) != 0:
            for d in disulfide:
                # Rename the residues to CYX if there is a disulfide bond
                atoms1 = (mol.segid == d.segid1) & (mol.resid == d.resid1)
                atoms2 = (mol.segid == d.segid2) & (mol.resid == d.resid2)
                mol.resname[atoms1] = 'CYX'
                mol.resname[atoms2] = 'CYX'
                # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                mol.remove(atoms1 & (mol.name == 'HG'), _logger=False)
                mol.remove(atoms2 & (mol.name == 'HG'), _logger=False)

        # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
        logger.debug('Writing PDB file for input to tleap.')
        pdbname = os.path.join(outdir, 'input.pdb')

        # mol2 files have atomtype, here we only write parts not coming from mol2
        mol.write(pdbname, mol.atomtype == '')
        if not os.path.isfile(pdbname):
            raise NameError('Could not write a PDB file out of the given Molecule.')
        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize and len(disulfide) != 0:
            f.write('# Adding disulfide bonds\n')
            # Convert to stupid amber residue numbering. Computed once, mol is not modified in this loop
            uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
            for d in disulfide:
                uqres1 = int(np.unique(uqseqid[(mol.segid == d.segid1) & (mol.resid == d.resid1)]))
                uqres2 = int(np.unique(uqseqid[(mol.segid == d.segid2) & (mol.resid == d.resid2)]))
                f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
            f.write('\n')

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    molbuilt = None
    if execute:
        # Source paths of extra dirs (our dirs, not amber default)
        htmdamberdir = os.path.abspath(os.path.join(home(), 'builder', 'amberfiles'))
        sourcepaths = [htmdamberdir]
        sourcepaths += [os.path.join(htmdamberdir, os.path.dirname(fname))
                        for fname in ff if os.path.isfile(os.path.join(htmdamberdir, fname))]
        extrasource = []
        for p in sourcepaths:
            extrasource.append('-I')
            extrasource.append('{}'.format(p))
        logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
        logger.info('Starting the build.')
        cmd = [tleap] + extrasource + ['-f', './tleap.in']
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                try:
                    call(cmd, stdout=logfile)
                except Exception as err:
                    raise NameError('tleap failed at execution') from err
        finally:
            # Always go back to the caller's working directory, also when tleap fails
            os.chdir(currdir)
        logger.info('Finished building.')

        # tleap was told to write <prefix>.prmtop / <prefix>.crd, so look for exactly those files
        crdfile = os.path.join(outdir, prefix + '.crd')
        prmtopfile = os.path.join(outdir, prefix + '.prmtop')
        if os.path.exists(crdfile) and os.path.exists(prmtopfile) and \
                os.path.getsize(crdfile) != 0 and os.path.getsize(prmtopfile) != 0:
            molbuilt = Molecule(prmtopfile)
            molbuilt.read(crdfile)
        else:
            raise NameError('No structure pdb/prmtop file was generated. Check {} for errors in building.'
                            .format(logpath))

        if ionize:
            shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            ff='amber', anion=saltanion,
                                                                            cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap, atomtypes=atomtypes,
                         offlibraries=offlibraries)

        # Only reached when a system was actually built; molbuilt stays None when execute is False
        tmpbonds = molbuilt.bonds
        molbuilt.bonds = []  # Removing the bonds to speed up writing
        molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
        molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
def write(self, inputdir, outputdir):
    """ Writes the production protocol and files into a folder.

    Parameters
    ----------
    inputdir : str
        Path to a directory containing the files produced by a equilibration process.
    outputdir : str
        Directory where to write the production setup files.

    Raises
    ------
    RuntimeError
        If the type of `self.acemd` does not match the protocol version, or if version 2 is asked to use
        constraints while none are defined.
    RuntimeWarning
        If restraints are set on a version 2 protocol.
    """
    from htmd.molecule.molecule import Molecule

    # Do version consistency check. The two clauses are alternatives: joined with `and` they could never
    # both hold (the version cannot be 2 and 3 at once), which made the check dead code.
    if (self._version == 2 and not isinstance(self.acemd, Acemd2)) or \
            (self._version == 3 and not isinstance(self.acemd, Acemd)):
        raise RuntimeError('Acemd object version ({}) inconsistent with protocol version at instantiation '
                           '({})'.format(type(self.acemd), self._version))

    self._findFiles(inputdir)
    self._amberFixes()

    # Version 2 stores the temperature as strings and calls the thermostat option langevintemp
    if self._version == 2:
        self.acemd.temperature = str(self.temperature)
        self.acemd.langevintemp = str(self.temperature)
    elif self._version == 3:
        self.acemd.temperature = self.temperature
        self.acemd.thermostattemp = self.temperature

    from htmd.units import convert
    numsteps = convert(self.timeunits, 'timesteps', self.runtime, timestep=self.acemd.timestep)
    if self._version == 3:
        self.acemd.run = str(numsteps)

    pdbfile = os.path.join(inputdir, self.acemd.coordinates)
    inmol = Molecule(pdbfile)

    if np.any(inmol.atomselect('lipids')) and not self.useconstantratio:
        logger.warning('Lipids detected in input structure. We highly recommend setting useconstantratio=True '
                       'for membrane simulations.')

    if self._version == 2:
        if self.restraints:
            raise RuntimeWarning('restraints are only available on {}(_version=3)'.format(self.__class__.__name__))
        if self.fb_k > 0:  # use TCL only for flatbottom
            self.acemd.tclforces = 'on'
            if isinstance(self.acemd.TCL, tuple):
                # The default TCL is a (template, remainder) pair; only the template takes parameters
                tcl = list(self.acemd.TCL)
                tcl[0] = tcl[0].format(NUMSTEPS=numsteps, TEMPERATURE=self.temperature, KCONST=self.fb_k,
                                       REFINDEX=' '.join(map(str, inmol.get('index', self.fb_reference))),
                                       SELINDEX=' '.join(map(str, inmol.get('index', self.fb_selection))),
                                       BOX=' '.join(map(str, self.fb_box)))
                self.acemd.TCL = tcl[0] + tcl[1]
            else:
                logger.warning('{} default TCL was already formatted.'.format(self.__class__.__name__))
        else:
            self.acemd.TCL = 'set numsteps {NUMSTEPS}\n'.format(NUMSTEPS=numsteps)
        if self.useconstraints:
            # Turn on constraints
            self.acemd.constraints = 'on'
            self.acemd.constraintscaling = '1.0'
        else:
            if len(self.constraints) != 0:
                logger.warning('You have setup constraints to {} but constraints are turned off. '
                               'If you want to use constraints, define '
                               'useconstraints=True'.format(self.constraints))
    elif self._version == 3:
        if self.restraints is not None:
            logger.info('Using user-provided restraints and ignoring constraints and fb_potential')
            self.acemd.restraints = self.restraints
        else:
            restraints = list()
            if self.fb_k > 0:
                logger.warning('Converting fb_potential to restraints. This is a convenience '
                               'functional conversion. We recommend start using restraints with '
                               '{}(_version=3)'.format(self.__class__.__name__))
                restraints += self._fb_potential2restraints(inputdir)
            if self.useconstraints:
                logger.warning('Converting constraints to restraints. This is a convenience '
                               'functional conversion. We recommend start using restraints with '
                               '{}(_version=3)'.format(self.__class__.__name__))
                restraints += self._constraints2restraints()
            else:
                if len(self.constraints) != 0:
                    logger.warning('You have setup constraints to {} but constraints are turned off. '
                                   'If you want to use constraints, define '
                                   'useconstraints=True'.format(self.constraints))
            if len(restraints) != 0:
                self.acemd.restraints = restraints

    if self.useconstantratio:
        self.acemd.useconstantratio = 'on'

    if self.adaptive:
        self.acemd.binvelocities = None

    self.acemd.setup(inputdir, outputdir, overwrite=True)

    if self._version == 2:
        # Adding constraints by writing them to the consref file
        if self.useconstraints:
            inconsreffile = os.path.join(inputdir, self.acemd.consref)
            consrefmol = Molecule(inconsreffile)
            consrefmol.set('occupancy', 0)
            consrefmol.set('beta', 0)
            if len(self.constraints) == 0:
                raise RuntimeError('You have set the production to use constraints (useconstraints=True), but have '
                                   'not defined any constraints (constraints={}).')
            else:
                # The constraint value of each selection is stored in the beta field
                for sel in self.constraints:
                    consrefmol.set('beta', self.constraints[sel], sel)
            outconsreffile = os.path.join(outputdir, self.acemd.consref)
            consrefmol.write(outconsreffile)
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap=None, execute=True, atomtypes=None,
          offlibraries=None, gbsa=False, igb=2):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi/prep/in` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of pairs of atomselection strings
        If None it will guess disulfide bonds. Otherwise provide a list pairs of atomselection strings for each pair of
        residues forming the disulfide bridge.
    tleap : str
        Path to tleap executable used to build the system for AMBER. If None it is located with `_findTleap`.
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.
    gbsa : bool
        Modify radii for GBSA implicit water model
    igb : int
        GB model. Select: 1 for mbondi, 2 and 5 for mbondi2, 7 for bondi and 8 for mbondi3.
        Check section 4. The Generalized Born/Surface Area Model of the AMBER manual.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    NameError
        If the given tleap executable cannot be found, an input file cannot be written or tleap fails to execute.
    RuntimeError
        If an entry of `atomtypes` is not a triplet or an OFF library file does not exist.
    BuildError
        If errors are found in the tleap log or tleap does not produce the expected output files.

    Example
    -------
    >>> from htmd.ui import *  # doctest: +SKIP
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    >>> # More complex example
    >>> disu = [['segid P and resid 157', 'segid P and resid 13'], ['segid K and resid 1', 'segid K and resid 25']]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if tleap is None:
        tleap = _findTleap()
    else:
        if shutil.which(tleap) is None:
            raise NameError('Could not find executable: `{}` in the PATH. Cannot build for AMBER.'.format(tleap))

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    # The context manager closes tleap.in even if one of the checks below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        if isinstance(ff, str):
            ff = [ff]
        for i, force in enumerate(ff):
            if not os.path.isfile(force):
                force = _locateFile(force, 'ff', tleap)
                if force is None:
                    continue
            # Files are copied into outdir under an index-prefixed name so equal basenames cannot clash
            newname = 'ff{}_{}'.format(i, os.path.basename(force))
            shutil.copy(force, os.path.join(outdir, newname))
            f.write('source {}\n'.format(newname))
        f.write('\n')

        if gbsa:
            gbmodels = {1: 'mbondi', 2: 'mbondi2', 5: 'mbondi2', 7: 'bondi', 8: 'mbondi3'}
            f.write('set default PBradii {}\n\n'.format(gbmodels[igb]))

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError('Atom type definitions have to be triplets. Check the AMBER documentation.')
                f.write('    {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            offlibraries = ensurelist(offlibraries)
            # Enumerate here: the index used to leak in from the forcefield loop above, giving every
            # library the same prefix (or a NameError when no forcefield was given)
            for i, off in enumerate(offlibraries):
                if not os.path.isfile(off):
                    raise RuntimeError('Could not find off-library in location {}'.format(off))
                newname = 'offlib{}_{}'.format(i, os.path.basename(off))
                shutil.copy(off, os.path.join(outdir, newname))
                f.write('loadoff {}\n'.format(newname))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for i, p in enumerate(param):
            if not os.path.isfile(p):
                p = _locateFile(p, 'param', tleap)
                if p is None:
                    continue
            newname = 'param{}_{}'.format(i, os.path.basename(p))
            shutil.copy(p, os.path.join(outdir, newname))
            f.write('loadamberparams {}\n'.format(newname))
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for i, t in enumerate(topo):
            if not os.path.isfile(t):
                t = _locateFile(t, 'topo', tleap)
                if t is None:
                    continue
            newname = 'topo{}_{}'.format(i, os.path.basename(t))
            shutil.copy(t, os.path.join(outdir, newname))
            f.write('loadamberprep {}\n'.format(newname))
        f.write('\n')

        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize:
            # TODO: Remove this once we deprecate the class
            from htmd.builder.builder import DisulfideBridge
            from htmd.molecule.molecule import UniqueResidueID
            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0], DisulfideBridge):
                newdisu = []
                for d in disulfide:
                    r1 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid1, d.segid1))
                    r2 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid2, d.segid2))
                    newdisu.append([r1, r2])
                disulfide = newdisu
            # TODO: Remove up to here ----------------------

            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0][0], str):
                disulfide = convertDisulfide(mol, disulfide)

            if disulfide is None:
                logger.info('Detecting disulfide bonds.')
                disulfide = detectDisulfideBonds(mol)

            # Fix structure to match the disulfide patching
            if len(disulfide) != 0:
                torem = np.zeros(mol.numAtoms, dtype=bool)
                f.write('# Adding disulfide bonds\n')
                # Convert to stupid amber residue numbering. Computed once: atoms are only removed
                # after the loop, so the numbering does not change while iterating
                uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
                for d in disulfide:
                    # Rename the residues to CYX if there is a disulfide bond
                    atoms1 = d[0].selectAtoms(mol, indexes=False)
                    atoms2 = d[1].selectAtoms(mol, indexes=False)
                    mol.resname[atoms1] = 'CYX'
                    mol.resname[atoms2] = 'CYX'
                    # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                    torem |= (atoms1 & (mol.name == 'HG')) | (atoms2 & (mol.name == 'HG'))
                    uqres1 = int(np.unique(uqseqid[atoms1]))
                    uqres2 = int(np.unique(uqseqid[atoms2]))
                    f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
                f.write('\n')
                mol.remove(torem, _logger=False)

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
    logger.debug('Writing PDB file for input to tleap.')
    pdbname = os.path.join(outdir, 'input.pdb')

    # mol2 files have atomtype, here we only write parts not coming from mol2
    # We need to write the input.pdb at the end since we modify the resname for disulfide bridges in mol
    mol.write(pdbname, mol.atomtype == '')
    if not os.path.isfile(pdbname):
        raise NameError('Could not write a PDB file out of the given Molecule.')

    molbuilt = None
    if execute:
        # Source paths of extra dirs (our dirs, not amber default)
        htmdamberdir = os.path.abspath(os.path.join(home(), 'builder', 'amberfiles'))
        sourcepaths = [htmdamberdir]
        sourcepaths += [os.path.join(htmdamberdir, os.path.dirname(fname))
                        for fname in ff if os.path.isfile(os.path.join(htmdamberdir, fname))]
        extrasource = []
        for p in sourcepaths:
            extrasource.append('-I')
            extrasource.append('{}'.format(p))
        logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
        logger.info('Starting the build.')
        cmd = [tleap] + extrasource + ['-f', './tleap.in']
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                try:
                    call(cmd, stdout=logfile)
                except Exception as err:
                    raise NameError('tleap failed at execution') from err
        finally:
            # Always go back to the caller's working directory, also when tleap fails
            os.chdir(currdir)
        errors = _logParser(logpath)
        if errors:
            raise BuildError(errors + ['Check {} for further information on errors in building.'.format(logpath)])
        logger.info('Finished building.')

        # tleap was told to write <prefix>.prmtop / <prefix>.crd, so look for exactly those files
        crdfile = os.path.join(outdir, prefix + '.crd')
        prmtopfile = os.path.join(outdir, prefix + '.prmtop')
        if os.path.exists(crdfile) and os.path.exists(prmtopfile) and \
                os.path.getsize(crdfile) != 0 and os.path.getsize(prmtopfile) != 0:
            molbuilt = Molecule(prmtopfile)
            molbuilt.read(crdfile)
        else:
            raise BuildError('No structure pdb/prmtop file was generated. Check {} for errors in building.'
                             .format(logpath))

        if ionize:
            shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            anion=saltanion, cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included. gbsa/igb are forwarded so the final
            # topology is written with the same PBradii setting as the first pass
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap, atomtypes=atomtypes,
                         offlibraries=offlibraries, gbsa=gbsa, igb=igb)

        # Only reached when a system was actually built; molbuilt stays None when execute is False
        tmpbonds = molbuilt.bonds
        molbuilt.bonds = []  # Removing the bonds to speed up writing
        molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
        molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
def write(self, inputdir, outputdir):
    """ Writes the production protocol and files into a folder.

    Parameters
    ----------
    inputdir : str
        Path to a directory containing the files produced by a equilibration process.
    outputdir : str
        Directory where to write the production setup files.

    Raises
    ------
    RuntimeError
        If `useconstraints` is True but no constraints are defined.
    """
    self._findFiles(inputdir)
    self._amberFixes()

    self.acemd.temperature = self.temperature
    self.acemd.langevintemp = self.temperature

    from htmd.units import convert
    numsteps = convert(self.timeunits, 'timesteps', self.runtime, timestep=self.acemd.timestep)

    pdbfile = os.path.join(inputdir, self.acemd.coordinates)
    inmol = Molecule(pdbfile)

    if np.any(inmol.atomselect('lipids')) and not self.useconstantratio:
        logger.warning('Lipids detected in input structure. We highly recommend setting useconstantratio=True '
                       'for membrane simulations.')

    if self.fb_k > 0:  # use TCL only for flatbottom
        self.acemd.tclforces = 'on'
        if isinstance(self.acemd.TCL, (tuple, list)):
            # The default TCL is a (template, remainder) pair; only the template takes parameters
            tcl = list(self.acemd.TCL)
            tcl[0] = tcl[0].format(NUMSTEPS=numsteps, TEMPERATURE=self.temperature, KCONST=self.fb_k,
                                   REFINDEX=' '.join(map(str, inmol.get('index', self.fb_reference))),
                                   SELINDEX=' '.join(map(str, inmol.get('index', self.fb_selection))),
                                   BOX=' '.join(map(str, self.fb_box)))
            self.acemd.TCL = tcl[0] + tcl[1]
        else:
            # A previous write() already joined the pair into one string. Splitting that string into
            # characters and formatting it again would corrupt the script, so it is left untouched.
            logger.warning('{} default TCL was already formatted.'.format(self.__class__.__name__))
    else:
        self.acemd.TCL = 'set numsteps {NUMSTEPS}\n' \
                         'set temperature {TEMPERATURE}\n'.format(NUMSTEPS=numsteps, TEMPERATURE=self.temperature)
    if self.useconstraints:
        # Turn on constraints
        self.acemd.constraints = 'on'
        self.acemd.constraintscaling = 1.0
    else:
        if len(self.constraints) != 0:
            logger.warning('You have setup constraints to {} but constraints are turned off. '
                           'If you want to use constraints, define useconstraints=True'.format(self.constraints))

    if self.useconstantratio:
        self.acemd.useconstantratio = 'on'

    if self.adaptive:
        self.acemd.binvelocities = None

    self.acemd.setup(inputdir, outputdir, overwrite=True)

    # Adding constraints by writing them to the consref file
    if self.useconstraints:
        inconsreffile = os.path.join(inputdir, self.acemd.consref)
        consrefmol = Molecule(inconsreffile)
        consrefmol.set('occupancy', 0)
        consrefmol.set('beta', 0)
        if len(self.constraints) == 0:
            raise RuntimeError('You have set the production to use constraints (useconstraints=True), but have not '
                               'defined any constraints (constraints={}).')
        else:
            # The constraint value of each selection is stored in the beta field
            for sel in self.constraints:
                consrefmol.set('beta', self.constraints[sel], sel)
        outconsreffile = os.path.join(outputdir, self.acemd.consref)
        consrefmol.write(outconsreffile)
return angles, dihedrals # A test method if __name__ == "__main__": from htmd.molecule.molecule import Molecule from htmd.home import home import numpy as np from os import path import doctest #doctest.testmod(extraglobs={"mol" : Molecule("3PTB")}) expectedTMscore = np.array([ 0.21418524, 0.2367377 , 0.23433833, 0.21362964, 0.20935164, 0.20279461, 0.27012895, 0.22675238, 0.21230793, 0.2372011 ]) expectedRMSD = np.array([ 3.70322128, 3.43637027, 3.188193 , 3.84455877, 3.53053882, 3.46781854, 2.93777629, 2.97978692, 2.70792428, 2.63051318]) mol = Molecule(path.join(home(), 'data', 'tmscore', 'filtered.pdb')) mol.read(path.join(home(), 'data', 'tmscore', 'traj.xtc')) ref = Molecule(path.join(home(), 'data', 'tmscore', 'ntl9_2hbb.pdb')) tmscore, rmsd = molTMscore(mol, ref, mol.atomselect('protein'), ref.atomselect('protein')) assert np.allclose(tmscore, expectedTMscore) assert np.allclose(rmsd, expectedRMSD) # # rhodopsin = Molecule('1F88') # d3r = Molecule('3PBL') # alnmol = sequenceStructureAlignment(rhodopsin, d3r)
def write(self, inputdir, outputdir):
    """ Writes the production protocol and files into a folder.

    Parameters
    ----------
    inputdir : str
        Path to a directory containing the files produced by a equilibration process.
    outputdir : str
        Directory where to write the production setup files.

    Raises
    ------
    RuntimeError
        If the type of `self.acemd` does not match the protocol version, or if version 2 is asked to use
        constraints while none are defined.
    RuntimeWarning
        If restraints are set on a version 2 protocol.
    """
    from htmd.molecule.molecule import Molecule

    # Do version consistency check. A mismatch in either version is an error, so the clauses are joined
    # with `or`; with `and` the condition required the version to be 2 and 3 at once and never triggered.
    if (self._version == 2 and not isinstance(self.acemd, Acemd2)) or \
            (self._version == 3 and not isinstance(self.acemd, Acemd)):
        raise RuntimeError('Acemd object version ({}) inconsistent with protocol version at instantiation '
                           '({})'.format(type(self.acemd), self._version))

    self._findFiles(inputdir)
    self._amberFixes()

    # Version 2 stores the temperature as strings and calls the thermostat option langevintemp
    if self._version == 2:
        self.acemd.temperature = str(self.temperature)
        self.acemd.langevintemp = str(self.temperature)
    elif self._version == 3:
        self.acemd.temperature = self.temperature
        self.acemd.thermostattemp = self.temperature

    from htmd.units import convert
    numsteps = convert(self.timeunits, 'timesteps', self.runtime, timestep=self.acemd.timestep)
    if self._version == 3:
        self.acemd.run = str(numsteps)

    pdbfile = os.path.join(inputdir, self.acemd.coordinates)
    inmol = Molecule(pdbfile)

    if np.any(inmol.atomselect('lipids')) and not self.useconstantratio:
        logger.warning('Lipids detected in input structure. We highly recommend setting useconstantratio=True '
                       'for membrane simulations.')

    if self._version == 2:
        if self.restraints:
            raise RuntimeWarning('restraints are only available on {}(_version=3)'.format(self.__class__.__name__))
        if self.fb_k > 0:  # use TCL only for flatbottom
            self.acemd.tclforces = 'on'
            if isinstance(self.acemd.TCL, tuple):
                # The default TCL is a (template, remainder) pair; only the template takes parameters
                tcl = list(self.acemd.TCL)
                tcl[0] = tcl[0].format(NUMSTEPS=numsteps, TEMPERATURE=self.temperature, KCONST=self.fb_k,
                                       REFINDEX=' '.join(map(str, inmol.get('index', self.fb_reference))),
                                       SELINDEX=' '.join(map(str, inmol.get('index', self.fb_selection))),
                                       BOX=' '.join(map(str, self.fb_box)))
                self.acemd.TCL = tcl[0] + tcl[1]
            else:
                logger.warning('{} default TCL was already formatted.'.format(self.__class__.__name__))
        else:
            self.acemd.TCL = 'set numsteps {NUMSTEPS}\n'.format(NUMSTEPS=numsteps)
        if self.useconstraints:
            # Turn on constraints
            self.acemd.constraints = 'on'
            self.acemd.constraintscaling = '1.0'
        else:
            if len(self.constraints) != 0:
                logger.warning('You have setup constraints to {} but constraints are turned off. '
                               'If you want to use constraints, define '
                               'useconstraints=True'.format(self.constraints))
    elif self._version == 3:
        if self.restraints is not None:
            logger.info('Using user-provided restraints and ignoring constraints and fb_potential')
            self.acemd.restraints = self.restraints
        else:
            # Version 3 has no constraints/flat-bottom options: both are expressed as restraints
            restraints = list()
            if self.fb_k > 0:
                logger.warning('Converting fb_potential to restraints. This is a convenience '
                               'functional conversion. We recommend start using restraints with '
                               '{}(_version=3)'.format(self.__class__.__name__))
                restraints += self._fb_potential2restraints(inputdir)
            if self.useconstraints:
                logger.warning('Converting constraints to restraints. This is a convenience '
                               'functional conversion. We recommend start using restraints with '
                               '{}(_version=3)'.format(self.__class__.__name__))
                restraints += self._constraints2restraints()
            else:
                if len(self.constraints) != 0:
                    logger.warning('You have setup constraints to {} but constraints are turned off. '
                                   'If you want to use constraints, define '
                                   'useconstraints=True'.format(self.constraints))
            if len(restraints) != 0:
                self.acemd.restraints = restraints

    if self.useconstantratio:
        self.acemd.useconstantratio = 'on'

    if self.adaptive:
        self.acemd.binvelocities = None

    self.acemd.setup(inputdir, outputdir, overwrite=True)

    if self._version == 2:
        # Adding constraints by writing them to the consref file
        if self.useconstraints:
            inconsreffile = os.path.join(inputdir, self.acemd.consref)
            consrefmol = Molecule(inconsreffile)
            consrefmol.set('occupancy', 0)
            consrefmol.set('beta', 0)
            if len(self.constraints) == 0:
                raise RuntimeError('You have set the production to use constraints (useconstraints=True), but have '
                                   'not defined any constraints (constraints={}).')
            else:
                # The constraint value of each selection is stored in the beta field
                for sel in self.constraints:
                    consrefmol.set('beta', self.constraints[sel], sel)
            outconsreffile = os.path.join(outputdir, self.acemd.consref)
            consrefmol.write(outconsreffile)
def write(self, inputdir, outputdir):
    """ Write the equilibration protocol

    Writes the equilibration protocol and files into a folder for execution
    using files inside the inputdir directory

    Parameters
    ----------
    inputdir : str
        Path to a directory containing the files produced by a build process.
    outputdir : str
        Directory where to write the equilibration setup files.

    Raises
    ------
    RuntimeError
        If no constraints have been defined. The check happens before anything is written to `outputdir`.

    Examples
    --------
    >>> md = Equilibration()
    >>> md.write('./build','./equil')
    """
    self._findFiles(inputdir)
    self._amberFixes()

    # Fail fast: the constraint reference file written at the end needs at least one constraint, so
    # refuse to continue before the acemd configuration is modified or any file is written.
    if len(self.constraints) == 0:
        raise RuntimeError('You have not defined any constraints for the Equilibration (constraints={}).')

    from htmd.units import convert
    numsteps = convert(self.timeunits, 'timesteps', self.runtime, timestep=self.acemd.timestep)

    pdbfile = os.path.join(inputdir, self.acemd.coordinates)
    inmol = Molecule(pdbfile)

    if np.any(inmol.atomselect('lipids')) and not self.useconstantratio:
        logger.warning('Lipids detected in input structure. We highly recommend setting useconstantratio=True '
                       'for membrane simulations.')

    # Default to constraining for half of the run when no explicit number of steps is given
    if self.constraintsteps is None:
        constrsteps = int(numsteps / 2)
    else:
        constrsteps = int(self.constraintsteps)

    # The TCL template is a (header, body) tuple until it has been formatted once; after that it is a
    # plain string and must not be formatted again.
    if isinstance(self.acemd.TCL, tuple):
        tcl = list(self.acemd.TCL)
        tcl[0] = tcl[0].format(NUMSTEPS=numsteps, KCONST=self.fb_k,
                               REFINDEX=' '.join(map(str, inmol.get('index', self.fb_reference))),
                               SELINDEX=' '.join(map(str, inmol.get('index', self.fb_selection))),
                               BOX=' '.join(map(str, self.fb_box)),
                               NVTSTEPS=self.nvtsteps, CONSTRAINTSTEPS=constrsteps,
                               TEMPERATURE=self.temperature)
        self.acemd.TCL = tcl[0] + tcl[1]
    else:
        logger.warning('{} default TCL was already formatted.'.format(self.__class__.__name__))

    # Guess the cell dimensions from the coordinates when the user did not provide them
    if self.acemd.celldimension is None and self.acemd.extendedsystem is None:
        coords = inmol.get('coords', sel='water')
        vacuum = coords.size == 0  # No water found: it's a vacuum simulation
        if vacuum:
            coords = inmol.get('coords', sel='all')
        dim = np.max(coords, axis=0) - np.min(coords, axis=0)
        if vacuum:
            dim = dim + 12.
        self.acemd.celldimension = '{} {} {}'.format(dim[0], dim[1], dim[2])

    if self.useconstantratio:
        self.acemd.useconstantratio = 'on'

    self.acemd.setup(inputdir, outputdir, overwrite=True)

    # Adding constraints by writing them to the consref file: every atom starts with beta 0 and the
    # atoms of each constrained selection get the value configured for that selection.
    inconsreffile = os.path.join(inputdir, self.acemd.consref)
    consrefmol = Molecule(inconsreffile)
    consrefmol.set('occupancy', 0)
    consrefmol.set('beta', 0)
    for sel in self.constraints:
        consrefmol.set('beta', self.constraints[sel], sel)
    outconsreffile = os.path.join(outputdir, self.acemd.consref)
    consrefmol.write(outconsreffile)
def write(self, inputdir, outputdir):
    """ Writes the production protocol and files into a folder.

    Parameters
    ----------
    inputdir : str
        Path to a directory containing the files produced by a equilibration process.
    outputdir : str
        Directory where to write the production setup files.

    Raises
    ------
    RuntimeError
        If `useconstraints` is set but no constraints have been defined. The check happens before
        anything is written to `outputdir`.
    """
    self._findFiles(inputdir)
    self._amberFixes()

    # Fail fast: refuse a contradictory configuration before the acemd settings are modified or any
    # file is written.
    if self.useconstraints and len(self.constraints) == 0:
        raise RuntimeError('You have set the production to use constraints (useconstraints=True), but have not '
                           'defined any constraints (constraints={}).')

    self.acemd.temperature = self.temperature
    self.acemd.langevintemp = self.temperature

    from htmd.units import convert
    numsteps = convert(self.timeunits, 'timesteps', self.runtime, timestep=self.acemd.timestep)

    pdbfile = os.path.join(inputdir, self.acemd.coordinates)
    inmol = Molecule(pdbfile)

    if np.any(inmol.atomselect('lipids')) and not self.useconstantratio:
        logger.warning('Lipids detected in input structure. We highly recommend setting useconstantratio=True '
                       'for membrane simulations.')

    if self.fb_k > 0:  # use TCL only for flatbottom
        self.acemd.tclforces = 'on'
        # The TCL template is a (header, body) tuple until it has been formatted once. Formatting an
        # already formatted string would split it into single characters and corrupt the script.
        if isinstance(self.acemd.TCL, tuple):
            tcl = list(self.acemd.TCL)
            tcl[0] = tcl[0].format(NUMSTEPS=numsteps, TEMPERATURE=self.temperature, KCONST=self.fb_k,
                                   REFINDEX=' '.join(map(str, inmol.get('index', self.fb_reference))),
                                   SELINDEX=' '.join(map(str, inmol.get('index', self.fb_selection))),
                                   BOX=' '.join(map(str, self.fb_box)))
            self.acemd.TCL = tcl[0] + tcl[1]
        else:
            logger.warning('{} default TCL was already formatted.'.format(self.__class__.__name__))
    else:
        self.acemd.TCL = 'set numsteps {NUMSTEPS}\n' \
                         'set temperature {TEMPERATURE}\n'.format(NUMSTEPS=numsteps, TEMPERATURE=self.temperature)

    if self.useconstraints:
        # Turn on constraints
        self.acemd.constraints = 'on'
        self.acemd.constraintscaling = 1.0
    elif len(self.constraints) != 0:
        logger.warning('You have setup constraints to {} but constraints are turned off. '
                       'If you want to use constraints, define useconstraints=True'.format(self.constraints))

    if self.useconstantratio:
        self.acemd.useconstantratio = 'on'

    if self.adaptive:
        self.acemd.binvelocities = None

    self.acemd.setup(inputdir, outputdir, overwrite=True)

    # Adding constraints by writing them to the consref file: every atom starts with beta 0 and the
    # atoms of each constrained selection get the value configured for that selection.
    if self.useconstraints:
        inconsreffile = os.path.join(inputdir, self.acemd.consref)
        consrefmol = Molecule(inconsreffile)
        consrefmol.set('occupancy', 0)
        consrefmol.set('beta', 0)
        for sel in self.constraints:
            consrefmol.set('beta', self.constraints[sel], sel)
        outconsreffile = os.path.join(outputdir, self.acemd.consref)
        consrefmol.write(outconsreffile)
def build(mol, topo=None, param=None, prefix='structure', outdir='./', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, patches=None, psfgen=None, execute=True):
    """ Builds a system for CHARMM

    Uses VMD and psfgen to build a system for CHARMM. Additionally it allows for ionization and adding of disulfide
    bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    topo : list of str
        A list of topology `rtf` files.
        Default: ['top/top_all22star_prot.rtf', 'top/top_all36_lipid.rtf', 'top/top_water_ions.rtf']
    param : list of str
        A list of parameter `prm` files.
        Default: ['par/par_all22star_prot.prm', 'par/par_all36_lipid.prm', 'par/par_water_ions.prm']
    prefix : str
        The prefix for the generated pdb and psf files
    outdir : str
        The path to the output directory
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps of that segment.
        e.g. caps['P'] = ['first ACE', 'last CT3']. Default: will apply ACE and CT3 caps to proteins and none caps
        to the rest
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration (in Molar) to add to the system after neutralization.
    saltanion : {'CLA'}
        The anion type. Please use only CHARMM ion atom names.
    saltcation : {'SOD', 'MG', 'POT', 'CES', 'CAL', 'ZN2'}
        The cation type. Please use only CHARMM ion atom names.
    disulfide : list of :class:`DisulfideBridge <htmd.builder.builder.DisulfideBridge>` objects
        If None it will guess disulfide bonds. Otherwise provide a list of `DisulfideBridge` objects.
    patches : list of str
        Any further patches the user wants to apply
    psfgen : str
        Path to psfgen executable used to build for CHARMM
    execute : bool
        Disable building. Will only write out the input script needed by psfgen. Does not include ionization.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object

    Raises
    ------
    FileNotFoundError
        If a build is requested (`execute=True`) and no psfgen executable was given or found.
    NameError
        If psfgen did not produce the expected pdb/psf files.

    Example
    -------
    >>> charmm.listFiles()
    >>> topos = ['top/top_all22star_prot.rtf', './benzamidine.rtf']
    >>> params = ['par/par_all22star_prot.prm', './benzamidine.prm']
    >>> molbuilt = charmm.build(mol, topo=topos, param=params, outdir='/tmp/build', saltconc=0.15)
    """
    mol = mol.copy()
    _missingSegID(mol)
    if psfgen is None:
        # shutil.which returns None (it does not raise) when the executable cannot be found
        psfgen = shutil.which('psfgen', mode=os.X_OK)
        if not psfgen and execute:
            raise FileNotFoundError('Could not find psfgen executable, or no execute permissions are given. '
                                    'Run `conda install psfgen`.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if topo is None:
        topo = _defaultTopo()
    if param is None:
        param = _defaultParam()
    if caps is None:
        caps = _defaultCaps(mol)
    #_missingChain(mol)
    #_checkProteinGaps(mol)
    if patches is None:
        patches = []
    if isinstance(patches, str):
        patches = [patches]

    # Work on copies so the lists passed in by the caller are never modified, and so the ionization
    # re-build below does not receive the protonation patches a second time.
    alltopo = list(topo)
    allpatches = list(patches)
    # Find protonated residues and add patches for them
    allpatches += _protonationPatches(mol)

    charmmdir = path.join(home(), 'builder', 'charmmfiles')
    with open(path.join(outdir, 'build.vmd'), 'w') as f:
        f.write('# psfgen file generated by charmm.build\n')
        f.write('package require psfgen;\n')
        f.write('psfcontext reset;\n\n')

        # Copying and printing out the topologies
        for i, topofile in enumerate(alltopo):
            # Names not starting with '.' are looked up in the bundled CHARMM files first
            if topofile[0] != '.' and path.isfile(path.join(charmmdir, topofile)):
                topofile = path.join(charmmdir, topofile)
                alltopo[i] = topofile
            # The index prefix keeps files with equal basenames from overwriting each other
            localname = '{}.'.format(i) + path.basename(topofile)
            shutil.copy(topofile, path.join(outdir, localname))
            f.write('topology ' + localname + '\n')
        f.write('\n')

        _printAliases(f)

        # Printing out segments
        logger.info('Writing out segments.')
        segments = _getSegments(mol)
        for seg in segments:
            pdbname = 'segment' + seg + '.pdb'
            mol.write(path.join(outdir, pdbname), sel='segid ' + seg)

            segatoms = mol.atomselect('segid {}'.format(seg))
            segwater = mol.atomselect('segid {} and water'.format(seg))

            f.write('segment ' + seg + ' {\n')
            if np.all(segatoms == segwater):  # If segment only contains waters, set: auto none
                f.write('\tauto none\n')
            f.write('\tpdb ' + pdbname + '\n')
            if caps is not None and seg in caps:
                for c in caps[seg]:
                    f.write('\t' + c + '\n')
            f.write('}\n')
            f.write('coordpdb ' + pdbname + ' ' + seg + '\n\n')

        # Printing out patches for the disulfide bridges
        if disulfide is None:
            disulfide = detectDisulfideBonds(mol)

        if len(disulfide) != 0:
            for d in disulfide:
                f.write('patch DISU {}:{} {}:{}\n'.format(d.segid1, d.resid1, d.segid2, d.resid2))
            f.write('\n')

        # Printing out extra patches
        if len(allpatches) != 0:
            for p in allpatches:
                f.write(p + '\n')
            f.write('\n')

        f.write('guesscoord\n')
        f.write('writepsf ' + prefix + '.psf\n')
        f.write('writepdb ' + prefix + '.pdb\n')
        #f.write('quit\n')

    if param is not None:
        _charmmCombine(param, path.join(outdir, 'parameters'))

    molbuilt = None
    if execute:
        logpath = os.path.abspath('{}/log.txt'.format(outdir))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                #call([vmd, '-dispdev', 'text', '-e', './build.vmd'], stdout=logfile)
                call([psfgen, './build.vmd'], stdout=logfile)
            _logParser(logpath)
        finally:
            # Always return to the original working directory, even when the build fails
            os.chdir(currdir)
        logger.info('Finished building.')

        # The psfgen script writes its output as <prefix>.pdb / <prefix>.psf
        builtpdb = path.join(outdir, prefix + '.pdb')
        builtpsf = path.join(outdir, prefix + '.psf')
        if path.isfile(builtpdb) and path.isfile(builtpsf):
            molbuilt = Molecule(builtpdb)
            molbuilt.read(builtpsf)
        else:
            raise NameError('No structure pdb/psf file was generated. Check {} for errors in building.'
                            .format(logpath))

        if ionize:
            # Keep the un-ionized system next to the final one
            shutil.move(builtpdb, path.join(outdir, prefix + '.noions.pdb'))
            shutil.move(builtpsf, path.join(outdir, prefix + '.noions.psf'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            ff='charmm', anion=saltanion,
                                                                            cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included
            return build(newmol, topo=alltopo, param=param, prefix=prefix, outdir=outdir, ionize=False, caps=caps,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, patches=patches, psfgen=psfgen)
    return molbuilt
class MutualInformation:
    def __init__(self, model, mol=None, fstep=0.1, skip=1):
        """ Class that calculates the mutual information of protein residues.

        Parameters
        ----------
        model : :class:`Model <htmd.model.Model>` object
            A Model object with a calculated MSM
        mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
            A reference molecule from which to obtain structural information. By default
            model.data.simlist[0].molfile will be used.
        fstep : float
            The frame step of the simulations
        skip : int
            Frame skipping

        Examples
        --------
        >>> from htmd.mutualinformation import MutualInformation
        >>> from htmd.ui import *
        >>>
        >>> sims = simlist(glob('./filtered/*/'), './filtered/filtered.pdb')
        >>> mol = Molecule('./filtered/filtered.pdb')
        >>>
        >>> metr = Metric(sims)
        >>> metr.set(MetricDihedral())
        >>> datadih = metr.project()
        >>> datadih.fstep = 0.1
        >>> datadih.dropTraj()
        >>>
        >>> metr = Metric(datadih.simlist)
        >>> metr.set(MetricSelfDistance('protein and name CA', metric='contacts'))
        >>> dataco = metr.project()
        >>> dataco.fstep = 0.1
        >>> dataco.dropTraj()
        >>>
        >>> tica = TICA(datadih, 20, units='ns')
        >>> datatica = tica.project(3)
        >>> datatica.cluster(MiniBatchKMeans(n_clusters=1500))
        >>> model = Model(datatica)
        >>> model.markovModel(12, 4, units='ns')
        >>>
        >>> mu = MutualInformation(model)
        >>> mu.calculate()
        >>> mu.saveMI('./mi_matrix.npy')
        >>> mu.weightGraph(dataco, 0.005)
        >>> mu.save_graphml('./weightgraph.graphml')
        """
        self.model = model
        self.mol = mol
        if mol is None:
            self.mol = Molecule(self.model.data.simlist[0].molfile)

        self._computeChiDihedrals(fstep=fstep, skip=skip)

        self.bindihcat = self._histogram()  # Lasts two minutes
        self.resids = self.mol.get('resid', 'name CA')
        # Lookup table from resid to row/column of the MI matrix; -1 marks resids without a CA atom
        self.residmap = np.ones(self.mol.resid.max() + 1, dtype=int) * -1
        self.residmap[self.resids] = np.arange(len(self.resids))

        self.mi_matrix = None
        self.graph_array = None
        self.graph = None

    def calculate(self):
        """ Computes the residue-residue mutual information matrix and stores it in `self.mi_matrix`. """
        from htmd.config import _config
        from htmd.parallelprogress import ParallelExecutor
        numchi = self.chi.numDimensions
        statdist = self.model.msm.stationary_distribution
        stconcat = np.concatenate(self.model.data.St)
        microcat = self.model.micro_ofcluster[stconcat]

        aprun = ParallelExecutor(n_jobs=_config['ncpus'])
        res = aprun(total=numchi, desc='Calculating MI')(
            delayed(self._parallelAll)(numchi, dih1, 4, self.model.micronum, self.bindihcat, microcat, statdist)
            for dih1 in range(numchi))

        MI_all = np.zeros((len(self.resids), len(self.resids)))
        atomidx = self.chi.description.atomIndexes
        for dihcounts, pairs in res:
            for dihc, (dih1, dih2) in zip(dihcounts, pairs):
                if dih1 == dih2:
                    continue
                # A dihedral is assigned to the residue of its first atom
                resid1 = self.residmap[self.mol.resid[atomidx[dih1][0]]]
                resid2 = self.residmap[self.mol.resid[atomidx[dih2][0]]]
                MI_all[resid1][resid2] = self._calcMutualInfo(dihc)
        self.mi_matrix = self._cleanautocorrelations(MI_all)

    def _computeChiDihedrals(self, fstep=0.1, skip=1):
        """ Projects the simulations of the model on the chi1 dihedrals and stores the result in `self.chi`. """
        chis = []
        protmol = self.mol.copy()
        protmol.filter('protein')

        caidx = self.mol.atomselect('protein and name CA')
        for residue in self.mol.resid[caidx]:
            ch = Dihedral.chi1(protmol, residue)
            if ch is not None:  # chi1 returns None for residues where it could not be built
                chis.append(ch)

        metr = Metric(self.model.data.simlist, skip=skip)
        metr.set(MetricDihedral(chis, sincos=False))
        data = metr.project()
        data.fstep = fstep
        self.chi = data

    def _histogram(self):
        """ Discretizes every dihedral of `self.chi` into a small set of category labels.

        Returns
        -------
        binneddih : np.ndarray
            Array with one row per frame and one column per dihedral containing the category of each value.
        """
        condata = np.concatenate(self.chi.dat)
        # Bin edges (presumably in degrees - TODO confirm against MetricDihedral output)
        bins = np.array([-180, -150, -90, -30, 0, 30, 90, 150, 180])
        # Maps the bin index returned by np.digitize to one of the category labels 0-3
        dic = {1: 3, 2: 0, 3: 1, 4: 0, 5: 0, 6: 2, 7: 0, 8: 3, 9: 3}
        binneddih = np.zeros([condata.shape[0], condata.shape[1]])
        for dihedral in range(condata.shape[1]):
            binning = np.digitize(condata[:, dihedral], bins)
            # Bin indexes missing from the mapping are kept unchanged
            binneddih[:, dihedral] = [dic.get(n, n) for n in binning]
        return binneddih

    def _calcMutualInfo(self, contingency):
        """ Calculates the mutual information of a 2D contingency table of (possibly fractional) counts. """
        # Ripped out of sklearn since it converts floats to integers without warning which breaks our use-case
        from math import log
        nzx, nzy = np.nonzero(contingency)
        nz_val = contingency[nzx, nzy]
        contingency_sum = contingency.sum()
        pi = np.ravel(contingency.sum(axis=1))
        pj = np.ravel(contingency.sum(axis=0))
        log_contingency_nm = np.log(nz_val)
        contingency_nm = nz_val / contingency_sum
        # Don't need to calculate the full outer product, just for non-zeroes
        outer = pi.take(nzx) * pj.take(nzy)
        log_outer = -np.log(outer) + log(pi.sum()) + log(pj.sum())
        mi = (contingency_nm * (log_contingency_nm - log(contingency_sum)) +
              contingency_nm * log_outer)
        return mi.sum()

    def _parallelAll(self, numdih, dih1, numbins, micronum, bindihcat, microcat, stat_dist):
        """ Builds the weighted joint histograms of dihedral `dih1` against every dihedral `dih2 >= dih1`.

        Returns
        -------
        results : list of np.ndarray
            One (numbins, numbins) table per dihedral pair
        resultpairs : list of tuple
            The (dih1, dih2) pair corresponding to each table
        """
        results = []
        resultpairs = []
        for dih2 in range(dih1, numdih):
            dihcounts = np.zeros((numbins, numbins, micronum))
            # Find pairs of dihedrals (keys) and which absolute frames they occur in (vals)
            df = pd.DataFrame({'a': list(zip(bindihcat[:, dih1], bindihcat[:, dih2]))})
            gg = df.groupby(by=df.a).groups
            for pair in gg:
                microsofpairs = microcat[gg[pair]]  # Get the microstates of all frames having given dihedral pair
                microsofpairs = np.delete(microsofpairs, np.where(microsofpairs == -1)[0])  # Delete dropped clusters
                counts = np.bincount(microsofpairs)  # Count number of frames with that pair for each microstate (0, max_micro_seen)
                dihcounts[int(pair[0]), int(pair[1]), :len(counts)] += counts  # Add the counts
            dihcounts /= dihcounts.sum(axis=0).sum(axis=0)  # Normalize all slices by total counts in each microstate
            dihcounts *= stat_dist  # Multiply by stationary distribution of each state
            dihcounts = np.sum(dihcounts, axis=2)  # Calculate the weighted sum over all states
            results.append(dihcounts)
            resultpairs.append((dih1, dih2))
        return results, resultpairs

    def _cleanautocorrelations(self, mi):
        """ Zeroes, in place, the diagonal and the first off-diagonals of `mi` and returns it. """
        rows, cols = np.indices(mi.shape)
        # Entries with |i - j| < 2 relate a residue to itself or to its direct neighbour in the matrix
        mi[np.abs(rows - cols) < 2] = 0
        return mi

    def saveMI(self, path):
        """ Saves the mutual information matrix to `path` with np.save. """
        np.save(path, self.mi_matrix)

    def loadMI(self, path):
        """ Loads a mutual information matrix previously stored with np.save. """
        self.mi_matrix = np.load(path)

    def weightGraph(self, datacontacts, mi_threshold, time_treshold=0.6):
        """ Builds the residue graph weighted by mutual information and stores it in `self.graph`.

        Parameters
        ----------
        datacontacts : data object
            Projected contact data; its `dat` and `description.atomIndexes` attributes are used.
        mi_threshold : float
            Residue pairs with a mutual information not above this value get no edge
        time_treshold : float
            Fraction of all frames a contact has to be exceeded in for the pair to get an edge

        Raises
        ------
        Exception
            If the number of CA atoms of the molecule differs from the number of residues of the MI data.
        """
        if len(self.mol.get('resid', 'name CA')) != len(self.resids):
            raise Exception('The length of the protein doesn\'t match the Mutual Information data')
        contactcat = np.concatenate(datacontacts.dat)
        contacts_matrix = np.zeros([len(self.resids), len(self.resids)])
        for i in range(contactcat.shape[1]):
            counter = np.count_nonzero(contactcat[:, i])
            resid1 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][0]]]
            resid2 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][1]]]
            contacts_matrix[resid1][resid2] = counter

        self.graph_array = np.zeros([contacts_matrix.shape[0], contacts_matrix.shape[0]])
        mask = (self.mi_matrix > mi_threshold) & (contacts_matrix > (time_treshold * contactcat.shape[0]))
        self.graph_array[mask] = self.mi_matrix[mask]

        # Edge list of the upper triangle: [source resid, target resid, weight]
        intermed = []
        for source in range(self.graph_array.shape[0]):
            for target in range(source + 1, self.graph_array.shape[1]):
                if self.graph_array[source, target] != 0:
                    intermed.append(
                        [int(self.resids[source]), int(self.resids[target]), float(self.graph_array[source, target])])

        import pandas as pd
        import networkx as nx
        from sklearn.cluster.spectral import SpectralClustering

        edges = pd.DataFrame(intermed, columns=['source', 'target', 'weight'])
        edges[['source', 'target']] = edges[['source', 'target']].astype(type('int', (int,), {}))
        edges['weight'] = edges['weight'].astype(type('float', (float,), {}))
        G = nx.from_pandas_dataframe(edges, 'source', 'target', ['weight'])
        ## setSegment
        segids = self.mol.get('segid', 'name CA')
        # Keep the residues taking part in at least one edge. The match itself is tested, not the
        # truthiness of the matching row labels, which would miss a residue only present in row 0.
        seg_res_dict = {key: value for (key, value) in zip(self.resids, segids)
                        if (edges['source'] == key).any() or (edges['target'] == key).any()}
        nx.set_node_attributes(G, 'Segment', seg_res_dict)
        ## set
        if not nx.is_connected(G):
            # Only the largest connected component is kept
            G = max(nx.connected_component_subgraphs(G), key=len)
        flow_cent = nx.current_flow_betweenness_centrality(G, weight='weight')
        nx.set_node_attributes(G, 'flowcent', flow_cent)
        Spectre = SpectralClustering(n_clusters=10, affinity='precomputed')
        model = Spectre.fit_predict(self.graph_array)
        model = model.astype(type('float', (float,), {}))
        spectral_dict = {key: value for (key, value) in zip(self.resids, model) if key in G.nodes()}
        nx.set_node_attributes(G, 'spectral', spectral_dict)
        self.graph = G

    def save_graphml(self, path):
        """ Writes the graph built by `weightGraph` to `path` in GraphML format. """
        import networkx as nx
        nx.write_graphml(self.graph, path)
def build(mol, topo=None, param=None, stream=None, prefix='structure', outdir='./build', caps=None, ionize=True,
          saltconc=0, saltanion=None, saltcation=None, disulfide=None, patches=None, noregen=None, psfgen=None,
          execute=True, _clean=True):
    """ Builds a system for CHARMM

    Uses VMD and psfgen to build a system for CHARMM. Additionally it allows for ionization and adding of disulfide
    bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    topo : list of str
        A list of topology `rtf` files.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available topology files.
        Default: ['top/top_all36_prot.rtf', 'top/top_all36_lipid.rtf', 'top/top_water_ions.rtf']
    param : list of str
        A list of parameter `prm` files.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available parameter files.
        Default: ['par/par_all36_prot_mod.prm', 'par/par_all36_lipid.prm', 'par/par_water_ions.prm']
    stream : list of str
        A list of stream `str` files containing topologies and parameters.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available stream files.
        Default: ['str/prot/toppar_all36_prot_arg0.str']
    prefix : str
        The prefix for the generated pdb and psf files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps of that segment.
        e.g. caps['P'] = ['first ACE', 'last CT3'] or caps['P'] = ['first none', 'last none'].
        Default: will apply ACE and CT3 caps to proteins and none caps to the rest.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration (in Molar) to add to the system after neutralization.
    saltanion : {'CLA'}
        The anion type. Please use only CHARMM ion atom names.
    saltcation : {'SOD', 'MG', 'POT', 'CES', 'CAL', 'ZN2'}
        The cation type. Please use only CHARMM ion atom names.
    disulfide : list of :class:`DisulfideBridge <htmd.builder.builder.DisulfideBridge>` objects
        If None it will guess disulfide bonds. Otherwise provide a list of `DisulfideBridge` objects.
    patches : list of str
        Any further patches the user wants to apply
    noregen : list of str
        A list of patches that must not be regenerated (angles and dihedrals)
        Default: ['FHEM', 'PHEM', 'PLOH', 'PLO2', 'PLIG', 'PSUL']
    psfgen : str
        Path to psfgen executable used to build for CHARMM
    execute : bool
        Disable building. Will only write out the input script needed by psfgen. Does not include ionization.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object

    Raises
    ------
    FileNotFoundError
        If no psfgen executable was given or found.
    BuildError
        If the psfgen log contains errors or psfgen did not produce the expected pdb/psf files.

    Example
    -------
    >>> from htmd import *
    >>> mol = Molecule("3PTB")
    >>> mol.filter("not resname BEN")
    >>> mol.renumberResidues()
    >>> molbuilt = charmm.build(mol, outdir='/tmp/build', ionize=False)  # doctest: +ELLIPSIS
    Bond between A: [serial 185 resid 42 resname CYS chain A segid 0]
                 B: [serial 298 resid 58 resname CYS chain A segid 0]...
    >>> # More complex example
    >>> topos  = ['top/top_all36_prot.rtf', './benzamidine.rtf', 'top/top_water_ions.rtf']
    >>> params = ['par/par_all36_prot_mod.prm', './benzamidine.prm', 'par/par_water_ions.prm']
    >>> disu = [DisulfideBridge('P', 157, 'P', 13), DisulfideBridge('K', 1, 'K', 25)]
    >>> molbuilt = charmm.build(mol, topo=topos, param=params, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    mol = mol.copy()
    _missingSegID(mol)
    _checkMixedSegment(mol)
    _checkResidueInsertions(mol)
    if psfgen is None:
        psfgen = shutil.which('psfgen', mode=os.X_OK)
        if not psfgen:
            raise FileNotFoundError('Could not find psfgen executable, or no execute permissions are given. '
                                    'Run `conda install psfgen`.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    if _clean:
        _cleanOutDir(outdir)
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if stream is None:
        stream = defaultStream()
    if caps is None:
        caps = _defaultCaps(mol)
    # patches that must _not_ be regenerated
    if noregen is None:
        noregen = ['FHEM', 'PHEM', 'PLOH', 'PLO2', 'PLIG', 'PSUL']

    # Work on copies so the lists passed in by the caller are never modified
    alltopo = topo.copy()
    allparam = param.copy()

    # Splitting the stream files and adding them to the list of parameter and topology files
    charmmdir = path.join(home(), 'builder', 'charmmfiles')
    for s in stream:
        # Names not starting with '.' are looked up in the bundled CHARMM files first
        if s[0] != '.' and path.isfile(path.join(charmmdir, s)):
            s = path.join(charmmdir, s)
        outrtf, outprm = _prepareStream(s)
        alltopo.append(outrtf)
        allparam.append(outprm)

    #_missingChain(mol)
    #_checkProteinGaps(mol)
    if patches is None:
        patches = []
    if isinstance(patches, str):
        patches = [patches]
    allpatches = []
    allpatches += patches
    # Find protonated residues and add patches for them
    allpatches += _protonationPatches(mol)

    with open(path.join(outdir, 'build.vmd'), 'w') as f:
        f.write('# psfgen file generated by charmm.build\n')
        f.write('package require psfgen;\n')
        f.write('psfcontext reset;\n\n')

        # Copying and printing out the topologies
        if not path.exists(path.join(outdir, 'topologies')):
            os.makedirs(path.join(outdir, 'topologies'))
        for i, topofile in enumerate(alltopo):
            if topofile[0] != '.' and path.isfile(path.join(charmmdir, topofile)):
                topofile = path.join(charmmdir, topofile)
                alltopo[i] = topofile
            # The index prefix keeps files with equal basenames from overwriting each other
            localname = '{}.'.format(i) + path.basename(topofile)
            shutil.copy(topofile, path.join(outdir, 'topologies', localname))
            f.write('topology ' + path.join('topologies', localname) + '\n')
        f.write('\n')

        _printAliases(f)

        # Printing out segments
        if not path.exists(path.join(outdir, 'segments')):
            os.makedirs(path.join(outdir, 'segments'))
        logger.info('Writing out segments.')
        segments = _getSegments(mol)
        wateratoms = mol.atomselect('water')
        for seg in segments:
            pdbname = 'segment' + seg + '.pdb'
            segatoms = mol.segid == seg
            mol.write(path.join(outdir, 'segments', pdbname), sel=segatoms)

            segwater = wateratoms & segatoms
            f.write('segment ' + seg + ' {\n')
            if np.all(segatoms == segwater):  # If segment only contains waters, set: auto none
                f.write('\tauto none\n')
            f.write('\tpdb ' + path.join('segments', pdbname) + '\n')
            if caps is not None and seg in caps:
                for c in caps[seg]:
                    f.write('\t' + c + '\n')
            f.write('}\n')
            f.write('coordpdb ' + path.join('segments', pdbname) + ' ' + seg + '\n\n')

        # Printing out patches for the disulfide bridges
        if disulfide is None:
            disulfide = detectDisulfideBonds(mol)

        if len(disulfide) != 0:
            for d in disulfide:
                f.write('patch DISU {}:{} {}:{}\n'.format(d.segid1, d.resid1, d.segid2, d.resid2))
            f.write('\n')

        # The second word of a patch line is the patch name
        noregenpatches = [p for p in allpatches if p.split()[1] in noregen]
        regenpatches = [p for p in allpatches if p.split()[1] not in noregen]

        # Printing regenerable patches
        if len(regenpatches) != 0:
            for p in regenpatches:
                f.write(p + '\n')
            f.write('\n')

        # Regenerate angles and dihedrals
        f.write('regenerate angles dihedrals\n')
        f.write('\n')

        # Printing non-regenerable patches; they have to come after the regeneration
        if len(noregenpatches) != 0:
            for p in noregenpatches:
                f.write(p + '\n')
            f.write('\n')

        f.write('guesscoord\n')
        f.write('writepsf ' + prefix + '.psf\n')
        f.write('writepdb ' + prefix + '.pdb\n')
        #f.write('quit\n')

    if allparam is not None:
        combine(allparam, path.join(outdir, 'parameters'))

    molbuilt = None
    if execute:
        logpath = os.path.abspath('{}/log.txt'.format(outdir))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                #call([vmd, '-dispdev', 'text', '-e', './build.vmd'], stdout=logfile)
                call([psfgen, './build.vmd'], stdout=logfile)
            errors = _logParser(logpath)
        finally:
            # Always return to the original working directory, even when the build fails
            os.chdir(currdir)
        if errors:
            raise BuildError(errors + ['Check {} for further information on errors in building.'.format(logpath)])
        logger.info('Finished building.')

        # The psfgen script writes its output as <prefix>.pdb / <prefix>.psf
        builtpdb = path.join(outdir, prefix + '.pdb')
        builtpsf = path.join(outdir, prefix + '.psf')
        if path.isfile(builtpdb) and path.isfile(builtpsf):
            molbuilt = Molecule(builtpdb)
            molbuilt.read(builtpsf)
        else:
            raise BuildError('No structure pdb/psf file was generated. Check {} for errors in building.'
                             .format(logpath))

        if ionize:
            # Archive everything produced so far before re-building with ions
            preionizedir = path.join(outdir, 'pre-ionize')
            os.makedirs(preionizedir)
            for item in glob(path.join(outdir, '*')):
                if item == preionizedir:
                    continue  # The archive directory itself stays where it is
                shutil.move(item, preionizedir)
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            anion=saltanion, cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included. The stream files were already split
            # into alltopo/allparam, and the output directory must not be cleaned again.
            return build(newmol, topo=alltopo, param=allparam, stream=[], prefix=prefix, outdir=outdir,
                         ionize=False, caps=caps, execute=execute, saltconc=saltconc, disulfide=disulfide,
                         patches=patches, noregen=noregen, psfgen=psfgen, _clean=False)

    # With execute=False no structure exists, so there is nothing to check
    if molbuilt is not None:
        _checkFailedAtoms(molbuilt)
        _recoverProtonations(molbuilt)
    return molbuilt
def build(mol, topo=None, param=None, stream=None, prefix='structure', outdir='./', caps=None, ionize=True,
          saltconc=0, saltanion=None, saltcation=None, disulfide=None, patches=None, psfgen=None, execute=True):
    """ Builds a system for CHARMM

    Uses VMD and psfgen to build a system for CHARMM. Additionally it allows for ionization and adding of disulfide
    bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    topo : list of str
        A list of topology `rtf` files.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available topology files.
        Default: ['top/top_all36_prot.rtf', 'top/top_all36_lipid.rtf', 'top/top_water_ions.rtf']
    param : list of str
        A list of parameter `prm` files.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available parameter files.
        Default: ['par/par_all36_prot_mod.prm', 'par/par_all36_lipid.prm', 'par/par_water_ions.prm']
    stream : list of str
        A list of stream `str` files containing topologies and parameters.
        Use :func:`charmm.listFiles <htmd.builder.charmm.listFiles>` to get a list of available stream files.
        Default: ['str/prot/toppar_all36_prot_arg0.str']
    prefix : str
        The prefix for the generated pdb and psf files
    outdir : str
        The path to the output directory
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps of that segment.
        e.g. caps['P'] = ['first ACE', 'last CT3']. Default: will apply ACE and CT3 caps to proteins and none caps
        to the rest
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration (in Molar) to add to the system after neutralization.
    saltanion : {'CLA'}
        The anion type. Please use only CHARMM ion atom names.
    saltcation : {'SOD', 'MG', 'POT', 'CES', 'CAL', 'ZN2'}
        The cation type. Please use only CHARMM ion atom names.
    disulfide : list of :class:`DisulfideBridge <htmd.builder.builder.DisulfideBridge>` objects
        If None it will guess disulfide bonds. Otherwise provide a list of `DisulfideBridge` objects.
    patches : list of str
        Any further patches the user wants to apply
    psfgen : str
        Path to psfgen executable used to build for CHARMM
    execute : bool
        Disable building. Will only write out the input script needed by psfgen. Does not include ionization.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False, since nothing is built in that case.

    Raises
    ------
    FileNotFoundError
        If the system should be built but no psfgen executable was given or found in the PATH.
    NameError
        If psfgen did not produce the `<prefix>.pdb` and `<prefix>.psf` files.

    Example
    -------
    >>> charmm.listFiles()
    >>> topos  = ['top/top_all36_prot.rtf', './benzamidine.rtf']
    >>> params = ['par/par_all36_prot_mod.prm', './benzamidine.prm']
    >>> molbuilt = charmm.build(mol, topo=topos, param=params, outdir='/tmp/build', saltconc=0.15)
    """
    mol = mol.copy()
    _missingSegID(mol)

    if psfgen is None:
        # shutil.which signals "not found" by returning None (it does not raise), so test the result explicitly.
        # The executable is only required when the build is really going to be executed.
        psfgen = shutil.which('psfgen', mode=os.X_OK)
        if psfgen is None and execute:
            raise FileNotFoundError('Could not find psfgen executable, or no execute permissions are given. '
                                    'Run `conda install psfgen`.')

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)

    # Work on private copies: both lists get extended and rewritten below and the caller's lists must stay untouched
    if topo is None:
        topo = ['top/top_all36_prot.rtf', 'top/top_all36_lipid.rtf', 'top/top_water_ions.rtf']
    else:
        topo = list(topo)
    if param is None:
        param = ['par/par_all36_prot_mod.prm', 'par/par_all36_lipid.prm', 'par/par_water_ions.prm']
    else:
        param = list(param)
    if stream is None:
        stream = ['str/prot/toppar_all36_prot_arg0.str']
    if caps is None:
        caps = _defaultCaps(mol)

    # Splitting the stream files and adding them to the list of parameter and topology files
    charmmdir = path.join(home(), 'builder', 'charmmfiles')
    for s in stream:
        if not s.startswith('.') and path.isfile(path.join(charmmdir, s)):
            s = path.join(charmmdir, s)
        outrtf, outprm = _prepareStream(s)
        topo.append(outrtf)
        param.append(outprm)

    if patches is None:
        patches = []
    elif isinstance(patches, str):
        patches = [patches]
    # Keep the user patches separate from the automatically detected ones. Only the user patches are forwarded to
    # the post-ionization rebuild, which detects the protonation patches again by itself.
    allpatches = list(patches)
    # Find protonated residues and add patches for them
    allpatches.extend(_protonationPatches(mol))

    with open(path.join(outdir, 'build.vmd'), 'w') as f:
        f.write('# psfgen file generated by charmm.build\n')
        f.write('package require psfgen;\n')
        f.write('psfcontext reset;\n\n')

        # Copying and printing out the topologies
        for i, topofile in enumerate(topo):
            if not topofile.startswith('.') and path.isfile(path.join(charmmdir, topofile)):
                topofile = path.join(charmmdir, topofile)
                topo[i] = topofile
            # The index prefix keeps files with the same basename from overwriting each other in outdir
            localname = '{}.'.format(i) + path.basename(topofile)
            shutil.copy(topofile, path.join(outdir, localname))
            f.write('topology ' + localname + '\n')
        f.write('\n')

        _printAliases(f)

        # Printing out segments
        logger.info('Writing out segments.')
        segments = _getSegments(mol)
        for seg in segments:
            pdbname = 'segment' + seg + '.pdb'
            mol.write(path.join(outdir, pdbname), sel='segid ' + seg)

            segatoms = mol.atomselect('segid {}'.format(seg))
            segwater = mol.atomselect('segid {} and water'.format(seg))

            f.write('segment ' + seg + ' {\n')
            if np.all(segatoms == segwater):  # If segment only contains waters, set: auto none
                f.write('\tauto none\n')
            f.write('\tpdb ' + pdbname + '\n')
            if caps is not None and seg in caps:
                for c in caps[seg]:
                    f.write('\t' + c + '\n')
            f.write('}\n')
            f.write('coordpdb ' + pdbname + ' ' + seg + '\n\n')

        # Printing out patches for the disulfide bridges
        if disulfide is None:
            disulfide = detectDisulfideBonds(mol)

        if len(disulfide) != 0:
            for d in disulfide:
                f.write('patch DISU {}:{} {}:{}\n'.format(d.segid1, d.resid1, d.segid2, d.resid2))
            f.write('\n')

        # Printing out extra patches
        if len(allpatches) != 0:
            for p in allpatches:
                f.write(p + '\n')
            f.write('\n')

        f.write('guesscoord\n')
        f.write('writepsf ' + prefix + '.psf\n')
        f.write('writepdb ' + prefix + '.pdb\n')

    _charmmCombine(param, path.join(outdir, 'parameters'))

    if not execute:
        # Only the psfgen input was requested: there is no built system to read back, check or ionize
        return None

    logpath = os.path.abspath('{}/log.txt'.format(outdir))
    logger.info('Starting the build.')
    currdir = os.getcwd()
    os.chdir(outdir)
    try:
        with open(logpath, 'w') as logfile:
            call([psfgen, './build.vmd'], stdout=logfile)
        _logParser(logpath)
    finally:
        # Always restore the working directory of the process, also when psfgen or the log parsing fails
        os.chdir(currdir)
    logger.info('Finished building.')

    # The build script writes <prefix>.pdb/.psf, so those are the files which have to be checked and read back
    pdbfile = path.join(outdir, prefix + '.pdb')
    psffile = path.join(outdir, prefix + '.psf')
    if not (path.isfile(pdbfile) and path.isfile(psffile)):
        raise NameError('No structure pdb/psf file was generated. Check {} for errors in building.'.format(logpath))
    molbuilt = Molecule(pdbfile)
    molbuilt.read(psffile)

    if ionize:
        shutil.move(pdbfile, path.join(outdir, prefix + '.noions.pdb'))
        shutil.move(psffile, path.join(outdir, prefix + '.noions.psf'))
        totalcharge = np.sum(molbuilt.charge)
        nwater = np.sum(molbuilt.atomselect('water and noh'))
        anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                        ff='charmm', anion=saltanion,
                                                                        cation=saltcation)
        newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
        # Redo the whole build but now with ions included. The stream files were already split into topo and param
        # above, hence stream=[] to prevent them from being split and appended a second time.
        return build(newmol, topo=topo, param=param, stream=[], prefix=prefix, outdir=outdir, ionize=False,
                     caps=caps, execute=execute, saltconc=saltconc, disulfide=disulfide, patches=patches,
                     psfgen=psfgen)

    _checkFailedAtoms(molbuilt)
    return molbuilt