def solvate(self, struct=None, **kwargs):
    """Solvate the structure *struct* in a box of solvent.

    The solvent box is taken from ``self.solvent.box``.

    :Keywords:
       *struct*
           pdb or gro coordinate file; when not supplied (or empty)
           ``self.files.structure`` is used instead
       *kwargs*
           All other arguments are handed to :func:`gromacs.setup.solvate`.
           *dirname*, *mainselection* and *distance* receive defaults when
           absent; *struct*, *top* and *water* are always overwritten here.

    :Returns: the dict returned by :func:`gromacs.setup.solvate`; its
              ``'struct'`` and ``'ndx'`` entries are also recorded in
              ``self.files``.
    """
    self.journal.start('solvate')

    workdir = kwargs.setdefault('dirname', self.BASEDIR('solvation'))
    self.dirs.solvation = realpath(workdir)

    # inputs that the caller cannot override
    solute = struct or self.files.structure
    kwargs['struct'] = self._checknotempty(solute, 'struct')
    kwargs['top'] = self._checknotempty(self.files.topology, 'top')
    kwargs['water'] = self.solvent.box

    # inputs that only get a default
    quoted_molecule = '"%s"' % self.molecule   # quotes are needed for make_ndx
    kwargs.setdefault('mainselection', quoted_molecule)
    kwargs.setdefault('distance', self.solvent.distance)

    # user supplied include directories come first, then the known ones
    user_includes = kwargs.pop('includes', [])
    kwargs['includes'] = asiterable(user_includes) + self.dirs.includes

    result = gromacs.setup.solvate(**kwargs)

    self.files.structure = kwargs['struct']
    self.files.solvated = result['struct']
    self.files.ndx = result['ndx']

    # the processed topology can be generated at this point, too
    self.processed_topology(**kwargs)

    self.journal.completed('solvate')
    return result
def solvate(self, struct=None, **kwargs):
    """Solvate the structure *struct* in a box of solvent.

    The solvent box is taken from ``self.solvent.box``.

    :Keywords:
       *struct*
           pdb or gro coordinate file; when not supplied (or empty)
           ``self.files.structure`` is used instead
       *distance*
           minimum distance between solute and the closest box face;
           defaults to ``self.solvent.distance`` but can be set
           explicitly here, too.
       *bt*
           box type (``-bt`` of :func:`gromacs.editconf`); one of

           * "triclinic"
           * "cubic"
           * "dodecahedron"
           * "octahedron"

           The default (also used for ``None``) is "dodecahedron".
       *kwargs*
           All other arguments are handed to the solvation setup
           function (``self._setup_solvate``). *dirname* and
           *mainselection* receive defaults when absent; *struct*,
           *top* and *water* are always overwritten here.

    :Raises: :exc:`ValueError` if *bt* is not one of the four box types.

    :Returns: the dict returned by ``self._setup_solvate``; its
              ``'struct'`` and ``'ndx'`` entries are also recorded in
              ``self.files``.
    """
    self.journal.start('solvate')

    workdir = kwargs.setdefault('dirname', self.BASEDIR('solvation'))
    self.dirs.solvation = realpath(workdir)

    # inputs that the caller cannot override
    solute = struct or self.files.structure
    kwargs['struct'] = self._checknotempty(solute, 'struct')
    kwargs['top'] = self._checknotempty(self.files.topology, 'top')
    kwargs['water'] = self.solvent.box

    # inputs that only get a default
    quoted_molecule = '"%s"' % self.molecule   # quotes are needed for make_ndx
    kwargs.setdefault('mainselection', quoted_molecule)
    kwargs.setdefault('distance', self.solvent.distance)

    # validate the box type before anything is run
    allowed_boxtypes = ("dodecahedron", "triclinic", "cubic", "octahedron")
    boxtype = kwargs.pop('bt', None)
    if boxtype is None:
        boxtype = "dodecahedron"
    if boxtype not in allowed_boxtypes:
        msg = "Invalid boxtype '{0}', not suitable for 'gmx editconf'.".format(
            boxtype)
        logger.error(msg)
        raise ValueError(msg)
    kwargs['bt'] = boxtype

    # user supplied include directories come first, then the known ones
    user_includes = kwargs.pop('includes', [])
    kwargs['includes'] = asiterable(user_includes) + self.dirs.includes

    result = self._setup_solvate(**kwargs)

    self.files.structure = kwargs['struct']
    self.files.solvated = result['struct']
    self.files.ndx = result['ndx']

    # the processed topology can be generated at this point, too
    self.processed_topology(**kwargs)

    self.journal.completed('solvate')
    return result
def topology(struct=None, protein='protein', top='system.top', dirname='top',
             posres="posres.itp", **pdb2gmx_args):
    """Build Gromacs topology files from a pdb by running ``pdb2gmx``.

    :Keywords:
       *struct*
           input structure (**required**)
       *protein*
           base name of the output files; the new structure is written
           to ``<protein>.gro``
       *top*
           name of the topology file
       *dirname*
           directory in which ``pdb2gmx`` is run and the new topology
           is stored
       *posres*
           name of the position restraints file; ``None`` selects
           ``<protein>_posres.itp``
       *pdb2gmx_args*
           arguments for ``pdb2gmx`` such as ``ff``, ``water``, ...;
           the keys ``f``, ``o``, ``p`` and ``i`` are always overwritten

    :Returns: dict with the resolved paths ``'top'``, ``'struct'`` and
              ``'posres'``.

    .. note::
       At the moment this function simply runs ``pdb2gmx`` and uses
       the resulting topology file directly. If you want to create
       more complicated topologies and maybe also use additional itp
       files or make a protein itp file then you will have to do this
       manually.
    """
    structure = realpath(struct)
    new_struct = protein + '.gro'
    if posres is None:
        posres = protein + '_posres.itp'

    # fixed input/output file options of pdb2gmx
    pdb2gmx_args['f'] = structure
    pdb2gmx_args['o'] = new_struct
    pdb2gmx_args['p'] = top
    pdb2gmx_args['i'] = posres

    with in_dir(dirname):
        logger.info(
            "[%(dirname)s] Building topology %(top)r from struct = %(struct)r"
            % {'dirname': dirname, 'top': top, 'struct': struct})
        # perhaps parse output from pdb2gmx 4.5.x to get the names of the
        # chain itp files?
        gromacs.pdb2gmx(**pdb2gmx_args)

    top_path = realpath(dirname, top)
    struct_path = realpath(dirname, new_struct)
    posres_path = realpath(dirname, posres)
    return {'top': top_path, 'struct': struct_path, 'posres': posres_path}
def topology(self, itp='drug.itp', **kwargs):
    """Generate a topology for compound *molecule*.

    The itp file is copied into the topology directory and a topology
    file is created there from a template by text substitution.

    :Keywords:
       *itp*
           Gromacs itp file; will be copied to topology dir and
           included in topology
       *dirname*
           name of the topology directory [``self.BASEDIR('top')``]
       *top_template*
           name of the topology template, resolved with
           :func:`config.get_template` ["system.top"]
       *topol*
           name of the generated topology file [basename of the
           template]

    :Returns: dict with the keys ``'dirname'`` and ``'topol'``.
    """
    self.journal.start('topology')

    dirname = kwargs.pop('dirname', self.BASEDIR('top'))
    self.dirs.topology = realpath(dirname)

    top_template = config.get_template(kwargs.pop('top_template', 'system.top'))
    topol = kwargs.pop('topol', os.path.basename(top_template))
    itp = os.path.realpath(itp)
    _itp = os.path.basename(itp)

    with in_dir(dirname):
        shutil.copy(itp, _itp)
        # Each entry is (line pattern, search regex, replacement). Raw
        # strings keep the regex escapes (\. and \s) intact without
        # relying on Python passing unknown escape sequences through.
        gromacs.cbook.edit_txt(
            top_template,
            [(r'#include +"compound\.itp"', r'compound\.itp', _itp),
             (r'#include +"oplsaa\.ff/tip4p\.itp"', r'tip4p\.itp', self.solvent.itp),
             (r'Compound', r'solvent', self.solvent_type),
             (r'Compound', r'DRUG', self.molecule),
             (r'DRUG\s*1', r'DRUG', self.molecule),
             ],
            newname=topol)
    logger.info('[%(dirname)s] Created topology %(topol)r that includes %(_itp)r', vars())

    # update known files and dirs
    self.files.topology = realpath(dirname, topol)
    if self.dirs.topology not in self.dirs.includes:
        self.dirs.includes.append(self.dirs.topology)

    self.journal.completed('topology')
    return {'dirname': dirname, 'topol': topol}
def _MD(self, protocol, **kwargs):
    """Basic MD driver for this Simulation. Do not call directly.

    Fills in defaults for *dirname*, *deffnm* and *mdp*, forces *top*,
    *ndx*, *includes* and *mainselection*, makes sure that a starting
    structure exists and then calls the setup function (*MDfunc*,
    by default :func:`gromacs.setup.MD`).

    :Arguments:
       *protocol*
           name of the stage; used as journal entry and as key in
           ``self.dirs`` and ``self.files``
       *kwargs*
           must contain *struct*; everything else is passed on to the
           setup function

    :Raises: :exc:`IOError` (``ENOENT``) if no starting structure can
             be found.

    :Returns: the dict returned by the setup function.
    """
    self.journal.start(protocol)

    kwargs.setdefault('dirname', self.BASEDIR(protocol))
    kwargs.setdefault('deffnm', self.deffnm)
    kwargs.setdefault('mdp', config.get_template('NPT_opls.mdp'))
    self.dirs[protocol] = realpath(kwargs['dirname'])

    setupMD = kwargs.pop('MDfunc', gromacs.setup.MD)

    kwargs['top'] = self.files.topology
    user_includes = kwargs.pop('includes', [])
    kwargs['includes'] = asiterable(user_includes) + self.dirs.includes
    kwargs['ndx'] = self.files.ndx
    # important for SD (use custom mdp and ndx!, gromacs.setup._MD)
    kwargs['mainselection'] = None

    requested = kwargs['struct']
    self._checknotempty(requested, 'struct')
    if not os.path.exists(requested):
        # struct is not reliable as it depends on qscript so now we just
        # try everything...
        found = gromacs.utilities.find_first(requested, suffices=['pdb', 'gro'])
        if found is None:
            logger.error(
                "Starting structure %(struct)r does not exist (yet)" % kwargs)
            raise IOError(errno.ENOENT, "Starting structure not found",
                          requested)
        logger.info("Found starting structure %r (instead of %r).",
                    found, requested)
        kwargs['struct'] = found

    # now setup the whole simulation (this is typically gromacs.setup.MD() )
    params = setupMD(**kwargs)
    # params['struct'] is md.gro but could also be md.pdb --- depends
    # entirely on qscript
    self.files[protocol] = params['struct']

    # Gromacs 4.5.x 'mdrun -c PDB' fails if it cannot find 'residuetypes.dat'
    # so instead of fuffing with GMXLIB we just dump it into the directory
    residuetypes_target = self.dirs[protocol]
    try:
        shutil.copy(config.topfiles['residuetypes.dat'], residuetypes_target)
    except IOError:
        logger.warning(
            "Failed to copy 'residuetypes.dat': mdrun will likely fail to write a final structure"
        )

    self.journal.completed(protocol)
    return params
def get_lipid_vdwradii(outdir=os.path.curdir, libdir=None):
    """Find vdwradii.dat and add special entries for lipids.

    A new ``vdwradii.dat`` is written to *outdir*: first one line per
    combination of lipid residue name and atom from
    ``vdw_lipid_atom_radii``, then the unchanged content of the
    Gromacs ``vdwradii.dat``.

    See :data:`gromacs.setup.vdw_lipid_resnames` for lipid
    resnames. Add more if necessary.

    :Keywords:
       *outdir*
           directory in which the new ``vdwradii.dat`` is created
           [current directory]
       *libdir*
           directory that contains the original ``vdwradii.dat``; if
           ``None`` then ``$GMXLIB`` and then
           ``$GMXDATA/gromacs/top`` are tried

    :Raises: :exc:`OSError` with ``errno.ENOENT`` if the original
             ``vdwradii.dat`` cannot be located.

    :Returns: resolved path of the new ``vdwradii.dat``.
    """
    vdwradii_dat = os.path.join(outdir, "vdwradii.dat")

    if libdir is not None:
        filename = os.path.join(libdir, 'vdwradii.dat')  # canonical name
        if not os.path.exists(filename):
            msg = 'No VDW database file found in %r.' % (filename,)
            # not inside an exception handler, so log as a plain error
            logger.error(msg)
            raise OSError(errno.ENOENT, msg)
    else:
        gmxlib = os.environ.get('GMXLIB')
        gmxdata = os.environ.get('GMXDATA')
        if gmxlib is not None:
            filename = os.path.join(gmxlib, 'vdwradii.dat')
        elif gmxdata is not None:
            filename = os.path.join(gmxdata, 'gromacs', 'top', 'vdwradii.dat')
        else:
            msg = "Cannot find vdwradii.dat. Set GMXLIB or GMXDATA."
            logger.error(msg)
            raise OSError(errno.ENOENT, msg)

    if not os.path.exists(filename):
        # report the file that is missing (not the whole local namespace)
        msg = ("Cannot find %r; something is wrong with the Gromacs installation."
               % (filename,))
        logger.error(msg)
        raise OSError(errno.ENOENT, msg)

    # make sure to catch 3 and 4 letter resnames
    patterns = vdw_lipid_resnames + list({x[:3] for x in vdw_lipid_resnames})
    # TODO: should do a tempfile...
    with open(vdwradii_dat, 'w') as outfile:
        # write lipid stuff before general
        outfile.write('; Special larger vdw radii for solvating lipid membranes\n')
        for resname in patterns:
            for atom, radius in vdw_lipid_atom_radii.items():
                outfile.write('%4s %-5s %5.3f\n' % (resname, atom, radius))
        with open(filename, 'r') as infile:
            for line in infile:
                outfile.write(line)
    logger.debug('Created lipid vdW radii file %r.' % (vdwradii_dat,))
    return realpath(vdwradii_dat)
def energy_minimize(self, **kwargs):
    """Energy minimize the solvated structure on the local machine.

    *kwargs* are passed to :func:`gromacs.setup.energy_minimize` but if
    the :meth:`~mdpow.equil.Simulation.solvate` step has been carried
    out previously all the defaults should just work.

    *dirname*, *struct* and *mdp* only receive defaults; *top*,
    *mainselection* and *includes* are always set here.

    :Returns: the dict returned by :func:`gromacs.setup.energy_minimize`;
              its ``'struct'`` entry is also stored as
              ``self.files.energy_minimized``.
    """
    self.journal.start('energy_minimize')

    workdir = kwargs.setdefault('dirname', self.BASEDIR('em'))
    self.dirs.energy_minimization = realpath(workdir)

    kwargs['top'] = self.files.topology
    kwargs.setdefault('struct', self.files.solvated)
    kwargs.setdefault('mdp', self.mdp['energy_minimize'])
    kwargs['mainselection'] = None

    # user supplied include directories come first, then the known ones
    user_includes = kwargs.pop('includes', [])
    kwargs['includes'] = asiterable(user_includes) + self.dirs.includes

    result = gromacs.setup.energy_minimize(**kwargs)
    self.files.energy_minimized = result['struct']

    self.journal.completed('energy_minimize')
    return result
def _MD(self, protocol, **kwargs):
    """Basic MD driver for this Simulation. Do not call directly.

    Fills in defaults for *dirname*, *deffnm* and *mdp*, forces *top*,
    *ndx*, *includes* and *mainselection*, makes sure that a starting
    structure exists and then calls the setup function (*MDfunc*,
    by default :func:`gromacs.setup.MD`).

    :Arguments:
       *protocol*
           name of the stage; used as journal entry and as key in
           ``self.dirs`` and ``self.files``
       *kwargs*
           must contain *struct*; everything else is passed on to the
           setup function

    :Raises: :exc:`IOError` (``ENOENT``) if no starting structure can
             be found.

    :Returns: the dict returned by the setup function.
    """
    self.journal.start(protocol)

    kwargs.setdefault('dirname', self.BASEDIR(protocol))
    kwargs.setdefault('deffnm', self.deffnm)
    kwargs.setdefault('mdp', config.get_template('NPT_opls.mdp'))
    self.dirs[protocol] = realpath(kwargs['dirname'])

    setupMD = kwargs.pop('MDfunc', gromacs.setup.MD)

    kwargs['top'] = self.files.topology
    kwargs['includes'] = asiterable(kwargs.pop('includes', [])) + self.dirs.includes
    kwargs['ndx'] = self.files.ndx
    kwargs['mainselection'] = None   # important for SD (use custom mdp and ndx!, gromacs.setup._MD)

    self._checknotempty(kwargs['struct'], 'struct')
    if not os.path.exists(kwargs['struct']):
        # struct is not reliable as it depends on qscript so now we just try everything...
        struct = gromacs.utilities.find_first(kwargs['struct'], suffices=['pdb', 'gro'])
        if struct is None:
            logger.error("Starting structure %(struct)r does not exist (yet)" % kwargs)
            raise IOError(errno.ENOENT, "Starting structure not found", kwargs['struct'])
        else:
            logger.info("Found starting structure %r (instead of %r).", struct, kwargs['struct'])
            kwargs['struct'] = struct

    # now setup the whole simulation (this is typically gromacs.setup.MD() )
    params = setupMD(**kwargs)
    # params['struct'] is md.gro but could also be md.pdb --- depends entirely on qscript
    self.files[protocol] = params['struct']

    # Gromacs 4.5.x 'mdrun -c PDB' fails if it cannot find 'residuetypes.dat'
    # so instead of fuffing with GMXLIB we just dump it into the directory
    try:
        shutil.copy(config.topfiles['residuetypes.dat'], self.dirs[protocol])
    except (IOError, OSError, KeyError):
        # Best effort only: a missing entry in config.topfiles or a failed
        # copy must not abort the setup. Unlike a bare ``except`` this
        # lets KeyboardInterrupt/SystemExit propagate.
        logger.warning("Failed to copy 'residuetypes.dat': mdrun will likely fail to write a final structure")

    self.journal.completed(protocol)
    return params
except GromacsError, err: # or should I rather fail here? wmsg = "Failed to make main index file %r ... maybe set mainselection='...'.\n"\ "The error message was:\n%s\n" % (ndx, str(err)) logger.warn(wmsg) warnings.warn(wmsg, category=GromacsFailureWarning) try: trj_compact_main(f='ionized.gro', s='ionized.tpr', o='compact.gro', n=ndx) except GromacsError, err: wmsg = "Failed to make compact pdb for visualization... pressing on regardless. "\ "The error message was:\n%s\n" % str(err) logger.warn(wmsg) warnings.warn(wmsg, category=GromacsFailureWarning) return {'qtot': qtot, 'struct': realpath(dirname, 'ionized.gro'), 'ndx': realpath(dirname, ndx), # not sure why this is propagated-is it used? 'mainselection': mainselection, } def check_mdpargs(d): """Check if any arguments remain in dict *d*.""" if len(d) > 0: wmsg = "Unprocessed mdp option are interpreted as options for grompp:\n"+str(d) logger.warn(wmsg) warnings.warn(wmsg, category=UsageWarning) return len(d) == 0 def energy_minimize(dirname='em', mdp=config.templates['em.mdp'], struct='solvate/ionized.pdb', top='top/system.top',
def _setup_MD(
    dirname,
    deffnm="md",
    mdp=config.templates["md_OPLSAA.mdp"],
    struct=None,
    top="top/system.top",
    ndx=None,
    mainselection='"Protein"',
    qscript=config.qscript_template,
    qname=None,
    startdir=None,
    mdrun_opts="",
    budget=None,
    walltime=1 / 3.0,
    dt=0.002,
    runtime=1e3,
    **mdp_kwargs
):
    """Generic function to set up a ``mdrun`` MD simulation.

    See the user functions for usage.

    The function writes ``<deffnm>.mdp`` from the *mdp* template, runs
    ``grompp`` to produce ``<deffnm>.tpr`` and generates submission
    scripts, all inside *dirname*. Unless ``Tcoupl == "no"`` or
    *mainselection* is ``None``, an index file ``<deffnm>.ndx`` is
    created and the temperature coupling groups are adjusted
    automatically.

    :Arguments:
       *dirname*
           directory in which all files are created
       *deffnm*
           base name of the generated mdp, tpr and ndx files
       *mdp*, *qscript*
           template names, resolved with :func:`config.get_template`
       *struct*
           input structure (**required**)
       *top*, *ndx*
           topology and (optional) index file
       *mainselection*
           selection passed to ``make_main_index``; ``None`` disables
           the automatic T-coupling setup
       *qname*, *startdir*, *mdrun_opts*, *budget*, *walltime*
           passed on to the submit script generator
       *dt*, *runtime*
           used to compute ``nsteps = int(runtime/dt)``
       *mdp_kwargs*
           changes to the mdp template; the legacy keys ``sgename``
           and ``sge`` override *qname* and *qscript*

    :Raises: :exc:`ValueError` if *struct* is ``None``.

    :Returns: dict with ``struct`` (guessed final structure), ``top``,
              ``ndx``, ``qscript``, ``mainselection`` and ``deffnm``,
              updated with the remaining *mdp_kwargs*.
    """
    if struct is None:
        raise ValueError("struct must be set to a input structure")
    structure = realpath(struct)
    topology = realpath(top)
    try:
        index = realpath(ndx)
    except AttributeError:  # (that's what realpath(None) throws...)
        index = None  # None is handled fine below

    qname = mdp_kwargs.pop("sgename", qname)  # compatibility for old scripts
    qscript = mdp_kwargs.pop("sge", qscript)  # compatibility for old scripts
    qscript_template = config.get_template(qscript)
    mdp_template = config.get_template(mdp)

    nsteps = int(float(runtime) / float(dt))

    mdp = deffnm + ".mdp"
    tpr = deffnm + ".tpr"
    mainindex = deffnm + ".ndx"
    # guess... really depends on templates, could also be DEFFNM.pdb
    final_structure = deffnm + ".gro"

    # write the processed topology to the default output
    mdp_parameters = {"nsteps": nsteps, "dt": dt, "pp": "processed.top"}
    mdp_parameters.update(mdp_kwargs)

    add_mdp_includes(topology, mdp_parameters)

    logger.info("[%(dirname)s] input mdp = %(mdp_template)r", vars())
    with in_dir(dirname):
        if not (mdp_parameters.get("Tcoupl", "").lower() == "no" or mainselection is None):
            logger.info("[%(dirname)s] Automatic adjustment of T-coupling groups" % vars())

            # make index file in almost all cases; with mainselection == None the user
            # takes FULL control and also has to provide the template or index
            groups = make_main_index(structure, selection=mainselection, oldndx=index, ndx=mainindex)
            natoms = {g["name"]: float(g["natoms"]) for g in groups}
            tc_group_names = ("__main__", "__environment__")  # defined in make_main_index()
            # NOTE(review): an __environment__ group with 0 atoms would raise
            # ZeroDivisionError here -- confirm whether that can occur.
            try:
                x = natoms["__main__"] / natoms["__environment__"]
            except KeyError:
                x = 0  # force using SYSTEM in code below
                wmsg = (
                    "Missing __main__ and/or __environment__ index group.\n"
                    "This probably means that you have an atypical system. You can "
                    "set mainselection=None and provide your own mdp and index files "
                    "in order to set up temperature coupling.\n"
                    "If no T-coupling is required then set Tcoupl='no'.\n"
                    "For now we will just couple everything to 'System'."
                )
                logger.warning(wmsg)
                warnings.warn(wmsg, category=AutoCorrectionWarning)
            if x < 0.1:
                # couple everything together
                tau_t = firstof(mdp_parameters.pop("tau_t", 0.1))
                ref_t = firstof(mdp_parameters.pop("ref_t", 300))
                # combine all in one T-coupling group
                mdp_parameters["tc-grps"] = "System"
                mdp_parameters["tau_t"] = tau_t  # this overrides the commandline!
                mdp_parameters["ref_t"] = ref_t  # this overrides the commandline!
                mdp_parameters["gen-temp"] = mdp_parameters.pop("gen_temp", ref_t)
                wmsg = (
                    "Size of __main__ is only %.1f%% of __environment__ so "
                    "we use 'System' for T-coupling and ref_t = %g K and "
                    "tau_t = %g 1/ps (can be changed in mdp_parameters).\n" % (x * 100, ref_t, tau_t)
                )
                logger.warning(wmsg)
                warnings.warn(wmsg, category=AutoCorrectionWarning)
            else:
                # couple protein and bath separately
                n_tc_groups = len(tc_group_names)
                tau_t = asiterable(mdp_parameters.pop("tau_t", 0.1))
                ref_t = asiterable(mdp_parameters.pop("ref_t", 300))

                if len(tau_t) != n_tc_groups:
                    tau_t = n_tc_groups * [tau_t[0]]
                    wmsg = (
                        "%d coupling constants should have been supplied for tau_t. "
                        "Using %f 1/ps for all of them." % (n_tc_groups, tau_t[0])
                    )
                    logger.warning(wmsg)
                    warnings.warn(wmsg, category=AutoCorrectionWarning)
                if len(ref_t) != n_tc_groups:
                    ref_t = n_tc_groups * [ref_t[0]]
                    wmsg = "%d temperatures should have been supplied for ref_t. " "Using %g K for all of them." % (
                        n_tc_groups,
                        ref_t[0],
                    )
                    logger.warning(wmsg)
                    warnings.warn(wmsg, category=AutoCorrectionWarning)

                mdp_parameters["tc-grps"] = tc_group_names
                mdp_parameters["tau_t"] = tau_t
                mdp_parameters["ref_t"] = ref_t
                mdp_parameters["gen-temp"] = mdp_parameters.pop("gen_temp", ref_t[0])
            # from here on the freshly generated index file is used
            index = realpath(mainindex)
        if mdp_parameters.get("Tcoupl", "").lower() == "no":
            logger.info("Tcoupl == no: disabling all temperature coupling mdp options")
            mdp_parameters["tc-grps"] = ""
            mdp_parameters["tau_t"] = ""
            mdp_parameters["ref_t"] = ""
            mdp_parameters["gen-temp"] = ""
        if mdp_parameters.get("Pcoupl", "").lower() == "no":
            logger.info("Pcoupl == no: disabling all pressure coupling mdp options")
            mdp_parameters["tau_p"] = ""
            mdp_parameters["ref_p"] = ""
            mdp_parameters["compressibility"] = ""

        # whatever edit_mdp() returns as unprocessed is handed to grompp
        unprocessed = gromacs.cbook.edit_mdp(mdp_template, new_mdp=mdp, **mdp_parameters)
        check_mdpargs(unprocessed)
        gromacs.grompp(f=mdp, p=topology, c=structure, n=index, o=tpr, **unprocessed)

        runscripts = gromacs.qsub.generate_submit_scripts(
            qscript_template,
            deffnm=deffnm,
            jobname=qname,
            budget=budget,
            startdir=startdir,
            mdrun_opts=mdrun_opts,
            walltime=walltime,
        )

    logger.info("[%(dirname)s] output mdp = %(mdp)r", vars())
    logger.info("[%(dirname)s] output ndx = %(ndx)r", vars())
    logger.info("[%(dirname)s] output tpr = %(tpr)r", vars())
    logger.info("[%(dirname)s] output runscripts = %(runscripts)r", vars())
    logger.info(
        "[%(dirname)s] All files set up for a run time of %(runtime)g ps "
        "(dt=%(dt)g, nsteps=%(nsteps)g)" % vars()
    )

    kwargs = {
        "struct": realpath(os.path.join(dirname, final_structure)),  # guess
        "top": topology,
        "ndx": index,  # possibly mainindex
        "qscript": runscripts,
        "mainselection": mainselection,
        "deffnm": deffnm,  # return deffnm (tpr = deffnm.tpr!)
    }
    kwargs.update(mdp_kwargs)  # return extra mdp args so that one can use them for prod run
    return kwargs
) logger.warn(wmsg) warnings.warn(wmsg, category=GromacsFailureWarning) try: trj_compact_main(f="ionized.gro", s="ionized.tpr", o="compact.pdb", n=ndx) except GromacsError, err: wmsg = ( "Failed to make compact pdb for visualization... pressing on regardless. " "The error message was:\n%s\n" % str(err) ) logger.warn(wmsg) warnings.warn(wmsg, category=GromacsFailureWarning) return { "qtot": qtot, "struct": realpath(dirname, "ionized.gro"), "ndx": realpath(dirname, ndx), # not sure why this is propagated-is it used? "mainselection": mainselection, } def check_mdpargs(d): """Check if any arguments remain in dict *d*.""" if len(d) > 0: wmsg = "Unprocessed mdp option are interpreted as options for grompp:\n" + str(d) logger.warn(wmsg) warnings.warn(wmsg, category=UsageWarning) return len(d) == 0 def energy_minimize(
def energy_minimize(dirname='em', mdp=config.templates['em.mdp'],
                    struct='solvate/ionized.pdb', top='top/system.top',
                    output='em.pdb', deffnm="em",
                    mdrunner=None, **kwargs):
    """Energy minimize the system.

    This sets up the system (creates run input files) and also runs
    ``mdrun_d``. Thus it can take a while.

    Additional itp files should be in the same directory as the top file.

    Many of the keyword arguments below already have sensible values.

    :Keywords:
       *dirname*
          set up under directory dirname [em]
       *struct*
          input structure (gro, pdb, ...) [solvate/ionized.pdb]
       *output*
          output structure (will be put under dirname) [em.pdb]
       *deffnm*
          default name for mdrun-related files [em]
       *top*
          topology file [top/system.top]
       *mdp*
          mdp file (or use the template) [templates/em.mdp]
       *includes*
          additional directories to search for itp files
       *mdrunner*
          :class:`gromacs.run.MDrunner` class; by default we
          just try :func:`gromacs.mdrun_d` and :func:`gromacs.mdrun` but a
          MDrunner class gives the user the ability to run mpi jobs
          etc. [None]
       *kwargs*
          remaining key/value pairs that should be changed in the
          template mdp file, eg ``nstxtcout=250, nstfout=250``.

    :Raises: :exc:`GromacsError` if *output* does not exist after the run.

    :Returns: dict with the keys ``'struct'`` (resolved path of
              *output*), ``'top'`` and ``'mainselection'``.

    .. note:: If :func:`~gromacs.mdrun_d` is not found, the function
              falls back to :func:`~gromacs.mdrun` instead.
    """
    structure = realpath(struct)
    topology = realpath(top)
    mdp_template = config.get_template(mdp)
    deffnm = deffnm.strip()

    # write the processed topology to the default output
    kwargs.setdefault('pp', 'processed.top')

    # filter some kwargs that might come through when feeding output
    # from previous stages such as solvate(); necessary because *all*
    # **kwargs must be *either* substitutions in the mdp file *or* valid
    # command line parameters for ``grompp``.
    kwargs.pop('ndx', None)
    # mainselection is not used but only passed through; right now we
    # set it to the default that is being used in all argument lists
    # but that is not pretty. TODO.
    mainselection = kwargs.pop('mainselection', '"Protein"')
    # only interesting when passed from solvate()
    qtot = kwargs.pop('qtot', 0)

    mdp = deffnm + '.mdp'
    tpr = deffnm + '.tpr'

    logger.info("[%(dirname)s] Energy minimization of struct=%(struct)r, top=%(top)r, mdp=%(mdp)r ..." % vars())

    add_mdp_includes(topology, kwargs)

    if qtot != 0:
        # At the moment this is purely user-reported and really only here because
        # it might get fed into the function when using the keyword-expansion pipeline
        # usage paradigm.
        wmsg = "Total charge was reported as qtot = %(qtot)g <> 0; probably a problem." % vars()
        logger.warning(wmsg)
        warnings.warn(wmsg, category=BadParameterWarning)

    with in_dir(dirname):
        unprocessed = gromacs.cbook.edit_mdp(mdp_template, new_mdp=mdp, **kwargs)
        check_mdpargs(unprocessed)
        gromacs.grompp(f=mdp, o=tpr, c=structure, p=topology, **unprocessed)
        mdrun_args = dict(v=True, stepout=10, deffnm=deffnm, c=output)
        if mdrunner is None:
            try:
                gromacs.mdrun_d(**mdrun_args)
            except (AttributeError, OSError):
                # fall back to mdrun if no double precision binary
                wmsg = "No 'mdrun_d' binary found so trying 'mdrun' instead.\n"\
                    "(Note that energy minimization runs better with mdrun_d.)"
                logger.warning(wmsg)
                warnings.warn(wmsg, category=AutoCorrectionWarning)
                gromacs.mdrun(**mdrun_args)
        else:
            # user wants full control and provides simulation.MDrunner **class**
            # NO CHECKING --- in principle user can supply any callback they like
            mdrun = mdrunner(**mdrun_args)
            mdrun.run()

        # em.gro --> gives 'Bad box in file em.gro' warning --- why??
        # --> use em.pdb instead.
        if not os.path.exists(output):
            errmsg = "Energy minimized system NOT produced."
            logger.error(errmsg)
            raise GromacsError(errmsg)
        # resolve while still inside dirname: *output* is relative to it
        final_struct = realpath(output)

    logger.info("[%(dirname)s] energy minimized structure %(final_struct)r" % vars())
    return {'struct': final_struct,
            'top': topology,
            'mainselection': mainselection,
            }
def __init__(self, molecule=None, **kwargs):
    """Set up Simulation instance.

    The *molecule* of the compound molecule should be supplied. Existing
    files (which have been generated in previous runs) can also be
    supplied.

    :Keywords:
      *molecule*
          Identifier for the compound molecule. This is the same as the
          entry in the ``[ molecule ]`` section of the itp file. ["DRUG"]
      *filename*
          If provided and *molecule* is ``None`` then load the instance from
          the pickle file *filename*, which was generated with
          :meth:`~mdpow.equil.Simulation.save`.
      *dirname*
          base directory; all other directories are created under it
      *forcefield*
          'OPLS-AA' or 'CHARMM' or 'AMBER'
      *solvent*
          'water' or 'octanol' or 'cyclohexane' or 'wetoctanol'
      *solventmodel*
          ``None`` chooses the default (e.g.,
          :data:`mdpow.forcefields.DEFAULT_WATER_MODEL` for
          ``solvent == "water"``). Other options are the models defined in
          :data:`mdpow.forcefields.GROMACS_WATER_MODELS`. At the moment,
          there are no alternative parameterizations included for other
          solvents.
      *mdp*
          dict with keys corresponding to the stages ``energy_minimize``,
          ``MD_restrained``, ``MD_relaxed``, ``MD_NPT`` and values *mdp*
          file names (if no entry then the package defaults are used)
      *distance*
          minimum distance between solute and closest box face
      *kwargs*
          advanced keywords for short-circuiting; see
          :data:`mdpow.equil.Simulation.filekeys`.

    :Raises: :exc:`ValueError` if no solvent model identifier is found
             for the *solvent*/*solventmodel* combination.
    """
    self.__cache = {}

    filename = kwargs.pop('filename', None)
    dirname = kwargs.pop('dirname', self.dirname_default)
    forcefield = kwargs.pop('forcefield', 'OPLS-AA')
    solvent = kwargs.pop('solvent', self.solvent_default)
    # 'water' will choose the default ('tip4p'), other choices are
    # 'tip3p', 'spc', 'spce', 'm24', for water; no choices
    # available for 'cyclohexane' and 'octanol'
    solventmodel = kwargs.pop('solventmodel', None)

    # mdp files --- should get values from default runinput.cfg
    # None values in the kwarg mdp dict are ignored
    # self.mdp: key = stage, value = path to MDP file
    user_mdp = kwargs.pop('mdp', {})
    self.mdp = {stage: config.get_template(fn)
                for stage, fn in self.mdp_defaults.items()}
    self.mdp.update({stage: config.get_template(fn)
                     for stage, fn in user_mdp.items()
                     if fn is not None})

    if molecule is not None or filename is None:
        # fresh instance built from the keyword arguments
        self.molecule = molecule or 'DRUG'
        self.dirs = AttributeDict(
            basedir=realpath(dirname),  # .../Equilibrium/<solvent>
            includes=list(asiterable(kwargs.pop('includes', []))) +
            [config.includedir],
        )
        # pre-set filenames: keyword == variable name
        self.files = AttributeDict(
            [(key, kwargs.pop(key, None)) for key in self.filekeys])
        self.deffnm = kwargs.pop("deffnm", "md")

        if self.files.topology:
            # assume that a user-supplied topology lives in a 'standard'
            # top dir that includes the necessary itp file(s)
            topdir = os.path.dirname(self.files.topology)
            self.dirs.topology = realpath(topdir)
            self.dirs.includes.append(self.dirs.topology)

        self.forcefield = forcefield
        self.solvent_type = solvent
        self.solventmodel_identifier = forcefields.get_solvent_identifier(
            solvent,
            model=solventmodel,
            forcefield=forcefield,
        )
        if self.solventmodel_identifier is None:
            msg = "No parameters for solvent {0} and solventmodel {1} available.".format(
                solvent, solventmodel)
            logger.error(msg)
            raise ValueError(msg)
        self.solventmodel = forcefields.get_solvent_model(
            self.solventmodel_identifier,
            forcefield=forcefield,
        )

        distance = kwargs.pop('distance', None)
        if distance is None:
            distance = DIST[solvent]

        self.solvent = AttributeDict(itp=self.solventmodel.itp,
                                     box=self.solventmodel.coordinates,
                                     distance=distance)

        self.filename = filename or self.solvent_type + '.simulation'
    else:
        # load from pickle file
        self.load(filename)
        self.filename = filename
        kwargs = {}  # for super

    super(Simulation, self).__init__(**kwargs)
def solvate(struct='top/protein.pdb', top='top/system.top', distance=0.9, boxtype='dodecahedron', concentration=0, cation='NA', anion='CL', water='spc', solvent_name='SOL', with_membrane=False, ndx = 'main.ndx', mainselection = '"Protein"', dirname='solvate', **kwargs): """Put protein into box, add water, add counter-ions. Currently this really only supports solutes in water. If you need to embedd a protein in a membrane then you will require more sophisticated approaches. However, you *can* supply a protein already inserted in a bilayer. In this case you will probably want to set *distance* = ``None`` and also enable *with_membrane* = ``True`` (using extra big vdw radii for typical lipids). .. Note:: The defaults are suitable for solvating a globular protein in a fairly tight (increase *distance*!) dodecahedral box. :Arguments: *struct* : filename pdb or gro input structure *top* : filename Gromacs topology *distance* : float When solvating with water, make the box big enough so that at least *distance* nm water are between the solute *struct* and the box boundary. Set *boxtype* to ``None`` in order to use a box size in the input file (gro or pdb). *boxtype* or *bt*: string Any of the box types supported by :class:`~gromacs.tools.Editconf` (triclinic, cubic, dodecahedron, octahedron). Set the box dimensions either with *distance* or the *box* and *angle* keywords. If set to ``None`` it will ignore *distance* and use the box inside the *struct* file. *bt* overrides the value of *boxtype*. *box* List of three box lengths [A,B,C] that are used by :class:`~gromacs.tools.Editconf` in combination with *boxtype* (``bt`` in :program:`editconf`) and *angles*. Setting *box* overrides *distance*. *angles* List of three angles (only necessary for triclinic boxes). *concentration* : float Concentration of the free ions in mol/l. Note that counter ions are added in excess of this concentration. *cation* and *anion* : string Molecule names of the ions. 
This depends on the chosen force field. *water* : string Name of the water model; one of "spc", "spce", "tip3p", "tip4p". This should be appropriate for the chosen force field. If an alternative solvent is required, simply supply the path to a box with solvent molecules (used by :func:`~gromacs.genbox`'s *cs* argument) and also supply the molecule name via *solvent_name*. *solvent_name* Name of the molecules that make up the solvent (as set in the itp/top). Typically needs to be changed when using non-standard/non-water solvents. ["SOL"] *with_membrane* : bool ``True``: use special ``vdwradii.dat`` with 0.1 nm-increased radii on lipids. Default is ``False``. *ndx* : filename How to name the index file that is produced by this function. *mainselection* : string A string that is fed to :class:`~gromacs.tools.Make_ndx` and which should select the solute. *dirname* : directory name Name of the directory in which all files for the solvation stage are stored. *includes* List of additional directories to add to the mdp include path *kwargs* Additional arguments are passed on to :class:`~gromacs.tools.Editconf` or are interpreted as parameters to be changed in the mdp file. """ structure = realpath(struct) topology = realpath(top) # arguments for editconf that we honour editconf_keywords = ["box", "bt", "angles", "c", "center", "aligncenter", "align", "translate", "rotate", "princ"] editconf_kwargs = dict((k,kwargs.pop(k,None)) for k in editconf_keywords) editconf_boxtypes = ["triclinic", "cubic", "dodecahedron", "octahedron", None] # needed for topology scrubbing scrubber_kwargs = {'marker': kwargs.pop('marker',None)} # sanity checks and argument dependencies bt = editconf_kwargs.pop('bt') boxtype = bt if bt else boxtype # bt takes precedence over boxtype if not boxtype in editconf_boxtypes: msg = "Unsupported boxtype %(boxtype)r: Only %(boxtypes)r are possible." 
% vars() logger.error(msg) raise ValueError(msg) if editconf_kwargs['box']: distance = None # if box is set then user knows what she is doing... # handle additional include directories (kwargs are also modified!) mdp_kwargs = add_mdp_includes(topology, kwargs) if water.lower() in ('spc', 'spce'): water = 'spc216' elif water.lower() == 'tip3p': water = 'spc216' logger.warning("TIP3P water model selected: using SPC equilibrated box " "for initial solvation because it is a reasonable starting point " "for any 3-point model. EQUILIBRATE THOROUGHLY!") # By default, grompp should not choke on a few warnings because at # this stage the user cannot do much about it (can be set to any # value but is kept undocumented...) grompp_maxwarn = kwargs.pop('maxwarn',10) # clean topology (if user added the marker; the default marker is # ; Gromacs auto-generated entries follow: n_removed = gromacs.cbook.remove_molecules_from_topology(topology, **scrubber_kwargs) with in_dir(dirname): logger.info("[%(dirname)s] Solvating with water %(water)r..." 
% vars()) if boxtype is None: hasBox = False ext = os.path.splitext(structure)[1] if ext == '.gro': hasBox = True elif ext == '.pdb': with open(structure) as struct: for line in struct: if line.startswith('CRYST'): hasBox = True break if not hasBox: msg = "No box data in the input structure %(structure)r and boxtype is set to None" % vars() logger.exception(msg) raise MissingDataError(msg) distance = boxtype = None # ensures that editconf just converts editconf_kwargs.update({'f': structure, 'o': 'boxed.gro', 'bt': boxtype, 'd': distance}) gromacs.editconf(**editconf_kwargs) if with_membrane: vdwradii_dat = get_lipid_vdwradii() # need to clean up afterwards logger.info("Using special vdW radii for lipids %r" % vdw_lipid_resnames) try: gromacs.genbox(p=topology, cp='boxed.gro', cs=water, o='solvated.gro') except: if with_membrane: # remove so that it's not picked up accidentally gromacs.utilities.unlink_f(vdwradii_dat) raise logger.info("Solvated system with %s", water) with open('none.mdp','w') as mdp: mdp.write('; empty mdp file\ninclude = %(include)s\nrcoulomb = 1\nrvdw = 1\nrlist = 1\n' % mdp_kwargs) qtotgmx = gromacs.cbook.grompp_qtot(f='none.mdp', o='topol.tpr', c='solvated.gro', p=topology, stdout=False, maxwarn=grompp_maxwarn) qtot = round(qtotgmx) logger.info("[%(dirname)s] After solvation: total charge qtot = %(qtotgmx)r = %(qtot)r" % vars()) if concentration != 0: logger.info("[%(dirname)s] Adding ions for c = %(concentration)f M..." % vars()) # target concentration of free ions c ==> # N = N_water * c/c_water # add ions for concentration to the counter ions (counter ions are less free) # # get number of waters (count OW ... works for SPC*, TIP*P water models) rc,output,junk = gromacs.make_ndx(f='topol.tpr', o='ow.ndx', input=('keep 0', 'del 0', 'a OW*', 'name 0 OW', '', 'q'), stdout=False) groups = gromacs.cbook.parse_ndxlist(output) gdict = dict([(g['name'], g) for g in groups]) # overkill... N_water = gdict['OW']['natoms'] # ... 
but dict lookup is nice N_ions = int(N_water * concentration/CONC_WATER) # number of monovalents else: N_ions = 0 # neutralize (or try -neutral switch of genion???) n_cation = n_anion = 0 if qtot > 0: n_anion = int(abs(qtot)) elif qtot < 0: n_cation = int(abs(qtot)) n_cation += N_ions n_anion += N_ions if n_cation != 0 or n_anion != 0: # sanity check: assert qtot + n_cation - n_anion < 1e-6 logger.info("[%(dirname)s] Adding n_cation = %(n_cation)d and n_anion = %(n_anion)d ions..." % vars()) gromacs.genion(s='topol.tpr', o='ionized.gro', p=topology, pname=cation, nname=anion, np=n_cation, nn=n_anion, input=solvent_name) else: # fake ionized file ... makes it easier to continue without too much fuzz try: os.unlink('ionized.gro') except OSError, err: if err.errno != errno.ENOENT: raise os.symlink('solvated.gro', 'ionized.gro') qtot = gromacs.cbook.grompp_qtot(f='none.mdp', o='ionized.tpr', c='ionized.gro', p=topology, stdout=False, maxwarn=grompp_maxwarn) if abs(qtot) > 1e-4: wmsg = "System has non-zero total charge qtot = %(qtot)g e." % vars() warnings.warn(wmsg, category=BadParameterWarning) logger.warn(wmsg) # make main index try: make_main_index('ionized.tpr', selection=mainselection, ndx=ndx) except GromacsError, err: # or should I rather fail here? wmsg = "Failed to make main index file %r ... maybe set mainselection='...'.\n"\ "The error message was:\n%s\n" % (ndx, str(err)) logger.warn(wmsg) warnings.warn(wmsg, category=GromacsFailureWarning)
def topology(self, itp='drug.itp', prm=None, **kwargs):
    """Generate a topology for compound *molecule*.

    The itp file (and the prm file, if one is given) is copied into the
    topology directory. The topology template is then rewritten with
    :func:`gromacs.cbook.edit_txt` and saved there under the name *topol*.

    :Keywords:
        *itp*
           Gromacs itp file; will be copied to topology dir and
           included in topology
        *prm*
           Gromacs prm file; if given, will be copied to topology
           dir and included in topology
        *dirname*
           name of the topology directory ["top"]
        *kwargs*
           see source for *top_template*, *topol*

    :Returns: dict with the keys ``dirname`` and ``topol``
    """
    self.journal.start('topology')

    dirname = kwargs.pop('dirname', self.BASEDIR('top'))
    self.dirs.topology = realpath(dirname)

    ff_paths = forcefields.get_ff_paths(self.forcefield)
    default_template = forcefields.get_top_template(self.solvent_type)
    top_template = config.get_template(
        kwargs.pop('top_template', default_template))
    topol = kwargs.pop('topol', os.path.basename(top_template))
    self.top_template = top_template

    # absolute source paths: the copies below happen after changing
    # into the topology directory
    itp_source = os.path.realpath(itp)
    itp_name = os.path.basename(itp_source)

    have_prm = prm is not None
    if have_prm:
        prm_source = os.path.realpath(prm)
        prm_name = os.path.basename(prm_source)
        prm_include = '#include "{}"'.format(prm_name)
    else:
        # an empty replacement removes the prm include line of the template
        prm_include = ''

    with in_dir(dirname):
        shutil.copy(itp_source, itp_name)
        if have_prm:
            shutil.copy(prm_source, prm_name)

        # (line regex, search regex, replacement) triples for edit_txt
        substitutions = [
            (r'#include +"oplsaa\.ff/forcefield\.itp"',
             r'oplsaa\.ff/', ff_paths[0]),
            (r'#include +"compound\.itp"',
             r'compound\.itp', itp_name),
            (r'#include +"oplsaa\.ff/tip4p\.itp"',
             r'oplsaa\.ff/tip4p\.itp', ff_paths[0] + self.solvent.itp),
            (r'#include +"oplsaa\.ff/ions_opls\.itp"',
             r'oplsaa\.ff/ions_opls\.itp', ff_paths[1]),
            (r'#include +"compound\.prm"',
             r'#include +"compound\.prm"', prm_include),
            (r'#include +"water\.itp"',
             r'water\.itp', ff_paths[2]),
            (r'Compound', 'solvent', self.solvent_type),
            (r'Compound', 'DRUG', self.molecule),
            (r'DRUG\s*1', 'DRUG', self.molecule),
        ]
        gromacs.cbook.edit_txt(top_template, substitutions, newname=topol)

    logger.info(
        '[%(dirname)s] Created topology %(topol)r that includes %(_itp)r',
        {'dirname': dirname, 'topol': topol, '_itp': itp_name})

    # update known files and dirs
    self.files.topology = realpath(dirname, topol)
    if self.dirs.topology not in self.dirs.includes:
        self.dirs.includes.append(self.dirs.topology)

    self.journal.completed('topology')
    return {'dirname': dirname, 'topol': topol}
def _setup_MD(dirname,
              deffnm='md', mdp=config.templates['md_OPLSAA.mdp'],
              struct=None,
              top='top/system.top', ndx=None,
              mainselection='"Protein"',
              qscript=config.qscript_template, qname=None, startdir=None,
              mdrun_opts="", budget=None, walltime=1/3.,
              dt=0.002, runtime=1e3, multi=1, **mdp_kwargs):
    """Generic function to set up a ``mdrun`` MD simulation.

    See the user functions for usage.

    All files are generated inside *dirname*: the mdp file(s) are written
    from the template, ``grompp`` is run to build the tpr file(s) and,
    if *qname* is given, run scripts for the queuing system are created.

    @param dirname: directory in which all files are generated
    @param deffnm: base name of the generated mdp, tpr and ndx files
    @param mdp: mdp template (resolved with ``config.get_template``)
    @param struct: input structure file; required
    @param top: topology file
    @param ndx: index file, may be None
    @param mainselection: selection handed to ``make_main_index`` in order
                  to build the ``__main__`` and ``__environment__``
                  T-coupling groups; with None no index is built and the
                  T-coupling options are left untouched
    @param qscript: queuing system script template(s)
    @param qname: name of the queing system, may be None.
    @param startdir: passed on to the run script generation
    @param mdrun_opts: passed on to the run script generation
    @param budget: passed on to the run script generation
    @param walltime: passed on to the run script generation
    @param dt: integration time step; ``nsteps = int(runtime/dt)``
    @param runtime: run time in ps
    @param multi: setup multiple concurrent simulations. These are based
                  upon deffnm being set, and a set of mdp / tpr are
                  created named [deffnm]0.tpr. [deffnm]1.tpr, ...
    @param mdp_kwargs: additional mdp parameters; the legacy keys
                  ``sgename`` and ``sge`` are taken as *qname* and
                  *qscript*
    @return: dict with the keys ``struct``, ``top``, ``ndx``,
             ``mainselection`` and ``deffnm``, plus ``multi`` (only for
             multi > 1), ``qscript`` (only if scripts were generated) and
             the remaining *mdp_kwargs* (without ``define``)
    @raise ValueError: if *struct* is None
    """
    if struct is None:
        raise ValueError('struct must be set to a input structure')
    structure = realpath(struct)
    topology = realpath(top)
    try:
        index = realpath(ndx)
    except AttributeError:  # (that's what realpath(None) throws...)
        index = None        # None is handled fine below

    qname = mdp_kwargs.pop('sgename', qname)    # compatibility for old scripts
    qscript = mdp_kwargs.pop('sge', qscript)    # compatibility for old scripts
    qscript_template = config.get_template(qscript)
    mdp_template = config.get_template(mdp)

    nsteps = int(float(runtime) / float(dt))

    mainindex = deffnm + '.ndx'
    # guess... really depends on templates, could also be DEFFNM.pdb
    final_structure = deffnm + '.pdb'

    # explicit keyword arguments override the computed defaults
    mdp_parameters = {'nsteps': nsteps, 'dt': dt}
    mdp_parameters.update(mdp_kwargs)

    add_mdp_includes(topology, mdp_parameters)

    # the basic result dictionary
    # depending on options, various bits might be added to this.
    result = {
        'struct': realpath(os.path.join(dirname, final_structure)),  # guess
        'top': topology,
        'ndx': index,            # replaced below if a main index is built
        'mainselection': mainselection,
        'deffnm': deffnm,        # return deffnm (tpr = deffnm.tpr!)
    }

    with in_dir(dirname):
        tcoupl_off = mdp_parameters.get('Tcoupl', '').lower() == 'no'

        if not (tcoupl_off or mainselection is None):
            logger.info("[%s] Automatic adjustment of T-coupling groups",
                        dirname)

            # make index file in almost all cases; with mainselection == None
            # the user takes FULL control and also has to provide the
            # template or index
            groups = make_main_index(structure, selection=mainselection,
                                     oldndx=index, ndx=mainindex)
            natoms = {g['name']: float(g['natoms']) for g in groups}
            # defined in make_main_index()
            tc_group_names = ('__main__', '__environment__')
            try:
                x = natoms['__main__'] / natoms['__environment__']
            except KeyError:
                x = 0   # force using SYSTEM in code below
                wmsg = ("Missing __main__ and/or __environment__ index group.\n"
                        "This probably means that you have an atypical system. You can "
                        "set mainselection=None and provide your own mdp and index files "
                        "in order to set up temperature coupling.\n"
                        "If no T-coupling is required then set Tcoupl='no'.\n"
                        "For now we will just couple everything to 'System'.")
                logger.warning(wmsg)
                warnings.warn(wmsg, category=AutoCorrectionWarning)

            if x < 0.1:
                # couple everything together
                tau_t = firstof(mdp_parameters.pop('tau_t', 0.1))
                ref_t = firstof(mdp_parameters.pop('ref_t', 300))
                # combine all in one T-coupling group
                mdp_parameters['tc-grps'] = 'System'
                mdp_parameters['tau_t'] = tau_t   # this overrides the commandline!
                mdp_parameters['ref_t'] = ref_t   # this overrides the commandline!
                mdp_parameters['gen-temp'] = mdp_parameters.pop('gen_temp', ref_t)
                wmsg = ("Size of __main__ is only %.1f%% of __environment__ so "
                        "we use 'System' for T-coupling and ref_t = %g K and "
                        "tau_t = %g 1/ps (can be changed in mdp_parameters).\n"
                        % (x * 100, ref_t, tau_t))
                logger.warning(wmsg)
                warnings.warn(wmsg, category=AutoCorrectionWarning)
            else:
                # couple protein and bath separately
                n_tc_groups = len(tc_group_names)
                tau_t = asiterable(mdp_parameters.pop('tau_t', 0.1))
                ref_t = asiterable(mdp_parameters.pop('ref_t', 300))

                if len(tau_t) != n_tc_groups:
                    tau_t = n_tc_groups * [tau_t[0]]
                    wmsg = ("%d coupling constants should have been supplied for tau_t. "
                            "Using %f 1/ps for all of them." % (n_tc_groups, tau_t[0]))
                    logger.warning(wmsg)
                    warnings.warn(wmsg, category=AutoCorrectionWarning)
                if len(ref_t) != n_tc_groups:
                    ref_t = n_tc_groups * [ref_t[0]]
                    wmsg = ("%d temperatures should have been supplied for ref_t. "
                            "Using %g K for all of them." % (n_tc_groups, ref_t[0]))
                    logger.warning(wmsg)
                    warnings.warn(wmsg, category=AutoCorrectionWarning)

                mdp_parameters['tc-grps'] = tc_group_names
                mdp_parameters['tau_t'] = tau_t
                mdp_parameters['ref_t'] = ref_t
                mdp_parameters['gen-temp'] = mdp_parameters.pop('gen_temp', ref_t[0])

            # grompp below uses the freshly written main index, so callers
            # must get the same file back in the result
            index = realpath(mainindex)
            result['ndx'] = index

        if tcoupl_off:
            logger.info("Tcoupl == no: disabling all temperature coupling mdp options")
            mdp_parameters['tc-grps'] = ""
            mdp_parameters['tau_t'] = ""
            mdp_parameters['ref_t'] = ""
            mdp_parameters['gen-temp'] = ""
        if mdp_parameters.get('Pcoupl', '').lower() == 'no':
            logger.info("Pcoupl == no: disabling all pressure coupling mdp options")
            mdp_parameters['tau_p'] = ""
            mdp_parameters['ref_p'] = ""
            mdp_parameters['compressibility'] = ""

        # do multiple concurrent simulations - ensemble sampling
        if multi > 1:
            # random.randint() includes both end points: cap at 2**31 - 1 so
            # that every seed fits into a signed 32 bit integer (32 bit
            # gromacs, even on a 64 bit machine)
            max_seed = 2**31 - 1
            for i in range(multi):
                suffix = str(i)
                new_mdp = deffnm + suffix + ".mdp"
                mdout = deffnm + "out" + suffix + ".mdp"
                pp = "processed" + suffix + ".top"
                tpr = deffnm + suffix + ".tpr"
                # doing ensemble sampling, so give different seeds for each one
                mdp_parameters["andersen_seed"] = random.randint(0, max_seed)
                mdp_parameters["gen_seed"] = random.randint(0, max_seed)
                mdp_parameters["ld_seed"] = random.randint(0, max_seed)
                unprocessed = gromacs.cbook.edit_mdp(
                    mdp_template, new_mdp=new_mdp, **mdp_parameters)
                check_mdpargs(unprocessed)
                gromacs.grompp(f=new_mdp, p=topology, c=structure, n=index,
                               o=tpr, po=mdout, pp=pp, **unprocessed)
            # only add multi to result if we really are doing multiple runs
            result["multi"] = multi
        else:
            new_mdp = deffnm + '.mdp'
            tpr = deffnm + '.tpr'
            unprocessed = gromacs.cbook.edit_mdp(
                mdp_template, new_mdp=new_mdp, **mdp_parameters)
            check_mdpargs(unprocessed)
            gromacs.grompp(f=new_mdp, p=topology, c=structure, n=index,
                           o=tpr, po="mdout.mdp", pp="processed.top",
                           **unprocessed)

        # generate scripts for queing system if requested
        if qname is not None:
            runscripts = gromacs.qsub.generate_submit_scripts(
                qscript_template, deffnm=deffnm, jobname=qname, budget=budget,
                startdir=startdir, mdrun_opts=mdrun_opts, walltime=walltime)
            result["qscript"] = runscripts

    logger.info("[%(dirname)s] All files set up for a run time of %(runtime)g ps "
                "(dt=%(dt)g, nsteps=%(nsteps)g)"
                % {'dirname': dirname, 'runtime': runtime,
                   'dt': dt, 'nsteps': nsteps})

    result.update(mdp_kwargs)   # return extra mdp args so that one can use them for prod run
    result.pop('define', None)  # but make sure that -DPOSRES does not stay...
    return result
def __init__(self, molecule=None, **kwargs):
    """Set up Simulation instance.

    The *molecule* of the compound molecule should be supplied. Existing
    files (which have been generated in previous runs) can also be
    supplied.

    :Keywords:
      *molecule*
          Identifier for the compound molecule. This is the same as the
          entry in the ``[ molecule ]`` section of the itp file. ["DRUG"]
      *filename*
          If provided and *molecule* is ``None`` then load the instance from
          the pickle file *filename*, which was generated with
          :meth:`~mdpow.equil.Simulation.save`.
      *dirname*
          base directory; all other directories are created under it
      *solvent*
          'water' or 'octanol' or 'cyclohexane'
      *solventmodel*
          ``None`` chooses the default (e.g.,
          :data:`mdpow.forcefields.DEFAULT_WATER_MODEL` for
          ``solvent == "water"``). Other options are the models defined in
          :data:`mdpow.forcefields.GROMACS_WATER_MODELS`. At the moment,
          there are no alternative parameterizations included for other
          solvents.
      *mdp*
          dict with keys corresponding to the stages ``energy_minimize``,
          ``MD_restrained``, ``MD_relaxed``, ``MD_NPT`` and values *mdp*
          file names (if no entry then the package defaults are used)
      *kwargs*
          advanced keywords for short-circuiting; see
          :data:`mdpow.equil.Simulation.filekeys`.

    :Raises: :exc:`ValueError` if no parameters are available for the
             combination of *solvent* and *solventmodel*
    """
    self.__cache = {}

    filename = kwargs.pop('filename', None)
    dirname = kwargs.pop('dirname', self.dirname_default)
    solvent = kwargs.pop('solvent', self.solvent_default)
    # 'water' will choose the default ('tip4p'), other choices are
    # 'tip3p', 'spc', 'spce', for water; no choices
    # available for 'cyclohexane' and 'octanol'
    solventmodel = kwargs.pop('solventmodel', None)

    # mdp files --- should get values from default runinput.cfg
    # self.mdp: key = stage, value = path to MDP file
    user_mdp = kwargs.pop('mdp', {})
    self.mdp = {stage: config.get_template(name)
                for stage, name in self.mdp_defaults.items()}
    # None values in the kwarg mdp dict are ignored
    mdp_overrides = {stage: config.get_template(name)
                     for stage, name in user_mdp.items()
                     if name is not None}
    self.mdp.update(mdp_overrides)

    if molecule is not None or filename is None:
        # fresh setup from the supplied keywords
        self.molecule = molecule or 'DRUG'

        basedir = realpath(dirname)    # .../Equilibrium/<solvent>
        include_dirs = list(asiterable(kwargs.pop('includes', [])))
        self.dirs = AttributeDict(
            basedir=basedir,
            includes=include_dirs + [config.includedir],
        )

        # pre-set filenames: keyword == variable name
        self.files = AttributeDict(
            [(key, kwargs.pop(key, None)) for key in self.filekeys])
        self.deffnm = kwargs.pop("deffnm", "md")

        if self.files.topology:
            # assume that a user-supplied topology lives in a 'standard'
            # top dir that includes the necessary itp file(s)
            topology_dir = os.path.dirname(self.files.topology)
            self.dirs.topology = realpath(topology_dir)
            self.dirs.includes.append(self.dirs.topology)

        self.solvent_type = solvent
        self.solventmodel_identifier = forcefields.get_solvent_identifier(
            solvent, solventmodel)
        if self.solventmodel_identifier is None:
            msg = "No parameters for solvent {0} and solventmodel {1} available.".format(
                solvent, solventmodel)
            logger.error(msg)
            raise ValueError(msg)
        self.solventmodel = forcefields.get_solvent_model(
            self.solventmodel_identifier)

        # an explicit distance wins over the per-solvent default
        distance = kwargs.pop('distance', None)
        if distance is None:
            distance = DIST[solvent]

        self.solvent = AttributeDict(itp=self.solventmodel.itp,
                                     box=self.solventmodel.coordinates,
                                     distance=distance)

        self.filename = filename or self.solvent_type + '.simulation'
    else:
        # load from pickle file
        self.load(filename)
        self.filename = filename
        kwargs = {}    # for super

    super(Simulation, self).__init__(**kwargs)