def __init__(self, **kwargs):
    """Set up Worker class.

    :Keywords:
      *plugin* : instance
         The :class:`Plugin` instance that owns this worker. **Must be supplied.**
      *simulation*
         A :class:`Simulation` object, required for registration,
         but can be supplied later.
      *kwargs*
         All other keyword arguments are passed to the super class.

    :Raises: :exc:`AssertionError` if *plugin* is missing or ``None``.
    """
    self.plugin = kwargs.pop('plugin', None)
    """:class:`Plugin` instance that owns this Worker."""
    if self.plugin is None:
        # *plugin* is a required keyword argument. An explicit raise is used
        # instead of an ``assert`` statement so that the check is not removed
        # when Python runs with -O; the exception type is kept unchanged.
        raise AssertionError(
            "Worker requires the 'plugin' keyword argument (got None)")

    self.plugin_name = self.plugin.plugin_name
    """Name of the plugin that this Worker belongs to."""

    # eventually needed but can come after init
    self.simulation = kwargs.pop('simulation', None)
    self.location = self.plugin_name  # directory name under analysisdir
    self.results = AttributeDict()  # store results
    self.parameters = AttributeDict()  # container for options, filenames, etc...
    self.parameters.filenames = AttributeDict()

    # 'plugin' and 'simulation' were popped above; the rest goes up the MRO.
    super(Worker, self).__init__(**kwargs)
def analyze(self, **kwargs):
    """Short description of postprocessing.

    The analyze method typically postprocesses the data files
    generated by run. Splitting the complete analysis task into
    two parts (*run* and *analyze*) is advantageous because in
    this way parameters of postprocessing steps can be easily
    changed without having to rerun the time consuming trajectory
    analysis.

    :Keywords:
      *kw1*
         description

    :Returns: a dictionary of the results and also sets ``self.results``.
    """
    # Imported for use by the postprocessing code that fills this template.
    from gromacs.formats import XVG

    collected = AttributeDict()

    # Template notes:
    # - Put the postprocessing code here.
    # - Store each computed quantity as collected[key]. The key can be chosen
    #   freely but *must* be provided so that other functions can access the
    #   results uniformly.
    # - Storing instances that have a plot() method is encouraged; the
    #   plot() method of the plugin then does not need to be changed. For
    #   example, gromacs.formats.XVG(filename) creates an object from an
    #   xvg file that knows how to plot itself.

    self.results = collected
    return collected
def analyze(self, **kwargs):
    """Analyze hydrogen bond output.

    * hydrogen bond existence (existence)
    * total number of hydrogen bonds (num)
    * (others can be added easily)

    The following entries are stored in the result dictionary:

    ``num``
        :class:`~gromacs.formats.XVG` built from the ``num`` file
    ``matrix``
        :class:`~gromacs.formats.XPM` built from the ``hbm`` file
        (read with ``reverse=True``)
    ``existence``
        list of tuples ``(description, fraction)``; the descriptions are
        the non-comment lines of the ``log`` file and the fractions are
        the mean of the matrix array along axis 0

    The existence table is also written to the ``existence`` file, one
    line per entry with the fraction expressed in percent.

    :Returns: a dictionary of the results and also sets ``self.results``.
    """
    from gromacs.formats import XPM, XVG

    filenames = self.parameters.filenames

    results = AttributeDict()
    results['num'] = XVG(filenames['num'])
    results['matrix'] = hbm = XPM(filenames['hbm'], reverse=True)

    hb_fraction = hbm.array.mean(axis=0)
    # Context manager: the log file is closed as soon as it has been read.
    with open(filenames['log']) as log:
        desc = [line.strip() for line in log if not line.startswith('#')]

    # Materialise the pairs: a bare zip() is a one-shot iterator on Python 3
    # and would be exhausted by the loop below, leaving nothing for callers.
    results['existence'] = list(zip(desc, hb_fraction))

    with open(filenames['existence'], "w") as out:
        logger.info(
            "Hydrogen bond existence analysis (results['existence'] and %(existence)r)",
            filenames)
        for name, frac in results['existence']:
            logger.info("hb_existence: %-40s %4.1f%%", name, 100 * frac)
            # The conversion (!s) must precede the format spec (:<40);
            # the reversed order is rejected with ValueError.
            out.write("{0!s:<40} {1:4.1f}%\n".format(name, 100 * frac))

    self.results = results
    return results
def analyze(self, **kwargs):
    """Wrap every registered data file in an :class:`XVG` object.

    Each entry ``name -> filename`` in ``self.parameters.filenames``
    becomes ``name -> XVG(filename)`` in the result dictionary.

    :Returns: a dictionary of the results and also sets ``self.results``.
    """
    collected = AttributeDict(
        (key, XVG(path)) for key, path in self.parameters.filenames.items())
    self.results = collected
    return collected
def analyze(self, **kwargs):
    """Mindist analysis for all cysteines.

    Runs ``self._mindist`` for every residue id in
    ``self.parameters.cysteines`` and stores the outcome under the key
    ``Cys<resid>``.

    :Returns: results for interactive analysis; also sets ``self.results``.
    """
    per_cysteine = AttributeDict()
    for resid in self.parameters.cysteines:
        # identifier should be a valid python variable name
        key = 'Cys{resid:d}'.format(resid=resid)
        per_cysteine[key] = self._mindist(resid)
    self.results = per_cysteine
    return per_cysteine
def analyze(self, **kwargs):
    """Collect output xvg files as :class:`gromacs.formats.XVG` objects.

    The only entry is ``Energy``, built from the ``Energy`` file name in
    ``self.parameters.filenames``.

    :Returns: a dictionary of the results and also sets ``self.results``.
    """
    from gromacs.formats import XVG

    logger.info("Preparing Energy graphs as XVG objects.")
    energy_file = self.parameters.filenames['Energy']
    collected = AttributeDict(Energy=XVG(energy_file))
    self.results = collected
    return collected
def analyze(self, **kwargs):
    """Collect output xvg files as :class:`gromacs.formats.XVG` objects.

    Every entry of ``self.parameters.filenames`` is loaded as an XVG
    object and stored under the same key.

    :Returns: a dictionary of the results and also sets ``self.results``.
    """
    from gromacs.formats import XVG

    logger.info("Preparing HelixBundle graphs as XVG objects.")
    xvg_by_name = [
        (key, XVG(path))
        for key, path in self.parameters.filenames.items()
    ]
    collected = AttributeDict(xvg_by_name)
    self.results = collected
    return collected
class Simulation(Journalled):
    """Simple MD simulation of a single compound molecule in water.

    Typical use ::

       S = Simulation(molecule='DRUG')
       S.topology(itp='drug.itp')
       S.solvate(struct='DRUG-H.pdb')
       S.energy_minimize()
       S.MD_relaxed()
       S.MD()

    .. Note:: The OPLS/AA force field and the TIP4P water molecule is the
              default; changing this is possible but will require provision
              of customized itp, mdp and template top files at various
              stages.
    """

    #: Keyword arguments to pre-set some file names; they are keys in :attr:`Simulation.files`.
    filekeys = ('topology', 'processed_topology', 'structure', 'solvated',
                'ndx', 'energy_minimized', 'MD_relaxed', 'MD_restrained',
                'MD_NPT')
    topdir_default = "Equilibrium"
    dirname_default = os.path.curdir
    solvent_default = 'water'

    #: Coordinate files of the full system in increasing order of advancement of
    #: the protocol; the later the better. The values are keys into :attr:`Simulation.files`.
    coordinate_structures = ('solvated', 'energy_minimized', 'MD_relaxed',
                             'MD_restrained', 'MD_NPT')
    checkpoints = ('solvated', 'energy_minimized', 'MD_relaxed',
                   'MD_restrained', 'MD_NPT')

    #: Check list of all methods that can be run as an independent protocol; see also
    #: :meth:`Simulation.get_protocol` and :class:`restart.Journal`
    protocols = (
        "MD_NPT", "MD_NPT_run",                # *_run as dummies for the ...
        "MD_relaxed", "MD_relaxed_run",        # ...checkpointing logic
        "MD_restrained", "MD_restrained_run",
        "energy_minimize", "solvate", "topology")

    #: Default Gromacs *MDP* run parameter files for the different stages.
    #: (All are part of the package and are found with :func:`mdpow.config.get_template`.)
    mdp_defaults = {
        'MD_relaxed': 'NPT_opls.mdp',
        'MD_restrained': 'NPT_opls.mdp',
        'MD_NPT': 'NPT_opls.mdp',
        'energy_minimize': 'em_opls.mdp',
    }

    def __init__(self, molecule=None, **kwargs):
        """Set up Simulation instance.

        The *molecule* of the compound molecule should be supplied. Existing
        files (which have been generated in previous runs) can also be
        supplied.

        :Keywords:
          *molecule*
              Identifier for the compound molecule. This is the same as the
              entry in the ``[ molecule ]`` section of the itp file. ["DRUG"]
          *filename*
              If provided and *molecule* is ``None`` then load the instance
              from the pickle file *filename*, which was generated with
              :meth:`~mdpow.equil.Simulation.save`.
          *dirname*
              base directory; all other directories are created under it
          *forcefield*
              'OPLS-AA' or 'CHARMM' or 'AMBER'
          *solvent*
              'water' or 'octanol' or 'cyclohexane' or 'wetoctanol'
          *solventmodel*
              ``None`` chooses the default (e.g,
              :data:`mdpow.forcefields.DEFAULT_WATER_MODEL` for
              ``solvent == "water"``). Other options are the models defined
              in :data:`mdpow.forcefields.GROMACS_WATER_MODELS`. At the
              moment, there are no alternative parameterizations included
              for other solvents.
          *mdp*
              dict with keys corresponding to the stages
              ``energy_minimize``, ``MD_restrained``, ``MD_relaxed``,
              ``MD_NPT`` and values *mdp* file names (if no entry then the
              package defaults are used)
          *distance*
              minimum distance between solute and closest box face
          *kwargs*
              advanced keywords for short-circuiting; see
              :data:`mdpow.equil.Simulation.filekeys`.

        :Raises: :exc:`ValueError` if no parameters are available for the
                 combination of *solvent* and *solventmodel*.
        """
        self.__cache = {}
        filename = kwargs.pop('filename', None)
        dirname = kwargs.pop('dirname', self.dirname_default)

        forcefield = kwargs.pop('forcefield', 'OPLS-AA')
        solvent = kwargs.pop('solvent', self.solvent_default)
        # mdp files --- should get values from default runinput.cfg
        # None values in the kwarg mdp dict are ignored
        # self.mdp: key = stage, value = path to MDP file

        # 'water' will choose the default ('tip4p'), other choices are
        # 'tip3p', 'spc', 'spce', 'm24', for water; no choices
        # available for 'cyclohexane' and 'octanol'
        solventmodel = kwargs.pop('solventmodel', None)

        mdp_kw = kwargs.pop('mdp', {})
        self.mdp = dict((stage, config.get_template(fn))
                        for stage, fn in self.mdp_defaults.items())
        self.mdp.update(dict((stage, config.get_template(fn))
                             for stage, fn in mdp_kw.items()
                             if fn is not None))

        if molecule is None and filename is not None:
            # load from pickle file
            self.load(filename)
            self.filename = filename
            kwargs = {}  # for super
        else:
            self.molecule = molecule or 'DRUG'
            self.dirs = AttributeDict(
                basedir=realpath(dirname),  # .../Equilibrium/<solvent>
                includes=list(asiterable(kwargs.pop('includes', [])))
                + [config.includedir],
            )
            # pre-set filenames: keyword == variable name
            self.files = AttributeDict(
                [(k, kwargs.pop(k, None)) for k in self.filekeys])
            self.deffnm = kwargs.pop("deffnm", "md")

            if self.files.topology:
                # assume that a user-supplied topology lives in a 'standard' top dir
                # that includes the necessary itp file(s)
                self.dirs.topology = realpath(
                    os.path.dirname(self.files.topology))
                self.dirs.includes.append(self.dirs.topology)

            self.forcefield = forcefield
            self.solvent_type = solvent
            self.solventmodel_identifier = forcefields.get_solvent_identifier(
                solvent,
                model=solventmodel,
                forcefield=forcefield,
            )
            if self.solventmodel_identifier is None:
                msg = "No parameters for solvent {0} and solventmodel {1} available.".format(
                    solvent, solventmodel)
                logger.error(msg)
                raise ValueError(msg)
            self.solventmodel = forcefields.get_solvent_model(
                self.solventmodel_identifier,
                forcefield=forcefield,
            )

            distance = kwargs.pop('distance', None)
            distance = distance if distance is not None else DIST[solvent]

            self.solvent = AttributeDict(itp=self.solventmodel.itp,
                                         box=self.solventmodel.coordinates,
                                         distance=distance)

            self.filename = filename or self.solvent_type + '.simulation'

        super(Simulation, self).__init__(**kwargs)

    def BASEDIR(self, *args):
        """Join *args* onto the base directory ``self.dirs.basedir``."""
        return os.path.join(self.dirs.basedir, *args)

    def save(self, filename=None):
        """Save instance to a pickle file.

        The default filename is the name of the file that was last loaded
        from or saved to. If no name is known at all then
        ``<solvent_type>.simulation`` is used and a warning is logged.
        """
        if filename is None:
            if self.filename is None:
                self.filename = self.solvent_type + '.simulation'
                logger.warning(
                    "No filename known, saving instance under name %r",
                    self.filename)
            filename = self.filename
        else:
            self.filename = filename
        with open(filename, 'wb') as f:
            pickle.dump(self, f)
        logger.debug("Instance pickled to %r", filename)

    def load(self, filename=None):
        """Re-instantiate class from pickled file.

        The attributes of the unpickled instance replace the attributes of
        this instance. If *filename* is ``None`` the last known file name
        is used, falling back to ``<molecule>.pickle``.
        """
        if filename is None:
            if self.filename is None:
                self.filename = self.molecule.lower() + '.pickle'
                logger.warning("No filename known, trying name %r",
                               self.filename)
            filename = self.filename
        # NOTE(security): unpickling executes code embedded in the file;
        # only load files that come from a trusted source.
        with open(filename, 'rb') as f:
            instance = pickle.load(f)
        self.__dict__.update(instance.__dict__)
        logger.debug("Instance loaded from %r", filename)

    def make_paths_relative(self, prefix=os.path.curdir):
        """Hack to be able to copy directories around: prune basedir from paths.

        The base directory is replaced by *prefix* in all entries of
        ``self.files``, ``self.dirs`` and ``self.mdp`` that are strings (or
        lists of strings); other values are left untouched.

        .. Warning:: This is not guaranteed to work for all paths. In
                     particular, check
                     :attr:`mdpow.equil.Simulation.dirs.includes` and adjust
                     manually if necessary.
        """
        def assinglet(m):
            # undo the list-wrapping of asiterable(): [] -> None, [x] -> x
            if len(m) == 1:
                return m[0]
            elif len(m) == 0:
                return None
            return m

        basedir = self.dirs.basedir
        for key, fn in self.files.items():
            try:
                self.files[key] = fn.replace(basedir, prefix)
            except AttributeError:
                # not a string (e.g. None): nothing to rewrite
                pass
        for key, val in self.dirs.items():
            fns = asiterable(val)  # treat them all as lists
            try:
                self.dirs[key] = assinglet(
                    [fn.replace(basedir, prefix) for fn in fns])
            except AttributeError:
                pass
        for key, fn in self.mdp.items():
            try:
                self.mdp[key] = fn.replace(basedir, prefix)
            except AttributeError:
                pass
        logger.warning(
            "make_paths_relative(): check/manually adjust %s.dirs.includes = %r !",
            self.__class__.__name__, self.dirs.includes)

    def topology(self, itp='drug.itp', prm=None, **kwargs):
        """Generate a topology for compound *molecule*.

        :Keywords:
            *itp*
               Gromacs itp file; will be copied to topology dir and
               included in topology
            *prm*
               Gromacs prm file; if given, will be copied to topology dir
               and included in topology
            *dirname*
               name of the topology directory ["top"]
            *kwargs*
               see source for *top_template*, *topol*

        :Returns: dict with the keys ``dirname`` and ``topol``.
        """
        self.journal.start('topology')

        dirname = kwargs.pop('dirname', self.BASEDIR('top'))
        self.dirs.topology = realpath(dirname)

        setting = forcefields.get_ff_paths(self.forcefield)
        template = forcefields.get_top_template(self.solvent_type)

        top_template = config.get_template(kwargs.pop('top_template', template))
        topol = kwargs.pop('topol', os.path.basename(top_template))
        self.top_template = top_template
        itp = os.path.realpath(itp)
        _itp = os.path.basename(itp)

        if prm is None:
            prm_kw = ''
        else:
            prm = os.path.realpath(prm)
            _prm = os.path.basename(prm)
            prm_kw = '#include "{}"'.format(_prm)

        with in_dir(dirname):
            shutil.copy(itp, _itp)
            if prm is not None:
                shutil.copy(prm, _prm)
            gromacs.cbook.edit_txt(top_template, [
                (r'#include +"oplsaa\.ff/forcefield\.itp"',
                 r'oplsaa\.ff/', setting[0]),
                (r'#include +"compound\.itp"', r'compound\.itp', _itp),
                (r'#include +"oplsaa\.ff/tip4p\.itp"',
                 r'oplsaa\.ff/tip4p\.itp', setting[0] + self.solvent.itp),
                (r'#include +"oplsaa\.ff/ions_opls\.itp"',
                 r'oplsaa\.ff/ions_opls\.itp', setting[1]),
                (r'#include +"compound\.prm"',
                 r'#include +"compound\.prm"', prm_kw),
                (r'#include +"water\.itp"', r'water\.itp', setting[2]),
                (r'Compound', 'solvent', self.solvent_type),
                (r'Compound', 'DRUG', self.molecule),
                (r'DRUG\s*1', 'DRUG', self.molecule),
            ], newname=topol)
        logger.info(
            '[%(dirname)s] Created topology %(topol)r that includes %(_itp)r',
            vars())

        # update known files and dirs
        self.files.topology = realpath(dirname, topol)
        if self.dirs.topology not in self.dirs.includes:
            self.dirs.includes.append(self.dirs.topology)

        self.journal.completed('topology')
        return {'dirname': dirname, 'topol': topol}

    @staticmethod
    def _setup_solvate(**kwargs):
        """Solvate structure in a single solvent box."""
        return gromacs.setup.solvate(**kwargs)

    def solvate(self, struct=None, **kwargs):
        """Solvate structure *struct* in a box of solvent.

        The solvent is determined with the *solvent* keyword to the
        constructor.

        :Keywords:
          *struct*
              pdb or gro coordinate file (if not supplied, the value is
              used that was supplied to the constructor of
              :class:`~mdpow.equil.Simulation`)
          *distance*
              minimum distance between solute and the closest box face; the
              default depends on the solvent but can be set explicitly
              here, too.
          *bt*
              any box type understood by :func:`gromacs.editconf` (``-bt``):

              * "triclinic" is a triclinic box,
              * "cubic" is a rectangular box with all sides equal;
              * "dodecahedron" represents a rhombic dodecahedron;
              * "octahedron" is a truncated octahedron.

              The default is "dodecahedron".
          *kwargs*
              All other arguments are passed on to
              :func:`gromacs.setup.solvate`, but set to sensible default
              values. *top* and *water* are always fixed.

        :Raises: :exc:`ValueError` if *struct* or the topology is empty or
                 if *bt* is not one of the supported box types.
        """
        self.journal.start('solvate')

        self.dirs.solvation = realpath(
            kwargs.setdefault('dirname', self.BASEDIR('solvation')))
        kwargs['struct'] = self._checknotempty(struct or self.files.structure,
                                               'struct')
        kwargs['top'] = self._checknotempty(self.files.topology, 'top')
        kwargs['water'] = self.solvent.box
        # quotes are needed for make_ndx
        kwargs.setdefault('mainselection', '"%s"' % self.molecule)
        kwargs.setdefault('distance', self.solvent.distance)

        boxtype = kwargs.pop('bt', None)
        boxtype = boxtype if boxtype is not None else "dodecahedron"
        if boxtype not in ("dodecahedron", "triclinic", "cubic", "octahedron"):
            msg = "Invalid boxtype '{0}', not suitable for 'gmx editconf'.".format(
                boxtype)
            logger.error(msg)
            raise ValueError(msg)
        kwargs['bt'] = boxtype

        # list(): user-supplied includes may be any iterable (e.g. a tuple),
        # which could not be concatenated with a list directly.
        kwargs['includes'] = list(asiterable(kwargs.pop('includes', []))) \
            + self.dirs.includes

        params = self._setup_solvate(**kwargs)

        self.files.structure = kwargs['struct']
        self.files.solvated = params['struct']
        self.files.ndx = params['ndx']

        # we can also make a processed topology right now
        self.processed_topology(**kwargs)

        self.journal.completed('solvate')
        return params

    def processed_topology(self, **kwargs):
        """Create a portable topology file from the topology and the solvated system.

        If no solvated structure is known (or the file does not exist) then
        :meth:`solvate` is run first.

        :Returns: the file name of the processed topology.
        """
        if self.files.solvated is None or not os.path.exists(
                self.files.solvated):
            self.solvate(**kwargs)
        kwargs['topol'] = self.files.topology
        kwargs['struct'] = self.files.solvated
        kwargs['includes'] = self.dirs.includes
        self.files.processed_topology = gromacs.cbook.create_portable_topology(
            **kwargs)
        return self.files.processed_topology

    def energy_minimize(self, **kwargs):
        """Energy minimize the solvated structure on the local machine.

        *kwargs* are passed to :func:`gromacs.setup.energy_minimize` but if
        :meth:`~mdpow.equil.Simulation.solvate` step has been carried out
        previously all the defaults should just work.

        :Returns: the parameter dict returned by
                  :func:`gromacs.setup.energy_minimize`.
        """
        self.journal.start('energy_minimize')

        self.dirs.energy_minimization = realpath(
            kwargs.setdefault('dirname', self.BASEDIR('em')))
        kwargs['top'] = self.files.topology
        kwargs.setdefault('struct', self.files.solvated)
        kwargs.setdefault('mdp', self.mdp['energy_minimize'])
        kwargs['mainselection'] = None
        kwargs['includes'] = list(asiterable(kwargs.pop('includes', []))) \
            + self.dirs.includes

        params = gromacs.setup.energy_minimize(**kwargs)

        self.files.energy_minimized = params['struct']

        self.journal.completed('energy_minimize')
        return params

    def _MD(self, protocol, **kwargs):
        """Basic MD driver for this Simulation. Do not call directly.

        *protocol* names the stage; it is used as journal entry, as default
        directory name and as key into ``self.dirs`` and ``self.files``.
        The keyword *MDfunc* selects the setup function (default
        :func:`gromacs.setup.MD`); *struct* must be supplied by the caller.

        :Raises: :exc:`ValueError` if *struct* is empty and :exc:`IOError`
                 if no starting structure can be found.
        """
        self.journal.start(protocol)

        kwargs.setdefault('dirname', self.BASEDIR(protocol))
        kwargs.setdefault('deffnm', self.deffnm)
        kwargs.setdefault('mdp', config.get_template('NPT_opls.mdp'))
        self.dirs[protocol] = realpath(kwargs['dirname'])
        setupMD = kwargs.pop('MDfunc', gromacs.setup.MD)
        kwargs['top'] = self.files.topology
        kwargs['includes'] = list(asiterable(kwargs.pop('includes', []))) \
            + self.dirs.includes
        kwargs['ndx'] = self.files.ndx
        # important for SD (use custom mdp and ndx!, gromacs.setup._MD)
        kwargs['mainselection'] = None
        self._checknotempty(kwargs['struct'], 'struct')
        if not os.path.exists(kwargs['struct']):
            # struct is not reliable as it depends on qscript so now we just try everything...
            struct = gromacs.utilities.find_first(kwargs['struct'],
                                                  suffices=['pdb', 'gro'])
            if struct is None:
                logger.error(
                    "Starting structure %(struct)r does not exist (yet)"
                    % kwargs)
                raise IOError(errno.ENOENT, "Starting structure not found",
                              kwargs['struct'])
            else:
                logger.info("Found starting structure %r (instead of %r).",
                            struct, kwargs['struct'])
                kwargs['struct'] = struct
        # now setup the whole simulation (this is typically gromacs.setup.MD() )
        params = setupMD(**kwargs)
        # params['struct'] is md.gro but could also be md.pdb --- depends entirely on qscript
        self.files[protocol] = params['struct']
        # Gromacs 4.5.x 'mdrun -c PDB' fails if it cannot find 'residuetypes.dat'
        # so instead of fuffing with GMXLIB we just dump it into the directory
        try:
            shutil.copy(config.topfiles['residuetypes.dat'],
                        self.dirs[protocol])
        except IOError:
            logger.warning(
                "Failed to copy 'residuetypes.dat': mdrun will likely fail to write a final structure"
            )

        self.journal.completed(protocol)
        return params

    def MD_relaxed(self, **kwargs):
        """Short MD simulation with *timestep* = 0.1 fs to relax strain.

        Energy minimization does not always remove all problems and LINCS
        constraint errors occur. A very short *runtime* = 5 ps MD with very
        short integration time step *dt* tends to solve these problems.

        .. seealso:: :func:`gromacs.setup.MD`

        :Keywords:
          *struct*
             starting coordinates (typically guessed)
          *mdp*
             MDP run parameter file for Gromacs
          *qscript*
             list of queuing system submission scripts; probably a good
             idea to always include the default "local.sh" even if you have
             your own ["local.sh"]
          *qname*
             name of the job as shown in the queuing system
          *startdir*
             **advanced uses**: path of the directory on a remote system,
             which will be hard-coded into the queuing system script(s);
             see :func:`gromacs.setup.MD` and
             :class:`gromacs.manager.Manager`
        """
        # default starting structure: the energy minimized system
        kwargs.setdefault('struct', self.files.energy_minimized)
        kwargs.setdefault('dt', 0.0001)  # ps
        kwargs.setdefault('runtime', 5)  # ps
        kwargs.setdefault('mdp', self.mdp['MD_relaxed'])
        return self._MD('MD_relaxed', **kwargs)

    def MD_restrained(self, **kwargs):
        """Short MD simulation with position restraints on compound.

        See documentation of :func:`gromacs.setup.MD_restrained` for
        details. The following keywords can not be changed: top, ndx,
        mainselection

        .. Note:: Position restraints are activated with ``-DPOSRES``
                  directives for :func:`gromacs.grompp`. Hence this will
                  only work if the compound itp file does indeed contain a
                  ``[ posres ]`` section that is protected by a
                  ``#ifdef POSRES`` clause.

        .. seealso:: :func:`gromacs.setup.MD_restrained`

        :Keywords:
          *struct*
              starting coordinates (leave empty for inspired guess of file
              name)
          *mdp*
              MDP run parameter file for Gromacs
          *qscript*
             list of queuing system submission scripts; probably a good
             idea to always include the default "local.sh" even if you have
             your own ["local.sh"]
          *qname*
             name of the job as shown in the queuing system
          *startdir*
             **advanced uses**: path of the directory on a remote system,
             which will be hard-coded into the queuing system script(s);
             see :func:`gromacs.setup.MD` and
             :class:`gromacs.manager.Manager`
        """
        # default: relaxed structure if available, else energy minimized one
        kwargs.setdefault(
            'struct',
            self._lastnotempty(
                [self.files.energy_minimized, self.files.MD_relaxed]))
        kwargs.setdefault('mdp', self.mdp['MD_restrained'])
        kwargs['MDfunc'] = gromacs.setup.MD_restrained
        return self._MD('MD_restrained', **kwargs)

    def MD_NPT(self, **kwargs):
        """Short NPT MD simulation.

        See documentation of :func:`gromacs.setup.MD` for details such as
        *runtime* or specific queuing system options. The following
        keywords can not be changed: *top*, *ndx*, *mainselection*.

        .. Note:: If the system crashes (with LINCS errors), try initial
                  equilibration with timestep *dt* = 0.0001 ps (0.1 fs
                  instead of 2 fs) and *runtime* = 5 ps as done in
                  :meth:`~Simulation.MD_relaxed`

        .. seealso:: :func:`gromacs.setup.MD` and :meth:`Simulation.MD_relaxed`

        :Keywords:
          *struct*
               starting conformation; by default, the most advanced
               structure recorded in :attr:`Simulation.files` (see
               :meth:`Simulation.get_last_structure`)
          *t*
               checkpoint file; by default derived from the most advanced
               structure (see :meth:`Simulation.get_last_checkpoint`) and
               left unset if no structure is known
          *mdp*
               MDP run parameter file for Gromacs
          *runtime*
               total run time in ps
          *qscript*
               list of queuing system scripts to prepare; available values
               are in :data:`gromacs.config.templates` or you can provide
               your own filename(s) in the current directory (see
               :mod:`gromacs.qsub` for the format of the templates)
          *qname*
               name of the job as shown in the queuing system
          *startdir*
               **advanced uses**: path of the directory on a remote system,
               which will be hard-coded into the queuing system script(s);
               see :func:`gromacs.setup.MD` and
               :class:`gromacs.manager.Manager`
        """
        # user structure or the most advanced known structure
        kwargs.setdefault('struct', self.get_last_structure())
        if 't' not in kwargs:
            # Only look up the checkpoint when the caller did not supply
            # one, and only pass it on when a previous structure exists.
            checkpoint = self.get_last_checkpoint()
            if checkpoint is not None:
                kwargs['t'] = checkpoint
        kwargs.setdefault('mdp', self.mdp['MD_NPT'])
        return self._MD('MD_NPT', **kwargs)

    # for convenience and compatibility
    MD = MD_NPT

    @staticmethod
    def _checknotempty(value, name):
        """Return *value*; raise :exc:`ValueError` if it is ``None`` or ``""``."""
        if value is None or value == "":
            raise ValueError("Parameter %s cannot be empty." % name)
        return value

    @staticmethod
    def _lastnotempty(l):
        """Return the last non-empty value in list *l* (or None :-p)"""
        nonempty = [None] + [
            x for x in l if not (x is None or x == "" or x == [])
        ]
        return nonempty[-1]

    def get_last_structure(self):
        """Returns the coordinates of the most advanced step in the protocol."""
        return self._lastnotempty(
            [self.files[name] for name in self.coordinate_structures])

    def get_last_checkpoint(self):
        """Returns the checkpoint of the most advanced step in the protocol.

        Relies on md.gro being present from previous simulation, assumes
        that checkpoint is then present. Returns ``None`` if no structure
        has been recorded yet.
        """
        last = self._lastnotempty(
            [self.files[name] for name in self.checkpoints])
        if last is None:
            # nothing has been run yet: there is no checkpoint to derive
            return None
        return last.replace('.gro', '.cpt')
def __init__(self, molecule=None, **kwargs):
    """Set up Simulation instance.

    The *molecule* of the compound molecule should be supplied. Existing
    files (which have been generated in previous runs) can also be
    supplied.

    :Keywords:
      *molecule*
          Identifier for the compound molecule. This is the same as the
          entry in the ``[ molecule ]`` section of the itp file. ["DRUG"]
      *filename*
          If provided and *molecule* is ``None`` then load the instance
          from the pickle file *filename*, which was generated with
          :meth:`~mdpow.equil.Simulation.save`.
      *dirname*
          base directory; all other directories are created under it
      *forcefield*
          'OPLS-AA' or 'CHARMM' or 'AMBER'
      *solvent*
          'water' or 'octanol' or 'cyclohexane' or 'wetoctanol'
      *solventmodel*
          ``None`` chooses the default (e.g,
          :data:`mdpow.forcefields.DEFAULT_WATER_MODEL` for
          ``solvent == "water"``). Other options are the models defined in
          :data:`mdpow.forcefields.GROMACS_WATER_MODELS`. At the moment,
          there are no alternative parameterizations included for other
          solvents.
      *mdp*
          dict with keys corresponding to the stages ``energy_minimize``,
          ``MD_restrained``, ``MD_relaxed``, ``MD_NPT`` and values *mdp*
          file names (if no entry then the package defaults are used)
      *distance*
          minimum distance between solute and closest box face
      *kwargs*
          advanced keywords for short-circuiting; see
          :data:`mdpow.equil.Simulation.filekeys`.
    """
    self.__cache = {}

    # Consume the keywords that are handled here; the rest goes to super.
    filename = kwargs.pop('filename', None)
    dirname = kwargs.pop('dirname', self.dirname_default)
    forcefield = kwargs.pop('forcefield', 'OPLS-AA')
    solvent = kwargs.pop('solvent', self.solvent_default)
    # 'water' selects the default water model ('tip4p'); the alternatives
    # for water are 'tip3p', 'spc', 'spce', 'm24'. There are no choices for
    # 'cyclohexane' and 'octanol'.
    solventmodel = kwargs.pop('solventmodel', None)
    mdp_kw = kwargs.pop('mdp', {})

    # self.mdp maps stage -> path of the MDP file: package defaults first,
    # then the user-supplied entries (None values are ignored).
    # (The values should eventually come from the default runinput.cfg.)
    self.mdp = {
        stage: config.get_template(name)
        for stage, name in self.mdp_defaults.items()
    }
    overrides = {
        stage: config.get_template(name)
        for stage, name in mdp_kw.items()
        if name is not None
    }
    self.mdp.update(overrides)

    if molecule is not None or filename is None:
        # fresh instance
        self.molecule = molecule or 'DRUG'

        basedir = realpath(dirname)  # .../Equilibrium/<solvent>
        include_dirs = list(asiterable(kwargs.pop('includes', [])))
        include_dirs = include_dirs + [config.includedir]
        self.dirs = AttributeDict(basedir=basedir, includes=include_dirs)

        # pre-set filenames: keyword == variable name
        preset = [(key, kwargs.pop(key, None)) for key in self.filekeys]
        self.files = AttributeDict(preset)
        self.deffnm = kwargs.pop("deffnm", "md")

        if self.files.topology:
            # A user-supplied topology is assumed to live in a 'standard'
            # top dir that also contains the necessary itp file(s).
            topdir = os.path.dirname(self.files.topology)
            self.dirs.topology = realpath(topdir)
            self.dirs.includes.append(self.dirs.topology)

        self.forcefield = forcefield
        self.solvent_type = solvent
        self.solventmodel_identifier = forcefields.get_solvent_identifier(
            solvent, model=solventmodel, forcefield=forcefield)
        if self.solventmodel_identifier is None:
            msg = "No parameters for solvent {0} and solventmodel {1} available.".format(
                solvent, solventmodel)
            logger.error(msg)
            raise ValueError(msg)
        self.solventmodel = forcefields.get_solvent_model(
            self.solventmodel_identifier, forcefield=forcefield)

        distance = kwargs.pop('distance', None)
        if distance is None:
            distance = DIST[solvent]

        self.solvent = AttributeDict(
            itp=self.solventmodel.itp,
            box=self.solventmodel.coordinates,
            distance=distance,
        )

        self.filename = filename or self.solvent_type + '.simulation'
    else:
        # no molecule but a file name: load from pickle file
        self.load(filename)
        self.filename = filename
        kwargs = {}  # for super

    super(Simulation, self).__init__(**kwargs)
def __init__(self, **kwargs):
    """Set up a Simulation object.

    :Keywords:
       *sim*
         Any object that contains the attributes *tpr*, *xtc*,
         and optionally *ndx*
         (e.g. :class:`gromacs.cbook.Transformer`). The individual keywords such
         as *xtc* override the values in *sim*.
       *tpr*
         Gromacs tpr file (**required**)
       *xtc*
         Gromacs trajectory, can also be a trr (**required**)
       *edr*
         Gromacs energy file (only required for some plugins)
       *ndx*
         Gromacs index file
       *absolute*
         ``True``: Turn file names into absolute paths (typically required
         for most plugins); ``False`` keep as they are [``True``]
       *strict*
         ``True``: missing required file keyword raises a :exc:`TypeError`
         and missing the file itself raises a :exc:`IOError`.  ``False``:
         missing required files only give a warning. [``True``]
       *analysisdir*
         directory under which derived data are stored;
         defaults to the directory containing the tpr [None]
       *plugins* : list
         plugin instances or tuples (*plugin class*, *kwarg dict*) or tuples
         (*plugin_class_name*, *kwarg dict*) to be used; more can be
         added later with :meth:`Simulation.add_plugin`.

    :Raises: :exc:`TypeError` if a required file keyword is missing and
             *strict* is ``True``.
    """
    logger.info("Loading simulation data")

    sim = kwargs.pop('sim', None)
    strict = kwargs.pop('strict', True)

    def getpop(attr, required=False, strict=strict):
        """Return attribute from kwargs or sim or None"""
        val = kwargs.pop(attr, None)  # must pop from kwargs to clean it
        if val is not None:
            return val
        try:
            # sim=None also ends up in the except branch below
            return getattr(sim, attr)
        except AttributeError:
            if required:
                errmsg = "Required attribute {0!r} not found in kwargs or sim".format(
                    attr)
                if strict:
                    logger.fatal(errmsg)
                    raise TypeError(errmsg)
                else:
                    logger.warning(
                        errmsg + "... continuing because of strict=False")
                    warnings.warn(errmsg)
            return None

    make_absolute = kwargs.pop('absolute', True)

    def canonical(*args):
        """Join *args* and get the :func:`os.path.realpath`."""
        if None in args:
            return None
        if not make_absolute:
            return os.path.join(*args)
        return os.path.realpath(os.path.join(*args))

    # required files
    self.tpr = canonical(getpop('tpr', required=True))
    self.xtc = canonical(getpop('xtc', required=True))
    # optional files
    self.ndx = canonical(getpop('ndx'))
    self.edr = canonical(getpop('edr'))

    # check existence of required files
    resolve = "exception" if strict else "warn"
    for v in ('tpr', 'xtc'):
        self.check_file(v, getattr(self, v), resolve=resolve)

    # The default is derived from the tpr only when no directory was given
    # and a tpr is known: with strict=False self.tpr may be None, for which
    # os.path.dirname() would fail.
    analysisdir = kwargs.pop('analysisdir', None)
    if analysisdir is None and self.tpr is not None:
        analysisdir = os.path.dirname(self.tpr)
    self.analysis_dir = analysisdir

    #: Registry for plugins: This dict is central.
    self.plugins = AttributeDict()
    #: Use this plugin if none is explicitly specified. Typically set with
    #: :meth:`~Simulation.set_plugin`.
    self.default_plugin_name = None

    # XXX: Or should we simply add instances and then re-register
    #      all instances using register() ?
    # XXX: ... this API should be cleaned up. It seems to be connected
    #      back and forth in vicious circles. -- OB 2009-07-10

    plugins = kwargs.pop('plugins', [])
    # list of tuples (plugin, kwargs) or just (plugin,) if no kwords
    # required (eg if plugin is an instance)
    for x in plugins:
        # A separate name is used for the plugin keywords so that the
        # constructor's own kwargs (used by getpop) are not rebound.
        try:
            # make sure to wrap strings, especially 2-letter ones!
            P, plugin_kwargs = asiterable(x)
        except ValueError:
            P = x
            plugin_kwargs = {}
        self.add_plugin(P, **plugin_kwargs)

    # convenience: if only a single plugin was registered we default to that one
    if len(self.plugins) == 1:
        # list(): dict views cannot be indexed on Python 3
        self.set_plugin(list(self.plugins.keys())[0])

    # Is this needed? If done properly, kwargs should be empty by now BUT
    # because the same list is re-used for all plugins I cannot pop them in
    # the plugins. I don't think multiple inheritance would work with this
    # setup so let's not pretend it does: hence comment out the super-init
    # call:
    ## super(Simulation, self).__init__(**kwargs)
    logger.info("Simulation instance initialised:")
    logger.info(str(self))
class _COM(Worker):
    """COM worker class.

    Runs ``g_traj`` to write the centre of mass (COM) of each index group
    to an xvg file (:meth:`run`), derives per-group statistics from that
    file (:meth:`analyze`), and plots the stored results (:meth:`plot`).
    """

    def __init__(self, **kwargs):
        """Set up COM analysis.

        :Keywords:
          *group_names*
              list of index group names
          *ndx*
              index file if groups are not in the default index
          *offset*
              add the *offset* to the residue numbers [0]
              (stored as ``parameters.offset``)
          *name*
              plugin name [COM]
          *simulation*
              The :class:`gromacs.analysis.Simulation` instance that owns
              the plugin [None]

        All remaining keyword arguments are passed on to the super class.
        """
        # Pop our own keywords first so that the super class never sees them.
        group_names = asiterable(kwargs.pop('group_names', []))
        ndx = kwargs.pop('ndx', None)
        offset = kwargs.pop('offset', 0)

        super(_COM, self).__init__(**kwargs)

        self.parameters.group_names = group_names
        self.parameters.offset = offset
        self.ndx = ndx

        # Registration needs a simulation; without one it has to be
        # triggered later.
        if self.simulation is not None:
            self._register_hook()

    def _register_hook(self, **kwargs):
        """Run when registering; requires simulation.

        Falls back to the simulation's index file if none was given and
        sets up the output file names.
        """
        super(_COM, self)._register_hook(**kwargs)
        assert self.simulation is not None

        if self.ndx is None:
            self.ndx = self.simulation.ndx

        self.parameters.filenames = {
            # result xvg files
            'com': self.plugindir('com.xvg'),
        }

        # default filename for the plots -- not used
        self.parameters.fignames = {
            'com': self.figdir('com'),
        }

    def run(self, force=None, **gmxargs):
        """Analyze trajectory and write COM file.

        All three components of the COM coordinate are written.

        :Arguments:
          - *force*: ``True`` does analysis and overwrites existing files
          - *gmxargs*: additional keyword arguments for
            :func:`gromacs.g_traj`; *com*, *mol*, *ng*, *x*, *y* and *z*
            are always overridden.

        :Raises: :exc:`ValueError` if no index group names were provided.
        """
        gmxargs['com'] = True
        gmxargs['mol'] = False
        gmxargs['ng'] = len(self.parameters.group_names)
        gmxargs['x'] = True
        gmxargs['y'] = True
        gmxargs['z'] = True

        if gmxargs['ng'] == 0:
            # The constructor keyword is 'group_names' (plural).
            errmsg = "No index group name(s) provided. Use group_names with the constructor."
            logger.error(errmsg)
            raise ValueError(errmsg)

        # presumably true when the output is already there and must not be
        # overwritten -- see check_file_exists()
        if self.check_file_exists(self.parameters.filenames['com'],
                                  resolve='warning', force=force):
            return

        logger.info("Analyzing COM ...")
        f = self.parameters.filenames
        gromacs.g_traj(s=self.simulation.tpr, f=self.simulation.xtc,
                       n=self.ndx, ox=f['com'],
                       input=self.parameters.group_names, **gmxargs)

    def analyze(self, **kwargs):
        """Collect output xvg files as :class:`gromacs.formats.XVG` objects.

        - Make COM as a function of time available as XVG files and objects.
        - Compute RMSD of the COM of each group (from average position, "rmsd").
        - Compute distance which encompasses 50% of observations ("median")
        - Compute drift of COM, i.e. length of the vector between
          initial and final position. Initial and final position are
          computed as averages over *nframesavg* frames ("drift").

        RMSD, median, and drift are columns in an xvg file. The rows
        correspond to the groups in ``parameters.group_names``.

        :Keywords:
          *nframesavg*
              number of initial and final frames that are averaged in
              order to compute the drift of the COM of each group
              [5000]
          *refgroup*
              group name whose com is taken as the reference and subtracted
              from all other coms for the distance calculations. If supplied,
              additional result 'com_relative_*refgroup*' is created.

        :Returns: a dictionary of the results and also sets ``self.results``.

        :Raises: :exc:`ValueError` if *refgroup* is not one of the group names.
        """
        from gromacs.formats import XVG

        logger.info("Preparing COM graphs as XVG objects.")
        self.results = AttributeDict(
            (k, XVG(fn)) for k, fn in self.parameters.filenames.items())

        # compute RMSD of COM and shift of COM (drift) between avg pos
        # over first/last nframesavg frames
        nframesavg = kwargs.pop('nframesavg', 5000)
        # A real list is needed for '+' below; asiterable() may hand back
        # a tuple unchanged.
        group_names = list(self.parameters.group_names)
        ngroups = len(group_names)
        xcom = self.results['com'].array

        refgroup = kwargs.pop('refgroup', None)
        if refgroup is not None:
            if refgroup not in group_names:
                errmsg = "refgroup={0!s} must be one of {1!r}".format(
                    refgroup, self.parameters.group_names)
                logger.error(errmsg)
                raise ValueError(errmsg)
            # row 0 is the time; each group occupies three rows (x, y, z)
            nreference = 1 + 3 * group_names.index(refgroup)  # 1-based !!
            reference_com = xcom[nreference:nreference + 3]
            # NOTE(review): this modifies the array of results['com'] in
            # place -- confirm that this is intended.
            # vstack makes a copy, so the reference rows are not changed
            # halfway through the subtraction (can't use broadcast).
            xcom[1:] -= numpy.vstack(ngroups * [reference_com])
            logger.debug("distances computed with refgroup %r", refgroup)

            self.store_xvg('com_relative_{0!s}'.format(refgroup), xcom,
                           names=['time'] + group_names)

        def vlength(v):
            """Return the Euclidean length along the first axis of *v*."""
            return numpy.sqrt(numpy.sum(v**2, axis=0))

        logger.debug(
            "drift calculated between %d-frame averages at beginning and end",
            nframesavg)
        records = []
        # range() instead of xrange() so that this also runs on Python 3
        for i in range(1, 3 * ngroups + 1, 3):
            x = xcom[i:i + 3]
            # distance from the mean position at each time step
            r = vlength(x - x.mean(axis=1)[:, numpy.newaxis])
            rmsd = numpy.sqrt(numpy.mean(r**2))  # radial spread
            median = numpy.median(r)  # radius that contains 50% of the observations
            dx = x[:, :nframesavg].mean(axis=1) - x[:, -nframesavg:].mean(axis=1)
            drift = vlength(dx)

            records.append((rmsd, median, drift))
        self.store_xvg('distance', numpy.transpose(records),
                       names="rmsd,median,drift")

        return self.results

    def plot(self, **kwargs):
        """Plot all results in one graph, labelled by the result keys.

        :Keywords:
           observables
              select one or more of the stored results. Can be a list
              or a string (a key into the results dict). ``None``
              plots everything [``None``]
           figure
               - ``True``: save figures in the given formats
               - "name.ext": save figure under this filename (``ext`` -> format)
               - ``False``: only show on screen [``False``]
           formats : sequence
               sequence of all formats that should be saved [('pdf', 'png')]
           plotargs
               keyword arguments for pylab.plot()
        """
        import pylab
        figure = kwargs.pop('figure', False)
        observables = asiterable(kwargs.pop('observables', self.results.keys()))
        extensions = kwargs.pop('formats', ('pdf', 'png'))

        for name in observables:
            result = self.results[name]
            try:
                result.plot(**kwargs)  # This requires result classes with a plot() method!!
            except AttributeError:
                warnings.warn(
                    "Sorry, plotting of result {0!r} is not implemented".format(name),
                    category=UserWarning)

        # quick labels -- relies on the proper ordering
        labels = [str(n) + " " + dim
                  for n in self.parameters.group_names for dim in 'xyz']
        columns = kwargs.get('columns')
        if columns is not None:
            # select labels according to columns; only makes sense
            # if plotting against the time (col 0)
            if columns[0] == 0:
                # list() so that a tuple of columns is not read as a
                # multi-dimensional index
                labels = numpy.array([None] + labels)[list(columns[1:])]
            else:
                labels = ()

        pylab.legend(labels, loc='best')
        if figure is True:
            for ext in extensions:
                self.savefig(ext=ext)
        elif figure:
            self.savefig(filename=figure)
def analyze(self, **kwargs):
    """Collect output xvg files as :class:`gromacs.formats.XVG` objects.

    - Make COM as a function of time available as XVG files and objects.
    - Compute RMSD of the COM of each group (from average position, "rmsd").
    - Compute distance which encompasses 50% of observations ("median")
    - Compute drift of COM, i.e. length of the vector between
      initial and final position. Initial and final position are
      computed as averages over *nframesavg* frames ("drift").

    RMSD, median, and drift are columns in an xvg file. The rows
    correspond to the groups in ``parameters.group_names``.

    :Keywords:
      *nframesavg*
          number of initial and final frames that are averaged in
          order to compute the drift of the COM of each group
          [5000]
      *refgroup*
          group name whose com is taken as the reference and subtracted
          from all other coms for the distance calculations. If supplied,
          additional result 'com_relative_*refgroup*' is created.

    :Returns: a dictionary of the results and also sets ``self.results``.

    :Raises: :exc:`ValueError` if *refgroup* is not one of the group names.
    """
    from gromacs.formats import XVG

    logger.info("Preparing COM graphs as XVG objects.")
    self.results = AttributeDict(
        (k, XVG(fn)) for k, fn in self.parameters.filenames.items())

    # compute RMSD of COM and shift of COM (drift) between avg pos
    # over first/last nframesavg frames
    nframesavg = kwargs.pop('nframesavg', 5000)
    # Work on a real list: a tuple of group names would break the
    # "['time'] + ..." concatenation below.
    group_names = list(self.parameters.group_names)
    ngroups = len(group_names)
    xcom = self.results['com'].array

    refgroup = kwargs.pop('refgroup', None)
    if refgroup is not None:
        if refgroup not in group_names:
            errmsg = "refgroup={0!s} must be one of {1!r}".format(
                refgroup, self.parameters.group_names)
            logger.error(errmsg)
            raise ValueError(errmsg)
        # row 0 is the time; each group occupies three rows (x, y, z)
        nreference = 1 + 3 * group_names.index(refgroup)  # 1-based !!
        reference_com = xcom[nreference:nreference + 3]
        # NOTE(review): the subtraction changes the array of
        # results['com'] in place -- confirm that this is intended.
        # vstack copies the reference rows first (can't use broadcast).
        xcom[1:] -= numpy.vstack(ngroups * [reference_com])
        logger.debug("distances computed with refgroup %r", refgroup)

        self.store_xvg('com_relative_{0!s}'.format(refgroup), xcom,
                       names=['time'] + group_names)

    def vlength(v):
        """Return the Euclidean length along the first axis of *v*."""
        return numpy.sqrt(numpy.sum(v**2, axis=0))

    logger.debug(
        "drift calculated between %d-frame averages at beginning and end",
        nframesavg)
    records = []
    # range() works on Python 2 and 3; xrange() only exists on Python 2
    for i in range(1, 3 * ngroups + 1, 3):
        x = xcom[i:i + 3]
        # distance from the mean position at each time step
        r = vlength(x - x.mean(axis=1)[:, numpy.newaxis])
        rmsd = numpy.sqrt(numpy.mean(r**2))  # radial spread
        median = numpy.median(r)  # radius that contains 50% of the observations
        dx = x[:, :nframesavg].mean(axis=1) - x[:, -nframesavg:].mean(axis=1)
        drift = vlength(dx)

        records.append((rmsd, median, drift))
    self.store_xvg('distance', numpy.transpose(records),
                   names="rmsd,median,drift")

    return self.results
class _COM(Worker):
    """COM worker class.

    Writes the centre of mass (COM) of each index group with ``g_traj``
    (:meth:`run`), computes per-group statistics from the resulting xvg
    file (:meth:`analyze`) and plots the stored results (:meth:`plot`).
    """

    def __init__(self, **kwargs):
        """Set up COM analysis.

        :Keywords:
          *group_names*
              list of index group names
          *ndx*
              index file if groups are not in the default index
          *offset*
              add the *offset* to the residue numbers [0]
              (stored as ``parameters.offset``)
          *name*
              plugin name [COM]
          *simulation*
              The :class:`gromacs.analysis.Simulation` instance that owns
              the plugin [None]

        Keyword arguments not listed here are handed to the super class.
        """
        # Remove the worker-specific keywords before calling the super class.
        group_names = asiterable(kwargs.pop('group_names', []))
        ndx = kwargs.pop('ndx', None)
        offset = kwargs.pop('offset', 0)

        super(_COM, self).__init__(**kwargs)

        self.parameters.group_names = group_names
        self.parameters.offset = offset
        self.ndx = ndx

        # Only register right away if the simulation is already known.
        if self.simulation is not None:
            self._register_hook()

    def _register_hook(self, **kwargs):
        """Run when registering; requires simulation.

        Uses the simulation's index file if no *ndx* was supplied and
        defines the result and figure file names.
        """
        super(_COM, self)._register_hook(**kwargs)
        assert self.simulation is not None

        if self.ndx is None:
            self.ndx = self.simulation.ndx

        self.parameters.filenames = {
            # result xvg files
            'com': self.plugindir('com.xvg'),
        }

        # default filename for the plots -- not used
        self.parameters.fignames = {
            'com': self.figdir('com'),
        }

    def run(self, force=None, **gmxargs):
        """Analyze trajectory and write COM file.

        All three components of the COM coordinate are written.

        :Arguments:
          - *force*: ``True`` does analysis and overwrites existing files
          - *gmxargs*: additional keyword arguments for
            :func:`gromacs.g_traj`; the options *com*, *mol*, *ng*, *x*,
            *y* and *z* are set here and cannot be changed.

        :Raises: :exc:`ValueError` if no index group names were provided.
        """
        gmxargs['com'] = True
        gmxargs['mol'] = False
        gmxargs['ng'] = len(self.parameters.group_names)
        gmxargs['x'] = True
        gmxargs['y'] = True
        gmxargs['z'] = True

        if gmxargs['ng'] == 0:
            # The keyword read by the constructor is 'group_names'.
            errmsg = "No index group name(s) provided. Use group_names with the constructor."
            logger.error(errmsg)
            raise ValueError(errmsg)

        # presumably signals that existing output should be kept -- see
        # check_file_exists()
        if self.check_file_exists(self.parameters.filenames['com'],
                                  resolve='warning', force=force):
            return

        logger.info("Analyzing COM ...")
        f = self.parameters.filenames
        gromacs.g_traj(s=self.simulation.tpr, f=self.simulation.xtc,
                       n=self.ndx, ox=f['com'],
                       input=self.parameters.group_names, **gmxargs)

    def analyze(self, **kwargs):
        """Collect output xvg files as :class:`gromacs.formats.XVG` objects.

        - Make COM as a function of time available as XVG files and objects.
        - Compute RMSD of the COM of each group (from average position, "rmsd").
        - Compute distance which encompasses 50% of observations ("median")
        - Compute drift of COM, i.e. length of the vector between
          initial and final position. Initial and final position are
          computed as averages over *nframesavg* frames ("drift").

        RMSD, median, and drift are columns in an xvg file. The rows
        correspond to the groups in ``parameters.group_names``.

        :Keywords:
          *nframesavg*
              number of initial and final frames that are averaged in
              order to compute the drift of the COM of each group
              [5000]
          *refgroup*
              group name whose com is taken as the reference and subtracted
              from all other coms for the distance calculations. If supplied,
              additional result 'com_relative_*refgroup*' is created.

        :Returns: a dictionary of the results and also sets ``self.results``.

        :Raises: :exc:`ValueError` if *refgroup* is not one of the group names.
        """
        from gromacs.formats import XVG

        logger.info("Preparing COM graphs as XVG objects.")
        self.results = AttributeDict(
            (k, XVG(fn)) for k, fn in self.parameters.filenames.items())

        # compute RMSD of COM and shift of COM (drift) between avg pos
        # over first/last nframesavg frames
        nframesavg = kwargs.pop('nframesavg', 5000)
        # list() guards against a tuple of names, which cannot be appended
        # to ['time'] below.
        group_names = list(self.parameters.group_names)
        ngroups = len(group_names)
        xcom = self.results['com'].array

        refgroup = kwargs.pop('refgroup', None)
        if refgroup is not None:
            if refgroup not in group_names:
                errmsg = "refgroup=%s must be one of %r" % (
                    refgroup, self.parameters.group_names)
                logger.error(errmsg)
                raise ValueError(errmsg)
            # row 0 is the time; each group occupies three rows (x, y, z)
            nreference = 1 + 3 * group_names.index(refgroup)  # 1-based !!
            reference_com = xcom[nreference:nreference + 3]
            # NOTE(review): results['com'] is altered in place here --
            # confirm that this is intended.
            # vstack copies the reference rows first (can't use broadcast).
            xcom[1:] -= numpy.vstack(ngroups * [reference_com])
            logger.debug("distances computed with refgroup %r", refgroup)

            self.store_xvg('com_relative_%s' % refgroup, xcom,
                           names=['time'] + group_names)

        def vlength(v):
            """Return the Euclidean length along the first axis of *v*."""
            return numpy.sqrt(numpy.sum(v**2, axis=0))

        logger.debug("drift calculated between %d-frame averages at beginning and end",
                     nframesavg)
        records = []
        # range() instead of the Python-2-only xrange()
        for i in range(1, 3 * ngroups + 1, 3):
            x = xcom[i:i + 3]
            # distance from the mean position at each time step
            r = vlength(x - x.mean(axis=1)[:, numpy.newaxis])
            rmsd = numpy.sqrt(numpy.mean(r**2))  # radial spread
            median = numpy.median(r)  # radius that contains 50% of the observations
            dx = x[:, :nframesavg].mean(axis=1) - x[:, -nframesavg:].mean(axis=1)
            drift = vlength(dx)

            records.append((rmsd, median, drift))
        self.store_xvg('distance', numpy.transpose(records),
                       names="rmsd,median,drift")

        return self.results

    def plot(self, **kwargs):
        """Plot all results in one graph, labelled by the result keys.

        :Keywords:
           observables
              select one or more of the stored results. Can be a list
              or a string (a key into the results dict). ``None``
              plots everything [``None``]
           figure
               - ``True``: save figures in the given formats
               - "name.ext": save figure under this filename (``ext`` -> format)
               - ``False``: only show on screen [``False``]
           formats : sequence
               sequence of all formats that should be saved [('pdf', 'png')]
           plotargs
               keyword arguments for pylab.plot()
        """
        import pylab
        figure = kwargs.pop('figure', False)
        observables = asiterable(kwargs.pop('observables', self.results.keys()))
        extensions = kwargs.pop('formats', ('pdf', 'png'))

        for name in observables:
            result = self.results[name]
            try:
                result.plot(**kwargs)  # This requires result classes with a plot() method!!
            except AttributeError:
                warnings.warn("Sorry, plotting of result %r is not implemented" % (name,),
                              category=UserWarning)

        # quick labels -- relies on the proper ordering
        labels = [str(n) + " " + dim
                  for n in self.parameters.group_names for dim in 'xyz']
        columns = kwargs.get('columns', None)
        if columns is not None:
            # select labels according to columns; only makes sense
            # if plotting against the time (col 0)
            if columns[0] == 0:
                # list() so that a tuple of columns is not taken as a
                # multi-dimensional index
                labels = numpy.array([None] + labels)[list(columns[1:])]
            else:
                labels = ()

        pylab.legend(labels, loc='best')
        if figure is True:
            for ext in extensions:
                self.savefig(ext=ext)
        elif figure:
            self.savefig(filename=figure)
def analyze(self, **kwargs):
    """Collect output xvg files as :class:`gromacs.formats.XVG` objects.

    - Make COM as a function of time available as XVG files and objects.
    - Compute RMSD of the COM of each group (from average position, "rmsd").
    - Compute distance which encompasses 50% of observations ("median")
    - Compute drift of COM, i.e. length of the vector between
      initial and final position. Initial and final position are
      computed as averages over *nframesavg* frames ("drift").

    RMSD, median, and drift are columns in an xvg file. The rows
    correspond to the groups in ``parameters.group_names``.

    :Keywords:
      *nframesavg*
          number of initial and final frames that are averaged in
          order to compute the drift of the COM of each group
          [5000]
      *refgroup*
          group name whose com is taken as the reference and subtracted
          from all other coms for the distance calculations. If supplied,
          additional result 'com_relative_*refgroup*' is created.

    :Returns: a dictionary of the results and also sets ``self.results``.

    :Raises: :exc:`ValueError` if *refgroup* is not one of the group names.
    """
    from gromacs.formats import XVG

    logger.info("Preparing COM graphs as XVG objects.")
    self.results = AttributeDict(
        (k, XVG(fn)) for k, fn in self.parameters.filenames.items())

    # compute RMSD of COM and shift of COM (drift) between avg pos
    # over first/last nframesavg frames
    nframesavg = kwargs.pop('nframesavg', 5000)
    # Copy into a list so that "['time'] + ..." also works when the group
    # names are held in a tuple.
    group_names = list(self.parameters.group_names)
    ngroups = len(group_names)
    xcom = self.results['com'].array

    refgroup = kwargs.pop('refgroup', None)
    if refgroup is not None:
        if refgroup not in group_names:
            errmsg = "refgroup=%s must be one of %r" % (
                refgroup, self.parameters.group_names)
            logger.error(errmsg)
            raise ValueError(errmsg)
        # row 0 is the time; each group occupies three rows (x, y, z)
        nreference = 1 + 3 * group_names.index(refgroup)  # 1-based !!
        reference_com = xcom[nreference:nreference + 3]
        # NOTE(review): this in-place subtraction also changes
        # results['com'] -- confirm that this is intended.
        # vstack copies the reference rows first (can't use broadcast).
        xcom[1:] -= numpy.vstack(ngroups * [reference_com])
        logger.debug("distances computed with refgroup %r", refgroup)

        self.store_xvg('com_relative_%s' % refgroup, xcom,
                       names=['time'] + group_names)

    def vlength(v):
        """Return the Euclidean length along the first axis of *v*."""
        return numpy.sqrt(numpy.sum(v**2, axis=0))

    logger.debug("drift calculated between %d-frame averages at beginning and end",
                 nframesavg)
    records = []
    # xrange() does not exist on Python 3; range() runs on both
    for i in range(1, 3 * ngroups + 1, 3):
        x = xcom[i:i + 3]
        # distance from the mean position at each time step
        r = vlength(x - x.mean(axis=1)[:, numpy.newaxis])
        rmsd = numpy.sqrt(numpy.mean(r**2))  # radial spread
        median = numpy.median(r)  # radius that contains 50% of the observations
        dx = x[:, :nframesavg].mean(axis=1) - x[:, -nframesavg:].mean(axis=1)
        drift = vlength(dx)

        records.append((rmsd, median, drift))
    self.store_xvg('distance', numpy.transpose(records),
                   names="rmsd,median,drift")

    return self.results
class Simulation(Journalled): """Simple MD simulation of a single compound molecule in water. Typical use :: S = Simulation(molecule='DRUG') S.topology(itp='drug.itp') S.solvate(struct='DRUG-H.pdb') S.energy_minimize() S.MD_relaxed() S.MD() .. Note:: The OPLS/AA force field and the TIP4P water molecule is the default; changing this is possible but will require provision of customized itp, mdp and template top files at various stages. """ #: Keyword arguments to pre-set some file names; they are keys in :attr:`Simulation.files`. filekeys = ('topology', 'processed_topology', 'structure', 'solvated', 'ndx', 'energy_minimized', 'MD_relaxed', 'MD_restrained', 'MD_NPT') topdir_default = "Equilibrium" dirname_default = os.path.curdir solvent_default = 'water' #: Coordinate files of the full system in increasing order of advancement of #: the protocol; the later the better. The values are keys into :attr:`Simulation.files`. coordinate_structures = ('solvated', 'energy_minimized', 'MD_relaxed', 'MD_restrained', 'MD_NPT') checkpoints = ('solvated','energy_minimized','MD_relaxed','MD_restrained','MD_NPT') #: Check list of all methods that can be run as an independent protocol; see also #: :meth:`Simulation.get_protocol` and :class:`restart.Journal` protocols = ("MD_NPT", "MD_NPT_run", # *_run as dummies for the ... "MD_relaxed", "MD_relaxed_run", # ...checkpointing logic "MD_restrained", "MD_restrained_run", "energy_minimize", "solvate", "topology") #: Default Gromacs *MDP* run parameter files for the different stages. #: (All are part of the package and are found with :func:`mdpow.config.get_template`.) mdp_defaults = {'MD_relaxed': 'NPT_opls.mdp', 'MD_restrained': 'NPT_opls.mdp', 'MD_NPT': 'NPT_opls.mdp', 'energy_minimize': 'em_opls.mdp', } def __init__(self, molecule=None, **kwargs): """Set up Simulation instance. The *molecule* of the compound molecule should be supplied. Existing files (which have been generated in previous runs) can also be supplied. 
:Keywords: *molecule* Identifier for the compound molecule. This is the same as the entry in the ``[ molecule ]`` section of the itp file. ["DRUG"] *filename* If provided and *molecule* is ``None`` then load the instance from the pickle file *filename*, which was generated with :meth:`~mdpow.equil.Simulation.save`. *dirname* base directory; all other directories are created under it *solvent* 'water' or 'octanol' or 'cyclohexane' *solventmodel* ``None`` chooses the default (e.g, :data:`mdpow.forcefields.DEFAULT_WATER_MODEL` for ``solvent == "water"``. Other options are the models defined in :data:`mdpow.forcefields.GROMACS_WATER_MODELS`. At the moment, there are no alternative parameterizations included for other solvents. *mdp* dict with keys corresponding to the stages ``energy_minimize``, ``MD_restrained``, ``MD_relaxed``, ``MD_NPT`` and values *mdp* file names (if no entry then the package defaults are used) *kwargs* advanced keywords for short-circuiting; see :data:`mdpow.equil.Simulation.filekeys`. 
""" self.__cache = {} filename = kwargs.pop('filename', None) dirname = kwargs.pop('dirname', self.dirname_default) solvent = kwargs.pop('solvent', self.solvent_default) # mdp files --- should get values from default runinput.cfg # None values in the kwarg mdp dict are ignored # self.mdp: key = stage, value = path to MDP file # 'water' will choose the default ('tip4p'), other choices are # 'tip3p', 'spc', 'spce', for water; no choices # available for 'cyclohexane' and 'octanol' solventmodel = kwargs.pop('solventmodel', None) mdp_kw = kwargs.pop('mdp', {}) self.mdp = dict((stage, config.get_template(fn)) for stage,fn in self.mdp_defaults.items()) self.mdp.update(dict((stage, config.get_template(fn)) for stage,fn in mdp_kw.items() if fn is not None)) if molecule is None and filename is not None: # load from pickle file self.load(filename) self.filename = filename kwargs = {} # for super else: self.molecule = molecule or 'DRUG' self.dirs = AttributeDict( basedir=realpath(dirname), # .../Equilibrium/<solvent> includes=list(asiterable(kwargs.pop('includes',[]))) + [config.includedir], ) # pre-set filenames: keyword == variable name self.files = AttributeDict([(k, kwargs.pop(k, None)) for k in self.filekeys]) self.deffnm = kwargs.pop("deffnm", "md") if self.files.topology: # assume that a user-supplied topology lives in a 'standard' top dir # that includes the necessary itp file(s) self.dirs.topology = realpath(os.path.dirname(self.files.topology)) self.dirs.includes.append(self.dirs.topology) self.solvent_type = solvent self.solventmodel_identifier = forcefields.get_solvent_identifier(solvent, solventmodel) if self.solventmodel_identifier is None: msg = "No parameters for solvent {0} and solventmodel {1} available.".format( solvent, solventmodel) logger.error(msg) raise ValueError(msg) self.solventmodel = forcefields.get_solvent_model(self.solventmodel_identifier) distance = kwargs.pop('distance', None) distance = distance if distance is not None else DIST[solvent] 
self.solvent = AttributeDict(itp=self.solventmodel.itp, box=self.solventmodel.coordinates, distance=distance) self.filename = filename or self.solvent_type+'.simulation' super(Simulation, self).__init__(**kwargs) def BASEDIR(self, *args): return os.path.join(self.dirs.basedir, *args) def save(self, filename=None): """Save instance to a pickle file. The default filename is the name of the file that was last loaded from or saved to. """ if filename is None: if self.filename is None: self.filename = filename or self.solvent_type+'.simulation' logger.warning("No filename known, saving instance under name %r", self.filename) filename = self.filename else: self.filename = filename with open(filename, 'wb') as f: cPickle.dump(self, f, protocol=cPickle.HIGHEST_PROTOCOL) logger.debug("Instance pickled to %(filename)r" % vars()) def load(self, filename=None): """Re-instantiate class from pickled file.""" if filename is None: if self.filename is None: self.filename = self.molecule.lower() + '.pickle' logger.warning("No filename known, trying name %r", self.filename) filename = self.filename with open(filename, 'rb') as f: instance = cPickle.load(f) self.__dict__.update(instance.__dict__) logger.debug("Instance loaded from %(filename)r" % vars()) def make_paths_relative(self, prefix=os.path.curdir): """Hack to be able to copy directories around: prune basedir from paths. .. Warning:: This is not guaranteed to work for all paths. In particular, check :attrib:`mdpow.equil.Simulation.dirs.includes` and adjust manually if necessary. 
""" def assinglet(m): if len(m) == 1: return m[0] elif len(m) == 0: return None return m basedir = self.dirs.basedir for key, fn in self.files.items(): try: self.files[key] = fn.replace(basedir, prefix) except AttributeError: pass for key, val in self.dirs.items(): fns = asiterable(val) # treat them all as lists try: self.dirs[key] = assinglet([fn.replace(basedir, prefix) for fn in fns]) except AttributeError: pass for key, fn in self.mdp: try: self.mdp[key] = fn.replace(basedir, prefix) except AttributeError: pass logger.warn("make_paths_relative(): check/manually adjust %s.dirs.includes = %r !", self.__class__.__name__, self.dirs.includes) def topology(self, itp='drug.itp', **kwargs): """Generate a topology for compound *molecule*. :Keywords: *itp* Gromacs itp file; will be copied to topology dir and included in topology *dirname* name of the topology directory ["top"] *kwargs* see source for *top_template*, *topol* """ self.journal.start('topology') dirname = kwargs.pop('dirname', self.BASEDIR('top')) self.dirs.topology = realpath(dirname) top_template = config.get_template(kwargs.pop('top_template', 'system.top')) topol = kwargs.pop('topol', os.path.basename(top_template)) itp = os.path.realpath(itp) _itp = os.path.basename(itp) with in_dir(dirname): shutil.copy(itp, _itp) gromacs.cbook.edit_txt(top_template, [('#include +"compound\.itp"', 'compound\.itp', _itp), ('#include +"oplsaa\.ff/tip4p\.itp"', 'tip4p\.itp', self.solvent.itp), ('Compound', 'solvent', self.solvent_type), ('Compound', 'DRUG', self.molecule), ('DRUG\s*1', 'DRUG', self.molecule), ], newname=topol) logger.info('[%(dirname)s] Created topology %(topol)r that includes %(_itp)r', vars()) # update known files and dirs self.files.topology = realpath(dirname, topol) if not self.dirs.topology in self.dirs.includes: self.dirs.includes.append(self.dirs.topology) self.journal.completed('topology') return {'dirname': dirname, 'topol': topol} def solvate(self, struct=None, **kwargs): """Solvate structure 
*struct* in a box of solvent. The solvent is determined with the *solvent* keyword to the constructor. :Keywords: *struct* pdb or gro coordinate file (if not supplied, the value is used that was supplied to the constructor of :class:`~mdpow.equil.Simulation`) *kwargs* All other arguments are passed on to :func:`gromacs.setup.solvate`, but set to sensible default values. *top* and *water* are always fixed. """ self.journal.start('solvate') self.dirs.solvation = realpath(kwargs.setdefault('dirname', self.BASEDIR('solvation'))) kwargs['struct'] = self._checknotempty(struct or self.files.structure, 'struct') kwargs['top'] = self._checknotempty(self.files.topology, 'top') kwargs['water'] = self.solvent.box kwargs.setdefault('mainselection', '"%s"' % self.molecule) # quotes are needed for make_ndx kwargs.setdefault('distance', self.solvent.distance) kwargs['includes'] = asiterable(kwargs.pop('includes',[])) + self.dirs.includes params = gromacs.setup.solvate(**kwargs) self.files.structure = kwargs['struct'] self.files.solvated = params['struct'] self.files.ndx = params['ndx'] # we can also make a processed topology right now self.processed_topology(**kwargs) self.journal.completed('solvate') return params def processed_topology(self, **kwargs): """Create a portable topology file from the topology and the solvated system.""" if self.files.solvated is None or not os.path.exists(self.files.solvated): self.solvate(**kwargs) kwargs['topol'] = self.files.topology kwargs['struct'] = self.files.solvated kwargs['includes'] = self.dirs.includes self.files.processed_topology = gromacs.cbook.create_portable_topology(**kwargs) return self.files.processed_topology def energy_minimize(self, **kwargs): """Energy minimize the solvated structure on the local machine. *kwargs* are passed to :func:`gromacs.setup.energ_minimize` but if :meth:`~mdpow.equil.Simulation.solvate` step has been carried out previously all the defaults should just work. 
""" self.journal.start('energy_minimize') self.dirs.energy_minimization = realpath(kwargs.setdefault('dirname', self.BASEDIR('em'))) kwargs['top'] = self.files.topology kwargs.setdefault('struct', self.files.solvated) kwargs.setdefault('mdp', self.mdp['energy_minimize']) kwargs['mainselection'] = None kwargs['includes'] = asiterable(kwargs.pop('includes',[])) + self.dirs.includes params = gromacs.setup.energy_minimize(**kwargs) self.files.energy_minimized = params['struct'] self.journal.completed('energy_minimize') return params def _MD(self, protocol, **kwargs): """Basic MD driver for this Simulation. Do not call directly.""" self.journal.start(protocol) kwargs.setdefault('dirname', self.BASEDIR(protocol)) kwargs.setdefault('deffnm', self.deffnm) kwargs.setdefault('mdp', config.get_template('NPT_opls.mdp')) self.dirs[protocol] = realpath(kwargs['dirname']) setupMD = kwargs.pop('MDfunc', gromacs.setup.MD) kwargs['top'] = self.files.topology kwargs['includes'] = asiterable(kwargs.pop('includes',[])) + self.dirs.includes kwargs['ndx'] = self.files.ndx kwargs['mainselection'] = None # important for SD (use custom mdp and ndx!, gromacs.setup._MD) self._checknotempty(kwargs['struct'], 'struct') if not os.path.exists(kwargs['struct']): # struct is not reliable as it depends on qscript so now we just try everything... 
struct = gromacs.utilities.find_first(kwargs['struct'], suffices=['pdb', 'gro']) if struct is None: logger.error("Starting structure %(struct)r does not exist (yet)" % kwargs) raise IOError(errno.ENOENT, "Starting structure not found", kwargs['struct']) else: logger.info("Found starting structure %r (instead of %r).", struct, kwargs['struct']) kwargs['struct'] = struct # now setup the whole simulation (this is typically gromacs.setup.MD() ) params = setupMD(**kwargs) # params['struct'] is md.gro but could also be md.pdb --- depends entirely on qscript self.files[protocol] = params['struct'] # Gromacs 4.5.x 'mdrun -c PDB' fails if it cannot find 'residuetypes.dat' # so instead of fuffing with GMXLIB we just dump it into the directory try: shutil.copy(config.topfiles['residuetypes.dat'], self.dirs[protocol]) except: logger.warn("Failed to copy 'residuetypes.dat': mdrun will likely fail to write a final structure") self.journal.completed(protocol) return params def MD_relaxed(self, **kwargs): """Short MD simulation with *timestep* = 0.1 fs to relax strain. Energy minimization does not always remove all problems and LINCS constraint errors occur. A very short *runtime* = 5 ps MD with very short integration time step *dt* tends to solve these problems. .. 
See Also:: :func:`gromacs.setup.MD` :Keywords: *struct* starting coordinates (typically guessed) *mdp* MDP run parameter file for Gromacs *qscript* list of queuing system submission scripts; probably a good idea to always include the default "local.sh" even if you have your own ["local.sh"] *qname* name of the job as shown in the queuing system *startdir* **advanced uses**: path of the directory on a remote system, which will be hard-coded into the queuing system script(s); see :func:`gromacs.setup.MD` and :class:`gromacs.manager.Manager` """ # user structure or restrained or solvated kwargs.setdefault('struct', self.files.energy_minimized) kwargs.setdefault('dt', 0.0001) # ps kwargs.setdefault('runtime', 5) # ps kwargs.setdefault('mdp', self.mdp['MD_relaxed']) return self._MD('MD_relaxed', **kwargs) def MD_restrained(self, **kwargs): """Short MD simulation with position restraints on compound. See documentation of :func:`gromacs.setup.MD_restrained` for details. The following keywords can not be changed: top, mdp, ndx, mainselection .. Note:: Position restraints are activated with ``-DPOSRES`` directives for :func:`gromacs.grompp`. Hence this will only work if the compound itp file does indeed contain a ``[ posres ]`` section that is protected by a ``#ifdef POSRES`` clause. .. 
See Also:: :func:`gromacs.setup.MD_restrained` :Keywords: *struct* starting coordinates (leave empty for inspired guess of file name) *mdp* MDP run parameter file for Gromacs *qscript* list of queuing system submission scripts; probably a good idea to always include the default "local.sh" even if you have your own ["local.sh"] *qname* name of the job as shown in the queuing system *startdir* **advanced uses**: path of the directory on a remote system, which will be hard-coded into the queuing system script(s); see :func:`gromacs.setup.MD` and :class:`gromacs.manager.Manager` """ kwargs.setdefault('struct', self._lastnotempty([self.files.energy_minimized, self.files.MD_relaxed])) kwargs.setdefault('mdp', self.mdp['MD_restrained']) kwargs['MDfunc'] = gromacs.setup.MD_restrained return self._MD('MD_restrained', **kwargs) def MD_NPT(self, **kwargs): """Short NPT MD simulation. See documentation of :func:`gromacs.setup.MD` for details such as *runtime* or specific queuing system options. The following keywords can not be changed: *top*, *mdp*, *ndx*, *mainselection*. .. Note:: If the system crashes (with LINCS errors), try initial equilibration with timestep *dt* = 0.0001 ps (0.1 fs instead of 2 fs) and *runtime* = 5 ps as done in :meth:`~Simulation.MD_relaxed` .. See Also:: :func:`gromacs.setup.MD` and :meth:`Simulation.MD_relaxed` :Keywords: *struct* starting conformation; by default, the *struct* is the last frame from the position restraints run, or, if this file cannot be found (e.g. because :meth:`Simulation.MD_restrained` was not run) it falls back to the relaxed and then the solvated system. 
*mdp* MDP run parameter file for Gromacs *runtime* total run time in ps *qscript* list of queuing system scripts to prepare; available values are in :data:`gromacs.config.templates` or you can provide your own filename(s) in the current directory (see :mod:`gromacs.qsub` for the format of the templates) *qname* name of the job as shown in the queuing system *startdir* **advanced uses**: path of the directory on a remote system, which will be hard-coded into the queuing system script(s); see :func:`gromacs.setup.MD` and :class:`gromacs.manager.Manager` """ # user structure or relaxed or restrained or solvated kwargs.setdefault('struct', self.get_last_structure()) kwargs.setdefault('t',self.get_last_checkpoint()) # Pass checkpoint file from md_relaxed kwargs.setdefault('mdp', self.mdp['MD_NPT']) return self._MD('MD_NPT', **kwargs) # for convenience and compatibility MD = MD_NPT @staticmethod def _checknotempty(value, name): if value is None or value == "": raise ValueError("Parameter %s cannot be empty." % name) return value @staticmethod def _lastnotempty(l): """Return the last non-empty value in list *l* (or None :-p)""" nonempty = [None] + [x for x in l if not (x is None or x == "" or x == [])] return nonempty[-1] def get_last_structure(self): """Returns the coordinates of the most advanced step in the protocol.""" return self._lastnotempty([self.files[name] for name in self.coordinate_structures]) def get_last_checkpoint(self): """Returns the checkpoint of the most advanced step in the protocol. Relies on md.gro being present from previous simulation, assumes that checkpoint is then present. """ return self._lastnotempty([self.files[name] for name in self.checkpoints]).replace('.gro','.cpt')
def __init__(self, molecule=None, **kwargs):
    """Set up Simulation instance.

    The *molecule* of the compound molecule should be supplied. Existing files
    (which have been generated in previous runs) can also be supplied.

    :Keywords:
      *molecule*
          Identifier for the compound molecule. This is the same as the
          entry in the ``[ molecule ]`` section of the itp file. ["DRUG"]
      *filename*
          If provided and *molecule* is ``None`` then load the instance from
          the pickle file *filename*, which was generated with
          :meth:`~mdpow.equil.Simulation.save`.
      *dirname*
          base directory; all other directories are created under it
      *solvent*
          'water' or 'octanol' or 'cyclohexane'
      *solventmodel*
          ``None`` chooses the default (e.g,
          :data:`mdpow.forcefields.DEFAULT_WATER_MODEL` for ``solvent ==
          "water"``. Other options are the models defined in
          :data:`mdpow.forcefields.GROMACS_WATER_MODELS`. At the moment,
          there are no alternative parameterizations included for other
          solvents.
      *mdp*
          dict with keys corresponding to the stages ``energy_minimize``,
          ``MD_restrained``, ``MD_relaxed``,
          ``MD_NPT`` and values *mdp* file names (if no entry then the
          package defaults are used)
      *kwargs*
          advanced keywords for short-circuiting; see
          :data:`mdpow.equil.Simulation.filekeys`.

    """
    self.__cache = {}

    # consume the keywords that are handled here; the remainder goes to super
    pickle_file = kwargs.pop('filename', None)
    base_directory = kwargs.pop('dirname', self.dirname_default)
    solvent_name = kwargs.pop('solvent', self.solvent_default)
    # 'water' will choose the default ('tip4p'), other choices are
    # 'tip3p', 'spc', 'spce', for water; no choices
    # available for 'cyclohexane' and 'octanol'
    model_name = kwargs.pop('solventmodel', None)
    user_mdp = kwargs.pop('mdp', {})

    # self.mdp: key = stage, value = path to MDP file
    # (package defaults first, then the user's entries; None values are ignored)
    self.mdp = dict((stage, config.get_template(template))
                    for stage, template in self.mdp_defaults.items())
    overrides = {}
    for stage, template in user_mdp.items():
        if template is not None:
            overrides[stage] = config.get_template(template)
    self.mdp.update(overrides)

    if molecule is None and pickle_file is not None:
        # restore the state from the pickle file; nothing is passed to super
        self.load(pickle_file)
        self.filename = pickle_file
        super(Simulation, self).__init__()
        return

    self.molecule = molecule or 'DRUG'

    resolved_base = realpath(base_directory)      # .../Equilibrium/<solvent>
    include_dirs = list(asiterable(kwargs.pop('includes', [])))
    include_dirs.append(config.includedir)
    self.dirs = AttributeDict(basedir=resolved_base, includes=include_dirs)

    # pre-set filenames: keyword == variable name
    self.files = AttributeDict([(key, kwargs.pop(key, None)) for key in self.filekeys])
    self.deffnm = kwargs.pop("deffnm", "md")

    topology = self.files.topology
    if topology:
        # assume that a user-supplied topology lives in a 'standard' top dir
        # that includes the necessary itp file(s)
        self.dirs.topology = realpath(os.path.dirname(topology))
        self.dirs.includes.append(self.dirs.topology)

    self.solvent_type = solvent_name
    self.solventmodel_identifier = forcefields.get_solvent_identifier(solvent_name, model_name)
    if self.solventmodel_identifier is None:
        msg = "No parameters for solvent {0} and solventmodel {1} available.".format(
            solvent_name, model_name)
        logger.error(msg)
        raise ValueError(msg)
    self.solventmodel = forcefields.get_solvent_model(self.solventmodel_identifier)

    # an explicit distance wins over the per-solvent default
    distance = kwargs.pop('distance', None)
    if distance is None:
        distance = DIST[solvent_name]

    model = self.solventmodel
    self.solvent = AttributeDict(itp=model.itp, box=model.coordinates,
                                 distance=distance)

    self.filename = pickle_file or self.solvent_type+'.simulation'

    super(Simulation, self).__init__(**kwargs)
def __init__(self, **kwargs):
    """Set up ProteinOnly

    :Arguments:
      *force*
         ``True`` will always regenerate trajectories even if they
         already exist, ``False`` raises an exception, ``None``
         does the sensible thing in most cases (i.e. notify and
         then move on).
      *dt* : float or list of floats
         only write every dt timestep (in ps); if a list of floats is
         supplied, write multiple trajectories, one for each dt.
      *compact* : bool
         write a compact representation
      *fit*
         Create an additional trajectory from the stripped one in which
         the Protein group is rms-fitted to the initial structure. See
         :meth:`gromacs.cbook.Transformer.fit` for details. Useful
         values:

           - "xy" : perform a rot+trans fit in the x-y plane
           - "all": rot+trans
           - ``None``: no fitting

         If *fit* is not supplied then the constructor default is used
         (:attr:`_ProteinOnly.parameters.fit`).
      *keepalso*
          List of literal ``make_ndx`` selections that select additional
          groups of atoms that should also be kept in addition to the
          protein. For example *keepalso* = ['"POPC"', 'resname DRUG'].

    :Raises: :exc:`ValueError` if *fit* is not one of "xy", "all", ``None``.
    """
    # specific arguments: take them before calling the super class that
    # does not know what to do with them
    _fitvalues = ("xy", "all", None)
    parameters = {}
    fit = kwargs.pop('fit', None)              # fitting algorithm
    if fit not in _fitvalues:
        # format explicitly: there used to be no local named 'fit' so that
        # formatting from vars() raised KeyError instead of this ValueError
        raise ValueError(
            "ProteinOnly: *fit* must be one of {0!r}, not {1!r}."
            .format(_fitvalues, fit))
    parameters['fit'] = fit
    parameters['compact'] = kwargs.pop('compact', False)  # compact+centered ?
    parameters['dt'] = kwargs.pop('dt', None)
    parameters['force'] = kwargs.pop('force', None)
    parameters['keepalso'] = kwargs.pop('keepalso', None)

    # super class init: do this before doing anything else
    # (also sets up self.parameters and self.results)
    super(_ProteinOnly, self).__init__(**kwargs)

    # self.parameters is set up by the base Worker class...
    self.parameters.filenames = AttributeDict()
    self.parameters.update(parameters)

    # self.simulation might have been set by the super class
    # already; just leave this snippet at the end. Do all
    # initialization that requires the simulation class in the
    # _register_hook() method.
    if self.simulation is not None:
        self._register_hook()
class Simulation(object):
    """Class that represents one simulation.

    Analysis capabilities are added via plugins.

    1. Set the *active plugin* with the :meth:`Simulation.set_plugin` method.
    2. Analyze the trajectory with the active plugin by calling the
       :meth:`Simulation.run` method.
    3. Analyze the output from :meth:`run` with :meth:`Simulation.analyze`; results are stored
       in the plugin's :attr:`~Worker.results` dictionary.
    4. Plot results with :meth:`Simulation.plot`.
    """
    # NOTE: not suitable for multiple inheritance

    def __init__(self, **kwargs):
        """Set up a Simulation object.

        :Keywords:
          *sim*
             Any object that contains the attributes *tpr*, *xtc*,
             and optionally *ndx*
             (e.g. :class:`gromacs.cbook.Transformer`). The
             individual keywords such as *xtc* override the values
             in *sim*.
          *tpr*
             Gromacs tpr file (**required**)
          *xtc*
             Gromacs trajectory, can also be a trr (**required**)
          *edr*
             Gromacs energy file (only required for some plugins)
          *ndx*
             Gromacs index file
          *absolute*
             ``True``: Turn file names into absolute paths (typically required
             for most plugins); ``False`` keep a they are [``True``]
          *strict*
             ``True``: missing required file keyword raises a :exc:`TypeError`
             and missing the file itself raises a :exc:`IOError`.  ``False``:
             missing required files only give a warning. [``True``]
          *analysisdir*
             directory under which derived data are stored;
             defaults to the directory containing the tpr (or the current
             directory if no tpr is known with ``strict=False``) [None]
          *plugins* : list
             plugin instances or tuples (*plugin class*, *kwarg dict*) or tuples
             (*plugin_class_name*, *kwarg dict*) to be used; more can be
             added later with :meth:`Simulation.add_plugin`.

        """
        logger.info("Loading simulation data")

        sim = kwargs.pop('sim', None)
        strict = kwargs.pop('strict', True)

        def getpop(attr, required=False, strict=strict):
            """Return attribute from from kwargs or sim or None"""
            val = kwargs.pop(attr, None)  # must pop from kwargs to clean it
            if val is not None:
                return val
            try:
                return sim.__getattribute__(attr)
            except AttributeError:
                if required:
                    errmsg = "Required attribute %r not found in kwargs or sim" % attr
                    if strict:
                        logger.fatal(errmsg)
                        raise TypeError(errmsg)
                    else:
                        logger.warning(errmsg+"... continuing because of strict=False")
                        warnings.warn(errmsg)
                return None

        make_absolute = kwargs.pop('absolute', True)

        def canonical(*args):
            """Join *args* and get the :func:`os.path.realpath`."""
            if None in args:
                return None
            if not make_absolute:
                return os.path.join(*args)
            return os.path.realpath(os.path.join(*args))

        # required files
        self.tpr = canonical(getpop('tpr', required=True))
        self.xtc = canonical(getpop('xtc', required=True))
        # optional files
        self.ndx = canonical(getpop('ndx'))
        self.edr = canonical(getpop('edr'))

        # check existence of required files
        resolve = "exception"
        if not strict:
            resolve = "warn"

        for v in ('tpr', 'xtc'):
            self.check_file(v, self.__getattribute__(v), resolve=resolve)

        # With strict=False the tpr can be None, so do not call
        # os.path.dirname() on it unconditionally.
        analysisdir = kwargs.pop('analysisdir', None)
        if analysisdir is None:
            if self.tpr is not None:
                analysisdir = os.path.dirname(self.tpr)
            else:
                analysisdir = os.path.curdir
        self.analysis_dir = analysisdir

        #: Registry for plugins: This dict is central.
        self.plugins = AttributeDict()
        #: Use this plugin if none is explicitly specified. Typically set with
        #: :meth:`~Simulation.set_plugin`.
        self.default_plugin_name = None

        # XXX: Or should we simply add instances and then re-register
        #      all instances using register() ?
        # XXX: ... this API should be cleaned up. It seems to be connected
        #      back and forth in vicious circles. -- OB 2009-07-10

        plugins = kwargs.pop('plugins', [])
        # list of tuples (plugin, kwargs) or just (plugin,) if no kwords
        # required (eg if plugin is an instance)
        for x in plugins:
            item = list(asiterable(x))  # make sure to wrap strings, especially 2-letter ones!
            try:
                P, plugin_kwargs = item
            except ValueError:
                # bare plugin (wrapped by asiterable) or 1-tuple (plugin,):
                # use the single element; anything else is passed on as is
                P = item[0] if len(item) == 1 else x
                plugin_kwargs = {}
            self.add_plugin(P, **plugin_kwargs)

        # convenience: if only a single plugin was registered we default to that one
        if len(self.plugins) == 1:
            # list(): dict views cannot be indexed
            self.set_plugin(list(self.plugins.keys())[0])

        # Is this needed? If done properly, kwargs should be empty by now BUT
        # because the same list is re-used for all plugins I cannot pop them in
        # the plugins. I don't think multiple inheritance would work with this
        # setup so let's not pretend it does: hence comment out the super-init
        # call:
        ## super(Simulation, self).__init__(**kwargs)
        logger.info("Simulation instance initialised:")
        logger.info(str(self))

    def add_plugin(self, plugin, **kwargs):
        """Add a plugin to the registry.

        - If *plugin* is a :class:`Plugin` instance then the
          instance is directly registered and any keyword arguments
          are ignored.

        - If *plugin* is a :class:`Plugin` class object or a
          string that can be found in :mod:`gromacs.analysis.plugins`
          then first an instance is created with the given keyword
          arguments and then registered.

        :Arguments:
            *plugin* : class or string, or instance
               If the parameter is a class then it should have been derived
               from :class:`Plugin`. If it is a string then it is taken as a
               plugin name in :mod:`gromacs.analysis.plugins` and the
               corresponding class is added. In both cases any parameters for
               initizlization should be provided.

               If *plugin* is already a :class:`Plugin` instance then the kwargs
               will be ignored.
            *kwargs*
               The kwargs are specific for the plugin and should be
               described in its documentation.
        """
        # simulation=self must be provided so that plugin knows who owns it

        try:
            plugin.register(simulation=self)
        except (TypeError, AttributeError):
            # NOTE: this except clause can mask bugs in the plugin code!!
            if isinstance(plugin, str):
                # NOTE: implicit relative import (Python 2 semantics)
                import plugins            # We should be able to import this safely now...
                plugin = plugins.__plugin_classes__[plugin]
            # plugin registers itself in self.plugins
            plugin(simulation=self, **kwargs)  # simulation=self is REQUIRED!

    def topdir(self, *args):
        """Returns a path under self.analysis_dir, which is guaranteed to exist.

        .. Note:: Parent dirs are created if necessary."""
        p = os.path.join(self.analysis_dir, *args)
        parent = os.path.dirname(p)
        if parent:
            # an empty parent means the current directory: nothing to create
            try:
                os.makedirs(parent)
            except OSError as err:
                if err.errno != errno.EEXIST:
                    raise
        return p
class Simulation(object):
    """Class that represents one simulation.

    Analysis capabilities are added via plugins.

    1. Set the *active plugin* with the :meth:`Simulation.set_plugin` method.
    2. Analyze the trajectory with the active plugin by calling the
       :meth:`Simulation.run` method.
    3. Analyze the output from :meth:`run` with :meth:`Simulation.analyze`; results are stored
       in the plugin's :attr:`~Worker.results` dictionary.
    4. Plot results with :meth:`Simulation.plot`.
    """

    # NOTE: not suitable for multiple inheritance

    def __init__(self, **kwargs):
        """Set up a Simulation object.

        :Keywords:
          *sim*
             Any object that contains the attributes *tpr*, *xtc*,
             and optionally *ndx*
             (e.g. :class:`gromacs.cbook.Transformer`). The
             individual keywords such as *xtc* override the values
             in *sim*.
          *tpr*
             Gromacs tpr file (**required**)
          *xtc*
             Gromacs trajectory, can also be a trr (**required**)
          *edr*
             Gromacs energy file (only required for some plugins)
          *ndx*
             Gromacs index file
          *absolute*
             ``True``: Turn file names into absolute paths (typically required
             for most plugins); ``False`` keep a they are [``True``]
          *strict*
             ``True``: missing required file keyword raises a :exc:`TypeError`
             and missing the file itself raises a :exc:`IOError`.  ``False``:
             missing required files only give a warning. [``True``]
          *analysisdir*
             directory under which derived data are stored;
             defaults to the directory containing the tpr (or the current
             directory if no tpr is known with ``strict=False``) [None]
          *plugins* : list
             plugin instances or tuples (*plugin class*, *kwarg dict*) or tuples
             (*plugin_class_name*, *kwarg dict*) to be used; more can be
             added later with :meth:`Simulation.add_plugin`.

        """
        logger.info("Loading simulation data")

        sim = kwargs.pop('sim', None)
        strict = kwargs.pop('strict', True)

        def getpop(attr, required=False, strict=strict):
            """Return attribute from from kwargs or sim or None"""
            val = kwargs.pop(attr, None)  # must pop from kwargs to clean it
            if val is not None:
                return val
            try:
                return sim.__getattribute__(attr)
            except AttributeError:
                if required:
                    errmsg = "Required attribute {0!r} not found in kwargs or sim".format(
                        attr)
                    if strict:
                        logger.fatal(errmsg)
                        raise TypeError(errmsg)
                    else:
                        logger.warning(errmsg +
                                       "... continuing because of strict=False")
                        warnings.warn(errmsg)
                return None

        make_absolute = kwargs.pop('absolute', True)

        def canonical(*args):
            """Join *args* and get the :func:`os.path.realpath`."""
            if None in args:
                return None
            if not make_absolute:
                return os.path.join(*args)
            return os.path.realpath(os.path.join(*args))

        # required files
        self.tpr = canonical(getpop('tpr', required=True))
        self.xtc = canonical(getpop('xtc', required=True))
        # optional files
        self.ndx = canonical(getpop('ndx'))
        self.edr = canonical(getpop('edr'))

        # check existence of required files
        resolve = "exception"
        if not strict:
            resolve = "warn"

        for v in ('tpr', 'xtc'):
            self.check_file(v, self.__getattribute__(v), resolve=resolve)

        # With strict=False the tpr can be None, so do not call
        # os.path.dirname() on it unconditionally.
        analysisdir = kwargs.pop('analysisdir', None)
        if analysisdir is None:
            if self.tpr is not None:
                analysisdir = os.path.dirname(self.tpr)
            else:
                analysisdir = os.path.curdir
        self.analysis_dir = analysisdir

        #: Registry for plugins: This dict is central.
        self.plugins = AttributeDict()
        #: Use this plugin if none is explicitly specified. Typically set with
        #: :meth:`~Simulation.set_plugin`.
        self.default_plugin_name = None

        # XXX: Or should we simply add instances and then re-register
        #      all instances using register() ?
        # XXX: ... this API should be cleaned up. It seems to be connected
        #      back and forth in vicious circles. -- OB 2009-07-10

        plugins = kwargs.pop('plugins', [])
        # list of tuples (plugin, kwargs) or just (plugin,) if no kwords
        # required (eg if plugin is an instance)
        for x in plugins:
            item = list(asiterable(
                x))  # make sure to wrap strings, especially 2-letter ones!
            try:
                P, plugin_kwargs = item
            except ValueError:
                # bare plugin (wrapped by asiterable) or 1-tuple (plugin,):
                # use the single element; anything else is passed on as is
                P = item[0] if len(item) == 1 else x
                plugin_kwargs = {}
            self.add_plugin(P, **plugin_kwargs)

        # convenience: if only a single plugin was registered we default to that one
        if len(self.plugins) == 1:
            # list(): dict views cannot be indexed
            self.set_plugin(list(self.plugins.keys())[0])

        # Is this needed? If done properly, kwargs should be empty by now BUT
        # because the same list is re-used for all plugins I cannot pop them in
        # the plugins. I don't think multiple inheritance would work with this
        # setup so let's not pretend it does: hence comment out the super-init
        # call:
        ## super(Simulation, self).__init__(**kwargs)
        logger.info("Simulation instance initialised:")
        logger.info(str(self))

    def add_plugin(self, plugin, **kwargs):
        """Add a plugin to the registry.

        - If *plugin* is a :class:`Plugin` instance then the
          instance is directly registered and any keyword arguments
          are ignored.

        - If *plugin* is a :class:`Plugin` class object or a
          string that can be found in :mod:`gromacs.analysis.plugins`
          then first an instance is created with the given keyword
          arguments and then registered.

        :Arguments:
            *plugin* : class or string, or instance
               If the parameter is a class then it should have been derived
               from :class:`Plugin`. If it is a string then it is taken as a
               plugin name in :mod:`gromacs.analysis.plugins` and the
               corresponding class is added. In both cases any parameters for
               initizlization should be provided.

               If *plugin* is already a :class:`Plugin` instance then the kwargs
               will be ignored.
            *kwargs*
               The kwargs are specific for the plugin and should be
               described in its documentation.
        """
        # simulation=self must be provided so that plugin knows who owns it

        try:
            plugin.register(simulation=self)
        except (TypeError, AttributeError):
            # NOTE: this except clause can mask bugs in the plugin code!!
            if isinstance(plugin, str):
                # NOTE: implicit relative import (Python 2 semantics)
                import plugins  # We should be able to import this safely now...
                plugin = plugins.__plugin_classes__[plugin]
            # plugin registers itself in self.plugins
            plugin(simulation=self,
                   **kwargs)  # simulation=self is REQUIRED!

    def topdir(self, *args):
        """Returns a path under self.analysis_dir, which is guaranteed to exist.

        .. Note:: Parent dirs are created if necessary."""
        p = os.path.join(self.analysis_dir, *args)
        parent = os.path.dirname(p)
        if parent:
            # an empty parent means the current directory: nothing to create
            try:
                os.makedirs(parent)
            except OSError as err:
                if err.errno != errno.EEXIST:
                    raise
        return p
def __init__(self, **kwargs):
    """Set up a Simulation object.

    :Keywords:
      *sim*
         Any object that contains the attributes *tpr*, *xtc*,
         and optionally *ndx*
         (e.g. :class:`gromacs.cbook.Transformer`). The
         individual keywords such as *xtc* override the values
         in *sim*.
      *tpr*
         Gromacs tpr file (**required**)
      *xtc*
         Gromacs trajectory, can also be a trr (**required**)
      *edr*
         Gromacs energy file (only required for some plugins)
      *ndx*
         Gromacs index file
      *absolute*
         ``True``: Turn file names into absolute paths (typically required
         for most plugins); ``False`` keep a they are [``True``]
      *strict*
         ``True``: missing required file keyword raises a :exc:`TypeError`
         and missing the file itself raises a :exc:`IOError`.  ``False``:
         missing required files only give a warning. [``True``]
      *analysisdir*
         directory under which derived data are stored;
         defaults to the directory containing the tpr (or the current
         directory if no tpr is known with ``strict=False``) [None]
      *plugins* : list
         plugin instances or tuples (*plugin class*, *kwarg dict*) or tuples
         (*plugin_class_name*, *kwarg dict*) to be used; more can be
         added later with :meth:`Simulation.add_plugin`.

    """
    logger.info("Loading simulation data")

    sim = kwargs.pop('sim', None)
    strict = kwargs.pop('strict', True)

    def getpop(attr, required=False, strict=strict):
        """Return attribute from from kwargs or sim or None"""
        val = kwargs.pop(attr, None)  # must pop from kwargs to clean it
        if val is not None:
            return val
        try:
            return sim.__getattribute__(attr)
        except AttributeError:
            if required:
                errmsg = "Required attribute %r not found in kwargs or sim" % attr
                if strict:
                    logger.fatal(errmsg)
                    raise TypeError(errmsg)
                else:
                    logger.warning(errmsg+"... continuing because of strict=False")
                    warnings.warn(errmsg)
            return None

    make_absolute = kwargs.pop('absolute', True)

    def canonical(*args):
        """Join *args* and get the :func:`os.path.realpath`."""
        if None in args:
            return None
        if not make_absolute:
            return os.path.join(*args)
        return os.path.realpath(os.path.join(*args))

    # required files
    self.tpr = canonical(getpop('tpr', required=True))
    self.xtc = canonical(getpop('xtc', required=True))
    # optional files
    self.ndx = canonical(getpop('ndx'))
    self.edr = canonical(getpop('edr'))

    # check existence of required files
    resolve = "exception"
    if not strict:
        resolve = "warn"

    for v in ('tpr', 'xtc'):
        self.check_file(v, self.__getattribute__(v), resolve=resolve)

    # With strict=False the tpr can be None, so do not call
    # os.path.dirname() on it unconditionally.
    analysisdir = kwargs.pop('analysisdir', None)
    if analysisdir is None:
        if self.tpr is not None:
            analysisdir = os.path.dirname(self.tpr)
        else:
            analysisdir = os.path.curdir
    self.analysis_dir = analysisdir

    #: Registry for plugins: This dict is central.
    self.plugins = AttributeDict()
    #: Use this plugin if none is explicitly specified. Typically set with
    #: :meth:`~Simulation.set_plugin`.
    self.default_plugin_name = None

    # XXX: Or should we simply add instances and then re-register
    #      all instances using register() ?
    # XXX: ... this API should be cleaned up. It seems to be connected
    #      back and forth in vicious circles. -- OB 2009-07-10

    plugins = kwargs.pop('plugins', [])
    # list of tuples (plugin, kwargs) or just (plugin,) if no kwords
    # required (eg if plugin is an instance)
    for x in plugins:
        item = list(asiterable(x))  # make sure to wrap strings, especially 2-letter ones!
        try:
            P, plugin_kwargs = item
        except ValueError:
            # bare plugin (wrapped by asiterable) or 1-tuple (plugin,):
            # use the single element; anything else is passed on as is
            P = item[0] if len(item) == 1 else x
            plugin_kwargs = {}
        self.add_plugin(P, **plugin_kwargs)

    # convenience: if only a single plugin was registered we default to that one
    if len(self.plugins) == 1:
        # list(): dict views cannot be indexed
        self.set_plugin(list(self.plugins.keys())[0])

    # Is this needed? If done properly, kwargs should be empty by now BUT
    # because the same list is re-used for all plugins I cannot pop them in
    # the plugins. I don't think multiple inheritance would work with this
    # setup so let's not pretend it does: hence comment out the super-init
    # call:
    ## super(Simulation, self).__init__(**kwargs)
    logger.info("Simulation instance initialised:")
    logger.info(str(self))
def analyze(self, **kwargs):
    """Load results from disk into :attr:`_Dihedrals.results` and compute PMF.

    The PMF W(phi) in kT is computed from each dihedral
    probability distribution P(phi) as

       W(phi) = -kT ln P(phi)

    It is stored in :attr:`_Dihedrals.results` with the key *PMF*.

    The distributions and the PMF are also written to the files
    ``self.parameters.filenames['distributions']`` and
    ``self.parameters.filenames['PMF']``.

    :Keywords:
      *bins*
         bins for histograms (passed to :func:`numpy.histogram`) [361]

    :Returns: a dictionary of the results and also sets
              :attr:`_Dihedrals.results`.
    :Raises: :exc:`KeyError` if the *timeseries* data could not be loaded
             and :exc:`ValueError` if it contains no dihedral columns.
    """
    bins = kwargs.pop('bins', 361)

    results = AttributeDict()

    # get graphs that were produced by g_angle
    for name, f in self.parameters.filenames.items():
        try:
            results[name] = XVG(f)
        except IOError:
            pass    # either not computed (yet) or some failure

    # compute individual distributions
    ts = results['timeseries'].array    # ts[0] = time, ts[1] = avg
    dih = ts[2:]

    if len(dih) == 0:
        # without data there are no bin edges to build phi from
        raise ValueError("No dihedral time series found in 'timeseries' data.")

    phi_range = (-180., 180.)

    # histograms (prob. distributions), one for each dihedral i;
    # density=True replaces the obsolete normed=True/new=True keywords
    p = []
    for phis in dih:
        hist, e = numpy.histogram(phis, bins=bins, range=phi_range, density=True)
        p.append(hist)

    P = numpy.array(p)
    phi = 0.5*(e[:-1]+e[1:])   # midpoints of bin edges (identical for all dihedrals)
    distributions = numpy.concatenate((phi[numpy.newaxis, :], P))  # phi, P[0], P[1], ...

    xvg = XVG()
    xvg.set(distributions)
    xvg.write(self.parameters.filenames['distributions'])
    results['distributions'] = xvg

    # compute PMF (from individual distributions)
    # NOTE: empty bins (P == 0) give W = +inf
    W = -numpy.log(P)                      # W(phi)/kT = -ln P
    W -= W.min(axis=1)[:, numpy.newaxis]   # minimum at 0 kT
    pmf = numpy.concatenate((phi[numpy.newaxis, :], W), axis=0)
    xvg = XVG()
    xvg.set(pmf)
    xvg.write(self.parameters.filenames['PMF'])
    results['PMF'] = xvg

    self.results = results
    return results
def __init__(self,**kwargs):
    """Set up StripWater

    :Arguments:

      *force*
         ``True`` will always regenerate trajectories even if they
         already exist, ``False`` raises an exception, ``None``
         does the sensible thing in most cases (i.e. notify and
         then move on).
      *dt* : float or list of floats
         only write every dt timestep (in ps); if a list of floats is
         supplied, write multiple trajectories, one for each dt.
      *compact* : bool
         write a compact representation
      *centergroup*
         Index group to center on ["Protein"]
      *fit*
         Create an additional trajectory from the stripped one in which
         the *fitgroup* group is rms-fitted to the initial structure. See
         :meth:`gromacs.cbook.Transformer.fit` for details. Useful
         values:

           - "xy" : perform a rot+trans fit in the x-y plane
           - "all": rot+trans
           - ``None``: no fitting

         If *fit* is not supplied then the constructor-default is used
         (:attr:`_StripWater.parameters.fit`).
      *fitgroup*
         Index group to fit to with the *fit* option; must be changed if
         molecule is not a protein and automatically recognized. Also
         consider supplying a custom index file. ["backbone"]
      *resn*
         name of the residues that are stripped (typically it is
         safe to leave this at the default 'SOL')
      *outdir*
         place generated files in *outdir* instead of the same directory
         where the input tpr/xtc lived [``None``]

    .. Note::

       If set, *dt* is only applied to a fit step; the no-water
       trajectory is always generated for all time steps of the
       input.

    :Raises: :exc:`ValueError` if *fit* is not one of "xy", "all", ``None``.
    """
    # specific arguments: take them before calling the super class that
    # does not know what to do with them
    _fitvalues = ("xy", "all", None)
    parameters = {}
    fit = kwargs.pop('fit', None)              # fitting algorithm
    if fit not in _fitvalues:
        # format explicitly: there used to be no local named 'fit' so that
        # formatting from vars() raised KeyError instead of this ValueError
        raise ValueError(
            "StripWater: *fit* must be one of {0!r}, not {1!r}.".format(_fitvalues, fit))
    parameters['fit'] = fit
    parameters['fitgroup'] = kwargs.pop('fitgroup', "backbone")
    parameters['centergroup'] = kwargs.pop('centergroup', "Protein")
    parameters['compact'] = kwargs.pop('compact', False)  # compact+centered ?
    parameters['resn'] = kwargs.pop('resn', 'SOL')        # residue name to be stripped
    parameters['dt'] = kwargs.pop('dt', None)
    parameters['force'] = kwargs.pop('force', None)
    parameters['outdir'] = kwargs.pop('outdir', None)

    # super class init: do this before doing anything else
    # (also sets up self.parameters and self.results)
    super(_StripWater, self).__init__(**kwargs)

    # self.parameters is set up by the base Worker class...
    self.parameters.filenames = AttributeDict()
    self.parameters.update(parameters)

    # self.simulation might have been set by the super class
    # already; just leave this snippet at the end. Do all
    # initialization that requires the simulation class in the
    # _register_hook() method.
    if self.simulation is not None:
        self._register_hook()