    def __init__(self, **kwargs):
        """Set up Worker class.

        :Keywords:
          *plugin* : instance
             The :class:`Plugin` instance that owns this worker. **Must be supplied.**
          *simulation*
             A :class:`Simulation` object, required for registration,
             but can be supplied later.
          *kwargs*
             All other keyword arguments are passed to the super class.

        The worker stores its results in ``self.results`` and its options
        and file names in ``self.parameters`` (with the file names under
        ``self.parameters.filenames``); all three start out empty.
        """
        #: :class:`Plugin` instance that owns this Worker.
        self.plugin = kwargs.pop('plugin', None)
        # must be supplied, non-optional keyword argument
        assert self.plugin is not None, "keyword argument 'plugin' must be supplied"
        #: Name of the plugin that this Worker belongs to.
        self.plugin_name = self.plugin.plugin_name

        # eventually needed but can come after init
        self.simulation = kwargs.pop('simulation', None)
        # directory name under analysisdir
        self.location = self.plugin_name
        # store results
        self.results = AttributeDict()
        # container for options, filenames, etc...
        self.parameters = AttributeDict()
        self.parameters.filenames = AttributeDict()
        # 'plugin' and 'simulation' were popped above; the rest goes upstream
        super(Worker, self).__init__(**kwargs)
    def analyze(self, **kwargs):
        """Short description of postprocessing.

        The analyze method typically postprocesses the data files
        generated by run. Splitting the complete analysis task into
        two parts (*run* and *analyze*) is advantageous because in
        this way parameters of postprocessing steps can be easily
        changed without having to rerun the time consuming trajectory
        analysis.

        :Returns:  a dictionary of the results and also sets ``self.results``.
        """
        from gromacs.formats import XVG

        results = AttributeDict()

        # - Do postprocessing here.
        # - Store results of calculation in results[key] where key can be chosen freely
        #   but *must* be provided so that other functions can uniformly access results.
        # - You are encouraged to store class instances with a plot() method; if you do
        #   this then you don't have to change the plot() method below.
        #   For instance you can use gromacs.formats.XVG(filename) to create
        #   an object from a xvg file that knows how to plot itself.

        self.results = results
        return results
    def analyze(self, **kwargs):
        """Analyze hydrogen bond output.

        * hydrogen bond existence (existence)
        * total number of hydrogen bonds (num)
        * (others can be added easily)

        :Returns:  a dictionary of the results and also sets ``self.results``.
        from gromacs.formats import XPM, XVG

        results = AttributeDict()
        results['num'] = XVG(self.parameters.filenames['num'])
        results['matrix'] = hbm = XPM(self.parameters.filenames['hbm'],

        hb_fraction = hbm.array.mean(axis=0)
        desc = [
            line.strip() for line in open(self.parameters.filenames['log'])
            if not line.startswith('#')
        results['existence'] = zip(desc, hb_fraction)

        with open(self.parameters.filenames['existence'], "w") as out:
                "Hydrogen bond existence analysis (results['existence'] and %(existence)r)",
            for name, frac in results['existence']:
      "hb_existence: %-40s %4.1f%%", name, 100 * frac)
                out.write("{0:<40!s} {1:4.1f}%\n".format(name, 100 * frac))

        self.results = results
        return results
 def analyze(self, **kwargs):
     """Wrap every registered data file in an :class:`XVG` object.

     One ``XVG`` instance is created per entry of
     ``self.parameters.filenames`` and stored under the same key.

     :Returns: the new results container, which is also assigned to
               ``self.results``.
     """
     collected = AttributeDict()
     filenames = self.parameters.filenames
     for key in filenames:
         collected[key] = XVG(filenames[key])
     self.results = collected
     return collected
    def analyze(self, **kwargs):
        """Run the minimum-distance analysis for each cysteine residue.

        For every residue id in ``self.parameters.cysteines`` the outcome of
        ``self._mindist(resid)`` is stored under the key ``Cys<resid>``.

        :Returns: the results container, which is also assigned to
                  ``self.results`` (handy for interactive analysis).
        """
        per_cysteine = AttributeDict()
        for resid in self.parameters.cysteines:
            # the key doubles as an attribute name, so keep it a valid identifier
            key = 'Cys{0:d}'.format(resid)
            per_cysteine[key] = self._mindist(resid)
        self.results = per_cysteine
        return per_cysteine
    def analyze(self, **kwargs):
        """Collect output xvg files as :class:`gromacs.formats.XVG` objects.

        Only the file registered as ``self.parameters.filenames['Energy']``
        is loaded; it is stored under the key ``Energy``.

        :Returns:  a dictionary of the results and also sets ``self.results``.
        """
        from gromacs.formats import XVG

        logger.info("Preparing Energy graphs as XVG objects.")
        results = AttributeDict(Energy=XVG(self.parameters.filenames['Energy']))
        self.results = results
        return results
    def analyze(self, **kwargs):
        """Collect output xvg files as :class:`gromacs.formats.XVG` objects.

        Every entry of ``self.parameters.filenames`` is loaded and stored
        under the same key.

        :Returns:  a dictionary of the results and also sets ``self.results``.
        """
        from gromacs.formats import XVG

        logger.info("Preparing HelixBundle graphs as XVG objects.")
        results = AttributeDict(
            (k, XVG(fn)) for k, fn in self.parameters.filenames.items())
        self.results = results
        return results
class Simulation(Journalled):
    """Simple MD simulation of a single compound molecule in water.

    Typical use ::

       S = Simulation(molecule='DRUG')

    .. Note:: The OPLS/AA force field and the TIP4P water molecule is the
              default; changing this is possible but will require provision of
              customized itp, mdp and template top files at various stages.

    #: Keyword arguments to pre-set some file names; they are keys in :attr:`Simulation.files`.
    filekeys = ('topology', 'processed_topology', 'structure', 'solvated',
                'ndx', 'energy_minimized', 'MD_relaxed', 'MD_restrained',
    topdir_default = "Equilibrium"
    dirname_default = os.path.curdir
    solvent_default = 'water'

    #: Coordinate files of the full system in increasing order of advancement of
    #: the protocol; the later the better. The values are keys into :attr:`Simulation.files`.
    coordinate_structures = ('solvated', 'energy_minimized', 'MD_relaxed',
                             'MD_restrained', 'MD_NPT')
    checkpoints = ('solvated', 'energy_minimized', 'MD_relaxed',
                   'MD_restrained', 'MD_NPT')

    #: Check list of all methods that can be run as an independent protocol; see also
    #: :meth:`Simulation.get_protocol` and :class:`restart.Journal`
    protocols = (
        "MD_NPT_run",  # *_run as dummies for the ...
        "MD_relaxed_run",  # ...checkpointing logic

    #: Default Gromacs *MDP* run parameter files for the different stages.
    #: (All are part of the package and are found with :func:`mdpow.config.get_template`.)
    mdp_defaults = {
        'MD_relaxed': 'NPT_opls.mdp',
        'MD_restrained': 'NPT_opls.mdp',
        'MD_NPT': 'NPT_opls.mdp',
        'energy_minimize': 'em_opls.mdp',

    def __init__(self, molecule=None, **kwargs):
        """Set up Simulation instance.

        The *molecule* of the compound molecule should be supplied. Existing files
        (which have been generated in previous runs) can also be supplied.

              Identifier for the compound molecule. This is the same as the
              entry in the ``[ molecule ]`` section of the itp file. ["DRUG"]
              If provided and *molecule* is ``None`` then load the instance from
              the pickle file *filename*, which was generated with
              base directory; all other directories are created under it
              'OPLS-AA' or 'CHARMM' or 'AMBER'
              'water' or 'octanol' or 'cyclohexane' or 'wetoctanol'
              ``None`` chooses the default (e.g, :data:`mdpow.forcefields.DEFAULT_WATER_MODEL`
              for ``solvent == "water"``. Other options are the models defined in
              :data:`mdpow.forcefields.GROMACS_WATER_MODELS`. At the moment, there are no
              alternative parameterizations included for other solvents.
              dict with keys corresponding to the stages ``energy_minimize``,
              ``MD_restrained``, ``MD_relaxed``,
              ``MD_NPT`` and values *mdp* file names (if no entry then the
              package defaults are used)
               minimum distance between solute and closest box face
              advanced keywords for short-circuiting; see

        self.__cache = {}
        filename = kwargs.pop('filename', None)
        dirname = kwargs.pop('dirname', self.dirname_default)

        forcefield = kwargs.pop('forcefield', 'OPLS-AA')
        solvent = kwargs.pop('solvent', self.solvent_default)
        # mdp files --- should get values from default runinput.cfg
        # None values in the kwarg mdp dict are ignored
        # self.mdp: key = stage, value = path to MDP file

        # 'water' will choose the default ('tip4p'), other choices are
        # 'tip3p', 'spc', 'spce', 'm24', for water; no choices
        # available for 'cyclohexane' and 'octanol'
        solventmodel = kwargs.pop('solventmodel', None)

        mdp_kw = kwargs.pop('mdp', {})
        self.mdp = dict((stage, config.get_template(fn))
                        for stage, fn in self.mdp_defaults.items())
            dict((stage, config.get_template(fn))
                 for stage, fn in mdp_kw.items() if fn is not None))

        if molecule is None and filename is not None:
            # load from pickle file
            self.filename = filename
            kwargs = {}  # for super
            self.molecule = molecule or 'DRUG'
            self.dirs = AttributeDict(
                basedir=realpath(dirname),  # .../Equilibrium/<solvent>
                includes=list(asiterable(kwargs.pop('includes', []))) +
            # pre-set filenames: keyword == variable name
            self.files = AttributeDict([(k, kwargs.pop(k, None))
                                        for k in self.filekeys])
            self.deffnm = kwargs.pop("deffnm", "md")

            if self.files.topology:
                # assume that a user-supplied topology lives in a 'standard' top dir
                # that includes the necessary itp file(s)
                self.dirs.topology = realpath(

            self.forcefield = forcefield
            self.solvent_type = solvent
            self.solventmodel_identifier = forcefields.get_solvent_identifier(
            if self.solventmodel_identifier is None:
                msg = "No parameters for solvent {0} and solventmodel {1} available.".format(
                    solvent, solventmodel)
                raise ValueError(msg)
            self.solventmodel = forcefields.get_solvent_model(

            distance = kwargs.pop('distance', None)
            distance = distance if distance is not None else DIST[solvent]

            self.solvent = AttributeDict(itp=self.solventmodel.itp,

            self.filename = filename or self.solvent_type + '.simulation'

        super(Simulation, self).__init__(**kwargs)

    def BASEDIR(self, *args):
        """Return the path obtained by joining *args* onto ``self.dirs.basedir``."""
        root = self.dirs.basedir
        return os.path.join(root, *args)

    def save(self, filename=None):
        """Save instance to a pickle file.

        The default filename is the name of the file that was last loaded from
        or saved to.
        if filename is None:
            if self.filename is None:
                self.filename = filename or self.solvent_type + '.simulation'
                    "No filename known, saving instance under name %r",
            filename = self.filename
            self.filename = filename
        with open(filename, 'wb') as f:
            pickle.dump(self, f)
        logger.debug("Instance pickled to %(filename)r" % vars())

    def load(self, filename=None):
        """Re-instantiate class from pickled file."""
        if filename is None:
            if self.filename is None:
                self.filename = self.molecule.lower() + '.pickle'
                logger.warning("No filename known, trying name %r",
            filename = self.filename
        with open(filename, 'rb') as f:
            instance = pickle.load(f)
        logger.debug("Instance loaded from %(filename)r" % vars())

    def make_paths_relative(self, prefix=os.path.curdir):
        """Hack to be able to copy directories around: prune basedir from paths.

        .. Warning:: This is not guaranteed to work for all paths. In particular,
                     check :attr:`mdpow.equil.Simulation.dirs.includes` and adjust
                     manually if necessary.
        def assinglet(m):
            if len(m) == 1:
                return m[0]
            elif len(m) == 0:
                return None
            return m

        basedir = self.dirs.basedir
        for key, fn in self.files.items():
                self.files[key] = fn.replace(basedir, prefix)
            except AttributeError:
        for key, val in self.dirs.items():
            fns = asiterable(val)  # treat them all as lists
                self.dirs[key] = assinglet(
                    [fn.replace(basedir, prefix) for fn in fns])
            except AttributeError:
        for key, fn in self.mdp.items():
                self.mdp[key] = fn.replace(basedir, prefix)
            except AttributeError:
            "make_paths_relative(): check/manually adjust %s.dirs.includes = %r !",
            self.__class__.__name__, self.dirs.includes)

    def topology(self, itp='drug.itp', prm=None, **kwargs):
        """Generate a topology for compound *molecule*.

               Gromacs itp file; will be copied to topology dir and
               included in topology
               Gromacs prm file; if given, will be copied to topology
               dir and included in topology
               name of the topology directory ["top"]
               see source for *top_template*, *topol*

        dirname = kwargs.pop('dirname', self.BASEDIR('top'))
        self.dirs.topology = realpath(dirname)

        setting = forcefields.get_ff_paths(self.forcefield)
        template = forcefields.get_top_template(self.solvent_type)

        top_template = config.get_template(kwargs.pop('top_template',
        topol = kwargs.pop('topol', os.path.basename(top_template))
        self.top_template = top_template
        itp = os.path.realpath(itp)
        _itp = os.path.basename(itp)

        if prm is None:
            prm_kw = ''
            prm = os.path.realpath(prm)
            _prm = os.path.basename(prm)
            prm_kw = '#include "{}"'.format(_prm)

        with in_dir(dirname):
            shutil.copy(itp, _itp)
            if prm is not None:
                shutil.copy(prm, _prm)
            gromacs.cbook.edit_txt(top_template, [
                (r'#include +"oplsaa\.ff/forcefield\.itp"', r'oplsaa\.ff/',
                (r'#include +"compound\.itp"', r'compound\.itp', _itp),
                (r'#include +"oplsaa\.ff/tip4p\.itp"',
                 r'oplsaa\.ff/tip4p\.itp', setting[0] + self.solvent.itp),
                (r'#include +"oplsaa\.ff/ions_opls\.itp"',
                 r'oplsaa\.ff/ions_opls\.itp', setting[1]),
                (r'#include +"compound\.prm"', r'#include +"compound\.prm"',
                (r'#include +"water\.itp"', r'water\.itp', setting[2]),
                (r'Compound', 'solvent', self.solvent_type),
                (r'Compound', 'DRUG', self.molecule),
                (r'DRUG\s*1', 'DRUG', self.molecule),
            '[%(dirname)s] Created topology %(topol)r that includes %(_itp)r',

        # update known files and dirs
        self.files.topology = realpath(dirname, topol)
        if not self.dirs.topology in self.dirs.includes:

        return {'dirname': dirname, 'topol': topol}

    @staticmethod
    def _setup_solvate(**kwargs):
        """Solvate structure in a single solvent box.

        Thin wrapper that forwards all keyword arguments to
        :func:`gromacs.setup.solvate` and returns its result.

        Declared as a static method because it takes no instance argument
        but is invoked through the instance (``self._setup_solvate(**kwargs)``).
        """
        return gromacs.setup.solvate(**kwargs)

    def solvate(self, struct=None, **kwargs):
        """Solvate structure *struct* in a box of solvent.

        The solvent is determined with the *solvent* keyword to the constructor.

              pdb or gro coordinate file (if not supplied, the value is used
              that was supplied to the constructor of :class:`~mdpow.equil.Simulation`)
               minimum distance between solute and the closes box face; the default depends
               on the solvent but can be set explicitly here, too.
               any box type understood by :func:`gromacs.editconf` (``-bt``):

               * "triclinic" is a triclinic box,
               * "cubic" is a rectangular box with all sides equal;
               * "dodecahedron" represents a rhombic dodecahedron;
               * "octahedron" is a truncated octahedron.

               The default is "dodecahedron".
              All other arguments are passed on to :func:`gromacs.setup.solvate`, but
              set to sensible default values. *top* and *water* are always fixed.

        self.dirs.solvation = realpath(
            kwargs.setdefault('dirname', self.BASEDIR('solvation')))
        kwargs['struct'] = self._checknotempty(struct or self.files.structure,
        kwargs['top'] = self._checknotempty(self.files.topology, 'top')
        kwargs['water'] =
        kwargs.setdefault('mainselection', '"%s"' %
                          self.molecule)  # quotes are needed for make_ndx
        kwargs.setdefault('distance', self.solvent.distance)

        boxtype = kwargs.pop('bt', None)
        boxtype = boxtype if boxtype is not None else "dodecahedron"
        if boxtype not in ("dodecahedron", "triclinic", "cubic", "octahedron"):
            msg = "Invalid boxtype '{0}', not suitable for 'gmx editconf'.".format(
            raise ValueError(msg)
        kwargs['bt'] = boxtype

        kwargs['includes'] = asiterable(kwargs.pop('includes',
                                                   [])) + self.dirs.includes

        params = self._setup_solvate(**kwargs)

        self.files.structure = kwargs['struct']
        self.files.solvated = params['struct']
        self.files.ndx = params['ndx']
        # we can also make a processed topology right now

        return params

    def processed_topology(self, **kwargs):
        """Create a portable topology file from the topology and the solvated system."""
        if self.files.solvated is None or not os.path.exists(
        kwargs['topol'] = self.files.topology
        kwargs['struct'] = self.files.solvated
        kwargs['includes'] = self.dirs.includes
        self.files.processed_topology = gromacs.cbook.create_portable_topology(
        return self.files.processed_topology

    def energy_minimize(self, **kwargs):
        """Energy minimize the solvated structure on the local machine.

        *kwargs* are passed to :func:`gromacs.setup.energy_minimize` but if
        :meth:`~mdpow.equil.Simulation.solvate` step has been carried out
        previously all the defaults should just work.

        Defaults that can be overridden: *dirname* (``em`` below the base
        directory), *struct* (``self.files.solvated``) and *mdp*
        (``self.mdp['energy_minimize']``). Always set here: *top*
        (``self.files.topology``) and *mainselection* (``None``). Any
        *includes* are prepended to ``self.dirs.includes``.

        :Returns: the value returned by :func:`gromacs.setup.energy_minimize`;
                  its ``'struct'`` entry is recorded as
                  ``self.files.energy_minimized``.
        """
        self.dirs.energy_minimization = realpath(
            kwargs.setdefault('dirname', self.BASEDIR('em')))
        kwargs['top'] = self.files.topology
        kwargs.setdefault('struct', self.files.solvated)
        kwargs.setdefault('mdp', self.mdp['energy_minimize'])
        kwargs['mainselection'] = None
        # list(...) so that a tuple or other non-list iterable of includes can
        # be concatenated with the list of known include directories
        kwargs['includes'] = list(asiterable(kwargs.pop('includes', []))) \
            + self.dirs.includes

        params = gromacs.setup.energy_minimize(**kwargs)

        # remember the minimized structure for the following protocol steps
        self.files.energy_minimized = params['struct']

        return params

    def _MD(self, protocol, **kwargs):
        """Basic MD driver for this Simulation. Do not call directly."""

        kwargs.setdefault('dirname', self.BASEDIR(protocol))
        kwargs.setdefault('deffnm', self.deffnm)
        kwargs.setdefault('mdp', config.get_template('NPT_opls.mdp'))
        self.dirs[protocol] = realpath(kwargs['dirname'])
        setupMD = kwargs.pop('MDfunc', gromacs.setup.MD)
        kwargs['top'] = self.files.topology
        kwargs['includes'] = asiterable(kwargs.pop('includes',
                                                   [])) + self.dirs.includes
        kwargs['ndx'] = self.files.ndx
            'mainselection'] = None  # important for SD (use custom mdp and ndx!, gromacs.setup._MD)
        self._checknotempty(kwargs['struct'], 'struct')
        if not os.path.exists(kwargs['struct']):
            # struct is not reliable as it depends on qscript so now we just try everything...
            struct = gromacs.utilities.find_first(kwargs['struct'],
                                                  suffices=['pdb', 'gro'])
            if struct is None:
                    "Starting structure %(struct)r does not exist (yet)" %
                raise IOError(errno.ENOENT, "Starting structure not found",
      "Found starting structure %r (instead of %r).",
                            struct, kwargs['struct'])
                kwargs['struct'] = struct
        # now setup the whole simulation (this is typically gromacs.setup.MD() )
        params = setupMD(**kwargs)
        # params['struct'] is md.gro but could also be md.pdb --- depends entirely on qscript
        self.files[protocol] = params['struct']
        # Gromacs 4.5.x 'mdrun -c PDB'  fails if it cannot find 'residuetypes.dat'
        # so instead of fuffing with GMXLIB we just dump it into the directory
        except IOError:
                "Failed to copy 'residuetypes.dat': mdrun will likely fail to write a final structure"

        return params

    def MD_relaxed(self, **kwargs):
        """Short MD simulation with *timestep* = 0.1 fs to relax strain.

        Energy minimization does not always remove all problems and LINCS
        constraint errors occur. A very short *runtime* = 5 ps MD with very
        short integration time step *dt* tends to solve these problems.

        .. See Also:: :func:`gromacs.setup.MD`

        :Keywords:
          *struct*
             starting coordinates (typically guessed); defaults to the
             energy minimized structure
          *mdp*
             MDP run parameter file for Gromacs
          *dt*
             integration time step in ps [0.0001]
          *runtime*
             length of the run in ps [5]
          *qscript*
             list of queuing system submission scripts; probably a
             good idea to always include the default even
             if you have your own
          *qname*
             name of the job as shown in the queuing system
          *startdir*
             **advanced uses**: path of the directory on a remote
             system, which will be hard-coded into the queuing system
             script(s); see :func:`gromacs.setup.MD`

        :Returns: whatever the MD driver ``self._MD`` returns for the
                  ``MD_relaxed`` protocol.
        """
        # defaults only: anything supplied by the caller takes precedence
        kwargs.setdefault('struct', self.files.energy_minimized)
        kwargs.setdefault('dt', 0.0001)  # ps
        kwargs.setdefault('runtime', 5)  # ps
        kwargs.setdefault('mdp', self.mdp['MD_relaxed'])
        return self._MD('MD_relaxed', **kwargs)

    def MD_restrained(self, **kwargs):
        """Short MD simulation with position restraints on compound.

        See documentation of :func:`gromacs.setup.MD_restrained` for
        details. The following keywords can not be changed: top, mdp, ndx,

        .. Note:: Position restraints are activated with ``-DPOSRES`` directives
                  for :func:`gromacs.grompp`. Hence this will only work if the
                  compound itp file does indeed contain a ``[ posres ]``
                  section that is protected by a ``#ifdef POSRES`` clause.

        .. See Also:: :func:`gromacs.setup.MD_restrained`

              starting coordinates (leave empty for inspired guess of file name)
              MDP run parameter file for Gromacs
             list of queuing system submission scripts; probably a
             good idea to always include the default "" even
             if you have your own [""]
             name of the job as shown in the queuing system
             **advanced uses**: path of the directory on a remote
             system, which will be hard-coded into the queuing system
             script(s); see :func:`gromacs.setup.MD` and

                [self.files.energy_minimized, self.files.MD_relaxed]))
        kwargs.setdefault('mdp', self.mdp['MD_restrained'])
        kwargs['MDfunc'] = gromacs.setup.MD_restrained
        return self._MD('MD_restrained', **kwargs)

    def MD_NPT(self, **kwargs):
        """Short NPT MD simulation.

        See documentation of :func:`gromacs.setup.MD` for details such
        as *runtime* or specific queuing system options. The following
        keywords can not be changed: *top*, *mdp*, *ndx*, *mainselection*.

        .. Note:: If the system crashes (with LINCS errors), try initial
                  equilibration with timestep *dt* = 0.0001 ps (0.1 fs instead
                  of 2 fs) and *runtime* = 5 ps as done in :meth:`~Simulation.MD_relaxed`

        .. See Also:: :func:`gromacs.setup.MD` and :meth:`Simulation.MD_relaxed`

               starting conformation; by default, the *struct* is the last frame
               from the position restraints run, or, if this file cannot be
               found (e.g. because :meth:`Simulation.MD_restrained` was not run)
               it falls back to the relaxed and then the solvated system.
               MDP run parameter file for Gromacs
               total run time in ps
               list of queuing system scripts to prepare; available values are
               in :data:`gromacs.config.templates` or you can provide your own
               filename(s) in the current directory (see :mod:`gromacs.qsub` for
               the format of the templates)
             name of the job as shown in the queuing system
             **advanced uses**: path of the directory on a remote
             system, which will be hard-coded into the queuing system
             script(s); see :func:`gromacs.setup.MD` and

        # user structure or relaxed or restrained or solvated
        kwargs.setdefault('struct', self.get_last_structure())
            self.get_last_checkpoint())  # Pass checkpoint file from md_relaxed
        kwargs.setdefault('mdp', self.mdp['MD_NPT'])
        return self._MD('MD_NPT', **kwargs)

    # for convenience and compatibility
    MD = MD_NPT

    def _checknotempty(value, name):
        if value is None or value == "":
            raise ValueError("Parameter %s cannot be empty." % name)
        return value

    def _lastnotempty(l):
        """Return the last non-empty value in list *l* (or None :-p)"""
        nonempty = [None] + [
            x for x in l if not (x is None or x == "" or x == [])
        return nonempty[-1]

    def get_last_structure(self):
        """Return the coordinate file of the most advanced protocol step.

        The entries of ``self.files`` named in ``self.coordinate_structures``
        are gathered in order and handed to ``self._lastnotempty``.
        """
        candidates = []
        for key in self.coordinate_structures:
            candidates.append(self.files[key])
        return self._lastnotempty(candidates)

    def get_last_checkpoint(self):
        """Returns the checkpoint of the most advanced step in the protocol.

        Relies on md.gro being present from previous simulation, assumes that
        checkpoint is then present: the name is derived from the last
        non-empty entry of ``self.files`` listed in ``self.checkpoints`` by
        replacing ``.gro`` with ``.cpt``. The checkpoint file itself is not
        checked for existence.
        """
        last_structure = self._lastnotempty(
            [self.files[name] for name in self.checkpoints])
        # NOTE: when no step has produced a structure yet, last_structure is
        # None and the replace() call raises AttributeError.
        return last_structure.replace('.gro', '.cpt')
    def __init__(self, molecule=None, **kwargs):
        """Set up Simulation instance.

        The *molecule* of the compound molecule should be supplied. Existing files
        (which have been generated in previous runs) can also be supplied.

              Identifier for the compound molecule. This is the same as the
              entry in the ``[ molecule ]`` section of the itp file. ["DRUG"]
              If provided and *molecule* is ``None`` then load the instance from
              the pickle file *filename*, which was generated with
              base directory; all other directories are created under it
              'OPLS-AA' or 'CHARMM' or 'AMBER'
              'water' or 'octanol' or 'cyclohexane' or 'wetoctanol'
              ``None`` chooses the default (e.g, :data:`mdpow.forcefields.DEFAULT_WATER_MODEL`
              for ``solvent == "water"``. Other options are the models defined in
              :data:`mdpow.forcefields.GROMACS_WATER_MODELS`. At the moment, there are no
              alternative parameterizations included for other solvents.
              dict with keys corresponding to the stages ``energy_minimize``,
              ``MD_restrained``, ``MD_relaxed``,
              ``MD_NPT`` and values *mdp* file names (if no entry then the
              package defaults are used)
               minimum distance between solute and closest box face
              advanced keywords for short-circuiting; see

        self.__cache = {}
        filename = kwargs.pop('filename', None)
        dirname = kwargs.pop('dirname', self.dirname_default)

        forcefield = kwargs.pop('forcefield', 'OPLS-AA')
        solvent = kwargs.pop('solvent', self.solvent_default)
        # mdp files --- should get values from default runinput.cfg
        # None values in the kwarg mdp dict are ignored
        # self.mdp: key = stage, value = path to MDP file

        # 'water' will choose the default ('tip4p'), other choices are
        # 'tip3p', 'spc', 'spce', 'm24', for water; no choices
        # available for 'cyclohexane' and 'octanol'
        solventmodel = kwargs.pop('solventmodel', None)

        mdp_kw = kwargs.pop('mdp', {})
        self.mdp = dict((stage, config.get_template(fn))
                        for stage, fn in self.mdp_defaults.items())
            dict((stage, config.get_template(fn))
                 for stage, fn in mdp_kw.items() if fn is not None))

        if molecule is None and filename is not None:
            # load from pickle file
            self.filename = filename
            kwargs = {}  # for super
            self.molecule = molecule or 'DRUG'
            self.dirs = AttributeDict(
                basedir=realpath(dirname),  # .../Equilibrium/<solvent>
                includes=list(asiterable(kwargs.pop('includes', []))) +
            # pre-set filenames: keyword == variable name
            self.files = AttributeDict([(k, kwargs.pop(k, None))
                                        for k in self.filekeys])
            self.deffnm = kwargs.pop("deffnm", "md")

            if self.files.topology:
                # assume that a user-supplied topology lives in a 'standard' top dir
                # that includes the necessary itp file(s)
                self.dirs.topology = realpath(

            self.forcefield = forcefield
            self.solvent_type = solvent
            self.solventmodel_identifier = forcefields.get_solvent_identifier(
            if self.solventmodel_identifier is None:
                msg = "No parameters for solvent {0} and solventmodel {1} available.".format(
                    solvent, solventmodel)
                raise ValueError(msg)
            self.solventmodel = forcefields.get_solvent_model(

            distance = kwargs.pop('distance', None)
            distance = distance if distance is not None else DIST[solvent]

            self.solvent = AttributeDict(itp=self.solventmodel.itp,

            self.filename = filename or self.solvent_type + '.simulation'

        super(Simulation, self).__init__(**kwargs)
    def __init__(self, **kwargs):
        """Set up a Simulation object.

             Any object that contains the attributes *tpr*, *xtc*,
             and optionally *ndx*
             (e.g. :class:`gromacs.cbook.Transformer`). The individual keywords such
             as *xtc* override the values in *sim*.
             Gromacs tpr file (**required**)
             Gromacs trajectory, can also be a trr (**required**)
             Gromacs energy file (only required for some plugins)
             Gromacs index file
             ``True``: Turn file names into absolute paths (typically required
             for most plugins); ``False`` keep a they are [``True``]
             ``True``: missing required file keyword raises a :exc:`TypeError`
             and missing the file itself raises a :exc:`IOError`.  ``False``:
             missing required files only give a warning. [``True``]
             directory under which derived data are stored;
             defaults to the directory containing the tpr [None]
           *plugins* : list
             plugin instances or tuples (*plugin class*, *kwarg dict*) or tuples
             (*plugin_class_name*, *kwarg dict*) to be used; more can be
             added later with :meth:`Simulation.add_plugin`.

        """"Loading simulation data")

        sim = kwargs.pop('sim', None)
        strict = kwargs.pop('strict', True)

        def getpop(attr, required=False, strict=strict):
            """Return attribute from from kwargs or sim or None"""
            val = kwargs.pop(attr, None)  # must pop from kwargs to clean it
            if val is not None:
                return val
                return sim.__getattribute__(attr)
            except AttributeError:
                if required:
                    errmsg = "Required attribute {0!r} not found in kwargs or sim".format(
                    if strict:
                        raise TypeError(errmsg)
                        logger.warn(errmsg +
                                    "... continuing because of strict=False")
                return None

        make_absolute = kwargs.pop('absolute', True)

        def canonical(*args):
            """Join *args* and get the :func:`os.path.realpath`."""
            if None in args:
                return None
            if not make_absolute:
                return os.path.join(*args)
            return os.path.realpath(os.path.join(*args))

        # required files
        self.tpr = canonical(getpop('tpr', required=True))
        self.xtc = canonical(getpop('xtc', required=True))
        # optional files
        self.ndx = canonical(getpop('ndx'))
        self.edr = canonical(getpop('edr'))

        # check existence of required files
        resolve = "exception"
        if not strict:
            resolve = "warn"
        for v in ('tpr', 'xtc'):
            self.check_file(v, self.__getattribute__(v), resolve=resolve)

        self.analysis_dir = kwargs.pop('analysisdir',

        #: Registry for plugins: This dict is central.
        self.plugins = AttributeDict()
        #: Use this plugin if none is explicitly specified. Typically set with
        #: :meth:`~Simulation.set_plugin`.
        self.default_plugin_name = None

        # XXX: Or should we simply add instances and then re-register
        #      all instances using register() ?
        # XXX: ... this API should be cleaned up. It seems to be connected
        #      back and forth in vicious circles. -- OB 2009-07-10

        plugins = kwargs.pop('plugins', [])
        # list of tuples (plugin, kwargs) or just (plugin,) if no kwords
        # required (eg if plugin is an instance)
        for x in plugins:
                P, kwargs = asiterable(
                    x)  # make sure to wrap strings, especially 2-letter ones!
            except ValueError:
                P = x
                kwargs = {}
            self.add_plugin(P, **kwargs)

        # convenience: if only a single plugin was registered we default to that one
        if len(self.plugins) == 1:

        # Is this needed? If done properly, kwargs should be empty by now BUT
        # because the same list is re-used for all plugins I cannot pop them in
        # the plugins. I don't think multiple inheritance would work with this
        # setup so let's not pretend it does: hence comment out the super-init
        # call:
        ## super(Simulation, self).__init__(**kwargs)"Simulation instance initialised:")
class _COM(Worker):
    """COM worker class."""
    def __init__(self, **kwargs):
        """Set up COM analysis.

               list of index group names
               index file if groups are not in the default index
               add the *offset* to the residue numbers [0]
               plugin name [COM]
               The :class:`gromacs.analysis.Simulation` instance that
               owns the plugin [None]
        group_names = asiterable(kwargs.pop('group_names', []))
        ndx = kwargs.pop('ndx', None)
        offset = kwargs.pop('offset', 0)

        super(_COM, self).__init__(**kwargs)

        self.parameters.group_names = group_names
        self.parameters.offset = offset
        self.ndx = ndx

        if self.simulation is not None:

    def _register_hook(self, **kwargs):
        """Run when registering; requires simulation."""

        super(_COM, self)._register_hook(**kwargs)
        assert self.simulation is not None

        if self.ndx is None:
            self.ndx = self.simulation.ndx

        self.parameters.filenames = {                     # result xvg files
            'com': self.plugindir('com.xvg'),

        # default filename for the plots -- not used
        self.parameters.fignames = {
            'com': self.figdir('com'),

    def run(self, force=None, **gmxargs):
        """Analyze trajectory and write COM file.

        All three components of the COM coordinate are written.

          - *force*: ``True`` does analysis and overwrites existing files
          - *gmxargs*: additional keyword arguments for :func:`gromacs.g_bundle`
        gmxargs['com'] = True
        gmxargs['mol'] = False
        gmxargs['ng'] = len(self.parameters.group_names)
        gmxargs['x'] = True
        gmxargs['y'] = True
        gmxargs['z'] = True

        if gmxargs['ng'] == 0:
            errmsg = "No index group name(s) provided. Use group_name with the constructor."
            raise ValueError(errmsg)

        if self.check_file_exists(self.parameters.filenames['com'],
            return"Analyzing COM ...")
        f = self.parameters.filenames

    def analyze(self, **kwargs):
        """Collect output xvg files as :class:`gromacs.formats.XVG` objects.

        - Make COM as a function of time available as XVG files and
        - Compute RMSD of the COM of each group (from average
          position, "rmsd").
        - Compute distance whic encompasses 50% of observations ("median")
        - Compute drift of COM, i.e. length of the vector between
          initial and final position. Initial and final position are
          computed as averages over *nframesavg* frames ("drift").

        RMSD, median, and drift are columns in an xvg file. The rows correspond
        to the groups in :attr:``.

              number of initial and final frames that are averaged in
              order to compute the drift of the COM of each group
              group name whose com is taken as the reference and subtracted from
              all other coms for the distance calculations. If supplied,
              additional result 'com_relative_*refgroup*' is created.

        :Returns:  a dictionary of the results and also sets
        from gromacs.formats import XVG"Preparing COM graphs as XVG objects.")
        self.results = AttributeDict(
            (k, XVG(fn)) for k, fn in self.parameters.filenames.items())

        # compute RMSD of COM and shift of COM (drift) between avg pos
        # over first/last 5,000 frames
        nframesavg = kwargs.pop('nframesavg', 5000)
        ngroups = len(self.parameters.group_names)
        xcom = self.results['com'].array

        refgroup = kwargs.pop('refgroup', None)
        if refgroup is not None:
            if not refgroup in self.parameters.group_names:
                errmsg = "refgroup={0!s} must be one of {1!r}".format(
                    refgroup, self.parameters.group_names)
                raise ValueError(errmsg)
            nreference = 1 + 3 * self.parameters.group_names.index(
                refgroup)  # 1-based !!
            reference_com = xcom[nreference:nreference + 3]
            xcom[1:] -= numpy.vstack(ngroups *
                                     [reference_com])  # can't use broadcast
            logger.debug("distances computed with refgroup %r", refgroup)

                           names=['time'] + self.parameters.group_names)

        def vlength(v):
            return numpy.sqrt(numpy.sum(v**2,
                                        axis=0))  # distances over time step

            "drift calculated between %d-frame averages at beginning and end",
        records = []
        for i in xrange(1, 3 * ngroups + 1, 3):
            x = xcom[i:i + 3]
            r = vlength(
                x -
                x.mean(axis=1)[:, numpy.newaxis])  # distances over time step
            #r0 = vlength(r - r[:,0][:,numpy.newaxis])         # distances over time step from r(t=0)
            #h,edges = numpy.histogram(r, bins=kwargs.get('bins', 100), normed=True)
            #m = 0.5*(edges[1:]+edges[:-1])
            #c = h.cumsum(dtype=float)    # integral
            #c /= c[-1]                   # normalized (0 to 1)
            #median = m[c < 0.5][-1]
            #g =  h/(4*numpy.pi*m**2)
            #import scipy.integrate
            #radint = lambda y: 4*numpy.pi*scipy.integrate.simps(m**2*y, x=m)
            #g /= radint(g)  # properly normalized radial distribution function
            rmsd = numpy.sqrt(numpy.mean(
                r**2))  # radial spread sqrt(radint(m**2 * g))
            median = numpy.median(
                r)  # radius that contains 50% of the observations
            dx = x[:, :nframesavg].mean(axis=1) - x[:,
            drift = vlength(dx)
            records.append((rmsd, median, drift))

        return self.results

    def plot(self, **kwargs):
        """Plot all results in one graph, labelled by the result keys.

              select one or more of the stored results. Can be a list
              or a string (a key into the results dict). ``None``
              plots everything [``None``]
               - ``True``: save figures in the given formats
               - "name.ext": save figure under this filename (``ext`` -> format)
               - ``False``: only show on screen [``False``]
           formats : sequence
               sequence of all formats that should be saved [('png', 'pdf')]
               keyword arguments for pylab.plot()

        import pylab
        figure = kwargs.pop('figure', False)
        observables = asiterable(kwargs.pop('observables',
        extensions = kwargs.pop('formats', ('pdf', 'png'))

        for name in observables:
            result = self.results[name]
                )  # This requires result classes with a plot() method!!
            except AttributeError:
                    "Sorry, plotting of result {name!r} is not implemented".

        # quick labels -- relies on the proper ordering
        labels = [
            str(n) + " " + dim for n in self.parameters.group_names
            for dim in 'xyz'
        if kwargs.get('columns') is not None:
            # select labels according to columns; only makes sense
            # if plotting against the time (col 0)
            if kwargs['columns'][0] == 0:
                labels = numpy.array([None] + labels)[kwargs['columns'][1:]]
                labels = ()

        pylab.legend(labels, loc='best')
        if figure is True:
            for ext in extensions:
        elif figure:
    def analyze(self, **kwargs):
        """Collect output xvg files as :class:`gromacs.formats.XVG` objects.

        - Make COM as a function of time available as XVG files and
        - Compute RMSD of the COM of each group (from average
          position, "rmsd").
        - Compute distance whic encompasses 50% of observations ("median")
        - Compute drift of COM, i.e. length of the vector between
          initial and final position. Initial and final position are
          computed as averages over *nframesavg* frames ("drift").

        RMSD, median, and drift are columns in an xvg file. The rows correspond
        to the groups in :attr:``.

              number of initial and final frames that are averaged in
              order to compute the drift of the COM of each group
              group name whose com is taken as the reference and subtracted from
              all other coms for the distance calculations. If supplied,
              additional result 'com_relative_*refgroup*' is created.

        :Returns:  a dictionary of the results and also sets
        from gromacs.formats import XVG"Preparing COM graphs as XVG objects.")
        self.results = AttributeDict(
            (k, XVG(fn)) for k, fn in self.parameters.filenames.items())

        # compute RMSD of COM and shift of COM (drift) between avg pos
        # over first/last 5,000 frames
        nframesavg = kwargs.pop('nframesavg', 5000)
        ngroups = len(self.parameters.group_names)
        xcom = self.results['com'].array

        refgroup = kwargs.pop('refgroup', None)
        if refgroup is not None:
            if not refgroup in self.parameters.group_names:
                errmsg = "refgroup={0!s} must be one of {1!r}".format(
                    refgroup, self.parameters.group_names)
                raise ValueError(errmsg)
            nreference = 1 + 3 * self.parameters.group_names.index(
                refgroup)  # 1-based !!
            reference_com = xcom[nreference:nreference + 3]
            xcom[1:] -= numpy.vstack(ngroups *
                                     [reference_com])  # can't use broadcast
            logger.debug("distances computed with refgroup %r", refgroup)

                           names=['time'] + self.parameters.group_names)

        def vlength(v):
            return numpy.sqrt(numpy.sum(v**2,
                                        axis=0))  # distances over time step

            "drift calculated between %d-frame averages at beginning and end",
        records = []
        for i in xrange(1, 3 * ngroups + 1, 3):
            x = xcom[i:i + 3]
            r = vlength(
                x -
                x.mean(axis=1)[:, numpy.newaxis])  # distances over time step
            #r0 = vlength(r - r[:,0][:,numpy.newaxis])         # distances over time step from r(t=0)
            #h,edges = numpy.histogram(r, bins=kwargs.get('bins', 100), normed=True)
            #m = 0.5*(edges[1:]+edges[:-1])
            #c = h.cumsum(dtype=float)    # integral
            #c /= c[-1]                   # normalized (0 to 1)
            #median = m[c < 0.5][-1]
            #g =  h/(4*numpy.pi*m**2)
            #import scipy.integrate
            #radint = lambda y: 4*numpy.pi*scipy.integrate.simps(m**2*y, x=m)
            #g /= radint(g)  # properly normalized radial distribution function
            rmsd = numpy.sqrt(numpy.mean(
                r**2))  # radial spread sqrt(radint(m**2 * g))
            median = numpy.median(
                r)  # radius that contains 50% of the observations
            dx = x[:, :nframesavg].mean(axis=1) - x[:,
            drift = vlength(dx)
            records.append((rmsd, median, drift))

        return self.results
class _COM(Worker):
    """COM worker class."""

    def __init__(self,**kwargs):
        """Set up COM analysis.

               list of index group names
               index file if groups are not in the default index
               add the *offset* to the residue numbers [0]
               plugin name [COM]
               The :class:`gromacs.analysis.Simulation` instance that
               owns the plugin [None]
        group_names = asiterable(kwargs.pop('group_names', []))
        ndx = kwargs.pop('ndx', None)
        offset = kwargs.pop('offset', 0)

        super(_COM, self).__init__(**kwargs)
        self.parameters.group_names = group_names
        self.parameters.offset = offset
        self.ndx = ndx

        if not self.simulation is None:

    def _register_hook(self, **kwargs):
        """Run when registering; requires simulation."""

        super(_COM, self)._register_hook(**kwargs)
        assert not self.simulation is None

        if self.ndx is None:
            self.ndx = self.simulation.ndx

        self.parameters.filenames = {                     # result xvg files
            'com': self.plugindir('com.xvg'),

        # default filename for the plots -- not used
        self.parameters.fignames = {
            'com': self.figdir('com'),
    def run(self, force=None, **gmxargs):
        """Analyze trajectory and write COM file.

        All three components of the COM coordinate are written.

        :Arguments:
          - *force*: ``True`` does analysis and overwrites existing files
          - *gmxargs*: additional keyword arguments for :func:`gromacs.g_traj`;
            the keys *com*, *mol*, *ng*, *x*, *y*, and *z* are always
            overridden by this method

        :Raises: :exc:`ValueError` if no index group names were supplied.
        """
        # fixed g_traj options: centre of mass of each group, all three components
        gmxargs['com'] = True
        gmxargs['mol'] = False
        gmxargs['ng'] = len(self.parameters.group_names)
        gmxargs['x'] = True
        gmxargs['y'] = True
        gmxargs['z'] = True

        if gmxargs['ng'] == 0:
            errmsg = "No index group name(s) provided. Use group_names with the constructor."
            raise ValueError(errmsg)

        # NOTE(review): check_file_exists() presumably returns True when the
        # output is already present and must not be overwritten -- confirm.
        if self.check_file_exists(self.parameters.filenames['com'],
                                  resolve='warning', force=force):
            return

        logger.info("Analyzing COM ...")
        f = self.parameters.filenames
        gromacs.g_traj(s=self.simulation.tpr, f=self.simulation.xtc, n=self.ndx,
                       ox=f['com'], input=self.parameters.group_names, **gmxargs)

    def analyze(self,**kwargs):
        """Collect output xvg files as :class:`gromacs.formats.XVG` objects.

        - Make COM as a function of time available as XVG files and
        - Compute RMSD of the COM of each group (from average
          position, "rmsd").
        - Compute distance whic encompasses 50% of observations ("median")
        - Compute drift of COM, i.e. length of the vector between
          initial and final position. Initial and final position are
          computed as averages over *nframesavg* frames ("drift").
        RMSD, median, and drift are columns in an xvg file. The rows correspond
        to the groups in :attr:``.

              number of initial and final frames that are averaged in
              order to compute the drift of the COM of each group
              group name whose com is taken as the reference and subtracted from
              all other coms for the distance calculations. If supplied,
              additional result 'com_relative_*refgroup*' is created.
        :Returns:  a dictionary of the results and also sets
        from gromacs.formats import XVG"Preparing COM graphs as XVG objects.")        
        self.results = AttributeDict( (k, XVG(fn)) for k,fn in self.parameters.filenames.items() )

        # compute RMSD of COM and shift of COM (drift) between avg pos
        # over first/last 5,000 frames
        nframesavg = kwargs.pop('nframesavg', 5000)
        ngroups = len(self.parameters.group_names)
        xcom = self.results['com'].array

        refgroup = kwargs.pop('refgroup', None)
        if not refgroup is None:
            if not refgroup in self.parameters.group_names:
                errmsg = "refgroup=%s must be one of %r" % (refgroup, self.parameters.group_names)
                raise ValueError(errmsg)
            nreference = 1 + 3 * self.parameters.group_names.index(refgroup) # 1-based !!
            reference_com = xcom[nreference:nreference+3]
            xcom[1:] -= numpy.vstack(ngroups * [reference_com])  # can't use broadcast
            logger.debug("distances computed with refgroup %r", refgroup)

            self.store_xvg('com_relative_%s' % refgroup, xcom, 

        def vlength(v):
            return numpy.sqrt(numpy.sum(v**2, axis=0))  # distances over time step

        logger.debug("drift calculated between %d-frame averages at beginning and end",nframesavg)
        records = []
        for i in xrange(1, 3*ngroups+1, 3):
            x = xcom[i:i+3]
            r  = vlength(x - x.mean(axis=1)[:,numpy.newaxis])  # distances over time step
            #r0 = vlength(r - r[:,0][:,numpy.newaxis])         # distances over time step from r(t=0)
            #h,edges = numpy.histogram(r, bins=kwargs.get('bins', 100), normed=True)
            #m = 0.5*(edges[1:]+edges[:-1])
            #c = h.cumsum(dtype=float)    # integral
            #c /= c[-1]                   # normalized (0 to 1)
            #median = m[c < 0.5][-1]
            #g =  h/(4*numpy.pi*m**2)
            #import scipy.integrate
            #radint = lambda y: 4*numpy.pi*scipy.integrate.simps(m**2*y, x=m)
            #g /= radint(g)  # properly normalized radial distribution function
            rmsd = numpy.sqrt(numpy.mean(r**2))  # radial spread sqrt(radint(m**2 * g))
            median = numpy.median(r)             # radius that contains 50% of the observations
            dx = x[:,:nframesavg].mean(axis=1) - x[:,-nframesavg:].mean(axis=1)
            drift = vlength(dx)
            records.append((rmsd, median, drift))
        self.store_xvg('distance', numpy.transpose(records), names="rmsd,median,drift")

        return self.results

    def plot(self, **kwargs):
        """Plot all results in one graph, labelled by the result keys.

              select one or more of the stored results. Can be a list
              or a string (a key into the results dict). ``None``
              plots everything [``None``]           
               - ``True``: save figures in the given formats
               - "name.ext": save figure under this filename (``ext`` -> format)
               - ``False``: only show on screen [``False``]
           formats : sequence
               sequence of all formats that should be saved [('png', 'pdf')]
               keyword arguments for pylab.plot()

        import pylab
        figure = kwargs.pop('figure', False)
        observables = asiterable(kwargs.pop('observables', self.results.keys()))
        extensions = kwargs.pop('formats', ('pdf','png'))

        for name in observables:
            result = self.results[name]
                result.plot(**kwargs)      # This requires result classes with a plot() method!!
            except AttributeError:
                warnings.warn("Sorry, plotting of result %(name)r is not implemented" % vars(),

        # quick labels -- relies on the proper ordering
        labels = [str(n)+" "+dim for n in self.parameters.group_names
                  for dim in 'xyz']
        if not kwargs.get('columns', None) is None:
            # select labels according to columns; only makes sense
            # if plotting against the time (col 0)
            if kwargs['columns'][0] == 0:
                labels = numpy.array([None]+labels)[kwargs['columns'][1:]]
                labels = ()

        pylab.legend(labels, loc='best')
        if figure is True:
            for ext in extensions:
        elif figure:
    def analyze(self, **kwargs):
        """Collect output xvg files as :class:`gromacs.formats.XVG` objects.

        - Make COM as a function of time available as XVG files and objects.
        - Compute RMSD of the COM of each group (from average
          position, "rmsd").
        - Compute distance which encompasses 50% of observations ("median")
        - Compute drift of COM, i.e. length of the vector between
          initial and final position. Initial and final position are
          computed as averages over *nframesavg* frames ("drift").

        RMSD, median, and drift are stored together as the result
        'distance'; there is one entry per group in
        ``self.parameters.group_names``.

        :Keywords:
          *nframesavg*
              number of initial and final frames that are averaged in
              order to compute the drift of the COM of each group [5000]
          *refgroup*
              group name whose com is taken as the reference and subtracted from
              all other coms for the distance calculations. If supplied,
              additional result 'com_relative_*refgroup*' is created. [``None``]

        :Returns:  a dictionary of the results and also sets ``self.results``.

        :Raises: :exc:`ValueError` if *refgroup* is not one of the group names.
        """
        from gromacs.formats import XVG

        logger.info("Preparing COM graphs as XVG objects.")
        self.results = AttributeDict(
            (k, XVG(fn)) for k, fn in self.parameters.filenames.items())

        # compute RMSD of COM and shift of COM (drift) between avg pos
        # over first/last 5,000 frames
        nframesavg = kwargs.pop('nframesavg', 5000)
        ngroups = len(self.parameters.group_names)
        # row 0 is indexed as time, rows 1.. as x,y,z triplets per group
        xcom = self.results['com'].array

        refgroup = kwargs.pop('refgroup', None)
        if refgroup is not None:
            if refgroup not in self.parameters.group_names:
                errmsg = "refgroup=%s must be one of %r" % (refgroup, self.parameters.group_names)
                raise ValueError(errmsg)
            nreference = 1 + 3 * self.parameters.group_names.index(refgroup)  # 1-based !!
            reference_com = xcom[nreference:nreference+3]
            # in-place: every group's COM becomes relative to the reference group
            xcom[1:] -= numpy.vstack(ngroups * [reference_com])  # can't use broadcast
            logger.debug("distances computed with refgroup %r", refgroup)

            self.store_xvg('com_relative_%s' % refgroup, xcom,
                           names=['time'] + list(self.parameters.group_names))

        def vlength(v):
            return numpy.sqrt(numpy.sum(v**2, axis=0))  # distances over time step

        logger.debug("drift calculated between %d-frame averages at beginning and end", nframesavg)
        records = []
        for i in xrange(1, 3*ngroups+1, 3):
            x = xcom[i:i+3]
            # distance of the COM from its average position at each time step
            r = vlength(x - x.mean(axis=1)[:, numpy.newaxis])
            rmsd = numpy.sqrt(numpy.mean(r**2))  # radial spread
            median = numpy.median(r)             # radius that contains 50% of the observations
            dx = x[:, :nframesavg].mean(axis=1) - x[:, -nframesavg:].mean(axis=1)
            drift = vlength(dx)
            records.append((rmsd, median, drift))
        self.store_xvg('distance', numpy.transpose(records), names="rmsd,median,drift")

        return self.results
class Simulation(Journalled):
    """Simple MD simulation of a single compound molecule in water.

    Typical use ::

       S = Simulation(molecule='DRUG')

    .. Note:: The OPLS/AA force field and the TIP4P water molecule is the
              default; changing this is possible but will require provision of
              customized itp, mdp and template top files at various stages.

    #: Keyword arguments to pre-set some file names; they are keys in :attr:`Simulation.files`.
    filekeys = ('topology', 'processed_topology', 'structure', 'solvated', 'ndx',
                'energy_minimized', 'MD_relaxed', 'MD_restrained', 'MD_NPT')
    topdir_default = "Equilibrium"
    dirname_default = os.path.curdir
    solvent_default = 'water'

    #: Coordinate files of the full system in increasing order of advancement of
    #: the protocol; the later the better. The values are keys into :attr:`Simulation.files`.
    coordinate_structures = ('solvated', 'energy_minimized', 'MD_relaxed',
                             'MD_restrained', 'MD_NPT')
    checkpoints = ('solvated','energy_minimized','MD_relaxed','MD_restrained','MD_NPT')

    #: Check list of all methods that can be run as an independent protocol; see also
    #: :meth:`Simulation.get_protocol` and :class:`restart.Journal`
    protocols = ("MD_NPT", "MD_NPT_run",                 # *_run as dummies for the ...
                 "MD_relaxed", "MD_relaxed_run",         # ...checkpointing logic
                 "MD_restrained", "MD_restrained_run",
                 "energy_minimize", "solvate", "topology")

    #: Default Gromacs *MDP* run parameter files for the different stages.
    #: (All are part of the package and are found with :func:`mdpow.config.get_template`.)
    mdp_defaults = {'MD_relaxed': 'NPT_opls.mdp',
                    'MD_restrained': 'NPT_opls.mdp',
                    'MD_NPT': 'NPT_opls.mdp',
                    'energy_minimize': 'em_opls.mdp',

    def __init__(self, molecule=None, **kwargs):
        """Set up Simulation instance.

        The *molecule* of the compound molecule should be supplied. Existing files
        (which have been generated in previous runs) can also be supplied.

              Identifier for the compound molecule. This is the same as the
              entry in the ``[ molecule ]`` section of the itp file. ["DRUG"]
              If provided and *molecule* is ``None`` then load the instance from
              the pickle file *filename*, which was generated with
              base directory; all other directories are created under it
              'water' or 'octanol' or 'cyclohexane'
              ``None`` chooses the default (e.g, :data:`mdpow.forcefields.DEFAULT_WATER_MODEL`
              for ``solvent == "water"``. Other options are the models defined in
              :data:`mdpow.forcefields.GROMACS_WATER_MODELS`. At the moment, there are no
              alternative parameterizations included for other solvents.
              dict with keys corresponding to the stages ``energy_minimize``,
              ``MD_restrained``, ``MD_relaxed``,
              ``MD_NPT`` and values *mdp* file names (if no entry then the
              package defaults are used)
              advanced keywords for short-circuiting; see

        self.__cache = {}
        filename = kwargs.pop('filename', None)
        dirname = kwargs.pop('dirname', self.dirname_default)

        solvent = kwargs.pop('solvent', self.solvent_default)
        # mdp files --- should get values from default runinput.cfg
        # None values in the kwarg mdp dict are ignored
        # self.mdp: key = stage, value = path to MDP file

        # 'water' will choose the default ('tip4p'), other choices are
        # 'tip3p', 'spc', 'spce', for water; no choices
        # available for 'cyclohexane' and 'octanol'
        solventmodel = kwargs.pop('solventmodel', None)

        mdp_kw = kwargs.pop('mdp', {})
        self.mdp = dict((stage, config.get_template(fn)) for stage,fn in self.mdp_defaults.items())
        self.mdp.update(dict((stage, config.get_template(fn)) for stage,fn in mdp_kw.items() if fn is not None))

        if molecule is None and filename is not None:
            # load from pickle file
            self.filename = filename
            kwargs = {}    # for super
            self.molecule = molecule or 'DRUG'
            self.dirs = AttributeDict(
                basedir=realpath(dirname),    # .../Equilibrium/<solvent>
                includes=list(asiterable(kwargs.pop('includes',[]))) + [config.includedir],
            # pre-set filenames: keyword == variable name
            self.files = AttributeDict([(k, kwargs.pop(k, None)) for k in self.filekeys])
            self.deffnm = kwargs.pop("deffnm", "md")

            if self.files.topology:
                # assume that a user-supplied topology lives in a 'standard' top dir
                # that includes the necessary itp file(s)
                self.dirs.topology = realpath(os.path.dirname(self.files.topology))

            self.solvent_type = solvent
            self.solventmodel_identifier = forcefields.get_solvent_identifier(solvent, solventmodel)
            if self.solventmodel_identifier is None:
                msg = "No parameters for solvent {0} and solventmodel {1} available.".format(
                    solvent, solventmodel)
                raise ValueError(msg)
            self.solventmodel = forcefields.get_solvent_model(self.solventmodel_identifier)

            distance = kwargs.pop('distance', None)
            distance = distance if distance is not None else DIST[solvent]

            self.solvent = AttributeDict(itp=self.solventmodel.itp,

            self.filename = filename or self.solvent_type+'.simulation'

        super(Simulation, self).__init__(**kwargs)

    def BASEDIR(self, *args):
        """Return the path obtained by joining *args* below the base directory."""
        components = (self.dirs.basedir,) + args
        return os.path.join(*components)

    def save(self, filename=None):
        """Save instance to a pickle file.

        The default filename is the name of the file that was last loaded from
        or saved to.

        :Arguments:
           *filename*
              name of the pickle file to write. If ``None`` then
              ``self.filename`` is used; if that is also ``None`` it is
              first set to ``<solvent_type>.simulation`` and a warning is
              logged. An explicitly supplied name becomes the new default.
        """
        if filename is None:
            if self.filename is None:
                self.filename = self.solvent_type + '.simulation'
                logger.warning("No filename known, saving instance under name %r", self.filename)
            filename = self.filename
        else:
            # remember an explicitly chosen name for subsequent save()/load()
            self.filename = filename
        with open(filename, 'wb') as f:
            cPickle.dump(self, f, protocol=cPickle.HIGHEST_PROTOCOL)
        logger.debug("Instance pickled to %r", filename)

    def load(self, filename=None):
        """Re-instantiate class from pickled file.

        The attributes of the unpickled instance replace the attributes of
        this instance.

        :Arguments:
           *filename*
              name of the pickle file to read. If ``None`` then
              ``self.filename`` is used; if that is also ``None`` the name
              ``<molecule>.pickle`` (lower case) is tried and a warning is
              logged.

        .. Warning:: Unpickling can execute arbitrary code; only load files
                     from a trusted source.
        """
        if filename is None:
            if self.filename is None:
                # NOTE(review): save() falls back to '<solvent_type>.simulation',
                # not '<molecule>.pickle' -- confirm which default is intended.
                self.filename = self.molecule.lower() + '.pickle'
                logger.warning("No filename known, trying name %r", self.filename)
            filename = self.filename
        with open(filename, 'rb') as f:
            instance = cPickle.load(f)
        # take over the complete state of the stored instance; without this
        # the loaded object would simply be discarded
        self.__dict__.update(instance.__dict__)
        logger.debug("Instance loaded from %r", filename)

    def make_paths_relative(self, prefix=os.path.curdir):
        """Hack to be able to copy directories around: prune basedir from paths.

        Every occurrence of ``self.dirs.basedir`` in the entries of
        ``self.files``, ``self.dirs``, and ``self.mdp`` is replaced by
        *prefix*. Entries that do not support string replacement (such as
        ``None``) are left unchanged.

        :Arguments:
           *prefix*
              string that replaces the base directory [:data:`os.path.curdir`]

        .. Warning:: This is not guaranteed to work for all paths. In particular,
                     check :attr:`mdpow.equil.Simulation.dirs.includes` and adjust
                     manually if necessary.
        """
        def assinglet(m):
            # collapse a sequence: one element -> the element, empty -> None
            if len(m) == 1:
                return m[0]
            elif len(m) == 0:
                return None
            return m

        basedir = self.dirs.basedir
        for key, fn in self.files.items():
            try:
                self.files[key] = fn.replace(basedir, prefix)
            except AttributeError:
                pass    # not a string (e.g. None): nothing to rewrite
        for key, val in self.dirs.items():
            fns = asiterable(val)  # treat them all as lists
            try:
                self.dirs[key] = assinglet([fn.replace(basedir, prefix) for fn in fns])
            except AttributeError:
                pass    # contains a non-string entry: leave untouched
        # iterate over (stage, path) pairs; iterating the dict itself would
        # yield only the keys and fail on unpacking
        for key, fn in self.mdp.items():
            try:
                self.mdp[key] = fn.replace(basedir, prefix)
            except AttributeError:
                pass    # not a string: nothing to rewrite
        logger.warning("make_paths_relative(): check/manually adjust %s.dirs.includes = %r !",
                       self.__class__.__name__, self.dirs.includes)

    def topology(self, itp='drug.itp', **kwargs):
        """Generate a topology for compound *molecule*.

        The *itp* file is copied into the topology directory and a topology
        file is written there by substituting the name of the copied itp,
        the solvent itp, the solvent type and the molecule name into the
        topology template.

        :Keywords:
            *itp*
               Gromacs itp file; will be copied to topology dir and
               included in topology
            *dirname*
               name of the topology directory ["top"]
            *kwargs*
               see source for *top_template*, *topol*

        :Returns: dict with the keys ``dirname`` and ``topol``. As a side
                  effect ``self.dirs.topology`` and ``self.files.topology``
                  are set and the topology directory is added to
                  ``self.dirs.includes`` (if not already present).
        """
        dirname = kwargs.pop('dirname', self.BASEDIR('top'))
        self.dirs.topology = realpath(dirname)

        # NOTE(review): the default template name is the empty string here;
        # confirm that config.get_template('') resolves to the intended file.
        top_template = config.get_template(kwargs.pop('top_template', ''))
        topol = kwargs.pop('topol', os.path.basename(top_template))
        # resolve before changing directory: in_dir() below makes relative
        # paths point somewhere else
        itp = os.path.realpath(itp)
        _itp = os.path.basename(itp)

        with in_dir(dirname):
            shutil.copy(itp, _itp)
            # each triplet is (line regex, substitution regex, replacement);
            # raw strings keep the regex escapes intact
            gromacs.cbook.edit_txt(top_template,
                                   [(r'#include +"compound\.itp"', r'compound\.itp', _itp),
                                    (r'#include +"oplsaa\.ff/tip4p\.itp"', r'tip4p\.itp', self.solvent.itp),
                                    ('Compound', 'solvent', self.solvent_type),
                                    ('Compound', 'DRUG', self.molecule),
                                    (r'DRUG\s*1', 'DRUG', self.molecule),
                                    ],
                                   newname=topol)
        logger.info('[%(dirname)s] Created topology %(topol)r that includes %(_itp)r', vars())

        # update known files and dirs
        self.files.topology = realpath(dirname, topol)
        if self.dirs.topology not in self.dirs.includes:
            self.dirs.includes.append(self.dirs.topology)

        return {'dirname': dirname, 'topol': topol}

    def solvate(self, struct=None, **kwargs):
        """Solvate structure *struct* in a box of solvent.

        The solvent is determined with the *solvent* keyword to the constructor.

              pdb or gro coordinate file (if not supplied, the value is used
              that was supplied to the constructor of :class:`~mdpow.equil.Simulation`)
              All other arguments are passed on to :func:`gromacs.setup.solvate`, but
              set to sensible default values. *top* and *water* are always fixed.

        self.dirs.solvation = realpath(kwargs.setdefault('dirname', self.BASEDIR('solvation')))
        kwargs['struct'] = self._checknotempty(struct or self.files.structure, 'struct')
        kwargs['top'] = self._checknotempty(self.files.topology, 'top')
        kwargs['water'] =
        kwargs.setdefault('mainselection', '"%s"' % self.molecule)  # quotes are needed for make_ndx
        kwargs.setdefault('distance', self.solvent.distance)
        kwargs['includes'] = asiterable(kwargs.pop('includes',[])) + self.dirs.includes

        params = gromacs.setup.solvate(**kwargs)

        self.files.structure = kwargs['struct']
        self.files.solvated = params['struct']
        self.files.ndx = params['ndx']

        # we can also make a processed topology right now

        return params

    def processed_topology(self, **kwargs):
        """Create a portable topology file from the topology and the solvated system.

        *kwargs* are handed to :func:`gromacs.cbook.create_portable_topology`
        after *topol*, *struct* and *includes* have been overwritten with the
        values known to this instance.

        :Returns: the value returned by
                  :func:`gromacs.cbook.create_portable_topology`, which is
                  also stored in ``self.files.processed_topology``.
        """
        if self.files.solvated is None or not os.path.exists(self.files.solvated):
            # NOTE(review): the statement under this condition was missing;
            # generating the solvated system first is the presumed intent
            # -- confirm against version control.
            self.solvate(**kwargs)
        kwargs['topol'] = self.files.topology
        kwargs['struct'] = self.files.solvated
        kwargs['includes'] = self.dirs.includes
        self.files.processed_topology = gromacs.cbook.create_portable_topology(**kwargs)
        return self.files.processed_topology

    def energy_minimize(self, **kwargs):
        """Energy minimize the solvated structure on the local machine.

        *kwargs* are passed to :func:`gromacs.setup.energy_minimize` but if
        :meth:`~mdpow.equil.Simulation.solvate` step has been carried out
        previously all the defaults should just work.

        *top* and *mainselection* are always overwritten; *dirname*,
        *struct* and *mdp* only receive defaults; the known include
        directories are appended to any user-supplied *includes*.

        :Returns: the dict returned by :func:`gromacs.setup.energy_minimize`;
                  its ``struct`` entry is stored in
                  ``self.files.energy_minimized``.
        """
        self.dirs.energy_minimization = realpath(kwargs.setdefault('dirname', self.BASEDIR('em')))
        kwargs['top'] = self.files.topology
        kwargs.setdefault('struct', self.files.solvated)
        kwargs.setdefault('mdp', self.mdp['energy_minimize'])
        kwargs['mainselection'] = None
        # list(): a tuple of includes must not break the concatenation
        kwargs['includes'] = list(asiterable(kwargs.pop('includes', []))) + self.dirs.includes

        params = gromacs.setup.energy_minimize(**kwargs)

        self.files.energy_minimized = params['struct']

        return params

    def _MD(self, protocol, **kwargs):
        """Basic MD driver for this Simulation. Do not call directly.

        :Arguments:
          *protocol*
             name of the stage; used as the key under which the run
             directory (``self.dirs``) and the resulting structure
             (``self.files``) are recorded and as the default directory name
          *kwargs*
             passed to the setup function (*MDfunc*, default
             :func:`gromacs.setup.MD`); *struct* must be present. *top*,
             *ndx* and *mainselection* are always overwritten; *dirname*,
             *deffnm* and *mdp* only receive defaults.

        :Returns: the dict returned by the setup function.

        :Raises: :exc:`ValueError` if *struct* is empty and :exc:`IOError`
                 if no starting structure can be found.
        """

        kwargs.setdefault('dirname', self.BASEDIR(protocol))
        kwargs.setdefault('deffnm', self.deffnm)
        kwargs.setdefault('mdp', config.get_template('NPT_opls.mdp'))
        self.dirs[protocol] = realpath(kwargs['dirname'])
        setupMD = kwargs.pop('MDfunc', gromacs.setup.MD)
        kwargs['top'] = self.files.topology
        # list(): a tuple of includes must not break the concatenation
        kwargs['includes'] = list(asiterable(kwargs.pop('includes', []))) + self.dirs.includes
        kwargs['ndx'] = self.files.ndx
        kwargs['mainselection'] = None # important for SD (use custom mdp and ndx!, gromacs.setup._MD)
        self._checknotempty(kwargs['struct'], 'struct')
        if not os.path.exists(kwargs['struct']):
            # struct is not reliable as it depends on qscript so now we just try everything...
            struct = gromacs.utilities.find_first(kwargs['struct'], suffices=['pdb', 'gro'])
            if struct is None:
                logger.error("Starting structure %(struct)r does not exist (yet)" % kwargs)
                raise IOError(errno.ENOENT, "Starting structure not found", kwargs['struct'])
            else:
                logger.info("Found starting structure %r (instead of %r).", struct, kwargs['struct'])
                kwargs['struct'] = struct
        # now setup the whole simulation (this is typically gromacs.setup.MD() )
        params = setupMD(**kwargs)
        # params['struct'] is md.gro but could also be md.pdb --- depends entirely on qscript
        self.files[protocol] = params['struct']
        # Gromacs 4.5.x 'mdrun -c PDB'  fails if it cannot find 'residuetypes.dat'
        # so instead of fuffing with GMXLIB we just dump it into the directory
        try:
            shutil.copy(config.topfiles['residuetypes.dat'], self.dirs[protocol])
        except Exception:
            # best effort only: the setup itself succeeded, so just warn
            logger.warning("Failed to copy 'residuetypes.dat': mdrun will likely fail to write a final structure")

        return params

    def MD_relaxed(self, **kwargs):
        """Short MD simulation with *timestep* = 0.1 fs to relax strain.

        Energy minimization does not always remove all problems and LINCS
        constraint errors occur. A very short *runtime* = 5 ps MD with very
        short integration time step *dt* tends to solve these problems.

        .. See Also:: :func:`gromacs.setup.MD`

        :Keywords:
          *struct*
             starting coordinates (typically guessed); defaults to the
             energy minimized structure
          *mdp*
             MDP run parameter file for Gromacs
          *dt*
             integration time step in ps [0.0001]
          *runtime*
             total run time in ps [5]
          *qscript*
             list of queuing system submission scripts; probably a
             good idea to always include the default script even
             if you have your own
          *qname*
             name of the job as shown in the queuing system
          *startdir*
             **advanced uses**: path of the directory on a remote
             system, which will be hard-coded into the queuing system
             script(s); see :func:`gromacs.setup.MD`

        :Returns: the dict returned by the MD driver for the ``MD_relaxed``
                  protocol.
        """
        # user structure or restrained or solvated
        kwargs.setdefault('struct', self.files.energy_minimized)
        kwargs.setdefault('dt', 0.0001)  # ps
        kwargs.setdefault('runtime', 5)  # ps
        kwargs.setdefault('mdp', self.mdp['MD_relaxed'])
        return self._MD('MD_relaxed', **kwargs)

    def MD_restrained(self, **kwargs):
        """Short MD simulation with position restraints on compound.

        See documentation of :func:`gromacs.setup.MD_restrained` for
        details. *struct* and *mdp* only receive defaults here and can be
        overridden; *MDfunc* is always set to
        :func:`gromacs.setup.MD_restrained`.

        .. Note:: Position restraints are activated with ``-DPOSRES`` directives
                  for :func:`gromacs.grompp`. Hence this will only work if the
                  compound itp file does indeed contain a ``[ posres ]``
                  section that is protected by a ``#ifdef POSRES`` clause.

        .. See Also:: :func:`gromacs.setup.MD_restrained`

        :Keywords:
          *struct*
              starting coordinates (leave empty for inspired guess of file
              name: the relaxed structure or, failing that, the energy
              minimized one)
          *mdp*
              MDP run parameter file for Gromacs
          *qscript*
             list of queuing system submission scripts; probably a
             good idea to always include the default script even
             if you have your own
          *qname*
             name of the job as shown in the queuing system
          *startdir*
             **advanced uses**: path of the directory on a remote
             system, which will be hard-coded into the queuing system
             script(s); see :func:`gromacs.setup.MD`

        :Returns: the dict returned by the MD driver for the
                  ``MD_restrained`` protocol.
        """
        # the last non-empty entry wins, i.e. MD_relaxed is preferred over
        # the energy minimized structure
        kwargs.setdefault('struct',
                          self._lastnotempty([self.files.energy_minimized, self.files.MD_relaxed]))
        kwargs.setdefault('mdp', self.mdp['MD_restrained'])
        kwargs['MDfunc'] = gromacs.setup.MD_restrained
        return self._MD('MD_restrained', **kwargs)

    def MD_NPT(self, **kwargs):
        """Short NPT MD simulation.

        See documentation of :func:`gromacs.setup.MD` for details such
        as *runtime* or specific queuing system options. *struct*, *t* and
        *mdp* only receive defaults here and can be overridden.

        .. Note:: If the system crashes (with LINCS errors), try initial
                  equilibration with timestep *dt* = 0.0001 ps (0.1 fs instead
                  of 2 fs) and *runtime* = 5 ps as done in :meth:`~Simulation.MD_relaxed`

        .. See Also:: :func:`gromacs.setup.MD` and :meth:`Simulation.MD_relaxed`

        :Keywords:
          *struct*
               starting conformation; by default, the *struct* is the last frame
               from the position restraints run, or, if this file cannot be
               found (e.g. because :meth:`Simulation.MD_restrained` was not run)
               it falls back to the relaxed and then the solvated system.
          *t*
               checkpoint file to continue from; by default the checkpoint
               of the most advanced previous step
          *mdp*
               MDP run parameter file for Gromacs
          *runtime*
               total run time in ps
          *qscript*
               list of queuing system scripts to prepare; available values are
               in :data:`gromacs.config.templates` or you can provide your own
               filename(s) in the current directory (see :mod:`gromacs.qsub` for
               the format of the templates)
          *qname*
             name of the job as shown in the queuing system
          *startdir*
             **advanced uses**: path of the directory on a remote
             system, which will be hard-coded into the queuing system
             script(s); see :func:`gromacs.setup.MD`

        :Returns: the dict returned by the MD driver for the ``MD_NPT``
                  protocol.
        """
        # user structure or relaxed or restrained or solvated
        kwargs.setdefault('struct', self.get_last_structure())
        # Pass checkpoint file from the previous step. Only look it up when
        # the caller did not supply *t*: the lookup raises if no previous
        # structure is known.
        if 't' not in kwargs:
            kwargs['t'] = self.get_last_checkpoint()
        kwargs.setdefault('mdp', self.mdp['MD_NPT'])
        return self._MD('MD_NPT', **kwargs)

    # Alias kept for convenience and compatibility: ``MD`` is bound to the
    # same function object as ``MD_NPT``.
    MD = MD_NPT

    def _checknotempty(value, name):
        if value is None or value == "":
            raise ValueError("Parameter %s cannot be empty." % name)
        return value

    def _lastnotempty(l):
        """Return the last non-empty value in list *l* (or None :-p)"""
        nonempty = [None] + [x for x in l if not (x is None or x == "" or x == [])]
        return nonempty[-1]

    def get_last_structure(self):
        """Return the coordinate file of the most advanced protocol step.

        The entries of ``self.files`` named in ``self.coordinate_structures``
        are collected in that order and the last non-empty one is returned
        (``None`` if there is none).
        """
        candidates = []
        for stage in self.coordinate_structures:
            candidates.append(self.files[stage])
        return self._lastnotempty(candidates)

    def get_last_checkpoint(self):
        """Returns the checkpoint of the most advanced step in the protocol.

        Relies on md.gro being present from previous simulation, assumes that
        checkpoint is then present. The file name is derived from the last
        non-empty entry of ``self.files`` named in ``self.checkpoints`` by
        replacing ``.gro`` with ``.cpt``.

        :Raises: :exc:`AttributeError` if none of those entries is set
                 (there is then no file name to derive the checkpoint from).
        """
        return self._lastnotempty([self.files[name] for name in self.checkpoints]).replace('.gro','.cpt')
    def __init__(self, molecule=None, **kwargs):
        """Set up Simulation instance.

        The *molecule* of the compound molecule should be supplied. Existing files
        (which have been generated in previous runs) can also be supplied.

              Identifier for the compound molecule. This is the same as the
              entry in the ``[ molecule ]`` section of the itp file. ["DRUG"]
              If provided and *molecule* is ``None`` then load the instance from
              the pickle file *filename*, which was generated with
              base directory; all other directories are created under it
              'water' or 'octanol' or 'cyclohexane'
              ``None`` chooses the default (e.g, :data:`mdpow.forcefields.DEFAULT_WATER_MODEL`
              for ``solvent == "water"``. Other options are the models defined in
              :data:`mdpow.forcefields.GROMACS_WATER_MODELS`. At the moment, there are no
              alternative parameterizations included for other solvents.
              dict with keys corresponding to the stages ``energy_minimize``,
              ``MD_restrained``, ``MD_relaxed``,
              ``MD_NPT`` and values *mdp* file names (if no entry then the
              package defaults are used)
              advanced keywords for short-circuiting; see

        self.__cache = {}
        filename = kwargs.pop('filename', None)
        dirname = kwargs.pop('dirname', self.dirname_default)

        solvent = kwargs.pop('solvent', self.solvent_default)
        # mdp files --- should get values from default runinput.cfg
        # None values in the kwarg mdp dict are ignored
        # self.mdp: key = stage, value = path to MDP file

        # 'water' will choose the default ('tip4p'), other choices are
        # 'tip3p', 'spc', 'spce', for water; no choices
        # available for 'cyclohexane' and 'octanol'
        solventmodel = kwargs.pop('solventmodel', None)

        mdp_kw = kwargs.pop('mdp', {})
        self.mdp = dict((stage, config.get_template(fn)) for stage,fn in self.mdp_defaults.items())
        self.mdp.update(dict((stage, config.get_template(fn)) for stage,fn in mdp_kw.items() if fn is not None))

        if molecule is None and filename is not None:
            # load from pickle file
            self.filename = filename
            kwargs = {}    # for super
            self.molecule = molecule or 'DRUG'
            self.dirs = AttributeDict(
                basedir=realpath(dirname),    # .../Equilibrium/<solvent>
                includes=list(asiterable(kwargs.pop('includes',[]))) + [config.includedir],
            # pre-set filenames: keyword == variable name
            self.files = AttributeDict([(k, kwargs.pop(k, None)) for k in self.filekeys])
            self.deffnm = kwargs.pop("deffnm", "md")

            if self.files.topology:
                # assume that a user-supplied topology lives in a 'standard' top dir
                # that includes the necessary itp file(s)
                self.dirs.topology = realpath(os.path.dirname(self.files.topology))

            self.solvent_type = solvent
            self.solventmodel_identifier = forcefields.get_solvent_identifier(solvent, solventmodel)
            if self.solventmodel_identifier is None:
                msg = "No parameters for solvent {0} and solventmodel {1} available.".format(
                    solvent, solventmodel)
                raise ValueError(msg)
            self.solventmodel = forcefields.get_solvent_model(self.solventmodel_identifier)

            distance = kwargs.pop('distance', None)
            distance = distance if distance is not None else DIST[solvent]

            self.solvent = AttributeDict(itp=self.solventmodel.itp,

            self.filename = filename or self.solvent_type+'.simulation'

        super(Simulation, self).__init__(**kwargs)
    def __init__(self, **kwargs):
        """Set up  ProteinOnly


             ``True`` will always regenerate trajectories even if they
             already exist, ``False`` raises an exception, ``None``
             does the sensible thing in most cases (i.e. notify and
             then move on).
          *dt* : float or list of floats
             only write every dt timestep (in ps); if a list of floats is
             supplied, write multiple trajectories, one for each dt.
          *compact* : bool
             write a compact representation
             Create an additional trajectory from the stripped one in which
             the Protein group is rms-fitted to the initial structure. See
             :meth:`` for details. Useful

             - "xy" : perform a rot+trans fit in the x-y plane
             - "all": rot+trans
             - ``None``: no fitting

             If *fit* is not supplied then the constructore-default is used
             List of literal ``make_ndx`` selections that select additional
             groups of atoms that should also be kept in addition to the
             protein. For example *keepalso* = ['"POPC"', 'resname DRUG'].

        # specific arguments: take them before calling the super class that
        # does not know what to do with them
        _fitvalues = ("xy", "all", None)
        parameters = {}
        parameters['fit'] = kwargs.pop('fit', None)  # fitting algorithm
        if not parameters['fit'] in _fitvalues:
            raise ValueError(
                "ProteinOnly: *fit* must be one of {_fitvalues!r}, not {fit!r}."
        parameters['compact'] = kwargs.pop('compact',
                                           False)  # compact+centered ?
        parameters['dt'] = kwargs.pop('dt', None)
        parameters['force'] = kwargs.pop('force', None)
        parameters['keepalso'] = kwargs.pop('keepalso', None)

        # super class init: do this before doing anything else
        # (also sets up self.parameters and self.results)
        super(_ProteinOnly, self).__init__(**kwargs)

        # self.parameters is set up by the base Worker class...
        self.parameters.filenames = AttributeDict()

        # self.simulation might have been set by the super class
        # already; just leave this snippet at the end. Do all
        # initialization that requires the simulation class in the
        # _register_hook() method.
        if self.simulation is not None:
class Simulation(object):
    """Class that represents one simulation.

    Analysis capabilities are added via plugins.

    1. Set the *active plugin* with the :meth:`Simulation.set_plugin` method.
    2. Analyze the trajectory with the active plugin by calling the
       :meth:`` method.
    3. Analyze the output from :meth:`run` with :meth:`Simulation.analyze`; results are stored
       in the plugin's :attr:`~Worker.results` dictionary.
    4. Plot results with :meth:`Simulation.plot`.
    # NOTE: not suitable for multiple inheritance

    def __init__(self, **kwargs):
        """Set up a Simulation object.

             Any object that contains the attributes *tpr*, *xtc*,
             and optionally *ndx*
             (e.g. :class:`gromacs.cbook.Transformer`). The individual keywords such
             as *xtc* override the values in *sim*.
             Gromacs tpr file (**required**)
             Gromacs trajectory, can also be a trr (**required**)
             Gromacs energy file (only required for some plugins)
             Gromacs index file
             ``True``: Turn file names into absolute paths (typically required
             for most plugins); ``False`` keep a they are [``True``]
             ``True``: missing required file keyword raises a :exc:`TypeError`
             and missing the file itself raises a :exc:`IOError`.  ``False``:
             missing required files only give a warning. [``True``]
             directory under which derived data are stored;
             defaults to the directory containing the tpr [None]
           *plugins* : list
             plugin instances or tuples (*plugin class*, *kwarg dict*) or tuples
             (*plugin_class_name*, *kwarg dict*) to be used; more can be
             added later with :meth:`Simulation.add_plugin`.

        """"Loading simulation data")

        sim = kwargs.pop('sim', None)
        strict = kwargs.pop('strict', True)
        def getpop(attr, required=False, strict=strict):
            """Return attribute from from kwargs or sim or None"""
            val = kwargs.pop(attr, None)  # must pop from kwargs to clean it
            if not val is None:
                return val
                return sim.__getattribute__(attr)
            except AttributeError:
                if required:
                    errmsg = "Required attribute %r not found in kwargs or sim" % attr
                    if strict:
                        raise TypeError(errmsg)
                        logger.warn(errmsg+"... continuing because of strict=False")
                return None

        make_absolute = kwargs.pop('absolute', True)
        def canonical(*args):
            """Join *args* and get the :func:`os.path.realpath`."""
            if None in args:
                return None
            if not make_absolute:
                return os.path.join(*args)
            return os.path.realpath(os.path.join(*args))

        # required files
        self.tpr = canonical(getpop('tpr', required=True))
        self.xtc = canonical(getpop('xtc', required=True))
        # optional files
        self.ndx = canonical(getpop('ndx'))
        self.edr = canonical(getpop('edr'))

        # check existence of required files
        resolve = "exception"
        if not strict:
            resolve = "warn"
        for v in ('tpr', 'xtc'):
            self.check_file(v, self.__getattribute__(v), resolve=resolve)

        self.analysis_dir = kwargs.pop('analysisdir', os.path.dirname(self.tpr))

        #: Registry for plugins: This dict is central.
        self.plugins = AttributeDict()
        #: Use this plugin if none is explicitly specified. Typically set with
        #: :meth:`~Simulation.set_plugin`.
        self.default_plugin_name = None

        # XXX: Or should we simply add instances and then re-register
        #      all instances using register() ?
        # XXX: ... this API should be cleaned up. It seems to be connected
        #      back and forth in vicious circles. -- OB 2009-07-10

        plugins = kwargs.pop('plugins', [])
        # list of tuples (plugin, kwargs) or just (plugin,) if no kwords
        # required (eg if plugin is an instance)
        for x in plugins:
                P, kwargs = asiterable(x)   # make sure to wrap strings, especially 2-letter ones!
            except ValueError:
                P = x
                kwargs = {}
            self.add_plugin(P, **kwargs)

        # convenience: if only a single plugin was registered we default to that one
        if len(self.plugins) == 1:

        # Is this needed? If done properly, kwargs should be empty by now BUT
        # because the same list is re-used for all plugins I cannot pop them in
        # the plugins. I don't think multiple inheritance would work with this
        # setup so let's not pretend it does: hence comment out the super-init
        # call:
        ## super(Simulation, self).__init__(**kwargs)"Simulation instance initialised:")

    def add_plugin(self, plugin, **kwargs):
        """Add a plugin to the registry.

        - If *plugin* is a :class:`Plugin` instance then the
          instance is directly registered and any keyword arguments
          are ignored.

        - If *plugin* is a :class:`Plugin` class object or a
          string that can be found in :mod:`gromacs.analysis.plugins`
          then first an instance is created with the given keyword
          arguments and then registered.

            *plugin* : class or string, or instance
               If the parameter is a class then it should have been derived
               from :class:`Plugin`. If it is a string then it is taken as a
               plugin name in :mod:`gromacs.analysis.plugins` and the
               corresponding class is added. In both cases any parameters for
               initizlization should be provided.

               If *plugin* is already a :class:`Plugin` instance then the kwargs
               will be ignored.
               The kwargs are specific for the plugin and should be
               described in its documentation.
        # simulation=self must be provided so that plugin knows who owns it

        except (TypeError, AttributeError):
            # NOTE: this except clause can mask bugs in the plugin code!!
            if type(plugin) is str:
                import plugins            # We should be able to import this safely now...
                plugin = plugins.__plugin_classes__[plugin]
            # plugin registers itself in self.plugins
            plugin(simulation=self, **kwargs)  # simulation=self is REQUIRED!

    def topdir(self,*args):
        """Returns a path under self.analysis_dir, which is guaranteed to exist.

        .. Note:: Parent dirs are created if necessary."""
        p = os.path.join(self.analysis_dir, *args)
        parent = os.path.dirname(p)
        except OSError,err:
            if err.errno != errno.EEXIST:
        return p
class Simulation(object):
    """Class that represents one simulation.

    Analysis capabilities are added via plugins.

    1. Set the *active plugin* with the :meth:`Simulation.set_plugin` method.
    2. Analyze the trajectory with the active plugin by calling the
       :meth:`` method.
    3. Analyze the output from :meth:`run` with :meth:`Simulation.analyze`; results are stored
       in the plugin's :attr:`~Worker.results` dictionary.
    4. Plot results with :meth:`Simulation.plot`.

    # NOTE: not suitable for multiple inheritance

    def __init__(self, **kwargs):
        """Set up a Simulation object.

             Any object that contains the attributes *tpr*, *xtc*,
             and optionally *ndx*
             (e.g. :class:`gromacs.cbook.Transformer`). The individual keywords such
             as *xtc* override the values in *sim*.
             Gromacs tpr file (**required**)
             Gromacs trajectory, can also be a trr (**required**)
             Gromacs energy file (only required for some plugins)
             Gromacs index file
             ``True``: Turn file names into absolute paths (typically required
             for most plugins); ``False`` keep a they are [``True``]
             ``True``: missing required file keyword raises a :exc:`TypeError`
             and missing the file itself raises a :exc:`IOError`.  ``False``:
             missing required files only give a warning. [``True``]
             directory under which derived data are stored;
             defaults to the directory containing the tpr [None]
           *plugins* : list
             plugin instances or tuples (*plugin class*, *kwarg dict*) or tuples
             (*plugin_class_name*, *kwarg dict*) to be used; more can be
             added later with :meth:`Simulation.add_plugin`.

        """"Loading simulation data")

        sim = kwargs.pop('sim', None)
        strict = kwargs.pop('strict', True)

        def getpop(attr, required=False, strict=strict):
            """Return attribute from from kwargs or sim or None"""
            val = kwargs.pop(attr, None)  # must pop from kwargs to clean it
            if val is not None:
                return val
                return sim.__getattribute__(attr)
            except AttributeError:
                if required:
                    errmsg = "Required attribute {0!r} not found in kwargs or sim".format(
                    if strict:
                        raise TypeError(errmsg)
                        logger.warn(errmsg +
                                    "... continuing because of strict=False")
                return None

        make_absolute = kwargs.pop('absolute', True)

        def canonical(*args):
            """Join *args* and get the :func:`os.path.realpath`."""
            if None in args:
                return None
            if not make_absolute:
                return os.path.join(*args)
            return os.path.realpath(os.path.join(*args))

        # required files
        self.tpr = canonical(getpop('tpr', required=True))
        self.xtc = canonical(getpop('xtc', required=True))
        # optional files
        self.ndx = canonical(getpop('ndx'))
        self.edr = canonical(getpop('edr'))

        # check existence of required files
        resolve = "exception"
        if not strict:
            resolve = "warn"
        for v in ('tpr', 'xtc'):
            self.check_file(v, self.__getattribute__(v), resolve=resolve)

        self.analysis_dir = kwargs.pop('analysisdir',

        #: Registry for plugins: This dict is central.
        self.plugins = AttributeDict()
        #: Use this plugin if none is explicitly specified. Typically set with
        #: :meth:`~Simulation.set_plugin`.
        self.default_plugin_name = None

        # XXX: Or should we simply add instances and then re-register
        #      all instances using register() ?
        # XXX: ... this API should be cleaned up. It seems to be connected
        #      back and forth in vicious circles. -- OB 2009-07-10

        plugins = kwargs.pop('plugins', [])
        # list of tuples (plugin, kwargs) or just (plugin,) if no kwords
        # required (eg if plugin is an instance)
        for x in plugins:
                P, kwargs = asiterable(
                    x)  # make sure to wrap strings, especially 2-letter ones!
            except ValueError:
                P = x
                kwargs = {}
            self.add_plugin(P, **kwargs)

        # convenience: if only a single plugin was registered we default to that one
        if len(self.plugins) == 1:

        # Is this needed? If done properly, kwargs should be empty by now BUT
        # because the same list is re-used for all plugins I cannot pop them in
        # the plugins. I don't think multiple inheritance would work with this
        # setup so let's not pretend it does: hence comment out the super-init
        # call:
        ## super(Simulation, self).__init__(**kwargs)"Simulation instance initialised:")

    def add_plugin(self, plugin, **kwargs):
        """Add a plugin to the registry.

        - If *plugin* is a :class:`Plugin` instance then the
          instance is directly registered and any keyword arguments
          are ignored.

        - If *plugin* is a :class:`Plugin` class object or a
          string that can be found in :mod:`gromacs.analysis.plugins`
          then first an instance is created with the given keyword
          arguments and then registered.

            *plugin* : class or string, or instance
               If the parameter is a class then it should have been derived
               from :class:`Plugin`. If it is a string then it is taken as a
               plugin name in :mod:`gromacs.analysis.plugins` and the
               corresponding class is added. In both cases any parameters for
               initizlization should be provided.

               If *plugin* is already a :class:`Plugin` instance then the kwargs
               will be ignored.
               The kwargs are specific for the plugin and should be
               described in its documentation.
        # simulation=self must be provided so that plugin knows who owns it

        except (TypeError, AttributeError):
            # NOTE: this except clause can mask bugs in the plugin code!!
            if type(plugin) is str:
                import plugins  # We should be able to import this safely now...
                plugin = plugins.__plugin_classes__[plugin]
            # plugin registers itself in self.plugins
            plugin(simulation=self, **kwargs)  # simulation=self is REQUIRED!

    def topdir(self, *args):
        """Returns a path under self.analysis_dir, which is guaranteed to exist.

        .. Note:: Parent dirs are created if necessary."""
        p = os.path.join(self.analysis_dir, *args)
        parent = os.path.dirname(p)
        except OSError, err:
            if err.errno != errno.EEXIST:
        return p
    def __init__(self, **kwargs):
        """Set up a Simulation object.

        :Keywords:
           *sim*
             Any object that contains the attributes *tpr*, *xtc*,
             and optionally *ndx*
             (e.g. :class:`gromacs.cbook.Transformer`). The individual keywords such
             as *xtc* override the values in *sim*.
           *tpr*
             Gromacs tpr file (**required**)
           *xtc*
             Gromacs trajectory, can also be a trr (**required**)
           *edr*
             Gromacs energy file (only required for some plugins)
           *ndx*
             Gromacs index file
           *absolute*
             ``True``: Turn file names into absolute paths (typically required
             for most plugins); ``False`` keep as they are [``True``]
           *strict*
             ``True``: missing required file keyword raises a :exc:`TypeError`
             and missing the file itself raises a :exc:`IOError`.  ``False``:
             missing required files only give a warning. [``True``]
           *analysisdir*
             directory under which derived data are stored;
             defaults to the directory containing the tpr [None]
           *plugins* : list
             plugin instances or tuples (*plugin class*, *kwarg dict*) or tuples
             (*plugin_class_name*, *kwarg dict*) to be used; more can be
             added later with :meth:`Simulation.add_plugin`.

        """
        logger.info("Loading simulation data")

        sim = kwargs.pop('sim', None)
        strict = kwargs.pop('strict', True)

        def getpop(attr, required=False, strict=strict):
            """Return attribute from kwargs or sim or None."""
            val = kwargs.pop(attr, None)  # must pop from kwargs to clean it
            if val is not None:
                return val
            try:
                # sim=None also ends up in the except clause
                return getattr(sim, attr)
            except AttributeError:
                if required:
                    errmsg = "Required attribute %r not found in kwargs or sim" % attr
                    if strict:
                        raise TypeError(errmsg)
                    else:
                        logger.warning(errmsg+"... continuing because of strict=False")
                return None

        make_absolute = kwargs.pop('absolute', True)

        def canonical(*args):
            """Join *args* and get the :func:`os.path.realpath`."""
            if None in args:
                return None
            if not make_absolute:
                return os.path.join(*args)
            return os.path.realpath(os.path.join(*args))

        # required files
        self.tpr = canonical(getpop('tpr', required=True))
        self.xtc = canonical(getpop('xtc', required=True))
        # optional files
        self.ndx = canonical(getpop('ndx'))
        self.edr = canonical(getpop('edr'))

        # check existence of required files
        resolve = "exception" if strict else "warn"
        for v in ('tpr', 'xtc'):
            self.check_file(v, getattr(self, v), resolve=resolve)

        # Only derive the default from the tpr when it is needed and possible:
        # with strict=False the tpr may be None and os.path.dirname(None) fails.
        analysis_dir = kwargs.pop('analysisdir', None)
        if analysis_dir is None and self.tpr is not None:
            analysis_dir = os.path.dirname(self.tpr)
        self.analysis_dir = analysis_dir

        #: Registry for plugins: This dict is central.
        self.plugins = AttributeDict()
        #: Use this plugin if none is explicitly specified. Typically set with
        #: :meth:`~Simulation.set_plugin`.
        self.default_plugin_name = None

        # XXX: Or should we simply add instances and then re-register
        #      all instances using register() ?
        # XXX: ... this API should be cleaned up. It seems to be connected
        #      back and forth in vicious circles. -- OB 2009-07-10

        plugins = kwargs.pop('plugins', [])
        # list of tuples (plugin, kwargs) or just (plugin,) if no kwords
        # required (eg if plugin is an instance)
        # NOTE(review): a 1-tuple (plugin,) falls into the except branch and is
        # passed on as the tuple itself -- confirm that add_plugin copes with it.
        for x in plugins:
            # Use a separate name for the per-plugin keywords so that the
            # constructor's own kwargs are not clobbered.
            try:
                P, plugin_kwargs = asiterable(x)   # make sure to wrap strings, especially 2-letter ones!
            except ValueError:
                P = x
                plugin_kwargs = {}
            self.add_plugin(P, **plugin_kwargs)

        # convenience: if only a single plugin was registered we default to that one
        if len(self.plugins) == 1:
            # NOTE(review): assumes set_plugin() takes the registry key of the
            # plugin (its name) -- confirm against set_plugin's signature.
            self.set_plugin(next(iter(self.plugins)))

        # Is this needed? If done properly, kwargs should be empty by now BUT
        # because the same list is re-used for all plugins I cannot pop them in
        # the plugins. I don't think multiple inheritance would work with this
        # setup so let's not pretend it does: hence comment out the super-init
        # call:
        ## super(Simulation, self).__init__(**kwargs)
        logger.info("Simulation instance initialised:")
    def analyze(self, **kwargs):
        """Load results from disk into :attr:`_Dihedrals.results` and compute PMF.

        The PMF W(phi) in kT is computed from each dihedral
        probability distribution P(phi) as

           W(phi) = -kT ln P(phi)

        It is stored in :attr:`_Dihedrals.results` with the key *PMF*.
        Each PMF is shifted so that its minimum is at 0 kT; bins without any
        data have P = 0 and therefore an infinite PMF.

        :Keywords:
           *bins*
             bins for histograms (passed to :func:`numpy.histogram`) [361]

        :Returns: a dictionary of the results and also sets
                  ``self.results``.

        :Raises: :exc:`KeyError` if no *timeseries* file could be loaded and
                 :exc:`ValueError` if the timeseries contains no dihedral
                 columns.
        """
        bins = kwargs.pop('bins', 361)

        results = AttributeDict()

        # get graphs that were produced by g_angle
        for name, f in self.parameters.filenames.items():
            try:
                results[name] = XVG(f)
            except IOError:
                pass    # either not computed (yet) or some failure

        # compute individual distributions
        ts = results['timeseries'].array    # ts[0] = time, ts[1] = avg
        dih = ts[2:]
        if len(dih) == 0:
            raise ValueError("No dihedral timeseries found: cannot compute distributions.")

        phi_range = (-180., 180.)

        # histograms (prob. distributions), one for each dihedral i;
        # density=True replaces the obsolete normed=True/new=True keywords
        histograms = [numpy.histogram(phis, bins=bins, range=phi_range, density=True)
                      for phis in dih]
        P = numpy.array([h for h, _ in histograms])
        e = histograms[0][1]       # bin edges are identical for all dihedrals
        phi = 0.5*(e[:-1]+e[1:])   # midpoints of bin edges
        distributions = numpy.concatenate((phi[numpy.newaxis, :], P))  # phi, P[0], P[1], ...

        # NOTE(review): assumes XVG.set() replaces the array of an empty XVG
        # instance -- confirm against the XVG class.
        xvg = XVG()
        xvg.set(distributions)
        results['distributions'] = xvg

        # compute PMF (from individual distributions)
        with numpy.errstate(divide='ignore'):      # empty bins: log(0) -> W = +inf
            W = -numpy.log(P)                      # W(phi)/kT = -ln P
        W -= W.min(axis=1)[:, numpy.newaxis]   # minimum at 0 kT
        pmf = numpy.concatenate((phi[numpy.newaxis, :], W), axis=0)
        xvg = XVG()
        xvg.set(pmf)
        results['PMF'] = xvg

        self.results = results
        return results
    def __init__(self,**kwargs):
        """Set up  StripWater


             ``True`` will always regenerate trajectories even if they
             already exist, ``False`` raises an exception, ``None``
             does the sensible thing in most cases (i.e. notify and
             then move on).
          *dt* : float or list of floats
             only write every dt timestep (in ps); if a list of floats is
             supplied, write multiple trajectories, one for each dt.
          *compact* : bool
             write a compact representation
             Index group to center on ["Protein"]
             Create an additional trajectory from the stripped one in which
             the *fitgroup* group is rms-fitted to the initial structure. See
             :meth:`` for details. Useful

             - "xy" : perform a rot+trans fit in the x-y plane
             - "all": rot+trans
             - ``None``: no fitting

             If *fit* is not supplied then the constructor-default is used
             Index group to fit to with the *fit* option; must be changed if
             molecule is not a protein and automatically recognized. Also
             consider supplying a custom index file. ["backbone"]
             name of the residues that are stripped (typically it is
             safe to leave this at the default 'SOL')
             place generated files in *outdir* instead of the same directory
             where the input tpr/xtc lived [``None``]

        .. Note::

           If set, *dt* is only applied to a fit step; the no-water
           trajectory is always generated for all time steps of the

        # specific arguments: take them before calling the super class that
        # does not know what to do with them
        _fitvalues = ("xy", "all", None)
        parameters = {}
        parameters['fit'] = kwargs.pop('fit',None)            # fitting algorithm
        if not parameters['fit'] in _fitvalues:
            raise ValueError("StripWater: *fit* must be one of {_fitvalues!r}, not {fit!r}.".format(**vars()))
	parameters['fitgroup'] = kwargs.pop('fitgroup', "backbone")
	parameters['centergroup'] = kwargs.pop('centergroup', "Protein")
        parameters['compact'] = kwargs.pop('compact', False)  # compact+centered ?
        parameters['resn'] = kwargs.pop('resn', 'SOL')        # residue name to be stripped
        parameters['dt'] = kwargs.pop('dt', None)
        parameters['force'] = kwargs.pop('force', None)
        parameters['outdir'] = kwargs.pop('outdir', None)

        # super class init: do this before doing anything else
        # (also sets up self.parameters and self.results)
        super(_StripWater, self).__init__(**kwargs)

        # self.parameters is set up by the base Worker class...
        self.parameters.filenames = AttributeDict()

        # self.simulation might have been set by the super class
        # already; just leave this snippet at the end. Do all
        # initialization that requires the simulation class in the
        # _register_hook() method.
        if self.simulation is not None: