コード例 #1
0
ファイル: core.py プロジェクト: uitb/GromacsWrapper
    def __init__(self, **kwargs):
        """Set up Worker class.

        :Keywords:
          *plugin* : instance
             The :class:`Plugin` instance that owns this worker. **Must be supplied.**
          *simulation*
             A :class:`Simulation` object, required for registration,
             but can be supplied later.
          *kwargs*
             All other keyword arguments are passed to the super class.

        :Raises: :exc:`TypeError` if *plugin* is missing or ``None``.
        """

        self.plugin = kwargs.pop('plugin', None)
        """:class:`Plugin` instance that owns this Worker."""
        if self.plugin is None:
            # Explicit check instead of ``assert``: assertions are stripped
            # when Python runs with -O, which would defer the failure to the
            # attribute access below.
            raise TypeError(
                "Worker requires the keyword argument 'plugin' (a Plugin instance)")
        self.plugin_name = self.plugin.plugin_name
        """Name of the plugin that this Worker belongs to."""

        # eventually needed but can come after init
        self.simulation = kwargs.pop('simulation', None)
        self.location = self.plugin_name  # directory name under analysisdir
        self.results = AttributeDict()  # store results
        # container for options, filenames, etc...
        self.parameters = AttributeDict()
        self.parameters.filenames = AttributeDict()
        super(Worker, self).__init__(**kwargs)
コード例 #2
0
    def analyze(self, **kwargs):
        """Short description of postprocessing.

        Template for the postprocessing step of a plugin. The analyze
        method typically works on the data files that were generated by
        run. Keeping the two steps (*run* and *analyze*) separate means that
        postprocessing parameters can be changed without repeating the
        time-consuming trajectory analysis.

        :Keywords:
          *kw1*
             description
        :Returns:  a dictionary of the results and also sets ``self.results``.
        """
        # XVG is not used by the template itself; it is imported so that
        # postprocessing code added below can use it directly.
        from gromacs.formats import XVG

        collected = AttributeDict()

        # - Add the postprocessing here.
        # - Put each computed quantity into collected[key]. The key can be chosen
        #   freely but it *must* be set so that other functions can access the
        #   results in a uniform way.
        # - Storing class instances that have a plot() method is encouraged: then
        #   the plot() method of the plugin does not have to be changed.
        #   For example, gromacs.formats.XVG(filename) creates an object from a
        #   xvg file that knows how to plot itself.

        self.results = collected
        return collected
コード例 #3
0
ファイル: distances.py プロジェクト: uitb/GromacsWrapper
 def analyze(self, **kwargs):
     """Wrap every registered data file in an :class:`XVG` object.

     Each entry of ``self.parameters.filenames`` is turned into an ``XVG``
     instance stored under the same key.

     :Returns: the dictionary of results; it is also stored in ``self.results``.
     """
     loaded = AttributeDict(
         (key, XVG(path)) for key, path in self.parameters.filenames.items())
     self.results = loaded
     return loaded
コード例 #4
0
    def analyze(self, **kwargs):
        """Analyze hydrogen bond output.

        * hydrogen bond existence (existence)
        * total number of hydrogen bonds (num)
        * (others can be added easily)

        The existence table (one description line paired with one value of
        the existence-matrix mean along axis 0) is stored in
        ``results['existence']`` as a list of tuples, logged line by line
        and written to the file ``self.parameters.filenames['existence']``.

        :Returns:  a dictionary of the results and also sets ``self.results``.
        """
        from gromacs.formats import XPM, XVG

        filenames = self.parameters.filenames

        results = AttributeDict()
        results['num'] = XVG(filenames['num'])
        results['matrix'] = hbm = XPM(filenames['hbm'], reverse=True)

        hb_fraction = hbm.array.mean(axis=0)
        # Read the non-comment lines of the log file; the context manager
        # makes sure the file handle is closed again.
        with open(filenames['log']) as log:
            desc = [line.strip() for line in log if not line.startswith('#')]
        # Materialize the pairs: on Python 3 zip() is a one-shot iterator that
        # would be exhausted by the loop below, leaving the stored result empty.
        results['existence'] = list(zip(desc, hb_fraction))

        with open(filenames['existence'], "w") as out:
            logger.info(
                "Hydrogen bond existence analysis (results['existence'] and %(existence)r)",
                filenames)
            for name, frac in results['existence']:
                logger.info("hb_existence: %-40s %4.1f%%", name, 100 * frac)
                # The conversion (!s) must precede the format spec (:<40);
                # "{0:<40!s}" is rejected as an invalid format specifier.
                out.write("{0!s:<40} {1:4.1f}%\n".format(name, 100 * frac))

        self.results = results
        return results
コード例 #5
0
    def analyze(self, **kwargs):
        """Run the mindist analysis for each cysteine and collect the results.

        For every residue id in ``self.parameters.cysteines`` the value of
        ``self._mindist(resid)`` is stored under the key ``Cys<resid>``.

        :Returns: the dictionary of results (for interactive analysis); it is
                  also stored in ``self.results``.
        """

        per_cysteine = AttributeDict()
        for resid in self.parameters.cysteines:
            # the key should be a valid python variable name
            key = 'Cys{resid:d}'.format(resid=resid)
            per_cysteine[key] = self._mindist(resid)
        self.results = per_cysteine
        return per_cysteine
コード例 #6
0
    def analyze(self, **kwargs):
        """Wrap the output xvg files in :class:`gromacs.formats.XVG` objects.

        Every entry of ``self.parameters.filenames`` is loaded as an ``XVG``
        instance under the same key.

        :Returns:  a dictionary of the results and also sets ``self.results``.
        """
        from gromacs.formats import XVG

        logger.info("Preparing HelixBundle graphs as XVG objects.")
        graphs = AttributeDict()
        for key, path in self.parameters.filenames.items():
            graphs[key] = XVG(path)
        self.results = graphs
        return graphs
コード例 #7
0
    def analyze(self, **kwargs):
        """Load the energy xvg file as a :class:`gromacs.formats.XVG` object.

        The file registered as ``self.parameters.filenames['Energy']`` is
        stored in the results under the key ``Energy``.

        :Returns:  a dictionary of the results and also sets ``self.results``.
        """
        from gromacs.formats import XVG

        logger.info("Preparing Energy graphs as XVG objects.")
        energy_xvg = XVG(self.parameters.filenames['Energy'])
        collected = AttributeDict(Energy=energy_xvg)
        self.results = collected
        return collected
コード例 #8
0
ファイル: core.py プロジェクト: uitb/GromacsWrapper
    def __init__(self, **kwargs):
        """Set up a Simulation object.

        :Keywords:
           *sim*
             Any object that contains the attributes *tpr*, *xtc*,
             and optionally *ndx*
             (e.g. :class:`gromacs.cbook.Transformer`). The individual keywords such
             as *xtc* override the values in *sim*.
           *tpr*
             Gromacs tpr file (**required**)
           *xtc*
             Gromacs trajectory, can also be a trr (**required**)
           *edr*
             Gromacs energy file (only required for some plugins)
           *ndx*
             Gromacs index file
           *absolute*
             ``True``: Turn file names into absolute paths (typically required
             for most plugins); ``False`` keep as they are [``True``]
           *strict*
             ``True``: missing required file keyword raises a :exc:`TypeError`
             and missing the file itself raises a :exc:`IOError`.  ``False``:
             missing required files only give a warning. [``True``]
           *analysisdir*
             directory under which derived data are stored;
             defaults to the directory containing the tpr [None]
           *plugins* : list
             plugin instances or tuples (*plugin class*, *kwarg dict*) or tuples
             (*plugin_class_name*, *kwarg dict*) to be used; more can be
             added later with :meth:`Simulation.add_plugin`.

        """
        logger.info("Loading simulation data")

        sim = kwargs.pop('sim', None)
        strict = kwargs.pop('strict', True)

        def getpop(attr, required=False, strict=strict):
            """Return attribute *attr* from kwargs or *sim*, or ``None``."""
            val = kwargs.pop(attr, None)  # must pop from kwargs to clean it
            if val is not None:
                return val
            try:
                # getattr() also honours attributes that *sim* provides
                # through __getattr__ (a direct __getattribute__ call does
                # not); sim=None simply raises AttributeError here.
                return getattr(sim, attr)
            except AttributeError:
                if required:
                    errmsg = "Required attribute {0!r} not found in kwargs or sim".format(
                        attr)
                    if strict:
                        logger.fatal(errmsg)
                        raise TypeError(errmsg)
                    else:
                        # Logger.warn() is a deprecated alias of warning()
                        logger.warning(
                            errmsg + "... continuing because of strict=False")
                        warnings.warn(errmsg)
                return None

        make_absolute = kwargs.pop('absolute', True)

        def canonical(*args):
            """Join *args* and get the :func:`os.path.realpath`."""
            if None in args:
                return None
            if not make_absolute:
                return os.path.join(*args)
            return os.path.realpath(os.path.join(*args))

        # required files
        self.tpr = canonical(getpop('tpr', required=True))
        self.xtc = canonical(getpop('xtc', required=True))
        # optional files
        self.ndx = canonical(getpop('ndx'))
        self.edr = canonical(getpop('edr'))

        # check existence of required files
        resolve = "exception" if strict else "warn"
        for v in ('tpr', 'xtc'):
            self.check_file(v, getattr(self, v), resolve=resolve)

        # Compute the default lazily: with strict=False the tpr can be None
        # and os.path.dirname(None) would fail even when analysisdir was
        # supplied explicitly.
        try:
            self.analysis_dir = kwargs.pop('analysisdir')
        except KeyError:
            self.analysis_dir = (os.path.dirname(self.tpr)
                                 if self.tpr is not None else None)

        #: Registry for plugins: This dict is central.
        self.plugins = AttributeDict()
        #: Use this plugin if none is explicitly specified. Typically set with
        #: :meth:`~Simulation.set_plugin`.
        self.default_plugin_name = None

        # XXX: Or should we simply add instances and then re-register
        #      all instances using register() ?
        # XXX: ... this API should be cleaned up. It seems to be connected
        #      back and forth in vicious circles. -- OB 2009-07-10

        plugins = kwargs.pop('plugins', [])
        # list of tuples (plugin, kwargs) or just (plugin,) if no kwords
        # required (eg if plugin is an instance)
        for x in plugins:
            # A separate name is used for the per-plugin keywords so that the
            # constructor's own kwargs are not clobbered by the loop.
            try:
                # make sure to wrap strings, especially 2-letter ones!
                P, plugin_kwargs = asiterable(x)
            except ValueError:
                P = x
                plugin_kwargs = {}
            self.add_plugin(P, **plugin_kwargs)

        # convenience: if only a single plugin was registered we default to that one
        if len(self.plugins) == 1:
            # next(iter(...)) works on Python 2 and 3; keys()[0] fails on
            # Python 3 where keys() is a view.
            self.set_plugin(next(iter(self.plugins)))

        # Is this needed? If done properly, kwargs should be empty by now BUT
        # because the same list is re-used for all plugins I cannot pop them in
        # the plugins. I don't think multiple inheritance would work with this
        # setup so let's not pretend it does: hence comment out the super-init
        # call:
        ## super(Simulation, self).__init__(**kwargs)
        logger.info("Simulation instance initialised:")
        logger.info(str(self))
コード例 #9
0
    def __init__(self, **kwargs):
        """Set up  ProteinOnly

        :Arguments:

          *force*
             ``True`` will always regenerate trajectories even if they
             already exist, ``False`` raises an exception, ``None``
             does the sensible thing in most cases (i.e. notify and
             then move on).
          *dt* : float or list of floats
             only write every dt timestep (in ps); if a list of floats is
             supplied, write multiple trajectories, one for each dt.
          *compact* : bool
             write a compact representation
          *fit*
             Create an additional trajectory from the stripped one in which
             the Protein group is rms-fitted to the initial structure. See
             :meth:`gromacs.cbook.Transformer.fit` for details. Useful
             values:

             - "xy" : perform a rot+trans fit in the x-y plane
             - "all": rot+trans
             - ``None``: no fitting

             If *fit* is not supplied then the constructor-default is used
             (:attr:`_ProteinOnly.parameters.fit`).
          *keepalso*
             List of literal ``make_ndx`` selections that select additional
             groups of atoms that should also be kept in addition to the
             protein. For example *keepalso* = ['"POPC"', 'resname DRUG'].

        :Raises: :exc:`ValueError` if *fit* is not one of the allowed values.

        """
        # specific arguments: take them before calling the super class that
        # does not know what to do with them
        _fitvalues = ("xy", "all", None)
        parameters = {}
        parameters['fit'] = kwargs.pop('fit', None)  # fitting algorithm
        if parameters['fit'] not in _fitvalues:
            # Pass the values explicitly: there is no local variable named
            # "fit", so formatting with **vars() raised KeyError instead of
            # the intended ValueError.
            raise ValueError(
                "ProteinOnly: *fit* must be one of {_fitvalues!r}, not {fit!r}."
                .format(_fitvalues=_fitvalues, fit=parameters['fit']))
        # compact+centered ?
        parameters['compact'] = kwargs.pop('compact', False)
        parameters['dt'] = kwargs.pop('dt', None)
        parameters['force'] = kwargs.pop('force', None)
        parameters['keepalso'] = kwargs.pop('keepalso', None)

        # super class init: do this before doing anything else
        # (also sets up self.parameters and self.results)
        super(_ProteinOnly, self).__init__(**kwargs)

        # self.parameters is set up by the base Worker class...
        self.parameters.filenames = AttributeDict()
        self.parameters.update(parameters)

        # self.simulation might have been set by the super class
        # already; just leave this snippet at the end. Do all
        # initialization that requires the simulation class in the
        # _register_hook() method.
        if self.simulation is not None:
            self._register_hook()
コード例 #10
0
    def __init__(self, molecule=None, **kwargs):
        """Set up Simulation instance.

        The *molecule* of the compound molecule should be supplied. Existing files
        (which have been generated in previous runs) can also be supplied.

        :Keywords:
          *molecule*
              Identifier for the compound molecule. This is the same as the
              entry in the ``[ molecule ]`` section of the itp file. ["DRUG"]
          *filename*
              If provided and *molecule* is ``None`` then load the instance from
              the pickle file *filename*, which was generated with
              :meth:`~mdpow.equil.Simulation.save`.
          *dirname*
              base directory; all other directories are created under it
          *forcefield*
              'OPLS-AA' or 'CHARMM' or 'AMBER'
          *solvent*
              'water' or 'octanol' or 'cyclohexane' or 'wetoctanol'
          *solventmodel*
              ``None`` chooses the default (e.g, :data:`mdpow.forcefields.DEFAULT_WATER_MODEL`
              for ``solvent == "water"``). Other options are the models defined in
              :data:`mdpow.forcefields.GROMACS_WATER_MODELS`. At the moment, there are no
              alternative parameterizations included for other solvents.
          *mdp*
              dict with keys corresponding to the stages ``energy_minimize``,
              ``MD_restrained``, ``MD_relaxed``,
              ``MD_NPT`` and values *mdp* file names (if no entry then the
              package defaults are used)
          *distance*
               minimum distance between solute and closest box face
          *kwargs*
              advanced keywords for short-circuiting; see
              :data:`mdpow.equil.Simulation.filekeys`.

        """
        self.__cache = {}
        filename = kwargs.pop('filename', None)
        dirname = kwargs.pop('dirname', self.dirname_default)

        forcefield = kwargs.pop('forcefield', 'OPLS-AA')
        solvent = kwargs.pop('solvent', self.solvent_default)

        # solventmodel: 'water' will choose the default ('tip4p'), other
        # choices are 'tip3p', 'spc', 'spce', 'm24', for water; no choices
        # available for 'cyclohexane' and 'octanol'
        solventmodel = kwargs.pop('solventmodel', None)

        # mdp files --- should get values from default runinput.cfg
        # self.mdp: key = stage, value = path to MDP file
        mdp_kw = kwargs.pop('mdp', {})
        default_templates = {}
        for stage, fn in self.mdp_defaults.items():
            default_templates[stage] = config.get_template(fn)
        self.mdp = default_templates
        # None values in the kwarg mdp dict are ignored
        user_templates = {}
        for stage, fn in mdp_kw.items():
            if fn is not None:
                user_templates[stage] = config.get_template(fn)
        self.mdp.update(user_templates)

        if molecule is not None or filename is None:
            # regular set-up from the supplied keywords
            self.molecule = molecule or 'DRUG'

            basedir = realpath(dirname)  # .../Equilibrium/<solvent>
            includes = list(asiterable(kwargs.pop('includes', [])))
            includes.append(config.includedir)
            self.dirs = AttributeDict(basedir=basedir, includes=includes)

            # pre-set filenames: keyword == variable name
            preset_files = []
            for key in self.filekeys:
                preset_files.append((key, kwargs.pop(key, None)))
            self.files = AttributeDict(preset_files)
            self.deffnm = kwargs.pop("deffnm", "md")

            topology = self.files.topology
            if topology:
                # assume that a user-supplied topology lives in a 'standard' top dir
                # that includes the necessary itp file(s)
                topology_dir = realpath(os.path.dirname(topology))
                self.dirs.topology = topology_dir
                self.dirs.includes.append(topology_dir)

            self.forcefield = forcefield
            self.solvent_type = solvent
            identifier = forcefields.get_solvent_identifier(
                solvent,
                model=solventmodel,
                forcefield=forcefield,
            )
            self.solventmodel_identifier = identifier
            if identifier is None:
                msg = "No parameters for solvent {0} and solventmodel {1} available.".format(
                    solvent, solventmodel)
                logger.error(msg)
                raise ValueError(msg)
            self.solventmodel = forcefields.get_solvent_model(
                identifier,
                forcefield=forcefield,
            )

            # fall back to the per-solvent default when no distance is given
            distance = kwargs.pop('distance', None)
            if distance is None:
                distance = DIST[solvent]

            self.solvent = AttributeDict(itp=self.solventmodel.itp,
                                         box=self.solventmodel.coordinates,
                                         distance=distance)

            if filename:
                self.filename = filename
            else:
                self.filename = self.solvent_type + '.simulation'
        else:
            # only a filename was given: load from pickle file
            self.load(filename)
            self.filename = filename
            kwargs = {}  # for super

        super(Simulation, self).__init__(**kwargs)
コード例 #11
0
    def analyze(self, **kwargs):
        """Load results from disk into :attr:`_Dihedrals.results` and compute PMF.

        The PMF W(phi) in kT is computed from each dihedral
        probability distribution P(phi) as

           W(phi) = -kT ln P(phi)

        It is stored in :attr:`_Dihedrals.results` with the key *PMF*; the
        distributions themselves are stored with the key *distributions*.
        Both are also written to the files registered under the same keys in
        ``self.parameters.filenames``.

        :Keywords:
          *bins*
             bins for histograms (passed to :func:`numpy.histogram`) [361]

        :Returns: a dictionary of the results and also sets
                  :attr:`_Dihedrals.results`.
        :Raises: :exc:`ValueError` if the timeseries contains no dihedral
                 columns.
        """

        bins = kwargs.pop('bins', 361)

        results = AttributeDict()

        # get graphs that were produced by g_angle
        for name, f in self.parameters.filenames.items():
            try:
                results[name] = XVG(f)
            except IOError:
                pass    # either not computed (yet) or some failure

        # compute individual distributions
        ts = results['timeseries'].array    # ts[0] = time, ts[1] = avg
        dih = ts[2:]
        if len(dih) == 0:
            # without at least one histogram there are no bin edges to build
            # the phi axis from
            raise ValueError(
                "timeseries contains no dihedral columns; cannot compute distributions")

        phi_range = (-180., 180.)

        # histograms (prob. distributions), one for each dihedral
        histograms = []
        for phis in dih:
            # density=True replaces the removed normed/new keywords of
            # numpy.histogram and normalizes each histogram to integral 1
            hist, edges = numpy.histogram(phis, bins=bins, range=phi_range,
                                          density=True)
            histograms.append(hist)

        P = numpy.array(histograms)
        phi = 0.5 * (edges[:-1] + edges[1:])   # midpoints of bin edges
        distributions = numpy.concatenate((phi[numpy.newaxis, :], P))  # phi, P[0], P[1], ...

        xvg_distributions = XVG()
        xvg_distributions.set(distributions)
        xvg_distributions.write(self.parameters.filenames['distributions'])
        results['distributions'] = xvg_distributions

        # compute PMF (from individual distributions)
        # Empty bins have P = 0 and therefore an infinite PMF; silence the
        # divide-by-zero warning that log(0) would emit.
        with numpy.errstate(divide='ignore'):
            W = -numpy.log(P)                  # W(phi)/kT = -ln P
        W -= W.min(axis=1)[:, numpy.newaxis]   # minimum at 0 kT
        pmf = numpy.concatenate((phi[numpy.newaxis, :], W), axis=0)
        xvg_pmf = XVG()
        xvg_pmf.set(pmf)
        xvg_pmf.write(self.parameters.filenames['PMF'])
        results['PMF'] = xvg_pmf

        self.results = results
        return results
コード例 #12
0
    def analyze(self, **kwargs):
        """Collect output xvg files as :class:`gromacs.formats.XVG` objects.

        - Make COM as a function of time available as XVG files and
          objects.
        - Compute RMSD of the COM of each group (from average
          position, "rmsd").
        - Compute distance which encompasses 50% of observations ("median")
        - Compute drift of COM, i.e. length of the vector between
          initial and final position. Initial and final position are
          computed as averages over *nframesavg* frames ("drift").

        RMSD, median, and drift are columns in an xvg file. The rows correspond
        to the groups in ``self.parameters.group_names``.

        :Keywords:
          *nframesavg*
              number of initial and final frames that are averaged in
              order to compute the drift of the COM of each group
              [5000]
          *refgroup*
              group name whose com is taken as the reference and subtracted from
              all other coms for the distance calculations. If supplied,
              additional result 'com_relative_*refgroup*' is created.

        :Returns:  a dictionary of the results and also sets
                  :attr:`gromacs.analysis.plugins.com.Worker.results`.
        :Raises: :exc:`ValueError` if *refgroup* is not one of the group names.
        """
        from gromacs.formats import XVG

        logger.info("Preparing COM graphs as XVG objects.")
        self.results = AttributeDict(
            (k, XVG(fn)) for k, fn in self.parameters.filenames.items())

        # compute RMSD of COM and shift of COM (drift) between avg pos
        # over first/last 5,000 frames
        nframesavg = kwargs.pop('nframesavg', 5000)
        group_names = self.parameters.group_names
        ngroups = len(group_names)
        xcom = self.results['com'].array

        refgroup = kwargs.pop('refgroup', None)
        if refgroup is not None:
            if refgroup not in group_names:
                errmsg = "refgroup={0!s} must be one of {1!r}".format(
                    refgroup, group_names)
                logger.error(errmsg)
                raise ValueError(errmsg)
            # 1-based !! (row 0 is skipped; each group occupies 3 rows)
            nreference = 1 + 3 * group_names.index(refgroup)
            reference_com = xcom[nreference:nreference + 3]
            # NOTE(review): this modifies xcom in place; whether that also
            # changes the data held by self.results['com'] depends on whether
            # .array returns a copy -- confirm.
            xcom[1:] -= numpy.vstack(ngroups *
                                     [reference_com])  # can't use broadcast
            logger.debug("distances computed with refgroup %r", refgroup)

            self.store_xvg('com_relative_{0!s}'.format(refgroup),
                           xcom,
                           names=['time'] + group_names)

        def vlength(v):
            """Return the Euclidean length of *v* along axis 0."""
            return numpy.sqrt(numpy.sum(v**2, axis=0))

        logger.debug(
            "drift calculated between %d-frame averages at beginning and end",
            nframesavg)
        records = []
        # range() instead of xrange(): identical iteration, but also
        # available on Python 3
        for i in range(1, 3 * ngroups + 1, 3):
            x = xcom[i:i + 3]
            # distance from the average position at each time step
            r = vlength(x - x.mean(axis=1)[:, numpy.newaxis])
            #r0 = vlength(r - r[:,0][:,numpy.newaxis])         # distances over time step from r(t=0)
            #h,edges = numpy.histogram(r, bins=kwargs.get('bins', 100), normed=True)
            #m = 0.5*(edges[1:]+edges[:-1])
            #c = h.cumsum(dtype=float)    # integral
            #c /= c[-1]                   # normalized (0 to 1)
            #median = m[c < 0.5][-1]
            #g =  h/(4*numpy.pi*m**2)
            #import scipy.integrate
            #radint = lambda y: 4*numpy.pi*scipy.integrate.simps(m**2*y, x=m)
            #g /= radint(g)  # properly normalized radial distribution function
            # radial spread sqrt(radint(m**2 * g))
            rmsd = numpy.sqrt(numpy.mean(r**2))
            # radius that contains 50% of the observations
            median = numpy.median(r)
            # vector between the averages over the first and the last
            # nframesavg frames
            dx = x[:, :nframesavg].mean(axis=1) - x[:, -nframesavg:].mean(axis=1)
            drift = vlength(dx)
            records.append((rmsd, median, drift))
        self.store_xvg('distance',
                       numpy.transpose(records),
                       names="rmsd,median,drift")

        return self.results
コード例 #13
0
    def __init__(self, **kwargs):
        """Set up  StripWater

        :Arguments:

          *force*
             ``True`` will always regenerate trajectories even if they
             already exist, ``False`` raises an exception, ``None``
             does the sensible thing in most cases (i.e. notify and
             then move on).
          *dt* : float or list of floats
             only write every dt timestep (in ps); if a list of floats is
             supplied, write multiple trajectories, one for each dt.
          *compact* : bool
             write a compact representation
          *centergroup*
             Index group to center on ["Protein"]
          *fit*
             Create an additional trajectory from the stripped one in which
             the *fitgroup* group is rms-fitted to the initial structure. See
             :meth:`gromacs.cbook.Transformer.fit` for details. Useful
             values:

             - "xy" : perform a rot+trans fit in the x-y plane
             - "all": rot+trans
             - ``None``: no fitting

             If *fit* is not supplied then the constructor-default is used
             (:attr:`_StripWater.parameters.fit`).
          *fitgroup*
             Index group to fit to with the *fit* option; must be changed if
             molecule is not a protein and automatically recognized. Also
             consider supplying a custom index file. ["backbone"]
          *resn*
             name of the residues that are stripped (typically it is
             safe to leave this at the default 'SOL')
          *outdir*
             place generated files in *outdir* instead of the same directory
             where the input tpr/xtc lived [``None``]

        :Raises: :exc:`ValueError` if *fit* is not one of the allowed values.

        .. Note::

           If set, *dt* is only applied to a fit step; the no-water
           trajectory is always generated for all time steps of the
           input.

        """
        # specific arguments: take them before calling the super class that
        # does not know what to do with them
        _fitvalues = ("xy", "all", None)
        parameters = {}
        parameters['fit'] = kwargs.pop('fit', None)            # fitting algorithm
        if parameters['fit'] not in _fitvalues:
            # Pass the values explicitly: there is no local variable named
            # "fit", so formatting with **vars() raised KeyError instead of
            # the intended ValueError.
            raise ValueError(
                "StripWater: *fit* must be one of {_fitvalues!r}, not {fit!r}.".format(
                    _fitvalues=_fitvalues, fit=parameters['fit']))
        # (the next two lines were tab-indented, which Python 3 rejects when
        # mixed with space indentation)
        parameters['fitgroup'] = kwargs.pop('fitgroup', "backbone")
        parameters['centergroup'] = kwargs.pop('centergroup', "Protein")
        parameters['compact'] = kwargs.pop('compact', False)  # compact+centered ?
        parameters['resn'] = kwargs.pop('resn', 'SOL')        # residue name to be stripped
        parameters['dt'] = kwargs.pop('dt', None)
        parameters['force'] = kwargs.pop('force', None)
        parameters['outdir'] = kwargs.pop('outdir', None)

        # super class init: do this before doing anything else
        # (also sets up self.parameters and self.results)
        super(_StripWater, self).__init__(**kwargs)

        # self.parameters is set up by the base Worker class...
        self.parameters.filenames = AttributeDict()
        self.parameters.update(parameters)

        # self.simulation might have been set by the super class
        # already; just leave this snippet at the end. Do all
        # initialization that requires the simulation class in the
        # _register_hook() method.
        if self.simulation is not None:
            self._register_hook()