class Simulation(object):
    """Class that represents one simulation.

    Analysis capabilities are added via plugins.

    1. Set the *active plugin* with the :meth:`Simulation.set_plugin` method.
    2. Analyze the trajectory with the active plugin by calling the
       :meth:`Simulation.run` method.
    3. Analyze the output from :meth:`run` with :meth:`Simulation.analyze`;
       results are stored in the plugin's :attr:`~Worker.results` dictionary.
    4. Plot results with :meth:`Simulation.plot`.
    """
    # NOTE: not suitable for multiple inheritance

    def __init__(self, **kwargs):
        """Set up a Simulation object.

        :Keywords:
           *sim*
             Any object that contains the attributes *tpr*, *xtc*,
             and optionally *ndx*
             (e.g. :class:`gromacs.cbook.Transformer`). The
             individual keywords such as *xtc* override the values
             in *sim*.
           *tpr*
             Gromacs tpr file (**required**)
           *xtc*
             Gromacs trajectory, can also be a trr (**required**)
           *edr*
             Gromacs energy file (only required for some plugins)
           *ndx*
             Gromacs index file
           *absolute*
             ``True``: Turn file names into absolute paths (typically required
             for most plugins); ``False`` keep as they are [``True``]
           *strict*
             ``True``: missing required file keyword raises a :exc:`TypeError`
             and missing the file itself raises a :exc:`IOError`.  ``False``:
             missing required files only give a warning. [``True``]
           *analysisdir*
             directory under which derived data are stored;
             defaults to the directory containing the tpr [None]
           *plugins* : list
             plugin instances or tuples (*plugin class*, *kwarg dict*) or tuples
             (*plugin_class_name*, *kwarg dict*) to be used; more can be
             added later with :meth:`Simulation.add_plugin`.

        :Raises: :exc:`TypeError` if a required keyword (*tpr*, *xtc*) is
                 found neither in the keywords nor in *sim* and *strict* is
                 ``True``.
        """
        logger.info("Loading simulation data")

        sim = kwargs.pop('sim', None)
        strict = kwargs.pop('strict', True)

        def getpop(attr, required=False, strict=strict):
            """Return attribute from kwargs or sim or None"""
            val = kwargs.pop(attr, None)  # must pop from kwargs to clean it
            if val is not None:
                return val
            try:
                # getattr(None, ...) also raises AttributeError, so a missing
                # *sim* is handled by the same branch
                return getattr(sim, attr)
            except AttributeError:
                if required:
                    errmsg = "Required attribute {0!r} not found in kwargs or sim".format(attr)
                    if strict:
                        logger.critical(errmsg)
                        raise TypeError(errmsg)
                    logger.warning(errmsg + "... continuing because of strict=False")
                    warnings.warn(errmsg)
                return None

        make_absolute = kwargs.pop('absolute', True)

        def canonical(*args):
            """Join *args* and get the :func:`os.path.realpath`."""
            if None in args:
                return None
            if not make_absolute:
                return os.path.join(*args)
            return os.path.realpath(os.path.join(*args))

        # required files
        self.tpr = canonical(getpop('tpr', required=True))
        self.xtc = canonical(getpop('xtc', required=True))
        # optional files
        self.ndx = canonical(getpop('ndx'))
        self.edr = canonical(getpop('edr'))

        # check existence of required files
        resolve = "exception" if strict else "warn"
        for name in ('tpr', 'xtc'):
            self.check_file(name, getattr(self, name), resolve=resolve)

        # Only derive the default from the tpr when no directory was given:
        # self.tpr can be None (strict=False), and os.path.dirname(None) raises.
        analysisdir = kwargs.pop('analysisdir', None)
        if analysisdir is None and self.tpr is not None:
            analysisdir = os.path.dirname(self.tpr)
        self.analysis_dir = analysisdir

        #: Registry for plugins: This dict is central.
        self.plugins = AttributeDict()
        #: Use this plugin if none is explicitly specified. Typically set with
        #: :meth:`~Simulation.set_plugin`.
        self.default_plugin_name = None

        # XXX: Or should we simply add instances and then re-register
        #      all instances using register() ?
        # XXX: ... this API should be cleaned up. It seems to be connected
        #      back and forth in vicious circles. -- OB 2009-07-10

        plugins = kwargs.pop('plugins', [])
        # list of tuples (plugin, kwargs) or just (plugin,) if no kwords
        # required (eg if plugin is an instance)
        for spec in plugins:
            try:
                # make sure to wrap strings, especially 2-letter ones!
                plugin, plugin_kwargs = asiterable(spec)
            except ValueError:
                # not a (plugin, kwargs) pair: a bare plugin/name or (plugin,)
                plugin, plugin_kwargs = spec, {}
                if isinstance(spec, (tuple, list)) and len(spec) == 1:
                    plugin = spec[0]
            self.add_plugin(plugin, **plugin_kwargs)

        # convenience: if only a single plugin was registered we default to that one
        if len(self.plugins) == 1:
            # list() so that this also works with Python 3 key views
            self.set_plugin(list(self.plugins.keys())[0])

        # Is this needed? If done properly, kwargs should be empty by now BUT
        # because the same list is re-used for all plugins I cannot pop them in
        # the plugins. I don't think multiple inheritance would work with this
        # setup so let's not pretend it does: hence comment out the super-init
        # call:
        ## super(Simulation, self).__init__(**kwargs)
        logger.info("Simulation instance initialised:")
        logger.info(str(self))

    def add_plugin(self, plugin, **kwargs):
        """Add a plugin to the registry.

        - If *plugin* is a :class:`Plugin` instance then the
          instance is directly registered and any keyword arguments
          are ignored.

        - If *plugin* is a :class:`Plugin` class object or a
          string that can be found in :mod:`gromacs.analysis.plugins`
          then first an instance is created with the given keyword
          arguments and then registered.

        :Arguments:
            *plugin* : class or string, or instance
               If the parameter is a class then it should have been derived
               from :class:`Plugin`. If it is a string then it is taken as a
               plugin name in :mod:`gromacs.analysis.plugins` and the
               corresponding class is added. In both cases any parameters for
               initialization should be provided.

               If *plugin* is already a :class:`Plugin` instance then the kwargs
               will be ignored.
            *kwargs*
               The kwargs are specific for the plugin and should be described
               in its documentation.
        """
        # simulation=self must be provided so that plugin knows who owns it
        try:
            plugin.register(simulation=self)
        except (TypeError, AttributeError):
            # NOTE: this except clause can mask bugs in the plugin code!!
            if isinstance(plugin, str):
                # NOTE(review): implicit relative import; presumably resolves to
                # gromacs.analysis.plugins -- confirm before porting to Python 3.
                import plugins  # We should be able to import this safely now...
                plugin = plugins.__plugin_classes__[plugin]
            # plugin registers itself in self.plugins
            plugin(simulation=self, **kwargs)  # simulation=self is REQUIRED!

    def topdir(self, *args):
        """Returns a path under self.analysis_dir, whose parent directory is
        guaranteed to exist.

        .. Note:: Parent dirs are created if necessary.

        :Arguments: path components that are joined to ``self.analysis_dir``
        :Returns: the joined path
        :Raises: :exc:`OSError` if the parent directory cannot be created for
                 any reason other than that it already exists
        """
        p = os.path.join(self.analysis_dir, *args)
        parent = os.path.dirname(p)
        # An empty parent means "current directory": nothing to create, and
        # os.makedirs('') would raise.
        if parent:
            try:
                os.makedirs(parent)
            except OSError as err:
                if err.errno != errno.EEXIST:
                    raise
        return p
class _COM(Worker):
    """COM worker class."""

    def __init__(self, **kwargs):
        """Set up COM analysis.

        :Keywords:
          *group_names*
              list of index group names
          *ndx*
              index file if groups are not in the default index
          *offset*
              add the *offset* to the residue numbers [0]
          *name*
              plugin name [COM]
          *simulation*
              The :class:`gromacs.analysis.Simulation` instance that owns the plugin [None]
        """
        group_names = asiterable(kwargs.pop('group_names', []))
        ndx = kwargs.pop('ndx', None)
        offset = kwargs.pop('offset', 0)

        # remaining keywords (e.g. name, simulation) are handled by the base class
        super(_COM, self).__init__(**kwargs)

        self.parameters.group_names = group_names
        self.parameters.offset = offset
        self.ndx = ndx

        if self.simulation is not None:
            self._register_hook()

    def _register_hook(self, **kwargs):
        """Run when registering; requires simulation."""
        super(_COM, self)._register_hook(**kwargs)
        assert self.simulation is not None

        # fall back to the index file of the owning simulation
        if self.ndx is None:
            self.ndx = self.simulation.ndx

        self.parameters.filenames = {  # result xvg files
            'com': self.plugindir('com.xvg'),
        }

        # default filename for the plots -- not used
        self.parameters.fignames = {
            'com': self.figdir('com'),
        }

    def run(self, force=None, **gmxargs):
        """Analyze trajectory and write COM file.

        All three components of the COM coordinate are written.

        :Arguments:
          - *force*: ``True`` does analysis and overwrites existing files
          - *gmxargs*: additional keyword arguments for :func:`gromacs.g_traj`

        :Raises: :exc:`ValueError` if no index group names were provided.
        """
        # these options are always set here and override any user values
        gmxargs['com'] = True
        gmxargs['mol'] = False
        gmxargs['ng'] = len(self.parameters.group_names)
        gmxargs['x'] = True
        gmxargs['y'] = True
        gmxargs['z'] = True

        if gmxargs['ng'] == 0:
            errmsg = "No index group name(s) provided. Use group_names with the constructor."
            logger.error(errmsg)
            raise ValueError(errmsg)

        if self.check_file_exists(self.parameters.filenames['com'], resolve='warning', force=force):
            return

        logger.info("Analyzing COM ...")
        f = self.parameters.filenames
        gromacs.g_traj(s=self.simulation.tpr, f=self.simulation.xtc, n=self.ndx,
                       ox=f['com'], input=self.parameters.group_names, **gmxargs)

    def analyze(self, **kwargs):
        """Collect output xvg files as :class:`gromacs.formats.XVG` objects.

        - Make COM as a function of time available as XVG files and objects.
        - Compute RMSD of the COM of each group (from average position, "rmsd").
        - Compute distance which encompasses 50% of observations ("median")
        - Compute drift of COM, i.e. length of the vector between
          initial and final position. Initial and final position are computed
          as averages over *nframesavg* frames ("drift").

        RMSD, median, and drift are columns in an xvg file. The rows correspond
        to the groups in :attr:`gromacs.analysis.plugins.com.Worker.results.group_names`.

        :Keywords:
          *nframesavg*
              number of initial and final frames that are averaged in
              order to compute the drift of the COM of each group
              [5000]
          *refgroup*
              group name whose com is taken as the reference and subtracted from
              all other coms for the distance calculations. If supplied,
              additional result 'com_relative_*refgroup*' is created.

        :Returns:  a dictionary of the results and also sets
                  :attr:`gromacs.analysis.plugins.com.Worker.results`.

        :Raises: :exc:`ValueError` if *refgroup* is not one of the group names.
        """
        from gromacs.formats import XVG

        logger.info("Preparing COM graphs as XVG objects.")
        self.results = AttributeDict(
            (k, XVG(fn)) for k, fn in self.parameters.filenames.items())

        # compute RMSD of COM and shift of COM (drift) between avg pos
        # over first/last 5,000 frames
        nframesavg = kwargs.pop('nframesavg', 5000)
        group_names = self.parameters.group_names
        ngroups = len(group_names)
        xcom = self.results['com'].array

        refgroup = kwargs.pop('refgroup', None)
        if refgroup is not None:
            if refgroup not in group_names:
                errmsg = "refgroup={0!s} must be one of {1!r}".format(refgroup, group_names)
                logger.error(errmsg)
                raise ValueError(errmsg)
            # row 0 is the time; each group occupies three rows (x, y, z)
            nreference = 1 + 3 * group_names.index(refgroup)  # 1-based !!
            reference_com = xcom[nreference:nreference + 3]
            # NOTE(review): in-place update; if XVG.array hands out its own
            # array rather than a copy this also changes results['com'] -- confirm.
            xcom[1:] -= numpy.vstack(ngroups * [reference_com])  # can't use broadcast
            logger.debug("distances computed with refgroup %r", refgroup)

            # list() so that a tuple of group names can be concatenated too
            self.store_xvg('com_relative_{0!s}'.format(refgroup), xcom,
                           names=['time'] + list(group_names))

        def vlength(v):
            """Euclidean length along the first axis."""
            return numpy.sqrt(numpy.sum(v**2, axis=0))  # distances over time step

        logger.debug("drift calculated between %d-frame averages at beginning and end",
                     nframesavg)
        records = []
        for i in range(1, 3 * ngroups + 1, 3):
            x = xcom[i:i + 3]
            # distance from the average position at every time step
            r = vlength(x - x.mean(axis=1)[:, numpy.newaxis])
            rmsd = numpy.sqrt(numpy.mean(r**2))  # radial spread
            median = numpy.median(r)  # radius that contains 50% of the observations
            dx = x[:, :nframesavg].mean(axis=1) - x[:, -nframesavg:].mean(axis=1)
            drift = vlength(dx)

            records.append((rmsd, median, drift))
        self.store_xvg('distance', numpy.transpose(records), names="rmsd,median,drift")

        return self.results

    def plot(self, **kwargs):
        """Plot all results in one graph, labelled by the result keys.

        :Keywords:
           observables
              select one or more of the stored results. Can be a list
              or a string (a key into the results dict). ``None``
              plots everything [``None``]
           figure
               - ``True``: save figures in the given formats
               - "name.ext": save figure under this filename (``ext`` -> format)
               - ``False``: only show on screen [``False``]
           formats : sequence
               sequence of all formats that should be saved [('pdf', 'png')]
           plotargs
               keyword arguments for pylab.plot()
        """
        import pylab

        figure = kwargs.pop('figure', False)
        observables = kwargs.pop('observables', None)
        if observables is None:
            # documented default: plot every stored result
            observables = list(self.results.keys())
        observables = asiterable(observables)
        extensions = kwargs.pop('formats', ('pdf', 'png'))

        for name in observables:
            result = self.results[name]
            try:
                result.plot(**kwargs)  # This requires result classes with a plot() method!!
            except AttributeError:
                warnings.warn(
                    "Sorry, plotting of result {name!r} is not implemented".format(name=name),
                    category=UserWarning)

        # quick labels -- relies on the proper ordering
        labels = [str(n) + " " + dim
                  for n in self.parameters.group_names for dim in 'xyz']
        columns = kwargs.get('columns')
        if columns is not None:
            # select labels according to columns; only makes sense
            # if plotting against the time (col 0)
            if columns[0] == 0:
                # index with a list: a tuple would be read as a
                # multi-dimensional index by numpy
                labels = numpy.array([None] + labels)[list(columns[1:])]
            else:
                labels = ()

        pylab.legend(labels, loc='best')
        if figure is True:
            for ext in extensions:
                self.savefig(ext=ext)
        elif figure:
            self.savefig(filename=figure)
class Simulation(object):
    """Class that represents one simulation.

    Analysis capabilities are added via plugins.

    1. Set the *active plugin* with the :meth:`Simulation.set_plugin` method.
    2. Analyze the trajectory with the active plugin by calling the
       :meth:`Simulation.run` method.
    3. Analyze the output from :meth:`run` with :meth:`Simulation.analyze`;
       results are stored in the plugin's :attr:`~Worker.results` dictionary.
    4. Plot results with :meth:`Simulation.plot`.
    """
    # NOTE: not suitable for multiple inheritance

    def __init__(self, **kwargs):
        """Set up a Simulation object.

        :Keywords:
           *sim*
             Any object that contains the attributes *tpr*, *xtc*,
             and optionally *ndx*
             (e.g. :class:`gromacs.cbook.Transformer`). The
             individual keywords such as *xtc* override the values
             in *sim*.
           *tpr*
             Gromacs tpr file (**required**)
           *xtc*
             Gromacs trajectory, can also be a trr (**required**)
           *edr*
             Gromacs energy file (only required for some plugins)
           *ndx*
             Gromacs index file
           *absolute*
             ``True``: Turn file names into absolute paths (typically required
             for most plugins); ``False`` keep as they are [``True``]
           *strict*
             ``True``: missing required file keyword raises a :exc:`TypeError`
             and missing the file itself raises a :exc:`IOError`.  ``False``:
             missing required files only give a warning. [``True``]
           *analysisdir*
             directory under which derived data are stored;
             defaults to the directory containing the tpr [None]
           *plugins* : list
             plugin instances or tuples (*plugin class*, *kwarg dict*) or tuples
             (*plugin_class_name*, *kwarg dict*) to be used; more can be
             added later with :meth:`Simulation.add_plugin`.

        :Raises: :exc:`TypeError` if a required keyword (*tpr*, *xtc*) is
                 found neither in the keywords nor in *sim* and *strict* is
                 ``True``.
        """
        logger.info("Loading simulation data")

        sim = kwargs.pop('sim', None)
        strict = kwargs.pop('strict', True)

        def getpop(attr, required=False, strict=strict):
            """Return attribute from kwargs or sim or None"""
            val = kwargs.pop(attr, None)  # must pop from kwargs to clean it
            if val is not None:
                return val
            try:
                # getattr(None, ...) also raises AttributeError, so a missing
                # *sim* is handled by the same branch
                return getattr(sim, attr)
            except AttributeError:
                if required:
                    errmsg = "Required attribute %r not found in kwargs or sim" % (attr,)
                    if strict:
                        logger.critical(errmsg)
                        raise TypeError(errmsg)
                    logger.warning(errmsg + "... continuing because of strict=False")
                    warnings.warn(errmsg)
                return None

        make_absolute = kwargs.pop('absolute', True)

        def canonical(*args):
            """Join *args* and get the :func:`os.path.realpath`."""
            if None in args:
                return None
            if not make_absolute:
                return os.path.join(*args)
            return os.path.realpath(os.path.join(*args))

        # required files
        self.tpr = canonical(getpop('tpr', required=True))
        self.xtc = canonical(getpop('xtc', required=True))
        # optional files
        self.ndx = canonical(getpop('ndx'))
        self.edr = canonical(getpop('edr'))

        # check existence of required files
        resolve = "exception" if strict else "warn"
        for name in ('tpr', 'xtc'):
            self.check_file(name, getattr(self, name), resolve=resolve)

        # Only derive the default from the tpr when no directory was given:
        # self.tpr can be None (strict=False), and os.path.dirname(None) raises.
        analysisdir = kwargs.pop('analysisdir', None)
        if analysisdir is None and self.tpr is not None:
            analysisdir = os.path.dirname(self.tpr)
        self.analysis_dir = analysisdir

        #: Registry for plugins: This dict is central.
        self.plugins = AttributeDict()
        #: Use this plugin if none is explicitly specified. Typically set with
        #: :meth:`~Simulation.set_plugin`.
        self.default_plugin_name = None

        # XXX: Or should we simply add instances and then re-register
        #      all instances using register() ?
        # XXX: ... this API should be cleaned up. It seems to be connected
        #      back and forth in vicious circles. -- OB 2009-07-10

        plugins = kwargs.pop('plugins', [])
        # list of tuples (plugin, kwargs) or just (plugin,) if no kwords
        # required (eg if plugin is an instance)
        for spec in plugins:
            try:
                # make sure to wrap strings, especially 2-letter ones!
                plugin, plugin_kwargs = asiterable(spec)
            except ValueError:
                # not a (plugin, kwargs) pair: a bare plugin/name or (plugin,)
                plugin, plugin_kwargs = spec, {}
                if isinstance(spec, (tuple, list)) and len(spec) == 1:
                    plugin = spec[0]
            self.add_plugin(plugin, **plugin_kwargs)

        # convenience: if only a single plugin was registered we default to that one
        if len(self.plugins) == 1:
            # list() so that this also works with Python 3 key views
            self.set_plugin(list(self.plugins.keys())[0])

        # Is this needed? If done properly, kwargs should be empty by now BUT
        # because the same list is re-used for all plugins I cannot pop them in
        # the plugins. I don't think multiple inheritance would work with this
        # setup so let's not pretend it does: hence comment out the super-init
        # call:
        ## super(Simulation, self).__init__(**kwargs)
        logger.info("Simulation instance initialised:")
        logger.info(str(self))

    def add_plugin(self, plugin, **kwargs):
        """Add a plugin to the registry.

        - If *plugin* is a :class:`Plugin` instance then the
          instance is directly registered and any keyword arguments
          are ignored.

        - If *plugin* is a :class:`Plugin` class object or a
          string that can be found in :mod:`gromacs.analysis.plugins`
          then first an instance is created with the given keyword
          arguments and then registered.

        :Arguments:
            *plugin* : class or string, or instance
               If the parameter is a class then it should have been derived
               from :class:`Plugin`. If it is a string then it is taken as a
               plugin name in :mod:`gromacs.analysis.plugins` and the
               corresponding class is added. In both cases any parameters for
               initialization should be provided.

               If *plugin* is already a :class:`Plugin` instance then the kwargs
               will be ignored.
            *kwargs*
               The kwargs are specific for the plugin and should be described
               in its documentation.
        """
        # simulation=self must be provided so that plugin knows who owns it
        try:
            plugin.register(simulation=self)
        except (TypeError, AttributeError):
            # NOTE: this except clause can mask bugs in the plugin code!!
            if isinstance(plugin, str):
                # NOTE(review): implicit relative import; presumably resolves to
                # gromacs.analysis.plugins -- confirm before porting to Python 3.
                import plugins  # We should be able to import this safely now...
                plugin = plugins.__plugin_classes__[plugin]
            # plugin registers itself in self.plugins
            plugin(simulation=self, **kwargs)  # simulation=self is REQUIRED!

    def topdir(self, *args):
        """Returns a path under self.analysis_dir, whose parent directory is
        guaranteed to exist.

        .. Note:: Parent dirs are created if necessary.

        :Arguments: path components that are joined to ``self.analysis_dir``
        :Returns: the joined path
        :Raises: :exc:`OSError` if the parent directory cannot be created for
                 any reason other than that it already exists
        """
        p = os.path.join(self.analysis_dir, *args)
        parent = os.path.dirname(p)
        # An empty parent means "current directory": nothing to create, and
        # os.makedirs('') would raise.
        if parent:
            try:
                os.makedirs(parent)
            except OSError as err:
                if err.errno != errno.EEXIST:
                    raise
        return p
class _COM(Worker):
    """COM worker class."""

    def __init__(self, **kwargs):
        """Set up COM analysis.

        :Keywords:
          *group_names*
              list of index group names
          *ndx*
              index file if groups are not in the default index
          *offset*
              add the *offset* to the residue numbers [0]
          *name*
              plugin name [COM]
          *simulation*
              The :class:`gromacs.analysis.Simulation` instance that owns the plugin [None]
        """
        group_names = asiterable(kwargs.pop('group_names', []))
        ndx = kwargs.pop('ndx', None)
        offset = kwargs.pop('offset', 0)

        # remaining keywords (e.g. name, simulation) are handled by the base class
        super(_COM, self).__init__(**kwargs)

        self.parameters.group_names = group_names
        self.parameters.offset = offset
        self.ndx = ndx

        if self.simulation is not None:
            self._register_hook()

    def _register_hook(self, **kwargs):
        """Run when registering; requires simulation."""
        super(_COM, self)._register_hook(**kwargs)
        assert self.simulation is not None

        # fall back to the index file of the owning simulation
        if self.ndx is None:
            self.ndx = self.simulation.ndx

        self.parameters.filenames = {  # result xvg files
            'com': self.plugindir('com.xvg'),
        }

        # default filename for the plots -- not used
        self.parameters.fignames = {
            'com': self.figdir('com'),
        }

    def run(self, force=None, **gmxargs):
        """Analyze trajectory and write COM file.

        All three components of the COM coordinate are written.

        :Arguments:
          - *force*: ``True`` does analysis and overwrites existing files
          - *gmxargs*: additional keyword arguments for :func:`gromacs.g_traj`

        :Raises: :exc:`ValueError` if no index group names were provided.
        """
        # these options are always set here and override any user values
        gmxargs['com'] = True
        gmxargs['mol'] = False
        gmxargs['ng'] = len(self.parameters.group_names)
        gmxargs['x'] = True
        gmxargs['y'] = True
        gmxargs['z'] = True

        if gmxargs['ng'] == 0:
            errmsg = "No index group name(s) provided. Use group_names with the constructor."
            logger.error(errmsg)
            raise ValueError(errmsg)

        if self.check_file_exists(self.parameters.filenames['com'], resolve='warning', force=force):
            return

        logger.info("Analyzing COM ...")
        f = self.parameters.filenames
        gromacs.g_traj(s=self.simulation.tpr, f=self.simulation.xtc, n=self.ndx,
                       ox=f['com'], input=self.parameters.group_names, **gmxargs)

    def analyze(self, **kwargs):
        """Collect output xvg files as :class:`gromacs.formats.XVG` objects.

        - Make COM as a function of time available as XVG files and objects.
        - Compute RMSD of the COM of each group (from average position, "rmsd").
        - Compute distance which encompasses 50% of observations ("median")
        - Compute drift of COM, i.e. length of the vector between
          initial and final position. Initial and final position are computed
          as averages over *nframesavg* frames ("drift").

        RMSD, median, and drift are columns in an xvg file. The rows correspond
        to the groups in :attr:`gromacs.analysis.plugins.com.Worker.results.group_names`.

        :Keywords:
          *nframesavg*
              number of initial and final frames that are averaged in
              order to compute the drift of the COM of each group
              [5000]
          *refgroup*
              group name whose com is taken as the reference and subtracted from
              all other coms for the distance calculations. If supplied,
              additional result 'com_relative_*refgroup*' is created.

        :Returns:  a dictionary of the results and also sets
                  :attr:`gromacs.analysis.plugins.com.Worker.results`.

        :Raises: :exc:`ValueError` if *refgroup* is not one of the group names.
        """
        from gromacs.formats import XVG

        logger.info("Preparing COM graphs as XVG objects.")
        self.results = AttributeDict(
            (k, XVG(fn)) for k, fn in self.parameters.filenames.items())

        # compute RMSD of COM and shift of COM (drift) between avg pos
        # over first/last 5,000 frames
        nframesavg = kwargs.pop('nframesavg', 5000)
        group_names = self.parameters.group_names
        ngroups = len(group_names)
        xcom = self.results['com'].array

        refgroup = kwargs.pop('refgroup', None)
        if refgroup is not None:
            if refgroup not in group_names:
                errmsg = "refgroup=%s must be one of %r" % (refgroup, group_names)
                logger.error(errmsg)
                raise ValueError(errmsg)
            # row 0 is the time; each group occupies three rows (x, y, z)
            nreference = 1 + 3 * group_names.index(refgroup)  # 1-based !!
            reference_com = xcom[nreference:nreference + 3]
            # NOTE(review): in-place update; if XVG.array hands out its own
            # array rather than a copy this also changes results['com'] -- confirm.
            xcom[1:] -= numpy.vstack(ngroups * [reference_com])  # can't use broadcast
            logger.debug("distances computed with refgroup %r", refgroup)

            # list() so that a tuple of group names can be concatenated too
            self.store_xvg('com_relative_%s' % refgroup, xcom,
                           names=['time'] + list(group_names))

        def vlength(v):
            """Euclidean length along the first axis."""
            return numpy.sqrt(numpy.sum(v**2, axis=0))  # distances over time step

        logger.debug("drift calculated between %d-frame averages at beginning and end",
                     nframesavg)
        records = []
        for i in range(1, 3 * ngroups + 1, 3):
            x = xcom[i:i + 3]
            # distance from the average position at every time step
            r = vlength(x - x.mean(axis=1)[:, numpy.newaxis])
            rmsd = numpy.sqrt(numpy.mean(r**2))  # radial spread
            median = numpy.median(r)  # radius that contains 50% of the observations
            dx = x[:, :nframesavg].mean(axis=1) - x[:, -nframesavg:].mean(axis=1)
            drift = vlength(dx)

            records.append((rmsd, median, drift))
        self.store_xvg('distance', numpy.transpose(records), names="rmsd,median,drift")

        return self.results

    def plot(self, **kwargs):
        """Plot all results in one graph, labelled by the result keys.

        :Keywords:
           observables
              select one or more of the stored results. Can be a list
              or a string (a key into the results dict). ``None``
              plots everything [``None``]
           figure
               - ``True``: save figures in the given formats
               - "name.ext": save figure under this filename (``ext`` -> format)
               - ``False``: only show on screen [``False``]
           formats : sequence
               sequence of all formats that should be saved [('pdf', 'png')]
           plotargs
               keyword arguments for pylab.plot()
        """
        import pylab

        figure = kwargs.pop('figure', False)
        observables = kwargs.pop('observables', None)
        if observables is None:
            # documented default: plot every stored result
            observables = list(self.results.keys())
        observables = asiterable(observables)
        extensions = kwargs.pop('formats', ('pdf', 'png'))

        for name in observables:
            result = self.results[name]
            try:
                result.plot(**kwargs)  # This requires result classes with a plot() method!!
            except AttributeError:
                warnings.warn(
                    "Sorry, plotting of result %r is not implemented" % (name,),
                    category=UserWarning)

        # quick labels -- relies on the proper ordering
        labels = [str(n) + " " + dim
                  for n in self.parameters.group_names for dim in 'xyz']
        columns = kwargs.get('columns')
        if columns is not None:
            # select labels according to columns; only makes sense
            # if plotting against the time (col 0)
            if columns[0] == 0:
                # index with a list: a tuple would be read as a
                # multi-dimensional index by numpy
                labels = numpy.array([None] + labels)[list(columns[1:])]
            else:
                labels = ()

        pylab.legend(labels, loc='best')
        if figure is True:
            for ext in extensions:
                self.savefig(ext=ext)
        elif figure:
            self.savefig(filename=figure)