def __init__(self, **kwargs):
    """Initialise the state shared by every plugin worker.

    :Keywords:
      *plugin* : instance
           The :class:`Plugin` instance that owns this worker.
           **Must be supplied.**
      *simulation*
           A :class:`Simulation` object, required for registration, but
           it may be attached after construction.
      *kwargs*
           All remaining keyword arguments are handed to the super class.
    """
    owner = kwargs.pop('plugin', None)
    #: :class:`Plugin` instance that owns this Worker.
    self.plugin = owner
    # 'plugin' arrives through **kwargs but is not optional
    assert owner is not None
    #: Name of the plugin that this Worker belongs to.
    self.plugin_name = owner.plugin_name

    # the simulation is eventually needed but may be set after init
    self.simulation = kwargs.pop('simulation', None)
    # directory name under analysisdir
    self.location = self.plugin_name
    # container for computed results
    self.results = AttributeDict()

    # container for options, filenames, etc.
    settings = AttributeDict()
    settings.filenames = AttributeDict()
    self.parameters = settings

    super(Worker, self).__init__(**kwargs)
def analyze(self, **kwargs):
    """Short description of postprocessing.

    The analyze method typically postprocesses the data files
    generated by run. Keeping the two stages (*run* and *analyze*)
    separate means that postprocessing parameters can be changed
    without repeating the time consuming trajectory analysis.

    :Keywords:
      *kw1*
         description

    :Returns:  a dictionary of the results and also sets ``self.results``.
    """
    from gromacs.formats import XVG

    # Template notes for plugin authors:
    # - Carry out the postprocessing in this method.
    # - Put each computed quantity into ``collected[key]``. The key name is
    #   up to you, but it *must* exist so that other functions can access
    #   the results in a uniform way.
    # - Storing class instances that have a plot() method is encouraged;
    #   in that case the plot() method below does not need to be changed.
    #   For instance, gromacs.formats.XVG(filename) creates an object from
    #   an xvg file that knows how to plot itself.
    collected = AttributeDict()

    self.results = collected
    return collected
def analyze(self, **kwargs):
    """Wrap every registered data file in an :class:`XVG` object.

    Each entry of ``self.parameters.filenames`` is loaded with ``XVG``
    and stored under the same key.

    :Returns:  the mapping of results; it is also stored in ``self.results``.
    """
    loaded = AttributeDict(
        (key, XVG(path)) for key, path in self.parameters.filenames.items())
    self.results = loaded
    return loaded
def analyze(self, **kwargs):
    """Analyze hydrogen bond output.

    The following entries are stored in the results:

    * *num*: total number of hydrogen bonds, as an ``XVG`` object
    * *matrix*: hydrogen bond existence map, as an ``XPM`` object
    * *existence*: list of ``(description, fraction)`` tuples, which is
      also written to the file ``self.parameters.filenames['existence']``
    * (others can be added easily)

    :Returns:  a dictionary of the results and also sets ``self.results``.
    """
    from gromacs.formats import XPM, XVG

    filenames = self.parameters.filenames

    results = AttributeDict()
    results['num'] = XVG(filenames['num'])
    results['matrix'] = hbm = XPM(filenames['hbm'], reverse=True)

    # Mean over axis 0 of the existence matrix: one value per hydrogen bond.
    # NOTE(review): presumably axis 0 is the time axis -- confirm against XPM.
    hb_fraction = hbm.array.mean(axis=0)

    # One description per non-comment line of the log file; the context
    # manager closes the handle (it used to be left open).
    with open(filenames['log']) as log:
        desc = [line.strip() for line in log if not line.startswith('#')]

    # Materialize the pairs: a bare zip() is a one-shot iterator on
    # Python 3 and would be exhausted by the loop below, leaving
    # results['existence'] empty for callers.
    results['existence'] = list(zip(desc, hb_fraction))

    with open(filenames['existence'], "w") as out:
        logger.info(
            "Hydrogen bond existence analysis (results['existence'] and %(existence)r)",
            filenames)
        for name, frac in results['existence']:
            logger.info("hb_existence: %-40s %4.1f%%", name, 100 * frac)
            # The conversion (!s) has to precede the format spec (:<40);
            # the reversed order raises ValueError in str.format().
            out.write("{0!s:<40} {1:4.1f}%\n".format(name, 100 * frac))

    self.results = results
    return results
def analyze(self, **kwargs):
    """Run the mindist analysis for every cysteine.

    Each residue id in ``self.parameters.cysteines`` is passed to
    :meth:`_mindist` and the outcome is stored under the key ``Cys<resid>``.

    :Returns:  the results for interactive analysis; they are also stored
               in ``self.results``.
    """
    per_cysteine = AttributeDict()
    for resid in self.parameters.cysteines:
        # the key should be a valid python variable name
        key = 'Cys{resid:d}'.format(resid=resid)
        per_cysteine[key] = self._mindist(resid)
    self.results = per_cysteine
    return per_cysteine
def analyze(self, **kwargs):
    """Collect output xvg files as :class:`gromacs.formats.XVG` objects.

    Every file in ``self.parameters.filenames`` is wrapped in an ``XVG``
    object and stored under the same key.

    :Returns:  a dictionary of the results and also sets ``self.results``.
    """
    from gromacs.formats import XVG

    logger.info("Preparing HelixBundle graphs as XVG objects.")
    graphs = AttributeDict()
    for key, path in self.parameters.filenames.items():
        graphs[key] = XVG(path)
    self.results = graphs
    return graphs
def analyze(self, **kwargs):
    """Collect the energy xvg file as a :class:`gromacs.formats.XVG` object.

    The file ``self.parameters.filenames['Energy']`` is loaded and stored
    under the key *Energy*.

    :Returns:  a dictionary of the results and also sets ``self.results``.
    """
    from gromacs.formats import XVG

    logger.info("Preparing Energy graphs as XVG objects.")
    energy_graph = XVG(self.parameters.filenames['Energy'])
    collected = AttributeDict(Energy=energy_graph)
    self.results = collected
    return collected
def __init__(self, **kwargs):
    """Set up a Simulation object.

    :Keywords:
       *sim*
            Any object that contains the attributes *tpr*, *xtc*,
            and optionally *ndx*
            (e.g. :class:`gromacs.cbook.Transformer`). The individual
            keywords such as *xtc* override the values in *sim*.
       *tpr*
            Gromacs tpr file (**required**)
       *xtc*
            Gromacs trajectory, can also be a trr (**required**)
       *edr*
            Gromacs energy file (only required for some plugins)
       *ndx*
            Gromacs index file
       *absolute*
            ``True``: Turn file names into absolute paths (typically
            required for most plugins); ``False`` keep them as they
            are [``True``]
       *strict*
            ``True``: missing required file keyword raises a
            :exc:`TypeError` and missing the file itself raises a
            :exc:`IOError`.  ``False``: missing required files only
            give a warning. [``True``]
       *analysisdir*
            directory under which derived data are stored;
            defaults to the directory containing the tpr [None]
       *plugins* : list
            plugin instances or tuples (*plugin class*, *kwarg dict*) or
            tuples (*plugin_class_name*, *kwarg dict*) to be used; more
            can be added later with :meth:`Simulation.add_plugin`.

    :Raises: :exc:`TypeError` if a required file keyword is missing from
             both the keyword arguments and *sim* while *strict* is true.
    """
    logger.info("Loading simulation data")

    sim = kwargs.pop('sim', None)
    strict = kwargs.pop('strict', True)

    def getpop(attr, required=False, strict=strict):
        """Return attribute from kwargs or sim or None"""
        val = kwargs.pop(attr, None)  # must pop from kwargs to clean it
        if val is not None:
            return val
        try:
            # getattr() also covers sim=None (AttributeError is raised)
            return getattr(sim, attr)
        except AttributeError:
            if required:
                errmsg = "Required attribute {0!r} not found in kwargs or sim".format(attr)
                if strict:
                    logger.fatal(errmsg)
                    raise TypeError(errmsg)
                else:
                    # Logger.warn() is a deprecated alias of warning()
                    logger.warning(errmsg + "... continuing because of strict=False")
                    warnings.warn(errmsg)
            return None

    make_absolute = kwargs.pop('absolute', True)

    def canonical(*args):
        """Join *args* and get the :func:`os.path.realpath`."""
        if None in args:
            return None
        if not make_absolute:
            return os.path.join(*args)
        return os.path.realpath(os.path.join(*args))

    # required files
    self.tpr = canonical(getpop('tpr', required=True))
    self.xtc = canonical(getpop('xtc', required=True))
    # optional files
    self.ndx = canonical(getpop('ndx'))
    self.edr = canonical(getpop('edr'))

    # check existence of required files
    resolve = "exception" if strict else "warn"
    for v in ('tpr', 'xtc'):
        self.check_file(v, getattr(self, v), resolve=resolve)

    # Only derive the default from the tpr when it is really needed:
    # with strict=False the tpr can be None and os.path.dirname(None)
    # would raise even if *analysisdir* had been supplied.
    if 'analysisdir' in kwargs:
        self.analysis_dir = kwargs.pop('analysisdir')
    elif self.tpr is not None:
        self.analysis_dir = os.path.dirname(self.tpr)
    else:
        self.analysis_dir = None

    #: Registry for plugins: This dict is central.
    self.plugins = AttributeDict()
    #: Use this plugin if none is explicitly specified. Typically set with
    #: :meth:`~Simulation.set_plugin`.
    self.default_plugin_name = None

    # XXX: Or should we simply add instances and then re-register
    #      all instances using register() ?
    # XXX: ... this API should be cleaned up. It seems to be connected
    #      back and forth in vicious circles. -- OB 2009-07-10

    plugins = kwargs.pop('plugins', [])
    # list of tuples (plugin, kwargs) or just (plugin,) if no kwords
    # required (eg if plugin is an instance)
    for x in plugins:
        try:
            # make sure to wrap strings, especially 2-letter ones!
            # (a separate name avoids clobbering this method's own kwargs)
            P, plugin_kwargs = asiterable(x)
        except ValueError:
            P = x
            plugin_kwargs = {}
        self.add_plugin(P, **plugin_kwargs)

    # convenience: if only a single plugin was registered we default to that one
    if len(self.plugins) == 1:
        # list() keeps this working where keys() returns a view (Python 3)
        self.set_plugin(list(self.plugins.keys())[0])

    # Is this needed? If done properly, kwargs should be empty by now BUT
    # because the same list is re-used for all plugins I cannot pop them in
    # the plugins. I don't think multiple inheritance would work with this
    # setup so let's not pretend it does: hence comment out the super-init
    # call:
    ## super(Simulation, self).__init__(**kwargs)
    logger.info("Simulation instance initialised:")
    logger.info(str(self))
def __init__(self, **kwargs):
    """Set up ProteinOnly

    :Arguments:

      *force*
         ``True`` will always regenerate trajectories even if they
         already exist, ``False`` raises an exception, ``None``
         does the sensible thing in most cases (i.e. notify and
         then move on).
      *dt* : float or list of floats
         only write every dt timestep (in ps); if a list of floats is
         supplied, write multiple trajectories, one for each dt.
      *compact* : bool
         write a compact representation
      *fit*
         Create an additional trajectory from the stripped one in which
         the Protein group is rms-fitted to the initial structure. See
         :meth:`gromacs.cbook.Transformer.fit` for details. Useful
         values:

           - "xy" : perform a rot+trans fit in the x-y plane
           - "all": rot+trans
           - ``None``: no fitting

         If *fit* is not supplied then the constructor-default is used
         (:attr:`_ProteinOnly.parameters.fit`).
      *keepalso*
          List of literal ``make_ndx`` selections that select additional
          groups of atoms that should also be kept in addition to the
          protein. For example *keepalso* = ['"POPC"', 'resname DRUG'].

    :Raises: :exc:`ValueError` if *fit* is not one of "xy", "all", ``None``.
    """
    # specific arguments: take them before calling the super class that
    # does not know what to do with them
    _fitvalues = ("xy", "all", None)
    parameters = {}
    parameters['fit'] = kwargs.pop('fit', None)  # fitting algorithm
    if parameters['fit'] not in _fitvalues:
        # Pass the values explicitly: there is no local variable named
        # 'fit', so formatting from vars() raised KeyError instead of the
        # intended ValueError.
        raise ValueError(
            "ProteinOnly: *fit* must be one of {_fitvalues!r}, not {fit!r}.".format(
                _fitvalues=_fitvalues, fit=parameters['fit']))
    parameters['compact'] = kwargs.pop('compact', False)  # compact+centered ?
    parameters['dt'] = kwargs.pop('dt', None)
    parameters['force'] = kwargs.pop('force', None)
    parameters['keepalso'] = kwargs.pop('keepalso', None)

    # super class init: do this before doing anything else
    # (also sets up self.parameters and self.results)
    super(_ProteinOnly, self).__init__(**kwargs)

    # self.parameters is set up by the base Worker class...
    self.parameters.filenames = AttributeDict()
    self.parameters.update(parameters)

    # self.simulation might have been set by the super class
    # already; just leave this snippet at the end. Do all
    # initialization that requires the simulation class in the
    # _register_hook() method.
    if self.simulation is not None:
        self._register_hook()
def __init__(self, molecule=None, **kwargs):
    """Set up Simulation instance.

    The *molecule* of the compound molecule should be supplied. Existing files
    (which have been generated in previous runs) can also be supplied.

    :Keywords:
      *molecule*
          Identifier for the compound molecule. This is the same as the
          entry in the ``[ molecule ]`` section of the itp file. ["DRUG"]
      *filename*
          If provided and *molecule* is ``None`` then load the instance from
          the pickle file *filename*, which was generated with
          :meth:`~mdpow.equil.Simulation.save`.
      *dirname*
          base directory; all other directories are created under it
      *forcefield*
          'OPLS-AA' or 'CHARMM' or 'AMBER'
      *solvent*
          'water' or 'octanol' or 'cyclohexane' or 'wetoctanol'
      *solventmodel*
          ``None`` chooses the default (e.g.,
          :data:`mdpow.forcefields.DEFAULT_WATER_MODEL` for
          ``solvent == "water"``). Other options are the models defined in
          :data:`mdpow.forcefields.GROMACS_WATER_MODELS`. At the moment,
          there are no alternative parameterizations included for other
          solvents.
      *mdp*
          dict with keys corresponding to the stages ``energy_minimize``,
          ``MD_restrained``, ``MD_relaxed``,
          ``MD_NPT`` and values *mdp* file names (if no entry then the
          package defaults are used)
      *distance*
           minimum distance between solute and closest box face
      *kwargs*
          advanced keywords for short-circuiting; see
          :data:`mdpow.equil.Simulation.filekeys`.

    :Raises: :exc:`ValueError` if no solvent model identifier is found for
             the requested solvent/solventmodel combination.
    """
    self.__cache = {}

    filename = kwargs.pop('filename', None)
    dirname = kwargs.pop('dirname', self.dirname_default)
    forcefield = kwargs.pop('forcefield', 'OPLS-AA')
    solvent = kwargs.pop('solvent', self.solvent_default)
    # 'water' will choose the default ('tip4p'), other choices are
    # 'tip3p', 'spc', 'spce', 'm24', for water; no choices
    # available for 'cyclohexane' and 'octanol'
    solventmodel = kwargs.pop('solventmodel', None)
    mdp_kw = kwargs.pop('mdp', {})

    # mdp files --- should get values from default runinput.cfg
    # self.mdp: key = stage, value = path to MDP file
    default_templates = {}
    for stage, fn in self.mdp_defaults.items():
        default_templates[stage] = config.get_template(fn)
    self.mdp = default_templates

    # None values in the kwarg mdp dict are ignored
    user_templates = {}
    for stage, fn in mdp_kw.items():
        if fn is not None:
            user_templates[stage] = config.get_template(fn)
    self.mdp.update(user_templates)

    if molecule is not None or filename is None:
        self.molecule = molecule or 'DRUG'

        basedir = realpath(dirname)  # .../Equilibrium/<solvent>
        includes = list(asiterable(kwargs.pop('includes', []))) + [config.includedir]
        self.dirs = AttributeDict(basedir=basedir, includes=includes)

        # pre-set filenames: keyword == variable name
        self.files = AttributeDict(
            [(k, kwargs.pop(k, None)) for k in self.filekeys])
        self.deffnm = kwargs.pop("deffnm", "md")

        if self.files.topology:
            # assume that a user-supplied topology lives in a 'standard' top dir
            # that includes the necessary itp file(s)
            self.dirs.topology = realpath(os.path.dirname(self.files.topology))
            self.dirs.includes.append(self.dirs.topology)

        self.forcefield = forcefield
        self.solvent_type = solvent
        self.solventmodel_identifier = forcefields.get_solvent_identifier(
            solvent,
            model=solventmodel,
            forcefield=forcefield,
        )
        if self.solventmodel_identifier is None:
            msg = "No parameters for solvent {0} and solventmodel {1} available.".format(
                solvent, solventmodel)
            logger.error(msg)
            raise ValueError(msg)
        self.solventmodel = forcefields.get_solvent_model(
            self.solventmodel_identifier,
            forcefield=forcefield,
        )

        distance = kwargs.pop('distance', None)
        if distance is None:
            distance = DIST[solvent]

        self.solvent = AttributeDict(itp=self.solventmodel.itp,
                                     box=self.solventmodel.coordinates,
                                     distance=distance)

        self.filename = filename or self.solvent_type + '.simulation'
    else:
        # no molecule but a filename: load from pickle file
        self.load(filename)
        self.filename = filename
        kwargs = {}  # for super

    super(Simulation, self).__init__(**kwargs)
def analyze(self, **kwargs):
    """Load results from disk into :attr:`_Dihedrals.results` and compute PMF.

    The PMF W(phi) in kT is computed from each dihedral
    probability distribution P(phi) as

       W(phi) = -kT ln P(phi)

    It is stored in :attr:`_Dihedrals.results` with the key *PMF*.

    :Keywords:
      *bins*
         bins for histograms (passed to :func:`numpy.histogram`) [361]

    :Returns: a dictionary of the results and also sets
              :attr:`_Dihedrals.results`.
    """
    bins = kwargs.pop('bins', 361)

    results = AttributeDict()

    # get graphs that were produced by g_angle
    for name, f in self.parameters.filenames.items():
        try:
            results[name] = XVG(f)
        except IOError:
            pass  # either not computed (yet) or some failure

    # compute individual distributions
    ts = results['timeseries'].array  # ts[0] = time, ts[1] = avg
    dih = ts[2:]

    phi_range = (-180., 180.)

    # histograms (prob. distributions), one for each dihedral.
    # ``density=True`` replaces the ``normed=True, new=True`` keywords,
    # which current NumPy releases no longer accept; for equal-width bins
    # the normalisation is the same.
    p = []
    for phis in dih:
        hist, e = numpy.histogram(phis, bins=bins, range=phi_range,
                                  density=True)
        p.append(hist)

    P = numpy.array(p)
    phi = 0.5 * (e[:-1] + e[1:])  # midpoints of bin edges
    distributions = numpy.concatenate((phi[numpy.newaxis, :], P))  # phi, P[0], P[1], ...

    xvg = XVG()
    xvg.set(distributions)
    xvg.write(self.parameters.filenames['distributions'])
    results['distributions'] = xvg
    del xvg

    # compute PMF (from individual distributions)
    W = -numpy.log(P)  # W(phi)/kT = -ln P
    W -= W.min(axis=1)[:, numpy.newaxis]  # minimum at 0 kT
    pmf = numpy.concatenate((phi[numpy.newaxis, :], W), axis=0)
    xvg = XVG()
    xvg.set(pmf)
    xvg.write(self.parameters.filenames['PMF'])
    results['PMF'] = xvg

    self.results = results
    return results
def analyze(self, **kwargs):
    """Collect output xvg files as :class:`gromacs.formats.XVG` objects.

    - Make COM as a function of time available as XVG files and objects.
    - Compute RMSD of the COM of each group (from average position, "rmsd").
    - Compute distance which encompasses 50% of observations ("median")
    - Compute drift of COM, i.e. length of the vector between
      initial and final position. Initial and final position are
      computed as averages over *nframesavg* frames ("drift").

    RMSD, median, and drift are columns in an xvg file. The rows correspond
    to the groups in :attr:`gromacs.analysis.plugins.com.Worker.results.group_names`.

    :Keywords:
      *nframesavg*
          number of initial and final frames that are averaged in
          order to compute the drift of the COM of each group
          [5000]
      *refgroup*
          group name whose com is taken as the reference and subtracted from
          all other coms for the distance calculations. If supplied,
          additional result 'com_relative_*refgroup*' is created.

    :Returns:  a dictionary of the results and also sets
              :attr:`gromacs.analysis.plugins.com.Worker.results`.

    :Raises: :exc:`ValueError` if *refgroup* is not one of the group names.
    """
    from gromacs.formats import XVG

    logger.info("Preparing COM graphs as XVG objects.")
    self.results = AttributeDict(
        (k, XVG(fn)) for k, fn in self.parameters.filenames.items())

    # compute RMSD of COM and shift of COM (drift) between avg pos
    # over first/last 5,000 frames
    nframesavg = kwargs.pop('nframesavg', 5000)
    ngroups = len(self.parameters.group_names)
    # layout implied by the indexing below: row 0 is followed by three
    # rows per group
    xcom = self.results['com'].array

    refgroup = kwargs.pop('refgroup', None)
    if refgroup is not None:
        if refgroup not in self.parameters.group_names:
            errmsg = "refgroup={0!s} must be one of {1!r}".format(
                refgroup, self.parameters.group_names)
            logger.error(errmsg)
            raise ValueError(errmsg)
        nreference = 1 + 3 * self.parameters.group_names.index(refgroup)  # 1-based !!
        reference_com = xcom[nreference:nreference + 3]
        # NOTE(review): this subtraction works in place on the array obtained
        # from results['com'] -- confirm whether that result is meant to
        # become relative as well.
        xcom[1:] -= numpy.vstack(ngroups * [reference_com])  # can't use broadcast
        logger.debug("distances computed with refgroup %r", refgroup)

        self.store_xvg('com_relative_{0!s}'.format(refgroup), xcom,
                       names=['time'] + self.parameters.group_names)

    def vlength(v):
        """Return the Euclidean length along axis 0 (one value per column)."""
        return numpy.sqrt(numpy.sum(v**2, axis=0))  # distances over time step

    logger.debug(
        "drift calculated between %d-frame averages at beginning and end",
        nframesavg)
    records = []
    # range() instead of the Python-2-only xrange(); one group per step of 3
    for i in range(1, 3 * ngroups + 1, 3):
        x = xcom[i:i + 3]
        r = vlength(x - x.mean(axis=1)[:, numpy.newaxis])  # distances over time step
        # An earlier, disabled variant derived the median from a cumulative
        # histogram of r and built a normalized radial distribution
        # function; the direct estimates below are used instead.
        rmsd = numpy.sqrt(numpy.mean(r**2))  # radial spread sqrt(radint(m**2 * g))
        median = numpy.median(r)  # radius that contains 50% of the observations
        dx = x[:, :nframesavg].mean(axis=1) - x[:, -nframesavg:].mean(axis=1)
        drift = vlength(dx)
        records.append((rmsd, median, drift))
    self.store_xvg('distance', numpy.transpose(records),
                   names="rmsd,median,drift")

    return self.results
def __init__(self, **kwargs):
    """Set up StripWater

    :Arguments:

      *force*
         ``True`` will always regenerate trajectories even if they
         already exist, ``False`` raises an exception, ``None``
         does the sensible thing in most cases (i.e. notify and
         then move on).
      *dt* : float or list of floats
         only write every dt timestep (in ps); if a list of floats is
         supplied, write multiple trajectories, one for each dt.
      *compact* : bool
         write a compact representation
      *centergroup*
         Index group to center on ["Protein"]
      *fit*
         Create an additional trajectory from the stripped one in which
         the *fitgroup* group is rms-fitted to the initial structure. See
         :meth:`gromacs.cbook.Transformer.fit` for details. Useful
         values:

           - "xy" : perform a rot+trans fit in the x-y plane
           - "all": rot+trans
           - ``None``: no fitting

         If *fit* is not supplied then the constructor-default is used
         (:attr:`_StripWater.parameters.fit`).
      *fitgroup*
         Index group to fit to with the *fit* option; must be changed if
         molecule is not a protein and automatically recognized. Also
         consider supplying a custom index file. ["backbone"]
      *resn*
         name of the residues that are stripped (typically it is
         safe to leave this at the default 'SOL')
      *outdir*
         place generated files in *outdir* instead of the same directory
         where the input tpr/xtc lived [``None``]

    :Raises: :exc:`ValueError` if *fit* is not one of "xy", "all", ``None``.

    .. Note::

       If set, *dt* is only applied to a fit step; the no-water
       trajectory is always generated for all time steps of the
       input.
    """
    # specific arguments: take them before calling the super class that
    # does not know what to do with them
    _fitvalues = ("xy", "all", None)
    parameters = {}
    parameters['fit'] = kwargs.pop('fit', None)  # fitting algorithm
    if parameters['fit'] not in _fitvalues:
        # Pass the values explicitly: there is no local variable named
        # 'fit', so formatting from vars() raised KeyError instead of the
        # intended ValueError.
        raise ValueError(
            "StripWater: *fit* must be one of {_fitvalues!r}, not {fit!r}.".format(
                _fitvalues=_fitvalues, fit=parameters['fit']))
    parameters['fitgroup'] = kwargs.pop('fitgroup', "backbone")
    parameters['centergroup'] = kwargs.pop('centergroup', "Protein")
    parameters['compact'] = kwargs.pop('compact', False)  # compact+centered ?
    parameters['resn'] = kwargs.pop('resn', 'SOL')  # residue name to be stripped
    parameters['dt'] = kwargs.pop('dt', None)
    parameters['force'] = kwargs.pop('force', None)
    parameters['outdir'] = kwargs.pop('outdir', None)

    # super class init: do this before doing anything else
    # (also sets up self.parameters and self.results)
    super(_StripWater, self).__init__(**kwargs)

    # self.parameters is set up by the base Worker class...
    self.parameters.filenames = AttributeDict()
    self.parameters.update(parameters)

    # self.simulation might have been set by the super class
    # already; just leave this snippet at the end. Do all
    # initialization that requires the simulation class in the
    # _register_hook() method.
    if self.simulation is not None:
        self._register_hook()