def split_charmm(info, data_dir=path.join(os.getcwd(), "data"), **kwargs):
    """Create a subtrajectory from a CHARMM trajectory.

    A CHARMM input script (``split.inp``) is written into the target
    subdirectory and CHARMM is executed on it.

    Parameters
    ----------
    info : :class:`collections.namedtuple`
        Contains information about the data subdirectory and start and stop
        frames
    data_dir : str, optional
        Location of the main data directory
    toppar : str, optional
        Directory containing CHARMM topology/parameter files
    trajectory : str, optional
        A CHARMM trajectory file (e.g., dcd)
    outfile : str, optional
        A CHARMM trajectory file (e.g., dcd)
    logfile : str, optional
        Log file for output of command
    charmm_version : int
        Version of CHARMM

    Raises
    ------
    OSError
        If the CHARMM executable cannot be found or the subdirectory cannot
        be created.
    subprocess.CalledProcessError
        If CHARMM exits with a non-zero status.
    """
    # Trajectory splitting information
    subdir, start, stop = info
    subdir = path.join(data_dir, "{}".format(subdir))

    charmm_exec = mdutil.which("charmm")
    if charmm_exec is None:
        # Fail with a readable message instead of a TypeError from subprocess.
        raise OSError("CHARMM is required. If installed, "
                      "please ensure that it is in your path.")

    # Attempt to create the necessary subdirectory; an already existing
    # directory is fine, any other failure (e.g. permissions) is reported.
    try:
        os.makedirs(subdir)
    except OSError:
        if not path.isdir(subdir):
            raise

    # Various filenames
    version = kwargs.get("charmm_version", 41)
    toppar = kwargs.get("toppar",
                        "/opt/local/charmm/c{:d}b1/toppar".format(version))
    trajectory = kwargs.get("trajectory", path.join(os.curdir, "md.dcd"))
    outfile = path.join(subdir, kwargs.get("outfile", "aa.dcd"))
    logfile = path.join(subdir, kwargs.get("logfile", "split.log"))
    inpfile = path.join(subdir, "split.inp")

    charmm_inp = charmm_split.split_inp.format(
        toppar=toppar,
        trajectory=trajectory,
        outfile=outfile,
        version=version,
        start=start,
        stop=stop,
    )
    # The first character of the template is skipped before dedenting.
    charmm_inp = textwrap.dedent(charmm_inp[1:])

    # The input file is closed (and therefore flushed) before CHARMM reads it.
    with mdutil.openany(inpfile, "w") as charmm_input:
        print(charmm_inp, file=charmm_input)

    # ``logfile`` already contains ``subdir``; joining it a second time would
    # duplicate the directory whenever ``data_dir`` is a relative path.
    command = [
        charmm_exec,
        "-i",
        inpfile,
        "-o",
        logfile,
    ]
    subprocess.check_call(command)
def run_atomic_fluct(self, charmm_exec=None):
    """Write the QHA and NMA CHARMM input files and run both calculations.

    For each of the two calculations the input file is generated from its
    template only if it does not exist yet; an existing input file is reused
    as-is. CHARMM output (stdout and stderr) is written to the matching log
    file.

    Parameters
    ----------
    charmm_exec : str, optional
        CHARMM executable. If ``None``, the ``CHARMMEXEC`` environment
        variable is used, falling back to a ``charmm`` found on the PATH.

    Raises
    ------
    OSError
        If no CHARMM executable can be determined.
    subprocess.CalledProcessError
        If CHARMM exits with a non-zero status.
    """
    # Find CHARMM executable
    charmm_exec = (os.environ.get("CHARMMEXEC", util.which("charmm"))
                   if charmm_exec is None else charmm_exec)
    if charmm_exec is None:
        message = ("Please set CHARMMEXEC with the location of your CHARMM "
                   "executable file or add the charmm path to your PATH "
                   "environment.")
        # logger.exception is only meaningful inside an ``except`` block.
        logger.error(message)
        raise_with_traceback(OSError(message))

    # (filename prefix, template, message when writing, message when running)
    jobs = (
        ("qha", charmm_afqha.afqha, "Writing CHARMM QHA input file.",
         "Running QHA Atomic Fluctuations."),
        ("nma", charmm_afnma.afnma, "Writing NMA CHARMM input file.",
         "Running NMA Atomic Fluctuations."),
    )

    # Write CHARMM input files and run atomic fluctuations
    for prefix, template, write_message, run_message in jobs:
        input_key = prefix + "_input"
        log_key = prefix + "_log"

        if not path.exists(self.filenames[input_key]):
            version = self.kwargs.get("charmm_version", 41)
            dimension = ("dimension chsize 1000000" if version >= 36 else "")
            with open(self.filenames[input_key], mode="wb") as charmm_file:
                logger.info(write_message)
                charmm_inp = template.format(
                    temperature=self.temperature,
                    flex="flex" if version else "",
                    version=version,
                    dimension=dimension,
                    **self.filenames)
                # The first character of the template is skipped before
                # dedenting.
                charmm_inp = textwrap.dedent(charmm_inp[1:])
                charmm_file.write(charmm_inp.encode())

        logger.info(run_message)
        with open(self.filenames[log_key], "w") as log_file:
            subprocess.check_call(
                [charmm_exec, "-i", self.filenames[input_key]],
                stdout=log_file,
                stderr=subprocess.STDOUT,
            )
def split_gmx(info, data_dir=path.join(os.getcwd(), "data"), **kwargs):
    """Create a subtrajectory from a Gromacs trajectory.

    Runs ``gmx trjconv`` for the requested frame range; the atom selection
    is fed to the command on standard input and all command output is
    written to the log file.

    Parameters
    ----------
    info : :class:`collections.namedtuple`
        Contains information about the data subdirectory and start and stop
        frames
    data_dir : str, optional
        Location of the main data directory
    topology : str, optional
        Topology filename (e.g., tpr gro g96 pdb brk ent)
    trajectory : str, optional
        A Gromacs trajectory file (e.g., xtc trr)
    index : str, optional
        A Gromacs index file (e.g., ndx)
    outfile : str, optional
        A Gromacs trajectory file (e.g., xtc trr)
    logfile : str, optional
        Log file for output of command
    system : int
        Atom selection from Gromacs index file (0 = System, 1 = Protein)

    Raises
    ------
    OSError
        If the ``gmx`` executable cannot be found or the subdirectory cannot
        be created.
    subprocess.CalledProcessError
        If ``gmx trjconv`` exits with a non-zero status.
    """
    # Trajectory splitting information
    subdir, start, stop = info
    subdir = path.join(data_dir, "{}".format(subdir))

    gromacs_exec = mdutil.which("gmx")
    if gromacs_exec is None:
        raise OSError("Gromacs 5.0+ is required. "
                      "If installed, please ensure that it is in your path.")

    # Attempt to create the necessary subdirectory; an already existing
    # directory is fine, any other failure (e.g. permissions) is reported.
    try:
        os.makedirs(subdir)
    except OSError:
        if not path.isdir(subdir):
            raise

    # Various filenames
    topology = kwargs.get("topology", "md.tpr")
    trajectory = kwargs.get("trajectory", path.join(os.curdir, "md.xtc"))
    index = kwargs.get("index")
    outfile = path.join(subdir, kwargs.get("outfile", "aa.xtc"))
    logfile = path.join(subdir, kwargs.get("logfile", "split.log"))

    # Both variants use the resolved executable; the index file is optional.
    command = [gromacs_exec, "trjconv", "-s", topology, "-f", trajectory]
    if index is not None:
        command.extend(["-n", index])
    command.extend([
        "-o", outfile,
        "-b", "{:d}".format(start),
        "-e", "{:d}".format(stop),
    ])

    # The selection is passed to the command through a temporary file used
    # as stdin. mkstemp returns an open descriptor that must be closed by
    # the caller, and the file is removed even when the command fails.
    fd, fpath = tempfile.mkstemp(text=True)
    try:
        with os.fdopen(fd, "w") as temp:
            print(kwargs.get("system", 0), file=temp)

        with mdutil.openany(fpath, "r") as temp, \
                mdutil.openany(logfile, mode="w") as log:
            logger.info("Writing trajectory to %s", outfile)
            logger.info("Writing Gromacs output to %s", logfile)
            subprocess.check_call(command,
                                  stdin=temp,
                                  stdout=log,
                                  stderr=subprocess.STDOUT)
    finally:
        os.remove(fpath)
@click.option( "-o", "outdir", metavar="DIR", default=os.getcwd(), show_default=True, type=click.Path(exists=False, file_okay=False, resolve_path=True), help="Directory", ) @click.option( "-e", "--exec", "nma_exec", metavar="FILE", envvar="CHARMMEXEC", default=which("charmm"), show_default=True, type=click.Path(exists=False, file_okay=True, resolve_path=True), help="CHARMM executable file", ) @click.option( "-t", "--temperature", metavar="TEMP", type=click.FLOAT, default=300.0, show_default=True, help="Temperature of simulation", ) @click.option( "-n",
def __init__(self, filename, **kwargs):
    """Set up parameters to run HOLE_ on PDB *filename*.

    :Arguments:

      *filename*

           The *filename* is used as input for HOLE in the "COORD" card of
           the input file. It specifies the name of a PDB co-ordinate file
           to be used. This must be in Brookhaven protein databank format
           or something closely approximating this. Both ATOM and HETATM
           records are read. Note that if water molecules or ions are
           present in the channel these can be ignored on read by the use
           of the *ignore_residues* keyword.

           **Wildcard pattern**. A new feature (in release 2.1 of HOLE) was
           the option to include a wild card (``*``) in the filename. e.g.,
           *filename* = `"ab*.pdb"` will apply hole to all files in the
           directory whose name starts with ``ab`` and ends with
           ``.pdb``. This is intended to aid the analysis of multiple
           copies of the same molecule - produced during molecular dynamics
           or other method. The hole procedure will be applied to each file
           in turn with the same setup conditions (initial point, sampling
           distance etc.). Graphics files will contain a combination of the
           individual runs, one after another. Note that the pdb files are
           read independently so that they need not have an identical
           number of atoms or atom order etc. (though they should be
           sufficiently similar for a HOLE run from identical starting
           conditions to be useful).

           .. SeeAlso::

              An alternative way to load in multiple files is a direct read
              from a CHARMM binary dynamics DCD coordinate file - using the
              *dcd* keyword or use :class:`HOLEtraj`.

    :Keywords:

      *dcd*
           DCD trajectory (must be supplied together with a matching
           PDB file *filename*) and then HOLE runs its analysis on each
           frame.

           It does multiple HOLE runs on positions taken from a CHARMM binary
           dynamics format .DCD trajectory file. The *dcd* file must have
           exactly the same number of atoms in exactly the same order as the
           pdb file specified by *filename*. Note that if this option is used
           the pdb file is used as a template only - the coordinates are
           ignored. Note that structural parameters determined for each
           individual structure are written in a tagged format so that it is
           possible to extract the information from the text output file
           using a :program:`grep` command. The reading of the file can be
           controlled by the *step* keyword and/or setting
           :attr:`HOLE.dcd_iniskip` to the number of frames to be skipped
           initially.

           .. Note::

              HOLE is very picky and does not read all DCD-like
              formats. If in doubt, look into the *logfile* for error
              diagnostics.

              At the moment, DCDs generated with MDAnalysis are not
              accepted by HOLE — use :class:`HOLEtraj`, which works with
              anything that MDAnalysis can read.

      *logfile*
           name of the file collecting HOLE's output (which can be parsed
           using :meth:`HOLE.collect`) ["hole.out"]
      *step*
           step size for going through the trajectory (skips *step* - 1
           frames) [1]
      *cpoint*
           coordinates of a point inside the pore, e.g. ``[12.3, 0.7,
           18.55]``. If ``None`` then HOLE's own simple search algorithm is
           used.

           This specifies a point which lies within the channel, for simple
           channels such as gramicidin results do not show great sensitivity
           to the exact point taken. An easy way to produce an initial point
           is to use molecular graphics to find two atoms which lie either
           side of the pore and to average their co-ordinates. Or if the
           channel structure contains water molecules or counter ions then
           take the coordinates of one of these (and use the
           *ignore_residues* keyword to ignore them in the pore radius
           calculation).

           If this card is not specified then HOLE now (from version 2.2)
           attempts to make a guess where the channel will be. The procedure
           assumes the channel is reasonably symmetric. The initial guess on
           cpoint will be the centroid of all alpha carbon atoms (name 'CA'
           in pdb file). This is then refined by a crude grid search up to 5
           Å from the original position. This procedure works most of the
           time but is clearly far from infallible — results should be
           carefully checked (with molecular graphics) if it is used. [``None``]
      *cvect*
           Search direction, should be parallel to the pore axis,
           e.g. ``[0,0,1]`` for the z-axis.

           If this keyword is ``None`` then HOLE now attempts to make a guess
           where the channel will be. The procedure assumes the channel is
           reasonably symmetric. The guess will be either along the X axis
           (1,0,0), Y axis (0,1,0) or Z axis (0,0,1). If the structure is not
           aligned on one of these axes the results will clearly be
           approximate. If a guess is used then results should be carefully
           checked. [``None``]
      *sample*
           distance of sample points in Å

           Specifies the distance between the planes used in the HOLE
           procedure. The default value is 0.2 Å, this should be reasonable
           for most purposes. However, if you wish to visualize a very tight
           constriction then specify a smaller value. [0.2]
      *dotden*
           density of facettes for generating a 3D pore representation

           This number controls the density of dots which will be used by
           the program. A sphere of dots is placed on each centre
           determined in the Monte Carlo procedure. Only dots which do not
           lie within any other sphere are considered. The actual number of
           dots written is therefore controlled by *dotden* and
           *sample*. *dotden* should be set to between 5 (few dots per
           sphere) and 35 (large number of dots per sphere). [15]
      *endrad*
           Radius which is considered to be the end of the pore. This
           keyword can be used to specify the radius above which the
           program regards a result as indicating that the end of the pore
           has been reached. The default value is 22.0 Å. This may need to
           be increased for large channels or reduced for small. [22.0]
      *shorto*
           Determines the amount of output in the *logfile*; for automated
           processing this must be < 3.

           - 0: Full text output
           - 1: All text output given except "run in progress" (i.e.,
             detailed contemporary description of what HOLE is doing).
           - 2: Ditto plus no graph type output - only leaving minimum
             radius and conductance calculations.
           - 3: All text output other than input card mirroring and error
             messages turned off.

           [0]
      *ignore_residues*
           sequence of three-letter residues that are not taken into
           account during the calculation; wildcards are *not*
           supported [ ``["SOL","WAT", "TIP", "HOH", "K  ", "NA ", "CL "]`` ]
      *radius*
           Path to the radii; if set to None then a set of default radii,
           :data:`SIMPLE2_RAD`, is used (an extension of ``simple.rad`` from
           the HOLE distribution)

           This specifies the name for the file specifying van der Waals
           radii for each atom. A number of files with different values are
           supplied with HOLE.
      *sphpdb*
           name of the HOLE sph file, a PDB-like file containing the
           coordinates of the pore centers

           This keyword specifies the filename for output of the sphere
           centre information in pdb form. Its typical suffix is
           ".sph". The co-ordinates are set to the sphere centres and the
           occupancies are the sphere radii. All centres are assigned the
           atom name QSS and residue name SPH and the residue number is set
           to the storage number of the centre. The file can be imported
           into molecular graphics programs but are likely to be bonded
           together in an awful manner - as the points are very close to one
           another. In VMD sph objects are best displayed as
           "Points". Displaying .sph objects rather than rendered or dot
           surfaces can be useful to analyze the distance of particular
           atoms from the sphere-centre line.

           Most usefully .sph files can be used to produce molecular
           graphical output from a hole run. This is achieved by using the
           :program:`sph_process` program to read the .sph
           file. ["hole.sph"]
      *executable*
           Path to the :program:`hole` executable
           (e.g. ``/opt/hole/exe/hole``); the other programs
           :program:`sph_process` and :program:`sos_triangle` are assumed
           to live in the same directory as :program:`hole`. If
           :program:`hole` is found on the :envvar:`PATH` then the bare
           executable name is sufficient. ["hole"]

    :Raises: :exc:`OSError` (``errno.ENOENT``) if the :program:`hole`
             executable cannot be found.
    """
    # list of temporary files, to be cleaned up on __del__
    self.tempfiles = []
    self.tempdirs = []

    self.filename = filename
    self.coordinates = self.check_and_fix_long_filename(self.filename)
    self.dcd = kwargs.pop('dcd', None)
    if self.dcd:
        self.dcd = self.check_and_fix_long_filename(self.dcd)
    # HOLE docs description is confusing: step or skip??
    self.dcd_step = kwargs.pop("step", 1) - 1
    self.dcd_iniskip = 0
    self.cpoint = kwargs.pop("cpoint", None)
    self.cvect = kwargs.pop("cvect", None)
    self.sample = float(kwargs.pop("sample", 0.20))
    self.dotden = int(kwargs.pop("dotden", 15))
    self.endrad = float(kwargs.pop("endrad", 22.))
    # look at using SHORTO 2 for minimum output
    self.shorto = int(kwargs.pop("shorto", 0))
    self.ignore_residues = kwargs.pop("ignore_residues",
                                      self.default_ignore_residues)
    # The default radius file is only written when no radius was supplied.
    self.radius = self.check_and_fix_long_filename(
        realpath(kwargs.pop('radius', None) or write_simplerad2()))

    logger.info("Setting up HOLE analysis for %r", self.filename)
    logger.info("Using radius file %r", self.radius)

    # guess executables
    self.exe = {}
    hole_exe_name = kwargs.pop('executable', 'hole')
    self.exe['hole'] = which(hole_exe_name)
    if self.exe['hole'] is None:
        errmsg = "HOLE binary %r not found." % (hole_exe_name,)
        logger.critical(errmsg)
        logger.critical(
            "%r must be on the PATH or provided as keyword argument 'executable'.",
            hole_exe_name)
        raise OSError(errno.ENOENT, errmsg)
    # The helper programs are looked up next to the hole binary.
    holepath = os.path.dirname(self.exe['hole'])
    self.exe['sos_triangle'] = os.path.join(holepath, "sos_triangle")
    self.exe['sph_process'] = os.path.join(holepath, "sph_process")

    self.sphpdb = kwargs.pop("sphpdb", "hole.sph")
    self.logfile = kwargs.pop("logfile", "hole.out")

    # HOLE input file template, one card per line; the %(...) fields are
    # filled in later by the code that renders the template.
    self.template = textwrap.dedent("""
        ! Input file for Oliver Smart's HOLE program
        ! written by MDAnalysis.analysis.hole.HOLE
        ! filename = %(filename)s
        COORD %(coordinates)s
        RADIUS %(radius)s
        SPHPDB %(sphpdb)s
        SAMPLE %(sample)f
        ENDRAD %(endrad)f
        IGNORE %(ignore)s
        SHORTO %(shorto)d
        """)
    if self.cpoint is not None:
        # note: if it is None then we can't change this with a kw for run() !!
        self.template += "CPOINT %(cpoint_xyz)s\n"
    else:
        logger.info("HOLE will guess CPOINT")
    if self.cvect is not None:
        # note: if it is None then we can't change this with a kw for run() !!
        self.template += "CVECT %(cvect_xyz)s\n"
    else:
        logger.info("HOLE will guess CVECT")

    if self.dcd:
        # CHARMD -- DCD (matches COORD)
        # CHARMS int int -- ignore_first_N_frames skip_every_X_frames
        # http://s3.smartsci.uk/hole2/doc/old/hole_d03.html#CHARMD
        self.template += "\nCHARMD %(dcd)s\nCHARMS %(dcd_iniskip)d %(dcd_step)d\n"

    # sanity checks
    if self.shorto > 2:
        # Logger.warn is a deprecated alias of Logger.warning.
        logger.warning(
            "SHORTO (%d) needs to be < 3 in order to extract a HOLE profile!",
            self.shorto)
    for program, exe_path in self.exe.items():
        if exe_path is None or which(exe_path) is None:
            logger.error("Executable %r not found, should have been %r.",
                         program, exe_path)
    # results
    self.profiles = {}
def __init__(self, filename, **kwargs):
    """Set up parameters to run X3DNA_ on PDB *filename*.

    Parameters
    ----------
    filename : str
        The `filename` is used as input for X3DNA in the
        :program:`xdna_ensemble` command. It specifies the name of a PDB
        coordinate file to be used. This must be in Brookhaven protein
        databank format or something closely approximating this.
    executable : str (optional)
        Path to the :program:`xdna_ensemble` executable (default:
        "xdna_ensemble"). The X3DNA directories (e.g. ``/opt/x3dna/2.1``
        and ``/opt/x3dna/2.1/bin``) must be set and exported in the shell
        start-up file (e.g. bashrc). See X3DNA documentation for set-up
        instructions.
    x3dna_param : bool (optional)
        Determines whether base step or base pair parameters will be
        calculated. If ``True`` (default) then stacked *base step*
        parameters will be analyzed. If ``False`` then stacked *base pair*
        parameters will be analyzed.
    logfile : str (optional)
        Write output from X3DNA to `logfile` (default: "bp_step.par")

    Raises
    ------
    OSError
        With ``errno.ENOENT`` if the executable cannot be found.

    See Also
    --------
    :class:`X3DNAtraj`
    """
    # list of temporary files, to be cleaned up on __del__
    self.tempfiles = [
        "auxiliary.par", "bestpairs.pdb", "bp_order.dat", "bp_helical.par",
        "cf_7methods.par", "col_chains.scr", "col_helices.scr",
        "hel_regions.pdb", "ref_frames.dat", "hstacking.pdb", "stacking.pdb"
    ]
    self.tempdirs = []
    self.filename = filename

    logger.info("Setting up X3DNA analysis for %r", self.filename)

    # guess executables
    self.exe = {}
    x3dna_exe_name = kwargs.pop('executable', 'xdna_ensemble')
    self.x3dna_param = kwargs.pop('x3dna_param', True)
    self.exe['xdna_ensemble'] = which(x3dna_exe_name)
    if self.exe['xdna_ensemble'] is None:
        errmsg = "X3DNA binary {!r} not found.".format(x3dna_exe_name)
        logger.critical(errmsg)
        logger.critical(
            "%r must be on the PATH or provided as keyword argument 'executable'.",
            x3dna_exe_name)
        raise OSError(errno.ENOENT, errmsg)
    self.logfile = kwargs.pop("logfile", "bp_step.par")

    # Command template; %(filename)r is filled in later by the code that
    # renders the template. Only an explicit ``False`` selects the
    # ``x3dna_ensemble analyze`` command line.
    if self.x3dna_param is False:
        self.template = textwrap.dedent(
            """x3dna_ensemble analyze -b 355d.bps --one %(filename)r """)
    else:
        self.template = textwrap.dedent(
            """find_pair -s %(filename)r stdout |analyze stdin """)

    # sanity checks
    for program, exe_path in self.exe.items():
        if exe_path is None or which(exe_path) is None:
            logger.error("Executable %r not found, should have been %r.",
                         program, exe_path)
    # results
    self.profiles = OrderedDict()
def cli(program, toppar, topology, trajectory, data, index, outfile, logfile,
        system, start, stop, window_size):
    # Split a trajectory into overlapping windows and write one
    # subtrajectory per window, running the conversions in parallel.
    #
    # NOTE(review): documented with comments rather than a docstring in case
    # this function is registered as a click command, where a docstring would
    # become the user-facing help text -- confirm against the decorators.
    #
    # Windows are ``window_size`` frames long and start every
    # ``window_size // 2`` frames. Each window is handed to the converter
    # selected by ``program`` as ``(label, first_frame, last_frame)``.
    #
    # Raises OSError if the required external program is not on the PATH,
    # ValueError if ``window_size`` is smaller than 2, and re-raises the
    # first exception raised by a worker process.
    logging.config.dictConfig({
        "version": 1,
        # Keep loggers that were created before this call enabled.
        "disable_existing_loggers": False,
        "formatters": {
            "standard": {
                "class": "logging.Formatter",
                "format": "%(name)-12s %(levelname)-8s %(message)s",
            },
            "detailed": {
                "class": "logging.Formatter",
                "format":
                "%(asctime)s %(name)-15s %(levelname)-8s %(message)s",
                "datefmt": "%m-%d-%y %H:%M",
            },
        },
        "handlers": {
            "console": {
                "class": "logging.StreamHandler",
                "level": "INFO",
                "formatter": "standard",
            },
            "file": {
                "class": "logging.FileHandler",
                "filename": path.join(os.getcwd(), logfile),
                "level": "INFO",
                "mode": "w",
                "formatter": "detailed",
            }
        },
        "root": {
            "level": "INFO",
            "handlers": ["console", "file"]
        },
    })
    logger = logging.getLogger(__name__)

    if program == "GMX" and mdutil.which("gmx") is None:
        message = ("Gromacs 5.0+ is required. "
                   "If installed, please ensure that it is in your path.")
        logger.error(message)
        raise OSError(message)
    if program == "CHARMM" and mdutil.which("charmm") is None:
        message = ("CHARMM is required. If installed, "
                   "please ensure that it is in your path.")
        logger.error(message)
        raise OSError(message)

    half_size = window_size // 2
    if half_size < 1:
        # A zero step would otherwise fail inside range() with an
        # unrelated-looking message.
        message = "window_size must be at least 2 (got {!r}).".format(
            window_size)
        logger.error(message)
        raise ValueError(message)

    beg = start - half_size if start >= window_size else start
    first_frames = range(beg, stop + 1, half_size)
    last_frames = range(beg + window_size - 1, stop + 1, half_size)
    # zip() stops at the shorter range, so only complete windows are kept.
    values = [((last // half_size) - 1, first, last)
              for first, last in zip(first_frames, last_frames)]

    func = functools.partial(
        _CONVERT[program],
        data_dir=data,
        topology=topology,
        toppar=toppar,
        trajectory=trajectory,
        index=index,
        outfile=outfile,
        logfile=logfile,
        system=system,
    )

    # Run multiple instances simultaneously
    pool = mp.Pool()
    result = pool.map_async(func, values)
    pool.close()
    pool.join()
    # map_async only stores worker exceptions in the result object; get()
    # re-raises the first one so failed conversions are not silently lost.
    result.get()
def calculate_thermo(self, nma_exec=None):
    """Calculate the thermodynamic properties of the trajectory.

    The CHARMM input file is written if it does not exist yet, CHARMM is
    run, and the per-residue table found in the CHARMM log file is written
    to the ``thermo_data`` file as space-separated text.

    Parameters
    ----------
    nma_exec : str
        executable file for normal mode analysis

    Raises
    ------
    OSError
        If no CHARMM executable can be determined.
    subprocess.CalledProcessError
        If CHARMM exits with a non-zero status.
    """
    # Find CHARMM executable
    charmm_exec = (os.environ.get("CHARMMEXEC", util.which("charmm"))
                   if nma_exec is None else nma_exec)
    if charmm_exec is None:
        message = ("Please set CHARMMEXEC with the location of your CHARMM "
                   "executable file or add the charmm path to your PATH "
                   "environment.")
        # logger.exception is only meaningful inside an ``except`` block.
        logger.error(message)
        raise_with_traceback(OSError(message))

    if not path.exists(self.filenames["thermo_input"]):
        version = self.kwargs.get("charmm_version", 41)
        dimension = ("dimension chsize 500000 maxres 3000000"
                     if version >= 36 else "")
        with open(self.filenames["thermo_input"], mode="wb") as charmm_file:
            logger.info("Writing CHARMM input file.")
            charmm_inp = charmm_thermo.thermodynamics.format(
                trajectory=path.join(self.outdir, self.args[-1]),
                temperature=self.temperature,
                flex="flex" if version else "",
                version=version,
                dimension=dimension,
                **self.filenames)
            # The first character of the template is skipped before
            # dedenting.
            charmm_inp = textwrap.dedent(charmm_inp[1:])
            charmm_file.write(charmm_inp.encode())

    # Calculate thermodynamic properties of the trajectory.
    with open(self.filenames["thermo_log"], "w") as log_file:
        logger.info("Running thermodynamic calculation.")
        subprocess.check_call(
            [charmm_exec, "-i", self.filenames["thermo_input"]],
            stdout=log_file,
            stderr=subprocess.STDOUT,
        )
    logger.info("Calculations completed.")

    header = ("SEGI RESN RESI Entropy Enthalpy "
              "Heatcap Atm/res Ign.frq")
    columns = np.array(header.split())
    columns[:3] = np.array(["segidI", "RESN", "resI"])
    # Column padding in the log may differ from the literal above, so the
    # header line is located by comparing whitespace-normalised text.
    normalized_header = " ".join(header.split())
    thermo = []

    # Read log file. Text mode is required: the header is a ``str`` and
    # cannot be searched for in the ``bytes`` lines of a binary file.
    with open(self.filenames["thermo_log"], "r") as log_file:
        logger.info("Reading CHARMM log file.")
        # Skip everything up to and including the table header ...
        for line in log_file:
            if normalized_header in " ".join(line.split()):
                break
        # ... then collect rows until the first blank line.
        for line in log_file:
            fields = line.split()
            if not fields:
                break
            thermo.append(fields)

    # Create human-readable table
    thermo = pd.DataFrame(thermo, columns=columns)
    thermo.drop(["RESN", "Atm/res", "Ign.frq"], axis=1, inplace=True)
    thermo.set_index(["segidI", "resI"], inplace=True)
    # The ``np.float`` alias was removed from NumPy; the builtin is
    # equivalent.
    thermo = thermo.astype(float)

    # Write data to file
    with open(self.filenames["thermo_data"], "wb") as data_file:
        logger.info("Writing thermodynamics data file.")
        csv_text = thermo.to_csv(index=True,
                                 sep=native_str(" "),
                                 float_format=native_str("%.4f"),
                                 encoding="utf-8")
        data_file.write(csv_text.encode())
def run(self, nma_exec=None, tol=1.e-3, n_cycles=300, low_bound=0.):
    """Perform a self-consistent fluctuation matching.

    Each cycle runs CHARMM, reads the resulting average bond distances and
    bond fluctuations, updates the bond force constants from the difference
    to the target fluctuations, rewrites the parameter files and appends a
    row to the error file. Iteration stops after `n_cycles` cycles or as
    soon as no bond is flagged as unconverged.

    Parameters
    ----------
    nma_exec : str
        executable file for normal mode analysis
    tol : float, optional
        fluct difference tolerance
    n_cycles : int, optional
        number of fluctuation matching cycles
    low_bound : float, optional
        lowest Kb values to reduce noise

    Raises
    ------
    OSError
        If no CHARMM executable can be determined.
    IOError
        If the parameters have to be re-read for a restart and files are
        missing.
    subprocess.CalledProcessError
        If CHARMM exits with a non-zero status.
    """
    # Find CHARMM executable
    charmm_exec = (os.environ.get("CHARMMEXEC", util.which("charmm"))
                   if nma_exec is None else nma_exec)
    if charmm_exec is None:
        message = ("Please set CHARMMEXEC with the location of your CHARMM "
                   "executable file or add the charmm path to your PATH "
                   "environment.")
        # logger.exception is only meaningful inside an ``except`` block.
        logger.error(message)
        raise_with_traceback(OSError(message))

    # Read the parameters
    if not self.parameters:
        try:
            self.initialize(nma_exec, restart=True)
        except IOError:
            raise_with_traceback(
                (IOError("Some files are missing. Unable to restart.")))

    # Write CHARMM input file.
    if not path.exists(self.filenames["charmm_input"]):
        version = self.kwargs.get("charmm_version", 41)
        dimension = ("dimension chsize 1000000" if version >= 36 else "")
        with open(self.filenames["charmm_input"], mode="wb") as charmm_file:
            logger.info("Writing CHARMM input file.")
            charmm_inp = charmm_nma.nma.format(
                temperature=self.temperature,
                flex="flex" if version else "",
                version=version,
                dimension=dimension,
                **self.filenames)
            # The first character of the template is skipped before
            # dedenting.
            charmm_inp = textwrap.dedent(charmm_inp[1:])
            charmm_file.write(charmm_inp.encode())

    # Set the indices for the parameter tables.
    self.target["BONDS"].set_index(self.bond_def, inplace=True)
    bond_values = self.target["BONDS"].columns

    # Check for restart. A missing or empty error file is (re)created with
    # just the header line.
    try:
        if os.stat(self.filenames["error_data"]).st_size > 0:
            with open(self.filenames["error_data"], "rb") as data:
                # NOTE(review): ``delim_whitespace`` is deprecated in recent
                # pandas releases; ``sep=r"\s+"`` is the documented
                # replacement -- confirm the supported pandas range before
                # switching.
                error_info = pd.read_csv(data,
                                         header=0,
                                         skipinitialspace=True,
                                         delim_whitespace=True)
                if not error_info.empty:
                    self.error["step"] = error_info["step"].values[-1]
        else:
            raise FileNotFoundError
    except (FileNotFoundError, OSError):
        with open(self.filenames["error_data"], "wb") as data:
            np.savetxt(
                data,
                [
                    self.error_hdr,
                ],
                fmt=native_str("%15s"),  # Nix
                delimiter=native_str(""))
    self.error["step"] += 1

    # Initiate an all true index data, for preserving bond convergence
    if not self.restart:
        pending = ~self.target["BONDS"]["Kb"].isna()
        pending = pending.reset_index()
        self.converge_bnd_list = pending.iloc[:, 2]

    # Start self-consistent iteration for Fluctuation Matching
    # Run simulation
    logger.info(
        f"Starting fluctuation matching--{n_cycles} iterations to run")
    if low_bound != 0.:
        logger.info(
            f"Lower bound after 75% iteration is set to {low_bound}")
    st = time.time()
    fdiff = []  # per-cycle absolute fluctuation differences
    for i in range(n_cycles):
        ct = time.time()
        self.error["step"] = i + 1
        with open(self.filenames["charmm_log"], "w") as log_file:
            subprocess.check_call(
                [charmm_exec, "-i", self.filenames["charmm_input"]],
                stdout=log_file,
                stderr=subprocess.STDOUT,
            )
        self.dynamic_params["BONDS"].set_index(self.bond_def, inplace=True)
        self.parameters["BONDS"].set_index(self.bond_def, inplace=True)

        # Read the average bond distance.
        with reader(self.filenames["avg_ic"]) as icavg:
            avg_ic = icavg.read().set_index(self.bond_def)["r_IJ"]

        # Read the bond fluctuations.
        with reader(self.filenames["fluct_ic"]) as icfluct:
            fluct_ic = icfluct.read().set_index(self.bond_def)["r_IJ"]

        vib_ic = pd.concat([fluct_ic, avg_ic], axis=1)
        vib_ic.columns = bond_values

        logger.info("Checking for bondlist convergence")
        fluct_diff = np.abs(vib_ic[bond_values[0]] -
                            self.target["BONDS"][bond_values[0]])
        fdiff.append(fluct_diff)
        fluct_diff = fluct_diff.reset_index()
        tmp = self.parameters["BONDS"][bond_values[0]].reset_index()
        # A bond stays flagged while its fluctuation difference exceeds
        # ``tol`` and its current force constant is positive.
        if not self.restart:
            self.converge_bnd_list &= ((fluct_diff.iloc[:, 2] > tol) &
                                       (tmp.iloc[:, 2] > 0))
        else:
            # On a restart the list is created in the first cycle.
            if i == 0:
                self.converge_bnd_list = ((fluct_diff.iloc[:, 2] > tol) &
                                          (tmp.iloc[:, 2] > 0))
            else:
                self.converge_bnd_list &= ((fluct_diff.iloc[:, 2] > tol) &
                                           (tmp.iloc[:, 2] > 0))

        # Calculate the r.m.s.d. between fluctuation and distances
        # compared with the target values.
        vib_error = self.target["BONDS"] - vib_ic
        vib_error = vib_error.apply(np.square).mean(axis=0)
        vib_error = np.sqrt(vib_error)
        self.error[self.error.columns[-2:]] = vib_error.T.values

        # Calculate the new force constant.
        optimized = vib_ic.apply(np.reciprocal).apply(np.square)
        target = self.target["BONDS"].apply(np.reciprocal).apply(np.square)
        optimized -= target
        optimized *= self.BOLTZ * self.KFACTOR

        # update bond list
        vib_ic[bond_values[0]] = (
            self.parameters["BONDS"][bond_values[0]] -
            optimized[bond_values[0]])
        vib_ic[bond_values[0]] = (vib_ic[bond_values[0]].where(
            vib_ic[bond_values[0]] >= 0., 0.))  # set negative to zero
        if low_bound > 0. and i > int(n_cycles * 0.75):
            logger.info(
                f"Fluctuation matching cycle {i}: low bound is {low_bound}"
            )
            vib_ic[bond_values[0]] = (vib_ic[bond_values[0]].where(
                vib_ic[bond_values[0]] >= low_bound, 0.))

        # r.m.s.d. between previous and current force constant
        diff = self.dynamic_params["BONDS"] - vib_ic
        diff = diff.apply(np.square).mean(axis=0)
        diff = np.sqrt(diff)
        self.error[self.error.columns[1]] = diff.values[0]

        # Update the parameters and write to file.
        self.parameters["BONDS"][bond_values[0]] = vib_ic[bond_values[0]]
        self.dynamic_params["BONDS"][bond_values[0]] = vib_ic[
            bond_values[0]]
        self.dynamic_params["BONDS"][bond_values[1]] = vib_ic[
            bond_values[1]]

        self.parameters["BONDS"].reset_index(inplace=True)
        self.dynamic_params["BONDS"].reset_index(inplace=True)
        with mda.Writer(self.filenames["fixed_prm"], **self.kwargs) as prm:
            prm.write(self.parameters)
        with mda.Writer(self.filenames["dynamic_prm"],
                        **self.kwargs) as prm:
            prm.write(self.dynamic_params)

        # Update the error values.
        with open(self.filenames["error_data"], "ab") as error_file:
            np.savetxt(
                error_file,
                self.error,
                fmt=native_str("%15d%15.6f%15.6f%15.6f", ),  # Nix
                delimiter=native_str(""),
            )
        logger.info(
            "Fluctuation matching cycle {} completed in {:.6f}".format(
                i,
                time.time() - ct))
        logger.info(
            f"{self.converge_bnd_list.sum()} not converged out of {len(self.converge_bnd_list)}"
        )

        if self.converge_bnd_list.sum() <= len(
                self.converge_bnd_list.values.tolist()) * 0.003:
            # if bonds to converge is less than 0.3% of total bonds, use
            # relative difference as criteria as it takes more than 100
            # iterations for these 0.3% bonds to converge.
            relative_diff = (fluct_diff.iloc[:, 2] - tol) / tol

            ### To know the late converged bonds uncomment the below 5 lines ###
            # late_converged = pd.DataFrame()
            # indx = self.converge_bnd_list[self.converge_bnd_list].index.values
            # late_converged = pd.concat([fluct_diff.loc[indx], relative_diff.loc[indx]], axis=1)
            # late_converged.columns = ["I", "J", "fluct_diff_Kb", "relative_diff_kb"]
            # print(late_converged)

            self.converge_bnd_list = self.converge_bnd_list & (
                relative_diff > 5)

            if self.converge_bnd_list.sum() == 0:
                logger.info(
                    "Checking relative difference: All bonds converged, exiting"
                )
                break

    # One column per completed cycle, numbered from 1. Nothing is written
    # when no cycle ran (``n_cycles`` of 0), which would otherwise fail on
    # the empty list.
    if fdiff:
        fluct_conv = pd.concat(fdiff, axis=1).round(6)
        fluct_conv.columns = list(range(1, len(fdiff) + 1))
        fluct_conv.to_csv(self.filenames["bond_convergence"])
    logger.info(
        "Fluctuation matching completed in {:.6f}".format(time.time() -
                                                          st))
    self.target["BONDS"].reset_index(inplace=True)
def initialize(self, nma_exec=None, restart=False):
    """Create an elastic network model from a basic coarse-grain model.

    On a fresh start the CHARMM initialization input is written (unless it
    already exists), CHARMM is executed, and the resulting internal
    coordinate tables are turned into the target, fixed and dynamic
    parameter sets, which are then written to disk. On a restart the
    previously written parameter and internal coordinate files are loaded
    instead.

    Parameters
    ----------
    nma_exec : str
        executable file for normal mode analysis; if ``None`` the
        ``CHARMMEXEC`` environment variable is consulted, falling back to
        a ``charmm`` found by ``util.which``
    restart : bool, optional
        Reinitialize the object by reading files instead of doing initial
        calculations.

    Raises
    ------
    OSError
        If a fresh initialization is requested and no CHARMM executable
        can be located.
    IOError
        If a restart is requested and some of the required files are
        missing.
    """
    self.restart = restart
    if not self.restart:
        # Write CHARMM input file.
        if not path.exists(self.filenames["init_input"]):
            version = self.kwargs.get("charmm_version", 41)
            dimension = ("dimension chsize 1000000" if version >= 36 else "")
            with open(self.filenames["init_input"], mode="wb") as charmm_file:
                logger.info("Writing CHARMM input file.")
                charmm_inp = charmm_init.init.format(
                    flex="flex" if version else "",
                    version=version,
                    dimension=dimension,
                    **self.filenames)
                # Drop the first character of the template before dedenting.
                charmm_inp = textwrap.dedent(charmm_inp[1:])
                charmm_file.write(charmm_inp.encode())

        charmm_exec = (os.environ.get("CHARMMEXEC", util.which("charmm"))
                       if nma_exec is None else nma_exec)
        if charmm_exec is None:
            # Fail early with a clear message instead of letting
            # subprocess choke on a ``None`` entry in the argument list.
            msg = ("Please set CHARMMEXEC with the location of your CHARMM "
                   "executable file or add the charmm path to your PATH "
                   "environment.")
            logger.error(msg)
            raise OSError(msg)

        with open(self.filenames["init_log"], "w") as log_file:
            subprocess.check_call(
                [charmm_exec, "-i", self.filenames["init_input"]],
                stdout=log_file,
                stderr=subprocess.STDOUT,
            )

        # Write the parameter files.
        with reader(self.filenames["init_fluct_ic"]) as icfile:
            std_bonds = icfile.read().set_index(self.bond_def)
        with reader(self.filenames["init_avg_ic"]) as icfile:
            avg_bonds = icfile.read().set_index(self.bond_def)
        target = pd.concat([std_bonds["r_IJ"], avg_bonds["r_IJ"]], axis=1)
        target.reset_index(inplace=True)

        logger.info("Calculating the initial CHARMM parameters...")
        universe = mda.Universe(self.filenames["xplor_psf_file"],
                                self.filenames["crd_file"])
        self.target = prmutils.create_empty_parameters(
            universe, **self.kwargs)
        target.columns = self.target["BONDS"].columns
        self.target["BONDS"] = target.copy(deep=True)
        self.parameters = copy.deepcopy(self.target)
        # The "Kb" column initially holds the fluctuation read above; it is
        # replaced by BOLTZ divided by the squared fluctuation.
        self.parameters["BONDS"]["Kb"] = (
            self.BOLTZ / self.parameters["BONDS"]["Kb"].apply(np.square))
        self.dynamic_params = copy.deepcopy(self.parameters)

        with mda.Writer(self.filenames["fixed_prm"], **self.kwargs) as prm:
            logger.info("Writing {}...".format(
                self.filenames["fixed_prm"]))
            prm.write(self.parameters)
        with mda.Writer(self.filenames["dynamic_prm"],
                        **self.kwargs) as prm:
            logger.info("Writing {}...".format(
                self.filenames["dynamic_prm"]))
            prm.write(self.dynamic_params)
    else:
        print("FM Restarted")
        # Without a fixed parameter file there is nothing to restart from,
        # so run a fresh initialization first.
        if not path.exists(self.filenames["fixed_prm"]):
            self.initialize(nma_exec, restart=False)
        try:
            # Read the parameter files.
            logger.info("Loading parameter and internal coordinate files.")
            with reader(self.filenames["fixed_prm"]) as fixed:
                self.parameters.update(fixed.read())
            with reader(self.filenames["dynamic_prm"]) as dynamic:
                self.dynamic_params.update(dynamic.read())

            # Read the initial internal coordinate files.
            with reader(self.filenames["init_avg_ic"]) as init_avg:
                avg_table = init_avg.read().set_index(
                    self.bond_def)["r_IJ"]
            with reader(self.filenames["init_fluct_ic"]) as init_fluct:
                fluct_table = (init_fluct.read().set_index(
                    self.bond_def)["r_IJ"])
            table = pd.concat([fluct_table, avg_table], axis=1)

            # Set the target fluctuation values.
            logger.info("Files loaded successfully...")
            self.target = copy.deepcopy(self.parameters)
            self.target["BONDS"].set_index(self.bond_def, inplace=True)
            cols = self.target["BONDS"].columns
            table.columns = cols
            self.target["BONDS"] = table.copy(deep=True).reset_index()
        except (FileNotFoundError, IOError):
            raise_with_traceback(
                (IOError("Some files are missing. Unable to restart.")))
def __init__(self, filename, **kwargs):
    """Set up parameters to run HOLE_ on PDB *filename*.

    :Arguments:

      *filename*

           The *filename* is used as input for HOLE in the "COORD" card of
           the input file. It specifies the name of a PDB co-ordinate file
           to be used. This must be in Brookhaven protein databank format
           or something closely approximating this. Both ATOM and HETATM
           records are read. Note that if water molecules or ions are
           present in the channel these can be ignored on read by the use
           of the *ignore_residues* keyword.

           **Wildcard pattern**. A new feature (in release 2.1 of HOLE) was
           the option to include a wild card (``*``) in the filename. e.g.,
           *filename* = `"ab*.pdb"` will apply hole to all files in the
           directory whose name starts with ``ab`` and ends with ``.pdb``.
           This is intended to aid the analysis of multiple copies of the
           same molecule - produced during molecular dynamics or other
           method. The hole procedure will be applied to each file in turn
           with the same setup conditions (initial point, sampling distance
           etc.). Graphics files will contain a combination of the
           individual runs, one after another. Note that the pdb files are
           read independently so that they need not have an identical
           number of atoms or atom order etc. (though they should be
           sufficiently similar for a HOLE run from identical starting
           conditions to be useful).

           .. SeeAlso::

              An alternative way to load in multiple files is a direct read
              from a CHARMM binary dynamics DCD coordinate file - using the
              *dcd* keyword or use :class:`HOLEtraj`.

    :Keywords:

      *dcd*
           DCD trajectory (must be supplied together with a matching PDB
           file *filename*) and then HOLE runs its analysis on each frame.

           It does multiple HOLE runs on positions taken from a CHARMM
           binary dynamics format .DCD trajectory file. The *dcd* file must
           have exactly the same number of atoms in exactly the same order
           as the pdb file specified by *filename*. Note that if this
           option is used the pdb file is used as a template only - the
           coordinates are ignored. Note that structural parameters
           determined for each individual structure are written in a tagged
           format so that it is possible to extract the information from
           the text output file using a :program:`grep` command. The
           reading of the file can be controlled by the *step* keyword
           and/or setting :attr:`HOLE.dcd_iniskip` to the number of frames
           to be skipped initially.

           .. Note::

              HOLE is very picky and does not read all DCD-like formats. If
              in doubt, look into the *logfile* for error diagnostics.

              At the moment, DCDs generated with MDAnalysis are not
              accepted by HOLE — use :class:`HOLEtraj`, which works with
              anything that MDAnalysis can read.

      *logfile*
           name of the file collecting HOLE's output (which can be parsed
           using :meth:`HOLE.collect`) ["hole.out"]

      *step*
           step size for going through the trajectory (skips *step* - 1
           frames) [1]

      *cpoint*
           coordinates of a point inside the pore, e.g. ``[12.3, 0.7,
           18.55]``. If ``None`` then HOLE's own simple search algorithm is
           used.

           This specifies a point which lies within the channel, for simple
           channels such as gramicidin results do not show great
           sensitivity to the exact point taken. An easy way to produce an
           initial point is to use molecular graphics to find two atoms
           which lie either side of the pore and to average their
           co-ordinates. Or if the channel structure contains water
           molecules or counter ions then take the coordinates of one of
           these (and use the *ignore_residues* keyword to ignore them in
           the pore radius calculation).

           If this card is not specified then HOLE now (from version 2.2)
           attempts to make a guess where the channel will be. The
           procedure assumes the channel is reasonably symmetric. The
           initial guess on cpoint will be the centroid of all alpha carbon
           atoms (name 'CA' in pdb file). This is then refined by a crude
           grid search up to 5 Å from the original position. This procedure
           works most of the time but is clearly far from infallible —
           results should be carefully checked (with molecular graphics) if
           it is used. [``None``]

      *cvect*
           Search direction, should be parallel to the pore axis, e.g.
           ``[0,0,1]`` for the z-axis.

           If this keyword is ``None`` then HOLE now attempts to make a
           guess where the channel will be. The procedure assumes the
           channel is reasonably symmetric. The guess will be either along
           the X axis (1,0,0), Y axis (0,1,0) or Z axis (0,0,1). If the
           structure is not aligned on one of these axes the results will
           clearly be approximate. If a guess is used then results should
           be carefully checked. [``None``]

      *sample*
           distance of sample points in Å

           Specifies the distance between the planes used in the HOLE
           procedure. The default value is 0.2 Å, this should be reasonable
           for most purposes. However, if you wish to visualize a very
           tight constriction then specify a smaller value. [0.2]

      *dotden*
           density of facettes for generating a 3D pore representation

           This number controls the density of dots which will be used by
           the program. A sphere of dots is placed on each centre
           determined in the Monte Carlo procedure. Only dots which do not
           lie within any other sphere are considered. The actual number of
           dots written is therefore controlled by *dotden* and *sample*.
           *dotden* should be set to between 5 (few dots per sphere) and 35
           (large number of dots per sphere). [15]

      *endrad*
           Radius which is considered to be the end of the pore. This
           keyword can be used to specify the radius above which the
           program regards a result as indicating that the end of the pore
           has been reached. The default value is 22.0 Å. This may need to
           be increased for large channels or reduced for small. [22.0]

      *shorto*
           Determines the amount of output in the *logfile*; for automated
           processing this must be < 3.

           - 0: Full text output
           - 1: All text output given except "run in progress" (i.e.,
             detailed contemporary description of what HOLE is doing).
           - 2: Ditto plus no graph type output - only leaving minimum
             radius and conductance calculations.
           - 3: All text output other than input card mirroring and error
             messages turned off.

      *ignore_residues*
           sequence of three-letter residues that are not taken into
           account during the calculation; wildcards are *not* supported
           [ ``["SOL","WAT", "TIP", "HOH", "K ", "NA ", "CL "]`` ]

      *radius*
           Path to the radii; if set to None then a set of default radii,
           :data:`SIMPLE2_RAD`, is used (an extension of ``simple.rad``
           from the HOLE distribution)

           This specifies the name for the file specifying van der Waals
           radii for each atom. A number of files with different values are
           supplied with HOLE.

      *sphpdb*
           name of the HOLE sph file, a PDB-like file containing the
           coordinates of the pore centers.

           This keyword specifies the filename for output of the sphere
           centre information in pdb form. Its typical suffix is ".sph".
           The co-ordinates are set to the sphere centres and the
           occupancies are the sphere radii. All centres are assigned the
           atom name QSS and residue name SPH and the residue number is set
           to the storage number of the centre. The file can be imported
           into molecular graphics programs but are likely to be bonded
           together in an awful manner - as the points are very close to
           one another. In VMD sph objects are best displayed as "Points".
           Displaying .sph objects rather than rendered or dot surfaces can
           be useful to analyze the distance of particular atoms from the
           sphere-centre line.

           Most usefully .sph files can be used to produce molecular
           graphical output from a hole run. This is achieved by using the
           :program:`sph_process` program to read the .sph file.
           ["hole.sph"]

      *executable*
           Path to the :program:`hole` executable (e.g.
           ``/opt/hole/exe/hole``); the other programs
           :program:`sph_process` and :program:`sos_triangle` are assumed
           to live in the same directory as :program:`hole`. If
           :program:`hole` is found on the :envvar:`PATH` then the bare
           executable name is sufficient. ["hole"]

    :Raises: :exc:`OSError` (``errno.ENOENT``) if the :program:`hole`
             executable cannot be found.
    """
    # list of temporary files, to be cleaned up on __del__
    self.tempfiles = []
    self.tempdirs = []

    self.filename = filename
    self.coordinates = self.check_and_fix_long_filename(self.filename)
    self.dcd = kwargs.pop('dcd', None)
    if self.dcd:
        self.dcd = self.check_and_fix_long_filename(self.dcd)
    # HOLE docs description is confusing: step or skip??
    self.dcd_step = kwargs.pop("step", 1) - 1
    self.dcd_iniskip = 0
    self.cpoint = kwargs.pop("cpoint", None)
    self.cvect = kwargs.pop("cvect", None)
    self.sample = float(kwargs.pop("sample", 0.20))
    self.dotden = int(kwargs.pop("dotden", 15))
    self.endrad = float(kwargs.pop("endrad", 22.))
    # look at using SHORTO 2 for minimum output
    self.shorto = int(kwargs.pop("shorto", 0))
    self.ignore_residues = kwargs.pop("ignore_residues",
                                      self.default_ignore_residues)
    # Fall back to a freshly written default radius file when no (or an
    # empty) *radius* is given.
    self.radius = self.check_and_fix_long_filename(
        realpath(kwargs.pop('radius', None) or write_simplerad2()))

    logger.info("Setting up HOLE analysis for %(filename)r", vars(self))
    logger.info("Using radius file %(radius)r", vars(self))

    # guess executables
    self.exe = {}
    hole_exe_name = kwargs.pop('executable', 'hole')
    self.exe['hole'] = which(hole_exe_name)
    if self.exe['hole'] is None:
        errmsg = "HOLE binary %(hole_exe_name)r not found." % vars()
        logger.critical(errmsg)
        logger.critical(
            "%(hole_exe_name)r must be on the PATH or provided as keyword argument 'executable'.",
            vars())
        raise OSError(errno.ENOENT, errmsg)
    # The helper programs are assumed to sit next to the hole binary.
    holepath = os.path.dirname(self.exe['hole'])
    self.exe['sos_triangle'] = os.path.join(holepath, "sos_triangle")
    self.exe['sph_process'] = os.path.join(holepath, "sph_process")

    self.sphpdb = kwargs.pop("sphpdb", "hole.sph")
    self.logfile = kwargs.pop("logfile", "hole.out")

    # NOTE(review): the %(...)s placeholders (e.g. ``ignore``,
    # ``cpoint_xyz``) are not filled in here; presumably that happens when
    # the input file is generated - confirm against the caller.
    self.template = textwrap.dedent("""
        ! Input file for Oliver Smart's HOLE program
        ! written by MDAnalysis.analysis.hole.HOLE
        ! filename = %(filename)s
        COORD %(coordinates)s
        RADIUS %(radius)s
        SPHPDB %(sphpdb)s
        SAMPLE %(sample)f
        ENDRAD %(endrad)f
        IGNORE %(ignore)s
        SHORTO %(shorto)d
        """)
    if self.cpoint is not None:
        # note: if it is None then we can't change this with a kw for run() !!
        self.template += "CPOINT %(cpoint_xyz)s\n"
    else:
        logger.info("HOLE will guess CPOINT")
    if self.cvect is not None:
        # note: if it is None then we can't change this with a kw for run() !!
        self.template += "CVECT %(cvect_xyz)s\n"
    else:
        logger.info("HOLE will guess CVECT")

    if self.dcd:
        # CHARMD -- DCD (matches COORD)
        # CHARMS int int -- ignore_first_N_frames skip_every_X_frames
        # http://s3.smartsci.uk/hole2/doc/old/hole_d03.html#CHARMD
        self.template += "\nCHARMD %(dcd)s\nCHARMS %(dcd_iniskip)d %(dcd_step)d\n"

    # sanity checks
    if self.shorto > 2:
        # Logger.warn is a deprecated alias (removed in Python 3.13).
        logger.warning(
            "SHORTO (%d) needs to be < 3 in order to extract a HOLE profile!",
            self.shorto)
    for program, exe_path in self.exe.items():
        if exe_path is None or which(exe_path) is None:
            logger.error(
                "Executable %r not found, should have been %r.",
                program, exe_path)

    # results
    self.profiles = {}
def __init__(self, filename, **kwargs):
    """Set up parameters to run X3DNA_ on PDB *filename*.

    Parameters
    ----------
    filename : str
        The `filename` is used as input for X3DNA in the
        :program:`xdna_ensemble` command. It specifies the name of a PDB
        coordinate file to be used. This must be in Brookhaven protein
        databank format or something closely approximating this.
    executable : str (optional)
        Path to the :program:`xdna_ensemble` executable directories
        (e.g. ``/opt/x3dna/2.1 and /opt/x3dna/2.1/bin``) must be set
        and then added to export in bashrc file. See X3DNA
        documentation for set-up instructions.
    x3dna_param : bool (optional)
        Determines whether base step or base pair parameters will be
        calculated. If ``True`` (default) then stacked *base step*
        parameters will be analyzed. If ``False`` then stacked *base
        pair* parameters will be analyzed.
    logfile : str (optional)
        Write output from X3DNA to `logfile` (default: "bp_step.par")

    Raises
    ------
    OSError
        With ``errno.ENOENT`` if the executable cannot be found.

    See Also
    --------
    :class:`X3DNAtraj`
    """
    # list of temporary files, to be cleaned up on __del__
    self.tempfiles = [
        "auxiliary.par", "bestpairs.pdb", "bp_order.dat", "bp_helical.par",
        "cf_7methods.par", "col_chains.scr", "col_helices.scr",
        "hel_regions.pdb", "ref_frames.dat", "hstacking.pdb", "stacking.pdb"
    ]
    self.tempdirs = []
    self.filename = filename

    logger.info("Setting up X3DNA analysis for %r", self.filename)

    # guess executables
    self.exe = {}
    x3dna_exe_name = kwargs.pop('executable', 'xdna_ensemble')
    self.x3dna_param = kwargs.pop('x3dna_param', True)
    self.exe['xdna_ensemble'] = which(x3dna_exe_name)
    if self.exe['xdna_ensemble'] is None:
        errmsg = "X3DNA binary {0!r} not found.".format(x3dna_exe_name)
        logger.critical(errmsg)
        logger.critical(
            "%r must be on the PATH or provided as keyword argument 'executable'.",
            x3dna_exe_name)
        raise OSError(errno.ENOENT, errmsg)
    self.logfile = kwargs.pop("logfile", "bp_step.par")

    # The template is a shell command; only ``is False`` selects the
    # x3dna_ensemble variant, every other value uses find_pair | analyze.
    if self.x3dna_param is False:
        self.template = textwrap.dedent("""x3dna_ensemble analyze -b 355d.bps --one %(filename)r """)
    else:
        self.template = textwrap.dedent("""find_pair -s %(filename)r stdout |analyze stdin """)

    # sanity checks
    for program, exe_path in self.exe.items():
        if exe_path is None or which(exe_path) is None:
            logger.error(
                "Executable %r not found, should have been %r.",
                program, exe_path)

    # results
    self.profiles = OrderedDict()
def __init__(self, filename, **kwargs):
    """Set up parameters to run X3DNA_ on PDB *filename*.

    :Arguments:

      *filename*

           The *filename* is used as input for X3DNA in the
           "xdna_ensemble" command. It specifies the name of a PDB
           coordinate file to be used. This must be in Brookhaven protein
           databank format or something closely approximating this.

    :Keywords:

      *executable*
           Path to the :program:`xdna_ensemble` executable directories
           (e.g. ``/opt/x3dna/2.1 and /opt/x3dna/2.1/bin``) must be set
           and then added to export in bashrc file. See X3DNA
           documentation for set-up instructions.

      *x3dna_param*
           Determines whether base step or base pair parameters will be
           calculated. If True then stacked base step parameters will be
           analyzed [Default is True]. If False then stacked base pair
           parameters will be analyzed.

      *logfile*
           Name stored as :attr:`logfile` ["bp_step.par"]

    :Raises: :exc:`OSError` (``errno.ENOENT``) if the executable cannot be
             found.
    """
    # list of temporary files, to be cleaned up on __del__
    self.tempfiles = [
        "auxiliary.par", "bestpairs.pdb", "bp_order.dat", "bp_helical.par",
        "cf_7methods.par", "col_chains.scr", "col_helices.scr",
        "hel_regions.pdb", "ref_frames.dat", "hstacking.pdb", "stacking.pdb"
    ]
    self.tempdirs = []
    self.filename = filename

    logger.info("Setting up X3DNA analysis for %r", self.filename)

    # guess executables
    self.exe = {}
    x3dna_exe_name = kwargs.pop('executable', 'xdna_ensemble')
    self.x3dna_param = kwargs.pop('x3dna_param', True)
    located = which(x3dna_exe_name)
    self.exe['xdna_ensemble'] = located
    if located is None:
        errmsg = "X3DNA binary %r not found." % (x3dna_exe_name,)
        logger.critical(errmsg)
        logger.critical(
            "%r must be on the PATH or provided as keyword argument 'executable'.",
            x3dna_exe_name)
        raise OSError(errno.ENOENT, errmsg)
    self.logfile = kwargs.pop("logfile", "bp_step.par")

    # Pick the shell command template; only an explicit ``False`` selects
    # the x3dna_ensemble variant.
    if self.x3dna_param is False:
        self.template = textwrap.dedent("""x3dna_ensemble analyze -b 355d.bps --one %(filename)r """)
    else:
        self.template = textwrap.dedent("""find_pair -s %(filename)r stdout |analyze stdin """)

    # sanity checks
    for program, exe_path in self.exe.items():
        if exe_path is None or which(exe_path) is None:
            logger.error(
                "Executable %r not found, should have been %r.",
                program, exe_path)

    # results
    self.profiles = {}
def run(self, nma_exec=None, tol=1.e-4, n_cycles=250):
    """Perform a self-consistent fluctuation matching.

    Each cycle runs CHARMM, reads the resulting average distances and
    fluctuations, updates the force constants from the difference to the
    target values, rewrites the parameter files and appends the error
    values to the error file. The loop stops early once the r.m.s.
    difference between successive force constants drops below `tol`.

    Parameters
    ----------
    nma_exec : str
        executable file for normal mode analysis
    tol : float, optional
        error tolerance
    n_cycles : int, optional
        number of fluctuation matching cycles

    Raises
    ------
    OSError
        If no CHARMM executable can be located.
    IOError
        If the parameters have to be reloaded and files are missing.
    """
    # Find CHARMM executable
    charmm_exec = (os.environ.get("CHARMMEXEC", util.which("charmm"))
                   if nma_exec is None else nma_exec)
    if charmm_exec is None:
        msg = ("Please set CHARMMEXEC with the location of your CHARMM "
               "executable file or add the charmm path to your PATH "
               "environment.")
        # logger.error, not logger.exception: no exception is being
        # handled at this point.
        logger.error(msg)
        raise OSError(msg)

    # Read the parameters
    if not self.parameters:
        try:
            self.initialize(nma_exec, restart=True)
        except IOError:
            raise_with_traceback(
                (IOError("Some files are missing. Unable to restart.")))

    # Write CHARMM input file.
    if not path.exists(self.filenames["charmm_input"]):
        version = self.kwargs.get("charmm_version", 41)
        dimension = ("dimension chsize 1000000" if version >= 36 else "")
        with open(self.filenames["charmm_input"], mode="wb") as charmm_file:
            logger.info("Writing CHARMM input file.")
            charmm_inp = charmm_nma.nma.format(
                temperature=self.temperature,
                flex="flex" if version else "",
                version=version,
                dimension=dimension,
                **self.filenames)
            charmm_inp = textwrap.dedent(charmm_inp[1:])
            charmm_file.write(charmm_inp.encode())

    # Set the indices for the parameter tables.
    self.target["BONDS"].set_index(self.bond_def, inplace=True)
    bond_values = self.target["BONDS"].columns

    # Check for restart.
    try:
        if os.stat(self.filenames["error_data"]).st_size > 0:
            with open(self.filenames["error_data"], "rb") as data:
                # sep=r"\s+" replaces the deprecated delim_whitespace=True.
                error_info = pd.read_csv(
                    data,
                    header=0,
                    skipinitialspace=True,
                    sep=r"\s+")
                if not error_info.empty:
                    self.error["step"] = error_info["step"].values[-1]
        else:
            # Treat an empty file like a missing one so that the header
            # gets written below.
            raise FileNotFoundError
    except (FileNotFoundError, OSError):
        with open(self.filenames["error_data"], "wb") as data:
            np.savetxt(
                data, [
                    self.error_hdr,
                ],
                fmt=native_str("%10s"),
                delimiter=native_str(""))
    self.error["step"] += 1

    # Run simulation
    logger.info("Starting fluctuation matching")
    st = time.time()

    for i in range(n_cycles):
        self.error["step"] = i + 1
        with open(self.filenames["charmm_log"], "w") as log_file:
            subprocess.check_call(
                [charmm_exec, "-i", self.filenames["charmm_input"]],
                stdout=log_file,
                stderr=subprocess.STDOUT,
            )
        self.dynamic_params["BONDS"].set_index(self.bond_def, inplace=True)
        self.parameters["BONDS"].set_index(self.bond_def, inplace=True)

        # Read the average bond distance.
        with reader(self.filenames["avg_ic"]) as intcor:
            avg_ic = intcor.read().set_index(self.bond_def)["r_IJ"]

        # Read the bond fluctuations.
        with reader(self.filenames["fluct_ic"]) as intcor:
            fluct_ic = intcor.read().set_index(self.bond_def)["r_IJ"]

        vib_ic = pd.concat([fluct_ic, avg_ic], axis=1)
        vib_ic.columns = bond_values

        # Calculate the r.m.s.d. between fluctuation and distances
        # compared with the target values.
        vib_error = self.target["BONDS"] - vib_ic
        vib_error = vib_error.apply(np.square).mean(axis=0)
        vib_error = np.sqrt(vib_error)
        self.error[self.error.columns[-2:]] = vib_error.T.values

        # Calculate the new force constant.
        optimized = vib_ic.apply(np.reciprocal).apply(np.square)
        target = self.target["BONDS"].apply(np.reciprocal).apply(np.square)
        optimized -= target
        optimized *= self.BOLTZ * self.KFACTOR
        vib_ic[bond_values[0]] = (self.parameters["BONDS"][bond_values[0]]
                                  - optimized[bond_values[0]])
        # Clamp negative force constants to zero.
        vib_ic[bond_values[0]] = (vib_ic[bond_values[0]].where(
            vib_ic[bond_values[0]] >= 0., 0.))

        # r.m.s.d. between previous and current force constant
        diff = self.dynamic_params["BONDS"] - vib_ic
        diff = diff.apply(np.square).mean(axis=0)
        diff = np.sqrt(diff)
        self.error[self.error.columns[1]] = diff.values[0]

        # Update the parameters and write to file.
        self.parameters["BONDS"][bond_values[0]] = (
            vib_ic[bond_values[0]].copy(deep=True))
        self.dynamic_params["BONDS"] = vib_ic.copy(deep=True)
        self.parameters["BONDS"].reset_index(inplace=True)
        self.dynamic_params["BONDS"].reset_index(inplace=True)
        with mda.Writer(self.filenames["fixed_prm"], **self.kwargs) as prm:
            prm.write(self.parameters)
        with mda.Writer(self.filenames["dynamic_prm"],
                        **self.kwargs) as prm:
            prm.write(self.dynamic_params)

        # Update the error values.
        with open(self.filenames["error_data"], "ab") as error_file:
            np.savetxt(
                error_file,
                self.error,
                fmt=native_str("%10d%10.6f%10.6f%10.6f", ),
                delimiter=native_str(""),
            )

        # Series.bool() is deprecated in pandas; .all() gives the same
        # answer for the single-row error table assumed here
        # (TODO confirm self.error always holds exactly one row).
        if (self.error[self.error.columns[1]] < tol).all():
            break

    logger.info("Fluctuation matching completed in {:.6f}".format(
        time.time() - st))
    self.target["BONDS"].reset_index(inplace=True)
def __init__(self, filename, **kwargs):
    """Set up parameters to run X3DNA_ on PDB *filename*.

    :Arguments:

      *filename*

           The *filename* is used as input for X3DNA in the
           "xdna_ensemble" command. It specifies the name of a PDB
           coordinate file to be used. This must be in Brookhaven protein
           databank format or something closely approximating this.

    :Keywords:

      *executable*
           Path to the :program:`xdna_ensemble` executable directories
           (e.g. ``/opt/x3dna/2.1 and /opt/x3dna/2.1/bin``) must be set
           and then added to export in bashrc file. See X3DNA
           documentation for set-up instructions.

      *x3dna_param*
           Determines whether base step or base pair parameters will be
           calculated. If True then stacked base step parameters will be
           analyzed [Default is True]. If False then stacked base pair
           parameters will be analyzed.

      *logfile*
           Name stored as :attr:`logfile` ["bp_step.par"]

    :Raises: :exc:`OSError` (``errno.ENOENT``) if the executable cannot be
             found.
    """
    # list of temporary files, to be cleaned up on __del__
    self.tempfiles = [
        "auxiliary.par", "bestpairs.pdb", "bp_order.dat", "bp_helical.par",
        "cf_7methods.par", "col_chains.scr", "col_helices.scr",
        "hel_regions.pdb", "ref_frames.dat", "hstacking.pdb", "stacking.pdb"
    ]
    self.tempdirs = []
    self.filename = filename

    logger.info("Setting up X3DNA analysis for %r", self.filename)

    # guess executables
    self.exe = {}
    x3dna_exe_name = kwargs.pop('executable', 'xdna_ensemble')
    self.x3dna_param = kwargs.pop('x3dna_param', True)
    self.exe['xdna_ensemble'] = which(x3dna_exe_name)
    if self.exe['xdna_ensemble'] is None:
        errmsg = "X3DNA binary {!r} not found.".format(x3dna_exe_name)
        logger.critical(errmsg)
        logger.critical(
            "%r must be on the PATH or provided as keyword argument 'executable'.",
            x3dna_exe_name)
        raise OSError(errno.ENOENT, errmsg)
    self.logfile = kwargs.pop("logfile", "bp_step.par")

    # Only an explicit ``False`` switches to the x3dna_ensemble command;
    # anything else keeps the find_pair | analyze pipeline.
    if self.x3dna_param is False:
        self.template = textwrap.dedent(
            """x3dna_ensemble analyze -b 355d.bps --one %(filename)r """)
    else:
        self.template = textwrap.dedent(
            """find_pair -s %(filename)r stdout |analyze stdin """)

    # sanity checks
    for program, exe_path in self.exe.items():
        if exe_path is None or which(exe_path) is None:
            logger.error(
                "Executable %r not found, should have been %r.",
                program, exe_path)

    # results
    self.profiles = {}