Ejemplo n.º 1
0
def split_charmm(info, data_dir=path.join(os.getcwd(), "data"), **kwargs):
    """Create a subtrajectory from a CHARMM trajectory.

    A CHARMM input script is rendered from ``charmm_split.split_inp``,
    written to ``split.inp`` inside the data subdirectory and then executed
    with the ``charmm`` program found on the PATH.

    Parameters
    ----------
    info : :class:`collections.namedTuple`
        Contains information about the data subdirectory and start and
        stop frames
    data_dir : str, optional
        Location of the main data directory
    toppar : str, optional
        Directory containing CHARMM topology/parameter files
    trajectory : str, optional
        A CHARMM trajectory file (e.g., dcd)
    outfile : str, optional
        A CHARMM trajectory file (e.g., dcd)
    logfile : str, optional
        Log file for output of command
    charmm_version : int
        Version of CHARMM

    Raises
    ------
    OSError
        If the ``charmm`` executable cannot be located or the data
        subdirectory cannot be created.
    subprocess.CalledProcessError
        If CHARMM exits with a non-zero status.
    """
    # Trajectory splitting information
    subdir, start, stop = info
    subdir = path.join(data_dir, "{}".format(subdir))
    charmm_exec = mdutil.which("charmm")
    if charmm_exec is None:
        # Fail with a readable message instead of letting subprocess choke
        # on a ``None`` entry in the command list.
        raise OSError("CHARMM is required. If installed, "
                      "please ensure that it is in your path.")

    # Create the subdirectory; an already existing directory is fine, any
    # other failure (e.g. permissions) is reported to the caller.
    try:
        os.makedirs(subdir)
    except OSError:
        if not path.isdir(subdir):
            raise

    # Various filenames
    version = kwargs.get("charmm_version", 41)
    toppar = kwargs.get("toppar",
                        "/opt/local/charmm/c{:d}b1/toppar".format(version))
    trajectory = kwargs.get("trajectory", path.join(os.curdir, "md.dcd"))
    outfile = path.join(subdir, kwargs.get("outfile", "aa.dcd"))
    logfile = path.join(subdir, kwargs.get("logfile", "split.log"))
    inpfile = path.join(subdir, "split.inp")

    # Render the script before touching the file system so that a template
    # error does not leave an empty input file behind.
    charmm_inp = charmm_split.split_inp.format(
        toppar=toppar,
        trajectory=trajectory,
        outfile=outfile,
        version=version,
        start=start,
        stop=stop,
    )
    # The template starts with a newline; drop it before dedenting.
    charmm_inp = textwrap.dedent(charmm_inp[1:])
    with mdutil.openany(inpfile, "w") as charmm_input:
        print(charmm_inp, file=charmm_input)

    # ``logfile`` already lives inside ``subdir``; joining it with ``subdir``
    # a second time would duplicate the directory for relative paths.
    command = [charmm_exec, "-i", inpfile, "-o", logfile]
    subprocess.check_call(command)
Ejemplo n.º 2
0
    def run_atomic_fluct(self, charmm_exec=None):
        """Write the QHA and NMA CHARMM inputs and run the fluctuations.

        For each of the two analyses the input script is only written, and
        CHARMM only executed, when the corresponding input file does not
        exist yet. CHARMM output is redirected to the matching log file.

        Parameters
        ----------
        charmm_exec : str, optional
            CHARMM executable. When ``None`` the ``CHARMMEXEC`` environment
            variable is used, falling back to ``charmm`` on the PATH.

        Raises
        ------
        OSError
            If no CHARMM executable can be determined.
        subprocess.CalledProcessError
            If CHARMM exits with a non-zero status.
        """
        # Find CHARMM executable
        if charmm_exec is None:
            charmm_exec = os.environ.get("CHARMMEXEC", util.which("charmm"))
        if charmm_exec is None:
            message = (
                "Please set CHARMMEXEC with the location of your CHARMM "
                "executable file or add the charmm path to your PATH "
                "environment.")
            # No exception is active here, so ``logger.exception`` would
            # only append a meaningless "NoneType: None" traceback.
            logger.error(message)
            raise_with_traceback(OSError(message))

        # (filename prefix, template, message when writing, message when
        # running) for each analysis, in execution order.
        jobs = (
            ("qha", charmm_afqha.afqha,
             "Writing CHARMM QHA input file.",
             "Running QHA Atomic Fluctuations."),
            ("nma", charmm_afnma.afnma,
             "Writing NMA CHARMM input file.",
             "Running NMA Atomic Fluctuations."),
        )

        # Write CHARMM input files and run atomic fluctuations
        for prefix, template, write_msg, run_msg in jobs:
            input_name = self.filenames[prefix + "_input"]
            if path.exists(input_name):
                continue

            version = self.kwargs.get("charmm_version", 41)
            dimension = ("dimension chsize 1000000" if version >= 36 else "")
            # Render first: if formatting fails no empty input file is left
            # behind that would make later calls skip this analysis.
            # NOTE(review): ``flex`` is enabled for every non-zero version;
            # confirm that no minimum version was intended.
            charmm_inp = template.format(
                temperature=self.temperature,
                flex="flex" if version else "",
                version=version,
                dimension=dimension,
                **self.filenames)
            # The template starts with a newline; drop it before dedenting.
            charmm_inp = textwrap.dedent(charmm_inp[1:])
            with open(input_name, mode="wb") as charmm_file:
                logger.info(write_msg)
                charmm_file.write(charmm_inp.encode())

            logger.info(run_msg)
            with open(self.filenames[prefix + "_log"], "w") as log_file:
                subprocess.check_call(
                    [charmm_exec, "-i", input_name],
                    stdout=log_file,
                    stderr=subprocess.STDOUT,
                )
Ejemplo n.º 3
0
def split_gmx(info, data_dir=path.join(os.getcwd(), "data"), **kwargs):
    """Create a subtrajectory from a Gromacs trajectory.

    Runs ``gmx trjconv`` on the requested frame range. The atom selection
    that ``trjconv`` asks for interactively is supplied through a temporary
    file connected to its standard input; standard output and error are
    written to the log file inside the data subdirectory.

    Parameters
    ----------
    info : :class:`collections.namedTuple`
        Contains information about the data subdirectory and start and
        stop frames
    data_dir : str, optional
        Location of the main data directory
    topology : str, optional
        Topology filename (e.g., tpr gro g96 pdb brk ent)
    trajectory : str, optional
        A Gromacs trajectory file (e.g., xtc trr)
    index : str, optional
        A Gromacs index file (e.g., ndx)
    outfile : str, optional
        A Gromacs trajectory file (e.g., xtc trr)
    logfile : str, optional
        Log file for output of command
    system : int
        Atom selection from Gromacs index file (0 = System, 1 = Protein)

    Raises
    ------
    OSError
        If the data subdirectory cannot be created.
    subprocess.CalledProcessError
        If ``gmx trjconv`` exits with a non-zero status.
    """
    # Trajectory splitting information
    subdir, start, stop = info
    subdir = path.join(data_dir, "{}".format(subdir))
    # Use the resolved executable for every invocation; fall back to the
    # bare program name when it cannot be resolved.
    gromacs_exec = mdutil.which("gmx") or "gmx"

    # Create the subdirectory; an already existing directory is fine, any
    # other failure (e.g. permissions) is reported to the caller.
    try:
        os.makedirs(subdir)
    except OSError:
        if not path.isdir(subdir):
            raise

    # Various filenames
    topology = kwargs.get("topology", "md.tpr")
    trajectory = kwargs.get("trajectory", path.join(os.curdir, "md.xtc"))
    index = kwargs.get("index")
    outfile = path.join(subdir, kwargs.get("outfile", "aa.xtc"))
    logfile = path.join(subdir, kwargs.get("logfile", "split.log"))

    # The index file is the only optional part of the command line.
    command = [gromacs_exec, "trjconv", "-s", topology, "-f", trajectory]
    if index is not None:
        command.extend(["-n", index])
    command.extend([
        "-o",
        outfile,
        "-b",
        "{:d}".format(start),
        "-e",
        "{:d}".format(stop),
    ])

    fd, fpath = tempfile.mkstemp(text=True)
    try:
        # Wrapping the descriptor makes sure it is closed; mkstemp leaves
        # that responsibility to the caller.
        with os.fdopen(fd, "w") as temp:
            print(kwargs.get("system", 0), file=temp)
        with open(fpath, "r") as temp, \
                mdutil.openany(logfile, mode="w") as log:
            logger.info("Writing trajectory to {}".format(outfile))
            logger.info("Writing Gromacs output to {}".format(logfile))
            subprocess.check_call(command,
                                  stdin=temp,
                                  stdout=log,
                                  stderr=subprocess.STDOUT)
    finally:
        # Remove the selection file even when trjconv fails.
        os.remove(fpath)
Ejemplo n.º 4
0
@click.option(
    "-o",
    "outdir",
    metavar="DIR",
    default=os.getcwd(),
    show_default=True,
    type=click.Path(exists=False, file_okay=False, resolve_path=True),
    help="Directory",
)
@click.option(
    "-e",
    "--exec",
    "nma_exec",
    metavar="FILE",
    envvar="CHARMMEXEC",
    default=which("charmm"),
    show_default=True,
    type=click.Path(exists=False, file_okay=True, resolve_path=True),
    help="CHARMM executable file",
)
@click.option(
    "-t",
    "--temperature",
    metavar="TEMP",
    type=click.FLOAT,
    default=300.0,
    show_default=True,
    help="Temperature of simulation",
)
@click.option(
    "-n",
Ejemplo n.º 5
0
    def __init__(self, filename, **kwargs):
        """Set up parameters to run HOLE_ on PDB *filename*.

        :Arguments:

          *filename*

               The *filename* is used as input for HOLE in the "COORD" card of
               the input file.  It specifies the name of a PDB co-ordinate file
               to be used. This must be in Brookhaven protein databank format
               or something closely approximating this. Both ATOM and HETATM
               records are read. Note that if water molecules or ions are
               present in the channel these can be ignored on read by the use
               of the *ignore_residues* keyword.

               **Wildcard pattern**. A new feature (in release 2.1 of HOLE) was
               the option to include a wild card (``*``) in the filename. e.g.,
               *filename* = `"ab*.pdb"` will apply hole to all files in the
               directory whose name starts with ``ab`` and ends with
               ``.pdb``. This is intended to aid the analysis of multiple
               copies of the same molecule - produced during molecular dynamics
               or other method. The hole procedure will be applied to each file
               in turn with the same setup conditions (initial point, sampling
               distance etc.). Graphics files will contain a combination of the
               individual runs, one after another. Note that the pdb files are
               read independently so that they need not have an identical
               number of atoms or atom order etc. (though they should be
               sufficiently similar for a HOLE run from identical starting
               conditions to be useful).

               .. SeeAlso::

                  An alternative way to load in multiple files is a direct read
                  from a CHARMM binary dynamics DCD coordinate file - using the
                  *dcd* keyword or use :class:`HOLEtraj`.


        :Keywords:

          *dcd*
               DCD trajectory (must be supplied together with a matching
               PDB file *filename*) and then HOLE runs its analysis on each frame.

               It does multiple HOLE runs on positions taken from a CHARMM binary
               dynamics format .DCD trajectory file. The *dcd* file must have
               exactly the same number of atoms in exactly the same order as
               the pdb file specified by *filename*. Note that if this option
               is used the pdb file is used as a template only - the
               coordinates are ignored. Note that structural parameters
               determined for each individual structure are written in a tagged
               format so that it is possible to extract the information from
               the text output file using a :program:`grep` command. The
               reading of the file can be controlled by the *step* keyword
               and/or setting :attr:`HOLE.dcd_iniskip` to the number of frames
               to be skipped initially.

               .. Note::

                  HOLE is very picky and does not read all DCD-like formats. If
                  in doubt, look into the *logfile* for error diagnostics.

                  At the moment, DCDs generated with MDAnalysis are not
                  accepted by HOLE — use :class:`HOLEtraj`, which works with
                  anything that MDAnalysis can read.

          *logfile*

               name of the file collecting HOLE's output (which can be parsed
               using :meth:`HOLE.collect`) ["hole.out"]

          *sphpdb*

               name of the HOLE sph file, a PDB-like file containing the coordinates
               of the pore centers ["hole.sph"]

          *step*

               step size for going through the trajectory (skips *step* - 1
               frames) [1]

          *cpoint*

               coordinates of a point inside the pore, e.g. ``[12.3, 0.7,
               18.55]``. If ``None`` then HOLE's own simple search algorithm is
               used.

               This specifies a point which lies within the channel, for simple
               channels such as gramicidin results do not show great
               sensitivity to the exact point taken. An easy way to produce an
               initial point is to use molecular graphics to find two atoms
               which lie either side of the pore and to average their
               co-ordinates. Or if the channel structure contains water
               molecules or counter ions then take the coordinates of one of
               these (and use the *ignore_residues* keyword to ignore them in
               the pore radius calculation).

               If this card is not specified then HOLE now (from version 2.2)
               attempts to make a guess where the channel will be. The
               procedure assumes the channel is reasonably symmetric. The
               initial guess on cpoint will be the centroid of all alpha carbon
               atoms (name 'CA' in pdb file). This is then refined by a crude
               grid search up to 5 Å from the original position. This procedure
               works most of the time but is clearly far from infallible —
               results should be carefully checked (with molecular graphics) if
               it is used. [``None``]

          *cvect*

               Search direction, should be parallel to the pore axis,
               e.g. ``[0,0,1]`` for the z-axis.

               If this keyword is ``None`` then HOLE now attempts to make a
               guess where the channel will be. The procedure assumes the
               channel is reasonably symmetric. The guess will be either along
               the X axis (1,0,0), Y axis (0,1,0) or Z axis (0,0,1). If the
               structure is not aligned on one of these axes the results will
               clearly be approximate. If a guess is used then results should
               be carefully checked. [``None``]

          *sample*
               distance of sample points in Å

               Specifies the distance between the planes used in the HOLE
               procedure. The default value is 0.2 Å, this should be reasonable
               for most purposes. However, if you wish to visualize a very
               tight constriction then specify a smaller value.  [0.2]

          *dotden*
               density of facettes for generating a 3D pore representation

               This number controls the density of dots which will be used by
               the program. A sphere of dots is placed on each centre
               determined in the Monte Carlo procedure. Only dots which do not
               lie within any other sphere are considered. The actual number of
               dots written is therefore controlled by *dotden* and
               *sample*. *dotden* should be set to between 5 (few dots per
               sphere) and 35 (large number of dots per sphere). [15]

          *endrad*
               Radius which is considered to be the end of the pore. This
               keyword can be used to specify the radius above which the
               program regards a result as indicating that the end of the pore
               has been reached. The default value is 22.0 Å. This may need to
               be increased for large channels or reduced for small. [22.0]

          *shorto*

               Determines the amount of output in the *logfile*; for automated processing
               this must be < 3.

               - 0: Full text output
               - 1: All text output given except "run in progress" (i.e.,
                 detailed contemporary description of what HOLE is doing).
               - 2: Ditto plus no graph type output - only leaving minimum
                 radius and conductance calculations.
               - 3: All text output other than input card mirroring and error messages
                 turned off.

          *ignore_residues*

               sequence of three-letter residues that are not taken into account during the
               calculation; wildcards are *not* supported
               [ ``["SOL","WAT", "TIP", "HOH", "K  ", "NA ", "CL "]`` ]

          *radius*
               Path to the radii; if set to None then a set of default radii, :data:`SIMPLE2_RAD`,
               is used (an extension of ``simple.rad`` from the HOLE distribution)

               This specifies the name for the file specifying van der Waals
               radii for each atom. A number of files with different values are
               supplied with HOLE.

          *sphpdb*

               This keyword specifies the filename for output of the sphere
               centre information in pdb form. Its typical suffix is
               ".sph". The co-ordinates are set to the sphere centres and the
               occupancies are the sphere radii. All centres are assigned the
               atom name QSS and residue name SPH and the residue number is set
               to the storage number of the centre. The file can be imported
               into molecular graphics programs but are likely to be bonded
               together in an awful manner - as the points are very close to one
               another. In VMD sph objects are best displayed as
               "Points". Displaying .sph objects rather than rendered or dot
               surfaces can be useful to analyze the distance of particular
               atoms from the sphere-centre line.

               Most usefully .sph files can be used to produce molecular
               graphical output from a hole run. This is achieved by using the
               :program:`sph_process` program to read the .sph
               file. ["hole.sph"]

          *executable*

               Path to the :program:`hole` executable
               (e.g. ``/opt/hole/exe/hole``); the other programs
               :program:`sph_process` and :program:`sos_triangle` are assumed
               to live in the same directory as :program:`hole`. If
               :program:`hole` is found on the :envvar:`PATH` then the bare
               executable name is sufficient.  ["hole"]

        :Raises:

          :exc:`OSError` with :data:`errno.ENOENT` if the :program:`hole`
          executable cannot be found.
        """
        # list of temporary files, to be cleaned up on __del__
        self.tempfiles = []
        self.tempdirs = []

        self.filename = filename
        self.coordinates = self.check_and_fix_long_filename(self.filename)
        self.dcd = kwargs.pop('dcd', None)
        if self.dcd:
            self.dcd = self.check_and_fix_long_filename(self.dcd)
        self.dcd_step = kwargs.pop(
            "step",
            1) - 1  # HOLE docs description is confusing: step or skip??
        self.dcd_iniskip = 0
        self.cpoint = kwargs.pop("cpoint", None)
        self.cvect = kwargs.pop("cvect", None)
        self.sample = float(kwargs.pop("sample", 0.20))
        self.dotden = int(kwargs.pop("dotden", 15))
        self.endrad = float(kwargs.pop("endrad", 22.))
        self.shorto = int(kwargs.pop(
            "shorto", 0))  # look at using SHORTO 2 for minimum output
        self.ignore_residues = kwargs.pop("ignore_residues",
                                          self.default_ignore_residues)
        # Without an explicit radius file the default radii are written out
        # by write_simplerad2() and that file is used instead.
        self.radius = self.check_and_fix_long_filename(
            realpath(kwargs.pop('radius', None) or write_simplerad2()))

        logger.info("Setting up HOLE analysis for %(filename)r", vars(self))
        logger.info("Using radius file %(radius)r", vars(self))

        # guess executables
        self.exe = {}
        hole_exe_name = kwargs.pop('executable', 'hole')
        self.exe['hole'] = which(hole_exe_name)
        if self.exe['hole'] is None:
            errmsg = "HOLE binary %(hole_exe_name)r not found." % vars()
            logger.fatal(errmsg)
            logger.fatal(
                "%(hole_exe_name)r must be on the PATH or provided as keyword argument 'executable'.",
                vars())
            raise OSError(errno.ENOENT, errmsg)
        # The helper programs are expected next to the hole binary.
        holepath = os.path.dirname(self.exe['hole'])
        self.exe['sos_triangle'] = os.path.join(holepath, "sos_triangle")
        self.exe['sph_process'] = os.path.join(holepath, "sph_process")

        self.sphpdb = kwargs.pop("sphpdb", "hole.sph")
        self.logfile = kwargs.pop("logfile", "hole.out")

        self.template = textwrap.dedent("""
            ! Input file for Oliver Smart's HOLE program
            ! written by MDAnalysis.analysis.hole.HOLE
            ! filename = %(filename)s
            COORD  %(coordinates)s
            RADIUS %(radius)s
            SPHPDB %(sphpdb)s
            SAMPLE %(sample)f
            ENDRAD %(endrad)f
            IGNORE %(ignore)s
            SHORTO %(shorto)d
            """)
        if self.cpoint is not None:
            # note: if it is None then we can't change this with a kw for run() !!
            self.template += "CPOINT %(cpoint_xyz)s\n"
        else:
            logger.info("HOLE will guess CPOINT")
        if self.cvect is not None:
            # note: if it is None then we can't change this with a kw for run() !!
            self.template += "CVECT  %(cvect_xyz)s\n"
        else:
            logger.info("HOLE will guess CVECT")

        if self.dcd:
            # CHARMD -- DCD (matches COORD)
            # CHARMS int int -- ignore_first_N_frames   skip_every_X_frames
            #        http://s3.smartsci.uk/hole2/doc/old/hole_d03.html#CHARMD
            self.template += "\nCHARMD %(dcd)s\nCHARMS %(dcd_iniskip)d %(dcd_step)d\n"

        # sanity checks
        if self.shorto > 2:
            # Logger.warn is a deprecated alias of Logger.warning.
            logger.warning(
                "SHORTO (%d) needs to be < 3 in order to extract a HOLE profile!",
                self.shorto)
        # The local names ``program`` and ``path`` are looked up through
        # vars() by the log message below, so they must keep these names.
        for program, path in self.exe.items():
            if path is None or which(path) is None:
                logger.error(
                    "Executable %(program)r not found, should have been %(path)r.",
                    vars())
        # results
        self.profiles = {}
Ejemplo n.º 6
0
    def __init__(self, filename, **kwargs):
        """Set up parameters to run X3DNA_ on PDB *filename*.

        Parameters
        ----------
        filename : str
               The `filename` is used as input for X3DNA in the
               :program:`xdna_ensemble` command.  It specifies the name of a
               PDB coordinate file to be used. This must be in Brookhaven
               protein databank format or something closely approximating
               this.
        executable : str (optional)
               Path to the :program:`xdna_ensemble` executable directories
               (e.g. ``/opt/x3dna/2.1 and /opt/x3dna/2.1/bin``) must be set
               and then added to export in bashrc file. See X3DNA
               documentation for set-up instructions.
        x3dna_param : bool (optional)
               Determines whether base step or base pair parameters will be
               calculated. If ``True`` (default) then stacked *base step*
               parameters will be analyzed.  If ``False`` then stacked *base
               pair* parameters will be analyzed.
        logfile : str (optional)
               Write output from X3DNA to `logfile` (default: "bp_step.par")

        Raises
        ------
        OSError
               With :data:`errno.ENOENT` if the executable cannot be found.

        See Also
        --------
        :class:`X3DNAtraj`
        """
        # list of temporary files, to be cleaned up on __del__
        self.tempfiles = [
            "auxiliary.par", "bestpairs.pdb", "bp_order.dat", "bp_helical.par",
            "cf_7methods.par", "col_chains.scr", "col_helices.scr",
            "hel_regions.pdb", "ref_frames.dat", "hstacking.pdb",
            "stacking.pdb"
        ]
        self.tempdirs = []
        self.filename = filename

        logger.info("Setting up X3DNA analysis for %(filename)r", vars(self))

        # guess executables
        self.exe = {}
        x3dna_exe_name = kwargs.pop('executable', 'xdna_ensemble')
        self.x3dna_param = kwargs.pop('x3dna_param', True)
        self.exe['xdna_ensemble'] = which(x3dna_exe_name)
        if self.exe['xdna_ensemble'] is None:
            errmsg = "X3DNA binary {x3dna_exe_name!r} not found.".format(
                **vars())
            logger.fatal(errmsg)
            logger.fatal(
                "%(x3dna_exe_name)r must be on the PATH or provided as keyword argument 'executable'.",
                vars())
            raise OSError(errno.ENOENT, errmsg)
        self.logfile = kwargs.pop("logfile", "bp_step.par")

        # Only an explicit ``False`` selects the base pair command line; any
        # other value keeps the base step pipeline.
        if self.x3dna_param is False:
            self.template = textwrap.dedent(
                """x3dna_ensemble analyze -b 355d.bps --one %(filename)r """)
        else:
            self.template = textwrap.dedent(
                """find_pair -s %(filename)r stdout |analyze stdin """)

        # sanity checks
        # The local names ``program`` and ``path`` are looked up through
        # vars() by the log message below, so they must keep these names.
        for program, path in self.exe.items():
            if path is None or which(path) is None:
                logger.error(
                    "Executable %(program)r not found, should have been %(path)r.",
                    vars())
        # results
        self.profiles = OrderedDict()
Ejemplo n.º 7
0
def cli(program, toppar, topology, trajectory, data, index, outfile, logfile,
        system, start, stop, window_size):
    """Split a trajectory into overlapping windows using worker processes.

    Logging is configured to write to the console and to *logfile* in the
    current working directory. The frame range from *start* to *stop* is
    then cut into windows of *window_size* frames that advance by half a
    window, and the converter registered for *program* in ``_CONVERT`` is
    run once per window in a process pool.

    Parameters
    ----------
    program : str
        Key into ``_CONVERT``; "GMX" and "CHARMM" additionally require the
        ``gmx`` or ``charmm`` executable on the PATH.
    toppar, topology, trajectory, index, outfile, logfile, system
        Forwarded unchanged to the converter as keyword arguments.
    data : str
        Forwarded to the converter as ``data_dir``.
    start, stop : int
        First and last frame to consider.
    window_size : int
        Number of frames per window; must be at least 2 so that the
        half-window step is non-zero.

    Raises
    ------
    OSError
        If the executable required for *program* is not on the PATH.
    ValueError
        If *window_size* is smaller than 2.
    """
    logging.config.dictConfig({
        "version": 1,
        # Keep loggers that were created before this call enabled.
        "disable_existing_loggers": False,
        "formatters": {
            "standard": {
                "class": "logging.Formatter",
                "format": "%(name)-12s %(levelname)-8s %(message)s",
            },
            "detailed": {
                "class": "logging.Formatter",
                "format":
                "%(asctime)s %(name)-15s %(levelname)-8s %(message)s",
                "datefmt": "%m-%d-%y %H:%M",
            },
        },
        "handlers": {
            "console": {
                "class": "logging.StreamHandler",
                "level": "INFO",
                "formatter": "standard",
            },
            "file": {
                "class": "logging.FileHandler",
                "filename": path.join(os.getcwd(), logfile),
                "level": "INFO",
                "mode": "w",
                "formatter": "detailed",
            }
        },
        "root": {
            "level": "INFO",
            "handlers": ["console", "file"]
        },
    })
    logger = logging.getLogger(__name__)

    if program == "GMX" and mdutil.which("gmx") is None:
        logger.error("Gromacs 5.0+ is required. "
                     "If installed, please ensure that it is in your path.")
        raise OSError("Gromacs 5.0+ is required. "
                      "If installed, please ensure that it is in your path.")
    if program == "CHARMM" and mdutil.which("charmm") is None:
        logger.error("CHARMM is required. If installed, "
                     "please ensure that it is in your path.")
        raise OSError("CHARMM is required. If installed, "
                      "please ensure that it is in your path.")

    if window_size < 2:
        # A half-window of zero frames cannot be used as a range step.
        logger.error("The window size must be at least 2 frames.")
        raise ValueError("The window size must be at least 2 frames.")

    # Consecutive windows overlap by half a window.
    half_size = window_size // 2
    beg = start - half_size if start >= window_size else start
    values = zip(range(beg, stop + 1, half_size),
                 range(beg + window_size - 1, stop + 1, half_size))
    # Each entry is (window number, first frame, last frame).
    values = [((y // half_size) - 1, x, y) for x, y in values]

    func = functools.partial(
        _CONVERT[program],
        data_dir=data,
        topology=topology,
        toppar=toppar,
        trajectory=trajectory,
        index=index,
        outfile=outfile,
        logfile=logfile,
        system=system,
    )

    # Run multiple instances simultaneously. ``map`` blocks until all
    # windows are done and re-raises a worker failure, which a discarded
    # ``map_async`` result would silently drop.
    pool = mp.Pool()
    try:
        pool.map(func, values)
    finally:
        pool.close()
        pool.join()
Ejemplo n.º 8
0
    def calculate_thermo(self, nma_exec=None):
        """Calculate the thermodynamic properties of the trajectory.

        The CHARMM input script is written if it does not exist yet, CHARMM
        is run with its output redirected to the thermodynamics log file,
        and the per-residue table found in that log is written to the
        thermodynamics data file as space-separated text.

        Parameters
        ----------
        nma_exec : str
            executable file for normal mode analysis; when ``None`` the
            ``CHARMMEXEC`` environment variable is used, falling back to
            ``charmm`` on the PATH

        Raises
        ------
        OSError
            If no CHARMM executable can be determined.
        subprocess.CalledProcessError
            If CHARMM exits with a non-zero status.
        """
        # Find CHARMM executable
        charmm_exec = (os.environ.get("CHARMMEXEC", util.which("charmm"))
                       if nma_exec is None else nma_exec)
        if charmm_exec is None:
            message = (
                "Please set CHARMMEXEC with the location of your CHARMM "
                "executable file or add the charmm path to your PATH "
                "environment.")
            # No exception is active here, so ``logger.exception`` would
            # only append a meaningless "NoneType: None" traceback.
            logger.error(message)
            raise_with_traceback(OSError(message))

        if not path.exists(self.filenames["thermo_input"]):
            version = self.kwargs.get("charmm_version", 41)
            dimension = ("dimension chsize 500000 maxres 3000000"
                         if version >= 36 else "")
            with open(self.filenames["thermo_input"],
                      mode="wb") as charmm_file:
                logger.info("Writing CHARMM input file.")
                # NOTE(review): ``flex`` is enabled for every non-zero
                # version; confirm that no minimum version was intended.
                charmm_inp = charmm_thermo.thermodynamics.format(
                    trajectory=path.join(self.outdir, self.args[-1]),
                    temperature=self.temperature,
                    flex="flex" if version else "",
                    version=version,
                    dimension=dimension,
                    **self.filenames)
                # The template starts with a newline; drop it first.
                charmm_inp = textwrap.dedent(charmm_inp[1:])
                charmm_file.write(charmm_inp.encode())

        # Calculate thermodynamic properties of the trajectory.
        with open(self.filenames["thermo_log"], "w") as log_file:
            logger.info("Running thermodynamic calculation.")
            subprocess.check_call(
                [charmm_exec, "-i", self.filenames["thermo_input"]],
                stdout=log_file,
                stderr=subprocess.STDOUT,
            )
            logger.info("Calculations completed.")

        header = ("SEGI  RESN  RESI     Entropy    Enthalpy     "
                  "Heatcap     Atm/res   Ign.frq")
        # Same columns as the CHARMM header, with the first three renamed.
        columns = ["segidI", "RESN", "resI"] + header.split()[3:]
        rows = []

        # Read log file. It is opened in text mode because the header is a
        # text string; searching bytes for it fails on Python 3.
        with open(self.filenames["thermo_log"], "r") as log_file:
            logger.info("Reading CHARMM log file.")
            # Skip everything up to and including the table header.
            for line in log_file:
                if header in line:
                    break
            # The table ends at the first blank line.
            for line in log_file:
                fields = line.split()
                if not fields:
                    break
                rows.append(fields)

        # Create human-readable table
        thermo = pd.DataFrame(rows, columns=columns)
        thermo.drop(["RESN", "Atm/res", "Ign.frq"], axis=1, inplace=True)
        thermo.set_index(["segidI", "resI"], inplace=True)
        # ``np.float`` was only an alias of the builtin and has been removed
        # from NumPy.
        thermo = thermo.astype(float)

        # Write data to file
        with open(self.filenames["thermo_data"], "wb") as data_file:
            logger.info("Writing thermodynamics data file.")
            table = thermo.to_csv(index=True,
                                  sep=native_str(" "),
                                  float_format=native_str("%.4f"),
                                  encoding="utf-8")
            data_file.write(table.encode())
Ejemplo n.º 9
0
    def run(self, nma_exec=None, tol=1.e-3, n_cycles=300, low_bound=0.):
        """Perform a self-consistent fluctuation matching.

        Every cycle runs CHARMM on the current parameter files, reads back
        the average bond distances and the bond fluctuations, updates the
        bond force constants from the difference to the target fluctuations,
        rewrites both parameter files and appends the error values to the
        error data file.  The loop ends after `n_cycles` cycles, or earlier
        once no unconverged bond remains after the relative-difference check.

        Parameters
        ----------
        nma_exec : str, optional
            executable file for normal mode analysis; when ``None`` the
            ``CHARMMEXEC`` environment variable is used and, failing that,
            the result of ``util.which("charmm")``
        tol : float, optional
            fluct difference tolerance
        n_cycles : int, optional
            number of fluctuation matching cycles
        low_bound  : float, optional
            lowest Kb values to reduce noise; only applied when positive and
            once the cycle index exceeds 75% of `n_cycles`

        Raises
        ------
        OSError
            If no CHARMM executable can be located.
        IOError
            If the parameters are not loaded yet and the restart files cannot
            be read.
        """
        # Find CHARMM executable
        charmm_exec = (os.environ.get("CHARMMEXEC", util.which("charmm"))
                       if nma_exec is None else nma_exec)
        if charmm_exec is None:
            msg = ("Please set CHARMMEXEC with the location of your CHARMM "
                   "executable file or add the charmm path to your PATH "
                   "environment.")
            # No exception is active here, so log with error() rather than
            # exception() (which would append an empty traceback).
            logger.error(msg)
            raise_with_traceback(OSError(msg))

        # Read the parameters
        if not self.parameters:
            try:
                self.initialize(nma_exec, restart=True)
            except IOError:
                raise_with_traceback(
                    (IOError("Some files are missing. Unable to restart.")))

        # Write CHARMM input file (an existing file is reused untouched).
        if not path.exists(self.filenames["charmm_input"]):
            version = self.kwargs.get("charmm_version", 41)
            dimension = ("dimension chsize 1000000" if version >= 36 else "")
            with open(self.filenames["charmm_input"],
                      mode="wb") as charmm_file:
                logger.info("Writing CHARMM input file.")
                charmm_inp = charmm_nma.nma.format(
                    temperature=self.temperature,
                    flex="flex" if version else "",
                    version=version,
                    dimension=dimension,
                    **self.filenames)
                # Drop the template's leading newline before dedenting.
                charmm_inp = textwrap.dedent(charmm_inp[1:])
                charmm_file.write(charmm_inp.encode())

        # Set the indices for the parameter tables.
        self.target["BONDS"].set_index(self.bond_def, inplace=True)
        bond_values = self.target["BONDS"].columns

        # Check for restart: a non-empty error file provides the last step,
        # otherwise a fresh file containing only the header is created.
        # NOTE(review): the loop below reassigns self.error["step"] = i + 1
        # on every cycle, so the step restored here is not carried into the
        # rows that get appended -- confirm this is intended.
        try:
            if os.stat(self.filenames["error_data"]).st_size > 0:
                with open(self.filenames["error_data"], "rb") as data:
                    # sep=r"\s+" replaces the deprecated delim_whitespace=True.
                    error_info = pd.read_csv(data,
                                             header=0,
                                             skipinitialspace=True,
                                             sep=r"\s+")
                    if not error_info.empty:
                        self.error["step"] = error_info["step"].values[-1]
            else:
                raise FileNotFoundError
        except (FileNotFoundError, OSError):
            with open(self.filenames["error_data"], "wb") as data:
                np.savetxt(
                    data,
                    [
                        self.error_hdr,
                    ],
                    fmt=native_str("%15s"),  # Nix
                    delimiter=native_str(""))
        self.error["step"] += 1

        # Initiate an all true index data, for preserving bond convergence
        if not self.restart:
            temp = ~self.target["BONDS"]["Kb"].isna()
            temp = temp.reset_index()
            self.converge_bnd_list = temp.iloc[:, 2]

        # Start self-consistent iteration for Fluctuation Matching
        # Run simulation
        logger.info(
            f"Starting fluctuation matching--{n_cycles} iterations to run")
        if low_bound != 0.:
            logger.info(
                f"Lower bound after 75% iteration is set to {low_bound}")
        st = time.time()
        fdiff = []  # per-cycle absolute fluctuation differences
        for i in range(n_cycles):
            ct = time.time()
            self.error["step"] = i + 1
            with open(self.filenames["charmm_log"], "w") as log_file:
                subprocess.check_call(
                    [charmm_exec, "-i", self.filenames["charmm_input"]],
                    stdout=log_file,
                    stderr=subprocess.STDOUT,
                )
            self.dynamic_params["BONDS"].set_index(self.bond_def, inplace=True)
            self.parameters["BONDS"].set_index(self.bond_def, inplace=True)

            # Read the average bond distance.
            with reader(self.filenames["avg_ic"]) as icavg:
                avg_ic = icavg.read().set_index(self.bond_def)["r_IJ"]

            # Read the bond fluctuations.
            with reader(self.filenames["fluct_ic"]) as icfluct:
                fluct_ic = icfluct.read().set_index(self.bond_def)["r_IJ"]

            vib_ic = pd.concat([fluct_ic, avg_ic], axis=1)
            vib_ic.columns = bond_values
            logger.info("Checking for bondlist convergence")
            fluct_diff = np.abs(vib_ic[bond_values[0]] -
                                self.target["BONDS"][bond_values[0]])
            fdiff.append(fluct_diff)
            fluct_diff = fluct_diff.reset_index()
            tmp = self.parameters["BONDS"][bond_values[0]].reset_index()

            # A bond is still unconverged while its fluctuation difference
            # exceeds the tolerance and its force constant is positive.
            # Column 2 is the value column after reset_index()
            # (assumes bond_def names two index columns -- TODO confirm).
            not_converged = ((fluct_diff.iloc[:, 2] > tol) &
                             (tmp.iloc[:, 2] > 0))
            if self.restart and i == 0:
                # On restart no mask exists yet; start from this cycle's.
                self.converge_bnd_list = not_converged
            else:
                self.converge_bnd_list &= not_converged

            # Calculate the r.m.s.d. between fluctuation and distances
            # compared with the target values.
            vib_error = self.target["BONDS"] - vib_ic
            vib_error = vib_error.apply(np.square).mean(axis=0)
            vib_error = np.sqrt(vib_error)
            self.error[self.error.columns[-2:]] = vib_error.T.values

            # Calculate the new force constant.
            optimized = vib_ic.apply(np.reciprocal).apply(np.square)
            target = self.target["BONDS"].apply(np.reciprocal).apply(np.square)
            optimized -= target
            optimized *= self.BOLTZ * self.KFACTOR

            # update  bond list
            vib_ic[bond_values[0]] = (
                self.parameters["BONDS"][bond_values[0]] -
                optimized[bond_values[0]])
            vib_ic[bond_values[0]] = (vib_ic[bond_values[0]].where(
                vib_ic[bond_values[0]] >= 0., 0.))  # set negative to zero

            if low_bound > 0. and i > int(n_cycles * 0.75):
                logger.info(
                    f"Fluctuation matching cycle {i}: low bound is {low_bound}"
                )
                # Values below the lower bound are zeroed as well.
                vib_ic[bond_values[0]] = (vib_ic[bond_values[0]].where(
                    vib_ic[bond_values[0]] >= low_bound, 0.))

            # r.m.s.d. between previous and current force constant
            diff = self.dynamic_params["BONDS"] - vib_ic
            diff = diff.apply(np.square).mean(axis=0)
            diff = np.sqrt(diff)
            self.error[self.error.columns[1]] = diff.values[0]

            # Update the parameters and write to file.
            self.parameters["BONDS"][bond_values[0]] = vib_ic[bond_values[0]]
            self.dynamic_params["BONDS"][bond_values[0]] = vib_ic[
                bond_values[0]]
            self.dynamic_params["BONDS"][bond_values[1]] = vib_ic[
                bond_values[1]]

            self.parameters["BONDS"].reset_index(inplace=True)
            self.dynamic_params["BONDS"].reset_index(inplace=True)
            with mda.Writer(self.filenames["fixed_prm"], **self.kwargs) as prm:
                prm.write(self.parameters)
            with mda.Writer(self.filenames["dynamic_prm"],
                            **self.kwargs) as prm:
                prm.write(self.dynamic_params)

            # Update the error values.
            with open(self.filenames["error_data"], "ab") as error_file:
                np.savetxt(
                    error_file,
                    self.error,
                    fmt=native_str("%15d%15.6f%15.6f%15.6f", ),  # Nix
                    delimiter=native_str(""),
                )
            logger.info(
                "Fluctuation matching cycle {} completed in {:.6f}".format(
                    i,
                    time.time() - ct))
            logger.info(
                f"{self.converge_bnd_list.sum()} not converged out of {len(self.converge_bnd_list)}"
            )

            if self.converge_bnd_list.sum() <= len(
                    self.converge_bnd_list) * 0.003:
                # if bonds to converge is less than 0.3% of total bonds, use relative difference as criteria
                # as it takes more than 100 iterations for these 0.3%  bonds to converge.
                relative_diff = (fluct_diff.iloc[:, 2] - tol) / tol

                ### To know the late converged bonds uncomment the below 5 lines ###

                # late_converged = pd.DataFrame()
                # indx = self.converge_bnd_list[self.converge_bnd_list].index.values
                # late_converged = pd.concat([fluct_diff.loc[indx], relative_diff.loc[indx]], axis=1)
                # late_converged.columns = ["I", "J", "fluct_diff_Kb", "relative_diff_kb"]
                # print(late_converged)

                self.converge_bnd_list = self.converge_bnd_list & (
                    relative_diff > 5)
                if self.converge_bnd_list.sum() == 0:
                    logger.info(
                        "Checking relative difference: All bonds converged, exiting"
                    )
                    break

        # Write one column of fluctuation differences per completed cycle.
        # The labels come from the number of collected cycles instead of the
        # loop variable, and nothing is written when no cycle ran.
        if fdiff:
            fluct_conv = pd.concat(fdiff, axis=1).round(6)
            fluct_conv.columns = list(range(1, len(fdiff) + 1))
            fluct_conv.to_csv(self.filenames["bond_convergence"])
        logger.info(
            "Fluctuation matching completed in {:.6f}".format(time.time() -
                                                              st))
        self.target["BONDS"].reset_index(inplace=True)
Ejemplo n.º 10
0
    def initialize(self, nma_exec=None, restart=False):
        """Create an elastic network model from a basic coarse-grain model.

        Without `restart`, CHARMM is run on the initial input file, the
        target table is built from the initial fluctuation and average
        internal-coordinate files, and the fixed and dynamic parameter files
        are written.  With `restart`, the parameter and initial
        internal-coordinate files are read back instead.

        Parameters
        ----------
        nma_exec : str, optional
            executable file for normal mode analysis; when ``None`` the
            ``CHARMMEXEC`` environment variable is used and, failing that,
            the result of ``util.which("charmm")``
        restart : bool, optional
            Reinitialize the object by reading files instead of doing initial
            calculations.

        Raises
        ------
        OSError
            If no CHARMM executable can be located for the initial run.
        IOError
            If a file required for restarting cannot be read.
        """
        self.restart = restart
        if not self.restart:
            # Write CHARMM input file (an existing file is reused untouched).
            if not path.exists(self.filenames["init_input"]):
                version = self.kwargs.get("charmm_version", 41)
                dimension = ("dimension chsize 1000000"
                             if version >= 36 else "")
                with open(self.filenames["init_input"],
                          mode="wb") as charmm_file:
                    logger.info("Writing CHARMM input file.")
                    charmm_inp = charmm_init.init.format(
                        flex="flex" if version else "",
                        version=version,
                        dimension=dimension,
                        **self.filenames)
                    # Drop the template's leading newline before dedenting.
                    charmm_inp = textwrap.dedent(charmm_inp[1:])
                    charmm_file.write(charmm_inp.encode())

            charmm_exec = (os.environ.get("CHARMMEXEC", util.which("charmm"))
                           if nma_exec is None else nma_exec)
            if charmm_exec is None:
                # Fail with a clear message instead of letting subprocess
                # choke on a ``None`` program name.
                msg = ("Please set CHARMMEXEC with the location of your "
                       "CHARMM executable file or add the charmm path to "
                       "your PATH environment.")
                logger.error(msg)
                raise_with_traceback(OSError(msg))
            with open(self.filenames["init_log"], "w") as log_file:
                subprocess.check_call(
                    [charmm_exec, "-i", self.filenames["init_input"]],
                    stdout=log_file,
                    stderr=subprocess.STDOUT,
                )

            # Write the parameter files.
            with reader(self.filenames["init_fluct_ic"]) as icfile:
                std_bonds = icfile.read().set_index(self.bond_def)
            with reader(self.filenames["init_avg_ic"]) as icfile:
                avg_bonds = icfile.read().set_index(self.bond_def)
            # Column order: fluctuations first, then average distances.
            target = pd.concat([std_bonds["r_IJ"], avg_bonds["r_IJ"]], axis=1)
            target.reset_index(inplace=True)
            logger.info("Calculating the initial CHARMM parameters...")
            universe = mda.Universe(self.filenames["xplor_psf_file"],
                                    self.filenames["crd_file"])
            self.target = prmutils.create_empty_parameters(
                universe, **self.kwargs)
            target.columns = self.target["BONDS"].columns
            self.target["BONDS"] = target.copy(deep=True)
            self.parameters = copy.deepcopy(self.target)
            # Initial force constant: BOLTZ divided by the squared value
            # stored in the "Kb" column (the fluctuation at this point).
            self.parameters["BONDS"]["Kb"] = (
                self.BOLTZ / self.parameters["BONDS"]["Kb"].apply(np.square))
            self.dynamic_params = copy.deepcopy(self.parameters)
            with mda.Writer(self.filenames["fixed_prm"], **self.kwargs) as prm:
                logger.info("Writing {}...".format(
                    self.filenames["fixed_prm"]))
                prm.write(self.parameters)
            with mda.Writer(self.filenames["dynamic_prm"],
                            **self.kwargs) as prm:
                logger.info("Writing {}...".format(
                    self.filenames["dynamic_prm"]))
                prm.write(self.dynamic_params)
        else:
            logger.info("FM Restarted")
            if not path.exists(self.filenames["fixed_prm"]):
                # Nothing to restart from: do the initial calculation first.
                # This nested call also sets ``self.restart`` to False.
                self.initialize(nma_exec, restart=False)
            try:
                # Read the parameter files.
                logger.info("Loading parameter and internal coordinate files.")
                with reader(self.filenames["fixed_prm"]) as fixed:
                    self.parameters.update(fixed.read())
                with reader(self.filenames["dynamic_prm"]) as dynamic:
                    self.dynamic_params.update(dynamic.read())

                # Read the initial internal coordinate files.
                with reader(self.filenames["init_avg_ic"]) as init_avg:
                    avg_table = init_avg.read().set_index(
                        self.bond_def)["r_IJ"]

                with reader(self.filenames["init_fluct_ic"]) as init_fluct:
                    fluct_table = (init_fluct.read().set_index(
                        self.bond_def)["r_IJ"])
                table = pd.concat([fluct_table, avg_table], axis=1)

                # Set the target fluctuation values.
                logger.info("Files loaded successfully...")
                self.target = copy.deepcopy(self.parameters)
                self.target["BONDS"].set_index(self.bond_def, inplace=True)
                cols = self.target["BONDS"].columns
                table.columns = cols
                self.target["BONDS"] = table.copy(deep=True).reset_index()

            except (FileNotFoundError, IOError):
                raise_with_traceback(
                    (IOError("Some files are missing. Unable to restart.")))
Ejemplo n.º 11
0
    def __init__(self, filename, **kwargs):
        """Set up parameters to run HOLE_ on PDB *filename*.

        The constructor only stores the settings, locates the executables and
        assembles the HOLE input template; it does not run HOLE itself.

        :Arguments:

          *filename*

               The *filename* is used as input for HOLE in the "COORD" card of
               the input file.  It specifies the name of a PDB co-ordinate file
               to be used. This must be in Brookhaven protein databank format
               or something closely approximating this. Both ATOM and HETATM
               records are read. Note that if water molecules or ions are
               present in the channel these can be ignored on read by the use
               of the *ignore_residues* keyword.

               **Wildcard pattern**. A new feature (in release 2.1 of HOLE) was
               the option to include a wild card (``*``) in the filename. e.g.,
               *filename* = `"ab*.pdb"` will apply hole to all files in the
               directory whose name starts with ``ab`` and ends with
               ``.pdb``. This is intended to aid the analysis of multiple
               copies of the same molecule - produced during molecular dynamics
               or other method. The hole procedure will be applied to each file
               in turn with the same setup conditions (initial point, sampling
               distance etc.). Graphics files will contain a combination of the
               individual runs, one after another. Note that the pdb files are
               read independently so that they need not have an identical
               number of atoms or atom order etc. (though they should be
               sufficiently similar for a HOLE run from identical starting
               conditions to be useful).

               .. SeeAlso::

                  An alternative way to load in multiple files is a direct read
                  from a CHARMM binary dynamics DCD coordinate file - using the
                  *dcd* keyword or use :class:`HOLEtraj`.


        :Keywords:

          *dcd*
               DCD trajectory (must be supplied together with a matching
               PDB file *filename*) and then HOLE runs its analysis on each frame.

               It does multiple HOLE runs on positions taken from a CHARMM binary
               dynamics format .DCD trajectory file. The *dcd* file must have
               exactly the same number of atoms in exactly the same order as
               the pdb file specified by *filename*. Note that if this option
               is used the pdb file is used as a template only - the
               coordinates are ignored. Note that structural parameters
               determined for each individual structure are written in a tagged
               format so that it is possible to extract the information from
               the text output file using a :program:`grep` command. The
               reading of the file can be controlled by the *step* keyword
               and/or setting :attr:`HOLE.dcd_iniskip` to the number of frames
               to be skipped initially.

               .. Note::

                  HOLE is very picky and does not read all DCD-like formats. If
                  in doubt, look into the *logfile* for error diagnostics.

                  At the moment, DCDs generated with MDAnalysis are not
                  accepted by HOLE — use :class:`HOLEtraj`, which works with
                  anything that MDAnalysis can read.

          *logfile*

               name of the file collecting HOLE's output (which can be parsed
               using :meth:`HOLE.collect`) ["hole.out"]

          *sphpdb*

               name of the HOLE sph file, a PDB-like file containing the
               coordinates of the pore centers ["hole.sph"]

               This keyword specifies the filename for output of the sphere
               centre information in pdb form. Its typical suffix is
               ".sph". The co-ordinates are set to the sphere centres and the
               occupancies are the sphere radii. All centres are assigned the
               atom name QSS and residue name SPH and the residue number is set
               to the storage number of the centre. The file can be imported
               into molecular graphics programs but are likely to be bonded
               together in an awful manner - as the points are very close to one
               another. In VMD sph objects are best displayed as
               "Points". Displaying .sph objects rather than rendered or dot
               surfaces can be useful to analyze the distance of particular
               atoms from the sphere-centre line.

               Most usefully .sph files can be used to produce molecular
               graphical output from a hole run. This is achieved by using the
               :program:`sph_process` program to read the .sph file.

          *step*

               step size for going through the trajectory (skips *step* - 1
               frames) [1]

          *cpoint*

               coordinates of a point inside the pore, e.g. ``[12.3, 0.7,
               18.55]``. If ``None`` then HOLE's own simple search algorithm is
               used.

               This specifies a point which lies within the channel, for simple
               channels such as gramicidin results do not show great
               sensitivity to the exact point taken. An easy way to produce an
               initial point is to use molecular graphics to find two atoms
               which lie either side of the pore and to average their
               co-ordinates. Or if the channel structure contains water
               molecules or counter ions then take the coordinates of one of
               these (and use the *ignore_residues* keyword to ignore them in
               the pore radius calculation).

               If this card is not specified then HOLE now (from version 2.2)
               attempts to make a guess where the channel will be. The
               procedure assumes the channel is reasonably symmetric. The
               initial guess on cpoint will be the centroid of all alpha carbon
               atoms (name 'CA' in pdb file). This is then refined by a crude
               grid search up to 5 Å from the original position. This procedure
               works most of the time but is clearly far from infallible —
               results should be carefully checked (with molecular graphics) if
               it is used. [``None``]

          *cvect*

               Search direction, should be parallel to the pore axis,
               e.g. ``[0,0,1]`` for the z-axis.

               If this keyword is ``None`` then HOLE now attempts to make a
               guess where the channel will be. The procedure assumes the
               channel is reasonably symmetric. The guess will be either along
               the X axis (1,0,0), Y axis (0,1,0) or Z axis (0,0,1). If the
               structure is not aligned on one of these axes the results will
               clearly be approximate. If a guess is used then results should
               be carefully checked. [``None``]

          *sample*
               distance of sample points in Å

               Specifies the distance between the planes used in the HOLE
               procedure. The default value is 0.2 Å, this should be reasonable
               for most purposes. However, if you wish to visualize a very
               tight constriction then specify a smaller value.  [0.2]

          *dotden*
               density of facettes for generating a 3D pore representation

               This number controls the density of dots which will be used by
               the program. A sphere of dots is placed on each centre
               determined in the Monte Carlo procedure. Only dots which do not
               lie within any other sphere are considered. The actual number of
               dots written is therefore controlled by *dotden* and
               *sample*. *dotden* should be set to between 5 (few dots per
               sphere) and 35 (large number of dots per sphere). [15]

          *endrad*
               Radius which is considered to be the end of the pore. This
               keyword can be used to specify the radius above which the
               program regards a result as indicating that the end of the pore
               has been reached. The default value is 22.0 Å. This may need to
               be increased for large channels or reduced for small. [22.0]

          *shorto*

               Determines the amount of output in the *logfile*; for automated
               processing this must be < 3. [0]

               - 0: Full text output
               - 1: All text output given except "run in progress" (i.e.,
                 detailed contemporary description of what HOLE is doing).
               - 2: Ditto plus no graph type output - only leaving minimum
                 radius and conductance calculations.
               - 3: All text output other than input card mirroring and error messages
                 turned off.

          *ignore_residues*

               sequence of three-letter residues that are not taken into account during the
               calculation; wildcards are *not* supported
               [ ``["SOL","WAT", "TIP", "HOH", "K  ", "NA ", "CL "]`` ]

          *radius*
               Path to the radii; if set to None then a set of default radii, :data:`SIMPLE2_RAD`,
               is used (an extension of ``simple.rad`` from the HOLE distribution)

               This specifies the name for the file specifying van der Waals
               radii for each atom. A number of files with different values are
               supplied with HOLE.

          *executable*

               Path to the :program:`hole` executable
               (e.g. ``/opt/hole/exe/hole``); the other programs
               :program:`sph_process` and :program:`sos_triangle` are assumed
               to live in the same directory as :program:`hole`. If
               :program:`hole` is found on the :envvar:`PATH` then the bare
               executable name is sufficient.  ["hole"]

        :Raises:

          :exc:`OSError` with :data:`errno.ENOENT` if the :program:`hole`
          executable cannot be found.
        """
        # list of temporary files, to be cleaned up on __del__
        self.tempfiles = []
        self.tempdirs = []

        self.filename = filename
        self.coordinates = self.check_and_fix_long_filename(self.filename)
        self.dcd = kwargs.pop('dcd', None)
        if self.dcd:
            self.dcd = self.check_and_fix_long_filename(self.dcd)
        self.dcd_step = kwargs.pop("step", 1) - 1  # HOLE docs description is confusing: step or skip??
        # Always starts at 0; change the attribute afterwards to skip frames.
        self.dcd_iniskip = 0
        self.cpoint = kwargs.pop("cpoint", None)
        self.cvect = kwargs.pop("cvect", None)
        self.sample = float(kwargs.pop("sample", 0.20))
        self.dotden = int(kwargs.pop("dotden", 15))
        self.endrad = float(kwargs.pop("endrad", 22.))
        self.shorto = int(kwargs.pop("shorto", 0))  # look at using SHORTO 2 for minimum output
        self.ignore_residues = kwargs.pop("ignore_residues", self.default_ignore_residues)
        # Fall back to a freshly written default radius file when no (or an
        # empty) *radius* is supplied.
        self.radius = self.check_and_fix_long_filename(
            realpath(kwargs.pop('radius', None) or write_simplerad2()))

        logger.info("Setting up HOLE analysis for %(filename)r", vars(self))
        logger.info("Using radius file %(radius)r", vars(self))

        # guess executables
        self.exe = {}
        hole_exe_name = kwargs.pop('executable', 'hole')
        self.exe['hole'] = which(hole_exe_name)
        if self.exe['hole'] is None:
            # NB: the messages below are formatted from vars(), so the local
            # name ``hole_exe_name`` must not be renamed.
            errmsg = "HOLE binary %(hole_exe_name)r not found." % vars()
            logger.critical(errmsg)
            logger.critical("%(hole_exe_name)r must be on the PATH or provided as keyword argument 'executable'.",
                            vars())
            raise OSError(errno.ENOENT, errmsg)
        # The helper programs are expected next to the hole binary.
        holepath = os.path.dirname(self.exe['hole'])
        self.exe['sos_triangle'] = os.path.join(holepath, "sos_triangle")
        self.exe['sph_process'] = os.path.join(holepath, "sph_process")

        self.sphpdb = kwargs.pop("sphpdb", "hole.sph")
        self.logfile = kwargs.pop("logfile", "hole.out")

        # %-style template of the HOLE input file; optional cards are
        # appended below depending on the supplied keywords.
        self.template = textwrap.dedent("""
            ! Input file for Oliver Smart's HOLE program
            ! written by MDAnalysis.analysis.hole.HOLE
            ! filename = %(filename)s
            COORD  %(coordinates)s
            RADIUS %(radius)s
            SPHPDB %(sphpdb)s
            SAMPLE %(sample)f
            ENDRAD %(endrad)f
            IGNORE %(ignore)s
            SHORTO %(shorto)d
            """)
        if self.cpoint is not None:
            # note: if it is None then we can't change this with a kw for run() !!
            self.template += "CPOINT %(cpoint_xyz)s\n"
        else:
            logger.info("HOLE will guess CPOINT")
        if self.cvect is not None:
            # note: if it is None then we can't change this with a kw for run() !!
            self.template += "CVECT  %(cvect_xyz)s\n"
        else:
            logger.info("HOLE will guess CVECT")

        if self.dcd:
            # CHARMD -- DCD (matches COORD)
            # CHARMS int int -- ignore_first_N_frames   skip_every_X_frames
            #        http://s3.smartsci.uk/hole2/doc/old/hole_d03.html#CHARMD
            self.template += "\nCHARMD %(dcd)s\nCHARMS %(dcd_iniskip)d %(dcd_step)d\n"

        # sanity checks
        if self.shorto > 2:
            # Logger.warn() is deprecated; warning() is the supported name.
            logger.warning("SHORTO (%d) needs to be < 3 in order to extract a HOLE profile!",
                           self.shorto)
        # NB: ``program`` and ``path`` are looked up by name through vars().
        for program, path in self.exe.items():
            if path is None or which(path) is None:
                logger.error("Executable %(program)r not found, should have been %(path)r.",
                             vars())
        # results
        self.profiles = {}
Ejemplo n.º 12
0
    def __init__(self, filename, **kwargs):
        """Set up parameters to run X3DNA_ on PDB *filename*.

        The constructor only stores the settings, locates the executable and
        selects the command template; it does not run X3DNA itself.

        Parameters
        ----------
        filename : str
               The `filename` is used as input for X3DNA in the
               :program:`xdna_ensemble` command.  It specifies the name of a
               PDB coordinate file to be used. This must be in Brookhaven
               protein databank format or something closely approximating
               this.
        executable : str (optional)
               Name of, or path to, the :program:`xdna_ensemble` executable
               (default: "xdna_ensemble"); it is resolved with ``which``. The
               X3DNA directories (e.g. ``/opt/x3dna/2.1`` and
               ``/opt/x3dna/2.1/bin``) must be set and exported in the
               bashrc file. See X3DNA documentation for set-up instructions.
        x3dna_param : bool (optional)
               Determines whether base step or base pair parameters will be
               calculated. If ``True`` (default) then stacked *base step*
               parameters will be analyzed.  If ``False`` then stacked *base
               pair* parameters will be analyzed.
        logfile : str (optional)
               Write output from X3DNA to `logfile` (default: "bp_step.par")

        Raises
        ------
        OSError
               With :data:`errno.ENOENT` if the executable cannot be found.


        See Also
        --------
        :class:`X3DNAtraj`
        """
        # list of temporary files, to be cleaned up on __del__
        self.tempfiles = [
            "auxiliary.par", "bestpairs.pdb", "bp_order.dat", "bp_helical.par", "cf_7methods.par",
            "col_chains.scr", "col_helices.scr", "hel_regions.pdb", "ref_frames.dat", "hstacking.pdb", "stacking.pdb"
        ]
        self.tempdirs = []
        self.filename = filename

        logger.info("Setting up X3DNA analysis for %(filename)r", vars(self))

        # guess executables
        self.exe = {}
        x3dna_exe_name = kwargs.pop('executable', 'xdna_ensemble')
        self.x3dna_param = kwargs.pop('x3dna_param', True)
        self.exe['xdna_ensemble'] = which(x3dna_exe_name)
        if self.exe['xdna_ensemble'] is None:
            # NB: the messages below are formatted from vars(), so the local
            # name ``x3dna_exe_name`` must not be renamed.
            errmsg = "X3DNA binary {x3dna_exe_name!r} not found.".format(**vars())
            logger.critical(errmsg)
            logger.critical("%(x3dna_exe_name)r must be on the PATH or provided as keyword argument 'executable'.",
                            vars())
            raise OSError(errno.ENOENT, errmsg)
        self.logfile = kwargs.pop("logfile", "bp_step.par")

        # Only an explicit ``False`` selects the ensemble command.
        # NOTE(review): this template calls "x3dna_ensemble" while the
        # executable looked up above defaults to "xdna_ensemble" -- confirm
        # which spelling is intended.
        if self.x3dna_param is False:
            self.template = """x3dna_ensemble analyze -b 355d.bps --one %(filename)r """
        else:
            self.template = """find_pair -s %(filename)r stdout |analyze stdin """

        # sanity checks
        # NB: ``program`` and ``path`` are looked up by name through vars().
        for program, path in self.exe.items():
            if path is None or which(path) is None:
                logger.error("Executable %(program)r not found, should have been %(path)r.",
                             vars())
        # results
        self.profiles = OrderedDict()
Ejemplo n.º 13
0
    def __init__(self, filename, **kwargs):
        """Set up parameters to run X3DNA_ on PDB *filename*.

        The constructor only prepares state: it records the input file,
        locates the executable, picks the command template and creates an
        empty result container.  No external program is run here.

        :Arguments:

          *filename*

               The *filename* is used as input for X3DNA in the "xdna_ensemble"
               command.  It specifies the name of a PDB coordinate file
               to be used. This must be in Brookhaven protein databank format
               or something closely approximating this. Both ATOM and HETATM
               records are read.

        :Keywords:

          *executable*

               Name of (or path to) the :program:`xdna_ensemble` executable
               [Default is ``"xdna_ensemble"``].  It is resolved with
               ``which``; the X3DNA directories (e.g. ``/opt/x3dna/2.1`` and
               ``/opt/x3dna/2.1/bin``) must therefore be set up and exported
               (for instance in the bashrc file).  See the X3DNA
               documentation for set-up instructions.

          *x3dna_param*

               Determines whether base step or base pair parameters will be
               calculated. If True then stacked base step parameters will be
               analyzed [Default is True].  If False then stacked base pair
               parameters will be analyzed.  The test is ``is False``, so
               only the literal ``False`` selects the second mode.

          *logfile*

               Stored unchanged as :attr:`logfile`
               [Default is ``"bp_step.par"``].

        Any other keyword argument is ignored by this constructor.

        :Raises: :exc:`OSError` with :data:`errno.ENOENT` if the executable
                 cannot be found.
        """
        # list of temporary files, to be cleaned up on __del__
        # (the clean-up itself is implemented outside this method)
        self.tempfiles = [
            "auxiliary.par", "bestpairs.pdb", "bp_order.dat", "bp_helical.par", "cf_7methods.par",
            "col_chains.scr", "col_helices.scr", "hel_regions.pdb", "ref_frames.dat", "hstacking.pdb", "stacking.pdb"
        ]
        self.tempdirs = []
        self.filename = filename

        logger.info("Setting up X3DNA analysis for %(filename)r", vars(self))

        # guess executables
        self.exe = {}
        x3dna_exe_name = kwargs.pop('executable', 'xdna_ensemble')
        self.x3dna_param = kwargs.pop('x3dna_param', True)
        self.exe['xdna_ensemble'] = which(x3dna_exe_name)
        if self.exe['xdna_ensemble'] is None:
            errmsg = "X3DNA binary {0!r} not found.".format(x3dna_exe_name)
            logger.critical(errmsg)
            logger.critical("%r must be on the PATH or provided as keyword argument 'executable'.",
                            x3dna_exe_name)
            raise OSError(errno.ENOENT, errmsg)
        self.logfile = kwargs.pop("logfile", "bp_step.par")

        # Command templates; the %(filename)r placeholder is left unexpanded
        # here (presumably filled in when the command is run -- confirm).
        if self.x3dna_param is False:
            self.template = textwrap.dedent("""x3dna_ensemble analyze -b 355d.bps --one %(filename)r """)
        else:
            self.template = textwrap.dedent("""find_pair -s %(filename)r stdout |analyze stdin """)

        # sanity checks: report (but do not raise for) any registered
        # executable that cannot be resolved
        for program, exe_path in self.exe.items():
            if exe_path is None or which(exe_path) is None:
                logger.error("Executable %r not found, should have been %r.",
                             program, exe_path)
        # results
        self.profiles = {}
Ejemplo n.º 14
0
    def run(self, nma_exec=None, tol=1.e-4, n_cycles=250):
        """Perform a self-consistent fluctuation matching.

        Every cycle runs CHARMM on the generated input file, reads the
        average and fluctuation internal-coordinate tables, derives updated
        bond force constants from their difference to the target table,
        rewrites both parameter files and appends one record to the error
        file.  The loop ends after `n_cycles` cycles or as soon as the
        r.m.s.d. between successive force constants is below `tol`.

        Parameters
        ----------
        nma_exec : str, optional
            executable file for normal mode analysis. If None, the
            ``CHARMMEXEC`` environment variable is used, falling back to a
            ``charmm`` executable looked up with ``util.which``.
        tol : float, optional
            error tolerance
        n_cycles : int, optional
            number of fluctuation matching cycles

        Raises
        ------
        OSError
            If no CHARMM executable can be determined.
        IOError
            If the parameters are empty and re-initialization in restart
            mode fails with an IOError.
        subprocess.CalledProcessError
            If CHARMM exits with a non-zero status.
        """
        # Find CHARMM executable
        charmm_exec = (os.environ.get("CHARMMEXEC", util.which("charmm"))
                       if nma_exec is None else nma_exec)
        if charmm_exec is None:
            missing_exec = (
                "Please set CHARMMEXEC with the location of your CHARMM "
                "executable file or add the charmm path to your PATH "
                "environment.")
            # No exception is being handled at this point, so log a plain
            # error; logger.exception() would attach an empty traceback.
            logger.error(missing_exec)
            raise_with_traceback(OSError(missing_exec))

        # Read the parameters
        if not self.parameters:
            try:
                self.initialize(nma_exec, restart=True)
            except IOError:
                raise_with_traceback(
                    (IOError("Some files are missing. Unable to restart.")))

        # Write CHARMM input file.
        if not path.exists(self.filenames["charmm_input"]):
            version = self.kwargs.get("charmm_version", 41)
            dimension = ("dimension chsize 1000000" if version >= 36 else "")
            with open(
                    self.filenames["charmm_input"], mode="wb") as charmm_file:
                logger.info("Writing CHARMM input file.")
                # NOTE(review): `flex` depends only on the truthiness of
                # `version`, which defaults to 41 -- confirm whether a
                # version threshold was intended.
                charmm_inp = charmm_nma.nma.format(
                    temperature=self.temperature,
                    flex="flex" if version else "",
                    version=version,
                    dimension=dimension,
                    **self.filenames)
                # Drop the first character of the template before dedenting.
                charmm_inp = textwrap.dedent(charmm_inp[1:])
                charmm_file.write(charmm_inp.encode())

        # Set the indices for the parameter tables.
        self.target["BONDS"].set_index(self.bond_def, inplace=True)
        bond_values = self.target["BONDS"].columns

        # Check for restart.
        try:
            if os.stat(self.filenames["error_data"]).st_size > 0:
                with open(self.filenames["error_data"], "rb") as data:
                    error_info = pd.read_csv(
                        data,
                        header=0,
                        skipinitialspace=True,
                        delim_whitespace=True)
                    if not error_info.empty:
                        self.error["step"] = error_info["step"].values[-1]
            else:
                # An empty error file is handled like a missing one.
                raise FileNotFoundError
        except (FileNotFoundError, OSError):
            # Start a fresh error file containing only the header line.
            with open(self.filenames["error_data"], "wb") as data:
                np.savetxt(
                    data, [
                        self.error_hdr,
                    ],
                    fmt=native_str("%10s"),
                    delimiter=native_str(""))
        self.error["step"] += 1

        # Run simulation
        logger.info("Starting fluctuation matching")
        st = time.time()

        for i in range(n_cycles):
            # NOTE(review): this assignment replaces the step value restored
            # from the error file above, so numbering starts at 1 again on a
            # restart -- confirm that this is intended.
            self.error["step"] = i + 1
            with open(self.filenames["charmm_log"], "w") as log_file:
                subprocess.check_call(
                    [charmm_exec, "-i", self.filenames["charmm_input"]],
                    stdout=log_file,
                    stderr=subprocess.STDOUT,
                )
            self.dynamic_params["BONDS"].set_index(self.bond_def, inplace=True)
            self.parameters["BONDS"].set_index(self.bond_def, inplace=True)

            # Read the average bond distance.
            with reader(self.filenames["avg_ic"]) as intcor:
                avg_ic = intcor.read().set_index(self.bond_def)["r_IJ"]

            # Read the bond fluctuations.
            with reader(self.filenames["fluct_ic"]) as intcor:
                fluct_ic = intcor.read().set_index(self.bond_def)["r_IJ"]

            # Fluctuations first, averages second, labelled with the column
            # names of the target table.
            vib_ic = pd.concat([fluct_ic, avg_ic], axis=1)
            vib_ic.columns = bond_values

            # Calculate the r.m.s.d. between fluctuation and distances
            # compared with the target values.
            vib_error = self.target["BONDS"] - vib_ic
            vib_error = vib_error.apply(np.square).mean(axis=0)
            vib_error = np.sqrt(vib_error)
            # The two r.m.s.d. values go into the last two error columns.
            self.error[self.error.columns[-2:]] = vib_error.T.values

            # Calculate the new force constant.
            # Correction = (1/current**2 - 1/target**2) * BOLTZ * KFACTOR,
            # subtracted from the present force constant.
            optimized = vib_ic.apply(np.reciprocal).apply(np.square)
            target = self.target["BONDS"].apply(np.reciprocal).apply(np.square)
            optimized -= target
            optimized *= self.BOLTZ * self.KFACTOR
            vib_ic[bond_values[0]] = (self.parameters["BONDS"][bond_values[0]]
                                      - optimized[bond_values[0]])
            # Negative force constants are replaced by zero.
            vib_ic[bond_values[0]] = (vib_ic[bond_values[0]].where(
                vib_ic[bond_values[0]] >= 0., 0.))

            # r.m.s.d. between previous and current force constant
            diff = self.dynamic_params["BONDS"] - vib_ic
            diff = diff.apply(np.square).mean(axis=0)
            diff = np.sqrt(diff)
            self.error[self.error.columns[1]] = diff.values[0]

            # Update the parameters and write to file.
            self.parameters["BONDS"][bond_values[0]] = (
                vib_ic[bond_values[0]].copy(deep=True))
            self.dynamic_params["BONDS"] = vib_ic.copy(deep=True)
            self.parameters["BONDS"].reset_index(inplace=True)
            self.dynamic_params["BONDS"].reset_index(inplace=True)
            with mda.Writer(self.filenames["fixed_prm"], **self.kwargs) as prm:
                prm.write(self.parameters)
            with mda.Writer(self.filenames["dynamic_prm"],
                            **self.kwargs) as prm:
                prm.write(self.dynamic_params)

            # Update the error values.
            with open(self.filenames["error_data"], "ab") as error_file:
                np.savetxt(
                    error_file,
                    self.error,
                    fmt=native_str("%10d%10.6f%10.6f%10.6f", ),
                    delimiter=native_str(""),
                )

            # Converged once the force-constant r.m.s.d. is below `tol`.
            if (self.error[self.error.columns[1]] < tol).bool():
                break

        logger.info("Fluctuation matching completed in {:.6f}".format(
            time.time() - st))
        self.target["BONDS"].reset_index(inplace=True)
Ejemplo n.º 15
0
    def __init__(self, filename, **kwargs):
        """Set up parameters to run X3DNA_ on PDB *filename*.

        The constructor only prepares state: it records the input file,
        locates the executable, picks the command template and creates an
        empty result container.  No external program is run here.

        :Arguments:

          *filename*

               The *filename* is used as input for X3DNA in the "xdna_ensemble"
               command.  It specifies the name of a PDB coordinate file
               to be used. This must be in Brookhaven protein databank format
               or something closely approximating this. Both ATOM and HETATM
               records are read.

        :Keywords:

          *executable*

               Name of (or path to) the :program:`xdna_ensemble` executable
               [Default is ``"xdna_ensemble"``].  It is resolved with
               ``which``; the X3DNA directories (e.g. ``/opt/x3dna/2.1`` and
               ``/opt/x3dna/2.1/bin``) must therefore be set up and exported
               (for instance in the bashrc file).  See the X3DNA
               documentation for set-up instructions.

          *x3dna_param*

               Determines whether base step or base pair parameters will be
               calculated. If True then stacked base step parameters will be
               analyzed [Default is True].  If False then stacked base pair
               parameters will be analyzed.  The test is ``is False``, so
               only the literal ``False`` selects the second mode.

          *logfile*

               Stored unchanged as :attr:`logfile`
               [Default is ``"bp_step.par"``].

        Any other keyword argument is ignored by this constructor.

        :Raises: :exc:`OSError` with :data:`errno.ENOENT` if the executable
                 cannot be found.
        """
        # list of temporary files, to be cleaned up on __del__
        # (the clean-up itself is implemented outside this method)
        self.tempfiles = [
            "auxiliary.par", "bestpairs.pdb", "bp_order.dat", "bp_helical.par",
            "cf_7methods.par", "col_chains.scr", "col_helices.scr",
            "hel_regions.pdb", "ref_frames.dat", "hstacking.pdb",
            "stacking.pdb"
        ]
        self.tempdirs = []
        self.filename = filename

        logger.info("Setting up X3DNA analysis for %(filename)r", vars(self))

        # guess executables
        self.exe = {}
        x3dna_exe_name = kwargs.pop('executable', 'xdna_ensemble')
        self.x3dna_param = kwargs.pop('x3dna_param', True)
        self.exe['xdna_ensemble'] = which(x3dna_exe_name)
        if self.exe['xdna_ensemble'] is None:
            errmsg = "X3DNA binary {0!r} not found.".format(x3dna_exe_name)
            logger.critical(errmsg)
            logger.critical(
                "%r must be on the PATH or provided as keyword argument 'executable'.",
                x3dna_exe_name)
            raise OSError(errno.ENOENT, errmsg)
        self.logfile = kwargs.pop("logfile", "bp_step.par")

        # Command templates; the %(filename)r placeholder is left unexpanded
        # here (presumably filled in when the command is run -- confirm).
        if self.x3dna_param is False:
            self.template = textwrap.dedent(
                """x3dna_ensemble analyze -b 355d.bps --one %(filename)r """)
        else:
            self.template = textwrap.dedent(
                """find_pair -s %(filename)r stdout |analyze stdin """)

        # sanity checks: report (but do not raise for) any registered
        # executable that cannot be resolved
        for program, exe_path in self.exe.items():
            if exe_path is None or which(exe_path) is None:
                logger.error(
                    "Executable %r not found, should have been %r.",
                    program, exe_path)
        # results
        self.profiles = {}