Exemple #1
0
def ff99(molecule, work_dir=None, property_map={}):
    """Launch a parameterisation of a molecule with the ff99 force field.

       Parameters
       ----------

       molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The molecule to parameterise.

       work_dir : str
           The working directory for the process.

       property_map : dict
           A dictionary that maps system "properties" to their user defined
           values. This allows the user to refer to properties with their
           own naming scheme, e.g. { "charge" : "my-charge" }

       Returns
       -------

       process
           The object created by ``_Process(...)`` with ``auto_start=True``.
           This is a handle to the parameterisation that was started, not
           the parameterised molecule itself.

       Raises
       ------

       MissingSoftwareError
           If AMBER is unavailable and GROMACS is unavailable too (either
           the executable or the GROMACS path is missing).

       TypeError
           If 'molecule' or 'property_map' is not exactly of the required
           type.
    """

    # A backend is required: either AMBER, or GROMACS with both its
    # executable and its path known.
    if _amber_home is None:
        if _gmx_exe is None or _gromacs_path is None:
            raise _MissingSoftwareError(
                "'BioSimSpace.Parameters.ff99' is not supported. "
                "Please install AMBER (http://ambermd.org) or "
                "GROMACS (http://www.gromacs.org).")

    # Argument validation. Exact type matches only, subclasses are rejected.

    if not (type(molecule) is _Molecule):
        raise TypeError(
            "'molecule' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if not (type(property_map) is dict):
        raise TypeError("'property_map' must be of type 'dict'")

    # Build the default ff99 protocol, then start the process straight away
    # and hand its handle back to the caller.
    ff99_protocol = _Protocol.FF99(property_map=property_map)
    return _Process(molecule,
                    ff99_protocol,
                    work_dir=work_dir,
                    auto_start=True)
    def __init__(self, protocol=None, work_dir=None, engine=None):
        """Constructor.

           Parameters
           ----------

           protocol : :class:`Protocol.FreeEnergy <BioSimSpace.Protocol.FreeEnergy>`
               The simulation protocol. A default constructed protocol is
               used when this is None.

           work_dir : str
               The working directory for the simulation. A temporary
               directory is created when this is None, otherwise the
               directory is created if it doesn't already exist.

           engine: str
               The molecular dynamics engine used to run the simulation. Available
               options are "GROMACS", or "SOMD". Spaces and case are ignored.
               If this argument is omitted then "SOMD" is used.

           Raises
           ------

           Exception
               If the FreeEnergy base class is instantiated directly.

           TypeError
               If 'protocol' or 'engine' is of the wrong type.

           ValueError
               If 'engine' is not one of the supported engines.

           MissingSoftwareError
               If the GROMACS engine is requested but no GROMACS
               executable is available.
        """

        # Don't allow user to create an instance of this base class.
        if type(self) is FreeEnergy:
            raise Exception("<FreeEnergy> must be subclassed.")

        # Flag that this is a dual leg simulation (default).
        self._is_dual = True

        # Validate the protocol, falling back on a default one.
        if protocol is None:
            # Construct the default from the same type that user supplied
            # protocols are validated against.
            self._protocol = _Protocol.FreeEnergy()
        elif type(protocol) is not _Protocol.FreeEnergy:
            raise TypeError(
                "'protocol' must be of type 'BioSimSpace.Protocol.FreeEnergy'"
            )
        else:
            self._protocol = protocol

        # Create a temporary working directory and store the directory name.
        if work_dir is None:
            # Keep a reference to the TemporaryDirectory object alongside
            # its name.
            self._tmp_dir = _tempfile.TemporaryDirectory()
            self._work_dir = self._tmp_dir.name

        # User specified working directory.
        else:
            self._work_dir = work_dir

            # Create the directory if it doesn't already exist.
            if not _os.path.isdir(work_dir):
                _os.makedirs(work_dir, exist_ok=True)

        # Validate the user specified molecular dynamics engine.
        if engine is None:
            # Use SOMD as a default.
            engine = "SOMD"
        else:
            if type(engine) is not str:
                raise TypeError("'engine' must be of type 'str'.")

            # Strip whitespace from engine and convert to upper case.
            engine = engine.replace(" ", "").upper()

            # Check that the engine is supported. Both format specifiers
            # need an argument: the rejected engine and the supported list.
            if engine not in self._engines:
                raise ValueError("Unsupported molecular dynamics engine '%s'. "
                                 "Supported engines are: %r." %
                                 (engine, ", ".join(self._engines)))

            # Make sure GROMACS is installed if GROMACS engine is selected.
            if engine == "GROMACS" and _gmx_exe is None:
                raise _MissingSoftwareError(
                    "Cannot use GROMACS engine as GROMACS is not installed!")

        # Set the engine.
        self._engine = engine
class FreeEnergy():
    """Base class for configuring and running free energy simulations.

       The statements in the class body run once, when the class is
       defined. They locate the "analyse_freenrg" script and raise a
       MissingSoftwareError if it cannot be found, so a missing script
       is reported when the class is created rather than at analysis time.
    """

    # Check that the analyse_freenrg script exists.
    # On non-Windows platforms the script is looked up in the bin directory.
    if _sys.platform != "win32":
        _analyse_freenrg = _os.path.join(_getBinDir(), "analyse_freenrg")
    # On Windows the Python script in the share directory is used instead.
    else:
        _analyse_freenrg = _os.path.join(_os.path.normpath(_getShareDir()),
                                         "scripts", "analyse_freenrg.py")
    # The existence check must happen while the attribute is still a plain
    # path, i.e. before the interpreter is prepended below.
    if not _os.path.isfile(_analyse_freenrg):
        raise _MissingSoftwareError(
            "Cannot find free energy analysis script in expected location: '%s'"
            % _analyse_freenrg)
    # On Windows, turn the path into a command by prefixing it with the
    # "sire_python.exe" interpreter from the bin directory.
    if _sys.platform == "win32":
        _analyse_freenrg = "%s %s" % (_os.path.join(
            _os.path.normpath(_getBinDir()),
            "sire_python.exe"), _analyse_freenrg)

    # Create a list of supported molecular dynamics engines.
    # Engine names passed to the constructor are upper-cased before being
    # checked against this list.
    _engines = ["GROMACS", "SOMD"]

    def __init__(self, protocol=None, work_dir=None, engine=None):
        """Constructor.

           Parameters
           ----------

           protocol : :class:`Protocol.FreeEnergy <BioSimSpace.Protocol.FreeEnergy>`
               The simulation protocol. A default constructed protocol is
               used when this is None.

           work_dir : str
               The working directory for the simulation. A temporary
               directory is created when this is None, otherwise the
               directory is created if it doesn't already exist.

           engine: str
               The molecular dynamics engine used to run the simulation. Available
               options are "GROMACS", or "SOMD". Spaces and case are ignored.
               If this argument is omitted then "SOMD" is used.

           Raises
           ------

           Exception
               If the FreeEnergy base class is instantiated directly.

           TypeError
               If 'protocol' or 'engine' is of the wrong type.

           ValueError
               If 'engine' is not one of the supported engines.

           MissingSoftwareError
               If the GROMACS engine is requested but no GROMACS
               executable is available.
        """

        # Don't allow user to create an instance of this base class.
        if type(self) is FreeEnergy:
            raise Exception("<FreeEnergy> must be subclassed.")

        # Flag that this is a dual leg simulation (default).
        self._is_dual = True

        # Validate the protocol, falling back on a default one.
        if protocol is None:
            # Construct the default from the same type that user supplied
            # protocols are validated against.
            self._protocol = _Protocol.FreeEnergy()
        elif type(protocol) is not _Protocol.FreeEnergy:
            raise TypeError(
                "'protocol' must be of type 'BioSimSpace.Protocol.FreeEnergy'"
            )
        else:
            self._protocol = protocol

        # Create a temporary working directory and store the directory name.
        if work_dir is None:
            # Keep a reference to the TemporaryDirectory object alongside
            # its name.
            self._tmp_dir = _tempfile.TemporaryDirectory()
            self._work_dir = self._tmp_dir.name

        # User specified working directory.
        else:
            self._work_dir = work_dir

            # Create the directory if it doesn't already exist.
            if not _os.path.isdir(work_dir):
                _os.makedirs(work_dir, exist_ok=True)

        # Validate the user specified molecular dynamics engine.
        if engine is None:
            # Use SOMD as a default.
            engine = "SOMD"
        else:
            if type(engine) is not str:
                raise TypeError("'engine' must be of type 'str'.")

            # Strip whitespace from engine and convert to upper case.
            engine = engine.replace(" ", "").upper()

            # Check that the engine is supported. Both format specifiers
            # need an argument: the rejected engine and the supported list.
            if engine not in self._engines:
                raise ValueError("Unsupported molecular dynamics engine '%s'. "
                                 "Supported engines are: %r." %
                                 (engine, ", ".join(self._engines)))

            # Make sure GROMACS is installed if GROMACS engine is selected.
            if engine == "GROMACS" and _gmx_exe is None:
                raise _MissingSoftwareError(
                    "Cannot use GROMACS engine as GROMACS is not installed!")

        # Set the engine.
        self._engine = engine

    def run(self):
        """Start all of the processes held by the process runner."""
        # The runner is stored on the instance as 'self._runner'.
        runner = self._runner
        runner.startAll()

    def _analyse_gromacs(self):
        """Analyse the GROMACS free energy data.

           The "bar" tool of the GROMACS executable is run on the "*.xvg"
           files within the "lambda_*" directories of each leg. Its output
           is then accumulated into a PMF for that leg. The second leg is
           only processed for dual leg simulations.

           Returns
           -------

           pmf0 : [(float, :class:`Energy <BioSimSpace.Types.Energy>`, :class:`Energy <BioSimSpace.Types.Energy>`)]
               The potential of mean force (PMF) for the first leg of the
               simulation. The data is a list of tuples, where each tuple
               contains the lambda value, the PMF, and the standard error.

           pmf1 : [(float, :class:`Energy <BioSimSpace.Types.Energy>`, :class:`Energy <BioSimSpace.Types.Energy>`)]
               The potential of mean force (PMF) for the second leg of the
               simulation. The data is a list of tuples, where each tuple
               contains the lambda value, the PMF, and the standard error.
               This is an empty list for single leg simulations.

           free_energy : (:class:`Energy <BioSimSpace.Types.Energy>`, :class:`Energy <BioSimSpace.Types.Energy>`)
               The free energy difference and its associated error.

           None is returned, instead of the tuple above, if a "bar" command
           exits with a non-zero return code.
        """

        def run_bar(directory, output):
            """Run "bar" for one leg. Return True on a zero exit code."""
            command = "%s bar -f %s/lambda_*/*.xvg -o %s" % (
                _gmx_exe, directory, output)

            # The command is run through the shell so that the wildcards
            # in the input path are expanded.
            proc = _subprocess.run(command,
                                   shell=True,
                                   stdout=_subprocess.PIPE,
                                   stderr=_subprocess.PIPE)
            return proc.returncode == 0

        def read_pmf(path):
            """Accumulate the BAR records of an xvg file into a PMF."""
            with open(path) as file:
                # Ignore comments, xmgrace directives, and blank lines.
                # (A blank line has no columns to extract.)
                records = [line.split() for line in file
                           if line.strip()
                           and not line.startswith(("#", "@"))]

            # Store the initial free energy reading.
            pmf = [(0.0, 0.0 * _Units.Energy.kcal_per_mol,
                    0.0 * _Units.Energy.kcal_per_mol)]

            # Zero the accumulated free energy difference and error.
            total_freenrg = 0
            total_error = 0

            num_records = len(records)

            # Process the BAR data.
            for x, data in enumerate(records, start=1):
                # Update the total free energy difference.
                total_freenrg += float(data[1])

                # Update the accumulated error. (Added in quadrature.)
                error = float(data[2])
                total_error = _math.sqrt(total_error * total_error +
                                         error * error)

                # Append the data. The lambda value is the fraction of
                # records processed so far.
                pmf.append((x / num_records,
                            (total_freenrg * _Units.Energy.kt).kcal_per_mol(),
                            (total_error * _Units.Energy.kt).kcal_per_mol()))

            return pmf

        def leg_error(pmf):
            """Combine the errors of the end points of a PMF in quadrature."""
            first = pmf[0][2].magnitude()
            last = pmf[-1][2].magnitude()
            return _math.sqrt((last * last) + (first * first))

        # Run the command for the first leg.
        output0 = "%s/bar_leg0.xvg" % self._work_dir
        if not run_bar(self._dir0, output0):
            return None

        # Run the command for the second leg. The directory of the second
        # leg is only referenced for dual leg simulations.
        if self._is_dual:
            output1 = "%s/bar_leg1.xvg" % self._work_dir
            if not run_bar(self._dir1, output1):
                return None

        # Extract the data from the output files.
        leg0 = read_pmf(output0)
        leg1 = read_pmf(output1) if self._is_dual else []

        # Work out the difference in free energy.
        if self._is_dual:
            free_energy = (leg0[-1][1] - leg0[0][1]) - (leg1[-1][1] -
                                                        leg1[0][1])
        else:
            free_energy = leg0[-1][1] - leg0[0][1]

        # Propagate the errors. (These add in quadrature.)
        error0 = leg_error(leg0)
        error1 = leg_error(leg1) if self._is_dual else 0

        # Free energy difference.
        error = _math.sqrt((error0 * error0) +
                           (error1 * error1)) * _Units.Energy.kcal_per_mol

        # Bundle the free energy and its associated error.
        free_energy = (free_energy, error)

        return (leg0, leg1, free_energy)

    def _analyse_somd(self):
        """Analyse the SOMD free energy data.

           The "analyse_freenrg" command is run in "mbar" mode on the
           "simfile.dat" files within the "lambda_*" directories of each
           leg. The "PMF from MBAR" section of its output is then parsed.
           The second leg is only processed for dual leg simulations.

           Returns
           -------

           pmf0 : [(float, :class:`Energy <BioSimSpace.Types.Energy>`, :class:`Energy <BioSimSpace.Types.Energy>`)]
               The potential of mean force (PMF) for the first leg of the
               simulation. The data is a list of tuples, where each tuple
               contains the lambda value, the PMF, and the standard error.

           pmf1 : [(float, :class:`Energy <BioSimSpace.Types.Energy>`, :class:`Energy <BioSimSpace.Types.Energy>`)]
               The potential of mean force (PMF) for the second leg of the
               simulation. The data is a list of tuples, where each tuple
               contains the lambda value, the PMF, and the standard error.
               This is an empty list for single leg simulations.

           free_energy : (:class:`Energy <BioSimSpace.Types.Energy>`, :class:`Energy <BioSimSpace.Types.Energy>`)
               The free energy difference and its associated error.

           None is returned, instead of the tuple above, if an analysis
           command exits with a non-zero return code.
        """

        def run_mbar(directory, output):
            """Run the analysis for one leg. Return True on a zero exit code."""
            command = "%s mbar -i %s/lambda_*/simfile.dat -o %s" % (
                self._analyse_freenrg, directory, output)

            # The command is run through the shell so that the wildcard
            # in the input path is expanded.
            proc = _subprocess.run(command,
                                   shell=True,
                                   stdout=_subprocess.PIPE,
                                   stderr=_subprocess.PIPE)
            return proc.returncode == 0

        def read_pmf(path):
            """Extract the "PMF from MBAR" records from an output file."""
            with open(path) as file:
                lines = [line.rstrip() for line in file]

            pmf = []

            # Find the MBAR data.
            for x, line in enumerate(lines):
                if "PMF from MBAR" in line:
                    # Read records until the next comment, or the end of
                    # the file if no further comment follows.
                    for record in lines[x + 1:]:
                        if record.startswith("#"):
                            break

                        data = record.split()

                        # Skip blank lines, which hold no data.
                        if not data:
                            continue

                        pmf.append(
                            (float(data[0]),
                             float(data[1]) * _Units.Energy.kcal_per_mol,
                             float(data[2]) * _Units.Energy.kcal_per_mol))

                    # Only the first matching section is used.
                    break

            return pmf

        def leg_error(pmf):
            """Combine the errors of the end points of a PMF in quadrature."""
            first = pmf[0][2].magnitude()
            last = pmf[-1][2].magnitude()
            return _math.sqrt((last * last) + (first * first))

        # Run the command for the first leg.
        output0 = "%s/mbar_leg0.txt" % self._work_dir
        if not run_mbar(self._dir0, output0):
            return None

        # Run the command for the second leg. The directory of the second
        # leg is only referenced for dual leg simulations.
        if self._is_dual:
            output1 = "%s/mbar_leg1.txt" % self._work_dir
            if not run_mbar(self._dir1, output1):
                return None

        # Extract the data from the output files.
        leg0 = read_pmf(output0)
        leg1 = read_pmf(output1) if self._is_dual else []

        # Work out the difference in free energy.
        # NOTE: indexing raises IndexError if a leg contains no PMF records.
        if self._is_dual:
            free_energy = (leg0[-1][1] - leg0[0][1]) - (leg1[-1][1] -
                                                        leg1[0][1])
        else:
            free_energy = leg0[-1][1] - leg0[0][1]

        # Propagate the errors. (These add in quadrature.)
        error0 = leg_error(leg0)
        error1 = leg_error(leg1) if self._is_dual else 0

        # Free energy difference.
        error = _math.sqrt((error0 * error0) +
                           (error1 * error1)) * _Units.Energy.kcal_per_mol

        # Bundle the free energy and its associated error.
        free_energy = (free_energy, error)

        return (leg0, leg1, free_energy)

    def _initialise_runner(self, system0, system1):
        """Internal helper function to initialise the process runner.

           One process per lambda value is created for each leg, using
           the selected engine, and the processes are collected into a
           process runner stored as 'self._runner'. For the SOMD engine
           the water molecules of the systems are first replaced by ones
           with an AMBER water topology, which modifies the systems that
           are passed in.

           Parameters
           ----------

           system0 : :class:`System <BioSimSpace._SireWrappers.System>`
               The system for the first free energy leg.

           system1 : :class:`System <BioSimSpace._SireWrappers.System>`
               The system for the second free energy leg. The type of
               this argument is validated even for single leg simulations.

           Raises
           ------

           TypeError
               If a system is of the wrong type, or the name of the class
               is neither "Solvation" nor "Binding".

           RuntimeError
               If the water model has to be guessed and the water
               molecules don't contain 3, 4, or 5 atoms.
        """

        if type(system0) is not _System:
            raise TypeError(
                "'system0' must be of type 'BioSimSpace._SireWrappers.System'")

        if type(system1) is not _System:
            raise TypeError(
                "'system1' must be of type 'BioSimSpace._SireWrappers.System'")

        # Initialise lists to store the processes for each leg.
        leg0 = []
        leg1 = []

        # Get the simulation type.
        sim_type = self.__class__.__name__

        # Store the working directories for the legs.

        if sim_type == "Solvation":
            self._dir0 = "%s/free" % self._work_dir
            if self._is_dual:
                self._dir1 = "%s/vacuum" % self._work_dir
        elif sim_type == "Binding":
            self._dir0 = "%s/bound" % self._work_dir
            if self._is_dual:
                self._dir1 = "%s/free" % self._work_dir
        else:
            raise TypeError("Unsupported FreeEnergy simulation: '%s'" %
                            sim_type)

        # Convert to an appropriate AMBER topology. (Required by SOMD.)
        if self._engine == "SOMD":
            # Try to get the water model used to solvate the system.
            try:
                water_model = system0._sire_object.property(
                    "water_model").toString()
                waters0 = _SireIO.setAmberWater(
                    system0._sire_object.search("water"), water_model)
                if self._is_dual:
                    waters1 = _SireIO.setAmberWater(
                        system1._sire_object.search("water"), water_model)

            # If the system wasn't solvated by BioSimSpace, e.g. read from file, then try
            # to guess the water model from the topology. Only regular
            # errors trigger the guess, so that an interrupt or interpreter
            # exit isn't swallowed.
            except Exception:
                # Map the number of atoms in a water molecule to a model.
                # TODO: Assume TIP3P for 3-point models. Not sure how to
                # detect SPC/E.
                guesses = {3: "TIP3P", 4: "TIP4P", 5: "TIP5P"}

                num_point = system0.getWaterMolecules()[0].nAtoms()

                if num_point not in guesses:
                    raise RuntimeError("Unsupported %d-point water model!" %
                                       num_point)

                guess = guesses[num_point]

                waters0 = _SireIO.setAmberWater(
                    system0._sire_object.search("water"), guess)
                if self._is_dual:
                    waters1 = _SireIO.setAmberWater(
                        system1._sire_object.search("water"), guess)
                water_model = guess.lower()

                # Warn the user that we've guessed the water topology.
                _warnings.warn("Guessed water topology: %r" % water_model)

            # Remove the existing water molecules from the systems.
            system0.removeWaterMolecules()
            if self._is_dual:
                system1.removeWaterMolecules()

            # Convert the waters to BioSimSpace molecule containers.
            waters0 = _Molecules(waters0.toMolecules())
            if self._is_dual:
                waters1 = _Molecules(waters1.toMolecules())

            # Add the updated water topology back into the systems.
            system0.addMolecules(waters0)
            if self._is_dual:
                system1.addMolecules(waters1)

        # Check for GPU support. (Only used by SOMD processes.) This is
        # the same for every lambda value, so work it out once.
        if "CUDA_VISIBLE_DEVICES" in _os.environ:
            platform = "CUDA"
        else:
            platform = "CPU"

        # Get the lambda values from the protocol.
        lam_vals = self._protocol.getLambdaValues()

        # Loop over all of the lambda values.
        for lam in lam_vals:
            # Update the protocol lambda values.
            self._protocol.setLambdaValues(lam=lam, lam_vals=lam_vals)

            # Create and append the required processes for each leg.
            # Nest the working directories inside self._work_dir.

            # SOMD.
            if self._engine == "SOMD":
                leg0.append(
                    _Process.Somd(system0,
                                  self._protocol,
                                  platform=platform,
                                  work_dir="%s/lambda_%5.4f" %
                                  (self._dir0, lam)))

                if self._is_dual:
                    leg1.append(
                        _Process.Somd(system1,
                                      self._protocol,
                                      platform=platform,
                                      work_dir="%s/lambda_%5.4f" %
                                      (self._dir1, lam)))

            # GROMACS.
            elif self._engine == "GROMACS":
                leg0.append(
                    _Process.Gromacs(system0,
                                     self._protocol,
                                     work_dir="%s/lambda_%5.4f" %
                                     (self._dir0, lam)))

                if self._is_dual:
                    leg1.append(
                        _Process.Gromacs(system1,
                                         self._protocol,
                                         work_dir="%s/lambda_%5.4f" %
                                         (self._dir1, lam)))

        # Initialise the process runner. All processes have already been nested
        # inside the working directory so no need to re-nest.
        self._runner = _Process.ProcessRunner(leg0 + leg1,
                                              work_dir=self._work_dir,
                                              nest_dirs=False)

    def _update_run_args(self, args):
        """Internal function that passes new run arguments to every process
           held by the process runner.

           Parameters
           ----------

           args : dict, collections.OrderedDict
               A dictionary which contains the new command-line arguments
               for the process executable.

           Raises
           ------

           TypeError
               If 'args' is not exactly a dict or a collections.OrderedDict.
        """

        # Exact type matches only, other subclasses of dict are rejected.
        args_type = type(args)
        if not (args_type is dict or args_type is _OrderedDict):
            raise TypeError(
                "'args' must be of type 'dict', or 'collections.OrderedDict'")

        # Hand the same arguments to each process in turn.
        for proc in self._runner.processes():
            proc.setArgs(args)
Exemple #4
0
def matchAtoms(molecule0,
               molecule1,
               scoring_function="rmsd_align",
               matches=1,
               return_scores=False,
               prematch={},
               timeout=5 * _Units.Time.second,
               property_map0={},
               property_map1={}):
    """Find mappings between atom indices in molecule0 to those in molecule1.
       Molecules are aligned using a Maximum Common Substructure (MCS) search.
       When requesting more than one match, the mappings will be sorted using
       a scoring function and returned in order of best to worst score. (Note
       that, depending on the scoring function the "best" score may have the
       lowest value.)

       Parameters
       ----------

       molecule0 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The molecule of interest.

       molecule1 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The reference molecule.

       scoring_function : str
           The scoring function used to match atoms. Underscores and spaces
           are ignored and the name is case insensitive. Available options
           are:
             - "rmsd"
                 Calculate the root mean squared distance between the
                 coordinates of atoms in molecule0 to those that they
                 map to in molecule1.
             - "rmsd_align"
                 Align molecule0 to molecule1 based on the mapping before
                 computing the above RMSD score.
             - "rmsd_flex_align"
                 Flexibly align molecule0 to molecule1 based on the mapping
                 before computing the above RMSD score. (Requires the
                 'fkcombu' package: http://strcomp.protein.osaka-u.ac.jp/kcombu)

       matches : int
           The maximum number of matches to return. (Sorted in order of score).

       return_scores : bool
           Whether to return a list containing the scores for each mapping.

       prematch : dict
           A dictionary of atom mappings that must be included in the match.

       timeout : BioSimSpace.Types.Time
           The timeout for the maximum common substructure search.

       property_map0 : dict
           A dictionary that maps "properties" in molecule0 to their user
           defined values. This allows the user to refer to properties
           with their own naming scheme, e.g. { "charge" : "my-charge" }

       property_map1 : dict
           A dictionary that maps "properties" in molecule1 to their user
           defined values.

       Returns
       -------

       matches : dict, [dict], ([dict], list)
           The best atom mapping, a list containing a user specified number of
           the best mappings ranked by their score, or a tuple containing the
           list of best mappings and a list of the corresponding scores.

       Raises
       ------

       TypeError
           If any argument is of the wrong type.

       ValueError
           If the scoring function is unsupported, or 'matches' is negative.

       RuntimeError
           If the RDKit maximum common substructure search fails.

       Examples
       --------

       Find the best maximum common substructure mapping between two molecules.

       >>> import BioSimSpace as BSS
       >>> mapping = BSS.Align.matchAtoms(molecule0, molecule1)

       Find the 5 best mappings.

       >>> import BioSimSpace as BSS
       >>> mappings = BSS.Align.matchAtoms(molecule0, molecule1, matches=5)

       Find the 5 best mappings along with their ranking scores.

       >>> import BioSimSpace as BSS
       >>> mappings, scores = BSS.Align.matchAtoms(molecule0, molecule1, matches=5, return_scores=True)

       Find the 5 best mappings along with their ranking scores. Score
       by flexibly aligning molecule0 to molecule1 based on each mapping
       and computing the root mean squared displacement of the matched
       atoms.

       >>> import BioSimSpace as BSS
       >>> mappings, scores = BSS.Align.matchAtoms(molecule0, molecule1, matches=5, return_scores=True, scoring_function="rmsd_flex_align")

       Find the best mapping that contains a prematch (this is a dictionary mapping
       atom indices in molecule0 to those in molecule1).

       >>> import BioSimSpace as BSS
       >>> mapping = BSS.Align.matchAtoms(molecule0, molecule1, prematch={0 : 10, 3 : 7})
    """

    # A list of supported scoring functions.
    scoring_functions = ["RMSD", "RMSDALIGN", "RMSDFLEXALIGN"]

    # Validate input.

    if type(molecule0) is not _Molecule:
        raise TypeError(
            "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if type(molecule1) is not _Molecule:
        raise TypeError(
            "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if type(scoring_function) is not str:
        raise TypeError("'scoring_function' must be of type 'str'")

    # Strip underscores and spaces, then convert to upper case.
    _scoring_function = scoring_function.replace("_", "") \
                                        .replace(" ", "").upper()
    if _scoring_function not in scoring_functions:
        raise ValueError(
            "Unsupported scoring function '%s'. Options are: %s" %
            (scoring_function, scoring_functions))

    if _scoring_function == "RMSDFLEXALIGN" and _fkcombu_exe is None:
        raise _MissingSoftwareError(
            "'rmsd_flex_align' option requires the 'fkcombu' program: "
            "http://strcomp.protein.osaka-u.ac.jp/kcombu")

    if type(matches) is not int:
        raise TypeError("'matches' must be of type 'int'")
    if matches < 0:
        raise ValueError("'matches' must be positive!")

    if type(return_scores) is not bool:
        raise TypeError("'return_scores' must be of type 'bool'")

    if type(prematch) is not dict:
        raise TypeError("'prematch' must be of type 'dict'")
    _validate_mapping(molecule0, molecule1, prematch, "prematch")

    if type(timeout) is not _Units.Time._Time:
        raise TypeError("'timeout' must be of type 'BioSimSpace.Types.Time'")

    if type(property_map0) is not dict:
        raise TypeError("'property_map0' must be of type 'dict'")

    if type(property_map1) is not dict:
        raise TypeError("'property_map1' must be of type 'dict'")

    # Extract the Sire molecule from each BioSimSpace molecule.
    mol0 = molecule0._getSireObject()
    mol1 = molecule1._getSireObject()

    # Convert the timeout to seconds and take the magnitude as an integer.
    timeout = int(timeout.seconds().magnitude())

    # Use RDKit to find the maximum common substructure.

    # Create a temporary working directory. The context manager removes it
    # as soon as the molecules have been loaded, rather than leaving the
    # clean-up to the garbage collector.
    with _tempfile.TemporaryDirectory() as work_dir:
        try:
            # Run inside the temporary directory.
            with _Utils.cd(work_dir):
                # Write both molecules to PDB files.
                _IO.saveMolecules("tmp0",
                                  molecule0,
                                  "PDB",
                                  property_map=property_map0)
                _IO.saveMolecules("tmp1",
                                  molecule1,
                                  "PDB",
                                  property_map=property_map1)

                # Load the molecules with RDKit.
                # Note that the C++ function overloading seems to be broken,
                # so we need to pass all arguments by position, rather than
                # keyword. The arguments are: "filename", "sanitize",
                # "removeHs", "flavor"
                mols = [
                    _Chem.MolFromPDBFile("tmp0.pdb", False, False, 0),
                    _Chem.MolFromPDBFile("tmp1.pdb", False, False, 0)
                ]

                # Generate the MCS match.
                mcs = _rdFMCS.FindMCS(
                    mols,
                    atomCompare=_rdFMCS.AtomCompare.CompareAny,
                    bondCompare=_rdFMCS.BondCompare.CompareAny,
                    completeRingsOnly=True,
                    ringMatchesRingOnly=True,
                    matchChiralTag=False,
                    matchValences=False,
                    maximizeBonds=False,
                    timeout=timeout)

                # Get the common substructure as a SMARTS string.
                mcs_smarts = _Chem.MolFromSmarts(mcs.smartsString)

        # Catch Exception rather than everything, so that KeyboardInterrupt
        # and SystemExit are not disguised as a mapping failure. The
        # original error is kept as the cause.
        except Exception as e:
            raise RuntimeError("RDKIT MCS mapping failed!") from e

    # Score the mappings and return them in sorted order (best to worst).
    mappings, scores = _score_rdkit_mappings(mol0, mol1, mols[0], mols[1],
                                             mcs_smarts, prematch,
                                             _scoring_function, property_map0,
                                             property_map1)

    # Sometimes RDKit fails to generate a mapping that includes the prematch.
    # If so, then try generating a mapping using the MCS routine from Sire.
    if len(mappings) == 1 and mappings[0] == prematch:

        # Convert timeout to a Sire Unit.
        timeout = timeout * _SireUnits.second

        # Regular match. Include light atoms, but don't allow matches between heavy
        # and light atoms.
        m0 = mol0.evaluate().findMCSmatches(
            mol1, _SireMol.AtomResultMatcher(_to_sire_mapping(prematch)),
            timeout, True, property_map0, property_map1, 6, False)

        # Include light atoms, and allow matches between heavy and light atoms.
        # This captures mappings such as O --> H in methane to methanol.
        m1 = mol0.evaluate().findMCSmatches(
            mol1, _SireMol.AtomResultMatcher(_to_sire_mapping(prematch)),
            timeout, True, property_map0, property_map1, 0, False)

        # Take the mapping with the larger number of matches. The regular
        # match (m0) is preferred on a tie, or when m1 is empty.
        if len(m1) > 0 and (len(m0) == 0 or len(m1[0]) > len(m0[0])):
            mappings = m1
        else:
            mappings = m0

        # Score the mappings and return them in sorted order (best to worst).
        mappings, scores = _score_sire_mappings(mol0, mol1, mappings, prematch,
                                                _scoring_function,
                                                property_map0, property_map1)

    if matches == 1:
        # Return the single best mapping, with its score if requested.
        if return_scores:
            return (mappings[0], scores[0])
        return mappings[0]

    # Return a tuple containing the list of matches from best to
    # worst along with the list of scores.
    if return_scores:
        return (mappings[0:matches], scores[0:matches])

    # Return a list of matches from best to worst.
    return mappings[0:matches]
Exemple #5
0
def flexAlign(molecule0,
              molecule1,
              mapping=None,
              fkcombu_exe=None,
              property_map0={},
              property_map1={}):
    """Flexibly align atoms in molecule0 to those in molecule1 using the
       mapping between matched atom indices.

       Parameters
       ----------

       molecule0 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The molecule to align. Note that the coordinates of this object
           are also updated in place.

       molecule1 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The reference molecule.

       mapping : dict
           A dictionary mapping atoms in molecule0 to those in molecule1.
           If None, a mapping is generated with
           :class:`matchAtoms <BioSimSpace.Align.matchAtoms>`.

       fkcombu_exe : str
           Path to the fkcombu executable. If None is passed, then BioSimSpace
           will attempt to find fkcombu by searching your PATH.

       property_map0 : dict
           A dictionary that maps "properties" in molecule0 to their user
           defined values. This allows the user to refer to properties
           with their own naming scheme, e.g. { "charge" : "my-charge" }

       property_map1 : dict
           A dictionary that maps "properties" in molecule1 to their user
           defined values.

       Returns
       -------

       molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The aligned molecule.

       Raises
       ------

       IOError
           If a user supplied 'fkcombu_exe' does not exist.

       TypeError
           If any argument is of the wrong type.

       Examples
       --------

       Align molecule0 to molecule1 based on a precomputed mapping.

       >>> import BioSimSpace as BSS
       >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1, mapping)

       Align molecule0 to molecule1. Since no mapping is passed one will be
       autogenerated using :class:`matchAtoms <BioSimSpace.Align.matchAtoms>`
       with default options.

       >>> import BioSimSpace as BSS
       >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1)
    """

    # Check that we found fkcombu in the PATH.
    if fkcombu_exe is None:
        if _fkcombu_exe is None:
            raise _MissingSoftwareError(
                "'BioSimSpace.Align.flexAlign' requires the 'fkcombu' program: "
                "http://strcomp.protein.osaka-u.ac.jp/kcombu")
        fkcombu_exe = _fkcombu_exe
    # Check that the user supplied executable exists.
    elif not _os.path.isfile(fkcombu_exe):
        raise IOError("'fkcombu' executable doesn't exist: '%s'" %
                      fkcombu_exe)

    if type(molecule0) is not _Molecule:
        raise TypeError(
            "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if type(molecule1) is not _Molecule:
        raise TypeError(
            "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if type(property_map0) is not dict:
        raise TypeError("'property_map0' must be of type 'dict'")

    if type(property_map1) is not dict:
        raise TypeError("'property_map1' must be of type 'dict'")

    # The user has passed an atom mapping.
    if mapping is not None:
        if type(mapping) is not dict:
            raise TypeError("'mapping' must be of type 'dict'.")
        _validate_mapping(molecule0, molecule1, mapping, "mapping")

    # Get the best match atom mapping.
    else:
        mapping = matchAtoms(molecule0,
                             molecule1,
                             property_map0=property_map0,
                             property_map1=property_map1)

    # Convert the mapping to AtomIdx key:value pairs.
    sire_mapping = _to_sire_mapping(mapping)

    # Create a temporary working directory and execute inside it. The
    # directory is removed explicitly when the block exits.
    with _tempfile.TemporaryDirectory() as work_dir, _Utils.cd(work_dir):

        # Write the two molecules to PDB files.
        _IO.saveMolecules("molecule0",
                          molecule0,
                          "PDB",
                          property_map=property_map0)
        _IO.saveMolecules("molecule1",
                          molecule1,
                          "PDB",
                          property_map=property_map1)

        # Write the mapping to text. (Increment indices by one).
        with open("mapping.txt", "w") as handle:
            for idx0, idx1 in sire_mapping.items():
                handle.write("%d %d\n" % (idx0.value() + 1, idx1.value() + 1))

        # Create the fkcombu command as an argument list. No shell is
        # involved, so an executable path containing spaces or shell
        # metacharacters is passed through verbatim.
        command = [
            fkcombu_exe, "-T", "molecule0.pdb", "-R", "molecule1.pdb",
            "-alg", "F", "-iam", "mapping.txt", "-opdbT", "aligned.pdb"
        ]

        # Run the command as a subprocess. A failure to launch (e.g. the
        # file is not executable) is reported through the output check
        # below, as it was when the command was run via the shell.
        launch_error = None
        try:
            _subprocess.run(command,
                            shell=False,
                            stdout=_subprocess.PIPE,
                            stderr=_subprocess.PIPE)
        except OSError as e:
            launch_error = e

        # Check that the output file exists.
        if not _os.path.isfile("aligned.pdb"):
            raise _AlignmentError(
                "Failed to align molecules based on mapping: %r" %
                mapping) from launch_error

        # Load the aligned molecule.
        aligned = _IO.readMolecules("aligned.pdb")[0]

        # Get the "coordinates" property for molecule0.
        prop = property_map0.get("coordinates", "coordinates")

        # Copy the coordinates back into the original molecule.
        molecule0._sire_object = molecule0._sire_object.edit() \
            .setProperty(prop, aligned._sire_object.property("coordinates")).commit()

    # Return the aligned molecule.
    return _Molecule(molecule0)
Exemple #6
0
def tip5p(molecule=None,
          box=None,
          shell=None,
          ion_conc=0,
          is_neutral=True,
          work_dir=None,
          property_map={}):
    """Solvate a molecule, or system of molecules, with TIP5P water.

       Parameters
       ----------

       molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`, \
                  :class:`Molecules <BioSimSpace._SireWrappers.Molecules>`, \
                  :class:`System <BioSimSpace._SireWrappers.System>`
           A molecule, or container/system of molecules.

       box : [:class:`Length <BioSimSpace.Types.Length>`]
           A list containing the box size in each dimension.

       shell : :class:`Length <BioSimSpace.Types.Length>`
           Thickness of the water shell around the solute. Note that the
           base length of the resulting box must be at least twice as large
           as the cutoff used by the chosen molecular dynamics engine. As such,
           the shell option is often unsuitable for small molecules.

       ion_conc : float
           The ion concentration in (mol per litre).

       is_neutral : bool
           Whether to neutralise the system.

       work_dir : str
           The working directory for the process.

       property_map : dict
           A dictionary that maps system "properties" to their user defined
           values. This allows the user to refer to properties with their
           own naming scheme, e.g. { "charge" : "my-charge" }

       Returns
       -------

       system : :class:`System <BioSimSpace._SireWrappers.System>`
           The solvated molecular system.

       Raises
       ------

       BioSimSpace._Exceptions.MissingSoftwareError
           If no GROMACS executable was found.
    """

    # Solvation is unavailable when no GROMACS executable was found.
    if _gmx_exe is None:
        raise _MissingSoftwareError(
            "'BioSimSpace.Solvent.tip5p' is not supported. "
            "Please install GROMACS (http://www.gromacs.org).")

    # Validate arguments. The validator hands back the (possibly
    # normalised) values used for the solvation call.
    validated = _validate_input(molecule, box, shell, ion_conc, is_neutral,
                                work_dir, property_map)
    molecule, box, shell, work_dir, property_map = validated

    # Return the solvated system. The water model is selected by the
    # name "tip5p" and the integer 5.
    solvate_options = {"work_dir": work_dir, "property_map": property_map}
    return _solvate(molecule, box, shell, "tip5p", 5, ion_conc, is_neutral,
                    **solvate_options)
Exemple #7
0
    def __init__(self,
                 system,
                 protocol,
                 exe=None,
                 name="somd",
                 platform="CPU",
                 work_dir=None,
                 seed=None,
                 property_map={}):
        """Constructor.

           Parameters
           ----------

           system : :class:`System <BioSimSpace._SireWrappers.System>`
               The molecular system.

           protocol : :class:`Protocol <BioSimSpace.Protocol>`
               The protocol for the SOMD process.

           exe : str
               The full path to the SOMD executable. If None, the SOMD
               executable bundled with Sire is used.

           name : str
               The name of the process. This is also used as the stem of
               the input file names.

           platform : str
               The platform for the simulation: "CPU", "CUDA", or "OPENCL".
               Spaces are ignored and the name is case insensitive.

           work_dir : str
               The working directory for the process.

           seed : int
               A random number seed.

           property_map : dict
               A dictionary that maps system "properties" to their user defined
               values. This allows the user to refer to properties with their
               own naming scheme, e.g. { "charge" : "my-charge" }

           Raises
           ------

           TypeError
               If 'platform' is not a string.

           ValueError
               If 'platform' is not a supported platform.

           IOError
               If a user supplied 'exe' does not exist.
        """

        # Call the base class constructor.
        super().__init__(system, protocol, name, work_dir, seed, property_map)

        # Set the package name.
        self._package_name = "SOMD"

        # This process can generate trajectory data.
        self._has_trajectory = True

        if type(platform) is not str:
            raise TypeError("'platform' must be of type 'str'.")

        # Strip all spaces and convert to upper case.
        platform = platform.replace(" ", "").upper()

        # Check for platform support. The keys are converted to a list so
        # that the message shows the names rather than "dict_keys([...])".
        if platform not in self._platforms:
            raise ValueError("Supported platforms are: %s" %
                             list(self._platforms.keys()))
        self._platform = self._platforms[platform]

        # If the path to the executable wasn't specified, then use the bundled SOMD
        # executable.
        if exe is None:
            is_windows = _sys.platform == "win32"

            # Generate the name of the SOMD exe. On Windows, SOMD is a
            # Python script that is run through the Sire interpreter.
            if is_windows:
                somd_path = _os.path.join(
                    _os.path.normpath(_SireBase.getShareDir()), "scripts")
                somd_interpreter = _os.path.join(
                    _os.path.normpath(_SireBase.getBinDir()),
                    "sire_python.exe")
                somd_suffix = ".py"
            else:
                somd_path = _SireBase.getBinDir()
                somd_suffix = ""

            # Free energy protocols use a dedicated executable.
            if type(self._protocol) is _Protocol.FreeEnergy:
                somd_exe = "somd-freenrg"
            else:
                somd_exe = "somd"
            somd_exe = _os.path.join(somd_path, somd_exe) + somd_suffix

            if not _os.path.isfile(somd_exe):
                raise _MissingSoftwareError(
                    "Cannot find SOMD executable in expected location: '%s'" %
                    somd_exe)

            if is_windows:
                self._exe = somd_interpreter
                self._script = somd_exe
            else:
                self._exe = somd_exe
        else:
            # Make sure executable exists.
            # NOTE(review): 'self._script' is not assigned on this path, even
            # on Windows - confirm that nothing downstream requires it.
            if not _os.path.isfile(exe):
                raise IOError("SOMD executable doesn't exist: '%s'" % exe)
            self._exe = exe

        # The names of the input files.
        self._rst_file = "%s/%s.rst7" % (self._work_dir, name)
        self._top_file = "%s/%s.prm7" % (self._work_dir, name)

        # The name of the trajectory file.
        self._traj_file = "%s/traj000000001.dcd" % self._work_dir

        # The name of the binary restart file.
        self._restart_file = "%s/latest.rst" % self._work_dir

        # Set the path for the SOMD configuration file.
        self._config_file = "%s/%s.cfg" % (self._work_dir, name)

        # Set the path for the perturbation file.
        self._pert_file = "%s/%s.pert" % (self._work_dir, name)

        # Set the path for the gradient file and create the gradient list.
        self._gradient_file = "%s/gradients.dat" % self._work_dir
        self._gradients = []

        # Create the list of input files.
        self._input_files = [self._config_file, self._rst_file, self._top_file]

        # Initialise the number of moves per cycle.
        self._num_moves = 10000

        # Initialise the buffering frequency.
        self._buffer_freq = 0

        # Now set up the working directory for the process.
        self._setup()
Exemple #8
0
def gaff2(molecule, work_dir=None, net_charge=None, property_map={}):
    """Parameterise using the gaff2 force field.

       Parameters
       ----------

       molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The molecule to parameterise.

       work_dir : str
           The working directory for the process.

       net_charge : int, :class:`Charge <BioSimSpace.Types.Charge>`
           The net charge on the molecule. The value must be integer valued.

       property_map : dict
           A dictionary that maps system "properties" to their user defined
           values. This allows the user to refer to properties with their
           own naming scheme, e.g. { "charge" : "my-charge" }

       Returns
       -------

       molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The parameterised molecule.

       Raises
       ------

       TypeError
           If 'molecule' or 'property_map' is of the wrong type, or
           'net_charge' cannot be converted to an integer.

       ValueError
           If 'net_charge' is a floating point number with a fractional part.
    """

    if _amber_home is None:
        raise _MissingSoftwareError(
            "'BioSimSpace.Parameters.gaff2' is not supported. "
            "Please install AMBER (http://ambermd.org).")

    # Validate arguments.

    if type(molecule) is not _Molecule:
        raise TypeError(
            "'molecule' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if net_charge is not None:
        # Get the magnitude of the charge.
        if type(net_charge) is _Charge:
            net_charge = net_charge.magnitude()

        # Reject fractional charges before converting, since int() would
        # silently truncate them. isinstance is used so that subclasses
        # of float are checked too.
        if isinstance(net_charge, float) and net_charge % 1 != 0:
            raise ValueError("'net_charge' must be integer valued.")

        # Try to convert to int.
        try:
            net_charge = int(net_charge)
        except (TypeError, ValueError, OverflowError) as e:
            raise TypeError(
                "'net_charge' must be of type 'int', or `BioSimSpace.Types.Charge'"
            ) from e

    if type(property_map) is not dict:
        raise TypeError("'property_map' must be of type 'dict'")

    # Create a default protocol.
    protocol = _Protocol.GAFF2(net_charge=net_charge,
                               property_map=property_map)

    # Run the parameterisation protocol in the background and return
    # a handle to the thread.
    return _Process(molecule, protocol, work_dir=work_dir, auto_start=True)
Exemple #9
0
    def run(self, molecule, work_dir=None, queue=None):
        """Run the parameterisation protocol.

           Parameters
           ----------

           molecule : BioSimSpace._SireWrappers.Molecule
               The molecule to apply the parameterisation protocol to.

           work_dir : str
               The working directory. If None, the current working directory
               is used.

           queue : queue.Queue
               The thread queue in which this method has been run. If given,
               the parameterised molecule is also put onto the queue.

           Returns
           -------

           molecule : BioSimSpace._SireWrappers.Molecule
               The parameterised molecule.

           Raises
           ------

           TypeError
               If any argument is of the wrong type.

           RuntimeError
               If the protocol supports neither tLEaP nor pdb2gmx.

           IOError
               If the parameterised molecule cannot be read back.
        """

        if type(molecule) is not _Molecule:
            raise TypeError(
                "'molecule' must be of type 'BioSimSpace._SireWrappers.Molecule'"
            )

        # Test the values against None, not their types: type(x) is never
        # None, so testing the type would reject the default arguments.
        if work_dir is not None and type(work_dir) is not str:
            raise TypeError("'work_dir' must be of type 'str'")

        if queue is not None and type(queue) is not _queue.Queue:
            raise TypeError("'queue' must be of type 'queue.Queue'")

        # Set work_dir to the current directory.
        if work_dir is None:
            work_dir = _os.getcwd()

        # Create the file prefix.
        prefix = work_dir + "/"

        # Create a copy of the molecule.
        new_mol = molecule.copy()

        # Choose the program to run with depending on the force field compatibility.
        # If tLEaP and pdb2gmx are supported, default to tLEaP, then use pdb2gmx if
        # the tLEaP executable is not available.

        # First, try parameterise using tLEaP.
        if self._tleap:
            if _tleap_exe is not None:
                output = self._run_tleap(molecule, work_dir)
            # Otherwise, try using pdb2gmx.
            elif self._pdb2gmx:
                if _gmx_exe is not None:
                    output = self._run_pdb2gmx(molecule, work_dir)
                else:
                    raise _MissingSoftwareError(
                        "Cannot parameterise. Missing AmberTools and GROMACS.")
            # tLEaP is the only supported program and it is missing. Without
            # this branch 'output' would be left unassigned.
            else:
                raise _MissingSoftwareError(
                    "Cannot parameterise. Missing AmberTools.")

        # Parameterise using pdb2gmx.
        elif self._pdb2gmx:
            if _gmx_exe is not None:
                output = self._run_pdb2gmx(molecule, work_dir)
            else:
                raise _MissingSoftwareError(
                    "Cannot use pdb2gmx since GROMACS is not installed!")

        # Neither program is supported, so there is nothing to run.
        else:
            raise RuntimeError(
                "Cannot parameterise. The protocol supports neither "
                "tLEaP nor pdb2gmx.")

        # Prepend the working directory to the output file names.
        output = [prefix + output[0], prefix + output[1]]

        try:
            # Load the parameterised molecule.
            par_mol = _Molecule(
                _IO.readMolecules(output)._getSireObject()[_SireMol.MolIdx(0)])
        except Exception as e:
            msg = "Failed to read molecule from: '%s', '%s'" % (output[0],
                                                                output[1])
            # Only expose the underlying error in verbose mode.
            if _isVerbose():
                raise IOError(msg) from e
            else:
                raise IOError(msg) from None

        # Make the molecule 'new_mol' compatible with 'par_mol'. This will
        # create a mapping between atom indices in the two molecules and add
        # all of the new properties from 'par_mol' to 'new_mol'.
        new_mol._makeCompatibleWith(par_mol,
                                    property_map=self._property_map,
                                    overwrite=True,
                                    verbose=False)

        # Record the forcefield used to parameterise the molecule.
        new_mol._forcefield = self._forcefield

        if queue is not None:
            queue.put(new_mol)
        return new_mol
Exemple #10
0
def _find_md_package(system, protocol, gpu_support=False):
    """Find a molecular dynamics package on the system and return its
       name together with the path to its executable.

       Parameters
       ----------

       system : :class:`System <BioSimSpace._SireWrappers.System>`
           The molecular system.

       protocol : :class:`Protocol <BioSimSpace.Protocol>`
           The simulation protocol.

       gpu_support : bool
           Whether the package must have GPU support.

       Returns
       -------

       (package, exe) : (str, str)
           The name of the MD package and a path to its executable.

       Raises
       ------

       ValueError
           If the file format of the system isn't supported by any package.

       _MissingSoftwareError
           If no suitable package could be located.
    """

    # The input has already been validated in the run method, so no need
    # to re-validate here.

    # Get the file format of the molecular system.
    fileformat = system.fileFormat()

    # Make sure that this format is supported.
    if fileformat not in _file_extensions:
        raise ValueError("Cannot find an MD package that supports format: %s" %
                         fileformat)
    packages = _file_extensions[fileformat]

    # Is this a free energy protocol.
    is_free_energy = type(protocol) is _Protocol.FreeEnergy

    # Loop over each package that supports the file format.
    for package in packages:
        # For a free energy protocol, skip packages without support for it.
        if is_free_energy and not _free_energy[package]:
            continue

        # Check whether this package exists on the system and has the desired
        # GPU support.
        for exe, gpu in _md_packages[package].items():
            # If the user has requested GPU support, skip executables
            # that don't provide it.
            if gpu_support and not gpu:
                continue

            # AMBER
            if package == "AMBER":
                # Search AMBERHOME, if set. Note that the system PATH is
                # only consulted when AMBERHOME is unset.
                if _amber_home is not None:
                    path = "%s/bin/%s" % (_amber_home, exe)
                    if _os.path.isfile(path):
                        return (package, path)
                # Search system PATH.
                else:
                    try:
                        path = _SireBase.findExe(exe).absoluteFilePath()
                    except Exception:
                        # The lookup failed, so try the next candidate.
                        # Only 'Exception' is caught so that KeyboardInterrupt
                        # and SystemExit still propagate.
                        continue
                    return (package, path)

            # GROMACS
            elif package == "GROMACS":
                if _gmx_exe is not None:
                    return (package, _gmx_exe)

            # SOMD
            elif package == "SOMD":
                # The path is returned without checking that the file exists.
                return (package, _SireBase.getBinDir() + "/somd")

            # Search system PATH.
            else:
                try:
                    path = _SireBase.findExe(exe).absoluteFilePath()
                except Exception:
                    # The lookup failed, so try the next candidate.
                    continue
                return (package, path)

    # If we get this far, then no package was found.
    raise _MissingSoftwareError("Couldn't find package to support format: %s" %
                                fileformat)