Exemple #1
0
def _score_rdkit_mappings(molecule0, molecule1, rdkit_molecule0,
                          rdkit_molecule1, mcs_smarts, prematch,
                          scoring_function, property_map0, property_map1):
    """Internal function to score atom mappings based on the root mean squared
       displacement (RMSD) between mapped atoms in two molecules. Optionally,
       molecule0 can first be aligned to molecule1 based on the mapping prior
       to computing the RMSD. The function returns the mappings sorted based
       on their score from best to worst, along with a list containing the
       scores for each mapping.

       Parameters
       ----------

       molecule0 : Sire.Molecule.Molecule
           The first molecule (Sire representation).

       molecule0 : Sire.Molecule.Molecule
           The second molecule (Sire representation).

       rdkit_mol0 : RDKit.Chem.Mol
           The first molecule (RDKit representation).

       rdkit_mol1 : RDKit.Chem.Mol
           The second molecule (RDKit representation).

       mcs_smarts : RDKit.Chem.MolFromSmarts
           The smarts string representing the maximum common substructure of
           the two molecules.

       prematch : dict
           A dictionary of atom mappings that must be included in the match.

       scoring_function : str
           The RMSD scoring function.

       property_map0 : dict
           A dictionary that maps "properties" in molecule0 to their user
           defined values. This allows the user to refer to properties
           with their own naming scheme, e.g. { "charge" : "my-charge" }

       property_map1 : dict
           A dictionary that maps "properties" in molecule1 to their user
           defined values.

       Returns
       -------

       mapping, scores : ([dict], list)
           The ranked mappings and corresponding scores.
    """

    # Adapted from FESetup: https://github.com/CCPBioSim/fesetup

    # Make sure to re-map the coordinates property in both molecules, otherwise
    # the move and align functions from Sire will not work.
    prop0 = property_map0.get("coordinates", "coordinates")
    prop1 = property_map1.get("coordinates", "coordinates")

    if prop0 != "coordinates":
        molecule0 = molecule0.edit().setProperty(
            "coordinates", molecule0.property(prop0)).commit()
    if prop1 != "coordinates":
        molecule1 = molecule1.edit().setProperty(
            "coordinates", molecule1.property(prop1)).commit()

    # Get the set of matching substructures in each molecule. For some reason
    # setting uniquify to True removes valid matches, in some cases even the
    # best match! As such, we set uniquify to False and account ignore duplicate
    # mappings in the code below.
    matches0 = rdkit_molecule0.GetSubstructMatches(mcs_smarts,
                                                   uniquify=False,
                                                   maxMatches=1000,
                                                   useChirality=False)
    matches1 = rdkit_molecule1.GetSubstructMatches(mcs_smarts,
                                                   uniquify=False,
                                                   maxMatches=1000,
                                                   useChirality=False)

    # Swap the order of the matches.
    if len(matches0) < len(matches1):
        matches0, matches1 = matches1, matches0
        is_swapped = True
    else:
        is_swapped = False

    # Initialise a list to hold the mappings.
    mappings = []

    # Initialise a list of to hold the score for each mapping.
    scores = []

    # Loop over all matches from mol0.
    for x in range(len(matches0)):
        match0 = matches0[x]

        # Loop over all matches from mol1.
        for y in range(len(matches1)):
            match1 = matches1[y]

            # Initialise the mapping for this match.
            mapping = {}
            sire_mapping = {}

            # Loop over all atoms in the match.
            for i, idx0 in enumerate(match0):
                idx1 = match1[i]

                # Add to the mapping.
                if is_swapped:
                    mapping[idx1] = idx0
                    sire_mapping[_SireMol.AtomIdx(idx1)] = _SireMol.AtomIdx(
                        idx0)
                else:
                    mapping[idx0] = idx1
                    sire_mapping[_SireMol.AtomIdx(idx0)] = _SireMol.AtomIdx(
                        idx1)

            # This is a new mapping:
            if not mapping in mappings:
                # Check that the mapping contains the pre-match.
                is_valid = True
                for idx0, idx1 in prematch.items():
                    # Pre-match isn't found, return to top of loop.
                    if idx0 not in mapping or mapping[idx0] != idx1:
                        is_valid = False
                        break

                if is_valid:
                    # Rigidly align molecule0 to molecule1 based on the mapping.
                    if scoring_function == "RMSDALIGN":
                        try:
                            molecule0 = molecule0.move().align(
                                molecule1,
                                _SireMol.AtomResultMatcher(
                                    sire_mapping)).molecule()
                        except Exception as e:
                            msg = "Failed to align molecules when scoring based on mapping: %r" % mapping
                            if _isVerbose():
                                raise _AlignmentError(msg) from e
                            else:
                                raise _AlignmentError(msg) from None
                    # Flexibly align molecule0 to molecule1 based on the mapping.
                    elif scoring_function == "RMSDFLEXALIGN":
                        molecule0 = flexAlign(
                            _Molecule(molecule0),
                            _Molecule(molecule1),
                            mapping,
                            property_map0=property_map0,
                            property_map1=property_map1)._sire_object

                    # Append the mapping to the list.
                    mappings.append(mapping)

                    # We now compute the RMSD between the coordinates of the matched atoms
                    # in molecule0 and molecule1.

                    # Initialise lists to hold the coordinates.
                    c0 = []
                    c1 = []

                    # Loop over each atom index in the map.
                    for idx0, idx1 in sire_mapping.items():
                        # Append the coordinates of the matched atom in molecule0.
                        c0.append(molecule0.atom(idx0).property("coordinates"))
                        # Append the coordinates of atom in molecule1 to which it maps.
                        c1.append(molecule1.atom(idx1).property("coordinates"))

                    # Compute the RMSD between the two sets of coordinates.
                    scores.append(_SireMaths.getRMSD(c0, c1))

    # No mappings were found.
    if len(mappings) == 0:
        if len(prematch) == 0:
            return ([{}], [])
        else:
            return ([prematch], [])

    # Sort the scores and return the sorted keys. (Smaller RMSD is best)
    keys = sorted(range(len(scores)), key=lambda k: scores[k])

    # Sort the mappings.
    mappings = [mappings[x] for x in keys]

    # Sort the scores and convert to Angstroms.
    scores = [scores[x] * _Units.Length.angstrom for x in keys]

    # Return the sorted mappings and their scores.
    return (mappings, scores)
Exemple #2
0
def _score_sire_mappings(molecule0, molecule1, sire_mappings, prematch,
                         scoring_function, property_map0, property_map1):
    """Internal function to score atom mappings based on the root mean squared
       displacement (RMSD) between mapped atoms in two molecules. Optionally,
       molecule0 can first be aligned to molecule1 based on the mapping prior
       to computing the RMSD. The function returns the mappings sorted based
       on their score from best to worst, along with a list containing the
       scores for each mapping.

       Parameters
       ----------

       molecule0 : Sire.Molecule.Molecule
           The first molecule (Sire representation).

       molecule0 : Sire.Molecule.Molecule
           The second molecule (Sire representation).

       sire_mappings : [{}]
           The list of mappings generated by Sire.

       prematch : dict
           A dictionary of atom mappings that must be included in the match.

       scoring_function : str
           The RMSD scoring function.

       property_map0 : dict
           A dictionary that maps "properties" in molecule0 to their user
           defined values. This allows the user to refer to properties
           with their own naming scheme, e.g. { "charge" : "my-charge" }

       property_map1 : dict
           A dictionary that maps "properties" in molecule1 to their user
           defined values.

       Returns
       -------

       mapping, scores : ([dict], list)
           The ranked mappings and corresponding scores.
    """

    # Make sure to re-map the coordinates property in both molecules, otherwise
    # the move and align functions from Sire will not work.
    prop0 = property_map0.get("coordinates", "coordinates")
    prop1 = property_map1.get("coordinates", "coordinates")

    if prop0 != "coordinates":
        molecule0 = molecule0.edit().setProperty(
            "coordinates", molecule0.property(prop0)).commit()
    if prop1 != "coordinates":
        molecule1 = molecule1.edit().setProperty(
            "coordinates", molecule1.property(prop1)).commit()

    # Initialise a list to hold the mappings.
    mappings = []

    # Initialise a list of to hold the score for each mapping.
    scores = []

    # Loop over all of the mappings.
    for mapping in sire_mappings:

        # Check that the mapping contains the pre-match.
        is_valid = True
        for idx0, idx1 in prematch.items():
            # Pre-match isn't found, return to top of loop.
            if _SireMol.AtomIdx(idx0) not in mapping or mapping[
                    _SireMol.AtomIdx(idx0)] != _SireMol.AtomIdx(idx1):
                is_valid = False
                break

        if is_valid:
            # Rigidly align molecule0 to molecule1 based on the mapping.
            if scoring_function == "RMSDALIGN":
                try:
                    molecule0 = molecule0.move().align(
                        molecule1,
                        _SireMol.AtomResultMatcher(mapping)).molecule()
                except Exception as e:
                    msg = "Failed to align molecules when scoring based on mapping: %r" % mapping
                    if _isVerbose():
                        raise _AlignmentError(msg) from e
                    else:
                        raise _AlignmentError(msg) from None
            # Flexibly align molecule0 to molecule1 based on the mapping.
            elif scoring_function == "RMSDFLEXALIGN":
                molecule0 = flexAlign(_Molecule(molecule0),
                                      _Molecule(molecule1),
                                      _from_sire_mapping(mapping),
                                      property_map0=property_map0,
                                      property_map1=property_map1)._sire_object

            # Append the mapping to the list.
            mappings.append(_from_sire_mapping(mapping))

            # We now compute the RMSD between the coordinates of the matched atoms
            # in molecule0 and molecule1.

            # Initialise lists to hold the coordinates.
            c0 = []
            c1 = []

            # Loop over each atom index in the map.
            for idx0, idx1 in mapping.items():
                # Append the coordinates of the matched atom in molecule0.
                c0.append(molecule0.atom(idx0).property("coordinates"))
                # Append the coordinates of atom in molecule1 to which it maps.
                c1.append(molecule1.atom(idx1).property("coordinates"))

            # Compute the RMSD between the two sets of coordinates.
            scores.append(_SireMaths.getRMSD(c0, c1))

    # No mappings were found.
    if len(mappings) == 0:
        if len(prematch) == 0:
            return ([{}], [])
        else:
            return ([prematch], [])

    # Sort the scores and return the sorted keys. (Smaller RMSD is best)
    keys = sorted(range(len(scores)), key=lambda k: scores[k])

    # Sort the mappings.
    mappings = [mappings[x] for x in keys]

    # Sort the scores and convert to Angstroms.
    scores = [scores[x] * _Units.Length.angstrom for x in keys]

    # Return the sorted mappings and their scores.
    return (mappings, scores)
Exemple #3
0
def matchAtoms(molecule0,
               molecule1,
               scoring_function="rmsd_align",
               matches=1,
               return_scores=False,
               prematch={},
               timeout=5 * _Units.Time.second,
               property_map0={},
               property_map1={}):
    """Find mappings between atom indices in molecule0 to those in molecule1.
       Molecules are aligned using a Maximum Common Substructure (MCS) search.
       When requesting more than one match, the mappings will be sorted using
       a scoring function and returned in order of best to worst score. (Note
       that, depending on the scoring function the "best" score may have the
       lowest value.)

       Parameters
       ----------

       molecule0 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The molecule of interest.

       molecule1 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The reference molecule.

       scoring_function : str
           The scoring function used to match atoms. Available options are:
             - "rmsd"
                 Calculate the root mean squared distance between the
                 coordinates of atoms in molecule0 to those that they
                 map to in molecule1.
             - "rmsd_align"
                 Align molecule0 to molecule1 based on the mapping before
                 computing the above RMSD score.
             - "rmsd_flex_align"
                 Flexibly align molecule0 to molecule1 based on the mapping
                 before computing the above RMSD score. (Requires the
                 'fkcombu'. package: http://strcomp.protein.osaka-u.ac.jp/kcombu)

       matches : int
           The maximum number of matches to return. (Sorted in order of score).

       return_scores : bool
           Whether to return a list containing the scores for each mapping.

       prematch : dict
           A dictionary of atom mappings that must be included in the match.

       timeout : BioSimSpace.Types.Time
           The timeout for the maximum common substructure search.

       property_map0 : dict
           A dictionary that maps "properties" in molecule0 to their user
           defined values. This allows the user to refer to properties
           with their own naming scheme, e.g. { "charge" : "my-charge" }

       property_map1 : dict
           A dictionary that maps "properties" in molecule1 to their user
           defined values.

       Returns
       -------

       matches : dict, [dict], ([dict], list)
           The best atom mapping, a list containing a user specified number of
           the best mappings ranked by their score, or a tuple containing the
           list of best mappings and a list of the corresponding scores.

       Examples
       --------

       Find the best maximum common substructure mapping between two molecules.

       >>> import BioSimSpace as BSS
       >>> mapping = BSS.Align.matchAtoms(molecule0, molecule1)

       Find the 5 best mappings.

       >>> import BioSimSpace as BSS
       >>> mappings = BSS.Align.matchAtoms(molecule0, molecule1, matches=5)

       Find the 5 best mappings along with their ranking scores.

       >>> import BioSimSpace as BSS
       >>> mappings, scores = BSS.Align.matchAtoms(molecule0, molecule1, matches=5, return_scores=True)

       Find the 5 best mappings along with their ranking scores. Score
       by flexibly aligning molecule0 to molecule1 based on each mapping
       and computing the root mean squared displacement of the matched
       atoms.

       >>> import BioSimSpace as BSS
       >>> mappings, scores = BSS.Align.matchAtoms(molecule0, molecule1, matches=5, return_scores=True, scoring_function="rmsd_flex_align")

       Find the best mapping that contains a prematch (this is a dictionary mapping
       atom indices in molecule0 to those in molecule1).

       >>> import BioSimSpace as BSS
       >>> mapping = BSS.Align.matchAtoms(molecule0, molecule1, prematch={0 : 10, 3 : 7})
    """

    # A list of supported scoring functions.
    scoring_functions = ["RMSD", "RMSDALIGN", "RMSDFLEXALIGN"]

    # Validate input.

    if type(molecule0) is not _Molecule:
        raise TypeError(
            "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if type(molecule1) is not _Molecule:
        raise TypeError(
            "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if type(scoring_function) is not str:
        raise TypeError("'scoring_function' must be of type 'str'")
    else:
        # Strip underscores and whitespace, then convert to upper case.
        _scoring_function = scoring_function.replace("_", "").upper()
        _scoring_function = _scoring_function.replace(" ", "").upper()
        if not _scoring_function in scoring_functions:
            raise ValueError(
                "Unsupported scoring function '%s'. Options are: %s" %
                (scoring_function, scoring_functions))

    if _scoring_function == "RMSDFLEXALIGN" and _fkcombu_exe is None:
        raise _MissingSoftwareError(
            "'rmsd_flex_align' option requires the 'fkcombu' program: "
            "http://strcomp.protein.osaka-u.ac.jp/kcombu")

    if type(matches) is not int:
        raise TypeError("'matches' must be of type 'int'")
    else:
        if matches < 0:
            raise ValueError("'matches' must be positive!")

    if type(return_scores) is not bool:
        raise TypeError("'return_matches' must be of type 'bool'")

    if type(prematch) is not dict:
        raise TypeError("'prematch' must be of type 'dict'")
    else:
        _validate_mapping(molecule0, molecule1, prematch, "prematch")

    if type(timeout) is not _Units.Time._Time:
        raise TypeError("'timeout' must be of type 'BioSimSpace.Types.Time'")

    if type(property_map0) is not dict:
        raise TypeError("'property_map0' must be of type 'dict'")

    if type(property_map1) is not dict:
        raise TypeError("'property_map1' must be of type 'dict'")

    # Extract the Sire molecule from each BioSimSpace molecule.
    mol0 = molecule0._getSireObject()
    mol1 = molecule1._getSireObject()

    # Convert the timeout to seconds and take the magnitude as an integer.
    timeout = int(timeout.seconds().magnitude())

    # Create a temporary working directory.
    tmp_dir = _tempfile.TemporaryDirectory()
    work_dir = tmp_dir.name

    # Use RDKkit to find the maximum common substructure.

    try:
        # Run inside a temporary directory.
        with _Utils.cd(work_dir):
            # Write both molecules to PDB files.
            _IO.saveMolecules("tmp0",
                              molecule0,
                              "PDB",
                              property_map=property_map0)
            _IO.saveMolecules("tmp1",
                              molecule1,
                              "PDB",
                              property_map=property_map1)

            # Load the molecules with RDKit.
            # Note that the C++ function overloading seems to be broken, so we
            # need to pass all arguments by position, rather than keyword.
            # The arguments are: "filename", "sanitize", "removeHs", "flavor"
            mols = [
                _Chem.MolFromPDBFile("tmp0.pdb", False, False, 0),
                _Chem.MolFromPDBFile("tmp1.pdb", False, False, 0)
            ]

            # Generate the MCS match.
            mcs = _rdFMCS.FindMCS(mols,
                                  atomCompare=_rdFMCS.AtomCompare.CompareAny,
                                  bondCompare=_rdFMCS.BondCompare.CompareAny,
                                  completeRingsOnly=True,
                                  ringMatchesRingOnly=True,
                                  matchChiralTag=False,
                                  matchValences=False,
                                  maximizeBonds=False,
                                  timeout=timeout)

            # Get the common substructure as a SMARTS string.
            mcs_smarts = _Chem.MolFromSmarts(mcs.smartsString)

    except:
        raise RuntimeError("RDKIT MCS mapping failed!")

    # Score the mappings and return them in sorted order (best to worst).
    mappings, scores = _score_rdkit_mappings(mol0, mol1, mols[0], mols[1],
                                             mcs_smarts, prematch,
                                             _scoring_function, property_map0,
                                             property_map1)

    # Sometimes RDKit fails to generate a mapping that includes the prematch.
    # If so, then try generating a mapping using the MCS routine from Sire.
    if len(mappings) == 1 and mappings[0] == prematch:

        # Convert timeout to a Sire Unit.
        timeout = timeout * _SireUnits.second

        # Regular match. Include light atoms, but don't allow matches between heavy
        # and light atoms.
        m0 = mol0.evaluate().findMCSmatches(
            mol1, _SireMol.AtomResultMatcher(_to_sire_mapping(prematch)),
            timeout, True, property_map0, property_map1, 6, False)

        # Include light atoms, and allow matches between heavy and light atoms.
        # This captures mappings such as O --> H in methane to methanol.
        m1 = mol0.evaluate().findMCSmatches(
            mol1, _SireMol.AtomResultMatcher(_to_sire_mapping(prematch)),
            timeout, True, property_map0, property_map1, 0, False)

        # Take the mapping with the larger number of matches.
        if len(m1) > 0:
            if len(m0) > 0:
                if len(m1[0]) > len(m0[0]):
                    mappings = m1
                else:
                    mappings = m0
            else:
                mappings = m1
        else:
            mappings = m0

        # Score the mappings and return them in sorted order (best to worst).
        mappings, scores = _score_sire_mappings(mol0, mol1, mappings, prematch,
                                                _scoring_function,
                                                property_map0, property_map1)

    if matches == 1:
        if return_scores:
            return (mappings[0], scores[0])
        else:
            return mappings[0]
    else:
        # Return a list of matches from best to worst.
        if return_scores:
            return (mappings[0:matches], scores[0:matches])
        # Return a tuple containing the list of matches from best to
        # worst along with the list of scores.
        else:
            return mappings[0:matches]
Exemple #4
0
def rmsdAlign(molecule0,
              molecule1,
              mapping=None,
              property_map0={},
              property_map1={}):
    """Align atoms in molecule0 to those in molecule1 using the mapping
       between matched atom indices. The molecule is aligned using rigid-body
       translation and rotations, with a root mean squared displacement (RMSD)
       fit to find the optimal translation vector (as opposed to merely taking
       the difference of centroids).

       Parameters
       ----------

       molecule0 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The molecule to align.

       molecule1 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The reference molecule.

       mapping : dict
           A dictionary mapping atoms in molecule0 to those in molecule1.

       property_map0 : dict
           A dictionary that maps "properties" in molecule0 to their user
           defined values. This allows the user to refer to properties
           with their own naming scheme, e.g. { "charge" : "my-charge" }

       property_map1 : dict
           A dictionary that maps "properties" in molecule1 to their user
           defined values.

       Returns
       -------

       molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The aligned molecule.

       Examples
       --------

       Align molecule0 to molecule1 based on a precomputed mapping.

       >>> import BioSimSpace as BSS
       >>> molecule0 = BSS.Align.rmsdAlign(molecule0, molecule1, mapping)

       Align molecule0 to molecule1. Since no mapping is passed one will be
       autogenerated using :class:`matchAtoms <BioSimSpace.Align.matchAtoms>`
       with default options.

       >>> import BioSimSpace as BSS
       >>> molecule0 = BSS.Align.rmsdAlign(molecule0, molecule1)
    """

    if type(molecule0) is not _Molecule:
        raise TypeError(
            "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if type(molecule1) is not _Molecule:
        raise TypeError(
            "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if type(property_map0) is not dict:
        raise TypeError("'property_map0' must be of type 'dict'")

    if type(property_map1) is not dict:
        raise TypeError("'property_map1' must be of type 'dict'")

    # The user has passed an atom mapping.
    if mapping is not None:
        if type(mapping) is not dict:
            raise TypeError("'mapping' must be of type 'dict'.")
        else:
            _validate_mapping(molecule0, molecule1, mapping, "mapping")

    # Get the best match atom mapping.
    else:
        mapping = matchAtoms(molecule0,
                             molecule1,
                             property_map0=property_map0,
                             property_map1=property_map1)

    # Extract the Sire molecule from each BioSimSpace molecule.
    mol0 = molecule0._getSireObject()
    mol1 = molecule1._getSireObject()

    # Convert the mapping to AtomIdx key:value pairs.
    sire_mapping = _to_sire_mapping(mapping)

    # Perform the alignment, mol0 to mol1.
    try:
        mol0 = mol0.move().align(
            mol1, _SireMol.AtomResultMatcher(sire_mapping)).molecule()
    except Exception as e:
        msg = "Failed to align molecules based on mapping: %r" % mapping
        if _isVerbose():
            raise _AlignmentError(msg) from e
        else:
            raise _AlignmentError(msg) from None

    # Return the aligned molecule.
    return _Molecule(mol0)
Exemple #5
0
def _score_rmsd(molecule0, molecule1, mappings, is_align=False):
    """Internal function to score atom mappings based on the root mean squared
       displacement (RMSD) between mapped atoms in two molecules. Optionally,
       molecule0 can first be aligned to molecule1 based on the mapping prior
       to computing the RMSD. The function returns the mappings sorted based
       on their score from best to worst, along with a list containing the
       scores for each mapping.

       Parameters
       ----------

       molecule0 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The reference molecule.

       molecule1 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The target molecule.

       mappings : [ dict ]
           A list of dictionaries mapping  atoms in molecule0 to those in
           molecule1.

       is_align : bool
           Whether to align molecule0 to molecule1 based on the mapping
           before calculating the RMSD score.

       Returns
       -------

       mappings : [ dict ]
           The sorted mappings.
    """

    # Initialise a list of scores.
    scores = []

    # Loop over all mappings.
    for mapping in mappings:
        # Align molecule0 to molecule1 based on the mapping.
        if is_align:
            try:
                molecule0 = molecule0.move().align(
                    molecule1, _SireMol.AtomResultMatcher(mapping)).molecule()
            except:
                raise _AlignmentError(
                    "Failed to align molecules when scoring based on mapping: %r"
                    % mapping) from None

        # We now compute the RMSD between the coordinates of the matched atoms
        # in molecule0 and molecule1.

        # Initialise lists to hold the coordinates.
        c0 = []
        c1 = []

        # Loop over each atom index in the map.
        for idx0, idx1 in mapping.items():
            # Append the coordinates of the matched atom in molecule0.
            c0.append(molecule0.atom(idx0).property("coordinates"))
            # Append the coordinates of atom in molecule1 to which it maps.
            c1.append(molecule1.atom(idx1).property("coordinates"))

        # Compute the RMSD between the two sets of coordinates.
        scores.append(_SireMaths.getRMSD(c0, c1))

    # Sort the scores and return the sorted keys. (Smaller RMSD is best)
    keys = sorted(range(len(scores)), key=lambda k: scores[k])

    # Sort the mappings.
    mappings = [mappings[x] for x in keys]

    # Sort the scores and convert to Angstroms.
    scores = [scores[x] * _Units.Length.angstrom for x in keys]

    # Return the sorted mappings and their scores.
    return (mappings, scores)
Exemple #6
0
def matchAtoms(molecule0,
               molecule1,
               scoring_function="RMSD align",
               matches=1,
               return_scores=False,
               prematch={},
               timeout=5 * _Units.Time.second,
               match_light=True,
               property_map0={},
               property_map1={},
               verbose=False):
    """Find mappings between atom indices in molecule0 to those in molecule1.
       Molecules are aligned using a Maximum Common Substructure (MCS) search.
       When requesting more than one match, the mappings will be sorted using
       a scoring function and returned in order of best to worst score. (Note
       that, depending on the scoring function the "best" score may have the
       lowest value.)

       Parameters
       ----------

       molecule0 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The molecule of interest.

       molecule1 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The reference molecule.

       scoring_function : str
           The scoring function used to match atoms. Available options are:
             - "RMSD"
                 Calculate the root mean squared distance between the
                 coordinates of atoms in molecule0 to those that they
                 map to in molecule1.
             - "RMSD align"
                 Align molecule0 to molecule1 based on the mapping before
                 computing the above RMSD score.

       matches : int
           The maximum number of matches to return. (Sorted in order of score).

       return_scores : bool
           Whether to return a list containing the scores for each mapping.

       prematch : dict
           A pre-match to use as the basis of the search.

       timeout : :class:`Time <BioSimSpace.Types.Time>`
           The timeout for the matching algorithm.

       match_light : bool
           Whether to match light atoms.

       property_map0 : dict
           A dictionary that maps "properties" in molecule0 to their user
           defined values. This allows the user to refer to properties
           with their own naming scheme, e.g. { "charge" : "my-charge" }

       property_map1 : dict
           A dictionary that maps "properties" in molecule1 to their user
           defined values.

       verbose : bool
           Whether to print status information from the matcher.

       Returns
       -------

       matches : dict, [dict], ([dict], list)
           The best atom mapping, a list containing a user specified number of
           the best mappings ranked by their score, or a tuple containing the
           list of best mappings and a list of the corresponding scores.

       Examples
       --------

       Find the best maximum common substructure mapping between two molecules.

       >>> import BioSimSpace as BSS
       >>> mapping = BSS.Align.matchAtoms(molecule0, molecule1)

       Find the 5 best mappings.

       >>> import BioSimSpace as BSS
       >>> mappings = BSS.Align.matchAtoms(molecule0, molecule1, matches=5)

       Find the 5 best mappings along with their ranking scores.

       >>> import BioSimSpace as BSS
       >>> mappings, scores = BSS.Align.matchAtoms(molecule0, molecule1, matches=5, return_scores=True)

       Find the best mapping that contains a prematch (this is a dictionary mapping
       atom indices in molecule0 to those in molecule1).

       >>> import BioSimSpace as BSS
       >>> from Sire.Mol import AtomIdx
       >>> mapping = BSS.Align.matchAtoms(molecule0, molecule1, prematch={AtomIdx(0), AtomIdx(10)})

       Find the best mapping, excluding light atoms.

       >>> import BioSimSpace as BSS
       >>> mapping = BSS.Align.matchAtoms(molecule0, molecule1, match_light=False)
    """

    # A list of supported scoring functions.
    scoring_functions = ["RMSD", "RMSDALIGN"]

    # Validate input.

    if type(molecule0) is not _Molecule:
        raise TypeError(
            "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if type(molecule1) is not _Molecule:
        raise TypeError(
            "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if type(scoring_function) is not str:
        raise TypeError("'scoring_function' must be of type 'str'")
    else:
        # Strip whitespace and convert to upper case.
        scoring_function = scoring_function.replace(" ", "").upper()
        if not scoring_function in scoring_functions:
            raise ValueError(
                "Unsupported scoring function '%s'. Options are: %s" %
                (scoring_function, scoring_functions))

    if type(matches) is not int:
        raise TypeError("'matches' must be of type 'int'")
    else:
        if matches < 0:
            raise ValueError("'matches' must be positive!")

    if type(prematch) is not dict:
        raise TypeError("'prematch' must be of type 'dict'")
    else:
        for idx0, idx1 in prematch.items():
            if type(idx0) is not _SireMol.AtomIdx or type(
                    idx1) is not _SireMol.AtomIdx:
                raise TypeError(
                    "'prematch' dictionary key:value pairs must be of type 'Sire.Mol.AtomIdx'"
                )
            if idx0.value() < 0 or idx0.value() >= molecule0.nAtoms() or \
               idx1.value() < 0 or idx1.value() >= molecule1.nAtoms():
                raise ValueError(
                    "'prematch' dictionary key:value pair '%s : %s' is out of range! "
                    "The molecules contain %d and %d atoms." %
                    (idx0, idx1, molecule0.nAtoms(), molecule1.nAtoms()))

    if type(timeout) is not _Units.Time._Time:
        raise TypeError("'timeout' must be of type 'BioSimSpace.Types.Time'")

    if type(match_light) is not bool:
        raise TypeError("'match_light' must be of type 'bool'")

    if type(property_map0) is not dict:
        raise TypeError("'property_map0' must be of type 'dict'")

    if type(property_map1) is not dict:
        raise TypeError("'property_map1' must be of type 'dict'")

    if type(verbose) is not bool:
        raise TypeError("'verbose' must be of type 'bool'")

    # Extract the Sire molecule from each BioSimSpace molecule.
    mol0 = molecule0._getSireMolecule()
    mol1 = molecule1._getSireMolecule()

    # Convert the timeout to a Sire unit.
    timeout = timeout.magnitude() * timeout._supported_units[timeout.unit()]

    # Are we performing an alignment before scoring.
    if scoring_function == "RMSDALIGN":
        is_align = True
    else:
        is_align = False

    # Find all of the best maximum common substructure matches.
    # We perform two matches to handle different edge cases. The best
    # match is the one that matches the greater number of atom pairs.
    # Ideally the two runs should be performed concurrently, but this
    # isn't currently possible from within Python since the underlying
    # Sire objects aren't pickable.

    # Regular match. Include light atoms, but don't allow matches between heavy
    # and light atoms.
    m0 = mol0.evaluate().findMCSmatches(mol1,
                                        _SireMol.AtomResultMatcher(prematch),
                                        timeout, match_light, property_map0,
                                        property_map1, 6, verbose)

    # Include light atoms, and allow matches between heavy and light atoms.
    # This captures mappings such as O --> H in methane to methanol.
    m1 = mol0.evaluate().findMCSmatches(mol1,
                                        _SireMol.AtomResultMatcher(prematch),
                                        timeout, match_light, property_map0,
                                        property_map1, 0, verbose)

    # Take the mapping with the larger number of matches.
    if len(m1) > 0:
        if len(m0) > 0:
            if len(m1[0]) > len(m0[0]):
                mappings = m1
            else:
                mappings = m0
        else:
            mappings = m1
    else:
        mappings = m0

    # No matches!
    if len(mappings) == 0:
        return None

    # Score the mappings and return them in sorted order (best to worst).
    # For now we default to RMSD scoring, since it's the only option.
    else:
        # Return the best match.
        if matches == 1:
            return _score_rmsd(mol0, mol1, mappings, is_align)[0][0]
        else:
            # Return a list of matches from best to worst.
            if return_scores:
                (mappings, scores) = _score_rmsd(mol0, mol1, mappings,
                                                 is_align)
                return (mappings[0:matches], scores[0:matches])
            # Return a tuple containing the list of matches from best to
            # worst along with the list of scores.
            else:
                return _score_rmsd(mol0, mol1, mappings,
                                   is_align)[0][0:matches]
Exemple #7
0
def rmsdAlign(molecule0,
              molecule1,
              mapping=None,
              property_map0={},
              property_map1={}):
    """Align atoms in molecule0 to those in molecule1 using the mapping
       between matched atom indices. The molecule is aligned based on
       a root mean squared displacement (RMSD) fit to find the optimal
       translation vector (as opposed to merely taking the difference of
       centroids).

       Parameters
       ----------

       molecule0 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The molecule to align.

       molecule1 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The reference molecule.

       mapping : dict
           A dictionary mapping atoms in molecule0 to those in molecule1.

       property_map0 : dict
           A dictionary that maps "properties" in molecule0 to their user
           defined values. This allows the user to refer to properties
           with their own naming scheme, e.g. { "charge" : "my-charge" }

       property_map1 : dict
           A dictionary that maps "properties" in molecule1 to their user
           defined values.

       Returns
       -------

       molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
           The aligned molecule.

       Examples
       --------

       Align molecule0 to molecule1 based on a precomputed mapping.

       >>> import BioSimSpace as BSS
       >>> molecule0 = BSS.Align.rmsdAlign(molecule0, molecule1, mapping)

       Align molecule0 to molecule1. Since no mapping is passed one will be
       autogenerated using :class:`matchAtoms <BioSimSpace.Align.matchAtoms>`
       with default options.

       >>> import BioSimSpace as BSS
       >>> molecule0 = BSS.Align.rmsdAlign(molecule0, molecule1)
    """

    if type(molecule0) is not _Molecule:
        raise TypeError(
            "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if type(molecule1) is not _Molecule:
        raise TypeError(
            "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if type(property_map0) is not dict:
        raise TypeError("'property_map0' must be of type 'dict'")

    if type(property_map1) is not dict:
        raise TypeError("'property_map1' must be of type 'dict'")

    # The user has passed an atom mapping.
    if mapping is not None:
        if type(mapping) is not dict:
            raise TypeError("'mapping' must be of type 'dict'.")
        else:
            # Make sure all key/value pairs are of type AtomIdx.
            for idx0, idx1 in mapping.items():
                if type(idx0) is not _SireMol.AtomIdx or type(
                        idx1) is not _SireMol.AtomIdx:
                    raise TypeError(
                        "key:value pairs in 'mapping' must be of type 'Sire.Mol.AtomIdx'"
                    )
                if idx0.value() < 0 or idx0.value() >= molecule0.nAtoms() or \
                   idx1.value() < 0 or idx1.value() >= molecule1.nAtoms():
                    raise ValueError(
                        "'mapping' dictionary key:value pair '%s : %s' is out of range! "
                        "The molecules contain %d and %d atoms." %
                        (idx0, idx1, molecule0.nAtoms(), molecule1.nAtoms()))

    # Get the best match atom mapping.
    else:
        mapping = matchAtoms(molecule0,
                             molecule1,
                             property_map0=property_map0,
                             property_map1=property_map1)

    # Extract the Sire molecule from each BioSimSpace molecule.
    mol0 = molecule0._getSireMolecule()
    mol1 = molecule1._getSireMolecule()

    # Perform the alignment, mol0 to mol1.
    try:
        mol0 = mol0.move().align(
            mol1, _SireMol.AtomResultMatcher(mapping)).molecule()
    except:
        raise _AlignmentError(
            "Failed to align molecules based on mapping: %r" %
            mapping) from None

    # Return the aligned molecule.
    return _Molecule(mol0)