def PrintTanimoto(molA, molB, minb, maxb):
    """Print the Tanimoto score of two molecules' path fingerprints.

    A 2048-bit path fingerprint is generated for each molecule using the
    default path atom and bond typing, then the Tanimoto score of the two
    fingerprints is printed with three decimals.

    :param molA: first molecule, passed to ``oegraphsim.OEMakePathFP``
    :param molB: second molecule, passed to ``oegraphsim.OEMakePathFP``
    :param minb: value passed as the minimum-bonds argument of OEMakePathFP
    :param maxb: value passed as the maximum-bonds argument of OEMakePathFP
    """
    # Arguments shared by both fingerprint generations.
    shared_args = (
        2048,
        minb,
        maxb,
        oegraphsim.OEFPAtomType_DefaultPathAtom,
        oegraphsim.OEFPBondType_DefaultPathBond,
    )

    fingerprints = []
    for molecule in (molA, molB):
        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakePathFP(fingerprint, molecule, *shared_args)
        fingerprints.append(fingerprint)

    score = oegraphsim.OETanimoto(fingerprints[0], fingerprints[1])
    print("Tanimoto(A,B) = %.3f" % score)
def PrintTanimoto(molA, molB, atype, btype):
    """Print the Tanimoto score of two molecules for a given atom/bond typing.

    A 2048-bit path fingerprint covering paths of 0 to 5 bonds is generated
    for each molecule with the supplied typing, then the Tanimoto score of
    the two fingerprints is printed with three decimals.

    :param molA: first molecule, passed to ``oegraphsim.OEMakePathFP``
    :param molB: second molecule, passed to ``oegraphsim.OEMakePathFP``
    :param atype: atom typing passed to OEMakePathFP
    :param btype: bond typing passed to OEMakePathFP
    """

    def _path_fingerprint(molecule):
        # Fixed size and path-length range; only the typing is caller-supplied.
        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakePathFP(fingerprint, molecule, 2048, 0, 5, atype, btype)
        return fingerprint

    first = _path_fingerprint(molA)
    second = _path_fingerprint(molB)
    print("Tanimoto(A,B) = %.3f" % oegraphsim.OETanimoto(first, second))
Beispiel #3
0
def calculate_mol_similarity(molA, molB):
    """
    Compute the Tanimoto similarity of two oemol objects from their
    MACCS166 fingerprints.

    NOTE: the original author suggested relocating this helper to
    utils/openeye.py or openmoltools.

    :param molA: oemol object of molecule A
    :param molB: oemol object of molecule B
    :return: float, tanimoto score of the two molecules, between 0 and 1
    """

    def _maccs_fingerprint(molecule):
        # One MACCS166 fingerprint per molecule.
        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakeFP(fingerprint, molecule, oegraphsim.OEFPType_MACCS166)
        return fingerprint

    first = _maccs_fingerprint(molA)
    second = _maccs_fingerprint(molB)
    return oegraphsim.OETanimoto(first, second)
Beispiel #4
0
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

import sys
from openeye import oechem
from openeye import oegraphsim

# @ <SNIPPET-CALC-FROM-FILE>
# Exactly two command-line arguments are expected: query file, target file.
if len(sys.argv) != 3:
    oechem.OEThrow.Usage("%s <queryfile> <targetfile>" % sys.argv[0])

query_path = sys.argv[1]
target_path = sys.argv[2]

ifs = oechem.oemolistream()
if not ifs.open(query_path):
    oechem.OEThrow.Fatal("Unable to open %s for reading" % query_path)

# Only the first molecule of the query file is used as the query.
qmol = oechem.OEGraphMol()
query_was_read = oechem.OEReadMolecule(ifs, qmol)
if not query_was_read:
    oechem.OEThrow.Fatal("Unable to read query molecule")
qfp = oegraphsim.OEFingerPrint()
oegraphsim.OEMakeFP(qfp, qmol, oegraphsim.OEFPType_Path)

# The same stream object is re-opened on the target file.
if not ifs.open(target_path):
    oechem.OEThrow.Fatal("Unable to open %s for reading" % target_path)

# One fingerprint object is reused for every target molecule.
tfp = oegraphsim.OEFingerPrint()
for tmol in ifs.GetOEGraphMols():
    oegraphsim.OEMakeFP(tfp, tmol, oegraphsim.OEFPType_Path)
    score = oegraphsim.OETanimoto(qfp, tfp)
    print("%.3f" % score)
# @ </SNIPPET-CALC-FROM-FILE>
Beispiel #5
0
def cluster_similar_molecules(
    smiles, fingerprint_type=oegraphsim.OEFPType_Tree, eps=0.5, min_samples=2
):
    """Cluster a set of molecules based on their similarity, using a
    Tanimoto distance metric (``1 - Tanimoto``) and the `sklearn` DBSCAN
    clustering code.

    Notes
    -----
    This is based on the code by David Mobley:

    https://github.com/openforcefield/release-1-benchmarking/blob/master/QM_molecule_selection/divide_sets.ipynb

    The return value of ``OEParseSmiles`` is not checked, so a pattern which
    fails to parse is still fingerprinted from whatever molecule object the
    parser left behind.

    Parameters
    ----------
    smiles: list of str
        The SMILES representations of the molecules to cluster.
    fingerprint_type
        The type of molecular fingerprint to use.
    eps: float
        The `eps` parameter to pass as an argument to DBSCAN while clustering.
    min_samples: int
        The `min_samples` parameter to pass as an argument to DBSCAN while
        clustering.

    Returns
    -------
    dict of int and list of str
        The SMILES patterns grouped by the integer cluster label assigned by
        DBSCAN (per the scikit-learn documentation, noisy samples receive the
        label ``-1``). An empty dictionary is returned when `smiles` is empty.
    """
    assert isinstance(smiles, list)

    # DBSCAN cannot be fitted to an empty distance matrix, so there is
    # nothing to cluster and nothing to return.
    if not smiles:
        return {}

    # Build one fingerprint per distinct SMILES pattern.
    fingerprints = {}

    for smiles_pattern in smiles:

        if smiles_pattern in fingerprints:
            continue

        oe_molecule = oechem.OEMol()
        oechem.OEParseSmiles(oe_molecule, smiles_pattern)

        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakeFP(fingerprint, oe_molecule, fingerprint_type)

        fingerprints[smiles_pattern] = fingerprint

    # Build the distance matrix. The Tanimoto score is symmetric, so each
    # pair is evaluated once and mirrored; the diagonal stays at zero.
    n_molecules = len(smiles)
    distance_matrix = np.zeros((n_molecules, n_molecules))

    for i, smiles_i in enumerate(smiles):

        fingerprint_i = fingerprints[smiles_i]

        for j in range(i + 1, n_molecules):

            distance = 1.0 - oegraphsim.OETanimoto(
                fingerprint_i, fingerprints[smiles[j]]
            )

            distance_matrix[i, j] = distance
            distance_matrix[j, i] = distance

    # Cluster the data
    clustering = DBSCAN(eps=eps, min_samples=min_samples, metric="precomputed")
    labels = clustering.fit(distance_matrix).labels_

    # Group the patterns by label in a single pass. Labels are converted to
    # plain ints so that the keys are not numpy scalar objects.
    smiles_by_cluster = {}

    for smiles_pattern, label in zip(smiles, labels):
        smiles_by_cluster.setdefault(int(label), []).append(smiles_pattern)

    return smiles_by_cluster
Beispiel #6
0
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

from openeye import oechem
from openeye import oegraphsim

# @ <SNIPPET-FP-TANIMOTO>
# Build the three molecules from their SMILES strings.
molA = oechem.OEGraphMol()
molB = oechem.OEGraphMol()
molC = oechem.OEGraphMol()
oechem.OESmilesToMol(molA, "c1ccc2c(c1)c(c(oc2=O)OCCSC(=N)N)Cl")
oechem.OESmilesToMol(molB, "COc1cc2ccc(cc2c(=O)o1)NC(=N)N")
oechem.OESmilesToMol(molC, "COc1c(c2ccc(cc2c(=O)o1)NC(=N)N)Cl")

# Generate a MACCS166 fingerprint for each molecule.
fpA = oegraphsim.OEFingerPrint()
fpB = oegraphsim.OEFingerPrint()
fpC = oegraphsim.OEFingerPrint()
oegraphsim.OEMakeFP(fpA, molA, oegraphsim.OEFPType_MACCS166)
oegraphsim.OEMakeFP(fpB, molB, oegraphsim.OEFPType_MACCS166)
oegraphsim.OEMakeFP(fpC, molC, oegraphsim.OEFPType_MACCS166)

# Report the pairwise Tanimoto scores.
score_ab = oegraphsim.OETanimoto(fpA, fpB)
print("Tanimoto(A,B) = %.3f" % score_ab)
score_ac = oegraphsim.OETanimoto(fpA, fpC)
print("Tanimoto(A,C) = %.3f" % score_ac)
score_bc = oegraphsim.OETanimoto(fpB, fpC)
print("Tanimoto(B,C) = %.3f" % score_bc)
# @ </SNIPPET-FP-TANIMOTO>
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

from __future__ import print_function
from openeye import oechem
from openeye import oegraphsim

# Benzene is the single molecule fingerprinted below.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccccc1")

# @ <SNIPPET-PATH-FP-TYPE>
# Settings shared by both fingerprints; only the bit count differs.
minbonds = 0
maxbonds = 5
atomtype = oegraphsim.OEFPAtomType_DefaultPathAtom
bondtype = oegraphsim.OEFPBondType_DefaultPathBond

numbits = 1024
fpA = oegraphsim.OEFingerPrint()
oegraphsim.OEMakePathFP(fpA, mol, numbits, minbonds, maxbonds, atomtype, bondtype)

numbits = 2048
fpB = oegraphsim.OEFingerPrint()
oegraphsim.OEMakePathFP(fpB, mol, numbits, minbonds, maxbonds, atomtype, bondtype)

same_type = oegraphsim.OEIsSameFPType(fpA, fpB)
print("same fingerprint types = %r" % same_type)
# NOTE(review): fpA and fpB were built with different bit counts; what
# OETanimoto returns for such a pair is not shown here - confirm against
# the OpenEye GraphSim documentation.
print(oegraphsim.OETanimoto(fpA, fpB))
# @ </SNIPPET-PATH-FP-TYPE>