コード例 #1
0
def PrintTanimoto(molA, molB, minb, maxb):
    """Print the Tanimoto similarity of two molecules from path fingerprints.

    Both fingerprints are 2048 bits wide and use the default path atom and
    bond typing constants; only the path-length bounds are caller-supplied.

    :param molA: first molecule to fingerprint
    :param molB: second molecule to fingerprint
    :param minb: lower path-length bound forwarded to ``OEMakePathFP``
    :param maxb: upper path-length bound forwarded to ``OEMakePathFP``
    """
    size_in_bits = 2048
    atom_typing = oegraphsim.OEFPAtomType_DefaultPathAtom
    bond_typing = oegraphsim.OEFPBondType_DefaultPathBond

    # Fingerprint A first, then B, with identical settings.
    fingerprints = []
    for molecule in (molA, molB):
        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakePathFP(
            fingerprint, molecule, size_in_bits, minb, maxb, atom_typing, bond_typing
        )
        fingerprints.append(fingerprint)

    print("Tanimoto(A,B) = %.3f" % oegraphsim.OETanimoto(*fingerprints))
コード例 #2
0
def PrintTanimoto(molA, molB, atype, btype):
    """Print the Tanimoto similarity of two molecules from path fingerprints.

    Both fingerprints are 2048 bits wide and use path-length bounds 0 and 5;
    only the atom and bond typing are caller-supplied.

    :param molA: first molecule to fingerprint
    :param molB: second molecule to fingerprint
    :param atype: atom typing value forwarded to ``OEMakePathFP``
    :param btype: bond typing value forwarded to ``OEMakePathFP``
    """
    size_in_bits, shortest, longest = 2048, 0, 5

    def _path_fingerprint(molecule):
        # Build one path fingerprint with the shared settings.
        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakePathFP(
            fingerprint, molecule, size_in_bits, shortest, longest, atype, btype
        )
        return fingerprint

    first = _path_fingerprint(molA)
    second = _path_fingerprint(molB)
    print("Tanimoto(A,B) = %.3f" % oegraphsim.OETanimoto(first, second))
コード例 #3
0
ファイル: openeye.py プロジェクト: LaYeqa/perses
def calculate_mol_similarity(molA, molB):
    """
    Compute the Tanimoto similarity of two molecules from MACCS166 fingerprints.

    NOTE(review): the original author's note suggests this helper belongs in
    utils/openeye.py or openmoltools -- confirm before relocating.

    :param molA: oemol object of molecule A
    :param molB: oemol object of molecule B
    :return: float, tanimoto score of the two molecules, between 0 and 1
    """
    def _maccs_fingerprint(molecule):
        # One MACCS166 fingerprint for a single molecule.
        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakeFP(fingerprint, molecule, oegraphsim.OEFPType_MACCS166)
        return fingerprint

    fingerprint_a = _maccs_fingerprint(molA)
    fingerprint_b = _maccs_fingerprint(molB)
    return oegraphsim.OETanimoto(fingerprint_a, fingerprint_b)
コード例 #4
0
    def calculate_fp(self):
        """Generate a fingerprint for every molecule in ``self.act_list``.

        Each fingerprint is built with the type read from ``self.args.fptype``
        and appended to ``self.fp_list`` in the order the molecules are visited.
        """
        for molecule in self.act_list:
            fingerprint = oegraphsim.OEFingerPrint()
            # The type is read per molecule, exactly as the molecules arrive.
            oegraphsim.OEMakeFP(fingerprint, molecule, self.args.fptype)
            self.fp_list.append(fingerprint)
コード例 #5
0
def circular_wrapper(smi, num_bits=1024, min_radius=2, max_radius=2):
    """Return the bit string of a circular fingerprint built from a SMILES.

    :param smi: SMILES string to parse into a molecule
    :param num_bits: fingerprint size forwarded to ``OEMakeCircularFP``
    :param min_radius: minimum radius forwarded to ``OEMakeCircularFP``
    :param max_radius: maximum radius forwarded to ``OEMakeCircularFP``
    :return: whatever ``get_bit_string`` produces for the fingerprint
    """
    molecule = oechem.OEGraphMol()
    # The parse result is not inspected, so a SMILES that fails to parse is
    # fingerprinted as whatever molecule state the parser left behind.
    oechem.OESmilesToMol(molecule, smi)

    fingerprint = oegraphsim.OEFingerPrint()
    # NOTE(review): these are the *path* typing defaults applied to a circular
    # fingerprint -- confirm this is intended rather than the circular defaults.
    atom_typing = oegraphsim.OEFPAtomType_DefaultPathAtom
    bond_typing = oegraphsim.OEFPBondType_DefaultPathBond
    oegraphsim.OEMakeCircularFP(
        fingerprint, molecule, num_bits, min_radius, max_radius, atom_typing, bond_typing
    )
    return get_bit_string(fingerprint)
コード例 #6
0
def maccs_wrapper(smi):
    """Return the bit string of a MACCS166 fingerprint built from a SMILES.

    :param smi: SMILES string to parse into a molecule
    :return: whatever ``get_bit_string`` produces for the fingerprint
    """
    molecule = oechem.OEGraphMol()
    # The parse result is not inspected; fingerprinting proceeds regardless.
    oechem.OESmilesToMol(molecule, smi)

    fingerprint = oegraphsim.OEFingerPrint()
    oegraphsim.OEMakeMACCS166FP(fingerprint, molecule)
    return get_bit_string(fingerprint)
コード例 #7
0
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

from openeye import oechem
from openeye import oegraphsim

# Parse benzene from SMILES as the example molecule.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccccc1")

# Build fpA as a path fingerprint, then build fpB through the generic
# OEMakeFP call using the fingerprint type object taken from fpA.
# @ <SNIPPET-MAKE-FP>
fpA = oegraphsim.OEFingerPrint()
oegraphsim.OEMakePathFP(fpA, mol)

fpB = oegraphsim.OEFingerPrint()
oegraphsim.OEMakeFP(fpB, mol, fpA.GetFPTypeBase())
# @ </SNIPPET-MAKE-FP>

# Report whether the two fingerprints carry the same fingerprint type.
print(
    "same fingerprint types"
    if oegraphsim.OEIsSameFPType(fpA, fpB)
    else "different fingerprint types"
)
コード例 #8
0
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

from openeye import oechem
from openeye import oegraphsim

# Parse benzene from SMILES as the example molecule.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccccc1")

# Variant 1: build a path fingerprint with OEMakePathFP, passing only the
# fingerprint object and the molecule.
# @ <SNIPPET-MAKE-PATH-1>
fp = oegraphsim.OEFingerPrint()
oegraphsim.OEMakePathFP(fp, mol)
# @ </SNIPPET-MAKE-PATH-1>

# Print the type string of the fingerprint that was just generated.
print(fp.GetFPTypeBase().GetFPTypeString())

# Variant 2: regenerate into the same fingerprint object through the generic
# OEMakeFP entry point, selecting the type with the OEFPType_Path constant.
# @ <SNIPPET-MAKE-PATH-2>
oegraphsim.OEMakeFP(fp, mol, oegraphsim.OEFPType_Path)
# @ </SNIPPET-MAKE-PATH-2>

# Print the type string again so the two variants can be compared.
print(fp.GetFPTypeBase().GetFPTypeString())

# Variant 3: explicit settings. The statement that consumes these values is
# not visible in this excerpt (the snippet is cut off before its end marker).
# @ <SNIPPET-MAKE-PATH-3>
numbits = 1024
minbonds = 0
maxbonds = 5
コード例 #9
0
def cluster_similar_molecules(
    smiles, fingerprint_type=oegraphsim.OEFPType_Tree, eps=0.5, min_samples=2
):
    """Cluster a set of molecules by fingerprint similarity.

    Pairwise distances are ``1 - Tanimoto`` between the molecules'
    fingerprints and are handed to the `sklearn` DBSCAN implementation as a
    precomputed distance matrix.

    Notes
    -----
    This is based on the code by David Mobley:

    https://github.com/openforcefield/release-1-benchmarking/blob/master/QM_molecule_selection/divide_sets.ipynb

    The result of parsing each SMILES pattern is not checked, so a pattern
    that fails to parse is fingerprinted as whatever molecule the parser left
    behind.

    Parameters
    ----------
    smiles: list of str
        The SMILES representations of the molecules to cluster.
    fingerprint_type
        The type of molecular fingerprint to use.
    eps: float
        The `eps` parameter to pass as an argument to DBSCAN while clustering.
    min_samples: int
        The `min_samples` parameter to pass as an argument to DBSCAN while
        clustering.

    Returns
    -------
    dict of int and list of str
        The SMILES patterns grouped by the integer cluster label DBSCAN
        assigned to them (DBSCAN uses the label ``-1`` for noise samples).
        An empty input yields an empty dictionary.
    """
    assert isinstance(smiles, list)

    # DBSCAN rejects an empty distance matrix, so there is nothing to cluster.
    if not smiles:
        return {}

    # Build fingerprints, once per distinct SMILES pattern.
    fingerprints = {}

    for smiles_pattern in smiles:

        if smiles_pattern in fingerprints:
            continue

        oe_molecule = oechem.OEMol()
        oechem.OEParseSmiles(oe_molecule, smiles_pattern)

        fingerprint = oegraphsim.OEFingerPrint()
        oegraphsim.OEMakeFP(fingerprint, oe_molecule, fingerprint_type)

        fingerprints[smiles_pattern] = fingerprint

    # Build the distance matrix. It is symmetric with a zero diagonal, so each
    # pair is scored once and mirrored.
    n_molecules = len(smiles)
    distance_matrix = np.zeros((n_molecules, n_molecules))

    for i, smiles_i in enumerate(smiles):

        fingerprint_i = fingerprints[smiles_i]

        for j in range(i + 1, n_molecules):

            distance = 1.0 - oegraphsim.OETanimoto(
                fingerprint_i, fingerprints[smiles[j]]
            )

            distance_matrix[i, j] = distance
            distance_matrix[j, i] = distance

    # Cluster the data
    clustering = DBSCAN(eps=eps, min_samples=min_samples, metric="precomputed")
    labels = clustering.fit(distance_matrix).labels_

    # Group the patterns by label in a single pass. Labels are converted to
    # plain ints so the keys do not leak numpy scalar types to callers.
    smiles_by_cluster = {}

    for smiles_pattern, label in zip(smiles, labels):
        smiles_by_cluster.setdefault(int(label), []).append(smiles_pattern)

    return smiles_by_cluster
コード例 #10
0
def main(argv=[__name__]):
    """Search a molecule database for the fingerprints most similar to a query.

    The command line supplies the query file (``-query``), the molecule
    database file (``-molfname``) and the output file (``-out``), together
    with the fingerprint type and the database search options. A fingerprint
    database is built from the molecule database, searched with the first
    molecule of the query file, and the hit molecules are written to the
    output file in the order the scores are returned.

    :param argv: command-line argument list handed to ``OEParseCommandLine``
    :return: 0, both after a completed search and when command-line parsing
        returns false
    """
    itf = oechem.OEInterface()
    oechem.OEConfigure(itf, InterfaceData)

    # Register the search and fingerprint options with their defaults
    # (limit 10, Tanimoto measure, tree fingerprints).
    defopts = oegraphsim.OEFPDatabaseOptions(10,
                                             oegraphsim.OESimMeasure_Tanimoto)
    oegraphsim.OEConfigureFPDatabaseOptions(itf, defopts)
    oegraphsim.OEConfigureFingerPrint(
        itf, oegraphsim.OEGetFPType(oegraphsim.OEFPType_Tree))

    if not oechem.OEParseCommandLine(itf, argv):
        return 0

    qfname = itf.GetString("-query")
    mfname = itf.GetString("-molfname")
    ofname = itf.GetString("-out")

    # initialize databases

    timer = oechem.OEWallTimer()
    timer.Start()

    ifs = oechem.oemolistream()
    if not ifs.open(qfname):
        oechem.OEThrow.Fatal("Cannot open input file!")

    query = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ifs, query):
        oechem.OEThrow.Fatal("Cannot read query molecule!")
    # Only the first molecule of the query file is used.
    ifs.close()

    moldb = oechem.OEMolDatabase()
    if not moldb.Open(mfname):
        oechem.OEThrow.Fatal("Cannot open molecule database!")

    ofs = oechem.oemolostream()
    if not ofs.open(ofname):
        oechem.OEThrow.Fatal("Cannot open output file!")

    fptype = oegraphsim.OESetupFingerPrint(itf)
    oechem.OEThrow.Info("Using fingerprint type %s" % fptype.GetFPTypeString())
    fpdb = oegraphsim.OEFPDatabase(fptype)

    # Placeholder added for molecules that cannot be read, so that entry idx
    # of the fingerprint database always corresponds to molecule idx of moldb.
    emptyfp = oegraphsim.OEFingerPrint()
    emptyfp.SetFPTypeBase(fptype)

    nrmols = moldb.GetMaxMolIdx()

    mol = oechem.OEGraphMol()
    for idx in range(nrmols):
        if moldb.GetMolecule(mol, idx):
            fpdb.AddFP(mol)
        else:
            fpdb.AddFP(emptyfp)

    nrfps = fpdb.NumFingerPrints()
    oechem.OEThrow.Info("%5.2f sec to initialize databases" % timer.Elapsed())

    opts = oegraphsim.OEFPDatabaseOptions()
    oegraphsim.OESetupFPDatabaseOptions(opts, itf)

    # search fingerprint database

    timer.Start()
    scores = fpdb.GetSortedScores(query, opts)
    oechem.OEThrow.Info("%5.2f sec to search %d fingerprints" %
                        (timer.Elapsed(), nrfps))

    # write hits, counting the molecules actually written so the report does
    # not simply echo the configured limit

    timer.Start()
    nrhits = 0
    hit = oechem.OEGraphMol()
    for si in scores:
        if moldb.GetMolecule(hit, si.GetIdx()):
            oechem.OEWriteMolecule(ofs, hit)
            nrhits += 1
    ofs.close()
    oechem.OEThrow.Info("%5.2f sec to write %d hits" %
                        (timer.Elapsed(), nrhits))

    return 0