Exemple #1
0
 def computeSimilarityFP(self, c_chem, typeFP, typeMetric):
     """Compute a fingerprint similarity between this object and ``c_chem``.

     Both fingerprints are read from the ``d_FP`` dictionary of each object
     under the key ``typeFP`` and handed to the RDKit function
     ``DataStructs.<typeMetric>Similarity``.

     Args:
         c_chem: other object exposing a ``d_FP`` dictionary.
         typeFP: key of the fingerprint to compare in both ``d_FP`` dicts.
         typeMetric: metric name, one of "Tanimoto", "Dice", "Cosine",
             "Sokal", "Russel", "RogotGoldberg", "AllBit", "Kulczynski",
             "McConnaughey", "Asymmetric" or "BraunBlanquet".

     Returns:
         The value returned by the RDKit similarity function, or the string
         "NA" when the metric is unknown or the computation fails (missing
         fingerprint, fingerprint type incompatible with the metric, ...).
         In the "NA" case a message is printed and appended to ``self.log``.
     """
     # Every supported metric maps onto DataStructs.<name>Similarity.
     supported_metrics = (
         "Tanimoto", "Dice", "Cosine", "Sokal", "Russel", "RogotGoldberg",
         "AllBit", "Kulczynski", "McConnaughey", "Asymmetric",
         "BraunBlanquet",
     )
     try:
         if typeMetric not in supported_metrics:
             # Previously an unknown metric fell through and returned None
             # silently; report it like any other unsupported combination.
             raise ValueError("unknown similarity metric: %r" % (typeMetric,))
         similarity_func = getattr(DataStructs, typeMetric + "Similarity")
         return similarity_func(self.d_FP[typeFP], c_chem.d_FP[typeFP])
     except Exception:
         # Deliberately best-effort, but no longer a bare ``except`` so that
         # KeyboardInterrupt / SystemExit still propagate.
         print("Combination %s and %s not supported"%(typeFP, typeMetric))
         self.log = "%sCombination %s and %s not supported\n"%(self.log, typeFP, typeMetric)
         return "NA"
def rd_fingerprint_evaluation(references, candidates):
    """
    Score candidates against references with RDKit path-based fingerprints.

    For every key of ``references`` that is also present in ``candidates``,
    both entries are converted with ``rdmolops.RDKFingerprint`` (fpSize=2048,
    minPath=1, maxPath=7) and compared using five metrics: Tanimoto, Dice,
    Cosine, Sokal and McConnaughey, each rounded to 4 decimals. Keys missing
    from ``candidates`` count as 0 for every metric.

    The mean of each metric is printed; only the mean Tanimoto similarity
    (rounded to 4 decimals) is returned.
    """
    print("Calculating Similarity via RDFIngerprint Path Similarity")
    # One column of scores per metric, in the order:
    # Tanimoto, Dice, Cosine, Sokal, McConnaughey
    per_metric = [[] for _ in range(5)]
    for key in references:
        scores = [0] * 5
        if key in candidates:
            fp_candidate = rdmolops.RDKFingerprint(
                candidates[key], fpSize=2048, minPath=1, maxPath=7)
            fp_reference = rdmolops.RDKFingerprint(
                references[key], fpSize=2048, minPath=1, maxPath=7)
            compare_functions = (
                DataStructs.TanimotoSimilarity,
                DataStructs.DiceSimilarity,
                DataStructs.CosineSimilarity,
                DataStructs.SokalSimilarity,
                DataStructs.McConnaugheySimilarity,
            )
            scores = [
                round(compare(fp_reference, fp_candidate), 4)
                for compare in compare_functions
            ]
        for column, score in zip(per_metric, scores):
            column.append(score)
    print("Done Calculating Similarity via RDFIngerprint Path Similarity")
    print("##########################################")
    report_templates = (
        "Tanimoto Similarity:{}",
        "Dice Similarity:{}",
        "Cosine Similarity:{}",
        "Sokal Similarity:{}",
        "McConnaughey Similarity:{}",
    )
    for template, column in zip(report_templates, per_metric):
        print(template.format(round(np.mean(column), 4)))
    print("##########################################")
    return round(np.mean(per_metric[0]), 4)
def morgan_fingerprint_evaluation(references, candidates):
    """
    Score candidates against references with Morgan (circular) fingerprints.
    https://doi.org/10.1021/ci100050t

    For every key of ``references`` that is also present in ``candidates``,
    both entries are converted with
    ``AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=1024)`` and compared
    using Tanimoto, Dice, Cosine, Sokal and McConnaughey similarity, each
    rounded to 4 decimals. Keys missing from ``candidates`` count as 0 for
    every metric.

    The mean of each metric is printed; only the mean Tanimoto similarity
    (rounded to 4 decimals) is returned.
    """
    print("Calculating Similarity via Morgan based Circular Fingerprint")
    # Parallel score lists: Tanimoto, Dice, Cosine, Sokal, McConnaughey
    tanimoto, dice, cosine, sokal, mcconnaughey = [], [], [], [], []
    score_lists = (tanimoto, dice, cosine, sokal, mcconnaughey)
    for key in references:
        row = [0, 0, 0, 0, 0]
        if key in candidates:
            fp_cand = AllChem.GetMorganFingerprintAsBitVect(
                candidates[key], 2, nBits=1024)
            fp_ref = AllChem.GetMorganFingerprintAsBitVect(
                references[key], 2, nBits=1024)
            measures = (
                DataStructs.TanimotoSimilarity,
                DataStructs.DiceSimilarity,
                DataStructs.CosineSimilarity,
                DataStructs.SokalSimilarity,
                DataStructs.McConnaugheySimilarity,
            )
            for position, measure in enumerate(measures):
                row[position] = round(measure(fp_ref, fp_cand), 4)
        for scores, value in zip(score_lists, row):
            scores.append(value)
    print("Done Calculating Similarity via  Morgan based Circular Fingerprint")
    print("##########################################")
    print("Tanimoto Similarity:{}".format(round(np.mean(tanimoto), 4)))
    print("Dice Similarity:{}".format(round(np.mean(dice), 4)))
    print("Cosine Similarity:{}".format(round(np.mean(cosine), 4)))
    print("Sokal Similarity:{}".format(round(np.mean(sokal), 4)))
    print("McConnaughey Similarity:{}".format(
        round(np.mean(mcconnaughey), 4)))
    print("##########################################")
    return round(np.mean(tanimoto), 4)
def get_similarity_all(fp1, fp2):
    """
    Return several RDKit similarity scores for one pair of fingerprints.

    The original note states the fingerprints are always supplied as
    SparseBitVect. The returned list holds, in this order: Tanimoto, Dice,
    Cosine, Russel, Kulczynski and McConnaughey similarity. Sokal is
    deliberately left out (it was disabled in the original code) and
    Tversky, although offered by RDKit, is not computed here.
    """
    measures = (
        DataStructs.TanimotoSimilarity,
        DataStructs.DiceSimilarity,
        DataStructs.CosineSimilarity,
        # DataStructs.SokalSimilarity is intentionally skipped
        DataStructs.RusselSimilarity,
        DataStructs.KulczynskiSimilarity,
        DataStructs.McConnaugheySimilarity,
    )
    return [measure(fp1, fp2) for measure in measures]
def maacs_fingerprint_evaluation(references, candidates):
    """
    Score candidates against references with MACCS keys.

    For every key of ``references`` that is also present in ``candidates``,
    both entries are converted with ``MACCSkeys.GenMACCSKeys`` and compared
    using Tanimoto, Dice, Cosine, Sokal and McConnaughey similarity, each
    rounded to 4 decimals. Keys missing from ``candidates`` count as 0 for
    every metric.

    The mean of each metric is printed; only the mean Tanimoto similarity
    (rounded to 4 decimals) is returned.
    """
    print("Calculating Similarity via MACCS Keys")
    # Index -> metric: 0 Tanimoto, 1 Dice, 2 Cosine, 3 Sokal, 4 McConnaughey
    collected = [[] for _ in range(5)]
    for key in references:
        if key in candidates:
            keys_candidate = MACCSkeys.GenMACCSKeys(candidates[key])
            keys_reference = MACCSkeys.GenMACCSKeys(references[key])
            current = [
                round(metric(keys_reference, keys_candidate), 4)
                for metric in (
                    DataStructs.TanimotoSimilarity,
                    DataStructs.DiceSimilarity,
                    DataStructs.CosineSimilarity,
                    DataStructs.SokalSimilarity,
                    DataStructs.McConnaugheySimilarity,
                )
            ]
        else:
            # No candidate for this reference: every metric scores 0.
            current = [0, 0, 0, 0, 0]
        for index, value in enumerate(current):
            collected[index].append(value)
    print("Done Calculating Similarity via MACCS Keys")
    print("##########################################")
    headings = (
        "Tanimoto Similarity:{}",
        "Dice Similarity:{}",
        "Cosine Similarity:{}",
        "Sokal Similarity:{}",
        "McConnaughey Similarity:{}",
    )
    for heading, values in zip(headings, collected):
        print(heading.format(round(np.mean(values), 4)))
    print("##########################################")
    return round(np.mean(collected[0]), 4)
Exemple #6
0
def GetSimilarity(rdkmol1, rdkmol2, metric='tanimoto'):
    '''
    Return the similarity of two RDKit fingerprint objects.

    rdkmol1 and rdkmol2 are the fingerprints to compare. metric is matched
    case-insensitively against: tanimoto, dice, cosine, sokal, russel,
    kulczynski, mcconnaughey. Any other value terminates the program through
    sys.exit with an explanatory message.
    '''
    valid_metric = ('tanimoto', 'dice', 'cosine', 'sokal', 'russel', 'kulczynski', 'mcconnaughey')
    # Lower-cased metric name -> name of the DataStructs function to call.
    # Tversky is not offered: the original author noted it returned an error.
    function_names = {
        'tanimoto': 'TanimotoSimilarity',
        'dice': 'DiceSimilarity',
        'cosine': 'CosineSimilarity',
        'sokal': 'SokalSimilarity',
        'russel': 'RusselSimilarity',
        'kulczynski': 'KulczynskiSimilarity',
        'mcconnaughey': 'McConnaugheySimilarity',
    }
    requested = metric.lower()
    if requested not in function_names:
        sys.exit('***ERROR: Unrecognized similarity metric: %s***.\n Use one of %s' % (metric,', '.join(valid_metric)))
    compare = getattr(DataStructs, function_names[requested])
    return compare(rdkmol1, rdkmol2)