def computeSimilarityFP(self, c_chem, typeFP, typeMetric):
    """Compute a fingerprint similarity between this object and ``c_chem``.

    Both objects must expose a ``d_FP`` mapping; the entry stored under
    ``typeFP`` in each mapping is handed to the RDKit ``DataStructs``
    function named ``<typeMetric>Similarity``.

    Args:
        c_chem: The other object; its ``d_FP[typeFP]`` entry is compared
            with ``self.d_FP[typeFP]``.
        typeFP: Key selecting the fingerprint inside both ``d_FP`` mappings.
        typeMetric: Case-sensitive metric name. One of "Tanimoto", "Dice",
            "Cosine", "Sokal", "Russel", "RogotGoldberg", "AllBit",
            "Kulczynski", "McConnaughey", "Asymmetric" or "BraunBlanquet".

    Returns:
        The value returned by the selected ``DataStructs`` function, or the
        string "NA" when the metric name is unknown, the fingerprint is
        missing, or the computation raises. In the "NA" case a message is
        printed and appended to ``self.log``.
    """
    supported_metrics = (
        "Tanimoto",
        "Dice",
        "Cosine",
        "Sokal",
        "Russel",
        "RogotGoldberg",
        "AllBit",
        "Kulczynski",
        "McConnaughey",
        "Asymmetric",
        "BraunBlanquet",
    )
    try:
        # An unknown metric used to fall through the elif chain and return
        # None without any trace; route it to the same reporting path as
        # every other unsupported combination.
        if typeMetric not in supported_metrics:
            raise ValueError("unknown similarity metric: %r" % (typeMetric,))
        # Resolve both fingerprints first so a missing fingerprint type is
        # reported before any RDKit call is attempted.
        fp_self = self.d_FP[typeFP]
        fp_other = c_chem.d_FP[typeFP]
        metric_func = getattr(DataStructs, "%sSimilarity" % typeMetric)
        return metric_func(fp_self, fp_other)
    except Exception:
        # Deliberately broad (any failure means "combination not
        # supported"), but no longer a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        print("Combination %s and %s not supported"%(typeFP, typeMetric))
        self.log = "%sCombination %s and %s not supported\n"%(self.log, typeFP, typeMetric)
        return "NA"
def rd_fingerprint_evaluation(references, candidates):
    """
    Compare candidates with references using RDKit path-based fingerprints.

    Every key of ``references`` contributes one score per metric (Tanimoto,
    Dice, Cosine, Sokal, McConnaughey). When the key is also present in
    ``candidates`` both entries are converted with ``rdmolops.RDKFingerprint``
    (fpSize=2048, minPath=1, maxPath=7) and each similarity is rounded to
    4 decimals; otherwise the key contributes 0 to every metric.

    The mean of every metric is printed and the mean Tanimoto similarity,
    rounded to 4 decimals, is returned.
    """
    print("Calculating Similarity via RDFIngerprint Path Similarity")

    # One report template per metric; score columns are kept in this order.
    report_templates = (
        "Tanimoto Similarity:{}",
        "Dice Similarity:{}",
        "Cosine Similarity:{}",
        "Sokal Similarity:{}",
        "McConnaughey Similarity:{}",
    )
    columns = [[] for _ in report_templates]

    for key in references:
        if key in candidates:
            cand_fp = rdmolops.RDKFingerprint(
                candidates[key], fpSize=2048, minPath=1, maxPath=7)
            ref_fp = rdmolops.RDKFingerprint(
                references[key], fpSize=2048, minPath=1, maxPath=7)
            row = [
                round(measure(ref_fp, cand_fp), 4)
                for measure in (
                    DataStructs.TanimotoSimilarity,
                    DataStructs.DiceSimilarity,
                    DataStructs.CosineSimilarity,
                    DataStructs.SokalSimilarity,
                    DataStructs.McConnaugheySimilarity,
                )
            ]
        else:
            # No candidate for this reference: it scores zero everywhere.
            row = [0] * len(report_templates)
        for column, value in zip(columns, row):
            column.append(value)

    print("Done Calculating Similarity via RDFIngerprint Path Similarity")
    print("##########################################")
    for template, column in zip(report_templates, columns):
        print(template.format(round(np.mean(column), 4)))
    print("##########################################")

    tanimoto_scores = columns[0]
    return round(np.mean(tanimoto_scores), 4)
def morgan_fingerprint_evaluation(references, candidates):
    """
    Compare candidates with references using Morgan circular fingerprints.

    Circular fingerprints: https://doi.org/10.1021/ci100050t

    Every key of ``references`` contributes one score per metric (Tanimoto,
    Dice, Cosine, Sokal, McConnaughey). When the key is also present in
    ``candidates`` both entries are converted with
    ``AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=1024)`` and each
    similarity is rounded to 4 decimals; otherwise the key contributes 0 to
    every metric.

    The mean of every metric is printed and the mean Tanimoto similarity,
    rounded to 4 decimals, is returned.
    """
    print("Calculating Similarity via Morgan based Circular Fingerprint")

    # One report template per metric; score columns are kept in this order.
    report_templates = (
        "Tanimoto Similarity:{}",
        "Dice Similarity:{}",
        "Cosine Similarity:{}",
        "Sokal Similarity:{}",
        "McConnaughey Similarity:{}",
    )
    columns = [[] for _ in report_templates]

    for key in references:
        if key in candidates:
            cand_fp = AllChem.GetMorganFingerprintAsBitVect(
                candidates[key], 2, nBits=1024)
            ref_fp = AllChem.GetMorganFingerprintAsBitVect(
                references[key], 2, nBits=1024)
            row = [
                round(measure(ref_fp, cand_fp), 4)
                for measure in (
                    DataStructs.TanimotoSimilarity,
                    DataStructs.DiceSimilarity,
                    DataStructs.CosineSimilarity,
                    DataStructs.SokalSimilarity,
                    DataStructs.McConnaugheySimilarity,
                )
            ]
        else:
            # No candidate for this reference: it scores zero everywhere.
            row = [0] * len(report_templates)
        for column, value in zip(columns, row):
            column.append(value)

    print("Done Calculating Similarity via Morgan based Circular Fingerprint")
    print("##########################################")
    for template, column in zip(report_templates, columns):
        print(template.format(round(np.mean(column), 4)))
    print("##########################################")

    tanimoto_scores = columns[0]
    return round(np.mean(tanimoto_scores), 4)
def get_similarity_all(fp1, fp2):
    """
    Return several RDKit similarity scores for one pair of fingerprints.

    Per the original author's note, fingerprints are always supplied as
    SparseBitVect. RDKit offers Tanimoto, Dice, Cosine, Sokal, Russel,
    Kulczynski, McConnaughey and Tversky; this function returns, in order:
    Tanimoto, Dice, Cosine, Russel, Kulczynski, McConnaughey.
    """
    # Sokal is intentionally left out: it was disabled in the original list.
    measure_names = (
        "Tanimoto",
        "Dice",
        "Cosine",
        "Russel",
        "Kulczynski",
        "McConnaughey",
    )
    return [
        getattr(DataStructs, name + "Similarity")(fp1, fp2)
        for name in measure_names
    ]
def maacs_fingerprint_evaluation(references, candidates):
    """
    Compare candidates with references using MACCS keys.

    Every key of ``references`` contributes one score per metric (Tanimoto,
    Dice, Cosine, Sokal, McConnaughey). When the key is also present in
    ``candidates`` both entries are converted with
    ``MACCSkeys.GenMACCSKeys`` and each similarity is rounded to 4 decimals;
    otherwise the key contributes 0 to every metric.

    The mean of every metric is printed and the mean Tanimoto similarity,
    rounded to 4 decimals, is returned.
    """
    print("Calculating Similarity via MACCS Keys")

    # One report template per metric; score columns are kept in this order.
    report_templates = (
        "Tanimoto Similarity:{}",
        "Dice Similarity:{}",
        "Cosine Similarity:{}",
        "Sokal Similarity:{}",
        "McConnaughey Similarity:{}",
    )
    columns = [[] for _ in report_templates]

    for key in references:
        if key in candidates:
            cand_keys = MACCSkeys.GenMACCSKeys(candidates[key])
            ref_keys = MACCSkeys.GenMACCSKeys(references[key])
            row = [
                round(measure(ref_keys, cand_keys), 4)
                for measure in (
                    DataStructs.TanimotoSimilarity,
                    DataStructs.DiceSimilarity,
                    DataStructs.CosineSimilarity,
                    DataStructs.SokalSimilarity,
                    DataStructs.McConnaugheySimilarity,
                )
            ]
        else:
            # No candidate for this reference: it scores zero everywhere.
            row = [0] * len(report_templates)
        for column, value in zip(columns, row):
            column.append(value)

    print("Done Calculating Similarity via MACCS Keys")
    print("##########################################")
    for template, column in zip(report_templates, columns):
        print(template.format(round(np.mean(column), 4)))
    print("##########################################")

    tanimoto_scores = columns[0]
    return round(np.mean(tanimoto_scores), 4)
def GetSimilarity(rdkmol1, rdkmol2, metric='tanimoto'):
    '''
    Return the similarity of two RDKit fingerprint objects.

    rdkmol1 and rdkmol2 are RDKit fingerprint objects for the molecules.
    metric is matched case-insensitively against: tanimoto, dice, cosine,
    sokal, russel, kulczynski, mcconnaughey. Any other value terminates the
    program through sys.exit with an error message listing the valid names.
    '''
    # (lower-case metric name, DataStructs function name), in the order
    # reported to the user. Tversky is not offered: the original author
    # noted that it was returning an error.
    metric_table = (
        ('tanimoto', 'TanimotoSimilarity'),
        ('dice', 'DiceSimilarity'),
        ('cosine', 'CosineSimilarity'),
        ('sokal', 'SokalSimilarity'),
        ('russel', 'RusselSimilarity'),
        ('kulczynski', 'KulczynskiSimilarity'),
        ('mcconnaughey', 'McConnaugheySimilarity'),
    )

    requested = metric.lower()
    for known_name, function_name in metric_table:
        if requested == known_name:
            return getattr(DataStructs, function_name)(rdkmol1, rdkmol2)

    valid_metric = tuple(known_name for known_name, _ in metric_table)
    sys.exit('***ERROR: Unrecognized similarity metric: %s***.\n Use one of %s' % (metric,', '.join(valid_metric)))