def computeSimilarityFP(self, c_chem, typeFP, typeMetric):
    """Compute a fingerprint similarity between this object and ``c_chem``.

    The fingerprint stored under ``typeFP`` in the ``d_FP`` mapping of both
    objects is passed to ``DataStructs.<typeMetric>Similarity``.

    Parameters:
        c_chem: object exposing a ``d_FP`` mapping; compared against ``self``.
        typeFP: key selecting the fingerprint in both ``d_FP`` mappings.
        typeMetric: case-sensitive metric name, one of "Tanimoto", "Dice",
            "Cosine", "Sokal", "Russel", "RogotGoldberg", "AllBit",
            "Kulczynski", "McConnaughey", "Asymmetric" or "BraunBlanquet".

    Returns:
        Whatever the selected ``DataStructs`` function returns, or the string
        "NA" when the metric name is not recognised or the computation raises.
        In the "NA" case a message is printed and appended to ``self.log``.
    """
    supported_metrics = (
        "Tanimoto",
        "Dice",
        "Cosine",
        "Sokal",
        "Russel",
        "RogotGoldberg",
        "AllBit",
        "Kulczynski",
        "McConnaughey",
        "Asymmetric",
        "BraunBlanquet",
    )

    if typeMetric in supported_metrics:
        try:
            # Every supported metric maps onto DataStructs.<name>Similarity.
            similarity_func = getattr(DataStructs, typeMetric + "Similarity")
            return similarity_func(self.d_FP[typeFP], c_chem.d_FP[typeFP])
        except Exception:
            # Deliberately best-effort: a missing fingerprint or a failing
            # metric call is reported below rather than raised. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            pass

    # Shared failure path; an unknown metric name is reported the same way
    # instead of silently returning None.
    message = "Combination %s and %s not supported" % (typeFP, typeMetric)
    print(message)
    self.log = "%s%s\n" % (self.log, message)
    return "NA"
def rd_fingerprint_evaluation(references, candidates):
    """Score candidates against references using RDKit path fingerprints.

    For every key in ``references`` five similarities (Tanimoto, Dice, Cosine,
    Sokal, McConnaughey) are computed between the ``RDKFingerprint`` of the
    reference and of the candidate stored under the same key. A key that is
    absent from ``candidates`` contributes 0 to every metric. The mean of each
    metric is printed, and the mean Tanimoto similarity (rounded to four
    decimals) is returned.
    """
    metric_names = ("Tanimoto", "Dice", "Cosine", "Sokal", "McConnaughey")
    banner = "##########################################"

    print("Calculating Similarity via RDFIngerprint Path Similarity")

    # One score list per metric, in the same order as metric_names.
    score_columns = [[] for _ in metric_names]
    for key in references:
        row = [0] * len(metric_names)
        if key in candidates:
            candidate_fp = rdmolops.RDKFingerprint(
                candidates[key], fpSize=2048, minPath=1, maxPath=7)
            reference_fp = rdmolops.RDKFingerprint(
                references[key], fpSize=2048, minPath=1, maxPath=7)
            row = [
                round(
                    getattr(DataStructs, name + "Similarity")(
                        reference_fp, candidate_fp),
                    4)
                for name in metric_names
            ]
        for column, score in zip(score_columns, row):
            column.append(score)

    print("Done Calculating Similarity via RDFIngerprint Path Similarity")
    print(banner)
    for name, column in zip(metric_names, score_columns):
        print("{} Similarity:{}".format(name, round(np.mean(column), 4)))
    print(banner)

    # The Tanimoto mean (first column) is the value reported to the caller.
    return round(np.mean(score_columns[0]), 4)
def morgan_fingerprint_evaluation(references, candidates):
    """Score candidates against references using Morgan bit-vector fingerprints.

    Circular fingerprints, see https://doi.org/10.1021/ci100050t

    For every key in ``references`` five similarities (Tanimoto, Dice, Cosine,
    Sokal, McConnaughey) are computed between the Morgan fingerprints
    (radius argument 2, ``nBits=1024``) of the reference and of the candidate
    stored under the same key. A key that is absent from ``candidates``
    contributes 0 to every metric. The mean of each metric is printed, and the
    mean Tanimoto similarity (rounded to four decimals) is returned.
    """
    metric_names = ("Tanimoto", "Dice", "Cosine", "Sokal", "McConnaughey")
    banner = "##########################################"

    print("Calculating Similarity via Morgan based Circular Fingerprint")

    # One score list per metric, in the same order as metric_names.
    score_columns = [[] for _ in metric_names]
    for key in references:
        row = [0] * len(metric_names)
        if key in candidates:
            candidate_fp = AllChem.GetMorganFingerprintAsBitVect(
                candidates[key], 2, nBits=1024)
            reference_fp = AllChem.GetMorganFingerprintAsBitVect(
                references[key], 2, nBits=1024)
            row = [
                round(
                    getattr(DataStructs, name + "Similarity")(
                        reference_fp, candidate_fp),
                    4)
                for name in metric_names
            ]
        for column, score in zip(score_columns, row):
            column.append(score)

    print("Done Calculating Similarity via Morgan based Circular Fingerprint")
    print(banner)
    for name, column in zip(metric_names, score_columns):
        print("{} Similarity:{}".format(name, round(np.mean(column), 4)))
    print(banner)

    # The Tanimoto mean (first column) is the value reported to the caller.
    return round(np.mean(score_columns[0]), 4)
def maacs_fingerprint_evaluation(references, candidates):
    """Score candidates against references using MACCS keys.

    For every key in ``references`` five similarities (Tanimoto, Dice, Cosine,
    Sokal, McConnaughey) are computed between the MACCS keys of the reference
    and of the candidate stored under the same key. A key that is absent from
    ``candidates`` contributes 0 to every metric. The mean of each metric is
    printed, and the mean Tanimoto similarity (rounded to four decimals) is
    returned.
    """
    metric_names = ("Tanimoto", "Dice", "Cosine", "Sokal", "McConnaughey")
    banner = "##########################################"

    print("Calculating Similarity via MACCS Keys")

    # One score list per metric, in the same order as metric_names.
    score_columns = [[] for _ in metric_names]
    for key in references:
        row = [0] * len(metric_names)
        if key in candidates:
            candidate_keys = MACCSkeys.GenMACCSKeys(candidates[key])
            reference_keys = MACCSkeys.GenMACCSKeys(references[key])
            row = [
                round(
                    getattr(DataStructs, name + "Similarity")(
                        reference_keys, candidate_keys),
                    4)
                for name in metric_names
            ]
        for column, score in zip(score_columns, row):
            column.append(score)

    print("Done Calculating Similarity via MACCS Keys")
    print(banner)
    for name, column in zip(metric_names, score_columns):
        print("{} Similarity:{}".format(name, round(np.mean(column), 4)))
    print(banner)

    # The Tanimoto mean (first column) is the value reported to the caller.
    return round(np.mean(score_columns[0]), 4)
def compare_structure(smiles1, smiles2, fp_type="Morgan", sim_type="Dice"):
    """
    Task:
        Compare structural similarity of two compounds based on fingerprints.

    Parameters:
        smiles1: str, smiles of the compound 1
        smiles2: str, smiles of the compound 2
        fp_type: str, type of fingerprints; one of "Morgan",
            "MorganWithFeature", "MACCS", "Topological", "AtomPairs"
        sim_type: str, method for calculating similarity; one of "Dice",
            "Tanimoto", "Cosine", "Sokal", "Russel"

    Returns:
        The value returned by ``DataStructs.<sim_type>Similarity`` for the two
        fingerprints, or -1 when ``fp_type`` or ``sim_type`` is not recognised
        or when building the fingerprints / computing the similarity raises.
    """
    supported_fp_types = (
        "Morgan", "MorganWithFeature", "MACCS", "Topological", "AtomPairs")
    supported_sim_types = ("Dice", "Tanimoto", "Cosine", "Sokal", "Russel")

    # Reject unknown options up front. Previously an unknown sim_type left
    # the result unassigned and crashed with UnboundLocalError on return.
    if fp_type not in supported_fp_types or sim_type not in supported_sim_types:
        return -1

    def fingerprint(smi):
        # Build the fingerprint selected by fp_type for one SMILES string.
        mol = Chem.MolFromSmiles(smi)
        if fp_type == "Morgan":
            return AllChem.GetMorganFingerprint(mol, 2, useFeatures=False)
        if fp_type == "MorganWithFeature":
            return AllChem.GetMorganFingerprint(mol, 2, useFeatures=True)
        if fp_type == "MACCS":
            return Chem.MACCSkeys.GenMACCSKeys(mol)
        if fp_type == "Topological":
            return FingerprintMols.FingerprintMol(mol)
        return Pairs.GetAtomPairFingerprint(mol)

    try:
        fp1 = fingerprint(smiles1)
        fp2 = fingerprint(smiles2)
        # Every supported metric maps onto DataStructs.<name>Similarity.
        return getattr(DataStructs, sim_type + "Similarity")(fp1, fp2)
    except Exception:
        # Best-effort contract kept from the original: any failure while
        # building fingerprints or comparing them is reported as -1.
        return -1
def GetSimilarity(rdkmol1, rdkmol2, metric='tanimoto'):
    '''
    Return the similarity of two RDKit fingerprint objects.

    rdkmol1 and rdkmol2 are RDKit fingerprint objects for the molecules.
    metric is matched case-insensitively against: tanimoto, dice, cosine,
    sokal, russel, kulczynski, mcconnaughey. Any other value terminates the
    program through sys.exit with an error message listing the valid names.
    '''
    # (metric name, DataStructs function name); order fixes the order in
    # which valid names are listed in the error message.
    metric_table = (
        ('tanimoto', 'TanimotoSimilarity'),
        ('dice', 'DiceSimilarity'),
        ('cosine', 'CosineSimilarity'),
        ('sokal', 'SokalSimilarity'),
        ('russel', 'RusselSimilarity'),
        ('kulczynski', 'KulczynskiSimilarity'),
        ('mcconnaughey', 'McConnaugheySimilarity'),
    )
    function_names = dict(metric_table)

    wanted = metric.lower()
    if wanted not in function_names:
        valid_names = ', '.join(name for name, _ in metric_table)
        sys.exit('***ERROR: Unrecognized similarity metric: %s***.\n Use one of %s' % (metric, valid_names))

    return getattr(DataStructs, function_names[wanted])(rdkmol1, rdkmol2)