### Fingerprint-bit inspection for taxol.
### `taxol` must already be bound to an indexable result before this point
### (the code that produces it is outside this section); per the author's
### notes its first element is a pubchempy.Compound.
taxol = taxol[0]
type(taxol)  ### pubchempy.Compound (echoed only in an interactive session)

### Rebuild the compound as an RDKit molecule from its canonical SMILES.
taxol = Chem.MolFromSmiles(taxol.canonical_smiles)
type(taxol)  ### rdkit.Chem.rdchem.Mol (echoed only in an interactive session)

### Morgan fingerprint; the bitInfo dict is filled in by the call.
bitI_morgan = {}
fp_morgan = AllChem.GetMorganFingerprintAsBitVect(taxol, 2, bitInfo=bitI_morgan)

### RDKit fingerprint; the bitInfo dict is filled in by the call.
bitI_rdkit = {}
fp_rdkit = Chem.RDKFingerprint(taxol, bitInfo=bitI_rdkit)

### The trailing "###" lines below are the outputs recorded by the author.
print(fp_morgan.GetNumBits(), fp_morgan.GetNumOnBits())
### 2048 86
print(len(bitI_morgan))
### 86
print(len(fp_rdkit), len(bitI_rdkit))
### (2048, 1444)

### Show the bitInfo entries of the first five Morgan bits.
for key in list(bitI_morgan)[:5]:
    print(bitI_morgan[key])

### Morgan visualisation: draw the first 12 set bits.
### The bit list is computed once so tuples and legends always line up.
morgan_bits = list(bitI_morgan)[:12]
morgan_turples = ((taxol, bit, bitI_morgan) for bit in morgan_bits)
### NOTE: the return value is not stored; presumably it is rendered by the
### notebook when this is the last expression of a cell - confirm.
Draw.DrawMorganBits(
    morgan_turples,
    molsPerRow=4,
    legends=['bit: ' + str(x) for x in morgan_bits],
)

### RDKit visualisation: draw the first 12 set bits.
rdkit_bits = list(bitI_rdkit)[:12]
rdkit_turples = ((taxol, bit, bitI_rdkit) for bit in rdkit_bits)
Draw.DrawRDKitBits(
    rdkit_turples,
    molsPerRow=4,
    legends=['bit: ' + str(x) for x in rdkit_bits],
)
# 3. Gray: the atom is an aliphatic hydrocarbon atom
#
# 3.8 Visualising the bits of a topological fingerprint
# The topological fingerprint is also called the RDKit fingerprint; it is
# computed with Chem.RDKFingerprint(mol).
mol = Chem.MolFromSmiles('c1cccnc1C')
rdkbi = {}  # filled in by RDKFingerprint via bitInfo
rdkfp = Chem.RDKFingerprint(mol, maxPath=2, bitInfo=rdkbi)
print(list(rdkbi))
# Output recorded by the author:
# [5, 161, 294, 330, 633, 684, 744, 808, 842, 930, 1026, 1027, 1060, 1649, 1909]

# Visualisation: one sub-image per set bit, labelled with the bit id.
rdkbi_tuple = [(mol, bit, rdkbi) for bit in rdkbi]
img = Draw.DrawRDKitBits(
    rdkbi_tuple,
    molsPerRow=4,
    subImgSize=(200, 200),
    legends=[str(bit) for bit in rdkbi],
)
# NOTE(review): writing `img` to a text file assumes DrawRDKitBits returned
# SVG text here - confirm for the RDKit version / environment in use.
with open('/drug_development/studyRdkit/st_rdcit/img/mol27.svg', 'w+') as outf:
    outf.write(img)

#
# 3.9 Picking diverse molecules based on fingerprints
# Picking molecules is a key step in virtual screening. Suppose screening
# yields the top 1000 molecules but, because of cost and time, only 100 can
# be assayed. Simply taking the top 100 may lower the hit rate. The usual
# workflow is to cluster the 1000 molecules and pick one molecule (or the
# centroid) from each cluster, which increases scaffold diversity and thus
# improves the success rate of virtual screening.

# Drop the None entries while reading, in a single pass, instead of the
# quadratic count()/remove() loop.
ms = [
    x
    for x in Chem.SDMolSupplier('/Users/zeoy/st/drug_development/st_rdcit/2d.sdf')
    if x is not None
]
fps = [AllChem.GetMorganFingerprint(x, 3) for x in ms]