def test_generate_fraggle_fragmentation(self):
    """Check the fragment sets produced by generate_fraggle_fragmentation.

    Two molecules are fragmented.  For the first one the number of
    fragments must be exactly 16.  For both, every generated fragment must
    appear in the expected list once both sides have been passed through
    ``_of`` (helper defined elsewhere in this file; presumably it
    normalises the SMILES text -- confirm against its definition).
    """
    mol = Chem.MolFromSmiles('COc1cc(CN2CCC(CC2)NC(=O)c2cncc(C)c2)c(OC)c2ccccc12')
    frags = FraggleSim.generate_fraggle_fragmentation(mol)
    self.assertEqual(len(frags), 16)
    expected = (
        '[*]C(=O)NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
        '[*]C(=O)c1cncc(C)c1.[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
        '[*]C(=O)c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC',
        '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
        '[*]C(=O)c1cncc(C)c1.[*]Cc1cc(OC)c2ccccc2c1OC',
        '[*]Cc1cc(OC)c2ccccc2c1OC.[*]NC(=O)c1cncc(C)c1',
        '[*]Cc1cc(OC)c2ccccc2c1OC.[*]c1cncc(C)c1',
        '[*]NC(=O)c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC',
        '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
        '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1cncc(C)c1',
        '[*]c1c(CN2CCC(NC(=O)c3cncc(C)c3)CC2)cc(OC)c2ccccc12',
        '[*]c1c(OC)cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c1[*]',
        '[*]c1cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c2ccccc12',
        '[*]N1CCC(NC(=O)c2cncc(C)c2)CC1.[*]c1cc(OC)c2ccccc2c1OC',
        '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1cncc(C)c1',
        '[*]c1cc(OC)c2ccccc2c1OC.[*]c1cncc(C)c1',
    )
    expected = [_of(s) for s in expected]
    for smi in frags:
        # assertIn names the offending fragment on failure, whereas
        # assertTrue(x in y) only reports "False is not true".
        self.assertIn(_of(smi), expected)

    # Test case for fragments that contain a cyclic and acyclic component
    mol = Chem.MolFromSmiles('c12c(CCC)cccc2cccc1')
    frags = FraggleSim.generate_fraggle_fragmentation(mol)
    expected = [
        '[*]CCC.[*]c1ccccc1[*]',
        '[*]Cc1cccc2ccccc12',
        '[*]c1cccc(CCC)c1[*]',
        '[*]c1cccc2ccccc12',
        '[*]c1ccccc1[*]',
    ]
    expected = [_of(s) for s in expected]
    # Only membership is checked here; the fragment count is not pinned.
    for smi in frags:
        self.assertIn(_of(smi), expected)
def test_GetFraggleSimilarity(self):
    """Check similarity score and best-matching fragment for four pairs.

    Each pair is passed to ``FraggleSim.GetFraggleSimilarity(query, ref)``;
    the score is compared to two decimal places and the returned match
    string is compared exactly.
    """
    piperidine_core = '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1'
    phenyl = '[*]c1ccccc1'

    def check(query, ref, want_sim, want_match):
        # One similarity call followed by the two assertions on its result.
        got_sim, got_match = FraggleSim.GetFraggleSimilarity(query, ref)
        self.assertAlmostEqual(got_sim, want_sim, places=2)
        self.assertEqual(got_match, want_match)

    query = Chem.MolFromSmiles(
        'COc1cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c2ccccc12')
    ref = Chem.MolFromSmiles(
        'COc1cc(CN2CCC(NC(=O)c3ccccc3)CC2)c(OC)c2ccccc12')
    check(query, ref, 0.980, piperidine_core)

    # Same query object, different reference molecule.
    ref = Chem.MolFromSmiles(
        'COc1cc(CN2CCC(Nc3nc4ccccc4s3)CC2)c(OC)c2ccccc12')
    check(query, ref, 0.794, piperidine_core)

    # Small query against the reference from the previous step.
    query = Chem.MolFromSmiles('COc1ccccc1')
    check(query, ref, 0.347, phenyl)

    # Same small query against a freshly parsed copy of the first reference.
    ref = Chem.MolFromSmiles(
        'COc1cc(CN2CCC(NC(=O)c3ccccc3)CC2)c(OC)c2ccccc12')
    check(query, ref, 0.266, phenyl)
def test_isValidRingCut(self):
    """Check isValidRingCut on chain, ring and unsanitized inputs.

    The 'rdApp.error' log channel is switched off while the checks run
    (presumably because some inputs make RDKit emit error messages --
    confirm) and is switched back on afterwards, even when an assertion
    fails, so a failure here does not silence error logging for the rest
    of the test run.
    """
    rdBase.DisableLog('rdApp.error')
    try:
        self.assertEqual(
            FraggleSim.isValidRingCut(Chem.MolFromSmiles('[*]CCC[*]')), False)
        self.assertEqual(
            FraggleSim.isValidRingCut(Chem.MolFromSmiles('[*]C1CC1[*]')), True)
        self.assertEqual(
            FraggleSim.isValidRingCut(Chem.MolFromSmiles('[*]c1ccccc1[*]')), True)
        self.assertEqual(
            FraggleSim.isValidRingCut(
                Chem.MolFromSmiles('[*]cccc[*]', sanitize=False)), False)
    finally:
        # Always restore the channel; the original only did so on success.
        rdBase.EnableLog('rdApp.error')
def testFragmentation(self):
    """Fragment one molecule and check the count and content of the result.

    Exactly 16 fragments are expected, and each generated fragment string
    must be one of the strings listed in ``expected``.  The comparison is
    on the raw strings, with no normalisation applied.
    """
    mol = Chem.MolFromSmiles(
        'COc1cc(CN2CCC(CC2)NC(=O)c2cncc(C)c2)c(OC)c2ccccc12')
    frags = FraggleSim.generate_fraggle_fragmentation(mol)
    self.assertEqual(len(frags), 16)
    expected = (
        '[*]C(=O)NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
        '[*]C(=O)c1cncc(C)c1.[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
        '[*]C(=O)c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC',
        '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
        '[*]C(=O)c1cncc(C)c1.[*]Cc1cc(OC)c2ccccc2c1OC',
        '[*]Cc1cc(OC)c2ccccc2c1OC.[*]NC(=O)c1cncc(C)c1',
        '[*]Cc1cc(OC)c2ccccc2c1OC.[*]c1cncc(C)c1',
        '[*]NC(=O)c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC',
        '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
        '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1cncc(C)c1',
        '[*]c1c(CN2CCC(NC(=O)c3cncc(C)c3)CC2)cc(OC)c2ccccc12',
        '[*]c1c(OC)cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c1[*]',
        '[*]c1cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c2ccccc12',
        '[*]N1CCC(NC(=O)c2cncc(C)c2)CC1.[*]c1cc(OC)c2ccccc2c1OC',
        '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1cncc(C)c1',
        '[*]c1cc(OC)c2ccccc2c1OC.[*]c1cncc(C)c1',
    )
    for smi in frags:
        # assertIn reports which fragment was unexpected; the previous
        # assertTrue(smi in expected) only said "False is not true".
        self.assertIn(smi, expected)
def testFragmentation(self):
    """Fragment one molecule and check the count and content of the result.

    Exactly 16 fragments are expected, and each generated fragment string
    must be one of the strings listed in ``expected`` (raw string
    comparison, no normalisation).

    Uses assertEqual/assertIn rather than the failUnlessEqual/failUnless
    aliases, which were removed from unittest in Python 3.12; the
    replacements are also available on Python 2.7.
    """
    mol = Chem.MolFromSmiles('COc1cc(CN2CCC(CC2)NC(=O)c2cncc(C)c2)c(OC)c2ccccc12')
    frags = FraggleSim.generate_fraggle_fragmentation(mol)
    self.assertEqual(len(frags), 16)
    expected = (
        '[*]C(=O)NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
        '[*]C(=O)c1cncc(C)c1.[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
        '[*]C(=O)c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC',
        '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
        '[*]Cc1cc(OC)c2ccccc2c1OC.[*]C(=O)c1cncc(C)c1',
        '[*]Cc1cc(OC)c2ccccc2c1OC.[*]NC(=O)c1cncc(C)c1',
        '[*]Cc1cc(OC)c2ccccc2c1OC.[*]c1cncc(C)c1',
        '[*]NC(=O)c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC',
        '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
        '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1cncc(C)c1',
        '[*]c1c(CN2CCC(NC(=O)c3cncc(C)c3)CC2)cc(OC)c2ccccc12',
        '[*]c1c([*])c(OC)c(CN2CCC(NC(=O)c3cncc(C)c3)CC2)cc1OC',
        '[*]c1cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c2ccccc12',
        '[*]c1cc(OC)c2ccccc2c1OC.[*]N1CCC(NC(=O)c2cncc(C)c2)CC1',
        '[*]c1cncc(C)c1.[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
        '[*]c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC',
    )
    for smi in frags:
        self.assertIn(smi, expected)
def frag(
    mol: Chem.Mol,
    remove_parent: bool = False,
    sanitize: bool = True,
    fix: bool = True,
):
    """Generate all possible fragmentations of a molecule.

    The Fraggle fragmentation strings are split on "." into individual
    fragment SMILES, de-duplicated, sorted in reverse order and converted
    to molecules.

    Args:
        mol: a molecule.
        remove_parent: Whether to leave the parent molecule out of the
            returned list. When False, `mol` is the first element.
        sanitize: Whether to sanitize the fragments.
        fix: Whether to fix the fragments.

    Returns:
        A list of fragment molecules (entries that ended up as None are
        dropped), preceded by `mol` unless `remove_parent` is set.
    """
    fragmentations = FraggleSim.generate_fraggle_fragmentation(mol)

    # One entry per distinct fragment SMILES across all fragmentations.
    unique_smiles = {
        part.strip() for seq in fragmentations for part in seq.split(".")
    }
    ordered_smiles = sorted(unique_smiles, reverse=True)

    pieces = [dm.to_mol(smi) for smi in ordered_smiles]
    if fix:
        pieces = [dm.fix_mol(piece) for piece in pieces]
    if sanitize:
        pieces = [dm.sanitize_mol(piece) for piece in pieces]
    pieces = [piece for piece in pieces if piece is not None]

    return pieces if remove_parent else [mol] + pieces
def generate_sim(base: str, smile: str):
    """Return the Fraggle similarity of `smile` (query) against `base` (reference).

    Args:
        base: SMILES of the reference molecule.
        smile: SMILES of the query molecule.

    Returns:
        The ``(similarity, match)`` pair from
        ``FraggleSim.GetFraggleSimilarity``, or ``(0.0, None)`` when either
        SMILES cannot be parsed or the similarity computation raises.
    """
    ref_mol = Chem.MolFromSmiles(base)
    query_mol = Chem.MolFromSmiles(smile)
    if ref_mol is None or query_mol is None:
        # Unparsable input: same fallback the error path produces.
        return 0.0, None

    try:
        sim, match = FraggleSim.GetFraggleSimilarity(query_mol, ref_mol)
    except Exception:
        # Best-effort by design, but no longer a bare `except:` so that
        # KeyboardInterrupt / SystemExit still propagate.
        return 0.0, None
    return sim, match
def testFragmentation2(self):
    """Fragment a second molecule: 13 fragments, all among the expected ones.

    Both the expected strings and the generated fragments are passed
    through ``_of`` (defined elsewhere in this file) before comparison.
    """
    parent = Chem.MolFromSmiles('COc1cc(CN2CCC(NC(=O)c3ccccc3)CC2)c(OC)c2ccccc12')
    generated = FraggleSim.generate_fraggle_fragmentation(parent)
    self.assertEqual(len(generated), 13)

    expected_smiles = (
        '[*]C(=O)c1ccccc1.[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
        '[*]C(=O)c1ccccc1.[*]Cc1cc(OC)c2ccccc2c1OC',
        '[*]C(=O)c1ccccc1.[*]c1cc(OC)c2ccccc2c1OC',
        '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1ccccc1',
        '[*]Cc1cc(OC)c2ccccc2c1OC.[*]NC(=O)c1ccccc1',
        '[*]Cc1cc(OC)c2ccccc2c1OC.[*]c1ccccc1',
        '[*]N1CCC(NC(=O)c2ccccc2)CC1.[*]c1cc(OC)c2ccccc2c1OC',
        '[*]NC(=O)c1ccccc1.[*]c1cc(OC)c2ccccc2c1OC',
        '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1ccccc1',
        '[*]c1c(CN2CCC(NC(=O)c3ccccc3)CC2)cc(OC)c2ccccc12',
        '[*]c1c(OC)cc(CN2CCC(NC(=O)c3ccccc3)CC2)c(OC)c1[*]',
        '[*]c1cc(CN2CCC(NC(=O)c3ccccc3)CC2)c(OC)c2ccccc12',
        '[*]c1cc(OC)c2ccccc2c1OC.[*]c1ccccc1',
    )
    normalised = []
    for text in expected_smiles:
        normalised.append(_of(text))

    for fragment in generated:
        self.assertIn(_of(fragment), normalised)
if __name__ == '__main__':
    # Command-line entry point: reads "SMILES ID" lines from stdin and prints
    # one "smiles,id,fragment" line per Fraggle fragmentation of each molecule.
    import re
    import sys

    # Any command-line argument is treated as a request for the usage text.
    if (len(sys.argv) >= 2):
        print(
          "Program to run the first part of Fraggle. Program splits the molecule\nready for the search\n")
        print("USAGE: ./fraggle.py <file_of_smiles")
        print("Format of smiles file: SMILES ID (space or comma separated)")
        print("Output: whole mol smiles,ID,fraggle split smiles\n")
        sys.exit(1)

    # Raw string: '\s' in a plain literal is an invalid escape sequence and
    # triggers a warning on current Python versions. Compiled once, outside
    # the loop.
    field_sep = re.compile(r'\s|,')

    # read the STDIN
    for line in sys.stdin:
        line = line.rstrip()
        if not line:
            # Blank lines previously crashed the tuple unpacking below.
            continue

        fields = field_sep.split(line)
        if len(fields) != 2:
            # Report and skip instead of dying with ValueError mid-stream.
            sys.stderr.write("Can't parse line: %s\n" % (line))
            continue
        smi, id_ = fields

        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            sys.stderr.write("Can't generate mol for: %s\n" % (smi))
            continue

        out_fragments = FraggleSim.generate_fraggle_fragmentation(mol)
        # print out the unique fragments
        for x in out_fragments:
            # Round-trip through a molecule so the printed SMILES is the one
            # MolToSmiles produces ("cansmi" in the original comment).
            temp = Chem.MolFromSmiles(x)
            print("%s,%s,%s" % (smi, id_, Chem.MolToSmiles(temp)))
# test_mol_neu.SetProp("NScode", mol.GetProp("NScode")) reference_desalt.append(test_mol_neu) count = count+1 print("Reference Molecule Num: ", len(reference_desalt)) # Morganによる類似性評価 morgan_reference = [AllChem.GetMorganFingerprintAsBitVect(mol, 2, 2048) for mol in reference_desalt] #(mol,中心からの半径,bit数) morgan_query = [AllChem.GetMorganFingerprintAsBitVect(mol, 2, 2048) for mol in query] tanimoto = DataStructs.BulkTanimotoSimilarity(morgan_query[0], morgan_reference) # Fraggleによる類似性評価 fraggle_sim = [] fraggle_match = [] for (sim, match) in [FraggleSim.GetFraggleSimilarity(query[0], reference_desalt[i]) for i in range(len(reference_desalt))] : fraggle_sim.append(sim) fraggle_match.append(match) # 結果をPDFファイルに書き込み # img = Draw.MolsToGridImage(namiki[:16], molsPerRow=4, subImgSize=(300,200), legends=['Fraggle: {:.2f}'.format(i) for i in fraggle_similarity[:16]]) # img.save('./fraggle.pdf') # 結果をSDFファイルに書き込み sdf_filename = "./fraggle-result.sdf" writer = Chem.SDWriter(sdf_filename) query[0].SetProp("NScode", "2-17") #molオブジェクトにプロパティを付加する query[0].SetProp("Tanimoto_Sim", "1") query[0].SetProp("Fraggle_Sim", "1") writer.write(query[0])
if (iMol == None): sys.stderr.write("Can't generate mol for: %s\n" % (inSmi)) continue #discard based on atom size if (iMol.GetNumAtoms() < query_size[qID] - 3): #sys.stderr.write("Too small: %s\n" % (inSmi) ) continue if (iMol.GetNumAtoms() > query_size[qID] + 4): #sys.stderr.write("Too large: %s\n" % (inSmi) ) continue #print '>>>',id_ rdkit_sim, fraggle_sim = FraggleSim.compute_fraggle_similarity_for_subs( iMol, query_mols[qID], qSmi, qSubs, options.pfp) day_sim[qID][id_] = rdkit_sim frag_sim[qID][id_] = max(frag_sim[qID][id_], fraggle_sim) #check if you have the fp for the modified query #and generate if need to #right, print out the results for the query #Format: SMILES,ID,QuerySMI,QueryID,Fraggle_Similarity,Daylight_Similarity for qID in frag_sim: for id_ in frag_sim[qID]: if (frag_sim[qID][id_] >= fraggle_cutoff): print("%s,%s,%s,%s,%s,%s" % (id_to_smi[id_], id_, id_to_smi[qID], qID, frag_sim[qID][id_], day_sim[qID][id_]))
if(iMol == None): sys.stderr.write("Can't generate mol for: %s\n" % (inSmi) ) continue #discard based on atom size if(iMol.GetNumAtoms() < query_size[qID]-3): #sys.stderr.write("Too small: %s\n" % (inSmi) ) continue; if(iMol.GetNumAtoms() > query_size[qID]+4): #sys.stderr.write("Too large: %s\n" % (inSmi) ) continue; #print '>>>',id_ rdkit_sim,fraggle_sim = FraggleSim.compute_fraggle_similarity_for_subs(iMol,query_mols[qID],qSmi,qSubs, options.pfp) day_sim[qID][id_] = rdkit_sim frag_sim[qID][id_] = max(frag_sim[qID][id_],fraggle_sim) #check if you have the fp for the modified query #and generate if need to #right, print out the results for the query #Format: SMILES,ID,QuerySMI,QueryID,Fraggle_Similarity,Daylight_Similarity for qID in frag_sim: for id_ in frag_sim[qID]: if(frag_sim[qID][id_] >= fraggle_cutoff): print("%s,%s,%s,%s,%s,%s" % (id_to_smi[id_],id_,id_to_smi[qID],qID,frag_sim[qID][id_],day_sim[qID][id_]))
if __name__ == '__main__':
    # Command-line entry point: reads "SMILES ID" lines from stdin and prints
    # one "smiles,id,fragment" line per Fraggle fragmentation of each molecule.
    #
    # Every print below passes a single parenthesised string, which behaves
    # the same as the old print statement on Python 2 and is valid on Python 3.
    import re
    import sys

    # Any command-line argument is treated as a request for the usage text.
    if (len(sys.argv) >= 2):
        print("Program to run the first part of Fraggle. Program splits the molecule\nready for the search\n")
        print("USAGE: ./fraggle.py <file_of_smiles")
        print("Format of smiles file: SMILES ID (space or comma separated)")
        print("Output: whole mol smiles,ID,fraggle split smiles\n")
        sys.exit(1)

    # Raw string so the regex backslash is not read as a string escape;
    # compiled once, outside the loop.
    field_sep = re.compile(r'\s|,')

    # read the STDIN
    for line in sys.stdin:
        line = line.rstrip()
        if not line:
            # Blank lines previously crashed the tuple unpacking below.
            continue

        fields = field_sep.split(line)
        if len(fields) != 2:
            # Report and skip instead of dying with ValueError mid-stream.
            sys.stderr.write("Can't parse line: %s\n" % (line))
            continue
        smi, id_ = fields

        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            sys.stderr.write("Can't generate mol for: %s\n" % (smi))
            continue

        out_fragments = FraggleSim.generate_fraggle_fragmentation(mol)
        # print out the unique fragments
        for x in out_fragments:
            # cansmi
            temp = Chem.MolFromSmiles(x)
            print("%s,%s,%s" % (smi, id_, Chem.MolToSmiles(temp)))