def testRGroupState(self):
    # Verifies that an EnumerateLibrary can snapshot its state with
    # GetState(), be rewound with SetState(), and be rewound to the start with
    # ResetState(). The snapshot/rewind check is run for the default strategy
    # and for the two random sampling strategies. The whole test is skipped
    # when EnumerateLibraryCanSerialize() reports False.
    if not rdChemReactions.EnumerateLibraryCanSerialize():
        print(
            "-- Skipping testRGroupState, serialization of EnumerateLibrary not enabled",
            file=sys.stderr)
        return

    log("testRGroupState")
    smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
    rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
    reagents = [
        [
            Chem.MolFromSmiles('C=CCN=C=S'),
            Chem.MolFromSmiles('CC=CCN=C=S'),
        ],
        [
            Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
            Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
            Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
        ],
    ]

    def tostr(rows):
        # Convert the nested result of nextSmiles() into plain lists of str so
        # that two results can be compared with assertEqual.
        return [[str(x) for x in row] for row in rows]

    def check_state_round_trip(enumerator):
        # Snapshot, draw two results, rewind to the snapshot, and require the
        # next two draws to reproduce the first two.
        state = enumerator.GetState()
        first = enumerator.nextSmiles()
        second = enumerator.nextSmiles()
        enumerator.SetState(state)
        self.assertEqual(tostr(enumerator.nextSmiles()), tostr(first))
        self.assertEqual(tostr(enumerator.nextSmiles()), tostr(second))

    check_state_round_trip(rdChemReactions.EnumerateLibrary(rxn, reagents))
    check_state_round_trip(
        rdChemReactions.EnumerateLibrary(
            rxn, reagents, rdChemReactions.RandomSampleStrategy()))
    check_state_round_trip(
        rdChemReactions.EnumerateLibrary(
            rxn, reagents, rdChemReactions.RandomSampleAllBBsStrategy()))

    enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents)
    smiresults = [
        'C=CCNC(=S)NCc1ncc(Cl)cc1Br',
        'CC=CCNC(=S)NCc1ncc(Cl)cc1Br',
        'C=CCNC(=S)NCCc1ncc(Cl)cc1Br',
        'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br',
        'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br',
        'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br',
    ]
    # Round-trip through a molecule so the expected strings are in the same
    # form MolToSmiles produces for the enumerated products.
    smiresults = [
        Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults
    ]

    # Move the underlying enumerator forward, then reset: the iteration below
    # must still yield every expected product, in order.
    enumerator.GetEnumerator().Skip(10)
    enumerator.ResetState()

    results = []
    for result in enumerator:
        for prodSet in result:
            for mol in prodSet:
                results.append(Chem.MolToSmiles(mol))

    self.assertEqual(results, smiresults)
def testRandomEnumerateLibrary(self):
    # Exercises EnumerateLibrary with RandomSampleStrategy:
    #   1. random sampling eventually produces every expected product;
    #   2. when serialization is available, an enumerator restored from
    #      Serialize() output yields the same products as the original, both
    #      from a fresh enumerator and from one serialized after two draws.
    log("testRandomEnumerateLibrary")
    smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
    rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
    reagents = [
        [
            Chem.MolFromSmiles('C=CCN=C=S'),
            Chem.MolFromSmiles('CC=CCN=C=S'),
        ],
        [
            Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
            Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
            Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
        ],
    ]

    enumerator = rdChemReactions.EnumerateLibrary(
        rxn, reagents, rdChemReactions.RandomSampleStrategy())
    self.assertTrue(enumerator)

    smiresults = [
        'C=CCNC(=S)NCc1ncc(Cl)cc1Br',
        'CC=CCNC(=S)NCc1ncc(Cl)cc1Br',
        'C=CCNC(=S)NCCc1ncc(Cl)cc1Br',
        'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br',
        'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br',
        'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br',
    ]
    results = [
        Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults
    ]
    # Built once; the sampling loop compares against it on every pass.
    expected = set(results)

    enumerator = rdChemReactions.EnumerateLibrary(
        rxn, reagents, rdChemReactions.RandomSampleStrategy())
    iteren = iter(enumerator)
    res = set()
    count = 0
    while res != expected:
        count += 1
        if count > 100000:
            print(
                "Unable to find enumerate set with 100,000 random samples!",
                file=sys.stderr)
            # The sets differ here, so this assertion fails and ends the loop
            # with a readable diff.
            self.assertEqual(res, expected)

        prod = iteren.next()
        for mols in prod:
            res.add(Chem.MolToSmiles(mols[0]))

    if not rdChemReactions.EnumerateLibraryCanSerialize():
        return

    enumerator = rdChemReactions.EnumerateLibrary(
        rxn, reagents, rdChemReactions.RandomSampleStrategy())
    serialized = enumerator.Serialize()
    enumerator2 = rdChemReactions.EnumerateLibrary()
    enumerator2.InitFromString(serialized)

    self.assertEqual(enumerator.GetEnumerator().Type(),
                     enumerator2.GetEnumerator().Type())

    iteren = iter(enumerator)
    iteren2 = iter(enumerator2)

    outsmiles = []
    for i in range(10):
        prods1 = iteren.next()
        prods2 = iteren2.next()
        self.assertEqual(len(prods1), len(prods2))
        for mols1, mols2 in zip(prods1, prods2):
            self.assertEqual(len(mols1), 1)
            smi1 = Chem.MolToSmiles(mols1[0])
            self.assertEqual(smi1, Chem.MolToSmiles(mols2[0]))
            outsmiles.append(smi1)

        if i == 1:
            # Snapshot after two draws so the restore below starts mid-stream.
            pickle_at_2 = enumerator.Serialize()

    # make sure we can pickle the state as well
    enumerator3 = rdChemReactions.EnumerateLibrary()
    enumerator3.InitFromString(pickle_at_2)
    iteren3 = iter(enumerator3)
    outsmiles2 = []
    for _ in range(8):
        prods3 = iteren3.next()
        for mols3 in prods3:
            self.assertEqual(len(mols3), 1)
            outsmiles2.append(Chem.MolToSmiles(mols3[0]))

    # The restored enumerator must replay draws 3..10 of the original.
    self.assertEqual(outsmiles2, outsmiles[2:])
def testRandomEnumerateAllBBsLibrary(self):
    # Exercises EnumerateLibrary with RandomSampleAllBBsStrategy:
    #   1. the positions reported by GetPosition() cover both building blocks
    #      of the first reagent within two draws and all three building blocks
    #      of the second reagent within three draws;
    #   2. when serialization is available, an enumerator restored from
    #      Serialize() output yields the same products as the original, both
    #      from a fresh enumerator and from one serialized after two draws.
    log("testRandomEnumerateAllBBsLibrary")
    smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
    rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
    reagents = [
        [
            Chem.MolFromSmiles('C=CCN=C=S'),
            Chem.MolFromSmiles('CC=CCN=C=S'),
        ],
        [
            Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
            Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
            Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
        ],
    ]

    enumerator = rdChemReactions.EnumerateLibrary(
        rxn, reagents, rdChemReactions.RandomSampleAllBBsStrategy())
    self.assertTrue(enumerator)

    # test the BB sampling here
    strategy = iter(enumerator)
    r1 = set()  # indices seen so far for the first reagent list
    r2 = set()  # indices seen so far for the second reagent list
    for draw in range(1, 4):
        strategy.next()
        groups = strategy.GetPosition()
        print("**", list(groups), file=sys.stderr)
        r1.add(groups[0])
        r2.add(groups[1])
        if draw == 2:
            # two bbs at reagent one, all sampled within two iterations
            self.assertEqual(r1, {0, 1})
    # three bbs at reagent two, all sampled within three iterations
    self.assertEqual(r2, {0, 1, 2})

    if not rdChemReactions.EnumerateLibraryCanSerialize():
        return

    enumerator = rdChemReactions.EnumerateLibrary(
        rxn, reagents, rdChemReactions.RandomSampleAllBBsStrategy())
    self.assertTrue(enumerator)
    serialized = enumerator.Serialize()
    enumerator2 = rdChemReactions.EnumerateLibrary()
    enumerator2.InitFromString(serialized)

    self.assertEqual(enumerator.GetEnumerator().Type(),
                     enumerator2.GetEnumerator().Type())

    iteren = iter(enumerator)
    iteren2 = iter(enumerator2)

    outsmiles = []
    for i in range(10):
        prods1 = iteren.next()
        prods2 = iteren2.next()
        self.assertEqual(len(prods1), len(prods2))
        for mols1, mols2 in zip(prods1, prods2):
            self.assertEqual(len(mols1), 1)
            smi1 = Chem.MolToSmiles(mols1[0])
            self.assertEqual(smi1, Chem.MolToSmiles(mols2[0]))
            outsmiles.append(smi1)

        if i == 1:
            # Snapshot after two draws so the restore below starts mid-stream.
            pickle_at_2 = enumerator.Serialize()

    # make sure we can pickle the state as well
    enumerator3 = rdChemReactions.EnumerateLibrary()
    enumerator3.InitFromString(pickle_at_2)
    self.assertEqual(enumerator.GetEnumerator().Type(),
                     enumerator3.GetEnumerator().Type())

    iteren3 = iter(enumerator3)
    outsmiles2 = []
    for _ in range(8):
        prods3 = iteren3.next()
        for mols3 in prods3:
            self.assertEqual(len(mols3), 1)
            outsmiles2.append(Chem.MolToSmiles(mols3[0]))

    # The restored enumerator must replay draws 3..10 of the original.
    self.assertEqual(outsmiles2, outsmiles[2:])
def testEnumerateLibrary(self):
    # Exercises the default EnumerateLibrary strategy end to end:
    #   * the enumerator exposes the reaction and reagents it was built from;
    #   * iteration yields the six expected products in a fixed order, with
    #     the positions listed in expected_positions;
    #   * when serialization is available, the same holds for an enumerator
    #     restored from Serialize() output and for one restored from the
    #     stored "enumeration.pickle" file, and an enumerator serialized after
    #     two results resumes from that point;
    #   * nextSmiles() yields the same six products as SMILES strings.
    log("testEnumerateLibrary")
    smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
    rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
    reagents = [
        [
            Chem.MolFromSmiles('C=CCN=C=S'),
            Chem.MolFromSmiles('CC=CCN=C=S'),
        ],
        [
            Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
            Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
            Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
        ],
    ]

    enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents)
    self.assertTrue(enumerator)

    # need to initialize the reaction before getting the binary serialization
    rxn.Initialize()
    self.assertEqual(rxn.ToBinary(), enumerator.GetReaction().ToBinary())

    bbs = enumerator.GetReagents()
    for i in range(len(bbs)):
        for j in range(len(bbs[i])):
            # assertEqual (not assertTrue(a == b)) so a failure shows both
            # SMILES strings.
            self.assertEqual(
                Chem.MolToSmiles(reagents[i][j]), Chem.MolToSmiles(bbs[i][j]))

    smiresults = [
        'C=CCNC(=S)NCc1ncc(Cl)cc1Br',
        'CC=CCNC(=S)NCc1ncc(Cl)cc1Br',
        'C=CCNC(=S)NCCc1ncc(Cl)cc1Br',
        'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br',
        'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br',
        'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br',
    ]
    results = [
        Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults
    ]

    can_serialize = rdChemReactions.EnumerateLibraryCanSerialize()
    enumerators = [enumerator]

    # add serialized enumerators as well for testing if possible
    if can_serialize:
        serialized = enumerator.Serialize()
        enumerator2 = rdChemReactions.EnumerateLibrary()
        enumerator2.InitFromString(serialized)

        # make sure old pickles work
        enumerator3 = rdChemReactions.EnumerateLibrary()
        legacy_path = os.path.join(self.dataDir, "enumeration.pickle")
        # Context manager so the file handle is closed deterministically.
        with open(legacy_path, 'rb') as handle:
            enumerator3.InitFromString(handle.read())

        print("==", enumerator.GetEnumerator().Type(),
              enumerator2.GetEnumerator().Type())
        self.assertEqual(enumerator.GetEnumerator().Type(),
                         enumerator2.GetEnumerator().Type())
        enumerators.append(enumerator2)
        enumerators.append(enumerator3)

    # check for fully sampled and deterministic ordering in final index values
    expected_positions = [[0, 0], [1, 0], [0, 1], [1, 1], [0, 2], [1, 2]]

    out = []
    for en in enumerators:
        # Pre-set so the final index assertion fails cleanly (instead of
        # raising NameError) if the enumerator yields nothing.
        i = 0
        positions = []
        for i, prods in enumerate(en):
            positions.append(list(en.GetPosition()))
            for mols in prods:
                self.assertEqual(len(mols), 1)
                smi = Chem.MolToSmiles(mols[0])
                if en is enumerator:
                    out.append(smi)
                self.assertEqual(smi, results[i])

            if en is enumerator and i == 1 and can_serialize:
                # save the state not at the start
                pickle_at_2 = enumerator.Serialize()
        self.assertEqual(i, 5)
        self.assertEqual(positions, expected_positions)

    if can_serialize:
        # see if we can restore the enumeration from the middle
        out3 = []
        enumerator3 = rdChemReactions.EnumerateLibrary()
        enumerator3.InitFromString(pickle_at_2)
        for prods in enumerator3:
            for mols in prods:
                self.assertEqual(len(mols), 1)
                out3.append(Chem.MolToSmiles(mols[0]))

        self.assertEqual(out[2:], out3)

    # test smiles interface
    enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents)
    i = 0
    while enumerator:
        for mols in enumerator.nextSmiles():
            self.assertEqual(len(mols), 1)
            self.assertEqual(mols[0], results[i])
        i += 1
    self.assertEqual(i, 6)