Beispiel #1
0
    def testRGroupState(self):
        """Verify that an EnumerateLibrary's state can be saved, restored and reset.

        For the default strategy, ``RandomSampleStrategy`` and
        ``RandomSampleAllBBsStrategy`` the state is captured with
        ``GetState()``, two products are drawn with ``nextSmiles()``, the state
        is put back with ``SetState()`` and the next two draws must reproduce
        the same products.

        Finally the underlying enumerator is skipped ahead and ``ResetState()``
        is expected to rewind it, so that a full iteration yields all six
        expected products in order.

        The test returns early (with a note on stderr) when
        ``EnumerateLibraryCanSerialize()`` reports that serialization is not
        enabled.
        """
        if not rdChemReactions.EnumerateLibraryCanSerialize():
            print(
                "-- Skipping testRGroupState, serialization of EnumerateLibrary not enabled",
                file=sys.stderr)
            return

        log("testRGroupState")
        smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
        rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
        reagents = [
            [
                Chem.MolFromSmiles('C=CCN=C=S'),
                Chem.MolFromSmiles('CC=CCN=C=S'),
            ],
            [
                Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
                Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
                Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
            ],
        ]

        def tostr(product_sets):
            # Convert the nested result of nextSmiles() into plain lists of
            # str so that two draws can be compared with assertEqual.
            return [[str(x) for x in v] for v in product_sets]

        def check_state_roundtrip(enumerator):
            # Snapshot the state, draw twice, restore the snapshot and require
            # that the next two draws repeat the first two.
            state = enumerator.GetState()
            first = enumerator.nextSmiles()
            second = enumerator.nextSmiles()
            enumerator.SetState(state)
            self.assertEqual(tostr(enumerator.nextSmiles()), tostr(first))
            self.assertEqual(tostr(enumerator.nextSmiles()), tostr(second))

        # Default enumeration strategy.
        check_state_roundtrip(rdChemReactions.EnumerateLibrary(rxn, reagents))

        # Random strategies: restoring the state must also replay the same
        # "random" draws.
        check_state_roundtrip(
            rdChemReactions.EnumerateLibrary(
                rxn, reagents, rdChemReactions.RandomSampleStrategy()))
        check_state_roundtrip(
            rdChemReactions.EnumerateLibrary(
                rxn, reagents, rdChemReactions.RandomSampleAllBBsStrategy()))

        enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents)
        smiresults = [
            'C=CCNC(=S)NCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCc1ncc(Cl)cc1Br',
            'C=CCNC(=S)NCCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br',
            'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br'
        ]
        # Round-trip the expected SMILES through RDKit so that they are
        # written the same way as the enumerated products below.
        smiresults = [
            Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults
        ]

        # Move the underlying enumerator forward, then reset; the iteration
        # below is expected to start from the beginning again.
        enumerator.GetEnumerator().Skip(10)
        enumerator.ResetState()

        results = []
        for result in enumerator:
            for prodSet in result:
                for mol in prodSet:
                    results.append(Chem.MolToSmiles(mol))

        self.assertEqual(results, smiresults)
Beispiel #2
0
    def testRandomEnumerateLibrary(self):
        """Exercise EnumerateLibrary with ``RandomSampleStrategy``.

        First, products are drawn at random until every one of the six
        expected product SMILES has been seen; the test fails if that has not
        happened after 100,000 draws.

        When ``EnumerateLibraryCanSerialize()`` is true, the test additionally
        checks that an enumerator restored with ``InitFromString()`` from a
        ``Serialize()`` snapshot produces the same sequence as the original,
        both for a snapshot taken before any draw and for one taken after the
        second draw.
        """
        log("testRandomEnumerateLibrary")
        smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
        rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
        reagents = [
            [
                Chem.MolFromSmiles('C=CCN=C=S'),
                Chem.MolFromSmiles('CC=CCN=C=S'),
            ],
            [
                Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
                Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
                Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
            ],
        ]

        enumerator = rdChemReactions.EnumerateLibrary(
            rxn, reagents, rdChemReactions.RandomSampleStrategy())
        self.assertTrue(enumerator)
        smiresults = [
            'C=CCNC(=S)NCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCc1ncc(Cl)cc1Br',
            'C=CCNC(=S)NCCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br',
            'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br'
        ]
        results = [
            Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults
        ]
        # Built once; the sampling loop compares against it on every pass.
        expected = set(results)

        enumerator = rdChemReactions.EnumerateLibrary(
            rxn, reagents, rdChemReactions.RandomSampleStrategy())
        iteren = iter(enumerator)
        res = set()
        count = 0
        while res != expected:
            count += 1
            if count > 100000:
                print(
                    "Unable to find enumerate set with 100,000 random samples!",
                    file=sys.stderr)
                # Always fails here (the loop condition guarantees the sets
                # differ); used for its diff output and to abort the loop.
                self.assertEqual(res, expected)

            prod = iteren.next()
            for mols in prod:
                res.add(Chem.MolToSmiles(mols[0]))

        if rdChemReactions.EnumerateLibraryCanSerialize():
            enumerator = rdChemReactions.EnumerateLibrary(
                rxn, reagents, rdChemReactions.RandomSampleStrategy())
            # Named `serialized` rather than `pickle` so the stdlib module
            # name is not shadowed.
            serialized = enumerator.Serialize()
            enumerator2 = rdChemReactions.EnumerateLibrary()
            enumerator2.InitFromString(serialized)

            self.assertEqual(enumerator.GetEnumerator().Type(),
                             enumerator2.GetEnumerator().Type())

            iteren = iter(enumerator)
            iteren2 = iter(enumerator2)

            # The original and the restored copy must stay in lock-step.
            outsmiles = []
            for i in range(10):
                prods1 = iteren.next()
                prods2 = iteren2.next()
                self.assertEqual(len(prods1), len(prods2))
                for mols1, mols2 in zip(prods1, prods2):
                    self.assertEqual(len(mols1), 1)
                    smi1 = Chem.MolToSmiles(mols1[0])
                    self.assertEqual(smi1, Chem.MolToSmiles(mols2[0]))
                    outsmiles.append(smi1)

                if i == 1:
                    # Snapshot after the second draw (mid-enumeration).
                    pickle_at_2 = enumerator.Serialize()

            # make sure we can pickle the state as well
            enumerator3 = rdChemReactions.EnumerateLibrary()
            enumerator3.InitFromString(pickle_at_2)
            iteren3 = iter(enumerator3)
            outsmiles2 = []
            for i in range(8):
                prods3 = iteren3.next()
                for mols3 in prods3:
                    self.assertEqual(len(mols3), 1)
                    outsmiles2.append(Chem.MolToSmiles(mols3[0]))

            # The restored enumerator must continue where the snapshot was
            # taken, i.e. reproduce draws 3..10 of the original.
            self.assertEqual(outsmiles2, outsmiles[2:])
Beispiel #3
0
    def testRandomEnumerateAllBBsLibrary(self):
        """Exercise EnumerateLibrary with ``RandomSampleAllBBsStrategy``.

        The first part draws three times and inspects ``GetPosition()`` after
        each draw: both building blocks of the first reagent must have been
        visited after two draws, and all three building blocks of the second
        reagent after three draws.

        When ``EnumerateLibraryCanSerialize()`` is true, the test additionally
        checks that an enumerator restored with ``InitFromString()`` from a
        ``Serialize()`` snapshot produces the same sequence as the original,
        both for a snapshot taken before any draw and for one taken after the
        second draw.
        """
        log("testRandomEnumerateAllBBsLibrary")
        smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
        rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
        reagents = [
            [
                Chem.MolFromSmiles('C=CCN=C=S'),
                Chem.MolFromSmiles('CC=CCN=C=S'),
            ],
            [
                Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
                Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
                Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
            ],
        ]
        enumerator = rdChemReactions.EnumerateLibrary(
            rxn, reagents, rdChemReactions.RandomSampleAllBBsStrategy())
        self.assertTrue(enumerator)

        # test the BB sampling here
        strategy = iter(enumerator)
        r1 = set()  # building-block indices seen for the first reagent
        r2 = set()  # building-block indices seen for the second reagent

        def draw_and_record():
            # Advance once and record which building block was used for each
            # of the two reagents.
            strategy.next()
            groups = strategy.GetPosition()
            print("**", list(groups), file=sys.stderr)
            r1.add(groups[0])
            r2.add(groups[1])

        draw_and_record()
        draw_and_record()
        # both BBs of the first reagent are sampled within two iterations
        self.assertEqual(r1, set([0, 1]))
        draw_and_record()
        # all three BBs of the second reagent are sampled within three
        # iterations
        self.assertEqual(r2, set([0, 1, 2]))

        if rdChemReactions.EnumerateLibraryCanSerialize():
            enumerator = rdChemReactions.EnumerateLibrary(
                rxn, reagents, rdChemReactions.RandomSampleAllBBsStrategy())
            self.assertTrue(enumerator)

            # Named `serialized` rather than `pickle` so the stdlib module
            # name is not shadowed.
            serialized = enumerator.Serialize()
            enumerator2 = rdChemReactions.EnumerateLibrary()
            enumerator2.InitFromString(serialized)

            self.assertEqual(enumerator.GetEnumerator().Type(),
                             enumerator2.GetEnumerator().Type())
            iteren = iter(enumerator)
            iteren2 = iter(enumerator2)

            # The original and the restored copy must stay in lock-step.
            outsmiles = []
            for i in range(10):
                prods1 = iteren.next()
                prods2 = iteren2.next()
                self.assertEqual(len(prods1), len(prods2))
                for mols1, mols2 in zip(prods1, prods2):
                    self.assertEqual(len(mols1), 1)
                    smi1 = Chem.MolToSmiles(mols1[0])
                    self.assertEqual(smi1, Chem.MolToSmiles(mols2[0]))
                    outsmiles.append(smi1)

                if i == 1:
                    # Snapshot after the second draw (mid-enumeration).
                    pickle_at_2 = enumerator.Serialize()

            # make sure we can pickle the state as well
            enumerator3 = rdChemReactions.EnumerateLibrary()
            enumerator3.InitFromString(pickle_at_2)
            self.assertEqual(enumerator.GetEnumerator().Type(),
                             enumerator3.GetEnumerator().Type())

            iteren3 = iter(enumerator3)
            outsmiles2 = []
            for i in range(8):
                prods3 = iteren3.next()
                for mols3 in prods3:
                    self.assertEqual(len(mols3), 1)
                    outsmiles2.append(Chem.MolToSmiles(mols3[0]))

            # The restored enumerator must continue where the snapshot was
            # taken, i.e. reproduce draws 3..10 of the original.
            self.assertEqual(outsmiles2, outsmiles[2:])
Beispiel #4
0
    def testEnumerateLibrary(self):
        """Exercise the default (deterministic) EnumerateLibrary enumeration.

        Checks, in order:

        * the enumerator holds the same reaction (compared via ``ToBinary()``)
          and the same reagents (compared via SMILES) it was built from;
        * iterating yields the six expected products in the expected order,
          with ``GetPosition()`` reporting the expected index pairs; when
          serialization is available this is repeated for an enumerator
          restored from a fresh ``Serialize()`` snapshot and for one restored
          from the stored ``enumeration.pickle`` file in ``self.dataDir``;
        * a snapshot taken after the second product resumes with the
          remaining products;
        * the ``nextSmiles()`` interface yields the same six products.
        """
        log("testEnumerateLibrary")
        smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
        rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
        reagents = [
            [
                Chem.MolFromSmiles('C=CCN=C=S'),
                Chem.MolFromSmiles('CC=CCN=C=S'),
            ],
            [
                Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
                Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
                Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
            ],
        ]

        enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents)
        self.assertTrue(enumerator)

        # need to initialize the reaction before getting the binary serialization
        rxn.Initialize()
        self.assertEqual(rxn.ToBinary(), enumerator.GetReaction().ToBinary())

        # Index-based access is kept on purpose: only len() and [] are known
        # to work on the object returned by GetReagents().
        bbs = enumerator.GetReagents()
        for i in range(len(bbs)):
            for j in range(len(bbs[i])):
                self.assertEqual(Chem.MolToSmiles(reagents[i][j]),
                                 Chem.MolToSmiles(bbs[i][j]))

        smiresults = [
            'C=CCNC(=S)NCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCc1ncc(Cl)cc1Br',
            'C=CCNC(=S)NCCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br',
            'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br'
        ]
        results = [
            Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults
        ]

        enumerators = [enumerator]

        # add serialized enumerators as well for testing if possible
        if rdChemReactions.EnumerateLibraryCanSerialize():
            # Named `serialized` rather than `pickle` so the stdlib module
            # name is not shadowed.
            serialized = enumerator.Serialize()
            enumerator2 = rdChemReactions.EnumerateLibrary()
            enumerator2.InitFromString(serialized)

            # make sure old pickles work
            enumerator3 = rdChemReactions.EnumerateLibrary()
            # Read inside a context manager so the file handle is closed.
            with open(os.path.join(self.dataDir, "enumeration.pickle"),
                      'rb') as old_pickle_file:
                old_pickle = old_pickle_file.read()
            enumerator3.InitFromString(old_pickle)

            print("==",
                  enumerator.GetEnumerator().Type(),
                  enumerator2.GetEnumerator().Type())
            self.assertEqual(enumerator.GetEnumerator().Type(),
                             enumerator2.GetEnumerator().Type())
            enumerators.append(enumerator2)
            enumerators.append(enumerator3)

        # check for fully sampled and deterministic ordering in final index values
        expected_positions = [[0, 0], [1, 0], [0, 1], [1, 1], [0, 2], [1, 2]]

        out = []
        for en in enumerators:
            # Pre-set so the final index check fails cleanly (instead of
            # raising NameError) if the enumerator yields nothing.
            i = 0
            positions = []
            for i, prods in enumerate(en):
                positions.append(list(en.GetPosition()))
                for mols in prods:
                    self.assertEqual(len(mols), 1)
                    smi = Chem.MolToSmiles(mols[0])
                    if en is enumerator:
                        out.append(smi)
                    self.assertEqual(smi, results[i])

                if (en is enumerator and i == 1
                        and rdChemReactions.EnumerateLibraryCanSerialize()):
                    # save the state not at the start
                    pickle_at_2 = enumerator.Serialize()
            self.assertEqual(i, 5)
            self.assertEqual(positions, expected_positions)

        if rdChemReactions.EnumerateLibraryCanSerialize():
            # see if we can restore the enumeration from the middle
            out3 = []
            enumerator3 = rdChemReactions.EnumerateLibrary()
            enumerator3.InitFromString(pickle_at_2)
            for prods in enumerator3:
                for mols in prods:
                    self.assertEqual(len(mols), 1)
                    smi = Chem.MolToSmiles(mols[0])
                    out3.append(smi)

            self.assertEqual(out[2:], out3)

        # test smiles interface
        enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents)
        i = 0
        # The enumerator's truth value is used as the "more products
        # available" condition.
        while enumerator:
            for mols in enumerator.nextSmiles():
                self.assertEqual(len(mols), 1)
                self.assertEqual(mols[0], results[i])
            i += 1
        self.assertEqual(i, 6)