예제 #1
0
  def testRemovingBadMatches(self):
    """Check that enumeration is empty when no reagent fits its template.

    The reaction's first template is an N=C(=S) fragment and its second is
    an amine nitrogen.  The reagent lists are supplied the other way round
    (amines first, isothiocyanates and plain alkanes second), so the test
    expects EnumerateLibrary to produce no products at all.
    """
    log("testRemoveBadMatches")
    smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"

    rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)

    # Reagent positions are deliberately inverted so that nothing matches.
    first_position = [
      Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
      Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
      Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
    ]
    second_position = [
      Chem.MolFromSmiles('C=CCN=C=S'),
      Chem.MolFromSmiles('CC=CCN=C=S'),
      Chem.MolFromSmiles('CCC'),
      Chem.MolFromSmiles('CCCCC'),
    ]
    reagents = [first_position, second_position]

    enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents)
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual([], list(enumerator))
예제 #2
0
    def testRemoveInsaneReagents(self):
        """Check how EnumerateLibrary filters building blocks per reactant.

        A three-reactant ChemDraw RXN block is loaded and matched against
        three lists of two building blocks each, under several settings:

        * unsanitized reaction: no building block matches any template;
        * default SanitizeRxn: every building block matches and is kept;
        * ChemDraw-style adjust params: only some building blocks match, and
          EnumerateLibrary keeps exactly those;
        * reagentMaxMatchCount = 1 (with either sanitization): one building
          block is kept per reactant position.
        """
        rxndata = "$RXN\nUntitled Document-1\n  ChemDraw10291618492D\n\n  3  1\n$MOL\n\n\n\n  2  1  0  0  0  0  0  0  0  0999 V2000\n    0.4125    0.0000    0.0000 N   0  0  0  0  0  0  0  0  0  3  0  0\n   -0.4125    0.0000    0.0000 R2  0  0  0  0  0  0  0  0  0  2  0  0\n  1  2  1  0        0\nM  END\n$MOL\n\n\n\n  2  1  0  0  0  0  0  0  0  0999 V2000\n   -0.4125    0.0000    0.0000 R1  0  0  0  0  0  0  0  0  0  1  0  0\n    0.4125    0.0000    0.0000 Cl  0  0  0  0  0  0  0  0  0  0  0  0\n  1  2  1  0        0\nM  END\n$MOL\n\n\n\n  2  1  0  0  0  0  0  0  0  0999 V2000\n    0.4125    0.0000    0.0000 N   0  0  0  0  0  0  0  0  0  5  0  0\n   -0.4125    0.0000    0.0000 R4  0  0  0  0  0  0  0  0  0  4  0  0\n  1  2  1  0        0\nM  END\n$MOL\n\n\n\n 14 15  0  0  0  0  0  0  0  0999 V2000\n    0.5072   -0.5166    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    0.5072    0.3084    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    1.2949   -0.7616    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0\n    1.7817   -0.0880    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    1.2967    0.5794    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    1.5558   -1.5443    0.0000 R1  0  0  0  0  0  0  0  0  0  1  0  0\n   -0.2073    0.7208    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n   -0.9218    0.3083    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n   -0.9217   -0.5167    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n   -0.2073   -0.9292    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n   -1.6362    0.7208    0.0000 N   0  0  0  0  0  0  0  0  0  3  0  0\n    1.5452    1.3661    0.0000 N   0  0  0  0  0  0  0  0  0  5  0  0\n    2.3507    1.5443    0.0000 R4  0  0  0  0  0  0  0  0  0  4  0  0\n   -2.3507    0.3083    0.0000 R2  0  0  0  0  0  0  0  0  0  2  0  0\n  1  2  2  0        0\n  1  3  1  0        0\n  3  4  1  0        0\n  4  5  1  0        0\n  5  2  1  0        0\n  3  6  1  0        0\n  2  7  1  0        0\n  7  8  2  0        0\n  8  9  1  0        0\n  9 10  2  0        0\n 10  1  1  0        0\n  8 11  1  0        0\n 12 13  1  0        0\n 11 14  1  0        0\n 12  5  1  0        0\nM  END\n"

        rxn = AllChem.ReactionFromRxnBlock(rxndata)
        r1 = [
            Chem.MolFromSmiles("CCNCC"),
            Chem.MolFromSmiles("NCC"),
        ]
        r2 = [
            Chem.MolFromSmiles("ClC1CCCC1"),
            Chem.MolFromSmiles("ClC1CCCC1Cl"),
        ]
        r3 = [
            Chem.MolFromSmiles("CCNCC"),
            Chem.MolFromSmiles("NCC"),
        ]
        bbs = [r1, r2, r3]

        def kept_counts(library):
            # Number of building blocks the library retained at each of the
            # three reactant positions (uses only len() and indexing on the
            # container returned by GetReagents()).
            kept = library.GetReagents()
            return [len(kept[position]) for position in range(len(bbs))]

        # nothing matches!
        for i, reagent in enumerate(rxn.GetReactants()):
            for bb in bbs[i]:
                self.assertFalse(bb.HasSubstructMatch(reagent))

        # everything matches - yay sanitization!
        rdChemReactions.SanitizeRxn(rxn)
        for i, reagent in enumerate(rxn.GetReactants()):
            for bb in bbs[i]:
                self.assertTrue(bb.HasSubstructMatch(reagent))

        en = rdChemReactions.EnumerateLibrary(rxn, bbs)
        self.assertEqual(kept_counts(en), [2, 2, 2])

        #####################################################################################
        # Match only at rgroups (ChemDraw style)
        rxn = AllChem.ReactionFromRxnBlock(rxndata)
        expected_matches = [[False, True], [True, True], [False, True]]
        rdChemReactions.SanitizeRxn(
            rxn, params=rdChemReactions.GetChemDrawRxnAdjustParams())
        for i, (reagent, expected) in enumerate(
                zip(rxn.GetReactants(), expected_matches)):
            # Iterate the building blocks (not a stale ``bb`` from the loop
            # above) and compare against the expected pattern; the previous
            # assertTrue(match, expected) could never fail.
            match = [bb.HasSubstructMatch(reagent) for bb in bbs[i]]
            self.assertEqual(match, expected)

        # Now try EnumerateLibrary: it should keep exactly the matching
        # building blocks from expected_matches.
        en = rdChemReactions.EnumerateLibrary(rxn, bbs)
        self.assertEqual(kept_counts(en), [1, 2, 1])

        #####################################################################################
        # now set the removal options to only make one product per reagent set
        rxn = AllChem.ReactionFromRxnBlock(rxndata)
        rdChemReactions.SanitizeRxn(rxn)

        opts = rdChemReactions.EnumerationParams()
        opts.reagentMaxMatchCount = 1
        en = rdChemReactions.EnumerateLibrary(rxn, bbs, params=opts)
        self.assertEqual(kept_counts(en), [1, 1, 1])

        #####################################################################################
        # now set the removal options to only make one product per reagent set
        #  but with ChemDraw-style (match only at rgroups) sanitization
        rxn = AllChem.ReactionFromRxnBlock(rxndata)
        rdChemReactions.SanitizeRxn(
            rxn, params=rdChemReactions.GetChemDrawRxnAdjustParams())

        opts = rdChemReactions.EnumerationParams()
        opts.reagentMaxMatchCount = 1
        en = rdChemReactions.EnumerateLibrary(rxn, bbs, params=opts)
        self.assertEqual(kept_counts(en), [1, 1, 1])
예제 #3
0
    def testRGroupState(self):
        """Check that enumerator state can be captured, restored and reset.

        For the default strategy and for both random sampling strategies, the
        state is captured with GetState(), two results are drawn with
        nextSmiles(), the state is restored with SetState(), and the next two
        results must equal the first two.  Finally, after Skip(10) and
        ResetState() on a default enumerator, a full iteration must produce
        the six expected thiourea products in order.

        Returns early, with a note on stderr, when EnumerateLibrary
        serialization is not enabled.
        """
        if not rdChemReactions.EnumerateLibraryCanSerialize():
            print(
                "-- Skipping testRGroupState, serialization of EnumerateLibrary not enabled",
                file=sys.stderr)
            return

        log("testRGroupState")
        smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
        rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
        reagents = [
            [
                Chem.MolFromSmiles('C=CCN=C=S'),
                Chem.MolFromSmiles('CC=CCN=C=S'),
            ],
            [
                Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
                Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
                Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
            ],
        ]

        def tostr(nested):
            # Convert a nested result container into plain lists of str so
            # that two results can be compared with ==.
            return [[str(x) for x in v] for v in nested]

        def check_state_roundtrip(library):
            # Draw two results, rewind to the saved state, and require the
            # same two results again.
            state = library.GetState()
            first = library.nextSmiles()
            second = library.nextSmiles()
            library.SetState(state)
            self.assertEqual(tostr(library.nextSmiles()), tostr(first))
            self.assertEqual(tostr(library.nextSmiles()), tostr(second))

        check_state_roundtrip(rdChemReactions.EnumerateLibrary(rxn, reagents))
        check_state_roundtrip(
            rdChemReactions.EnumerateLibrary(
                rxn, reagents, rdChemReactions.RandomSampleStrategy()))
        check_state_roundtrip(
            rdChemReactions.EnumerateLibrary(
                rxn, reagents, rdChemReactions.RandomSampleAllBBsStrategy()))

        enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents)
        smiresults = [
            'C=CCNC(=S)NCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCc1ncc(Cl)cc1Br',
            'C=CCNC(=S)NCCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br',
            'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br'
        ]
        # Canonicalize the expected SMILES so they compare equal to the
        # MolToSmiles output of the products.
        smiresults = [
            Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults
        ]
        # After ResetState() the iteration is expected to start from the
        # first product again, regardless of the preceding Skip().
        enumerator.GetEnumerator().Skip(10)
        enumerator.ResetState()

        results = []
        for result in enumerator:
            for prodSet in result:
                for mol in prodSet:
                    results.append(Chem.MolToSmiles(mol))

        self.assertEqual(results, smiresults)
예제 #4
0
    def testRandomEnumerateAllBBsLibrary(self):
        """Exercise EnumerateLibrary with RandomSampleAllBBsStrategy.

        First checks the building-block coverage: after two draws both
        building blocks of the first reagent position have been used, and
        after three draws all three of the second position have been used.

        If serialization is available, also checks that an enumerator
        restored from Serialize() output yields the same products as the
        original, both from the initial state and from a state captured after
        two draws.
        """
        log("testRandomEnumerateAllBBsLibrary")
        smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
        rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
        reagents = [
            [
                Chem.MolFromSmiles('C=CCN=C=S'),
                Chem.MolFromSmiles('CC=CCN=C=S'),
            ],
            [
                Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
                Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
                Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
            ],
        ]
        enumerator = rdChemReactions.EnumerateLibrary(
            rxn, reagents, rdChemReactions.RandomSampleAllBBsStrategy())
        self.assertTrue(enumerator)

        # test the BB sampling here
        strategy = iter(enumerator)
        r1 = set()  # building-block indices seen at the first position
        r2 = set()  # building-block indices seen at the second position

        def draw_and_record():
            # Advance one step and record which building block was used at
            # each reagent position.
            next(strategy)
            groups = strategy.GetPosition()
            print("**", list(groups), file=sys.stderr)
            r1.add(groups[0])
            r2.add(groups[1])

        draw_and_record()
        draw_and_record()
        # two bbs at reagent one, all sampled within two iterations
        self.assertEqual(r1, {0, 1})
        draw_and_record()
        # three bbs at reagent two, all sampled within three iterations
        self.assertEqual(r2, {0, 1, 2})

        if rdChemReactions.EnumerateLibraryCanSerialize():
            enumerator = rdChemReactions.EnumerateLibrary(
                rxn, reagents, rdChemReactions.RandomSampleAllBBsStrategy())
            self.assertTrue(enumerator)

            serialized = enumerator.Serialize()
            enumerator2 = rdChemReactions.EnumerateLibrary()
            enumerator2.InitFromString(serialized)

            self.assertEqual(enumerator.GetEnumerator().Type(),
                             enumerator2.GetEnumerator().Type())
            iteren = iter(enumerator)
            iteren2 = iter(enumerator2)

            # The original and the restored copy must produce identical
            # products step by step.
            outsmiles = []
            for i in range(10):
                prods1 = next(iteren)
                prods2 = next(iteren2)
                self.assertEqual(len(prods1), len(prods2))
                for mols1, mols2 in zip(prods1, prods2):
                    self.assertEqual(len(mols1), 1)
                    smi1 = Chem.MolToSmiles(mols1[0])
                    self.assertEqual(smi1, Chem.MolToSmiles(mols2[0]))
                    outsmiles.append(smi1)

                if i == 1:
                    # Capture the state after exactly two draws.
                    pickle_at_2 = enumerator.Serialize()

            # make sure we can pickle the state as well
            enumerator3 = rdChemReactions.EnumerateLibrary()
            enumerator3.InitFromString(pickle_at_2)
            self.assertEqual(enumerator.GetEnumerator().Type(),
                             enumerator3.GetEnumerator().Type())

            iteren3 = iter(enumerator3)
            outsmiles2 = []
            for i in range(8):
                prods3 = next(iteren3)
                for mols3 in prods3:
                    self.assertEqual(len(mols3), 1)
                    outsmiles2.append(Chem.MolToSmiles(mols3[0]))

            # The copy restored mid-stream must continue exactly where the
            # original was after its first two draws.
            self.assertEqual(outsmiles2, outsmiles[2:])
예제 #5
0
    def testRandomEnumerateLibrary(self):
        """Exercise EnumerateLibrary with RandomSampleStrategy.

        Draws random products until all six expected thiourea products have
        been seen, failing if that takes more than 100,000 draws.

        If serialization is available, also checks that an enumerator
        restored from Serialize() output yields the same products as the
        original, both from the initial state and from a state captured after
        two draws.
        """
        log("testRandomEnumerateLibrary")
        smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
        rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
        reagents = [
            [
                Chem.MolFromSmiles('C=CCN=C=S'),
                Chem.MolFromSmiles('CC=CCN=C=S'),
            ],
            [
                Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
                Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
                Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
            ],
        ]

        enumerator = rdChemReactions.EnumerateLibrary(
            rxn, reagents, rdChemReactions.RandomSampleStrategy())
        self.assertTrue(enumerator)
        smiresults = [
            'C=CCNC(=S)NCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCc1ncc(Cl)cc1Br',
            'C=CCNC(=S)NCCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br',
            'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br'
        ]
        # Canonical SMILES of every product the library can produce.
        expected = set(
            Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults)

        enumerator = rdChemReactions.EnumerateLibrary(
            rxn, reagents, rdChemReactions.RandomSampleStrategy())
        iteren = iter(enumerator)
        res = set()
        count = 0
        while res != expected:
            count += 1
            if count > 100000:
                print(
                    "Unable to find enumerate set with 100,000 random samples!",
                    file=sys.stderr)
                # res != expected here, so this assertion fails the test and
                # reports which products were never sampled.
                self.assertEqual(res, expected)

            prod = next(iteren)
            for mols in prod:
                res.add(Chem.MolToSmiles(mols[0]))

        if rdChemReactions.EnumerateLibraryCanSerialize():
            enumerator = rdChemReactions.EnumerateLibrary(
                rxn, reagents, rdChemReactions.RandomSampleStrategy())
            serialized = enumerator.Serialize()
            enumerator2 = rdChemReactions.EnumerateLibrary()
            enumerator2.InitFromString(serialized)

            self.assertEqual(enumerator.GetEnumerator().Type(),
                             enumerator2.GetEnumerator().Type())

            iteren = iter(enumerator)
            iteren2 = iter(enumerator2)

            # The original and the restored copy must produce identical
            # products step by step.
            outsmiles = []
            for i in range(10):
                prods1 = next(iteren)
                prods2 = next(iteren2)
                self.assertEqual(len(prods1), len(prods2))
                for mols1, mols2 in zip(prods1, prods2):
                    self.assertEqual(len(mols1), 1)
                    smi1 = Chem.MolToSmiles(mols1[0])
                    self.assertEqual(smi1, Chem.MolToSmiles(mols2[0]))
                    outsmiles.append(smi1)

                if i == 1:
                    # Capture the state after exactly two draws.
                    pickle_at_2 = enumerator.Serialize()

            # make sure we can pickle the state as well
            enumerator3 = rdChemReactions.EnumerateLibrary()
            enumerator3.InitFromString(pickle_at_2)
            iteren3 = iter(enumerator3)
            outsmiles2 = []
            for i in range(8):
                prods3 = next(iteren3)
                for mols3 in prods3:
                    self.assertEqual(len(mols3), 1)
                    outsmiles2.append(Chem.MolToSmiles(mols3[0]))

            # The copy restored mid-stream must continue exactly where the
            # original was after its first two draws.
            self.assertEqual(outsmiles2, outsmiles[2:])
예제 #6
0
    def testEnumerateLibrary(self):
        """Exercise the default (ordered) EnumerateLibrary end to end.

        Checks that:

        * the enumerator reports the same reaction and reagents it was built
          with;
        * iteration yields the six expected thiourea products in a fixed
          order, with the expected reagent-index positions;
        * if serialization is available, enumerators restored from a fresh
          Serialize() string and from the stored ``enumeration.pickle`` file
          behave identically, and an enumerator restored from a mid-stream
          state continues from that point;
        * the nextSmiles() interface yields the same six products.

        Reads ``enumeration.pickle`` from ``self.dataDir`` when serialization
        is available.
        """
        log("testEnumerateLibrary")
        smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
        rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
        reagents = [
            [
                Chem.MolFromSmiles('C=CCN=C=S'),
                Chem.MolFromSmiles('CC=CCN=C=S'),
            ],
            [
                Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
                Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
                Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
            ],
        ]

        enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents)
        self.assertTrue(enumerator)

        # need to initialize the reaction before getting the binary serialization
        rxn.Initialize()
        self.assertEqual(rxn.ToBinary(), enumerator.GetReaction().ToBinary())

        # Index explicitly: only len() and indexing are relied on for the
        # container returned by GetReagents().
        bbs = enumerator.GetReagents()
        for i in range(len(bbs)):
            for j in range(len(bbs[i])):
                self.assertEqual(
                    Chem.MolToSmiles(reagents[i][j]),
                    Chem.MolToSmiles(bbs[i][j]))

        smiresults = [
            'C=CCNC(=S)NCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCc1ncc(Cl)cc1Br',
            'C=CCNC(=S)NCCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br',
            'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br'
        ]
        # Canonicalize so the expected strings compare equal to MolToSmiles
        # output of the products.
        results = [
            Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults
        ]

        enumerators = [enumerator]

        # add serialized enumerators as well for testing if possible
        if rdChemReactions.EnumerateLibraryCanSerialize():
            serialized = enumerator.Serialize()
            enumerator2 = rdChemReactions.EnumerateLibrary()
            enumerator2.InitFromString(serialized)

            # make sure old pickles work
            enumerator3 = rdChemReactions.EnumerateLibrary()
            pickle_path = os.path.join(self.dataDir, "enumeration.pickle")
            # Context manager so the file handle is closed deterministically.
            with open(pickle_path, 'rb') as stream:
                enumerator3.InitFromString(stream.read())

            print("==",
                  enumerator.GetEnumerator().Type(),
                  enumerator2.GetEnumerator().Type())
            self.assertEqual(enumerator.GetEnumerator().Type(),
                             enumerator2.GetEnumerator().Type())
            enumerators.append(enumerator2)
            enumerators.append(enumerator3)

        # check for fully sampled and deterministic ordering in final index values
        expected_positions = [[0, 0], [1, 0], [0, 1], [1, 1], [0, 2], [1, 2]]

        out = []
        for en in enumerators:
            i = 0
            positions = []
            for i, prods in enumerate(en):
                positions.append(list(en.GetPosition()))
                for mols in prods:
                    self.assertEqual(len(mols), 1)
                    smi = Chem.MolToSmiles(mols[0])
                    if en is enumerator:
                        out.append(smi)
                    self.assertEqual(smi, results[i])

                if (en is enumerator and i == 1
                        and rdChemReactions.EnumerateLibraryCanSerialize()):
                    # save the state not at the start
                    pickle_at_2 = enumerator.Serialize()
            # Last index seen is 5, i.e. exactly six product sets.
            self.assertEqual(i, 5)
            self.assertEqual(positions, expected_positions)

        if rdChemReactions.EnumerateLibraryCanSerialize():
            # see if we can restore the enumeration from the middle
            out3 = []
            enumerator3 = rdChemReactions.EnumerateLibrary()
            enumerator3.InitFromString(pickle_at_2)
            for prods in enumerator3:
                for mols in prods:
                    self.assertEqual(len(mols), 1)
                    smi = Chem.MolToSmiles(mols[0])
                    out3.append(smi)

            self.assertEqual(out[2:], out3)

        # test smiles interface
        enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents)
        i = 0
        # The enumerator's truth value is used as the loop condition.
        while enumerator:
            for mols in enumerator.nextSmiles():
                self.assertEqual(len(mols), 1)
                self.assertEqual(mols[0], results[i])
            i += 1
        self.assertEqual(i, 6)