コード例 #1
0
ファイル: rough_test.py プロジェクト: sirbiscuit/rdkit
  def test0SubstructLibrary(self):
    """Search a library holding 100 copies of benzene with benzene itself.

    Every combination of molecule holder and (optional) pattern fingerprint
    holder is exercised.  When serialization is available, the pickled and
    the Serialize()-round-tripped copies of the library are checked as well.
    """
    for fpholderCls in [None, rdSubstructLibrary.PatternHolder]:
      for holder in [rdSubstructLibrary.MolHolder(), rdSubstructLibrary.CachedMolHolder(),
                     rdSubstructLibrary.CachedSmilesMolHolder()]:
        fpholder = fpholderCls() if fpholderCls else None
        slib_ = rdSubstructLibrary.SubstructLibrary(holder, fpholder)
        for i in range(100):
          m = Chem.MolFromSmiles("c1ccccc1")
          # The value returned by AddMol is expected to be the insertion index.
          self.assertEqual(slib_.AddMol(m), i)

        libs = [slib_]
        if rdSubstructLibrary.SubstructLibraryCanSerialize():
          serialized1 = pickle.loads(pickle.dumps(slib_))
          serialized2 = rdSubstructLibrary.SubstructLibrary(slib_.Serialize())
          libs.append(serialized1)
          libs.append(serialized2)

        # `m` is the last benzene added above; it doubles as the query.
        for slib in libs:
          res = slib.GetMatches(m)
          self.assertEqual(len(res), 100)

          res = slib.GetMatches(m)
          self.assertEqual(len(res), 100)
          self.assertEqual(set(res), set(range(100)))

          res = slib.GetMatches(m, maxResults=100)
          self.assertEqual(len(res), 100)
          self.assertEqual(len(slib.GetMatches(m, startIdx=0, endIdx=100)), 100)

          self.assertTrue(slib.HasMatch(m))
          self.assertEqual(slib.CountMatches(m), 100)
コード例 #2
0
ファイル: rough_test.py プロジェクト: wvandertoorn/rdkit
    def testOptions(self):
        """Compare library searches against per-molecule matching.

        For every molecule holder type (and, when serialization is available,
        for the pickled / Serialize() copies too) the number of library hits
        must equal the number of stereo example molecules that match the same
        query directly, with and without chirality.
        """
        mols = makeStereoExamples() * 10

        def reference_count(query, chirality):
            # Brute-force count obtained without going through the library.
            return len([
                candidate for candidate in mols
                if candidate.HasSubstructMatch(query, useChirality=chirality)
            ])

        holder_classes = [
            rdSubstructLibrary.MolHolder,
            rdSubstructLibrary.CachedMolHolder,
            rdSubstructLibrary.CachedSmilesMolHolder,
            rdSubstructLibrary.CachedTrustedSmilesMolHolder,
        ]
        for holderCls in holder_classes:
            mol_holder = holderCls()
            original_lib = rdSubstructLibrary.SubstructLibrary(
                mol_holder, None)
            for mol in mols:
                original_lib.AddMol(mol)

            candidates = [original_lib]
            if rdSubstructLibrary.SubstructLibraryCanSerialize():
                via_pickle = pickle.loads(pickle.dumps(original_lib))
                via_bytes = rdSubstructLibrary.SubstructLibrary(
                    original_lib.Serialize())
                candidates.extend([via_pickle, via_bytes])

            for library in candidates:
                # Wildcard substituents, default search options; the expected
                # count comes from chirality-aware matching.
                core = Chem.MolFromSmarts("C-1-C-C-O-C(-*)(-*)1")
                hits = library.GetMatches(core)
                self.assertEqual(len(hits), reference_count(core, True))

                # Explicit O/N substituents, chirality ignored.
                core = Chem.MolFromSmarts("C-1-C-C-O-C(-[O])(-[N])1")
                core.SetProp("core", "core")
                hits = library.GetMatches(core, useChirality=False)
                self.assertEqual(len(hits), reference_count(core, False))

                # Chiral query, but chirality still ignored by the search.
                core = Chem.MolFromSmarts("C-1-C-C-O-[C@@](-[O])(-[N])1")
                hits = library.GetMatches(core, useChirality=False)
                self.assertEqual(len(hits), reference_count(core, False))

                # Chiral query with default search options.
                core = Chem.MolFromSmarts("C-1-C-C-O-[C@@](-[O])(-[N])1")
                hits = library.GetMatches(core)
                self.assertEqual(len(hits), reference_count(core, True))
コード例 #3
0
ファイル: rough_test.py プロジェクト: sirbiscuit/rdkit
  def test_PatternHolder(self):
    """Check that a 2048-bit PatternHolder gives the same hits as the default one.

    Two libraries are filled with every 10th molecule of the first 1000
    entries of the NCI sample file: the first by adding SMILES and
    fingerprints to its holders by hand, the second through AddMol().  A
    nitrogen SMARTS query must return identical match lists from both.
    """
    fname = os.path.join(os.environ["RDBASE"], "Data", "NCI", "first_5K.smi")
    suppl = Chem.SmilesMolSupplier(fname, delimiter="\t", titleLine=False)
    mols1 = rdSubstructLibrary.CachedTrustedSmilesMolHolder()
    fps1 = rdSubstructLibrary.PatternHolder(2048)
    ssslib1 = rdSubstructLibrary.SubstructLibrary(mols1, fps1)
    mols2 = rdSubstructLibrary.CachedTrustedSmilesMolHolder()
    fps2 = rdSubstructLibrary.PatternHolder()
    ssslib2 = rdSubstructLibrary.SubstructLibrary(mols2, fps2)

    RDLogger.DisableLog('rdApp.error')
    try:
      for i in range(0, 1000, 10):
        try:
          mol = suppl[i]
        except Exception:
          # Entries the supplier cannot deliver are skipped on purpose.
          continue
        if not mol:
          continue
        mols1.AddSmiles(Chem.MolToSmiles(mol))
        fps1.AddFingerprint(fps1.MakeFingerprint(mol))
        ssslib2.AddMol(mol)
    finally:
      # Always restore error logging, even if filling the libraries fails,
      # so that later tests are not silenced.
      RDLogger.EnableLog('rdApp.error')

    query = Chem.MolFromSmarts("N")
    self.assertIsNotNone(query)
    matches1 = sorted(ssslib1.GetMatches(query))
    matches2 = sorted(ssslib2.GetMatches(query))
    self.assertEqual(len(matches1), len(matches2))
    # Comparing the lists directly reports the differing indices on failure.
    self.assertEqual(matches1, matches2)
コード例 #4
0
ファイル: rough_test.py プロジェクト: wvandertoorn/rdkit
    def test_init_from_and_to_stream(self):
        """Round-trip a library through ToStream() / InitFromStream().

        The library is written to a temporary file and to an in-memory
        buffer; each copy read back must have the same length as the
        original.
        """
        import os
        from io import StringIO, BytesIO

        mols = makeStereoExamples() * 10
        holder = rdSubstructLibrary.CachedSmilesMolHolder()

        # one day I'll fix this, but we need to write text but read binary
        #  grrr....  something about the python_streambuf handler.
        slib = rdSubstructLibrary.SubstructLibrary(holder, None)

        for mol in mols:
            holder.AddSmiles(Chem.MolToSmiles(mol, isomericSmiles=True))

        if rdSubstructLibrary.SubstructLibraryCanSerialize():
            fd, path = tempfile.mkstemp()
            # mkstemp hands back an open descriptor; close it right away
            # because the file is reopened by name below.
            os.close(fd)
            try:
                with open(path, 'w') as file:
                    slib.ToStream(file)

                with open(path, 'rb') as file:
                    slib2 = rdSubstructLibrary.SubstructLibrary()
                    slib2.InitFromStream(file)
                    self.assertEqual(len(slib), len(slib2))
            finally:
                # Do not leave the temporary file behind.
                os.remove(path)

        s = StringIO()
        slib.ToStream(s)

        sb = BytesIO(s.getvalue().encode("ascii"))
        self.assertTrue(len(sb.getvalue()) > 0)
        slib3 = rdSubstructLibrary.SubstructLibrary()
        slib3.InitFromStream(sb)
        # Check the library that was just read from the in-memory stream.
        self.assertEqual(len(slib), len(slib3))
コード例 #5
0
ファイル: rough_test.py プロジェクト: lmmentel/rdkit
    def test_addpatterns(self):
        """Check AddPatterns() against a library built with fingerprints up front.

        A reference library is created with a pattern holder from the start.
        A second library sharing the same molecule holder gets its
        fingerprints afterwards through AddPatterns() for several thread
        counts; both must report the same non-zero match count for every
        ligand.
        """
        ligand_smiles = [
            "CCS(=O)(=O)c1ccc(OC)c(Nc2ncc(-c3cccc(-c4ccccn4)c3)o2)c1",
            "COc1ccc(S(=O)(=O)NCC2CC2)cc1Nc1ncc(-c2cccc(-c3cccnc3)c2)o1",
            "COc1ccc(-c2oc3ncnc(N)c3c2-c2ccc(NC(=O)Nc3cc(C(F)(F)F)ccc3F)cc2)cc1",
            "COC(=O)Nc1nc2ccc(Oc3ccc(NC(=O)Nc4cc(C(F)(F)F)ccc4F)cc3)cc2[nH]1",
            "COc1cc(Nc2ncnc(-c3cccnc3Nc3ccccc3)n2)cc(OC)c1OC",
            "O=C(Nc1ccc(Oc2ccccc2)cc1)c1cccnc1NCc1ccncc1",
            "O=C(Nc1ccc(Oc2ccccc2)cc1)c1cccnc1NCc1ccncc1",
            "CNC(=O)c1cc(Oc2ccc3[nH]c(Nc4ccc(Cl)c(C(F)(F)F)c4)nc3c2)ccn1",
            "CNC(=O)c1cc(Oc2ccc3oc(Nc4ccc(Cl)c(OCC5CCC[NH+]5C)c4)nc3c2)ccn1",
            "CNC(=O)c1cc(Oc2ccc3oc(Nc4ccc(Cl)c(OCC5CCC[NH+]5C)c4)nc3c2)ccn1",
            "COc1cc2nccc(Oc3ccc4c(c3)OCCN4C(=O)Nc3ccc(Cl)cc3)c2cc1OC",
            "CNC(=O)c1c(C)oc2cc(Oc3cc[nH+]c4cc(OCCN5CCOCC5)ccc34)ccc12",
            "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)Nc5ccc(Cl)cc5)cccc4c3)c2cc1OC",
            "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)Nc5ccc(Cl)cc5)cccc4c3)c2cc1OC",
            "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)NC5CC5)cccc4c3)c2cc1OC",
            "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)NC5CC5)cccc4c3)c2cc1OC",
            "Cc1ccc(C(=O)Nc2cc(CCC[NH+](C)C)cc(C(F)(F)F)c2)cc1Nc1ncccc1-c1ccncn1",
            "COc1cc(Nc2nccc(Nc3ccc4c(C)n[nH]c4c3)n2)cc(OC)c1OC",
            "COc1cc(Nc2nccc(N(C)c3ccc4c(C)n[nH]c4c3)n2)cc(OC)c1OC",
            "Cc1ccn(-c2ccc3c(c2)NCC3(C)C)c(=O)c1-c1ccc2nc(N)ncc2c1",
            "Cc1ccn(-c2ccc3c(c2)NCC3(C)C)c(=O)c1-c1ccc2nc(N)ncc2c1",
            "Cc1ccc(C(=O)NCCC2CCCC2)cc1C(=O)Nc1ccc(N)nc1",
            "Cc1ccc(C(=O)NCCC2CCCC2)cc1C(=O)Nc1ccc(N)nc1",
            "Cc1ccn(-c2cccc(C(F)(F)F)c2)c(=O)c1-c1ccc2nc(N)ncc2c1",
            "Cc1ccn(-c2cccc(C(F)(F)F)c2)c(=O)c1-c1ccc2nc(N)ncc2c1",
            "O=C(Nc1cncnc1)c1c(Cl)ccc2c(Nc3cccc(C(F)(F)F)c3)noc12",
            "O=C(Nc1cncnc1)c1c(Cl)ccc2c(Nc3cccc(C(F)(F)F)c3)noc12",
            "CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21",
            "CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21"
        ]

        pattern_holders = [
            rdSubstructLibrary.PatternHolder(),
            rdSubstructLibrary.TautomerPatternHolder()
        ]
        for pattern_holder in pattern_holders:
            ligands = [Chem.MolFromSmiles(smiles) for smiles in ligand_smiles]
            mol_holder = rdSubstructLibrary.CachedMolHolder()
            reference_lib = rdSubstructLibrary.SubstructLibrary(
                mol_holder, pattern_holder)

            for ligand in ligands:
                reference_lib.AddMol(ligand)

            for thread_count in [1, 2, 0]:
                late_lib = rdSubstructLibrary.SubstructLibrary(
                    mol_holder, None)
                rdSubstructLibrary.AddPatterns(late_lib, thread_count)
                # Touch the first fingerprint: guards against a seg fault and
                # confirms the fingerprints were really created.
                late_lib.GetFpHolder().GetFingerprint(0)
                for ligand in ligands:
                    expected = reference_lib.CountMatches(ligand)
                    observed = late_lib.CountMatches(ligand)
                    self.assertTrue(expected)
                    self.assertEqual(expected, observed)
0
    def test0SubstructLibrary(self):
        """Search a library holding 100 copies of benzene with benzene itself.

        Every combination of molecule holder and (optional) pattern
        fingerprint holder is exercised; each search variant must report all
        100 entries.
        """
        for fpholderCls in [None, rdSubstructLibrary.PatternHolder]:
            for holder in [
                    rdSubstructLibrary.MolHolder(),
                    rdSubstructLibrary.CachedMolHolder(),
                    rdSubstructLibrary.CachedSmilesMolHolder()
            ]:
                fpholder = fpholderCls() if fpholderCls else None
                slib = rdSubstructLibrary.SubstructLibrary(holder, fpholder)
                for i in range(100):
                    m = Chem.MolFromSmiles("c1ccccc1")
                    # The value returned by AddMol is expected to be the
                    # insertion index.
                    self.assertEqual(slib.AddMol(m), i)

                # `m` is the last benzene added above; it doubles as the query.
                res = slib.GetMatches(m)
                self.assertEqual(len(res), 100)

                res = slib.GetMatches(m)
                self.assertEqual(len(res), 100)
                self.assertEqual(set(res), set(range(100)))

                res = slib.GetMatches(m, maxResults=100)
                self.assertEqual(len(res), 100)
                self.assertEqual(
                    len(slib.GetMatches(m, startIdx=0, endIdx=100)), 100)

                self.assertTrue(slib.HasMatch(m))
                self.assertEqual(slib.CountMatches(m), 100)
コード例 #7
0
ファイル: rough_test.py プロジェクト: chenlinkong/rdkit
 def testSearchOrder(self):
     """A custom search order decides which hits are returned first.

     Five ethers are added, the search order is set to (3, 2, 0, 1, 4), and a
     'COC' query capped at two results must return indices 3 and 2.
     """
     library = rdSubstructLibrary.SubstructLibrary()
     ether_smiles = ("CCCOC", "CCCCOCC", "CCOC", "COC", "CCCCCOC")
     for smiles in ether_smiles:
         library.AddMol(Chem.MolFromSmiles(smiles))

     library.SetSearchOrder((3, 2, 0, 1, 4))
     stored_order = library.GetSearchOrder()
     self.assertEqual(stored_order, (3, 2, 0, 1, 4))

     query = Chem.MolFromSmiles('COC')
     first_two = library.GetMatches(query, maxResults=2)
     self.assertEqual(list(first_two), [3, 2])
コード例 #8
0
ファイル: rough_test.py プロジェクト: lmmentel/rdkit
 def testMolBundles(self):
     """Search the library with a MolBundle query.

     The expected hits are the library entries matched by at least one
     member of the bundle; adding a member to the bundle widens the result.
     """
     library = rdSubstructLibrary.SubstructLibrary()
     for smiles in ('CCOC', 'CCNC', 'COOCOO', 'CCNC', 'CCCC'):
         library.AddMol(Chem.MolFromSmiles(smiles))

     bundle = Chem.MolBundle()
     for smiles in ('COC', 'CCC'):
         bundle.AddMol(Chem.MolFromSmiles(smiles))
     hits = library.GetMatches(bundle)
     self.assertEqual(list(hits), [0, 4])

     # With 'CN' in the bundle the two amine entries are expected as well.
     bundle.AddMol(Chem.MolFromSmiles('CN'))
     hits = library.GetMatches(bundle)
     self.assertEqual(sorted(hits), [0, 1, 3, 4])
コード例 #9
0
ファイル: rough_test.py プロジェクト: lmmentel/rdkit
    def testSubstructParameters(self):
        """Pass SubstructMatchParameters to a library search.

        With default parameters the chiral query is expected to hit both
        enantiomers and the unspecified form; with useChirality switched on
        only the entry with the same stereo label is expected.
        """
        library = rdSubstructLibrary.SubstructLibrary()
        for smiles in ('C[C@H](F)Cl', 'C[C@@H](F)Cl', 'CC(F)Cl'):
            library.AddMol(Chem.MolFromSmiles(smiles))

        bundle = Chem.MolBundle()
        for smiles in ('C[C@H](F)Cl', ):
            bundle.AddMol(Chem.MolFromSmiles(smiles))

        params = Chem.SubstructMatchParameters()
        default_hits = library.GetMatches(bundle, params)
        self.assertEqual(sorted(default_hits), [0, 1, 2])

        params.useChirality = True
        chiral_hits = library.GetMatches(bundle, params)
        self.assertEqual(sorted(chiral_hits), [0])
コード例 #10
0
ファイル: rough_test.py プロジェクト: lmmentel/rdkit
    def test_basic_addpatterns(self):
        """Add fingerprints to a fingerprint-less library with AddPatterns().

        For each SMILES-based molecule holder the library is created without
        fingerprints and AddPatterns() is called three ways: with no pattern
        holder, with a PatternHolder and with a TautomerPatternHolder.
        Afterwards the fingerprint holder must have one entry per molecule
        and every ligand must match at least once.
        """
        # add mols
        pdb_ligands = [
            "CCS(=O)(=O)c1ccc(OC)c(Nc2ncc(-c3cccc(-c4ccccn4)c3)o2)c1",
            "COc1ccc(S(=O)(=O)NCC2CC2)cc1Nc1ncc(-c2cccc(-c3cccnc3)c2)o1",
            "COc1ccc(-c2oc3ncnc(N)c3c2-c2ccc(NC(=O)Nc3cc(C(F)(F)F)ccc3F)cc2)cc1",
            "COC(=O)Nc1nc2ccc(Oc3ccc(NC(=O)Nc4cc(C(F)(F)F)ccc4F)cc3)cc2[nH]1",
            "COc1cc(Nc2ncnc(-c3cccnc3Nc3ccccc3)n2)cc(OC)c1OC",
            "O=C(Nc1ccc(Oc2ccccc2)cc1)c1cccnc1NCc1ccncc1",
            "O=C(Nc1ccc(Oc2ccccc2)cc1)c1cccnc1NCc1ccncc1",
            "CNC(=O)c1cc(Oc2ccc3[nH]c(Nc4ccc(Cl)c(C(F)(F)F)c4)nc3c2)ccn1",
            "CNC(=O)c1cc(Oc2ccc3oc(Nc4ccc(Cl)c(OCC5CCC[NH+]5C)c4)nc3c2)ccn1",
            "CNC(=O)c1cc(Oc2ccc3oc(Nc4ccc(Cl)c(OCC5CCC[NH+]5C)c4)nc3c2)ccn1",
            "COc1cc2nccc(Oc3ccc4c(c3)OCCN4C(=O)Nc3ccc(Cl)cc3)c2cc1OC",
            "CNC(=O)c1c(C)oc2cc(Oc3cc[nH+]c4cc(OCCN5CCOCC5)ccc34)ccc12",
            "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)Nc5ccc(Cl)cc5)cccc4c3)c2cc1OC",
            "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)Nc5ccc(Cl)cc5)cccc4c3)c2cc1OC",
            "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)NC5CC5)cccc4c3)c2cc1OC",
            "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)NC5CC5)cccc4c3)c2cc1OC",
            "Cc1ccc(C(=O)Nc2cc(CCC[NH+](C)C)cc(C(F)(F)F)c2)cc1Nc1ncccc1-c1ccncn1",
            "COc1cc(Nc2nccc(Nc3ccc4c(C)n[nH]c4c3)n2)cc(OC)c1OC",
            "COc1cc(Nc2nccc(N(C)c3ccc4c(C)n[nH]c4c3)n2)cc(OC)c1OC",
            "Cc1ccn(-c2ccc3c(c2)NCC3(C)C)c(=O)c1-c1ccc2nc(N)ncc2c1",
            "Cc1ccn(-c2ccc3c(c2)NCC3(C)C)c(=O)c1-c1ccc2nc(N)ncc2c1",
            "Cc1ccc(C(=O)NCCC2CCCC2)cc1C(=O)Nc1ccc(N)nc1",
            "Cc1ccc(C(=O)NCCC2CCCC2)cc1C(=O)Nc1ccc(N)nc1",
            "Cc1ccn(-c2cccc(C(F)(F)F)c2)c(=O)c1-c1ccc2nc(N)ncc2c1",
            "Cc1ccn(-c2cccc(C(F)(F)F)c2)c(=O)c1-c1ccc2nc(N)ncc2c1",
            "O=C(Nc1cncnc1)c1c(Cl)ccc2c(Nc3cccc(C(F)(F)F)c3)noc12",
            "O=C(Nc1cncnc1)c1c(Cl)ccc2c(Nc3cccc(C(F)(F)F)c3)noc12",
            "CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21",
            "CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21"
        ]

        for holder in [
                rdSubstructLibrary.CachedSmilesMolHolder(),
                rdSubstructLibrary.CachedTrustedSmilesMolHolder()
        ]:
            for smi in pdb_ligands:
                holder.AddSmiles(smi)

            for pattern in [
                    None,
                    rdSubstructLibrary.PatternHolder(),
                    rdSubstructLibrary.TautomerPatternHolder()
            ]:
                lib = rdSubstructLibrary.SubstructLibrary(holder)
                if pattern is None:
                    # No holder supplied: AddPatterns picks its own.
                    rdSubstructLibrary.AddPatterns(lib, numThreads=-1)
                else:
                    # Previously the loop variable was ignored, so the
                    # explicit pattern holders were never exercised.
                    rdSubstructLibrary.AddPatterns(lib,
                                                   pattern,
                                                   numThreads=-1)
                self.assertEqual(len(lib.GetMolHolder()),
                                 len(lib.GetFpHolder()))
                for smi in pdb_ligands:
                    self.assertTrue(lib.CountMatches(Chem.MolFromSmiles(smi)))
コード例 #11
0
    def testOptions(self):
        """Compare library searches against per-molecule matching.

        For every molecule holder type the number of library hits must equal
        the number of stereo example molecules that match the same query
        directly, with and without chirality.
        """
        mols = makeStereoExamples() * 10

        def reference_count(query, chirality):
            # Brute-force count obtained without going through the library.
            return len([
                candidate for candidate in mols
                if candidate.HasSubstructMatch(query, useChirality=chirality)
            ])

        holder_classes = [
            rdSubstructLibrary.MolHolder,
            rdSubstructLibrary.CachedMolHolder,
            rdSubstructLibrary.CachedSmilesMolHolder,
            rdSubstructLibrary.CachedTrustedSmilesMolHolder,
        ]
        for holderCls in holder_classes:
            mol_holder = holderCls()
            library = rdSubstructLibrary.SubstructLibrary(mol_holder, None)
            for mol in mols:
                library.AddMol(mol)

            # Wildcard substituents, default search options; the expected
            # count comes from chirality-aware matching.
            core = Chem.MolFromSmarts("C-1-C-C-O-C(-*)(-*)1")
            hits = library.GetMatches(core)
            self.assertEqual(len(hits), reference_count(core, True))

            # Explicit O/N substituents, chirality ignored.
            core = Chem.MolFromSmarts("C-1-C-C-O-C(-[O])(-[N])1")
            core.SetProp("core", "core")
            hits = library.GetMatches(core, useChirality=False)
            self.assertEqual(len(hits), reference_count(core, False))

            # Chiral query, but chirality still ignored by the search.
            core = Chem.MolFromSmarts("C-1-C-C-O-[C@@](-[O])(-[N])1")
            hits = library.GetMatches(core, useChirality=False)
            self.assertEqual(len(hits), reference_count(core, False))

            # Chiral query with default search options.
            core = Chem.MolFromSmarts("C-1-C-C-O-[C@@](-[O])(-[N])1")
            hits = library.GetMatches(core)
            self.assertEqual(len(hits), reference_count(core, True))
コード例 #12
0
ファイル: rough_test.py プロジェクト: lmmentel/rdkit
    def testSearchOrder2(self):
        """Order the search by atom count and check the order of the hits.

        After sorting the library indices by the number of atoms of each
        stored molecule, a 'COC' query must return the entries from smallest
        to largest.
        """
        library = rdSubstructLibrary.SubstructLibrary()
        for smiles in ("CCCOC", "CCCCOCC", "CCOC", "COC", "CCCCCOC"):
            library.AddMol(Chem.MolFromSmiles(smiles))

        def setSearchSmallestFirst(sslib):
            mol_holder = sslib.GetMolHolder()

            def atom_count(index):
                return mol_holder.GetMol(index).GetNumAtoms()

            # sorted() is stable, so entries with equal atom counts keep
            # their original relative order.
            by_size = sorted(range(len(sslib)), key=atom_count)
            sslib.SetSearchOrder(by_size)

        setSearchSmallestFirst(library)
        query = Chem.MolFromSmiles('COC')
        hits = library.GetMatches(query)
        self.assertEqual(list(hits), [3, 2, 0, 1, 4])
コード例 #13
0
def main(directory: str, chebml_version: str):
    """Download the ChEMBL data and build a pickled substructure library.

    Files that already exist in ``directory`` are not downloaded or rebuilt.

    :param directory: folder that receives the downloads and the pickle;
        created if missing.
    :param chebml_version: ChEMBL release identifier used in the FTP path
        and in the output file names.
    """
    os.makedirs(directory, exist_ok=True)

    bradley_path = os.path.join(directory, 'jm020472j_s2.xls')
    if not os.path.exists(bradley_path):
        try:
            wget.download(bradley_url, out=directory)
        except Exception:
            # Best effort only: a failed download of this file is reported
            # and skipped.  `Exception` (not a bare except) so that Ctrl-C
            # and SystemExit still stop the program.
            click.echo('There goes ACS stopping science')

    chembl_url = (
        f'ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/'
        f'chembl_{chebml_version}/chembl_{chebml_version}.sdf.gz')

    sdf_path = os.path.join(directory, f'chembl_{chebml_version}.sdf.gz')
    if not os.path.exists(sdf_path):
        wget.download(chembl_url, out=directory)

    sss_path = os.path.join(directory, f'chembl{chebml_version}_sssdata.pkl')
    if not os.path.exists(sss_path):
        click.echo(f'RDKit Version: {rdBase.rdkitVersion}')
        mols = rdSubstructLibrary.CachedTrustedSmilesMolHolder()
        fps = rdSubstructLibrary.PatternHolder()

        with gzip.GzipFile(sdf_path) as gz:
            suppl = Chem.ForwardSDMolSupplier(gz)
            for mol in tqdm(suppl,
                            desc=f'Processing ChEBML {chebml_version}',
                            unit_scale=True):
                # Skip unreadable records and molecules above 50 atoms.
                if mol is None or mol.GetNumAtoms() > 50:
                    continue
                fp = Chem.PatternFingerprint(mol)
                smi = Chem.MolToSmiles(mol)
                # Feed the holders directly instead of buffering every
                # (smiles, fingerprint) pair in an intermediate list.
                mols.AddSmiles(smi)
                fps.AddFingerprint(fp)

        library = rdSubstructLibrary.SubstructLibrary(mols, fps)

        click.echo(f'Outputting to {sss_path}')
        # Write to a temporary name and rename afterwards: an interrupted
        # dump must not leave a truncated file that the existence check
        # above would treat as finished on the next run.
        tmp_path = sss_path + '.tmp'
        with open(tmp_path, 'wb') as file:
            pickle.dump(library, file, protocol=pickle.HIGHEST_PROTOCOL)
        os.replace(tmp_path, sss_path)

    click.echo('Done ;)')
コード例 #14
0
def read_in_lib(input_smi):
    """Build a SubstructLibrary from a SMILES file and its pickled fingerprints.

    The first line of ``input_smi`` is discarded (presumably a header --
    confirm against the file format); the second whitespace-separated field
    of every remaining line is added as a SMILES.  One fingerprint per line
    is unpickled, in order, from the file whose name is ``input_smi`` with
    ".smi" replaced by ".pkl".

    NOTE: pickle.load executes whatever the file contains; only use this on
    fingerprint files from a trusted source.

    :param input_smi: path of the SMILES file.
    :return: the populated SubstructLibrary.
    """
    started = time.time()
    mol_holder = rdSubstructLibrary.CachedTrustedSmilesMolHolder()
    fp_holder = rdSubstructLibrary.PatternHolder()
    with open(input_smi, 'r') as smi_file:
        records = [line.split() for line in smi_file]
        del records[0]
        pickle_path = input_smi.replace(".smi", ".pkl")
        with open(pickle_path, 'rb') as fp_file:
            for fields in tqdm.tqdm(records):
                mol_holder.AddSmiles(fields[1])
                fp_holder.AddFingerprint(pickle.load(fp_file))
    library = rdSubstructLibrary.SubstructLibrary(mol_holder, fp_holder)
    elapsed = time.time() - started
    print("That took %.2f seconds. The library has %d molecules." %
          (elapsed, len(library)))
    return library
コード例 #15
0
    def testBinaryCache(self):
        """Search a CachedMolHolder that was filled with binary molecules.

        The stereo examples are added through AddBinary(); the number of
        library hits must equal the number of molecules that match the same
        query directly, with and without chirality.
        """
        mols = makeStereoExamples() * 10
        holder = rdSubstructLibrary.CachedMolHolder()

        library = rdSubstructLibrary.SubstructLibrary(holder, None)

        for mol in mols:
            holder.AddBinary(mol.ToBinary())

        def reference_count(query, chirality):
            # Brute-force count obtained without going through the library.
            return len([
                candidate for candidate in mols
                if candidate.HasSubstructMatch(query, useChirality=chirality)
            ])

        # Wildcard substituents, default search options; the expected count
        # comes from chirality-aware matching.
        core = Chem.MolFromSmarts("C-1-C-C-O-C(-*)(-*)1")
        hits = library.GetMatches(core)
        self.assertEqual(len(hits), reference_count(core, True))

        # Explicit O/N substituents, chirality ignored.
        core = Chem.MolFromSmarts("C-1-C-C-O-C(-[O])(-[N])1")
        core.SetProp("core", "core")
        hits = library.GetMatches(core, useChirality=False)
        self.assertEqual(len(hits), reference_count(core, False))

        # Chiral query, but chirality still ignored by the search.
        core = Chem.MolFromSmarts("C-1-C-C-O-[C@@](-[O])(-[N])1")
        hits = library.GetMatches(core, useChirality=False)
        self.assertEqual(len(hits), reference_count(core, False))

        # Chiral query with default search options.
        core = Chem.MolFromSmarts("C-1-C-C-O-[C@@](-[O])(-[N])1")
        hits = library.GetMatches(core)
        self.assertEqual(len(hits), reference_count(core, True))
コード例 #16
0
    def test1SubstructLibrary(self):
        """Search a library that alternates benzene and butane entries.

        Benzene is stored at the even indices and butane at the odd ones
        (100 of each).  Each query must find exactly its own 100 entries,
        and a search restricted to an index window must find only the
        entries inside that window.
        """
        for fpholderCls in [None, rdSubstructLibrary.PatternHolder]:
            for holder in [
                    rdSubstructLibrary.MolHolder(),
                    rdSubstructLibrary.CachedMolHolder(),
                    rdSubstructLibrary.CachedSmilesMolHolder()
            ]:
                fpholder = fpholderCls() if fpholderCls else None
                slib = rdSubstructLibrary.SubstructLibrary(holder, fpholder)
                mols = []
                for i in range(100):
                    m = Chem.MolFromSmiles("c1ccccc1")
                    benzene_idx = slib.AddMol(m)
                    self.assertEqual(benzene_idx, i * 2)
                    mols.append(m)
                    m2 = Chem.MolFromSmiles("CCCC")
                    butane_idx = slib.AddMol(m2)
                    self.assertEqual(butane_idx, i * 2 + 1)
                    mols.append(m2)

                # m / m2 are the last benzene / butane added; they double as
                # the queries.
                even_indices = set(range(0, 200, 2))
                odd_indices = set(range(1, 200, 2))

                res = slib.GetMatches(m)
                self.assertEqual(len(res), 100)
                self.assertEqual(set(res), even_indices)

                res = slib.GetMatches(m2)
                self.assertEqual(len(res), 100)
                self.assertTrue(set(res) == odd_indices)

                res = slib.GetMatches(m)
                self.assertEqual(len(res), 100)

                res = slib.GetMatches(m, maxResults=100)
                self.assertEqual(len(res), 100)

                # Index windows covering half of the library.
                windowed = slib.GetMatches(m, startIdx=0, endIdx=50 * 2)
                self.assertEqual(len(windowed), 50)
                windowed = slib.GetMatches(m2, startIdx=1, endIdx=50 * 2 + 1)
                self.assertEqual(len(windowed), 50)

                for query in (m, m2):
                    self.assertTrue(slib.HasMatch(query))
                for query in (m, m2):
                    self.assertEqual(slib.CountMatches(query), 100)
コード例 #17
0
ファイル: rough_test.py プロジェクト: lmmentel/rdkit
    def testSearchOrder(self):
        """A custom search order decides which hits (and keys) come first.

        Run once without a key holder and once with a KeyFromPropHolder.
        Each molecule is named after its index, so with the key holder in
        place the keys of the first two hits must be '3' and '2'.
        """
        ether_smiles = ("CCCOC", "CCCCOCC", "CCOC", "COC", "CCCCCOC")
        for keyholder in [None, rdSubstructLibrary.KeyFromPropHolder()]:
            library = rdSubstructLibrary.SubstructLibrary(
                rdSubstructLibrary.MolHolder(), keyholder)
            for position, smiles in enumerate(ether_smiles):
                mol = Chem.MolFromSmiles(smiles)
                mol.SetProp("_Name", str(position))
                library.AddMol(mol)

            library.SetSearchOrder((3, 2, 0, 1, 4))
            self.assertEqual(library.GetSearchOrder(), (3, 2, 0, 1, 4))

            query = Chem.MolFromSmiles('COC')
            # The same capped search is issued twice and must give the same
            # answer both times.
            for _ in range(2):
                hits = library.GetMatches(query, maxResults=2)
                self.assertEqual(list(hits), [3, 2])

            if keyholder:
                self.assertEqual(keyholder.GetPropName(), "_Name")
                hits = library.GetMatches(query, maxResults=2)
                keys = library.GetKeyHolder().GetKeys(hits)
                self.assertEqual(list(keys), ['3', '2'])
コード例 #18
0
 def __init__(self, proj, datapath, dbpath, chembldb, flimit=1e-3, MinClusterSize=20, clustering='UPGMA', calcDists=True, calcScores=False):
     """Store the settings, load the project data and open the substructure database.

     :param proj: project name; used to build the database file names.
     :param datapath: forwarded to utilsDataPrep.PrepareData.
     :param dbpath: when arthor is available, a directory prefix (it is
         concatenated directly with the project name, so it presumably has to
         end with a path separator -- confirm with callers).  Otherwise either
         an existing rdSubstructLibrary.SubstructLibrary or the path of a
         pickle file that is loaded if present and created if not.
     :param chembldb: stored on the instance; not used here.
     :param flimit: stored on the instance; not used here.
     :param MinClusterSize: stored on the instance; not used here.
     :param clustering: stored on the instance; not used here.
     :param calcDists: forwarded to utilsDataPrep.PrepareData.
     :param calcScores: stored on the instance; not used here.
     """
     self.proj = proj
     self.datapath = datapath
     self.dbpath = dbpath
     self.chembldb = chembldb
     self.flimit = flimit
     self.MinClusterSize = MinClusterSize
     self.clustering = clustering
     self.calcScores = calcScores
     self.calcDists = calcDists
     # load data
     self.moldata_proj, self.distdata_proj = utilsDataPrep.PrepareData(self.proj, self.datapath, distMeasure='Tanimoto', FP='Morgan2', calcDists=self.calcDists)
     if arthor is not None:
         # exist_ok avoids the check-then-create race of isdir() + mkdir().
         os.makedirs(dbpath, exist_ok=True)
         # set up project database for arthor substructure matching
         df = self.moldata_proj[['Structure', 'ID']]
         # Write the SMILES file where smi2atdb reads it below; it used to be
         # written to a hard-coded './arthor/' folder regardless of dbpath.
         df.to_csv('{0}{1}.smi'.format(self.dbpath, self.proj), header=None, index=None, sep=' ')
         os.system('smi2atdb -j 0 -t {0}{1}.smi {0}{1}.atdb'.format(self.dbpath, self.proj))
         os.system('atdb2fp -j 0 {0}{1}.atdb'.format(self.dbpath, self.proj))
         self.proj_db = arthor.SubDb('{0}{1}.atdb'.format(self.dbpath, self.proj))
         # NOTE(review): self.db_size is not set in this branch -- confirm
         # that no caller needs it when arthor is in use.
     else:
         if isinstance(dbpath, rdSubstructLibrary.SubstructLibrary):
             # A ready-made library was handed in; use it as is.
             self.proj_db = dbpath
             self.db_size = len(self.proj_db)
         else:
             if not os.path.exists(dbpath):
                 print("creating database")
                 mols = rdSubstructLibrary.CachedTrustedSmilesMolHolder()
                 fps = rdSubstructLibrary.PatternHolder()
                 for smi in self.moldata_proj['Structure']:
                     m = Chem.MolFromSmiles(smi)
                     mols.AddSmiles(Chem.MolToSmiles(m))
                     fps.AddFingerprint(Chem.PatternFingerprint(m))
                 self.proj_db = rdSubstructLibrary.SubstructLibrary(mols, fps)
                 self.db_size = len(mols)
                 # Context manager so the file is flushed and closed at once.
                 with open(dbpath, 'wb+') as db_file:
                     pickle.dump(self.proj_db, db_file)
             else:
                 # NOTE: unpickling runs code from the file; dbpath must
                 # point to a trusted database file.
                 with open(dbpath, 'rb') as db_file:
                     self.proj_db = pickle.load(db_file)
                 self.db_size = len(self.proj_db)
コード例 #19
0
ファイル: rough_test.py プロジェクト: wvandertoorn/rdkit
    def testRingSmartsWithTrustedSmiles(self):
        """Ring queries must work through the library for trusted SMILES.

        Matching a ring query directly against the molecule returned by the
        trusted-SMILES holder is expected to raise RuntimeError, while the
        same query run through the library must find the single cyclopropane.
        """
        atom_ring_query = Chem.MolFromSmarts("[C&R1]")
        bond_ring_query = Chem.MolFromSmarts("C@C")  # ring bond
        holder = rdSubstructLibrary.CachedTrustedSmilesMolHolder()
        lib = rdSubstructLibrary.SubstructLibrary(holder)
        lib.AddMol(Chem.MolFromSmiles("C1CC1"))

        # make sure we can get an unsanitized molecule that fails (no ring info)
        direct_checks = (
            ("Testing atom rings", atom_ring_query),
            ("testing bond rings", bond_ring_query),
        )
        for banner, query in direct_checks:
            print(banner)
            with self.assertRaises(RuntimeError):
                holder.GetMol(0).HasSubstructMatch(query)

        # shouldn't throw
        library_checks = (
            ("searching atom rings", atom_ring_query),
            ("searching bond rings", bond_ring_query),
        )
        for banner, query in library_checks:
            print(banner)
            self.assertEqual(len(lib.GetMatches(query)), 1)
            self.assertEqual(lib.CountMatches(query), 1)
        print("done")
コード例 #20
0
ファイル: rough_test.py プロジェクト: lmmentel/rdkit
    def testPropHolder(self):
        """Keys are taken from the property the KeyFromPropHolder is bound to.

        Checked for the default property ("_Name") and for a custom one
        ('foo'): the key reported for the matching molecule must be the value
        stored under that property.
        """
        for propname in [None, 'foo']:
            if propname is None:
                keyholder = rdSubstructLibrary.KeyFromPropHolder()
                key_prop = "_Name"
            else:
                keyholder = rdSubstructLibrary.KeyFromPropHolder(propname)
                key_prop = propname

            library = rdSubstructLibrary.SubstructLibrary(
                rdSubstructLibrary.MolHolder(), keyholder)
            mol = Chem.MolFromSmiles('CCC')
            self.assertEqual(keyholder.GetPropName(), key_prop)

            # Store the key under the property the holder reads from.
            mol.SetProp(key_prop, 'Z11234')

            library.AddMol(mol)
            indices = library.GetMatches(mol)
            keys = list(library.GetKeyHolder().GetKeys(indices))
            self.assertEqual(['Z11234'], keys)
コード例 #21
0
ファイル: rough_test.py プロジェクト: lmmentel/rdkit
    def test1SubstructLibrary(self):
        """Search a library that alternates benzene and butane entries.

        Every combination of key holder, fingerprint holder and molecule
        holder is exercised.  Benzene is stored at the even indices and
        butane at the odd ones (100 of each), each named after its index.
        When serialization is available, the pickled and the
        Serialize()-round-tripped copies are checked as well.
        """
        for keyholderCls in [None, rdSubstructLibrary.KeyFromPropHolder]:
            for fpholderCls in [None, rdSubstructLibrary.PatternHolder]:
                for holder in [
                        rdSubstructLibrary.MolHolder(),
                        rdSubstructLibrary.CachedMolHolder(),
                        rdSubstructLibrary.CachedSmilesMolHolder()
                ]:
                    fpholder = fpholderCls() if fpholderCls else None
                    if keyholderCls:
                        keyholder = keyholderCls()
                        self.assertEqual(keyholder.GetPropName(), "_Name")
                    else:
                        keyholder = None

                    source_lib = rdSubstructLibrary.SubstructLibrary(
                        holder, fpholder, keyholder)
                    mols = []
                    for i in range(100):
                        m = Chem.MolFromSmiles("c1ccccc1")
                        m.SetProp("_Name", str(i * 2))
                        self.assertEqual(source_lib.AddMol(m), i * 2)
                        mols.append(m)
                        m2 = Chem.MolFromSmiles("CCCC")
                        m2.SetProp("_Name", str(i * 2 + 1))
                        self.assertEqual(source_lib.AddMol(m2), i * 2 + 1)
                        mols.append(m2)

                    libs = [source_lib]
                    if rdSubstructLibrary.SubstructLibraryCanSerialize():
                        via_pickle = pickle.loads(pickle.dumps(source_lib))
                        via_bytes = rdSubstructLibrary.SubstructLibrary(
                            source_lib.Serialize())
                        libs.extend([via_pickle, via_bytes])

                    # m / m2 are the last benzene / butane added; they double
                    # as the queries.
                    for slib in libs:
                        res = slib.GetMatches(m)
                        self.assertEqual(len(res), 100)
                        self.assertEqual(set(res), set(range(0, 200, 2)))
                        if keyholderCls:
                            # NOTE(review): this compares stringified hit
                            # indices, not keys read from the key holder --
                            # confirm whether GetKeyHolder().GetKeys(res) was
                            # intended.
                            self.assertEqual(
                                [str(idx) for idx in res],
                                [str(idx) for idx in range(0, 200, 2)])

                        res = slib.GetMatches(m2)
                        self.assertEqual(len(res), 100)
                        self.assertTrue(set(res) == set(range(1, 200, 2)))
                        if keyholderCls:
                            self.assertEqual(
                                [str(idx) for idx in res],
                                [str(idx) for idx in range(1, 200, 2)])

                        res = slib.GetMatches(m)
                        self.assertEqual(len(res), 100)

                        res = slib.GetMatches(m, maxResults=100)
                        self.assertEqual(len(res), 100)

                        # Index windows covering half of the library.
                        windowed = slib.GetMatches(m,
                                                   startIdx=0,
                                                   endIdx=50 * 2)
                        self.assertEqual(len(windowed), 50)
                        windowed = slib.GetMatches(m2,
                                                   startIdx=1,
                                                   endIdx=50 * 2 + 1)
                        self.assertEqual(len(windowed), 50)

                        for query in (m, m2):
                            self.assertTrue(slib.HasMatch(query))
                        for query in (m, m2):
                            self.assertEqual(slib.CountMatches(query), 100)