Ejemplo n.º 1
0
    def test_InsertData(self):
        """Exercise InsertData, AddColumn and InsertColumnData on a scratch table.

        The table is (re)created from scratch, filled with ten rows, extended
        with a new column whose values start out as None, and that column is
        then populated row by row. The scratch table is dropped at the end.
        """
        newTblName = 'NEW_TABLE'
        conn = DbConnect(self.tempDbName)
        try:
            conn.GetCursor().execute('drop table %s' % (newTblName))
        except Exception:
            # Best effort: the drop is allowed to fail (e.g. when the table
            # is not there yet).
            pass
        conn.Commit()
        conn.AddTable(newTblName, 'id int,val1 int, val2 int')
        for i in range(10):
            conn.InsertData(newTblName, (i, i + 1, 2 * i))
        conn.Commit()
        d = conn.GetData(table=newTblName)
        # unittest assertion instead of a bare assert, which is stripped
        # when Python runs with -O.
        self.assertEqual(len(d), 10)

        self.assertEqual(len(conn.GetColumnNames(table=newTblName)), 3)
        conn.AddColumn(newTblName, 'val3', 'int')
        conn.Commit()
        self.assertEqual(len(conn.GetColumnNames(table=newTblName)), 4)
        d = conn.GetColumns('id,val3', table=newTblName)
        self.assertEqual(len(d), 10)
        # A freshly added column holds no values yet.
        self.assertTrue(all(r[1] is None for r in d))
        for r in d:
            # Copy each row's id into val3, selecting the row by its id.
            conn.InsertColumnData(newTblName, 'val3', r[0],
                                  'id={0}'.format(r[0]))
        conn.Commit()
        d = conn.GetColumns('id,val3', table=newTblName)
        self.assertTrue(all(r[0] == r[1] for r in d))

        # Drop the reference to the result set before dropping the table
        # (presumably so nothing still holds the table open - TODO confirm).
        d = None
        try:
            conn.GetCursor().execute('drop table %s' % (newTblName))
        except Exception:
            self.fail('drop table failed')
Ejemplo n.º 2
0
 def testAddTable(self):
     """ checks that a table created with AddTable shows up in GetTableNames """
     tableName = 'NEW_TABLE'
     dropCmd = 'drop table %s' % (tableName)
     db = DbConnect(self.tempDbName)
     # start from a clean slate; the drop is allowed to fail
     try:
         db.GetCursor().execute(dropCmd)
     except Exception:
         pass
     db.Commit()
     db.AddTable(tableName, 'id int')
     known = []
     for entry in db.GetTableNames():
         known.append(entry.strip())
     assert tableName in known, 'name (%s) not found in %s' % (tableName,
                                                               str(known))
     # clean up the scratch table
     db.GetCursor().execute(dropCmd)
Ejemplo n.º 3
0
    def testCursor(self):
        """ creates a view through a raw cursor and checks how GetTableNames
        reports it with and without includeViews """

        view = 'TEST_VIEW'
        dropView = 'drop view %s' % (view)
        createView = 'create view %s as select val,id from ten_elements' % (
            view)

        db = DbConnect(self.tempDbName)
        cursor = db.GetCursor()
        assert cursor
        # the view may be left over from an earlier run; ignore a failed drop
        try:
            cursor.execute(dropView)
        except Exception:
            pass
        try:
            cursor.execute(createView)
        except Exception:
            import traceback
            traceback.print_exc()
            raise AssertionError('create view failed')
        db.Commit()

        tablesOnly = [nm.strip() for nm in db.GetTableNames(includeViews=0)]
        self.assertNotIn(view, tablesOnly, 'improper view found')
        tablesAndViews = [
            nm.strip() for nm in db.GetTableNames(includeViews=1)
        ]
        self.assertIn(view, tablesAndViews, 'improper view not found')
        # remove the scratch view again
        try:
            cursor.execute(dropView)
        except Exception:
            raise AssertionError('drop table failed')
Ejemplo n.º 4
0
    def testCursor(self):
        """ tests GetCursor and GetTableNames functionalities

        A scratch view is created through a raw cursor; it must be absent from
        GetTableNames(includeViews=0) and present in
        GetTableNames(includeViews=1). The view is dropped again at the end.
        """

        viewName = 'TEST_VIEW'
        conn = DbConnect(self.tempDbName)
        curs = conn.GetCursor()
        # unittest assertions are used throughout: bare asserts are stripped
        # when Python runs with -O.
        self.assertTrue(curs)
        try:
            curs.execute('drop view %s' % (viewName))
        except Exception:
            # Best effort: the view usually does not exist yet.
            pass
        try:
            curs.execute('create view %s as select val,id from ten_elements' %
                         (viewName))
        except Exception:
            import traceback
            traceback.print_exc()
            self.fail('create view failed')
        conn.Commit()

        names = [x.strip() for x in conn.GetTableNames(includeViews=0)]
        self.assertNotIn(viewName, names, 'improper view found')
        names = [x.strip() for x in conn.GetTableNames(includeViews=1)]
        # The original message claimed the view was "found" on the branch
        # where it is actually missing.
        self.assertIn(viewName, names, 'view not found in %s' % (str(names)))
        try:
            curs.execute('drop view %s' % (viewName))
        except Exception:
            self.fail('drop view failed')
Ejemplo n.º 5
0
    def testAddTable(self):
        """ checks AddTable against GetTableNames and GetColumnNames """
        tableName = 'NEW_TABLE'
        dropCmd = 'drop table %s' % (tableName)
        db = DbConnect(self.tempDbName)
        # make sure the table is absent; the drop is allowed to fail
        try:
            db.GetCursor().execute(dropCmd)
        except Exception:
            pass
        db.Commit()

        namesBefore = [nm.strip() for nm in db.GetTableNames()]
        self.assertNotIn(tableName, namesBefore)
        db.AddTable(tableName, 'id int')
        namesAfter = [nm.strip() for nm in db.GetTableNames()]
        self.assertIn(tableName, namesAfter)

        columns = db.GetColumnNames(table=tableName)
        self.assertEqual(columns, ['id'])

        # remove the scratch table again
        db.GetCursor().execute(dropCmd)
Ejemplo n.º 6
0
def GetFingerprints(details):
    """ returns an iterable sequence of fingerprints

    Each fingerprint will have a _fieldsFromDb member whose first entry is
    the id.

    **Arguments**

      - details: object providing the attributes read here: dbName,
        tableName, fpColName, noPickle and (optionally) dbUser/dbPassword
        for the database source, or inFileName for the pickle-file source.

    **Returns**

      - database source: the supplier built from the query, or None if the
        connection could not be established (the error is reported through
        FingerprintMols.error).

      - file source: a list of fingerprints read from (id, fingerprint)
        pickles stored back to back in the file; an empty list if the file
        cannot be opened.

      - None when neither source is configured.

    """
    if details.dbName and details.tableName:
        try:
            conn = DbConnect(details.dbName, details.tableName)
            if hasattr(details, 'dbUser'):
                conn.user = details.dbUser
            if hasattr(details, 'dbPassword'):
                conn.password = details.dbPassword
        except Exception:
            import traceback
            FingerprintMols.error(
                'Error: Problems establishing connection to database: %s|%s\n'
                % (details.dbName, details.tableName))
            traceback.print_exc()
            # Without a connection nothing below can work; previously the
            # code fell through and failed on the unbound `conn`.
            return None
        cmd = _ConstructSQL(details, extraFields=details.fpColName)
        curs = conn.GetCursor()
        if _dataSeq:
            suppl = _dataSeq(curs,
                             cmd,
                             depickle=not details.noPickle,
                             klass=DataStructs.ExplicitBitVect)
            # Keep a reference to the connection on _dataSeq (presumably so
            # it outlives this function while the cursor is in use - TODO
            # confirm).
            _dataSeq._conn = conn
        else:
            # NOTE(review): `data` is not defined anywhere in this function;
            # unless it is a module-level name this branch raises NameError.
            # Left untouched because the intended argument cannot be
            # determined from here.
            suppl = DbFpSupplier.ForwardDbFpSupplier(
                data, fpColName=details.fpColName)
    elif details.inFileName:
        try:
            # Pickle streams are binary data, so open the file in binary mode.
            inF = open(details.inFileName, 'rb')
        except IOError:
            import traceback
            FingerprintMols.error('Error: Problems reading from file %s\n' %
                                  (details.inFileName))
            traceback.print_exc()
            return []

        suppl = []
        # NOTE(review): unpickling executes arbitrary code; only use this on
        # trusted input files.
        with inF:
            while True:
                try:
                    fpId, fp = cPickle.load(inF)
                except Exception:
                    # EOFError marks the normal end of the stream. Any other
                    # failure also ends reading, as the original best-effort
                    # loop did.
                    break
                else:
                    fp._fieldsFromDb = [fpId]
                    suppl.append(fp)
    else:
        suppl = None

    return suppl
Ejemplo n.º 7
0
    def test_GetTableNames(self):
        """ GetTableNames gives the same answer with or without a cursor """
        # First variant: a cursor is instantiated before asking for the names.
        db = DbConnect(self.tempDbName)
        db.GetCursor()
        withCursor = sorted(db.GetTableNames())

        # Second variant: a fresh connection on which no cursor has been
        # requested (this exercises the DbInfo code path).
        db = DbConnect(self.tempDbName)
        withoutCursor = sorted(db.GetTableNames())

        self.assertEqual(withCursor, withoutCursor)
Ejemplo n.º 8
0
def ScreenInDb(details, mol):
    """ screens the configured fingerprint source against a probe molecule

    **Arguments**

      - details: object carrying the screening configuration; its __dict__
        is passed as keyword arguments to FingerprintMols.FingerprintMol
        and it provides dbName, tableName, metric, fpColName, doThreshold,
        screenThresh and topN (plus optional dbUser/dbPassword).

      - mol: the probe molecule

    **Returns**

      - for Tanimoto, Dice and Cosine metrics: the rows returned by the
        similarity query executed in the database

      - for any other metric: the result of ScreenFingerprints applied to
        the fingerprints from GetFingerprints

      - an empty list if the probe cannot be fingerprinted or if the
        in-database path is needed but no connection is available

    """
    try:
        # apply() is gone in Python 3; this is the equivalent call syntax.
        probeFp = FingerprintMols.FingerprintMol(mol, **details.__dict__)
    except Exception:
        import traceback
        FingerprintMols.error('Error: problems fingerprinting molecule.\n')
        traceback.print_exc()
        return []

    # Stays None when no database is configured or connecting fails, so the
    # SQL branch below can detect this instead of hitting an unbound name.
    conn = None
    if details.dbName and details.tableName:
        try:
            conn = DbConnect(details.dbName, details.tableName)
            if hasattr(details, 'dbUser'):
                conn.user = details.dbUser
            if hasattr(details, 'dbPassword'):
                conn.password = details.dbPassword
        except Exception:
            import traceback
            FingerprintMols.error(
                'Error: Problems establishing connection to database: %s|%s\n'
                % (details.dbName, details.tableName))
            traceback.print_exc()
            conn = None

    # Metrics that are evaluated through an SQL function of the given name.
    sqlFuncs = (
        (DataStructs.TanimotoSimilarity, 'rd_tanimoto'),
        (DataStructs.DiceSimilarity, 'rd_dice'),
        (DataStructs.CosineSimilarity, 'rd_cosine'),
    )
    func = None
    for metric, funcName in sqlFuncs:
        if details.metric == metric:
            func = funcName
            break

    if func is None:
        # No SQL counterpart for this metric: fetch the fingerprints and
        # screen them in Python.
        data = GetFingerprints(details)
        return ScreenFingerprints(details, data, mol)

    if conn is None:
        FingerprintMols.error(
            'Error: no database connection available for in-database '
            'screening.\n')
        return []

    pkl = probeFp.ToBitString()
    extraFields = "%s(%s,%s) as tani" % (func, DbModule.placeHolder,
                                         details.fpColName)
    cmd = _ConstructSQL(details, extraFields=extraFields)

    if details.doThreshold:
        # we need to do a subquery here:
        cmd = "select * from (%s) tmp where tani>%f" % (cmd,
                                                        details.screenThresh)
    cmd += " order by tani desc"
    if not details.doThreshold and details.topN > 0:
        cmd += " limit %d" % details.topN
    curs = conn.GetCursor()
    # The probe bit string is bound to the placeholder inside the
    # similarity-function call.
    curs.execute(cmd, (pkl, ))
    return curs.fetchall()
Ejemplo n.º 9
0
    def testInsertData(self):
        """ fills a scratch table through InsertData and reads it back """
        tableName = 'NEW_TABLE'
        dropCmd = 'drop table %s' % (tableName)
        db = DbConnect(self.tempDbName)
        # the table may be left over from an earlier run; ignore a failed drop
        try:
            db.GetCursor().execute(dropCmd)
        except Exception:
            pass
        db.Commit()
        db.AddTable(tableName, 'id int,val1 int, val2 int')
        for idx in range(10):
            record = (idx, idx + 1, 2 * idx)
            db.InsertData(tableName, record)
        db.Commit()
        fetched = db.GetData(table=tableName)
        assert len(fetched) == 10

        # let go of the result set before dropping the table
        fetched = None
        try:
            db.GetCursor().execute(dropCmd)
        except Exception:
            assert 0, 'drop table failed'
Ejemplo n.º 10
0
    def test5TestBackwardsCompat(self):
        """Search against a renamed atom-pair table created by hand.

        Rebuilds the bzr databases with CreateDb.py, copies the atompairs
        table to a table called tmp, runs SearchDb.py against that table via
        --pairTableName and validates the search output: 163 lines, each
        listing neighbours in non-increasing similarity order and containing
        the query itself with similarity '1.000'.
        """
        # Remove databases left over from earlier runs.
        for stale in ('testData/bzr/Compounds.sqlt',
                      'testData/bzr/AtomPairs.sqlt',
                      'testData/bzr/Descriptors.sqlt',
                      'testData/bzr/Fingerprints.sqlt'):
            if os.path.exists(stale):
                os.unlink(stale)

        p = subprocess.Popen(
            ('python', 'CreateDb.py', '--dbDir=testData/bzr',
             '--noFingerprints', '--noDescriptors', 'testData/bzr.sdf'))
        res = p.wait()
        # A non-zero exit status means the script failed.
        self.assertFalse(res)
        p = None

        conn = DbConnect('testData/bzr/AtomPairs.sqlt')
        curs = conn.GetCursor()
        curs.execute(
            'create table tmp as select compound_id,atompairfp,torsionfp from atompairs'
        )
        p = subprocess.Popen(
            ('python', 'SearchDb.py', '--dbDir=testData/bzr',
             '--molFormat=sdf', '--topN=5', '--outF=testData/bzr/search.out',
             '--similarityType=AtomPairs', '--pairTableName=tmp',
             'testData/bzr.sdf'))
        res = p.wait()
        self.assertFalse(res)
        p = None

        self.assertTrue(os.path.exists('testData/bzr/search.out'))
        # open() in a with-block replaces the Python-2-only file() builtin
        # and guarantees the handle is closed before the file is unlinked.
        with open('testData/bzr/search.out', 'r') as inF:
            lines = inF.readlines()
        self.assertEqual(len(lines), 163)
        splitLs = [x.strip().split(',') for x in lines]
        for line in splitLs:
            # Layout: label, then alternating (neighbour, similarity) fields.
            lbl = line[0]
            i = 1
            nbrs = {}
            lastVal = 1.0
            while i < len(line):
                nbrs[line[i]] = line[i + 1]
                # Similarities must be sorted in non-increasing order.
                self.assertLessEqual(float(line[i + 1]), lastVal)
                lastVal = float(line[i + 1])
                i += 2
            # Each query must find itself with perfect similarity.
            self.assertIn(lbl, nbrs)
            self.assertEqual(nbrs[lbl], '1.000')
        os.unlink('testData/bzr/search.out')
Ejemplo n.º 11
0
    def Store(self,
              db='models.gdb',
              table='results',
              user='******',
              password='******'):
        """ writes this result as one row of a database table

      **Arguments**

        - db: name of the database to use

        - table: name of the table to use

        - user&password: connection information

      **Notes**

        - only the entries of self.fields for which this object actually
          has an attribute are written; the others are left out of the
          insert statement.

    """
        connection = DbConnect(db, table, user, password)
        cursor = connection.GetCursor()
        self._CreateTable(connection, table)

        # Collect (column, value) pairs for the fields that are set.
        unset = object()
        present = []
        for fieldName, _ in self.fields:
            value = getattr(self, fieldName, unset)
            if value is unset:
                continue
            present.append(('%s' % fieldName, value))

        columnNames = [pair[0] for pair in present]
        values = tuple(pair[1] for pair in present)
        placeholders = ','.join([DbModule.placeHolder] * len(values))

        statement = 'insert into %s (%s) values (%s)' % (
            table, ','.join(columnNames), placeholders)
        cursor.execute(statement, values)
        connection.Commit()
Ejemplo n.º 12
0
                                 password=RDConfig.defaultDBPassword,
                                 pickleCol=details.pickleCol,
                                 pickleClass=DataStructs.ExplicitBitVect)

    descs = dataSet.GetVarNames()
    nPts = dataSet.GetNPts()
    message('npts: %d\n' % (nPts))
    final = numpy.zeros((nPts, 2), numpy.float)
    counts = numpy.zeros(nPts, numpy.integer)
    selPts = [None] * nPts

    models = []
    if details.persistTblName:
        conn = DbConnect(details.dbName, details.persistTblName)
        message('-> Retrieving models from database')
        curs = conn.GetCursor()
        curs.execute("select model from %s where note='%s'" %
                     (details.persistTblName, details.note))
        message('-> Reconstructing models')
        try:
            blob = curs.fetchone()
        except Exception:
            blob = None
        while blob:
            message(' Building model %d' % len(models))
            blob = blob[0]
            try:
                models.append(cPickle.loads(str(blob)))
            except Exception:
                import traceback
                traceback.print_exc()
Ejemplo n.º 13
0
def LoadDb(suppl,dbName,nameProp='_Name',nameCol='compound_id',silent=False,
           redraw=False,errorsTo=None,keepHs=False,defaultVal='N/A',skipProps=False,
           regName='molecules',skipSmiles=False,maxRowsCached=-1,
           uniqNames=False,addComputedProps=False,lazySupplier=False,
           startAnew=True):
  """ loads the molecules from a supplier into a molecule table

    The work happens in two phases:

      1. molecules are read from suppl until it is exhausted or
         maxRowsCached rows have been collected. The properties gathered
         in globalProps during this phase determine the table columns:
         guid, the name column, one column per property, an optional
         smiles column (called cansmiles if a property named smiles
         already exists) and molpkl.

      2. any remaining molecules are pulled with next(suppl) and inserted
         in batches of maxRowsCached rows.

    **Arguments**

      - suppl: the molecule supplier; len(suppl) is only called when
        lazySupplier is false

      - dbName: name of the database to connect to

      - regName: name of the table that receives the molecules

      - errorsTo: optional file-like object; the text of entries that
        could not be read is written to it when the supplier has a
        GetItemText method

      - maxRowsCached: number of rows to collect before inserting

      - startAnew: if true the table is dropped and recreated; otherwise
        the rows are appended to the existing table, whose columns must
        match, and guids continue after the largest existing guid

      - the remaining arguments are passed through to ProcessMol and
        ConvertRows

    **Raises**

      - ValueError if startAnew is false and the existing table's columns
        differ from those required by the new data

  """
  if not lazySupplier:
    nMols = len(suppl)
  else:
    nMols=-1
  if not silent:
    logger.info("Generating molecular database in file %s"%dbName)
    if not lazySupplier:
      logger.info("  Processing %d molecules"%nMols)
  rows = []
  globalProps = {}
  namesSeen = set()
  nDone = 0
  # maps the type codes stored in globalProps to (SQL type, converter)
  typeConversions={0:('varchar',str),1:('float',float),2:('int',int)}
  for m in suppl:
    nDone +=1
    if not m:
      if errorsTo:
        if hasattr(suppl,'GetItemText'):
          d = suppl.GetItemText(nDone-1)
          errorsTo.write(d)
        else:
          logger.warning('full error file support not complete')
      continue

    row=ProcessMol(m,typeConversions,globalProps,nDone,nameProp=nameProp,
                   nameCol=nameCol,redraw=redraw,
                   keepHs=keepHs,skipProps=skipProps,
                   addComputedProps=addComputedProps,skipSmiles=skipSmiles,
                   uniqNames=uniqNames,namesSeen=namesSeen)
    if row is None: continue
    rows.append([nDone]+row)
    if not silent and not nDone%100:
      logger.info('  done %d'%nDone)
    if len(rows)==maxRowsCached:
      break

  nameDef='%s varchar not null'%nameCol
  if uniqNames:
    nameDef += ' unique'
  typs = ['guid integer not null primary key',nameDef]
  pns = []
  for pn,v in globalProps.items():
    # property names may contain characters that are not legal in column names
    addNm = re.sub(r'[\W]','_',pn)
    typs.append('%s %s'%(addNm,typeConversions[v][0]))
    pns.append(pn.lower())

  if not skipSmiles:
    if 'smiles' not in pns:
      typs.append('smiles varchar')
    else:
      typs.append('cansmiles varchar')
  typs.append('molpkl %s'%(DbModule.binaryTypeName))
  conn = DbConnect(dbName)
  curs = conn.GetCursor()
  # amount added to the running molecule count to obtain the guid; only
  # non-zero when appending to an existing table
  offset = 0
  if startAnew:
    try:
      curs.execute('drop table %s'%regName)
    except Exception:
      # best effort: the table usually does not exist yet
      pass
    curs.execute('create table %s (%s)'%(regName,','.join(typs)))
  else:
    curs.execute('select * from %s limit 1'%(regName,))
    ocolns = set([x[0] for x in curs.description])
    ncolns = set([x.split()[0] for x in typs])
    if ncolns != ocolns:
      raise ValueError('Column names do not match: %s != %s'%(ocolns,ncolns))
    curs.execute('select max(guid) from %s'%(regName,))
    # max() over an empty table is NULL, which arrives as None
    offset = curs.fetchone()[0] or 0
    for row in rows:
      row[0] += offset

  qs = ','.join([DbModule.placeHolder for x in typs])


  ConvertRows(rows,globalProps,defaultVal,skipSmiles)
  curs.executemany('insert into %s values (%s)'%(regName,qs),rows)
  conn.Commit()

  # second phase: stream whatever the first loop left in the supplier
  rows = []
  while 1:
    nDone +=1
    try:
      m = next(suppl)
    except StopIteration:
      break
    if not m:
      if errorsTo:
        if hasattr(suppl,'GetItemText'):
          d = suppl.GetItemText(nDone-1)
          errorsTo.write(d)
        else:
          logger.warning('full error file support not complete')
      continue
    row=ProcessMol(m,typeConversions,globalProps,nDone,nameProp=nameProp,
                   nameCol=nameCol,redraw=redraw,
                   keepHs=keepHs,skipProps=skipProps,
                   addComputedProps=addComputedProps,skipSmiles=skipSmiles,
                   uniqNames=uniqNames,namesSeen=namesSeen)
    if not row: continue
    # apply the same guid offset as in the first phase so appended rows do
    # not reuse guids that are already in the table
    rows.append([nDone+offset]+row)
    if not silent and not nDone%100:
      logger.info('  done %d'%nDone)
    if len(rows)==maxRowsCached:
      ConvertRows(rows,globalProps,defaultVal,skipSmiles)
      curs.executemany('insert into %s values (%s)'%(regName,qs),rows)
      conn.Commit()
      rows = []
  if len(rows):
    ConvertRows(rows,globalProps,defaultVal,skipSmiles)
    curs.executemany('insert into %s values (%s)'%(regName,qs),rows)
    conn.Commit()
Ejemplo n.º 14
0
def CreateDb(options,dataFilename='',supplier=None):
  """ builds the molecule database and the requested fingerprint/descriptor tables

    **Arguments**

      - options: object whose attributes configure the run (output
        directory, database and table names, which fingerprint/descriptor
        types to generate, input format hints, ...). Some attributes are
        modified in place: the do* flags when noExtras is set, and
        molFormat/delimiter when they have to be guessed.

      - dataFilename: name of the input molecule file; only read when no
        supplier is given and options.loadMols is set

      - supplier: an already constructed molecule supplier, used instead
        of dataFilename

    **Raises**

      - ValueError if neither dataFilename nor supplier is provided

    **Notes**

      - the function first (optionally) loads the molecules through
        Loader.LoadDb, then drops and recreates every requested output
        table, and finally walks over the stored molecules, generating the
        requested data and inserting it in batches.

  """
  if not dataFilename and supplier is None:
    raise ValueError('Please provide either a data filename or a supplier')

  # NOTE(review): errFile is opened here but never closed in this function.
  if options.errFilename:
    errFile=open(os.path.join(options.outDir,options.errFilename),'w+')
  else:
    errFile=None

  # noExtras switches off every optional fingerprint/descriptor type
  if options.noExtras:
    options.doPairs=False
    options.doDescriptors=False
    options.doFingerprints=False
    options.doPharm2D=False
    options.doGobbi2D=False
    options.doLayered=False
    options.doMorganFps=False

  if options.loadMols:
    if supplier is None:
      if not options.molFormat:
        # guess the format from the file extension
        ext = os.path.splitext(dataFilename)[-1].lower()
        if ext=='.sdf':
          options.molFormat='sdf'
        elif ext in ('.smi','.smiles','.txt','.csv'):
          options.molFormat='smiles'
          if not options.delimiter:
            # guess the delimiter
            # NOTE(review): the file opened for sniffing is never explicitly closed.
            import csv
            sniffer = csv.Sniffer()
            dlct=sniffer.sniff(open(dataFilename,'r').read(2000))
            options.delimiter=dlct.delimiter
            if not options.silent:
              logger.info('Guessing that delimiter is %s. Use --delimiter argument if this is wrong.'%repr(options.delimiter))

        if not options.silent:
          logger.info('Guessing that mol format is %s. Use --molFormat argument if this is wrong.'%repr(options.molFormat))  
      if options.molFormat=='smiles':
        # a literal backslash-t (two characters) is translated into a real tab
        if options.delimiter=='\\t': options.delimiter='\t'
        supplier=Chem.SmilesMolSupplier(dataFilename,
                                        titleLine=options.titleLine,
                                        delimiter=options.delimiter,
                                        smilesColumn=options.smilesColumn,
                                        nameColumn=options.nameColumn
                                        )
      else:
        supplier = Chem.SDMolSupplier(dataFilename)
    if not options.silent: logger.info('Reading molecules and constructing molecular database.')
    # names are always forced to be unique here; the supplier is treated as
    # lazy whenever row caching is enabled (maxRowsCached > 0)
    Loader.LoadDb(supplier,os.path.join(options.outDir,options.molDbName),
                  errorsTo=errFile,regName=options.regName,nameCol=options.molIdName,
                  skipProps=options.skipProps,defaultVal=options.missingPropertyVal,
                  addComputedProps=options.addProps,uniqNames=True,
                  skipSmiles=options.skipSmiles,maxRowsCached=int(options.maxRowsCached),
                  silent=options.silent,nameProp=options.nameProp,
                  lazySupplier=int(options.maxRowsCached)>0,
                  startAnew=not options.updateDb
                  )

  # ---- table setup: each requested output table is dropped (best effort)
  # ---- and recreated
  if options.doPairs:
    pairConn = DbConnect(os.path.join(options.outDir,options.pairDbName))
    pairCurs = pairConn.GetCursor()
    try:
      pairCurs.execute('drop table %s'%(options.pairTableName))
    except:
      pass
    pairCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,atompairfp blob,torsionfp blob)'%(options.pairTableName,
                                                                                                         options.molIdName))

  if options.doFingerprints or options.doPharm2D or options.doGobbi2D or options.doLayered:
    # these four table types share one database connection and cursor
    fpConn = DbConnect(os.path.join(options.outDir,options.fpDbName))
    fpCurs=fpConn.GetCursor()
    try:
      fpCurs.execute('drop table %s'%(options.fpTableName))
    except:
      pass
    try:
      fpCurs.execute('drop table %s'%(options.pharm2DTableName))
    except:
      pass
    try:
      fpCurs.execute('drop table %s'%(options.gobbi2DTableName))
    except:
      pass
    try:
      fpCurs.execute('drop table %s'%(options.layeredTableName))
    except:
      pass

    if options.doFingerprints:
      fpCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,rdkfp blob)'%(options.fpTableName,
                                                                                     options.molIdName))
    if options.doLayered:
      # one integer column (and one placeholder) per word of the layered fingerprint
      layeredQs = ','.join('?'*LayeredOptions.nWords)
      colDefs=','.join(['Col_%d integer'%(x+1) for x in range(LayeredOptions.nWords)])
      fpCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,%s)'%(options.layeredTableName,
                                                                             options.molIdName,
                                                                             colDefs))
      
    if options.doPharm2D:
      fpCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,pharm2dfp blob)'%(options.pharm2DTableName,
                                                                                     options.molIdName))
      sigFactory = BuildSigFactory(options)
    if options.doGobbi2D:
      fpCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,gobbi2dfp blob)'%(options.gobbi2DTableName,
                                                                                     options.molIdName))
      # imported here because Gobbi_Pharm2D is only needed when this option is set
      from rdkit.Chem.Pharm2D import Generate,Gobbi_Pharm2D

  if options.doMorganFps :
    # opens the fingerprint database again, rebinding fpConn/fpCurs if they
    # were already set above
    fpConn = DbConnect(os.path.join(options.outDir,options.fpDbName))
    fpCurs=fpConn.GetCursor()
    try:
      fpCurs.execute('drop table %s'%(options.morganFpTableName))
    except:
      pass
    fpCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,morganfp blob)'%(options.morganFpTableName,
                                                                                        options.molIdName))

  if options.doDescriptors:
    descrConn=DbConnect(os.path.join(options.outDir,options.descrDbName))
    # the pickled descriptor calculator is read as text, its line endings
    # are normalised to \n, and the result is unpickled from bytes
    # NOTE(review): inTF.close() is redundant inside the with block.
    with open(options.descriptorCalcFilename,'r') as inTF:
      buf = inTF.read().replace('\r\n', '\n').encode('utf-8')
      inTF.close()
    calc = cPickle.load(io.BytesIO(buf))
    nms = [x for x in calc.GetDescriptorNames()]
    descrCurs = descrConn.GetCursor()
    # one float column per descriptor, after the guid and id columns
    descrs = ['guid integer not null primary key','%s varchar not null unique'%options.molIdName]
    descrs.extend(['%s float'%x for x in nms])
    try:
      descrCurs.execute('drop table %s'%(options.descrTableName))
    except:
      pass
    descrCurs.execute('create table %s (%s)'%(options.descrTableName,','.join(descrs)))
    descrQuery=','.join([DbModule.placeHolder]*len(descrs))
  # pending rows for each output table, flushed in batches below
  pairRows = []
  fpRows = []
  layeredRows = []
  descrRows = []
  pharm2DRows=[]
  gobbi2DRows=[]
  morganRows = []

  # ---- generation: walk over every stored molecule
  if not options.silent: logger.info('Generating fingerprints and descriptors:')
  molConn = DbConnect(os.path.join(options.outDir,options.molDbName))
  molCurs = molConn.GetCursor()
  if not options.skipSmiles:
    molCurs.execute('select guid,%s,smiles,molpkl from %s'%(options.molIdName,options.regName))
  else:
    molCurs.execute('select guid,%s,molpkl from %s'%(options.molIdName,options.regName))
  i=0
  while 1:
    # the loop ends when fetching or unpacking a row raises; presumably
    # fetchone() returns None once the rows are exhausted, so indexing it fails
    # NOTE(review): the bare except also ends the loop on any genuine error.
    try:
      tpl = molCurs.fetchone()
      molGuid = tpl[0]
      molId = tpl[1]
      pkl = tpl[-1]
      i+=1
    except:
      break
    # rebuild the molecule from its stored pickle (the last selected column)
    if isinstance(pkl,(bytes,str)):
      mol = Chem.Mol(pkl)
    else:
      mol = Chem.Mol(str(pkl))
    # molecules that fail to load are skipped (this also skips the batch
    # flush below for this iteration)
    if not mol: continue
     
    if options.doPairs:
      pairs = FingerprintUtils.BuildAtomPairFP(mol)
      torsions = FingerprintUtils.BuildTorsionsFP(mol)
      pkl1 = DbModule.binaryHolder(pairs.ToBinary())
      pkl2 = DbModule.binaryHolder(torsions.ToBinary())
      row = (molGuid,molId,pkl1,pkl2)
      pairRows.append(row)
    if options.doFingerprints:
      fp2 = FingerprintUtils.BuildRDKitFP(mol)
      pkl = DbModule.binaryHolder(fp2.ToBinary())
      row = (molGuid,molId,pkl)
      fpRows.append(row)
    if options.doLayered:
      words = LayeredOptions.GetWords(mol)
      row = [molGuid,molId]+words
      layeredRows.append(row)
    if options.doDescriptors:
      descrs= calc.CalcDescriptors(mol)
      row = [molGuid,molId]
      row.extend(descrs)
      descrRows.append(row)
    if options.doPharm2D:
      # BuildPharm2DFP uses the module-level sigFactory, so it is set before each call
      FingerprintUtils.sigFactory=sigFactory
      fp= FingerprintUtils.BuildPharm2DFP(mol)
      pkl = DbModule.binaryHolder(fp.ToBinary())
      row = (molGuid,molId,pkl)
      pharm2DRows.append(row)
    if options.doGobbi2D:
      FingerprintUtils.sigFactory=Gobbi_Pharm2D.factory
      fp= FingerprintUtils.BuildPharm2DFP(mol)
      pkl = DbModule.binaryHolder(fp.ToBinary())
      row = (molGuid,molId,pkl)
      gobbi2DRows.append(row)
    if options.doMorganFps:
      morgan = FingerprintUtils.BuildMorganFP(mol)
      pkl = DbModule.binaryHolder(morgan.ToBinary())
      row = (molGuid,molId,pkl)
      morganRows.append(row)

    # flush the pending rows every 500 molecules
    # NOTE(review): these inserts hard-code '?' placeholders; only the
    # descriptor insert uses DbModule.placeHolder (via descrQuery).
    if not i%500:
      if len(pairRows):
        pairCurs.executemany('insert into %s values (?,?,?,?)'%options.pairTableName,
                             pairRows)
        pairRows = []
        pairConn.Commit()
      if len(fpRows):
        fpCurs.executemany('insert into %s values (?,?,?)'%options.fpTableName,
                           fpRows)
        fpRows = []
        fpConn.Commit()
      if len(layeredRows):
        fpCurs.executemany('insert into %s values (?,?,%s)'%(options.layeredTableName,layeredQs),
                           layeredRows)
        layeredRows = []
        fpConn.Commit()
      if len(descrRows):
        descrCurs.executemany('insert into %s values (%s)'%(options.descrTableName,descrQuery),
                              descrRows)
        descrRows = []
        descrConn.Commit()
      if len(pharm2DRows):
        fpCurs.executemany('insert into %s values (?,?,?)'%options.pharm2DTableName,
                           pharm2DRows)
        pharm2DRows = []
        fpConn.Commit()
      if len(gobbi2DRows):
        fpCurs.executemany('insert into %s values (?,?,?)'%options.gobbi2DTableName,
                           gobbi2DRows)
        gobbi2DRows = []
        fpConn.Commit()
      if len(morganRows):
        fpCurs.executemany('insert into %s values (?,?,?)'%options.morganFpTableName,
                             morganRows)
        morganRows = []
        fpConn.Commit()
        
    if not options.silent and not i%500: 
      logger.info('  Done: %d'%(i))

  # ---- final flush of whatever is left after the last full batch
  if len(pairRows):
    pairCurs.executemany('insert into %s values (?,?,?,?)'%options.pairTableName,
                         pairRows)
    pairRows = []
    pairConn.Commit()
  if len(fpRows):
    fpCurs.executemany('insert into %s values (?,?,?)'%options.fpTableName,
                       fpRows)
    fpRows = []
    fpConn.Commit()
  if len(layeredRows):
    fpCurs.executemany('insert into %s values (?,?,%s)'%(options.layeredTableName,layeredQs),
                       layeredRows)
    layeredRows = []
    fpConn.Commit()
  if len(descrRows):
    descrCurs.executemany('insert into %s values (%s)'%(options.descrTableName,descrQuery),
                          descrRows)
    descrRows = []
    descrConn.Commit()
  if len(pharm2DRows):
    fpCurs.executemany('insert into %s values (?,?,?)'%options.pharm2DTableName,
                       pharm2DRows)
    pharm2DRows = []
    fpConn.Commit()
  if len(gobbi2DRows):
    fpCurs.executemany('insert into %s values (?,?,?)'%options.gobbi2DTableName,
                       gobbi2DRows)
    gobbi2DRows = []
    fpConn.Commit()
  if len(morganRows):
    fpCurs.executemany('insert into %s values (?,?,?)'%options.morganFpTableName,
                       morganRows)
    morganRows = []
    fpConn.Commit()
    
  if not options.silent:
    logger.info('Finished.')
Ejemplo n.º 15
0
class TestCase(unittest.TestCase):
    """ Tests for DbResultSet and RandomAccessDbResultSet.

    Every test runs a select against the ten_elements or ten_elements_dups
    table of the RDKit test database.
    """

    def setUp(self):
        """ opens a connection and a cursor on the test database """
        self.dbName = RDConfig.RDTestDatabase
        self.conn = DbConnect(self.dbName)
        self.curs = self.conn.GetCursor()

    def test1(self):
        """ test indexing in, ensure acceptable error conditions """
        cmd = 'select * from ten_elements'
        resultSet = RandomAccessDbResultSet(self.curs, self.conn, cmd)
        for i in range(12):
            try:
                _ = resultSet[i]
            except IndexError:
                # Only indices past the ten available rows may fail.
                self.assertGreaterEqual(i, 10)

    def test2(self):
        """ random-access result set: length and indexing over every row """
        cmd = 'select * from ten_elements'
        resultSet = RandomAccessDbResultSet(self.curs, self.conn, cmd)
        self.assertEqual(len(resultSet), 10)
        for i in range(len(resultSet)):
            _ = resultSet[i]

    def test3(self):
        """ forward-only result set: iteration yields all ten rows """
        cmd = 'select * from ten_elements'
        resultSet = DbResultSet(self.curs, self.conn, cmd)
        rows = [thing for thing in resultSet]
        self.assertEqual(len(rows), 10)

    def test4(self):
        """ forward-only result set on the dups table with removeDups=0 """
        cmd = 'select * from ten_elements_dups'
        resultSet = DbResultSet(self.curs, self.conn, cmd, removeDups=0)
        rows = [thing for thing in resultSet]
        self.assertEqual(len(rows), 10)

    def test5(self):
        """ random-access result set on the dups table with removeDups=0 """
        cmd = 'select * from ten_elements_dups'
        resultSet = RandomAccessDbResultSet(self.curs,
                                            self.conn,
                                            cmd,
                                            removeDups=0)
        self.assertEqual(len(resultSet), 10)
        for i in range(len(resultSet)):
            _ = resultSet[i]

    def test6(self):
        """ same check as test4 (kept so the set of test names is unchanged) """
        cmd = 'select * from ten_elements_dups'
        resultSet = DbResultSet(self.curs, self.conn, cmd, removeDups=0)
        rows = [thing for thing in resultSet]
        self.assertEqual(len(rows), 10)
Ejemplo n.º 16
0
class TestCase(unittest.TestCase):
    """Checks DbResultSet and RandomAccessDbResultSet against the test database.

    The queries run against the ``ten_elements`` and ``ten_elements_dups``
    tables of the database named by ``RDConfig.RDTestDatabase``.
    """

    def setUp(self):
        """Connect to the test database and keep a cursor for the queries."""
        self.dbName = RDConfig.RDTestDatabase
        self.conn = DbConnect(self.dbName)
        self.curs = self.conn.GetCursor()

    def _checkIndexing(self, resultSet, nProbes, nRows):
        """Index resultSet at 0..nProbes-1.

        An IndexError is tolerated only for indices >= nRows.
        """
        for i in range(nProbes):
            try:
                _ = resultSet[i]
            except IndexError:
                self.assertGreaterEqual(i, nRows)

    def test1(self):
        """ test indexing in, ensure acceptable error conditions """
        cmd = 'select * from ten_elements'
        resultSet = RandomAccessDbResultSet(self.curs, self.conn, cmd)
        self.assertRaises(IndexError, resultSet.__getitem__, -1)
        self._checkIndexing(resultSet, 12, 10)
        self.assertEqual(resultSet.GetColumnNames(), ('id', 'val'))
        self.assertEqual(resultSet.GetColumnTypes(), ('integer', 'integer'))
        self.assertEqual(resultSet.GetColumnNamesAndTypes(),
                         (('id', 'integer'), ('val', 'integer')))

        # Default settings: indices up to 19 must be reachable.
        cmd = 'select * from ten_elements_dups'
        resultSet = RandomAccessDbResultSet(self.curs, self.conn, cmd)
        self._checkIndexing(resultSet, 22, 20)

        # With removeDups=0 only indices up to 9 must be reachable.
        cmd = 'select * from ten_elements_dups'
        resultSet = RandomAccessDbResultSet(self.curs,
                                            self.conn,
                                            cmd,
                                            removeDups=0)
        self._checkIndexing(resultSet, 22, 10)

        # Test iterator
        resultSet = RandomAccessDbResultSet(self.curs,
                                            self.conn,
                                            cmd,
                                            removeDups=0)
        self.assertEqual(next(resultSet), resultSet[0])
        self.assertEqual(len(list(resultSet)), 10)

    def test2(self):
        """A random-access result set reports 10 rows and each one is indexable."""
        cmd = 'select * from ten_elements'
        resultSet = RandomAccessDbResultSet(self.curs, self.conn, cmd)
        self.assertEqual(len(resultSet), 10)
        for i in range(len(resultSet)):
            _ = resultSet[i]

    def test3(self):
        """Iterating a DbResultSet over ten_elements yields 10 rows."""
        cmd = 'select * from ten_elements'
        resultSet = DbResultSet(self.curs, self.conn, cmd)
        r = [obj for obj in resultSet]
        self.assertEqual(len(r), 10)

        # Test iterator
        resultSet = DbResultSet(self.curs, self.conn, cmd)
        self.assertEqual(next(resultSet), (0, 11))
        self.assertEqual(len(list(resultSet)), 10)

    def test4(self):
        """DbResultSet over ten_elements_dups: 20 rows by default, 10 with removeDups=0."""
        cmd = 'select * from ten_elements_dups'
        resultSet = DbResultSet(self.curs, self.conn, cmd)
        r = [obj for obj in resultSet]
        self.assertEqual(len(r), 20)

        resultSet = DbResultSet(self.curs, self.conn, cmd, removeDups=0)
        r = [obj for obj in resultSet]
        self.assertEqual(len(r), 10)

    def test5(self):
        """len() and iteration agree on a random-access result set, in either order."""
        cmd = 'select * from ten_elements_dups'
        # len() first, then iterate.
        resultSet = RandomAccessDbResultSet(self.curs, self.conn, cmd)
        self.assertEqual(len(resultSet), 20)
        r = [obj for obj in resultSet]
        self.assertEqual(len(r), 20)

        # Iterate first, then len(). A comprehension is used rather than
        # list() because list() may ask the object for its length up front.
        resultSet = RandomAccessDbResultSet(self.curs, self.conn, cmd)
        r = [obj for obj in resultSet]
        self.assertEqual(len(r), 20)
        self.assertEqual(len(resultSet), 20)

        resultSet = RandomAccessDbResultSet(self.curs,
                                            self.conn,
                                            cmd,
                                            removeDups=0)
        self.assertEqual(len(resultSet), 10)
        r = [obj for obj in resultSet]
        self.assertEqual(len(r), 10)

    def test6(self):
        """Iterating ten_elements_dups with removeDups=0 yields 10 rows."""
        cmd = 'select * from ten_elements_dups'
        resultSet = DbResultSet(self.curs, self.conn, cmd, removeDups=0)
        r = [obj for obj in resultSet]
        self.assertEqual(len(r), 10)
Ejemplo n.º 17
0
def RunSearch(options, queryFilename):
    """Run the search described by ``options`` and write out the hits.

    Up to two stages are run:

    1. If ``options.queryMol`` (possibly built here from
       ``options.smilesQuery`` / ``options.smartsQuery``) or
       ``options.propQuery`` is set, the ``molecules`` table is filtered by
       substructure match and/or the property ``where`` clause.
    2. If ``queryFilename`` is given, the molecules in that file are used as
       probes for a similarity search against the fingerprint table selected
       by ``options.similarityType``, restricted to the ids from stage 1
       when there are any.

    Results go to ``options.outF`` (ids / neighbor lists) and, when set, to
    ``options.sdfOut`` and ``options.smilesOut``; the value ``'-'`` means
    ``sys.stdout``.

    Args:
        options: options object; read (and in a few places updated) here.
        queryFilename: path of the file holding the query molecules, or a
            false value to skip the similarity search.

    Returns:
        None.

    Exits the process via ``sys.exit`` when the SMILES/SMARTS query cannot
    be parsed or the query file cannot be opened.
    """
    global sigFactory
    # Pick the fingerprint builder, default metric and storage location
    # for the requested similarity type.
    if options.similarityType == 'AtomPairs':
        fpBuilder = FingerprintUtils.BuildAtomPairFP
        simMetric = DataStructs.DiceSimilarity
        dbName = os.path.join(options.dbDir, options.pairDbName)
        fpTableName = options.pairTableName
        fpColName = options.pairColName
    elif options.similarityType == 'TopologicalTorsions':
        fpBuilder = FingerprintUtils.BuildTorsionsFP
        simMetric = DataStructs.DiceSimilarity
        dbName = os.path.join(options.dbDir, options.torsionsDbName)
        fpTableName = options.torsionsTableName
        fpColName = options.torsionsColName
    elif options.similarityType == 'RDK':
        fpBuilder = FingerprintUtils.BuildRDKitFP
        simMetric = DataStructs.FingerprintSimilarity
        dbName = os.path.join(options.dbDir, options.fpDbName)
        fpTableName = options.fpTableName
        if not options.fpColName:
            options.fpColName = 'rdkfp'
        fpColName = options.fpColName
    elif options.similarityType == 'Pharm2D':
        fpBuilder = FingerprintUtils.BuildPharm2DFP
        simMetric = DataStructs.DiceSimilarity
        dbName = os.path.join(options.dbDir, options.fpDbName)
        fpTableName = options.pharm2DTableName
        if not options.fpColName:
            options.fpColName = 'pharm2dfp'
        fpColName = options.fpColName
        FingerprintUtils.sigFactory = BuildSigFactory(options)
    elif options.similarityType == 'Gobbi2D':
        from rdkit.Chem.Pharm2D import Gobbi_Pharm2D
        fpBuilder = FingerprintUtils.BuildPharm2DFP
        simMetric = DataStructs.TanimotoSimilarity
        dbName = os.path.join(options.dbDir, options.fpDbName)
        fpTableName = options.gobbi2DTableName
        if not options.fpColName:
            options.fpColName = 'gobbi2dfp'
        fpColName = options.fpColName
        FingerprintUtils.sigFactory = Gobbi_Pharm2D.factory
    elif options.similarityType == 'Morgan':
        fpBuilder = FingerprintUtils.BuildMorganFP
        simMetric = DataStructs.DiceSimilarity
        dbName = os.path.join(options.dbDir, options.morganFpDbName)
        fpTableName = options.morganFpTableName
        fpColName = options.morganFpColName

    # An explicit similarityMetric overrides the per-type default above.
    extraArgs = {}
    if options.similarityMetric == 'tanimoto':
        simMetric = DataStructs.TanimotoSimilarity
    elif options.similarityMetric == 'dice':
        simMetric = DataStructs.DiceSimilarity
    elif options.similarityMetric == 'tversky':
        simMetric = DataStructs.TverskySimilarity
        extraArgs['tverskyA'] = options.tverskyA
        extraArgs['tverskyB'] = options.tverskyB

    if options.smilesQuery:
        mol = Chem.MolFromSmiles(options.smilesQuery)
        if not mol:
            logger.error('could not build query molecule from smiles "%s"' %
                         options.smilesQuery)
            sys.exit(-1)
        options.queryMol = mol
    elif options.smartsQuery:
        mol = Chem.MolFromSmarts(options.smartsQuery)
        if not mol:
            logger.error('could not build query molecule from smarts "%s"' %
                         options.smartsQuery)
            sys.exit(-1)
        options.queryMol = mol

    # Output destinations: '-' selects stdout, '' (outF only) disables output.
    if options.outF == '-':
        outF = sys.stdout
    elif options.outF == '':
        outF = None
    else:
        outF = open(options.outF, 'w+')

    molsOut = False
    if options.sdfOut:
        molsOut = True
        if options.sdfOut == '-':
            sdfOut = sys.stdout
        else:
            sdfOut = open(options.sdfOut, 'w+')
    else:
        sdfOut = None
    if options.smilesOut:
        molsOut = True
        if options.smilesOut == '-':
            smilesOut = sys.stdout
        else:
            smilesOut = open(options.smilesOut, 'w+')
    else:
        smilesOut = None

    if queryFilename:
        try:
            # Only a readability probe: the readers below are handed the
            # file name, so the handle is closed again straight away.
            with open(queryFilename, 'r'):
                pass
        except IOError:
            logger.error('could not open query file %s' % queryFilename)
            sys.exit(1)

        if options.molFormat == 'smiles':
            func = GetMolsFromSmilesFile
        elif options.molFormat == 'sdf':
            func = GetMolsFromSDFile

        if not options.silent:
            msg = 'Reading query molecules'
            if fpBuilder: msg += ' and generating fingerprints'
            logger.info(msg)
        probes = []
        i = 0
        nms = []
        for nm, smi, mol in func(queryFilename, None, options.nameProp):
            i += 1
            nms.append(nm)
            if not mol:
                logger.error('query molecule %d could not be built' % (i))
                # Keep a placeholder so probes stays aligned with nms.
                probes.append((None, None))
                continue
            if fpBuilder:
                probes.append((mol, fpBuilder(mol)))
            else:
                probes.append((mol, None))
            if not options.silent and not i % 1000:
                logger.info("  done %d" % i)
    else:
        probes = None

    conn = None
    ids = None
    molDbName = os.path.join(options.dbDir, options.molDbName)
    molIdName = options.molIdName
    mConn = DbConnect(molDbName)
    cns = [(x.lower(), y)
           for x, y in mConn.GetColumnNamesAndTypes('molecules')]
    # The first column of the molecules table is used as the id column.
    idCol, idTyp = cns[0]
    if options.propQuery or options.queryMol:
        conn = DbConnect(molDbName)
        curs = conn.GetCursor()
        if options.queryMol:
            if not options.silent: logger.info('Doing substructure query')
            if options.propQuery:
                where = 'where %s' % options.propQuery
            else:
                where = ''
            if not options.silent:
                curs.execute('select count(*) from molecules %(where)s' %
                             locals())
                nToDo = curs.fetchone()[0]

            join = ''
            doSubstructFPs = False
            fpDbName = os.path.join(options.dbDir, options.fpDbName)
            if os.path.exists(fpDbName) and not options.negateQuery:
                curs.execute("attach database '%s' as fpdb" % (fpDbName))
                try:
                    curs.execute('select * from fpdb.%s limit 1' %
                                 options.layeredTableName)
                except Exception:
                    # Best effort: if the layered table cannot be queried,
                    # fall back to the unscreened search.
                    pass
                else:
                    doSubstructFPs = True
                    join = 'join fpdb.%s using (%s)' % (
                        options.layeredTableName, idCol)
                    query = LayeredOptions.GetQueryText(options.queryMol)
                    if query:
                        if not where:
                            where = 'where'
                        else:
                            where += ' and'
                        where += ' ' + query

            cmd = 'select %(idCol)s,molpkl from molecules %(join)s %(where)s' % locals(
            )
            curs.execute(cmd)
            row = curs.fetchone()
            nDone = 0
            ids = []
            while row:
                id, molpkl = row
                if not options.zipMols:
                    m = _molFromPkl(molpkl)
                else:
                    m = Chem.Mol(zlib.decompress(molpkl))
                matched = m.HasSubstructMatch(options.queryMol)
                if options.negateQuery:
                    matched = not matched
                if matched:
                    ids.append(id)
                nDone += 1
                if not options.silent and not nDone % 500:
                    if not doSubstructFPs:
                        logger.info(
                            '  searched %d (of %d) molecules; %d hits so far' %
                            (nDone, nToDo, len(ids)))
                    else:
                        logger.info(
                            '  searched through %d molecules; %d hits so far' %
                            (nDone, len(ids)))
                row = curs.fetchone()
            if not options.silent and doSubstructFPs and nToDo:
                nFiltered = nToDo - nDone
                logger.info(
                    '   Fingerprint screenout rate: %d of %d (%%%.2f)' %
                    (nFiltered, nToDo, 100. * nFiltered / nToDo))

        elif options.propQuery:
            if not options.silent: logger.info('Doing property query')
            # Only the text before the first ';' is used as the where clause.
            propQuery = options.propQuery.split(';')[0]
            curs.execute(
                'select %(idCol)s from molecules where %(propQuery)s' %
                locals())
            ids = [x[0] for x in curs.fetchall()]
        if not options.silent:
            logger.info('Found %d molecules matching the query' % (len(ids)))

    t1 = time.time()
    if probes:
        if not options.silent: logger.info('Finding Neighbors')
        conn = DbConnect(dbName)
        cns = conn.GetColumnNames(fpTableName)
        curs = conn.GetCursor()

        if ids:
            # Restrict the fingerprint pool to the ids found above.
            ids = [(x, ) for x in ids]
            curs.execute(
                'create temporary table _tmpTbl (%(idCol)s %(idTyp)s)' %
                locals())
            curs.executemany('insert into _tmpTbl values (?)', ids)
            join = 'join  _tmpTbl using (%(idCol)s)' % locals()
        else:
            join = ''

        if cns[0].lower() != idCol.lower():
            # backwards compatibility to the days when mol tables had a guid and
            # the fps tables did not:
            curs.execute("attach database '%(molDbName)s' as mols" % locals())
            curs.execute("""
  select %(idCol)s,%(fpColName)s from %(fpTableName)s join
      (select %(idCol)s,%(molIdName)s from mols.molecules %(join)s)
    using (%(molIdName)s)
""" % (locals()))
        else:
            curs.execute(
                'select %(idCol)s,%(fpColName)s from %(fpTableName)s %(join)s'
                % locals())

        def poolFromCurs(curs, similarityMethod):
            """Yield (id, fingerprint) pairs lazily from the cursor's rows."""
            row = curs.fetchone()
            while row:
                id, pkl = row
                fp = DepickleFP(pkl, similarityMethod)
                yield (id, fp)
                row = curs.fetchone()

        topNLists = GetNeighborLists(probes,
                                     options.topN,
                                     poolFromCurs(curs,
                                                  options.similarityType),
                                     simMetric=simMetric,
                                     simThresh=options.simThresh,
                                     **extraArgs)
        uniqIds = set()
        nbrLists = {}
        for i, nm in enumerate(nms):
            topNLists[i].reverse()
            scores = topNLists[i].GetPts()
            nbrNames = topNLists[i].GetExtras()
            nbrs = []
            for j, nbrGuid in enumerate(nbrNames):
                # A None entry ends the list of neighbors for this probe.
                if nbrGuid is None:
                    break
                else:
                    uniqIds.add(nbrGuid)
                    nbrs.append((nbrGuid, scores[j]))
            nbrLists[(i, nm)] = nbrs
        t2 = time.time()
        if not options.silent:
            logger.info('The search took %.1f seconds' % (t2 - t1))

        if not options.silent: logger.info('Creating output')

        curs = mConn.GetCursor()
        ids = list(uniqIds)

        # Map the neighbor ids back to molecule names via a temporary table.
        ids = [(x, ) for x in ids]
        curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)' %
                     locals())
        curs.executemany('insert into _tmpTbl values (?)', ids)
        curs.execute(
            'select %(idCol)s,%(molIdName)s from molecules join _tmpTbl using (%(idCol)s)'
            % locals())
        nmDict = {}
        for guid, id in curs.fetchall():
            nmDict[guid] = str(id)

        ks = list(nbrLists.keys())
        ks.sort()
        if not options.transpose:
            # One line per probe: name followed by (neighbor, score) pairs.
            for i, nm in ks:
                nbrs = nbrLists[(i, nm)]
                nbrTxt = options.outputDelim.join([nm] + [
                    '%s%s%.3f' % (nmDict[id], options.outputDelim, score)
                    for id, score in nbrs
                ])
                if outF: print(nbrTxt, file=outF)
        else:
            # One column pair per probe, one row per neighbor rank.
            labels = [
                '%s%sSimilarity' % (x[1], options.outputDelim) for x in ks
            ]
            if outF: print(options.outputDelim.join(labels), file=outF)
            # NOTE(review): a probe with fewer than topN neighbors would
            # raise IndexError in the lookup below - confirm whether that
            # can happen and what the desired output is.
            for i in range(options.topN):
                outL = []
                for idx, nm in ks:
                    nbr = nbrLists[(idx, nm)][i]
                    outL.append(nmDict[nbr[0]])
                    outL.append('%.3f' % nbr[1])
                if outF: print(options.outputDelim.join(outL), file=outF)
    else:
        if not options.silent: logger.info('Creating output')
        curs = mConn.GetCursor()
        # NOTE(review): ids is still None here when neither a substructure
        # nor a property query ran, and set(None) raises TypeError -
        # confirm that callers always supply some kind of query.
        ids = [(x, ) for x in set(ids)]
        curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)' %
                     locals())
        curs.executemany('insert into _tmpTbl values (?)', ids)
        molIdName = options.molIdName
        curs.execute(
            'select %(idCol)s,%(molIdName)s from molecules join _tmpTbl using (%(idCol)s)'
            % locals())
        nmDict = {}
        for guid, id in curs.fetchall():
            nmDict[guid] = str(id)
        if outF: print('\n'.join(nmDict.values()), file=outF)
    if molsOut and ids:
        molDbName = os.path.join(options.dbDir, options.molDbName)
        cns = [x.lower() for x in mConn.GetColumnNames('molecules')]
        # Move molpkl to the end so row[-1] is always the pickled molecule.
        if cns[-1] != 'molpkl':
            cns.remove('molpkl')
            cns.append('molpkl')

        # _tmpTbl was filled on this same connection by the output step above.
        curs = mConn.GetCursor()
        cnText = ','.join(cns)
        curs.execute(
            'select %(cnText)s from molecules join _tmpTbl using (%(idCol)s)' %
            locals())

        row = curs.fetchone()
        while row:
            row = list(row)
            m = _molFromPkl(row[-1])
            guid = row[0]
            nm = nmDict[guid]
            if sdfOut:
                m.SetProp('_Name', nm)
                print(Chem.MolToMolBlock(m), file=sdfOut)
                # The columns between the id and molpkl become SD data fields.
                for i in range(1, len(cns) - 1):
                    pn = cns[i]
                    pv = str(row[i])
                    print('> <%s>\n%s\n' % (pn, pv), file=sdfOut)
                print('$$$$', file=sdfOut)
            if smilesOut:
                smi = Chem.MolToSmiles(m, options.chiralSmiles)
                print('%s %s' % (smi, str(row[1])), file=smilesOut)
            row = curs.fetchone()

    # Close the output files opened above; sys.stdout is left alone.
    for fh in (outF, sdfOut, smilesOut):
        if fh is not None and fh is not sys.stdout:
            fh.close()
    if not options.silent: logger.info('Done!')
Ejemplo n.º 18
0
class TestCase(unittest.TestCase):
    def setUp(self):
        """Connect to the test database and keep a cursor for the tests."""
        testDb = RDConfig.RDTestDatabase
        self.dbName = testDb
        connection = DbConnect(testDb)
        self.conn = connection
        self.curs = connection.GetCursor()

    def test_MolSupplier(self):
        """Calling MolSupplier with no arguments must raise ValueError."""
        with self.assertRaises(ValueError):
            MolSupplier()

    def test_general(self):
        """A result set over ten_elements must be rejected with ValueError.

        This is the check for a molecule column.
        """
        query = 'select * from ten_elements'
        rows = DbResultSet(self.curs, self.conn, query)
        with self.assertRaises(ValueError):
            ForwardDbMolSupplier(rows)

    def test_ForwardDbMolSupplier(self):
        """Molecules from ForwardDbMolSupplier match the raw simple_mols rows."""
        query = 'select * from simple_mols order by ID'
        # First pass: collect the raw rows as the reference.
        rows = DbResultSet(self.curs, self.conn, query)
        reference = list(rows)

        # Second pass: a fresh result set feeds the supplier under test.
        rows = DbResultSet(self.curs, self.conn, query)
        supp = ForwardDbMolSupplier(rows)
        self.assertEqual(supp.GetColumnNames(), ('ID', ))

        for record, mol in zip(reference, supp):
            fromRow = Chem.MolToSmiles(Chem.MolFromSmiles(record[0]))
            self.assertEqual(fromRow, Chem.MolToSmiles(mol))
            self.assertEqual(record[1], mol.GetProp('ID'))
        # Nothing may be left in the supplier after the reference rows.
        with self.assertRaises(StopIteration):
            next(supp)

        # Indexing is not available on a ForwardDbMolSupplier.
        with self.assertRaises(TypeError):
            supp[0]

    def test_RandomAccessDbMolSupplier(self):
        """RandomAccessDbMolSupplier supports len(), iteration and indexing.

        The molecules it returns are compared with the raw rows of the
        simple_mols table, first in order and then in shuffled index order.
        """
        cmd = 'select * from simple_mols order by ID'
        # First pass: collect the raw rows as the reference.
        results = RandomAccessDbResultSet(self.curs, self.conn, cmd)
        expected = list(results)

        # Second pass: a fresh result set feeds the supplier under test.
        results = RandomAccessDbResultSet(self.curs, self.conn, cmd)
        supp = RandomAccessDbMolSupplier(results)
        self.assertEqual(len(supp), len(expected))
        self.assertEqual(supp.GetColumnNames(), ('ID', ))
        # Compare via a SMILES round trip of row[0]; row[1] must equal the ID property.
        for smiles, mol in zip(expected, supp):
            self.assertEqual(Chem.MolToSmiles(Chem.MolFromSmiles(smiles[0])),
                             Chem.MolToSmiles(mol))
            self.assertEqual(smiles[1], mol.GetProp('ID'))

        # Check that we can randomly access the data
        indices = list(range(len(expected)))
        random.shuffle(indices)
        for idx in indices:
            smiles = expected[idx]
            mol = supp[idx]
            self.assertEqual(Chem.MolToSmiles(Chem.MolFromSmiles(smiles[0])),
                             Chem.MolToSmiles(mol))
            self.assertEqual(smiles[1], mol.GetProp('ID'))

        # We get an error if we access outside of the permitted range
        with self.assertRaises(IndexError):
            supp[len(expected)]

        # The DbMolSupplier doesn't support negative indices
        with self.assertRaises(IndexError):
            supp[-1]