def test_InsertData(self):
    """ tests InsertData, AddColumn and InsertColumnData on a scratch table """
    tbl = 'NEW_TABLE'
    dropCmd = 'drop table %s' % (tbl)
    conn = DbConnect(self.tempDbName)

    # best-effort removal of a table left behind by an earlier run
    try:
        conn.GetCursor().execute(dropCmd)
    except Exception:
        pass
    conn.Commit()

    # ten rows of (i, i+1, 2i)
    conn.AddTable(tbl, 'id int,val1 int, val2 int')
    for n in range(10):
        conn.InsertData(tbl, (n, n + 1, 2 * n))
    conn.Commit()
    data = conn.GetData(table=tbl)
    assert len(data) == 10

    # adding a column grows the column list from three to four
    self.assertEqual(len(conn.GetColumnNames(table=tbl)), 3)
    conn.AddColumn(tbl, 'val3', 'int')
    conn.Commit()
    self.assertEqual(len(conn.GetColumnNames(table=tbl)), 4)

    # the new column starts out empty for every row ...
    data = conn.GetColumns('id,val3', table=tbl)
    self.assertEqual(len(data), 10)
    self.assertTrue(all(row[1] is None for row in data))

    # ... and is then filled with a copy of the id
    for row in data:
        rowId = row[0]
        conn.InsertColumnData(tbl, 'val3', rowId, 'id={0}'.format(rowId))
    conn.Commit()
    data = conn.GetColumns('id,val3', table=tbl)
    self.assertTrue(all(row[0] == row[1] for row in data))

    # let go of the result set before dropping the table it came from
    data = None
    try:
        conn.GetCursor().execute(dropCmd)
    except Exception:
        assert 0, 'drop table failed'
def testAddTable(self):
    """ tests AddTable and GetTableNames functionalities """
    tbl = 'NEW_TABLE'
    dropCmd = 'drop table %s' % (tbl)
    conn = DbConnect(self.tempDbName)

    # best-effort removal of a table left behind by an earlier run
    try:
        conn.GetCursor().execute(dropCmd)
    except Exception:
        pass
    conn.Commit()

    conn.AddTable(tbl, 'id int')
    found = [nm.strip() for nm in conn.GetTableNames()]
    assert tbl in found, 'name (%s) not found in %s' % (tbl, str(found))

    # clean up after ourselves
    conn.GetCursor().execute(dropCmd)
def testCursor(self):
    """ tests GetCursor and GetTableNames functionalities """
    view = 'TEST_VIEW'
    dropCmd = 'drop view %s' % (view)
    conn = DbConnect(self.tempDbName)
    curs = conn.GetCursor()
    assert curs

    # best-effort removal of a view left behind by an earlier run
    try:
        curs.execute(dropCmd)
    except Exception:
        pass

    try:
        curs.execute('create view %s as select val,id from ten_elements' % (view))
    except Exception:
        import traceback
        traceback.print_exc()
        raise AssertionError('create view failed')
    conn.Commit()

    # the view must be hidden when views are excluded ...
    tablesOnly = [nm.strip() for nm in conn.GetTableNames(includeViews=0)]
    self.assertNotIn(view, tablesOnly, 'improper view found')
    # ... and listed when they are included
    withViews = [nm.strip() for nm in conn.GetTableNames(includeViews=1)]
    self.assertIn(view, withViews, 'improper view not found')

    try:
        curs.execute(dropCmd)
    except Exception:
        raise AssertionError('drop table failed')
def testCursor(self):
    """ tests GetCursor and GetTableNames functionalities

    A view over ten_elements is created, and GetTableNames is checked to
    leave it out with includeViews=0 and to report it with includeViews=1.
    unittest assertion methods are used instead of bare ``assert`` so the
    checks still run when Python is started with -O.
    """
    viewName = 'TEST_VIEW'
    conn = DbConnect(self.tempDbName)
    curs = conn.GetCursor()
    self.assertTrue(curs)

    # best-effort removal of a view left behind by an earlier run
    try:
        curs.execute('drop view %s' % (viewName))
    except Exception:
        pass

    try:
        curs.execute('create view %s as select val,id from ten_elements' % (viewName))
    except Exception:
        import traceback
        traceback.print_exc()
        self.fail('create view failed')
    conn.Commit()

    names = [x.strip() for x in conn.GetTableNames(includeViews=0)]
    self.assertNotIn(viewName, names, 'improper view found')
    names = [x.strip() for x in conn.GetTableNames(includeViews=1)]
    # this check fails when the view is *missing*, so the message says so
    self.assertIn(viewName, names, 'view not found in %s' % (str(names)))

    try:
        curs.execute('drop view %s' % (viewName))
    except Exception:
        self.fail('drop view failed')
def testAddTable(self):
    """ tests AddTable and GetTableNames functionalities """
    tbl = 'NEW_TABLE'
    dropCmd = 'drop table %s' % (tbl)
    conn = DbConnect(self.tempDbName)

    def currentTables():
        # table names as reported by the connection, whitespace stripped
        return [nm.strip() for nm in conn.GetTableNames()]

    # best-effort removal of a table left behind by an earlier run
    try:
        conn.GetCursor().execute(dropCmd)
    except Exception:
        pass
    conn.Commit()

    # absent before, present (with its single column) after
    self.assertNotIn(tbl, currentTables())
    conn.AddTable(tbl, 'id int')
    self.assertIn(tbl, currentTables())
    self.assertEqual(conn.GetColumnNames(table=tbl), ['id'])

    conn.GetCursor().execute(dropCmd)
def GetFingerprints(details):
    """ returns an iterable sequence of fingerprints

    each fingerprint will have a _fieldsFromDb member whose first entry is
    the id.

    **Arguments**

      - details: an object providing dbName, tableName and inFileName, plus
        fpColName and noPickle (and optionally dbUser/dbPassword) when a
        database is used.

    **Returns**

      - database source: whatever the module-level _dataSeq (or the
        ForwardDbFpSupplier fallback) builds for the query

      - file source: a list of the fingerprints unpickled from
        details.inFileName (empty if the file cannot be opened)

      - None if neither source is configured or the database connection
        could not be established

    """
    import traceback

    if details.dbName and details.tableName:
        try:
            conn = DbConnect(details.dbName, details.tableName)
            if hasattr(details, 'dbUser'):
                conn.user = details.dbUser
            if hasattr(details, 'dbPassword'):
                conn.password = details.dbPassword
        except Exception:
            FingerprintMols.error(
                'Error: Problems establishing connection to database: %s|%s\n' %
                (details.dbName, details.tableName))
            traceback.print_exc()
            # without a connection nothing below can work; previously this
            # fell through and died on the unbound name `conn`
            return None

        cmd = _ConstructSQL(details, extraFields=details.fpColName)
        curs = conn.GetCursor()
        if _dataSeq:
            suppl = _dataSeq(curs, cmd, depickle=not details.noPickle,
                             klass=DataStructs.ExplicitBitVect)
            # presumably keeps the connection alive for as long as the
            # sequence class is around -- TODO confirm
            _dataSeq._conn = conn
        else:
            # NOTE(review): `data` is not defined anywhere in this function,
            # so this fallback raises NameError unless a module-level `data`
            # exists. The intended argument is not visible from here.
            suppl = DbFpSupplier.ForwardDbFpSupplier(data, fpColName=details.fpColName)
    elif details.inFileName:
        suppl = []
        try:
            # pickles are byte streams: read them in binary mode
            inF = open(details.inFileName, 'rb')
        except IOError:
            FingerprintMols.error('Error: Problems reading from file %s\n' %
                                  (details.inFileName))
            traceback.print_exc()
            return suppl
        # NOTE(review): unpickling executes arbitrary code; only use this on
        # fingerprint files from a trusted source.
        with inF:
            while True:
                try:
                    fpId, fp = cPickle.load(inF)
                except Exception:
                    # end of file (or an unreadable record) ends the sequence
                    break
                fp._fieldsFromDb = [fpId]
                suppl.append(fp)
    else:
        suppl = None

    return suppl
def test_GetTableNames(self):
    """ GetTableNames gives the same names with or without a prior GetCursor call """
    # first with a cursor instantiated beforehand ...
    db = DbConnect(self.tempDbName)
    db.GetCursor()
    names_Cursor = list(db.GetTableNames())
    names_Cursor.sort()

    # ... then on a fresh connection without one (this tests functionality of DbInfo)
    db = DbConnect(self.tempDbName)
    names_noCursor = list(db.GetTableNames())
    names_noCursor.sort()

    self.assertEqual(names_Cursor, names_noCursor)
def ScreenInDb(details, mol):
    """ screens a probe molecule against the fingerprints of a database table

    For the Tanimoto, Dice and Cosine metrics the similarity is computed by
    the database itself through the rd_tanimoto / rd_dice / rd_cosine SQL
    functions (presumably installed on the database side -- TODO confirm).
    Any other metric falls back to GetFingerprints + ScreenFingerprints.

    **Arguments**

      - details: screening parameters; its __dict__ is passed as keyword
        arguments to FingerprintMols.FingerprintMol, and dbName, tableName,
        metric, fpColName, doThreshold, screenThresh and topN are read here

      - mol: the probe molecule

    **Returns**

      the rows fetched from the query (or the ScreenFingerprints result for
      other metrics); an empty list if the probe cannot be fingerprinted or
      no database connection is available for an in-database metric

    """
    import traceback

    try:
        # keyword expansion replaces the Python-2-only apply() builtin
        probeFp = FingerprintMols.FingerprintMol(mol, **details.__dict__)
    except Exception:
        FingerprintMols.error('Error: problems fingerprinting molecule.\n')
        traceback.print_exc()
        return []

    conn = None
    if details.dbName and details.tableName:
        try:
            conn = DbConnect(details.dbName, details.tableName)
            if hasattr(details, 'dbUser'):
                conn.user = details.dbUser
            if hasattr(details, 'dbPassword'):
                conn.password = details.dbPassword
        except Exception:
            FingerprintMols.error(
                'Error: Problems establishing connection to database: %s|%s\n' %
                (details.dbName, details.tableName))
            traceback.print_exc()
            conn = None

    # metrics that can be evaluated inside the database, with their SQL names
    dbMetrics = (
        (DataStructs.TanimotoSimilarity, 'rd_tanimoto'),
        (DataStructs.DiceSimilarity, 'rd_dice'),
        (DataStructs.CosineSimilarity, 'rd_cosine'),
    )
    func = None
    for metric, sqlName in dbMetrics:
        if details.metric == metric:
            func = sqlName
            break

    if func is None:
        # no in-database implementation: pull the fingerprints and screen here
        data = GetFingerprints(details)
        return ScreenFingerprints(details, data, mol)

    if conn is None:
        # previously this path died on the unbound name `conn`
        FingerprintMols.error('Error: no database connection available for screening.\n')
        return []

    pkl = probeFp.ToBitString()
    extraFields = "%s(%s,%s) as tani" % (func, DbModule.placeHolder, details.fpColName)
    cmd = _ConstructSQL(details, extraFields=extraFields)
    if details.doThreshold:
        # we need to do a subquery here:
        cmd = "select * from (%s) tmp where tani>%f" % (cmd, details.screenThresh)
    cmd += " order by tani desc"
    if not details.doThreshold and details.topN > 0:
        cmd += " limit %d" % details.topN

    curs = conn.GetCursor()
    curs.execute(cmd, (pkl, ))
    return curs.fetchall()
def testInsertData(self):
    """ tests InsertData and InsertColumnData functionalities """
    tbl = 'NEW_TABLE'
    dropCmd = 'drop table %s' % (tbl)
    conn = DbConnect(self.tempDbName)

    # best-effort removal of a table left behind by an earlier run
    try:
        conn.GetCursor().execute(dropCmd)
    except Exception:
        pass
    conn.Commit()

    # ten rows of (i, i+1, 2i)
    conn.AddTable(tbl, 'id int,val1 int, val2 int')
    for n in range(10):
        conn.InsertData(tbl, (n, n + 1, 2 * n))
    conn.Commit()
    data = conn.GetData(table=tbl)
    assert len(data) == 10

    # let go of the result set before dropping the table it came from
    data = None
    try:
        conn.GetCursor().execute(dropCmd)
    except Exception:
        assert 0, 'drop table failed'
def test5TestBackwardsCompat(self):
    """ searches against an atom-pair table copied into a table named tmp

    The databases under testData/bzr are rebuilt with CreateDb.py, the
    atompairs table is copied to ``tmp`` and SearchDb.py is pointed at that
    table via --pairTableName. Each line of the search output is expected to
    be ``label,nbr1,sim1,nbr2,sim2,...`` with non-increasing similarities and
    the query itself present with similarity 1.000.

    Uses only constructs that exist on both Python 2.7 and Python 3
    (assertTrue/assertFalse/assertEqual, open(), ``in``) in place of the
    removed failIf/failUnless aliases, file() and dict.has_key().
    """
    staleDbs = ('testData/bzr/Compounds.sqlt', 'testData/bzr/AtomPairs.sqlt',
                'testData/bzr/Descriptors.sqlt', 'testData/bzr/Fingerprints.sqlt')
    for path in staleDbs:
        if os.path.exists(path):
            os.unlink(path)

    p = subprocess.Popen(('python', 'CreateDb.py', '--dbDir=testData/bzr', '--noFingerprints',
                          '--noDescriptors', 'testData/bzr.sdf'))
    res = p.wait()
    self.assertFalse(res)
    p = None

    conn = DbConnect('testData/bzr/AtomPairs.sqlt')
    curs = conn.GetCursor()
    curs.execute('create table tmp as select compound_id,atompairfp,torsionfp from atompairs')

    p = subprocess.Popen(('python', 'SearchDb.py', '--dbDir=testData/bzr', '--molFormat=sdf',
                          '--topN=5', '--outF=testData/bzr/search.out',
                          '--similarityType=AtomPairs', '--pairTableName=tmp',
                          'testData/bzr.sdf'))
    res = p.wait()
    self.assertFalse(res)
    p = None

    self.assertTrue(os.path.exists('testData/bzr/search.out'))
    # the context manager closes the handle before the file is unlinked below
    with open('testData/bzr/search.out', 'r') as inF:
        lines = inF.readlines()
    self.assertEqual(len(lines), 163)

    for rawLine in lines:
        fields = rawLine.strip().split(',')
        lbl = fields[0]
        nbrs = {}
        lastVal = 1.0
        # fields after the label come in (neighbor, similarity) pairs
        for i in range(1, len(fields), 2):
            nbrs[fields[i]] = fields[i + 1]
            sim = float(fields[i + 1])
            self.assertTrue(sim <= lastVal)
            lastVal = sim
        self.assertTrue(lbl in nbrs)
        self.assertEqual(nbrs[lbl], '1.000')

    os.unlink('testData/bzr/search.out')
def Store(self, db='models.gdb', table='results', user='******', password='******'):
    """ adds the result to a database

      **Arguments**

        - db: name of the database to use

        - table: name of the table to use

        - user&password: connection information

      Only the fields listed in self.fields that are actually set on this
      object are written; the others are left out of the insert statement.

    """
    cn = DbConnect(db, table, user, password)
    curs = cn.GetCursor()
    self._CreateTable(cn, table)

    # collect (column, value) pairs for the attributes that exist
    unset = object()
    present = []
    for name, _ in self.fields:
        value = getattr(self, name, unset)
        if value is not unset:
            present.append(('%s' % name, value))

    colNames = ','.join([col for col, _ in present])
    vals = tuple([value for _, value in present])
    qs = ','.join([DbModule.placeHolder] * len(vals))

    cmd = 'insert into %s (%s) values (%s)' % (table, colNames, qs)
    curs.execute(cmd, vals)
    cn.Commit()
password=RDConfig.defaultDBPassword, pickleCol=details.pickleCol, pickleClass=DataStructs.ExplicitBitVect) descs = dataSet.GetVarNames() nPts = dataSet.GetNPts() message('npts: %d\n' % (nPts)) final = numpy.zeros((nPts, 2), numpy.float) counts = numpy.zeros(nPts, numpy.integer) selPts = [None] * nPts models = [] if details.persistTblName: conn = DbConnect(details.dbName, details.persistTblName) message('-> Retrieving models from database') curs = conn.GetCursor() curs.execute("select model from %s where note='%s'" % (details.persistTblName, details.note)) message('-> Reconstructing models') try: blob = curs.fetchone() except Exception: blob = None while blob: message(' Building model %d' % len(models)) blob = blob[0] try: models.append(cPickle.loads(str(blob))) except Exception: import traceback traceback.print_exc()
def _ReportFailedMol(suppl, idx, errorsTo):
    """ writes the supplier's text for entry idx to errorsTo (if one was given) """
    if not errorsTo:
        return
    if hasattr(suppl, 'GetItemText'):
        errorsTo.write(suppl.GetItemText(idx))
    else:
        logger.warning('full error file support not complete')


def LoadDb(suppl,dbName,nameProp='_Name',nameCol='compound_id',silent=False,
           redraw=False,errorsTo=None,keepHs=False,defaultVal='N/A',skipProps=False,
           regName='molecules',skipSmiles=False,maxRowsCached=-1,
           uniqNames=False,addComputedProps=False,lazySupplier=False,
           startAnew=True):
    """ loads the molecules from a supplier into the table regName of dbName

    The first batch of molecules (up to maxRowsCached rows) determines the
    table's columns; the rest of the supplier is then read with next() and
    inserted in batches of maxRowsCached rows.

    **Arguments**

      - suppl: the molecule supplier; must support len() unless lazySupplier
        is set, and next() for the second phase

      - dbName: name of the database to write to

      - regName: name of the table to create or extend

      - nameCol: name of the column holding the molecule name

      - uniqNames: adds a unique constraint to the name column

      - skipSmiles: leaves out the smiles/cansmiles column

      - maxRowsCached: number of rows collected before each insert

      - lazySupplier: if set, len(suppl) is not taken

      - errorsTo: optional file-like object that receives the supplier's
        text for entries that could not be read

      - startAnew: if true the table is dropped and recreated; otherwise
        rows are appended and guids continue after the table's max(guid)

      - silent: suppresses progress logging

      - nameProp, redraw, keepHs, skipProps, addComputedProps: passed on to
        ProcessMol;  defaultVal: passed on to ConvertRows

    **Raises**

      - ValueError if startAnew is false and the existing table's columns do
        not match the ones derived from the molecules

    """
    if not lazySupplier:
        nMols = len(suppl)
    else:
        nMols = -1
    if not silent:
        logger.info("Generating molecular database in file %s"%dbName)
        if not lazySupplier:
            logger.info(" Processing %d molecules"%nMols)

    rows = []
    globalProps = {}
    namesSeen = set()
    nDone = 0
    typeConversions = {0:('varchar',str),1:('float',float),2:('int',int)}

    # phase 1: read the first batch; this is what fills globalProps and so
    # decides the columns of the table
    for m in suppl:
        nDone += 1
        if not m:
            _ReportFailedMol(suppl, nDone-1, errorsTo)
            continue
        row = ProcessMol(m,typeConversions,globalProps,nDone,nameProp=nameProp,
                         nameCol=nameCol,redraw=redraw,
                         keepHs=keepHs,skipProps=skipProps,
                         addComputedProps=addComputedProps,skipSmiles=skipSmiles,
                         uniqNames=uniqNames,namesSeen=namesSeen)
        if row is None:
            continue
        rows.append([nDone]+row)
        if not silent and not nDone%100:
            logger.info(' done %d'%nDone)
        if len(rows)==maxRowsCached:
            break

    # build the column definitions
    nameDef = '%s varchar not null'%nameCol
    if uniqNames:
        nameDef += ' unique'
    typs = ['guid integer not null primary key',nameDef]
    pns = []
    for pn,v in globalProps.items():
        # property names may contain characters that are not legal in a column name
        addNm = re.sub(r'[\W]','_',pn)
        typs.append('%s %s'%(addNm,typeConversions[v][0]))
        pns.append(pn.lower())
    if not skipSmiles:
        # avoid clashing with a molecule property that is itself called "smiles"
        if 'smiles' not in pns:
            typs.append('smiles varchar')
        else:
            typs.append('cansmiles varchar')
    typs.append('molpkl %s'%(DbModule.binaryTypeName))

    conn = DbConnect(dbName)
    curs = conn.GetCursor()
    offset = 0
    if startAnew:
        try:
            curs.execute('drop table %s'%regName)
        except Exception:
            # the table not existing yet is fine
            pass
        curs.execute('create table %s (%s)'%(regName,','.join(typs)))
    else:
        curs.execute('select * from %s limit 1'%(regName,))
        ocolns = set([x[0] for x in curs.description])
        ncolns = set([x.split()[0] for x in typs])
        if ncolns != ocolns:
            raise ValueError('Column names do not match: %s != %s'%(ocolns,ncolns))
        curs.execute('select max(guid) from %s'%(regName,))
        # max() over an empty table is NULL, which arrives here as None
        offset = curs.fetchone()[0] or 0
        for row in rows:
            row[0] += offset

    qs = ','.join([DbModule.placeHolder for x in typs])

    def _flush(batch):
        # convert, insert and commit one batch of rows
        ConvertRows(batch,globalProps,defaultVal,skipSmiles)
        curs.executemany('insert into %s values (%s)'%(regName,qs),batch)
        conn.Commit()

    _flush(rows)

    # phase 2: whatever is left in the supplier, in batches
    rows = []
    while 1:
        nDone += 1
        try:
            m = next(suppl)
        except StopIteration:
            break
        if not m:
            _ReportFailedMol(suppl, nDone-1, errorsTo)
            continue
        row = ProcessMol(m,typeConversions,globalProps,nDone,nameProp=nameProp,
                         nameCol=nameCol,redraw=redraw,
                         keepHs=keepHs,skipProps=skipProps,
                         addComputedProps=addComputedProps,skipSmiles=skipSmiles,
                         uniqNames=uniqNames,namesSeen=namesSeen)
        if not row:
            continue
        # the guid offset has to apply to these rows as well, otherwise they
        # collide with guids already in the table when appending
        rows.append([nDone+offset]+row)
        if not silent and not nDone%100:
            logger.info(' done %d'%nDone)
        if len(rows)==maxRowsCached:
            _flush(rows)
            rows = []
    if len(rows):
        _flush(rows)
def CreateDb(options,dataFilename='',supplier=None):
    """ builds the molecule database and the requested fingerprint/descriptor tables

    **Arguments**

      - options: the option set; the attributes read here include outDir,
        the *DbName / *TableName settings, molIdName, regName and the do*
        switches that select which tables get built

      - dataFilename: name of the input file; only used when no supplier is
        provided

      - supplier: an optional molecule supplier to load from

    **Raises**

      - ValueError if neither dataFilename nor supplier is provided

    **Notes**

      - if options.loadMols is set the molecules are first loaded with
        Loader.LoadDb

      - the fingerprints/descriptors are then calculated from the molpkl
        column of the molecule table and written out every 500 molecules,
        with a final write after the loop

    """
    if not dataFilename and supplier is None:
        raise ValueError('Please provide either a data filename or a supplier')

    if options.errFilename:
        # NOTE(review): this handle is never closed inside this function
        errFile=open(os.path.join(options.outDir,options.errFilename),'w+')
    else:
        errFile=None

    # noExtras switches off everything except the molecule table itself
    if options.noExtras:
        options.doPairs=False
        options.doDescriptors=False
        options.doFingerprints=False
        options.doPharm2D=False
        options.doGobbi2D=False
        options.doLayered=False
        options.doMorganFps=False

    if options.loadMols:
        if supplier is None:
            if not options.molFormat:
                # guess the format from the file extension
                ext = os.path.splitext(dataFilename)[-1].lower()
                if ext=='.sdf':
                    options.molFormat='sdf'
                elif ext in ('.smi','.smiles','.txt','.csv'):
                    options.molFormat='smiles'
                    if not options.delimiter:
                        # guess the delimiter
                        import csv
                        sniffer = csv.Sniffer()
                        # NOTE(review): the file opened here is never explicitly closed
                        dlct=sniffer.sniff(open(dataFilename,'r').read(2000))
                        options.delimiter=dlct.delimiter
                        if not options.silent:
                            logger.info('Guessing that delimiter is %s. Use --delimiter argument if this is wrong.'%repr(options.delimiter))

                if not options.silent:
                    logger.info('Guessing that mol format is %s. Use --molFormat argument if this is wrong.'%repr(options.molFormat))
            if options.molFormat=='smiles':
                # a literal backslash-t (as typed on a command line) means a tab
                if options.delimiter=='\\t': options.delimiter='\t'
                supplier=Chem.SmilesMolSupplier(dataFilename,
                                                titleLine=options.titleLine,
                                                delimiter=options.delimiter,
                                                smilesColumn=options.smilesColumn,
                                                nameColumn=options.nameColumn
                                                )
            else:
                supplier = Chem.SDMolSupplier(dataFilename)
        if not options.silent: logger.info('Reading molecules and constructing molecular database.')
        Loader.LoadDb(supplier,os.path.join(options.outDir,options.molDbName),
                      errorsTo=errFile,regName=options.regName,nameCol=options.molIdName,
                      skipProps=options.skipProps,defaultVal=options.missingPropertyVal,
                      addComputedProps=options.addProps,uniqNames=True,
                      skipSmiles=options.skipSmiles,maxRowsCached=int(options.maxRowsCached),
                      silent=options.silent,nameProp=options.nameProp,
                      lazySupplier=int(options.maxRowsCached)>0,
                      startAnew=not options.updateDb
                      )

    # --- set up the output tables; each one is dropped (ignoring any
    # --- failure of the drop) and then recreated
    if options.doPairs:
        pairConn = DbConnect(os.path.join(options.outDir,options.pairDbName))
        pairCurs = pairConn.GetCursor()
        try:
            pairCurs.execute('drop table %s'%(options.pairTableName))
        except:
            pass
        pairCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,atompairfp blob,torsionfp blob)'%(options.pairTableName,
                                                                                                                                     options.molIdName))

    if options.doFingerprints or options.doPharm2D or options.doGobbi2D or options.doLayered:
        # these four tables all live in the same database
        fpConn = DbConnect(os.path.join(options.outDir,options.fpDbName))
        fpCurs=fpConn.GetCursor()
        try:
            fpCurs.execute('drop table %s'%(options.fpTableName))
        except:
            pass
        try:
            fpCurs.execute('drop table %s'%(options.pharm2DTableName))
        except:
            pass
        try:
            fpCurs.execute('drop table %s'%(options.gobbi2DTableName))
        except:
            pass
        try:
            fpCurs.execute('drop table %s'%(options.layeredTableName))
        except:
            pass

        if options.doFingerprints:
            fpCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,rdkfp blob)'%(options.fpTableName,
                                                                                                                   options.molIdName))
        if options.doLayered:
            # one integer column (and one placeholder) per word of the layered fingerprint
            # NOTE(review): a literal '?' is used here (and in the inserts
            # below) where the descriptor table uses DbModule.placeHolder
            layeredQs = ','.join('?'*LayeredOptions.nWords)
            colDefs=','.join(['Col_%d integer'%(x+1) for x in range(LayeredOptions.nWords)])
            fpCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,%s)'%(options.layeredTableName,
                                                                                                            options.molIdName,
                                                                                                            colDefs))

        if options.doPharm2D:
            fpCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,pharm2dfp blob)'%(options.pharm2DTableName,
                                                                                                                       options.molIdName))
            sigFactory = BuildSigFactory(options)
        if options.doGobbi2D:
            fpCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,gobbi2dfp blob)'%(options.gobbi2DTableName,
                                                                                                                       options.molIdName))
            # imported here so that Gobbi_Pharm2D is available in the loop below
            from rdkit.Chem.Pharm2D import Generate,Gobbi_Pharm2D

    if options.doMorganFps :
        # rebinds fpConn/fpCurs to a new connection on the same fingerprint database
        fpConn = DbConnect(os.path.join(options.outDir,options.fpDbName))
        fpCurs=fpConn.GetCursor()
        try:
            fpCurs.execute('drop table %s'%(options.morganFpTableName))
        except:
            pass
        fpCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,morganfp blob)'%(options.morganFpTableName,
                                                                                                                  options.molIdName))

    if options.doDescriptors:
        descrConn=DbConnect(os.path.join(options.outDir,options.descrDbName))
        # the pickled calculator is read as text, its line endings are
        # normalized, and it is then unpickled from the resulting bytes
        with open(options.descriptorCalcFilename,'r') as inTF:
            buf = inTF.read().replace('\r\n', '\n').encode('utf-8')
            # NOTE(review): redundant, the with statement already closes the file
            inTF.close()
        calc = cPickle.load(io.BytesIO(buf))
        nms = [x for x in calc.GetDescriptorNames()]
        descrCurs = descrConn.GetCursor()
        descrs = ['guid integer not null primary key','%s varchar not null unique'%options.molIdName]
        descrs.extend(['%s float'%x for x in nms])
        try:
            descrCurs.execute('drop table %s'%(options.descrTableName))
        except:
            pass
        descrCurs.execute('create table %s (%s)'%(options.descrTableName,','.join(descrs)))
        descrQuery=','.join([DbModule.placeHolder]*len(descrs))

    # rows waiting to be written, one list per output table
    pairRows = []
    fpRows = []
    layeredRows = []
    descrRows = []
    pharm2DRows=[]
    gobbi2DRows=[]
    morganRows = []

    if not options.silent: logger.info('Generating fingerprints and descriptors:')
    molConn = DbConnect(os.path.join(options.outDir,options.molDbName))
    molCurs = molConn.GetCursor()
    if not options.skipSmiles:
        molCurs.execute('select guid,%s,smiles,molpkl from %s'%(options.molIdName,options.regName))
    else:
        molCurs.execute('select guid,%s,molpkl from %s'%(options.molIdName,options.regName))
    i=0
    while 1:
        # any failure in here ends the loop; presumably the intended exit is
        # fetchone() returning None once the rows run out -- TODO confirm
        try:
            tpl = molCurs.fetchone()
            molGuid = tpl[0]
            molId = tpl[1]
            # the pickle is the last column in both forms of the query
            pkl = tpl[-1]
            i+=1
        except:
            break
        if isinstance(pkl,(bytes,str)):
            mol = Chem.Mol(pkl)
        else:
            mol = Chem.Mol(str(pkl))
        if not mol: continue

        if options.doPairs:
            pairs = FingerprintUtils.BuildAtomPairFP(mol)
            torsions = FingerprintUtils.BuildTorsionsFP(mol)
            pkl1 = DbModule.binaryHolder(pairs.ToBinary())
            pkl2 = DbModule.binaryHolder(torsions.ToBinary())
            row = (molGuid,molId,pkl1,pkl2)
            pairRows.append(row)
        if options.doFingerprints:
            fp2 = FingerprintUtils.BuildRDKitFP(mol)
            pkl = DbModule.binaryHolder(fp2.ToBinary())
            row = (molGuid,molId,pkl)
            fpRows.append(row)
        if options.doLayered:
            words = LayeredOptions.GetWords(mol)
            row = [molGuid,molId]+words
            layeredRows.append(row)
        if options.doDescriptors:
            descrs= calc.CalcDescriptors(mol)
            row = [molGuid,molId]
            row.extend(descrs)
            descrRows.append(row)
        if options.doPharm2D:
            # the factory to use is handed over through a module attribute
            FingerprintUtils.sigFactory=sigFactory
            fp= FingerprintUtils.BuildPharm2DFP(mol)
            pkl = DbModule.binaryHolder(fp.ToBinary())
            row = (molGuid,molId,pkl)
            pharm2DRows.append(row)
        if options.doGobbi2D:
            FingerprintUtils.sigFactory=Gobbi_Pharm2D.factory
            fp= FingerprintUtils.BuildPharm2DFP(mol)
            pkl = DbModule.binaryHolder(fp.ToBinary())
            row = (molGuid,molId,pkl)
            gobbi2DRows.append(row)
        if options.doMorganFps:
            morgan = FingerprintUtils.BuildMorganFP(mol)
            pkl = DbModule.binaryHolder(morgan.ToBinary())
            row = (molGuid,molId,pkl)
            morganRows.append(row)

        # write out and commit whatever has accumulated every 500 molecules
        if not i%500:
            if len(pairRows):
                pairCurs.executemany('insert into %s values (?,?,?,?)'%options.pairTableName,
                                     pairRows)
                pairRows = []
                pairConn.Commit()
            if len(fpRows):
                fpCurs.executemany('insert into %s values (?,?,?)'%options.fpTableName,
                                   fpRows)
                fpRows = []
                fpConn.Commit()
            if len(layeredRows):
                fpCurs.executemany('insert into %s values (?,?,%s)'%(options.layeredTableName,layeredQs),
                                   layeredRows)
                layeredRows = []
                fpConn.Commit()
            if len(descrRows):
                descrCurs.executemany('insert into %s values (%s)'%(options.descrTableName,descrQuery),
                                      descrRows)
                descrRows = []
                descrConn.Commit()
            if len(pharm2DRows):
                fpCurs.executemany('insert into %s values (?,?,?)'%options.pharm2DTableName,
                                   pharm2DRows)
                pharm2DRows = []
                fpConn.Commit()
            if len(gobbi2DRows):
                fpCurs.executemany('insert into %s values (?,?,?)'%options.gobbi2DTableName,
                                   gobbi2DRows)
                gobbi2DRows = []
                fpConn.Commit()
            if len(morganRows):
                fpCurs.executemany('insert into %s values (?,?,?)'%options.morganFpTableName,
                                   morganRows)
                morganRows = []
                fpConn.Commit()

        if not options.silent and not i%500:
            logger.info(' Done: %d'%(i))

    # final write of whatever is left over from the last partial batch
    if len(pairRows):
        pairCurs.executemany('insert into %s values (?,?,?,?)'%options.pairTableName,
                             pairRows)
        pairRows = []
        pairConn.Commit()
    if len(fpRows):
        fpCurs.executemany('insert into %s values (?,?,?)'%options.fpTableName,
                           fpRows)
        fpRows = []
        fpConn.Commit()
    if len(layeredRows):
        fpCurs.executemany('insert into %s values (?,?,%s)'%(options.layeredTableName,layeredQs),
                           layeredRows)
        layeredRows = []
        fpConn.Commit()
    if len(descrRows):
        descrCurs.executemany('insert into %s values (%s)'%(options.descrTableName,descrQuery),
                              descrRows)
        descrRows = []
        descrConn.Commit()
    if len(pharm2DRows):
        fpCurs.executemany('insert into %s values (?,?,?)'%options.pharm2DTableName,
                           pharm2DRows)
        pharm2DRows = []
        fpConn.Commit()
    if len(gobbi2DRows):
        fpCurs.executemany('insert into %s values (?,?,?)'%options.gobbi2DTableName,
                           gobbi2DRows)
        gobbi2DRows = []
        fpConn.Commit()
    if len(morganRows):
        fpCurs.executemany('insert into %s values (?,?,?)'%options.morganFpTableName,
                           morganRows)
        morganRows = []
        fpConn.Commit()

    if not options.silent:
        logger.info('Finished.')
class TestCase(unittest.TestCase):
    """ checks DbResultSet and RandomAccessDbResultSet against the test database """

    def setUp(self):
        self.dbName = RDConfig.RDTestDatabase
        self.conn = DbConnect(self.dbName)
        self.curs = self.conn.GetCursor()

    def _countForward(self, cmd, **kwargs):
        # number of rows obtained by iterating a DbResultSet for cmd
        resultSet = DbResultSet(self.curs, self.conn, cmd, **kwargs)
        return len([thing for thing in resultSet])

    def _walkRandomAccess(self, cmd, **kwargs):
        # checks the length of a RandomAccessDbResultSet and touches each row by index
        resultSet = RandomAccessDbResultSet(self.curs, self.conn, cmd, **kwargs)
        assert len(resultSet) == 10
        for idx in range(len(resultSet)):
            resultSet[idx]

    def test1(self):
        """ test indexing in, ensure acceptable error conditions """
        resultSet = RandomAccessDbResultSet(self.curs, self.conn, 'select * from ten_elements')
        for idx in range(12):
            try:
                resultSet[idx]
            except IndexError:
                # running off the end is only acceptable past the tenth row
                assert idx >= 10

    def test2(self):
        """ random access: length and indexing on ten_elements """
        self._walkRandomAccess('select * from ten_elements')

    def test3(self):
        """ forward iteration over ten_elements """
        assert self._countForward('select * from ten_elements') == 10

    def test4(self):
        """ forward iteration over ten_elements_dups with removeDups=0 """
        assert self._countForward('select * from ten_elements_dups', removeDups=0) == 10

    def test5(self):
        """ random access on ten_elements_dups with removeDups=0 """
        self._walkRandomAccess('select * from ten_elements_dups', removeDups=0)

    def test6(self):
        """ forward iteration over ten_elements_dups with removeDups=0 """
        assert self._countForward('select * from ten_elements_dups', removeDups=0) == 10
class TestCase(unittest.TestCase):
    """ checks DbResultSet and RandomAccessDbResultSet against the test database """

    def setUp(self):
        self.dbName = RDConfig.RDTestDatabase
        self.conn = DbConnect(self.dbName)
        self.curs = self.conn.GetCursor()

    def _probeIndices(self, resultSet, nProbes, nRows):
        # indexes 0..nProbes-1; an IndexError is only acceptable from nRows on
        for idx in range(nProbes):
            try:
                resultSet[idx]
            except IndexError:
                assert idx >= nRows

    def test1(self):
        """ test indexing in, ensure acceptable error conditions """
        cmd = 'select * from ten_elements'
        resultSet = RandomAccessDbResultSet(self.curs, self.conn, cmd)
        with self.assertRaises(IndexError):
            resultSet[-1]
        self._probeIndices(resultSet, 12, 10)

        # column metadata
        self.assertEqual(resultSet.GetColumnNames(), ('id', 'val'))
        self.assertEqual(resultSet.GetColumnTypes(), ('integer', 'integer'))
        self.assertEqual(resultSet.GetColumnNamesAndTypes(),
                         (('id', 'integer'), ('val', 'integer')))

        # the table with duplicates: 20 rows by default, 10 with removeDups=0
        cmd = 'select * from ten_elements_dups'
        resultSet = RandomAccessDbResultSet(self.curs, self.conn, cmd)
        self._probeIndices(resultSet, 22, 20)

        resultSet = RandomAccessDbResultSet(self.curs, self.conn, cmd, removeDups=0)
        self._probeIndices(resultSet, 22, 10)

        # Test iterator
        resultSet = RandomAccessDbResultSet(self.curs, self.conn, cmd, removeDups=0)
        self.assertEqual(next(resultSet), resultSet[0])
        self.assertEqual(len(list(resultSet)), 10)

    def test2(self):
        """ length and index access on ten_elements """
        resultSet = RandomAccessDbResultSet(self.curs, self.conn, 'select * from ten_elements')
        assert len(resultSet) == 10
        for idx in range(len(resultSet)):
            resultSet[idx]

    def test3(self):
        """ forward iteration over ten_elements """
        cmd = 'select * from ten_elements'
        resultSet = DbResultSet(self.curs, self.conn, cmd)
        collected = [obj for obj in resultSet]
        self.assertEqual(len(collected), 10)

        # Test iterator
        resultSet = DbResultSet(self.curs, self.conn, cmd)
        self.assertEqual(next(resultSet), (0, 11))
        self.assertEqual(len(list(resultSet)), 10)

    def test4(self):
        """ forward iteration over ten_elements_dups, with and without removeDups=0 """
        cmd = 'select * from ten_elements_dups'
        resultSet = DbResultSet(self.curs, self.conn, cmd)
        collected = [obj for obj in resultSet]
        self.assertEqual(len(collected), 20)

        resultSet = DbResultSet(self.curs, self.conn, cmd, removeDups=0)
        collected = [obj for obj in resultSet]
        self.assertEqual(len(collected), 10)

    def test5(self):
        """ random access on ten_elements_dups: len() and iteration agree """
        cmd = 'select * from ten_elements_dups'

        # length first, then iteration
        resultSet = RandomAccessDbResultSet(self.curs, self.conn, cmd)
        self.assertEqual(len(resultSet), 20)
        collected = [obj for obj in resultSet]
        self.assertEqual(len(collected), 20)

        # iteration first, then length
        resultSet = RandomAccessDbResultSet(self.curs, self.conn, cmd)
        collected = [obj for obj in resultSet]
        self.assertEqual(len(collected), 20)
        self.assertEqual(len(resultSet), 20)

        resultSet = RandomAccessDbResultSet(self.curs, self.conn, cmd, removeDups=0)
        self.assertEqual(len(resultSet), 10)
        collected = [obj for obj in resultSet]
        self.assertEqual(len(collected), 10)

    def test6(self):
        """ forward iteration over ten_elements_dups with removeDups=0 """
        resultSet = DbResultSet(self.curs, self.conn, 'select * from ten_elements_dups',
                                removeDups=0)
        collected = [obj for obj in resultSet]
        self.assertEqual(len(collected), 10)
def RunSearch(options, queryFilename):
  """ Runs a substructure/property query and/or a similarity search and writes the hits.

  The work is done in stages:
    1. pick the fingerprint builder, similarity metric and fingerprint
       database/table/column from options.similarityType (optionally
       overriding the metric via options.similarityMetric);
    2. build options.queryMol from options.smilesQuery or options.smartsQuery;
    3. open the requested output streams (options.outF, options.sdfOut,
       options.smilesOut; '-' means stdout);
    4. if queryFilename is given, read the probe molecules and fingerprint them;
    5. if a substructure and/or property query was given, collect the ids of
       the matching rows of the 'molecules' table;
    6. if there are probes, find their neighbors (restricted to the ids from
       step 5 when there are any) and write the neighbor lists; otherwise
       write the names of the molecules found in step 5;
    7. optionally write the hit molecules as SD and/or SMILES.

  Arguments:
    - options: the parsed command-line options object. It is modified in
      place: fpColName may receive a default value and queryMol is set when
      a SMILES/SMARTS query is supplied.
    - queryFilename: name of the file with the probe molecules (read
      according to options.molFormat), or a false value for no similarity search.

  Returns: None

  Notes:
    - calls sys.exit() if the query molecule cannot be built or the query
      file cannot be opened.
    - for the Pharm2D and Gobbi2D similarity types FingerprintUtils.sigFactory
      is replaced as a side effect.
    - the SQL statements are assembled by string interpolation from option
      values (table/column names, options.propQuery); only pass trusted input.

  """
  global sigFactory
  # NOTE(review): an unrecognised similarityType leaves fpBuilder, simMetric,
  # dbName, fpTableName and fpColName unbound; presumably the option parser
  # restricts the choices -- confirm against the caller.
  if options.similarityType == 'AtomPairs':
    fpBuilder = FingerprintUtils.BuildAtomPairFP
    simMetric = DataStructs.DiceSimilarity
    dbName = os.path.join(options.dbDir, options.pairDbName)
    fpTableName = options.pairTableName
    fpColName = options.pairColName
  elif options.similarityType == 'TopologicalTorsions':
    fpBuilder = FingerprintUtils.BuildTorsionsFP
    simMetric = DataStructs.DiceSimilarity
    dbName = os.path.join(options.dbDir, options.torsionsDbName)
    fpTableName = options.torsionsTableName
    fpColName = options.torsionsColName
  elif options.similarityType == 'RDK':
    fpBuilder = FingerprintUtils.BuildRDKitFP
    simMetric = DataStructs.FingerprintSimilarity
    dbName = os.path.join(options.dbDir, options.fpDbName)
    fpTableName = options.fpTableName
    if not options.fpColName:
      options.fpColName = 'rdkfp'
    fpColName = options.fpColName
  elif options.similarityType == 'Pharm2D':
    fpBuilder = FingerprintUtils.BuildPharm2DFP
    simMetric = DataStructs.DiceSimilarity
    dbName = os.path.join(options.dbDir, options.fpDbName)
    fpTableName = options.pharm2DTableName
    if not options.fpColName:
      options.fpColName = 'pharm2dfp'
    fpColName = options.fpColName
    FingerprintUtils.sigFactory = BuildSigFactory(options)
  elif options.similarityType == 'Gobbi2D':
    from rdkit.Chem.Pharm2D import Gobbi_Pharm2D
    fpBuilder = FingerprintUtils.BuildPharm2DFP
    simMetric = DataStructs.TanimotoSimilarity
    dbName = os.path.join(options.dbDir, options.fpDbName)
    fpTableName = options.gobbi2DTableName
    if not options.fpColName:
      options.fpColName = 'gobbi2dfp'
    fpColName = options.fpColName
    FingerprintUtils.sigFactory = Gobbi_Pharm2D.factory
  elif options.similarityType == 'Morgan':
    fpBuilder = FingerprintUtils.BuildMorganFP
    simMetric = DataStructs.DiceSimilarity
    dbName = os.path.join(options.dbDir, options.morganFpDbName)
    fpTableName = options.morganFpTableName
    fpColName = options.morganFpColName

  # an explicitly requested metric overrides the per-fingerprint default
  extraArgs = {}
  if options.similarityMetric == 'tanimoto':
    simMetric = DataStructs.TanimotoSimilarity
  elif options.similarityMetric == 'dice':
    simMetric = DataStructs.DiceSimilarity
  elif options.similarityMetric == 'tversky':
    simMetric = DataStructs.TverskySimilarity
    extraArgs['tverskyA'] = options.tverskyA
    extraArgs['tverskyB'] = options.tverskyB

  if options.smilesQuery:
    mol = Chem.MolFromSmiles(options.smilesQuery)
    if not mol:
      logger.error('could not build query molecule from smiles "%s"' % options.smilesQuery)
      sys.exit(-1)
    options.queryMol = mol
  elif options.smartsQuery:
    mol = Chem.MolFromSmarts(options.smartsQuery)
    if not mol:
      logger.error('could not build query molecule from smarts "%s"' % options.smartsQuery)
      sys.exit(-1)
    options.queryMol = mol

  # output streams: '-' selects stdout, an empty outF disables the text output
  if options.outF == '-':
    outF = sys.stdout
  elif options.outF == '':
    outF = None
  else:
    outF = open(options.outF, 'w+')

  molsOut = False
  if options.sdfOut:
    molsOut = True
    if options.sdfOut == '-':
      sdfOut = sys.stdout
    else:
      sdfOut = open(options.sdfOut, 'w+')
  else:
    sdfOut = None
  if options.smilesOut:
    molsOut = True
    if options.smilesOut == '-':
      smilesOut = sys.stdout
    else:
      smilesOut = open(options.smilesOut, 'w+')
  else:
    smilesOut = None

  if queryFilename:
    try:
      tmpF = open(queryFilename, 'r')
    except IOError:
      logger.error('could not open query file %s' % queryFilename)
      sys.exit(1)
    else:
      # the handle is only a readability check; the reader below is given
      # the file name, so release the handle immediately
      tmpF.close()

    if options.molFormat == 'smiles':
      func = GetMolsFromSmilesFile
    elif options.molFormat == 'sdf':
      func = GetMolsFromSDFile

    if not options.silent:
      msg = 'Reading query molecules'
      if fpBuilder:
        msg += ' and generating fingerprints'
      logger.info(msg)
    probes = []
    i = 0
    nms = []
    for nm, smi, mol in func(queryFilename, None, options.nameProp):
      i += 1
      nms.append(nm)
      if not mol:
        logger.error('query molecule %d could not be built' % (i))
        # keep a placeholder so that probes stays aligned with nms
        probes.append((None, None))
        continue
      if fpBuilder:
        probes.append((mol, fpBuilder(mol)))
      else:
        probes.append((mol, None))
      if not options.silent and not i % 1000:
        logger.info(" done %d" % i)
  else:
    probes = None

  conn = None
  ids = None
  molDbName = os.path.join(options.dbDir, options.molDbName)
  molIdName = options.molIdName
  mConn = DbConnect(molDbName)
  cns = [(x.lower(), y) for x, y in mConn.GetColumnNamesAndTypes('molecules')]
  # the first column of the molecules table is used as the id column
  idCol, idTyp = cns[0]
  if options.propQuery or options.queryMol:
    conn = DbConnect(molDbName)
    curs = conn.GetCursor()
    if options.queryMol:
      if not options.silent:
        logger.info('Doing substructure query')
      if options.propQuery:
        where = 'where %s' % options.propQuery
      else:
        where = ''
      if not options.silent:
        # nToDo is only needed for progress messages
        curs.execute('select count(*) from molecules %(where)s' % locals())
        nToDo = curs.fetchone()[0]

      join = ''
      doSubstructFPs = False
      fpDbName = os.path.join(options.dbDir, options.fpDbName)
      if os.path.exists(fpDbName) and not options.negateQuery:
        curs.execute("attach database '%s' as fpdb" % (fpDbName))
        try:
          curs.execute('select * from fpdb.%s limit 1' % options.layeredTableName)
        except Exception:
          # best effort: if the layered-fingerprint table cannot be read,
          # fall back to testing every molecule
          pass
        else:
          doSubstructFPs = True
          join = 'join fpdb.%s using (%s)' % (options.layeredTableName, idCol)
          query = LayeredOptions.GetQueryText(options.queryMol)
          if query:
            if not where:
              where = 'where'
            else:
              where += ' and'
            where += ' ' + query

      cmd = 'select %(idCol)s,molpkl from molecules %(join)s %(where)s' % locals()
      curs.execute(cmd)
      row = curs.fetchone()
      nDone = 0
      ids = []
      while row:
        molId, molpkl = row
        if not options.zipMols:
          m = _molFromPkl(molpkl)
        else:
          m = Chem.Mol(zlib.decompress(molpkl))
        matched = m.HasSubstructMatch(options.queryMol)
        if options.negateQuery:
          matched = not matched
        if matched:
          ids.append(molId)
        nDone += 1
        if not options.silent and not nDone % 500:
          if not doSubstructFPs:
            logger.info(' searched %d (of %d) molecules; %d hits so far' %
                        (nDone, nToDo, len(ids)))
          else:
            logger.info(' searched through %d molecules; %d hits so far' % (nDone, len(ids)))
        row = curs.fetchone()
      if not options.silent and doSubstructFPs and nToDo:
        # rows removed by the fingerprint join never reached the loop above
        nFiltered = nToDo - nDone
        logger.info(' Fingerprint screenout rate: %d of %d (%%%.2f)' %
                    (nFiltered, nToDo, 100. * nFiltered / nToDo))

    elif options.propQuery:
      if not options.silent:
        logger.info('Doing property query')
      # only the text before the first ';' is used
      propQuery = options.propQuery.split(';')[0]
      curs.execute('select %(idCol)s from molecules where %(propQuery)s' % locals())
      ids = [x[0] for x in curs.fetchall()]
    if not options.silent:
      logger.info('Found %d molecules matching the query' % (len(ids)))

  t1 = time.time()
  if probes:
    if not options.silent:
      logger.info('Finding Neighbors')
    conn = DbConnect(dbName)
    cns = conn.GetColumnNames(fpTableName)
    curs = conn.GetCursor()

    if ids:
      # restrict the fingerprint pool to the hits of the earlier query
      ids = [(x, ) for x in ids]
      curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)' % locals())
      curs.executemany('insert into _tmpTbl values (?)', ids)
      join = 'join _tmpTbl using (%(idCol)s)' % locals()
    else:
      join = ''

    if cns[0].lower() != idCol.lower():
      # backwards compatibility to the days when mol tables had a guid and
      # the fps tables did not:
      curs.execute("attach database '%(molDbName)s' as mols" % locals())
      curs.execute(
        """ select %(idCol)s,%(fpColName)s from %(fpTableName)s join (select %(idCol)s,%(molIdName)s from mols.molecules %(join)s) using (%(molIdName)s) """
        % (locals()))
    else:
      curs.execute('select %(idCol)s,%(fpColName)s from %(fpTableName)s %(join)s' % locals())

    def poolFromCurs(curs, similarityMethod):
      """ generator yielding (id, fingerprint) for each row left on the cursor """
      row = curs.fetchone()
      while row:
        fpId, pkl = row
        fp = DepickleFP(pkl, similarityMethod)
        yield (fpId, fp)
        row = curs.fetchone()

    topNLists = GetNeighborLists(probes, options.topN, poolFromCurs(curs, options.similarityType),
                                 simMetric=simMetric, simThresh=options.simThresh, **extraArgs)
    uniqIds = set()
    nbrLists = {}
    for i, nm in enumerate(nms):
      topNLists[i].reverse()
      scores = topNLists[i].GetPts()
      nbrNames = topNLists[i].GetExtras()
      nbrs = []
      for j, nbrGuid in enumerate(nbrNames):
        # a None entry ends the list of real neighbors
        if nbrGuid is None:
          break
        uniqIds.add(nbrGuid)
        nbrs.append((nbrGuid, scores[j]))
      nbrLists[(i, nm)] = nbrs
    t2 = time.time()
    if not options.silent:
      logger.info('The search took %.1f seconds' % (t2 - t1))

    if not options.silent:
      logger.info('Creating output')

    # map the neighbor ids back to molecule names using the molecule database
    curs = mConn.GetCursor()
    ids = [(x, ) for x in uniqIds]
    curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)' % locals())
    curs.executemany('insert into _tmpTbl values (?)', ids)
    curs.execute('select %(idCol)s,%(molIdName)s from molecules join _tmpTbl using (%(idCol)s)' %
                 locals())
    nmDict = {}
    for guid, molName in curs.fetchall():
      nmDict[guid] = str(molName)

    ks = sorted(nbrLists.keys())
    if not options.transpose:
      # one line per probe: name, then (neighbor name, score) pairs
      for i, nm in ks:
        nbrs = nbrLists[(i, nm)]
        nbrTxt = options.outputDelim.join(
          [nm] +
          ['%s%s%.3f' % (nmDict[nbrId], options.outputDelim, score) for nbrId, score in nbrs])
        if outF:
          print(nbrTxt, file=outF)
    else:
      # one column pair per probe, one line per neighbor rank
      labels = ['%s%sSimilarity' % (x[1], options.outputDelim) for x in ks]
      if outF:
        print(options.outputDelim.join(labels), file=outF)
      # NOTE(review): this indexes every neighbor list up to topN; a probe with
      # fewer neighbors would raise IndexError here -- confirm whether
      # GetNeighborLists can return short lists.
      for i in range(options.topN):
        outL = []
        for idx, nm in ks:
          nbr = nbrLists[(idx, nm)][i]
          outL.append(nmDict[nbr[0]])
          outL.append('%.3f' % nbr[1])
        if outF:
          print(options.outputDelim.join(outL), file=outF)
  else:
    if not options.silent:
      logger.info('Creating output')
    curs = mConn.GetCursor()
    # NOTE(review): ids is still None here when neither a property nor a
    # substructure query was given, which makes set(ids) raise TypeError --
    # presumably the caller guarantees at least one kind of query; confirm.
    ids = [(x, ) for x in set(ids)]
    curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)' % locals())
    curs.executemany('insert into _tmpTbl values (?)', ids)
    curs.execute('select %(idCol)s,%(molIdName)s from molecules join _tmpTbl using (%(idCol)s)' %
                 locals())
    nmDict = {}
    for guid, molName in curs.fetchall():
      nmDict[guid] = str(molName)
    if outF:
      print('\n'.join(nmDict.values()), file=outF)

  if molsOut and ids:
    # make sure that the pickled molecule is the last column requested
    cns = [x.lower() for x in mConn.GetColumnNames('molecules')]
    if cns[-1] != 'molpkl':
      cns.remove('molpkl')
      cns.append('molpkl')

    # _tmpTbl was created and filled through an mConn cursor by whichever
    # output branch ran above, so it is joined against again here
    curs = mConn.GetCursor()
    cnText = ','.join(cns)
    curs.execute('select %(cnText)s from molecules join _tmpTbl using (%(idCol)s)' % locals())

    row = curs.fetchone()
    while row:
      row = list(row)
      m = _molFromPkl(row[-1])
      guid = row[0]
      nm = nmDict[guid]
      if sdfOut:
        m.SetProp('_Name', nm)
        print(Chem.MolToMolBlock(m), file=sdfOut)
        # every column between the id and the pickle becomes an SD data field
        for i in range(1, len(cns) - 1):
          pn = cns[i]
          pv = str(row[i])
          print('> <%s>\n%s\n' % (pn, pv), file=sdfOut)
        print('$$$$', file=sdfOut)
      if smilesOut:
        smi = Chem.MolToSmiles(m, options.chiralSmiles)
        print('%s %s' % (smi, str(row[1])), file=smilesOut)
      row = curs.fetchone()

  # close the output files opened above (never the shared sys.stdout)
  for fh in (outF, sdfOut, smilesOut):
    if fh is not None and fh is not sys.stdout:
      fh.close()

  if not options.silent:
    logger.info('Done!')
class TestCase(unittest.TestCase):
  """ Tests for the database-backed molecule suppliers, run against the test database. """

  def setUp(self):
    """ Open a connection and a cursor on RDConfig.RDTestDatabase. """
    self.dbName = RDConfig.RDTestDatabase
    self.conn = DbConnect(self.dbName)
    self.curs = self.conn.GetCursor()

  def _checkMol(self, row, mol):
    """ mol must match the SMILES in row[0] and carry row[1] as its 'ID' property. """
    self.assertEqual(Chem.MolToSmiles(Chem.MolFromSmiles(row[0])), Chem.MolToSmiles(mol))
    self.assertEqual(row[1], mol.GetProp('ID'))

  def test_MolSupplier(self):
    """ Calling MolSupplier with no arguments raises ValueError. """
    with self.assertRaises(ValueError):
      MolSupplier()

  def test_general(self):
    """ A result set without a molecule column is rejected with ValueError. """
    # Check for a molecule column
    sql = 'select * from ten_elements'
    rs = DbResultSet(self.curs, self.conn, sql)
    with self.assertRaises(ValueError):
      ForwardDbMolSupplier(rs)

  def test_ForwardDbMolSupplier(self):
    """ The forward supplier yields the expected molecules and cannot be indexed. """
    sql = 'select * from simple_mols order by ID'
    expected = list(DbResultSet(self.curs, self.conn, sql))

    supp = ForwardDbMolSupplier(DbResultSet(self.curs, self.conn, sql))
    self.assertEqual(supp.GetColumnNames(), ('ID', ))

    for row, mol in zip(expected, supp):
      self._checkMol(row, mol)
    with self.assertRaises(StopIteration):
      next(supp)

    # We can not use an index for ForwardDbMolSupplier
    with self.assertRaises(TypeError):
      supp[0]

  def test_RandomAccessDbMolSupplier(self):
    """ The random-access supplier supports len(), iteration and in-range indexing. """
    sql = 'select * from simple_mols order by ID'
    expected = list(RandomAccessDbResultSet(self.curs, self.conn, sql))

    supp = RandomAccessDbMolSupplier(RandomAccessDbResultSet(self.curs, self.conn, sql))
    self.assertEqual(len(supp), len(expected))
    self.assertEqual(supp.GetColumnNames(), ('ID', ))
    for row, mol in zip(expected, supp):
      self._checkMol(row, mol)

    # Check that we can randomly access the data
    order = list(range(len(expected)))
    random.shuffle(order)
    for pos in order:
      self._checkMol(expected[pos], supp[pos])

    # We get an error if we access outside of the permitted range
    with self.assertRaises(IndexError):
      supp[len(expected)]

    # The DbMolSupplier doesn't support negative indices
    with self.assertRaises(IndexError):
      supp[-1]