def SupplierFromDetails(details):
    """Build a molecule supplier from *details* and resolve column indices.

    A ``DbMolSupplyNode`` is created when ``details.dbName`` is set, otherwise
    a ``SmilesSupplyNode`` reading ``details.inFileName``.

    If ``details.actCol`` and/or ``details.nameCol`` are integers they are
    treated as indices into the property names of the first record and are
    replaced, in place on *details*, by the corresponding property names.

    Returns:
        The supplier.  Whenever the first record had to be inspected, the
        supplier is reset before it is returned.
    """
    from rdkit.VLib.NodeLib.DbMolSupply import DbMolSupplyNode
    from rdkit.VLib.NodeLib.SmilesSupply import SmilesSupplyNode

    if details.dbName:
        conn = DbConnect(details.dbName, details.tableName)
        suppl = DbMolSupplyNode(conn.GetData())
    else:
        suppl = SmilesSupplyNode(details.inFileName, delim=details.delim,
                                 nameColumn=details.nameCol,
                                 smilesColumn=details.smiCol,
                                 titleLine=details.hasTitle)

    resolveAct = isinstance(details.actCol, int)
    resolveName = isinstance(details.nameCol, int)
    if resolveAct or resolveName:
        # Peek at the first record once; both lookups use its property names.
        suppl.reset()
        propNames = next(suppl).GetPropNames()
        if resolveAct:
            details.actCol = propNames[details.actCol]
        if resolveName:
            details.nameCol = propNames[details.nameCol]
        # Always rewind so the caller does not silently lose the first record
        # (previously this only happened when nameCol was an index).
        suppl.reset()
    return suppl
def testGetData1(self):
    """ basic functionality

    Reads the 'ten_elements' table with random access and checks its size,
    two sample rows and that indexing past the end raises IndexError.
    """
    conn = DbConnect(self.dbName, 'ten_elements')
    d = conn.GetData(randomAccess=1)
    # unittest assertions are not stripped by ``python -O`` and report the
    # differing values on failure, unlike bare ``assert`` statements.
    self.assertEqual(len(d), 10)
    self.assertEqual(tuple(d[0]), (0, 11))
    self.assertEqual(tuple(d[2]), (4, 31))
    self.assertRaises(IndexError, lambda: d[11])
def testGetData2(self):
    """ using removeDups

    Reads 'ten_elements_dups' with ``removeDups=1`` and expects ten rows.
    """
    conn = DbConnect(self.dbName, 'ten_elements_dups')
    d = conn.GetData(randomAccess=1, removeDups=1)
    # unittest assertions survive ``python -O`` and give useful diffs.
    self.assertEqual(tuple(d[0]), (0, 11))
    self.assertEqual(tuple(d[2]), (4, 31))
    self.assertEqual(len(d), 10)
    self.assertRaises(IndexError, lambda: d[11])
def test_ForwardDbFpSupplier(self):
    """Extra ForwardDbFpSupplier checks that complete code coverage.

    An unknown fingerprint column name must raise ValueError; a valid one
    yields a supplier whose column names include 'ID'.
    """
    conn = DbConnect(RDConfig.RDTestDatabase, 'simple_combined')

    # Fetch the data outside the guarded region so that only the supplier
    # constructor is expected to raise.
    badColumnData = conn.GetData()
    with self.assertRaises(ValueError):
        DbFpSupplier.ForwardDbFpSupplier(badColumnData, fpColName='typo')

    supplier = DbFpSupplier.ForwardDbFpSupplier(conn.GetData(),
                                                fpColName='AutoFragmentFp')
    columnNames = supplier.GetColumnNames()
    self.assertIn('ID', columnNames)
def GetFingerprints(details):
    """ returns an iterable sequence of fingerprints

    Each fingerprint will have a _fieldsFromDb member whose first entry is
    the id.

    Sources, in order of precedence:
      - ``details.dbName`` and ``details.tableName``: fingerprints are pulled
        from the database using the SQL built by ``_ConstructSQL``.
      - ``details.inFileName``: a file holding consecutive pickled
        ``(id, fingerprint)`` tuples; a list is returned.
      - otherwise ``None`` is returned.

    A failure to set up the database connection is logged and re-raised.  A
    file that cannot be opened is logged and yields an empty list.
    """
    if details.dbName and details.tableName:
        try:
            conn = DbConnect(details.dbName, details.tableName)
            if hasattr(details, 'dbUser'):
                conn.user = details.dbUser
            if hasattr(details, 'dbPassword'):
                conn.password = details.dbPassword
        except Exception:
            import traceback
            FingerprintMols.error(
                'Error: Problems establishing connection to database: %s|%s\n' %
                (details.dbName, details.tableName))
            traceback.print_exc()
            # Without a connection nothing below can work; propagate the real
            # error instead of failing later on an unbound ``conn``.
            raise
        cmd = _ConstructSQL(details, extraFields=details.fpColName)
        curs = conn.GetCursor()
        if _dataSeq:
            suppl = _dataSeq(curs, cmd, depickle=not details.noPickle,
                             klass=DataStructs.ExplicitBitVect)
            # stored on _dataSeq itself, as in the original code
            _dataSeq._conn = conn
        else:
            # NOTE(review): ``data`` is not defined anywhere in this function,
            # so this branch raises NameError whenever ``_dataSeq`` is falsy.
            # The intended result set needs to be confirmed before fixing.
            suppl = DbFpSupplier.ForwardDbFpSupplier(data, fpColName=details.fpColName)
    elif details.inFileName:
        suppl = []
        try:
            # pickles are binary data
            inF = open(details.inFileName, 'rb')
        except IOError:
            import traceback
            FingerprintMols.error('Error: Problems reading from file %s\n' %
                                  (details.inFileName))
            traceback.print_exc()
            return suppl
        try:
            # NOTE(review): unpickling executes arbitrary code; only read
            # fingerprint files from trusted sources.
            while True:
                try:
                    ID, fp = cPickle.load(inF)
                except Exception:
                    # end of file (or an unreadable record) ends the sequence
                    break
                fp._fieldsFromDb = [ID]
                suppl.append(fp)
        finally:
            inF.close()
    else:
        suppl = None
    return suppl
def GetDataSetInfo(self, **kwargs):
    """ Returns column names and types pulled from a database using our
    stored values.

    The stored ``dbJoin``, ``dbWhat`` and ``dbWhere`` values are forwarded to
    ``GetColumnNamesAndTypes`` on a connection to ``dbName``/``tableName``.
    ``kwargs`` is accepted but not used here.
    """
    return DbConnect(self.dbName, self.tableName).GetColumnNamesAndTypes(
        join=self.dbJoin,
        what=self.dbWhat,
        where=self.dbWhere,
    )
def ScreenInDb(details, mol):
    """Screen the database described by *details* for neighbours of *mol*.

    The probe molecule is fingerprinted with the options in
    ``details.__dict__``.  For the Tanimoto, Dice and Cosine metrics the
    similarity is evaluated by the database itself (``rd_tanimoto``,
    ``rd_dice``, ``rd_cosine``); any other metric falls back to
    ``GetFingerprints`` + ``ScreenFingerprints``.

    Returns:
        The result of ``ScreenFingerprints`` or the rows fetched from the
        database.  An empty list is returned (after logging) if the molecule
        cannot be fingerprinted or the connection cannot be set up.
    """
    try:
        # ``apply`` only exists on Python 2; argument unpacking is equivalent.
        probeFp = FingerprintMols.FingerprintMol(mol, **details.__dict__)
    except Exception:
        import traceback
        FingerprintMols.error('Error: problems fingerprinting molecule.\n')
        traceback.print_exc()
        return []
    if details.dbName and details.tableName:
        try:
            conn = DbConnect(details.dbName, details.tableName)
            if hasattr(details, 'dbUser'):
                conn.user = details.dbUser
            if hasattr(details, 'dbPassword'):
                conn.password = details.dbPassword
        except Exception:
            import traceback
            FingerprintMols.error(
                'Error: Problems establishing connection to database: %s|%s\n' %
                (details.dbName, details.tableName))
            traceback.print_exc()
            # Previously execution continued and failed on an unbound
            # ``conn``; report "no hits" like the fingerprinting failure.
            return []

    # metrics that have a database-side implementation
    sqlFuncs = (
        (DataStructs.TanimotoSimilarity, 'rd_tanimoto'),
        (DataStructs.DiceSimilarity, 'rd_dice'),
        (DataStructs.CosineSimilarity, 'rd_cosine'),
    )
    func = None
    for metric, sqlName in sqlFuncs:
        if details.metric == metric:
            func = sqlName
            break
    if func is None:
        data = GetFingerprints(details)
        return ScreenFingerprints(details, data, mol)

    pkl = probeFp.ToBitString()
    extraFields = "%s(%s,%s) as tani" % (func, DbModule.placeHolder, details.fpColName)
    cmd = _ConstructSQL(details, extraFields=extraFields)
    if details.doThreshold:
        # we need to do a subquery here:
        cmd = "select * from (%s) tmp where tani>%f" % (cmd, details.screenThresh)
    cmd += " order by tani desc"
    if not details.doThreshold and details.topN > 0:
        cmd += " limit %d" % details.topN
    curs = conn.GetCursor()
    # the probe fingerprint is passed as a bound parameter
    curs.execute(cmd, (pkl, ))
    return curs.fetchall()
def GetComposites(details):
    """Load pickled composite models described by *details*.

    If ``details.persistTblName`` and ``details.inNote`` are both set, every
    row of the 'MODEL' column whose note equals ``details.inNote`` is
    unpickled from the database.  Otherwise, if ``details.composFileName`` is
    set, a single composite is unpickled from that file.

    Returns:
        A list of the loaded objects (empty if neither source is configured).
    """
    res = []
    if details.persistTblName and details.inNote:
        conn = DbConnect(details.dbName, details.persistTblName)
        # NOTE(review): the note is interpolated directly into the WHERE
        # clause; a note containing a quote breaks (or alters) the query.
        mdls = conn.GetData(fields='MODEL', where="where note='%s'" % (details.inNote))
        for row in mdls:
            rawD = row[0]
            res.append(cPickle.loads(str(rawD)))
    elif details.composFileName:
        # NOTE(review): unpickling executes arbitrary code; only load trusted
        # files.
        with open(details.composFileName, 'rb') as inF:
            res.append(cPickle.load(inF))
    return res
def testGetData5(self):
    """ using a RandomAccessDbResultSet with a Transform

    The transform doubles the second column of every row.
    """

    def fn(x):
        return (x[0], x[1] * 2)

    conn = DbConnect(self.dbName, 'ten_elements')
    d = conn.GetData(randomAccess=1, transform=fn)
    # unittest assertions survive ``python -O`` and give useful diffs.
    self.assertEqual(tuple(d[0]), (0, 22), str(d[0]))
    self.assertEqual(tuple(d[2]), (4, 62))
    self.assertEqual(len(d), 10)
    self.assertRaises(IndexError, lambda: d[11])
def GetFingerprints(details):
    """returns an iterable sequence of fingerprints

    Each fingerprint will have a _fieldsFromDb member whose first entry is
    the id.

    Sources, in order of precedence:
      - ``details.dbName`` and ``details.tableName``: fingerprints are pulled
        from the database using the SQL built by ``_ConstructSQL``.
      - ``details.inFileName``: a file holding consecutive pickled
        ``(id, fingerprint)`` tuples; a list is returned.
      - otherwise ``None`` is returned.

    A failure to set up the database connection is logged and re-raised.  A
    file that cannot be opened is logged and yields an empty list.
    """
    if details.dbName and details.tableName:
        try:
            conn = DbConnect(details.dbName, details.tableName)
            if hasattr(details, "dbUser"):
                conn.user = details.dbUser
            if hasattr(details, "dbPassword"):
                conn.password = details.dbPassword
        except Exception:
            import traceback

            FingerprintMols.error(
                "Error: Problems establishing connection to database: %s|%s\n"
                % (details.dbName, details.tableName)
            )
            traceback.print_exc()
            # Without a connection nothing below can work; propagate the real
            # error instead of failing later on an unbound ``conn``.
            raise
        cmd = _ConstructSQL(details, extraFields=details.fpColName)
        curs = conn.GetCursor()
        if _dataSeq:
            suppl = _dataSeq(
                curs, cmd, depickle=not details.noPickle, klass=DataStructs.ExplicitBitVect
            )
            # stored on _dataSeq itself, as in the original code
            _dataSeq._conn = conn
        else:
            # NOTE(review): ``data`` is not defined anywhere in this function,
            # so this branch raises NameError whenever ``_dataSeq`` is falsy.
            # The intended result set needs to be confirmed before fixing.
            suppl = DbFpSupplier.ForwardDbFpSupplier(data, fpColName=details.fpColName)
    elif details.inFileName:
        suppl = []
        try:
            # pickles are binary data; text mode cannot be unpickled on
            # Python 3 and silently produced an empty list before
            inF = open(details.inFileName, "rb")
        except IOError:
            import traceback

            FingerprintMols.error(
                "Error: Problems reading from file %s\n" % (details.inFileName)
            )
            traceback.print_exc()
            return suppl
        try:
            # NOTE(review): unpickling executes arbitrary code; only read
            # fingerprint files from trusted sources.
            while True:
                try:
                    ID, fp = cPickle.load(inF)
                except Exception:
                    # end of file (or an unreadable record) ends the sequence
                    break
                fp._fieldsFromDb = [ID]
                suppl.append(fp)
        finally:
            inF.close()
    else:
        suppl = None
    return suppl
def DBToData(dbName, tableName, user='******', password='******', dupCol=-1, what='*', where='',
             join='', pickleCol=-1, pickleClass=None, ensembleIds=None):
    """ constructs an _MLData.MLDataSet_ from a database

    **Arguments**

      - dbName: the name of the database to be opened

      - tableName: the table name containing the data in the database

      - user: the user name to be used to connect to the database

      - password: the password to be used to connect to the database

      - dupCol: passed to the query as _removeDups_; specifies which column
        should be used to recognize duplicates.
        NOTE(review): the default is -1, presumably meaning "do not remove
        duplicates" -- confirm against DbConnect.GetData.

      - what, where, join: forwarded to the query as the field list, the
        where clause and the join clause

      - pickleCol: if >= 0, the index (counted after the first column has
        been removed as the point name) of a column to be unpickled

      - pickleClass: optional callable tried first on the string form of the
        pickled value; after its first failure _cPickle.loads_ is used for
        the remaining rows

      - ensembleIds: optional ids used to reduce each row (or the unpickled
        value when _pickleCol_ is set) to an ensemble

    **Returns**

       an _MLData.MLDataSet_

    **Notes**

      - this uses Dbase.DataUtils functionality

      - the first column of every row is used as the point name

    """
    conn = DbConnect(dbName, tableName, user, password)
    res = conn.GetData(fields=what, where=where, join=join, removeDups=dupCol, forceList=1)
    nPts = len(res)
    vals = [None] * nPts
    ptNames = [None] * nPts
    # flips to False the first time pickleClass fails, so that the constructor
    # is not retried for every remaining row
    classWorks = True
    for i in range(nPts):
        tmp = list(res[i])
        # the leading column is the point name, not a data value
        ptNames[i] = tmp.pop(0)
        if pickleCol >= 0:
            if not pickleClass or not classWorks:
                tmp[pickleCol] = cPickle.loads(str(tmp[pickleCol]))
            else:
                try:
                    tmp[pickleCol] = pickleClass(str(tmp[pickleCol]))
                except Exception:
                    # fall back to a plain unpickle for this and later rows
                    tmp[pickleCol] = cPickle.loads(str(tmp[pickleCol]))
                    classWorks = False
            if ensembleIds:
                tmp[pickleCol] = BitUtils.ConstructEnsembleBV(tmp[pickleCol], ensembleIds)
        else:
            if ensembleIds:
                tmp = TakeEnsemble(tmp, ensembleIds, isDataVect=True)
        vals[i] = tmp
    varNames = conn.GetColumnNames(join=join, what=what)
    data = MLData.MLDataSet(vals, varNames=varNames, ptNames=ptNames)
    return data
def ScreenInDb(details, mol):
    """Screen the database described by *details* for neighbours of *mol*.

    The probe molecule is fingerprinted with the options in
    ``details.__dict__``.  For the Tanimoto, Dice and Cosine metrics the
    similarity is evaluated by the database itself (``rd_tanimoto``,
    ``rd_dice``, ``rd_cosine``); any other metric falls back to
    ``GetFingerprints`` + ``ScreenFingerprints``.

    Returns:
        The result of ``ScreenFingerprints`` or the rows fetched from the
        database.  An empty list is returned (after logging) if the molecule
        cannot be fingerprinted or the connection cannot be set up.
    """
    try:
        # ``apply`` only exists on Python 2; argument unpacking is equivalent.
        probeFp = FingerprintMols.FingerprintMol(mol, **details.__dict__)
    except Exception:
        import traceback

        FingerprintMols.error("Error: problems fingerprinting molecule.\n")
        traceback.print_exc()
        return []
    if details.dbName and details.tableName:
        try:
            conn = DbConnect(details.dbName, details.tableName)
            if hasattr(details, "dbUser"):
                conn.user = details.dbUser
            if hasattr(details, "dbPassword"):
                conn.password = details.dbPassword
        except Exception:
            import traceback

            FingerprintMols.error(
                "Error: Problems establishing connection to database: %s|%s\n"
                % (details.dbName, details.tableName)
            )
            traceback.print_exc()
            # Previously execution continued and failed on an unbound
            # ``conn``; report "no hits" like the fingerprinting failure.
            return []

    # metrics that have a database-side implementation
    sqlFuncs = (
        (DataStructs.TanimotoSimilarity, "rd_tanimoto"),
        (DataStructs.DiceSimilarity, "rd_dice"),
        (DataStructs.CosineSimilarity, "rd_cosine"),
    )
    func = None
    for metric, sqlName in sqlFuncs:
        if details.metric == metric:
            func = sqlName
            break
    if func is None:
        data = GetFingerprints(details)
        return ScreenFingerprints(details, data, mol)

    pkl = probeFp.ToBitString()
    extraFields = "%s(%s,%s) as tani" % (func, DbModule.placeHolder, details.fpColName)
    cmd = _ConstructSQL(details, extraFields=extraFields)
    if details.doThreshold:
        # we need to do a subquery here:
        cmd = "select * from (%s) tmp where tani>%f" % (cmd, details.screenThresh)
    cmd += " order by tani desc"
    if not details.doThreshold and details.topN > 0:
        cmd += " limit %d" % details.topN
    curs = conn.GetCursor()
    # the probe fingerprint is passed as a bound parameter
    curs.execute(cmd, (pkl,))
    return curs.fetchall()
def testGetData4(self):
    """ non random access

    A forward-only result set has no length but can be iterated.
    """
    conn = DbConnect(self.dbName, 'ten_elements')
    d = conn.GetData(randomAccess=0)
    self.assertRaises(TypeError, lambda: len(d))

    rs = [thing for thing in d]
    # unittest assertions survive ``python -O`` and give useful diffs.
    self.assertEqual(len(rs), 10)
    self.assertEqual(tuple(rs[0]), (0, 11))
    self.assertEqual(tuple(rs[2]), (4, 31))
def testGetData6(self):
    """ using a DbResultSet with a Transform

    The transform doubles the second column; the forward-only result set has
    no length but can be iterated.
    """

    def fn(x):
        return (x[0], x[1] * 2)

    conn = DbConnect(self.dbName, 'ten_elements')
    d = conn.GetData(randomAccess=0, transform=fn)
    self.assertRaises(TypeError, lambda: len(d))

    rs = [thing for thing in d]
    # unittest assertions survive ``python -O`` and give useful diffs.
    self.assertEqual(len(rs), 10)
    self.assertEqual(tuple(rs[0]), (0, 22))
    self.assertEqual(tuple(rs[2]), (4, 62))
def _confirm(self, tblName):
    """Check that table *tblName* has the expected column heads and types.

    Column names are compared case-insensitively with ``self.colHeads``.
    Column types are compared with ``self.colTypes`` unless SQLite is in use.
    """
    columnInfo = DbConnect(self.dbName, tblName).GetColumnNamesAndTypes()
    assert len(columnInfo) == len(self.colHeads), 'bad number of columns'

    foundNames = [entry[0] for entry in columnInfo]
    for idx, colName in enumerate(foundNames):
        assert colName.upper() == self.colHeads[idx].upper(), 'bad column head'

    if RDConfig.useSqlLite:
        # doesn't seem to be any column type info available
        return

    foundTypes = [entry[1] for entry in columnInfo]
    for idx, colType in enumerate(foundTypes):
        assert colType == self.colTypes[idx], 'bad column type'
def test5TestBackwardsCompat(self):
    """Search through a user-named pair table (``--pairTableName``).

    Rebuilds the bzr databases with CreateDb.py, copies the atom-pair table
    to a table called 'tmp', runs SearchDb.py against that table and checks
    the neighbour lists written to the output file.
    """
    # start from a clean slate
    for dbStem in ('Compounds', 'AtomPairs', 'Descriptors', 'Fingerprints'):
        dbPath = 'testData/bzr/%s.sqlt' % dbStem
        if os.path.exists(dbPath):
            os.unlink(dbPath)

    p = subprocess.Popen(('python', 'CreateDb.py', '--dbDir=testData/bzr', '--noFingerprints',
                          '--noDescriptors', 'testData/bzr.sdf'))
    res = p.wait()
    self.assertFalse(res)
    p = None

    conn = DbConnect('testData/bzr/AtomPairs.sqlt')
    curs = conn.GetCursor()
    curs.execute('create table tmp as select compound_id,atompairfp,torsionfp from atompairs')

    p = subprocess.Popen(('python', 'SearchDb.py', '--dbDir=testData/bzr', '--molFormat=sdf',
                          '--topN=5', '--outF=testData/bzr/search.out',
                          '--similarityType=AtomPairs', '--pairTableName=tmp',
                          'testData/bzr.sdf'))
    res = p.wait()
    self.assertFalse(res)
    p = None

    self.assertTrue(os.path.exists('testData/bzr/search.out'))
    # ``file()`` does not exist on Python 3; ``with`` also closes the handle
    with open('testData/bzr/search.out', 'r') as inF:
        lines = inF.readlines()
    self.assertEqual(len(lines), 163)

    splitLs = [x.strip().split(',') for x in lines]
    for line in splitLs:
        # format: label, then (neighbour, similarity) pairs
        lbl = line[0]
        nbrs = {}
        lastVal = 1.0
        for i in range(1, len(line), 2):
            nbrs[line[i]] = line[i + 1]
            # similarities must be sorted in non-increasing order
            self.assertLessEqual(float(line[i + 1]), lastVal)
            lastVal = float(line[i + 1])
        # every molecule must find itself with perfect similarity
        self.assertIn(lbl, nbrs)
        self.assertEqual(nbrs[lbl], '1.000')
    os.unlink('testData/bzr/search.out')
def _ConnectToDatabase(details) -> "DbConnect | None":
    """Open a connection to the database described by *details*.

    ``details.dbUser`` and ``details.dbPassword`` are applied to the
    connection when those attributes exist.

    Returns:
        The connection, or ``None`` when ``details.dbName`` or
        ``details.tableName`` is not set, or when setting up the connection
        raised (the problem is logged and the traceback printed).
    """
    if details.dbName and details.tableName:
        try:
            conn = DbConnect(details.dbName, details.tableName)
            if hasattr(details, 'dbUser'):
                conn.user = details.dbUser
            if hasattr(details, 'dbPassword'):
                conn.password = details.dbPassword
            return conn
        except Exception:
            import traceback
            FingerprintMols.error('Error: Problems establishing connection to '
                                  f'database:{details.dbName}|{details.tableName}\n')
            traceback.print_exc()
    return None
def test0Ranker(self):
    """Rank fingerprint bits by entropy and compare with stored reference.

    Molecules with activity 100 vote for class 0 and those with activity 0
    for class 1.  The info content of the first 900 ranked bits must match
    'test_data/combiRank.out'; the top bits are then written to
    'test_data/rdTopBits.txt'.
    """
    nbits = 5000
    conn = DbConnect(_testDatabase)
    fps = getFingerprints(conn)
    nameAct = getNameAct(conn)
    fpLength = len(list(fps.values())[0])
    ranker = InfoTheory.InfoBitRanker(fpLength, 2, InfoTheory.InfoType.ENTROPY)

    print("Collecting Votes ....")
    for molName in nameAct.keys():
        if nameAct[molName] == 100:
            ranker.AccumulateVotes(fps[molName], 0)
        if nameAct[molName] == 0:
            ranker.AccumulateVotes(fps[molName], 1)

    # rank the bits
    print("ranking bits ....")
    topBits = ranker.GetTopN(nbits)

    # load the reference (combichem) ranking from disk
    refPath = os.path.join('test_data', 'combiRank.out')
    refInfo = ReadCombiInfo(refPath)

    # the info contents must agree with the reference values
    print("Comparing bit info contents ....")
    for idx in range(900):
        assert feq(topBits[idx, 1], refInfo[idx])

    outPath = os.path.join('test_data', 'rdTopBits.txt')
    ranker.WriteTopBitsToFile(outPath)
def testGetData1(self):
    """ basic functionality

    Checks GetData with random access, GetColumns on the 'id' and 'val'
    fields, and GetDataCount, all against the 'ten_elements' table.
    """
    conn = DbConnect(self.dbName, 'ten_elements')
    d = conn.GetData(randomAccess=1)
    # Use unittest assertions throughout (the method previously mixed them
    # with bare asserts, which are stripped by ``python -O``).
    self.assertEqual(len(d), 10)
    self.assertEqual(tuple(d[0]), (0, 11))
    self.assertEqual(tuple(d[2]), (4, 31))
    self.assertRaises(IndexError, lambda: d[11])

    d = conn.GetColumns(fields='id,val')
    self.assertEqual(len(d), 10)
    self.assertEqual(tuple(d[0]), (0, 11))
    self.assertEqual(tuple(d[2]), (4, 31))

    self.assertEqual(conn.GetDataCount(), 10)
def test1BiasRanker(self):
    """Rank fingerprint bits by biased entropy and compare with reference.

    Molecules with activity 100 vote for class 0 and those with activity 0
    for class 1; the ranking is biased towards class 0.  The info content of
    all ``nbits`` ranked bits must match 'test_data/combiRank.out'.
    """
    nbits = 5000
    dbName = os.path.join('../', 'test_data', 'FEW_CDK2.GDB')
    conn = DbConnect(dbName)
    fps = getFingerprints(conn)
    nameAct = getNameAct(conn)
    # dict views cannot be indexed on Python 3, so materialise the values
    sl = len(list(fps.values())[0])
    rnkr = InfoTheory.InfoBitRanker(sl, 2, InfoTheory.InfoType.BIASENTROPY)
    rnkr.SetBiasList([0])

    print("Collecting Votes ....")
    for key in nameAct.keys():
        if nameAct[key] == 100:
            rnkr.AccumulateVotes(fps[key], 0)
        if nameAct[key] == 0:
            rnkr.AccumulateVotes(fps[key], 1)

    # now do the ranking
    print("ranking bits ....")
    topN = rnkr.GetTopN(nbits)

    # get the combichem ranked list from a file
    cfile = os.path.join('test_data', 'combiRank.out')
    combiInfo = ReadCombiInfo(cfile)

    # now check if the infocontents are the same as the combichem stuff
    print("Comparing bit info contents ....")
    for i in range(nbits):
        assert feq(topN[i, 1], combiInfo[i])
def getTestData(self):
    """Return RDK fingerprints for the molecules in table 'simple_mols1'.

    Every fingerprint carries the molecule's ID in its ``_id`` attribute.
    """
    conn = DbConnect(RDConfig.RDTestDatabase, 'simple_mols1')

    # First pass: build the named molecules.
    molecules = []
    for smiles, molId in conn.GetData():
        molecule = Chem.MolFromSmiles(str(smiles))
        molecule.SetProp('_Name', str(molId))
        molecules.append(molecule)

    # Second pass: calculate the fingerprints.
    fingerprints = []
    for molecule in molecules:
        fingerprint = Chem.RDKFingerprint(molecule)
        fingerprint._id = molecule.GetProp('_Name')
        fingerprints.append(fingerprint)
    return fingerprints
def GetAllDescriptorNames(db, tbl1, tbl2, user='******', password='******'):
    """ gets possible descriptor names from a database

    **Arguments**

      - db: the name of the database to use

      - tbl1: the name of the table to be used for reading descriptor values

      - tbl2: the name of the table to be used for reading notes about the
        descriptors (*descriptions of the descriptors if you like*)

      - user: the user name for DB access

      - password: the password for DB access

    **Returns**

      a 2-tuple containing:

        1) a list of column names

        2) a list of column descriptors, as (name, description) 2-tuples

    **Notes**

      - this uses _Dbase.DbInfo_ and functionality for querying the database

      - it is assumed that tbl2 includes 'property' and 'notes' columns

      - the entries of the module-level _countOptions_ are appended to both
        lists

    """
    from rdkit.Dbase.DbConnection import DbConnect
    conn = DbConnect(db, user=user, password=password)

    # copy so that appending below never modifies the connection's own list
    colNames = list(conn.GetColumnNames(table=tbl1))
    # must be a real list: on Python 3 ``map`` returns an iterator that has
    # no ``append``
    colDesc = [(x[0].upper(), x[1]) for x in conn.GetColumns('property,notes', table=tbl2)]
    for name, desc in countOptions:
        colNames.append(name)
        colDesc.append((name, desc))
    return colNames, colDesc
def testAddTable(self):
    """ tests AddTable and GetTableNames functionalities

    The table must be absent before AddTable, present afterwards and have a
    single 'id' column.
    """
    tableName = 'NEW_TABLE'
    dropCmd = 'drop table %s' % (tableName)
    conn = DbConnect(self.tempDbName)

    # best effort: remove a leftover table from an earlier run
    try:
        conn.GetCursor().execute(dropCmd)
    except Exception:
        pass
    conn.Commit()

    namesBefore = [name.strip() for name in conn.GetTableNames()]
    self.assertNotIn(tableName, namesBefore)

    conn.AddTable(tableName, 'id int')
    namesAfter = [name.strip() for name in conn.GetTableNames()]
    self.assertIn(tableName, namesAfter)
    self.assertEqual(conn.GetColumnNames(table=tableName), ['id'])

    conn.GetCursor().execute(dropCmd)
def test_GetTableNames(self):
    """GetTableNames gives the same answer with or without a prior cursor."""
    # with a cursor instantiated beforehand
    connWithCursor = DbConnect(self.tempDbName)
    connWithCursor.GetCursor()
    namesWithCursor = sorted(connWithCursor.GetTableNames())

    # and without one (this tests functionality of DbInfo)
    connWithoutCursor = DbConnect(self.tempDbName)
    namesWithoutCursor = sorted(connWithoutCursor.GetTableNames())

    self.assertEqual(namesWithCursor, namesWithoutCursor)
def _init(self, refCompos, copyBounds=0):
    """Configure ``self.details`` so that it mirrors *refCompos*.

    The database columns must match the reference composite's descriptor
    names (case-insensitively).  The random seed and split fraction are
    copied from *refCompos*; the quantization bounds are copied as well when
    *copyBounds* is true, otherwise they are all zero.
    """
    BuildComposite._verbose = 0

    conn = DbConnect(self.details.dbName, self.details.tableName)
    dbColumns = [name.upper() for name in conn.GetColumnNames()]
    refDescriptors = [name.upper() for name in refCompos.GetDescriptorNames()]
    self.assertEqual(dbColumns, refDescriptors)

    self.details.nModels = 10
    self.details.lockRandom = 1
    self.details.randomSeed = refCompos._randomSeed
    self.details.splitFrac = refCompos._splitFrac
    self.details.splitRun = 1 if self.details.splitFrac else 0

    if copyBounds:
        self.details.qBounds = refCompos.GetQuantBounds()[0]
    else:
        self.details.qBounds = [0] * len(dbColumns)
def _setupDb(self):
    """Connect to a scratch database and remember the test table name.

    With SQLite the reference test database is copied to a temporary file
    whose path is stored in ``self.tempDbName``; otherwise the '::RDTests'
    database is used.

    Returns:
        The connection, which is also stored in ``self.conn``.
    """
    from rdkit.Dbase.DbConnection import DbConnect
    fName = RDConfig.RDTestDatabase
    if RDConfig.useSqlLite:
        import os
        fd, tempName = tempfile.mkstemp(suffix='sqlt')
        # mkstemp returns an open descriptor; only the path is needed here,
        # so close it right away instead of leaking it
        os.close(fd)
        self.tempDbName = tempName
        shutil.copyfile(fName, tempName)
    else:  # pragma: nocover
        tempName = '::RDTests'
    self.conn = DbConnect(tempName)
    self.dbTblName = 'bit_ensemble_test'
    return self.conn
def testCursor(self):
    """ tests GetCursor and GetTableNames functionalities

    A view is created through the cursor; it must only be listed by
    GetTableNames when views are included.
    """
    viewName = 'TEST_VIEW'
    dropCmd = 'drop view %s' % (viewName)
    createCmd = 'create view %s as select val,id from ten_elements' % (viewName)

    conn = DbConnect(self.tempDbName)
    curs = conn.GetCursor()
    assert curs

    # best effort: remove a leftover view from an earlier run
    try:
        curs.execute(dropCmd)
    except Exception:
        pass

    try:
        curs.execute(createCmd)
    except Exception:
        import traceback
        traceback.print_exc()
        raise AssertionError('create view failed')
    conn.Commit()

    tablesOnly = [name.strip() for name in conn.GetTableNames(includeViews=0)]
    self.assertNotIn(viewName, tablesOnly, 'improper view found')
    tablesAndViews = [name.strip() for name in conn.GetTableNames(includeViews=1)]
    self.assertIn(viewName, tablesAndViews, 'improper view not found')

    try:
        curs.execute(dropCmd)
    except Exception:
        raise AssertionError('drop table failed')
def testCursor(self):
    """ tests GetCursor and GetTableNames functionalities

    A view is created through the cursor; it must only be listed by
    GetTableNames when views are included.
    """
    viewName = 'TEST_VIEW'
    conn = DbConnect(self.tempDbName)
    curs = conn.GetCursor()
    # unittest assertions are used throughout: bare asserts are stripped by
    # ``python -O`` and ``assert 0`` carries no explanation
    self.assertTrue(curs)
    try:
        curs.execute('drop view %s' % (viewName))
    except Exception:
        # best effort: the view usually does not exist yet
        pass
    try:
        curs.execute('create view %s as select val,id from ten_elements' % (viewName))
    except Exception:
        import traceback
        traceback.print_exc()
        self.fail('create view failed')
    conn.Commit()

    names = [x.strip() for x in conn.GetTableNames(includeViews=0)]
    self.assertNotIn(viewName, names, 'improper view found')
    names = [x.strip() for x in conn.GetTableNames(includeViews=1)]
    self.assertIn(viewName, names, 'improper view found in %s' % (str(names)))
    try:
        curs.execute('drop view %s' % (viewName))
    except Exception:
        self.fail('drop table failed')
def Store(self, db='models.gdb', table='results', user='******', password='******'):
    """ adds the result to a database

      **Arguments**

        - db: name of the database to use

        - table: name of the table to use

        - user&password: connection information

      **Notes**

        - only the entries of _self.fields_ that are actually set as
          attributes on this object are written

    """
    cn = DbConnect(db, table, user, password)
    curs = cn.GetCursor()
    self._CreateTable(cn, table)

    # collect the fields that are present on this instance
    unset = object()
    colNames = []
    colVals = []
    for fieldName, _ in self.fields:
        value = getattr(self, fieldName, unset)
        if value is unset:
            continue
        colNames.append('%s' % fieldName)
        colVals.append(value)

    placeHolders = ','.join([DbModule.placeHolder] * len(colVals))
    cmd = 'insert into %s (%s) values (%s)' % (table, ','.join(colNames), placeHolders)
    curs.execute(cmd, tuple(colVals))
    cn.Commit()
def testInsertData(self):
    """ tests InsertData and InsertColumnData functionalities

    Ten rows are inserted into a fresh table and read back.
    NOTE(review): only InsertData is exercised here; InsertColumnData is not
    called.
    """
    newTblName = 'NEW_TABLE'
    conn = DbConnect(self.tempDbName)
    try:
        conn.GetCursor().execute('drop table %s' % (newTblName))
    except Exception:
        # best effort: the table usually does not exist yet
        pass
    conn.Commit()
    conn.AddTable(newTblName, 'id int,val1 int, val2 int')
    for i in range(10):
        conn.InsertData(newTblName, (i, i + 1, 2 * i))
    conn.Commit()

    d = conn.GetData(table=newTblName)
    # unittest assertions survive ``python -O`` and give useful diffs.
    self.assertEqual(len(d), 10)
    # drop the reference to the result set before dropping the table
    d = None
    try:
        conn.GetCursor().execute('drop table %s' % (newTblName))
    except Exception:
        self.fail('drop table failed')
def testAddTable(self):
    """ tests AddTable and GetTableNames functionalities

    After AddTable the new table must be listed by GetTableNames.
    """
    newTblName = 'NEW_TABLE'
    conn = DbConnect(self.tempDbName)
    try:
        conn.GetCursor().execute('drop table %s' % (newTblName))
    except Exception:
        # best effort: the table usually does not exist yet
        pass
    conn.Commit()
    conn.AddTable(newTblName, 'id int')
    names = [x.strip() for x in conn.GetTableNames()]
    # unittest assertion instead of a bare assert (stripped by ``python -O``)
    self.assertIn(newTblName, names,
                  'name (%s) not found in %s' % (newTblName, str(names)))
    conn.GetCursor().execute('drop table %s' % (newTblName))
def testGetData3(self):
    """ without removeDups

    All twenty rows of 'ten_elements_dups' are expected, both when the table
    is named on the connection and when it is passed to GetData.
    """
    conn = DbConnect(self.dbName, 'ten_elements_dups')
    d = conn.GetData(randomAccess=1, removeDups=-1)
    # unittest assertions survive ``python -O`` and give useful diffs.
    self.assertEqual(tuple(d[0]), (0, 11))
    self.assertEqual(tuple(d[2]), (2, 21))
    self.assertEqual(len(d), 20)
    self.assertRaises(IndexError, lambda: d[21])

    # repeat that test to make sure the table argument works
    conn = DbConnect(self.dbName, 'ten_elements')
    d = conn.GetData(table='ten_elements_dups', randomAccess=1, removeDups=-1)
    self.assertEqual(tuple(d[0]), (0, 11))
    self.assertEqual(tuple(d[2]), (2, 21))
    self.assertEqual(len(d), 20)
    self.assertRaises(IndexError, lambda: d[21])