def test_ForwardDbFpSupplier(self):
    """Cover ForwardDbFpSupplier with an unknown and a known fingerprint column."""
    # Additional tests to complete code coverage
    conn = DbConnect(RDConfig.RDTestDatabase, 'simple_combined')

    # Fetch the data before entering the assertRaises block so that only the
    # supplier construction itself is allowed to raise ValueError.
    badData = conn.GetData()
    with self.assertRaises(ValueError):
        DbFpSupplier.ForwardDbFpSupplier(badData, fpColName='typo')

    supplier = DbFpSupplier.ForwardDbFpSupplier(conn.GetData(), fpColName='AutoFragmentFp')
    columnNames = supplier.GetColumnNames()
    self.assertIn('ID', columnNames)
def test6Update(self):
    """Create the bzr databases from SMILES, then extend them with --updateDb.

    After the initial run every checked table must hold 10 rows; after the
    update run with the second SMILES file every table must hold 20 rows.
    """
    import sys  # local import: only needed to locate the running interpreter

    # (database file, table) pairs whose row counts are verified after each run
    checks = (
        ('testData/bzr/Compounds.sqlt', 'molecules'),
        ('testData/bzr/AtomPairs.sqlt', 'atompairs'),
        ('testData/bzr/Descriptors.sqlt', 'descriptors_v1'),
        ('testData/bzr/Fingerprints.sqlt', 'rdkitfps'),
    )

    def runAndVerify(extraArgs, expectedCount):
        # Run CreateDb.py with the interpreter executing the tests instead of
        # whatever "python" happens to be first on PATH.
        cmd = (sys.executable, 'CreateDb.py', '--dbDir=testData/bzr',
               '--molFormat=smiles') + tuple(extraArgs)
        res = subprocess.Popen(cmd).wait()
        self.assertFalse(res)
        # All database files must exist before any of them is queried.
        for path, _ in checks:
            self.assertTrue(os.path.exists(path))
        for path, table in checks:
            conn = DbConnect(path)
            d = conn.GetData(table, fields='count(*)')
            self.assertEqual(d[0][0], expectedCount)

    runAndVerify(('testData/bzr.smi', ), 10)
    runAndVerify(('--updateDb', 'testData/bzr.2.smi'), 20)
def SupplierFromDetails(details):
    """Build a molecule supplier node from the settings stored in *details*.

    If ``details.dbName`` is set, the supplier reads from the database table
    ``details.tableName``; otherwise it reads the SMILES file
    ``details.inFileName``.

    As a side effect, ``details.actCol`` and ``details.nameCol`` are replaced
    by property names when they were given as integer indices: the index is
    looked up in the property names of the first molecule of the supplier.

    Returns the supplier, rewound after any such lookup.
    """
    from rdkit.VLib.NodeLib.DbMolSupply import DbMolSupplyNode
    from rdkit.VLib.NodeLib.SmilesSupply import SmilesSupplyNode

    if details.dbName:
        conn = DbConnect(details.dbName, details.tableName)
        suppl = DbMolSupplyNode(conn.GetData())
    else:
        suppl = SmilesSupplyNode(details.inFileName, delim=details.delim,
                                 nameColumn=details.nameCol, smilesColumn=details.smiCol,
                                 titleLine=details.hasTitle)

    def _propNameAt(colIdx):
        # Peek at the first molecule to translate a column index into a
        # property name, then rewind so the caller still sees every molecule.
        suppl.reset()
        name = next(suppl).GetPropNames()[colIdx]
        suppl.reset()
        return name

    # The resolution is done once for both supplier kinds; a second pass would
    # find the columns already converted to names and do nothing.
    if isinstance(details.actCol, int):
        details.actCol = _propNameAt(details.actCol)
    if isinstance(details.nameCol, int):
        details.nameCol = _propNameAt(details.nameCol)
    return suppl
def test_InsertData(self):
    """Exercise AddTable, InsertData, AddColumn and InsertColumnData on a scratch table."""
    newTblName = 'NEW_TABLE'
    conn = DbConnect(self.tempDbName)
    try:
        conn.GetCursor().execute('drop table %s' % (newTblName))
    except Exception:
        # Failure is deliberately ignored here (presumably the table simply
        # does not exist yet); the final drop below is the one that must work.
        pass
    conn.Commit()

    conn.AddTable(newTblName, 'id int,val1 int, val2 int')
    for i in range(10):
        conn.InsertData(newTblName, (i, i + 1, 2 * i))
    conn.Commit()
    d = conn.GetData(table=newTblName)
    self.assertEqual(len(d), 10)

    # Adding a column must grow the table from three to four columns.
    self.assertEqual(len(conn.GetColumnNames(table=newTblName)), 3)
    conn.AddColumn(newTblName, 'val3', 'int')
    conn.Commit()
    self.assertEqual(len(conn.GetColumnNames(table=newTblName)), 4)

    # The new column starts out empty for every existing row.
    d = conn.GetColumns('id,val3', table=newTblName)
    self.assertEqual(len(d), 10)
    self.assertTrue(all(r[1] is None for r in d))

    # Fill val3 with the row id and verify the round trip.
    for r in d:
        conn.InsertColumnData(newTblName, 'val3', r[0], 'id={0}'.format(r[0]))
    conn.Commit()
    d = conn.GetColumns('id,val3', table=newTblName)
    self.assertTrue(all(r[0] == r[1] for r in d))

    # Release the result set before dropping the table it was read from.
    d = None
    try:
        conn.GetCursor().execute('drop table %s' % (newTblName))
    except Exception:
        self.fail('drop table failed')
def test1Create(self):
    """Run CreateDb.py on the SMILES and SDF inputs and check the resulting row counts.

    The SMILES run is expected to give 10 rows in every checked table, the
    SDF run (with --doGobbi2D) 163 rows.
    """
    # (database file, table) pairs inspected after each run
    targets = (
        ('testData/bzr/Compounds.sqlt', 'molecules'),
        ('testData/bzr/AtomPairs.sqlt', 'atompairs'),
        ('testData/bzr/Descriptors.sqlt', 'descriptors_v1'),
        ('testData/bzr/Fingerprints.sqlt', 'rdkitfps'),
    )

    def createDb(*args):
        # Returns the exit status of the CreateDb.py child process.
        proc = subprocess.Popen((sys.executable, 'CreateDb.py', '--dbDir=testData/bzr') + args)
        return proc.wait()

    def verify(nRows):
        # First make sure every file exists, then query each one in turn.
        for dbPath, _ in targets:
            self.assertTrue(os.path.exists(dbPath))
        for dbPath, tblName in targets:
            conn = DbConnect(dbPath)
            d = conn.GetData(tblName, fields='count(*)')
            self.assertTrue(d[0][0] == nRows)

    self.assertFalse(createDb('--molFormat=smiles', 'testData/bzr.smi'))
    verify(10)

    self.assertFalse(createDb('--molFormat=sdf', '--doGobbi2D', 'testData/bzr.sdf'))
    verify(163)
def testGetData1(self):
    """ basic functionality """
    conn = DbConnect(self.dbName, 'ten_elements')
    d = conn.GetData(randomAccess=1)
    # unittest assertions are used instead of bare asserts so the checks
    # survive "python -O" and report the differing values on failure.
    self.assertEqual(len(d), 10)
    self.assertEqual(tuple(d[0]), (0, 11))
    self.assertEqual(tuple(d[2]), (4, 31))
    # Indexing past the end of the result set must raise IndexError.
    with self.assertRaises(IndexError):
        _ = d[11]
def testGetData2(self):
    """ using removeDups """
    conn = DbConnect(self.dbName, 'ten_elements_dups')
    d = conn.GetData(randomAccess=1, removeDups=1)
    # unittest assertions are used instead of bare asserts so the checks
    # survive "python -O" and report the differing values on failure.
    self.assertEqual(tuple(d[0]), (0, 11))
    self.assertEqual(tuple(d[2]), (4, 31))
    # With removeDups=1 only 10 rows are expected from the table.
    self.assertEqual(len(d), 10)
    with self.assertRaises(IndexError):
        _ = d[11]
def testGetData3(self):
    """ without removeDups """

    def checkAllRows(d):
        # With removeDups=-1 all 20 rows are expected, duplicates included.
        # unittest assertions replace the bare asserts so the checks survive
        # "python -O" and report the differing values on failure.
        self.assertEqual(tuple(d[0]), (0, 11))
        self.assertEqual(tuple(d[2]), (2, 21))
        self.assertEqual(len(d), 20)
        with self.assertRaises(IndexError):
            _ = d[21]

    conn = DbConnect(self.dbName, 'ten_elements_dups')
    checkAllRows(conn.GetData(randomAccess=1, removeDups=-1))

    # repeat that test to make sure the table argument works
    conn = DbConnect(self.dbName, 'ten_elements')
    checkAllRows(conn.GetData(table='ten_elements_dups', randomAccess=1, removeDups=-1))
def testGetData5(self):
    """ using a RandomAccessDbResultSet with a Transform """

    def doubleSecond(row):
        # Transform applied to each row: keep column 0, double column 1.
        return (row[0], row[1] * 2)

    conn = DbConnect(self.dbName, 'ten_elements')
    d = conn.GetData(randomAccess=1, transform=doubleSecond)
    # unittest assertions are used instead of bare asserts so the checks
    # survive "python -O" and report the differing values on failure.
    self.assertEqual(tuple(d[0]), (0, 22))
    self.assertEqual(tuple(d[2]), (4, 62))
    self.assertEqual(len(d), 10)
    with self.assertRaises(IndexError):
        _ = d[11]
def GetComposites(details):
    """Load pickled composite models from a database table or from a file.

    **Arguments**

      - details: an object providing the attributes read here:
        _persistTblName_, _inNote_, _dbName_ and _composFileName_

    **Returns**

      a list of unpickled objects:

        - if both _persistTblName_ and _inNote_ are set, one entry per row of
          that table whose note matches _inNote_ (taken from the MODEL field)

        - otherwise, if _composFileName_ is set, the single object stored in
          that file

        - otherwise an empty list
    """
    res = []
    if details.persistTblName and details.inNote:
        conn = DbConnect(details.dbName, details.persistTblName)
        # NOTE(review): the note is interpolated straight into the where
        # clause; a note containing a quote breaks (or alters) the query.
        mdls = conn.GetData(fields='MODEL', where="where note='%s'" % (details.inNote))
        # NOTE(review): unpickling runs arbitrary code from the stored data;
        # only use this with databases/files from a trusted source.
        res.extend(cPickle.loads(str(row[0])) for row in mdls)
    elif details.composFileName:
        # Close the file deterministically instead of leaking the handle.
        with open(details.composFileName, 'rb') as inF:
            res.append(cPickle.load(inF))
    return res
def DBToData(dbName, tableName, user='******', password='******', dupCol=-1, what='*', where='',
             join='', pickleCol=-1, pickleClass=None, ensembleIds=None):
    """ constructs an _MLData.MLDataSet_ from a database

      **Arguments**

        - dbName: the name of the database to be opened

        - tableName: the table name containing the data in the database

        - user: the user name to be used to connect to the database

        - password: the password to be used to connect to the database

        - dupCol: passed to the query as its _removeDups_ argument

        - what, where, join: passed to the query as _fields_, _where_ and
          _join_

        - pickleCol: if >= 0, the index (after the leading name column has
          been removed) of a column holding pickled data to be expanded

        - pickleClass: if provided, tried first to construct the object from
          the string form of the pickle column; after the first failure
          cPickle is used for that row and all later rows

        - ensembleIds: if provided, restricts each point to these ids

      **Returns**

         an _MLData.MLDataSet_

      **Notes**

        - the first column of every row is used as the point name

        - this uses Dbase.DataUtils functionality

    """
    conn = DbConnect(dbName, tableName, user, password)
    rows = conn.GetData(fields=what, where=where, join=join, removeDups=dupCol, forceList=1)
    nRows = len(rows)
    vals = [None] * nRows
    ptNames = [None] * nRows
    hasPickleCol = pickleCol >= 0
    # Flips to False the first time pickleClass fails to build an object.
    classUsable = True
    for idx in range(nRows):
        fields = list(rows[idx])
        ptNames[idx] = fields.pop(0)
        if hasPickleCol:
            if pickleClass and classUsable:
                try:
                    fields[pickleCol] = pickleClass(str(fields[pickleCol]))
                except Exception:
                    fields[pickleCol] = cPickle.loads(str(fields[pickleCol]))
                    classUsable = False
            else:
                fields[pickleCol] = cPickle.loads(str(fields[pickleCol]))
            if ensembleIds:
                fields[pickleCol] = BitUtils.ConstructEnsembleBV(fields[pickleCol], ensembleIds)
        elif ensembleIds:
            fields = TakeEnsemble(fields, ensembleIds, isDataVect=True)
        vals[idx] = fields
    columnNames = conn.GetColumnNames(join=join, what=what)
    return MLData.MLDataSet(vals, varNames=columnNames, ptNames=ptNames)
def testGetData4(self):
    """ non random access """
    conn = DbConnect(self.dbName, 'ten_elements')
    d = conn.GetData(randomAccess=0)
    # A non-random-access result set must not support len().
    with self.assertRaises(TypeError):
        len(d)

    # Collect by plain iteration; list(d) is avoided on purpose because the
    # result set does not provide a usable length.
    rs = [thing for thing in d]
    # unittest assertions are used instead of bare asserts so the checks
    # survive "python -O" and report the differing values on failure.
    self.assertEqual(len(rs), 10)
    self.assertEqual(tuple(rs[0]), (0, 11))
    self.assertEqual(tuple(rs[2]), (4, 31))
def testGetData6(self):
    """ using a DbResultSet with a Transform """

    def doubleSecond(row):
        # Transform applied to each row: keep column 0, double column 1.
        return (row[0], row[1] * 2)

    conn = DbConnect(self.dbName, 'ten_elements')
    d = conn.GetData(randomAccess=0, transform=doubleSecond)
    # A non-random-access result set must not support len().
    with self.assertRaises(TypeError):
        len(d)

    # Collect by plain iteration; list(d) is avoided on purpose because the
    # result set does not provide a usable length.
    rs = [thing for thing in d]
    # unittest assertions are used instead of bare asserts so the checks
    # survive "python -O" and report the differing values on failure.
    self.assertEqual(len(rs), 10)
    self.assertEqual(tuple(rs[0]), (0, 22))
    self.assertEqual(tuple(rs[2]), (4, 62))
def getTestData(self):
    """Return RDK fingerprints for the rows of the ``simple_mols1`` test table.

    Every row is unpacked as (smiles, ID); each returned fingerprint has its
    ``_id`` attribute set to the string form of that ID.
    """

    def toMol(smiles, molId):
        mol = Chem.MolFromSmiles(str(smiles))
        mol.SetProp('_Name', str(molId))
        return mol

    def toFingerprint(mol):
        fp = Chem.RDKFingerprint(mol)
        fp._id = mol.GetProp('_Name')
        return fp

    conn = DbConnect(RDConfig.RDTestDatabase, 'simple_mols1')
    # Build every molecule first, then fingerprint them in a second pass.
    mols = [toMol(smi, ID) for smi, ID in conn.GetData()]
    # Calculate fingerprints
    return [toFingerprint(mol) for mol in mols]
def testGetData1(self):
    """ basic functionality """
    conn = DbConnect(self.dbName, 'ten_elements')
    d = conn.GetData(randomAccess=1)
    # unittest assertions are used instead of bare asserts so the checks
    # survive "python -O" and report the differing values on failure.
    self.assertEqual(len(d), 10)
    self.assertEqual(tuple(d[0]), (0, 11))
    self.assertEqual(tuple(d[2]), (4, 31))
    # Indexing past the end of the result set must raise IndexError.
    with self.assertRaises(IndexError):
        _ = d[11]

    # The same rows must come back when the columns are requested by name.
    d = conn.GetColumns(fields='id,val')
    self.assertEqual(len(d), 10)
    self.assertEqual(tuple(d[0]), (0, 11))
    self.assertEqual(tuple(d[2]), (4, 31))

    self.assertEqual(conn.GetDataCount(), 10)
def testInsertData(self):
    """ tests InsertData and InsertColumnData functionalities """
    newTblName = 'NEW_TABLE'
    conn = DbConnect(self.tempDbName)
    try:
        conn.GetCursor().execute('drop table %s' % (newTblName))
    except Exception:
        # Failure is deliberately ignored here (presumably the table simply
        # does not exist yet); the final drop below is the one that must work.
        pass
    conn.Commit()

    conn.AddTable(newTblName, 'id int,val1 int, val2 int')
    for i in range(10):
        conn.InsertData(newTblName, (i, i + 1, 2 * i))
    conn.Commit()
    d = conn.GetData(table=newTblName)
    # unittest assertion instead of a bare assert so the check survives
    # "python -O" and reports the actual row count on failure.
    self.assertEqual(len(d), 10)

    # Release the result set before dropping the table it was read from.
    d = None
    try:
        conn.GetCursor().execute('drop table %s' % (newTblName))
    except Exception:
        self.fail('drop table failed')
elif arg == '--enrich': enrich = int(val) composites = [] if db is None: for arg in extras: composite = cPickle.load(open(arg, 'rb')) composites.append(composite) else: tbl = extras[0] conn = DbConnect(db, tbl) if note: where = "where note='%s'" % (note) else: where = '' if not skip: pkls = conn.GetData(fields='model', where=where) composites = [] for pkl in pkls: pkl = str(pkl[0]) comp = cPickle.loads(pkl) composites.append(comp) if len(composites): ProcessIt(composites, count, verbose=verbose) elif not skip: print('ERROR: no composite models found') sys.exit(-1) if db: res = ErrorStats(conn, where, enrich=enrich) if res:
def GetNode(dbName, tableName):
    """Return a DbMolSupplyNode built on the GetData() result for the given table.

    **Arguments**

      - dbName: the name of the database to connect to

      - tableName: the table the connection is opened on
    """
    from rdkit.Dbase.DbConnection import DbConnect
    rows = DbConnect(dbName, tableName).GetData()
    return DbMolSupplyNode(rows)
from rdkit import Chem
from rdkit import RDConfig
from rdkit.Dbase import DbModule
from rdkit.Dbase.DbConnection import DbConnect
import pickle

# Script: read (smiles, ID) rows from the molecule table, compute an RDK
# fingerprint for each molecule and store its pickle in a new table.

# Database to use depends on whether the PostgreSQL backend is configured.
dbName = "::RDTests" if RDConfig.usePgSQL else "data.sqlt"
molTblName = 'simple_mols1'
fpTblName = 'simple_mols1_fp'

conn = DbConnect(dbName, molTblName)
# The fingerprint column uses the backend-specific binary column type.
conn.AddTable(fpTblName, 'id varchar(10),autofragmentfp %s' % DbModule.binaryTypeName)

d = conn.GetData()
for smi, ID in d:
    print(repr(ID), repr(smi))
    fp = Chem.RDKFingerprint(Chem.MolFromSmiles(smi))
    # Wrap the pickle in the backend's binary holder before inserting it.
    conn.InsertData(fpTblName, (ID, DbModule.binaryHolder(pickle.dumps(fp))))
conn.Commit()
def test4CreateOptions(self):
    """Check which files and columns CreateDb.py produces for several option sets.

    Every run loads testData/bzr.smi as SMILES into testData/bzr.  For each
    option set the test checks which database files exist and, for all but
    the last run, that the molecules table holds 10 rows and does or does not
    have a ``smiles`` column.
    """
    import sys  # local import: only needed to locate the running interpreter

    compoundsDb = 'testData/bzr/Compounds.sqlt'
    atomPairsDb = 'testData/bzr/AtomPairs.sqlt'
    descriptorsDb = 'testData/bzr/Descriptors.sqlt'
    fingerprintsDb = 'testData/bzr/Fingerprints.sqlt'

    def removeDbs():
        # Start from a clean slate: delete any database file left behind.
        for path in (compoundsDb, atomPairsDb, descriptorsDb, fingerprintsDb):
            if os.path.exists(path):
                os.unlink(path)

    def createDb(*options):
        # Run CreateDb.py with the interpreter executing the tests instead of
        # whatever "python" happens to be first on PATH.
        cmd = ((sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', '--molFormat=smiles') +
               options + ('testData/bzr.smi', ))
        res = subprocess.Popen(cmd).wait()
        self.assertFalse(res)

    def checkFiles(expectFingerprints):
        # Only the compounds database (and optionally the fingerprints one)
        # may exist after a run.
        self.assertTrue(os.path.exists(compoundsDb))
        self.assertFalse(os.path.exists(atomPairsDb))
        self.assertFalse(os.path.exists(descriptorsDb))
        if expectFingerprints:
            self.assertTrue(os.path.exists(fingerprintsDb))
        else:
            self.assertFalse(os.path.exists(fingerprintsDb))

    def checkMolecules(expectSmiles):
        # conn and d are locals, so they are released when this helper
        # returns, i.e. before any database file is deleted again.
        conn = DbConnect(compoundsDb)
        d = conn.GetData('molecules', fields='count(*)')
        self.assertEqual(d[0][0], 10)
        d = conn.GetData('molecules', fields='*')
        self.assertEqual(len(d), 10)
        cns = [x.lower() for x in d.GetColumnNames()]
        if expectSmiles:
            self.assertIn('smiles', cns)
        else:
            self.assertNotIn('smiles', cns)

    removeDbs()
    createDb('--noExtras', '--noSmiles')
    checkFiles(expectFingerprints=False)
    checkMolecules(expectSmiles=False)

    removeDbs()
    createDb('--noSmiles', '--noFingerprints', '--noLayeredFps', '--noMorganFps', '--noPairs',
             '--noDescriptors')
    checkFiles(expectFingerprints=False)
    checkMolecules(expectSmiles=False)

    # The remaining runs reuse the existing directory without cleaning it.
    createDb('--noProps', '--noFingerprints', '--noLayeredFps', '--noMorganFps', '--noPairs',
             '--noDescriptors')
    checkFiles(expectFingerprints=False)
    checkMolecules(expectSmiles=True)

    createDb('--noFingerprints', '--noLayeredFps', '--noMorganFps', '--noPairs',
             '--noDescriptors', '--maxRowsCached=4')
    checkFiles(expectFingerprints=False)
    checkMolecules(expectSmiles=True)

    # Without --noLayeredFps/--noMorganFps the fingerprints database is expected.
    createDb('--noFingerprints', '--noPairs', '--noDescriptors', '--maxRowsCached=4')
    checkFiles(expectFingerprints=True)