def test_InsertData(self):
    """ tests InsertData, AddColumn, GetColumns and InsertColumnData functionalities

    A scratch table is created in the temporary database, filled with ten
    rows, extended with a fourth column, and finally dropped again.
    """
    newTblName = 'NEW_TABLE'
    conn = DbConnect(self.tempDbName)
    try:
        conn.GetCursor().execute('drop table %s' % (newTblName))
    except Exception:
        # best effort cleanup of a previous run; failure here is ignored
        pass
    conn.Commit()

    conn.AddTable(newTblName, 'id int,val1 int, val2 int')
    for i in range(10):
        conn.InsertData(newTblName, (i, i + 1, 2 * i))
    conn.Commit()
    d = conn.GetData(table=newTblName)
    # unittest assertions are used throughout so the checks survive `python -O`
    self.assertEqual(len(d), 10)

    self.assertEqual(len(conn.GetColumnNames(table=newTblName)), 3)
    conn.AddColumn(newTblName, 'val3', 'int')
    conn.Commit()
    self.assertEqual(len(conn.GetColumnNames(table=newTblName)), 4)

    # the freshly added column must be empty for every existing row
    d = conn.GetColumns('id,val3', table=newTblName)
    self.assertEqual(len(d), 10)
    self.assertTrue(all(r[1] is None for r in d))

    # copy each row's id into val3, selecting the row with an 'id=<n>' clause
    for r in d:
        conn.InsertColumnData(newTblName, 'val3', r[0], 'id={0}'.format(r[0]))
    conn.Commit()
    d = conn.GetColumns('id,val3', table=newTblName)
    self.assertTrue(all(r[0] == r[1] for r in d))

    # release the result set before dropping the table
    d = None
    try:
        conn.GetCursor().execute('drop table %s' % (newTblName))
    except Exception:
        self.fail('drop table failed')
def testCursor(self):
    """ tests GetCursor and GetTableNames functionalities

    A view is created on the ten_elements table; it must be absent from
    GetTableNames(includeViews=0) and present in GetTableNames(includeViews=1).
    Failures are raised explicitly as AssertionError so that the checks are
    not stripped when Python runs with -O.
    """
    viewName = 'TEST_VIEW'
    conn = DbConnect(self.tempDbName)
    curs = conn.GetCursor()
    if not curs:
        raise AssertionError('no cursor returned')
    try:
        curs.execute('drop view %s' % (viewName))
    except Exception:
        # best effort cleanup of a previous run; failure here is ignored
        pass
    try:
        curs.execute('create view %s as select val,id from ten_elements' % (viewName))
    except Exception:
        import traceback
        traceback.print_exc()
        raise AssertionError('create view failed')
    conn.Commit()

    names = [x.strip() for x in conn.GetTableNames(includeViews=0)]
    if viewName in names:
        raise AssertionError('improper view found')
    names = [x.strip() for x in conn.GetTableNames(includeViews=1)]
    if viewName not in names:
        # the message reports the actual problem: the view is missing
        raise AssertionError('view not found in %s' % (str(names)))

    try:
        curs.execute('drop view %s' % (viewName))
    except Exception:
        raise AssertionError('drop table failed')
def testCursor(self):
    """ checks GetCursor and the includeViews switch of GetTableNames

    A view over ten_elements is created, must not be listed when views are
    excluded, must be listed when they are included, and is dropped again.
    """
    view = 'TEST_VIEW'
    dropCmd = 'drop view %s' % view
    createCmd = 'create view %s as select val,id from ten_elements' % view

    connection = DbConnect(self.tempDbName)
    cursor = connection.GetCursor()
    assert cursor

    # get rid of a view left behind by an earlier run; errors are ignored
    try:
        cursor.execute(dropCmd)
    except Exception:
        pass

    try:
        cursor.execute(createCmd)
    except Exception:
        import traceback
        traceback.print_exc()
        raise AssertionError('create view failed')
    connection.Commit()

    withoutViews = [name.strip() for name in connection.GetTableNames(includeViews=0)]
    self.assertNotIn(view, withoutViews, 'improper view found')
    withViews = [name.strip() for name in connection.GetTableNames(includeViews=1)]
    self.assertIn(view, withViews, 'improper view not found')

    try:
        cursor.execute(dropCmd)
    except Exception:
        raise AssertionError('drop table failed')
def testInsertData(self):
    """ checks that ten rows written with InsertData come back from GetData """
    table = 'NEW_TABLE'
    dropCmd = 'drop table %s' % table
    connection = DbConnect(self.tempDbName)

    # remove a table left behind by an earlier run; errors are ignored
    try:
        connection.GetCursor().execute(dropCmd)
    except Exception:
        pass
    connection.Commit()

    connection.AddTable(table, 'id int,val1 int, val2 int')
    for row in ((n, n + 1, 2 * n) for n in range(10)):
        connection.InsertData(table, row)
    connection.Commit()

    stored = connection.GetData(table=table)
    assert len(stored) == 10

    # let go of the result set before the table is dropped
    stored = None
    try:
        connection.GetCursor().execute(dropCmd)
    except Exception:
        assert 0, 'drop table failed'
def testAddTable(self):
    """ checks that a table created with AddTable is listed by GetTableNames """
    table = 'NEW_TABLE'
    dropCmd = 'drop table %s' % table
    connection = DbConnect(self.tempDbName)

    # remove a table left behind by an earlier run; errors are ignored
    try:
        connection.GetCursor().execute(dropCmd)
    except Exception:
        pass
    connection.Commit()

    connection.AddTable(table, 'id int')
    known = [entry.strip() for entry in connection.GetTableNames()]
    assert table in known, 'name (%s) not found in %s' % (table, str(known))

    # this final drop is not guarded: an error here fails the test
    connection.GetCursor().execute(dropCmd)
def testAddTable(self):
    """ checks AddTable against GetTableNames and GetColumnNames

    The table must be absent before AddTable, present afterwards, and expose
    exactly the single column 'id'.
    """
    table = 'NEW_TABLE'
    dropCmd = 'drop table %s' % table
    connection = DbConnect(self.tempDbName)

    # remove a table left behind by an earlier run; errors are ignored
    try:
        connection.GetCursor().execute(dropCmd)
    except Exception:
        pass
    connection.Commit()

    before = [entry.strip() for entry in connection.GetTableNames()]
    self.assertNotIn(table, before)

    connection.AddTable(table, 'id int')
    after = [entry.strip() for entry in connection.GetTableNames()]
    self.assertIn(table, after)

    columns = connection.GetColumnNames(table=table)
    self.assertEqual(columns, ['id'])

    # this final drop is not guarded: an error here fails the test
    connection.GetCursor().execute(dropCmd)
def Store(self, db='models.gdb', table='results', user='******', password='******'):
    """ writes this result as one row of a database table

    **Arguments**

      - db: name of the database to use

      - table: name of the table to use

      - user&password: connection information

    **Notes**

      - the table is prepared with self._CreateTable() before the insert

      - only the entries of self.fields that exist as attributes on this
        object are written; the others are left out of the insert statement

    """
    connection = DbConnect(db, table, user, password)
    cursor = connection.GetCursor()
    self._CreateTable(connection, table)

    # collect (column, value) pairs for every field that is actually set
    missing = object()
    present = []
    for fieldName, _ in self.fields:
        value = getattr(self, fieldName, missing)
        if value is not missing:
            present.append(('%s' % fieldName, value))

    columns = [column for column, _ in present]
    values = tuple(value for _, value in present)
    placeholders = ','.join([DbModule.placeHolder] * len(values))
    statement = 'insert into %s (%s) values (%s)' % (table, ','.join(columns), placeholders)
    cursor.execute(statement, values)
    connection.Commit()
def FingerprintsFromDetails(details, reportFreq=10):
    """ generates fingerprints for the molecules described by a details object

    **Arguments**

      - details: object whose attributes select the input and the output.
        The input is chosen, in this order, from:

          1) dbName and tableName: a database table

          2) inFileName with useSmiles: a text file

          3) inFileName with useSD: an SD file

        If none of these applies nothing is read and None is returned.
        outFileName, outDbName/outTableName, fpColName and replaceTable
        control where the results are written.

      - reportFreq: when positive, a progress message is emitted every
        reportFreq molecules while an SD file is being read

    **Returns**

      whatever FingerprintsFromSmiles(), FingerprintsFromPickles() or
      FingerprintsFromMols() returned (iterated below as (ID, fingerprint)
      pairs), or None if no data could be read

    **Notes**

      - details.idName is filled in when it is empty

    """
    data = None
    if details.dbName and details.tableName:
        from rdkit.Dbase.DbConnection import DbConnect
        from rdkit.Dbase import DbInfo
        from rdkit.ML.Data import DataUtils
        try:
            conn = DbConnect(details.dbName, details.tableName)
        except Exception:
            # the problem is reported, but processing deliberately continues
            import traceback
            error('Problems establishing connection to database: %s|%s\n' % (details.dbName,
                                                                             details.tableName))
            traceback.print_exc()
        if not details.idName:
            details.idName = DbInfo.GetColumnNames(details.dbName, details.tableName)[0]
        dataSet = DataUtils.DBToData(details.dbName, details.tableName,
                                     what='%s,%s' % (details.idName, details.smilesName))
        idCol = 0
        smiCol = 1
    elif details.inFileName and details.useSmiles:
        from rdkit.ML.Data import DataUtils
        conn = None
        if not details.idName:
            details.idName = 'ID'
        try:
            dataSet = DataUtils.TextFileToData(details.inFileName,
                                               onlyCols=[details.idName, details.smilesName])
        except IOError:
            import traceback
            error('Problems reading from file %s\n' % (details.inFileName))
            traceback.print_exc()
            # without this the name would be unbound below and the function
            # would die with an UnboundLocalError instead of returning None
            dataSet = None
        idCol = 0
        smiCol = 1
    elif details.inFileName and details.useSD:
        conn = None
        if not details.idName:
            details.idName = 'ID'
        dataSet = []
        try:
            s = Chem.SDMolSupplier(details.inFileName)
        except Exception:
            import traceback
            error('Problems reading from file %s\n' % (details.inFileName))
            traceback.print_exc()
        else:
            while 1:
                try:
                    m = next(s)
                except StopIteration:
                    break
                if m:
                    dataSet.append(m)
                    if reportFreq > 0 and not len(dataSet) % reportFreq:
                        message('Read %d molecules\n' % (len(dataSet)))
                    # checked independently of the progress report so that the
                    # cap is honoured whatever reportFreq is
                    if details.maxMols > 0 and len(dataSet) >= details.maxMols:
                        break

        # replace each molecule with a (name, molecule) pair
        for i, mol in enumerate(dataSet):
            if mol.HasProp(details.idName):
                nm = mol.GetProp(details.idName)
            else:
                nm = mol.GetProp('_Name')
            dataSet[i] = (nm, mol)
    else:
        dataSet = None

    fps = None
    if dataSet and not details.useSD:
        data = dataSet.GetNamedData()
        if not details.molPklName:
            fps = FingerprintsFromSmiles(data, idCol, smiCol, **details.__dict__)
        else:
            fps = FingerprintsFromPickles(data, idCol, smiCol, **details.__dict__)
    elif dataSet and details.useSD:
        fps = FingerprintsFromMols(dataSet, **details.__dict__)

    if fps:
        if details.outFileName:
            # the context manager closes the file even if pickling fails
            with open(details.outFileName, 'wb+') as outF:
                for entry in fps:
                    pickle.dump(entry, outF)
        dbName = details.outDbName or details.dbName
        if details.outTableName and dbName:
            from rdkit.Dbase.DbConnection import DbConnect
            from rdkit.Dbase import DbUtils, DbModule
            conn = DbConnect(dbName)
            #
            #  We don't have a db open already, so we'll need to figure out
            #    the types of our columns...
            #
            # NOTE(review): data is still None when the input was an SD file,
            # so this call looks like it would fail in that case - confirm
            colTypes = DbUtils.TypeFinder(data, len(data), len(data[0]))
            typeStrs = DbUtils.GetTypeStrings([details.idName, details.smilesName], colTypes,
                                              keyCol=details.idName)
            cols = '%s, %s %s' % (typeStrs[0], details.fpColName, DbModule.binaryTypeName)

            # FIX: we should really check to see if the table
            #  is already there and, if so, add the appropriate
            #  column.

            #
            # create the new table
            #
            if details.replaceTable or \
               details.outTableName.upper() not in [x.upper() for x in conn.GetTableNames()]:
                conn.AddTable(details.outTableName, cols)

            #
            # And add the data
            #
            for ID, fp in fps:
                tpl = ID, DbModule.binaryHolder(fp.ToBinary())
                conn.InsertData(details.outTableName, tpl)
            conn.Commit()
    return fps
def _ReportFailedMol(suppl, errorsTo, idx):
    """ writes the source text of an unreadable molecule to errorsTo, if possible """
    if not errorsTo:
        return
    if hasattr(suppl, 'GetItemText'):
        errorsTo.write(suppl.GetItemText(idx))
    else:
        logger.warning('full error file support not complete')


def LoadDb(suppl, dbName, nameProp='_Name', nameCol='compound_id', silent=False,
           redraw=False, errorsTo=None, keepHs=False, defaultVal='N/A', skipProps=False,
           regName='molecules', skipSmiles=False, maxRowsCached=-1,
           uniqNames=False, addComputedProps=False, lazySupplier=False,
           startAnew=True):
    """ loads the molecules from a supplier into a database table

    **Arguments**

      - suppl: the molecule supplier. It is iterated with a for loop and then
        with next(), so it has to be its own iterator; len() is taken unless
        lazySupplier is set

      - dbName: name of the database, passed to DbConnect()

      - regName: name of the table the molecules are written to

      - nameCol: name of the column holding the molecule names

      - uniqNames: adds a unique constraint to the name column (and is passed
        on to ProcessMol())

      - skipSmiles: leaves out the smiles/cansmiles column

      - errorsTo: optional file-like object; the text of molecules that could
        not be read is written to it when the supplier has GetItemText()

      - maxRowsCached: number of rows collected before they are written out

      - lazySupplier: do not call len() on the supplier

      - startAnew: if true the table is dropped and recreated; otherwise the
        rows are appended, with guids continuing after the largest one
        already in the table

      - silent: suppresses the progress messages

      - nameProp, redraw, keepHs, skipProps, addComputedProps: passed on to
        ProcessMol()

      - defaultVal: passed on to ConvertRows()

    **Notes**

      - the column definitions are derived from the properties seen in the
        first batch of molecules

      - raises ValueError when appending to a table whose columns differ

    """
    if not lazySupplier:
        nMols = len(suppl)
    else:
        nMols = -1
    if not silent:
        logger.info("Generating molecular database in file %s" % dbName)
        if not lazySupplier:
            logger.info(" Processing %d molecules" % nMols)
    rows = []
    globalProps = {}
    namesSeen = set()
    nDone = 0
    typeConversions = {0: ('varchar', str), 1: ('float', float), 2: ('int', int)}

    # first pass: collect rows until the supplier is exhausted or the cache is full
    for m in suppl:
        nDone += 1
        if not m:
            _ReportFailedMol(suppl, errorsTo, nDone - 1)
            continue

        row = ProcessMol(m, typeConversions, globalProps, nDone, nameProp=nameProp,
                         nameCol=nameCol, redraw=redraw, keepHs=keepHs, skipProps=skipProps,
                         addComputedProps=addComputedProps, skipSmiles=skipSmiles,
                         uniqNames=uniqNames, namesSeen=namesSeen)
        if row is None:
            continue
        rows.append([nDone] + row)
        if not silent and not nDone % 100:
            logger.info(' done %d' % nDone)
        if len(rows) == maxRowsCached:
            break

    # build the column definitions from what has been seen so far
    nameDef = '%s varchar not null' % nameCol
    if uniqNames:
        nameDef += ' unique'
    typs = ['guid integer not null primary key', nameDef]
    pns = []
    for pn, v in globalProps.items():
        addNm = re.sub(r'[\W]', '_', pn)
        typs.append('%s %s' % (addNm, typeConversions[v][0]))
        pns.append(pn.lower())

    if not skipSmiles:
        # avoid clashing with a molecule property that is itself called smiles
        if 'smiles' not in pns:
            typs.append('smiles varchar')
        else:
            typs.append('cansmiles varchar')
    typs.append('molpkl %s' % (DbModule.binaryTypeName))

    conn = DbConnect(dbName)
    curs = conn.GetCursor()
    # amount added to every guid so new rows never reuse an existing key
    offset = 0
    if startAnew:
        try:
            curs.execute('drop table %s' % regName)
        except Exception:
            # best effort: any error from the drop is ignored
            pass
        curs.execute('create table %s (%s)' % (regName, ','.join(typs)))
    else:
        curs.execute('select * from %s limit 1' % (regName, ))
        ocolns = set([x[0] for x in curs.description])
        ncolns = set([x.split()[0] for x in typs])
        if ncolns != ocolns:
            raise ValueError('Column names do not match: %s != %s' % (ocolns, ncolns))
        curs.execute('select max(guid) from %s' % (regName, ))
        # max() yields NULL/None for an empty table
        offset = curs.fetchone()[0] or 0
        for row in rows:
            row[0] += offset

    qs = ','.join([DbModule.placeHolder] * len(typs))
    insertCmd = 'insert into %s values (%s)' % (regName, qs)

    def _writeRows(pending):
        # convert, insert and commit one batch of rows
        ConvertRows(pending, globalProps, defaultVal, skipSmiles)
        curs.executemany(insertCmd, pending)
        conn.Commit()

    _writeRows(rows)

    # second pass: whatever the supplier still holds is written in batches
    rows = []
    while 1:
        nDone += 1
        try:
            m = next(suppl)
        except StopIteration:
            break
        if not m:
            _ReportFailedMol(suppl, errorsTo, nDone - 1)
            continue
        row = ProcessMol(m, typeConversions, globalProps, nDone, nameProp=nameProp,
                         nameCol=nameCol, redraw=redraw, keepHs=keepHs, skipProps=skipProps,
                         addComputedProps=addComputedProps, skipSmiles=skipSmiles,
                         uniqNames=uniqNames, namesSeen=namesSeen)
        if not row:
            continue
        # the offset has to be applied here as well, otherwise rows appended
        # to an existing table would collide with the guids already present
        rows.append([nDone + offset] + row)
        if not silent and not nDone % 100:
            logger.info(' done %d' % nDone)
        if len(rows) == maxRowsCached:
            _writeRows(rows)
            rows = []
    if len(rows):
        _writeRows(rows)
def _dropTableIfPresent(curs, tableName):
    """ drops a table through the cursor, ignoring any error from the drop """
    try:
        curs.execute('drop table %s' % (tableName))
    except Exception:
        # best effort: presumably the table simply does not exist yet
        pass


def _flushRowBuffers(buffers):
    """ inserts and commits every non-empty row buffer, then empties it in place """
    for rows, curs, conn, cmd in buffers:
        if len(rows):
            curs.executemany(cmd, rows)
            del rows[:]
            conn.Commit()


def _loadMolecules(options, dataFilename, supplier, errFile):
    """ builds a supplier for dataFilename if none was given and fills the molecule table """
    if supplier is None:
        if not options.molFormat:
            ext = os.path.splitext(dataFilename)[-1].lower()
            if ext == '.sdf':
                options.molFormat = 'sdf'
            elif ext in ('.smi', '.smiles', '.txt', '.csv'):
                options.molFormat = 'smiles'
                if not options.delimiter:
                    # guess the delimiter
                    import csv
                    sniffer = csv.Sniffer()
                    # read the sample inside a with block so the handle is closed
                    with open(dataFilename, 'r') as sampleF:
                        sample = sampleF.read(2000)
                    dlct = sniffer.sniff(sample)
                    options.delimiter = dlct.delimiter
                    if not options.silent:
                        logger.info('Guessing that delimiter is %s. Use --delimiter argument if this is wrong.' % repr(options.delimiter))

            if not options.silent:
                logger.info('Guessing that mol format is %s. Use --molFormat argument if this is wrong.' % repr(options.molFormat))
        if options.molFormat == 'smiles':
            # a literal backslash-t (as typed on a command line) means a tab
            if options.delimiter == '\\t':
                options.delimiter = '\t'
            supplier = Chem.SmilesMolSupplier(dataFilename,
                                              titleLine=options.titleLine,
                                              delimiter=options.delimiter,
                                              smilesColumn=options.smilesColumn,
                                              nameColumn=options.nameColumn
                                              )
        else:
            supplier = Chem.SDMolSupplier(dataFilename)
    if not options.silent:
        logger.info('Reading molecules and constructing molecular database.')
    Loader.LoadDb(supplier, os.path.join(options.outDir, options.molDbName),
                  errorsTo=errFile, regName=options.regName, nameCol=options.molIdName,
                  skipProps=options.skipProps, defaultVal=options.missingPropertyVal,
                  addComputedProps=options.addProps, uniqNames=True,
                  skipSmiles=options.skipSmiles, maxRowsCached=int(options.maxRowsCached),
                  silent=options.silent, nameProp=options.nameProp,
                  lazySupplier=int(options.maxRowsCached) > 0,
                  startAnew=not options.updateDb
                  )


def CreateDb(options, dataFilename='', supplier=None):
    """ creates the molecule database and the fingerprint/descriptor tables

    **Arguments**

      - options: object carrying the settings. Among others it provides the
        output directory (outDir), the database and table names, and the
        do* flags selecting which tables are generated

      - dataFilename: name of the file the molecules are read from; only
        used when no supplier is given

      - supplier: optional molecule supplier, handed to Loader.LoadDb()

    **Notes**

      - raises ValueError if neither dataFilename nor supplier is provided

      - options.noExtras switches off all fingerprint and descriptor tables

      - options.molFormat and options.delimiter are filled in when they
        have to be guessed from the data file

      - the molecules are read back from the molecule table, and the rows
        for the other tables are written in batches of 500 molecules

    """
    if not dataFilename and supplier is None:
        raise ValueError('Please provide either a data filename or a supplier')

    if options.errFilename:
        errFile = open(os.path.join(options.outDir, options.errFilename), 'w+')
    else:
        errFile = None

    # the error file is only used while loading molecules, so it is closed
    # as soon as that step is over, whether or not it succeeded
    try:
        if options.noExtras:
            options.doPairs = False
            options.doDescriptors = False
            options.doFingerprints = False
            options.doPharm2D = False
            options.doGobbi2D = False
            options.doLayered = False
            options.doMorganFps = False

        if options.loadMols:
            _loadMolecules(options, dataFilename, supplier, errFile)
    finally:
        if errFile is not None:
            errFile.close()

    if options.doPairs:
        pairConn = DbConnect(os.path.join(options.outDir, options.pairDbName))
        pairCurs = pairConn.GetCursor()
        _dropTableIfPresent(pairCurs, options.pairTableName)
        pairCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,atompairfp blob,torsionfp blob)' % (options.pairTableName,
                                                                                                                                            options.molIdName))

    if options.doFingerprints or options.doPharm2D or options.doGobbi2D or options.doLayered:
        fpConn = DbConnect(os.path.join(options.outDir, options.fpDbName))
        fpCurs = fpConn.GetCursor()
        # all four tables are dropped, whichever of them get recreated below
        for staleTable in (options.fpTableName, options.pharm2DTableName,
                           options.gobbi2DTableName, options.layeredTableName):
            _dropTableIfPresent(fpCurs, staleTable)

        if options.doFingerprints:
            fpCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,rdkfp blob)' % (options.fpTableName,
                                                                                                                       options.molIdName))
        if options.doLayered:
            layeredQs = ','.join('?' * LayeredOptions.nWords)
            colDefs = ','.join(['Col_%d integer' % (x + 1) for x in range(LayeredOptions.nWords)])
            fpCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,%s)' % (options.layeredTableName,
                                                                                                               options.molIdName,
                                                                                                               colDefs))
        if options.doPharm2D:
            fpCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,pharm2dfp blob)' % (options.pharm2DTableName,
                                                                                                                           options.molIdName))
            sigFactory = BuildSigFactory(options)
        if options.doGobbi2D:
            fpCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,gobbi2dfp blob)' % (options.gobbi2DTableName,
                                                                                                                           options.molIdName))
            from rdkit.Chem.Pharm2D import Generate, Gobbi_Pharm2D

    if options.doMorganFps:
        # note that this rebinds fpConn/fpCurs; all fingerprint tables are
        # written through this connection from here on
        fpConn = DbConnect(os.path.join(options.outDir, options.fpDbName))
        fpCurs = fpConn.GetCursor()
        _dropTableIfPresent(fpCurs, options.morganFpTableName)
        fpCurs.execute('create table %s (guid integer not null primary key,%s varchar not null unique,morganfp blob)' % (options.morganFpTableName,
                                                                                                                      options.molIdName))

    if options.doDescriptors:
        descrConn = DbConnect(os.path.join(options.outDir, options.descrDbName))
        with open(options.descriptorCalcFilename, 'r') as inTF:
            buf = inTF.read().replace('\r\n', '\n').encode('utf-8')
        # NOTE: unpickling executes code from the file; only use descriptor
        # calculator files from a trusted source
        calc = cPickle.load(io.BytesIO(buf))
        nms = [x for x in calc.GetDescriptorNames()]
        descrCurs = descrConn.GetCursor()
        descrs = ['guid integer not null primary key', '%s varchar not null unique' % options.molIdName]
        descrs.extend(['%s float' % x for x in nms])
        _dropTableIfPresent(descrCurs, options.descrTableName)
        descrCurs.execute('create table %s (%s)' % (options.descrTableName, ','.join(descrs)))
        descrQuery = ','.join([DbModule.placeHolder] * len(descrs))

    pairRows = []
    fpRows = []
    layeredRows = []
    descrRows = []
    pharm2DRows = []
    gobbi2DRows = []
    morganRows = []

    # one (rows, cursor, connection, insert statement) entry per active table,
    # in the order in which the tables are written
    buffers = []
    if options.doPairs:
        buffers.append((pairRows, pairCurs, pairConn,
                        'insert into %s values (?,?,?,?)' % options.pairTableName))
    if options.doFingerprints:
        buffers.append((fpRows, fpCurs, fpConn,
                        'insert into %s values (?,?,?)' % options.fpTableName))
    if options.doLayered:
        buffers.append((layeredRows, fpCurs, fpConn,
                        'insert into %s values (?,?,%s)' % (options.layeredTableName, layeredQs)))
    if options.doDescriptors:
        buffers.append((descrRows, descrCurs, descrConn,
                        'insert into %s values (%s)' % (options.descrTableName, descrQuery)))
    if options.doPharm2D:
        buffers.append((pharm2DRows, fpCurs, fpConn,
                        'insert into %s values (?,?,?)' % options.pharm2DTableName))
    if options.doGobbi2D:
        buffers.append((gobbi2DRows, fpCurs, fpConn,
                        'insert into %s values (?,?,?)' % options.gobbi2DTableName))
    if options.doMorganFps:
        buffers.append((morganRows, fpCurs, fpConn,
                        'insert into %s values (?,?,?)' % options.morganFpTableName))

    if not options.silent:
        logger.info('Generating fingerprints and descriptors:')
    molConn = DbConnect(os.path.join(options.outDir, options.molDbName))
    molCurs = molConn.GetCursor()
    if not options.skipSmiles:
        molCurs.execute('select guid,%s,smiles,molpkl from %s' % (options.molIdName, options.regName))
    else:
        molCurs.execute('select guid,%s,molpkl from %s' % (options.molIdName, options.regName))

    i = 0
    while 1:
        # an exhausted cursor is detected explicitly, so that genuine
        # database errors are no longer mistaken for the end of the data
        tpl = molCurs.fetchone()
        if tpl is None:
            break
        molGuid = tpl[0]
        molId = tpl[1]
        pkl = tpl[-1]
        i += 1

        if isinstance(pkl, (bytes, str)):
            mol = Chem.Mol(pkl)
        else:
            mol = Chem.Mol(str(pkl))
        if not mol:
            continue

        if options.doPairs:
            pairs = FingerprintUtils.BuildAtomPairFP(mol)
            torsions = FingerprintUtils.BuildTorsionsFP(mol)
            pkl1 = DbModule.binaryHolder(pairs.ToBinary())
            pkl2 = DbModule.binaryHolder(torsions.ToBinary())
            row = (molGuid, molId, pkl1, pkl2)
            pairRows.append(row)
        if options.doFingerprints:
            fp2 = FingerprintUtils.BuildRDKitFP(mol)
            blob = DbModule.binaryHolder(fp2.ToBinary())
            row = (molGuid, molId, blob)
            fpRows.append(row)
        if options.doLayered:
            words = LayeredOptions.GetWords(mol)
            row = [molGuid, molId] + words
            layeredRows.append(row)
        if options.doDescriptors:
            descrs = calc.CalcDescriptors(mol)
            row = [molGuid, molId]
            row.extend(descrs)
            descrRows.append(row)
        if options.doPharm2D:
            FingerprintUtils.sigFactory = sigFactory
            fp = FingerprintUtils.BuildPharm2DFP(mol)
            blob = DbModule.binaryHolder(fp.ToBinary())
            row = (molGuid, molId, blob)
            pharm2DRows.append(row)
        if options.doGobbi2D:
            FingerprintUtils.sigFactory = Gobbi_Pharm2D.factory
            fp = FingerprintUtils.BuildPharm2DFP(mol)
            blob = DbModule.binaryHolder(fp.ToBinary())
            row = (molGuid, molId, blob)
            gobbi2DRows.append(row)
        if options.doMorganFps:
            morgan = FingerprintUtils.BuildMorganFP(mol)
            blob = DbModule.binaryHolder(morgan.ToBinary())
            row = (molGuid, molId, blob)
            morganRows.append(row)

        # write out what has accumulated every 500 molecules
        if not i % 500:
            _flushRowBuffers(buffers)

        if not options.silent and not i % 500:
            logger.info(' Done: %d' % (i))

    # write out whatever is left over from the last, partial batch
    _flushRowBuffers(buffers)

    if not options.silent:
        logger.info('Finished.')
# Builds a table of RDKit fingerprints for the molecules in the simple_mols1
# table: every row of that table is read as (SMILES, id) and the pickled
# fingerprint is stored in simple_mols1_fp under the same id.
from rdkit import Chem
from rdkit import RDConfig
from rdkit.Dbase import DbModule
from rdkit.Dbase.DbConnection import DbConnect
import pickle

if RDConfig.usePgSQL:
    dbName = "::RDTests"
else:
    dbName = "data.sqlt"
molTblName = 'simple_mols1'
fpTblName = 'simple_mols1_fp'

conn = DbConnect(dbName, molTblName)
conn.AddTable(fpTblName, 'id varchar(10),autofragmentfp %s' % DbModule.binaryTypeName)
d = conn.GetData()
for smi, ID in d:
    print(repr(ID), repr(smi))
    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        # MolFromSmiles() returns None for SMILES it cannot parse; skip the
        # row instead of crashing and losing every insert made so far
        print('skipping %s: could not parse SMILES' % repr(ID))
        continue
    fp = Chem.RDKFingerprint(mol)
    pkl = pickle.dumps(fp)
    conn.InsertData(fpTblName, (ID, DbModule.binaryHolder(pkl)))
conn.Commit()