def test_AdjustColHeadings(self):
    """ headings are cleaned to 'abc_def' and cut down to the requested length """
    wide = DbUtils._AdjustColHeadings(['abc def', ' abc def', 'abc-def ', 'abc.def'], 7)
    self.assertEqual(wide, ['abc_def'] * 4)

    # With a limit of 3 the call is expected to print something containing
    # 'Heading'; stdout is captured so the message can be checked.
    # A fresh list of headings is passed in, as in the first call.
    captured = StringIO()
    with redirect_stdout(captured):
        narrow = DbUtils._AdjustColHeadings(['abc def', ' abc def', 'abc-def ', 'abc.def'], 3)
        self.assertEqual(narrow, ['abc'] * 4)
    self.assertIn('Heading', captured.getvalue())
def test_GetTypeStrings(self):
    """ column type strings are generated with and without a key column """
    names = ['pk', 'a', 'b', 'c']
    typeInfo = [(int, 2), (int, 3), (float, 5), (str, 10)]

    # no key column: plain type declarations
    plain = DbUtils.GetTypeStrings(names, typeInfo)
    self.assertEqual(plain, ['pk integer', 'a integer', 'b double precision', 'c varchar(10)'])

    # with keyCol the matching column gets the primary key suffix
    keyed = DbUtils.GetTypeStrings(names, typeInfo, keyCol='pk')
    self.assertEqual(
        keyed,
        ['pk integer not null primary key', 'a integer', 'b double precision', 'c varchar(10)'])
def test_GetData_where(self):
    """ where clauses restrict the rows returned from 'ten_elements_dups' """
    table = 'ten_elements_dups'

    # (where clause, exclusive upper bound on the id column, expected row count)
    cases = (('id<4', 4, 4), ('id<10', 10, 10))
    for clause, bound, nRows in cases:
        d = DbUtils.GetData(self.dbName, table, forceList=0, randomAccess=0,
                            whereString=clause)
        self.assertEqual(len(list(d)), nRows)
        # d is walked a second time here, after the list() call above
        self.assertTrue(all(x[0] < bound for x in d))

    d = DbUtils.GetData(self.dbName, table, removeDups=1, forceList=True)
    self.assertEqual(len(list(d)), 10)
def test_DatabaseToText(self):
    """ text dumps of 'ten_elements' honour the fields and where arguments """
    table = 'ten_elements'

    # full dump: header line plus data rows
    fullDump = DbUtils.DatabaseToText(self.dbName, table)
    for fragment in ('id,val', '0,11', '18,101'):
        self.assertIn(fragment, fullDump)
    self.assertEqual(len(fullDump.split('\n')), 11)

    # a single field: no id column and therefore no separator
    valOnly = DbUtils.DatabaseToText(self.dbName, table, fields='val')
    for unwanted in ('id', ','):
        self.assertNotIn(unwanted, valOnly)

    # restricted by a where clause
    filtered = DbUtils.DatabaseToText(self.dbName, table, where='id<4')
    self.assertIn('id,val', filtered)
    self.assertEqual(len(filtered.split('\n')), 3)
def GetColumns(self, fields, table='', join='', **kwargs):
    """ pulls the requested columns out of a table

      **Arguments**

        - fields: a string with the names of the fields to be extracted,
          this should be a comma delimited list

        - table: (optional) the table to read from; when empty,
          self.tableName is used

        - join: (optional) handed on to _DbUtils.GetColumns_ as its
          join argument

      **Returns**

          a list of the data

      **Notes**

        - this uses _DbUtils.GetColumns_

        - any extra keyword arguments are accepted but not passed along

    """
    source = table or self.tableName
    return DbUtils.GetColumns(self.dbName, source, fields, self.user, self.password,
                              join=join)
def GetData(self,table=None,fields='*',where='',removeDups=-1,join='',
            transform=None,randomAccess=1,**kwargs):
    """ a more flexible way of pulling a set of data from a table

      **Arguments**

        - table: (optional) the table to use; self.tableName is used
          when this is None

        - fields: a string with the names of the fields to be extracted,
          this should be a comma delimited list

        - where: the SQL where clause to be used with the DB query

        - removeDups: indicates which column should be used to recognize
          duplicates in the data.  -1 for no duplicate removal.

        - join, transform, randomAccess: handed on unchanged to
          _DbUtils.GetData_

        - any other keyword arguments are also handed on; forceList
          defaults to 0 when the caller does not provide it

      **Returns**

          a list of the data

      **Notes**

        - this uses _DbUtils.GetData_

    """
    source = self.tableName if table is None else table
    # only fills in forceList when the caller did not supply one
    kwargs.setdefault('forceList', 0)
    return DbUtils.GetData(self.dbName, source,
                           fieldString=fields,
                           whereString=where,
                           user=self.user,
                           password=self.password,
                           removeDups=removeDups,
                           join=join,
                           cn=self.cn,
                           transform=transform,
                           randomAccess=randomAccess,
                           **kwargs)
def test_DatabaseToDatabase(self):
    """ copies 'ten_elements' into the temporary database and checks the result """
    target = 'db2db'
    # anything the copy prints is swallowed; the captured text is not inspected
    with redirect_stdout(StringIO()):
        DbUtils.DatabaseToDatabase(self.dbName, 'ten_elements', self.tempDbName, target)
    self._confirm(target, dbName=self.tempDbName, colHeads=['id', 'val'])
def GetDataCount(self, table=None, where='', join='', **kwargs):
    """ counts the results a query will return

      **Arguments**

        - table: (optional) the table to use; self.tableName is used
          when this is empty

        - where: the SQL where clause to be used with the DB query

        - join: the SQL join clause to be used with the DB query

      **Returns**

          an int

      **Notes**

        - this uses _DbUtils.GetData_ with 'count(*)' as the field string

        - extra keyword arguments are accepted but not passed along

    """
    source = table or self.tableName
    rows = DbUtils.GetData(self.dbName, source,
                           fieldString='count(*)',
                           whereString=where,
                           cn=self.cn,
                           user=self.user,
                           password=self.password,
                           join=join,
                           forceList=0)
    # the count is the first column of the first row
    return rows[0][0]
def test1Txt(self):
    """ loads dbtest.csv into the temporary database and checks the table """
    csvPath = os.path.join(self.baseDir, 'dbtest.csv')
    target = 'fromtext'
    with open(csvPath, 'r') as inF:
        # output produced during the import is captured and discarded
        with redirect_stdout(StringIO()):
            DbUtils.TextFileToDatabase(self.tempDbName, target, inF)
        self._confirm(target, dbName=self.tempDbName)
def testGetData1(self):
    """ basic functionality: forceList=1 gives an indexable, sized result """
    d = DbUtils.GetData(self.dbName, 'ten_elements', forceList=1)
    # unittest assertions are used instead of bare asserts so the checks
    # still run under "python -O" and report the mismatching values
    self.assertEqual(len(d), 10)
    self.assertEqual(tuple(d[0]), (0, 11))
    self.assertEqual(tuple(d[2]), (4, 31))
    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12;
    # its empty pattern matched any message, so assertRaises is equivalent
    with self.assertRaises(IndexError):
        d[11]
def testGetData2(self):
    """ using a RandomAccessDbResultSet """
    d = DbUtils.GetData(self.dbName, 'ten_elements', forceList=0, randomAccess=1)
    # unittest assertions are used instead of bare asserts so the checks
    # still run under "python -O" and report the mismatching values
    self.assertEqual(tuple(d[0]), (0, 11))
    self.assertEqual(tuple(d[2]), (4, 31))
    self.assertEqual(len(d), 10)
    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12;
    # its empty pattern matched any message, so assertRaises is equivalent
    with self.assertRaises(IndexError):
        d[11]
def test3Txt(self):
    """ loads a text file that uses 'NA' as its null marker and checks the table """
    csvPath = os.path.join(self.baseDir, 'dbtest.nulls.csv')
    target = 'fromtext2'
    with open(csvPath, 'r') as inF:
        DbUtils.TextFileToDatabase(self.tempDbName, target, inF, nullMarker='NA')
        self._confirm(target, dbName=self.tempDbName)
def testGetData4(self):
    """ using a RandomAccessDbResultSet with a Transform """
    # the transform doubles the second column of every row
    fn = lambda x: (x[0], x[1] * 2)
    d = DbUtils.GetData(self.dbName, 'ten_elements', forceList=0, randomAccess=1,
                        transform=fn)
    # unittest assertions are used instead of bare asserts so the checks
    # still run under "python -O" and report the mismatching values
    self.assertEqual(tuple(d[0]), (0, 22))
    self.assertEqual(tuple(d[2]), (4, 62))
    self.assertEqual(len(d), 10)
    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12;
    # its empty pattern matched any message, so assertRaises is equivalent
    with self.assertRaises(IndexError):
        d[11]
def testGetData3(self):
    """ using a DbResultSet """
    d = DbUtils.GetData(self.dbName, 'ten_elements', forceList=0, randomAccess=0)
    # this kind of result is expected not to support len().
    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12;
    # its empty pattern matched any message, so assertRaises is equivalent
    with self.assertRaises(TypeError):
        len(d)
    # the rows are still reachable by iterating
    rs = list(d)
    # unittest assertions are used instead of bare asserts so the checks
    # still run under "python -O" and report the mismatching values
    self.assertEqual(len(rs), 10)
    self.assertEqual(tuple(rs[0]), (0, 11))
    self.assertEqual(tuple(rs[2]), (4, 31))
def testGetData1(self):
    """ basic functionality: forceList=1 gives an indexable, sized result """
    d = DbUtils.GetData(self.dbName, 'ten_elements', forceList=1)
    # unittest assertions are used instead of bare asserts so the checks
    # still run under "python -O" and report the mismatching values
    self.assertEqual(len(d), 10)
    self.assertEqual(tuple(d[0]), (0, 11))
    self.assertEqual(tuple(d[2]), (4, 31))
    # replaces a try / bare "except:" / else ladder: assertRaises fails the
    # test when nothing is raised and lets any other exception surface
    # with its own traceback instead of hiding it behind "assert 0"
    with self.assertRaises(IndexError):
        d[11]
def testGetData5(self):
    """ using a DbResultSet with a Transform """
    # the transform doubles the second column of every row
    fn = lambda x: (x[0], x[1] * 2)
    d = DbUtils.GetData(self.dbName, 'ten_elements', forceList=0, randomAccess=0,
                        transform=fn)
    # this kind of result is expected not to support len().
    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12;
    # its empty pattern matched any message, so assertRaises is equivalent
    with self.assertRaises(TypeError):
        len(d)
    # the rows are still reachable by iterating
    rs = list(d)
    # unittest assertions are used instead of bare asserts so the checks
    # still run under "python -O" and report the mismatching values
    self.assertEqual(len(rs), 10)
    self.assertEqual(tuple(rs[0]), (0, 22))
    self.assertEqual(tuple(rs[2]), (4, 62))
def testGetData2(self):
    """ using a RandomAccessDbResultSet """
    d = DbUtils.GetData(self.dbName, 'ten_elements', forceList=0, randomAccess=1)
    # unittest assertions are used instead of bare asserts so the checks
    # still run under "python -O" and report the mismatching values
    self.assertEqual(tuple(d[0]), (0, 11))
    self.assertEqual(tuple(d[2]), (4, 31))
    self.assertEqual(len(d), 10)
    # replaces a try / bare "except:" / else ladder: assertRaises fails the
    # test when nothing is raised and lets any other exception surface
    # with its own traceback instead of hiding it behind "assert 0"
    with self.assertRaises(IndexError):
        d[11]
def testGetData4(self):
    """ using a RandomAccessDbResultSet with a Transform """
    # the transform doubles the second column of every row
    fn = lambda x: (x[0], x[1] * 2)
    d = DbUtils.GetData(self.dbName, 'ten_elements', forceList=0, randomAccess=1,
                        transform=fn)
    # unittest assertions are used instead of bare asserts so the checks
    # still run under "python -O" and report the mismatching values
    self.assertEqual(tuple(d[0]), (0, 22))
    self.assertEqual(tuple(d[2]), (4, 62))
    self.assertEqual(len(d), 10)
    # replaces a try / bare "except:" / else ladder: assertRaises fails the
    # test when nothing is raised and lets any other exception surface
    # with its own traceback instead of hiding it behind "assert 0"
    with self.assertRaises(IndexError):
        d[11]
def testGetData3(self):
    """ using a DbResultSet """
    d = DbUtils.GetData(self.dbName, 'ten_elements', forceList=0, randomAccess=0)
    # this kind of result is expected not to support len().
    # replaces a try / bare "except:" / else ladder: assertRaises fails the
    # test when nothing is raised and lets any other exception surface
    # with its own traceback instead of hiding it behind "assert 0"
    with self.assertRaises(TypeError):
        len(d)
    # the rows are still reachable by iterating
    rs = list(d)
    # unittest assertions are used instead of bare asserts so the checks
    # still run under "python -O" and report the mismatching values
    self.assertEqual(len(rs), 10)
    self.assertEqual(tuple(rs[0]), (0, 11))
    self.assertEqual(tuple(rs[2]), (4, 31))
def testGetData5(self):
    """ using a DbResultSet with a Transform """
    # the transform doubles the second column of every row
    fn = lambda x: (x[0], x[1] * 2)
    d = DbUtils.GetData(self.dbName, 'ten_elements', forceList=0, randomAccess=0,
                        transform=fn)
    # this kind of result is expected not to support len().
    # replaces a try / bare "except:" / else ladder: assertRaises fails the
    # test when nothing is raised and lets any other exception surface
    # with its own traceback instead of hiding it behind "assert 0"
    with self.assertRaises(TypeError):
        len(d)
    # the rows are still reachable by iterating
    rs = list(d)
    # unittest assertions are used instead of bare asserts so the checks
    # still run under "python -O" and report the mismatching values
    self.assertEqual(len(rs), 10)
    self.assertEqual(tuple(rs[0]), (0, 22))
    self.assertEqual(tuple(rs[2]), (4, 62))
def test_take(self):
    """ _take picks out the elements at the requested positions """
    # (positions to take, expected elements)
    cases = (([2, 3], [3, 4]), ([0, 3], [1, 4]))
    for positions, expected in cases:
        picked = list(DbUtils._take([1, 2, 3, 4], positions))
        self.assertEqual(picked, expected)
def FingerprintsFromDetails(details, reportFreq=10):
    """ builds fingerprints for the molecules described by a details object

      The input source is chosen from the attributes of _details_, in this
      order:

        1. a database table (details.dbName and details.tableName both set)

        2. a text file with SMILES (details.inFileName and details.useSmiles)

        3. an SD file (details.inFileName and details.useSD)

      **Arguments**

        - details: an object carrying the settings read here (dbName,
          tableName, inFileName, useSmiles, useSD, idName, smilesName,
          maxMols, molPklName, outFileName, outDbName, outTableName,
          fpColName, replaceTable).  Its __dict__ is forwarded as keyword
          arguments to the fingerprinting helpers.  details.idName is
          filled in when it is empty.

        - reportFreq: while reading an SD file a progress message is emitted
          every _reportFreq_ molecules; values <= 0 switch the messages off.

      **Returns**

          whatever the fingerprinting helper returned, or None when there
          was no input to work on

      **Notes**

        - when details.outFileName is set, every entry of the result is
          pickled to that file

        - when details.outTableName and a database name (details.outDbName
          or details.dbName) are set, (ID, binary fingerprint) rows are
          inserted into that table

    """
    data = None
    if details.dbName and details.tableName:
        from rdkit.Dbase.DbConnection import DbConnect
        from rdkit.Dbase import DbInfo
        from rdkit.ML.Data import DataUtils
        try:
            conn = DbConnect(details.dbName, details.tableName)
        except Exception:
            import traceback
            error('Problems establishing connection to database: %s|%s\n' %
                  (details.dbName, details.tableName))
            traceback.print_exc()
            # NOTE(review): the failure is only reported; the code below still
            # goes on to query the same database -- confirm this is intended
        if not details.idName:
            # fall back to the first column of the table as the id column
            details.idName = DbInfo.GetColumnNames(details.dbName, details.tableName)[0]
        dataSet = DataUtils.DBToData(details.dbName, details.tableName,
                                     what='%s,%s' % (details.idName, details.smilesName))
        idCol = 0
        smiCol = 1
    elif details.inFileName and details.useSmiles:
        from rdkit.ML.Data import DataUtils
        conn = None
        if not details.idName:
            details.idName = 'ID'
        # bound up front so that a read failure leaves "no data" behind rather
        # than an unbound local that blows up further down
        dataSet = None
        try:
            dataSet = DataUtils.TextFileToData(
                details.inFileName, onlyCols=[details.idName, details.smilesName])
        except IOError:
            import traceback
            error('Problems reading from file %s\n' % (details.inFileName))
            traceback.print_exc()
        idCol = 0
        smiCol = 1
    elif details.inFileName and details.useSD:
        conn = None
        if not details.idName:
            details.idName = 'ID'
        dataSet = []
        try:
            s = Chem.SDMolSupplier(details.inFileName)
        except Exception:
            import traceback
            error('Problems reading from file %s\n' % (details.inFileName))
            traceback.print_exc()
        else:
            while 1:
                try:
                    m = s.next()
                except StopIteration:
                    break
                if not m:
                    # entries that evaluate as false are skipped
                    continue
                dataSet.append(m)
                if reportFreq > 0 and not len(dataSet) % reportFreq:
                    message('Read %d molecules\n' % (len(dataSet)))
                # the cap is checked after every molecule kept, independent of
                # the progress-report frequency
                if details.maxMols > 0 and len(dataSet) >= details.maxMols:
                    break
            # pair every molecule with a name: the id property when present,
            # otherwise the '_Name' property
            for i, mol in enumerate(dataSet):
                if mol.HasProp(details.idName):
                    nm = mol.GetProp(details.idName)
                else:
                    nm = mol.GetProp('_Name')
                dataSet[i] = (nm, mol)
    else:
        dataSet = None

    fps = None
    if dataSet and not details.useSD:
        data = dataSet.GetNamedData()
        if not details.molPklName:
            fps = FingerprintsFromSmiles(data, idCol, smiCol, **details.__dict__)
        else:
            fps = FingerprintsFromPickles(data, idCol, smiCol, **details.__dict__)
    elif dataSet and details.useSD:
        fps = FingerprintsFromMols(dataSet, **details.__dict__)

    if fps:
        if details.outFileName:
            # the context manager closes the file even when pickling fails
            with open(details.outFileName, 'wb+') as outF:
                for entry in fps:
                    pickle.dump(entry, outF)
        dbName = details.outDbName or details.dbName
        if details.outTableName and dbName:
            from rdkit.Dbase.DbConnection import DbConnect
            from rdkit.Dbase import DbUtils, DbModule
            conn = DbConnect(dbName)
            #
            #  We don't have a db open already, so we'll need to figure out
            #    the types of our columns...
            #
            # NOTE(review): data is still None when the input came from an SD
            # file, so len(data) would raise here -- confirm how that path
            # is meant to be handled
            colTypes = DbUtils.TypeFinder(data, len(data), len(data[0]))
            typeStrs = DbUtils.GetTypeStrings([details.idName, details.smilesName], colTypes,
                                              keyCol=details.idName)
            cols = '%s, %s %s' % (typeStrs[0], details.fpColName, DbModule.binaryTypeName)

            # FIX: we should really check to see if the table
            #  is already there and, if so, add the appropriate
            #  column.

            #
            # create the new table
            #
            existing = [x.upper() for x in conn.GetTableNames()]
            if details.replaceTable or details.outTableName.upper() not in existing:
                conn.AddTable(details.outTableName, cols)

            #
            # And add the data
            #
            for ID, fp in fps:
                tpl = ID, DbModule.binaryHolder(fp.ToBinary())
                conn.InsertData(details.outTableName, tpl)
            conn.Commit()
    return fps
def test_GetColumns(self):
    """ fetching the 'val' column of 'ten_elements' yields ten entries """
    column = DbUtils.GetColumns(self.dbName, 'ten_elements', 'val')
    nEntries = len(column)
    self.assertEqual(nEntries, 10)
def test1Txt(self):
    """ test reading from a text file """
    tblName = 'fromtext'
    # the file used to be opened without ever being closed; the context
    # manager releases the handle even when the import or the check fails
    with open(os.path.join(self.baseDir, 'dbtest.csv'), 'r') as inF:
        DbUtils.TextFileToDatabase(self.dbName, tblName, inF)
        self._confirm(tblName)
Fe13Pd7,0.641773222364,13.5561904063,1,1,0,3.53454303741,1.1 Fe13Pt7,0.644834950054,13.4918245347,1,0,1,3.53454303741,1.1 Fe3Pt17,0.657625302294,14.7500407392,1,0,1,3.53454303741,1.1 Fe3Rh7,0.648586371291,13.4137878702,1,1,0,3.53454303741,1.1 FePd26,0.675906847437,14.6853460112,1,1,0,3.53454303741,1.1 Mn2Pt3,0.59614900293,14.761410246,1,0,1,4.20857000351,1.1 MnPd3,0.692173333021,13.3637046657,1,1,0,4.20857000351,1.1 NiPd,0.752482590563,13.289344,1,1,0,1.89935600758,1.1 NiRh,0.759710124228,12.5047695128,1,1,0,1.89935600758,0.1 Fe2Hf,0.46150987372,14.4453391927,1,0,1,3.53454303741,1.1 Fe2Zr,0.449503092939,14.8311919793,1,1,0,3.53454303741,1.1 Co3Ir,0.750380223096,11.9939195131,1,0,1,2.7561609745,1.1 Co3Ir7,0.744357269808,12.0909680943,1,0,1,2.7561609745,0.1 Co3Rh,0.750380223096,11.9939195131,1,1,0,2.7561609745,1.1 Co4Rh,0.772980027292,11.6432503845,1,1,0,2.7561609745,1.1 CoIr,0.720410936244,12.4928697598,1,0,1,2.7561609745,0.1 CoOs,0.680645456475,12.4881462429,1,0,1,2.7561609745,0.1 CoRu,0.697198731059,12.1916458269,1,1,0,2.7561609745,0.1 Fe16Rh,0.668851387723,12.0487505556,1,1,0,3.53454303741,1.1 Fe32Pt,0.672765581933,11.9812996935,1,0,1,3.53454303741,1.1 Fe9Pt,0.662584205604,12.3757854936,1,0,1,3.53454303741,1.1 Fe9Rh,0.664715105171,12.1856716313,1,1,0,3.53454303741,1.1 """ io = StringIO(ferro_quant) DbUtils.TextFileToDatabase(RDConfig.RDTestDatabase,'ferro_quant',io) io = StringIO(ferro_noquant) DbUtils.TextFileToDatabase(RDConfig.RDTestDatabase,'ferro_noquant',io) io = StringIO(ferro_noquant_realact) DbUtils.TextFileToDatabase(RDConfig.RDTestDatabase,'ferro_noquant_realact',io)
def test_TypeFinder(self):
    """ column types and widths are inferred, with '-' treated as the null marker """
    rows = [('-', 1.45, 'abc', None), (20, 3, 'defgh', None)]
    nRows, nCols = 2, 4
    inferred = DbUtils.TypeFinder(rows, nRows, nCols, nullMarker='-')
    # one [type, width] pair per column; the all-None column comes back as [-1, 1]
    expected = [[int, 2], [float, 4], [str, 5], [-1, 1]]
    self.assertEqual(inferred, expected)