def test_GetTypeStrings(self): headers = ['pk', 'a', 'b', 'c'] colTypes = [(int, 2), (int, 3), (float, 5), (str, 10)] self.assertEqual( DbUtils.GetTypeStrings(headers, colTypes), ['pk integer', 'a integer', 'b double precision', 'c varchar(10)']) self.assertEqual( DbUtils.GetTypeStrings(headers, colTypes, keyCol='pk'), ['pk integer not null primary key', 'a integer', 'b double precision', 'c varchar(10)'])
def FingerprintsFromDetails(details, reportFreq=10): data = None if details.dbName and details.tableName: from rdkit.Dbase.DbConnection import DbConnect from rdkit.Dbase import DbInfo from rdkit.ML.Data import DataUtils try: conn = DbConnect(details.dbName, details.tableName) except Exception: import traceback error('Problems establishing connection to database: %s|%s\n' % (details.dbName, details.tableName)) traceback.print_exc() if not details.idName: details.idName = DbInfo.GetColumnNames(details.dbName, details.tableName)[0] dataSet = DataUtils.DBToData(details.dbName, details.tableName, what='%s,%s' % (details.idName, details.smilesName)) idCol = 0 smiCol = 1 elif details.inFileName and details.useSmiles: from rdkit.ML.Data import DataUtils conn = None if not details.idName: details.idName = 'ID' try: dataSet = DataUtils.TextFileToData( details.inFileName, onlyCols=[details.idName, details.smilesName]) except IOError: import traceback error('Problems reading from file %s\n' % (details.inFileName)) traceback.print_exc() idCol = 0 smiCol = 1 elif details.inFileName and details.useSD: conn = None dataset = None if not details.idName: details.idName = 'ID' dataSet = [] try: s = Chem.SDMolSupplier(details.inFileName) except Exception: import traceback error('Problems reading from file %s\n' % (details.inFileName)) traceback.print_exc() else: while 1: try: m = s.next() except StopIteration: break if m: dataSet.append(m) if reportFreq > 0 and not len(dataSet) % reportFreq: message('Read %d molecules\n' % (len(dataSet))) if details.maxMols > 0 and len( dataSet) >= details.maxMols: break for i, mol in enumerate(dataSet): if mol.HasProp(details.idName): nm = mol.GetProp(details.idName) else: nm = mol.GetProp('_Name') dataSet[i] = (nm, mol) else: dataSet = None fps = None if dataSet and not details.useSD: data = dataSet.GetNamedData() if not details.molPklName: fps = FingerprintsFromSmiles(data, idCol, smiCol, **details.__dict__) else: fps = FingerprintsFromPickles(data, idCol, smiCol, **details.__dict__) elif dataSet and details.useSD: fps = FingerprintsFromMols(dataSet, **details.__dict__) if fps: if details.outFileName: outF = open(details.outFileName, 'wb+') for i in range(len(fps)): pickle.dump(fps[i], outF) outF.close() dbName = details.outDbName or details.dbName if details.outTableName and dbName: from rdkit.Dbase.DbConnection import DbConnect from rdkit.Dbase import DbUtils, DbModule conn = DbConnect(dbName) # # We don't have a db open already, so we'll need to figure out # the types of our columns... # colTypes = DbUtils.TypeFinder(data, len(data), len(data[0])) typeStrs = DbUtils.GetTypeStrings( [details.idName, details.smilesName], colTypes, keyCol=details.idName) cols = '%s, %s %s' % (typeStrs[0], details.fpColName, DbModule.binaryTypeName) # FIX: we should really check to see if the table # is already there and, if so, add the appropriate # column. # # create the new table # if details.replaceTable or \ details.outTableName.upper() not in [x.upper() for x in conn.GetTableNames()]: conn.AddTable(details.outTableName, cols) # # And add the data # for ID, fp in fps: tpl = ID, DbModule.binaryHolder(fp.ToBinary()) conn.InsertData(details.outTableName, tpl) conn.Commit() return fps