Beispiel #1
0
    def test_InsertData(self):
        """ tests InsertData, AddColumn and InsertColumnData on a scratch table

        The table is created, filled with ten rows, extended by one column,
        that column is populated row by row, and the table is dropped again.
        """
        newTblName = 'NEW_TABLE'
        conn = DbConnect(self.tempDbName)
        # Best-effort cleanup before starting: a failure of this drop is
        # deliberately ignored.
        try:
            conn.GetCursor().execute('drop table %s' % (newTblName))
        except Exception:
            pass
        conn.Commit()

        # Create the table and insert ten rows of (id, id + 1, 2 * id).
        conn.AddTable(newTblName, 'id int,val1 int, val2 int')
        for i in range(10):
            conn.InsertData(newTblName, (i, i + 1, 2 * i))
        conn.Commit()
        d = conn.GetData(table=newTblName)
        # unittest assertion instead of a bare assert: bare asserts are
        # stripped when Python runs with -O, which would silently skip the check.
        self.assertEqual(len(d), 10)

        # Adding a column must grow the column list from three to four and
        # leave the new column empty (None) in every existing row.
        self.assertEqual(len(conn.GetColumnNames(table=newTblName)), 3)
        conn.AddColumn(newTblName, 'val3', 'int')
        conn.Commit()
        self.assertEqual(len(conn.GetColumnNames(table=newTblName)), 4)
        d = conn.GetColumns('id,val3', table=newTblName)
        self.assertEqual(len(d), 10)
        self.assertTrue(all(r[1] is None for r in d))

        # Write each row's id into its val3 column, selecting the row with an
        # 'id=<value>' condition, then check that both columns agree.
        for r in d:
            conn.InsertColumnData(newTblName, 'val3', r[0],
                                  'id={0}'.format(r[0]))
        conn.Commit()
        d = conn.GetColumns('id,val3', table=newTblName)
        self.assertTrue(all(r[0] == r[1] for r in d))

        # NOTE(review): presumably the result set is released here so that it
        # cannot block the drop below - confirm against DbConnect.
        d = None
        try:
            conn.GetCursor().execute('drop table %s' % (newTblName))
        except Exception:
            # self.fail raises even under -O, unlike `assert 0`.
            self.fail('drop table failed')
Beispiel #2
0
    def testInsertData(self):
        """ round-trips ten rows through InsertData / GetData on a scratch table """
        tableName = 'NEW_TABLE'
        db = DbConnect(self.tempDbName)
        dropStmt = 'drop table %s' % (tableName)

        # Start from a clean slate; any exception from this drop is ignored.
        try:
            db.GetCursor().execute(dropStmt)
        except Exception:
            pass
        db.Commit()

        # Build the table and fill it with rows of the form (i, i + 1, 2 * i).
        db.AddTable(tableName, 'id int,val1 int, val2 int')
        for idx in range(10):
            row = (idx, idx + 1, 2 * idx)
            db.InsertData(tableName, row)
        db.Commit()

        rows = db.GetData(table=tableName)
        assert len(rows) == 10

        # Let go of the fetched rows before the table is removed.
        rows = None
        try:
            db.GetCursor().execute(dropStmt)
        except Exception:
            assert 0, 'drop table failed'
Beispiel #3
0
def FingerprintsFromDetails(details, reportFreq=10):
    """ Generates fingerprints for the molecules described by a details object.

    The input source is chosen from the attributes of `details`, in this order:

      1. dbName and tableName set: the ID and SMILES columns are pulled from
         that database table.
      2. inFileName and useSmiles set: the ID and SMILES columns are read from
         a text file.
      3. inFileName and useSD set: molecules are read from an SD file.

    If none of these applies, or the text/SD input cannot be read, no
    fingerprints are generated.

    When fingerprints were generated they are optionally written out:

      - details.outFileName: each entry of the result is pickled to that file.
      - details.outTableName (in details.outDbName, falling back to
        details.dbName): each (ID, fingerprint) pair is inserted as a row.

    Arguments:
      - details: settings object; details.idName is filled in here when it is
        empty, and details.__dict__ is forwarded as keyword arguments to the
        fingerprinting helpers.
      - reportFreq: when reading an SD file, a progress message is emitted
        every reportFreq molecules (values <= 0 disable the messages).

    Returns:
      the value produced by the fingerprinting helper, or None when no input
      data was available.
    """
    data = None
    # Pre-bind dataSet so that a failed read leaves a well-defined "no data"
    # state instead of an unbound local further down.
    dataSet = None
    if details.dbName and details.tableName:
        from rdkit.Dbase.DbConnection import DbConnect
        from rdkit.Dbase import DbInfo
        from rdkit.ML.Data import DataUtils
        # The connection opened here is not used by the rest of this branch;
        # a failure is reported and the function carries on.
        try:
            conn = DbConnect(details.dbName, details.tableName)
        except Exception:
            import traceback
            error('Problems establishing connection to database: %s|%s\n' %
                  (details.dbName, details.tableName))
            traceback.print_exc()
        if not details.idName:
            # Default the ID column to the first column of the table.
            details.idName = DbInfo.GetColumnNames(details.dbName,
                                                   details.tableName)[0]
        dataSet = DataUtils.DBToData(details.dbName,
                                     details.tableName,
                                     what='%s,%s' %
                                     (details.idName, details.smilesName))
        idCol = 0
        smiCol = 1
    elif details.inFileName and details.useSmiles:
        from rdkit.ML.Data import DataUtils
        if not details.idName:
            details.idName = 'ID'
        try:
            dataSet = DataUtils.TextFileToData(
                details.inFileName,
                onlyCols=[details.idName, details.smilesName])
        except IOError:
            # dataSet stays None, so the function returns None after the
            # problem has been reported.
            import traceback
            error('Problems reading from file %s\n' % (details.inFileName))
            traceback.print_exc()

        idCol = 0
        smiCol = 1
    elif details.inFileName and details.useSD:
        if not details.idName:
            details.idName = 'ID'
        dataSet = []
        try:
            s = Chem.SDMolSupplier(details.inFileName)
        except Exception:
            import traceback
            error('Problems reading from file %s\n' % (details.inFileName))
            traceback.print_exc()
        else:
            while 1:
                try:
                    # builtin next() works with the iterator protocol of both
                    # Python 2 and Python 3
                    m = next(s)
                except StopIteration:
                    break
                if m:
                    dataSet.append(m)
                    if reportFreq > 0 and not len(dataSet) % reportFreq:
                        message('Read %d molecules\n' % (len(dataSet)))
                    # The cap is checked for every molecule, independently of
                    # progress reporting, so it is honoured exactly.
                    if details.maxMols > 0 and len(dataSet) >= details.maxMols:
                        break

        # Pair every molecule with a name: the idName property when the
        # molecule has it, otherwise its '_Name' property.
        for i, mol in enumerate(dataSet):
            if mol.HasProp(details.idName):
                nm = mol.GetProp(details.idName)
            else:
                nm = mol.GetProp('_Name')
            dataSet[i] = (nm, mol)

    fps = None
    if dataSet and not details.useSD:
        data = dataSet.GetNamedData()
        if not details.molPklName:
            fps = FingerprintsFromSmiles(data, idCol, smiCol,
                                         **details.__dict__)
        else:
            fps = FingerprintsFromPickles(data, idCol, smiCol,
                                          **details.__dict__)
    elif dataSet and details.useSD:
        fps = FingerprintsFromMols(dataSet, **details.__dict__)

    if fps:
        if details.outFileName:
            # The context manager closes the file even if pickling fails.
            with open(details.outFileName, 'wb+') as outF:
                for fp in fps:
                    pickle.dump(fp, outF)
        dbName = details.outDbName or details.dbName
        if details.outTableName and dbName:
            from rdkit.Dbase.DbConnection import DbConnect
            from rdkit.Dbase import DbUtils, DbModule
            conn = DbConnect(dbName)
            #
            #  We don't have a db open already, so we'll need to figure out
            #    the types of our columns...
            #
            # NOTE(review): `data` is only assigned on the non-SD path above,
            #  so for SD input it is still None here and len(data) fails -
            #  the column types need another source in that case; confirm the
            #  intended behavior.
            colTypes = DbUtils.TypeFinder(data, len(data), len(data[0]))
            typeStrs = DbUtils.GetTypeStrings(
                [details.idName, details.smilesName],
                colTypes,
                keyCol=details.idName)
            # Only the ID column definition is reused; the second column of
            # the output table holds the binary fingerprint.
            cols = '%s, %s %s' % (typeStrs[0], details.fpColName,
                                  DbModule.binaryTypeName)

            # FIX: we should really check to see if the table
            #  is already there and, if so, add the appropriate
            #  column.

            #
            # create the new table
            #
            if details.replaceTable or \
               details.outTableName.upper() not in [x.upper() for x in conn.GetTableNames()]:
                conn.AddTable(details.outTableName, cols)

            #
            # And add the data
            #
            for ID, fp in fps:
                tpl = ID, DbModule.binaryHolder(fp.ToBinary())
                conn.InsertData(details.outTableName, tpl)
            conn.Commit()
    return fps
Beispiel #4
0
from rdkit import Chem
from rdkit import RDConfig
from rdkit.Dbase import DbModule
from rdkit.Dbase.DbConnection import DbConnect
import pickle

# Choose the database by backend: "::RDTests" when RDConfig.usePgSQL is set,
# "data.sqlt" otherwise.
dbName = "::RDTests" if RDConfig.usePgSQL else "data.sqlt"

molTblName = 'simple_mols1'
fpTblName = 'simple_mols1_fp'

# The connection is opened on the molecule table; the fingerprint table gets
# an id column plus a binary column for the pickled fingerprint.
conn = DbConnect(dbName, molTblName)
conn.AddTable(
    fpTblName,
    'id varchar(10),autofragmentfp %s' % DbModule.binaryTypeName,
)

# Each fetched row is unpacked as (SMILES, ID): echo it, fingerprint the
# molecule and store the pickled fingerprint under the same ID.
rows = conn.GetData()
for smiles, molId in rows:
    print(repr(molId), repr(smiles))
    fingerprint = Chem.RDKFingerprint(Chem.MolFromSmiles(smiles))
    blob = DbModule.binaryHolder(pickle.dumps(fingerprint))
    conn.InsertData(fpTblName, (molId, blob))
conn.Commit()