Beispiel #1
0
    def test_GetDbNames(self):
        """ checks DbInfo.GetDbNames for several values of DbModule.fileWildcard

      **Notes**

       - with the '*.sqlite' wildcard, the 'test_data' directory next to
         this file is expected to yield exactly 2 database names

       - a wildcard that matches nothing, and a wildcard of None, are both
         expected to give an empty list

       - DbModule.fileWildcard is module-level state, so the value found on
         entry is restored on exit, even when an assertion fails

    """
        savedWildcard = DbModule.fileWildcard
        try:
            DbModule.fileWildcard = '*.sqlite'
            dataDir = os.path.join(os.path.dirname(__file__), 'test_data')
            self.assertEqual(len(DbInfo.GetDbNames(dirName=dataDir)), 2)

            DbModule.fileWildcard = '*.notexisting'
            self.assertEqual(DbInfo.GetDbNames(), [])

            DbModule.fileWildcard = None
            self.assertEqual(DbInfo.GetDbNames(), [])
        finally:
            # do not leak the modified wildcard into tests that run later
            DbModule.fileWildcard = savedWildcard
Beispiel #2
0
    def GetColumnNamesAndTypes(self,
                               table='',
                               join='',
                               what='*',
                               where='',
                               **kwargs):
        """ looks up the names and types of the columns in a table

      **Arguments**

        - table: the table to inspect; an empty value means "use
          self.tableName"

        - join, what: forwarded unchanged to _DbInfo.GetColumnNamesAndTypes_

        - where, kwargs: accepted, but not used by this method

      **Returns**

          whatever _DbInfo.GetColumnNamesAndTypes_ returns: a list of
          2-tuples of (column name, column type)

      **Notes**

       - the query is run by _DbInfo.GetColumnNamesAndTypes_ using this
         object's dbName, user, password and cn

    """
        if not table:
            table = self.tableName
        columnInfo = DbInfo.GetColumnNamesAndTypes(
            self.dbName, table, self.user, self.password,
            join=join, what=what, cn=self.cn)
        return columnInfo
Beispiel #3
0
 def _initColumnNamesAndTypes(self):
   """ fills self.colNames and self.colTypes from self.cursor

     Each item yielded by _DbInfo.GetColumnInfoFromCursor_ is unpacked as a
     (name, type) pair; both attributes end up as tuples.

   """
   self.colNames, self.colTypes = [], []
   for columnInfo in DbInfo.GetColumnInfoFromCursor(self.cursor):
     name, typ = columnInfo
     self.colNames.append(name)
     self.colTypes.append(typ)
   # freeze the collected values
   self.colNames, self.colTypes = tuple(self.colNames), tuple(self.colTypes)
Beispiel #4
0
  def GetColumnNames(self, table='', join='', what='*', where='', **kwargs):
    """ looks up the names of the columns in a table

      **Arguments**

        - table: the table to inspect; an empty value means "use
          self.tableName"

        - join, what: forwarded unchanged to _DbInfo.GetColumnNames_

        - where, kwargs: accepted, but not used by this method

      **Returns**

          whatever _DbInfo.GetColumnNames_ returns: a list of column names

      **Notes**

       - the query is run by _DbInfo.GetColumnNames_ using this object's
         dbName, user, password and cn

    """
    if not table:
      table = self.tableName
    columnNames = DbInfo.GetColumnNames(
      self.dbName, table, self.user, self.password,
      join=join, what=what, cn=self.cn)
    return columnNames
Beispiel #5
0
  def GetTableNames(self, includeViews=0):
    """ looks up the names of the tables in this object's database

      **Arguments**

        - includeViews: forwarded to _DbInfo.GetTableNames_; when it is
          non-null the views in the db are returned as well

      **Returns**

          whatever _DbInfo.GetTableNames_ returns: a list of table names

      **Notes**

       - the query is run by _DbInfo.GetTableNames_ using this object's
         dbName, user, password and cn

    """
    tableNames = DbInfo.GetTableNames(
      self.dbName, self.user, self.password,
      includeViews=includeViews, cn=self.cn)
    return tableNames
Beispiel #6
0
def FingerprintsFromDetails(details, reportFreq=10):
    """Load molecules as described by ``details`` and fingerprint them.

    The input source is chosen from ``details`` in this order:

      1. ``dbName`` and ``tableName`` set: id and SMILES columns are pulled
         from the database table.
      2. ``inFileName`` and ``useSmiles`` set: id and SMILES columns are read
         from a text file.
      3. ``inFileName`` and ``useSD`` set: molecules are read from an SD file.

    If none of these applies, or nothing could be read, no fingerprints are
    generated and ``None`` is returned.

    Arguments:
      details: options object. Attributes read here: ``dbName``,
        ``tableName``, ``inFileName``, ``useSmiles``, ``useSD``, ``idName``,
        ``smilesName``, ``maxMols``, ``molPklName``, ``outFileName``,
        ``outDbName``, ``outTableName``, ``fpColName`` and ``replaceTable``.
        ``details.__dict__`` is also forwarded as keyword arguments to the
        fingerprinting functions.
      reportFreq: when positive, a progress message is emitted every
        ``reportFreq`` molecules while reading an SD file.

    Returns:
      The value produced by ``FingerprintsFromSmiles``,
      ``FingerprintsFromPickles`` or ``FingerprintsFromMols`` (iterated below
      as ``(ID, fingerprint)`` pairs), or ``None`` when no data was loaded.

    Side effects:
      - ``details.idName`` is filled in when empty (first column of the table
        for database input, ``'ID'`` for file input).
      - If ``details.outFileName`` is set, every entry of the result is
        pickled to that file.
      - If ``details.outTableName`` and a database name are set, the
        fingerprints are inserted into that table, which is created first
        when ``details.replaceTable`` is set or the table does not exist.
    """
    import traceback

    data = None
    # Bound up front so that a failed read leaves "no data" instead of an
    # unbound local when dataSet is tested further down.
    dataSet = None
    idCol = 0
    smiCol = 1

    if details.dbName and details.tableName:
        from rdkit.Dbase.DbConnection import DbConnect
        from rdkit.Dbase import DbInfo
        from rdkit.ML.Data import DataUtils
        try:
            # The connection itself is not used for reading; opening it here
            # only reports connection problems early.
            conn = DbConnect(details.dbName, details.tableName)
        except Exception:
            error('Problems establishing connection to database: %s|%s\n' %
                  (details.dbName, details.tableName))
            traceback.print_exc()
        if not details.idName:
            details.idName = DbInfo.GetColumnNames(details.dbName,
                                                   details.tableName)[0]
        dataSet = DataUtils.DBToData(details.dbName,
                                     details.tableName,
                                     what='%s,%s' %
                                     (details.idName, details.smilesName))
    elif details.inFileName and details.useSmiles:
        from rdkit.ML.Data import DataUtils
        if not details.idName:
            details.idName = 'ID'
        try:
            dataSet = DataUtils.TextFileToData(
                details.inFileName,
                onlyCols=[details.idName, details.smilesName])
        except IOError:
            # dataSet stays None, so the function falls through and
            # returns None after reporting the problem.
            error('Problems reading from file %s\n' % (details.inFileName))
            traceback.print_exc()
    elif details.inFileName and details.useSD:
        if not details.idName:
            details.idName = 'ID'
        dataSet = []
        try:
            s = Chem.SDMolSupplier(details.inFileName)
        except Exception:
            error('Problems reading from file %s\n' % (details.inFileName))
            traceback.print_exc()
        else:
            while True:
                try:
                    # builtin next() works with both the Python 2 and the
                    # Python 3 iterator protocol
                    m = next(s)
                except StopIteration:
                    break
                if not m:
                    # skip entries the supplier could not turn into a molecule
                    continue
                dataSet.append(m)
                if reportFreq > 0 and not len(dataSet) % reportFreq:
                    message('Read %d molecules\n' % (len(dataSet)))
                # Enforce the limit on every molecule, not only when a
                # progress message happens to be printed.
                if details.maxMols > 0 and len(dataSet) >= details.maxMols:
                    break

        # Pair every molecule with its identifier, falling back to the
        # '_Name' property when the requested id property is missing.
        for i, mol in enumerate(dataSet):
            if mol.HasProp(details.idName):
                nm = mol.GetProp(details.idName)
            else:
                nm = mol.GetProp('_Name')
            dataSet[i] = (nm, mol)

    fps = None
    if dataSet and not details.useSD:
        data = dataSet.GetNamedData()
        if not details.molPklName:
            fps = FingerprintsFromSmiles(data, idCol, smiCol,
                                         **details.__dict__)
        else:
            fps = FingerprintsFromPickles(data, idCol, smiCol,
                                          **details.__dict__)
    elif dataSet and details.useSD:
        fps = FingerprintsFromMols(dataSet, **details.__dict__)

    if fps:
        if details.outFileName:
            # the context manager closes the file even if pickling fails
            with open(details.outFileName, 'wb+') as outF:
                for entry in fps:
                    pickle.dump(entry, outF)
        dbName = details.outDbName or details.dbName
        if details.outTableName and dbName:
            from rdkit.Dbase.DbConnection import DbConnect
            from rdkit.Dbase import DbUtils, DbModule
            conn = DbConnect(dbName)
            #
            #  We don't have a db open already, so we'll need to figure out
            #    the types of our columns...
            #
            # NOTE(review): `data` is only populated for the non-SD inputs;
            #  with SD input it is still None here and len(data) raises
            #  TypeError -- confirm how SD input should be handled.
            colTypes = DbUtils.TypeFinder(data, len(data), len(data[0]))
            typeStrs = DbUtils.GetTypeStrings(
                [details.idName, details.smilesName],
                colTypes,
                keyCol=details.idName)
            cols = '%s, %s %s' % (typeStrs[0], details.fpColName,
                                  DbModule.binaryTypeName)

            # FIX: we should really check to see if the table
            #  is already there and, if so, add the appropriate
            #  column.

            #
            # create the new table
            #
            existingTables = [x.upper() for x in conn.GetTableNames()]
            if details.replaceTable or \
               details.outTableName.upper() not in existingTables:
                conn.AddTable(details.outTableName, cols)

            #
            # And add the data
            #
            for ID, fp in fps:
                tpl = ID, DbModule.binaryHolder(fp.ToBinary())
                conn.InsertData(details.outTableName, tpl)
            conn.Commit()
    return fps