def test_AdjustColHeadings(self):
   """Exercise DbUtils._AdjustColHeadings with a roomy and a tight length limit.

   With a limit of 7 every sample heading is expected to come back as
   'abc_def'; with a limit of 3 each is expected to come back as 'abc' and
   something containing 'Heading' must be written to stdout.
   """
   samples = ('abc def', ' abc def', 'abc-def ', 'abc.def')

   # A fresh list is handed over for every call, as the headings were
   # rebuilt before each call in the first place.
   adjusted = DbUtils._AdjustColHeadings(list(samples), 7)
   self.assertEqual(adjusted, ['abc_def'] * 4)

   captured = StringIO()
   with redirect_stdout(captured):
     shortened = DbUtils._AdjustColHeadings(list(samples), 3)
     self.assertEqual(shortened, ['abc'] * 4)
   self.assertIn('Heading', captured.getvalue())
Beispiel #2
0
 def test_AdjustColHeadings(self):
   """Column headings are cleaned up, and shortened when over the limit.

   The second call runs with stdout captured so the text it prints
   (expected to contain 'Heading') can be checked.
   """
   rawHeadings = ('abc def', ' abc def', 'abc-def ', 'abc.def')

   # limit of 7: all four headings are expected to become 'abc_def'
   result = DbUtils._AdjustColHeadings(list(rawHeadings), 7)
   self.assertEqual(result, ['abc_def'] * 4)

   # limit of 3: all four headings are expected to become 'abc'
   out = StringIO()
   with redirect_stdout(out):
     result = DbUtils._AdjustColHeadings(list(rawHeadings), 3)
     self.assertEqual(result, ['abc'] * 4)
   printed = out.getvalue()
   self.assertIn('Heading', printed)
 def test_GetTypeStrings(self):
   """Check the type strings DbUtils.GetTypeStrings produces for four columns.

   The same headings and (type, size) pairs are used with and without a
   key column; naming 'pk' as the key column is expected to add
   ' not null primary key' to that column's string only.
   """
   names = ['pk', 'a', 'b', 'c']
   typeInfo = [(int, 2), (int, 3), (float, 5), (str, 10)]

   withoutKey = DbUtils.GetTypeStrings(names, typeInfo)
   self.assertEqual(
     withoutKey,
     ['pk integer', 'a integer', 'b double precision', 'c varchar(10)'])

   withKey = DbUtils.GetTypeStrings(names, typeInfo, keyCol='pk')
   self.assertEqual(
     withKey,
     ['pk integer not null primary key', 'a integer', 'b double precision', 'c varchar(10)'])
  def test_GetData_where(self):
    """Query 'ten_elements_dups' through DbUtils.GetData with where clauses.

    For each where clause the result is first counted and then walked a
    second time to check the first column of every row against the bound.
    A final query uses removeDups=1 with forceList=True and checks the count.
    """
    tableName = 'ten_elements_dups'

    # (where clause, expected row count, exclusive upper bound on column 0)
    cases = (('id<4', 4, 4), ('id<10', 10, 10))
    for clause, nExpected, bound in cases:
      rows = DbUtils.GetData(self.dbName, tableName, forceList=0, randomAccess=0,
                             whereString=clause)
      nFound = len(list(rows))
      self.assertEqual(nFound, nExpected)
      self.assertTrue(all(row[0] < bound for row in rows))

    deduped = DbUtils.GetData(self.dbName, tableName, removeDups=1, forceList=True)
    self.assertEqual(len(list(deduped)), 10)
Beispiel #5
0
    def test_DatabaseToText(self):
        """Dump 'ten_elements' to text with default, field and where options.

        The checks look for the expected header/rows in the text and count
        the newline-separated pieces of the output.
        """
        table = 'ten_elements'

        # whole table
        fullDump = DbUtils.DatabaseToText(self.dbName, table)
        for fragment in ('id,val', '0,11', '18,101'):
            self.assertIn(fragment, fullDump)
        pieces = fullDump.split('\n')
        self.assertEqual(len(pieces), 11)

        # single column: neither the other column's name nor a separator
        valOnly = DbUtils.DatabaseToText(self.dbName, table, fields='val')
        self.assertNotIn('id', valOnly)
        self.assertNotIn(',', valOnly)

        # restricted rows
        filtered = DbUtils.DatabaseToText(self.dbName, table, where='id<4')
        self.assertIn('id,val', filtered)
        pieces = filtered.split('\n')
        self.assertEqual(len(pieces), 3)
Beispiel #6
0
    def GetColumns(self, fields, table='', join='', **kwargs):
        """ pulls a set of columns out of a table

      **Arguments**

       - fields: a string with the names of the fields to be extracted,
         this should be a comma delimited list

       - table: (optional) the table to use; when empty, self.tableName
         is used instead

       - join: (optional) handed on to _DbUtils.GetColumns_ as its
         join argument

       - kwargs: accepted, but not used by this method

      **Returns**

          whatever _DbUtils.GetColumns_ returns

      **Notes**

        - this is a thin wrapper around _DbUtils.GetColumns_ that fills
          in the database name, user and password from this object

    """
        table = table or self.tableName
        result = DbUtils.GetColumns(self.dbName, table, fields,
                                    self.user, self.password,
                                    join=join)
        return result
Beispiel #7
0
  def GetData(self, table=None, fields='*', where='', removeDups=-1, join='',
              transform=None, randomAccess=1, **kwargs):
    """ a more flexible method to get a set of data from a table

      **Arguments**

       - table: (optional) the table to use; None selects self.tableName

       - fields: a string with the names of the fields to be extracted,
         this should be a comma delimited list

       - where: the SQL where clause to be used with the DB query

       - removeDups: indicates which column should be used to recognize
         duplicates in the data.  -1 for no duplicate removal.

       - join: handed on to _DbUtils.GetData_ as its join argument

       - transform: handed on to _DbUtils.GetData_ unchanged

       - randomAccess: handed on to _DbUtils.GetData_ unchanged

       - kwargs: any other keyword arguments are forwarded to
         _DbUtils.GetData_; forceList is set to 0 if the caller did not
         provide it

      **Returns**

          whatever _DbUtils.GetData_ returns

      **Notes**

        - this uses _DbUtils.GetData_ with this object's database name,
          user, password and connection

    """
    tableToUse = self.tableName if table is None else table
    # only supply a default; an explicit forceList from the caller wins
    kwargs.setdefault('forceList', 0)
    return DbUtils.GetData(self.dbName, tableToUse,
                           fieldString=fields,
                           whereString=where,
                           user=self.user,
                           password=self.password,
                           removeDups=removeDups,
                           join=join,
                           cn=self.cn,
                           transform=transform,
                           randomAccess=randomAccess,
                           **kwargs)
Beispiel #8
0
 def test_DatabaseToDatabase(self):
     """Copy 'ten_elements' into the temporary database as table 'db2db'.

     Anything printed during the copy is captured; the new table is then
     handed to self._confirm together with the expected column headings.
     """
     destTable = 'db2db'
     captured = StringIO()
     with redirect_stdout(captured):
         DbUtils.DatabaseToDatabase(self.dbName, 'ten_elements', self.tempDbName, destTable)
     self._confirm(destTable, dbName=self.tempDbName, colHeads=['id', 'val'])
Beispiel #9
0
    def GetDataCount(self, table=None, where='', join='', **kwargs):
        """ counts the results a query would return

      **Arguments**

       - table: (optional) the table to use; a false value selects
         self.tableName

       - where: the SQL where clause to be used with the DB query

       - join: the SQL join clause to be used with the DB query

       - kwargs: accepted, but not used by this method


      **Returns**

          the first field of the first row of a count(*) query

      **Notes**

        - this uses _DbUtils.GetData_

    """
        if not table:
            table = self.tableName
        rows = DbUtils.GetData(self.dbName, table,
                               fieldString='count(*)',
                               whereString=where,
                               cn=self.cn,
                               user=self.user,
                               password=self.password,
                               join=join,
                               forceList=0)
        firstRow = rows[0]
        return firstRow[0]
Beispiel #10
0
 def test1Txt(self):
     """ test reading from a text file

     dbtest.csv is loaded into table 'fromtext' of the temporary database
     (with stdout captured) and the table is passed to self._confirm.
     """
     csvPath = os.path.join(self.baseDir, 'dbtest.csv')
     tableName = 'fromtext'
     with open(csvPath, 'r') as source:
         captured = StringIO()
         with redirect_stdout(captured):
             DbUtils.TextFileToDatabase(self.tempDbName, tableName, source)
         self._confirm(tableName, dbName=self.tempDbName)
Beispiel #11
0
 def testGetData1(self):
     """ basic functionality

     Fetches 'ten_elements' with forceList=1, checks the length and two
     rows, and checks that indexing past the end raises IndexError.
     """
     d = DbUtils.GetData(self.dbName, 'ten_elements', forceList=1)
     self.assertEqual(len(d), 10)
     self.assertEqual(tuple(d[0]), (0, 11))
     self.assertEqual(tuple(d[2]), (4, 31))
     # assertRaisesRegexp is a deprecated alias that was removed in
     # Python 3.12; with an empty pattern it checked nothing beyond the
     # exception type, so assertRaises is the equivalent call.
     with self.assertRaises(IndexError):
         d[11]
 def testGetData2(self):
   """ using a RandomAccessDbResultSet

   Fetches 'ten_elements' with forceList=0 and randomAccess=1, checks two
   rows and the length, and checks that indexing past the end raises
   IndexError.
   """
   d = DbUtils.GetData(self.dbName, 'ten_elements', forceList=0, randomAccess=1)
   self.assertEqual(tuple(d[0]), (0, 11))
   self.assertEqual(tuple(d[2]), (4, 31))
   self.assertEqual(len(d), 10)
   # assertRaisesRegexp is a deprecated alias that was removed in
   # Python 3.12; with an empty pattern it checked nothing beyond the
   # exception type, so assertRaises is the equivalent call.
   with self.assertRaises(IndexError):
     d[11]
Beispiel #13
0
 def test3Txt(self):
     """ test reading from a text file including null markers

     dbtest.nulls.csv is loaded into table 'fromtext2' of the temporary
     database with 'NA' given as the null marker, and the table is then
     passed to self._confirm.
     """
     csvPath = os.path.join(self.baseDir, 'dbtest.nulls.csv')
     tableName = 'fromtext2'
     with open(csvPath, 'r') as source:
         DbUtils.TextFileToDatabase(self.tempDbName, tableName, source, nullMarker='NA')
         self._confirm(tableName, dbName=self.tempDbName)
Beispiel #14
0
 def testGetData4(self):
   """ using a RandomAccessDbResultSet with a Transform

   The transform doubles the second field of every row; two rows and the
   length are checked, and indexing past the end must raise IndexError.
   """
   def fn(x):
     # keep the first field, double the second
     return (x[0], x[1] * 2)

   d = DbUtils.GetData(self.dbName, 'ten_elements', forceList=0, randomAccess=1,
                       transform=fn)
   self.assertEqual(tuple(d[0]), (0, 22))
   self.assertEqual(tuple(d[2]), (4, 62))
   self.assertEqual(len(d), 10)
   # assertRaisesRegexp is a deprecated alias that was removed in
   # Python 3.12; with an empty pattern it checked nothing beyond the
   # exception type, so assertRaises is the equivalent call.
   with self.assertRaises(IndexError):
     d[11]
 def testGetData3(self):
   """ using a DbResultSet

   With forceList=0 and randomAccess=0 the result is expected not to
   support len(); it is iterated instead and the collected rows checked.
   """
   d = DbUtils.GetData(self.dbName, 'ten_elements', forceList=0, randomAccess=0)
   # assertRaisesRegexp is a deprecated alias that was removed in
   # Python 3.12; with an empty pattern it checked nothing beyond the
   # exception type, so assertRaises is the equivalent call.
   with self.assertRaises(TypeError):
     len(d)
   rs = list(d)
   self.assertEqual(len(rs), 10)
   self.assertEqual(tuple(rs[0]), (0, 11))
   self.assertEqual(tuple(rs[2]), (4, 31))
Beispiel #16
0
 def testGetData1(self):
     """ basic functionality

     Fetches 'ten_elements' with forceList=1, checks the length and two
     rows, and checks that indexing past the end raises IndexError.
     """
     d = DbUtils.GetData(self.dbName, 'ten_elements', forceList=1)
     assert len(d) == 10
     assert tuple(d[0]) == (0, 11)
     assert tuple(d[2]) == (4, 31)
     try:
         d[11]
     except IndexError:
         pass
     except Exception as exc:
         # A bare except would also trap KeyboardInterrupt/SystemExit and
         # hide what was actually raised; report the offending exception.
         raise AssertionError('bad exception type raised: %r' % (exc, ))
     else:
         # raised explicitly so the check survives running under -O
         raise AssertionError('failed to raise expected exception')
Beispiel #17
0
  def testGetData5(self):
    """ using a DbResultSet with a Transform

    The transform doubles the second field of every row.  The result is
    expected not to support len(); it is iterated instead and the
    collected rows checked.
    """
    def fn(x):
      # keep the first field, double the second
      return (x[0], x[1] * 2)

    d = DbUtils.GetData(self.dbName, 'ten_elements', forceList=0, randomAccess=0,
                        transform=fn)
    # assertRaisesRegexp is a deprecated alias that was removed in
    # Python 3.12; with an empty pattern it checked nothing beyond the
    # exception type, so assertRaises is the equivalent call.
    with self.assertRaises(TypeError):
      len(d)

    rs = list(d)
    self.assertEqual(len(rs), 10)
    self.assertEqual(tuple(rs[0]), (0, 22))
    self.assertEqual(tuple(rs[2]), (4, 62))
Beispiel #18
0
 def testGetData2(self):
     """ using a RandomAccessDbResultSet

     Fetches 'ten_elements' with forceList=0 and randomAccess=1, checks two
     rows and the length, and checks that indexing past the end raises
     IndexError.
     """
     d = DbUtils.GetData(self.dbName,
                         'ten_elements',
                         forceList=0,
                         randomAccess=1)
     assert tuple(d[0]) == (0, 11)
     assert tuple(d[2]) == (4, 31)
     assert len(d) == 10
     try:
         d[11]
     except IndexError:
         pass
     except Exception as exc:
         # A bare except would also trap KeyboardInterrupt/SystemExit and
         # hide what was actually raised; report the offending exception.
         raise AssertionError('bad exception type raised: %r' % (exc, ))
     else:
         # raised explicitly so the check survives running under -O
         raise AssertionError('failed to raise expected exception')
Beispiel #19
0
 def testGetData4(self):
     """ using a RandomAccessDbResultSet with a Transform

     The transform doubles the second field of every row; two rows and the
     length are checked, and indexing past the end must raise IndexError.
     """
     def fn(x):
         # keep the first field, double the second
         return (x[0], x[1] * 2)

     d = DbUtils.GetData(self.dbName,
                         'ten_elements',
                         forceList=0,
                         randomAccess=1,
                         transform=fn)
     assert tuple(d[0]) == (0, 22)
     assert tuple(d[2]) == (4, 62)
     assert len(d) == 10
     try:
         d[11]
     except IndexError:
         pass
     except Exception as exc:
         # A bare except would also trap KeyboardInterrupt/SystemExit and
         # hide what was actually raised; report the offending exception.
         raise AssertionError('bad exception type raised: %r' % (exc, ))
     else:
         # raised explicitly so the check survives running under -O
         raise AssertionError('failed to raise expected exception')
Beispiel #20
0
 def testGetData3(self):
     """ using a DbResultSet

     With forceList=0 and randomAccess=0 the result is expected not to
     support len(); it is iterated instead and the collected rows checked.
     """
     d = DbUtils.GetData(self.dbName,
                         'ten_elements',
                         forceList=0,
                         randomAccess=0)
     try:
         len(d)
     except TypeError:
         pass
     except Exception as exc:
         # A bare except would also trap KeyboardInterrupt/SystemExit and
         # hide what was actually raised; report the offending exception.
         raise AssertionError('bad exception type raised: %r' % (exc, ))
     else:
         # raised explicitly so the check survives running under -O
         raise AssertionError('failed to raise expected exception')
     rs = list(d)
     assert len(rs) == 10
     assert tuple(rs[0]) == (0, 11)
     assert tuple(rs[2]) == (4, 31)
Beispiel #21
0
 def testGetData5(self):
     """ using a DbResultSet with a Transform

     The transform doubles the second field of every row.  The result is
     expected not to support len(); it is iterated instead and the
     collected rows checked.
     """
     def fn(x):
         # keep the first field, double the second
         return (x[0], x[1] * 2)

     d = DbUtils.GetData(self.dbName,
                         'ten_elements',
                         forceList=0,
                         randomAccess=0,
                         transform=fn)
     try:
         len(d)
     except TypeError:
         pass
     except Exception as exc:
         # A bare except would also trap KeyboardInterrupt/SystemExit and
         # hide what was actually raised; report the offending exception.
         raise AssertionError('bad exception type raised: %r' % (exc, ))
     else:
         # raised explicitly so the check survives running under -O
         raise AssertionError('failed to raise expected exception')
     rs = list(d)
     assert len(rs) == 10
     assert tuple(rs[0]) == (0, 22)
     assert tuple(rs[2]) == (4, 62)
Beispiel #22
0
 def test_take(self):
   """DbUtils._take is checked against two index selections of [1, 2, 3, 4]."""
   # (indices requested, items expected back)
   cases = (([2, 3], [3, 4]), ([0, 3], [1, 4]))
   for indices, expected in cases:
     taken = list(DbUtils._take([1, 2, 3, 4], indices))
     self.assertEqual(taken, expected)
Beispiel #23
0
def FingerprintsFromDetails(details, reportFreq=10):
    """Generate fingerprints for the input described by ``details``.

    The input source is the first of these whose settings are present:

      1. a database table: ``details.dbName`` and ``details.tableName``
      2. a text file with SMILES: ``details.inFileName`` and
         ``details.useSmiles``
      3. an SD file: ``details.inFileName`` and ``details.useSD``

    Problems opening the database connection, reading the text file or
    opening the SD file are reported through ``error`` and
    ``traceback.print_exc``.

    **Arguments**

      - details: an object carrying the settings read here (``dbName``,
        ``tableName``, ``inFileName``, ``useSmiles``, ``useSD``, ``idName``,
        ``smilesName``, ``maxMols``, ``molPklName``, ``outFileName``,
        ``outDbName``, ``outTableName``, ``replaceTable``, ``fpColName``).
        ``details.idName`` is filled in if it is empty.  Its ``__dict__``
        is passed as keyword arguments to the fingerprinting functions.

      - reportFreq: while reading an SD file a progress message is emitted
        every ``reportFreq`` molecules; values <= 0 disable the messages.

    **Returns**

      the result of FingerprintsFromSmiles, FingerprintsFromPickles or
      FingerprintsFromMols, or None if there was no input data.

    **Side effects**

      - if ``details.outFileName`` is set, each fingerprint entry is
        pickled to that file
      - if ``details.outTableName`` and a database name are set, the
        (ID, fingerprint) pairs are inserted into that table, which is
        created first when ``details.replaceTable`` is set or the table
        does not exist yet
    """
    import traceback

    data = None
    # stays None when no source matches or the text file cannot be read
    dataSet = None
    if details.dbName and details.tableName:
        from rdkit.Dbase.DbConnection import DbConnect
        from rdkit.Dbase import DbInfo
        from rdkit.ML.Data import DataUtils
        try:
            conn = DbConnect(details.dbName, details.tableName)
        except Exception:
            error('Problems establishing connection to database: %s|%s\n' %
                  (details.dbName, details.tableName))
            traceback.print_exc()
        if not details.idName:
            # default to the first column of the table
            details.idName = DbInfo.GetColumnNames(details.dbName,
                                                   details.tableName)[0]
        dataSet = DataUtils.DBToData(details.dbName,
                                     details.tableName,
                                     what='%s,%s' %
                                     (details.idName, details.smilesName))
        idCol = 0
        smiCol = 1
    elif details.inFileName and details.useSmiles:
        from rdkit.ML.Data import DataUtils
        if not details.idName:
            details.idName = 'ID'
        try:
            dataSet = DataUtils.TextFileToData(
                details.inFileName,
                onlyCols=[details.idName, details.smilesName])
        except IOError:
            # dataSet keeps its None default, so the function returns None
            # instead of failing on an unbound name further down
            error('Problems reading from file %s\n' % (details.inFileName))
            traceback.print_exc()

        idCol = 0
        smiCol = 1
    elif details.inFileName and details.useSD:
        if not details.idName:
            details.idName = 'ID'
        dataSet = []
        try:
            s = Chem.SDMolSupplier(details.inFileName)
        except Exception:
            error('Problems reading from file %s\n' % (details.inFileName))
            traceback.print_exc()
        else:
            # plain iteration replaces the Python-2-only s.next() calls
            for m in s:
                if not m:
                    # skip entries for which the supplier gave nothing usable
                    continue
                dataSet.append(m)
                if reportFreq > 0 and not len(dataSet) % reportFreq:
                    message('Read %d molecules\n' % (len(dataSet)))
                # Checked for every molecule: this used to sit inside the
                # progress branch, so the limit was only honoured at
                # multiples of reportFreq (never when reportFreq <= 0).
                if details.maxMols > 0 and len(dataSet) >= details.maxMols:
                    break

        # pair each molecule with its name: the idName property when
        # present, otherwise the '_Name' property
        for i, mol in enumerate(dataSet):
            if mol.HasProp(details.idName):
                nm = mol.GetProp(details.idName)
            else:
                nm = mol.GetProp('_Name')
            dataSet[i] = (nm, mol)

    fps = None
    if dataSet:
        if details.useSD:
            fps = FingerprintsFromMols(dataSet, **details.__dict__)
        else:
            data = dataSet.GetNamedData()
            if not details.molPklName:
                fps = FingerprintsFromSmiles(data, idCol, smiCol,
                                             **details.__dict__)
            else:
                fps = FingerprintsFromPickles(data, idCol, smiCol,
                                              **details.__dict__)

    if fps:
        if details.outFileName:
            # the context manager closes the file even if a dump fails
            with open(details.outFileName, 'wb+') as outF:
                for entry in fps:
                    pickle.dump(entry, outF)
        dbName = details.outDbName or details.dbName
        if details.outTableName and dbName:
            from rdkit.Dbase.DbConnection import DbConnect
            from rdkit.Dbase import DbUtils, DbModule
            conn = DbConnect(dbName)
            #
            #  We don't have a db open already, so we'll need to figure out
            #    the types of our columns...
            #
            # NOTE(review): data is only filled in for the database and
            #  SMILES-file inputs; for SD input it is still None here and
            #  len(data) would fail -- confirm whether SD input combined
            #  with an output table is meant to be supported.
            colTypes = DbUtils.TypeFinder(data, len(data), len(data[0]))
            typeStrs = DbUtils.GetTypeStrings(
                [details.idName, details.smilesName],
                colTypes,
                keyCol=details.idName)
            cols = '%s, %s %s' % (typeStrs[0], details.fpColName,
                                  DbModule.binaryTypeName)

            # FIX: we should really check to see if the table
            #  is already there and, if so, add the appropriate
            #  column.

            #
            # create the new table
            #
            if details.replaceTable or \
               details.outTableName.upper() not in [x.upper() for x in conn.GetTableNames()]:
                conn.AddTable(details.outTableName, cols)

            #
            # And add the data
            #
            for ID, fp in fps:
                tpl = ID, DbModule.binaryHolder(fp.ToBinary())
                conn.InsertData(details.outTableName, tpl)
            conn.Commit()
    return fps
Beispiel #24
0
 def test_GetColumns(self):
     """Requesting the 'val' column of 'ten_elements' must give ten entries."""
     column = DbUtils.GetColumns(self.dbName, 'ten_elements', 'val')
     nEntries = len(column)
     self.assertEqual(nEntries, 10)
Beispiel #25
0
 def test1Txt(self):
     """ test reading from a text file

     dbtest.csv is loaded into table 'fromtext' of self.dbName and the
     table is then passed to self._confirm.
     """
     tblName = 'fromtext'
     # the context manager closes the input file, which was previously
     # left open, even when the load or the check fails
     with open(os.path.join(self.baseDir, 'dbtest.csv'), 'r') as inF:
         DbUtils.TextFileToDatabase(self.dbName, tblName, inF)
         self._confirm(tblName)
Beispiel #26
0
 def test_take(self):
     """DbUtils._take is checked against two index selections of [1, 2, 3, 4]."""
     # (indices requested, items expected back)
     cases = (([2, 3], [3, 4]), ([0, 3], [1, 4]))
     for indices, expected in cases:
         taken = list(DbUtils._take([1, 2, 3, 4], indices))
         self.assertEqual(taken, expected)
Beispiel #27
0
Fe13Pd7,0.641773222364,13.5561904063,1,1,0,3.53454303741,1.1
Fe13Pt7,0.644834950054,13.4918245347,1,0,1,3.53454303741,1.1
Fe3Pt17,0.657625302294,14.7500407392,1,0,1,3.53454303741,1.1
Fe3Rh7,0.648586371291,13.4137878702,1,1,0,3.53454303741,1.1
FePd26,0.675906847437,14.6853460112,1,1,0,3.53454303741,1.1
Mn2Pt3,0.59614900293,14.761410246,1,0,1,4.20857000351,1.1
MnPd3,0.692173333021,13.3637046657,1,1,0,4.20857000351,1.1
NiPd,0.752482590563,13.289344,1,1,0,1.89935600758,1.1
NiRh,0.759710124228,12.5047695128,1,1,0,1.89935600758,0.1
Fe2Hf,0.46150987372,14.4453391927,1,0,1,3.53454303741,1.1
Fe2Zr,0.449503092939,14.8311919793,1,1,0,3.53454303741,1.1
Co3Ir,0.750380223096,11.9939195131,1,0,1,2.7561609745,1.1
Co3Ir7,0.744357269808,12.0909680943,1,0,1,2.7561609745,0.1
Co3Rh,0.750380223096,11.9939195131,1,1,0,2.7561609745,1.1
Co4Rh,0.772980027292,11.6432503845,1,1,0,2.7561609745,1.1
CoIr,0.720410936244,12.4928697598,1,0,1,2.7561609745,0.1
CoOs,0.680645456475,12.4881462429,1,0,1,2.7561609745,0.1
CoRu,0.697198731059,12.1916458269,1,1,0,2.7561609745,0.1
Fe16Rh,0.668851387723,12.0487505556,1,1,0,3.53454303741,1.1
Fe32Pt,0.672765581933,11.9812996935,1,0,1,3.53454303741,1.1
Fe9Pt,0.662584205604,12.3757854936,1,0,1,3.53454303741,1.1
Fe9Rh,0.664715105171,12.1856716313,1,1,0,3.53454303741,1.1
"""

# Load three text blobs (ferro_quant, ferro_noquant, ferro_noquant_realact,
# defined outside the lines shown here) into RDConfig.RDTestDatabase.
# Each one is wrapped in a StringIO and passed to DbUtils.TextFileToDatabase
# together with a table name that matches the variable name.
# NOTE(review): the name `io` would shadow the stdlib `io` module if that
# module is imported in this file -- confirm before reusing the name.
io = StringIO(ferro_quant)
DbUtils.TextFileToDatabase(RDConfig.RDTestDatabase,'ferro_quant',io)
io = StringIO(ferro_noquant)
DbUtils.TextFileToDatabase(RDConfig.RDTestDatabase,'ferro_noquant',io)
io = StringIO(ferro_noquant_realact)
DbUtils.TextFileToDatabase(RDConfig.RDTestDatabase,'ferro_noquant_realact',io)
Beispiel #28
0
 def test_TypeFinder(self):
     """Check the per-column [type, size] pairs DbUtils.TypeFinder reports.

     Two rows of four columns are analysed with '-' given as the null
     marker; the last column holds only None in both rows.
     """
     rows = [('-', 1.45, 'abc', None), (20, 3, 'defgh', None)]
     expected = [[int, 2], [float, 4], [str, 5], [-1, 1]]
     found = DbUtils.TypeFinder(rows, 2, 4, nullMarker='-')
     self.assertEqual(found, expected)