Ejemplo n.º 1
0
  def test_ForwardDbFpSupplier(self):
    """Extra ForwardDbFpSupplier checks added for code coverage.

    Verifies that constructing the supplier with the fingerprint column name
    'typo' raises ValueError, and that a supplier built on the
    'AutoFragmentFp' column reports 'ID' among its column names.
    """
    dbConn = DbConnect(RDConfig.RDTestDatabase, 'simple_combined')

    # The data is fetched before entering the guarded region so that only the
    # supplier construction itself is expected to raise.
    dataForBadColumn = dbConn.GetData()
    with self.assertRaises(ValueError):
      DbFpSupplier.ForwardDbFpSupplier(dataForBadColumn, fpColName='typo')

    supplier = DbFpSupplier.ForwardDbFpSupplier(dbConn.GetData(), fpColName='AutoFragmentFp')
    columnNames = supplier.GetColumnNames()
    self.assertIn('ID', columnNames)
Ejemplo n.º 2
0
  def test6Update(self):
    """Create the bzr databases, then extend them with ``--updateDb``.

    ``CreateDb.py`` is run twice in a subprocess: first on
    ``testData/bzr.smi`` and then, with ``--updateDb``, on
    ``testData/bzr.2.smi``.  After each run the four database files must exist
    and each checked table must contain the expected number of rows
    (10 after the initial creation, 20 after the update).
    """
    # Local import: launch the child with the interpreter that is running the
    # tests rather than whatever 'python' resolves to on PATH (which may be
    # absent or belong to a different environment).
    import sys

    # (database file, table whose row count is checked)
    dbTables = (
      ('testData/bzr/Compounds.sqlt', 'molecules'),
      ('testData/bzr/AtomPairs.sqlt', 'atompairs'),
      ('testData/bzr/Descriptors.sqlt', 'descriptors_v1'),
      ('testData/bzr/Fingerprints.sqlt', 'rdkitfps'),
    )
    baseCmd = (sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', '--molFormat=smiles')
    # (extra command-line arguments, expected row count afterwards)
    runs = (
      (('testData/bzr.smi', ), 10),
      (('--updateDb', 'testData/bzr.2.smi'), 20),
    )

    for extraArgs, nExpected in runs:
      p = subprocess.Popen(baseCmd + extraArgs)
      res = p.wait()
      # a non-zero exit status means the script failed
      self.assertFalse(res)
      p = None

      for dbFile, _ in dbTables:
        self.assertTrue(os.path.exists(dbFile), dbFile)

      for dbFile, tblName in dbTables:
        conn = DbConnect(dbFile)
        d = conn.GetData(tblName, fields='count(*)')
        self.assertEqual(d[0][0], nExpected, dbFile)
Ejemplo n.º 3
0
def _ColumnNameFromIndex(suppl, colIdx):
  """Return the property name found at position `colIdx` on the first molecule.

  The supplier is rewound before the first molecule is read and rewound again
  afterwards, so the caller always gets it back positioned at the start.
  """
  suppl.reset()
  mol = next(suppl)
  name = mol.GetPropNames()[colIdx]
  suppl.reset()
  return name


def SupplierFromDetails(details):
  """Build a molecule supplier from the settings stored on `details`.

    **Arguments**

      - details: an object providing the attributes read here: dbName,
        tableName, inFileName, delim, nameCol, smiCol, hasTitle and actCol.

    **Returns**

      a DbMolSupplyNode (when details.dbName is set) or a SmilesSupplyNode
      (otherwise)

    **Notes**

      - if details.actCol or details.nameCol is an integer it is replaced,
        on `details` itself, by the property name found at that position on
        the first molecule of the supplier.

      - the supplier is rewound after each such lookup.  Previously the lookup
        code was duplicated in two places and, when only actCol was an
        integer, the supplier was returned already advanced past its first
        molecule.

  """
  from rdkit.VLib.NodeLib.DbMolSupply import DbMolSupplyNode
  from rdkit.VLib.NodeLib.SmilesSupply import SmilesSupplyNode

  if details.dbName:
    conn = DbConnect(details.dbName, details.tableName)
    suppl = DbMolSupplyNode(conn.GetData())
  else:
    suppl = SmilesSupplyNode(details.inFileName, delim=details.delim, nameColumn=details.nameCol,
                             smilesColumn=details.smiCol, titleLine=details.hasTitle)

  # Column indices are translated to property names for either supplier type.
  if isinstance(details.actCol, int):
    details.actCol = _ColumnNameFromIndex(suppl, details.actCol)
  if isinstance(details.nameCol, int):
    details.nameCol = _ColumnNameFromIndex(suppl, details.nameCol)
  return suppl
Ejemplo n.º 4
0
    def test_InsertData(self):
        """Exercise AddTable, InsertData, AddColumn and InsertColumnData.

        A scratch table 'NEW_TABLE' is created in the temporary database,
        filled with ten rows, extended with a 'val3' column that is then
        populated from the 'id' column, and finally dropped again.
        """
        newTblName = 'NEW_TABLE'
        conn = DbConnect(self.tempDbName)
        try:
            conn.GetCursor().execute('drop table %s' % (newTblName))
        except Exception:
            # Deliberate best effort: a failing drop here (presumably because
            # the table does not exist yet) must not fail the test.
            pass
        conn.Commit()
        conn.AddTable(newTblName, 'id int,val1 int, val2 int')
        for i in range(10):
            conn.InsertData(newTblName, (i, i + 1, 2 * i))
        conn.Commit()
        d = conn.GetData(table=newTblName)
        # unittest assertion instead of a bare assert: it is not stripped
        # under -O and reports both values on failure
        self.assertEqual(len(d), 10)

        self.assertEqual(len(conn.GetColumnNames(table=newTblName)), 3)
        conn.AddColumn(newTblName, 'val3', 'int')
        conn.Commit()
        self.assertEqual(len(conn.GetColumnNames(table=newTblName)), 4)
        d = conn.GetColumns('id,val3', table=newTblName)
        self.assertEqual(len(d), 10)
        # the freshly added column is expected to be empty in every row
        self.assertTrue(all(r[1] is None for r in d))
        for r in d:
            conn.InsertColumnData(newTblName, 'val3', r[0],
                                  'id={0}'.format(r[0]))
        conn.Commit()
        d = conn.GetColumns('id,val3', table=newTblName)
        self.assertTrue(all(r[0] == r[1] for r in d))

        # drop the reference to the result before removing the table
        d = None
        try:
            conn.GetCursor().execute('drop table %s' % (newTblName))
        except Exception as exc:
            # report the underlying error instead of hiding it
            self.fail('drop table failed: %s' % (exc, ))
Ejemplo n.º 5
0
    def test1Create(self):
        """Build the bzr databases from SMILES input and then from SDF input.

        ``CreateDb.py`` is run in a subprocess for each input.  After each run
        the four database files must exist and the checked table in each one
        must hold the expected number of rows (10 for the SMILES file, 163 for
        the SDF file).
        """
        # (database file, table whose row count is checked)
        dbTables = (
            ('testData/bzr/Compounds.sqlt', 'molecules'),
            ('testData/bzr/AtomPairs.sqlt', 'atompairs'),
            ('testData/bzr/Descriptors.sqlt', 'descriptors_v1'),
            ('testData/bzr/Fingerprints.sqlt', 'rdkitfps'),
        )
        # (input-specific command-line arguments, expected row count)
        scenarios = (
            (('--molFormat=smiles', 'testData/bzr.smi'), 10),
            (('--molFormat=sdf', '--doGobbi2D', 'testData/bzr.sdf'), 163),
        )
        commonArgs = (sys.executable, 'CreateDb.py', '--dbDir=testData/bzr')

        for inputArgs, nExpected in scenarios:
            proc = subprocess.Popen(commonArgs + inputArgs)
            exitCode = proc.wait()
            self.assertFalse(exitCode)
            proc = None

            for dbPath, _ in dbTables:
                self.assertTrue(os.path.exists(dbPath))

            for dbPath, tblName in dbTables:
                conn = DbConnect(dbPath)
                counts = conn.GetData(tblName, fields='count(*)')
                self.assertTrue(counts[0][0] == nExpected)
Ejemplo n.º 6
0
 def testGetData1(self):
     """Random-access GetData on the 'ten_elements' table.

     Checks the number of rows, the contents of rows 0 and 2, and that
     indexing position 11 raises IndexError.
     """
     conn = DbConnect(self.dbName, 'ten_elements')
     rows = conn.GetData(randomAccess=1)
     assert len(rows) == 10
     for idx, expected in ((0, (0, 11)), (2, (4, 31))):
         assert tuple(rows[idx]) == expected
     with self.assertRaises(IndexError):
         rows[11]
Ejemplo n.º 7
0
 def testGetData2(self):
     """Random-access GetData on 'ten_elements_dups' with removeDups=1.

     Checks rows 0 and 2, the overall length (10) and that indexing
     position 11 raises IndexError.
     """
     conn = DbConnect(self.dbName, 'ten_elements_dups')
     rows = conn.GetData(randomAccess=1, removeDups=1)
     for idx, expected in ((0, (0, 11)), (2, (4, 31))):
         assert tuple(rows[idx]) == expected
     assert len(rows) == 10
     with self.assertRaises(IndexError):
         rows[11]
Ejemplo n.º 8
0
    def testGetData3(self):
        """Random-access GetData on 'ten_elements_dups' with removeDups=-1.

        The same checks are made twice: once with the table given to
        DbConnect, and once with the table given through GetData's `table`
        argument.
        """

        def checkRows(rows):
            # rows 0 and 2, total length 20, and index 21 out of range
            assert tuple(rows[0]) == (0, 11)
            assert tuple(rows[2]) == (2, 21)
            assert len(rows) == 20
            with self.assertRaises(IndexError):
                rows[21]

        conn = DbConnect(self.dbName, 'ten_elements_dups')
        checkRows(conn.GetData(randomAccess=1, removeDups=-1))

        # repeat that test to make sure the table argument works
        conn = DbConnect(self.dbName, 'ten_elements')
        checkRows(
            conn.GetData(table='ten_elements_dups', randomAccess=1, removeDups=-1))
Ejemplo n.º 9
0
    def testGetData5(self):
        """Random-access GetData with a transform applied to the rows.

        The transform doubles the second field of each row; rows 0 and 2, the
        length (10) and an out-of-range index (11) are checked.
        """

        def doubleSecondField(row):
            return (row[0], row[1] * 2)

        conn = DbConnect(self.dbName, 'ten_elements')
        rows = conn.GetData(randomAccess=1, transform=doubleSecondField)

        assert tuple(rows[0]) == (0, 22), str(rows[0])
        assert tuple(rows[2]) == (4, 62)
        assert len(rows) == 10
        with self.assertRaises(IndexError):
            rows[11]
Ejemplo n.º 10
0
def GetComposites(details):
  """Load the pickled composite models described by `details`.

    **Arguments**

      - details: an object providing persistTblName, inNote, dbName and
        composFileName.

    **Returns**

      a list of unpickled objects:

        - when both persistTblName and inNote are set, one entry per row of
          the MODEL column of that table whose note equals inNote;

        - otherwise, when composFileName is set, a single entry loaded from
          that file;

        - otherwise an empty list.

  """
  res = []
  if details.persistTblName and details.inNote:
    conn = DbConnect(details.dbName,details.persistTblName)
    # NOTE(review): inNote is interpolated straight into the where clause, and
    # the stored blobs are unpickled; both are only safe for trusted input.
    mdls = conn.GetData(fields='MODEL',where="where note='%s'"%(details.inNote))
    for row in mdls:
      rawD = row[0]
      res.append(cPickle.loads(str(rawD)))
  elif details.composFileName:
    # the context manager closes the file handle, which used to be leaked
    with open(details.composFileName,'rb') as inF:
      res.append(cPickle.load(inF))
  return res
Ejemplo n.º 11
0
def DBToData(dbName, tableName, user='******', password='******', dupCol=-1, what='*', where='',
             join='', pickleCol=-1, pickleClass=None, ensembleIds=None):
  """ builds an _MLData.MLDataSet_ from the contents of a database table

    **Arguments**

      - dbName: the name of the database to be opened

      - tableName: the name of the table holding the data

      - user: the user name used to connect to the database

      - password: the password used to connect to the database

      - dupCol: handed to the connection's GetData() as _removeDups_

      - what: handed to GetData() as _fields_ and to GetColumnNames() as
        _what_

      - where: handed to GetData() unchanged

      - join: handed to GetData() and GetColumnNames() unchanged

      - pickleCol: if >= 0, the index of a column holding pickled data.  The
        index is applied after the first column has been removed from the
        row (see Notes).

      - pickleClass: if provided, it is called on the string form of the
        pickled value to build the object.  The first time that call raises,
        cPickle.loads() is used instead, for that row and all later rows.

      - ensembleIds: if provided and pickleCol >= 0, the unpickled value is
        replaced by BitUtils.ConstructEnsembleBV(value, ensembleIds); if
        provided and pickleCol < 0, the row is replaced by
        TakeEnsemble(row, ensembleIds, isDataVect=True).

    **Returns**

       an _MLData.MLDataSet_

    **Notes**

      - the first column of each row is taken as the point name and removed
        from the row's values.

  """
  conn = DbConnect(dbName, tableName, user, password)
  rows = conn.GetData(fields=what, where=where, join=join, removeDups=dupCol, forceList=1)
  ptNames = []
  vals = []
  # switched off for good after the first pickleClass failure
  classWorks = True
  for row in rows:
    fields = list(row)
    ptNames.append(fields.pop(0))
    if pickleCol < 0:
      if ensembleIds:
        fields = TakeEnsemble(fields, ensembleIds, isDataVect=True)
    else:
      if pickleClass and classWorks:
        try:
          fields[pickleCol] = pickleClass(str(fields[pickleCol]))
        except Exception:
          fields[pickleCol] = cPickle.loads(str(fields[pickleCol]))
          classWorks = False
      else:
        fields[pickleCol] = cPickle.loads(str(fields[pickleCol]))
      if ensembleIds:
        fields[pickleCol] = BitUtils.ConstructEnsembleBV(fields[pickleCol], ensembleIds)
    vals.append(fields)
  varNames = conn.GetColumnNames(join=join, what=what)
  return MLData.MLDataSet(vals, varNames=varNames, ptNames=ptNames)
Ejemplo n.º 12
0
    def testGetData4(self):
        """GetData without random access.

        The result must not support len() (TypeError) but must be iterable;
        the ten rows collected by iterating are then checked.
        """
        conn = DbConnect(self.dbName, 'ten_elements')
        resultSet = conn.GetData(randomAccess=0)
        with self.assertRaises(TypeError):
            len(resultSet)

        collected = [row for row in resultSet]
        assert len(collected) == 10
        for idx, expected in ((0, (0, 11)), (2, (4, 31))):
            assert tuple(collected[idx]) == expected
Ejemplo n.º 13
0
 def testGetData6(self):
     """GetData without random access, with a transform applied to the rows.

     The transform doubles the second field of each row.  The result must
     not support len() (TypeError); the ten rows collected by iterating are
     then checked.
     """

     def doubleSecondField(row):
         return (row[0], row[1] * 2)

     conn = DbConnect(self.dbName, 'ten_elements')
     resultSet = conn.GetData(randomAccess=0, transform=doubleSecondField)
     with self.assertRaises(TypeError):
         len(resultSet)
     collected = [row for row in resultSet]
     assert len(collected) == 10
     for idx, expected in ((0, (0, 22)), (2, (4, 62))):
         assert tuple(collected[idx]) == expected
Ejemplo n.º 14
0
    def getTestData(self):
        """Return RDK fingerprints for the molecules in 'simple_mols1'.

        Each row of the 'simple_mols1' table in the RDKit test database is
        read as a (SMILES, ID) pair.  The molecule built from the SMILES is
        named after the ID, and that name is stored on the resulting
        fingerprint as its `_id` attribute.
        """
        conn = DbConnect(RDConfig.RDTestDatabase, 'simple_mols1')

        # First pass: build and name all molecules.
        molecules = []
        for smiles, molId in conn.GetData():
            molecule = Chem.MolFromSmiles(str(smiles))
            molecule.SetProp('_Name', str(molId))
            molecules.append(molecule)

        def fingerprintOf(molecule):
            fingerprint = Chem.RDKFingerprint(molecule)
            fingerprint._id = molecule.GetProp('_Name')
            return fingerprint

        # Second pass: calculate the fingerprints.
        return [fingerprintOf(molecule) for molecule in molecules]
Ejemplo n.º 15
0
    def testGetData1(self):
        """Basic functionality of GetData, GetColumns and GetDataCount.

        All three are exercised on the 'ten_elements' table: the row count,
        the contents of rows 0 and 2 and, for the random-access result, that
        indexing position 11 raises IndexError.
        """
        conn = DbConnect(self.dbName, 'ten_elements')
        d = conn.GetData(randomAccess=1)
        # unittest assertions throughout (the method previously mixed them
        # with bare asserts, which are stripped under -O)
        self.assertEqual(len(d), 10)
        self.assertEqual(tuple(d[0]), (0, 11))
        self.assertEqual(tuple(d[2]), (4, 31))
        with self.assertRaises(IndexError):
            d[11]

        d = conn.GetColumns(fields='id,val')
        self.assertEqual(len(d), 10)
        self.assertEqual(tuple(d[0]), (0, 11))
        self.assertEqual(tuple(d[2]), (4, 31))

        self.assertEqual(conn.GetDataCount(), 10)
Ejemplo n.º 16
0
    def testInsertData(self):
        """Test AddTable and InsertData on a scratch table.

        'NEW_TABLE' is created in the temporary database, filled with ten
        rows, read back, and finally dropped again.
        """
        newTblName = 'NEW_TABLE'
        conn = DbConnect(self.tempDbName)
        try:
            conn.GetCursor().execute('drop table %s' % (newTblName))
        except Exception:
            # Deliberate best effort: a failing drop here (presumably because
            # the table does not exist yet) must not fail the test.
            pass
        conn.Commit()
        conn.AddTable(newTblName, 'id int,val1 int, val2 int')
        for i in range(10):
            conn.InsertData(newTblName, (i, i + 1, 2 * i))
        conn.Commit()
        d = conn.GetData(table=newTblName)
        # unittest assertion instead of a bare assert: it is not stripped
        # under -O and reports both values on failure
        self.assertEqual(len(d), 10)

        # drop the reference to the result before removing the table
        d = None
        try:
            conn.GetCursor().execute('drop table %s' % (newTblName))
        except Exception as exc:
            # report the underlying error instead of hiding it
            self.fail('drop table failed: %s' % (exc, ))
Ejemplo n.º 17
0
        elif arg == '--enrich':
            enrich = int(val)
    composites = []
    if db is None:
        for arg in extras:
            composite = cPickle.load(open(arg, 'rb'))
            composites.append(composite)
    else:
        tbl = extras[0]
        conn = DbConnect(db, tbl)
        if note:
            where = "where note='%s'" % (note)
        else:
            where = ''
        if not skip:
            pkls = conn.GetData(fields='model', where=where)
            composites = []
            for pkl in pkls:
                pkl = str(pkl[0])
                comp = cPickle.loads(pkl)
                composites.append(comp)

    if len(composites):
        ProcessIt(composites, count, verbose=verbose)
    elif not skip:
        print('ERROR: no composite models found')
        sys.exit(-1)

    if db:
        res = ErrorStats(conn, where, enrich=enrich)
        if res:
Ejemplo n.º 18
0
def GetNode(dbName, tableName):
    """Return a DbMolSupplyNode built on the data of one database table.

    A DbConnect is opened for `dbName`/`tableName` and the result of its
    GetData() call is wrapped in the node.
    """
    from rdkit.Dbase.DbConnection import DbConnect
    connection = DbConnect(dbName, tableName)
    tableData = connection.GetData()
    return DbMolSupplyNode(tableData)
Ejemplo n.º 19
0
from rdkit import Chem
from rdkit import RDConfig
from rdkit.Dbase import DbModule
from rdkit.Dbase.DbConnection import DbConnect
import pickle

# Choose the database according to the configured backend.
dbName = "::RDTests" if RDConfig.usePgSQL else "data.sqlt"

molTblName, fpTblName = 'simple_mols1', 'simple_mols1_fp'
conn = DbConnect(dbName, molTblName)

# Create the destination table: an id plus a binary column for the pickled
# fingerprint.
fpTblColumns = 'id varchar(10),autofragmentfp %s' % DbModule.binaryTypeName
conn.AddTable(fpTblName, fpTblColumns)

# For every (SMILES, ID) row of the molecule table, store the pickled RDK
# fingerprint of the molecule under the same ID.
d = conn.GetData()
for smi, ID in d:
    print(repr(ID), repr(smi))
    fp = Chem.RDKFingerprint(Chem.MolFromSmiles(smi))
    conn.InsertData(fpTblName, (ID, DbModule.binaryHolder(pickle.dumps(fp))))
conn.Commit()
Ejemplo n.º 20
0
    def test4CreateOptions(self):
        """Run CreateDb.py with several option sets and check its output.

        Each run is checked for a zero exit status and for which of the four
        database files exist afterwards.  For all runs but the last, the
        'molecules' table of Compounds.sqlt is also checked for its row count
        (10) and for the presence or absence of a 'smiles' column.
        """
        # Local import: launch the child with the interpreter that is running
        # the tests rather than whatever 'python' resolves to on PATH.
        import sys

        dbFiles = ('testData/bzr/Compounds.sqlt', 'testData/bzr/AtomPairs.sqlt',
                   'testData/bzr/Descriptors.sqlt',
                   'testData/bzr/Fingerprints.sqlt')

        def removeDbFiles():
            # delete whichever of the database files currently exist
            for fName in dbFiles:
                if os.path.exists(fName):
                    os.unlink(fName)

        removeDbFiles()

        p = subprocess.Popen((sys.executable, 'CreateDb.py',
                              '--dbDir=testData/bzr', '--molFormat=smiles',
                              '--noExtras', '--noSmiles', 'testData/bzr.smi'))
        res = p.wait()
        self.assertFalse(res)
        p = None

        self.assertTrue(os.path.exists('testData/bzr/Compounds.sqlt'))
        self.assertFalse(os.path.exists('testData/bzr/AtomPairs.sqlt'))
        self.assertFalse(os.path.exists('testData/bzr/Descriptors.sqlt'))
        self.assertFalse(os.path.exists('testData/bzr/Fingerprints.sqlt'))

        conn = DbConnect('testData/bzr/Compounds.sqlt')
        d = conn.GetData('molecules', fields='count(*)')
        self.assertEqual(d[0][0], 10)
        d = conn.GetData('molecules', fields='*')
        self.assertEqual(len(d), 10)
        cns = [x.lower() for x in d.GetColumnNames()]
        self.assertNotIn('smiles', cns)

        # drop the references to the database before deleting its files
        conn = None
        d = None

        removeDbFiles()

        p = subprocess.Popen(
            (sys.executable, 'CreateDb.py', '--dbDir=testData/bzr',
             '--molFormat=smiles', '--noSmiles', '--noFingerprints',
             '--noLayeredFps', '--noMorganFps', '--noPairs', '--noDescriptors',
             'testData/bzr.smi'))
        res = p.wait()
        self.assertFalse(res)
        p = None

        self.assertTrue(os.path.exists('testData/bzr/Compounds.sqlt'))
        self.assertFalse(os.path.exists('testData/bzr/AtomPairs.sqlt'))
        self.assertFalse(os.path.exists('testData/bzr/Descriptors.sqlt'))
        self.assertFalse(os.path.exists('testData/bzr/Fingerprints.sqlt'))

        conn = DbConnect('testData/bzr/Compounds.sqlt')
        d = conn.GetData('molecules', fields='count(*)')
        self.assertEqual(d[0][0], 10)
        d = conn.GetData('molecules', fields='*')
        self.assertEqual(len(d), 10)
        cns = [x.lower() for x in d.GetColumnNames()]
        self.assertNotIn('smiles', cns)

        p = subprocess.Popen(
            (sys.executable, 'CreateDb.py', '--dbDir=testData/bzr',
             '--molFormat=smiles', '--noProps', '--noFingerprints',
             '--noLayeredFps', '--noMorganFps', '--noPairs', '--noDescriptors',
             'testData/bzr.smi'))
        res = p.wait()
        self.assertFalse(res)
        p = None

        self.assertTrue(os.path.exists('testData/bzr/Compounds.sqlt'))
        self.assertFalse(os.path.exists('testData/bzr/AtomPairs.sqlt'))
        self.assertFalse(os.path.exists('testData/bzr/Descriptors.sqlt'))
        self.assertFalse(os.path.exists('testData/bzr/Fingerprints.sqlt'))

        conn = DbConnect('testData/bzr/Compounds.sqlt')
        d = conn.GetData('molecules', fields='count(*)')
        self.assertEqual(d[0][0], 10)
        d = conn.GetData('molecules', fields='*')
        self.assertEqual(len(d), 10)
        cns = [x.lower() for x in d.GetColumnNames()]
        self.assertIn('smiles', cns)

        p = subprocess.Popen(
            (sys.executable, 'CreateDb.py', '--dbDir=testData/bzr',
             '--molFormat=smiles', '--noFingerprints', '--noLayeredFps',
             '--noMorganFps', '--noPairs', '--noDescriptors',
             '--maxRowsCached=4', 'testData/bzr.smi'))
        res = p.wait()
        self.assertFalse(res)
        p = None

        self.assertTrue(os.path.exists('testData/bzr/Compounds.sqlt'))
        self.assertFalse(os.path.exists('testData/bzr/AtomPairs.sqlt'))
        self.assertFalse(os.path.exists('testData/bzr/Descriptors.sqlt'))
        self.assertFalse(os.path.exists('testData/bzr/Fingerprints.sqlt'))

        conn = DbConnect('testData/bzr/Compounds.sqlt')
        d = conn.GetData('molecules', fields='count(*)')
        self.assertEqual(d[0][0], 10)
        d = conn.GetData('molecules', fields='*')
        self.assertEqual(len(d), 10)
        cns = [x.lower() for x in d.GetColumnNames()]
        self.assertIn('smiles', cns)

        p = subprocess.Popen(
            (sys.executable, 'CreateDb.py', '--dbDir=testData/bzr',
             '--molFormat=smiles', '--noFingerprints', '--noPairs',
             '--noDescriptors', '--maxRowsCached=4', 'testData/bzr.smi'))
        res = p.wait()
        self.assertFalse(res)
        p = None

        self.assertTrue(os.path.exists('testData/bzr/Compounds.sqlt'))
        self.assertFalse(os.path.exists('testData/bzr/AtomPairs.sqlt'))
        self.assertFalse(os.path.exists('testData/bzr/Descriptors.sqlt'))
        self.assertTrue(os.path.exists('testData/bzr/Fingerprints.sqlt'))