Ejemplo n.º 1
0
 def testQuantPickle(self):
     # Round-trips a quantized data set through a pickle file: self.d is
     # written with DataUtils.WritePickledData, the pieces are read back with
     # pickle.load, a fresh MLQuantDataSet is built from them and compared
     # with self.d accessor by accessor.
     self.setUpQuantLoad()
     pklPath = RDConfig.RDCodeDir + '/ML/Data/test_data/testquant.qdat.pkl'
     DataUtils.WritePickledData(pklPath, self.d)

     # four consecutive pickles are read back: variable names, quantization
     # bounds, point names and the examples themselves
     with open(pklPath, 'rb') as inF:
         varNames = pickle.load(inF)
         quantBounds = pickle.load(inF)
         pointNames = pickle.load(inF)
         pts = pickle.load(inF)

     expected = self.d
     restored = MLData.MLQuantDataSet(pts, varNames=varNames, qBounds=quantBounds,
                                      ptNames=pointNames)

     assert expected.GetNPts() == restored.GetNPts(), 'nPts wrong'
     assert expected.GetNVars() == restored.GetNVars(), 'nVars wrong'
     assert expected.GetNResults() == restored.GetNResults(), 'nResults wrong'
     assert expected.GetVarNames() == restored.GetVarNames(), 'varNames wrong'
     assert expected.GetPtNames() == restored.GetPtNames(), 'ptNames wrong'
     assert expected.GetNPossibleVals() == restored.GetNPossibleVals(), 'nPossible Wrong'
     assert expected.GetQuantBounds() == restored.GetQuantBounds(), 'quantBounds Wrong'
     assert expected.GetResults() == restored.GetResults(), 'GetResults wrong'
     # only a single row of each of the bulkier accessors is spot-checked
     assert expected.GetAllData()[1] == restored.GetAllData()[1], 'GetAllData wrong'
     assert expected.GetInputData()[3] == restored.GetInputData()[3], 'GetInputData wrong'
     assert expected.GetNamedData()[2] == restored.GetNamedData()[2], 'GetNamedData wrong'
Ejemplo n.º 2
0
    def _setupMultiTree(self):
        """Build an ID3 tree from a small hard-coded data set.

        The resulting tree is stored in ``self.t1`` and the raw rows in
        ``self.examples``.
        """
        rows = [
            [0, 1, 0, 0],
            [0, 0, 0, 1],
            [0, 0, 1, 2],
            [0, 1, 1, 2],
            [1, 0, 0, 2],
            [1, 0, 1, 2],
            [1, 1, 0, 2],
            [1, 1, 1, 0],
        ]

        dataSet = MLData.MLQuantDataSet(rows)
        # every variable index is offered to the tree builder
        varIndices = range(dataSet.GetNVars())
        self.t1 = ID3.ID3Boot(dataSet.GetAllData(), varIndices, dataSet.GetNPossibleVals())
        self.examples = rows
Ejemplo n.º 3
0
def DBToData(dbName, tableName, user='******', password='******', dupCol=-1, what='*', where='',
             join='', pickleCol=-1, pickleClass=None, ensembleIds=None):
  """ builds an _MLData.MLDataSet_ from the contents of a database table

    **Arguments**

      - dbName: the name of the database to be opened

      - tableName: the table name containing the data in the database

      - user: the user name to be used to connect to the database

      - password: the password to be used to connect to the database

      - dupCol: handed to the connection's GetData() as _removeDups_

      - what: handed to GetData() as _fields_ (and to GetColumnNames())

      - where: handed to GetData() as _where_

      - join: handed to GetData() as _join_ (and to GetColumnNames())

      - pickleCol: if >= 0, the index (counted after the first column has
        been removed from the row) of a column whose value must be decoded

      - pickleClass: (optional) a callable tried first to decode the
        pickleCol value from its string form; after its first failure
        cPickle.loads() is used for that row and all remaining rows

      - ensembleIds: (optional) if provided, the decoded pickleCol value is
        passed through BitUtils.ConstructEnsembleBV(); when there is no
        pickleCol the whole row is passed through TakeEnsemble() instead

    **Returns**

       an _MLData.MLDataSet_

    **Notes**

      - the first column of every row is used as the point name

      - pickleCol values are handed to cPickle.loads(), so this should only
        be pointed at databases whose contents are trusted

  """
  conn = DbConnect(dbName, tableName, user, password)
  rows = conn.GetData(fields=what, where=where, join=join, removeDups=dupCol, forceList=1)

  hasPickleCol = pickleCol >= 0
  # flipped to False the first time pickleClass fails to decode a value
  classWorks = True
  vals = []
  ptNames = []
  for row in rows:
    fields = list(row)
    ptNames.append(fields.pop(0))
    if hasPickleCol:
      payload = str(fields[pickleCol])
      if pickleClass and classWorks:
        try:
          decoded = pickleClass(payload)
        except Exception:
          decoded = cPickle.loads(payload)
          classWorks = False
      else:
        decoded = cPickle.loads(payload)
      if ensembleIds:
        decoded = BitUtils.ConstructEnsembleBV(decoded, ensembleIds)
      fields[pickleCol] = decoded
    elif ensembleIds:
      fields = TakeEnsemble(fields, ensembleIds, isDataVect=True)
    vals.append(fields)

  colNames = conn.GetColumnNames(join=join, what=what)
  return MLData.MLDataSet(vals, varNames=colNames, ptNames=ptNames)
Ejemplo n.º 4
0
    def _setupPyMultiTree(self):
        """Build the same ID3 tree as above using the Python entropy code.

        ``ID3.entropy.InfoEntropy`` and ``ID3.entropy.InfoGain`` are rebound
        to ``PyInfoEntropy`` / ``PyInfoGain`` before the tree is built; the
        rebinding is not undone here.  The tree is stored in ``self.t1`` and
        the raw rows in ``self.examples``.
        """
        from rdkit.ML.InfoTheory import entropy
        ID3.entropy.InfoEntropy = entropy.PyInfoEntropy
        ID3.entropy.InfoGain = entropy.PyInfoGain

        rows = [
            [0, 1, 0, 0],
            [0, 0, 0, 1],
            [0, 0, 1, 2],
            [0, 1, 1, 2],
            [1, 0, 0, 2],
            [1, 0, 1, 2],
            [1, 1, 0, 2],
            [1, 1, 1, 0],
        ]

        dataSet = MLData.MLQuantDataSet(rows)
        # every variable index is offered to the tree builder
        varIndices = range(dataSet.GetNVars())
        self.t1 = ID3.ID3Boot(dataSet.GetAllData(), varIndices, dataSet.GetNPossibleVals())
        self.examples = rows
Ejemplo n.º 5
0
def TextToData(reader, ignoreCols=None, onlyCols=None):
    """ constructs an _MLData.MLDataSet_ from rows of text

    **Arguments**

      - reader: an iterator returning lists of elements (like a csv.reader);
        its first row supplies the column names

      - ignoreCols: (optional) names of columns to drop; only consulted
        when onlyCols is not provided

      - onlyCols: (optional) names of the columns to keep, in the order
        they should appear in the data set

    **Returns**

       an _MLData.MLDataSet_

    **Raises**

      - ValueError: if a non-empty row does not have as many elements as
        the header row, or if a name in onlyCols is not in the header row

    **Notes**

      - the first kept column provides the point names

      - every other kept value is converted to an int if possible, else to
        a float, else to a str

      - empty rows are skipped

    """
    if ignoreCols is None:
        ignoreCols = ()

    varNames = next(reader)
    if not onlyCols:
        keepCols = [i for i, name in enumerate(varNames) if name not in ignoreCols]
    else:
        # for a repeated header name the last occurrence wins
        colIndex = {name: i for i, name in enumerate(varNames)}
        missing = [name for name in onlyCols if name not in colIndex]
        if missing:
            # previously a missing name silently selected the last column
            raise ValueError('columns not found in header: %s' %
                             ', '.join(str(name) for name in missing))
        keepCols = [colIndex[name] for name in onlyCols]

    nCols = len(varNames)
    varNames = tuple(varNames[x] for x in keepCols)
    vals = []
    ptNames = []
    for splitLine in reader:
        if not splitLine:
            continue
        if len(splitLine) != nCols:
            raise ValueError('unequal line lengths')
        kept = [splitLine[x] for x in keepCols]
        ptNames.append(kept[0])
        pt = []
        for text in kept[1:]:
            try:
                val = int(text)
            except ValueError:
                try:
                    val = float(text)
                except ValueError:
                    val = str(text)
            pt.append(val)
        vals.append(pt)
    data = MLData.MLDataSet(vals, varNames=varNames, ptNames=ptNames)
    return data
Ejemplo n.º 6
0
def BuildDataSet(fileName):
  """ reads a .dat file and returns its contents as a data set

    **Arguments**

      - fileName: the name of the .dat file

    **Returns**

      an _MLData.MLDataSet_

    **Notes**

      - the variable names and quantization bounds are read first (using
        ReadVars()), followed by the point names and examples (using
        ReadGeneralExamples()); the file is closed before the data set is
        constructed

  """
  with open(fileName, 'r') as datFile:
    names, bounds = ReadVars(datFile)
    pointNames, pts = ReadGeneralExamples(datFile)
  return MLData.MLDataSet(pts, qBounds=bounds, varNames=names, ptNames=pointNames)
Ejemplo n.º 7
0
def BuildQuantDataSet(fileName):
    """ builds a data set from a .qdat file

    **Arguments**

      - fileName: the name of the .qdat file

    **Returns**

      an _MLData.MLQuantDataSet_

    **Notes**

      - the variable names and quantization bounds are read first (using
        ReadVars()), followed by the point names and examples (using
        ReadQuantExamples())

    """
    # the context manager guarantees the file is closed, even if parsing
    # fails part-way through
    with open(fileName, 'r') as inFile:
        varNames, qBounds = ReadVars(inFile)
        ptNames, examples = ReadQuantExamples(inFile)
    data = MLData.MLQuantDataSet(examples,
                                 qBounds=qBounds,
                                 varNames=varNames,
                                 ptNames=ptNames)
    return data