Ejemplo n.º 1
0
def GenVarTable(examples, nPossibleVals, vars):
    """Generates a list of variable tables for the examples passed in.

    The table for a given variable records the number of times each possible
    value of that variable appears for each possible result of the function.

    **Arguments**

      - examples: a list (nInstances long) of lists of variable values +
        instance values.  The last element of each example is used as the
        result (function value).

      - nPossibleVals: a list containing the number of possible values of
        each variable + the number of values of the function (last element).

      - vars: a list of the (integer) indices of the variables to include
        in the var table

    **Returns**

      a list of variable result tables, one per entry of _vars_ and in the
      same order.  Each table is an integer numpy array which is
      varValues x nResults

    """
    nFuncVals = nPossibleVals[-1]
    # one (nValues x nResults) table of counts per requested variable
    res = [numpy.zeros((nPossibleVals[var], nFuncVals), 'i') for var in vars]
    for example in examples:
        val = int(example[-1])
        for table, var in zip(res, vars):
            table[int(example[var]), val] += 1

    return res
Ejemplo n.º 2
0
    def ConstructNodes(self, nodeCounts, actFunc, actFuncParms):
        """ build an unconnected network and set node counts

          **Arguments**

            - nodeCounts: a list containing the number of nodes to be in each layer.
               the ordering is:
                (nInput,nHidden1,nHidden2, ... , nHiddenN, nOutput)

            - actFunc: passed on as the _actFunc_ keyword of every node
              which is constructed

            - actFuncParms: passed on as the _actFuncParms_ keyword of every
              node which is constructed

          **Notes**

            - this sets _nodeCounts_, _numInputNodes_, _numOutputNodes_,
              _numHiddenLayers_, _numInHidden_, _nodeList_ and
              _layerIndices_ on the network

            - _layerIndices_ holds, for each layer, the list of indices (into
              _nodeList_) of the nodes in that layer

        """
        self.nodeCounts = nodeCounts
        self.numInputNodes = nodeCounts[0]
        self.numOutputNodes = nodeCounts[-1]
        self.numHiddenLayers = len(nodeCounts) - 2
        # everything between the input and the output layer is hidden
        self.numInHidden = list(nodeCounts[1:-1])

        numNodes = sum(self.nodeCounts)
        # the list is allocated up front and filled in place because every
        # node is handed a reference to this very list
        self.nodeList = [None] * numNodes
        for i in range(numNodes):
            self.nodeList[i] = NetNode.NetNode(i,
                                               self.nodeList,
                                               actFunc=actFunc,
                                               actFuncParms=actFuncParms)

        self.layerIndices = []
        start = 0
        for count in nodeCounts:
            end = start + count
            # store a real list (on Python 3 a bare range() is a lazy object)
            self.layerIndices.append(list(range(start, end)))
            start = end
Ejemplo n.º 3
0
  def ConstructNodes(self,nodeCounts,actFunc,actFuncParms):
    """ build an unconnected network and set node counts

      **Arguments**

        - nodeCounts: a list containing the number of nodes to be in each layer.
           the ordering is:
            (nInput,nHidden1,nHidden2, ... , nHiddenN, nOutput)

        - actFunc: passed on as the _actFunc_ keyword of every node
          which is constructed

        - actFuncParms: passed on as the _actFuncParms_ keyword of every
          node which is constructed

      **Notes**

        - this sets _nodeCounts_, _numInputNodes_, _numOutputNodes_,
          _numHiddenLayers_, _numInHidden_, _nodeList_ and
          _layerIndices_ on the network

        - _layerIndices_ holds, for each layer, the list of indices (into
          _nodeList_) of the nodes in that layer

    """
    self.nodeCounts = nodeCounts
    self.numInputNodes = nodeCounts[0]
    self.numOutputNodes = nodeCounts[-1]
    self.numHiddenLayers = len(nodeCounts)-2
    # everything between the input and the output layer is hidden
    self.numInHidden = list(nodeCounts[1:-1])

    numNodes = sum(self.nodeCounts)
    # the list is allocated up front and filled in place because every
    # node is handed a reference to this very list
    self.nodeList = [None]*numNodes
    for i in range(numNodes):
      self.nodeList[i] = NetNode.NetNode(i,self.nodeList,
                                         actFunc=actFunc,
                                         actFuncParms=actFuncParms)

    self.layerIndices = []
    start = 0
    for count in nodeCounts:
      end = start + count
      # store a real list (on Python 3 a bare range() is a lazy object)
      self.layerIndices.append(list(range(start,end)))
      start = end
Ejemplo n.º 4
0
Archivo: ID3.py Proyecto: ASKCOS/rdkit
def GenVarTable(examples,nPossibleVals,vars):
  """Generates a list of variable tables for the examples passed in.

    The table for a given variable records the number of times each possible
    value of that variable appears for each possible result of the function.

  **Arguments**

    - examples: a list (nInstances long) of lists of variable values +
      instance values.  The last element of each example is used as the
      result (function value).

    - nPossibleVals: a list containing the number of possible values of
      each variable + the number of values of the function (last element).

    - vars: a list of the (integer) indices of the variables to include
      in the var table

  **Returns**

    a list of variable result tables, one per entry of _vars_ and in the
    same order.  Each table is an integer numpy array which is
    varValues x nResults

  """
  nFuncVals = nPossibleVals[-1]
  # one (nValues x nResults) table of counts per requested variable
  res = [numpy.zeros((nPossibleVals[var],nFuncVals),'i') for var in vars]
  for example in examples:
    val = int(example[-1])
    for table,var in zip(res,vars):
      table[int(example[var]),val] += 1

  return res
Ejemplo n.º 5
0
  def ClassifyExample(self,example,appendExamples=0):
    """ classifies a given example and returns the results of the network

      **Arguments**

        - example: the example to be classified.  If it is longer than the
          number of input nodes, the trailing _numOutputNodes_ entries are
          stripped; if more extra entries than that are present, the first
          entry is dropped as well.

        - appendExamples: not used by this method

      **Returns**

        if the output layer is only one element long, the value of the last
        node (a scalar); otherwise the array holding the values of *all*
        nodes (inputs included).

      **NOTE:**

        the scalar return for single-output networks is why a lot of the
        other network code claims to only support single valued outputs.

        A copy of the full array of node values is stored in _lastResults_.

    """
    nInputs = self.numInputNodes
    if len(example) > nInputs:
      if len(example)-nInputs > self.numOutputNodes:
        example = example[1:-self.numOutputNodes]
      else:
        example = example[:-self.numOutputNodes]
    assert len(example) == nInputs
    totNumNodes = sum(self.nodeCounts)
    results = numpy.zeros(totNumNodes,numpy.float64)
    for i,val in enumerate(example):
      results[i] = val
    # each non-input node fills in its own value from the values before it
    for i in range(nInputs,totNumNodes):
      self.nodeList[i].Eval(results)
    # results[:] would only be a view on the same memory; take a real copy
    # so that changes made to the returned array do not leak into lastResults
    self.lastResults = results.copy()
    if self.numOutputNodes == 1:
      return results[-1]
    else:
      return results
Ejemplo n.º 6
0
    def ClassifyExample(self, example, appendExamples=0):
        """ classifies a given example and returns the results of the network

          **Arguments**

            - example: the example to be classified.  If it is longer than the
              number of input nodes, the trailing _numOutputNodes_ entries are
              stripped; if more extra entries than that are present, the first
              entry is dropped as well.

            - appendExamples: not used by this method

          **Returns**

            if the output layer is only one element long, the value of the
            last node (a scalar); otherwise the array holding the values of
            *all* nodes (inputs included).

          **NOTE:**

            the scalar return for single-output networks is why a lot of the
            other network code claims to only support single valued outputs.

            A copy of the full array of node values is stored in _lastResults_.

        """
        nInputs = self.numInputNodes
        if len(example) > nInputs:
            if len(example) - nInputs > self.numOutputNodes:
                example = example[1:-self.numOutputNodes]
            else:
                example = example[:-self.numOutputNodes]
        assert len(example) == nInputs
        totNumNodes = sum(self.nodeCounts)
        results = numpy.zeros(totNumNodes, numpy.float64)
        for i, val in enumerate(example):
            results[i] = val
        # each non-input node fills in its own value from the values before it
        for i in range(nInputs, totNumNodes):
            self.nodeList[i].Eval(results)
        # results[:] would only be a view on the same memory; take a real copy
        # so that changes made to the returned array do not leak into
        # lastResults
        self.lastResults = results.copy()
        if self.numOutputNodes == 1:
            return results[-1]
        else:
            return results
Ejemplo n.º 7
0
def CalcNPossibleUsingMap(data,order,qBounds,nQBounds=None):
  """ calculates the number of possible values for each variable in a data set

   **Arguments**

     - data: a list of examples

     - order: the ordering map between the variables in _data_ and _qBounds_

     - qBounds: the quantization bounds for the variables

   **Returns**

      a list with the number of possible values each variable takes on in the data set

   **Notes**

     - variables present in _qBounds_ will have their _nPossible_ number read
       from _qbounds

     - _nPossible_ for other numeric variables will be calculated

  """
  numericTypes = [int, float]
  if six.PY2:
    numericTypes.append(long)
    
  print('order:',order, len(order))
  print('qB:',qBounds)
  #print('nQB:',nQBounds, len(nQBounds))
  assert (qBounds and len(order)==len(qBounds)) or (nQBounds and len(order)==len(nQBounds)),\
         'order/qBounds mismatch'
  nVars = len(order)
  nPossible = [-1]*nVars
  cols = range(nVars)
  for i in xrange(nVars):
    if nQBounds and nQBounds[i] != 0:
      nPossible[i] = -1
      cols.remove(i)
    elif len(qBounds[i])>0:
      nPossible[i] = len(qBounds[i])
      cols.remove(i)

  nPts = len(data)
  for i in xrange(nPts):
    for col in cols[:]:
      d = data[i][order[col]]
      if type(d) in numericTypes:
        if int(d) == d:
          nPossible[col] = max(int(d),nPossible[col])
        else:
          nPossible[col] = -1
          cols.remove(col)
      else:
        print('bye bye col %d: %s'%(col,repr(d)))
        nPossible[col] = -1
        cols.remove(col)

  return list(map(lambda x:int(x)+1,nPossible))
Ejemplo n.º 8
0
def CalcNPossibleUsingMap(data, order, qBounds, nQBounds=None):
    """ calculates the number of possible values for each variable in a data set

   **Arguments**

     - data: a list of examples

     - order: the ordering map between the variables in _data_ and _qBounds_

     - qBounds: the quantization bounds for the variables

   **Returns**

      a list with the number of possible values each variable takes on in the data set

   **Notes**

     - variables present in _qBounds_ will have their _nPossible_ number read
       from _qbounds

     - _nPossible_ for other numeric variables will be calculated

  """
    numericTypes = [int, float]
    if six.PY2:
        numericTypes.append(long)

    print('order:', order, len(order))
    print('qB:', qBounds)
    #print('nQB:',nQBounds, len(nQBounds))
    assert (qBounds and len(order)==len(qBounds)) or (nQBounds and len(order)==len(nQBounds)),\
           'order/qBounds mismatch'
    nVars = len(order)
    nPossible = [-1] * nVars
    cols = range(nVars)
    for i in xrange(nVars):
        if nQBounds and nQBounds[i] != 0:
            nPossible[i] = -1
            cols.remove(i)
        elif len(qBounds[i]) > 0:
            nPossible[i] = len(qBounds[i])
            cols.remove(i)

    nPts = len(data)
    for i in xrange(nPts):
        for col in cols[:]:
            d = data[i][order[col]]
            if type(d) in numericTypes:
                if int(d) == d:
                    nPossible[col] = max(int(d), nPossible[col])
                else:
                    nPossible[col] = -1
                    cols.remove(col)
            else:
                print('bye bye col %d: %s' % (col, repr(d)))
                nPossible[col] = -1
                cols.remove(col)

    return list(map(lambda x: int(x) + 1, nPossible))
Ejemplo n.º 9
0
 def test3Classify(self):
   " testing classification "
   self._setupTree1()
   self._setupTree2()
   for i in xrange(len(self.examples1)):
     assert self.t1.ClassifyExample(self.examples1[i])==self.examples1[i][-1],\
            'examples1[%d] misclassified'%i
   for i in xrange(len(self.examples2)):
     assert self.t2.ClassifyExample(self.examples2[i])==self.examples2[i][-1],\
            'examples2[%d] misclassified'%i
Ejemplo n.º 10
0
 def test3Classify(self):
     " testing classification "
     self._setupTree1()
     self._setupTree2()
     for i in xrange(len(self.examples1)):
         assert self.t1.ClassifyExample(self.examples1[i])==self.examples1[i][-1],\
                'examples1[%d] misclassified'%i
     for i in xrange(len(self.examples2)):
         assert self.t2.ClassifyExample(self.examples2[i])==self.examples2[i][-1],\
                'examples2[%d] misclassified'%i
Ejemplo n.º 11
0
def TypeFinder(data, nRows, nCols, nullMarker=None):
    """ finds the types of the columns in _data_

    **Arguments**

      - data: the data table, indexed as _data[row][col]_

      - nRows: the number of rows of _data_ to examine

      - nCols: the number of columns of _data_ to examine

      - nullMarker: if not None, elements of the data table which are
        equal to nullMarker will not count towards setting the type of
        their columns.

    **Returns**

      a list (nCols long) of 2-element lists, one per column:

        1) the type found for the column (str, int or float), or -1 if no
           element of the column set a type

        2) a length for the column: the largest _len(str(value))_ seen
           (never less than 1)

    **Notes**

      - elements which are None are skipped

      - anything which is not exactly an int or a float is converted to
        a string with _str()_ and treated as text

      - a float which is equal to its integer value counts as an int

    """
    # when numeric types compete within a column the higher priority wins
    priorities = {float: 3, int: 2, str: 1, -1: -1}
    res = [None] * nCols
    for col in xrange(nCols):
        # [type, length]; -1 means "no type seen yet"
        typeHere = [-1, 1]
        for row in xrange(nRows):
            d = data[row][col]
            if d is None:
                continue
            locType = type(d)
            if locType != float and locType != int:
                locType = str
                try:
                    d = str(d)
                except UnicodeError as msg:
                    print(
                        'cannot convert text from row %d col %d to a string' %
                        (row + 2, col))
                    print('\t>%s' % (repr(d)))
                    raise UnicodeError(msg)
            else:
                typeHere[1] = max(typeHere[1], len(str(d)))
            if isinstance(d, string_types):
                # text marks the column as str, unless it is the null marker
                if nullMarker is None or d != nullMarker:
                    l = max(len(d), typeHere[1])
                    typeHere = [str, l]
            else:
                # numeric value: decide whether it is really integral
                try:
                    fD = float(int(d))
                except OverflowError:
                    locType = float
                else:
                    if fD == d:
                        locType = int
                # NOTE(review): typeHere[0] is either -1 or a type object,
                # never a string instance, so the isinstance() test below
                # looks like it is always satisfied.  As written, a numeric
                # value can replace a column type of str which was set by an
                # earlier row - confirm whether that is intended.
                if not isinstance(typeHere[0], string_types) and \
                   priorities[locType] > priorities[typeHere[0]]:
                    typeHere[0] = locType
        res[col] = typeHere
    return res
Ejemplo n.º 12
0
 def testSingleCalcs(self):
   " testing calculation of a single descriptor "
   for i,cExpr in enumerate(self.cExprs):
     # the expression under test is appended to the shared leading pieces
     argVect = self.piece1 + [cExpr]
     res = Parser.CalcSingleCompoundDescriptor(self.compos,argVect,self.aDict,self.pDict)
     # agreement with the reference value to 2 decimal places is required
     self.assertAlmostEqual(res,self.results[i],2)
Ejemplo n.º 13
0
def CrossValidate(tree, testExamples, appendExamples=0):
    """ Determines the classification error for the testExamples

    **Arguments**

      - tree: a decision tree (or anything supporting a _ClassifyExample()_ method)

      - testExamples: a list of examples to be used for testing.  The last
        element of each example is taken to be its true result.

      - appendExamples: a toggle which is passed along to the tree as it does
        the classification. The trees can use this to store the examples they
        classify locally.

    **Returns**

      a 2-tuple consisting of:

        1) the fraction (between 0 and 1) of the examples which were
           misclassified

        2) a list of misclassified examples

    **Notes**

      - an empty _testExamples_ results in a ZeroDivisionError

    """
    # imported here so that the function does not depend on the imports of
    # the enclosing module
    import numpy

    nTest = len(testExamples)
    badExamples = []
    for testEx in testExamples:
        trueRes = testEx[-1]
        res = tree.ClassifyExample(testEx, appendExamples)
        # numpy.any() copes with plain Python scalars (where the comparison
        # yields a bool, which has no .any() method) as well as numpy values
        if numpy.any(trueRes != res):
            badExamples.append(testEx)

    return float(len(badExamples)) / nTest, badExamples
Ejemplo n.º 14
0
def CharacteristicPolynomial(mol,mat=None):
  """ calculates the characteristic polynomial for a molecular graph

    The approach used is the Le Verrier-Faddeev-Frame method described
    in _Chemical Graph Theory, 2nd Edition_ by Nenad Trinajstic (CRC Press,
    1992), pg 76.

    **Arguments**

      - mol: the molecule; only its _GetNumAtoms()_ method is used

      - mat: the (nAtoms x nAtoms) matrix to use.  Using the molecule's
        Weighted Adjacency Matrix when this is not passed in is not
        implemented yet, so _mat_ is currently required.

    **Returns**

      a numpy array (nAtoms+1 long) with the coefficients of the polynomial,
      highest power first (the first element is always 1.0)

    **Raises**

      NotImplementedError if _mat_ is None

  """
  nAtoms = mol.GetNumAtoms()
  if mat is None:
    # FIX: complete this:
    #A = mol.GetWeightedAdjacencyMatrix()
    raise NotImplementedError('the matrix must be provided: calculating the '
                              'weighted adjacency matrix is not implemented')
  A = mat
  I = 1.*numpy.identity(nAtoms)
  An = A
  # numpy.float (a plain alias of float) was removed from numpy
  res = numpy.zeros(nAtoms+1,numpy.float64)
  res[0] = 1.0
  for n in range(1,nAtoms+1):
    res[n] = 1./n*numpy.trace(An)
    Bn = An - res[n]*I
    An = numpy.dot(A,Bn)

  # the recursion produces the coefficients with inverted sign
  res[1:] *= -1
  return res
Ejemplo n.º 15
0
def PyInfoGain(varMat):
  """ calculates the information gain for a variable

    **Arguments**

      varMat is a numpy array with the number of occurrences
        of each result for each possible value of the given variable.

      So, for a variable which adopts 4 possible values and a result which
        has 3 possible values, varMat would be 4x3

    **Returns**

      The expected information gain (0 if _varMat_ contains no counts)
  """
  variableRes = numpy.sum(varMat,1) # indexed by variable, Sv in Mitchell's notation
  overallRes = numpy.sum(varMat,0) # indexed by result, S in Mitchell's notation

  # entropy of each variable value, weighted by how often the value occurs
  term2 = 0
  for nWithVal,row in zip(variableRes,varMat):
    term2 = term2 + nWithVal * InfoEntropy(row)
  tSum = sum(overallRes)
  if tSum != 0.0:
    term2 = 1./tSum * term2
    gain = InfoEntropy(overallRes) - term2
  else:
    gain = 0
  return gain
Ejemplo n.º 16
0
    def ProcessSimpleList(self):
        """ Handles the list of simple descriptors

          This constructs the list of _nonZeroDescriptors_ and _requiredDescriptors_.

          For every entry of _simpleList_ which carries the 'NONZERO' flag:

            - a '<name> != 0' expression is added to _nonZeroDescriptors_
              (unless the name is one of the names in _countOptions_)

            - the flag is removed; entries which had no other flag are
              removed from _simpleList_ altogether

          _requiredDescriptors_ is then the list of the names left in
          _simpleList_, minus (one occurrence of) each name in _countOptions_.

        """
        self.nonZeroDescriptors = []
        # real lists (not lazy map objects) are required: tList is searched
        # repeatedly and requiredDescriptors is modified in place
        tList = [x[0] for x in countOptions]
        # loop over a copy, entries may be removed from simpleList on the way
        for entry in self.simpleList[:]:
            if 'NONZERO' not in entry[1]:
                continue
            if entry[0] not in tList:
                self.nonZeroDescriptors.append('%s != 0' % entry[0])
            if len(entry[1]) == 1:
                self.simpleList.remove(entry)
            else:
                self.simpleList[self.simpleList.index(entry)][1].remove(
                    'NONZERO')
        self.requiredDescriptors = [x[0] for x in self.simpleList]
        for entry in tList:
            if entry in self.requiredDescriptors:
                self.requiredDescriptors.remove(entry)
Ejemplo n.º 17
0
def ReadVars(inFile):
  """ reads the variables and quantization bounds from a .qdat or .dat file

    **Arguments**

      - inFile: a file object

    **Returns**

      a 2-tuple containing:

        1) varNames: a list of the variable names

        2) qbounds: the list of quantization bounds for each variable
           (a list of floats, empty for variables without bounds)

    **Notes**

      - reading starts after the line matching 'Variable Table' and stops at
        the first line containing '# ----'

      - for each line in between, the first two characters are skipped; the
        text up to the first '[' is the variable name and the text after
        it, minus its last two characters (presumably the closing bracket
        and the newline), is the comma-separated list of bounds

  """
  varNames = []
  boundsText = []
  fileutils.MoveToMatchingLine(inFile,'Variable Table')
  inLine = inFile.readline()
  while '# ----' not in inLine:
    splitLine = inLine[2:].split('[')
    varNames.append(splitLine[0].strip())
    boundsText.append(splitLine[1][:-2])
    inLine = inFile.readline()
  # convert the text of the bounds once all of the lines have been read
  qBounds = [[float(item) for item in txt.split(',')] if txt != '' else []
             for txt in boundsText]
  return varNames,qBounds
Ejemplo n.º 18
0
    def testTreeGrow(self):
        " testing tree-based composite "
        # the reference composite is a pickle which ships with the test data
        with open(
                RDConfig.RDCodeDir +
                '/ML/Composite/test_data/composite_base.pkl', 'rb') as pklF:
            self.refCompos = cPickle.load(pklF)

        composite = Composite.Composite()
        composite._varNames = self.varNames
        composite.SetQuantBounds(self.qBounds, self.nPoss)
        from rdkit.ML.DecTree import CrossValidate
        driver = CrossValidate.CrossValidationDriver
        pruner = None
        composite.Grow(self.examples,
                       self.attrs, [],
                       buildDriver=driver,
                       pruner=pruner,
                       nTries=100,
                       silent=1)
        composite.AverageErrors()
        composite.SortModels()

        # uncomment to regenerate the reference data:
        #with open(RDConfig.RDCodeDir+'/ML/Composite/test_data/composite_base.pkl','wb') as pklF:
        #  cPickle.dump(composite,pklF)

        self.treeComposite = composite
        self.assertEqual(len(composite), len(self.refCompos))
        for i in range(len(composite)):
            # each entry unpacks into three items, only the last one of
            # which is compared with the reference
            _, _, e1 = composite[i]
            _, _, e2 = self.refCompos[i]
            self.assertEqual(e1, e2)
Ejemplo n.º 19
0
def ChooseOptimalRoot(examples, trainExamples, testExamples, attrs, nPossibleVals, treeBuilder,
                      nQuantBounds=[], **kwargs):
  """ loops through all possible tree roots and chooses the one which produces the best tree

  **Arguments**

    - examples: the full set of examples

    - trainExamples: the training examples

    - testExamples: the testing examples

    - attrs: a list of attributes to consider in the tree building
      (the list which is passed in is not modified)

    - nPossibleVals: a list of the number of possible values each variable can adopt

    - treeBuilder: the function to be used to actually build the tree

    - nQuantBounds: an optional list.  If present, it's assumed that the builder
      algorithm takes this argument as well (for building QuantTrees).
      Attributes with a value of -1 in this list are not considered.

    - any additional keyword arguments are passed on to _treeBuilder_

  **Returns**

    The best tree found

  **Notes**

    1) Trees are built using _trainExamples_

    2) Testing of each tree (to determine which is best) is done using _CrossValidate_ and
       the entire set of data (i.e. all of _examples_)

    3) _testExamples_ is not used at all, which immediately raises the question of
       why it's even being passed in

    4) the first attribute is never tried as the root: the loop over the roots
       starts with the second one

  """
  attrs = attrs[:]
  if nQuantBounds:
    for i in range(len(nQuantBounds)):
      if nQuantBounds[i] == -1 and i in attrs:
        attrs.remove(i)
  nAttrs = len(attrs)
  trees = [None] * nAttrs
  errs = [0] * nAttrs
  errs[0] = 1e6

  for i in range(1, nAttrs):
    argD = {'initialVar': attrs[i]}
    argD.update(kwargs)
    if nQuantBounds is None or nQuantBounds == []:
      trees[i] = treeBuilder(trainExamples, attrs, nPossibleVals, **argD)
    else:
      trees[i] = treeBuilder(trainExamples, attrs, nPossibleVals, nQuantBounds, **argD)
    if trees[i]:
      errs[i], _ = CrossValidate(trees[i], examples, appendExamples=0)
    else:
      # no tree could be built from this root, make sure it is never picked
      errs[i] = 1e6
  best = numpy.argmin(errs)
  # FIX: this used to say 'trees[i]', could that possibly have been right?
  return trees[best]
Ejemplo n.º 20
0
  def CalcCompoundDescriptorsForComposition(self, compos='', composList=None, propDict={}):
    """ calculates all simple descriptors for a given composition

      **Arguments**

        - compos: a string representation of the composition

        - composList: a *composVect*

        - propDict: a dictionary containing the properties of the composition
          as a whole (e.g. structural variables, etc.)

        The client must provide either _compos_ or _composList_.  If both are
        provided, _composList_ takes priority.

      **Returns**
        the list of descriptor values, one for each entry in _compoundList_

      **Notes**

        - when _compos_ is provided, this uses _chemutils.SplitComposition_
          to split the composition into its individual pieces

        - the default _propDict_ is a single dictionary shared between calls,
          it is only handed on to the parser here

    """
    if composList is None:
      composList = chemutils.SplitComposition(compos)
    # the first element of each compound descriptor entry is not passed on
    # to the parser
    return [
      Parser.CalcSingleCompoundDescriptor(composList, cmpd[1:], self.atomDict, propDict)
      for cmpd in self.compoundList
    ]
Ejemplo n.º 21
0
  def ProcessSimpleList(self):
    """ Handles the list of simple descriptors

      This constructs the list of _nonZeroDescriptors_ and _requiredDescriptors_.

      For every entry of _simpleList_ which carries the 'NONZERO' flag:

        - a '<name> != 0' expression is added to _nonZeroDescriptors_
          (unless the name is one of the names in _countOptions_)

        - the flag is removed; entries which had no other flag are
          removed from _simpleList_ altogether

      _requiredDescriptors_ is then the list of the names left in
      _simpleList_, minus (one occurrence of) each name in _countOptions_.

    """
    self.nonZeroDescriptors = []
    # real lists (not lazy map objects) are required: tList is searched
    # repeatedly and requiredDescriptors is modified in place
    tList = [x[0] for x in countOptions]
    # loop over a copy, entries may be removed from simpleList on the way
    for entry in self.simpleList[:]:
      if 'NONZERO' not in entry[1]:
        continue
      if entry[0] not in tList:
        self.nonZeroDescriptors.append('%s != 0' % entry[0])
      if len(entry[1]) == 1:
        self.simpleList.remove(entry)
      else:
        self.simpleList[self.simpleList.index(entry)][1].remove('NONZERO')
    self.requiredDescriptors = [x[0] for x in self.simpleList]
    for entry in tList:
      if entry in self.requiredDescriptors:
        self.requiredDescriptors.remove(entry)
Ejemplo n.º 22
0
    def testTreeGrow(self):
        " testing tree-based composite "
        # the reference pickle is read as text so that line endings can be
        # normalized before it is unpickled; the with block closes the file
        with open(RDConfig.RDCodeDir + "/ML/Composite/test_data/composite_base.pkl", "r") as pklTF:
            buf = pklTF.read().replace("\r\n", "\n").encode("utf-8")
        with io.BytesIO(buf) as pklF:
            self.refCompos = cPickle.load(pklF)

        composite = Composite.Composite()
        composite._varNames = self.varNames
        composite.SetQuantBounds(self.qBounds, self.nPoss)
        from rdkit.ML.DecTree import CrossValidate

        driver = CrossValidate.CrossValidationDriver
        pruner = None
        composite.Grow(self.examples, self.attrs, [], buildDriver=driver, pruner=pruner, nTries=100, silent=1)
        composite.AverageErrors()
        composite.SortModels()

        # uncomment to regenerate the reference data:
        # with open(RDConfig.RDCodeDir+'/ML/Composite/test_data/composite_base.pkl','wb') as pklF:
        #  cPickle.dump(composite,pklF)

        self.treeComposite = composite
        self.assertEqual(len(composite), len(self.refCompos))
        for i in range(len(composite)):
            # each entry unpacks into three items, only the last one of
            # which is compared with the reference
            _, _, e1 = composite[i]
            _, _, e2 = self.refCompos[i]
            self.assertEqual(e1, e2)
Ejemplo n.º 23
0
def PyInfoGain(varMat):
  """ calculates the information gain for a variable

    **Arguments**

      varMat is a numpy array with the number of occurrences
        of each result for each possible value of the given variable.

      So, for a variable which adopts 4 possible values and a result which
        has 3 possible values, varMat would be 4x3

    **Returns**

      The expected information gain (0 if _varMat_ contains no counts)
  """
  variableRes = numpy.sum(varMat, 1)  # indexed by variable, Sv in Mitchell's notation
  overallRes = numpy.sum(varMat, 0)  # indexed by result, S in Mitchell's notation

  # entropy of each variable value, weighted by how often the value occurs
  term2 = 0
  for nWithVal, row in zip(variableRes, varMat):
    term2 = term2 + nWithVal * InfoEntropy(row)
  tSum = sum(overallRes)
  if tSum != 0.0:
    term2 = 1. / tSum * term2
    gain = InfoEntropy(overallRes) - term2
  else:
    gain = 0
  return gain
Ejemplo n.º 24
0
 def testSingleCalcs(self):
   " testing calculation of a single descriptor "
   for i, cExpr in enumerate(self.cExprs):
     # the expression under test is appended to the shared leading pieces
     argVect = self.piece1 + [cExpr]
     res = Parser.CalcSingleCompoundDescriptor(self.compos, argVect, self.aDict, self.pDict)
     # agreement with the reference value to 2 decimal places is required
     self.assertAlmostEqual(res, self.results[i], 2)
Ejemplo n.º 25
0
  def _CalcNPossible(self, data):
    """calculates the number of possible values of each variable (where possible)

      **Arguments**

         -data: a list of examples to be used

      **Returns**

         a list of nPossible values for each variable

      **Notes**

         - variables with quantization bounds (in _self.qBounds_) get the
           number of their bounds

         - other variables get the largest value found in _data_ + 1, or 0
           if any of their values is non-numeric or non-integral

    """
    nVars = self.GetNVars() + self.nResults
    nPossible = [-1] * nVars
    cols = list(range(nVars))
    for i, bounds in enumerate(self.qBounds):
      if len(bounds) > 0:
        nPossible[i] = len(bounds)
        cols.remove(i)

    # use the examples which were passed in (this used to look at self.data
    # regardless of the argument)
    for pt in data:
      # iterate over a copy, columns may be dropped while we go
      for col in cols[:]:
        d = pt[col]
        if type(d) in numericTypes and math.floor(d) == d:
          nPossible[col] = max(math.floor(d), nPossible[col])
        else:
          nPossible[col] = -1
          cols.remove(col)
    return [int(x) + 1 for x in nPossible]
Ejemplo n.º 26
0
def ReadVars(inFile):
    """ reads the variables and quantization bounds from a .qdat or .dat file

    **Arguments**

      - inFile: a file object

    **Returns**

      a 2-tuple containing:

        1) varNames: a list of the variable names

        2) qbounds: the list of quantization bounds for each variable
           (a list of floats, empty for variables without bounds)

    **Notes**

      - reading starts after the line matching 'Variable Table' and stops at
        the first line containing '# ----'

      - for each line in between, the first two characters are skipped; the
        text up to the first '[' is the variable name and the text after
        it, minus its last two characters (presumably the closing bracket
        and the newline), is the comma-separated list of bounds

    """
    varNames = []
    boundsText = []
    fileutils.MoveToMatchingLine(inFile, 'Variable Table')
    inLine = inFile.readline()
    while '# ----' not in inLine:
        splitLine = inLine[2:].split('[')
        varNames.append(splitLine[0].strip())
        boundsText.append(splitLine[1][:-2])
        inLine = inFile.readline()
    # convert the text of the bounds once all of the lines have been read
    qBounds = [[float(item) for item in txt.split(',')] if txt != '' else []
               for txt in boundsText]
    return varNames, qBounds
Ejemplo n.º 27
0
def CrossValidate(tree, testExamples, appendExamples=0):
  """ Determines the classification error for the testExamples

    **Arguments**

      - tree: a decision tree (or anything supporting a _ClassifyExample()_ method)

      - testExamples: a list of examples to be used for testing.  The last
        element of each example is taken to be its true result.

      - appendExamples: a toggle which is passed along to the tree as it does
        the classification. The trees can use this to store the examples they
        classify locally.

    **Returns**

      a 2-tuple consisting of:

        1) the fraction (between 0 and 1) of the examples which were
           misclassified

        2) a list of misclassified examples

    **Notes**

      - an empty _testExamples_ results in a ZeroDivisionError

  """
  # imported here so that the function does not depend on the imports of
  # the enclosing module
  import numpy

  nTest = len(testExamples)
  badExamples = []
  for testEx in testExamples:
    trueRes = testEx[-1]
    res = tree.ClassifyExample(testEx, appendExamples)
    # numpy.any() copes with plain Python scalars (where the comparison
    # yields a bool, which has no .any() method) as well as numpy values
    if numpy.any(trueRes != res):
      badExamples.append(testEx)

  return float(len(badExamples)) / nTest, badExamples
Ejemplo n.º 28
0
def CharacteristicPolynomial(mol, mat=None):
    """ calculates the characteristic polynomial for a molecular graph

    The approach used is the Le Verrier-Faddeev-Frame method described
    in _Chemical Graph Theory, 2nd Edition_ by Nenad Trinajstic (CRC Press,
    1992), pg 76.

    **Arguments**

      - mol: the molecule; only its _GetNumAtoms()_ method is used

      - mat: the (nAtoms x nAtoms) matrix to use.  Using the molecule's
        Weighted Adjacency Matrix when this is not passed in is not
        implemented yet, so _mat_ is currently required.

    **Returns**

      a numpy array (nAtoms+1 long) with the coefficients of the polynomial,
      highest power first (the first element is always 1.0)

    **Raises**

      NotImplementedError if _mat_ is None

    """
    nAtoms = mol.GetNumAtoms()
    if mat is None:
        # FIX: complete this:
        #A = mol.GetWeightedAdjacencyMatrix()
        raise NotImplementedError('the matrix must be provided: calculating the '
                                  'weighted adjacency matrix is not implemented')
    A = mat
    I = 1. * numpy.identity(nAtoms)
    An = A
    # numpy.float (a plain alias of float) was removed from numpy
    res = numpy.zeros(nAtoms + 1, numpy.float64)
    res[0] = 1.0
    for n in range(1, nAtoms + 1):
        res[n] = 1. / n * numpy.trace(An)
        Bn = An - res[n] * I
        An = numpy.dot(A, Bn)

    # the recursion produces the coefficients with inverted sign
    res[1:] *= -1
    return res
Ejemplo n.º 29
0
def TypeFinder(data, nRows, nCols, nullMarker=None):
  """ finds the types of the columns in _data_

    **Arguments**

      - data: the data table, indexed as _data[row][col]_

      - nRows: the number of rows of _data_ to examine

      - nCols: the number of columns of _data_ to examine

      - nullMarker: if not None, elements of the data table which are
        equal to nullMarker will not count towards setting the type of
        their columns.

    **Returns**

      a list (nCols long) of 2-element lists, one per column:

        1) the type found for the column (str, int or float), or -1 if no
           element of the column set a type

        2) a length for the column: the largest _len(str(value))_ seen
           (never less than 1)

    **Notes**

      - elements which are None are skipped

      - anything which is not exactly an int or a float is converted to
        a string with _str()_ and treated as text

      - a float which is equal to its integer value counts as an int

  """
  # when numeric types compete within a column the higher priority wins
  priorities = {float: 3, int: 2, str: 1, -1: -1}
  res = [None] * nCols
  for col in xrange(nCols):
    # [type, length]; -1 means "no type seen yet"
    typeHere = [-1, 1]
    for row in xrange(nRows):
      d = data[row][col]
      if d is None:
        continue
      locType = type(d)
      if locType != float and locType != int:
        locType = str
        try:
          d = str(d)
        except UnicodeError as msg:  # pragma: nocover
          print('cannot convert text from row %d col %d to a string' % (row + 2, col))
          print('\t>%s' % (repr(d)))
          raise UnicodeError(msg)
      else:
        typeHere[1] = max(typeHere[1], len(str(d)))
      if isinstance(d, string_types):
        # text marks the column as str, unless it is the null marker
        if nullMarker is None or d != nullMarker:
          l = max(len(d), typeHere[1])
          typeHere = [str, l]
      else:
        # numeric value: decide whether it is really integral
        try:
          fD = float(int(d))
        except OverflowError:  # pragma: nocover
          locType = float
        else:
          if fD == d:
            locType = int
        # NOTE(review): typeHere[0] is either -1 or a type object, never a
        # string instance, so the isinstance() test below looks like it is
        # always satisfied.  As written, a numeric value can replace a column
        # type of str which was set by an earlier row - confirm whether that
        # is intended.
        if not isinstance(typeHere[0], string_types) and \
           priorities[locType] > priorities[typeHere[0]]:
          typeHere[0] = locType
    res[col] = typeHere
  return res
Ejemplo n.º 30
0
def GetAtomicData(atomDict, descriptorsDesired, dBase=_atomDbName, table='atomic_data', where='',
                  user='******', password='******', includeElCounts=0):
  """ pulls atomic data from a database

    **Arguments**

      - atomDict: the dictionary to populate

      - descriptorsDesired: the descriptors to pull for each atom

      - dBase: the DB to use

      - table: the DB table to use

      - where: the SQL where clause; it is appended verbatim to the query

      - user: the user name to use with the DB

      - password: the password to use with the DB

      - includeElCounts: if nonzero, valence electron count fields are added to
         the _atomDict_

    **Notes**

      - _atomDict_ is filled in place, keyed by the NAME column; nothing
        is returned

      - NAME is always pulled, and CONFIG is pulled whenever
        _includeElCounts_ is set

      - if the query fails a message is printed and _atomDict_ is left
        unchanged

      - _table_ and _where_ are interpolated directly into the SQL text,
        so they must come from trusted code

  """
  extraFields = ['NVAL', 'NVAL_NO_FULL_F', 'NVAL_NO_FULL_D', 'NVAL_NO_FULL']
  from rdkit.Dbase import DbModule
  cn = DbModule.connect(dBase, user, password)
  c = cn.cursor()
  # this builds a new list, so the caller's list is not modified
  descriptorsDesired = [s.upper() for s in descriptorsDesired]
  if 'NAME' not in descriptorsDesired:
    descriptorsDesired.append('NAME')
  if includeElCounts and 'CONFIG' not in descriptorsDesired:
    descriptorsDesired.append('CONFIG')
  # the electron-count fields are computed below, not read from the DB
  for field in extraFields:
    if field in descriptorsDesired:
      descriptorsDesired.remove(field)
  toPull = ','.join(descriptorsDesired)
  # honour the _table_ argument instead of a hard-coded table name
  command = 'select %s from %s %s' % (toPull, table, where)
  try:
    c.execute(command)
  except Exception:
    print('Problems executing command:', command)
    return
  for atom in c.fetchall():
    # columns come back in the order in which they were requested
    tDict = dict(zip(descriptorsDesired, atom))
    name = tDict['NAME']
    atomDict[name] = tDict
    if includeElCounts:
      config = tDict['CONFIG']
      tDict['NVAL'] = ConfigToNumElectrons(config)
      tDict['NVAL_NO_FULL_F'] = ConfigToNumElectrons(config, ignoreFullF=1)
      tDict['NVAL_NO_FULL_D'] = ConfigToNumElectrons(config, ignoreFullD=1)
      tDict['NVAL_NO_FULL'] = ConfigToNumElectrons(config, ignoreFullF=1, ignoreFullD=1)
Ejemplo n.º 31
0
 def test4UnusedVars(self):
   " testing unused variables "
   self._setupTree1a()
   # the reference tree is stored as a pickle in self.qTree1Name
   with open(self.qTree1Name,'rb') as pklFile:
     refTree = cPickle.load(pklFile)
   assert self.t1 == refTree, 'Incorrect tree generated.'
   # every example must be classified as its own last element
   for idx, example in enumerate(self.examples1):
     predicted = self.t1.ClassifyExample(example)
     assert predicted==example[-1], 'examples1[%d] misclassified'%idx
Ejemplo n.º 32
0
 def test4UnusedVars(self):
     " testing unused variables "
     self._setupTree1a()
     # load the reference tree pickled in self.qTree1Name
     with open(self.qTree1Name, 'rb') as handle:
         expectedTree = cPickle.load(handle)
     assert self.t1 == expectedTree, 'Incorrect tree generated.'
     # the last element of each example is the answer the tree must give
     for pos, ex in enumerate(self.examples1):
         answer = ex[-1]
         assert self.t1.ClassifyExample(ex) == answer, \
                'examples1[%d] misclassified' % pos
Ejemplo n.º 33
0
 def testMultipleCalcs(self):
   " testing calculation of multiple descriptors "
   for idx, cExpr in enumerate(self.cExprs):
     # the same composition is passed twice, so both results must match
     # the single expected value to 2 decimal places
     res = Parser.CalcMultipleCompoundsDescriptor([self.compos, self.compos],
                                                  self.piece1 + [cExpr], self.aDict,
                                                  [self.pDict, self.pDict])
     for which in (0, 1):
       self.assertAlmostEqual(res[which], self.results[idx], 2)
Ejemplo n.º 34
0
 def testMultipleCalcs(self):
   " testing calculation of multiple descriptors "
   for pos, expression in enumerate(self.cExprs):
     argVect = self.piece1 + [expression]
     composPair = [self.compos,self.compos]
     propPair = [self.pDict,self.pDict]
     values = Parser.CalcMultipleCompoundsDescriptor(composPair,argVect,
                                                     self.aDict,propPair)
     # identical inputs: both entries are checked against the same
     # expected value (to 2 decimal places)
     self.assertAlmostEqual(values[0],self.results[pos],2)
     self.assertAlmostEqual(values[1],self.results[pos],2)
Ejemplo n.º 35
0
  def __str__(self):
    """ provides a string representation of the network

      each node is listed with its index, its inputs and its weights,
      followed by the total number of connections

    """
    pieces = ['Network:\n']
    for idx, node in enumerate(self.nodeList):
      pieces.append('\tnode(% 3d):\n'%idx)
      pieces.append('\t\tinputs:  %s\n'%(str(node.GetInputs())))
      pieces.append('\t\tweights: %s\n'%(str(node.GetWeights())))
    pieces.append('Total Number of Connections: % 4d'%self.nConnections)
    return ''.join(pieces)
Ejemplo n.º 36
0
 def testQuantize(self):
     " testing data quantization "
     composite = Composite.Composite()
     composite.SetQuantBounds([[], [1, 2, 3]], [0, 4])
     # (raw example, expected quantized example) pairs
     cases = [
         (["foo", 0], ["foo", 0]),
         (["foo", 1.5], ["foo", 1]),
         (["foo", 5.5], ["foo", 3]),
         (["foo", 2.5], ["foo", 2]),
     ]
     for raw, expected in cases:
         self.assertEqual(composite.QuantizeExample(raw), expected)
Ejemplo n.º 37
0
 def testQuantize(self):
     " testing data quantization "
     # no bounds for the first variable, three bounds for the second
     qBounds = [[], [1, 2, 3]]
     nPoss = [0, 4]
     rawExamples = [['foo', 0], ['foo', 1.5], ['foo', 5.5], ['foo', 2.5]]
     quantized = [['foo', 0], ['foo', 1], ['foo', 3], ['foo', 2]]
     composite = Composite.Composite()
     composite.SetQuantBounds(qBounds, nPoss)
     for example, answer in zip(rawExamples, quantized):
         self.assertEqual(composite.QuantizeExample(example), answer)
Ejemplo n.º 38
0
def _AddDataToDb(dBase, table, user, password, colDefs, colTypes, data, nullMarker=None,
                 blockSize=100, cn=None):
  """ *For Internal Use*

    (drops and) creates a table and then inserts the values

    **Arguments**

      - dBase, user, password: handed to _DbModule.connect_ when no
        connection is supplied

      - table: the name of the table to (re)create

      - colDefs: the column definitions, placed verbatim in the
        _create table_ statement

      - colTypes: one sequence per column; its first element selects the
        conversion applied to the values (float, int, anything else: str)

      - data: the sequence of rows to insert

      - nullMarker: values equal to this (and values which are None) are
        inserted as None

      - blockSize: the number of rows handed to each _insertBlock call

      - cn: an existing connection to use instead of opening a new one

    **Notes**

      - a failure to drop the table is reported and otherwise ignored

      - if the table cannot be created the traceback is printed and
        nothing is inserted

      - when _data_ is empty the (empty) table is still created

      - _table_ and _colDefs_ are interpolated directly into the SQL
        text, so they must come from trusted code

  """
  if not cn:
    cn = DbModule.connect(dBase, user, password)
  c = cn.cursor()
  try:
    c.execute('drop table %s' % (table))
  except Exception:
    print('cannot drop table %s' % (table))
  sqlStr = 'create table %s (%s)' % (table, colDefs)
  try:
    c.execute(sqlStr)
  except Exception:
    print('create table failed: ', sqlStr)
    print('here is the exception:')
    import traceback
    traceback.print_exc()
    return
  cn.commit()
  c = None

  if not len(data):
    # without a first row the number of placeholders is unknown, and
    # there is nothing to insert anyway
    return

  block = []
  entryTxt = [DbModule.placeHolder] * len(data[0])
  dStr = ','.join(entryTxt)
  sqlStr = 'insert into %s values (%s)' % (table, dStr)
  for row in data:
    entries = [None] * len(row)
    for col, val in enumerate(row):
      if val is not None and \
         (nullMarker is None or val != nullMarker):
        if colTypes[col][0] == float:
          entries[col] = float(val)
        elif colTypes[col][0] == int:
          entries[col] = int(val)
        else:
          entries[col] = str(val)
    block.append(tuple(entries))
    if len(block) >= blockSize:
      _insertBlock(cn, sqlStr, block)
      if not hasattr(cn, 'autocommit') or not cn.autocommit:
        cn.commit()
      block = []
  # flush whatever is left over from the last, partial block
  if len(block):
    _insertBlock(cn, sqlStr, block)
  if not hasattr(cn, 'autocommit') or not cn.autocommit:
    cn.commit()
Ejemplo n.º 39
0
def _AddDataToDb(dBase,table,user,password,colDefs,colTypes,data,
                 nullMarker=None,blockSize=100,cn=None):
  """ *For Internal Use*

    (drops and) creates a table and then inserts the values

    **Arguments**

      - dBase, user, password: handed to _DbModule.connect_ when no
        connection is supplied

      - table: the name of the table to (re)create

      - colDefs: the column definitions, placed verbatim in the
        _create table_ statement

      - colTypes: one sequence per column; its first element selects the
        conversion applied to the values (float, int, anything else: str)

      - data: the sequence of rows to insert

      - nullMarker: values equal to this (and values which are None) are
        inserted as None

      - blockSize: the number of rows handed to each _insertBlock call

      - cn: an existing connection to use instead of opening a new one

    **Notes**

      - a failure to drop the table is reported and otherwise ignored

      - if the table cannot be created the traceback is printed and
        nothing is inserted

      - when _data_ is empty the (empty) table is still created

      - _table_ and _colDefs_ are interpolated directly into the SQL
        text, so they must come from trusted code

  """
  if not cn:
    cn = DbModule.connect(dBase,user,password)
  c = cn.cursor()
  try:
    c.execute('drop table %s'%(table))
  except Exception:
    # catching Exception (not everything) lets KeyboardInterrupt and
    # SystemExit through
    print('cannot drop table %s'%(table))
  sqlStr = 'create table %s (%s)'%(table,colDefs)
  try:
    c.execute(sqlStr)
  except Exception:
    print('create table failed: ', sqlStr)
    print('here is the exception:')
    import traceback
    traceback.print_exc()
    return
  cn.commit()
  c = None

  if not len(data):
    # without a first row the number of placeholders is unknown, and
    # there is nothing to insert anyway
    return

  block = []
  entryTxt = [DbModule.placeHolder]*len(data[0])
  dStr = ','.join(entryTxt)
  sqlStr = 'insert into %s values (%s)'%(table,dStr)
  for row in data:
    entries = [None]*len(row)
    for col,val in enumerate(row):
      if val is not None and \
         (nullMarker is None or val != nullMarker):
        if colTypes[col][0] == float:
          entries[col] = float(val)
        elif colTypes[col][0] == int:
          entries[col] = int(val)
        else:
          entries[col] = str(val)
    block.append(tuple(entries))
    if len(block)>=blockSize:
      _insertBlock(cn,sqlStr,block)
      if not hasattr(cn,'autocommit') or not cn.autocommit:
        cn.commit()
      block = []
  # flush whatever is left over from the last, partial block
  if len(block):
    _insertBlock(cn,sqlStr,block)
  if not hasattr(cn,'autocommit') or not cn.autocommit:
    cn.commit()
Ejemplo n.º 40
0
 def testSimpleDescriptorCalc(self):
   " testing simple descriptor calculation "
   composList = ['Nb','Nb3','NbPt','Nb2Pt']
   # reference descriptor values, one row per composition above
   compare = [[2.32224798203, 0.0, 1.34000003338, 1.34000003338],
          [2.32224798203, 0.0, 1.34000003338, 1.34000003338],
          [1.51555249095, 0.806695491076, 1.34000003338, 1.29999995232],
          [1.78445098797, 0.717062658734, 1.34000003338, 1.29999995232]]
   for idx, compos in enumerate(composList):
     calculated = self.desc.CalcSimpleDescriptorsForComposition(compos)
     # the largest absolute deviation must stay below the tolerance
     worst = max(map(lambda ref,val:abs(ref-val),compare[idx],calculated))
     assert worst < self.tol, 'Descriptor calculation failed'
Ejemplo n.º 41
0
 def test4UnusedVars(self):
   " testing unused variables "
   self._setupTree1a()
   # the pickle is read as text so that '\r\n' line endings can be
   # normalised before it is parsed; the with block closes the file
   with open(self.qTree1Name, 'r') as inTFile:
     buf = inTFile.read().replace('\r\n', '\n').encode('utf-8')
   with io.BytesIO(buf) as inFile:
     t2 = cPickle.load(inFile)
   assert self.t1 == t2, 'Incorrect tree generated.'
   # the last element of each example is the answer the tree must give
   for i, example in enumerate(self.examples1):
     assert self.t1.ClassifyExample(example)==example[-1],\
            'examples1[%d] misclassified'%i
Ejemplo n.º 42
0
 def test4UnusedVars(self):
     " testing unused variables "
     self._setupTree1a()
     # the pickle is read as text so that '\r\n' line endings can be
     # normalised before it is parsed; the with block closes the file
     with open(self.qTree1Name, 'r') as inTFile:
         buf = inTFile.read().replace('\r\n', '\n').encode('utf-8')
     with io.BytesIO(buf) as inFile:
         t2 = cPickle.load(inFile)
     assert self.t1 == t2, 'Incorrect tree generated.'
     # the last element of each example is the answer the tree must give
     for i, example in enumerate(self.examples1):
         assert self.t1.ClassifyExample(example)==example[-1],\
                'examples1[%d] misclassified'%i
Ejemplo n.º 43
0
  def GetNamedData(self):
    """ returns a list of named examples

     **Note**

       a named example is a list holding the name of the point followed
        by its data values (as produced by the data row's tolist())

    """
    return [[self.ptNames[idx]] + self.data[idx].tolist()
            for idx in range(self.nPts)]
Ejemplo n.º 44
0
def ID3Boot(examples,
            attrs,
            nPossibleVals,
            initialVar=None,
            depth=0,
            maxDepth=-1,
            **kwargs):
    """ Bootstrapping code for the ID3 algorithm

    see ID3 for descriptions of the arguments

    When _initialVar_ is None the root split is the variable in _attrs_
     with the largest information gain (the standard greedy approach);
     otherwise _initialVar_ itself is used for the first split.

    Unless the keyword argument _recycleVars_ is set, the variable used
     here is removed from the list handed to the children.

    Returns the root node of the tree; it has one child (built by ID3)
     for each possible value of the chosen variable.

  """
    totEntropy = CalcTotalEntropy(examples, nPossibleVals)
    varTable = GenVarTable(examples, nPossibleVals, attrs)

    tree = DecTree.DecTreeNode(None, 'node')
    #tree.SetExamples(examples)
    tree._nResultCodes = nPossibleVals[-1]

    if initialVar is not None:
        best = initialVar
    else:
        # varTable holds one table per entry of attrs, in the same order
        gains = [entropy.InfoGain(table) for table in varTable]
        best = attrs[numpy.argmax(gains)]

    tree.SetName('Var: %d' % best)
    tree.SetData(totEntropy)
    tree.SetLabel(best)
    tree.SetTerminal(0)

    nextAttrs = list(attrs)
    recycle = kwargs.get('recycleVars', 0)
    if not recycle:
        nextAttrs.remove(best)

    for val in range(nPossibleVals[best]):
        # the examples which take this value for the chosen variable
        subset = [example for example in examples if example[best] == val]
        child = ID3(subset, best, nextAttrs, nPossibleVals, depth, maxDepth,
                    **kwargs)
        tree.AddChildNode(child)
    return tree
Ejemplo n.º 45
0
def GetTypeStrings(colHeadings,colTypes,keyCol=None):
  """  returns a list of SQL type strings

    one "<heading> <sql type>" string is produced per entry of
    _colTypes_: float columns become double precision, int columns
    become integer and everything else becomes a varchar whose size is
    the second element of the column type.  The column whose heading
    equals _keyCol_ is additionally marked as the primary key.
  """
  typeStrs=[]
  for idx,colType in enumerate(colTypes):
    heading = colHeadings[idx]
    kind = colType[0]
    if kind == float:
      decl = '%s double precision'%heading
    elif kind == int:
      decl = '%s integer'%heading
    else:
      decl = '%s varchar(%d)'%(heading,colType[1])
    if heading == keyCol:
      decl = '%s not null primary key'%(decl)
    typeStrs.append(decl)
  return typeStrs
Ejemplo n.º 46
0
def GetTypeStrings(colHeadings, colTypes, keyCol=None):
  """  returns a list of SQL type strings

    each entry has the form "<heading> <sql type>", with the SQL type
    chosen from the first element of the column type: float gives
    double precision, int gives integer, anything else gives a varchar
    sized by the second element.  The entry for the column named
    _keyCol_ also gets "not null primary key".
  """
  typeStrs = []
  for pos, typ in enumerate(colTypes):
    name = colHeadings[pos]
    if typ[0] == float:
      sqlType = 'double precision'
    elif typ[0] == int:
      sqlType = 'integer'
    else:
      sqlType = 'varchar(%d)' % typ[1]
    entry = '%s %s' % (name, sqlType)
    if name == keyCol:
      entry = '%s not null primary key' % (entry)
    typeStrs.append(entry)
  return typeStrs
Ejemplo n.º 47
0
def GenRandomExamples(nVars=10,randScale=0.3,bitProb=0.5,nExamples=500,seed=(0,0),
                      addResults=1):
    """ generates a reproducible set of random binary examples

    **Arguments**

      - nVars: the number of variables in each example

      - randScale: scale factor applied to the random variable weights

      - bitProb: a variable is set when its random draw exceeds this value

      - nExamples: the number of examples to generate

      - seed: a sequence whose first element seeds the random number
        generator (the remaining elements are not used)

      - addResults: if nonzero, a result is appended to each example; it
        is true when the weighted sum of the variables is at least 1

    **Returns**

      a 3-tuple containing:

        1) the list of examples

        2) the list of variable indices (0 .. nVars-1)

        3) nPossibleVals: one entry (2) for each variable plus one for
           the result

  """
    random.seed(seed[0])
    varWeights = numpy.array([random.random() for _ in range(nVars)])*randScale
    examples = []

    for _ in range(nExamples):
        varVals = [random.random()>bitProb for _ in range(nVars)]
        if addResults:
            # the builtin sum is kept so that the rounding of the total
            # (and so the results) stays exactly as it was
            total = sum(numpy.array(varVals) * varWeights)
            varVals.append(total>=1.)
        examples.append(varVals)

    # one entry per variable plus one for the result; this was sized by
    # the number of examples, which is wrong whenever nExamples < nVars
    nPossibleVals = [2]*(nVars+1)
    attrs = list(range(nVars))

    return (examples,attrs,nPossibleVals)
Ejemplo n.º 48
0
def ReadGeneralExamples(inFile):
    """ reads the examples from a .dat file

    **Arguments**

      - inFile: a file object

    **Returns**

      a 2-tuple containing:

        1) the names of the examples

        2) a list of lists containing the examples themselves

    **Note**

      - lines starting with # are skipped

      - fields are separated by runs of spaces or runs of tabs; the
        first field of a line is the name of the example

      - the last field of every line is discarded
        (NOTE(review): presumably the line terminator that follows
        trailing whitespace; a line without trailing whitespace loses
        its last value -- confirm against the .dat format)

      - this attempts to convert variable values to ints, then floats.
        if those both fail, they are left as strings

  """
    commentExpr = re.compile(r'^#')
    # '+' rather than '*': a pattern which can match the empty string
    # makes re.split break the line between every character on
    # Python >= 3.7 (older versions ignored the empty matches)
    sepExpr = re.compile(r'[\ ]+|[\t]+')
    examples = []
    names = []
    inLine = inFile.readline()
    while inLine:
        if commentExpr.search(inLine) is None:
            resArr = sepExpr.split(inLine)[:-1]
            if len(resArr) > 1:
                values = []
                for d in resArr[1:]:
                    try:
                        d = int(d)
                    except ValueError:
                        try:
                            d = float(d)
                        except ValueError:
                            pass
                    values.append(d)
                examples.append(values)
                names.append(resArr[0])
        inLine = inFile.readline()
    return names, examples
Ejemplo n.º 49
0
Archivo: ID3.py Proyecto: ASKCOS/rdkit
def ID3Boot(examples,attrs,nPossibleVals,initialVar=None,depth=0,maxDepth=-1,
            **kwargs):
  """ Bootstrapping code for the ID3 algorithm

    see ID3 for descriptions of the arguments

    When _initialVar_ is None the root split is the variable in _attrs_
     with the largest information gain (the standard greedy approach);
     otherwise _initialVar_ itself is used for the first split.

    Unless the keyword argument _recycleVars_ is set, the variable used
     here is removed from the list handed to the children.

    Returns the root node of the tree; it has one child (built by ID3)
     for each possible value of the chosen variable.

  """
  totEntropy = CalcTotalEntropy(examples,nPossibleVals)
  varTable = GenVarTable(examples,nPossibleVals,attrs)

  tree=DecTree.DecTreeNode(None,'node')
  #tree.SetExamples(examples)
  tree._nResultCodes = nPossibleVals[-1]

  if initialVar is not None:
    best = initialVar
  else:
    # varTable holds one table per entry of attrs, in the same order
    gains = [entropy.InfoGain(tbl) for tbl in varTable]
    best = attrs[numpy.argmax(gains)]

  tree.SetName('Var: %d'%best)
  tree.SetData(totEntropy)
  tree.SetLabel(best)
  tree.SetTerminal(0)

  nextAttrs = list(attrs)
  recycle = kwargs.get('recycleVars',0)
  if not recycle:
    nextAttrs.remove(best)

  for val in range(nPossibleVals[best]):
    # the examples which take this value for the chosen variable
    subset = [ex for ex in examples if ex[best] == val]
    child = ID3(subset,best,nextAttrs,nPossibleVals,depth,maxDepth,
                **kwargs)
    tree.AddChildNode(child)
  return tree
Ejemplo n.º 50
0
def ReadGeneralExamples(inFile):
  """ reads the examples from a .dat file

    **Arguments**

      - inFile: a file object

    **Returns**

      a 2-tuple containing:

        1) the names of the examples

        2) a list of lists containing the examples themselves

    **Note**

      - lines starting with # are skipped

      - fields are separated by runs of spaces or runs of tabs; the
        first field of a line is the name of the example

      - the last field of every line is discarded
        (NOTE(review): presumably the line terminator that follows
        trailing whitespace; a line without trailing whitespace loses
        its last value -- confirm against the .dat format)

      - this attempts to convert variable values to ints, then floats.
        if those both fail, they are left as strings

  """
  commentExpr = re.compile(r'^#')
  # '+' rather than '*': a pattern which can match the empty string
  # makes re.split break the line between every character on
  # Python >= 3.7 (older versions ignored the empty matches)
  sepExpr = re.compile(r'[\ ]+|[\t]+')
  examples = []
  names = []
  inLine = inFile.readline()
  while inLine:
    if commentExpr.search(inLine) is None:
      resArr = sepExpr.split(inLine)[:-1]
      if len(resArr)>1:
        values = []
        for d in resArr[1:]:
          try:
            d = int(d)
          except ValueError:
            try:
              d = float(d)
            except ValueError:
              pass
          values.append(d)
        examples.append(values)
        names.append(resArr[0])
    inLine = inFile.readline()
  return names,examples
Ejemplo n.º 51
0
  def FullyConnectNodes(self):
    """ Fully connects each layer in the network to the one above it

     Every node of each hidden layer, and then of the last layer in
     self.layerIndices, receives all the nodes of the previous layer as
     its inputs.  The total number of connections made is stored in
     self.nConnections.

     **Note**
       this sets the connections, but does not assign weights

    """
    # a real list, so that the first layer receives a list (and not a
    # lazy range object) under Python 3 as well as Python 2
    prevLayer = list(range(self.numInputNodes))
    # the layers which receive inputs: the hidden layers, then the
    # last layer
    targets = [self.layerIndices[layer+1] for layer in range(self.numHiddenLayers)]
    targets.append(self.layerIndices[-1])

    nConnections = 0
    for layerNodes in targets:
      for i in layerNodes:
        self.nodeList[i].SetInputs(prevLayer)
        nConnections += len(prevLayer)
      prevLayer = layerNodes
    self.nConnections = nConnections
Ejemplo n.º 52
0
    def FullyConnectNodes(self):
        """ Fully connects each layer in the network to the one above it

     Every node of each hidden layer, and then of the last layer in
     self.layerIndices, receives all the nodes of the previous layer as
     its inputs.  The total number of connections made is stored in
     self.nConnections.

     **Note**
       this sets the connections, but does not assign weights

    """
        # a real list, so that the first layer receives a list (and not
        # a lazy range object) under Python 3 as well as Python 2
        prevLayer = list(range(self.numInputNodes))
        # the layers which receive inputs: the hidden layers, then the
        # last layer
        targets = [self.layerIndices[layer + 1] for layer in range(self.numHiddenLayers)]
        targets.append(self.layerIndices[-1])

        nConnections = 0
        for layerNodes in targets:
            for i in layerNodes:
                self.nodeList[i].SetInputs(prevLayer)
                nConnections += len(prevLayer)
            prevLayer = layerNodes
        self.nConnections = nConnections
Ejemplo n.º 53
0
    def CalcSimpleDescriptorsForComposition(self, compos='', composList=None):
        """ calculates all simple descriptors for a given composition

      **Arguments**

        - compos: a string representation of the composition

        - composList: a *composVect*

        The client must provide either _compos_ or _composList_.  If both are
        provided, _composList_ takes priority.

      **Returns**
        the list of descriptor values, in the order of the entries of
        _self.simpleList_ and, within an entry, of its targets

      **Notes**

        - when only _compos_ is provided, _chemutils.SplitComposition_
          is used to split the composition into its individual pieces

        - a target which is not an attribute of this object is reported
          with a message and skipped

        - a _KeyError_ raised during the calculation is reported and then
          raised again as a new _KeyError_

    """
        if composList is None:
            composList = chemutils.SplitComposition(compos)
        res = []
        try:
            for descName, targets in self.simpleList:
                for target in targets:
                    try:
                        method = getattr(self, target)
                    except AttributeError:
                        print('Method %s does not exist' % (target))
                        continue
                    res.append(method(descName, composList))
        except KeyError as msg:
            print('composition %s caused problems' % composList)
            raise KeyError(msg)
        return res
Ejemplo n.º 54
0
def _AdjustColHeadings(colHeadings, maxColLabelLen):
  """ *For Internal Use*

    removes illegal characters from column headings
    and truncates those which are too long.

    The list is modified in place and also returned.  Headings longer
    than _maxColLabelLen_ lose their underscores before being cut down,
    and the change is reported with a message.

  """
  for idx, heading in enumerate(colHeadings):
    # strip extra white space, then replace unallowed characters
    cleaned = heading.strip()
    for badChar in (' ', '-', '.'):
      cleaned = cleaned.replace(badChar, '_')
    colHeadings[idx] = cleaned

    if len(cleaned) > maxColLabelLen:
      # interbase (at least) has a limit on the maximum length of a column name
      shortened = cleaned.replace('_', '')[:maxColLabelLen]
      print('\tHeading %s too long, changed to %s' % (cleaned, shortened))
      colHeadings[idx] = shortened
  return colHeadings
Ejemplo n.º 55
0
    def GetDescriptorNames(self):
        """ returns a list of the names of the descriptors this calculator generates

      The names are "<target>_<descriptor>" for every target of every
      entry of _self.simpleList_ which is an attribute of this object,
      followed by the first element of every entry of
      _self.compoundList_.

      The names are cached in _self.descriptorNames_ as a list: the call
      which builds them returns a tuple, later calls return the cached
      list itself.

    """
        if self.descriptorNames is not None:
            return self.descriptorNames

        names = []
        for descName, targets in self.simpleList:
            for target in targets:
                try:
                    # only checks that the target exists
                    getattr(self, target)
                except AttributeError:
                    print('Method %s does not exist' % (target))
                else:
                    names.append('%s_%s' % (target, descName))
        names.extend(entry[0] for entry in self.compoundList)
        self.descriptorNames = list(names)
        return tuple(names)